From 73cfb3c5eeb8b00a6e222751a28fd89a5f6229dc Mon Sep 17 00:00:00 2001 From: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com> Date: Tue, 16 Sep 2025 10:53:43 -0400 Subject: [PATCH 001/518] [Model] Clean up and simplify Mamba2 Metadata Usage in both V0 and V1 (#24331) Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com> --- .../layers/mamba/mamba2_metadata.py | 64 +++++++------------ .../layers/mamba/mamba_mixer2.py | 29 +++------ vllm/model_executor/models/plamo2.py | 29 ++++----- 3 files changed, 45 insertions(+), 77 deletions(-) diff --git a/vllm/model_executor/layers/mamba/mamba2_metadata.py b/vllm/model_executor/layers/mamba/mamba2_metadata.py index 3256ac034aa1..368bfe3af1d3 100644 --- a/vllm/model_executor/layers/mamba/mamba2_metadata.py +++ b/vllm/model_executor/layers/mamba/mamba2_metadata.py @@ -17,14 +17,13 @@ @dataclass class Mamba2Metadata: - - has_initial_states: torch.Tensor prep_initial_states: bool - chunk_size: int - seq_idx: torch.Tensor - chunk_indices: torch.Tensor - chunk_offsets: torch.Tensor + + has_initial_states_p: torch.Tensor + seq_idx_p: torch.Tensor + chunk_indices_p: torch.Tensor + chunk_offsets_p: torch.Tensor """ With continuous batching layout of `x` in vLLM, to enable a Triton program to handle a request in parallel, two supporting tensors are used @@ -68,7 +67,6 @@ def get_platform_metadata_classes() -> tuple[type[AttentionMetadata], ...]: def prepare_mamba2_metadata( chunk_size: int, attn_metadata: AttentionMetadata, - mamba2_metadata=None, ) -> Mamba2Metadata: # compute number of prefill and decode requests @@ -76,11 +74,11 @@ def prepare_mamba2_metadata( num_prefills = attn_metadata.num_prefills num_prefill_tokens = attn_metadata.num_prefill_tokens - seq_idx = None - chunk_indices, chunk_offsets = None, None + seq_idx_p = None + chunk_indices_p, chunk_offsets_p = None, None # Need flags to indicate if there are initial states # currently we really only support the FlashAttention backend - has_initial_states = None + has_initial_states_p = None prep_initial_states = False # Compute seq_idx, chunk_indices and chunk_offsets for prefill only @@ -91,44 +89,30 @@ def prepare_mamba2_metadata( # precompute flag to avoid device syncs later in mamba2 layer # forwards # prep is only needed for mamba2 ssd prefill processing - has_initial_states = attn_metadata.context_lens_tensor > 0 - prep_initial_states = torch.any( - has_initial_states[:num_prefills]).item() - query_start_loc = attn_metadata.query_start_loc[:num_prefills + 1] - seq_idx = torch.repeat_interleave(torch.arange( - num_prefills, dtype=torch.int32, device=query_start_loc.device), - query_start_loc.diff(), - output_size=num_prefill_tokens) - seq_idx.unsqueeze_(0) + has_initial_states_p = ( + attn_metadata.context_lens_tensor[:num_prefills] > 0) + prep_initial_states = torch.any(has_initial_states_p).item() + query_start_loc_p = attn_metadata.query_start_loc[:num_prefills + 1] + seq_idx_p = torch.repeat_interleave(torch.arange( + num_prefills, dtype=torch.int32, device=query_start_loc_p.device), + query_start_loc_p.diff(), + output_size=num_prefill_tokens) + seq_idx_p.unsqueeze_(0) # We compute metadata for chunked prefill once at the top level model # forward and reuse them in mamba layers. If not needed, they will be # ignored inside mamba kernels. 
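# Illustrative sketch (not part of the patch): how seq_idx_p above is derived
# from query_start_loc_p. With two prefill requests of 3 and 2 tokens, every
# prefill token gets tagged with the index of the request it belongs to, which
# is what lets the Triton SSD kernels walk a continuous batch in parallel. The
# toy lengths are made up; only torch is needed to run this.
import torch

query_start_loc_p = torch.tensor([0, 3, 5], dtype=torch.int32)
num_prefills = query_start_loc_p.numel() - 1
num_prefill_tokens = int(query_start_loc_p[-1])
seq_idx_p = torch.repeat_interleave(
    torch.arange(num_prefills, dtype=torch.int32),
    query_start_loc_p.diff(),
    output_size=num_prefill_tokens)
print(seq_idx_p)  # tensor([0, 0, 0, 1, 1], dtype=torch.int32)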
if prep_initial_states: - chunk_indices, chunk_offsets = \ + chunk_indices_p, chunk_offsets_p = \ _query_start_loc_to_chunk_indices_offsets( - query_start_loc, chunk_size, num_prefill_tokens) - - if mamba2_metadata is not None: - mamba2_metadata.has_initial_states = has_initial_states - mamba2_metadata.prep_initial_states = prep_initial_states - mamba2_metadata.chunk_size = chunk_size - mamba2_metadata.seq_idx = seq_idx - mamba2_metadata.chunk_indices = chunk_indices - mamba2_metadata.chunk_offsets = chunk_offsets - # We use 1 reset flag: - # * mamba2_metadata.cu_seqlen is None - # update config specific to (each input) - # (become available at first layer, e.g. conv_weights) - mamba2_metadata.cu_seqlen = None # suppose to be updated at each input - - return mamba2_metadata - return Mamba2Metadata(has_initial_states=has_initial_states, + query_start_loc_p, chunk_size, num_prefill_tokens) + + return Mamba2Metadata(has_initial_states_p=has_initial_states_p, prep_initial_states=prep_initial_states, chunk_size=chunk_size, - seq_idx=seq_idx, - chunk_indices=chunk_indices, - chunk_offsets=chunk_offsets) + seq_idx_p=seq_idx_p, + chunk_indices_p=chunk_indices_p, + chunk_offsets_p=chunk_offsets_p) def update_metadata(x: torch.Tensor, query_start_loc: torch.Tensor, diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 23e19da430e1..02e6a9138c05 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -518,22 +518,19 @@ def forward_cuda( conv_state = self_kv_cache[0].transpose(-1, -2) ssm_state = self_kv_cache[1] state_indices_tensor = attn_metadata.state_indices_tensor - has_initial_states_p = attn_metadata.has_initial_states_p - prep_initial_states = attn_metadata.prep_initial_states - chunk_size = attn_metadata.chunk_size - seq_idx_p = attn_metadata.seq_idx_p - chunk_indices_p = attn_metadata.chunk_indices_p - chunk_offsets_p = attn_metadata.chunk_offsets_p else: conv_state = mamba_cache_params.conv_state ssm_state = mamba_cache_params.ssm_state state_indices_tensor = mamba_cache_params.state_indices_tensor - has_initial_states_p = mamba2_metadata.has_initial_states + + # Common members between V1 metadata and V0 metadata + if mamba2_metadata is not None: + has_initial_states_p = mamba2_metadata.has_initial_states_p prep_initial_states = mamba2_metadata.prep_initial_states chunk_size = mamba2_metadata.chunk_size - seq_idx_p = mamba2_metadata.seq_idx - chunk_indices_p = mamba2_metadata.chunk_indices - chunk_offsets_p = mamba2_metadata.chunk_offsets + seq_idx_p = mamba2_metadata.seq_idx_p + chunk_indices_p = mamba2_metadata.chunk_indices_p + chunk_offsets_p = mamba2_metadata.chunk_offsets_p # 1. Gated MLP's linear projection projected_states, _ = self.in_proj(hidden_states) @@ -677,15 +674,9 @@ def forward_cuda( # 3. 
State Space Model sequence transformation initial_states = None if (has_initial_states_p is not None and prep_initial_states): - # making a copy of the states - if envs.VLLM_USE_V1: - initial_states = torch.where( - has_initial_states_p[:, None, None, None], - ssm_state[state_indices_tensor_p], 0) - else: - initial_states = torch.where( - has_initial_states_p[:num_prefills, None, None, None], - ssm_state[state_indices_tensor_p], 0) + initial_states = torch.where( + has_initial_states_p[:, None, None, None], + ssm_state[state_indices_tensor_p], 0) # NOTE: final output is an in-place update of out tensor varlen_state = mamba_chunk_scan_combined( diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index b9869f5e5880..ef96d272adfb 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -279,22 +279,19 @@ def forward_cuda( conv_state = self_kv_cache[0].transpose(-1, -2) ssm_state = self_kv_cache[1] state_indices_tensor = attn_metadata.state_indices_tensor - has_initial_states_p = attn_metadata.has_initial_states_p - prep_initial_states = attn_metadata.prep_initial_states - chunk_size = attn_metadata.chunk_size - seq_idx_p = attn_metadata.seq_idx_p - chunk_indices_p = attn_metadata.chunk_indices_p - chunk_offsets_p = attn_metadata.chunk_offsets_p else: conv_state = mamba_cache_params.conv_state ssm_state = mamba_cache_params.ssm_state state_indices_tensor = mamba_cache_params.state_indices_tensor - has_initial_states_p = mamba2_metadata.has_initial_states + + # Common members between V1 metadata and V0 metadata + if mamba2_metadata is not None: + has_initial_states_p = mamba2_metadata.has_initial_states_p prep_initial_states = mamba2_metadata.prep_initial_states chunk_size = mamba2_metadata.chunk_size - seq_idx_p = mamba2_metadata.seq_idx - chunk_indices_p = mamba2_metadata.chunk_indices - chunk_offsets_p = mamba2_metadata.chunk_offsets + seq_idx_p = mamba2_metadata.seq_idx_p + chunk_indices_p = mamba2_metadata.chunk_indices_p + chunk_offsets_p = mamba2_metadata.chunk_offsets_p # 1. 
Gated MLP's linear projection projected_states = self.in_proj(hidden_states) @@ -414,14 +411,10 @@ def forward_cuda( initial_states = None if has_initial_states_p is not None and prep_initial_states: # making a copy of the states - if envs.VLLM_USE_V1: - initial_states = torch.where( - has_initial_states_p[:, None, None, None], - ssm_state[state_indices_tensor_p], 0) - else: - initial_states = torch.where( - has_initial_states_p[:num_prefills, None, None, None], - ssm_state[state_indices_tensor_p], 0) + initial_states = torch.where( + has_initial_states_p[:, None, None, None], + ssm_state[state_indices_tensor_p], 0) + varlen_state = mamba_chunk_scan_combined( hidden_states_p.view(1, num_prefill_tokens, self.num_heads // self.tp_size, From 08369289af49229930c61a76ec65b127ad3fba95 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Tue, 16 Sep 2025 16:32:47 +0100 Subject: [PATCH 002/518] [Core][MultiModalHasher] Don't convert memoryviews to bytes during hashing (#24925) Signed-off-by: Lukas Geiger --- vllm/multimodal/hasher.py | 47 +++++++++++++++------------------------ 1 file changed, 18 insertions(+), 29 deletions(-) diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py index da019d40a6fe..0fb1363ce471 100644 --- a/vllm/multimodal/hasher.py +++ b/vllm/multimodal/hasher.py @@ -20,22 +20,22 @@ class MultiModalHasher: @classmethod - def serialize_item(cls, obj: object) -> Union[bytes, memoryview]: + def serialize_item(cls, obj: object) -> Iterable[Union[bytes, memoryview]]: # Simple cases - if isinstance(obj, str): - return obj.encode("utf-8") if isinstance(obj, (bytes, memoryview)): - return obj + return (obj, ) + if isinstance(obj, str): + return (obj.encode("utf-8"), ) if isinstance(obj, (int, float)): - return np.array(obj).tobytes() + return (np.array(obj).tobytes(), ) if isinstance(obj, Image.Image): exif = obj.getexif() if Image.ExifTags.Base.ImageID in exif and isinstance( exif[Image.ExifTags.Base.ImageID], uuid.UUID): # If the image has exif ImageID tag, use that - return exif[Image.ExifTags.Base.ImageID].bytes - return cls.item_to_bytes( + return (exif[Image.ExifTags.Base.ImageID].bytes, ) + return cls.iter_item_to_bytes( "image", np.asarray(convert_image_mode(obj, "RGBA"))) if isinstance(obj, torch.Tensor): tensor_obj: torch.Tensor = obj.cpu() @@ -49,43 +49,34 @@ def serialize_item(cls, obj: object) -> Union[bytes, memoryview]: tensor_obj = tensor_obj.view( (tensor_obj.numel(), )).view(torch.uint8) - return cls.item_to_bytes( + return cls.iter_item_to_bytes( "tensor", { "original_dtype": str(tensor_dtype), "original_shape": tuple(tensor_shape), "data": tensor_obj.numpy(), }) - - return cls.item_to_bytes("tensor", tensor_obj.numpy()) + return cls.iter_item_to_bytes("tensor", tensor_obj.numpy()) if isinstance(obj, np.ndarray): # If the array is non-contiguous, we need to copy it first - arr_data = obj.data if obj.flags.c_contiguous else obj.tobytes() - return cls.item_to_bytes("ndarray", { + arr_data = obj.view( + np.uint8).data if obj.flags.c_contiguous else obj.tobytes() + return cls.iter_item_to_bytes("ndarray", { "dtype": obj.dtype.str, "shape": obj.shape, "data": arr_data, }) - logger.warning( "No serialization method found for %s. 
" "Falling back to pickle.", type(obj)) - return pickle.dumps(obj) - - @classmethod - def item_to_bytes( - cls, - key: str, - obj: object, - ) -> bytes: - return b''.join(kb + vb for kb, vb in cls.iter_item_to_bytes(key, obj)) + return (pickle.dumps(obj), ) @classmethod def iter_item_to_bytes( cls, key: str, obj: object, - ) -> Iterable[tuple[bytes, Union[bytes, memoryview]]]: + ) -> Iterable[Union[bytes, memoryview]]: # Recursive cases if isinstance(obj, (list, tuple)): for i, elem in enumerate(obj): @@ -94,17 +85,15 @@ def iter_item_to_bytes( for k, v in obj.items(): yield from cls.iter_item_to_bytes(f"{key}.{k}", v) else: - key_bytes = key.encode("utf-8") - value_bytes = cls.serialize_item(obj) - yield key_bytes, value_bytes + yield key.encode("utf-8") + yield from cls.serialize_item(obj) @classmethod def hash_kwargs(cls, **kwargs: object) -> str: hasher = blake3() for k, v in kwargs.items(): - for k_bytes, v_bytes in cls.iter_item_to_bytes(k, v): - hasher.update(k_bytes) - hasher.update(v_bytes) + for bytes_ in cls.iter_item_to_bytes(k, v): + hasher.update(bytes_) return hasher.hexdigest() From 567939953b7a9cb0ded6bf0bb21a76917b8fed97 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Tue, 16 Sep 2025 09:21:48 -0700 Subject: [PATCH 003/518] [Core/DBO][1/N] Add Dual-Batch Overlap mechanism to VLLM (#23693) Signed-off-by: Lucas Wilkinson Signed-off-by: Sage Moore Signed-off-by: Lucas Wilkinson Signed-off-by: yewentao256 Co-authored-by: Lucas Wilkinson Co-authored-by: Lucas Wilkinson Co-authored-by: yewentao256 Co-authored-by: Lucas Wilkinson Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> --- examples/offline_inference/data_parallel.py | 8 + .../v1/attention/test_attention_splitting.py | 10 +- tests/v1/spec_decode/test_eagle.py | 8 +- vllm/config/__init__.py | 8 + vllm/config/parallel.py | 8 + .../device_communicators/all2all.py | 5 - vllm/engine/arg_utils.py | 10 + vllm/forward_context.py | 121 +++++-- .../fused_moe/deepep_ht_prepare_finalize.py | 26 +- .../fused_moe/deepep_ll_prepare_finalize.py | 45 ++- vllm/model_executor/layers/fused_moe/layer.py | 58 +++- .../layers/fused_moe/modular_kernel.py | 63 +++- .../layers/fused_moe/pplx_prepare_finalize.py | 43 ++- vllm/v1/attention/backends/utils.py | 18 +- vllm/v1/spec_decode/eagle.py | 12 +- vllm/v1/worker/cpu_model_runner.py | 9 +- vllm/v1/worker/gpu_model_runner.py | 275 +++++++++++++--- vllm/v1/worker/gpu_ubatch_wrapper.py | 303 ++++++++++++++++++ vllm/v1/worker/ubatch_splitting.py | 155 +++++++++ vllm/v1/worker/ubatch_utils.py | 19 ++ vllm/v1/worker/ubatching.py | 211 ++++++++++++ vllm/v1/worker/utils.py | 10 +- 22 files changed, 1255 insertions(+), 170 deletions(-) create mode 100644 vllm/v1/worker/gpu_ubatch_wrapper.py create mode 100644 vllm/v1/worker/ubatch_splitting.py create mode 100644 vllm/v1/worker/ubatch_utils.py create mode 100644 vllm/v1/worker/ubatching.py diff --git a/examples/offline_inference/data_parallel.py b/examples/offline_inference/data_parallel.py index 36d805a32db7..98fe36d0fb79 100644 --- a/examples/offline_inference/data_parallel.py +++ b/examples/offline_inference/data_parallel.py @@ -87,6 +87,11 @@ def parse_args(): default=0.8, help=("Fraction of GPU memory vLLM is allowed to allocate (0.0, 1.0]."), ) + parser.add_argument( + "--enable-dbo", + action="store_true", + help=("Enable microbatched execution"), + ) parser.add_argument( "--compilation-config", type=int, @@ -113,6 +118,7 @@ def main( max_model_len, compilation_config, gpu_memory_utilization, + enable_dbo, 
quantization, ): os.environ["VLLM_DP_RANK"] = str(global_dp_rank) @@ -167,6 +173,7 @@ def start(rank): max_num_seqs=max_num_seqs, max_model_len=max_model_len, gpu_memory_utilization=gpu_memory_utilization, + enable_dbo=enable_dbo, quantization=quantization, compilation_config=compilation_config, ) @@ -227,6 +234,7 @@ def start(rank): args.max_model_len, args.compilation_config, args.gpu_memory_utilization, + args.enable_dbo, args.quantization, ), ) diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py index 3fc1011d5042..c74dbb3ebb17 100644 --- a/tests/v1/attention/test_attention_splitting.py +++ b/tests/v1/attention/test_attention_splitting.py @@ -6,7 +6,7 @@ from tests.v1.attention.test_attention_backends import BATCH_SPECS from tests.v1.attention.utils import create_common_attn_metadata -from vllm.v1.attention.backends.utils import (UbatchSlice, +from vllm.v1.attention.backends.utils import (UBatchSlice, _make_metadata_with_slice, slice_query_start_locs, split_attn_metadata) @@ -106,7 +106,7 @@ def mixed_small_metadata(): def test_make_metadata_with_slice_decode_batch(small_decode_metadata): """Test slicing decode batch metadata""" # Split first request only - ubatch_slice = UbatchSlice(slice(0, 1), slice(0, 1)) + ubatch_slice = UBatchSlice(slice(0, 1), slice(0, 1)) result = _make_metadata_with_slice(ubatch_slice, small_decode_metadata) @@ -120,7 +120,7 @@ def test_make_metadata_with_slice_decode_batch(small_decode_metadata): def test_make_metadata_with_slice_mixed_batch(mixed_small_metadata): """Test slicing mixed batch metadata""" - ubatch_slice = UbatchSlice(slice(1, 3), + ubatch_slice = UBatchSlice(slice(1, 3), slice(1, 7)) # Requests 1-3, tokens 1-7 result = _make_metadata_with_slice(ubatch_slice, mixed_small_metadata) @@ -137,8 +137,8 @@ def test_split_attn_metadata_decode_batch(large_decode_metadata): num_tokens = large_decode_metadata.num_reqs mid_point = num_tokens // 2 ubatch_slices = [ - UbatchSlice(slice(0, mid_point), slice(0, mid_point)), - UbatchSlice(slice(mid_point, num_tokens), slice(mid_point, + UBatchSlice(slice(0, mid_point), slice(0, mid_point)), + UBatchSlice(slice(mid_point, num_tokens), slice(mid_point, num_tokens)), ] diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index ddedc61aae29..ccab04628a16 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -365,7 +365,9 @@ def create_deterministic_logits(token_ids): # Mock runner for attention metadata building proposer.runner = mock.MagicMock() proposer.runner.attn_groups.append([mock.MagicMock()]) - proposer.runner.attn_groups[0][0].metadata_builder = attn_metadata_builder + proposer.runner.attn_groups[0][0].metadata_builders = [ + attn_metadata_builder + ] result = proposer.propose(target_token_ids=target_token_ids, target_positions=target_positions, @@ -489,7 +491,9 @@ def create_deterministic_logits(token_ids, k: int): # Mock runner for attention metadata building. proposer.runner = mock.MagicMock() proposer.runner.attn_groups.append([mock.MagicMock()]) - proposer.runner.attn_groups[0][0].metadata_builder = attn_metadata_builder + proposer.runner.attn_groups[0][0].metadata_builders = [ + attn_metadata_builder + ] # Setup inputs for the proposer. 
target_token_ids = torch.randint(0, diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 6bb0fef23719..535802585d18 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -2848,6 +2848,14 @@ def __post_init__(self): "when cudagraph_mode piecewise cudagraphs is used, "\ f"cudagraph_mode={self.compilation_config.cudagraph_mode}" + if self.parallel_config.enable_dbo: + a2a_backend = envs.VLLM_ALL2ALL_BACKEND + assert a2a_backend == "deepep_low_latency", \ + "Microbatching currently only supports the deepep_low_latency "\ + f"all2all backend. {a2a_backend} is not supported. To fix set "\ + "the VLLM_ALL2ALL_BACKEND environment variable to "\ + "deepep_low_latency and install the DeepEP kerenls." + if not self.instance_id: self.instance_id = random_uuid()[:5] diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 231406bf6052..8e92e54a9678 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -137,6 +137,14 @@ class ParallelConfig: disable_custom_all_reduce: bool = False """Disable the custom all-reduce kernel and fall back to NCCL.""" + enable_dbo: bool = False + """Enable microbatching for the model executor.""" + + dbo_decode_token_threshold: int = 32 + """The threshold for microbatching. If the number of tokens in the + request is greater than this threshold, microbatching will be used. + Otherwise, the request will be processed in a single batch.""" + ray_workers_use_nsight: bool = False """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.""" diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index 7c0f30b9aab8..427fd040fcb7 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -251,9 +251,4 @@ def get_handle(self, kwargs): logger.debug("DeepEP all2all args %s", buffer_kwargs) handle: deep_ep.Buffer = self.handle_cache.get_or_create( buffer_kwargs, deep_ep.Buffer) - # It is dangerous to set num sms outside this function. num_sms is not - # a part of the hash-key that identifies this object. If we are in a - # situation where we make objects with different num_sms, the hash key - # in get_or_create must be updated. 
- handle.set_num_sms(self.num_sms) return handle diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 20d998d613d4..4831cb5348c7 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -327,6 +327,9 @@ class EngineArgs: data_parallel_hybrid_lb: bool = False data_parallel_backend: str = ParallelConfig.data_parallel_backend enable_expert_parallel: bool = ParallelConfig.enable_expert_parallel + enable_dbo: bool = ParallelConfig.enable_dbo + dbo_decode_token_threshold: int = \ + ParallelConfig.dbo_decode_token_threshold eplb_config: EPLBConfig = get_field(ParallelConfig, "eplb_config") enable_eplb: bool = ParallelConfig.enable_eplb expert_placement_strategy: ExpertPlacementStrategy = \ @@ -695,6 +698,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parallel_group.add_argument( "--enable-expert-parallel", **parallel_kwargs["enable_expert_parallel"]) + parallel_group.add_argument("--enable-dbo", + **parallel_kwargs["enable_dbo"]) + parallel_group.add_argument( + "--dbo-decode-token-threshold", + **parallel_kwargs["dbo_decode_token_threshold"]) parallel_group.add_argument("--enable-eplb", **parallel_kwargs["enable_eplb"]) parallel_group.add_argument("--eplb-config", @@ -1339,6 +1347,8 @@ def create_engine_config( data_parallel_backend=self.data_parallel_backend, data_parallel_hybrid_lb=self.data_parallel_hybrid_lb, enable_expert_parallel=self.enable_expert_parallel, + enable_dbo=self.enable_dbo, + dbo_decode_token_threshold=self.dbo_decode_token_threshold, enable_eplb=self.enable_eplb, eplb_config=self.eplb_config, expert_placement_strategy=self.expert_placement_strategy, diff --git a/vllm/forward_context.py b/vllm/forward_context.py index b3ddd7b9a739..3b535423f7bc 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -14,6 +14,7 @@ from vllm.config import CUDAGraphMode, ParallelConfig, VllmConfig from vllm.logger import init_logger from vllm.platforms import current_platform +from vllm.v1.worker.ubatch_utils import UBatchSlices, is_second_ubatch_empty if TYPE_CHECKING: from vllm.attention.backends.abstract import AttentionMetadata @@ -97,6 +98,53 @@ def num_tokens_across_dp(num_tokens: int, dp_size: int, dist.all_reduce(num_tokens_tensor, group=group) return num_tokens_tensor.cpu() + @staticmethod + def should_ubatch_across_dp( + should_ubatch: bool, orig_num_tokens_per_ubatch: int, + padded_num_tokens_per_ubatch: int, dp_size: int, + dp_rank: int) -> tuple[bool, Optional[torch.Tensor]]: + """ + 1. Decides if each DP rank is going to microbatch. Either all ranks + run with microbatching or none of them do. If this function decides + not to run with microbatching. It will "abort" meaning that no padding + information will be returned to the caller. It will return (False, None) + + 2. Determines the total number of tokens that each rank will run. + All ranks will be padded out so that the run with the same number + of tokens + + Returns: tuple[ + should_ubatch: Are all DP ranks going to microbatch + num_tokens_after_padding: A tensor containing the total number of + tokens per-microbatch for each DP rank including padding. 
Will be + None if should_ubatch if False + ] + """ + + device = current_platform.device_type + tensor = torch.zeros(3, dp_size, device=device, dtype=torch.int32) + tensor[0][dp_rank] = orig_num_tokens_per_ubatch + tensor[1][dp_rank] = padded_num_tokens_per_ubatch + tensor[2][dp_rank] = 1 if should_ubatch else 0 + + from vllm.distributed.parallel_state import get_dp_group + dist.all_reduce(tensor, group=get_dp_group().device_group) + + result: bool = bool(torch.all(tensor[2] == 1).item()) + if not result: + return result, None + + orig_num_tokens_tensor = tensor[0, :] + padded_num_tokens_tensor = tensor[1, :] + + orig_min_num_tokens = int(orig_num_tokens_tensor.min().item()) + padded_max_num_tokens = int(padded_num_tokens_tensor.max().item()) + if is_second_ubatch_empty(orig_min_num_tokens, padded_max_num_tokens): + logger.debug("Aborting ubatching %s %s", orig_min_num_tokens, + padded_max_num_tokens) + return False, None + return result, padded_num_tokens_tensor.cpu() + @staticmethod def make( parallel_config: ParallelConfig, @@ -119,14 +167,15 @@ def make( # If num_tokens_across_dp is None, it will be computed by all_reduce # Otherwise, num_tokens_across_dp[dp_rank] should be equal to batchsize - assert (num_tokens_across_dp is None - or num_tokens_across_dp[dp_rank] == batchsize) + assert (num_tokens_across_dp is None or num_tokens_across_dp[dp_rank] + == batchsize), f"{num_tokens_across_dp[dp_rank]} {batchsize}" if num_tokens_across_dp is None: num_tokens_across_dp = DPMetadata.num_tokens_across_dp( batchsize, dp_size, dp_rank) max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp) cu_tokens_across_dp_cpu = torch.cumsum(num_tokens_across_dp, dim=0) - return DPMetadata(max_tokens_across_dp_cpu, cu_tokens_across_dp_cpu) + return DPMetadata(max_tokens_across_dp_cpu, cu_tokens_across_dp_cpu, + num_tokens_across_dp) @contextmanager def chunked_sizes(self, max_chunk_size_per_rank: int, chunk_idx: int): @@ -179,9 +228,12 @@ class ForwardContext: Type AttentionMetadata for v0, Type Dict[str, AttentionMetadata] for v1, map from layer_name of each attention layer to its attention metadata - set dynamically for each forward pass + Type List[Dict[str, AttentionMetadata]] for DBO. List of size two, one + for each microbatch. + Set dynamically for each forward pass """ - attn_metadata: Union["AttentionMetadata", dict[str, "AttentionMetadata"]] + attn_metadata: Union["AttentionMetadata", dict[str, "AttentionMetadata"], + list[dict[str, "AttentionMetadata"]]] # TODO: remove after making all virtual_engines share the same kv cache virtual_engine: int # set dynamically for each forward pass # set dynamically for each forward pass @@ -191,6 +243,8 @@ class ForwardContext: cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE batch_descriptor: Optional[BatchDescriptor] = None + ubatch_slices: Optional[UBatchSlices] = None + def __post_init__(self): assert self.cudagraph_runtime_mode in [ CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL], \ @@ -208,6 +262,39 @@ def get_forward_context() -> ForwardContext: return _forward_context +def create_forward_context( + attn_metadata: Any, + vllm_config: VllmConfig, + virtual_engine: int = 0, + dp_metadata: Optional[DPMetadata] = None, + cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, + batch_descriptor: Optional[BatchDescriptor] = None, + ubatch_slices: Optional[UBatchSlices] = None): + return ForwardContext(no_compile_layers=vllm_config.compilation_config. 
+ static_forward_context, + virtual_engine=virtual_engine, + attn_metadata=attn_metadata, + dp_metadata=dp_metadata, + cudagraph_runtime_mode=cudagraph_runtime_mode, + batch_descriptor=batch_descriptor, + ubatch_slices=ubatch_slices) + + +@contextmanager +def override_forward_context(forward_context: Optional[ForwardContext]): + """A context manager that overrides the current forward context. + This is used to override the forward context for a specific + forward pass. + """ + global _forward_context + prev_context = _forward_context + _forward_context = forward_context + try: + yield + finally: + _forward_context = prev_context + + @contextmanager def set_forward_context( attn_metadata: Any, @@ -216,7 +303,8 @@ def set_forward_context( num_tokens: Optional[int] = None, num_tokens_across_dp: Optional[torch.Tensor] = None, cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, - batch_descriptor: Optional[BatchDescriptor] = None): + batch_descriptor: Optional[BatchDescriptor] = None, + ubatch_slices: Optional[UBatchSlices] = None): """A context manager that stores the current forward context, can be attention metadata, etc. Here we can inject common logic for every model forward pass. @@ -225,6 +313,7 @@ def set_forward_context( need_to_track_batchsize = track_batchsize and attn_metadata is not None if need_to_track_batchsize: forward_start_time = time.perf_counter() + dp_metadata: Optional[DPMetadata] = None if vllm_config.parallel_config.data_parallel_size > 1 and ( attn_metadata is not None or num_tokens is not None): @@ -232,20 +321,14 @@ def set_forward_context( attn_metadata, num_tokens or 0, num_tokens_across_dp) - global _forward_context - prev_context = _forward_context - _forward_context = ForwardContext( - no_compile_layers=vllm_config.compilation_config. 
- static_forward_context, - virtual_engine=virtual_engine, - attn_metadata=attn_metadata, - dp_metadata=dp_metadata, - cudagraph_runtime_mode=cudagraph_runtime_mode, - batch_descriptor=batch_descriptor, - ) + forward_context = create_forward_context(attn_metadata, vllm_config, + virtual_engine, dp_metadata, + cudagraph_runtime_mode, + batch_descriptor, ubatch_slices) try: - yield + with override_forward_context(forward_context): + yield finally: global last_logging_time, batchsize_logging_interval if need_to_track_batchsize: @@ -282,5 +365,3 @@ def set_forward_context( logger.info(("Batchsize forward time stats " "(batchsize, count, median_time(ms)): %s"), forward_stats) - - _forward_context = prev_context diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 2a3ae478f3ea..92cbb1742974 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -191,7 +191,7 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> Callable: + ) -> tuple[Callable, mk.ReceiverType]: if apply_router_weight_on_input: topk = topk_ids.size(1) @@ -217,13 +217,14 @@ def prepare_async( a1q_scale = None a1_post_scale = a1_scale - return self._do_dispatch(tokens=a1q, - token_scales=a1q_scale, - rank_topk_ids=topk_ids, - rank_topk_weights=topk_weights, - num_experts=num_experts, - a1_scale=a1_post_scale, - quant_config=quant_config) + return (lambda *args: None, + self._do_dispatch(tokens=a1q, + token_scales=a1q_scale, + rank_topk_ids=topk_ids, + rank_topk_weights=topk_weights, + num_experts=num_experts, + a1_scale=a1_post_scale, + quant_config=quant_config)) def prepare( self, @@ -237,10 +238,11 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - receiver = self.prepare_async(a1, a1_scale, a2_scale, topk_weights, - topk_ids, num_experts, expert_map, - apply_router_weight_on_input, - quant_config) + (_, receiver) = self.prepare_async(a1, a1_scale, a2_scale, + topk_weights, topk_ids, num_experts, + expert_map, + apply_router_weight_on_input, + quant_config) return receiver() def finalize( diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index 1849e49e0ab5..61f8297f0f14 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -11,6 +11,9 @@ TopKWeightAndReduceDelegate) from vllm.model_executor.layers.fused_moe.utils import ( moe_kernel_quantize_input, normalize_batched_scales_shape) +from vllm.v1.worker.ubatching import (dbo_current_ubatch_id, dbo_enabled, + dbo_maybe_run_recv_hook, + dbo_register_recv_hook, dbo_yield) # DeepEP kernels quantize dispatch inputs in 128 element chunks. DEEPEP_QUANT_BLOCK_SIZE = 128 @@ -55,7 +58,7 @@ def __init__(self, # The dispatch function returns a handle that the combine function # requires. We store the handle here so it is available to the # combine function. 
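# Illustrative sketch (not part of the patch): with dual-batch overlap two
# microbatches are in flight at once, so a single self.handle could be
# overwritten by the other microbatch between its dispatch and its combine.
# The hunk below keeps one slot per microbatch, indexed by the current ubatch
# id. dbo_current_ubatch_id is stubbed here (the real helper lives in
# vllm.v1.worker.ubatching) so the snippet runs standalone.
from typing import Optional


def dbo_current_ubatch_id() -> int:
    # stub: the real helper returns 0 or 1 depending on which microbatch
    # thread is currently executing (0 when DBO is disabled)
    return 0


handles: list[Optional[tuple]] = [None, None]

# dispatch stores its handle in the slot owned by the current microbatch ...
handles[dbo_current_ubatch_id()] = ("recv_layout", "event")
# ... and combine later reads the same slot, untouched by the other microbatch
assert handles[dbo_current_ubatch_id()] is not None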
- self.handle = None + self.handles: list[Optional[tuple]] = [None, None] self.num_dispatchers_ = num_dispatchers def num_dispatchers(self) -> int: @@ -123,13 +126,15 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> mk.ReceiverType: + ) -> tuple[Callable, mk.ReceiverType]: hidden_size = a1.size(1) assert hidden_size in self.SUPPORTED_HIDDEN_SIZES, \ (f"Hidden Size {hidden_size} not in supported list of hidden sizes" f"{self.SUPPORTED_HIDDEN_SIZES}") + a2a_idx = dbo_current_ubatch_id() + if self.use_fp8_dispatch: assert hidden_size % 128 == 0, \ "DeepEP kernels quantize the inputs in blocks of shape 128" @@ -148,7 +153,7 @@ def prepare_async( a1 = a1 * topk_weights.to(a1.dtype) # Dispatch - expert_x, expert_num_tokens, self.handle, event, hook = \ + expert_x, expert_num_tokens, handle, _, hook= \ self.buffer.low_latency_dispatch(a1, topk_ids, self.max_tokens_per_rank, @@ -156,21 +161,19 @@ def prepare_async( use_fp8=self.use_fp8_dispatch, async_finish=False, return_recv_hook=True) + self.handles[a2a_idx] = handle - return lambda: self._receiver(hook, expert_x, expert_num_tokens, - a1_scale, a1.dtype, quant_config) + return (hook, lambda: self._receiver(expert_x, expert_num_tokens, + a1_scale, a1.dtype, quant_config)) def _receiver( self, - hook: Callable, expert_x: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], expert_num_tokens: torch.Tensor, a1_scale, a1_dtype, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - hook() - expert_x, expert_x_scale = self._do_quant( expert_x, a1_scale, a1_dtype, quant_config.quant_dtype, quant_config.per_act_token_quant, quant_config.block_shape) @@ -192,10 +195,12 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - receiver = self.prepare_async(a1, a1_scale, a2_scale, topk_weights, - topk_ids, num_experts, expert_map, - apply_router_weight_on_input, - quant_config) + hook, receiver = self.prepare_async(a1, a1_scale, a2_scale, + topk_weights, topk_ids, + num_experts, expert_map, + apply_router_weight_on_input, + quant_config) + hook() return receiver() def finalize( @@ -210,7 +215,11 @@ def finalize( assert isinstance( weight_and_reduce_impl, TopKWeightAndReduceDelegate ), ("Weight application and reduction happens in the combine kernel.") - assert self.handle is not None + + a2a_idx = dbo_current_ubatch_id() + do_recv_hook = dbo_enabled() + handle = self.handles[a2a_idx] + assert handle is not None combine_topk_weights = topk_weights if apply_router_weight_on_input: @@ -218,12 +227,16 @@ def finalize( combine_topk_weights = torch.ones_like(topk_weights) # TODO (varun) : Enable zero copy mode - _, event, hook = self.buffer.low_latency_combine( + dbo_maybe_run_recv_hook() + _, _, recv_hook = self.buffer.low_latency_combine( fused_expert_output, topk_ids, combine_topk_weights, - self.handle, + handle, async_finish=False, zero_copy=False, - return_recv_hook=False, + return_recv_hook=do_recv_hook, out=output) + if recv_hook is not None: + dbo_register_recv_hook(recv_hook) + dbo_yield() diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index c62897c91816..d22bb253f4a7 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -38,6 +38,7 @@ from vllm.platforms.interface import CpuArchEnum from vllm.utils import (cdiv, direct_register_custom_op, has_deep_ep, has_pplx, round_up) +from 
vllm.v1.worker.ubatching import dbo_current_ubatch_id if current_platform.is_cuda_alike(): from .fused_batched_moe import BatchedTritonExperts @@ -992,16 +993,28 @@ def __init__( if (self.moe_parallel_config.use_pplx_kernels or self.moe_parallel_config.use_deepep_ll_kernels or self.moe_config.use_flashinfer_cutlass_kernels): - self.batched_hidden_states = torch.zeros( - (moe.max_num_tokens, self.hidden_size), - dtype=moe.in_dtype, - device=torch.cuda.current_device()) + if vllm_config.parallel_config.enable_dbo: + self.batched_hidden_states = torch.zeros( + (2, moe.max_num_tokens, self.hidden_size), + dtype=moe.in_dtype, + device=torch.cuda.current_device()) + + # Note here we use `num_experts` which is logical expert count + self.batched_router_logits = torch.zeros( + (2, moe.max_num_tokens, num_experts), + dtype=moe.in_dtype, + device=torch.cuda.current_device()) + else: + self.batched_hidden_states = torch.zeros( + (moe.max_num_tokens, self.hidden_size), + dtype=moe.in_dtype, + device=torch.cuda.current_device()) - # Note here we use `num_experts` which is logical expert count - self.batched_router_logits = torch.zeros( - (moe.max_num_tokens, num_experts), - dtype=moe.in_dtype, - device=torch.cuda.current_device()) + # Note here we use `num_experts` which is logical expert count + self.batched_router_logits = torch.zeros( + (moe.max_num_tokens, num_experts), + dtype=moe.in_dtype, + device=torch.cuda.current_device()) @property def shared_experts(self) -> Optional[torch.nn.Module]: @@ -1708,14 +1721,29 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): hidden_states = full_hidden_states[chunk_start:chunk_end, :] router_logits = full_router_logits[chunk_start:chunk_end, :] - assert (self.batched_hidden_states.size(0) # type: ignore + assert self.batched_hidden_states is not None + assert self.batched_router_logits is not None + # This is only true when DBO has been enabled in the config. 
+ # Both tensors will have an outer dimension for the ubatch id + if self.batched_hidden_states.dim() == 3: + assert self.batched_router_logits.dim() == 3 + batch_buffer_idx = dbo_current_ubatch_id() + batched_hidden_states = self.batched_hidden_states[ + batch_buffer_idx, :] + batched_router_logits = self.batched_router_logits[ + batch_buffer_idx, :] + else: + batched_hidden_states = self.batched_hidden_states + batched_router_logits = self.batched_router_logits + + assert (batched_hidden_states.size(0) # type: ignore >= chunk_size) - assert (self.batched_router_logits.size(0) # type: ignore + assert (batched_router_logits.size(0) # type: ignore >= chunk_size) - staged_hidden_states = self.batched_hidden_states[: - chunk_size, :] # type: ignore - staged_router_logits = self.batched_router_logits[: - chunk_size, :] # type: ignore + staged_hidden_states = batched_hidden_states[: + chunk_size, :] # type: ignore + staged_router_logits = batched_router_logits[: + chunk_size, :] # type: ignore staged_hidden_states.copy_(hidden_states, non_blocking=True) staged_router_logits.copy_(router_logits, non_blocking=True) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 281563c3bfca..33799b58d199 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -13,6 +13,8 @@ from vllm.model_executor.layers.fused_moe.utils import ( # yapf: disable _resize_cache, count_expert_num_tokens) from vllm.utils import cdiv +from vllm.v1.worker.ubatching import (dbo_enabled, dbo_maybe_run_recv_hook, + dbo_register_recv_hook, dbo_yield) # # This file defines a set of base classes used to make MoE kernels more modular. @@ -226,7 +228,7 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> ReceiverType: + ) -> tuple[Callable, ReceiverType]: """ Perform any quantization (and/or) dispatching needed for this kernel but do not wait for results from other workers. @@ -496,6 +498,23 @@ def _chunk_scales(scales: Optional[torch.Tensor], start: int, return None +class SharedResizableBuffer: + + def __init__(self): + self.buffer = None + + def get(self, shape: tuple[int, ...], device: torch.device, + dtype: torch.dtype): + shape_numel = prod(shape) + if self.buffer is None or self.buffer.numel() < shape_numel: + self.buffer = torch.empty(shape_numel, device=device, dtype=dtype) + assert self.buffer.device == device, \ + f"Buffer device mismatch: {self.buffer.device} != {device}" + assert self.buffer.dtype == dtype, \ + f"Buffer dtype mismatch: {self.buffer.dtype} != {dtype}" + return self.buffer[:shape_numel].view(*shape) + + @final class FusedMoEModularKernel(torch.nn.Module): """ @@ -509,6 +528,9 @@ class FusedMoEModularKernel(torch.nn.Module): layer due to any layer specific state that may be used by the component objects. """ + fused_out_buffer = SharedResizableBuffer() + workspace13_buffer = SharedResizableBuffer() + workspace2_buffer = SharedResizableBuffer() def __init__( self, @@ -559,12 +581,12 @@ def _do_fused_experts( # We can reuse the memory between cache1 and cache3 because by the # time we need cache3, we're done with cache1. 
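# Illustrative sketch (not part of the patch): what the SharedResizableBuffer
# added above buys us. Repeated get() calls reuse a single grow-only
# allocation instead of calling torch.empty on every forward; the class is
# re-declared here in simplified form only so the snippet runs standalone.
import torch
from math import prod


class SharedResizableBuffer:

    def __init__(self):
        self.buffer = None

    def get(self, shape, device, dtype):
        numel = prod(shape)
        if self.buffer is None or self.buffer.numel() < numel:
            self.buffer = torch.empty(numel, device=device, dtype=dtype)
        return self.buffer[:numel].view(*shape)


workspace = SharedResizableBuffer()
a = workspace.get((4, 128), torch.device("cpu"), torch.float16)
b = workspace.get((2, 128), torch.device("cpu"), torch.float16)
print(a.data_ptr() == b.data_ptr())  # True: both views share the same storage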
- workspace13 = torch.empty(prod(workspace13_shape), - device=a1.device, - dtype=workspace_dtype) - workspace2 = torch.empty(prod(workspace2_shape), - device=a1.device, - dtype=workspace_dtype) + workspace13 = self.workspace13_buffer.get(workspace13_shape, + device=a1.device, + dtype=workspace_dtype) + workspace2 = self.workspace2_buffer.get(workspace2_shape, + device=a1.device, + dtype=workspace_dtype) assert fused_out is None or fused_out.shape == fused_out_shape, ( f"fused_out {fused_out.shape} but expected {fused_out_shape}") @@ -656,9 +678,9 @@ def _maybe_chunk_fused_experts( (_, _, fused_out_shape, _) = self.fused_experts.workspace_shapes( a1, a1q, M, N, K, top_k, global_num_experts, local_num_experts, expert_tokens_meta) - fused_out = torch.empty(fused_out_shape, - device=a1q.device, - dtype=a1.dtype) + fused_out = self.fused_out_buffer.get(fused_out_shape, + device=a1q.device, + dtype=a1.dtype) def slice_input_tensors( chunk_idx: int @@ -801,8 +823,10 @@ def forward( shared_output: torch.Tensor - if (not self.prepare_finalize.supports_async() - or self.shared_experts is None): + if not self.prepare_finalize.supports_async(): + # We shouldn't be running an a2a kernel that doesn't + # support async prepare/finalize + assert not dbo_enabled() # Run shared experts serially with dispatch. if self.shared_experts is not None: @@ -822,7 +846,8 @@ def forward( ) else: # Overlap shared expert compute with all2all dispatch. - receiver = self.prepare_finalize.prepare_async( + dbo_maybe_run_recv_hook() + hook, receiver = self.prepare_finalize.prepare_async( a1, a1_scale, a2_scale, @@ -834,8 +859,16 @@ def forward( self.fused_experts.quant_config, ) - assert self.shared_experts is not None - shared_output = self.shared_experts(a1) + if self.shared_experts is not None: + shared_output = self.shared_experts(a1) + + # If DBO is being used, register the hook with the ubatch context + # and call it in dbo_maybe_run_recv_hook instead of passing it to + # the receiver. 
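# Illustrative sketch (not part of the patch): the shape of the new
# (hook, receiver) contract used above. prepare_async() starts the all2all
# send and returns `hook`, which waits for the receive, plus `receiver`,
# which post-processes the received buffers; other work (the shared experts
# here) runs between the two calls. All names below are stand-ins, not vLLM
# APIs.
from typing import Callable

import torch


def prepare_async(
        a1: torch.Tensor
) -> tuple[Callable[[], None], Callable[[], torch.Tensor]]:
    received: dict[str, torch.Tensor] = {}

    def hook() -> None:
        # stands in for "block until the dispatched tokens have arrived"
        received["expert_x"] = a1.clone()

    def receiver() -> torch.Tensor:
        # stands in for quantization / metadata assembly on the received data
        return received["expert_x"] * 2

    return hook, receiver


a1 = torch.ones(4)
hook, receiver = prepare_async(a1)
shared_output = a1 + 1  # overlapped work while the dispatch is in flight
hook()                  # without DBO the hook is simply called inline
print(receiver())       # tensor([2., 2., 2., 2.])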
+ dbo_register_recv_hook(hook) + dbo_yield() + if not dbo_enabled(): + hook() (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, _expert_topk_weights) = receiver() diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py index 2ae79e69f555..b8c1c14317c4 100644 --- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional, Union +from typing import Callable, Optional, Union import pplx_kernels as pplx import torch @@ -103,7 +103,7 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> mk.ReceiverType: + ) -> tuple[Callable, mk.ReceiverType]: num_tokens = a1.size(0) # M hidden_dim = a1.size(-1) # K @@ -214,41 +214,33 @@ def prepare_async( do_recv=False, ) - return lambda: self._receiver( + hook = lambda: self.a2a.dispatch( + out_expert_num_tokens=expert_num_tokens, + out_expert_x=expert_x, + out_expert_x_scale=expert_x_scale, + dp_x=a1q, + dp_x_scale=a1q_scale, + indices=topk_ids, + bound_m=bound_m, + do_send=False, + do_recv=True, + ) + + return (hook, lambda: self._receiver( expert_num_tokens, expert_x, expert_x_scale, - a1q, - a1q_scale, - topk_ids, - bound_m, orig_a_scale_block_shape, - ) + )) def _receiver( self, expert_num_tokens: torch.Tensor, expert_x: torch.Tensor, expert_x_scale: Optional[torch.Tensor], - a1q: torch.Tensor, - a1q_scale: Optional[torch.Tensor], - topk_ids: torch.Tensor, - bound_m: Optional[torch.Tensor], orig_a_scale_block_shape: Optional[int], ) -> mk.PrepareResultType: - self.a2a.dispatch( - out_expert_num_tokens=expert_num_tokens, - out_expert_x=expert_x, - out_expert_x_scale=expert_x_scale, - dp_x=a1q, - dp_x_scale=a1q_scale, - indices=topk_ids, - bound_m=bound_m, - do_send=False, - do_recv=True, - ) - if expert_x_scale is not None: expert_x_scale = expert_x_scale[:, :, :orig_a_scale_block_shape] assert expert_x_scale.ndim == 3 @@ -270,7 +262,7 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - receiver = self.prepare_async( + hook, receiver = self.prepare_async( a1, a1_scale, a2_scale, @@ -281,6 +273,7 @@ def prepare( apply_router_weight_on_input, quant_config, ) + hook() return receiver() def finalize( diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index ead70c910a8f..63326d19194f 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -28,6 +28,7 @@ get_kv_connector_cache_layout) from vllm.logger import init_logger from vllm.v1.kv_cache_interface import AttentionSpec +from vllm.v1.worker.ubatch_utils import UBatchSlice logger = init_logger(__name__) KVCacheLayoutType = Literal["NHD", "HND"] @@ -81,12 +82,6 @@ class CommonAttentionMetadata: encoder_seq_lens: Optional[np.ndarray] = None -@dataclass -class UbatchSlice: - request_slice: slice - token_slice: slice - - def slice_query_start_locs( query_start_loc: torch.Tensor, request_slice: slice, @@ -103,7 +98,7 @@ def slice_query_start_locs( def _make_metadata_with_slice( - ubatch_slice: UbatchSlice, + ubatch_slice: UBatchSlice, attn_metadata: CommonAttentionMetadata) -> CommonAttentionMetadata: """ This function creates a new CommonAttentionMetadata that corresponds to @@ -133,6 
+128,11 @@ def _make_metadata_with_slice( torch.max(torch.abs(query_start_loc_cpu[1:] - query_start_loc_cpu[:-1])).item()) + # This is to account for the case where we are in a dummy + # run and query_start_loc_cpu is full of 0s + if max_query_len == 0: + max_query_len = attn_metadata.max_query_len + block_table_tensor = attn_metadata.block_table_tensor[request_slice] slot_mapping = attn_metadata.slot_mapping[token_slice] @@ -152,12 +152,12 @@ def _make_metadata_with_slice( def split_attn_metadata( - ubatch_slices: list[UbatchSlice], + ubatch_slices: list[UBatchSlice], common_attn_metadata: CommonAttentionMetadata, ) -> list[CommonAttentionMetadata]: """ Creates a new CommonAttentionMetadata instance that corresponds to the - requests for each UbatchSlice in ubatch_slices. + requests for each UBatchSlice in ubatch_slices. Note: This function does not modify common_attn_metadata """ diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 7132d507c722..5154b29405b6 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -27,6 +27,7 @@ from vllm.v1.attention.backends.utils import CommonAttentionMetadata from vllm.v1.kv_cache_interface import KVCacheConfig from vllm.v1.sample.metadata import SamplingMetadata +from vllm.v1.worker.ubatching import dbo_current_ubatch_id logger = init_logger(__name__) @@ -179,9 +180,11 @@ def propose( assert self.runner is not None # FIXME: need to consider multiple kv_cache_groups - attn_metadata = self.runner.attn_groups[0][0].metadata_builder\ - .build_for_drafting(common_attn_metadata=common_attn_metadata, - draft_index=0) + ubatch_id = dbo_current_ubatch_id() + attn_metadata_builder = \ + self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + attn_metadata = attn_metadata_builder.build_for_drafting( + common_attn_metadata=common_attn_metadata, draft_index=0) # At this moment, we assume all eagle layers belong to the same KV # cache group, thus using the same attention metadata. @@ -355,8 +358,9 @@ def propose_tree( hidden_states: torch.Tensor, common_attn_metadata: CommonAttentionMetadata, ) -> list[torch.Tensor]: + ubatch_id = dbo_current_ubatch_id() tree_attn_metadata_builder = \ - self.runner.attn_groups[0][0].metadata_builder + self.runner.attn_groups[0][0].metadata_builders[ubatch_id] assert isinstance(tree_attn_metadata_builder, TreeAttentionMetadataBuilder) diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py index d5ec19b86b06..619ed88ab5b2 100644 --- a/vllm/v1/worker/cpu_model_runner.py +++ b/vllm/v1/worker/cpu_model_runner.py @@ -64,8 +64,13 @@ def _may_reorder_batch(self, scheduler_output: "SchedulerOutput") -> None: if not self.attn_groups[0]: return - mb = getattr(self.attn_groups[0][0], "metadata_builder", None) - if not isinstance(mb, TorchSDPAMetadataBuilderV1): + mb = getattr(self.attn_groups[0][0], "metadata_builders", None) + if isinstance(mb, list): + if not isinstance(mb[0], TorchSDPAMetadataBuilderV1): + return + mb[0].reorder_batch(self.input_batch, scheduler_output) + return + elif not isinstance(mb, TorchSDPAMetadataBuilderV1): # Encoder-only / rerank models do not benefit from reordering, # so we safely skip here. 
return diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index d4d1f814afc0..2ae748dee43c 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -15,6 +15,7 @@ import torch.distributed import torch.nn as nn from tqdm import tqdm +from typing_extensions import TypeAlias import vllm.envs as envs from vllm.attention import Attention, AttentionType @@ -55,11 +56,12 @@ from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, GiB_bytes, LazyLoader, check_use_alibi, get_dtype_size, is_pin_memory_available, round_up, supports_dynamo) +from vllm.v1.attention.backends.flash_attn import AttentionMetadata from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder from vllm.v1.attention.backends.utils import ( AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata, create_fast_prefill_custom_backend, - reorder_batch_to_split_decodes_and_prefills) + reorder_batch_to_split_decodes_and_prefills, split_attn_metadata) from vllm.v1.cudagraph_dispatcher import CudagraphDispatcher # yapf conflicts with isort for this block # yapf: disable @@ -85,9 +87,12 @@ from vllm.v1.spec_decode.ngram_proposer import NgramProposer from vllm.v1.utils import CpuGpuBuffer, record_function_or_nullcontext from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch +from vllm.v1.worker.gpu_ubatch_wrapper import UBatchWrapper from vllm.v1.worker.kv_connector_model_runner_mixin import ( KVConnectorModelRunnerMixin) from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin +from vllm.v1.worker.ubatch_splitting import get_dp_padding_ubatch, ubatch_split +from vllm.v1.worker.ubatch_utils import UBatchSlice, UBatchSlices from vllm.v1.worker.utils import is_residual_scattered_for_sp from .utils import (AttentionGroup, MultiModalBudget, @@ -105,6 +110,11 @@ logger = init_logger(__name__) +AttnMetadataDict: TypeAlias = dict[str, AttentionMetadata] +# list when ubatching is enabled +PerLayerAttnMetadata: TypeAlias = Union[list[AttnMetadataDict], + AttnMetadataDict] + # Wrapper for ModelRunnerOutput to support overlapped execution. class AsyncGPUModelRunnerOutput(AsyncModelRunnerOutput): @@ -274,6 +284,7 @@ def __init__( # Request states. 
self.requests: dict[str, CachedRequestState] = {} + self.comm_stream = torch.cuda.Stream() # Input Batch # NOTE(Chen): Ideally, we should initialize the input batch inside @@ -872,10 +883,11 @@ def _get_encoder_seq_lens( return encoder_seq_lens def _prepare_inputs( - self, - scheduler_output: "SchedulerOutput", - ) -> tuple[dict[str, Any], torch.Tensor, Optional[SpecDecodeMetadata], - np.ndarray, Optional[CommonAttentionMetadata], int]: + self, scheduler_output: "SchedulerOutput" + ) -> tuple[PerLayerAttnMetadata, torch.Tensor, + Optional[SpecDecodeMetadata], np.ndarray, + Optional[CommonAttentionMetadata], int, Optional[UBatchSlices], + Optional[torch.Tensor]]: """ :return: tuple[ attn_metadata: layer-to-attention_metadata mapping, @@ -947,6 +959,15 @@ def _prepare_inputs( self.query_start_loc.copy_to_gpu() query_start_loc = self.query_start_loc.gpu[:num_reqs + 1] + num_tokens_unpadded = scheduler_output.total_num_scheduled_tokens + num_tokens_padded = num_tokens_unpadded + self.get_local_padding( + num_tokens_unpadded) + ubatch_slices, num_tokens_after_padding = \ + ubatch_split(max_num_scheduled_tokens, + num_tokens_unpadded, + num_tokens_padded, + self.vllm_config) + self.seq_lens.np[:num_reqs] = ( self.input_batch.num_computed_tokens_cpu[:num_reqs] + num_scheduled_tokens) @@ -1001,7 +1022,9 @@ def _prepare_inputs( logits_indices_padded = self._prepare_kv_sharing_fast_prefill( logits_indices) - attn_metadata: dict[str, Any] = {} + attn_metadata: PerLayerAttnMetadata = {} + if ubatch_slices is not None: + attn_metadata = [dict() for _ in range(len(ubatch_slices))] # Used in the below loop. query_start_loc_cpu = self.query_start_loc.cpu[:num_reqs + 1] @@ -1075,7 +1098,7 @@ def _prepare_inputs( for attn_group in self.attn_groups[kv_cache_group_id]: # Prepare for cascade attention if enabled & beneficial. 
common_prefix_len = 0 - builder = attn_group.metadata_builder + builder = attn_group.get_metadata_builder() if self.cascade_attn_enabled: common_prefix_len = self._compute_cascade_attn_prefix_len( num_scheduled_tokens, @@ -1093,13 +1116,27 @@ def _prepare_inputs( num_draft_tokens=self.num_draft_tokens.gpu[:num_reqs], ) - attn_metadata_i = builder.build( - common_prefix_len=common_prefix_len, - common_attn_metadata=common_attn_metadata, - **extra_attn_metadata_args) - - for layer_name in attn_group.layer_names: - attn_metadata[layer_name] = attn_metadata_i + if ubatch_slices is not None: + common_attn_metadata_list = split_attn_metadata( + ubatch_slices, common_attn_metadata) + for ubid, common_attn_metadata in enumerate( + common_attn_metadata_list): + assert common_attn_metadata.max_query_len == 1 + attn_metadata_i = (attn_group.get_metadata_builder( + ubatch_id=ubid).build( + common_prefix_len=common_prefix_len, + common_attn_metadata=common_attn_metadata)) + for layer_name in kv_cache_group_spec.layer_names: + assert type(attn_metadata) is list + attn_metadata[ubid][layer_name] = attn_metadata_i + else: + assert isinstance(attn_metadata, dict) + attn_metadata_i = builder.build( + common_prefix_len=common_prefix_len, + common_attn_metadata=common_attn_metadata, + **extra_attn_metadata_args) + for layer_name in attn_group.layer_names: + attn_metadata[layer_name] = attn_metadata_i # Hot-Swap lora model if self.lora_config: @@ -1107,7 +1144,8 @@ def _prepare_inputs( return (attn_metadata, logits_indices, spec_decode_metadata, num_scheduled_tokens, spec_decode_common_attn_metadata, - max_num_scheduled_tokens) + max_num_scheduled_tokens, ubatch_slices, + num_tokens_after_padding) def _compute_cascade_attn_prefix_len( self, @@ -1508,7 +1546,7 @@ def _extract_encoder_inputs( def get_model(self) -> nn.Module: # get raw model out of the cudagraph wrapper. - if isinstance(self.model, CUDAGraphWrapper): + if isinstance(self.model, (CUDAGraphWrapper, UBatchWrapper)): return self.model.unwrap() return self.model @@ -1675,6 +1713,17 @@ def eplb_step(self, def get_dp_padding(self, num_tokens: int) -> tuple[int, Optional[torch.Tensor]]: + """ + Determines the total number of tokens that each rank will run. + All ranks will be padded out so that they run with the same number + of tokens + + Returns: tuple[ + num_pad_tokens: The number of tokens that will be added to the batch + num_tokens_after_padding: A tensor containing the total number of + tokens for each DP rank including padding. + ] + """ dp_size = self.vllm_config.parallel_config.data_parallel_size dp_rank = self.vllm_config.parallel_config.data_parallel_rank @@ -1698,6 +1747,39 @@ def get_dp_padding(self, dtype=torch.int32) return max_tokens_across_dp_cpu - num_tokens, num_tokens_after_padding + def get_local_padding(self, num_tokens_unpadded: int) -> int: + + num_tokens_padded = num_tokens_unpadded + + if (self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE + and num_tokens_unpadded <= self.cudagraph_batch_sizes[-1]): + # Use piecewise CUDA graphs. + # Add padding to the batch size. + num_tokens_padded = self.vllm_config.pad_for_cudagraph( + num_tokens_unpadded) + else: + # Eager mode. + # Pad tokens to multiple of tensor_parallel_size when + # enabled collective fusion for SP + tp_size = self.vllm_config.parallel_config.tensor_parallel_size + if self.vllm_config.compilation_config.pass_config. 
\ + enable_sequence_parallelism and tp_size > 1: + num_tokens_padded = round_up(num_tokens_unpadded, tp_size) + + num_pad_tokens = num_tokens_padded - num_tokens_unpadded + return num_pad_tokens + + # This is where the second ubatch is adjusted to account for the padding. + # Should be called after attention metadata creation. This just pads + # the second ubatch slice out to the total number of tokens + # (num_tokens + padding) + def pad_out_ubatch_slice(self, ubatch_slices: UBatchSlices, + num_total_tokens: int): + padded_second_ubatch_slice = slice(ubatch_slices[1].token_slice.start, + num_total_tokens) + ubatch_slices[1] = UBatchSlice(padded_second_ubatch_slice, + padded_second_ubatch_slice) + def _pool( self, hidden_states: torch.Tensor, @@ -1758,15 +1840,22 @@ def _preprocess( self, scheduler_output: "SchedulerOutput", intermediate_tensors: Optional[IntermediateTensors] = None, + ubatch_slices: Optional[UBatchSlices] = None, + num_tokens_after_padding: Optional[torch.Tensor] = None, ) -> tuple[int, int, Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor], torch.Tensor, Optional[IntermediateTensors], dict[str, Any]]: num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens - num_input_tokens = self._get_num_input_tokens(num_scheduled_tokens) - # Padding for DP - num_pad, num_tokens_across_dp = self.get_dp_padding(num_input_tokens) - num_input_tokens += num_pad + if ubatch_slices: + assert num_tokens_after_padding is not None + num_input_tokens = int(num_tokens_after_padding[0].item() * 2) + self.pad_out_ubatch_slice(ubatch_slices, num_input_tokens) + elif ubatch_slices is None: + num_input_tokens = self._get_num_input_tokens(num_scheduled_tokens) + num_pad, num_tokens_after_padding = self.get_dp_padding( + num_input_tokens) + num_input_tokens += num_pad # _prepare_inputs may reorder the batch, so we must gather multi # modal outputs after that to ensure the correct order @@ -1821,7 +1910,7 @@ def _preprocess( return ( num_scheduled_tokens, num_input_tokens, - num_tokens_across_dp, + num_tokens_after_padding, input_ids, inputs_embeds, positions, @@ -2027,7 +2116,8 @@ def execute_model( # Prepare the decoder inputs. (attn_metadata, logits_indices, spec_decode_metadata, num_scheduled_tokens_np, spec_decode_common_attn_metadata, - max_query_len) = self._prepare_inputs(scheduler_output) + max_query_len, ubatch_slices, num_tokens_after_padding + ) = self._prepare_inputs(scheduler_output) finally: if self.prepare_inputs_event is not None: @@ -2042,7 +2132,11 @@ def execute_model( positions, intermediate_tensors, model_kwargs, - ) = self._preprocess(scheduler_output, intermediate_tensors) + ) = self._preprocess(scheduler_output, intermediate_tensors, + ubatch_slices, num_tokens_after_padding) + + if ubatch_slices is not None: + num_input_tokens = num_input_tokens // 2 uniform_decode = (max_query_len == self.uniform_decode_query_len) and ( @@ -2062,6 +2156,7 @@ def execute_model( num_tokens_across_dp=num_tokens_across_dp, cudagraph_runtime_mode=cudagraph_runtime_mode, batch_descriptor=batch_descriptor, + ubatch_slices=ubatch_slices, ), record_function_or_nullcontext("Forward"), self.maybe_get_kv_connector_output(scheduler_output) as kv_connector_output): @@ -2441,10 +2536,18 @@ def load_model(self, eep_scale_up: bool = False) -> None: # CudagraphWraper and CudagraphDispatcher of vllm. # wrap the model with full cudagraph wrapper if needed. 
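# A minimal sketch (standalone example, assuming the semantics of
# pad_out_ubatch_slice and the _preprocess changes above) of the ubatch
# padding arithmetic: each rank runs 2 * T tokens, where T is the padded
# per-ubatch count agreed across DP ranks; the first slice covers [0, T)
# and the second is stretched from [T, num_unpadded) to [T, 2*T).
def _demo_ubatch_padding(per_ubatch_tokens: int, num_tokens_unpadded: int):
    total = per_ubatch_tokens * 2           # num_input_tokens in _preprocess
    first = slice(0, per_ubatch_tokens)
    second = slice(per_ubatch_tokens, num_tokens_unpadded)
    second = slice(second.start, total)     # pad_out_ubatch_slice
    return first, second

# e.g. 7 decode tokens padded to 2 * 4: the first ubatch is [0, 4), the
# second is [4, 7) before padding and [4, 8) afterwards.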
- if self.compilation_config.cudagraph_mode.has_full_cudagraphs(): + if self.compilation_config.cudagraph_mode.has_full_cudagraphs() \ + and not self.parallel_config.enable_dbo: self.model = CUDAGraphWrapper(self.model, self.vllm_config, runtime_mode=CUDAGraphMode.FULL) + elif self.parallel_config.enable_dbo: + if self.compilation_config.cudagraph_mode.has_full_cudagraphs(): + self.model = UBatchWrapper(self.model, self.vllm_config, + CUDAGraphMode.FULL, self.device) + else: + self.model = UBatchWrapper(self.model, self.vllm_config, + CUDAGraphMode.NONE, self.device) def reload_weights(self) -> None: assert getattr(self, "model", None) is not None, \ @@ -2642,6 +2745,7 @@ def _dummy_run( cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, force_attention: bool = False, uniform_decode: bool = False, + allow_microbatching: bool = False, skip_eplb: bool = False, is_profile: bool = False, create_mixed_batch: bool = False, @@ -2667,12 +2771,30 @@ def _dummy_run( (1 token) and prefill (multiple tokens) requests. remove_lora: If False, dummy LoRAs are not destroyed after the run """ + ubatch_enabled = self.parallel_config.enable_dbo + num_tokens_across_dp = None + num_pad = 0 + should_ubatch = False + if ubatch_enabled: + should_ubatch = num_tokens >= \ + self.parallel_config.dbo_decode_token_threshold and \ + allow_microbatching + + (should_ubatch, num_tokens_across_dp) = get_dp_padding_ubatch( + num_tokens, num_tokens, should_ubatch, self.vllm_config) + + # Currently the dummy run should only be ubatching during + # cuda graph capture, meaning all DP ranks should already + # have the same batch size + if num_tokens_across_dp is not None: + assert int(num_tokens_across_dp[0]) == num_tokens // 2 + assert cudagraph_runtime_mode in { CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL } - # Padding for DP - num_pad, num_tokens_across_dp = self.get_dp_padding(num_tokens) + if not should_ubatch: + num_pad, num_tokens_across_dp = self.get_dp_padding(num_tokens) num_tokens += num_pad # If cudagraph_mode.decode_mode() == FULL and @@ -2690,6 +2812,10 @@ def _dummy_run( # for GQA/MQA. max_query_len = self.uniform_decode_query_len if uniform_decode else \ num_tokens + if allow_microbatching: + assert self.uniform_decode_query_len == 1 + assert uniform_decode is True + assert max_query_len == 1 # Set num_scheduled_tokens based on num_tokens and max_num_seqs # for dummy run with LoRA so that the num_reqs collectively @@ -2728,12 +2854,28 @@ def _dummy_run( num_scheduled_tokens = np.array(num_scheduled_tokens_list, dtype=np.int32) - attn_metadata: Optional[dict[str, Any]] = None + ubatch_slices = None + # We currently only microbatch if the number of tokens is + # over a certain threshold. + if should_ubatch: + # We only support decode-only cudagraphs + assert num_reqs == num_tokens + assert num_tokens % 2 == 0 + ubatch_slices = [ + UBatchSlice(slice(0, num_reqs // 2), slice(0, + num_tokens // 2)), + UBatchSlice(slice(num_reqs // 2, num_reqs), + slice(num_tokens // 2, num_tokens)) + ] + + attn_metadata: Optional[PerLayerAttnMetadata] = None # If force_attention is True, we always capture attention. Otherwise, # it only happens for cudagraph_runtime_mode=FULL. if force_attention or cudagraph_runtime_mode == CUDAGraphMode.FULL: attn_metadata = {} + if ubatch_slices is not None: + attn_metadata = [dict() for _ in range(len(ubatch_slices))] if create_mixed_batch: # In the mixed batch mode (used for FI warmup), we use @@ -2766,12 +2908,26 @@ def _dummy_run( slot_mapping=self.input_batch. 
block_table[kv_cache_group_id].slot_mapping[:num_tokens], causal=True) - for attn_group in self.attn_groups[kv_cache_group_id]: - attn_metadata_i = attn_group.metadata_builder\ - .build_for_cudagraph_capture(common_attn_metadata) - for layer_name in kv_cache_group_spec.layer_names: - attn_metadata[layer_name] = attn_metadata_i + if ubatch_slices is not None: + common_attn_metadata_list = split_attn_metadata( + ubatch_slices, common_attn_metadata) + for ubid, common_attn_metadata in enumerate( + common_attn_metadata_list): + assert common_attn_metadata.max_query_len == 1 + attn_metadata_i = (attn_group\ + .get_metadata_builder(ubatch_id=ubid)\ + .build_for_cudagraph_capture(common_attn_metadata)) + for layer_name in kv_cache_group_spec.layer_names: + assert type(attn_metadata) is list + attn_metadata[ubid][ + layer_name] = attn_metadata_i + else: + assert type(attn_metadata) is dict + attn_metadata_i = attn_group.get_metadata_builder()\ + .build_for_cudagraph_capture(common_attn_metadata) + for layer_name in kv_cache_group_spec.layer_names: + attn_metadata[layer_name] = attn_metadata_i with self.maybe_dummy_run_with_lora(self.lora_config, num_scheduled_tokens, remove_lora): @@ -2818,13 +2974,16 @@ def _dummy_run( f"Cudagraph runtime mode mismatch at dummy_run. " f"Expected {_cg_mode}, but got {cudagraph_runtime_mode}.") + if ubatch_slices is not None: + num_tokens = num_tokens // 2 with self.maybe_randomize_inputs(input_ids), set_forward_context( attn_metadata, self.vllm_config, num_tokens=num_tokens, num_tokens_across_dp=num_tokens_across_dp, cudagraph_runtime_mode=cudagraph_runtime_mode, - batch_descriptor=batch_descriptor): + batch_descriptor=batch_descriptor, + ubatch_slices=ubatch_slices): outputs = self.model( input_ids=input_ids, positions=positions, @@ -3096,6 +3255,7 @@ def freeze_gc(): set_cudagraph_capturing_enabled(True) with freeze_gc(), graph_capture(device=self.device): cudagraph_mode = self.compilation_config.cudagraph_mode + assert cudagraph_mode is not None if cudagraph_mode.mixed_mode() != CUDAGraphMode.NONE: cudagraph_runtime_mode = cudagraph_mode.mixed_mode() @@ -3153,6 +3313,35 @@ def _capture_cudagraphs(self, compilation_cases: list[int], desc="Capturing CUDA graphs ({}, {})".format( "decode" if uniform_decode else "mixed prefill-decode", cudagraph_runtime_mode.name)) + enable_dbo = self.parallel_config.enable_dbo + # DBO Only supports running Full cudagraphs with uniform + # decode lengths + if enable_dbo and uniform_decode: + for num_tokens in compilation_cases: + # If the number of tokens is greater than the microbatching + # threshold, don't generate a microbatched cudagraph + if (num_tokens + < self.parallel_config.dbo_decode_token_threshold): + continue + + # Warmup + for _ in range( + self.compilation_config.cudagraph_num_of_warmups): + force_attention = ( + cudagraph_runtime_mode == CUDAGraphMode.FULL) + self._dummy_run(num_tokens, + cudagraph_runtime_mode=CUDAGraphMode.NONE, + force_attention=force_attention, + uniform_decode=True, + allow_microbatching=True, + skip_eplb=True) + + # Graph Capture + self._dummy_run(num_tokens, + cudagraph_runtime_mode=CUDAGraphMode.FULL, + uniform_decode=True, + allow_microbatching=True, + skip_eplb=True) # We skip EPLB here since we don't want to record dummy metrics for num_tokens in compilation_cases: for _ in range(self.compilation_config.cudagraph_num_of_warmups): @@ -3219,14 +3408,23 @@ def create_attn_groups( ) -> list[AttentionGroup]: attn_groups: list[AttentionGroup] = [] for attn_backend, layer_names in 
attn_backends_map.items(): - attn_metadata_builder_i = attn_backend.get_builder_cls()( + attn_metadata_builders = [] + attn_metadata_builders.append(attn_backend.get_builder_cls()( kv_cache_spec, layer_names, self.vllm_config, self.device, - ) + )) + if self.parallel_config.enable_dbo: + attn_metadata_builders.append( + attn_backend.get_builder_cls()( + kv_cache_spec, + layer_names, + self.vllm_config, + self.device, + )) attn_group = AttentionGroup(attn_backend, - attn_metadata_builder_i, + attn_metadata_builders, layer_names) attn_groups.append(attn_group) return attn_groups @@ -3246,11 +3444,10 @@ def initialize_cudagraph_capture(self) -> None: min_cg_builder_name = None for attn_group in self._attn_group_iterator(): - builder = attn_group.metadata_builder + builder = attn_group.get_metadata_builder() if builder.cudagraph_support.value < min_cg_support.value: min_cg_support = builder.cudagraph_support min_cg_builder_name = builder.__class__.__name__ - # Flexible resolve the cudagraph mode cudagraph_mode = self.compilation_config.cudagraph_mode # check cudagraph for mixed batch is supported @@ -3316,7 +3513,7 @@ def calculate_reorder_batch_threshold(self) -> None: is compatible (e.g., decode threshold is the same) """ for group in self._attn_group_iterator(): - attn_metadata_builder_i = group.metadata_builder + attn_metadata_builder_i = group.get_metadata_builder() # check that if any backends reorder batches; that the reordering # is compatible (e.g., decode threshold is the same) diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py new file mode 100644 index 000000000000..5012ad0483c8 --- /dev/null +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -0,0 +1,303 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import dataclasses +import threading +from typing import Any, Callable, Optional + +import torch + +from vllm.compilation.cuda_graph import CUDAGraphWrapper +from vllm.config import CUDAGraphMode, VllmConfig +from vllm.forward_context import (create_forward_context, get_forward_context, + override_forward_context) +from vllm.logger import init_logger +from vllm.platforms import current_platform +from vllm.sequence import IntermediateTensors +from vllm.v1.worker.ubatching import UBatchContext, make_ubatch_contexts + +logger = init_logger(__name__) + + +@dataclasses.dataclass +class UbatchMetadata: + context: UBatchContext + input_ids: torch.Tensor + positions: torch.Tensor + inputs_embeds: Optional[torch.Tensor] + intermediate_tensors: Optional[IntermediateTensors] + num_tokens: int + + +@dataclasses.dataclass +class CUDAGraphMetaData: + cudagraph: torch.cuda.CUDAGraph + ubatch_metadata: UbatchMetadata + outputs: Optional[Any] = None + + +class UBatchWrapper: + + def __init__(self, runnable: Callable, vllm_config: VllmConfig, + runtime_mode: CUDAGraphMode, device: torch.cuda.device): + self.runnable = runnable + self.vllm_config = vllm_config + self.compilation_config = vllm_config.compilation_config + self.comm_stream = torch.cuda.Stream(device=device) + # Two ubatch threads plus the main thread + self.ready_barrier = threading.Barrier(3) + + self.cudagraphs: dict[int, CUDAGraphMetaData] = {} + + self.cudagraph_wrapper = None + self.graph_pool = None + if runtime_mode is not CUDAGraphMode.NONE: + self.cudagraph_wrapper = CUDAGraphWrapper( + runnable, vllm_config, runtime_mode=runtime_mode) + self.graph_pool = current_platform.get_global_graph_pool() + + def __getattr__(self, key: str): 
+ # allow accessing the attributes of the runnable. + if hasattr(self.runnable, key): + return getattr(self.runnable, key) + raise AttributeError(f"Attribute {key} not exists in the runnable of " + f"cudagraph wrapper: {self.runnable}") + + def unwrap(self) -> Callable: + # in case we need to access the original runnable. + return self.runnable + + def _capture_ubatches(self, ubatch_metadata, model) -> torch.Tensor: + """ + Capture a cudagraph for a microbatched run. + + The logic here is somewhat complicated because we need to make sure that + each of the ubatch threads initialize the cuda context before we start + the graph capture. + + The flow is as follows: + 1. The main thread starts up each ubatch thread. Each thread will + initialize its cuda context (torch.cuda.current_blas_handle()) + before going to sleep upon entering the ubatch_context. + + 2. The main thread starts the graph capture and wakes up the first + ubatch thread. + + 3. Each ubatch thread runs the model to completion and returns the + completed output tensors back to the main thread. + + 4. The main thread stores the captured cudagraph along with its metadata + and returns + """ + + @torch.inference_mode() + def _capture_ubatch_thread(results, ubatch_metadata): + ubatch_context = ubatch_metadata.context + with torch.cuda.stream(ubatch_context.compute_stream): + _ = torch.cuda.current_blas_handle() + with torch.cuda.stream(ubatch_context.comm_stream): + _ = torch.cuda.current_blas_handle() + with ubatch_context: + model_output = model( + input_ids=ubatch_metadata.input_ids, + positions=ubatch_metadata.positions, + intermediate_tensors=ubatch_metadata.intermediate_tensors, + inputs_embeds=ubatch_metadata.inputs_embeds, + ) + + results.append((ubatch_metadata.context.id, model_output)) + + results: list[tuple[int, torch.Tensor]] = [] + compute_stream = ubatch_metadata[0].context.compute_stream + num_tokens = ubatch_metadata[0].num_tokens + \ + ubatch_metadata[1].num_tokens + + # Ubatches will manually manage the forward context, so we override + # it to None here so we can have it restored correctly later + with override_forward_context(None): + ubatch_threads = [] + for metadata in ubatch_metadata: + thread = threading.Thread(target=_capture_ubatch_thread, + args=( + results, + metadata, + )) + ubatch_threads.append(thread) + thread.start() + self.ready_barrier.wait() # Wait for both threads to be ready + + # Capture the cudagraph + cudagraph_metadata = \ + CUDAGraphMetaData( + cudagraph=torch.cuda.CUDAGraph(), + ubatch_metadata=ubatch_metadata, + ) + with torch.cuda.graph(cudagraph_metadata.cudagraph, + stream=compute_stream, + pool=self.graph_pool): + ubatch_metadata[0].context.cpu_wait_event.set() + for thread in ubatch_threads: + thread.join() + sorted_results = [value for position, value in sorted(results)] + result = torch.cat(sorted_results, dim=0) + cudagraph_metadata.outputs = result + self.cudagraphs[num_tokens] = cudagraph_metadata + return cudagraph_metadata.outputs + + def _run_ubatches(self, ubatch_metadata, model) -> torch.Tensor: + + @torch.inference_mode() + def _ubatch_thread(results, model, ubatch_metadata): + with ubatch_metadata.context: + model_output = model( + input_ids=ubatch_metadata.input_ids, + positions=ubatch_metadata.positions, + intermediate_tensors=ubatch_metadata.intermediate_tensors, + inputs_embeds=ubatch_metadata.inputs_embeds, + ) + results.append((ubatch_metadata.context.id, model_output)) + + results: list[tuple[int, torch.Tensor]] = [] + + # Ubatch threads will manually manage 
the forward context, so we + # override it to None here so we can have it restored correctly + # after both threads have finished + with override_forward_context(None): + ubatch_threads = [] + for metadata in ubatch_metadata: + thread = threading.Thread(target=_ubatch_thread, + args=( + results, + model, + metadata, + )) + ubatch_threads.append(thread) + thread.start() + self.ready_barrier.wait() # Wait for both threads to be ready + ubatch_metadata[0].context.cpu_wait_event.set() + for thread in ubatch_threads: + thread.join() + sorted_results = [value for position, value in sorted(results)] + result = torch.cat(sorted_results, dim=0) + return result + + def _make_ubatch_metadata(self, ubatch_slices, attn_metadata, input_ids, + positions, inputs_embeds, intermediate_tensors, + compute_stream, dp_metadata, batch_descriptor, + cudagraph_runtime_mode) -> list[UbatchMetadata]: + + # Create one forward context per ubatch + forward_contexts = [] + for i, ubatch_slice in enumerate(ubatch_slices): + forward_contexts.append( + create_forward_context( + attn_metadata[i] if attn_metadata is not None else None, + self.vllm_config, + dp_metadata=dp_metadata, + batch_descriptor=batch_descriptor, + cudagraph_runtime_mode=cudagraph_runtime_mode)) + + ubatch_ctxs = make_ubatch_contexts( + num_micro_batches=len(ubatch_slices), + comm_stream=self.comm_stream, + compute_stream=compute_stream, + forward_contexts=forward_contexts, + ready_barrier=self.ready_barrier) + + ubatch_metadata: list[UbatchMetadata] = [] + for i, ubatch_slice in enumerate(ubatch_slices): + sliced_input_ids, sliced_positions, sliced_inputs_embeds, \ + sliced_intermediate_tensors = \ + self._slice_model_inputs( + ubatch_slice.token_slice, input_ids, positions, + inputs_embeds, intermediate_tensors) + ubatch_metadata.append( + UbatchMetadata( + context=ubatch_ctxs[i], + input_ids=sliced_input_ids, + positions=sliced_positions, + inputs_embeds=sliced_inputs_embeds, + intermediate_tensors=sliced_intermediate_tensors, + num_tokens=ubatch_slice.token_slice.stop - + ubatch_slice.token_slice.start)) + + return ubatch_metadata + + def _slice_model_inputs(self, tokens_slice: slice, input_ids, positions, + inputs_embeds, intermediate_tensors): + sliced_input_ids = input_ids[tokens_slice] + # if we are using mrope. 
Mrope adds an additional dimension to the + # positions tensor + if positions.ndim == 2: + sliced_positions = positions[:, tokens_slice] + else: + sliced_positions = positions[tokens_slice] + sliced_inputs_embeds = inputs_embeds[ + tokens_slice] if inputs_embeds else None + sliced_intermediate_tensors = intermediate_tensors[ + tokens_slice] if intermediate_tensors else None + + return (sliced_input_ids, sliced_positions, sliced_inputs_embeds, + sliced_intermediate_tensors) + + def __call__(self, *args, **kwargs): + forward_context = get_forward_context() + batch_descriptor = forward_context.batch_descriptor + ubatch_slices = forward_context.ubatch_slices + cudagraph_runtime_mode = forward_context.cudagraph_runtime_mode + + # If there's no ubatching, just run the runnable object + if ubatch_slices is None: + if cudagraph_runtime_mode in (CUDAGraphMode.NONE, + CUDAGraphMode.PIECEWISE): + return self.runnable(*args, **kwargs) + else: + assert self.cudagraph_wrapper is not None + return self.cudagraph_wrapper(*args, **kwargs) + + attn_metadata = forward_context.attn_metadata + num_tokens = (ubatch_slices[0].token_slice.stop - + ubatch_slices[0].token_slice.start) * 2 + input_ids = kwargs['input_ids'] + positions = kwargs['positions'] + intermediate_tensors = kwargs['intermediate_tensors'] + inputs_embeds = kwargs['inputs_embeds'] + compute_stream = torch.cuda.current_stream() + + dp_metadata = forward_context.dp_metadata + + # We shouldn't be here unless we are running with multiple DP ranks + assert dp_metadata is not None + + if num_tokens not in self.cudagraphs \ + and cudagraph_runtime_mode is CUDAGraphMode.FULL: + ubatch_metadata = self._make_ubatch_metadata( + ubatch_slices=ubatch_slices, + attn_metadata=attn_metadata, + input_ids=input_ids, + positions=positions, + intermediate_tensors=intermediate_tensors, + inputs_embeds=inputs_embeds, + compute_stream=compute_stream, + dp_metadata=dp_metadata, + batch_descriptor=batch_descriptor, + cudagraph_runtime_mode=CUDAGraphMode.NONE) + + return self._capture_ubatches(ubatch_metadata, self.model) + elif num_tokens in self.cudagraphs: + cudagraph_metadata = self.cudagraphs[num_tokens] + cudagraph_metadata.cudagraph.replay() + return cudagraph_metadata.outputs + else: + ubatch_metadata = self._make_ubatch_metadata( + ubatch_slices=ubatch_slices, + attn_metadata=attn_metadata, + input_ids=input_ids, + positions=positions, + intermediate_tensors=intermediate_tensors, + inputs_embeds=inputs_embeds, + compute_stream=compute_stream, + dp_metadata=dp_metadata, + batch_descriptor=batch_descriptor, + cudagraph_runtime_mode=CUDAGraphMode.NONE) + return self._run_ubatches(ubatch_metadata, self.model) diff --git a/vllm/v1/worker/ubatch_splitting.py b/vllm/v1/worker/ubatch_splitting.py new file mode 100644 index 000000000000..650f0ec5138d --- /dev/null +++ b/vllm/v1/worker/ubatch_splitting.py @@ -0,0 +1,155 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Optional + +import torch + +from vllm.config import VllmConfig +from vllm.forward_context import DPMetadata +from vllm.logger import init_logger +from vllm.utils import round_up +from vllm.v1.worker.ubatch_utils import (UBatchSlice, UBatchSlices, + is_second_ubatch_empty) + +logger = init_logger(__name__) + + +def should_ubatch_with_num_tokens( + should_ubatch: bool, + orig_num_tokens_per_ubatch: int, + padded_num_tokens_per_ubatch: int, + vllm_config: VllmConfig, +) -> tuple[bool, Optional[torch.Tensor]]: + dp_size = 
vllm_config.parallel_config.data_parallel_size
+    dp_rank = vllm_config.parallel_config.data_parallel_rank
+    return DPMetadata.should_ubatch_across_dp(should_ubatch,
+                                              orig_num_tokens_per_ubatch,
+                                              padded_num_tokens_per_ubatch,
+                                              dp_size, dp_rank)
+
+
+def get_dp_padding_ubatch(
+        num_tokens_unpadded: int, num_tokens_padded: int,
+        should_attempt_ubatching: bool,
+        vllm_config: VllmConfig) -> tuple[bool, Optional[torch.Tensor]]:
+    """
+    1. Decides if each DP rank is going to microbatch. Either all ranks
+    run with microbatching or none of them do. If this function decides
+    not to run with microbatching, it will "abort", meaning that no padding
+    information will be returned to the caller; it will return (False, None).
+
+    2. Determines the total number of tokens that each rank will run.
+    All ranks will be padded out so that they run with the same number
+    of tokens.
+
+    Returns: tuple[
+        should_ubatch: Are all DP ranks going to microbatch
+        num_tokens_after_padding: A tensor containing the total number of
+        tokens per-microbatch for each DP rank including padding. Will be
+        None if should_ubatch is False
+    ]
+
+    """
+    assert num_tokens_padded >= num_tokens_unpadded
+    dp_size = vllm_config.parallel_config.data_parallel_size
+    if dp_size == 1:
+        # Early exit.
+        return False, None
+
+    # If this DP rank doesn't want to attempt microbatching
+    if not should_attempt_ubatching:
+        (should_ubatch, num_tokens_across_dp) = should_ubatch_with_num_tokens(
+            False, 0, 0, vllm_config)
+        assert should_ubatch is False
+        assert num_tokens_across_dp is None
+        return should_ubatch, num_tokens_across_dp
+
+    # Round up to the next multiple of two for even divisibility
+    num_tokens_padded = round_up(num_tokens_padded, 2)
+    num_tokens_per_ubatch = num_tokens_padded // 2
+    should_ubatch = True
+
+    # Sanity check that the existing padding isn't giving us an empty second
+    # ubatch. Abort if so.
+    if is_second_ubatch_empty(num_tokens_unpadded, num_tokens_padded):
+        logger.debug(
+            "Empty second µbatch detected: unpadded tokens: %s, padded "
+            "tokens: %s", num_tokens_unpadded, num_tokens_padded)
+        should_ubatch = False
+
+    # Note that we compute the number of padded tokens per ubatch
+    (should_ubatch, num_tokens_across_dp) = should_ubatch_with_num_tokens(
+        should_ubatch, num_tokens_unpadded // 2, num_tokens_per_ubatch,
+        vllm_config)
+    if not should_ubatch:
+        assert num_tokens_across_dp is None
+        return should_ubatch, num_tokens_across_dp
+
+    assert num_tokens_across_dp is not None
+
+    max_tokens_across_dp_cpu = int(torch.max(num_tokens_across_dp).item())
+    num_tokens_after_padding = torch.tensor([max_tokens_across_dp_cpu] *
+                                            dp_size,
+                                            device="cpu",
+                                            dtype=torch.int32)
+    return should_ubatch, num_tokens_after_padding
+
+
+def ubatch_split(
+    max_num_scheduled_tokens: int,
+    num_tokens_unpadded: int,
+    num_tokens_padded: int,
+    vllm_config: VllmConfig,
+) -> tuple[Optional[UBatchSlices], Optional[torch.Tensor]]:
+    """
+    Coordinates amongst all DP ranks to determine if and how the full batch
+    should be split into microbatches.
+
+    Returns: tuple[
+        ubatch_slices: if this is set then all DP ranks have agreed to
+        microbatch
+        num_tokens_after_padding: A tensor containing the total number of
+        tokens per-microbatch for each DP rank including padding.
Will be + None if ubatch_slices is None + ] + + """ + parallel_config = vllm_config.parallel_config + # Don't bother with the should_ubatch handshaking unless microbatching + # is enabled + if not parallel_config.enable_dbo: + return (None, None) + + # Check preconditions for microbatching + should_attempt_ubatching = \ + parallel_config.enable_dbo and \ + num_tokens_unpadded >= \ + parallel_config.dbo_decode_token_threshold \ + and max_num_scheduled_tokens == 1 + + # Don't microbatch unless every other DP worker is also microbatching + num_tokens_after_padding = None + (should_ubatch, num_tokens_after_padding) = get_dp_padding_ubatch( + num_tokens_unpadded, num_tokens_padded, should_attempt_ubatching, + vllm_config) + if not should_ubatch: + return (None, None) + + # This doesn't actually pad the ubatch slices. It just initializes the + # split point to the padded value so that padding can be applied + # to the second ubatch in pad_out_ubatch_slice after attention + # metadata creation + assert num_tokens_after_padding is not None + total_num_tokens_per_ubatch = int(num_tokens_after_padding[0].item()) + padded_first_ubatch_slice = slice(0, total_num_tokens_per_ubatch) + padded_second_ubatch_slice = slice(total_num_tokens_per_ubatch, + num_tokens_unpadded) + + # Note there's an assumption here that there's 1 token per request + ubatch_slices = [ + UBatchSlice(padded_first_ubatch_slice, padded_first_ubatch_slice), + UBatchSlice(padded_second_ubatch_slice, padded_second_ubatch_slice) + ] + + return (ubatch_slices, num_tokens_after_padding) diff --git a/vllm/v1/worker/ubatch_utils.py b/vllm/v1/worker/ubatch_utils.py new file mode 100644 index 000000000000..6716d171cc70 --- /dev/null +++ b/vllm/v1/worker/ubatch_utils.py @@ -0,0 +1,19 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass + +from typing_extensions import TypeAlias + + +@dataclass +class UBatchSlice: + request_slice: slice + token_slice: slice + + +UBatchSlices: TypeAlias = list[UBatchSlice] + + +def is_second_ubatch_empty(orig_num_tokens_per_ubatch: int, + padded_num_tokens_per_ubatch: int) -> bool: + return padded_num_tokens_per_ubatch >= 2 * orig_num_tokens_per_ubatch diff --git a/vllm/v1/worker/ubatching.py b/vllm/v1/worker/ubatching.py new file mode 100644 index 000000000000..9aeaa9909dc8 --- /dev/null +++ b/vllm/v1/worker/ubatching.py @@ -0,0 +1,211 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import threading +from typing import Optional + +import torch + +from vllm import forward_context +from vllm.forward_context import ForwardContext +from vllm.utils import current_stream + +_THREAD_ID_TO_CONTEXT: dict = {} +_CURRENT_CONTEXTS: list[Optional['UBatchContext']] = [None, None] + + +class UBatchContext: + """ + Context manager for micro-batching synchronization using threading events. 
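# A minimal sketch (self-contained, CPU-only, standard library only) of the
# event ping-pong this context implements: each ubatch thread waits on its
# own event, runs one step, then wakes its peer, so only one thread is ever
# active at a time. The demo function name is hypothetical.
import threading

def _ping_pong_demo(steps: int = 3) -> None:
    events = [threading.Event(), threading.Event()]

    def worker(uid: int) -> None:
        for step in range(steps):
            events[uid].wait()        # cpu_wait_event
            events[uid].clear()
            print(f"ubatch {uid} runs step {step}")
            events[1 - uid].set()     # peer's cpu_signal_event

    threads = [threading.Thread(target=worker, args=(i, )) for i in (0, 1)]
    for t in threads:
        t.start()
    events[0].set()                   # main thread wakes ubatch 0 first
    for t in threads:
        t.join()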
+ """ + + def __init__(self, + id: int, + comm_stream: torch.cuda.Stream, + compute_stream: torch.cuda.Stream, + forward_context: ForwardContext, + ready_barrier: threading.Barrier, + cpu_wait_event: threading.Event, + cpu_signal_event: threading.Event, + gpu_comm_done_event: torch.cuda.Event, + gpu_compute_done_event: torch.cuda.Event, + schedule: str = "default"): + self.id = id + self.comm_stream = comm_stream + self.compute_stream = compute_stream + self.forward_context = forward_context + self.ready_barrier = ready_barrier + self.cpu_wait_event = cpu_wait_event + self.cpu_signal_event = cpu_signal_event + self.current_stream = compute_stream + self.gpu_comm_done_event = gpu_comm_done_event + self.gpu_compute_done_event = gpu_compute_done_event + self.schedule = schedule + self.recv_hook = None + + def __enter__(self): + global _CURRENT_CONTEXTS, _THREAD_ID_TO_CONTEXT + _THREAD_ID_TO_CONTEXT[threading.get_ident()] = self.id + _CURRENT_CONTEXTS[self.id] = self + self.ready_barrier.wait() + + self.cpu_wait_event.wait() + self.cpu_wait_event.clear() + self._restore_context() + # Assume we start on the compute stream + assert current_stream() == self.compute_stream + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + global _CURRENT_CONTEXTS, _THREAD_ID_TO_CONTEXT + _CURRENT_CONTEXTS[self.id] = None + del _THREAD_ID_TO_CONTEXT[threading.get_ident()] + self.maybe_run_recv_hook() + self.cpu_signal_event.set() + self.cpu_wait_event.clear() + self.current_stream = self.compute_stream + torch.cuda.set_stream(self.current_stream) + return False + + def _restore_context(self): + forward_context._forward_context = self.forward_context + torch.cuda.set_stream(self.current_stream) + + def update_stream(self, stream): + self.current_stream = stream + torch.cuda.set_stream(self.current_stream) + + def _signal_comm_done(self): + self.gpu_comm_done_event.record(self.comm_stream) + + def _signal_compute_done(self): + self.gpu_compute_done_event.record(self.compute_stream) + + def _wait_compute_done(self): + self.comm_stream.wait_event(self.gpu_compute_done_event) + + def _wait_comm_done(self): + self.compute_stream.wait_event(self.gpu_comm_done_event) + + def _cpu_yield(self): + # It is critical for correctness that only one thread is running + # at a time. 
These asserts just make sure that this is the only + # thread running before waking the other one up and going to sleep + assert forward_context._forward_context == self.forward_context + assert current_stream() == self.current_stream + assert not self.cpu_wait_event.is_set() + + self.cpu_signal_event.set() + self.cpu_wait_event.wait() + self.cpu_wait_event.clear() + self._restore_context() + + def switch_to_comm_sync(self): + self._signal_compute_done() + self.update_stream(self.comm_stream) + self._wait_comm_done() + + def maybe_run_recv_hook(self): + if self.recv_hook is not None: + self.recv_hook() + self.recv_hook = None + + def yield_(self): + self.current_stream = current_stream() + self._cpu_yield() + if self.current_stream != current_stream(): + self.update_stream(self.current_stream) + + def yield_and_switch_from_compute_to_comm(self): + assert current_stream() == self.compute_stream + self._signal_compute_done() + self._cpu_yield() + assert self.current_stream == self.compute_stream + self.update_stream(self.comm_stream) + self._wait_compute_done() + + def yield_and_switch_from_comm_to_compute(self): + assert current_stream() == self.comm_stream + self._signal_comm_done() + self._cpu_yield() + assert self.current_stream == self.comm_stream + self.update_stream(self.compute_stream) + self._wait_comm_done() + + +def dbo_enabled() -> bool: + return len(_THREAD_ID_TO_CONTEXT) > 0 + + +def dbo_current_ubatch_id() -> int: + if len(_THREAD_ID_TO_CONTEXT) == 0: + return 0 + return _THREAD_ID_TO_CONTEXT[threading.get_ident()] + + +def _register_ubatch_function(func): + + def wrapper(*args, **kwargs): + if len(_THREAD_ID_TO_CONTEXT) > 0: + ctx_idx = _THREAD_ID_TO_CONTEXT[threading.get_ident()] + ctx = _CURRENT_CONTEXTS[ctx_idx] + func(ctx, *args, **kwargs) + + return wrapper + + +dbo_yield_and_switch_from_compute_to_comm = _register_ubatch_function( + UBatchContext.yield_and_switch_from_compute_to_comm) +dbo_yield_and_switch_from_comm_to_compute = _register_ubatch_function( + UBatchContext.yield_and_switch_from_comm_to_compute) +dbo_yield = _register_ubatch_function(UBatchContext.yield_) +dbo_maybe_run_recv_hook = _register_ubatch_function( + UBatchContext.maybe_run_recv_hook) +dbo_switch_to_comm_sync = _register_ubatch_function( + UBatchContext.switch_to_comm_sync) + + +def dbo_register_recv_hook(recv_hook): + if len(_THREAD_ID_TO_CONTEXT) > 0: + ctx_idx = _THREAD_ID_TO_CONTEXT[threading.get_ident()] + next_ctx = _CURRENT_CONTEXTS[(ctx_idx + 1) % 2] + next_ctx.recv_hook = recv_hook + + +def make_ubatch_contexts( + num_micro_batches: int, + compute_stream: torch.cuda.Stream, + comm_stream: torch.cuda.Stream, + forward_contexts: list[ForwardContext], + ready_barrier: threading.Barrier, + schedule: str = "default", +) -> list[UBatchContext]: + assert num_micro_batches == 2, "only been tested with 2 micro-batches" + """ + Create a context manager for micro-batching synchronization. 
+ """ + cpu_events = [threading.Event() for _ in range(num_micro_batches)] + gpu_comm_done_events = [ + torch.cuda.Event() for _ in range(num_micro_batches) + ] + gpu_compute_done_events = [ + torch.cuda.Event() for _ in range(num_micro_batches) + ] + + assert len(forward_contexts) == 2 + + ctxs = [] + for i in range(num_micro_batches): + ctx = UBatchContext(id=i, + compute_stream=compute_stream, + comm_stream=comm_stream, + forward_context=forward_contexts[i], + ready_barrier=ready_barrier, + cpu_wait_event=cpu_events[i], + cpu_signal_event=cpu_events[(i + 1) % + num_micro_batches], + gpu_comm_done_event=gpu_comm_done_events[i], + gpu_compute_done_event=gpu_compute_done_events[i], + schedule=schedule) + ctxs.append(ctx) + + return ctxs diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 5ac7470c1ac9..fc831a73a75e 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -130,9 +130,17 @@ def get_max_items( @dataclass class AttentionGroup: backend: type[AttentionBackend] - metadata_builder: AttentionMetadataBuilder + metadata_builders: list[AttentionMetadataBuilder] layer_names: list[str] + def get_metadata_builder(self, + ubatch_id: Optional[int] = None + ) -> AttentionMetadataBuilder: + if ubatch_id is None: + return self.metadata_builders[0] + assert len(self.metadata_builders) > ubatch_id + return self.metadata_builders[ubatch_id] + def sanity_check_mm_encoder_outputs( mm_embeddings: MultiModalEmbeddings, From faa7a5daac8244376a3a182d6eee8d2e0f6d8127 Mon Sep 17 00:00:00 2001 From: lianyibo Date: Wed, 17 Sep 2025 01:36:58 +0800 Subject: [PATCH 004/518] [Bugfix] Fix unable to run encoder model when disable_hybrid_kv_cache_manager is true (#24571) Signed-off-by: lianyibo Co-authored-by: Chen Zhang --- vllm/v1/core/kv_cache_utils.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index f225b7326404..9fab36aba91b 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -754,6 +754,10 @@ def is_kv_cache_type_uniform(kv_cache_spec: dict[str, KVCacheSpec]) -> bool: True if all layers have the same type, False otherwise. """ + if not kv_cache_spec: + # Encoder-only models do not have KV cache, kv_cache_type can be + # regarded as uniform. + return True try: kv_cache_spec_values = list(kv_cache_spec.values()) _ = kv_cache_spec_values[0].merge(kv_cache_spec_values) From d593cf28fa020dbae53dae19122aac8aeeeae0bc Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Wed, 17 Sep 2025 01:46:46 +0800 Subject: [PATCH 005/518] [Misc] Add removed encoder-decoder models to previously supported models list (#24961) Signed-off-by: Isotr0py --- vllm/model_executor/models/registry.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 6bb65ed6debc..38f3d5c69b9e 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -319,7 +319,17 @@ sys.executable, "-m", "vllm.model_executor.models.registry" ] -_PREVIOUSLY_SUPPORTED_MODELS = {"Phi3SmallForCausalLM": "0.9.2"} +_PREVIOUSLY_SUPPORTED_MODELS = { + "Phi3SmallForCausalLM": "0.9.2", + # encoder-decoder models except whisper + # have been removed for V0 deprecation. 
+ "BartModel": "0.10.2", + "BartForConditionalGeneration": "0.10.2", + "DonutForConditionalGeneration": "0.10.2", + "Florence2ForConditionalGeneration": "0.10.2", + "MBartForConditionalGeneration": "0.10.2", + "MllamaForConditionalGeneration": "0.10.2", +} @dataclass(frozen=True) From cd1f885bcfe3b1731c552495ce70d2abf63d1373 Mon Sep 17 00:00:00 2001 From: Sugar <64777228+Sugar-zsg@users.noreply.github.com> Date: Wed, 17 Sep 2025 01:52:31 +0800 Subject: [PATCH 006/518] Directly get max encoder len from VLLM config in V1 (#24866) Signed-off-by: Sugar-zsg <952242923@qq.com> --- vllm/attention/layers/cross_attention.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vllm/attention/layers/cross_attention.py b/vllm/attention/layers/cross_attention.py index c24fa4e15f67..9400c5bffa38 100644 --- a/vllm/attention/layers/cross_attention.py +++ b/vllm/attention/layers/cross_attention.py @@ -14,7 +14,6 @@ from vllm.attention.selector import get_attn_backend from vllm.config import CacheConfig, VllmConfig from vllm.logger import init_logger -from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.utils import cdiv from vllm.v1.attention.backends.utils import (CommonAttentionMetadata, subclass_attention_backend) @@ -23,9 +22,13 @@ logger = init_logger(__name__) -def _get_max_encoder_len(vllm_config: VllmConfig) -> int: - return MULTIMODAL_REGISTRY.get_encdec_max_encoder_len( - vllm_config.model_config) +def _get_max_encoder_len(vllm_config: "VllmConfig") -> int: + """Gets the max number of encoder input tokens from the config. + """ + sc = vllm_config.scheduler_config + assert sc and isinstance(sc.max_num_encoder_input_tokens, int), \ + "max_num_encoder_input_tokens must be int for enc-dec models" + return sc.max_num_encoder_input_tokens def _get_cross_slot_mapping(encoder_seq_lens: np.ndarray, From f4d6eb95cfdcee2c47bb408890346003bb5a5c20 Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Tue, 16 Sep 2025 11:41:12 -0700 Subject: [PATCH 007/518] [gpt-oss][1b] streaming add item id, content id (#24788) Signed-off-by: Andrew Xia --- .../openai/test_response_api_with_harmony.py | 23 +++++++++++++++++++ vllm/entrypoints/openai/serving_responses.py | 10 ++++++-- 2 files changed, 31 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 88b3795abe73..0776f217f44a 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -318,6 +318,9 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): background=background, ) + current_item_id = "" + current_content_index = -1 + events = [] current_event_mode = None resp_id = None @@ -329,6 +332,26 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): current_event_mode = event.type print(f"\n[{event.type}] ", end="", flush=True) + # verify current_item_id is correct + if event.type == "response.output_item.added": + assert event.item.id != current_item_id + current_item_id = event.item.id + elif event.type in [ + "response.output_text.delta", + "response.reasoning_text.delta" + ]: + assert event.item_id == current_item_id + + # verify content_index_id is correct + if event.type == "response.content_part.added": + assert event.content_index != current_content_index + current_content_index = event.content_index + elif event.type in [ + "response.output_text.delta", + "response.reasoning_text.delta" + ]: 
+ assert event.content_index == current_content_index + if "text.delta" in event.type: print(event.delta, end="", flush=True) elif "reasoning_text.delta" in event.type: diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 9e285e6e5175..7be5e54208bd 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -1260,9 +1260,9 @@ async def _process_harmony_streaming_events( _increment_sequence_number_and_return: Callable[[BaseModel], BaseModel], ) -> AsyncGenerator[BaseModel, None]: - current_content_index = 0 # FIXME: this number is never changed + current_content_index = -1 current_output_index = 0 - current_item_id = "" # FIXME: this number is never changed + current_item_id: str = "" sent_output_item_added = False async for ctx in result_generator: @@ -1353,6 +1353,7 @@ async def _process_harmony_streaming_events( and ctx.parser.current_recipient is None): if not sent_output_item_added: sent_output_item_added = True + current_item_id = f"msg_{random_uuid()}" yield _increment_sequence_number_and_return( openai_responses_types. ResponseOutputItemAddedEvent( @@ -1368,6 +1369,7 @@ async def _process_harmony_streaming_events( status="in_progress", ), )) + current_content_index += 1 yield _increment_sequence_number_and_return( openai_responses_types. ResponseContentPartAddedEvent( @@ -1398,6 +1400,7 @@ async def _process_harmony_streaming_events( and ctx.parser.current_recipient is None): if not sent_output_item_added: sent_output_item_added = True + current_item_id = f"msg_{random_uuid()}" yield _increment_sequence_number_and_return( openai_responses_types. ResponseOutputItemAddedEvent( @@ -1412,6 +1415,7 @@ async def _process_harmony_streaming_events( status="in_progress", ), )) + current_content_index += 1 yield _increment_sequence_number_and_return( openai_responses_types. ResponseContentPartAddedEvent( @@ -1444,6 +1448,7 @@ async def _process_harmony_streaming_events( ) and ctx.parser.current_recipient == "python": if not sent_output_item_added: sent_output_item_added = True + current_item_id = f"tool_{random_uuid()}" yield _increment_sequence_number_and_return( openai_responses_types. 
ResponseOutputItemAddedEvent( @@ -1516,6 +1521,7 @@ async def _process_harmony_streaming_events( raise ValueError( f"Unknown function name: {function_name}") + current_item_id = f"tool_{random_uuid()}" yield _increment_sequence_number_and_return( openai_responses_types.ResponseOutputItemAddedEvent( type="response.output_item.added", From 218454b9b26cd2185cdf84e3ec9f58538185d06b Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Tue, 16 Sep 2025 12:07:34 -0700 Subject: [PATCH 008/518] [MISC] Add code owners of vllm/v1 to vllm/v1/core (#24928) Signed-off-by: Chen Zhang --- .github/CODEOWNERS | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index e3dbd28fa91e..73184d4e6b12 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -35,7 +35,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /vllm/v1/spec_decode @benchislett @luccafong /vllm/v1/attention/backends/flashinfer.py @mgoin /vllm/v1/attention/backends/triton_attn.py @tdoublep -/vllm/v1/core @heheda12345 +/vllm/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 /vllm/v1/kv_cache_interface.py @heheda12345 # Test ownership @@ -54,7 +54,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/test_inputs.py @DarkLight1337 @ywang96 /tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm /tests/v1/structured_output @mgoin @russellb @aarnphm -/tests/v1/core @heheda12345 +/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 /tests/weight_loading @mgoin @youkaichao @yewentao256 /tests/lora @jeejeelee /tests/models/language/generation/test_hybrid.py @tdoublep From dcf2f3ec067711ff69e5ab7478fca6ffb4f11daf Mon Sep 17 00:00:00 2001 From: Concurrensee Date: Tue, 16 Sep 2025 14:49:06 -0500 Subject: [PATCH 009/518] [ROCm] Add dependencies for ROCm (#24900) Signed-off-by: Yida Wu --- requirements/rocm-build.txt | 1 + requirements/rocm-test.txt | 1 + requirements/rocm.txt | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/requirements/rocm-build.txt b/requirements/rocm-build.txt index affe562c24f6..a86a8ab6df14 100644 --- a/requirements/rocm-build.txt +++ b/requirements/rocm-build.txt @@ -14,3 +14,4 @@ setuptools-scm>=8 wheel jinja2>=3.1.6 amdsmi==6.2.4 +timm>=1.0.17 diff --git a/requirements/rocm-test.txt b/requirements/rocm-test.txt index 25f950a99ece..869fb28c3d85 100644 --- a/requirements/rocm-test.txt +++ b/requirements/rocm-test.txt @@ -1,5 +1,6 @@ # Common dependencies -r common.txt +tblib==3.1.0 # entrypoints test # librosa==0.10.2.post1 # required by audio tests in entrypoints/openai diff --git a/requirements/rocm.txt b/requirements/rocm.txt index 8e3995121071..c129dd345c81 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -17,4 +17,5 @@ setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 runai-model-streamer==0.11.0 runai-model-streamer-s3==0.11.0 -conch-triton-kernels==1.2.1 \ No newline at end of file +conch-triton-kernels==1.2.1 +timm>=1.0.17 \ No newline at end of file From 86daa875fe1a1cf76709af1637f95891d3ef0707 Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Tue, 16 Sep 2025 12:56:16 -0700 Subject: [PATCH 010/518] [gpt-oss][1][bugfix] fix streaming final output (#24466) Signed-off-by: Andrew Xia --- .../openai/test_response_api_with_harmony.py | 2 + tests/entrypoints/test_context.py | 83 ++++++++++++++++++- vllm/entrypoints/context.py | 11 ++- 3 files changed, 91 insertions(+), 5 deletions(-) diff --git 
a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 0776f217f44a..eceaff672112 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -364,6 +364,8 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): events.append(event) assert len(events) > 0 + response_completed_event = events[-1] + assert len(response_completed_event.response.output) > 0 if background: starting_after = 5 diff --git a/tests/entrypoints/test_context.py b/tests/entrypoints/test_context.py index 5e6a4c85ff79..2afe9758c2ad 100644 --- a/tests/entrypoints/test_context.py +++ b/tests/entrypoints/test_context.py @@ -4,7 +4,7 @@ from unittest.mock import MagicMock, patch import pytest -from openai_harmony import StreamState +from openai_harmony import Author, Message, Role, StreamState, TextContent from vllm.entrypoints.context import HarmonyContext, StreamingHarmonyContext from vllm.outputs import CompletionOutput, RequestOutput @@ -312,9 +312,9 @@ async def test_negative_tool_tokens_edge_case(): @pytest.mark.asyncio async def test_streaming_multi_turn_token_counting(mock_parser): """Test token counting for streaming multi-turn conversations. - - This test focuses on how StreamingHarmonyContext counts tokens in a - multi-turn conversation with streaming (token-by-token) outputs and + + This test focuses on how StreamingHarmonyContext counts tokens in a + multi-turn conversation with streaming (token-by-token) outputs and message boundaries. """ # Create a streaming context @@ -423,3 +423,78 @@ async def test_streaming_multi_turn_token_counting(mock_parser): additional_tool_tokens = 13 - 8 - 3 # = 2 assert context.num_tool_output_tokens == expected_tool_tokens \ + additional_tool_tokens + + +@pytest.mark.asyncio +async def test_streaming_message_synchronization(mock_parser): + """Test message synchronization logic from lines 413-417 in context.py. + + This test verifies that when parser.messages contains more messages than + the context's _messages (minus initial messages), the context properly + extends its message list with the new parser messages. 
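# A minimal sketch (assumed standalone helper, mirroring the condition this
# test exercises) of the message synchronization rule: the context appends
# only the parser messages it has not mirrored yet, leaving its initial
# messages untouched.
def _sync_messages(ctx_messages: list, num_init_messages: int,
                   parser_messages: list) -> None:
    if len(ctx_messages) - num_init_messages < len(parser_messages):
        ctx_messages.extend(
            parser_messages[len(ctx_messages) - num_init_messages:])

# e.g. one initial user message plus one new parser message -> the context
# grows from 1 to 2 messages, and num_init_messages stays 1.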
+ """ + + # Create a streaming context with some initial messages + initial_messages = [ + Message( + author=Author(role=Role.USER, name="user"), + content=[TextContent(text="Hello")], + recipient=Role.ASSISTANT, + ) + ] + context = StreamingHarmonyContext(messages=initial_messages, + available_tools=[]) + + # Verify initial state + assert len(context._messages) == 1 + assert context.num_init_messages == 1 + + # Mock parser to have more messages than context + # Simulate parser having processed 3 new messages + mock_parser.messages = [ + Message( + author=Author(role=Role.ASSISTANT, name="assistant"), + content=[TextContent(text="Response 1")], + recipient=Role.USER, + ), + ] + + # This should trigger the message synchronization logic + context.append_output( + create_mock_request_output(prompt_token_ids=[1, 2, 3], + output_token_ids=[101], + finished=False)) + + # Verify that messages were synchronized + assert len(context._messages) == 2 + + # Verify the new messages were added correctly + assert context._messages[1].content[0].text == "Response 1" + + # Test the specific condition from line 413-414: + # len(self._messages) - self.num_init_messages < len(self.parser.messages) + messages_minus_init = len(context._messages) - context.num_init_messages + parser_messages_count = len(mock_parser.messages) + + # After synchronization, they should be equal (no longer less than) + assert messages_minus_init == parser_messages_count + + # Test edge case: add one more parser message + mock_parser.messages.append( + Message( + author=Author(role=Role.ASSISTANT, name="assistant"), + content=[TextContent(text="Response 4")], + recipient=Role.USER, + )) + + # Create another output to trigger synchronization again + mock_output2 = create_mock_request_output(prompt_token_ids=[1, 2, 3], + output_token_ids=[102], + finished=True) + + context.append_output(mock_output2) + + # Verify the fourth message was added, num_init_messages is still 1 + assert len(context._messages) == 3 + assert context.num_init_messages == 1 + assert context._messages[2].content[0].text == "Response 4" diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 6658f91595e5..8619452f2445 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -151,6 +151,9 @@ def append_output(self, output: Union[RequestOutput, self._update_decode_token_usage(output) # Move current turn to previous turn for next turn's calculations self.previous_turn = self.current_turn.copy() + # append_output is called only once before tool calling + # in non-streaming case + # so we can append all the parser messages to _messages output_msgs = self.parser.messages # The responses finish reason is set in the last message self.finish_reason = output.outputs[0].finish_reason @@ -387,7 +390,7 @@ def __init__(self, *args, **kwargs): @property def messages(self) -> list: - return self.parser.messages + return self._messages def append_output(self, output: Union[RequestOutput, list[Message]]) -> None: @@ -412,6 +415,11 @@ def append_output(self, output: Union[RequestOutput, # Check if the current token is part of reasoning content self._update_num_reasoning_tokens() self.last_tok = tok + if len(self._messages) - self.num_init_messages < len( + self.parser.messages): + self._messages.extend( + self.parser.messages[len(self._messages) - + self.num_init_messages:]) else: # Handle the case of tool output in direct message format assert len(output) == 1, "Tool output should be a single message" @@ -424,6 +432,7 @@ def 
append_output(self, output: Union[RequestOutput, for tok in toks: self.parser.process(tok) self.last_tok = toks[-1] + # TODO: add tool_output messages to self._messages def is_expecting_start(self) -> bool: return self.parser.state == StreamState.EXPECT_START From 02d4b854543c3b2c65435a5ed9bb1c3a9856cfad Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Tue, 16 Sep 2025 16:06:56 -0500 Subject: [PATCH 011/518] Use kwargs for long lists of `EngineCoreRequest` arguments in tests and fix extra kwargs (#24987) Signed-off-by: Andrew Sansom --- tests/detokenizer/test_min_tokens.py | 18 ++++++++---------- tests/tokenization/test_detokenize.py | 16 ++++++++-------- 2 files changed, 16 insertions(+), 18 deletions(-) diff --git a/tests/detokenizer/test_min_tokens.py b/tests/detokenizer/test_min_tokens.py index 887e83342536..26003373c569 100644 --- a/tests/detokenizer/test_min_tokens.py +++ b/tests/detokenizer/test_min_tokens.py @@ -31,16 +31,14 @@ def test_min_tokens_with_stop(min_tokens: int, stop: str, truth: str): stop=stop, min_tokens=min_tokens, ) - request = EngineCoreRequest("", - prompt_token_ids, - None, - None, - None, - params, - None, - None, - 0.0, - None, + request = EngineCoreRequest(request_id="", + prompt_token_ids=prompt_token_ids, + mm_features=None, + sampling_params=params, + pooling_params=None, + eos_token_id=None, + arrival_time=0.0, + lora_request=None, cache_salt=None, data_parallel_rank=None) diff --git a/tests/tokenization/test_detokenize.py b/tests/tokenization/test_detokenize.py index ea7ccfbb2b45..527aad97d4fa 100644 --- a/tests/tokenization/test_detokenize.py +++ b/tests/tokenization/test_detokenize.py @@ -61,14 +61,14 @@ def _run_incremental_decode(tokenizer, skip_special_tokens=skip_special_tokens, spaces_between_special_tokens=spaces_between_special_tokens, ) - request = EngineCoreRequest("", - prompt_token_ids, - None, - params, - None, - None, - 0.0, - None, + request = EngineCoreRequest(request_id="", + prompt_token_ids=prompt_token_ids, + mm_features=None, + sampling_params=params, + pooling_params=None, + eos_token_id=None, + arrival_time=0.0, + lora_request=None, cache_salt=None, data_parallel_rank=None) From 3053a22b330cd7170dce6f33f3a2043c64a99599 Mon Sep 17 00:00:00 2001 From: Aleksandr Malyshev <164964928+maleksan85@users.noreply.github.com> Date: Tue, 16 Sep 2025 14:27:11 -0700 Subject: [PATCH 012/518] fp8 kv cache support fix for torch.compile (#22758) Signed-off-by: Aleksandr Malyshev Signed-off-by: Gregory Shtrasberg Co-authored-by: Aleksandr Malyshev Co-authored-by: Gregory Shtrasberg Co-authored-by: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> --- vllm/model_executor/layers/quantization/kv_cache.py | 4 +++- vllm/v1/attention/backends/triton_attn.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/kv_cache.py b/vllm/model_executor/layers/quantization/kv_cache.py index 4c6fcda893a0..275a1c43fdd2 100644 --- a/vllm/model_executor/layers/quantization/kv_cache.py +++ b/vllm/model_executor/layers/quantization/kv_cache.py @@ -125,7 +125,9 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: # These are used in the final Attention.forward() layer._q_scale.copy_(q_scale) - layer._q_scale_float = q_scale + layer._q_scale_float = q_scale.item() if isinstance( + q_scale, torch.Tensor) else q_scale + layer._prob_scale.copy_(prob_scale) if layer.kv_cache_dtype == "fp8" and (q_scale == 1.0 or prob_scale == 1.0): diff --git 
a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index c294a5a73cbd..784912a122f6 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -361,7 +361,7 @@ def forward( key_cache = key_cache.view(self.fp8_dtype) value_cache = value_cache.view(self.fp8_dtype) num_tokens, num_heads, head_size = query.shape - assert layer._q_scale == 1.0, \ + assert layer._q_scale_float == 1.0, \ "A non 1.0 q_scale is not currently supported." if current_platform.is_cuda(): # Skip Q quantization on ROCm and XPU, enable this on cuda From dbebb7f812123b4a0efe5b085582d3345fe7f740 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 16 Sep 2025 17:45:10 -0400 Subject: [PATCH 013/518] [Perf] Reuse workspace for FP8+FP4 Marlin MoE (#20500) Signed-off-by: mgoin Signed-off-by: Michael Goin Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> --- .../compressed_tensors/compressed_tensors_moe.py | 6 ++++-- vllm/model_executor/layers/quantization/fp8.py | 3 ++- vllm/model_executor/layers/quantization/modelopt.py | 3 ++- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index c2b884c058d3..5470deb76845 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -398,7 +398,8 @@ def apply( quant_type_id=scalar_types.float4_e2m1f.id, apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, - expert_map=expert_map) + expert_map=expert_map, + workspace=layer.workspace) # FlashInfer fused experts path if self.fused_experts is not None: @@ -940,7 +941,8 @@ def apply( quant_type_id=scalar_types.float8_e4m3fn.id, apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, - expert_map=expert_map) + expert_map=expert_map, + workspace=layer.workspace) assert self.fused_experts_func is not None diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 49ff87df93c3..254cc2be05ee 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -1103,7 +1103,8 @@ def apply( quant_type_id=scalar_types.float8_e4m3fn.id, apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, - expert_map=expert_map) + expert_map=expert_map, + workspace=layer.workspace) elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: assert self.block_quant is None assert (not renormalize and custom_routing_function is not None) diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 9b99931e7b43..60a79e53e814 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -1474,7 +1474,8 @@ def apply( quant_type_id=scalar_types.float4_e2m1f.id, apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, - expert_map=expert_map) + expert_map=expert_map, + workspace=layer.workspace) if self.fused_experts is not None: assert self.allow_flashinfer and \ From d119fc86140785e7efc8f125c17153544d1e0f20 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni 
Date: Tue, 16 Sep 2025 18:55:02 -0400 Subject: [PATCH 014/518] [CI][Bugfix] Fix failing Blackwell test (#24993) Signed-off-by: Matthew Bonanni Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> --- vllm/model_executor/layers/fused_moe/modular_kernel.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 33799b58d199..efaa9cc058e4 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -506,12 +506,9 @@ def __init__(self): def get(self, shape: tuple[int, ...], device: torch.device, dtype: torch.dtype): shape_numel = prod(shape) - if self.buffer is None or self.buffer.numel() < shape_numel: + if (self.buffer is None or self.buffer.numel() < shape_numel + or self.buffer.device != device or self.buffer.dtype != dtype): self.buffer = torch.empty(shape_numel, device=device, dtype=dtype) - assert self.buffer.device == device, \ - f"Buffer device mismatch: {self.buffer.device} != {device}" - assert self.buffer.dtype == dtype, \ - f"Buffer dtype mismatch: {self.buffer.dtype} != {dtype}" return self.buffer[:shape_numel].view(*shape) From 493b10f8bf38495654baa601e8ed8dc4ce1565b7 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 16 Sep 2025 21:13:21 -0400 Subject: [PATCH 015/518] [CI] GPT-OSS GPQA eval test for Blackwell (#24920) Signed-off-by: mgoin Signed-off-by: Michael Goin Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 14 +++ tests/evals/gpt_oss/__init__.py | 2 + tests/evals/gpt_oss/conftest.py | 18 ++++ tests/evals/gpt_oss/test_gpqa_correctness.py | 102 +++++++++++++++++++ 4 files changed, 136 insertions(+) create mode 100644 tests/evals/gpt_oss/__init__.py create mode 100644 tests/evals/gpt_oss/conftest.py create mode 100644 tests/evals/gpt_oss/test_gpqa_correctness.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index f0fd808fd6dc..6f06099edd53 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -821,6 +821,20 @@ steps: - pytest -v -s tests/kernels/moe/test_flashinfer.py - pytest -v -s tests/compile/test_silu_mul_quant_fusion.py +- label: GPT-OSS Eval (Blackwell) + timeout_in_minutes: 60 + working_dir: "/vllm-workspace/" + gpu: b200 + # optional: true + source_file_dependencies: + - tests/evals/gpt_oss + - vllm/model_executor/models/gpt_oss.py + - vllm/model_executor/layers/quantization/mxfp4.py + - vllm/v1/attention/backends/flashinfer.py + commands: + - uv pip install --system 'gpt-oss[eval]==0.0.5' + - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 --server-args '--tensor-parallel-size 2' + ##### 1 GPU test ##### ##### multi gpus test ##### diff --git a/tests/evals/gpt_oss/__init__.py b/tests/evals/gpt_oss/__init__.py new file mode 100644 index 000000000000..0fec1fe5bcdf --- /dev/null +++ b/tests/evals/gpt_oss/__init__.py @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project \ No newline at end of file diff --git a/tests/evals/gpt_oss/conftest.py b/tests/evals/gpt_oss/conftest.py new file mode 100644 index 000000000000..35528c0a6a36 --- /dev/null +++ b/tests/evals/gpt_oss/conftest.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: 
Copyright contributors to the vLLM project +""" +Pytest configuration for GPT-OSS evaluation tests. +""" + + +def pytest_addoption(parser): + """Add command line options for pytest.""" + parser.addoption("--model", action="store", help="Model name to evaluate") + parser.addoption("--metric", + action="store", + type=float, + help="Expected metric threshold") + parser.addoption("--server-args", + action="store", + default="", + help="Additional server arguments") diff --git a/tests/evals/gpt_oss/test_gpqa_correctness.py b/tests/evals/gpt_oss/test_gpqa_correctness.py new file mode 100644 index 000000000000..4cc4041a60ce --- /dev/null +++ b/tests/evals/gpt_oss/test_gpqa_correctness.py @@ -0,0 +1,102 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +GPQA evaluation using vLLM server and GPT-OSS evaluation package. + +Usage: +pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py \ + --model openai/gpt-oss-20b \ + --metric 0.58 \ + --server-args "--tensor-parallel-size 2" +""" + +import subprocess +import sys + +import regex as re + +from tests.utils import RemoteOpenAIServer + +TOL = 0.05 # Absolute tolerance for accuracy comparison + + +def run_gpqa_eval(model_name: str, base_url: str) -> float: + """Run GPQA evaluation using the gpt-oss evaluation package.""" + + # Build the command to run the evaluation + cmd = [ + sys.executable, "-m", "gpt_oss.evals", "--eval", "gpqa", "--model", + model_name, "--reasoning-effort", "low", "--base-url", base_url + ] + + try: + # Run the evaluation + result = subprocess.run( + cmd, + text=True, + capture_output=True, + timeout=1800, # 30 minute timeout + env={"OPENAI_API_KEY": "dummy"}) + + print("Evaluation process output:\n", result.stdout) + + # Parse the output to extract the score + match = re.search(r"'metric':\s*([\d.]+)", result.stdout) + if match: + return float(match.group(1)) + + # If we still can't find it, raise an error + raise ValueError( + f"Could not parse score from evaluation output:\n{result.stdout}") + + except subprocess.TimeoutExpired as e: + raise RuntimeError("Evaluation timed out") from e + except subprocess.CalledProcessError as e: + raise RuntimeError( + f"Evaluation failed with exit code {e.returncode}:\n" + f"stdout: {e.stdout}\nstderr: {e.stderr}") from e + + +def test_gpqa_correctness(request): + """Test GPQA correctness for GPT-OSS model.""" + + # Get command line arguments + model_name = request.config.getoption("--model") + expected_metric = request.config.getoption("--metric") + server_args_str = request.config.getoption("--server-args") + + # Parse server arguments + server_args = [] + if server_args_str: + server_args = server_args_str.split() + + # Add standard server arguments + server_args.extend([ + "--max-model-len", + "32768", + "--trust-remote-code", + ]) + + print(f"Starting GPQA evaluation for model: {model_name}") + print(f"Expected metric threshold: {expected_metric}") + print(f"Server args: {' '.join(server_args)}") + + # Launch server and run evaluation + with RemoteOpenAIServer(model_name, server_args, + max_wait_seconds=1800) as remote_server: + base_url = remote_server.url_for("v1") + print(f"Server started at: {base_url}") + + measured_metric = run_gpqa_eval(model_name, base_url) + + print(f"GPQA Results for {model_name}:") + print(f" Measured metric: {measured_metric:.4f}") + print(f" Expected metric: {expected_metric:.4f}") + print(f" Tolerance: {TOL:.4f}") + + # Verify metric is within tolerance + assert measured_metric >= 
expected_metric - TOL, ( + f"GPQA metric too low: {measured_metric:.4f} < " + f"{expected_metric:.4f} - {TOL:.4f} = {expected_metric - TOL:.4f}") + + print(f"✅ GPQA test passed for {model_name}") From cef32104b4b411e5093581ad77d2e09a50c2837c Mon Sep 17 00:00:00 2001 From: Tahsin Tunan Date: Wed, 17 Sep 2025 07:31:06 +0600 Subject: [PATCH 016/518] [FP8] Extend per-token-group quantization support to QuantFP8 (#24342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tahsin Tunan Signed-off-by: Luka Govedič Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Luka Govedič --- .../kernels/bench_per_token_quant_fp8.py | 263 ++++++++++++++---- .../quantization/test_fp8_quant_group.py | 150 ++++++++++ .../layers/fused_moe/fused_moe.py | 4 +- .../layers/quantization/input_quant_fp8.py | 79 +++++- .../layers/quantization/utils/quant_utils.py | 9 + 5 files changed, 444 insertions(+), 61 deletions(-) create mode 100644 tests/kernels/quantization/test_fp8_quant_group.py diff --git a/benchmarks/kernels/bench_per_token_quant_fp8.py b/benchmarks/kernels/bench_per_token_quant_fp8.py index 923d678f1f2d..9170361e974b 100644 --- a/benchmarks/kernels/bench_per_token_quant_fp8.py +++ b/benchmarks/kernels/bench_per_token_quant_fp8.py @@ -2,14 +2,25 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import itertools from typing import Callable +from unittest.mock import patch +import pandas as pd import torch -from vllm import _custom_ops as ops -from vllm.config import CompilationConfig, VllmConfig, set_current_vllm_config from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.utils.quant_utils import GroupShape from vllm.triton_utils import triton +from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, FlexibleArgumentParser + + +def with_triton_mode(fn): + """Temporarily force the Triton fallback path""" + + def wrapped(*args, **kwargs): + with patch("vllm.platforms.current_platform.is_cuda", return_value=False): + return fn(*args, **kwargs) + + return wrapped # TODO(luka): use standalone_compile utility @@ -21,78 +32,236 @@ def inner(*args): return inner -torch._dynamo.config.recompile_limit = 8888 -compilation_config = CompilationConfig(custom_ops=["none"]) -with set_current_vllm_config(VllmConfig(compilation_config=compilation_config)): - torch_per_token_quant_fp8 = torch.compile( - QuantFP8(False, GroupShape.PER_TOKEN), - fullgraph=True, - dynamic=False, # recompile for different shapes - ) +def bench_compile(fn: Callable): + # recompile for different shapes + fwd = torch.compile(fn, fullgraph=True, dynamic=False) # First dim is explicitly dynamic to simulate vLLM usage - torch_per_token_quant_fp8 = with_dyn_arg(torch_per_token_quant_fp8, 0, 0) + return with_dyn_arg(fwd, 0, 0) -def cuda_per_token_quant_fp8( - input: torch.Tensor, -) -> tuple[torch.Tensor, torch.Tensor]: - return ops.scaled_fp8_quant(input) +torch._dynamo.config.recompile_limit = 8888 -def calculate_diff(batch_size: int, seq_len: int): - """Calculate difference between Triton and CUDA implementations.""" +def calculate_diff( + batch_size: int, + hidden_size: int, + group_shape: GroupShape, + dtype: torch.dtype, +): + """Calculate the difference between Inductor and CUDA implementations.""" device = torch.device("cuda") - x = torch.rand((batch_size * seq_len, 4096), dtype=torch.float16, device=device) + x = torch.rand((batch_size * 
hidden_size, 4096), dtype=dtype, device=device) + + quant_fp8 = QuantFP8(False, group_shape, column_major_scales=False) - torch_out, torch_scale = torch_per_token_quant_fp8(x) - cuda_out, cuda_scale = cuda_per_token_quant_fp8(x) + torch_out, torch_scale = bench_compile(quant_fp8.forward_native)(x) + torch_eager_out, torch_eager_scale = quant_fp8.forward_native(x) + cuda_out, cuda_scale = quant_fp8.forward_cuda(x) - if torch.allclose( - cuda_out.to(torch.float32), torch_out.to(torch.float32), rtol=1e-3, atol=1e-5 - ) and torch.allclose(cuda_scale, torch_scale, rtol=1e-3, atol=1e-5): + out_allclose = lambda o1, o2: torch.allclose( + o1.to(torch.float32), + o2.to(torch.float32), + rtol=1e-3, + atol=1e-5, + ) + scale_allclose = lambda s1, s2: torch.allclose(s1, s2, rtol=1e-3, atol=1e-5) + + if ( + out_allclose(cuda_out, torch_out) + and scale_allclose(cuda_scale, torch_scale) + and out_allclose(cuda_out, torch_eager_out) + and scale_allclose(cuda_scale, torch_eager_scale) + ): print("✅ All implementations match") else: print("❌ Implementations differ") -batch_size_range = [1, 16, 32, 64, 128] -seq_len_range = [1, 16, 64, 128, 256, 512, 1024, 2048, 4096] - -configs = list(itertools.product(batch_size_range, seq_len_range)) +configs = [] -@triton.testing.perf_report( - triton.testing.Benchmark( - x_names=["batch_size", "seq_len"], - x_vals=configs, - line_arg="provider", - line_vals=["torch", "cuda"], - line_names=["Torch", "CUDA"], - styles=[("blue", "-"), ("green", "-")], - ylabel="us", - plot_name="per-token-dynamic-quant-fp8-performance", - args={}, - ) -) -def benchmark_quantization(batch_size, seq_len, provider): - dtype = torch.float16 +def benchmark_quantization( + batch_size, + hidden_size, + provider, + group_shape: GroupShape, + col_major: bool, + dtype: torch.dtype, +): device = torch.device("cuda") - x = torch.randn(batch_size * seq_len, 4096, device=device, dtype=dtype) + x = torch.randn(batch_size * hidden_size, 4096, device=device, dtype=dtype) quantiles = [0.5, 0.2, 0.8] + quant_fp8 = QuantFP8(False, group_shape, column_major_scales=col_major) if provider == "torch": - fn = lambda: torch_per_token_quant_fp8(x.clone()) + fn = lambda: bench_compile(quant_fp8.forward_native)(x.clone()) elif provider == "cuda": - fn = lambda: cuda_per_token_quant_fp8(x.clone()) + fn = lambda: quant_fp8.forward_cuda(x.clone()) + elif provider == "triton": + if not group_shape.is_per_group(): + # Triton only supported for per-group + return 0, 0, 0 + + fn = lambda: with_triton_mode(quant_fp8.forward_cuda)(x.clone()) ms, min_ms, max_ms = triton.testing.do_bench_cudagraph(fn, quantiles=quantiles) return 1000 * ms, 1000 * max_ms, 1000 * min_ms +# TODO(luka) extract to utils +def compute_geomean_speedups( + df: pd.DataFrame, + baseline_col: str, + speedup_cols: list[str], + groupby_cols: list[str] | None = None, +) -> pd.DataFrame: + """ + Compute geometric mean speedups over a baseline column. + + Args: + df: Input dataframe + baseline_col: Column to use as baseline + speedup_cols: Columns to compute speedups for + groupby_cols: Columns to group by. If None, compute over entire df. 
+ + Returns: + pd.DataFrame with geometric mean speedups + """ + from scipy.stats import gmean + + def geo_speedup(group: pd.DataFrame) -> pd.Series: + ratios = { + col: (group[baseline_col] / group[col]).values for col in speedup_cols + } + return pd.Series({col: gmean(vals) for col, vals in ratios.items()}) + + if groupby_cols is None: + result = geo_speedup(df).to_frame().T + else: + result = ( + df.groupby(groupby_cols) + .apply(geo_speedup, include_groups=False) + .reset_index() + ) + + return result + + if __name__ == "__main__": - calculate_diff(batch_size=4, seq_len=4096) - benchmark_quantization.run(print_data=True) + parser = FlexibleArgumentParser( + description="Benchmark the various implementations of QuantFP8 (dynamic-only)" + ) + parser.add_argument("-c", "--check", action="store_true") + parser.add_argument( + "--dtype", type=str, choices=["half", "bfloat16", "float"], default="half" + ) + parser.add_argument( + "--hidden-sizes", + type=int, + nargs="+", + default=None, + help="Hidden sizes to benchmark (default: 1,16,64,128,256,512,1024,2048,4096)", + ) + parser.add_argument( + "--batch-sizes", + type=int, + nargs="+", + default=None, + help="Batch sizes to benchmark (default: 1,16,32,64,128)", + ) + parser.add_argument( + "--group-sizes", + type=int, + nargs="+", + default=None, + help="Group sizes for GroupShape(1,N) to benchmark. " + "Use 0 for PER_TENSOR, -1 for PER_TOKEN (default: 0,-1,64,128)", + ) + parser.add_argument( + "--no-column-major", + action="store_true", + help="Disable column-major scales testing", + ) + + args = parser.parse_args() + assert args + + dtype = STR_DTYPE_TO_TORCH_DTYPE[args.dtype] + + hidden_sizes = args.hidden_sizes or [1, 16, 64, 128, 256, 512, 1024, 2048, 4096] + batch_sizes = args.batch_sizes or [1, 16, 32, 64, 128] + + if args.group_sizes is not None: + group_shapes = [] + for size in args.group_sizes: + if size == 0: + group_shapes.append(GroupShape.PER_TENSOR) + elif size == -1: + group_shapes.append(GroupShape.PER_TOKEN) + else: + group_shapes.append(GroupShape(1, size)) + else: + group_shapes = [ + GroupShape.PER_TENSOR, + GroupShape.PER_TOKEN, + GroupShape(1, 64), + GroupShape(1, 128), + ] + + column_major_scales = [False] if args.no_column_major else [True, False] + + config_gen = itertools.product( + group_shapes, + column_major_scales, + batch_sizes, + hidden_sizes, + ) + + # filter out column-major scales for non-group, reverse order + configs.extend(c[::-1] for c in config_gen if (c[0].is_per_group() or not c[1])) + + print(f"Running {len(configs)} configurations:") + print(f" Hidden sizes: {hidden_sizes}") + print(f" Batch sizes: {batch_sizes}") + print(f" Group shapes: {[str(g) for g in group_shapes]}") + print(f" Column major scales: {column_major_scales}") + print() + + if args.check: + for group_shape in group_shapes: + group_size = group_shape[1] + print(f"{group_size=}") + calculate_diff( + batch_size=4, hidden_size=4096, group_shape=group_shape, dtype=dtype + ) + + benchmark = triton.testing.perf_report( + triton.testing.Benchmark( + x_names=["hidden_size", "batch_size", "col_major", "group_shape"], + x_vals=configs, + line_arg="provider", + line_vals=["torch", "cuda", "triton"], + line_names=["Torch (Compiled)", "CUDA", "Triton"], + styles=[("blue", "-"), ("green", "-"), ("black", "-")], + ylabel="us", + plot_name="QuantFP8 performance", + args={}, + ) + )(benchmark_quantization) + + df = benchmark.run(print_data=True, dtype=dtype, return_df=True) + + # Print geomean speedups + geo_table_grouped = 
compute_geomean_speedups( + df, + baseline_col="Torch (Compiled)", + speedup_cols=["CUDA", "Triton"], + groupby_cols=["col_major", "group_shape"], + ) + + print("Speedup over Torch (Compiled)") + print(geo_table_grouped.to_string(index=False)) diff --git a/tests/kernels/quantization/test_fp8_quant_group.py b/tests/kernels/quantization/test_fp8_quant_group.py new file mode 100644 index 000000000000..720eee62760d --- /dev/null +++ b/tests/kernels/quantization/test_fp8_quant_group.py @@ -0,0 +1,150 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Tests for QuantFP8 Group Quantization implementation.""" + +import pytest +import torch + +from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 +from vllm.model_executor.layers.quantization.utils.quant_utils import ( + GroupShape) +from vllm.platforms import current_platform + + +@pytest.mark.parametrize( + "batch_size,hidden_dim,group_size", + [ + (16, 256, 32), # Small + (64, 1024, 64), # Medium + (128, 2048, 128), # Large + (8, 513, 64), # Non-divisible (native only) + ]) +@pytest.mark.parametrize("seed", [42]) +@torch.inference_mode() +def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, + group_size: int, seed: int) -> None: + """Test QuantFP8 group quantization with various configurations. + + Tests both CUDA and native implementations, column-major scales, + and verifies consistency between implementations. + """ + current_platform.seed_everything(seed) + + x = torch.randn( + (batch_size, hidden_dim), dtype=torch.bfloat16, device="cuda") * 8 + expected_num_groups = (hidden_dim + group_size - 1) // group_size + is_divisible = hidden_dim % group_size == 0 + + group_shape = GroupShape(1, group_size) + quant_op = QuantFP8(static=False, + group_shape=group_shape, + column_major_scales=False) + + # 1. Test native implementation (always available) + x_quant_native, scales_native = quant_op.forward_native(x.clone()) + assert x_quant_native.shape == x.shape + assert scales_native.shape == (batch_size, expected_num_groups) + + # 2. Test column-major scales configuration + quant_op_col = QuantFP8(static=False, + group_shape=group_shape, + column_major_scales=True) + _, scales_col = quant_op_col.forward_native(x.clone()) + assert scales_col.shape == (expected_num_groups, batch_size) + + # 3. 
Test CUDA implementation (only for divisible dimensions) + if is_divisible: + x_quant_cuda, scales_cuda = quant_op.forward_cuda(x.clone()) + assert x_quant_cuda.shape == x.shape + assert scales_cuda.shape == (batch_size, expected_num_groups) + + # Verify CUDA/native consistency + assert torch.allclose(scales_cuda, scales_native, rtol=1e-9, atol=1e-8) + + # Quantized values should mostly match + diff_count = (x_quant_cuda != x_quant_native).sum().item() + diff_ratio = diff_count / x_quant_cuda.numel() + assert diff_ratio < 0.002, f"Too many differences: {diff_ratio:.4%}" + + +@pytest.mark.parametrize("seed", [42]) +@torch.inference_mode() +def test_quantfp8_group_multidimensional(seed: int) -> None: + current_platform.seed_everything(seed) + + group_size = 64 + + # Test with 3D input + batch1, batch2, hidden_dim = 4, 8, 512 + x_3d = torch.randn( + (batch1, batch2, hidden_dim), dtype=torch.bfloat16, device="cuda") * 8 + + group_shape = GroupShape(1, group_size) + quant_op = QuantFP8(static=False, + group_shape=group_shape, + column_major_scales=False) + + x_quant, scales = quant_op.forward_native(x_3d.clone()) + assert x_quant.shape == x_3d.shape + assert scales.shape == (batch1, batch2, hidden_dim // group_size) + + # Test column_major_scales with multi-dim + quant_op_col = QuantFP8(static=False, + group_shape=group_shape, + column_major_scales=True) + _, scales_col = quant_op_col.forward_native(x_3d.clone()) + assert scales_col.shape == (batch1, hidden_dim // group_size, batch2) + + # Test with 4D input + batch1, batch2, batch3, hidden_dim = 2, 3, 4, 256 + x_4d = torch.randn((batch1, batch2, batch3, hidden_dim), + dtype=torch.bfloat16, + device="cuda") * 8 + + x_quant_4d, scales_4d = quant_op.forward_native(x_4d.clone()) + assert x_quant_4d.shape == x_4d.shape + assert scales_4d.shape == (batch1, batch2, batch3, + hidden_dim // group_size) + + _, scales_4d_col = quant_op_col.forward_native(x_4d.clone()) + assert scales_4d_col.shape == (batch1, batch2, hidden_dim // group_size, + batch3) + + +@pytest.mark.parametrize("seed", [42]) +@torch.inference_mode() +def test_quantfp8_group_edge_cases(seed: int) -> None: + current_platform.seed_everything(seed) + + batch_size = 16 + group_size = 64 + + # Test with single group (group_size >= hidden_dim) + x_small = torch.randn( + (batch_size, 32), dtype=torch.bfloat16, device="cuda") * 8 + group_shape = GroupShape(1, group_size) + quant_op = QuantFP8(static=False, + group_shape=group_shape, + column_major_scales=False) + + x_quant_small, scales_small = quant_op.forward_native(x_small.clone()) + assert x_quant_small.shape == x_small.shape + assert scales_small.shape == (batch_size, 1) + + # Test with zero inputs + x_zero = torch.zeros((batch_size, 256), + dtype=torch.bfloat16, + device="cuda") + x_quant_zero, scales_zero = quant_op.forward_native(x_zero.clone()) + assert x_quant_zero.shape == x_zero.shape + assert (scales_zero > 0).all(), "Scales should be clamped to minimum" + + # Test very large values + x_large = torch.full((batch_size, 256), + 1000.0, + dtype=torch.bfloat16, + device="cuda") + x_quant_large, scales_large = quant_op.forward_native(x_large.clone()) + assert x_quant_large.shape == x_large.shape + # FP8 max is typically 448 or 224, so scales should be > 1 + assert (scales_large > 1.0).all(), "Large values should have scales > 1" diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 30e46ffa7b17..36c2ab8b2d5f 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py 
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -32,9 +32,11 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceNoOP) from vllm.model_executor.layers.fused_moe.utils import ( - _resize_cache, moe_kernel_quantize_input, per_token_group_quant_fp8) + _resize_cache, moe_kernel_quantize_input) from vllm.model_executor.layers.quantization.utils.flashinfer_utils import ( calculate_tile_tokens_dim) +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + per_token_group_quant_fp8) from vllm.model_executor.layers.quantization.utils.mxfp4_utils import ( dequant_mxfp4) from vllm.platforms import current_platform diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py index e1a9bdde9334..31182f40b48f 100644 --- a/vllm/model_executor/layers/quantization/input_quant_fp8.py +++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py @@ -23,28 +23,39 @@ @CustomOp.register("quant_fp8") class QuantFP8(CustomOp): """ - Quantize input tensor to per-tensor or per-token FP8. + Quantize input tensor to FP8 (per-tensor, per-token, or per-group). This CustomOp supports both static and dynamic quantization. """ def __init__(self, static: bool, group_shape: GroupShape, - num_token_padding: Optional[int] = None): + num_token_padding: Optional[int] = None, + column_major_scales: bool = False): """ - :param static: static or dynamic quantization - :param group_shape: quantization group shape (PER_TOKEN or PER_TENSOR) - :param num_token_padding: Pad the token dimension of output to this size + :param group_shape: quantization group shape (PER_TOKEN, PER_TENSOR, + or arbitrary block size) + :param num_token_padding: Pad the token dimension of output to this + size + :param column_major_scales: For group quantization, output scales in + column major format """ super().__init__() - self.num_token_padding = num_token_padding - assert group_shape in {GroupShape.PER_TOKEN, GroupShape.PER_TENSOR} - assert not static or group_shape == GroupShape.PER_TENSOR, \ - "Only per-tensor scales supported for static quantization." self.static = static self.group_shape = group_shape - self.use_per_token_if_dynamic = group_shape == GroupShape.PER_TOKEN + self.num_token_padding = num_token_padding + self.column_major_scales = column_major_scales + + self.is_group_quant = group_shape.is_per_group() + if self.is_group_quant: + assert not static, "Group quantization only supports dynamic mode" + self.group_size = group_shape.col + else: + assert group_shape in {GroupShape.PER_TOKEN, GroupShape.PER_TENSOR} + assert not static or group_shape == GroupShape.PER_TENSOR, \ + "Only per-tensor scales supported for static quantization." 
+ self.use_per_token_if_dynamic = group_shape == GroupShape.PER_TOKEN def forward_cuda( self, @@ -52,11 +63,19 @@ def forward_cuda( scale: Optional[torch.Tensor] = None, scale_ub: Optional[torch.Tensor] = None, ) -> tuple[torch.Tensor, torch.Tensor]: + if self.is_group_quant: + assert scale is None, "Group quantization is always dynamic" + from vllm.model_executor.layers.quantization.utils import fp8_utils + return fp8_utils.per_token_group_quant_fp8( + x, + group_size=self.group_size, + column_major_scales=self.column_major_scales, + dtype=_FP8_DTYPE) + assert (scale is not None) == self.static assert scale_ub is None or (not self.static and self.group_shape == GroupShape.PER_TOKEN and scale_ub.numel() == 1) - return ops.scaled_fp8_quant( x, scale, @@ -70,6 +89,10 @@ def forward_native( scale: Optional[torch.Tensor] = None, scale_ub: Optional[torch.Tensor] = None, ): + if self.is_group_quant: + assert scale is None, "Group quantization is always dynamic" + return self._quantize_group_native(x) + assert (scale is not None) == self.static assert scale_ub is None or (not self.static and self.group_shape == GroupShape.PER_TOKEN @@ -84,8 +107,7 @@ def forward_native( else: x_max = x.abs().max().unsqueeze(-1).to(torch.float32) - scale = x_max / _FP8_MAX - scale = scale.clamp(min=_FP8_MIN_SCALING_FACTOR) + scale = (x_max / _FP8_MAX).clamp(min=_FP8_MIN_SCALING_FACTOR) # Even for dynamic per-token scales, # reciprocal performs slightly better than division @@ -101,3 +123,34 @@ def forward_native( out = F.pad(out, (0, 0, 0, padding), "constant", 0.0) return out, scale + + def _quantize_group_native( + self, x: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + orig_shape = x.shape + hidden_dim = x.shape[-1] + num_groups = (hidden_dim + self.group_size - 1) // self.group_size + padded_dim = num_groups * self.group_size + + if padded_dim != hidden_dim: + padding = padded_dim - hidden_dim + x = F.pad(x, (0, padding), mode='constant', value=0.0) + + x_grouped = x.view(-1, num_groups, self.group_size) + absmax = x_grouped.abs().max(dim=-1, keepdim=True)[0].float() + scales = (absmax / _FP8_MAX).clamp(min=_FP8_MIN_SCALING_FACTOR) + + x_scaled = x_grouped / scales + x_quant = x_scaled.clamp(_FP8_MIN, _FP8_MAX).to(_FP8_DTYPE) + + x_quant = x_quant.view(-1, padded_dim) + if padded_dim != hidden_dim: + x_quant = x_quant[..., :hidden_dim] + x_quant = x_quant.view(orig_shape) + + scales = scales.squeeze(-1) + scales = scales.reshape(orig_shape[:-1] + (num_groups, )) + + if self.column_major_scales: + scales = scales.transpose(-2, -1).contiguous() + + return x_quant, scales diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py index f4ff875adb21..5339c6043cc1 100644 --- a/vllm/model_executor/layers/quantization/utils/quant_utils.py +++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py @@ -34,6 +34,15 @@ class GroupShape(_GroupShape): PER_TENSOR: ClassVar['GroupShape'] PER_TOKEN: ClassVar['GroupShape'] + def is_per_tensor(self) -> bool: + return self.row == -1 and self.col == -1 + + def is_per_token(self) -> bool: + return self.row == 1 and self.col == -1 + + def is_per_group(self) -> bool: + return self.row == 1 and self.col >= 1 + GroupShape.PER_TENSOR = GroupShape(-1, -1) GroupShape.PER_TOKEN = GroupShape(1, -1) From 64ad551878e9f0323407928fa747ce14c67c71f2 Mon Sep 17 00:00:00 2001 From: Benjamin Bartels Date: Wed, 17 Sep 2025 02:33:18 +0100 Subject: [PATCH 017/518] Removes source compilation of nixl dependency 
(#24874) Signed-off-by: bbartels Signed-off-by: Benjamin Bartels Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Daniele <36171005+dtrifiro@users.noreply.github.com> --- docker/Dockerfile | 20 +++- docs/serving/expert_parallel_deployment.md | 4 +- requirements/kv_connectors.txt | 3 +- tools/install_gdrcopy.sh | 57 +++++++++++ tools/install_nixl.sh | 109 --------------------- 5 files changed, 77 insertions(+), 116 deletions(-) create mode 100755 tools/install_gdrcopy.sh delete mode 100644 tools/install_nixl.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index 17f8e6043f89..034f73736ca7 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -283,6 +283,10 @@ WORKDIR /vllm-workspace ENV DEBIAN_FRONTEND=noninteractive ARG TARGETPLATFORM +ARG GDRCOPY_CUDA_VERSION=12.8 +# Keep in line with FINAL_BASE_IMAGE +ARG GDRCOPY_OS_VERSION=Ubuntu22_04 + SHELL ["/bin/bash", "-c"] ARG DEADSNAKES_MIRROR_URL @@ -441,13 +445,21 @@ COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh RUN --mount=type=cache,target=/root/.cache/uv \ VLLM_DOCKER_BUILD_CONTEXT=1 /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"} -# Install EP kernels(pplx-kernels and DeepEP), NixL +COPY tools/install_gdrcopy.sh install_gdrcopy.sh +RUN set -eux; \ + case "${TARGETPLATFORM}" in \ + linux/arm64) UUARCH="aarch64" ;; \ + linux/amd64) UUARCH="x64" ;; \ + *) echo "Unsupported TARGETPLATFORM: ${TARGETPLATFORM}" >&2; exit 1 ;; \ + esac; \ + ./install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "${GDRCOPY_CUDA_VERSION}" "${UUARCH}"; \ + rm ./install_gdrcopy.sh + +# Install EP kernels(pplx-kernels and DeepEP) COPY tools/ep_kernels/install_python_libraries.sh install_python_libraries.sh -COPY tools/install_nixl.sh install_nixl.sh ENV CUDA_HOME=/usr/local/cuda RUN export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a+PTX}" \ - && bash install_python_libraries.sh \ - && bash install_nixl.sh --force + && bash install_python_libraries.sh #################### vLLM installation IMAGE #################### diff --git a/docs/serving/expert_parallel_deployment.md b/docs/serving/expert_parallel_deployment.md index 494d2ad021e7..7489fc260983 100644 --- a/docs/serving/expert_parallel_deployment.md +++ b/docs/serving/expert_parallel_deployment.md @@ -10,7 +10,7 @@ Before using EP, you need to install the necessary dependencies. We are actively 1. **Install DeepEP and pplx-kernels**: Set up host environment following vLLM's guide for EP kernels [here](gh-file:tools/ep_kernels). 2. **Install DeepGEMM library**: Follow the [official instructions](https://github.com/deepseek-ai/DeepGEMM#installation). -3. **For disaggregated serving**: Install UCX and NIXL following the [script](gh-file:tools/install_nixl.sh). +3. **For disaggregated serving**: Install `gdrcopy` by running the [`install_gdrcopy.sh`](gh-file:tools/install_gdrcopy.sh) script (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`). You can find available OS versions [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/). ### Backend Selection Guide @@ -191,7 +191,7 @@ For production deployments requiring strict SLA guarantees for time-to-first-tok ### Setup Steps -1. **Install KV Connector**: Install NIXL using the [installation script](gh-file:tools/install_nixl.sh) +1. 
**Install gdrcopy/ucx/nixl**: For maximum performance, run the [install_gdrcopy.sh](gh-file:tools/install_gdrcopy.sh) script to install `gdrcopy` (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`). You can find available OS versions [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/). If `gdrcopy` is not installed, things will still work with a plain `pip install nixl`, just with lower performance. `nixl` and `ucx` are installed as dependencies via pip. 2. **Configure Both Instances**: Add this flag to both prefill and decode instances `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}` diff --git a/requirements/kv_connectors.txt b/requirements/kv_connectors.txt index 262675a23120..3b610e0d9736 100644 --- a/requirements/kv_connectors.txt +++ b/requirements/kv_connectors.txt @@ -1 +1,2 @@ -lmcache \ No newline at end of file +lmcache +nixl >= 0.5.1 # Required for disaggregated prefill diff --git a/tools/install_gdrcopy.sh b/tools/install_gdrcopy.sh new file mode 100755 index 000000000000..481723320c63 --- /dev/null +++ b/tools/install_gdrcopy.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash +set -euo pipefail + +# Usage: install_gdrcopy.sh +# uuarch must be "x64" or "aarch64" +# Optional: set GDRCOPY_VERSION to override the libgdrapi package version (default: 2.5.1-1) +# Requires: curl, apt-get, root privileges +if [[ $(id -u) -ne 0 ]]; then + echo "Must be run as root" >&2 + + exit 1 +fi +if [[ $# -ne 3 ]]; then + echo "Usage: $0 " >&2 + exit 1 +fi + +OS_VER="$1" +CUDA_VER="$2" +UUARCH_RAW="$3" + +# Normalize/validate arch +case "${UUARCH_RAW,,}" in + aarch64|arm64) + URL_ARCH="aarch64" + DEB_ARCH="arm64" + ;; + x64|x86_64|amd64) + URL_ARCH="x64" + DEB_ARCH="amd64" + ;; + *) + echo "Unsupported uuarch: ${UUARCH_RAW}. Use 'x64' or 'aarch64'." >&2 + exit 1 + ;; +esac + +OS_VER_LOWER="$(tr '[:upper:]' '[:lower:]' <<<"$OS_VER")" +GDRCOPY_PKG_VER="${GDRCOPY_VERSION:-2.5.1-1}" + +DEB_NAME="libgdrapi_${GDRCOPY_PKG_VER}_${DEB_ARCH}.${OS_VER}.deb" +BASE_URL="https://developer.download.nvidia.com/compute/redist/gdrcopy" +URL="${BASE_URL}/CUDA%20${CUDA_VER}/${OS_VER_LOWER}/${URL_ARCH}/${DEB_NAME}" + +echo "Downloading: ${URL}" +TMPDIR="$(mktemp -d)" +trap 'rm -rf "${TMPDIR}"' EXIT + +curl -fSL "${URL}" -o "${TMPDIR}/${DEB_NAME}" + +export DEBIAN_FRONTEND=noninteractive +apt-get update +apt-get install -y "${TMPDIR}/${DEB_NAME}" +apt-get clean +rm -rf /var/lib/apt/lists/* + +echo "Installed ${DEB_NAME}" diff --git a/tools/install_nixl.sh b/tools/install_nixl.sh deleted file mode 100644 index 56717cfb77f7..000000000000 --- a/tools/install_nixl.sh +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/bash -# Usage: ./install_nixl.sh [--force] - -FORCE=false -if [ "$1" == "--force" ]; then - FORCE=true -fi - -SUDO=false -if command -v sudo >/dev/null 2>&1 && sudo -n true 2>/dev/null; then - SUDO=true -fi - -ARCH=$(uname -m) - -ROOT_DIR="/usr/local" -mkdir -p "$ROOT_DIR" -GDR_HOME="$ROOT_DIR/gdrcopy" -UCX_HOME="$ROOT_DIR/ucx" -NIXL_HOME="$ROOT_DIR/nixl" -CUDA_HOME=/usr/local/cuda - -export PATH="$GDR_HOME/bin:$UCX_HOME/bin:$NIXL_HOME/bin:$PATH" -export LD_LIBRARY_PATH="$GDR_HOME/lib:$UCX_HOME/lib:$NIXL_HOME/lib/$ARCH-linux-gnu:$LD_LIBRARY_PATH" - -TEMP_DIR="nixl_installer" -mkdir -p "$TEMP_DIR" -cd "$TEMP_DIR" - -pip install meson ninja pybind11 - -if [ ! 
-e "/dev/gdrdrv" ] || [ "$FORCE" = true ]; then - echo "Installing gdrcopy\n" - wget https://github.com/NVIDIA/gdrcopy/archive/refs/tags/v2.5.tar.gz - tar xzf v2.5.tar.gz; rm v2.5.tar.gz - cd gdrcopy-2.5 - make prefix=$GDR_HOME CUDA=$CUDA_HOME all install - - if $SUDO; then - echo "Running insmod.sh with sudo" - sudo ./insmod.sh - else - echo "Skipping insmod.sh - sudo not available" - echo "Please run 'sudo ./gdrcopy-2.5/insmod.sh' manually if needed" - fi - - cd .. -else - echo "Found /dev/gdrdrv. Skipping gdrcopy installation" -fi - -if ! command -v ucx_info &> /dev/null || [ "$FORCE" = true ]; then - echo "Installing UCX" - wget https://github.com/openucx/ucx/releases/download/v1.18.0/ucx-1.18.0.tar.gz - tar xzf ucx-1.18.0.tar.gz; rm ucx-1.18.0.tar.gz - cd ucx-1.18.0 - - # Checking Mellanox NICs - MLX_OPTS="" - if lspci | grep -i mellanox > /dev/null || command -v ibstat > /dev/null; then - echo "Mellanox NIC detected, adding Mellanox-specific options" - MLX_OPTS="--with-rdmacm \ - --with-mlx5-dv \ - --with-ib-hw-tm" - fi - - ./configure --prefix=$UCX_HOME \ - --enable-shared \ - --disable-static \ - --disable-doxygen-doc \ - --enable-optimizations \ - --enable-cma \ - --enable-devel-headers \ - --with-cuda=$CUDA_HOME \ - --with-dm \ - --with-gdrcopy=$GDR_HOME \ - --with-verbs \ - --enable-mt \ - $MLX_OPTS - make -j - make -j install-strip - - if $SUDO; then - echo "Running ldconfig with sudo" - sudo ldconfig - else - echo "Skipping ldconfig - sudo not available" - echo "Please run 'sudo ldconfig' manually if needed" - fi - - cd .. -else - echo "Found existing UCX. Skipping UCX installation" -fi - -if ! command -v nixl_test &> /dev/null || [ "$FORCE" = true ]; then - echo "Installing NIXL" - wget https://github.com/ai-dynamo/nixl/archive/refs/tags/0.2.0.tar.gz - tar xzf 0.2.0.tar.gz; rm 0.2.0.tar.gz - cd nixl-0.2.0 - meson setup build --prefix=$NIXL_HOME -Ducx_path=$UCX_HOME - cd build - ninja - ninja install - - cd ../.. -else - echo "Found existing NIXL. Skipping NIXL installation" -fi From 3059b9cc6bf7772ac53389e01c53e583e4dea0d0 Mon Sep 17 00:00:00 2001 From: elvischenv <219235043+elvischenv@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:45:29 +0800 Subject: [PATCH 018/518] [Doc] Add --force-overwrite option to generate_cmake_presets.py (#24375) Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com> --- docs/contributing/incremental_build.md | 10 +++++++++ tools/generate_cmake_presets.py | 29 +++++++++++++++++++------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/docs/contributing/incremental_build.md b/docs/contributing/incremental_build.md index 0e34e69245af..cc01a60ce1e7 100644 --- a/docs/contributing/incremental_build.md +++ b/docs/contributing/incremental_build.md @@ -40,6 +40,16 @@ python tools/generate_cmake_presets.py The script will prompt you if it cannot automatically determine certain paths (e.g., `nvcc` or a specific Python executable for your vLLM development environment). Follow the on-screen prompts. If an existing `CMakeUserPresets.json` is found, the script will ask for confirmation before overwriting it. +**Force overwrite existing file:** + +To automatically overwrite an existing `CMakeUserPresets.json` without prompting, use the `--force-overwrite` flag: + +```console +python tools/generate_cmake_presets.py --force-overwrite +``` + +This is particularly useful in automated scripts or CI/CD environments where interactive prompts are not desired. 
+ After running the script, a `CMakeUserPresets.json` file will be created in the root of your vLLM repository. ### Example `CMakeUserPresets.json` diff --git a/tools/generate_cmake_presets.py b/tools/generate_cmake_presets.py index 5f92f2f5848f..4869a71307e4 100644 --- a/tools/generate_cmake_presets.py +++ b/tools/generate_cmake_presets.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import argparse import json import multiprocessing import os @@ -26,7 +27,8 @@ def get_cpu_cores(): return multiprocessing.cpu_count() -def generate_presets(output_path="CMakeUserPresets.json"): +def generate_presets(output_path="CMakeUserPresets.json", + force_overwrite=False): """Generates the CMakeUserPresets.json file.""" print("Attempting to detect your system configuration...") @@ -143,12 +145,15 @@ def generate_presets(output_path="CMakeUserPresets.json"): output_file_path = os.path.join(project_root, output_path) if os.path.exists(output_file_path): - overwrite = input( - f"'{output_file_path}' already exists. Overwrite? (y/N): ").strip( - ).lower() - if overwrite != 'y': - print("Generation cancelled.") - return + if force_overwrite: + print(f"Overwriting existing file '{output_file_path}'") + else: + overwrite = input( + f"'{output_file_path}' already exists. Overwrite? (y/N): " + ).strip().lower() + if overwrite != 'y': + print("Generation cancelled.") + return try: with open(output_file_path, "w") as f: @@ -166,4 +171,12 @@ def generate_presets(output_path="CMakeUserPresets.json"): if __name__ == "__main__": - generate_presets() + parser = argparse.ArgumentParser() + parser.add_argument( + "--force-overwrite", + action="store_true", + help="Force overwrite existing CMakeUserPresets.json without prompting" + ) + + args = parser.parse_args() + generate_presets(force_overwrite=args.force_overwrite) From eeb135eb87277b388e1a2c97ff9c13fcd94fc6ae Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Tue, 16 Sep 2025 19:18:06 -0700 Subject: [PATCH 019/518] [Core] Use `CpuGpuBuffer` for block table tensors (#24795) Signed-off-by: Nick Hill --- tests/v1/tpu/worker/test_tpu_model_runner.py | 2 +- tests/v1/worker/test_gpu_input_batch.py | 5 +- tests/v1/worker/test_gpu_model_runner.py | 2 +- vllm/v1/worker/block_table.py | 75 +++++++++----------- vllm/v1/worker/cpu_model_runner.py | 8 +-- vllm/v1/worker/gpu_model_runner.py | 24 +++---- 6 files changed, 53 insertions(+), 63 deletions(-) diff --git a/tests/v1/tpu/worker/test_tpu_model_runner.py b/tests/v1/tpu/worker/test_tpu_model_runner.py index bd9b6131c222..4f4a9c7db88a 100644 --- a/tests/v1/tpu/worker/test_tpu_model_runner.py +++ b/tests/v1/tpu/worker/test_tpu_model_runner.py @@ -125,7 +125,7 @@ def _is_req_state_block_table_match(model_runner, req_id: str) -> bool: return False num_blocks = block_table.num_blocks_per_row[req_index] - block_table_values = block_table.block_table_np[req_index, :num_blocks] + block_table_values = block_table.block_table.np[req_index, :num_blocks] return (block_table_values == req_block_ids).all() diff --git a/tests/v1/worker/test_gpu_input_batch.py b/tests/v1/worker/test_gpu_input_batch.py index 38f543c78486..98700ff73fd1 100644 --- a/tests/v1/worker/test_gpu_input_batch.py +++ b/tests/v1/worker/test_gpu_input_batch.py @@ -15,6 +15,7 @@ from vllm.v1.pool.metadata import PoolingMetadata from vllm.v1.sample.logits_processor import LogitsProcessors from vllm.v1.sample.metadata import SamplingMetadata +from vllm.v1.utils import CpuGpuBuffer from 
vllm.v1.worker.block_table import BlockTable, MultiGroupBlockTable from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch @@ -45,7 +46,7 @@ def _compare_objs(obj1, is_same = False if isinstance(a, torch.Tensor): - if (a.numel() == 0 or b.numel() == 0): + if a.numel() == 0 or b.numel() == 0: is_same = (a.numel() == 0 and b.numel() == 0) elif torch.allclose(a, b): is_same = True @@ -61,6 +62,8 @@ def _compare_objs(obj1, is_same = True # if we make it here must be same elif a == b: is_same = True + elif isinstance(a, CpuGpuBuffer): + is_same = np.allclose(a.np, b.np) and torch.allclose(a.gpu, b.gpu) assert is_same, f"Attribute {attr_name} is different"\ f" in {obj1} and {obj2}: {a} != {b}" diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index 4ad8df1ce386..8b571f95c5ec 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -165,7 +165,7 @@ def _is_req_state_block_table_match(model_runner, req_id: str) -> bool: req_state.block_ids[0]): return False num_blocks = block_table.num_blocks_per_row[req_index] - return (block_table.block_table_np[req_index, :num_blocks] == + return (block_table.block_table.np[req_index, :num_blocks] == req_state.block_ids[0]).all() diff --git a/vllm/v1/worker/block_table.py b/vllm/v1/worker/block_table.py index 194984bf5053..82b6d1b514d5 100644 --- a/vllm/v1/worker/block_table.py +++ b/vllm/v1/worker/block_table.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Union import numpy as np import torch @@ -7,6 +8,7 @@ from vllm.distributed import get_dcp_group from vllm.logger import init_logger from vllm.utils import cdiv +from vllm.v1.utils import CpuGpuBuffer logger = init_logger(__name__) @@ -29,28 +31,13 @@ def __init__( self.pin_memory = pin_memory self.device = device - self.block_table = torch.zeros( - (max_num_reqs, max_num_blocks_per_req), - device=self.device, - dtype=torch.int32, - ) - self.block_table_cpu = torch.zeros( - (max_num_reqs, max_num_blocks_per_req), - device="cpu", - dtype=torch.int32, - pin_memory=pin_memory, - ) - self.block_table_np = self.block_table_cpu.numpy() + self.block_table = self._make_buffer(max_num_reqs, + max_num_blocks_per_req, + dtype=torch.int32) self.num_blocks_per_row = np.zeros(max_num_reqs, dtype=np.int32) - self.slot_mapping_cpu = torch.zeros(self.max_num_batched_tokens, - dtype=torch.int64, - device="cpu", - pin_memory=self.pin_memory) - self.slot_mapping_np = self.slot_mapping_cpu.numpy() - self.slot_mapping = torch.zeros(self.max_num_batched_tokens, - dtype=torch.int64, - device=self.device) + self.slot_mapping = self._make_buffer(self.max_num_batched_tokens, + dtype=torch.int64) try: self.dcp_world_size = get_dcp_group().world_size self.dcp_rank = get_dcp_group().rank_in_group @@ -69,7 +56,7 @@ def append_row( num_blocks = len(block_ids) start = self.num_blocks_per_row[row_idx] self.num_blocks_per_row[row_idx] += num_blocks - self.block_table_np[row_idx, start:start + num_blocks] = block_ids + self.block_table.np[row_idx, start:start + num_blocks] = block_ids def add_row(self, block_ids: list[int], row_idx: int) -> None: self.num_blocks_per_row[row_idx] = 0 @@ -77,17 +64,14 @@ def add_row(self, block_ids: list[int], row_idx: int) -> None: def move_row(self, src: int, tgt: int) -> None: num_blocks = self.num_blocks_per_row[src] - self.block_table_np[tgt, :num_blocks] = self.block_table_np[ - src, :num_blocks] + 
block_table_np = self.block_table.np + block_table_np[tgt, :num_blocks] = block_table_np[src, :num_blocks] self.num_blocks_per_row[tgt] = num_blocks def swap_row(self, src: int, tgt: int) -> None: - num_blocks_src = self.num_blocks_per_row[src] - num_blocks_tgt = self.num_blocks_per_row[tgt] - self.num_blocks_per_row[src] = num_blocks_tgt - self.num_blocks_per_row[tgt] = num_blocks_src - - self.block_table_np[[src, tgt]] = self.block_table_np[[tgt, src]] + src_tgt, tgt_src = [src, tgt], [tgt, src] + self.num_blocks_per_row[src_tgt] = self.num_blocks_per_row[tgt_src] + self.block_table.np[src_tgt] = self.block_table.np[tgt_src] def compute_slot_mapping(self, req_indices: np.ndarray, positions: np.ndarray) -> None: @@ -107,7 +91,7 @@ def compute_slot_mapping(self, req_indices: np.ndarray, virtual_block_size = self.block_size * self.dcp_world_size block_table_indices = (req_indices * self.max_num_blocks_per_req + positions // virtual_block_size) - block_numbers = self.block_table_np.ravel()[block_table_indices] + block_numbers = self.block_table.np.ravel()[block_table_indices] # Use virtual_block_size for mask calculation, which marks local # tokens. virtual_block_offsets = positions % virtual_block_size @@ -117,40 +101,45 @@ def compute_slot_mapping(self, req_indices: np.ndarray, # Calculate slot_mapping slot_mapping = block_numbers * self.block_size + block_offsets # Write final slots, use -1 for not-local - self.slot_mapping_np[:req_indices.shape[0]] = np.where( + self.slot_mapping.np[:req_indices.shape[0]] = np.where( mask, slot_mapping, -1) else: block_table_indices = (req_indices * self.max_num_blocks_per_req + positions // self.block_size) - block_numbers = self.block_table_np.ravel()[block_table_indices] + block_numbers = self.block_table.np.ravel()[block_table_indices] block_offsets = positions % self.block_size np.add(block_numbers * self.block_size, block_offsets, - out=self.slot_mapping_np[:req_indices.shape[0]]) + out=self.slot_mapping.np[:req_indices.shape[0]]) def commit_block_table(self, num_reqs: int) -> None: - self.block_table[:num_reqs].copy_(self.block_table_cpu[:num_reqs], - non_blocking=True) + self.block_table.copy_to_gpu(num_reqs) def commit_slot_mapping(self, num_tokens: int) -> None: - self.slot_mapping[:num_tokens].copy_( - self.slot_mapping_cpu[:num_tokens], non_blocking=True) + self.slot_mapping.copy_to_gpu(num_tokens) def clear(self) -> None: - self.block_table.fill_(0) - self.block_table_cpu.fill_(0) + self.block_table.gpu.fill_(0) + self.block_table.cpu.fill_(0) - def get_device_tensor(self) -> torch.Tensor: + def get_device_tensor(self, num_reqs: int) -> torch.Tensor: """Returns the device tensor of the block table.""" - return self.block_table + return self.block_table.gpu[:num_reqs] def get_cpu_tensor(self) -> torch.Tensor: """Returns the CPU tensor of the block table.""" - return self.block_table_cpu + return self.block_table.cpu def get_numpy_array(self) -> np.ndarray: """Returns the numpy array of the block table.""" - return self.block_table_np + return self.block_table.np + + def _make_buffer(self, *size: Union[int, torch.SymInt], + dtype: torch.dtype) -> CpuGpuBuffer: + return CpuGpuBuffer(*size, + dtype=dtype, + device=self.device, + pin_memory=self.pin_memory) class MultiGroupBlockTable: diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py index 619ed88ab5b2..ccdbeac64bce 100644 --- a/vllm/v1/worker/cpu_model_runner.py +++ b/vllm/v1/worker/cpu_model_runner.py @@ -89,7 +89,7 @@ def replace_tensor(obj: Any, 
cpu_attr_name: str, assert isinstance(device_tensor, torch.Tensor) setattr(obj, device_attr_name, cpu_tensor) - for k, v in vars(self).items(): + for v in vars(self).values(): if isinstance(v, CpuGpuBuffer): v.gpu = v.cpu @@ -98,9 +98,9 @@ def replace_tensor(obj: Any, cpu_attr_name: str, replace_tensor(self.input_batch, k, k[:-11]) for block_table in self.input_batch.block_table.block_tables: - for k, v in vars(block_table).items(): - if k.endswith("_cpu") and isinstance(v, torch.Tensor): - replace_tensor(block_table, k, k[:-4]) + for v in vars(block_table).values(): + if isinstance(v, CpuGpuBuffer): + v.gpu = v.cpu def load_model(self, eep_scale_up: bool = False) -> None: logger.info("Starting to load model %s...", self.model_config.model) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 2ae748dee43c..e23115e177e6 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -427,9 +427,6 @@ def _make_buffer(self, *size: Union[int, torch.SymInt], dtype: torch.dtype, numpy: bool = True) -> CpuGpuBuffer: - # Bfloat16 torch tensors cannot be directly cast to a numpy array, so - # if a bfloat16 buffer is needed without a corresponding numpy array, - # don't bother instantiating the numpy array. return CpuGpuBuffer(*size, dtype=dtype, device=self.device, @@ -1062,13 +1059,14 @@ def _prepare_inputs( num_common_prefix_blocks = 0 else: blk_table = self.input_batch.block_table[kv_cache_group_id] - blk_table_tensor = blk_table.get_device_tensor()[:num_reqs] - slot_mapping = blk_table.slot_mapping[: - total_num_scheduled_tokens] + blk_table_tensor = blk_table.get_device_tensor(num_reqs) + slot_mapping = blk_table.slot_mapping.gpu[: + total_num_scheduled_tokens] # Fill unused with -1. Needed for reshape_and_cache in full cuda # graph mode. - blk_table.slot_mapping[total_num_scheduled_tokens:].fill_(-1) + blk_table.slot_mapping.gpu[total_num_scheduled_tokens:].fill_( + -1) num_common_prefix_blocks = ( scheduler_output. num_common_prefix_blocks[kv_cache_group_id]) @@ -2903,10 +2901,10 @@ def _dummy_run( num_actual_tokens=num_tokens, max_query_len=max_query_len, max_seq_len=self.max_model_len, - block_table_tensor=self.input_batch.block_table[ - kv_cache_group_id].get_device_tensor()[:num_reqs], - slot_mapping=self.input_batch. - block_table[kv_cache_group_id].slot_mapping[:num_tokens], + block_table_tensor=self.input_batch. + block_table[kv_cache_group_id].get_device_tensor(num_reqs), + slot_mapping=self.input_batch.block_table[ + kv_cache_group_id].slot_mapping.gpu[:num_tokens], causal=True) for attn_group in self.attn_groups[kv_cache_group_id]: if ubatch_slices is not None: @@ -3265,8 +3263,8 @@ def freeze_gc(): cudagraph_runtime_mode=cudagraph_runtime_mode, uniform_decode=False) - # Capture full cudagraph for uniform decode batches if we have - # dont already have full mixed prefill-decode cudagraphs + # Capture full cudagraph for uniform decode batches if we + # don't already have full mixed prefill-decode cudagraphs. 
if cudagraph_mode.decode_mode() == CUDAGraphMode.FULL and \ cudagraph_mode.separate_routine(): max_num_tokens = self.scheduler_config.max_num_seqs * \ From 5a411ef6c446f8fb08311a385e16e13bacf44bc5 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Wed, 17 Sep 2025 11:29:43 +0800 Subject: [PATCH 020/518] [Benchmarks] Add MMVU video dataset support and clean up deprecated datasets (#24719) Signed-off-by: Isotr0py --- benchmarks/benchmark_dataset.py | 1288 ------------------------------- docs/contributing/benchmarks.md | 1 + vllm/benchmarks/datasets.py | 66 +- 3 files changed, 65 insertions(+), 1290 deletions(-) delete mode 100644 benchmarks/benchmark_dataset.py diff --git a/benchmarks/benchmark_dataset.py b/benchmarks/benchmark_dataset.py deleted file mode 100644 index 64ffa62c04d8..000000000000 --- a/benchmarks/benchmark_dataset.py +++ /dev/null @@ -1,1288 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -""" -This module defines a framework for sampling benchmark requests from various -datasets. Each dataset subclass of BenchmarkDataset must implement sample -generation. Supported dataset types include: - - ShareGPT - - Random (synthetic) - - Sonnet - - BurstGPT - - HuggingFace - - VisionArena -""" - -import base64 -import io -import json -import logging -import random -from abc import ABC, abstractmethod -from collections.abc import Mapping -from copy import deepcopy -from dataclasses import dataclass -from functools import cache -from io import BytesIO -from typing import Any, Callable, Optional, Union - -import numpy as np -import pandas as pd -from datasets import load_dataset -from PIL import Image -from transformers import PreTrainedTokenizerBase - -from vllm.lora.request import LoRARequest -from vllm.lora.utils import get_adapter_absolute_path -from vllm.multimodal import MultiModalDataDict -from vllm.multimodal.image import convert_image_mode -from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer - -logger = logging.getLogger(__name__) - -# ----------------------------------------------------------------------------- -# Data Classes -# ----------------------------------------------------------------------------- - - -@dataclass -class SampleRequest: - """ - Represents a single inference request for benchmarking. - """ - - prompt: Union[str, Any] - prompt_len: int - expected_output_len: int - multi_modal_data: Optional[Union[MultiModalDataDict, dict, list[dict]]] = None - lora_request: Optional[LoRARequest] = None - request_id: Optional[str] = None - - -# ----------------------------------------------------------------------------- -# Benchmark Dataset Base Class -# ----------------------------------------------------------------------------- - - -class BenchmarkDataset(ABC): - DEFAULT_SEED = 0 - IS_MULTIMODAL = False - - def __init__( - self, - dataset_path: Optional[str] = None, - random_seed: int = DEFAULT_SEED, - ) -> None: - """ - Initialize the BenchmarkDataset with an optional dataset path and random - seed. Args: - dataset_path (Optional[str]): Path to the dataset. If None, it - indicates that a default or random dataset might be used. - random_seed (int): Seed value for reproducible shuffling or - sampling. Defaults to DEFAULT_SEED. - """ - self.dataset_path = dataset_path - # Set the random seed, ensuring that a None value is replaced with the - # default seed. 
- self.random_seed = random_seed if random_seed is not None else self.DEFAULT_SEED - self.data = None - - def apply_multimodal_chat_transformation( - self, prompt: str, mm_content: Optional[MultiModalDataDict] = None - ) -> list[dict]: - """ - Transform a prompt and optional multimodal content into a chat format. - This method is used for chat models that expect a specific conversation - format. - """ - content = [{"text": prompt, "type": "text"}] - if mm_content is not None: - content.append(mm_content) - return [{"role": "user", "content": content}] - - def load_data(self) -> None: - """ - Load data from the dataset path into self.data. - - This method must be overridden by subclasses since the method to load - data will vary depending on the dataset format and source. - - Raises: - NotImplementedError: If a subclass does not implement this method. - """ - # TODO (jenniferzhao): add support for downloading data - raise NotImplementedError("load_data must be implemented in subclasses.") - - def get_random_lora_request( - self, - tokenizer: PreTrainedTokenizerBase, - max_loras: Optional[int] = None, - lora_path: Optional[str] = None, - ) -> tuple[Optional[LoRARequest], AnyTokenizer]: - """ - Optionally select a random LoRA request and return its associated - tokenizer. - - This method is used when LoRA parameters are provided. It randomly - selects a LoRA based on max_loras and retrieves a cached tokenizer for - that LoRA if available. Otherwise, it returns the base tokenizer. - - Args: - tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no - LoRA is selected. max_loras (Optional[int]): The maximum number of - LoRAs available. If None, LoRA is not used. lora_path - (Optional[str]): Path to the LoRA parameters on disk. If None, LoRA - is not used. - - Returns: - tuple[Optional[LoRARequest], AnyTokenizer]: A tuple where the first - element is a LoRARequest (or None if not applicable) and the second - element is the tokenizer associated with the LoRA request (or the - base tokenizer). - """ - if max_loras is None or lora_path is None: - return None, tokenizer - - # Generate a random LoRA ID in the range [1, max_loras]. - lora_id = random.randint(1, max_loras) - lora_request = LoRARequest( - lora_name=str(lora_id), - lora_int_id=lora_id, - lora_path=lora_path_on_disk(lora_path), - ) - if lora_id not in lora_tokenizer_cache: - lora_tokenizer_cache[lora_id] = get_lora_tokenizer(lora_request) - # Return lora_request and the cached tokenizer if available; otherwise, - # return the base tokenizer - return lora_request, lora_tokenizer_cache[lora_id] or tokenizer - - @abstractmethod - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - request_id_prefix: str = "", - ) -> list[SampleRequest]: - """ - Abstract method to generate sample requests from the dataset. - - Subclasses must override this method to implement dataset-specific logic - for generating a list of SampleRequest objects. - - Args: - tokenizer (PreTrainedTokenizerBase): The tokenizer to be used - for processing the dataset's text. - num_requests (int): The number of sample requests to generate. - request_id_prefix (str) The prefix of request_id. - - Returns: - list[SampleRequest]: A list of sample requests generated from the - dataset. 
- """ - raise NotImplementedError("sample must be implemented in subclasses.") - - def maybe_oversample_requests( - self, - requests: list[SampleRequest], - num_requests: int, - request_id_prefix: str = "", - ) -> None: - """ - Oversamples the list of requests if its size is less than the desired - number. - - Args: - requests (List[SampleRequest]): The current list of sampled - requests. - num_requests (int): The target number of requests. - request_id_prefix (str) The prefix of the request ids. - """ - if len(requests) < num_requests: - random.seed(self.random_seed) - additional = deepcopy( - random.choices(requests, k=num_requests - len(requests)) - ) - for i in range(len(additional)): - req = additional[i] - req.request_id = request_id_prefix + str(len(requests) + i) - requests.extend(additional) - logger.info("Oversampled requests to reach %d total samples.", num_requests) - - -# ----------------------------------------------------------------------------- -# Utility Functions and Global Caches -# ----------------------------------------------------------------------------- - - -def is_valid_sequence( - prompt_len: int, - output_len: int, - min_len: int = 4, - max_prompt_len: int = 1024, - max_total_len: int = 2048, - skip_min_output_len_check: bool = False, -) -> bool: - """ - Validate a sequence based on prompt and output lengths. - - Default pruning criteria are copied from the original `sample_hf_requests` - and `sample_sharegpt_requests` functions in benchmark_serving.py, as well as - from `sample_requests` in benchmark_throughput.py. - """ - # Check for invalid conditions - prompt_too_short = prompt_len < min_len - output_too_short = (not skip_min_output_len_check) and (output_len < min_len) - prompt_too_long = prompt_len > max_prompt_len - combined_too_long = (prompt_len + output_len) > max_total_len - - # Return True if none of the invalid conditions are met - return not ( - prompt_too_short or output_too_short or prompt_too_long or combined_too_long - ) - - -@cache -def lora_path_on_disk(lora_path: str) -> str: - return get_adapter_absolute_path(lora_path) - - -# Global cache for LoRA tokenizers. -lora_tokenizer_cache: dict[int, AnyTokenizer] = {} - - -def process_image(image: Any) -> Mapping[str, Any]: - """ - Process a single image input and return a multimedia content dictionary. - - Supports three input types: - - 1. Dictionary with raw image bytes: - Expects a dict with a 'bytes' key - containing raw image data. - Loads the bytes as a PIL.Image.Image. - - 2. PIL.Image.Image input: - Converts the image to RGB. - Saves the image as - a JPEG in memory. - Encodes the JPEG data as a base64 string. - Returns - a dictionary with the image as a base64 data URL. - - 3. String input: - Treats the string as a URL or local file path. - - Prepends "file://" if the string doesn't start with "http://" or - "file://". - Returns a dictionary with the image URL. - - Raises: - ValueError: If the input is not a supported type. 
- """ - if isinstance(image, dict) and "bytes" in image: - image = Image.open(BytesIO(image["bytes"])) - if isinstance(image, Image.Image): - image = convert_image_mode(image, "RGB") - with io.BytesIO() as image_data: - image.save(image_data, format="JPEG") - image_base64 = base64.b64encode(image_data.getvalue()).decode("utf-8") - return { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{image_base64}"}, - } - - if isinstance(image, str): - image_url = ( - image if image.startswith(("http://", "file://")) else f"file://{image}" - ) - return {"type": "image_url", "image_url": {"url": image_url}} - - raise ValueError( - f"Invalid image input {image}. Must be a PIL.Image.Image" - " or str or dictionary with raw image bytes." - ) - - -def process_video(video: Any) -> Mapping[str, Any]: - """ - Process a single video input and return a multimedia content dictionary. - - Supports the following input types: - - 1. Dictionary with raw video bytes: - Expects a dict with a 'bytes' key - containing raw video data. - - 2. String input: - Treats the string as a URL or local file path. - - Prepends "file://" if the string doesn't start with "http://" or - "file://". - Returns a dictionary with the image URL. - - Raises: - ValueError: If the input is not a supported type. - """ - if isinstance(video, dict) and "bytes" in video: - video_bytes = video["bytes"] - video_base64 = base64.b64encode(video_bytes).decode("utf-8") - return { - "type": "video_url", - "video_url": {"url": f"data:video/mp4;base64,{video_base64}"}, - } - - if isinstance(video, str): - video_url = ( - video if video.startswith(("http://", "file://")) else f"file://{video}" - ) - return {"type": "video_url", "video_url": {"url": video_url}} - - raise ValueError( - f"Invalid video input {video}. Must be a string of local path/remote url, or a dictionary with raw video bytes in the form of `{{'bytes': raw_video_bytes}}`." # noqa: E501 - ) - - -# ----------------------------------------------------------------------------- -# Random Dataset Implementation (Synthetic Data) -# ----------------------------------------------------------------------------- - - -class RandomDataset(BenchmarkDataset): - # Default values copied from benchmark_serving.py for the random dataset. - DEFAULT_PREFIX_LEN = 0 - DEFAULT_RANGE_RATIO = 0.0 - DEFAULT_INPUT_LEN = 1024 - DEFAULT_OUTPUT_LEN = 128 - - def __init__( - self, - **kwargs, - ) -> None: - super().__init__(**kwargs) - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - prefix_len: int = DEFAULT_PREFIX_LEN, - range_ratio: float = DEFAULT_RANGE_RATIO, - input_len: int = DEFAULT_INPUT_LEN, - output_len: int = DEFAULT_OUTPUT_LEN, - request_id_prefix: str = "", - **kwargs, - ) -> list[SampleRequest]: - # Enforce range_ratio < 1 - assert range_ratio < 1.0, ( - "random_range_ratio must be < 1.0 to ensure a valid sampling range" - ) - - vocab_size = tokenizer.vocab_size - num_special_tokens = tokenizer.num_special_tokens_to_add() - real_input_len = input_len - num_special_tokens - - prefix_token_ids = ( - np.random.randint(0, vocab_size, size=prefix_len).tolist() - if prefix_len > 0 - else [] - ) - - # New sampling logic: [X * (1 - b), X * (1 + b)] - input_low = int(real_input_len * (1 - range_ratio)) - input_high = int(real_input_len * (1 + range_ratio)) - output_low = int(output_len * (1 - range_ratio)) - # Ensure the lower bound for output length is at least 1 to prevent - # sampling 0 tokens, which can cause request failures. 
- output_low = max(output_low, 1) - output_high = int(output_len * (1 + range_ratio)) - - # Add logging for debugging - logger.info("Sampling input_len from [%s, %s]", input_low, input_high) - logger.info("Sampling output_len from [%s, %s]", output_low, output_high) - - input_lens = np.random.randint(input_low, input_high + 1, size=num_requests) - output_lens = np.random.randint(output_low, output_high + 1, size=num_requests) - offsets = np.random.randint(0, vocab_size, size=num_requests) - - requests = [] - for i in range(num_requests): - inner_seq = ( - (offsets[i] + i + np.arange(input_lens[i])) % vocab_size - ).tolist() - token_sequence = prefix_token_ids + inner_seq - prompt = tokenizer.decode(token_sequence) - # After decoding the prompt we have to encode and decode it again. - # This is done because in some cases N consecutive tokens - # give a string tokenized into != N number of tokens. - # For example for GPT2Tokenizer: - # [6880, 6881] -> ['Ġcalls', 'here'] -> - # [1650, 939, 486] -> ['Ġcall', 'sh', 'ere'] - # To avoid uncontrolled change of the prompt length, - # the encoded sequence is truncated before being decoded again. - total_input_len = prefix_len + int(input_lens[i]) - re_encoded_sequence = tokenizer.encode(prompt, add_special_tokens=False)[ - :total_input_len - ] - prompt = tokenizer.decode(re_encoded_sequence) - total_input_len = len(re_encoded_sequence) - requests.append( - SampleRequest( - prompt=prompt, - prompt_len=total_input_len, - expected_output_len=int(output_lens[i]), - request_id=request_id_prefix + str(i), - ) - ) - - return requests - - -# ----------------------------------------------------------------------------- -# ShareGPT Dataset Implementation -# ----------------------------------------------------------------------------- - - -class ShareGPTDataset(BenchmarkDataset): - """ - Implements the ShareGPT dataset. Loads data from a JSON file and generates - sample requests based on conversation turns. - """ - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.load_data() - - def load_data(self) -> None: - if self.dataset_path is None: - raise ValueError("dataset_path must be provided for loading data.") - - with open(self.dataset_path, encoding="utf-8") as f: - self.data = json.load(f) - # Filter entries with at least two conversation turns. 
- self.data = [ - entry - for entry in self.data - if "conversations" in entry and len(entry["conversations"]) >= 2 - ] - random.seed(self.random_seed) - random.shuffle(self.data) - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - lora_path: Optional[str] = None, - max_loras: Optional[int] = None, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - samples: list = [] - ind = 0 - for entry in self.data: - if len(samples) >= num_requests: - break - prompt, completion = ( - entry["conversations"][0]["value"], - entry["conversations"][1]["value"], - ) - - lora_request, tokenizer = self.get_random_lora_request( - tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path - ) - prompt_ids = tokenizer(prompt).input_ids - completion_ids = tokenizer(completion).input_ids - prompt_len = len(prompt_ids) - new_output_len = len(completion_ids) if output_len is None else output_len - if not is_valid_sequence( - prompt_len, - new_output_len, - skip_min_output_len_check=output_len is not None, - ): - continue - if image_path := entry.get("image"): - mm_content = process_image(image_path) - elif video_path := entry.get("video"): - mm_content = process_video(video_path) - else: - mm_content = None - if enable_multimodal_chat: - prompt = self.apply_multimodal_chat_transformation(prompt, mm_content) - samples.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=new_output_len, - lora_request=lora_request, - multi_modal_data=mm_content, - request_id=request_id_prefix + str(ind), - ) - ) - ind += 1 - self.maybe_oversample_requests(samples, num_requests, request_id_prefix) - return samples - - -# ----------------------------------------------------------------------------- -# Custom Dataset Implementation -# ----------------------------------------------------------------------------- - - -class CustomDataset(BenchmarkDataset): - """ - Implements the Custom dataset. Loads data from a JSONL file and generates - sample requests based on conversation turns. E.g., - ``` - {"prompt": "What is the capital of India?"} - {"prompt": "What is the capital of Iran?"} - {"prompt": "What is the capital of China?"} - ``` - """ - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.load_data() - - def load_data(self) -> None: - if self.dataset_path is None: - raise ValueError("dataset_path must be provided for loading data.") - - # self.data will be a list of dictionaries - # e.g., [{"prompt": "What is the capital of India?"}, ...] - # This will be the standardized format which load_data() - # has to convert into depending on the filetype of dataset_path. - # sample() will assume this standardized format of self.data - self.data = [] - - # Load the JSONL file - if self.dataset_path.endswith(".jsonl"): - jsonl_data = pd.read_json(path_or_buf=self.dataset_path, lines=True) - - # check if the JSONL file has a 'prompt' column - if "prompt" not in jsonl_data.columns: - raise ValueError("JSONL file must contain a 'prompt' column.") - - # Convert each row to a dictionary and append to self.data - # This will convert the DataFrame to a list of dictionaries - # where each dictionary corresponds to a row in the DataFrame. - # This is the standardized format we want for self.data - for _, row in jsonl_data.iterrows(): - self.data.append(row.to_dict()) - else: - raise NotImplementedError( - "Only JSONL format is supported for CustomDataset." 
- ) - - random.seed(self.random_seed) - random.shuffle(self.data) - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - lora_path: Optional[str] = None, - max_loras: Optional[int] = None, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - skip_chat_template: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - sampled_requests = [] - for i, item in enumerate(self.data): - if len(sampled_requests) >= num_requests: - break - prompt = item["prompt"] - - # apply template - if not skip_chat_template: - prompt = tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - add_generation_prompt=True, - tokenize=False, - ) - - prompt_len = len(tokenizer(prompt).input_ids) - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - request_id=request_id_prefix + str(i), - ) - ) - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - - return sampled_requests - - -# ----------------------------------------------------------------------------- -# Sonnet Dataset Implementation -# ----------------------------------------------------------------------------- - - -class SonnetDataset(BenchmarkDataset): - """ - Simplified implementation of the Sonnet dataset. Loads poem lines from a - text file and generates sample requests. Default values here copied from - `benchmark_serving.py` for the sonnet dataset. - """ - - DEFAULT_PREFIX_LEN = 200 - DEFAULT_INPUT_LEN = 550 - DEFAULT_OUTPUT_LEN = 150 - - def __init__( - self, - **kwargs, - ) -> None: - super().__init__(**kwargs) - self.load_data() - - def load_data(self) -> None: - if not self.dataset_path: - raise ValueError("dataset_path must be provided.") - with open(self.dataset_path, encoding="utf-8") as f: - self.data = f.readlines() - - def sample( - self, - tokenizer, - num_requests: int, - prefix_len: int = DEFAULT_PREFIX_LEN, - input_len: int = DEFAULT_INPUT_LEN, - output_len: int = DEFAULT_OUTPUT_LEN, - return_prompt_formatted: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - # Calculate average token length for a poem line. - tokenized_lines = [tokenizer(line).input_ids for line in self.data] - avg_len = sum(len(tokens) for tokens in tokenized_lines) / len(tokenized_lines) - - # Build the base prompt. - base_prompt = "Pick as many lines as you can from these poem lines:\n" - base_msg = [{"role": "user", "content": base_prompt}] - base_fmt = tokenizer.apply_chat_template( - base_msg, add_generation_prompt=True, tokenize=False - ) - base_offset = len(tokenizer(base_fmt).input_ids) - if input_len <= base_offset: - raise ValueError( - f"'input_len' must be higher than the base prompt length " - f"({base_offset})." - ) - - # Determine how many poem lines to use. 
- num_input_lines = round((input_len - base_offset) / avg_len) - num_prefix_lines = max(round((prefix_len - base_offset) / avg_len), 0) - prefix_lines = self.data[:num_prefix_lines] - - samples = [] - ind = 0 - while len(samples) < num_requests: - extra_lines = random.choices( - self.data, k=num_input_lines - num_prefix_lines - ) - prompt = f"{base_prompt}{''.join(prefix_lines + extra_lines)}" - msg = [{"role": "user", "content": prompt}] - prompt_formatted = tokenizer.apply_chat_template( - msg, add_generation_prompt=True, tokenize=False - ) - prompt_len = len(tokenizer(prompt_formatted).input_ids) - - if prompt_len <= input_len: - samples.append( - SampleRequest( - prompt=prompt_formatted if return_prompt_formatted else prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - request_id=request_id_prefix + str(ind), - ) - ) - ind += 1 - return samples - - -# ----------------------------------------------------------------------------- -# BurstGPT Dataset Implementation -# ----------------------------------------------------------------------------- - - -class BurstGPTDataset(BenchmarkDataset): - """ - Implements the BurstGPT dataset. Loads data from a CSV file and generates - sample requests based on synthetic prompt generation. Only rows with Model - "GPT-4" and positive response tokens are used. - """ - - def __init__(self, **kwargs) -> None: - super().__init__(**kwargs) - self.load_data() - - def load_data( - self, - ): - if self.dataset_path is None: - raise ValueError("dataset_path must be provided for loading data.") - - df = pd.read_csv(self.dataset_path) - # Filter to keep only GPT-4 rows. - gpt4_df = df[df["Model"] == "GPT-4"] - # Remove failed requests (where Response tokens is 0 or less). - gpt4_df = gpt4_df[gpt4_df["Response tokens"] > 0] - # Sample the desired number of rows. - self.data = gpt4_df - - def _sample_loaded_data(self, num_requests: int) -> list: - if num_requests <= len(self.data): - data = self.data.sample(n=num_requests, random_state=self.random_seed) - else: - data = self.data.sample( - n=num_requests, - random_state=self.random_seed, - replace=True, - ) - # Convert the dataframe to a list of lists. - return data.values.tolist() - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - max_loras: Optional[int] = None, - lora_path: Optional[str] = None, - request_id_prefix: str = "", - **kwargs, - ) -> list[SampleRequest]: - samples = [] - data = self._sample_loaded_data(num_requests=num_requests) - for i in range(num_requests): - input_len = int(data[i][2]) - output_len = int(data[i][3]) - lora_req, tokenizer = self.get_random_lora_request( - tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path - ) - vocab_size = tokenizer.vocab_size - # Generate a synthetic prompt: a list of token IDs computed as (i + - # j) modulo vocab_size. 
- token_ids = [(i + j) % vocab_size for j in range(input_len)] - prompt = tokenizer.decode(token_ids) - samples.append( - SampleRequest( - prompt=prompt, - prompt_len=input_len, - expected_output_len=output_len, - lora_request=lora_req, - request_id=request_id_prefix + str(i), - ) - ) - return samples - - -# ----------------------------------------------------------------------------- -# HuggingFace Dataset Base Implementation -# ----------------------------------------------------------------------------- -class HuggingFaceDataset(BenchmarkDataset): - """Base class for datasets hosted on HuggingFace.""" - - SUPPORTED_DATASET_PATHS: Union[set[str], dict[str, Callable]] = set() - - def __init__( - self, - dataset_path: str, - dataset_split: str, - no_stream: bool = False, - dataset_subset: Optional[str] = None, - **kwargs, - ) -> None: - super().__init__(dataset_path=dataset_path, **kwargs) - - self.dataset_split = dataset_split - self.dataset_subset = dataset_subset - self.load_stream = not no_stream - self.load_data() - - def load_data(self) -> None: - """Load data from HuggingFace datasets.""" - self.data = load_dataset( - self.dataset_path, - name=self.dataset_subset, - split=self.dataset_split, - streaming=self.load_stream, - ) - self.data = self.data.shuffle(seed=self.random_seed) - - -# ----------------------------------------------------------------------------- -# Conversation Dataset Implementation -# ----------------------------------------------------------------------------- - - -class ConversationDataset(HuggingFaceDataset): - """Dataset for conversation data with multimodal support.""" - - SUPPORTED_DATASET_PATHS = { - "lmms-lab/LLaVA-OneVision-Data", - "Aeala/ShareGPT_Vicuna_unfiltered", - } - IS_MULTIMODAL = True - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - # Filter examples with at least 2 conversations - filtered_data = self.data.filter(lambda x: len(x["conversations"]) >= 2) - sampled_requests = [] - dynamic_output = output_len is None - ind = 0 - - for item in filtered_data: - if len(sampled_requests) >= num_requests: - break - conv = item["conversations"] - prompt, completion = conv[0]["value"], conv[1]["value"] - - prompt_ids = tokenizer(prompt).input_ids - completion_ids = tokenizer(completion).input_ids - prompt_len = len(prompt_ids) - completion_len = len(completion_ids) - output_len = completion_len if dynamic_output else output_len - assert isinstance(output_len, int) and output_len > 0 - if dynamic_output and not is_valid_sequence(prompt_len, completion_len): - continue - mm_content = process_image(item["image"]) if "image" in item else None - if enable_multimodal_chat: - # Note: when chat is enabled the request prompt_len is no longer - # accurate and we will be using request output to count the - # actual prompt len and output len - prompt = self.apply_multimodal_chat_transformation(prompt, mm_content) - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - multi_modal_data=mm_content, - request_id=request_id_prefix + str(ind), - ) - ) - ind += 1 - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests - - -# ----------------------------------------------------------------------------- -# Vision Arena Dataset Implementation -# 
----------------------------------------------------------------------------- - - -class VisionArenaDataset(HuggingFaceDataset): - """ - Vision Arena Dataset. - """ - - DEFAULT_OUTPUT_LEN = 128 - SUPPORTED_DATASET_PATHS = { - "lmarena-ai/VisionArena-Chat": lambda x: x["conversation"][0][0]["content"], - "lmarena-ai/vision-arena-bench-v0.1": lambda x: x["turns"][0][0]["content"], - } - IS_MULTIMODAL = True - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN - sampled_requests = [] - for i, item in enumerate(self.data): - if len(sampled_requests) >= num_requests: - break - parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.dataset_path) - if parser_fn is None: - raise ValueError(f"Unsupported dataset path: {self.dataset_path}") - prompt = parser_fn(item) - mm_content = process_image(item["images"][0]) - prompt_len = len(tokenizer(prompt).input_ids) - if enable_multimodal_chat: - # Note: when chat is enabled the request prompt_len is no longer - # accurate and we will be using request output to count the - # actual prompt len - prompt = self.apply_multimodal_chat_transformation(prompt, mm_content) - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - multi_modal_data=mm_content, - request_id=request_id_prefix + str(i), - ) - ) - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests - - -# ----------------------------------------------------------------------------- -# Instruct Coder Dataset Implementation -# ----------------------------------------------------------------------------- - - -class InstructCoderDataset(HuggingFaceDataset): - """ - InstructCoder Dataset. - https://huggingface.co/datasets/likaixin/InstructCoder - - InstructCoder is the dataset designed for general code editing. It consists - of 114,239 instruction-input-output triplets, and covers multiple distinct - code editing scenario. - """ - - DEFAULT_OUTPUT_LEN = 200 # this is the average default output length - SUPPORTED_DATASET_PATHS = { - "likaixin/InstructCoder", - } - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN - sampled_requests = [] - for i, item in enumerate(self.data): - if len(sampled_requests) >= num_requests: - break - prompt = ( - f"{item['input']}\n\n{item['instruction']} Just output " - "the code, do not include any explanation." 
- ) - - # apply template - prompt = tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - add_generation_prompt=True, - tokenize=False, - ) - prompt_len = len(tokenizer(prompt).input_ids) - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - request_id=request_id_prefix + str(i), - ) - ) - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests - - -# ----------------------------------------------------------------------------- -# MT-Bench Dataset Implementation -# ----------------------------------------------------------------------------- - - -class MTBenchDataset(HuggingFaceDataset): - """ - MT-Bench Dataset. - https://huggingface.co/datasets/philschmid/mt-bench - - We create a single turn dataset for MT-Bench. - This is similar to Spec decoding benchmark setup in vLLM - https://github.com/vllm-project/vllm/blob/9d98ab5ec/examples/offline_inference/eagle.py#L14-L18 - """ # noqa: E501 - - DEFAULT_OUTPUT_LEN = 256 # avg len used in SD bench in vLLM - SUPPORTED_DATASET_PATHS = { - "philschmid/mt-bench", - } - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - enable_multimodal_chat: bool = False, - request_id_prefix: str = "", - **kwargs, - ) -> list: - output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN - sampled_requests = [] - - for i, item in enumerate(self.data): - if len(sampled_requests) >= num_requests: - break - prompt = item["turns"][0] - - # apply template - prompt = tokenizer.apply_chat_template( - [{"role": "user", "content": prompt}], - add_generation_prompt=True, - tokenize=False, - ) - - prompt_len = len(tokenizer(prompt).input_ids) - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - request_id=request_id_prefix + str(i), - ) - ) - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests - - -# ----------------------------------------------------------------------------- -# AIMO Dataset Implementation -# ----------------------------------------------------------------------------- - - -class AIMODataset(HuggingFaceDataset): - """ - Dataset class for processing a AIMO dataset with reasoning questions. 
- """ - - SUPPORTED_DATASET_PATHS = { - "AI-MO/aimo-validation-aime", - "AI-MO/NuminaMath-1.5", - "AI-MO/NuminaMath-CoT", - } - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - request_id_prefix: str = "", - **kwargs, - ) -> list: - sampled_requests = [] - dynamic_output = output_len is None - ind = 0 - - for item in self.data: - if len(sampled_requests) >= num_requests: - break - prompt, completion = item["problem"], item["solution"] - - prompt_ids = tokenizer(prompt).input_ids - completion_ids = tokenizer(completion).input_ids - prompt_len = len(prompt_ids) - completion_len = len(completion_ids) - output_len = completion_len if dynamic_output else output_len - assert isinstance(output_len, int) and output_len > 0 - if dynamic_output and not is_valid_sequence( - prompt_len, completion_len, max_prompt_len=2048, max_total_len=32000 - ): - continue - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - multi_modal_data=None, - request_id=request_id_prefix + str(ind), - ) - ) - ind += 1 - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests - - -# ----------------------------------------------------------------------------- -# Next Edit Prediction Dataset Implementation -# ----------------------------------------------------------------------------- - - -zeta_prompt = """### Instruction: -You are a code completion assistant and your task is to analyze user edits and then rewrite an excerpt that the user provides, suggesting the appropriate edits within the excerpt, taking into account the cursor location. - -### User Edits: - -{} - -### User Excerpt: - -{} - -### Response: - -""" # noqa: E501 - - -def _format_zeta_prompt( - sample: dict, original_start_marker: str = "<|editable_region_start|>" -) -> dict: - """Format the zeta prompt for the Next Edit Prediction (NEP) dataset. - - This function formats examples from the NEP dataset - into prompts and expected outputs. It could be - further extended to support more NEP datasets. - - Args: - sample: The dataset sample containing events, - inputs, and outputs. - original_start_marker: The marker indicating the - start of the editable region. Defaults to - "<|editable_region_start|>". - - Returns: - A dictionary with the formatted prompts and expected outputs. - """ - events = sample["events"] - input = sample["input"] - output = sample["output"] - prompt = zeta_prompt.format(events, input) - - # following the original implementation, extract the focused region - # from the raw output - output_start_index = output.find(original_start_marker) - output_focused_region = output[output_start_index:] - expected_output = output_focused_region - - return {"prompt": prompt, "expected_output": expected_output} - - -class NextEditPredictionDataset(HuggingFaceDataset): - """ - Dataset class for processing a Next Edit Prediction dataset. 
- """ - - SUPPORTED_DATASET_PATHS = { - "zed-industries/zeta", - } - MAPPING_PROMPT_FUNCS = { - "zed-industries/zeta": _format_zeta_prompt, - } - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - request_id_prefix: str = "", - **kwargs, - ): - formatting_prompt_func = self.MAPPING_PROMPT_FUNCS.get(self.dataset_path) - if formatting_prompt_func is None: - raise ValueError(f"Unsupported dataset path: {self.dataset_path}") - samples = [] - for i, sample in enumerate(self.data): - sample = formatting_prompt_func(sample) - samples.append( - SampleRequest( - prompt=sample["prompt"], - prompt_len=len(tokenizer(sample["prompt"]).input_ids), - expected_output_len=len( - tokenizer(sample["expected_output"]).input_ids - ), - request_id=request_id_prefix + str(i), - ) - ) - if len(samples) >= num_requests: - break - self.maybe_oversample_requests(samples, num_requests, request_id_prefix) - return samples - - -# ----------------------------------------------------------------------------- -# ASR Dataset Implementation -# ----------------------------------------------------------------------------- - - -class ASRDataset(HuggingFaceDataset): - """ - Dataset class for processing a ASR dataset for transcription. - Tested on the following set: - - +----------------+----------------------------------------+--------------------------+-----------------------------+ - | Dataset | Domain | Speaking Style | hf-subset | - +----------------+----------------------------------------+--------------------------+-----------------------------+ - | TED-LIUM | TED talks | Oratory | release1, release2, release3| - | | | | release3-speaker-adaptation | - | VoxPopuli | European Parliament | Oratory | en, de, it, fr, ... | - | LibriSpeech | Audiobook | Narrated | "LIUM/tedlium" | - | GigaSpeech | Audiobook, podcast, YouTube | Narrated, spontaneous | xs, s, m, l, xl, dev, test | - | SPGISpeech | Financial meetings | Oratory, spontaneous | S, M, L, dev, test | - | AMI | Meetings | Spontaneous | ihm, sdm | - +----------------+----------------------------------------+--------------------------+-----------------------------+ - - """ # noqa: E501 - - SUPPORTED_DATASET_PATHS = { - "openslr/librispeech_asr", - "facebook/voxpopuli", - "LIUM/tedlium", - "edinburghcstr/ami", - "speechcolab/gigaspeech", - "kensho/spgispeech", - } - - DEFAULT_OUTPUT_LEN = 128 - IS_MULTIMODAL = True - - # TODO Whisper-specific. Abstract interface when more models are supported. 
- TRANSCRIPTION_PREAMBLE = "<|startoftranscript|><|en|><|transcribe|><|notimestamps|>" - skip_long_audios: bool = True - - def sample( - self, - tokenizer: PreTrainedTokenizerBase, - num_requests: int, - output_len: Optional[int] = None, - request_id_prefix: str = "", - **kwargs, - ) -> list: - import librosa - - output_len = output_len if output_len is not None else self.DEFAULT_OUTPUT_LEN - prompt = ASRDataset.TRANSCRIPTION_PREAMBLE - prompt_len = len(tokenizer(prompt).input_ids) - sampled_requests = [] - skipped = 0 - ind = 0 - for item in self.data: - if len(sampled_requests) >= num_requests: - break - audio = item["audio"] - y, sr = audio["array"], audio["sampling_rate"] - duration_s = librosa.get_duration(y=y, sr=sr) - # Whisper max supported duration - if self.skip_long_audios and duration_s > 30: - skipped += 1 - continue - - mm_content = {"audio": (y, sr)} - sampled_requests.append( - SampleRequest( - prompt=prompt, - prompt_len=prompt_len, - expected_output_len=output_len, - multi_modal_data=mm_content, - request_id=request_id_prefix + str(ind), - ) - ) - ind += 1 - if skipped: - logger.warning( - "%d samples discarded from dataset due to" - " their length being greater than" - " what Whisper supports.", - skipped, - ) - self.maybe_oversample_requests( - sampled_requests, num_requests, request_id_prefix - ) - return sampled_requests diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md index 13582dadb46e..d04b1d1136a1 100644 --- a/docs/contributing/benchmarks.md +++ b/docs/contributing/benchmarks.md @@ -37,6 +37,7 @@ th { | RandomMultiModal (Image/Video) | 🟡 | 🚧 | `synthetic` | | Prefix Repetition | ✅ | ✅ | `synthetic` | | HuggingFace-VisionArena | ✅ | ✅ | `lmarena-ai/VisionArena-Chat` | +| HuggingFace-MMVU | ✅ | ✅ | `yale-nlp/MMVU` | | HuggingFace-InstructCoder | ✅ | ✅ | `likaixin/InstructCoder` | | HuggingFace-AIMO | ✅ | ✅ | `AI-MO/aimo-validation-aime`, `AI-MO/NuminaMath-1.5`, `AI-MO/NuminaMath-CoT` | | HuggingFace-Other | ✅ | ✅ | `lmms-lab/LLaVA-OneVision-Data`, `Aeala/ShareGPT_Vicuna_unfiltered` | diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 0a297479bcc0..8d11b19066bb 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -335,7 +335,7 @@ def process_image(image: Any) -> Mapping[str, Any]: if isinstance(image, str): image_url = (image if image.startswith( - ("http://", "file://")) else f"file://{image}") + ("http://", "https://", "file://")) else f"file://{image}") return {"type": "image_url", "image_url": {"url": image_url}} raise ValueError(f"Invalid image input {image}. 
Must be a PIL.Image.Image" @@ -370,7 +370,7 @@ def process_video(video: Any) -> Mapping[str, Any]: if isinstance(video, str): video_url = (video if video.startswith( - ("http://", "file://")) else f"file://{video}") + ("http://", "https://", "file://")) else f"file://{video}") return {"type": "video_url", "video_url": {"url": video_url}} raise ValueError( @@ -1405,6 +1405,13 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: dataset_class = VisionArenaDataset args.hf_split = "train" args.hf_subset = None + elif ( + args.dataset_path in MMVUDataset.SUPPORTED_DATASET_PATHS + or args.hf_name in MMVUDataset.SUPPORTED_DATASET_PATHS + ): + dataset_class = MMVUDataset + args.hf_split = "validation" + args.hf_subset = None elif ( args.dataset_path in InstructCoderDataset.SUPPORTED_DATASET_PATHS or args.hf_name in InstructCoderDataset.SUPPORTED_DATASET_PATHS @@ -2053,6 +2060,61 @@ def sample( return sampled_requests +class MMVUDataset(HuggingFaceDataset): + """ + MMVU Dataset. + https://huggingface.co/datasets/yale-nlp/MMVU + """ + + DEFAULT_OUTPUT_LEN = 128 + SUPPORTED_DATASET_PATHS = { + "yale-nlp/MMVU": + lambda x: x["question"] + " " + ( + " ".join(f"{k}.{v}" for k, v in x["choices"].items()) + ), + } + + def sample( + self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + output_len: Optional[int] = None, + enable_multimodal_chat: bool = False, + request_id_prefix: str = "", + no_oversample: bool = False, + **kwargs, + ) -> list: + output_len = (output_len + if output_len is not None else self.DEFAULT_OUTPUT_LEN) + sampled_requests = [] + for i, item in enumerate(self.data): + if len(sampled_requests) >= num_requests: + break + parser_fn = self.SUPPORTED_DATASET_PATHS.get(self.hf_name) + if parser_fn is None: + raise ValueError(f"Unsupported dataset path: {self.hf_name}") + prompt = parser_fn(item) + mm_content = process_video(item["video"]) + prompt_len = len(tokenizer(prompt).input_ids) + if enable_multimodal_chat: + # Note: when chat is enabled the request prompt_len is no longer + # accurate and we will be using request output to count the + # actual prompt len + prompt = self.apply_multimodal_chat_transformation( + prompt, mm_content) + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + multi_modal_data=mm_content, + request_id=request_id_prefix + str(i), + )) + self.maybe_oversample_requests(sampled_requests, num_requests, + request_id_prefix, no_oversample) + return sampled_requests + + # ----------------------------------------------------------------------------- # Instruct Coder Dataset Implementation # ----------------------------------------------------------------------------- From dd83a157f12c0ba7f1357f7954cf85aff2c3b882 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 16 Sep 2025 23:42:23 -0400 Subject: [PATCH 021/518] [UX] Enforce valid choices for envs like VLLM_ATTENTION_BACKEND, etc (#24761) Signed-off-by: mgoin Signed-off-by: Michael Goin --- vllm/envs.py | 101 +++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 77 insertions(+), 24 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index d2006979ea81..385d2a7c51f2 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -6,7 +6,7 @@ import os import sys import tempfile -from typing import TYPE_CHECKING, Any, Callable, Optional +from typing import TYPE_CHECKING, Any, Callable, Literal, Optional, Union if TYPE_CHECKING: VLLM_HOST_IP: str = "" @@ -56,11 +56,12 @@ VLLM_ENABLE_FUSED_MOE_ACTIVATION_CHUNKING: bool = True 
VLLM_USE_RAY_SPMD_WORKER: bool = False VLLM_USE_RAY_COMPILED_DAG: bool = False - VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: str = "auto" + VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE: Literal["auto", "nccl", + "shm"] = "auto" VLLM_USE_RAY_COMPILED_DAG_OVERLAP_COMM: bool = False VLLM_USE_RAY_WRAPPED_PP_COMM: bool = True VLLM_XLA_USE_SPMD: bool = False - VLLM_WORKER_MULTIPROC_METHOD: str = "fork" + VLLM_WORKER_MULTIPROC_METHOD: Literal["fork", "spawn"] = "fork" VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets") VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 @@ -77,7 +78,8 @@ VLLM_DOCKER_BUILD_CONTEXT: bool = False VLLM_TEST_USE_PRECOMPILED_NIGHTLY_WHEEL: bool = False VLLM_KEEP_ALIVE_ON_ENGINE_DEATH: bool = False - CMAKE_BUILD_TYPE: Optional[str] = None + CMAKE_BUILD_TYPE: Optional[Literal["Debug", "Release", + "RelWithDebInfo"]] = None VERBOSE: bool = False VLLM_ALLOW_LONG_MAX_MODEL_LEN: bool = False VLLM_RPC_TIMEOUT: int = 10000 # ms @@ -140,22 +142,25 @@ VLLM_USE_FUSED_MOE_GROUPED_TOPK: bool = True VLLM_USE_FLASHINFER_MOE_FP8: bool = False VLLM_USE_FLASHINFER_MOE_FP4: bool = False - VLLM_FLASHINFER_MOE_BACKEND: str = "throughput" + VLLM_FLASHINFER_MOE_BACKEND: Literal["throughput", + "latency"] = "throughput" VLLM_XGRAMMAR_CACHE_MB: int = 0 VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256 VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost" VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5557 - VLLM_ALL2ALL_BACKEND: str = "naive" + VLLM_ALL2ALL_BACKEND: Literal["naive", "pplx", "deepep_high_throughput", + "deepep_low_latency"] = "naive" VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840 VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1 VLLM_SLEEP_WHEN_IDLE: bool = False VLLM_MQ_MAX_CHUNK_BYTES_MB: int = 16 VLLM_EXECUTE_MODEL_TIMEOUT_SECONDS: int = 300 - VLLM_KV_CACHE_LAYOUT: Optional[str] = None + VLLM_KV_CACHE_LAYOUT: Optional[Literal["NHD", "HND"]] = None VLLM_COMPUTE_NANS_IN_LOGITS: bool = False VLLM_USE_NVFP4_CT_EMULATIONS: bool = False - VLLM_ROCM_QUICK_REDUCE_QUANTIZATION: str = "NONE" + VLLM_ROCM_QUICK_REDUCE_QUANTIZATION: Literal["FP", "INT8", "INT6", "INT4", + "NONE"] = "NONE" VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120 @@ -207,6 +212,48 @@ def maybe_convert_bool(value: Optional[str]) -> Optional[bool]: return bool(int(value)) +def env_with_choices( + env_name: str, + default: Optional[str], + choices: Union[list[str], Callable[[], list[str]]], + case_sensitive: bool = True) -> Callable[[], Optional[str]]: + """ + Create a lambda that validates environment variable against allowed choices + + Args: + env_name: Name of the environment variable + default: Default value if not set (can be None) + choices: List of valid string options or callable that returns list + case_sensitive: Whether validation should be case sensitive + + Returns: + Lambda function for environment_variables dict + """ + + def _get_validated_env() -> Optional[str]: + value = os.getenv(env_name) + if value is None: + return default + + # Resolve choices if it's a callable (for lazy loading) + actual_choices = choices() if callable(choices) else choices + + if not case_sensitive: + check_value = value.lower() + check_choices = [choice.lower() for choice in actual_choices] + else: + check_value = value + check_choices = actual_choices + + if check_value not in check_choices: + raise ValueError(f"Invalid value '{value}' for {env_name}. 
" + f"Valid options: {actual_choices}.") + + return value + + return _get_validated_env + + def get_vllm_port() -> Optional[int]: """Get the port from VLLM_PORT environment variable. @@ -287,7 +334,8 @@ def get_vllm_port() -> Optional[int]: # If not set, defaults to "Debug" or "RelWithDebInfo" # Available options: "Debug", "Release", "RelWithDebInfo" "CMAKE_BUILD_TYPE": - lambda: os.getenv("CMAKE_BUILD_TYPE"), + env_with_choices("CMAKE_BUILD_TYPE", None, + ["Debug", "Release", "RelWithDebInfo"]), # If set, vllm will print verbose logs during installation "VERBOSE": @@ -476,7 +524,7 @@ def get_vllm_port() -> Optional[int]: lambda: int(os.getenv("VLLM_TRACE_FUNCTION", "0")), # Backend for attention computation - # Available options: + # Example options: # - "TORCH_SDPA": use torch.nn.MultiheadAttention # - "FLASH_ATTN": use FlashAttention # - "XFORMERS": use XFormers @@ -486,8 +534,11 @@ def get_vllm_port() -> Optional[int]: # - "FLASH_ATTN_MLA": use FlashAttention for MLA # - "FLASHINFER_MLA": use FlashInfer for MLA # - "CUTLASS_MLA": use CUTLASS for MLA + # All possible options loaded dynamically from _Backend enum "VLLM_ATTENTION_BACKEND": - lambda: os.getenv("VLLM_ATTENTION_BACKEND", None), + env_with_choices("VLLM_ATTENTION_BACKEND", None, + lambda: list(__import__('vllm.platforms.interface', \ + fromlist=['_Backend'])._Backend.__members__.keys())), # If set, vllm will use flashinfer sampler "VLLM_USE_FLASHINFER_SAMPLER": @@ -550,7 +601,8 @@ def get_vllm_port() -> Optional[int]: # - "shm": use shared memory and gRPC for communication # This flag is ignored if VLLM_USE_RAY_COMPILED_DAG is not set. "VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE": - lambda: os.getenv("VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE", "auto"), + env_with_choices("VLLM_USE_RAY_COMPILED_DAG_CHANNEL_TYPE", "auto", + ["auto", "nccl", "shm"]), # If the env var is set, it enables GPU communication overlap # (experimental feature) in Ray's Compiled Graph. This flag is ignored if @@ -569,7 +621,8 @@ def get_vllm_port() -> Optional[int]: # Use dedicated multiprocess context for workers. # Both spawn and fork work "VLLM_WORKER_MULTIPROC_METHOD": - lambda: os.getenv("VLLM_WORKER_MULTIPROC_METHOD", "fork"), + env_with_choices("VLLM_WORKER_MULTIPROC_METHOD", "fork", + ["spawn", "fork"]), # Path to the cache for storing downloaded assets "VLLM_ASSETS_CACHE": @@ -833,7 +886,8 @@ def get_vllm_port() -> Optional[int]: # Choice of quantization level: FP, INT8, INT6, INT4 or NONE # Recommended for large models to get allreduce "VLLM_ROCM_QUICK_REDUCE_QUANTIZATION": - lambda: os.getenv("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION", "NONE").upper(), + env_with_choices("VLLM_ROCM_QUICK_REDUCE_QUANTIZATION", "NONE", + ["FP", "INT8", "INT6", "INT4", "NONE"]), # Custom quick allreduce kernel for MI3* cards # Due to the lack of the bfloat16 asm instruction, bfloat16 @@ -1075,21 +1129,20 @@ def get_vllm_port() -> Optional[int]: # - "deepep_high_throughput", use deepep high-throughput kernels # - "deepep_low_latency", use deepep low-latency kernels "VLLM_ALL2ALL_BACKEND": - lambda: os.getenv("VLLM_ALL2ALL_BACKEND", "naive"), + env_with_choices("VLLM_ALL2ALL_BACKEND", "naive", + ["naive", "pplx", + "deepep_high_throughput", "deepep_low_latency"]), - # Flashinfer MoE backend for vLLM's fused Mixture-of-Experts support. Both - # require compute capability 10.0 or above. + # Flashinfer MoE backend for vLLM's fused Mixture-of-Experts support. + # Both require compute capability 10.0 or above. 
# Available options: # - "throughput": [default] # Uses CUTLASS kernels optimized for high-throughput batch inference. # - "latency": # Uses TensorRT-LLM kernels optimized for low-latency inference. - # To set this backend, define the environment variable: - # export VLLM_FLASHINFER_MOE_BACKEND=latency. - # If not set, defaults to "throughput". - "VLLM_FLASHINFER_MOE_BACKEND": lambda: os.getenv( - "VLLM_FLASHINFER_MOE_BACKEND", "throughput" - ), + "VLLM_FLASHINFER_MOE_BACKEND": + env_with_choices("VLLM_FLASHINFER_MOE_BACKEND", "throughput", + ["throughput", "latency"]), # Control the maximum number of tokens per expert supported by the # NVFP4 MoE CUTLASS Kernel. This value is used to create a buffer for @@ -1145,7 +1198,7 @@ def get_vllm_port() -> Optional[int]: # leave the layout choice to the backend. Mind that backends may only # implement and support a subset of all possible layouts. "VLLM_KV_CACHE_LAYOUT": - lambda: os.getenv("VLLM_KV_CACHE_LAYOUT", None), + env_with_choices("VLLM_KV_CACHE_LAYOUT", None, ["NHD", "HND"]), # Enable checking whether the generated logits contain NaNs, # indicating corrupted output. Useful for debugging low level bugs From 5672ba90bd18129946437266a12c5e619baca488 Mon Sep 17 00:00:00 2001 From: yyzxw <34639446+yyzxw@users.noreply.github.com> Date: Wed, 17 Sep 2025 11:53:23 +0800 Subject: [PATCH 022/518] [Docs] fix invalid doc link (#25017) Signed-off-by: zxw <1020938856@qq.com> --- docs/contributing/model/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/contributing/model/README.md b/docs/contributing/model/README.md index 6c013738ac1e..36068bc14876 100644 --- a/docs/contributing/model/README.md +++ b/docs/contributing/model/README.md @@ -3,7 +3,7 @@ !!! important Many decoder language models can now be automatically loaded using the [Transformers backend][transformers-backend] without having to implement them in vLLM. See if `vllm serve ` works first! -vLLM models are specialized [PyTorch](https://pytorch.org/) models that take advantage of various [features](../../features/compatibility_matrix.md) to optimize their performance. +vLLM models are specialized [PyTorch](https://pytorch.org/) models that take advantage of various [features](../../features/README.md#compatibility-matrix) to optimize their performance. The complexity of integrating a model into vLLM depends heavily on the model's architecture. The process is considerably straightforward if the model shares a similar architecture with an existing model in vLLM. 
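
Note on the `env_with_choices` helper added in the vllm/envs.py patch above: the self-contained sketch below is illustrative only and is not part of any patch in this series. It re-implements the helper's validation behavior so the intended semantics are easy to see; the variable name "VLLM_EXAMPLE_MODE" and its choices are hypothetical and do not exist in vllm/envs.py.

    # Minimal sketch of the env_with_choices validation behavior (assumed,
    # based on the docstring in the patch above). Names here are hypothetical.
    import os
    from typing import Callable, Optional

    def env_with_choices_sketch(
            env_name: str,
            default: Optional[str],
            choices: list[str],
            case_sensitive: bool = True) -> Callable[[], Optional[str]]:
        def _get() -> Optional[str]:
            value = os.getenv(env_name)
            if value is None:
                return default
            check_value = value if case_sensitive else value.lower()
            check_choices = (choices if case_sensitive else
                             [c.lower() for c in choices])
            if check_value not in check_choices:
                raise ValueError(f"Invalid value '{value}' for {env_name}. "
                                 f"Valid options: {choices}.")
            return value

        return _get

    # Valid value passes through unchanged.
    os.environ["VLLM_EXAMPLE_MODE"] = "fast"
    getter = env_with_choices_sketch("VLLM_EXAMPLE_MODE", "safe",
                                     ["safe", "fast"])
    assert getter() == "fast"

    # Invalid value raises at read time instead of being silently accepted.
    os.environ["VLLM_EXAMPLE_MODE"] = "bogus"
    try:
        getter()
    except ValueError as e:
        print(e)

The point of the change, as this sketch shows, is that a typo in an environment variable now fails loudly with the list of valid options rather than being passed through to downstream code.
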
From 67532a1a6855e8262b3e1c9512c85e2fc934b3c0 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 16 Sep 2025 23:57:51 -0400 Subject: [PATCH 023/518] [UX] Remove "quantization is not fully optimized yet" log (#25012) Signed-off-by: mgoin --- vllm/config/__init__.py | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 535802585d18..5f3057609971 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -1086,22 +1086,6 @@ def _parse_quant_hf_config(self, hf_config: PretrainedConfig): def _verify_quantization(self) -> None: supported_quantization = me_quant.QUANTIZATION_METHODS - optimized_quantization_methods = [ - "fp8", - "modelopt", - "gptq_marlin_24", - "gptq_marlin", - "awq_marlin", - "fbgemm_fp8", - "compressed-tensors", - "experts_int8", - "quark", - "modelopt_fp4", - "bitblas", - "gptq_bitblas", - "inc", - "petit_nvfp4", - ] if self.quantization is not None: self.quantization = cast(me_quant.QuantizationMethods, self.quantization) @@ -1183,11 +1167,6 @@ def _verify_quantization(self) -> None: f"be one of {supported_quantization}.") from vllm.platforms import current_platform current_platform.verify_quantization(self.quantization) - if self.quantization not in optimized_quantization_methods: - logger.warning( - "%s quantization is not fully " - "optimized yet. The speed can be slower than " - "non-quantized models.", self.quantization) def _verify_cuda_graph(self) -> None: # The `max_seq_len_to_capture` was incorrectly From ea3de5ef0d8cbf0e61ee27647954e5a867fae020 Mon Sep 17 00:00:00 2001 From: Prashant Gupta Date: Tue, 16 Sep 2025 20:58:38 -0700 Subject: [PATCH 024/518] [misc] fix typo in value error (#24995) Signed-off-by: Prashant Gupta --- vllm/entrypoints/renderer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py index f0798afbcf21..fb859d57be9f 100644 --- a/vllm/entrypoints/renderer.py +++ b/vllm/entrypoints/renderer.py @@ -383,7 +383,7 @@ def _create_tokens_prompt( """Create validated EngineTokensPrompt.""" if max_length is not None and len(token_ids) > max_length: raise ValueError( - f"This maximum context length is {max_length} tokens. " + f"This model's maximum context length is {max_length} tokens. " f"However, your request has {len(token_ids)} input tokens. 
" "Please reduce the length of the input messages.") From 58d4c705a88adc3187591b4b2e651eae3b190061 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Tue, 16 Sep 2025 23:59:07 -0400 Subject: [PATCH 025/518] [Core] Get num_encoder_tokens from scheduler config (#24989) Signed-off-by: Russell Bryant --- vllm/v1/core/sched/scheduler.py | 5 ++--- vllm/v1/kv_cache_interface.py | 5 ++--- vllm/v1/worker/gpu_model_runner.py | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index c1e59423e9a1..85ca858ad7bd 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -465,9 +465,8 @@ def schedule(self) -> SchedulerOutput: in self.vllm_config.model_config.model.lower()), ( "Whisper is the only supported " "encoder-decoder model.") - num_encoder_tokens = MULTIMODAL_REGISTRY.\ - get_encdec_max_encoder_len( - self.vllm_config.model_config) + num_encoder_tokens =\ + self.scheduler_config.max_num_encoder_input_tokens else: num_encoder_tokens = 0 diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py index 6e8f569fff0e..0cf92a680a68 100644 --- a/vllm/v1/kv_cache_interface.py +++ b/vllm/v1/kv_cache_interface.py @@ -11,7 +11,6 @@ from vllm.config import VllmConfig from vllm.logger import init_logger -from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.utils import cdiv, get_dtype_size logger = init_logger(__name__) @@ -230,8 +229,8 @@ class CrossAttentionSpec(AttentionSpec): def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int: # For cross-attention, we need to cache encoder states # Get encoder length (e.g., 1500 for Whisper). - max_encoder_len = MULTIMODAL_REGISTRY.\ - get_encdec_max_encoder_len(vllm_config.model_config) + max_encoder_len = vllm_config.scheduler_config.\ + max_num_encoder_input_tokens return cdiv(max_encoder_len, self.block_size) * self.page_size_bytes diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index e23115e177e6..f256dc160a6b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -234,8 +234,8 @@ def __init__( if self.model_config.is_encoder_decoder: # Maximum length of the encoder input, only for encoder-decoder # models. 
- self.max_encoder_len = self.mm_registry.\ - get_encdec_max_encoder_len(model_config) + self.max_encoder_len = scheduler_config.\ + max_num_encoder_input_tokens else: self.max_encoder_len = 0 From 5801e4977679895d89493e84088e7936f528285f Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Tue, 16 Sep 2025 21:29:27 -0700 Subject: [PATCH 026/518] [V0 Deprecation] Remove MQLLMEngine (#25019) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- .buildkite/test-pipeline.yaml | 2 - .../entrypoints/openai/test_lora_resolvers.py | 4 +- tests/entrypoints/openai/test_serving_chat.py | 14 +- tests/mq_llm_engine/__init__.py | 0 tests/mq_llm_engine/conftest.py | 12 - tests/mq_llm_engine/test_abort.py | 69 -- tests/mq_llm_engine/test_error_handling.py | 376 ---------- tests/mq_llm_engine/test_load.py | 59 -- tests/mq_llm_engine/utils.py | 81 --- vllm/engine/multiprocessing/__init__.py | 145 ---- vllm/engine/multiprocessing/client.py | 643 ------------------ vllm/engine/multiprocessing/engine.py | 470 ------------- vllm/entrypoints/launcher.py | 2 - vllm/entrypoints/openai/api_server.py | 102 +-- vllm/platforms/rocm.py | 2 +- 15 files changed, 12 insertions(+), 1969 deletions(-) delete mode 100644 tests/mq_llm_engine/__init__.py delete mode 100644 tests/mq_llm_engine/conftest.py delete mode 100644 tests/mq_llm_engine/test_abort.py delete mode 100644 tests/mq_llm_engine/test_error_handling.py delete mode 100644 tests/mq_llm_engine/test_load.py delete mode 100644 tests/mq_llm_engine/utils.py delete mode 100644 vllm/engine/multiprocessing/__init__.py delete mode 100644 vllm/engine/multiprocessing/client.py delete mode 100644 vllm/engine/multiprocessing/engine.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 6f06099edd53..b5ea4407ef5b 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -46,7 +46,6 @@ steps: mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - - tests/mq_llm_engine - tests/async_engine - tests/test_inputs.py - tests/test_outputs.py @@ -57,7 +56,6 @@ steps: - tests/transformers_utils commands: - python3 standalone_tests/lazy_imports.py - - pytest -v -s mq_llm_engine # MQLLMEngine - pytest -v -s async_engine # AsyncLLMEngine - pytest -v -s test_inputs.py - pytest -v -s test_outputs.py diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/test_lora_resolvers.py index 2bf29ecf087f..e2c83b9c4004 100644 --- a/tests/entrypoints/openai/test_lora_resolvers.py +++ b/tests/entrypoints/openai/test_lora_resolvers.py @@ -10,7 +10,6 @@ import pytest from vllm.config.multimodal import MultiModalConfig -from vllm.engine.multiprocessing.client import MQLLMEngineClient from vllm.entrypoints.openai.protocol import CompletionRequest, ErrorResponse from vllm.entrypoints.openai.serving_completion import OpenAIServingCompletion from vllm.entrypoints.openai.serving_models import (BaseModelPath, @@ -18,6 +17,7 @@ from vllm.lora.request import LoRARequest from vllm.lora.resolver import LoRAResolver, LoRAResolverRegistry from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.v1.engine.async_llm import AsyncLLM MODEL_NAME = "openai-community/gpt2" BASE_MODEL_PATHS = [BaseModelPath(name=MODEL_NAME, model_path=MODEL_NAME)] @@ -82,7 +82,7 @@ def register_mock_resolver(): @pytest.fixture def mock_serving_setup(): """Provides a mocked engine and serving completion instance.""" - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) 
mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index 502704c9bbdf..de26fce854f5 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -13,12 +13,12 @@ import pytest_asyncio from vllm.config.multimodal import MultiModalConfig -from vllm.engine.multiprocessing.client import MQLLMEngineClient from vllm.entrypoints.openai.protocol import ChatCompletionRequest from vllm.entrypoints.openai.serving_chat import OpenAIServingChat from vllm.entrypoints.openai.serving_models import (BaseModelPath, OpenAIServingModels) from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.v1.engine.async_llm import AsyncLLM from ...utils import RemoteOpenAIServer @@ -276,7 +276,7 @@ def test_async_serving_chat_init(): @pytest.mark.asyncio async def test_serving_chat_returns_correct_model_name(): - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False @@ -312,7 +312,7 @@ async def return_model_name(*args): @pytest.mark.asyncio async def test_serving_chat_should_set_correct_max_tokens(): - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False @@ -355,7 +355,7 @@ async def test_serving_chat_should_set_correct_max_tokens(): } # Reinitialize the engine with new settings - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False @@ -410,7 +410,7 @@ async def test_serving_chat_should_set_correct_max_tokens(): } # Reinitialize the engine with new settings - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False @@ -467,7 +467,7 @@ async def test_serving_chat_could_load_correct_generation_config(): "repetition_penalty": 1.05 } - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False @@ -523,7 +523,7 @@ async def test_serving_chat_did_set_correct_cache_salt(model_type): mock_model_config = MockModelConfig() mock_model_config.hf_config.model_type = model_type - mock_engine = MagicMock(spec=MQLLMEngineClient) + mock_engine = MagicMock(spec=AsyncLLM) mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False diff --git a/tests/mq_llm_engine/__init__.py b/tests/mq_llm_engine/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/mq_llm_engine/conftest.py b/tests/mq_llm_engine/conftest.py deleted file mode 100644 index 375b248ebeda..000000000000 --- a/tests/mq_llm_engine/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. 
- """ - monkeypatch.setenv('VLLM_USE_V1', '0') diff --git a/tests/mq_llm_engine/test_abort.py b/tests/mq_llm_engine/test_abort.py deleted file mode 100644 index 5ff08cbb3248..000000000000 --- a/tests/mq_llm_engine/test_abort.py +++ /dev/null @@ -1,69 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Test that aborting is handled properly.""" - -import asyncio -import tempfile -import uuid - -import pytest - -from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate -from vllm.engine.arg_utils import AsyncEngineArgs - -MODEL = "google/gemma-1.1-2b-it" -ENGINE_ARGS = AsyncEngineArgs(model=MODEL) -RAISED_ERROR = KeyError -RAISED_VALUE = "foo" -EXPECTED_TOKENS = 250 - - -@pytest.fixture(scope="function") -def tmp_socket(): - with tempfile.TemporaryDirectory() as td: - yield f"ipc://{td}/{uuid.uuid4()}" - - -@pytest.mark.asyncio -async def test_abort(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket) as engine: - - client = await engine.make_client() - - request_id_to_be_aborted = "request-aborted" - request_ids_a = [f"request-a-{idx}" for idx in range(10)] - request_ids_b = [f"request-b-{idx}" for idx in range(10)] - - # Requests started before one to be aborted. - tasks = [] - for request_id in request_ids_a: - tasks.append( - asyncio.create_task( - generate(client, request_id, EXPECTED_TOKENS))) - - # Aborted. - task_aborted = asyncio.create_task( - generate(client, request_id_to_be_aborted, EXPECTED_TOKENS)) - - # Requests started after one to be aborted. - for request_id in request_ids_b: - tasks.append( - asyncio.create_task( - generate(client, request_id, EXPECTED_TOKENS))) - - # Actually abort. - await asyncio.sleep(0.5) - await client.abort(request_id_to_be_aborted) - - # Confirm that we got all the EXPECTED tokens from the requests. - for task in tasks: - count, request_id = await task - assert count == EXPECTED_TOKENS, ( - f"{request_id} generated only {count} tokens") - - # Cancel task (this will hang indefinitely if not). - task_aborted.cancel() - - # Shutdown. 
- client.close() diff --git a/tests/mq_llm_engine/test_error_handling.py b/tests/mq_llm_engine/test_error_handling.py deleted file mode 100644 index 77e3732cd06c..000000000000 --- a/tests/mq_llm_engine/test_error_handling.py +++ /dev/null @@ -1,376 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Test that various errors are handled properly.""" - -import asyncio -import tempfile -import time -import uuid -from unittest.mock import Mock - -import pytest - -from tests.mq_llm_engine.utils import RemoteMQLLMEngine -from vllm import SamplingParams -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.llm_engine import LLMEngine -from vllm.engine.multiprocessing import MQEngineDeadError -from vllm.engine.multiprocessing.engine import MQLLMEngine -from vllm.entrypoints.openai.api_server import build_async_engine_client -from vllm.entrypoints.openai.cli_args import make_arg_parser -from vllm.lora.request import LoRARequest -from vllm.sequence import SequenceGroupMetadata -from vllm.usage.usage_lib import UsageContext -from vllm.utils import FlexibleArgumentParser - -MODEL = "google/gemma-1.1-2b-it" -ENGINE_ARGS = AsyncEngineArgs(model=MODEL, enforce_eager=True) -RAISED_ERROR = KeyError -RAISED_VALUE = "foo" - - -@pytest.fixture(scope="function") -def tmp_socket(): - with tempfile.TemporaryDirectory() as td: - yield f"ipc://{td}/{uuid.uuid4()}" - - -def run_with_evil_forward(engine_args: AsyncEngineArgs, ipc_path: str): - # Make engine. - engine = MQLLMEngine.from_engine_args( - engine_args=engine_args, - usage_context=UsageContext.UNKNOWN_CONTEXT, - ipc_path=ipc_path) - - # Raise error during first forward pass. - engine.engine.model_executor.execute_model = Mock( - side_effect=RAISED_ERROR(RAISED_VALUE)) - - # Run engine. - engine.start() - - -@pytest.mark.asyncio -async def test_evil_forward(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket, - run_fn=run_with_evil_forward) as engine: - - client = await engine.make_client() - - # Server should be healthy after initial probe. - await asyncio.sleep(2.0) - await client.check_health() - - # Throws an error that should get ENGINE_DEAD_ERROR. - with pytest.raises(MQEngineDeadError): - async for _ in client.generate(prompt="Hello my name is", - sampling_params=SamplingParams(), - request_id=str(uuid.uuid4())): - pass - assert client.errored - - await asyncio.sleep(1.0) - with pytest.raises(RAISED_ERROR): - await client.check_health() - assert client.errored - - # Shutdown. - client.close() - - -def run_with_evil_model_executor_health(engine_args: AsyncEngineArgs, - ipc_path: str): - # Make engine. - engine = MQLLMEngine.from_engine_args( - engine_args=engine_args, - usage_context=UsageContext.UNKNOWN_CONTEXT, - ipc_path=ipc_path) - - # Raise error during first forward pass. - engine.engine.model_executor.check_health = Mock(side_effect=RAISED_ERROR) - - # Run engine. - engine.start() - - -@pytest.mark.asyncio -async def test_failed_health_check(tmp_socket): - with RemoteMQLLMEngine( - engine_args=ENGINE_ARGS, - ipc_path=tmp_socket, - run_fn=run_with_evil_model_executor_health) as engine: - - client = await engine.make_client() - assert client.is_running - - # Health probe should throw RAISED_ERROR. - await asyncio.sleep(15.) 
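# --- Editor's aside (illustrative sketch, not part of the patch) ------------
# These MQ-specific health/error-propagation tests are deleted along with the
# engine. In V1 the analogous failure surfaces as EngineDeadError from
# AsyncLLM (the launcher.py hunk later in this patch keeps its EngineDeadError
# handler). A rough equivalent probe, with import paths assumed from vLLM V1:
from vllm.v1.engine.exceptions import EngineDeadError

async def probe_engine(async_llm, prompt, sampling_params):
    try:
        async for _ in async_llm.generate(prompt, sampling_params,
                                          request_id="health-probe"):
            pass
    except EngineDeadError:
        # The engine core died; the API server's handler triggers shutdown.
        raise
# ----------------------------------------------------------------------------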
- - with pytest.raises(RAISED_ERROR): - await client.check_health() - assert client.errored - - # Generate call should throw ENGINE_DEAD_ERROR - with pytest.raises(MQEngineDeadError): - async for _ in client.generate(prompt="Hello my name is", - sampling_params=SamplingParams(), - request_id=str(uuid.uuid4())): - pass - - client.close() - - -def run_with_evil_abort(engine_args: AsyncEngineArgs, ipc_path: str): - # Make engine. - engine = MQLLMEngine.from_engine_args( - engine_args=engine_args, - usage_context=UsageContext.UNKNOWN_CONTEXT, - ipc_path=ipc_path) - - # Raise error during abort call. - engine.engine.abort_request = Mock(side_effect=RAISED_ERROR) - - # Run engine. - engine.start() - - -@pytest.mark.asyncio -async def test_failed_abort(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket, - run_fn=run_with_evil_abort) as engine: - - client = await engine.make_client() - assert client.is_running - - # First check health should work. - await client.check_health() - - # Trigger an abort on the client side. - # This request ID does not exist, and will cause the engine to error - await client.abort(request_id="foo") - - # Future generation requests will now fail - # with reference to the original KeyError("foo") - with pytest.raises(MQEngineDeadError) as execinfo: - async for _ in client.generate( - prompt="Hello my name is", - sampling_params=SamplingParams(max_tokens=10), - request_id=str(uuid.uuid4())): - pass - assert "KeyError" in repr(execinfo.value) - assert client.errored - - # This should raise the original error. - with pytest.raises(RAISED_ERROR): - await client.check_health() - - client.close() - - -@pytest.mark.asyncio -async def test_batch_error(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket, - run_fn=run_with_evil_abort) as engine: - - client = await engine.make_client() - assert client.is_running - - # First check health should work. - await client.check_health() - - # Batch of requests - async def do_generate(client): - # min_tokens=2048 to keep busy the engine busy - # to get enough time to get process a request - # that will crash the engine - params = SamplingParams(min_tokens=2048, max_tokens=2048) - async for _ in client.generate(prompt="Hello my name is", - sampling_params=params, - request_id=str(uuid.uuid4())): - pass - - tasks = [asyncio.create_task(do_generate(client)) for _ in range(10)] - - # This request will force a processing batch to raise - # an exception and next the engine get errored - await client.abort(request_id="foo") - - # The batch of those request failed, then they - # should get the same exception as a MQEngineDeadError. - errors = await asyncio.gather(*tasks, return_exceptions=True) - for e in errors: - assert isinstance(e, MQEngineDeadError) - assert "KeyError" in repr(e) - - client.close() - - -@pytest.mark.asyncio -async def test_bad_request(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket) as engine: - - client = await engine.make_client() - - # Invalid request should fail, but not crash the server. - with pytest.raises(ValueError): - async for _ in client.generate(prompt="Hello my name is", - sampling_params=SamplingParams(), - request_id="abcd-1", - lora_request=LoRARequest( - "invalid-lora", 1, - "invalid-path")): - pass - - # This request should be okay. - async for _ in client.generate(prompt="Hello my name is", - sampling_params=SamplingParams(), - request_id="abcd-2"): - pass - - # Shutdown. 
- client.close() - - -@pytest.mark.asyncio -async def test_mp_crash_detection(monkeypatch: pytest.MonkeyPatch): - with monkeypatch.context() as m: - - parser = FlexibleArgumentParser( - description="vLLM's remote OpenAI server.") - parser = make_arg_parser(parser) - args = parser.parse_args([]) - - # When LLMEngine is loaded, it will crash. - def mock_init(): - raise ValueError - - m.setattr(LLMEngine, "__init__", mock_init) - - start = time.perf_counter() - async with build_async_engine_client(args): - pass - end = time.perf_counter() - - assert end - start < 100, ( - "Expected vLLM to gracefully shutdown in <100s " - "if there is an error in the startup.") - - -@pytest.mark.asyncio -async def test_mp_cuda_init(): - # it should not crash, when cuda is initialized - # in the API server process - import torch - torch.cuda.init() - parser = FlexibleArgumentParser(description="vLLM's remote OpenAI server.") - parser = make_arg_parser(parser) - args = parser.parse_args([]) - - async with build_async_engine_client(args): - pass - - -@pytest.mark.asyncio -async def test_engine_process_death(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket) as engine: - - client = await engine.make_client() - assert client.is_running - - # kill the engine process - engine.proc.kill() - - # Generate call should fail - with pytest.raises(MQEngineDeadError): - async for _ in client.generate(prompt="Hello my name is", - sampling_params=SamplingParams(), - request_id=str(uuid.uuid4())): - pass - - # And the health check should show the engine is dead - with pytest.raises(RuntimeError, match="Engine process .* died"): - await client.check_health() - - client.close() - - -def run_with_evil_input_processing(engine_args: AsyncEngineArgs, - ipc_path: str): - """Simulate an exception while preparing inputs for the model. - In the wild, this could be something like a multimodal input processor - failing on invalid image data.""" - - # Make engine. - engine = MQLLMEngine.from_engine_args( - engine_args=engine_args, - usage_context=UsageContext.UNKNOWN_CONTEXT, - ipc_path=ipc_path) - - runner = engine.engine.model_executor.driver_worker.worker.model_runner - - # Raise error in the model runner when adding a sequence group. - # See class ModelInputForGPUBuilder - def raiser(_, seq_group_metadata: SequenceGroupMetadata): - if seq_group_metadata.request_id.startswith("evil"): - raise RAISED_ERROR(RAISED_VALUE) - - runner.builder.per_seq_group_compute_fns.append(raiser) - - # Run engine. 
- engine.start() - - -@pytest.mark.asyncio -async def test_failed_inputs(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket, - run_fn=run_with_evil_input_processing) as engine: - - client = await engine.make_client() - assert client.is_running - - # Engine should be healthy - await client.check_health() - - async def run_failing_request(): - async for _ in client.generate( - prompt="Hello my name is", - sampling_params=SamplingParams(max_tokens=10), - request_id="evil" + str(uuid.uuid4())): - pass - - async def run_passing_request(): - async for _ in client.generate( - prompt="Hello my name is", - sampling_params=SamplingParams(max_tokens=10), - request_id=str(uuid.uuid4())): - pass - - passing_tasks = [ - asyncio.create_task(run_passing_request()) for _ in range(10) - ] - failing_tasks = [ - asyncio.create_task(run_failing_request()) for _ in range(10) - ] - await asyncio.gather(*failing_tasks, return_exceptions=True) - await asyncio.gather(*passing_tasks) - - # All the bad inputs should have raised - for task in failing_tasks: - with pytest.raises(RAISED_ERROR): - task.result() - - # But all good inputs should have still succeeded - for task in passing_tasks: - task.result() - - # And the engine should remain healthy - assert not client.errored - await client.check_health() - - client.close() diff --git a/tests/mq_llm_engine/test_load.py b/tests/mq_llm_engine/test_load.py deleted file mode 100644 index c934706611ae..000000000000 --- a/tests/mq_llm_engine/test_load.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Test that the MQLLMEngine is able to handle 10k concurrent requests.""" - -import asyncio -import tempfile -import uuid - -import pytest - -from tests.mq_llm_engine.utils import RemoteMQLLMEngine, generate -from vllm.engine.arg_utils import AsyncEngineArgs - -MODEL = "google/gemma-1.1-2b-it" -NUM_EXPECTED_TOKENS = 10 -NUM_REQUESTS = 10000 - -# Scenarios to test for num generated token. -ENGINE_ARGS = AsyncEngineArgs(model=MODEL) - - -@pytest.fixture(scope="function") -def tmp_socket(): - with tempfile.TemporaryDirectory() as td: - yield f"ipc://{td}/{uuid.uuid4()}" - - -@pytest.mark.asyncio -async def test_load(tmp_socket): - with RemoteMQLLMEngine(engine_args=ENGINE_ARGS, - ipc_path=tmp_socket) as engine: - - client = await engine.make_client() - - request_ids = [f"request-{i}" for i in range(NUM_REQUESTS)] - - # Create concurrent requests. - tasks = [] - for request_id in request_ids: - tasks.append( - asyncio.create_task( - generate(client, request_id, NUM_EXPECTED_TOKENS))) - - # Confirm that we got all the EXPECTED tokens from the requests. - failed_request_id = None - tokens = None - for task in tasks: - num_generated_tokens, request_id = await task - if (num_generated_tokens != NUM_EXPECTED_TOKENS - and failed_request_id is None): - failed_request_id = request_id - tokens = num_generated_tokens - - assert failed_request_id is None, ( - f"{failed_request_id} generated {tokens} but " - f"expected {NUM_EXPECTED_TOKENS}") - - # Shutdown. 
- client.close() diff --git a/tests/mq_llm_engine/utils.py b/tests/mq_llm_engine/utils.py deleted file mode 100644 index 7976d5031aea..000000000000 --- a/tests/mq_llm_engine/utils.py +++ /dev/null @@ -1,81 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -import multiprocessing -from typing import Callable, Union - -from vllm import SamplingParams -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.multiprocessing.client import MQLLMEngineClient -from vllm.engine.multiprocessing.engine import MQLLMEngine -from vllm.outputs import RequestOutput -from vllm.usage.usage_lib import UsageContext - - -async def generate( - client: MQLLMEngineClient, - request_id: str, - num_tokens: int, - return_output: bool = False) -> Union[RequestOutput, tuple[int, str]]: - - final_output = None - count = 0 - async for out in client.generate( - request_id=request_id, - prompt="Hello my name is Robert and", - sampling_params=SamplingParams(max_tokens=num_tokens, - temperature=0)): - - count += 1 - final_output = out - await asyncio.sleep(0.) - - if return_output: - return final_output - - # Confirm we generated all the tokens we expected. - return count, request_id - - -def run_normal(engine_args: AsyncEngineArgs, ipc_path: str): - # Make engine. - engine = MQLLMEngine.from_engine_args( - engine_args=engine_args, - usage_context=UsageContext.UNKNOWN_CONTEXT, - ipc_path=ipc_path) - - # Run engine. - engine.start() - - -class RemoteMQLLMEngine: - - def __init__(self, - engine_args: AsyncEngineArgs, - ipc_path: str, - run_fn: Callable = run_normal) -> None: - - self.engine_args = engine_args - self.ipc_path = ipc_path - context = multiprocessing.get_context("spawn") - self.proc = context.Process(target=run_fn, - args=(engine_args, ipc_path)) - self.proc.start() - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - self.proc.kill() - - async def make_client(self) -> MQLLMEngineClient: - engine_config = self.engine_args.create_engine_config() - client = MQLLMEngineClient(self.ipc_path, engine_config, self.proc.pid) - while True: - try: - await client.setup() - break - except TimeoutError: - assert self.proc.is_alive() - return client diff --git a/vllm/engine/multiprocessing/__init__.py b/vllm/engine/multiprocessing/__init__.py deleted file mode 100644 index 9f64ee0808df..000000000000 --- a/vllm/engine/multiprocessing/__init__.py +++ /dev/null @@ -1,145 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import uuid -from dataclasses import dataclass, field -from enum import Enum -from typing import List, Mapping, Optional, Union - -from vllm import PoolingParams -from vllm.inputs import PromptType -from vllm.lora.request import LoRARequest -from vllm.outputs import RequestOutput -from vllm.sampling_params import SamplingParams -from vllm.utils import Device - -VLLM_RPC_SUCCESS_STR = "SUCCESS" - -IPC_INPUT_EXT = "_input_socket" -IPC_OUTPUT_EXT = "_output_socket" -IPC_HEALTH_EXT = "_health_socket" -IPC_DATA_EXT = "_data_socket" - - -class MQEngineDeadError(RuntimeError): - pass - - -@dataclass -class RPCProcessRequest: - prompt: PromptType - params: Union[SamplingParams, PoolingParams] - request_id: str - lora_request: Optional[LoRARequest] = None - trace_headers: Optional[Mapping[str, str]] = None - priority: int = 0 - - def __init__( - self, - prompt: PromptType, - params: Union[SamplingParams, 
PoolingParams], - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - ) -> None: - super().__init__() - - self.prompt = prompt - self.params = params - self.request_id = request_id - self.lora_request = lora_request - self.trace_headers = trace_headers - self.priority = priority - - -@dataclass -class RPCError: - request_id: Optional[str] - is_engine_errored: bool - exception: BaseException - - -@dataclass -class RPCAbortRequest: - request_id: str - - -class RPCStartupRequest(Enum): - IS_SERVER_READY = 1 - - -@dataclass -class RPCStartupResponse: - tracing_enabled: bool - - -class RPCUProfileRequest(Enum): - START_PROFILE = 1 - STOP_PROFILE = 2 - - -class RPCResetMultiModalCacheRequest(Enum): - RESET = 1 - - -@dataclass -class RPCResetPrefixCacheRequest: - device: Device - - -class RPCSleepRequest(Enum): - SLEEP_LEVEL_1 = 1 - SLEEP_LEVEL_2 = 2 - - -@dataclass -class RPCWakeUpRequest: - tags: Optional[list[str]] = None - - -@dataclass -class RPCIsSleepingRequest: - # Set the default value of request_id to a new UUID - request_id: str = field(default_factory=lambda: str(uuid.uuid4())) - - -@dataclass -class RPCIsSleepingResponse: - request_id: str - is_sleeping: bool - - -@dataclass -class RPCLoadAdapterRequest: - lora_request: LoRARequest - # Set the default value of request_id to a new UUID - request_id: str = field(default_factory=lambda: str(uuid.uuid4())) - - -@dataclass -class RPCAdapterLoadedResponse: - request_id: str - lora_loaded: bool - - -RPC_REQUEST_T = Union[RPCProcessRequest, RPCAbortRequest, RPCStartupRequest, - RPCUProfileRequest, RPCLoadAdapterRequest, - RPCResetMultiModalCacheRequest, - RPCResetPrefixCacheRequest, RPCSleepRequest, - RPCWakeUpRequest, RPCIsSleepingRequest] - -REQUEST_OUTPUTS_T = Union[List[RequestOutput], RPCAdapterLoadedResponse, - RPCIsSleepingResponse, RPCError] - - -def ENGINE_DEAD_ERROR( - error: Optional[BaseException] = None) -> MQEngineDeadError: - if error is None: - return MQEngineDeadError( - "Engine loop is not running. Inspect the stacktrace to " - "find the original error") - - return MQEngineDeadError( - "Engine loop is not running. 
Inspect the stacktrace to " - f"find the original error: {repr(error)}.") diff --git a/vllm/engine/multiprocessing/client.py b/vllm/engine/multiprocessing/client.py deleted file mode 100644 index 7d1f29a9824d..000000000000 --- a/vllm/engine/multiprocessing/client.py +++ /dev/null @@ -1,643 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -import copy -import pickle -from contextlib import contextmanager, suppress -from typing import (Any, AsyncGenerator, Dict, Iterable, Iterator, List, - Mapping, Optional, Union) - -import cloudpickle -import psutil -import zmq -import zmq.asyncio -from zmq import Frame # type: ignore[attr-defined] -from zmq.asyncio import Socket - -from vllm import PoolingParams -from vllm.config import DecodingConfig, ModelConfig, VllmConfig -from vllm.core.scheduler import SchedulerOutputs -# yapf conflicts with isort for this block -# yapf: disable -from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT, - IPC_HEALTH_EXT, IPC_INPUT_EXT, - IPC_OUTPUT_EXT, RPC_REQUEST_T, - VLLM_RPC_SUCCESS_STR, RPCAbortRequest, - RPCAdapterLoadedResponse, RPCError, - RPCIsSleepingRequest, - RPCIsSleepingResponse, - RPCLoadAdapterRequest, - RPCProcessRequest, - RPCResetMultiModalCacheRequest, - RPCResetPrefixCacheRequest, - RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest) -from vllm.engine.protocol import EngineClient -# yapf: enable -from vllm.envs import VLLM_RPC_TIMEOUT -from vllm.inputs import PromptType -from vllm.inputs.preprocess import InputPreprocessor -from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.outputs import PoolingRequestOutput, RequestOutput -from vllm.sampling_params import SamplingParams -from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs -from vllm.utils import Device - -logger = init_logger(__name__) - - -class MQClientClosedError(Exception): - """Exception class raised when the client is used post-close. - - The client can be closed, which closes the ZMQ context. This normally - happens on server shutdown. In some cases, methods like abort and - do_log_stats will still be called and then try to open a socket, which - causes a ZMQError and creates a huge stack trace. - So, we throw this error such that we can suppress it. - """ - - -class MQLLMEngineClient(EngineClient): - """A client wrapper for MQLLMEngine that conforms to the - EngineClient protocol. - - MQLLMEngine and MQLLMEngineClient are intended to run in separate - processes communicating via zeromq ipc sockets. - - The entrypoint to MQLLMEngineClient is through the generate() - method. On generate() MQLLMEngine does three things: - - Creates an asyncio output queue - - Sends a RPCGenerateRequest to the MQLLMEngine via zmq - - Pulls RequestOutputs from its queue and yields them - - MQLLMEngine runs two background loops: - - output_loop: the output loop pulls List[RequestOutput] - from the MQLLMEngine via zmq (each list is the output - of one engine_step in the LLMEngine). It then parses - the list and pushes individual request_outputs into - the corresponding output_queue such that they can be - consumed by the .generate() method. 
- - health_loop: the health loop queries the health socket - every N seconds, confirming the engine is healthy - """ - - def __init__(self, ipc_path: str, engine_config: VllmConfig, - engine_pid: int): - self.context = zmq.asyncio.Context() - self._errored_with: Optional[BaseException] = None - - # Get the configs. - self.vllm_config = engine_config - self.model_config = engine_config.model_config - self.decoding_config = engine_config.decoding_config - - if self.vllm_config.model_config.skip_tokenizer_init: - self.tokenizer = None - - else: - # Create the tokenizer group. - self.tokenizer = init_tokenizer_from_configs( - model_config=self.model_config, - scheduler_config=engine_config.scheduler_config, - lora_config=engine_config.lora_config) - - self.input_preprocessor = InputPreprocessor(self.model_config, - self.tokenizer) - - # Send RPCGenerateRequest to the MQLLMEngine. - self.input_socket: Socket = self.context.socket(zmq.constants.PUSH) - self.input_socket.connect(f"{ipc_path}{IPC_INPUT_EXT}") - - # Receive streams of RequestOutput from the MQLLMEngine. - self.output_socket: Socket = self.context.socket(zmq.constants.PULL) - self.output_socket.connect(f"{ipc_path}{IPC_OUTPUT_EXT}") - - # IPC path for acking heartbeats. - self.heartbeat_socket: Socket = self.context.socket(zmq.constants.PULL) - self.heartbeat_socket.connect(f"{ipc_path}{IPC_HEALTH_EXT}") - - # IPC path for the data socket. - self.data_ipc_path = f"{ipc_path}{IPC_DATA_EXT}" - - # Stream for each individual request. - self.output_queues: Dict[str, asyncio.Queue] = {} - - # Loop to handle output of the LLMEngine periodically. - # Started after the MQLLMEngine is ready so that we can - # build the Client in an executor to enable clean shutdown. - self.output_loop: Optional[asyncio.Task] = None - - # Loop to check health of the LLMEngine periodically. - # Started after the MQLLMEngine is ready. - self.health_loop: Optional[asyncio.Task] = None - self._engine_process = psutil.Process(engine_pid) - - @staticmethod - def is_unsupported_config(vllm_config: VllmConfig): - # Pipeline parallel not yet supported - return vllm_config.parallel_config.pipeline_parallel_size > 1 - - @contextmanager - def get_data_socket(self) -> Iterator[Socket]: - socket = self.context.socket(zmq.constants.DEALER) - try: - socket.connect(self.data_ipc_path) - yield socket - finally: - socket.close(linger=0) - - async def run_heartbeat_loop(self, timeout: int): - """Background loop that continually checks to ensure the engine process - is still alive. 
- """ - try: - while True: - # Check if the engine process is running: - if not self._engine_process.is_running() or ( - self._engine_process.status() == psutil.STATUS_ZOMBIE): - # NB: is_running() returns True for zombies - self._set_errored( - RuntimeError( - f"Engine process (pid {self._engine_process.pid}) " - "died.")) - break - - if await self.heartbeat_socket.poll(timeout=timeout): - # Heartbeat received- check the message - await self._check_success( - error_message="Heartbeat failed.", - socket=self.heartbeat_socket) - - logger.debug("Heartbeat successful.") - - except asyncio.CancelledError: - logger.debug("Shutting down MQLLMEngineClient check health loop.") - - except psutil.NoSuchProcess: - self._set_errored( - RuntimeError( - f"Engine process (pid {self._engine_process.pid}) died.")) - - except Exception as e: - self._set_errored(e) - - async def run_output_handler_loop(self): - """Get RequestOutputs from Engine and stream to Request Queues""" - - try: - while True: - # Poll, checking for ENGINE_DEAD - while await self.output_socket.poll(timeout=VLLM_RPC_TIMEOUT - ) == 0: - logger.debug("Waiting for output from MQLLMEngine.") - - # If errored, alert all running requests. - if self.errored: - for queue_j in tuple(self.output_queues.values()): - queue_j.put_nowait( - ENGINE_DEAD_ERROR(self._errored_with)) - return - - message: Frame = await self.output_socket.recv(copy=False) - request_outputs = pickle.loads(message.buffer) - - is_error = isinstance(request_outputs, - (BaseException, RPCError)) - if is_error: - if isinstance(request_outputs, RPCError): - rpc_error: RPCError = request_outputs - request_id = rpc_error.request_id - exception = rpc_error.exception - is_engine_errored = rpc_error.is_engine_errored - else: - # MPLLMEngine should always return an RPCError to - # the output_socket when an issue arises. - # If we are here, we are in a bad state and - # should shut down the server. - error: BaseException = request_outputs - logger.error( - "Received Exception %s rather than RPCError from " - "MPLLMEngine. This should never happen.", error) - request_id = None - exception = error - is_engine_errored = True - - # Set to error state only on engine critical error - # (and record only the first one) - if is_engine_errored and not self._errored_with: - self._errored_with = exception - # If engine is errored, no matter the type of exception - # it will no longer be able to receive new requests, - # therefore we have to inform that the current - # processed requests failed as well. Send back a dead - # engine error give this feedback and also give a - # 'hint' to the server to shut down next. - exception = self.dead_error - - if request_id is None: - # If request_id is None, then the engine raised an - # exception for a batch, and we may not know the - # request that caused it, neither if it was actually - # caused by any of them (e.g. CUDA OOM). Therefore we - # broadcast the same exception for all requests. - for queue_i in tuple(self.output_queues.values()): - queue_i.put_nowait(exception) - else: - queue = self.output_queues.get(request_id) - if queue is not None: - queue.put_nowait(exception) - # Put each output into the appropriate queue. 
- elif isinstance( - request_outputs, - (RPCAdapterLoadedResponse, RPCIsSleepingResponse)): - self._add_output(request_outputs) - else: - for request_output in request_outputs: - self._add_output(request_output) - - except asyncio.CancelledError: - logger.debug("Shutting down MQLLMEngineClient output handler.") - - def _add_output(self, request_output: Union[RequestOutput, - RPCAdapterLoadedResponse, - RPCIsSleepingResponse]): - queue = self.output_queues.get(request_output.request_id) - if queue is not None: - queue.put_nowait(request_output) - - async def setup(self): - """Set up the client before it starts sending server requests.""" - - # Start output_loop - if self.output_loop is None: - # only generate once to avoid multiple concurrent output_loops - # this will lead to race conditions and wrong orders of tokens - # returned by the engine - # setup will be called multiple times during the startup of - # the engine - self.output_loop = asyncio.create_task( - self.run_output_handler_loop()) - - with self.get_data_socket() as socket: - # Wait until server is ready. - response = await self._wait_for_server_rpc(socket) - - self.tracing_flag = response.tracing_enabled - - # Start health_loop. - if self.health_loop is None: - self.health_loop = asyncio.create_task( - self.run_heartbeat_loop(timeout=VLLM_RPC_TIMEOUT)) - - def close(self): - """Destroy the ZeroMQ Context.""" - # Close all sockets and terminate the context. - self.context.destroy(linger=0) - - # Cancel background tasks. - if self.health_loop is not None: - self.health_loop.cancel() - if self.output_loop is not None: - self.output_loop.cancel() - - def _set_errored(self, e: BaseException): - logger.exception(repr(e)) - if self._errored_with is None: - self._errored_with = e - - @staticmethod - async def _send_get_data_rpc_request(request: RPCStartupRequest, - expected_type: Any, - error_message: str, - socket: Socket) -> Any: - """Send an RPC request that is expecting data back.""" - - # Ping RPCServer with a request. - await socket.send_multipart((pickle.dumps(request), ), copy=False) - - # Make sure the server responds in time. - if await socket.poll(timeout=VLLM_RPC_TIMEOUT) == 0: - raise TimeoutError("RPCServer didn't reply within " - f"{VLLM_RPC_TIMEOUT} ms") - - # Await the data from the Server. 
- frame = await socket.recv(copy=False) - data = pickle.loads(frame.buffer) - - if isinstance(data, BaseException): - raise data - elif not isinstance(data, expected_type): - raise ValueError(error_message) - - return data - - @staticmethod - async def _send_one_way_rpc_request(request: RPC_REQUEST_T, - socket: Socket): - """Send one-way RPC request to trigger an action.""" - - if socket.closed: - raise MQClientClosedError() - - await socket.send_multipart((pickle.dumps(request), )) - - async def _await_ack(self, error_message: str, socket: Socket): - """Await acknowledgement that a request succeeded.""" - - if socket.closed: - raise MQClientClosedError() - - if await socket.poll(timeout=VLLM_RPC_TIMEOUT) == 0: - raise TimeoutError("MQLLMEngine didn't reply within " - f"{VLLM_RPC_TIMEOUT}ms") - - await self._check_success(error_message, socket) - - @staticmethod - async def _check_success(error_message: str, socket: Socket): - """Confirm that socket has a VLLM_RPC_SUCCESS_STR message""" - - if socket.closed: - raise MQClientClosedError() - - frame = await socket.recv(copy=False) - response = pickle.loads(frame.buffer) - - # Raise error if unsuccessful - if isinstance(response, BaseException): - raise response - elif (not isinstance(response, str) - or response != VLLM_RPC_SUCCESS_STR): - raise ValueError(error_message) - - async def get_input_preprocessor(self) -> InputPreprocessor: - return self.input_preprocessor - - async def get_tokenizer(self, lora_request: Optional[LoRARequest] = None): - if self.tokenizer is None: - return None - else: - return await self.tokenizer.get_lora_tokenizer_async(lora_request) - - async def get_vllm_config(self) -> VllmConfig: - return self.vllm_config - - async def get_decoding_config(self) -> DecodingConfig: - return self.decoding_config - - async def get_model_config(self) -> ModelConfig: - return self.model_config - - async def is_tracing_enabled(self) -> bool: - return self.tracing_flag - - async def _wait_for_server_rpc(self, socket: Socket) -> RPCStartupResponse: - """Wait for the RPCServer to start up.""" - - return await self._send_get_data_rpc_request( - request=RPCStartupRequest.IS_SERVER_READY, - expected_type=RPCStartupResponse, - error_message="Unable to start RPC Server", - socket=socket) - - async def abort(self, request_id: Union[str, Iterable[str]]): - """Send an ABORT_REQUEST signal to the RPC Server""" - - if not isinstance(request_id, str): - raise RuntimeError("Only single-request abort supported in" - " deprecated V0") - - with suppress(MQClientClosedError): - await self._send_one_way_rpc_request( - request=RPCAbortRequest(request_id), socket=self.input_socket) - - async def do_log_stats( - self, - scheduler_outputs: Optional[SchedulerOutputs] = None, - model_output: Optional[List[SamplerOutput]] = None, - ) -> None: - """ - Ignore do_log_stats (handled on MQLLMEngine polling) - """ - pass - - async def check_health(self): - """ - The check health loop probes the health status of the - Engine's health every N seconds and sets _errored_with - if the engine is unhealthy. 
- """ - if self._errored_with is not None: - raise self._errored_with - - @property - def is_running(self) -> bool: - return not self.errored - - @property - def is_stopped(self) -> bool: - return self.errored - - @property - def errored(self) -> bool: - return self._errored_with is not None - - @property - def dead_error(self) -> BaseException: - return ENGINE_DEAD_ERROR(self._errored_with) - - def generate( - self, - prompt: PromptType, - sampling_params: SamplingParams, - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - ) -> AsyncGenerator[RequestOutput, None]: - """Generate outputs for a request. - - Generate outputs for a request. This method is a coroutine. It adds the - request into the waiting queue of the LLMEngine and streams the outputs - from the LLMEngine to the caller. - - Args: - prompt: The prompt to the LLM. See - [`PromptType`][vllm.inputs.PromptType] for more details about - the format of each input. - sampling_params: The sampling parameters of the request. - request_id: The unique id of the request. - lora_request: LoRA request to use for generation, if any. - trace_headers: OpenTelemetry trace headers. - priority: Priority of the request (lower means earlier handling). - Any priority other than 0 will lead to an error if the - scheduling policy is not "priority". - """ - return self._process_request(prompt, sampling_params, request_id, - lora_request, trace_headers, priority) - - def encode( - self, - prompt: PromptType, - pooling_params: PoolingParams, - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - tokenization_kwargs: Optional[dict[str, Any]] = None, - ) -> AsyncGenerator[PoolingRequestOutput, None]: - raise NotImplementedError( - "Pooling models are not supported in vLLM V0") - - async def _process_request( - self, - prompt: PromptType, - params: SamplingParams, - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - ) -> AsyncGenerator[RequestOutput, None]: - """Send an RPCGenerateRequest to the RPCServer and stream responses.""" - - # If already dead, error out. - if self._errored_with is not None: - raise ENGINE_DEAD_ERROR(self._errored_with) - - # Ensure the request id is unique among running requests - if request_id in self.output_queues: - raise ValueError(f"Request {request_id} already exists") - - # 1) Create output queue for this request. - queue: asyncio.Queue[Union[RequestOutput, - BaseException]] = asyncio.Queue() - self.output_queues[request_id] = queue - - try: - # 2) Detach logits processors so that they can be pickled - # separately (may require cloudpickle which is slower) - if params.logits_processors: - # Defensive shallow copy - params = copy.copy(params) - logits_processors = params.logits_processors - params.logits_processors = None - lp_bytes = cloudpickle.dumps(logits_processors) - else: - lp_bytes = None - - request_bytes = pickle.dumps( - RPCProcessRequest( - prompt=prompt, - params=params, - request_id=request_id, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - )) - - # 3) Send the RPCGenerateRequest to the MQLLMEngine. - parts = (request_bytes, - lp_bytes) if lp_bytes else (request_bytes, ) - await self.input_socket.send_multipart(parts, copy=False) - - # 4) Stream the RequestOutputs from the output queue. 
Note - # that the output_loop pushes RequestOutput objects to this - # queue after pulling them from the zmq socket. - finished = False - try: - while not finished: - request_output = await queue.get() - - if isinstance(request_output, BaseException): - raise request_output - - finished = request_output.finished - yield request_output - finally: - # Request was canceled by the client. - if not finished and not self.errored: - await self.abort(request_id) - finally: - self.output_queues.pop(request_id) - - async def start_profile(self) -> None: - """Start profiling the engine""" - - await self._send_one_way_rpc_request( - request=RPCUProfileRequest.START_PROFILE, socket=self.input_socket) - - async def stop_profile(self) -> None: - """Stop profiling the engine""" - - await self._send_one_way_rpc_request( - request=RPCUProfileRequest.STOP_PROFILE, socket=self.input_socket) - - async def reset_mm_cache(self) -> None: - """Reset the multi-modal cache""" - - await self._send_one_way_rpc_request( - request=RPCResetMultiModalCacheRequest.RESET, - socket=self.input_socket) - - async def reset_prefix_cache(self, - device: Optional[Device] = None) -> None: - """Reset the prefix cache""" - - await self._send_one_way_rpc_request( - request=RPCResetPrefixCacheRequest(device), - socket=self.input_socket) - - async def sleep(self, level: int = 1) -> None: - """Sleep the engine for a given level""" - return await self._send_one_way_rpc_request( - request=RPCSleepRequest(level), socket=self.input_socket) - - async def wake_up(self, tags: Optional[list[str]] = None) -> None: - """Wake up the engine""" - return await self._send_one_way_rpc_request( - request=RPCWakeUpRequest(tags), socket=self.input_socket) - - async def is_sleeping(self) -> bool: - """Check whether the engine is sleeping""" - request = RPCIsSleepingRequest() - - queue: asyncio.Queue[Union[BaseException, - RPCIsSleepingResponse]] = asyncio.Queue() - self.output_queues[request.request_id] = queue - - request_bytes = pickle.dumps(request) - await self.input_socket.send_multipart((request_bytes, ), copy=False) - - request_output = await queue.get() - self.output_queues.pop(request.request_id) - - if isinstance(request_output, BaseException): - raise request_output - return request_output.is_sleeping - - async def add_lora(self, lora_request: LoRARequest) -> bool: - """Load a new LoRA adapter into the engine for future requests.""" - # Uses the same I/O as generate requests - request = RPCLoadAdapterRequest(lora_request) - - # Create output queue for this request. 
- queue: asyncio.Queue[Union[ - BaseException, RPCAdapterLoadedResponse]] = asyncio.Queue() - self.output_queues[request.request_id] = queue - - # Send the request - request_bytes = pickle.dumps(request) - await self.input_socket.send_multipart((request_bytes, ), copy=False) - - # Wait for the response - request_output = await queue.get() - self.output_queues.pop(request.request_id) - - # Raise on error, otherwise happily return None - if isinstance(request_output, BaseException): - raise request_output - return request_output.lora_loaded diff --git a/vllm/engine/multiprocessing/engine.py b/vllm/engine/multiprocessing/engine.py deleted file mode 100644 index 138283d4c8a7..000000000000 --- a/vllm/engine/multiprocessing/engine.py +++ /dev/null @@ -1,470 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pickle -import signal -from contextlib import contextmanager -from typing import Iterator, List, Optional, Union - -import cloudpickle -import zmq - -from vllm import AsyncEngineArgs, SamplingParams -from vllm.config import VllmConfig -from vllm.engine.llm_engine import LLMEngine -# yapf conflicts with isort for this block -# yapf: disable -from vllm.engine.multiprocessing import (ENGINE_DEAD_ERROR, IPC_DATA_EXT, - IPC_HEALTH_EXT, IPC_INPUT_EXT, - IPC_OUTPUT_EXT, REQUEST_OUTPUTS_T, - VLLM_RPC_SUCCESS_STR, RPCAbortRequest, - RPCAdapterLoadedResponse, RPCError, - RPCIsSleepingRequest, - RPCIsSleepingResponse, - RPCLoadAdapterRequest, - RPCProcessRequest, - RPCResetMultiModalCacheRequest, - RPCResetPrefixCacheRequest, - RPCSleepRequest, RPCStartupRequest, - RPCStartupResponse, - RPCUProfileRequest, RPCWakeUpRequest) -# yapf: enable -from vllm.logger import init_logger -from vllm.outputs import RequestOutput -from vllm.transformers_utils.config import ( - maybe_register_config_serialize_by_value) -from vllm.usage.usage_lib import UsageContext -from vllm.utils import deprecate_kwargs -from vllm.worker.model_runner_base import InputProcessingError - -logger = init_logger(__name__) - -POLLING_TIMEOUT_MS = 10000 -HEALTHY_RESPONSE = (pickle.dumps(VLLM_RPC_SUCCESS_STR), ) - - -class MQLLMEngine: - """A multiprocessing wrapper for - [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. - - This class is used to wrap the - [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] class to enable use - in concurrent manner. It runs a background loop and uses zeromq to - receive new requests and stream outputs incrementally via ipc. - - The [`LLMEngine`][vllm.engine.llm_engine.LLMEngine] generate or encode - process is kicked off when a new RPCProcessRequest is received by the - input_socket. - - The self.engine_loop checks the input_socket for new requests, - adds them to the LLMEngine if there are any, calls the internal - [`LLMEngine.step()`][vllm.engine.llm_engine.LLMEngine.step], and sends - the RequestOutputs back over the output_socket. - - If use_async_sockets is set, the logic associated with reading new - requests from the socket and sending data to the socket is passed - as a callback to the llm_engine, which calls the logic asynchronously - such that the IPC can be overlapped with the GPU. - - Args: - ipc_path: Base path for zeromq interprocess messaging - use_async_sockets: Whether to make send/recv async with GPU - log_requests: Whether to log the requests. - *args: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. - **kwargs: Arguments for [`LLMEngine`][vllm.engine.llm_engine.LLMEngine]. 
- """ - - def __init__(self, - ipc_path: str, - use_async_sockets: bool, - *args, - log_requests: bool = True, - **kwargs) -> None: - # For MQLLMEngine, we can use cached outputs, since each new request - # output is immediately pickled and send over the socket, which frees - # the python object to be reused again. - kwargs['use_cached_outputs'] = True - - self.engine = LLMEngine(*args, **kwargs) - self.log_requests = log_requests - - self.use_async_sockets = use_async_sockets - if self.use_async_sockets: - self.engine.process_request_outputs_callback = \ - self._async_socket_engine_callback - - self.ctx = zmq.Context() # type: ignore[attr-defined] - - # Receive input from the client. - self.input_socket = self.ctx.socket(zmq.constants.PULL) - self.input_socket.bind(f"{ipc_path}{IPC_INPUT_EXT}") - - # Send output stream back to client. - self.output_socket = self.ctx.socket(zmq.constants.PUSH) - self.output_socket.bind(f"{ipc_path}{IPC_OUTPUT_EXT}") - - # Send heartbeats back to client. - self.heartbeat_socket = self.ctx.socket(zmq.constants.PUSH) - self.heartbeat_socket.bind(f"{ipc_path}{IPC_HEALTH_EXT}") - - # IPC path for the data socket. - self.data_ipc_path = f"{ipc_path}{IPC_DATA_EXT}" - - # Error state. - self._errored_with: Optional[BaseException] = None - - @property - def dead_error(self) -> BaseException: - if self._errored_with is not None: - return ENGINE_DEAD_ERROR(self._errored_with) - else: - return ENGINE_DEAD_ERROR() - - @classmethod - @deprecate_kwargs( - "disable_log_requests", - additional_message=("This argument will have no effect. " - "Use `enable_log_requests` instead."), - ) - def from_vllm_config( - cls, - vllm_config: VllmConfig, - usage_context: UsageContext, - enable_log_requests: bool, - disable_log_stats: bool, - ipc_path: str, - disable_log_requests: bool = True, # Deprecated, will be removed - ) -> "MQLLMEngine": - # Setup plugins for each process - from vllm.plugins import load_general_plugins - load_general_plugins() - - use_async_sockets = vllm_config.model_config.use_async_output_proc - - return cls( - vllm_config=vllm_config, - executor_class=LLMEngine._get_executor_cls(vllm_config), - ipc_path=ipc_path, - usage_context=usage_context, - use_async_sockets=use_async_sockets, - log_requests=enable_log_requests, - log_stats=(not disable_log_stats), - ) - - @staticmethod - def from_engine_args(engine_args: AsyncEngineArgs, - usage_context: UsageContext, ipc_path: str): - """Creates an MQLLMEngine from the engine arguments.""" - - vllm_config = engine_args.create_engine_config(usage_context) - return MQLLMEngine.from_vllm_config( - ipc_path=ipc_path, - vllm_config=vllm_config, - usage_context=usage_context, - enable_log_requests=engine_args.enable_log_requests, - disable_log_stats=engine_args.disable_log_stats, - ) - - def start(self): - try: - try: - logger.debug("Starting Startup Loop.") - self.run_startup_loop() - logger.debug("Starting Engine Loop.") - self.run_engine_loop() - except Exception as e: - logger.exception(repr(e)) - except KeyboardInterrupt: - logger.debug("Shutting down MQLLMEngine.") - finally: - logger.debug("MQLLMEngine is shut down.") - self.cleanup() - - def cleanup(self): - """Cleanup zeromq state on shutdown.""" - # Closes all sockets and destroys context. 
- self.ctx.destroy(linger=0) - del self.engine - - @contextmanager - def make_data_socket( - self) -> Iterator[zmq.Socket]: # type: ignore[name-defined] - socket = self.ctx.socket(zmq.constants.ROUTER) - try: - socket.bind(self.data_ipc_path) - yield socket - finally: - socket.close(linger=0) - - def run_startup_loop(self) -> None: - """Startup loop for sending data from Engine -> Client.""" - - with self.make_data_socket() as socket: - response: Union[RPCStartupResponse, BaseException] - try: - identity, message = socket.recv_multipart(copy=False) - request: RPCStartupRequest = pickle.loads(message.buffer) - - # Handle the query from the Client. - if request == RPCStartupRequest.IS_SERVER_READY: - tracing_enabled = self.engine.is_tracing_enabled() - response = RPCStartupResponse( - tracing_enabled=tracing_enabled) - - except Exception as e: - response = e - - socket.send_multipart((identity, pickle.dumps(response)), - copy=False) - - def run_engine_loop(self): - """Core busy loop of the LLMEngine.""" - - while True: - if not self.engine.has_unfinished_requests(): - # Poll until there is work to do. - while self.input_socket.poll(timeout=POLLING_TIMEOUT_MS) == 0: - # When there's no work, check on engine health and send - # health status back to client - self._health_check() - self.engine.do_log_stats() - logger.debug("Waiting for new requests in engine loop.") - - # Handle any input from the client. - self.handle_new_input() - - # Engine step. - request_outputs = self.engine_step() - - # Send request outputs (if async, done in engine_step callback). - if not self.use_async_sockets: - self._send_outputs(request_outputs) - - def engine_step(self) -> List[RequestOutput]: - """Engine step wrapper with error handling.""" - try: - return self.engine.step() - except SystemExit: - raise - except InputProcessingError as e: - # Special case where we handle an error preparing the inputs for - # a single request in the batch - rpc_err = RPCError(request_id=e.request_id, - is_engine_errored=False, - exception=e.__cause__) - self._send_outputs(rpc_err) - return [] - except BaseException as e: - self._set_errored(e) - rpc_err = RPCError(request_id=None, - is_engine_errored=True, - exception=e) - self._send_outputs(rpc_err) - raise e - - def handle_new_input(self): - """Handle new input from the socket""" - try: - while self.input_socket.poll(timeout=0) != 0: - frames = self.input_socket.recv_multipart(copy=False) - request = pickle.loads(frames[0].buffer) - - if isinstance(request, RPCProcessRequest): - if len(frames) > 1: - # Use cloudpickle for logits processors - assert isinstance(request.params, SamplingParams) - lprocs = cloudpickle.loads(frames[1].buffer) - request.params.logits_processors = lprocs - self._handle_process_request(request) - elif isinstance(request, RPCAbortRequest): - self._handle_abort_request(request) - elif isinstance(request, RPCUProfileRequest): - if request == RPCUProfileRequest.START_PROFILE: - self.start_profile() - else: - self.stop_profile() - elif isinstance(request, RPCLoadAdapterRequest): - self._handle_load_adapter_request(request) - elif isinstance(request, RPCResetMultiModalCacheRequest): - self.reset_mm_cache() - elif isinstance(request, RPCResetPrefixCacheRequest): - self.reset_prefix_cache() - elif isinstance(request, RPCSleepRequest): - self.sleep(request.value) - elif isinstance(request, RPCWakeUpRequest): - self.wake_up(request.tags) - elif isinstance(request, RPCIsSleepingRequest): - self._handle_is_sleeping_request(request) - else: - raise 
ValueError("Unknown RPCRequest Type: " - f"{type(request)}") - - except Exception as e: - self._set_errored(e) - self._send_unhealthy(e) - raise e from None - - def _handle_process_request(self, request: RPCProcessRequest): - """Handle RPCProcessRequest by adding it to the LLMEngine.""" - request_id = request.request_id - - if self._errored_with is not None: - rpc_err = RPCError(request_id=request_id, - is_engine_errored=True, - exception=ENGINE_DEAD_ERROR(self._errored_with)) - self._send_outputs(rpc_err) - - try: - self.engine.add_request(request_id=request_id, - prompt=request.prompt, - params=request.params, - lora_request=request.lora_request, - trace_headers=request.trace_headers, - priority=request.priority) - - if self.log_requests: - logger.info("Added request %s.", request.request_id) - - except Exception as e: - # We do not set self._errored = True here, since the error - # is due to an issue adding this request to the engine, - # rather than an issue with the engine itself. - logger.debug("Failed to add request %s to engine. %s", - request.request_id, e) - is_errored = self._errored_with is not None - rpc_err = RPCError(request_id=request_id, - is_engine_errored=is_errored, - exception=e) - self._send_outputs(rpc_err) - - # Remove request from the engine. - self.engine.abort_request(request_id) - - def _handle_abort_request(self, request: RPCAbortRequest): - self.engine.abort_request(request.request_id) - if self.log_requests: - logger.info("Aborted request %s.", request.request_id) - - def _handle_load_adapter_request(self, request: RPCLoadAdapterRequest): - try: - lora_loaded = self.engine.add_lora(request.lora_request) - except BaseException as e: - # Send back an error if the adater fails to load - rpc_err = RPCError(request_id=request.request_id, - is_engine_errored=False, - exception=e) - self._send_outputs(rpc_err) - return - # Otherwise, send back the successful load message - self._send_outputs( - RPCAdapterLoadedResponse(request_id=request.request_id, - lora_loaded=lora_loaded)) - - def _handle_is_sleeping_request(self, request: RPCIsSleepingRequest): - is_sleeping = self.is_sleeping() - self._send_outputs( - RPCIsSleepingResponse(request_id=request.request_id, - is_sleeping=is_sleeping)) - - def _health_check(self): - # Send unhealthy if engine has already errored - if self._errored_with is not None: - self._send_unhealthy(self._errored_with) - try: - self.engine.check_health() - self._send_healthy() - except Exception as e: - self._set_errored(e) - self._send_unhealthy(e) - - def _send_outputs(self, outputs: REQUEST_OUTPUTS_T): - """Send outputs back to the engine client. These can be: - - Exceptions - - A list of generation outputs - - A response from loading a lora adapter - """ - if outputs: - try: - from ray.exceptions import RayTaskError - - # RayTaskError might not pickelable here. We need to unpack the - # underlying exception as the real exception in the output. 
- if (isinstance(outputs, RPCError) - and isinstance(outputs.exception, RayTaskError)): - outputs.exception = outputs.exception.cause - except ImportError: - pass - - output_bytes = pickle.dumps(outputs) - self.output_socket.send_multipart((output_bytes, ), copy=False) - - def _send_healthy(self): - """Send HEALTHY message to RPCClient.""" - if not self.heartbeat_socket.closed: - self.heartbeat_socket.send_multipart(HEALTHY_RESPONSE, copy=False) - - def _send_unhealthy(self, error: BaseException): - """Send UNHEALTHY message to RPCClient.""" - if not self.heartbeat_socket.closed: - error_bytes = pickle.dumps(error) - self.heartbeat_socket.send_multipart((error_bytes, ), copy=False) - - def _async_socket_engine_callback(self, - request_outputs: REQUEST_OUTPUTS_T): - """Callback used by engine to make socket handling async with GPU.""" - self._send_outputs(request_outputs) - self.handle_new_input() - - def _set_errored(self, e: BaseException): - """Log and set errored status if this is the first issue.""" - if self._errored_with is None: - self._errored_with = e - - def start_profile(self) -> None: - self.engine.start_profile() - - def stop_profile(self) -> None: - self.engine.stop_profile() - - def reset_mm_cache(self) -> bool: - return self.engine.reset_mm_cache() - - def reset_prefix_cache(self) -> bool: - return self.engine.reset_prefix_cache() - - def sleep(self, level: int = 1) -> None: - self.engine.sleep(level) - - def wake_up(self, tags: Optional[list[str]] = None) -> None: - self.engine.wake_up(tags) - - def is_sleeping(self) -> bool: - return self.engine.is_sleeping() - - -def signal_handler(*_) -> None: - raise KeyboardInterrupt("MQLLMEngine terminated") - - -def run_mp_engine(vllm_config: VllmConfig, usage_context: UsageContext, - ipc_path: str, disable_log_stats: bool, - enable_log_requests: bool, engine_alive): - try: - # Ensure we can serialize transformer config before spawning - maybe_register_config_serialize_by_value() - - engine = MQLLMEngine.from_vllm_config( - vllm_config=vllm_config, - usage_context=usage_context, - disable_log_stats=disable_log_stats, - enable_log_requests=enable_log_requests, - ipc_path=ipc_path) - - signal.signal(signal.SIGTERM, signal_handler) - - engine.start() - - except BaseException as e: - logger.exception(e) - engine_alive.value = False - raise e from None diff --git a/vllm/entrypoints/launcher.py b/vllm/entrypoints/launcher.py index 887e27710924..c3195dbc4697 100644 --- a/vllm/entrypoints/launcher.py +++ b/vllm/entrypoints/launcher.py @@ -12,7 +12,6 @@ from vllm import envs from vllm.engine.async_llm_engine import AsyncEngineDeadError -from vllm.engine.multiprocessing import MQEngineDeadError from vllm.engine.protocol import EngineClient from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT, H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT) @@ -156,7 +155,6 @@ def _add_shutdown_handlers(app: FastAPI, server: uvicorn.Server) -> None: @app.exception_handler(RuntimeError) @app.exception_handler(AsyncEngineDeadError) - @app.exception_handler(MQEngineDeadError) @app.exception_handler(EngineDeadError) @app.exception_handler(EngineGenerateError) async def runtime_exception_handler(request: Request, __): diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 2e4aa7f3d5a6..527193c91339 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import asyncio -import atexit import gc 
import importlib import inspect @@ -17,7 +16,6 @@ from argparse import Namespace from collections.abc import AsyncGenerator, AsyncIterator, Awaitable from contextlib import asynccontextmanager -from functools import partial from http import HTTPStatus from typing import Annotated, Any, Callable, Optional @@ -42,8 +40,6 @@ from vllm.config import VllmConfig from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore -from vllm.engine.multiprocessing.client import MQLLMEngineClient -from vllm.engine.multiprocessing.engine import run_mp_engine from vllm.engine.protocol import EngineClient from vllm.entrypoints.chat_utils import (load_chat_template, resolve_hf_chat_template, @@ -102,13 +98,10 @@ log_non_default_args, with_cancellation) from vllm.logger import init_logger from vllm.reasoning import ReasoningParserManager -from vllm.transformers_utils.config import ( - maybe_register_config_serialize_by_value) from vllm.transformers_utils.tokenizer import MistralTokenizer from vllm.usage.usage_lib import UsageContext from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs, - get_open_zmq_ipc_path, is_valid_ipv6_address, - set_ulimit) + is_valid_ipv6_address, set_ulimit) from vllm.v1.metrics.prometheus import get_prometheus_registry from vllm.version import __version__ as VLLM_VERSION @@ -237,8 +230,7 @@ async def build_async_engine_client_from_engine_args( async_llm.shutdown() # V0 AsyncLLM. - elif (MQLLMEngineClient.is_unsupported_config(vllm_config) - or disable_frontend_multiprocessing): + else: engine_client: Optional[EngineClient] = None try: @@ -252,96 +244,6 @@ async def build_async_engine_client_from_engine_args( if engine_client and hasattr(engine_client, "shutdown"): engine_client.shutdown() - # V0MQLLMEngine. - else: - if "PROMETHEUS_MULTIPROC_DIR" not in os.environ: - # Make TemporaryDirectory for prometheus multiprocessing - # Note: global TemporaryDirectory will be automatically - # cleaned up upon exit. - global prometheus_multiproc_dir - prometheus_multiproc_dir = tempfile.TemporaryDirectory() - os.environ[ - "PROMETHEUS_MULTIPROC_DIR"] = prometheus_multiproc_dir.name - else: - logger.warning( - "Found PROMETHEUS_MULTIPROC_DIR was set by user. " - "This directory must be wiped between vLLM runs or " - "you will find inaccurate metrics. Unset the variable " - "and vLLM will properly handle cleanup.") - - # Select random path for IPC. - ipc_path = get_open_zmq_ipc_path() - logger.debug("Multiprocessing frontend to use %s for IPC Path.", - ipc_path) - - # Start RPCServer in separate process (holds the LLMEngine). - # the current process might have CUDA context, - # so we need to spawn a new process - context = multiprocessing.get_context("spawn") - - # Ensure we can serialize transformer config before spawning - maybe_register_config_serialize_by_value() - - # The Process can raise an exception during startup, which may - # not actually result in an exitcode being reported. As a result - # we use a shared variable to communicate the information. - engine_alive = multiprocessing.Value('b', True, lock=False) - engine_process = context.Process( - target=run_mp_engine, - args=(vllm_config, UsageContext.OPENAI_API_SERVER, ipc_path, - engine_args.disable_log_stats, - engine_args.enable_log_requests, engine_alive)) - engine_process.start() - engine_pid = engine_process.pid - assert engine_pid is not None, "Engine process failed to start." 
- logger.info("Started engine process with PID %d", engine_pid) - - def _cleanup_ipc_path(): - socket_path = ipc_path.replace("ipc://", "") - if os.path.exists(socket_path): - os.remove(socket_path) - - # Ensure we clean up the local IPC socket file on exit. - atexit.register(_cleanup_ipc_path) - - # Build RPCClient, which conforms to EngineClient Protocol. - build_client = partial(MQLLMEngineClient, ipc_path, vllm_config, - engine_pid) - mq_engine_client = await asyncio.get_running_loop().run_in_executor( - None, build_client) - try: - while True: - try: - await mq_engine_client.setup() - break - except TimeoutError: - if (not engine_process.is_alive() - or not engine_alive.value): - raise RuntimeError( - "Engine process failed to start. See stack " - "trace for the root cause.") from None - - yield mq_engine_client # type: ignore[misc] - finally: - # Ensure rpc server process was terminated - engine_process.terminate() - - # Close all open connections to the backend - mq_engine_client.close() - - # Wait for engine process to join - engine_process.join(4) - if engine_process.exitcode is None: - # Kill if taking longer than 5 seconds to stop - engine_process.kill() - - # Lazy import for prometheus multiprocessing. - # We need to set PROMETHEUS_MULTIPROC_DIR environment variable - # before prometheus_client is imported. - # See https://prometheus.github.io/client_python/multiprocess/ - from prometheus_client import multiprocess - multiprocess.mark_process_dead(engine_process.pid) - async def validate_json_request(raw_request: Request): content_type = raw_request.headers.get("content-type", "").lower() diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index bb8bff48c7b9..4f540fe965e2 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -191,7 +191,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, has_sink) -> str: if use_mla: - from vllm.attention.backends.rocm_aiter_mla import ( + from vllm.v1.attention.backends.mla.rocm_aiter_mla import ( is_aiter_mla_enabled) if selected_backend is None: From 0f7acdd73ca6316c8ae0474c0a9c4fc264e87a7b Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Tue, 16 Sep 2025 22:01:04 -0700 Subject: [PATCH 027/518] [Model] Support Qwen3-VL Model Series (#24727) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Roger Wang Signed-off-by: Isotr0py Co-authored-by: Huang Jie <92386084+JJJYmmm@users.noreply.github.com> Co-authored-by: 松灵 <26085463+wulipc@users.noreply.github.com> Co-authored-by: Isotr0py --- docs/models/supported_models.md | 2 + examples/offline_inference/vision_language.py | 78 + .../multimodal/processing/test_common.py | 35 +- tests/models/registry.py | 6 + .../layers/rotary_embedding/__init__.py | 2 + .../layers/rotary_embedding/mrope.py | 144 +- vllm/model_executor/models/qwen2.py | 2 +- vllm/model_executor/models/qwen2_vl.py | 2 +- vllm/model_executor/models/qwen3_moe.py | 2 +- vllm/model_executor/models/qwen3_vl.py | 1478 +++++++++++++++++ vllm/model_executor/models/qwen3_vl_moe.py | 344 ++++ vllm/model_executor/models/registry.py | 4 +- vllm/multimodal/video.py | 2 +- 13 files changed, 2084 insertions(+), 17 deletions(-) create mode 100644 vllm/model_executor/models/qwen3_vl.py create mode 100644 vllm/model_executor/models/qwen3_vl_moe.py diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 73834ddd0c5d..7aeaeca97699 100644 --- a/docs/models/supported_models.md +++ 
b/docs/models/supported_models.md @@ -661,6 +661,8 @@ These models primarily accept the [`LLM.generate`](./generative_models.md#llmgen | `Qwen2VLForConditionalGeneration` | QVQ, Qwen2-VL | T + IE+ + VE+ | `Qwen/QVQ-72B-Preview`, `Qwen/Qwen2-VL-7B-Instruct`, `Qwen/Qwen2-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Qwen2_5_VLForConditionalGeneration` | Qwen2.5-VL | T + IE+ + VE+ | `Qwen/Qwen2.5-VL-3B-Instruct`, `Qwen/Qwen2.5-VL-72B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Qwen2_5OmniThinkerForConditionalGeneration` | Qwen2.5-Omni | T + IE+ + VE+ + A+ | `Qwen/Qwen2.5-Omni-3B`, `Qwen/Qwen2.5-Omni-7B` | ✅︎ | ✅︎ | ✅︎ | +| `Qwen3VLForConditionalGeneration` | Qwen3-VL | T + IE+ + VE+ | `Qwen/Qwen3-VL-4B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | +| `Qwen3VLMoeForConditionalGeneration` | Qwen3-VL-MOE | T + IE+ + VE+ | `Qwen/Qwen3-VL-30B-A3B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `RForConditionalGeneration` | R-VL-4B | T + IE+ | `YannQi/R-4B` | | ✅︎ | ✅︎ | | `SkyworkR1VChatModel` | Skywork-R1V-38B | T + I | `Skywork/Skywork-R1V-38B` | | ✅︎ | ✅︎ | | `SmolVLMForConditionalGeneration` | SmolVLM2 | T + I | `SmolVLM2-2.2B-Instruct` | ✅︎ | | ✅︎ | diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py index 929df8d8bebd..de3f3afc1794 100644 --- a/examples/offline_inference/vision_language.py +++ b/examples/offline_inference/vision_language.py @@ -1437,6 +1437,80 @@ def run_qwen2_5_omni(questions: list[str], modality: str): ) +# Qwen3-VL-Dense +def run_qwen3_vl(questions: list[str], modality: str) -> ModelRequestData: + model_name = "Qwen/Qwen3-VL-4B-Instruct" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=5, + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + "fps": 1, + }, + limit_mm_per_prompt={modality: 1}, + ) + + if modality == "image": + placeholder = "<|image_pad|>" + elif modality == "video": + placeholder = "<|video_pad|>" + + prompts = [ + ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n" + ) + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + +# Qwen3-VL-MOE +def run_qwen3_vl_moe(questions: list[str], modality: str) -> ModelRequestData: + model_name = "Qwen/Qwen3-VL-30B-A3B-Instruct" + + engine_args = EngineArgs( + model=model_name, + max_model_len=4096, + max_num_seqs=5, + mm_processor_kwargs={ + "min_pixels": 28 * 28, + "max_pixels": 1280 * 28 * 28, + "fps": 1, + }, + limit_mm_per_prompt={modality: 1}, + ) + + if modality == "image": + placeholder = "<|image_pad|>" + elif modality == "video": + placeholder = "<|video_pad|>" + + prompts = [ + ( + "<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n" + f"<|im_start|>user\n<|vision_start|>{placeholder}<|vision_end|>" + f"{question}<|im_end|>\n" + "<|im_start|>assistant\n" + ) + for question in questions + ] + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + # R-4B def run_r_vl(questions: list[str], modality: str) -> ModelRequestData: assert modality == "image" @@ -1645,6 +1719,8 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData: "qwen2_vl": run_qwen2_vl, "qwen2_5_vl": run_qwen2_5_vl, "qwen2_5_omni": run_qwen2_5_omni, + "qwen3_vl": run_qwen3_vl, + "qwen3_vl_moe": run_qwen3_vl_moe, "rvl": run_r_vl, "skywork_chat": run_skyworkr1v, "smolvlm": run_smolvlm, @@ -1658,6 +1734,8 @@ def 
run_tarsier2(questions: list[str], modality: str) -> ModelRequestData: "glm4_1v", "glm4_5v", "glm4_5v_fp8", + "qwen3_vl", + "qwen3_vl_moe", ] diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index a272c840f8da..0941cc3f608e 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -31,6 +31,7 @@ def glm4_1v_patch_mm_data(mm_data: MultiModalDataDict) -> MultiModalDataDict: """ # Ensure video metadata is included if "video" in mm_data: + # GLM4.1V doesn't support multiple videos video = mm_data["video"] num_frames = len(video) mm_data["video"] = (video, { @@ -44,6 +45,34 @@ def glm4_1v_patch_mm_data(mm_data: MultiModalDataDict) -> MultiModalDataDict: return mm_data +def qwen3_vl_patch_mm_data(mm_data: MultiModalDataDict) -> MultiModalDataDict: + """ + Patch the multimodal data for Qwen3-VL model. + """ + + def create_metadata(frames: np.ndarray): + num_frames = len(frames) + return { + "total_num_frames": num_frames, + "fps": 2.0, + "duration": num_frames / 2.0, + "video_backend": "opencv", + "frames_indices": list(range(num_frames)), + "do_sample_frames": True, + } + + # Ensure video metadata is included + if "video" in mm_data: + video = mm_data["video"] + if isinstance(video, list): + # multiple videos + mm_data["video"] = [(vid, create_metadata(vid)) for vid in video] + else: + # single video + mm_data["video"] = (video, create_metadata(video)) + return mm_data + + def _test_processing_correctness( model_id_or_arch: str, hit_rate: float, @@ -182,8 +211,10 @@ def _test_processing_correctness( } MM_DATA_PATCHES = { - # GLM4.1V requires video metadata to be included in the input + # GLM4.1V and Qwen3-VL requires video metadata to be included in the input "glm4v": glm4_1v_patch_mm_data, + "qwen3_vl": qwen3_vl_patch_mm_data, + "qwen3_vl_moe": qwen3_vl_patch_mm_data, } @@ -326,6 +357,8 @@ def _test_processing_correctness_one( "Qwen/Qwen2.5-VL-3B-Instruct", "Qwen/Qwen2-Audio-7B-Instruct", "Qwen/Qwen2.5-Omni-3B", + "Qwen/Qwen3-VL-4B-Instruct", + "Qwen/Qwen3-VL-30B-A3B-Instruct", "YannQi/R-4B", "Skywork/Skywork-R1V-38B", "HuggingFaceTB/SmolVLM2-2.2B-Instruct", diff --git a/tests/models/registry.py b/tests/models/registry.py index 9aef08769fb2..93aa9d402549 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -557,6 +557,12 @@ def check_available_online( max_model_len=4096), "Qwen2_5OmniModel": _HfExamplesInfo("Qwen/Qwen2.5-Omni-3B"), "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"), # noqa: E501 + "Qwen3VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-4B-Instruct", # noqa: E501 + max_model_len=4096, + min_transformers_version="4.57"), # noqa: E501 + "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-30B-A3B-Instruct", # noqa: E501 + max_model_len=4096, + min_transformers_version="4.57"), "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True), "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B", diff --git a/vllm/model_executor/layers/rotary_embedding/__init__.py b/vllm/model_executor/layers/rotary_embedding/__init__.py index 564f9a5c0075..c9653aa9e440 100644 --- a/vllm/model_executor/layers/rotary_embedding/__init__.py +++ b/vllm/model_executor/layers/rotary_embedding/__init__.py @@ -103,6 +103,8 @@ def get_rope( is_neox_style, dtype, mrope_section=rope_scaling["mrope_section"], + mrope_interleaved=rope_scaling.get("mrope_interleaved", + False), ) 
else: rotary_emb = RotaryEmbedding( diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py index 69849fdac027..ef61dbc1a5ab 100644 --- a/vllm/model_executor/layers/rotary_embedding/mrope.py +++ b/vllm/model_executor/layers/rotary_embedding/mrope.py @@ -177,6 +177,18 @@ def triton_mrope( return q, k +def apply_interleaved_rope(x: torch.Tensor, + mrope_section: list[int]) -> torch.Tensor: + """Apply interleaved MRoPE to 3D rotary embeddings. + Reorganizes frequency layout from chunked [TTT...HHH...WWW] to + interleaved [THTHWHTHW...TT], preserving frequency continuity. + """ + x_t = x[0].clone() + x_t[..., 1:mrope_section[1] * 3:3] = x[1, ..., 1:mrope_section[1] * 3:3] + x_t[..., 2:mrope_section[2] * 3:3] = x[2, ..., 2:mrope_section[2] * 3:3] + return x_t + + class MRotaryEmbedding(RotaryEmbedding): """Rotary Embedding with Multimodal Sections.""" @@ -189,6 +201,7 @@ def __init__( is_neox_style: bool, dtype: torch.dtype, mrope_section: Optional[list[int]] = None, + mrope_interleaved: Optional[bool] = False, ) -> None: # In Qwen2.5-VL, the maximum index value is related to the duration of # the input video. We enlarge max_position_embeddings to 4 times to get @@ -198,6 +211,7 @@ def __init__( base, is_neox_style, dtype) self.mrope_section = mrope_section + self.mrope_interleaved = mrope_interleaved if self.mrope_section: assert sum(self.mrope_section) == rotary_dim // 2 @@ -225,17 +239,20 @@ def forward_native( cos, sin = cos_sin.chunk(2, dim=-1) if positions.ndim == 2: assert self.mrope_section - - cos = torch.cat([ - m[i] - for i, m in enumerate(cos.split(self.mrope_section, dim=-1)) - ], - dim=-1) - sin = torch.cat([ - m[i] - for i, m in enumerate(sin.split(self.mrope_section, dim=-1)) - ], - dim=-1) + if self.mrope_interleaved: + cos = apply_interleaved_rope(cos, self.mrope_section) + sin = apply_interleaved_rope(sin, self.mrope_section) + else: + cos = torch.cat([ + m[i] for i, m in enumerate( + cos.split(self.mrope_section, dim=-1)) + ], + dim=-1) + sin = torch.cat([ + m[i] for i, m in enumerate( + sin.split(self.mrope_section, dim=-1)) + ], + dim=-1) query_shape = query.shape query = query.view(num_tokens, -1, self.head_size) @@ -265,6 +282,10 @@ def forward_cuda( assert positions.ndim == 1 or positions.ndim == 2 assert key is not None + if self.mrope_interleaved: + # TODO: add triton implementation to support mrope-interleaved + return self.forward_native(positions, query, key) + num_tokens = positions.shape[-1] cos_sin = self.cos_sin_cache[positions] cos, sin = cos_sin.chunk(2, dim=-1) @@ -388,6 +409,15 @@ def get_input_positions_tensor( context_len=context_len, seq_len=seq_len, ) + elif hf_config.model_type in ["qwen3_vl", "qwen3_vl_moe"]: + return cls._qwen3vl_get_input_positions_tensor( + input_tokens=input_tokens, + hf_config=hf_config, + image_grid_thw=image_grid_thw, + video_grid_thw=video_grid_thw, + context_len=context_len, + seq_len=seq_len, + ) elif hf_config.model_type in ["ernie4_5_moe_vl", "ernie4_5_vl"]: return cls._ernie_get_input_positions_tensor( input_tokens=input_tokens, @@ -526,6 +556,98 @@ def _glm4v_get_input_positions_tensor( len(input_tokens)).item() return llm_positions, mrope_position_delta + @classmethod + def _qwen3vl_get_input_positions_tensor( + cls, + input_tokens: list[int], + hf_config: PretrainedConfig, + image_grid_thw: Union[list[list[int]], torch.Tensor], + video_grid_thw: Union[list[list[int]], torch.Tensor], + context_len: int = 0, + seq_len: Optional[int] = None, + ) -> 
tuple[torch.Tensor, int]: + """Get mrope input positions and delta value.""" + + video_grid_thw = [[1, h, w] for t, h, w in video_grid_thw + for _ in range(t)] + + image_token_id = hf_config.image_token_id + video_token_id = hf_config.video_token_id + vision_start_token_id = hf_config.vision_start_token_id + spatial_merge_size = hf_config.vision_config.spatial_merge_size + + input_tokens_tensor = torch.tensor(input_tokens) + vision_start_indices = torch.argwhere( + input_tokens_tensor == vision_start_token_id).squeeze(1) + vision_tokens = input_tokens_tensor[vision_start_indices + 1] + image_nums = (vision_tokens == image_token_id).sum() + video_nums = (vision_tokens == video_token_id).sum() + llm_pos_ids_list: list = [] + + st = 0 + remain_images, remain_videos = image_nums, video_nums + + image_index, video_index = 0, 0 + for _ in range(image_nums + video_nums): + if image_token_id in input_tokens and remain_images > 0: + ed_image = input_tokens.index(image_token_id, st) + else: + ed_image = len(input_tokens) + 1 + if video_token_id in input_tokens and remain_videos > 0: + ed_video = input_tokens.index(video_token_id, st) + else: + ed_video = len(input_tokens) + 1 + if ed_image < ed_video: + t, h, w = ( + image_grid_thw[image_index][0], + image_grid_thw[image_index][1], + image_grid_thw[image_index][2], + ) + image_index += 1 + remain_images -= 1 + ed = ed_image + else: + t, h, w = ( + video_grid_thw[video_index][0], + video_grid_thw[video_index][1], + video_grid_thw[video_index][2], + ) + video_index += 1 + remain_videos -= 1 + ed = ed_video + + llm_grid_t, llm_grid_h, llm_grid_w = \ + t, h // spatial_merge_size, w // spatial_merge_size + text_len = ed - st + + st_idx = llm_pos_ids_list[-1].max() + 1 if len( + llm_pos_ids_list) > 0 else 0 + llm_pos_ids_list.append( + torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + t_index = torch.arange(llm_grid_t).view(-1, 1).expand( + -1, llm_grid_h * llm_grid_w).flatten() + h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand( + llm_grid_t, -1, llm_grid_w).flatten() + w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand( + llm_grid_t, llm_grid_h, -1).flatten() + llm_pos_ids_list.append( + torch.stack([t_index, h_index, w_index]) + text_len + st_idx) + st = ed + llm_grid_t * llm_grid_h * llm_grid_w + + if st < len(input_tokens): + st_idx = llm_pos_ids_list[-1].max() + 1 if len( + llm_pos_ids_list) > 0 else 0 + text_len = len(input_tokens) - st + llm_pos_ids_list.append( + torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1) + mrope_position_delta = (llm_positions.max() + 1 - + len(input_tokens)).item() + llm_positions = llm_positions[:, context_len:seq_len] + return llm_positions, mrope_position_delta + @classmethod def _ernie_get_input_positions_tensor( cls, diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 54dc0bebd9c5..e13e87b93429 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -285,7 +285,7 @@ def __init__(self, decoder_layer_type: type[nn.Module] = Qwen2DecoderLayer): super().__init__() - config = vllm_config.model_config.hf_config + config = vllm_config.model_config.hf_config.get_text_config() cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index d08181c5fd53..b6576b783b64 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ 
b/vllm/model_executor/models/qwen2_vl.py @@ -83,7 +83,7 @@ logger = init_logger(__name__) # For profile run -_MAX_FRAMES_PER_VIDEO = 16 +_MAX_FRAMES_PER_VIDEO = 600 # === Vision Inputs === # diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 85429b3a01f9..0a504d90cde1 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -378,7 +378,7 @@ class Qwen3MoeModel(nn.Module): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() - config = vllm_config.model_config.hf_config + config = vllm_config.model_config.hf_config.get_text_config() cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config parallel_config = vllm_config.parallel_config diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py new file mode 100644 index 000000000000..22948aee4936 --- /dev/null +++ b/vllm/model_executor/models/qwen3_vl.py @@ -0,0 +1,1478 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +# Copyright 2025 The vLLM team. +# Copyright 2025 The Qwen Team. +# Copyright 2025 The HuggingFace Inc. team. +# All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Inference-only Qwen3VL model compatible with HuggingFace weights.""" +from collections.abc import Iterable, Mapping, Sequence +from functools import partial +from typing import Any, Callable, Optional, Union + +import numpy as np +import torch +import torch.nn as nn +import torch.nn.functional as F +from transformers import BatchFeature +from transformers.models.qwen2_vl import Qwen2VLImageProcessorFast +from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize +from transformers.models.qwen3_vl import (Qwen3VLProcessor, + Qwen3VLVideoProcessor) +from transformers.models.qwen3_vl.configuration_qwen3_vl import ( + Qwen3VLConfig, Qwen3VLVisionConfig) +from transformers.video_utils import VideoMetadata + +from vllm.attention.layer import check_upstream_fa_availability +from vllm.compilation.decorators import support_torch_compile +from vllm.config import VllmConfig +from vllm.distributed import get_pp_group +from vllm.logger import init_logger +from vllm.model_executor import SamplingMetadata +from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY +from vllm.model_executor.layers.linear import (ColumnParallelLinear, + RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.layers.quantization.gptq import GPTQConfig +from vllm.model_executor.layers.quantization.gptq_marlin import ( + GPTQMarlinConfig) +from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead +from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.module_mapping import MultiModelKeys +from vllm.multimodal import MULTIMODAL_REGISTRY +from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, + MultiModalKwargsItem, + MultiModalKwargsItems, VideoItem) +from vllm.multimodal.parse import (ImageSize, MultiModalDataItems, + MultiModalDataParser) +from vllm.multimodal.processing import (BaseMultiModalProcessor, + PromptReplacement, PromptUpdate, + PromptUpdateDetails) +from vllm.multimodal.profiling import BaseDummyInputsBuilder +from vllm.platforms import _Backend +from vllm.sequence import IntermediateTensors +from vllm.transformers_utils.config import uses_mrope +from vllm.utils import is_list_of + +from .interfaces import (MultiModalEmbeddings, SupportsLoRA, + SupportsMultiModal, SupportsPP) +from .qwen2_5_vl import (Qwen2_5_VisionAttention, + Qwen2_5_VisionRotaryEmbedding, + Qwen2_5_VLImageEmbeddingInputs, Qwen2_5_VLImageInputs, + Qwen2_5_VLImagePixelInputs, + Qwen2_5_VLVideoEmbeddingInputs, Qwen2_5_VLVideoInputs, + Qwen2_5_VLVideoPixelInputs) +from .qwen2_vl import Qwen2VLProcessingInfo +from .qwen3 import Qwen3ForCausalLM, Qwen3Model +from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper, + maybe_prefix, merge_multimodal_embeddings) +from .vision import get_vit_attn_backend + +logger = init_logger(__name__) + + +class Qwen3_VisionPatchEmbed(nn.Module): + + def __init__( + self, + patch_size: int = 14, + temporal_patch_size: int = 2, + in_channels: int = 3, + hidden_size: int = 1152, + ) -> None: + super().__init__() + self.patch_size = patch_size + self.temporal_patch_size = temporal_patch_size + self.hidden_size = hidden_size + + kernel_size = (temporal_patch_size, patch_size, patch_size) + self.proj = nn.Conv3d(in_channels, + hidden_size, + kernel_size=kernel_size, + stride=kernel_size, + bias=True) + + def forward(self, x: torch.Tensor) -> 
torch.Tensor: + L, C = x.shape + x = x.view(L, -1, self.temporal_patch_size, self.patch_size, + self.patch_size) + x = self.proj(x).view(L, self.hidden_size) + return x + + +class Qwen3_VisionMLP(nn.Module): + + def __init__(self, + in_features: int, + hidden_features: int, + bias: bool = False, + act_fn: Callable[[torch.Tensor], torch.Tensor] = F.silu, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = ""): + super().__init__() + self.linear_fc1 = ColumnParallelLinear(in_features, + hidden_features, + bias=bias, + quant_config=quant_config, + return_bias=False, + prefix=f"{prefix}.linear_fc1") + self.linear_fc2 = RowParallelLinear(hidden_features, + in_features, + bias=bias, + quant_config=quant_config, + return_bias=False, + prefix=f"{prefix}.linear_fc2") + self.act_fn = act_fn + + def forward(self, x: torch.Tensor): + mlp_output = self.linear_fc2(self.act_fn(self.linear_fc1(x))) + return mlp_output + + +class Qwen3_VisionBlock(nn.Module): + + def __init__( + self, + dim: int, + num_heads: int, + mlp_hidden_dim: int, + act_fn: Callable[[torch.Tensor], torch.Tensor] = F.silu, + norm_layer: Optional[Callable[[int], nn.Module]] = None, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + if norm_layer is None: + norm_layer = partial(nn.LayerNorm, eps=1e-6) + self.norm1 = norm_layer(dim) + self.norm2 = norm_layer(dim) + self.attn = Qwen2_5_VisionAttention(embed_dim=dim, + num_heads=num_heads, + projection_size=dim, + quant_config=quant_config, + prefix=f"{prefix}.attn") + self.mlp = Qwen3_VisionMLP(dim, + mlp_hidden_dim, + act_fn=act_fn, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.mlp") + + def forward( + self, + x: torch.Tensor, + cu_seqlens: torch.Tensor, + rotary_pos_emb: torch.Tensor, + max_seqlen: Optional[int] = None, # Only used for Flash Attention + seqlens: Optional[list[int]] = None, # Only used for xFormers + ) -> torch.Tensor: + x = x + self.attn(self.norm1(x), + cu_seqlens=cu_seqlens, + rotary_pos_emb=rotary_pos_emb, + max_seqlen=max_seqlen, + seqlens=seqlens) + + x = x + self.mlp(self.norm2(x)) + return x + + +class Qwen3_VisionPatchMerger(nn.Module): + + def __init__( + self, + d_model: int, + context_dim: int, + norm_layer: Optional[Callable[[int], nn.Module]] = None, + spatial_merge_size: int = 2, + use_postshuffle_norm: bool = False, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.hidden_size = context_dim * (spatial_merge_size**2) + + self.use_postshuffle_norm = use_postshuffle_norm + if self.use_postshuffle_norm: + context_dim = self.hidden_size + + if norm_layer is None: + norm_layer = partial(nn.LayerNorm, eps=1e-6) + self.use_postshuffle_norm = use_postshuffle_norm + self.norm = norm_layer( + self.hidden_size if use_postshuffle_norm else context_dim) + self.linear_fc1 = ColumnParallelLinear(self.hidden_size, + self.hidden_size, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.linear_fc1") + self.act_fn = nn.GELU() + self.linear_fc2 = RowParallelLinear(self.hidden_size, + d_model, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.linear_fc2") + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.use_postshuffle_norm: + x = self.norm(x.view(-1, self.hidden_size)) + else: + x = self.norm(x).view(-1, self.hidden_size) + + x_parallel, _ = self.linear_fc1(x) + x_parallel = self.act_fn(x_parallel) + out, _ = self.linear_fc2(x_parallel) + return out + + +class 
Qwen3_VisionTransformer(nn.Module): + + def __init__( + self, + vision_config: Qwen3VLVisionConfig, + norm_eps: float = 1e-6, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + ) -> None: + super().__init__() + self.hidden_size = vision_config.hidden_size + self.num_heads = vision_config.num_heads + self.num_position_embeddings = vision_config.num_position_embeddings + self.patch_size = vision_config.patch_size + self.spatial_merge_size = vision_config.spatial_merge_size + self.spatial_merge_unit = self.spatial_merge_size**2 + self.temporal_patch_size = vision_config.temporal_patch_size + self.deepstack_visual_indexes = vision_config.deepstack_visual_indexes + + self.patch_embed = Qwen3_VisionPatchEmbed( + patch_size=self.patch_size, + temporal_patch_size=self.temporal_patch_size, + in_channels=vision_config.in_channels, + hidden_size=self.hidden_size, + ) + + self.pos_embed = nn.Embedding(self.num_position_embeddings, + self.hidden_size) + + norm_layer = partial(nn.LayerNorm, eps=norm_eps) + head_dim = self.hidden_size // self.num_heads + self.rotary_pos_emb = Qwen2_5_VisionRotaryEmbedding(head_dim // 2) + + self.blocks = nn.ModuleList([ + Qwen3_VisionBlock( + dim=self.hidden_size, + num_heads=self.num_heads, + mlp_hidden_dim=vision_config.intermediate_size, + act_fn=_ACTIVATION_REGISTRY[vision_config.hidden_act], + norm_layer=norm_layer, + quant_config=quant_config, + prefix=f"{prefix}.blocks.{layer_idx}") + for layer_idx in range(vision_config.depth) + ]) + + self.merger = Qwen3_VisionPatchMerger( + d_model=vision_config.out_hidden_size, + context_dim=self.hidden_size, + norm_layer=norm_layer, + spatial_merge_size=self.spatial_merge_size, + quant_config=quant_config, + prefix=f"{prefix}.merger", + ) + + self.deepstack_merger_list = nn.ModuleList([ + Qwen3_VisionPatchMerger( + d_model=vision_config.out_hidden_size, + context_dim=self.hidden_size, + spatial_merge_size=self.spatial_merge_size, + use_postshuffle_norm=True, + norm_layer=norm_layer, + quant_config=quant_config, + prefix=f"{prefix}.deepstack_merger_list.{layer_idx}") + for layer_idx in range(len(self.deepstack_visual_indexes)) + ]) + + self.attn_backend = get_vit_attn_backend( + head_size=head_dim, dtype=torch.get_default_dtype()) + if self.attn_backend != _Backend.FLASH_ATTN and \ + check_upstream_fa_availability( + torch.get_default_dtype()): + self.attn_backend = _Backend.FLASH_ATTN + + @property + def dtype(self) -> torch.dtype: + return self.patch_embed.proj.weight.dtype + + @property + def device(self) -> torch.device: + return self.patch_embed.proj.weight.device + + def rot_pos_emb(self, grid_thw): + pos_ids = [] + for t, h, w in grid_thw: + hpos_ids = torch.arange(h).unsqueeze(1).expand(-1, w) + hpos_ids = hpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + hpos_ids = hpos_ids.permute(0, 2, 1, 3) + hpos_ids = hpos_ids.flatten() + + wpos_ids = torch.arange(w).unsqueeze(0).expand(h, -1) + wpos_ids = wpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + wpos_ids = wpos_ids.permute(0, 2, 1, 3) + wpos_ids = wpos_ids.flatten() + pos_ids.append( + torch.stack([hpos_ids, wpos_ids], dim=-1).repeat(t, 1)) + pos_ids = torch.cat(pos_ids, dim=0) + max_grid_size = grid_thw[:, 1:].max() + rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size) + rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(1) + return rotary_pos_emb + + def 
fast_pos_embed_interpolate(self, grid_thw): + num_grid_per_side = int(self.num_position_embeddings**0.5) + + idx_list = [[] for _ in range(4)] + weight_list = [[] for _ in range(4)] + + for t, h, w in grid_thw: + h_idxs = torch.linspace(0, + num_grid_per_side - 1, + h, + dtype=torch.float32) + w_idxs = torch.linspace(0, + num_grid_per_side - 1, + w, + dtype=torch.float32) + + h_idxs_floor = h_idxs.to(torch.long) + w_idxs_floor = w_idxs.to(torch.long) + h_idxs_ceil = torch.clamp(h_idxs.to(torch.long) + 1, + max=num_grid_per_side - 1) + w_idxs_ceil = torch.clamp(w_idxs.to(torch.long) + 1, + max=num_grid_per_side - 1) + + dh = h_idxs - h_idxs_floor + dw = w_idxs - w_idxs_floor + + idx_list[0].extend(((h_idxs_floor * num_grid_per_side)[None].T + + w_idxs_floor[None]).flatten().tolist() * t) + idx_list[1].extend(((h_idxs_floor * num_grid_per_side)[None].T + + w_idxs_ceil[None]).flatten().tolist() * t) + idx_list[2].extend(((h_idxs_ceil * num_grid_per_side)[None].T + + w_idxs_floor[None]).flatten().tolist() * t) + idx_list[3].extend(((h_idxs_ceil * num_grid_per_side)[None].T + + w_idxs_ceil[None]).flatten().tolist() * t) + + weight_list[0].extend( + ((1 - dh)[None].T * (1 - dw)[None]).flatten().tolist() * t) + weight_list[1].extend( + ((1 - dh)[None].T * dw[None]).flatten().tolist() * t) + weight_list[2].extend( + (dh[None].T * (1 - dw)[None]).flatten().tolist() * t) + weight_list[3].extend( + (dh[None].T * dw[None]).flatten().tolist() * t) + + device = self.pos_embed.weight.device + dtype = self.pos_embed.weight.dtype + + p0 = self.pos_embed( + torch.tensor( + idx_list[0], dtype=torch.long, device=device)) * torch.tensor( + weight_list[0], dtype=dtype, device=device)[:, None] + p1 = self.pos_embed( + torch.tensor( + idx_list[1], dtype=torch.long, device=device)) * torch.tensor( + weight_list[1], dtype=dtype, device=device)[:, None] + p2 = self.pos_embed( + torch.tensor( + idx_list[2], dtype=torch.long, device=device)) * torch.tensor( + weight_list[2], dtype=dtype, device=device)[:, None] + p3 = self.pos_embed( + torch.tensor( + idx_list[3], dtype=torch.long, device=device)) * torch.tensor( + weight_list[3], dtype=dtype, device=device)[:, None] + + patch_pos_embeds = p0 + p1 + p2 + p3 + patch_pos_embeds = patch_pos_embeds.split( + [t * h * w for t, h, w in grid_thw]) + patch_pos_embeds_permute = [] + m_size = self.spatial_merge_size + for pos_embed, (t, h, w) in zip(patch_pos_embeds, grid_thw): + pos_embed = pos_embed.view(t, h // m_size, m_size, w // m_size, + m_size, -1).permute(0, 1, 3, 2, 4, + 5).flatten(0, 4) + patch_pos_embeds_permute.append(pos_embed) + patch_pos_embeds = torch.cat(patch_pos_embeds_permute) + return patch_pos_embeds + + def compute_attn_mask_seqlen( + self, + cu_seqlens: torch.Tensor, + ) -> tuple[Optional[int], Optional[list[int]]]: + max_seqlen, seqlens = None, None + if self.attn_backend == _Backend.FLASH_ATTN: + max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item() + elif self.attn_backend == _Backend.XFORMERS: + seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() + return max_seqlen, seqlens + + def forward( + self, + x: torch.Tensor, + grid_thw: list[list[int]], + ) -> torch.Tensor: + hidden_states = x.to(device=self.device, dtype=self.dtype) + hidden_states = self.patch_embed(hidden_states) + + pos_embeds = self.fast_pos_embed_interpolate(grid_thw) + hidden_states = hidden_states + pos_embeds + rotary_pos_emb = self.rot_pos_emb(grid_thw) + + cu_seqlens = torch.repeat_interleave( + grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]).cumsum( + dim=0, + 
dtype=grid_thw.dtype + if torch.jit.is_tracing() else torch.int32, + ) + cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0) + + hidden_states = hidden_states.unsqueeze(1) + rotary_pos_emb = rotary_pos_emb.to(hidden_states.device) + max_seqlen, seqlens = self.compute_attn_mask_seqlen(cu_seqlens) + + deepstack_feature_lists = [] + for layer_num, blk in enumerate(self.blocks): + hidden_states = blk(hidden_states, + cu_seqlens=cu_seqlens, + rotary_pos_emb=rotary_pos_emb, + max_seqlen=max_seqlen, + seqlens=seqlens) + if layer_num in self.deepstack_visual_indexes: + deepstack_merger_idx = self.deepstack_visual_indexes.index( + layer_num) + deepstack_feature = self.deepstack_merger_list[ + deepstack_merger_idx](hidden_states) + deepstack_feature_lists.append(deepstack_feature) + hidden_states = self.merger(hidden_states) + hidden_states = torch.cat( + [hidden_states] + deepstack_feature_lists, + dim=1) # [seq_len, hidden_size * (1 + depth_of_deepstack)] + return hidden_states + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("attn.qkv.", "attn.q.", "q"), + ("attn.qkv.", "attn.k.", "k"), + ("attn.qkv.", "attn.v.", "v"), + ] + params_dict = dict(self.named_parameters(remove_duplicate=False)) + loaded_params: set[str] = set() + + for name, loaded_weight in weights: + for (param_name, weight_name, shard_id) in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(name) + return loaded_params + + +class Qwen3VLProcessingInfo(Qwen2VLProcessingInfo): + + def get_hf_config(self): + return self.ctx.get_hf_config(Qwen3VLConfig) + + def get_hf_processor(self, **kwargs: object) -> Qwen3VLProcessor: + return self.ctx.get_hf_processor( + Qwen3VLProcessor, + use_fast=kwargs.pop("use_fast", True), + **kwargs, + ) + + def get_tokenizer(self): + return self.ctx.tokenizer + + def get_image_processor(self, + **kwargs: object) -> Qwen2VLImageProcessorFast: + return self.get_hf_processor(**kwargs).image_processor + + def get_video_processor(self, **kwargs: object) -> Qwen3VLVideoProcessor: + return self.get_hf_processor(**kwargs).video_processor + + def _get_vision_info( + self, + *, + image_width: int, + image_height: int, + num_frames: int = 2, + do_resize: bool = True, + image_processor: Optional[Qwen2VLImageProcessorFast], + ) -> tuple[ImageSize, int]: + if image_processor is None: + image_processor = self.get_image_processor() + + hf_config = self.get_hf_config() + vision_config = hf_config.vision_config + patch_size = vision_config.patch_size + merge_size = vision_config.spatial_merge_size + temporal_patch_size = vision_config.temporal_patch_size + + if do_resize: + resized_height, resized_width = smart_resize( + height=image_height, + width=image_width, + factor=patch_size * merge_size, + min_pixels=image_processor.size["shortest_edge"], + max_pixels=image_processor.size["longest_edge"], + ) + preprocessed_size = ImageSize(width=resized_width, + height=resized_height) + else: + preprocessed_size = ImageSize(width=image_width, + height=image_height) + + padded_num_frames = num_frames + num_frames % temporal_patch_size + + grid_t = max(padded_num_frames // 
temporal_patch_size, 1) + grid_h = preprocessed_size.height // patch_size + grid_w = preprocessed_size.width // patch_size + + num_patches = grid_t * grid_h * grid_w + num_vision_tokens = num_patches // (merge_size**2) + + return preprocessed_size, num_vision_tokens + + def _calculate_timestamps(self, indices: list[int] | torch.Tensor, + video_fps: float, merge_size: int): + if not isinstance(indices, list): + indices = indices.tolist() + if len(indices) % merge_size != 0: + # don't update metadata's frames_indices directly + indices = indices + [indices[-1] + ] * (merge_size - len(indices) % merge_size) + timestamps = [idx / video_fps for idx in indices] + timestamps = [(timestamps[i] + timestamps[i + merge_size - 1]) / 2 + for i in range(0, len(timestamps), merge_size)] + return timestamps + + def _get_video_second_idx( + self, + metadata: dict[str, Any], + out_item: MultiModalKwargsItem, + do_sample_frames: Optional[bool] = None, + sampled_fps: Optional[float] = None) -> list[int]: + video_processor = self.get_video_processor() + merge_size = video_processor.merge_size + indices = metadata["frames_indices"] + + # metadata["fps"] refers to the true fps of the input video. + video_fps = metadata["fps"] + if do_sample_frames is None: + do_sample_frames = metadata.get("do_sample_frames", False) + + # If video frames are sampled in HF processor (instead of vLLM + # video loader), we need to re-calculate the indices from original + # metadata. + if do_sample_frames: + # here video_fps is the fps of the sampled video, and + # metadata["fps"] refers to the fps of the original video. + video_fps = sampled_fps if sampled_fps else video_processor.fps + total_num_frames = metadata["total_num_frames"] + num_frames = int(total_num_frames / metadata["fps"] * video_fps) + num_frames = min( + min(max(num_frames, video_processor.min_frames), + video_processor.max_frames), total_num_frames) + indices = np.linspace(0, total_num_frames - 1, + num_frames).round().astype(int).tolist() + timestamps = self._calculate_timestamps(indices, video_fps, merge_size) + return timestamps + + +class Qwen3VLDummyInputsBuilder(BaseDummyInputsBuilder[Qwen3VLProcessingInfo]): + + def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str: + num_images = mm_counts.get("image", 0) + num_videos = mm_counts.get("video", 0) + + image_token = "<|vision_start|><|image_pad|><|vision_end|>" + video_token = "<|vision_start|><|video_pad|><|vision_end|>" + + return image_token * num_images + video_token * num_videos + + def get_dummy_mm_data( + self, + seq_len: int, + mm_counts: Mapping[str, int], + ) -> MultiModalDataDict: + num_images = mm_counts.get("image", 0) + num_videos = mm_counts.get("video", 0) + + target_width, target_height = ( + self.info.get_image_size_with_most_features()) + target_num_frames = self.info.get_num_frames_with_most_features( + seq_len, mm_counts) + return { + "image": + self._get_dummy_images(width=target_width, + height=target_height, + num_images=num_images), + "video": + self._get_dummy_videos( + width=target_width, + height=target_height, + num_frames=target_num_frames, + num_videos=num_videos, + ), + } + + def _get_dummy_videos( + self, + *, + width: int, + height: int, + num_frames: int, + num_videos: int, + ) -> list[VideoItem]: + num_frames = max(num_frames, 2) + video = np.full((num_frames, width, height, 3), 255, dtype=np.uint8) + video_items = [] + for i in range(num_videos): + video_metadata = { + "fps": 2.0, + "duration": num_frames / 2.0, + "total_num_frames": num_frames, + "frames_indices": 
[i for i in range(num_frames)],
+                "video_backend": "opencv",
+                "do_sample_frames": False,
+            }
+            video_item = (video.copy(), video_metadata)
+            video_items.append(video_item)
+        return video_items
+
+
+class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo]
+                                 ):
+
+    def _get_data_parser(self) -> MultiModalDataParser:
+        return MultiModalDataParser(video_needs_metadata=True)
+
+    def _call_hf_processor(
+        self,
+        prompt: str,
+        mm_data: Mapping[str, object],
+        mm_kwargs: Mapping[str, object],
+        tok_kwargs: Mapping[str, object],
+    ) -> BatchFeature:
+        mm_data = dict(mm_data)
+        processor = self.info.get_hf_processor(**mm_kwargs)
+
+        # Separate video processing from image processing, because the videos
+        # are processed into several image patches.
+        if ("videos" in mm_data and isinstance(mm_data["videos"], list)
+                and len(mm_data["videos"]) > 0):
+            video_grid_thw_lst = []
+            pixel_values_videos_lst = []
+
+            for item_idx, item in enumerate(mm_data.pop("videos", [])):
+                video_array, metadata = item
+
+                # NOTE: @JJJYmmm the new attr metadata.frames_indices
+                # indicates the sampled frame indices of pre-sampled videos,
+                # which is used to calculate the timestamps. Make sure that
+                # do_sample_frames in mm_kwargs is false for pre-sampled
+                # videos.
+
+                # NOTE: a copy of mm_kwargs is created to update
+                # do_sample_frames, otherwise mm_hash for the object will be
+                # incorrect.
+                video_mm_kwargs = dict(**mm_kwargs)
+                if "do_sample_frames" not in video_mm_kwargs:
+                    # qwen_vl_utils already has "do_sample_frames" in
+                    # mm_kwargs, don't overwrite it.
+                    video_mm_kwargs["do_sample_frames"] = metadata.get(
+                        "do_sample_frames", False)
+
+                metadata = VideoMetadata(**{
+                    k: metadata[k]
+                    for k in metadata if k != "do_sample_frames"
+                })
+
+                video_mm_data = dict()
+                video_mm_data["videos"] = [[video_array]]
+                video_mm_data["video_metadata"] = [[metadata]]
+
+                video_outputs = super()._call_hf_processor(
+                    prompt="<|vision_start|><|video_pad|><|vision_end|>",
+                    mm_data=video_mm_data,
+                    mm_kwargs=video_mm_kwargs,
+                    tok_kwargs=tok_kwargs,
+                )
+                input_ids = video_outputs.pop("input_ids")
+                video_placeholder = processor.tokenizer.batch_decode(
+                    input_ids)[0]
+                prompt = prompt.replace(
+                    "<|vision_start|><|video_pad|><|vision_end|>",
+                    video_placeholder,
+                    1,
+                )
+
+                video_grid_thw_lst.append(video_outputs["video_grid_thw"])
+                pixel_values_videos_lst.append(
+                    video_outputs["pixel_values_videos"])
+            video_outputs = dict(
+                pixel_values_videos=torch.cat(pixel_values_videos_lst),
+                video_grid_thw=torch.cat(video_grid_thw_lst),
+            )
+        else:
+            video_outputs = dict()
+
+        processed_outputs = super()._call_hf_processor(
+            prompt=prompt,
+            mm_data=mm_data,
+            mm_kwargs=mm_kwargs,
+            tok_kwargs=tok_kwargs,
+        )
+        combined_outputs = dict(
+            processed_outputs,
+            **video_outputs,
+        )
+        return BatchFeature(combined_outputs)
+
+    def _get_mm_fields_config(
+        self,
+        hf_inputs: BatchFeature,
+        hf_processor_mm_kwargs: Mapping[str, object],
+    ) -> Mapping[str, MultiModalFieldConfig]:
+        image_grid_thw = hf_inputs.get("image_grid_thw", torch.empty((0, 3)))
+        image_grid_sizes = image_grid_thw.prod(-1)
+
+        video_grid_thw = hf_inputs.get("video_grid_thw", torch.empty((0, 3)))
+        video_grid_sizes = video_grid_thw.prod(-1)
+
+        return dict(
+            pixel_values=MultiModalFieldConfig.flat_from_sizes(
+                "image", image_grid_sizes),
+            image_embeds=MultiModalFieldConfig.flat_from_sizes(
+                "image", image_grid_sizes),
+            image_grid_thw=MultiModalFieldConfig.batched("image"),
+            pixel_values_videos=MultiModalFieldConfig.flat_from_sizes(
+                "video",
video_grid_sizes), + video_embeds=MultiModalFieldConfig.flat_from_sizes( + "video", video_grid_sizes), + video_grid_thw=MultiModalFieldConfig.batched("video"), + ) + + def _get_prompt_updates( + self, + mm_items: MultiModalDataItems, + hf_processor_mm_kwargs: Mapping[str, Any], + out_mm_kwargs: MultiModalKwargsItems, + ) -> Sequence[PromptUpdate]: + hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs) + image_processor = self.info.get_image_processor( + **hf_processor_mm_kwargs) + tokenizer = self.info.get_tokenizer() + hf_config = self.info.get_hf_config() + + video_token_id = hf_config.video_token_id + vision_start_token_id = hf_config.vision_start_token_id + vision_end_token_id = hf_config.vision_end_token_id + + merge_length = image_processor.merge_size**2 + + def get_image_replacement_qwen3vl(item_idx: int): + out_item = out_mm_kwargs["image"][item_idx] + grid_thw = out_item["image_grid_thw"].data + assert isinstance(grid_thw, torch.Tensor) + + num_tokens = int(grid_thw.prod()) // merge_length + return [hf_processor.image_token_id] * num_tokens + + def get_video_replacement_qwen3vl(item_idx: int): + out_item = out_mm_kwargs["video"][item_idx] + grid_thw = out_item["video_grid_thw"].data + assert isinstance(grid_thw, torch.Tensor) + + video, metadata = mm_items["video"][item_idx] + do_sample_frames = hf_processor_mm_kwargs.get("do_sample_frames") + sampled_fps = hf_processor_mm_kwargs.get("fps") + if is_list_of(sampled_fps, float): + sampled_fps = sampled_fps[item_idx] + timestamps = self.info._get_video_second_idx( + metadata, out_item, do_sample_frames, sampled_fps) + + assert len(timestamps) == grid_thw[0], ( + f"The timestamps length({len(timestamps)}) should be equal " + f"video length ({grid_thw[0]}).") + + frames_idx_token = [ + tokenizer.encode(f"<{curr_time:.1f} seconds>", + add_special_tokens=False) + for curr_time in timestamps + ] + num_tokens_per_frame = int(grid_thw[1:].prod()) // merge_length + placeholder = [] + for frame_idx in frames_idx_token: + placeholder.extend(frame_idx) + placeholder.extend([vision_start_token_id] + + [video_token_id] * num_tokens_per_frame + + [vision_end_token_id]) + return PromptUpdateDetails.select_token_id(placeholder, + video_token_id) + + return [ + PromptReplacement( + modality="image", + target=hf_processor.image_token, + replacement=get_image_replacement_qwen3vl, + ), + + # NOTE: We match string on purpose since searching sequence of + # token ids takes more time. + PromptReplacement( + modality="video", + target="<|vision_start|><|video_pad|><|vision_end|>", + replacement=get_video_replacement_qwen3vl, + ), + ] + + +@support_torch_compile( + dynamic_arg_dims={ + "input_ids": 0, + # positions is of shape (3, seq_len) if mrope is enabled for qwen2-vl, + # otherwise (seq_len, ). + "positions": -1, + "intermediate_tensors": 0, + "inputs_embeds": 0, + # the same shape as input_embeds + "deepstack_input_embeds": 0 + }) +class Qwen3LLMModel(Qwen3Model): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + if not get_pp_group().is_first_rank: + assert self.start_layer >= len( + vllm_config.model_config.hf_config.vision_config. 
+ deepstack_visual_indexes), ( + "start_layer should be greater than or equal to " + "len(deepstack_visual_indexes)") + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + # args for deepstack + deepstack_input_embeds: Optional[IntermediateTensors] = None, + ) -> Union[torch.Tensor, IntermediateTensors]: + if get_pp_group().is_first_rank: + if inputs_embeds is not None: + hidden_states = inputs_embeds + else: + hidden_states = self.get_input_embeddings(input_ids) + residual = None + else: + assert intermediate_tensors is not None + hidden_states = intermediate_tensors["hidden_states"] + residual = intermediate_tensors["residual"] + for layer_idx, layer in enumerate( + self.layers[self.start_layer:self.end_layer]): + layer_idx = layer_idx + self.start_layer + + hidden_states, residual = layer( + positions, + hidden_states, + residual, + ) + + if deepstack_input_embeds is not None and \ + layer_idx in range(0, len(deepstack_input_embeds)): + hidden_states = hidden_states + deepstack_input_embeds[ + f"deepstack_input_embeds_{layer_idx}"] + + if not get_pp_group().is_last_rank: + return IntermediateTensors({ + "hidden_states": hidden_states, + "residual": residual + }) + hidden_states, _ = self.norm(hidden_states, residual) + return hidden_states + + +class Qwen3LLMForCausalLM(Qwen3ForCausalLM): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super(Qwen3ForCausalLM, self).__init__() + config = vllm_config.model_config.hf_config.text_config + quant_config = vllm_config.quant_config + lora_config = vllm_config.lora_config + + self.config = config + self.lora_config = lora_config + + self.quant_config = quant_config + self.model = Qwen3LLMModel(vllm_config=vllm_config, prefix=prefix) + + if get_pp_group().is_last_rank: + if config.tie_word_embeddings: + self.lm_head = self.model.embed_tokens + else: + self.lm_head = ParallelLMHead(config.vocab_size, + config.hidden_size, + quant_config=quant_config, + prefix="lm_head") + else: + self.lm_head = PPMissingLayer() + + self.logits_processor = LogitsProcessor(config.vocab_size) + + self.make_empty_intermediate_tensors = ( + self.model.make_empty_intermediate_tensors) + + +@MULTIMODAL_REGISTRY.register_processor(Qwen3VLMultiModalProcessor, + info=Qwen3VLProcessingInfo, + dummy_inputs=Qwen3VLDummyInputsBuilder) +class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal, + SupportsLoRA, SupportsPP): + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + # To ensure correct weight loading and mapping. 
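+    # For example, checkpoint weights stored under the "model.visual." prefix
+    # are loaded into the "visual." module here, and "model.language_model."
+    # weights are loaded into "language_model.model.".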
+ hf_to_vllm_mapper = WeightsMapper( + orig_to_new_prefix={ + "model.visual.": "visual.", + "lm_head.": "language_model.lm_head.", + "model.language_model.": "language_model.model.", + }) + + @classmethod + def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: + if modality.startswith("image"): + return "<|vision_start|><|image_pad|><|vision_end|>" + if modality.startswith("video"): + return "<|vision_start|><|video_pad|><|vision_end|>" + + raise ValueError("Only image or video modality is supported") + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"): + super().__init__() + config: Qwen3VLConfig = vllm_config.model_config.hf_config + quant_config = vllm_config.quant_config + multimodal_config = vllm_config.model_config.multimodal_config + + self.config = config + self.multimodal_config = multimodal_config + + self.visual = Qwen3_VisionTransformer( + config.vision_config, + norm_eps=getattr(config, "rms_norm_eps", 1e-6), + quant_config=self._maybe_ignore_quant_config(quant_config), + prefix=maybe_prefix(prefix, "visual"), + ) + + self.language_model = Qwen3LLMForCausalLM(vllm_config=vllm_config, + prefix=maybe_prefix( + prefix, + "language_model")) + + self.make_empty_intermediate_tensors = ( + self.language_model.make_empty_intermediate_tensors) + + self.use_deepstack = hasattr(config.vision_config, + 'deepstack_visual_indexes') + self.deepstack_num_level = len( + config.vision_config.deepstack_visual_indexes + ) if self.use_deepstack else 0 + # register buffer for deepstack + self.deepstack_input_embeds = [ + torch.zeros(vllm_config.scheduler_config.max_num_batched_tokens, + config.text_config.hidden_size) + for _ in range(self.deepstack_num_level) + ] if self.use_deepstack else None + + def _get_deepstack_input_embeds(self, + num_tokens: int) -> IntermediateTensors: + # get deepstack_input_embeds from buffer, and clear the buffer + return IntermediateTensors({ + f"deepstack_input_embeds_{idx}": + self.deepstack_input_embeds[idx][:num_tokens] + for idx in range(self.deepstack_num_level) + }) + + def _set_deepstack_input_embeds( + self, deepstack_input_embeds: torch.Tensor) -> None: + # set deepstack_input_embeds to buffer + num_tokens = deepstack_input_embeds.size(1) + if num_tokens > self.deepstack_input_embeds[0].size(0): + self.deepstack_input_embeds = [ + torch.zeros(num_tokens, + self.config.text_config.hidden_size, + device=self.deepstack_input_embeds[0].device, + dtype=self.deepstack_input_embeds[0].dtype) + for _ in range(self.deepstack_num_level) + ] + for idx in range(self.deepstack_num_level): + self.deepstack_input_embeds[idx][:num_tokens].copy_( + deepstack_input_embeds[idx]) + + def _clear_deepstack_input_embeds(self, num_tokens: int) -> None: + # clear deepstack_input_embeds in buffer + if num_tokens > 0: + for idx in range(self.deepstack_num_level): + self.deepstack_input_embeds[idx][:num_tokens].zero_() + + def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): + # GPTQ configs do not have a list of ignored modules, however AutoGPTQ + # seems to avoid vision encoder sections for some models. + if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): + return None + return quant_config + + def _validate_and_reshape_mm_tensor(self, mm_input: object, + name: str) -> torch.Tensor: + if not isinstance(mm_input, (torch.Tensor, list)): + raise ValueError(f"Incorrect type of {name}. 
" + f"Got type: {type(mm_input)}") + if isinstance(mm_input, torch.Tensor): + if mm_input.ndim == 2: + return mm_input + if mm_input.ndim != 3: + raise ValueError(f"{name} should be 2D or batched 3D tensor. " + f"Got ndim: {mm_input.ndim} " + f"(shape={mm_input.shape})") + return torch.concat(list(mm_input)) + else: + return torch.concat(mm_input) + + def _parse_and_validate_image_input( + self, **kwargs: object) -> Optional[Qwen2_5_VLImageInputs]: + pixel_values = kwargs.pop("pixel_values", None) + image_embeds = kwargs.pop("image_embeds", None) + image_grid_thw = kwargs.pop("image_grid_thw", None) + + if pixel_values is None and image_embeds is None: + return None + + if pixel_values is not None: + pixel_values = self._validate_and_reshape_mm_tensor( + pixel_values, "image pixel values") + image_grid_thw = self._validate_and_reshape_mm_tensor( + image_grid_thw, "image grid_thw") + + if not isinstance(pixel_values, (torch.Tensor, list)): + raise ValueError("Incorrect type of image pixel values. " + f"Got type: {type(pixel_values)}") + + return Qwen2_5_VLImagePixelInputs(type="pixel_values", + pixel_values=pixel_values, + image_grid_thw=image_grid_thw) + + if image_embeds is not None: + image_embeds = self._validate_and_reshape_mm_tensor( + image_embeds, "image embeds") + image_grid_thw = self._validate_and_reshape_mm_tensor( + image_grid_thw, "image grid_thw") + + if not isinstance(image_embeds, torch.Tensor): + raise ValueError("Incorrect type of image embeddings. " + f"Got type: {type(image_embeds)}") + return Qwen2_5_VLImageEmbeddingInputs( + type="image_embeds", + image_embeds=image_embeds, + image_grid_thw=image_grid_thw) + + def _parse_and_validate_video_input( + self, **kwargs: object) -> Optional[Qwen2_5_VLVideoInputs]: + pixel_values_videos = kwargs.pop("pixel_values_videos", None) + video_embeds = kwargs.pop("video_embeds", None) + video_grid_thw = kwargs.pop("video_grid_thw", None) + second_per_grid_ts = kwargs.pop("second_per_grid_ts", None) + + if pixel_values_videos is None and video_embeds is None: + return None + + if pixel_values_videos is not None: + pixel_values_videos = self._validate_and_reshape_mm_tensor( + pixel_values_videos, "video pixel values") + video_grid_thw = self._validate_and_reshape_mm_tensor( + video_grid_thw, "video grid_thw") + + return Qwen2_5_VLVideoPixelInputs( + type="pixel_values_videos", + pixel_values_videos=pixel_values_videos, + video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, + ) + + if video_embeds is not None: + video_embeds = self._validate_and_reshape_mm_tensor( + video_embeds, "video embeds") + video_grid_thw = self._validate_and_reshape_mm_tensor( + video_grid_thw, "video grid_thw") + + if not isinstance(video_embeds, torch.Tensor): + raise ValueError("Incorrect type of video embeddings. " + f"Got type: {type(video_embeds)}") + return Qwen2_5_VLVideoEmbeddingInputs( + type="video_embeds", + video_embeds=video_embeds, + video_grid_thw=video_grid_thw) + + def _process_image_input( + self, + image_input: Qwen2_5_VLImageInputs) -> tuple[torch.Tensor, ...]: + + grid_thw = image_input["image_grid_thw"] + assert grid_thw.ndim == 2 + grid_thw_list = grid_thw.tolist() + + if image_input["type"] == "image_embeds": + image_embeds = image_input["image_embeds"].type(self.visual.dtype) + else: + pixel_values = image_input["pixel_values"].type(self.visual.dtype) + image_embeds = self.visual(pixel_values, grid_thw=grid_thw) + + # Split concatenated embeddings for each image item. 
+ # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync + merge_size = self.visual.spatial_merge_size + sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) // + (merge_size * merge_size)).tolist() + return image_embeds.split(sizes) + + def _process_video_input( + self, + video_input: Qwen2_5_VLVideoInputs) -> tuple[torch.Tensor, ...]: + + grid_thw = video_input["video_grid_thw"] + assert grid_thw.ndim == 2 + grid_thw_list = grid_thw.tolist() + + if video_input["type"] == "video_embeds": + video_embeds = video_input["video_embeds"].type(self.visual.dtype) + else: + pixel_values_videos = video_input["pixel_values_videos"].type( + self.visual.dtype) + video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw) + + # Split concatenated embeddings for each video item. + # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync + merge_size = self.visual.spatial_merge_size + sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) // + (merge_size * merge_size)).tolist() + return video_embeds.split(sizes) + + def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict: + mm_input_by_modality = {} + for input_key in kwargs: + if input_key in ("pixel_values", "image_embeds" + ) and "image" not in mm_input_by_modality: + mm_input_by_modality[ + "image"] = self._parse_and_validate_image_input(**kwargs) + if input_key in ("pixel_values_videos", "video_embeds" + ) and "video" not in mm_input_by_modality: + mm_input_by_modality[ + "video"] = self._parse_and_validate_video_input(**kwargs) + return mm_input_by_modality + + def get_language_model(self) -> torch.nn.Module: + return self.language_model + + def get_multimodal_embeddings( + self, **kwargs: object) -> Optional[MultiModalEmbeddings]: + + mm_input_by_modality = self._parse_and_validate_multimodal_inputs( + **kwargs) + if not mm_input_by_modality: + return None + + # The result multimodal_embeddings is tuple of tensors, with each + # tensor correspoending to a multimodal data item (image or video). + multimodal_embeddings: tuple[torch.Tensor, ...] = () + + # NOTE: It is important to iterate over the keys in this dictionary + # to preserve the order of the modalities. 
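+        # (dicts preserve insertion order, so modalities are processed in
+        # the order they were parsed above)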
+ for modality in mm_input_by_modality: + multimodal_input = mm_input_by_modality[modality] + if modality == "image": + vision_embeddings = self._process_image_input(multimodal_input) + multimodal_embeddings += vision_embeddings + if modality == "video": + video_embeddings = self._process_video_input(multimodal_input) + multimodal_embeddings += video_embeddings + return multimodal_embeddings + + def _compute_deepstack_embeds( + self, input_ids: torch.Tensor, inputs_embeds: torch.Tensor, + multimodal_embeddings: MultiModalEmbeddings) -> torch.Tensor: + visual_lens = [ + x.shape[0] if isinstance(x, torch.Tensor) else len(x) + for x in multimodal_embeddings + ] + multimodal_embeddings_cat = torch.cat(multimodal_embeddings, dim=0) + + visual_dim = multimodal_embeddings_cat.shape[-1] // ( + self.deepstack_num_level + 1) + + main_dim, multi_dim = visual_dim, visual_dim * self.deepstack_num_level + multimodal_embeddings_main, multimodal_embeddings_multiscale = torch.split( # noqa:E501 + multimodal_embeddings_cat, [main_dim, multi_dim], + dim=-1) + + multimodal_embeddings = torch.split(multimodal_embeddings_main, + visual_lens, + dim=0) + multimodal_embeddings_multiscale = torch.split( + multimodal_embeddings_multiscale, visual_lens, dim=0) + + deepstack_input_embeds = inputs_embeds.new_zeros( + inputs_embeds.size(0), + self.deepstack_num_level * inputs_embeds.size(1)) + + deepstack_input_embeds = merge_multimodal_embeddings( + input_ids, + deepstack_input_embeds, + multimodal_embeddings_multiscale, + placeholder_token_id=[ + self.config.image_token_id, self.config.video_token_id + ], + ) + deepstack_input_embeds = deepstack_input_embeds.view( + inputs_embeds.shape[0], self.deepstack_num_level, + visual_dim).contiguous() + deepstack_input_embeds = deepstack_input_embeds.permute( + 1, 0, 2).contiguous() + return deepstack_input_embeds, multimodal_embeddings + + def get_input_embeddings( + self, + input_ids: torch.Tensor, + multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + ) -> torch.Tensor: + deepstack_input_embeds = None + inputs_embeds = self.language_model.get_input_embeddings(input_ids) + if multimodal_embeddings is not None and self.use_deepstack: + deepstack_input_embeds, multimodal_embeddings = self._compute_deepstack_embeds( # noqa:E501 + input_ids, inputs_embeds, multimodal_embeddings) + inputs_embeds = merge_multimodal_embeddings( + input_ids, inputs_embeds, multimodal_embeddings, + [self.config.image_token_id, self.config.video_token_id]) + + if self.use_deepstack: + if deepstack_input_embeds is None: + deepstack_input_embeds = torch.zeros_like( + inputs_embeds).unsqueeze(0).repeat( + self.deepstack_num_level, 1, 1).contiguous() + self._set_deepstack_input_embeds(deepstack_input_embeds) + + return inputs_embeds + + def get_input_embeddings_v0( + self, + input_ids: torch.Tensor, + image_input: Optional[Qwen2_5_VLImageInputs] = None, + video_input: Optional[Qwen2_5_VLVideoInputs] = None, + ) -> torch.Tensor: + inputs_embeds = self.get_input_embeddings(input_ids) + + if self.use_deepstack: + visual_dim = inputs_embeds.shape[-1] + deepstack_input_embeds = None + if image_input is not None or video_input is not None: + deepstack_input_embeds = torch.zeros_like( + inputs_embeds).unsqueeze(1).repeat( + 1, self.deepstack_num_level, 1).flatten(1) + + if image_input is not None: + image_embeds = self._process_image_input(image_input) + if self.use_deepstack: + image_embeds = torch.cat(image_embeds) + + image_embeds, image_embeds_multiscale = image_embeds.split( + [visual_dim, 
visual_dim * self.deepstack_num_level], + dim=-1) + + deepstack_input_embeds = merge_multimodal_embeddings( + input_ids, + deepstack_input_embeds, + image_embeds_multiscale, + placeholder_token_id=self.config.image_token_id, + ) + + inputs_embeds = merge_multimodal_embeddings( + input_ids, + inputs_embeds, + image_embeds, + placeholder_token_id=self.config.image_token_id, + ) + + if video_input is not None: + video_embeds = self._process_video_input(video_input) + if self.use_deepstack: + video_embeds = torch.cat(video_embeds) + + video_embeds, video_embeds_multiscale = video_embeds.split( + [visual_dim, visual_dim * self.deepstack_num_level], + dim=-1) + + deepstack_input_embeds = merge_multimodal_embeddings( + input_ids, + deepstack_input_embeds, + video_embeds_multiscale, + placeholder_token_id=self.config.video_token_id, + ) + + inputs_embeds = merge_multimodal_embeddings( + input_ids, + inputs_embeds, + video_embeds, + placeholder_token_id=self.config.video_token_id, + ) + + if self.use_deepstack and deepstack_input_embeds is not None: + deepstack_input_embeds = deepstack_input_embeds.view( + inputs_embeds.shape[0], self.deepstack_num_level, + visual_dim).permute(1, 0, 2).contiguous() + self._set_deepstack_input_embeds(deepstack_input_embeds) + return inputs_embeds + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + **kwargs: object, + ) -> Union[torch.Tensor, IntermediateTensors]: + """Run forward pass for Qwen3VL. + + Args: + input_ids: Flattened (concatenated) input_ids corresponding to a + batch. + positions: Flattened (concatenated) position ids corresponding to a + batch. + **NOTE**: If mrope is enabled (default setting for Qwen3VL + opensource models), the shape will be `(3, seq_len)`, + otherwise it will be `(seq_len,). + pixel_values: Pixel values to be fed to a model. + `None` if no images are passed. + image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in LLM. + `None` if no images are passed. + pixel_values_videos: Pixel values of videos to be fed to a model. + `None` if no videos are passed. + video_grid_thw: Tensor `(n_videos, 3)` of video 3D grid in LLM. + `None` if no videos are passed. + """ + + if intermediate_tensors is not None: + inputs_embeds = None + + # NOTE: In v1, inputs_embeds is always generated at model runner from + # `get_multimodal_embeddings` and `get_input_embeddings`, this + # condition is only for v0 compatibility. 
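+        # V0 fallback: parse multimodal kwargs here and build inputs_embeds
+        # (and the deepstack buffers when enabled) before running the
+        # language model.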
+ elif inputs_embeds is None: + image_input = self._parse_and_validate_image_input(**kwargs) + video_input = self._parse_and_validate_video_input(**kwargs) + + if image_input is None and video_input is None: + inputs_embeds = None + else: + if uses_mrope(self.config): + assert positions.ndim == 2 and positions.size(0) == 3, ( + "multimodal section rotary embedding requires " + f"(3, seq_len) positions, but got {positions.size()}") + inputs_embeds = self.get_input_embeddings_v0( + input_ids, + image_input=image_input, + video_input=video_input) + input_ids = None + + if self.use_deepstack and inputs_embeds is not None and get_pp_group( + ).is_first_rank: + deepstack_input_embeds = self._get_deepstack_input_embeds( + inputs_embeds.size(0)) + else: + deepstack_input_embeds = None + + hidden_states = self.language_model.model( + input_ids=input_ids, + positions=positions, + intermediate_tensors=intermediate_tensors, + inputs_embeds=inputs_embeds, + # args for deepstack + deepstack_input_embeds=deepstack_input_embeds, + ) + + if inputs_embeds is not None and get_pp_group().is_first_rank: + self._clear_deepstack_input_embeds(inputs_embeds.size(0)) + + return hidden_states + + def compute_logits( + self, + hidden_states: torch.Tensor, + sampling_metadata: SamplingMetadata, + ) -> Optional[torch.Tensor]: + return self.language_model.compute_logits(hidden_states, + sampling_metadata) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader(self) + return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) + + def get_mm_mapping(self) -> MultiModelKeys: + """ + Get the module prefix in multimodal models + """ + return MultiModelKeys.from_string_field( + language_model="language_model", + connector="model.visual.merger", + tower_model="model.visual.", + ) diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py new file mode 100644 index 000000000000..a800e94ab1e5 --- /dev/null +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -0,0 +1,344 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +# Copyright 2025 The vLLM team. +# Copyright 2025 The Qwen Team. +# Copyright 2025 The HuggingFace Inc. team. +# All rights reserved. +# +# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX +# and OPT implementations in this library. It has been modified from its +# original forms to accommodate minor architectural differences compared +# to GPT-NeoX and OPT used by the Meta AI team that trained the model. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Inference-only Qwen3-VL-MoE model compatible with HuggingFace weights.""" +import typing +from collections.abc import Iterable +from typing import Callable, Optional, Union + +import torch +from transformers.models.qwen3_vl_moe.configuration_qwen3_vl_moe import ( + Qwen3VLMoeConfig) + +from vllm.compilation.decorators import support_torch_compile +from vllm.config import VllmConfig +from vllm.distributed import get_pp_group +from vllm.logger import init_logger +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead +from vllm.model_executor.model_loader.weight_utils import ( + default_weight_loader, maybe_remap_kv_scale_name) +from vllm.multimodal import MULTIMODAL_REGISTRY +from vllm.sequence import IntermediateTensors + +from .qwen3_moe import Qwen3MoeForCausalLM, Qwen3MoeModel +from .qwen3_vl import (Qwen3_VisionTransformer, Qwen3VLDummyInputsBuilder, + Qwen3VLForConditionalGeneration, + Qwen3VLMultiModalProcessor, Qwen3VLProcessingInfo) +from .utils import is_pp_missing_parameter, maybe_prefix + +logger = init_logger(__name__) + + +class Qwen3VLMoeProcessingInfo(Qwen3VLProcessingInfo): + + def get_hf_config(self): + return self.ctx.get_hf_config(Qwen3VLMoeConfig) + + +@support_torch_compile( + dynamic_arg_dims={ + "input_ids": 0, + # positions is of shape (3, seq_len) if mrope is enabled for qwen2-vl, + # otherwise (seq_len, ). + "positions": -1, + "intermediate_tensors": 0, + "inputs_embeds": 0, + # the same shape as input_embeds + "deepstack_input_embeds": 0 + }) +class Qwen3MoeLLMModel(Qwen3MoeModel): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + if not get_pp_group().is_first_rank: + assert self.start_layer >= len( + vllm_config.model_config.hf_config.vision_config. 
+ deepstack_visual_indexes), ( + "start_layer should be greater than or equal to " + "len(deepstack_visual_indexes)") + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + deepstack_input_embeds: Optional[IntermediateTensors] = None, + ) -> Union[torch.Tensor, IntermediateTensors]: + if get_pp_group().is_first_rank: + if inputs_embeds is not None: + hidden_states = inputs_embeds + else: + hidden_states = self.get_input_embeddings(input_ids) + residual = None + else: + assert intermediate_tensors is not None + hidden_states = intermediate_tensors["hidden_states"] + residual = intermediate_tensors["residual"] + for layer_idx, layer in enumerate( + self.layers[self.start_layer:self.end_layer]): + layer_idx = layer_idx + self.start_layer + + hidden_states, residual = layer( + positions, + hidden_states, + residual, + ) + + if deepstack_input_embeds is not None and \ + layer_idx in range(0, len(deepstack_input_embeds)): + hidden_states = hidden_states + deepstack_input_embeds[ + f"deepstack_input_embeds_{layer_idx}"] + + if not get_pp_group().is_last_rank: + return IntermediateTensors({ + "hidden_states": hidden_states, + "residual": residual + }) + hidden_states, _ = self.norm(hidden_states, residual) + return hidden_states + + def load_fused_expert_weights(self, name: str, params_dict: dict, + loaded_weight: torch.Tensor, shard_id: str, + num_experts: int): + param = params_dict[name] + weight_loader = typing.cast(Callable[..., bool], param.weight_loader) + for expert_id in range(num_experts): + curr_expert_weight = loaded_weight[expert_id] + success = weight_loader(param, + curr_expert_weight, + name, + shard_id, + expert_id, + return_success=True) + if not success: + return False + return True + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + stacked_params_mapping = [ + # (param_name, shard_name, shard_id) + ("qkv_proj", "q_proj", "q"), + ("qkv_proj", "k_proj", "k"), + ("qkv_proj", "v_proj", "v"), + ("gate_up_proj", "gate_proj", 0), + ("gate_up_proj", "up_proj", 1), + ] + # Skip loading extra parameters for GPTQ/modelopt models. + ignore_suffixes = (".bias", "_bias", ".k_scale", "_k_scale", + ".v_scale", "_v_scale", ".weight_scale", + "_weight_scale", ".input_scale", "_input_scale") + params_dict = dict(self.named_parameters()) + loaded_params: set[str] = set() + expert_params_mapping = self.get_expert_mapping() + is_fused_expert = False + fused_expert_params_mapping = [ + ("experts.w13_weight", "experts.gate_up_proj", 0, "w1"), + ("experts.w2_weight", "experts.down_proj", 0, "w2"), + ] + num_experts = self.config.num_experts + for name, loaded_weight in weights: + for (param_name, weight_name, shard_id) in stacked_params_mapping: + if ("experts.gate_up_proj" in name + or "experts.down_proj" in name): + is_fused_expert = True + expert_params_mapping = fused_expert_params_mapping + + # Skip non-stacked layers and experts (experts handled below). + if weight_name not in name: + continue + # We have mlp.experts[0].gate_proj in the checkpoint. + # Since we handle the experts below in expert_params_mapping, + # we need to skip here BEFORE we update the name, otherwise + # name will be updated to mlp.experts[0].gate_up_proj, which + # will then be updated below in expert_params_mapping + # for mlp.experts[0].gate_gate_up_proj, which breaks load. 
+ if "mlp.experts" in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra parameters for GPTQ/modelopt models. + if name.endswith(ignore_suffixes) and name not in params_dict: + continue + # Skip layers on other devices. + if is_pp_missing_parameter(name, self): + continue + if name.endswith("scale"): + # Remapping the name of FP8 kv-scale. + name = maybe_remap_kv_scale_name(name, params_dict) + if name is None: + continue + if name not in params_dict: + continue + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + if weight_loader == default_weight_loader: + weight_loader(param, loaded_weight) + else: + weight_loader(param, loaded_weight, shard_id) + break + else: + is_expert_weight = False + for mapping in expert_params_mapping: + param_name, weight_name, expert_id, shard_id = mapping + if weight_name not in name: + continue + # Anyway, this is an expert weight and should not be + # attempted to load as other weights later + is_expert_weight = True + name_mapped = name.replace(weight_name, param_name) + if is_fused_expert: + loaded_weight = loaded_weight.transpose(-1, + -2) # no bias + if "experts.gate_up_proj" in name: + loaded_weight = loaded_weight.chunk(2, dim=-2) + success_w1 = self.load_fused_expert_weights( + name_mapped, params_dict, loaded_weight[0], + "w1", num_experts) + success_w3 = self.load_fused_expert_weights( + name_mapped, params_dict, loaded_weight[1], + "w3", num_experts) + success = success_w1 and success_w3 + else: + # down_proj + success = self.load_fused_expert_weights( + name_mapped, params_dict, loaded_weight, + shard_id, num_experts) + else: + if is_pp_missing_parameter(name_mapped, self): + continue + # Skip loading extra parameters for GPTQ/modelopt models + if name_mapped.endswith( + ignore_suffixes + ) and name_mapped not in params_dict: + continue + param = params_dict[name_mapped] + # We should ask the weight loader to return success or + # not here since otherwise we may skip experts with + # other available replicas. + weight_loader = typing.cast(Callable[..., bool], + param.weight_loader) + success = weight_loader(param, + loaded_weight, + name_mapped, + shard_id=shard_id, + expert_id=expert_id, + return_success=True) + if success: + name = name_mapped + break + else: + if is_expert_weight: + # We've checked that this is an expert weight + # However it's not mapped locally to this rank + # So we simply skip it + continue + # Skip loading extra parameters for GPTQ/modelopt models. + if name.endswith( + ignore_suffixes) and name not in params_dict: + continue + # Skip layers on other devices. + if is_pp_missing_parameter(name, self): + continue + # Remapping the name of FP8 kv-scale. + if name.endswith("kv_scale"): + remapped_kv_scale_name = name.replace( + ".kv_scale", ".attn.kv_scale") + if remapped_kv_scale_name not in params_dict: + logger.warning_once( + "Found kv scale in the checkpoint (e.g. %s), but not found the expected name in the model (e.g. %s). 
kv-scale is not loaded.", # noqa: E501 + name, + remapped_kv_scale_name, + ) + continue + else: + name = remapped_kv_scale_name + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(name) + return loaded_params + + +class Qwen3MoeLLMForCausalLM(Qwen3MoeForCausalLM): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super(Qwen3MoeForCausalLM, self).__init__() + self.config = vllm_config.model_config.hf_config.text_config + self.quant_config = vllm_config.quant_config + self.model = Qwen3MoeLLMModel(vllm_config=vllm_config, + prefix=maybe_prefix(prefix, "model")) + self.lm_head = ParallelLMHead(self.config.vocab_size, + self.config.hidden_size, + quant_config=self.quant_config) + if self.config.tie_word_embeddings: + self.lm_head.weight = self.model.embed_tokens.weight + self.logits_processor = LogitsProcessor(self.config.vocab_size) + self.make_empty_intermediate_tensors = ( + self.model.make_empty_intermediate_tensors) + + +@MULTIMODAL_REGISTRY.register_processor(Qwen3VLMultiModalProcessor, + info=Qwen3VLMoeProcessingInfo, + dummy_inputs=Qwen3VLDummyInputsBuilder) +class Qwen3VLMoeForConditionalGeneration(Qwen3VLForConditionalGeneration): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super(Qwen3VLForConditionalGeneration, self).__init__() + config: Qwen3VLMoeConfig = vllm_config.model_config.hf_config + quant_config = vllm_config.quant_config + multimodal_config = vllm_config.model_config.multimodal_config + + self.config = config + self.multimodal_config = multimodal_config + + self.visual = Qwen3_VisionTransformer( + config.vision_config, + norm_eps=getattr(config, "rms_norm_eps", 1e-6), + quant_config=self._maybe_ignore_quant_config(quant_config), + prefix=maybe_prefix(prefix, "visual"), + ) + + self.language_model = Qwen3MoeLLMForCausalLM(vllm_config=vllm_config, + prefix=maybe_prefix( + prefix, + "language_model")) + + self.make_empty_intermediate_tensors = ( + self.language_model.make_empty_intermediate_tensors) + + self.use_deepstack = hasattr(config.vision_config, + 'deepstack_visual_indexes') + self.deepstack_num_level = len( + config.vision_config.deepstack_visual_indexes + ) if self.use_deepstack else 0 + # register buffer for deepstack + self.deepstack_input_embeds = [ + torch.zeros(vllm_config.scheduler_config.max_num_batched_tokens, + config.text_config.hidden_size) + for _ in range(self.deepstack_num_level) + ] if self.use_deepstack else None diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 38f3d5c69b9e..707b57106e6d 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -259,11 +259,13 @@ "Qwen2AudioForConditionalGeneration": ("qwen2_audio", "Qwen2AudioForConditionalGeneration"), # noqa: E501 "Qwen2_5OmniModel": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501 "Qwen2_5OmniForConditionalGeneration": ("qwen2_5_omni_thinker", "Qwen2_5OmniThinkerForConditionalGeneration"), # noqa: E501 - "UltravoxModel": ("ultravox", "UltravoxModel"), + "Qwen3VLForConditionalGeneration": ("qwen3_vl", "Qwen3VLForConditionalGeneration"), # noqa: E501 + "Qwen3VLMoeForConditionalGeneration": ("qwen3_vl_moe", "Qwen3VLMoeForConditionalGeneration"), # noqa: E501 "SkyworkR1VChatModel": ("skyworkr1v", "SkyworkR1VChatModel"), "Step3VLForConditionalGeneration": ("step3_vl", "Step3VLForConditionalGeneration"), # noqa: E501 
"TarsierForConditionalGeneration": ("tarsier", "TarsierForConditionalGeneration"), # noqa: E501 "Tarsier2ForConditionalGeneration": ("qwen2_vl", "Tarsier2ForConditionalGeneration"), # noqa: E501 + "UltravoxModel": ("ultravox", "UltravoxModel"), "VoxtralForConditionalGeneration": ("voxtral", "VoxtralForConditionalGeneration"), # noqa: E501 # [Encoder-decoder] "WhisperForConditionalGeneration": ("whisper", "WhisperForConditionalGeneration"), # noqa: E501 diff --git a/vllm/multimodal/video.py b/vllm/multimodal/video.py index fb2dcac49ee9..6981f2ce5623 100644 --- a/vllm/multimodal/video.py +++ b/vllm/multimodal/video.py @@ -156,7 +156,7 @@ def load_bytes( # can cause incorrect timestamp calculation without num_frames=-1. metadata = { "total_num_frames": num_frames, - "fps": original_fps, + "fps": num_frames / duration, "duration": duration, "video_backend": "opencv", "frames_indices": list(range(num_frames)), From ca2d1925ef5ad309061c2d5dd9a1e409c5ca28ee Mon Sep 17 00:00:00 2001 From: haoyangli-amd Date: Wed, 17 Sep 2025 13:15:13 +0800 Subject: [PATCH 028/518] [Rocm] [quantization] Fix quark ptpc moe and add test case (#24649) Signed-off-by: Haoyang Li Co-authored-by: Haoyang Li --- tests/quantization/test_quark.py | 25 ++ .../layers/quantization/quark/quark_moe.py | 223 ++++++++++++++---- 2 files changed, 196 insertions(+), 52 deletions(-) diff --git a/tests/quantization/test_quark.py b/tests/quantization/test_quark.py index 4a0c8ba4d8a9..c09931971e6f 100644 --- a/tests/quantization/test_quark.py +++ b/tests/quantization/test_quark.py @@ -77,6 +77,31 @@ def check_model(model): assert output +@pytest.mark.parametrize('tp', [1]) +def test_quark_fp8_w_per_channel_a_per_token(vllm_runner, tp): + model_path = "amd/Qwen2.5-1.5B-Instruct-ptpc-Quark-ts" + with vllm_runner(model_path, tensor_parallel_size=tp) as llm: + + def check_model(model): + layer = model.model.layers[0] + + qkv_proj = layer.self_attn.qkv_proj + + assert isinstance(qkv_proj.quant_method, QuarkLinearMethod) + assert isinstance(qkv_proj.scheme, QuarkW8A8Fp8) + + if isinstance(qkv_proj.scheme, QuarkW8A8Fp8): + assert qkv_proj.weight.dtype is current_platform.fp8_dtype() + assert qkv_proj.weight_scale.shape[0] == qkv_proj.weight.shape[ + 1] + assert qkv_proj.weight_scale.shape[1] == 1 + + llm.apply_model(check_model) + + output = llm.generate_greedy("Hello my name is", max_tokens=20) + assert output + + @pytest.mark.parametrize('tp', [1]) def test_quark_int8_w_per_tensor_a_per_tensor(vllm_runner, tp): model_path = "amd/Llama-3.1-8B-Instruct-w-int8-a-int8-sym-test" diff --git a/vllm/model_executor/layers/quantization/quark/quark_moe.py b/vllm/model_executor/layers/quantization/quark/quark_moe.py index 6cff9f3019d3..bc8ae980429a 100644 --- a/vllm/model_executor/layers/quantization/quark/quark_moe.py +++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py @@ -5,17 +5,25 @@ import torch +import vllm.envs as envs from vllm import _custom_ops as ops from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig, FusedMoEMethodBase, FusedMoeWeightScaleSupported) +from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( + is_rocm_aiter_moe_enabled) +from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( + prepare_moe_fp8_layer_for_marlin) from vllm.model_executor.layers.quantization.utils.mxfp4_utils import ( OCP_MX_BLOCK_SIZE) +from vllm.model_executor.layers.quantization.utils.quant_utils import ( + GroupShape) from 
vllm.model_executor.layers.quantization.utils.w8a8_utils import ( all_close_1d, normalize_e4m3fn_to_e4m3fnuz, per_tensor_dequantize) from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform +from vllm.scalar_type import scalar_types logger = init_logger(__name__) @@ -67,21 +75,45 @@ def __init__( self.weight_quant = weight_config self.input_quant = input_config - weight_qscheme = self.weight_quant.get("qscheme") - input_qscheme = self.input_quant.get("qscheme") - if not (weight_qscheme == "per_tensor" - and input_qscheme == "per_tensor"): + self.weight_qscheme = self.weight_quant.get("qscheme") + self.input_qscheme = self.input_quant.get("qscheme") + per_tensor = (self.weight_qscheme == "per_tensor" + and self.input_qscheme == "per_tensor") + per_channel = (self.weight_qscheme == "per_channel" + and self.input_qscheme == "per_channel") + self.act_quant_group_shape = GroupShape.PER_TOKEN \ + if per_channel else GroupShape.PER_TENSOR + if not (per_tensor or per_channel): raise ValueError( - "For FP8 Fused MoE layers, only per-tensor scales " - "for weights and activations are supported. Found " - f"{weight_qscheme}, {input_qscheme}") # noqa E501 + "For FP8 Fused MoE layers, only per-tensor and per-channel " + "scales for weights and activations are supported. Found " + f"{self.weight_qscheme}, {self.input_qscheme}") # noqa E501 self.static_input_scales = not self.input_quant.get("is_dynamic") + if self.static_input_scales and per_channel: + raise ValueError( + "For FP8 Fused MoE layer, we require either per tensor or " + "channelwise, dynamic per token quantization.") + + # For GPUs that lack FP8 hardware support, we can leverage the Marlin + # kernel for fast weight-only FP8 quantization + self.use_marlin = (not current_platform.has_device_capability(89) + or envs.VLLM_TEST_FORCE_FP8_MARLIN) + # Disable marlin for rocm + if current_platform.is_rocm(): + self.use_marlin = False + + self.rocm_aiter_moe_enabled = is_rocm_aiter_moe_enabled() def create_weights(self, layer: torch.nn.Module, num_experts: int, hidden_size: int, intermediate_size_per_partition: int, params_dtype: torch.dtype, **extra_weight_attrs): + layer.intermediate_size_per_partition = intermediate_size_per_partition + layer.hidden_size = hidden_size + layer.num_experts = num_experts + layer.orig_dtype = params_dtype + layer.weight_block_size = None params_dtype = torch.float8_e4m3fn # WEIGHTS @@ -104,24 +136,39 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, set_weight_attrs(w2_weight, extra_weight_attrs) # WEIGHT_SCALES - # Allocate 2 scales for w1 and w3 respectively. - # They will be combined to a single scale after weight loading. - w13_weight_scale = torch.nn.Parameter(torch.ones(num_experts, - 2, - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("w13_weight_scale", w13_weight_scale) - - w2_weight_scale = torch.nn.Parameter(torch.ones(num_experts, - dtype=torch.float32), - requires_grad=False) - layer.register_parameter("w2_weight_scale", w2_weight_scale) - # Add the quantization method used (per tensor/grouped/channel) - # to ensure the weight scales are loaded in properly - extra_weight_attrs.update( - {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}) - set_weight_attrs(w13_weight_scale, extra_weight_attrs) - set_weight_attrs(w2_weight_scale, extra_weight_attrs) + if self.weight_qscheme == "per_tensor": + # Allocate 2 scales for w1 and w3 respectively. + # They are combined to a single scale after weight loading. 
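+            # (the checkpoint carries separate per-expert scales for w1 and
+            # w3; see process_weights_after_loading below for the
+            # requantization into a single w13 scale)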
+ w13_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, 2, dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w13_weight_scale", w13_weight_scale) + w2_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w2_weight_scale", w2_weight_scale) + # Add PER-TENSOR quantization for FusedMoE.weight_loader. + extra_weight_attrs.update( + {"quant_method": FusedMoeWeightScaleSupported.TENSOR.value}) + set_weight_attrs(w13_weight_scale, extra_weight_attrs) + set_weight_attrs(w2_weight_scale, extra_weight_attrs) + elif self.weight_qscheme == "per_channel": + # quark's scale is 1 dim. + w13_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, + 2 * intermediate_size_per_partition, + dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w13_weight_scale", w13_weight_scale) + w2_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, hidden_size, dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w2_weight_scale", w2_weight_scale) + # Add PER-CHANNEL quantization for FusedMoE.weight_loader. + extra_weight_attrs.update( + {"quant_method": FusedMoeWeightScaleSupported.CHANNEL.value}) + set_weight_attrs(w13_weight_scale, extra_weight_attrs) + set_weight_attrs(w2_weight_scale, extra_weight_attrs) # INPUT_SCALES if self.static_input_scales: @@ -185,24 +232,60 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.w2_input_scale = torch.nn.Parameter(w2_input_scale, requires_grad=False) - # Fp8 moe kernel needs single weight scale for w13 per expert. - # We take the max then dequant and requant each expert. - assert layer.w13_weight_scale is not None - shard_size = layer.intermediate_size_per_partition - max_w13_scales = layer.w13_weight_scale.max(dim=1).values - for expert_id in range(layer.local_num_experts): - start = 0 - for shard_id in range(2): - dq_weight = per_tensor_dequantize( - layer.w13_weight[expert_id][start:start + shard_size, :], - layer.w13_weight_scale[expert_id][shard_id]) - layer.w13_weight[expert_id][ - start:start + shard_size, :], _ = ops.scaled_fp8_quant( - dq_weight, max_w13_scales[expert_id]) - start += shard_size - - layer.w13_weight_scale = torch.nn.Parameter(max_w13_scales, - requires_grad=False) + # For per-tensor case, Fp8 moe kernel needs single weight scale + # for w13 per expert. Use max then dequant and requant each expert. + if self.weight_qscheme == "per_tensor": + assert layer.w13_weight_scale is not None + shard_size = layer.intermediate_size_per_partition + max_w13_scales = layer.w13_weight_scale.max(dim=1).values + for expert_id in range(layer.local_num_experts): + start = 0 + for shard_id in range(2): + dq_weight = per_tensor_dequantize( + layer.w13_weight[expert_id][start:start + + shard_size, :], + layer.w13_weight_scale[expert_id][shard_id]) + layer.w13_weight[expert_id][ + start:start + shard_size, :], _ = ops.scaled_fp8_quant( + dq_weight, max_w13_scales[expert_id]) + start += shard_size + + layer.w13_weight_scale = torch.nn.Parameter(max_w13_scales, + requires_grad=False) + # quark's scale is 1 dim. 
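+        # (per-channel scales are created as (num_experts, dim); the
+        # unsqueeze below adds a trailing unit dim, giving
+        # (num_experts, dim, 1))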
+ elif self.weight_qscheme == "per_channel": + if self.act_quant_group_shape == GroupShape.PER_TOKEN: + w13_weight_scale = layer.w13_weight_scale.unsqueeze(-1) + layer.w13_weight_scale = torch.nn.Parameter( + w13_weight_scale, requires_grad=False) + w2_weight_scale = layer.w2_weight_scale.unsqueeze(-1) + layer.w2_weight_scale = torch.nn.Parameter(w2_weight_scale, + requires_grad=False) + # Property to determine if AITER is used + if self.rocm_aiter_moe_enabled: + from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa E501 + rocm_aiter_fused_experts, shuffle_weights) + + # reshaping weights is required for aiter moe kernel. + shuffled_w13, shuffled_w2 = shuffle_weights( + layer.w13_weight.data, layer.w2_weight.data) + + layer.w13_weight = torch.nn.Parameter(shuffled_w13, + requires_grad=False) + layer.w2_weight = torch.nn.Parameter(shuffled_w2, + requires_grad=False) + + self.rocm_aiter_fused_experts_func = rocm_aiter_fused_experts + elif self.use_marlin: + + prepare_moe_fp8_layer_for_marlin(layer, False) + # Activations not quantized for marlin. + del layer.w13_input_scale + del layer.w2_input_scale + self.fused_experts_func = None + else: + from vllm.model_executor.layers.fused_moe import fused_experts + self.fused_experts_func = fused_experts def apply( self, @@ -233,8 +316,6 @@ def apply( raise NotImplementedError( "EPLB not supported for `QuarkW8A8Fp8MoEMethod` yet.") - from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, @@ -249,22 +330,60 @@ def apply( e_score_correction_bias=e_score_correction_bias, indices_type=self.topk_indices_dtype) - return fused_experts( - x, - layer.w13_weight, - layer.w2_weight, + if self.rocm_aiter_moe_enabled: + return self.rocm_aiter_fused_experts_func( + hidden_states=x, + w1=layer.w13_weight, + w2=layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + activation=activation, + apply_router_weight_on_input=apply_router_weight_on_input, + use_fp8_w8a8=True, + per_channel_quant=self.weight_qscheme == "per_channel", + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + expert_map=expert_map) + if self.use_marlin: + assert activation == "silu", ( + f"{activation} not supported for Marlin MoE.") + return torch.ops.vllm.fused_marlin_moe( + x, + layer.w13_weight, + layer.w2_weight, + None, + None, + layer.w13_weight_scale, + layer.w2_weight_scale, + router_logits, + topk_weights, + topk_ids, + quant_type_id=scalar_types.float8_e4m3fn.id, + apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, + expert_map=expert_map) + + assert self.fused_experts_func is not None + + return self.fused_experts_func( + hidden_states=x, + w1=layer.w13_weight, + w2=layer.w2_weight, topk_weights=topk_weights, topk_ids=topk_ids, inplace=True, + activation=activation, + apply_router_weight_on_input=apply_router_weight_on_input, use_fp8_w8a8=True, + per_channel_quant=self.weight_qscheme == "per_channel", global_num_experts=global_num_experts, - apply_router_weight_on_input=apply_router_weight_on_input, expert_map=expert_map, w1_scale=layer.w13_weight_scale, w2_scale=layer.w2_weight_scale, a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - activation=activation) + a2_scale=layer.w2_input_scale) class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod): From 43a62c51be1254a9b2923c7f477af0cfc52d1937 Mon Sep 17 
00:00:00 2001 From: Daniel Serebrenik <74646983+pliops-daniels@users.noreply.github.com> Date: Wed, 17 Sep 2025 08:53:17 +0300 Subject: [PATCH 029/518] Add more documentation and improve usability of lognormal dist (benchmark_serving_multi_turn) (#23255) Signed-off-by: daniels --- benchmarks/multi_turn/README.md | 101 +++++++++++++++++ benchmarks/multi_turn/bench_dataset.py | 105 +++++++++++++++++- .../multi_turn/generate_multi_turn.json | 5 +- 3 files changed, 203 insertions(+), 8 deletions(-) diff --git a/benchmarks/multi_turn/README.md b/benchmarks/multi_turn/README.md index 7adf97bcf562..f5b5c6c97d48 100644 --- a/benchmarks/multi_turn/README.md +++ b/benchmarks/multi_turn/README.md @@ -55,6 +55,107 @@ output_num_chunks 166.0 99.01 11.80 79.00 90.00 98.00 108.75 ---------------------------------------------------------------------------------------------------- ``` +### JSON configuration file for synthetic conversations generation + +The input flag `--input-file` is used to determine the input conversations for the benchmark.
+When the input is a JSON file with the field `"filetype": "generate_conversations"`, the tool generates synthetic multi-turn (question-and-answer) conversations.
+
+The file `generate_multi_turn.json` is an example of such a file.
+
+The file must contain the sections `prompt_input` and `prompt_output`.
+
+The `prompt_input` section must contain `num_turns`, `prefix_num_tokens` and `num_tokens`:
+
+* `num_turns` - Number of total turns in the conversation (both user & assistant).
+The final value is always rounded to an even number, so every user turn gets a reply.
+* `prefix_num_tokens` - Tokens added at the start of the **first user turn** only (unique per conversation).
+* `num_tokens` - Total token length of each **user** message (one turn).
+
+The `prompt_output` section must contain `num_tokens`:
+
+* `num_tokens` - Total token length of each **assistant** message (one turn).
+
+### Random distributions for synthetic conversations generation
+
+When creating an input JSON file (such as `generate_multi_turn.json`),
+every numeric field (such as `num_turns` or `num_tokens`) requires a distribution.
+The distribution determines how to randomly sample values for the field. + +The available distributions are listed below. + +**Note:** The optional `max` field (for lognormal, zipf, and poisson) can be used to cap sampled values at an upper bound.
+Can be used to make sure that the total number of tokens in every request does not exceed `--max-model-len`. + +#### constant + +```json +{ + "distribution": "constant", + "value": 500 +} +``` + +* `value` - the fixed integer value (always returns the same number). + +#### uniform + +```json +{ + "distribution": "uniform", + "min": 12, + "max": 18 +} +``` + +* `min` - minimum value (inclusive). +* `max` - maximum value (inclusive), should be equal or larger than min. + +#### lognormal + +```json +{ + "distribution": "lognormal", + "average": 1000, + "max": 5000 +} +``` + +You can parameterize the lognormal distribution in one of two ways: + +Using the average and optional median ratio: + +* `average` - target average value of the distribution. +* `median_ratio` - the ratio of the median to the average; controls the skewness. Must be in the range (0, 1). + +Using the parameters of the underlying normal distribution: + +* `mean` - mean of the underlying normal distribution. +* `sigma` - standard deviation of the underlying normal distribution. + +#### zipf + +```json +{ + "distribution": "zipf", + "alpha": 1.2, + "max": 100 +} +``` + +* `alpha` - skew parameter (> 1). Larger values produce stronger skew toward smaller integers. + +#### poisson + +```json +{ + "distribution": "poisson", + "alpha": 10, + "max": 50 +} +``` + +* `alpha` - expected value (λ). Also the variance of the distribution. + ## ShareGPT Conversations To run with the ShareGPT data, download the following ShareGPT dataset: diff --git a/benchmarks/multi_turn/bench_dataset.py b/benchmarks/multi_turn/bench_dataset.py index 411b89dd23dc..67b937930d58 100644 --- a/benchmarks/multi_turn/bench_dataset.py +++ b/benchmarks/multi_turn/bench_dataset.py @@ -99,21 +99,105 @@ def __repr__(self) -> str: class LognormalDistribution(Distribution): def __init__( - self, mean: float, sigma: float, max_val: Optional[int] = None + self, + mean: Optional[float] = None, + sigma: Optional[float] = None, + average: Optional[int] = None, + median_ratio: Optional[float] = None, + max_val: Optional[int] = None, ) -> None: + self.average = average + self.median_ratio = median_ratio + self.max_val = max_val + + if average is not None: + if average < 1: + raise ValueError("Lognormal average must be positive") + + if mean or sigma: + raise ValueError( + "When using lognormal average, you can't provide mean/sigma" + ) + + if self.median_ratio is None: + # Default value that provides relatively wide range of values + self.median_ratio = 0.85 + + # Calculate mean/sigma of np.random.lognormal based on the average + mean, sigma = self._generate_lognormal_by_median( + target_average=self.average, median_ratio=self.median_ratio + ) + else: + if mean is None or sigma is None: + raise ValueError( + "Must provide both mean and sigma if average is not used" + ) + + if mean <= 0 or sigma < 0: + raise ValueError( + "Lognormal mean must be positive and sigma must be non-negative" + ) + + # Mean and standard deviation of the underlying normal distribution + # Based on numpy.random.lognormal self.mean = mean self.sigma = sigma - self.max_val = max_val + + @staticmethod + def _generate_lognormal_by_median( + target_average: int, median_ratio: float + ) -> tuple[float, float]: + """ + Compute (mu, sigma) for a lognormal distribution given: + - a target average (mean of the distribution) + - a ratio of median / mean (controls skewness), assume mean > median + + Background: + If Z ~ Normal(mu, sigma^2), then X = exp(Z) ~ LogNormal(mu, sigma). 
+ * mean(X) = exp(mu + sigma^2 / 2) + * median(X) = exp(mu) + + So: + median / mean = exp(mu) / exp(mu + sigma^2 / 2) + = exp(-sigma^2 / 2) + + Rearranging: + sigma^2 = 2 * ln(mean / median) + mu = ln(median) + + This gives a unique (mu, sigma) for any valid mean and median. + """ + # Check input validity: median must be smaller than mean + if median_ratio <= 0 or median_ratio >= 1: + raise ValueError("median_ratio must be in range (0, 1)") + + target_median = target_average * median_ratio + + # Solve sigma^2 = 2 * ln(mean / median) + sigma = np.sqrt(2 * np.log(target_average / target_median)) + mu = np.log(target_median) + + return mu, sigma def sample(self, size: int = 1) -> np.ndarray: samples = np.random.lognormal(mean=self.mean, sigma=self.sigma, size=size) + + if self.average is not None: + # Scale to average + samples *= self.average / samples.mean() + if self.max_val: samples = np.minimum(samples, self.max_val) return np.round(samples).astype(int) def __repr__(self) -> str: - return f"LognormalDistribution[{self.mean}, {self.sigma}]" + if self.average: + return ( + f"LognormalDistribution[{self.average}, " + f"{self.median_ratio}, {self.max_val}]" + ) + return f"LognormalDistribution[{self.mean}, {self.sigma}, {self.max_val}]" class GenConvArgs(NamedTuple): @@ -173,10 +257,21 @@ def get_random_distribution( return PoissonDistribution(conf["alpha"], max_val=max_val) elif distribution == "lognormal": + max_val = conf.get("max", None) + + if "average" in conf: + # Infer lognormal mean/sigma (numpy) from input average + median_ratio = conf.get("median_ratio", None) + return LognormalDistribution( + average=conf["average"], median_ratio=median_ratio, max_val=max_val + ) + + # Use mean/sigma directly (for full control over the distribution) verify_field_exists(conf, "mean", section, subsection) verify_field_exists(conf, "sigma", section, subsection) - max_val = conf.get("max", None) - return LognormalDistribution(conf["mean"], conf["sigma"], max_val=max_val) + return LognormalDistribution( + mean=conf["mean"], sigma=conf["sigma"], max_val=max_val + ) elif distribution == "uniform": verify_field_exists(conf, "min", section, subsection) diff --git a/benchmarks/multi_turn/generate_multi_turn.json b/benchmarks/multi_turn/generate_multi_turn.json index 274d03c2bdb2..03cfc7d63e8a 100644 --- a/benchmarks/multi_turn/generate_multi_turn.json +++ b/benchmarks/multi_turn/generate_multi_turn.json @@ -15,9 +15,8 @@ }, "prefix_num_tokens": { "distribution": "lognormal", - "mean": 6, - "sigma": 4, - "max": 1500 + "average": 1000, + "max": 5000 }, "num_tokens": { "distribution": "uniform", From dd39baf7175c5e79faef071c67bb318eadb7752f Mon Sep 17 00:00:00 2001 From: Kunshang Ji Date: Wed, 17 Sep 2025 14:45:25 +0800 Subject: [PATCH 030/518] [XPU] Fix xpu model runner call torch.cuda APIs (#25011) Signed-off-by: Kunshang Ji --- vllm/v1/worker/xpu_model_runner.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/v1/worker/xpu_model_runner.py b/vllm/v1/worker/xpu_model_runner.py index fb892211f19d..7becdd392498 100644 --- a/vllm/v1/worker/xpu_model_runner.py +++ b/vllm/v1/worker/xpu_model_runner.py @@ -45,8 +45,12 @@ def __init__(self, *args, **kwargs) -> None: self.synchronize = lambda: None try: - # replace cuda Event with xpu Event, this should work by default + # replace cuda APIs with xpu APIs, this should work by default torch.cuda.Event = torch.xpu.Event + torch.cuda.Stream = torch.xpu.Stream + torch.cuda.default_stream = torch.xpu.current_stream + torch.cuda.current_stream = 
torch.xpu.current_stream + torch.cuda.stream = torch.xpu.stream yield finally: # if anything goes wrong, just patch it with a placeholder From b77bf34e531abb32c054a38747fa817d08395ae7 Mon Sep 17 00:00:00 2001 From: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Date: Wed, 17 Sep 2025 00:27:34 -0700 Subject: [PATCH 031/518] [EPLB] Support EPLB for Mixtral Model (#22842) Signed-off-by: rouchenzi Signed-off-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Co-authored-by: Bowen Wang --- vllm/model_executor/models/mixtral.py | 160 ++++++++++++++++++++++---- 1 file changed, 137 insertions(+), 23 deletions(-) diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 52fcbbfc58be..b02030b6d627 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -23,7 +23,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """Inference-only Mixtral model.""" -from collections.abc import Iterable +import typing +from collections.abc import Callable, Iterable from itertools import islice from typing import Optional, Union @@ -33,8 +34,9 @@ from vllm.attention import Attention from vllm.compilation.decorators import support_torch_compile -from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size +from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config +from vllm.distributed import (get_ep_group, get_pp_group, + get_tensor_model_parallel_world_size) from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (QKVParallelLinear, @@ -50,8 +52,8 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors -from .interfaces import SupportsLoRA, SupportsPP -from .utils import (AutoWeightsLoader, is_pp_missing_parameter, +from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP +from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -74,10 +76,32 @@ def __init__(self, quant_config: Optional[QuantizationConfig] = None, tp_size: Optional[int] = None, dp_size: Optional[int] = None, - prefix: str = ""): + prefix: str = "", + enable_eplb: bool = False): super().__init__() self.hidden_size = hidden_size + self.ep_group = get_ep_group().device_group + self.ep_rank = self.ep_group.rank() + self.ep_size = self.ep_group.size() + + # Expert Parallelism Load balancing settings. + vllm_config = get_current_vllm_config() + parallel_config = vllm_config.parallel_config + self.enable_eplb = enable_eplb + + self.n_routed_experts = num_experts + self.n_logical_experts = num_experts + self.n_redundant_experts = ( + parallel_config.eplb_config.num_redundant_experts) + self.n_physical_experts = (self.n_logical_experts + + self.n_redundant_experts) + self.n_local_physical_experts = self.n_physical_experts // self.ep_size + self.physical_expert_start = (self.ep_rank * + self.n_local_physical_experts) + self.physical_expert_end = (self.physical_expert_start + + self.n_local_physical_experts) + # Gate always runs at half / full precision for now. 
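+        # (ReplicatedLinear keeps the full router weight on every TP rank
+        # rather than sharding it; the routing projection is small)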
self.gate = ReplicatedLinear(hidden_size, @@ -97,7 +121,9 @@ def __init__(self, quant_config=quant_config, tp_size=tp_size, dp_size=dp_size, - prefix=f"{prefix}.experts") + prefix=f"{prefix}.experts", + enable_eplb=self.enable_eplb, + num_redundant_experts=self.n_redundant_experts) def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # NOTE: hidden_states can have either 1D or 2D shape. @@ -200,6 +226,7 @@ def __init__( cache_config: Optional[CacheConfig] = None, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + enable_eplb: bool = False, ) -> None: super().__init__() self.hidden_size = config.hidden_size @@ -221,7 +248,8 @@ def __init__( hidden_size=config.hidden_size, intermediate_size=config.intermediate_size, quant_config=quant_config, - prefix=f"{prefix}.block_sparse_moe") + prefix=f"{prefix}.block_sparse_moe", + enable_eplb=enable_eplb) self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = RMSNorm(config.hidden_size, @@ -262,6 +290,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config lora_config = vllm_config.lora_config + parallel_config = vllm_config.parallel_config self.config = config self.quant_config = quant_config @@ -276,10 +305,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): org_num_embeddings=config.vocab_size, ) + self.enable_eplb = parallel_config.enable_eplb + self.num_redundant_experts = ( + parallel_config.eplb_config.num_redundant_experts) + self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, lambda prefix: MixtralDecoderLayer( - config, cache_config, quant_config=quant_config, prefix=prefix + config, + cache_config, + quant_config=quant_config, + prefix=prefix, + enable_eplb=self.enable_eplb, ), prefix=f"{prefix}.layers") @@ -325,7 +362,8 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: ckpt_gate_proj_name="w1", ckpt_down_proj_name="w2", ckpt_up_proj_name="w3", - num_experts=self.config.num_local_experts) + num_experts=self.config.num_local_experts, + num_redundant_experts=self.num_redundant_experts) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: @@ -373,26 +411,40 @@ def load_weights(self, weights: Iterable[tuple[str, weight_loader(param, loaded_weight, shard_id) break else: + is_expert_weight = False for mapping in expert_params_mapping: param_name, weight_name, expert_id, shard_id = mapping + if weight_name not in name: continue - name = name.replace(weight_name, param_name) + + is_expert_weight = True + name_mapped = name.replace(weight_name, param_name) + # Skip layers on other devices. 
- if is_pp_missing_parameter(name, self): + if is_pp_missing_parameter(name_mapped, self): continue - if ((name.endswith(".bias") or name.endswith("_bias")) - and name not in params_dict): + + if ((name_mapped.endswith(".bias") + or name_mapped.endswith("_bias")) + and name_mapped not in params_dict): continue - param = params_dict[name] - weight_loader = param.weight_loader - weight_loader(param, - loaded_weight, - name, - shard_id=shard_id, - expert_id=expert_id) - break + + param = params_dict[name_mapped] + weight_loader = typing.cast(Callable[..., bool], + param.weight_loader) + success = weight_loader(param, + loaded_weight, + name_mapped, + shard_id=shard_id, + expert_id=expert_id, + return_success=True) + if success: + name = name_mapped + break else: + if is_expert_weight: + continue # Skip loading extra bias for GPTQ models. if ((name.endswith(".bias") or name.endswith("_bias")) and name not in params_dict): @@ -413,7 +465,8 @@ def load_weights(self, weights: Iterable[tuple[str, return loaded_params -class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP): +class MixtralForCausalLM(nn.Module, SupportsLoRA, SupportsPP, + MixtureOfExperts): fall_back_to_pt_during_load = False packed_modules_mapping = { @@ -462,6 +515,67 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + self.expert_weights = [] + self.moe_layers: list[FusedMoE] = [] + example_moe = None + + for layer in self.model.layers: + if isinstance(layer, PPMissingLayer): + continue + assert isinstance(layer, MixtralDecoderLayer) + if hasattr(layer, "block_sparse_moe") and isinstance( + layer.block_sparse_moe, MixtralMoE): + example_moe = layer.block_sparse_moe + self.moe_layers.append(layer.block_sparse_moe.experts) + + self.num_moe_layers = len(self.moe_layers) + + if example_moe is None: + raise RuntimeError("No MixtralMoE layer found in model.layers.") + + self.num_logical_experts = example_moe.n_logical_experts + self.num_physical_experts = example_moe.n_physical_experts + self.num_local_physical_experts = example_moe.n_local_physical_experts + self.num_routed_experts = example_moe.n_routed_experts + self.num_redundant_experts = example_moe.n_redundant_experts + self.num_expert_groups = 1 + self.num_shared_experts = 0 + + def set_eplb_state( + self, + expert_load_view: torch.Tensor, + logical_to_physical_map: torch.Tensor, + logical_replica_count: torch.Tensor, + ) -> None: + for layer_idx, layer in enumerate(self.moe_layers): + # Register the expert weights. 
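+            # (these views let the EPLB rebalancer rearrange expert weights
+            # across ranks in place at runtime)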
+ self.expert_weights.append(layer.get_expert_weights()) + layer.set_eplb_state( + moe_layer_idx=layer_idx, + expert_load_view=expert_load_view, + logical_to_physical_map=logical_to_physical_map, + logical_replica_count=logical_replica_count, + ) + + def update_physical_experts_metadata( + self, + num_physical_experts: int, + num_local_physical_experts: int, + ) -> None: + assert self.num_local_physical_experts == num_local_physical_experts + self.num_physical_experts = num_physical_experts + self.num_local_physical_experts = num_local_physical_experts + self.num_redundant_experts = (num_physical_experts - + self.num_logical_experts) + for layer in self.model.layers: + if hasattr(layer, "block_sparse_moe") and isinstance( + layer.block_sparse_moe, MixtralMoE): + moe = layer.block_sparse_moe + moe.n_local_physical_experts = num_local_physical_experts + moe.n_physical_experts = num_physical_experts + moe.n_redundant_experts = self.num_redundant_experts + moe.experts.update_expert_map() + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.model.get_input_embeddings(input_ids) From 03191cd8f0ffa0f37629518b19d9155260fd2483 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Wed, 17 Sep 2025 08:57:34 +0100 Subject: [PATCH 032/518] [Core][MultiModalHasher] Hash images without converting image mode (#24969) Signed-off-by: Lukas Geiger --- vllm/multimodal/hasher.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/vllm/multimodal/hasher.py b/vllm/multimodal/hasher.py index 0fb1363ce471..df6c531d876a 100644 --- a/vllm/multimodal/hasher.py +++ b/vllm/multimodal/hasher.py @@ -12,7 +12,6 @@ from PIL import Image from vllm.logger import init_logger -from vllm.multimodal.image import convert_image_mode logger = init_logger(__name__) @@ -35,8 +34,12 @@ def serialize_item(cls, obj: object) -> Iterable[Union[bytes, memoryview]]: exif[Image.ExifTags.Base.ImageID], uuid.UUID): # If the image has exif ImageID tag, use that return (exif[Image.ExifTags.Base.ImageID].bytes, ) - return cls.iter_item_to_bytes( - "image", np.asarray(convert_image_mode(obj, "RGBA"))) + data = {"mode": obj.mode, "data": np.asarray(obj)} + if obj.palette is not None: + data["palette"] = obj.palette.palette + if obj.palette.rawmode is not None: + data["palette_rawmode"] = obj.palette.rawmode + return cls.iter_item_to_bytes("image", data) if isinstance(obj, torch.Tensor): tensor_obj: torch.Tensor = obj.cpu() tensor_dtype = tensor_obj.dtype From 4a9375fe9dbde2e88fde268fab40f5d3a2b6a8ff Mon Sep 17 00:00:00 2001 From: whx <56632993+whx-sjtu@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:01:27 +0800 Subject: [PATCH 033/518] [Model] Pass param prefix to LLMHead (#24862) Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/models/arctic.py | 1 + vllm/model_executor/models/aria.py | 1 + vllm/model_executor/models/baichuan.py | 6 ++++-- vllm/model_executor/models/bamba.py | 1 + vllm/model_executor/models/bloom.py | 4 +++- vllm/model_executor/models/chameleon.py | 1 + vllm/model_executor/models/dbrx.py | 1 + vllm/model_executor/models/deepseek.py | 9 ++++++--- vllm/model_executor/models/deepseek_eagle.py | 3 ++- vllm/model_executor/models/deepseek_v2.py | 9 ++++++--- vllm/model_executor/models/dots1.py | 4 +++- vllm/model_executor/models/ernie45_moe.py | 4 +++- vllm/model_executor/models/ernie45_vl_moe.py | 4 +++- vllm/model_executor/models/ernie_mtp.py | 3 ++- vllm/model_executor/models/exaone.py | 1 + vllm/model_executor/models/exaone4.py | 1 + 
vllm/model_executor/models/falcon.py | 1 + vllm/model_executor/models/falcon_h1.py | 1 + vllm/model_executor/models/glm4_moe.py | 4 +++- vllm/model_executor/models/gpt_bigcode.py | 3 ++- vllm/model_executor/models/gpt_j.py | 1 + vllm/model_executor/models/gpt_oss.py | 1 + vllm/model_executor/models/granite.py | 1 + vllm/model_executor/models/granitemoe.py | 1 + vllm/model_executor/models/hunyuan_v1.py | 3 ++- vllm/model_executor/models/idefics3.py | 1 + vllm/model_executor/models/jais.py | 4 +++- vllm/model_executor/models/jamba.py | 1 + vllm/model_executor/models/kimi_vl.py | 1 + vllm/model_executor/models/llama_eagle3.py | 2 +- vllm/model_executor/models/mamba.py | 1 + vllm/model_executor/models/mamba2.py | 1 + vllm/model_executor/models/medusa.py | 3 +++ vllm/model_executor/models/mimo_mtp.py | 3 ++- vllm/model_executor/models/minicpm.py | 1 + vllm/model_executor/models/minicpm_eagle.py | 1 + vllm/model_executor/models/minimax_text_01.py | 1 + vllm/model_executor/models/mixtral.py | 1 + vllm/model_executor/models/molmo.py | 1 + vllm/model_executor/models/nemotron.py | 1 + vllm/model_executor/models/nemotron_h.py | 1 + vllm/model_executor/models/olmo.py | 1 + vllm/model_executor/models/olmoe.py | 3 ++- vllm/model_executor/models/opt.py | 4 +++- vllm/model_executor/models/orion.py | 3 ++- vllm/model_executor/models/persimmon.py | 3 ++- vllm/model_executor/models/phi.py | 3 ++- vllm/model_executor/models/phi4flash.py | 1 + vllm/model_executor/models/phi4mm.py | 1 + vllm/model_executor/models/phimoe.py | 1 + vllm/model_executor/models/qwen.py | 3 ++- vllm/model_executor/models/qwen2_moe.py | 3 ++- vllm/model_executor/models/qwen3_moe.py | 3 ++- vllm/model_executor/models/qwen3_next.py | 2 +- vllm/model_executor/models/qwen3_next_mtp.py | 3 ++- vllm/model_executor/models/solar.py | 1 + vllm/model_executor/models/step3_text.py | 4 +++- vllm/model_executor/models/zamba2.py | 1 + 58 files changed, 102 insertions(+), 31 deletions(-) diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index c566611266af..b6dd55996841 100644 --- a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -427,6 +427,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.vocab_size, config.hidden_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index db262447d7fa..a7cb6b35a4ab 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -539,6 +539,7 @@ def __init__( config.text_config.hidden_size, org_num_embeddings=self.language_model.org_vocab_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) logit_scale = getattr(config, "logit_scale", 1.0) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index 4563c356666a..ae2503341040 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -51,7 +51,8 @@ from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant from .utils import (AutoWeightsLoader, is_pp_missing_parameter, - make_empty_intermediate_tensors_factory, make_layers) + make_empty_intermediate_tensors_factory, make_layers, + maybe_prefix) def _get_alibi_slopes(total_num_heads: int) -> torch.Tensor: @@ -394,7 +395,8 @@ 
def __init__( position_embedding=position_embedding) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) self.lm_head.weight.weight_loader = self.lm_head_weight_loader if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index a72bbdebe531..397089f31cdf 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -514,6 +514,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) # Used to track and store by the Mamba cache between steps. self.mamba_cache: Optional[MambaCacheManager] = None diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index f8ed92314c3d..4c37622b049c 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -330,7 +330,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = self.transformer.word_embeddings else: self.lm_head = ParallelLMHead(self.config.vocab_size, - self.config.hidden_size) + self.config.hidden_size, + prefix=maybe_prefix( + prefix, "lm_head")) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index 28a1a66c2329..7a5623648374 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -960,6 +960,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead( self.unpadded_vocab_size, config.hidden_size, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py index 519cd522213b..003cf4563a22 100644 --- a/vllm/model_executor/models/dbrx.py +++ b/vllm/model_executor/models/dbrx.py @@ -438,6 +438,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): org_num_embeddings=config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 3f9349d766df..4395b11b7d0f 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -453,9 +453,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.quant_config = quant_config self.model = DeepseekModel(vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")) - self.lm_head = ParallelLMHead(config.vocab_size, - config.hidden_size, - quant_config=quant_config) + self.lm_head = ParallelLMHead( + config.vocab_size, + config.hidden_size, + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), + ) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py index 
5e8447a7f48f..b1d7f24c2f18 100644 --- a/vllm/model_executor/models/deepseek_eagle.py +++ b/vllm/model_executor/models/deepseek_eagle.py @@ -199,7 +199,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead(self.config.vocab_size, self.config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) logit_scale = getattr(self.config, "logit_scale", 1.0) self.logits_processor = LogitsProcessor(self.config.vocab_size, diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index e4a21febc5bd..636554bd648f 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -823,9 +823,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.model = DeepseekV2Model(vllm_config=vllm_config, prefix=maybe_prefix(prefix, "model")) if get_pp_group().is_last_rank: - self.lm_head = ParallelLMHead(config.vocab_size, - config.hidden_size, - quant_config=quant_config) + self.lm_head = ParallelLMHead( + config.vocab_size, + config.hidden_size, + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), + ) else: self.lm_head = PPMissingLayer() self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py index 4ddf906dddef..20555e48b73d 100644 --- a/vllm/model_executor/models/dots1.py +++ b/vllm/model_executor/models/dots1.py @@ -504,7 +504,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if get_pp_group().is_last_rank: self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix( + prefix, "lm_head")) else: self.lm_head = PPMissingLayer() self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index 33ec27fc630e..ebab018ed67e 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -562,7 +562,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if get_pp_group().is_last_rank: self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix( + prefix, "lm_head")) else: self.lm_head = PPMissingLayer() diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index 6034505fa7d6..7f791852ceb9 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -557,7 +557,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if get_pp_group().is_last_rank: self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix( + prefix, "lm_head")) else: self.lm_head = PPMissingLayer() diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index 90a1267b28f0..57c534887437 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -158,7 +158,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix( prefix, "model")) self.lm_head = ParallelLMHead(self.config.vocab_size, - self.config.hidden_size) + self.config.hidden_size, + prefix=maybe_prefix(prefix, "lm_head")) 
self.sampler = get_sampler() if self.config.tie_word_embeddings: diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py index 942db0143a45..f503fb0f9364 100644 --- a/vllm/model_executor/models/exaone.py +++ b/vllm/model_executor/models/exaone.py @@ -502,6 +502,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.transformer.wte.weight diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py index e94c43a47f76..9f7d57d93814 100644 --- a/vllm/model_executor/models/exaone4.py +++ b/vllm/model_executor/models/exaone4.py @@ -485,6 +485,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index a9fe0924babd..42c378e5c389 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -473,6 +473,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.vocab_size, config.hidden_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/falcon_h1.py b/vllm/model_executor/models/falcon_h1.py index 5e2b6d69124c..757051b3b144 100644 --- a/vllm/model_executor/models/falcon_h1.py +++ b/vllm/model_executor/models/falcon_h1.py @@ -607,6 +607,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size), + prefix=maybe_prefix(prefix, "lm_head"), ) self.lm_head_multiplier = config.lm_head_multiplier if self.tie_word_embeddings: diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index 1fb457609289..e7d967edaf24 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -608,7 +608,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if get_pp_group().is_last_rank: self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix( + prefix, "lm_head")) else: self.lm_head = PPMissingLayer() self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index d5c2604145ee..745d0b775999 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -302,7 +302,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead( self.transformer.vocab_size, self.transformer.embed_dim, - org_num_embeddings=self.config.vocab_size) + org_num_embeddings=self.config.vocab_size, + prefix=maybe_prefix(prefix, "lm_head")) self.unpadded_vocab_size = config.vocab_size if lora_config: self.unpadded_vocab_size += lora_config.lora_extra_vocab_size diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index 584c7f5d8a2d..77df6ae6f30c 
100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -306,6 +306,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.n_embd, bias=True, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index e0b4df772875..990a1d6d883a 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -655,6 +655,7 @@ def __init__( self.lm_head = ParallelLMHead( self.config.vocab_size, self.config.hidden_size, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(self.config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index f8ba0229210a..4f9cc2532bd8 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -434,6 +434,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 07ad75bcf166..da16c72000c0 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -487,6 +487,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index a74a44bc2b51..db054b5c537e 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -58,7 +58,7 @@ from .interfaces import SupportsLoRA, SupportsPP from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter, - make_layers) + make_layers, maybe_prefix) def _is_moe(config: PretrainedConfig) -> bool: @@ -871,6 +871,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): org_num_embeddings=config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index 63307470d959..9153a0e2c1e5 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -606,6 +606,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.text_config.vocab_size, config.text_config.hidden_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if self.config.text_config.tie_word_embeddings: self.lm_head.weight = self.model.text_model.wte.weight diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index 91a06dd50247..4fee8c32fd58 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -302,7 +302,9 @@ def __init__(self, *, vllm_config: VllmConfig, 
prefix: str = ""): self.lm_head = self.transformer.wte else: self.lm_head = ParallelLMHead(self.config.vocab_size, - self.config.hidden_size) + self.config.hidden_size, + prefix=maybe_prefix( + prefix, "lm_head")) if hasattr(config, "width_scale"): self.output_logits_scale = config.width_scale else: diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 550fde17b6c5..5b8fbc722686 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -502,6 +502,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) # Used to track and store by the Mamba cache between steps. self.mamba_cache: Optional[MambaCacheManager] = None diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index 4f76d4afdb20..94a5933a6141 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -328,6 +328,7 @@ def __init__( config.text_config.hidden_size, org_num_embeddings=self.config.text_config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, + prefix=maybe_prefix(prefix, "lm_head"), ) else: self.lm_head = PPMissingLayer() diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index 99b77729b501..7027138dfcb1 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -220,7 +220,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config.hidden_size, org_num_embeddings=self.config.draft_vocab_size, padding_size=(DEFAULT_VOCAB_PADDING_SIZE), - prefix="") + prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(self.config.draft_vocab_size, scale=logit_scale) self.draft_id_to_target_id = nn.Parameter( diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py index f02499a4f96b..9d1017dac8aa 100644 --- a/vllm/model_executor/models/mamba.py +++ b/vllm/model_executor/models/mamba.py @@ -223,6 +223,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) # Used to track and store by the Mamba cache between steps. 
diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index 81b9a125380a..b1a4138cb8f6 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -278,6 +278,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.backbone.embeddings) diff --git a/vllm/model_executor/models/medusa.py b/vllm/model_executor/models/medusa.py index 709a5a993c6f..6ba8ad372c95 100644 --- a/vllm/model_executor/models/medusa.py +++ b/vllm/model_executor/models/medusa.py @@ -15,6 +15,8 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.sampling_metadata import SamplingMetadata +from .utils import maybe_prefix + class ResidualBlock(nn.Module): @@ -71,6 +73,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: config.hidden_size, org_num_embeddings=self.truncated_vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, + prefix=maybe_prefix(prefix, "lm_head"), ) self.lm_heads = [ self.lm_head for _ in range(self.config.num_heads) diff --git a/vllm/model_executor/models/mimo_mtp.py b/vllm/model_executor/models/mimo_mtp.py index 5a2079bf5121..ac835edc001e 100644 --- a/vllm/model_executor/models/mimo_mtp.py +++ b/vllm/model_executor/models/mimo_mtp.py @@ -158,7 +158,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix( prefix, "model")) self.lm_head = ParallelLMHead(self.config.vocab_size, - self.config.hidden_size) + self.config.hidden_size, + prefix=maybe_prefix(prefix, "lm_head")) def forward( self, diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index 5632f8c8cc4f..c7be7f76dba1 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -547,6 +547,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) diff --git a/vllm/model_executor/models/minicpm_eagle.py b/vllm/model_executor/models/minicpm_eagle.py index 06c2eb4e80af..848a97b8bb2a 100644 --- a/vllm/model_executor/models/minicpm_eagle.py +++ b/vllm/model_executor/models/minicpm_eagle.py @@ -338,6 +338,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index ef1fe86c5b5c..6ce883be0a83 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -702,6 +702,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.config.hidden_size, org_num_embeddings=self.config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, diff --git 
a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index b02030b6d627..8b3474d80953 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -507,6 +507,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index 5d999a02b4e6..2475fe131609 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -1403,6 +1403,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.embedding_size or config.vocab_size, config.hidden_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(config.embedding_size diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py index 10adc62d3de3..21f785e4b91a 100644 --- a/vllm/model_executor/models/nemotron.py +++ b/vllm/model_executor/models/nemotron.py @@ -466,6 +466,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py index da8628df1fe5..1e1f0524bd06 100644 --- a/vllm/model_executor/models/nemotron_h.py +++ b/vllm/model_executor/models/nemotron_h.py @@ -565,6 +565,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) # Used to track and store by the Mamba cache between steps. 
self.mamba_cache: Optional[MambaCacheManager] = None diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 71575989565a..7be3c16528b5 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -364,6 +364,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.hidden_size, org_num_embeddings=config.vocab_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py index 9b8525bfadec..892e967e4a21 100644 --- a/vllm/model_executor/models/olmoe.py +++ b/vllm/model_executor/models/olmoe.py @@ -450,7 +450,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index b92e586f0bf2..365aab205b21 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -375,7 +375,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = self.model.decoder.embed_tokens else: self.lm_head = ParallelLMHead(config.vocab_size, - config.word_embed_proj_dim) + config.word_embed_proj_dim, + prefix=maybe_prefix( + prefix, "lm_head")) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index add751ebf09c..944a9151d75d 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -314,7 +314,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/persimmon.py b/vllm/model_executor/models/persimmon.py index 6bdd38d06880..3e854e4d561f 100644 --- a/vllm/model_executor/models/persimmon.py +++ b/vllm/model_executor/models/persimmon.py @@ -307,7 +307,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - bias=False) + bias=False, + prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index 789b24eb0f6b..6f39afbecf35 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -322,7 +322,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, bias=True, - quant_config=quant_config) + quant_config=quant_config, + 
prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(config.vocab_size) self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py index fcdfcb7bc160..c4548ee168bd 100644 --- a/vllm/model_executor/models/phi4flash.py +++ b/vllm/model_executor/models/phi4flash.py @@ -630,6 +630,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size), quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) self.embedding_bias = None # Used to track and store by the Mamba cache between steps. diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py index 46963828186c..b3fc55dab6ec 100644 --- a/vllm/model_executor/models/phi4mm.py +++ b/vllm/model_executor/models/phi4mm.py @@ -989,6 +989,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): org_num_embeddings=config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index 15ae081a9f5f..01d16f1f2c38 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -645,6 +645,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if not lora_config else lora_config.lora_vocab_padding_size), quant_config=None, bias=True, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index e32dc51f00c0..747094849900 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -271,7 +271,8 @@ def __init__( prefix, "transformer")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) if self.config.tie_word_embeddings: self.lm_head.weight = self.transformer.wte.weight self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index 5551ad8c3232..5e6dea67c940 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -519,7 +519,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head")) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 0a504d90cde1..f66e8b0b454b 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -605,7 +605,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.lm_head = ParallelLMHead(config.vocab_size, config.hidden_size, - quant_config=quant_config) + quant_config=quant_config, + 
prefix=maybe_prefix(prefix, "lm_head")) if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight self.logits_processor = LogitsProcessor(config.vocab_size) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 86e26da5b9b8..3c5407916c0b 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1089,7 +1089,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, - ) + prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py index e7aff377e9ae..190a1750e673 100644 --- a/vllm/model_executor/models/qwen3_next_mtp.py +++ b/vllm/model_executor/models/qwen3_next_mtp.py @@ -238,7 +238,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead(self.unpadded_vocab_size, config.hidden_size, org_num_embeddings=config.vocab_size, - padding_size=DEFAULT_VOCAB_PADDING_SIZE) + padding_size=DEFAULT_VOCAB_PADDING_SIZE, + prefix=maybe_prefix(prefix, "lm_head")) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) self.make_empty_intermediate_tensors = ( diff --git a/vllm/model_executor/models/solar.py b/vllm/model_executor/models/solar.py index 8dd52f1d204a..94c862258b7a 100644 --- a/vllm/model_executor/models/solar.py +++ b/vllm/model_executor/models/solar.py @@ -469,6 +469,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # compatibility if not lora_config else lora_config.lora_vocab_padding_size, quant_config=quant_config, + prefix=maybe_prefix(prefix, "lm_head"), ) if config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 97611d3e140e..b8733fa5e612 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -35,7 +35,8 @@ from .interfaces import SupportsPP from .utils import (PPMissingLayer, is_pp_missing_parameter, - make_empty_intermediate_tensors_factory, make_layers) + make_empty_intermediate_tensors_factory, make_layers, + maybe_prefix) logger = init_logger(__name__) @@ -386,6 +387,7 @@ def __init__( org_num_embeddings=config.vocab_size, padding_size=DEFAULT_VOCAB_PADDING_SIZE if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) diff --git a/vllm/model_executor/models/zamba2.py b/vllm/model_executor/models/zamba2.py index 86335d48c145..e601bc3adb6e 100644 --- a/vllm/model_executor/models/zamba2.py +++ b/vllm/model_executor/models/zamba2.py @@ -941,6 +941,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: # We need bigger padding if using lora for kernel # compatibility if not lora_config else lora_config.lora_vocab_padding_size, + prefix=maybe_prefix(prefix, "lm_head"), ) # Tie weights with input embeddings if using same dimensions self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) From c15309a730fae1c2ad8670f14e8971a9b8accdcf Mon Sep 17 00:00:00 2001 From: whx 
<56632993+whx-sjtu@users.noreply.github.com> Date: Wed, 17 Sep 2025 16:02:31 +0800 Subject: [PATCH 034/518] [Model] Apply SharedFusedMoE to glm4_moe. (#24849) Signed-off-by: whx-sjtu <2952154980@qq.com> --- vllm/model_executor/models/glm4_moe.py | 85 +++++++++++++++++--------- 1 file changed, 55 insertions(+), 30 deletions(-) diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index e7d967edaf24..1acbd18091fb 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -46,6 +46,7 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope +from vllm.model_executor.layers.shared_fused_moe import SharedFusedMoE from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( @@ -146,25 +147,6 @@ def __init__( self.physical_expert_end = (self.physical_expert_start + self.n_local_physical_experts) - self.experts = FusedMoE( - num_experts=config.n_routed_experts, - top_k=config.num_experts_per_tok, - hidden_size=config.hidden_size, - intermediate_size=config.moe_intermediate_size, - reduce_results=False, - renormalize=config.norm_topk_prob, - quant_config=quant_config, - use_grouped_topk=True, - num_expert_group=config.n_group, - topk_group=config.topk_group, - prefix=f"{prefix}.experts", - scoring_func="sigmoid", - # we do scaling outside, set factor to 1.0 to avoid double mul - routed_scaling_factor=1.0, - e_score_correction_bias=self.gate.e_score_correction_bias, - enable_eplb=self.enable_eplb, - num_redundant_experts=self.n_redundant_experts) - if config.n_shared_experts is not None: intermediate_size = (config.moe_intermediate_size * config.n_shared_experts) @@ -173,25 +155,68 @@ def __init__( intermediate_size=intermediate_size, hidden_act=config.hidden_act, quant_config=quant_config, - reduce_results=self.experts.must_reduce_shared_expert_outputs( - ), + reduce_results=False, prefix=f"{prefix}.shared_experts", ) + self.experts = SharedFusedMoE( + shared_experts=self.shared_experts, + num_experts=config.n_routed_experts, + top_k=config.num_experts_per_tok, + hidden_size=config.hidden_size, + intermediate_size=config.moe_intermediate_size, + reduce_results=False, + renormalize=config.norm_topk_prob, + quant_config=quant_config, + use_grouped_topk=True, + num_expert_group=config.n_group, + topk_group=config.topk_group, + prefix=f"{prefix}.experts", + scoring_func="sigmoid", + # we do scaling outside, set factor to 1.0 to avoid double mul + routed_scaling_factor=1.0, + e_score_correction_bias=self.gate.e_score_correction_bias, + enable_eplb=self.enable_eplb, + num_redundant_experts=self.n_redundant_experts, + ) + else: + self.experts = FusedMoE( + num_experts=config.n_routed_experts, + top_k=config.num_experts_per_tok, + hidden_size=config.hidden_size, + intermediate_size=config.moe_intermediate_size, + reduce_results=False, + renormalize=config.norm_topk_prob, + quant_config=quant_config, + use_grouped_topk=True, + num_expert_group=config.n_group, + topk_group=config.topk_group, + prefix=f"{prefix}.experts", + scoring_func="sigmoid", + # we do scaling outside, set factor to 1.0 to avoid double mul + routed_scaling_factor=1.0, + e_score_correction_bias=self.gate.e_score_correction_bias, + enable_eplb=self.enable_eplb, + 
num_redundant_experts=self.n_redundant_experts) def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: num_tokens, hidden_dim = hidden_states.shape hidden_states = hidden_states.view(-1, hidden_dim) - if self.n_shared_experts is not None: - shared_output = self.shared_experts(hidden_states) - else: - shared_output = None + # router_logits: (num_tokens, n_experts) router_logits = self.gate(hidden_states.to(dtype=torch.float32)) - final_hidden_states = self.experts( - hidden_states=hidden_states, - router_logits=router_logits) * self.routed_scaling_factor - if shared_output is not None: - final_hidden_states = final_hidden_states + shared_output + + fused_moe_out = self.experts(hidden_states=hidden_states, + router_logits=router_logits) + + if self.shared_experts is not None: + shared_output, final_hidden_states = fused_moe_out + assert shared_output is not None + final_hidden_states = \ + final_hidden_states * self.routed_scaling_factor\ + + shared_output + else: + final_hidden_states = fused_moe_out * self.routed_scaling_factor + if self.tp_size > 1: final_hidden_states = ( self.experts.maybe_all_reduce_tensor_model_parallel( From 6c47f6bfa4794178035d9d941d7d40c1d71473b7 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Wed, 17 Sep 2025 01:42:59 -0700 Subject: [PATCH 035/518] [Core] Remove tokenizer group in vLLM (#24078) Signed-off-by: Zhuohan Li --- tests/detokenizer/test_stop_checker.py | 8 +- tests/engine/test_stop_checker.py | 7 +- tests/entrypoints/conftest.py | 22 --- tests/entrypoints/openai/test_chat.py | 9 +- tests/entrypoints/openai/test_completion.py | 23 +-- .../test_completion_with_prompt_embeds.py | 5 +- .../entrypoints/openai/test_lora_adapters.py | 10 +- tests/entrypoints/openai/test_models.py | 2 - tests/entrypoints/openai/test_tokenization.py | 28 +-- .../tool_parsers/test_hermes_tool_parser.py | 2 + tests/entrypoints/test_chat_utils.py | 56 +----- tests/lora/test_llama_tp.py | 28 +-- tests/lora/test_lora_allowed_token_ids.py | 135 -------------- tests/lora/test_quant_model.py | 29 +-- tests/lora/test_tokenizer_group.py | 72 -------- tests/test_cache_block_hashing.py | 11 +- tests/tokenization/test_detokenize.py | 16 +- tests/tokenization/test_tokenizer_group.py | 27 --- tests/tokenization/test_tokenizer_registry.py | 4 + tests/v1/engine/conftest.py | 8 +- tests/v1/engine/test_output_processor.py | 10 +- tests/v1/engine/utils.py | 6 +- .../llm/test_struct_output_generate.py | 2 +- vllm/benchmarks/datasets.py | 173 ++++++++---------- vllm/engine/async_llm_engine.py | 15 +- vllm/engine/llm_engine.py | 57 ++---- vllm/engine/output_processor/interfaces.py | 6 +- vllm/engine/output_processor/stop_checker.py | 5 +- vllm/engine/protocol.py | 10 +- vllm/entrypoints/llm.py | 20 +- vllm/entrypoints/openai/serving_chat.py | 2 +- .../openai/serving_classification.py | 5 +- vllm/entrypoints/openai/serving_completion.py | 3 +- vllm/entrypoints/openai/serving_embedding.py | 7 +- vllm/entrypoints/openai/serving_pooling.py | 3 +- vllm/entrypoints/openai/serving_responses.py | 2 +- vllm/entrypoints/openai/serving_score.py | 2 +- .../openai/serving_tokenization.py | 4 +- vllm/inputs/preprocess.py | 84 ++------- vllm/transformers_utils/detokenizer.py | 23 +-- vllm/transformers_utils/tokenizer.py | 33 ++-- vllm/transformers_utils/tokenizer_base.py | 5 + vllm/transformers_utils/tokenizer_group.py | 132 ------------- vllm/transformers_utils/tokenizers/mistral.py | 4 + vllm/v1/engine/async_llm.py | 15 +- vllm/v1/engine/llm_engine.py | 10 +- vllm/v1/engine/output_processor.py | 8 +- 
vllm/v1/engine/processor.py | 55 +++--- vllm/v1/structured_output/__init__.py | 7 +- 49 files changed, 276 insertions(+), 934 deletions(-) delete mode 100644 tests/lora/test_lora_allowed_token_ids.py delete mode 100644 tests/lora/test_tokenizer_group.py delete mode 100644 tests/tokenization/test_tokenizer_group.py delete mode 100644 vllm/transformers_utils/tokenizer_group.py diff --git a/tests/detokenizer/test_stop_checker.py b/tests/detokenizer/test_stop_checker.py index bd221977224f..2ca10c072b34 100644 --- a/tests/detokenizer/test_stop_checker.py +++ b/tests/detokenizer/test_stop_checker.py @@ -1,10 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from unittest.mock import MagicMock - import pytest -from transformers import PreTrainedTokenizer from vllm.engine.output_processor.stop_checker import StopChecker from vllm.inputs import token_inputs @@ -54,10 +51,7 @@ def test_stop_on_eos_token(text_wo_eos: str, eos_token: str, eos_token_id: int, - When the EOS token should be ignored, and the sequence continues """ - tokenizer = MagicMock(spec=PreTrainedTokenizer) - get_tokenizer_for_seq = MagicMock(return_value=tokenizer) - stop_checker = StopChecker(max_model_len=1024, - get_tokenizer_for_seq=get_tokenizer_for_seq) + stop_checker = StopChecker(max_model_len=1024) seq = sequence_with_eos( text=text_wo_eos, diff --git a/tests/engine/test_stop_checker.py b/tests/engine/test_stop_checker.py index 3d1e1c8032a4..34f4cb13ab0a 100644 --- a/tests/engine/test_stop_checker.py +++ b/tests/engine/test_stop_checker.py @@ -58,16 +58,13 @@ def deepseek_r1_qwen_tokenizer(): @pytest.fixture def stop_checker(): - return StopChecker(max_model_len=10, - get_tokenizer_for_seq=deepseek_r1_qwen_tokenizer) + return StopChecker(max_model_len=10) @pytest.fixture def stop_checker_with_reasoner(): reasoner = MockReasoningParser(deepseek_r1_qwen_tokenizer) - return StopChecker(max_model_len=10, - get_tokenizer_for_seq=deepseek_r1_qwen_tokenizer, - reasoner=reasoner) + return StopChecker(max_model_len=10, reasoner=reasoner) def test_eos_token_stopping(stop_checker): diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py index 48fd848e8820..c23eeee27186 100644 --- a/tests/entrypoints/conftest.py +++ b/tests/entrypoints/conftest.py @@ -208,25 +208,3 @@ def zephyr_lora_files(): """Download zephyr LoRA files once per test session.""" from huggingface_hub import snapshot_download return snapshot_download(repo_id="typeof/zephyr-7b-beta-lora") - - -@pytest.fixture(scope="session") -def zephyr_lora_added_tokens_files(zephyr_lora_files): - """Create zephyr LoRA files with added tokens once per test session.""" - import shutil - from tempfile import TemporaryDirectory - - from transformers import AutoTokenizer - - tmp_dir = TemporaryDirectory() - tmp_model_dir = f"{tmp_dir.name}/zephyr" - shutil.copytree(zephyr_lora_files, tmp_model_dir) - tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta") - # Copy tokenizer to adapter and add some unique tokens - # 32000, 32001, 32002 - added = tokenizer.add_tokens(["vllm1", "vllm2", "vllm3"], - special_tokens=True) - assert added == 3 - tokenizer.save_pretrained(tmp_model_dir) - yield tmp_model_dir - tmp_dir.cleanup() diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index 4608850c7dae..d5924b7b3ae3 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -29,11 +29,7 @@ def monkeypatch_module(): 
@pytest.fixture(scope="module", params=[False, True]) -def server( - request, - monkeypatch_module, - zephyr_lora_files, #noqa: F811 - zephyr_lora_added_tokens_files): # noqa: F811 +def server(request, monkeypatch_module, zephyr_lora_files): #noqa: F811 use_v1 = request.param monkeypatch_module.setenv('VLLM_USE_V1', '1' if use_v1 else '0') @@ -49,7 +45,6 @@ def server( "--enable-lora", "--lora-modules", f"zephyr-lora={zephyr_lora_files}", - f"zephyr-lora2={zephyr_lora_added_tokens_files}", "--max-lora-rank", "64", "--max-cpu-loras", @@ -79,7 +74,7 @@ async def client(server): @pytest.mark.parametrize( # first test base model, then test loras "model_name", - [MODEL_NAME, "zephyr-lora", "zephyr-lora2"], + [MODEL_NAME, "zephyr-lora"], ) async def test_no_logprobs_chat(client: openai.AsyncOpenAI, model_name: str): messages = [{ diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py index d55f8d9d65d9..3650b1579257 100644 --- a/tests/entrypoints/openai/test_completion.py +++ b/tests/entrypoints/openai/test_completion.py @@ -27,7 +27,7 @@ @pytest.fixture(scope="module") -def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files): +def default_server_args(zephyr_lora_files): return [ # use half precision for speed and memory savings in CI environment "--dtype", @@ -41,7 +41,6 @@ def default_server_args(zephyr_lora_files, zephyr_lora_added_tokens_files): "--enable-lora", "--lora-modules", f"zephyr-lora={zephyr_lora_files}", - f"zephyr-lora2={zephyr_lora_added_tokens_files}", "--max-lora-rank", "64", "--max-cpu-loras", @@ -87,7 +86,7 @@ async def client(server): @pytest.mark.parametrize( # first test base model, then test loras "model_name", - [MODEL_NAME, "zephyr-lora", "zephyr-lora2"], + [MODEL_NAME, "zephyr-lora"], ) async def test_single_completion(client: openai.AsyncOpenAI, model_name: str): completion = await client.completions.create(model=model_name, @@ -115,20 +114,6 @@ async def test_single_completion(client: openai.AsyncOpenAI, model_name: str): assert completion.choices[0].prompt_logprobs is None -@pytest.mark.asyncio -async def test_added_lora_tokens(client: openai.AsyncOpenAI): - # test using token IDs - completion = await client.completions.create( - model="zephyr-lora2", - prompt=[0, 0, 32000, 32001, 32002], - echo=True, - max_tokens=5, - temperature=0.0, - ) - # Added tokens should appear in tokenized prompt - assert completion.choices[0].text.startswith("vllm1vllm2vllm3") - - @pytest.mark.asyncio async def test_added_lora_tokens_base_model(client: openai.AsyncOpenAI): # test using token IDs @@ -147,7 +132,7 @@ async def test_added_lora_tokens_base_model(client: openai.AsyncOpenAI): @pytest.mark.parametrize( # first test base model, then test loras "model_name", - [MODEL_NAME, "zephyr-lora", "zephyr-lora2"], + [MODEL_NAME, "zephyr-lora"], ) async def test_no_logprobs(client: openai.AsyncOpenAI, model_name: str): # test using token IDs @@ -713,7 +698,7 @@ async def test_guided_grammar(client: openai.AsyncOpenAI, @pytest.mark.parametrize( # first test base model, then test loras "model_name", - [MODEL_NAME, "zephyr-lora", "zephyr-lora2"], + [MODEL_NAME, "zephyr-lora"], ) @pytest.mark.parametrize("logprobs_arg", [1, 0]) async def test_echo_logprob_completion(client: openai.AsyncOpenAI, diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index a0ef31762ea1..dbfb1b024f7c 100644 --- 
a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -21,10 +21,7 @@ @pytest.fixture(scope="module") -def default_server_args( - zephyr_lora_files, - zephyr_lora_added_tokens_files, -) -> list[str]: +def default_server_args() -> list[str]: return [ # use half precision for speed and memory savings in CI environment "--dtype", diff --git a/tests/entrypoints/openai/test_lora_adapters.py b/tests/entrypoints/openai/test_lora_adapters.py index f91dcf194b83..10c0cb5f4d15 100644 --- a/tests/entrypoints/openai/test_lora_adapters.py +++ b/tests/entrypoints/openai/test_lora_adapters.py @@ -67,12 +67,6 @@ def server_with_lora_modules_json(request, monkeypatch_module, "base_model_name": MODEL_NAME } - lora_module_2 = { - "name": "zephyr-lora2", - "path": zephyr_lora_files, - "base_model_name": MODEL_NAME - } - args = [ # use half precision for speed and memory savings in CI environment "--dtype", @@ -84,7 +78,6 @@ def server_with_lora_modules_json(request, monkeypatch_module, "--enable-lora", "--lora-modules", json.dumps(lora_module_1), - json.dumps(lora_module_2), "--max-lora-rank", "64", "--max-cpu-loras", @@ -121,7 +114,6 @@ async def test_static_lora_lineage(client: openai.AsyncOpenAI, for lora_model in lora_models) assert all(lora_model.parent == MODEL_NAME for lora_model in lora_models) assert lora_models[0].id == "zephyr-lora" - assert lora_models[1].id == "zephyr-lora2" @pytest.mark.asyncio @@ -209,7 +201,7 @@ async def test_dynamic_lora_badrequests(client: openai.AsyncOpenAI, tmp_path, @pytest.mark.asyncio async def test_multiple_lora_adapters(client: openai.AsyncOpenAI, tmp_path, zephyr_lora_files): - """Validate that many loras can be dynamically registered and inferenced + """Validate that many loras can be dynamically registered and inferenced with concurrently""" # This test file configures the server with --max-cpu-loras=2 and this test diff --git a/tests/entrypoints/openai/test_models.py b/tests/entrypoints/openai/test_models.py index 7cd3ca196a43..4ee34b19dea3 100644 --- a/tests/entrypoints/openai/test_models.py +++ b/tests/entrypoints/openai/test_models.py @@ -26,7 +26,6 @@ def server(zephyr_lora_files): "--enable-lora", "--lora-modules", f"zephyr-lora={zephyr_lora_files}", - f"zephyr-lora2={zephyr_lora_files}", "--max-lora-rank", "64", "--max-cpu-loras", @@ -56,4 +55,3 @@ async def test_check_models(client: openai.AsyncOpenAI, zephyr_lora_files): assert all(lora_model.root == zephyr_lora_files for lora_model in lora_models) assert lora_models[0].id == "zephyr-lora" - assert lora_models[1].id == "zephyr-lora2" diff --git a/tests/entrypoints/openai/test_tokenization.py b/tests/entrypoints/openai/test_tokenization.py index 72c8a3510c9b..ecb7f50fa740 100644 --- a/tests/entrypoints/openai/test_tokenization.py +++ b/tests/entrypoints/openai/test_tokenization.py @@ -14,7 +14,7 @@ @pytest.fixture(scope="module") -def server(zephyr_lora_added_tokens_files: str): # noqa: F811 +def server(): args = [ # use half precision for speed and memory savings in CI environment "--dtype", @@ -24,12 +24,6 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811 "--enforce-eager", "--max-num-seqs", "128", - # lora config - "--enable-lora", - "--lora-modules", - f"zephyr-lora2={zephyr_lora_added_tokens_files}", - "--max-lora-rank", - "64", "--enable-tokenizer-info-endpoint", ] @@ -38,10 +32,8 @@ def server(zephyr_lora_added_tokens_files: str): # noqa: F811 @pytest.fixture(scope="module") -def 
tokenizer_name(model_name: str, - zephyr_lora_added_tokens_files: str): # noqa: F811 - return zephyr_lora_added_tokens_files if ( - model_name == "zephyr-lora2") else model_name +def tokenizer_name(model_name: str): + return model_name @pytest_asyncio.fixture @@ -53,7 +45,7 @@ async def client(server): @pytest.mark.asyncio @pytest.mark.parametrize( "model_name,tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_tokenize_completions( @@ -86,7 +78,7 @@ async def test_tokenize_completions( @pytest.mark.asyncio @pytest.mark.parametrize( "model_name,tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_tokenize_chat( @@ -148,7 +140,7 @@ async def test_tokenize_chat( @pytest.mark.asyncio @pytest.mark.parametrize( "model_name,tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_tokenize_chat_with_tools( @@ -225,7 +217,7 @@ async def test_tokenize_chat_with_tools( @pytest.mark.asyncio @pytest.mark.parametrize( "model_name, tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_tokenize_with_return_token_strs( @@ -260,7 +252,7 @@ async def test_tokenize_with_return_token_strs( @pytest.mark.asyncio @pytest.mark.parametrize( "model_name,tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_detokenize( @@ -287,7 +279,7 @@ async def test_detokenize( @pytest.mark.asyncio @pytest.mark.parametrize( "model_name,tokenizer_name", - [(MODEL_NAME, MODEL_NAME), ("zephyr-lora2", "zephyr-lora2")], + [(MODEL_NAME, MODEL_NAME)], indirect=["tokenizer_name"], ) async def test_tokenizer_info_basic( @@ -384,4 +376,4 @@ async def test_tokenizer_info_chat_template(server: RemoteOpenAIServer): if chat_template: assert isinstance(chat_template, str), ("Chat template should be a string") - assert chat_template.strip(), "Chat template should not be empty" \ No newline at end of file + assert chat_template.strip(), "Chat template should not be empty" diff --git a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py index 28b1f8358d80..4bab849f47c2 100644 --- a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py +++ b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py @@ -18,6 +18,8 @@ "--enable-lora", "--lora-modules", f"{LORA_MODEL}={LORA_MODEL}", + "--tokenizer", + f"{LORA_MODEL}", ] TOOLS = [{ diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 84dab737ece2..78370d199b56 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -23,7 +23,7 @@ from vllm.multimodal import MultiModalDataDict, MultiModalUUIDDict from vllm.multimodal.utils import (encode_audio_base64, encode_image_base64, encode_video_base64) -from vllm.transformers_utils.tokenizer_group import TokenizerGroup +from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer from ..models.registry import HF_EXAMPLE_MODELS @@ -69,12 +69,7 @@ def phi3v_model_config_mm_interleaved(): 
@pytest.fixture(scope="module") def phi3v_tokenizer(): - return TokenizerGroup( - tokenizer_id=PHI3V_MODEL_ID, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, - ) + return get_tokenizer(PHI3V_MODEL_ID) @pytest.fixture(scope="function") @@ -91,12 +86,7 @@ def qwen2_audio_model_config(): @pytest.fixture(scope="module") def qwen2_audio_tokenizer(): - return TokenizerGroup( - tokenizer_id=QWEN2AUDIO_MODEL_ID, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, - ) + return get_tokenizer(QWEN2AUDIO_MODEL_ID) @pytest.fixture(scope="function") @@ -115,12 +105,7 @@ def qwen25omni_model_config_mm_interleaved(): @pytest.fixture(scope="module") def qwen25omni_tokenizer(): - return TokenizerGroup( - tokenizer_id=QWEN25OMNI_MODEL_ID, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, - ) + return get_tokenizer(QWEN25OMNI_MODEL_ID) @pytest.fixture(scope="function") @@ -136,12 +121,7 @@ def mistral_model_config(): @pytest.fixture(scope="module") def mistral_tokenizer(): - return TokenizerGroup( - tokenizer_id=MISTRAL_MODEL_ID, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, - ) + return get_tokenizer(MISTRAL_MODEL_ID) @pytest.fixture(scope="module") @@ -2250,15 +2230,11 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools): enforce_eager=model_info.enforce_eager, dtype=model_info.dtype) - # Build the tokenizer group and grab the underlying tokenizer - tokenizer_group = TokenizerGroup( + # Build the tokenizer + tokenizer = get_tokenizer( model, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, trust_remote_code=model_config.trust_remote_code, ) - tokenizer = tokenizer_group.tokenizer tools = ([{ "type": "function", @@ -2307,14 +2283,10 @@ def test_resolve_content_format_hf_defined(model, expected_format): enforce_eager=model_info.enforce_eager, dtype=model_info.dtype) - tokenizer_group = TokenizerGroup( + tokenizer = get_tokenizer( model, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, trust_remote_code=model_config.trust_remote_code, ) - tokenizer = tokenizer_group.tokenizer # Test detecting the tokenizer's chat_template chat_template = resolve_hf_chat_template( @@ -2368,14 +2340,10 @@ def test_resolve_content_format_fallbacks(model, expected_format): enforce_eager=model_info.enforce_eager, dtype=model_info.dtype) - tokenizer_group = TokenizerGroup( + tokenizer = get_tokenizer( model_config.tokenizer, - enable_lora=False, - max_num_seqs=5, - max_input_length=None, trust_remote_code=model_config.trust_remote_code, ) - tokenizer = tokenizer_group.tokenizer # Test detecting the tokenizer's chat_template chat_template = resolve_hf_chat_template( @@ -2432,14 +2400,10 @@ def test_resolve_content_format_examples(template_path, expected_format): trust_remote_code=True, ) - tokenizer_group = TokenizerGroup( + dummy_tokenizer = get_tokenizer( PHI3V_MODEL_ID, # Dummy - enable_lora=False, - max_num_seqs=5, - max_input_length=None, trust_remote_code=model_config.trust_remote_code, ) - dummy_tokenizer = tokenizer_group.tokenizer dummy_tokenizer.chat_template = None chat_template = load_chat_template(EXAMPLES_DIR / template_path) diff --git a/tests/lora/test_llama_tp.py b/tests/lora/test_llama_tp.py index 06196cc697ce..a6770e6d32af 100644 --- a/tests/lora/test_llama_tp.py +++ b/tests/lora/test_llama_tp.py @@ -13,14 +13,6 @@ MODEL_PATH = "meta-llama/Llama-2-7b-hf" -EXPECTED_NO_LORA_OUTPUT = [ - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_75 
(icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_76 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_77 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_78 (icao VARCHAR, airport VARCHAR)\n\n question: Name the ICAO for lilongwe international airport [/user]", # noqa: E501 - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_11 (nationality VARCHAR, elector VARCHAR)\n\n question: When Anchero Pantaleone was the elector what is under nationality? ", # noqa: E501 - "\n\n answer: 1\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_96 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_97 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one mora for a high tone mora with a gloss of /˧kot/ [kòt]? [/user] [assistant]\n\n answer: 2\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_98 (one_mora VARCHAR, gloss VARCHAR, accented_mora VARCHAR)\n\n question: What is the one m", # noqa: E501 - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE candidate (people_id VARCHAR, unsure_rate INTEGER); CREATE TABLE people (sex VARCHAR, people_id VARCHAR)\n\n question: which gender got the highest average uncertain ratio. ", # noqa: E501 - " Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_name_60 (pick INTEGER, former_wnba_team VARCHAR)\n\n question: What pick was a player that previously played for the Minnesota Lynx? 
", # noqa: E501 - "\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE table_28138035_4 (womens_doubles VARCHAR, mens_singles VARCHAR)\n\n question: Name the women's doubles for werner schlager [/user] [assistant]\n\n [user] Write a SQL query to answer the question based on the table schema.\n\n context: CREATE TABLE", # noqa: E501 -] EXPECTED_LORA_OUTPUT = [ " SELECT icao FROM table_name_74 WHERE airport = 'lilongwe international airport' ", # noqa: E501 " SELECT nationality FROM table_name_11 WHERE elector = 'anchero pantaleone' ", # noqa: E501 @@ -79,23 +71,12 @@ def generate_and_test(llm, sql_lora_files, tensorizer_config_dict: Union[dict, None] = None): print("lora adapter created") - assert do_sample(llm, - sql_lora_files, - tensorizer_config_dict=tensorizer_config_dict, - lora_id=0) == EXPECTED_NO_LORA_OUTPUT - print("lora 1") assert do_sample(llm, sql_lora_files, tensorizer_config_dict=tensorizer_config_dict, lora_id=1) == EXPECTED_LORA_OUTPUT - print("no lora") - assert do_sample(llm, - sql_lora_files, - tensorizer_config_dict=tensorizer_config_dict, - lora_id=0) == EXPECTED_NO_LORA_OUTPUT - print("lora 2") assert do_sample(llm, sql_lora_files, @@ -110,6 +91,7 @@ def test_llama_lora(sql_lora_files): llm = vllm.LLM( MODEL_PATH, + tokenizer=sql_lora_files, enable_lora=True, # also test odd max_num_seqs max_num_seqs=13, @@ -123,6 +105,7 @@ def test_llama_lora_tp4(sql_lora_files): llm = vllm.LLM( MODEL_PATH, + tokenizer=sql_lora_files, enable_lora=True, max_num_seqs=16, max_loras=4, @@ -137,6 +120,7 @@ def test_llama_lora_tp4_fully_sharded_loras(sql_lora_files): llm = vllm.LLM( MODEL_PATH, + tokenizer=sql_lora_files, enable_lora=True, max_num_seqs=16, max_loras=4, @@ -184,6 +168,7 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files, tensorizer_config = TensorizerConfig(tensorizer_uri=str(model_uri)) loaded_llm = LLM(model=model_ref, + tokenizer=sql_lora_files, load_format="tensorizer", enable_lora=True, enforce_eager=True, @@ -195,11 +180,6 @@ def test_tp2_serialize_and_deserialize_lora(tmp_path, sql_lora_files, tc_as_dict = tensorizer_config.to_serializable() print("lora adapter created") - assert do_sample(loaded_llm, - sql_lora_files, - tensorizer_config_dict=tc_as_dict, - lora_id=0) == EXPECTED_NO_LORA_OUTPUT - print("lora 1") assert do_sample(loaded_llm, sql_lora_files, diff --git a/tests/lora/test_lora_allowed_token_ids.py b/tests/lora/test_lora_allowed_token_ids.py deleted file mode 100644 index be6409000ae7..000000000000 --- a/tests/lora/test_lora_allowed_token_ids.py +++ /dev/null @@ -1,135 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.config import CacheConfig, DeviceConfig, ModelConfig, VllmConfig -from vllm.config.lora import LoRAConfig -from vllm.lora.request import LoRARequest -from vllm.sampling_params import SamplingParams -from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs -from 
vllm.v1.engine.processor import Processor - - -def test_allowed_token_ids_with_lora_vocab(llama_2_7b_base_huggingface_id, - sql_lora_files): - """ - Test that we properly resolve the range of allowed token ids for lora - adapters that define additional tokens. - """ - - # Set up a base model compatible with the sql_lora_files adapter and - # a known number of tokens in the base model. - model_config = ModelConfig( - model=llama_2_7b_base_huggingface_id, - tokenizer=llama_2_7b_base_huggingface_id, - tokenizer_mode="auto", - ) - - vllm_config = VllmConfig( - model_config=model_config, - cache_config=CacheConfig(), - device_config=DeviceConfig(), - lora_config=LoRAConfig(), - ) - - tokenizer = init_tokenizer_from_configs( - model_config=vllm_config.model_config, - scheduler_config=vllm_config.scheduler_config, - lora_config=vllm_config.lora_config) - processor = Processor(vllm_config, tokenizer) - - lora_request = LoRARequest("1", 1, str(sql_lora_files)) - request_id = "1" - prompt = "a prompt" - - # tokens added in the lora adapter should not raise an error - lora_token_ids = [32000, 32001, 32002, 32003] - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=lora_token_ids), - lora_request=lora_request) - - # tokens in the base model should not raise an error - base_token_ids = [1000, 1001, 1002, 1003] - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=base_token_ids), - lora_request=lora_request) - - # tokens not in the lora adapter should raise an error - invalid_token_ids = [35000, 35001, 35002, 35003] - with pytest.raises(ValueError): - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=invalid_token_ids), - lora_request=lora_request) - - # tokens in the lora adapter with no lora request should raise an error - with pytest.raises(ValueError): - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=lora_token_ids), - ) - - -def test_allowed_token_ids_with_lora_adapter_no_vocab( - qwen25vl_base_huggingface_id, qwen25vl_lora_files): - """ - Test that we properly resolve the range of allowed token ids for lora - adapters that do not define additional tokens. - """ - - # Set up a base model compatible with the qwen25vl_lora_files adapter and - # a known number of tokens in the base model. 
- model_config = ModelConfig( - model=qwen25vl_base_huggingface_id, - tokenizer=qwen25vl_base_huggingface_id, - tokenizer_mode="auto", - ) - - vllm_config = VllmConfig( - model_config=model_config, - cache_config=CacheConfig(), - device_config=DeviceConfig(), - lora_config=LoRAConfig(), - ) - - tokenizer = init_tokenizer_from_configs( - model_config=vllm_config.model_config, - scheduler_config=vllm_config.scheduler_config, - lora_config=vllm_config.lora_config) - processor = Processor(vllm_config, tokenizer) - - lora_request = LoRARequest("1", 1, str(qwen25vl_lora_files)) - request_id = "1" - prompt = "a prompt" - - # tokens in the base model should not raise an error - base_token_ids = [1000, 1001, 1002, 1003] - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=base_token_ids), - lora_request=lora_request) - - # tokens in the base model with no lora request should not raise an error - base_token_ids = [1000, 1001, 1002, 1003] - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=base_token_ids), - ) - - # tokens not in the base model should raise an error - invalid_token_ids = [200000, 200001, 200002, 200003] - with pytest.raises(ValueError): - processor.process_inputs( - request_id, - prompt, - params=SamplingParams(allowed_token_ids=invalid_token_ids), - lora_request=lora_request) diff --git a/tests/lora/test_quant_model.py b/tests/lora/test_quant_model.py index caa31fdb0e73..2b54b2edd6a9 100644 --- a/tests/lora/test_quant_model.py +++ b/tests/lora/test_quant_model.py @@ -82,31 +82,20 @@ def test_quant_model_lora(tinyllama_lora_files, model): gpu_memory_utilization=0.2, #avoid OOM quantization=model.quantization, trust_remote_code=True, - enable_chunked_prefill=True) + enable_chunked_prefill=True, + tokenizer=tinyllama_lora_files) if model.quantization is None: - expected_no_lora_output = [ - "Here are some examples of orange-brown colors", - "I'm sorry, I don't have" - ] expected_lora_output = [ "#ff8050", "#ff8080", ] elif model.quantization == "awq": - expected_no_lora_output = [ - "I'm sorry, I don't understand", - "I'm sorry, I don't understand", - ] expected_lora_output = [ "#f07700: A v", "#f00000: A v", ] elif model.quantization == "gptq": - expected_no_lora_output = [ - "I'm sorry, I don't have", - "I'm sorry, I don't have", - ] expected_lora_output = [ "#f08800: This is", "#f07788 \n#", @@ -117,7 +106,6 @@ def expect_match(output, expected_output): # Assert that the outputs changed. 
if (model.quantization == "gptq" and expected_output is expected_lora_output): - assert output != expected_no_lora_output for i, o in enumerate(output): assert o.startswith( '#'), f"Expected example {i} to start with # but got {o}" @@ -127,12 +115,6 @@ def expect_match(output, expected_output): max_tokens = 10 print("lora adapter created") - output = do_sample(llm, - tinyllama_lora_files, - lora_id=0, - max_tokens=max_tokens) - expect_match(output, expected_no_lora_output) - print("lora 1") output = do_sample(llm, tinyllama_lora_files, @@ -140,13 +122,6 @@ def expect_match(output, expected_output): max_tokens=max_tokens) expect_match(output, expected_lora_output) - print("no lora") - output = do_sample(llm, - tinyllama_lora_files, - lora_id=0, - max_tokens=max_tokens) - expect_match(output, expected_no_lora_output) - print("lora 2") output = do_sample(llm, tinyllama_lora_files, diff --git a/tests/lora/test_tokenizer_group.py b/tests/lora/test_tokenizer_group.py deleted file mode 100644 index 6cfdaf50d33c..000000000000 --- a/tests/lora/test_tokenizer_group.py +++ /dev/null @@ -1,72 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -from transformers import AutoTokenizer, PreTrainedTokenizerBase - -from vllm.lora.request import LoRARequest -from vllm.transformers_utils.tokenizer import get_lora_tokenizer -from vllm.transformers_utils.tokenizer_group import TokenizerGroup - - -@pytest.mark.asyncio -@pytest.mark.parametrize("tokenizer_group_type", [None, "ray"]) -async def test_tokenizer_group_lora(sql_lora_files, tokenizer_group_type): - reference_tokenizer = AutoTokenizer.from_pretrained(sql_lora_files) - tokenizer_group = TokenizerGroup( - tokenizer_id="gpt2", - enable_lora=True, - max_num_seqs=1, - max_loras=1, - max_input_length=None, - ) - lora_request = LoRARequest("1", 1, sql_lora_files) - assert reference_tokenizer.encode("prompt") == tokenizer_group.encode( - prompt="prompt", lora_request=lora_request) - assert reference_tokenizer.encode( - "prompt") == await tokenizer_group.encode_async( - prompt="prompt", lora_request=lora_request) - assert isinstance(tokenizer_group.get_lora_tokenizer(None), - PreTrainedTokenizerBase) - assert tokenizer_group.get_lora_tokenizer( - None) == await tokenizer_group.get_lora_tokenizer_async(None) - - assert isinstance(tokenizer_group.get_lora_tokenizer(lora_request), - PreTrainedTokenizerBase) - assert tokenizer_group.get_lora_tokenizer( - lora_request) != tokenizer_group.get_lora_tokenizer(None) - assert tokenizer_group.get_lora_tokenizer( - lora_request) == await tokenizer_group.get_lora_tokenizer_async( - lora_request) - - -def test_get_lora_tokenizer(sql_lora_files, tmp_path): - lora_request = None - tokenizer = get_lora_tokenizer(lora_request) - assert not tokenizer - - lora_request = LoRARequest("1", 1, sql_lora_files) - tokenizer = get_lora_tokenizer(lora_request) - assert tokenizer.get_added_vocab() - - lora_request = LoRARequest("1", 1, str(tmp_path)) - tokenizer = get_lora_tokenizer(lora_request) - assert not tokenizer - - -@pytest.mark.parametrize("enable_lora", [True, False]) -@pytest.mark.parametrize("max_num_seqs", [1, 2]) -@pytest.mark.parametrize("max_loras", [1, 2]) -def test_lora_tokenizers(enable_lora, max_num_seqs, max_loras): - tokenizer_group = TokenizerGroup( - tokenizer_id="gpt2", - enable_lora=enable_lora, - max_num_seqs=max_num_seqs, - max_loras=max_loras, - max_input_length=None, - ) - if enable_lora: - assert 
tokenizer_group.lora_tokenizers.capacity == max( - max_num_seqs, max_loras) - else: - assert tokenizer_group.lora_tokenizers.capacity == 0 diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py index edc0849dff33..1dba0fd0fb3d 100644 --- a/tests/test_cache_block_hashing.py +++ b/tests/test_cache_block_hashing.py @@ -11,7 +11,7 @@ from vllm.inputs import token_inputs from vllm.lora.request import LoRARequest from vllm.sequence import Sequence -from vllm.transformers_utils.tokenizer_group import TokenizerGroup +from vllm.transformers_utils.tokenizer import get_tokenizer # Make two prefixes with different first blocks. prefix_start = [("You are an expert"), ("You are a")] @@ -47,12 +47,7 @@ def flatten_2d(li): def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, concurrent_lora_int_ids: list[Optional[int]]): - tokenizer = TokenizerGroup( - tokenizer_id="facebook/opt-125m", - enable_lora=False, - max_num_seqs=max_num_seqs, - max_input_length=None, - ) + tokenizer = get_tokenizer("facebook/opt-125m") hashes: list[list[list[int]]] = [] @@ -76,7 +71,7 @@ def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, inputs=token_inputs(prompt_token_ids, prompt=prompt), block_size=block_size, - eos_token_id=tokenizer.tokenizer.eos_token_id, + eos_token_id=tokenizer.eos_token_id, lora_request=lora_request) num_blocks = len(prompt_token_ids) // block_size diff --git a/tests/tokenization/test_detokenize.py b/tests/tokenization/test_detokenize.py index 527aad97d4fa..15ea55afe963 100644 --- a/tests/tokenization/test_detokenize.py +++ b/tests/tokenization/test_detokenize.py @@ -11,7 +11,7 @@ from vllm.inputs import token_inputs from vllm.sequence import Logprob, SamplingParams, Sequence, SequenceGroup from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.transformers_utils.tokenizer_group import TokenizerGroup +from vllm.transformers_utils.tokenizer import get_tokenizer from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.detokenizer import (FastIncrementalDetokenizer, @@ -221,17 +221,14 @@ def test_oov_decode(tokenizer, fast): @pytest.fixture def detokenizer(tokenizer_name: str) -> Detokenizer: - tokenizer_group = TokenizerGroup( - tokenizer_id=tokenizer_name, - enable_lora=False, - max_num_seqs=100, - max_input_length=None, + tokenizer = get_tokenizer( + tokenizer_name, tokenizer_mode="mistral" if "mistral" in tokenizer_name else "auto", trust_remote_code=False, revision=None, ) - return Detokenizer(tokenizer_group) + return Detokenizer(tokenizer) @pytest.fixture(name="complete_sequence_token_ids") @@ -312,8 +309,7 @@ def test_decode_prompt_logprobs(complete_sequence: str, # don't support that. if complete_sequence not in SPECIAL_TOKS_TRUTH: skip_special_tokens = True - elif not isinstance(detokenizer.tokenizer_group.get_lora_tokenizer(None), - MistralTokenizer): + elif not isinstance(detokenizer.tokenizer, MistralTokenizer): skip_special_tokens = False else: pytest.skip("MistralTokenizers don't support " @@ -339,7 +335,7 @@ def test_decode_prompt_logprobs(complete_sequence: str, # decoded_prompt_logprobs doesn't contain the first token. 
token_ids = complete_sequence_token_ids - tokenizer = detokenizer.get_tokenizer_for_seq(seq) + tokenizer = detokenizer.tokenizer text_full = tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens) text_first = tokenizer.decode(token_ids[0], diff --git a/tests/tokenization/test_tokenizer_group.py b/tests/tokenization/test_tokenizer_group.py deleted file mode 100644 index 0570c1525e11..000000000000 --- a/tests/tokenization/test_tokenizer_group.py +++ /dev/null @@ -1,27 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -from transformers import AutoTokenizer, PreTrainedTokenizerBase - -from vllm.transformers_utils.tokenizer_group import TokenizerGroup - - -@pytest.mark.asyncio -async def test_tokenizer_group(): - reference_tokenizer = AutoTokenizer.from_pretrained("gpt2") - tokenizer_group = TokenizerGroup( - tokenizer_id="gpt2", - enable_lora=False, - max_num_seqs=1, - max_input_length=None, - ) - assert reference_tokenizer.encode("prompt") == tokenizer_group.encode( - prompt="prompt", lora_request=None) - assert reference_tokenizer.encode( - "prompt") == await tokenizer_group.encode_async(prompt="prompt", - lora_request=None) - assert isinstance(tokenizer_group.get_lora_tokenizer(None), - PreTrainedTokenizerBase) - assert tokenizer_group.get_lora_tokenizer( - None) == await tokenizer_group.get_lora_tokenizer_async(None) diff --git a/tests/tokenization/test_tokenizer_registry.py b/tests/tokenization/test_tokenizer_registry.py index 5abb10164408..68d4b416b4c9 100644 --- a/tests/tokenization/test_tokenizer_registry.py +++ b/tests/tokenization/test_tokenizer_registry.py @@ -57,6 +57,10 @@ def vocab_size(self) -> int: def max_token_id(self) -> int: raise NotImplementedError() + @property + def truncation_side(self) -> str: + raise NotImplementedError() + def __call__( self, text: Union[str, list[str], list[int]], diff --git a/tests/v1/engine/conftest.py b/tests/v1/engine/conftest.py index d7722142b207..a73a9a6999f7 100644 --- a/tests/v1/engine/conftest.py +++ b/tests/v1/engine/conftest.py @@ -12,7 +12,6 @@ generate_dummy_prompt_logprobs_tensors, generate_dummy_sample_logprobs) from vllm.engine.arg_utils import EngineArgs -from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs from ...distributed.conftest import publisher_config, random_port # noqa: F401 @@ -24,7 +23,7 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors: """Generate output processor dummy test vectors, without logprobs - + Returns: DummyOutputProcessorTestVectors instance with no logprobs """ @@ -48,9 +47,6 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors: ] return DummyOutputProcessorTestVectors( tokenizer=tokenizer, - tokenizer_group=init_tokenizer_from_configs( - vllm_config.model_config, vllm_config.scheduler_config, - vllm_config.lora_config), vllm_config=vllm_config, full_tokens=[tokenizer(text).input_ids for text in FULL_STRINGS], prompt_tokens=prompt_tokens, @@ -68,7 +64,7 @@ def _build_test_vectors_no_logprobs() -> DummyOutputProcessorTestVectors: @pytest.fixture def dummy_test_vectors() -> DummyOutputProcessorTestVectors: """Generate output processor dummy test vectors, with logprobs - + Returns: DummyOutputProcessorTestVectors instance with logprobs """ diff --git a/tests/v1/engine/test_output_processor.py b/tests/v1/engine/test_output_processor.py index 6544e8b017e7..a9632ce54eac 100644 --- a/tests/v1/engine/test_output_processor.py +++ 
b/tests/v1/engine/test_output_processor.py @@ -43,7 +43,7 @@ def _ref_convert_id_to_token( [RequestOutputKind.DELTA, RequestOutputKind.FINAL_ONLY]) def test_incremental_detokenization(request_output_kind: RequestOutputKind, dummy_test_vectors): - output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group, + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, log_stats=False) engine_core = MockEngineCore( tokens_list=dummy_test_vectors.generation_tokens) @@ -382,7 +382,7 @@ def test_logprobs_processor(request_output_kind: RequestOutputKind, num_sample_logprobs: Optional[int], num_prompt_logprobs: Optional[int], dummy_test_vectors): - output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group, + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, log_stats=False) engine_core = MockEngineCore( tokens_list=dummy_test_vectors.generation_tokens, @@ -535,7 +535,7 @@ def test_stop_token(include_stop_str_in_output: bool, ) # '<|end_of_text|>' stop_token_ids = [128009] if not is_eos_test else None # '<|eot_id|>' - output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group, + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, log_stats=False) # Dummy engine core outputs, with control tokens suffixed to test stops suffix_token = ([eos_token_id] if is_eos_test else stop_token_ids) @@ -642,7 +642,7 @@ def test_stop_token(include_stop_str_in_output: bool, [None, NUM_SAMPLE_LOGPROBS_UNDER_TEST]) def test_stop_string(include_stop_str_in_output: bool, num_sample_logprobs: Optional[int], dummy_test_vectors): - output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group, + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, log_stats=False) engine_core = MockEngineCore( tokens_list=dummy_test_vectors.generation_tokens, @@ -763,7 +763,7 @@ def test_stop_string(include_stop_str_in_output: bool, def test_iteration_stats(dummy_test_vectors): - output_processor = OutputProcessor(dummy_test_vectors.tokenizer_group, + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, log_stats=True) engine_core = MockEngineCore(dummy_test_vectors.generation_tokens) engine_core_timestamp = time.monotonic() diff --git a/tests/v1/engine/utils.py b/tests/v1/engine/utils.py index b58bc75fc956..689b2c95f927 100644 --- a/tests/v1/engine/utils.py +++ b/tests/v1/engine/utils.py @@ -9,7 +9,6 @@ from transformers import PreTrainedTokenizer, PreTrainedTokenizerFast from vllm.engine.arg_utils import EngineArgs -from vllm.transformers_utils.tokenizer_group import TokenizerGroup from vllm.v1.engine import EngineCoreOutput, FinishReason from vllm.v1.outputs import LogprobsLists, LogprobsTensors @@ -39,7 +38,7 @@ def _create_random_top_logprob_test_vector( upper: float, ) -> torch.Tensor: """Create a random vector of top logprob float values. - + Use to create fake sample logprobs for testing. Note that a real production scenario would require @@ -63,7 +62,7 @@ def _create_random_top_logprob_test_matrix( upper: float, ) -> torch.Tensor: """Create a random matrix of top logprob float values. - + Use to create fake prompt logprobs for testing. 
Note that a real production scenario would require @@ -296,7 +295,6 @@ def generate_dummy_prompt_logprobs_tensors( class DummyOutputProcessorTestVectors: """Dummy test vectors for output processor tests""" tokenizer: GeneralTokenizerType - tokenizer_group: TokenizerGroup vllm_config: EngineArgs full_tokens: list[list[int]] # Prompt + generated tokens prompt_tokens: list[list[int]] diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 126d8ce8c8e0..ad62914195b4 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -582,7 +582,7 @@ def test_structured_output_with_reasoning_matrices( reasoning_parser=reasoning_parser, speculative_config=speculative_config, ) - tokenizer = llm.get_tokenizer(None) + tokenizer = llm.get_tokenizer() reasoner = ReasoningParserManager.get_reasoning_parser(reasoning_parser)( tokenizer=tokenizer) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 8d11b19066bb..a38090edb0b4 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -37,7 +37,7 @@ from vllm.lora.utils import get_adapter_absolute_path from vllm.multimodal import MultiModalDataDict from vllm.multimodal.image import convert_image_mode -from vllm.transformers_utils.tokenizer import AnyTokenizer, get_lora_tokenizer +from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils import PlaceholderModule try: @@ -100,8 +100,8 @@ def __init__( ) -> None: """ Initialize the BenchmarkDataset with an optional dataset path and random - seed. - + seed. + Args: dataset_path (Optional[str]): Path to the dataset. If None, it indicates that a default or random dataset might be used. @@ -133,10 +133,10 @@ def apply_multimodal_chat_transformation( elif isinstance(mm_content, dict): content.append(mm_content) else: - raise TypeError( + raise TypeError( "Could not process multimodal content of type: " + - f"{type(mm_content)}" - ) + f"{type(mm_content)}" + ) return [{"role": "user", "content": content}] def load_data(self) -> None: @@ -155,34 +155,26 @@ def load_data(self) -> None: def get_random_lora_request( self, - tokenizer: PreTrainedTokenizerBase, max_loras: Optional[int] = None, lora_path: Optional[str] = None, - ) -> tuple[Optional[LoRARequest], AnyTokenizer]: + ) -> Optional[LoRARequest]: """ - Optionally select a random LoRA request and return its associated - tokenizer. + Optionally select a random LoRA request. This method is used when LoRA parameters are provided. It randomly - selects a LoRA based on max_loras and retrieves a cached tokenizer for - that LoRA if available. Otherwise, it returns the base tokenizer. + selects a LoRA based on max_loras. Args: - tokenizer (PreTrainedTokenizerBase): The base tokenizer to use if no - LoRA is selected. max_loras (Optional[int]): The maximum number of LoRAs available. If `None`, LoRA is not used. lora_path (Optional[str]): Path to the LoRA parameters on disk. If `None`, LoRA is not used. Returns: - A tuple with the following elements: - - A new [LoRARequest][] (or `None` if not applicable). - - The tokenizer associated with the LoRA request - (or the base tokenizer). + A new [LoRARequest][] (or `None` if not applicable). """ if max_loras is None or lora_path is None: - return None, tokenizer + return None # Generate a random LoRA ID in the range [1, max_loras]. 
lora_id = random.randint(1, max_loras) @@ -191,11 +183,7 @@ def get_random_lora_request( lora_int_id=lora_id, lora_path=lora_path_on_disk(lora_path), ) - if lora_id not in lora_tokenizer_cache: - lora_tokenizer_cache[lora_id] = get_lora_tokenizer(lora_request) - # Return lora_request and the cached tokenizer if available; otherwise, - # return the base tokenizer - return lora_request, lora_tokenizer_cache[lora_id] or tokenizer + return lora_request @abstractmethod def sample(self, tokenizer: PreTrainedTokenizerBase, @@ -213,7 +201,7 @@ def sample(self, tokenizer: PreTrainedTokenizerBase, for processing the dataset's text. num_requests (int): The number of sample requests to generate. request_id_prefix (str) The prefix of request_id. - + Returns: list[SampleRequest]: A list of sample requests generated from the @@ -527,7 +515,7 @@ def get_sampling_params( size=num_requests) output_lens = self._rng.integers(output_low, output_high + 1, size=num_requests) - offsets = self._rng.integers(0, tokenizer.vocab_size, + offsets = self._rng.integers(0, tokenizer.vocab_size, size=num_requests) return input_lens, output_lens, offsets @@ -555,7 +543,7 @@ def generate_token_sequence( the encoded sequence is truncated before being decoded again. """ # Build the inner sequence by sampling sequentially from the vocab - inner_seq = ((offset + index + np.arange(input_len)) + inner_seq = ((offset + index + np.arange(input_len)) % vocab_size).tolist() token_sequence = prefix_token_ids + inner_seq @@ -590,9 +578,9 @@ class RandomMultiModalDataset(RandomDataset): `num_mm_items_range_ratio` in [0, 1]. r=0 keeps it fixed; r=1 allows 0. The maximum is further clamped to the sum of per-modality limits. 2) Each item’s modality and shape is sampled from `bucket_config`, a dict - mapping (height, width, num_frames) → probability. We treat - `num_frames`=1 as image and and `num_frames` > 1 as video. - Entries with zero probability are removed and the rest are renormalized + mapping (height, width, num_frames) → probability. We treat + `num_frames`=1 as image and and `num_frames` > 1 as video. + Entries with zero probability are removed and the rest are renormalized to sum to 1. 3) Per-modality hard caps are enforced via `limit_mm_per_prompt`. When a modality reaches its cap, all of its buckets are excluded and the @@ -600,8 +588,8 @@ class RandomMultiModalDataset(RandomDataset): Example bucket configuration: {(256, 256, 1): 0.5, (720, 1280, 1): 0.4, (720, 1280, 16): 0.1} - - Two image buckets (`num_frames`=1) and one video bucket - (`num_frames`=16). + - Two image buckets (`num_frames`=1) and one video bucket + (`num_frames`=16). OBS.: Only image sampling is supported for now. """ @@ -624,9 +612,9 @@ def __init__(self, **kwargs) -> None: def generate_synthetic_image(self, width: int, height: int) -> Image.Image: """Generate synthetic PIL image with random RGB values. - - NOTE: iid pixel sampling results in worst-case compression - (good for stressing I/O), but very unlike real photos. + + NOTE: iid pixel sampling results in worst-case compression + (good for stressing I/O), but very unlike real photos. We could consider a “low-freq” mode (e.g., noise blur) to emulate network realism instead of max stress. 
""" @@ -638,11 +626,11 @@ def generate_synthetic_image(self, width: int, height: int) -> Image.Image: ) return Image.fromarray(random_pixels) - def generate_synthetic_video(self, width: int, - height: int, + def generate_synthetic_video(self, width: int, + height: int, num_frames: int) -> Any: """Generate synthetic video with random values. - + TODO: Finish this method. """ raise NotImplementedError("Video sampling is WIP.") @@ -656,7 +644,7 @@ def map_config_to_modality(self, config: tuple[int, int, int]) -> str: else: raise ValueError(f"Invalid multimodal item configuration: {config}") - def normalize_bucket_config(self, bucket_config: dict[tuple[int, int, int], + def normalize_bucket_config(self, bucket_config: dict[tuple[int, int, int], float]) -> dict[tuple[int, int, int], float]: """ Remove zero probability entries @@ -676,24 +664,24 @@ def normalize_bucket_config(self, bucket_config: dict[tuple[int, int, int], return {k: v / total for k, v in bucket_config.items()} - def generate_mm_item(self, + def generate_mm_item(self, mm_item_config: tuple[int, int, int], ) -> Mapping[str, Any]: """ - Create synthetic images and videos and + Create synthetic images and videos and apply process_image/process_video respectively. This follows the OpenAI API chat completions https://github.com/openai/openai-python """ - + if self.map_config_to_modality(mm_item_config) == "image": return process_image(self.generate_synthetic_image( mm_item_config[1], mm_item_config[0])) elif self.map_config_to_modality(mm_item_config) == "video": return process_video(self.generate_synthetic_video( - mm_item_config[1], - mm_item_config[0], + mm_item_config[1], + mm_item_config[0], mm_item_config[2])) else: raise ValueError(f"Invalid multimodal item configuration: " @@ -723,17 +711,17 @@ def get_mm_item_sampling_params( f"limit_mm_per_prompt: " f"{limit_mm_per_prompt.keys()}") - # Remove zero probability entries + # Remove zero probability entries # and normalize bucket config to sum to 1 bucket_config = self.normalize_bucket_config(bucket_config) logger.info( "Normalized bucket config: %s", bucket_config, ) # Only consider limit per prompt for modalities in bucket config - allowed_modalities = {self.map_config_to_modality(cfg) + allowed_modalities = {self.map_config_to_modality(cfg) for cfg in bucket_config} limit_mm_per_prompt = { - k: v for k, v in limit_mm_per_prompt.items() + k: v for k, v in limit_mm_per_prompt.items() if k in allowed_modalities} if not limit_mm_per_prompt: raise ValueError("No valid limits for modalities present in " @@ -746,19 +734,19 @@ def get_mm_item_sampling_params( # Get max and min num mm items and ensure # it is at most the sum of limit_mm_per_prompt for all modalities max_num_mm_items = min( - sum(limit_mm_per_prompt.values()), + sum(limit_mm_per_prompt.values()), math.ceil(base_items_per_request * (1 + num_mm_items_range_ratio)) ) # Ensure min num mm items is at least 0 min_num_mm_items = max( - 0, + 0, math.floor(base_items_per_request * (1 - num_mm_items_range_ratio)) ) # Raise error if min num mm items is greater than max num mm items if min_num_mm_items > max_num_mm_items: raise ValueError(f"Min num mm items is greater than max mm items: " f"{min_num_mm_items} > {max_num_mm_items}") - + logger.info( "Sampling number of multimodal items from [%s, %s]", min_num_mm_items, max_num_mm_items, @@ -783,8 +771,8 @@ def get_mm_item_iterator( whose size is between min_num_mm_items and max_num_mm_items. Loop over the bucket config and sample a multimodal item. 
- Loop until the number of multimodal items sampled is equal to - request_num_mm_items or limit of multimodal items per prompt + Loop until the number of multimodal items sampled is equal to + request_num_mm_items or limit of multimodal items per prompt for all modalities is reached. Note: @@ -796,19 +784,19 @@ def get_mm_item_iterator( # Get the number of multimodal items to sample request_num_mm_items = int( self._rng.integers(min_num_mm_items, max_num_mm_items + 1) - ) + ) # If request_num_mm_items is 0, yield an empty iterator if request_num_mm_items == 0: return # Initialize modality counters - modality_counter = {self.map_config_to_modality(k): 0 + modality_counter = {self.map_config_to_modality(k): 0 for k in bucket_config} # Copy the bucket config to avoid modifying the original bucket_config_copy = bucket_config.copy() # Loop over the number of multimodal items to sample while sum(modality_counter.values()) < request_num_mm_items: # Sample a multimodal item config - mm_item_config = self._rng.choice(list(bucket_config_copy.keys()), + mm_item_config = self._rng.choice(list(bucket_config_copy.keys()), p=list(bucket_config_copy.values())) modality = self.map_config_to_modality(mm_item_config) # Check that modality count is less than limit per prompt @@ -849,7 +837,7 @@ def sample( limit_mm_per_prompt: dict[str, int] = DEFAULT_LIMIT_MM_PER_PROMPT, base_items_per_request: int = DEFAULT_BASE_ITEMS_PER_REQUEST, num_mm_items_range_ratio: float = DEFAULT_NUM_MM_ITEMS_RANGE_RATIO, - bucket_config: dict[tuple[int, int, int], float] = + bucket_config: dict[tuple[int, int, int], float] = DEFAULT_MM_ITEM_BUCKET_CONFIG, enable_multimodal_chat: bool = DEFAULT_ENABLE_MULTIMODAL_CHAT, **kwargs, @@ -857,7 +845,7 @@ def sample( # NOTE: Video sampling is WIP. Raise error if video is in bucket config # and probability is non-zero. - if any(self.map_config_to_modality(cfg) == "video" and p > 0 + if any(self.map_config_to_modality(cfg) == "video" and p > 0 for cfg, p in bucket_config.items()): raise NotImplementedError("Video sampling not implemented; " "set its probability to 0.") @@ -908,7 +896,7 @@ def sample( ]) if enable_multimodal_chat: - # NOTE: For now this option is only provided for completeness + # NOTE: For now this option is only provided for completeness # given that the serve.py benchmark currently does not use it. 
mm_chat_prompt: Any = prompt mm_chat_prompt = self.apply_multimodal_chat_transformation( @@ -982,8 +970,8 @@ def sample( entry["conversations"][1]["value"], ) - lora_request, tokenizer = self.get_random_lora_request( - tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path) + lora_request = self.get_random_lora_request( + max_loras=max_loras, lora_path=lora_path) prompt_ids = tokenizer(prompt).input_ids completion_ids = tokenizer(completion).input_ids prompt_len = len(prompt_ids) @@ -994,11 +982,11 @@ def sample( skip_min_output_len_check=output_len is not None): continue - if image_path := entry.get("image"): - mm_content = process_image(image_path) - elif video_path := entry.get("video"): + if image_path := entry.get("image"): + mm_content = process_image(image_path) + elif video_path := entry.get("video"): mm_content = process_video(video_path) - else: + else: mm_content = None if enable_multimodal_chat: prompt = self.apply_multimodal_chat_transformation( @@ -1013,9 +1001,9 @@ def sample( request_id=request_id_prefix + str(ind), )) ind += 1 - self.maybe_oversample_requests(samples, - num_requests, - request_id_prefix, + self.maybe_oversample_requests(samples, + num_requests, + request_id_prefix, no_oversample) return samples @@ -1024,11 +1012,11 @@ class _ValidateDatasetArgs(argparse.Action): """Argparse action to validate dataset name and path compatibility.""" def __call__(self, parser, namespace, values, option_string=None): setattr(namespace, self.dest, values) - + # Get current values of both dataset_name and dataset_path dataset_name = getattr(namespace, 'dataset_name', 'random') dataset_path = getattr(namespace, 'dataset_path', None) - + # Validate the combination if dataset_name == "random" and dataset_path is not None: parser.error( @@ -1053,7 +1041,7 @@ def add_dataset_parser(parser: FlexibleArgumentParser): default="random", action=_ValidateDatasetArgs, choices=[ - "sharegpt", "burstgpt", "sonnet", "random", "random-mm", "hf", + "sharegpt", "burstgpt", "sonnet", "random", "random-mm", "hf", "custom", "prefix_repetition", "spec_bench" ], help="Name of the dataset to benchmark on.", @@ -1502,7 +1490,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: # For datasets that follow a similar structure, use a mapping. 
dataset_mapping = { "spec_bench": - lambda: SpecBench(dataset_path=args.dataset_path, + lambda: SpecBench(dataset_path=args.dataset_path, category=args.spec_bench_category).sample( num_requests=args.num_prompts, tokenizer=tokenizer, @@ -1660,7 +1648,7 @@ def sample( logger.info("num_requests is set to 0 or negative, " "so using all available samples: %d", num_requests) - + sampled_requests = [] for i, item in enumerate(self.data): if len(sampled_requests) >= num_requests: @@ -1686,7 +1674,7 @@ def sample( expected_output_len=output_len, request_id=request_id_prefix + str(i), )) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -1700,7 +1688,7 @@ def sample( class SpecBench(CustomDataset): """ Implements the SpecBench dataset: https://github.com/hemingkx/Spec-Bench - Download the dataset using: + Download the dataset using: wget https://raw.githubusercontent.com/hemingkx/Spec-Bench/refs/heads/main/data/spec_bench/question.jsonl """ # noqa: E501 @@ -1736,8 +1724,8 @@ def sample(self, **kwargs) -> list: # leverage CustomDataset sample kwargs["skip_chat_template"] = False return super().sample(**kwargs) - - + + # ----------------------------------------------------------------------------- # Sonnet Dataset Implementation # ----------------------------------------------------------------------------- @@ -1882,8 +1870,8 @@ def sample( for i in range(num_requests): input_len = int(data[i][2]) output_len = int(data[i][3]) - lora_req, tokenizer = self.get_random_lora_request( - tokenizer=tokenizer, max_loras=max_loras, lora_path=lora_path) + lora_req = self.get_random_lora_request( + max_loras=max_loras, lora_path=lora_path) vocab_size = tokenizer.vocab_size # Generate a synthetic prompt: a list of token IDs computed as (i + # j) modulo vocab_size. @@ -1995,7 +1983,7 @@ def sample(self, request_id=request_id_prefix + str(ind), )) ind += 1 - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2055,7 +2043,7 @@ def sample( multi_modal_data=mm_content, request_id=request_id_prefix + str(i), )) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2172,7 +2160,7 @@ def sample(self, expected_output_len=output_len, request_id=request_id_prefix + str(i), )) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2234,7 +2222,7 @@ def sample( expected_output_len=output_len, request_id=request_id_prefix + str(i), )) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2288,8 +2276,8 @@ def sample( # compare the levenshtein distance normalized by code length if norm_distance < min_distance or norm_distance > max_distance: continue - - # template copied from + + # template copied from # https://github.com/ise-uiuc/blazedit/blob/7765137e656fd62de877422d2e4cf8de51228054/dataset/create_refined_dataset.py#L94-L105 # noqa: E501 instruction = f"""Given a code file, please apply the change requests and generate the new file. 
@@ -2322,9 +2310,9 @@ def sample( expected_output_len=output_len, request_id=request_id_prefix + str(i), )) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) - + return sampled_requests @@ -2376,7 +2364,6 @@ def sample(self, expected_output_len=output_len, multi_modal_data=None, request_id=request_id_prefix + str(ind), - )) ind += 1 self.maybe_oversample_requests(sampled_requests, num_requests, @@ -2470,9 +2457,9 @@ def sample(self, tokenizer: PreTrainedTokenizerBase, num_requests: int, )) if len(samples) >= num_requests: break - self.maybe_oversample_requests(samples, - num_requests, - request_id_prefix, + self.maybe_oversample_requests(samples, + num_requests, + request_id_prefix, no_oversample) return samples @@ -2562,7 +2549,7 @@ def sample( " what Whisper supports.", skipped, ) - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2647,7 +2634,7 @@ def sample( ) ind += 1 - self.maybe_oversample_requests(sampled_requests, num_requests, + self.maybe_oversample_requests(sampled_requests, num_requests, request_id_prefix, no_oversample) return sampled_requests @@ -2658,7 +2645,7 @@ def sample( class PrefixRepetitionRandomDataset(BenchmarkDataset): - # Default values copied from benchmark_serving.py for the repeated prefix + # Default values copied from benchmark_serving.py for the repeated prefix # dataset. DEFAULT_PREFIX_LEN = 256 DEFAULT_SUFFIX_LEN = 256 diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index c53ece18964c..1ae82c9f6f6f 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -390,11 +390,8 @@ async def stop_remote_worker_execution_loop_async(self) -> None: """Stop the remote worker execution loop.""" await self.model_executor.stop_remote_worker_execution_loop_async() - async def get_tokenizer_async(self, - lora_request: Optional[LoRARequest] = None - ) -> AnyTokenizer: - return await ( - self.get_tokenizer_group().get_lora_tokenizer_async(lora_request)) + async def get_tokenizer_async(self) -> AnyTokenizer: + return self.get_tokenizer() async def add_request_async( self, @@ -435,7 +432,6 @@ async def add_request_async( processed_inputs = await self.input_preprocessor.preprocess_async( prompt, - lora_request=lora_request, tokenization_kwargs=tokenization_kwargs, ) @@ -614,11 +610,8 @@ def _error_callback(self, exc: Exception) -> None: async def get_input_preprocessor(self) -> InputPreprocessor: return self.engine.input_preprocessor - async def get_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - return await self.engine.get_tokenizer_async(lora_request) + async def get_tokenizer(self) -> AnyTokenizer: + return self.engine.get_tokenizer() def start_background_loop(self) -> None: """Start the background loop.""" diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 0fdd651425b9..c35bd20371d0 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -49,9 +49,8 @@ from vllm.tracing import (SpanAttributes, SpanKind, extract_trace_context, init_tracer) from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.transformers_utils.tokenizer_group import ( - TokenizerGroup, init_tokenizer_from_configs) +from 
vllm.transformers_utils.tokenizer import (AnyTokenizer, + init_tokenizer_from_configs) from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled, usage_message) from vllm.utils import Counter, Device, resolve_obj_by_qualname, weak_bind @@ -186,7 +185,7 @@ def validate_outputs( return outputs_ - tokenizer: Optional[TokenizerGroup] + tokenizer: Optional[AnyTokenizer] def __init__( self, @@ -233,18 +232,9 @@ def __init__( if self.model_config.skip_tokenizer_init: self.tokenizer = None self.detokenizer = None - tokenizer_group = None else: self.tokenizer = self._init_tokenizer() self.detokenizer = Detokenizer(self.tokenizer) - tokenizer_group = self.get_tokenizer_group() - - # Ensure that the function doesn't contain a reference to self, - # to avoid engine GC issues - def get_tokenizer_for_seq(sequence: Sequence) -> AnyTokenizer: - assert tokenizer_group, ("tokenizer_group cannot be None, " - "make sure skip_tokenizer_init is False") - return tokenizer_group.get_lora_tokenizer(sequence.lora_request) self.seq_counter = Counter() self.generation_config_fields = ( @@ -389,10 +379,8 @@ def get_tokenizer_for_seq(sequence: Sequence) -> AnyTokenizer: self.detokenizer, self.scheduler, self.seq_counter, - get_tokenizer_for_seq, stop_checker=StopChecker( self.scheduler_config.max_model_len, - get_tokenizer_for_seq, self.reasoner if self.decoding_config.reasoning_backend and self.tokenizer else None, ), @@ -521,24 +509,15 @@ def __del__(self): if model_executor := getattr(self, "model_executor", None): model_executor.shutdown() - def get_tokenizer_group(self) -> TokenizerGroup: + def get_tokenizer(self) -> AnyTokenizer: if self.tokenizer is None: raise ValueError("Unable to get tokenizer because " "skip_tokenizer_init is True") return self.tokenizer - def get_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - return self.get_tokenizer_group().get_lora_tokenizer(lora_request) - - def _init_tokenizer(self) -> TokenizerGroup: - return init_tokenizer_from_configs( - model_config=self.model_config, - scheduler_config=self.scheduler_config, - lora_config=self.lora_config) + def _init_tokenizer(self) -> AnyTokenizer: + return init_tokenizer_from_configs(model_config=self.model_config) def _verify_args(self) -> None: self.model_config.verify_with_parallel_config(self.parallel_config) @@ -574,11 +553,11 @@ def _add_processed_request( ) return None - self._validate_model_inputs(processed_inputs, lora_request) + self._validate_model_inputs(processed_inputs) # Create the sequences. 
block_size = self.cache_config.block_size seq_id = next(self.seq_counter) - eos_token_id = self.input_preprocessor.get_eos_token_id(lora_request) + eos_token_id = self.input_preprocessor.get_eos_token_id() encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs) @@ -700,7 +679,6 @@ def add_request( processed_inputs = self.input_preprocessor.preprocess( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, ) self._add_processed_request( @@ -1739,29 +1717,22 @@ def create_trace_span(self, seq_group: SequenceGroup) -> None: SpanAttributes.GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE, metrics.model_execute_time) - def _validate_model_inputs(self, inputs: ProcessorInputs, - lora_request: Optional[LoRARequest]): + def _validate_model_inputs(self, inputs: ProcessorInputs): encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs) if encoder_inputs is not None: - self._validate_model_input(encoder_inputs, - lora_request, - prompt_type="encoder") + self._validate_model_input(encoder_inputs, prompt_type="encoder") - self._validate_model_input(decoder_inputs, - lora_request, - prompt_type="decoder") + self._validate_model_input(decoder_inputs, prompt_type="decoder") def _validate_model_input( self, prompt_inputs: SingletonInputs, - lora_request: Optional[LoRARequest], *, prompt_type: Literal["encoder", "decoder"], ): model_config = self.model_config - tokenizer = (None if self.tokenizer is None else - self.tokenizer.get_lora_tokenizer(lora_request)) + tokenizer = self.tokenizer prompt_ids = prompt_inputs.get("prompt_token_ids", []) if not prompt_ids: @@ -1822,7 +1793,7 @@ def _build_logits_processors( logits_processors = [] if (sampling_params.logit_bias or sampling_params.allowed_token_ids): - tokenizer = self.get_tokenizer(lora_request=lora_request) + tokenizer = self.get_tokenizer() processors = get_openai_logits_processors( logit_bias=sampling_params.logit_bias, @@ -1835,7 +1806,7 @@ def _build_logits_processors( sampling_params.allowed_token_ids = None if len(sampling_params.bad_words) > 0: - tokenizer = self.get_tokenizer(lora_request) + tokenizer = self.get_tokenizer() processors = get_bad_words_logits_processors( bad_words=sampling_params.bad_words, tokenizer=tokenizer) logits_processors.extend(processors) diff --git a/vllm/engine/output_processor/interfaces.py b/vllm/engine/output_processor/interfaces.py index 4d75719c1719..587a9221e32c 100644 --- a/vllm/engine/output_processor/interfaces.py +++ b/vllm/engine/output_processor/interfaces.py @@ -2,14 +2,13 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from abc import ABC, abstractmethod -from typing import Callable, List +from typing import List from vllm.config import SchedulerConfig from vllm.core.scheduler import Scheduler from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.sequence import Sequence, SequenceGroup, SequenceGroupOutput +from vllm.sequence import SequenceGroup, SequenceGroupOutput from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils import Counter @@ -31,7 +30,6 @@ def create_output_processor( detokenizer: Detokenizer, scheduler: List[Scheduler], seq_counter: Counter, - get_tokenizer_for_seq: Callable[[Sequence], AnyTokenizer], stop_checker: "StopChecker", ): """Create an output processor. 
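Taken together, the tokenizer changes in this series collapse the TokenizerGroup indirection into a single AnyTokenizer resolved once from the model id. Below is a minimal caller-side sketch of that migration, reusing the facebook/opt-125m example from the test diffs above; the max_num_seqs value is illustrative, and the TokenizerGroup half only applies to a pre-series tree.

# Before this series: callers built a TokenizerGroup and asked it for a
# (possibly LoRA-specific) tokenizer per request.
from vllm.transformers_utils.tokenizer_group import TokenizerGroup

group = TokenizerGroup(
    tokenizer_id="facebook/opt-125m",
    enable_lora=False,
    max_num_seqs=5,          # illustrative value
    max_input_length=None,
)
tokenizer = group.get_lora_tokenizer(None)
eos_token_id = tokenizer.eos_token_id

# After this series: callers resolve one tokenizer directly and use it everywhere.
from vllm.transformers_utils.tokenizer import get_tokenizer

tokenizer = get_tokenizer("facebook/opt-125m")
eos_token_id = tokenizer.eos_token_id
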
diff --git a/vllm/engine/output_processor/stop_checker.py b/vllm/engine/output_processor/stop_checker.py index 68a63044df05..0916f1c918c8 100644 --- a/vllm/engine/output_processor/stop_checker.py +++ b/vllm/engine/output_processor/stop_checker.py @@ -1,13 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Callable, List, Optional, Tuple +from typing import List, Optional, Tuple from vllm.lora.request import LoRARequest from vllm.reasoning import ReasoningParser from vllm.sampling_params import SamplingParams from vllm.sequence import Sequence, SequenceStatus -from vllm.transformers_utils.tokenizer import AnyTokenizer class StopChecker: @@ -20,12 +19,10 @@ class StopChecker: def __init__( self, max_model_len: int, - get_tokenizer_for_seq: Callable[[Sequence], AnyTokenizer], reasoner: Optional[ReasoningParser] = None, ): # Do not use it directly, but use `self._get_max_model_len`. self._max_model_len = max_model_len - self.get_tokenizer_for_seq = get_tokenizer_for_seq self.reasoner = reasoner def _get_max_model_len(self, lora_req: Optional[LoRARequest]): diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index 94eacfbdfb30..808d2d0ce3d2 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -76,8 +76,7 @@ async def beam_search( include_stop_str_in_output = params.include_stop_str_in_output preprocessor = await self.get_input_preprocessor() - tokenizer_group = preprocessor.get_tokenizer_group() - tokenizer = await tokenizer_group.get_lora_tokenizer_async() + tokenizer = preprocessor.get_tokenizer() eos_token_id = tokenizer.eos_token_id if is_explicit_encoder_decoder_prompt(prompt): @@ -260,11 +259,8 @@ async def get_input_preprocessor(self) -> InputPreprocessor: ... @abstractmethod - async def get_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - """Get the appropriate tokenizer for the request""" + async def get_tokenizer(self) -> AnyTokenizer: + """Get the tokenizer""" ... async def get_io_processor(self) -> IOProcessor: diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 4b51dbcd8acb..f2264292fa66 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -301,23 +301,17 @@ def __init__( self.io_processor = get_io_processor(self.llm_engine.vllm_config, io_processor_plugin) - def get_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - return self.llm_engine.get_tokenizer_group().get_lora_tokenizer( - lora_request) + def get_tokenizer(self) -> AnyTokenizer: + return self.llm_engine.get_tokenizer() def set_tokenizer(self, tokenizer: AnyTokenizer) -> None: - tokenizer_group = self.llm_engine.get_tokenizer_group() - # While CachedTokenizer is dynamic, have no choice but # compare class name. 
Misjudgment will arise from # user-defined tokenizer started with 'Cached' if tokenizer.__class__.__name__.startswith("Cached"): - tokenizer_group.tokenizer = tokenizer + self.llm_engine.tokenizer = tokenizer else: - tokenizer_group.tokenizer = get_cached_tokenizer(tokenizer) + self.llm_engine.tokenizer = get_cached_tokenizer(tokenizer) def get_default_sampling_params(self) -> SamplingParams: if self.default_sampling_params is None: @@ -707,7 +701,6 @@ def preprocess_chat( self, messages: Union[list[ChatCompletionMessageParam], list[list[ChatCompletionMessageParam]]], - lora_request: Optional[LoRARequest] = None, chat_template: Optional[str] = None, chat_template_content_format: ChatTemplateContentFormatOption = "auto", add_generation_prompt: bool = True, @@ -739,7 +732,7 @@ def preprocess_chat( cast(list[ChatCompletionMessageParam], messages) ] - tokenizer = self.get_tokenizer(lora_request) + tokenizer = self.get_tokenizer() model_config = self.llm_engine.get_model_config() resolved_content_format = resolve_chat_template_content_format( chat_template, @@ -872,7 +865,6 @@ def chat( prompts = self.preprocess_chat( messages=messages, - lora_request=lora_request, chat_template=chat_template, chat_template_content_format=chat_template_content_format, add_generation_prompt=add_generation_prompt, @@ -1519,7 +1511,7 @@ def _validate_mm_data_and_uuids( ): """ Validate that if any multi-modal data is skipped (i.e. None), - then its corresponding UUID must be set. + then its corresponding UUID must be set. """ if multi_modal_data is None: return diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 6c9c1ae85f57..61d65bd8f119 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -188,7 +188,7 @@ async def create_chat_completion( model_name = self.models.model_name(lora_request) - tokenizer = await self.engine_client.get_tokenizer(lora_request) + tokenizer = await self.engine_client.get_tokenizer() tool_parser = self.tool_parser diff --git a/vllm/entrypoints/openai/serving_classification.py b/vllm/entrypoints/openai/serving_classification.py index 7e88424c169c..fc56668aeb1b 100644 --- a/vllm/entrypoints/openai/serving_classification.py +++ b/vllm/entrypoints/openai/serving_classification.py @@ -50,10 +50,7 @@ async def _preprocess( return None try: - ctx.lora_request = self._maybe_get_adapters(ctx.request) - - ctx.tokenizer = await self.engine_client.get_tokenizer( - ctx.lora_request) + ctx.tokenizer = await self.engine_client.get_tokenizer() renderer = self._get_renderer(ctx.tokenizer) ctx.engine_prompts = await renderer.render_prompt( diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index c2de449a9699..044f08f32b0d 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -127,8 +127,7 @@ async def create_completion( if self.model_config.skip_tokenizer_init: tokenizer = None else: - tokenizer = await self.engine_client.get_tokenizer(lora_request - ) + tokenizer = await self.engine_client.get_tokenizer() renderer = self._get_renderer(tokenizer) engine_prompts = await renderer.render_prompt_and_embeds( diff --git a/vllm/entrypoints/openai/serving_embedding.py b/vllm/entrypoints/openai/serving_embedding.py index c0d1fe4b6e16..647e7daed659 100644 --- a/vllm/entrypoints/openai/serving_embedding.py +++ b/vllm/entrypoints/openai/serving_embedding.py @@ -76,8 +76,7 @@ async def _preprocess( try: 
ctx.lora_request = self._maybe_get_adapters(ctx.request) - tokenizer = await self.engine_client.get_tokenizer(ctx.lora_request - ) + tokenizer = await self.engine_client.get_tokenizer() renderer = self._get_renderer(tokenizer) if isinstance(ctx.request, EmbeddingChatRequest): @@ -394,8 +393,8 @@ async def _collect_batch( ) -> Optional[ErrorResponse]: """Collect and aggregate batch results with support for chunked processing. - - For chunked requests, performs online aggregation to + + For chunked requests, performs online aggregation to minimize memory usage. For regular requests, collects results normally. """ diff --git a/vllm/entrypoints/openai/serving_pooling.py b/vllm/entrypoints/openai/serving_pooling.py index cac1d1ba5683..0750c7ec3e9f 100644 --- a/vllm/entrypoints/openai/serving_pooling.py +++ b/vllm/entrypoints/openai/serving_pooling.py @@ -103,8 +103,7 @@ async def create_pooling( if self.model_config.skip_tokenizer_init: tokenizer = None else: - tokenizer = await self.engine_client.get_tokenizer(lora_request - ) + tokenizer = await self.engine_client.get_tokenizer() renderer = self._get_renderer(tokenizer) if getattr(request, "dimensions", None) is not None: diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 7be5e54208bd..b81b2c7223ef 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -240,7 +240,7 @@ async def create_responses( try: lora_request = self._maybe_get_adapters(request) model_name = self.models.model_name(lora_request) - tokenizer = await self.engine_client.get_tokenizer(lora_request) + tokenizer = await self.engine_client.get_tokenizer() if self.use_harmony: messages, request_prompts, engine_prompts = ( diff --git a/vllm/entrypoints/openai/serving_score.py b/vllm/entrypoints/openai/serving_score.py index 24767ed66fc6..623b1c863f77 100644 --- a/vllm/entrypoints/openai/serving_score.py +++ b/vllm/entrypoints/openai/serving_score.py @@ -269,7 +269,7 @@ async def _run_scoring( ) -> Union[list[PoolingRequestOutput], ErrorResponse]: lora_request = self._maybe_get_adapters(request) - tokenizer = await self.engine_client.get_tokenizer(lora_request) + tokenizer = await self.engine_client.get_tokenizer() truncate_prompt_tokens = getattr(request, "truncate_prompt_tokens", None) diff --git a/vllm/entrypoints/openai/serving_tokenization.py b/vllm/entrypoints/openai/serving_tokenization.py index 1efd9678571c..3918d08ebf81 100644 --- a/vllm/entrypoints/openai/serving_tokenization.py +++ b/vllm/entrypoints/openai/serving_tokenization.py @@ -65,7 +65,7 @@ async def create_tokenize( try: lora_request = self._maybe_get_adapters(request) - tokenizer = await self.engine_client.get_tokenizer(lora_request) + tokenizer = await self.engine_client.get_tokenizer() renderer = self._get_renderer(tokenizer) if isinstance(request, TokenizeChatRequest): @@ -130,7 +130,7 @@ async def create_detokenize( lora_request = self._maybe_get_adapters(request) - tokenizer = await self.engine_client.get_tokenizer(lora_request) + tokenizer = await self.engine_client.get_tokenizer() self._log_inputs(request_id, request.tokens, diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index 22287aa6f41e..cb3a5cdb840e 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -9,13 +9,11 @@ from vllm.config import ModelConfig from vllm.logger import init_logger -from vllm.lora.request import LoRARequest from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry 
from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalEncDecInputs, MultiModalInputs, MultiModalUUIDDict) from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.transformers_utils.tokenizer_group import TokenizerGroup from .data import (DecoderOnlyInputs, EmbedsInputs, EmbedsPrompt, EncoderDecoderInputs, ProcessorInputs, PromptType, @@ -31,7 +29,7 @@ class InputPreprocessor: def __init__( self, model_config: ModelConfig, - tokenizer: Optional[TokenizerGroup], + tokenizer: Optional[AnyTokenizer], mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, mm_processor_cache: Optional[BaseMultiModalProcessorCache] = None, ) -> None: @@ -42,32 +40,28 @@ def __init__( self.mm_registry = mm_registry self.mm_processor_cache = mm_processor_cache - def get_tokenizer_group(self) -> TokenizerGroup: + def get_tokenizer(self) -> AnyTokenizer: if self.tokenizer is None: raise ValueError("You cannot pass text prompts when " "`skip_tokenizer_init` is True") return self.tokenizer - def get_bos_token_id(self, - lora_request: Optional[LoRARequest] = None - ) -> Optional[int]: + def get_bos_token_id(self) -> Optional[int]: if self.tokenizer is None: logger.warning("Using None for BOS token id because tokenizer " "is not initialized") return None - return self.tokenizer.get_lora_tokenizer(lora_request).bos_token_id + return self.tokenizer.bos_token_id - def get_eos_token_id(self, - lora_request: Optional[LoRARequest] = None - ) -> Optional[int]: + def get_eos_token_id(self) -> Optional[int]: if self.tokenizer is None: logger.warning("Using None for EOS token id because tokenizer " "is not initialized") return None - return self.tokenizer.get_lora_tokenizer(lora_request).eos_token_id + return self.tokenizer.eos_token_id def get_decoder_start_token_id(self) -> Optional[int]: """ @@ -190,14 +184,13 @@ def _get_tokenization_kw( def _tokenize_prompt( self, prompt: str, - lora_request: Optional[LoRARequest], tokenization_kwargs: Optional[dict[str, Any]] = None, ) -> list[int]: """ Apply the model's tokenizer to a text prompt, returning the corresponding token IDs. """ - tokenizer = self.get_tokenizer_group() + tokenizer = self.get_tokenizer() tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs) encoder_config = self.model_config.encoder_config @@ -205,50 +198,39 @@ def _tokenize_prompt( if encoder_config and encoder_config.get("do_lower_case", False): prompt = prompt.lower() - return tokenizer.encode(prompt=prompt, - lora_request=lora_request, - **tokenization_kwargs) + return tokenizer.encode(prompt, **tokenization_kwargs) async def _tokenize_prompt_async( self, prompt: str, - lora_request: Optional[LoRARequest], tokenization_kwargs: Optional[dict[str, Any]] = None, ) -> list[int]: """ Async version of [`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt]. 
""" - tokenizer = self.get_tokenizer_group() + tokenizer = self.get_tokenizer() tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs) - return await tokenizer.encode_async(prompt=prompt, - lora_request=lora_request, - **tokenization_kwargs) + return tokenizer.encode(prompt, **tokenization_kwargs) - def _get_mm_tokenizer( - self, - lora_request: Optional[LoRARequest], - ) -> AnyTokenizer: + def _get_mm_tokenizer(self) -> AnyTokenizer: # PrithviGeoSpatialMAE needs to be initialized without a tokenizer # while using also multi-modal input if not self.tokenizer: return cast(AnyTokenizer, object()) # Dummy - tokenizer_group = self.get_tokenizer_group() - return tokenizer_group.get_lora_tokenizer(lora_request) + tokenizer = self.get_tokenizer() + return tokenizer - async def _get_mm_tokenizer_async( - self, - lora_request: Optional[LoRARequest], - ) -> AnyTokenizer: + async def _get_mm_tokenizer_async(self) -> AnyTokenizer: # PrithviGeoSpatialMAE needs to be initialized without a tokenizer # while using also multi-modal input if not self.tokenizer: return cast(AnyTokenizer, object()) # Dummy - tokenizer_group = self.get_tokenizer_group() - return await tokenizer_group.get_lora_tokenizer_async(lora_request) + tokenizer = self.get_tokenizer() + return tokenizer def _process_multimodal( self, @@ -256,7 +238,6 @@ def _process_multimodal( mm_data: MultiModalDataDict, mm_processor_kwargs: Optional[Mapping[str, object]], tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> MultiModalInputs: @@ -264,7 +245,7 @@ def _process_multimodal( Apply the model's multi-modal processor to a multi-modal prompt, returning the corresponding token IDs and metadata. """ - tokenizer = self._get_mm_tokenizer(lora_request) + tokenizer = self._get_mm_tokenizer() mm_processor = self.mm_registry.create_processor( self.model_config, @@ -299,7 +280,6 @@ async def _process_multimodal_async( mm_data: MultiModalDataDict, mm_processor_kwargs: Optional[Mapping[str, object]], tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> MultiModalInputs: @@ -307,7 +287,7 @@ async def _process_multimodal_async( Async version of [`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal]. 
""" - tokenizer = await self._get_mm_tokenizer_async(lora_request) + tokenizer = await self._get_mm_tokenizer_async() mm_processor = self.mm_registry.create_processor( self.model_config, @@ -386,7 +366,6 @@ def _process_tokens( self, parsed_content: TokensPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> Union[TokenInputs, MultiModalInputs]: @@ -400,7 +379,6 @@ def _process_tokens( multi_modal_data, parsed_content.get("mm_processor_kwargs"), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) else: @@ -415,7 +393,6 @@ async def _process_tokens_async( self, parsed_content: TokensPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> Union[TokenInputs, MultiModalInputs]: @@ -429,7 +406,6 @@ async def _process_tokens_async( multi_modal_data, parsed_content.get("mm_processor_kwargs"), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) else: @@ -444,7 +420,6 @@ def _process_text( self, parsed_content: TextPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> Union[TokenInputs, MultiModalInputs]: @@ -457,13 +432,11 @@ def _process_text( multi_modal_data, parsed_content.get("mm_processor_kwargs"), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) else: prompt_token_ids = self._tokenize_prompt( prompt_text, - lora_request=lora_request, tokenization_kwargs=tokenization_kwargs, ) inputs = token_inputs( @@ -480,7 +453,6 @@ async def _process_text_async( self, parsed_content: TextPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> Union[TokenInputs, MultiModalInputs]: @@ -493,13 +465,11 @@ async def _process_text_async( multi_modal_data, parsed_content.get("mm_processor_kwargs"), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) else: prompt_token_ids = await self._tokenize_prompt_async( prompt_text, - lora_request=lora_request, tokenization_kwargs=tokenization_kwargs, ) inputs = token_inputs( @@ -516,7 +486,6 @@ def _prompt_to_llm_inputs( self, prompt: SingletonPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> SingletonInputs: @@ -526,7 +495,6 @@ def _prompt_to_llm_inputs( Arguments: * prompt: single encoder or decoder input prompt - * lora_request: this is only valid for decoder prompts Returns: @@ -539,21 +507,18 @@ def _prompt_to_llm_inputs( if parsed["type"] == "tokens": return self._process_tokens( parsed["content"], - lora_request=lora_request, mm_uuids=mm_uuids, ) if parsed["type"] == "text": return self._process_text( parsed["content"], tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) if parsed["type"] == "str": return self._process_text( TextPrompt(prompt=parsed["content"]), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) @@ -563,7 +528,6 @@ async def _prompt_to_llm_inputs_async( self, prompt: SingletonPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = 
None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> SingletonInputs: @@ -578,21 +542,18 @@ async def _prompt_to_llm_inputs_async( if parsed["type"] == "tokens": return await self._process_tokens_async( parsed["content"], - lora_request=lora_request, mm_uuids=mm_uuids, ) if parsed["type"] == "text": return await self._process_text_async( parsed["content"], tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) if parsed["type"] == "str": return await self._process_text_async( TextPrompt(prompt=parsed["content"]), tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) @@ -844,7 +805,6 @@ def _process_decoder_only_prompt( self, prompt: SingletonPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> DecoderOnlyInputs: @@ -856,7 +816,6 @@ def _process_decoder_only_prompt( Arguments: * prompt: input prompt - * lora_request Returns: @@ -866,7 +825,6 @@ def _process_decoder_only_prompt( prompt_comps = self._prompt_to_llm_inputs( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) @@ -876,7 +834,6 @@ async def _process_decoder_only_prompt_async( self, prompt: SingletonPrompt, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> DecoderOnlyInputs: @@ -887,7 +844,6 @@ async def _process_decoder_only_prompt_async( prompt_comps = await self._prompt_to_llm_inputs_async( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) @@ -897,7 +853,6 @@ def preprocess( self, prompt: PromptType, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> ProcessorInputs: @@ -919,7 +874,6 @@ def preprocess( return self._process_decoder_only_prompt( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) @@ -927,7 +881,6 @@ async def preprocess_async( self, prompt: PromptType, tokenization_kwargs: Optional[dict[str, Any]] = None, - lora_request: Optional[LoRARequest] = None, *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> ProcessorInputs: @@ -952,7 +905,6 @@ async def preprocess_async( return await self._process_decoder_only_prompt_async( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py index 56b01ecf78c4..e2d2846a2807 100644 --- a/vllm/transformers_utils/detokenizer.py +++ b/vllm/transformers_utils/detokenizer.py @@ -10,18 +10,13 @@ from .detokenizer_utils import (convert_prompt_ids_to_tokens, detokenize_incrementally) from .tokenizer import AnyTokenizer -from .tokenizer_group import TokenizerGroup class Detokenizer: """Provides methods to decode the output of a model into text.""" - def __init__(self, tokenizer_group: TokenizerGroup): - self.tokenizer_group = tokenizer_group - - def get_tokenizer_for_seq(self, sequence: Sequence) -> AnyTokenizer: - """Returns the HF tokenizer to use for a given sequence.""" - return self.tokenizer_group.get_lora_tokenizer(sequence.lora_request) + def __init__(self, tokenizer: AnyTokenizer): + self.tokenizer = tokenizer def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, prompt_logprobs: list[Optional[dict[ @@ 
-32,9 +27,9 @@ def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, Args: seq_group: The sequence group to decode. prompt_logprobs: The logprobs to decode. - position_offset: Offset of the first index of the logprobs + position_offset: Offset of the first index of the logprobs relative to the start of the sequence (for chunked prefill). - + Returns: The prompt logprobs with the decoded tokens. """ @@ -46,7 +41,6 @@ def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, # Only prompt, without the generated token. all_token_ids = seq.get_token_ids() prompt_token_ids = all_token_ids[:-1] - tokenizer = self.get_tokenizer_for_seq(seq) prefix_offset = 0 read_offset = 0 next_iter_prefix_offset = 0 @@ -70,7 +64,7 @@ def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, prompt_token_ids[:token_position] + [token_id]) (new_tokens, new_text, new_prefix_offset, new_read_offset) = detokenize_incrementally( - tokenizer=tokenizer, + tokenizer=self.tokenizer, all_input_ids=prompt_token_ids_with_token, prev_tokens=prev_tokens, prefix_offset=prefix_offset, @@ -111,7 +105,6 @@ def decode_sequence_inplace(self, seq: Sequence, """ all_input_ids = seq.get_token_ids() token_id_generated_this_iteration = all_input_ids[-1] - tokenizer = self.get_tokenizer_for_seq(seq) # Convert prompt token IDs to tokens if necessary. # Do it here so that we don't have to repeat this @@ -119,14 +112,14 @@ def decode_sequence_inplace(self, seq: Sequence, if seq.tokens is None: (seq.tokens, seq.prefix_offset, seq.read_offset) = convert_prompt_ids_to_tokens( - tokenizer=tokenizer, + tokenizer=self.tokenizer, prompt_ids=all_input_ids[:-1], skip_special_tokens=prms.skip_special_tokens, ) (new_tokens, new_decoded_token_text, prefix_offset, read_offset) = detokenize_incrementally( - tokenizer=tokenizer, + tokenizer=self.tokenizer, all_input_ids=all_input_ids, prev_tokens=seq.tokens, prefix_offset=seq.prefix_offset, @@ -150,7 +143,7 @@ def decode_sequence_inplace(self, seq: Sequence, and token_id != VLLM_INVALID_TOKEN_ID): all_input_ids_with_logprob = previous_tokens + [token_id] (_, new_text, _, _) = detokenize_incrementally( - tokenizer=tokenizer, + tokenizer=self.tokenizer, all_input_ids=all_input_ids_with_logprob, prev_tokens=seq.tokens, prefix_offset=seq.prefix_offset, diff --git a/vllm/transformers_utils/tokenizer.py b/vllm/transformers_utils/tokenizer.py index b3f1977f26cf..9aaac6681739 100644 --- a/vllm/transformers_utils/tokenizer.py +++ b/vllm/transformers_utils/tokenizer.py @@ -12,6 +12,7 @@ import huggingface_hub from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) +from typing_extensions import assert_never from vllm import envs from vllm.logger import init_logger @@ -19,7 +20,6 @@ get_sentence_transformer_tokenizer_config) from vllm.transformers_utils.tokenizers import MistralTokenizer from vllm.transformers_utils.utils import check_gguf_file -from vllm.utils import make_async if TYPE_CHECKING: from vllm.config import ModelConfig @@ -274,20 +274,19 @@ def cached_tokenizer_from_config( ) -def get_lora_tokenizer(lora_request: LoRARequest, *args, - **kwargs) -> Optional[AnyTokenizer]: - if lora_request is None: - return None - try: - tokenizer = get_tokenizer(lora_request.lora_path, *args, **kwargs) - except Exception as e: - # No tokenizer was found in the LoRA folder, - # use base model tokenizer - logger.warning( - "No tokenizer found in %s, using base model tokenizer instead. 
" - "(Exception: %s)", lora_request.lora_path, e) - tokenizer = None - return tokenizer - +def init_tokenizer_from_configs(model_config: ModelConfig): + runner_type = model_config.runner_type + if runner_type == "generate" or runner_type == "draft": + truncation_side = "left" + elif runner_type == "pooling": + truncation_side = "right" + else: + assert_never(runner_type) -get_lora_tokenizer_async = make_async(get_lora_tokenizer) + return get_tokenizer( + model_config.tokenizer, + tokenizer_mode=model_config.tokenizer_mode, + trust_remote_code=model_config.trust_remote_code, + revision=model_config.tokenizer_revision, + truncation_side=truncation_side, + ) diff --git a/vllm/transformers_utils/tokenizer_base.py b/vllm/transformers_utils/tokenizer_base.py index 20e5fea714e7..b1f84a023fc3 100644 --- a/vllm/transformers_utils/tokenizer_base.py +++ b/vllm/transformers_utils/tokenizer_base.py @@ -61,6 +61,11 @@ def vocab_size(self) -> int: def max_token_id(self) -> int: raise NotImplementedError() + @property + @abstractmethod + def truncation_side(self) -> str: + raise NotImplementedError() + def __len__(self) -> int: return self.vocab_size diff --git a/vllm/transformers_utils/tokenizer_group.py b/vllm/transformers_utils/tokenizer_group.py deleted file mode 100644 index 6b519cccd3cc..000000000000 --- a/vllm/transformers_utils/tokenizer_group.py +++ /dev/null @@ -1,132 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import Optional - -from typing_extensions import assert_never - -from vllm.config import ModelConfig, SchedulerConfig -from vllm.config.lora import LoRAConfig -from vllm.lora.request import LoRARequest -from vllm.transformers_utils.tokenizer import (AnyTokenizer, encode_tokens, - get_lora_tokenizer, - get_lora_tokenizer_async, - get_tokenizer) -from vllm.utils import LRUCache - - -class TokenizerGroup: - """A group of tokenizers that can be used for LoRA adapters.""" - - def __init__(self, tokenizer_id: str, enable_lora: bool, max_num_seqs: int, - max_input_length: Optional[int], **tokenizer_config): - self.tokenizer_id = tokenizer_id - self.tokenizer_config = tokenizer_config - self.enable_lora = enable_lora - self.max_input_length = max_input_length - self.truncation_side = tokenizer_config.get("truncation_side", "left") - self.tokenizer = get_tokenizer(self.tokenizer_id, **tokenizer_config) - max_loras = tokenizer_config.get("max_loras", 0) - self.lora_tokenizers = LRUCache[int, AnyTokenizer]( - capacity=max(max_loras, max_num_seqs) if enable_lora else 0) - - def get_max_input_len(self, - lora_request: Optional[LoRARequest] = None - ) -> Optional[int]: - """Get the maximum input length for the LoRA request.""" - return self.max_input_length - - def _raise_if_input_too_long(self, - encoded_tokens: list[int], - lora_request: Optional[LoRARequest] = None): - input_length = len(encoded_tokens) - if lora_request: - max_input_length = (lora_request.long_lora_max_len - or self.max_input_length) - else: - max_input_length = self.max_input_length - if max_input_length is not None and input_length > max_input_length: - raise ValueError("Input too long.", input_length, max_input_length) - - def encode(self, - prompt: str, - max_length: Optional[int] = None, - truncation: Optional[bool] = None, - lora_request: Optional[LoRARequest] = None, - add_special_tokens: Optional[bool] = None) -> list[int]: - - tokenizer = self.get_lora_tokenizer(lora_request) - ret = encode_tokens(tokenizer, - prompt, - 
max_length=max_length, - truncation=truncation, - add_special_tokens=add_special_tokens) - self._raise_if_input_too_long(ret, lora_request) - return ret - - async def encode_async( - self, - prompt: str, - max_length: Optional[int] = None, - truncation: Optional[bool] = None, - lora_request: Optional[LoRARequest] = None, - add_special_tokens: Optional[bool] = None) -> list[int]: - tokenizer = await self.get_lora_tokenizer_async(lora_request) - ret = encode_tokens(tokenizer, - prompt, - max_length=max_length, - truncation=truncation, - add_special_tokens=add_special_tokens) - self._raise_if_input_too_long(ret, lora_request) - return ret - - def get_lora_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - if not lora_request or not self.enable_lora: - return self.tokenizer - if lora_request.lora_int_id not in self.lora_tokenizers: - tokenizer = (get_lora_tokenizer( - lora_request, **self.tokenizer_config) or self.tokenizer) - self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) - return tokenizer - else: - return self.lora_tokenizers[lora_request.lora_int_id] - - async def get_lora_tokenizer_async( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: - if not lora_request or not self.enable_lora: - return self.tokenizer - if lora_request.lora_int_id not in self.lora_tokenizers: - tokenizer = (await get_lora_tokenizer_async( - lora_request, **self.tokenizer_config) or self.tokenizer) - self.lora_tokenizers.put(lora_request.lora_int_id, tokenizer) - return tokenizer - else: - return self.lora_tokenizers[lora_request.lora_int_id] - - -def init_tokenizer_from_configs(model_config: ModelConfig, - scheduler_config: SchedulerConfig, - lora_config: Optional[LoRAConfig]): - runner_type = model_config.runner_type - if runner_type == "generate" or runner_type == "draft": - truncation_side = "left" - elif runner_type == "pooling": - truncation_side = "right" - else: - assert_never(runner_type) - - return TokenizerGroup( - tokenizer_id=model_config.tokenizer, - enable_lora=bool(lora_config), - max_num_seqs=scheduler_config.max_num_seqs, - max_loras=lora_config.max_loras if lora_config else 0, - max_input_length=None, - tokenizer_mode=model_config.tokenizer_mode, - trust_remote_code=model_config.trust_remote_code, - revision=model_config.tokenizer_revision, - truncation_side=truncation_side) diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index f545993a5a98..5b07327cf2b8 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -327,6 +327,10 @@ def vocab_size(self) -> int: def max_token_id(self) -> int: return self._max_token_id + @property + def truncation_side(self) -> str: + raise NotImplementedError() + def __len__(self) -> int: return self.vocab_size diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index a9ced402b974..f17c269e4709 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -29,8 +29,8 @@ from vllm.tracing import init_tracer from vllm.transformers_utils.config import ( maybe_register_config_serialize_by_value) -from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs +from vllm.transformers_utils.tokenizer import (AnyTokenizer, + init_tokenizer_from_configs) from vllm.usage.usage_lib import UsageContext from vllm.utils import (Device, as_list, cancel_task_threadsafe, cdiv, 
deprecate_kwargs) @@ -112,9 +112,7 @@ def __init__( else: # Tokenizer (+ ensure liveness if running in another process). self.tokenizer = init_tokenizer_from_configs( - model_config=vllm_config.model_config, - scheduler_config=vllm_config.scheduler_config, - lora_config=vllm_config.lora_config) + model_config=vllm_config.model_config) # Processor (converts Inputs --> EngineCoreRequests). self.processor = Processor( @@ -596,15 +594,12 @@ async def get_decoding_config(self): async def get_input_preprocessor(self) -> InputPreprocessor: return self.processor.input_preprocessor - async def get_tokenizer( - self, - lora_request: Optional[LoRARequest] = None, - ) -> AnyTokenizer: + async def get_tokenizer(self) -> AnyTokenizer: if self.tokenizer is None: raise ValueError("Unable to get tokenizer because " "skip_tokenizer_init is True") - return self.tokenizer.get_lora_tokenizer(lora_request) + return self.tokenizer async def is_tracing_enabled(self) -> bool: return self.observability_config.otlp_traces_endpoint is not None diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index fca5a783bc3b..c93bfc35f0ae 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -20,8 +20,8 @@ from vllm.sampling_params import SamplingParams from vllm.tasks import SupportedTask from vllm.tracing import init_tracer -from vllm.transformers_utils.tokenizer_group import ( - TokenizerGroup, init_tokenizer_from_configs) +from vllm.transformers_utils.tokenizer import (AnyTokenizer, + init_tokenizer_from_configs) from vllm.usage.usage_lib import UsageContext from vllm.utils import Device from vllm.v1.engine.core_client import EngineCoreClient @@ -89,9 +89,7 @@ def __init__( else: # Tokenizer (+ ensure liveness if running in another process). 
self.tokenizer = init_tokenizer_from_configs( - model_config=vllm_config.model_config, - scheduler_config=vllm_config.scheduler_config, - lora_config=vllm_config.lora_config) + model_config=vllm_config.model_config) # Processor (convert Inputs --> EngineCoreRequests) self.processor = Processor(vllm_config=vllm_config, @@ -297,7 +295,7 @@ def get_metrics(self) -> list[Metric]: assert self.log_stats, "Stat logging disabled" return get_metrics_snapshot() - def get_tokenizer_group(self) -> TokenizerGroup: + def get_tokenizer(self) -> AnyTokenizer: if self.tokenizer is None: raise ValueError("Unable to get tokenizer because " "skip_tokenizer_init is True") diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py index 14ac1e3e5afa..5dad63988daa 100644 --- a/vllm/v1/engine/output_processor.py +++ b/vllm/v1/engine/output_processor.py @@ -14,7 +14,6 @@ from vllm.tracing import (SpanAttributes, SpanKind, Tracer, extract_trace_context) from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.transformers_utils.tokenizer_group import TokenizerGroup from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason from vllm.v1.engine.detokenizer import IncrementalDetokenizer from vllm.v1.engine.logprobs import LogprobsProcessor @@ -290,7 +289,7 @@ def _new_pooling_output( class OutputProcessor: """Process EngineCoreOutputs into RequestOutputs.""" - def __init__(self, tokenizer: TokenizerGroup, log_stats: bool): + def __init__(self, tokenizer: AnyTokenizer, log_stats: bool): self.log_stats = log_stats self.tokenizer = tokenizer self.request_states: dict[str, RequestState] = {} @@ -347,10 +346,7 @@ def add_request( if request_id in self.request_states: raise ValueError(f"Request id {request_id} already running.") - tokenizer = None if not self.tokenizer else \ - self.tokenizer.get_lora_tokenizer(request.lora_request) - - req_state = RequestState.from_new_request(tokenizer=tokenizer, + req_state = RequestState.from_new_request(tokenizer=self.tokenizer, request=request, prompt=prompt, parent_req=parent_req, diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 327b4e270548..8d9f2ba1ec82 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -9,6 +9,7 @@ from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs from vllm.inputs.parse import split_enc_dec_inputs from vllm.inputs.preprocess import InputPreprocessor +from vllm.logger import init_logger from vllm.lora.request import LoRARequest from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.multimodal.cache import processor_cache_from_config @@ -17,7 +18,7 @@ from vllm.multimodal.utils import argsort_mm_positions from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingParams -from vllm.transformers_utils.tokenizer_group import TokenizerGroup +from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.v1.engine import EngineCoreRequest from vllm.v1.structured_output.backend_guidance import ( validate_guidance_grammar) @@ -28,13 +29,15 @@ from vllm.v1.structured_output.backend_xgrammar import ( validate_xgrammar_grammar) +logger = init_logger(__name__) + class Processor: def __init__( self, vllm_config: VllmConfig, - tokenizer: TokenizerGroup, + tokenizer: AnyTokenizer, mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, ): @@ -90,7 +93,6 @@ def _validate_logprobs( def _validate_sampling_params( self, params: SamplingParams, - lora_request: Optional[LoRARequest], ) -> None: 
self._validate_structured_output(params) self._validate_logit_bias(params) @@ -103,8 +105,7 @@ def _validate_sampling_params( # When skip_tokenizer_init=True, we can't validate token IDs # Skip validation and let the model handle invalid tokens return - tokenizer = self.tokenizer.get_lora_tokenizer(lora_request) - vocab_size = len(tokenizer) + vocab_size = len(self.tokenizer) if not all(0 <= tid < vocab_size for tid in params.allowed_token_ids): raise ValueError( "allowed_token_ids contains out-of-vocab token id!") @@ -144,7 +145,6 @@ def _validate_supported_sampling_params( def _validate_params( self, params: Union[SamplingParams, PoolingParams], - lora_request: Optional[LoRARequest], ): """ Validate supported SamplingParam. @@ -155,14 +155,14 @@ def _validate_params( return self._validate_logprobs(params) - self._validate_sampling_params(params, lora_request) + self._validate_sampling_params(params) self._validate_supported_sampling_params(params) def _validate_multi_modal_uuids(self, prompt: PromptType) -> None: """ Validate that user-provided multi_modal_uuids align with multi_modal_data in the incoming request prompt(s). - Only checks lengths; `None` entries are allowed and will be + Only checks lengths; `None` entries are allowed and will be auto-hashed downstream. """ @@ -202,10 +202,22 @@ def _validate_single_prompt(single_prompt: Union[dict, str]) -> None: _validate_single_prompt(prompt) # type: ignore[arg-type] def _validate_lora(self, lora_request: Optional[LoRARequest]) -> None: - if lora_request is not None and not self.lora_config: + if lora_request is None: + return + + # LoRA request passed in while LoRA is not enabled + if not self.lora_config: raise ValueError(f"Got lora_request {lora_request} but LoRA is " "not enabled!") + if self.tokenizer is not None: + logger.warning_once( + "vLLM has deprecated support for supporting different " + "tokenizers for different LoRAs. By default, vLLM uses base " + "model's tokenizer. If you are using a LoRA " + "with its own tokenizer, consider specifying `--tokenizer " + "[lora_path]` to use the LoRA tokenizer.") + def _validate_structured_output(self, params: SamplingParams) -> None: if not params.guided_decoding or not self.decoding_config: return @@ -326,7 +338,7 @@ def process_inputs( # TODO(woosuk): Support pooling models. 
self._validate_lora(lora_request) - self._validate_params(params, lora_request) + self._validate_params(params) data_parallel_size = self.vllm_config.parallel_config.data_parallel_size if data_parallel_rank is not None and not (0 <= data_parallel_rank < @@ -365,7 +377,6 @@ def process_inputs( processed_inputs: ProcessorInputs = self.input_preprocessor.preprocess( prompt, tokenization_kwargs=tokenization_kwargs, - lora_request=lora_request, mm_uuids=mm_uuids, ) from vllm.platforms import current_platform @@ -375,9 +386,9 @@ def process_inputs( processed_inputs=processed_inputs, ) - eos_token_id = self.input_preprocessor.get_eos_token_id(lora_request) + eos_token_id = self.input_preprocessor.get_eos_token_id() - self._validate_model_inputs(processed_inputs, lora_request) + self._validate_model_inputs(processed_inputs) encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs) @@ -394,8 +405,7 @@ def process_inputs( sampling_params.update_from_generation_config( self.generation_config_fields, eos_token_id) if self.tokenizer is not None: - sampling_params.update_from_tokenizer( - self.tokenizer.get_lora_tokenizer(lora_request)) + sampling_params.update_from_tokenizer(self.tokenizer) else: pooling_params = params.clone() @@ -436,24 +446,17 @@ def process_inputs( trace_headers=trace_headers, ) - def _validate_model_inputs(self, - inputs: ProcessorInputs, - lora_request: Optional[LoRARequest] = None): + def _validate_model_inputs(self, inputs: ProcessorInputs): encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs) if encoder_inputs is not None: - self._validate_model_input(encoder_inputs, - lora_request, - prompt_type="encoder") + self._validate_model_input(encoder_inputs, prompt_type="encoder") - self._validate_model_input(decoder_inputs, - lora_request, - prompt_type="decoder") + self._validate_model_input(decoder_inputs, prompt_type="decoder") def _validate_model_input( self, prompt_inputs: SingletonInputs, - lora_request: Optional[LoRARequest], *, prompt_type: Literal["encoder", "decoder"], ): @@ -469,7 +472,7 @@ def _validate_model_input( if self.model_config.skip_tokenizer_init: tokenizer = None else: - tokenizer = self.tokenizer.get_lora_tokenizer(lora_request) + tokenizer = self.tokenizer max_input_id = max(prompt_ids, default=0) # NOTE: tokenizer.max_token_id is the tokenizer’s vocab size while diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py index 57854cc11204..1ab29dfecd9e 100644 --- a/vllm/v1/structured_output/__init__.py +++ b/vllm/v1/structured_output/__init__.py @@ -9,7 +9,7 @@ from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.reasoning import ReasoningParserManager -from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs +from vllm.transformers_utils.tokenizer import init_tokenizer_from_configs from vllm.utils import LazyLoader from vllm.v1.structured_output.backend_guidance import GuidanceBackend from vllm.v1.structured_output.backend_types import (StructuredOutputBackend, @@ -60,10 +60,7 @@ def __init__(self, vllm_config: VllmConfig): max_workers = max(1, (multiprocessing.cpu_count() + 1) // 2) self.executor = ThreadPoolExecutor(max_workers=max_workers) self.tokenizer = init_tokenizer_from_configs( - model_config=self.vllm_config.model_config, - scheduler_config=self.vllm_config.scheduler_config, - lora_config=self.vllm_config.lora_config, - ).get_lora_tokenizer(None) + model_config=self.vllm_config.model_config) reasoning_backend = \ 
self.vllm_config.decoding_config.reasoning_backend if reasoning_backend: From 0fb2551c238c7ccbcf6f25ef4646ce6c92f684d1 Mon Sep 17 00:00:00 2001 From: Michael Yao Date: Wed, 17 Sep 2025 16:49:19 +0800 Subject: [PATCH 036/518] [Docs] Fix griffe warning in base_static_graph.py (#25018) Signed-off-by: windsonsea --- vllm/compilation/base_static_graph.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vllm/compilation/base_static_graph.py b/vllm/compilation/base_static_graph.py index 161d066ce9fb..6ee82e74963d 100644 --- a/vllm/compilation/base_static_graph.py +++ b/vllm/compilation/base_static_graph.py @@ -12,8 +12,13 @@ class AbstractStaticGraphWrapper(Protocol): to be captured as a static graph. """ - def __init__(self, runnable: Callable, vllm_config: VllmConfig, - runtime_mode: CUDAGraphMode, **kwargs): + def __init__( + self, + runnable: Callable[..., Any], + vllm_config: VllmConfig, + runtime_mode: CUDAGraphMode, + **kwargs: Any, + ) -> None: """ Initializes the StaticGraphWrapper class with graph capturing and execution-related configurations. @@ -31,7 +36,7 @@ def __init__(self, runnable: Callable, vllm_config: VllmConfig, """ raise NotImplementedError - def __call__(self, *args, **kwargs) -> Any: + def __call__(self, *args: Any, **kwargs: Any) -> Any: """ Executes the wrapped callable. From bb58dc8c20315038ea5e14007de7269dfaec1ce4 Mon Sep 17 00:00:00 2001 From: Xinyu Chen Date: Wed, 17 Sep 2025 16:57:25 +0800 Subject: [PATCH 037/518] [DP] Create placement groups by ray_device_key (#25026) Signed-off-by: Xinyu Chen Co-authored-by: Kunshang Ji --- vllm/v1/engine/utils.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/vllm/v1/engine/utils.py b/vllm/v1/engine/utils.py index df2fd8d9df07..18ef25ceb6f5 100644 --- a/vllm/v1/engine/utils.py +++ b/vllm/v1/engine/utils.py @@ -334,20 +334,22 @@ def create_dp_placement_groups( "No nodes with resources found in Ray cluster.") assert dp_master_ip_key in nodes[0], ( "The DP master node (ip: %s) is missing or dead", dp_master_ip) + device_str = current_platform.ray_device_key for node_resources in nodes: - if "GPU" not in node_resources: + if device_str not in node_resources: continue # For now, each DP rank can only be assigned to one node # TODO(rui): support allocating a single DP rank # to multiple nodes - available_engine_count = int(node_resources["GPU"]) // world_size + available_engine_count = int( + node_resources[device_str]) // world_size if dp_master_ip_key in node_resources: assert available_engine_count >= local_engine_count, ( "Not enough resources to allocate DP ranks " f"on DP master node {dp_master_ip}") for i in range(local_engine_count): bundles = [{ - "GPU": 1.0, + device_str: 1.0, "node:" + dp_master_ip: 0.001 }] * world_size + [{ "CPU": 1.0 @@ -363,7 +365,7 @@ def create_dp_placement_groups( for i in range(available_engine_count): if len(placement_groups) == num_pg_to_create: break - bundles = [{"GPU": 1.0}] * world_size + [{"CPU": 1.0}] + bundles = [{device_str: 1.0}] * world_size + [{"CPU": 1.0}] pg = ray.util.placement_group( name=f"dp_rank_{len(placement_groups)}", strategy="STRICT_PACK", @@ -415,17 +417,18 @@ def add_dp_placement_groups( local_dp_ranks = [] num_pg_created = 0 + device_str = current_platform.ray_device_key for node in nodes: if num_pg_created >= num_pg_to_create: break node_ip = node.node_ip node_id = node.node_id - available_gpus = int(available_resources[node_id]["GPU"]) + available_gpus = int(available_resources[node_id][device_str]) # 
Get total GPUs on this node from the node's resources # Ray stores node resources with node ID as key - total_gpus = int(total_resources[node_id]["GPU"]) + total_gpus = int(total_resources[node_id][device_str]) # Calculate used GPUs and used engines on this node used_gpus = max(0, total_gpus - available_gpus) @@ -444,13 +447,13 @@ def add_dp_placement_groups( # Create bundles with node constraint for master node if node_ip == dp_master_ip: bundles = [{ - "GPU": 1.0, + device_str: 1.0, "node:" + dp_master_ip: 0.001 }] * world_size + [{ "CPU": 1.0 }] else: - bundles = [{"GPU": 1.0}] * world_size + [{"CPU": 1.0}] + bundles = [{device_str: 1.0}] * world_size + [{"CPU": 1.0}] pg = ray.util.placement_group( name=f"dp_rank_{rank}", From 544fe76b95aacdb6d0636c41813bee6236fb0027 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Wed, 17 Sep 2025 17:03:52 +0800 Subject: [PATCH 038/518] [Frontend] Support returning all prompt logprobs (#24956) Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_chat_echo.py | 22 ++++++++++++++++++++ vllm/entrypoints/openai/protocol.py | 24 ++++++++++++++-------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/tests/entrypoints/openai/test_chat_echo.py b/tests/entrypoints/openai/test_chat_echo.py index de63f4ed218b..0f459dd3d857 100644 --- a/tests/entrypoints/openai/test_chat_echo.py +++ b/tests/entrypoints/openai/test_chat_echo.py @@ -22,6 +22,8 @@ def server(): "--enforce-eager", "--max-model-len", "4080", + "--max-logprobs", # test prompt_logprobs equal to -1 + "151936" ] with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: @@ -77,3 +79,23 @@ async def test_chat_session_with_echo_and_continue_final_message( else: assert message.content is not None and saying not in message.content assert message.role == "assistant" + + +@pytest.mark.asyncio +async def test_prompt_logprobs(client: openai.AsyncOpenAI): + messages = [{ + "role": "system", + "content": "You are a helpful assistant." + }, { + "role": "user", + "content": "Beijing is the capital of which country?" 
+ }] + + completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + extra_body={"prompt_logprobs": -1}, + ) + + assert completion.prompt_logprobs is not None + assert len(completion.prompt_logprobs) > 0 diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 8ecb1a8239c3..6b4c3f531dbc 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -822,13 +822,17 @@ def validate_stream_options(cls, data): @classmethod def check_logprobs(cls, data): if (prompt_logprobs := data.get("prompt_logprobs")) is not None: - if data.get("stream") and prompt_logprobs > 0: + if data.get("stream") and (prompt_logprobs > 0 + or prompt_logprobs == -1): raise ValueError( "`prompt_logprobs` are not available when `stream=True`.") - if prompt_logprobs < 0: - raise ValueError("`prompt_logprobs` must be a positive value.") - + if prompt_logprobs < 0 and prompt_logprobs != -1: + raise ValueError( + "`prompt_logprobs` must be a positive value or -1.") + if prompt_logprobs == -1 and not envs.VLLM_USE_V1: + raise ValueError("`prompt_logprobs=-1` is only supported with " + "vLLM engine V1.") if (top_logprobs := data.get("top_logprobs")) is not None: if top_logprobs < 0: raise ValueError("`top_logprobs` must be a positive value.") @@ -1246,13 +1250,17 @@ def check_guided_decoding_count(cls, data): @classmethod def check_logprobs(cls, data): if (prompt_logprobs := data.get("prompt_logprobs")) is not None: - if data.get("stream") and prompt_logprobs > 0: + if data.get("stream") and (prompt_logprobs > 0 + or prompt_logprobs == -1): raise ValueError( "`prompt_logprobs` are not available when `stream=True`.") - if prompt_logprobs < 0: - raise ValueError("`prompt_logprobs` must be a positive value.") - + if prompt_logprobs < 0 and prompt_logprobs != -1: + raise ValueError( + "`prompt_logprobs` must be a positive value or -1.") + if prompt_logprobs == -1 and not envs.VLLM_USE_V1: + raise ValueError("`prompt_logprobs=-1` is only supported with " + "vLLM engine V1.") if (logprobs := data.get("logprobs")) is not None and logprobs < 0: raise ValueError("`logprobs` must be a positive value.") From 2b856970313e80a649a573879d94e9d3430ba018 Mon Sep 17 00:00:00 2001 From: Shijun Yin Date: Wed, 17 Sep 2025 17:21:18 +0800 Subject: [PATCH 039/518] [BugFix] enable DOTALL to match multi-line tool_call parameters in extract_tool_call_required_streaming (#24668) Signed-off-by: Shijun Yin --- vllm/entrypoints/openai/serving_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 61d65bd8f119..cd85baa9ba66 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -418,7 +418,7 @@ def extract_tool_call_required_streaming( if not function_name_returned: # get partly generated arguments from the latest tool call param_match = re.search(r'.*"parameters":\s*(.*)', - current_text) + current_text, re.DOTALL) arguments = param_match.group(1) if param_match else "" arguments, _ = OpenAIServingChat._filter_delta_text( arguments, previous_text) From e120533d7ae3bc8c3ef39f215de274f1280bb454 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 17 Sep 2025 20:19:15 +0800 Subject: [PATCH 040/518] [Misc] Avoid use of deprecated `AutoModelForVision2Seq` (#25065) Signed-off-by: DarkLight1337 --- tests/models/multimodal/generation/test_common.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 
deletions(-) diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index d61b182761e4..79f9d607f338 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -10,7 +10,7 @@ import pytest from transformers import (AutoModel, AutoModelForImageTextToText, - AutoModelForTextToWaveform, AutoModelForVision2Seq) + AutoModelForTextToWaveform) from vllm.platforms import current_platform from vllm.utils import identity @@ -137,7 +137,7 @@ video_idx_to_prompt=lambda idx: "<|vision_start|><|video_pad|><|vision_end|>", # noqa: E501 max_model_len=4096, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], marks=[pytest.mark.core_model, pytest.mark.cpu_model], @@ -502,7 +502,7 @@ num_video_frames=16, max_model_len=16384, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[CustomTestOptions( inputs=custom_inputs.multi_video_multi_aspect_ratio_inputs( @@ -518,7 +518,7 @@ num_video_frames=16, max_model_len=4096, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.llava_video_vllm_to_hf_output, ), "mantis": VLMTestInfo( @@ -680,7 +680,7 @@ multi_image_prompt="Picture 1: \nPicture 2: \nDescribe these two images with one paragraph respectively.", # noqa: E501 max_model_len=4096, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], marks=[pytest.mark.cpu_model], @@ -784,7 +784,7 @@ test_type=VLMTestType.CUSTOM_INPUTS, max_model_len=16384, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, hf_model_kwargs=model_utils.llava_onevision_hf_model_kwargs("llava-hf/llava-onevision-qwen2-0.5b-ov-hf"), # noqa: E501 vllm_output_post_proc=model_utils.llava_onevision_vllm_to_hf_output, custom_test_opts=[CustomTestOptions( @@ -800,7 +800,7 @@ test_type=VLMTestType.CUSTOM_INPUTS, max_model_len=4096, max_num_seqs=2, - auto_cls=AutoModelForVision2Seq, + auto_cls=AutoModelForImageTextToText, vllm_output_post_proc=model_utils.qwen2_vllm_to_hf_output, custom_test_opts=[CustomTestOptions( inputs=custom_inputs.windows_attention_image_qwen2_5_vl(), From 252ada5559808783d6a23b489156b3705cea0417 Mon Sep 17 00:00:00 2001 From: danielafrimi <45691845+danielafrimi@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:53:30 +0300 Subject: [PATCH 041/518] Add RADIO Vision Encoder Support to vLLM (#24595) Signed-off-by: Daniel Afrimi Co-authored-by: root --- tests/models/multimodal/pooling/test_radio.py | 86 +++ .../model_executor/models/nano_nemotron_vl.py | 118 ++-- vllm/model_executor/models/radio.py | 576 ++++++++++++++++++ vllm/transformers_utils/configs/__init__.py | 2 + vllm/transformers_utils/configs/radio.py | 104 ++++ 5 files changed, 828 insertions(+), 58 deletions(-) create mode 100644 tests/models/multimodal/pooling/test_radio.py create mode 100644 vllm/model_executor/models/radio.py create mode 100644 
vllm/transformers_utils/configs/radio.py diff --git a/tests/models/multimodal/pooling/test_radio.py b/tests/models/multimodal/pooling/test_radio.py new file mode 100644 index 000000000000..27b9fe369e80 --- /dev/null +++ b/tests/models/multimodal/pooling/test_radio.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest +import torch +import torch.nn as nn +from huggingface_hub import snapshot_download +from transformers import AutoConfig, AutoModel, CLIPImageProcessor + +from vllm.distributed import cleanup_dist_env_and_memory +from vllm.model_executor.models.radio import RadioModel +from vllm.transformers_utils.configs.radio import RadioConfig +from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE + +from ....conftest import ImageTestAssets + +# we use snapshot_download to prevent conflicts between +# dynamic_module and trust_remote_code for hf_runner +DOWNLOAD_PATTERN = ["*.json", "*.py", "*.safetensors", "*.txt", "*.model"] + + +@torch.inference_mode() +def run_radio_test( + image_assets: ImageTestAssets, + model_id: str, + *, + dtype: str, +): + model = snapshot_download(model_id, allow_patterns=DOWNLOAD_PATTERN) + torch_dtype = STR_DTYPE_TO_TORCH_DTYPE[dtype] + + img_processor = CLIPImageProcessor.from_pretrained(model) + images = [asset.pil_image for asset in image_assets] + # Input resolution must be a multiple of `self.min_resolution_step`. + # Using `self.get_nearest_supported_resolution`, for assets 432x642 the + # nearest supported resolution is 432x640. + pixel_values = [ + img_processor( + image, + return_tensors='pt').pixel_values.to(torch_dtype)[:, :, :, :640] + for image in images + ] + + config = AutoConfig.from_pretrained(model_id, trust_remote_code=True) + + hf_model = AutoModel.from_pretrained( + model_id, + config=config, + torch_dtype=torch_dtype, + trust_remote_code=True, + ).to("cuda") + hf_model.eval() + + hf_outputs_per_image = [ + hf_model(pixel_value.to("cuda")).features + for pixel_value in pixel_values + ] + + radio_config = RadioConfig(model_name=config.args["model"], + reg_tokens=config.args["register_multiple"]) + vllm_model = RadioModel(radio_config) + vllm_model.load_weights(hf_model.state_dict()) + vllm_model = vllm_model.to("cuda", torch_dtype) + + vllm_outputs_per_image = [ + vllm_model(pixel_values=pixel_value.to("cuda")) + for pixel_value in pixel_values + ] + del vllm_model, hf_model + cleanup_dist_env_and_memory() + + cos_similar = nn.CosineSimilarity(dim=-1) + for vllm_output, hf_output in zip(vllm_outputs_per_image, + hf_outputs_per_image): + assert cos_similar(vllm_output, hf_output).mean() > 0.99 + + +@pytest.mark.parametrize("model_id", [ + "nvidia/C-RADIOv2-H", +]) +@pytest.mark.parametrize("dtype", ["half"]) +def test_radio(dist_init, image_assets, model_id, dtype: str) -> None: + run_radio_test( + image_assets, + model_id, + dtype=dtype, + ) diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index 21765a483b8e..4f8652c00694 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -18,8 +18,8 @@ import torch.nn as nn import torchvision.transforms as T from PIL import Image -from transformers import (AutoModel, BatchEncoding, BatchFeature, - PretrainedConfig, TensorType) +from transformers import (BatchEncoding, BatchFeature, PretrainedConfig, + TensorType) from vllm.config import VllmConfig from vllm.model_executor.layers.activation import 
ReLUSquaredActivation @@ -32,6 +32,7 @@ get_internvl_target_ratios) from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.nemotron_h import NemotronHForCausalLM +from vllm.model_executor.models.radio import RadioModel from vllm.model_executor.models.utils import (flatten_bn, init_vllm_registered_model, maybe_prefix, @@ -48,6 +49,7 @@ PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors +from vllm.transformers_utils.configs.radio import RadioConfig from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils.tensor_schema import TensorSchema, TensorShape @@ -122,11 +124,6 @@ class NanoNemotronVLVideoEmbeddingInputs(TensorSchema): NanoNemotronVLVideoEmbeddingInputs] -def input_conditioner(x, norm_mean, norm_std): - y = (x - norm_mean) / norm_std - return y - - def dynamic_preprocess(image, *, image_size=512, @@ -305,8 +302,7 @@ def _preprocess_image( images, max_num_tiles) image_inputs: dict[str, NestedTensors] = { "pixel_values_flat": - input_conditioner(torch.cat(pixel_values_lst), self.norm_mean, - self.norm_std), + torch.cat(pixel_values_lst), "image_num_patches": torch.tensor([len(item) for item in pixel_values_lst]), } @@ -428,8 +424,7 @@ def _preprocess_video( video_inputs: dict[str, NestedTensors] = { "pixel_values_flat_video": - input_conditioner(torch.cat(pixel_values_lst_video), - self.norm_mean, self.norm_std), + torch.cat(pixel_values_lst_video), "video_num_patches": torch.tensor([len(item) for item in pixel_values_lst_video]), } @@ -905,18 +900,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): hf_config=config.text_config, prefix=maybe_prefix(prefix, "language_model"), ) - self.vision_model = AutoModel.from_config(config.vision_config, - trust_remote_code=True) - self.vision_model.model._initialize_weights = ( - self.vision_model.model._init_weights) - # Move input normalization to processor to mirror original HF - # implementation where normalization is done in fp32 - self.vision_model.radio_model.make_preprocessor_external() - self.vision_model = self.vision_model.to( + self.vision_model = self.get_vit_model_from_radio_config(config).to( self.language_model.config.torch_dtype) - self.drop_vision_class_token = True - # Construct the vision projection. 
vit_hidden_size = config.vit_hidden_size vision_projection_hidden_size = config.projector_hidden_size @@ -972,7 +958,7 @@ def pixel_shuffle(self, x, scale_factor=0.5): return x def extract_feature(self, pixel_values): - vit_embeds = self.vision_model(pixel_values).features + vit_embeds = self.vision_model(pixel_values) vit_embeds = vit_embeds.to(dtype=torch.bfloat16) h = w = int(vit_embeds.shape[1]**0.5) vit_embeds = vit_embeds.reshape(vit_embeds.shape[0], h, w, -1) @@ -1212,47 +1198,39 @@ def compute_logits( sampling_metadata) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): + adapter_dict = dict(self.mlp1.named_parameters()) - def is_vision_model_weights(weight: tuple[str, torch.Tensor]): - return weight[0].startswith("vision_model") + def is_llm(name: str) -> bool: + return name.startswith("language_model") def is_adapter_weights(weight: tuple[str, torch.Tensor]): return weight[0].startswith("mlp1") - # Get references to parameters for direct loading - vision_model_dict = dict(self.vision_model.named_parameters()) - vision_model_buffers = dict(self.vision_model.named_buffers()) - adapter_dict = dict(self.mlp1.named_parameters()) - - def llm_weights_generator(): - # Single pass over weights - for name, w in weights: - if is_vision_model_weights((name, w)): - # Load vision encoder weights directly - trimmed_name = ".".join(name.split(".")[1:]) - if "input_conditioner" in trimmed_name: - continue - if trimmed_name in vision_model_buffers: - param = vision_model_buffers[trimmed_name] - else: - param = vision_model_dict[trimmed_name] - with torch.no_grad(): - default_weight_loader(param, w) - elif is_adapter_weights((name, w)): - # Load vision-language adapter weights directly - trimmed_name = ".".join(name.split(".")[1:]) - param = adapter_dict[trimmed_name] - with torch.no_grad(): - default_weight_loader(param, w) - else: - # LLM weights: yield them to be loaded - # by language_model.load_weights - assert name.startswith("language_model") - trimmed_name = ".".join(name.split(".")[1:]) - yield (trimmed_name, w) - - # Now we call the language model load with the generator - self.language_model.load_weights(llm_weights_generator()) + def is_vision_weights(name: str) -> bool: + return name.startswith("vision_model.radio_model.") + + # Separate weights by component + llm_weights = [] + vision_weights = [] + + for name, w in weights: + if is_llm(name): + # Strip 'language_model.' prefix for LLM weights + llm_weights.append((".".join(name.split(".")[1:]), w)) + elif is_adapter_weights((name, w)): + # Load vision-language adapter weights directly + trimmed_name = ".".join(name.split(".")[1:]) + param = adapter_dict[trimmed_name] + with torch.no_grad(): + default_weight_loader(param, w) + elif is_vision_weights(name): + # Convert: vision_model.radio_model.* → radio_model.* + hf_key = name[len( + "vision_model."):] # Remove "vision_model." 
prefix + vision_weights.append((hf_key, w)) + + self.language_model.load_weights(llm_weights) + self.vision_model.load_weights(vision_weights) def print_architecture(self, detailed: bool = True, @@ -1370,6 +1348,30 @@ def get_model_info(self): }, } + def get_vit_model_from_radio_config(self, hf_config): + hf_config_vision = hf_config.vision_config + model_name = hf_config_vision.args.get("model") + if model_name is None: + raise ValueError(f'Unsupported vit model type: {model_name}') + + preferred_resolution = getattr(hf_config_vision, + "preferred_resolution", None) + image_size = preferred_resolution[0] if preferred_resolution else 224 + patch_size = getattr(hf_config_vision, "patch_size", 16) + + radio_config = RadioConfig( + model_name=model_name, + image_size=image_size, + patch_size=patch_size, + norm_mean=hf_config.norm_mean, + norm_std=hf_config.norm_std, + reg_tokens=(hf_config_vision.args.get("register_multiple") + if hasattr(hf_config_vision, "args") + and isinstance(hf_config_vision.args, dict) else None), + ) + + return RadioModel(config=radio_config) + def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): return self.language_model.mamba_cache.copy_inputs_before_cuda_graphs( input_buffers, **kwargs) diff --git a/vllm/model_executor/models/radio.py b/vllm/model_executor/models/radio.py new file mode 100644 index 000000000000..9cbf844ae9f8 --- /dev/null +++ b/vllm/model_executor/models/radio.py @@ -0,0 +1,576 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ +import math +from collections.abc import Iterable +from itertools import repeat +from typing import Optional, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from einops import rearrange +from transformers import PretrainedConfig + +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.intern_vit import InternVisionEncoder + +input_dim_t = Union[int, tuple[int, int]] +norm_t = Union[tuple[float, float, float], torch.Tensor] + + +def _ntuple(n): + + def parse(x): + if isinstance(x, Iterable) and not isinstance(x, str): + return tuple(x) + return tuple(repeat(x, n)) + + return parse + + +to_1tuple = _ntuple(1) +to_2tuple = _ntuple(2) +to_3tuple = _ntuple(3) +to_4tuple = _ntuple(4) +to_ntuple = _ntuple + + +class InputConditioner(nn.Module): + + def __init__( + self, + input_scale: float, + norm_mean: norm_t, + norm_std: norm_t, + dtype: torch.dtype = None, + ): + super().__init__() + + self.dtype = dtype + + self.register_buffer("norm_mean", _to_tensor(norm_mean) / input_scale) + self.register_buffer("norm_std", _to_tensor(norm_std) / input_scale) + + def forward(self, x: torch.Tensor): + y = (x - self.norm_mean) / self.norm_std + if self.dtype is not None: + y = y.to(self.dtype) + return y + + +def _to_tensor(v: norm_t): + return torch.as_tensor(v, dtype=torch.float32).view(-1, 1, 1) + + +class ClsToken(nn.Module): + + def __init__( + self, + ndim: int, + num_tokens: int = 1, + enabled: bool = True, + register_multiple: Optional[int] = None, + num_registers: Optional[int] = None, + ): + super().__init__() + + self.ndim = ndim + self.enabled = enabled + self.num_registers = 0 + self.num_tokens = num_tokens + if enabled: + if num_registers: + self.num_registers = num_registers + elif register_multiple: + self.num_registers = register_multiple - (num_tokens % + register_multiple) + + scale = ndim**-0.5 + self.token = nn.Parameter( + torch.randn(num_tokens + self.num_registers, ndim) * scale) + + else: + self.token = None + + self.num_patches = self.num_tokens + self.num_registers + + def forward(self, x: torch.Tensor): + if self.token is None: + return x + + token = self.token.unsqueeze(0).expand(x.shape[0], -1, -1) + x = torch.cat([ + token, + x, + ], dim=1) + + return x + + +class ViTPatchGenerator(nn.Module): + + def __init__( + self, + # config: PretrainedConfig, + patch_size: int, + embed_dim: int, + input_dims: input_dim_t, + abs_pos: bool = True, + normalize_patches: bool = False, + cls_token: bool = False, + max_input_dims: Optional[input_dim_t] = None, + pos_dropout: float = 0.0, + return_pos_enc: bool = False, + num_cls_tokens: int = 1, + register_multiple: Optional[int] = None, + num_registers: Optional[int] = None, + patch_bias: bool = False, + device=None, + dtype=None, + ): + super().__init__() + if isinstance(input_dims, int): + input_dims = (input_dims, input_dims) + + if max_input_dims is None: + max_input_dims = input_dims + if isinstance(max_input_dims, int): + max_input_dims = (max_input_dims, max_input_dims) + + max_input_dims = tuple( + int(math.ceil(d / patch_size) * patch_size) + for d in max_input_dims) + + self.cpe_mode = max_input_dims != input_dims + self.pos_dropout = pos_dropout + self.return_pos_enc = return_pos_enc + + factory = dict(device=device, dtype=dtype) + + self.patch_size = patch_size + self.abs_pos = abs_pos + self.embed_dim = embed_dim + + self.num_rows = max_input_dims[0] // patch_size + 
self.num_cols = max_input_dims[1] // patch_size + self.input_dims = tuple(d // patch_size for d in input_dims) + self.num_patches = self.num_rows * self.num_cols + self.max_input_dims = max_input_dims + + self.im_to_patches = Im2Patches(patch_size) + self.embedder = ViTPatchLinear(patch_size, + embed_dim, + bias=patch_bias, + **factory) + + if abs_pos: + scale = embed_dim**-0.5 + self.pos_embed = nn.Parameter( + torch.randn(1, self.num_patches, embed_dim, **factory) * scale) + + self.cls_token = ClsToken( + embed_dim, + num_tokens=num_cls_tokens, + enabled=cls_token, + register_multiple=register_multiple, + num_registers=num_registers, + ) + + self.patch_normalizer = nn.LayerNorm( + embed_dim) if normalize_patches else nn.Identity() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + patches = self.embed_patches(x) + patches, pos_enc = self.apply_pos_enc(patches, input_size=x.shape[2:]) + patches = self.cls_token(patches) + patches = self.patch_normalizer(patches) + if self.return_pos_enc: + return patches, pos_enc + return patches + + @property + def apply_cls_token(self): + return self.cls_token.enabled + + @property + def num_cls_tokens(self): + return self.cls_token.num_tokens + + @property + def num_cls_patches(self): + return self.cls_token.num_patches + + @property + def num_registers(self): + return self.cls_token.num_registers + + @property + def num_skip(self): + return self.num_cls_tokens + self.num_registers + + def _load_embed(self, src_embed: torch.Tensor, targ_embed: nn.Parameter): + if src_embed.shape != targ_embed.shape: + src_size = int(math.sqrt(src_embed.shape[1])) + + assert src_size**2 == src_embed.shape[ + 1], 'Unable to interpolate non-square embedding' + + src_embed = rearrange(src_embed, + 'b (h w) c -> b c h w', + h=src_size, + w=src_size) + src_embed = F.interpolate(src_embed, + size=(self.num_rows, self.num_cols), + mode='bicubic', + align_corners=True, + antialias=False) + src_embed = rearrange(src_embed, 'b c h w -> b (h w) c') + targ_embed.data.copy_(src_embed) + + def _load_projection(self, src_proj_weight: torch.Tensor, + targ_proj_weight: torch.Tensor): + if src_proj_weight.shape != targ_proj_weight.shape: + src_patch_size = int(math.sqrt(src_proj_weight.shape[1] // 3)) + + assert (src_patch_size**2) * 3 == src_proj_weight.shape[ + 1], 'Unable to interpolate non-square patch size' + + src_proj_weight = rearrange(src_proj_weight, + 'b (c h w) -> b c h w', + c=3, + h=src_patch_size, + w=src_patch_size) + src_proj_weight = F.interpolate(src_proj_weight, + size=(self.patch_size, + self.patch_size), + mode='bicubic', + align_corners=True, + antialias=False) + src_proj_weight = rearrange(src_proj_weight, + 'b c h w -> b (c h w)') + targ_proj_weight.data.copy_(src_proj_weight) + + def embed_patches(self, x: torch.Tensor) -> torch.Tensor: + patches = self.im_to_patches(x) + patches = self.embedder(patches) + return patches + + def apply_pos_enc( + self, + patches: torch.Tensor, + patch_idxs: Optional[torch.Tensor] = None, + input_size: Optional[tuple[int, int]] = None, + ) -> torch.Tensor: + if not self.abs_pos: + return patches + + pos_enc = self.get_pos_enc(patches.shape[0], patch_idxs, input_size) + + if self.training and self.pos_dropout > 0: + keeps = torch.rand(patches.shape[0], + 1, + 1, + dtype=pos_enc.dtype, + device=pos_enc.device) > self.pos_dropout + pos_enc_drop = torch.where(keeps, pos_enc, 0) + else: + pos_enc_drop = pos_enc + + return patches + pos_enc_drop, pos_enc + + def get_pos_enc( + self, + batch_size: int, + patch_idxs: 
Optional[torch.Tensor] = None, + input_size: Optional[tuple[int, int]] = None, + ) -> torch.Tensor: + if input_size is None: + input_dims = self.input_dims + else: + input_dims = tuple(d // self.patch_size for d in input_size) + + pos_embed = self._get_pos_embeddings(batch_size, input_dims) + + if patch_idxs is None: + return pos_embed + + exp_patch_idxs = patch_idxs.unsqueeze(-1).expand( + -1, -1, pos_embed.shape[-1]) + + pos_embed = torch.gather(pos_embed.expand(patch_idxs.shape[0], -1, -1), + dim=1, + index=exp_patch_idxs) + return pos_embed + + def _get_pos_embeddings(self, batch_size: int, input_dims: tuple[int, + int]): + if (self.num_rows, self.num_cols) == input_dims: + return self.pos_embed + + pos_embed = self.pos_embed.reshape(1, self.num_rows, self.num_cols, + -1).permute(0, 3, 1, 2) + + def window_select(pos_embed): + if input_dims[0] < pos_embed.shape[-2]: + pos_embed = pos_embed[..., :input_dims[0], :] + if input_dims[1] < pos_embed.shape[-1]: + pos_embed = pos_embed[..., :, :input_dims[1]] + return pos_embed + + if self.cpe_mode: + if self.training: + min_scale = math.sqrt(0.1) + scale = torch.rand(batch_size, 1, 1, device=pos_embed.device + ) * (1 - min_scale) + min_scale + aspect_min = math.log(3 / 4) + aspect_max = -aspect_min + aspect = torch.exp( + torch.rand(batch_size, 1, 1, device=pos_embed.device) * + (aspect_max - aspect_min) + aspect_min) + + scale_x = scale * aspect + scale_y = scale * (1 / aspect) + scale_xy = torch.stack([scale_x, scale_y], dim=-1).clamp_(0, 1) + + pos_xy = torch.rand( + batch_size, 1, 1, 2, + device=pos_embed.device) * (1 - scale_xy) + + lin_x = torch.linspace( + 0, 1, steps=input_dims[1], + device=pos_embed.device)[None, None].expand( + batch_size, input_dims[0], -1) + lin_y = torch.linspace( + 0, 1, steps=input_dims[0], + device=pos_embed.device)[None, :, None].expand( + batch_size, -1, input_dims[1]) + + lin_xy = torch.stack([lin_x, lin_y], dim=-1) + + grid_xy = lin_xy * scale_xy + pos_xy + + # Convert to [-1, 1] range + grid_xy.mul_(2).sub_(1) + + pos_embed = F.grid_sample( + pos_embed.float().expand(batch_size, -1, -1, -1), + grid=grid_xy, + mode='bilinear', + padding_mode='zeros', + align_corners=True, + ).to(pos_embed.dtype) + else: + max_dim = max(input_dims) + pos_embed = F.interpolate(pos_embed.float(), + size=(max_dim, max_dim), + align_corners=True, + mode='bilinear').to(pos_embed.dtype) + + pos_embed = window_select(pos_embed) + else: + pos_embed = window_select(pos_embed) + + if pos_embed.shape[-2:] != input_dims: + pos_embed = F.interpolate(pos_embed.float(), + size=input_dims, + align_corners=True, + mode='bilinear').to(pos_embed.dtype) + + pos_embed = pos_embed.flatten(2).permute(0, 2, 1) + + return pos_embed + + +class Im2Patches(nn.Module): + + def __init__(self, patch_size: int): + super().__init__() + self.patch_size = patch_size + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.patch_size == 1: + patches = x.flatten(2) + patches = patches.permute(0, 2, 1) + return patches + + py = x.shape[-2] // self.patch_size + px = x.shape[-1] // self.patch_size + patches = rearrange( + x, + 'b c (py yy) (px xx) -> b (py px) (c yy xx)', + py=py, + yy=self.patch_size, + px=px, + xx=self.patch_size, + ) + return patches + + +class ViTPatchLinear(nn.Linear): + + def __init__(self, + patch_size: int, + embed_dim: int, + bias: bool = False, + **factory): + super().__init__(3 * (patch_size**2), embed_dim, bias=bias, **factory) + self.patch_size = patch_size + + +class RadioInternVisionModel(nn.Module): + 
packed_modules_mapping = { + "qkv": ["qkv"], + } + + def __init__( + self, + config: PretrainedConfig = None, + quant_config: Optional[QuantizationConfig] = None, + *, + num_hidden_layers_override: Optional[int] = None, + num_dummy_heads: int = 0, + prefix: str = "", + ) -> None: + super().__init__() + + self.config = config + self.img_size, self.grid_size, self.num_patches = self._init_img_size( + to_2tuple(config.patch_size), config.image_size) + max_img_size = int( + round(config.max_img_size / config.patch_size) * config.patch_size) + self.patch_generator = ViTPatchGenerator( + config.patch_size, + config.hidden_size, + input_dims=self.img_size, + max_input_dims=max_img_size, + cls_token=True, + register_multiple=config.reg_tokens) + + self.encoder = InternVisionEncoder( + config=config, + quant_config=quant_config, + num_hidden_layers_override=num_hidden_layers_override, + num_dummy_heads=num_dummy_heads, + prefix=f"{prefix}.encoder", + ) + + def _init_img_size(self, patch_size, img_size: Union[int, tuple[int, + int]]): + if img_size is None: + return None, None, None + img_size = to_2tuple(img_size) + grid_size = tuple([s // p for s, p in zip(img_size, patch_size)]) + num_patches = grid_size[0] * grid_size[1] + return img_size, grid_size, num_patches + + def get_input_embeddings(self): + return self.embeddings + + def forward(self, x: torch.Tensor) -> torch.FloatTensor: + assert self.patch_generator is not None + hidden_states = self.patch_generator(x) + encoder_outputs = self.encoder(inputs_embeds=hidden_states) + return encoder_outputs + + +class RadioModel(nn.Module): + packed_modules_mapping = { + "qkv": ["qkv"], + } + + def __init__( + self, + config: PretrainedConfig, + quant_config: Optional[QuantizationConfig] = None, + *, + num_hidden_layers_override: Optional[int] = None, + num_dummy_heads: int = 0, + prefix: str = "", + ) -> None: + super().__init__() + + self.config = config + self.input_conditioner = InputConditioner( + input_scale=1.0, + norm_mean=config.norm_mean, + norm_std=config.norm_std, + ) + self.model = RadioInternVisionModel( + config=config, + quant_config=quant_config, + num_hidden_layers_override=num_hidden_layers_override, + num_dummy_heads=num_dummy_heads, + prefix=prefix) + + def forward( + self, + pixel_values: Optional[torch.Tensor] = None, + pixel_embeds: Optional[torch.Tensor] = None, + ) -> torch.FloatTensor: + x = self.input_conditioner(pixel_values) + y = self.model(x) + return self._extract_final(y) + + def load_weights(self, weights) -> set[str]: + loaded_params: set[str] = set() + params_dict = dict(self.named_parameters()) + + if isinstance(weights, dict): + weights_list = list(weights.items()) + else: + weights_list = list(weights) + + for name, weight in weights_list: + if not name.startswith("radio_model."): + # Skip non-radio weights + continue + + sub = name[len("radio_model."):] # drop "radio_model." prefix + + # Skip buffers not used in vLLM + if sub in {"summary_idxs"}: + continue + + vllm_key = None + if sub.startswith("model.patch_generator."): + vllm_key = f"model.patch_generator.{sub.split('.', 2)[-1]}" + elif sub.startswith("input_conditioner."): + vllm_key = f"input_conditioner.{sub.split('.', 1)[-1]}" + elif sub.startswith("model.blocks."): + # Encoder blocks: HF 'model.blocks.{i}.' -> + # vLLM 'model.encoder.layers.{i}.' 
+ parts = sub.split(".") + if len(parts) >= 4: + layer_idx = parts[2] + suffix = ".".join(parts[3:]) + # Skip layer-scale entries that vLLM doesn't use + if suffix in {"ls1", "ls2"} or suffix.startswith( + ("ls1.", "ls2.")): + continue + vllm_key = f"model.encoder.layers.{layer_idx}.{suffix}" + + if vllm_key and vllm_key in params_dict: + param = params_dict[vllm_key] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, weight) + loaded_params.add(vllm_key) + + return loaded_params + + def _extract_final(self, y: torch.Tensor): + # Remove CLS + REGISTERS tokens + patch_gen = getattr(self.model, "patch_generator", None) + if patch_gen is not None: + all_feat = y[:, patch_gen.num_skip:] + + return all_feat diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index ca0d5def760a..91bfeb8c55ee 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -26,6 +26,7 @@ from vllm.transformers_utils.configs.olmo3 import Olmo3Config from vllm.transformers_utils.configs.ovis import OvisConfig from vllm.transformers_utils.configs.qwen3_next import Qwen3NextConfig +from vllm.transformers_utils.configs.radio import RadioConfig from vllm.transformers_utils.configs.speculators.base import SpeculatorsConfig from vllm.transformers_utils.configs.step3_vl import (Step3TextConfig, Step3VisionEncoderConfig, @@ -48,6 +49,7 @@ "Nemotron_Nano_VL_Config", "Olmo3Config", "OvisConfig", + "RadioConfig", "SpeculatorsConfig", "UltravoxConfig", "Step3VLConfig", diff --git a/vllm/transformers_utils/configs/radio.py b/vllm/transformers_utils/configs/radio.py new file mode 100644 index 000000000000..58ad7b8187bc --- /dev/null +++ b/vllm/transformers_utils/configs/radio.py @@ -0,0 +1,104 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Radio vision model configuration""" + +from typing import Optional, Union + +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + +logger = logging.get_logger(__name__) + +VIT_TIMM_DIM_BY_NAME: dict[str, tuple[int, int, int, int]] = { + "vit_small_patch16_224": (384, 12, 6, 1536), + "vit_base_patch16_224": (768, 12, 12, 3072), + "vit_large_patch16_224": (1024, 24, 16, 4096), + "vit_huge_patch16_224": (1280, 32, 16, 5120), +} + +OPENAI_CLIP_MEAN = (0.48145466, 0.4578275, 0.40821073) +OPENAI_CLIP_STD = (0.26862954, 0.26130258, 0.27577711) + + +class RadioConfig(PretrainedConfig): + r""" + This is the configuration class to store the configuration of a Radio + vision model. It is used to instantiate a Radio model according to the + specified arguments, defining the model architecture. + + Args: + model_name (`str`, *optional*, defaults to "vit_base_patch16_224"): + Name of the vision transformer model (e.g., "vit_base_patch16_224"). + Used to determine architecture dimensions from + `VIT_TIMM_DIM_BY_NAME`. + image_size (`int`, *optional*, defaults to 224): + The size (resolution) of each image. + patch_size (`int`, *optional*, defaults to 16): + The size (resolution) of each patch. + qkv_bias (`bool`, *optional*, defaults to True): + Whether to add a bias to the queries, keys and values. + qk_normalization (`bool`, *optional*, defaults to False): + Whether to apply normalization to queries and keys. + norm_type (`str`, *optional*, defaults to "layer_norm"): + The normalization type to use. 
+ layer_norm_eps (`float`, *optional*, defaults to 1e-6): + The epsilon used by the layer normalization layers. + initializer_factor (`float`, *optional*, defaults to 1.0): + A factor for initializing all weight matrices. + hidden_act (`str`, *optional*, defaults to "gelu"): + The non-linear activation function in the encoder. + max_img_size (`int`, *optional*, defaults to 2048): + Maximum image size for position embeddings. + norm_mean (`tuple` or `list`, *optional*, + defaults to (0.48145466, 0.4578275, 0.40821073)): + Mean values for image normalization (RGB channels). + norm_std (`tuple` or `list`, *optional*, + defaults to (0.26862954, 0.26130258, 0.27577711)): + Standard deviation values for image normalization (RGB channels). + reg_tokens (`int`, *optional*): + Number of register tokens to use. + """ + + model_type = "radio" + + def __init__( + self, + model_name: str, + image_size: int = 224, + patch_size: int = 16, + qkv_bias: bool = True, + qk_normalization: bool = False, + norm_type: str = "layer_norm", + layer_norm_eps: float = 1e-6, + initializer_factor: float = 1.0, + hidden_act: str = "gelu", + max_img_size: int = 2048, + norm_mean: Union[tuple[float, float, float], list] = OPENAI_CLIP_MEAN, + norm_std: Union[tuple[float, float, float], list] = OPENAI_CLIP_STD, + reg_tokens: Optional[int] = None, + **kwargs, + ): + self.model_name = model_name + ( + self.hidden_size, + self.num_hidden_layers, + self.num_attention_heads, + self.intermediate_size, + ) = VIT_TIMM_DIM_BY_NAME[model_name] + self.image_size = image_size + self.patch_size = patch_size + self.qkv_bias = qkv_bias + self.qk_normalization = qk_normalization + self.norm_type = norm_type + self.layer_norm_eps = layer_norm_eps + self.initializer_factor = initializer_factor + self.hidden_act = hidden_act + self.max_img_size = max_img_size + self.norm_mean = list(norm_mean) if isinstance(norm_mean, + (tuple, + list)) else norm_mean + self.norm_std = list(norm_std) if isinstance(norm_std, + (tuple, + list)) else norm_std + self.reg_tokens = reg_tokens + super().__init__(**kwargs) From 9fccd04e308b0b8a625dd78b7dfa4feed8131102 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Wed, 17 Sep 2025 20:54:02 +0800 Subject: [PATCH 042/518] [Bugfix] Fix Stream usage in CPU model runner and OneDNN kernel check (#25046) Signed-off-by: jiang1.li --- csrc/cpu/dnnl_kernels.cpp | 2 +- vllm/platforms/cpu.py | 5 +++++ vllm/v1/worker/cpu_model_runner.py | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/csrc/cpu/dnnl_kernels.cpp b/csrc/cpu/dnnl_kernels.cpp index 9a3af4ac9d8a..1c42a75bc2d6 100644 --- a/csrc/cpu/dnnl_kernels.cpp +++ b/csrc/cpu/dnnl_kernels.cpp @@ -523,7 +523,7 @@ void onednn_mm(torch::Tensor& c, // [M, OC], row-major CPU_KERNEL_GUARD_IN(onednn_mm) TORCH_CHECK(a.dim() == 2); TORCH_CHECK(a.stride(-1) == 1); - TORCH_CHECK(c.is_contiguous()); + TORCH_CHECK(c.stride(-1) == 1); MatMulPrimitiveHandler* ptr = reinterpret_cast(handler); diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index c5b6d91a62b6..544e091491bf 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -185,6 +185,11 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: parallel_config.distributed_executor_backend = "mp" if parallel_config.worker_cls == "auto": parallel_config.worker_cls = "vllm.v1.worker.cpu_worker.CPUWorker" + # Disable DBO + if parallel_config.enable_dbo: + logger.warning( + "Dual-Batch Overlap is not supported on CPU, disabled.") + parallel_config.enable_dbo = False # Note: workaround for 
v1 gpu_model_runner from vllm.config import CompilationLevel diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py index ccdbeac64bce..cd0f0af43e7e 100644 --- a/vllm/v1/worker/cpu_model_runner.py +++ b/vllm/v1/worker/cpu_model_runner.py @@ -145,12 +145,20 @@ def __init__(self, *args, **kwargs) -> None: self.record = lambda: None self.synchronize = lambda: None + class _StreamPlaceholder: + + def __init__(self, *args, **kwargs) -> None: + pass + cuda_event = torch.cuda.Event + cuda_stream = torch.cuda.Stream try: torch.cuda.Event = _EventPlaceholder + torch.cuda.Stream = _StreamPlaceholder yield finally: torch.cuda.Event = cuda_event + torch.cuda.Stream = cuda_stream @contextmanager From bfe93801614b73ee5b4ac8ff65f977686a674bf2 Mon Sep 17 00:00:00 2001 From: Aidyn-A <31858918+Aidyn-A@users.noreply.github.com> Date: Wed, 17 Sep 2025 17:15:42 +0400 Subject: [PATCH 043/518] Apply fixes for CUDA 13 (#24599) Signed-off-by: Aidyn-A --- CMakeLists.txt | 10 ++++++++++ csrc/cub_helpers.h | 17 +++++++++++++++++ csrc/layernorm_kernels.cu | 13 ++++--------- csrc/layernorm_quant_kernels.cu | 13 ++++--------- csrc/moe/topk_softmax_kernels.cu | 16 +++------------- .../compressed_tensors/int8_quant_kernels.cu | 11 ++--------- csrc/quantization/fp8/common.cu | 9 ++------- .../fused_kernels/layernorm_utils.cuh | 14 +++++--------- 8 files changed, 47 insertions(+), 56 deletions(-) create mode 100644 csrc/cub_helpers.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 009c224dc773..c48da948a029 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -175,6 +175,16 @@ if(NVCC_THREADS AND VLLM_GPU_LANG STREQUAL "CUDA") list(APPEND VLLM_GPU_FLAGS "--threads=${NVCC_THREADS}") endif() +# +# Set CUDA include flags for CXX compiler. +# +if(VLLM_GPU_LANG STREQUAL "CUDA") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${CUDA_TOOLKIT_ROOT_DIR}/include") + if(CUDA_VERSION VERSION_GREATER_EQUAL 13.0) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -I${CUDA_TOOLKIT_ROOT_DIR}/include/cccl") + endif() +endif() + # # Use FetchContent for C++ dependencies that are compiled as part of vLLM's build process. # setup.py will override FETCHCONTENT_BASE_DIR to play nicely with sccache. diff --git a/csrc/cub_helpers.h b/csrc/cub_helpers.h new file mode 100644 index 000000000000..470a63a22cab --- /dev/null +++ b/csrc/cub_helpers.h @@ -0,0 +1,17 @@ +#pragma once + +#ifndef USE_ROCM + #include + #if CUB_VERSION >= 200800 + #include +using CubAddOp = cuda::std::plus<>; +using CubMaxOp = cuda::maximum<>; + #else // if CUB_VERSION < 200800 +using CubAddOp = cub::Sum; +using CubMaxOp = cub::Max; + #endif // CUB_VERSION +#else + #include +using CubAddOp = cub::Sum; +using CubMaxOp = cub::Max; +#endif // USE_ROCM diff --git a/csrc/layernorm_kernels.cu b/csrc/layernorm_kernels.cu index 05be023de0f2..93c73d58390e 100644 --- a/csrc/layernorm_kernels.cu +++ b/csrc/layernorm_kernels.cu @@ -1,15 +1,10 @@ #include "type_convert.cuh" #include "dispatch_utils.h" +#include "cub_helpers.h" #include #include -#ifndef USE_ROCM - #include -#else - #include -#endif - namespace vllm { // TODO(woosuk): Further optimize this kernel. 
@@ -30,7 +25,7 @@ __global__ void rms_norm_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); @@ -85,7 +80,7 @@ fused_add_rms_norm_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); @@ -126,7 +121,7 @@ fused_add_rms_norm_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); diff --git a/csrc/layernorm_quant_kernels.cu b/csrc/layernorm_quant_kernels.cu index 0fd5849d9626..be134089bd6d 100644 --- a/csrc/layernorm_quant_kernels.cu +++ b/csrc/layernorm_quant_kernels.cu @@ -8,16 +8,11 @@ #include "type_convert.cuh" #include "quantization/fp8/common.cuh" #include "dispatch_utils.h" +#include "cub_helpers.h" #include #include -#ifndef USE_ROCM - #include -#else - #include -#endif - namespace vllm { // TODO(woosuk): Further optimize this kernel. @@ -39,7 +34,7 @@ __global__ void rms_norm_static_fp8_quant_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); @@ -100,7 +95,7 @@ fused_add_rms_norm_static_fp8_quant_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); @@ -149,7 +144,7 @@ fused_add_rms_norm_static_fp8_quant_kernel( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - variance = BlockReduce(reduceStore).Reduce(variance, cub::Sum{}, blockDim.x); + variance = BlockReduce(reduceStore).Reduce(variance, CubAddOp{}, blockDim.x); if (threadIdx.x == 0) { s_variance = rsqrtf(variance / hidden_size + epsilon); diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu index cd80bfda7dfd..53573ada86ba 100644 --- a/csrc/moe/topk_softmax_kernels.cu +++ b/csrc/moe/topk_softmax_kernels.cu @@ -20,17 +20,7 @@ #include #include #include "../cuda_compat.h" - -#ifndef USE_ROCM - #include - #include - #include - using AddOp = cuda::std::plus; -#else - #include - #include - using AddOp = cub::Sum; -#endif +#include "../cub_helpers.h" #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? 
(a) : (b)) @@ -79,7 +69,7 @@ __launch_bounds__(TPB) __global__ threadData = max(static_cast(input[idx]), threadData); } - const float maxElem = BlockReduce(tmpStorage).Reduce(threadData, cub::Max()); + const float maxElem = BlockReduce(tmpStorage).Reduce(threadData, CubMaxOp()); if (threadIdx.x == 0) { float_max = maxElem; @@ -94,7 +84,7 @@ __launch_bounds__(TPB) __global__ threadData += exp((static_cast(input[idx]) - float_max)); } - const auto Z = BlockReduce(tmpStorage).Reduce(threadData, AddOp()); + const auto Z = BlockReduce(tmpStorage).Reduce(threadData, CubAddOp()); if (threadIdx.x == 0) { diff --git a/csrc/quantization/compressed_tensors/int8_quant_kernels.cu b/csrc/quantization/compressed_tensors/int8_quant_kernels.cu index d8369108d0bd..bcfde9fbcbbe 100644 --- a/csrc/quantization/compressed_tensors/int8_quant_kernels.cu +++ b/csrc/quantization/compressed_tensors/int8_quant_kernels.cu @@ -7,17 +7,10 @@ #include +#include "../../cub_helpers.h" #include "../../dispatch_utils.h" #include "../vectorization_utils.cuh" -#ifndef USE_ROCM - #include - #include -#else - #include - #include -#endif - static inline __device__ int8_t float_to_int8_rn(float x) { #ifdef USE_ROCM static constexpr auto i8_min = @@ -173,7 +166,7 @@ __global__ void dynamic_scaled_int8_quant_kernel( }); using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage tmp; - float block_max = BlockReduce(tmp).Reduce(thread_max, cub::Max{}, blockDim.x); + float block_max = BlockReduce(tmp).Reduce(thread_max, CubMaxOp{}, blockDim.x); __shared__ float absmax; if (tid == 0) { absmax = block_max; diff --git a/csrc/quantization/fp8/common.cu b/csrc/quantization/fp8/common.cu index 5fe5dd04bd89..45d6d5082ce4 100644 --- a/csrc/quantization/fp8/common.cu +++ b/csrc/quantization/fp8/common.cu @@ -1,15 +1,10 @@ #include "common.cuh" #include "dispatch_utils.h" +#include "../../cub_helpers.h" #include "../vectorization_utils.cuh" #include #include -#ifndef USE_ROCM - #include -#else - #include -#endif - namespace vllm { template @@ -116,7 +111,7 @@ __global__ void dynamic_per_token_scaled_fp8_quant_kernel_strided( using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage tmp; const float block_max = - BlockReduce(tmp).Reduce(absmax_val, cub::Max{}, blockDim.x); + BlockReduce(tmp).Reduce(absmax_val, CubMaxOp{}, blockDim.x); __shared__ float token_scale; if (tid == 0) { diff --git a/csrc/quantization/fused_kernels/layernorm_utils.cuh b/csrc/quantization/fused_kernels/layernorm_utils.cuh index 3f188872d80d..2d2fd771205c 100644 --- a/csrc/quantization/fused_kernels/layernorm_utils.cuh +++ b/csrc/quantization/fused_kernels/layernorm_utils.cuh @@ -8,11 +8,7 @@ #include "quantization/utils.cuh" #include "quant_conversions.cuh" -#ifndef USE_ROCM - #include -#else - #include -#endif +#include "../../cub_helpers.h" namespace vllm { @@ -36,7 +32,7 @@ __device__ void compute_rms(float* rms, scalar_t const* __restrict__ input, using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - ss = BlockReduce(reduceStore).Reduce(ss, cub::Sum{}, blockDim.x); + ss = BlockReduce(reduceStore).Reduce(ss, CubAddOp{}, blockDim.x); __shared__ float s_rms; if (threadIdx.x == 0) { @@ -73,7 +69,7 @@ __device__ void compute_dynamic_per_token_scales( __shared__ typename BlockReduce::TempStorage reduceStore; block_absmax_val_maybe = BlockReduce(reduceStore) - .Reduce(block_absmax_val_maybe, cub::Max{}, blockDim.x); + .Reduce(block_absmax_val_maybe, CubMaxOp{}, blockDim.x); 
__shared__ float s_token_scale; if (threadIdx.x == 0) { @@ -169,7 +165,7 @@ __device__ void compute_rms(float* rms, scalar_t const* __restrict__ input, using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage reduceStore; - ss = BlockReduce(reduceStore).Reduce(ss, cub::Sum{}, blockDim.x); + ss = BlockReduce(reduceStore).Reduce(ss, CubAddOp{}, blockDim.x); __shared__ float s_rms; if (threadIdx.x == 0) { @@ -240,7 +236,7 @@ __device__ void compute_dynamic_per_token_scales( __shared__ typename BlockReduce::TempStorage reduceStore; block_absmax_val_maybe = BlockReduce(reduceStore) - .Reduce(block_absmax_val_maybe, cub::Max{}, blockDim.x); + .Reduce(block_absmax_val_maybe, CubMaxOp{}, blockDim.x); __shared__ float s_token_scale; if (threadIdx.x == 0) { From 1b962e24577dddc0d7441ae0b06392e1f9262a51 Mon Sep 17 00:00:00 2001 From: dolpm <34420038+dolpm@users.noreply.github.com> Date: Wed, 17 Sep 2025 06:22:25 -0700 Subject: [PATCH 044/518] [fix] lora benchmarks pass no_lora_flag_cpu (#23774) Signed-off-by: Dylan Maloy <34420038+dolpm@users.noreply.github.com> Co-authored-by: Jee Jee Li --- benchmarks/kernels/benchmark_lora.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/benchmarks/kernels/benchmark_lora.py b/benchmarks/kernels/benchmark_lora.py index 89309c79f099..debb29744bfa 100644 --- a/benchmarks/kernels/benchmark_lora.py +++ b/benchmarks/kernels/benchmark_lora.py @@ -464,7 +464,11 @@ def to_device(tensor: torch.Tensor): for field_name in LoRAKernelMeta.__dataclass_fields__: field = getattr(self.lora_kernel_meta, field_name) assert isinstance(field, torch.Tensor) - setattr(self.lora_kernel_meta, field_name, to_device(field)) + setattr( + self.lora_kernel_meta, + field_name, + to_device(field) if field_name != "no_lora_flag_cpu" else field, + ) def metadata(self) -> tuple[int, int, int]: """ @@ -512,6 +516,7 @@ def as_lora_shrink_kwargs(self) -> dict[str, Any]: "lora_token_start_loc": self.lora_kernel_meta.lora_token_start_loc, "lora_ids": self.lora_kernel_meta.active_lora_ids, "scaling": 1.0, + "no_lora_flag_cpu": self.lora_kernel_meta.no_lora_flag_cpu, } def as_lora_expand_kwargs(self, add_inputs: bool) -> dict[str, Any]: @@ -552,6 +557,7 @@ def as_lora_expand_kwargs(self, add_inputs: bool) -> dict[str, Any]: "lora_ids": self.lora_kernel_meta.active_lora_ids, "offset_start": 0, "add_inputs": add_inputs, + "no_lora_flag_cpu": self.lora_kernel_meta.no_lora_flag_cpu, } def bench_fn_kwargs( From dd6a910aac6504ec7fc3e50df36b08e476f1b80f Mon Sep 17 00:00:00 2001 From: Tao He Date: Wed, 17 Sep 2025 21:59:09 +0800 Subject: [PATCH 045/518] [Bugfix][Qwen3-Next] fixes the varlen issue in qwen3-next's MTP implementation. 
(#24957) Signed-off-by: Tao He --- .../layers/mamba/ops/causal_conv1d.py | 132 +++++++++++++++--- vllm/model_executor/models/qwen3_next.py | 10 +- vllm/v1/attention/backends/gdn_attn.py | 31 ++-- 3 files changed, 139 insertions(+), 34 deletions(-) diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index a0478a359f91..7e3ea561fd29 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -626,6 +626,7 @@ def _causal_conv1d_update_kernel( cache_seqlens_ptr, # circular buffer conv_state_indices_ptr, num_accepted_tokens_ptr, + query_start_loc_ptr, # (batch + 1) o_ptr, # (batch, dim, seqlen) # Matrix dimensions batch: int, @@ -652,6 +653,7 @@ def _causal_conv1d_update_kernel( HAS_BIAS: tl.constexpr, KERNEL_WIDTH: tl.constexpr, SILU_ACTIVATION: tl.constexpr, + IS_VARLEN: tl.constexpr, IS_CONTINUOUS_BATCHING: tl.constexpr, IS_SPEC_DECODING: tl.constexpr, NP2_STATELEN: tl.constexpr, @@ -678,6 +680,25 @@ def _causal_conv1d_update_kernel( # not processing as this is not the actual sequence return + if IS_VARLEN: + query_start_index = tl.load(query_start_loc_ptr + idx_seq).to(tl.int64) + query_end_index = tl.load(query_start_loc_ptr + (idx_seq + 1)).to( + tl.int64) + # revise state_len and seqlen + state_len = state_len - (seqlen - + (query_end_index - query_start_index)) + seqlen = query_end_index - query_start_index + x_offset = query_start_index * stride_x_token + o_offset = query_start_index * stride_o_token + else: + query_start_index = idx_seq * seqlen + query_end_index = query_start_index + seqlen + x_offset = idx_seq * stride_x_seq + o_offset = idx_seq * stride_o_seq + + if query_start_index == query_end_index: + return + if IS_SPEC_DECODING: # The rolling of conv state: # @@ -692,8 +713,8 @@ def _causal_conv1d_update_kernel( # - accept 1 tokens: [history2, ..., historyM, draft1] # - accept 2 tokens: [history3, ..., historyM, draft1, draft2] # - and so on. 
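            # A minimal worked example of the rolling described above, with
            # illustrative values only (state_len M = 4, history [h1, h2, h3, h4]):
            #   - accept 1 draft token  -> state becomes [h2, h3, h4, d1]
            #   - accept 2 draft tokens -> state becomes [h3, h4, d1, d2]
            # so the read offset into the stored state computed below is
            # num_accepted_tokens - 1 (0 when one token is accepted, 1 when two
            # are accepted, and so on).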
- conv_state_token_offset = (tl.load(num_accepted_tokens_ptr + idx_seq) - - 1) + conv_state_token_offset = ( + tl.load(num_accepted_tokens_ptr + idx_seq).to(tl.int64) - 1) else: conv_state_token_offset = 0 @@ -713,9 +734,12 @@ def _causal_conv1d_update_kernel( if KERNEL_WIDTH >= 4: conv_states_ptrs = prior_tokens + 2 * stride_conv_state_tok # [BLOCK_N] col2 = tl.load(conv_states_ptrs, mask_w, 0.0) - if KERNEL_WIDTH == 5: + if KERNEL_WIDTH >= 5: conv_states_ptrs = prior_tokens + 3 * stride_conv_state_tok # [BLOCK_N] col3 = tl.load(conv_states_ptrs, mask_w, 0.0) + if KERNEL_WIDTH >= 6: + conv_states_ptrs = prior_tokens + 4 * stride_conv_state_tok # [BLOCK_N] + col4 = tl.load(conv_states_ptrs, mask_w, 0.0) # STEP 2: assume state_len > seqlen idx_tokens = tl.arange(0, NP2_STATELEN) # [BLOCK_M] @@ -735,8 +759,7 @@ def _causal_conv1d_update_kernel( conv_state = tl.load(conv_state_ptrs_source, mask, other=0.0) VAL = state_len - seqlen - x_base = x_ptr + (idx_seq * stride_x_seq) + (idx_feats * stride_x_dim - ) # [BLOCK_N] + x_base = x_ptr + x_offset + (idx_feats * stride_x_dim) # [BLOCK_N] x_ptrs = x_base[None, :] + ( (idx_tokens - VAL) * stride_x_token)[:, None] # [BLOCK_M, BLOCK_N] @@ -782,12 +805,18 @@ def _causal_conv1d_update_kernel( if KERNEL_WIDTH >= 4: w_ptrs = w_base + (3 * stride_w_width) # [BLOCK_N] tensor w_col3 = tl.load(w_ptrs, mask_w, other=0.0) + if KERNEL_WIDTH >= 5: + w_ptrs = w_base + (4 * stride_w_width) # [BLOCK_N] tensor + w_col4 = tl.load(w_ptrs, mask_w, other=0.0) + if KERNEL_WIDTH >= 6: + w_ptrs = w_base + (5 * stride_w_width) # [BLOCK_N] tensor + w_col5 = tl.load(w_ptrs, mask_w, other=0.0) x_base_1d = x_base # starting of chunk [BLOCK_N] mask_x_1d = idx_feats < dim # STEP 5: compute each token - for idx_token in tl.static_range(seqlen): + for idx_token in tl.range(seqlen): acc = acc_preload matrix_w = w_col0 @@ -817,6 +846,37 @@ def _causal_conv1d_update_kernel( matrix_w = w_col3 x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N] matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d) + elif KERNEL_WIDTH == 5: + if j == 1: + matrix_w = w_col1 + matrix_x = col1 + elif j == 2: + matrix_w = w_col2 + matrix_x = col2 + elif j == 3: + matrix_w = w_col3 + matrix_x = col3 + elif j == 4: + matrix_w = w_col4 + x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N] + matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d) + elif KERNEL_WIDTH == 6: + if j == 1: + matrix_w = w_col1 + matrix_x = col1 + elif j == 2: + matrix_w = w_col2 + matrix_x = col2 + elif j == 3: + matrix_w = w_col3 + matrix_x = col3 + elif j == 4: + matrix_w = w_col4 + matrix_x = col4 + elif j == 5: + matrix_w = w_col5 + x_ptrs_1d = x_base_1d + idx_token * stride_x_token # [BLOCK_N] + matrix_x = tl.load(x_ptrs_1d, mask=mask_x_1d) acc += matrix_x * matrix_w # [BLOCK_N] @@ -829,14 +889,24 @@ def _causal_conv1d_update_kernel( col0 = col1 col1 = col2 col2 = matrix_x + elif KERNEL_WIDTH == 5: + col0 = col1 + col1 = col2 + col2 = col3 + col3 = matrix_x + elif KERNEL_WIDTH == 6: + col0 = col1 + col1 = col2 + col2 = col3 + col3 = col4 + col4 = matrix_x if SILU_ACTIVATION: acc = acc / (1 + tl.exp(-acc)) mask_1d = (idx_token < seqlen) & (idx_feats < dim ) # token-index # feature-index - o_ptrs = o_ptr + ( - idx_seq) * stride_o_seq + idx_token * stride_o_token + ( - idx_feats * stride_o_dim) + o_ptrs = o_ptr + o_offset + idx_token * stride_o_token + (idx_feats * + stride_o_dim) tl.store(o_ptrs, acc, mask=mask_1d) @@ -850,14 +920,18 @@ def causal_conv1d_update( cache_seqlens: Optional[torch.Tensor] = None, conv_state_indices: 
Optional[torch.Tensor] = None, num_accepted_tokens: Optional[torch.Tensor] = None, + query_start_loc: Optional[torch.Tensor] = None, + max_query_len: int = -1, pad_slot_id: int = PAD_SLOT_ID, metadata=None, validate_data=False, ): """ - x: (batch, dim) or (batch, dim, seqlen) + x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim) [shape=2: single token prediction] [shape=3: single or multiple tokens prediction] + [shape=2 with num_tokens: continuous batching, where num_tokens is the + total tokens of all sequences in that batch] conv_state: (..., dim, state_len), where state_len >= width - 1 weight: (dim, width) bias: (dim,) @@ -870,13 +944,24 @@ def causal_conv1d_update( If not None, the conv_state is a larger tensor along the batch dim, and we are selecting the batch coords specified by conv_state_indices. Useful for a continuous batching scenario. + num_accepted_tokens: (batch,), dtype int32 + If not None, it indicates the number of accepted tokens for each + sequence in the batch. + This is used in speculative decoding, where the conv_state is updated + in a sliding window manner. + query_start_loc: (batch + 1,) int32 + If not None, the inputs is given in a varlen fashion and this indicates + the starting index of each sequence in the batch. + max_query_len: int + If query_start_loc is not None, this indicates the maximum query + length in the batch. pad_slot_id: int if cache_indices is passed, lets the kernel identify padded entries that will not be processed, for example: cache_indices = [pad_slot_id, 1 ,20 ,pad_slot_id] in this case, the kernel will not process entries at indices 0 and 3 - out: (batch, dim) or (batch, dim, seqlen) + out: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim), same shape as `x` """ if validate_data: assert cache_seqlens is None # not implemented yet - ok for vLLM @@ -886,11 +971,17 @@ def causal_conv1d_update( activation = "silu" if activation is True else None elif activation is not None: assert activation in ["silu", "swish"] - unsqueeze = x.dim() == 2 + unsqueeze = query_start_loc is None and x.dim() == 2 if unsqueeze: # make it (batch, dim, seqlen) with seqlen == 1 x = x.unsqueeze(-1) - batch, dim, seqlen = x.shape + if query_start_loc is None: + batch, dim, seqlen = x.shape + else: + assert conv_state_indices is not None + batch = conv_state_indices.size(0) + dim = x.size(1) + seqlen = max_query_len _, width = weight.shape # conv_state: (..., dim, state_len), where state_len >= width - 1 num_cache_lines, _, state_len = conv_state.size() @@ -916,10 +1007,17 @@ def causal_conv1d_update( out = x stride_w_dim, stride_w_width = weight.stride() - stride_x_seq, stride_x_dim, stride_x_token = x.stride( - ) # X (batch, dim, seqlen) + if query_start_loc is None: + # X (batch, dim, seqlen) + stride_x_seq, stride_x_dim, stride_x_token = x.stride() + stride_o_seq, stride_o_dim, stride_o_token = out.stride() + else: + # X (dim, cu_seqlen) + stride_x_token, stride_x_dim = x.stride() + stride_x_seq = 0 + stride_o_token, stride_o_dim = out.stride() + stride_o_seq = 0 - stride_o_seq, stride_o_dim, stride_o_token = out.stride() stride_istate_seq, stride_istate_dim, stride_istate_token = conv_state.stride( ) stride_state_indices = conv_state_indices.stride( @@ -945,6 +1043,7 @@ def grid(META): cache_seqlens, conv_state_indices, num_accepted_tokens, + query_start_loc, out, # Matrix dimensions batch, @@ -971,6 +1070,7 @@ def grid(META): HAS_BIAS=bias is not None, KERNEL_WIDTH=width, SILU_ACTIVATION=activation in ["silu", "swish"], + 
IS_VARLEN=query_start_loc is not None, IS_CONTINUOUS_BATCHING=conv_state_indices is not None, IS_SPEC_DECODING=num_accepted_tokens is not None, NP2_STATELEN=np2_statelen, diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 3c5407916c0b..fe63e9303235 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -417,9 +417,7 @@ def _forward( self_kv_cache = self.kv_cache[forward_context.virtual_engine] conv_state = self_kv_cache[0].transpose(-1, -2) ssm_state = self_kv_cache[1] - num_actual_tokens = (attn_metadata.num_prefill_tokens + - attn_metadata.num_decode_tokens + - attn_metadata.num_spec_decode_tokens) + num_actual_tokens = attn_metadata.num_actual_tokens num_accepted_tokens = attn_metadata.num_accepted_tokens # 1. Set up dimensions for reshapes later @@ -458,9 +456,6 @@ def _forward( # 2.1: process the mutli-query part if spec_sequence_masks is not None: - mixed_qkv_spec = mixed_qkv_spec.view( - attn_metadata.num_spec_decodes, -1, mixed_qkv_spec.size(-1)) - mixed_qkv_spec = rearrange(mixed_qkv_spec, 'b l d -> b d l') mixed_qkv_spec = causal_conv1d_update( mixed_qkv_spec, conv_state, @@ -470,9 +465,10 @@ def _forward( conv_state_indices=spec_state_indices_tensor[:, 0] [:attn_metadata.num_spec_decodes], num_accepted_tokens=num_accepted_tokens, + query_start_loc=spec_query_start_loc, + max_query_len=spec_state_indices_tensor.size(-1), validate_data=False, ) - mixed_qkv_spec = rearrange(mixed_qkv_spec, 'b d l -> (b l) d') # 2.2: process the remaining part if attn_metadata.num_prefills > 0: diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index 74eb9ae9d325..ba89f93e8b56 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -31,6 +31,7 @@ class GDNAttentionMetadata: num_decode_tokens: int num_spec_decodes: int num_spec_decode_tokens: int + num_actual_tokens: int has_initial_state: Optional[torch.Tensor] = None @@ -74,8 +75,8 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], self.use_full_cuda_graph = \ self.compilation_config.cudagraph_mode.has_full_cudagraphs() self.decode_cudagraph_max_bs = min( - self.vllm_config.scheduler_config.max_num_seqs, - self.compilation_config.max_capture_size) + self.vllm_config.scheduler_config.max_num_seqs * + (self.num_spec + 1), self.compilation_config.max_capture_size) self.spec_state_indices_tensor = torch.empty( (self.decode_cudagraph_max_bs, self.num_spec + 1), @@ -194,9 +195,8 @@ def build( # type: ignore[override] dim=0, out=non_spec_query_start_loc[1:]) - num_spec_decode_tokens = min( - num_spec_decodes * (self.num_spec + 1), - spec_token_masks.size(0)) + num_spec_decode_tokens = (query_lens.sum().item() - + num_prefill_tokens - num_decode_tokens) assert num_accepted_tokens is not None num_accepted_tokens = num_accepted_tokens[spec_sequence_masks] @@ -206,14 +206,22 @@ def build( # type: ignore[override] has_initial_state = has_initial_state[~spec_sequence_masks] else: has_initial_state = None + num_actual_tokens = num_prefill_tokens + num_decode_tokens + \ + num_spec_decode_tokens # prepare tensors for cudagraph + # + # With speculative decoding, the xgrammar backend may rollback tokens + # and causing some sequences has less draft tokens than self.num_spec. + # + # In above cases, the max possible batch size for n tokens, can be + # min(n, cudagraph_max_bs). 
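        # For instance (illustrative numbers): with decode_cudagraph_max_bs = 64
        # and 40 padded tokens, the spec-decode branch below uses
        # batch_size = min(64, 40) = 40, which is large enough whether those 40
        # tokens come from a few full-speculation sequences or from many
        # sequences whose drafts were rolled back to fewer than num_spec tokens.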
if (self.use_full_cuda_graph and num_prefills == 0 and num_decodes == 0 and num_spec_decodes <= self.decode_cudagraph_max_bs - and m.num_actual_tokens <= self.decode_cudagraph_max_bs): - num_total_tokens = self.vllm_config.pad_for_cudagraph( + and num_spec_decode_tokens <= self.decode_cudagraph_max_bs): + num_actual_tokens = self.vllm_config.pad_for_cudagraph( m.num_actual_tokens) - batch_size = num_total_tokens // (self.num_spec + 1) + batch_size = min(self.decode_cudagraph_max_bs, num_actual_tokens) self.spec_state_indices_tensor[:num_spec_decodes].copy_( spec_state_indices_tensor, non_blocking=True) @@ -229,7 +237,7 @@ def build( # type: ignore[override] assert spec_token_masks is not None self.spec_token_masks[:spec_token_masks.size(0)].copy_( spec_token_masks, non_blocking=True) - spec_token_masks = self.spec_token_masks[:m.num_actual_tokens] + spec_token_masks = self.spec_token_masks[:num_actual_tokens] spec_token_masks[spec_token_masks.size(0):].fill_(False) self.spec_query_start_loc[:num_spec_decodes + 1].copy_( @@ -248,9 +256,9 @@ def build( # type: ignore[override] if (self.use_full_cuda_graph and num_prefills == 0 and num_spec_decodes == 0 and num_decodes <= self.decode_cudagraph_max_bs): - num_total_tokens = self.vllm_config.pad_for_cudagraph( + num_actual_tokens = self.vllm_config.pad_for_cudagraph( m.num_actual_tokens) - batch_size = num_total_tokens + batch_size = num_actual_tokens self.non_spec_state_indices_tensor[:num_decodes].copy_( non_spec_state_indices_tensor, non_blocking=True) @@ -274,6 +282,7 @@ def build( # type: ignore[override] num_decode_tokens=num_decode_tokens, num_spec_decodes=num_spec_decodes, num_spec_decode_tokens=num_spec_decode_tokens, + num_actual_tokens=num_actual_tokens, has_initial_state=has_initial_state, spec_query_start_loc=spec_query_start_loc, non_spec_query_start_loc=non_spec_query_start_loc, From 47f670b03b7dfb4e1149eb8b14ba9edcfc297255 Mon Sep 17 00:00:00 2001 From: samzong Date: Wed, 17 Sep 2025 22:31:20 +0800 Subject: [PATCH 046/518] [Docs] improve code formatting and comments for eliminate griffe build warning. (#25010) Signed-off-by: samzong --- vllm/benchmarks/serve.py | 2 +- vllm/distributed/eplb/eplb_state.py | 9 +++++---- vllm/distributed/eplb/rebalance_algo.py | 23 ++++++++++++++--------- 3 files changed, 20 insertions(+), 14 deletions(-) diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 33e831e54bbc..1aeef0fd5bd8 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -139,7 +139,7 @@ async def get_request( A lower burstiness value (0 < burstiness < 1) results in more bursty requests, while a higher burstiness value (burstiness > 1) results in a more uniform arrival of requests. - ramp_up_strategy (optional): + ramp_up_strategy (optional): The ramp-up strategy. Can be "linear" or "exponential". If None, uses constant request rate (specified by request_rate). ramp_up_start_rps (optional): diff --git a/vllm/distributed/eplb/eplb_state.py b/vllm/distributed/eplb/eplb_state.py index 8f8baa7d59db..3e318d784832 100644 --- a/vllm/distributed/eplb/eplb_state.py +++ b/vllm/distributed/eplb/eplb_state.py @@ -337,11 +337,12 @@ def step(self, Args: model (MixtureOfExperts): The MoE model. is_dummy (bool): If `True`, this is a dummy step and the load - metrics recorded in this forward pass will not count. Defaults - to `False`. + metrics recorded in this forward pass will not count. + Defaults to `False`. is_profile (bool): If `True`, perform a dummy rearrangement - with maximum communication cost. 
This is used in `profile_run` - to reserve enough memory for the communication buffer. + with maximum communication cost. This is used in + `profile_run` to reserve enough memory + for the communication buffer. log_stats (bool): If `True`, log the expert load metrics. # Stats diff --git a/vllm/distributed/eplb/rebalance_algo.py b/vllm/distributed/eplb/rebalance_algo.py index 3564a10dfc68..fc43dbe3b653 100644 --- a/vllm/distributed/eplb/rebalance_algo.py +++ b/vllm/distributed/eplb/rebalance_algo.py @@ -109,13 +109,16 @@ def rebalance_experts_hierarchical( num_physical_experts: number of physical experts after replication num_groups: number of expert groups num_nodes: number of server nodes, where the intra-node network - (e.g, NVLink) is faster + (e.g., NVLink) is faster num_gpus: number of GPUs, must be a multiple of `num_nodes` Returns: - physical_to_logical_map: [num_moe_layers, num_physical_experts] - logical_to_physical_map: [num_moe_layers, num_logical_experts, X] - logical_count: [num_moe_layers, num_logical_experts] + physical_to_logical_map (torch.Tensor): + [num_moe_layers, num_physical_experts] + logical_to_physical_map (torch.Tensor): + [num_moe_layers, num_logical_experts, X] + logical_count (torch.Tensor): + [num_moe_layers, num_logical_experts] """ num_layers, num_logical_experts = weight.shape assert num_logical_experts % num_groups == 0 @@ -197,11 +200,13 @@ def rebalance_experts( num_gpus: number of GPUs, must be a multiple of `num_nodes` Returns: - physical_to_logical_map: [layers, num_replicas], the expert index of - each replica - logical_to_physical_map: [layers, num_logical_experts, X], the replica - indices for each expert - expert_count: [layers, num_logical_experts], number of physical + physical_to_logical_map: + [layers, num_replicas], the expert index of each replica + logical_to_physical_map: + [layers, num_logical_experts, X], the replica indices for each + expert + expert_count: + [layers, num_logical_experts], number of physical replicas for each logical expert """ num_layers, num_logical_experts = weight.shape From 8f3616f422e34ccb0e79f1f00d72366c4dab24f1 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni Date: Wed, 17 Sep 2025 10:31:43 -0400 Subject: [PATCH 047/518] Remove old cutlass mla (#23961) Signed-off-by: Matthew Bonanni Signed-off-by: Matthew Bonanni --- CMakeLists.txt | 2 - csrc/attention/mla/cutlass_mla_entry.cu | 38 --- csrc/attention/mla/cutlass_mla_kernels.cu | 225 ------------------ csrc/torch_bindings.cpp | 7 - vllm/_custom_ops.py | 9 - vllm/v1/attention/backends/mla/cutlass_mla.py | 74 +----- 6 files changed, 10 insertions(+), 345 deletions(-) delete mode 100644 csrc/attention/mla/cutlass_mla_entry.cu delete mode 100644 csrc/attention/mla/cutlass_mla_kernels.cu diff --git a/CMakeLists.txt b/CMakeLists.txt index c48da948a029..180b896a7aba 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -308,7 +308,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") "csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu" "csrc/sparse/cutlass/sparse_scaled_mm_entry.cu" "csrc/cutlass_extensions/common.cpp" - "csrc/attention/mla/cutlass_mla_entry.cu" "csrc/quantization/fp8/per_token_group_quant.cu") set_gencode_flags_for_srcs( @@ -595,7 +594,6 @@ if(VLLM_GPU_LANG STREQUAL "CUDA") cuda_archs_loose_intersection(MLA_ARCHS "10.0a" "${CUDA_ARCHS}") if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER_EQUAL 12.8 AND MLA_ARCHS) set(SRCS - "csrc/attention/mla/cutlass_mla_kernels.cu" "csrc/attention/mla/sm100_cutlass_mla_kernel.cu") set_gencode_flags_for_srcs( SRCS "${SRCS}" diff --git 
a/csrc/attention/mla/cutlass_mla_entry.cu b/csrc/attention/mla/cutlass_mla_entry.cu deleted file mode 100644 index 0319d1daf302..000000000000 --- a/csrc/attention/mla/cutlass_mla_entry.cu +++ /dev/null @@ -1,38 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#if defined ENABLE_CUTLASS_MLA && ENABLE_CUTLASS_MLA -void cutlass_mla_decode_sm100a(torch::Tensor const& out, - torch::Tensor const& q_nope, - torch::Tensor const& q_pe, - torch::Tensor const& kv_c_and_k_pe_cache, - torch::Tensor const& seq_lens, - torch::Tensor const& page_table, double scale); -#endif - -void cutlass_mla_decode(torch::Tensor const& out, torch::Tensor const& q_nope, - torch::Tensor const& q_pe, - torch::Tensor const& kv_c_and_k_pe_cache, - torch::Tensor const& seq_lens, - torch::Tensor const& page_table, double scale) { -#if defined ENABLE_CUTLASS_MLA && ENABLE_CUTLASS_MLA - return cutlass_mla_decode_sm100a(out, q_nope, q_pe, kv_c_and_k_pe_cache, - seq_lens, page_table, scale); -#endif - TORCH_CHECK_NOT_IMPLEMENTED(false, "No compiled cutlass MLA"); -} diff --git a/csrc/attention/mla/cutlass_mla_kernels.cu b/csrc/attention/mla/cutlass_mla_kernels.cu deleted file mode 100644 index 9d05d910dd81..000000000000 --- a/csrc/attention/mla/cutlass_mla_kernels.cu +++ /dev/null @@ -1,225 +0,0 @@ -/* - * Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include - -#include "cute/tensor.hpp" - -#include "cutlass/cutlass.h" -#include "cutlass/kernel_hardware_info.h" - -#include "cutlass_extensions/common.hpp" - -#include "device/sm100_mla.hpp" -#include "kernel/sm100_mla_tile_scheduler.hpp" - -using namespace cute; -using namespace cutlass::fmha::kernel; - -template -struct MlaSm100 { - using Element = T; - using ElementAcc = float; - using ElementOut = T; - - using TileShape = Shape<_128, _128, Shape<_512, _64>>; - using TileShapeH = cute::tuple_element_t<0, TileShape>; - using TileShapeD = cute::tuple_element_t<2, TileShape>; - - // H K (D_latent D_rope) B - using ProblemShape = cute::tuple; - - using StrideQ = cute::tuple; // H D B - using StrideK = cute::tuple; // K D B - using StrideO = StrideK; // H D B - using StrideLSE = cute::tuple<_1, int>; // H B - - using TileScheduler = - std::conditional_t; - - using FmhaKernel = - cutlass::fmha::kernel::Sm100FmhaMlaKernelTmaWarpspecialized< - TileShape, Element, ElementAcc, ElementOut, ElementAcc, TileScheduler, - /*kIsCpAsync=*/true>; - using Fmha = cutlass::fmha::device::MLA; -}; - -template -typename T::Fmha::Arguments args_from_options( - at::Tensor const& out, at::Tensor const& q_nope, at::Tensor const& q_pe, - at::Tensor const& kv_c_and_k_pe_cache, at::Tensor const& seq_lens, - at::Tensor const& page_table, double scale) { - cutlass::KernelHardwareInfo hw_info; - hw_info.device_id = q_nope.device().index(); - hw_info.sm_count = - cutlass::KernelHardwareInfo::query_device_multiprocessor_count( - hw_info.device_id); - - int batches = q_nope.sizes()[0]; - int page_count_per_seq = page_table.sizes()[1]; - int page_count_total = kv_c_and_k_pe_cache.sizes()[0]; - int page_size = kv_c_and_k_pe_cache.sizes()[1]; - int max_seq_len = page_size * page_count_per_seq; - using TileShapeH = typename T::TileShapeH; - using TileShapeD = typename T::TileShapeD; - auto problem_shape = - cute::make_tuple(TileShapeH{}, max_seq_len, TileShapeD{}, batches); - - auto [H, K, D, B] = problem_shape; - auto [D_latent, D_rope] = D; - - using StrideQ = typename T::StrideQ; - using StrideK = typename T::StrideK; - using StrideO = typename T::StrideO; - using StrideLSE = typename T::StrideLSE; - - StrideQ stride_Q_latent = cute::make_tuple( - static_cast(D_latent), _1{}, static_cast(H * D_latent)); - StrideQ stride_Q_rope = cute::make_tuple(static_cast(D_rope), _1{}, - static_cast(H * D_rope)); - StrideK stride_C = - cute::make_tuple(static_cast(D_latent + D_rope), _1{}, - static_cast(page_size * (D_latent + D_rope))); - StrideLSE stride_PT = cute::make_stride(_1{}, page_count_per_seq); - StrideLSE stride_LSE = cute::make_tuple(_1{}, static_cast(H)); - StrideO stride_O = cute::make_tuple(static_cast(D_latent), _1{}, - static_cast(H * D_latent)); - - using Element = typename T::Element; - using ElementOut = typename T::ElementOut; - using ElementAcc = typename T::ElementAcc; - auto Q_latent_ptr = static_cast(q_nope.data_ptr()); - auto Q_rope_ptr = static_cast(q_pe.data_ptr()); - auto C_ptr = static_cast(kv_c_and_k_pe_cache.data_ptr()); - auto scale_f = static_cast(scale); - typename T::Fmha::Arguments arguments{ - problem_shape, - {scale_f, Q_latent_ptr, stride_Q_latent, Q_rope_ptr, stride_Q_rope, C_ptr, - stride_C, C_ptr + D_latent, stride_C, - static_cast(seq_lens.data_ptr()), - static_cast(page_table.data_ptr()), stride_PT, page_count_total, - page_size}, - {static_cast(out.data_ptr()), stride_O, - static_cast(nullptr), stride_LSE}, - hw_info, - 1, // split_kv - nullptr, // 
is_var_split_kv - }; - // TODO(kaixih@nvidia): When split_kv=-1 and is_var_split_kv=false, we compute - // split_kv automatically based on batch size and sequence length to balance - // workload across available SMs. Consider using var_split_kv for manual - // control if needed. - T::Fmha::set_split_kv(arguments); - return arguments; -} - -template -void runMla(at::Tensor const& out, at::Tensor const& q_nope, - at::Tensor const& q_pe, at::Tensor const& kv_c_and_k_pe_cache, - at::Tensor const& seq_lens, at::Tensor const& page_table, - float scale, cudaStream_t stream) { - using MlaSm100Type = MlaSm100; - typename MlaSm100Type::Fmha fmha; - auto arguments = args_from_options( - out, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, page_table, scale); - size_t workspace_size = MlaSm100Type::Fmha::get_workspace_size(arguments); - auto const workspace_options = - torch::TensorOptions().dtype(torch::kUInt8).device(q_nope.device()); - auto workspace = torch::empty(workspace_size, workspace_options); - - CUTLASS_CHECK(fmha.can_implement(arguments)); - - CUTLASS_CHECK(fmha.initialize(arguments, workspace.data_ptr(), stream)); - - CUTLASS_CHECK(fmha.run(arguments, workspace.data_ptr(), stream)); -} - -void cutlass_mla_decode_sm100a(torch::Tensor const& out, - torch::Tensor const& q_nope, - torch::Tensor const& q_pe, - torch::Tensor const& kv_c_and_k_pe_cache, - torch::Tensor const& seq_lens, - torch::Tensor const& page_table, double scale) { - TORCH_CHECK(q_nope.device().is_cuda(), "q_nope must be on CUDA"); - TORCH_CHECK(q_nope.dim() == 3, "q_nope must be a 3D tensor"); - TORCH_CHECK(q_pe.dim() == 3, "q_pe must be a 3D tensor"); - TORCH_CHECK(kv_c_and_k_pe_cache.dim() == 3, - "kv_c_and_k_pe_cache must be a 3D tensor"); - TORCH_CHECK(seq_lens.dim() == 1, "seq_lens must be a 1D tensor"); - TORCH_CHECK(page_table.dim() == 2, "page_table must be a 2D tensor"); - TORCH_CHECK(out.dim() == 3, "out must be a 3D tensor"); - - auto B_q_nope = q_nope.size(0); - auto H_q_nope = q_nope.size(1); - auto D_q_nope = q_nope.size(2); - auto B_q_pe = q_pe.size(0); - auto H_q_pe = q_pe.size(1); - auto D_q_pe = q_pe.size(2); - auto B_pt = page_table.size(0); - auto PAGE_NUM = page_table.size(1); - auto PAGE_SIZE = kv_c_and_k_pe_cache.size(1); - auto D_ckv = kv_c_and_k_pe_cache.size(2); - auto B_o = out.size(0); - auto H_o = out.size(1); - auto D_o = out.size(2); - - TORCH_CHECK(D_q_nope == 512, "D_q_nope must be equal to 512"); - TORCH_CHECK(D_q_pe == 64, "D_q_pe must be equal to 64"); - TORCH_CHECK(D_ckv == 576, "D_ckv must be equal to 576"); - TORCH_CHECK(H_q_nope == H_q_pe && H_q_nope == H_o && H_o == 128, - "H_q_nope, H_q_pe, and H_o must be equal to 128"); - TORCH_CHECK(PAGE_SIZE > 0 && (PAGE_SIZE & (PAGE_SIZE - 1)) == 0, - "PAGE_SIZE must be a power of 2"); - TORCH_CHECK( - B_q_nope == B_q_pe && B_q_nope == B_pt && B_q_nope == B_o, - "Batch dims must be same for page_table, q_nope and q_pe, and out"); - TORCH_CHECK(PAGE_NUM % (128 / PAGE_SIZE) == 0, - "PAGE_NUM must be divisible by 128 / PAGE_SIZE"); - TORCH_CHECK(D_o == 512, "D_o must be equal to 512"); - - TORCH_CHECK(q_nope.dtype() == at::ScalarType::Half || - q_nope.dtype() == at::ScalarType::BFloat16 || - q_nope.dtype() == at::ScalarType::Float8_e4m3fn, - "q_nope must be a half, bfloat16, or float8_e4m3fn tensor"); - TORCH_CHECK(kv_c_and_k_pe_cache.dtype() == q_nope.dtype() && - q_nope.dtype() == q_pe.dtype(), - "kv_c_and_k_pe_cache, q_nope, and q_pe must be the same type"); - TORCH_CHECK(seq_lens.dtype() == torch::kInt32, - "seq_lens must be a 32-bit integer 
tensor"); - TORCH_CHECK(page_table.dtype() == torch::kInt32, - "page_table must be a 32-bit integer tensor"); - - auto in_dtype = q_nope.dtype(); - const at::cuda::OptionalCUDAGuard device_guard(device_of(q_nope)); - const cudaStream_t stream = - at::cuda::getCurrentCUDAStream(q_nope.get_device()); - if (in_dtype == at::ScalarType::Half) { - runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, seq_lens, - page_table, scale, stream); - } else if (in_dtype == at::ScalarType::BFloat16) { - runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, - seq_lens, page_table, scale, stream); - } else if (in_dtype == at::ScalarType::Float8_e4m3fn) { - runMla(out, q_nope, q_pe, kv_c_and_k_pe_cache, - seq_lens, page_table, scale, stream); - } else { - TORCH_CHECK(false, "Unsupported input data type of MLA"); - } -} diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index f22e23519831..bc096406c51a 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -510,13 +510,6 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { ops.def("cutlass_sparse_compress(Tensor a) -> Tensor[]"); ops.impl("cutlass_sparse_compress", &cutlass_sparse_compress); - // CUTLASS MLA decode - ops.def( - "cutlass_mla_decode(Tensor! out, Tensor q_nope, Tensor q_pe," - " Tensor kv_c_and_k_pe_cache, Tensor seq_lens," - " Tensor page_table, float scale) -> ()"); - ops.impl("cutlass_mla_decode", torch::kCUDA, &cutlass_mla_decode); - // SM100 CUTLASS MLA decode ops.def( "sm100_cutlass_mla_decode(Tensor! out, Tensor! lse, Tensor q_nope," diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 456c6b3ba923..712295aa9288 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1823,15 +1823,6 @@ def flash_mla_with_kvcache( return out, softmax_lse -def cutlass_mla_decode(out: torch.Tensor, q_nope: torch.Tensor, - q_pe: torch.Tensor, kv_c_and_k_pe_cache: torch.Tensor, - seq_lens: torch.Tensor, page_table: torch.Tensor, - scale: float) -> torch.Tensor: - torch.ops._C.cutlass_mla_decode(out, q_nope, q_pe, kv_c_and_k_pe_cache, - seq_lens, page_table, scale) - return out - - def sm100_cutlass_mla_decode(out: torch.Tensor, lse: torch.Tensor, q_nope: torch.Tensor, q_pe: torch.Tensor, kv_c_and_k_pe_cache: torch.Tensor, diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py index 78af8d28f889..21be17a750df 100644 --- a/vllm/v1/attention/backends/mla/cutlass_mla.py +++ b/vllm/v1/attention/backends/mla/cutlass_mla.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import os -from typing import ClassVar, Optional +from typing import ClassVar, Optional, Union import torch @@ -109,12 +109,6 @@ def __init__( "are not implemented for " "CutlassMLAImpl") - self._use_old_cutlass_mla = False - force_old_cutlass = os.environ.get("FORCE_OLD_CUTLASS_MLA", None) - if force_old_cutlass: - logger.warning_once("Forcing old cutlass mla kernel") - self._use_old_cutlass_mla = True - # TODO: Currently, num_kv_splits is limited to 16 to avoid hanging # issues. 
In case the code hangs, use: # FORCE_NUM_KV_SPLITS=1 @@ -219,16 +213,22 @@ def _sm100_cutlass_mla_decode( return out, returned_lse - def _sm100_forward_decode( + def _forward_decode( self, - q_nope: torch.Tensor, - q_pe: torch.Tensor, + q: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], kv_c_and_k_pe_cache: torch.Tensor, attn_metadata: MLACommonMetadata, + layer: AttentionLayer, ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: assert kv_c_and_k_pe_cache.numel() > 0 assert attn_metadata.decode is not None + if type(q) is tuple: + q_nope, q_pe = q + else: + q_nope, q_pe = torch.split( + q, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) + # Adjust workspace size (if necessary) self._workspace.ensure_size(attn_metadata, self._num_kv_splits) @@ -245,57 +245,3 @@ def _sm100_forward_decode( ) return o, (lse if self.need_to_return_lse_for_decode else None) - - # TODO: Currently we leave it here only for backup in case something is - # wrong with the new SM100 CUTLASS MLA kernel - def _old_forward_decode( - self, - q_nope: torch.Tensor, - q_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: MLACommonMetadata, - ) -> torch.Tensor: - assert kv_c_and_k_pe_cache.numel() > 0 - assert attn_metadata.decode is not None - - if is_quantized_kv_cache(self.kv_cache_dtype): - raise NotImplementedError( - "FP8 Cutlass MLA not supported with FORCE_OLD_CUTLASS_MLA") - - B = q_nope.shape[0] - - o = torch.empty((B, self.num_heads, self.kv_lora_rank), - dtype=q_nope.dtype, - device=q_nope.device) - - # Run MLA - # Clone q_nope and q_pe to make sure strides computation is correct. - q_nope = q_nope.clone() - q_pe = q_pe.clone() - - ops.cutlass_mla_decode(o, q_nope, q_pe, kv_c_and_k_pe_cache, - attn_metadata.decode.seq_lens, - attn_metadata.decode.block_table, self.scale) - - return o - - def _forward_decode( - self, - q: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: MLACommonMetadata, - layer: AttentionLayer, - ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: - if type(q) is tuple: - q_nope, q_pe = q - else: - q_nope, q_pe = torch.split( - q, [self.kv_lora_rank, self.qk_rope_head_dim], dim=-1) - if self._use_old_cutlass_mla: - # TODO: Remove the old cutlass MLA kernel after more extensive - # testing - return self._old_forward_decode(q_nope, q_pe, kv_c_and_k_pe_cache, - attn_metadata), None - - return self._sm100_forward_decode(q_nope, q_pe, kv_c_and_k_pe_cache, - attn_metadata) From 4a2d33e3718c57a2789da8b621728965a8a1787a Mon Sep 17 00:00:00 2001 From: samzong Date: Wed, 17 Sep 2025 23:11:51 +0800 Subject: [PATCH 048/518] [Docs] vllm/benchmarks/datasets.py fix docstring param format. (#24970) Signed-off-by: samzong --- vllm/benchmarks/datasets.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index a38090edb0b4..1831539a6adb 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -104,9 +104,9 @@ def __init__( Args: dataset_path (Optional[str]): Path to the dataset. If None, it - indicates that a default or random dataset might be used. + indicates that a default or random dataset might be used. random_seed (int): Seed value for reproducible shuffling or - sampling. Defaults to DEFAULT_SEED. + sampling. Defaults to DEFAULT_SEED. 
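The docstring patches in this series (here and in the eplb module earlier) converge on the same Google-style layout, which the griffe-based docs build appears to accept without warnings: `name (type): description`, continuation lines indented one level past the name, and a blank line before `Returns:`. A generic illustration with a made-up function, so it is clear this is not code from the repository:

def make_request_ids(num_requests: int, request_id_prefix: str = "req") -> list[str]:
    """Build request ids for a benchmark run.

    Args:
        num_requests (int): The number of request ids to generate.
        request_id_prefix (str): The prefix applied to generated request
            identifiers; continuation lines are indented past the name.

    Returns:
        list[str]: The generated request ids.
    """
    return [f"{request_id_prefix}-{i}" for i in range(num_requests)]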
""" self.dataset_path = dataset_path # Set the random seed, ensuring that a None value is replaced with the @@ -200,8 +200,7 @@ def sample(self, tokenizer: PreTrainedTokenizerBase, tokenizer (PreTrainedTokenizerBase): The tokenizer to be used for processing the dataset's text. num_requests (int): The number of sample requests to generate. - request_id_prefix (str) The prefix of request_id. - + request_id_prefix (str): The prefix of request_id. Returns: list[SampleRequest]: A list of sample requests generated from the @@ -224,7 +223,8 @@ def maybe_oversample_requests( requests (List[SampleRequest]): The current list of sampled requests. num_requests (int): The target number of requests. - request_id_prefix (str) The prefix of the request ids. + request_id_prefix (str): The prefix applied to generated request + identifiers. """ if no_oversample: From 087c6ffc9202599f438f1f7e0d6449020a958ac1 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 17 Sep 2025 11:28:58 -0400 Subject: [PATCH 049/518] [CI Bugfix] Fix failing test_invalid_env (#25078) Signed-off-by: mgoin --- tests/kernels/attention/test_attention_selector.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 4d969cf992d2..190c92e1251c 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -368,4 +368,4 @@ def test_invalid_env(use_v1: bool, monkeypatch: pytest.MonkeyPatch): # Should raise ValueError for invalid backend with pytest.raises(ValueError) as exc_info: get_attn_backend(32, torch.float16, None, 16, False) - assert "Invalid attention backend: 'INVALID'" in str(exc_info.value) + assert "Invalid value 'INVALID'" in str(exc_info.value) From 4b946d693e0af15740e9ca9c0e059d5f333b1083 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 09:32:42 -0700 Subject: [PATCH 050/518] [V0 Deprecation] Remove V0 Core tests (#25082) Signed-off-by: Woosuk Kwon --- .buildkite/test-pipeline.yaml | 11 - tests/core/__init__.py | 0 tests/core/block/__init__.py | 0 tests/core/block/conftest.py | 15 - tests/core/block/e2e/__init__.py | 0 tests/core/block/e2e/conftest.py | 71 - tests/core/block/e2e/test_correctness.py | 479 ------ .../e2e/test_correctness_sliding_window.py | 185 --- tests/core/block/test_block_manager.py | 341 ----- tests/core/block/test_block_table.py | 577 ------- tests/core/block/test_common.py | 45 - .../block/test_cpu_gpu_block_allocator.py | 96 -- tests/core/block/test_naive_block.py | 148 -- tests/core/block/test_prefix_caching_block.py | 1035 ------------- tests/core/conftest.py | 12 - tests/core/test_chunked_prefill_scheduler.py | 858 ----------- tests/core/test_num_computed_tokens_update.py | 67 - tests/core/test_scheduler.py | 1338 ----------------- tests/core/test_serialization.py | 36 - tests/core/utils.py | 392 ----- 20 files changed, 5706 deletions(-) delete mode 100644 tests/core/__init__.py delete mode 100644 tests/core/block/__init__.py delete mode 100644 tests/core/block/conftest.py delete mode 100644 tests/core/block/e2e/__init__.py delete mode 100644 tests/core/block/e2e/conftest.py delete mode 100644 tests/core/block/e2e/test_correctness.py delete mode 100644 tests/core/block/e2e/test_correctness_sliding_window.py delete mode 100644 tests/core/block/test_block_manager.py delete mode 100644 tests/core/block/test_block_table.py delete mode 100644 tests/core/block/test_common.py delete mode 100644 
tests/core/block/test_cpu_gpu_block_allocator.py delete mode 100644 tests/core/block/test_naive_block.py delete mode 100644 tests/core/block/test_prefix_caching_block.py delete mode 100644 tests/core/conftest.py delete mode 100644 tests/core/test_chunked_prefill_scheduler.py delete mode 100644 tests/core/test_num_computed_tokens_update.py delete mode 100644 tests/core/test_scheduler.py delete mode 100644 tests/core/test_serialization.py delete mode 100644 tests/core/utils.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index b5ea4407ef5b..133ba792680d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -91,17 +91,6 @@ steps: - pytest -v -s basic_correctness/test_cpu_offload.py - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py -- label: Core Test # 22min - timeout_in_minutes: 35 - mirror_hardwares: [amdexperimental] - fast_check: true - source_file_dependencies: - - vllm/core - - vllm/distributed - - tests/core - commands: - - pytest -v -s core - - label: Entrypoints Unit Tests # 5min timeout_in_minutes: 10 working_dir: "/vllm-workspace/tests" diff --git a/tests/core/__init__.py b/tests/core/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/core/block/__init__.py b/tests/core/block/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/core/block/conftest.py b/tests/core/block/conftest.py deleted file mode 100644 index 6afe98d78ce8..000000000000 --- a/tests/core/block/conftest.py +++ /dev/null @@ -1,15 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - - -@pytest.fixture() -def should_do_global_cleanup_after_test() -> bool: - """Disable the global cleanup fixture for tests in this directory. This - provides a ~10x speedup for unit tests that don't load a model to GPU. - - This requires that tests in this directory clean up after themselves if they - use the GPU. 
- """ - return False diff --git a/tests/core/block/e2e/__init__.py b/tests/core/block/e2e/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/core/block/e2e/conftest.py b/tests/core/block/e2e/conftest.py deleted file mode 100644 index e2c6c66b259c..000000000000 --- a/tests/core/block/e2e/conftest.py +++ /dev/null @@ -1,71 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from collections.abc import Iterable -from typing import Callable, Optional - -import pytest - -from vllm import LLM -from vllm.distributed import cleanup_dist_env_and_memory -from vllm.model_executor.utils import set_random_seed - - -@pytest.fixture -def baseline_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs, - baseline_llm_kwargs, seed): - return create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs, - baseline_llm_kwargs, seed) - - -@pytest.fixture -def test_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs, - test_llm_kwargs, seed): - return create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs, - test_llm_kwargs, seed) - - -def create_llm_generator(common_llm_kwargs, per_test_common_llm_kwargs, - distinct_llm_kwargs, seed): - kwargs = { - **common_llm_kwargs, - **per_test_common_llm_kwargs, - **distinct_llm_kwargs, - } - - def generator_inner(): - llm = LLM(**kwargs) - - set_random_seed(seed) - - yield llm - del llm - cleanup_dist_env_and_memory() - - for llm in generator_inner(): - yield llm - del llm - - -def get_text_from_llm_generator(llm_generator: Iterable[LLM], - prompts, - sampling_params, - llm_cb: Optional[Callable[[LLM], - None]] = None): - for llm in llm_generator: - if llm_cb: - llm_cb(llm) - outputs = llm.generate(prompts, sampling_params, use_tqdm=True) - text = [output.outputs[0].text for output in outputs] - del llm - - return text - - -def get_token_ids_from_llm_generator(llm_generator, prompts, sampling_params): - for llm in llm_generator: - outputs = llm.generate(prompts, sampling_params, use_tqdm=True) - token_ids = [output.outputs[0].token_ids for output in outputs] - del llm - - return token_ids diff --git a/tests/core/block/e2e/test_correctness.py b/tests/core/block/e2e/test_correctness.py deleted file mode 100644 index 8de48ef59a01..000000000000 --- a/tests/core/block/e2e/test_correctness.py +++ /dev/null @@ -1,479 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from itertools import cycle - -import pytest - -from vllm import SamplingParams - -from .conftest import get_token_ids_from_llm_generator - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # skip cuda graph creation for fast test. - "enforce_eager": True, - - # Allow only 5 sequences of ~1024 tokens in worst case. - "block_size": 16, - "num_gpu_blocks_override": 5 * (64 + 1), - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) -@pytest.mark.parametrize("test_llm_kwargs", [{ - "preemption_mode": "swap" -}, { - "preemption_mode": "recompute" -}]) -@pytest.mark.parametrize("batch_size", [10]) -@pytest.mark.parametrize("seed", [1]) -def test_block_manager_with_preemption(baseline_llm_generator, - test_llm_generator, batch_size): - """Verify block manager produces same outputs even when there is preemption. 
- - This constructs two LLM, each with limited number of GPU blocks. The limit - is decided such that as the sequences in the batch grow, sequences must be - preempted and removed from cache. - - If the output token ids are equivalent, then we have confidence that the KV - cache is not corrupted. - - NOTE: We want a significant number of generated tokens so that any incorrect - KV mapping has time to build up error. - - NOTE(Kuntai): Though we have removed block manager v1, this test is still - useful as it asserts the behavior of block manager v2 (now it is called - SelfAttnBlockSpaceManager) is the same when swapping / preemption, so we - keep this test. - """ - output_len = 1024 - temperature = 0.0 - - # We want to ensure equality even with preemption. - # We force the total block size to be 1 + cdiv(output_len, block_size) - # so that only one sequence can fit at a time (once the sequences grow). - - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - prompts = [prompt for prompt, _ in zip(cycle(prompts), range(batch_size))] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # Our prompts will generate 128 tokens; since the prompts themselves are - # small, we don't need much KV space beyond 128. - "max_model_len": 160, - - # skip cuda graph creation for fast test. - "enforce_eager": True, - }]) -@pytest.mark.parametrize( - "per_test_common_llm_kwargs", - [ - { - "block_size": 16, - - # Allow only 2 sequences of ~128 tokens in worst case. - # Note 8 = 128/block_size - "num_gpu_blocks_override": 2 * (8 + 1), - }, - { - "block_size": 8, - - # Allow only 2 sequences of ~128 tokens in worst case. - # Note 16 = 128/block_size - "num_gpu_blocks_override": 2 * (16 + 2), - } - ]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{ - "num_lookahead_slots": 0, -}]) -@pytest.mark.parametrize( - "test_llm_kwargs", - [ - { - # We run one test with block_size < lookahead_slots, one test with - # block_size > lookahead_slots - "num_lookahead_slots": 10, - "preemption_mode": "swap", - }, - { - "num_lookahead_slots": 10, - "preemption_mode": "recompute", - } - ]) -@pytest.mark.parametrize("batch_size", [4]) -@pytest.mark.parametrize("seed", [1]) -def test_lookahead_greedy_equality_with_preemption(baseline_llm_generator, - test_llm_generator, - batch_size): - """Verify vLLM produces the same output with greedy sampling, when lookahead - scheduling is used vs. not. - - Lookahead scheduling is not expected to modify the output, as it simply - allocates empty slots ahead of the known token ids in a sliding fashion. - - This test constrains the total number of blocks to force preemption. It also - varies the block size so that the lookahead size is less than and greater - than the block size. 
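The block budget these preemption tests rely on is plain arithmetic: with 16-token blocks, a sequence that generates 1024 tokens needs cdiv(1024, 16) = 64 blocks, plus the one extra block per sequence the docstring mentions, so `num_gpu_blocks_override = 5 * (64 + 1)` admits at most five such sequences before preemption must kick in. A tiny worked check in plain Python, with values copied from the test parameters:

from math import ceil

block_size = 16
output_len = 1024
num_seqs = 5

blocks_per_seq = 1 + ceil(output_len / block_size)    # 1 + 64 = 65
num_gpu_blocks_override = num_seqs * blocks_per_seq   # 325
assert num_gpu_blocks_override == 5 * (64 + 1)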
- """ - output_len = 128 - temperature = 0.0 - - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - prompts = [prompt for prompt, _ in zip(cycle(prompts), range(batch_size))] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - print('Getting token ids without lookahead scheduling') - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - print('Getting token ids with lookahead scheduling') - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [ - { - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # skip cuda graph creation for fast test. - "enforce_eager": True, - "enable_chunked_prefill": True, - }, - ]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", - [{ - "block_size": 16, - "max_num_batched_tokens": 2, - "max_num_seqs": 2, - }, { - "block_size": 16, - "max_num_batched_tokens": 3, - "max_num_seqs": 2, - }, { - "block_size": 16, - "max_num_batched_tokens": 256, - "max_num_seqs": 10, - }]) -@pytest.mark.parametrize("baseline_llm_kwargs", [ - {}, -]) -@pytest.mark.parametrize("test_llm_kwargs", [ - { - "num_lookahead_slots": 0, - }, - { - "num_lookahead_slots": 5, - }, -]) -@pytest.mark.parametrize("batch_size", [4]) -@pytest.mark.parametrize("seed", [1]) -def test_chunked_prefill_block_manager(baseline_llm_generator, - test_llm_generator, batch_size): - """Verify that chunked prefill works with SelfAttnBlockSpaceManager, - with and without lookahead scheduling. - """ - output_len = 32 - temperature = 0.0 - - prompts = [ - "Hello, my name is", - "The president of the United States is", - ("1 + " * 50) + " 1 = ", # Longer prompt. - "The capital of France is", - "The future of AI is", - ] - - prompts = [prompt for prompt, _ in zip(cycle(prompts), range(batch_size))] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - print('Getting token ids with BlockManager') - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - print('Getting token ids with BlockManager, with lookahead slots.') - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # skip cuda graph creation for fast test. - "enforce_eager": True, - - # Allow only 5 sequences of ~1024 tokens in worst case. 
- "block_size": 16, - "num_gpu_blocks_override": 5 * (64 + 1), - - # Enable prefill cache - "enable_prefix_caching": True, - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) -@pytest.mark.parametrize("test_llm_kwargs", [{ - "preemption_mode": "swap" -}, { - "preemption_mode": "recompute" -}]) -@pytest.mark.parametrize("batch_size", [10]) -@pytest.mark.parametrize("seed", [1]) -def test_block_manager_prefix_caching_enabled_with_preemption( - baseline_llm_generator, test_llm_generator, batch_size): - """Verify block manager produces same outputs even when there is preemption. - - This constructs two LLM, each with limited number of GPU blocks. The limit - is decided such that as the sequences in the batch grow, sequences must be - preempted and removed from cache. - - If the output token ids are equivalent, then we have confidence that the KV - cache is not corrupted. - - NOTE: We want a significant number of generated tokens so that any incorrect - KV mapping has time to build up error. - - NOTE(Kuntai): Though we have removed block manager v1, this test is still - useful as it asserts the behavior of block manager v2 (now it is called - SelfAttnBlockSpaceManager) is the same when swapping / preemption, so we - keep this test. - """ - output_len = 1024 - temperature = 0.0 - - # We want to ensure equality even with preemption. - # We force the total block size to be 1 + cdiv(output_len, block_size) - # so that only one sequence can fit at a time (once the sequences grow). - - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - prompts = [prompt for prompt, _ in zip(cycle(prompts), range(batch_size))] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - print('Getting token ids from block manager') - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - print('Getting token ids from block manager, with preemption') - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # skip cuda graph creation for fast test. - "enforce_eager": True, - - # Allow only 5 sequences of ~1024 tokens in worst case. - "block_size": 16, - "num_gpu_blocks_override": 5 * (64 + 1), - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{ - "enable_prefix_caching": False -}]) -@pytest.mark.parametrize("test_llm_kwargs", [{ - "enable_prefix_caching": True, - "preemption_mode": "swap" -}, { - "enable_prefix_caching": True, - "preemption_mode": "recompute" -}]) -@pytest.mark.parametrize("batch_size", [10]) -@pytest.mark.parametrize("seed", [1]) -def test_auto_prefix_caching_with_preemption(baseline_llm_generator, - test_llm_generator, batch_size): - """Verify block manager v2 with auto prefix caching enabled produces same - outputs as auto prefix caching disabled, even when there is preemption. - - This constructs two LLM, each with limited number of GPU blocks. 
The limit - is decided such that as the sequences in the batch grow, sequences must be - preempted and removed from cache. - - If the output token ids are equivalent, then we have confidence that auto - prefix caching itself at least don't cause result error. - """ - output_len = 1024 - temperature = 0.0 - - # We want to ensure equality even with preemption. - # We force the total block size to be 1 + cdiv(output_len, block_size) - # so that only one sequence can fit at a time (once the sequences grow). - prompts = [ - "Hello, my name is", - "The president of the United States is", - "The capital of France is", - "The future of AI is", - ] - - prompts = [prompt for prompt, _ in zip(cycle(prompts), range(batch_size))] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - print('Getting token ids with APC disabled') - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - print('Getting token ids with APC enabled') - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - # Use a small model for a fast test. - "model": "facebook/opt-125m", - - # skip cuda graph creation for fast test. - "enforce_eager": True, - - # we keep the blocks small, so that hit eviction quickly - "max_model_len": 48, - "block_size": 16, - "num_gpu_blocks_override": 3, - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{ - "enable_prefix_caching": False -}]) -@pytest.mark.parametrize("test_llm_kwargs", [{ - "enable_prefix_caching": True, -}]) -@pytest.mark.parametrize("seed", [1]) -def test_auto_prefix_caching_after_eviction_start(baseline_llm_generator, - test_llm_generator): - """Verify block manager v2 with auto prefix caching could work normally - even when eviction started. - With APC enabled, all blocks are held by native block at the beginning. - Then blocks are managed by evictor instead. If cache hit at the evictor's - block, then it could be reused, or we need to recompute its kv cache. - """ - output_len = 10 - temperature = 0.0 - - prompts = [ - "You are a helpful assistant. Please answer truthfully and write " - "out your thinking step by step to be sure you get the right answer. " - "If you make a mistake, attempt to correct it. who are you?", - "You are a helpful assistant. Please answer truthfully and write out " - "your thinking step by step to be sure you get the right answer. You " - "are helpful and harmless and you follow ethical guidelines. " - "who are you?" 
- ] - - sampling_params = SamplingParams( - max_tokens=output_len, - ignore_eos=True, - temperature=temperature, - ) - - print('Getting token ids with APC disabled') - baseline_token_ids = get_token_ids_from_llm_generator( - baseline_llm_generator, prompts, sampling_params) - - print('Getting token ids with APC enabled') - test_token_ids = get_token_ids_from_llm_generator(test_llm_generator, - prompts, sampling_params) - - for expected_token_ids, actual_token_ids in zip(baseline_token_ids, - test_token_ids): - assert expected_token_ids == actual_token_ids - - assert baseline_token_ids == test_token_ids diff --git a/tests/core/block/e2e/test_correctness_sliding_window.py b/tests/core/block/e2e/test_correctness_sliding_window.py deleted file mode 100644 index 27fe27a880e3..000000000000 --- a/tests/core/block/e2e/test_correctness_sliding_window.py +++ /dev/null @@ -1,185 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import random - -import pytest - -from tests.kernels.utils import override_backend_env_variable -from vllm import LLM, SamplingParams -from vllm.platforms import current_platform - -from .conftest import get_text_from_llm_generator - -# relatively small model with 4k sliding window -MODEL = "bigcode/starcoder2-3b" -BLOCK_SIZE = 16 - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - "model": MODEL, - - # skip cuda graph creation for fast test. - "enforce_eager": True, - "block_size": BLOCK_SIZE, - # needed due to https://github.com/vllm-project/vllm/issues/1908#issuecomment-2101122008 - "num_gpu_blocks_override": 100000 // BLOCK_SIZE, - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("baseline_llm_kwargs", [{}]) -@pytest.mark.parametrize("test_llm_kwargs", [{}]) -@pytest.mark.parametrize("batch_size", [5]) -@pytest.mark.parametrize("seed", [1]) -@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS"]) -def test_sliding_window_retrieval(baseline_llm_generator, test_llm_generator, - batch_size, seed, backend, monkeypatch): - """ - The test does a bunch of assignments "x1 = 10\nx2 = 33\n..." and then - asks for value of one of them (which is outside the sliding window). - If we tell it upfront which we are going to be looking for, then - it answers correctly (mostly). - - Additionally, we compare the results of the v1 and v2 managers. 
- """ - if backend == "XFORMERS" and current_platform.is_rocm(): - pytest.skip("Xformers does not support ROCm/HIP.") - - override_backend_env_variable(monkeypatch, backend) - - sampling_params = SamplingParams( - max_tokens=1024, - ignore_eos=True, - temperature=0.0, - ) - - prompts, answer, indices = prep_prompts(batch_size) - - baseline_texts = get_text_from_llm_generator(baseline_llm_generator, - prompts, - sampling_params, - llm_cb=check_window(prompts)) - - check_answers(indices, answer, baseline_texts) - - print('Getting token ids from block manager v2') - test_texts = get_text_from_llm_generator(test_llm_generator, prompts, - sampling_params) - check_answers(indices, answer, test_texts) - - cmp = [ - expected_text == actual_text - for expected_text, actual_text in zip(baseline_texts, test_texts) - ] - print(cmp) - # make sure it's mostly OK; this is possibly because https://github.com/vllm-project/vllm/pull/4768 - # however, https://github.com/vllm-project/vllm/issues/3385#issuecomment-1995924290 - # states that xformers and flash_attn have different ideas about the window - # size anyways - assert sum(cmp) > 0.7 * len(cmp) - - -@pytest.mark.parametrize( - "common_llm_kwargs", - [{ - "model": MODEL, - - # skip cuda graph creation for fast test. - "enforce_eager": True, - "block_size": BLOCK_SIZE, - "num_gpu_blocks_override": 100000 // BLOCK_SIZE, - }]) -@pytest.mark.parametrize("per_test_common_llm_kwargs", [{}]) -@pytest.mark.parametrize("test_llm_kwargs", [{"enable_chunked_prefill": True}]) -@pytest.mark.parametrize("batch_size", [5]) -@pytest.mark.parametrize("seed", [1]) -@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS"]) -def test_sliding_window_chunked_prefill(test_llm_generator, batch_size, seed, - backend, monkeypatch): - """ - This is similar to test_sliding_window_retrieval, however, it doesn't - compare against the v1 block manager since v1 doesn't support - chunked prefill with sliding window. - - The results with and without chunked prefill are not the same due to - numerical instabilities. - """ - if backend == "XFORMERS" and current_platform.is_rocm(): - pytest.skip("Xformers does not support ROCm/HIP.") - override_backend_env_variable(monkeypatch, backend) - - sampling_params = SamplingParams( - max_tokens=10, - ignore_eos=True, - temperature=0.0, - ) - - prompts, answer, indices = prep_prompts(batch_size) - - # We don't compare with the baseline model here, since the results - # slightly different due to different tailing in attention. - test_texts = get_text_from_llm_generator(test_llm_generator, - prompts, - sampling_params, - llm_cb=check_window(prompts)) - check_answers(indices, answer, test_texts) - - -def prep_prompts(batch_size: int, ln_range: tuple[int, int] = (800, 1100)): - """ - Generate prompts which a bunch of assignments, - then asking for the value of one of them. - The prompt is just under 10k tokens; sliding window is 4k - so the answer is outside sliding window, but should still be correct. 
- - Args: - batch_size: number of prompts to generate - ln_range: an argument to control the length of the prompt - """ - prompts: list[str] = [] - answer: list[int] = [] - indices: list[int] = [] - random.seed(1) - for _ in range(batch_size): - idx = random.randint(30, 90) - indices.append(idx) - prompt = "```python\n# We set a number of variables, " + \ - f"x{idx} will be important later\n" - ln = random.randint(*ln_range) - for k in range(30, ln): - v = random.randint(10, 99) - if k == idx: - answer.append(v) - prompt += f"x{k} = {v}\n" - prompt += f"# Now, we check the value of x{idx}:\n" - prompt += f"assert x{idx} == " - prompts.append(prompt) - return prompts, answer, indices - - -def check_answers(indices: list[int], - answer: list[int], - outputs: list[str], - accept_rate: float = 0.7): - answer2 = [int(text[0:2].strip()) for text in outputs] - print(list(zip(indices, zip(answer, answer2)))) - numok = 0 - for a1, a2 in zip(answer, answer2): - if a1 == a2: - numok += 1 - frac_ok = numok / len(answer) - print(f"Num OK: {numok}/{len(answer)} {frac_ok}") - assert frac_ok >= accept_rate - - -def check_window(prompts: list[str]): - - def inner(llm: LLM): - sliding_window = llm.llm_engine.model_config.get_sliding_window() - assert sliding_window and sliding_window > 0 - assert any( - len(llm.get_tokenizer().tokenize(prompt)) > sliding_window - for prompt in prompts) - - return inner diff --git a/tests/core/block/test_block_manager.py b/tests/core/block/test_block_manager.py deleted file mode 100644 index 24499b9ad4e9..000000000000 --- a/tests/core/block/test_block_manager.py +++ /dev/null @@ -1,341 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.core.block_manager import SelfAttnBlockSpaceManager -from vllm.core.interfaces import AllocStatus -from vllm.sequence import Logprob, SequenceStatus -from vllm.utils import chunk_list - -from ..utils import create_dummy_prompt, create_seq_group - - -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("num_gpu_blocks", [8, 40, 80]) -@pytest.mark.parametrize("num_seqs_per_group", [1, 4]) -@pytest.mark.parametrize("watermark", [0.0, 0.5]) -def test_can_allocate_seq_group(block_size: int, num_seqs_per_group: int, - num_gpu_blocks: int, watermark: float): - block_manager = SelfAttnBlockSpaceManager( - block_size=block_size, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - watermark=watermark, - ) - num_watermark_blocks = int(watermark * num_gpu_blocks) - - num_output_blocks_per_seq = 1 - - # NOTE: This should be num_output_blocks_per_seq * num_seqs_per_group, but - # the current implementation assumes all seqs are new prompts / don't have - # different output lens. 
- num_output_blocks = num_output_blocks_per_seq - - for num_prompt_blocks in range(1, num_gpu_blocks - num_output_blocks): - seq_group = create_seq_group( - seq_prompt_len=block_size * num_prompt_blocks, - seq_output_lens=[ - block_size * num_output_blocks_per_seq - for _ in range(num_seqs_per_group) - ], - ) - - assert num_prompt_blocks + num_output_blocks <= num_gpu_blocks - - can_allocate_result = block_manager.can_allocate(seq_group) - - num_required_blocks = num_prompt_blocks + num_output_blocks - - if num_gpu_blocks - num_required_blocks < num_watermark_blocks: - assert can_allocate_result == AllocStatus.NEVER - elif num_gpu_blocks >= num_required_blocks: - assert can_allocate_result == AllocStatus.OK - else: - assert can_allocate_result == AllocStatus.LATER - - -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("prompt_len", [1, 7, 8]) -@pytest.mark.parametrize("num_slots_to_append", [1, 8, 129]) -@pytest.mark.parametrize("num_lookahead_slots", [0, 10]) -def test_append_slots(block_size, prompt_len, num_slots_to_append, - num_lookahead_slots): - """Verify append_slots consumes the correct number of blocks from the block - table. - """ - - num_gpu_blocks = 1024 - watermark = 0.1 - block_manager = SelfAttnBlockSpaceManager( - block_size=block_size, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - watermark=watermark, - ) - - seq_group = create_seq_group( - seq_prompt_len=prompt_len, - seq_output_lens=[0], - ) - - # Allocate seq - assert block_manager.can_allocate(seq_group) - block_manager.allocate(seq_group) - - # Seq seq to RUNNING - seq = seq_group.get_seqs()[0] - seq.status = SequenceStatus.RUNNING - - # Append tokens to the sequeqnce - for token_id in range(num_slots_to_append): - seq.append_token_id(token_id, {token_id: Logprob(0.0)}) - - # Append slots for new tokens and lookahead slots. - free_blocks_before_append = block_manager.get_num_free_gpu_blocks() - block_manager.append_slots(seq, num_lookahead_slots) - num_consumed_blocks = (free_blocks_before_append - - block_manager.get_num_free_gpu_blocks()) - - # Expect consumed blocks to be new blocks required to support the new slots. - expected_consumed_blocks = len( - list( - chunk_list( - list( - range(prompt_len + num_slots_to_append + - num_lookahead_slots)), - block_size))) - len( - list(chunk_list(list(range(prompt_len)), block_size))) - assert num_consumed_blocks == expected_consumed_blocks - - -@pytest.mark.parametrize("block_size", [8]) -@pytest.mark.parametrize("num_cpu_blocks", [4]) -@pytest.mark.parametrize("num_gpu_blocks", [4]) -@pytest.mark.parametrize("num_lookahead_slots", [0, 2, 10]) -@pytest.mark.parametrize("enable_caching", [False, True]) -def test_swap(block_size, num_cpu_blocks, num_gpu_blocks, num_lookahead_slots, - enable_caching): - """Verify blocks number on src/desc device is correct after swapping in/out - sequence group (not missing or extra blocks). - """ - block_manager = SelfAttnBlockSpaceManager(block_size, - num_cpu_blocks, - num_gpu_blocks, - watermark=0, - enable_caching=enable_caching) - prompt, seq_group = create_dummy_prompt("1", prompt_length=block_size - 1) - prompt.status = SequenceStatus.WAITING - block_manager.allocate(seq_group) - - # Emulate a forward pass by appending a single token. - # The block manager then knows how many unprocessed - # tokens will be written in the next forward pass. - token_id = 0 - prompt.status = SequenceStatus.RUNNING - prompt.append_token_id(token_id, {token_id: Logprob(0.0)}) - - # Swap seq group from GPU -> CPU. 
- gpu_blocks = block_manager.get_block_table(prompt) - assert block_manager.can_swap_out(seq_group) - before_cpu_blocks = block_manager.get_num_free_cpu_blocks() - before_gpu_blocks = block_manager.get_num_free_gpu_blocks() - mapping = block_manager.swap_out(seq_group) - mapping_keys = [key for key, _ in mapping] - assert mapping_keys == gpu_blocks - after_cpu_blocks = block_manager.get_num_free_cpu_blocks() - after_gpu_blocks = block_manager.get_num_free_gpu_blocks() - assert before_cpu_blocks == after_cpu_blocks + len(gpu_blocks) - assert before_gpu_blocks + len(gpu_blocks) == after_gpu_blocks - prompt.status = SequenceStatus.SWAPPED - - # Swap seq group from CPU -> GPU. - assert block_manager.can_swap_in(seq_group, num_lookahead_slots) - before_cpu_blocks = block_manager.get_num_free_cpu_blocks() - before_gpu_blocks = block_manager.get_num_free_gpu_blocks() - mapping = block_manager.swap_in(seq_group) - cpu_blocks = block_manager.get_block_table(prompt) - mapping_keys = [key for key, _ in mapping] - assert mapping_keys == [cpu_blocks[0]] - after_cpu_blocks = block_manager.get_num_free_cpu_blocks() - after_gpu_blocks = block_manager.get_num_free_gpu_blocks() - assert before_gpu_blocks == after_gpu_blocks + len(cpu_blocks) - - -@pytest.mark.parametrize("block_size", [8]) -@pytest.mark.parametrize("num_gpu_blocks", [4]) -@pytest.mark.parametrize("num_lookahead_slots", [3, 8, 10]) -@pytest.mark.parametrize("enable_caching", [True, False]) -def test_can_swap(block_size, num_gpu_blocks, num_lookahead_slots, - enable_caching): - """ Verify the block manager can correctly determine if a sequence group - can be swapped in/out. - """ - num_cpu_blocks = num_gpu_blocks - block_manager = SelfAttnBlockSpaceManager(block_size, - num_cpu_blocks, - num_gpu_blocks, - watermark=0, - enable_caching=enable_caching) - prompt, seq_group = create_dummy_prompt( - "1", prompt_length=(num_gpu_blocks - 1) * block_size - 1) - prompt.status = SequenceStatus.WAITING - block_manager.allocate(seq_group) - prompt.status = SequenceStatus.RUNNING - - # Swap seq group from GPU -> CPU. - gpu_blocks = block_manager.get_block_table(prompt) - assert block_manager.can_swap_out(seq_group) - before_cpu_blocks = block_manager.get_num_free_cpu_blocks() - before_gpu_blocks = block_manager.get_num_free_gpu_blocks() - mapping = block_manager.swap_out(seq_group) - mapping_keys = [key for key, _ in mapping] - assert mapping_keys == gpu_blocks - after_cpu_blocks = block_manager.get_num_free_cpu_blocks() - after_gpu_blocks = block_manager.get_num_free_gpu_blocks() - assert before_cpu_blocks == after_cpu_blocks + len(gpu_blocks) - assert before_gpu_blocks + len(gpu_blocks) == after_gpu_blocks - prompt.status = SequenceStatus.SWAPPED - - # At this moment, we still have enough free blocks to swap in the seq group. - if num_lookahead_slots <= block_size: - assert block_manager.can_swap_in(seq_group, - num_lookahead_slots) == AllocStatus.OK - else: - assert block_manager.can_swap_in( - seq_group, num_lookahead_slots) == AllocStatus.NEVER - - # During Swapped out, 2 cached blocks were evicted from the GPU, - # so the prompt1 can't be swapped in - prompt2_len = 2 * block_size - 1 - prompt2, seq_group2 = create_dummy_prompt( - "2", - prompt_length=prompt2_len, - prompt_tokens=[10000 + i for i in range(prompt2_len)]) - prompt2.status = SequenceStatus.WAITING - block_manager.allocate(seq_group2) - - # Swap seq group from CPU -> GPU. 
- if num_lookahead_slots <= block_size: - assert block_manager.can_swap_in( - seq_group, num_lookahead_slots) == AllocStatus.LATER - else: - assert block_manager.can_swap_in( - seq_group, num_lookahead_slots) == AllocStatus.NEVER - - -@pytest.mark.parametrize("num_lookahead_slots", [0, 2, 10]) -@pytest.mark.parametrize("enable_caching", [False, True]) -def test_swap_in_infeasible(num_lookahead_slots, enable_caching): - """Verifies that swapping fails if there is not enough free blocks - to account for unseen tokens and lookahead_slots. - """ - block_size = 8 - num_cpu_blocks = 1 - num_gpu_blocks = 1 - block_manager = SelfAttnBlockSpaceManager(block_size, - num_cpu_blocks, - num_gpu_blocks, - watermark=0, - enable_caching=enable_caching) - prompt_length = block_size - 3 - assert prompt_length > 0 - prompt, seq_group = create_dummy_prompt("1", prompt_length=prompt_length) - prompt.status = SequenceStatus.WAITING - block_manager.allocate(seq_group) - # Emulate a forward pass by appending a single token. - # The block manager then knows how many unprocessed - # tokens will be written in the next forward pass. - token_id = 0 - prompt.status = SequenceStatus.RUNNING - prompt.append_token_id(token_id, {token_id: Logprob(0.0)}) - - # Swap seq group from GPU -> CPU. - assert block_manager.can_swap_out(seq_group) - block_manager.swap_out(seq_group) - prompt.status = SequenceStatus.SWAPPED - - # Swap seq group from CPU -> GPU. - # The number of unseen tokens is 1. If the number of existing - # tokens plus the unseen ones and number of lookahead slots exceeds - # the total number of available GPU blocks then the swap - # should fail. - num_unseen_tokens = 1 - if (num_lookahead_slots + num_unseen_tokens + - prompt_length) <= (block_size * num_gpu_blocks): - assert block_manager.can_swap_in(seq_group, - num_lookahead_slots) == AllocStatus.OK - else: - assert block_manager.can_swap_in( - seq_group, num_lookahead_slots) == AllocStatus.NEVER - - -# TODO(cade/kaiyang): add comprehensive tests for swapping at allocator level. - - -@pytest.mark.parametrize("block_size", [8, 16]) -@pytest.mark.parametrize("prompt_len", [10, 300, 1000]) -@pytest.mark.parametrize("num_slots_to_append", [50]) -@pytest.mark.parametrize("sliding_window", [20, 32, 200, 512]) -def test_sliding_window(block_size, prompt_len, num_slots_to_append, - sliding_window): - """Verify append_slots consumes the correct number of blocks from the block - table. 
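Two pieces of arithmetic recur in the block-manager tests above and below and are worth spelling out: the number of blocks consumed by appending tokens is a ceil-division delta, and the sliding-window test that follows bounds block residency by a window-sized block count. A plain-Python restatement with values mirroring the test parameters (not vLLM code):

from math import ceil

def num_blocks(num_tokens: int, block_size: int) -> int:
    # Ceil-division: how many fixed-size KV-cache blocks a token count occupies.
    return ceil(num_tokens / block_size)

block_size = 8
prompt_len, num_slots_to_append, num_lookahead_slots = 7, 129, 10

# Blocks consumed by append_slots: blocks needed for the grown sequence
# minus the blocks the prompt already occupied.
consumed = (num_blocks(prompt_len + num_slots_to_append + num_lookahead_slots,
                       block_size) - num_blocks(prompt_len, block_size))
assert consumed == 19 - 1   # ceil(146 / 8) - ceil(7 / 8)

# Residency bound checked by the sliding-window test below, computed as in
# the deleted test: (window // block_size) + 2, plus one for the null block.
sliding_window = 32
sliding_blocks = sliding_window // block_size + 2 + 1
assert sliding_blocks == 7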
- """ - - num_gpu_blocks = 1024 - watermark = 0.1 - block_manager = SelfAttnBlockSpaceManager( - block_size=block_size, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - watermark=watermark, - sliding_window=sliding_window, - ) - - def check_used(min_n, max_n=None): - if max_n is None: - max_n = min_n - used = num_gpu_blocks - block_manager.get_num_free_gpu_blocks() - assert min_n <= used - assert used <= max_n - - def num_blocks(num_tokens): - return (num_tokens + block_size - 1) // block_size - - check_used(0) - - seq_group = create_seq_group( - seq_prompt_len=prompt_len, - seq_output_lens=[0], - ) - - check_used(0) - - # Allocate seq - assert block_manager.can_allocate(seq_group) - block_manager.allocate(seq_group) - - check_used(num_blocks(prompt_len)) - - # Seq seq to RUNNING - seq = seq_group.get_seqs()[0] - seq.status = SequenceStatus.RUNNING - - seq.data.update_num_computed_tokens(prompt_len) - check_used(num_blocks(prompt_len)) - - # this is how we compute it in SelfAttnBlockSpaceManager.__init__ - sliding_blocks = (sliding_window // block_size) + 2 - # plus one block for null block - sliding_blocks += 1 - - # Append tokens to the sequeqnce - for token_id in range(num_slots_to_append): - seq.append_token_id(token_id, {token_id: Logprob(0.0)}) - seq.data.update_num_computed_tokens(1) - block_manager.append_slots(seq, num_lookahead_slots=0) - if prompt_len < sliding_window + 10: - check_used(0, sliding_blocks + 1) - else: - check_used(sliding_blocks, sliding_blocks + 1) diff --git a/tests/core/block/test_block_table.py b/tests/core/block/test_block_table.py deleted file mode 100644 index ba085001136b..000000000000 --- a/tests/core/block/test_block_table.py +++ /dev/null @@ -1,577 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.core.block.block_table import BlockTable -from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator -from vllm.utils import Device, cdiv, chunk_list - - -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -def test_allocate_naive(block_size: int, sequence_len: int): - """Test the allocation of blocks using the naive allocator. - - This test creates a CpuGpuBlockAllocator with the specified block size and - number of blocks. It then allocates multiple BlockTables with varying - sequence lengths and verifies that the number of free blocks decreases as - expected after each allocation. - """ - assert block_size > 1 - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type="naive", - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size))) - - block_tables: list[BlockTable] = [] - for i in range(5): - assert allocator.get_num_free_blocks( - device=Device.GPU) == num_gpu_blocks - i * num_blocks_per_alloc - - block_tables.append( - BlockTable( - block_size=block_size, - block_allocator=allocator, - )) - block_tables[-1].allocate(token_ids=token_ids, device=Device.GPU) - - -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -def test_allocate_prefix_caching(block_size: int, sequence_len: int): - """Test the allocation of blocks using the prefix caching allocator. 
- - This test creates a CpuGpuBlockAllocator with the specified block size and - number of blocks, using the prefix caching allocator. It then allocates - multiple BlockTables with varying sequence lengths and verifies that the - number of free blocks decreases as expected after each allocation. - - The test expects all sequences to share allocations, except for their last - block, which may be mutable. It calculates the expected number of immutable - and mutable blocks per allocation based on the sequence length and block - size. - """ - assert block_size > 1 - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type="prefix_caching", - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - chunked_tokens = list(chunk_list(token_ids, block_size)) - num_mutable_blocks_per_alloc = 0 if len( - chunked_tokens[-1]) == block_size else 1 - num_immutable_blocks_per_alloc = len( - chunked_tokens) - num_mutable_blocks_per_alloc - - block_tables: list[BlockTable] = [] - for alloc_i in range(1, 6): - - block_tables.append( - BlockTable( - block_size=block_size, - block_allocator=allocator, - )) - block_tables[-1].allocate(token_ids=token_ids, device=Device.GPU) - - # Expect all sequences to share allocations, except for their last block - # (which may be mutable). - assert allocator.get_num_free_blocks( - device=Device.GPU) == num_gpu_blocks - ( - num_immutable_blocks_per_alloc + num_mutable_blocks_per_alloc * - (alloc_i)) - - -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -@pytest.mark.parametrize("device", ["cpu", "gpu"]) -def test_allocate_free(block_size: int, sequence_len: int, allocator_type: str, - device: str): - """Test the allocation and freeing of blocks using different allocators and - devices. - - This test creates a CpuGpuBlockAllocator with the specified block size, - number of blocks, allocator type, and device. It then allocates a BlockTable - multiple times with the same sequence and verifies that the number of free - blocks remains consistent after each allocation and freeing. - """ - device = Device[device.upper()] - - num_device_blocks = 1024 - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_device_blocks, - num_cpu_blocks=num_device_blocks, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - num_blocks_per_alloc = len(list(chunk_list(token_ids, block_size))) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - for i in range(5): - block_table.allocate(token_ids=token_ids, device=device) - assert allocator.get_num_free_blocks( - device) == num_device_blocks - num_blocks_per_alloc - assert all(block_id is not None - for block_id in block_table.physical_block_ids) - - block_table.free() - assert allocator.get_num_free_blocks(device) == num_device_blocks - - -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("append_len", [1, 16, 129]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_append_token_ids_allocation(block_size: int, sequence_len: int, - append_len: int, allocator_type: str): - """Test the allocation behavior when appending token IDs to a BlockTable. 
- - This test creates a CpuGpuBlockAllocator with the specified block size, - number of blocks, and allocator type. It then allocates a BlockTable with an - initial sequence and appends additional token IDs to it. The test verifies - that the number of allocated blocks before and after appending matches the - expected values. - """ - - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - token_ids_to_append = list(range(append_len)) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - num_expected_blocks_before_append = len( - list(chunk_list(token_ids, block_size))) - num_expected_appended_blocks = len( - list(chunk_list(token_ids + token_ids_to_append, - block_size))) - num_expected_blocks_before_append - - block_table.allocate(token_ids=token_ids, device=Device.GPU) - - assert len( - block_table.physical_block_ids) == num_expected_blocks_before_append - block_table.append_token_ids(token_ids_to_append) - assert len( - block_table.physical_block_ids - ) == num_expected_blocks_before_append + num_expected_appended_blocks - - -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("num_empty_slots", [1, 16, 129]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_ensure_num_empty_slots_allocation(block_size: int, sequence_len: int, - num_empty_slots: int, - allocator_type: str): - """Test the allocation behavior when ensuring a certain number of empty - slots in a BlockTable. - - This test creates a CpuGpuBlockAllocator with the specified block size, - number of blocks, and allocator type. It then allocates a BlockTable with an - initial sequence and ensures a certain number of empty slots. The test - verifies that the number of allocated blocks before and after ensuring empty - slots matches the expected values. It also checks that filling up the empty - slots does not consume additional blocks. - """ - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - num_expected_blocks_before_append = len( - list(chunk_list(token_ids, block_size))) - num_expected_appended_blocks = len( - list(chunk_list(token_ids + [-1] * num_empty_slots, - block_size))) - num_expected_blocks_before_append - - block_table.allocate(token_ids=token_ids, device=Device.GPU) - - # Assert that the empty slots consume the expected number of additional - # blocks. - assert len( - block_table.physical_block_ids) == num_expected_blocks_before_append - block_table.ensure_num_empty_slots(num_empty_slots) - assert len( - block_table.physical_block_ids - ) == num_expected_blocks_before_append + num_expected_appended_blocks - - # Now, ensure no additional blocks consumed as we fill up the empty slots. 
- num_free_blocks = allocator.get_num_free_blocks(device=Device.GPU) - block_table.append_token_ids(token_ids=list(range(num_empty_slots))) - assert num_free_blocks == allocator.get_num_free_blocks(device=Device.GPU) - - -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("sequence_len", [1, 9]) -@pytest.mark.parametrize("append_len", [1, 16, 129]) -@pytest.mark.parametrize("append_size", [1, 4, 129]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_append_token_ids_correct_content(block_size: int, sequence_len: int, - append_len: int, allocator_type: str, - append_size: int): - """Verify token ids are correctly appended. Appends various amounts of - token ids in various append sizes, and verifies the final sequence is - correct. - """ - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=1024, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - token_ids_to_append = list(range(append_len)) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - block_table.allocate(token_ids=token_ids, device=Device.GPU) - - appended_so_far: list[int] = [] - for append in chunk_list(token_ids_to_append, append_size): - block_table.append_token_ids(append) - appended_so_far.extend(append) - - assert block_table._get_all_token_ids() == token_ids + appended_so_far - - assert block_table._get_all_token_ids() == token_ids + token_ids_to_append - - -@pytest.mark.parametrize("seq_len", [1, 9, 129]) -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_fork(seq_len: int, block_size: int, allocator_type: str): - """Create a sequence using the specified allocator. - 1. Assert that after forking the sequence, the free block count is the - same. - 2. Assert that the forked sequence has the same physical mappings. - 3. Then free the original sequence; verify that the free block count is - the same. - 4. Finally, free the forked sequence and verify that the free block - count drops to zero. - """ - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - block_size=block_size, - ) - - token_ids = list(range(seq_len)) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - block_table.allocate(token_ids) - - num_free_blocks_before_fork = allocator.get_num_free_blocks( - device=Device.GPU) - - forked_block_table = block_table.fork() - - # Expect physical_block_ids and token_ids to match. - assert (block_table.physical_block_ids == - forked_block_table.physical_block_ids) - assert block_table._get_all_token_ids( - ) == forked_block_table._get_all_token_ids() - - # Do not expect any additional allocations. - assert allocator.get_num_free_blocks( - device=Device.GPU) == num_free_blocks_before_fork - - # Free the original blocks. Assert num free blocks does not change, since - # refcount is nonzero. - block_table.free() - assert allocator.get_num_free_blocks( - device=Device.GPU) == num_free_blocks_before_fork - - # Expect the forked block table to be unaffected by the free. - assert all(block_id is not None - for block_id in forked_block_table.physical_block_ids) - - # Free the forked blocks. Assert num free blocks does change, since - # refcount is now zero. 
- forked_block_table.free() - assert allocator.get_num_free_blocks(device=Device.GPU) == num_gpu_blocks - - -@pytest.mark.parametrize("block_size", [8]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("append_len", [1, 16, 129]) -@pytest.mark.parametrize("appender", ["forked", "original"]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_cow(block_size: int, sequence_len: int, append_len: int, - allocator_type: str, appender: str): - """Fork a sequence; append to the forked sequence; verify there's a CoW. - """ - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - token_ids_to_append = list(range(append_len)) - - original_block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - num_expected_non_cow_blocks = cdiv(sequence_len, block_size) - num_expected_cow_blocks = cdiv(sequence_len + append_len, - block_size) - (sequence_len // block_size) - - original_block_table.allocate(token_ids=token_ids, device=Device.GPU) - original_block_ids = original_block_table.physical_block_ids[:] - - print("original_block_ids = {}".format(original_block_ids)) - forked_block_table = original_block_table.fork() - - # Expect no additional allocation (copy on _write_). - assert allocator.get_num_free_blocks( - Device.GPU) == (num_gpu_blocks - num_expected_non_cow_blocks) - - if appender == "forked": - appender_block_table = forked_block_table - static_block_table = original_block_table - elif appender == "original": - appender_block_table = original_block_table - static_block_table = forked_block_table - else: - raise ValueError(f"unknown test config {appender=}") - - # Write tokens. - appender_block_table.append_token_ids(token_ids_to_append) - - # Expect the non-appending block table to have no change. - assert static_block_table.physical_block_ids == original_block_ids - assert appender_block_table.physical_block_ids != original_block_ids - - # Expect the blocks changed during append to have a CoW. - assert allocator.get_num_free_blocks( - Device.GPU) == num_gpu_blocks - (num_expected_non_cow_blocks + - num_expected_cow_blocks) - - cows = allocator.clear_copy_on_writes() - if sequence_len % block_size > 0: - # If the last block in the sequence is not full, then when appending we - # expect a CoW. - assert cows - - cow_block_id = sequence_len // block_size - expected_src = static_block_table.physical_block_ids[cow_block_id] - expected_dst = appender_block_table.physical_block_ids[cow_block_id] - - assert (expected_src, expected_dst) in cows - else: - # Otherwise, there should be no copy-on-write. - assert not cows - - static_block_table.free() - appender_block_table.free() - - # After free, expect all blocks to be freed. - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - -@pytest.mark.parametrize("block_size", [8]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("append_len", [1, 16, 129]) -@pytest.mark.parametrize("lookahead_slots", [1, 16, 129]) -@pytest.mark.parametrize("appender", ["forked", "original"]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_cow_lookahead_simple(block_size: int, sequence_len: int, - append_len: int, lookahead_slots: int, - allocator_type: str, appender: str): - """Similar to test_cow, except with lookahead allocation. 
The assertions are - less rigorous due to the complexity of the property under test. - """ - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - token_ids_to_append = list(range(append_len)) - - original_block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - original_block_table.allocate(token_ids=token_ids, device=Device.GPU) - - # Allocate lookahead slots. - original_block_table.ensure_num_empty_slots(lookahead_slots) - original_block_ids = original_block_table.physical_block_ids[:] - - forked_block_table = original_block_table.fork() - - if appender == "forked": - appender_block_table = forked_block_table - static_block_table = original_block_table - elif appender == "original": - appender_block_table = original_block_table - static_block_table = forked_block_table - else: - raise ValueError(f"unknown test config {appender=}") - - # Write tokens. - appender_block_table.append_token_ids(token_ids_to_append) - - # Expect the non-appending block table to have no change. - assert static_block_table.physical_block_ids == original_block_ids - assert appender_block_table.physical_block_ids != original_block_ids - - cows = allocator.clear_copy_on_writes() - - # Always expect copy-on-write - assert cows - - if sequence_len % block_size > 0: - # If the last block in the sequence is not full, then when appending we - # expect a CoW. - assert cows - - cow_block_id = sequence_len // block_size - expected_src = static_block_table.physical_block_ids[cow_block_id] - expected_dst = appender_block_table.physical_block_ids[cow_block_id] - - assert (expected_src, expected_dst) in cows - - static_block_table.free() - appender_block_table.free() - - # After free, expect all blocks to be freed. - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - -@pytest.mark.parametrize("block_size", [1, 8]) -@pytest.mark.parametrize("sequence_len", [1, 16, 129]) -@pytest.mark.parametrize("num_new_tokens", [1, 16, 129]) -@pytest.mark.parametrize("num_lookahead_slots", [1, 7, 8]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_num_blocks_touched_by_append_slots(block_size: int, sequence_len: int, - num_new_tokens: int, - num_lookahead_slots: int, - allocator_type: str): - """Verify correct calculation of get_num_blocks_touched_by_append_slots. - - This is done by using copy-on-write, which requires any modified block to - be copied before write if the refcount > 1. We set the refcount>1 by forking - a sequence, then measure the free blocks before and after an append. If the - number of consumed blocks equals what `get_num_blocks_touched_by_append_ - slots` returns, then the calculation is correct. - """ - - num_gpu_blocks = 1024 - - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=0, - block_size=block_size, - ) - - token_ids = list(range(sequence_len)) - token_ids_to_append = list(range(num_new_tokens)) - - block_table = BlockTable( - block_size=block_size, - block_allocator=allocator, - ) - - block_table.allocate(token_ids=token_ids, device=Device.GPU) - - # Add lookahead before fork so both sequences have the same lookahead - # blocks. - block_table.ensure_num_empty_slots(num_empty_slots=num_lookahead_slots) - - # Fork sequence so that every block has refcount > 1. 
- _ = block_table.fork() - - # Determine how many blocks should be touched. - expected_num_touched_blocks = ( - block_table.get_num_blocks_touched_by_append_slots( - token_ids=token_ids_to_append, - num_lookahead_slots=num_lookahead_slots)) - - # Measure how many blocks are touched by measuring num_free_blocks before - # and after the append. - # - # We expect append_token_ids to CoW all mutated blocks that have refcount>1. - num_free_blocks_before_append = allocator.get_num_free_blocks(Device.GPU) - block_table.append_token_ids(token_ids_to_append, num_lookahead_slots) - num_consumed_blocks = (num_free_blocks_before_append - - allocator.get_num_free_blocks(Device.GPU)) - - # TODO(cade) ensure equality when num_lookahead_slots > 0. - # The reason we have < is because lookahead blocks are not copied eagerly; - # they are copied on first write. This will cause issues for beam search + - # speculative decoding. This is acceptable for now as it is a large effort - # to combine the two. To fix this, we can ensure single sequence ownership - # of lookahead blocks by appending empty slots to each block, which will - # trigger the CoW. - # - # Until then, we can accept that the consumed tokens are <= the expected - # tokens when appending with lookahead. - if num_lookahead_slots > 0: - assert num_consumed_blocks <= expected_num_touched_blocks - else: - assert num_consumed_blocks == expected_num_touched_blocks diff --git a/tests/core/block/test_common.py b/tests/core/block/test_common.py deleted file mode 100644 index 65400899b811..000000000000 --- a/tests/core/block/test_common.py +++ /dev/null @@ -1,45 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import random - -import pytest - -from vllm.core.block.common import RefCounter - - -@pytest.mark.parametrize("seed", list(range(20))) -@pytest.mark.parametrize("num_incrs", [1, 100]) -@pytest.mark.parametrize("num_blocks", [1024]) -def test_incr(seed: int, num_incrs: int, num_blocks: int): - random.seed(seed) - - all_block_indices = list(range(num_blocks)) - counter = RefCounter(all_block_indices=all_block_indices) - - block_id = random.randint(0, num_blocks - 1) - for i in range(num_incrs): - value = counter.incr(block_id) - assert value == i + 1 - - -@pytest.mark.parametrize("seed", list(range(20))) -@pytest.mark.parametrize("num_incrs", [1, 100]) -@pytest.mark.parametrize("num_blocks", [1024]) -def test_incr_decr(seed: int, num_incrs: int, num_blocks: int): - random.seed(seed) - - all_block_indices = list(range(num_blocks)) - counter = RefCounter(all_block_indices=all_block_indices) - - block_id = random.randint(0, num_blocks - 1) - for i in range(num_incrs): - value = counter.incr(block_id) - assert value == i + 1 - - for i in range(num_incrs): - value = counter.decr(block_id) - assert value == num_incrs - (i + 1) - - with pytest.raises(AssertionError): - counter.decr(block_id) diff --git a/tests/core/block/test_cpu_gpu_block_allocator.py b/tests/core/block/test_cpu_gpu_block_allocator.py deleted file mode 100644 index 795eef6743fd..000000000000 --- a/tests/core/block/test_cpu_gpu_block_allocator.py +++ /dev/null @@ -1,96 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator -from vllm.utils import Device, chunk_list - - -@pytest.mark.parametrize("num_cpu_blocks", [0, 512]) -@pytest.mark.parametrize("num_gpu_blocks", 
[1024]) -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_allocate_mutable_block(num_cpu_blocks: int, num_gpu_blocks: int, - block_size: int, allocator_type: str): - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=num_cpu_blocks, - block_size=block_size, - ) - - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - cpu_blocks = [ - allocator.allocate_mutable_block(prev_block=None, device=Device.CPU) - for _ in range(num_cpu_blocks) - ] - assert allocator.get_num_free_blocks(Device.CPU) == 0 - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - gpu_blocks = [ - allocator.allocate_mutable_block(prev_block=None, device=Device.GPU) - for _ in range(num_gpu_blocks) - ] - assert allocator.get_num_free_blocks(Device.CPU) == 0 - assert allocator.get_num_free_blocks(Device.GPU) == 0 - - _ = [allocator.free(block) for block in cpu_blocks] - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == 0 - - _ = [allocator.free(block) for block in gpu_blocks] - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - -@pytest.mark.parametrize("num_cpu_blocks", [0, 512]) -@pytest.mark.parametrize("num_gpu_blocks", [1024]) -@pytest.mark.parametrize("block_size", [2]) -@pytest.mark.parametrize("allocator_type", ["naive", "prefix_caching"]) -def test_allocate_immutable_block(num_cpu_blocks: int, num_gpu_blocks: int, - block_size: int, allocator_type: str): - allocator = CpuGpuBlockAllocator.create( - allocator_type=allocator_type, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=num_cpu_blocks, - block_size=block_size, - ) - - unique_token_ids = list( - range((num_cpu_blocks + num_gpu_blocks) * block_size)) - gpu_token_ids = list( - chunk_list(unique_token_ids[:num_gpu_blocks * block_size], block_size)) - cpu_token_ids = list( - chunk_list(unique_token_ids[num_gpu_blocks * block_size:], block_size)) - - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - cpu_blocks = [ - allocator.allocate_immutable_block(prev_block=None, - token_ids=token_ids, - device=Device.CPU) - for token_ids in cpu_token_ids - ] - assert allocator.get_num_free_blocks(Device.CPU) == 0 - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks - - gpu_blocks = [ - allocator.allocate_immutable_block(prev_block=None, - token_ids=token_ids, - device=Device.GPU) - for token_ids in gpu_token_ids - ] - assert allocator.get_num_free_blocks(Device.CPU) == 0 - assert allocator.get_num_free_blocks(Device.GPU) == 0 - - _ = [allocator.free(block) for block in cpu_blocks] - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == 0 - - _ = [allocator.free(block) for block in gpu_blocks] - assert allocator.get_num_free_blocks(Device.CPU) == num_cpu_blocks - assert allocator.get_num_free_blocks(Device.GPU) == num_gpu_blocks diff --git a/tests/core/block/test_naive_block.py b/tests/core/block/test_naive_block.py deleted file mode 100644 index a31d1c46b37f..000000000000 --- a/tests/core/block/test_naive_block.py +++ /dev/null @@ -1,148 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# 
SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import Optional - -import pytest - -from vllm.core.block.interfaces import Block, BlockAllocator -from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator - - -class TestNaiveBlockAllocator: - - @staticmethod - def create_allocate_lambda(allocate_type: str, - allocator: NaiveBlockAllocator, - prev_block: Optional[Block], - token_ids: list[int]): - if allocate_type == "immutable": - allocate_block = lambda: allocator.allocate_immutable_block( - prev_block=prev_block, token_ids=token_ids) - elif allocate_type == "mutable": - allocate_block = lambda: allocator.allocate_mutable_block( - prev_block=prev_block) - else: - raise ValueError() - - return allocate_block - - @staticmethod - @pytest.mark.parametrize("allocate_type", ["immutable", "mutable"]) - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_allocate_ooms(allocate_type: str, num_blocks: int, - block_size: int): - allocator = NaiveBlockAllocator(create_block=NaiveBlock, - num_blocks=num_blocks, - block_size=block_size) - allocate_block = TestNaiveBlockAllocator.create_allocate_lambda( - allocate_type, - allocator, - prev_block=None, - token_ids=list(range(block_size))) - - [allocate_block() for _ in range(num_blocks)] - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocate_block() - - @staticmethod - @pytest.mark.parametrize("allocate_type", ["immutable", "mutable"]) - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_free_prevents_oom(allocate_type: str, num_blocks: int, - block_size: int): - allocator = NaiveBlockAllocator(create_block=NaiveBlock, - num_blocks=num_blocks, - block_size=block_size) - allocate_block = TestNaiveBlockAllocator.create_allocate_lambda( - allocate_type, - allocator, - prev_block=None, - token_ids=list(range(block_size))) - - blocks = [allocate_block() for _ in range(num_blocks)] - - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocate_block() - - block_to_free = blocks.pop() - - for _ in range(100): - block_id = block_to_free.block_id - allocator.free(block_to_free) - assert block_to_free.block_id is None - - new_block = allocate_block() - assert new_block.block_id == block_id - - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocate_block() - - block_to_free = new_block - - @staticmethod - @pytest.mark.parametrize("allocate_type", ["immutable", "mutable"]) - @pytest.mark.parametrize("num_blocks", [1024]) - @pytest.mark.parametrize("block_size", [16]) - def test_get_num_free_blocks(allocate_type: str, num_blocks: int, - block_size: int): - allocator = NaiveBlockAllocator(create_block=NaiveBlock, - num_blocks=num_blocks, - block_size=block_size) - allocate_block = TestNaiveBlockAllocator.create_allocate_lambda( - allocate_type, - allocator, - prev_block=None, - token_ids=list(range(block_size))) - - assert allocator.get_num_free_blocks() == num_blocks - - blocks = [allocate_block() for _ in range(num_blocks)] - - for i, block in enumerate(blocks): - assert allocator.get_num_free_blocks() == i - allocator.free(block) - - @staticmethod - @pytest.mark.parametrize("num_blocks", [4]) - @pytest.mark.parametrize("block_size", [8]) - def test_naive_block_get_num_full_blocks_touched(num_blocks, block_size): - """ Verify the allocator can correctly return the number of - full blocks touched. 
- """ - allocator_src = NaiveBlockAllocator(create_block=NaiveBlock, - num_blocks=num_blocks, - block_size=block_size) - allocator_dst = NaiveBlockAllocator(create_block=NaiveBlock, - num_blocks=num_blocks, - block_size=block_size) - - # Create a chain of cacheable blocks in the dst - allocate_block = TestNaiveBlockAllocator.create_allocate_lambda( - "immutable", - allocator_src, - prev_block=None, - token_ids=list(range(block_size))) - src_blocks = [allocate_block() for _ in range(num_blocks - 1)] - - # All blocks are cached - assert allocator_dst.get_num_full_blocks_touched( - src_blocks) == num_blocks - 1 - - # Insert one non-full block in the src - allocate_non_full_block = \ - TestNaiveBlockAllocator.create_allocate_lambda( - "mutable", allocator_src, - prev_block=src_blocks[-1],token_ids=[] - ) - src_blocks.append(allocate_non_full_block()) - src_blocks[-1].append_token_ids([0]) - - assert allocator_dst.get_num_full_blocks_touched( - src_blocks) == num_blocks - 1 - # Fill up the last source block and then invoke - # get_num_blocks_touched - src_blocks[-1].append_token_ids([0] * (block_size - 1)) - assert allocator_dst.get_num_full_blocks_touched( - src_blocks) == num_blocks diff --git a/tests/core/block/test_prefix_caching_block.py b/tests/core/block/test_prefix_caching_block.py deleted file mode 100644 index 46e224c6f53b..000000000000 --- a/tests/core/block/test_prefix_caching_block.py +++ /dev/null @@ -1,1035 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import math -import random -from typing import Optional -from unittest.mock import MagicMock - -import pytest - -from tests.core.utils import create_dummy_lora_sequence, create_dummy_sequence -from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator -from vllm.core.block.interfaces import Block, BlockAllocator -from vllm.core.block.prefix_caching_block import (ComputedBlocksTracker, - PrefixCachingBlock, - PrefixCachingBlockAllocator) -from vllm.sequence import Logprob -from vllm.utils import Device - - -class TestPrefixCachingBlock: - - @staticmethod - @pytest.mark.parametrize("seed", list(range(10))) - @pytest.mark.parametrize("block_size", [1, 16]) - @pytest.mark.parametrize("is_curr_block_full", [True, False]) - def test_first_block_has_correct_content_hash(seed: int, block_size: int, - is_curr_block_full: bool): - """Verify a block which is first in the sequence has the correct hash. - """ - random.seed(seed) - num_to_fill = block_size if is_curr_block_full else random.randint( - 0, block_size - 1) - token_ids = list(range(num_to_fill)) - mock_allocator = MagicMock(spec=PrefixCachingBlockAllocator) - - block_with_prev = PrefixCachingBlock(prev_block=None, - token_ids=token_ids, - block_size=block_size, - allocator=mock_allocator) - - if is_curr_block_full: - # Expect hash since block is full. - assert block_with_prev.content_hash == ( - PrefixCachingBlock.hash_block_tokens( - is_first_block=True, - prev_block_hash=None, - cur_block_token_ids=token_ids)) - else: - # Do not expect hash since block is not full. 
- assert block_with_prev.content_hash is None - - @staticmethod - @pytest.mark.parametrize("seed", list(range(10))) - @pytest.mark.parametrize("block_size", [1, 16]) - @pytest.mark.parametrize("is_curr_block_full", [True, False]) - @pytest.mark.parametrize("prev_block_has_hash", [True, False]) - def test_nth_block_has_correct_content_hash(seed: int, block_size: int, - is_curr_block_full: bool, - prev_block_has_hash: bool): - """Verify a block which is not first in the sequence has the correct - hash. - """ - - random.seed(seed) - - previous_block = MagicMock(spec=PrefixCachingBlock) - prev_block_hash = random.randint(0, 1000) - previous_block.content_hash = (prev_block_hash if prev_block_has_hash - else hash('None')) - - num_to_fill = block_size if is_curr_block_full else random.randint( - 0, block_size - 1) - token_ids = list(range(num_to_fill)) - mock_allocator = MagicMock(spec=PrefixCachingBlockAllocator) - - block_with_prev = PrefixCachingBlock( - prev_block=previous_block, - token_ids=token_ids, - block_size=block_size, - allocator=mock_allocator, - ) - - if is_curr_block_full and prev_block_has_hash: - # Expect hash since block is full and previous block has hash. - assert (block_with_prev.content_hash == - PrefixCachingBlock.hash_block_tokens( - is_first_block=False, - prev_block_hash=prev_block_hash, - cur_block_token_ids=token_ids)) - else: - # Do not expect hash since block is not full or the previous block - # does not have a hash. - assert block_with_prev.content_hash is None - - @staticmethod - @pytest.mark.parametrize("block_size", [1, 2, 16]) - @pytest.mark.parametrize("num_tokens", list(range(3))) - @pytest.mark.parametrize("num_empty_trailing_blocks", [0, 1, 10]) - def test_blocks_have_correct_hash_in_chain(block_size: int, - num_tokens: int, - num_empty_trailing_blocks: int): - """Create two chains of logical blocks with the same contents. - Assert the hashes are equal. - """ - random.seed(0) - - token_ids = [random.randint(0, 50_000) for _ in range(num_tokens)] - - first_chain, second_chain = (TestPrefixCachingBlock.create_chain( - block_size=block_size, - token_ids=token_ids, - num_empty_trailing_blocks=num_empty_trailing_blocks) - for _ in range(2)) - - for first_chain_block, second_chain_block in zip( - first_chain, second_chain): - assert (first_chain_block.content_hash == - second_chain_block.content_hash) - - if not first_chain or not second_chain: - assert first_chain == second_chain - assert num_tokens == 0 - - @staticmethod - def create_chain(block_size: int, - token_ids: list[int], - num_empty_trailing_blocks=0) -> list[PrefixCachingBlock]: - """Helper method which creates a chain of blocks. 
- """ - blocks: list[PrefixCachingBlock] = [] - num_blocks = math.ceil( - len(token_ids) / block_size) + num_empty_trailing_blocks - - if num_blocks == 0: - return [] - - allocator = MagicMock(spec=PrefixCachingBlockAllocator) - - prev_block = None - for block_number in range(0, num_blocks): - prev_block = PrefixCachingBlock( - prev_block=prev_block, - token_ids=[], - block_size=block_size, - allocator=allocator, - ) - - tokens_to_append = token_ids[block_number * - block_size:(block_number + 1) * - block_size] - if tokens_to_append: - prev_block.append_token_ids(tokens_to_append) - - blocks.append(prev_block) - - return blocks - - -class TestPrefixCachingBlockAllocator: - - @staticmethod - def create_allocate_lambda(allocate_type: str, allocator: BlockAllocator, - prev_block: Optional[Block], - token_ids: list[int]): - if allocate_type == "immutable": - allocate_block = lambda: allocator.allocate_immutable_block( - prev_block=prev_block, token_ids=token_ids) - elif allocate_type == "mutable": - allocate_block = lambda: allocator.allocate_mutable_block( - prev_block=prev_block) - else: - raise ValueError() - - return allocate_block - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_allocate_mutable_ooms(num_blocks: int, block_size: int): - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - allocate_block = TestPrefixCachingBlockAllocator.create_allocate_lambda( - allocate_type="mutable", - allocator=allocator, - prev_block=None, - token_ids=list(range(block_size)), - ) - - [allocate_block() for _ in range(num_blocks)] - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocate_block() - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_allocate_immutable_does_not_oom_single_hash( - num_blocks: int, block_size: int): - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - allocate_block = TestPrefixCachingBlockAllocator.create_allocate_lambda( - allocate_type="immutable", - allocator=allocator, - prev_block=None, - token_ids=list(range(block_size)), - ) - - blocks = [allocate_block() for _ in range(num_blocks)] - - # Expect no OOM. If these were mutable blocks, this would OOM. - non_oom_block = allocate_block() - - # Expect all blocks to have same physical block index. - for block in blocks: - assert (block.block_id == non_oom_block.block_id) - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_allocate_immutable_ooms_many_hash(num_blocks: int, - block_size: int): - """Consume all blocks using many different hashes/block content. - - Do this by creating a sequence that is very long. - Expect next block to OOM. - """ - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - - # Create token ids that will exhaust all blocks. - token_ids = list(range(num_blocks * block_size)) - - chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Expect allocation with unseen hash to fail. - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocator.allocate_immutable_block(prev_block=chain[-1], - token_ids=list( - range(block_size))) - - # Expect mutable allocation to fail. 
- with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocator.allocate_mutable_block(prev_block=chain[-1]) - - # Expect allocation of exact same chain to pass. - second_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Expect physical block indices to be the same in both chains. - assert chain and second_chain - for first_chain_block, second_chain_block in zip(chain, second_chain): - assert (first_chain_block.block_id == second_chain_block.block_id) - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1, 1024]) - @pytest.mark.parametrize("block_size", [1, 16]) - def test_free_prevents_oom(num_blocks: int, block_size: int): - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - - # Create token ids that will exhaust all blocks. - token_ids = list(range(num_blocks * block_size)) - - chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Expect mutable allocation to fail. - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocator.allocate_mutable_block(prev_block=None) - - block_to_free = chain[-1] - - # Expect free/allocate loop to succeed many times. - for i in range(100): - block_id = block_to_free.block_id - allocator.free(block_to_free) - assert block_to_free.block_id is None, i - - new_block = allocator.allocate_mutable_block(prev_block=None) - assert new_block.block_id == block_id, i - - with pytest.raises(BlockAllocator.NoFreeBlocksError): - allocator.allocate_mutable_block(prev_block=None) - - block_to_free = new_block - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1024]) - @pytest.mark.parametrize("block_size", [16]) - @pytest.mark.parametrize("seed", list(range(20))) - def test_get_num_free_blocks(num_blocks: int, block_size: int, seed: int): - random.seed(seed) - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - num_blocks_to_consume = random.randint(1, num_blocks - 1) - - # Create token ids that will exhaust all blocks. - token_ids = list(range(num_blocks_to_consume * block_size)) - - chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Free each block in chain, assert num free blocks includes new free - # block. - for i, block in enumerate(chain): - assert allocator.get_num_free_blocks() == (num_blocks - - num_blocks_to_consume + - i) - allocator.free(block) - - @staticmethod - @pytest.mark.parametrize("num_blocks", [4]) - @pytest.mark.parametrize("block_size", [8]) - def test_prefix_caching_block_get_num_full_blocks_touched( - num_blocks, block_size): - """ Verify the allocator can correctly return the number of - blocks touched, when there are cached prefixes. 
- """ - allocator_src = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - allocator_dst = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - - # Create token ids that will exhaust all blocks except the last - token_ids = list(range((num_blocks - 1) * block_size)) - - # Create a chain of cacheable blocks in the dst - cached_blocks = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator_dst, - ) - - # Create a chain of the same blocks in the src - blocks_to_swap_in = \ - TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator_src, - ) - # All blocks are cached - assert allocator_dst.get_num_full_blocks_touched( - blocks_to_swap_in) == 0 - - # Free the first block in the dst - allocator_dst.free(cached_blocks[0]) - - # Now the first block becomes dangling, the swapped blocks need - # to reclaim the first block in the dst - assert allocator_dst.get_num_full_blocks_touched( - blocks_to_swap_in) == 1 - - # Insert one non-full block in the src - non_full_block = allocator_src.allocate_mutable_block( - blocks_to_swap_in[-1]) - non_full_block.append_token_ids([0]) - blocks_to_swap_in.append(non_full_block) - assert allocator_dst.get_num_full_blocks_touched( - blocks_to_swap_in) == 1 - # Fill up the last mutable block and invoke get_num_blocks_touched. - # Note: The last block is not cached so it will be touched. - non_full_block.append_token_ids([0] * (block_size - 1)) - assert allocator_dst.get_num_full_blocks_touched( - blocks_to_swap_in) == 2 - - @staticmethod - @pytest.mark.parametrize("num_blocks", [1024]) - @pytest.mark.parametrize("block_size", [16]) - @pytest.mark.parametrize("seed", list(range(20))) - def test_get_num_free_blocks_shared(num_blocks: int, block_size: int, - seed: int): - """Verify sharing occurs by allocating two sequences that share prefixes - and incrementally freeing blocks. - """ - random.seed(seed) - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - num_blocks_to_consume = random.randint(1, num_blocks - 1) - - # Create token ids that will exhaust all blocks. - token_ids = list(range(num_blocks_to_consume * block_size)) - - first_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - second_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Free each block in the first chain. Since all blocks are shared, the - # free count should stay constant. - for i, block in enumerate(first_chain): - assert allocator.get_num_free_blocks() == (num_blocks - - num_blocks_to_consume) - allocator.free(block) - - # Free each block in the second chain. Since the refcount is now zero, - # the free count should increment with each free. 
-        for i, block in enumerate(second_chain):
-            assert allocator.get_num_free_blocks() == (num_blocks -
-                                                       num_blocks_to_consume +
-                                                       i)
-            allocator.free(block)
-
-    @staticmethod
-    @pytest.mark.parametrize("num_blocks", [1024])
-    @pytest.mark.parametrize("block_size", [16])
-    @pytest.mark.parametrize("seed", list(range(20)))
-    def test_get_common_computed_block_ids(num_blocks: int, block_size: int,
-                                           seed: int):
-        """Verify that get_common_computed_block_ids returns the correct
-        result by creating two immutable chains that share a prefix up to a
-        specified position, and checking the returned common block ids
-        against that shared prefix.
-        """
-        random.seed(seed)
-        allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks * 2,
-                                                block_size=block_size)
-        num_blocks_to_consume = random.randint(1, num_blocks - 1)
-
-        # Create token ids that will exhaust all blocks.
-        token_ids = list(range(num_blocks_to_consume * block_size))
-
-        first_chain = TestPrefixCachingBlockAllocator.create_immutable_chain(
-            block_size=block_size,
-            token_ids=token_ids,
-            allocator=allocator,
-        )
-
-        # After zero_point, second_chain's token_ids are set to -1, which
-        # makes them differ from first_chain's token_ids.
-        zero_point = random.randint(1, len(token_ids) - 1)
-        zero_point_blocks = zero_point // block_size
-        token_ids[zero_point:] = [-1] * (len(token_ids) - zero_point)
-
-        second_chain = TestPrefixCachingBlockAllocator.create_immutable_chain(
-            block_size=block_size,
-            token_ids=token_ids,
-            allocator=allocator,
-        )
-
-        first_computed_ids = [
-            first_chain[i].block_id for i in range(num_blocks_to_consume)
-        ]
-        second_computed_ids = [
-            second_chain[i].block_id for i in range(num_blocks_to_consume)
-        ]
-        res = allocator.get_common_computed_block_ids(
-            [first_computed_ids, second_computed_ids])
-
-        assert (len(res) == zero_point_blocks)
-
-    # Test case asserting that a block promoted after the first immutable
-    # block is released back into the hashless allocator, while the first
-    # immutable block's refcount is increased.
-    @staticmethod
-    @pytest.mark.parametrize("num_blocks", [3])
-    @pytest.mark.parametrize("block_size", [16])
-    @pytest.mark.parametrize("seed", list(range(10)))
-    def test_alloc_promotion(num_blocks: int, block_size: int, seed: int):
-        random.seed(seed)
-
-        allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks,
-                                                block_size=block_size)
-        token_ids = list(range(block_size))
-
-        block = allocator.allocate_immutable_block(prev_block=None,
-                                                   token_ids=token_ids)
-
-        assert allocator._refcounter.get(block.block_id) == 1
-        m = allocator.allocate_mutable_block(prev_block=None)
-
-        block_id = m.block_id
-        for i in range(block_size):
-            m.append_token_ids([i])
-
-        # After the block is promoted from mutable to immutable, if a block
-        # with the same content hash already exists, the promoted block is
-        # released into the hashless_allocator and the first immutable
-        # block's refcount is increased by 1.
-        assert m.block_id == block.block_id
-        assert block_id in allocator._hashless_allocator._free_block_indices
-        assert allocator._refcounter.get(block.block_id) == 2
-
-    # Test case where eviction and allocation are mixed,
-    # to make sure they work as expected
-    @staticmethod
-    @pytest.mark.parametrize("num_blocks", [3])
-    @pytest.mark.parametrize("block_size", [16])
-    @pytest.mark.parametrize("seed", list(range(10)))
-    def test_eviction_alloc_mixed(num_blocks: int, block_size: int, seed: int):
-        random.seed(seed)
-
-        all_blocks_list = [i for i in range(num_blocks)]
-        zero_ref = {i: 0 for i in range(num_blocks)}
-        one_ref = {i: 1 for i in range(num_blocks)}
-        allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks,
-                                                block_size=block_size)
-        token_ids = list(range(num_blocks * block_size))
-
-        # Verify initial/pre-alloc state
-
-        # Ensure all blocks are free inside hashless allocator
-        assert list(allocator._hashless_allocator._free_block_indices
-                    ) == all_blocks_list
-        # Ensure no tracked blocks
-        assert len(allocator._block_tracker.keys()) == num_blocks
-        for block_id in range(num_blocks):
-            assert not allocator._block_tracker[block_id].active
-        # Ensure no cached blocks
-        assert len(allocator._cached_blocks.values()) == 0
-        # Ensure no evicted blocks
-        assert len(allocator.evictor.free_table.keys()) == 0
-        # Ensure zero refcounts for all blocks
-        assert allocator._refcounter._refcounts == zero_ref
-
-        # Allocate immutable chains, each consisting of a single block
-        new_block = []
-        for i in range(num_blocks):
-            block = allocator.allocate_immutable_block(
-                prev_block=None,
-                token_ids=token_ids[block_size * i:block_size * (i + 1)])
-            new_block.append(block)
-
-        # Verify post-alloc state
-
-        # Ensure no blocks are free inside hashless allocator
-        assert (len(allocator._hashless_allocator._free_block_indices) == 0)
-        # Ensure all blocks are tracked
-        assert len(allocator._block_tracker.keys()) == num_blocks
-        for block_id in range(num_blocks):
-            assert allocator._block_tracker[block_id].active
-        # Ensure all blocks are cached (all promoted)
-        assert len(allocator._cached_blocks.values()) == num_blocks
-        # Ensure no evicted blocks
-        assert len(allocator.evictor.free_table.keys()) == 0
-        # Ensure refcounts of 1 for all blocks
-        assert allocator._refcounter._refcounts == one_ref
-
-        # Free all blocks. Now all blocks shall be in the evictor,
-        # there shall be no tracking data left in _block_tracker,
-        # all blocks shall be tracked in _cached_blocks,
-        # and all blocks' refcounts shall be zero.
-        for block in new_block:
-            allocator.free(block)
-
-        # Verify post-free state
-
-        # Ensure no tracked blocks
-        assert len(allocator._block_tracker.keys()) == num_blocks
-        for block_id in range(num_blocks):
-            assert not allocator._block_tracker[block_id].active
-        # Ensure no blocks in hashless allocator (all promoted)
-        assert len(allocator._hashless_allocator._free_block_indices) == 0
-        # Ensure all blocks are cached
-        assert list(allocator._cached_blocks.values()) == all_blocks_list
-        # Ensure all blocks are inside the evictor
-        assert list(allocator.evictor.free_table.keys()) == all_blocks_list
-        # Ensure zero refcounts
-        assert allocator._refcounter._refcounts == zero_ref
-
-        # Allocate a mutable block; the first block shall be evicted,
-        # its content hash set to None, and its refcount set to 1.
-        mutable = allocator.allocate_mutable_block(prev_block=None)
-
-        assert mutable.block_id == 0
-        assert mutable.content_hash is None
-        assert allocator._block_tracker[0].active
-        assert allocator._refcounter.get(0) == 1
-        assert 0 not in allocator._cached_blocks
-        assert 0 not in allocator.evictor
-
-        # Since this mutable block has no hash yet, it shall be released into
-        # the hashless allocator
-        allocator.free(mutable)
-
-        assert not allocator._block_tracker[0].active
-        assert allocator._refcounter._refcounts == zero_ref
-        assert 0 not in allocator._cached_blocks
-        assert 0 not in allocator.evictor
-        assert 0 in allocator._hashless_allocator._free_block_indices
-
-        # When allocating an immutable block with the first block_size tokens,
-        # we shall get the free block from the hashless allocator, leaving no
-        # block in the hashless allocator
-        block = allocator.allocate_immutable_block(
-            prev_block=None, token_ids=token_ids[:block_size])
-
-        assert block.block_id == 0
-        assert len(allocator._hashless_allocator._free_block_indices) == 0
-        assert allocator._block_tracker[0].active
-        assert 0 in allocator._cached_blocks.values()
-        assert allocator._refcounter.get(0) == 1
-        assert 0 not in allocator.evictor
-
-        # Allocate a mutable block again; it shall be popped from the evictor
-        mutable = allocator.allocate_mutable_block(prev_block=None)
-        assert len(allocator._hashless_allocator._free_block_indices) == 0
-        assert mutable.block_id not in allocator.evictor.free_table
-        assert allocator._refcounter.get(mutable.block_id) == 1
-
-    # Test case where the two last-accessed times are equal
-    @staticmethod
-    @pytest.mark.parametrize("num_blocks", [1024])
-    @pytest.mark.parametrize("block_size", [16])
-    @pytest.mark.parametrize("seed", list(range(20)))
-    def test_eviction_order(num_blocks: int, block_size: int, seed: int):
-        """This test case simulates two chains being created and freed in
-        order; together they exhaust the initially free blocks.
-
-        The next block created after those two chains shall reuse a block
-        from the first chain, since that chain has the older access time.
-        While the first chain has two blocks, the last one shall be picked,
-        as it holds the larger number of tokens.
- """ - - random.seed(seed) - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - num_blocks_to_consume = num_blocks + 1 - - token_ids = list(range(num_blocks_to_consume * block_size)) - - num_blocks_in_first_chain = 2 - num_tokens_in_first_chain = block_size * num_blocks_in_first_chain - # First chain takes the first block - first_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids[:num_tokens_in_first_chain], - allocator=allocator, - ) - # There should only be one block allocated at this point - assert allocator.get_num_free_blocks() == (num_blocks - - num_blocks_in_first_chain) - - # Set the last accessed time of the first block to 1 - blocks_ids = [block.block_id for block in first_chain] - allocator.mark_blocks_as_accessed(blocks_ids, 1) - - # Second chain takes the rest of the blocks - second_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids[num_tokens_in_first_chain:-block_size], - allocator=allocator, - ) - - # There shouldn't be any blocks left at this point - assert allocator.get_num_free_blocks() == (0) - - assert len(first_chain) == num_blocks_in_first_chain - last_block_id = first_chain[-1].block_id - # Free each block in the first chain. - for i, block in enumerate(first_chain): - allocator.free(block) - - # Set the last accessed time on all of the blocks in the second chain - # to 2 - blocks_ids = [block.block_id for block in second_chain] - allocator.mark_blocks_as_accessed(blocks_ids, 2) - - # Free each block in the second chain. - for i, block in enumerate(second_chain): - allocator.free(block) - - # Allocate a new block and check that it's the least recently used block - # from the first chain. - new_block = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids[-block_size:], - allocator=allocator, - ) - - assert new_block[0].block_id == last_block_id - - # Test case for cache mertics - @staticmethod - def test_metric(): - block_size = 16 - allocator = PrefixCachingBlockAllocator(num_blocks=4, - block_size=block_size) - # Test when no query (0/0) - assert allocator.get_prefix_cache_hit_rate() == 0.0 - - token_ids = list(range(block_size)) - allocator.allocate_immutable_block(prev_block=None, - token_ids=token_ids) - # Test 0/1 hit rate - assert allocator.get_prefix_cache_hit_rate() == 0.0 - - allocator.allocate_immutable_block(prev_block=None, - token_ids=token_ids) - # Test 1/2 hit rate - assert allocator.get_prefix_cache_hit_rate() == 0.5 - - # Test more than one block - for _ in range(2, 1005): - allocator.allocate_immutable_block(prev_block=None, - token_ids=token_ids) - assert allocator.get_prefix_cache_hit_rate() > 0.99 - - # Test case for marking cache hit blocks as computed right after - # a batch of prefill sequences are scheduled. - @staticmethod - def test_touch_block(): - block_size = 16 - common_blocks = 4 - allocator = PrefixCachingBlockAllocator(num_blocks=8, - block_size=block_size) - - common_token_ids = list(range(block_size * common_blocks)) - - # Mimic the behavior of allocating the same block chain - # (i.e., common prefix) for a batch of 3 different prefill sequences. 
- for _ in range(3): - blocks = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=common_token_ids, - allocator=allocator, - ) - block_hashes = [block.content_hash for block in blocks] - # The allocated blocks should be marked as touched - # but not computed. - computed_block_ids = allocator.find_cached_blocks_prefix( - block_hashes) - assert len(computed_block_ids) == 0 - - allocator.mark_blocks_as_computed([]) - computed_block_ids = allocator.find_cached_blocks_prefix( - block_hashes=block_hashes) - assert len(computed_block_ids) == common_blocks - - @staticmethod - def test_find_cached_blocks_prefix(): - """ - This test verifies the behavior of find_cached_blocks_prefix. - """ - block_size = 4 - num_blocks = 8 - total_test_blocks = 12 - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - - token_ids = list(range(total_test_blocks * block_size)) - block_tokens_seq1 = token_ids[:num_blocks * block_size] - blocks_seq1 = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=block_tokens_seq1, - allocator=allocator, - ) - block_hashes_seq1 = [block.content_hash for block in blocks_seq1] - allocator.mark_blocks_as_computed([]) - - # All blocks should be cached. - cached_blocks_seq1 = allocator.find_cached_blocks_prefix( - block_hashes=block_hashes_seq1) - assert len(cached_blocks_seq1) == num_blocks - - # Free the first sequence. - for block in blocks_seq1: - allocator.free(block) - - # All blocks should be still be cached if not required to be allocated. - cached_blocks = allocator.find_cached_blocks_prefix( - block_hashes=block_hashes_seq1) - assert len(cached_blocks) == num_blocks - - block_tokens_seq2 = token_ids[num_blocks * block_size:] - blocks_seq2 = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=block_tokens_seq2, - allocator=allocator, - ) - block_hashes_seq2 = [block.content_hash for block in blocks_seq2] - allocator.mark_blocks_as_computed([]) - cached_blocks = allocator.find_cached_blocks_prefix( - block_hashes=block_hashes_seq2) - assert len(cached_blocks) == len(blocks_seq2) - - # Half of the blocks from seq1 should still be cached. - num_evicted_blocks = len(blocks_seq2) - cached_blocks = allocator.find_cached_blocks_prefix( - block_hashes=block_hashes_seq1) - assert len(cached_blocks) == len(blocks_seq1) - num_evicted_blocks - - # Test reset prefix cache - @staticmethod - @pytest.mark.parametrize("num_blocks", [10]) - @pytest.mark.parametrize("block_size", [16]) - def test_reset_prefix_cache(num_blocks: int, block_size: int): - """This test case simulates the case of resetting the prefix cache.""" - - allocator = PrefixCachingBlockAllocator(num_blocks=num_blocks, - block_size=block_size) - token_ids = list(range(3 * block_size)) - - first_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - second_chain = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=token_ids, - allocator=allocator, - ) - - # Free each block in the first chain. - for block in first_chain: - allocator.free(block) - - # Failed to reset prefix cache because some blocks are not freed yet. - assert not allocator.reset_prefix_cache() - assert allocator.get_prefix_cache_hit_rate() > 0.0 - - # Free each block in the second chain. - for block in second_chain: - allocator.free(block) - - # Reset prefix cache. 
- assert allocator.reset_prefix_cache() - assert allocator.get_prefix_cache_hit_rate() == 0.0 - - @staticmethod - def create_immutable_chain( - block_size: int, - token_ids: list[int], - allocator: PrefixCachingBlockAllocator, - extra_hash: Optional[int] = None, - ) -> list[PrefixCachingBlock]: - """Helper method which creates a chain of blocks. - """ - blocks: list[Block] = [] - num_blocks = math.ceil(len(token_ids) / block_size) - - if num_blocks == 0: - return [] - - prev_block = None - for block_number in range(0, num_blocks): - block_token_ids = token_ids[block_number * - block_size:(block_number + 1) * - block_size] - prev_block = allocator.allocate_immutable_block( - prev_block=prev_block, - token_ids=block_token_ids, - extra_hash=extra_hash) - blocks.append(prev_block) - - return blocks - - -class TestComputedBlocksTracker: - - @staticmethod - def _get_mock_allocator(): - return MagicMock(spec=PrefixCachingBlockAllocator) - - @staticmethod - def test_get_num_cached_tokens(): - """ - Test it correctly computes the number of cached tokens for a given - sequence: - - - The cache token count is derived from the number of cached blocks. - - The cache token count is updated when the allocator is updated. - - When a sequence is removed, the cache token count should be updated - accordingly. - - # TODO(rickyx): This behaviour for prefill sequence is a hack until - we fix the computed blocks tracking. - - The cache token count for prefill sequence doesn't change while - the sequence is in continuous prefill (chunked prefill). - """ - block_size = 4 - mock_allocator = TestComputedBlocksTracker._get_mock_allocator() - tracker = ComputedBlocksTracker( - allocator=mock_allocator, - block_size=block_size, - enable_caching=True, - ) - - # Not yet allocated. - tokens = [0, 1, 2, 3, 4, 5] - seq1 = create_dummy_sequence(request_id=0, - token_ids=tokens, - block_size=block_size) - mock_allocator.find_cached_blocks_prefix.return_value = [] - assert tracker.get_num_cached_tokens(seq1) == 0 - - mock_allocator.find_cached_blocks_prefix.return_value = [ - None - ] # 1 block cached. - # Result is cached for prefill sequence. - assert tracker.get_num_cached_tokens(seq1) == 0 - - # Mark the sequence as non-prefill. - seq1.data.update_num_computed_tokens(len(tokens)) # 6 tokens computed. - assert not seq1.is_prefill() - - # Recomputes for decoding sequence. - assert tracker.get_num_cached_tokens(seq1) == 4 - - # Append new tokens to the sequence. - num_new_tokens = 3 - for i in range(num_new_tokens): - seq1.append_token_id(i, {i: Logprob(logprob=0.0)}) - - assert tracker.get_num_cached_tokens(seq1) == 4 - - # Update the allocator. - mock_allocator.find_cached_blocks_prefix.return_value = [ - None - ] * 2 # 2 blocks cached. - assert tracker.get_num_cached_tokens(seq1) == 8 - - # Remove the sequence. - tracker.remove_seq(seq1.seq_id) - - # Re-create the sequence with the same request id to simulate recompute. - seq1 = create_dummy_sequence(request_id=0, - token_ids=tokens, - block_size=block_size) - mock_allocator.find_cached_blocks_prefix.return_value = [ - ] # no cached block - assert tracker.get_num_cached_tokens(seq1) == 0 - - @staticmethod - def test_correct_block_hash(): - """ - Test that the block hash is correctly computed for a sequence (should - match the underlying block allocator's block hash). So the number of - cached tokens is correctly retrieved. 
- """ - block_size = 4 - allocator = CpuGpuBlockAllocator.create( - allocator_type="prefix_caching", - num_gpu_blocks=16, - num_cpu_blocks=16, - block_size=block_size, - ) - gpu_allocator = allocator._allocators[Device.GPU] - - tracker = ComputedBlocksTracker( - allocator=allocator, - block_size=block_size, - enable_caching=True, - ) - - tokens = list(range(block_size * 4)) # 4 blocks. - seq = create_dummy_sequence(request_id=0, - token_ids=tokens, - block_size=block_size) - _ = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=tokens, - allocator=gpu_allocator, - ) - allocator.mark_blocks_as_computed([]) - - assert tracker.get_num_cached_tokens(seq) == len(tokens) - - @staticmethod - def test_correct_extra_hash(): - """ - Test that the block hash is correctly computed based on the extra hash, - ensuring it matches the allocator's block hash, specifically for the - LoRA case, and that the correct number of cached tokens is retrieved. - """ - block_size = 4 - allocator = CpuGpuBlockAllocator.create( - allocator_type="prefix_caching", - num_gpu_blocks=16, - num_cpu_blocks=16, - block_size=block_size, - ) - gpu_allocator = allocator._allocators[Device.GPU] - - tracker = ComputedBlocksTracker( - allocator=allocator, - block_size=block_size, - enable_caching=True, - ) - - tokens = list(range(block_size * 4)) - - # Create a dummy LoRA sequence with a specific LoRA ID. - lora_seq = create_dummy_lora_sequence(request_id=0, - token_ids=tokens, - block_size=block_size, - lora_int_id=1) - - _ = TestPrefixCachingBlockAllocator.create_immutable_chain( - block_size=block_size, - token_ids=tokens, - allocator=gpu_allocator, - extra_hash=lora_seq.extra_hash(), - ) - - allocator.mark_blocks_as_computed([]) - - # Create different dummy sequences that have the same token IDs - # but different LoRA IDs. - seq = create_dummy_sequence(request_id=1, - token_ids=tokens, - block_size=block_size) - - different_lora_seq = create_dummy_lora_sequence(request_id=2, - token_ids=tokens, - block_size=block_size, - lora_int_id=2) - - # Due to the different LoRA IDs, corresponding blocks are not cached. - assert tracker.get_num_cached_tokens(seq) == 0 - assert tracker.get_num_cached_tokens(different_lora_seq) == 0 - - # The number of cached tokens matches the length of the tokens - # for the cached LoRA sequence. - assert tracker.get_num_cached_tokens(lora_seq) == len(tokens) diff --git a/tests/core/conftest.py b/tests/core/conftest.py deleted file mode 100644 index 375b248ebeda..000000000000 --- a/tests/core/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. 
- """ - monkeypatch.setenv('VLLM_USE_V1', '0') diff --git a/tests/core/test_chunked_prefill_scheduler.py b/tests/core/test_chunked_prefill_scheduler.py deleted file mode 100644 index ce1fe189b3ca..000000000000 --- a/tests/core/test_chunked_prefill_scheduler.py +++ /dev/null @@ -1,858 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from unittest.mock import MagicMock - -import pytest # noqa - -from vllm.config import CacheConfig, SchedulerConfig -from vllm.core.scheduler import Scheduler -from vllm.engine.arg_utils import EngineArgs -from vllm.engine.llm_engine import LLMEngine -from vllm.sampling_params import SamplingParams -from vllm.sequence import Logprob, SequenceGroup - -from .utils import create_dummy_prompt - - -def get_sequence_groups(scheduler_output): - return [s.seq_group for s in scheduler_output.scheduled_seq_groups] - - -def append_new_token(seq_group: SequenceGroup, token_id: int): - for seq in seq_group.get_seqs(): - seq.append_token_id(token_id, {token_id: Logprob(token_id)}) - - -def schedule_and_update_computed_tokens(scheduler): - metas, out, _ = scheduler.schedule() - for s, meta in zip(out.scheduled_seq_groups, metas): - s.seq_group.update_num_computed_tokens(meta.token_chunk_size) - return metas, out - - -def test_simple(): - """Verify basic scheduling works.""" - block_size = 4 - num_seq_group = 4 - max_model_len = 16 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig("generate", - max_num_batched_tokens, - num_seq_group, - max_model_len, - enable_chunked_prefill=True) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 8 - cache_config.num_gpu_blocks = 8 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(num_seq_group): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Schedule seq groups prompts. - num_tokens = block_size * num_seq_group - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert out.num_batched_tokens == num_tokens - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == num_seq_group - for s in running: - append_new_token(s, 1) - - # Schedule seq groups generation. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert out.num_batched_tokens == num_seq_group - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == num_seq_group - - -def test_chunk(): - """Verify prefills are chunked properly.""" - block_size = 4 - max_seqs = 60 - max_model_len = 80 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 32 - cache_config.num_gpu_blocks = 32 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. 
- for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Verify the second request is chunked. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - print() - assert set(get_sequence_groups(out)) == set(running) - assert seq_group_meta[0].token_chunk_size == 60 - # Verify it is chunked. - assert seq_group_meta[1].token_chunk_size == 4 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 64 - # Only the first seq group has a new token appended. - append_new_token(running[0], 1) - - # One chunked prefill, and one decoding. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - # The first one is prefill. Scheduler guarantees ordering. - assert seq_group_meta[0].token_chunk_size == 56 - # The second one is a chunked prefill. - assert seq_group_meta[1].token_chunk_size == 1 - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 57 - - -def test_concurrent_chunking(): - """Verify prefills are chunked properly when - --max-num-partial-prefills is > 1""" - block_size = 4 - max_seqs = 60 - max_model_len = 2000 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - max_num_partial_prefills=2, # Up to 2 partial prefills at a time - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 32 - cache_config.num_gpu_blocks = 32 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Verify both requests are chunked with half of max_num_batched_tokens each - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert seq_group_meta[0].token_chunk_size == 32 - assert seq_group_meta[1].token_chunk_size == 32 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 64 - - # After one iteration, both should have 60 - 32 = 28 tokens left to prefill - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert seq_group_meta[0].token_chunk_size == 28 - assert seq_group_meta[1].token_chunk_size == 28 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 56 - - -def test_concurrent_chunking_large_requests(): - """Verify large prefill requests are run one at a time""" - block_size = 4 - max_seqs = 60 - max_model_len = 2000 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - max_num_partial_prefills=2, # Up to 2 partial prefills at a time - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 3200 # large KV cache size for large requests - cache_config.num_gpu_blocks = 3200 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # Add seq groups to scheduler. 
- for i in range(2): - _, seq_group = create_dummy_prompt( - str(i), - prompt_length=1200, # Very large prompt - block_size=block_size) - scheduler.add_seq_group(seq_group) - - # Verify only a single request is chunked, and it gets all 64 tokens - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 1 - assert seq_group_meta[0].token_chunk_size == 64 - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 64 - - -def test_short_prompts_jump_long_prompts_in_queue(): - """Verify large prefill requests are punted behind smaller ones if - another large prefill request is already running""" - block_size = 4 - max_seqs = 60 - max_model_len = 2000 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - max_num_partial_prefills=2, # Up to 2 partial prefills at a time - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 3200 # large KV cache size for large requests - cache_config.num_gpu_blocks = 3200 - scheduler = Scheduler(scheduler_config, cache_config, None) - long_seqs: list[SequenceGroup] = [] - short_seqs: list[SequenceGroup] = [] - - # Add 2 large seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt( - str(i), - prompt_length=1200, # Very large prompt - block_size=block_size) - scheduler.add_seq_group(seq_group) - long_seqs.append(seq_group) - assert seq_group.is_prefill() - - # Add 2 small seq groups behind them - for i in range(2): - _, seq_group = create_dummy_prompt( - str(i + 2), - prompt_length=40, # Very small prompt - block_size=block_size) - scheduler.add_seq_group(seq_group) - short_seqs.append(seq_group) - assert seq_group.is_prefill() - - # Verify one large req and 1 small req chunked - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert seq_group_meta[0].token_chunk_size == 32 # large req gets 32 tokens - assert seq_group_meta[1].token_chunk_size == 32 # small req gets 32 tokens - - # all 4 are prefilling - assert long_seqs[0].is_prefill() - assert long_seqs[1].is_prefill() - assert short_seqs[0].is_prefill() - assert short_seqs[1].is_prefill() - # First short and first long sequences have been scheduled - assert long_seqs[0].first_seq.get_num_computed_tokens() == 32 - assert long_seqs[1].first_seq.get_num_computed_tokens() == 0 - assert short_seqs[0].first_seq.get_num_computed_tokens() == 32 - assert short_seqs[1].first_seq.get_num_computed_tokens() == 0 - - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 64 - - # in the second iteration, - # the first small request had only 8 tokens left - # so it went to decode - # The other small req is scheduled - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - # the new small req got 64 - (32+8) tokens - assert seq_group_meta[0].token_chunk_size == 24 - assert seq_group_meta[1].token_chunk_size == 32 # large req still got 32 - # the other small request had only 8 tokens left - assert seq_group_meta[2].token_chunk_size == 8 # 40-32 - - # The first small request got to decode now - assert long_seqs[0].is_prefill() - assert long_seqs[1].is_prefill() - assert not short_seqs[0].is_prefill() - assert short_seqs[1].is_prefill() - # Both small requests have started in front of the second long request - assert long_seqs[0].first_seq.get_num_computed_tokens() == 64 - assert long_seqs[1].first_seq.get_num_computed_tokens() == 
0 - assert short_seqs[0].first_seq.get_num_computed_tokens() == 40 - assert short_seqs[1].first_seq.get_num_computed_tokens() == 24 - - assert out.num_prefill_groups == 3 - assert out.num_batched_tokens == 64 - # the first small seq group has a new token appended. - append_new_token(short_seqs[0], 1) - - # in the third iteration, - # the first small request is already decoding - # the second small request only has 16 tokens left and will enter decoding - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert seq_group_meta[0].token_chunk_size == 32 # large still got 32 - # small req finished prefilling 40-24=16 tokens - assert seq_group_meta[1].token_chunk_size == 16 - assert seq_group_meta[2].token_chunk_size == 1 # decode - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 49 # (32+16+1 decode) - - # both small requests have now reached decode - assert long_seqs[0].is_prefill() - assert long_seqs[1].is_prefill() - assert not short_seqs[0].is_prefill() - assert not short_seqs[1].is_prefill() - assert long_seqs[0].first_seq.get_num_computed_tokens() == 96 - assert long_seqs[1].first_seq.get_num_computed_tokens() == 0 - assert short_seqs[0].first_seq.get_num_computed_tokens() == 41 - assert short_seqs[1].first_seq.get_num_computed_tokens() == 40 - - # both the small seq groups have a new token appended - append_new_token(short_seqs[0], 1) - append_new_token(short_seqs[1], 1) - - # in the fourth iteration, both small requests are decoding - # so large request gets all the budget - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - - # large req gets 62 tokens (minus 2 for decode) - assert seq_group_meta[0].token_chunk_size == 62 - assert seq_group_meta[1].token_chunk_size == 1 # decode - assert seq_group_meta[2].token_chunk_size == 1 # decode - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 64 - - assert long_seqs[0].first_seq.get_num_computed_tokens() == 158 - - # assert long_seqs[0].is_prefill() - # assert long_seqs[1].is_prefill() - # assert not short_seqs[0].is_prefill() - # assert not short_seqs[1].is_prefill() - - # # both the small seq groups have a new token appended - # append_new_token(short_seqs[0], 1) - # append_new_token(short_seqs[1], 1) - - # # in the fifth iteration, large request gets all the budget - # # while both small requests are decoding - # seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - # assert seq_group_meta[0].token_chunk_size == 62 - # assert seq_group_meta[1].token_chunk_size == 1 # decode - # assert seq_group_meta[2].token_chunk_size == 1 # decode - # assert out.num_prefill_groups == 1 - # assert out.num_batched_tokens == 64 - - -def test_complex(): - block_size = 4 - max_seqs = 60 - max_model_len = 80 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 64 - cache_config.num_gpu_blocks = 64 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - assert seq_group.is_prefill() - - # Verify the second request is chunked. 
- seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - - assert set(get_sequence_groups(out)) == set(running) - assert seq_group_meta[0].token_chunk_size == 60 - # Verify it is chunked. - assert seq_group_meta[1].token_chunk_size == 4 - assert not running[0].is_prefill() - assert running[1].is_prefill() - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 64 - # Only the first seq group has a new token appended. - append_new_token(running[0], 1) - - # Add 2 more requests. - for i in range(2, 4): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Decoding & chunked prefill & first chunk of 3rd request is scheduled. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 3 - # The first one is the first chunked prefill. - assert seq_group_meta[0].token_chunk_size == 7 - # The second one is the second new chunked prefill. - assert seq_group_meta[1].token_chunk_size == 56 - # The last one is decode. - assert seq_group_meta[2].token_chunk_size == 1 - # Two of them are in chunked prefill. - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 64 - # The first 2 requests are now in the decoding phase. - append_new_token(running[0], 1) - assert not running[0].is_prefill() - append_new_token(running[1], 1) - assert not running[1].is_prefill() - # The third request is still in prefill stage. - assert running[2].is_prefill() - - -def test_maximal_decoding(): - """Verify decoding requests are prioritized.""" - block_size = 4 - max_seqs = 2 - max_model_len = 8 - max_num_batched_tokens = 2 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 8 - cache_config.num_gpu_blocks = 8 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=2, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - assert seq_group.is_prefill() - - # The first prefill is scheduled. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 1 - assert seq_group_meta[0].token_chunk_size == 2 - assert not running[0].is_prefill() - assert running[1].is_prefill() - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 2 - # Only the first seq group has a new token appended. - append_new_token(running[0], 1) - - # Create one more seq_group. - _, seq_group = create_dummy_prompt("3", - prompt_length=2, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - assert seq_group.is_prefill() - # The first decoding + second chunk is scheduled. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 2 - assert seq_group_meta[0].token_chunk_size == 1 - assert seq_group_meta[1].token_chunk_size == 1 - assert not running[0].is_prefill() - assert running[1].is_prefill() - assert running[2].is_prefill() - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 2 - append_new_token(running[0], 1) - - # Decoding + running prefill is prioritized.
- seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 2 - assert seq_group_meta[0].token_chunk_size == 1 - assert seq_group_meta[1].token_chunk_size == 1 - assert not running[0].is_prefill() - assert not running[1].is_prefill() - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 2 - append_new_token(running[0], 1) - append_new_token(running[1], 1) - - # Only decoding is prioritized. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 2 - assert seq_group_meta[0].token_chunk_size == 1 - assert seq_group_meta[1].token_chunk_size == 1 - assert not running[0].is_prefill() - assert not running[1].is_prefill() - assert out.num_prefill_groups == 0 - assert out.num_batched_tokens == 2 - append_new_token(running[0], 1) - append_new_token(running[1], 1) - - # After aborting the decoding request, the fcfs new prefill is prioritized. - scheduler.abort_seq_group(running[0].request_id) - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 2 - assert seq_group_meta[0].token_chunk_size == 1 - assert seq_group_meta[1].token_chunk_size == 1 - assert not running[1].is_prefill() - assert running[2].is_prefill() - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 2 - - -def test_prompt_limit(): - """Verify max_num_batched_tokens < max_model_len is possible.""" - block_size = 4 - max_seqs = 32 - max_model_len = 64 - max_num_batched_tokens = 32 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 16 - cache_config.num_gpu_blocks = 16 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - _, seq_group = create_dummy_prompt("1", - prompt_length=48, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - assert seq_group.is_prefill() - - # The prompt length > max_num_batched_tokens should be still scheduled. 
- seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(get_sequence_groups(out)) == 1 - assert seq_group_meta[0].token_chunk_size == 32 - assert running[0].is_prefill() - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == 32 - - -def test_prompt_limit_exceed(): - block_size = 4 - max_seqs = 64 - max_model_len = 32 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig("generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 16 - cache_config.num_gpu_blocks = 16 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - _, seq_group = create_dummy_prompt("2", - prompt_length=48, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - assert seq_group.is_prefill() - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.ignored_seq_groups) == 1 - assert out.ignored_seq_groups[0] == seq_group - - -def test_chunked_prefill_preempt(): - """Verify preempt works with chunked prefill requests""" - block_size = 4 - max_seqs = 30 - max_model_len = 200 - max_num_batched_tokens = 30 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 16 - cache_config.num_gpu_blocks = 16 - scheduler = Scheduler(scheduler_config, cache_config, None) - - _, seq_group = create_dummy_prompt("1", - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - _, out = schedule_and_update_computed_tokens(scheduler) - # The request is chunked. - # prefill scheduled now. - assert len(out.scheduled_seq_groups) == 1 - assert out.num_prefill_groups == 1 - assert seq_group.is_prefill() - assert out.num_batched_tokens == max_num_batched_tokens - - # The request should be preempted. - scheduler.block_manager.can_append_slots = MagicMock() - - def cannot_append_second_group1(seq_group, num_lookahead_slots): - return seq_group.request_id != "1" - - scheduler.block_manager.can_append_slots.side_effect = ( - cannot_append_second_group1) - - # The running prefill is now preempted. - _, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 0 - assert out.num_batched_tokens == 0 - assert out.blocks_to_swap_out == [] - assert out.blocks_to_swap_in == [] - - # Make sure we can reschedule preempted request. - _, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 1 - assert out.num_prefill_groups == 1 - assert seq_group.is_prefill() - assert out.num_batched_tokens == max_num_batched_tokens - assert seq_group.get_num_uncomputed_tokens() == 30 - - # We should be able to run prefill twice as it is chunked. 
- def cannot_append_second_group2(seq_group, num_lookahead_slots): - return True - - scheduler.block_manager.can_append_slots.side_effect = ( - cannot_append_second_group2) - _, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 1 - assert out.num_prefill_groups == 1 - assert not seq_group.is_prefill() - assert out.num_batched_tokens == max_num_batched_tokens - - -def test_chunked_prefill_spec_prefill(): - """Verify that the num_lookahead_slots is set appropriately for an all""" - """prefill batch.""" - block_size = 4 - max_seqs = 30 - max_model_len = 200 - max_num_batched_tokens = 30 - num_lookahead_slots = 4 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - num_lookahead_slots=num_lookahead_slots, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 16 - cache_config.num_gpu_blocks = 16 - scheduler = Scheduler(scheduler_config, cache_config, None) - - _, seq_group = create_dummy_prompt("1", - prompt_length=30, - block_size=block_size) - scheduler.add_seq_group(seq_group) - _, out = schedule_and_update_computed_tokens(scheduler) - # The request is chunked. - # prefill scheduled now. - assert len(out.scheduled_seq_groups) == 1 - assert out.num_prefill_groups == 1 - assert out.num_batched_tokens == max_num_batched_tokens - print(out.num_lookahead_slots) - assert out.num_lookahead_slots == 0 - - -def test_chunked_prefill_max_seqs(): - block_size = 4 - max_seqs = 2 - max_model_len = 80 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 128 - cache_config.num_gpu_blocks = 128 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - _, seq_group = create_dummy_prompt("1", - prompt_length=65, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - # The first prefill is chunked. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert seq_group_meta[0].token_chunk_size == max_num_batched_tokens - assert len(get_sequence_groups(out)) == 1 - - # Add new requests. - for i in range(4): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=65, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Make sure only 2 requests are scheduled. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert out.num_batched_tokens == max_num_batched_tokens - assert len(get_sequence_groups(out)) == 2 - assert not running[0].is_prefill() - assert running[1].is_prefill() - append_new_token(running[0], 1) - - # Although we have enough token budget, we can only schedule max_seqs. 
- seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert seq_group_meta[0].token_chunk_size == 2 - assert seq_group_meta[1].token_chunk_size == 1 - assert out.num_batched_tokens == 3 - assert len(get_sequence_groups(out)) == max_seqs - assert not running[0].is_prefill() - assert not running[1].is_prefill() - - -def test_prefix_caching(): - """Verify allocating full blocks when prefix caching is enabled.""" - block_size = 4 - max_seqs = 10 - max_model_len = 80 - max_num_batched_tokens = 64 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - ) - cache_config = CacheConfig(block_size, - 1.0, - 1, - "auto", - enable_prefix_caching=True) - cache_config.num_cpu_blocks = 0 - cache_config.num_gpu_blocks = 32 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - block_size=block_size, - prompt_length=50) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert seq_group_meta[0].token_chunk_size == 50 - # Verify it is chunked. Note that although the budget is 64-50=14, - # we only allocate full blocks for prefix caching, so only 4*(14//4)=12 - # tokens are allocated. - assert seq_group_meta[1].token_chunk_size == 12 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 62 - - -def test_prefix_caching_with_concurrent_partial_prefills(): - """Verify allocating full blocks when prefix caching is enabled with - --max-num-partial-prefills > 1.""" - block_size = 4 - max_seqs = 10 - max_model_len = 8000 - max_num_batched_tokens = 60 # With two slots, each slot will get 30 tokens - scheduler_config = SchedulerConfig("generate", - max_num_batched_tokens, - max_seqs, - max_model_len, - enable_chunked_prefill=True, - max_num_partial_prefills=2) - cache_config = CacheConfig(block_size, - 1.0, - 1, - "auto", - enable_prefix_caching=True) - cache_config.num_cpu_blocks = 0 - cache_config.num_gpu_blocks = 32 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - block_size=block_size, - prompt_length=50) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - # To partially prefill both sequences, both can chunk up to 30 tokens - # But the next lowest multiple of the block size (4) is 28 - assert seq_group_meta[0].token_chunk_size == 28 - assert seq_group_meta[1].token_chunk_size == 28 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 56 - - # On the next iteration, both sequences should finish prefill - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - # Both sequences have 50 - 28 = 22 tokens left to prefill. 
- # This is not a multiple of the block size, but we don't care since we don't - # cache the final partial block of prefix sequences - assert seq_group_meta[0].token_chunk_size == 22 - assert seq_group_meta[1].token_chunk_size == 22 - assert out.num_prefill_groups == 2 - assert out.num_batched_tokens == 44 - - -@pytest.mark.parametrize("model", ["facebook/opt-125m"]) -@pytest.mark.parametrize("max_num_partial_prefills", [2, 4, 8]) -def test_chunked_prefill_with_actual_engine(model: str, - max_num_partial_prefills: int): - """Make sure the model can actually sample with concurrent - partial prefills - """ - - prompt = "hello" * 40 - - engine_args = EngineArgs( - model=model, - max_num_partial_prefills=max_num_partial_prefills, - max_num_batched_tokens=40, - max_num_seqs=8, - enable_chunked_prefill=True, - gpu_memory_utilization=0.8, - ) - - engine = LLMEngine.from_engine_args(engine_args) - sampling_params = SamplingParams(temperature=0) - - for req_num in range(max_num_partial_prefills): - engine.add_request(f"{req_num}", prompt, sampling_params) - # first step - request_outputs = engine.step() - # means all are prefilling - assert len(request_outputs) == 0 - assert len(engine.scheduler[0].running) == max_num_partial_prefills diff --git a/tests/core/test_num_computed_tokens_update.py b/tests/core/test_num_computed_tokens_update.py deleted file mode 100644 index 131a7b3a6299..000000000000 --- a/tests/core/test_num_computed_tokens_update.py +++ /dev/null @@ -1,67 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from tests.conftest import VllmRunner -from tests.core.utils import create_dummy_prompt -from vllm.engine.llm_engine import LLMEngine -from vllm.sequence import SequenceGroup - -MODEL = "JackFram/llama-160m" - - -def add_seq_group_to_engine(engine: LLMEngine, seq_group: SequenceGroup): - scheduler = engine.scheduler[0] - scheduler.add_seq_group(seq_group) - - -@pytest.mark.parametrize("enable_chunked_prefill", [False, True]) -@pytest.mark.parametrize("enforce_eager", [False, True]) -def test_num_computed_tokens_update(enable_chunked_prefill: bool, - enforce_eager: bool): - - # Make a vllm engine - runner = VllmRunner(model_name=MODEL, - gpu_memory_utilization=0.7, - enable_chunked_prefill=enable_chunked_prefill, - enforce_eager=enforce_eager) - engine: LLMEngine = runner.llm.llm_engine - - num_prompt_steps = 1 - - num_output_tokens_list = [4, 8, 12, 15, 16, 17] - - # Create sequence and add to engine - prompt_len = 10 - - for req_idx, num_output_tokens in enumerate(num_output_tokens_list): - seq, seq_group = create_dummy_prompt(request_id=str(req_idx), - prompt_length=prompt_len, - min_tokens=num_output_tokens, - max_tokens=num_output_tokens) - add_seq_group_to_engine(engine, seq_group) - - assert seq.data.get_num_computed_tokens() == 0 - - for _ in range(num_prompt_steps): - # prompt steps - engine.step() - - if not seq.is_finished(): - prompt_num_computed_tokens = seq.data.get_num_computed_tokens() - # Test correctness of num_computed_tokens after the prompt steps - assert prompt_num_computed_tokens == \ - prompt_len + num_prompt_steps - 1 - - decode_step_counter = 0 - while not seq.is_finished(): - # Test correctness of num_computed_tokens after the decode steps - assert seq.data.get_num_computed_tokens( - ) == prompt_num_computed_tokens + decode_step_counter - engine.step() - decode_step_counter += 1 - - # Test correctness of num_computed_tokens after the sequence finish. 
- assert seq.data.get_num_computed_tokens( - ) == prompt_len + num_output_tokens - 1 diff --git a/tests/core/test_scheduler.py b/tests/core/test_scheduler.py deleted file mode 100644 index 86e08328c43b..000000000000 --- a/tests/core/test_scheduler.py +++ /dev/null @@ -1,1338 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import time -from collections import deque -from typing import Optional -from unittest.mock import MagicMock - -import pytest # noqa -import torch -from torch import Use # noqa - -from vllm.config import CacheConfig, SchedulerConfig -from vllm.config.lora import LoRAConfig -from vllm.core.interfaces import AllocStatus -from vllm.core.scheduler import Scheduler, SchedulingBudget -from vllm.lora.request import LoRARequest -from vllm.sequence import SequenceGroup, SequenceStatus - -from .utils import (append_new_token, append_new_token_seq, - append_new_token_seq_group, create_dummy_prompt, - get_sequence_groups, schedule_and_update_computed_tokens) - - -def test_scheduler_add_seq_group(): - block_size = 4 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=100, - max_num_seqs=64, - max_model_len=1, - ) - cache_config = CacheConfig(block_size, 1.0, 1, cache_dtype="auto") - cache_config.num_cpu_blocks = 4 - cache_config.num_gpu_blocks = 4 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # Add seq group to scheduler. - num_seq_group = 4 - for i in range(num_seq_group): - _, seq_group = create_dummy_prompt(str(i), - block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group) - assert scheduler.get_num_unfinished_seq_groups() == i + 1 - - -def test_scheduler_abort_seq_group(): - block_size = 4 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=100, - max_num_seqs=64, - max_model_len=1, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 4 - cache_config.num_gpu_blocks = 4 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # Add multiple seq groups to scheduler. - num_seq_group = 4 - request_ids: set[str] = set() - for i in range(num_seq_group): - _, seq_group = create_dummy_prompt(str(i), block_size) - scheduler.add_seq_group(seq_group) - request_ids.add(str(i)) - - # Abort all added seq groups. - assert scheduler.get_num_unfinished_seq_groups() == num_seq_group - scheduler.abort_seq_group(request_ids) - assert scheduler.get_num_unfinished_seq_groups() == 0 - - -def test_scheduler_schedule_simple(): - block_size = 4 - num_seq_group = 4 - max_model_len = 16 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=64, - max_num_seqs=num_seq_group, - max_model_len=max_model_len, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 8 - cache_config.num_gpu_blocks = 8 - scheduler = Scheduler(scheduler_config, cache_config, None) - running: list[SequenceGroup] = [] - - # Add seq groups to scheduler. - for i in range(num_seq_group): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group) - running.append(seq_group) - - # Schedule seq groups prompts. 
- num_tokens = block_size * num_seq_group - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert out.num_batched_tokens == num_tokens - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == num_seq_group - append_new_token(out, 1) - - # Schedule seq groups generation. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set(running) - assert out.num_batched_tokens == num_seq_group - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == num_seq_group - append_new_token(out, 1) - - -def test_scheduler_prefill_prioritized(): - """Verify running batched tokens are not applied to prefill requests.""" - block_size = 4 - max_model_len = 30 - max_batched_num_tokens = 30 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=max_batched_num_tokens, - max_num_seqs=2, - max_model_len=max_model_len, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 16 - cache_config.num_gpu_blocks = 16 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # Add seq groups to scheduler. - _, seq_group_a = create_dummy_prompt("1", 1, block_size=block_size) - scheduler.add_seq_group(seq_group_a) - - # Schedule seq groups prompts. - _, out = schedule_and_update_computed_tokens(scheduler) - assert get_sequence_groups(out) == [seq_group_a] - - # Add a new prefill request B. - _, seq_group_b = create_dummy_prompt("2", 30, block_size=block_size) - scheduler.add_seq_group(seq_group_b) - - # Verify prefill requests are prioritized. Since max_batched_num_tokens - # is 1, new prefill request has to be scheduled first. - _, out = schedule_and_update_computed_tokens(scheduler) - assert get_sequence_groups(out) == [seq_group_b] - - -def test_scheduler_schedule_preempt_abort(): - block_size = 4 - max_model_len = 16 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=64, - max_num_seqs=2, - max_model_len=max_model_len, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 2 - cache_config.num_gpu_blocks = 2 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # Add seq groups to scheduler. - seq_a, seq_group_a = create_dummy_prompt("1", - block_size, - block_size=block_size) - seq_b, seq_group_b = create_dummy_prompt("2", - block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group_a) - scheduler.add_seq_group(seq_group_b) - - # Schedule seq groups prompts. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert get_sequence_groups(out) == [seq_group_a, seq_group_b] - assert out.num_batched_tokens == block_size * 2 # seq_a and seq_b - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == 2 - assert scheduler.get_num_unfinished_seq_groups() == 2 - - # Append "generated" tokens, allowing the sequence to mark prompt tokens as - # processed. - append_new_token(out, 1) - - # Schedule seq groups generation and preempt seq group b. 
- seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert get_sequence_groups(out) == [seq_group_a] - assert out.num_batched_tokens == 1 - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == 1 - assert scheduler.get_num_unfinished_seq_groups() == 2 - assert out.preempted == 1 - - # Abort seq group a. Re-schedule seq group b prompt with recomputation. - scheduler.abort_seq_group("1") - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert get_sequence_groups(out) == [seq_group_b] - assert out.num_batched_tokens == 5 # 4 prompt + 1 generation. - assert (not out.blocks_to_copy and not out.blocks_to_swap_in - and not out.blocks_to_swap_out) - assert len(seq_group_meta) == 1 - assert scheduler.get_num_unfinished_seq_groups() == 1 - - -def test_scheduler_max_seqs(): - block_size = 4 - num_seq_group = 4 - max_seq_group = 2 - max_model_len = 16 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=64, - max_num_seqs=max_seq_group, - max_model_len=max_model_len, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 8 - cache_config.num_gpu_blocks = 8 - scheduler = Scheduler(scheduler_config, cache_config, None) - - all_seq_groups: list[SequenceGroup] = [] - # Add seq groups to scheduler. - for i in range(num_seq_group): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=block_size, - block_size=block_size) - all_seq_groups.append(seq_group) - - # Append 1 seq group - scheduler.add_seq_group(all_seq_groups[0]) - - # Schedule seq groups prompts. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set([all_seq_groups[0]]) - append_new_token(out, 1) - - # Schedule seq groups generation. - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set([all_seq_groups[0]]) - append_new_token(out, 1) - - # Append 2 more seq group - scheduler.add_seq_group(all_seq_groups[1]) - scheduler.add_seq_group(all_seq_groups[2]) - - # Schedule seq groups prompts. - # Only 1 seq group should be scheduled since max_seq_group is 2 - # and one is prompting. 
- _, out = schedule_and_update_computed_tokens(scheduler) - assert set(get_sequence_groups(out)) == set([all_seq_groups[1]]) - - -def test_scheduler_delay_factor(): - block_size = 4 - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=100, - max_num_seqs=64, - max_model_len=16, - delay_factor=0.5, - ) - cache_config = CacheConfig(block_size, 1.0, 1, "auto") - cache_config.num_cpu_blocks = 8 - cache_config.num_gpu_blocks = 8 - scheduler = Scheduler(scheduler_config, cache_config, None) - - # schedule first prompt - seq_group_meta, seq_group = create_dummy_prompt("0", - prompt_length=block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group) - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert out.num_prefill_groups > 0 - assert seq_group_meta[0].request_id == '0' - append_new_token(out, 1) - - # wait for a second before scheduling next prompt - time.sleep(1) - seq_group_meta, seq_group = create_dummy_prompt("1", - prompt_length=block_size, - block_size=block_size) - scheduler.add_seq_group(seq_group) - - # second prompt should *not* be scheduled - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert out.num_prefill_groups == 0 - assert seq_group_meta[0].request_id == '0' - append_new_token(out, 1) - - # wait for more than 0.5 second and try again - time.sleep(0.6) - seq_group_meta, out = schedule_and_update_computed_tokens(scheduler) - assert out.num_prefill_groups > 0 - assert seq_group_meta[0].request_id == '1' - append_new_token(out, 1) - - -def initialize_scheduler( - *, - max_num_seqs=1000, - max_token_budget=1000, - max_model_len=1000, - lora_config=None, - block_size=4, - num_cpu_blocks=8, - num_gpu_blocks=8, - enable_prefix_caching=False, - enable_chunked_prefill=False, -): - block_size = block_size - scheduler_config = SchedulerConfig( - "generate", - max_num_batched_tokens=max_token_budget, - max_num_seqs=max_num_seqs, - max_model_len=max_model_len, - enable_chunked_prefill=enable_chunked_prefill, - ) - cache_config = CacheConfig( - block_size, - 1.0, - 1, - "auto", - enable_prefix_caching=enable_prefix_caching, - ) - cache_config.num_cpu_blocks = num_cpu_blocks - cache_config.num_gpu_blocks = num_gpu_blocks - scheduler = Scheduler(scheduler_config, cache_config, lora_config) - return scheduler - - -def create_token_budget(token_budget: int = 10000, - max_num_seqs: int = 10000) -> SchedulingBudget: - return SchedulingBudget( - token_budget=token_budget, - max_num_seqs=max_num_seqs, - ) - - -def add_token_budget(budget: SchedulingBudget, - num_batched_tokens: int = 0, - num_curr_seqs: int = 0): - mock_seq_group = create_dummy_prompt('10', prompt_length=60)[1] - budget.add_num_batched_tokens(mock_seq_group.request_id, - num_batched_tokens) - budget.add_num_seqs(mock_seq_group.request_id, num_curr_seqs) - - -def test_prefill_schedule_max_prompt_len(): - """ - Test prompt longer than max_prompt_len is aborted. 
- """ - block_size = 4 - scheduler = initialize_scheduler(max_model_len=30, block_size=block_size) - _, seq_group = create_dummy_prompt("0", - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - budget = create_token_budget() - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 1 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(remaining_waiting) == 0 - - -def test_prefill_schedule_token_budget(): - """ - Test token budget respected. - """ - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=64) - budget = create_token_budget(token_budget=0) - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - - # 0 token budget == nothing is scheduled. - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(remaining_waiting) == 2 - - # 60 token budget == 1 request scheduled. - budget = create_token_budget(token_budget=60) - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 1 - assert budget.num_batched_tokens == 60 - assert budget.num_curr_seqs == 1 - assert len(remaining_waiting) == 1 - - # Test when current_batched_tokens respected. - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16) - budget = create_token_budget(token_budget=60) - add_token_budget(budget, 30, 0) - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - # Cannot schedule a prompt that doesn't fit the budget. - scheduler.add_seq_group(seq_group) - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 30 - assert budget.num_curr_seqs == 0 - assert len(remaining_waiting) == 1 - budget = create_token_budget(token_budget=90) - add_token_budget(budget, 30, 0) - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.seq_groups) == 1 - assert budget.num_batched_tokens == 90 - assert budget.num_curr_seqs == 1 - assert len(remaining_waiting) == 0 - - -def test_prefill_schedule_max_seqs(): - """ - Test max seq respected. - """ - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=64) - budget = create_token_budget(max_num_seqs=2) - for i in range(3): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 2 - assert budget.num_batched_tokens == 120 - assert budget.num_curr_seqs == 2 - assert len(remaining_waiting) == 1 - - # Verify curr_num_seqs respected. 
- scheduler.waiting = deque() - budget = create_token_budget(max_num_seqs=2) - add_token_budget(budget, 0, 2) - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 2 - assert len(remaining_waiting) == 1 - - -def test_prefill_schedule_max_lora(): - """ - Test max lora is respected and prioritized. - """ - block_size = 4 - lora_config = LoRAConfig(max_lora_rank=8, max_loras=1) - scheduler = initialize_scheduler(lora_config=lora_config, - block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=64) - budget = create_token_budget(token_budget=120) - curr_loras: set[int] = set() - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size, - lora_request=LoRARequest( - lora_name=str(i), - lora_int_id=i + 1, - lora_path="abc")) - scheduler.add_seq_group(seq_group) - # Add two more requests to verify lora is prioritized. - # 0: LoRA, 1: LoRA, 2: regular, 3: regular - # In the first iteration, index 0, 2 is scheduled. - # If a request is not scheduled because it hits max lora, it is - # prioritized. Verify that. - for i in range(2, 4): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - # Schedule 2 requests (0 and 2) - output = scheduler._schedule_prefills(budget, curr_loras) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 2 - assert budget.num_batched_tokens == 120 - assert budget.num_curr_seqs == 2 - assert len(remaining_waiting) == 2 - assert len(curr_loras) == 1 - # The second lora request is scheduled next as FCFS policy. - # Reset curr_loras so that it can be scheduled. - curr_loras = set() - budget = create_token_budget(token_budget=60) - output = scheduler._schedule_prefills(budget, curr_loras) - remaining_waiting = scheduler.waiting - assert len(output.seq_groups) == 1 - assert output.seq_groups[0].seq_group.request_id == "1" - assert len(remaining_waiting) == 1 - assert len(curr_loras) == 1 - assert budget.num_batched_tokens == 60 - - -def test_prefill_schedule_no_block_manager_capacity(): - """ - Test sequence cannot be scheduled due to block manager has no capacity. 
- """ - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_gpu_blocks=128, - num_cpu_blocks=128) - budget = create_token_budget() - for i in range(3): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - scheduler.block_manager.can_allocate = MagicMock() - scheduler.block_manager.can_allocate.return_value = AllocStatus.LATER - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 0 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(remaining_waiting) == 3 - - scheduler = initialize_scheduler() - budget = create_token_budget() - for i in range(3): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler.add_seq_group(seq_group) - scheduler.block_manager.can_allocate = MagicMock() - scheduler.block_manager.can_allocate.return_value = AllocStatus.NEVER - output = scheduler._schedule_prefills(budget, None) - remaining_waiting = scheduler.waiting - assert len(output.ignored_seq_groups) == 3 - assert len(output.seq_groups) == 0 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(remaining_waiting) == 0 - - -def test_decode_schedule_preempted(): - """ - Test decodes cannot be scheduled and preempted. - """ - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=64) - curr_loras = None - for i in range(3): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - scheduler._add_seq_group_to_running(seq_group) - scheduler.block_manager.can_append_slots = MagicMock() - - def cannot_append_second_group(seq_group, num_lookahead_slots): - return seq_group.request_id != "1" - - scheduler.block_manager.can_append_slots.side_effect = ( - cannot_append_second_group) - - # 1 cannot be scheduled, and the lowest priority (request 2) - # should be preempted. 1 will also be preempted. - budget = create_token_budget() - output = scheduler._schedule_running(budget, curr_loras) - remaining_running = scheduler.running - assert len(remaining_running) == 0 - assert len(output.decode_seq_groups) == 1 - assert len(output.prefill_seq_groups) == 0 - assert output.decode_seq_groups[0].seq_group.request_id == "0" - assert len(output.preempted) == 2 - # Verify budgets are updated. - assert budget.num_batched_tokens == 1 - # NOTE: When enable_chunk is False, num_seqs budget is not updated. - # assert budget.num_curr_seqs == 1 - # Both should be preempted, not swapped. - assert output.blocks_to_swap_out == [] - # Nothing is copied. - assert output.blocks_to_copy == [] - - -def test_schedule_decode_blocks_to_copy_update(): - """ - Verify blocks_to_copy is updated. - """ - block_size = 4 - scheduler = initialize_scheduler(block_size=4, - num_cpu_blocks=16, - num_gpu_blocks=16) - _, seq_group = create_dummy_prompt("1", - prompt_length=60, - block_size=block_size) - curr_loras = None - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - scheduler._add_seq_group_to_running(seq_group) - - # The last request should be swapped out. 
- scheduler.block_manager.append_slots = MagicMock() - scheduler.block_manager.append_slots.return_value = [(2, 3)] - - budget = create_token_budget() - output = scheduler._schedule_running(budget, curr_loras) - remaining_running = scheduler.running - assert len(remaining_running) == 0 - assert len(output.decode_seq_groups) == 1 - assert len(output.prefill_seq_groups) == 0 - assert len(output.preempted) == 0 - assert len(output.swapped_out) == 0 - # Nothing is preempted. - assert output.blocks_to_swap_out == [] - # Since append_slot returns the source -> dist mapping, it should - # be applied. - assert output.blocks_to_copy == [(2, 3)] - - -def test_schedule_swapped_max_loras(): - block_size = 4 - lora_config = LoRAConfig(max_lora_rank=8, max_loras=1) - scheduler = initialize_scheduler(lora_config=lora_config, - block_size=block_size, - num_cpu_blocks=32, - num_gpu_blocks=32) - curr_loras: set[int] = set() - blocks_to_swap_out: list[tuple[int, int]] = [] - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size, - lora_request=LoRARequest( - lora_name=str(i), - lora_int_id=i + 1, - lora_path="abc")) - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - scheduler._swap_out(seq_group, blocks_to_swap_out) - scheduler._add_seq_group_to_swapped(seq_group) - - budget = create_token_budget() - output = scheduler._schedule_swapped(budget, curr_loras) - remaining_swapped = scheduler.swapped - assert len(remaining_swapped) == 1 - assert budget.num_batched_tokens == 1 - assert budget.num_curr_seqs == 1 - assert len(output.decode_seq_groups) == 1 - assert len(output.prefill_seq_groups) == 0 - assert len(curr_loras) == 1 - - -def test_schedule_swapped_cannot_swap_in(): - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=32, - num_gpu_blocks=32) - curr_loras = None - blocks_to_swap_out: list[tuple[int, int]] = [] - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - scheduler._swap_out(seq_group, blocks_to_swap_out) - scheduler._add_seq_group_to_swapped(seq_group) - - # The last request should be swapped out. - scheduler.block_manager.can_swap_in = MagicMock() - scheduler.block_manager.can_swap_in.return_value = AllocStatus.LATER - # Since we cannot swap in, none of the requests are swapped in. - budget = create_token_budget() - output = scheduler._schedule_swapped(budget, curr_loras) - remaining_swapped = scheduler.swapped - assert len(remaining_swapped) == 2 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(output.decode_seq_groups) == 0 - assert len(output.prefill_seq_groups) == 0 - - -def test_infeasible_swap(): - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=32, - num_gpu_blocks=32) - curr_loras = None - blocks_to_swap_out: list[tuple[int, int]] = [] - for i in range(2): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=60, - block_size=block_size) - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - scheduler._swap_out(seq_group, blocks_to_swap_out) - scheduler._add_seq_group_to_swapped(seq_group) - - # The last request should be swapped out. 
- scheduler.block_manager.can_swap_in = MagicMock() - scheduler.block_manager.can_swap_in.return_value = AllocStatus.NEVER - # Since we cannot swap in, none of the requests are swapped in. - budget = create_token_budget() - output = scheduler._schedule_swapped(budget, curr_loras) - remaining_swapped = scheduler.swapped - assert len(remaining_swapped) == 0 - assert len(output.infeasible_seq_groups) == 2 - assert budget.num_batched_tokens == 0 - assert budget.num_curr_seqs == 0 - assert len(output.decode_seq_groups) == 0 - assert len(output.prefill_seq_groups) == 0 - - -def test_schedule_swapped_blocks_to_copy(): - block_size = 4 - scheduler = initialize_scheduler(block_size=block_size, - num_cpu_blocks=32, - num_gpu_blocks=32) - curr_loras = None - _, seq_group = create_dummy_prompt("1", - prompt_length=60, - block_size=block_size) - scheduler._allocate_and_set_running(seq_group) - append_new_token_seq_group(60, seq_group, 1) - blocks_to_swap_out: list[tuple[int, int]] = [] - scheduler._swap_out(seq_group, blocks_to_swap_out) - scheduler._add_seq_group_to_swapped(seq_group) - - # The last request should be swapped out. - scheduler.block_manager.append_slots = MagicMock() - scheduler.block_manager.append_slots.return_value = [(2, 3)] - - budget = create_token_budget() - output = scheduler._schedule_swapped(budget, curr_loras) - remaining_swapped = scheduler.swapped - assert len(remaining_swapped) == 0 - assert len(output.decode_seq_groups) == 1 - assert len(output.prefill_seq_groups) == 0 - assert output.blocks_to_copy == [(2, 3)] - - -def test_scheduling_budget(): - TOKEN_BUDGET = 4 - MAX_SEQS = 4 - budget = SchedulingBudget(token_budget=TOKEN_BUDGET, max_num_seqs=MAX_SEQS) - assert budget.can_schedule(num_new_tokens=1, num_new_seqs=1) - assert budget.can_schedule(num_new_tokens=4, num_new_seqs=4) - assert not budget.can_schedule(num_new_tokens=1, num_new_seqs=5) - assert not budget.can_schedule(num_new_tokens=5, num_new_seqs=1) - assert not budget.can_schedule(num_new_tokens=5, num_new_seqs=5) - assert budget.remaining_token_budget() == TOKEN_BUDGET - - # Verify add/subtract num batched tokens. - _, seq_group = create_dummy_prompt("1", 3) - budget.add_num_batched_tokens(seq_group.request_id, 2) - assert budget.remaining_token_budget() == 2 - assert budget.num_batched_tokens == 2 - assert budget.can_schedule(num_new_tokens=2, num_new_seqs=1) - assert not budget.can_schedule(num_new_tokens=3, num_new_seqs=1) - # Verify adding another seq group is no-op. - budget.add_num_batched_tokens(seq_group.request_id, 2) - assert budget.remaining_token_budget() == 2 - assert budget.num_batched_tokens == 2 - budget.subtract_num_batched_tokens(seq_group.request_id, 2) - assert budget.remaining_token_budget() == 4 - assert budget.num_batched_tokens == 0 - budget.subtract_num_batched_tokens(seq_group.request_id, 2) - assert budget.remaining_token_budget() == 4 - assert budget.num_batched_tokens == 0 - - # Verify add/subtract max seqs. - _, seq_group = create_dummy_prompt("1", 3) - budget.add_num_seqs(seq_group.request_id, 2) - assert budget.can_schedule(num_new_tokens=1, num_new_seqs=2) - assert not budget.can_schedule(num_new_tokens=1, num_new_seqs=3) - assert budget.num_curr_seqs == 2 - # Verify adding another seq group is no-op. 
- budget.add_num_seqs(seq_group.request_id, 2) - assert budget.num_curr_seqs == 2 - budget.subtract_num_seqs(seq_group.request_id, 2) - assert budget.num_curr_seqs == 0 - budget.subtract_num_seqs(seq_group.request_id, 2) - assert budget.num_curr_seqs == 0 - - -@pytest.mark.parametrize("enable_prefix_caching", [True, False]) -def test_prefix_caching_aware_prefills(enable_prefix_caching): - """ - Test the below scenario: - - For 3 sequences, seqA, seqB, seqC, share the first block as prefix. - - The test verifies the below scenarios: - 1. SeqA is first scheduled. - 2. SeqB and SeqC can be prefilled together in a single schedule round - even though there are not enough token budgets to prefill both without - considering prefix caching. - """ - - block_size = 4 - max_num_batched_tokens = 12 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_token_budget=max_num_batched_tokens, - max_num_seqs=max_seq_group, - max_model_len=max_num_batched_tokens, - enable_prefix_caching=enable_prefix_caching, - ) - - seqA_tokens = list(range(8)) - num_shared_tokens = 4 - seqB_tokens = seqA_tokens[:num_shared_tokens] + list(range( - 12, 16)) # Shared prefix first 4. - seqC_tokens = seqA_tokens[:num_shared_tokens] + list(range( - 16, 20)) # Shared prefix first 4. - - seqA, seqA_group = create_dummy_prompt("0", - prompt_tokens=seqA_tokens, - block_size=block_size) - seqB, seqB_group = create_dummy_prompt("1", - prompt_tokens=seqB_tokens, - block_size=block_size) - seqC, seqC_group = create_dummy_prompt("2", - prompt_tokens=seqC_tokens, - block_size=block_size) - - # Schedule seqA prefill. - scheduler.add_seq_group(seqA_group) - metas, out, _ = scheduler.schedule() - assert (len(out.scheduled_seq_groups) == 1 - and out.scheduled_seq_groups[0].seq_group == seqA_group) - assert out.scheduled_seq_groups[0].token_chunk_size == len(seqA_tokens) - - # Schedule seqA decode. - append_new_token_seq_group(len(seqA_tokens), seqA_group, 999) - metas, out, _ = scheduler.schedule() - - assert len(out.scheduled_seq_groups) == 1 - assert out.scheduled_seq_groups[0].seq_group == seqA_group - assert out.scheduled_seq_groups[0].token_chunk_size == 1 - - # Schedule seqB and seqC prefills should work with prefix caching. - scheduler.add_seq_group(seqB_group) - scheduler.add_seq_group(seqC_group) - metas, out, _ = scheduler.schedule() - - if enable_prefix_caching: - assert len(out.scheduled_seq_groups) == 2 - assert set([ - out.scheduled_seq_groups[0].seq_group, - out.scheduled_seq_groups[1].seq_group, - ]) == set([seqB_group, seqC_group]) - assert len(metas) == 2 - for meta in metas: - assert meta.token_chunk_size == 8 - assert (len(meta.computed_block_nums) == num_shared_tokens // - block_size) # 1 Block for the 8 tokens. - else: - assert len(out.scheduled_seq_groups) == 1 - assert len(metas) == 1 - assert metas[0].token_chunk_size == 8 - assert len(metas[0].computed_block_nums) == 0 # No blocks computed. - - -def test_no_multiple_partial_prefills_with_chunked_prefill_and_prefix_caching( -): - """ - This test verifies that we don't schedule new prefills if there's already - a continuous prefill in progress even though the new prefills with shared - prefix can fit in the token budget: - - - SeqA is being chunked prefill. - - SeqB with the same prompt shouldn't be scheduled for prefill even though - there's enough token budget to prefill the cached tokens. - - Neither should seqC be scheduled. 
- - - When seqA is in decoding phase, seqB and seqC can be scheduled. - - Entire seqB should be prefilled since it's a full prefix cache hit. - - SeqC would be partially prefilled with the prefix shared, and the - remaining unique tokens would be prefilled (rounded down to be - block-size aligned). - """ - - block_size = 2 - max_num_batched_tokens = 4 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_token_budget=max_num_batched_tokens, - max_num_seqs=max_seq_group, - max_model_len=100, - enable_prefix_caching=True, - enable_chunked_prefill=True, - ) - - seqA_tokens = list(range(8)) - seqB_tokens = seqA_tokens - seqC_shared_prefix_len = 4 - seqC_tokens = seqA_tokens[:seqC_shared_prefix_len] + list(range(12, 20)) - - seqA, seqA_group = create_dummy_prompt("0", - prompt_tokens=seqA_tokens, - block_size=block_size) - seqB, seqB_group = create_dummy_prompt("1", - prompt_tokens=seqB_tokens, - block_size=block_size) - - # Chunked prefill seqA. - scheduler.add_seq_group(seqA_group) - metas, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 1 - assert out.scheduled_seq_groups[0].seq_group == seqA_group - assert out.scheduled_seq_groups[0].token_chunk_size == 4 - - # seqB should not be scheduled with ongoing prefills. - scheduler.add_seq_group(seqB_group) - metas, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 1 - assert out.scheduled_seq_groups[0].seq_group == seqA_group - assert out.scheduled_seq_groups[0].token_chunk_size == 4 - - # both seqB and seqC can now be scheduled with seqA is over. - # seqA is in decoding phase. - append_new_token_seq(seqA, 999) - seqC, seqC_group = create_dummy_prompt("2", - prompt_tokens=seqC_tokens, - block_size=block_size) - scheduler.add_seq_group(seqC_group) - metas, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) == 3 - - metas = {meta.request_id: meta for meta in metas} - assert metas[seqA_group.request_id].token_chunk_size == 1 # Decode - assert (metas[seqB_group.request_id].token_chunk_size == 8 - ) # Fully cached prefill - assert ( - metas[seqC_group.request_id].token_chunk_size == 6 - ), "A partial prefix of C (4 tokens) should be prefilled, with the " - "remaining tokens fit into 3 token budget (4-1 from the seqA). It will " - "then be rounded down to 2 tokens on block size, thus 6 tokens in total." - - -def test_no_batches_mixed_with_prompt_tokens_and_prompt_embeds(): - """ - Test that the scheduler does not schedule batches with prompt tokens and - prompt embeddings co-mingled. 
- """ - block_size = 2 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - max_model_len=100, - enable_prefix_caching=True, - ) - - # the odd indexed inputs should be passed in via embeddings, - # evens via token_ids - seq_length = 7 - embedding_size = 5 - num_seqs = 11 - seq_tokens: list[list[int]] = [] - seq_embeds: list[Optional[torch.Tensor]] = [] - for i in range(num_seqs): - if i % 2: - seq_tokens.append(list(range(seq_length))) - seq_embeds.append(None) - else: - seq_tokens.append([0] * seq_length) - seq_embeds.append(torch.rand(embedding_size)) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens[i], - prompt_embeds=seq_embeds[i], - block_size=block_size) - for i in range(len(seq_tokens)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - while not all(seq.is_finished() for seq, _ in seq_and_seq_groups): - unfinished_seq_groups = [ - seq_group for _, seq_group in seq_and_seq_groups - if not seq_group.is_finished() - ] - _, out = schedule_and_update_computed_tokens(scheduler) - assert len(out.scheduled_seq_groups) > 0 - batch_is_prompt_embeds = out.scheduled_seq_groups[ - 0].seq_group.uses_prompt_embeds() - expected_scheduled_seq_groups = [ - seq_group for seq_group in unfinished_seq_groups - if seq_group.uses_prompt_embeds() == batch_is_prompt_embeds - ] - - # We should have as many scheduled groups as possible, without mixing - assert len(out.scheduled_seq_groups) == min( - max_seq_group, len(expected_scheduled_seq_groups)) - assert all(scheduled_seq_group.seq_group.uses_prompt_embeds() == - batch_is_prompt_embeds - for scheduled_seq_group in out.scheduled_seq_groups) - - # Finish the scheduled groups - for scheduled_seq_group in out.scheduled_seq_groups: - for seq in scheduled_seq_group.seq_group.seqs: - seq.status = SequenceStatus.FINISHED_STOPPED - scheduler.free_finished_seq_groups() - - -def test_remove_seq_from_computed_blocks_tracker(): - """ - Test that computed_blocks_tracker correctly removes stale sequences - during scheduling. - - The test covers 9 scheduling branches where stale seqs are removed: - - 1 in _schedule_swapped - - 1 in _schedule_priority_preemption - - 7 in _schedule_prefill - - Each branch is tested to ensure proper cleanup of - _seq_id_to_num_tokens_computed. - """ - # Budget can not schedule in swapped - block_size = 2 - max_seq_group = 3 - seq_tokens_with_swapped: list[list[int]] = [] - blocks_to_swap_out: list[tuple[int, int]] = [] - curr_loras: set[int] = set() - - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - enable_prefix_caching=True, - ) - budget = create_token_budget(token_budget=15) - - seq_length = 16 - num_seqs = 3 - for i in range(num_seqs): - seq_tokens_with_swapped.append([i] * seq_length) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_with_swapped[i], - block_size=block_size) - for i in range(len(seq_tokens_with_swapped)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler._allocate_and_set_running(seq_group) - scheduler._swap_out(seq_group, blocks_to_swap_out) - scheduler._add_seq_group_to_swapped(seq_group) - - scheduler._schedule_swapped(budget, curr_loras) - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. 
- _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None - - # Prefill schedule don't have a space for another LoRA, so - # we ignore this request for now. - block_size = 4 - lora_config = LoRAConfig(max_lora_rank=8, max_loras=1) - scheduler = initialize_scheduler(lora_config=lora_config, - block_size=block_size, - num_cpu_blocks=64, - num_gpu_blocks=64, - enable_prefix_caching=True) - budget = create_token_budget(token_budget=120) - num_seqs = 2 - for i in range(num_seqs): - _, seq_group = create_dummy_prompt(str(i), - prompt_length=seq_length, - block_size=block_size, - lora_request=LoRARequest( - lora_name=str(i), - lora_int_id=i + 1, - lora_path="abc")) - scheduler.add_seq_group(seq_group) - - scheduler._schedule_prefills(budget, curr_loras) - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None - - # Priority preemption schedule - scheduler._schedule_priority_preemption(budget) - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None - - # Prefill scheduler does not schedule batches with prompt tokens and - # prompt embeddings co-mingled. - block_size = 2 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - max_model_len=100, - enable_prefix_caching=True, - ) - seq_length = 7 - embedding_size = 5 - seq_tokens_with_embedding: list[list[int]] = [] - seq_embeds: list[Optional[torch.Tensor]] = [] - - seq_tokens_with_embedding.append(list(range(seq_length))) - seq_embeds.append(None) - seq_tokens_with_embedding.append([0] * seq_length) - seq_embeds.append(torch.rand(embedding_size)) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_with_embedding[i], - prompt_embeds=seq_embeds[i], - block_size=block_size) - for i in range(len(seq_tokens_with_embedding)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None - - # Prefill scheduler budget num_batched_tokens - # >= scheduler_config max_num_batched_tokens - block_size = 2 - max_seq_group = 3 - seq_tokens_prefill_budget: list[list[int]] = [] - - scheduler = initialize_scheduler( - block_size=block_size, - max_token_budget=8, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - max_model_len=5, - enable_prefix_caching=True, - ) - seq_length = 4 - num_seqs = 3 - for i in range(num_seqs): - seq_tokens_prefill_budget.append([i] * seq_length) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_prefill_budget[i], - block_size=block_size) - for i in range(len(seq_tokens_prefill_budget)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. 
- _seq_id_to_num_tokens_computed.get(2)) - assert seq_id_to_num_tokens_computed is None - - # Budget can not schedule in waiting - block_size = 2 - max_seq_group = 3 - - scheduler = initialize_scheduler( - block_size=block_size, - max_token_budget=30, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - max_model_len=30, - enable_prefix_caching=True, - ) - seq_length = 16 - num_seqs = 3 - seq_tokens_prefill_budget_waiting: list[list[int]] = [] - - for i in range(num_seqs): - seq_tokens_prefill_budget_waiting.append(list(range(seq_length))) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_prefill_budget_waiting[i], - block_size=block_size) - for i in range(len(seq_tokens_prefill_budget_waiting)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None - - # Sequence num_new_tokens > prompt_limit marked FINISHED_IGNORED - block_size = 2 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=16, - num_gpu_blocks=16, - max_num_seqs=max_seq_group, - max_model_len=30, - enable_prefix_caching=True, - ) - - seq_length = 31 - seq_tokens_prompt_limit: list[list[int]] = [] - seq_tokens_prompt_limit.append(list(range(seq_length))) - seq_and_seq_groups = [ - create_dummy_prompt("0", - prompt_tokens=seq_tokens_prompt_limit[0], - block_size=block_size) - ] - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(0)) - assert seq_id_to_num_tokens_computed is None - - # Budget can not allocate, AllocStatus is NEVER marked FINISHED_IGNORED - block_size = 2 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=160, - num_gpu_blocks=160, - max_num_seqs=max_seq_group, - max_model_len=320, - enable_prefix_caching=True, - ) - - seq_length = 320 - num_seqs = 1 - seq_tokens_never: list[list[int]] = [] - for i in range(num_seqs): - seq_tokens_never.append(list(range(seq_length))) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_never[i], - block_size=block_size) - for i in range(len(seq_tokens_never)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. 
- _seq_id_to_num_tokens_computed.get(0)) - assert seq_id_to_num_tokens_computed is None - - # Budget can not allocate, AllocStatus is LATER - block_size = 2 - max_seq_group = 3 - scheduler = initialize_scheduler( - block_size=block_size, - num_cpu_blocks=160, - num_gpu_blocks=160, - max_num_seqs=max_seq_group, - max_model_len=320, - enable_prefix_caching=True, - ) - - seq_length = 160 - num_seqs = 2 - seq_tokens_later: list[list[int]] = [] - for i in range(num_seqs): - seq_tokens_later.append(list(range(seq_length))) - - seq_and_seq_groups = [ - create_dummy_prompt(f"{i}", - prompt_tokens=seq_tokens_later[i], - block_size=block_size) - for i in range(len(seq_tokens_later)) - ] - - for _, seq_group in seq_and_seq_groups: - scheduler.add_seq_group(seq_group) - - scheduler._schedule_default() - seq_id_to_num_tokens_computed = ( - scheduler.block_manager._computed_blocks_tracker. - _seq_id_to_num_tokens_computed.get(1)) - assert seq_id_to_num_tokens_computed is None diff --git a/tests/core/test_serialization.py b/tests/core/test_serialization.py deleted file mode 100644 index ee9ac2129f2d..000000000000 --- a/tests/core/test_serialization.py +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import msgspec - -from vllm.executor.msgspec_utils import decode_hook, encode_hook -from vllm.sequence import ExecuteModelRequest - -from .utils import create_batch - - -def test_msgspec_serialization(): - num_lookahead_slots = 4 - seq_group_metadata_list, _, _ = create_batch(16, num_lookahead_slots) - execute_model_req = ExecuteModelRequest( - seq_group_metadata_list=seq_group_metadata_list, - num_lookahead_slots=num_lookahead_slots, - running_queue_size=4) - - encoder = msgspec.msgpack.Encoder(enc_hook=encode_hook) - decoder = msgspec.msgpack.Decoder(ExecuteModelRequest, - dec_hook=decode_hook) - req = decoder.decode(encoder.encode(execute_model_req)) - expected = execute_model_req.seq_group_metadata_list - actual = req.seq_group_metadata_list - assert (len(expected) == len(actual)) - expected = expected[0] - actual = actual[0] - - assert expected.block_tables == actual.block_tables - assert expected.is_prompt == actual.is_prompt - assert expected.request_id == actual.request_id - assert (expected.seq_data[0].prompt_token_ids == - actual.seq_data[0].prompt_token_ids) - assert (expected.seq_data[0].output_token_ids == - actual.seq_data[0].output_token_ids) diff --git a/tests/core/utils.py b/tests/core/utils.py deleted file mode 100644 index 033fffd2c4e2..000000000000 --- a/tests/core/utils.py +++ /dev/null @@ -1,392 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import time -from collections import defaultdict -from collections.abc import Sequence as GenericSequence -from itertools import count -from typing import Any, Optional, Union - -import torch - -from vllm.core.scheduler import Scheduler, SchedulerOutputs -from vllm.inputs import EncoderDecoderInputs, embeds_inputs, token_inputs -from vllm.lora.request import LoRARequest -from vllm.sampling_params import SamplingParams -from vllm.sequence import (Logprob, Sequence, SequenceData, SequenceGroup, - SequenceGroupMetadata) - - -def create_dummy_prompt( - request_id: str, - prompt_length: int = -1, - block_size: Optional[int] = None, - lora_request: Optional[LoRARequest] = None, - prompt_tokens: Optional[list[int]] = None, - prompt_embeds: Optional[torch.Tensor] = None, - min_tokens: int = 0, 
- max_tokens: int = 16, -) -> tuple[Sequence, SequenceGroup]: - if not block_size: - block_size = prompt_length - - if prompt_tokens is None: - # Create dummy prompt sequence with tokens 0...block_size-1 - # and prompt "0 ... block_size". - prompt_tokens = list(range(prompt_length)) - - prompt_str = " ".join([str(t) for t in prompt_tokens]) - inputs = token_inputs( - prompt_token_ids=prompt_tokens, - prompt=prompt_str) if prompt_embeds is None else embeds_inputs( - prompt_embeds=prompt_embeds) - prompt = Sequence( - int(request_id), - inputs=inputs, - block_size=block_size, - ) - seq_group = SequenceGroup( - request_id=request_id, - seqs=[prompt], - arrival_time=time.time(), - sampling_params=SamplingParams(max_tokens=max_tokens, - min_tokens=min_tokens), - lora_request=lora_request, - ) - - return prompt, seq_group - - -def create_dummy_lora_sequence(request_id: int, token_ids: list[int], - block_size: int, lora_int_id: int) -> Sequence: - return Sequence(seq_id=request_id, - inputs=token_inputs(token_ids), - block_size=block_size, - lora_request=LoRARequest(lora_name="dummy", - lora_path="/dummy", - lora_int_id=lora_int_id)) - - -def create_dummy_sequence(request_id: int, token_ids: list[int], - block_size: int) -> Sequence: - return Sequence( - seq_id=request_id, - inputs=token_inputs(token_ids), - block_size=block_size, - ) - - -def create_dummy_prompt_encoder_decoder( - request_id: str, - decoder_prompt_length: int, - encoder_prompt_length: int, - block_size: Optional[int] = None, - lora_request: Optional[LoRARequest] = None, -) -> tuple[Sequence, Sequence, SequenceGroup]: - if not block_size: - block_size = decoder_prompt_length - - # Create dummy prompt sequence with tokens 0...block_size-1 - # and prompt "0 ... block_size". Note that the prompt string - # doesn't actually match the tokens - decoder_prompt_tokens = list(range(decoder_prompt_length)) - decoder_prompt_str = " ".join([str(t) for t in decoder_prompt_tokens]) - encoder_prompt_tokens = list(reversed(list(range(encoder_prompt_length)))) - encoder_prompt_str = " ".join([str(t) for t in encoder_prompt_tokens]) - - inputs: EncoderDecoderInputs = { - "decoder": token_inputs(decoder_prompt_tokens, - prompt=decoder_prompt_str), - "encoder": token_inputs(encoder_prompt_tokens, - prompt=encoder_prompt_str), - } - - decoder_prompt = Sequence(int(request_id), - inputs=inputs["decoder"], - block_size=block_size) - - encoder_prompt = Sequence(int(request_id), - inputs=inputs["encoder"], - block_size=block_size) - - seq_group = SequenceGroup(request_id=request_id, - seqs=[decoder_prompt], - arrival_time=time.time(), - lora_request=lora_request, - encoder_seq=encoder_prompt) - - return decoder_prompt, encoder_prompt, seq_group - - -def create_seq_group( - seq_prompt_len: int = 1024, - seq_output_lens: GenericSequence[int] = (128, ), - request_id: str = '0', - seq_id_start: int = 0, - sampling_params: Optional[SamplingParams] = None) -> SequenceGroup: - - assert len(seq_output_lens) > 0 - - if sampling_params is None: - sampling_params = SamplingParams() - - prompt_token_ids = [0] * seq_prompt_len - - seqs: list[Sequence] = [] - for seq_id_offset, output_len in enumerate(seq_output_lens): - seq = Sequence( - seq_id=seq_id_start + seq_id_offset, - inputs=token_inputs(prompt_token_ids), - block_size=16, - ) - - for i in range(output_len): - seq.append_token_id( - token_id=i, - logprobs={i: Logprob(0.0)}, - ) - seqs.append(seq) - - seq_group = SequenceGroup( - request_id=request_id, - seqs=seqs, - sampling_params=sampling_params, - 
arrival_time=time.time(), - ) - - return seq_group - - -def create_seq_group_encoder_decoder( - seq_prompt_len: int = 1024, - seq_output_lens: GenericSequence[int] = (128, ), - request_id: str = '0', - seq_id_start: int = 0, - sampling_params: Optional[SamplingParams] = None) -> SequenceGroup: - - assert len(seq_output_lens) > 0 - - if sampling_params is None: - sampling_params = SamplingParams() - - prompt_token_ids = [0] * seq_prompt_len - - inputs: EncoderDecoderInputs = { - "decoder": token_inputs(prompt_token_ids), - "encoder": token_inputs(prompt_token_ids), - } - - seqs = [] - for seq_id_offset, output_len in enumerate(seq_output_lens): - # Construct decoder input sequences - seq = Sequence( - seq_id=seq_id_start + seq_id_offset, - inputs=inputs["decoder"], - block_size=16, - ) - - for i in range(output_len): - seq.append_token_id( - token_id=i, - logprobs={i: Logprob(0.0)}, - ) - seqs.append(seq) - - # Encoder input sequence - encoder_seq = Sequence( - seq_id=seq_id_start + len(seq_output_lens), - inputs=inputs["encoder"], - block_size=16, - ) - - return SequenceGroup(request_id=request_id, - seqs=seqs, - sampling_params=sampling_params, - arrival_time=time.time(), - encoder_seq=encoder_seq) - - -def round_up_to_next_block(seq_len: int, block_size: int) -> int: - return (seq_len + block_size - 1) // block_size - - -# Helper functions for scheduler tests - - -def get_sequence_groups(scheduler_output): - return [s.seq_group for s in scheduler_output.scheduled_seq_groups] - - -def append_new_token(out, token_id: int): - seq_groups = get_sequence_groups(out) - for seq_group in seq_groups: - for seq in seq_group.get_seqs(): - seq.append_token_id(token_id, {token_id: Logprob(token_id)}) - - -def schedule_and_update_computed_tokens(scheduler): - metas, out, _ = scheduler.schedule() - for s in out.scheduled_seq_groups: - s.seq_group.update_num_computed_tokens(s.token_chunk_size) - return metas, out - - -def append_new_token_seq(seq: Sequence, token_id: int): - seq.append_token_id(token_id, {token_id: Logprob(token_id)}) - - -def append_new_token_seq_group(token_chunk_size, seq_group, token_id: int): - seq_group.update_num_computed_tokens(token_chunk_size) - for seq in seq_group.get_seqs(): - seq.append_token_id(token_id, {token_id: Logprob(token_id)}) - - -class SchedulerProxy: - """ - A proxy class to forward calls to the scheduler. 
- """ - - def __init__(self, scheduler: Scheduler): - self.scheduler_ = scheduler - self.call_history: dict[str, list[Any]] = defaultdict(list) - - def __getattr__(self, name: str) -> Any: - - def wrapper(*args, **kwargs): - result = getattr(self.scheduler_, name)(*args, **kwargs) - self.call_history[name].append((args, kwargs, result)) - return result - - return wrapper - - def last_schedule_ret( - self, ) -> tuple[list[SequenceGroupMetadata], SchedulerOutputs, Any]: - _, _, ret = self.call_history["schedule"][-1] - return ret - - -def create_seq_group_metadata_from_prompts( - prompts: list[list[int]], - num_gpu_blocks: int, - block_size: int, - final_prompt_lens: list[int], - continuations: Optional[list[list[int]]] = None, - seq_ids: Optional[list[int]] = None, -) -> list[SequenceGroupMetadata]: - - if continuations is None: - continuations = [[] for _ in prompts] - - if seq_ids is None: - seq_ids = list(i for i, _ in enumerate(prompts)) - - free_gpu_blocks = list(range(num_gpu_blocks)) - - block_allocations = { - i: [ - free_gpu_blocks.pop() - for _ in range(round_up_to_next_block(final_len, block_size)) - ] - for i, final_len in enumerate(final_prompt_lens) - } - - seq_grou_metadata_list = [] - for i, (prompt_token_ids, - cont_token_ids) in enumerate(zip(prompts, continuations)): - data = SequenceData.from_seqs(prompt_token_ids, cont_token_ids) - data.update_num_computed_tokens( - len(prompt_token_ids) + len(cont_token_ids) - 1) - seq_data = {i: data} - seq_grou_metadata_list.append( - SequenceGroupMetadata( - request_id=str(i), - is_prompt=len(cont_token_ids) == 0, - seq_data=seq_data, - sampling_params=SamplingParams(temperature=0.0), - block_tables={i: block_allocations[i][:]}, - )) - return seq_grou_metadata_list - - -def create_chunked_seq_group_metadata_from_prompt( - prompt: list[int], - num_gpu_blocks: int, - chunk_size: int, - block_size: int, - seq_id: Optional[int] = None) -> list[SequenceGroupMetadata]: - - if seq_id is None: - seq_id = 0 - - free_gpu_blocks = list(range(num_gpu_blocks)) - - block_allocations = [ - free_gpu_blocks.pop() - for _ in range(round_up_to_next_block(len(prompt), block_size)) - ] - - seq_group_metadata_list = [] - for i, idx in enumerate(range(0, len(prompt), chunk_size)): - chunk_ids = prompt[idx:idx + chunk_size] - data = SequenceData.from_seqs(prompt) - data.update_num_computed_tokens(idx) - seq_data = {i: data} - seq_group_metadata_list.append( - SequenceGroupMetadata( - request_id=str(seq_id), - is_prompt=True, - do_sample=idx + chunk_size >= len(prompt), # terminal chunk - seq_data=seq_data, - sampling_params=SamplingParams(temperature=0.0), - block_tables={i: block_allocations}, - token_chunk_size=len(chunk_ids))) - return seq_group_metadata_list - - -def create_batch(batch_size, - k, - prompt_len: Union[int, list[int]] = 10, - prev_output_token_len: int = 10, - seq_ids: Optional[list[int]] = None, - num_gpu_blocks: Optional[int] = None, - block_size: Optional[int] = None, - prefill_chunk_size: Optional[int] = None): - if block_size is None: - block_size = 8 - - if num_gpu_blocks is None: - num_gpu_blocks = 2048 // block_size - - iterator = count() - - if isinstance(prompt_len, int): - prompt_lens = [prompt_len for _ in range(batch_size)] - else: - prompt_lens = prompt_len - - prompts = [[next(iterator) for _ in range(p_len)] for p_len in prompt_lens] - - if prefill_chunk_size: - # Create a batch of chunked prompts. 
- if not seq_ids: - seq_ids = list(range(len(prompts))) - seq_group_metadata_list = [] - for p, sid in zip(prompts, seq_ids): - seq_group_metadata_list += \ - create_chunked_seq_group_metadata_from_prompt( - p, num_gpu_blocks, prefill_chunk_size, block_size, sid) - seq_group_metadata_list = seq_group_metadata_list[:batch_size] - prev_output_tokens = [] - else: - prev_output_tokens = [[ - next(iterator) for _ in range(prev_output_token_len) - ] for _ in range(batch_size)] - final_prompt_lens = [ - len(prompt) + len(prev_output_token) + k + 1 - for prompt, prev_output_token in zip(prompts, prev_output_tokens) - ] - - seq_group_metadata_list = create_seq_group_metadata_from_prompts( - prompts, num_gpu_blocks, block_size, final_prompt_lens, - prev_output_tokens, seq_ids) - return seq_group_metadata_list, prompts, prev_output_tokens From 4aa8c7b0477de2cd0f6f1a46437f46e4cb00bae3 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 17 Sep 2025 09:46:29 -0700 Subject: [PATCH 051/518] cleanup: remove adapter commons (#25045) Signed-off-by: Jee Jee Li Co-authored-by: Jee Jee Li --- pyproject.toml | 1 - vllm/adapter_commons/__init__.py | 0 vllm/adapter_commons/layers.py | 16 ---- vllm/adapter_commons/models.py | 106 ------------------------- vllm/adapter_commons/request.py | 26 ------ vllm/adapter_commons/utils.py | 93 ---------------------- vllm/adapter_commons/worker_manager.py | 39 --------- vllm/lora/layers/utils.py | 11 ++- vllm/lora/models.py | 77 ++++++++++++------ vllm/lora/request.py | 6 +- vllm/lora/worker_manager.py | 44 ++++++---- 11 files changed, 89 insertions(+), 330 deletions(-) delete mode 100644 vllm/adapter_commons/__init__.py delete mode 100644 vllm/adapter_commons/layers.py delete mode 100644 vllm/adapter_commons/models.py delete mode 100644 vllm/adapter_commons/request.py delete mode 100644 vllm/adapter_commons/utils.py delete mode 100644 vllm/adapter_commons/worker_manager.py diff --git a/pyproject.toml b/pyproject.toml index f5a44f56f416..fe55461db00b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -115,7 +115,6 @@ follow_imports = "silent" # move the directory here and remove it from tools/mypy.sh files = [ "vllm/*.py", - "vllm/adapter_commons", "vllm/assets", "vllm/entrypoints", "vllm/core", diff --git a/vllm/adapter_commons/__init__.py b/vllm/adapter_commons/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/adapter_commons/layers.py b/vllm/adapter_commons/layers.py deleted file mode 100644 index 9753a0880656..000000000000 --- a/vllm/adapter_commons/layers.py +++ /dev/null @@ -1,16 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from dataclasses import dataclass - - -@dataclass -class AdapterMapping: - # Per every token in input_ids: - index_mapping: tuple[int, ...] - # Per sampled token: - prompt_mapping: tuple[int, ...] 
- - def __post_init__(self): - self.index_mapping = tuple(self.index_mapping) - self.prompt_mapping = tuple(self.prompt_mapping) \ No newline at end of file diff --git a/vllm/adapter_commons/models.py b/vllm/adapter_commons/models.py deleted file mode 100644 index 7b685880a9e6..000000000000 --- a/vllm/adapter_commons/models.py +++ /dev/null @@ -1,106 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from abc import ABC, abstractmethod -from typing import Any, Callable, Optional, TypeVar - -from torch import nn - -from vllm.logger import init_logger -from vllm.utils import LRUCache - -logger = init_logger(__name__) - - -class AdapterModel(ABC): - - def __init__(self, model_id=None): - self.id = model_id - - @abstractmethod - def from_local_checkpoint(cls, model_dir, model_id=None, **kwargs): - # Common initialization code - # Load weights or embeddings from local checkpoint - raise NotImplementedError("Subclasses must implement this method.") - - -T = TypeVar('T') - - -class AdapterLRUCache(LRUCache[int, T]): - - def __init__(self, capacity: int, deactivate_fn: Callable[[int], object]): - super().__init__(capacity) - self.deactivate_fn = deactivate_fn - - def _on_remove(self, key: int, value: Optional[T]): - logger.debug("Removing adapter int id: %d", key) - self.deactivate_fn(key) - return super()._on_remove(key, value) - - -class AdapterModelManager(ABC): - - def __init__( - self, - model: nn.Module, - ): - """Create a AdapterModelManager and adapter for a given model. - Args: - model: the model to be adapted. - """ - self.model: nn.Module = model - self._registered_adapters: dict[int, Any] = {} - # Dict instead of a Set for compatibility with LRUCache. - self._active_adapters: dict[int, None] = {} - self.adapter_type = 'Adapter' - self._last_mapping = None - - def __len__(self) -> int: - return len(self._registered_adapters) - - @property - @abstractmethod - def adapter_slots(self) -> int: - raise NotImplementedError - - @property - @abstractmethod - def capacity(self) -> int: - raise NotImplementedError - - @abstractmethod - def activate_adapter(self, adapter_id: int) -> bool: - raise NotImplementedError - - @abstractmethod - def deactivate_adapter(self, adapter_id: int) -> bool: - raise NotImplementedError - - @abstractmethod - def add_adapter(self, adapter: Any) -> bool: - raise NotImplementedError - - @abstractmethod - def set_adapter_mapping(self, mapping: Any) -> None: - raise NotImplementedError - - @abstractmethod - def remove_adapter(self, adapter_id: int) -> bool: - raise NotImplementedError - - @abstractmethod - def remove_all_adapters(self) -> None: - raise NotImplementedError - - @abstractmethod - def get_adapter(self, adapter_id: int) -> Optional[Any]: - raise NotImplementedError - - @abstractmethod - def list_adapters(self) -> dict[int, Any]: - raise NotImplementedError - - @abstractmethod - def pin_adapter(self, adapter_id: int) -> bool: - raise NotImplementedError diff --git a/vllm/adapter_commons/request.py b/vllm/adapter_commons/request.py deleted file mode 100644 index 8135b54ba19f..000000000000 --- a/vllm/adapter_commons/request.py +++ /dev/null @@ -1,26 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from abc import ABC, abstractmethod - - -class AdapterRequest(ABC): - """ - Base class for adapter requests. 
- """ - - @property - @abstractmethod - def adapter_id(self) -> int: - raise NotImplementedError - - def __post_init__(self) -> None: - if self.adapter_id < 1: - raise ValueError(f"id must be > 0, got {self.adapter_id}") - - def __eq__(self, value: object) -> bool: - return isinstance( - value, self.__class__) and self.adapter_id == value.adapter_id - - def __hash__(self) -> int: - return hash(self.adapter_id) diff --git a/vllm/adapter_commons/utils.py b/vllm/adapter_commons/utils.py deleted file mode 100644 index a1a56b6bbd4b..000000000000 --- a/vllm/adapter_commons/utils.py +++ /dev/null @@ -1,93 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import Any, Callable, Optional - - -## model functions -def deactivate_adapter(adapter_id: int, active_adapters: dict[int, None], - deactivate_func: Callable) -> bool: - if adapter_id in active_adapters: - deactivate_func(adapter_id) - active_adapters.pop(adapter_id) - return True - return False - - -def add_adapter(adapter: Any, registered_adapters: dict[int, Any], - capacity: int, add_func: Callable) -> bool: - if adapter.id not in registered_adapters: - if len(registered_adapters) >= capacity: - raise RuntimeError('No free adapter slots.') - add_func(adapter) - registered_adapters[adapter.id] = adapter - return True - return False - - -def set_adapter_mapping(mapping: Any, last_mapping: Any, - set_mapping_func: Callable) -> Any: - if last_mapping != mapping: - set_mapping_func(mapping) - return mapping - return last_mapping - - -def remove_adapter(adapter_id: int, registered_adapters: dict[int, Any], - deactivate_func: Callable) -> bool: - deactivate_func(adapter_id) - return bool(registered_adapters.pop(adapter_id, None)) - - -def list_adapters(registered_adapters: dict[int, Any]) -> dict[int, Any]: - return dict(registered_adapters) - - -def get_adapter(adapter_id: int, - registered_adapters: dict[int, Any]) -> Optional[Any]: - return registered_adapters.get(adapter_id) - - -## worker functions -def set_active_adapters_worker(requests: set[Any], mapping: Optional[Any], - apply_adapters_func, - set_adapter_mapping_func) -> None: - apply_adapters_func(requests) - set_adapter_mapping_func(mapping) - - -def add_adapter_worker(adapter_request: Any, list_adapters_func, - load_adapter_func, add_adapter_func, - activate_adapter_func) -> bool: - if adapter_request.adapter_id in list_adapters_func(): - return False - loaded_adapter = load_adapter_func(adapter_request) - loaded = add_adapter_func(loaded_adapter) - activate_adapter_func(loaded_adapter.id) - return loaded - - -def apply_adapters_worker(adapter_requests: set[Any], list_adapters_func, - adapter_slots: int, remove_adapter_func, - add_adapter_func) -> None: - models_that_exist = list_adapters_func() - models_map = { - adapter_request.adapter_id: adapter_request - for adapter_request in adapter_requests if adapter_request - } - if len(models_map) > adapter_slots: - raise RuntimeError( - f"Number of requested models ({len(models_map)}) is greater " - f"than the number of GPU model slots " - f"({adapter_slots}).") - new_models = set(models_map) - models_to_add = new_models - models_that_exist - models_to_remove = models_that_exist - new_models - for adapter_id in models_to_remove: - remove_adapter_func(adapter_id) - for adapter_id in models_to_add: - add_adapter_func(models_map[adapter_id]) - - -def list_adapters_worker(adapter_manager_list_adapters_func) -> set[int]: - return 
set(adapter_manager_list_adapters_func()) diff --git a/vllm/adapter_commons/worker_manager.py b/vllm/adapter_commons/worker_manager.py deleted file mode 100644 index 07e85d138ac5..000000000000 --- a/vllm/adapter_commons/worker_manager.py +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from abc import ABC, abstractmethod -from typing import Any, Optional - -import torch - - -class AbstractWorkerManager(ABC): - - def __init__(self, device: torch.device): - self.device = device - - @property - @abstractmethod - def is_enabled(self) -> bool: - raise NotImplementedError - - @abstractmethod - def set_active_adapters(self, requests: set[Any], - mapping: Optional[Any]) -> None: - raise NotImplementedError - - @abstractmethod - def add_adapter(self, adapter_request: Any) -> bool: - raise NotImplementedError - - @abstractmethod - def remove_adapter(self, adapter_id: int) -> bool: - raise NotImplementedError - - @abstractmethod - def remove_all_adapters(self) -> None: - raise NotImplementedError - - @abstractmethod - def list_adapters(self) -> set[int]: - raise NotImplementedError diff --git a/vllm/lora/layers/utils.py b/vllm/lora/layers/utils.py index 27dcd720fbde..772d32a44c22 100644 --- a/vllm/lora/layers/utils.py +++ b/vllm/lora/layers/utils.py @@ -1,17 +1,22 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project + from dataclasses import dataclass import torch import torch.nn as nn -from vllm.adapter_commons.layers import AdapterMapping - @dataclass -class LoRAMapping(AdapterMapping): +class LoRAMapping: + index_mapping: tuple[int, ...] + prompt_mapping: tuple[int, ...] is_prefill: bool = False + def __post_init__(self): + self.index_mapping = tuple(self.index_mapping) + self.prompt_mapping = tuple(self.prompt_mapping) + def _get_lora_device(base_layer: nn.Module) -> torch.device: # code borrowed from https://github.com/fmmoret/vllm/blob/fm-support-lora-on-quantized-models/vllm/lora/layers.py#L34 diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 771243805491..25f90f2fa932 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -4,18 +4,13 @@ import math import os from collections.abc import Sequence -from typing import Any, Callable, Optional, Union +from typing import Callable, Optional, TypeVar, Union import regex as re import safetensors.torch import torch from torch import nn -from vllm.adapter_commons.models import (AdapterLRUCache, AdapterModel, - AdapterModelManager) -from vllm.adapter_commons.utils import (add_adapter, deactivate_adapter, - get_adapter, list_adapters, - remove_adapter, set_adapter_mapping) from vllm.config.lora import LoRAConfig from vllm.logger import init_logger from vllm.lora.layers import BaseLayerWithLoRA, LoRAMapping @@ -33,10 +28,25 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.utils import PPMissingLayer, WeightsMapper from vllm.model_executor.utils import get_packed_modules_mapping -from vllm.utils import is_pin_memory_available +from vllm.utils import LRUCache, is_pin_memory_available logger = init_logger(__name__) +T = TypeVar("T") + + +class AdapterLRUCache(LRUCache[int, T]): + + def __init__(self, capacity: int, deactivate_fn: Callable[[int], object]): + super().__init__(capacity) + self.deactivate_fn = deactivate_fn + + def _on_remove(self, key: int, value: Optional[T]): + logger.debug("Removing adapter int id: %d", key) + 
self.deactivate_fn(key) + return super()._on_remove(key, value) + + _GLOBAL_LORA_ID = 0 @@ -57,7 +67,7 @@ def is_moe_model(model: nn.Module) -> bool: return False -class LoRAModel(AdapterModel): +class LoRAModel: """A LoRA fine-tuned model.""" def __init__( @@ -313,7 +323,7 @@ def check_unexpected_modules(modules: dict): weights_mapper=weights_mapper) -class LoRAModelManager(AdapterModelManager): +class LoRAModelManager: """A manager that manages multiple LoRA-fine-tuned models.""" def __init__( @@ -336,6 +346,11 @@ def __init__( vocab_size: the vocab size of the model. lora_config: the LoRA configuration. """ + self.model: SupportsLoRA = model + self._registered_adapters: dict[int, LoRAModel] = {} + # Dict instead of a set for compatibility with LRUCache. + self._active_adapters: dict[int, None] = {} + self.adapter_type = "LoRA" self.lora_config = lora_config self.device = device self.max_num_seqs = max_num_seqs @@ -347,9 +362,8 @@ def __init__( max_num_batched_tokens, max_batches=self.max_num_seqs, device=self.device, - max_loras=self.lora_config.max_loras) - - super().__init__(model) + max_loras=self.lora_config.max_loras, + ) self.supported_lora_modules = get_supported_lora_modules(self.model) assert self.supported_lora_modules, "No supported LoRA modules found in" @@ -370,7 +384,9 @@ def __init__( self._last_mapping: Optional[LoRAMapping] = None self._create_lora_modules() self.model.lora_manager = self - self.adapter_type = 'LoRA' + + def __len__(self) -> int: + return len(self._registered_adapters) @property def capacity(self) -> int: @@ -669,28 +685,39 @@ def _get_lora_layer_weights( return lora_model.get_lora(org_module_name) def deactivate_adapter(self, adapter_id: int) -> bool: - return deactivate_adapter(adapter_id, self._active_adapters, - self._deactivate_adapter) + if adapter_id not in self._active_adapters: + return False + self._deactivate_adapter(adapter_id) + self._active_adapters.pop(adapter_id, None) + return True def add_adapter(self, adapter: LoRAModel) -> bool: logger.debug("Adding lora. 
Model id: %d, " "int id: %d", adapter.id, adapter.id) - return add_adapter(adapter, self._registered_adapters, self.capacity, - self._add_adapter) + if adapter.id in self._registered_adapters: + return False + if len(self._registered_adapters) >= self.capacity: + raise RuntimeError("No free adapter slots.") + self._add_adapter(adapter) + return True def set_adapter_mapping(self, mapping: LoRAMapping) -> None: - self._last_mapping = set_adapter_mapping(mapping, self._last_mapping, - self._set_adapter_mapping) + if self._last_mapping != mapping: + self._set_adapter_mapping(mapping) + self._last_mapping = mapping def remove_adapter(self, adapter_id: int) -> bool: - return remove_adapter(adapter_id, self._registered_adapters, - self.deactivate_adapter) + self.deactivate_adapter(adapter_id) + if adapter_id not in self._registered_adapters: + return False + self._registered_adapters.pop(adapter_id, None) + return True - def list_adapters(self) -> dict[int, Any]: - return list_adapters(self._registered_adapters) + def list_adapters(self) -> dict[int, LoRAModel]: + return dict(self._registered_adapters) - def get_adapter(self, adapter_id: int) -> Optional[Any]: - return get_adapter(adapter_id, self._registered_adapters) + def get_adapter(self, adapter_id: int) -> Optional[LoRAModel]: + return self._registered_adapters.get(adapter_id) class LoRALRUCache(AdapterLRUCache[LoRAModel]): diff --git a/vllm/lora/request.py b/vllm/lora/request.py index 5bbba7830c1b..523525d46f0b 100644 --- a/vllm/lora/request.py +++ b/vllm/lora/request.py @@ -6,8 +6,6 @@ import msgspec -from vllm.adapter_commons.request import AdapterRequest - class LoRARequest( msgspec.Struct, @@ -24,8 +22,6 @@ class LoRARequest( lora_int_id must be globally unique for a given adapter. This is currently not enforced in vLLM. """ - __metaclass__ = AdapterRequest - lora_name: str lora_int_id: int lora_path: str = "" @@ -35,6 +31,8 @@ class LoRARequest( tensorizer_config_dict: Optional[dict] = None def __post_init__(self): + if self.lora_int_id < 1: + raise ValueError(f"id must be > 0, got {self.lora_int_id}") if self.lora_local_path: warnings.warn( "The 'lora_local_path' attribute is deprecated " diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py index 3a807b1e161d..e27b7d5fcf22 100644 --- a/vllm/lora/worker_manager.py +++ b/vllm/lora/worker_manager.py @@ -6,11 +6,6 @@ import torch -from vllm.adapter_commons.utils import (add_adapter_worker, - apply_adapters_worker, - list_adapters_worker, - set_active_adapters_worker) -from vllm.adapter_commons.worker_manager import AbstractWorkerManager from vllm.config.lora import LoRAConfig from vllm.logger import init_logger from vllm.lora.models import (LoRAModel, LoRAModelManager, @@ -22,7 +17,7 @@ logger = init_logger(__name__) -class WorkerLoRAManager(AbstractWorkerManager): +class WorkerLoRAManager: """WorkerLoRAManager that manages LoRA models on the worker side. Every request, the requested LoRAs will be loaded (unless they are already @@ -51,7 +46,7 @@ def __init__( self.vocab_size = vocab_size self.lora_config = lora_config self.max_position_embeddings = max_position_embeddings - super().__init__(device) + self.device = device # Lazily initialized by create_lora_manager. 
self._adapter_manager: LoRAModelManager @@ -164,19 +159,34 @@ def pin_adapter(self, adapter_id: int) -> bool: def set_active_adapters(self, requests: set[Any], mapping: Optional[Any]) -> None: - set_active_adapters_worker(requests, mapping, self._apply_adapters, - self._adapter_manager.set_adapter_mapping) + self._apply_adapters(requests) + if mapping is not None: + self._adapter_manager.set_adapter_mapping(mapping) def _apply_adapters(self, adapter_requests: set[Any]) -> None: - apply_adapters_worker(adapter_requests, self.list_adapters, - self._adapter_manager.adapter_slots, - self.remove_adapter, self.add_adapter) + existing_adapters = self.list_adapters() + models_map = { + adapter_request.adapter_id: adapter_request + for adapter_request in adapter_requests if adapter_request + } + if len(models_map) > self._adapter_manager.adapter_slots: + raise RuntimeError( + f"Number of requested models ({len(models_map)}) is greater " + "than the number of GPU model slots " + f"({self._adapter_manager.adapter_slots}).") + requested_ids = set(models_map) + for adapter_id in existing_adapters - requested_ids: + self.remove_adapter(adapter_id) + for adapter_id in requested_ids - existing_adapters: + self.add_adapter(models_map[adapter_id]) def add_adapter(self, adapter_request: Any) -> bool: - return add_adapter_worker(adapter_request, self.list_adapters, - self._load_adapter, - self._adapter_manager.add_adapter, - self._adapter_manager.activate_adapter) + if adapter_request.adapter_id in self.list_adapters(): + return False + loaded_adapter = self._load_adapter(adapter_request) + loaded = self._adapter_manager.add_adapter(loaded_adapter) + self._adapter_manager.activate_adapter(loaded_adapter.id) + return loaded def remove_adapter(self, adapter_id: int) -> bool: return self._adapter_manager.remove_adapter(adapter_id) @@ -185,7 +195,7 @@ def remove_all_adapters(self): self._adapter_manager.remove_all_adapters() def list_adapters(self) -> set[int]: - return list_adapters_worker(self._adapter_manager.list_adapters) + return set(self._adapter_manager.list_adapters()) class LRUCacheWorkerLoRAManager(WorkerLoRAManager): From d6a518fdde9780f5c9aabe8cf1f2fafd29af3cbc Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 17 Sep 2025 09:47:40 -0700 Subject: [PATCH 052/518] Remove unused find_cuda_init helper script (#25044) --- find_cuda_init.py | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 find_cuda_init.py diff --git a/find_cuda_init.py b/find_cuda_init.py deleted file mode 100644 index 308fc6fc2d61..000000000000 --- a/find_cuda_init.py +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import importlib -import traceback -from typing import Callable -from unittest.mock import patch - - -def find_cuda_init(fn: Callable[[], object]) -> None: - """ - Helper function to debug CUDA re-initialization errors. - - If `fn` initializes CUDA, prints the stack trace of how this happens. 
- """ - from torch.cuda import _lazy_init - - stack = None - - def wrapper(): - nonlocal stack - stack = traceback.extract_stack() - return _lazy_init() - - with patch("torch.cuda._lazy_init", wrapper): - fn() - - if stack is not None: - print("==== CUDA Initialized ====") - print("".join(traceback.format_list(stack)).strip()) - print("==========================") - - -if __name__ == "__main__": - find_cuda_init( - lambda: importlib.import_module("vllm.model_executor.models.llava")) From 99cc41ad50c08e745571abe568226f9fcae61ccd Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 09:50:07 -0700 Subject: [PATCH 053/518] [V0 Deprecation] Remove unused output processor util (#25023) Signed-off-by: Woosuk Kwon --- vllm/engine/output_processor/util.py | 28 ---------------------------- 1 file changed, 28 deletions(-) delete mode 100644 vllm/engine/output_processor/util.py diff --git a/vllm/engine/output_processor/util.py b/vllm/engine/output_processor/util.py deleted file mode 100644 index 1e127eb98242..000000000000 --- a/vllm/engine/output_processor/util.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import List -from typing import Sequence as GenericSequence -from typing import cast - -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.sequence import CompletionSequenceGroupOutput, SequenceGroupOutput - - -def create_output_by_sequence_group( - outputs: GenericSequence[SamplerOutput], - num_seq_groups: int) -> List[List[SequenceGroupOutput]]: - """Helper method which transforms a 2d list organized by - [step][sequence group] into [sequence group][step]. - """ - output_by_sequence_group: List[List[CompletionSequenceGroupOutput]] = [ - [] for _ in range(num_seq_groups) - ] - for step in outputs: - sequence_group_output: CompletionSequenceGroupOutput - for i, sequence_group_output in enumerate(step): - output_by_sequence_group[i].append(sequence_group_output) - - # Cast to the more generic type that CompletionSequenceGroupOutput - # inherits from. 
- return cast(List[List[SequenceGroupOutput]], output_by_sequence_group) From 8b32464ac13fcafe32bebb2fb78447a3e762bb16 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 17 Sep 2025 13:21:28 -0400 Subject: [PATCH 054/518] Change log level from info to debug for IOProcessor (#24999) Signed-off-by: Michael Goin --- vllm/plugins/io_processors/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/plugins/io_processors/__init__.py b/vllm/plugins/io_processors/__init__.py index c5c4f6f8d97c..3b17211b1b83 100644 --- a/vllm/plugins/io_processors/__init__.py +++ b/vllm/plugins/io_processors/__init__.py @@ -33,7 +33,7 @@ def get_io_processor( model_plugin = config_plugin if model_plugin is None: - logger.info("No IOProcessor plugins requested by the model") + logger.debug("No IOProcessor plugins requested by the model") return None logger.debug("IOProcessor plugin to be loaded %s", model_plugin) From eb68c2dcd972fdeca7908268e4ba35c77a699f82 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 11:03:16 -0700 Subject: [PATCH 055/518] [CI] Revert back prepare_prompts and check_answers (#25087) Signed-off-by: Woosuk Kwon --- tests/models/test_transformers.py | 3 +- tests/utils.py | 47 +++++++++++++++++++ .../v1/e2e/test_correctness_sliding_window.py | 3 +- 3 files changed, 49 insertions(+), 4 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 66ff8f7a54d3..ba9c3bebc437 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -8,8 +8,7 @@ from vllm.platforms import current_platform from ..conftest import HfRunner, VllmRunner -from ..core.block.e2e.test_correctness_sliding_window import prep_prompts -from ..utils import multi_gpu_test +from ..utils import multi_gpu_test, prep_prompts from .utils import check_logprobs_close diff --git a/tests/utils.py b/tests/utils.py index 16e1e6039329..9a27c3de4533 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -8,6 +8,7 @@ import importlib import json import os +import random import signal import subprocess import sys @@ -1150,3 +1151,49 @@ def override_cutlass_fp8_supported(value: bool): "vllm.model_executor.layers.quantization.utils.w8a8_utils.cutlass_fp8_supported", return_value=value): yield + + +def prep_prompts(batch_size: int, ln_range: tuple[int, int] = (800, 1100)): + """ + Generate prompts which a bunch of assignments, + then asking for the value of one of them. + The prompt is just under 10k tokens; sliding window is 4k + so the answer is outside sliding window, but should still be correct. 
+ Args: + batch_size: number of prompts to generate + ln_range: an argument to control the length of the prompt + """ + prompts: list[str] = [] + answer: list[int] = [] + indices: list[int] = [] + random.seed(1) + for _ in range(batch_size): + idx = random.randint(30, 90) + indices.append(idx) + prompt = "```python\n# We set a number of variables, " + \ + f"x{idx} will be important later\n" + ln = random.randint(*ln_range) + for k in range(30, ln): + v = random.randint(10, 99) + if k == idx: + answer.append(v) + prompt += f"x{k} = {v}\n" + prompt += f"# Now, we check the value of x{idx}:\n" + prompt += f"assert x{idx} == " + prompts.append(prompt) + return prompts, answer, indices + + +def check_answers(indices: list[int], + answer: list[int], + outputs: list[str], + accept_rate: float = 0.7): + answer2 = [int(text[0:2].strip()) for text in outputs] + print(list(zip(indices, zip(answer, answer2)))) + numok = 0 + for a1, a2 in zip(answer, answer2): + if a1 == a2: + numok += 1 + frac_ok = numok / len(answer) + print(f"Num OK: {numok}/{len(answer)} {frac_ok}") + assert frac_ok >= accept_rate diff --git a/tests/v1/e2e/test_correctness_sliding_window.py b/tests/v1/e2e/test_correctness_sliding_window.py index 4dfe1d3bb33f..5b0c15472251 100644 --- a/tests/v1/e2e/test_correctness_sliding_window.py +++ b/tests/v1/e2e/test_correctness_sliding_window.py @@ -6,8 +6,7 @@ from vllm import LLM, SamplingParams -from ...core.block.e2e.test_correctness_sliding_window import (check_answers, - prep_prompts) +from ...utils import check_answers, prep_prompts @dataclass From 9d442b7c48288d6a65cbaca1bba10392523fe94d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 11:08:45 -0700 Subject: [PATCH 056/518] [V0 Deprecation] Remove V0 tests in test_sequence.py (#25088) Signed-off-by: Woosuk Kwon --- tests/test_sequence.py | 97 +----------------------------------------- 1 file changed, 1 insertion(+), 96 deletions(-) diff --git a/tests/test_sequence.py b/tests/test_sequence.py index 1b019be9e56d..da9826ff0505 100644 --- a/tests/test_sequence.py +++ b/tests/test_sequence.py @@ -1,104 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest import torch -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.sequence import (CompletionSequenceGroupOutput, IntermediateTensors, - SequenceData, SequenceOutput) - -from .core.utils import create_dummy_prompt - - -@pytest.fixture -def sample_outputs(): - return [ - CompletionSequenceGroupOutput(samples=[ - SequenceOutput(parent_seq_id=0, output_token=i, logprobs={}) - ], - prompt_logprobs=None) for i in range(5) - ] - - -@pytest.fixture -def sampler_output(sample_outputs): - return SamplerOutput(outputs=sample_outputs) - - -def test_sampler_output_initialization(sampler_output, sample_outputs): - assert len(sampler_output) == len(sample_outputs) - assert sampler_output.sampled_token_probs is None - assert sampler_output.sampled_token_ids is None - - -def test_sampler_output_getitem(sampler_output, sample_outputs): - assert sampler_output[2] == sample_outputs[2] - - -def test_sampler_output_setitem(sampler_output): - new_output = CompletionSequenceGroupOutput(samples=[ - SequenceOutput(parent_seq_id=0, output_token=99, logprobs={}) - ], - prompt_logprobs=None) - sampler_output[2] = new_output - assert sampler_output[2] == new_output - - -def test_sampler_output_len(sampler_output, sample_outputs): - assert len(sampler_output) == len(sample_outputs) - - -def 
test_sampler_output_eq(sample_outputs): - sampler_output1 = SamplerOutput(outputs=sample_outputs) - sampler_output2 = SamplerOutput(outputs=sample_outputs.copy()) - sampler_output3 = SamplerOutput(outputs=sample_outputs[:-1]) - assert sampler_output1 == sampler_output2 - assert sampler_output1 != sampler_output3 - - -def test_sequence_data_prefill(): - seq_data = SequenceData.from_seqs([1, 2, 3, 4]) - assert seq_data.get_num_uncomputed_tokens() == 4 - assert seq_data.get_num_computed_tokens() == 0 - # advance by 2 - seq_data.update_num_computed_tokens(2) - assert seq_data.get_num_uncomputed_tokens() == 2 - assert seq_data.get_num_computed_tokens() == 2 - - # advance by 1 - seq_data.update_num_computed_tokens(1) - assert seq_data.get_num_uncomputed_tokens() == 1 - assert seq_data.get_num_computed_tokens() == 3 - - # append tokens and reset, simulating recompute - seq_data.append_token_id(1, logprob=0.0) - seq_data.reset_state_for_recompute() - assert seq_data.get_num_uncomputed_tokens() == 5 - assert seq_data.get_num_computed_tokens() == 0 - - -def test_sequence_group_stage(): - _, seq_group = create_dummy_prompt("1", 12) - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(6) - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(5) - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(1) - assert seq_group.is_prefill() is False - seqs = seq_group.get_seqs() - assert len(seqs) == 1 - seqs[0].data.append_token_id(1, logprob=0.0) - for seq in seq_group.get_seqs(): - seq.reset_state_for_recompute() - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(5) - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(7) - assert seq_group.is_prefill() is True - seq_group.update_num_computed_tokens(1) - assert seq_group.is_prefill() is False +from vllm.sequence import IntermediateTensors def test_sequence_intermediate_tensors_equal(): From e3db5ebb66590031ecfd3338de41b6f1ee95bf2a Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 17 Sep 2025 14:15:05 -0400 Subject: [PATCH 057/518] [CI Bugfix] Fix failing test_model_load_with_params tests due to tokenizer refactor (#25086) Signed-off-by: mgoin --- tests/model_executor/test_model_load_with_params.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/tests/model_executor/test_model_load_with_params.py b/tests/model_executor/test_model_load_with_params.py index 0ade75b7e622..c7b15c6ae118 100644 --- a/tests/model_executor/test_model_load_with_params.py +++ b/tests/model_executor/test_model_load_with_params.py @@ -47,8 +47,8 @@ def test_model_loading_with_params(vllm_runner, monkeypatch): assert model_config.pooler_config.normalize # asserts on the tokenizer loaded - assert model_tokenizer.tokenizer_id == "BAAI/bge-base-en-v1.5" - assert model_tokenizer.tokenizer.model_max_length == 512 + assert model_config.tokenizer == "BAAI/bge-base-en-v1.5" + assert model_tokenizer.model_max_length == 512 def check_model(model): assert isinstance(model, BertEmbeddingModel) @@ -87,8 +87,8 @@ def test_roberta_model_loading_with_params(vllm_runner, monkeypatch): assert model_config.pooler_config.normalize # asserts on the tokenizer loaded - assert model_tokenizer.tokenizer_id == "intfloat/multilingual-e5-base" - assert model_tokenizer.tokenizer.model_max_length == 512 + assert model_config.tokenizer == "intfloat/multilingual-e5-base" + assert model_tokenizer.model_max_length == 512 def check_model(model): assert 
isinstance(model, RobertaEmbeddingModel) @@ -116,8 +116,7 @@ def test_facebook_roberta_model_loading_with_params(vllm_runner, monkeypatch): output = vllm_model.embed("Write a short story about a robot that" " dreams for the first time.\n") - model_tokenizer = vllm_model.llm.llm_engine.tokenizer - assert model_tokenizer.tokenizer_id == model_name + assert vllm_model.llm.llm_engine.model_config.tokenizer == model_name def check_model(model): assert isinstance(model, RobertaEmbeddingModel) From 7ae9887542bf0fc45cc44de583290c41234a09c4 Mon Sep 17 00:00:00 2001 From: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com> Date: Wed, 17 Sep 2025 14:53:12 -0400 Subject: [PATCH 058/518] [V1] Logits processor docs (#22919) Signed-off-by: Andrew Feldman Signed-off-by: afeldman-nm <156691304+afeldman-nm@users.noreply.github.com> Co-authored-by: Joseph Marinier --- docs/design/logits_processors.md | 559 ++++++++++++++++++ docs/features/custom_arguments.md | 46 ++ docs/features/custom_logitsprocs.md | 445 ++++++++++++++ .../logits_processor/custom.py | 10 +- tests/v1/logits_processors/utils.py | 7 +- vllm/v1/sample/logits_processor/interface.py | 6 +- vllm/v1/sample/logits_processor/state.py | 8 +- 7 files changed, 1065 insertions(+), 16 deletions(-) create mode 100644 docs/design/logits_processors.md create mode 100644 docs/features/custom_arguments.md create mode 100644 docs/features/custom_logitsprocs.md diff --git a/docs/design/logits_processors.md b/docs/design/logits_processors.md new file mode 100644 index 000000000000..20d78ca3aae2 --- /dev/null +++ b/docs/design/logits_processors.md @@ -0,0 +1,559 @@ +# Logits Processors + +!!! important + Some logits processors design changes are still in progress and the API may + change in the near future. We hope to stabilize this part of the API soon + +This document describes how the vLLM engine interacts with logits processors, and the programming model which vLLM supports for implementing logits processors. + +## Logits Processors Background + +A logits processor adjusts the next-token probability distribution, usually with the intention of steering the model towards a desired type of behavior. + +In vLLM, logits processors operate at batch granularity. During a given engine step, the logits processor consumes a `(num_requests) x (vocab_size)` tensor of raw logits output by the model. For all requests which enable the logits processor, the logits processor applies a transformation to the corresponding row of the logits tensor, while leaving other rows unmodified. The transformed logits tensor is then passed to softmax. + +## Logits Processors in the vLLM engine + +The vLLM engine's persistent batch data structure maintains a list of loaded logits processors. + +In order to operate on the entire batch at once, each logits processor may maintain metadata about the requests in the batch (i.e. each request's logits-processor-specific configuration settings). Therefore, logits processors are stateful. + +In each engine step, the vLLM engine will (1) update each logits processor's internal state and (2) apply logits processors to the model output logits. + +### Updating Logits Processor Internal State + +At the beginning of each engine step, the persistent batch may add, discard and/or reorder requests in response to the scheduler output. After the persistent batch has reorganized, the vLLM engine invokes each logits processor's `update_state()` method. 
This is necessary to ensure that logits processors' internal states are reorganized to match the new persistent batch state at the beginning of the engine step. + +The pseudocode below shows the process by which the vLLM persistent batch notifies each logits processor of changes in batch state: + +??? code "Model Runner Updates Logits Processor States" + + ``` python + # gpu_model_runner.py + + class GPUModelRunner(...): + + ... + + def execute_model(self, scheduler_output, ...): + self._update_states(scheduler_output) + + ... + + def _update_states(...): + + ... + + # ...update persistent batch to reflect new/finished requests & reordering + # of requests within batch... + + ... + + self.input_batch.refresh_metadata() + + + # gpu_input_batch.py + + class InputBatch: + + ... + + def refresh_metadata(self): + + ... + + # Update each logits processor's state to reflect persistent batch state + batch_update = self.batch_update_builder.get_and_reset(self.num_reqs) + for logit_proc in self.logitsprocs.all: + logit_proc.update_state(batch_update) + + ... + + + # vllm/v1/sample/logits_processor/interface.py + + @dataclass(frozen=True) + class BatchUpdate: + # Batch state-change data structure which is passed to logits processors' + # update_state() methods + + batch_size: int + + removed: Sequence[RemovedRequest] + added: Sequence[AddedRequest] + moved: Sequence[MovedRequest] + + ``` + +### Applying Logits Processors to the Model Output Logits + +After updating persistent batch state, the vLLM model runner performs model inference to obtain logits. Then, the model runner invokes the sampler against the logits. In turn, part of the sampler's operation is to invoke the logits processors' `apply()` methods against the model output logit processors, yielding transformed logits (the `apply()` methods may modify the logits in-place or out-of-place, although in-place is more memory-efficient). This process is shown in the pseudocode below. + +Note that the sampler will access the logits processors via `SamplingMetadata.logitsprocs`. When the vLLM engine constructs `SamplingMetadata` (not shown in the code below), the reference to the list of logits processors is passed from the persistent batch data structure to `SamplingMetadata`. + +??? code "Apply logits processors to model output logits" + + ``` python + # gpu_model_runner.py + + class GPUModelRunner(...): + + ... + + def execute_model(self, scheduler_output, ...): + # (discussed in previous section) + self._update_states(scheduler_output) + + ... + + # ...run model inference to obtain logits... + + ... + + # Invoke sampler, which applies logits processors + sampler_output = self.sampler(logits=logits, + sampling_metadata=sampling_metadata) + + ... + + + # sampler.py + + class Sampler(nn.Module): + + ... + + def forward(self, logits, sampling_metadata): + + ... + + # Apply non-argmax-invariant logits processors to model output logits + for processor in (sampling_metadata.logitsprocs.non_argmax_invariant): + logits = processor.apply(logits) + + sampled = self.sample(logits, sampling_metadata) + + ... + + # ...return sampler output data structure... + + + def sample(self, logits, sampling_metadta) + + ... + + # ...exit early if all requests are greedy-sampling... + + ... + + # Apply argmax-invariant logits processors + for processor in sampling_metadata.logitsprocs.argmax_invariant: + logits = processor.apply(logits) + + ... + + # ...perform sampling and return sampling result... 
+ ``` + +At sampling time, the sampler checks whether all requests in the persistent batch employ greedy sampling. If that is the case, the sampler saves compute by skipping "argmax-invariant" logits processors. Here, "argmax" is shorthand for the token ID with the highest logit value in a given row of the logits tensor (i.e. the token which the model weighted the highest for a given request). + +* An **argmax-invariant logits processor** is a logits processor (such as Min-P) which does not modify the argmax. For example, a logits processor which masks out the lowest-probability tokens will not change which token ID has the max logit. Greedy sampling always picks the highest-logit-value token ID, and so conceptually an argmax-invariant logits processor can be skipped for greedy sampling requests. + +* A **non-argmax-invariant logits processor** is a logits processor which may modify the argmax. For example, a logits processor which masks all tokens except for EOS after a certain number of steps in order to force decoding to terminate might end up masking the max-logit-value token and therefore change the argmax. Conceptually, these logits processors cannot be skipped for greedy sampling requests. + +The vLLM logits processor abstraction requires the engine to apply logits processors at batch granularity; therefore in practice the argmax-invariant logits processors can only be skipped when the entire batch uses greedy sampling. + +## Logits Processor Programming Model + +The previous sections alluded to the interfaces which vLLM logits processors must support. This section introduces in full the programming model for implementing logits processors that are compatible with the vLLM engine, including the `LogitsProcessor` base class and its interface methods as well as the `BatchUpdate` data structure for representing persistent batch state changes, both of which are shown in the code below: + +??? code "`LogitsProcessor` base class and `BatchUpdate` data structure" + + ``` python + from abc import ABC, abstractmethod + from collections.abc import Sequence + from dataclasses import dataclass + from enum import Enum, auto + from typing import TYPE_CHECKING, Optional + + import torch + + from vllm import SamplingParams + + if TYPE_CHECKING: + from vllm.config import VllmConfig + + + class MoveDirectionality(Enum): + # One-way i1->i2 req move within batch + UNIDIRECTIONAL = auto() + # Two-way i1<->i2 req swap within batch + SWAP = auto() + + + # (index, params, prompt_tok_ids, output_tok_ids) tuples for new + # requests added to the batch. + AddedRequest = tuple[int, SamplingParams, list[int], list[int]] + + # (index 1, index 2, directionality) tuples representing + # one-way moves or two-way swaps of requests in batch + MovedRequest = tuple[int, int, MoveDirectionality] + + # Batch indices of any removed requests. + RemovedRequest = int + + + @dataclass(frozen=True) + class BatchUpdate: + """Persistent batch state change info for logitsprocs""" + batch_size: int # Current num reqs in batch + + # Metadata for requests added to, removed from, and moved + # within the persistent batch. 
+        #
+        # Key assumption: the `output_tok_ids` list (which is an element of each
+        # tuple in `added`) is a reference to the request's running output tokens
+        # list; via this reference, the logits processors always see the latest
+        # list of generated output tokens
+        removed: Sequence[RemovedRequest]
+        moved: Sequence[MovedRequest]
+        added: Sequence[AddedRequest]
+
+
+    class LogitsProcessor(ABC):
+
+        @abstractmethod
+        def __init__(self, vllm_config: "VllmConfig", device: torch.device,
+                     is_pin_memory: bool) -> None:
+            raise NotImplementedError
+
+        @abstractmethod
+        def apply(self, logits: torch.Tensor) -> torch.Tensor:
+            raise NotImplementedError
+
+        @abstractmethod
+        def is_argmax_invariant(self) -> bool:
+            """True if logits processor has no impact on the
+            argmax computation in greedy sampling.
+            NOTE: may or may not have the same value for all
+            instances of a given LogitsProcessor subclass,
+            depending on subclass implementation.
+            """
+            raise NotImplementedError
+
+        @abstractmethod
+        def update_state(
+            self,
+            batch_update: Optional["BatchUpdate"],
+        ) -> None:
+            """Called when there are new output tokens, prior
+            to each forward pass.
+
+            Args:
+                batch_update is non-None iff there have been
+                changes to the batch makeup.
+            """
+            raise NotImplementedError
+
+    ```
+
+A vLLM logits processor must subclass `LogitsProcessor` and define (at minimum) the following methods:
+
+* `__init__(self, vllm_config: VllmConfig, device: torch.device, is_pin_memory: bool)`
+    * `vllm_config`: engine configuration data structure
+    * `device`: hardware accelerator device info
+    * `is_pin_memory`: flag indicating whether pin memory is available to support logits processor implementation
+
+* `apply(self, logits: torch.Tensor) -> torch.Tensor`:
+    * Consume a `(num_requests) x (vocab_size)` logits tensor (`logits`)
+    * Apply logits processor transformation at batch granularity
+    * Return a transformed `(num_requests) x (vocab_size)` logits tensor
+    * You can modify the input logits tensor in-place or out-of-place; in-place is more memory-efficient
+
+* `is_argmax_invariant(self) -> bool`:
+    * Return `True` if the logits processor is argmax invariant (never changes what is the highest-logit-value token ID for a given request), `False` if the logits processor may modify argmax
+    * `is_argmax_invariant()` is evaluated once at startup; if `True`, vLLM will skip applying this logits processor in a given step when all requests use greedy sampling
+
+* `update_state(self, batch_update: Optional["BatchUpdate"]) -> None`:
+    * Consume a `BatchUpdate` data structure representing persistent batch state changes at the beginning of the current engine step
+    * Use the `BatchUpdate` members to update logits processor internal state
+    * **Note:** batch update data structure may be `None`, signaling no change to the batch constituents. In this case, the LogitsProcessor might still want to update its state based on the updated `output_token_ids` lists that it could have retained when they were added.
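+
+As a point of reference, the minimal sketch below shows how these methods fit together in a do-nothing logits processor. The class name is illustrative only (it is not one of vLLM's built-in logits processors); a real implementation would maintain per-request state in `update_state()` and transform rows of the logits tensor in `apply()`:
+
+??? code "Minimal skeleton of a logits processor (illustrative)"
+
+    ``` python
+    from typing import Optional
+
+    import torch
+
+    from vllm.config import VllmConfig
+    from vllm.v1.sample.logits_processor import BatchUpdate, LogitsProcessor
+
+
+    class NoOpLogitsProcessor(LogitsProcessor):
+        """Skeleton example: keeps no state and returns the logits unchanged."""
+
+        def __init__(self, vllm_config: VllmConfig, device: torch.device,
+                     is_pin_memory: bool) -> None:
+            # A real logits processor would set up its per-request state here.
+            pass
+
+        def is_argmax_invariant(self) -> bool:
+            # Leaving the logits untouched can never change the argmax.
+            return True
+
+        def update_state(self, batch_update: Optional[BatchUpdate]) -> None:
+            # A real logits processor would digest removed, added and moved
+            # requests here to keep its internal state consistent with the
+            # persistent batch.
+            pass
+
+        def apply(self, logits: torch.Tensor) -> torch.Tensor:
+            # A real logits processor would transform rows of `logits` here.
+            return logits
+    ```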
+ +### `BatchUpdate` data structure + +The `BatchUpdate` abstraction models the persistent batch as a list of requests, supporting the following operations to change batch state (note that the order in which the operations are mentioned below reflects the order in which they should be processed in `update_state()`): + +* **Remove:** remove (without replacement) request at index `i` + + * A Remove is represented in `Batchupdate.removed` by an `int` (representing `i`) + + * Effect of remove-at-index on batch: + + ``` text + Batch: [A,B,C] + Remove @ i: 1 + + => + + New Batch: [A,x,C] # Discard B and leave an empty slot + ``` + +* **Add:** add (or replace existing request with) a new request at index `i`. If a request is replaced, its associated state should be discarded. + + * An Add is represented in `Batchupdate.added` as a tuple of + + ``` text + (index, new request SamplingParams, prompt token ids, output token ids) + ``` + + * `prompt token ids` and `output token ids` are references to the request's prompt token ids and output token ids lists, respectively. Note that the output token ids list grows with each engine step, and this growth is visible to the logits processor because output token ids are passed by reference. **This is important for LogitsProcessors that take into account the tokens generated so far**. + + * The implementation of the particular logits processor subclass determines whether or how the fields in the added request tuple are digested into an internal representation. For example, a logits processor that does not utilize prompt or output token ids may only need to utilize `index` and `SamplingParams` and discard the other tuple fields + + * If index `i` currently holds a request, a replacement occurs: + + ``` text + Batch: [A,B,C] + New request to be added @ i: D @ 1 + + => + + New Batch: [A,D,C] # Add D, discard B + ``` + + * If index `i` does not currently hold a request (because `i` is out of bounds of the current batch size): + + ``` text + Batch: [A,B,C] + New request to be added @ i: D @ 3 + + => + + New Batch: [A,B,C,D] # Add D, extending batch + ``` + +* **Move:** move request at index `s` to index `d` OR swap requests at indices `s` and `d` + + * A Move is represented in `Batchupdate.moved` as a tuple of + + ``` text + (s, d, UNIDIRECTIONAL or SWAP) + ``` + + * If the Move specifies `UNIDRECTIONAL`: + + * The request at index `s` is moved to index `d`; index `s` becomes an empty slot + + ``` text + Batch: [A,x,C,D] + Unidirectionally Move s -> d: 3 -> 1 + + => + + New Batch: [A,D,C,x] # Move D to 1, leaving empty slot at 3 + ``` + + * If another request already resided at index `d`, it is replaced and discarded + + ``` text + Batch: [A,B,C,D] + Unidirectionally Move s -> d: 3 -> 1 + + => + + New Batch: [A,D,C,x] # Move D to 1, discarding B and leaving empty slot at 3 + ``` + + * If the Move specifies `SWAP`, the requests at `s` and `d` exchange indices + + ``` text + Batch: [A,B,C,D] + Swap Move s <-> d: 3 <-> 1 + + => + + New Batch: [A,D,C,B] # Swap B and D + ``` + +Additionally, the `BatchUpdate` data structure includes a representation (`batch_size`) of the size of the persistent batch at the beginning of the engine step. + +### How the vLLM engine builds the `BatchUpdate` data structure + +Logits processor `update_state()` implementations should assume the following model for how the model runner updates persistent batch state (expressed here in terms of the `BatchUpdate` abstraction): + +1. 
Identify indices of requests which finished in the current engine step + +2. Identify new requests introduced in the current step + +3. Use Add operations to replace as many finished requests with new requests, in order of increasing index of the replaced request starting with the lowest index + +4. Based on the relative number of new and finished requests: + + 1. If the numbers of new and finished requests are the same, proceed to next step + + 2. *If there are more new requests than finished requests:* apply Add operations to extend the batch with the remaining new requests which did not replace finished requests. Assign consecutive indices to these new requests, starting with `current_max_batch_index + 1` + + 3. *If there are fewer new requests than finished requests:* + + * Apply Remove operations to finished requests which were not replaced with new requests. These removed request indices will necessarily be greater than the greatest index of the finished requests which were replaced in the previous step. The Removes may leave the batch in a non-contiguous state + + * **"Condense" the batch to be contiguous:** starting with the lowest-index empty slot (which was caused by a Remove), apply a Unidirectional Move from the current highest non-empty slot in the batch to fill the empty slot. Proceed with additional Unidirectional Move operations in order of increasing empty slot destination index and decreasing non-empty slot source index until the batch is contiguous + + * **Shrink the batch:** a side-effect of condensing the batch is that empty slots resulting from Remove operations are grouped in a contiguous block at the end of the batch array. Thus, after condensing, update `BatchUpdate.batch_size` to reflect the number of non-empty slots + +5. Reorder the batch for improved efficiency. Depending on the attention backend implementation and the current characteristics of the batch, zero or more Swap Move operations may be applied to reorder the batch + +Notes: + +* A logits processor `update_state()` method must process batch update operations in the following order: removes, adds, moves + +* The index argument for Add operations refers to the index *at the time the Add occurred*, i.e. before any Move operations + * Example: if a request is Added at index 5 and then swapped with index 3, the Add operation in `BatchUpdate.added` will be associated with index 5 not 3 + * In other words Move operations can be assumed to be applied after Adds and Removes + +* Move operations can be assumed to be applied in the order in which they appear in `BatchUpdate.moved` + +* If there are no new/finished requests and there is no batch reordering, then the batch update for the logits processors will be `None` + +#### Example: Batch Update with Fewer New Requests Than Finished Requests + +The following example models an engine step where 1 new request is introduced and 2 finished requests are eliminated, additionally the attention backend performs a swap to optimize the batch ordering. + +``` text +Batch state (beginning of engine step): [A,B,C,D] +Batch size: 4 + +New requests: E + +Finished requests: A, C + +Processing steps (using BatchUpdate abstraction): + +1. Add E at index 0 + +[E,B,C,D] # Discard A +Batch size: 4 + +2. Remove at index 2 + +[E,B,x,D] # Discard C, empty slot at index 2 +Batch size: 4 + +3. Condense batch with a Unidirectional Move 3 -> 2 operation and shrink batch + +[E,B,D] x # Empty slot is now outside batch +Batch size: 3 + +4. 
Attention backend optimization: reorder batch with Swap 0 <-> 1 + +[B,E,D] +Batch size: 3 + +``` + +The resulting `BatchUpdate` data structure will look like + +``` text +BatchUpdate instance +* added: [(0,E's SamplingParams,E's prompt tokens ref,E's output tokens ref)] +* removed: [2] # request C was removed without replacement +* moved: [(3,2,UNIDIRECTIONAL),(0,1,SWAP)] +``` + +#### Example: Batch Update with More New Requests Than Finished Requests + +The following example models an engine step where 2 new requests are introduced and 1 finished request is eliminated, additionally the attention backend performs a swap to optimize the batch ordering. + +``` text +Batch state (beginning of engine step): [A,B,C,D] +Batch size: 4 + +New requests: E,F + +Finished requests: C + +Processing steps (using BatchUpdate abstraction): + +1. Add E at index 2 + +[A,B,E,D] # Discard C +Batch size: 4 + +2. Add F at index 4 (current max batch index + 1) + +[A,B,E,D,F] # Extend batch by 1 +Batch size: 5 + +4. Attention backend optimization: reorder batch with Swap 0 <-> 1 + +[B,A,E,D,F] +Batch size: 5 + +``` + +Note that batch condensation is skipped because there are no empty slots left behind by Remove operations. + +The resulting `BatchUpdate` data structure will look like + +``` text +BatchUpdate instance +* added: [(2,E's SamplingParams,E's prompt tokens ref,E's output tokens ref),(4,F's SamplingParams,F's prompt tokens ref,F's output tokens ref)] +* removed: [] # no requests were removed without replacement +* moved: [(0,1,SWAP)] +``` + +## How to Introduce a New Logits Processor to vLLM + +### Best Practices for Writing Built-In Logits Processors + +* Write efficient `apply()` and `update_state()` implementations in light of the fact that logits processors operate at batch granularity + * For example, you may be able to use efficient vectorized operations to implement `apply()` or update internal state vectors in `update_state()` + * However, if you think that a logits processor may be used infrequently, it may be appropriate to use a "sparse" representation of request state i.e. the class can represent request configuration using a dictionary which only stores metadata about requests that enable the logits processor + +* It is up to the logits processor author to determine: + + 1. **The per-request attributes which configure the logits processor's behavior against that request.** For example, if you are writing a new built-in logits processor for vLLM, you may or may not need to add additional fields to `SamplingParams` and the vLLM REST API + + 2. **The conditions under which the logits processor is or is not enabled on a per-request basis.** Unless your intention is for the built-in logits processor to act on all requests all the time, you should write your logits processor in such a way that it is possible to disable the logits processor for a given request, i.e. by defaulting an argument to `None` or by passing in a specific do-nothing argument value i.e. `0.0`. Try to save compute and memory for requests which disable the logits processor + + 3. **The conditions under which the logits processor is short-circuited at the batch level.** Even if you have defined a way to disable the built-in logits processor at the request level, it may be difficult to translate this into compute savings i.e. if your `update_state()` and `apply()` implementations use efficient vectorized implementations that operate on the whole persistent batch in a single command. 
For example, you cannot skip an entire vectorized operation in `apply()` just because one request disabled the logits processor. To save compute in the edge-case where no running requests utilize the built-in logits processor, we recommend designing `apply()` to return the unmodified input tensor if all requests have the logits processor disabled. Similarly, consider whether steps can be skipped in `update_state()` if no requests enable the logits processor + + * Additionally, an easy way to save compute in `update_state()` is to exit early when the batch_update is `None` + +* Ensure that the logits processor `update_state` method discards information about finished requests (i.e. requests which are replaced by an Add or which are subject to a Remove) + +* `is_argmax_invariant()` can be hard-coded to `True` or `False` if the logits processor has consistent behavior. However the argmax invariance may also be determined programmatically (i.e. if your logits processor is user-customizable in some way that impacts whether the logits processor is argmax invariant). For this reason, `is_argmax_invariant()` is not a class method + +### Built-In Logits Processors + +Built-in logits processors are always loaded when the vLLM engine starts. See the existing vLLM built-in logits processors in `vllm/v1/sample/logits_processor/builtin.py` for examples of how to write a new built-in vLLM logits processor. It makes sense to write a PR to introduce a new logits processor as a built-in if it is likely to be useful to a wide audience. vLLM currently employs the following built-in logits processors based on the programming model described above: + +* Min-P + +* Logit bias + +* Min-tokens + +Review these logits processor implementations for guidance on writing built-in logits processors. + +Additionally, the following logits-processor-like functionalities are hard-coded into the sampler and do not yet utilize the programming model described above. Most of them will be refactored to use the aforemented logits processor programming model. + +* Allowed token IDs + +* Bad words + +* Repetition penalty + +* Frequency penalty + +* Presence penalty + +* Temperature + +* Top-K + +* Top-P + +### Custom Logits Processors + +vLLM can be augmented with [user-provided custom logits processors](../features/custom_logitsprocs.md). diff --git a/docs/features/custom_arguments.md b/docs/features/custom_arguments.md new file mode 100644 index 000000000000..74ed40835b4d --- /dev/null +++ b/docs/features/custom_arguments.md @@ -0,0 +1,46 @@ +# Custom Arguments + +You can use vLLM *custom arguments* to pass in arguments which are not part of the vLLM `SamplingParams` and REST API specifications. Adding or removing a vLLM custom argument does not require recompiling vLLM, since the custom arguments are passed in as a dictionary. + +Custom arguments can be useful if, for example, you want to use a [custom logits processor](./custom_logitsprocs.md) without modifying the vLLM source code. + +## Offline Custom Arguments + +Custom arguments passed to `SamplingParams.extra_args` as a `dict` will be visible to any code which has access to `SamplingParams`: + +``` python +SamplingParams(extra_args={"your_custom_arg_name": 67}) +``` + +This allows arguments which are not already part of `SamplingParams` to be passed into `LLM` as part of a request. + +## Online Custom Arguments + +The vLLM REST API allows custom arguments to be passed to the vLLM server via `vllm_xargs`. 
The example below integrates custom arguments into a vLLM REST API request: + +``` bash +curl http://localhost:8000/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen/Qwen2.5-1.5B-Instruct", + ... + "vllm_xargs": {"your_custom_arg": 67} + }' +``` + +Furthermore, OpenAI SDK users can access `vllm_xargs` via the `extra_body` argument: + +``` python +batch = await client.completions.create( + model="Qwen/Qwen2.5-1.5B-Instruct", + ..., + extra_body={ + "vllm_xargs": { + "your_custom_arg": 67 + } + } +) +``` + +!!! note + `vllm_xargs` is assigned to `SamplingParams.extra_args` under the hood, so code which uses `SamplingParams.extra_args` is compatible with both offline and online scenarios. diff --git a/docs/features/custom_logitsprocs.md b/docs/features/custom_logitsprocs.md new file mode 100644 index 000000000000..201b340c5972 --- /dev/null +++ b/docs/features/custom_logitsprocs.md @@ -0,0 +1,445 @@ +# Custom Logits Processors + +!!! important + Some logits processors design changes are still in progress and the API may + change in the near future. We hope to stabilize this part of the API soon + +A "custom" logits processor is written by a user of vLLM and is loaded into vLLM at initialization without needing to modify or recompile the vLLM source code. It is the opposite of a built-in logits processor. + +This document shows how to write, load and use a custom logits processor. + +## Logits Processors Background + +A logits processor adjusts the next-token probability distribution, usually with the intention of steering the model towards a desired type of behavior. + +In vLLM, logits processors operate at batch granularity. During a given engine step, the logits processor consumes a `(num_requests) x (vocab_size)` tensor of raw logits output by the model. For all requests which enable the logits processor, the logits processor applies a transformation to the corresponding row of the logits tensor, while leaving other rows unmodified. The transformed logits tensor is then passed to softmax. 
+
+## Creating a Custom Logits Processor
+
+Custom logits processors must subclass `vllm.v1.sample.logits_processor.LogitsProcessor` and define (at minimum) the following methods:
+
+* `__init__(self, vllm_config: VllmConfig, device: torch.device, is_pin_memory: bool)`
+    * `vllm_config`: engine configuration data structure
+    * `device`: hardware accelerator device info
+    * `is_pin_memory`: flag indicating whether pin memory is available to support logits processor implementation
+
+* `apply(self, logits: torch.Tensor) -> torch.Tensor`:
+    * Consume a `(num_requests) x (vocab_size)` logits tensor (`logits`)
+    * Apply logits processor transformation at batch granularity
+    * Return a transformed `(num_requests) x (vocab_size)` logits tensor
+    * You can modify the input logits tensor in-place or out-of-place; in-place is more memory-efficient
+
+* `is_argmax_invariant(self) -> bool`:
+    * Return `True` if the logits processor is argmax invariant (never changes what is the highest-logit-value token ID for a given request), `False` if the logits processor may modify argmax
+    * `is_argmax_invariant()` is evaluated once at startup; if `True`, vLLM will skip applying this logits processor in a given step when all requests use greedy sampling
+
+* `update_state(self, batch_update: Optional["BatchUpdate"]) -> None`:
+    * Consume a `BatchUpdate` data structure representing persistent batch state changes at the beginning of the current engine step
+    * Use the `BatchUpdate` members to update logits processor internal state
+    * **Note:** batch update data structure may be `None`, signaling no change to the batch constituents. In this case, the LogitsProcessor might still want to update its state based on the updated `output_token_ids` lists that it could have retained when they were added.
+
+### How the vLLM engine builds the `BatchUpdate` data structure
+
+!!! important
+    Some logits processors design changes are still in progress. We expect
+    that in the future you will not need to account for batch state changes
+    when implementing a logits processor, and the information in this section
+    will become irrelevant.
+
+Logits processor `update_state()` implementations should assume the following model for how the model runner updates persistent batch state (expressed here in terms of the `BatchUpdate` abstraction):
+
+1. Identify indices of requests which finished in the current engine step
+
+2. Identify new requests introduced in the current step
+
+3. Use Add operations to replace as many finished requests with new requests, in order of increasing index of the replaced request starting with the lowest index
+
+4. Based on the relative number of new and finished requests:
+
+    1. If the numbers of new and finished requests are the same, proceed to next step
+
+    2. *If there are more new requests than finished requests:* apply Add operations to extend the batch with the remaining new requests which did not replace finished requests. Assign consecutive indices to these new requests, starting with `current_max_batch_index + 1`
+
+    3. *If there are fewer new requests than finished requests:*
+
+        * Apply Remove operations to finished requests which were not replaced with new requests. These removed request indices will necessarily be greater than the greatest index of the finished requests which were replaced in the previous step.
The Removes may leave the batch in a non-contiguous state
+
+        * **"Condense" the batch to be contiguous:** starting with the lowest-index empty slot (which was caused by a Remove), apply a Unidirectional Move from the current highest non-empty slot in the batch to fill the empty slot. Proceed with additional Unidirectional Move operations in order of increasing empty slot destination index and decreasing non-empty slot source index until the batch is contiguous
+
+        * **Shrink the batch:** a side-effect of condensing the batch is that empty slots resulting from Remove operations are grouped in a contiguous block at the end of the batch array. Thus, after condensing, update `BatchUpdate.batch_size` to reflect the number of non-empty slots
+
+5. Reorder the batch for improved efficiency. Depending on the attention backend implementation and the current characteristics of the batch, zero or more Swap Move operations may be applied to reorder the batch
+
+Notes:
+
+* A logits processor `update_state()` method must process batch update operations in the following order: removes, adds, moves
+
+* The index argument for Add operations refers to the index *at the time the Add occurred*, i.e. before any Move operations
+    * Example: if a request is Added at index 5 and then swapped with index 3, the Add operation in `BatchUpdate.added` will be associated with index 5 not 3
+    * In other words Move operations can be assumed to be applied after Adds and Removes
+
+* Move operations can be assumed to be applied in the order in which they appear in `BatchUpdate.moved`
+
+* If there are no new/finished requests and there is no batch reordering, then the batch update for the logits processors will be `None`
+
+### Passing Custom Arguments to a Custom Logits Processor
+
+Unlike built-in logits processors, custom logits processors may require configuration arguments that are not hard-coded into `SamplingParams` or the vLLM server REST API. To solve this problem, custom logits processors may leverage vLLM [custom arguments](./custom_arguments.md) support to receive configuration settings from the user (although you are also free to design a custom logits processor which utilizes the pre-existing fields in `SamplingParams`).
+
+### Example Custom Logits Processor Implementation
+
+The contrived example below implements a custom logits processor which consumes a `(num_requests) x (vocab_size)` logits tensor and masks out all tokens except for one (`target_token`) with `float(-inf)`. The logits processor is disabled for any request that does not specify `target_token`. To determine whether the logits processor is enabled and which token to leave unmasked, the logits processor checks `SamplingParams.extra_args` for a `target_token` custom argument associated with each request:
+
+??? 
code "Example custom logits processor definition" + + ``` python + from typing import Optional + import torch + from vllm.config import VllmConfig + from vllm.sampling_params import SamplingParams + from vllm.v1.sample.logits_processor import (BatchUpdate, + LogitsProcessor, + MoveDirectionality) + + class DummyLogitsProcessor(LogitsProcessor): + """Fake logit processor to support unit testing and examples""" + + def __init__(self, vllm_config: "VllmConfig", device: torch.device, + is_pin_memory: bool): + self.req_info: dict[int, int] = {} + + def is_argmax_invariant(self) -> bool: + """Never impacts greedy sampling""" + return False + + def update_state(self, batch_update: Optional[BatchUpdate]): + if not batch_update: + return + + # Process added requests. + for index, params, _, _ in batch_update.added: + assert params is not None + if params.extra_args and (target_token := + params.extra_args.get("target_token")): + self.req_info[index] = target_token + else: + self.req_info.pop(index, None) + + if self.req_info: + # Process removed requests. + for index in batch_update.removed: + self.req_info.pop(index, None) + + # Process moved requests, unidirectional move (a->b) and swap + # (a<->b) + for adx, bdx, direct in batch_update.moved: + a_val = self.req_info.pop(adx, None) + b_val = self.req_info.pop(bdx, None) + if a_val is not None: + self.req_info[bdx] = a_val + if direct == MoveDirectionality.SWAP and b_val is not None: + self.req_info[adx] = b_val + + def apply(self, logits: torch.Tensor) -> torch.Tensor: + if not self.req_info: + return logits + + # Save target values before modification + cols = torch.tensor( + list(self.req_info.values()), dtype=torch.long, device=logits.device + ) + rows = torch.tensor( + list(self.req_info.keys()), dtype=torch.long, device=logits.device + ) + values_to_keep = logits[rows, cols].clone() + + # Mask all but target tokens + logits[rows] = float('-inf') + logits[rows, cols] = values_to_keep + + return logits + ``` + +In the rest of this document, we will use `DummyLogitsProcessor` as an example of a custom logits processor. + +The `DummyLogitsProcessor.update_state()` implementation maintains a "sparse" representation of the batched requests in the `self.req_info` dictionary: only those requests which specify a `target_token` value have a key in the dictionary. `update_state()` adjusts the stored request indices and `target_token` values (keys and values respectively in `self.req_info`) in response to Add, Remove and Move operations against the persistent batch. + +### Wrapping an Existing Request-Level Logits Processor + +Although the vLLM engine applies logits processors at batch granularity, some users may want to use vLLM with a "request-level" logits processor implementation - an implementation which operates on individual requests. 
This will be especially true if your logits processor was developed for vLLM version 0, which required it to be a `Callable` (as described [here](https://docs.vllm.ai/en/v0.10.1.1/api/vllm/logits_process.html)) conforming to the following type annotation: + +``` python +RequestLogitsProcessor = Union[ + + # (output token ids, logits tensor) -> logits tensor + Callable[[list[int], Tensor], Tensor], + + # (prompt token ids, output token ids, logits tensor) -> logits tensor + Callable[[list[int], list[int], Tensor], Tensor], +] +``` + +While request-level logits processors are explicitly *not* supported in the vLLM engine, vLLM *does* provide a convenient process to wrap an existing `Callable` request-level logits processor and create a batch-level logits processor that is compatible with vLLM. The `Callable` must conform to the type annotation above; if your request-level logits processor has a different interface, then in order to wrap it, you may need to modify it or implement an additional wrapper layer to comply with the interface specification above. + +You can wrap the request-level logits processor by subclassing `AdapterLogitsProcessor` as shown in the example below (in this example, `DummyPerReqLogitsProcessor` is a stand-in for your request-level logits processor which needs to be wrapped.) Override `AdapterLogitsProcessor.is_argmax_invariant(self)` to accurately reflect whether your request-level logits processor may impact which token has the highest-value logit. Override `AdapterLogitsProcessor.new_req_logits_processor(self,params)` to create a new request-level logits processor instance from a `SamplingParams` instance: + +??? code "Example of Wrapping a Request-Level Logits Processor" + + ``` python + ... + + from vllm.v1.sample.logits_processor import ( + AdapterLogitsProcessor, # Wrapper base-class + RequestLogitsProcessor, # Request-level logitsproc type annotation + ) + + ... + + # Stand-in for your request-level logits processor: + class DummyPerReqLogitsProcessor: + """The request-level logits processor masks out all logits except the + token id identified by `target_token`""" + + def __init__(self, target_token: int) -> None: + """Specify `target_token`""" + self.target_token = target_token + + def __call__( + self, + output_ids: list[int], + logits: torch.Tensor, + ) -> torch.Tensor: + val_to_keep = logits[self.target_token].item() + logits[:] = float("-inf") + logits[self.target_token] = val_to_keep + return logits + + ... + + # Example of wrapping the request-level logits processor: + class WrappedPerReqLogitsProcessor(AdapterLogitsProcessor): + """Example of wrapping a fake request-level logit processor to create a + batch-level logits processor""" + + def is_argmax_invariant(self) -> bool: + return False + + def new_req_logits_processor( + self, + params: SamplingParams, + ) -> Optional[RequestLogitsProcessor]: + """This method returns a new request-level logits processor, customized + to the `target_token` value associated with a particular request. + + Returns None if the logits processor should not be applied to the + particular request. To use the logits processor the request must have + a "target_token" custom argument with an integer value. 
+ + Args: + params: per-request sampling params + + Returns: + `Callable` request logits processor, or None + """ + target_token: Optional[Any] = params.extra_args and params.extra_args.get( + "target_token" + ) + if target_token is None: + return None + if not isinstance(target_token, int): + logger.warning( + "target_token value %s is not int; not applying logits" + " processor to request.", + target_token, + ) + return None + return DummyPerReqLogitsProcessor(target_token) + ``` + +!!! note + Your `new_req_logits_processor()` override can return `None` to signal that the wrapped logits processor should not be applied to the request in question. + +Once you have created a custom subclass (like `WrappedPerReqLogitsProcessor`) which wraps your request level logits processor, you can pass the custom subclass to vLLM via any of the methods described in the following section. + +## Ways to Load Your Custom Logits Processor in vLLM + +Logits processors are loaded at initialization. Critically, the set of loaded logits processors cannot be modified after the vLLM engine finishes loading, and new logits logits processors cannot be loaded on-demand for individual requests. + +This section details different ways of making your logits processor visible to vLLM and triggering vLLM to load your logits processor. + +### Method 1: Pass the Custom Logits Processor Fully-Qualified Class Name (FQCN) to vLLM at Initialization Time + +This method is supported in both offline and online vLLM usage scenarios. The custom logits processor's FQCN (in the form of `dotted.path.to.module:ClassName`) can be passed as an argument to the `LLM` and `AsyncLLM` Python constructors, or as a CLI argument to `vllm serve` with the following syntax + +``` bash +vllm serve ... --logits_processors ... +``` + +The only requirements on the FQCN are + +1. Python's `importlib.import_module()` must be able to resolve the dotted path portion of the FQCN and load it as a module + +2. The class-name portion of the FQCN must be possible to import from the loaded module + +3. The object pointed to by the FQCN must be a subclass of `LogitsProcessor` + +See examples below: + +??? code "Passing custom logits processor FQCN to `LLM` in Python" + + ``` python + # Pass in FQCN + llm = LLM( + model="facebook/opt-125m", + logits_processors=["your.module.path:DummyLogitsProcessor"], + ) + ``` + +??? code "Passing custom logits processor FQCN to `AsyncLLM` in Python" + + ``` python + # Pass in FQCN + engine_args = AsyncEngineArgs(model="facebook/opt-125m", + logits_processors=["your.module.path:DummyLogitsProcessor"]) + async_llm = AsyncLLM.from_engine_args(engine_args) + ``` + +??? code "Passing custom logits processor FQCN to vLLM server via CLI" + + ```bash + vllm serve facebook/opt-125m --logits_processors your.module.path:DummyLogitsProcessor + ``` + +### Method 2: Automatically Detect Custom Logits Processors Installed in Your Python Environment As Entry Points + +[`setuptools`](https://setuptools.pypa.io/en/latest/userguide/entry_point.html) can enable installed packages to make themselves available as plugins to other Python programs, via pieces of metadata known as "entry points". + +During initialization, vLLM automatically scans the `vllm.logits_processors` entry point group and loads any installed logits processors which it finds. + +Suppose that you have developed a Python package that holds your custom logits processors. 
You can expose each logits processor to vLLM by adding a unique entrypoint for each logits processor to your logits processor Python package. The example below shows how to add an entrypoint to your project's `pyproject.toml` file: + +??? code "Exposing a custom logits processor as a Python entrypoint" + + ``` toml + [project.entry-points."vllm.logits_processors"] + dummy_logits_processor = "your.module.path:DummyLogitsProcessor" + ``` + +Once your package is installed, your custom logits processor will be loaded automatically whenever vLLM is initialized. You do *not* need to pass the custom logits processor to the `LLM` or `AsyncLLM` constructors or to the vLLM server explicitly at initialization time if your logits processor is exposed as an entry point. + +!!! note + vLLM will *always* load *all* logits processors which are exposed via entrypoints under the `vllm.logits_processors` grouping. + +### Method 3 (Offline-only): Pass a Python Class Object to the vLLM Constructor + +You can pass one or more custom logits processor class objects to the `LLM` and `AsyncLLM` constructors. This option is very flexible, as the logits processor classes may either be (1) defined locally within the same Python source file where `LLM` or `AsyncLLM` is instantiated, or (2) imported from a Python package. + +??? code "Passing custom logits processor class object to `LLM` or `AsyncLLM` in Python" + + ``` python + # Import custom logits processor + from some.module import DummyLogitsProcessor + + # ...or... + + # Define custom logits processor locally + from vllm.v1.sample.logits_processor import LogitsProcessor + + class DummyLogitsProcessor(LogitsProcessor): + # See DummyLogitsProcessor implementation above + ... + + # Pass class object to LLM constructor + llm = LLM( + model="facebook/opt-125m", + logits_processors=[DummyLogitsProcessor], + ) + + # Pass class object to AsyncLLM constructor + engine_args = AsyncEngineArgs(model="facebook/opt-125m", + logits_processors=[DummyLogitsProcessor]) + async_llm = AsyncLLM.from_engine_args(engine_args) + ``` + +## Invoking a Custom Logits Processor Against a Request + +The design of the custom logits processor determines whether the logits processor must be enabled/disabled for a given request, and what arguments must be provided to configure the logits processor. + +The examples below show how a user would pass a custom argument (`target_token`) to `DummyLogitsProcessor` in order to (1) enable the logits processor for that particular request and (2) control the logits processor's behavior. + +??? code "vLLM REST API: configure custom logits processor for a request" + + ``` bash + curl http://localhost:8000/v1/completions \ + -H "Content-Type: application/json" \ + -d '{ + "model": "Qwen/Qwen2.5-1.5B-Instruct", + ... + "vllm_xargs": {"target_token": 67} + }' + ``` + +??? code "OpenAI SDK: configure custom logits processor for a request" + + ``` python + batch = await client.completions.create( + model="Qwen/Qwen2.5-1.5B-Instruct", + ..., + extra_body={ + "vllm_xargs": { + "target_token": 67 + } + } + ) + ``` + +??? code "Offline: configure custom logits processor for an `LLM` request" + + ``` python + outputs_logitproc = llm.generate("your prompt", + SamplingParams(..., + extra_args={"target_token": 67})) + ``` + +??? 
code "Offline: configure custom logits processor for an `AsyncLLM` request" + + ``` python + async for out in engine.generate(request_id="your request id", + prompt="your prompt", + sampling_params=SamplingParams(..., + extra_args={"target_token": 67})): + + # Process async request outputs + ... + ``` + +## Best Practices for Writing Custom Logits Processors + +Once vLLM loads a logits processor during initialization, then vLLM will invoke `update_state()` and `apply()` against that logits processor in every engine step. Both methods operate on all requests which currently reside in the vLLM persistent batch. Thus it is important to implement these methods efficiently. + +* Write efficient `apply()` and `update_state()` implementations in light of the fact that logits processors operate at batch granularity + * For example, you may be able to use efficient vectorized operations to implement `apply()` or update internal state vectors in `update_state()` + * However, if you think that a logits processor may be used infrequently, it may be appropriate to use a "sparse" representation of request state i.e. the class can represent request configuration using a dictionary which only stores metadata about requests that enable the logits processor + * **Note:** wrapped request-level logits processors do not need to implement `apply()` and `update_state()`; the default `AdapterLogitsProcessor.update_state()` implementation maintains a sparse representation of request state, wherein requests for which `new_req_logits_processor()` returns `None` are not represented in the base-class state dictionary. The default implementation of `AdapterLogitsProcessor.apply()` applies the request-level logits processor to each row of input logits sequentially and assembles the output logits tensor. If the performance of this `AdapterLogitsProcessor` default implementation is insufficient, then avoid wrapping your request-level logits processor and instead re-implement it as a `LogitsProcessor` subclass with optimized `apply()` and `update_state()` implementations that operate at batch granularity + +* It is up to the logits processor author to determine: + + 1. **The per-request attributes which configure the logits processor's behavior against that request.** Your custom logits processor's `update_state()` override determines how `SamplingParams` fields are mapped into logits processor state + + * **Note:** for wrapped request-level logits processors, `new_req_logits_processor()` determines how `SamplingParams` fields are used to initialize a request-level logits processor instance. + + 2. **The conditions under which the logits processor is or is not enabled on a per-request basis.** Unless your intention is for the custom logits processor to act on all requests all the time, you should write your logits processor in such a way that it is possible to disable the logits processor for a given request, i.e. by defaulting an argument to `None` or by passing in a specific do-nothing argument value i.e. `0.0`. Try to save compute and memory for requests which disable the logits processor + + * **Note:** for wrapped per-request logits processors, the default `AdapterLogitsProcessor.update_state()` implementation ensures that the request-level logits processor is disabled when `new_req_logits_processor()` returns `None` for that request + + 3. 
**The conditions under which the logits processor is short-circuited at the batch level.** Even if you have defined a way to disable the custom logits processor at the request level, it may be difficult to translate this into compute savings i.e. if your `update_state()` and `apply()` implementations use efficient vectorized implementations that operate on the whole persistent batch in a single command. For example, you cannot skip an entire vectorized operation in `apply()` just because one request disabled the logits processor. To save compute in the edge-case where no running requests utilize the custom logits processor, we recommend designing `apply()` to return the unmodified input tensor if all requests have the logits processor disabled. Similarly, consider whether steps can be skipped in `update_state()` if no requests enable the logits processor + + * Additionally, an easy way to save compute in `update_state()` is to exit early when the `batch_update` is `None` + + * **Note:** for wrapped per-request logits processors, the `AdapterLogitsProcessor` base-class implements the above optimizations by default + +* Ensure that the logits processor `update_state` method discards information about finished requests (i.e. requests which are replaced by an Add or which are subject to a Remove) + + * **Note:** for wrapped per-request logits processors, the `AdapterLogitsProcessor` base-class handles this by default + +* `is_argmax_invariant()` can be hard-coded to `True` or `False` if the logits processor has consistent behavior. However the argmax invariance may also be determined programmatically (i.e. if your logits processor is user-customizable in some way that impacts whether the logits processor is argmax invariant). For this reason, `is_argmax_invariant()` is not a class method diff --git a/examples/offline_inference/logits_processor/custom.py b/examples/offline_inference/logits_processor/custom.py index 3e122319169e..4112a498f37a 100644 --- a/examples/offline_inference/logits_processor/custom.py +++ b/examples/offline_inference/logits_processor/custom.py @@ -56,7 +56,6 @@ def __init__( self.req_info: dict[int, int] = {} def is_argmax_invariant(self) -> bool: - """Never impacts greedy sampling""" return False def update_state(self, batch_update: Optional[BatchUpdate]): @@ -75,13 +74,12 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor: return logits # Save target values before modification - rows_list = list(self.req_info.keys()) cols = torch.tensor( - [self.req_info[i] for i in rows_list], - dtype=torch.long, - device=logits.device, + list(self.req_info.values()), dtype=torch.long, device=logits.device + ) + rows = torch.tensor( + list(self.req_info.keys()), dtype=torch.long, device=logits.device ) - rows = torch.tensor(rows_list, dtype=torch.long, device=logits.device) values_to_keep = logits[rows, cols].clone() # Mask all but target tokens diff --git a/tests/v1/logits_processors/utils.py b/tests/v1/logits_processors/utils.py index 7ec35bd3eb63..d3b7f314da09 100644 --- a/tests/v1/logits_processors/utils.py +++ b/tests/v1/logits_processors/utils.py @@ -69,11 +69,12 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor: return logits # Save target values before modification - rows_list = list(self.req_info.keys()) - cols = torch.tensor([self.req_info[i] for i in rows_list], + cols = torch.tensor(list(self.req_info.values()), + dtype=torch.long, + device=logits.device) + rows = torch.tensor(list(self.req_info.keys()), dtype=torch.long, device=logits.device) - rows = 
torch.tensor(rows_list, dtype=torch.long, device=logits.device) values_to_keep = logits[rows, cols].clone() # Mask all but target tokens diff --git a/vllm/v1/sample/logits_processor/interface.py b/vllm/v1/sample/logits_processor/interface.py index 683fc7c00dfb..04027359909a 100644 --- a/vllm/v1/sample/logits_processor/interface.py +++ b/vllm/v1/sample/logits_processor/interface.py @@ -21,6 +21,9 @@ class MoveDirectionality(Enum): SWAP = auto() +# Batch indices of any removed requests. +RemovedRequest = int + # (index, params, prompt_tok_ids, output_tok_ids) tuples for new # requests added to the batch. AddedRequest = tuple[int, SamplingParams, list[int], list[int]] @@ -29,9 +32,6 @@ class MoveDirectionality(Enum): # one-way moves or two-way swaps of requests in batch MovedRequest = tuple[int, int, MoveDirectionality] -# Batch indices of any removed requests. -RemovedRequest = int - @dataclass(frozen=True) class BatchUpdate: diff --git a/vllm/v1/sample/logits_processor/state.py b/vllm/v1/sample/logits_processor/state.py index 31cece58c7db..0a1196559d3e 100644 --- a/vllm/v1/sample/logits_processor/state.py +++ b/vllm/v1/sample/logits_processor/state.py @@ -36,18 +36,18 @@ class BatchUpdateBuilder: _removed: list[RemovedRequest] _is_removed_sorted: bool - moved: list[MovedRequest] added: list[AddedRequest] + moved: list[MovedRequest] def __init__( self, removed: Optional[list[RemovedRequest]] = None, - moved: Optional[list[MovedRequest]] = None, added: Optional[list[AddedRequest]] = None, + moved: Optional[list[MovedRequest]] = None, ) -> None: self._removed = removed or [] - self.moved = moved or [] self.added = added or [] + self.moved = moved or [] self._is_removed_sorted = False # Used to track changes in the pooling case @@ -107,8 +107,8 @@ def reset(self) -> bool: """Returns True if there were any changes to the batch.""" self._is_removed_sorted = False self._removed.clear() - self.moved.clear() self.added.clear() + self.moved.clear() batch_changed = self.batch_changed self.batch_changed = False return batch_changed From ee5fd491504913383e9e7b6782038f4ee7d36cfd Mon Sep 17 00:00:00 2001 From: Yihua Cheng Date: Wed, 17 Sep 2025 12:37:29 -0700 Subject: [PATCH 059/518] [Misc] Update owners for KV connector and V1 offloading (#25041) Signed-off-by: ApostaC --- .github/CODEOWNERS | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 73184d4e6b12..771dd2e17258 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -22,7 +22,7 @@ /vllm/reasoning @aarnphm @chaunceyjiang /vllm/entrypoints @aarnphm @chaunceyjiang /vllm/compilation @zou3519 @youkaichao @ProExpertProg -/vllm/distributed/kv_transfer @NickLucche +/vllm/distributed/kv_transfer @NickLucche @ApostaC CMakeLists.txt @tlrmchlsmth @LucasWilkinson # Any change to the VllmConfig changes can have a large user-facing impact, @@ -35,8 +35,9 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /vllm/v1/spec_decode @benchislett @luccafong /vllm/v1/attention/backends/flashinfer.py @mgoin /vllm/v1/attention/backends/triton_attn.py @tdoublep -/vllm/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 +/vllm/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 @ApostaC /vllm/v1/kv_cache_interface.py @heheda12345 +/vllm/v1/offloading @ApostaC # Test ownership /.buildkite/lm-eval-harness @mgoin @simon-mo @@ -54,11 +55,13 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/test_inputs.py @DarkLight1337 
@ywang96 /tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm /tests/v1/structured_output @mgoin @russellb @aarnphm -/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 +/tests/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 @ApostaC /tests/weight_loading @mgoin @youkaichao @yewentao256 /tests/lora @jeejeelee /tests/models/language/generation/test_hybrid.py @tdoublep -/tests/v1/kv_connector/nixl_integration @NickLucche +/tests/v1/kv_connector/nixl_integration @NickLucche +/tests/v1/kv_connector @ApostaC +/tests/v1/offloading @ApostaC # Docs /docs @hmellor From 883131544faf78f31f85a0350f74ea913ee6ef9c Mon Sep 17 00:00:00 2001 From: Mohammad Miadh Angkad Date: Thu, 18 Sep 2025 04:33:11 +0800 Subject: [PATCH 060/518] [Bugfix] Update import path for bc_linter_include (#24766) Signed-off-by: Mohammad Miadh Angkad --- vllm/v1/core/sched/output.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/core/sched/output.py b/vllm/v1/core/sched/output.py index 56ab396d6d93..3ec5b91bf286 100644 --- a/vllm/v1/core/sched/output.py +++ b/vllm/v1/core/sched/output.py @@ -6,7 +6,7 @@ from dataclasses import dataclass from typing import TYPE_CHECKING, Optional -from vllm import bc_linter_include +from vllm._bc_linter import bc_linter_include if TYPE_CHECKING: import numpy as np From f20c3b095109fd2a016e201d550bcaae6414e9fc Mon Sep 17 00:00:00 2001 From: ahao-anyscale Date: Wed, 17 Sep 2025 13:42:09 -0700 Subject: [PATCH 061/518] [BUG] Exclude .pth files when pulling remote files (#25092) Signed-off-by: ahao-anyscale --- vllm/config/__init__.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 5f3057609971..64be2f38c6a3 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -845,7 +845,8 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str, object_storage_model.pull_files(model, ignore_pattern=[ "*.pt", "*.safetensors", - "*.bin", "*.tensors" + "*.bin", "*.tensors", + "*.pth" ]) self.tokenizer = object_storage_model.dir return @@ -853,9 +854,12 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str, # Only download tokenizer if needed and not already handled if is_runai_obj_uri(tokenizer): object_storage_tokenizer = ObjectStorageModel() - object_storage_tokenizer.pull_files( - model, - ignore_pattern=["*.pt", "*.safetensors", "*.bin", "*.tensors"]) + object_storage_tokenizer.pull_files(model, + ignore_pattern=[ + "*.pt", "*.safetensors", + "*.bin", "*.tensors", + "*.pth" + ]) self.tokenizer = object_storage_tokenizer.dir def _get_encoder_config(self): From 3c068c637b9b1f945d5aa572da40553ab1691896 Mon Sep 17 00:00:00 2001 From: czhu-cohere Date: Wed, 17 Sep 2025 17:35:32 -0400 Subject: [PATCH 062/518] [Kernel] Faster pre-processing time for W4A8 (#23972) Signed-off-by: czhu-cohere --- .../cutlass_w4a8/w4a8_mm_entry.cu | 72 ++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu b/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu index 57bcbaae45dd..2d1568b08651 100644 --- a/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu +++ b/csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu @@ -25,6 +25,8 @@ #include "cutlass_extensions/common.hpp" #include "cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp" +#include + namespace vllm::cutlass_w4a8 { using namespace cute; @@ -393,6 +395,71 @@ 
torch::Tensor pack_scale_fp8(torch::Tensor const& scales) { return packed_scales; } +/* + GPU-accelerated implementation of cutlass::unified_encode_int4b. + Constructs a lookup table in constant memory to map 8 bits + (two 4-bit values) at a time. Assumes memory is contiguous + and pointers are 16-byte aligned. +*/ +__constant__ uint8_t kNibbleLUT[256]; + +__global__ void unified_encode_int4b_device(const uint8_t* in, uint8_t* out, + size_t nbytes) { + constexpr size_t V = sizeof(uint4); // 16 bytes + const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; + const size_t nthreads = size_t(gridDim.x) * blockDim.x; + const size_t nvec = nbytes / V; + + // 1-D grid-stride loop over 16-byte chunks + for (size_t vec = tid; vec < nvec; vec += nthreads) { + uint4 v = reinterpret_cast(in)[vec]; + uint8_t* b = reinterpret_cast(&v); +#pragma unroll + for (int i = 0; i < int(V); ++i) b[i] = kNibbleLUT[b[i]]; + reinterpret_cast(out)[vec] = v; + } +} + +static bool upload_lut() { + std::array lut{}; + auto map_nib = [](uint8_t v) -> uint8_t { + // 1..7 -> (8 - v); keep 0 and 8..15 + return (v == 0 || (v & 0x8)) ? v : uint8_t(8 - v); + }; + for (int b = 0; b < 256; ++b) { + uint8_t lo = b & 0xF; + uint8_t hi = (b >> 4) & 0xF; + lut[b] = uint8_t((map_nib(hi) << 4) | map_nib(lo)); + } + cudaError_t e = cudaMemcpyToSymbol(kNibbleLUT, lut.data(), lut.size(), + /*offset=*/0, cudaMemcpyHostToDevice); + + return (e == cudaSuccess); +} + +static bool unified_encode_int4b(cutlass::int4b_t const* in, + cutlass::int4b_t* out, size_t num_int4_elems) { + // Build/upload LUT + if (!upload_lut()) return false; + + static_assert(sizeof(typename cutlass::int4b_t::Storage) == 1, + "int4 storage must be 1 byte"); + const size_t nbytes = num_int4_elems >> 1; + + auto* in_bytes = reinterpret_cast(in); + auto* out_bytes = reinterpret_cast(out); + + // kernel launch params + constexpr int block = 256; + const size_t nvec = nbytes / sizeof(uint4); // # of 16B vectors + int grid = int((nvec + block - 1) / block); + if (grid == 0) grid = 1; // ensure we still cover the tail in the kernel + + unified_encode_int4b_device<<>>(in_bytes, out_bytes, nbytes); + cudaError_t err = cudaGetLastError(); + return (err == cudaSuccess); +} + torch::Tensor encode_and_reorder_int4b(torch::Tensor const& B) { TORCH_CHECK(B.dtype() == torch::kInt32); TORCH_CHECK(B.dim() == 2); @@ -401,6 +468,7 @@ torch::Tensor encode_and_reorder_int4b(torch::Tensor const& B) { int k = B.size(0) * PackFactor; // logical k int n = B.size(1); + TORCH_CHECK((n * k) % 32 == 0, "need multiples of 32 int4s for 16B chunks"); auto B_ptr = static_cast(B.const_data_ptr()); auto B_packed_ptr = static_cast(B_packed.data_ptr()); @@ -409,7 +477,9 @@ torch::Tensor encode_and_reorder_int4b(torch::Tensor const& B) { LayoutB_Reordered layout_B_reordered = cute::tile_to_shape(LayoutAtomQuant{}, shape_B); - cutlass::unified_encode_int4b(B_ptr, B_packed_ptr, n * k); + bool ok = + vllm::cutlass_w4a8::unified_encode_int4b(B_ptr, B_packed_ptr, n * k); + TORCH_CHECK(ok, "unified_encode_int4b failed"); cutlass::reorder_tensor(B_packed_ptr, layout_B, layout_B_reordered); return B_packed; From bff2e5f1d6201c817d29f49581e6c15724a4e186 Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Wed, 17 Sep 2025 15:04:28 -0700 Subject: [PATCH 063/518] [gpt-oss][2] fix types for streaming (#24556) Signed-off-by: Andrew Xia --- vllm/entrypoints/openai/api_server.py | 9 +- vllm/entrypoints/openai/protocol.py | 37 ++++- vllm/entrypoints/openai/serving_responses.py | 154 ++++++++----------- 3 files changed, 104 
insertions(+), 96 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 527193c91339..c07e95e9370a 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -27,7 +27,6 @@ from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, Response, StreamingResponse -from openai import BaseModel from prometheus_client import make_asgi_app from prometheus_fastapi_instrumentator import Instrumentator from starlette.concurrency import iterate_in_threadpool @@ -67,7 +66,9 @@ RerankRequest, RerankResponse, ResponsesRequest, ResponsesResponse, ScoreRequest, - ScoreResponse, TokenizeRequest, + ScoreResponse, + StreamingResponsesResponse, + TokenizeRequest, TokenizeResponse, TranscriptionRequest, TranscriptionResponse, @@ -481,8 +482,8 @@ async def show_version(): async def _convert_stream_to_sse_events( - generator: AsyncGenerator[BaseModel, - None]) -> AsyncGenerator[str, None]: + generator: AsyncGenerator[StreamingResponsesResponse, None] +) -> AsyncGenerator[str, None]: """Convert the generator to a stream of events in SSE format""" async for event in generator: event_type = getattr(event, 'type', 'unknown') diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 6b4c3f531dbc..2505e493625d 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -18,10 +18,19 @@ from openai.types.chat.chat_completion_message import ( Annotation as OpenAIAnnotation) # yapf: enable -from openai.types.responses import (ResponseFunctionToolCall, - ResponseInputItemParam, ResponseOutputItem, - ResponsePrompt, ResponseReasoningItem, - ResponseStatus) +from openai.types.responses import ( + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, ResponseCompletedEvent, + ResponseContentPartAddedEvent, ResponseContentPartDoneEvent, + ResponseCreatedEvent, ResponseFunctionToolCall, ResponseInProgressEvent, + ResponseInputItemParam, ResponseOutputItem, ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, ResponsePrompt, ResponseReasoningItem, + ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent, + ResponseStatus, ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent) # Backward compatibility for OpenAI client versions try: # For older openai versions (< 1.100.0) @@ -251,6 +260,26 @@ def get_logits_processors(processors: Optional[LogitsProcessors], ResponseReasoningItem, ResponseFunctionToolCall] +StreamingResponsesResponse: TypeAlias = Union[ + ResponseCreatedEvent, + ResponseInProgressEvent, + ResponseCompletedEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseReasoningTextDeltaEvent, + ResponseReasoningTextDoneEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ResponseWebSearchCallCompletedEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterCallCompletedEvent, +] + class ResponsesRequest(OpenAIBaseModel): # Ordered by official OpenAI API 
documentation diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index b81b2c7223ef..469d74272b0e 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -10,24 +10,28 @@ from contextlib import AsyncExitStack from copy import copy from http import HTTPStatus -from typing import Callable, Final, Optional, TypeVar, Union +from typing import Callable, Final, Optional, Union import jinja2 -import openai.types.responses as openai_responses_types from fastapi import Request -from openai import BaseModel # yapf conflicts with isort for this block # yapf: disable -from openai.types.responses import (ResponseCreatedEvent, - ResponseFunctionToolCall, - ResponseInProgressEvent, - ResponseOutputItem, - ResponseOutputItemDoneEvent, - ResponseOutputMessage, ResponseOutputText, - ResponseReasoningItem, - ResponseReasoningTextDeltaEvent, - ResponseReasoningTextDoneEvent, - ResponseStatus, response_text_delta_event) +from openai.types.responses import ( + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallCompletedEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterToolCallParam, ResponseCompletedEvent, + ResponseContentPartAddedEvent, ResponseContentPartDoneEvent, + ResponseCreatedEvent, ResponseFunctionToolCall, ResponseFunctionWebSearch, + ResponseInProgressEvent, ResponseOutputItem, ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, ResponseOutputMessage, ResponseOutputText, + ResponseReasoningItem, ResponseReasoningTextDeltaEvent, + ResponseReasoningTextDoneEvent, ResponseStatus, ResponseTextDeltaEvent, + ResponseTextDoneEvent, ResponseWebSearchCallCompletedEvent, + ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent, + response_function_web_search, response_text_delta_event) from openai.types.responses.response_output_text import (Logprob, LogprobTopLogprob) # yapf: enable @@ -55,7 +59,8 @@ OutputTokensDetails, RequestResponseMetadata, ResponsesRequest, - ResponsesResponse, ResponseUsage) + ResponsesResponse, ResponseUsage, + StreamingResponsesResponse) # yapf: enable from vllm.entrypoints.openai.serving_engine import OpenAIServing from vllm.entrypoints.openai.serving_models import OpenAIServingModels @@ -175,7 +180,7 @@ def __init__( # HACK(wuhang): This is a hack. We should use a better store. # FIXME: If enable_store=True, this may cause a memory leak since we # never remove events from the store. 
- self.event_store: dict[str, tuple[deque[BaseModel], + self.event_store: dict[str, tuple[deque[StreamingResponsesResponse], asyncio.Event]] = {} self.background_tasks: dict[str, asyncio.Task] = {} @@ -186,8 +191,8 @@ async def create_responses( self, request: ResponsesRequest, raw_request: Optional[Request] = None, - ) -> Union[AsyncGenerator[BaseModel, None], ResponsesResponse, - ErrorResponse]: + ) -> Union[AsyncGenerator[StreamingResponsesResponse, None], + ResponsesResponse, ErrorResponse]: error_check_ret = await self._check_model(request) if error_check_ret is not None: logger.error("Error with model %s", error_check_ret) @@ -814,7 +819,7 @@ async def _run_background_request_stream( *args, **kwargs, ): - event_deque: deque[BaseModel] = deque() + event_deque: deque[StreamingResponsesResponse] = deque() new_event_signal = asyncio.Event() self.event_store[request.request_id] = (event_deque, new_event_signal) response = None @@ -867,7 +872,7 @@ async def responses_background_stream_generator( self, response_id: str, starting_after: Optional[int] = None, - ) -> AsyncGenerator[BaseModel, None]: + ) -> AsyncGenerator[StreamingResponsesResponse, None]: if response_id not in self.event_store: raise ValueError(f"Unknown response_id: {response_id}") @@ -893,8 +898,8 @@ async def retrieve_responses( response_id: str, starting_after: Optional[int], stream: Optional[bool], - ) -> Union[ErrorResponse, ResponsesResponse, AsyncGenerator[BaseModel, - None]]: + ) -> Union[ErrorResponse, ResponsesResponse, AsyncGenerator[ + StreamingResponsesResponse, None]]: if not response_id.startswith("resp_"): return self._make_invalid_id_error(response_id) @@ -977,9 +982,9 @@ async def _process_simple_streaming_events( tokenizer: AnyTokenizer, request_metadata: RequestResponseMetadata, created_time: int, - _increment_sequence_number_and_return: Callable[[BaseModel], - BaseModel], - ) -> AsyncGenerator[BaseModel, None]: + _increment_sequence_number_and_return: Callable[ + [StreamingResponsesResponse], StreamingResponsesResponse], + ) -> AsyncGenerator[StreamingResponsesResponse, None]: current_content_index = 0 current_output_index = 0 current_item_id = "" @@ -1017,13 +1022,11 @@ async def _process_simple_streaming_events( current_item_id = str(uuid.uuid4()) if delta_message.reasoning_content: yield _increment_sequence_number_and_return( - openai_responses_types. ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseReasoningItem( + item=ResponseReasoningItem( type="reasoning", id=current_item_id, summary=[], @@ -1032,13 +1035,11 @@ async def _process_simple_streaming_events( )) else: yield _increment_sequence_number_and_return( - openai_responses_types. ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. 
- ResponseOutputMessage( + item=ResponseOutputMessage( id=current_item_id, type="message", role="assistant", @@ -1047,13 +1048,13 @@ async def _process_simple_streaming_events( ), )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseContentPartAddedEvent( + ResponseContentPartAddedEvent( type="response.content_part.added", sequence_number=-1, output_index=current_output_index, item_id=current_item_id, content_index=current_content_index, - part=openai_responses_types.ResponseOutputText( + part=ResponseOutputText( type="output_text", text="", annotations=[], @@ -1104,11 +1105,11 @@ async def _process_simple_streaming_events( item=reasoning_item, )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseOutputItemAddedEvent( + ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types.ResponseOutputMessage( + item=ResponseOutputMessage( id=current_item_id, type="message", role="assistant", @@ -1119,13 +1120,13 @@ async def _process_simple_streaming_events( current_output_index += 1 current_item_id = str(uuid.uuid4()) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseContentPartAddedEvent( + ResponseContentPartAddedEvent( type="response.content_part.added", sequence_number=-1, output_index=current_output_index, item_id=current_item_id, content_index=current_content_index, - part=openai_responses_types.ResponseOutputText( + part=ResponseOutputText( type="output_text", text="", annotations=[], @@ -1148,7 +1149,7 @@ async def _process_simple_streaming_events( )) elif delta_message.content is not None: yield _increment_sequence_number_and_return( - openai_responses_types.ResponseTextDeltaEvent( + ResponseTextDeltaEvent( type="response.output_text.delta", sequence_number=-1, content_index=current_content_index, @@ -1204,7 +1205,7 @@ async def _process_simple_streaming_events( for pm in previous_delta_messages if pm.content is not None) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseTextDoneEvent( + ResponseTextDoneEvent( type="response.output_text.done", sequence_number=-1, output_index=current_output_index, @@ -1220,7 +1221,7 @@ async def _process_simple_streaming_events( annotations=[], ) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseContentPartDoneEvent( + ResponseContentPartDoneEvent( type="response.content_part.done", sequence_number=-1, item_id=current_item_id, @@ -1257,9 +1258,9 @@ async def _process_harmony_streaming_events( tokenizer: AnyTokenizer, request_metadata: RequestResponseMetadata, created_time: int, - _increment_sequence_number_and_return: Callable[[BaseModel], - BaseModel], - ) -> AsyncGenerator[BaseModel, None]: + _increment_sequence_number_and_return: Callable[ + [StreamingResponsesResponse], StreamingResponsesResponse], + ) -> AsyncGenerator[StreamingResponsesResponse, None]: current_content_index = -1 current_output_index = 0 current_item_id: str = "" @@ -1314,7 +1315,7 @@ async def _process_harmony_streaming_events( annotations=[], ) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseTextDoneEvent( + ResponseTextDoneEvent( type="response.output_text.done", sequence_number=-1, output_index=current_output_index, @@ -1324,7 +1325,6 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types. 
ResponseContentPartDoneEvent( type="response.content_part.done", sequence_number=-1, @@ -1334,7 +1334,7 @@ async def _process_harmony_streaming_events( part=text_content, )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseOutputItemDoneEvent( + ResponseOutputItemDoneEvent( type="response.output_item.done", sequence_number=-1, output_index=current_output_index, @@ -1355,13 +1355,11 @@ async def _process_harmony_streaming_events( sent_output_item_added = True current_item_id = f"msg_{random_uuid()}" yield _increment_sequence_number_and_return( - openai_responses_types. ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseOutputMessage( + item=ResponseOutputMessage( id=current_item_id, type="message", role="assistant", @@ -1371,14 +1369,13 @@ async def _process_harmony_streaming_events( )) current_content_index += 1 yield _increment_sequence_number_and_return( - openai_responses_types. ResponseContentPartAddedEvent( type="response.content_part.added", sequence_number=-1, output_index=current_output_index, item_id=current_item_id, content_index=current_content_index, - part=openai_responses_types.ResponseOutputText( + part=ResponseOutputText( type="output_text", text="", annotations=[], @@ -1386,7 +1383,7 @@ async def _process_harmony_streaming_events( ), )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseTextDeltaEvent( + ResponseTextDeltaEvent( type="response.output_text.delta", sequence_number=-1, content_index=current_content_index, @@ -1402,13 +1399,11 @@ async def _process_harmony_streaming_events( sent_output_item_added = True current_item_id = f"msg_{random_uuid()}" yield _increment_sequence_number_and_return( - openai_responses_types. ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseReasoningItem( + item=ResponseReasoningItem( type="reasoning", id=current_item_id, summary=[], @@ -1417,14 +1412,13 @@ async def _process_harmony_streaming_events( )) current_content_index += 1 yield _increment_sequence_number_and_return( - openai_responses_types. ResponseContentPartAddedEvent( type="response.content_part.added", sequence_number=-1, output_index=current_output_index, item_id=current_item_id, content_index=current_content_index, - part=openai_responses_types.ResponseOutputText( + part=ResponseOutputText( type="output_text", text="", annotations=[], @@ -1450,13 +1444,11 @@ async def _process_harmony_streaming_events( sent_output_item_added = True current_item_id = f"tool_{random_uuid()}" yield _increment_sequence_number_and_return( - openai_responses_types. ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseCodeInterpreterToolCallParam( + item=ResponseCodeInterpreterToolCallParam( type="code_interpreter_call", id=current_item_id, code=None, @@ -1466,7 +1458,6 @@ async def _process_harmony_streaming_events( ), )) yield _increment_sequence_number_and_return( - openai_responses_types. ResponseCodeInterpreterCallInProgressEvent( type= "response.code_interpreter_call.in_progress", @@ -1475,7 +1466,6 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types. 
ResponseCodeInterpreterCallCodeDeltaEvent( type="response.code_interpreter_call_code.delta", sequence_number=-1, @@ -1495,14 +1485,12 @@ async def _process_harmony_streaming_events( action = None parsed_args = json.loads(previous_item.content[0].text) if function_name == "search": - action = (openai_responses_types. - response_function_web_search.ActionSearch( - type="search", - query=parsed_args["query"], - )) + action = (response_function_web_search.ActionSearch( + type="search", + query=parsed_args["query"], + )) elif function_name == "open": action = ( - openai_responses_types. response_function_web_search.ActionOpenPage( type="open_page", # TODO: translate to url @@ -1510,7 +1498,6 @@ async def _process_harmony_streaming_events( )) elif function_name == "find": action = ( - openai_responses_types. response_function_web_search.ActionFind( type="find", pattern=parsed_args["pattern"], @@ -1523,12 +1510,11 @@ async def _process_harmony_streaming_events( current_item_id = f"tool_{random_uuid()}" yield _increment_sequence_number_and_return( - openai_responses_types.ResponseOutputItemAddedEvent( + ResponseOutputItemAddedEvent( type="response.output_item.added", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - response_function_web_search. + item=response_function_web_search. ResponseFunctionWebSearch( # TODO: generate a unique id for web search call type="web_search_call", @@ -1538,7 +1524,6 @@ async def _process_harmony_streaming_events( ), )) yield _increment_sequence_number_and_return( - openai_responses_types. ResponseWebSearchCallInProgressEvent( type="response.web_search_call.in_progress", sequence_number=-1, @@ -1546,7 +1531,6 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types. ResponseWebSearchCallSearchingEvent( type="response.web_search_call.searching", sequence_number=-1, @@ -1556,7 +1540,6 @@ async def _process_harmony_streaming_events( # enqueue yield _increment_sequence_number_and_return( - openai_responses_types. ResponseWebSearchCallCompletedEvent( type="response.web_search_call.completed", sequence_number=-1, @@ -1564,12 +1547,11 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseOutputItemDoneEvent( + ResponseOutputItemDoneEvent( type="response.output_item.done", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseFunctionWebSearch( + item=ResponseFunctionWebSearch( type="web_search_call", id=current_item_id, action=action, @@ -1582,7 +1564,6 @@ async def _process_harmony_streaming_events( and previous_item.recipient is not None and previous_item.recipient.startswith("python")): yield _increment_sequence_number_and_return( - openai_responses_types. ResponseCodeInterpreterCallCodeDoneEvent( type="response.code_interpreter_call_code.done", sequence_number=-1, @@ -1591,7 +1572,6 @@ async def _process_harmony_streaming_events( code=previous_item.content[0].text, )) yield _increment_sequence_number_and_return( - openai_responses_types. ResponseCodeInterpreterCallInterpretingEvent( type="response.code_interpreter_call.interpreting", sequence_number=-1, @@ -1599,7 +1579,6 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types. 
ResponseCodeInterpreterCallCompletedEvent( type="response.code_interpreter_call.completed", sequence_number=-1, @@ -1607,12 +1586,11 @@ async def _process_harmony_streaming_events( item_id=current_item_id, )) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseOutputItemDoneEvent( + ResponseOutputItemDoneEvent( type="response.output_item.done", sequence_number=-1, output_index=current_output_index, - item=openai_responses_types. - ResponseCodeInterpreterToolCallParam( + item=ResponseCodeInterpreterToolCallParam( type="code_interpreter_call", id=current_item_id, code=previous_item.content[0].text, @@ -1633,7 +1611,7 @@ async def responses_stream_generator( tokenizer: AnyTokenizer, request_metadata: RequestResponseMetadata, created_time: Optional[int] = None, - ) -> AsyncGenerator[BaseModel, None]: + ) -> AsyncGenerator[StreamingResponsesResponse, None]: # TODO: # 1. Handle disconnect @@ -1641,9 +1619,9 @@ async def responses_stream_generator( sequence_number = 0 - T = TypeVar("T", bound=BaseModel) - - def _increment_sequence_number_and_return(event: T) -> T: + def _increment_sequence_number_and_return( + event: StreamingResponsesResponse + ) -> StreamingResponsesResponse: nonlocal sequence_number # Set sequence_number if the event has this attribute if hasattr(event, 'sequence_number'): @@ -1705,7 +1683,7 @@ async def empty_async_generator(): created_time=created_time, ) yield _increment_sequence_number_and_return( - openai_responses_types.ResponseCompletedEvent( + ResponseCompletedEvent( type="response.completed", sequence_number=-1, response=final_response.model_dump(), From fedb75fa2790403b90ec6dc926fef9c6c5ccb7a6 Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Date: Wed, 17 Sep 2025 18:06:38 -0400 Subject: [PATCH 064/518] [Bugfix][B200] Fix `cutlass_mla` hang (#24966) Signed-off-by: Alexander Matveev Co-authored-by: Michael Goin --- csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp index 95e32559cd54..fbbc2e588c32 100644 --- a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp +++ b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp @@ -133,6 +133,14 @@ class MLA { // printf(" sm_count = %d\n", sm_count); int max_splits = ceil_div(K, 128); max_splits = min(16, max_splits); + + // TODO: This avoids a hang when the batch size larger than 1 and + // there is more than 4 kv_splits. + // Discuss with NVIDIA how this can be fixed. 
+ if (B > 1) { + max_splits = min(2, max_splits); + } + // printf(" max_splits = %d\n", max_splits); int sms_per_batch = max(1, sm_count / B); // printf(" sms_per_batch = %d\n", sms_per_batch); From 1a456c7c90afdb534d1203d7e4ea5747aada801c Mon Sep 17 00:00:00 2001 From: Douglas Lehr <91553416+dllehr-amd@users.noreply.github.com> Date: Wed, 17 Sep 2025 17:29:14 -0500 Subject: [PATCH 065/518] Aiter mha fp8 fix (#24991) Signed-off-by: Doug Lehr Co-authored-by: Doug Lehr --- vllm/attention/ops/rocm_aiter_paged_attn.py | 4 ++-- vllm/v1/attention/backends/rocm_aiter_fa.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/attention/ops/rocm_aiter_paged_attn.py b/vllm/attention/ops/rocm_aiter_paged_attn.py index ad97152e208b..2a0336de8cf7 100644 --- a/vllm/attention/ops/rocm_aiter_paged_attn.py +++ b/vllm/attention/ops/rocm_aiter_paged_attn.py @@ -81,8 +81,8 @@ def forward_decode( blocksparse_head_sliding_step=blocksparse_head_sliding_step) if "fp8" in kv_cache_dtype: - key_cache = key_cache.view(torch.float8_e4m3fnuz) - value_cache = value_cache.view(torch.float8_e4m3fnuz) + key_cache = key_cache.view(current_platform.fp8_dtype()) + value_cache = value_cache.view(current_platform.fp8_dtype()) if blocksparse_vert_stride is not None and blocksparse_vert_stride > 1: # use blocksparse paged attention diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index a4e2758bd311..8eb3505cf274 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -479,8 +479,8 @@ def forward( ) if self.kv_cache_dtype.startswith("fp8"): - key_cache = key_cache.view(torch.float8_e4m3fnuz) - value_cache = value_cache.view(torch.float8_e4m3fnuz) + key_cache = key_cache.view(current_platform.fp8_dtype()) + value_cache = value_cache.view(current_platform.fp8_dtype()) if not attn_metadata.use_cascade: cu_seqlens_q = attn_metadata.query_start_loc From 9f882d879198200104fed7e166d40dd11039d217 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 17 Sep 2025 18:36:00 -0400 Subject: [PATCH 066/518] Disable failing GPT-OSS Eval (Blackwell) for now (#25107) Signed-off-by: mgoin --- .buildkite/test-pipeline.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 133ba792680d..150dc40a9173 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -812,7 +812,7 @@ steps: timeout_in_minutes: 60 working_dir: "/vllm-workspace/" gpu: b200 - # optional: true + optional: true # disable while debugging source_file_dependencies: - tests/evals/gpt_oss - vllm/model_executor/models/gpt_oss.py From e67a79db03752e9ab7ed216bc99c30a16f45a33e Mon Sep 17 00:00:00 2001 From: elvischenv <219235043+elvischenv@users.noreply.github.com> Date: Thu, 18 Sep 2025 06:36:29 +0800 Subject: [PATCH 067/518] [Bugfix] Refactor Flashinfer TRTLLM attention kernel selection logic (#24600) Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com> Co-authored-by: Michael Goin --- vllm/envs.py | 7 ++- vllm/utils/flashinfer.py | 70 ++++++++++++++++-------- vllm/v1/attention/backends/flashinfer.py | 17 ++++-- 3 files changed, 65 insertions(+), 29 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 385d2a7c51f2..eeed7771f045 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -1223,9 +1223,12 @@ def get_vllm_port() -> Optional[int]: "VLLM_USE_CUDNN_PREFILL": lambda: bool(int(os.getenv("VLLM_USE_CUDNN_PREFILL", "0"))), - # If 
set to 1, use the TRTLLM attention backend in flashinfer. + # If set to 1/True, use the TRTLLM attention backend in flashinfer. + # If set to 0/False, use the default attention backend in flashinfer. + # If not set, auto-detect the attention backend in flashinfer. "VLLM_USE_TRTLLM_ATTENTION": - lambda: os.getenv("VLLM_USE_TRTLLM_ATTENTION", None), + lambda: (None if "VLLM_USE_TRTLLM_ATTENTION" not in os.environ else + os.environ["VLLM_USE_TRTLLM_ATTENTION"].lower() in ("1", "true")), # If set to 1, when we use fp8 kv, we do not quantize Q to fp8 "VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION": diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py index 83ec65c9b459..2179bddae243 100644 --- a/vllm/utils/flashinfer.py +++ b/vllm/utils/flashinfer.py @@ -154,28 +154,31 @@ def has_nvidia_artifactory() -> bool: @functools.cache -def supports_trtllm_attention() -> tuple[bool, Optional[str]]: - """Cache result which only depends on the environment""" - # This is a lambda, call it once - env_value = envs.VLLM_USE_TRTLLM_ATTENTION - +def supports_trtllm_attention() -> bool: + """ + TRTLLM attention is supported if the platform is SM100 and + NVIDIA artifactory is accessible + """ # Requires SM100 and NVIDIA artifactory to be accessible to download cubins - if not (current_platform.is_device_capability(100) - and has_nvidia_artifactory()): - return False, env_value + return current_platform.is_device_capability( + 100) and has_nvidia_artifactory() + +@functools.cache +def _force_use_trtllm_attention(env_value: Optional[bool]) -> Optional[bool]: + """Cache the env value for VLLM_USE_TRTLLM_ATTENTION""" if env_value is not None: logger.info_once("VLLM_USE_TRTLLM_ATTENTION is set to %s", env_value) - # Environment variable is set - respect it - # Making the conditional check for zero because - # the path is automatically enabled if the batch size condition - # is satisfied. - use_trtllm = (env_value == "1") - if use_trtllm: - logger.info_once("Using TRTLLM attention.") - return use_trtllm, env_value + return env_value - return True, None + +def force_use_trtllm_attention() -> Optional[bool]: + """ + Return ``None`` if VLLM_USE_TRTLLM_ATTENTION is not set, + return ``True`` if TRTLLM attention is forced to be used, + return ``False`` if TRTLLM attention is forced to be not used. 
+ """ + return _force_use_trtllm_attention(envs.VLLM_USE_TRTLLM_ATTENTION) def use_trtllm_attention( @@ -185,18 +188,38 @@ def use_trtllm_attention( max_seq_len: int, kv_cache_dtype: str, q_dtype: torch.dtype, - is_prefill: bool, has_sinks: bool = False, ) -> bool: - use_trtllm, env_value = supports_trtllm_attention() - if not use_trtllm: + """Return ``True`` if TRTLLM attention is used.""" + force_use_trtllm = force_use_trtllm_attention() + + # Environment variable is set to 0 - respect it + if force_use_trtllm is not None and not force_use_trtllm: return False + # The platform is not supported + if not supports_trtllm_attention(): + if force_use_trtllm: + logger.warning_once( + "TRTLLM attention is not supported on this platform, " + "but VLLM_USE_TRTLLM_ATTENTION is set to 1") + return False + + # The combination of query and key heads is not supported if num_qo_heads % num_kv_heads != 0: + if force_use_trtllm: + logger.warning_once( + "TRTLLM attention is not supported for this combination of " + "query and key heads, but VLLM_USE_TRTLLM_ATTENTION is set to 1" + ) return False # Must use TRTLLM attention if query is FP8 quantized if q_dtype == current_platform.fp8_dtype(): + if has_sinks: + raise RuntimeError( + "TRTLLM FP8-qkv kernel is not supported for attention sinks. " + "Use kv_cache_dtype=auto for now.") logger.info_once("Using TRTLLM attention (query is quantized).") return True @@ -207,15 +230,17 @@ def use_trtllm_attention( "Using TRTLLM attention (required for attention sinks).") return True - if env_value is None: + if force_use_trtllm is None: # Environment variable not set - use auto-detection - use_trtllm = (num_tokens <= 256 and max_seq_len < 131072 + use_trtllm = (num_tokens <= 256 and max_seq_len <= 131072 and kv_cache_dtype == "auto") if use_trtllm: logger.warning_once("Using TRTLLM attention (auto-detected).") return use_trtllm # Environment variable is set to 1 - respect it + logger.info_once( + "Using TRTLLM attention (VLLM_USE_TRTLLM_ATTENTION is set to 1)") return True @@ -367,6 +392,7 @@ def flashinfer_disable_q_quantization() -> bool: "has_nvidia_artifactory", "supports_trtllm_attention", "use_trtllm_attention", + "flashinfer_disable_q_quantization", "flashinfer_scaled_fp4_mm", "flashinfer_scaled_fp8_mm", ] diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 98a4cf38bc19..dda6dd4fbea7 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -282,7 +282,11 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], assert self.kv_cache_spec.dtype == self.model_config.dtype self.kv_cache_dtype = self.kv_cache_spec.dtype - if supports_trtllm_attention()[0] and \ + # Use model dtype as q dtype when TRTLLM attn is not supported, or + # VLLM_FLASHINFER_DISABLE_Q_QUANTIZATION is set to 1. 
Otherwise, try to + # use fp8 q if kv cache is fp8, and will fall back to model dtype + # if TRTLLM attention kernel is not used when building attn metadata + if supports_trtllm_attention() and \ not flashinfer_disable_q_quantization(): self.q_data_type = self.kv_cache_dtype else: @@ -298,7 +302,7 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], self.window_left = self.global_hyperparameters.window_left self.logits_soft_cap = self.global_hyperparameters.logits_soft_cap self.has_sinks = self.global_hyperparameters.has_sinks - if self.has_sinks and not supports_trtllm_attention()[0]: + if self.has_sinks and not supports_trtllm_attention(): raise NotImplementedError( "FlashInfer backend currently does not support attention " "sinks, please use trtllm on blackwell or flash attention on " @@ -477,14 +481,12 @@ def build(self, paged_kv_last_page_len_np, ) - # Check if any layer uses sinks (requires TRTLLM attention) prefill_use_trtllm = use_trtllm_attention(self.num_qo_heads, self.num_kv_heads, num_prefill_tokens, max_seq_len, self.cache_dtype, self.q_data_type, - is_prefill=True, has_sinks=self.has_sinks) decode_use_trtllm = use_trtllm_attention(self.num_qo_heads, self.num_kv_heads, @@ -492,13 +494,18 @@ def build(self, max_seq_len, self.cache_dtype, self.q_data_type, - is_prefill=False, has_sinks=self.has_sinks) if self.has_sinks and not (prefill_use_trtllm and decode_use_trtllm): raise NotImplementedError( "FlashInfer backend currently does not support attention " "sinks, please use trtllm on blackwell or flash attention on " "earlier GPUs.") + + # If TRTLLM attention is not used, the q quantization is not supported. + # Fall back to use model dtype. + if not (prefill_use_trtllm and decode_use_trtllm): + self.q_data_type = self.model_config.dtype + attn_metadata = FlashInferMetadata( num_actual_tokens=num_actual_tokens, q_data_type=self.q_data_type, From 2a4d6412e612d657d00daeafc0a569d86659021b Mon Sep 17 00:00:00 2001 From: Karan Goel <3261985+karan@users.noreply.github.com> Date: Wed, 17 Sep 2025 15:41:18 -0700 Subject: [PATCH 068/518] Add a batched auto tune script (#25076) Signed-off-by: Karan Goel Signed-off-by: Karan Goel <3261985+karan@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- benchmarks/auto_tune/README.md | 67 +++++++++++++ benchmarks/auto_tune/batch_auto_tune.sh | 128 ++++++++++++++++++++++++ 2 files changed, 195 insertions(+) create mode 100755 benchmarks/auto_tune/batch_auto_tune.sh diff --git a/benchmarks/auto_tune/README.md b/benchmarks/auto_tune/README.md index 3aa988aac254..d1bdb4c43f10 100644 --- a/benchmarks/auto_tune/README.md +++ b/benchmarks/auto_tune/README.md @@ -149,3 +149,70 @@ The script follows a systematic process to find the optimal parameters: 4. **Track Best Result**: Throughout the process, the script tracks the parameter combination that has yielded the highest valid throughput so far. 5. **Profile Collection**: For the best-performing run, the script saves the vLLM profiler output, which can be used for deep-dive performance analysis with tools like TensorBoard. + +## Batched `auto_tune` + +The `batch_auto_tune.sh` script allows you to run multiple `auto_tune.sh` experiments sequentially from a single configuration file. It iterates through a list of parameter sets, executes `auto_tune.sh` for each, and records the results back into the input file. + +### Prerequisites + +- **jq**: This script requires `jq` to parse the JSON configuration file. 
+- **gcloud**: If you plan to upload results to Google Cloud Storage, the `gcloud` CLI must be installed and authenticated. + +### How to Run + +1. **Create a JSON configuration file**: Create a file (e.g., `runs_config.json`) containing an array of JSON objects. Each object defines the parameters for a single `auto_tune.sh` run. + +2. **Execute the script**: + + ```bash + bash batch_auto_tune.sh [gcs_upload_path] + ``` + + - ``: **Required.** Path to your JSON configuration file. + - `[gcs_upload_path]`: **Optional.** A GCS path (e.g., `gs://my-bucket/benchmark-results`) where the detailed results and profiles for each run will be uploaded. If this is empty, the results will be available on the local filesystem (see the log for `RESULT_FILE=/path/to/results/file.txt`). + +### Configuration File + +The JSON configuration file should contain an array of objects. Each object's keys correspond to the configuration variables for `auto_tune.sh` (see the [Configuration table above](#configuration)). These keys will be converted to uppercase environment variables for each run. + +Here is an example `runs_config.json` with two benchmark configurations: + +```json +[ + { + "base": "/home/user", + "model": "meta-llama/Llama-3.1-8B-Instruct", + "system": "TPU", # OR GPU + "tp": 8, + "input_len": 128, + "output_len": 2048, + "max_model_len": 2300, + "num_seqs_list": "128 256", + "num_batched_tokens_list": "8192 16384" + }, + { + "base": "/home/user", + "model": "meta-llama/Llama-3.1-70B-Instruct", + "system": "TPU", # OR GPU + "tp": 8, + "input_len": 4000, + "output_len": 16, + "max_model_len": 4096, + "num_seqs_list": "64 128", + "num_batched_tokens_list": "4096 8192", + "max_latency_allowed_ms": 500 + } +] +``` + +### Output + +The script modifies the input JSON file in place, adding the results of each run to the corresponding object. The following fields are added: + +- `run_id`: A unique identifier for the run, derived from the timestamp. +- `status`: The outcome of the run (`SUCCESS`, `FAILURE`, or `WARNING_NO_RESULT_FILE`). +- `results`: The content of the `result.txt` file from the `auto_tune.sh` run. +- `gcs_results`: The GCS URL where the run's artifacts are stored (if a GCS path was provided). + +A summary of successful and failed runs is also printed to the console upon completion. diff --git a/benchmarks/auto_tune/batch_auto_tune.sh b/benchmarks/auto_tune/batch_auto_tune.sh new file mode 100755 index 000000000000..57ef20daf6b7 --- /dev/null +++ b/benchmarks/auto_tune/batch_auto_tune.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +INPUT_JSON="$1" +GCS_PATH="$2" # Optional GCS path for uploading results for each run + +SCRIPT_DIR=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd) +AUTOTUNE_SCRIPT="$SCRIPT_DIR/auto_tune.sh" + +if [[ -z "$INPUT_JSON" ]]; then + echo "Error: Input JSON file not provided." + echo "Usage: $0 [gcs_upload_path]" + exit 1 +fi + +if [[ ! -f "$INPUT_JSON" ]]; then + echo "Error: File not found at '$INPUT_JSON'" + exit 1 +fi + +if ! command -v jq &> /dev/null; then + echo "Error: 'jq' command not found. Please install jq to process the JSON input." + exit 1 +fi + +if [[ -n "$GCS_PATH" ]] && ! command -v gcloud &> /dev/null; then + echo "Error: 'gcloud' command not found, but a GCS_PATH was provided." + exit 1 +fi + +SUCCESS_COUNT=0 +FAILURE_COUNT=0 +FAILED_RUNS=() +SCRIPT_START_TIME=$(date +%s) + +json_content=$(cat "$INPUT_JSON") +if ! num_runs=$(echo "$json_content" | jq 'length'); then + echo "Error: Invalid JSON in $INPUT_JSON. 'jq' failed to get array length." 
+  exit 1
+fi
+
+echo "Found $num_runs benchmark configurations in $INPUT_JSON."
+echo "Starting benchmark runs..."
+echo "--------------------------------------------------"
+
+for i in $(seq 0 $(($num_runs - 1))); do
+  run_object=$(echo "$json_content" | jq ".[$i]")
+
+  RUN_START_TIME=$(date +%s)
+  ENV_VARS_ARRAY=()
+  # Dynamically create env vars from the JSON object's keys
+  for key in $(echo "$run_object" | jq -r 'keys_unsorted[]'); do
+    value=$(echo "$run_object" | jq -r ".$key")
+    var_name=$(echo "$key" | tr '[:lower:]' '[:upper:]' | tr -cd 'A-Z0-9_')
+    ENV_VARS_ARRAY+=("${var_name}=${value}")
+  done
+
+  echo "Executing run #$((i+1))/$num_runs with parameters: ${ENV_VARS_ARRAY[*]}"
+
+  # Execute auto_tune.sh and capture output
+  RUN_OUTPUT_FILE=$(mktemp)
+  if env "${ENV_VARS_ARRAY[@]}" bash "$AUTOTUNE_SCRIPT" > >(tee -a "$RUN_OUTPUT_FILE") 2>&1; then
+    STATUS="SUCCESS"
+    ((SUCCESS_COUNT++))
+  else
+    STATUS="FAILURE"
+    ((FAILURE_COUNT++))
+    FAILED_RUNS+=("Run #$((i+1)): $(echo $run_object | jq -c .)")
+  fi
+
+  RUN_OUTPUT=$(<"$RUN_OUTPUT_FILE")
+  rm "$RUN_OUTPUT_FILE"
+
+  # Parse results and optionally upload them to GCS
+  RUN_ID=""
+  RESULTS=""
+  GCS_RESULTS_URL=""
+  if [[ "$STATUS" == "SUCCESS" ]]; then
+    RESULT_FILE_PATH=$(echo "$RUN_OUTPUT" | grep 'RESULT_FILE=' | tail -n 1 | cut -d'=' -f2 | tr -s '/' || true)
+
+    if [[ -n "$RESULT_FILE_PATH" && -f "$RESULT_FILE_PATH" ]]; then
+      RUN_ID=$(basename "$(dirname "$RESULT_FILE_PATH")")
+      RESULT_DIR=$(dirname "$RESULT_FILE_PATH")
+      RESULTS=$(cat "$RESULT_FILE_PATH")
+
+      if [[ -n "$GCS_PATH" ]]; then
+        GCS_RESULTS_URL="${GCS_PATH}/${RUN_ID}"
+        echo "Uploading results to GCS..."
+        if gcloud storage rsync --recursive "$RESULT_DIR/" "$GCS_RESULTS_URL"; then
+          echo "GCS upload successful."
+        else
+          echo "Warning: GCS upload failed for RUN_ID $RUN_ID."
+        fi
+      fi
+    else
+      echo "Warning: Could not find result file for a successful run."
+      STATUS="WARNING_NO_RESULT_FILE"
+    fi
+  fi
+
+  # Add the results back into the JSON object for this run
+  json_content=$(echo "$json_content" | jq --argjson i "$i" --arg run_id "$RUN_ID" --arg status "$STATUS" --arg results "$RESULTS" --arg gcs_results "$GCS_RESULTS_URL" \
+    '.[$i] += {run_id: $run_id, status: $status, results: $results, gcs_results: $gcs_results}')
+
+  RUN_END_TIME=$(date +%s)
+  echo "Run finished in $((RUN_END_TIME - RUN_START_TIME)) seconds. Status: $STATUS"
+  echo "--------------------------------------------------"
+
+  # Save intermediate progress back to the file
+  echo "$json_content" > "$INPUT_JSON.tmp" && mv "$INPUT_JSON.tmp" "$INPUT_JSON"
+
+done
+
+SCRIPT_END_TIME=$(date +%s)
+echo "All benchmark runs completed in $((SCRIPT_END_TIME - SCRIPT_START_TIME)) seconds."
+echo
+echo "====================== SUMMARY ======================"
+echo "Successful runs: $SUCCESS_COUNT"
+echo "Failed runs: $FAILURE_COUNT"
+echo "==================================================="
+
+if [[ $FAILURE_COUNT -gt 0 ]]; then
+  echo "Details of failed runs (see JSON file for full parameters):"
+  for failed in "${FAILED_RUNS[@]}"; do
+    echo " - $failed"
+  done
fi
+
+echo "Updated results have been saved to '$INPUT_JSON'."
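Because `batch_auto_tune.sh` writes `run_id`, `status`, `results`, and `gcs_results` back into the input file, a completed sweep can be post-processed directly from that JSON without re-parsing any logs. The following is a minimal sketch, not part of the patch above, that assumes only the documented output fields; the default filename `runs_config.json` and the `summarize` helper name are illustrative.

```python
# Sketch: summarize a runs_config.json that batch_auto_tune.sh has updated in place.
# Assumes only the documented fields: run_id, status, results, gcs_results.
import json
import sys
from collections import Counter


def summarize(path: str) -> None:
    with open(path) as f:
        runs = json.load(f)  # the config file is a JSON array of run objects

    # Count outcomes across the sweep (SUCCESS / FAILURE / WARNING_NO_RESULT_FILE).
    counts = Counter(run.get("status", "UNKNOWN") for run in runs)
    print(f"{len(runs)} runs: " + ", ".join(f"{k}={v}" for k, v in counts.items()))

    # One line per run: where its artifacts ended up.
    for run in runs:
        model = run.get("model", "?")
        status = run.get("status", "UNKNOWN")
        where = run.get("gcs_results") or "local (see RESULT_FILE in the run log)"
        print(f"- {run.get('run_id', 'n/a')}: {model} [{status}] -> {where}")


if __name__ == "__main__":
    summarize(sys.argv[1] if len(sys.argv) > 1 else "runs_config.json")
```

Saved as, say, `summarize_runs.py` (name illustrative), it can be pointed at the updated configuration file once the batch completes.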
From e6585ddb451ba6056e044184f7fc88dcc13f8cfe Mon Sep 17 00:00:00 2001 From: elvischenv <219235043+elvischenv@users.noreply.github.com> Date: Thu, 18 Sep 2025 07:37:23 +0800 Subject: [PATCH 069/518] [Bugfix] Fix accuracy issue for silu_mul + nvfp4 quant fusion kernel (#24833) Signed-off-by: elvischenv <219235043+elvischenv@users.noreply.github.com> Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 2 +- .../activation_nvfp4_quant_fusion_kernels.cu | 120 ++++------------- tests/compile/test_silu_mul_quant_fusion.py | 13 +- .../quantization/test_silu_mul_nvfp4_quant.py | 75 +++++++++++ .../test_silu_nvfp4_quant_fusion.py | 126 ------------------ 5 files changed, 110 insertions(+), 226 deletions(-) create mode 100644 tests/kernels/quantization/test_silu_mul_nvfp4_quant.py delete mode 100644 tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 150dc40a9173..08c10180fc22 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -796,7 +796,7 @@ steps: # Quantization - pytest -v -s tests/kernels/quantization/test_cutlass_scaled_mm.py -k 'fp8' - pytest -v -s tests/kernels/quantization/test_nvfp4_quant.py - - pytest -v -s tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py + - pytest -v -s tests/kernels/quantization/test_silu_mul_nvfp4_quant.py - pytest -v -s tests/kernels/quantization/test_nvfp4_scaled_mm.py - pytest -v -s tests/kernels/quantization/test_flashinfer_scaled_mm.py - pytest -v -s tests/kernels/quantization/test_flashinfer_nvfp4_scaled_mm.py diff --git a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu b/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu index b4eb141cb488..74fde23782ce 100644 --- a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu +++ b/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu @@ -30,109 +30,41 @@ namespace vllm { -template -__inline__ __device__ PackedVec compute_silu(PackedVec& vec, - PackedVec& vec2) { - PackedVec result; -#pragma unroll - for (int i = 0; i < CVT_FP4_ELTS_PER_THREAD / 2; ++i) { - if constexpr (std::is_same_v) { - half2 val(0.5f, 0.5f); - half2 t0 = __hmul2(vec.elts[i], val); - half2 t1 = __hfma2(h2tanh(t0), val, val); - half2 t2 = __hmul2(vec.elts[i], t1); - result.elts[i] = __hmul2(t2, vec2.elts[i]); - } else { - __nv_bfloat162 val(0.5f, 0.5f); - __nv_bfloat162 t0 = __hmul2(vec.elts[i], val); - __nv_bfloat162 t1 = __hfma2(h2tanh(t0), val, val); - __nv_bfloat162 t2 = __hmul2(vec.elts[i], t1); - result.elts[i] = __hmul2(t2, vec2.elts[i]); - } - } - return result; +// silu in float32 +__device__ __forceinline__ float silu(float x) { + return __fdividef(x, (1.f + __expf(-x))); } -// Quantizes the provided PackedVec into the uint32_t output -template -__device__ uint32_t silu_and_cvt_warp_fp16_to_fp4(PackedVec& vec, - PackedVec& vec2, - float SFScaleVal, - uint8_t* SFout) { - PackedVec out_silu = compute_silu(vec, vec2); - // Get absolute maximum values among the local 8 values. - auto localMax = __habs2(out_silu.elts[0]); - -// Local maximum value. -#pragma unroll - for (int i = 1; i < CVT_FP4_ELTS_PER_THREAD / 2; i++) { - localMax = __hmax2(localMax, __habs2(out_silu.elts[i])); - } - - // Get the absolute maximum among all 16 values (two threads). - localMax = __hmax2(__shfl_xor_sync(uint32_t(-1), localMax, 1), localMax); - // Get the final absolute maximum values. 
- float vecMax = float(__hmax(localMax.x, localMax.y)); - - // Get the SF (max value of the vector / max value of e2m1). - // maximum value of e2m1 = 6.0. - // TODO: use half as compute data type. - float SFValue = SFScaleVal * (vecMax * reciprocal_approximate_ftz(6.0f)); - // 8 bits representation of the SF. - uint8_t fp8SFVal; - // Write the SF to global memory (STG.8). - if constexpr (UE8M0_SF) { - // Extract the 8 exponent bits from float32. - // float 32bits = 1 sign bit + 8 exponent bits + 23 mantissa bits. - uint32_t tmp = reinterpret_cast(SFValue) >> 23; - fp8SFVal = tmp & 0xff; - // Convert back to fp32. - reinterpret_cast(SFValue) = tmp << 23; - } else { - // Here SFValue is always positive, so E4M3 is the same as UE4M3. - __nv_fp8_e4m3 tmp = __nv_fp8_e4m3(SFValue); - reinterpret_cast<__nv_fp8_e4m3&>(fp8SFVal) = tmp; - // Convert back to fp32. - SFValue = float(tmp); - } - // Get the output scale. - // Recipe: final_scale = reciprocal(fp32(fp8(SFValue * SFScaleVal))) * - // reciprocal(SFScaleVal)) - float outputScale = - SFValue != 0 ? reciprocal_approximate_ftz( - SFValue * reciprocal_approximate_ftz(SFScaleVal)) - : 0.0f; - - if (SFout) { - // Write the SF to global memory (STG.8). - *SFout = fp8SFVal; - } +__device__ __forceinline__ float2 silu2(float2 x) { + return make_float2(silu(x.x), silu(x.y)); +} - // Convert the input to float. - float2 fp2Vals[CVT_FP4_ELTS_PER_THREAD / 2]; +template +__inline__ __device__ PackedVec compute_silu_mul(PackedVec& vec, + PackedVec& vec2) { + PackedVec result; + using packed_type = typename TypeConverter::Type; #pragma unroll - for (int i = 0; i < CVT_FP4_ELTS_PER_THREAD / 2; i++) { + for (int i = 0; i < CVT_FP4_ELTS_PER_THREAD / 2; ++i) { + // silu_mul in float32 if constexpr (std::is_same_v) { - fp2Vals[i] = __half22float2(out_silu.elts[i]); + float2 silu_vec = silu2(__half22float2(vec.elts[i])); + result.elts[i] = + __float22half2_rn(__fmul2_rn(silu_vec, __half22float2(vec2.elts[i]))); } else { - fp2Vals[i] = __bfloat1622float2(out_silu.elts[i]); + float2 silu_vec = silu2(__bfloat1622float2(vec.elts[i])); + result.elts[i] = __float22bfloat162_rn( + __fmul2_rn(silu_vec, __bfloat1622float2(vec2.elts[i]))); } - fp2Vals[i].x *= outputScale; - fp2Vals[i].y *= outputScale; } - - // Convert to e2m1 values. - uint32_t e2m1Vec = fp32_vec_to_e2m1(fp2Vals); - - // Write the e2m1 values to global memory. - return e2m1Vec; + return result; } // Use UE4M3 by default. template __global__ void __launch_bounds__(1024, 4) - silu_and_cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, + silu_mul_cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, float const* SFScale, uint32_t* out, uint32_t* SFout) { using PackedVec = PackedVec; @@ -160,16 +92,18 @@ __global__ void __launch_bounds__(1024, 4) // Get the output tensor offset. // Same as inOffset because 8 elements are packed into one uint32_t. 
int64_t outOffset = rowIdx * (numCols / CVT_FP4_ELTS_PER_THREAD) + colIdx; - ; auto& out_pos = out[outOffset]; + // Compute silu and mul + PackedVec out_silu_mul = compute_silu_mul(in_vec, in_vec2); + auto sf_out = cvt_quant_to_fp4_get_sf_out_offset( rowIdx, colIdx, numCols, SFout); - out_pos = silu_and_cvt_warp_fp16_to_fp4( - in_vec, in_vec2, SFScaleVal, sf_out); + out_pos = cvt_warp_fp16_to_fp4(out_silu_mul, SFScaleVal, + sf_out); } } } @@ -204,7 +138,7 @@ void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output, // [..., d] input.scalar_type(), "silu_and_mul_nvfp4_quant_kernel", [&] { using cuda_type = vllm::CUDATypeConverter::Type; auto input_ptr = static_cast(input.data_ptr()); - vllm::silu_and_cvt_fp16_to_fp4<<>>( + vllm::silu_mul_cvt_fp16_to_fp4<<>>( m, n, input_ptr, input_sf_ptr, reinterpret_cast(output_ptr), reinterpret_cast(sf_out)); diff --git a/tests/compile/test_silu_mul_quant_fusion.py b/tests/compile/test_silu_mul_quant_fusion.py index 736db80a2f37..ae190d25cad6 100644 --- a/tests/compile/test_silu_mul_quant_fusion.py +++ b/tests/compile/test_silu_mul_quant_fusion.py @@ -98,8 +98,9 @@ def ops_in_model_after(self): return [FUSED_OPS[kNvfp4Quant]] -@pytest.mark.parametrize("num_tokens", [64]) -@pytest.mark.parametrize("hidden_size", [128]) +@pytest.mark.parametrize("num_tokens", [32, 64]) +@pytest.mark.parametrize("hidden_size", [128, 256]) +@pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) @pytest.mark.parametrize( "model_class", cast(list[type], [TestSiluMulFp8QuantModel, TestSiluMulNvfp4QuantModel] @@ -110,13 +111,13 @@ def ops_in_model_after(self): [True, False] if cutlass_fp8_supported() else [True]) @pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda", "rocm"], reason="Only test on CUDA and ROCm") -def test_fusion_silu_and_mul_quant(num_tokens, hidden_size, model_class, +def test_fusion_silu_and_mul_quant(num_tokens, hidden_size, dtype, model_class, cuda_force_torch): if model_class == TestSiluMulNvfp4QuantModel and cuda_force_torch: pytest.skip("Duplicate tests for NVFP4") torch.set_default_device("cuda") - torch.set_default_dtype(torch.float16) + torch.set_default_dtype(dtype) x = torch.rand(num_tokens, hidden_size * 2) @@ -145,8 +146,8 @@ def test_fusion_silu_and_mul_quant(num_tokens, hidden_size, model_class, elif model_class == TestSiluMulNvfp4QuantModel: atol, rtol = 1e-1, 1e-1 - torch.testing.assert_close(result[0].to(dtype=torch.float16), - result2[0].to(dtype=torch.float16), + torch.testing.assert_close(result[0].to(dtype=dtype), + result2[0].to(dtype=dtype), atol=atol, rtol=rtol) diff --git a/tests/kernels/quantization/test_silu_mul_nvfp4_quant.py b/tests/kernels/quantization/test_silu_mul_nvfp4_quant.py new file mode 100644 index 000000000000..a40d0c4ef122 --- /dev/null +++ b/tests/kernels/quantization/test_silu_mul_nvfp4_quant.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest +import torch + +from tests.kernels.quantization.nvfp4_utils import (FLOAT4_E2M1_MAX, + FLOAT8_E4M3_MAX, + dequantize_nvfp4_to_dtype) +from vllm._custom_ops import scaled_fp4_quant +from vllm.model_executor.layers.activation import SiluAndMul +from vllm.platforms import current_platform + +if not current_platform.has_device_capability(100): + pytest.skip(reason="Nvfp4 Requires compute capability of 10 or above.", + allow_module_level=True) + +FP4_DTYPE = torch.uint8 +FP8_DTYPE = current_platform.fp8_dtype() + +DTYPES = [torch.float16, torch.bfloat16] +SHAPES = [(128, 
256), (128, 128), (256, 256), (256, 128)] +BLOCK_SIZE = 16 + + +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("shape", SHAPES) +@torch.inference_mode() +def test_silu_mul_nvfp4_quant( + dtype: torch.dtype, + shape: tuple[int, int], +) -> None: + current_platform.seed_everything(42) + device = 'cuda:0' + torch.set_default_device(device) + + x = torch.randn(shape, dtype=dtype) + + # ref op + ref_output = SiluAndMul().forward_native(x) + ref_global_scale = ((FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX) / + torch.abs(ref_output).max().to(torch.float32)) + ref_output_quant, ref_block_scale = scaled_fp4_quant( + ref_output, ref_global_scale) + + # fused op + fused_output_quant = torch.empty_like(ref_output_quant) + fused_block_scale = torch.empty_like(ref_block_scale) + torch.ops._C.silu_and_mul_nvfp4_quant(fused_output_quant, + fused_block_scale, x, + ref_global_scale) + + # check dtype + assert ref_output_quant.dtype == FP4_DTYPE + assert fused_output_quant.dtype == FP4_DTYPE + assert ref_output_quant.shape == fused_output_quant.shape + + assert ref_block_scale.dtype == FP8_DTYPE + assert fused_block_scale.dtype == FP8_DTYPE + assert ref_block_scale.shape == fused_block_scale.shape + + # check dequantized output + ref_output_dequant = dequantize_nvfp4_to_dtype(ref_output_quant, + ref_block_scale, + ref_global_scale, dtype, + device) + fused_output_dequant = dequantize_nvfp4_to_dtype(fused_output_quant, + fused_block_scale, + ref_global_scale, dtype, + device) + + atol, rtol = 3e-1, 3e-1 + torch.testing.assert_close(ref_output_dequant, + fused_output_dequant, + atol=atol, + rtol=rtol) diff --git a/tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py b/tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py deleted file mode 100644 index 969f14cc3fe6..000000000000 --- a/tests/kernels/quantization/test_silu_nvfp4_quant_fusion.py +++ /dev/null @@ -1,126 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest -import torch - -from tests.kernels.utils import opcheck -from vllm.model_executor.layers.activation import SiluAndMul -from vllm.platforms import current_platform -from vllm.scalar_type import scalar_types - -if not current_platform.has_device_capability(100): - pytest.skip(reason="Nvfp4 Requires compute capability of 10 or above.", - allow_module_level=True) - -DTYPES = [torch.float16, torch.bfloat16] -SHAPES = [(128, 64), (128, 128), (256, 64), (256, 128)] -SEEDS = [42] -CUDA_DEVICES = ['cuda:0'] - -FLOAT4_E2M1_MAX = scalar_types.float4_e2m1f.max() -FLOAT8_E4M3_MAX = torch.finfo(torch.float8_e4m3fn).max - -BLOCK_SIZE = 16 - - -def ref_impl(silu_and_mul: SiluAndMul, x: torch.Tensor, - global_scale: torch.Tensor, - ref_output_scale: torch.Tensor) -> torch.Tensor: - silu_and_mul_out = silu_and_mul.forward_native(x) - assert not current_platform.is_rocm() - assert silu_and_mul_out.ndim >= 1, ( - f'input.ndim needs to be >= 1, but got {silu_and_mul_out.ndim}.') - other_dims = 1 if silu_and_mul_out.ndim == 1 else -1 - silu_and_mul_out = silu_and_mul_out.reshape(other_dims, - silu_and_mul_out.shape[-1]) - m, n = silu_and_mul_out.shape - device = silu_and_mul_out.device - - # Two fp4 values will be packed into an uint8. 
- out = torch.empty((m, n // 2), device=device, dtype=torch.uint8) - - output_scale = ref_output_scale - - torch.ops._C.scaled_fp4_quant(out, silu_and_mul_out, output_scale, - global_scale) - - return out, output_scale - - -def ops_impl(x: torch.Tensor, global_scale: torch.Tensor, - ref_output_scale: torch.Tensor) -> torch.Tensor: - out_shape = (x.shape[0], x.shape[1] // 4) - output_scale = ref_output_scale - out = torch.empty(out_shape, dtype=torch.uint8, device=x.device) - torch.ops._C.silu_and_mul_nvfp4_quant(out, output_scale, x, global_scale) - return out, output_scale - - -@pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("shape", SHAPES) -@pytest.mark.parametrize("seed", SEEDS) -@pytest.mark.parametrize("device", CUDA_DEVICES) -@torch.inference_mode() -def test_quantize_to_fp4( - dtype: torch.dtype, - shape: tuple[int, int], - seed: int, - device: str, -) -> None: - current_platform.seed_everything(seed) - torch.set_default_device(device) - - m, n = shape - - x = torch.randn((m, n), dtype=dtype) - tensor_amax = torch.abs(x).max().to(torch.float32) - global_scale = FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX / tensor_amax - - block_size = 16 - - assert n % block_size == 0, ( - f'last dim has to be multiple of 16, but got {n}.') - assert x.dtype in (torch.float16, torch.bfloat16), ( - f'input.dtype needs to be fp16 or bf16 but got {x.dtype}.') - - round_up = lambda x, y: (x + y - 1) // y * y - rounded_m = round_up(x.shape[0], 128) - scale_n = x.shape[1] // (2 * block_size) - rounded_n = round_up(scale_n, 4) - output_scale = torch.empty((rounded_m, rounded_n // 4), - device=x.device, - dtype=torch.int32) - - layer = SiluAndMul() - - ref_out, ref_out_scale = ref_impl(layer, x, global_scale, output_scale) - - fusion_out, fusion_out_scale = ops_impl(x, global_scale, output_scale) - - assert ref_out.dtype == torch.uint8 - assert fusion_out.dtype == torch.uint8 - assert ref_out.shape == fusion_out.shape - - assert ref_out_scale.dtype == torch.int32 - assert fusion_out_scale.dtype == torch.int32 - assert ref_out_scale.shape == fusion_out_scale.shape - - # Allow up to 2% of mismatched values since BF16 has accuracy issues. 
- mis_threshold = 0.02 - atol = 0.4 - rtol = 0.4 - ref_logits = ref_out[-1] - fusion_logits = fusion_out[-1] - - mis_count = torch.sum( - torch.abs(fusion_logits - ref_logits) > (atol + - rtol * torch.abs(ref_logits))) - mis_ratio = mis_count / fusion_logits.numel() - - assert mis_ratio < mis_threshold, \ - f"Mismatch ratio {mis_ratio} exceeds threshold {mis_threshold}" - - torch.testing.assert_close(ref_out_scale, fusion_out_scale) - - opcheck(torch.ops._C.silu_and_mul_nvfp4_quant, - (fusion_out, fusion_out_scale, x, global_scale)) From 5963b98b465007e3cfb0d39447e4459a8afa96dc Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Wed, 17 Sep 2025 19:43:31 -0400 Subject: [PATCH 070/518] [Kernel] Delegate construction of FusedMoEQuantConfig to FusedMoEMethodBase subclasses (#22537) Signed-off-by: Bill Nell --- .../kernels/benchmark_cutlass_fp4_moe.py | 58 +- .../kernels/benchmark_grouped_gemm_cutlass.py | 43 +- benchmarks/kernels/benchmark_moe.py | 73 +- .../moe/modular_kernel_tools/common.py | 110 +-- .../make_feature_matrix.py | 5 +- .../moe/modular_kernel_tools/mk_objects.py | 163 ++-- tests/kernels/moe/test_batched_deepgemm.py | 20 +- tests/kernels/moe/test_batched_moe.py | 4 +- tests/kernels/moe/test_block_fp8.py | 81 +- tests/kernels/moe/test_block_int8.py | 47 +- tests/kernels/moe/test_cutlass_moe.py | 53 +- tests/kernels/moe/test_deepep_deepgemm_moe.py | 87 +- tests/kernels/moe/test_deepep_moe.py | 70 +- tests/kernels/moe/test_deepgemm.py | 34 +- tests/kernels/moe/test_flashinfer.py | 32 +- tests/kernels/moe/test_flashinfer_moe.py | 68 +- .../moe/test_gpt_oss_triton_kernels.py | 30 +- .../moe/test_modular_kernel_combinations.py | 40 +- tests/kernels/moe/test_moe.py | 34 +- tests/kernels/moe/test_nvfp4_moe.py | 19 +- tests/kernels/moe/test_pplx_cutlass_moe.py | 21 +- tests/kernels/moe/test_pplx_moe.py | 62 +- tests/kernels/moe/test_triton_moe_ptpc_fp8.py | 15 +- tests/kernels/moe/utils.py | 149 +++- .../kernels/quantization/test_int8_kernel.py | 35 +- .../layers/fused_moe/__init__.py | 8 +- .../layers/fused_moe/batched_deep_gemm_moe.py | 40 +- .../batched_triton_or_deep_gemm_moe.py | 58 +- .../model_executor/layers/fused_moe/config.py | 682 +++++++++++----- .../layers/fused_moe/cutlass_moe.py | 165 ++-- .../layers/fused_moe/deep_gemm_moe.py | 65 +- .../fused_moe/deepep_ht_prepare_finalize.py | 13 +- .../fused_moe/deepep_ll_prepare_finalize.py | 45 +- .../fused_moe/flashinfer_cutlass_moe.py | 50 +- .../flashinfer_cutlass_prepare_finalize.py | 7 +- .../layers/fused_moe/flashinfer_trtllm_moe.py | 185 +++++ .../layers/fused_moe/fused_batched_moe.py | 119 +-- .../layers/fused_moe/fused_moe.py | 746 +++++------------- .../fused_moe/gpt_oss_triton_kernels_moe.py | 95 +-- vllm/model_executor/layers/fused_moe/layer.py | 137 ++-- .../layers/fused_moe/modular_kernel.py | 149 ++-- .../layers/fused_moe/pplx_prepare_finalize.py | 10 +- .../layers/fused_moe/prepare_finalize.py | 4 +- .../layers/fused_moe/rocm_aiter_fused_moe.py | 52 +- .../layers/fused_moe/triton_deep_gemm_moe.py | 48 +- .../layers/fused_moe/trtllm_moe.py | 25 +- vllm/model_executor/layers/fused_moe/utils.py | 4 + .../layers/quantization/awq_marlin.py | 8 +- .../layers/quantization/bitsandbytes.py | 8 +- .../compressed_tensors_moe.py | 395 +++++----- .../layers/quantization/experts_int8.py | 16 +- .../model_executor/layers/quantization/fp8.py | 159 ++-- .../layers/quantization/gguf.py | 7 +- .../layers/quantization/gptq_marlin.py | 8 +- .../layers/quantization/ipex_quant.py | 5 + 
.../layers/quantization/modelopt.py | 237 +++--- .../layers/quantization/moe_wna16.py | 33 +- .../layers/quantization/mxfp4.py | 37 +- .../layers/quantization/quark/quark_moe.py | 46 +- .../model_executor/layers/quantization/rtn.py | 34 +- .../quantization/utils/flashinfer_fp4_moe.py | 20 +- .../quantization/utils/flashinfer_utils.py | 38 +- .../layers/quantization/utils/fp8_utils.py | 1 + vllm/model_executor/models/bert_with_rope.py | 10 +- vllm/model_executor/models/deepseek.py | 22 +- vllm/model_executor/models/minicpm.py | 21 +- vllm/model_executor/models/qwen3_moe.py | 2 +- .../model_executor/warmup/deep_gemm_warmup.py | 11 +- 68 files changed, 2675 insertions(+), 2503 deletions(-) create mode 100644 vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py diff --git a/benchmarks/kernels/benchmark_cutlass_fp4_moe.py b/benchmarks/kernels/benchmark_cutlass_fp4_moe.py index 35c20ee41b9a..726a2a371d10 100644 --- a/benchmarks/kernels/benchmark_cutlass_fp4_moe.py +++ b/benchmarks/kernels/benchmark_cutlass_fp4_moe.py @@ -13,6 +13,10 @@ from vllm import _custom_ops as ops from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config, + nvfp4_moe_quant_config, +) from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4 from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk from vllm.scalar_type import scalar_types @@ -140,6 +144,12 @@ def run_triton_moe( a_fp8_scale: torch.Tensor, num_repeats: int, ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a_fp8_scale, + ) + for _ in range(num_repeats): fused_experts( a, @@ -147,10 +157,7 @@ def run_triton_moe( w2, topk_weights, topk_ids, - use_fp8_w8a8=True, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a_fp8_scale, + quant_config=quant_config, ) def run_cutlass_moe_fp4( @@ -172,25 +179,27 @@ def run_cutlass_moe_fp4( device: torch.device, num_repeats: int, ): + quant_config = nvfp4_moe_quant_config( + a1_gscale=a1_gs, + a2_gscale=a2_gs, + w1_scale=w1_blockscale, + w2_scale=w2_blockscale, + g1_alphas=w1_gs, + g2_alphas=w2_gs, + ) for _ in range(num_repeats): with nvtx.annotate("cutlass_moe_fp4", color="green"): cutlass_moe_fp4( a=a, - a1_gscale=a1_gs, - a2_gscale=a2_gs, w1_fp4=w1_fp4, - w1_blockscale=w1_blockscale, - w1_alphas=w1_gs, w2_fp4=w2_fp4, - w2_blockscale=w2_blockscale, - w2_alphas=w2_gs, topk_weights=topk_weights, topk_ids=topk_ids, m=m, n=n, k=k, e=num_experts, - device=device, + quant_config=quant_config, ) def run_cutlass_from_graph( @@ -211,26 +220,29 @@ def run_cutlass_from_graph( e: int, device: torch.device, ): + quant_config = nvfp4_moe_quant_config( + a1_gscale=a1_gs, + a2_gscale=a2_gs, + w1_scale=w1_blockscale, + w2_scale=w2_blockscale, + g1_alphas=w1_gs, + g2_alphas=w2_gs, + ) + with set_current_vllm_config( VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1)) ): return cutlass_moe_fp4( a=a, - a1_gscale=a1_gs, w1_fp4=w1_fp4, - w1_blockscale=w1_blockscale, - w1_alphas=w1_alphas, - a2_gscale=a2_gs, w2_fp4=w2_fp4, - w2_blockscale=w2_blockscale, - w2_alphas=w2_alphas, topk_weights=topk_weights, topk_ids=topk_ids, m=m, n=n, k=k, e=num_experts, - device=device, + quant_config=quant_config, ) def run_triton_from_graph( @@ -246,16 +258,18 @@ def run_triton_from_graph( with set_current_vllm_config( VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1)) ): + quant_config = fp8_w8a8_moe_quant_config( + 
w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a_fp8_scale, + ) return fused_experts( a, w1, w2, topk_weights, topk_ids, - use_fp8_w8a8=True, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a_fp8_scale, + quant_config=quant_config, ) def replay_graph(graph, num_repeats): diff --git a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py index a6b42406b5cb..14330ae6f03c 100644 --- a/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py +++ b/benchmarks/kernels/benchmark_grouped_gemm_cutlass.py @@ -7,6 +7,7 @@ from vllm import _custom_ops as ops from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8 from vllm.model_executor.layers.fused_moe.fused_moe import ( fused_experts, @@ -96,6 +97,11 @@ def run_triton_moe( a_scale: torch.Tensor, num_repeats: int, ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a_scale, + ) for _ in range(num_repeats): fused_experts( a, @@ -103,10 +109,7 @@ def run_triton_moe( w2, topk_weights, topk_ids, - use_fp8_w8a8=True, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a_scale, + quant_config=quant_config, ) def run_cutlass_moe( @@ -125,6 +128,12 @@ def run_cutlass_moe( per_act_token: bool, num_repeats: int, ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + per_act_token_quant=per_act_token, + ) + for _ in range(num_repeats): cutlass_moe_fp8( a, @@ -132,14 +141,11 @@ def run_cutlass_moe( w2, topk_weights, topk_ids, - w1_scale, - w2_scale, ab_strides1, ab_strides2, c_strides1, c_strides2, - per_act_token, - a1_scale=None, + quant_config=quant_config, ) def run_cutlass_from_graph( @@ -156,6 +162,12 @@ def run_cutlass_from_graph( topk_weights: torch.Tensor, topk_ids: torch.Tensor, ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + per_act_token_quant=per_act_token, + ) + with set_current_vllm_config( VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1)) ): @@ -165,14 +177,11 @@ def run_cutlass_from_graph( w2_q, topk_weights, topk_ids, - w1_scale, - w2_scale, ab_strides1, ab_strides2, c_strides1, c_strides2, - per_act_token, - a1_scale=None, + quant_config=quant_config, ) def run_triton_from_graph( @@ -185,6 +194,11 @@ def run_triton_from_graph( w2_scale: torch.Tensor, a_scale: torch.Tensor, ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a_scale, + ) with set_current_vllm_config( VllmConfig(parallel_config=ParallelConfig(pipeline_parallel_size=1)) ): @@ -194,10 +208,7 @@ def run_triton_from_graph( w2, topk_weights, topk_ids, - use_fp8_w8a8=True, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a_scale, + quant_config=quant_config, ) def replay_graph(graph, num_repeats): diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py index 837b2b0c1044..d2beb28f7023 100644 --- a/benchmarks/kernels/benchmark_moe.py +++ b/benchmarks/kernels/benchmark_moe.py @@ -14,6 +14,10 @@ import torch from ray.experimental.tqdm_ray import tqdm +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, + _get_config_dtype_str, +) from vllm.model_executor.layers.fused_moe.fused_moe import * from vllm.platforms import current_platform from vllm.transformers_utils.config import get_config @@ -134,43 
+138,36 @@ def prepare(i: int): def run(): from vllm.model_executor.layers.fused_moe import override_config + if use_fp8_w8a8: + quant_dtype = torch.float8_e4m3fn + elif use_int8_w8a16: + quant_dtype = torch.int8 + else: + quant_dtype = None + + quant_config = FusedMoEQuantConfig.make( + quant_dtype=quant_dtype, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + block_shape=block_quant_shape, + ) + with override_config(config): - if use_deep_gemm: - topk_weights, topk_ids, token_expert_indices = fused_topk( - x, input_gating, topk, False - ) - return fused_experts( - x, - w1, - w2, - topk_weights, - topk_ids, - inplace=True, - use_fp8_w8a8=use_fp8_w8a8, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, - block_shape=block_quant_shape, - allow_deep_gemm=True, - ) - else: - fused_moe( - x, - w1, - w2, - input_gating, - topk, - renormalize=True, - inplace=True, - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a16=use_int8_w8a16, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, - block_shape=block_quant_shape, - ) + topk_weights, topk_ids, token_expert_indices = fused_topk( + x, input_gating, topk, renormalize=not use_deep_gemm + ) + return fused_experts( + x, + w1, + w2, + topk_weights, + topk_ids, + inplace=True, + quant_config=quant_config, + allow_deep_gemm=use_deep_gemm, + ) # JIT compilation & warmup run() @@ -414,7 +411,7 @@ def benchmark( use_deep_gemm: bool = False, ) -> tuple[dict[str, int], float]: current_platform.seed_everything(self.seed) - dtype_str = get_config_dtype_str( + dtype_str = _get_config_dtype_str( dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8 ) # NOTE(woosuk): The current naming convention uses w2.shape[2], which @@ -547,7 +544,7 @@ def save_configs( block_quant_shape: list[int], save_dir: str, ) -> None: - dtype_str = get_config_dtype_str( + dtype_str = _get_config_dtype_str( dtype, use_int8_w8a16=use_int8_w8a16, use_fp8_w8a8=use_fp8_w8a8 ) diff --git a/tests/kernels/moe/modular_kernel_tools/common.py b/tests/kernels/moe/modular_kernel_tools/common.py index a10666b6ec9a..b5fcc4cd70bf 100644 --- a/tests/kernels/moe/modular_kernel_tools/common.py +++ b/tests/kernels/moe/modular_kernel_tools/common.py @@ -20,7 +20,7 @@ from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx -from .mk_objects import (expert_info, make_fused_experts, +from .mk_objects import (TestMoEQuantConfig, expert_info, make_fused_experts, make_prepare_finalize, prepare_finalize_info) from .parallel_utils import ProcessGroupInfo @@ -40,7 +40,7 @@ class Config: E: int topks: Union[list[int], int] dtype: torch.dtype - quant_config: Optional[FusedMoEQuantConfig] + quant_config: Optional[TestMoEQuantConfig] prepare_finalize_type: mk.FusedMoEPrepareAndFinalize fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute @@ -52,7 +52,7 @@ class Config: def __post_init__(self): if self.quant_config is None: - self.quant_config = FusedMoEQuantConfig() + self.quant_config = TestMoEQuantConfig(None, False, False, None) def describe(self) -> str: s = "" @@ -275,21 +275,19 @@ def is_quantized(self) -> bool: or self.w1.dtype == torch.uint8 or self.w1.dtype == torch.int8) def to_current_device(self): - self.w1 = self.w1.to(device=torch.cuda.current_device()) - self.w2 = self.w2.to(device=torch.cuda.current_device()) + device = torch.cuda.current_device() + self.w1 = self.w1.to(device=device) + self.w2 = self.w2.to(device=device) - if 
self.is_quantized(): - assert self.w1_scale is not None - assert self.w2_scale is not None - self.w1_scale = self.w1_scale.to( - device=torch.cuda.current_device()) - self.w2_scale = self.w2_scale.to( - device=torch.cuda.current_device()) + if self.w1_scale is not None: + self.w1_scale = self.w1_scale.to(device=device) + if self.w2_scale is not None: + self.w2_scale = self.w2_scale.to(device=device) if self.w1_gs is not None: - assert self.w2_gs is not None - self.w1_gs = self.w1_gs.to(device=torch.cuda.current_device()) - self.w2_gs = self.w2_gs.to(device=torch.cuda.current_device()) + self.w1_gs = self.w1_gs.to(device=device) + if self.w2_gs is not None: + self.w2_gs = self.w2_gs.to(device=device) def slice_weights(self, rank: int, num_local_experts: int) -> "WeightTensors": @@ -297,20 +295,12 @@ def slice_weights(self, rank: int, e = s + num_local_experts w1 = self.w1[s:e, :, :] w2 = self.w2[s:e, :, :] - - w1_scale, w2_scale = (None, None) - if self.is_quantized(): - assert self.w1_scale is not None - assert self.w2_scale is not None - w1_scale = self.w1_scale[s:e, :, :] - w2_scale = self.w2_scale[s:e, :, :] - - w1_gs = self.w1_gs - w2_gs = self.w2_gs - if w1_gs is not None: - assert w2_gs is not None - w1_gs = w1_gs[s:e] - w2_gs = w2_gs[s:e] + w1_scale = self.w1_scale[ + s:e, :, :] if self.w1_scale is not None else None + w2_scale = self.w2_scale[ + s:e, :, :] if self.w2_scale is not None else None + w1_gs = self.w1_gs[s:e] if self.w1_gs is not None else None + w2_gs = self.w2_gs[s:e] if self.w2_gs is not None else None return WeightTensors(w1, w2, w1_scale, w2_scale, w1_gs, w2_gs) @@ -323,7 +313,8 @@ def make(config: Config) -> "WeightTensors": in_dtype=config.dtype, quant_dtype=config.quant_dtype, block_shape=config.quant_block_shape, - per_act_token_quant=config.is_per_out_ch_quant, + per_out_ch_quant=config. 
+ is_per_act_token_quant, # or config.is_per_out_ch_quant ) return WeightTensors(w1=w1, w2=w2, @@ -342,8 +333,6 @@ class RankTensors: topk_ids: torch.Tensor expert_map: Optional[torch.Tensor] - quant_config: Optional[FusedMoEQuantConfig] - def describe(self): s = "" s += "== Rank Tensors: \n" @@ -426,7 +415,6 @@ def make(config: Config, pgi: ProcessGroupInfo): topk_weights=topk_weights, topk_ids=topk_ids, expert_map=expert_map, - quant_config=config.quant_config, ) @@ -522,10 +510,16 @@ def reference_moe_impl(config: Config, weights: WeightTensors, and config.supports_apply_weight_on_input()) +def _make_gscale(num_experts: int) -> torch.Tensor: + return torch.ones((num_experts, ), + device=torch.cuda.current_device(), + dtype=torch.float32) + + def make_modular_kernel( config: Config, vllm_config: VllmConfig, - weights: WeightTensors, + quant_config: FusedMoEQuantConfig, ) -> mk.FusedMoEModularKernel: def next_power_of_2(x): @@ -548,20 +542,20 @@ def next_power_of_2(x): num_local_experts=config.num_local_experts, moe_parallel_config=moe_parallel_config, in_dtype=config.dtype, - quant_config=config.quant_config, max_num_tokens=next_power_of_2(config.M), ) # make modular kernel prepare_finalize = make_prepare_finalize(config.prepare_finalize_type, - config.all2all_backend(), moe) + config.all2all_backend(), moe, + quant_config) fused_experts = make_fused_experts( config.fused_experts_type, moe, + quant_config, prepare_finalize.num_dispatchers(), - weights.w1_gs, - weights.w2_gs, + config.N, ) modular_kernel = mk.FusedMoEModularKernel( @@ -583,12 +577,38 @@ def run_modular_kernel( # weights for rank rank_weights = weights.slice_weights(pgi.rank, config.num_local_experts) - mk = make_modular_kernel(config, vllm_config, weights) + if config.quant_dtype == "nvfp4": + gscale = _make_gscale(config.num_local_experts) + else: + gscale = None + + quant_config = FusedMoEQuantConfig.make( + config.quant_dtype, + w1_scale=rank_weights.w1_scale, + w2_scale=rank_weights.w2_scale, + a1_scale=rank_tensors.hidden_states_scale, + g1_alphas=(1 / rank_weights.w1_gs) + if rank_weights.w1_gs is not None else None, + g2_alphas=(1 / rank_weights.w2_gs) + if rank_weights.w2_gs is not None else None, + a1_gscale=gscale, + a2_gscale=gscale, + block_shape=config.quant_block_shape, + per_act_token_quant=config.is_per_act_token_quant, + per_out_ch_quant=config.is_per_out_ch_quant, + ) + + mk = make_modular_kernel(config, vllm_config, quant_config) + + # impls might update the tensor in place + hidden_states = rank_tensors.hidden_states.clone() + + topk_ids = rank_tensors.topk_ids.to( + mk.prepare_finalize.topk_indices_dtype()) mk_kwargs = { "hidden_states": - rank_tensors.hidden_states.clone( - ), # impls might update the tensor in place + hidden_states, "w1": rank_weights.w1, "w2": @@ -596,15 +616,9 @@ def run_modular_kernel( "topk_weights": rank_tensors.topk_weights, "topk_ids": - rank_tensors.topk_ids.to(mk.prepare_finalize.topk_indices_dtype()), + topk_ids, "expert_map": rank_tensors.expert_map, - "w1_scale": - rank_weights.w1_scale, - "w2_scale": - rank_weights.w2_scale, - "a1_scale": - rank_tensors.hidden_states_scale, "global_num_experts": config.E, "apply_router_weight_on_input": diff --git a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py index 5dbfdfc153f9..c1037b60bf38 100644 --- a/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py +++ b/tests/kernels/moe/modular_kernel_tools/make_feature_matrix.py @@ -10,7 +10,8 @@ from 
tqdm import tqdm from vllm.config import VllmConfig, set_current_vllm_config -from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig +from vllm.model_executor.layers.fused_moe.config import ( + FUSED_MOE_UNQUANTIZED_CONFIG) from vllm.platforms import current_platform from .common import (Config, RankTensors, WeightTensors, reference_moe_impl, @@ -86,7 +87,7 @@ def add_to_results(config: Config, quant_config_dict = config_dict['quant_config'] del config_dict['quant_config'] if quant_config_dict is None: - quant_config = FusedMoEQuantConfig(None) + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG quant_config_dict = asdict(quant_config) config_dict |= quant_config_dict diff --git a/tests/kernels/moe/modular_kernel_tools/mk_objects.py b/tests/kernels/moe/modular_kernel_tools/mk_objects.py index aecffae36ae5..7947391d0348 100644 --- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py +++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py @@ -32,6 +32,14 @@ from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe +@dataclass +class TestMoEQuantConfig: + quant_dtype: Union[torch.dtype, str, None] + per_out_ch_quant: bool + per_act_token_quant: bool + block_shape: Optional[list[int]] + + @dataclass class PrepareFinalizeInfo: activation_format: mk.FusedMoEActivationFormat @@ -66,7 +74,7 @@ class ExpertInfo: torch.float8_e4m3fn, torch.bfloat16, torch.float16, torch.float32 ] common_float_and_int_types = common_float_types + [torch.int8] -nv_fp4_types = ["nvfp4"] +nvfp4_types = ["nvfp4"] fp8_types = [torch.float8_e4m3fn] @@ -219,7 +227,7 @@ def expert_info(kind) -> ExpertInfo: register_prepare_and_finalize( FlashInferCutlassMoEPrepareAndFinalize, standard_format, - nv_fp4_types, + nvfp4_types, blocked_quantization_support=True, backend=None, force_multigpu=True, @@ -229,7 +237,7 @@ def expert_info(kind) -> ExpertInfo: register_experts( FlashInferExperts, standard_format, - nv_fp4_types, + nvfp4_types, blocked_quantization_support=True, supports_chunking=True, # Note: this is a hack to get it to run for now @@ -306,39 +314,39 @@ def expert_info(kind) -> ExpertInfo: register_experts( CutlassExpertsFp4, standard_format, - nv_fp4_types, + nvfp4_types, blocked_quantization_support=True, supports_chunking=True, supports_expert_map=False, ) -MK_QUANT_CONFIGS = [ +MK_QUANT_CONFIGS: list[Optional[TestMoEQuantConfig]] = [ None, # per-channel / per-column weights and per-tensor activations - FusedMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, - per_out_ch_quant=True, - per_act_token_quant=False, - block_shape=None), + TestMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, + per_out_ch_quant=True, + per_act_token_quant=False, + block_shape=None), # per-channel / per-column weights and per-token activations - FusedMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, - per_out_ch_quant=True, - per_act_token_quant=True, - block_shape=None), + TestMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, + per_out_ch_quant=True, + per_act_token_quant=True, + block_shape=None), # per-tensor weights and per-tensor activations - FusedMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, - per_out_ch_quant=False, - per_act_token_quant=False, - block_shape=None), + TestMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, + per_out_ch_quant=False, + per_act_token_quant=False, + block_shape=None), # per-tensor weights and per-token activations - FusedMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, - per_out_ch_quant=False, - per_act_token_quant=True, - block_shape=None), + 
TestMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, + per_out_ch_quant=False, + per_act_token_quant=True, + block_shape=None), # block-quantized weights and 128 block per-token activations - FusedMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, - per_out_ch_quant=False, - per_act_token_quant=False, - block_shape=[128, 128]), + TestMoEQuantConfig(quant_dtype=torch.float8_e4m3fn, + per_out_ch_quant=False, + per_act_token_quant=False, + block_shape=[128, 128]), # TODO (varun) : Should we test the following combinations ? # block-quantized weights and per-token activations # block-quantized weights and per-tensor activations @@ -346,33 +354,27 @@ def expert_info(kind) -> ExpertInfo: if cutlass_fp4_supported() or has_flashinfer_cutlass_fused_moe(): MK_QUANT_CONFIGS += [ - FusedMoEQuantConfig(quant_dtype="nvfp4", - per_out_ch_quant=False, - per_act_token_quant=False, - block_shape=None), + TestMoEQuantConfig(quant_dtype="nvfp4", + per_out_ch_quant=False, + per_act_token_quant=False, + block_shape=None), ] -def _make_gscale(num_experts: int) -> torch.Tensor: - return torch.ones((num_experts, ), - device=torch.cuda.current_device(), - dtype=torch.float32) - - def make_prepare_finalize( prepare_finalize_type: mk.FusedMoEPrepareAndFinalize, backend: Optional[str], moe: FusedMoEConfig, + quant_config: FusedMoEQuantConfig, ) -> mk.FusedMoEPrepareAndFinalize: if backend != "naive" and backend is not None: - prepare_finalize = FusedMoEMethodBase._maybe_make_prepare_finalize(moe) + prepare_finalize = FusedMoEMethodBase._maybe_make_prepare_finalize( + moe, quant_config) assert prepare_finalize is not None return prepare_finalize elif prepare_finalize_type == FlashInferCutlassMoEPrepareAndFinalize: return FlashInferCutlassMoEPrepareAndFinalize( - use_dp=moe.moe_parallel_config.dp_size > 1, - a1_gscale=_make_gscale(moe.num_local_experts), - ) + use_dp=moe.moe_parallel_config.dp_size > 1) else: return MoEPrepareAndFinalizeNoEP() @@ -383,34 +385,39 @@ def _slice(rank: int, num_local_experts: int, t: torch.Tensor) -> torch.Tensor: return t[s:e] +def make_cutlass_strides( + e: int, + n: int, + k: int, +) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: + ab_strides1 = torch.full((e, ), k, device="cuda", dtype=torch.int64) + ab_strides2 = torch.full((e, ), n, device="cuda", dtype=torch.int64) + c_strides1 = torch.full((e, ), 2 * n, device="cuda", dtype=torch.int64) + c_strides2 = torch.full((e, ), k, device="cuda", dtype=torch.int64) + return ab_strides1, ab_strides2, c_strides1, c_strides2 + + def make_fused_experts( fused_experts_type: mk.FusedMoEPermuteExpertsUnpermute, moe: FusedMoEConfig, + quant_config: FusedMoEQuantConfig, num_dispatchers: int, - w1_gs: Optional[torch.Tensor], - w2_gs: Optional[torch.Tensor], + N: int, ) -> mk.FusedMoEPermuteExpertsUnpermute: - use_fp8 = moe.quant_dtype == torch.float8_e4m3fn batch_kwargs = { "max_num_tokens": moe.max_num_tokens, "num_dispatchers": num_dispatchers, } quant_kwargs = { - "use_fp8_w8a8": use_fp8, - "use_int8_w8a8": False, - "use_int8_w8a16": False, - "use_int4_w4a16": False, - "block_shape": moe.block_shape, - "per_act_token_quant": moe.per_act_token_quant, + "quant_config": quant_config, } deepgemm_kwargs = {"allow_deep_gemm": has_deep_gemm()} + torch.set_printoptions(threshold=0, edgeitems=0, linewidth=10000) + if fused_experts_type == BatchedDeepGemmExperts: - kwargs = batch_kwargs | { - "block_shape": moe.block_shape, - "per_act_token_quant": moe.per_act_token_quant, - } + kwargs = batch_kwargs | quant_kwargs print(f"Making 
BatchedDeepGemmExperts {kwargs} ...") experts = BatchedDeepGemmExperts(**kwargs) elif fused_experts_type == BatchedTritonExperts: @@ -422,8 +429,8 @@ def make_fused_experts( print(f"Making BatchedTritonOrDeepGemmExperts {kwargs} ...") experts = BatchedTritonOrDeepGemmExperts(**kwargs) elif fused_experts_type == DeepGemmExperts: - print("Making DeepGemmExperts () ...") - experts = DeepGemmExperts() + print("Making DeepGemmExperts {quant_config} ...") + experts = DeepGemmExperts(quant_config) elif fused_experts_type == TritonExperts: kwargs = quant_kwargs print(f"Making TritonExperts {kwargs} ...") @@ -437,62 +444,50 @@ def make_fused_experts( print(f"Making NaiveBatchedExperts {kwargs} ...") experts = NaiveBatchedExperts(**kwargs) elif fused_experts_type == CutlassExpertsFp8: + strides = make_cutlass_strides(moe.num_experts, N, moe.hidden_dim) kwargs = { "out_dtype": moe.in_dtype, - "per_act_token_quant": moe.per_act_token_quant, - "per_out_ch_quant": moe.per_out_ch_quant, - "block_shape": moe.block_shape, - } + "ab_strides1": strides[0], + "ab_strides2": strides[1], + "c_strides1": strides[2], + "c_strides2": strides[3], + } | quant_kwargs print(f"Making CutlassExpertsFp8 {kwargs} ...") experts = CutlassExpertsFp8(**kwargs) elif fused_experts_type == CutlassBatchedExpertsFp8: + strides = make_cutlass_strides(moe.num_experts, N, moe.hidden_dim) kwargs = { "max_experts_per_worker": moe.num_local_experts, "num_dispatchers": num_dispatchers, "out_dtype": moe.in_dtype, - "per_act_token_quant": moe.per_act_token_quant, - "per_out_ch_quant": moe.per_out_ch_quant, - "block_shape": moe.block_shape, - } + "ab_strides1": strides[0], + "ab_strides2": strides[1], + "c_strides1": strides[2], + "c_strides2": strides[3], + } | quant_kwargs print(f"Making CutlassBatchedExpertsFp8 {kwargs} ...") experts = CutlassBatchedExpertsFp8(**kwargs) elif fused_experts_type == CutlassExpertsFp4: - assert w1_gs is not None and w2_gs is not None - num_experts = moe.num_local_experts - rank = moe.moe_parallel_config.dp_rank kwargs = { - "g1_alphas": _slice(rank, num_experts, (1 / w1_gs)), - "g2_alphas": _slice(rank, num_experts, (1 / w2_gs)), - "a1_gscale": _make_gscale(num_experts), - "a2_gscale": _make_gscale(num_experts), - "max_experts_per_worker": num_experts, - "out_dtype": moe.in_dtype, - "per_act_token_quant": moe.per_act_token_quant, - "per_out_ch_quant": moe.per_out_ch_quant, - "block_shape": moe.block_shape, + "max_experts_per_worker": moe.num_local_experts, "num_dispatchers": num_dispatchers, - } + "out_dtype": moe.in_dtype, + } | quant_kwargs print(f"Making CutlassExpertsFp4 {kwargs} ...") experts = CutlassExpertsFp4(**kwargs) elif fused_experts_type == FlashInferExperts: - assert w1_gs is not None and w2_gs is not None - num_experts = moe.num_local_experts - rank = moe.moe_parallel_config.dp_rank kwargs = { - "g1_alphas": _slice(rank, num_experts, (1 / w1_gs)), - "g2_alphas": _slice(rank, num_experts, (1 / w2_gs)), - "a1_gscale": _make_gscale(num_experts), - "a2_gscale": _make_gscale(num_experts), "out_dtype": moe.in_dtype, - "quant_dtype": "nvfp4", "ep_rank": moe.ep_rank, "ep_size": moe.ep_size, "tp_rank": moe.tp_rank, "tp_size": moe.tp_size, - } + } | quant_kwargs print(f"Making FlashInferExperts {kwargs} ...") experts = FlashInferExperts(**kwargs) else: raise RuntimeError(f"Unknown fused experts type: {fused_experts_type}") + torch.set_printoptions(threshold=1000, edgeitems=5, linewidth=80) + return experts diff --git a/tests/kernels/moe/test_batched_deepgemm.py 
b/tests/kernels/moe/test_batched_deepgemm.py index 018d4c224f75..afec97e8cffd 100644 --- a/tests/kernels/moe/test_batched_deepgemm.py +++ b/tests/kernels/moe/test_batched_deepgemm.py @@ -6,6 +6,8 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( BatchedDeepGemmExperts) +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( BatchedPrepareAndFinalize, BatchedTritonExperts) from vllm.model_executor.layers.fused_moe.modular_kernel import ( @@ -56,13 +58,18 @@ def test_batched_deepgemm_vs_triton(E: int, T: int, K: int, N: int, topk: int, rank=0, ) + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_s, + w2_scale=w2_s, + per_act_token_quant=False, + block_shape=BLOCK_SIZE, + ) + # triton (reference) triton_experts = BatchedTritonExperts( max_num_tokens=max_num_tokens, num_dispatchers=1, - use_fp8_w8a8=True, - per_act_token_quant=False, - block_shape=BLOCK_SIZE, + quant_config=quant_config, ) mk_triton = FusedMoEModularKernel(prep_finalize, triton_experts) @@ -73,8 +80,6 @@ def test_batched_deepgemm_vs_triton(E: int, T: int, K: int, N: int, topk: int, topk_weights=topk_weights, topk_ids=topk_ids, inplace=False, - w1_scale=w1_s, - w2_scale=w2_s, global_num_experts=E, ) @@ -82,8 +87,7 @@ def test_batched_deepgemm_vs_triton(E: int, T: int, K: int, N: int, topk: int, deepgemm_experts = BatchedDeepGemmExperts( max_num_tokens=max_num_tokens, num_dispatchers=1, - block_shape=BLOCK_SIZE, - per_act_token_quant=False, + quant_config=quant_config, ) mk_deepgemm = FusedMoEModularKernel(prep_finalize, deepgemm_experts) @@ -94,8 +98,6 @@ def test_batched_deepgemm_vs_triton(E: int, T: int, K: int, N: int, topk: int, topk_weights=topk_weights, topk_ids=topk_ids, inplace=False, - w1_scale=w1_s, - w2_scale=w2_s, global_num_experts=E, ) diff --git a/tests/kernels/moe/test_batched_moe.py b/tests/kernels/moe/test_batched_moe.py index 00b2d780e66f..7e79828937c7 100644 --- a/tests/kernels/moe/test_batched_moe.py +++ b/tests/kernels/moe/test_batched_moe.py @@ -140,7 +140,7 @@ def test_batched_mm(num_experts: int, max_tokens_per_expert: int, K: int, in_dtype=act_dtype, quant_dtype=quant_dtype, block_shape=block_shape, - per_act_token_quant=per_act_token_quant, + per_out_ch_quant=per_act_token_quant, ) out_shape = (num_experts, max_tokens_per_expert, N) @@ -250,7 +250,7 @@ def test_fused_moe_batched_experts( block_shape=block_shape, in_dtype=act_dtype, quant_dtype=quant_dtype, - per_act_token_quant=per_act_token_quant, + per_out_ch_quant=per_act_token_quant, ) if input_scales and quant_dtype is not None: diff --git a/tests/kernels/moe/test_block_fp8.py b/tests/kernels/moe/test_block_fp8.py index ecc57acc6796..da383e18c372 100644 --- a/tests/kernels/moe/test_block_fp8.py +++ b/tests/kernels/moe/test_block_fp8.py @@ -4,7 +4,7 @@ import pytest import torch -from tests.kernels.moe.utils import make_test_weights +from tests.kernels.moe.utils import make_test_quant_config, make_test_weights from tests.kernels.quant_utils import (native_per_token_group_quant_fp8, native_w8a8_block_matmul) from vllm.config import VllmConfig, set_current_vllm_config @@ -161,22 +161,17 @@ def test_w8a8_block_fp8_fused_moe(M, N, K, E, topk, block_size, dtype, seed, a = torch.randn((M, K), dtype=dtype) / 10 score = torch.randn((M, E), dtype=dtype) - (_, w1, w1_s, _), (_, w2, w2_s, - _) = make_test_weights(E, - N, - K, - dtype, - torch.float8_e4m3fn, - per_act_token_quant=False, - block_shape=block_size) - - m_fused_moe 
= modular_triton_fused_moe(use_fp8_w8a8=True, - use_int8_w8a8=False, - use_int8_w8a16=False, - use_int4_w4a16=False, - use_mxfp4_w4a4=False, - per_act_token_quant=False, - block_shape=block_size) + w1, w2, quant_config = make_test_quant_config( + E, + N, + K, + dtype, + quant_dtype=torch.float8_e4m3fn, + per_act_token_quant=False, + block_shape=block_size, + ) + + m_fused_moe = modular_triton_fused_moe(quant_config) topk_weights, topk_ids, _ = fused_topk(a, score.float(), topk, False) @@ -186,37 +181,24 @@ def test_w8a8_block_fp8_fused_moe(M, N, K, E, topk, block_size, dtype, seed, a, w1, w2, - w1_s, - w2_s, + quant_config.w1_scale, + quant_config.w2_scale, topk_weights, topk_ids, block_size, ) - out = fused_experts( - a, - w1, - w2, - topk_weights, - topk_ids, - use_fp8_w8a8=True, - w1_scale=w1_s, - w2_scale=w2_s, - block_shape=block_size, - ) + out = fused_experts(a, + w1, + w2, + topk_weights, + topk_ids, + quant_config=quant_config) - m_out = m_fused_moe( - a, - w1, - w2, - topk_weights, - topk_ids, - w1_scale=w1_s, - w2_scale=w2_s, - ) + m_out = m_fused_moe(a, w1, w2, topk_weights, topk_ids) - # 0.039 only needed for [40000-4608-7168-2-1-block_size852-dtype852-0] - tol = 0.035 if M < 40000 else 0.039 + # 0.039 only needed for M >= 8192 + tol = 0.035 if M < 8192 else 0.039 torch.testing.assert_close(out, ref_out, atol=tol, rtol=tol) torch.testing.assert_close(m_out, ref_out, atol=tol, rtol=tol) @@ -248,14 +230,15 @@ def test_w8a8_block_fp8_deep_gemm_fused_moe(M, N, K, E, topk, seed, a = torch.randn((M, K), dtype=dtype) / 10 score = torch.randn((M, E), dtype=dtype) - (_, w1, w1_s, _), (_, w2, w2_s, - _) = make_test_weights(E, - N, - K, - dtype, - torch.float8_e4m3fn, - per_act_token_quant=False, - block_shape=block_size) + (_, w1, w1_s, _), (_, w2, w2_s, _) = make_test_weights( + E, + N, + K, + dtype, + torch.float8_e4m3fn, + per_out_ch_quant=False, + block_shape=block_size, + ) # Note: for now use_compile will error out if the problem size is # large enough to trigger chunking. 
I'm leaving the flag and diff --git a/tests/kernels/moe/test_block_int8.py b/tests/kernels/moe/test_block_int8.py index 5e4a93963f8e..041a13ca5585 100644 --- a/tests/kernels/moe/test_block_int8.py +++ b/tests/kernels/moe/test_block_int8.py @@ -4,12 +4,12 @@ import pytest import torch -from tests.kernels.moe.utils import make_test_weights +from tests.kernels.moe.utils import make_test_quant_config from tests.kernels.quant_utils import (native_per_token_group_quant_int8, native_w8a8_block_matmul) from vllm.config import VllmConfig, set_current_vllm_config from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk from vllm.platforms import current_platform if current_platform.get_device_capability() < (7, 0): @@ -50,7 +50,7 @@ (2048, 128, 128), (2048, 1024, 7168), (2048, 4096, 512), - (2048, 4096, 7168), + (2048, 4096, 4096), ] E = [8, 24] @@ -117,31 +117,28 @@ def test_w8a8_block_int8_fused_moe(M, N, K, E, topk, block_size, dtype, seed): a = torch.randn((M, K), dtype=dtype) / 10 score = torch.randn((M, E), dtype=dtype) - - (_, w1, w1_s, _), (_, w2, w2_s, - _) = make_test_weights(E, - N, - K, - dtype, - torch.int8, - per_act_token_quant=False, - block_shape=block_size) + topk_weights, topk_ids, _ = fused_topk(a, score.float(), topk, False) + + w1, w2, quant_config = make_test_quant_config( + E, + N, + K, + dtype, + quant_dtype=torch.int8, + per_act_token_quant=False, + block_shape=block_size, + ) # Set the context to avoid lots of warning spam. with set_current_vllm_config(vllm_config): - out = fused_moe( - a, - w1, - w2, - score, - topk, - renormalize=False, - use_int8_w8a8=True, - w1_scale=w1_s, - w2_scale=w2_s, - block_shape=block_size, - ) - ref_out = torch_w8a8_block_int8_moe(a, w1, w2, w1_s, w2_s, score, topk, + out = fused_experts(a, + w1, + w2, + topk_weights, + topk_ids, + quant_config=quant_config) + ref_out = torch_w8a8_block_int8_moe(a, w1, w2, quant_config.w1_scale, + quant_config.w2_scale, score, topk, block_size) # Check results diff --git a/tests/kernels/moe/test_cutlass_moe.py b/tests/kernels/moe/test_cutlass_moe.py index c84f66383b90..ca6be767dab3 100644 --- a/tests/kernels/moe/test_cutlass_moe.py +++ b/tests/kernels/moe/test_cutlass_moe.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import copy import dataclasses from math import prod from typing import Optional @@ -9,6 +10,8 @@ from vllm import _custom_ops as ops from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import ( + FUSED_MOE_UNQUANTIZED_CONFIG, fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.cutlass_moe import ( cutlass_moe_fp8, run_cutlass_moe_fp8) from vllm.model_executor.layers.fused_moe.fused_moe import (fused_experts, @@ -154,7 +157,7 @@ def run_with_expert_maps(num_experts: int, num_local_experts: int, def slice_experts(): slice_params = [ "w1_q", "w2_q", "ab_strides1", "ab_strides2", "c_strides1", - "c_strides2", "w1_scale", "w2_scale" + "c_strides2" ] full_tensors = { k: v @@ -162,6 +165,8 @@ def slice_experts(): if k in slice_params and k in cutlass_moe_kwargs } + quant_config = cutlass_moe_kwargs["quant_config"] + for i in range(0, num_experts, num_local_experts): s, e = i, i + num_local_experts @@ -178,6 +183,12 @@ def slice_experts(): for k, t in full_tensors.items(): cutlass_moe_kwargs[k] = 
t[s:e] + new_quant_config = copy.deepcopy(quant_config) + new_quant_config._w1.scale = quant_config.w1_scale[s:e] + new_quant_config._w2.scale = quant_config.w2_scale[s:e] + + cutlass_moe_kwargs["quant_config"] = new_quant_config + yield cutlass_moe_kwargs out_tensor = torch.zeros_like(cutlass_moe_kwargs["a"]) @@ -191,6 +202,7 @@ def run_8_bit(moe_tensors: MOETensors8Bit, topk_weights: torch.Tensor, topk_ids: torch.Tensor, per_act_token: bool, + per_out_ch: bool, num_local_experts: Optional[int] = None) -> torch.Tensor: assert not any([ t is None for t in [ @@ -199,20 +211,27 @@ def run_8_bit(moe_tensors: MOETensors8Bit, ] ]) + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=moe_tensors.w1_scale, + w2_scale=moe_tensors.w2_scale, + per_act_token_quant=per_act_token, + per_out_ch_quant=per_out_ch, + # Set to moe_tensors.a_scale iff static scales + per tensor. + # This is not currently being tested. + a1_scale=None, + ) + kwargs = { 'a': moe_tensors.a, 'w1_q': moe_tensors.w1_q, # type: ignore[union-attr] 'w2_q': moe_tensors.w2_q, # type: ignore[union-attr] 'topk_weights': topk_weights, 'topk_ids': topk_ids, - 'w1_scale': moe_tensors.w1_scale, - 'w2_scale': moe_tensors.w2_scale, 'ab_strides1': moe_tensors.ab_strides1, 'ab_strides2': moe_tensors.ab_strides2, 'c_strides1': moe_tensors.c_strides1, 'c_strides2': moe_tensors.c_strides2, - 'per_act_token': per_act_token, - 'a1_scale': None #moe_tensors.a_scale + 'quant_config': quant_config, } num_experts = moe_tensors.w1.size(0) @@ -261,16 +280,23 @@ def test_cutlass_moe_8_bit_no_graph( # Note that we are using the dequantized versions of the tensors. # Using a, w1 and w2 directly results in minor output differences. - triton_output = fused_experts(mt.a_d, mt.w1_d, mt.w2_d, topk_weights, - topk_ids) + + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG + triton_output = fused_experts(mt.a_d, + mt.w1_d, + mt.w2_d, + topk_weights, + topk_ids, + quant_config=quant_config) if ep_size is not None: assert e % ep_size == 0, "Cannot distribute experts evenly" number_local_experts = e // ep_size else: number_local_experts = None + cutlass_output = run_8_bit(mt, topk_weights, topk_ids, per_act_token, - number_local_experts) + per_out_ch, number_local_experts) # Note 5.5 only needed for larger problem sizes, 5 works ok for # the rest. @@ -315,14 +341,19 @@ def test_cutlass_moe_8_bit_cuda_graph( # Note that we are using the dequantized versions of the tensors. # Using a, w1 and w2 directly results in minor output differences. 
- triton_output = fused_experts(mt.a_d, mt.w1_d, mt.w2_d, topk_weights, - topk_ids) + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG + triton_output = fused_experts(mt.a_d, + mt.w1_d, + mt.w2_d, + topk_weights, + topk_ids, + quant_config=quant_config) stream = torch.cuda.Stream() graph = torch.cuda.CUDAGraph() with torch.cuda.graph(graph, stream=stream): cutlass_output = run_8_bit(mt, topk_weights, topk_ids, - per_act_token) + per_act_token, per_out_ch) torch.cuda.synchronize() graph.replay() diff --git a/tests/kernels/moe/test_deepep_deepgemm_moe.py b/tests/kernels/moe/test_deepep_deepgemm_moe.py index 6558cab6a9ef..ced5457d4f53 100644 --- a/tests/kernels/moe/test_deepep_deepgemm_moe.py +++ b/tests/kernels/moe/test_deepep_deepgemm_moe.py @@ -15,6 +15,8 @@ from typing_extensions import ParamSpec from vllm.config import VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts from vllm.model_executor.layers.fused_moe.modular_kernel import ( FusedMoEModularKernel) @@ -71,9 +73,12 @@ def make_block_quant_fp8_weights( Return weights w1q, w2q, w1_scale, w2_scale """ (_, w1q, w1_scale, _), (_, w2q, w2_scale, - _) = make_test_weights(e, n, k, torch.bfloat16, + _) = make_test_weights(e, + n, + k, + torch.bfloat16, torch.float8_e4m3fn, - block_size) + block_shape=block_size) return w1q, w2q, w1_scale, w2_scale @@ -130,10 +135,11 @@ def make(config: TestConfig, rank) -> "TestTensors": config=config) -def make_ll_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, - max_tokens_per_rank: int, dp_size: int, - hidden_size: int, q_dtype: Optional[torch.dtype], - test_config: TestConfig) -> FusedMoEModularKernel: +def make_ll_modular_kernel( + pg: ProcessGroup, pgi: ProcessGroupInfo, max_tokens_per_rank: int, + dp_size: int, hidden_size: int, q_dtype: Optional[torch.dtype], + test_config: TestConfig, + quant_config: FusedMoEQuantConfig) -> FusedMoEModularKernel: assert test_config.low_latency assert test_config.use_fp8_dispatch is not None @@ -154,17 +160,18 @@ def make_ll_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, fused_experts = BatchedDeepGemmExperts( max_num_tokens=max_tokens_per_rank, num_dispatchers=pgi.world_size // dp_size, - block_shape=test_config.block_size, - per_act_token_quant=test_config.per_act_token_quant) + quant_config=quant_config, + ) mk = FusedMoEModularKernel(prepare_finalize=a2a, fused_experts=fused_experts) return mk -def make_ht_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, - dp_size: int, num_local_experts: int, - q_dtype: Optional[torch.dtype], - test_config: TestConfig) -> FusedMoEModularKernel: +def make_ht_modular_kernel( + pg: ProcessGroup, pgi: ProcessGroupInfo, dp_size: int, + num_local_experts: int, q_dtype: Optional[torch.dtype], + test_config: TestConfig, + quant_config: FusedMoEQuantConfig) -> FusedMoEModularKernel: assert not test_config.low_latency assert test_config.use_fp8_dispatch is None @@ -178,15 +185,16 @@ def make_ht_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, q_dtype=q_dtype, block_shape=test_config.block_size) - fused_experts = DeepGemmExperts() + fused_experts = DeepGemmExperts(quant_config) mk = FusedMoEModularKernel(prepare_finalize=a2a, fused_experts=fused_experts) return mk -def make_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, dp_size: int, - num_local_experts: int, - test_tensors: TestTensors) -> FusedMoEModularKernel: +def make_modular_kernel( 
+ pg: ProcessGroup, pgi: ProcessGroupInfo, dp_size: int, + num_local_experts: int, test_tensors: TestTensors, + quant_config: FusedMoEQuantConfig) -> FusedMoEModularKernel: q_dtype = torch.float8_e4m3fn test_config = test_tensors.config @@ -204,10 +212,16 @@ def make_modular_kernel(pg: ProcessGroup, pgi: ProcessGroupInfo, dp_size: int, dp_size=dp_size, hidden_size=hidden_size, q_dtype=q_dtype, - test_config=test_config) + test_config=test_config, + quant_config=quant_config) else: - mk = make_ht_modular_kernel(pg, pgi, dp_size, num_local_experts, - q_dtype, test_config) + mk = make_ht_modular_kernel(pg, + pgi, + dp_size, + num_local_experts, + q_dtype, + test_config, + quant_config=quant_config) return mk @@ -233,17 +247,23 @@ def build_expert_map(): return expert_map.to(device=torch.cuda.current_device(), dtype=torch.int32) + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + # Low-Latency kernels can't dispatch scales. + a1_scale=(None if test_config.low_latency else + test_tensors.rank_token_scales), + block_shape=test_config.block_size, + ) + # Make modular kernel mk: FusedMoEModularKernel = make_modular_kernel( pg=pg, pgi=pgi, dp_size=dp_size, num_local_experts=num_local_experts, - test_tensors=test_tensors) - - # Low-Latency kernels can't dispatch scales. - a1_scale = (None - if test_config.low_latency else test_tensors.rank_token_scales) + test_tensors=test_tensors, + quant_config=quant_config) out = mk.forward(hidden_states=test_tensors.rank_tokens, w1=w1, @@ -254,12 +274,6 @@ def build_expert_map(): activation="silu", global_num_experts=num_experts, expert_map=build_expert_map(), - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=None, - w2_zp=None, - a1_scale=a1_scale, - a2_scale=None, apply_router_weight_on_input=False) return out @@ -269,6 +283,13 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor, w1_scale: torch.Tensor, w2_scale: torch.Tensor, a1_scale: torch.Tensor, block_shape: list[int]): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + block_shape=block_shape, + ) + return fused_experts( hidden_states=a, w1=w1, @@ -276,11 +297,7 @@ def triton_impl(a: torch.Tensor, topk_ids: torch.Tensor, topk_weights=topk_weights, topk_ids=topk_ids, inplace=False, - use_fp8_w8a8=True, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - block_shape=block_shape, + quant_config=quant_config, # Make sure this is set to False so we # don't end up comparing the same implementation. 
allow_deep_gemm=False) diff --git a/tests/kernels/moe/test_deepep_moe.py b/tests/kernels/moe/test_deepep_moe.py index 6a53af68cd53..54d3a62b03fc 100644 --- a/tests/kernels/moe/test_deepep_moe.py +++ b/tests/kernels/moe/test_deepep_moe.py @@ -15,6 +15,7 @@ from vllm.config import VllmConfig, set_current_vllm_config from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.fused_moe import TritonExperts +from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( BatchedTritonExperts) from vllm.model_executor.layers.fused_moe.modular_kernel import ( @@ -129,11 +130,9 @@ def make_modular_kernel( num_local_experts: int, q_dtype: Optional[torch.dtype], use_fp8_dispatch: bool, - per_act_token_quant: bool, + quant_config: FusedMoEQuantConfig, ) -> FusedMoEModularKernel: - is_quantized = q_dtype is not None - ht_args: Optional[DeepEPHTArgs] = None ll_args: Optional[DeepEPLLArgs] = None @@ -159,24 +158,14 @@ def make_modular_kernel( num_dispatchers = pgi.world_size // dp_size if low_latency_mode: - assert not per_act_token_quant, "not supported in ll mode" + assert not quant_config.per_act_token_quant, "not supported in ll mode" fused_experts = BatchedTritonExperts( max_num_tokens=MAX_TOKENS_PER_RANK, num_dispatchers=num_dispatchers, - use_fp8_w8a8=is_quantized, - use_int8_w8a8=False, - use_int8_w8a16=False, - use_int4_w4a16=False, - per_act_token_quant=False, + quant_config=quant_config, ) else: - fused_experts = TritonExperts( - use_fp8_w8a8=is_quantized, - use_int8_w8a8=False, - use_int8_w8a16=False, - use_int4_w4a16=False, - per_act_token_quant=per_act_token_quant, - ) + fused_experts = TritonExperts(quant_config=quant_config) mk = FusedMoEModularKernel(prepare_finalize=a2a, fused_experts=fused_experts) @@ -217,11 +206,6 @@ def build_expert_map(): if is_quantized: q_dtype = torch.float8_e4m3fn - # Make modular kernel - mk: FusedMoEModularKernel = make_modular_kernel( - pg, pgi, low_latency_mode, hidden_size, dp_size, num_experts, - num_local_experts, q_dtype, use_fp8_dispatch, per_act_token_quant) - out_hidden_states = torch.empty_like(test_tensors.rank_tokens) total_num_tokens = test_tensors.rank_tokens.size(0) @@ -236,6 +220,19 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): rank_token_scales_chunk = rank_token_scales_chunk[ chunk_start:chunk_end] + quant_config = FusedMoEQuantConfig.make( + q_dtype, + w1_scale=w1_scale, + w2_scale=w2_scale, + per_act_token_quant=per_act_token_quant, + a1_scale=rank_token_scales_chunk, + ) + + # Make modular kernel + mk: FusedMoEModularKernel = make_modular_kernel( + pg, pgi, low_latency_mode, hidden_size, dp_size, num_experts, + num_local_experts, q_dtype, use_fp8_dispatch, quant_config) + out = mk.forward(hidden_states=rank_tokens_chunk, w1=w1, w2=w2, @@ -245,12 +242,6 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): activation="silu", global_num_experts=num_experts, expert_map=build_expert_map(), - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=None, - w2_zp=None, - a1_scale=rank_token_scales_chunk, - a2_scale=None, apply_router_weight_on_input=False) if not skip_result_store: @@ -407,7 +398,7 @@ def _deep_ep_moe( @pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("mnk", MNKs) +@pytest.mark.parametrize("m,n,k", MNKs) @pytest.mark.parametrize("num_experts", [32]) @pytest.mark.parametrize("topk", [6]) @pytest.mark.parametrize("world_dp_size", [(2, 1)]) @@ -416,7 +407,9 @@ def 
_deep_ep_moe( @requires_deep_ep def test_deep_ep_moe( dtype: torch.dtype, - mnk: tuple[int, int, int], + m: int, + n: int, + k: int, num_experts: int, topk: int, world_dp_size: tuple[int, int], @@ -424,7 +417,6 @@ def test_deep_ep_moe( ): low_latency_mode = False use_fp8_dispatch = False - m, n, k = mnk current_platform.seed_everything(7) world_size, dp_size = world_dp_size @@ -456,20 +448,24 @@ def test_deep_ep_moe( @pytest.mark.parametrize("dtype", DTYPES) -@pytest.mark.parametrize("mnk", MNKs) +@pytest.mark.parametrize("m,n,k", MNKs) @pytest.mark.parametrize("num_experts", [32]) @pytest.mark.parametrize("topk", [6]) @pytest.mark.parametrize("world_dp_size", [(2, 1)]) @pytest.mark.parametrize("use_fp8_dispatch", USE_FP8_DISPATCH) @multi_gpu_test(num_gpus=2) @requires_deep_ep -def test_low_latency_deep_ep_moe(dtype: torch.dtype, mnk: tuple[int, int, int], - num_experts: int, topk: int, - world_dp_size: tuple[int, int], - use_fp8_dispatch: bool): - +def test_low_latency_deep_ep_moe( + dtype: torch.dtype, + m: int, + n: int, + k: int, + num_experts: int, + topk: int, + world_dp_size: tuple[int, int], + use_fp8_dispatch: bool, +): low_latency_mode = True - m, n, k = mnk if (low_latency_mode and k not in DeepEPLLPrepareAndFinalize.SUPPORTED_HIDDEN_SIZES): diff --git a/tests/kernels/moe/test_deepgemm.py b/tests/kernels/moe/test_deepgemm.py index 4472f34a6291..d575b6d4ca62 100644 --- a/tests/kernels/moe/test_deepgemm.py +++ b/tests/kernels/moe/test_deepgemm.py @@ -11,6 +11,8 @@ import pytest import torch +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config) # vLLM fused-expert reference (Triton fallback + DeepGEMM option) from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts from vllm.model_executor.layers.quantization.utils.fp8_utils import ( @@ -94,6 +96,13 @@ def run_single_case(m, n, k, topk, num_experts, block_size): topk_weights, topk_ids = torch.topk(router_logits, k=topk, dim=-1) topk_weights = torch.nn.functional.softmax(topk_weights, dim=-1) + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_s, + w2_scale=w2_s, + a1_scale=a1_scale, + block_shape=block_size, + ) + # triton reference out_triton = fused_experts( hidden_states=tokens_bf16, @@ -102,11 +111,7 @@ def run_single_case(m, n, k, topk, num_experts, block_size): topk_weights=topk_weights, topk_ids=topk_ids, inplace=False, - use_fp8_w8a8=True, - w1_scale=w1_s, - w2_scale=w2_s, - a1_scale=a1_scale, - block_shape=block_size, + quant_config=quant_config, allow_deep_gemm=False, ) @@ -118,19 +123,14 @@ def run_single_case(m, n, k, topk, num_experts, block_size): topk_weights=topk_weights, topk_ids=topk_ids, inplace=False, - use_fp8_w8a8=True, - w1_scale=w1_s, - w2_scale=w2_s, - a1_scale=a1_scale, - block_shape=block_size, + quant_config=quant_config, allow_deep_gemm=True, ) diff = calc_diff(out_deepgemm, out_triton) assert diff < 0.001, f"Diff exceeded 1%: {diff}" -# Note: W1 has shape (E, 2N, K), so N = 512 -# can trigger the deepgemm path. +# Note: N <= 512 will disable the deepgemm path due to performance issues. 
MNKs = [ (1024, 768, 128), (1024, 768, 512), @@ -144,15 +144,15 @@ def run_single_case(m, n, k, topk, num_experts, block_size): NUM_EXPERTS = [32] -@pytest.mark.parametrize("mnk", MNKs) +@pytest.mark.parametrize(("m", "n", "k"), MNKs) @pytest.mark.parametrize("topk", TOPKS) @pytest.mark.parametrize("num_experts", NUM_EXPERTS) @pytest.mark.skipif(not is_deep_gemm_supported(), reason="Requires deep_gemm kernels") -def test_deepgemm_vs_triton(mnk, topk, num_experts, monkeypatch): +def test_deepgemm_vs_triton(m, n, k, topk, num_experts, monkeypatch): - with monkeypatch.context() as m: - m.setenv("VLLM_USE_DEEP_GEMM", "1") + with monkeypatch.context() as mp: + mp.setenv("VLLM_USE_DEEP_GEMM", "1") _fused_moe_mod = importlib.import_module( "vllm.model_executor.layers.fused_moe.fused_moe") @@ -168,8 +168,6 @@ def _spy_deep_gemm_moe_fp8(*args, **kwargs): monkeypatch.setattr(_fused_moe_mod, "deep_gemm_moe_fp8", _spy_deep_gemm_moe_fp8) - m, n, k = mnk - if topk > num_experts: pytest.skip(f"topk={topk} > num_experts={num_experts}") diff --git a/tests/kernels/moe/test_flashinfer.py b/tests/kernels/moe/test_flashinfer.py index 52a3d2ca3b42..5564db3cda0e 100644 --- a/tests/kernels/moe/test_flashinfer.py +++ b/tests/kernels/moe/test_flashinfer.py @@ -6,6 +6,8 @@ import torch from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts from vllm.model_executor.layers.fused_moe.layer import FusedMoE from vllm.model_executor.layers.quantization.utils.flashinfer_utils import ( @@ -145,6 +147,14 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph( custom_routing_function=Llama4MoE.custom_routing_function, scoring_func="softmax") + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=td.w13_weight_scale, + w2_scale=td.w2_weight_scale, + a1_scale=td.a1_scale, + a2_scale=td.a2_scale, + per_act_token_quant=False, + ) + output = fused_experts( td.hidden_states, td.w13_quantized, @@ -153,15 +163,10 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph( topk_ids=topk_ids, inplace=False, activation="silu", - use_fp8_w8a8=True, - per_channel_quant=False, global_num_experts=e, expert_map=None, - w1_scale=td.w13_weight_scale, - w2_scale=td.w2_weight_scale, - a1_scale=td.a1_scale, - a2_scale=td.a2_scale, apply_router_weight_on_input=True, + quant_config=quant_config, ) flashinfer_output = apply_flashinfer_per_tensor_scale_fp8( @@ -210,6 +215,14 @@ def test_flashinfer_cutlass_moe_fp8_no_graph( custom_routing_function=Llama4MoE.custom_routing_function, scoring_func="softmax") + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=td.w13_weight_scale, + w2_scale=td.w2_weight_scale, + a1_scale=td.a1_scale, + a2_scale=td.a2_scale, + per_act_token_quant=False, + ) + output = fused_experts( td.hidden_states, td.w13_quantized, @@ -218,15 +231,10 @@ def test_flashinfer_cutlass_moe_fp8_no_graph( topk_ids=topk_ids, inplace=False, activation="silu", - use_fp8_w8a8=True, - per_channel_quant=False, global_num_experts=e, expert_map=None, - w1_scale=td.w13_weight_scale, - w2_scale=td.w2_weight_scale, - a1_scale=td.a1_scale, - a2_scale=td.a2_scale, apply_router_weight_on_input=True, + quant_config=quant_config, ) td.layer.dp_size = 1 diff --git a/tests/kernels/moe/test_flashinfer_moe.py b/tests/kernels/moe/test_flashinfer_moe.py index 1c14df2b914a..8bf096b798cb 100644 --- a/tests/kernels/moe/test_flashinfer_moe.py +++ b/tests/kernels/moe/test_flashinfer_moe.py @@ -3,7 
+3,7 @@ import pytest import torch -from tests.kernels.moe.utils import make_test_weights +from tests.kernels.moe.utils import make_test_quant_config from tests.kernels.quantization.nvfp4_utils import (FLOAT4_E2M1_MAX, FLOAT8_E4M3_MAX, dequantize_nvfp4_to_dtype) @@ -41,7 +41,6 @@ @pytest.mark.parametrize("m,n,k", MNK_FACTORS) @pytest.mark.parametrize("e", [40, 64, 256]) -#@pytest.mark.parametrize("e", [128, 256]) @pytest.mark.parametrize("topk", [1, 6, 8]) @pytest.mark.parametrize("dtype", [torch.half, torch.bfloat16]) @torch.inference_mode() @@ -56,16 +55,15 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int, quant_blocksize = 16 - (_, w1_q, w1_blockscale, - w1_gs), (_, w2_q, w2_blockscale, w2_gs) = make_test_weights( - e, - n, - k, - in_dtype=dtype, - quant_dtype="nvfp4", - block_shape=None, # use quant_blocksize? - per_act_token_quant=False, - ) + w1_q, w2_q, quant_config = make_test_quant_config( + e, + n, + k, + in_dtype=dtype, + quant_dtype="nvfp4", + block_shape=None, + per_act_token_quant=False, + ) score = torch.randn((m, e), device="cuda", dtype=dtype) topk_weights, topk_ids, _ = fused_topk(a, @@ -73,35 +71,17 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int, topk, renormalize=False) - a1_gs = torch.ones((e, ), device="cuda", dtype=torch.float32) - a2_gs = torch.ones((e, ), device="cuda", dtype=torch.float32) - assert is_valid_flashinfer_cutlass_fused_moe(a, w1_q, w2_q) - assert w1_gs is not None - assert w2_gs is not None - assert w1_blockscale is not None - assert w2_blockscale is not None - flashinfer_experts = FusedMoEModularKernel( MoEPrepareAndFinalizeNoEP(), - FlashInferExperts( - a1_gscale=a1_gs, - g1_alphas=(1 / w1_gs), - a2_gscale=a2_gs, - g2_alphas=(1 / w2_gs), - out_dtype=dtype, - quant_dtype="nvfp4", - )) + FlashInferExperts(out_dtype=dtype, quant_config=quant_config), + ) flashinfer_output = flashinfer_experts( hidden_states=a, w1=w1_q, - w1_scale=w1_blockscale, w2=w2_q, - w2_scale=w2_blockscale, - a1_scale=a1_gs, - a2_scale=a2_gs, topk_weights=topk_weights, topk_ids=topk_ids, ) @@ -122,18 +102,18 @@ def test_flashinfer_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int, w2_d = torch.empty((e, k, n), device="cuda", dtype=dtype) for idx in range(0, e): - w1_d[idx] = dequantize_nvfp4_to_dtype(w1_q[idx], - w1_blockscale[idx], - w1_gs[idx], - dtype=dtype, - device=w1_q.device, - block_size=quant_blocksize) - w2_d[idx] = dequantize_nvfp4_to_dtype(w2_q[idx], - w2_blockscale[idx], - w2_gs[idx], - dtype=dtype, - device=w2_q.device, - block_size=quant_blocksize) + w1_d[idx] = dequantize_nvfp4_to_dtype( + w1_q[idx], + quant_config.w1_scale[idx], (1 / quant_config.g1_alphas[idx]), + dtype=dtype, + device=w1_q.device, + block_size=quant_blocksize) + w2_d[idx] = dequantize_nvfp4_to_dtype( + w2_q[idx], + quant_config.w2_scale[idx], (1 / quant_config.g2_alphas[idx]), + dtype=dtype, + device=w2_q.device, + block_size=quant_blocksize) torch_output = torch_moe(a_in_dtype, w1_d, w2_d, score, topk) diff --git a/tests/kernels/moe/test_gpt_oss_triton_kernels.py b/tests/kernels/moe/test_gpt_oss_triton_kernels.py index 54f2351bf6d9..024993c7677d 100644 --- a/tests/kernels/moe/test_gpt_oss_triton_kernels.py +++ b/tests/kernels/moe/test_gpt_oss_triton_kernels.py @@ -23,6 +23,7 @@ from triton_kernels.tensor_details import layout from triton_kernels.testing import assert_close +from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( 
BatchedPrepareAndFinalize) from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk @@ -293,6 +294,13 @@ def test_equiv(num_token, a_dtype, w_dtype, tp): pc2, ) = init_compute_data(M, K, N, E, a_dtype, w_dtype, num_warps=8) + quant_config = FusedMoEQuantConfig.make( + w1_bias=w1_bias_tri, + w2_bias=w2_bias_tri, + w1_precision=pc1, + w2_precision=pc2, + ) + out_triton_monolithic = triton_kernel_moe_forward( hidden_states=x_tri, w1=w1_tri, @@ -300,10 +308,7 @@ def test_equiv(num_token, a_dtype, w_dtype, tp): gating_output=exp_data_tri, topk=topk, renormalize=True, - w1_bias=w1_bias_tri, - w2_bias=w2_bias_tri, - w1_precision=pc1, - w2_precision=pc2, + quant_config=quant_config, ) out_triton_monolithic = out_triton_monolithic[..., :K] @@ -336,6 +341,13 @@ def batched_moe( ) -> torch.Tensor: max_num_tokens = round_up(a.shape[0], 64) + quant_config = FusedMoEQuantConfig.make( + w1_precision=w1_precision, + w2_precision=w2_precision, + w1_bias=w1_bias, + w2_bias=w2_bias, + ) + fused_experts = FusedMoEModularKernel( BatchedPrepareAndFinalize( max_num_tokens, @@ -344,19 +356,12 @@ def batched_moe( rank=0, ), BatchedOAITritonExperts( - None, max_num_tokens=max_num_tokens, num_dispatchers=1, - w1_precision=w1_precision, - w2_precision=w2_precision, + quant_config=quant_config, ), ) - extra_expert_args = { - "w1_bias": w1_bias, - "w2_bias": w2_bias, - } - topk_weight, topk_ids, _ = fused_topk(a, gating_output, topk, renormalize) return fused_experts( @@ -365,7 +370,6 @@ def batched_moe( w2, topk_weight, topk_ids, - extra_expert_args=extra_expert_args, ) diff --git a/tests/kernels/moe/test_modular_kernel_combinations.py b/tests/kernels/moe/test_modular_kernel_combinations.py index 6112183be547..19c4301bd23d 100644 --- a/tests/kernels/moe/test_modular_kernel_combinations.py +++ b/tests/kernels/moe/test_modular_kernel_combinations.py @@ -12,7 +12,6 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.config import VllmConfig, current_platform, set_current_vllm_config -from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe @@ -22,7 +21,8 @@ run_modular_kernel) from .modular_kernel_tools.mk_objects import ( MK_FUSED_EXPERT_TYPES, MK_MULTI_GPU_PREPARE_FINALIZE_TYPES, - MK_QUANT_CONFIGS, MK_SINGLE_GPU_PREPARE_FINALIZE_TYPES, expert_info) + MK_QUANT_CONFIGS, MK_SINGLE_GPU_PREPARE_FINALIZE_TYPES, TestMoEQuantConfig, + expert_info) from .modular_kernel_tools.parallel_utils import (ProcessGroupInfo, parallel_launch_with_config) @@ -55,7 +55,7 @@ def rank_worker( pgi: ProcessGroupInfo, vllm_config: VllmConfig, cpu_group, - config: Config, + base_config: Config, weights: WeightTensors, verbose: bool, ): @@ -63,42 +63,44 @@ def rank_worker( # sanity check from vllm import envs - if config.fused_moe_chunk_size is not None: - assert (config.fused_moe_chunk_size == envs.VLLM_FUSED_MOE_CHUNK_SIZE) + if base_config.fused_moe_chunk_size is not None: + assert ( + base_config.fused_moe_chunk_size == envs.VLLM_FUSED_MOE_CHUNK_SIZE) # get weights to this device weights.to_current_device() - Ms = config.Ms + Ms = base_config.Ms assert isinstance(Ms, list) - TOPKs = config.topks + TOPKs = base_config.topks assert isinstance(TOPKs, list) exceptions = [] count = 0 for m, topk in product(Ms, TOPKs): + # override m and topk + config = copy.deepcopy(base_config) + config.Ms = m + config.topks = topk + try: print(f"Running[{pgi.rank}]: m={m}, 
topk={topk} ...") count = count + 1 - # override m and topk - cfgx = copy.deepcopy(config) - cfgx.Ms = m - cfgx.topks = topk # inputs for rank - rank_tensors = RankTensors.make(cfgx, pgi) + rank_tensors = RankTensors.make(config, pgi) # modular kernel out - mk_out = run_modular_kernel(pgi, vllm_config, cfgx, weights, + mk_out = run_modular_kernel(pgi, vllm_config, config, weights, rank_tensors) with set_current_vllm_config(vllm_config): - ref_out = reference_moe_impl(cfgx, weights, rank_tensors) + ref_out = reference_moe_impl(config, weights, rank_tensors) if config.quant_dtype == "nvfp4": - atol = 1e-1 - rtol = 1e-1 + atol = 1e-1 if config.K < 4096 else 2e-1 + rtol = 1e-1 if config.K < 4096 else 2e-1 else: atol = 3e-2 rtol = 3e-2 @@ -132,7 +134,7 @@ def run(config: Config, verbose: bool): # hidden sizes, making this too large will cause fp4 tests to fail. # Also needs to be a multiple of 1024 for deep_gemm. Ks = [2048] -Ns = [2048] +Ns = [1024] TOPKs = [4, 1] Es = [32] DTYPEs = [torch.bfloat16] @@ -167,7 +169,7 @@ def is_nyi_config(config: Config) -> bool: @meets_multi_gpu_requirements def test_modular_kernel_combinations_multigpu( k: int, n: int, e: int, dtype: torch.dtype, - quant_config: Optional[FusedMoEQuantConfig], + quant_config: Optional[TestMoEQuantConfig], combination: tuple[mk.FusedMoEPrepareAndFinalize, mk.FusedMoEPermuteExpertsUnpermute], fused_moe_chunk_size: Optional[int], world_size: int, pytestconfig): @@ -208,7 +210,7 @@ def test_modular_kernel_combinations_multigpu( @pytest.mark.parametrize("world_size", [1]) def test_modular_kernel_combinations_singlegpu( k: int, n: int, e: int, dtype: torch.dtype, - quant_config: Optional[FusedMoEQuantConfig], + quant_config: Optional[TestMoEQuantConfig], combination: tuple[mk.FusedMoEPrepareAndFinalize, mk.FusedMoEPermuteExpertsUnpermute], fused_moe_chunk_size: Optional[int], world_size: int, pytestconfig): diff --git a/tests/kernels/moe/test_moe.py b/tests/kernels/moe/test_moe.py index 850c486b9524..00835bec9a15 100644 --- a/tests/kernels/moe/test_moe.py +++ b/tests/kernels/moe/test_moe.py @@ -15,11 +15,14 @@ from transformers.models.mixtral.modeling_mixtral import MixtralSparseMoeBlock import vllm.model_executor.layers.fused_moe # noqa +from tests.kernels.moe.utils import fused_moe from tests.kernels.utils import opcheck, stack_and_dev, torch_moe from vllm.config import VllmConfig, set_current_vllm_config from vllm.distributed.parallel_state import init_distributed_environment from vllm.forward_context import set_forward_context -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe.config import ( + FUSED_MOE_UNQUANTIZED_CONFIG, int4_w4a16_moe_quant_config, + int8_w8a16_moe_quant_config) from vllm.model_executor.layers.fused_moe.fused_moe import ( fused_topk, modular_triton_fused_moe) from vllm.model_executor.layers.fused_moe.moe_torch_iterative import ( @@ -187,14 +190,9 @@ def test_fused_moe( # # Setup test functions # + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG - m_fused_moe_fn = modular_triton_fused_moe(use_fp8_w8a8=False, - use_int8_w8a8=False, - use_int8_w8a16=False, - use_int4_w4a16=False, - use_mxfp4_w4a4=False, - per_act_token_quant=False, - block_shape=None) + m_fused_moe_fn = modular_triton_fused_moe(quant_config) def m_fused_moe( a: torch.Tensor, @@ -340,6 +338,18 @@ def test_fused_moe_wn16(m: int, n: int, k: int, e: int, topk: int, else: e_map = None + if weight_bits == 4: + quant_config_builder = int4_w4a16_moe_quant_config + else: + assert weight_bits == 8 + 
quant_config_builder = int8_w8a16_moe_quant_config + + quant_config = quant_config_builder(w1_scale=w1_scales, + w2_scale=w2_scales, + w1_zp=w1_qzeros if has_zp else None, + w2_zp=w2_qzeros if has_zp else None, + block_shape=[0, group_size]) + with set_current_vllm_config(vllm_config): triton_output = fused_moe(a, w1_qweight, @@ -347,15 +357,9 @@ def test_fused_moe_wn16(m: int, n: int, k: int, e: int, topk: int, score, topk, renormalize=False, - use_int4_w4a16=weight_bits == 4, - use_int8_w8a16=weight_bits == 8, global_num_experts=e, expert_map=e_map, - w1_scale=w1_scales, - w2_scale=w2_scales, - w1_zp=w1_qzeros if has_zp else None, - w2_zp=w2_qzeros if has_zp else None, - block_shape=[0, group_size]) + quant_config=quant_config) torch_output = torch_moe(a, w1_ref, w2_ref, diff --git a/tests/kernels/moe/test_nvfp4_moe.py b/tests/kernels/moe/test_nvfp4_moe.py index 30388ef9375d..a48bfeb10b2e 100644 --- a/tests/kernels/moe/test_nvfp4_moe.py +++ b/tests/kernels/moe/test_nvfp4_moe.py @@ -10,6 +10,7 @@ from tests.kernels.utils import torch_moe from vllm import _custom_ops as ops from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import nvfp4_moe_quant_config from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp4 from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk from vllm.platforms import current_platform @@ -56,7 +57,7 @@ def test_cutlass_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int, in_dtype=dtype, quant_dtype="nvfp4", block_shape=None, # use quant_blocksize? - per_act_token_quant=False, + per_out_ch_quant=False, ) score = torch.randn((m, e), device="cuda", dtype=dtype) @@ -73,18 +74,22 @@ def test_cutlass_fp4_moe_no_graph(m: int, n: int, k: int, e: int, topk: int, assert w1_blockscale is not None assert w2_blockscale is not None + quant_config = nvfp4_moe_quant_config( + g1_alphas=(1 / w1_gs), + g2_alphas=(1 / w2_gs), + a1_gscale=a1_gs, + a2_gscale=a2_gs, + w1_scale=w1_blockscale, + w2_scale=w2_blockscale, + ) + cutlass_output = cutlass_moe_fp4( a=a, - a1_gscale=a1_gs, w1_fp4=w1_q, - w1_blockscale=w1_blockscale, - g1_alphas=(1 / w1_gs), - a2_gscale=a2_gs, w2_fp4=w2_q, - w2_blockscale=w2_blockscale, - g2_alphas=(1 / w2_gs), topk_weights=topk_weights, topk_ids=topk_ids, + quant_config=quant_config, m=m, n=n, k=k, diff --git a/tests/kernels/moe/test_pplx_cutlass_moe.py b/tests/kernels/moe/test_pplx_cutlass_moe.py index 9e78f4d6e4da..59126cef6adb 100644 --- a/tests/kernels/moe/test_pplx_cutlass_moe.py +++ b/tests/kernels/moe/test_pplx_cutlass_moe.py @@ -9,6 +9,8 @@ from tests.kernels.utils import torch_experts from vllm import _custom_ops as ops from vllm.config import VllmConfig, set_current_vllm_config +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.cutlass_moe import ( CutlassBatchedExpertsFp8) from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk @@ -143,10 +145,16 @@ def pplx_cutlass_moe( device="cuda", dtype=torch.int64) - experts = CutlassBatchedExpertsFp8(num_local_experts, num_dispatchers, - out_dtype, per_act_token, per_out_ch, - ab_strides1, ab_strides2, c_strides1, - c_strides2) + experts = CutlassBatchedExpertsFp8( + num_local_experts, num_dispatchers, out_dtype, ab_strides1, + ab_strides2, c_strides1, c_strides2, + fp8_w8a8_moe_quant_config( + per_act_token_quant=per_act_token, + per_out_ch_quant=per_out_ch, + w1_scale=chunk_by_rank(w1_scale, rank, 
world_size), + w2_scale=chunk_by_rank(w2_scale, rank, world_size), + a1_scale=chunk_by_rank(a1_scale, rank, world_size) + if per_act_token else a1_scale[rank])) fused_cutlass_experts = FusedMoEModularKernel( prepare_finalize, @@ -167,10 +175,7 @@ def pplx_cutlass_moe( chunk_topk_ids, global_num_experts=num_experts, expert_map=None, #TODO - w1_scale=chunk_by_rank(w1_scale, rank, world_size), - w2_scale=chunk_by_rank(w2_scale, rank, world_size), - a1_scale=chunk_by_rank(a1_scale, rank, world_size) - if per_act_token else a1_scale[rank]) + ) torch.cuda.synchronize() diff --git a/tests/kernels/moe/test_pplx_moe.py b/tests/kernels/moe/test_pplx_moe.py index 394f52114085..4ca4a1e79c57 100644 --- a/tests/kernels/moe/test_pplx_moe.py +++ b/tests/kernels/moe/test_pplx_moe.py @@ -58,7 +58,7 @@ ] PPLX_COMBOS = [ - # TODO: figure out why this fails, seems to be test problem + # TODO(bnell): figure out why this fails, seems to be test problem #(1, 128, 128), (2, 128, 512), (3, 1024, 2048), @@ -360,18 +360,18 @@ def pplx_prepare_finalize( b_a, b_a_scale, expert_num_tokens, _, _ = prepare_finalize.prepare( a_chunk, - a1_scale, - a2_scale, chunk_topk_weight, chunk_topk_ids, num_experts, None, False, - FusedMoEQuantConfig( + FusedMoEQuantConfig.make( quant_dtype, - per_act_token_quant, - False, - block_shape, + per_act_token_quant=per_act_token_quant, + per_out_ch_quant=False, + block_shape=block_shape, + a1_scale=a1_scale, + a2_scale=a2_scale, ), ) @@ -540,20 +540,6 @@ def pplx_moe( topk_ids = topk_ids.to(dtype=torch.uint32) - experts = BatchedTritonExperts( - max_num_tokens=max_num_tokens, - num_dispatchers=prepare_finalize.num_dispatchers(), - use_fp8_w8a8=quant_dtype == torch.float8_e4m3fn, - block_shape=block_shape, - per_act_token_quant=per_act_token_quant, - ) - - fused_experts = FusedMoEModularKernel( - prepare_finalize, - experts, - shared_experts, - ) - # Note: workers with the same dp_rank must use the exact same inputs. a_chunk = chunk_by_rank(a, rank, world_size) chunk_topk_weight = chunk_by_rank(topk_weight, rank, world_size) @@ -567,6 +553,28 @@ def pplx_moe( a1_scale_chunk = chunk_scales_by_rank(a1_scale, rank, world_size) a2_scale_chunk = chunk_scales_by_rank(a2_scale, rank, world_size) + quant_config = FusedMoEQuantConfig.make( + quant_dtype, + block_shape=block_shape, + per_act_token_quant=per_act_token_quant, + w1_scale=w1_scale_chunk, + w2_scale=w2_scale_chunk, + a1_scale=a1_scale_chunk, + a2_scale=a2_scale_chunk, + ) + + experts = BatchedTritonExperts( + max_num_tokens=max_num_tokens, + num_dispatchers=prepare_finalize.num_dispatchers(), + quant_config=quant_config, + ) + + fused_experts = FusedMoEModularKernel( + prepare_finalize, + experts, + shared_experts, + ) + # Note: for now use_compile will error out if the problem size is # large enough to trigger chunking. I'm leaving the flag and # setup code in case we are able to revisit this later. 
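For orientation, the pattern the updated tests converge on is sketched below; it is not part of the diff. The helper names and constructor arguments come from the hunks above, while the expert count, token budget and scale shapes are illustrative placeholders.

import torch

from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
from vllm.model_executor.layers.fused_moe.fused_batched_moe import (
    BatchedTritonExperts)

E = 32               # number of experts (placeholder)
max_num_tokens = 64  # per-rank token budget (placeholder)

# Bundle the quantized dtype, layout flags and scales into a single config
# object instead of threading them through every forward call.
quant_config = FusedMoEQuantConfig.make(
    torch.float8_e4m3fn,
    per_act_token_quant=False,
    block_shape=None,
    w1_scale=torch.ones(E, dtype=torch.float32),  # placeholder scale tensors
    w2_scale=torch.ones(E, dtype=torch.float32),
)

# The experts implementation receives the config at construction time, so the
# later fused_experts(...) / FusedMoEModularKernel.forward(...) calls no
# longer take w1_scale / w2_scale / a1_scale / a2_scale keyword arguments.
experts = BatchedTritonExperts(
    max_num_tokens=max_num_tokens,
    num_dispatchers=1,
    quant_config=quant_config,
)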
@@ -585,10 +593,6 @@ def pplx_moe( w2_chunk, chunk_topk_weight, chunk_topk_ids, - w1_scale=w1_scale_chunk, - w2_scale=w2_scale_chunk, - a1_scale=a1_scale_chunk, - a2_scale=a2_scale_chunk, global_num_experts=num_experts) if use_cudagraphs: @@ -605,10 +609,6 @@ def pplx_moe( w2_chunk, chunk_topk_weight, chunk_topk_ids, - w1_scale=w1_scale_chunk, - w2_scale=w2_scale_chunk, - a1_scale=a1_scale_chunk, - a2_scale=a2_scale_chunk, global_num_experts=num_experts) torch.cuda.synchronize() @@ -820,7 +820,7 @@ def test_pplx_moe_slow( k, quant_dtype=quant_dtype, block_shape=block_shape, - per_act_token_quant=per_act_token_quant, + per_out_ch_quant=per_act_token_quant, ) parallel_launch(world_size, _pplx_moe, dp_size, a, w1, w2, score, topk, e, @@ -897,7 +897,7 @@ def format_result(msg, ex=None): k, quant_dtype=quant_dtype, block_shape=block_shape, - per_act_token_quant=per_act_token_quant, + per_out_ch_quant=per_act_token_quant, ) args["w1"] = w1 args["w2"] = w2 diff --git a/tests/kernels/moe/test_triton_moe_ptpc_fp8.py b/tests/kernels/moe/test_triton_moe_ptpc_fp8.py index dfd0f35c8da3..1c31464b30e7 100644 --- a/tests/kernels/moe/test_triton_moe_ptpc_fp8.py +++ b/tests/kernels/moe/test_triton_moe_ptpc_fp8.py @@ -7,10 +7,12 @@ import pytest import torch +from tests.kernels.moe.utils import fused_moe from vllm import _custom_ops as ops from vllm.config import VllmConfig, set_current_vllm_config from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe.config import ( + fp8_w8a8_moe_quant_config) from vllm.platforms import current_platform if current_platform.get_device_capability() < (9, 0): @@ -152,11 +154,12 @@ def test_w8a8_fp8_fused_moe(M, N, K, E, topk, dtype, seed): score, topk, renormalize=False, - use_fp8_w8a8=True, # using fp8 - per_channel_quant=True, - w1_scale=w1_s, - w2_scale=w2_s, - block_shape=None, # Not using block quantization + quant_config=fp8_w8a8_moe_quant_config( + per_act_token_quant=True, + w1_scale=w1_s, + w2_scale=w2_s, + block_shape=None, # Not using block quantization + ), ) # Check results diff --git a/tests/kernels/moe/utils.py b/tests/kernels/moe/utils.py index 4b58a28eed12..7a0feb6a2079 100644 --- a/tests/kernels/moe/utils.py +++ b/tests/kernels/moe/utils.py @@ -9,7 +9,8 @@ from tests.kernels.quantization.nvfp4_utils import (FLOAT4_E2M1_MAX, FLOAT8_E4M3_MAX) from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_experts +from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk +from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( BatchedPrepareAndFinalize, BatchedTritonExperts, NaiveBatchedExperts) from vllm.model_executor.layers.fused_moe.modular_kernel import ( @@ -34,18 +35,22 @@ def triton_moe( per_act_token_quant=False, block_shape: Optional[list[int]] = None, ) -> torch.Tensor: + quant_config = FusedMoEQuantConfig.make( + quant_dtype, + per_act_token_quant=per_act_token_quant, + block_shape=block_shape, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + ) + return fused_experts(a, w1, w2, topk_weight, topk_ids, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, - per_channel_quant=per_act_token_quant, - use_fp8_w8a8=quant_dtype == torch.float8_e4m3fn, - block_shape=block_shape) + quant_config=quant_config) def batched_moe( @@ 
-64,6 +69,16 @@ def batched_moe( ) -> torch.Tensor: max_num_tokens = round_up(a.shape[0], 64) + quant_config = FusedMoEQuantConfig.make( + quant_dtype, + per_act_token_quant=per_act_token_quant, + block_shape=block_shape, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + ) + fused_experts = FusedMoEModularKernel( BatchedPrepareAndFinalize(max_num_tokens, num_dispatchers=1, @@ -72,21 +87,11 @@ def batched_moe( BatchedTritonExperts( max_num_tokens=max_num_tokens, num_dispatchers=1, - use_fp8_w8a8=quant_dtype == torch.float8_e4m3fn, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, + quant_config=quant_config, ), ) - return fused_experts(a, - w1, - w2, - topk_weight, - topk_ids, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale) + return fused_experts(a, w1, w2, topk_weight, topk_ids) def naive_batched_moe( @@ -105,6 +110,16 @@ def naive_batched_moe( ) -> torch.Tensor: max_num_tokens = round_up(a.shape[0], 64) + quant_config = FusedMoEQuantConfig.make( + quant_dtype, + per_act_token_quant=per_act_token_quant, + block_shape=block_shape, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + ) + fused_experts = FusedMoEModularKernel( BatchedPrepareAndFinalize(max_num_tokens, num_dispatchers=1, @@ -113,21 +128,11 @@ def naive_batched_moe( NaiveBatchedExperts( max_num_tokens=max_num_tokens, num_dispatchers=1, - use_fp8_w8a8=quant_dtype == torch.float8_e4m3fn, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, + quant_config=quant_config, ), ) - return fused_experts(a, - w1, - w2, - topk_weight, - topk_ids, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale) + return fused_experts(a, w1, w2, topk_weight, topk_ids) def chunk_scales(scales: Optional[torch.Tensor], start: int, @@ -216,7 +221,7 @@ def make_test_weight( in_dtype: torch.dtype = torch.bfloat16, quant_dtype: Union[torch.dtype, str, None] = None, block_shape: Optional[list[int]] = None, - per_act_token_quant: bool = False, + per_out_ch_quant: bool = False, ) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]: w_16 = torch.randn((e, rows, cols), device="cuda", dtype=in_dtype) / 15 @@ -228,7 +233,7 @@ def make_test_weight( w_gs_l = [None] * e for idx in range(e): w_l[idx], w_s_l[idx], w_gs_l[idx] = moe_quantize_weights( - w_16[idx], None, quant_dtype, per_act_token_quant, block_shape) + w_16[idx], None, quant_dtype, per_out_ch_quant, block_shape) w = torch.stack(w_l) w_s = torch.stack(w_s_l) @@ -258,16 +263,16 @@ def make_test_weights( in_dtype: torch.dtype = torch.bfloat16, quant_dtype: Union[torch.dtype, str, None] = None, block_shape: Optional[list[int]] = None, - per_act_token_quant: bool = False, + per_out_ch_quant: bool = False, ) -> tuple[tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]], tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]]: return ( make_test_weight(e, 2 * n, k, in_dtype, quant_dtype, block_shape, - per_act_token_quant), + per_out_ch_quant), make_test_weight(e, k, n, in_dtype, quant_dtype, block_shape, - per_act_token_quant), + per_out_ch_quant), ) @@ -285,6 +290,76 @@ def per_token_cast_to_fp8( return fp8_data.view(m, n + pad_size)[:, :n], (x_amax / 448.0).view(m, -1) +def make_test_quant_config( + e: int, + n: int, + k: int, + in_dtype: torch.dtype, + quant_dtype: Union[torch.dtype, str, None] = None, + per_act_token_quant: bool = False, + block_shape: 
Optional[list[int]] = None, +) -> tuple[torch.Tensor, torch.Tensor, FusedMoEQuantConfig]: + (_, w1, w1_s, w1_gs), (_, w2, w2_s, w2_gs) = make_test_weights( + e, + n, + k, + in_dtype, + quant_dtype, + per_out_ch_quant=per_act_token_quant, + block_shape=block_shape, + ) + + # Hacky/trivial scales for nvfp4. + a1_gscale: Optional[torch.Tensor] = None + a2_gscale: Optional[torch.Tensor] = None + if quant_dtype == "nvfp4": + a1_gscale = torch.ones((e, ), device="cuda", dtype=torch.float32) + a2_gscale = torch.ones((e, ), device="cuda", dtype=torch.float32) + a1_scale = a1_gscale + a2_scale = a2_gscale + else: + a1_scale = None + a2_scale = None + + return w1, w2, FusedMoEQuantConfig.make( + quant_dtype, + per_act_token_quant=per_act_token_quant, + block_shape=block_shape, + w1_scale=w1_s, + w2_scale=w2_s, + a1_gscale=a1_gscale, + a2_gscale=a2_gscale, + a1_scale=a1_scale, + a2_scale=a2_scale, + # TODO: make sure this is handled properly + g1_alphas=(1 / w1_gs) if w1_gs is not None else None, + g2_alphas=(1 / w2_gs) if w2_gs is not None else None, + ) + + +def fused_moe( + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + score: torch.Tensor, + topk: int, + renormalize: bool = False, + quant_config: Optional[FusedMoEQuantConfig] = None, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, +) -> torch.Tensor: + topk_weights, topk_ids, _ = fused_topk(hidden_states, score.float(), topk, + renormalize) + return fused_experts(hidden_states, + w1, + w2, + topk_weights, + topk_ids, + global_num_experts=global_num_experts, + expert_map=expert_map, + quant_config=quant_config) + + # CustomOp? class BaselineMM(torch.nn.Module): diff --git a/tests/kernels/quantization/test_int8_kernel.py b/tests/kernels/quantization/test_int8_kernel.py index dc5fecbf4ccc..f2271e6be542 100644 --- a/tests/kernels/quantization/test_int8_kernel.py +++ b/tests/kernels/quantization/test_int8_kernel.py @@ -8,7 +8,8 @@ import torch from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe import fused_experts +from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.quantization.utils.int8_utils import ( per_token_quant_int8) from vllm.platforms import current_platform @@ -42,7 +43,8 @@ def native_w8a8_per_token_matmul(A, B, As, Bs, output_dtype=torch.float16): return C.reshape(origin_C_shape).to(output_dtype) -def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk): +def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, topk, topk_weight, + topk_ids): """This function performs fused moe with per-column int8 quantization using native torch.""" @@ -57,8 +59,6 @@ def torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk): out = torch.zeros(B * topk, w2.shape[1], dtype=a.dtype, device=a.device) # Calculate routing - score = torch.softmax(score, dim=-1, dtype=torch.float32) - topk_weight, topk_ids = torch.topk(score, topk) topk_weight = topk_weight.view(-1) topk_ids = topk_ids.view(-1) # Process each expert @@ -127,20 +127,27 @@ def test_w8a8_fp8_fused_moe(M, N, K, E, topk, dtype, seed): w1_s = torch.rand(E, 2 * N, device=w1_fp32.device) * factor_for_scale w2_s = torch.rand(E, K, device=w2_fp32.device) * factor_for_scale score = torch.randn((M, E), dtype=dtype) + score = torch.softmax(score, dim=-1, dtype=torch.float32) + topk_weights, topk_ids = torch.topk(score, topk) + + ref_out = torch_w8a8_per_column_moe(a, w1, w2, 
w1_s, w2_s, topk, + topk_weights, topk_ids) - ref_out = torch_w8a8_per_column_moe(a, w1, w2, w1_s, w2_s, score, topk) - out = fused_moe( + quant_config = FusedMoEQuantConfig.make( + torch.int8, + per_act_token_quant=True, + block_shape=None, + w1_scale=w1_s, + w2_scale=w2_s, + ) + + out = fused_experts( a, w1, w2, - score, - topk, - renormalize=False, - use_int8_w8a8=True, # Using int8-w8a8 - per_channel_quant=True, - w1_scale=w1_s, - w2_scale=w2_s, - block_shape=None, # Not using block quantization + topk_weights, + topk_ids, + quant_config=quant_config, ) # Check results diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py index 3007643d7a28..6730f051e3d7 100644 --- a/vllm/model_executor/layers/fused_moe/__init__.py +++ b/vllm/model_executor/layers/fused_moe/__init__.py @@ -10,6 +10,7 @@ from vllm.model_executor.layers.fused_moe.modular_kernel import ( FusedMoEActivationFormat, FusedMoEPermuteExpertsUnpermute, FusedMoEPrepareAndFinalize) +from vllm.model_executor.layers.fused_moe.utils import activation_without_mul from vllm.triton_utils import HAS_TRITON _config: Optional[dict[str, Any]] = None @@ -36,6 +37,7 @@ def get_config() -> Optional[dict[str, Any]]: "FusedMoEPermuteExpertsUnpermute", "FusedMoEActivationFormat", "FusedMoEPrepareAndFinalize", + "activation_without_mul", "override_config", "get_config", ] @@ -43,7 +45,6 @@ def get_config() -> Optional[dict[str, Any]]: if HAS_TRITON: # import to register the custom ops import vllm.model_executor.layers.fused_moe.fused_marlin_moe # noqa - import vllm.model_executor.layers.fused_moe.fused_moe # noqa from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( BatchedDeepGemmExperts) from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501 @@ -56,13 +57,12 @@ def get_config() -> Optional[dict[str, Any]]: from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( BatchedTritonExperts) from vllm.model_executor.layers.fused_moe.fused_moe import ( - TritonExperts, fused_experts, fused_moe, fused_topk, - get_config_file_name, grouped_topk) + TritonExperts, fused_experts, fused_topk, get_config_file_name, + grouped_topk) from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import ( TritonOrDeepGemmExperts) __all__ += [ - "fused_moe", "fused_topk", "fused_experts", "get_config_file_name", diff --git a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py index 0ab6355f4156..e9dfb22bea27 100644 --- a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py @@ -8,6 +8,8 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig +from vllm.model_executor.layers.fused_moe.deep_gemm_utils import ( + deep_gemm_block_shape) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceDelegate) from vllm.model_executor.layers.fused_moe.utils import _resize_cache @@ -212,27 +214,20 @@ def silu_mul_fp8_quant_deep_gemm_cuda( class BatchedDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): - # The Deep Gemm kernels only support block size of 128 - DEEPGEMM_BLOCK_SHAPE: list[int] = [128, 128] - - def __init__(self, - max_num_tokens: int, - num_dispatchers: int, - block_shape: list[int], - per_act_token_quant=False): + + def __init__( 
+ self, + max_num_tokens: int, + num_dispatchers: int, + quant_config: FusedMoEQuantConfig, + ): """ max_num_tokens: Maximum number of tokens from a DP Rank num_dispatchers: The number of DP dispatchers. - block_shape: Block quantization block shape. - per_act_token_quant: Per activation token quantization flag. + quant_config: Quantization configuration """ - super().__init__( - FusedMoEQuantConfig( - quant_dtype=torch.float8_e4m3fn, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - )) - assert self.block_shape == self.DEEPGEMM_BLOCK_SHAPE + super().__init__(quant_config) + assert self.block_shape == deep_gemm_block_shape() self.max_num_tokens = max_num_tokens self.num_dispatchers = num_dispatchers @@ -290,12 +285,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -321,11 +311,11 @@ def apply( # for the M expectation of each batch, correctly setting this value # may lead to better performance. expected_m = max_num_tokens - fp8_m_grouped_gemm_nt_masked((a1q, a1q_scale), (w1, w1_scale), + fp8_m_grouped_gemm_nt_masked((a1q, a1q_scale), (w1, self.w1_scale), workspace1, expert_num_tokens, expected_m) a2q, a2q_scale = silu_mul_fp8_quant_deep_gemm_cuda( workspace1, expert_num_tokens) - fp8_m_grouped_gemm_nt_masked((a2q, a2q_scale), (w2, w2_scale), output, - expert_num_tokens, expected_m) + fp8_m_grouped_gemm_nt_masked((a2q, a2q_scale), (w2, self.w2_scale), + output, expert_num_tokens, expected_m) diff --git a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py index 89d7412ee223..8b9070f09889 100644 --- a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py @@ -8,55 +8,37 @@ from vllm.model_executor.layers.fused_moe.batched_deep_gemm_moe import ( BatchedDeepGemmExperts) from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig +from vllm.model_executor.layers.fused_moe.deep_gemm_utils import ( + deep_gemm_block_shape) from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( BatchedTritonExperts) class BatchedTritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): - def __init__(self, - max_num_tokens: int, - num_dispatchers: int, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - block_shape: Optional[list[int]] = None, - per_act_token_quant: bool = False, - allow_deep_gemm: bool = False): - assert not use_int8_w8a8, "NYI" - assert not use_int8_w8a16, "NYI" - assert not use_int4_w4a16, "NYI" - - super().__init__( - FusedMoEQuantConfig.make( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - block_shape=block_shape, - per_act_token_quant=per_act_token_quant, - )) + def __init__( + self, + max_num_tokens: int, + num_dispatchers: int, + quant_config: FusedMoEQuantConfig, + allow_deep_gemm: bool = False, + ): + super().__init__(quant_config) self.batched_triton_experts = BatchedTritonExperts( max_num_tokens=max_num_tokens, 
num_dispatchers=num_dispatchers, - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - per_act_token_quant=self.per_act_token_quant, - block_shape=self.block_shape, + quant_config=self.quant_config, ) - self.allow_deep_gemm = (allow_deep_gemm and use_fp8_w8a8 - and self.block_shape - == BatchedDeepGemmExperts.DEEPGEMM_BLOCK_SHAPE) + self.allow_deep_gemm = (allow_deep_gemm + and self.quant_config.use_fp8_w8a8 and + self.block_shape == deep_gemm_block_shape()) self.batched_deep_gemm_experts = BatchedDeepGemmExperts( max_num_tokens=max_num_tokens, num_dispatchers=num_dispatchers, - block_shape=self.block_shape, # type: ignore[arg-type] + quant_config=self.quant_config, ) if self.allow_deep_gemm else None assert (self.batched_deep_gemm_experts is not None @@ -143,12 +125,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -158,7 +135,6 @@ def apply( if self.allow_deep_gemm else self.batched_triton_experts) assert experts is not None experts.apply(output, hidden_states, w1, w2, topk_weights, topk_ids, - activation, global_num_experts, expert_map, w1_scale, - w2_scale, w1_zp, w2_zp, a1q_scale, a2_scale, workspace13, - workspace2, expert_tokens_meta, + activation, global_num_experts, expert_map, a1q_scale, + workspace13, workspace2, expert_tokens_meta, apply_router_weight_on_input) diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py index 0b501cd87fb5..742df3dbdc6a 100644 --- a/vllm/model_executor/layers/fused_moe/config.py +++ b/vllm/model_executor/layers/fused_moe/config.py @@ -1,103 +1,322 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses import dataclass -from typing import Optional, Union +from typing import TYPE_CHECKING, Optional, Union import torch -from compressed_tensors.quantization import (QuantizationArgs, - QuantizationStrategy, - QuantizationType) import vllm.envs as envs from vllm.config import ParallelConfig from vllm.distributed import get_dp_group, get_tensor_model_parallel_rank from vllm.logger import init_logger -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) -from vllm.utils import cdiv +from vllm.model_executor.layers.quantization.utils.quant_utils import ( + GroupShape) +from vllm.utils import cdiv, has_triton_kernels from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe +if TYPE_CHECKING and has_triton_kernels: + from triton_kernels.matmul_ogs import PrecisionConfig + logger = init_logger(__name__) -def _get_quant_config_quantization_args( - quant_config: Optional[QuantizationConfig], - prop_name: str, -) -> Optional[QuantizationArgs]: - if (quant_config is not None and hasattr(quant_config, 'target_scheme_map') - and "Linear" in quant_config.target_scheme_map and - "input_activations" in quant_config.target_scheme_map["Linear"]): - return quant_config.target_scheme_map["Linear"].get(prop_name) +def _get_config_dtype_str( + dtype: torch.dtype, + use_fp8_w8a8: bool = False, + use_int8_w8a16: bool = False, + use_int4_w4a16: bool = False, + use_mxfp4_w4a4: bool = 
False, +) -> Optional[str]: + """ + Return a string used to construct the filename that contains the + tuning info for a particular quantization scheme. See + try_get_optimal_moe_config in fused_moe.py. + """ + if use_fp8_w8a8: + return "fp8_w8a8" + elif use_int8_w8a16: + return "int8_w8a16" + elif use_int4_w4a16: + return "int4_w4a16" + elif use_mxfp4_w4a4: + return "mxfp4_w4a4" + elif dtype == torch.float: + # avoiding cases where kernel fails when float32 MoE + # use fp16/bfloat16 configs + return "float32" + return None + + +def _quant_flags_to_group_shape( + quant_dtype: Union[torch.dtype, str, None], + per_act_token_quant: bool, + per_out_ch_quant: bool, + block_shape: Optional[list[int]], +) -> tuple[Optional[GroupShape], Optional[GroupShape]]: + """ + Convert MoE quantization flags into more generic GroupShapes. + """ + a_shape: Optional[GroupShape] + w_shape: Optional[GroupShape] + if block_shape is not None: + assert not per_act_token_quant + assert not per_out_ch_quant + # TODO(bnell): this is not quite right for activations since first + # dim should be 1. + a_shape = GroupShape(row=block_shape[0], col=block_shape[1]) + w_shape = GroupShape(row=block_shape[0], col=block_shape[1]) else: - return None + w_shape = None + a_shape = None if quant_dtype is None else GroupShape.PER_TENSOR + if per_act_token_quant: + a_shape = GroupShape.PER_TOKEN -def get_quant_config_input_quant( - quant_config: Optional[QuantizationConfig] -) -> Optional[QuantizationArgs]: - return _get_quant_config_quantization_args(quant_config, - "input_activations") + if per_out_ch_quant: + w_shape = GroupShape.PER_TOKEN + return a_shape, w_shape -def get_quant_config_weight_quant( - quant_config: Optional[QuantizationConfig] -) -> Optional[QuantizationArgs]: - return _get_quant_config_quantization_args(quant_config, "weights") +@dataclass +class FusedMoEQuantDesc: + """ + A quantization descriptor for fused MoE ops. This class can describe + either activations or weights. + """ + + # The quantized type of this parameters. None means unquantized or + # already quantized. + # TODO (bnell): use scalar_type instead of Union. + dtype: Union[torch.dtype, str, None] = None -def get_config_quant_dtype( - use_fp8_w8a8: bool, - use_int8_w8a8: bool, - use_int8_w8a16: bool, - use_int4_w4a16: bool, - use_mxfp4_w4a4: bool, -) -> Union[None, torch.dtype, str]: - if use_fp8_w8a8: - return torch.float8_e4m3fn - elif use_int8_w8a8: - return torch.int8 - elif use_mxfp4_w4a4: - return "mxfp4" - return None + # A field that describes the quantization group shape, from quant_utils.py. + # * (-1, -1) for per-tensor quantization + # * (1, -1) for per-row quantization + # * (-1, 1) for per-column quantization + # * (128, 128) for 128x128 deepseek style block quantization + # * (1, 128) for deepseek style activation quantization + # (i.e. per-token-per-group) + shape: Optional[GroupShape] = None + + # Quantization scales. + # TODO(bnell): maybe put PrecisionConfigs in subclass of QuantDesc? + scale: Union[torch.Tensor, "PrecisionConfig", None] = None + + # Quantization alphas or gscales, used for nvfp4 types. + # TODO(bnell): put some of these in subclasses + alpha_or_gscale: Optional[torch.Tensor] = None + + # Zero points for int4/int8 types + zp: Optional[torch.Tensor] = None + + # Biases for GPT triton MoE + bias: Optional[torch.Tensor] = None +# TODO(bnell): have subclasses for specific moe methods? +# e.g. for specific arguments bias, precision, etc. @dataclass class FusedMoEQuantConfig: - # The post quantization activation type. 
- # TODO (bnell): use scalar_type instead of Union. - quant_dtype: Union[torch.dtype, str, None] = None - per_act_token_quant: bool = False - per_out_ch_quant: bool = False - block_shape: Optional[list[int]] = None - - # TODO: add col major flag? - # add detailed quant info for input, intermediates, weights, etc? + """ + The FusedMoEQuantConfig contains all the quantization parameters for + a single FusedMoEMethodBase operation. It consists of four + FusedMoEQuantDescs, one for each activation and set of weights. + + Each FusedMoEMethodBase must implement a get_fused_moe_quant_config + method to construct a FusedMoEQuantConfig for use with that class. + + FusedMoEQuant configs are only used for modular kernels, fused_experts + (from fused_moe.py), cutlass_moe_fp[48], rocm_aiter_fused_experts and + triton_kernel_moe_forward. Other MoE methods can ignore the + FusedMoEQuantConfig (for now) and hardcode it to None. + + There are currently some restrictions on what can be expressed: + - Most MoE ops only support similar quantization strategies for + each parameter, e.g. both weights must have the same GroupShape + and both activations must share the same GroupShape. One exception to + this is the cutlass moe, which allows per channel quantization on the + outputs. Note: these restrictions are not always rigorously checked. + - Not all fused MoE functions support all the parameters, e.g. zero points, + global scales, alphas and biases are not universally supported. + - Fully general GroupShapes are not allowed. Activations only support + per token, per tensor or K-blocked. + - Weights are not required to have a GroupShape since they have already + been quantized. + + Other notes: + - PrecisionConfigs are specific to GPT OSS Triton. + - As a follow up it would probably make sense to subclass FusedMoEQuantDesc + or FusedMoEQuantConfig for particular FusedMoEMethodBase subclasses + so that only the required quantization parameters are used/stored. + """ + + # TODO(bnell) make sure a1_scales/a2_scales don't interfere with chunking + _a1: FusedMoEQuantDesc + _a2: FusedMoEQuantDesc + _w1: FusedMoEQuantDesc + _w2: FusedMoEQuantDesc def __post_init__(self): assert (not self.per_act_token_quant or self.block_shape is None), "illegal quantization" + # + # Convenience accessors for various properties.
+ # + + @property + def quant_dtype(self) -> Union[torch.dtype, str, None]: + return self._a1.dtype + @property def is_quantized(self) -> bool: return self.quant_dtype is not None @property def is_per_act_token(self) -> bool: - return self.per_act_token_quant + return self._a1.shape == GroupShape.PER_TOKEN + + @property + def per_act_token_quant(self) -> bool: + return self._a1.shape == GroupShape.PER_TOKEN + + @property + def per_out_ch_quant(self) -> bool: + return self._w1.shape == GroupShape.PER_TOKEN + + @property + def is_per_tensor(self) -> bool: + return self._a1.shape == GroupShape.PER_TENSOR + + @property + def block_shape(self) -> Optional[list[int]]: + if (self._a1.shape is not None + and self._a1.shape != GroupShape.PER_TENSOR + and self._a1.shape != GroupShape.PER_TOKEN): + return [self._a1.shape.row, self._a1.shape.col] + else: + return None @property def is_block_quantized(self) -> bool: return self.block_shape is not None @property - def is_per_tensor(self) -> bool: - return not self.per_act_token_quant and self.block_shape is None + def a1_scale(self) -> Optional[torch.Tensor]: + assert self._a1.scale is None or isinstance(self._a1.scale, + torch.Tensor) + return self._a1.scale + + @property + def a1_gscale(self) -> Optional[torch.Tensor]: + return self._a1.alpha_or_gscale + + @property + def a2_scale(self) -> Optional[torch.Tensor]: + assert self._a2.scale is None or isinstance(self._a2.scale, + torch.Tensor) + return self._a2.scale + + @property + def a2_gscale(self) -> Optional[torch.Tensor]: + return self._a2.alpha_or_gscale + + @property + def w1_scale(self) -> Optional[torch.Tensor]: + assert self._w1.scale is None or isinstance(self._w1.scale, + torch.Tensor) + return self._w1.scale + + @property + def w1_zp(self) -> Optional[torch.Tensor]: + return self._w1.zp + + @property + def w1_bias(self) -> Optional[torch.Tensor]: + return self._w1.bias + + @property + def w1_precision(self) -> Optional["PrecisionConfig"]: + assert self._w1.scale is None or isinstance(self._w1.scale, + PrecisionConfig) + return self._w1.scale + + @property + def g1_alphas(self) -> Optional[torch.Tensor]: + return self._w1.alpha_or_gscale + + @property + def w2_scale(self) -> Optional[torch.Tensor]: + assert self._w2.scale is None or isinstance(self._w2.scale, + torch.Tensor) + return self._w2.scale + + @property + def w2_zp(self) -> Optional[torch.Tensor]: + return self._w2.zp + + @property + def w2_bias(self) -> Optional[torch.Tensor]: + return self._w2.bias + + @property + def w2_precision(self) -> Optional["PrecisionConfig"]: + assert self._w2.scale is None or isinstance(self._w2.scale, + PrecisionConfig) + return self._w2.scale + + @property + def g2_alphas(self) -> Optional[torch.Tensor]: + return self._w2.alpha_or_gscale + + @property + def use_fp8_w8a8(self) -> bool: + return self.quant_dtype == torch.float8_e4m3fn + + @property + def use_int8_w8a8(self) -> bool: + return self.quant_dtype == torch.int8 + + @property + def use_int8_w8a16(self) -> bool: + return (self._a1.dtype is None and self._w1.dtype == torch.int8) + + @property + def use_int4_w4a16(self) -> bool: + return (self._a1.dtype is None and self._w1.dtype == "int4") + + @property + def use_mxfp4_w4a4(self) -> bool: + return self.quant_dtype == "mxfp4" + + @property + def use_nvfp4_w4a4(self) -> bool: + return self.quant_dtype == "nvfp4" + + def config_name(self, dtype: torch.dtype) -> Optional[str]: + """ + Return a string used to construct the filename that contains the + tuning info for a particular quantization scheme. 
See + try_get_optimal_moe_config in fused_moe.py. + """ + return _get_config_dtype_str( + use_fp8_w8a8=self.use_fp8_w8a8, + use_int8_w8a16=self.use_int8_w8a16, + use_int4_w4a16=self.use_int4_w4a16, + use_mxfp4_w4a4=self.use_mxfp4_w4a4, + dtype=dtype, + ) def scale_shape( self, max_tokens: int, hidden_dim: int, ) -> Optional[tuple[int, int]]: + """ + Construct the proper activation scale shape for this + config. + """ if self.is_quantized: if self.is_block_quantized: assert self.block_shape is not None @@ -117,6 +336,10 @@ def batched_scale_shape( max_tokens: int, hidden_dim: int, ) -> Optional[tuple[int, int, int]]: + """ + Construct the proper activation batched scale shape for this + config, e.g. (num experts, *scale_shape). + """ if self.is_quantized: scale_shape = self.scale_shape(max_tokens, hidden_dim) assert scale_shape is not None @@ -126,38 +349,218 @@ def batched_scale_shape( @staticmethod def make( - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, + quant_dtype: Union[torch.dtype, str, None] = None, per_act_token_quant: bool = False, per_out_ch_quant: bool = False, block_shape: Optional[list[int]] = None, + w1_scale: Union[torch.Tensor, "PrecisionConfig", None] = None, + w2_scale: Union[torch.Tensor, "PrecisionConfig", None] = None, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, + g1_alphas: Optional[torch.Tensor] = None, + g2_alphas: Optional[torch.Tensor] = None, + a1_gscale: Optional[torch.Tensor] = None, + a2_gscale: Optional[torch.Tensor] = None, + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None, + w1_zp: Optional[torch.Tensor] = None, + w2_zp: Optional[torch.Tensor] = None, ) -> "FusedMoEQuantConfig": - assert sum([ - int(flag) for flag in [ - use_fp8_w8a8, - use_int8_w8a8, - use_int8_w8a16, - use_int4_w4a16, - use_mxfp4_w4a4, - ] - ]) <= 1, "Quantization flags are mutually exclusive." - - quant_dtype = get_config_quant_dtype( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - ) - return FusedMoEQuantConfig( - quant_dtype, - per_act_token_quant, - per_out_ch_quant, - block_shape, + """ + General builder function for a FusedMoEQuantConfig. + - quant_dtype: Optional quantization type. None if activations are + unquantized or quantized prior to calling. Note: "nvfp4" and + "mxfp4" are the only valid string values for quant_dtype. + - per_act_token_quant: Activations have per token quantization. + - per_out_ch_quant: Outputs have per channel quantization. (only + for cutlass). + - block_shape: Optional block size for block-wise quantization. + Incompatible with per_act_token and per_out_ch quant. + - w1_scale: Optional scale to be used for w1. + - w2_scale: Optional scale to be used for w2. + - a1_scale: Optional scale to be used for a1. + - a2_scale: Optional scale to be used for a2. + - g1_alphas: Optional global quantization scales for w1 (for nvfp4). + - g2_alphas: Optional global quantization scales for w2 (for nvfp4). + - a1_gscale: Optional global quantization scales for a1 (for nvfp4). + - a2_gscale: Optional global quantization scales for a2 (for nvfp4). + - w1_bias: Optional biases for w1 (GPT OSS Triton). + - w2_bias: Optional biases for w2 (GPT OSS Triton). + - w1_zp: Optional w1 zero points for int4/int8 quantization. + - w2_zp: Optional w2 zero points for int4/int8 quantization.
+ """ + assert (not isinstance(quant_dtype, str) or quant_dtype == "nvfp4" + or quant_dtype == "mxfp4") + a_shape, w_shape = _quant_flags_to_group_shape(quant_dtype, + per_act_token_quant, + per_out_ch_quant, + block_shape) + quant_config = FusedMoEQuantConfig( + _a1=FusedMoEQuantDesc(quant_dtype, a_shape, a1_scale, a1_gscale), + _a2=FusedMoEQuantDesc(quant_dtype, a_shape, a2_scale, a2_gscale), + _w1=FusedMoEQuantDesc(quant_dtype, w_shape, w1_scale, g1_alphas, + w1_zp, w1_bias), + _w2=FusedMoEQuantDesc(quant_dtype, w_shape, w2_scale, g2_alphas, + w2_zp, w2_bias), ) + assert quant_config.per_act_token_quant == per_act_token_quant + assert quant_config.per_out_ch_quant == per_out_ch_quant + assert quant_config.block_shape == block_shape + return quant_config + + +def fp8_w8a8_moe_quant_config( + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, + per_act_token_quant: bool = False, + per_out_ch_quant: bool = False, + block_shape: Optional[list[int]] = None, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for fp8 activations and fp8 weights. + """ + return FusedMoEQuantConfig.make(torch.float8_e4m3fn, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + per_act_token_quant=per_act_token_quant, + per_out_ch_quant=per_out_ch_quant, + block_shape=block_shape) + + +def int8_w8a8_moe_quant_config( + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + a1_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], + per_act_token_quant: bool = False, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for int8 activations and int8 weights. + """ + return FusedMoEQuantConfig.make( + torch.int8, + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + per_act_token_quant=per_act_token_quant, + per_out_ch_quant=False, + block_shape=None, + ) + + +def mxfp4_w4a4_moe_quant_config( + w1_scale: Union[torch.Tensor, "PrecisionConfig"], + w2_scale: Union[torch.Tensor, "PrecisionConfig"], + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None, + block_shape: Optional[list[int]] = None, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for mxfp4 activations and mxfp4 weights. + """ + return FusedMoEQuantConfig.make( + "mxfp4", + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + w1_bias=w1_bias, + w2_bias=w2_bias, + per_act_token_quant=False, + per_out_ch_quant=False, + block_shape=block_shape, + ) + + +def nvfp4_moe_quant_config( + g1_alphas: torch.Tensor, + g2_alphas: torch.Tensor, + a1_gscale: torch.Tensor, + a2_gscale: torch.Tensor, + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for mxfp4 activations and nvp4 weights. + """ + return FusedMoEQuantConfig.make( + "nvfp4", + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_gscale=a1_gscale, + a2_gscale=a2_gscale, + g1_alphas=g1_alphas, + g2_alphas=g2_alphas, + per_act_token_quant=False, + per_out_ch_quant=False, + block_shape=None, + ) + + +def int4_w4a16_moe_quant_config( + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + w1_zp: Optional[torch.Tensor], + w2_zp: Optional[torch.Tensor], + block_shape: Optional[list[int]] = None, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for 16-bit float activations and int4 weights. + Note: Activations are pre-quantized. 
+ """ + group_shape = GroupShape(*block_shape) if block_shape is not None else None + return FusedMoEQuantConfig( + _a1=FusedMoEQuantDesc(shape=group_shape), + _a2=FusedMoEQuantDesc(shape=group_shape), + _w1=FusedMoEQuantDesc("int4", group_shape, w1_scale, None, w1_zp), + _w2=FusedMoEQuantDesc("int4", group_shape, w2_scale, None, w2_zp), + ) + + +def int8_w8a16_moe_quant_config( + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + w1_zp: Optional[torch.Tensor], + w2_zp: Optional[torch.Tensor], + block_shape: Optional[list[int]] = None, +) -> FusedMoEQuantConfig: + """ + Construct a quant config for 16-bit float activations and int8 weights. + Note: Activations are pre-quantized. + """ + group_shape = GroupShape(*block_shape) if block_shape is not None else None + return FusedMoEQuantConfig( + _a1=FusedMoEQuantDesc(shape=group_shape), + _a2=FusedMoEQuantDesc(shape=group_shape), + _w1=FusedMoEQuantDesc(torch.int8, group_shape, w1_scale, None, w1_zp), + _w2=FusedMoEQuantDesc(torch.int8, group_shape, w2_scale, None, w2_zp), + ) + + +def biased_moe_quant_config( + w1_bias: Optional[torch.Tensor], + w2_bias: Optional[torch.Tensor], +) -> FusedMoEQuantConfig: + """ + Construct a quant config for unquantized activations with biases. + """ + return FusedMoEQuantConfig( + _a1=FusedMoEQuantDesc(), + _a2=FusedMoEQuantDesc(), + _w1=FusedMoEQuantDesc(bias=w1_bias), + _w2=FusedMoEQuantDesc(bias=w2_bias), + ) + + +# A FusedMoEQuantConfig constant for an unquantized MoE op. +FUSED_MOE_UNQUANTIZED_CONFIG: FusedMoEQuantConfig = FusedMoEQuantConfig.make() @dataclass @@ -315,8 +718,6 @@ class FusedMoEConfig: # The activation type. in_dtype: torch.dtype - quant_config: Optional[FusedMoEQuantConfig] = None - max_num_tokens: int = envs.VLLM_MOE_DP_CHUNK_SIZE has_bias: bool = False @@ -328,34 +729,6 @@ def __post_init__(self): assert self.max_num_tokens > 0 - @property - def quant_dtype(self) -> Union[torch.dtype, str, None]: - if self.quant_config is not None: - return self.quant_config.quant_dtype - else: - return None - - @property - def block_shape(self) -> Optional[list[int]]: - if self.quant_config is not None: - return self.quant_config.block_shape - else: - return None - - @property - def per_act_token_quant(self) -> bool: - if self.quant_config is not None: - return self.quant_config.per_act_token_quant - else: - return False - - @property - def per_out_ch_quant(self) -> bool: - if self.quant_config is not None: - return self.quant_config.per_out_ch_quant - else: - return False - @property def tp_size(self): return self.moe_parallel_config.tp_size @@ -401,97 +774,6 @@ def use_flashinfer_cutlass_kernels(self): """ Whether to use FlashInfer cutlass kernels for NVFP4 MoE. 
""" - return (self.quant_config is not None - and self.quant_config.quant_dtype == "nvfp4" - and envs.VLLM_USE_FLASHINFER_MOE_FP4 + return (envs.VLLM_USE_FLASHINFER_MOE_FP4 and has_flashinfer_cutlass_fused_moe() and envs.VLLM_FLASHINFER_MOE_BACKEND == "throughput") - - @staticmethod - def make( - num_experts: int, - experts_per_token: int, - hidden_dim: int, - num_local_experts: int, - moe_parallel_config: FusedMoEParallelConfig, - in_dtype: torch.dtype, - max_num_tokens: int = envs.VLLM_MOE_DP_CHUNK_SIZE, - quant_config: Optional[Union[FusedMoEQuantConfig, - QuantizationConfig]] = None, - has_bias: bool = False, - ) -> "FusedMoEConfig": - - _quant_config: Optional[FusedMoEQuantConfig] = None - - if quant_config is not None and isinstance(quant_config, - QuantizationConfig): - if hasattr(quant_config, 'weight_block_size'): - block_shape = quant_config.weight_block_size - else: - block_shape = None - per_act_token_quant = False - per_out_ch_quant = False - quant_dtype: Union[torch.dtype, str, None] = None - - input_quant = get_quant_config_input_quant(quant_config) - weight_quant = get_quant_config_weight_quant(quant_config) - - if input_quant is not None: - per_act_token_quant = (input_quant.strategy - == QuantizationStrategy.TOKEN - if input_quant is not None else False) - - if input_quant.num_bits == 8: - if input_quant.type == QuantizationType.FLOAT: - quant_dtype = torch.float8_e4m3fn - elif input_quant.type == QuantizationType.INT: - quant_dtype = torch.int8 - - from vllm.model_executor.layers.quantization.fp8 import Fp8Config - if quant_dtype is None and isinstance(quant_config, Fp8Config): - quant_dtype = torch.float8_e4m3fn - - from vllm.model_executor.layers.quantization.mxfp4 import ( - Mxfp4Config) - if (quant_dtype is None and isinstance(quant_config, Mxfp4Config) - and envs.VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8): - quant_dtype = "mxfp8" - - from vllm.model_executor.layers.quantization.modelopt import ( - ModelOptNvFp4Config) - if quant_dtype is None and isinstance(quant_config, - ModelOptNvFp4Config): - quant_dtype = "nvfp4" - - if weight_quant is not None: - per_out_ch_quant = ( - weight_quant.strategy == QuantizationStrategy.CHANNEL) - - if quant_dtype is not None: - _quant_config = FusedMoEQuantConfig( - quant_dtype=quant_dtype, - per_act_token_quant=per_act_token_quant, - per_out_ch_quant=per_out_ch_quant, - block_shape=block_shape, - ) - else: - _quant_config = FusedMoEQuantConfig() - if moe_parallel_config.dp_size > 1: - logger.warning_once("MoE DP setup unable to determine " - "quantization scheme or unsupported " - "quantization type. 
This model will " - "not run with DP enabled.") - else: - _quant_config = quant_config - - return FusedMoEConfig( - num_experts=num_experts, - experts_per_token=experts_per_token, - hidden_dim=hidden_dim, - num_local_experts=num_local_experts, - moe_parallel_config=moe_parallel_config, - in_dtype=in_dtype, - quant_config=_quant_config, - max_num_tokens=max_num_tokens, - has_bias=has_bias, - ) diff --git a/vllm/model_executor/layers/fused_moe/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/cutlass_moe.py index 95d23ec0346c..957ffca0d124 100644 --- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py @@ -211,21 +211,14 @@ class CutlassExpertsFp8Base(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, out_dtype: Optional[torch.dtype], - per_act_token_quant: bool, - per_out_ch_quant: bool, ab_strides1: torch.Tensor, ab_strides2: torch.Tensor, c_strides1: torch.Tensor, c_strides2: torch.Tensor, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, ): - super().__init__( - FusedMoEQuantConfig( - quant_dtype=torch.float8_e4m3fn, - per_act_token_quant=per_act_token_quant, - per_out_ch_quant=per_out_ch_quant, - block_shape=block_shape, - )) + assert quant_config.use_fp8_w8a8 + super().__init__(quant_config) self.out_dtype = out_dtype self.ab_strides1 = ab_strides1 self.ab_strides2 = ab_strides2 @@ -247,19 +240,14 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): - assert w1_zp is None, "w1_zp is not supported in CUTLASS MoE" - assert w2_zp is None, "w2_zp is not supported in CUTLASS MoE" + assert self.w1_zp is None, "w1_zp is not supported in CUTLASS MoE" + assert self.w2_zp is None, "w2_zp is not supported in CUTLASS MoE" expert_num_tokens = None if expert_tokens_meta is not None: @@ -273,9 +261,10 @@ def apply( in_dtype = hidden_states.dtype run_cutlass_moe_fp8( output, hidden_states, w1, w2, topk_ids, activation_callable, - global_num_experts, expert_map, w1_scale, w2_scale, a1q_scale, - a2_scale, self.ab_strides1, self.ab_strides2, self.c_strides1, - self.c_strides2, workspace13, workspace2, expert_num_tokens, + global_num_experts, expert_map, self.w1_scale, self.w2_scale, + a1q_scale, self.a2_scale, self.ab_strides1, self.ab_strides2, + self.c_strides1, self.c_strides2, workspace13, workspace2, + expert_num_tokens, self.out_dtype if self.out_dtype is not None else in_dtype, self.per_act_token_quant, self.per_out_ch_quant, use_batched_format, topk_weights) @@ -286,23 +275,19 @@ class CutlassExpertsFp8(CutlassExpertsFp8Base): def __init__( self, out_dtype: Optional[torch.dtype], - per_act_token_quant: bool, - per_out_ch_quant: bool, ab_strides1: torch.Tensor, ab_strides2: torch.Tensor, c_strides1: torch.Tensor, c_strides2: torch.Tensor, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, ): super().__init__( out_dtype, - per_act_token_quant, - per_out_ch_quant, ab_strides1, ab_strides2, c_strides1, c_strides2, - block_shape, + quant_config, ) @property @@ -348,23 +333,19 @@ def __init__( max_experts_per_worker: int, num_dispatchers: int, out_dtype: Optional[torch.dtype], - 
per_act_token_quant: bool, - per_out_ch_quant: bool, ab_strides1: torch.Tensor, ab_strides2: torch.Tensor, c_strides1: torch.Tensor, c_strides2: torch.Tensor, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, ): super().__init__( out_dtype, - per_act_token_quant, - per_out_ch_quant, ab_strides1, ab_strides2, c_strides1, c_strides2, - block_shape, + quant_config, ) assert max_experts_per_worker > 0 self.max_experts_per_worker = max_experts_per_worker @@ -414,16 +395,12 @@ def cutlass_moe_fp8( w2_q: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, - w1_scale: torch.Tensor, - w2_scale: torch.Tensor, ab_strides1: torch.Tensor, ab_strides2: torch.Tensor, c_strides1: torch.Tensor, c_strides2: torch.Tensor, - per_act_token: Optional[bool] = None, + quant_config: FusedMoEQuantConfig, activation: str = "silu", - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, expert_map: Optional[torch.Tensor] = None, apply_router_weight_on_input: bool = False, global_num_experts: int = -1, @@ -475,10 +452,18 @@ def cutlass_moe_fp8( Returns: - torch.Tensor: The fp16 output tensor after applying the MoE layer. """ - if per_act_token is None: - per_act_token = a1_scale.numel() != 1 if a1_scale is not None else ( - a2_scale.numel() != 1 if a2_scale is not None else False) - per_out_ch = w1_scale.numel() != w1_q.size(0) + assert quant_config is not None + + if quant_config.a1_scale is not None: + assert (quant_config.per_act_token_quant == + (quant_config.a1_scale.numel() != 1)) + if quant_config.a2_scale is not None: + assert (quant_config.per_act_token_quant == + (quant_config.a2_scale.numel() != 1)) + + assert (quant_config.w1_scale is None + or (quant_config.per_out_ch_quant == (quant_config.w1_scale.size(1) == w1_q.size(1)))) num_experts = global_num_experts if global_num_experts != -1 else w1_q.size( 0) @@ -487,12 +472,11 @@ def cutlass_moe_fp8( MoEPrepareAndFinalizeNoEP(), CutlassExpertsFp8( out_dtype=a.dtype, - per_act_token_quant=per_act_token, - per_out_ch_quant=per_out_ch, ab_strides1=ab_strides1, ab_strides2=ab_strides2, c_strides1=c_strides1, c_strides2=c_strides2, + quant_config=quant_config, ), ) @@ -502,14 +486,9 @@ def cutlass_moe_fp8( w2_q, topk_weights, topk_ids, - False, - activation, - num_experts, - expert_map, - w1_scale, - w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, + activation=activation, + global_num_experts=num_experts, + expert_map=expert_map, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -542,7 +521,7 @@ def run_cutlass_moe_fp4( ) -> None: """ MoE implementation for FP4 Inputs - + # Gemm 1 a: Input tensor: [m, k] (half/bfloat16) a1_gscale: Activation scale per expert: [e] (float32) w1(gate and up projection) (not an argument to cutlass_moe_fp4): [e, 2 * n, k] w1_fp4: [e, 2 * n, k // 2], dtype: torch.uint8 (stacked fp4: E2M1) (Note: `n` is the up projection output dim, `k` is the input dim in full precision) w1_blockscale: [e, 2 * n, k // block_size] (float8_e4m3) (Block size = 16 for NVFP4) - + # Gemm 2 a2_gscale: Activation scale per expert: [e] w2(down projection) (not an argument to cutlass_moe_fp4): [e, k, n] w2_fp4: [e, k, n // 2], dtype: torch.uint8 (stacked E2M1) w2_blockscale: [e, k, n // block_size], dtype: float8_e4m3 - + topk_weights: [m, topk] dtype: float8 topk_ids: [m, topk] dtype: float8 - + m, n, k: Unquantized weight shapes, dtype: int e: number of experts, dtype: int @@ -652,42 +631,21 @@ def run_cutlass_moe_fp4( return +# Split into batched and non-batched class CutlassExpertsFp4(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, - g1_alphas: torch.Tensor, - g2_alphas: torch.Tensor, - a1_gscale: torch.Tensor, - a2_gscale:
torch.Tensor, max_experts_per_worker: int, out_dtype: torch.dtype, - per_act_token_quant: bool, - per_out_ch_quant: bool, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, use_batched_format: bool = False, ): - super().__init__( - # NVFP4 requires two levels of quantization, which involves - # computing some scaling factors dynamically. This makes it - # incompatible with the typical prepare -> MoE -> finalize - # pipeline. Move the quantization logic into the MoE body. - FusedMoEQuantConfig( - quant_dtype=None, # skip quantization in prepare/finalize - per_act_token_quant=per_act_token_quant, - per_out_ch_quant=per_out_ch_quant, - block_shape=block_shape, - )) + super().__init__(quant_config) self.max_experts_per_worker = max_experts_per_worker self.out_dtype = out_dtype self.use_batched_format = use_batched_format - # TODO(bnell): put this stuff into quant config? - self.g1_alphas = g1_alphas - self.g2_alphas = g2_alphas - self.a1_gscale = a1_gscale - self.a2_gscale = a2_gscale - @property def activation_formats( self @@ -746,12 +704,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: torch.Tensor, - w2_scale: torch.Tensor, - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], - a1q_scale: Optional[torch.Tensor], - a2_scale: torch.Tensor, + a1q_scale: Optional[torch.Tensor], # unused workspace13: Optional[torch.Tensor], workspace2: Optional[torch.Tensor], expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -765,11 +718,11 @@ def apply( a=hidden_states, a1_gscale=self.a1_gscale, w1_fp4=w1, - w1_blockscale=w1_scale, + w1_blockscale=self.w1_scale, w1_alphas=self.g1_alphas, a2_gscale=self.a2_gscale, w2_fp4=w2, - w2_blockscale=w2_scale, + w2_blockscale=self.w2_scale, w2_alphas=self.g2_alphas, topk_weights=topk_weights, topk_ids=topk_ids, @@ -788,14 +741,9 @@ def cutlass_moe_fp4( a: torch.Tensor, w1_fp4: torch.Tensor, w2_fp4: torch.Tensor, - w1_blockscale: torch.Tensor, - w2_blockscale: torch.Tensor, - g1_alphas: torch.Tensor, - g2_alphas: torch.Tensor, - a1_gscale: torch.Tensor, - a2_gscale: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, + quant_config: FusedMoEQuantConfig, m: int, n: int, k: int, @@ -805,17 +753,31 @@ def cutlass_moe_fp4( assert expert_map is None, ("Expert Parallelism / expert_map " "is currently not supported for " "ModelOptNvFp4FusedMoE's cutlass_moe_fp4.") + + # TODO(bnell): this feels a bit hacky + # NVFP4 requires two levels of quantization, which involves + # computing some scaling factors dynamically. This makes it + # incompatible with the typical prepare -> MoE -> finalize + # pipeline. Move the quantization logic into the MoE body. 
+ quant_config = FusedMoEQuantConfig.make( + quant_dtype=None, # skip quantization in prepare/finalize + per_act_token_quant=quant_config.per_act_token_quant, + per_out_ch_quant=quant_config.per_out_ch_quant, + block_shape=quant_config.block_shape, + g1_alphas=quant_config.g1_alphas, + g2_alphas=quant_config.g2_alphas, + a1_gscale=quant_config.a1_gscale, + a2_gscale=quant_config.a2_gscale, + w1_scale=quant_config.w1_scale, + w2_scale=quant_config.w2_scale, + ) + fn = mk.FusedMoEModularKernel( MoEPrepareAndFinalizeNoEP(), CutlassExpertsFp4( - g1_alphas, - g2_alphas, - a1_gscale, - a2_gscale, max_experts_per_worker=e, out_dtype=a.dtype, - per_act_token_quant=False, - per_out_ch_quant=False, + quant_config=quant_config, use_batched_format=False, ), ) @@ -830,10 +792,6 @@ def cutlass_moe_fp4( activation="silu", global_num_experts=e, expert_map=None, - w1_scale=w1_blockscale, - w2_scale=w2_blockscale, - a1_scale=None, - a2_scale=None, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -891,6 +849,7 @@ def _valid_cutlass_block_scaled_grouped_gemm_shape(N: int, K: int): return True +# TODO(bnell): would be nice combine/integrate with regular cutlass_fp8. def run_cutlass_block_scaled_fused_experts( a: torch.Tensor, w1: torch.Tensor, diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py index c0bfda73eee0..8830b95df7cf 100644 --- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import functools from typing import Optional import torch @@ -9,9 +8,11 @@ import vllm.envs as env import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.logger import init_logger -from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.fused_moe.deep_gemm_utils import ( - compute_aligned_M, deepgemm_moe_permute, deepgemm_unpermute_and_reduce) + compute_aligned_M, deep_gemm_block_shape, deepgemm_moe_permute, + deepgemm_unpermute_and_reduce) from vllm.model_executor.layers.fused_moe.prepare_finalize import ( MoEPrepareAndFinalizeNoEP) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( @@ -25,14 +26,6 @@ logger = init_logger(__name__) -@functools.cache -def deep_gemm_block_shape() -> list[int]: - # Lazy import to avoid CUDA initialization problems. 
- import deep_gemm as dg - block = dg.get_m_alignment_for_contiguous_layout() - return [block, block] - - def _valid_deep_gemm_shape(M: int, N: int, K: int) -> bool: align = deep_gemm_block_shape()[0] return align <= M and N % align == 0 and K % align == 0 @@ -163,13 +156,12 @@ def _warmup(w: torch.Tensor, w_scale: torch.Tensor): class DeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): - def __init__(self): - super().__init__( - FusedMoEQuantConfig( - quant_dtype=torch.float8_e4m3fn, - per_act_token_quant=False, - block_shape=deep_gemm_block_shape(), - )) + def __init__(self, quant_config: FusedMoEQuantConfig): + super().__init__(quant_config) + assert quant_config.block_shape == deep_gemm_block_shape() + assert quant_config.quant_dtype == torch.float8_e4m3fn + assert not quant_config.per_act_token_quant + assert not quant_config.per_out_ch_quant @property def activation_formats( @@ -221,21 +213,17 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): - assert self.block_shape is not None assert a1q_scale is not None - assert w1_scale is not None - assert w2_scale is not None + assert self.a2_scale is None + assert self.block_shape is not None + assert self.w1_scale is not None + assert self.w2_scale is not None a1q = hidden_states _, N, K = w1.size() @@ -270,7 +258,7 @@ def apply( aq_out=a1q_perm) assert a1q.size(0) == M_sum - m_grouped_fp8_gemm_nt_contiguous((a1q, a1q_scale), (w1, w1_scale), + m_grouped_fp8_gemm_nt_contiguous((a1q, a1q_scale), (w1, self.w1_scale), mm1_out, expert_ids) self.activation(activation, act_out, mm1_out.view(-1, N)) @@ -281,7 +269,7 @@ def apply( column_major_scales=True, out_q=quant_out) - m_grouped_fp8_gemm_nt_contiguous((a2q, a2q_scale), (w2, w2_scale), + m_grouped_fp8_gemm_nt_contiguous((a2q, a2q_scale), (w2, self.w2_scale), mm2_out, expert_ids) if apply_router_weight_on_input: @@ -348,9 +336,16 @@ def deep_gemm_moe_fp8( Returns: - torch.Tensor: The bfloat16 output tensor after applying the MoE layer. 
""" + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + block_shape=deep_gemm_block_shape()) + fn = mk.FusedMoEModularKernel( MoEPrepareAndFinalizeNoEP(), - DeepGemmExperts(), + DeepGemmExperts(quant_config), ) return fn( hidden_states, @@ -358,13 +353,9 @@ def deep_gemm_moe_fp8( w2, topk_weights, topk_ids, - inplace, - activation, - global_num_experts, - expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, + inplace=inplace, + activation=activation, + global_num_experts=global_num_experts, + expert_map=expert_map, apply_router_weight_on_input=apply_router_weight_on_input, ) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 92cbb1742974..5d6b9c87a6b7 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -183,8 +183,6 @@ def supports_async(self) -> bool: def prepare_async( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -204,7 +202,7 @@ def prepare_async( # Quant and Dispatch a1q, a1q_scale = moe_kernel_quantize_input( a1, - a1_scale, + quant_config.a1_scale, quant_dtype=quant_config.quant_dtype, per_act_token_quant=quant_config.per_act_token_quant, block_shape=quant_config.block_shape, @@ -215,7 +213,7 @@ def prepare_async( else: a1q = a1 a1q_scale = None - a1_post_scale = a1_scale + a1_post_scale = quant_config.a1_scale return (lambda *args: None, self._do_dispatch(tokens=a1q, @@ -229,8 +227,6 @@ def prepare_async( def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -238,9 +234,8 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - (_, receiver) = self.prepare_async(a1, a1_scale, a2_scale, - topk_weights, topk_ids, num_experts, - expert_map, + (_, receiver) = self.prepare_async(a1, topk_weights, topk_ids, + num_experts, expert_map, apply_router_weight_on_input, quant_config) return receiver() diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index 61f8297f0f14..01df7770463d 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -77,15 +77,13 @@ def topk_indices_dtype(self) -> Optional[torch.dtype]: def _do_quant( self, x: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - a1_scale: Optional[torch.Tensor], a1_dtype: torch.dtype, - quant_dtype: Union[torch.dtype, str, None], - per_act_token_quant: bool, - block_shape: Optional[list[int]], + quant_config: FusedMoEQuantConfig, ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: - block_k = block_shape[1] if block_shape is not None else None if self.use_fp8_dispatch: + block_k = quant_config.block_shape[ + 1] if quant_config.block_shape is not None else None if block_k == DEEPEP_QUANT_BLOCK_SIZE: # DeepEP kernels did the quantization for us. 
x, x_scales = x @@ -101,12 +99,12 @@ def _do_quant( # TODO (varun): Optimization - Use a batched version of quant x = x.view((-1, hidden_dim)) - x, x_scales = moe_kernel_quantize_input(x, a1_scale, quant_dtype, - per_act_token_quant, - block_shape) + x, x_scales = moe_kernel_quantize_input( + x, quant_config.a1_scale, quant_config.quant_dtype, + quant_config.per_act_token_quant, quant_config.block_shape) x = x.view((num_experts, -1, hidden_dim)) - if quant_dtype is not None: + if quant_config.quant_dtype is not None: assert x_scales is not None x_scales = normalize_batched_scales_shape(x_scales, num_experts) @@ -118,8 +116,6 @@ def supports_async(self) -> bool: def prepare_async( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -139,9 +135,10 @@ def prepare_async( assert hidden_size % 128 == 0, \ "DeepEP kernels quantize the inputs in blocks of shape 128" - has_per_token_scales = a1_scale.numel( - ) != 1 if a1_scale is not None else ( - a2_scale.numel() != 1 if a2_scale is not None else False) + has_per_token_scales = quant_config.a1_scale.numel( + ) != 1 if quant_config.a1_scale is not None else ( + quant_config.a2_scale.numel() != 1 + if quant_config.a2_scale is not None else False) assert not has_per_token_scales, ( "low_latency kernels doesn't support dispatching per-token scales") @@ -163,20 +160,21 @@ def prepare_async( return_recv_hook=True) self.handles[a2a_idx] = handle - return (hook, lambda: self._receiver(expert_x, expert_num_tokens, - a1_scale, a1.dtype, quant_config)) + return ( + hook, + lambda: self._receiver(expert_x, expert_num_tokens, quant_config. + a1_scale, a1.dtype, quant_config)) def _receiver( self, expert_x: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], expert_num_tokens: torch.Tensor, - a1_scale, - a1_dtype, + a1_scale: Optional[torch.Tensor], + a1_dtype: torch.dtype, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - expert_x, expert_x_scale = self._do_quant( - expert_x, a1_scale, a1_dtype, quant_config.quant_dtype, - quant_config.per_act_token_quant, quant_config.block_shape) + expert_x, expert_x_scale = self._do_quant(expert_x, a1_dtype, + quant_config) expert_tokens_meta = mk.ExpertTokensMetadata( expert_num_tokens=expert_num_tokens, expert_num_tokens_cpu=None) @@ -186,8 +184,6 @@ def _receiver( def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -195,8 +191,7 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - hook, receiver = self.prepare_async(a1, a1_scale, a2_scale, - topk_weights, topk_ids, + hook, receiver = self.prepare_async(a1, topk_weights, topk_ids, num_experts, expert_map, apply_router_weight_on_input, quant_config) diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py index feab3f74cac5..6eeec18a6ec8 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional, Union +from typing import Optional import torch @@ -44,33 +44,20 @@ class FlashInferExperts(mk.FusedMoEPermuteExpertsUnpermute): def 
__init__( self, - g1_alphas: torch.Tensor, - g2_alphas: torch.Tensor, - a1_gscale: torch.Tensor, - a2_gscale: torch.Tensor, out_dtype: torch.dtype, - quant_dtype: Union[torch.dtype, str, None], + quant_config: FusedMoEQuantConfig, ep_rank: int = 0, ep_size: int = 1, tp_rank: int = 0, tp_size: int = 1, ): - super().__init__( - FusedMoEQuantConfig( - quant_dtype=quant_dtype, - per_act_token_quant=False, - block_shape=None, - )) - assert quant_dtype in ("nvfp4", torch.float8_e4m3fn), ( + super().__init__(quant_config) + assert quant_config.quant_dtype in ("nvfp4", torch.float8_e4m3fn), ( "Only nvfp4,fp8 quantization are currently supported.") self.ep_rank = ep_rank self.ep_size = ep_size self.tp_rank = tp_rank self.tp_size = tp_size - self.g1_alphas = g1_alphas - self.g2_alphas = g2_alphas - self.a1_gscale = a1_gscale - self.a2_gscale = a2_gscale self.out_dtype = out_dtype @property @@ -141,12 +128,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], # Not used workspace13: Optional[torch.Tensor], workspace2: Optional[torch.Tensor], expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -162,17 +144,17 @@ def apply( fc2_expert_weights = w2 else: # Ensure w1_scale and w2_scale are not None before calling view - assert w1_scale is not None and w2_scale is not None, ( + assert self.w1_scale is not None and self.w2_scale is not None, ( "w1_scale and w2_scale must not " "be None for FlashInferExperts") # Flashinfer CUTLASS kernel takes scalar global scales, # min because inv_scale. quant_scales = [ self.a1_gscale, - w1_scale.view(torch.int32), + self.w1_scale.view(torch.int32), self.g1_alphas, self.a2_gscale, - w2_scale.view(torch.int32), + self.w2_scale.view(torch.int32), self.g2_alphas, ] # FlashInfer API requires weight to be long for nvfp4 @@ -202,12 +184,7 @@ def flashinfer_cutlass_moe_fp4( w2: torch.Tensor, topk_weights: torch.Tensor, topk_ids: torch.Tensor, - w1_scale: torch.Tensor, - w2_scale: torch.Tensor, - g1_alphas: torch.Tensor, - g2_alphas: torch.Tensor, - a1_gscale: torch.Tensor, - a2_gscale: torch.Tensor, + quant_config: FusedMoEQuantConfig, inplace: bool = False, activation: str = "silu", global_num_experts: int = -1, @@ -216,15 +193,10 @@ def flashinfer_cutlass_moe_fp4( ) -> torch.Tensor: fused_experts = mk.FusedMoEModularKernel( - FlashInferCutlassMoEPrepareAndFinalize(use_dp=False, - a1_gscale=a1_gscale), + FlashInferCutlassMoEPrepareAndFinalize(use_dp=False), FlashInferExperts( - g1_alphas=g1_alphas, - g2_alphas=g2_alphas, - a1_gscale=a1_gscale, - a2_gscale=a2_gscale, out_dtype=hidden_states.dtype, - quant_dtype="nvfp4", + quant_config=quant_config, )) return fused_experts( @@ -237,7 +209,5 @@ def flashinfer_cutlass_moe_fp4( activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, apply_router_weight_on_input=apply_router_weight_on_input, ) diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py index 157cb36d4ffd..8c7eff59f3cd 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py @@ -22,13 +22,11 @@ class 
FlashInferCutlassMoEPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize): def __init__( self, use_dp: bool, - a1_gscale: Optional[torch.Tensor], num_dispatchers: int = 1, ): super().__init__() self.num_dispatchers_ = num_dispatchers self.use_dp = use_dp - self.a1_gscale = a1_gscale self.local_tokens = None @property @@ -47,14 +45,11 @@ def num_dispatchers(self) -> int: def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], # Not used - a2_scale: Optional[torch.Tensor], # Not used topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, - # TODO(bnell): use quant_config + scales instead of ctor args quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: @@ -67,7 +62,7 @@ def prepare( a1q, a1q_scale = moe_kernel_quantize_input( a1, - self.a1_gscale, + quant_config.a1_gscale, quant_config.quant_dtype, quant_config.per_act_token_quant, quant_config.block_shape, diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py new file mode 100644 index 000000000000..e358143fac7c --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py @@ -0,0 +1,185 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import List # noqa: UP035 +from typing import Optional + +import torch + +from vllm.model_executor.layers.fused_moe.utils import ( + moe_kernel_quantize_input) +from vllm.model_executor.layers.quantization.utils.flashinfer_utils import ( + calculate_tile_tokens_dim) +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + per_token_group_quant_fp8) +from vllm.utils import direct_register_custom_op + + +def flashinfer_fused_moe_blockscale_fp8( + routing_logits: torch.Tensor, + routing_bias: torch.Tensor, + x: torch.Tensor, + w13_weight: torch.Tensor, + w13_weight_scale_inv: torch.Tensor, + w2_weight: torch.Tensor, + w2_weight_scale_inv: torch.Tensor, + global_num_experts: int, + top_k: int, + num_expert_group: int, + topk_group: int, + intermediate_size: int, + expert_offset: int, + local_num_experts: int, + block_shape: List[int], #noqa: UP006 + routed_scaling: float = 1.0) -> torch.Tensor: + from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe + assert top_k <= global_num_experts + assert top_k <= 8 + assert topk_group <= 4 + assert global_num_experts > num_expert_group + assert global_num_experts % num_expert_group == 0 + assert global_num_experts % 4 == 0 + assert top_k < (topk_group * global_num_experts / num_expert_group) + assert block_shape == [128, 128] + + a_q, a_sf = per_token_group_quant_fp8(x, block_shape[1]) + # NOTE: scales of hidden states have to be transposed! 
+ a_sf_t = a_sf.t().contiguous() + return flashinfer_trtllm_fp8_block_scale_moe( + routing_logits=routing_logits, + routing_bias=routing_bias, + hidden_states=a_q, + hidden_states_scale=a_sf_t, + gemm1_weights=w13_weight, + gemm1_weights_scale=w13_weight_scale_inv, + gemm2_weights=w2_weight, + gemm2_weights_scale=w2_weight_scale_inv, + num_experts=global_num_experts, + top_k=top_k, + n_group=num_expert_group, + topk_group=topk_group, + intermediate_size=intermediate_size, + local_expert_offset=expert_offset, + local_num_experts=local_num_experts, + routed_scaling_factor=routed_scaling, + tile_tokens_dim=calculate_tile_tokens_dim(x.shape[0], top_k, + global_num_experts), + routing_method_type=2, # DeepSeek-styled routing method + use_shuffled_weight=False, + ) + + +def flashinfer_fused_moe_blockscale_fp8_fake( + routing_logits: torch.Tensor, + routing_bias: torch.Tensor, + x: torch.Tensor, + w13_weight: torch.Tensor, + w13_weight_scale_inv: torch.Tensor, + w2_weight: torch.Tensor, + w2_weight_scale_inv: torch.Tensor, + global_num_experts: int, + top_k: int, + num_expert_group: int, + topk_group: int, + intermediate_size: int, + expert_offset: int, + local_num_experts: int, + block_shape: list[int], + routed_scaling: float = 1.0) -> torch.Tensor: + return torch.empty_like(x) + + +# TODO(bnell): Does this really need to be a torch.op? +direct_register_custom_op( + op_name="flashinfer_fused_moe_blockscale_fp8", + op_func=flashinfer_fused_moe_blockscale_fp8, + mutates_args=[], + fake_impl=flashinfer_fused_moe_blockscale_fp8_fake, + tags=(torch.Tag.needs_fixed_stride_order, ), +) + + +def flashinfer_fused_moe_per_tensor_scale_fp8( + routing_logits: torch.Tensor, + routing_bias: Optional[torch.Tensor], + hidden_states: torch.Tensor, + input_scale: torch.Tensor, + gemm1_weights: torch.Tensor, + gemm2_weights: torch.Tensor, + output1_scales_scalar: torch.Tensor, + output1_scales_gate_scalar: torch.Tensor, + output2_scales_scalar: torch.Tensor, + num_experts: int, + top_k: int, + num_expert_group: Optional[int], + topk_group: Optional[int], + intermediate_size: int, + local_expert_offset: int, + local_num_experts: int, + use_routing_scales_on_input: bool, + routing_method_type: int, + routed_scaling_factor: float = 1.0) -> torch.Tensor: + num_expert_group = num_expert_group if num_expert_group is not None else 0 + topk_group = topk_group if topk_group is not None else 0 + + quant_hidden_states, _ = moe_kernel_quantize_input( + hidden_states, + input_scale, + quant_dtype=torch.float8_e4m3fn, + per_act_token_quant=False) + + from vllm.utils.flashinfer import ( + flashinfer_trtllm_fp8_per_tensor_scale_moe) + return flashinfer_trtllm_fp8_per_tensor_scale_moe( + routing_logits=routing_logits, + routing_bias=routing_bias, + hidden_states=quant_hidden_states, + gemm1_weights=gemm1_weights, + output1_scales_scalar=output1_scales_scalar, + output1_scales_gate_scalar=output1_scales_gate_scalar, + gemm2_weights=gemm2_weights, + output2_scales_scalar=output2_scales_scalar, + num_experts=num_experts, + top_k=top_k, + n_group=num_expert_group, + topk_group=topk_group, + intermediate_size=intermediate_size, + local_expert_offset=local_expert_offset, + local_num_experts=local_num_experts, + routed_scaling_factor=routed_scaling_factor, + use_routing_scales_on_input=use_routing_scales_on_input, + tile_tokens_dim=calculate_tile_tokens_dim(hidden_states.shape[0], + top_k, num_experts), + routing_method_type=routing_method_type) + + +def flashinfer_fused_moe_per_tensor_scale_fp8_fake( + routing_logits: torch.Tensor, + 
routing_bias: Optional[torch.Tensor], + hidden_states: torch.Tensor, + input_scale: torch.Tensor, + gemm1_weights: torch.Tensor, + gemm2_weights: torch.Tensor, + output1_scales_scalar: torch.Tensor, + output1_scales_gate_scalar: torch.Tensor, + output2_scales_scalar: torch.Tensor, + num_experts: int, + top_k: int, + num_expert_group: Optional[int], + topk_group: Optional[int], + intermediate_size: int, + local_expert_offset: int, + local_num_experts: int, + use_routing_scales_on_input: bool, + routing_method_type: int, + routed_scaling_factor: float = 1.0) -> torch.Tensor: + return torch.empty_like(hidden_states) + + +# TODO(bnell): Does this really need to be a torch.op? +direct_register_custom_op( + op_name="flashinfer_fused_moe_per_tensor_scale_fp8", + op_func=flashinfer_fused_moe_per_tensor_scale_fp8, + mutates_args=["hidden_states"], + fake_impl=flashinfer_fused_moe_per_tensor_scale_fp8_fake, + tags=(torch.Tag.needs_fixed_stride_order, ), +) diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py index 88063668e918..fe6ac458a959 100644 --- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py @@ -8,7 +8,7 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.fused_moe import ( - get_config_dtype_str, try_get_optimal_moe_config) + try_get_optimal_moe_config) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceDelegate, TopKWeightAndReduceNaiveBatched) from vllm.model_executor.layers.fused_moe.utils import ( @@ -498,8 +498,6 @@ def num_dispatchers(self) -> int: def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -545,14 +543,13 @@ def prepare( dtype=torch.float32, device=a1.device) else: - assert a1_scale is None + assert quant_config.a1_scale is None b_a1_scale = None first_expert = num_local_experts * self.rank last_expert = first_expert + num_local_experts - a1_scale = normalize_scales_shape(a1_scale) - a2_scale = normalize_scales_shape(a2_scale) + a1_scale = normalize_scales_shape(quant_config.a1_scale) for expert_id in range(first_expert, last_expert): topks = torch.any(topk_ids == expert_id, dim=1).flatten() @@ -623,28 +620,13 @@ def __init__( self, max_num_tokens: int, num_dispatchers: int, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - block_shape: Optional[list[int]] = None, - per_act_token_quant: bool = False, + quant_config: FusedMoEQuantConfig, ): - super().__init__( - FusedMoEQuantConfig.make( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - )) - assert not use_int8_w8a8, "NYI" - assert not use_int8_w8a16, "NYI" - assert not use_int4_w4a16, "NYI" - assert not use_mxfp4_w4a4, "NYI" + super().__init__(quant_config) + assert not self.quant_config.use_int8_w8a8, "NYI" + assert not self.quant_config.use_int8_w8a16, "NYI" + assert not self.quant_config.use_int4_w4a16, "NYI" + assert not self.quant_config.use_mxfp4_w4a4, "NYI" self.max_num_tokens = 
max_num_tokens self.num_dispatchers = num_dispatchers @@ -705,12 +687,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -740,10 +717,10 @@ def apply( tmp = _resize_cache(workspace2, (num, N)) if self.quant_config.is_quantized: - assert a1q_scale is not None and w1_scale is not None + assert a1q_scale is not None and self.w1_scale is not None input = self.dequant(hidden_states[expert, :, :], a1q_scale[expert]) - w1_dq = self.dequant(w1[expert], w1_scale[expert]) + w1_dq = self.dequant(w1[expert], self.w1_scale[expert]) input = input[:num] @ w1_dq.transpose(0, 1) else: input = hidden_states[expert, :num, :] @ w1[expert].transpose( @@ -752,8 +729,8 @@ def apply( self.activation(activation, tmp, input.to(tmp.dtype)) if self.quant_config.is_quantized: - assert w2_scale is not None - w2_dq = self.dequant(w2[expert], w2_scale[expert]) + assert self.w2_scale is not None + w2_dq = self.dequant(w2[expert], self.w2_scale[expert]) else: w2_dq = w2[expert] @@ -840,35 +817,15 @@ def __init__( self, max_num_tokens: int, num_dispatchers: int, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_act_token_quant: bool = False, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, ): - super().__init__( - FusedMoEQuantConfig.make( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - )) - assert not use_int8_w8a8, "NYI" - assert not use_int8_w8a16, "NYI" - assert not use_int4_w4a16, "NYI" - assert not use_mxfp4_w4a4, "NYI" + super().__init__(quant_config) + assert not self.quant_config.use_int8_w8a8, "NYI" + assert not self.quant_config.use_int8_w8a16, "NYI" + assert not self.quant_config.use_int4_w4a16, "NYI" + assert not self.quant_config.use_mxfp4_w4a4, "NYI" assert max_num_tokens > 0 assert num_dispatchers > 0 - self.use_fp8_w8a8 = use_fp8_w8a8 - self.use_int8_w8a8 = use_int8_w8a8 - self.use_int4_w4a16 = use_int4_w4a16 - self.use_int8_w8a16 = use_int8_w8a16 - self.use_mxfp4_w4a4 = use_mxfp4_w4a4 self.max_num_tokens = max_num_tokens self.num_dispatchers = num_dispatchers @@ -921,19 +878,14 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): # Check constraints. 
- if self.use_int4_w4a16: + if self.quant_config.use_int4_w4a16: assert hidden_states.size(-1) // 2 == w1.size(2), ( "Hidden size mismatch") else: @@ -958,11 +910,7 @@ def apply( assert w1.size(0) == E assert w2.size(0) == E - config_dtype = get_config_dtype_str(use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, - use_mxfp4_w4a4=self.use_mxfp4_w4a4, - dtype=hidden_states.dtype) + config_dtype = self.quant_config.config_name(hidden_states.dtype) config = try_get_optimal_moe_config( w1.size(), @@ -992,7 +940,8 @@ def apply( intermediate_cache2 = _resize_cache(workspace2, (E, max_num_tokens, N // 2)) - if self.use_fp8_w8a8: + # TODO(bnell): should this be done for any quantized type? + if self.quant_config.use_fp8_w8a8: intermediate_cache1.fill_(0) a1q_scale = normalize_batched_scales_shape(a1q_scale, E) @@ -1005,11 +954,11 @@ def apply( expert_num_tokens=expert_num_tokens, compute_type=compute_type, A_scale=a1q_scale, - B_scale=w1_scale, - B_zp=w1_zp, - use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, + B_scale=self.w1_scale, + B_zp=self.w1_zp, + use_fp8_w8a8=self.quant_config.use_fp8_w8a8, + use_int8_w8a16=self.quant_config.use_int8_w8a16, + use_int4_w4a16=self.quant_config.use_int4_w4a16, config=config, per_act_token_quant=self.per_act_token_quant, block_shape=self.block_shape) @@ -1021,7 +970,7 @@ def apply( intermediate_cache1.view(-1, N)) qintermediate_cache2, a2q_scale = batched_moe_kernel_quantize_input( - intermediate_cache2, a2_scale, max_num_tokens, E, N, + intermediate_cache2, self.a2_scale, max_num_tokens, E, N, expert_num_tokens, self.quant_dtype, self.per_act_token_quant, self.block_shape) @@ -1032,11 +981,11 @@ def apply( expert_num_tokens=expert_num_tokens, compute_type=compute_type, A_scale=a2q_scale, - B_scale=w2_scale, - B_zp=w2_zp, - use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, + B_scale=self.w2_scale, + B_zp=self.w2_zp, + use_fp8_w8a8=self.quant_config.use_fp8_w8a8, + use_int8_w8a16=self.quant_config.use_int8_w8a16, + use_int4_w4a16=self.quant_config.use_int4_w4a16, config=config, per_act_token_quant=self.per_act_token_quant, block_shape=self.block_shape) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 36c2ab8b2d5f..d4de3f640865 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1,13 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Fused MoE kernel.""" +"""Fused MoE Triton kernels.""" import functools import json import os # torch.compile needs typing.List. 
It will fail torch.library.infer_schema # otherwise from typing import List # noqa: UP035 -from typing import Any, Callable, Optional +from typing import Any, Callable, Optional, Union import torch import torch.nn.functional as F @@ -18,7 +18,7 @@ from vllm.logger import init_logger # yapf: disable from vllm.model_executor.layers.fused_moe.config import ( - FusedMoEQuantConfig, get_config_quant_dtype) + FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEQuantConfig, _get_config_dtype_str) from vllm.model_executor.layers.fused_moe.cutlass_moe import ( _valid_cutlass_block_scaled_grouped_gemm, run_cutlass_block_scaled_fused_experts) @@ -32,11 +32,7 @@ from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceNoOP) from vllm.model_executor.layers.fused_moe.utils import ( - _resize_cache, moe_kernel_quantize_input) -from vllm.model_executor.layers.quantization.utils.flashinfer_utils import ( - calculate_tile_tokens_dim) -from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - per_token_group_quant_fp8) + _resize_cache, activation_without_mul, moe_kernel_quantize_input) from vllm.model_executor.layers.quantization.utils.mxfp4_utils import ( dequant_mxfp4) from vllm.platforms import current_platform @@ -1049,87 +1045,66 @@ def fused_grouped_topk( return topk_values.to(torch.float32), topk_indices.to(torch.int32) -def get_config_dtype_str( - dtype: torch.dtype, - use_int4_w4a16: Optional[bool] = False, - use_int8_w8a16: Optional[bool] = False, - use_fp8_w8a8: Optional[bool] = False, - use_mxfp4_w4a4: Optional[bool] = False) -> Optional[str]: - if use_fp8_w8a8: - return "fp8_w8a8" - elif use_int8_w8a16: - return "int8_w8a16" - elif use_int4_w4a16: - return "int4_w4a16" - elif use_mxfp4_w4a4: - return "mxfp4_w4a4" - elif dtype == torch.float: - # avoiding cases where kernel fails when float32 MoE - # use fp16/bfloat16 configs - return "float32" - return None - - def inplace_fused_experts( - hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - activation: str = "silu", - is_act_and_mul: bool = True, - apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[List[int]] = None, #noqa: UP006 - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None) -> None: + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + activation: str = "silu", + apply_router_weight_on_input: bool = False, + use_fp8_w8a8: bool = False, + use_int8_w8a8: bool = False, + use_int8_w8a16: bool = False, + use_int4_w4a16: bool = False, + use_mxfp4_w4a4: bool = False, + per_channel_quant: bool = False, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + w1_scale: Optional[torch.Tensor] = None, + w2_scale: Optional[torch.Tensor] = None, + w1_zp: Optional[torch.Tensor] = None, + w2_zp: Optional[torch.Tensor] = None, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: 
Optional[torch.Tensor] = None, + block_shape: Optional[List[int]] = None, #noqa: UP006 + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None, +) -> None: fused_experts_impl(hidden_states, w1, w2, topk_weights, topk_ids, True, - activation, is_act_and_mul, - apply_router_weight_on_input, use_fp8_w8a8, + activation, apply_router_weight_on_input, use_fp8_w8a8, use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, use_mxfp4_w4a4, per_channel_quant, global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, a1_scale, a2_scale, block_shape, w1_bias, w2_bias) -def inplace_fused_experts_fake(hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - activation: str = "silu", - is_act_and_mul: bool = True, - apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None) -> None: +def inplace_fused_experts_fake( + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + activation: str = "silu", + apply_router_weight_on_input: bool = False, + use_fp8_w8a8: bool = False, + use_int8_w8a8: bool = False, + use_int8_w8a16: bool = False, + use_int4_w4a16: bool = False, + use_mxfp4_w4a4: bool = False, + per_channel_quant: bool = False, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + w1_scale: Optional[torch.Tensor] = None, + w2_scale: Optional[torch.Tensor] = None, + w1_zp: Optional[torch.Tensor] = None, + w2_zp: Optional[torch.Tensor] = None, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, + block_shape: Optional[List[int]] = None, #noqa: UP006 + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None, +) -> None: pass @@ -1143,175 +1118,6 @@ def inplace_fused_experts_fake(hidden_states: torch.Tensor, ) -def flashinfer_fused_moe_blockscale_fp8( - routing_logits: torch.Tensor, - routing_bias: torch.Tensor, - x: torch.Tensor, - w13_weight: torch.Tensor, - w13_weight_scale_inv: torch.Tensor, - w2_weight: torch.Tensor, - w2_weight_scale_inv: torch.Tensor, - global_num_experts: int, - top_k: int, - num_expert_group: int, - topk_group: int, - intermediate_size: int, - expert_offset: int, - local_num_experts: int, - block_shape: List[int], #noqa: UP006 - routed_scaling: float = 1.0) -> torch.Tensor: - from vllm.utils.flashinfer import flashinfer_trtllm_fp8_block_scale_moe - assert top_k <= global_num_experts - assert top_k <= 8 - assert topk_group <= 4 - assert global_num_experts > num_expert_group - assert global_num_experts % num_expert_group == 0 - assert global_num_experts % 4 == 0 - assert top_k < (topk_group * global_num_experts / num_expert_group) - assert block_shape == [128, 128] - - a_q, a_sf = per_token_group_quant_fp8(x, block_shape[1]) - # NOTE: scales of hidden states have to be transposed! 
- a_sf_t = a_sf.t().contiguous() - return flashinfer_trtllm_fp8_block_scale_moe( - routing_logits=routing_logits, - routing_bias=routing_bias, - hidden_states=a_q, - hidden_states_scale=a_sf_t, - gemm1_weights=w13_weight, - gemm1_weights_scale=w13_weight_scale_inv, - gemm2_weights=w2_weight, - gemm2_weights_scale=w2_weight_scale_inv, - num_experts=global_num_experts, - top_k=top_k, - n_group=num_expert_group, - topk_group=topk_group, - intermediate_size=intermediate_size, - local_expert_offset=expert_offset, - local_num_experts=local_num_experts, - routed_scaling_factor=routed_scaling, - tile_tokens_dim=calculate_tile_tokens_dim(x.shape[0], top_k, - global_num_experts), - routing_method_type=2, # DeepSeek-styled routing method - use_shuffled_weight=False, - ) - - -def flashinfer_fused_moe_blockscale_fp8_fake( - routing_logits: torch.Tensor, - routing_bias: torch.Tensor, - x: torch.Tensor, - w13_weight: torch.Tensor, - w13_weight_scale_inv: torch.Tensor, - w2_weight: torch.Tensor, - w2_weight_scale_inv: torch.Tensor, - global_num_experts: int, - top_k: int, - num_expert_group: int, - topk_group: int, - intermediate_size: int, - expert_offset: int, - local_num_experts: int, - block_shape: list[int], - routed_scaling: float = 1.0) -> torch.Tensor: - return torch.empty_like(x) - - -direct_register_custom_op( - op_name="flashinfer_fused_moe_blockscale_fp8", - op_func=flashinfer_fused_moe_blockscale_fp8, - mutates_args=[], - fake_impl=flashinfer_fused_moe_blockscale_fp8_fake, - tags=(torch.Tag.needs_fixed_stride_order, ), -) - - -def flashinfer_fused_moe_per_tensor_scale_fp8( - routing_logits: torch.Tensor, - routing_bias: Optional[torch.Tensor], - hidden_states: torch.Tensor, - input_scale: torch.Tensor, - gemm1_weights: torch.Tensor, - gemm2_weights: torch.Tensor, - output1_scales_scalar: torch.Tensor, - output1_scales_gate_scalar: torch.Tensor, - output2_scales_scalar: torch.Tensor, - num_experts: int, - top_k: int, - num_expert_group: Optional[int], - topk_group: Optional[int], - intermediate_size: int, - local_expert_offset: int, - local_num_experts: int, - use_routing_scales_on_input: bool, - routing_method_type: int, - routed_scaling_factor: float = 1.0) -> torch.Tensor: - num_expert_group = num_expert_group if num_expert_group is not None else 0 - topk_group = topk_group if topk_group is not None else 0 - - quant_hidden_states, _ = moe_kernel_quantize_input( - hidden_states, - input_scale, - quant_dtype=torch.float8_e4m3fn, - per_act_token_quant=False) - - from vllm.utils.flashinfer import ( - flashinfer_trtllm_fp8_per_tensor_scale_moe) - return flashinfer_trtllm_fp8_per_tensor_scale_moe( - routing_logits=routing_logits, - routing_bias=routing_bias, - hidden_states=quant_hidden_states, - gemm1_weights=gemm1_weights, - output1_scales_scalar=output1_scales_scalar, - output1_scales_gate_scalar=output1_scales_gate_scalar, - gemm2_weights=gemm2_weights, - output2_scales_scalar=output2_scales_scalar, - num_experts=num_experts, - top_k=top_k, - n_group=num_expert_group, - topk_group=topk_group, - intermediate_size=intermediate_size, - local_expert_offset=local_expert_offset, - local_num_experts=local_num_experts, - routed_scaling_factor=routed_scaling_factor, - use_routing_scales_on_input=use_routing_scales_on_input, - tile_tokens_dim=calculate_tile_tokens_dim(hidden_states.shape[0], - top_k, num_experts), - routing_method_type=routing_method_type) - - -def flashinfer_fused_moe_per_tensor_scale_fp8_fake( - routing_logits: torch.Tensor, - routing_bias: Optional[torch.Tensor], - hidden_states: 
torch.Tensor, - input_scale: torch.Tensor, - gemm1_weights: torch.Tensor, - gemm2_weights: torch.Tensor, - output1_scales_scalar: torch.Tensor, - output1_scales_gate_scalar: torch.Tensor, - output2_scales_scalar: torch.Tensor, - num_experts: int, - top_k: int, - num_expert_group: Optional[int], - topk_group: Optional[int], - intermediate_size: int, - local_expert_offset: int, - local_num_experts: int, - use_routing_scales_on_input: bool, - routing_method_type: int, - routed_scaling_factor: float = 1.0) -> torch.Tensor: - pass - - -direct_register_custom_op( - op_name="flashinfer_fused_moe_per_tensor_scale_fp8", - op_func=flashinfer_fused_moe_per_tensor_scale_fp8, - mutates_args=["hidden_states"], - fake_impl=flashinfer_fused_moe_per_tensor_scale_fp8_fake, - tags=(torch.Tag.needs_fixed_stride_order, ), -) - - def outplace_fused_experts( hidden_states: torch.Tensor, w1: torch.Tensor, @@ -1319,7 +1125,6 @@ def outplace_fused_experts( topk_weights: torch.Tensor, topk_ids: torch.Tensor, activation: str = "silu", - is_act_and_mul: bool = True, apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, @@ -1341,37 +1146,37 @@ def outplace_fused_experts( ) -> torch.Tensor: return fused_experts_impl( hidden_states, w1, w2, topk_weights, topk_ids, False, activation, - is_act_and_mul, apply_router_weight_on_input, use_fp8_w8a8, - use_int8_w8a8, use_int8_w8a16, use_int4_w4a16, use_mxfp4_w4a4, - per_channel_quant, global_num_experts, expert_map, w1_scale, w2_scale, - w1_zp, w2_zp, a1_scale, a2_scale, block_shape, w1_bias, w2_bias) + apply_router_weight_on_input, use_fp8_w8a8, use_int8_w8a8, + use_int8_w8a16, use_int4_w4a16, use_mxfp4_w4a4, per_channel_quant, + global_num_experts, expert_map, w1_scale, w2_scale, w1_zp, w2_zp, + a1_scale, a2_scale, block_shape, w1_bias, w2_bias) def outplace_fused_experts_fake( - hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - activation: str = "silu", - is_act_and_mul: bool = True, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None) -> torch.Tensor: + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + activation: str = "silu", + use_fp8_w8a8: bool = False, + use_int8_w8a8: bool = False, + use_int8_w8a16: bool = False, + use_int4_w4a16: bool = False, + use_mxfp4_w4a4: bool = False, + per_channel_quant: bool = False, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + w1_scale: Optional[torch.Tensor] = None, + w2_scale: Optional[torch.Tensor] = None, + w1_zp: Optional[torch.Tensor] = None, + w2_zp: Optional[torch.Tensor] = None, + a1_scale: Optional[torch.Tensor] = None, + a2_scale: Optional[torch.Tensor] = None, + block_shape: Optional[list[int]] = None, + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None, +) -> torch.Tensor: return 
torch.empty_like(hidden_states) @@ -1403,45 +1208,36 @@ def dispatch_fused_experts_func(inplace: bool) -> Callable[..., torch.Tensor]: # TODO (bnell): replace this with modular op. Can get rid of inplace/outplace # torch ops. -def fused_experts(hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - inplace: bool = False, - activation: str = "silu", - is_act_and_mul: bool = True, - apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, - allow_deep_gemm: bool = False, - allow_cutlass_block_scaled_grouped_gemm: bool = False, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None) -> torch.Tensor: +def fused_experts( + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + inplace: bool = False, + activation: str = "silu", + apply_router_weight_on_input: bool = False, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + quant_config: Optional[FusedMoEQuantConfig] = None, + allow_deep_gemm: bool = False, + allow_cutlass_block_scaled_grouped_gemm: bool = False, +) -> torch.Tensor: + + if quant_config is None: + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG + use_fp8_w8a8 = quant_config.use_fp8_w8a8 + # For now, disable DeepGemm for small N (<= 512) until better # permute/unpermute ops are available. # However, on B200, we use DeepGemm for all cases because they only support # E8M0 scale, which means we requantize the weight and input to the specific # scale. Fallen back to cutlass or triton for some cases would cause # accuracy issue. 
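For reference, the rewritten fused_experts signature above folds the loose quantization kwargs into a single quant_config argument. The following is a minimal caller-side sketch of that new interface, not code from this patch: it assumes an unquantized bf16/fp16 MoE whose router outputs are already computed, and the wrapper name run_unquantized_moe is illustrative only. Quantized callers would instead build a FusedMoEQuantConfig via the constructors in vllm/model_executor/layers/fused_moe/config.py (e.g. biased_moe_quant_config) rather than passing None.

from typing import Optional

import torch

from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig
from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts


def run_unquantized_moe(
        hidden_states: torch.Tensor,   # (num_tokens, hidden_dim)
        w1: torch.Tensor,              # per-expert weights, first MoE gemm
        w2: torch.Tensor,              # per-expert weights, second MoE gemm
        topk_weights: torch.Tensor,    # (num_tokens, top_k) router weights
        topk_ids: torch.Tensor,        # (num_tokens, top_k) expert ids
        quant_config: Optional[FusedMoEQuantConfig] = None,
) -> torch.Tensor:
    # Scales, zero points, bias and block_shape no longer travel as
    # individual kwargs; they ride on quant_config. Passing None falls
    # back to FUSED_MOE_UNQUANTIZED_CONFIG inside fused_experts.
    return fused_experts(
        hidden_states,
        w1,
        w2,
        topk_weights,
        topk_ids,
        inplace=False,
        activation="silu",
        quant_config=quant_config,
    )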
- if (allow_deep_gemm and use_fp8_w8a8 and + if (allow_deep_gemm and quant_config.use_fp8_w8a8 and (is_deep_gemm_e8m0_used() or _valid_deep_gemm(hidden_states, w1, w2))): + assert quant_config is not None assert apply_router_weight_on_input is False - assert is_act_and_mul, ( - "DeepGemm only supports is_act_and_mul=True for now.") return deep_gemm_moe_fp8( hidden_states=hidden_states, w1=w1, @@ -1452,22 +1248,23 @@ def fused_experts(hidden_states: torch.Tensor, activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, + w1_scale=quant_config.w1_scale, + w2_scale=quant_config.w2_scale, + a1_scale=quant_config.a1_scale, + a2_scale=quant_config.a2_scale, apply_router_weight_on_input=apply_router_weight_on_input, ) elif (allow_cutlass_block_scaled_grouped_gemm and use_fp8_w8a8 and _valid_cutlass_block_scaled_grouped_gemm( w1, w2, inplace, activation, apply_router_weight_on_input, expert_map)): + assert quant_config is not None return run_cutlass_block_scaled_fused_experts( a=hidden_states, w1=w1, w2=w2, - w1_scale=w1_scale, - w2_scale=w2_scale, + w1_scale=quant_config.w1_scale, + w2_scale=quant_config.w2_scale, topk_weights=topk_weights, topk_ids=topk_ids) else: @@ -1478,26 +1275,49 @@ def fused_experts(hidden_states: torch.Tensor, topk_weights=topk_weights, topk_ids=topk_ids, activation=activation, - is_act_and_mul=is_act_and_mul, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_channel_quant=per_channel_quant, + use_fp8_w8a8=quant_config.use_fp8_w8a8, + use_int8_w8a8=quant_config.use_int8_w8a8, + use_int8_w8a16=quant_config.use_int8_w8a16, + use_int4_w4a16=quant_config.use_int4_w4a16, + use_mxfp4_w4a4=quant_config.use_mxfp4_w4a4, + per_channel_quant=quant_config.per_act_token_quant, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, - a1_scale=a1_scale, - a2_scale=a2_scale, - block_shape=block_shape, - w1_bias=w1_bias, - w2_bias=w2_bias, - ) + w1_scale=quant_config.w1_scale, + w2_scale=quant_config.w2_scale, + w1_zp=quant_config.w1_zp, + w2_zp=quant_config.w2_zp, + a1_scale=quant_config.a1_scale, + a2_scale=quant_config.a2_scale, + block_shape=quant_config.block_shape, + w1_bias=quant_config.w1_bias, + w2_bias=quant_config.w2_bias) + + +SILU_NO_MUL: str = activation_without_mul("silu") +GELU_NO_MUL: str = activation_without_mul("gelu") + + +def _get_config_quant_dtype( + use_fp8_w8a8: bool, + use_int8_w8a8: bool, + use_mxfp4_w4a4: bool, +) -> Union[None, torch.dtype, str]: + """ + Get the quantization type based on the quantization strategy flags. + We don't have a quant_config at this point so we need to work backwards. + A return type of None means no quantization is required because the + input is unquantized or has been quantized prior to calling + fused_experts_impl. 
+ """ + if use_fp8_w8a8: + return torch.float8_e4m3fn + elif use_int8_w8a8: + return torch.int8 + elif use_mxfp4_w4a4: + return "mxfp4" + return None def fused_experts_impl( @@ -1508,7 +1328,6 @@ def fused_experts_impl( topk_ids: torch.Tensor, inplace: bool = False, activation: str = "silu", - is_act_and_mul: bool = True, apply_router_weight_on_input: bool = False, use_fp8_w8a8: bool = False, use_int8_w8a8: bool = False, @@ -1557,17 +1376,18 @@ def fused_experts_impl( # https://github.com/vllm-project/vllm/issues/5938 CHUNK_SIZE = envs.VLLM_FUSED_MOE_CHUNK_SIZE M = min(num_tokens, CHUNK_SIZE) - config_dtype = get_config_dtype_str(use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - dtype=hidden_states.dtype) - - qtype = get_config_quant_dtype(use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4) + + config_dtype = _get_config_dtype_str(use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a16=use_int8_w8a16, + use_int4_w4a16=use_int4_w4a16, + use_mxfp4_w4a4=use_mxfp4_w4a4, + dtype=hidden_states.dtype) + + # Note: for use_int8_w8a16 or use_int4_w4a16, the activations are + # quantized prior to calling fused_experts. + quant_dtype = _get_config_quant_dtype(use_fp8_w8a8=use_fp8_w8a8, + use_int8_w8a8=use_int8_w8a8, + use_mxfp4_w4a4=use_mxfp4_w4a4) get_config_func = functools.partial( try_get_optimal_moe_config, @@ -1640,7 +1460,7 @@ def fused_experts_impl( qcurr_hidden_states, a1q_scale = moe_kernel_quantize_input( A=curr_hidden_states, A_scale=a1_scale, - quant_dtype=qtype, + quant_dtype=quant_dtype, per_act_token_quant=per_channel_quant, block_shape=block_shape) @@ -1671,30 +1491,29 @@ def fused_experts_impl( B_bias=w1_bias) # Activation function with multiplication - if activation == "silu" and is_act_and_mul: + if activation == "silu": torch.ops._C.silu_and_mul(intermediate_cache2, intermediate_cache1.view(-1, N)) - elif activation == "gelu" and is_act_and_mul: + elif activation == "gelu": torch.ops._C.gelu_and_mul(intermediate_cache2, intermediate_cache1.view(-1, N)) - elif activation == "swigluoai" and is_act_and_mul: + elif activation == "swigluoai": # alpha = 1.702, limit = 7.0 torch.ops._C.swigluoai_and_mul(intermediate_cache2, intermediate_cache1.view(-1, N)) # Activation function without multiplication - elif activation == "silu": + elif activation == SILU_NO_MUL: intermediate_cache2 = F.silu(intermediate_cache1.view(-1, N)) - elif activation == "gelu": + elif activation == GELU_NO_MUL: intermediate_cache2 = F.gelu(intermediate_cache1.view(-1, N)) else: - raise ValueError(f"Unsupported FusedMoe activation: {activation}, " - f"with is_act_and_mul={is_act_and_mul}.") + raise ValueError(f"Unsupported FusedMoe activation: {activation}.") qintermediate_cache2, a2q_scale = moe_kernel_quantize_input( A=intermediate_cache2, A_scale=a2_scale, - quant_dtype=qtype, + quant_dtype=quant_dtype, per_act_token_quant=per_channel_quant, block_shape=block_shape) @@ -1726,164 +1545,13 @@ def fused_experts_impl( return out_hidden_states -def fused_moe( - hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - gating_output: torch.Tensor, - topk: int, - renormalize: bool, - inplace: bool = False, - activation: str = "silu", - is_act_and_mul: bool = True, - use_grouped_topk: bool = False, - num_expert_group: Optional[int] = None, - topk_group: Optional[int] = None, - custom_routing_function: Optional[Callable] = None, - 
use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_channel_quant: bool = False, - global_num_experts: int = -1, - expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None, -) -> torch.Tensor: - """ - This function computes a Mixture of Experts (MoE) layer using two sets of - weights, w1 and w2, and top-k gating mechanism. - - Parameters: - - hidden_states (torch.Tensor): The input tensor to the MoE layer. - - w1 (torch.Tensor): The first set of expert weights. - - w2 (torch.Tensor): The second set of expert weights. - - gating_output (torch.Tensor): The output of the gating operation - (before softmax). - - topk (int): The number of top-k experts to select. - - renormalize (bool): If True, renormalize the top-k weights to sum to 1. - - inplace (bool): If True, perform the operation in-place. - Defaults to False. - - activation (str): The activation function to apply after the first - MoE layer. - - is_act_and_mul (bool): If True, use activation-and-mul function for - activation (self-gated activation), otherwise use activation function - for activation (ungated activation). - - num_expert_group: Optional[int]: additional parameter for grouped_topk - - topk_group: Optional[int]: additional parameter for grouped_topk - - use_grouped_topk: If True, use grouped_topk instead of fused_topk - note: Deepseekv2 model uses grouped_topk - - use_fp8_w8a8 (bool): If True, use fp8 arithmetic to compute the inner - products for w1 and w2. Defaults to False. - - use_int8_w8a8 (bool): If True, use int8 arithmetic to compute the inner - products for w1 and w2. Defaults to False. - - use_int8_w8a16 (bool): If True, use matmul of int8 weight and bf16/fp16 - activation to compute the inner products for w1 and w2. - Defaults to False. - - use_int4_w4a16 (bool): If True, use matmul of int4 weight and bf16/fp16 - activation to compute the inner products for w1 and w2. - Defaults to False. - - use_mxfp4_w4a4 (bool): If True, use matmul of OCP MXFP4 weight and - OCP MXFP4 activation to compute the inner products for w1 and w2. - Defaults to False. - - global_num_experts (int): The total number of experts in the global - expert space. - - expert_map (Optional[torch.Tensor]): A tensor mapping expert indices - from the global expert space to the local expert space of the expert - parallel shard. - - w1_scale (Optional[torch.Tensor]): Optional scale to be used for - w1. - - w2_scale (Optional[torch.Tensor]): Optional scale to be used for - w2. - - a1_scale (Optional[torch.Tensor]): Optional scale to be used for - a1. - - a2_scale (Optional[torch.Tensor]): Optional scale to be used for - a2. - - block_shape: (Optional[list[int]]): Optional block size for block-wise - quantization. - - Returns: - - torch.Tensor: The output tensor after applying the MoE layer. 
- """ - if not is_act_and_mul: - assert inplace is False, ( - "is_act_and_mul=False is not supported with inplace=True") - - if use_grouped_topk: - assert num_expert_group is not None and topk_group is not None - topk_weights, topk_ids = grouped_topk(hidden_states, gating_output, - topk, renormalize, - num_expert_group, topk_group) - elif custom_routing_function is None: - topk_weights, topk_ids, token_expert_indices = fused_topk( - hidden_states, gating_output, topk, renormalize) - else: - topk_weights, topk_ids = custom_routing_function( - hidden_states, gating_output, topk, renormalize) - - return fused_experts(hidden_states, - w1, - w2, - topk_weights, - topk_ids, - inplace=inplace, - activation=activation, - is_act_and_mul=is_act_and_mul, - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_channel_quant=per_channel_quant, - global_num_experts=global_num_experts, - expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, - a1_scale=a1_scale, - a2_scale=a2_scale, - block_shape=block_shape, - w1_bias=w1_bias, - w2_bias=w2_bias) - - class TritonExperts(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_act_token_quant: bool = False, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, ): - super().__init__( - FusedMoEQuantConfig.make( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - )) - - self.use_fp8_w8a8 = use_fp8_w8a8 - self.use_int4_w4a16 = use_int4_w4a16 - self.use_int8_w8a8 = use_int8_w8a8 - self.use_int8_w8a16 = use_int8_w8a16 - self.use_mxfp4_w4a4 = use_mxfp4_w4a4 + super().__init__(quant_config) @property def activation_formats( @@ -1929,19 +1597,14 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): # Check constraints. 
- if self.use_int4_w4a16: + if self.quant_config.use_int4_w4a16: assert hidden_states.size(-1) // 2 == w1.size(2), ( "Hidden size mismatch") else: @@ -1964,17 +1627,11 @@ def apply( if global_num_experts == -1: global_num_experts = E - config_dtype = get_config_dtype_str(use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, - use_mxfp4_w4a4=self.use_mxfp4_w4a4, - dtype=hidden_states.dtype) - config = try_get_optimal_moe_config( w1.size(), w2.size(), top_k_num, - config_dtype, + self.quant_config.config_name(hidden_states.dtype), num_tokens, block_shape=self.block_shape, ) @@ -2008,8 +1665,8 @@ def apply( w1, intermediate_cache1, a1q_scale, - w1_scale, - w1_zp, + self.w1_scale, + self.w1_zp, None, # topk_weights sorted_token_ids, expert_ids, @@ -2018,13 +1675,13 @@ def apply( top_k_num, config, compute_type=compute_type, - use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a8=self.use_int8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, + use_fp8_w8a8=self.quant_config.use_fp8_w8a8, + use_int8_w8a8=self.quant_config.use_int8_w8a8, + use_int8_w8a16=self.quant_config.use_int8_w8a16, + use_int4_w4a16=self.quant_config.use_int4_w4a16, per_channel_quant=self.per_act_token_quant, block_shape=self.block_shape, - B_bias=None # TODO support B_bias + B_bias=self.w1_bias, ) self.activation(activation, intermediate_cache2, @@ -2033,7 +1690,7 @@ def apply( a2q_scale: Optional[torch.Tensor] = None qintermediate_cache2, a2q_scale = moe_kernel_quantize_input( - intermediate_cache2, a2_scale, self.quant_dtype, + intermediate_cache2, self.a2_scale, self.quant_dtype, self.per_act_token_quant, self.block_shape) invoke_fused_moe_kernel( @@ -2041,8 +1698,8 @@ def apply( w2, intermediate_cache3, a2q_scale, - w2_scale, - w2_zp, + self.w2_scale, + self.w2_zp, topk_weights, sorted_token_ids, expert_ids, @@ -2051,36 +1708,21 @@ def apply( 1, config, compute_type=compute_type, - use_fp8_w8a8=self.use_fp8_w8a8, - use_int8_w8a8=self.use_int8_w8a8, - use_int8_w8a16=self.use_int8_w8a16, - use_int4_w4a16=self.use_int4_w4a16, + use_fp8_w8a8=self.quant_config.use_fp8_w8a8, + use_int8_w8a8=self.quant_config.use_int8_w8a8, + use_int8_w8a16=self.quant_config.use_int8_w8a16, + use_int4_w4a16=self.quant_config.use_int4_w4a16, per_channel_quant=self.per_act_token_quant, block_shape=self.block_shape, - B_bias=None # TODO support B_bias + B_bias=self.w2_bias, ) ops.moe_sum(intermediate_cache3, output) def modular_triton_fused_moe( - use_fp8_w8a8: bool, - use_int8_w8a8: bool, - use_int8_w8a16: bool, - use_int4_w4a16: bool, - use_mxfp4_w4a4: bool, - per_act_token_quant: bool, - block_shape: Optional[list[int]] = None, -) -> mk.FusedMoEModularKernel: + quant_config: FusedMoEQuantConfig) -> mk.FusedMoEModularKernel: return mk.FusedMoEModularKernel( MoEPrepareAndFinalizeNoEP(), - TritonExperts( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - ), + TritonExperts(quant_config), ) diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py index 312befe2c1d7..614a83ad1158 100644 --- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py +++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py @@ -1,11 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # 
SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import TYPE_CHECKING, Optional +from typing import Optional import torch import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.logger import init_logger +from vllm.model_executor.layers.fused_moe.config import ( + FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceDelegate) from vllm.utils import has_triton_kernels @@ -23,9 +25,6 @@ "Failed to import Triton kernels. Please make sure your triton " "version is compatible.") -if TYPE_CHECKING: - from triton_kernels.matmul_ogs import PrecisionConfig - def triton_kernel_moe_forward( hidden_states: torch.Tensor, @@ -35,20 +34,10 @@ def triton_kernel_moe_forward( topk: int, renormalize: bool, activation: str = "silu", + quant_config: Optional[FusedMoEQuantConfig] = None, apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - per_channel_quant: bool = False, global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None, - w1_precision: Optional["PrecisionConfig"] = None, - w2_precision: Optional["PrecisionConfig"] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, ) -> torch.Tensor: routing_data, gather_idx, scatter_idx = routing(gating_output, @@ -64,20 +53,10 @@ def triton_kernel_moe_forward( gather_idx, scatter_idx, activation=activation, + quant_config=quant_config, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=use_fp8_w8a8, - per_channel_quant=per_channel_quant, global_num_experts=global_num_experts, - expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_bias=w1_bias, - w2_bias=w2_bias, - w1_precision=w1_precision, - w2_precision=w2_precision, - a1_scale=a1_scale, - a2_scale=a2_scale, - block_shape=block_shape) + expert_map=expert_map) # This is a triton implementation of the fused_experts function @@ -90,28 +69,23 @@ def triton_kernel_fused_experts( gather_indx, # GatherIndx scatter_indx, # ScatterIndx activation: str = "silu", + quant_config: Optional[FusedMoEQuantConfig] = None, swiglu_alpha: float = 1.702, swiglu_limit: float = 7.0, apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - per_channel_quant: bool = False, global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_bias: Optional[torch.Tensor] = None, - w2_bias: Optional[torch.Tensor] = None, - w1_precision: Optional["PrecisionConfig"] = None, - w2_precision: Optional["PrecisionConfig"] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, + a1q_scale: Optional[torch.Tensor] = None, ) -> torch.Tensor: + if quant_config is None: + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG # type check, uint8 means mxfp4 assert hidden_states.dtype == torch.bfloat16 - assert w1_bias is None or w1_bias.dtype == torch.float32 - assert w2_bias is None or w2_bias.dtype == torch.float32 + assert (quant_config.w1_bias is None + or quant_config.w1_bias.dtype == torch.float32) + assert (quant_config.w2_bias is None + or quant_config.w2_bias.dtype == torch.float32) # Shape check, 
only check non-mxfp4 assert hidden_states.shape[-1] == w1.shape[-2] @@ -130,20 +104,20 @@ def triton_kernel_fused_experts( intermediate_cache1 = matmul_ogs( hidden_states, w1, - w1_bias, + quant_config.w1_bias, routing_data, gather_indx=gather_indx, - precision_config=w1_precision, + precision_config=quant_config.w1_precision, gammas=gammas if apply_router_weight_on_input else None, fused_activation=act) intermediate_cache3 = matmul_ogs( intermediate_cache1, w2, - w2_bias, + quant_config.w2_bias, routing_data, scatter_indx=scatter_indx, - precision_config=w2_precision, + precision_config=quant_config.w2_precision, gammas=None if apply_router_weight_on_input else gammas, y=output_tensor, ) @@ -154,21 +128,13 @@ class BatchedOAITritonExperts(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, - quant_config, max_num_tokens: int, num_dispatchers: int, - w1_precision: "PrecisionConfig", - w2_precision: "PrecisionConfig", - w1_bias: Optional[torch.Tensor], - w2_bias: Optional[torch.Tensor], + quant_config: FusedMoEQuantConfig, ): super().__init__(quant_config) self.max_num_tokens = max_num_tokens self.num_dispatchers = num_dispatchers - self.w1_precision = w1_precision - self.w2_precision = w2_precision - self.w1_bias = w1_bias - self.w2_bias = w2_bias @property def activation_formats( @@ -212,12 +178,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -228,20 +189,12 @@ def apply( hidden_states, w1, w2, - None, - None, - None, + routing_data=None, + gather_indx=None, + scatter_indx=None, activation=activation, + quant_config=self.quant_config, apply_router_weight_on_input=False, - use_fp8_w8a8=False, - per_channel_quant=False, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_bias=self.w1_bias, - w2_bias=self.w2_bias, - w1_precision=self.w1_precision, - w2_precision=self.w2_precision, - a1_scale=a1q_scale, - a2_scale=a2_scale) + a1q_scale=a1q_scale) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index d22bb253f4a7..ae3b67a2b84e 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -22,7 +22,8 @@ from vllm.model_executor.custom_op import CustomOp # yapf: disable from vllm.model_executor.layers.fused_moe.config import ( - FusedMoEConfig, FusedMoEParallelConfig) + FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEConfig, FusedMoEParallelConfig, + FusedMoEQuantConfig, biased_moe_quant_config) # yapf: enable from vllm.model_executor.layers.fused_moe.modular_kernel import ( FusedMoEActivationFormat, FusedMoEModularKernel, @@ -78,11 +79,11 @@ class FusedMoeWeightScaleSupported(Enum): class FusedMoEMethodBase(QuantizeMethodBase): - # TODO(bnell): also pass quant_config? 
def __init__(self, moe: FusedMoEConfig): super().__init__() self.moe = moe - self.fused_experts: Optional[Callable] = None + self.moe_quant_config: Optional[FusedMoEQuantConfig] = None + self.fused_experts: Optional[FusedMoEModularKernel] = None self.topk_indices_dtype = None @abstractmethod @@ -103,23 +104,28 @@ def uses_weight_scale_2_pattern(self) -> bool: @staticmethod def _maybe_make_prepare_finalize( - moe: FusedMoEConfig, ) -> Optional[FusedMoEPrepareAndFinalize]: + moe: FusedMoEConfig, + quant_config: Optional[FusedMoEQuantConfig], + ) -> Optional[FusedMoEPrepareAndFinalize]: all2all_manager = get_ep_group().device_communicator.all2all_manager assert all2all_manager is not None prepare_finalize: Optional[FusedMoEPrepareAndFinalize] = None + # TODO: could allow this now assert not moe.use_flashinfer_cutlass_kernels, \ "Must be created in modelopt.py" if moe.use_pplx_kernels: + assert quant_config is not None + hidden_dim_bytes, hidden_scale_bytes = pplx_hidden_dim_scale_bytes( moe.max_num_tokens, moe.hidden_dim, moe.in_dtype, - moe.quant_dtype, - per_act_token_quant=moe.per_act_token_quant, - block_shape=moe.block_shape, + quant_config.quant_dtype, + per_act_token_quant=quant_config.per_act_token_quant, + block_shape=quant_config.block_shape, ) all_to_all_args = dict( @@ -165,6 +171,7 @@ def _maybe_make_prepare_finalize( ) elif moe.use_deepep_ll_kernels: + assert quant_config is not None all_to_all_args = dict( max_num_tokens_per_dp_rank=moe.max_num_tokens, token_hidden_size=moe.hidden_dim, @@ -174,13 +181,11 @@ def _maybe_make_prepare_finalize( all2all_manager.world_size) handle = all2all_manager.get_handle(all_to_all_args) - # Note : We may want to use FP8 dispatch even otherwise just to - # reduce datamovement - use_fp8_dispatch = (moe.quant_config is not None - and moe.quant_config.quant_dtype - == current_platform.fp8_dtype() - and moe.quant_config.block_shape - == DEEPEP_QUANT_BLOCK_SHAPE) + # Note: We may want to use FP8 dispatch just to reduce + # data movement. + use_fp8_dispatch = ( + quant_config.quant_dtype == current_platform.fp8_dtype() + and quant_config.block_shape == DEEPEP_QUANT_BLOCK_SHAPE) prepare_finalize = DeepEPLLPrepareAndFinalize( handle, @@ -192,11 +197,10 @@ def _maybe_make_prepare_finalize( return prepare_finalize def maybe_make_prepare_finalize( - self, - moe: FusedMoEConfig, - ) -> Optional[FusedMoEPrepareAndFinalize]: - if moe.moe_parallel_config.use_all2all_kernels: - return FusedMoEMethodBase._maybe_make_prepare_finalize(moe) + self) -> Optional[FusedMoEPrepareAndFinalize]: + if self.moe.moe_parallel_config.use_all2all_kernels: + return FusedMoEMethodBase._maybe_make_prepare_finalize( + self.moe, self.moe_quant_config) else: return None @@ -204,7 +208,13 @@ def maybe_make_prepare_finalize( # prepare_communication_buffer_for_model. def init_prepare_finalize(self, layer: torch.nn.Module): assert self.moe is not None - prepare_finalize = self.maybe_make_prepare_finalize(self.moe) + + # We must get the quant config here so that the layer is + # completely initialized, i.e. all weights loaded and post + # processed. + self.moe_quant_config = self.get_fused_moe_quant_config(layer) + + prepare_finalize = self.maybe_make_prepare_finalize() if prepare_finalize is not None: logger.debug("%s for %s(%s)", prepare_finalize.__class__.__name__, @@ -213,7 +223,7 @@ def init_prepare_finalize(self, layer: torch.nn.Module): assert self.fused_experts is None, \ f"Attempt to override experts for {id(self)}!" 
self.topk_indices_dtype = prepare_finalize.topk_indices_dtype() - experts = self.select_gemm_impl(prepare_finalize, self.moe, layer) + experts = self.select_gemm_impl(prepare_finalize, layer) self.fused_experts = FusedMoEModularKernel( prepare_finalize, experts, @@ -223,7 +233,6 @@ def init_prepare_finalize(self, layer: torch.nn.Module): def select_gemm_impl( self, prepare_finalize: FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> FusedMoEPermuteExpertsUnpermute: # based on the all2all implementation, select the appropriate @@ -232,6 +241,11 @@ def select_gemm_impl( f"{self.__class__.__name__} must select appropriate gemm " "implementation based on the prepare_finalize") + @abstractmethod + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + raise NotImplementedError + @abstractmethod def apply( self, @@ -265,7 +279,6 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp): def __init__(self, moe: FusedMoEConfig): super().__init__(moe) - self.has_bias = self.moe.has_bias self.rocm_aiter_moe_enabled = is_rocm_aiter_moe_enabled() if self.rocm_aiter_moe_enabled: from .rocm_aiter_fused_moe import rocm_aiter_fused_experts @@ -273,23 +286,30 @@ def __init__(self, moe: FusedMoEConfig): else: self.rocm_aiter_fused_experts = None # type: ignore + def maybe_make_prepare_finalize( + self) -> Optional[FusedMoEPrepareAndFinalize]: + if self.rocm_aiter_moe_enabled: + return None + else: + return super().maybe_make_prepare_finalize() + def select_gemm_impl( self, prepare_finalize: FusedMoEPrepareAndFinalize, - # TODO(bnell): Remove. Every layer should have an moe config object. - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> FusedMoEPermuteExpertsUnpermute: + assert self.moe_quant_config is not None if (prepare_finalize.activation_format == FusedMoEActivationFormat.BatchedExperts): logger.debug("BatchedTritonExperts %s", self.moe) return BatchedTritonExperts( max_num_tokens=self.moe.max_num_tokens, num_dispatchers=prepare_finalize.num_dispatchers(), + quant_config=self.moe_quant_config, ) else: logger.debug("TritonExperts %s", self.moe) - return TritonExperts() + return TritonExperts(self.moe_quant_config) def create_weights(self, layer: torch.nn.Module, num_experts: int, hidden_size: int, intermediate_size_per_partition: int, @@ -303,7 +323,7 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, requires_grad=False) layer.register_parameter("w13_weight", w13_weight) set_weight_attrs(w13_weight, extra_weight_attrs) - if self.has_bias: + if self.moe.has_bias: w13_bias = torch.nn.Parameter(torch.zeros( num_experts, 2 * intermediate_size_per_partition, @@ -320,7 +340,7 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, requires_grad=False) layer.register_parameter("w2_weight", w2_weight) set_weight_attrs(w2_weight, extra_weight_attrs) - if self.has_bias: + if self.moe.has_bias: w2_bias = torch.nn.Parameter(torch.zeros(num_experts, hidden_size, dtype=params_dtype), @@ -442,6 +462,16 @@ def apply( logical_replica_count=logical_replica_count, ) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if self.moe.has_bias: + return biased_moe_quant_config( + layer.w13_bias, + layer.w2_bias, + ) + else: + return FUSED_MOE_UNQUANTIZED_CONFIG + def forward_cuda( self, layer: torch.nn.Module, @@ -486,6 +516,7 @@ def forward_cuda( logical_replica_count=logical_replica_count) if self.rocm_aiter_moe_enabled: + assert self.fused_experts is None 
return self.rocm_aiter_fused_experts( hidden_states=x, w1=layer.w13_weight, @@ -496,7 +527,7 @@ def forward_cuda( activation=activation, apply_router_weight_on_input=apply_router_weight_on_input) elif self.fused_experts is not None: - if self.has_bias: + if self.moe.has_bias: raise ValueError( "FusedMoEModularKernel does not support bias.") return self.fused_experts( @@ -517,12 +548,11 @@ def forward_cuda( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, - w1_bias=layer.w13_bias if self.has_bias else None, - w2_bias=layer.w2_bias if self.has_bias else None, topk_weights=topk_weights, topk_ids=topk_ids, inplace=True, activation=activation, + quant_config=self.moe_quant_config, apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, expert_map=expert_map, @@ -933,16 +963,18 @@ def __init__( # since model_config is not set in the pytest test. model_dtype = params_dtype - moe = FusedMoEConfig.make(num_experts=self.global_num_experts, - experts_per_token=top_k, - hidden_dim=hidden_size, - num_local_experts=self.local_num_experts, - moe_parallel_config=self.moe_parallel_config, - in_dtype=model_dtype, - max_num_tokens=envs.VLLM_MOE_DP_CHUNK_SIZE, - quant_config=quant_config, - has_bias=has_bias) + moe = FusedMoEConfig( + num_experts=self.global_num_experts, + experts_per_token=top_k, + hidden_dim=hidden_size, + num_local_experts=self.local_num_experts, + moe_parallel_config=self.moe_parallel_config, + in_dtype=model_dtype, + max_num_tokens=envs.VLLM_MOE_DP_CHUNK_SIZE, + has_bias=has_bias, + ) self.moe_config = moe + self.moe_quant_config: Optional[FusedMoEQuantConfig] = None self.quant_config = quant_config # Note: get_quant_method will look at the layer's local_num_experts @@ -990,6 +1022,9 @@ def __init__( # Chunked all2all staging tensor self.batched_hidden_states: Optional[torch.Tensor] = None self.batched_router_logits: Optional[torch.Tensor] = None + + # TODO(bnell): flashinfer uses non-batched format. + # Does it really need a batched buffer? 
if (self.moe_parallel_config.use_pplx_kernels or self.moe_parallel_config.use_deepep_ll_kernels or self.moe_config.use_flashinfer_cutlass_kernels): @@ -1062,7 +1097,9 @@ def use_deepep_ll_kernels(self): @property def use_flashinfer_cutlass_kernels(self): - return self.moe_config.use_flashinfer_cutlass_kernels + return (self.moe_quant_config is not None + and self.moe_quant_config.quant_dtype == "nvfp4" + and self.moe_config.use_flashinfer_cutlass_kernels) def update_expert_map(self): # ep_size and ep_rank should already be updated @@ -1492,6 +1529,11 @@ def set_eplb_state( self.logical_to_physical_map = logical_to_physical_map[moe_layer_idx] self.logical_replica_count = logical_replica_count[moe_layer_idx] + def ensure_moe_quant_config(self): + if self.quant_method.moe_quant_config is None: + self.quant_method.moe_quant_config = ( + self.quant_method.get_fused_moe_quant_config(self)) + @staticmethod def select_experts( hidden_states: torch.Tensor, @@ -1711,6 +1753,8 @@ def forward_impl_chunked( assert ( self.batched_router_logits.size(-1) == full_router_logits.size(-1)) + self.ensure_moe_quant_config() + full_fused_final_hidden_states = torch.empty_like(full_hidden_states) if self.shared_experts is not None: full_shared_final_hidden_states = torch.empty_like( @@ -1825,14 +1869,17 @@ def forward_impl( router_logits: torch.Tensor, ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: assert self.quant_method is not None + + self.ensure_moe_quant_config() + # Route to the chunked forward path using the FlashInfer Cutlass kernel # only when data parallelism (DP) is enabled. - use_flashinfer_cutlass_kernels = ( - self.dp_size > 1 - and self.moe_config.use_flashinfer_cutlass_kernels) + _use_flashinfer_cutlass_kernels = (self.dp_size > 1 and + self.use_flashinfer_cutlass_kernels) + if (self.moe_parallel_config.use_pplx_kernels or self.moe_parallel_config.use_deepep_ll_kernels - or use_flashinfer_cutlass_kernels): + or _use_flashinfer_cutlass_kernels): return self.forward_impl_chunked(hidden_states, router_logits) do_naive_dispatch_combine: bool = ( diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index efaa9cc058e4..58cd0294c8c4 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -177,8 +177,6 @@ class FusedMoEPrepareAndFinalize(ABC): def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -189,9 +187,6 @@ def prepare( """ Perform any quantization (and/or) dispatching needed for this kernel. - a1: The (unquantized) input to the MoE layer. - - a1_scale: Optional scales for a1 - - a2_scale: Optional scales for the second MoE gemm. Required to make - sure the quantization is consistent for both gemms. - topk_ids: The topk ids. - topk_weights: The topk weights. - num_experts: The total number of experts in the global expert space. @@ -199,10 +194,11 @@ def prepare( space to the local expert space of the expert parallel shard. - apply_router_weight_on_input: When True, apply the weights to the activations, before quantization + dispatching. + - quant_config: Quantization info provided by the fused experts. Returns a tuple of: - quantized + dispatched a. - - quantized + dispatched a1_scales. + - Optional quantized + dispatched a1_scales. 
- Optional ExpertTokensMetadata containing gpu/cpu tensors as big as the number of local experts with the information about the number of tokens assigned to each local expert. @@ -220,8 +216,6 @@ def supports_async(self) -> bool: def prepare_async( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -316,6 +310,7 @@ def num_dispatchers(self) -> int: raise NotImplementedError +# TODO: add supported activations method (return string) class FusedMoEPermuteExpertsUnpermute(ABC): """ An abstract base class for the [Permute-Experts-Unpermute] step described @@ -324,12 +319,12 @@ class FusedMoEPermuteExpertsUnpermute(ABC): def __init__( self, - quant_config: Optional[FusedMoEQuantConfig], + quant_config: FusedMoEQuantConfig, ): - if quant_config is not None: - self.quant_config = quant_config - else: - self.quant_config = FusedMoEQuantConfig() + """ + quant_config: Quantization parameters for this experts instance. + """ + self.quant_config = quant_config @property @abstractmethod @@ -341,6 +336,11 @@ def activation_formats( """ raise NotImplementedError + # + # Various helpers for accessing quantization parameters from the + # quant_config. + # + @property def quant_dtype(self) -> Optional[torch.dtype]: return self.quant_config.quant_dtype @@ -357,6 +357,54 @@ def per_act_token_quant(self) -> bool: def per_out_ch_quant(self) -> bool: return self.quant_config.per_out_ch_quant + @property + def a1_scale(self) -> Optional[torch.Tensor]: + return self.quant_config.a1_scale + + @property + def a2_scale(self) -> Optional[torch.Tensor]: + return self.quant_config.a2_scale + + @property + def a1_gscale(self) -> Optional[torch.Tensor]: + return self.quant_config.a1_gscale + + @property + def a2_gscale(self) -> Optional[torch.Tensor]: + return self.quant_config.a2_gscale + + @property + def w1_scale(self) -> Optional[torch.Tensor]: + return self.quant_config.w1_scale + + @property + def w2_scale(self) -> Optional[torch.Tensor]: + return self.quant_config.w2_scale + + @property + def w1_zp(self) -> Optional[torch.Tensor]: + return self.quant_config.w1_zp + + @property + def w2_zp(self) -> Optional[torch.Tensor]: + return self.quant_config.w2_zp + + @property + def w1_bias(self) -> Optional[torch.Tensor]: + return self.quant_config.w1_bias + + @property + def w2_bias(self) -> Optional[torch.Tensor]: + return self.quant_config.w2_bias + + @property + def g1_alphas(self) -> Optional[torch.Tensor]: + return self.quant_config.g1_alphas + + @property + def g2_alphas(self) -> Optional[torch.Tensor]: + return self.quant_config.g2_alphas + # TODO (bnell): make this return a CHUNK_SIZE or None instead? @abstractmethod def supports_chunking(self) -> bool: @@ -433,12 +481,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[ExpertTokensMetadata], @@ -455,7 +498,7 @@ def apply( - w1 (torch.Tensor): The first set of expert weights. - w2 (torch.Tensor): The second set of expert weights. - topk_weights: A map of row to expert weights. Some implementations - choose to do weight application. + choose to do weight application. - topk_ids (torch.Tensor): A map of row to expert id. 
- activation (str): The activation function to apply after the first MoE layer. @@ -464,15 +507,9 @@ def apply( - expert_map (Optional[torch.Tensor]): A tensor mapping expert indices from the global expert space to the local expert space of the expert parallel shard. - - w1_scale (Optional[torch.Tensor]): Optional scale to be used for w1. - - w2_scale (Optional[torch.Tensor]): Optional scale to be used for w2. - - w1_zp (Optional[torch.Tensor]): Optional zero points to be used for - w1. - - w2_zp (Optional[torch.Tensor]): Optional zero points to be used for - w2. - a1q_scale (Optional[torch.Tensor]): Optional quantized scale to be - used for a1. - - a2_scale (Optional[torch.Tensor]): Optional scale to be used for a2. + used for a1. Result of quantization from prepare/finalize and not + from the FusedMoEQuantConfig. - workspace13 (torch.Tensor): A scratch tensor used for gemm outputs must be large enough to hold output of either MoE gemm. - workspace2 (torch.Tensor): A scratch tensor used for the activation @@ -559,12 +596,7 @@ def _do_fused_experts( global_num_experts: int, local_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], expert_tokens_meta: Optional[ExpertTokensMetadata], apply_router_weight_on_input: bool, ) -> torch.Tensor: @@ -601,12 +633,7 @@ def _do_fused_experts( activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, a1q_scale=a1q_scale, - a2_scale=a2_scale, workspace13=workspace13, workspace2=workspace2, expert_tokens_meta=expert_tokens_meta, @@ -627,12 +654,7 @@ def _maybe_chunk_fused_experts( global_num_experts: int, local_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], expert_tokens_meta: Optional[ExpertTokensMetadata], apply_router_weight_on_input: bool, ) -> torch.Tensor: @@ -658,12 +680,7 @@ def _maybe_chunk_fused_experts( global_num_experts=global_num_experts, local_num_experts=local_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, a1q_scale=a1q_scale, - a2_scale=a2_scale, expert_tokens_meta=expert_tokens_meta, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -685,9 +702,13 @@ def slice_input_tensors( Optional[torch.Tensor], torch.Tensor, torch.Tensor]: s = chunk_idx * CHUNK_SIZE e = min(s + CHUNK_SIZE, M) - return (a1q[s:e], _chunk_scales(a1q_scale, s, e), - _chunk_scales(a2_scale, s, - e), topk_ids[s:e], topk_weights[s:e]) + return ( + a1q[s:e], + _chunk_scales(a1q_scale, s, e), + _chunk_scales(self.fused_experts.a2_scale, s, e), + topk_ids[s:e], + topk_weights[s:e], + ) def slice_output_tensor(chunk_idx: int) -> torch.Tensor: assert fused_out.size(0) % M == 0, ( @@ -744,12 +765,7 @@ def slice_expert_tokens_metadata( global_num_experts=global_num_experts, local_num_experts=local_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, a1q_scale=c_a1q_scale, - a2_scale=c_a2_scale, expert_tokens_meta=c_expert_tokens_meta, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -767,12 +783,6 @@ def forward( 
activation: str = "silu", global_num_experts: int = -1, expert_map: Optional[torch.Tensor] = None, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - w1_zp: Optional[torch.Tensor] = None, - w2_zp: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, apply_router_weight_on_input: bool = False, ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: """ @@ -795,14 +805,6 @@ def forward( - expert_map (Optional[torch.Tensor]): A tensor mapping expert indices from the global expert space to the local expert space of the expert parallel shard. - - w1_scale (Optional[torch.Tensor]): Optional scale to be used for w1. - - w2_scale (Optional[torch.Tensor]): Optional scale to be used for w2. - - w1_zp (Optional[torch.Tensor]): Optional zero points to be used for - w1. - - w2_zp (Optional[torch.Tensor]): Optional zero points to be used for - w2. - - a1_scale (Optional[torch.Tensor]): Optional scale to be used for a1. - - a2_scale (Optional[torch.Tensor]): Optional scale to be used for a2. - apply_router_weight_on_input (bool): When true, the topk weights are applied directly on the inputs. This is only applicable when topk is 1. @@ -832,8 +834,6 @@ def forward( (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, _expert_topk_weights) = self.prepare_finalize.prepare( a1, - a1_scale, - a2_scale, topk_weights, topk_ids, global_num_experts, @@ -846,8 +846,6 @@ def forward( dbo_maybe_run_recv_hook() hook, receiver = self.prepare_finalize.prepare_async( a1, - a1_scale, - a2_scale, topk_weights, topk_ids, global_num_experts, @@ -897,12 +895,7 @@ def forward( global_num_experts=global_num_experts, local_num_experts=local_num_experts, expert_map=expert_map, - w1_scale=w1_scale, - w2_scale=w2_scale, - w1_zp=w1_zp, - w2_zp=w2_zp, a1q_scale=a1q_scale, - a2_scale=a2_scale, expert_tokens_meta=expert_tokens_meta, apply_router_weight_on_input=apply_router_weight_on_input, ) diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py index b8c1c14317c4..32d12476dd01 100644 --- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py @@ -95,8 +95,6 @@ def supports_async(self) -> bool: def prepare_async( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -130,8 +128,10 @@ def prepare_async( repeat_cols = 4 repeat_rows = 1 if quant_config.per_act_token_quant else a1.size(0) + # TODO(bnell): always pass quant_config.a1_scale? 
a1q, a1q_scale = moe_kernel_quantize_input( - a1, (None if quant_config.per_act_token_quant else a1_scale), + a1, (None if quant_config.per_act_token_quant else + quant_config.a1_scale), quant_dtype=quant_config.quant_dtype, per_act_token_quant=quant_config.per_act_token_quant, block_shape=quant_config.block_shape) @@ -253,8 +253,6 @@ def _receiver( def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -264,8 +262,6 @@ def prepare( ) -> mk.PrepareResultType: hook, receiver = self.prepare_async( a1, - a1_scale, - a2_scale, topk_weights, topk_ids, num_experts, diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize.py b/vllm/model_executor/layers/fused_moe/prepare_finalize.py index bd9f7d4a06b1..588e5de865dd 100644 --- a/vllm/model_executor/layers/fused_moe/prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/prepare_finalize.py @@ -30,8 +30,6 @@ def num_dispatchers(self) -> int: def prepare( self, a1: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], topk_weights: torch.Tensor, topk_ids: torch.Tensor, num_experts: int, @@ -48,7 +46,7 @@ def prepare( a1.mul_(topk_weights.to(a1.dtype)) a1q, a1q_scale = moe_kernel_quantize_input( - a1, a1_scale, quant_config.quant_dtype, + a1, quant_config.a1_scale, quant_config.quant_dtype, quant_config.per_act_token_quant, quant_config.block_shape) return a1q, a1q_scale, None, None, None diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py index 13c3ab4f06dd..f4972ff5f9cb 100644 --- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py @@ -7,6 +7,8 @@ import torch from vllm import envs +from vllm.model_executor.layers.fused_moe.config import ( + FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEQuantConfig) from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op @@ -305,21 +307,18 @@ def rocm_aiter_grouped_topk( def rocm_aiter_fused_experts( - hidden_states: torch.Tensor, - w1: torch.Tensor, - w2: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - activation: str = "silu", - apply_router_weight_on_input: bool = False, - use_fp8_w8a8: bool = False, - per_channel_quant: bool = False, - w1_scale: Optional[torch.Tensor] = None, - w2_scale: Optional[torch.Tensor] = None, - a1_scale: Optional[torch.Tensor] = None, - a2_scale: Optional[torch.Tensor] = None, - block_shape: Optional[list[int]] = None, - expert_map: Optional[torch.Tensor] = None) -> torch.Tensor: + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + activation: str = "silu", + apply_router_weight_on_input: bool = False, + expert_map: Optional[torch.Tensor] = None, + quant_config: Optional[FusedMoEQuantConfig] = None, +) -> torch.Tensor: + if quant_config is None: + quant_config = FUSED_MOE_UNQUANTIZED_CONFIG activation_method = (ActivationMethod.SILU if activation == "silu" else ActivationMethod.GELU) @@ -333,7 +332,8 @@ def rocm_aiter_fused_experts( expert_mask = None # w8a8 per-channel quantization - if per_channel_quant and apply_router_weight_on_input and use_fp8_w8a8: + if (quant_config.per_act_token_quant and apply_router_weight_on_input + and quant_config.use_fp8_w8a8): # AITER tkw1 kernel for FP8 models with 
`apply_router_weight_on_input` # This applies topk_weights on the GEMM output of the first FC layer # rather than the second FC. @@ -349,8 +349,8 @@ def rocm_aiter_fused_experts( w2, topk_weights, topk_ids, - fc1_scale=w1_scale, - fc2_scale=w2_scale, + fc1_scale=quant_config.w1_scale, + fc2_scale=quant_config.w2_scale, fc1_smooth_scale=None, fc2_smooth_scale=None, a16=False, @@ -362,14 +362,14 @@ def rocm_aiter_fused_experts( quant_method = QuantMethod.NO.value # w8a8 block-scaled - if block_shape is not None and use_fp8_w8a8: + if quant_config.block_shape is not None and quant_config.use_fp8_w8a8: assert not apply_router_weight_on_input, ( "apply_router_weight_on_input is\ not supported for block scaled moe") - assert w1_scale is not None - assert w2_scale is not None + assert quant_config.w1_scale is not None + assert quant_config.w2_scale is not None quant_method = QuantMethod.BLOCK_128x128.value - elif use_fp8_w8a8: + elif quant_config.use_fp8_w8a8: # Currently only per tensor quantization method is enabled. quant_method = QuantMethod.PER_TENSOR.value @@ -390,10 +390,10 @@ def rocm_aiter_fused_experts( expert_mask=expert_mask, quant_method=quant_method, activation_method=activation_method, - w1_scale=w1_scale, - w2_scale=w2_scale, - a1_scale=a1_scale, - a2_scale=a2_scale, + w1_scale=quant_config.w1_scale, + w2_scale=quant_config.w2_scale, + a1_scale=quant_config.a1_scale, + a2_scale=quant_config.a2_scale, doweight_stage1=apply_router_weight_on_input) diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py index 6cd81d97f029..b2dbc306a614 100644 --- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py @@ -7,7 +7,8 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.deep_gemm_moe import ( - DeepGemmExperts, _valid_deep_gemm, _valid_deep_gemm_shape, + DeepGemmExperts, _valid_deep_gemm, _valid_deep_gemm_shape) +from vllm.model_executor.layers.fused_moe.deep_gemm_utils import ( deep_gemm_block_shape) from vllm.model_executor.layers.fused_moe.fused_moe import TritonExperts from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used @@ -17,40 +18,19 @@ class TritonOrDeepGemmExperts(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, - use_fp8_w8a8: bool = False, - use_int8_w8a8: bool = False, - use_int8_w8a16: bool = False, - use_int4_w4a16: bool = False, - use_mxfp4_w4a4: bool = False, - per_act_token_quant: bool = False, - block_shape: Optional[list[int]] = None, + quant_config: FusedMoEQuantConfig, allow_deep_gemm: bool = False, ): - super().__init__( - FusedMoEQuantConfig.make( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int8_w8a16=use_int8_w8a16, - use_int4_w4a16=use_int4_w4a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - )) - self.triton_expert = TritonExperts( - use_fp8_w8a8=use_fp8_w8a8, - use_int8_w8a8=use_int8_w8a8, - use_int4_w4a16=use_int4_w4a16, - use_int8_w8a16=use_int8_w8a16, - use_mxfp4_w4a4=use_mxfp4_w4a4, - per_act_token_quant=per_act_token_quant, - block_shape=block_shape, - ) + super().__init__(quant_config) + + self.triton_expert = TritonExperts(quant_config) - self.allow_deep_gemm = (allow_deep_gemm and use_fp8_w8a8 and + self.allow_deep_gemm = (allow_deep_gemm + and self.quant_config.use_fp8_w8a8 and 
self.block_shape == deep_gemm_block_shape()) self.deep_gemm_expert = DeepGemmExperts( - ) if self.allow_deep_gemm else None + self.quant_config) if self.allow_deep_gemm else None @property def activation_formats( @@ -130,12 +110,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -158,12 +133,7 @@ def apply( activation, global_num_experts, expert_map, - w1_scale, - w2_scale, - w1_zp, - w2_zp, a1q_scale, - a2_scale, workspace13, workspace2, expert_tokens_meta, diff --git a/vllm/model_executor/layers/fused_moe/trtllm_moe.py b/vllm/model_executor/layers/fused_moe/trtllm_moe.py index 14dfce4b0e3a..8e5f6acc9df6 100644 --- a/vllm/model_executor/layers/fused_moe/trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/trtllm_moe.py @@ -5,7 +5,8 @@ import torch import vllm.model_executor.layers.fused_moe.modular_kernel as mk -from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceNoOP) from vllm.utils import next_power_of_2 @@ -16,20 +17,17 @@ class TrtLlmGenExperts(mk.FusedMoEPermuteExpertsUnpermute): def __init__( self, moe: FusedMoEConfig, + quant_config: FusedMoEQuantConfig, gemm1_alpha, gemm1_beta, gemm1_clamp_limit, - w13_bias, - w2_bias, max_capture_size, ): - super().__init__(moe.quant_config) + super().__init__(quant_config) self.moe = moe self.gemm1_alpha = gemm1_alpha self.gemm1_beta = gemm1_beta self.gemm1_clamp_limit = gemm1_clamp_limit - self.w13_bias = w13_bias - self.w2_bias = w2_bias self.max_capture_size = max_capture_size @property @@ -104,12 +102,7 @@ def apply( activation: str, global_num_experts: int, expert_map: Optional[torch.Tensor], - w1_scale: Optional[torch.Tensor], - w2_scale: Optional[torch.Tensor], - w1_zp: Optional[torch.Tensor], - w2_zp: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], - a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -129,8 +122,8 @@ def apply( packed_tensor = (topk_ids.to(torch.int32) << 16) | topk_weights.to( torch.bfloat16).view(torch.int16) - assert w1_scale is not None - assert w2_scale is not None + assert self.w1_scale is not None + assert self.w2_scale is not None kwargs = { "topk_ids": packed_tensor, @@ -143,9 +136,9 @@ def apply( "gemm1_weights": w1, "gemm1_weights_scale": - w1_scale, + self.w1_scale, "gemm1_bias": - self.w13_bias, + self.w1_bias, "gemm1_alpha": self.gemm1_alpha, "gemm1_beta": @@ -155,7 +148,7 @@ def apply( "gemm2_weights": w2, "gemm2_weights_scale": - w2_scale, + self.w2_scale, "gemm2_bias": self.w2_bias, "output1_scale_scalar": diff --git a/vllm/model_executor/layers/fused_moe/utils.py b/vllm/model_executor/layers/fused_moe/utils.py index 1aeb3f92bc3e..678942e568d8 100644 --- a/vllm/model_executor/layers/fused_moe/utils.py +++ b/vllm/model_executor/layers/fused_moe/utils.py @@ -268,3 +268,7 @@ def _validate_scale_shape( assert block_shape is not None expected = (a.shape[0], cdiv(a.shape[1], block_shape[1])) assert a_scale.shape == expected, 
f"{a_scale.shape} == {expected}" + + +def activation_without_mul(activation: str) -> str: + return activation + "_no_mul" diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py index bf99f0823b74..060d6e84a944 100644 --- a/vllm/model_executor/layers/quantization/awq_marlin.py +++ b/vllm/model_executor/layers/quantization/awq_marlin.py @@ -9,8 +9,10 @@ import vllm.model_executor.layers.fused_moe # noqa from vllm import _custom_ops as ops from vllm.logger import init_logger +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.layer import ( - FusedMoE, FusedMoEConfig, FusedMoEMethodBase, FusedMoeWeightScaleSupported, + FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported, UnquantizedFusedMoEMethod) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod, @@ -483,6 +485,10 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: if hasattr(layer, "w2_bias") and layer.w2_bias is not None: layer.w2_bias.data = marlin_permute_bias(layer.w2_bias) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index 2245c59af6fe..650dab8df87e 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -6,8 +6,9 @@ import torch from packaging import version +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.layer import (FusedMoE, - FusedMoEConfig, FusedMoEMethodBase) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod, @@ -452,6 +453,10 @@ def create_weights( **extra_weight_attrs, ) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, @@ -509,6 +514,7 @@ def apply( apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, expert_map=expert_map, + quant_config=self.moe_quant_config, ) def _create_weights_4bit( diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 5470deb76845..85adae32f4cd 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -16,8 +16,11 @@ from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe import ( FusedMoE, FusedMoEActivationFormat, FusedMoEConfig, FusedMoEMethodBase, - FusedMoEPermuteExpertsUnpermute, FusedMoEPrepareAndFinalize, - FusedMoeWeightScaleSupported) + FusedMoEPermuteExpertsUnpermute, FusedMoeWeightScaleSupported) +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, fp8_w8a8_moe_quant_config, + int4_w4a16_moe_quant_config, int8_w8a8_moe_quant_config, + int8_w8a16_moe_quant_config, nvfp4_moe_quant_config) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( is_valid_flashinfer_cutlass_fused_moe) from 
vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import ( # noqa @@ -122,7 +125,7 @@ def get_moe_method( return CompressedTensorsWNA16MarlinMoEMethod( quant_config, layer.moe_config) elif quant_config._is_fp4a4_nvfp4(weight_quant, input_quant): - return CompressedTensorsW4A4MoeMethod(layer.moe_config, layer) + return CompressedTensorsW4A4MoeMethod(layer.moe_config) elif (quant_config._is_fp8_w8a8_sm90(weight_quant, input_quant) or quant_config._is_fp8_w8a8_sm100(weight_quant, input_quant) or quant_config._is_fp8_w8a8(weight_quant, input_quant)): @@ -138,7 +141,7 @@ def get_moe_method( class CompressedTensorsW4A4MoeMethod(CompressedTensorsMoEMethod): - def __init__(self, moe: FusedMoEConfig, layer: torch.nn.Module): + def __init__(self, moe: FusedMoEConfig): from vllm.model_executor.layers.quantization.utils.nvfp4_moe_support import ( # noqa: E501 detect_nvfp4_moe_support) super().__init__(moe) @@ -147,7 +150,6 @@ def __init__(self, moe: FusedMoEConfig, layer: torch.nn.Module): self.allow_flashinfer = _nvfp4.allow_flashinfer self.use_marlin = _nvfp4.use_marlin self.group_size = 16 - self.layer = layer def create_weights(self, layer: torch.nn.Module, num_experts: int, hidden_size: int, intermediate_size_per_partition: int, @@ -305,37 +307,46 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: (layer.w2_input_global_scale), requires_grad=False) def maybe_make_prepare_finalize( - self, - moe: FusedMoEConfig, - ) -> Optional[mk.FusedMoEPrepareAndFinalize]: - if not self.allow_flashinfer: - return super().maybe_make_prepare_finalize(moe) + self) -> Optional[mk.FusedMoEPrepareAndFinalize]: + if self.use_marlin: + return None + elif not self.allow_flashinfer: + return super().maybe_make_prepare_finalize() prepare_finalize = build_flashinfer_fp4_cutlass_moe_prepare_finalize( - moe, - a1_gscale=self.layer.w13_input_scale_quant, - ) + self.moe) logger.debug_once("%s", prepare_finalize.__class__.__name__) return prepare_finalize def select_gemm_impl( self, prepare_finalize: mk.FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> mk.FusedMoEPermuteExpertsUnpermute: + assert self.moe_quant_config is not None """Return the appropriate GEMM experts implementation.""" experts = select_nvfp4_gemm_impl( - moe, - g1_alphas=self.layer.g1_alphas, - g2_alphas=self.layer.g2_alphas, - a1_gscale=self.layer.w13_input_scale_quant, - a2_gscale=self.layer.w2_input_scale_quant, + self.moe, + self.moe_quant_config, allow_flashinfer=self.allow_flashinfer, ) logger.debug_once("Using %s", experts.__class__.__name__) return experts + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if self.use_marlin: + return None + + return nvfp4_moe_quant_config( + g1_alphas=layer.g1_alphas, + g2_alphas=layer.g2_alphas, + a1_gscale=layer.w13_input_scale_quant, + a2_gscale=layer.w2_input_scale_quant, + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + ) + def apply( self, layer: torch.nn.Module, @@ -359,8 +370,6 @@ def apply( logical_to_physical_map: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: - assert self.fused_experts is None - if enable_eplb: raise NotImplementedError("EPLB not supported for " "`CompressedTensorsW4A4MoeMethod` yet.") @@ -381,7 +390,12 @@ def apply( indices_type=self.topk_indices_dtype, ) + # + # Note: the order here is important. 
self.fused_experts can override + # flashinfer cutlass, cutlass fp4 or fused_experts but not marlin. + # if self.use_marlin: + assert self.fused_experts is None return torch.ops.vllm.fused_marlin_moe( x, layer.w13_weight, @@ -401,8 +415,7 @@ def apply( expert_map=expert_map, workspace=layer.workspace) - # FlashInfer fused experts path - if self.fused_experts is not None: + elif self.fused_experts is not None: assert is_valid_flashinfer_cutlass_fused_moe( x, layer.w13_weight, layer.w2_weight), ( "Flashinfer CUTLASS Fused MoE not applicable!") @@ -417,11 +430,10 @@ def apply( activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, apply_router_weight_on_input=apply_router_weight_on_input, ) + # FlashInfer fused experts path elif self.allow_flashinfer: from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501 flashinfer_cutlass_moe_fp4) @@ -430,51 +442,46 @@ def apply( x, layer.w13_weight, layer.w2_weight), ( "Flashinfer CUTLASS Fused MoE not applicable!") + assert self.moe_quant_config is not None + return flashinfer_cutlass_moe_fp4( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, topk_weights=topk_weights, topk_ids=topk_ids, + quant_config=self.moe_quant_config, inplace=False, # TODO(shuw): fix later, now output is high prec activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - g1_alphas=layer.g1_alphas, - g2_alphas=layer.g2_alphas, - a1_gscale=layer.w13_input_scale_quant, - a2_gscale=layer.w2_input_scale_quant, apply_router_weight_on_input=apply_router_weight_on_input, ) - - assert expert_map is None, ("Expert Parallelism / expert_map " - "is currently not supported for " - "CompressedTensorsW4A4MoeMethod.") - from vllm.model_executor.layers.fused_moe.cutlass_moe import ( - cutlass_moe_fp4) - - # Cutlass moe takes in activations in BF16/Half precision - # and fp4 quantized weights loaded from the checkpoint - return cutlass_moe_fp4( - a=x, - w1_fp4=layer.w13_weight, - w2_fp4=layer.w2_weight, - w1_blockscale=layer.w13_weight_scale, - w2_blockscale=layer.w2_weight_scale, - g1_alphas=layer.g1_alphas, - g2_alphas=layer.g2_alphas, - a1_gscale=layer.w13_input_scale_quant, - a2_gscale=layer.w2_input_scale_quant, - topk_weights=topk_weights, - topk_ids=topk_ids, - m=x.shape[0], - n=layer.w2_weight.shape[2] * 2, - k=x.shape[1], - e=layer.w13_weight.shape[0], - apply_router_weight_on_input=apply_router_weight_on_input).to( - x.dtype) + else: + from vllm.model_executor.layers.fused_moe.cutlass_moe import ( + cutlass_moe_fp4) + + assert expert_map is None, ("Expert Parallelism / expert_map " + "is currently not supported for " + "CompressedTensorsW4A4MoeMethod.") + assert self.moe_quant_config is not None + + # Cutlass moe takes in activations in BF16/Half precision + # and fp4 quantized weights loaded from the checkpoint + return cutlass_moe_fp4( + a=x, + w1_fp4=layer.w13_weight, + w2_fp4=layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + quant_config=self.moe_quant_config, + apply_router_weight_on_input=apply_router_weight_on_input, + # TODO(bnell): derive these from arguments + m=x.shape[0], + n=layer.w2_weight.shape[2] * 2, + k=x.shape[1], + e=layer.w13_weight.shape[0], + ).to(x.dtype) class CompressedTensorsW8A8Fp8MoEMethod(CompressedTensorsMoEMethod): @@ -692,16 +699,11 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> 
None: layer.w2_weight = torch.nn.Parameter(shuffled_w2, requires_grad=False) - self.rocm_aiter_fused_experts_func = rocm_aiter_fused_experts elif self.use_marlin: prepare_moe_fp8_layer_for_marlin(layer, False) # Activations not quantized for marlin. del layer.w13_input_scale del layer.w2_input_scale - self.fused_experts_func = None - else: - from vllm.model_executor.layers.fused_moe import fused_experts - self.fused_experts_func = fused_experts if self.use_cutlass: device = layer.w13_weight.device @@ -722,11 +724,20 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: device=device, dtype=torch.int64) + def maybe_make_prepare_finalize( + self) -> Optional[mk.FusedMoEPrepareAndFinalize]: + if self.use_marlin or self.rocm_aiter_moe_enabled: + return None + else: + return super().maybe_make_prepare_finalize() + def select_gemm_impl( - self, prepare_finalize: FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, - layer: torch.nn.Module) -> FusedMoEPermuteExpertsUnpermute: + self, + prepare_finalize: mk.FusedMoEPrepareAndFinalize, + layer: torch.nn.Module, + ) -> FusedMoEPermuteExpertsUnpermute: # cutlass path + assert self.moe_quant_config is not None if self.use_cutlass: from vllm.model_executor.layers.fused_moe import ( CutlassBatchedExpertsFp8, CutlassExpertsFp8) @@ -740,26 +751,24 @@ def select_gemm_impl( logger.debug("CutlassBatchedExpertsFp8(%s)", self.__class__.__name__) experts = CutlassBatchedExpertsFp8( - moe.num_local_experts, + self.moe.num_local_experts, num_dispatchers, - moe.in_dtype, - self.input_quant.strategy == QuantizationStrategy.TOKEN, - self.weight_quant.strategy == QuantizationStrategy.CHANNEL, + self.moe.in_dtype, ab_strides1=self.ab_strides1_c_strides2, ab_strides2=self.ab_strides2, c_strides1=self.c_strides1, c_strides2=self.ab_strides1_c_strides2, + quant_config=self.moe_quant_config, ) else: logger.debug("CutlassExpertsFp8(%s)", self.__class__.__name__) experts = CutlassExpertsFp8( - moe.in_dtype, - self.input_quant.strategy == QuantizationStrategy.TOKEN, - self.weight_quant.strategy == QuantizationStrategy.CHANNEL, + self.moe.in_dtype, ab_strides1=self.ab_strides1_c_strides2, ab_strides2=self.ab_strides2, c_strides1=self.c_strides1, c_strides2=self.ab_strides1_c_strides2, + quant_config=self.moe_quant_config, ) self.disable_expert_map = (num_dispatchers > 1 @@ -774,29 +783,40 @@ def select_gemm_impl( assert not self.rocm_aiter_moe_enabled and not self.use_marlin - logger.debug("BatchedTritonExperts(%s)", self.__class__.__name__) - if (prepare_finalize.activation_format == FusedMoEActivationFormat.BatchedExperts): max_num_tokens_per_rank = prepare_finalize.max_num_tokens_per_rank( ) assert max_num_tokens_per_rank is not None + logger.debug("BatchedTritonExperts(%s)", self.__class__.__name__) return BatchedTritonExperts( max_num_tokens=max_num_tokens_per_rank, num_dispatchers=prepare_finalize.num_dispatchers(), - use_fp8_w8a8=True, - block_shape=self.quant_config.weight_block_size, - per_act_token_quant=( - self.input_quant.strategy == QuantizationStrategy.TOKEN), + quant_config=self.moe_quant_config, ) else: - return TritonExperts( - use_fp8_w8a8=True, - block_shape=self.quant_config.weight_block_size, - per_act_token_quant=( - self.input_quant.strategy == QuantizationStrategy.TOKEN), - ) + logger.debug("TritonExperts(%s)", self.__class__.__name__) + return TritonExperts(self.moe_quant_config) + + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if self.use_marlin: + return None + + 
per_act_token = ( + self.input_quant.strategy == QuantizationStrategy.TOKEN) + per_channel_quant = ( + self.weight_quant.strategy == QuantizationStrategy.CHANNEL) + + return fp8_w8a8_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + per_act_token_quant=per_act_token, + per_out_ch_quant=per_channel_quant, + ) def apply( self, @@ -841,16 +861,74 @@ def apply( indices_type=self.topk_indices_dtype, ) + per_act_token = ( + self.input_quant.strategy == QuantizationStrategy.TOKEN) + per_channel_quant = ( + self.weight_quant.strategy == QuantizationStrategy.CHANNEL) + + # + # Note: the order here is important. self.fused_experts can override + # cutlass fp8 or fused_experts but not marlin or rocm. + # + if self.use_marlin: + assert activation == "silu", ( + f"{activation} not supported for Marlin MoE.") + assert self.fused_experts is None + return torch.ops.vllm.fused_marlin_moe( + x, + layer.w13_weight, + layer.w2_weight, + None, + None, + layer.w13_weight_scale, + layer.w2_weight_scale, + router_logits, + topk_weights, + topk_ids, + quant_type_id=scalar_types.float8_e4m3fn.id, + apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, + expert_map=expert_map, + workspace=layer.workspace) + + elif self.rocm_aiter_moe_enabled: + from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa E501 + rocm_aiter_fused_experts) + assert per_act_token == per_channel_quant + assert self.moe_quant_config is not None + assert self.fused_experts is None + return rocm_aiter_fused_experts( + hidden_states=x, + w1=layer.w13_weight, + w2=layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + activation=activation, + apply_router_weight_on_input=apply_router_weight_on_input, + expert_map=expert_map, + quant_config=self.moe_quant_config, + ) + + elif self.fused_experts is not None: + return self.fused_experts( + x, + layer.w13_weight, + layer.w2_weight, + topk_weights, + topk_ids, + activation=activation, + global_num_experts=global_num_experts, + expert_map=None if self.disable_expert_map else expert_map, + ) + # cutlass path - if self.use_cutlass: - per_act_token = ( - self.input_quant.strategy == QuantizationStrategy.TOKEN) - per_channel_quant = ( - self.weight_quant.strategy == QuantizationStrategy.CHANNEL) + elif self.use_cutlass: + assert self.moe_quant_config is not None # small-batch fallback on SM100 if self.is_fp8_w8a8_sm100 and topk_ids.shape[0] <= 8: from vllm.model_executor.layers.fused_moe import fused_experts + assert per_act_token == per_channel_quant return fused_experts( hidden_states=x, w1=layer.w13_weight, @@ -860,110 +938,48 @@ def apply( inplace=True, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=True, - per_channel_quant=per_channel_quant, global_num_experts=global_num_experts, expert_map=None if self.disable_expert_map else expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale) - - if self.fused_experts is None: + quant_config=self.moe_quant_config, + ) + else: from vllm.model_executor.layers.fused_moe.cutlass_moe import ( cutlass_moe_fp8) + assert per_act_token == per_channel_quant + assert self.moe_quant_config is not None return cutlass_moe_fp8( x, layer.w13_weight, layer.w2_weight, topk_weights, topk_ids, - per_act_token=per_act_token, + 
quant_config=self.moe_quant_config, activation=activation, global_num_experts=global_num_experts, expert_map=None if self.disable_expert_map else expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, ab_strides1=self.ab_strides1_c_strides2, ab_strides2=self.ab_strides2, c_strides1=self.c_strides1, c_strides2=self.ab_strides1_c_strides2, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - ) - else: - return self.fused_experts( - x, - layer.w13_weight, - layer.w2_weight, - topk_weights, - topk_ids, - activation=activation, - global_num_experts=global_num_experts, - expert_map=None if self.disable_expert_map else expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, ) - if self.rocm_aiter_moe_enabled: - return self.rocm_aiter_fused_experts_func( + else: + from vllm.model_executor.layers.fused_moe import fused_experts + assert per_act_token == per_channel_quant + assert self.moe_quant_config is not None + return fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, topk_weights=topk_weights, topk_ids=topk_ids, + inplace=True, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=True, - per_channel_quant=self.weight_quant.strategy == - QuantizationStrategy.CHANNEL, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - expert_map=expert_map) - if self.use_marlin: - assert activation == "silu", ( - f"{activation} not supported for Marlin MoE.") - return torch.ops.vllm.fused_marlin_moe( - x, - layer.w13_weight, - layer.w2_weight, - None, - None, - layer.w13_weight_scale, - layer.w2_weight_scale, - router_logits, - topk_weights, - topk_ids, - quant_type_id=scalar_types.float8_e4m3fn.id, - apply_router_weight_on_input=apply_router_weight_on_input, global_num_experts=global_num_experts, expert_map=expert_map, - workspace=layer.workspace) - - assert self.fused_experts_func is not None - - return self.fused_experts_func( - hidden_states=x, - w1=layer.w13_weight, - w2=layer.w2_weight, - topk_weights=topk_weights, - topk_ids=topk_ids, - inplace=True, - activation=activation, - apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=True, - per_channel_quant=self.weight_quant.strategy == - QuantizationStrategy.CHANNEL, - global_num_experts=global_num_experts, - expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale) + quant_config=self.moe_quant_config, + ) class CompressedTensorsW8A8Int8MoEMethod(CompressedTensorsMoEMethod): @@ -1049,6 +1065,16 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, def process_weights_after_loading(self, layer: torch.nn.Module) -> None: pass + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return int8_w8a8_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + per_act_token_quant=True, + ) + def apply( self, layer: torch.nn.Module, @@ -1104,14 +1130,10 @@ def apply( inplace=True, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input, - use_int8_w8a8=True, - per_channel_quant=True, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - 
w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale) + quant_config=self.moe_quant_config, + ) class CompressedTensorsWNA16MarlinMoEMethod(CompressedTensorsMoEMethod): @@ -1355,6 +1377,10 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.workspace = marlin_make_workspace_new(device, 4) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, @@ -1588,6 +1614,20 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.w2_weight_scale.transpose(1, 2).contiguous(), requires_grad=False) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + assert self.num_bits == 4 or self.num_bits == 8 + config_builder = (int4_w4a16_moe_quant_config if self.num_bits == 4 + else int8_w8a16_moe_quant_config) + + return config_builder( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + w1_zp=None, + w2_zp=None, + block_shape=[0, self.group_size], + ) + def apply( self, layer: torch.nn.Module, @@ -1641,13 +1681,8 @@ def apply( topk_ids=topk_ids, inplace=True, activation=activation, - use_int4_w4a16=self.num_bits == 4, - use_int8_w8a16=self.num_bits == 8, - global_num_experts=global_num_experts, apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - w1_zp=None, - w2_zp=None, - block_shape=[0, self.group_size]) + quant_config=self.moe_quant_config, + ) diff --git a/vllm/model_executor/layers/quantization/experts_int8.py b/vllm/model_executor/layers/quantization/experts_int8.py index b361fe9bea08..8555e9ff2034 100644 --- a/vllm/model_executor/layers/quantization/experts_int8.py +++ b/vllm/model_executor/layers/quantization/experts_int8.py @@ -8,6 +8,8 @@ from vllm.distributed import get_tensor_model_parallel_rank, get_tp_group from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig, FusedMoEMethodBase) +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, int8_w8a16_moe_quant_config) from vllm.model_executor.layers.linear import (LinearBase, UnquantizedLinearMethod) from vllm.model_executor.layers.quantization import QuantizationMethods @@ -106,6 +108,13 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, requires_grad=False) layer.register_parameter("w2_scale", w2_scale) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return int8_w8a16_moe_quant_config(w1_scale=layer.w13_scale, + w2_scale=layer.w2_scale, + w1_zp=None, + w2_zp=None) + def apply( self, layer: torch.nn.Module, @@ -159,12 +168,11 @@ def apply( topk_ids=topk_ids, inplace=True, activation=activation, - use_int8_w8a16=True, - global_num_experts=global_num_experts, apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_scale, - w2_scale=layer.w2_scale) + quant_config=self.moe_quant_config, + ) @staticmethod def quantizing_weight_loader(layer, weight_loader): diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 254cc2be05ee..e75094c54743 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -14,9 +14,11 @@ from vllm.distributed 
import get_tensor_model_parallel_world_size from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe import ( - FusedMoE, FusedMoEActivationFormat, FusedMoEConfig, FusedMoEMethodBase, + FusedMoE, FusedMoEActivationFormat, FusedMoEMethodBase, FusedMoEPermuteExpertsUnpermute, FusedMoEPrepareAndFinalize, FusedMoeWeightScaleSupported) +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, fp8_w8a8_moe_quant_config) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod) from vllm.model_executor.layers.quantization import QuantizationMethods @@ -575,20 +577,6 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module): "CutlassBlockScaledGroupedGemm not supported on the current " "platform.") - def maybe_make_prepare_finalize( - self, - moe: FusedMoEConfig, - ) -> Optional[mk.FusedMoEPrepareAndFinalize]: - if self.flashinfer_moe_backend != FlashinferMoeBackend.CUTLASS: - return super().maybe_make_prepare_finalize(moe) - - prepare_finalize = build_flashinfer_fp8_cutlass_moe_prepare_finalize( - moe, - layer=self.layer, - ) - logger.debug_once("%s", prepare_finalize.__class__.__name__) - return prepare_finalize - def create_weights(self, layer: Module, num_experts: int, hidden_size: int, intermediate_size_per_partition: int, params_dtype: torch.dtype, **extra_weight_attrs): @@ -928,10 +916,23 @@ def process_weights_after_loading(self, layer: Module) -> None: layer.w2_weight_scale_inv = get_col_major_tma_aligned_tensor( layer.w2_weight_scale_inv) + def maybe_make_prepare_finalize( + self) -> Optional[mk.FusedMoEPrepareAndFinalize]: + if (self.rocm_aiter_moe_enabled or self.use_marlin + or self.flashinfer_moe_backend + == FlashinferMoeBackend.TENSORRT_LLM): + return None + elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: + prepare_finalize = ( + build_flashinfer_fp8_cutlass_moe_prepare_finalize(self.moe)) + logger.debug_once("%s", prepare_finalize.__class__.__name__) + return prepare_finalize + else: + return super().maybe_make_prepare_finalize() + def select_gemm_impl( self, prepare_finalize: FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> FusedMoEPermuteExpertsUnpermute: from vllm.model_executor.layers.fused_moe import ( @@ -940,6 +941,8 @@ def select_gemm_impl( assert not self.use_marlin and not self.rocm_aiter_moe_enabled, ( "Marlin and ROCm AITER are not supported with all2all yet.") + assert self.moe_quant_config is not None + if (prepare_finalize.activation_format == FusedMoEActivationFormat.BatchedExperts): max_num_tokens_per_rank = ( @@ -953,15 +956,13 @@ def select_gemm_impl( return BatchedTritonOrDeepGemmExperts( max_num_tokens=max_num_tokens_per_rank, num_dispatchers=prepare_finalize.num_dispatchers(), - use_fp8_w8a8=True, - block_shape=self.quant_config.weight_block_size, - per_act_token_quant=False, + quant_config=self.moe_quant_config, allow_deep_gemm=self.allow_deep_gemm, ) elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: experts = select_cutlass_fp8_gemm_impl( - moe, - self.layer, + self.moe, + self.moe_quant_config, ) logger.debug_once("Using %s", experts.__class__.__name__) return experts @@ -971,11 +972,25 @@ def select_gemm_impl( self.__class__.__name__, self.quant_config.weight_block_size, False) return TritonOrDeepGemmExperts( - use_fp8_w8a8=True, - block_shape=self.quant_config.weight_block_size, + quant_config=self.moe_quant_config, allow_deep_gemm=self.allow_deep_gemm, ) + def get_fused_moe_quant_config( + 
self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if self.use_marlin: + return None + + return fp8_w8a8_moe_quant_config( + w1_scale=(layer.w13_weight_scale_inv + if self.block_quant else layer.w13_weight_scale), + w2_scale=(layer.w2_weight_scale_inv + if self.block_quant else layer.w2_weight_scale), + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + block_shape=self.quant_config.weight_block_size, + ) + def apply( self, layer: torch.nn.Module, @@ -1005,12 +1020,14 @@ def apply( assert logical_replica_count is not None assert isinstance(layer, FusedMoE) - if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM: + if (self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM + and self.fused_experts is None): assert activation == 'silu', ( f"Expected 'silu' activation but got {activation}") assert scoring_func == 'sigmoid', ( f"Expected 'sigmoid' scoring func but got {scoring_func}") if self.block_quant: + import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe # noqa: E501, F401 assert (renormalize and use_grouped_topk and custom_routing_function is None) @@ -1066,9 +1083,14 @@ def apply( logical_replica_count=logical_replica_count, ) + # + # Note: the order of checks is important since self.fused_experts + # can override fused_experts or cutlass but not rocm or marlin. + # if self.rocm_aiter_moe_enabled: from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501 rocm_aiter_fused_experts) + assert self.fused_experts is None return rocm_aiter_fused_experts( x, layer.w13_weight, @@ -1076,19 +1098,13 @@ def apply( topk_weights=topk_weights, topk_ids=topk_ids, activation=activation, - use_fp8_w8a8=True, apply_router_weight_on_input=apply_router_weight_on_input, - w1_scale=(layer.w13_weight_scale_inv - if self.block_quant else layer.w13_weight_scale), - w2_scale=(layer.w2_weight_scale_inv - if self.block_quant else layer.w2_weight_scale), - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - block_shape=self.quant_config.weight_block_size, - expert_map=expert_map) + expert_map=expert_map, + quant_config=self.moe_quant_config) elif self.use_marlin: assert activation == "silu", ( f"{activation} not supported for Marlin MoE.") + assert self.fused_experts is None return torch.ops.vllm.fused_marlin_moe( x, layer.w13_weight, @@ -1105,6 +1121,19 @@ def apply( global_num_experts=global_num_experts, expert_map=expert_map, workspace=layer.workspace) + elif self.fused_experts: + return self.fused_experts( + hidden_states=x, + w1=layer.w13_weight, + w2=layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + inplace=True, + activation=activation, + global_num_experts=global_num_experts, + apply_router_weight_on_input=apply_router_weight_on_input, + expert_map=expert_map, + ) elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: assert self.block_quant is None assert (not renormalize and custom_routing_function is not None) @@ -1112,33 +1141,21 @@ def apply( f"Expected 'silu' activation but got {activation}") assert scoring_func == 'sigmoid', ( f"Expected 'sigmoid' scoring func but got {scoring_func}") - if self.fused_experts is not None: - return self.fused_experts( - x, - layer.w13_weight, - layer.w2_weight, - topk_weights, - topk_ids, - inplace=False, - activation=activation, - global_num_experts=global_num_experts, - expert_map=expert_map, - apply_router_weight_on_input=apply_router_weight_on_input, - ) - else: - return flashinfer_cutlass_moe_fp8( - x, - layer, - topk_weights, 
- topk_ids, - inplace=False, - activation=activation, - global_num_experts=global_num_experts, - expert_map=expert_map, - apply_router_weight_on_input=apply_router_weight_on_input, - ) + + return flashinfer_cutlass_moe_fp8( + x, + layer, + topk_weights, + topk_ids, + inplace=False, + activation=activation, + global_num_experts=global_num_experts, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + ) else: - common_kwargs = dict( + from vllm.model_executor.layers.fused_moe import fused_experts + return fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -1149,26 +1166,10 @@ def apply( global_num_experts=global_num_experts, apply_router_weight_on_input=apply_router_weight_on_input, expert_map=expert_map, - w1_scale=(layer.w13_weight_scale_inv - if self.block_quant else layer.w13_weight_scale), - w2_scale=(layer.w2_weight_scale_inv - if self.block_quant else layer.w2_weight_scale), - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - ) - - if self.fused_experts is not None: - return self.fused_experts(**common_kwargs) - else: - from vllm.model_executor.layers.fused_moe import fused_experts - return fused_experts( - **common_kwargs, - use_fp8_w8a8=True, - block_shape=self.quant_config.weight_block_size, - allow_deep_gemm=self.allow_deep_gemm, - allow_cutlass_block_scaled_grouped_gemm=( - self.allow_cutlass_block_scaled_grouped_gemm), - ) + quant_config=self.moe_quant_config, + allow_deep_gemm=self.allow_deep_gemm, + allow_cutlass_block_scaled_grouped_gemm=( + self.allow_cutlass_block_scaled_grouped_gemm)) class Fp8KVCacheMethod(BaseKVCacheMethod): diff --git a/vllm/model_executor/layers/quantization/gguf.py b/vllm/model_executor/layers/quantization/gguf.py index 01af1ccd9ae0..a631dfdab654 100644 --- a/vllm/model_executor/layers/quantization/gguf.py +++ b/vllm/model_executor/layers/quantization/gguf.py @@ -10,8 +10,9 @@ from vllm import _custom_ops as ops from vllm.logger import init_logger +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.layer import (FusedMoE, - FusedMoEConfig, FusedMoEMethodBase) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod) @@ -518,6 +519,10 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, set_weight_attrs(w2_qweight_type, extra_weight_attrs) layer.register_parameter("w2_qweight_type", w2_qweight_type) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 76de3a59c8ca..e06b974255f0 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -9,8 +9,10 @@ import vllm.model_executor.layers.fused_moe # noqa from vllm import _custom_ops as ops from vllm.logger import init_logger +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.layer import ( - FusedMoE, FusedMoEConfig, FusedMoEMethodBase, FusedMoeWeightScaleSupported, + FusedMoE, FusedMoEMethodBase, FusedMoeWeightScaleSupported, UnquantizedFusedMoEMethod) from vllm.model_executor.layers.linear import (LinearMethodBase, set_weight_attrs) @@ -632,6 +634,10 @@ def process_weights_after_loading(self, 
layer: torch.nn.Module) -> None: if hasattr(layer, "w2_bias") and layer.w2_bias is not None: layer.w2_bias.data = marlin_permute_bias(layer.w2_bias) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py index 5f9d4814274c..c83b0b47a4b7 100644 --- a/vllm/model_executor/layers/quantization/ipex_quant.py +++ b/vllm/model_executor/layers/quantization/ipex_quant.py @@ -11,6 +11,7 @@ from vllm._ipex_ops import ipex_ops as ops from vllm.model_executor.layers.fused_moe import (FusedMoEMethodBase, FusedMoeWeightScaleSupported) +from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod) from vllm.model_executor.layers.quantization import QuantizationMethods @@ -375,6 +376,10 @@ def process_weights_after_loading(self, layer: Module) -> None: use_prepack=True, ) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return None + def apply( self, layer: torch.nn.Module, diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 60a79e53e814..7eac40825ac3 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -11,7 +11,9 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant from vllm.logger import init_logger -from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEConfig, FusedMoEQuantConfig, fp8_w8a8_moe_quant_config, + nvfp4_moe_quant_config) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( is_valid_flashinfer_cutlass_fused_moe) from vllm.model_executor.layers.fused_moe.layer import ( @@ -294,8 +296,6 @@ def __init__( cutlass_fp8_supported) self.cutlass_fp8_supported = cutlass_fp8_supported() self.flashinfer_moe_backend: Optional[FlashinferMoeBackend] = None - self.fused_experts: Optional[ - mk.FusedMoEModularKernel] = None # type: ignore if envs.VLLM_USE_FLASHINFER_MOE_FP8 and has_flashinfer_moe(): self.flashinfer_moe_backend = get_flashinfer_moe_backend() logger.info_once( @@ -303,29 +303,27 @@ def __init__( ) def maybe_make_prepare_finalize( - self, - moe: FusedMoEConfig, - ) -> Optional[mk.FusedMoEPrepareAndFinalize]: - if self.fused_experts is not None or \ - self.flashinfer_moe_backend != FlashinferMoeBackend.CUTLASS: - return super().maybe_make_prepare_finalize(moe) - - prepare_finalize = build_flashinfer_fp8_cutlass_moe_prepare_finalize( - moe, - layer=self.layer, - ) - logger.debug_once("%s", prepare_finalize.__class__.__name__) - return prepare_finalize + self, ) -> Optional[mk.FusedMoEPrepareAndFinalize]: + # TRT LLM not supported with all2all yet. 
+ if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM: + return None + elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: + prepare_finalize = ( + build_flashinfer_fp8_cutlass_moe_prepare_finalize(self.moe)) + logger.debug_once("%s", prepare_finalize.__class__.__name__) + return prepare_finalize + else: + return super().maybe_make_prepare_finalize() def select_gemm_impl( self, prepare_finalize: mk.FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> mk.FusedMoEPermuteExpertsUnpermute: + assert self.moe_quant_config is not None experts = select_cutlass_fp8_gemm_impl( - moe, - self.layer, + self.moe, + self.moe_quant_config, ) logger.debug_once("Using %s", experts.__class__.__name__) return experts @@ -479,6 +477,19 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: rotate_flashinfer_fp8_moe_weights(layer.w13_weight, layer.w2_weight) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM: + return None + + return fp8_w8a8_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + per_act_token_quant=False, + ) + def apply( self, layer: torch.nn.Module, @@ -507,6 +518,7 @@ def apply( "EPLB not supported for `ModelOptFp8MoEMethod` yet.") if self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM: + assert self.fused_experts is None assert activation == 'silu', ( f"Expected 'silu' activation but got {activation}") assert not renormalize @@ -537,55 +549,56 @@ def apply( indices_type=self.topk_indices_dtype, ) - if self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: + # + # Note: the order here is important. self.fused_experts can override + # cutlass or fused_experts. 
+ # + if self.fused_experts is not None: + return self.fused_experts( + x, + layer.w13_weight, + layer.w2_weight, + topk_weights, + topk_ids, + inplace=False, + activation=activation, + global_num_experts=global_num_experts, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + ) + elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS: assert not renormalize assert activation == 'silu', ( f"Expected 'silu' activation but got {activation}") - if self.fused_experts is not None: - return self.fused_experts( - x, - layer.w13_weight, - layer.w2_weight, - topk_weights, - topk_ids, - inplace=False, - activation=activation, - global_num_experts=global_num_experts, - expert_map=expert_map, - apply_router_weight_on_input=apply_router_weight_on_input, - ) - else: - return flashinfer_cutlass_moe_fp8( - x, - layer, - topk_weights, - topk_ids, - inplace=False, - activation=activation, - global_num_experts=global_num_experts, - expert_map=expert_map, - apply_router_weight_on_input=apply_router_weight_on_input, - ) - from vllm.model_executor.layers.fused_moe.fused_moe import ( - fused_experts) - return fused_experts( - x, - layer.w13_weight, - layer.w2_weight, - topk_weights=topk_weights, - topk_ids=topk_ids, - inplace=True, - activation=activation, - use_fp8_w8a8=True, - per_channel_quant=False, - global_num_experts=global_num_experts, - expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, - apply_router_weight_on_input=apply_router_weight_on_input, - ) + return flashinfer_cutlass_moe_fp8( + x, + layer, + topk_weights, + topk_ids, + inplace=False, + activation=activation, + global_num_experts=global_num_experts, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + ) + else: + from vllm.model_executor.layers.fused_moe.fused_moe import ( + fused_experts) + assert self.moe_quant_config is not None + + return fused_experts( + x, + layer.w13_weight, + layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + inplace=True, + activation=activation, + quant_config=self.moe_quant_config, + global_num_experts=global_num_experts, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + ) class ModelOptNvFp4Config(QuantizationConfig): @@ -1034,33 +1047,30 @@ def __init__( " for ModelOptNvFp4FusedMoE.") def maybe_make_prepare_finalize( - self, - moe: FusedMoEConfig, - ) -> Optional[mk.FusedMoEPrepareAndFinalize]: - if (self.allow_flashinfer and self.flashinfer_moe_backend - == FlashinferMoeBackend.CUTLASS): + self) -> Optional[mk.FusedMoEPrepareAndFinalize]: + if (self.use_marlin + or (self.allow_flashinfer and self.flashinfer_moe_backend + == FlashinferMoeBackend.TENSORRT_LLM)): + return None + elif (self.allow_flashinfer + and self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS): + # For now, fp4 moe only works with the flashinfer dispatcher. 
prepare_finalize = ( - build_flashinfer_fp4_cutlass_moe_prepare_finalize( - moe, - a1_gscale=self.layer.w13_input_scale_quant, - )) + build_flashinfer_fp4_cutlass_moe_prepare_finalize(self.moe)) logger.debug_once("%s", prepare_finalize.__class__.__name__) return prepare_finalize - - return super().maybe_make_prepare_finalize(moe) + else: + return super().maybe_make_prepare_finalize() def select_gemm_impl( self, prepare_finalize: mk.FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> mk.FusedMoEPermuteExpertsUnpermute: + assert self.moe_quant_config is not None experts = select_nvfp4_gemm_impl( - moe, - g1_alphas=self.layer.g1_alphas, - g2_alphas=self.layer.g2_alphas, - a1_gscale=self.layer.w13_input_scale_quant, - a2_gscale=self.layer.w2_input_scale_quant, + self.moe, + self.moe_quant_config, allow_flashinfer=self.allow_flashinfer, ) logger.debug_once("Using %s", experts.__class__.__name__) @@ -1360,6 +1370,21 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.w2_weight = Parameter(layer.w2_weight.data, requires_grad=False) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + if (self.use_marlin or self.flashinfer_moe_backend + == FlashinferMoeBackend.TENSORRT_LLM): + return None + + return nvfp4_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + g1_alphas=layer.g1_alphas, + g2_alphas=layer.g2_alphas, + a1_gscale=layer.w13_input_scale_quant, + a2_gscale=layer.w2_input_scale_quant, + ) + def apply( self, layer: torch.nn.Module, @@ -1388,12 +1413,14 @@ def apply( "EPLB not supported for `ModelOptNvFp4FusedMoE` yet.") assert activation == "silu", "Only SiLU activation is supported." - if self.allow_flashinfer and \ - self.flashinfer_moe_backend == FlashinferMoeBackend.TENSORRT_LLM: + if (self.allow_flashinfer and self.flashinfer_moe_backend + == FlashinferMoeBackend.TENSORRT_LLM): import flashinfer from vllm.model_executor.models.llama4 import Llama4MoE + assert self.fused_experts is None + a1_gscale = layer.w13_input_scale_quant (hidden_states_fp4, hidden_states_scale_linear_fp4) = flashinfer.fp4_quantize( @@ -1457,7 +1484,13 @@ def apply( e_score_correction_bias=e_score_correction_bias, indices_type=self.topk_indices_dtype) + # + # Note: the order here is important. self.fused_experts can override + # flashinfer cutlass, cutlass fp4 or fused_experts but not marlin or + # trtllm. 
+ # if self.use_marlin: + assert self.fused_experts is None return torch.ops.vllm.fused_marlin_moe( x, layer.w13_weight, @@ -1477,7 +1510,7 @@ def apply( expert_map=expert_map, workspace=layer.workspace) - if self.fused_experts is not None: + elif self.fused_experts is not None: assert self.allow_flashinfer and \ self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS @@ -1485,7 +1518,7 @@ def apply( x, layer.w13_weight, layer.w2_weight), ( "Flashinfer CUTLASS Fused MoE not applicable!") - out = self.fused_experts( + return self.fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -1495,28 +1528,22 @@ def apply( activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, apply_router_weight_on_input=apply_router_weight_on_input, ) elif (self.allow_flashinfer and self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS): from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501 flashinfer_cutlass_moe_fp4) + assert self.moe_quant_config is not None - out = flashinfer_cutlass_moe_fp4( + return flashinfer_cutlass_moe_fp4( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, topk_weights=topk_weights, topk_ids=topk_ids, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - g1_alphas=layer.g1_alphas, - g2_alphas=layer.g2_alphas, - a1_gscale=layer.w13_input_scale_quant, - a2_gscale=layer.w2_input_scale_quant, - inplace=False, # TODO(shuw): fix later, now output is high prec + quant_config=self.moe_quant_config, + inplace=False, activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, @@ -1527,23 +1554,19 @@ def apply( # only (no EP). from vllm.model_executor.layers.fused_moe.cutlass_moe import ( cutlass_moe_fp4) - out = cutlass_moe_fp4( + assert self.moe_quant_config is not None + return cutlass_moe_fp4( a=x, w1_fp4=layer.w13_weight, w2_fp4=layer.w2_weight, - w1_blockscale=layer.w13_weight_scale, - w2_blockscale=layer.w2_weight_scale, - g1_alphas=layer.g1_alphas, - g2_alphas=layer.g2_alphas, - a1_gscale=layer.w13_input_scale_quant, - a2_gscale=layer.w2_input_scale_quant, topk_weights=topk_weights, topk_ids=topk_ids, + quant_config=self.moe_quant_config, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + # TODO: derive from arguments m=x.shape[0], n=layer.w2_weight.shape[2] * 2, k=x.shape[1], e=layer.w13_weight.shape[0], - expert_map=expert_map, - apply_router_weight_on_input=apply_router_weight_on_input) - - return out + ) diff --git a/vllm/model_executor/layers/quantization/moe_wna16.py b/vllm/model_executor/layers/quantization/moe_wna16.py index c25b3dd6080d..145b614237fb 100644 --- a/vllm/model_executor/layers/quantization/moe_wna16.py +++ b/vllm/model_executor/layers/quantization/moe_wna16.py @@ -6,6 +6,9 @@ import torch from vllm.distributed import get_tensor_model_parallel_rank, get_tp_group +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, int4_w4a16_moe_quant_config, + int8_w8a16_moe_quant_config) from vllm.model_executor.layers.fused_moe.layer import ( FusedMoE, FusedMoEConfig, FusedMoEMethodBase, FusedMoeWeightScaleSupported) from vllm.model_executor.layers.linear import (LinearBase, @@ -283,6 +286,22 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, layer.register_parameter(key, param) set_weight_attrs(param, extra_weight_attrs) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> 
Optional[FusedMoEQuantConfig]: + weight_bits = self.quant_config.weight_bits + has_zp = self.quant_config.has_zp + assert weight_bits == 4 or weight_bits == 8 + config_builder = (int4_w4a16_moe_quant_config + if weight_bits == 4 else int8_w8a16_moe_quant_config) + + return config_builder( + w1_scale=layer.w13_scales, + w2_scale=layer.w2_scales, + w1_zp=layer.w13_qzeros if has_zp else None, + w2_zp=layer.w2_qzeros if has_zp else None, + block_shape=[0, layer.group_size], + ) + def apply( self, layer: torch.nn.Module, @@ -327,9 +346,6 @@ def apply( e_score_correction_bias=e_score_correction_bias, indices_type=self.topk_indices_dtype) - weight_bits = self.quant_config.weight_bits - has_zp = self.quant_config.has_zp - return fused_experts( x, layer.w13_qweight, @@ -337,16 +353,11 @@ def apply( topk_weights=topk_weights, topk_ids=topk_ids, inplace=True, - use_int4_w4a16=weight_bits == 4, - use_int8_w8a16=weight_bits == 8, - global_num_experts=global_num_experts, apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_scales, - w2_scale=layer.w2_scales, - w1_zp=layer.w13_qzeros if has_zp else None, - w2_zp=layer.w2_qzeros if has_zp else None, - block_shape=[0, layer.group_size]) + quant_config=self.moe_quant_config, + ) @staticmethod def get_weight_loader(layer, weight_loader): diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index f935bdd84124..28c1e60ccd08 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -12,6 +12,8 @@ from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig, FusedMoEMethodBase) from vllm.model_executor.layers.fused_moe import modular_kernel as mk +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, mxfp4_w4a4_moe_quant_config) from vllm.model_executor.layers.fused_moe.trtllm_moe import TrtLlmGenExperts from vllm.model_executor.layers.linear import (LinearBase, UnquantizedLinearMethod) @@ -629,10 +631,29 @@ def _get_tile_tokens_dim(self, x: torch.Tensor, top_k: int): return tile_tokens_dim + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + + if self.mxfp4_backend == Mxfp4Backend.MARLIN: + return None + + if self.mxfp4_backend == Mxfp4Backend.TRITON: + w1_scale = layer.w13_precision_config + w2_scale = layer.w2_precision_config + else: + w1_scale = layer.w13_weight_scale + w2_scale = layer.w2_weight_scale + + return mxfp4_w4a4_moe_quant_config( + w1_bias=layer.w13_bias, + w2_bias=layer.w2_bias, + w1_scale=w1_scale, + w2_scale=w2_scale, + ) + def select_gemm_impl( self, prepare_finalize: mk.FusedMoEPrepareAndFinalize, - moe: FusedMoEConfig, layer: torch.nn.Module, ) -> mk.FusedMoEPermuteExpertsUnpermute: if (prepare_finalize.activation_format == @@ -647,11 +668,12 @@ def select_gemm_impl( "gemm1_alpha": layer.gemm1_alpha, "gemm1_beta": layer.gemm1_beta, "gemm1_clamp_limit": layer.gemm1_clamp_limit, - "w13_bias": layer.w13_bias, - "w2_bias": layer.w2_bias, + # TODO(bnell): part of quant_config "max_capture_size": self.max_capture_size, } - return TrtLlmGenExperts(moe, **kwargs) + assert self.moe_quant_config is not None + return TrtLlmGenExperts(self.moe, self.moe_quant_config, + **kwargs) else: # Use matmul_ogs from triton_kernels here! 
raise NotImplementedError( @@ -710,8 +732,6 @@ def _route_and_experts( activation=activation, global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -941,10 +961,7 @@ def apply( renormalize=renormalize, global_num_experts=global_num_experts, expert_map=expert_map, - w1_bias=layer.w13_bias, - w2_bias=layer.w2_bias, - w1_precision=self.w13_precision_config, - w2_precision=self.w2_precision_config, + quant_config=self.moe_quant_config, apply_router_weight_on_input=apply_router_weight_on_input, ) else: diff --git a/vllm/model_executor/layers/quantization/quark/quark_moe.py b/vllm/model_executor/layers/quantization/quark/quark_moe.py index bc8ae980429a..d2d990e46bcf 100644 --- a/vllm/model_executor/layers/quantization/quark/quark_moe.py +++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py @@ -11,6 +11,9 @@ from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig, FusedMoEMethodBase, FusedMoeWeightScaleSupported) +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, fp8_w8a8_moe_quant_config, + mxfp4_w4a4_moe_quant_config) from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( is_rocm_aiter_moe_enabled) from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( @@ -287,6 +290,16 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: from vllm.model_executor.layers.fused_moe import fused_experts self.fused_experts_func = fused_experts + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return fp8_w8a8_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=layer.w13_input_scale, + a2_scale=layer.w2_input_scale, + per_act_token_quant=self.weight_qscheme == "per_channel", + ) + def apply( self, layer: torch.nn.Module, @@ -339,12 +352,7 @@ def apply( topk_ids=topk_ids, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=True, - per_channel_quant=self.weight_qscheme == "per_channel", - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale, + quant_config=self.moe_quant_config, expert_map=expert_map) if self.use_marlin: assert activation == "silu", ( @@ -376,14 +384,9 @@ def apply( inplace=True, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input, - use_fp8_w8a8=True, - per_channel_quant=self.weight_qscheme == "per_channel", global_num_experts=global_num_experts, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=layer.w13_input_scale, - a2_scale=layer.w2_input_scale) + quant_config=self.moe_quant_config) class QuarkW4A4MXFp4MoEMethod(QuarkMoEMethod): @@ -487,6 +490,16 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, layer.register_parameter("w13_weight_scale", w13_weight_scale) layer.register_parameter("w2_weight_scale", w2_weight_scale) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + return mxfp4_w4a4_moe_quant_config( + w1_scale=layer.w13_weight_scale, + w2_scale=layer.w2_weight_scale, + a1_scale=None, + a2_scale=None, + block_shape=None, + ) + def apply( self, layer: torch.nn.Module, @@ -539,15 +552,10 @@ def apply( topk_weights=topk_weights, topk_ids=topk_ids, inplace=True, - 
use_mxfp4_w4a4=True, + activation=activation, global_num_experts=global_num_experts, apply_router_weight_on_input=apply_router_weight_on_input, expert_map=expert_map, - w1_scale=layer.w13_weight_scale, - w2_scale=layer.w2_weight_scale, - a1_scale=None, - a2_scale=None, - block_shape=None, - activation=activation, + quant_config=self.moe_quant_config, ) return out diff --git a/vllm/model_executor/layers/quantization/rtn.py b/vllm/model_executor/layers/quantization/rtn.py index 0d5fa05652b8..ed90e2e26460 100644 --- a/vllm/model_executor/layers/quantization/rtn.py +++ b/vllm/model_executor/layers/quantization/rtn.py @@ -12,6 +12,9 @@ from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe import (FusedMoE, FusedMoEConfig, FusedMoEMethodBase) +from vllm.model_executor.layers.fused_moe.config import ( + FusedMoEQuantConfig, int4_w4a16_moe_quant_config, + int8_w8a16_moe_quant_config) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, set_weight_attrs) from vllm.model_executor.layers.quantization import QuantizationMethods @@ -269,6 +272,21 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: fix_weights(layer, "w13_weight", weight_bits == 4) fix_weights(layer, "w2_weight", weight_bits == 4) + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + weight_bits = self.quant_config.weight_bits + group_size = self.quant_config.group_size + assert weight_bits == 4 or weight_bits == 8 + config_builder = (int4_w4a16_moe_quant_config + if weight_bits == 4 else int8_w8a16_moe_quant_config) + return config_builder( + w1_scale=layer.w13_scale, + w2_scale=layer.w2_scale, + w1_zp=None, + w2_zp=None, + block_shape=[0, group_size], + ) + def apply( self, layer: torch.nn.Module, @@ -314,10 +332,7 @@ def apply( e_score_correction_bias=e_score_correction_bias, indices_type=self.topk_indices_dtype) - weight_bits = self.quant_config.weight_bits - group_size = self.quant_config.group_size - - ret = fused_experts( + return fused_experts( x, layer.w13_weight, layer.w2_weight, @@ -325,16 +340,11 @@ def apply( topk_ids=topk_ids, inplace=True, activation=activation, - use_int4_w4a16=weight_bits == 4, - use_int8_w8a16=weight_bits == 8, - global_num_experts=global_num_experts, - w1_scale=layer.w13_scale, - w2_scale=layer.w2_scale, apply_router_weight_on_input=apply_router_weight_on_input, + global_num_experts=global_num_experts, expert_map=expert_map, - block_shape=[0, group_size]) - - return ret + quant_config=self.moe_quant_config, + ) def rtn_quantize(tensor: torch.Tensor, num_bits: int, diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py index f5d7c57fe2a8..fabf855b36e6 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py @@ -7,7 +7,8 @@ import vllm.envs as envs import vllm.model_executor.layers.fused_moe.modular_kernel as mk -from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 @@ -47,32 +48,23 @@ def reorder_w1w3_to_w3w1(weight: torch.Tensor, def build_flashinfer_fp4_cutlass_moe_prepare_finalize( - moe: 
FusedMoEConfig, - a1_gscale: torch.Tensor, -) -> mk.FusedMoEPrepareAndFinalize: + moe: FusedMoEConfig) -> mk.FusedMoEPrepareAndFinalize: """Create a FlashInfer CUTLASS fused-MoE prepare finalize kernel""" use_dp = moe.moe_parallel_config.dp_size > 1 - return FlashInferCutlassMoEPrepareAndFinalize(use_dp, a1_gscale=a1_gscale) + return FlashInferCutlassMoEPrepareAndFinalize(use_dp) def select_nvfp4_gemm_impl( moe: FusedMoEConfig, - g1_alphas: torch.Tensor, - g2_alphas: torch.Tensor, - a1_gscale: torch.Tensor, - a2_gscale: torch.Tensor, + moe_quant_config: FusedMoEQuantConfig, allow_flashinfer: bool, ) -> mk.FusedMoEPermuteExpertsUnpermute: """Return a GEMM *experts* implementation for NV-FP4 fused-MoE layers""" if allow_flashinfer: return FlashInferExperts( - g1_alphas=g1_alphas, - g2_alphas=g2_alphas, - a1_gscale=a1_gscale, - a2_gscale=a2_gscale, out_dtype=moe.in_dtype, - quant_dtype="nvfp4", + quant_config=moe_quant_config, ep_rank=moe.moe_parallel_config.ep_rank, ep_size=moe.moe_parallel_config.ep_size, tp_rank=moe.moe_parallel_config.tp_rank, diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py index 9889808f0760..aa66a42c588a 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py @@ -8,7 +8,8 @@ import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm import envs from vllm.logger import init_logger -from vllm.model_executor.layers.fused_moe.config import FusedMoEConfig +from vllm.model_executor.layers.fused_moe.config import (FusedMoEConfig, + FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 @@ -99,6 +100,8 @@ def apply_flashinfer_per_tensor_scale_fp8( apply_router_weight_on_input: bool, ) -> torch.Tensor: from flashinfer.fused_moe import RoutingMethodType + + import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe # noqa: E501, F401 assert layer.output1_scales_scalar is not None, ( "Expected output1_scales_scalar to be initialized") assert layer.output1_scales_scalar is not None, ( @@ -167,34 +170,23 @@ def register_moe_scaling_factors(layer: torch.nn.Module) -> None: def build_flashinfer_fp8_cutlass_moe_prepare_finalize( - moe: Optional[FusedMoEConfig], - layer: torch.nn.Module, -) -> mk.FusedMoEPrepareAndFinalize: + moe: Optional[FusedMoEConfig], ) -> mk.FusedMoEPrepareAndFinalize: """Create a FlashInfer CUTLASS fused-MoE prepare finalize kernel""" use_dp = moe.moe_parallel_config.dp_size > 1 if moe is not None else False - return FlashInferCutlassMoEPrepareAndFinalize( - use_dp, a1_gscale=layer.w13_input_scale) + return FlashInferCutlassMoEPrepareAndFinalize(use_dp) def select_cutlass_fp8_gemm_impl( moe: Optional[FusedMoEConfig], - layer: torch.nn.Module, + quant_config: FusedMoEQuantConfig, out_dtype: Optional[torch.dtype] = None, ) -> mk.FusedMoEPermuteExpertsUnpermute: """Return a GEMM *experts* implementation for fused-MoE layers""" - from vllm.model_executor.models.llama4 import Llama4MoE - assert layer.custom_routing_function == Llama4MoE.custom_routing_function, \ - "FusedMoE flashinfer kernels are only supported for Llama4" - if moe is not None: return FlashInferExperts( - g1_alphas=layer.output1_scales_gate_scalar, - g2_alphas=layer.output2_scales_scalar, - a1_gscale=layer.w13_input_scale, - 
a2_gscale=layer.w2_input_scale_inv, out_dtype=moe.in_dtype, - quant_dtype=torch.float8_e4m3fn, + quant_config=quant_config, ep_rank=moe.moe_parallel_config.ep_rank, ep_size=moe.moe_parallel_config.ep_size, tp_rank=moe.moe_parallel_config.tp_rank, @@ -204,12 +196,8 @@ def select_cutlass_fp8_gemm_impl( assert out_dtype is not None, ( "If moe config is None, out_dtype must be passed") return FlashInferExperts( - g1_alphas=layer.output1_scales_gate_scalar, - g2_alphas=layer.output2_scales_scalar, - a1_gscale=layer.w13_input_scale, - a2_gscale=layer.w2_input_scale_inv, out_dtype=out_dtype, - quant_dtype=torch.float8_e4m3fn, + quant_config=quant_config, ) @@ -224,11 +212,13 @@ def flashinfer_cutlass_moe_fp8( expert_map: Optional[torch.Tensor] = None, apply_router_weight_on_input: bool = False, ) -> torch.Tensor: + quant_config = layer.quant_method.get_fused_moe_quant_config(layer) + assert quant_config is not None + fused_experts = mk.FusedMoEModularKernel( - build_flashinfer_fp8_cutlass_moe_prepare_finalize(moe=None, - layer=layer), + build_flashinfer_fp8_cutlass_moe_prepare_finalize(moe=None), select_cutlass_fp8_gemm_impl(moe=None, - layer=layer, + quant_config=quant_config, out_dtype=hidden_states.dtype)) return fused_experts( diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index e3e9635132d6..bbe0c6f6d38e 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -411,6 +411,7 @@ def per_token_group_quant_fp8( x_s = torch.empty(shape, device=x.device, dtype=torch.float32) # prefer CUDA kernel if available + # TODO(bnell): this causes some fp8 moe test to fail. if current_platform.is_cuda() and x.is_contiguous(): torch.ops._C.per_token_group_fp8_quant(x, x_q, x_s, group_size, eps, fp8_min, fp8_max, use_ue8m0) diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py index b758cbf28d89..bfc1408ddf88 100644 --- a/vllm/model_executor/models/bert_with_rope.py +++ b/vllm/model_executor/models/bert_with_rope.py @@ -15,8 +15,8 @@ tensor_model_parallel_all_reduce) from vllm.model_executor.layers.activation import (get_act_and_mul_fn, get_act_fn) -from vllm.model_executor.layers.fused_moe.fused_moe import ( - fused_topk, torch_vllm_outplace_fused_experts) +from vllm.model_executor.layers.fused_moe import (activation_without_mul, + fused_topk) from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, QKVParallelLinear, @@ -230,7 +230,7 @@ def __init__( self.hidden_size = hidden_size self.total_intermediate_size = intermediate_size self.intermediate_size = divide(intermediate_size, self.tp_size) - self.hidden_act = hidden_act + self.hidden_act = activation_without_mul(hidden_act) if params_dtype is None: params_dtype = torch.get_default_dtype() @@ -297,14 +297,14 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: router_logits, self.top_k, renormalize=False) - final_hidden_states = torch_vllm_outplace_fused_experts( + + final_hidden_states = torch.ops.vllm.outplace_fused_experts( hidden_states=hidden_states, w1=self.w1, w2=self.w2, topk_weights=topk_weights, topk_ids=topk_ids, activation=self.hidden_act, - is_act_and_mul=False, ) if self.tp_size > 1: diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 4395b11b7d0f..59c992188149 100644 --- a/vllm/model_executor/models/deepseek.py +++ 
b/vllm/model_executor/models/deepseek.py @@ -37,7 +37,7 @@ get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) from vllm.model_executor.layers.activation import SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, QKVParallelLinear, @@ -163,13 +163,19 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: shared_output = self.shared_experts(hidden_states) # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) - final_hidden_states = fused_moe(hidden_states, - self.w1, - self.w2, - router_logits, - self.top_k, - renormalize=self.config.norm_topk_prob, - inplace=True) + + topk_weights, topk_ids, _ = fused_topk( + hidden_states, + router_logits, + self.top_k, + renormalize=self.config.norm_topk_prob) + + final_hidden_states = fused_experts(hidden_states, + self.w1, + self.w2, + topk_weights, + topk_ids, + inplace=True) if self.config.n_shared_experts is not None: final_hidden_states = final_hidden_states + shared_output diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index c7be7f76dba1..240c23ea2b25 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -39,7 +39,7 @@ get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) from vllm.model_executor.layers.activation import FatreluAndMul, SiluAndMul -from vllm.model_executor.layers.fused_moe import fused_moe +from vllm.model_executor.layers.fused_moe import fused_experts, fused_topk from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, QKVParallelLinear, @@ -136,13 +136,18 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: hidden_states = hidden_states.view(-1, self.hidden_size) # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) - final_hidden_states = fused_moe(hidden_states, - self.ws, - self.w2s, - router_logits, - self.top_k, - renormalize=True, - inplace=True) + + topk_weights, topk_ids, _ = fused_topk(hidden_states, + router_logits, + self.top_k, + renormalize=True) + + final_hidden_states = fused_experts(hidden_states, + self.ws, + self.w2s, + topk_weights, + topk_ids, + inplace=True) if self.tp_size > 1: final_hidden_states = tensor_model_parallel_all_reduce( diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index f66e8b0b454b..029309c49efd 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -702,4 +702,4 @@ def load_weights(self, weights: Iterable[tuple[str, return loader.load_weights(weights) def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: - return self.model.get_expert_mapping() + return self.model.get_expert_mapping() \ No newline at end of file diff --git a/vllm/model_executor/warmup/deep_gemm_warmup.py b/vllm/model_executor/warmup/deep_gemm_warmup.py index a25ef86a989d..a636a714145c 100644 --- a/vllm/model_executor/warmup/deep_gemm_warmup.py +++ b/vllm/model_executor/warmup/deep_gemm_warmup.py @@ -81,9 +81,14 @@ def _fp8_linear_may_use_deep_gemm(module: torch.nn.Module) -> bool: def _fused_moe_grouped_gemm_may_use_deep_gemm(module: torch.nn.Module) -> bool: - if not (isinstance(module, FusedMoE) - and 
module.moe_config.quant_dtype == torch.float8_e4m3fn - and module.moe_config.block_shape == deep_gemm_block_shape()): + if not isinstance(module, FusedMoE): + return False + + moe_quant_config = module.quant_method.get_fused_moe_quant_config(module) + + if (moe_quant_config is None + or moe_quant_config.quant_dtype != torch.float8_e4m3fn + or moe_quant_config.block_shape != deep_gemm_block_shape()): return False if not isinstance(module.quant_method.fused_experts, From 2c3c1bd07aad253a34c97563bc5d466adaecaa18 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 19:38:09 -0700 Subject: [PATCH 071/518] [V0 Deprecation] Remove V0 Engine tests (#25114) Signed-off-by: Woosuk Kwon --- tests/engine/conftest.py | 12 -- tests/engine/test_computed_prefix_blocks.py | 37 ---- tests/engine/test_executor.py | 111 ---------- tests/engine/test_multiproc_workers.py | 179 ---------------- tests/engine/test_options.py | 58 ----- tests/engine/test_short_mm_context.py | 1 + tests/engine/test_stop_checker.py | 225 -------------------- 7 files changed, 1 insertion(+), 622 deletions(-) delete mode 100644 tests/engine/conftest.py delete mode 100644 tests/engine/test_computed_prefix_blocks.py delete mode 100644 tests/engine/test_executor.py delete mode 100644 tests/engine/test_multiproc_workers.py delete mode 100644 tests/engine/test_options.py delete mode 100644 tests/engine/test_stop_checker.py diff --git a/tests/engine/conftest.py b/tests/engine/conftest.py deleted file mode 100644 index 375b248ebeda..000000000000 --- a/tests/engine/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') diff --git a/tests/engine/test_computed_prefix_blocks.py b/tests/engine/test_computed_prefix_blocks.py deleted file mode 100644 index ac5a1f957dfe..000000000000 --- a/tests/engine/test_computed_prefix_blocks.py +++ /dev/null @@ -1,37 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.engine.arg_utils import EngineArgs -from vllm.engine.llm_engine import LLMEngine -from vllm.sampling_params import SamplingParams - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -@pytest.mark.parametrize("block_size", [16]) -def test_computed_prefix_blocks(model: str, block_size: int): - # This test checks if we are able to run the engine to completion - # without triggering asserts. - # We are in a scenario where all blocks from the second request's prompt - # are full and already computed when the second request arrives. - prompt = ( - "You are a helpful assistant. How do I build a car from cardboard and " - "paper clips? 
Is there an easy to follow video tutorial available " - "online for free?") - prompt2 = ( - " Please recommend to me some resources where I can learn not only to " - "handle technical difficulties of building a car, but also " - "decoration.") - - engine_args = EngineArgs(model=model, - block_size=block_size, - enable_prefix_caching=True) - - engine = LLMEngine.from_engine_args(engine_args) - sampling_params = SamplingParams() - - engine.add_request("0", prompt + prompt2, sampling_params) - engine.step() - engine.add_request("1", prompt, sampling_params) - engine.step() diff --git a/tests/engine/test_executor.py b/tests/engine/test_executor.py deleted file mode 100644 index 67064aff3ae9..000000000000 --- a/tests/engine/test_executor.py +++ /dev/null @@ -1,111 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -import os -from typing import Any, Callable, Optional, Union - -import pytest - -from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.engine.llm_engine import LLMEngine -from vllm.executor.uniproc_executor import UniProcExecutor -from vllm.sampling_params import SamplingParams - - -class Mock: - ... - - -class CustomUniExecutor(UniProcExecutor): - - def collective_rpc(self, - method: Union[str, Callable], - timeout: Optional[float] = None, - args: tuple = (), - kwargs: Optional[dict] = None) -> list[Any]: - # Drop marker to show that this was run - with open(".marker", "w"): - ... - return super().collective_rpc(method, timeout, args, kwargs) - - -CustomUniExecutorAsync = CustomUniExecutor - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_custom_executor_type_checking(model): - with pytest.raises(ValueError): - engine_args = EngineArgs(model=model, - distributed_executor_backend=Mock) - LLMEngine.from_engine_args(engine_args) - with pytest.raises(ValueError): - engine_args = AsyncEngineArgs(model=model, - distributed_executor_backend=Mock) - AsyncLLMEngine.from_engine_args(engine_args) - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_custom_executor(model, tmp_path): - cwd = os.path.abspath(".") - os.chdir(tmp_path) - try: - assert not os.path.exists(".marker") - - engine_args = EngineArgs( - model=model, - distributed_executor_backend=CustomUniExecutor, - enforce_eager=True, # reduce test time - ) - engine = LLMEngine.from_engine_args(engine_args) - sampling_params = SamplingParams(max_tokens=1) - - engine.add_request("0", "foo", sampling_params) - engine.step() - - assert os.path.exists(".marker") - finally: - os.chdir(cwd) - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_custom_executor_async(model, tmp_path): - cwd = os.path.abspath(".") - os.chdir(tmp_path) - try: - assert not os.path.exists(".marker") - - engine_args = AsyncEngineArgs( - model=model, - distributed_executor_backend=CustomUniExecutorAsync, - enforce_eager=True, # reduce test time - ) - engine = AsyncLLMEngine.from_engine_args(engine_args) - sampling_params = SamplingParams(max_tokens=1) - - async def t(): - stream = await engine.add_request("0", "foo", sampling_params) - async for x in stream: - ... - - asyncio.run(t()) - - assert os.path.exists(".marker") - finally: - os.chdir(cwd) - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_respect_ray(model): - # even for TP=1 and PP=1, - # if users specify ray, we should use ray. 
- # users might do this if they want to manage the - # resources using ray. - engine_args = EngineArgs( - model=model, - distributed_executor_backend="ray", - enforce_eager=True, # reduce test time - ) - engine = LLMEngine.from_engine_args(engine_args) - assert engine.model_executor.uses_ray diff --git a/tests/engine/test_multiproc_workers.py b/tests/engine/test_multiproc_workers.py deleted file mode 100644 index b5381b61a020..000000000000 --- a/tests/engine/test_multiproc_workers.py +++ /dev/null @@ -1,179 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -from concurrent.futures import ThreadPoolExecutor -from functools import partial -from time import sleep -from typing import Any - -import pytest - -from vllm.config import VllmConfig -from vllm.executor.multiproc_worker_utils import (ProcessWorkerWrapper, - ResultHandler, WorkerMonitor) -from vllm.worker.worker_base import WorkerWrapperBase - - -class DummyWorkerWrapper(WorkerWrapperBase): - """Dummy version of vllm.worker.worker.Worker""" - - def worker_method(self, worker_input: Any) -> tuple[int, Any]: - sleep(0.05) - - if isinstance(worker_input, Exception): - # simulate error case - raise worker_input - - return self.rpc_rank, input - - -def _start_workers() -> tuple[list[ProcessWorkerWrapper], WorkerMonitor]: - result_handler = ResultHandler() - vllm_config = VllmConfig() - workers = [ - ProcessWorkerWrapper(result_handler, DummyWorkerWrapper, vllm_config, - rank) for rank in range(8) - ] - - worker_monitor = WorkerMonitor(workers, result_handler) - assert not worker_monitor.is_alive() - - result_handler.start() - worker_monitor.start() - assert worker_monitor.is_alive() - - return workers, worker_monitor - - -def test_local_workers() -> None: - """Test workers with sync task submission""" - - workers, worker_monitor = _start_workers() - - def execute_workers(worker_input: str) -> None: - worker_outputs = [ - worker.execute_method("worker_method", worker_input) - for worker in workers - ] - - for rank, output in enumerate(worker_outputs): - assert output.get() == (rank, input) - - executor = ThreadPoolExecutor(max_workers=4) - - # Test concurrent submission from different threads - futures = [ - executor.submit(partial(execute_workers, f"thread {thread_num}")) - for thread_num in range(4) - ] - - for future in futures: - future.result() - - # Test error case - exception = ValueError("fake error") - result = workers[0].execute_method("worker_method", exception) - try: - result.get() - pytest.fail("task should have failed") - except Exception as e: - assert isinstance(e, ValueError) - assert str(e) == "fake error" - - # Test cleanup when a worker fails - assert worker_monitor.is_alive() - workers[3].process.kill() - - # Other workers should get shut down here - worker_monitor.join(20) - - # Ensure everything is stopped - assert not worker_monitor.is_alive() - assert all(not worker.process.is_alive() for worker in workers) - - # Further attempts to submit tasks should fail - try: - _result = workers[0].execute_method("worker_method", "test") - pytest.fail("task should fail once workers have been shut down") - except Exception as e: - assert isinstance(e, ChildProcessError) - - -def test_local_workers_clean_shutdown() -> None: - """Test clean shutdown""" - - workers, worker_monitor = _start_workers() - - assert worker_monitor.is_alive() - assert all(worker.process.is_alive() for worker in workers) - - # Clean shutdown - worker_monitor.close() - - 
worker_monitor.join(20) - - # Ensure everything is stopped - assert not worker_monitor.is_alive() - assert all(not worker.process.is_alive() for worker in workers) - - # Further attempts to submit tasks should fail - try: - _result = workers[0].execute_method("worker_method", "test") - pytest.fail("task should fail once workers have been shut down") - except Exception as e: - assert isinstance(e, ChildProcessError) - - -@pytest.mark.asyncio -async def test_local_workers_async() -> None: - """Test local workers with async task submission""" - - workers, worker_monitor = _start_workers() - - async def execute_workers(worker_input: str) -> None: - worker_coros = [ - worker.execute_method_async("worker_method", worker_input) - for worker in workers - ] - - results = await asyncio.gather(*worker_coros) - for rank, result in enumerate(results): - assert result == (rank, input) - - tasks = [ - asyncio.create_task(execute_workers(f"task {task_num}")) - for task_num in range(4) - ] - - for task in tasks: - await task - - # Test error case - exception = ValueError("fake error") - try: - _result = await workers[0].execute_method_async( - "worker_method", exception) - pytest.fail("task should have failed") - except Exception as e: - assert isinstance(e, ValueError) - assert str(e) == "fake error" - - # Test cleanup when a worker fails - assert worker_monitor.is_alive() - workers[3].process.kill() - - # Other workers should get shut down here - worker_monitor.join(20) - - # Ensure everything is stopped - assert not worker_monitor.is_alive() - assert all(not worker.process.is_alive() for worker in workers) - - # Further attempts to submit tasks should fail - try: - _result = await workers[0].execute_method_async( - "worker_method", "test") - pytest.fail("task should fail once workers have been shut down") - except Exception as e: - assert isinstance(e, ChildProcessError) diff --git a/tests/engine/test_options.py b/tests/engine/test_options.py deleted file mode 100644 index 42e88e84770a..000000000000 --- a/tests/engine/test_options.py +++ /dev/null @@ -1,58 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from contextlib import nullcontext - -import pytest - -from vllm.entrypoints.llm import LLM -from vllm.sampling_params import SamplingParams - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -def test_skip_tokenizer_initialization(model: str): - # This test checks if the flag skip_tokenizer_init skips the initialization - # of tokenizer and detokenizer. The generated output is expected to contain - # token ids. 
- llm = LLM( - model=model, - skip_tokenizer_init=True, - enforce_eager=True, - ) - sampling_params = SamplingParams(prompt_logprobs=True, detokenize=True) - - with pytest.raises(ValueError, match="cannot pass text prompts when"): - llm.generate("abc", sampling_params) - - outputs = llm.generate({"prompt_token_ids": [1, 2, 3]}, - sampling_params=sampling_params) - assert len(outputs) > 0 - completions = outputs[0].outputs - assert len(completions) > 0 - assert completions[0].text == "" - assert completions[0].token_ids - - -@pytest.mark.parametrize("model", ["distilbert/distilgpt2"]) -@pytest.mark.parametrize("enable_prompt_embeds", [True, False]) -def test_enable_prompt_embeds(hf_runner, model: str, - enable_prompt_embeds: bool): - prompt = "abc" - - with hf_runner(model) as hf_model: - token_ids = hf_model.tokenizer(prompt, return_tensors="pt").input_ids - token_ids = token_ids.to(hf_model.model.device) - - embed_layer = hf_model.model.get_input_embeddings() - prompt_embeds = embed_layer(token_ids).squeeze(0) - - ctx = (nullcontext() if enable_prompt_embeds else pytest.raises( - ValueError, match="set `--enable-prompt-embeds`")) - - llm = LLM( - model=model, - enable_prompt_embeds=enable_prompt_embeds, - enforce_eager=True, - ) - - with ctx: - llm.generate({"prompt_embeds": prompt_embeds}) diff --git a/tests/engine/test_short_mm_context.py b/tests/engine/test_short_mm_context.py index 9c62761d78af..9eb3dfc09224 100644 --- a/tests/engine/test_short_mm_context.py +++ b/tests/engine/test_short_mm_context.py @@ -25,6 +25,7 @@ def test_context_length_too_short(vllm_runner, image_assets, model): model, max_model_len=128, # LLaVA has a feature size of 576 enforce_eager=True, + load_format="dummy", ) with vllm_model: diff --git a/tests/engine/test_stop_checker.py b/tests/engine/test_stop_checker.py deleted file mode 100644 index 34f4cb13ab0a..000000000000 --- a/tests/engine/test_stop_checker.py +++ /dev/null @@ -1,225 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -from transformers import AutoTokenizer - -from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.reasoning import ReasoningParser -from vllm.sampling_params import SamplingParams -from vllm.sequence import Sequence, SequenceStatus - -REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" - - -class MockReasoningParser(ReasoningParser): - """Mock reasoning parser for testing purposes.""" - - def __init__(self, - tokenizer: AutoTokenizer, - reasoning_active: bool = False): - super().__init__(tokenizer) - self.reasoning_active = reasoning_active - - def is_reasoning_end(self, input_ids: list[int]) -> bool: - return not self.reasoning_active - - def extract_content_ids(self, input_ids: list[int]) -> list[int]: - return input_ids - - -class MockSequence(Sequence): - """Mock sequence for testing purposes.""" - - def __init__(self, token_ids, output_text="test_output", eos_token_id=0): - self.token_ids = token_ids - self.output_text = output_text - self.eos_token_id = eos_token_id - self.status = SequenceStatus.RUNNING - self.stop_reason = None - - def get_token_ids(self): - return self.token_ids - - def get_last_token_id(self): - return self.token_ids[-1] if self.token_ids else None - - def get_len(self): - return len(self.token_ids) - - def get_output_len(self): - return len(self.token_ids) - 1 # Simulating prompt + outputs - - -@pytest.fixture -def deepseek_r1_qwen_tokenizer(): - return 
AutoTokenizer.from_pretrained(REASONING_MODEL_NAME) - - -@pytest.fixture -def stop_checker(): - return StopChecker(max_model_len=10) - - -@pytest.fixture -def stop_checker_with_reasoner(): - reasoner = MockReasoningParser(deepseek_r1_qwen_tokenizer) - return StopChecker(max_model_len=10, reasoner=reasoner) - - -def test_eos_token_stopping(stop_checker): - """Test sequence stopping when EOS token is encountered.""" - seq = MockSequence(token_ids=[1, 2, 0], eos_token_id=0) - sampling_params = SamplingParams() - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_STOPPED - - -def test_ignore_eos(stop_checker): - """Test sequence continuing when EOS token is ignored.""" - seq = MockSequence(token_ids=[1, 2, 0], eos_token_id=0) - sampling_params = SamplingParams(ignore_eos=True) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.RUNNING - - -def test_min_tokens(stop_checker): - """Test min_tokens prevents early stopping.""" - seq = MockSequence(token_ids=[1, 2, 0], eos_token_id=0) - sampling_params = SamplingParams(min_tokens=3) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.RUNNING - - -def test_stop_token_ids(stop_checker): - """Test sequence stopping with custom stop token IDs.""" - seq = MockSequence(token_ids=[1, 2, 3], eos_token_id=0) - sampling_params = SamplingParams(stop_token_ids=[3]) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_STOPPED - assert seq.stop_reason == 3 - - -def test_stop_strings(stop_checker): - """Test sequence stopping with stop strings.""" - seq = MockSequence(token_ids=[1, 2, 3], - output_text="test output with STOP", - eos_token_id=0) - sampling_params = SamplingParams(stop=["STOP"]) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_STOPPED - assert seq.stop_reason == "STOP" - assert "STOP" not in seq.output_text # Default behavior removes stop string - - -def test_include_stop_str_in_output(stop_checker): - """Test keeping stop strings in output.""" - seq = MockSequence(token_ids=[1, 2, 3], - output_text="test output with STOP", - eos_token_id=0) - sampling_params = SamplingParams(stop=["STOP"], - include_stop_str_in_output=True) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=5, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_STOPPED - assert "STOP" in seq.output_text - - -def test_max_tokens(stop_checker): - """Test sequence stopping at max_tokens.""" - seq = MockSequence(token_ids=[1, 2, 3], eos_token_id=0) - sampling_params = SamplingParams(max_tokens=2) - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_LENGTH_CAPPED - - -def test_max_model_len(stop_checker): - """Test sequence stopping at max_model_len.""" - seq = MockSequence(token_ids=list(range(11)), - eos_token_id=0) # 11 tokens, max is 10 - sampling_params = SamplingParams() - - stop_checker.maybe_stop_sequence(seq, - new_char_count=1, - sampling_params=sampling_params) - - assert seq.status == SequenceStatus.FINISHED_LENGTH_CAPPED - - -def test_reasoning_skip_stops(stop_checker_with_reasoner): - """Test that 
stop tokens and strings are ignored during reasoning.""" - # Set reasoning_active to True to simulate being in reasoning mode - stop_checker_with_reasoner.reasoner.reasoning_active = True - - # Test with stop token - seq = MockSequence(token_ids=[1, 2, 3], eos_token_id=0) - sampling_params = SamplingParams(stop_token_ids=[3]) - - stop_checker_with_reasoner.maybe_stop_sequence( - seq, new_char_count=1, sampling_params=sampling_params) - assert seq.status == SequenceStatus.RUNNING - - # Test with stop string - seq = MockSequence(token_ids=[1, 2, 3], output_text="test STOP") - sampling_params = SamplingParams(stop=["STOP"]) - - stop_checker_with_reasoner.maybe_stop_sequence( - seq, new_char_count=4, sampling_params=sampling_params) - assert seq.status == SequenceStatus.RUNNING - - # But EOS token still stops the sequence - seq = MockSequence(token_ids=[1, 2, 0], eos_token_id=0) - sampling_params = SamplingParams() - - stop_checker_with_reasoner.maybe_stop_sequence( - seq, new_char_count=1, sampling_params=sampling_params) - assert seq.status == SequenceStatus.FINISHED_STOPPED - - -def test_reasoning_end_enables_stops(stop_checker_with_reasoner): - """Test that stop tokens work after reasoning ends.""" - # Set reasoning_active to False to simulate being out of reasoning mode - stop_checker_with_reasoner.reasoner.reasoning_active = False - - # Test with stop token - seq = MockSequence(token_ids=[1, 2, 3], eos_token_id=0) - sampling_params = SamplingParams(stop_token_ids=[3]) - - stop_checker_with_reasoner.maybe_stop_sequence( - seq, new_char_count=1, sampling_params=sampling_params) - assert seq.status == SequenceStatus.FINISHED_STOPPED - - # Test with stop string - seq = MockSequence(token_ids=[1, 2, 3], output_text="test STOP") - sampling_params = SamplingParams(stop=["STOP"]) - - stop_checker_with_reasoner.maybe_stop_sequence( - seq, new_char_count=4, sampling_params=sampling_params) - assert seq.status == SequenceStatus.FINISHED_STOPPED From 2fc24e94f964368491a994641fb2921ed74cb4d4 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 19:40:44 -0700 Subject: [PATCH 072/518] [V0 Deprecation] Remove V0 Tracing & Metrics tests (#25115) Signed-off-by: Woosuk Kwon --- .buildkite/test-pipeline.yaml | 4 +- tests/metrics/test_metrics.py | 268 ---------------------- tests/tracing/__init__.py | 0 tests/tracing/test_tracing.py | 237 ------------------- tests/{metrics => v1/tracing}/__init__.py | 0 5 files changed, 1 insertion(+), 508 deletions(-) delete mode 100644 tests/metrics/test_metrics.py delete mode 100644 tests/tracing/__init__.py delete mode 100644 tests/tracing/test_tracing.py rename tests/{metrics => v1/tracing}/__init__.py (100%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 08c10180fc22..0bce02b90a7c 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -217,16 +217,14 @@ steps: num_gpus: 2 source_file_dependencies: - vllm/ - - tests/metrics - tests/v1/tracing commands: - - pytest -v -s metrics - "pip install \ 'opentelemetry-sdk>=1.26.0' \ 'opentelemetry-api>=1.26.0' \ 'opentelemetry-exporter-otlp>=1.26.0' \ 'opentelemetry-semantic-conventions-ai>=0.4.1'" - - pytest -v -s tracing + - pytest -v -s v1/tracing ##### fast check tests ##### ##### 1 GPU test ##### diff --git a/tests/metrics/test_metrics.py b/tests/metrics/test_metrics.py deleted file mode 100644 index dbd9c518e020..000000000000 --- a/tests/metrics/test_metrics.py +++ /dev/null @@ -1,268 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# 
SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -import ray -from prometheus_client import REGISTRY - -import vllm.envs as envs -from vllm import EngineArgs, LLMEngine -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.engine.metrics import RayPrometheusStatLogger -from vllm.sampling_params import SamplingParams -from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module tests V0 internals, so set VLLM_USE_V1=0. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') - - -MODELS = [ - "distilbert/distilgpt2", -] - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [128]) -def test_metric_counter_prompt_tokens( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - with vllm_runner(model, - dtype=dtype, - disable_log_stats=False, - gpu_memory_utilization=0.4) as vllm_model: - tokenizer = vllm_model.llm.get_tokenizer() - prompt_token_counts = [ - len(tokenizer.encode(p)) for p in example_prompts - ] - # This test needs at least 2 prompts in a batch of different lengths to - # verify their token count is correct despite padding. - assert len(example_prompts) > 1, "at least 2 prompts are required" - assert prompt_token_counts[0] != prompt_token_counts[1], ( - "prompts of different lengths are required") - vllm_prompt_token_count = sum(prompt_token_counts) - - _ = vllm_model.generate_greedy(example_prompts, max_tokens) - stat_logger = vllm_model.llm.llm_engine.stat_loggers['prometheus'] - metric_count = stat_logger.metrics.counter_prompt_tokens.labels( - **stat_logger.labels)._value.get() - - assert vllm_prompt_token_count == metric_count, ( - f"prompt token count: {vllm_prompt_token_count!r}\n" - f"metric: {metric_count!r}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [128]) -def test_metric_counter_generation_tokens( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - with vllm_runner(model, - dtype=dtype, - disable_log_stats=False, - gpu_memory_utilization=0.4) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - tokenizer = vllm_model.llm.get_tokenizer() - stat_logger = vllm_model.llm.llm_engine.stat_loggers['prometheus'] - metric_count = stat_logger.metrics.counter_generation_tokens.labels( - **stat_logger.labels)._value.get() - vllm_generation_count = 0 - for i in range(len(example_prompts)): - vllm_output_ids, vllm_output_str = vllm_outputs[i] - prompt_ids = tokenizer.encode(example_prompts[i]) - # vllm_output_ids contains both prompt tokens and generation tokens. - # We're interested only in the count of the generation tokens. 
- vllm_generation_count += len(vllm_output_ids) - len(prompt_ids) - - assert vllm_generation_count == metric_count, ( - f"generation token count: {vllm_generation_count!r}\n" - f"metric: {metric_count!r}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize( - "served_model_name", - [None, [], ["ModelName0"], ["ModelName0", "ModelName1", "ModelName2"]]) -def test_metric_set_tag_model_name(vllm_runner, model: str, dtype: str, - served_model_name: list[str]) -> None: - with vllm_runner(model, - dtype=dtype, - disable_log_stats=False, - gpu_memory_utilization=0.3, - served_model_name=served_model_name) as vllm_model: - stat_logger = vllm_model.llm.llm_engine.stat_loggers['prometheus'] - metrics_tag_content = stat_logger.labels["model_name"] - - if envs.VLLM_CI_USE_S3: - model = f"{MODEL_WEIGHTS_S3_BUCKET}/{model}" - if served_model_name is None or served_model_name == []: - assert metrics_tag_content == model, ( - f"Metrics tag model_name is wrong! expect: {model!r}\n" - f"actual: {metrics_tag_content!r}") - else: - assert metrics_tag_content == served_model_name[0], ( - f"Metrics tag model_name is wrong! expect: " - f"{served_model_name[0]!r}\n" - f"actual: {metrics_tag_content!r}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["half"]) -@pytest.mark.parametrize("max_tokens", [4]) -@pytest.mark.parametrize("disable_log_stats", [True, False]) -@pytest.mark.asyncio -async def test_async_engine_log_metrics_regression( - example_prompts, - model: str, - dtype: str, - max_tokens: int, - disable_log_stats: bool, -) -> None: - """ - Regression test ensuring async engine generates metrics - when disable_log_stats=False - (see: https://github.com/vllm-project/vllm/pull/4150#pullrequestreview-2008176678) - """ - engine_args = AsyncEngineArgs( - model=model, - dtype=dtype, - disable_log_stats=disable_log_stats, - ) - async_engine = AsyncLLMEngine.from_engine_args(engine_args) - for i, prompt in enumerate(example_prompts): - results = async_engine.generate( - prompt, - SamplingParams(max_tokens=max_tokens), - f"request-id-{i}", - ) - # Exhaust the async iterator to make the async engine work - async for _ in results: - pass - - assert_metrics(model, async_engine.engine, disable_log_stats, - len(example_prompts)) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["half"]) -@pytest.mark.parametrize("max_tokens", [4]) -@pytest.mark.parametrize("disable_log_stats", [True, False]) -def test_engine_log_metrics_regression( - example_prompts, - model: str, - dtype: str, - max_tokens: int, - disable_log_stats: bool, -) -> None: - engine_args = EngineArgs( - model=model, - dtype=dtype, - disable_log_stats=disable_log_stats, - ) - engine = LLMEngine.from_engine_args(engine_args) - for i, prompt in enumerate(example_prompts): - engine.add_request( - f"request-id-{i}", - prompt, - SamplingParams(max_tokens=max_tokens), - ) - while engine.has_unfinished_requests(): - engine.step() - - if envs.VLLM_CI_USE_S3: - model = f"{MODEL_WEIGHTS_S3_BUCKET}/{model}" - assert_metrics(model, engine, disable_log_stats, len(example_prompts)) - - -def assert_metrics(model: str, engine: LLMEngine, disable_log_stats: bool, - num_requests: int) -> None: - if disable_log_stats: - with pytest.raises(AttributeError): - _ = engine.stat_loggers - else: - assert (engine.stat_loggers - is not None), "engine.stat_loggers should be set" - # Ensure the count bucket of request-level histogram metrics matches - # 
the number of requests as a simple sanity check to ensure metrics are - # generated - labels = {'model_name': model} - request_histogram_metrics = [ - "vllm:e2e_request_latency_seconds", - "vllm:request_prompt_tokens", - "vllm:request_generation_tokens", - "vllm:request_params_n", - "vllm:request_params_max_tokens", - ] - for metric_name in request_histogram_metrics: - metric_value = REGISTRY.get_sample_value(f"{metric_name}_count", - labels) - assert ( - metric_value == num_requests), "Metrics should be collected" - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["half"]) -@pytest.mark.parametrize("max_tokens", [16]) -def test_engine_log_metrics_ray( - example_prompts, - model: str, - dtype: str, - max_tokens: int, -) -> None: - # This test is quite weak - it only checks that we can use - # RayPrometheusStatLogger without exceptions. - # Checking whether the metrics are actually emitted is unfortunately - # non-trivial. - - # We have to run in a Ray task for Ray metrics to be emitted correctly - @ray.remote(num_gpus=1) - def _inner(): - - class _RayPrometheusStatLogger(RayPrometheusStatLogger): - - def __init__(self, *args, **kwargs): - self._i = 0 - super().__init__(*args, **kwargs) - - def log(self, *args, **kwargs): - self._i += 1 - return super().log(*args, **kwargs) - - engine_args = EngineArgs( - model=model, - dtype=dtype, - disable_log_stats=False, - ) - engine = LLMEngine.from_engine_args(engine_args) - logger = _RayPrometheusStatLogger( - local_interval=0.5, - labels=dict(model_name=engine.model_config.served_model_name), - vllm_config=engine.vllm_config) - engine.add_logger("ray", logger) - for i, prompt in enumerate(example_prompts): - engine.add_request( - f"request-id-{i}", - prompt, - SamplingParams(max_tokens=max_tokens), - ) - while engine.has_unfinished_requests(): - engine.step() - assert logger._i > 0, ".log must be called at least once" - - ray.get(_inner.remote()) diff --git a/tests/tracing/__init__.py b/tests/tracing/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/tracing/test_tracing.py b/tests/tracing/test_tracing.py deleted file mode 100644 index 4dbae7c15de3..000000000000 --- a/tests/tracing/test_tracing.py +++ /dev/null @@ -1,237 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -# ruff: noqa -# type: ignore -from __future__ import annotations - -import threading -from collections.abc import Iterable -from concurrent import futures -from typing import Callable, Generator, Literal - -import grpc -import pytest -from opentelemetry.proto.collector.trace.v1.trace_service_pb2 import ( - ExportTraceServiceResponse) -from opentelemetry.proto.collector.trace.v1.trace_service_pb2_grpc import ( - TraceServiceServicer, add_TraceServiceServicer_to_server) -from opentelemetry.proto.common.v1.common_pb2 import AnyValue, KeyValue -from opentelemetry.sdk.environment_variables import ( - OTEL_EXPORTER_OTLP_TRACES_INSECURE) - -from vllm import LLM, SamplingParams -from vllm.tracing import SpanAttributes - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch: pytest.MonkeyPatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. 
- """ - with monkeypatch.context() as m: - m.setenv('VLLM_USE_V1', '0') - yield - - -FAKE_TRACE_SERVER_ADDRESS = "localhost:4317" - -FieldName = Literal['bool_value', 'string_value', 'int_value', 'double_value', - 'array_value'] - - -def decode_value(value: AnyValue): - field_decoders: dict[FieldName, Callable] = { - "bool_value": (lambda v: v.bool_value), - "string_value": (lambda v: v.string_value), - "int_value": (lambda v: v.int_value), - "double_value": (lambda v: v.double_value), - "array_value": - (lambda v: [decode_value(item) for item in v.array_value.values]), - } - for field, decoder in field_decoders.items(): - if value.HasField(field): - return decoder(value) - raise ValueError(f"Couldn't decode value: {value}") - - -def decode_attributes(attributes: Iterable[KeyValue]): - return {kv.key: decode_value(kv.value) for kv in attributes} - - -class FakeTraceService(TraceServiceServicer): - - def __init__(self): - self.request = None - self.evt = threading.Event() - - def Export(self, request, context): - self.request = request - self.evt.set() - return ExportTraceServiceResponse() - - -@pytest.fixture -def trace_service() -> Generator[FakeTraceService, None, None]: - """Fixture to set up a fake gRPC trace service""" - server = grpc.server(futures.ThreadPoolExecutor(max_workers=1)) - service = FakeTraceService() - add_TraceServiceServicer_to_server(service, server) - server.add_insecure_port(FAKE_TRACE_SERVER_ADDRESS) - server.start() - - yield service - - server.stop(None) - - -def test_traces( - monkeypatch: pytest.MonkeyPatch, - trace_service: FakeTraceService, -): - with monkeypatch.context() as m: - m.setenv(OTEL_EXPORTER_OTLP_TRACES_INSECURE, "true") - - sampling_params = SamplingParams( - temperature=0.01, - top_p=0.1, - max_tokens=256, - ) - model = "facebook/opt-125m" - llm = LLM( - model=model, - otlp_traces_endpoint=FAKE_TRACE_SERVER_ADDRESS, - ) - prompts = ["This is a short prompt"] - outputs = llm.generate(prompts, sampling_params=sampling_params) - - timeout = 5 - if not trace_service.evt.wait(timeout): - raise TimeoutError( - f"The fake trace service didn't receive a trace within " - f"the {timeout} seconds timeout") - - request = trace_service.request - assert len(request.resource_spans) == 1, ( - f"Expected 1 resource span, " - f"but got {len(request.resource_spans)}") - assert len(request.resource_spans[0].scope_spans) == 1, ( - f"Expected 1 scope span, " - f"but got {len(request.resource_spans[0].scope_spans)}") - assert len(request.resource_spans[0].scope_spans[0].spans) == 1, ( - f"Expected 1 span, " - f"but got {len(request.resource_spans[0].scope_spans[0].spans)}") - - attributes = decode_attributes( - request.resource_spans[0].scope_spans[0].spans[0].attributes) - assert attributes.get(SpanAttributes.GEN_AI_RESPONSE_MODEL) == model - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_ID) == outputs[0].request_id - assert attributes.get(SpanAttributes.GEN_AI_REQUEST_TEMPERATURE - ) == sampling_params.temperature - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_TOP_P) == sampling_params.top_p - assert attributes.get(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS - ) == sampling_params.max_tokens - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_N) == sampling_params.n - assert attributes.get( - SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS) == len( - outputs[0].prompt_token_ids) - completion_tokens = sum(len(o.token_ids) for o in outputs[0].outputs) - assert attributes.get( - SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS) == completion_tokens - 
metrics = outputs[0].metrics - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE - ) == metrics.time_in_queue - ttft = metrics.first_token_time - metrics.arrival_time - assert attributes.get( - SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN) == ttft - e2e_time = metrics.finished_time - metrics.arrival_time - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_E2E) == e2e_time - assert metrics.scheduler_time > 0 - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_IN_SCHEDULER - ) == metrics.scheduler_time - # Model forward and model execute should be none, since detailed traces is - # not enabled. - assert metrics.model_forward_time is None - assert metrics.model_execute_time is None - - -def test_traces_with_detailed_steps( - monkeypatch: pytest.MonkeyPatch, - trace_service: FakeTraceService, -): - with monkeypatch.context() as m: - m.setenv(OTEL_EXPORTER_OTLP_TRACES_INSECURE, "true") - - sampling_params = SamplingParams( - temperature=0.01, - top_p=0.1, - max_tokens=256, - ) - model = "facebook/opt-125m" - llm = LLM( - model=model, - otlp_traces_endpoint=FAKE_TRACE_SERVER_ADDRESS, - collect_detailed_traces=["all"], - ) - prompts = ["This is a short prompt"] - outputs = llm.generate(prompts, sampling_params=sampling_params) - - timeout = 5 - if not trace_service.evt.wait(timeout): - raise TimeoutError( - f"The fake trace service didn't receive a trace within " - f"the {timeout} seconds timeout") - - request = trace_service.request - assert len(request.resource_spans) == 1, ( - f"Expected 1 resource span, " - f"but got {len(request.resource_spans)}") - assert len(request.resource_spans[0].scope_spans) == 1, ( - f"Expected 1 scope span, " - f"but got {len(request.resource_spans[0].scope_spans)}") - assert len(request.resource_spans[0].scope_spans[0].spans) == 1, ( - f"Expected 1 span, " - f"but got {len(request.resource_spans[0].scope_spans[0].spans)}") - - attributes = decode_attributes( - request.resource_spans[0].scope_spans[0].spans[0].attributes) - assert attributes.get(SpanAttributes.GEN_AI_RESPONSE_MODEL) == model - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_ID) == outputs[0].request_id - assert attributes.get(SpanAttributes.GEN_AI_REQUEST_TEMPERATURE - ) == sampling_params.temperature - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_TOP_P) == sampling_params.top_p - assert attributes.get(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS - ) == sampling_params.max_tokens - assert attributes.get( - SpanAttributes.GEN_AI_REQUEST_N) == sampling_params.n - assert attributes.get( - SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS) == len( - outputs[0].prompt_token_ids) - completion_tokens = sum(len(o.token_ids) for o in outputs[0].outputs) - assert attributes.get( - SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS) == completion_tokens - metrics = outputs[0].metrics - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE - ) == metrics.time_in_queue - ttft = metrics.first_token_time - metrics.arrival_time - assert attributes.get( - SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN) == ttft - e2e_time = metrics.finished_time - metrics.arrival_time - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_E2E) == e2e_time - assert metrics.scheduler_time > 0 - assert attributes.get(SpanAttributes.GEN_AI_LATENCY_TIME_IN_SCHEDULER - ) == metrics.scheduler_time - assert metrics.model_forward_time > 0 - assert attributes.get( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD - ) == pytest.approx(metrics.model_forward_time / 1000) - assert 
metrics.model_execute_time > 0 - assert attributes.get( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE - ) == metrics.model_execute_time - assert metrics.model_forward_time < 1000 * metrics.model_execute_time diff --git a/tests/metrics/__init__.py b/tests/v1/tracing/__init__.py similarity index 100% rename from tests/metrics/__init__.py rename to tests/v1/tracing/__init__.py From 6c036615dc8ee8c27588491287cb49f2c2e2476a Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 19:41:55 -0700 Subject: [PATCH 073/518] [V0 Deprecation] Remove misc V0 tests (#25118) Signed-off-by: Woosuk Kwon --- tests/model_executor/test_logits_processor.py | 98 ------------------- tests/test_cache_block_hashing.py | 92 ----------------- 2 files changed, 190 deletions(-) delete mode 100644 tests/model_executor/test_logits_processor.py delete mode 100644 tests/test_cache_block_hashing.py diff --git a/tests/model_executor/test_logits_processor.py b/tests/model_executor/test_logits_processor.py deleted file mode 100644 index 532ebba038d3..000000000000 --- a/tests/model_executor/test_logits_processor.py +++ /dev/null @@ -1,98 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import random -from unittest.mock import patch - -import pytest -import torch - -from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.model_executor.utils import set_random_seed -from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata -from vllm.utils import is_pin_memory_available - - -class MockLogitsProcessor(LogitsProcessor): - - def __init__(self, vocab_size: int, scale: float, - fake_logits: torch.Tensor): - super().__init__(vocab_size=vocab_size, scale=scale) - self.fake_logits = fake_logits.clone() - - def forward(self, *args, **kwargs): - with patch( - "vllm.model_executor.layers.logits_processor._prune_hidden_states", - lambda x, y: x - ), patch( - "vllm.model_executor.layers.logits_processor.LogitsProcessor._get_logits", - lambda *args, **kwargs: self.fake_logits): - return super().forward(*args, **kwargs) - - -def _prepare_test( - batch_size: int -) -> tuple[torch.Tensor, torch.Tensor, MockLogitsProcessor]: - vocab_size = 32000 - input_tensor = torch.rand((batch_size, 1024), dtype=torch.float16) - fake_logits = torch.full((batch_size, vocab_size), - 1e-2, - dtype=input_tensor.dtype) - logits_processor = MockLogitsProcessor(32000, 0.5, fake_logits) - return input_tensor, fake_logits, logits_processor - - -RANDOM_SEEDS = list(range(128)) -CUDA_DEVICES = [ - f"cuda:{i}" for i in range(1 if torch.cuda.device_count() == 1 else 2) -] - - -@pytest.mark.parametrize("seed", RANDOM_SEEDS) -@pytest.mark.parametrize("device", CUDA_DEVICES) -def test_logits_processors(seed: int, device: str): - set_random_seed(seed) - torch.set_default_device(device) - batch_size = random.randint(1, 256) - input_tensor, fake_logits, logits_processor = _prepare_test(batch_size) - - # This sample logits processor gives infinite score to the i-th token, - # where i is the length of the input sequence. - # We therefore expect the output token sequence to be [0, 1, 2, ...] 
- def pick_ith(token_ids, logits): - logits[len(token_ids)] = float("inf") - return logits - - seq_group_metadata_list = [] - seq_lens = [] - for i in range(batch_size): - seq_group_metadata_list.append( - SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=True, - seq_data={0: SequenceData.from_seqs([1, 2, 3])}, - sampling_params=SamplingParams(temperature=0, - logits_processors=[pick_ith]), - block_tables={0: [1]}, - )) - seq_lens.append(seq_group_metadata_list[-1].seq_data[0].get_len()) - - sampling_metadata = SamplingMetadata.prepare( - seq_group_metadata_list, - seq_lens, - query_lens=seq_lens, - device=device, - pin_memory=is_pin_memory_available()) - logits_processor_output = logits_processor( - lm_head=None, - hidden_states=input_tensor, - sampling_metadata=sampling_metadata) - - assert torch.isinf(logits_processor_output[:, 0]).all() - - fake_logits *= logits_processor.scale - torch.testing.assert_close(logits_processor_output[:, 1], - fake_logits[:, 1], - rtol=1e-4, - atol=0.0) diff --git a/tests/test_cache_block_hashing.py b/tests/test_cache_block_hashing.py deleted file mode 100644 index 1dba0fd0fb3d..000000000000 --- a/tests/test_cache_block_hashing.py +++ /dev/null @@ -1,92 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Test hashing of cache blocks. - -Run `pytest tests/test_cache_block_hashing.py`. -""" -from typing import Optional - -import pytest - -from vllm.inputs import token_inputs -from vllm.lora.request import LoRARequest -from vllm.sequence import Sequence -from vllm.transformers_utils.tokenizer import get_tokenizer - -# Make two prefixes with different first blocks. -prefix_start = [("You are an expert"), ("You are a")] -prefix_common = ( - " school principal, skilled in effectively managing " - "faculty and staff. Draft 10-15 questions for a potential first grade " - "Head Teacher for my K-12, all-girls', independent school that emphasizes " - "community, joyful discovery, and life-long learning. The candidate is " - "coming in for a first-round panel interview for a 8th grade Math " - "teaching role. They have 5 years of previous teaching experience " - "as an assistant teacher at a co-ed, public school with experience " - "in middle school math teaching. Based on this, fulfill " - "the following: ") -prefixes = [start + prefix_common for start in prefix_start] - -# Sample prompts. -sample_prompts = [ - "Hello, my name is", "The president of the United States is", - "The capital of France is", "The future of AI is" -] - - -# Helper function. 
-def flatten_2d(li): - return [lss for ls in li for lss in ls] - - -@pytest.mark.parametrize("model", ["facebook/opt-125m"]) -@pytest.mark.parametrize("block_size", [16]) -@pytest.mark.parametrize("max_num_seqs", [256]) -@pytest.mark.parametrize("concurrent_lora_int_ids", - [[None], [1], [None, 1], [None, 1, 2], [1, 2]]) -def test_auto_prefix_caching(model: str, block_size: int, max_num_seqs: int, - concurrent_lora_int_ids: list[Optional[int]]): - - tokenizer = get_tokenizer("facebook/opt-125m") - - hashes: list[list[list[int]]] = [] - - for prefix in prefixes: - for lora_int_id in concurrent_lora_int_ids: - lora_request = None - - if lora_int_id is not None: - lora_request = LoRARequest( - f"example_lora_{lora_int_id}", - lora_int_id, - f"example/path/to/lora_{lora_int_id}", - ) - - hashes.append([]) - prompts = [prefix + prompt for prompt in sample_prompts] - for seq_id, prompt in enumerate(prompts): - hashes[-1].append([]) - prompt_token_ids = tokenizer.encode(prompt) - seq = Sequence(seq_id, - inputs=token_inputs(prompt_token_ids, - prompt=prompt), - block_size=block_size, - eos_token_id=tokenizer.eos_token_id, - lora_request=lora_request) - - num_blocks = len(prompt_token_ids) // block_size - for idx in range(num_blocks): - hashes[-1][-1].append(seq.hash_of_block(idx)) - - # Check that hashes made with two prefixes with different first blocks are - # different everywhere. - for hash0, hash1 in zip(flatten_2d(hashes[0]), flatten_2d(hashes[1])): - assert (hash0 != hash1) - - # Check that hashes of different prompts made with the same prefix are the - # same until the hashes that contain the prompt. - for hash_pref in hashes: - same_hashes = [tuple(h[:-1]) for h in hash_pref] - different_hashes = [h[-1] for h in hash_pref] - assert (len(set(same_hashes)) == 1) - assert (len(set(different_hashes)) == len(different_hashes)) From 7fb2a5be2838219d566f949ac41861df822eac10 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 20:18:36 -0700 Subject: [PATCH 074/518] [V0 Deprecation] Skip PP test (#25128) Signed-off-by: Woosuk Kwon --- tests/distributed/test_pipeline_parallel.py | 114 +++++--------------- 1 file changed, 28 insertions(+), 86 deletions(-) diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 9da9672d9597..fcd09844c095 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -26,23 +26,10 @@ VLLM_MULTI_NODE = os.getenv("VLLM_MULTI_NODE", "0") == "1" -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - For PP, we fall back to V0 by default. This means - that the TP baseline runs with V1 while the PP engine - runs with V0. This gives divergent results with dummy - weights. Once we enable V1 by default for PP, we can - remove this. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') - - class ParallelSetup(NamedTuple): tp_size: int pp_size: int eager_mode: bool - chunked_prefill: bool class PPTestOptions(NamedTuple): @@ -53,23 +40,10 @@ class PPTestOptions(NamedTuple): @dataclass class PPTestSettings: parallel_setups: list[ParallelSetup] - # NOTE: the length of distributed_backends and - # vllm_major_versions should be the same, and they - # are first zipped together to iterate over all - # test settings. 
distributed_backends: list[str] - # vllm major version: "0" for V0, "1" for V1 - vllm_major_versions: list[str] runner: RunnerOption test_options: PPTestOptions - def __post_init__(self): - if len(self.distributed_backends) != len(self.vllm_major_versions): - raise ValueError( - f"Length mismatch: distributed_backends " - f"({len(self.distributed_backends)}) != " - f"vllm_major_versions ({len(self.vllm_major_versions)})") - @staticmethod def detailed( *, @@ -83,27 +57,21 @@ def detailed( parallel_setups=[ ParallelSetup(tp_size=tp_base, pp_size=pp_base, - eager_mode=False, - chunked_prefill=False), + eager_mode=False), ParallelSetup(tp_size=tp_base, pp_size=2 * pp_base, - eager_mode=False, - chunked_prefill=True), + eager_mode=False), ParallelSetup(tp_size=tp_base, pp_size=2 * pp_base, - eager_mode=True, - chunked_prefill=False), + eager_mode=True), ParallelSetup(tp_size=2 * tp_base, pp_size=pp_base, - eager_mode=False, - chunked_prefill=True), + eager_mode=False), ParallelSetup(tp_size=2 * tp_base, pp_size=pp_base, - eager_mode=True, - chunked_prefill=False), + eager_mode=True), ], - distributed_backends=["mp", "mp", "ray", "ray"], - vllm_major_versions=["0", "1", "0", "1"], + distributed_backends=["mp", "ray"], runner=runner, test_options=PPTestOptions(multi_node_only=multi_node_only, load_format=load_format), @@ -118,17 +86,14 @@ def fast( multi_node_only: bool = False, load_format: Optional[str] = None, ): - vllm_major_versions = ["1"] if runner == "pooling" else ["0"] return PPTestSettings( parallel_setups=[ ParallelSetup(tp_size=tp_base, pp_size=pp_base, - eager_mode=True, - chunked_prefill=False), + eager_mode=True), ], distributed_backends=["mp"], - vllm_major_versions=vllm_major_versions, runner=runner, test_options=PPTestOptions(multi_node_only=multi_node_only, load_format=load_format), @@ -138,10 +103,8 @@ def iter_params(self, model_id: str): opts = self.test_options for parallel_setup in self.parallel_setups: - for backend, vllm_major_version in zip(self.distributed_backends, - self.vllm_major_versions): - yield (model_id, parallel_setup, backend, vllm_major_version, - self.runner, opts) + for backend in self.distributed_backends: + yield (model_id, parallel_setup, backend, self.runner, opts) # NOTE: You can adjust tp_base and/or pp_base locally to fit the model in GPU @@ -269,7 +232,6 @@ def _compare_tp( model_id: str, parallel_setup: ParallelSetup, distributed_backend: str, - vllm_major_version: str, runner: RunnerOption, test_options: PPTestOptions, num_gpus_available: int, @@ -281,7 +243,6 @@ def _compare_tp( tp_size, pp_size, eager_mode, - chunked_prefill, ) = parallel_setup multi_node_only, load_format = test_options @@ -334,8 +295,6 @@ def _compare_tp( "--max-num-seqs", "8", ] - if chunked_prefill: - common_args.append("--enable-chunked-prefill") if eager_mode: common_args.append("--enforce-eager") if runner != "auto": @@ -353,14 +312,10 @@ def _compare_tp( if max_num_seqs: common_args.extend(["--max-num-seqs", f"{max_num_seqs}"]) - specific_case = tp_size == 2 and pp_size == 2 and chunked_prefill - testing_ray_compiled_graph = False - if distributed_backend == "ray" and (vllm_major_version == "1" - or specific_case): + if distributed_backend == "ray": # For V1, test Ray Compiled Graph for all the tests - # For V0, test Ray Compiled Graph for a subset of the tests pp_env = { - "VLLM_USE_V1": vllm_major_version, + "VLLM_USE_V1": "1", "VLLM_USE_RAY_COMPILED_DAG": "1", "VLLM_USE_RAY_SPMD_WORKER": "1", "VLLM_USE_RAY_COMPILED_DAG_NCCL_CHANNEL": "1", @@ -368,17 +323,15 @@ def 
_compare_tp( # Temporary. Currently when zeromq + SPMD is used, it does not properly # terminate because of a Ray Compiled Graph issue. common_args.append("--disable-frontend-multiprocessing") - testing_ray_compiled_graph = True elif distributed_backend == "mp": - # Both V0/V1 of multiprocessing executor support PP pp_env = { - "VLLM_USE_V1": vllm_major_version, + "VLLM_USE_V1": "1", } else: pp_env = None tp_env = { - "VLLM_USE_V1": vllm_major_version, + "VLLM_USE_V1": "1", } pp_args = [ @@ -404,25 +357,17 @@ def _compare_tp( "mp", ] - try: - compare_two_settings(model_id, - pp_args, - tp_args, - pp_env, - tp_env, - method=method) - except Exception: - if testing_ray_compiled_graph and vllm_major_version == "0": - # Ray Compiled Graph tests are flaky for V0, - # so we don't want to fail the test - logger.exception("Ray Compiled Graph tests failed") - else: - raise + compare_two_settings(model_id, + pp_args, + tp_args, + pp_env, + tp_env, + method=method) @pytest.mark.parametrize( - ("model_id", "parallel_setup", "distributed_backend", "vllm_major_version", - "runner", "test_options"), + ("model_id", "parallel_setup", "distributed_backend", "runner", + "test_options"), [ params for model_id, settings in TEXT_GENERATION_MODELS.items() for params in settings.iter_params(model_id) if model_id in TEST_MODELS @@ -433,15 +378,14 @@ def test_tp_language_generation( model_id: str, parallel_setup: ParallelSetup, distributed_backend: str, - vllm_major_version: str, runner: RunnerOption, test_options: PPTestOptions, num_gpus_available, ): + pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, - vllm_major_version, runner, test_options, num_gpus_available, @@ -450,8 +394,8 @@ def test_tp_language_generation( @pytest.mark.parametrize( - ("model_id", "parallel_setup", "distributed_backend", "vllm_major_version", - "runner", "test_options"), + ("model_id", "parallel_setup", "distributed_backend", "runner", + "test_options"), [ params for model_id, settings in EMBEDDING_MODELS.items() for params in settings.iter_params(model_id) if model_id in TEST_MODELS @@ -462,15 +406,14 @@ def test_tp_language_embedding( model_id: str, parallel_setup: ParallelSetup, distributed_backend: str, - vllm_major_version: str, runner: RunnerOption, test_options: PPTestOptions, num_gpus_available, ): + pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, - vllm_major_version, runner, test_options, num_gpus_available, @@ -479,8 +422,8 @@ def test_tp_language_embedding( @pytest.mark.parametrize( - ("model_id", "parallel_setup", "distributed_backend", "vllm_major_version", - "runner", "test_options"), + ("model_id", "parallel_setup", "distributed_backend", "runner", + "test_options"), [ params for model_id, settings in MULTIMODAL_MODELS.items() for params in settings.iter_params(model_id) if model_id in TEST_MODELS @@ -491,15 +434,14 @@ def test_tp_multimodal_generation( model_id: str, parallel_setup: ParallelSetup, distributed_backend: str, - vllm_major_version: str, runner: RunnerOption, test_options: PPTestOptions, num_gpus_available, ): + pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, - vllm_major_version, runner, test_options, num_gpus_available, From 4ac510f4844ae2ab168c2dbac545e3dd28a0a1b9 Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Wed, 17 Sep 2025 23:19:52 -0400 Subject: [PATCH 075/518] [Kernels] 
Enable DeepGEMM by default (#24462) Signed-off-by: Bill Nell --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index eeed7771f045..72e1d5b0ede8 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -135,7 +135,7 @@ VLLM_TPU_BUCKET_PADDING_GAP: int = 0 VLLM_TPU_MOST_MODEL_LEN: Optional[int] = None VLLM_TPU_USING_PATHWAYS: bool = False - VLLM_USE_DEEP_GEMM: bool = False + VLLM_USE_DEEP_GEMM: bool = True VLLM_USE_DEEP_GEMM_E8M0: bool = True VLLM_USE_DEEP_GEMM_E8M0_HOPPER: bool = False VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False @@ -1044,7 +1044,7 @@ def get_vllm_port() -> Optional[int]: # Allow use of DeepGemm kernels for fused moe ops. "VLLM_USE_DEEP_GEMM": - lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "0"))), + lambda: bool(int(os.getenv("VLLM_USE_DEEP_GEMM", "1"))), # Whether to use E8M0 scaling when DeepGEMM is used on Blackwell GPUs. "VLLM_USE_DEEP_GEMM_E8M0": From 3127274d022b0bc8ff6ba9ceef41a99a6f01ad2d Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Wed, 17 Sep 2025 21:04:21 -0700 Subject: [PATCH 076/518] [MM Encoder] Apply DP ViT for Qwen3-VL model series (#24955) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Roger Wang Signed-off-by: Isotr0py Co-authored-by: Huang Jie <92386084+JJJYmmm@users.noreply.github.com> Co-authored-by: 松灵 <26085463+wulipc@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Isotr0py --- vllm/model_executor/models/qwen3_vl.py | 94 +++++++++++++++++----- vllm/model_executor/models/qwen3_vl_moe.py | 2 + 2 files changed, 77 insertions(+), 19 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 22948aee4936..2c36dfbce7f6 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -126,20 +126,23 @@ def __init__(self, bias: bool = False, act_fn: Callable[[torch.Tensor], torch.Tensor] = F.silu, quant_config: Optional[QuantizationConfig] = None, - prefix: str = ""): + prefix: str = "", + use_data_parallel: bool = False): super().__init__() self.linear_fc1 = ColumnParallelLinear(in_features, hidden_features, bias=bias, quant_config=quant_config, return_bias=False, - prefix=f"{prefix}.linear_fc1") + prefix=f"{prefix}.linear_fc1", + disable_tp=use_data_parallel) self.linear_fc2 = RowParallelLinear(hidden_features, in_features, bias=bias, quant_config=quant_config, return_bias=False, - prefix=f"{prefix}.linear_fc2") + prefix=f"{prefix}.linear_fc2", + disable_tp=use_data_parallel) self.act_fn = act_fn def forward(self, x: torch.Tensor): @@ -158,23 +161,27 @@ def __init__( norm_layer: Optional[Callable[[int], nn.Module]] = None, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() if norm_layer is None: norm_layer = partial(nn.LayerNorm, eps=1e-6) self.norm1 = norm_layer(dim) self.norm2 = norm_layer(dim) - self.attn = Qwen2_5_VisionAttention(embed_dim=dim, - num_heads=num_heads, - projection_size=dim, - quant_config=quant_config, - prefix=f"{prefix}.attn") + self.attn = Qwen2_5_VisionAttention( + embed_dim=dim, + num_heads=num_heads, + projection_size=dim, + quant_config=quant_config, + prefix=f"{prefix}.attn", + use_data_parallel=use_data_parallel) self.mlp = Qwen3_VisionMLP(dim, mlp_hidden_dim, act_fn=act_fn, bias=True, quant_config=quant_config, - prefix=f"{prefix}.mlp") + 
prefix=f"{prefix}.mlp", + use_data_parallel=use_data_parallel) def forward( self, @@ -205,6 +212,7 @@ def __init__( use_postshuffle_norm: bool = False, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.hidden_size = context_dim * (spatial_merge_size**2) @@ -222,13 +230,15 @@ def __init__( self.hidden_size, bias=True, quant_config=quant_config, - prefix=f"{prefix}.linear_fc1") + prefix=f"{prefix}.linear_fc1", + disable_tp=use_data_parallel) self.act_fn = nn.GELU() self.linear_fc2 = RowParallelLinear(self.hidden_size, d_model, bias=True, quant_config=quant_config, - prefix=f"{prefix}.linear_fc2") + prefix=f"{prefix}.linear_fc2", + disable_tp=use_data_parallel) def forward(self, x: torch.Tensor) -> torch.Tensor: if self.use_postshuffle_norm: @@ -250,6 +260,7 @@ def __init__( norm_eps: float = 1e-6, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.hidden_size = vision_config.hidden_size @@ -260,6 +271,12 @@ def __init__( self.spatial_merge_unit = self.spatial_merge_size**2 self.temporal_patch_size = vision_config.temporal_patch_size self.deepstack_visual_indexes = vision_config.deepstack_visual_indexes + self.use_data_parallel = use_data_parallel + + # NOTE: This is used for creating empty tensor for all_gather for + # DP ViT. Here out_hidden_size is enlarged due to deepstack + self.out_hidden_size = (vision_config.out_hidden_size * + (1 + len(self.deepstack_visual_indexes))) self.patch_embed = Qwen3_VisionPatchEmbed( patch_size=self.patch_size, @@ -283,7 +300,8 @@ def __init__( act_fn=_ACTIVATION_REGISTRY[vision_config.hidden_act], norm_layer=norm_layer, quant_config=quant_config, - prefix=f"{prefix}.blocks.{layer_idx}") + prefix=f"{prefix}.blocks.{layer_idx}", + use_data_parallel=use_data_parallel) for layer_idx in range(vision_config.depth) ]) @@ -294,6 +312,7 @@ def __init__( spatial_merge_size=self.spatial_merge_size, quant_config=quant_config, prefix=f"{prefix}.merger", + use_data_parallel=use_data_parallel, ) self.deepstack_merger_list = nn.ModuleList([ @@ -304,7 +323,8 @@ def __init__( use_postshuffle_norm=True, norm_layer=norm_layer, quant_config=quant_config, - prefix=f"{prefix}.deepstack_merger_list.{layer_idx}") + prefix=f"{prefix}.deepstack_merger_list.{layer_idx}", + use_data_parallel=use_data_parallel) for layer_idx in range(len(self.deepstack_visual_indexes)) ]) @@ -325,7 +345,14 @@ def device(self) -> torch.device: def rot_pos_emb(self, grid_thw): pos_ids = [] - for t, h, w in grid_thw: + # Support both Tensor and list inputs for DP path + if isinstance(grid_thw, list): + grid_list = grid_thw + max_grid_size = max(max(h, w) for _, h, w in grid_list) + else: + grid_list = grid_thw.tolist() + max_grid_size = int(grid_thw[:, 1:].max().item()) + for t, h, w in grid_list: hpos_ids = torch.arange(h).unsqueeze(1).expand(-1, w) hpos_ids = hpos_ids.reshape( h // self.spatial_merge_size, @@ -348,7 +375,6 @@ def rot_pos_emb(self, grid_thw): pos_ids.append( torch.stack([hpos_ids, wpos_ids], dim=-1).repeat(t, 1)) pos_ids = torch.cat(pos_ids, dim=0) - max_grid_size = grid_thw[:, 1:].max() rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size) rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(1) return rotary_pos_emb @@ -453,10 +479,18 @@ def forward( hidden_states = hidden_states + pos_embeds rotary_pos_emb = self.rot_pos_emb(grid_thw) + if isinstance(grid_thw, list): + grid_thw_tensor = 
torch.tensor(grid_thw, + device=hidden_states.device, + dtype=torch.int32) + else: + grid_thw_tensor = grid_thw + cu_seqlens = torch.repeat_interleave( - grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]).cumsum( + grid_thw_tensor[:, 1] * grid_thw_tensor[:, 2], + grid_thw_tensor[:, 0]).cumsum( dim=0, - dtype=grid_thw.dtype + dtype=grid_thw_tensor.dtype if torch.jit.is_tracing() else torch.int32, ) cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0) @@ -984,6 +1018,9 @@ class Qwen3VLForConditionalGeneration(nn.Module, SupportsMultiModal, "up_proj", ], } + + supports_encoder_tp_data = True + # To ensure correct weight loading and mapping. hf_to_vllm_mapper = WeightsMapper( orig_to_new_prefix={ @@ -1009,12 +1046,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"): self.config = config self.multimodal_config = multimodal_config + self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" self.visual = Qwen3_VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), quant_config=self._maybe_ignore_quant_config(quant_config), prefix=maybe_prefix(prefix, "visual"), + use_data_parallel=self.use_data_parallel, ) self.language_model = Qwen3LLMForCausalLM(vllm_config=vllm_config, @@ -1177,7 +1216,15 @@ def _process_image_input( image_embeds = image_input["image_embeds"].type(self.visual.dtype) else: pixel_values = image_input["pixel_values"].type(self.visual.dtype) - image_embeds = self.visual(pixel_values, grid_thw=grid_thw) + if self.use_data_parallel: + from vllm.multimodal.utils import ( + run_dp_sharded_mrope_vision_model) + return run_dp_sharded_mrope_vision_model(self.visual, + pixel_values, + grid_thw_list, + rope_type="rope_3d") + else: + image_embeds = self.visual(pixel_values, grid_thw=grid_thw) # Split concatenated embeddings for each image item. # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync @@ -1199,7 +1246,16 @@ def _process_video_input( else: pixel_values_videos = video_input["pixel_values_videos"].type( self.visual.dtype) - video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw) + if self.use_data_parallel: + from vllm.multimodal.utils import ( + run_dp_sharded_mrope_vision_model) + return run_dp_sharded_mrope_vision_model(self.visual, + pixel_values_videos, + grid_thw_list, + rope_type="rope_3d") + else: + video_embeds = self.visual(pixel_values_videos, + grid_thw=grid_thw) # Split concatenated embeddings for each video item. 
# Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index a800e94ab1e5..d25bc71dcb59 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -315,12 +315,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config = config self.multimodal_config = multimodal_config + self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" self.visual = Qwen3_VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), quant_config=self._maybe_ignore_quant_config(quant_config), prefix=maybe_prefix(prefix, "visual"), + use_data_parallel=self.use_data_parallel, ) self.language_model = Qwen3MoeLLMForCausalLM(vllm_config=vllm_config, From 32baf1d03685ead1f5946f867e4ca16007bd10b5 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 05:05:18 +0100 Subject: [PATCH 077/518] [Docs] Clean up the contributing README (#25099) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/contributing/README.md | 141 ++++++++++-------- .../installation/python_env_setup.inc.md | 2 +- mkdocs.yaml | 1 + 3 files changed, 77 insertions(+), 67 deletions(-) diff --git a/docs/contributing/README.md b/docs/contributing/README.md index 5a2a70d57e85..b0a95b3b3d3a 100644 --- a/docs/contributing/README.md +++ b/docs/contributing/README.md @@ -26,113 +26,123 @@ See . ## Developing ---8<-- "docs/getting_started/installation/python_env_setup.inc.md" - -Depending on the kind of development you'd like to do (e.g. Python, CUDA), you can choose to build vLLM with or without compilation. -Check out the [building from source][build-from-source] documentation for details. +The first step of contributing to vLLM is to clone the GitHub repository: -For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations. +```bash +git clone https://github.com/vllm-project/vllm.git +cd vllm +``` -### Building the docs with MkDocs +Then, configure your Python virtual environment. -#### Introduction to MkDocs +--8<-- "docs/getting_started/installation/python_env_setup.inc.md" -[MkDocs](https://github.com/mkdocs/mkdocs) is a fast, simple and downright gorgeous static site generator that's geared towards building project documentation. Documentation source files are written in Markdown, and configured with a single YAML configuration file. +If you are only developing vLLM's Python code, install vLLM using: -#### Install MkDocs and Plugins +```bash +VLLM_USE_PRECOMPILED=1 uv pip install -e . +``` -Install MkDocs along with the [plugins](https://github.com/vllm-project/vllm/blob/main/mkdocs.yaml) used in the vLLM documentation, as well as required dependencies: +If you are developing vLLM's Python and CUDA/C++ code, install vLLM using: ```bash -uv pip install -r requirements/docs.txt +uv pip install -e . ``` -!!! note - Ensure that your Python version is compatible with the plugins (e.g., `mkdocs-awesome-nav` requires Python 3.10+) +For more details about installing from source and installing for other hardware, check out the [installation instructions](../getting_started/installation/README.md) for your hardware and head to the "Build wheel from source" section. 
-#### Verify Installation +For an optimized workflow when iterating on C++/CUDA kernels, see the [Incremental Compilation Workflow](./incremental_build.md) for recommendations. -Confirm that MkDocs is correctly installed: +!!! tip + vLLM is compatible with Python versions 3.9 to 3.12. However, vLLM's default [Dockerfile](gh-file:docker/Dockerfile) ships with Python 3.12 and tests in CI (except `mypy`) are run with Python 3.12. -```bash -mkdocs --version -``` + Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. -Example output: +### Linting -```console -mkdocs, version 1.6.1 from /opt/miniconda3/envs/mkdoc/lib/python3.10/site-packages/mkdocs (Python 3.10) -``` - -#### Clone the `vLLM` repository +vLLM uses `pre-commit` to lint and format the codebase. See if `pre-commit` is new to you. Setting up `pre-commit` is as easy as: ```bash -git clone https://github.com/vllm-project/vllm.git -cd vllm +uv pip install pre-commit +pre-commit install ``` -#### Start the Development Server +vLLM's `pre-commit` hooks will now run automatically every time you commit. -MkDocs comes with a built-in dev-server that lets you preview your documentation as you work on it. Make sure you're in the same directory as the `mkdocs.yml` configuration file, and then start the server by running the `mkdocs serve` command: +!!! tip "Tips" + You can manually run the `pre-commit` hooks using: -```bash -mkdocs serve -``` + ```bash + pre-commit run # runs on staged files + pre-commit run -a # runs on all files (short for --all-files) + ``` -Example output: + --- -```console -INFO - Documentation built in 106.83 seconds -INFO - [22:02:02] Watching paths for changes: 'docs', 'mkdocs.yaml' -INFO - [22:02:02] Serving on http://127.0.0.1:8000/ -``` + Some `pre-commit` hooks only run in CI. If you need to, you can run them locally with: -#### View in Your Browser + ```bash + pre-commit run --hook-stage manual markdownlint + pre-commit run --hook-stage manual mypy-3.9 + ``` -Open up [http://127.0.0.1:8000/](http://127.0.0.1:8000/) in your browser to see a live preview:. +### Documentation -#### Learn More +MkDocs is a fast, simple and downright gorgeous static site generator that's geared towards building project documentation. Documentation source files are written in Markdown, and configured with a single YAML configuration file, . -For additional features and advanced configurations, refer to the official [MkDocs Documentation](https://www.mkdocs.org/). +Get started with: -## Testing +```bash +uv pip install -r requirements/docs.txt +``` -??? console "Commands" +!!! tip + Ensure that your Python version is compatible with the plugins + (e.g., `mkdocs-awesome-nav` requires Python 3.10+) - ```bash - # These commands are only for Nvidia CUDA platforms. - uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto +MkDocs comes with a built-in dev-server that lets you preview your documentation as you work on it. +From the root of the repository, run: - # Linting, formatting and static type checking - pre-commit install +```bash +mkdocs serve # with API ref (~10 minutes) +API_AUTONAV_EXCLUDE=vllm mkdocs serve # API ref off (~15 seconds) +``` - # You can manually run pre-commit with - pre-commit run --all-files --show-diff-on-failure +Once you see `Serving on http://127.0.0.1:8000/` in the logs, the live preview is ready! +Open in your browser to see it. 
- # To manually run something from CI that does not run - # locally by default, you can run: - pre-commit run mypy-3.9 --hook-stage manual --all-files +For additional features and advanced configurations, refer to the: - # Unit tests - pytest tests/ +- [MkDocs documentation](https://www.mkdocs.org/) +- [Material for MkDocs documentation](https://squidfunk.github.io/mkdocs-material/) (the MkDocs theme we use) - # Run tests for a single test file with detailed output - pytest -s -v tests/test_logger.py - ``` +### Testing -!!! tip - Since the ships with Python 3.12, all tests in CI (except `mypy`) are run with Python 3.12. +vLLM uses `pytest` to test the codebase. - Therefore, we recommend developing with Python 3.12 to minimise the chance of your local environment clashing with our CI environment. +```bash +# Install the test dependencies used in CI (CUDA only) +uv pip install -r requirements/common.txt -r requirements/dev.txt --torch-backend=auto + +# Install some common test dependencies (hardware agnostic) +uv pip install pytest pytest-asyncio + +# Run all tests +pytest tests/ -!!! note "Install python3-dev if Python.h is missing" +# Run tests for a single test file with detailed output +pytest -s -v tests/test_logger.py +``` + +!!! tip "Install python3-dev if Python.h is missing" If any of the above commands fails with `Python.h: No such file or directory`, install `python3-dev` with `sudo apt install python3-dev`. -!!! note +!!! warning "Warnings" Currently, the repository is not fully checked by `mypy`. -!!! note + --- + Currently, not all unit tests pass when run on CPU platforms. If you don't have access to a GPU platform to run unit tests locally, rely on the continuous integration system to run the tests for now. @@ -194,8 +204,7 @@ appropriately to indicate the type of change. Please use one of the following: The PR needs to meet the following code quality standards: - We adhere to [Google Python style guide](https://google.github.io/styleguide/pyguide.html) and [Google C++ style guide](https://google.github.io/styleguide/cppguide.html). -- Pass all linter checks. Please use `pre-commit` to format your code. See - if `pre-commit` is new to you. +- Pass all linter checks. - The code needs to be well-documented to ensure future contributors can easily understand the code. - Include sufficient tests to ensure the project stays correct and robust. This diff --git a/docs/getting_started/installation/python_env_setup.inc.md b/docs/getting_started/installation/python_env_setup.inc.md index 423bf9b00d07..06794f8d3120 100644 --- a/docs/getting_started/installation/python_env_setup.inc.md +++ b/docs/getting_started/installation/python_env_setup.inc.md @@ -1,4 +1,4 @@ -It's recommended to use [uv](https://docs.astral.sh/uv/), a very fast Python environment manager, to create and manage Python environments. Please follow the [documentation](https://docs.astral.sh/uv/#getting-started) to install `uv`. After installing `uv`, you can create a new Python environment and install vLLM using the following commands: +It's recommended to use [uv](https://docs.astral.sh/uv/), a very fast Python environment manager, to create and manage Python environments. Please follow the [documentation](https://docs.astral.sh/uv/#getting-started) to install `uv`. 
After installing `uv`, you can create a new Python environment using the following commands: ```bash uv venv --python 3.12 --seed diff --git a/mkdocs.yaml b/mkdocs.yaml index 507a80c41e8b..bbd850bdfee3 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -79,6 +79,7 @@ plugins: - "re:vllm\\._.*" # Internal modules - "vllm.third_party" - "vllm.vllm_flash_attn" + - !ENV [API_AUTONAV_EXCLUDE, ""] - mkdocstrings: handlers: python: From b98219670fb1ca2952d449404c2b4921d7cdce73 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Thu, 18 Sep 2025 05:08:41 +0100 Subject: [PATCH 078/518] [Core][MM] Cleanup `MultiModalCache` (#25006) Signed-off-by: Lukas Geiger --- vllm/multimodal/cache.py | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py index 31ae450f4c2f..297b4c7fa7fb 100644 --- a/vllm/multimodal/cache.py +++ b/vllm/multimodal/cache.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import operator import sys from abc import ABC, abstractmethod from collections.abc import Mapping, Sequence @@ -91,26 +92,15 @@ def __init__( class MultiModalCache: @classmethod - def get_leaf_size( - cls, - leaf: object, - *, - debug: bool = False, - ) -> int: + def get_leaf_size(cls, leaf: object) -> int: if isinstance(leaf, MultiModalProcessorCacheItem): return cls.get_leaf_size(leaf.item) if isinstance(leaf, MultiModalProcessorCacheItemMetadata): return leaf.item_size # These are not subclasses of dict - if isinstance(leaf, MultiModalKwargsItems): - return cls.get_item_size(leaf.data) # type: ignore - if isinstance(leaf, MultiModalKwargsItem): - return cls.get_item_size(leaf.data) # type: ignore - if isinstance(leaf, MultiModalKwargs): - return cls.get_item_size(leaf.data) # type: ignore - - if isinstance(leaf, MultiModalFieldElem): + if isinstance(leaf, (MultiModalKwargs, MultiModalKwargsItems, + MultiModalKwargsItem, MultiModalFieldElem)): return cls.get_item_size(leaf.data) # type: ignore # sys.getsizeof doesn't work for tensors @@ -126,11 +116,8 @@ def get_item_size( *, debug: bool = False, ) -> int: - size = json_reduce_leaves( - lambda a, b: a + b, - json_map_leaves(lambda x: cls.get_leaf_size(x, debug=debug), - value), - ) + size = json_reduce_leaves(operator.add, + json_map_leaves(cls.get_leaf_size, value)) if debug: leaf_count = json_count_leaves(value) From 027d37df389b00ed2e7d874113f869267533a2ab Mon Sep 17 00:00:00 2001 From: toncao <130689535+toncao@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:08:50 +0700 Subject: [PATCH 079/518] [Bugfix][Qwen3-Next] add prefixes to shared_expert in qwen3-next and mlp in qwen2moe to successfully load ignored params in quantized models (#24960) Signed-off-by: toncao Co-authored-by: toncao Co-authored-by: Jee Jee Li --- vllm/model_executor/models/qwen2_moe.py | 48 ++++++++++++------------ vllm/model_executor/models/qwen3_next.py | 1 + 2 files changed, 26 insertions(+), 23 deletions(-) diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index 5e6dea67c940..6c6276a93045 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -72,17 +72,20 @@ def __init__( hidden_act: str, quant_config: Optional[QuantizationConfig] = None, reduce_results: bool = True, + prefix: str = "", ) -> None: super().__init__() self.gate_up_proj = MergedColumnParallelLinear( hidden_size, [intermediate_size] * 2, bias=False, - 
quant_config=quant_config) + quant_config=quant_config, + prefix=f"{prefix}.gate_up_proj") self.down_proj = RowParallelLinear(intermediate_size, hidden_size, bias=False, quant_config=quant_config, - reduce_results=reduce_results) + reduce_results=reduce_results, + prefix=f"{prefix}.down_proj") if hidden_act != "silu": raise ValueError(f"Unsupported activation: {hidden_act}. " "Only silu is supported for now.") @@ -123,7 +126,8 @@ def __init__( self.gate = ReplicatedLinear(config.hidden_size, config.num_experts, bias=False, - quant_config=None) + quant_config=None, + prefix=f"{prefix}.gate") if config.shared_expert_intermediate_size > 0: self.shared_expert = Qwen2MoeMLP( hidden_size=config.hidden_size, @@ -132,6 +136,7 @@ def __init__( quant_config=quant_config, reduce_results=self.experts.must_reduce_shared_expert_outputs( ), + prefix=f"{prefix}.shared_expert", ) else: self.shared_expert = None @@ -203,21 +208,19 @@ def __init__( self.max_position_embeddings = max_position_embeddings self.dual_chunk_attention_config = dual_chunk_attention_config - self.qkv_proj = QKVParallelLinear( - hidden_size, - self.head_dim, - self.total_num_heads, - self.total_num_kv_heads, - bias=True, - quant_config=quant_config, - ) + self.qkv_proj = QKVParallelLinear(hidden_size, + self.head_dim, + self.total_num_heads, + self.total_num_kv_heads, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.qkv_proj") - self.o_proj = RowParallelLinear( - self.total_num_heads * self.head_dim, - hidden_size, - bias=False, - quant_config=quant_config, - ) + self.o_proj = RowParallelLinear(self.total_num_heads * self.head_dim, + hidden_size, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.o_proj") self.rotary_emb = get_rope( self.head_dim, @@ -296,12 +299,11 @@ def __init__( quant_config=quant_config, prefix=f"{prefix}.mlp") else: - self.mlp = Qwen2MoeMLP( - hidden_size=config.hidden_size, - intermediate_size=config.intermediate_size, - hidden_act=config.hidden_act, - quant_config=quant_config, - ) + self.mlp = Qwen2MoeMLP(hidden_size=config.hidden_size, + intermediate_size=config.intermediate_size, + hidden_act=config.hidden_act, + quant_config=quant_config, + prefix=f"{prefix}.mlp") self.input_layernorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) self.post_attention_layernorm = RMSNorm(config.hidden_size, diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index fe63e9303235..ca9f4d402dac 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -138,6 +138,7 @@ def __init__( quant_config=quant_config, reduce_results=self.experts.must_reduce_shared_expert_outputs( ), + prefix=f"{prefix}.shared_expert", ) else: self.shared_expert = None From dc2979c58574e7a49d17b50c5770010039145aac Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Thu, 18 Sep 2025 00:10:21 -0400 Subject: [PATCH 080/518] [Kernels] Overlap shared experts with combine instead of dispatch (#24254) Signed-off-by: Bill Nell --- .../fused_moe/deepep_ht_prepare_finalize.py | 50 +++++++++- .../fused_moe/deepep_ll_prepare_finalize.py | 55 +++++++++-- .../layers/fused_moe/modular_kernel.py | 95 +++++++++++++++---- .../layers/fused_moe/pplx_prepare_finalize.py | 39 +++++++- 4 files changed, 203 insertions(+), 36 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 5d6b9c87a6b7..f390f0a25875 
100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -240,7 +240,7 @@ def prepare( quant_config) return receiver() - def finalize( + def _finalize( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -248,7 +248,8 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: + do_async: bool, + ) -> Optional[Callable]: assert self.handle is not None @@ -271,7 +272,46 @@ def finalize( topk_weights=None, config=self._get_combine_config(), previous_event=None, - async_finish=False, + async_finish=do_async, allocate_on_comm_stream=False) - # Respect inplace outputs. - output.copy_(combined_x, non_blocking=True) + + if do_async: + + def _receiver(): + event.current_stream_wait() + # Respect inplace outputs. + output.copy_(combined_x, non_blocking=True) + + return lambda: _receiver() + else: + # Respect inplace outputs. + output.copy_(combined_x, non_blocking=True) + return None + + def finalize_async( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> Callable: + receiver = self._finalize(output, fused_expert_output, topk_weights, + topk_ids, apply_router_weight_on_input, + weight_and_reduce_impl, True) + assert receiver is not None + return receiver + + def finalize( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> None: + self._finalize(output, fused_expert_output, topk_weights, topk_ids, + apply_router_weight_on_input, weight_and_reduce_impl, + False) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index 01df7770463d..101fc8798c42 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -12,8 +12,7 @@ from vllm.model_executor.layers.fused_moe.utils import ( moe_kernel_quantize_input, normalize_batched_scales_shape) from vllm.v1.worker.ubatching import (dbo_current_ubatch_id, dbo_enabled, - dbo_maybe_run_recv_hook, - dbo_register_recv_hook, dbo_yield) + dbo_maybe_run_recv_hook) # DeepEP kernels quantize dispatch inputs in 128 element chunks. 
DEEPEP_QUANT_BLOCK_SIZE = 128 @@ -198,7 +197,7 @@ def prepare( hook() return receiver() - def finalize( + def _finalize( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -206,13 +205,14 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: + do_async: bool, + ) -> Optional[Callable]: assert isinstance( weight_and_reduce_impl, TopKWeightAndReduceDelegate ), ("Weight application and reduction happens in the combine kernel.") a2a_idx = dbo_current_ubatch_id() - do_recv_hook = dbo_enabled() + do_recv_hook = dbo_enabled() or do_async handle = self.handles[a2a_idx] assert handle is not None @@ -232,6 +232,45 @@ def finalize( zero_copy=False, return_recv_hook=do_recv_hook, out=output) - if recv_hook is not None: - dbo_register_recv_hook(recv_hook) - dbo_yield() + + return recv_hook + + def finalize_async( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> Callable: + recv_hook = self._finalize( + output, + fused_expert_output, + topk_weights, + topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl, + do_async=True, + ) + assert recv_hook is not None + return recv_hook + + def finalize( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> None: + self._finalize( + output, + fused_expert_output, + topk_weights, + topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl, + do_async=False, + ) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 58cd0294c8c4..729f8e39cf0f 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -209,7 +209,8 @@ def prepare( def supports_async(self) -> bool: """ - Indicates whether or not this class implements prepare_async. + Indicates whether or not this class implements prepare_async and + finalize_async. """ return False @@ -275,6 +276,42 @@ def finalize( """ raise NotImplementedError + def finalize_async( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: TopKWeightAndReduce, + ) -> Callable: + """ + Perform any combine plus apply weights and perform a reduction on the + fused experts output but do not wait for results from other workers. + - output: The output tensor, written in place. Must be (M, K) shape. + - fused_expert_output: The unweighted, unreduced output of the fused + experts, it will have (M, topk, K) shape. + - topk_weights: The weights to be applied to the fused_experts_output. + - topk_ids: The topk_ids. + - apply_router_weight_on_input: When False, apply the weights to + fused_expert_output. + - weight_and_reduce_impl: An optional TopKWeightAndReduce + implementation. + + Returns a callback that when invoked waits for results from other + workers and has the same return signature as `finalize`, e.g. + + receiver = obj.finalize_async(output, ...) + ... output not valid yet ... + receiver() + ... output valid here ... + + is equivalent to: + + obj.finalize(output, ...) 
+ """ + raise NotImplementedError + @property @abstractmethod def activation_format(self) -> FusedMoEActivationFormat: @@ -814,23 +851,20 @@ def forward( """ a1 = hidden_states - output = a1 if inplace else torch.zeros_like(a1) + if inplace and self.shared_experts is None: + output = a1 + else: + output = torch.zeros_like(a1) local_num_experts = w1.size(0) if global_num_experts == -1: global_num_experts = local_num_experts - shared_output: torch.Tensor - if not self.prepare_finalize.supports_async(): # We shouldn't be running an a2a kernel that doesn't # support async prepare/finalize assert not dbo_enabled() - # Run shared experts serially with dispatch. - if self.shared_experts is not None: - shared_output = self.shared_experts(a1) - (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, _expert_topk_weights) = self.prepare_finalize.prepare( a1, @@ -854,9 +888,6 @@ def forward( self.fused_experts.quant_config, ) - if self.shared_experts is not None: - shared_output = self.shared_experts(a1) - # If DBO is being used, register the hook with the ubatch context # and call it in dbo_maybe_run_recv_hook instead of passing it to # the receiver. @@ -900,16 +931,42 @@ def forward( apply_router_weight_on_input=apply_router_weight_on_input, ) - self.prepare_finalize.finalize( - output, - fused_out, - topk_weights, - topk_ids, - apply_router_weight_on_input, - self.fused_experts.finalize_weight_and_reduce_impl(), - ) + shared_output: Optional[torch.Tensor] = None + + if not self.prepare_finalize.supports_async(): + assert not dbo_enabled() + + self.prepare_finalize.finalize( + output, + fused_out, + topk_weights, + topk_ids, + apply_router_weight_on_input, + self.fused_experts.finalize_weight_and_reduce_impl(), + ) + if self.shared_experts is not None: + shared_output = self.shared_experts(a1) + else: + recv_hook = self.prepare_finalize.finalize_async( + output, + fused_out, + topk_weights, + topk_ids, + apply_router_weight_on_input, + self.fused_experts.finalize_weight_and_reduce_impl(), + ) + + if self.shared_experts is not None: + shared_output = self.shared_experts(a1) + + assert recv_hook is not None + dbo_register_recv_hook(recv_hook) + dbo_yield() + if not dbo_enabled(): + recv_hook() if self.shared_experts is None: return output else: + assert shared_output is not None return shared_output, output diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py index 32d12476dd01..ddddd2a3b7a2 100644 --- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py @@ -272,7 +272,7 @@ def prepare( hook() return receiver() - def finalize( + def finalize_async( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -280,7 +280,7 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: + ) -> Callable: assert isinstance( weight_and_reduce_impl, TopKWeightAndReduceDelegate ), ("Weight application and reduction happens in the combine kernel.") @@ -303,8 +303,39 @@ def finalize( if apply_router_weight_on_input: topk_weights = torch.ones_like(topk_weights) + topk_ids_u32 = topk_ids.view(dtype=torch.uint32) + self.a2a.combine(out_tokens=output, - indices=topk_ids.view(dtype=torch.uint32), + indices=topk_ids_u32, weights=topk_weights, expert_y=fused_expert_output, - bound_m=bound_m) + bound_m=bound_m, + do_send=True, + do_recv=False) + + return lambda: 
self.a2a.combine(out_tokens=output, + indices=topk_ids_u32, + weights=topk_weights, + expert_y=fused_expert_output, + bound_m=bound_m, + do_send=False, + do_recv=True) + + def finalize( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> None: + receiver = self.finalize_async( + output, + fused_expert_output, + topk_weights, + topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl, + ) + receiver() From 52bc9d5b3edbf8804758d46cde28024d6c362e42 Mon Sep 17 00:00:00 2001 From: YiwenC <54658925+666even666@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:11:46 -0700 Subject: [PATCH 081/518] [Model] enable data parallel for InternVL vision encoder (#23909) Signed-off-by: Yiwen Chen Signed-off-by: YiwenC <54658925+666even666@users.noreply.github.com> Co-authored-by: Roger Wang --- docs/configuration/optimization.md | 1 + vllm/model_executor/models/intern_vit.py | 107 ++++++++++++++++------- vllm/model_executor/models/internvl.py | 5 +- 3 files changed, 80 insertions(+), 33 deletions(-) diff --git a/docs/configuration/optimization.md b/docs/configuration/optimization.md index 5807d787cf53..5564d8a81d93 100644 --- a/docs/configuration/optimization.md +++ b/docs/configuration/optimization.md @@ -175,6 +175,7 @@ Regardless, you need to set `mm_encoder_tp_mode="data"` in engine arguments to u Known supported models: - GLM-4.5V GLM-4.1V () +- InternVL () - Kimi-VL () - Llama4 () - MiniCPM-V-2.5 or above (, ) diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py index 8e9ab9649bd4..118cce810a1f 100644 --- a/vllm/model_executor/models/intern_vit.py +++ b/vllm/model_executor/models/intern_vit.py @@ -25,9 +25,11 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, QKVParallelLinear, + ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.multimodal.utils import run_dp_sharded_vision_model NORM2FN = { 'rms_norm': RMSNorm, @@ -137,6 +139,7 @@ def __init__( *, num_dummy_heads: int = 0, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() @@ -150,8 +153,10 @@ def __init__( f'(got `embed_dim`: {self.embed_dim} and `num_heads`:' f' {self.num_heads}).') - self.tp_size = get_tensor_model_parallel_world_size() - self.tp_rank = get_tensor_model_parallel_rank() + self.tp_size = (1 if use_data_parallel else + get_tensor_model_parallel_world_size()) + self.tp_rank = (0 if use_data_parallel else + get_tensor_model_parallel_rank()) # Additional dummy heads are used to enable TP for common GPU counts. 
self.dummy_dim = (num_dummy_heads + self.num_heads) * self.head_dim @@ -159,14 +164,23 @@ def __init__( self.tp_size) self.scale = self.head_dim**-0.5 - self.qkv = QKVParallelLinear( - self.embed_dim, - self.head_dim, - num_dummy_heads + self.num_heads, - bias=config.qkv_bias, - quant_config=quant_config, - prefix=f"{prefix}.qkv", - ) + if use_data_parallel: + self.qkv = ReplicatedLinear( + self.embed_dim, + 3 * self.head_dim * self.num_heads, + bias=config.qkv_bias, + quant_config=quant_config, + prefix=f"{prefix}.qkv", + ) + else: + self.qkv = QKVParallelLinear( + self.embed_dim, + self.head_dim, + num_dummy_heads + self.num_heads, + bias=config.qkv_bias, + quant_config=quant_config, + prefix=f"{prefix}.qkv", + ) self.qk_normalization = config.qk_normalization @@ -178,12 +192,20 @@ def __init__( eps=config.layer_norm_eps, var_hidden_size=self.embed_dim) - self.proj = RowParallelLinear( - self.dummy_dim, - self.embed_dim, - quant_config=quant_config, - prefix=f"{prefix}.proj", - ) + if use_data_parallel: + self.proj = ReplicatedLinear( + self.dummy_dim, + self.embed_dim, + quant_config=quant_config, + prefix=f"{prefix}.proj", + ) + else: + self.proj = RowParallelLinear( + self.dummy_dim, + self.embed_dim, + quant_config=quant_config, + prefix=f"{prefix}.proj", + ) self.attn = MultiHeadAttention(self.num_heads_per_partition, self.head_dim, self.scale) @@ -287,21 +309,26 @@ def __init__( config: PretrainedConfig, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.config = config self.activation_fn = get_act_fn(config.hidden_act) - self.fc1 = ColumnParallelLinear(config.hidden_size, - config.intermediate_size, - bias=True, - quant_config=quant_config, - prefix=f"{prefix}.fc1") - self.fc2 = RowParallelLinear(config.intermediate_size, - config.hidden_size, - bias=True, - quant_config=quant_config, - prefix=f"{prefix}.fc2") + cls_fc1 = (ReplicatedLinear + if use_data_parallel else ColumnParallelLinear) + self.fc1 = cls_fc1(config.hidden_size, + config.intermediate_size, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.fc1") + cls_fc2 = (ReplicatedLinear + if use_data_parallel else RowParallelLinear) + self.fc2 = cls_fc2(config.intermediate_size, + config.hidden_size, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.fc2") def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: hidden_states, _ = self.fc1(hidden_states) @@ -320,6 +347,7 @@ def __init__( *, num_dummy_heads: int = 0, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() @@ -330,11 +358,13 @@ def __init__( self.attn = self._init_attn(config, quant_config, num_dummy_heads=num_dummy_heads, - prefix=f"{prefix}.attn") + prefix=f"{prefix}.attn", + use_data_parallel=use_data_parallel) self.mlp = InternMLP(config, quant_config=quant_config, - prefix=f"{prefix}.mlp") + prefix=f"{prefix}.mlp", + use_data_parallel=use_data_parallel) self.norm1 = NORM2FN[self.norm_type](self.embed_dim, eps=config.layer_norm_eps) self.norm2 = NORM2FN[self.norm_type](self.embed_dim, @@ -352,16 +382,20 @@ def _init_attn( *, num_dummy_heads: int, prefix: str = "", + use_data_parallel: bool = False, ): # fallback to sdpa attention if tp unavailable - tp_size = get_tensor_model_parallel_world_size() + # tp_size = get_tensor_model_parallel_world_size() + tp_size = (1 if use_data_parallel else + get_tensor_model_parallel_world_size()) num_heads = config.num_attention_heads if (num_heads + num_dummy_heads) % tp_size == 
0: return InternParallelAttention(config, quant_config=quant_config, num_dummy_heads=num_dummy_heads, - prefix=prefix) + prefix=prefix, + use_data_parallel=use_data_parallel) return InternSdpaAttention(config, num_dummy_heads=num_dummy_heads) @@ -388,6 +422,7 @@ def __init__( num_hidden_layers_override: Optional[int] = None, num_dummy_heads: int = 0, prefix: str = "", + use_data_parallel: bool = False, ): super().__init__() @@ -402,7 +437,8 @@ def __init__( InternVisionEncoderLayer(config, quant_config, num_dummy_heads=num_dummy_heads, - prefix=f"{prefix}.layers.{layer_idx}") + prefix=f"{prefix}.layers.{layer_idx}", + use_data_parallel=use_data_parallel) for layer_idx in range(num_hidden_layers) ]) @@ -429,10 +465,12 @@ def __init__( num_hidden_layers_override: Optional[int] = None, num_dummy_heads: int = 0, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.config = config + self.use_data_parallel = use_data_parallel self.embeddings = InternVisionEmbeddings(config) self.encoder = InternVisionEncoder( @@ -441,6 +479,7 @@ def __init__( num_hidden_layers_override=num_hidden_layers_override, num_dummy_heads=num_dummy_heads, prefix=f"{prefix}.encoder", + use_data_parallel=use_data_parallel, ) def get_input_embeddings(self): @@ -464,7 +503,11 @@ def forward( raise ValueError( f'wrong pixel_values size: {pixel_values.shape}') - encoder_outputs = self.encoder(inputs_embeds=hidden_states) + if self.use_data_parallel: + encoder_outputs = run_dp_sharded_vision_model( + hidden_states, self.encoder) + else: + encoder_outputs = self.encoder(inputs_embeds=hidden_states) return encoder_outputs diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index 9565628b198e..6a5c565b52e8 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -1035,6 +1035,8 @@ def get_video_replacement_internvl(item_idx: int): class InternVLChatModel(nn.Module, SupportsMultiModal, SupportsPP, SupportsLoRA): + supports_encoder_tp_data = True + @classmethod def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: if modality.startswith("image"): @@ -1053,6 +1055,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.config = config self.multimodal_config = multimodal_config + self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" self._patch_quant_config(config, quant_config) image_size = config.force_image_size or config.vision_config.image_size @@ -1120,7 +1123,7 @@ def _init_vision_model( quant_config=quant_config, num_hidden_layers_override=num_hidden_layers, prefix=prefix, - ) + use_data_parallel=self.use_data_parallel) else: return InternVisionPatchModel(config.vision_config) From bec060fd99e371b1adc53f65636061f702fa8e61 Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Wed, 17 Sep 2025 23:25:07 -0500 Subject: [PATCH 082/518] Mark prompt logprobs as incompatible with prompt embeds at API level (#25077) Signed-off-by: Andrew Sansom --- .../test_completion_with_prompt_embeds.py | 17 +++++++++++++++++ vllm/engine/llm_engine.py | 11 +++++++---- vllm/entrypoints/openai/serving_completion.py | 5 +++++ 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index dbfb1b024f7c..7b58f851a4d2 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ 
b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -228,3 +228,20 @@ async def test_completions_with_logprobs_and_prompt_embeds( assert max(logprobs_arg, 1) <= len(top_logprobs) <= logprobs_arg + 1 assert len(logprobs.tokens) == 5 + + +@pytest.mark.asyncio +async def test_prompt_logprobs_raises_error( + client_with_prompt_embeds: openai.AsyncOpenAI): + with pytest.raises(BadRequestError, match="not compatible"): + encoded_embeds = create_dummy_embeds() + await client_with_prompt_embeds.completions.create( + model=MODEL_NAME, + prompt="", + max_tokens=5, + temperature=0.0, + extra_body={ + "prompt_embeds": encoded_embeds, + "prompt_logprobs": True + }, + ) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index c35bd20371d0..34b5dcb58750 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -671,10 +671,13 @@ def add_request( arrival_time = time.time() if (isinstance(prompt, dict) - and prompt.get("prompt_embeds", None) is not None - and not prompt.get("prompt_token_ids", None)): - seq_len = prompt["prompt_embeds"].shape[0] - prompt["prompt_token_ids"] = [0] * seq_len + and prompt.get("prompt_embeds", None) is not None): + if not prompt.get("prompt_token_ids", None): + seq_len = prompt["prompt_embeds"].shape[0] + prompt["prompt_token_ids"] = [0] * seq_len + if params.prompt_logprobs is not None: + raise ValueError( + "prompt_logprobs is not compatible with prompt embeds.") processed_inputs = self.input_preprocessor.preprocess( prompt, diff --git a/vllm/entrypoints/openai/serving_completion.py b/vllm/entrypoints/openai/serving_completion.py index 044f08f32b0d..0c61c48da0bc 100644 --- a/vllm/entrypoints/openai/serving_completion.py +++ b/vllm/entrypoints/openai/serving_completion.py @@ -112,6 +112,11 @@ async def create_completion( return self.create_error_response( "Echo is unsupported with prompt embeds.") + if (request.prompt_logprobs is not None + and request.prompt_embeds is not None): + return self.create_error_response( + "prompt_logprobs is not compatible with prompt embeds.") + request_id = ( f"cmpl-" f"{self._base_request_id(raw_request, request.request_id)}") From 3bc18127ff1c644257abcf84a1a56fab8c0d3f0c Mon Sep 17 00:00:00 2001 From: Chaojun Zhang Date: Thu, 18 Sep 2025 12:30:10 +0800 Subject: [PATCH 083/518] [XPU] Whisper model support on XPU Platform (#25123) Signed-off-by: chzhang --- vllm/attention/layer.py | 4 ++-- vllm/v1/worker/utils.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 22dc6dcbc8d6..15c0ce33e965 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -391,8 +391,8 @@ def __init__( backend = _Backend.FLASH_ATTN use_upstream_fa = True - if current_platform.is_rocm(): - # currently, only torch_sdpa is supported on rocm + if current_platform.is_rocm() or current_platform.is_xpu(): + # currently, only torch_sdpa is supported on rocm/xpu self.attn_backend = _Backend.TORCH_SDPA else: diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index fc831a73a75e..b76ac633892f 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -282,7 +282,7 @@ def bind_kv_cache( # TODO - analyze where runner_kv_caches is used and the right # way to ensure it properly reflects multiple attention layers # in the same decoder block. - if current_platform.is_cuda(): + if current_platform.is_cuda() or current_platform.is_xpu(): # We know that the GPU runner is not impacted by this # case. 
Some test code depends on runner_kv_caches, but # not in a way that's impacted by ignoring this. From 9d8a2d86d24b8afd849d18ddb4ef51cec1c0471d Mon Sep 17 00:00:00 2001 From: YiwenC <54658925+666even666@users.noreply.github.com> Date: Wed, 17 Sep 2025 21:51:35 -0700 Subject: [PATCH 084/518] [EPLB] Add EPLB support for hunyuan_v1 (#23078) --- vllm/model_executor/layers/fused_moe/layer.py | 4 +- vllm/model_executor/models/hunyuan_v1.py | 135 ++++++++++++++++-- 2 files changed, 123 insertions(+), 16 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index ae3b67a2b84e..da513d75da4d 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -1508,8 +1508,8 @@ def get_expert_weights(self) -> Iterable[torch.Tensor]: return [ weight.view(self.local_num_experts, -1) for name, weight in weights - if name not in NON_EXPERT_WEIGHTS - and not name.startswith("_shared_experts.") + if name not in NON_EXPERT_WEIGHTS and weight.shape != torch.Size( + []) and not name.startswith("_shared_experts.") ] def set_eplb_state( diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index db054b5c537e..4110c8a1fd08 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -23,7 +23,8 @@ # See the License for the specific language governing permissions and # limitations under the License. """Inference-only HunYuan model compatible with HuggingFace weights.""" -from collections.abc import Iterable +import typing +from collections.abc import Callable, Iterable from typing import Any, Optional, Union import regex as re @@ -33,8 +34,8 @@ from vllm.attention import Attention, AttentionType from vllm.compilation.decorators import support_torch_compile -from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import (get_pp_group, +from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config +from vllm.distributed import (get_ep_group, get_pp_group, get_tensor_model_parallel_world_size, tensor_model_parallel_all_reduce) from vllm.model_executor.layers.activation import SiluAndMul @@ -56,7 +57,7 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors -from .interfaces import SupportsLoRA, SupportsPP +from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP from .utils import (AutoWeightsLoader, PPMissingLayer, is_pp_missing_parameter, make_layers, maybe_prefix) @@ -355,10 +356,16 @@ def __init__( quant_config: Optional[QuantizationConfig] = None, layer_id: int = -1, prefix: str = "", + enable_eplb: bool = False, ): super().__init__() self.tp_size = get_tensor_model_parallel_world_size() + self.ep_group = get_ep_group().device_group + self.ep_rank = self.ep_group.rank() + self.ep_size = self.ep_group.size() + self.n_routed_experts = config.num_experts + if self.tp_size > config.num_experts: raise ValueError( f"Tensor parallel size {self.tp_size} is greater than " @@ -379,8 +386,23 @@ def __init__( config.moe_intermediate_size, int) else config.moe_intermediate_size[layer_id]) + # Load balancing settings. 
+ vllm_config = get_current_vllm_config() + eplb_config = vllm_config.parallel_config.eplb_config + self.enable_eplb = enable_eplb + + self.n_logical_experts = self.n_routed_experts + self.n_redundant_experts = eplb_config.num_redundant_experts + self.n_physical_experts = (self.n_logical_experts + + self.n_redundant_experts) + self.n_local_physical_experts = self.n_physical_experts // self.ep_size + self.physical_expert_start = (self.ep_rank * + self.n_local_physical_experts) + self.physical_expert_end = (self.physical_expert_start + + self.n_local_physical_experts) + self.experts = FusedMoE( - num_experts=config.num_experts, + num_experts=self.n_routed_experts, top_k=top_k, hidden_size=config.hidden_size, intermediate_size=intermediate_size, @@ -388,6 +410,8 @@ def __init__( renormalize=top_k > 1, quant_config=quant_config, prefix=f"{prefix}.experts", + enable_eplb=self.enable_eplb, + num_redundant_experts=self.n_redundant_experts, ) self.gate = ReplicatedLinear(config.hidden_size, @@ -446,6 +470,7 @@ def __init__( quant_config: Optional[QuantizationConfig] = None, prefix: str = "", layer_id: int = -1, + enable_eplb: bool = False, ) -> None: super().__init__() assert layer_id >= 0 @@ -509,6 +534,7 @@ def __init__( quant_config=quant_config, layer_id=layer_id, prefix=f"{prefix}.mlp", + enable_eplb=enable_eplb, ) else: self.mlp = HunYuanMLP( @@ -562,6 +588,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config lora_config = vllm_config.lora_config + eplb_config = vllm_config.parallel_config.eplb_config + enable_eplb = vllm_config.parallel_config.enable_eplb + self.num_redundant_experts = eplb_config.num_redundant_experts self.config = config self.quant_config = quant_config @@ -588,6 +617,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): cache_config=cache_config, quant_config=quant_config, prefix=prefix, + enable_eplb=enable_eplb, ), prefix=f"{prefix}.layers", ) @@ -674,6 +704,7 @@ def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: ckpt_down_proj_name="down_proj", ckpt_up_proj_name="up_proj", num_experts=self.config.num_experts, + num_redundant_experts=self.num_redundant_experts, ) else: return [] @@ -803,25 +834,43 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): # Skip loading extra bias for GPTQ models. if name.endswith(".bias") and name not in params_dict: continue + is_expert_weight = False for mapping in expert_params_mapping: param_name, weight_name, expert_id, shard_id = mapping if weight_name not in name: continue - name = name.replace(weight_name, param_name) - # Skip layers on other devices. - if is_pp_missing_parameter(name, self): + # this is an expert weight and should not be + # attempted to load as other weights later + is_expert_weight = True + + # Do not modify `name` since the loop may continue here + # Instead, create a new variable + name_mapped = name.replace(weight_name, param_name) + if is_pp_missing_parameter(name_mapped, self): continue - param = params_dict[name] - weight_loader = param.weight_loader - weight_loader( + param = params_dict[name_mapped] + # We should ask the weight loader to return success or not + # here since otherwise we may skip experts with other + # available replicas. 
+ weight_loader = typing.cast(Callable[..., bool], + param.weight_loader) + success = weight_loader( param, loaded_weight, - name, + name_mapped, shard_id=shard_id, expert_id=expert_id, + return_success=True, ) - break + if success: + name = name_mapped + break else: + if is_expert_weight: + # We've checked that this is an expert weight + # However it's not mapped locally to this rank + # So we simply skip it + continue # Remapping the name of FP8 kv-scale. name = maybe_remap_kv_scale_name(name, params_dict) if name is None: @@ -841,7 +890,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): return loaded_params -class HunYuanV1Base(nn.Module, SupportsLoRA, SupportsPP): +class HunYuanV1Base(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts): packed_modules_mapping = { "qkv_proj": [ "q_proj", @@ -883,6 +932,64 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): else: self.lm_head = PPMissingLayer() + # Set MoE hyperparameters + self.expert_weights = [] + self.num_expert_groups = 1 + self.moe_layers: list[FusedMoE] = [] + example_layer = None + for layer in self.model.layers: + if isinstance(layer, PPMissingLayer): + continue + + assert isinstance(layer, HunYuanDecoderLayer) + if isinstance(layer.mlp, HunYuanSparseMoeBlock): + example_layer = layer.mlp + self.moe_layers.append(layer.mlp.experts) + + if example_layer is None: + raise RuntimeError("No HunYuanMoE layer found in model.layers.") + + self.num_moe_layers = len(self.moe_layers) + self.num_logical_experts = example_layer.n_logical_experts + self.num_physical_experts = example_layer.n_physical_experts + self.num_local_physical_experts = example_layer.n_local_physical_experts + self.num_routed_experts = example_layer.n_routed_experts + self.num_redundant_experts = example_layer.n_redundant_experts + + def set_eplb_state( + self, + expert_load_view: torch.Tensor, + logical_to_physical_map: torch.Tensor, + logical_replica_count: torch.Tensor, + ) -> None: + for layer_idx, layer in enumerate(self.moe_layers): + self.expert_weights.append(layer.get_expert_weights()) + # Register the expert weights. 
+ layer.set_eplb_state( + moe_layer_idx=layer_idx, + expert_load_view=expert_load_view, + logical_to_physical_map=logical_to_physical_map, + logical_replica_count=logical_replica_count, + ) + + def update_physical_experts_metadata( + self, + num_physical_experts: int, + num_local_physical_experts: int, + ) -> None: + assert self.num_local_physical_experts == num_local_physical_experts + self.num_physical_experts = num_physical_experts + self.num_local_physical_experts = num_local_physical_experts + self.num_redundant_experts = (num_physical_experts - + self.num_logical_experts) + for layer in self.model.layers: + if isinstance(layer.mlp, HunYuanSparseMoeBlock): + moe = layer.mlp + moe.n_local_physical_experts = num_local_physical_experts + moe.n_physical_experts = num_physical_experts + moe.n_redundant_experts = self.num_redundant_experts + moe.experts.update_expert_map() + def forward( self, input_ids: torch.Tensor, From 5c65a72bb17b34bc6eb0d7ca43b10938c88dc7e3 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 17 Sep 2025 22:05:25 -0700 Subject: [PATCH 085/518] [V0 Deprecation] Remove more V0 tests (#25117) Signed-off-by: Woosuk Kwon --- .buildkite/test-pipeline.yaml | 6 - .github/CODEOWNERS | 2 - tests/async_engine/__init__.py | 0 tests/async_engine/api_server_async_engine.py | 54 -- tests/async_engine/conftest.py | 12 - tests/async_engine/test_api_server.py | 139 ------ tests/async_engine/test_request_tracker.py | 71 --- tests/basic_correctness/test_preemption.py | 189 ------- tests/detokenizer/conftest.py | 11 - tests/detokenizer/test_stop_checker.py | 83 ---- .../openai/correctness/test_lmeval.py | 10 - tests/samplers/test_logprobs.py | 182 ------- tests/worker/__init__.py | 0 tests/worker/conftest.py | 11 - tests/worker/test_model_input.py | 113 ----- tests/worker/test_model_runner.py | 462 ------------------ tests/worker/test_profile.py | 68 --- tests/worker/test_swap.py | 87 ---- 18 files changed, 1500 deletions(-) delete mode 100644 tests/async_engine/__init__.py delete mode 100644 tests/async_engine/api_server_async_engine.py delete mode 100644 tests/async_engine/conftest.py delete mode 100644 tests/async_engine/test_api_server.py delete mode 100644 tests/async_engine/test_request_tracker.py delete mode 100644 tests/basic_correctness/test_preemption.py delete mode 100644 tests/detokenizer/conftest.py delete mode 100644 tests/detokenizer/test_stop_checker.py delete mode 100644 tests/samplers/test_logprobs.py delete mode 100644 tests/worker/__init__.py delete mode 100644 tests/worker/conftest.py delete mode 100644 tests/worker/test_model_input.py delete mode 100644 tests/worker/test_model_runner.py delete mode 100644 tests/worker/test_profile.py delete mode 100644 tests/worker/test_swap.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 0bce02b90a7c..8dd99bf1a38f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -46,22 +46,18 @@ steps: mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ - - tests/async_engine - tests/test_inputs.py - tests/test_outputs.py - tests/multimodal - tests/utils_ - - tests/worker - tests/standalone_tests/lazy_imports.py - tests/transformers_utils commands: - python3 standalone_tests/lazy_imports.py - - pytest -v -s async_engine # AsyncLLMEngine - pytest -v -s test_inputs.py - pytest -v -s test_outputs.py - pytest -v -s multimodal - pytest -v -s utils_ # Utils - - pytest -v -s worker # Worker - pytest -v -s transformers_utils # transformers_utils - label: Python-only 
Installation Test # 10min @@ -82,14 +78,12 @@ steps: - vllm/ - tests/basic_correctness/test_basic_correctness - tests/basic_correctness/test_cpu_offload - - tests/basic_correctness/test_preemption - tests/basic_correctness/test_cumem.py commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s basic_correctness/test_cumem.py - pytest -v -s basic_correctness/test_basic_correctness.py - pytest -v -s basic_correctness/test_cpu_offload.py - - VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 pytest -v -s basic_correctness/test_preemption.py - label: Entrypoints Unit Tests # 5min timeout_in_minutes: 10 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 771dd2e17258..b8d6db06548d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -41,7 +41,6 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson # Test ownership /.buildkite/lm-eval-harness @mgoin @simon-mo -/tests/async_engine @njhill @robertgshaw2-redhat @simon-mo /tests/distributed/test_multi_node_assignment.py @youkaichao /tests/distributed/test_pipeline_parallel.py @youkaichao /tests/distributed/test_same_node.py @youkaichao @@ -50,7 +49,6 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/kernels @mgoin @tlrmchlsmth @WoosukKwon @yewentao256 /tests/models @DarkLight1337 @ywang96 /tests/multimodal @DarkLight1337 @ywang96 @NickLucche -/tests/prefix_caching @comaniac @KuntaiDu /tests/quantization @mgoin @robertgshaw2-redhat @yewentao256 /tests/test_inputs.py @DarkLight1337 @ywang96 /tests/v1/entrypoints/llm/test_struct_output_generate.py @mgoin @russellb @aarnphm diff --git a/tests/async_engine/__init__.py b/tests/async_engine/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/async_engine/api_server_async_engine.py b/tests/async_engine/api_server_async_engine.py deleted file mode 100644 index ec6b20f5e04b..000000000000 --- a/tests/async_engine/api_server_async_engine.py +++ /dev/null @@ -1,54 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""vllm.entrypoints.api_server with some extra logging for testing.""" -from collections.abc import Iterable -from typing import Any - -import uvicorn -from fastapi.responses import JSONResponse, Response - -import vllm.entrypoints.api_server -import vllm.envs as envs -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine -from vllm.utils import FlexibleArgumentParser - -app = vllm.entrypoints.api_server.app - - -class AsyncLLMEngineWithStats(AsyncLLMEngine): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - self._num_aborts = 0 - - async def _engine_abort(self, request_ids: Iterable[str]): - ids = list(request_ids) - self._num_aborts += len(ids) - await super()._engine_abort(ids) - - def testing_stats(self) -> dict[str, Any]: - return {"num_aborted_requests": self._num_aborts} - - -@app.get("/stats") -def stats() -> Response: - """Get the statistics of the engine.""" - return JSONResponse(engine.testing_stats()) - - -if __name__ == "__main__": - parser = FlexibleArgumentParser() - parser.add_argument("--host", type=str, default="localhost") - parser.add_argument("--port", type=int, default=8000) - parser = AsyncEngineArgs.add_cli_args(parser) - args = parser.parse_args() - - engine_args = AsyncEngineArgs.from_cli_args(args) - engine = AsyncLLMEngineWithStats.from_engine_args(engine_args) - vllm.entrypoints.api_server.engine = engine - uvicorn.run(app, - host=args.host, - port=args.port, - log_level="debug", - 
timeout_keep_alive=envs.VLLM_HTTP_TIMEOUT_KEEP_ALIVE) diff --git a/tests/async_engine/conftest.py b/tests/async_engine/conftest.py deleted file mode 100644 index 375b248ebeda..000000000000 --- a/tests/async_engine/conftest.py +++ /dev/null @@ -1,12 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') diff --git a/tests/async_engine/test_api_server.py b/tests/async_engine/test_api_server.py deleted file mode 100644 index 07370a880329..000000000000 --- a/tests/async_engine/test_api_server.py +++ /dev/null @@ -1,139 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import copyreg -import os -import subprocess -import sys -import time -from multiprocessing import Pool -from pathlib import Path - -import pytest -import requests -import urllib3.exceptions - - -def _pickle_new_connection_error(obj): - """Custom pickler for NewConnectionError to fix tblib compatibility.""" - # Extract the original message by removing the "conn: " prefix - full_message = obj.args[0] if obj.args else "" - if ': ' in full_message: - # Split off the connection part and keep the actual message - _, actual_message = full_message.split(': ', 1) - else: - actual_message = full_message - return _unpickle_new_connection_error, (actual_message, ) - - -def _unpickle_new_connection_error(message): - """Custom unpickler for NewConnectionError.""" - # Create with None as conn and the actual message - return urllib3.exceptions.NewConnectionError(None, message) - - -# Register the custom pickle/unpickle functions for tblib compatibility -copyreg.pickle(urllib3.exceptions.NewConnectionError, - _pickle_new_connection_error) - - -def _query_server(prompt: str, max_tokens: int = 5) -> dict: - response = requests.post("http://localhost:8000/generate", - json={ - "prompt": prompt, - "max_tokens": max_tokens, - "temperature": 0, - "ignore_eos": True - }) - response.raise_for_status() - return response.json() - - -def _query_server_long(prompt: str) -> dict: - return _query_server(prompt, max_tokens=500) - - -@pytest.fixture -def api_server(distributed_executor_backend: str): - script_path = Path(__file__).parent.joinpath( - "api_server_async_engine.py").absolute() - commands = [ - sys.executable, - "-u", - str(script_path), - "--model", - "facebook/opt-125m", - "--host", - "127.0.0.1", - "--distributed-executor-backend", - distributed_executor_backend, - ] - - # API Server Test Requires V0. - my_env = os.environ.copy() - my_env["VLLM_USE_V1"] = "0" - uvicorn_process = subprocess.Popen(commands, env=my_env) - yield - uvicorn_process.terminate() - - -@pytest.mark.timeout(300) -@pytest.mark.parametrize("distributed_executor_backend", ["mp", "ray"]) -def test_api_server(api_server, distributed_executor_backend: str): - """ - Run the API server and test it. - - We run both the server and requests in separate processes. - - We test that the server can handle incoming requests, including - multiple requests at the same time, and that it can handle requests - being cancelled without crashing. 
- """ - with Pool(32) as pool: - # Wait until the server is ready - prompts = ["warm up"] * 1 - result = None - while not result: - try: - for r in pool.map(_query_server, prompts): - result = r - break - except requests.exceptions.ConnectionError: - time.sleep(1) - - # Actual tests start here - # Try with 1 prompt - for result in pool.map(_query_server, prompts): - assert result - - num_aborted_requests = requests.get( - "http://localhost:8000/stats").json()["num_aborted_requests"] - assert num_aborted_requests == 0 - - # Try with 100 prompts - prompts = ["test prompt"] * 100 - for result in pool.map(_query_server, prompts): - assert result - - with Pool(32) as pool: - # Cancel requests - prompts = ["canceled requests"] * 100 - pool.map_async(_query_server_long, prompts) - time.sleep(0.01) - pool.terminate() - pool.join() - - # check cancellation stats - # give it some time to update the stats - time.sleep(1) - - num_aborted_requests = requests.get( - "http://localhost:8000/stats").json()["num_aborted_requests"] - assert num_aborted_requests > 0 - - # check that server still runs after cancellations - with Pool(32) as pool: - # Try with 100 prompts - prompts = ["test prompt after canceled"] * 100 - for result in pool.map(_query_server, prompts): - assert result diff --git a/tests/async_engine/test_request_tracker.py b/tests/async_engine/test_request_tracker.py deleted file mode 100644 index 1851eeeda790..000000000000 --- a/tests/async_engine/test_request_tracker.py +++ /dev/null @@ -1,71 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.engine.async_llm_engine import RequestTracker -from vllm.outputs import RequestOutput - - -@pytest.mark.asyncio -async def test_request_tracker(): - tracker = RequestTracker() - stream_1 = tracker.add_request("1") - assert tracker.new_requests_event.is_set() - await tracker.wait_for_new_requests() - new, aborted = tracker.get_new_and_aborted_requests() - assert not tracker.new_requests_event.is_set() - assert len(new) == 1 - assert new[0]["request_id"] == "1" - assert not aborted - assert not stream_1.finished - - stream_2 = tracker.add_request("2") - stream_3 = tracker.add_request("3") - assert tracker.new_requests_event.is_set() - await tracker.wait_for_new_requests() - new, aborted = tracker.get_new_and_aborted_requests() - assert not tracker.new_requests_event.is_set() - assert len(new) == 2 - assert new[0]["request_id"] == "2" - assert new[1]["request_id"] == "3" - assert not aborted - assert not stream_2.finished - assert not stream_3.finished - - # request_ids must be unique - with pytest.raises(KeyError): - tracker.add_request("1") - assert not tracker.new_requests_event.is_set() - - tracker.abort_request("1") - new, aborted = tracker.get_new_and_aborted_requests() - assert len(aborted) == 1 - assert "1" in aborted - assert not new - assert stream_1.finished - - stream_4 = tracker.add_request("4") - tracker.abort_request("4") - assert tracker.new_requests_event.is_set() - await tracker.wait_for_new_requests() - new, aborted = tracker.get_new_and_aborted_requests() - # aborted new requests will cancel each other out - - # there's no need for them to propagate into the - # engine - assert not aborted - assert not new - assert stream_4.finished - - stream_5 = tracker.add_request("5") - assert tracker.new_requests_event.is_set() - tracker.process_request_output( - RequestOutput("2", "output", [], [], [], finished=True)) - await 
tracker.wait_for_new_requests() - new, aborted = tracker.get_new_and_aborted_requests() - assert not tracker.new_requests_event.is_set() - assert not aborted - assert len(new) == 1 - assert new[0]["request_id"] == "5" - assert stream_2.finished - assert not stream_5.finished diff --git a/tests/basic_correctness/test_preemption.py b/tests/basic_correctness/test_preemption.py deleted file mode 100644 index db2fa2f6bef6..000000000000 --- a/tests/basic_correctness/test_preemption.py +++ /dev/null @@ -1,189 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Compare the short outputs of HF and vLLM when using greedy sampling. - -VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 has to be set before running this test. - -Run `VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 -pytest tests/basic_correctness/test_preemption.py`. -""" -import pytest -from prometheus_client import REGISTRY - -import vllm.envs as envs -from vllm import SamplingParams -from vllm.core.scheduler import (ARTIFICIAL_PREEMPTION_MAX_CNT, - ENABLE_ARTIFICIAL_PREEMPT) - -from ..models.utils import check_outputs_equal - -MODELS = [ - "distilbert/distilgpt2", -] - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - We should enable this for V1, but VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT, - so use VLLM_USE_V1=0 for all tests in the file. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') - - -@pytest.fixture(scope="module", autouse=True) -def check_settings(): - assert ENABLE_ARTIFICIAL_PREEMPT is True, ( - "Use an env var VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1." - "`VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT=1 " - "pytest tests/basic_correctness/test_preemption.py`") - - -@pytest.fixture -def distributed_executor_backend() -> str: - # When SPMD worker is used, use distributed_executor_backend="ray" - # to test delta input optimization works with preemption. 
- return "ray" if envs.VLLM_USE_RAY_SPMD_WORKER else "mp" - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["half"]) -@pytest.mark.parametrize("max_tokens", [96]) -@pytest.mark.parametrize("chunked_prefill_token_size", [16]) -def test_chunked_prefill_recompute( - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, - chunked_prefill_token_size: int, - distributed_executor_backend: str, -) -> None: - """Ensure that chunked prefill works with preemption.""" - max_num_seqs = min(chunked_prefill_token_size, 256) - enable_chunked_prefill = False - max_num_batched_tokens = None - if chunked_prefill_token_size != -1: - enable_chunked_prefill = True - max_num_batched_tokens = chunked_prefill_token_size - - with hf_runner(model, dtype=dtype) as hf_model: - hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) - - with vllm_runner( - model, - dtype=dtype, - max_num_batched_tokens=max_num_batched_tokens, - enable_chunked_prefill=enable_chunked_prefill, - max_num_seqs=max_num_seqs, - distributed_executor_backend=distributed_executor_backend, - disable_log_stats=False, - ) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - assert (vllm_model.llm.llm_engine.scheduler[0].artificial_preempt_cnt - < ARTIFICIAL_PREEMPTION_MAX_CNT) - - for i in range(len(example_prompts)): - hf_output_ids, hf_output_str = hf_outputs[i] - vllm_output_ids, vllm_output_str = vllm_outputs[i] - assert hf_output_str == vllm_output_str, ( - f"Test{i}:\nHF: {hf_output_str!r}\nvLLM: {vllm_output_str!r}") - assert hf_output_ids == vllm_output_ids, ( - f"Test{i}:\nHF: {hf_output_ids}\nvLLM: {vllm_output_ids}") - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_preemption( - caplog_vllm, - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, - distributed_executor_backend: str, -) -> None: - """By default, recompute preemption is enabled""" - - with hf_runner(model, dtype=dtype) as hf_model: - hf_outputs = hf_model.generate_greedy(example_prompts, max_tokens) - - with vllm_runner( - model, - dtype=dtype, - disable_log_stats=False, - distributed_executor_backend=distributed_executor_backend, - ) as vllm_model: - vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens) - assert (vllm_model.llm.llm_engine.scheduler[0].artificial_preempt_cnt - < ARTIFICIAL_PREEMPTION_MAX_CNT) - total_preemption = ( - vllm_model.llm.llm_engine.scheduler[0].num_cumulative_preemption) - - check_outputs_equal( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - ) - - assert ("is preempted by PreemptionMode.RECOMPUTE mode because there " - "is not enough KV cache space." 
in caplog_vllm.text) - # Ensure the count bucket of request-level histogram metrics matches - # the number of requests as a simple sanity check to ensure metrics are - # generated - preemption_metrics = None - for m in REGISTRY.collect(): - if m.name == "vllm:num_preemptions": - preemption_metrics = m - assert preemption_metrics is not None - total_recorded_preemption = 0 - for sample in preemption_metrics.samples: - total_recorded_preemption += sample.value - assert total_preemption == total_recorded_preemption - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["float"]) -@pytest.mark.parametrize("max_tokens", [96]) -def test_preemption_infeasible( - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, - distributed_executor_backend: str, -) -> None: - """Verify infeasible preemption request will be ignored.""" - BLOCK_SIZE = 16 - prefill_blocks = 2 - decode_blocks = max_tokens // BLOCK_SIZE - with vllm_runner( - model, - dtype=dtype, - block_size=BLOCK_SIZE, - # Not enough gpu blocks to complete a single sequence. - # preemption should happen, and the sequence should be - # ignored instead of hanging forever. - num_gpu_blocks_override=prefill_blocks + decode_blocks // 2, - max_model_len=((prefill_blocks + decode_blocks // 2) * BLOCK_SIZE), - distributed_executor_backend=distributed_executor_backend, - ) as vllm_model: - sampling_params = SamplingParams(max_tokens=max_tokens, - ignore_eos=True) - req_outputs = vllm_model.llm.generate( - example_prompts, - sampling_params=sampling_params, - ) - - assert (vllm_model.llm.llm_engine.scheduler[0].artificial_preempt_cnt - < ARTIFICIAL_PREEMPTION_MAX_CNT) - - # Verify the request is ignored and not hang. - for req_output in req_outputs: - outputs = req_output.outputs - assert len(outputs) == 1 - assert outputs[0].finish_reason == "length" diff --git a/tests/detokenizer/conftest.py b/tests/detokenizer/conftest.py deleted file mode 100644 index f2c125355c83..000000000000 --- a/tests/detokenizer/conftest.py +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - # Simple autouse wrapper to run both engines for each test - # This can be promoted up to conftest.py to run for every - # test in a package - pass diff --git a/tests/detokenizer/test_stop_checker.py b/tests/detokenizer/test_stop_checker.py deleted file mode 100644 index 2ca10c072b34..000000000000 --- a/tests/detokenizer/test_stop_checker.py +++ /dev/null @@ -1,83 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.inputs import token_inputs -from vllm.sampling_params import SamplingParams -from vllm.sequence import Logprob, Sequence, SequenceStatus - - -def sequence_with_eos(text: str, eos_token: str, - eos_token_id: int) -> Sequence: - """ - Create a Sequence that ends with an EOS token. 
- """ - seq = Sequence( - seq_id=0, - inputs=token_inputs([]), - block_size=16, - eos_token_id=eos_token_id, - ) - seq.output_text = text + eos_token - - offset = eos_token_id + 1 - for i in range(offset, len(text) + offset): - seq.append_token_id(token_id=i, logprobs={i: Logprob(0.0)}) - seq.append_token_id(token_id=eos_token_id, - logprobs={eos_token_id: Logprob(0.0)}) - - seq.status = SequenceStatus.RUNNING - - return seq - - -@pytest.mark.parametrize(["text_wo_eos", "eos_token", "eos_token_id"], [ - ("This text ends with EOS token", "", 2), -]) -@pytest.mark.parametrize("ignore_eos", [True, False]) -@pytest.mark.parametrize("include_stop_str_in_output", [True, False]) -@pytest.mark.skip_global_cleanup -def test_stop_on_eos_token(text_wo_eos: str, eos_token: str, eos_token_id: int, - ignore_eos: bool, include_stop_str_in_output: bool): - """ - Test the behavior of the StopChecker's maybe_stop_sequence method - when an EOS token is encountered. - - This test covers: - - When the EOS token should stop the sequence and be removed from the output - - When the EOS token should stop the sequence and be included in the output - - When the EOS token should be ignored, and the sequence continues - """ - - stop_checker = StopChecker(max_model_len=1024) - - seq = sequence_with_eos( - text=text_wo_eos, - eos_token=eos_token, - eos_token_id=eos_token_id, - ) - new_char_count = len(eos_token) - - # Note that `stop` and `stop_token_ids` are not specified - sampling_params = SamplingParams( - min_tokens=1, - ignore_eos=ignore_eos, - include_stop_str_in_output=include_stop_str_in_output) - - stop_checker.maybe_stop_sequence( - seq=seq, - new_char_count=new_char_count, - sampling_params=sampling_params, - ) - - if ignore_eos: - assert seq.status == SequenceStatus.RUNNING - assert seq.output_text == text_wo_eos + eos_token - elif include_stop_str_in_output: - assert seq.status == SequenceStatus.FINISHED_STOPPED - assert seq.output_text == text_wo_eos + eos_token - else: - assert seq.status == SequenceStatus.FINISHED_STOPPED - assert seq.output_text == text_wo_eos diff --git a/tests/entrypoints/openai/correctness/test_lmeval.py b/tests/entrypoints/openai/correctness/test_lmeval.py index 684407cd6ee9..624acd5ffde7 100644 --- a/tests/entrypoints/openai/correctness/test_lmeval.py +++ b/tests/entrypoints/openai/correctness/test_lmeval.py @@ -81,13 +81,3 @@ def test_lm_eval_accuracy_v1_engine(monkeypatch: pytest.MonkeyPatch): more_args = ["--max-num-seqs", "64"] run_test(more_args) - - -@pytest.mark.parametrize("more_args", MORE_ARGS_LIST) -def test_lm_eval_accuracy_v0_engine(monkeypatch: pytest.MonkeyPatch, - more_args): - """Run with the V0 Engine.""" - - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - run_test(more_args) diff --git a/tests/samplers/test_logprobs.py b/tests/samplers/test_logprobs.py deleted file mode 100644 index 87f40b100531..000000000000 --- a/tests/samplers/test_logprobs.py +++ /dev/null @@ -1,182 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -import torch - -from vllm import SamplingParams - -from ..conftest import VllmRunner - -MODELS = ["distilbert/distilgpt2"] - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module is V0 only since it uses dtype=float, so - set VLLM_USE_V1=0 for all tests in the module. 
- """ - monkeypatch.setenv('VLLM_USE_V1', '0') - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", - ["float"]) # needed for comparing logprobs with HF -@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16, -1]) -@pytest.mark.parametrize("num_top_logprobs", [0, 6]) # 32000 == vocab_size -@pytest.mark.parametrize("detokenize", [True, False]) -def test_get_prompt_logprobs( - hf_runner, - vllm_runner, - model, - dtype, - chunked_prefill_token_size: int, - num_top_logprobs: int, - detokenize: bool, - example_prompts, -): - max_num_seqs = 256 - enable_chunked_prefill = False - max_num_batched_tokens = None - if chunked_prefill_token_size != -1: - enable_chunked_prefill = True - max_num_seqs = min(chunked_prefill_token_size, max_num_seqs) - max_num_batched_tokens = chunked_prefill_token_size - - max_tokens = 5 - with hf_runner(model, dtype=dtype) as hf_model: - hf_logprobs = hf_model.generate_greedy_logprobs( - example_prompts, - max_tokens=max_tokens, - ) - - with vllm_runner( - model, - dtype=dtype, - max_logprobs=num_top_logprobs, - enable_chunked_prefill=enable_chunked_prefill, - max_num_batched_tokens=max_num_batched_tokens, - max_num_seqs=max_num_seqs, - ) as vllm_model: - vllm_sampling_params = SamplingParams(max_tokens=max_tokens, - logprobs=num_top_logprobs, - prompt_logprobs=num_top_logprobs, - temperature=0.0, - detokenize=detokenize) - vllm_results = vllm_model.llm.generate( - example_prompts, sampling_params=vllm_sampling_params) - - # Test whether logprobs are included in the results. - for result in vllm_results: - assert result.prompt_logprobs is not None - assert result.outputs[0].logprobs is not None - assert len(result.outputs[0].logprobs) == max_tokens - for logprobs in result.outputs[0].logprobs: - # If the output token is not included in the top X - # logprob, it can return 1 more data - assert (len(logprobs) == num_top_logprobs - or len(logprobs) == num_top_logprobs + 1) - output_text = result.outputs[0].text - output_string_from_most_likely_tokens_lst: list[str] = [] - for top_logprobs in result.outputs[0].logprobs: - top_logprob = next(iter(top_logprobs.values())) - output_string_from_most_likely_tokens_lst.append( - top_logprob.decoded_token) - - if detokenize: - output_string_from_most_likely_tokens = "".join( - output_string_from_most_likely_tokens_lst) - assert output_text == output_string_from_most_likely_tokens, ( - "The output text from the top logprob for each token position " - "should be the same as the output text in the result.") - else: - assert output_text == '' - assert output_string_from_most_likely_tokens_lst == ([None] * - max_tokens) - - # The first prompt logprob is always None - assert result.prompt_logprobs[0] is None - for prompt_logprobs in result.prompt_logprobs[1:]: - # If the prompt token is not included in the top X - # logprob, it can return 1 more data - assert (len(prompt_logprobs) == num_top_logprobs - or len(prompt_logprobs) == num_top_logprobs + 1) - - # Test whether prompt logprobs are consistent with HF - for vllm_result, hf_logprob in zip(vllm_results, hf_logprobs): - # Check prompt logprobs - # The first prompt logprob is always None, so we compare it from 1:. 
- vllm_prompt_logprobs = vllm_result.prompt_logprobs[1:] - for i, vllm_prompt_logprob_dict in enumerate(vllm_prompt_logprobs): - for token_id, logprob in vllm_prompt_logprob_dict.items(): - torch.testing.assert_close(logprob.logprob, - hf_logprob[0][i][token_id].item(), - atol=1e-2, - rtol=1e-2) - vllm_sample_logprobs = vllm_result.outputs[0].logprobs - for i, top_logprobs in enumerate(vllm_sample_logprobs): - for token_id, sample_logprob in top_logprobs.items(): - logprob = sample_logprob.logprob - torch.testing.assert_close(logprob, - hf_logprob[i][-1][token_id].item(), - atol=1e-2, - rtol=1e-2) - if detokenize: - assert isinstance(sample_logprob.decoded_token, str), ( - "The token should be decoded by the time it is returned" - " to the user.") - - # Test if prompt logprobs are correctly set. - for vllm_result in vllm_results: - token_ids = vllm_result.prompt_token_ids - prompt_logprobs = vllm_result.prompt_logprobs - - # The first token doesn't have logprob. - assert prompt_logprobs[0] is None - - for token_id, logprob_dict in zip(token_ids[1:], prompt_logprobs[1:]): - assert token_id in logprob_dict - - -def test_max_logprobs(): - runner = VllmRunner("facebook/opt-125m", max_logprobs=1) - vllm_sampling_params = SamplingParams(logprobs=1) - # should pass - runner.generate(["Hello world"], sampling_params=vllm_sampling_params) - - bad_sampling_params = SamplingParams(logprobs=2) - with pytest.raises(ValueError): - runner.generate(["Hello world"], sampling_params=bad_sampling_params) - - -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16, -1]) -@pytest.mark.parametrize("detokenize", [True, False]) -def test_none_logprobs(vllm_runner, model, chunked_prefill_token_size: int, - detokenize: bool, example_prompts): - max_num_seqs = 256 - enable_chunked_prefill = False - max_num_batched_tokens = None - if chunked_prefill_token_size != -1: - enable_chunked_prefill = True - max_num_seqs = min(chunked_prefill_token_size, max_num_seqs) - max_num_batched_tokens = chunked_prefill_token_size - max_tokens = 5 - - with vllm_runner( - model, - enable_chunked_prefill=enable_chunked_prefill, - max_num_batched_tokens=max_num_batched_tokens, - max_num_seqs=max_num_seqs, - ) as vllm_model: - sampling_params_logprobs_none = SamplingParams(max_tokens=max_tokens, - logprobs=None, - temperature=0.0, - detokenize=detokenize) - results_logprobs_none = vllm_model.llm.generate( - example_prompts, sampling_params=sampling_params_logprobs_none) - - for i in range(len(results_logprobs_none)): - assert results_logprobs_none[i].outputs[0].logprobs is None - assert results_logprobs_none[i].outputs[0].cumulative_logprob is None diff --git a/tests/worker/__init__.py b/tests/worker/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/tests/worker/conftest.py b/tests/worker/conftest.py deleted file mode 100644 index 3f202d4dbe94..000000000000 --- a/tests/worker/conftest.py +++ /dev/null @@ -1,11 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import pytest - - -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module tests V0 internals, so set VLLM_USE_V1=0. 
- """ - monkeypatch.setenv('VLLM_USE_V1', '0') \ No newline at end of file diff --git a/tests/worker/test_model_input.py b/tests/worker/test_model_input.py deleted file mode 100644 index 0f28ef2ba857..000000000000 --- a/tests/worker/test_model_input.py +++ /dev/null @@ -1,113 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import dataclasses - -import torch - -from vllm.attention import AttentionMetadata, AttentionMetadataBuilder -from vllm.attention.backends.abstract import AttentionBackend -from vllm.attention.backends.utils import CommonAttentionState -from vllm.model_executor import SamplingMetadata -from vllm.worker.model_runner import ModelInputForGPUWithSamplingMetadata - - -class MockAttentionBackend(AttentionBackend): - - @staticmethod - def get_name() -> str: - raise NotImplementedError - - @staticmethod - def get_impl_cls(): - raise NotImplementedError - - @staticmethod - def get_metadata_cls() -> type["AttentionMetadata"]: - return AttentionMetadata - - @staticmethod - def get_builder_cls() -> type["AttentionMetadataBuilder"]: - return AttentionMetadataBuilder - - @staticmethod - def get_state_cls() -> type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> tuple[int, ...]: - raise NotImplementedError - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - pass - - @staticmethod - def copy_blocks( - kv_caches: list[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - pass - - -def test_model_runner_input(): - sampling_metadata = SamplingMetadata( - ["seq_group"], - "selected_token_indices", - "categorized_sample_indices", - "num_prompts", - ) - attn_metadata = AttentionMetadata( - num_prefills=1, - num_prefill_tokens=2, - num_decode_tokens=3, - slot_mapping=torch.zeros(1), - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=True, - ) - model_input = ModelInputForGPUWithSamplingMetadata( - input_tokens=torch.ones(10), - input_positions=torch.ones(10), - sampling_metadata=sampling_metadata, - attn_metadata=attn_metadata) - - assert isinstance(model_input, ModelInputForGPUWithSamplingMetadata) - - # Test round trip serialization. - tensor_dict = model_input.as_broadcastable_tensor_dict() - attn_backend = MockAttentionBackend() - received_model_input = ( - ModelInputForGPUWithSamplingMetadata.from_broadcasted_tensor_dict( - tensor_dict, attn_backend=attn_backend)) - # Check that received copy has correct values. 
- assert isinstance(received_model_input, - ModelInputForGPUWithSamplingMetadata) - assert received_model_input.input_tokens is not None - assert ( - received_model_input.input_tokens == model_input.input_tokens).all() - assert received_model_input.input_positions is not None - assert (received_model_input.input_positions == model_input.input_positions - ).all() - assert received_model_input.multi_modal_kwargs is None - assert (received_model_input.multi_modal_kwargs == - model_input.multi_modal_kwargs) - assert received_model_input.lora_requests is None - assert received_model_input.lora_requests == model_input.lora_requests - assert received_model_input.lora_mapping is None - assert received_model_input.lora_mapping == model_input.lora_mapping - for field in dataclasses.fields(AttentionMetadata): - assert getattr(received_model_input.attn_metadata, field.name, - None) == getattr(attn_metadata, field.name, None) - # For sampling metadata, only selected_token_indices is copied. - assert (received_model_input.sampling_metadata.selected_token_indices == - sampling_metadata.selected_token_indices) - assert received_model_input.sampling_metadata.seq_groups is None diff --git a/tests/worker/test_model_runner.py b/tests/worker/test_model_runner.py deleted file mode 100644 index 0be25aa2fc35..000000000000 --- a/tests/worker/test_model_runner.py +++ /dev/null @@ -1,462 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest -import torch - -from vllm.distributed.parallel_state import (ensure_model_parallel_initialized, - init_distributed_environment) -from vllm.engine.arg_utils import EngineArgs -from vllm.model_executor.sampling_metadata import SamplingMetadata -from vllm.sequence import SamplingParams, SequenceData, SequenceGroupMetadata -from vllm.utils import get_open_port -from vllm.worker.model_runner import ModelRunner - - -def _create_model_runner(model: str, *args, **kwargs) -> ModelRunner: - engine_args = EngineArgs(model, *args, **kwargs) - engine_config = engine_args.create_engine_config() - model_runner = ModelRunner( - vllm_config=engine_config, - is_driver_worker=True, - ) - return model_runner - - -def test_deepseek_mla_attn_backend_module(): - model_runner = _create_model_runner( - "deepseek-ai/DeepSeek-Coder-V2-Lite-Instruct", - trust_remote_code=True, - enable_chunked_prefill=False, - ) - assert model_runner.attn_backend.__name__ == "TritonMLABackend" - - -@pytest.mark.parametrize("batch_size", list(range(1, 257, 3))) -@pytest.mark.parametrize("use_prompt_embeds", [True, False]) -def test_prepare_prompt(batch_size, use_prompt_embeds, monkeypatch): - if use_prompt_embeds: - # Prompt Embeddings is only currently supported on V0 - monkeypatch.setenv("VLLM_USE_V1", "0") - - model_runner = _create_model_runner( - "facebook/opt-125m", - max_num_batched_tokens=100000, - max_num_seqs=100000, - enable_chunked_prefill=False, - enable_prompt_embeds=True, - ) - - seq_lens: list[int] = [] - seq_group_metadata_list: list[SequenceGroupMetadata] = [] - block_tables = {0: [1]} - expected_input_embeds_len = 0 - for i in range(batch_size): - # make sure all tokens fit into one block - seq_len = i % (model_runner.block_size - 1) + 1 - seq_lens.append(seq_len) - if use_prompt_embeds: - seq_data = SequenceData.from_seqs( - prompt_token_ids=[0] * seq_len, - prompt_embeds=torch.rand(seq_len, 10), - ) - expected_input_embeds_len += seq_len - else: - seq_data = SequenceData.from_seqs(prompt_token_ids=range(seq_len)) - - 
seq_group_metadata = SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=True, - seq_data={0: seq_data}, - sampling_params=SamplingParams(temperature=0), - block_tables=block_tables, - ) - assert seq_group_metadata.token_chunk_size == seq_data.get_len() - seq_group_metadata_list.append(seq_group_metadata) - - expected_selected_token_indices = [] - selected_token_start_idx = 0 - for seq_len in seq_lens: - expected_selected_token_indices.append(selected_token_start_idx + - seq_len - 1) - selected_token_start_idx += seq_len - model_input = model_runner._prepare_model_input_tensors( - seq_group_metadata_list) - input_tokens = model_input.input_tokens - input_positions = model_input.input_positions - input_embeds = model_input.inputs_embeds - attn_metadata = model_input.attn_metadata - return_seq_lens = model_input.seq_lens - slot_mapping = attn_metadata.slot_mapping - assert return_seq_lens == seq_lens - assert len(slot_mapping) == len(input_tokens) - - # Verify input metadata is correct for prompts. - device = model_runner.device - assert attn_metadata.num_prefills > 0 - assert attn_metadata.num_decode_tokens == 0 - torch.testing.assert_close( - attn_metadata.seq_lens_tensor, - torch.tensor(seq_lens, device=device, dtype=torch.int)) - assert attn_metadata.seq_lens == seq_lens - assert attn_metadata.max_prefill_seq_len == max(seq_lens) - assert attn_metadata.max_decode_seq_len == 0 - - # Test subquery start locs. - start_idx = 0 - start_loc = [start_idx] - for seq_len in seq_lens: - start_idx += seq_len - start_loc.append(start_idx) - torch.testing.assert_close( - attn_metadata.query_start_loc, - torch.tensor(start_loc, dtype=torch.int32, device=device)) - - # Test seq start locs. Note that for normal prefill it is - # equivalent to query_start_loc. - start_idx = 0 - seq_start_loc = [start_idx] - for seq_len in seq_lens: - start_idx += seq_len - seq_start_loc.append(start_idx) - - torch.testing.assert_close( - attn_metadata.seq_start_loc, - torch.tensor(start_loc, dtype=torch.int32, device=device)) - torch.testing.assert_close( - attn_metadata.context_lens_tensor, - torch.zeros(attn_metadata.context_lens_tensor.shape[0], - dtype=torch.int, - device=device)) - - expected = torch.tensor([[] for _ in range(len(seq_group_metadata_list))], - dtype=torch.int32, - device=model_runner.device) - torch.testing.assert_close(attn_metadata.block_tables, expected) - # Cuda graph should not be used for prerill. 
- assert attn_metadata.use_cuda_graph is False - - assert len(input_tokens) == sum(seq_lens) - assert len(input_positions) == sum(seq_lens) - if expected_input_embeds_len == 0: - torch.testing.assert_close(input_tokens, input_positions) - assert input_embeds is None - else: - assert len(input_embeds) == expected_input_embeds_len - - sampling_metadata = SamplingMetadata.prepare( - seq_group_metadata_list, - seq_lens, - query_lens=seq_lens, - device=model_runner.device, - pin_memory=model_runner.pin_memory) - assert len(input_tokens) == sum(seq_lens) - assert len(input_positions) == sum(seq_lens) - actual = sampling_metadata.selected_token_indices - expected = torch.tensor(expected_selected_token_indices, - device=actual.device, - dtype=actual.dtype) - torch.testing.assert_close(actual, expected) - torch.allclose(input_tokens, input_positions) - - actual = sampling_metadata.selected_token_indices - expected = torch.tensor(expected_selected_token_indices, - device=actual.device, - dtype=actual.dtype) - torch.testing.assert_close(actual, expected) - - -@pytest.mark.parametrize("batch_size", list(range(1, 257, 3))) -@pytest.mark.parametrize("use_prompt_embeds", [True, False]) -def test_prepare_decode_cuda_graph(batch_size, use_prompt_embeds, monkeypatch): - if use_prompt_embeds: - # Prompt Embeddings is only currently supported on V0 - monkeypatch.setenv("VLLM_USE_V1", "0") - - model_runner = _create_model_runner( - "facebook/opt-125m", - seed=0, - dtype="float16", - enforce_eager=False, - max_num_batched_tokens=100000, - max_num_seqs=100000, - enable_chunked_prefill=False, - enable_prompt_embeds=True, - ) - - context_lens: list[int] = [] - seq_group_metadata_list: list[SequenceGroupMetadata] = [] - # Assume each seq group finishes prefill. - for i in range(batch_size): - # make sure all tokens fit into one block - context_len = i % (model_runner.block_size - 1) + 1 - context_lens.append(context_len) - if use_prompt_embeds: - seq_data = SequenceData.from_seqs( - prompt_token_ids=[0] * context_len, - prompt_embeds=torch.rand(context_len, 10), - ) - output_embed = torch.rand(10) - else: - seq_data = SequenceData.from_seqs( - prompt_token_ids=range(context_len)) - output_embed = None - seq_data.update_num_computed_tokens(context_len) - # Append one token ID since prefill is finished. - seq_data.append_token_id(1, 0, output_embed) - seq_group_metadata = SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=False, - seq_data={0: seq_data}, - sampling_params=SamplingParams(temperature=0), - block_tables={0: [1]}, - ) - assert seq_group_metadata.token_chunk_size == 1 - seq_group_metadata_list.append(seq_group_metadata) - - model_input = model_runner._prepare_model_input_tensors( - seq_group_metadata_list) - input_tokens = model_input.input_tokens - input_positions = model_input.input_positions - input_embeds = model_input.inputs_embeds - attn_metadata = model_input.attn_metadata - slot_mapping = attn_metadata.slot_mapping - - assert len(slot_mapping) == len(input_tokens) - - expected_bs = model_runner.vllm_config.pad_for_cudagraph( - len(seq_group_metadata_list)) - # Verify input metadata is correct for prompts. 
- device = model_runner.device - assert attn_metadata.num_prefills == 0 - assert attn_metadata.num_prefill_tokens == 0 - seq_lens = [context_len + 1 for context_len in context_lens] - # seq_lens are padded to expected_bs - for _ in range(expected_bs - len(seq_lens)): - seq_lens.append(1) - assert attn_metadata.seq_lens == seq_lens - assert attn_metadata.num_decode_tokens == len(seq_lens) - start_idx = 0 - start_loc = [start_idx] - for _ in context_lens: - # decode has only 1 token for query. - start_idx += 1 - start_loc.append(start_idx) - torch.testing.assert_close( - attn_metadata.query_start_loc, - torch.tensor(start_loc, dtype=torch.int32, device=device)) - - start_idx = 0 - seq_start_loc = [start_idx] - for seq_len in seq_lens: - start_idx += seq_len - seq_start_loc.append(start_idx) - torch.testing.assert_close( - attn_metadata.seq_start_loc, - torch.tensor(seq_start_loc, dtype=torch.int32, device=device)) - - torch.testing.assert_close( - attn_metadata.context_lens_tensor, - torch.tensor(context_lens, dtype=torch.int, device=device)) - assert attn_metadata.max_decode_seq_len == max(seq_lens) - torch.testing.assert_close( - attn_metadata.seq_lens_tensor[:len(seq_lens)], - torch.tensor(seq_lens, dtype=torch.int, device=device)) - - # block table's first index corresponds to each batch, meaning in - # decoding it is each token. - assert attn_metadata.block_tables.shape[0] == len(input_tokens) - # Block table's second dim corresponds to each token's block number. - # It is padded up to - assert attn_metadata.block_tables.shape[1] == ( - model_runner.get_max_block_per_batch()) - assert attn_metadata.use_cuda_graph is True - - assert len(input_tokens) == expected_bs - assert len(input_positions) == expected_bs - if use_prompt_embeds: - expected_input_embeds_length = start_loc[-1] - assert len(input_embeds) == expected_input_embeds_length - assert expected_input_embeds_length <= expected_bs - else: - assert input_embeds is None - - # Verify Sampling - expected_selected_token_indices = [] - for selected_token_start_idx, _ in enumerate(context_lens): - expected_selected_token_indices.append(selected_token_start_idx) - sampling_metadata = SamplingMetadata.prepare( - seq_group_metadata_list, - seq_lens, - # query lens is all 1 for decode. 
- query_lens=[1 for _ in range(len(context_lens))], - device=model_runner.device, - pin_memory=model_runner.pin_memory) - actual = sampling_metadata.selected_token_indices - expected = torch.tensor(expected_selected_token_indices, - device=actual.device, - dtype=actual.dtype) - torch.testing.assert_close(actual, expected) - - -def test_empty_seq_group(): - """Verify prepare prompt and decode returns empty output.""" - model_runner = _create_model_runner( - "facebook/opt-125m", - seed=0, - dtype="float16", - enforce_eager=False, - ) - seq_group_metadata_list: list[SequenceGroupMetadata] = [] - model_input = model_runner._prepare_model_input_tensors( - seq_group_metadata_list) - - input_tokens = model_input.input_tokens - input_positions = model_input.input_positions - attn_metadata = model_input.attn_metadata - - assert input_tokens is None - assert input_positions is None - assert attn_metadata is None - - model_input = model_runner._prepare_model_input_tensors( - seq_group_metadata_list) - - input_tokens = model_input.input_tokens - input_positions = model_input.input_positions - input_embeds = model_input.inputs_embeds - attn_metadata = model_input.attn_metadata - return_seq_lens = model_input.seq_lens - - assert input_tokens is None - assert input_positions is None - assert input_embeds is None - assert attn_metadata is None - assert return_seq_lens is None - - -@pytest.fixture -def distributed_init(): - init_distributed_environment( - world_size=1, - rank=0, - distributed_init_method=f"tcp://127.0.0.1:{get_open_port()}", - local_rank=0) - ensure_model_parallel_initialized(1, 1) - - -@pytest.mark.parametrize("batch_size", list(range(2, 128, 3))) -@pytest.mark.parametrize("enforce_eager", [True, False]) -@pytest.mark.parametrize('use_prompt_embeds', [True, False]) -def test_hybrid_batches(batch_size, enforce_eager, use_prompt_embeds, - distributed_init, monkeypatch): - if use_prompt_embeds: - # Prompt Embeddings is only currently supported on V0 - monkeypatch.setenv("VLLM_USE_V1", "0") - - model_runner = _create_model_runner( - "facebook/opt-125m", - seed=0, - dtype="float16", - enforce_eager=enforce_eager, - max_num_batched_tokens=100000, - max_num_seqs=100000, - enable_chunked_prefill=True, - enable_prompt_embeds=True, - ) - - # Add prefill requests. 
- seq_lens: list[int] = [] - seq_group_metadata_list: list[SequenceGroupMetadata] = [] - prefill_metadata_list: list[SequenceGroupMetadata] = [] - decode_metadata_list: list[SequenceGroupMetadata] = [] - block_tables = {0: [1]} - prefill_batch_size = batch_size // 2 - decode_batch_size = batch_size - prefill_batch_size - expected_input_embeds_len = 0 - for i in range(prefill_batch_size): - # make sure all tokens fit into one block - seq_len = i % (model_runner.block_size - 1) + 1 - seq_lens.append(seq_len) - if use_prompt_embeds: - seq_data = SequenceData.from_seqs( - prompt_token_ids=[0] * seq_len, - prompt_embeds=torch.rand(seq_len, 10), - ) - expected_input_embeds_len += seq_len - else: - seq_data = SequenceData.from_seqs( - prompt_token_ids=range(seq_len), ) - seq_group_metadata = SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=True, - seq_data={0: seq_data}, - sampling_params=SamplingParams(temperature=0), - block_tables=block_tables, - ) - assert seq_group_metadata.token_chunk_size == seq_data.get_len() - seq_group_metadata_list.append(seq_group_metadata) - prefill_metadata_list.append(seq_group_metadata) - - # Add decode requests - for i in range(prefill_batch_size, batch_size): - # make sure all tokens fit into one block - context_len = i % (model_runner.block_size - 1) + 1 - if use_prompt_embeds: - seq_data = SequenceData.from_seqs( - prompt_token_ids=[0] * context_len, - prompt_embeds=torch.rand(context_len, 10), - ) - output_embed = torch.rand(10) - # This also iterates the expected input_embeds, because the model - # needs both the input and output embeddings passed into together - expected_input_embeds_len += 1 - else: - seq_data = SequenceData.from_seqs( - prompt_token_ids=range(context_len), ) - output_embed = None - assert len(seq_data.prompt_token_ids) == context_len - seq_data.append_token_id(1, 0, output_embed) - seq_data.update_num_computed_tokens(context_len) - seq_group_metadata = SequenceGroupMetadata( - request_id=f"test_{i}", - is_prompt=False, - seq_data={0: seq_data}, - sampling_params=SamplingParams(temperature=0), - block_tables={0: [1]}, - ) - assert seq_group_metadata.token_chunk_size == 1 - seq_group_metadata_list.append(seq_group_metadata) - decode_metadata_list.append(seq_group_metadata) - - model_input = model_runner.prepare_model_input(seq_group_metadata_list) - - input_tokens = model_input.input_tokens - input_positions = model_input.input_positions - input_embeds = model_input.inputs_embeds - attn_metadata = model_input.attn_metadata - - prefill_meta_actual = attn_metadata.prefill_metadata - decode_meta_actual = attn_metadata.decode_metadata - - assert len(attn_metadata.slot_mapping) == len(input_tokens) - assert len(input_positions) == len(input_tokens) - assert attn_metadata.num_prefills == prefill_batch_size - assert attn_metadata.num_decode_tokens == decode_batch_size - assert attn_metadata.num_prefill_tokens == sum(seq_lens) - if expected_input_embeds_len == 0: - assert input_embeds is None - else: - assert len(input_embeds) == expected_input_embeds_len - - # Verify attn metadata is consistent. We don't need to test individual - # values here because they are tested above. 
- attn_metadata = model_runner._prepare_model_input_tensors( - seq_group_metadata_list).attn_metadata - - for attr_expected, attr_actual in zip(vars(attn_metadata.prefill_metadata), - vars(prefill_meta_actual)): - assert attr_expected[1] == attr_actual[1] - for attr_expected, attr_actual in zip(vars(attn_metadata.decode_metadata), - vars(decode_meta_actual)): - assert attr_expected[1] == attr_actual[1] diff --git a/tests/worker/test_profile.py b/tests/worker/test_profile.py deleted file mode 100644 index d8767f700b57..000000000000 --- a/tests/worker/test_profile.py +++ /dev/null @@ -1,68 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import torch - -from vllm.engine.arg_utils import EngineArgs -from vllm.utils import get_distributed_init_method, get_ip, get_open_port -from vllm.worker.cache_engine import CacheEngine -from vllm.worker.worker import Worker - - -def test_gpu_memory_profiling(): - # Tests the gpu profiling that happens in order to determine the number of - # KV cache blocks that we can allocate on the GPU. - # This test mocks the maximum available gpu memory so that it can run on - # any gpu setup. - - # Set up engine args to build a worker. - engine_args = EngineArgs(model="facebook/opt-125m", - dtype="half", - load_format="dummy") - engine_config = engine_args.create_engine_config() - engine_config.cache_config.num_gpu_blocks = 1000 - engine_config.cache_config.num_cpu_blocks = 1000 - - # Create the worker. - distributed_init_method = get_distributed_init_method( - get_ip(), get_open_port()) - worker = Worker( - vllm_config=engine_config, - local_rank=0, - rank=0, - distributed_init_method=distributed_init_method, - is_driver_worker=True, - ) - - # Set 10GiB as the total gpu ram to be device-agnostic - def mock_mem_info(): - current_usage = torch.cuda.memory_stats( - )["allocated_bytes.all.current"] - mock_total_bytes = 10 * 1024**3 - free = mock_total_bytes - current_usage - - return (free, mock_total_bytes) - - from unittest.mock import patch - with patch("torch.cuda.mem_get_info", side_effect=mock_mem_info): - # Load the model so we can profile it - worker.init_device() - worker.load_model() - gpu_blocks, _ = worker.determine_num_available_blocks() - - # Peak vram usage by torch should be 0.47 GiB - # Model weights take 0.25 GiB - # No memory should be allocated outside of torch - # 9.0 GiB should be the utilization target - # 8.28 GiB should be available for the KV cache - block_size = CacheEngine.get_cache_block_size( - engine_config.cache_config, engine_config.model_config, - engine_config.parallel_config) - - expected_blocks = (8.28 * 1024**3) // block_size - - # Check within a small tolerance for portability - # Hardware, kernel, or dependency changes could all affect memory - # utilization. - # A 100 block tolerance here should be about 60MB of wiggle room. - assert abs(gpu_blocks - expected_blocks) < 100 diff --git a/tests/worker/test_swap.py b/tests/worker/test_swap.py deleted file mode 100644 index 6d9f404ac207..000000000000 --- a/tests/worker/test_swap.py +++ /dev/null @@ -1,87 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import torch - -from vllm.engine.arg_utils import EngineArgs -from vllm.sequence import ExecuteModelRequest -from vllm.utils import get_distributed_init_method, get_ip, get_open_port -from vllm.worker.worker import Worker - - -def test_swap() -> None: - # Configure the engine. 
- engine_args = EngineArgs(model="distilbert/distilgpt2", - dtype="half", - load_format="dummy") - engine_config = engine_args.create_engine_config() - engine_config.cache_config.num_gpu_blocks = 1000 - engine_config.cache_config.num_cpu_blocks = 1000 - - # Create the worker. - distributed_init_method = get_distributed_init_method( - get_ip(), get_open_port()) - worker = Worker( - vllm_config=engine_config, - local_rank=0, - rank=0, - distributed_init_method=distributed_init_method, - is_driver_worker=True, - ) - - # Initialize the worker. - worker.init_device() - worker.load_model() - worker.initialize_cache( - num_gpu_blocks=engine_config.cache_config.num_gpu_blocks, - num_cpu_blocks=engine_config.cache_config.num_cpu_blocks) - - # Randomly initialize the cache. - gpu_cache = worker.cache_engine[0].gpu_cache - cpu_cache = worker.cache_engine[0].cpu_cache - num_layers = len(gpu_cache) - for i in range(num_layers): - gpu_key_cache, gpu_value_cache = gpu_cache[i] - gpu_key_cache.random_() - gpu_value_cache.random_() - cpu_key_cache, cpu_value_cache = cpu_cache[i] - cpu_key_cache.random_() - cpu_value_cache.random_() - - allclose = lambda a, b: torch.allclose( - a.cuda(), b.cuda(), rtol=0.0, atol=0.0) - - # Test swap out. - blocks_to_swap_out = [(3, 72), (56, 35), (84, 34)] - execute_model_req = ExecuteModelRequest( - seq_group_metadata_list=[], - blocks_to_swap_in=[], - blocks_to_swap_out=blocks_to_swap_out, - blocks_to_copy=[], - ) - worker.execute_model(execute_model_req=execute_model_req) - - for i in range(num_layers): - gpu_key_cache, gpu_value_cache = gpu_cache[i] - cpu_key_cache, cpu_value_cache = cpu_cache[i] - for src, dst in blocks_to_swap_out: - assert allclose(gpu_key_cache[src], cpu_key_cache[dst]) - assert allclose(gpu_value_cache[src], cpu_value_cache[dst]) - - # Test swap in. 
- execute_model_req.blocks_to_swap_out = [] - execute_model_req.blocks_to_swap_in = [ - (19, 45), - (67, 23), - (12, 78), - (40, 99), - (1, 71), - ] - worker.execute_model(execute_model_req=execute_model_req) - - for i in range(num_layers): - gpu_key_cache, gpu_value_cache = gpu_cache[i] - cpu_key_cache, cpu_value_cache = cpu_cache[i] - for src, dst in execute_model_req.blocks_to_swap_in: - assert allclose(gpu_key_cache[dst], cpu_key_cache[src]) - assert allclose(gpu_value_cache[dst], cpu_value_cache[src]) From b7433ca1a47732394b1bdea4099d98389515954b Mon Sep 17 00:00:00 2001 From: Benjamin Chislett Date: Thu, 18 Sep 2025 01:07:24 -0400 Subject: [PATCH 086/518] [Spec Decode] Efficient padded speculation (#24539) Signed-off-by: Benjamin Chislett --- tests/v1/spec_decode/test_eagle.py | 179 +++++++++++++++++++- vllm/config/speculative.py | 5 + vllm/v1/spec_decode/eagle.py | 258 +++++++++++++++++++++++++---- vllm/v1/worker/gpu_input_batch.py | 5 +- vllm/v1/worker/gpu_model_runner.py | 164 +++++++++++------- 5 files changed, 507 insertions(+), 104 deletions(-) diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index ccab04628a16..e7f6b68fc3f7 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -19,6 +19,8 @@ from vllm.model_executor.models.llama import LlamaForCausalLM from vllm.platforms import current_platform from vllm.v1.spec_decode.eagle import EagleProposer +from vllm.v1.spec_decode.metadata import SpecDecodeMetadata +from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch model_dir = "meta-llama/Llama-3.1-8B-Instruct" eagle_dir = "yuhuili/EAGLE-LLaMA3.1-Instruct-8B" @@ -64,6 +66,86 @@ def _create_proposer( device=current_platform.device_type) +def test_prepare_next_token_ids(): + """ + Test for prepare_next_token_ids_cpu and prepare_next_token_ids_padded. + Each will produce a device tensor of next_token_ids, taking as input + either the GPU tensor of sampled_token_ids with -1 for rejected tokens, + or the CPU python list[list[int]] with the rejected tokens removed. 
+ """ + device = torch.device(current_platform.device_type) + + num_requests = 4 + num_speculative_tokens = 4 + batch_spec = BatchSpec( + seq_lens=[num_speculative_tokens + 1] * num_requests, + query_lens=[num_speculative_tokens + 1] * num_requests, + ) + + req_ids = [f"req_{i+1}" for i in range(num_requests)] + mock_input_batch = mock.MagicMock(spec=InputBatch) + mock_input_batch.req_ids = req_ids + mock_input_batch.num_reqs = num_requests + mock_input_batch.vocab_size = 100 + + mock_num_scheduled_tokens = {req_id: 0 for req_id in req_ids} + mock_requests = {} + for req_id in req_ids: + mock_request = mock.MagicMock(spec=CachedRequestState) + # Each request will have a backup next token id of 10, 20, 30, 40 + mock_request.get_token_id.return_value = int(req_id.split("_")[1]) * 10 + mock_request.num_computed_tokens = 0 + mock_requests[req_id] = mock_request + + sampled_token_ids = [ + [0, 1, -1, -1, -1], # 1 accepted, 3 rejected, "1" sampled + [0, 1, 2, 3, 4], # all accepted, "4" sampled + [-1, -1, -1, -1, -1], # sampling skipped, use backup token "30" + [-1, -1, -1, -1, -1] # this request will be discarded + ] + sampled_token_ids_tensor = torch.tensor(sampled_token_ids, + dtype=torch.int32, + device=device) + sampled_token_ids_cpu = [[i for i in seq if i != -1] + for seq in sampled_token_ids] + + expected_next_token_ids_cpu = [1, 4, 30, 40] + expected_next_token_ids_tensor = torch.tensor(expected_next_token_ids_cpu, + dtype=torch.int32, + device=device) + + proposer = _create_proposer("eagle", num_speculative_tokens) + + next_token_ids_from_cpu = proposer.prepare_next_token_ids_cpu( + sampled_token_ids_cpu, mock_requests, mock_input_batch, + mock_num_scheduled_tokens) + + assert torch.equal(next_token_ids_from_cpu, expected_next_token_ids_tensor) + + common_attn_metadata = create_common_attn_metadata( + batch_spec, + block_size=16, + device=device, + ) + + discarded_req_indices = torch.tensor([3], dtype=torch.int64, device=device) + num_discarded_reqs = 1 + + expected_valid_sampled_tokens_count = torch.tensor([2, 5, 0, 0], + dtype=torch.int32, + device=device) + + next_token_ids_from_padded, valid_sampled_tokens_count = \ + proposer.prepare_next_token_ids_padded( + common_attn_metadata, sampled_token_ids_tensor, mock_requests, + mock_input_batch, discarded_req_indices, num_discarded_reqs) + + assert torch.equal(next_token_ids_from_padded, + expected_next_token_ids_tensor) + assert torch.equal(valid_sampled_tokens_count, + expected_valid_sampled_tokens_count) + + def test_prepare_inputs(): """ cu_target_query_lens: [0, a, a + b, a + b + c] @@ -90,10 +172,24 @@ def test_prepare_inputs(): device=device, ) - # Rejected tokens per request: [1, 3, 2] - num_rejected_tokens = torch.tensor([1, 3, 2], - dtype=torch.int32, - device=device) + # If there are `k` sampled tokens, then `k-1` tokens are draft tokens + # from the previous iteration, and the last token is the bonus token sampled + # from the base model. 
+ num_draft_tokens = [3, 6, 4] # one less than query_lens + # num rejected tokens is [1, 3, 2] + ACCEPT_TOKEN = 0 + BONUS_TOKEN = 1 + REJECT_TOKEN = -1 + sampled_token_ids = [ + [ACCEPT_TOKEN, ACCEPT_TOKEN, REJECT_TOKEN, BONUS_TOKEN], + [ + ACCEPT_TOKEN, ACCEPT_TOKEN, ACCEPT_TOKEN, REJECT_TOKEN, + REJECT_TOKEN, REJECT_TOKEN, BONUS_TOKEN + ], + [ACCEPT_TOKEN, ACCEPT_TOKEN, REJECT_TOKEN, REJECT_TOKEN, BONUS_TOKEN] + ] + sampled_token_ids = [[i for i in seq if i != REJECT_TOKEN] + for seq in sampled_token_ids] # Expected calculations: # query_len_per_req = [4, 7, 5] @@ -125,7 +221,7 @@ def test_prepare_inputs(): proposer = _create_proposer("eagle", 1) updated_metadata, token_indices = proposer.prepare_inputs( - common_attn_metadata, num_rejected_tokens.cpu()) + common_attn_metadata, sampled_token_ids, num_draft_tokens) assert torch.equal(updated_metadata.query_start_loc, expected_cu_num_tokens) @@ -133,6 +229,77 @@ def test_prepare_inputs(): assert torch.equal(token_indices, expected_token_indices) +def test_prepare_inputs_padded(): + """ + Input scenario is 3 requests with num_speculative_tokens == 2 and: + - Request 1: query_len = 3, rejected = 1 + - Request 2: query_len = 3, rejected = 0 + - Request 3: query_len = 3, rejected = 2 + + Expected outputs: + token_indices: [0, 1, 2, + 3, 4, 5, + 6, 7, 8] + Reason: Deferred computation should not disturb the original indices. + + token_indices_to_sample: [1, 5, 6] + Reason: After accounting for rejections, these are the valid token positions + from the original indices to sample from. + """ + + device = torch.device(current_platform.device_type) + + expected_token_indices = torch.tensor([0, 1, 2, 3, 4, 5, 6, 7, 8], + dtype=torch.int32, + device=device) + expected_token_indices_to_sample = torch.tensor([1, 5, 6], + dtype=torch.int32, + device=device) + + num_speculative_tokens = 2 + batch_spec = BatchSpec( + seq_lens=[3, 3, 3], + query_lens=[3, 3, 3], + ) + + common_attn_metadata = create_common_attn_metadata( + batch_spec, + block_size=16, + device=device, + ) + + # Needed for cu_num_draft_tokens, which is expected to be [3, 6, 9] + expected_query_start_loc = torch.tensor([0, 3, 6, 9], + dtype=torch.int32, + device=device) + spec_decode_metadata = SpecDecodeMetadata.make_dummy( + draft_token_ids=[[0] * num_speculative_tokens] * 3, + device=device, + ) + + # num_rejected_tokens = [1, 0, 2] + # num_draft_tokens = [2, 2, 2] + # valid_sampled_tokens_count = num_draft_tokens + 1 - num_rejected_tokens + valid_sampled_tokens_count = torch.tensor([2, 3, 1], + dtype=torch.int32, + device=device) + + proposer = _create_proposer("eagle", num_speculative_tokens) + + output_metadata, token_indices, token_indices_to_sample = \ + proposer.prepare_inputs_padded( + common_attn_metadata, + spec_decode_metadata, + valid_sampled_tokens_count) + + assert output_metadata.max_query_len == 3 + assert torch.equal(output_metadata.query_start_loc, + expected_query_start_loc) + assert torch.equal(token_indices, expected_token_indices) + assert torch.equal(token_indices_to_sample, + expected_token_indices_to_sample) + + @pytest.mark.parametrize("method", ["eagle", "eagle3"]) @pytest.mark.parametrize("attn_backend", get_attn_backend_list_based_on_platform()) @@ -373,6 +540,7 @@ def create_deterministic_logits(token_ids): target_positions=target_positions, target_hidden_states=target_hidden_states, next_token_ids=next_token_ids, + last_token_indices=None, common_attn_metadata=common_attn_metadata, sampling_metadata=sampling_metadata) @@ -526,6 +694,7 @@ def 
create_deterministic_logits(token_ids, k: int): target_positions=target_positions, target_hidden_states=target_hidden_states, next_token_ids=next_token_ids, + last_token_indices=None, common_attn_metadata=common_attn_metadata, sampling_metadata=sampling_metadata) assert result.shape == (batch_size, num_speculative_tokens) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index b2d50e385233..fca8c28e5c61 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -83,6 +83,11 @@ class SpeculativeConfig: disable_by_batch_size: Optional[int] = None """Disable speculative decoding for new incoming requests when the number of enqueued requests is larger than this value, if provided.""" + disable_padded_drafter_batch: bool = False + """Disable input padding for speculative decoding. If set to True, + speculative input batches can contain sequences of different lengths, + which may only be supported by certain attention backends. This currently + only affects the EAGLE method of speculation.""" # Ngram proposer configuration prompt_lookup_max: Optional[int] = None diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 5154b29405b6..2a178ddf4877 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -27,6 +27,9 @@ from vllm.v1.attention.backends.utils import CommonAttentionMetadata from vllm.v1.kv_cache_interface import KVCacheConfig from vllm.v1.sample.metadata import SamplingMetadata +from vllm.v1.spec_decode.metadata import SpecDecodeMetadata +from vllm.v1.utils import CpuGpuBuffer +from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch from vllm.v1.worker.ubatching import dbo_current_ubatch_id logger = init_logger(__name__) @@ -94,20 +97,26 @@ def __init__( dtype=self.dtype, device=device) + # We need +1 here because the arange is used to set query_start_loc, + # which has one more element than batch_size. max_batch_size = vllm_config.scheduler_config.max_num_seqs - self.arange = torch.arange( - # We need +1 here because the arange is used to set query_start_loc, - # which has one more element than batch_size. - max_batch_size + 1, - device=device, - dtype=torch.int32, - ) + max_num_slots_for_arange = max(max_batch_size + 1, self.max_num_tokens) + self.arange = torch.arange(max_num_slots_for_arange, + device=device, + dtype=torch.int32) self.inputs_embeds = torch.zeros( (self.max_num_tokens, self.hidden_size), dtype=self.dtype, device=device) + self.backup_next_token_ids = CpuGpuBuffer( + max_batch_size, + dtype=torch.int32, + pin_memory=is_pin_memory_available(), + device=device, + with_numpy=True) + # Determine allowed attention backends once during initialization. self.allowed_attn_types: tuple[type[EagleAttentionMetadata], ...] 
if current_platform.is_rocm(): @@ -156,13 +165,16 @@ def propose( target_hidden_states: torch.Tensor, # [batch_size] next_token_ids: torch.Tensor, + last_token_indices: Optional[torch.Tensor], common_attn_metadata: CommonAttentionMetadata, sampling_metadata: SamplingMetadata, mm_embeds: Optional[list[torch.Tensor]] = None, ) -> torch.Tensor: num_tokens = target_token_ids.shape[0] batch_size = next_token_ids.shape[0] - last_token_indices = common_attn_metadata.query_start_loc[1:] - 1 + + if last_token_indices is None: + last_token_indices = common_attn_metadata.query_start_loc[1:] - 1 if self.method == "eagle3": assert isinstance(self.model, Eagle3LlamaForCausalLM) @@ -228,6 +240,12 @@ def propose( last_hidden_states, hidden_states = ret_hidden_states sample_hidden_states = last_hidden_states[last_token_indices] logits = self.model.compute_logits(sample_hidden_states, None) + + # Early exit if there is only one draft token to be generated. + if self.num_speculative_tokens == 1: + draft_token_ids = logits.argmax(dim=-1) + return draft_token_ids.view(-1, 1) + positions = target_positions[last_token_indices] hidden_states = hidden_states[last_token_indices] @@ -245,15 +263,12 @@ def propose( draft_token_ids = logits.argmax(dim=-1) - # Early exit if there is only one draft token to be generated. - if self.num_speculative_tokens == 1: - # [batch_size, 1] - return draft_token_ids.view(-1, 1) - - # TODO: Currently, MTP module released by deepseek only has - # one layer. Adapt this code to support multiple layers once - # there's a multi-layer MTP module. - assert isinstance(attn_metadata, self.allowed_attn_types) + if not isinstance(attn_metadata, self.allowed_attn_types): + raise ValueError( + f"Unsupported attention metadata type for speculative " + "decoding with num_speculative_tokens > 1: " + f"{type(attn_metadata)}. Supported types are: " + f"{self.allowed_attn_types}") # Generate the remaining draft tokens. draft_token_ids_list = [draft_token_ids] @@ -263,10 +278,13 @@ def propose( input_batch_size = self.vllm_config.pad_for_cudagraph(batch_size) else: input_batch_size = batch_size - attn_metadata.num_actual_tokens = batch_size - attn_metadata.max_query_len = 1 - attn_metadata.query_start_loc = self.arange[:batch_size + 1] - for _ in range(self.num_speculative_tokens - 1): + + common_attn_metadata.num_actual_tokens = batch_size + common_attn_metadata.max_query_len = 1 + common_attn_metadata.query_start_loc = self.arange[:batch_size + 1] + common_attn_metadata.query_start_loc_cpu = torch.from_numpy( + self.token_arange_np[:batch_size + 1]).clone() + for token_index in range(self.num_speculative_tokens - 1): # Update the inputs. # cast to int32 is crucial when eagle model is compiled. # tensor.argmax() returns int64 by default. @@ -286,27 +304,38 @@ def propose( positions) # Increment the sequence lengths. - attn_metadata.max_seq_len += 1 - attn_metadata.seq_lens += 1 - # Consider max model length. - attn_metadata.max_seq_len = min(attn_metadata.max_seq_len, - self.max_model_len) + common_attn_metadata.seq_lens += 1 + common_attn_metadata.seq_lens_cpu += 1 # For the requests that exceed the max model length, we set the # sequence length to 1 to minimize their overheads in attention. - attn_metadata.seq_lens.masked_fill_(exceeds_max_model_len, 1) + common_attn_metadata.seq_lens.masked_fill_(exceeds_max_model_len, + 1) + + common_attn_metadata.num_computed_tokens_cpu = \ + common_attn_metadata.seq_lens_cpu - 1 # Compute the slot mapping. 
block_numbers = clamped_positions // self.block_size - block_ids = attn_metadata.block_table.gather( + block_ids = common_attn_metadata.block_table_tensor.gather( dim=1, index=block_numbers.view(-1, 1)) block_ids = block_ids.view(-1) - attn_metadata.slot_mapping = (block_ids * self.block_size + - clamped_positions % self.block_size) + common_attn_metadata.slot_mapping = ( + block_ids * self.block_size + + clamped_positions % self.block_size) # Mask out the slot mappings that exceed the max model length. # Otherwise, the KV cache will be inadvertently updated with the # padding tokens. - attn_metadata.slot_mapping.masked_fill_(exceeds_max_model_len, - PADDING_SLOT_ID) + common_attn_metadata.slot_mapping.masked_fill_( + exceeds_max_model_len, PADDING_SLOT_ID) + + # Rebuild attention metadata + attn_metadata_builder = \ + self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + attn_metadata = attn_metadata_builder\ + .build_for_drafting(common_attn_metadata=common_attn_metadata, + draft_index=token_index + 1) + for layer_name in self.attn_layer_names: + per_layer_attn_metadata[layer_name] = attn_metadata # copy inputs to buffer for cudagraph self.input_ids[:batch_size] = input_ids @@ -347,6 +376,158 @@ def propose( draft_token_ids = torch.stack(draft_token_ids_list, dim=1) return draft_token_ids + def prepare_next_token_ids_cpu( + self, sampled_token_ids: list[list[int]], + requests: dict[str, + CachedRequestState], gpu_input_batch: InputBatch, + num_scheduled_tokens: dict[str, int]) -> torch.Tensor: + """ + This function is used to prepare the inputs for speculative decoding. + It calculates the next token ids for each request based on the sampled + token ids from the CPU. If a request has no sampled token ids (e.g., + during the initial decoding steps), it falls back to using the request + state to get the next token id. + """ + req_ids = gpu_input_batch.req_ids + next_token_ids: list[int] = [] + for i, token_ids in enumerate(sampled_token_ids): + if token_ids: + # Common case. + next_token_id = token_ids[-1] + else: + # Partial prefill (rare case). + # Get the next token id from the request state. + req_id = req_ids[i] + req_state = requests[req_id] + seq_len = (req_state.num_computed_tokens + + num_scheduled_tokens[req_id]) + next_token_id = req_state.get_token_id(seq_len) + next_token_ids.append(next_token_id) + next_token_ids = torch.tensor(next_token_ids, + dtype=torch.int32, + device=self.input_ids.device) + return next_token_ids + + def prepare_next_token_ids_padded(self, + common_attn_metadata: CommonAttentionMetadata, + sampled_token_ids: torch.Tensor, + requests: dict[str, CachedRequestState], + gpu_input_batch: InputBatch, + discard_request_indices: torch.Tensor, + num_discarded_requests: int) -> \ + tuple[torch.Tensor, torch.Tensor]: + """ + This function is used to prepare the inputs for speculative decoding. + It calculates the next token ids and the number of valid sampled tokens + for each request, considering the "discarded" requests whose next token + is not sampled and comes from `request.get_token_id()` instead. + It also accounts for the rejected tokens in `sampled_token_ids`. + This function must use device functions to operate on the inputs, and + should not introduce any blocking CPU-GPU synchronization. 
+ """ + # TODO(Ben): Combine this into a custom fused kernel + + # Precompute get_token_id for when there is no valid next token + num_reqs = gpu_input_batch.num_reqs + self.backup_next_token_ids.np[:num_reqs] = np.array([ + requests[gpu_input_batch.req_ids[i]].get_token_id( + common_attn_metadata.seq_lens_cpu[i].item()) + for i in range(num_reqs) + ]) + self.backup_next_token_ids.copy_to_gpu(num_reqs) + + # Mask out the sampled tokens indices that should not be sampled. + discard_sampled_tokens_req_indices = \ + discard_request_indices[:num_discarded_requests] + + valid_sampled_token_ids_gpu = sampled_token_ids.clone() + valid_sampled_token_ids_gpu.index_fill_( + 0, discard_sampled_tokens_req_indices, -1) + + # Generate a mask for all valid tokens within those requests + max_gen_len = sampled_token_ids.shape[-1] + if max_gen_len == 1: + valid_mask = torch.ones_like(valid_sampled_token_ids_gpu, + dtype=torch.bool) + else: + valid_mask = ( + (valid_sampled_token_ids_gpu != -1) & + (valid_sampled_token_ids_gpu < gpu_input_batch.vocab_size)) + + # Count the number of valid tokens in each request + valid_sampled_tokens_count = valid_mask.sum(dim=1) + + # Get the rightmost valid index per row + last_valid_indices = valid_sampled_tokens_count - 1 + last_valid_indices_safe = torch.clamp(last_valid_indices, min=0) + + # Get last valid token from each row + # (assume undefined state where there is no valid token) + selected_tokens = torch.gather( + valid_sampled_token_ids_gpu, 1, + last_valid_indices_safe.unsqueeze(1)).squeeze(1) + + # Use last token if valid, pre-computed backup if not + batch_size = valid_sampled_token_ids_gpu.shape[0] + next_token_ids = torch.where( + last_valid_indices != -1, selected_tokens, + self.backup_next_token_ids.gpu[:batch_size]) + + return next_token_ids, valid_sampled_tokens_count + + def prepare_inputs_padded(self, + common_attn_metadata: CommonAttentionMetadata, + spec_decode_metadata: SpecDecodeMetadata, + valid_sampled_tokens_count: torch.Tensor) -> \ + tuple[CommonAttentionMetadata, torch.Tensor, torch.Tensor]: + """ + This function is used to prepare the inputs for speculative decoding + It updates the common_attn_metadata for speculative decoding, + but does not consider the rejected tokens. Instead, all tokens + are included as inputs to the speculator, with the rejected tokens + used as padding and filtered out later by `token_indices_to_sample`. + No blocking CPU operations should be introduced in this function. + """ + num_draft_tokens_gpu = torch.cat([ + spec_decode_metadata.cu_num_draft_tokens[0:1], + spec_decode_metadata.cu_num_draft_tokens[1:] - + spec_decode_metadata.cu_num_draft_tokens[:-1] + ]) + + num_rejected_tokens_gpu = torch.where( + num_draft_tokens_gpu > 0, + num_draft_tokens_gpu + 1 - valid_sampled_tokens_count, + torch.zeros_like(num_draft_tokens_gpu)) + + query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu + + new_query_len_per_req = (query_start_loc_cpu[1:] - + query_start_loc_cpu[:-1]) + + total_num_tokens = query_start_loc_cpu[-1].item() + token_indices = self.arange[:total_num_tokens] + + spec_common_attn_metadata = CommonAttentionMetadata( + query_start_loc=common_attn_metadata.query_start_loc, + seq_lens=common_attn_metadata.seq_lens, + query_start_loc_cpu=query_start_loc_cpu, + seq_lens_cpu=common_attn_metadata.seq_lens_cpu, + num_computed_tokens_cpu=common_attn_metadata. 
+ num_computed_tokens_cpu, + num_reqs=common_attn_metadata.num_reqs, + num_actual_tokens=total_num_tokens, + max_query_len=new_query_len_per_req.max().item(), + max_seq_len=common_attn_metadata.seq_lens_cpu.max().item(), + block_table_tensor=common_attn_metadata.block_table_tensor, + slot_mapping=common_attn_metadata.slot_mapping[token_indices], + causal=True, + ) + + token_indices_to_sample = common_attn_metadata.query_start_loc[1:] - 1 \ + - num_rejected_tokens_gpu + + return spec_common_attn_metadata, token_indices, token_indices_to_sample + def propose_tree( self, batch_size: int, @@ -520,11 +701,11 @@ def propose_tree( def prepare_inputs( self, common_attn_metadata: CommonAttentionMetadata, - # [batch_size] - num_rejected_tokens: torch.Tensor + sampled_token_ids: list[list[int]], + num_draft_tokens: list[int], ) -> tuple[CommonAttentionMetadata, torch.Tensor]: """ - This function is used to prepare the inputs for the spec decode. + This function is used to prepare the inputs for speculative decoding. It updates to the common_attn_metadata to account for the rejected tokens (and newly sampled tokens). It also returns the token indices of the tokens that should be fed to the speculator. @@ -545,6 +726,13 @@ def prepare_inputs( # q1, q1 + 1, ..., q1 + q2 - n2 - 1, # q1 + q2, q1 + q2 + 1, ..., q1 + q2 + q3 - n3 - 1] + num_rejected_tokens = [ + n + 1 - len(sampled_token_ids[i]) if n > 0 else 0 + for i, n in enumerate(num_draft_tokens) + ] + num_rejected_tokens = torch.tensor(num_rejected_tokens, + dtype=torch.int32) + device = common_attn_metadata.query_start_loc.device query_start_loc_cpu = common_attn_metadata.query_start_loc_cpu new_seq_lens_cpu = common_attn_metadata.seq_lens_cpu \ diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 339b9937b73f..6717622efb80 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -64,7 +64,10 @@ def mm_inputs(self) -> list[MultiModalKwargsItems]: def get_token_id(self, idx: int) -> int: if idx < self.num_prompt_tokens: return self.prompt_token_ids[idx] - return self.output_token_ids[idx - self.num_prompt_tokens] + elif idx - self.num_prompt_tokens < len(self.output_token_ids): + return self.output_token_ids[idx - self.num_prompt_tokens] + else: + return -1 class InputBatch: diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f256dc160a6b..e8ad9c2fca07 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -344,6 +344,10 @@ def __init__( self.hidden_size, dtype=self.dtype, numpy=False) + self.discard_request_indices = self._make_buffer(self.max_num_reqs, + dtype=torch.int64) + self.num_discarded_requests = 0 + self.num_draft_tokens = self._make_buffer(self.max_num_reqs, dtype=torch.int32) self.num_accepted_tokens = self._make_buffer(self.max_num_reqs, @@ -974,6 +978,21 @@ def _prepare_inputs( seq_lens = self.seq_lens.gpu[:num_reqs] max_seq_len = self.seq_lens.np[:num_reqs].max().item() + num_tokens = [ + self.requests[r].num_tokens for r in self.input_batch.req_ids + ] + num_tokens_np = np.array(num_tokens, dtype=np.int32) + + # Record the index of requests that should not be sampled, + # so that we could clear the sampled tokens before returning + discard_requests_mask = self.seq_lens.np[:num_reqs] < num_tokens_np + discard_request_indices = np.nonzero(discard_requests_mask)[0] + self.num_discarded_requests = len(discard_request_indices) + self.discard_request_indices.np[:self.num_discarded_requests] = ( + 
discard_request_indices) + + self.discard_request_indices.copy_to_gpu(self.num_discarded_requests) + # Copy the tensors to the GPU. self._prepare_input_ids(total_num_scheduled_tokens, cu_num_tokens) @@ -1973,23 +1992,12 @@ def _bookkeeping_sync( if envs.VLLM_COMPUTE_NANS_IN_LOGITS: num_nans_in_logits = self._get_nans_in_logits(logits) - # TODO(woosuk): The following loop can be slow since it iterates over - # the requests one by one. Optimize. - discard_sampled_tokens_req_indices = [] - for i, req_id in enumerate(self.input_batch.req_ids): - req_state = self.requests[req_id] - seq_len = (req_state.num_computed_tokens + - scheduler_output.num_scheduled_tokens[req_id]) - if seq_len < req_state.num_tokens: - # Ignore the sampled token for partial prefills. - # Rewind the generator state as if the token was not sampled. - # This relies on cuda-specific torch-internal impl details - generator = self.input_batch.generators.get(i) - if generator is not None: - generator.set_offset(generator.get_offset() - 4) - # Record the index of the request that should not be sampled, - # so that we could clear the sampled tokens before returning. - discard_sampled_tokens_req_indices.append(i) + discard_sampled_tokens_req_indices = \ + self.discard_request_indices.np[:self.num_discarded_requests] + for i in discard_sampled_tokens_req_indices: + gen = self.input_batch.generators.get(int(i)) + if gen is not None: + gen.set_offset(gen.get_offset() - 4) # Copy some objects so they don't get modified after returning. # This is important when using async scheduling. @@ -2026,10 +2034,10 @@ def _bookkeeping_sync( ) # Mask out the sampled tokens that should not be sampled. for i in discard_sampled_tokens_req_indices: - valid_sampled_token_ids[i].clear() + valid_sampled_token_ids[int(i)].clear() else: valid_sampled_token_ids = [] - invalid_req_indices = list(discard_sampled_tokens_req_indices) + invalid_req_indices = discard_sampled_tokens_req_indices.tolist() invalid_req_indices_set = set(invalid_req_indices) assert sampled_token_ids.shape[-1] == 1 @@ -2229,6 +2237,28 @@ def execute_model( with record_function_or_nullcontext("Sample"): sampler_output = self._sample(logits, spec_decode_metadata) + def propose_draft_token_ids(sampled_token_ids): + assert spec_decode_common_attn_metadata is not None + with record_function_or_nullcontext("Draft"): + self._draft_token_ids = self.propose_draft_token_ids( + scheduler_output, + sampled_token_ids, + self.input_batch.sampling_metadata, + hidden_states, + sample_hidden_states, + aux_hidden_states, + spec_decode_metadata, + spec_decode_common_attn_metadata, + ) + + use_padded_batch_for_eagle = self.speculative_config and \ + self.speculative_config.use_eagle() and \ + not self.speculative_config.disable_padded_drafter_batch + if use_padded_batch_for_eagle: + # EAGLE speculative decoding can use the GPU sampled tokens + # as inputs, and does not need to wait for bookkeeping to finish. 
+ propose_draft_token_ids(sampler_output.sampled_token_ids) + with record_function_or_nullcontext("Bookkeep"): ( num_nans_in_logits, @@ -2242,19 +2272,10 @@ def execute_model( logits, hidden_states, num_scheduled_tokens) - if self.speculative_config: - assert spec_decode_common_attn_metadata is not None - with record_function_or_nullcontext("Draft"): - self._draft_token_ids = self.propose_draft_token_ids( - scheduler_output, - valid_sampled_token_ids, - self.input_batch.sampling_metadata, - hidden_states, - sample_hidden_states, - aux_hidden_states, - spec_decode_metadata, - spec_decode_common_attn_metadata, - ) + if self.speculative_config and not use_padded_batch_for_eagle: + # ngram and other speculative decoding methods use the sampled + # tokens on the CPU, so they are run after bookkeeping. + propose_draft_token_ids(valid_sampled_token_ids) with record_function_or_nullcontext("EPLB"): self.eplb_step() @@ -2294,7 +2315,7 @@ def take_draft_token_ids(self) -> Optional[DraftTokenIds]: def propose_draft_token_ids( self, scheduler_output: "SchedulerOutput", - sampled_token_ids: list[list[int]], + sampled_token_ids: Union[torch.Tensor, list[list[int]]], sampling_metadata: SamplingMetadata, hidden_states: torch.Tensor, sample_hidden_states: torch.Tensor, @@ -2304,11 +2325,14 @@ def propose_draft_token_ids( ) -> Union[list[list[int]], torch.Tensor]: num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens if self.speculative_config.method == "ngram": + assert isinstance(sampled_token_ids, list) assert isinstance(self.drafter, NgramProposer) draft_token_ids = self.propose_ngram_draft_token_ids( sampled_token_ids) elif self.speculative_config.method == "medusa": + assert isinstance(sampled_token_ids, list) assert isinstance(self.drafter, MedusaProposer) + if sample_hidden_states.shape[0] == len(sampled_token_ids): # The input to the target model does not include draft tokens. hidden_states = sample_hidden_states @@ -2329,27 +2353,37 @@ def propose_draft_token_ids( ) elif self.speculative_config.use_eagle(): assert isinstance(self.drafter, EagleProposer) - # TODO(woosuk): Refactor the loop. - req_ids = self.input_batch.req_ids - next_token_ids: list[int] = [] - for i, token_ids in enumerate(sampled_token_ids): - if token_ids: - # Common case. - next_token_id = token_ids[-1] - else: - # Partial prefill (rare case). - # Get the next token id from the request state. - req_id = req_ids[i] - req_state = self.requests[req_id] - seq_len = (req_state.num_computed_tokens + - scheduler_output.num_scheduled_tokens[req_id]) - next_token_id = req_state.get_token_id(seq_len) - next_token_ids.append(next_token_id) - next_token_ids = torch.tensor(next_token_ids, - dtype=torch.int32, - device=self.device) + + if self.speculative_config.disable_padded_drafter_batch: + # When padded-batch is disabled, the sampled_token_ids should be + # the cpu-side list[list[int]] of valid sampled tokens for each + # request, with invalid requests having empty lists. + assert isinstance(sampled_token_ids, list), \ + "sampled_token_ids should be a python list when" \ + "padded-batch is disabled." + next_token_ids = self.drafter.prepare_next_token_ids_cpu( + sampled_token_ids, self.requests, self.input_batch, + scheduler_output.num_scheduled_tokens) + else: + # When using padded-batch, the sampled_token_ids should be + # the gpu tensor of sampled tokens for each request, of shape + # (num_reqs, num_spec_tokens + 1) with rejected tokens having + # value -1. 
+ assert isinstance(sampled_token_ids, torch.Tensor), \ + "sampled_token_ids should be a torch.Tensor when" \ + "padded-batch is enabled." + next_token_ids, valid_sampled_tokens_count = \ + self.drafter.prepare_next_token_ids_padded( + common_attn_metadata, + sampled_token_ids, + self.requests, + self.input_batch, + self.discard_request_indices.gpu, + self.num_discarded_requests + ) if spec_decode_metadata is None: + token_indices_to_sample = None # input_ids can be None for multimodal models. target_token_ids = self.input_ids.gpu[:num_scheduled_tokens] # TODO(woosuk): Support M-RoPE. @@ -2361,17 +2395,20 @@ def propose_draft_token_ids( else: target_hidden_states = hidden_states[:num_scheduled_tokens] else: - # TODO(woosuk): Refactor this. - num_draft_tokens = spec_decode_metadata.num_draft_tokens - num_rejected_tokens = [ - n + 1 - len(sampled_token_ids[i]) if n > 0 else 0 - for i, n in enumerate(num_draft_tokens) - ] - num_rejected_tokens_cpu = torch.tensor(num_rejected_tokens, - dtype=torch.int32) - common_attn_metadata, token_indices =\ - self.drafter.prepare_inputs( - common_attn_metadata, num_rejected_tokens_cpu) + if self.speculative_config.disable_padded_drafter_batch: + token_indices_to_sample = None + common_attn_metadata, token_indices =\ + self.drafter.prepare_inputs( + common_attn_metadata, + sampled_token_ids, + spec_decode_metadata.num_draft_tokens) + else: + common_attn_metadata, token_indices, \ + token_indices_to_sample =\ + self.drafter.prepare_inputs_padded( + common_attn_metadata, + spec_decode_metadata, + valid_sampled_tokens_count) target_token_ids = self.input_ids.gpu[token_indices] # TODO(woosuk): Support M-RoPE. @@ -2391,6 +2428,7 @@ def propose_draft_token_ids( target_positions=target_positions, target_hidden_states=target_hidden_states, next_token_ids=next_token_ids, + last_token_indices=token_indices_to_sample, sampling_metadata=sampling_metadata, common_attn_metadata=common_attn_metadata, mm_embeds=mm_embeds, From a904ea78eaf7fc3f9b136a1ba6f6f66fb5658496 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 17 Sep 2025 22:30:02 -0700 Subject: [PATCH 087/518] [benchmark] add peak throughput metrics and plot (#23867) Signed-off-by: simon-mo --- vllm/benchmarks/lib/endpoint_request_func.py | 5 + vllm/benchmarks/serve.py | 198 ++++++++++++------- 2 files changed, 134 insertions(+), 69 deletions(-) diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py index e64063047663..066b8fe83438 100644 --- a/vllm/benchmarks/lib/endpoint_request_func.py +++ b/vllm/benchmarks/lib/endpoint_request_func.py @@ -89,6 +89,7 @@ class RequestFuncOutput: tpot: float = 0.0 # avg next-token latencies prompt_len: int = 0 error: str = "" + start_time: float = 0.0 async def async_request_openai_completions( @@ -140,6 +141,7 @@ async def async_request_openai_completions( generated_text = "" st = time.perf_counter() + output.start_time = st most_recent_timestamp = st try: async with session.post(url=api_url, json=payload, @@ -272,6 +274,7 @@ async def async_request_openai_chat_completions( generated_text = "" ttft = 0.0 st = time.perf_counter() + output.start_time = st most_recent_timestamp = st try: async with session.post(url=api_url, json=payload, @@ -396,6 +399,7 @@ def to_bytes(y, sr): generated_text = "" ttft = 0.0 st = time.perf_counter() + output.start_time = st most_recent_timestamp = st try: async with session.post(url=api_url, @@ -475,6 +479,7 @@ async def async_request_openai_embeddings( output = RequestFuncOutput() st = 
time.perf_counter() + output.start_time = st try: async with session.post( url=api_url, diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 1aeef0fd5bd8..d8784340eba1 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -18,9 +18,11 @@ import argparse import asyncio import gc +import importlib.util import json import os import random +import shutil import time import warnings from collections.abc import AsyncGenerator, Iterable @@ -46,6 +48,9 @@ MILLISECONDS_TO_SECONDS_CONVERSION = 1000 +TERM_PLOTLIB_AVAILABLE = ((importlib.util.find_spec("termplotlib") is not None) + and (shutil.which("gnuplot") is not None)) + class TaskType(Enum): GENERATION = "generation" @@ -80,18 +85,23 @@ class BenchmarkMetrics: median_e2el_ms: float std_e2el_ms: float percentiles_e2el_ms: list[tuple[float, float]] + # Max output tokens per second and concurrent requests at that peak + max_output_tokens_per_s: float + max_concurrent_requests: int + @dataclass class EmbedBenchmarkMetrics: completed: int total_input: int request_throughput: float - total_token_throughput :float + total_token_throughput: float mean_e2el_ms: float std_e2el_ms: float median_e2el_ms: float percentiles_e2el_ms: float + def _get_current_request_rate( ramp_up_strategy: Optional[Literal["linear", "exponential"]], ramp_up_start_rps: Optional[int], @@ -150,8 +160,8 @@ async def get_request( assert burstiness > 0, ( f"A positive burstiness factor is expected, but given {burstiness}.") # Convert to list to get length for ramp-up calculations - if isinstance(input_requests, Iterable) and not isinstance( - input_requests, list): + if isinstance(input_requests, + Iterable) and not isinstance(input_requests, list): input_requests = list(input_requests) total_requests = len(input_requests) @@ -161,12 +171,9 @@ async def get_request( request_rates = [] delay_ts = [] for request_index, request in enumerate(input_requests): - current_request_rate = _get_current_request_rate(ramp_up_strategy, - ramp_up_start_rps, - ramp_up_end_rps, - request_index, - total_requests, - request_rate) + current_request_rate = _get_current_request_rate( + ramp_up_strategy, ramp_up_start_rps, ramp_up_end_rps, + request_index, total_requests, request_rate) request_rates.append(current_request_rate) if current_request_rate == float("inf"): delay_ts.append(0) @@ -206,10 +213,8 @@ async def get_request( def calculate_metrics_for_embeddings( - outputs: list[RequestFuncOutput], - dur_s: float, - selected_percentiles: list[float] -) -> EmbedBenchmarkMetrics: + outputs: list[RequestFuncOutput], dur_s: float, + selected_percentiles: list[float]) -> EmbedBenchmarkMetrics: """Calculate the metrics for the embedding requests. Args: @@ -242,10 +247,8 @@ def calculate_metrics_for_embeddings( mean_e2el_ms=np.mean(e2els or 0) * 1000, std_e2el_ms=np.std(e2els or 0) * 1000, median_e2el_ms=np.median(e2els or 0) * 1000, - percentiles_e2el_ms=[ - (p, np.percentile(e2els or 0, p) * 1000) - for p in selected_percentiles - ], + percentiles_e2el_ms=[(p, np.percentile(e2els or 0, p) * 1000) + for p in selected_percentiles], ) return metrics @@ -336,6 +339,67 @@ def calculate_metrics( "All requests failed. 
This is likely due to a misconfiguration " "on the benchmark arguments.", stacklevel=2) + + # Calculate max output tokens per second metric + max_output_tokens_per_s = 0.0 + max_concurrent_requests = 0 + + # Find the time range across all successful requests + successful_outputs = [output for output in outputs if output.success] + if successful_outputs: + min_start_time = min(output.start_time + for output in successful_outputs) + max_end_time = max(output.start_time + output.latency + for output in successful_outputs) + + # Create second buckets (ceiling to ensure we capture all time) + duration_seconds = int(np.ceil(max_end_time - min_start_time)) + 1 + tokens_per_second = np.zeros(duration_seconds) + concurrent_requests_per_second = np.zeros(duration_seconds) + + for i, output in enumerate(successful_outputs): + # Calculate token generation timestamp using + # start_time, ttft, and itl + token_times = [output.start_time + output.ttft] + current_time = token_times[0] + for itl_value in output.itl: + current_time += itl_value + token_times.append(current_time) + + # Add tokens to second buckets + for token_time in token_times: + second_bucket = int(token_time - min_start_time) + if 0 <= second_bucket < duration_seconds: + tokens_per_second[second_bucket] += 1 + + # Track concurrent requests for each second this request was active + request_start_second = int(output.start_time - min_start_time) + request_end_second = int((output.start_time + output.latency) - + min_start_time) + + for second in range(request_start_second, request_end_second + 1): + concurrent_requests_per_second[second] += 1 + + # Find the maximum tokens per second and corresponding + # concurrent requests + if len(tokens_per_second) > 0: + max_output_tokens_per_s = float(np.max(tokens_per_second)) + max_concurrent_requests = int( + np.max(concurrent_requests_per_second)) + + if TERM_PLOTLIB_AVAILABLE: + import termplotlib as tpl + fig = tpl.figure() + fig.plot(np.arange(len(tokens_per_second)), + tokens_per_second, + title="Output tokens per second") + fig.plot(np.arange(len(concurrent_requests_per_second)), + concurrent_requests_per_second, + title="Concurrent requests per second") + fig.show() + else: + print("tip: install termplotlib and gnuplot to plot the metrics") + metrics = BenchmarkMetrics( completed=completed, total_input=total_input, @@ -365,6 +429,8 @@ def calculate_metrics( median_e2el_ms=np.median(e2els or 0) * 1000, percentiles_e2el_ms=[(p, np.percentile(e2els or 0, p) * 1000) for p in selected_percentiles], + max_output_tokens_per_s=max_output_tokens_per_s, + max_concurrent_requests=max_concurrent_requests, ) return metrics, actual_output_lens @@ -396,11 +462,8 @@ async def benchmark( ramp_up_end_rps: Optional[int] = None, ready_check_timeout_sec: int = 600, ): - task_type = ( - TaskType.EMBEDDING - if api_url.endswith("/v1/embeddings") - else TaskType.GENERATION - ) + task_type = (TaskType.EMBEDDING if api_url.endswith("/v1/embeddings") else + TaskType.GENERATION) if endpoint_type in ASYNC_REQUEST_FUNCS: if task_type == TaskType.EMBEDDING: request_func = ASYNC_REQUEST_FUNCS["openai-embeddings"] @@ -435,14 +498,10 @@ async def benchmark( input_requests[0].multi_modal_data, ) - assert ( - test_mm_content is None - or isinstance(test_mm_content, dict) - or ( - isinstance(test_mm_content, list) - and all(isinstance(item, dict) for item in test_mm_content) - ) - ), "multi_modal_data must be a dict or list[dict]" + assert (test_mm_content is None or isinstance(test_mm_content, dict) + or 
(isinstance(test_mm_content, list) + and all(isinstance(item, dict) for item in test_mm_content)) + ), "multi_modal_data must be a dict or list[dict]" test_input = RequestFuncInput( model=model_id, model_name=model_name, @@ -488,13 +547,13 @@ async def benchmark( ignore_eos=ignore_eos, extra_headers=extra_headers, extra_body=extra_body) - profile_output = await request_func( - request_func_input=profile_input, session=session) + profile_output = await request_func(request_func_input=profile_input, + session=session) if profile_output.success: print("Profiler started") - distribution = ("Poisson process" if burstiness == 1.0 - else "Gamma distribution") + distribution = ("Poisson process" + if burstiness == 1.0 else "Gamma distribution") if ramp_up_strategy is not None: print(f"Traffic ramp-up strategy: {ramp_up_strategy}.") @@ -562,18 +621,20 @@ async def limited_request_func(request_func_input, session, pbar): req_lora_module = next(lora_modules) req_model_id, req_model_name = req_lora_module, req_lora_module - request_func_input = RequestFuncInput(model=req_model_id, - model_name=req_model_name, - prompt=prompt, - api_url=api_url, - prompt_len=prompt_len, - output_len=output_len, - logprobs=logprobs, - multi_modal_content=mm_content, - ignore_eos=ignore_eos, - extra_headers=extra_headers, - extra_body=extra_body, - request_id=request_id,) + request_func_input = RequestFuncInput( + model=req_model_id, + model_name=req_model_name, + prompt=prompt, + api_url=api_url, + prompt_len=prompt_len, + output_len=output_len, + logprobs=logprobs, + multi_modal_content=mm_content, + ignore_eos=ignore_eos, + extra_headers=extra_headers, + extra_body=extra_body, + request_id=request_id, + ) tasks.append( asyncio.create_task( limited_request_func(request_func_input=request_func_input, @@ -615,19 +676,21 @@ async def limited_request_func(request_func_input, session, pbar): benchmark_duration)) print("{:<40} {:<10}".format("Total input tokens:", metrics.total_input)) if isinstance(metrics, BenchmarkMetrics): - print("{:<40} {:<10}".format( - "Total generated tokens:", metrics.total_output)) + print("{:<40} {:<10}".format("Total generated tokens:", + metrics.total_output)) print("{:<40} {:<10.2f}".format("Request throughput (req/s):", metrics.request_throughput)) if goodput_config_dict: print("{:<40} {:<10.2f}".format("Request goodput (req/s):", metrics.request_goodput)) if isinstance(metrics, BenchmarkMetrics): - print( - "{:<40} {:<10.2f}".format( - "Output token throughput (tok/s):", metrics.output_throughput - ) - ) + print("{:<40} {:<10.2f}".format("Output token throughput (tok/s):", + metrics.output_throughput)) + print("{:<40} {:<10.2f}".format( + "Peak output token throughput (tok/s):", + metrics.max_output_tokens_per_s)) + print("{:<40} {:<10.2f}".format("Peak concurrent requests:", + metrics.max_concurrent_requests)) print("{:<40} {:<10.2f}".format("Total Token throughput (tok/s):", metrics.total_token_throughput)) @@ -648,6 +711,8 @@ async def limited_request_func(request_func_input, session, pbar): "itls": [output.itl for output in outputs], "generated_texts": [output.generated_text for output in outputs], "errors": [output.error for output in outputs], + "max_output_tokens_per_s": metrics.max_output_tokens_per_s, + "max_concurrent_requests": metrics.max_concurrent_requests, } else: result = { @@ -697,8 +762,8 @@ def process_one_metric( if task_type == TaskType.GENERATION: process_one_metric("ttft", "TTFT", "Time to First Token") - process_one_metric( - "tpot", "TPOT", "Time per Output Token 
(excl. 1st token)") + process_one_metric("tpot", "TPOT", + "Time per Output Token (excl. 1st token)") process_one_metric("itl", "ITL", "Inter-token Latency") process_one_metric("e2el", "E2EL", "End-to-end Latency") @@ -714,8 +779,8 @@ def process_one_metric( output_len=test_output_len, logprobs=logprobs, ) - profile_output = await request_func( - request_func_input=profile_input, session=session) + profile_output = await request_func(request_func_input=profile_input, + session=session) if profile_output.success: print("Profiler stopped") @@ -851,7 +916,8 @@ def add_cli_args(parser: argparse.ArgumentParser): parser.add_argument( "--tokenizer", type=str, - help="Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501 + help= + "Name or path of the tokenizer, if not using the default tokenizer.", # noqa: E501 ) parser.add_argument("--use-beam-search", action="store_true") parser.add_argument( @@ -982,7 +1048,6 @@ def add_cli_args(parser: argparse.ArgumentParser): help="Specify the prefix of request id.", ) - sampling_group = parser.add_argument_group("sampling parameters") sampling_group.add_argument( "--top-p", @@ -1047,8 +1112,7 @@ def add_cli_args(parser: argparse.ArgumentParser): help="The ramp-up strategy. This would be used to " "ramp up the request rate from initial RPS to final " "RPS rate (specified by --ramp-up-start-rps and " - "--ramp-up-end-rps.) over the duration of the benchmark." - ) + "--ramp-up-end-rps.) over the duration of the benchmark.") parser.add_argument( "--ramp-up-start-rps", type=int, @@ -1087,13 +1151,11 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: raise ValueError( "When using ramp-up, do not specify --request-rate. " "The request rate will be controlled by ramp-up parameters. " - "Please remove the --request-rate argument." - ) + "Please remove the --request-rate argument.") if args.ramp_up_start_rps is None or args.ramp_up_end_rps is None: raise ValueError( "When using --ramp-up-strategy, both --ramp-up-start-rps and " - "--ramp-up-end-rps must be specified" - ) + "--ramp-up-end-rps must be specified") if args.ramp_up_start_rps < 0 or args.ramp_up_end_rps < 0: raise ValueError("Ramp-up start and end RPS must be non-negative") if args.ramp_up_start_rps > args.ramp_up_end_rps: @@ -1127,8 +1189,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: headers[kvstring[0].strip()] = kvstring[1].strip() else: raise ValueError( - "Invalid header format. Please use KEY=VALUE format." - ) + "Invalid header format. Please use KEY=VALUE format.") tokenizer = get_tokenizer(tokenizer_id, tokenizer_mode=tokenizer_mode, @@ -1215,8 +1276,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: result_json[kvstring[0].strip()] = kvstring[1].strip() else: raise ValueError( - "Invalid metadata format. Please use KEY=VALUE format." - ) + "Invalid metadata format. 
Please use KEY=VALUE format.") # Traffic result_json["request_rate"] = (args.request_rate if args.request_rate From e111d5b0ae9359e2a829771105e739d36505fa69 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Wed, 17 Sep 2025 22:30:26 -0700 Subject: [PATCH 088/518] [CLI] Use streaming in CLI chat and completion commands (#23769) Signed-off-by: simon-mo --- vllm/entrypoints/cli/openai.py | 71 +++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 26 deletions(-) diff --git a/vllm/entrypoints/cli/openai.py b/vllm/entrypoints/cli/openai.py index 7c01de94a343..1929d6a7f77a 100644 --- a/vllm/entrypoints/cli/openai.py +++ b/vllm/entrypoints/cli/openai.py @@ -45,6 +45,28 @@ def _interactive_cli(args: argparse.Namespace) -> tuple[str, OpenAI]: return model_name, openai_client +def _print_chat_stream(stream) -> str: + output = "" + for chunk in stream: + delta = chunk.choices[0].delta + if delta.content: + output += delta.content + print(delta.content, end="", flush=True) + print() + return output + + +def _print_completion_stream(stream) -> str: + output = "" + for chunk in stream: + text = chunk.choices[0].text + if text is not None: + output += text + print(text, end="", flush=True) + print() + return output + + def chat(system_prompt: str | None, model_name: str, client: OpenAI) -> None: conversation: list[ChatCompletionMessageParam] = [] if system_prompt is not None: @@ -58,14 +80,11 @@ def chat(system_prompt: str | None, model_name: str, client: OpenAI) -> None: break conversation.append({"role": "user", "content": input_message}) - chat_completion = client.chat.completions.create(model=model_name, - messages=conversation) - - response_message = chat_completion.choices[0].message - output = response_message.content - - conversation.append(response_message) # type: ignore - print(output) + stream = client.chat.completions.create(model=model_name, + messages=conversation, + stream=True) + output = _print_chat_stream(stream) + conversation.append({"role": "assistant", "content": output}) def _add_query_options( @@ -108,9 +127,11 @@ def cmd(args: argparse.Namespace) -> None: if args.quick: conversation.append({"role": "user", "content": args.quick}) - chat_completion = client.chat.completions.create( - model=model_name, messages=conversation) - print(chat_completion.choices[0].message.content) + stream = client.chat.completions.create(model=model_name, + messages=conversation, + stream=True) + output = _print_chat_stream(stream) + conversation.append({"role": "assistant", "content": output}) return print("Please enter a message for the chat model:") @@ -121,14 +142,11 @@ def cmd(args: argparse.Namespace) -> None: break conversation.append({"role": "user", "content": input_message}) - chat_completion = client.chat.completions.create( - model=model_name, messages=conversation) - - response_message = chat_completion.choices[0].message - output = response_message.content - - conversation.append(response_message) # type: ignore - print(output) + stream = client.chat.completions.create(model=model_name, + messages=conversation, + stream=True) + output = _print_chat_stream(stream) + conversation.append({"role": "assistant", "content": output}) @staticmethod def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: @@ -168,9 +186,10 @@ def cmd(args: argparse.Namespace) -> None: model_name, client = _interactive_cli(args) if args.quick: - completion = client.completions.create(model=model_name, - prompt=args.quick) - print(completion.choices[0].text) + stream = 
client.completions.create(model=model_name, + prompt=args.quick, + stream=True) + _print_completion_stream(stream) return print("Please enter prompt to complete:") @@ -179,10 +198,10 @@ def cmd(args: argparse.Namespace) -> None: input_prompt = input("> ") except EOFError: break - completion = client.completions.create(model=model_name, - prompt=input_prompt) - output = completion.choices[0].text - print(output) + stream = client.completions.create(model=model_name, + prompt=input_prompt, + stream=True) + _print_completion_stream(stream) @staticmethod def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: From 81b16a2bc922e837267db7216a274c4d89a2cc0c Mon Sep 17 00:00:00 2001 From: Lumina Date: Thu, 18 Sep 2025 13:53:55 +0800 Subject: [PATCH 089/518] [Kernel] Better inf handling for grouped topk cu (#24886) Signed-off-by: lumina37 --- csrc/moe/grouped_topk_kernels.cu | 44 +++++++++++++++++--------------- 1 file changed, 24 insertions(+), 20 deletions(-) diff --git a/csrc/moe/grouped_topk_kernels.cu b/csrc/moe/grouped_topk_kernels.cu index accbb09858fa..b5321f748e6b 100644 --- a/csrc/moe/grouped_topk_kernels.cu +++ b/csrc/moe/grouped_topk_kernels.cu @@ -21,6 +21,7 @@ #include #include #include +#include #include #include namespace cg = cooperative_groups; @@ -28,7 +29,6 @@ namespace cg = cooperative_groups; namespace vllm { namespace moe { -constexpr float kNegInfinity = INFINITY * -1; constexpr unsigned FULL_WARP_MASK = 0xffffffff; constexpr int32_t WARP_SIZE = 32; constexpr int32_t BLOCK_SIZE = 512; @@ -411,14 +411,21 @@ __device__ inline float cuda_cast(__nv_bfloat16 val) { return __bfloat162float(val); } +template +__device__ inline T neg_inf() { + // cuda::std::numeric_limits::infinity() returns `0` for [T=bf16 or fp16] + // so we need to cast from fp32 + return cuda_cast(-cuda::std::numeric_limits::infinity()); +} + template __device__ void topk_with_k2(T* output, T const* input, cg::thread_block_tile<32> const& tile, int32_t const lane_id, int const num_experts_per_group) { // Get the top2 per thread - T largest = -INFINITY; - T second_largest = -INFINITY; + T largest = neg_inf(); + T second_largest = neg_inf(); if (num_experts_per_group > WARP_SIZE) { for (int i = lane_id; i < num_experts_per_group; i += WARP_SIZE) { @@ -513,8 +520,8 @@ __global__ void group_idx_and_topk_idx_kernel( warp_id * topk; s_topk_idx += warp_id * topk; - T value = kNegInfinity; - T topk_group_value = kNegInfinity; + T value = neg_inf(); + T topk_group_value = neg_inf(); int32_t num_equalto_topkth_group; #if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900)) @@ -525,11 +532,8 @@ __global__ void group_idx_and_topk_idx_kernel( if (case_id < num_tokens) { // calculate group_idx int32_t target_num_min = WARP_SIZE - n_group + topk_group; - if (lane_id < n_group && - (isfinite(cuda_cast( - group_scores[lane_id])))) // The check is necessary to avoid - // abnormal input - { + // The check is necessary to avoid abnormal input + if (lane_id < n_group && cuda::std::isfinite(group_scores[lane_id])) { value = group_scores[lane_id]; } @@ -540,11 +544,11 @@ __global__ void group_idx_and_topk_idx_kernel( __syncwarp(); // Ensure all threads have valid data before reduction topk_group_value = cg::reduce(tile, value, cg::greater()); if (value == topk_group_value) { - value = kNegInfinity; + value = neg_inf(); } pre_count_equal_to_top_value = count_equal_to_top_value; - count_equal_to_top_value = __popc(__ballot_sync( - FULL_WARP_MASK, (value == cuda_cast(kNegInfinity)))); + count_equal_to_top_value = + 
__popc(__ballot_sync(FULL_WARP_MASK, (value == neg_inf()))); } num_equalto_topkth_group = target_num_min - pre_count_equal_to_top_value; } @@ -552,11 +556,10 @@ __global__ void group_idx_and_topk_idx_kernel( warp_topk::WarpSelect - queue((int32_t)topk, -INFINITY); + queue((int32_t)topk, neg_inf()); int count_equalto_topkth_group = 0; - bool if_proceed_next_topk = - (topk_group_value != cuda_cast(kNegInfinity)); + bool if_proceed_next_topk = topk_group_value != neg_inf(); if (case_id < num_tokens && if_proceed_next_topk) { for (int i_group = 0; i_group < n_group; i_group++) { if ((group_scores[i_group] > topk_group_value) || @@ -566,10 +569,10 @@ __global__ void group_idx_and_topk_idx_kernel( for (int32_t i = lane_id; i < align_num_experts_per_group; i += WARP_SIZE) { T candidates = - (i < num_experts_per_group) && isfinite(cuda_cast( - scores_with_bias[offset + i])) + (i < num_experts_per_group) && + cuda::std::isfinite(scores_with_bias[offset + i]) ? scores_with_bias[offset + i] - : cuda_cast(kNegInfinity); + : neg_inf(); queue.add(candidates, offset + i); } if (group_scores[i_group] == topk_group_value) { @@ -598,7 +601,8 @@ __global__ void group_idx_and_topk_idx_kernel( if (i < topk) { s_topk_value[i] = value; } - topk_sum += reduce(tile, cuda_cast(value), cg::plus()); + topk_sum += + cg::reduce(tile, cuda_cast(value), cg::plus()); } } From 349e0e34627950db1cc4be0df9a0bc616e210589 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 07:23:29 +0100 Subject: [PATCH 090/518] [Docs] Fix API Reference (#25140) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- mkdocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mkdocs.yaml b/mkdocs.yaml index bbd850bdfee3..6f2be65a18af 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -79,7 +79,7 @@ plugins: - "re:vllm\\._.*" # Internal modules - "vllm.third_party" - "vllm.vllm_flash_attn" - - !ENV [API_AUTONAV_EXCLUDE, ""] + - !ENV [API_AUTONAV_EXCLUDE, "re:^$"] # Match nothing by default - mkdocstrings: handlers: python: From f4cd80f94404787859ba72dcddb5e818d8f0c9e7 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 07:29:05 +0100 Subject: [PATCH 091/518] Retrieve `sliding_window` from text config in Gemma3 MM (#25085) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/models/gemma3_mm.py | 3 ++- vllm/model_executor/models/gemma3n_mm.py | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index e652ba2f1c7f..bee9fbd2c084 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -688,7 +688,8 @@ def prepare_attn_masks( global_attn_mask = torch.where(img_mask == 2, 0, global_attn_mask) global_attn_masks.append(global_attn_mask) - if (sliding_window := self.config.sliding_window) is not None: + sliding_window = self.config.text_config.sliding_window + if sliding_window is not None: # Create a local causal mask with sliding window (1024). 
local_attn_mask = torch.ones_like(global_attn_mask) local_attn_mask = torch.tril(local_attn_mask, diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py index 663d4da7cec2..8d3079aee0df 100644 --- a/vllm/model_executor/models/gemma3n_mm.py +++ b/vllm/model_executor/models/gemma3n_mm.py @@ -461,9 +461,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.multimodal_config = multimodal_config self.vocab_size = config.text_config.vocab_size - self.sliding_window = getattr(config.text_config, - "interleaved_sliding_window", None) - self.vision_tower = AutoModel.from_config(config=config.vision_config) self.audio_tower = AutoModel.from_config(config=config.audio_config) self.embed_vision = Gemma3nMultimodalEmbedder(config.vision_config, From 350c94deb30747f84536ee34d91c6fca564667ce Mon Sep 17 00:00:00 2001 From: "rongfu.leng" Date: Thu, 18 Sep 2025 15:47:43 +0800 Subject: [PATCH 092/518] [Bugfix] when use s3 model cannot use default load_format (#24435) Signed-off-by: rongfu.leng Co-authored-by: 22quinn <33176974+22quinn@users.noreply.github.com> --- vllm/config/__init__.py | 12 ++++++++++++ vllm/engine/arg_utils.py | 1 - 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 64be2f38c6a3..631618d427d4 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -3029,6 +3029,18 @@ def try_verify_and_update_config(self): SequenceClassificationConfig) SequenceClassificationConfig.verify_and_update_config(self) + if hasattr(self.model_config, "model_weights") and is_runai_obj_uri( + self.model_config.model_weights): + if self.load_config.load_format == "auto": + logger.info("Detected Run:ai model config. " + "Overriding `load_format` to 'runai_streamer'") + self.load_config.load_format = "runai_streamer" + elif self.load_config.load_format != "runai_streamer": + raise ValueError(f"To load a model from S3, 'load_format' " + f"must be 'runai_streamer', " + f"but got '{self.load_config.load_format}'. " + f"Model: {self.model_config.model}") + def __str__(self): return ( f"model={self.model_config.model!r}, " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 4831cb5348c7..e2a1ec68e6f5 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -959,7 +959,6 @@ def create_model_config(self) -> ModelConfig: if (not isinstance(self, AsyncEngineArgs) and envs.VLLM_CI_USE_S3 and self.model in MODELS_ON_S3 and self.load_format == "auto"): self.model = f"{MODEL_WEIGHTS_S3_BUCKET}/{self.model}" - self.load_format = "runai_streamer" if self.disable_mm_preprocessor_cache: logger.warning( From ef7eefe17a7dc212ddb8a8aabd7760218a10e25e Mon Sep 17 00:00:00 2001 From: Tao He Date: Thu, 18 Sep 2025 16:16:04 +0800 Subject: [PATCH 093/518] [Qwen] Add fp8 checkpoint support for qwen3-next. 
(#25079) Signed-off-by: Tao He --- vllm/model_executor/models/qwen3_next.py | 35 ++++++++++---------- vllm/model_executor/models/qwen3_next_mtp.py | 8 +++-- 2 files changed, 22 insertions(+), 21 deletions(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index ca9f4d402dac..eb060cb90f44 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -30,7 +30,6 @@ GemmaRMSNorm as Qwen3NextRMSNorm) # yapf: enable from vllm.model_executor.layers.linear import (ColumnParallelLinear, - MergedColumnParallelLinear, QKVParallelLinear, ReplicatedLinear, RowParallelLinear) @@ -254,12 +253,20 @@ def __init__( # projection of the input hidden states self.projection_size_qkvz = self.key_dim * 2 + self.value_dim * 2 self.projection_size_ba = self.num_v_heads * 2 - self.in_proj = MergedColumnParallelLinear( + self.in_proj_qkvz = ColumnParallelLinear( input_size=self.hidden_size, - output_sizes=[self.projection_size_qkvz, self.projection_size_ba], + output_size=self.projection_size_qkvz, bias=False, quant_config=quant_config, - prefix=f"{prefix}.in_proj", + prefix=f"{prefix}.in_proj_qkvz", + ) + # ba_proj doesn't support blockwise fp8 quantization. + self.in_proj_ba = ColumnParallelLinear( + input_size=self.hidden_size, + output_size=self.projection_size_ba, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.in_proj_ba", ) query_key_settings = (self.key_dim, 0, False) @@ -420,19 +427,14 @@ def _forward( ssm_state = self_kv_cache[1] num_actual_tokens = attn_metadata.num_actual_tokens num_accepted_tokens = attn_metadata.num_accepted_tokens - - # 1. Set up dimensions for reshapes later - projected_states, _ = self.in_proj(hidden_states[:num_actual_tokens]) if spec_token_masks is not None: spec_token_masks = spec_token_masks[:num_actual_tokens] - projected_states_qkvz, projected_states_ba = torch.split( - projected_states, - [ - self.projection_size_qkvz // self.tp_size, - self.projection_size_ba // self.tp_size - ], - dim=-1, - ) + + # 1. 
Set up dimensions for reshapes later + projected_states_qkvz, _ = self.in_proj_qkvz( + hidden_states[:num_actual_tokens]) + projected_states_ba, _ = self.in_proj_ba( + hidden_states[:num_actual_tokens]) query, key, value, z, b, a = self.fix_query_key_value_ordering( projected_states_qkvz, projected_states_ba) query, key, value = map(lambda x: rearrange(x, 'l p d -> l (p d)'), @@ -976,8 +978,6 @@ def load_weights(self, weights: Iterable[tuple[str, ("qkv_proj", "v_proj", "v"), ("gate_up_proj", "gate_proj", 0), ("gate_up_proj", "up_proj", 1), - ("in_proj", "in_proj_qkvz", 0), - ("in_proj", "in_proj_ba", 1), ] params_dict = dict(self.named_parameters()) @@ -1055,7 +1055,6 @@ class Qwen3NextForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP, "v_proj", ], "gate_up_proj": ["gate_proj", "up_proj"], - "in_proj": ["in_proj_qkvz", "in_proj_ba"], } def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py index 190a1750e673..c755eeb9b4ea 100644 --- a/vllm/model_executor/models/qwen3_next_mtp.py +++ b/vllm/model_executor/models/qwen3_next_mtp.py @@ -63,7 +63,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config.hidden_size, gather_output=True, bias=False, - return_bias=False) + return_bias=False, + quant_config=quant_config, + prefix=f'{prefix}.fc') self.layers = torch.nn.ModuleList( Qwen3NextDecoderLayer( @@ -72,7 +74,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): model_config=model_config, cache_config=cache_config, quant_config=quant_config, - prefix=f'{prefix}.layers.{self.mtp_start_layer_idx + idx}', + prefix=f'{prefix}.layers.{idx}', ) for idx in range(self.num_mtp_layers)) self.make_empty_intermediate_tensors = ( @@ -233,7 +235,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config = config self.model = Qwen3NextMultiTokenPredictor(vllm_config=vllm_config, prefix=maybe_prefix( - prefix, "model")) + prefix, "mtp")) self.unpadded_vocab_size = config.vocab_size self.lm_head = ParallelLMHead(self.unpadded_vocab_size, config.hidden_size, From aa3f105c591a506523804e12800adcca80480bd8 Mon Sep 17 00:00:00 2001 From: Gerard Finol Date: Thu, 18 Sep 2025 11:02:14 +0200 Subject: [PATCH 094/518] Add 'path' option to ImagePrompt data_format (#25081) Signed-off-by: Gerard Finol --- .../prithvi_io_processor_plugin/prithvi_io_processor/types.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py index d480aef704c6..d4c6628211fb 100644 --- a/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py +++ b/tests/plugins/prithvi_io_processor_plugin/prithvi_io_processor/types.py @@ -22,7 +22,7 @@ class DataModuleConfig(TypedDict): class ImagePrompt(BaseModel): - data_format: Literal["b64_json", "bytes", "url"] + data_format: Literal["b64_json", "bytes", "url", "path"] """ This is the data type for the input image """ From 05b044e698bb3c151871d94b64fabd87188de9ef Mon Sep 17 00:00:00 2001 From: Punitvara Date: Thu, 18 Sep 2025 14:35:16 +0530 Subject: [PATCH 095/518] [Doc] Fix cross-reference warnings (#25058) Signed-off-by: Punit Vara Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/benchmarks/datasets.py | 3 ++- 
.../device_communicators/shm_object_storage.py | 8 ++++---- .../model_executor/layers/mamba/ops/causal_conv1d.py | 12 +++++++----- vllm/model_executor/models/mistral3.py | 2 +- vllm/multimodal/profiling.py | 2 +- vllm/v1/core/kv_cache_manager.py | 5 +++-- 6 files changed, 18 insertions(+), 14 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 1831539a6adb..1cab40802c39 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -171,7 +171,8 @@ def get_random_lora_request( If `None`, LoRA is not used. Returns: - A new [LoRARequest][] (or `None` if not applicable). + A new [`LoRARequest`][vllm.lora.request.LoRARequest] + (or `None` if not applicable). """ if max_loras is None or lora_path is None: return None diff --git a/vllm/distributed/device_communicators/shm_object_storage.py b/vllm/distributed/device_communicators/shm_object_storage.py index 3fac104bda1e..352e7525d4c8 100644 --- a/vllm/distributed/device_communicators/shm_object_storage.py +++ b/vllm/distributed/device_communicators/shm_object_storage.py @@ -30,7 +30,7 @@ class SingleWriterShmRingBuffer: - Maintains metadata for each allocated buffer chunk in the writer process - Supports custom "is_free_fn" functions to determine when buffers can be reused - - Each buffer chunk contains: [4-byte id][4-byte size][actual_data] + - Each buffer chunk contains: `[4-byte id][4-byte size][actual_data]` Key Concepts: - monotonic_id_start/end: Track the range of active buffer IDs @@ -99,7 +99,7 @@ class SingleWriterShmRingBuffer: - Writer handles garbage collection (free_buf) based on reader feedback Memory Layout per Buffer Chunk: - [4-byte monotonic_id][4-byte chunk_size][actual_data...] + `[4-byte monotonic_id][4-byte chunk_size][actual_data...]` ^metadata_start ^data_start The monotonic_id ensures data integrity - readers can verify they're @@ -185,7 +185,7 @@ def allocate_buf(self, size: int) -> tuple[int, int]: ''' Allocate a buffer `MD_SIZE` + `size` bytes in the shared memory. Memory layout: - [4-byte monotonic_id][4-byte size][buffer data...] + `[4-byte monotonic_id][4-byte size][buffer data...]` ''' assert self.is_writer, "Only the writer can allocate buffers." 
assert size > 0, "Size must be greater than 0" @@ -413,7 +413,7 @@ class SingleWriterShmObjectStorage: allocation Memory Layout per Object: - [4-byte reference_count][metadata_size][serialized_object_data] + `[4-byte reference_count][metadata_size][serialized_object_data]` Thread Safety: - Writer operations (put, clear) are single-threaded by design diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 7e3ea561fd29..2a88fa661da0 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -927,11 +927,13 @@ def causal_conv1d_update( validate_data=False, ): """ - x: (batch, dim) or (batch, dim, seqlen) or (num_tokens, dim) - [shape=2: single token prediction] - [shape=3: single or multiple tokens prediction] - [shape=2 with num_tokens: continuous batching, where num_tokens is the - total tokens of all sequences in that batch] + x: Input tensor which can take the following shapes: + + - `[batch, dim]` - single token prediction + - `[batch, dim, seqlen]` - single or multiple tokens prediction + - `[num_tokens, dim]` - continuous batching, where num_tokens is + the total tokens of all sequences in that batch + conv_state: (..., dim, state_len), where state_len >= width - 1 weight: (dim, width) bias: (dim,) diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index 09479012a03a..d15776a39362 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -583,7 +583,7 @@ def forward( inputs_embeds: Optional tensor of input embeddings. Info: - [Mistral3ImagePixelInputs][] + [`Mistral3ImagePixelInputs`][vllm.model_executor.models.mistral3.Mistral3ImagePixelInputs] """ if intermediate_tensors is not None: inputs_embeds = None diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index bad6c0c3d9db..fbbc55d3524c 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -301,7 +301,7 @@ def get_mm_max_contiguous_tokens( Returns the maximum length of the multimodal (image placeholders+text) tokens, including any break/text tokens in-between image embeddings. - [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] + ` [IMG] [IMG] [IMG] [IMG] [IMG] [IMG] ` Returns 9, even when the number of image embeddings is 6. This is important to take into account when profiling and diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 3a0fbb5e5c41..401327f727a4 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -24,8 +24,9 @@ class KVCacheBlocks: """ blocks: tuple[list[KVCacheBlock], ...] """ - blocks[i][j] refers to the i-th kv_cache_group and the j-th block of tokens. - We don't use block of tokens as the outer dimension because it assumes all + `blocks[i][j]` refers to the i-th kv_cache_group + and the j-th block of tokens.We don't use block of + tokens as the outer dimension because it assumes all kv_cache_groups have the same number of blocks, which is true for now but will be broken if we want to give different block_size to different kv_cache_groups in the future. 
From 29283e89762a3d572c504e5ea317351696b553a6 Mon Sep 17 00:00:00 2001 From: Aaron Pham Date: Thu, 18 Sep 2025 05:20:27 -0400 Subject: [PATCH 096/518] [Chore] Cleanup guided namespace, move to structured outputs config (#22772) Signed-off-by: Aaron Pham Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../scripts/hardware_ci/run-amd-test.sh | 6 - .buildkite/test-pipeline.yaml | 3 +- .github/mergify.yml | 2 +- .../benchmark_serving_structured_output.py | 16 +- docs/api/README.md | 2 +- docs/features/reasoning_outputs.md | 10 +- docs/features/structured_outputs.md | 36 +-- docs/features/tool_calling.md | 11 +- docs/serving/openai_compatible_server.md | 4 +- .../offline_inference/structured_outputs.py | 54 ++--- ...t_completion_client_with_tools_required.py | 2 +- .../structured_outputs/structured_outputs.py | 8 +- tests/entrypoints/conftest.py | 2 +- tests/entrypoints/llm/test_lazy_outlines.py | 82 ------- tests/entrypoints/openai/test_chat.py | 123 +++++------ tests/entrypoints/openai/test_completion.py | 79 ++++--- .../test_completion_with_function_calling.py | 4 +- .../entrypoints/openai/test_openai_schema.py | 8 +- .../openai/test_prompt_validation.py | 2 +- tests/entrypoints/openai/test_serving_chat.py | 4 - .../openai/test_transcription_validation.py | 2 +- .../openai/test_translation_validation.py | 2 +- tests/test_sampling_params.py | 84 ------- tests/tool_use/test_tool_choice_required.py | 11 +- tests/v1/core/test_scheduler.py | 6 +- tests/v1/engine/test_llm_engine.py | 4 +- tests/v1/entrypoints/conftest.py | 2 +- .../llm/test_struct_output_generate.py | 135 ++++++------ .../openai/test_chat_completion.py | 14 +- .../v1/entrypoints/openai/test_completion.py | 14 +- vllm/config/__init__.py | 35 +-- vllm/engine/arg_utils.py | 95 ++++---- vllm/engine/async_llm_engine.py | 7 +- vllm/engine/llm_engine.py | 18 +- vllm/engine/protocol.py | 7 +- vllm/entrypoints/llm.py | 27 ++- vllm/entrypoints/openai/api_server.py | 10 +- vllm/entrypoints/openai/protocol.py | 208 ++++++------------ vllm/entrypoints/openai/serving_chat.py | 2 +- vllm/model_executor/models/config.py | 6 +- vllm/sampling_params.py | 62 ++---- vllm/transformers_utils/tokenizers/mistral.py | 5 +- vllm/v1/engine/async_llm.py | 3 - vllm/v1/engine/processor.py | 57 +++-- vllm/v1/request.py | 2 +- vllm/v1/structured_output/__init__.py | 13 +- vllm/v1/structured_output/backend_guidance.py | 4 +- .../backend_lm_format_enforcer.py | 22 +- vllm/v1/structured_output/backend_outlines.py | 32 +-- vllm/v1/structured_output/backend_xgrammar.py | 38 ++-- vllm/v1/structured_output/request.py | 2 +- 51 files changed, 580 insertions(+), 807 deletions(-) delete mode 100644 tests/entrypoints/llm/test_lazy_outlines.py delete mode 100644 tests/test_sampling_params.py diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index c395011a2448..7f90181048d0 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -167,12 +167,6 @@ if [[ $commands == *" entrypoints/llm "* ]]; then --ignore=entrypoints/llm/test_prompt_validation.py "} fi -#Obsolete currently -##ignore certain Entrypoints/llm tests -#if [[ $commands == *" && pytest -v -s entrypoints/llm/test_guided_generate.py"* ]]; then -# commands=${commands//" && pytest -v -s entrypoints/llm/test_guided_generate.py"/" "} -#fi - # --ignore=entrypoints/openai/test_encoder_decoder.py \ # 
--ignore=entrypoints/openai/test_embedding.py \ # --ignore=entrypoints/openai/test_oot_registration.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 8dd99bf1a38f..66dfc990805f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -108,8 +108,7 @@ steps: - tests/entrypoints/offline_mode commands: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_lazy_outlines.py --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - - pytest -v -s entrypoints/llm/test_lazy_outlines.py # it needs a clean process + - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests diff --git a/.github/mergify.yml b/.github/mergify.yml index f2dd2e06214a..94198b1251e0 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -171,7 +171,7 @@ pull_request_rules: - files=examples/online_serving/openai_chat_completion_structured_outputs.py - files=examples/online_serving/openai_chat_completion_structured_outputs_with_reasoning.py - files~=^tests/v1/structured_output/ - - files=tests/v1/entrypoints/llm/test_guided_generate.py + - files=tests/v1/entrypoints/llm/test_struct_output_generate.py - files~=^vllm/v1/structured_output/ actions: label: diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py index 4aae755eb4e4..73b4aa5a87e0 100644 --- a/benchmarks/benchmark_serving_structured_output.py +++ b/benchmarks/benchmark_serving_structured_output.py @@ -696,11 +696,11 @@ def _eval_correctness_regex(expected, actual): return re.match(args.regex, actual) is not None def _eval_correctness(expected, actual): - if args.structure_type == "guided_json": + if args.structure_type == "json": return _eval_correctness_json(expected, actual) - elif args.structure_type == "guided_regex": + elif args.structure_type == "regex": return _eval_correctness_regex(expected, actual) - elif args.structure_type == "guided_choice": + elif args.structure_type == "choice": return _eval_correctness_choice(expected, actual) else: return None @@ -780,18 +780,18 @@ def main(args: argparse.Namespace): ) if args.dataset == "grammar": - args.structure_type = "guided_grammar" + args.structure_type = "grammar" elif args.dataset == "regex": - args.structure_type = "guided_regex" + args.structure_type = "regex" elif args.dataset == "choice": - args.structure_type = "guided_choice" + args.structure_type = "choice" else: - args.structure_type = "guided_json" + args.structure_type = "json" if args.no_structured_output: args.structured_output_ratio = 0 if args.save_results: - result_file_name = f"{args.structured_output_ratio}guided" + result_file_name = f"{args.structured_output_ratio}so" result_file_name += f"_{backend}" result_file_name += f"_{args.request_rate}qps" result_file_name += f"_{args.model.split('/')[-1]}" diff --git a/docs/api/README.md b/docs/api/README.md index 57142e8f5625..148211756480 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -14,7 +14,7 @@ API documentation for vLLM's configuration classes. 
- [vllm.config.LoRAConfig][] - [vllm.config.MultiModalConfig][] - [vllm.config.PoolerConfig][] -- [vllm.config.DecodingConfig][] +- [vllm.config.StructuredOutputsConfig][] - [vllm.config.ObservabilityConfig][] - [vllm.config.KVTransferConfig][] - [vllm.config.CompilationConfig][] diff --git a/docs/features/reasoning_outputs.md b/docs/features/reasoning_outputs.md index d518e7f0cff4..85681669dfb2 100644 --- a/docs/features/reasoning_outputs.md +++ b/docs/features/reasoning_outputs.md @@ -10,12 +10,12 @@ vLLM currently supports the following reasoning models: | Model Series | Parser Name | Structured Output Support | Tool Calling | |--------------|-------------|------------------|-------------| -| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `guided_json`, `guided_regex` | ❌ | -| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `guided_json`, `guided_regex` | ✅ | +| [DeepSeek R1 series](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) | `deepseek_r1` | `json`, `regex` | ❌ | +| [QwQ-32B](https://huggingface.co/Qwen/QwQ-32B) | `deepseek_r1` | `json`, `regex` | ✅ | | [IBM Granite 3.2 language models](https://huggingface.co/collections/ibm-granite/granite-32-language-models-67b3bc8c13508f6d064cff9a) | `granite` | ❌ | ❌ | -| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `guided_json`, `guided_regex` | ✅ | -| [Hunyuan A13B series](https://huggingface.co/collections/tencent/hunyuan-a13b-685ec38e5b46321e3ea7c4be) | `hunyuan_a13b` | `guided_json`, `guided_regex` | ✅ | -| [GLM-4.5 series](https://huggingface.co/collections/zai-org/glm-45-687c621d34bda8c9e4bf503b) | `glm45` | `guided_json`, `guided_regex` | ✅ | +| [Qwen3 series](https://huggingface.co/collections/Qwen/qwen3-67dd247413f0e2e4f653967f) | `qwen3` | `json`, `regex` | ✅ | +| [Hunyuan A13B series](https://huggingface.co/collections/tencent/hunyuan-a13b-685ec38e5b46321e3ea7c4be) | `hunyuan_a13b` | `json`, `regex` | ✅ | +| [GLM-4.5 series](https://huggingface.co/collections/zai-org/glm-45-687c621d34bda8c9e4bf503b) | `glm45` | `json`, `regex` | ✅ | !!! note IBM Granite 3.2 reasoning is disabled by default; to enable it, you must also pass `thinking=True` in your `chat_template_kwargs`. diff --git a/docs/features/structured_outputs.md b/docs/features/structured_outputs.md index 0d6294a5fdd7..1f955c6e30d6 100644 --- a/docs/features/structured_outputs.md +++ b/docs/features/structured_outputs.md @@ -12,23 +12,23 @@ You can generate structured outputs using the OpenAI's [Completions](https://pla The following parameters are supported, which must be added as extra parameters: -- `guided_choice`: the output will be exactly one of the choices. -- `guided_regex`: the output will follow the regex pattern. -- `guided_json`: the output will follow the JSON schema. -- `guided_grammar`: the output will follow the context free grammar. +- `choice`: the output will be exactly one of the choices. +- `regex`: the output will follow the regex pattern. +- `json`: the output will follow the JSON schema. +- `grammar`: the output will follow the context free grammar. - `structural_tag`: Follow a JSON schema within a set of specified tags within the generated text. You can see the complete list of supported parameters on the [OpenAI-Compatible Server](../serving/openai_compatible_server.md) page. Structured outputs are supported by default in the OpenAI-Compatible Server. 
You may choose to specify the backend to use by setting the -`--guided-decoding-backend` flag to `vllm serve`. The default backend is `auto`, +`--structured-outputs-config.backend` flag to `vllm serve`. The default backend is `auto`, which will try to choose an appropriate backend based on the details of the request. You may also choose a specific backend, along with some options. A full set of options is available in the `vllm serve --help` text. -Now let´s see an example for each of the cases, starting with the `guided_choice`, as it´s the easiest one: +Now let´s see an example for each of the cases, starting with the `choice`, as it´s the easiest one: ??? code @@ -45,12 +45,12 @@ Now let´s see an example for each of the cases, starting with the `guided_choic messages=[ {"role": "user", "content": "Classify this sentiment: vLLM is wonderful!"} ], - extra_body={"guided_choice": ["positive", "negative"]}, + extra_body={"structured_outputs": {"choice": ["positive", "negative"]}}, ) print(completion.choices[0].message.content) ``` -The next example shows how to use the `guided_regex`. The idea is to generate an email address, given a simple regex template: +The next example shows how to use the `regex`. The idea is to generate an email address, given a simple regex template: ??? code @@ -63,18 +63,18 @@ The next example shows how to use the `guided_regex`. The idea is to generate an "content": "Generate an example email address for Alan Turing, who works in Enigma. End in .com and new line. Example result: alan.turing@enigma.com\n", } ], - extra_body={"guided_regex": r"\w+@\w+\.com\n", "stop": ["\n"]}, + extra_body={"structured_outputs": {"regex": r"\w+@\w+\.com\n"}, "stop": ["\n"]}, ) print(completion.choices[0].message.content) ``` One of the most relevant features in structured text generation is the option to generate a valid JSON with pre-defined fields and formats. -For this we can use the `guided_json` parameter in two different ways: +For this we can use the `json` parameter in two different ways: - Using directly a [JSON Schema](https://json-schema.org/) - Defining a [Pydantic model](https://docs.pydantic.dev/latest/) and then extracting the JSON Schema from it (which is normally an easier option). -The next example shows how to use the `guided_json` parameter with a Pydantic model: +The next example shows how to use the `response_format` parameter with a Pydantic model: ??? code @@ -119,7 +119,7 @@ The next example shows how to use the `guided_json` parameter with a Pydantic mo JSON schema and how the fields should be populated. This can improve the results notably in most cases. -Finally we have the `guided_grammar` option, which is probably the most +Finally we have the `grammar` option, which is probably the most difficult to use, but it´s really powerful. It allows us to define complete languages like SQL queries. It works by using a context free EBNF grammar. 
As an example, we can use to define a specific format of simplified SQL queries: @@ -149,7 +149,7 @@ As an example, we can use to define a specific format of simplified SQL queries: "content": "Generate an SQL query to show the 'username' and 'email' from the 'users' table.", } ], - extra_body={"guided_grammar": simplified_sql_grammar}, + extra_body={"structured_outputs": {"grammar": simplified_sql_grammar}}, ) print(completion.choices[0].message.content) ``` @@ -292,8 +292,8 @@ An example of using `structural_tag` can be found here: int: top_p=0.95, n=n, seed=seed, - guided_decoding=GuidedDecodingParams( + structured_outputs=StructuredOutputsParams( regex="[0-9]+") if structured_outputs else None, ) for n in n_list ], n_list diff --git a/tests/v1/entrypoints/conftest.py b/tests/v1/entrypoints/conftest.py index ffe061212466..46b953fe3743 100644 --- a/tests/v1/entrypoints/conftest.py +++ b/tests/v1/entrypoints/conftest.py @@ -151,7 +151,7 @@ def sample_definition_json_schema(): @pytest.fixture -def sample_guided_choice(): +def sample_structured_outputs_choices(): return [ "Python", "Java", "JavaScript", "C++", "C#", "PHP", "TypeScript", "Ruby", "Swift", "Kotlin" diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index ad62914195b4..4b0f3b2d9967 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -15,12 +15,13 @@ from pydantic import BaseModel from tests.reasoning.utils import run_reasoning_extraction +from vllm.config import StructuredOutputsConfig from vllm.distributed import cleanup_dist_env_and_memory from vllm.entrypoints.llm import LLM from vllm.outputs import RequestOutput from vllm.platforms import current_platform from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager -from vllm.sampling_params import GuidedDecodingParams, SamplingParams +from vllm.sampling_params import SamplingParams, StructuredOutputsParams if TYPE_CHECKING: from vllm.config import TokenizerMode @@ -90,7 +91,7 @@ def _load_json(s: str, backend: str) -> str: @pytest.mark.skip_global_cleanup @pytest.mark.parametrize( - "model_name, guided_decoding_backend, tokenizer_mode, speculative_config", + "model_name, backend, tokenizer_mode, speculative_config", PARAMS_MODELS_BACKENDS_TOKENIZER_MODE) def test_structured_output( monkeypatch: pytest.MonkeyPatch, @@ -99,8 +100,8 @@ def test_structured_output( sample_sql_ebnf: str, sample_sql_lark: str, sample_regex: str, - sample_guided_choice: str, - guided_decoding_backend: str, + sample_structured_outputs_choices: str, + backend: str, tokenizer_mode: str, model_name: str, speculative_config: dict[str, Any], @@ -115,16 +116,15 @@ def test_structured_output( enforce_eager = bool(not current_platform.is_tpu()) # Use a single LLM instance for several scenarios to # speed up the test suite. 
- llm = LLM( - model=model_name, - enforce_eager=enforce_eager, - max_model_len=1024, - guided_decoding_backend=guided_decoding_backend, - guided_decoding_disable_any_whitespace=(guided_decoding_backend - in {"xgrammar", "guidance"}), - seed=120, - tokenizer_mode=tokenizer_mode, - speculative_config=speculative_config) + llm = LLM(model=model_name, + enforce_eager=enforce_eager, + max_model_len=1024, + structured_outputs_config=dict(backend=backend, + disable_any_whitespace=backend + in {"xgrammar", "guidance"}), + seed=120, + tokenizer_mode=tokenizer_mode, + speculative_config=speculative_config) # # Test 1: Generate JSON output based on a provided schema @@ -132,7 +132,7 @@ def test_structured_output( sampling_params = SamplingParams( temperature=1.0, max_tokens=4096, - guided_decoding=GuidedDecodingParams(json=sample_json_schema)) + structured_outputs=StructuredOutputsParams(json=sample_json_schema)) prompt = ("Give an example JSON for an employee profile that fits this " "schema. Make the response as short as possible. Schema: " @@ -152,7 +152,7 @@ def test_structured_output( generated_text = output.outputs[0].text assert generated_text is not None - if guided_decoding_backend != 'lm-format-enforcer': + if backend != 'lm-format-enforcer': assert "\n" not in generated_text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") output_json = json.loads(generated_text) @@ -161,12 +161,12 @@ def test_structured_output( # # Test 2: Generate JSON object without a schema # - if guided_decoding_backend != "outlines": + if backend != "outlines": sampling_params = SamplingParams( temperature=1.0, max_tokens=4096, n=2, - guided_decoding=GuidedDecodingParams(json_object=True)) + structured_outputs=StructuredOutputsParams(json_object=True)) outputs = llm.generate(prompts=( "Generate a JSON object with curly braces for a person with " @@ -195,8 +195,9 @@ def test_structured_output( sampling_params = SamplingParams( temperature=1.0, max_tokens=4096, - guided_decoding=GuidedDecodingParams(json=unsupported_json_schema)) - if guided_decoding_backend.startswith("xgrammar"): + structured_outputs=StructuredOutputsParams( + json=unsupported_json_schema)) + if backend.startswith("xgrammar"): with pytest.raises(ValueError, match="The provided JSON schema contains features " "not supported by xgrammar."): @@ -230,7 +231,7 @@ def test_structured_output( parsed_json = json.loads(generated_text) assert isinstance(parsed_json, dict) - if guided_decoding_backend not in ["outlines", "lm-format-enforcer"]: + if backend not in ["outlines", "lm-format-enforcer"]: # # Test 4: Generate SQL statement using EBNF grammar # @@ -238,7 +239,8 @@ def test_structured_output( temperature=0.8, top_p=0.95, max_tokens=1000, - guided_decoding=GuidedDecodingParams(grammar=sample_sql_ebnf)) + structured_outputs=StructuredOutputsParams( + grammar=sample_sql_ebnf)) outputs = llm.generate( ("Generate a sql statement that selects col_1 from " "table_1 where it is equal to 1. Make the response as short as " @@ -271,7 +273,8 @@ def test_structured_output( temperature=0.8, top_p=0.95, max_tokens=1000, - guided_decoding=GuidedDecodingParams(grammar=sample_sql_lark)) + structured_outputs=StructuredOutputsParams( + grammar=sample_sql_lark)) outputs = llm.generate( ("Generate a sql statement that selects col_1 from " "table_1 where it is equal to 1. 
Make the response as short as " @@ -309,7 +312,8 @@ def test_structured_output( temperature=0.8, top_p=0.95, max_tokens=1000, - guided_decoding=GuidedDecodingParams(grammar="not a grammar")) + structured_outputs=StructuredOutputsParams( + grammar="not a grammar")) with pytest.raises(ValueError, match="Failed to convert the grammar "): llm.generate( ("Generate a sql statement that selects col_1 from " @@ -325,7 +329,7 @@ def test_structured_output( sampling_params = SamplingParams( temperature=0.8, top_p=0.95, - guided_decoding=GuidedDecodingParams(regex=sample_regex)) + structured_outputs=StructuredOutputsParams(regex=sample_regex)) prompt = (f"Give an example IPv4 address with this regex: {sample_regex}. " f"Make the response as short as possible.") @@ -352,7 +356,8 @@ def test_structured_output( sampling_params = SamplingParams( temperature=0.8, top_p=0.95, - guided_decoding=GuidedDecodingParams(choice=sample_guided_choice)) + structured_outputs=StructuredOutputsParams( + choice=sample_structured_outputs_choices)) outputs = llm.generate( ("The best language for type-safe systems programming is " @@ -368,7 +373,7 @@ def test_structured_output( generated_text = output.outputs[0].text print(generated_text) assert generated_text is not None - assert generated_text in sample_guided_choice + assert generated_text in sample_structured_outputs_choices print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") # @@ -378,7 +383,7 @@ def test_structured_output( sampling_params = SamplingParams( temperature=1.0, max_tokens=1000, - guided_decoding=GuidedDecodingParams(json=json_schema)) + structured_outputs=StructuredOutputsParams(json=json_schema)) outputs = llm.generate( ("Generate a JSON with the brand, model and car_type of the most " @@ -422,7 +427,7 @@ def test_structured_output( sampling_params = SamplingParams( temperature=1.0, max_tokens=4096, - guided_decoding=GuidedDecodingParams(json=json_schema)) + structured_outputs=StructuredOutputsParams(json=json_schema)) outputs = llm.generate( ("Generate a description of a frog using 50 characters. 
" @@ -444,7 +449,7 @@ def test_structured_output( output_json = json.loads(generated_text) jsonschema.validate(instance=output_json, schema=json_schema) - if guided_decoding_backend not in ["outlines", "lm-format-enforcer"]: + if backend not in ["outlines", "lm-format-enforcer"]: # # Test 11: Generate structured output using structural_tag format # @@ -470,7 +475,7 @@ def test_structured_output( sampling_params = SamplingParams( temperature=0.0, max_tokens=4096, - guided_decoding=GuidedDecodingParams( + structured_outputs=StructuredOutputsParams( structural_tag=json.dumps(structural_tag_config))) prompt = """ @@ -547,7 +552,7 @@ def test_structured_output( @pytest.mark.skip_global_cleanup @pytest.mark.parametrize( - "model_name, guided_decoding_backend, tokenizer_mode, reasoning_parser, speculative_config", # noqa: E501 + "model_name, backend, tokenizer_mode, reasoning_parser, speculative_config", # noqa: E501 [ ("deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", "xgrammar", "auto", "deepseek_r1", NGRAM_SPEC_CONFIG), @@ -556,7 +561,7 @@ def test_structured_output( ) def test_structured_output_with_reasoning_matrices( monkeypatch: pytest.MonkeyPatch, - guided_decoding_backend: str, + backend: str, tokenizer_mode: TokenizerMode, reasoning_parser: str, model_name: str, @@ -576,10 +581,11 @@ def test_structured_output_with_reasoning_matrices( enforce_eager=bool(not current_platform.is_tpu()), max_model_len=1024, max_num_seqs=16, - guided_decoding_backend=guided_decoding_backend, - guided_decoding_disable_any_whitespace=True, + structured_outputs_config=dict(backend=backend, + disable_any_whitespace=backend + in {"xgrammar", "guidance"}, + reasoning_parser=reasoning_parser), tokenizer_mode=tokenizer_mode, - reasoning_parser=reasoning_parser, speculative_config=speculative_config, ) tokenizer = llm.get_tokenizer() @@ -603,7 +609,7 @@ def test_structured_output_with_reasoning_matrices( sampling_params = SamplingParams( temperature=0.1, max_tokens=8192, - guided_decoding=GuidedDecodingParams(json=reasoning_schema), + structured_outputs=StructuredOutputsParams(json=reasoning_schema), ) outputs = llm.generate( [reasoning_prompt], @@ -640,13 +646,14 @@ def test_structured_output_auto_mode( llm = LLM(model=model_name, max_model_len=1024, - guided_decoding_backend="auto", + structured_outputs_config=dict(backend="auto"), tokenizer_mode=tokenizer_mode) sampling_params = SamplingParams( temperature=1.0, max_tokens=1000, - guided_decoding=GuidedDecodingParams(json=unsupported_json_schema)) + structured_outputs=StructuredOutputsParams( + json=unsupported_json_schema)) prompts = ( "Give an example JSON object for a grade " @@ -681,9 +688,10 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch): llm = LLM(model="Qwen/Qwen2.5-1.5B-Instruct", max_model_len=1024, - guided_decoding_backend="guidance", - guided_decoding_disable_any_whitespace=True, - guided_decoding_disable_additional_properties=True) + structured_outputs_config=dict( + backend="guidance", + disable_any_whitespace=True, + disable_additional_properties=True)) schema = { 'type': 'object', @@ -709,14 +717,15 @@ def test_guidance_no_additional_properties(monkeypatch: pytest.MonkeyPatch): "<|im_end|>\n<|im_start|>assistant\n") def generate_with_backend(backend): - guided_params = GuidedDecodingParams( + structured_outputs_params = StructuredOutputsParams( json=schema, backend=backend, disable_any_whitespace=True, disable_additional_properties=True) - sampling_params = SamplingParams(temperature=0, - max_tokens=256, - 
guided_decoding=guided_params) + sampling_params = SamplingParams( + temperature=0, + max_tokens=256, + structured_outputs=structured_outputs_params) outputs = llm.generate(prompt, sampling_params=sampling_params) assert outputs is not None @@ -736,12 +745,11 @@ def generate_with_backend(backend): assert "a6" not in generated -@pytest.mark.parametrize("guided_decoding_backend", - ["guidance", "xgrammar", "outlines"]) -def test_structured_output_batched_with_non_guided_requests( +@pytest.mark.parametrize("backend", ["guidance", "xgrammar", "outlines"]) +def test_structured_output_batched_with_non_structured_outputs_requests( monkeypatch: pytest.MonkeyPatch, sample_json_schema: dict[str, Any], - guided_decoding_backend: str, + backend: str, ): monkeypatch.setenv("VLLM_USE_V1", "1") @@ -753,24 +761,25 @@ def test_structured_output_batched_with_non_guided_requests( model="meta-llama/Meta-Llama-3.1-8B-Instruct", enforce_eager=enforce_eager, max_model_len=1024, - guided_decoding_backend=guided_decoding_backend, - guided_decoding_disable_any_whitespace=(guided_decoding_backend - in {"xgrammar", "guidance"}), + structured_outputs_config=StructuredOutputsConfig( + backend=backend, + disable_any_whitespace=backend in {"xgrammar", "guidance"}, + ), ) - guided_prompt = ( + structured_outputs_prompt = ( "Give an example JSON for an employee profile that fits this " "schema. Make the response as short as possible. Schema: " f"{sample_json_schema}") - non_guided_prompt = "The diameter of the Earth in kilometers is " + non_structured_outputs_prompt = "The diameter of the Earth in kilometers is " - prompts = [guided_prompt, non_guided_prompt] + prompts = [structured_outputs_prompt, non_structured_outputs_prompt] sampling_params = [ - SamplingParams( - temperature=1.0, - max_tokens=400, - guided_decoding=GuidedDecodingParams(json=sample_json_schema)), + SamplingParams(temperature=1.0, + max_tokens=400, + structured_outputs=StructuredOutputsParams( + json=sample_json_schema)), # No max tokens, temp=0 to assert on contents SamplingParams( seed=42, @@ -801,16 +810,16 @@ def test_structured_output_batched_with_non_guided_requests( print(f"Prompt:\n{prompt!r}\nGenerated text:\n{generated_text!r}") if index == 0: - # First prompt is guided, expect valid JSON + # First prompt is structured outputs, expect valid JSON assert "\n" not in generated_text output_json = json.loads(generated_text) jsonschema.validate(instance=output_json, schema=sample_json_schema) else: - # Second prompt is not guided, expect valid output + # Second prompt is not structured outputs, expect valid output # Cannot assert on exact output, but we can expect it to be factual assert "12,742" in generated_text - # non-guided requests should not return a valid JSON here + # non-structured outputs requests should not return a valid JSON here with pytest.raises(ValueError): output_json = json.loads(generated_text) diff --git a/tests/v1/entrypoints/openai/test_chat_completion.py b/tests/v1/entrypoints/openai/test_chat_completion.py index dffb32846c05..9aa285aa9b18 100644 --- a/tests/v1/entrypoints/openai/test_chat_completion.py +++ b/tests/v1/entrypoints/openai/test_chat_completion.py @@ -77,7 +77,9 @@ async def test_invalid_json_schema(client: openai.AsyncOpenAI, "role": "user", "content": prompt, }], - extra_body={"guided_json": invalid_json_schema}, + extra_body={"structured_outputs": { + "json": invalid_json_schema + }}, ) @@ -99,7 +101,9 @@ async def test_invalid_regex(client: openai.AsyncOpenAI, model_name: str): "content": prompt, }], 
extra_body={ - "guided_regex": r"[.*", + "structured_outputs": { + "regex": r"[.*" + }, "stop": ["\n"] }, ) @@ -134,5 +138,9 @@ async def test_invalid_grammar(client: openai.AsyncOpenAI, model_name: str): "role": "user", "content": prompt, }], - extra_body={"guided_grammar": invalid_simplified_sql_grammar}, + extra_body={ + "structured_outputs": { + "grammar": invalid_simplified_sql_grammar + } + }, ) diff --git a/tests/v1/entrypoints/openai/test_completion.py b/tests/v1/entrypoints/openai/test_completion.py index 3114d7639f04..9090beb4bbd2 100644 --- a/tests/v1/entrypoints/openai/test_completion.py +++ b/tests/v1/entrypoints/openai/test_completion.py @@ -627,7 +627,9 @@ async def test_invalid_json_schema(client: openai.AsyncOpenAI, await client.completions.create( model=model_name, prompt=prompt, - extra_body={"guided_json": invalid_json_schema}, + extra_body={"structured_outputs": { + "json": invalid_json_schema + }}, ) @@ -646,7 +648,9 @@ async def test_invalid_regex(client: openai.AsyncOpenAI, model_name: str): model=model_name, prompt=prompt, extra_body={ - "guided_regex": r"[.*", + "structured_outputs": { + "regex": r"[.*" + }, "stop": ["\n"] }, ) @@ -678,7 +682,11 @@ async def test_invalid_grammar(client: openai.AsyncOpenAI, model_name: str): await client.completions.create( model=model_name, prompt=prompt, - extra_body={"guided_grammar": invalid_simplified_sql_grammar}, + extra_body={ + "structured_outputs": { + "grammar": invalid_simplified_sql_grammar + } + }, ) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 631618d427d4..9a1c5f0b0d45 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -2277,34 +2277,34 @@ def get_served_model_name(model: str, return served_model_name -GuidedDecodingBackend = Literal["auto", "xgrammar", "guidance", "outlines", - "lm-format-enforcer"] +StructuredOutputsBackend = Literal["auto", "xgrammar", "guidance", "outlines", + "lm-format-enforcer"] @config @dataclass -class DecodingConfig: - """Dataclass which contains the decoding strategy of the engine.""" +class StructuredOutputsConfig: + """Dataclass which contains structured outputs config for the engine.""" - backend: GuidedDecodingBackend = "auto" - """Which engine will be used for guided decoding (JSON schema / regex etc) - by default. With "auto", we will make opinionated choices based on request - contents and what the backend libraries currently support, so the behavior - is subject to change in each release.""" + backend: StructuredOutputsBackend = "auto" + """Which engine will be used for structured outputs (e.g. JSON schema, + regex, etc) by default. With "auto", we will make opinionated choices + based on request contents and what the backend libraries currently support, + so the behavior is subject to change in each release.""" disable_fallback: bool = False """If `True`, vLLM will not fallback to a different backend on error.""" disable_any_whitespace: bool = False - """If `True`, the model will not generate any whitespace during guided - decoding. This is only supported for xgrammar and guidance backends.""" + """If `True`, the model will not generate any whitespace during structured + outputs. This is only supported for xgrammar and guidance backends.""" disable_additional_properties: bool = False """If `True`, the `guidance` backend will not use `additionalProperties` in the JSON schema. 
This is only supported for the `guidance` backend and is used to better align its behaviour with `outlines` and `xgrammar`.""" - reasoning_backend: str = "" + reasoning_parser: str = "" """Select the reasoning parser depending on the model that you're using. This is used to parse the reasoning content into OpenAI API format.""" @@ -2451,8 +2451,9 @@ class VllmConfig: """LoRA configuration.""" speculative_config: Optional[SpeculativeConfig] = None """Speculative decoding configuration.""" - decoding_config: DecodingConfig = field(default_factory=DecodingConfig) - """Decoding configuration.""" + structured_outputs_config: StructuredOutputsConfig = field( + default_factory=StructuredOutputsConfig) + """Structured outputs configuration.""" observability_config: Optional[ObservabilityConfig] = None """Observability configuration.""" quant_config: Optional[QuantizationConfig] = None @@ -2543,8 +2544,8 @@ def compute_hash(self) -> str: vllm_factors.append(self.speculative_config.compute_hash()) else: vllm_factors.append("None") - if self.decoding_config: - vllm_factors.append(self.decoding_config.compute_hash()) + if self.structured_outputs_config: + vllm_factors.append(self.structured_outputs_config.compute_hash()) else: vllm_factors.append("None") if self.observability_config: @@ -3063,7 +3064,7 @@ def __str__(self): f"enforce_eager={self.model_config.enforce_eager}, " f"kv_cache_dtype={self.cache_config.cache_dtype}, " f"device_config={self.device_config.device}, " - f"decoding_config={self.decoding_config!r}, " + f"structured_outputs_config={self.structured_outputs_config!r}, " f"observability_config={self.observability_config!r}, " f"seed={self.model_config.seed}, " f"served_model_name={self.model_config.served_model_name}, " diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index e2a1ec68e6f5..fb5beab77b27 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -22,17 +22,16 @@ import vllm.envs as envs from vllm.config import (BlockSize, CacheConfig, CacheDType, CompilationConfig, - ConfigType, ConvertOption, DecodingConfig, - DetailedTraceModules, Device, DeviceConfig, - DistributedExecutorBackend, EPLBConfig, - GuidedDecodingBackend, HfOverrides, KVEventsConfig, + ConfigType, ConvertOption, DetailedTraceModules, + Device, DeviceConfig, DistributedExecutorBackend, + EPLBConfig, HfOverrides, KVEventsConfig, KVTransferConfig, LoadConfig, LogprobsMode, LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig, ModelDType, ModelImpl, ObservabilityConfig, ParallelConfig, PoolerConfig, PrefixCachingHashAlgo, RunnerOption, SchedulerConfig, SchedulerPolicy, - SpeculativeConfig, TaskOption, TokenizerMode, - VllmConfig, get_attr_docs) + SpeculativeConfig, StructuredOutputsConfig, + TaskOption, TokenizerMode, VllmConfig, get_attr_docs) from vllm.config.multimodal import MMCacheType, MultiModalConfig from vllm.config.parallel import ExpertPlacementStrategy from vllm.config.utils import get_field @@ -418,12 +417,15 @@ class EngineArgs: disable_hybrid_kv_cache_manager: bool = ( SchedulerConfig.disable_hybrid_kv_cache_manager) - guided_decoding_backend: GuidedDecodingBackend = DecodingConfig.backend - guided_decoding_disable_fallback: bool = DecodingConfig.disable_fallback - guided_decoding_disable_any_whitespace: bool = \ - DecodingConfig.disable_any_whitespace - guided_decoding_disable_additional_properties: bool = \ - DecodingConfig.disable_additional_properties + structured_outputs_config: StructuredOutputsConfig = get_field( + VllmConfig, "structured_outputs_config") + 
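To illustrate the engine-level half of this rename end to end, here is a minimal offline sketch; it is not part of the patch, and the model name and option values are placeholders chosen for illustration.

```python
# Hypothetical offline sketch of the renamed configuration surfaces.
# Assumes a vLLM build with this change; model and options are placeholders.
from vllm import LLM, SamplingParams
from vllm.config import StructuredOutputsConfig
from vllm.sampling_params import StructuredOutputsParams

llm = LLM(
    model="Qwen/Qwen2.5-1.5B-Instruct",  # placeholder model
    structured_outputs_config=StructuredOutputsConfig(
        backend="xgrammar",           # replaces --guided-decoding-backend
        disable_any_whitespace=True,  # replaces guided_decoding_disable_any_whitespace
    ),
)

params = SamplingParams(
    temperature=0.0,
    max_tokens=64,
    # replaces guided_decoding=GuidedDecodingParams(choice=[...])
    structured_outputs=StructuredOutputsParams(
        choice=["positive", "negative"]),
)

outputs = llm.generate("Classify this sentiment: vLLM is wonderful!", params)
print(outputs[0].outputs[0].text)
```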
reasoning_parser: str = StructuredOutputsConfig.reasoning_parser + # Deprecated guided decoding fields + guided_decoding_backend: Optional[str] = None + guided_decoding_disable_fallback: Optional[bool] = None + guided_decoding_disable_any_whitespace: Optional[bool] = None + guided_decoding_disable_additional_properties: Optional[bool] = None + logits_processor_pattern: Optional[ str] = ModelConfig.logits_processor_pattern @@ -462,7 +464,6 @@ class EngineArgs: additional_config: dict[str, Any] = \ get_field(VllmConfig, "additional_config") - reasoning_parser: str = DecodingConfig.reasoning_backend use_tqdm_on_load: bool = LoadConfig.use_tqdm_on_load pt_load_map_location: str = LoadConfig.pt_load_map_location @@ -618,28 +619,29 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: load_group.add_argument('--pt-load-map-location', **load_kwargs["pt_load_map_location"]) - # Guided decoding arguments - guided_decoding_kwargs = get_kwargs(DecodingConfig) - guided_decoding_group = parser.add_argument_group( - title="DecodingConfig", - description=DecodingConfig.__doc__, + # Structured outputs arguments + structured_outputs_kwargs = get_kwargs(StructuredOutputsConfig) + structured_outputs_group = parser.add_argument_group( + title="StructuredOutputsConfig", + description=StructuredOutputsConfig.__doc__, ) - guided_decoding_group.add_argument("--guided-decoding-backend", - **guided_decoding_kwargs["backend"]) - guided_decoding_group.add_argument( - "--guided-decoding-disable-fallback", - **guided_decoding_kwargs["disable_fallback"]) - guided_decoding_group.add_argument( - "--guided-decoding-disable-any-whitespace", - **guided_decoding_kwargs["disable_any_whitespace"]) - guided_decoding_group.add_argument( - "--guided-decoding-disable-additional-properties", - **guided_decoding_kwargs["disable_additional_properties"]) - guided_decoding_group.add_argument( + structured_outputs_group.add_argument( "--reasoning-parser", # This choice is a special case because it's not static choices=list(ReasoningParserManager.reasoning_parsers), - **guided_decoding_kwargs["reasoning_backend"]) + **structured_outputs_kwargs["reasoning_parser"]) + # Deprecated guided decoding arguments + for arg, type in [ + ("--guided-decoding-backend", str), + ("--guided-decoding-disable-fallback", bool), + ("--guided-decoding-disable-any-whitespace", bool), + ("--guided-decoding-disable-additional-properties", bool), + ]: + structured_outputs_group.add_argument( + arg, + type=type, + help=(f"[DEPRECATED] {arg} will be removed in v0.12.0."), + deprecated=True) # Parallel arguments parallel_kwargs = get_kwargs(ParallelConfig) @@ -934,6 +936,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **vllm_kwargs["compilation_config"]) vllm_group.add_argument("--additional-config", **vllm_kwargs["additional_config"]) + vllm_group.add_argument('--structured-outputs-config', + **vllm_kwargs["structured_outputs_config"]) # Other arguments parser.add_argument('--disable-log-stats', @@ -1421,14 +1425,25 @@ def create_engine_config( load_config = self.create_load_config() - decoding_config = DecodingConfig( - backend=self.guided_decoding_backend, - disable_fallback=self.guided_decoding_disable_fallback, - disable_any_whitespace=self.guided_decoding_disable_any_whitespace, - disable_additional_properties=\ - self.guided_decoding_disable_additional_properties, - reasoning_backend=self.reasoning_parser - ) + # Pass reasoning_parser into StructuredOutputsConfig + if self.reasoning_parser: + 
self.structured_outputs_config.reasoning_parser = \ + self.reasoning_parser + + # Forward the deprecated CLI args to the StructuredOutputsConfig + so_config = self.structured_outputs_config + if self.guided_decoding_backend is not None: + so_config.guided_decoding_backend = \ + self.guided_decoding_backend + if self.guided_decoding_disable_fallback is not None: + so_config.guided_decoding_disable_fallback = \ + self.guided_decoding_disable_fallback + if self.guided_decoding_disable_any_whitespace is not None: + so_config.guided_decoding_disable_any_whitespace = \ + self.guided_decoding_disable_any_whitespace + if self.guided_decoding_disable_additional_properties is not None: + so_config.guided_decoding_disable_additional_properties = \ + self.guided_decoding_disable_additional_properties observability_config = ObservabilityConfig( show_hidden_metrics_for_version=( @@ -1446,7 +1461,7 @@ def create_engine_config( lora_config=lora_config, speculative_config=speculative_config, load_config=load_config, - decoding_config=decoding_config, + structured_outputs_config=self.structured_outputs_config, observability_config=observability_config, compilation_config=self.compilation_config, kv_transfer_config=self.kv_transfer_config, diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 1ae82c9f6f6f..6793041abc50 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -10,9 +10,8 @@ from weakref import ReferenceType import vllm.envs as envs -from vllm.config import (DecodingConfig, ModelConfig, ParallelConfig, +from vllm.config import (LoRAConfig, ModelConfig, ParallelConfig, SchedulerConfig, VllmConfig) -from vllm.config.lora import LoRAConfig from vllm.core.scheduler import SchedulerOutputs from vllm.engine.arg_utils import AsyncEngineArgs from vllm.engine.async_timeout import asyncio_timeout @@ -955,10 +954,6 @@ async def get_parallel_config(self) -> ParallelConfig: """Get the parallel configuration of the vLLM engine.""" return self.engine.get_parallel_config() - async def get_decoding_config(self) -> DecodingConfig: - """Get the decoding configuration of the vLLM engine.""" - return self.engine.get_decoding_config() - async def get_scheduler_config(self) -> SchedulerConfig: """Get the scheduling configuration of the vLLM engine.""" return self.engine.get_scheduler_config() diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 34b5dcb58750..708f3bbeeff1 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -16,9 +16,8 @@ from typing_extensions import TypeVar import vllm.envs as envs -from vllm.config import (DecodingConfig, ModelConfig, ObservabilityConfig, +from vllm.config import (LoRAConfig, ModelConfig, ObservabilityConfig, ParallelConfig, SchedulerConfig, VllmConfig) -from vllm.config.lora import LoRAConfig from vllm.core.scheduler import ScheduledSequenceGroup, SchedulerOutputs from vllm.engine.arg_utils import EngineArgs from vllm.engine.metrics_types import StatLoggerBase, Stats @@ -213,8 +212,7 @@ def __init__( self.device_config = vllm_config.device_config self.speculative_config = vllm_config.speculative_config # noqa self.load_config = vllm_config.load_config - self.decoding_config = vllm_config.decoding_config or DecodingConfig( # noqa - ) + self.structured_outputs_config = vllm_config.structured_outputs_config self.observability_config = vllm_config.observability_config or ObservabilityConfig( # noqa ) @@ -364,10 +362,9 @@ def __init__( self.observability_config.otlp_traces_endpoint) # 
Initialize reasoning parser if reasoning backend is set. - if self.decoding_config.reasoning_backend and \ - self.tokenizer: + if self.structured_outputs_config.reasoning_parser and self.tokenizer: reasoner_class = ReasoningParserManager.get_reasoning_parser( - self.decoding_config.reasoning_backend) + self.structured_outputs_config.reasoning_parser) self.reasoner: ReasoningParser = reasoner_class( self.tokenizer.get_lora_tokenizer()) @@ -381,7 +378,8 @@ def __init__( self.seq_counter, stop_checker=StopChecker( self.scheduler_config.max_model_len, - self.reasoner if self.decoding_config.reasoning_backend + self.reasoner + if self.structured_outputs_config.reasoning_parser and self.tokenizer else None, ), )) @@ -772,10 +770,6 @@ def get_parallel_config(self) -> ParallelConfig: """Gets the parallel configuration.""" return self.parallel_config - def get_decoding_config(self) -> DecodingConfig: - """Gets the decoding configuration.""" - return self.decoding_config - def get_scheduler_config(self) -> SchedulerConfig: """Gets the scheduler configuration.""" return self.scheduler_config diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index 808d2d0ce3d2..c345f17e6614 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -6,7 +6,7 @@ from typing import Any, AsyncGenerator, Iterable, Mapping, Optional, Union from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function -from vllm.config import DecodingConfig, ModelConfig, VllmConfig +from vllm.config import ModelConfig, VllmConfig from vllm.core.scheduler import SchedulerOutputs from vllm.inputs.data import PromptType, TokensPrompt from vllm.inputs.parse import is_explicit_encoder_decoder_prompt @@ -248,11 +248,6 @@ async def get_model_config(self) -> ModelConfig: """Get the model configuration of the vLLM engine.""" ... - @abstractmethod - async def get_decoding_config(self) -> DecodingConfig: - """Get the decoding configuration of the vLLM engine.""" - ... 
- @abstractmethod async def get_input_preprocessor(self) -> InputPreprocessor: """Get the input processor of the vLLM engine.""" diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index f2264292fa66..63e9478612bb 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -15,8 +15,8 @@ from vllm.beam_search import (BeamSearchInstance, BeamSearchOutput, BeamSearchSequence, create_sort_beams_key_function) -from vllm.config import (CompilationConfig, ModelDType, TokenizerMode, - is_init_field) +from vllm.config import (CompilationConfig, ModelDType, + StructuredOutputsConfig, TokenizerMode, is_init_field) from vllm.engine.arg_utils import (ConvertOption, EngineArgs, HfOverrides, PoolerConfig, RunnerOption) from vllm.engine.llm_engine import LLMEngine @@ -192,6 +192,8 @@ def __init__( hf_overrides: Optional[HfOverrides] = None, mm_processor_kwargs: Optional[dict[str, Any]] = None, override_pooler_config: Optional[PoolerConfig] = None, + structured_outputs_config: Optional[Union[dict[ + str, Any], StructuredOutputsConfig]] = None, kv_cache_memory_bytes: Optional[int] = None, compilation_config: Optional[Union[int, dict[str, Any], CompilationConfig]] = None, @@ -236,14 +238,30 @@ def __init__( compilation_config_instance = CompilationConfig( level=compilation_config) elif isinstance(compilation_config, dict): - predicate = lambda x: is_init_field(CompilationConfig, x[0]) compilation_config_instance = CompilationConfig( - **dict(filter(predicate, compilation_config.items()))) + **{ + k: v + for k, v in compilation_config.items() + if is_init_field(CompilationConfig, k) + }) else: compilation_config_instance = compilation_config else: compilation_config_instance = CompilationConfig() + if structured_outputs_config is not None: + if isinstance(structured_outputs_config, dict): + structured_outputs_instance = StructuredOutputsConfig( + **{ + k: v + for k, v in structured_outputs_config.items() + if is_init_field(StructuredOutputsConfig, k) + }) + else: + structured_outputs_instance = structured_outputs_config + else: + structured_outputs_instance = StructuredOutputsConfig() + engine_args = EngineArgs( model=model, runner=runner, @@ -271,6 +289,7 @@ def __init__( hf_overrides=hf_overrides, mm_processor_kwargs=mm_processor_kwargs, override_pooler_config=override_pooler_config, + structured_outputs_config=structured_outputs_instance, compilation_config=compilation_config_instance, logits_processors=logits_processors, **kwargs, diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index c07e95e9370a..93ea846f26f6 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1678,7 +1678,7 @@ async def init_app_state( enable_auto_tools=args.enable_auto_tool_choice, tool_parser=args.tool_call_parser, tool_server=tool_server, - reasoning_parser=args.reasoning_parser, + reasoning_parser=args.structured_outputs_config.reasoning_parser, enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_force_include_usage=args.enable_force_include_usage, enable_log_outputs=args.enable_log_outputs, @@ -1697,7 +1697,7 @@ async def init_app_state( exclude_tools_when_tool_choice_none=args. 
exclude_tools_when_tool_choice_none, tool_parser=args.tool_call_parser, - reasoning_parser=args.reasoning_parser, + reasoning_parser=args.structured_outputs_config.reasoning_parser, enable_prompt_tokens_details=args.enable_prompt_tokens_details, enable_force_include_usage=args.enable_force_include_usage, enable_log_outputs=args.enable_log_outputs, @@ -1800,10 +1800,10 @@ def validate_api_server_args(args): f"(chose from {{ {','.join(valid_tool_parses)} }})") valid_reasoning_parses = ReasoningParserManager.reasoning_parsers.keys() - if args.reasoning_parser \ - and args.reasoning_parser not in valid_reasoning_parses: + if ((reasoning_parser := args.structured_outputs_config.reasoning_parser) + and reasoning_parser not in valid_reasoning_parses): raise KeyError( - f"invalid reasoning parser: {args.reasoning_parser} " + f"invalid reasoning parser: {reasoning_parser} " f"(chose from {{ {','.join(valid_reasoning_parses)} }})") diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 2505e493625d..cff4a45fdc43 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -54,8 +54,8 @@ from vllm.logger import init_logger from vllm.logprobs import Logprob from vllm.pooling_params import PoolingParams -from vllm.sampling_params import (BeamSearchParams, GuidedDecodingParams, - RequestOutputKind, SamplingParams) +from vllm.sampling_params import (BeamSearchParams, RequestOutputKind, + SamplingParams, StructuredOutputsParams) from vllm.utils import random_uuid, resolve_obj_by_qualname logger = init_logger(__name__) @@ -373,11 +373,12 @@ def to_sampling_params( stop_token_ids = default_sampling_params.get("stop_token_ids") # Structured output - guided_decoding = None + structured_outputs = None if self.text is not None and self.text.format is not None: response_format = self.text.format - if response_format.type == "json_schema": - guided_decoding = GuidedDecodingParams.from_optional( + if (response_format.type == "json_schema" + and response_format.schema_ is not None): + structured_outputs = StructuredOutputsParams( json=response_format.schema_) elif response_format.type == "json_object": raise NotImplementedError("json_object is not supported") @@ -392,7 +393,7 @@ def to_sampling_params( stop_token_ids=stop_token_ids, output_kind=(RequestOutputKind.DELTA if self.stream else RequestOutputKind.FINAL_ONLY), - guided_decoding=guided_decoding, + structured_outputs=structured_outputs, ) def is_include_output_logprobs(self) -> bool: @@ -547,42 +548,9 @@ class ChatCompletionRequest(OpenAIBaseModel): default=None, description=("Additional kwargs to pass to the HF processor."), ) - guided_json: Optional[Union[str, dict, BaseModel]] = Field( + structured_outputs: Optional[StructuredOutputsParams] = Field( default=None, - description=("If specified, the output will follow the JSON schema."), - ) - guided_regex: Optional[str] = Field( - default=None, - description=( - "If specified, the output will follow the regex pattern."), - ) - guided_choice: Optional[list[str]] = Field( - default=None, - description=( - "If specified, the output will be exactly one of the choices."), - ) - guided_grammar: Optional[str] = Field( - default=None, - description=( - "If specified, the output will follow the context free grammar."), - ) - structural_tag: Optional[str] = Field( - default=None, - description=( - "If specified, the output will follow the structural tag schema."), - ) - guided_decoding_backend: Optional[str] = Field( - default=None, - 
description=( - "If specified, will override the default guided decoding backend " - "of the server for this specific request. If set, must be either " - "'outlines' / 'lm-format-enforcer'"), - ) - guided_whitespace_pattern: Optional[str] = Field( - default=None, - description=( - "If specified, will override the default whitespace pattern " - "for guided json decoding."), + description="Additional kwargs for structured outputs", ) priority: int = Field( default=0, @@ -701,31 +669,33 @@ def to_sampling_params( if prompt_logprobs is None and self.echo: prompt_logprobs = self.top_logprobs - guided_json_object = None - if self.response_format is not None: - if self.response_format.type == "json_object": - guided_json_object = True - elif self.response_format.type == "json_schema": - json_schema = self.response_format.json_schema - assert json_schema is not None - self.guided_json = json_schema.json_schema - elif self.response_format.type == "structural_tag": - structural_tag = self.response_format - assert structural_tag is not None and isinstance( - structural_tag, StructuralTagResponseFormat) - s_tag_obj = structural_tag.model_dump(by_alias=True) - self.structural_tag = json.dumps(s_tag_obj) - - guided_decoding = GuidedDecodingParams.from_optional( - json=self._get_guided_json_from_tool() or self.guided_json, - regex=self.guided_regex, - choice=self.guided_choice, - grammar=self.guided_grammar, - json_object=guided_json_object, - backend=self.guided_decoding_backend, - whitespace_pattern=self.guided_whitespace_pattern, - structural_tag=self.structural_tag, - ) + response_format = self.response_format + json_schema_from_tool = self._get_json_schema_from_tool() + if response_format is not None or json_schema_from_tool is not None: + # If structured outputs wasn't already enabled, + # we must enable it for these features to work + if self.structured_outputs is None: + self.structured_outputs = StructuredOutputsParams() + + # Set structured output params for response format + if response_format is not None: + if response_format.type == "json_object": + self.structured_outputs.json_object = True + elif response_format.type == "json_schema": + json_schema = response_format.json_schema + assert json_schema is not None + self.structured_outputs.json = json_schema.json_schema + elif response_format.type == "structural_tag": + structural_tag = response_format + assert structural_tag is not None and isinstance( + structural_tag, StructuralTagResponseFormat) + s_tag_obj = structural_tag.model_dump(by_alias=True) + self.structured_outputs.structural_tag = json.dumps( + s_tag_obj) + + # Set structured output params for tool calling + if json_schema_from_tool is not None: + self.structured_outputs.json = json_schema_from_tool extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {} if self.kv_transfer_params: @@ -757,15 +727,14 @@ def to_sampling_params( truncate_prompt_tokens=self.truncate_prompt_tokens, output_kind=RequestOutputKind.DELTA if self.stream \ else RequestOutputKind.FINAL_ONLY, - guided_decoding=guided_decoding, + structured_outputs=self.structured_outputs, logit_bias=self.logit_bias, - bad_words= self.bad_words, + bad_words=self.bad_words, allowed_token_ids=self.allowed_token_ids, extra_args=extra_args or None, ) - def _get_guided_json_from_tool( - self) -> Optional[Union[str, dict, BaseModel]]: + def _get_json_schema_from_tool(self) -> Optional[Union[str, dict]]: # user has chosen to not use any tool if self.tool_choice == "none" or self.tools is None: return None @@ -875,28 
+844,31 @@ def check_logprobs(cls, data): @model_validator(mode="before") @classmethod - def check_guided_decoding_count(cls, data): + def check_structured_outputs_count(cls, data): if isinstance(data, ValueError): raise data - guide_count = sum([ - "guided_json" in data and data["guided_json"] is not None, - "guided_regex" in data and data["guided_regex"] is not None, - "guided_choice" in data and data["guided_choice"] is not None - ]) - # you can only use one kind of guided decoding - if guide_count > 1: + if "structured_outputs" not in data: + return data + + structured_outputs_kwargs = data['structured_outputs'] + count = sum( + structured_outputs_kwargs.get(k) is not None + for k in ("json", "regex", "choice")) + # you can only use one kind of constraints for structured outputs + if count > 1: raise ValueError( - "You can only use one kind of guided decoding " - "('guided_json', 'guided_regex' or 'guided_choice').") - # you can only either use guided decoding or tools, not both - if guide_count > 1 and data.get("tool_choice", "none") not in ( + "You can only use one kind of constraints for structured " + "outputs ('json', 'regex' or 'choice').") + # you can only either use structured outputs or tools, not both + if count > 1 and data.get("tool_choice", "none") not in ( "none", "auto", "required", ): raise ValueError( - "You can only either use guided decoding or tools, not both.") + "You can only either use constraints for structured outputs " + "or tools, not both.") return data @model_validator(mode="before") @@ -1049,37 +1021,9 @@ class CompletionRequest(OpenAIBaseModel): ", {'type': 'structural_tag'}, or {'type': 'text' } is supported." ), ) - guided_json: Optional[Union[str, dict, BaseModel]] = Field( - default=None, - description="If specified, the output will follow the JSON schema.", - ) - guided_regex: Optional[str] = Field( - default=None, - description=( - "If specified, the output will follow the regex pattern."), - ) - guided_choice: Optional[list[str]] = Field( + structured_outputs: Optional[StructuredOutputsParams] = Field( default=None, - description=( - "If specified, the output will be exactly one of the choices."), - ) - guided_grammar: Optional[str] = Field( - default=None, - description=( - "If specified, the output will follow the context free grammar."), - ) - guided_decoding_backend: Optional[str] = Field( - default=None, - description=( - "If specified, will override the default guided decoding backend " - "of the server for this specific request. 
If set, must be one of " - "'outlines' / 'lm-format-enforcer'"), - ) - guided_whitespace_pattern: Optional[str] = Field( - default=None, - description=( - "If specified, will override the default whitespace pattern " - "for guided json decoding."), + description="Additional kwargs for structured outputs", ) priority: int = Field( default=0, @@ -1210,20 +1154,10 @@ def to_sampling_params( echo_without_generation = self.echo and self.max_tokens == 0 - guided_json_object = None - if (self.response_format is not None + if (self.structured_outputs is not None + and self.response_format is not None and self.response_format.type == "json_object"): - guided_json_object = True - - guided_decoding = GuidedDecodingParams.from_optional( - json=self.guided_json, - regex=self.guided_regex, - choice=self.guided_choice, - grammar=self.guided_grammar, - json_object=guided_json_object, - backend=self.guided_decoding_backend, - whitespace_pattern=self.guided_whitespace_pattern, - ) + self.structured_outputs.json_object = True extra_args: dict[str, Any] = self.vllm_xargs if self.vllm_xargs else {} if self.kv_transfer_params: @@ -1255,7 +1189,7 @@ def to_sampling_params( truncate_prompt_tokens=self.truncate_prompt_tokens, output_kind=RequestOutputKind.DELTA if self.stream \ else RequestOutputKind.FINAL_ONLY, - guided_decoding=guided_decoding, + structured_outputs=self.structured_outputs, logit_bias=self.logit_bias, allowed_token_ids=self.allowed_token_ids, extra_args=extra_args or None, @@ -1263,16 +1197,18 @@ def to_sampling_params( @model_validator(mode="before") @classmethod - def check_guided_decoding_count(cls, data): - guide_count = sum([ - "guided_json" in data and data["guided_json"] is not None, - "guided_regex" in data and data["guided_regex"] is not None, - "guided_choice" in data and data["guided_choice"] is not None - ]) - if guide_count > 1: + def check_structured_outputs_count(cls, data): + if "structured_outputs" not in data: + return data + + structured_outputs_kwargs = data['structured_outputs'] + count = sum( + structured_outputs_kwargs.get(k) is not None + for k in ("json", "regex", "choice")) + if count > 1: raise ValueError( - "You can only use one kind of guided decoding " - "('guided_json', 'guided_regex' or 'guided_choice').") + "You can only use one kind of constraints for structured " + "outputs ('json', 'regex' or 'choice').") return data @model_validator(mode="before") diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index cd85baa9ba66..16564214e353 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -993,7 +993,7 @@ async def chat_completion_stream_generator( # check to make sure we haven't "forgotten" to stream # any tokens that were generated but previously # matched by partial json parsing - # only happens if we are NOT using guided decoding + # only happens if we are NOT using structured outputs auto_tools_called = False if tool_parser: auto_tools_called = len( diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index 687af7a189ce..ce3d23763ed6 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -262,9 +262,9 @@ class GptOssForCausalLMConfig(VerifyAndUpdateConfig): @staticmethod def verify_and_update_config(vllm_config: "VllmConfig") -> None: - decoding_config = vllm_config.decoding_config - if decoding_config.reasoning_backend == "": - decoding_config.reasoning_backend = "openai_gptoss" + 
structured_outputs_config = vllm_config.structured_outputs_config + if structured_outputs_config.reasoning_parser == "": + structured_outputs_config.reasoning_parser = "openai_gptoss" # Increase the max capture size from 512 to 1024 for performance. # NOTE(woosuk): This will increase the number of CUDA graphs diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index fe93e906064e..0a01cb0260ae 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -2,13 +2,13 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Sampling parameters for text generation.""" import copy -from dataclasses import dataclass +from dataclasses import field from enum import Enum, IntEnum from functools import cached_property from typing import Annotated, Any, Optional, Union import msgspec -from pydantic import BaseModel +from pydantic.dataclasses import dataclass from vllm.logger import init_logger from vllm.logits_process import LogitsProcessor @@ -28,60 +28,35 @@ class SamplingType(IntEnum): # maybe make msgspec? @dataclass -class GuidedDecodingParams: - """One of these fields will be used to build a logit processor.""" +class StructuredOutputsParams: + # One of these fields will be used to build a logit processor. json: Optional[Union[str, dict]] = None regex: Optional[str] = None choice: Optional[list[str]] = None grammar: Optional[str] = None json_object: Optional[bool] = None - """These are other options that can be set""" - backend: Optional[str] = None - backend_was_auto: bool = False + # These are other options that can be set. disable_fallback: bool = False disable_any_whitespace: bool = False disable_additional_properties: bool = False whitespace_pattern: Optional[str] = None structural_tag: Optional[str] = None - @staticmethod - def from_optional( - json: Optional[Union[dict, BaseModel, str]] = None, - regex: Optional[str] = None, - choice: Optional[list[str]] = None, - grammar: Optional[str] = None, - json_object: Optional[bool] = None, - backend: Optional[str] = None, - whitespace_pattern: Optional[str] = None, - structural_tag: Optional[str] = None, - ) -> Optional["GuidedDecodingParams"]: - if all(arg is None for arg in (json, regex, choice, grammar, - json_object, structural_tag)): - return None - # Extract json schemas from pydantic models - if isinstance(json, (BaseModel, type(BaseModel))): - json = json.model_json_schema() - return GuidedDecodingParams( - json=json, - regex=regex, - choice=choice, - grammar=grammar, - json_object=json_object, - backend=backend, - whitespace_pattern=whitespace_pattern, - structural_tag=structural_tag, - ) + _backend: Optional[str] = field(default=None, init=False) + """CAUTION: Should only be set by Processor._validate_structured_output""" + _backend_was_auto: bool = field(default=False, init=False) + """CAUTION: Should only be set by Processor._validate_structured_output""" def __post_init__(self): """Validate that some fields are mutually exclusive.""" - guide_count = sum([ + count = sum([ self.json is not None, self.regex is not None, self.choice is not None, self.grammar is not None, self.json_object is not None ]) - if guide_count > 1: + if count > 1: raise ValueError( - "You can only use one kind of guided decoding but multiple are " - f"specified: {self.__dict__}") + "You can only use one kind of structured outputs constraint " + f"but multiple are specified: {self.__dict__}") class RequestOutputKind(Enum): @@ -196,9 +171,8 @@ class SamplingParams( _all_stop_token_ids: set[int] = 
msgspec.field(default_factory=set) # Fields used to construct logits processors - guided_decoding: Optional[GuidedDecodingParams] = None - """If provided, the engine will construct a guided decoding logits - processor from these parameters.""" + structured_outputs: Optional[StructuredOutputsParams] = None + """Parameters for configuring structured outputs.""" logit_bias: Optional[dict[int, float]] = None """If provided, the engine will construct a logits processor that applies these logit biases.""" @@ -246,7 +220,7 @@ def from_optional( msgspec.Meta( ge=-1)]] = None, output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE, - guided_decoding: Optional[GuidedDecodingParams] = None, + structured_outputs: Optional[StructuredOutputsParams] = None, logit_bias: Optional[Union[dict[int, float], dict[str, float]]] = None, allowed_token_ids: Optional[list[int]] = None, extra_args: Optional[dict[str, Any]] = None, @@ -288,7 +262,7 @@ def from_optional( logits_processors=logits_processors, truncate_prompt_tokens=truncate_prompt_tokens, output_kind=output_kind, - guided_decoding=guided_decoding, + structured_outputs=structured_outputs, logit_bias=logit_bias, allowed_token_ids=allowed_token_ids, extra_args=extra_args, @@ -559,7 +533,7 @@ def __repr__(self) -> str: "spaces_between_special_tokens=" f"{self.spaces_between_special_tokens}, " f"truncate_prompt_tokens={self.truncate_prompt_tokens}, " - f"guided_decoding={self.guided_decoding}, " + f"structured_outputs={self.structured_outputs}, " f"extra_args={self.extra_args})") diff --git a/vllm/transformers_utils/tokenizers/mistral.py b/vllm/transformers_utils/tokenizers/mistral.py index 5b07327cf2b8..d8a8d19391cd 100644 --- a/vllm/transformers_utils/tokenizers/mistral.py +++ b/vllm/transformers_utils/tokenizers/mistral.py @@ -274,7 +274,7 @@ def _download_mistral_tokenizer_from_hf(tokenizer_name: str, return tokenizer_file # the following attributes are set to fit vLLM's design and are used - # by the guided structured output backends. + # by the structured output backends. @property def all_special_tokens_extended(self) -> list[str]: from mistral_common.tokens.tokenizers.base import SpecialTokens @@ -463,9 +463,6 @@ def _token_to_id(t: str): return decoded - # WARN: Outlines logits processors can overwrite this method. - # See: guided_decoding/outlines_logits_processors.py::_adapt_tokenizer - # for more. 
def decode(self, ids: Union[list[int], int], skip_special_tokens: bool = True) -> str: diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index f17c269e4709..73165c7e4c0a 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -588,9 +588,6 @@ async def get_vllm_config(self) -> VllmConfig: async def get_model_config(self) -> ModelConfig: return self.model_config - async def get_decoding_config(self): - raise ValueError("Not Supported on V1 yet.") - async def get_input_preprocessor(self) -> InputPreprocessor: return self.processor.input_preprocessor diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 8d9f2ba1ec82..71f539583a1b 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -45,7 +45,7 @@ def __init__( self.model_config = vllm_config.model_config self.cache_config = vllm_config.cache_config self.lora_config = vllm_config.lora_config - self.decoding_config = vllm_config.decoding_config + self.structured_outputs_config = vllm_config.structured_outputs_config self.tokenizer = tokenizer self.generation_config_fields = ( @@ -219,58 +219,57 @@ def _validate_lora(self, lora_request: Optional[LoRARequest]) -> None: "[lora_path]` to use the LoRA tokenizer.") def _validate_structured_output(self, params: SamplingParams) -> None: - if not params.guided_decoding or not self.decoding_config: + if not params.structured_outputs or not self.structured_outputs_config: return - if self.model_config.skip_tokenizer_init and params.guided_decoding: + if self.model_config.skip_tokenizer_init and params.structured_outputs: raise ValueError( "Structured outputs requires a tokenizer so it can't be used with 'skip_tokenizer_init'" # noqa: E501 ) - engine_level_backend = self.decoding_config.backend - if params.guided_decoding.backend: - # Request-level backend selection is not supported in V1. + backend = self.structured_outputs_config.backend + if _backend := params.structured_outputs._backend: + # Request-level backend selection is not supported. # The values may differ if `params` is reused and was set # to a specific backend based on `auto` behavior in a previous # request. We remember that it was set as a result of `auto` - # using the `_auto` option set on the backend in the params. - if (params.guided_decoding.backend != engine_level_backend - and not (engine_level_backend == "auto" - and params.guided_decoding.backend_was_auto)): + # using the `_backend_was_auto` field set in the params. + if (backend != _backend + and not (backend == "auto" + and params.structured_outputs._backend_was_auto)): raise ValueError( - "Request-level structured output backend selection is no " - "longer supported. The request specified " - f"'{params.guided_decoding.backend}', but vLLM was " - f"initialised with '{engine_level_backend}'. This error " - "can be resolved by removing backend selection from the " - "request.") + "Request-level structured output backend selection is not " + f"supported. The request specified '{_backend}', but vLLM " + f"was initialised with '{backend}'. 
This error can be " + "resolved by removing '_backend' from the request.") else: - params.guided_decoding.backend = engine_level_backend + params.structured_outputs._backend = backend # Request content validation - if (isinstance(params.guided_decoding.choice, list) - and not params.guided_decoding.choice): + if (isinstance(params.structured_outputs.choice, list) + and not params.structured_outputs.choice): # It is invalid for choice to be an empty list - raise ValueError(f"Choice '{params.guided_decoding.choice}' " - "cannot be an empty list") + raise ValueError( + f"Choice '{params.structured_outputs.choice}' cannot be an empty list" # noqa: E501 + ) - if engine_level_backend.startswith("xgrammar"): + if backend.startswith("xgrammar"): # xgrammar with no fallback validate_xgrammar_grammar(params) - elif engine_level_backend.startswith("guidance"): + elif backend.startswith("guidance"): # TODO: ideally we would have the LLTokenizer here as Lark syntax # allows <|special_token|> and similar, see # https://github.com/guidance-ai/llguidance/blob/main/docs/syntax.md#special-tokens # Without tokenizer these are disallowed in grammars. validate_guidance_grammar(params, tokenizer=None) - elif engine_level_backend == "outlines": + elif backend == "outlines": # outlines backend validate_structured_output_request_outlines(params) - elif engine_level_backend == "lm-format-enforcer": + elif backend == "lm-format-enforcer": # lm format enforcer backend validate_structured_output_request_lm_format_enforcer(params) else: - # NOTE: engine_level_backend must be "auto" here, because we have + # NOTE: backend must be "auto" here, because we have # checked supported_backends above. # In this mode, we set opinionated defaults based on what we think # will satisfy the most use cases without having to worry about @@ -278,15 +277,15 @@ def _validate_structured_output(self, params: SamplingParams) -> None: # other setting where a specific backend was specified. try: validate_xgrammar_grammar(params) - params.guided_decoding.backend = "xgrammar" + params.structured_outputs._backend = "xgrammar" except ValueError: # The request either failed validation # or includes some jsonschema feature(s) that # are not supported in xgrammar. Fall back to guidance. validate_guidance_grammar(params, tokenizer=None) - params.guided_decoding.backend = "guidance" + params.structured_outputs._backend = "guidance" # Remember that this backend was set automatically - params.guided_decoding.backend_was_auto = True + params.structured_outputs._backend_was_auto = True def _maybe_build_mm_uuids( self, diff --git a/vllm/v1/request.py b/vllm/v1/request.py index 4e3e581235cc..145af788d237 100644 --- a/vllm/v1/request.py +++ b/vllm/v1/request.py @@ -67,7 +67,7 @@ def __init__( # Generative models. 
assert sampling_params.max_tokens is not None self.max_tokens = sampling_params.max_tokens - if sampling_params.guided_decoding is not None: + if sampling_params.structured_outputs is not None: self.status = RequestStatus.WAITING_FOR_FSM self.use_structured_output = True diff --git a/vllm/v1/structured_output/__init__.py b/vllm/v1/structured_output/__init__.py index 1ab29dfecd9e..13c33d3edf14 100644 --- a/vllm/v1/structured_output/__init__.py +++ b/vllm/v1/structured_output/__init__.py @@ -61,11 +61,11 @@ def __init__(self, vllm_config: VllmConfig): self.executor = ThreadPoolExecutor(max_workers=max_workers) self.tokenizer = init_tokenizer_from_configs( model_config=self.vllm_config.model_config) - reasoning_backend = \ - self.vllm_config.decoding_config.reasoning_backend - if reasoning_backend: + reasoning_parser = \ + self.vllm_config.structured_outputs_config.reasoning_parser + if reasoning_parser: reasoner_cls = ReasoningParserManager.get_reasoning_parser( - reasoning_backend) + reasoning_parser) self.reasoner = reasoner_cls(tokenizer=self.tokenizer) def grammar_init(self, request: Request) -> None: @@ -74,15 +74,16 @@ def grammar_init(self, request: Request) -> None: if TYPE_CHECKING: assert request.sampling_params is not None and \ - request.sampling_params.guided_decoding is not None + request.sampling_params.structured_outputs is not None # Initialize the backend the first time it is needed. # # NOTE: We only support a single backend. We do NOT support different # backends on a per-request basis in V1 (for now, anyway...). + # _backend is set in Processor._validate_structured_output if self.backend is None: assert request.sampling_params is not None - backend = request.sampling_params.guided_decoding.backend + backend = request.sampling_params.structured_outputs._backend vocab_size = self.vllm_config.model_config.get_vocab_size() if backend == "xgrammar": self.backend = XgrammarBackend( diff --git a/vllm/v1/structured_output/backend_guidance.py b/vllm/v1/structured_output/backend_guidance.py index 02e7fc33f517..e06ab6377de3 100644 --- a/vllm/v1/structured_output/backend_guidance.py +++ b/vllm/v1/structured_output/backend_guidance.py @@ -60,9 +60,9 @@ class GuidanceBackend(StructuredOutputBackend): def __post_init__(self): self.disable_any_whitespace = \ - self.vllm_config.decoding_config.disable_any_whitespace + self.vllm_config.structured_outputs_config.disable_any_whitespace self.disable_additional_properties = \ - self.vllm_config.decoding_config.disable_additional_properties + self.vllm_config.structured_outputs_config.disable_additional_properties self.ll_tokenizer = llguidance_hf.from_tokenizer( self.tokenizer, self.vocab_size) diff --git a/vllm/v1/structured_output/backend_lm_format_enforcer.py b/vllm/v1/structured_output/backend_lm_format_enforcer.py index 2279a1c8c8a0..465b2428f893 100644 --- a/vllm/v1/structured_output/backend_lm_format_enforcer.py +++ b/vllm/v1/structured_output/backend_lm_format_enforcer.py @@ -138,30 +138,30 @@ def destroy(self): def validate_structured_output_request_lm_format_enforcer( params: SamplingParams): - if params.guided_decoding is None: + if params.structured_outputs is None: return - gd_params = params.guided_decoding + so_params = params.structured_outputs - if gd_params.regex: + if so_params.regex: return - elif gd_params.json: - if isinstance(gd_params.json, str): + elif so_params.json: + if isinstance(so_params.json, str): try: # make sure schema is valid json - json.loads(gd_params.json) + json.loads(so_params.json) except 
json.JSONDecodeError as e: raise ValueError("Invalid JSON grammar specification.") from e else: try: - json.dumps(gd_params.json) + json.dumps(so_params.json) except Exception as e: raise ValueError( - f"Error serializing guided decoding jsonschema: {e}" + f"Error serializing structured outputs jsonschema: {e}" ) from e return - elif gd_params.choice: + elif so_params.choice: return - elif gd_params.grammar: - raise ValueError("LM Format Enforcer guided decoding backend " + elif so_params.grammar: + raise ValueError("LM Format Enforcer structured outputs backend " "does not support grammar specifications") diff --git a/vllm/v1/structured_output/backend_outlines.py b/vllm/v1/structured_output/backend_outlines.py index 572e4984480f..e5e638a6ad76 100644 --- a/vllm/v1/structured_output/backend_outlines.py +++ b/vllm/v1/structured_output/backend_outlines.py @@ -158,36 +158,36 @@ def reset(self): def validate_structured_output_request_outlines(params: SamplingParams): - if params.guided_decoding is None: + if params.structured_outputs is None: return - gd_params = params.guided_decoding + so_params = params.structured_outputs - if gd_params.regex: - validate_regex_is_buildable(gd_params.regex) - elif gd_params.json: - if isinstance(gd_params.json, str): + if so_params.regex: + validate_regex_is_buildable(so_params.regex) + elif so_params.json: + if isinstance(so_params.json, str): try: # make sure schema is valid json - json.loads(gd_params.json) - schema = gd_params.json + json.loads(so_params.json) + schema = so_params.json except json.JSONDecodeError as e: raise ValueError("Invalid JSON grammar specification.") from e else: try: - schema = json.dumps(gd_params.json) + schema = json.dumps(so_params.json) except Exception as e: raise ValueError( - f"Error serializing guided decoding jsonschema: {e}" + f"Error serializing structured outputs jsonschema: {e}" ) from e pattern = json_schema.build_regex_from_schema(schema) validate_regex_is_buildable(pattern) - elif gd_params.choice: - choices = [regex_escape(str(choice)) for choice in gd_params.choice] + elif so_params.choice: + choices = [regex_escape(str(choice)) for choice in so_params.choice] regex = "(" + "|".join(choices) + ")" validate_regex_is_buildable(regex) - elif gd_params.grammar: - raise ValueError("Outlines guided decoding backend " + elif so_params.grammar: + raise ValueError("Outlines structured outputs backend " "does not support grammar specifications") @@ -306,7 +306,7 @@ def validate_regex_is_buildable(pattern: str) -> None: _check_unsupported(parsed) except ValueError as e: raise ValueError( - f"Regex uses unsupported feature for guided decoding: {e}. " + f"Regex uses unsupported feature for structured outputs: {e}. " "Only basic matching constructs are supported—lookarounds, " "backreferences, and unicode boundaries are not.") from e @@ -315,6 +315,6 @@ def validate_regex_is_buildable(pattern: str) -> None: "Regex does not have a anchored universal start state" "This means that the Regex uses anchors (^) or look-arounds " "in a way which requires context before any token is matched." - "Guided decoding needs regexes that can match without needing " + "structured outputs needs regexes that can match without needing " "that context. Try rewriting the pattern without using these " f"constructs. 
Pattern:\n{pattern}") diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index 5e00f6380416..55b4792fe010 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -34,7 +34,7 @@ class XgrammarBackend(StructuredOutputBackend): def __post_init__(self): self.disable_any_whitespace = \ - self.vllm_config.decoding_config.disable_any_whitespace + self.vllm_config.structured_outputs_config.disable_any_whitespace if isinstance(self.tokenizer, MistralTokenizer): # NOTE: ideally, xgrammar should handle this accordingly. @@ -248,37 +248,37 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: Raises ValueError if the request is not supported. """ - if sampling_params.guided_decoding is None: + if sampling_params.structured_outputs is None: return - gd_params = sampling_params.guided_decoding + so_params = sampling_params.structured_outputs - if gd_params.regex: + if so_params.regex: try: - xgr.Grammar.from_regex(gd_params.regex) + xgr.Grammar.from_regex(so_params.regex) except Exception as err: raise ValueError("Failed to transform regex into a grammar: " f"{err}") from err - if gd_params.choice: - choice_grammar = choice_as_grammar(gd_params.choice) + if so_params.choice: + choice_grammar = choice_as_grammar(so_params.choice) try: xgr.Grammar.from_ebnf(choice_grammar) except Exception as err: raise ValueError("Failed to transform choices into a grammar: " "{err}") from err - gd_params.choice = None - gd_params.grammar = choice_grammar + so_params.choice = None + so_params.grammar = choice_grammar return - if gd_params.json: - if isinstance(gd_params.json, str): + if so_params.json: + if isinstance(so_params.json, str): try: - schema = json.loads(gd_params.json) + schema = json.loads(so_params.json) except json.JSONDecodeError as e: raise ValueError("Invalid JSON grammar specification.") from e else: - schema = gd_params.json + schema = so_params.json try: xgr.Grammar.from_json_schema(schema) @@ -291,11 +291,11 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: "supported by xgrammar.") return - if gd_params.grammar: - if grammar_is_likely_lark(gd_params.grammar): + if so_params.grammar: + if grammar_is_likely_lark(so_params.grammar): # xgrammar supports EBNF grammars only try: - gd_params.grammar = convert_lark_to_ebnf(gd_params.grammar) + so_params.grammar = convert_lark_to_ebnf(so_params.grammar) except ValueError as e: raise ValueError( "Failed to convert the grammar from Lark to EBNF. ") from e @@ -303,14 +303,14 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: # Test parsing EBNF grammar, possibly already converted from Lark try: # parse the grammar, but we aren't compiling it. 
- xgr.Grammar.from_ebnf(gd_params.grammar) + xgr.Grammar.from_ebnf(so_params.grammar) except Exception as e: raise ValueError("Invalid grammar specification.") from e return - if gd_params.structural_tag: + if so_params.structural_tag: try: - s_tag = json.loads(gd_params.structural_tag) + s_tag = json.loads(so_params.structural_tag) tags = [ xgr.StructuralTagItem( begin=s["begin"], diff --git a/vllm/v1/structured_output/request.py b/vllm/v1/structured_output/request.py index fc365f12573f..99974ef46ecd 100644 --- a/vllm/v1/structured_output/request.py +++ b/vllm/v1/structured_output/request.py @@ -60,7 +60,7 @@ def structured_output_key(self) -> StructuredOutputKey: def get_structured_output_key( sampling_params: SamplingParams) -> StructuredOutputKey: - params = sampling_params.guided_decoding + params = sampling_params.structured_outputs assert params is not None, "params can't be None." if params.json is not None: if not isinstance(params.json, str): From 4f02b77de4e794a0d417ed98a26884208f75e043 Mon Sep 17 00:00:00 2001 From: ihb2032 <40718643+ihb2032@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:43:23 +0800 Subject: [PATCH 097/518] Fix: Add explicit #include for OpenMP compatibility on certain toolchains (#24951) Signed-off-by: lyd1992 Signed-off-by: ihb2032 <1355790728@qq.com> --- csrc/cpu/cpu_types.hpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/csrc/cpu/cpu_types.hpp b/csrc/cpu/cpu_types.hpp index 17bbe04eef94..c3a21796881c 100644 --- a/csrc/cpu/cpu_types.hpp +++ b/csrc/cpu/cpu_types.hpp @@ -17,4 +17,8 @@ #warning "unsupported vLLM cpu implementation" #endif +#ifdef _OPENMP + #include +#endif + #endif \ No newline at end of file From abdfcd4f3dc21dc162baf6887f658fb0f2f3d783 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Elvir=20Crn=C4=8Devi=C4=87?= Date: Thu, 18 Sep 2025 12:25:12 +0200 Subject: [PATCH 098/518] silu-v1: Fix EPS not being used during max-reduction (#25069) Signed-off-by: elvircrn --- csrc/quantization/activation_kernels.cu | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/csrc/quantization/activation_kernels.cu b/csrc/quantization/activation_kernels.cu index 9ddb5af3052f..9aa1411b4a25 100644 --- a/csrc/quantization/activation_kernels.cu +++ b/csrc/quantization/activation_kernels.cu @@ -365,7 +365,6 @@ __global__ void silu_mul_fp8_quant_deep_gemm_kernel( int32_t compute_pipeline_offset_64 = 0; for (int32_t t = n_tokens_lower; t < n_tokens_upper; ++t) { - __nv_bfloat16 y_max_bf16 = EPS; __nv_bfloat162 results_bf162[2]; cp_async_wait(); @@ -405,7 +404,7 @@ __global__ void silu_mul_fp8_quant_deep_gemm_kernel( auto _y_max2 = __hmax2(__habs2(results_bf162[0]), __habs2(results_bf162[1])); - y_max_bf16 = __hmax(_y_max2.x, _y_max2.y); + __nv_bfloat16 y_max_bf16 = __hmax(EPS, __hmax(_y_max2.x, _y_max2.y)); // An entire group is assigned to a single warp, so a simple warp reduce // is used. 
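The one-line kernel fix above is easy to misread in diff form: the per-group maximum that feeds the FP8 scale is now clamped to EPS inside the max-reduction, and the usual reason for such a clamp is to keep a degenerate (for example all-zero) activation group from producing a zero scale. Below is a minimal Python sketch of that invariant only; it is not the CUDA kernel itself, and FP8_MAX, eps, and group_scale are illustrative names chosen here rather than symbols from the patch.

# Minimal sketch: clamp the per-group max to eps before deriving an FP8 scale.
FP8_MAX = 448.0   # assumed max magnitude of float8_e4m3, for illustration only
eps = 1e-10       # illustrative epsilon, not the kernel's EPS constant


def group_scale(values: list[float]) -> float:
    """Return a quantization scale for one group of activations."""
    # Without the clamp, an all-zero group gives y_max == 0 and a zero
    # scale, so dividing by the scale during quantization yields NaN/inf.
    y_max = max(eps, max(abs(v) for v in values))
    return y_max / FP8_MAX


# Usage: a normal group and an all-zero group both yield a finite, nonzero scale.
print(group_scale([0.25, -1.5, 0.75]))
print(group_scale([0.0, 0.0, 0.0]))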
From cc935fdd7e0c466cd556b6515e435dddd78677e0 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Thu, 18 Sep 2025 18:34:42 +0800 Subject: [PATCH 099/518] [Frontend] Support setting logprobs to -1 (#25031) Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_chat_echo.py | 23 ++++++++++++++++++++++ vllm/entrypoints/openai/protocol.py | 8 +++++--- 2 files changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/test_chat_echo.py b/tests/entrypoints/openai/test_chat_echo.py index 0f459dd3d857..ce965eb82924 100644 --- a/tests/entrypoints/openai/test_chat_echo.py +++ b/tests/entrypoints/openai/test_chat_echo.py @@ -99,3 +99,26 @@ async def test_prompt_logprobs(client: openai.AsyncOpenAI): assert completion.prompt_logprobs is not None assert len(completion.prompt_logprobs) > 0 + + +@pytest.mark.asyncio +async def test_top_logprobs(client: openai.AsyncOpenAI): + messages = [{ + "role": "system", + "content": "You are a helpful assistant." + }, { + "role": "user", + "content": "Beijing is the capital of which country?" + }] + + completion = await client.chat.completions.create( + model=MODEL_NAME, + messages=messages, + extra_body={ + "top_logprobs": -1, + "logprobs": "true", + }, + ) + assert completion.choices[0].logprobs is not None + assert completion.choices[0].logprobs.content is not None + assert len(completion.choices[0].logprobs.content) > 0 diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index cff4a45fdc43..7ad8e73d89d5 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -832,10 +832,12 @@ def check_logprobs(cls, data): raise ValueError("`prompt_logprobs=-1` is only supported with " "vLLM engine V1.") if (top_logprobs := data.get("top_logprobs")) is not None: - if top_logprobs < 0: - raise ValueError("`top_logprobs` must be a positive value.") + if top_logprobs < 0 and top_logprobs != -1: + raise ValueError( + "`top_logprobs` must be a positive value or -1.") - if top_logprobs > 0 and not data.get("logprobs"): + if (top_logprobs == -1 + or top_logprobs > 0) and not data.get("logprobs"): raise ValueError( "when using `top_logprobs`, `logprobs` must be set to true." 
) From 37970105fed95d58677f0a4635cb253a71e8817c Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Thu, 18 Sep 2025 19:04:21 +0800 Subject: [PATCH 100/518] [Model] Improve Pooling Model (#25149) Signed-off-by: Jee Jee Li --- vllm/model_executor/layers/pooler.py | 12 ++++++------ vllm/v1/worker/gpu_model_runner.py | 1 + 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py index b571a8f86699..4a97438b1bb2 100644 --- a/vllm/model_executor/layers/pooler.py +++ b/vllm/model_executor/layers/pooler.py @@ -12,8 +12,9 @@ import torch.nn.functional as F from transformers import PretrainedConfig -from vllm.config import ModelConfig, PoolerConfig +from vllm.config import ModelConfig, PoolerConfig, get_current_vllm_config from vllm.logger import init_logger +from vllm.model_executor.models.adapters import _load_st_projector from vllm.pooling_params import PoolingParams from vllm.sequence import PoolerOutput, PoolingSequenceGroupOutput from vllm.tasks import PoolingTask @@ -377,7 +378,6 @@ def __init__(self, *, static_num_labels: bool = True) -> None: super().__init__() if static_num_labels: - from vllm.config import get_current_vllm_config vllm_config = get_current_vllm_config() self.num_labels = getattr(vllm_config.model_config.hf_config, "num_labels", 0) @@ -427,8 +427,6 @@ def __init__(self) -> None: super().__init__(activation=PoolerNormalize()) # Load ST projector if available - from vllm.config import get_current_vllm_config - from vllm.model_executor.models.adapters import _load_st_projector vllm_config = get_current_vllm_config() self.projector: Optional[nn.Module] = _load_st_projector( @@ -489,7 +487,6 @@ class RewardPoolerHead(PoolerHead): def __init__(self) -> None: super().__init__(activation=PoolerClassify(static_num_labels=False)) - from vllm.config import get_current_vllm_config vllm_config = get_current_vllm_config() self.head_dtype = vllm_config.model_config.head_dtype @@ -638,7 +635,6 @@ def __init__( ) -> None: super().__init__() - from vllm.config import get_current_vllm_config vllm_config = get_current_vllm_config() self.pooling = pooling @@ -730,3 +726,7 @@ def forward( offset += num_items return PoolerOutput(outputs) + + def extra_repr(self) -> str: + s = f"supported_task={self.get_supported_tasks()}" + return s diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index e8ad9c2fca07..2e67984cb432 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3151,6 +3151,7 @@ def _dummy_pooler_run_task( model = cast(VllmModelForPooling, self.get_model()) dummy_pooling_params = PoolingParams(task=task) + dummy_pooling_params.verify(task=task, model_config=self.model_config) to_update = model.pooler.get_pooling_updates(task) to_update.apply(dummy_pooling_params) From 8ed039d52775aaee4a61663dd5d8c840f5eebd15 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:24:27 +0100 Subject: [PATCH 101/518] Move `StructuredOutputsConfig` from `config/__init__.py` to `config/structured_outputs.py` (#25153) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config/__init__.py | 61 +---------------------------- vllm/config/structured_outputs.py | 64 +++++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 60 deletions(-) create mode 100644 vllm/config/structured_outputs.py diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 
9a1c5f0b0d45..69ab5712d404 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -42,6 +42,7 @@ ParallelConfig) from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy from vllm.config.speculative import SpeculativeConfig +from vllm.config.structured_outputs import StructuredOutputsConfig from vllm.config.utils import ConfigType, config from vllm.logger import init_logger from vllm.model_executor.layers.quantization import QuantizationMethods @@ -2277,66 +2278,6 @@ def get_served_model_name(model: str, return served_model_name -StructuredOutputsBackend = Literal["auto", "xgrammar", "guidance", "outlines", - "lm-format-enforcer"] - - -@config -@dataclass -class StructuredOutputsConfig: - """Dataclass which contains structured outputs config for the engine.""" - - backend: StructuredOutputsBackend = "auto" - """Which engine will be used for structured outputs (e.g. JSON schema, - regex, etc) by default. With "auto", we will make opinionated choices - based on request contents and what the backend libraries currently support, - so the behavior is subject to change in each release.""" - - disable_fallback: bool = False - """If `True`, vLLM will not fallback to a different backend on error.""" - - disable_any_whitespace: bool = False - """If `True`, the model will not generate any whitespace during structured - outputs. This is only supported for xgrammar and guidance backends.""" - - disable_additional_properties: bool = False - """If `True`, the `guidance` backend will not use `additionalProperties` - in the JSON schema. This is only supported for the `guidance` backend and - is used to better align its behaviour with `outlines` and `xgrammar`.""" - - reasoning_parser: str = "" - """Select the reasoning parser depending on the model that you're using. - This is used to parse the reasoning content into OpenAI API format.""" - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. - """ - # no factors to consider. - # this config will not affect the computation graph. 
- factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), - usedforsecurity=False).hexdigest() - return hash_str - - def __post_init__(self): - if (self.disable_any_whitespace - and self.backend not in ("xgrammar", "guidance")): - raise ValueError("disable_any_whitespace is only supported for " - "xgrammar and guidance backends.") - if (self.disable_additional_properties and self.backend != "guidance"): - raise ValueError("disable_additional_properties is only supported " - "for the guidance backend.") - - DetailedTraceModules = Literal["model", "worker", "all"] diff --git a/vllm/config/structured_outputs.py b/vllm/config/structured_outputs.py new file mode 100644 index 000000000000..b1f14294510f --- /dev/null +++ b/vllm/config/structured_outputs.py @@ -0,0 +1,64 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import hashlib +from typing import Any, Literal + +from pydantic.dataclasses import dataclass + +from vllm.config.utils import config + +StructuredOutputsBackend = Literal["auto", "xgrammar", "guidance", "outlines", + "lm-format-enforcer"] + + +@config +@dataclass +class StructuredOutputsConfig: + """Dataclass which contains structured outputs config for the engine.""" + + backend: StructuredOutputsBackend = "auto" + """Which engine will be used for structured outputs (e.g. JSON schema, + regex, etc) by default. With "auto", we will make opinionated choices + based on request contents and what the backend libraries currently support, + so the behavior is subject to change in each release.""" + disable_fallback: bool = False + """If `True`, vLLM will not fallback to a different backend on error.""" + disable_any_whitespace: bool = False + """If `True`, the model will not generate any whitespace during structured + outputs. This is only supported for xgrammar and guidance backends.""" + disable_additional_properties: bool = False + """If `True`, the `guidance` backend will not use `additionalProperties` + in the JSON schema. This is only supported for the `guidance` backend and + is used to better align its behaviour with `outlines` and `xgrammar`.""" + reasoning_parser: str = "" + """Select the reasoning parser depending on the model that you're using. + This is used to parse the reasoning content into OpenAI API format.""" + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. 
+ factors: list[Any] = [] + hash_str = hashlib.md5(str(factors).encode(), + usedforsecurity=False).hexdigest() + return hash_str + + def __post_init__(self): + if (self.disable_any_whitespace + and self.backend not in ("xgrammar", "guidance")): + raise ValueError("disable_any_whitespace is only supported for " + "xgrammar and guidance backends.") + if (self.disable_additional_properties and self.backend != "guidance"): + raise ValueError("disable_additional_properties is only supported " + "for the guidance backend.") From eaffe4486cb1d7edf884e6e254cab33fc397e308 Mon Sep 17 00:00:00 2001 From: Kay Yan Date: Thu, 18 Sep 2025 19:36:47 +0800 Subject: [PATCH 102/518] [Docs] Fix pooling-params doc references in openai_compatible_server.md (#24939) --- docs/api/README.md | 1 - docs/serving/openai_compatible_server.md | 20 ++++++++++++-------- vllm/pooling_params.py | 20 ++++++++++++++------ 3 files changed, 26 insertions(+), 15 deletions(-) diff --git a/docs/api/README.md b/docs/api/README.md index 148211756480..86e310f567dd 100644 --- a/docs/api/README.md +++ b/docs/api/README.md @@ -46,7 +46,6 @@ Engine classes for offline and online inference. Inference parameters for vLLM APIs. [](){ #sampling-params } -[](){ #pooling-params } - [vllm.SamplingParams][] - [vllm.PoolingParams][] diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md index bc52d02a50bd..bac3f6c1fe90 100644 --- a/docs/serving/openai_compatible_server.md +++ b/docs/serving/openai_compatible_server.md @@ -317,10 +317,11 @@ Full example: Date: Thu, 18 Sep 2025 20:37:08 +0900 Subject: [PATCH 103/518] [Docs] add the parallel sampling usage in LLMEngine and AsyncLLM (#24222) --- vllm/sampling_params.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index 0a01cb0260ae..efe70d019ccc 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -81,7 +81,13 @@ class SamplingParams( """ n: int = 1 - """Number of output sequences to return for the given prompt.""" + """Number of outputs to return for the given prompt request. + + NOTE: + `AsyncLLM` streams outputs by default. When `n > 1`, all `n` outputs + are generated and streamed cumulatively per request. To see all `n` + outputs upon completion, use `output_kind=RequestOutputKind.FINAL_ONLY` + in `SamplingParams`.""" best_of: Optional[int] = None """Number of output sequences that are generated from the prompt. From these `best_of` sequences, the top `n` sequences are returned. `best_of` From 5a33ae9a3faae79cad9d2659862fcd8d86483659 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:41:41 +0100 Subject: [PATCH 104/518] Fix forward reference warning in documentation (#25150) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/engine/async_timeout.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/vllm/engine/async_timeout.py b/vllm/engine/async_timeout.py index 28a023a71ef5..3b9c055160c1 100644 --- a/vllm/engine/async_timeout.py +++ b/vllm/engine/async_timeout.py @@ -16,19 +16,6 @@ from asyncio import timeout as asyncio_timeout else: - def asyncio_timeout(delay: Optional[float]) -> "Timeout": - """timeout context manager. - Useful in cases when you want to apply timeout logic around block - of code or in cases when asyncio.wait_for is not suitable. For example: - >>> async with timeout(0.001): - ... 
async with aiohttp.get('https://github.com') as r: - ... await r.text() - delay - value in seconds or None to disable timeout logic - """ - loop = asyncio.get_running_loop() - deadline = loop.time() + delay if delay is not None else None - return Timeout(deadline, loop) - class _State(enum.Enum): INIT = "INIT" ENTER = "ENTER" @@ -171,3 +158,16 @@ def _on_timeout(self, task: "Optional[asyncio.Task[Any]]") -> None: self._state = _State.TIMEOUT # drop the reference early self._timeout_handler = None + + def asyncio_timeout(delay: Optional[float]) -> Timeout: + """timeout context manager. + Useful in cases when you want to apply timeout logic around block + of code or in cases when asyncio.wait_for is not suitable. For example: + >>> async with timeout(0.001): + ... async with aiohttp.get('https://github.com') as r: + ... await r.text() + delay - value in seconds or None to disable timeout logic + """ + loop = asyncio.get_running_loop() + deadline = loop.time() + delay if delay is not None else None + return Timeout(deadline, loop) From 3ed1ec4af25a9cb7dcfea74b839864fc3c8ba09d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 13:06:28 +0100 Subject: [PATCH 105/518] Fix `validate-config` pre-commit check (#25157) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .pre-commit-config.yaml | 4 +--- tools/validate_config.py | 23 ++++++++++++++++------- vllm/config/__init__.py | 2 ++ 3 files changed, 19 insertions(+), 10 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c16bdeeecd07..13ad3af97d83 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -164,9 +164,7 @@ repos: name: Validate configuration has default values and that each field has a docstring entry: python tools/validate_config.py language: python - types: [python] - pass_filenames: true - files: vllm/config.py|tests/test_config.py|vllm/entrypoints/openai/cli_args.py + additional_dependencies: [regex] # Keep `suggestion` last - id: suggestion name: Suggestion diff --git a/tools/validate_config.py b/tools/validate_config.py index 8b1e955c653d..f6439fa9ada5 100644 --- a/tools/validate_config.py +++ b/tools/validate_config.py @@ -9,6 +9,8 @@ import inspect import sys +import regex as re + def get_attr_docs(cls_node: ast.ClassDef) -> dict[str, str]: """ @@ -88,11 +90,12 @@ def validate_class(class_node: ast.ClassDef): for stmt in class_node.body: # A field is defined as a class variable that has a type annotation. 
if isinstance(stmt, ast.AnnAssign): - # Skip ClassVar + # Skip ClassVar and InitVar # see https://docs.python.org/3/library/dataclasses.html#class-variables - if isinstance(stmt.annotation, ast.Subscript) and isinstance( - stmt.annotation.value, - ast.Name) and stmt.annotation.value.id == "ClassVar": + # and https://docs.python.org/3/library/dataclasses.html#init-only-variables + if (isinstance(stmt.annotation, ast.Subscript) + and isinstance(stmt.annotation.value, ast.Name) + and stmt.annotation.value.id in {"ClassVar", "InitVar"}): continue if isinstance(stmt.target, ast.Name): @@ -132,7 +135,7 @@ def validate_ast(tree: ast.stmt): def validate_file(file_path: str): try: - print(f"validating {file_path} config dataclasses ", end="") + print(f"Validating {file_path} config dataclasses ", end="") with open(file_path, encoding="utf-8") as f: source = f.read() @@ -140,7 +143,7 @@ def validate_file(file_path: str): validate_ast(tree) except ValueError as e: print(e) - SystemExit(2) + raise SystemExit(1) from e else: print("✅") @@ -151,7 +154,13 @@ def fail(message: str, node: ast.stmt): def main(): for filename in sys.argv[1:]: - validate_file(filename) + # Only run for Python files in vllm/ or tests/ + if not re.match(r"^(vllm|tests)/.*\.py$", filename): + continue + # Only run if the file contains @config + with open(filename, encoding="utf-8") as f: + if "@config" in f.read(): + validate_file(filename) if __name__ == "__main__": diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 69ab5712d404..25daca00c02d 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -450,6 +450,8 @@ class ModelConfig: # Multimodal config and init vars multimodal_config: Optional[MultiModalConfig] = None + """Configuration for multimodal model. 
If `None`, this will be inferred + from the architecture of `self.model`.""" limit_mm_per_prompt: InitVar[Optional[dict[str, int]]] = None media_io_kwargs: InitVar[Optional[dict[str, dict[str, Any]]]] = None mm_processor_kwargs: InitVar[Optional[dict[str, Any]]] = None From 66072b36dbf1707440ff43d57273d9e9974349d7 Mon Sep 17 00:00:00 2001 From: Asaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:21:17 +0300 Subject: [PATCH 106/518] [Bugfix][Mamba] - Fix Conv State Kernel FP32 Support (#24883) Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com> --- tests/models/language/generation/test_hybrid.py | 9 ++++++--- vllm/model_executor/layers/mamba/ops/causal_conv1d.py | 10 ++++++++-- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py index d0e42062099e..206ad1352e06 100644 --- a/tests/models/language/generation/test_hybrid.py +++ b/tests/models/language/generation/test_hybrid.py @@ -418,7 +418,9 @@ def test_full_cuda_graph( @pytest.mark.parametrize("model", FP32_STATE_MODELS) @pytest.mark.parametrize("max_tokens", [64]) @pytest.mark.parametrize("num_logprobs", [5]) -def test_fp32_state( +@pytest.mark.parametrize("cache_dtype_param", + ["mamba_ssm_cache_dtype", "mamba_cache_dtype"]) +def test_fp32_cache_state( hf_runner, vllm_runner, example_prompts, @@ -426,6 +428,7 @@ def test_fp32_state( model: str, max_tokens: int, num_logprobs: int, + cache_dtype_param: str, ) -> None: try: @@ -443,13 +446,13 @@ def test_fp32_state( m.setenv("VLLM_USE_V1", "0") with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS, - mamba_ssm_cache_dtype="float32") as vllm_model: + **{cache_dtype_param: "float32"}) as vllm_model: vllm_v0_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS, - mamba_ssm_cache_dtype="float32") as vllm_model: + **{cache_dtype_param: "float32"}) as vllm_model: vllm_v1_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 2a88fa661da0..8cfd0962c5bf 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -415,6 +415,9 @@ def causal_conv1d_fn( activation = "silu" args = None + # Store original dtype to cast back at the end + original_x_dtype = x.dtype + x = x.to(conv_states.dtype) out = torch.empty_like(x) if metadata is not None: cu_seqlen = metadata.cu_seqlen @@ -613,7 +616,7 @@ def grid(META): BLOCK_N=256, num_stages=2, ) - return out + return out.to(original_x_dtype) @triton.jit() @@ -973,6 +976,9 @@ def causal_conv1d_update( activation = "silu" if activation is True else None elif activation is not None: assert activation in ["silu", "swish"] + + original_x_dtype = x.dtype + x = x.to(conv_state.dtype) unsqueeze = query_start_loc is None and x.dim() == 2 if unsqueeze: # make it (batch, dim, seqlen) with seqlen == 1 @@ -1081,4 +1087,4 @@ def grid(META): ) if unsqueeze: out = out.squeeze(-1) - return out + return out.to(original_x_dtype) From 21da73343ad35f756e053ba4155dafb05229b0c5 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Thu, 18 Sep 2025 05:43:33 -0700 Subject: [PATCH 107/518] [Misc] Clean up flags in `vllm bench serve` (#25138) Signed-off-by: Roger Wang --- docs/contributing/benchmarks.md | 3 -- 
tests/benchmarks/test_serve_cli.py | 2 +- vllm/benchmarks/datasets.py | 8 ++--- vllm/benchmarks/serve.py | 49 +++++++++++++++++++++--------- 4 files changed, 39 insertions(+), 23 deletions(-) diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md index d04b1d1136a1..2a03ce1dffd6 100644 --- a/docs/contributing/benchmarks.md +++ b/docs/contributing/benchmarks.md @@ -156,7 +156,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct ```bash vllm bench serve \ --backend openai-chat \ - --endpoint-type openai-chat \ --model Qwen/Qwen2-VL-7B-Instruct \ --endpoint /v1/chat/completions \ --dataset-name hf \ @@ -230,7 +229,6 @@ vllm serve Qwen/Qwen2-VL-7B-Instruct ```bash vllm bench serve \ --backend openai-chat \ - --endpoint-type openai-chat \ --model Qwen/Qwen2-VL-7B-Instruct \ --endpoint /v1/chat/completions \ --dataset-name hf \ @@ -245,7 +243,6 @@ vllm bench serve \ ```bash vllm bench serve \ --backend openai-chat \ - --endpoint-type openai-chat \ --model Qwen/Qwen2-VL-7B-Instruct \ --endpoint /v1/chat/completions \ --dataset-name hf \ diff --git a/tests/benchmarks/test_serve_cli.py b/tests/benchmarks/test_serve_cli.py index 5471d6b8e4a5..fafbef5f3718 100644 --- a/tests/benchmarks/test_serve_cli.py +++ b/tests/benchmarks/test_serve_cli.py @@ -68,7 +68,7 @@ def test_bench_serve_chat(server): "5", "--endpoint", "/v1/chat/completions", - "--endpoint-type", + "--backend", "openai-chat", ] result = subprocess.run(command, capture_output=True, text=True) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 1cab40802c39..68a937d5750e 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -1358,7 +1358,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: elif args.dataset_name == "sonnet": dataset = SonnetDataset(dataset_path=args.dataset_path) # For the "sonnet" dataset, formatting depends on the backend. - if args.endpoint_type == "openai-chat": + if args.backend == "openai-chat": input_requests = dataset.sample( num_requests=args.num_prompts, input_len=args.sonnet_input_len, @@ -1462,7 +1462,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: "Please consider contributing if you would " "like to add support for additional dataset formats.") - if dataset_class.IS_MULTIMODAL and args.endpoint_type not in [ + if dataset_class.IS_MULTIMODAL and args.backend not in [ "openai-chat", "openai-audio", ]: @@ -1470,7 +1470,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: # endpoint-type. raise ValueError( "Multi-modal content is only supported on 'openai-chat' and " - "'openai-audio' endpoint-type.") + "'openai-audio' backends.") input_requests = dataset_class( dataset_path=args.dataset_path, dataset_subset=args.hf_subset, @@ -1563,7 +1563,7 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: try: # Enforce endpoint compatibility for multimodal datasets. 
- if args.dataset_name == "random-mm" and args.endpoint_type not in [ + if args.dataset_name == "random-mm" and args.backend not in [ "openai-chat"]: raise ValueError( "Multi-modal content (images) is only supported on " diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index d8784340eba1..7382782f1165 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -8,8 +8,8 @@ On the client side, run: vllm bench serve \ - --endpoint-type \ - --label \ + --backend \ + --label \ --model \ --dataset-name \ --request-rate \ @@ -52,6 +52,21 @@ and (shutil.which("gnuplot") is not None)) +# TODO: Remove this in v0.11.0 +class DeprecatedEndpointTypeAction(argparse.Action): + """Argparse action for the deprecated --endpoint-type flag. + """ + + def __call__(self, _, namespace, values, option_string=None): + warnings.warn( + "'--endpoint-type' is deprecated and will be removed in v0.11.0. " + "Please use '--backend' instead or remove this argument if you " + "have already set it.", + stacklevel=1, + ) + setattr(namespace, self.dest, values) + + class TaskType(Enum): GENERATION = "generation" EMBEDDING = "embedding" @@ -470,7 +485,7 @@ async def benchmark( else: request_func = ASYNC_REQUEST_FUNCS[endpoint_type] else: - raise ValueError(f"Unknown endpoint_type: {endpoint_type}") + raise ValueError(f"Unknown backend: {endpoint_type}") # Reuses connections across requests to reduce TLS handshake overhead. connector = aiohttp.TCPConnector( @@ -850,24 +865,28 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace, def add_cli_args(parser: argparse.ArgumentParser): add_dataset_parser(parser) - parser.add_argument( - "--endpoint-type", - type=str, - default="openai", - choices=list(ASYNC_REQUEST_FUNCS.keys()), - ) parser.add_argument( "--label", type=str, default=None, help="The label (prefix) of the benchmark results. If not specified, " - "the endpoint type will be used as the label.", + "the value of '--backend' will be used as the label.", ) parser.add_argument( "--backend", type=str, - default="vllm", + default="openai", + choices=list(ASYNC_REQUEST_FUNCS.keys()), + help="The type of backend or endpoint to use for the benchmark." + ) + parser.add_argument( + "--endpoint-type", + type=str, + default=None, choices=list(ASYNC_REQUEST_FUNCS.keys()), + action=DeprecatedEndpointTypeAction, + help="'--endpoint-type' is deprecated and will be removed in v0.11.0. 
" + "Please use '--backend' instead.", ) parser.add_argument( "--base-url", @@ -1165,7 +1184,6 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: raise ValueError( "For exponential ramp-up, the start RPS cannot be 0.") - endpoint_type = args.endpoint_type label = args.label model_id = args.model model_name = args.served_model_name @@ -1228,7 +1246,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: gc.freeze() benchmark_result = await benchmark( - endpoint_type=args.endpoint_type, + endpoint_type=args.backend, api_url=api_url, base_url=base_url, model_id=model_id, @@ -1262,7 +1280,8 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: # Setup current_dt = datetime.now().strftime("%Y%m%d-%H%M%S") result_json["date"] = current_dt - result_json["endpoint_type"] = args.endpoint_type + result_json["endpoint_type"] = args.backend # for backward compatibility + result_json["backend"] = args.backend result_json["label"] = label result_json["model_id"] = model_id result_json["tokenizer_id"] = tokenizer_id @@ -1312,7 +1331,7 @@ async def main_async(args: argparse.Namespace) -> dict[str, Any]: base_model_id = model_id.split("/")[-1] max_concurrency_str = (f"-concurrency{args.max_concurrency}" if args.max_concurrency is not None else "") - label = label or endpoint_type + label = label or args.backend if args.ramp_up_strategy is not None: file_name = f"{label}-ramp-up-{args.ramp_up_strategy}-{args.ramp_up_start_rps}qps-{args.ramp_up_end_rps}qps{max_concurrency_str}-{base_model_id}-{current_dt}.json" # noqa else: From 470484a4f503d4768008c2f5a8dc828dc90633b4 Mon Sep 17 00:00:00 2001 From: Shanshan Shen <467638484@qq.com> Date: Thu, 18 Sep 2025 20:44:31 +0800 Subject: [PATCH 108/518] [Structured Output][Refactor] Move `apply_grammar_bitmask()` method from `ModelRunner` to structured output utils (#21999) Signed-off-by: shen-shanshan <467638484@qq.com> --- vllm/v1/structured_output/utils.py | 80 ++++++++++++++++++++++++++++++ vllm/v1/worker/gpu_model_runner.py | 75 ++-------------------------- 2 files changed, 84 insertions(+), 71 deletions(-) diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py index 953185a8fc31..127c8876525b 100644 --- a/vllm/v1/structured_output/utils.py +++ b/vllm/v1/structured_output/utils.py @@ -8,7 +8,9 @@ import os from typing import TYPE_CHECKING +import numpy as np import regex as re +import torch from cachetools import LRUCache from diskcache import Cache @@ -20,9 +22,13 @@ import outlines_core as oc import transformers.file_utils as file_utils import transformers.models.gpt2.tokenization_gpt2 as tokenization_gpt2 + import xgrammar as xgr from vllm.transformers_utils.tokenizer import AnyTokenizer + from vllm.v1.core.sched.output import SchedulerOutput + from vllm.v1.worker.gpu_input_batch import InputBatch else: + xgr = LazyLoader("xgr", globals(), "xgrammar") oc = LazyLoader("oc", globals(), "outlines_core") file_utils = LazyLoader("file_utils", globals(), "transformers.file_utils") tokenization_gpt2 = LazyLoader( @@ -36,6 +42,80 @@ CACHE = None +def apply_grammar_bitmask( + scheduler_output: SchedulerOutput, + input_batch: InputBatch, + logits: torch.Tensor, + device: torch.device, +) -> None: + """ + Apply grammar bitmask to output logits of the model with xgrammar function. + + Args: + scheduler_output (SchedulerOutput): The result of engine scheduling. + input_batch (InputBatch): The input of model runner. + logits (torch.Tensor): The output logits of model forward. 
+ device (torch.device): The device that model runner running on. + """ + grammar_bitmask = scheduler_output.grammar_bitmask + if grammar_bitmask is None: + return + + # We receive the structured output bitmask from the scheduler, + # compacted to contain bitmasks only for structured output requests. + # The order of the requests in the bitmask is not guaranteed to be the + # same as the order of the requests in the gpu runner's batch. We need + # to sort the bitmask to match the order of the requests used here. + + # Get the batch indices of the structured output requests. + # Keep track of the number of speculative tokens scheduled for every + # request in the batch, as the logit indices are offset by this amount. + struct_out_req_batch_indices: dict[str, int] = {} + cumulative_offset = 0 + seq = sorted(input_batch.req_id_to_index.items(), key=lambda x: x[1]) + for req_id, batch_index in seq: + logit_index = batch_index + cumulative_offset + cumulative_offset += len( + scheduler_output.scheduled_spec_decode_tokens.get(req_id, [])) + if req_id in scheduler_output.structured_output_request_ids: + struct_out_req_batch_indices[req_id] = logit_index + + out_indices = [] + + # Reorder the bitmask to match the order of the requests in the batch. + sorted_bitmask = np.full(shape=(logits.shape[0], grammar_bitmask.shape[1]), + fill_value=-1, + dtype=grammar_bitmask.dtype) + cumulative_index = 0 + seq = sorted(scheduler_output.structured_output_request_ids.items(), + key=lambda x: x[1]) + for req_id, _ in seq: + logit_index = struct_out_req_batch_indices[req_id] + num_spec_tokens = len( + scheduler_output.scheduled_spec_decode_tokens.get(req_id, [])) + for i in range(1 + num_spec_tokens): + sorted_bitmask[logit_index + i] = \ + grammar_bitmask[cumulative_index + i] + out_indices.append(logit_index + i) + cumulative_index += 1 + num_spec_tokens + grammar_bitmask = sorted_bitmask + + # If the length of out indices and the logits have the same shape + # we don't need to pass indices to the kernel, + # since the bitmask is already aligned with the logits. + skip_out_indices = len(out_indices) == logits.shape[0] + + # Serialization of np.ndarray is much more efficient than a tensor, + # so we receive it in that format. 
+ grammar_bitmask = torch.from_numpy(grammar_bitmask).contiguous() + + xgr.apply_token_bitmask_inplace( + logits, + grammar_bitmask.to(device, non_blocking=True), + indices=out_indices if not skip_out_indices else None, + ) + + class OutlinesVocabulary: """ Wrapper class for `outlines_core.Vocabulary`, diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 2e67984cb432..4873b586724e 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -54,7 +54,7 @@ from vllm.sequence import IntermediateTensors, PoolerOutput from vllm.tasks import GenerationTask, PoolingTask, SupportedTask from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, - GiB_bytes, LazyLoader, check_use_alibi, get_dtype_size, + GiB_bytes, check_use_alibi, get_dtype_size, is_pin_memory_available, round_up, supports_dynamo) from vllm.v1.attention.backends.flash_attn import AttentionMetadata from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder @@ -85,6 +85,7 @@ from vllm.v1.spec_decode.medusa import MedusaProposer from vllm.v1.spec_decode.metadata import SpecDecodeMetadata from vllm.v1.spec_decode.ngram_proposer import NgramProposer +from vllm.v1.structured_output.utils import apply_grammar_bitmask from vllm.v1.utils import CpuGpuBuffer, record_function_or_nullcontext from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch from vllm.v1.worker.gpu_ubatch_wrapper import UBatchWrapper @@ -101,12 +102,8 @@ scatter_mm_placeholders) if TYPE_CHECKING: - import xgrammar as xgr - from vllm.model_executor.model_loader.tensorizer import TensorizerConfig from vllm.v1.core.sched.output import SchedulerOutput -else: - xgr = LazyLoader("xgr", globals(), "xgrammar") logger = init_logger(__name__) @@ -1617,71 +1614,6 @@ def get_supported_tasks(self) -> tuple[SupportedTask, ...]: return tuple(tasks) - def apply_grammar_bitmask( - self, - scheduler_output: "SchedulerOutput", - logits: torch.Tensor, - ): - grammar_bitmask = scheduler_output.grammar_bitmask - if grammar_bitmask is None: - return - - # We receive the structured output bitmask from the scheduler, - # compacted to contain bitmasks only for structured output requests. - # The order of the requests in the bitmask is not guaranteed to be the - # same as the order of the requests in the gpu runner's batch. We need - # to sort the bitmask to match the order of the requests used here. - - # Get the batch indices of the structured output requests. - # Keep track of the number of speculative tokens scheduled for every - # request in the batch, as the logit indices are offset by this amount. - struct_out_req_batch_indices: dict[str, int] = {} - cumulative_offset = 0 - seq = sorted(self.input_batch.req_id_to_index.items(), - key=lambda x: x[1]) - for req_id, batch_index in seq: - logit_index = batch_index + cumulative_offset - cumulative_offset += len( - scheduler_output.scheduled_spec_decode_tokens.get(req_id, [])) - if req_id in scheduler_output.structured_output_request_ids: - struct_out_req_batch_indices[req_id] = logit_index - - out_indices = [] - - # Reorder the bitmask to match the order of the requests in the batch. 
- sorted_bitmask = np.full(shape=(logits.shape[0], - grammar_bitmask.shape[1]), - fill_value=-1, - dtype=grammar_bitmask.dtype) - cumulative_index = 0 - seq = sorted(scheduler_output.structured_output_request_ids.items(), - key=lambda x: x[1]) - for req_id, _ in seq: - logit_index = struct_out_req_batch_indices[req_id] - num_spec_tokens = len( - scheduler_output.scheduled_spec_decode_tokens.get(req_id, [])) - for i in range(1 + num_spec_tokens): - sorted_bitmask[logit_index + i] = \ - grammar_bitmask[cumulative_index + i] - out_indices.append(logit_index + i) - cumulative_index += 1 + num_spec_tokens - grammar_bitmask = sorted_bitmask - - # If the length of out indices and the logits have the same shape - # we don't need to pass indices to the kernel, - # since the bitmask is already aligned with the logits. - skip_out_indices = len(out_indices) == logits.shape[0] - - # Serialization of np.ndarray is much more efficient than a tensor, - # so we receive it in that format. - grammar_bitmask = torch.from_numpy(grammar_bitmask).contiguous() - - xgr.apply_token_bitmask_inplace( - logits, - grammar_bitmask.to(self.device, non_blocking=True), - indices=out_indices if not skip_out_indices else None, - ) - def sync_and_slice_intermediate_tensors( self, num_tokens: int, intermediate_tensors: IntermediateTensors, sync_self: bool) -> IntermediateTensors: @@ -2232,7 +2164,8 @@ def execute_model( # Apply structured output bitmasks if present if scheduler_output.grammar_bitmask is not None: - self.apply_grammar_bitmask(scheduler_output, logits) + apply_grammar_bitmask(scheduler_output, self.input_batch, + logits, self.device) with record_function_or_nullcontext("Sample"): sampler_output = self._sample(logits, spec_decode_metadata) From fbd6523ac00082c398dc8126434cede595169609 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 18 Sep 2025 08:53:45 -0400 Subject: [PATCH 109/518] Refactor dense FP8 tensor/channel/block utils and add CT FP8 block (#21404) --- vllm/model_executor/layers/linear.py | 14 +- .../compressed_tensors/compressed_tensors.py | 68 ++--- .../schemes/compressed_tensors_w8a8_fp8.py | 191 ++++++------- .../model_executor/layers/quantization/fp8.py | 267 ++++++------------ .../layers/quantization/utils/fp8_utils.py | 220 +++++++++++++++ 5 files changed, 442 insertions(+), 318 deletions(-) diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index cd0513652097..5bf96398bc71 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -805,12 +805,10 @@ def weight_loader_v2(self, assert loaded_shard_id < len(self.output_sizes) if isinstance(param, BlockQuantScaleParameter): - from vllm.model_executor.layers.quantization.fp8 import ( - Fp8LinearMethod, Fp8MoEMethod) assert self.quant_method is not None - assert isinstance(self.quant_method, - (Fp8LinearMethod, Fp8MoEMethod)) - weight_block_size = self.quant_method.quant_config.weight_block_size + # Assume the weight block size has been set by quant method + assert hasattr(self, "weight_block_size") + weight_block_size = self.weight_block_size assert weight_block_size is not None block_n, _ = weight_block_size[0], weight_block_size[1] shard_offset = ( @@ -989,8 +987,10 @@ def weight_loader_v2(self, # Note(simon): This is needed for Qwen3's fp8 quantization. 
if isinstance(param, BlockQuantScaleParameter): assert self.quant_method is not None - assert hasattr(self.quant_method, "quant_config") - weight_block_size = self.quant_method.quant_config.weight_block_size + # Assume the weight block size has been set by quant method + assert hasattr(self, "weight_block_size") + weight_block_size = self.weight_block_size + assert weight_block_size is not None block_n, _ = weight_block_size[0], weight_block_size[1] shard_offset = (shard_offset + block_n - 1) // block_n shard_size = (shard_size + block_n - 1) // block_n diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index b56a69131177..d6550dd16892 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -12,7 +12,6 @@ QuantizationStrategy, QuantizationType) from compressed_tensors.transform import TransformConfig -from pydantic import BaseModel import vllm.envs as envs from vllm.logger import init_logger @@ -268,7 +267,8 @@ def _check_scheme_supported(self, else: return False - def _is_fp4a4_nvfp4(self, weight_quant: BaseModel, input_quant: BaseModel): + def _is_fp4a4_nvfp4(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs): if weight_quant is None or input_quant is None: return False @@ -288,8 +288,8 @@ def _is_fp4a4_nvfp4(self, weight_quant: BaseModel, input_quant: BaseModel): return (is_tensor_group_quant and is_float_type and is_4_bits and is_group_size_16 and is_symmetric) - def _is_fp4a16_nvfp4(self, weight_quant: BaseModel, - input_quant: BaseModel): + def _is_fp4a16_nvfp4(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs): is_weight_only = weight_quant is not None and input_quant is None is_tensor_group_quant = ( @@ -303,8 +303,8 @@ def _is_fp4a16_nvfp4(self, weight_quant: BaseModel, return (is_weight_only and is_tensor_group_quant and is_float_type and is_4_bits and is_group_size_16 and is_symmetric) - def _is_static_tensor_w8a8(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_static_tensor_w8a8(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: is_8_bits = weight_quant.num_bits == input_quant.num_bits == 8 weight_strategy = ( weight_quant.strategy == QuantizationStrategy.TENSOR.value @@ -317,8 +317,8 @@ def _is_static_tensor_w8a8(self, weight_quant: BaseModel, # Only symmetric weight quantization supported. return is_8_bits and is_tensor and weight_quant.symmetric and is_static - def _is_dynamic_token_w8a8(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_dynamic_token_w8a8(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: is_8_bits = weight_quant.num_bits == input_quant.num_bits == 8 weight_strategy = ( weight_quant.strategy == QuantizationStrategy.TENSOR.value @@ -331,8 +331,8 @@ def _is_dynamic_token_w8a8(self, weight_quant: BaseModel, # Only symmetric weight quantization supported. 
return is_8_bits and is_token and weight_quant.symmetric and is_dynamic - def _is_dynamic_token_w4a8_int(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_dynamic_token_w4a8_int(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: is_weight_4_bits = weight_quant.num_bits == 4 is_activation_8_bits = input_quant.num_bits == 8 weight_strategy = ( @@ -347,8 +347,8 @@ def _is_dynamic_token_w4a8_int(self, weight_quant: BaseModel, return (is_weight_4_bits and is_activation_8_bits and is_token and weight_quant.symmetric and is_dynamic) - def _is_fp8_w8a8(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w8a8(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: # Confirm weights and activations quantized. if weight_quant is None or input_quant is None: return False @@ -358,11 +358,12 @@ def _is_fp8_w8a8(self, weight_quant: BaseModel, and input_quant.type == QuantizationType.FLOAT) is_symmetric_weight = weight_quant.symmetric is_static_weight = not weight_quant.dynamic - is_per_tensor_or_channel_weight = (weight_quant.strategy in [ - QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL + is_tensor_or_channel_or_block_weight = (weight_quant.strategy in [ + QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL, + QuantizationStrategy.BLOCK ]) if not (is_floating_point and is_symmetric_weight and is_static_weight - and is_per_tensor_or_channel_weight): + and is_tensor_or_channel_or_block_weight): return False # Dynamic quantization is always supported if weights supported. @@ -375,8 +376,8 @@ def _is_fp8_w8a8(self, weight_quant: BaseModel, input_quant.strategy == QuantizationStrategy.TENSOR) return is_symmetric_activation and is_per_tensor_activation - def _is_fp8_w4a8(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w4a8(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: if not weight_quant or not input_quant: return False is_weight_4_bits = weight_quant.num_bits == 4 @@ -392,24 +393,24 @@ def _is_fp8_w4a8(self, weight_quant: BaseModel, return (is_weight_4_bits and is_activation_8_bits and is_token and is_symmetric and is_dynamic) - def _is_fp8_w4a8_sm90(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w4a8_sm90(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: return (self._check_scheme_supported(90, error=False, match_exact=True) and self._is_fp8_w4a8(weight_quant, input_quant)) - def _is_fp8_w8a8_sm90(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w8a8_sm90(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: return (self._check_scheme_supported(90, error=False, match_exact=True) and self._is_fp8_w8a8(weight_quant, input_quant)) - def _is_fp8_w8a8_sm100(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w8a8_sm100(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: return (self._check_scheme_supported( 100, error=False, match_exact=True) and self._is_fp8_w8a8(weight_quant, input_quant)) - def _is_fp8_w8a16(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_fp8_w8a16(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: # Confirm weights quantized. if weight_quant is None: return False @@ -421,18 +422,19 @@ def _is_fp8_w8a16(self, weight_quant: BaseModel, # Confirm weight scheme is supported. 
is_symmetric_weight = weight_quant.symmetric is_static_weight = not weight_quant.dynamic - is_per_tensor_or_channel_weight = (weight_quant.strategy in [ - QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL + is_tensor_or_channel_or_block_weight = (weight_quant.strategy in [ + QuantizationStrategy.TENSOR, QuantizationStrategy.CHANNEL, + QuantizationStrategy.BLOCK ]) if not (is_symmetric_weight and is_static_weight # noqa: SIM103 - and is_per_tensor_or_channel_weight): + and is_tensor_or_channel_or_block_weight): return False # All conditions satisfied. return True - def _is_wNa16_group_channel(self, weight_quant: BaseModel, - input_quant: BaseModel) -> bool: + def _is_wNa16_group_channel(self, weight_quant: QuantizationArgs, + input_quant: QuantizationArgs) -> bool: input_quant_none = input_quant is None is_channel_group = ( weight_quant.strategy == QuantizationStrategy.CHANNEL.value @@ -443,8 +445,8 @@ def _is_wNa16_group_channel(self, weight_quant: BaseModel, def _get_scheme_from_parts( self, - weight_quant: BaseModel, - input_quant: BaseModel, + weight_quant: QuantizationArgs, + input_quant: QuantizationArgs, format: Optional[str] = None) -> "CompressedTensorsScheme": # use the per-layer format if defined, otherwise, use global format @@ -496,7 +498,7 @@ def _get_scheme_from_parts( CompressedTensorsW8A8Fp8.get_min_capability(), error=False) if is_fp8_w8a8_supported: return CompressedTensorsW8A8Fp8( - strategy=weight_quant.strategy, + weight_quant=weight_quant, is_static_input_scheme=(input_quant and not input_quant.dynamic)) else: diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py index d984e89d9e02..d42ae22c5139 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py @@ -4,28 +4,41 @@ from typing import Callable, Optional import torch -from compressed_tensors.quantization import QuantizationStrategy +from compressed_tensors.quantization import (QuantizationArgs, + QuantizationStrategy) from torch.nn import Parameter from vllm.model_executor.layers.quantization.compressed_tensors.schemes import ( CompressedTensorsScheme) +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + apply_fp8_block_linear, check_aiter_fp8_linear_support, + create_fp8_input_scale, create_fp8_scale_parameter, + create_fp8_weight_parameter, maybe_post_process_fp8_weight_block, + process_fp8_weight_block_strategy, process_fp8_weight_channel_strategy, + process_fp8_weight_tensor_strategy, validate_fp8_block_shape) from vllm.model_executor.layers.quantization.utils.quant_utils import ( GroupShape) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( - Fp8LinearOp, maybe_create_device_identity, normalize_e4m3fn_to_e4m3fnuz, - requantize_with_max_scale) -from vllm.model_executor.parameter import (ChannelQuantScaleParameter, - ModelWeightParameter, + Fp8LinearOp, cutlass_block_fp8_supported, maybe_create_device_identity) +from vllm.model_executor.parameter import (BlockQuantScaleParameter, + ChannelQuantScaleParameter, PerTensorScaleParameter) -from vllm.platforms import current_platform __all__ = ["CompressedTensorsW8A8Fp8"] +strategy_to_parameter_type = { + QuantizationStrategy.BLOCK: BlockQuantScaleParameter, + QuantizationStrategy.CHANNEL: 
ChannelQuantScaleParameter, + QuantizationStrategy.TENSOR: PerTensorScaleParameter, +} + class CompressedTensorsW8A8Fp8(CompressedTensorsScheme): - def __init__(self, strategy: str, is_static_input_scheme: bool): - self.strategy = strategy + def __init__(self, weight_quant: QuantizationArgs, + is_static_input_scheme: bool): + self.weight_quant = weight_quant + self.strategy = weight_quant.strategy self.out_dtype = torch.get_default_dtype() self.is_static_input_scheme = is_static_input_scheme self.act_q_group_shape = GroupShape.PER_TENSOR \ @@ -34,120 +47,108 @@ def __init__(self, strategy: str, is_static_input_scheme: bool): act_quant_static=self.is_static_input_scheme, act_quant_group_shape=self.act_q_group_shape) + self.weight_block_size = self.weight_quant.block_structure + self.cutlass_block_fp8_supported = cutlass_block_fp8_supported() + self.use_aiter_and_is_supported = check_aiter_fp8_linear_support() + @classmethod def get_min_capability(cls) -> int: # lovelace and up return 89 - def process_weights_after_loading(self, layer) -> None: - # If per tensor, when we have a fused module (e.g. QKV) with per - # tensor scales (thus N scales being passed to the kernel), - # requantize so we can always run per tensor - if self.strategy == QuantizationStrategy.TENSOR: - max_w_scale, weight = requantize_with_max_scale( - weight=layer.weight, - weight_scale=layer.weight_scale, - logical_widths=layer.logical_widths, - ) - - if current_platform.is_fp8_fnuz(): - input_scale = getattr(layer, 'input_scale', None) - - weight, max_w_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz( - weight=weight, - weight_scale=max_w_scale, - input_scale=input_scale) - if input_scale is not None: - layer.input_scale = Parameter(input_scale, - requires_grad=False) - - layer.weight = Parameter(weight.t(), requires_grad=False) - layer.weight_scale = Parameter(max_w_scale, requires_grad=False) - - # If channelwise, scales are already lined up, so just transpose. 
- elif self.strategy == QuantizationStrategy.CHANNEL: - weight = layer.weight - - if current_platform.is_fp8_fnuz(): - input_scale = getattr(layer, 'input_scale', None) - - weight, weight_scale, input_scale = \ - normalize_e4m3fn_to_e4m3fnuz( - weight=weight, - weight_scale=layer.weight_scale, - input_scale=input_scale) - if input_scale is not None: - layer.input_scale = Parameter(input_scale, - requires_grad=False) - else: - weight_scale = layer.weight_scale.data - - layer.weight = Parameter(weight.t(), requires_grad=False) - # required by torch.compile to be torch.nn.Parameter - layer.weight_scale = Parameter(weight_scale, requires_grad=False) - - else: - raise ValueError(f"Unknown quantization strategy {self.strategy}") - - # INPUT SCALE - if self.is_static_input_scheme and hasattr(layer, 'input_scale'): - layer.input_scale = Parameter(layer.input_scale.max(), - requires_grad=False) - else: - layer.input_scale = None - def create_weights(self, layer: torch.nn.Module, - output_partition_sizes: list[int], input_size_per_partition: int, - params_dtype: torch.dtype, weight_loader: Callable, - **kwargs): + output_partition_sizes: list[int], input_size: int, + output_size: int, params_dtype: torch.dtype, + weight_loader: Callable, **kwargs): maybe_create_device_identity() output_size_per_partition = sum(output_partition_sizes) layer.logical_widths = output_partition_sizes + layer.weight_block_size = None + + if self.strategy == QuantizationStrategy.BLOCK: + assert self.weight_block_size is not None + layer.weight_block_size = self.weight_block_size + # Validate block quantization shapes + validate_fp8_block_shape(layer, input_size, output_size, + input_size_per_partition, + output_partition_sizes, + self.weight_block_size) # WEIGHT - weight = ModelWeightParameter(data=torch.empty( - output_size_per_partition, - input_size_per_partition, - dtype=torch.float8_e4m3fn), - input_dim=1, - output_dim=0, - weight_loader=weight_loader) + weight = create_fp8_weight_parameter(output_size_per_partition, + input_size_per_partition, + weight_loader) layer.register_parameter("weight", weight) # WEIGHT SCALE - # TODO: update create_xxx_parameter functions to return - # the newly added parameters - if self.strategy == QuantizationStrategy.CHANNEL: - weight_scale = ChannelQuantScaleParameter( - data=torch.empty((sum(output_partition_sizes), 1), - dtype=torch.float32), - output_dim=0, - weight_loader=weight_loader) - else: - assert self.strategy == QuantizationStrategy.TENSOR - weight_scale = PerTensorScaleParameter(data=torch.empty( - len(output_partition_sizes), dtype=torch.float32), - weight_loader=weight_loader) - - # min requirement for fp8 kernels - weight_scale[:] = torch.finfo(torch.float32).min + weight_scale = create_fp8_scale_parameter( + strategy_to_parameter_type[self.strategy], output_partition_sizes, + input_size_per_partition, layer.weight_block_size, weight_loader) layer.register_parameter("weight_scale", weight_scale) # INPUT SCALE if self.is_static_input_scheme: - input_scale = PerTensorScaleParameter(data=torch.empty( - len(output_partition_sizes), dtype=torch.float32), - weight_loader=weight_loader) - input_scale[:] = torch.finfo(torch.float32).min + input_scale = create_fp8_input_scale(output_partition_sizes, + weight_loader) layer.register_parameter("input_scale", input_scale) + def process_weights_after_loading(self, layer) -> None: + if self.strategy == QuantizationStrategy.TENSOR: + weight, weight_scale, input_scale = ( + process_fp8_weight_tensor_strategy( + layer.weight, 
layer.weight_scale, layer.logical_widths, + getattr(layer, 'input_scale', None))) + weight = weight.t() + + elif self.strategy == QuantizationStrategy.CHANNEL: + weight, weight_scale, input_scale = ( + process_fp8_weight_channel_strategy( + layer.weight, layer.weight_scale, + getattr(layer, 'input_scale', None))) + weight = weight.t() + + elif self.strategy == QuantizationStrategy.BLOCK: + assert self.is_static_input_scheme is False + weight, weight_scale = process_fp8_weight_block_strategy( + layer.weight, layer.weight_scale) + input_scale = None + + else: + raise ValueError(f"Unknown quantization strategy {self.strategy}") + + # required by torch.compile to be torch.nn.Parameter + layer.weight = Parameter(weight.data, requires_grad=False) + layer.weight_scale = Parameter(weight_scale.data, requires_grad=False) + if input_scale is not None: + layer.input_scale = Parameter(input_scale.data, + requires_grad=False) + + # INPUT SCALE + if self.is_static_input_scheme and hasattr(layer, 'input_scale'): + layer.input_scale = Parameter(layer.input_scale.max(), + requires_grad=False) + else: + layer.input_scale = None + + if self.strategy == QuantizationStrategy.BLOCK: + maybe_post_process_fp8_weight_block( + layer, self.cutlass_block_fp8_supported) + def apply_weights(self, layer: torch.nn.Module, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: + if layer.weight_block_size is not None: + return apply_fp8_block_linear( + layer, + input=x, + bias=bias, + cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, + use_aiter_and_is_supported=self.use_aiter_and_is_supported) + return self.fp8_linear.apply(input=x, weight=layer.weight, weight_scale=layer.weight_scale, diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index e75094c54743..aec9c79f1ea8 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -4,7 +4,6 @@ from typing import TYPE_CHECKING, Any, Callable, Optional, Union import torch -import torch.nn.functional as F from torch.nn import Module from torch.nn.parameter import Parameter @@ -32,8 +31,12 @@ register_moe_scaling_factors, rotate_flashinfer_fp8_moe_weights, select_cutlass_fp8_gemm_impl, swap_w13_to_w31) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - get_col_major_tma_aligned_tensor, requant_weight_ue8m0_inplace, - should_use_deepgemm_for_fp8_linear) + apply_fp8_block_linear, check_aiter_fp8_linear_support, + create_fp8_input_scale, create_fp8_scale_parameter, + create_fp8_weight_parameter, get_col_major_tma_aligned_tensor, + maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, + process_fp8_weight_tensor_strategy, requant_weight_ue8m0_inplace, + validate_fp8_block_shape) from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin, prepare_moe_fp8_layer_for_marlin) @@ -42,8 +45,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( Fp8LinearOp, all_close_1d, cutlass_block_fp8_supported, cutlass_fp8_supported, maybe_create_device_identity, - normalize_e4m3fn_to_e4m3fnuz, per_tensor_dequantize, - requantize_with_max_scale) + normalize_e4m3fn_to_e4m3fnuz, per_tensor_dequantize) from vllm.model_executor.parameter import (BlockQuantScaleParameter, ModelWeightParameter, PerTensorScaleParameter) @@ -233,14 +235,10 @@ def __init__(self, quant_config: Fp8Config): if current_platform.is_rocm(): self.use_marlin = False 
- # AITER is only supported on ROCm and only for FP8_FNUZ - # and at the moment are MI300 series - self.use_aiter_and_is_supported = (current_platform.is_rocm() - and envs.VLLM_ROCM_USE_AITER - and envs.VLLM_ROCM_USE_AITER_LINEAR - and current_platform.is_fp8_fnuz()) + self.use_aiter_and_is_supported = check_aiter_fp8_linear_support() - self.block_quant = self.quant_config.weight_block_size is not None + self.weight_block_size = self.quant_config.weight_block_size + self.block_quant = self.weight_block_size is not None self.act_q_static = self.quant_config.activation_scheme == "static" # Use per-token quantization for better perf if dynamic and cutlass if not self.act_q_static and cutlass_fp8_supported(): @@ -273,51 +271,27 @@ def create_weights( layer.weight_block_size = None if self.block_quant: - tp_size = getattr(layer, "tp_size", - get_tensor_model_parallel_world_size()) - assert self.quant_config.weight_block_size is not None - layer.weight_block_size = self.quant_config.weight_block_size - block_n, block_k = ( - self.quant_config.weight_block_size[0], - self.quant_config.weight_block_size[1], - ) - # Required by row parallel - if (tp_size > 1 - and input_size // input_size_per_partition == tp_size - and input_size_per_partition % block_k != 0): - raise ValueError( - f"Weight input_size_per_partition = " - f"{input_size_per_partition} is not divisible by " - f"weight quantization block_k = {block_k}.") - # Required by column parallel or enabling merged weights - is_tp_split = (tp_size > 1 and - output_size // output_size_per_partition == tp_size) - is_merged_gemm = len(output_partition_sizes) > 1 - if is_tp_split or is_merged_gemm: - sizes_to_check = output_partition_sizes - if not is_tp_split and is_merged_gemm: - # In case of merged matrices, we allow the last - # matrix to not be a multiple of block size - sizes_to_check = output_partition_sizes[:-1] - for output_partition_size in sizes_to_check: - if output_partition_size % block_n != 0: - raise ValueError( - f"Weight output_partition_size = " - f"{output_partition_size} is not divisible by " - f"weight quantization block_n = {block_n}.") + assert self.weight_block_size is not None + layer.weight_block_size = self.weight_block_size + validate_fp8_block_shape(layer, input_size, output_size, + input_size_per_partition, + output_partition_sizes, + self.weight_block_size) # WEIGHT - weight_dtype = (torch.float8_e4m3fn - if self.quant_config.is_checkpoint_fp8_serialized else - params_dtype) - - weight = ModelWeightParameter(data=torch.empty( - output_size_per_partition, - input_size_per_partition, - dtype=weight_dtype), - input_dim=1, - output_dim=0, - weight_loader=weight_loader) + if self.quant_config.is_checkpoint_fp8_serialized: + weight = create_fp8_weight_parameter(output_size_per_partition, + input_size_per_partition, + weight_loader) + else: + # For non-serialized checkpoints, use original dtype + weight = ModelWeightParameter(data=torch.empty( + output_size_per_partition, + input_size_per_partition, + dtype=params_dtype), + input_dim=1, + output_dim=0, + weight_loader=weight_loader) layer.register_parameter("weight", weight) # If checkpoint is serialized fp8, load them. 
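# For reference: the shared helper create_fp8_scale_parameter (added to
# fp8_utils.py later in this patch) sizes block-wise weight scales as one
# float32 scale per (block_n x block_k) weight tile. A minimal standalone
# sketch of that shape arithmetic, using illustrative sizes (a 4096x4096
# weight partition and a [128, 128] block size, not values taken from this
# diff):
def _block_scale_shape(out_size: int, in_size: int,
                       block_n: int, block_k: int) -> tuple[int, int]:
    # Ceil-division in both dimensions, matching
    # (output_size_per_partition + block_n - 1) // block_n and
    # (input_size_per_partition + block_k - 1) // block_k in the helper.
    return ((out_size + block_n - 1) // block_n,
            (in_size + block_k - 1) // block_k)

assert _block_scale_shape(4096, 4096, 128, 128) == (32, 32)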
@@ -325,154 +299,87 @@ def create_weights( if self.quant_config.is_checkpoint_fp8_serialized: # WEIGHT SCALE if not self.block_quant: - scale = PerTensorScaleParameter( - data=torch.empty(len(output_partition_sizes), - dtype=torch.float32), - weight_loader=weight_loader, - ) - scale[:] = torch.finfo(torch.float32).min + scale = create_fp8_scale_parameter(PerTensorScaleParameter, + output_partition_sizes, + input_size_per_partition, + None, weight_loader) set_weight_attrs(scale, {"scale_type": "weight_scale"}) layer.register_parameter("weight_scale", scale) else: - assert self.quant_config.activation_scheme == "dynamic" - scale = BlockQuantScaleParameter( - data=torch.empty( - (output_size_per_partition + block_n - 1) // block_n, - (input_size_per_partition + block_k - 1) // block_k, - dtype=torch.float32, - ), - input_dim=1, - output_dim=0, - weight_loader=weight_loader, - ) - scale[:] = torch.finfo(torch.float32).min + assert not self.act_q_static + assert self.weight_block_size is not None + scale = create_fp8_scale_parameter(BlockQuantScaleParameter, + output_partition_sizes, + input_size_per_partition, + self.weight_block_size, + weight_loader) set_weight_attrs(scale, {"scale_type": "weight_scale"}) # The weight_scale_inv name is intentional for deepseekv3 layer.register_parameter("weight_scale_inv", scale) # INPUT ACTIVATION SCALE - if self.quant_config.activation_scheme == "static": - scale = PerTensorScaleParameter(data=torch.empty( - len(output_partition_sizes), dtype=torch.float32), - weight_loader=weight_loader) - - scale[:] = torch.finfo(torch.float32).min + if self.act_q_static: + scale = create_fp8_input_scale(output_partition_sizes, + weight_loader) set_weight_attrs(scale, {"scale_type": "input_scale"}) layer.register_parameter("input_scale", scale) else: layer.register_parameter("input_scale", None) - def _maybe_pad_weight(self, weight: torch.Tensor) -> torch.Tensor: - # Pad the weight tensor. This is an optimization on ROCm platform, which - # can benefit from tensors located far enough from one another in memory - if (envs.VLLM_ROCM_FP8_PADDING and current_platform.is_rocm() - and weight.stride(-1) == 1 - and (weight.stride(-2) * weight.element_size()) % 512 == 0): - num_pad = 256 // weight.element_size() - weight = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad] - torch.cuda.empty_cache() - return weight - def process_weights_after_loading(self, layer: Module) -> None: size_k_first = True + input_scale = None # TODO(rob): refactor block quant into separate class. if self.block_quant: - assert self.quant_config.activation_scheme == "dynamic" + assert not self.act_q_static size_k_first = False - if current_platform.is_fp8_fnuz(): - weight, weight_scale_inv, _ = \ - normalize_e4m3fn_to_e4m3fnuz( - weight=layer.weight, - weight_scale=layer.weight_scale_inv) - else: - weight = layer.weight.data - weight_scale_inv = layer.weight_scale_inv.data - weight = self._maybe_pad_weight(weight) - - # Torch.compile cannot use Parameter subclasses. - layer.weight = Parameter(weight, requires_grad=False) - layer.weight_scale_inv = Parameter(weight_scale_inv, - requires_grad=False) + weight, weight_scale = process_fp8_weight_block_strategy( + layer.weight, layer.weight_scale_inv) + # Delete the weight_scale_inv parameter to avoid confusion + # with the weight_scale parameter + del layer.weight_scale_inv # If checkpoint not serialized fp8, quantize the weights. 
elif not self.quant_config.is_checkpoint_fp8_serialized: qweight, weight_scale = ops.scaled_fp8_quant(layer.weight, scale=None) + weight = qweight.t() - # Update the layer with the new values. - layer.weight = Parameter(qweight.t(), requires_grad=False) - layer.weight_scale = Parameter(weight_scale, requires_grad=False) - # layer.input_scale is None indicates dynamic quant and scale is - # computed from input. - layer.input_scale = None - - # If checkpoint is fp8, handle that there are N scales for N + # If checkpoint is fp8 per-tensor, handle that there are N scales for N # shards in a fused module else: - layer.weight_scale = torch.nn.Parameter(layer.weight_scale.data, - requires_grad=False) - if self.quant_config.activation_scheme == "static": - layer.input_scale = torch.nn.Parameter(layer.input_scale.data, - requires_grad=False) - weight = layer.weight weight_scale = layer.weight_scale # If using w8a8, torch._scaled_mm needs per tensor, so # requantize the logical shards as a single weight. if not self.use_marlin: - # Dequant -> Quant with max scale so we can run per tensor. - if current_platform.is_fp8_fnuz(): - weight, weight_scale, input_scale = \ - normalize_e4m3fn_to_e4m3fnuz( - weight=weight, - weight_scale=weight_scale, - input_scale=layer.input_scale) - if input_scale is not None: - layer.input_scale = Parameter(input_scale, - requires_grad=False) - - weight_scale, weight = requantize_with_max_scale( - weight=weight, - weight_scale=weight_scale, - logical_widths=layer.logical_widths, - ) - - weight = self._maybe_pad_weight(weight) - # Update layer with new values. - layer.weight = Parameter(weight.t(), requires_grad=False) - layer.weight_scale = Parameter(weight_scale, requires_grad=False) - if self.quant_config.activation_scheme == "static": - layer.input_scale = Parameter(layer.input_scale.max(), - requires_grad=False) + weight, weight_scale, input_scale = ( + process_fp8_weight_tensor_strategy( + weight, weight_scale, layer.logical_widths, + getattr(layer, 'input_scale', None))) + if self.act_q_static: + assert input_scale is not None + input_scale = input_scale.max() + weight = weight.t() + + # Update layer with new values. + layer.weight = Parameter(weight.data, requires_grad=False) + layer.weight_scale = Parameter(weight_scale.data, requires_grad=False) + layer.input_scale = Parameter( + input_scale, + requires_grad=False) if input_scale is not None else None if self.use_marlin: prepare_fp8_layer_for_marlin(layer, size_k_first) # Activations not quantized for marlin. del layer.input_scale + return - # On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to - # requantize the weight and input to the specific scale - # at the same time. 
- if is_deep_gemm_e8m0_used() and self.block_quant: - assert layer.weight_block_size is not None - block_sz = tuple(layer.weight_block_size) - requant_weight_ue8m0_inplace( - layer.weight.data, - layer.weight_scale_inv.data if hasattr( - layer, "weight_scale_inv") else layer.weight_scale.data, - block_sz, - ) - - # SM90 Block FP8 CUTLASS requires row-major weight scales - if (self.block_quant and current_platform.is_device_capability(90) - and self.cutlass_block_fp8_supported - and not should_use_deepgemm_for_fp8_linear( - torch.bfloat16, layer.weight)): - layer.weight_scale_inv = Parameter( - layer.weight_scale_inv.data.T.contiguous(), - requires_grad=False) + if self.block_quant: + maybe_post_process_fp8_weight_block( + layer, self.cutlass_block_fp8_supported) def apply(self, layer: torch.nn.Module, @@ -490,18 +397,12 @@ def apply(self, bias=bias) if self.block_quant: - assert self.quant_config.weight_block_size is not None - - return torch.ops.vllm.apply_w8a8_block_fp8_linear( + return apply_fp8_block_linear( + layer, input=x, - weight=layer.weight, - block_size=self.quant_config.weight_block_size, - weight_scale=layer.weight_scale_inv, - input_scale=layer.input_scale, bias=bias, cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, - use_aiter_and_is_supported=self.use_aiter_and_is_supported, - ) + use_aiter_and_is_supported=self.use_aiter_and_is_supported) return self.fp8_linear.apply(input=x, weight=layer.weight, @@ -528,7 +429,8 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module): super().__init__(layer.moe_config) self.layer = layer self.quant_config = quant_config - self.block_quant = self.quant_config.weight_block_size is not None + self.weight_block_size = self.quant_config.weight_block_size + self.block_quant = self.weight_block_size is not None self.flashinfer_moe_backend: Optional[FlashinferMoeBackend] = None self.fused_experts: Optional[ @@ -590,12 +492,12 @@ def create_weights(self, layer: Module, num_experts: int, hidden_size: int, if self.quant_config.is_checkpoint_fp8_serialized: params_dtype = torch.float8_e4m3fn if self.block_quant: - assert self.quant_config.weight_block_size is not None - layer.weight_block_size = self.quant_config.weight_block_size + assert self.weight_block_size is not None + layer.weight_block_size = self.weight_block_size tp_size = get_tensor_model_parallel_world_size() block_n, block_k = ( - self.quant_config.weight_block_size[0], - self.quant_config.weight_block_size[1], + self.weight_block_size[0], + self.weight_block_size[1], ) # NOTE: To ensure proper alignment of the block-wise quantization # scales, the output_size of the weights for both the gate and up @@ -952,7 +854,7 @@ def select_gemm_impl( "BatchedTritonOrDeepGemmExperts(%s): " "max_tokens_per_rank=%s, block_size=%s, per_act_token=%s", self.__class__.__name__, max_num_tokens_per_rank, - self.quant_config.weight_block_size, False) + self.weight_block_size, False) return BatchedTritonOrDeepGemmExperts( max_num_tokens=max_num_tokens_per_rank, num_dispatchers=prepare_finalize.num_dispatchers(), @@ -969,8 +871,7 @@ def select_gemm_impl( else: logger.debug( "TritonOrDeepGemmExperts(%s): block_size=%s, per_act_token=%s", - self.__class__.__name__, self.quant_config.weight_block_size, - False) + self.__class__.__name__, self.weight_block_size, False) return TritonOrDeepGemmExperts( quant_config=self.moe_quant_config, allow_deep_gemm=self.allow_deep_gemm, @@ -988,7 +889,7 @@ def get_fused_moe_quant_config( if self.block_quant else layer.w2_weight_scale), 
a1_scale=layer.w13_input_scale, a2_scale=layer.w2_input_scale, - block_shape=self.quant_config.weight_block_size, + block_shape=self.weight_block_size, ) def apply( @@ -1046,7 +947,7 @@ def apply( intermediate_size=layer.intermediate_size_per_partition, expert_offset=layer.ep_rank * layer.local_num_experts, local_num_experts=layer.local_num_experts, - block_shape=self.quant_config.weight_block_size, + block_shape=self.weight_block_size, routed_scaling=routed_scaling_factor, ) else: diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index bbe0c6f6d38e..fc12483de0c0 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -17,6 +17,9 @@ group_broadcast) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( CUTLASS_BLOCK_FP8_SUPPORTED) +from vllm.model_executor.parameter import (BlockQuantScaleParameter, + ChannelQuantScaleParameter, + PerTensorScaleParameter) from vllm.platforms import current_platform from vllm.triton_utils import tl, triton from vllm.utils import cdiv, direct_register_custom_op @@ -794,3 +797,220 @@ def requant_weight_ue8m0_inplace( # Write back the results in-place. w_q.copy_(w_requant) s_old.copy_(s_requant) + + +def check_aiter_fp8_linear_support() -> bool: + """AITER is only supported on ROCm and only for FP8_FNUZ + and at the moment are MI300 series""" + return (current_platform.is_rocm() and envs.VLLM_ROCM_USE_AITER + and envs.VLLM_ROCM_USE_AITER_LINEAR + and current_platform.is_fp8_fnuz()) + + +def _maybe_pad_fp8_weight(weight: torch.Tensor) -> torch.Tensor: + """Pad the weight tensor. This is an optimization on ROCm platform, which + can benefit from tensors located far enough from one another in memory""" + if (envs.VLLM_ROCM_FP8_PADDING and current_platform.is_rocm() + and weight.stride(-1) == 1 + and (weight.stride(-2) * weight.element_size()) % 512 == 0): + num_pad = 256 // weight.element_size() + import torch.nn.functional as F + weight = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad] + torch.cuda.empty_cache() + return weight + + +def validate_fp8_block_shape(layer: torch.nn.Module, input_size: int, + output_size: int, input_size_per_partition: int, + output_partition_sizes: list[int], + block_size: list[int]) -> None: + """Validate block quantization shapes for tensor parallelism.""" + from vllm.distributed import get_tensor_model_parallel_world_size + + tp_size = getattr(layer, "tp_size", get_tensor_model_parallel_world_size()) + block_n, block_k = block_size[0], block_size[1] + + # Required by row parallel + if (tp_size > 1 and input_size // input_size_per_partition == tp_size + and input_size_per_partition % block_k != 0): + raise ValueError( + f"Weight input_size_per_partition = {input_size_per_partition} " + f"is not divisible by weight quantization block_k = {block_k}.") + + # Required by column parallel or enabling merged weights + is_tp_split = (tp_size > 1 + and output_size // sum(output_partition_sizes) == tp_size) + is_merged_gemm = len(output_partition_sizes) > 1 + if is_tp_split or is_merged_gemm: + sizes_to_check = output_partition_sizes + if not is_tp_split and is_merged_gemm: + # In case of merged matrices, we allow the last + # matrix to not be a multiple of block size + sizes_to_check = output_partition_sizes[:-1] + for output_partition_size in sizes_to_check: + if output_partition_size % block_n != 0: + raise ValueError( + f"Weight 
output_partition_size = " + f"{output_partition_size} is not divisible by " + f"weight quantization block_n = {block_n}.") + + +def create_fp8_weight_parameter( + output_size_per_partition: int, input_size_per_partition: int, + weight_loader: Optional[Callable]) -> torch.nn.Parameter: + """Create FP8 weight parameter.""" + from vllm.model_executor.parameter import ModelWeightParameter + + return ModelWeightParameter(data=torch.empty(output_size_per_partition, + input_size_per_partition, + dtype=torch.float8_e4m3fn), + input_dim=1, + output_dim=0, + weight_loader=weight_loader) + + +def create_fp8_scale_parameter( + parameter_type: torch.nn.Parameter, output_partition_sizes: list[int], + input_size_per_partition: int, block_size: Optional[list[int]], + weight_loader: Optional[Callable]) -> torch.nn.Parameter: + """Create scale parameter based on quantization strategy.""" + if parameter_type == ChannelQuantScaleParameter: + scale = parameter_type(data=torch.empty( + (sum(output_partition_sizes), 1), dtype=torch.float32), + output_dim=0, + weight_loader=weight_loader) + elif parameter_type == BlockQuantScaleParameter: + assert block_size is not None + block_n, block_k = block_size[0], block_size[1] + output_size_per_partition = sum(output_partition_sizes) + scale = parameter_type( + data=torch.empty( + (output_size_per_partition + block_n - 1) // block_n, + (input_size_per_partition + block_k - 1) // block_k, + dtype=torch.float32, + ), + input_dim=1, + output_dim=0, + weight_loader=weight_loader, + ) + elif parameter_type == PerTensorScaleParameter: + scale = parameter_type(data=torch.empty(len(output_partition_sizes), + dtype=torch.float32), + weight_loader=weight_loader) + else: + raise ValueError(f"Unknown parameter type: {parameter_type}") + + scale[:] = torch.finfo(torch.float32).min + return scale + + +def create_fp8_input_scale( + output_partition_sizes: list[int], + weight_loader: Optional[Callable]) -> torch.nn.Parameter: + """Create input scale parameter for static activation quantization.""" + from vllm.model_executor.parameter import PerTensorScaleParameter + + scale = PerTensorScaleParameter(data=torch.empty( + len(output_partition_sizes), dtype=torch.float32), + weight_loader=weight_loader) + scale[:] = torch.finfo(torch.float32).min + return scale + + +def process_fp8_weight_tensor_strategy( + weight: torch.Tensor, + weight_scale: torch.Tensor, + logical_widths: list[int], + input_scale: Optional[torch.Tensor] = None +) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """Process weights for tensor-wise quantization strategy.""" + from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( + normalize_e4m3fn_to_e4m3fnuz, requantize_with_max_scale) + + if current_platform.is_fp8_fnuz(): + weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz( + weight=weight, weight_scale=weight_scale, input_scale=input_scale) + + # Requantize with max scale + weight_scale, weight = requantize_with_max_scale( + weight=weight, + weight_scale=weight_scale, + logical_widths=logical_widths, + ) + + weight = _maybe_pad_fp8_weight(weight) + return weight, weight_scale, input_scale + + +def process_fp8_weight_channel_strategy( + weight: torch.Tensor, + weight_scale: torch.Tensor, + input_scale: Optional[torch.Tensor] = None +) -> tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + """Process weights for channel-wise quantization strategy.""" + from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( + normalize_e4m3fn_to_e4m3fnuz) + + if 
current_platform.is_fp8_fnuz(): + weight, weight_scale, input_scale = normalize_e4m3fn_to_e4m3fnuz( + weight=weight, weight_scale=weight_scale, input_scale=input_scale) + + return weight, weight_scale, input_scale + + +def process_fp8_weight_block_strategy( + weight: torch.Tensor, + weight_scale: torch.Tensor, +) -> tuple[torch.Tensor, torch.Tensor]: + """Process weights for block-wise quantization strategy.""" + from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( + normalize_e4m3fn_to_e4m3fnuz) + + if current_platform.is_fp8_fnuz(): + weight, weight_scale, _ = normalize_e4m3fn_to_e4m3fnuz( + weight=weight, weight_scale=weight_scale) + + weight = _maybe_pad_fp8_weight(weight) + return weight, weight_scale + + +def maybe_post_process_fp8_weight_block(layer: torch.nn.Module, + cutlass_block_fp8_supported: bool): + assert layer.weight_block_size is not None + + from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used, + should_use_deepgemm_for_fp8_linear) + + # On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to + # requantize the weight and input to the specific scale + # at the same time. + if is_deep_gemm_e8m0_used(): + block_sz = tuple(layer.weight_block_size) + requant_weight_ue8m0_inplace(layer.weight.data, + layer.weight_scale.data, block_sz) + # SM90 Block FP8 CUTLASS requires row-major weight scales + elif (current_platform.is_device_capability(90) + and cutlass_block_fp8_supported + and not should_use_deepgemm_for_fp8_linear(torch.bfloat16, + layer.weight)): + layer.weight_scale = torch.nn.Parameter( + layer.weight_scale.data.T.contiguous(), requires_grad=False) + + +def apply_fp8_block_linear(layer: torch.nn.Module, input: torch.Tensor, + bias: Optional[torch.Tensor], + cutlass_block_fp8_supported: bool, + use_aiter_and_is_supported: bool) -> torch.Tensor: + """Apply block-wise FP8 linear operation.""" + assert layer.weight_block_size is not None + + return torch.ops.vllm.apply_w8a8_block_fp8_linear( + input=input, + weight=layer.weight, + block_size=layer.weight_block_size, + weight_scale=layer.weight_scale, + input_scale=layer.input_scale, + bias=bias, + cutlass_block_fp8_supported=cutlass_block_fp8_supported, + use_aiter_and_is_supported=use_aiter_and_is_supported, + ) From bc19d7598566ae81b3f69b43cbc2bd34aa5497c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Thu, 18 Sep 2025 15:56:07 +0200 Subject: [PATCH 110/518] [Misc] Add kv-connector label (#25156) Signed-off-by: NickLucche --- .github/mergify.yml | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/.github/mergify.yml b/.github/mergify.yml index 94198b1251e0..75ee3e3c55b4 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -302,3 +302,20 @@ pull_request_rules: label: remove: - needs-rebase + +- name: label-kv-connector + description: Automatically apply kv-connector label + conditions: + - or: + - files~=^examples/online_serving/disaggregated[^/]*/.* + - files~=^examples/offline_inference/disaggregated[^/]*/.* + - files~=^examples/others/lmcache/ + - files~=^tests/v1/kv_connector/ + - files~=^vllm/distributed/kv_transfer/ + - title~=(?i)\bP/?D\b + - title~=(?i)NIXL + - title~=(?i)LMCache + actions: + label: + add: + - kv-connector \ No newline at end of file From 01a583fea40571986ffe277549e5bb441d409768 Mon Sep 17 00:00:00 2001 From: jvlunteren <161835099+jvlunteren@users.noreply.github.com> Date: Thu, 18 Sep 2025 16:27:01 +0200 Subject: [PATCH 111/518] [Kernel] Decouple Tile Size from Block Size in Triton Unified Attention Kernel (#21197) 
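The attention inner loop now walks the KV sequence in tiles of TILE_SIZE tokens
instead of one kv-cache block at a time; physical block indices are gathered per
token via seq_offset // BLOCK_SIZE, so a single tile may span several cache
blocks and TILE_SIZE no longer has to equal the cache block size (defaults in
this patch: 32 for prefill, 16 for decode, or 32 when the query is fp8).
Illustrative index math only, not kernel code, with example sizes not taken
from this patch:

    tile_idx, TILE_SIZE, BLOCK_SIZE = 2, 32, 16
    seq_offset = [tile_idx * TILE_SIZE + t for t in range(TILE_SIZE)]
    # tokens 64..95 map onto cache blocks [4, 5]
    print(sorted({s // BLOCK_SIZE for s in seq_offset}))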
Signed-off-by: Jan van Lunteren --- .../test_triton_unified_attention.py | 3 - .../attention/ops/triton_unified_attention.py | 122 ++++++++++-------- 2 files changed, 70 insertions(+), 55 deletions(-) diff --git a/tests/kernels/attention/test_triton_unified_attention.py b/tests/kernels/attention/test_triton_unified_attention.py index 4b97d51e6ed2..ab91560e995c 100644 --- a/tests/kernels/attention/test_triton_unified_attention.py +++ b/tests/kernels/attention/test_triton_unified_attention.py @@ -102,9 +102,6 @@ def test_triton_unified_attn( ) -> None: torch.set_default_device("cuda") - if q_dtype is not None and q_dtype.itemsize < 2 and block_size < 32: - pytest.skip("block size must be at least 32 for fp8") - current_platform.seed_everything(0) num_seqs = len(seq_lens) query_lens = [x[0] for x in seq_lens] diff --git a/vllm/attention/ops/triton_unified_attention.py b/vllm/attention/ops/triton_unified_attention.py index d2ad2f7e8d2a..591b68bfa646 100644 --- a/vllm/attention/ops/triton_unified_attention.py +++ b/vllm/attention/ops/triton_unified_attention.py @@ -73,6 +73,7 @@ def kernel_unified_attention_2d( output_stride_1: tl.int64, # int, should be equal to head_size qq_bias_stride_0: tl.int64, # int BLOCK_SIZE: tl.constexpr, # int + TILE_SIZE: tl.constexpr, # int must be power of 2 HEAD_SIZE: tl.constexpr, # int HEAD_SIZE_PADDED: tl.constexpr, # int, must be power of 2 USE_ALIBI_SLOPES: tl.constexpr, # bool @@ -118,6 +119,7 @@ def kernel_unified_attention_2d( offs_m = tl.arange(0, BLOCK_M) offs_d = tl.arange(0, HEAD_SIZE_PADDED) + offs_t = tl.arange(0, TILE_SIZE) query_pos = q_block_local_idx * BLOCK_Q + offs_m // num_queries_per_kv query_offset_0 = cur_batch_in_all_start_index + query_pos @@ -177,31 +179,32 @@ def kernel_unified_attention_2d( # actual sequence length max_seq_prefix_len = tl.minimum(max_seq_prefix_len, seq_len) - # calculate the number of tiles (blocks) that need to be processed to - # cover the longest sequence prefix (due to causal masking, blocks beyond + # calculate the number of tiles that need to be processed to + # cover the longest sequence prefix (due to causal masking, tiles beyond # this prefix can be skipped) - num_blocks = cdiv_fn(max_seq_prefix_len, BLOCK_SIZE) + num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE) # iterate through tiles - for j in range(0, num_blocks): + for j in range(0, num_tiles): + seq_offset = j * TILE_SIZE + offs_t + tile_mask = seq_offset < max_seq_prefix_len - physical_block_idx = tl.load(block_tables_ptr + block_table_offset + j) + physical_block_idx = tl.load(block_tables_ptr + block_table_offset + + seq_offset // BLOCK_SIZE).to(tl.int64) - offs_n = tl.arange(0, BLOCK_SIZE) - - v_offset = (physical_block_idx * stride_v_cache_0 + + v_offset = (physical_block_idx[:, None] * stride_v_cache_0 + kv_head_idx * stride_v_cache_2 + offs_d[None, :] * stride_v_cache_3 + - offs_n[:, None] * stride_v_cache_1) + (seq_offset % BLOCK_SIZE)[:, None] * stride_v_cache_1) - k_offset = (physical_block_idx * stride_k_cache_0 + + k_offset = (physical_block_idx[None, :] * stride_k_cache_0 + kv_head_idx * stride_k_cache_2 + offs_d[:, None] * stride_k_cache_3 + - offs_n[None, :] * stride_k_cache_1) + (seq_offset % BLOCK_SIZE)[None, :] * stride_k_cache_1) - # K : (HEAD_SIZE, BLOCK_SIZE) + # K : (HEAD_SIZE, TILE_SIZE) K_load = tl.load(key_cache_ptr + k_offset, - mask=dim_mask[:, None], + mask=dim_mask[:, None] & tile_mask[None, :], other=0.0) if K_load.dtype.is_fp8(): @@ -212,9 +215,9 @@ def kernel_unified_attention_2d( else: K = K_load - # V : (BLOCK_SIZE, 
HEAD_SIZE) + # V : (TILE_SIZE, HEAD_SIZE) V_load = tl.load(value_cache_ptr + v_offset, - mask=dim_mask[None, :], + mask=dim_mask[None, :] & tile_mask[:, None], other=0.0) if V_load.dtype.is_fp8(): @@ -225,12 +228,10 @@ def kernel_unified_attention_2d( else: V = V_load - seq_offset = j * BLOCK_SIZE + offs_n - seq_mask = seq_offset[None, :] < context_len + query_pos[:, None] + 1 - # S : (BLOCK_M, BLOCK_SIZE) - S = tl.zeros(shape=(BLOCK_M, BLOCK_SIZE), dtype=tl.float32) + # S : (BLOCK_M, TILE_SIZE) + S = tl.zeros(shape=(BLOCK_M, TILE_SIZE), dtype=tl.float32) S += scale * tl.dot(Q, K) @@ -262,11 +263,12 @@ def kernel_unified_attention_2d( # compute running maximum # m_j : (BLOCK_M,) m_j = tl.maximum(M, tl.max(S, axis=1)) + # For sliding window there's a chance the max is -inf due to masking of # the entire row. In this case we need to set m_j 0 to avoid NaN m_j = tl.where(m_j > float("-inf"), m_j, 0.0) - # P : (BLOCK_M, BLOCK_SIZE) + # P : (BLOCK_M, TILE_SIZE) P = tl.exp(S - m_j[:, None]) # l_j : (BLOCK_M,) @@ -327,6 +329,7 @@ def kernel_unified_attention_3d( query_stride_1: tl.int64, # int, should be equal to head_size qq_bias_stride_0: tl.int64, # int BLOCK_SIZE: tl.constexpr, # int + TILE_SIZE: tl.constexpr, # int, must be power of 2 HEAD_SIZE: tl.constexpr, # int HEAD_SIZE_PADDED: tl.constexpr, # int, must be power of 2 USE_ALIBI_SLOPES: tl.constexpr, # bool @@ -374,20 +377,19 @@ def kernel_unified_attention_3d( # number of segments for this particular sequence num_segments = NUM_SEGMENTS_PER_SEQ - blocks_per_segment = cdiv_fn(seq_len, num_segments * BLOCK_SIZE) + tiles_per_segment = cdiv_fn(seq_len, num_segments * TILE_SIZE) - if segm_idx * blocks_per_segment * BLOCK_SIZE >= seq_len: + if segm_idx * tiles_per_segment * TILE_SIZE >= seq_len: return offs_m = tl.arange(0, BLOCK_M) offs_d = tl.arange(0, HEAD_SIZE_PADDED) - + offs_t = tl.arange(0, TILE_SIZE) query_pos = q_block_local_idx * BLOCK_Q + offs_m // num_queries_per_kv query_offset_0 = cur_batch_in_all_start_index + query_pos query_offset_1 = kv_head_idx * num_queries_per_kv + \ offs_m % num_queries_per_kv - query_offset = (query_offset_0[:, None] * query_stride_0 + query_offset_1[:, None] * query_stride_1 + offs_d[None, :]) @@ -433,30 +435,44 @@ def kernel_unified_attention_3d( qq_bias_row_ptrs = (qq_bias_ptr + query_pos[:, None] * qq_bias_stride_0 ) # shape: [BLOCK_M] - num_blocks = cdiv_fn(seq_len, BLOCK_SIZE) + # compute the length of the longest sequence prefix spanned by any + # query token in the current q_block (q_block_local_idx) + max_seq_prefix_len = context_len + q_block_local_idx * BLOCK_Q + ( + BLOCK_M - 1) // num_queries_per_kv + 1 + + # adjust for potential padding in the last q_block by considering the + # actual sequence length + max_seq_prefix_len = tl.minimum(max_seq_prefix_len, seq_len) + + # calculate the number of tiles that need to be processed to + # cover the longest sequence prefix (due to causal masking, tiles beyond + # this prefix can be skipped) + num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE) # iterate through tiles within current segment for j in range( - segm_idx * blocks_per_segment, - min((segm_idx + 1) * blocks_per_segment, num_blocks), + segm_idx * tiles_per_segment, + min((segm_idx + 1) * tiles_per_segment, num_tiles), ): - physical_block_idx = tl.load(block_tables_ptr + block_table_offset + j) + seq_offset = j * TILE_SIZE + offs_t + tile_mask = seq_offset < max_seq_prefix_len - offs_n = tl.arange(0, BLOCK_SIZE) + physical_block_idx = tl.load(block_tables_ptr + block_table_offset + + 
seq_offset // BLOCK_SIZE).to(tl.int64) - v_offset = (physical_block_idx * stride_v_cache_0 + + v_offset = (physical_block_idx[:, None] * stride_v_cache_0 + kv_head_idx * stride_v_cache_2 + offs_d[None, :] * stride_v_cache_3 + - offs_n[:, None] * stride_v_cache_1) + (seq_offset % BLOCK_SIZE)[:, None] * stride_v_cache_1) - k_offset = (physical_block_idx * stride_k_cache_0 + + k_offset = (physical_block_idx[None, :] * stride_k_cache_0 + kv_head_idx * stride_k_cache_2 + offs_d[:, None] * stride_k_cache_3 + - offs_n[None, :] * stride_k_cache_1) + (seq_offset % BLOCK_SIZE)[None, :] * stride_k_cache_1) - # K : (HEAD_SIZE, BLOCK_SIZE) + # K : (HEAD_SIZE, TILE_SIZE) K_load = tl.load(key_cache_ptr + k_offset, - mask=dim_mask[:, None], + mask=dim_mask[:, None] & tile_mask[None, :], other=0.0) if K_load.dtype.is_fp8(): @@ -467,9 +483,9 @@ def kernel_unified_attention_3d( else: K = K_load - # V : (BLOCK_SIZE, HEAD_SIZE) + # V : (TILE_SIZE, HEAD_SIZE) V_load = tl.load(value_cache_ptr + v_offset, - mask=dim_mask[None, :], + mask=dim_mask[None, :] & tile_mask[:, None], other=0.0) if V_load.dtype.is_fp8(): @@ -480,13 +496,10 @@ def kernel_unified_attention_3d( else: V = V_load - seq_offset = j * BLOCK_SIZE + offs_n - seq_mask = seq_offset[None, :] < context_len + query_pos[:, None] + 1 - # S : (BLOCK_M, BLOCK_SIZE) - S = tl.zeros(shape=(BLOCK_M, BLOCK_SIZE), dtype=tl.float32) - + # S : (BLOCK_M, TILE_SIZE) + S = tl.zeros(shape=(BLOCK_M, TILE_SIZE), dtype=tl.float32) S += scale * tl.dot(Q, K) if USE_SOFTCAP: @@ -517,11 +530,12 @@ def kernel_unified_attention_3d( # compute running maximum # m_j : (BLOCK_M,) m_j = tl.maximum(M, tl.max(S, axis=1)) + # For sliding window there's a chance the max is -inf due to masking of # the entire row. In this case we need to set m_j 0 to avoid NaN m_j = tl.where(m_j > float("-inf"), m_j, 0.0) - # P : (BLOCK_M, BLOCK_SIZE,) + # P : (BLOCK_M, TILE_SIZE,) P = tl.exp(S - m_j[:, None]) # l_j : (BLOCK_M,) @@ -573,7 +587,7 @@ def reduce_segments( output_stride_0: tl.int64, # int output_stride_1: tl.int64, # int, should be equal to head_size block_table_stride: tl.int64, # int - BLOCK_SIZE: tl.constexpr, # int + TILE_SIZE: tl.constexpr, # int HEAD_SIZE: tl.constexpr, # int, must be power of 2 HEAD_SIZE_PADDED: tl.constexpr, # int, must be power of 2 query_start_len_ptr, # [num_seqs+1] @@ -594,10 +608,10 @@ def reduce_segments( # number of segments for this particular sequence num_segments = NUM_SEGMENTS_PER_SEQ - blocks_per_segment = cdiv_fn(seq_len, num_segments * BLOCK_SIZE) + tiles_per_segment = cdiv_fn(seq_len, num_segments * TILE_SIZE) # create masks for subsequent loads - act_num_segments = cdiv_fn(seq_len, blocks_per_segment * BLOCK_SIZE) + act_num_segments = cdiv_fn(seq_len, tiles_per_segment * TILE_SIZE) segm_mask = tl.arange(0, NUM_SEGMENTS_PER_SEQ) < tl.full( [NUM_SEGMENTS_PER_SEQ], act_num_segments, dtype=tl.int32) dim_mask = tl.where(tl.arange(0, HEAD_SIZE_PADDED) < HEAD_SIZE, 1, @@ -671,13 +685,10 @@ def unified_attention( # Optional tensor for sinks sinks=None, ): + assert causal, "Only causal attention is supported" assert q_descale is None, "Q scales not supported" - block_size = v.shape[1] - assert q.element_size() >= 2 or block_size >= 32, \ - "Block size must be at least 32 for fp8" - if sinks is not None: assert sinks.shape[0] == q.shape[1], \ "Sinks must be num_query_heads size" @@ -707,6 +718,12 @@ def unified_attention( # = floor(q.shape[0] / BLOCK_Q) + num_seqs total_num_q_blocks = q.shape[0] // BLOCK_Q + num_seqs + # Assigning default tile sizes for 
prefill and decode. + # Note: each tile size must be at least 32 for "fp8" (q.element_size() == 1) + # and at least 16 for all other data types. + TILE_SIZE_PREFILL = 32 + TILE_SIZE_DECODE = 16 if q.element_size() >= 2 else 32 + # if batch contains a prefill if max_seqlen_q > 1 or total_num_q_blocks * num_kv_heads > 128: kernel_unified_attention_2d[( @@ -736,6 +753,7 @@ def unified_attention( output_stride_1=out.stride(1), qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0, BLOCK_SIZE=block_size, + TILE_SIZE=TILE_SIZE_PREFILL, HEAD_SIZE=head_size, HEAD_SIZE_PADDED=triton.next_power_of_2(head_size), USE_ALIBI_SLOPES=use_alibi_slopes, @@ -809,6 +827,7 @@ def unified_attention( query_stride_1=q.stride(1), qq_bias_stride_0=qq_bias.stride(0) if use_qq_bias else 0, BLOCK_SIZE=block_size, + TILE_SIZE=TILE_SIZE_DECODE, HEAD_SIZE=head_size, HEAD_SIZE_PADDED=triton.next_power_of_2(head_size), USE_ALIBI_SLOPES=use_alibi_slopes, @@ -830,7 +849,6 @@ def unified_attention( BLOCK_M=BLOCK_M, NUM_SEGMENTS_PER_SEQ=NUM_SEGMENTS, ) - reduce_segments[(q.shape[0], num_query_heads)]( output_ptr=out, segm_output_ptr=segm_output, @@ -844,7 +862,7 @@ def unified_attention( output_stride_0=out.stride(0), output_stride_1=out.stride(1), block_table_stride=block_table.stride(0), - BLOCK_SIZE=block_size, + TILE_SIZE=TILE_SIZE_DECODE, HEAD_SIZE=head_size, HEAD_SIZE_PADDED=triton.next_power_of_2(head_size), query_start_len_ptr=cu_seqlens_q, From 072d7e53e534d337b41262dd44ded9b44aa699ef Mon Sep 17 00:00:00 2001 From: Vadim Gimpelson <156319763+vadiklyutiy@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:27:49 +0400 Subject: [PATCH 112/518] [PERF] Add `conv1d` metadata to GDN attn (#25105) Signed-off-by: Vadim Gimpelson --- vllm/model_executor/layers/mamba/mamba2_metadata.py | 8 +++++--- vllm/model_executor/models/qwen3_next.py | 10 +++++++++- vllm/v1/attention/backends/gdn_attn.py | 6 ++++++ vllm/v1/attention/backends/mamba2_attn.py | 4 ++-- vllm/v1/attention/backends/short_conv_attn.py | 4 ++-- 5 files changed, 24 insertions(+), 8 deletions(-) diff --git a/vllm/model_executor/layers/mamba/mamba2_metadata.py b/vllm/model_executor/layers/mamba/mamba2_metadata.py index 368bfe3af1d3..c926e17a2c19 100644 --- a/vllm/model_executor/layers/mamba/mamba2_metadata.py +++ b/vllm/model_executor/layers/mamba/mamba2_metadata.py @@ -11,6 +11,7 @@ PlaceholderAttentionMetadata) from vllm.attention.backends.utils import PAD_SLOT_ID from vllm.platforms import current_platform +from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata from vllm.v1.attention.backends.mamba2_attn import ( Mamba2AttentionMetadata, _query_start_loc_to_chunk_indices_offsets) @@ -45,8 +46,8 @@ class Mamba2Metadata: """ nums_dict: Optional[dict] = None cu_seqlen: Optional[int] = None - batch_ptr: Optional[torch.tensor] = None - token_chunk_offset_ptr: Optional[torch.tensor] = None + batch_ptr: Optional[torch.Tensor] = None + token_chunk_offset_ptr: Optional[torch.Tensor] = None def get_platform_metadata_classes() -> tuple[type[AttentionMetadata], ...]: @@ -117,7 +118,8 @@ def prepare_mamba2_metadata( def update_metadata(x: torch.Tensor, query_start_loc: torch.Tensor, mamba2_metadata: Union[Mamba2Metadata, - Mamba2AttentionMetadata]): + Mamba2AttentionMetadata, + GDNAttentionMetadata]): """ this is triggered upon handling a new input at the first layer """ diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index eb060cb90f44..0c974ee44eee 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ 
b/vllm/model_executor/models/qwen3_next.py @@ -35,6 +35,7 @@ RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.mamba.abstract import MambaBase +from vllm.model_executor.layers.mamba.mamba2_metadata import update_metadata from vllm.model_executor.layers.mamba.mamba_mixer2 import ( mamba_v2_sharded_weight_loader) from vllm.model_executor.layers.mamba.mamba_utils import ( @@ -414,6 +415,7 @@ def _forward( assert isinstance(attn_metadata, dict) attn_metadata = attn_metadata[self.prefix] + conv_metadata = attn_metadata assert isinstance(attn_metadata, GDNAttentionMetadata) has_initial_state = attn_metadata.has_initial_state spec_query_start_loc = attn_metadata.spec_query_start_loc @@ -475,10 +477,15 @@ def _forward( # 2.2: process the remaining part if attn_metadata.num_prefills > 0: + mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1) + if conv_metadata.cu_seqlen is None: + conv_metadata = update_metadata(mixed_qkv_non_spec_T, + non_spec_query_start_loc, + conv_metadata) # - "cache_indices" updates the conv_state cache in positions # pointed to by "mamba_cache_params.state_indices_tensor" mixed_qkv_non_spec = causal_conv1d_fn( - mixed_qkv_non_spec.transpose(0, 1), + mixed_qkv_non_spec_T, conv_weights, self.conv1d.bias, activation=self.activation, @@ -486,6 +493,7 @@ def _forward( has_initial_state=has_initial_state, cache_indices=non_spec_state_indices_tensor, query_start_loc=non_spec_query_start_loc, + metadata=conv_metadata, ).transpose(0, 1) elif attn_metadata.num_decodes > 0: mixed_qkv_non_spec = causal_conv1d_update( diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index ba89f93e8b56..5dadc52d0fb1 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -50,6 +50,12 @@ class GDNAttentionMetadata: Tensor] = None # shape: [num_prefill_tokens + num_decode_tokens,] num_accepted_tokens: Optional[torch.Tensor] = None # shape: [batch,] + # The following attributes are for triton implementation of causal_conv1d + nums_dict: Optional[dict] = None + cu_seqlen: Optional[int] = None + batch_ptr: Optional[torch.Tensor] = None + token_chunk_offset_ptr: Optional[torch.Tensor] = None + class GDNAttentionMetadataBuilder( AttentionMetadataBuilder[GDNAttentionMetadata]): diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py index 359bad1ea9de..2fe1f14ca1db 100644 --- a/vllm/v1/attention/backends/mamba2_attn.py +++ b/vllm/v1/attention/backends/mamba2_attn.py @@ -132,8 +132,8 @@ class Mamba2AttentionMetadata: # The following attributes are for triton implementation of causal_conv1d nums_dict: Optional[dict] = None cu_seqlen: Optional[int] = None - batch_ptr: Optional[torch.tensor] = None - token_chunk_offset_ptr: Optional[torch.tensor] = None + batch_ptr: Optional[torch.Tensor] = None + token_chunk_offset_ptr: Optional[torch.Tensor] = None class Mamba2AttentionMetadataBuilder( diff --git a/vllm/v1/attention/backends/short_conv_attn.py b/vllm/v1/attention/backends/short_conv_attn.py index f5ad65b02b4d..717c40b37ecf 100644 --- a/vllm/v1/attention/backends/short_conv_attn.py +++ b/vllm/v1/attention/backends/short_conv_attn.py @@ -34,8 +34,8 @@ class ShortConvAttentionMetadata: # For causal_conv1d nums_dict: Optional[dict] = None cu_seqlen: Optional[int] = None - batch_ptr: Optional[torch.tensor] = None - token_chunk_offset_ptr: Optional[torch.tensor] = None + batch_ptr: Optional[torch.Tensor] = None + 
token_chunk_offset_ptr: Optional[torch.Tensor] = None class ShortConvAttentionMetadataBuilder( From 67244c86f0f1ffc06fcab9cad5e78989695cc15f Mon Sep 17 00:00:00 2001 From: dongbo910220 <32610838+dongbo910220@users.noreply.github.com> Date: Thu, 18 Sep 2025 22:29:40 +0800 Subject: [PATCH 113/518] feat(api): Return 503 on /health when engine is dead (#24897) Signed-off-by: dongbo910220 <1275604947@qq.com> Co-authored-by: Claude --- vllm/entrypoints/openai/api_server.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 93ea846f26f6..912e66412092 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -103,6 +103,7 @@ from vllm.usage.usage_lib import UsageContext from vllm.utils import (Device, FlexibleArgumentParser, decorate_logs, is_valid_ipv6_address, set_ulimit) +from vllm.v1.engine.exceptions import EngineDeadError from vllm.v1.metrics.prometheus import get_prometheus_registry from vllm.version import __version__ as VLLM_VERSION @@ -351,8 +352,11 @@ def engine_client(request: Request) -> EngineClient: @router.get("/health", response_class=Response) async def health(raw_request: Request) -> Response: """Health check.""" - await engine_client(raw_request).check_health() - return Response(status_code=200) + try: + await engine_client(raw_request).check_health() + return Response(status_code=200) + except EngineDeadError: + return Response(status_code=503) @router.get("/load") From 5f696c33b1fbf33fe91ecdd958874b9dd52f79b4 Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Thu, 18 Sep 2025 23:22:01 +0800 Subject: [PATCH 114/518] [New Model] Support BertForTokenClassification / Named Entity Recognition (NER) task (#24872) Signed-off-by: wang.yuqi Signed-off-by: Isotr0py Co-authored-by: Isotr0py --- docs/models/supported_models.md | 11 +++ examples/offline_inference/pooling/README.md | 8 ++- examples/offline_inference/pooling/ner.py | 54 ++++++++++++++ examples/online_serving/pooling/README.md | 6 ++ examples/online_serving/pooling/ner.py | 71 +++++++++++++++++++ .../pooling/test_token_classification.py | 39 ++++++++++ tests/models/registry.py | 1 + vllm/entrypoints/llm.py | 4 ++ vllm/model_executor/models/bert.py | 52 ++++++++++++++ vllm/model_executor/models/registry.py | 1 + vllm/v1/attention/backends/flex_attention.py | 12 +++- 11 files changed, 257 insertions(+), 2 deletions(-) create mode 100644 examples/offline_inference/pooling/ner.py create mode 100644 examples/online_serving/pooling/ner.py create mode 100644 tests/models/language/pooling/test_token_classification.py diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 7aeaeca97699..b67ebcbe3c81 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -554,6 +554,17 @@ If your model is not in the above list, we will try to automatically convert the For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly, e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`. +#### Token Classification + +These models primarily support the [`LLM.encode`](./pooling_models.md#llmencode) API. 
+ +| Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/parallelism_scaling.md) | [V1](gh-issue:8779) | +|--------------|--------|-------------------|-----------------------------|-----------------------------------------|---------------------| +| `BertForTokenClassification` | bert-based | `boltuix/NeuroBERT-NER` (see note), etc. | | | ✅︎ | + +!!! note + Named Entity Recognition (NER) usage, please refer to , . + [](){ #supported-mm-models } ## List of Multimodal Language Models diff --git a/examples/offline_inference/pooling/README.md b/examples/offline_inference/pooling/README.md index 8693f5e08e0b..79afbd9cfac4 100644 --- a/examples/offline_inference/pooling/README.md +++ b/examples/offline_inference/pooling/README.md @@ -26,8 +26,14 @@ python examples/offline_inference/pooling/embed_jina_embeddings_v3.py python examples/offline_inference/pooling/embed_matryoshka_fy.py ``` +## Named Entity Recognition (NER) usage + +```bash +python examples/offline_inference/pooling/ner.py +``` + ## Qwen3 reranker usage ```bash -python qwen3_reranker.py +python examples/offline_inference/pooling/qwen3_reranker.py ``` diff --git a/examples/offline_inference/pooling/ner.py b/examples/offline_inference/pooling/ner.py new file mode 100644 index 000000000000..f18742fac0d5 --- /dev/null +++ b/examples/offline_inference/pooling/ner.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# Adapted from https://huggingface.co/boltuix/NeuroBERT-NER + +from argparse import Namespace + +from vllm import LLM, EngineArgs +from vllm.utils import FlexibleArgumentParser + + +def parse_args(): + parser = FlexibleArgumentParser() + parser = EngineArgs.add_cli_args(parser) + # Set example specific arguments + parser.set_defaults( + model="boltuix/NeuroBERT-NER", + runner="pooling", + enforce_eager=True, + trust_remote_code=True, + ) + return parser.parse_args() + + +def main(args: Namespace): + # Sample prompts. + prompts = [ + "Barack Obama visited Microsoft headquarters in Seattle on January 2025." + ] + + # Create an LLM. 
+ llm = LLM(**vars(args)) + tokenizer = llm.get_tokenizer() + label_map = llm.llm_engine.vllm_config.model_config.hf_config.id2label + + # Run inference + outputs = llm.encode(prompts) + + for prompt, output in zip(prompts, outputs): + logits = output.outputs.data + predictions = logits.argmax(dim=-1) + + # Map predictions to labels + tokens = tokenizer.convert_ids_to_tokens(output.prompt_token_ids) + labels = [label_map[p.item()] for p in predictions] + + # Print results + for token, label in zip(tokens, labels): + if token not in tokenizer.all_special_tokens: + print(f"{token:15} → {label}") + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/examples/online_serving/pooling/README.md b/examples/online_serving/pooling/README.md index f7926542202d..2c271b6a32bc 100644 --- a/examples/online_serving/pooling/README.md +++ b/examples/online_serving/pooling/README.md @@ -12,6 +12,12 @@ python examples/online_serving/pooling/cohere_rerank_client.py python examples/online_serving/pooling/jinaai_rerank_client.py ``` +## Named Entity Recognition (NER) usage + +```bash +python examples/online_serving/pooling/ner.py +``` + ## Openai chat embedding for multimodal usage ```bash diff --git a/examples/online_serving/pooling/ner.py b/examples/online_serving/pooling/ner.py new file mode 100644 index 000000000000..9ec2bd45a0fe --- /dev/null +++ b/examples/online_serving/pooling/ner.py @@ -0,0 +1,71 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# Adapted from https://huggingface.co/boltuix/NeuroBERT-NER + +""" +Example online usage of Pooling API for Named Entity Recognition (NER). + +Run `vllm serve --runner pooling` +to start up the server in vLLM. e.g. + +vllm serve boltuix/NeuroBERT-NER +""" + +import argparse + +import requests +import torch + + +def post_http_request(prompt: dict, api_url: str) -> requests.Response: + headers = {"User-Agent": "Test Client"} + response = requests.post(api_url, headers=headers, json=prompt) + return response + + +def parse_args(): + parser = argparse.ArgumentParser() + parser.add_argument("--host", type=str, default="localhost") + parser.add_argument("--port", type=int, default=8000) + parser.add_argument("--model", type=str, default="boltuix/NeuroBERT-NER") + + return parser.parse_args() + + +def main(args): + from transformers import AutoConfig, AutoTokenizer + + api_url = f"http://{args.host}:{args.port}/pooling" + model_name = args.model + + # Load tokenizer and config + tokenizer = AutoTokenizer.from_pretrained(model_name) + config = AutoConfig.from_pretrained(model_name) + label_map = config.id2label + + # Input text + text = "Barack Obama visited Microsoft headquarters in Seattle on January 2025." 
+ prompt = {"model": model_name, "input": text} + + pooling_response = post_http_request(prompt=prompt, api_url=api_url) + + # Run inference + output = pooling_response.json()["data"][0] + logits = torch.tensor(output["data"]) + predictions = logits.argmax(dim=-1) + inputs = tokenizer(text, return_tensors="pt") + + # Map predictions to labels + tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0]) + labels = [label_map[p.item()] for p in predictions] + assert len(tokens) == len(predictions) + + # Print results + for token, label in zip(tokens, labels): + if token not in tokenizer.all_special_tokens: + print(f"{token:15} → {label}") + + +if __name__ == "__main__": + args = parse_args() + main(args) diff --git a/tests/models/language/pooling/test_token_classification.py b/tests/models/language/pooling/test_token_classification.py new file mode 100644 index 000000000000..fd5e48a8b144 --- /dev/null +++ b/tests/models/language/pooling/test_token_classification.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest +import torch +from transformers import AutoModelForTokenClassification + +from tests.models.utils import softmax + + +@pytest.mark.parametrize("model", ["boltuix/NeuroBERT-NER"]) +# The float32 is required for this tiny model to pass the test. +@pytest.mark.parametrize("dtype", ["float"]) +@torch.inference_mode +def test_models( + hf_runner, + vllm_runner, + example_prompts, + model: str, + dtype: str, +) -> None: + with vllm_runner(model, max_model_len=None, dtype=dtype) as vllm_model: + vllm_outputs = vllm_model.encode(example_prompts) + + with hf_runner(model, + dtype=dtype, + auto_cls=AutoModelForTokenClassification) as hf_model: + tokenizer = hf_model.tokenizer + hf_outputs = [] + for prompt in example_prompts: + inputs = tokenizer([prompt], return_tensors="pt") + inputs = hf_model.wrap_device(inputs) + output = hf_model.model(**inputs) + hf_outputs.append(softmax(output.logits[0])) + + # check logits difference + for hf_output, vllm_output in zip(hf_outputs, vllm_outputs): + hf_output = torch.tensor(hf_output).cpu().float() + vllm_output = torch.tensor(vllm_output).cpu().float() + assert torch.allclose(hf_output, vllm_output, 1e-2) diff --git a/tests/models/registry.py b/tests/models/registry.py index 93aa9d402549..e9cc5170ade7 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -414,6 +414,7 @@ def check_available_online( # [Cross-encoder] "BertForSequenceClassification": _HfExamplesInfo("cross-encoder/ms-marco-MiniLM-L-6-v2"), # noqa: E501 + "BertForTokenClassification": _HfExamplesInfo("boltuix/NeuroBERT-NER"), "GteNewForSequenceClassification": _HfExamplesInfo("Alibaba-NLP/gte-multilingual-reranker-base", # noqa: E501 trust_remote_code=True, hf_overrides={ diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 63e9478612bb..df6b16c73d6e 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -943,6 +943,10 @@ def encode( considered legacy and may be deprecated in the future. You should instead pass them via the `inputs` parameter. 
""" + + if self.supported_tasks == ["encode"] and pooling_task is None: + pooling_task = "encode" + if pooling_task is None: if "embed" in self.supported_tasks: pooling_task = "embed" diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py index c07e5364814a..ee32587f6b1b 100644 --- a/vllm/model_executor/models/bert.py +++ b/vllm/model_executor/models/bert.py @@ -611,3 +611,55 @@ def forward( positions=positions, inputs_embeds=inputs_embeds, intermediate_tensors=intermediate_tensors) + + +@default_pooling_type("ALL") +class BertForTokenClassification(nn.Module): + is_pooling_model = True + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + config = vllm_config.model_config.hf_config + self.head_dtype = vllm_config.model_config.head_dtype + self.num_labels = config.num_labels + self.bert = BertModel(vllm_config=vllm_config, + prefix=maybe_prefix(prefix, "bert"), + embedding_class=BertEmbedding) + self.classifier = nn.Linear(config.hidden_size, + config.num_labels, + dtype=self.head_dtype) + + pooler_config = vllm_config.model_config.pooler_config + assert pooler_config is not None + + self.pooler = DispatchPooler({ + "encode": + Pooler.for_encode(pooler_config), + }) + + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): + loader = AutoWeightsLoader(self) + loaded_params = loader.load_weights(weights) + return loaded_params + + def forward( + self, + input_ids: Optional[torch.Tensor], + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + token_type_ids: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + + if token_type_ids is not None: + assert self.bert.config.vocab_size < (1 << TOKEN_TYPE_SHIFT) + assert input_ids is not None + _encode_token_type_ids(input_ids, token_type_ids) + + hidden_states = self.bert(input_ids=input_ids, + positions=positions, + inputs_embeds=inputs_embeds, + intermediate_tensors=intermediate_tensors) + + hidden_states = hidden_states.to(self.head_dtype) + return self.classifier(hidden_states) diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 707b57106e6d..1382fd9e93ea 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -193,6 +193,7 @@ _CROSS_ENCODER_MODELS = { "BertForSequenceClassification": ("bert", "BertForSequenceClassification"), + "BertForTokenClassification": ("bert", "BertForTokenClassification"), "GteNewForSequenceClassification": ("bert_with_rope", "GteNewForSequenceClassification"), "ModernBertForSequenceClassification": ("modernbert", diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py index cb983494216a..662d3984554a 100644 --- a/vllm/v1/attention/backends/flex_attention.py +++ b/vllm/v1/attention/backends/flex_attention.py @@ -720,6 +720,15 @@ def forward( (query, key, value), ) + query = query[:, :, :num_actual_tokens, :] + if ((key_tensor.size(-2) > num_actual_tokens) + or (value_tensor.size(-2) > num_actual_tokens)): + # In the encoder-only model with torch.compile, + # qkv might be padded, which might cause exception. 
+ # see: https://github.com/vllm-project/vllm/pull/24872#discussion_r2353252290 + key_tensor = key_tensor[:, :, :num_actual_tokens, :] + value_tensor = value_tensor[:, :, :num_actual_tokens, :] + else: assert self.attn_type == AttentionType.DECODER key_cache, value_cache = kv_cache.unbind(0) @@ -744,7 +753,8 @@ def forward( (query, key_cache, value_cache), ) - query = query[:, :, :num_actual_tokens, :] + query = query[:, :, :num_actual_tokens, :] + # Doesn't work for now -> constraint violation # torch._dynamo.try_mark_dynamic(query, 2) From b419937c78017dc4c5bfa19f11547f4832ea2290 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hyogeun=20Oh=20=28=EC=98=A4=ED=9A=A8=EA=B7=BC=29?= Date: Fri, 19 Sep 2025 00:23:26 +0900 Subject: [PATCH 115/518] [Docs] Fix warnings in mkdocs build (continued) (#25163) Signed-off-by: Zerohertz --- .../device_communicators/shm_object_storage.py | 2 +- vllm/entrypoints/openai/serving_engine.py | 8 ++------ 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/vllm/distributed/device_communicators/shm_object_storage.py b/vllm/distributed/device_communicators/shm_object_storage.py index 352e7525d4c8..0310fc14da25 100644 --- a/vllm/distributed/device_communicators/shm_object_storage.py +++ b/vllm/distributed/device_communicators/shm_object_storage.py @@ -253,7 +253,7 @@ def free_buf(self, Args: nbytes (int, optional): The size of the buffer to free. If None, - frees the maximum size of the ring buffer. + frees the maximum size of the ring buffer. ''' assert self.is_writer, "Only the writer can free buffers." diff --git a/vllm/entrypoints/openai/serving_engine.py b/vllm/entrypoints/openai/serving_engine.py index d391cc50ad23..4eb1f8b89d64 100644 --- a/vllm/entrypoints/openai/serving_engine.py +++ b/vllm/entrypoints/openai/serving_engine.py @@ -697,9 +697,7 @@ async def _tokenize_prompt_input_async( add_special_tokens: bool = True, ) -> TextTokensPrompt: """ - A simpler implementation of - [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs] - that assumes single input. + A simpler implementation that tokenizes a single prompt input. """ async for result in self._tokenize_prompt_inputs_async( request, @@ -718,9 +716,7 @@ async def _tokenize_prompt_inputs_async( add_special_tokens: bool = True, ) -> AsyncGenerator[TextTokensPrompt, None]: """ - A simpler implementation of - [`_tokenize_prompt_input_or_inputs`][vllm.entrypoints.openai.serving_engine.OpenAIServing._tokenize_prompt_input_or_inputs] - that assumes multiple inputs. + A simpler implementation that tokenizes multiple prompt inputs. """ for prompt in prompt_inputs: if isinstance(prompt, str): From 2ea50e977aac00c63e78990a7477bb91295df183 Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Thu, 18 Sep 2025 10:52:58 -0500 Subject: [PATCH 116/518] Enable Allgather/ReduceScatter backend for NaiveAllToAll (#23964) Signed-off-by: Shu Wang. 
Signed-off-by: Tyler Michael Smith Signed-off-by: Shu Wang Co-authored-by: Tyler Michael Smith Co-authored-by: Tyler Michael Smith Co-authored-by: Michael Goin --- .../device_communicators/all2all.py | 39 +++++++++++++++++++ .../device_communicators/cuda_communicator.py | 4 ++ vllm/envs.py | 17 +++++--- 3 files changed, 55 insertions(+), 5 deletions(-) diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index 427fd040fcb7..149df73d8667 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -5,6 +5,7 @@ import torch import torch.distributed as dist +from vllm.distributed import get_dp_group from vllm.forward_context import get_forward_context from vllm.logger import init_logger from vllm.utils import has_deep_ep, has_pplx @@ -69,6 +70,44 @@ def destroy(self): pass +class AgRsAll2AllManager(All2AllManagerBase): + """ + An implementation of all2all communication based on + all-gather (dispatch) and reduce-scatter (combine). + """ + + def __init__(self, cpu_group): + super().__init__(cpu_group) + + def dispatch(self, hidden_states: torch.Tensor, + router_logits: torch.Tensor): + """ + Gather hidden_states and router_logits from all dp ranks. + """ + sizes = get_forward_context( + ).dp_metadata.get_chunk_sizes_across_dp_rank() + hidden_states, router_logits = get_dp_group().all_gatherv( + [hidden_states, router_logits], + dim=0, + sizes=sizes, + ) + return hidden_states, router_logits + + def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + """ + Reduce-scatter hidden_states across all dp ranks. + """ + sizes = get_forward_context( + ).dp_metadata.get_chunk_sizes_across_dp_rank() + hidden_states = get_dp_group().reduce_scatterv(hidden_states, + dim=0, + sizes=sizes) + return hidden_states + + def destroy(self): + pass + + class PPLXAll2AllManager(All2AllManagerBase): """ All2All communication based on PPLX kernels. 
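The AgRsAll2AllManager added above implements dispatch as an all-gather over the data-parallel group and combine as a reduce-scatter, using vLLM's all_gatherv/reduce_scatterv helpers with per-rank chunk sizes taken from the DP metadata. The sketch below shows the same dispatch/combine pattern with plain torch.distributed collectives instead of those helpers; it assumes an already-initialized process group and uniform per-rank chunk sizes (the real code handles ragged sizes), and all names in it are illustrative rather than part of the patch.

import torch
import torch.distributed as dist


def dispatch(hidden_states: torch.Tensor) -> torch.Tensor:
    # All-gather local tokens so every rank sees the full DP batch.
    world_size = dist.get_world_size()
    gathered = [torch.empty_like(hidden_states) for _ in range(world_size)]
    dist.all_gather(gathered, hidden_states)
    return torch.cat(gathered, dim=0)


def combine(hidden_states: torch.Tensor) -> torch.Tensor:
    # Sum the per-rank partial expert outputs and keep only this rank's
    # slice of tokens -- the inverse of dispatch.
    world_size = dist.get_world_size()
    output = torch.empty(hidden_states.shape[0] // world_size,
                         *hidden_states.shape[1:],
                         dtype=hidden_states.dtype,
                         device=hidden_states.device)
    dist.reduce_scatter_tensor(output, hidden_states, op=dist.ReduceOp.SUM)
    return output
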
diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py index 78c90b006ffc..b2bf3bc3cc2e 100644 --- a/vllm/distributed/device_communicators/cuda_communicator.py +++ b/vllm/distributed/device_communicators/cuda_communicator.py @@ -87,6 +87,10 @@ def __init__(self, from .all2all import NaiveAll2AllManager self.all2all_manager = NaiveAll2AllManager(self.cpu_group) logger.info("Using naive all2all manager.") + elif all2all_backend == "allgather_reducescatter": + from .all2all import AgRsAll2AllManager + self.all2all_manager = AgRsAll2AllManager(self.cpu_group) + logger.info("Using AllGather-ReduceScatter all2all manager.") elif all2all_backend == "pplx": from .all2all import PPLXAll2AllManager self.all2all_manager = PPLXAll2AllManager(self.cpu_group) diff --git a/vllm/envs.py b/vllm/envs.py index 72e1d5b0ede8..19e2f8635275 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -149,8 +149,11 @@ VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost" VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5557 - VLLM_ALL2ALL_BACKEND: Literal["naive", "pplx", "deepep_high_throughput", - "deepep_low_latency"] = "naive" + VLLM_ALL2ALL_BACKEND: Literal["naive", "pplx", + "deepep_high_throughput", + "deepep_low_latency", + "allgather_reducescatter"] = \ + "allgather_reducescatter" VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840 VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1 VLLM_SLEEP_WHEN_IDLE: bool = False @@ -1124,14 +1127,18 @@ def get_vllm_port() -> Optional[int]: # all2all backend for vllm's expert parallel communication # Available options: - # - "naive": naive all2all implementation using all-reduce + # - "naive": naive all2all implementation using broadcasts + # - "allgather_reducescatter": all2all implementation based on allgather and + # reducescatter # - "pplx": use pplx kernels # - "deepep_high_throughput", use deepep high-throughput kernels # - "deepep_low_latency", use deepep low-latency kernels "VLLM_ALL2ALL_BACKEND": - env_with_choices("VLLM_ALL2ALL_BACKEND", "naive", + env_with_choices("VLLM_ALL2ALL_BACKEND", "allgather_reducescatter", ["naive", "pplx", - "deepep_high_throughput", "deepep_low_latency"]), + "deepep_high_throughput", + "deepep_low_latency", + "allgather_reducescatter"]), # Flashinfer MoE backend for vLLM's fused Mixture-of-Experts support. # Both require compute capability 10.0 or above. 
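At runtime the backend wired in above is selected through the VLLM_ALL2ALL_BACKEND environment variable, which this patch also makes the default. A minimal usage sketch follows; the model id, parallel sizes, and prompt are placeholders (not taken from the patch), and expert parallelism must be enabled for the all2all path to be exercised at all.

import os

# Must be set before the engine is constructed so the device communicator
# picks the AllGather-ReduceScatter all2all manager.
os.environ["VLLM_ALL2ALL_BACKEND"] = "allgather_reducescatter"

from vllm import LLM

llm = LLM(
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",  # placeholder MoE model
    tensor_parallel_size=2,                        # assumes 2 GPUs
    enable_expert_parallel=True,
)
print(llm.generate("Hello, world!")[0].outputs[0].text)
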
From 1c3b1634aa9d4be56fa6e931e96ec8145fedcc0a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 18 Sep 2025 17:01:50 +0100 Subject: [PATCH 117/518] [Misc] Add codeowner for Transformers backend (#25180) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index b8d6db06548d..08717cdde643 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -61,6 +61,10 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/v1/kv_connector @ApostaC /tests/v1/offloading @ApostaC +# Transformers backend +/vllm/model_executor/models/transformers.py @hmellor +/tests/models/test_transformers.py @hmellor + # Docs /docs @hmellor mkdocs.yaml @hmellor From c4cb0af98a8e39950fa9b99acf7c241959a14ac8 Mon Sep 17 00:00:00 2001 From: qizixi <22851944+zixi-qi@users.noreply.github.com> Date: Thu, 18 Sep 2025 09:12:19 -0700 Subject: [PATCH 118/518] [spec decode] Fix MTP inference path for MiMo-7B model (#25136) Signed-off-by: zixi-qi Co-authored-by: Cyrus Leung --- examples/offline_inference/spec_decode.py | 6 +++++- vllm/config/speculative.py | 2 +- vllm/model_executor/models/mimo_mtp.py | 18 ++++++++++++++---- 3 files changed, 20 insertions(+), 6 deletions(-) diff --git a/examples/offline_inference/spec_decode.py b/examples/offline_inference/spec_decode.py index 5af232cb6af6..004e75b20464 100644 --- a/examples/offline_inference/spec_decode.py +++ b/examples/offline_inference/spec_decode.py @@ -53,7 +53,6 @@ def parse_args(): "--method", type=str, default="eagle", - choices=["ngram", "eagle", "eagle3", "mtp"], ) parser.add_argument("--num-spec-tokens", type=int, default=2) parser.add_argument("--prompt-lookup-max", type=int, default=5) @@ -118,6 +117,11 @@ def main(): "prompt_lookup_max": args.prompt_lookup_max, "prompt_lookup_min": args.prompt_lookup_min, } + elif args.method.endswith("mtp"): + speculative_config = { + "method": args.method, + "num_speculative_tokens": args.num_spec_tokens, + } else: raise ValueError(f"unknown method: {args.method}") diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index fca8c28e5c61..2c861723c396 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -31,7 +31,7 @@ SpeculativeMethod = Literal["ngram", "eagle", "eagle3", "medusa", "mlp_speculator", "draft_model", "deepseek_mtp", - "ernie_mtp", "qwen3_next_mtp"] + "ernie_mtp", "qwen3_next_mtp", "mimo_mtp"] @config diff --git a/vllm/model_executor/models/mimo_mtp.py b/vllm/model_executor/models/mimo_mtp.py index ac835edc001e..09194e9f95d0 100644 --- a/vllm/model_executor/models/mimo_mtp.py +++ b/vllm/model_executor/models/mimo_mtp.py @@ -241,6 +241,15 @@ def load_weights(self, weights: Iterable[tuple[str, def map_model_name_to_mtp_param_name(self, name: str) -> str: import regex as re + + # append mtp_start_layer_idx + pattern = r"(model\.mtp_layers\.)(\d+)(\.)" + match = re.match(pattern, name) + if match: + original_num = int(match.group(2)) + new_num = original_num + self.config.num_hidden_layers + name = name.replace(match.group(), f"{match.group(1)}{new_num}.") + # check for early turn name_without_prefix = [ "token_layernorm", "hidden_layernorm", "input_proj", "final_layernorm" @@ -248,10 +257,11 @@ def map_model_name_to_mtp_param_name(self, name: str) -> str: for sub_name in name_without_prefix: if sub_name in name: return name - pattern = r"model.mtp_layers.(\d+)." 
- group = re.match(pattern, name) - if group is not None: - name = name.replace(group.group(), group.group() + "mtp_block.") + # add mtp_block + pattern = r"(model\.mtp_layers\.\d+\.)" + match = re.match(pattern, name) + if match: + name = name.replace(match.group(), match.group() + "mtp_block.") return name def _rewrite_spec_layer_name(self, spec_layer: int, name: str) -> str: From dc3405936090f5c964a5b38c9de8c8400f01541c Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:36:55 -0400 Subject: [PATCH 119/518] [ROCm][CI/Build] Use ROCm7.0 as the base (#25178) Signed-off-by: Gregory Shtrasberg --- docker/Dockerfile.rocm | 5 ++- docker/Dockerfile.rocm_base | 61 ++++++++----------------------------- 2 files changed, 16 insertions(+), 50 deletions(-) diff --git a/docker/Dockerfile.rocm b/docker/Dockerfile.rocm index 063fc4969328..c8900212e5a1 100644 --- a/docker/Dockerfile.rocm +++ b/docker/Dockerfile.rocm @@ -29,7 +29,10 @@ ARG VLLM_BRANCH="main" ONBUILD RUN git clone ${VLLM_REPO} \ && cd vllm \ && git fetch -v --prune -- origin ${VLLM_BRANCH} \ - && git checkout FETCH_HEAD + && git checkout FETCH_HEAD \ + && if [ ${VLLM_REPO} != "https://github.com/vllm-project/vllm.git" ] ; then \ + git remote add upstream "https://github.com/vllm-project/vllm.git" \ + && git fetch upstream ; fi FROM fetch_vllm_${REMOTE_VLLM} AS fetch_vllm # ----------------------- diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base index 2ba5461dfe55..4973b57f7656 100644 --- a/docker/Dockerfile.rocm_base +++ b/docker/Dockerfile.rocm_base @@ -1,25 +1,23 @@ -ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:6.4.1-complete -ARG HIPBLASLT_BRANCH="aa0bda7b" -ARG HIPBLAS_COMMON_BRANCH="9b80ba8e" -ARG LEGACY_HIPBLASLT_OPTION= -ARG TRITON_BRANCH="e5be006" -ARG TRITON_REPO="https://github.com/triton-lang/triton.git" -ARG PYTORCH_BRANCH="f717b2af" -ARG PYTORCH_VISION_BRANCH="v0.21.0" +ARG BASE_IMAGE=rocm/dev-ubuntu-22.04:7.0-complete +ARG TRITON_BRANCH="f9e5bf54" +ARG TRITON_REPO="https://github.com/ROCm/triton.git" +ARG PYTORCH_BRANCH="b2fb6885" +ARG PYTORCH_VISION_BRANCH="v0.23.0" ARG PYTORCH_REPO="https://github.com/ROCm/pytorch.git" ARG PYTORCH_VISION_REPO="https://github.com/pytorch/vision.git" -ARG FA_BRANCH="1a7f4dfa" +ARG FA_BRANCH="0e60e394" ARG FA_REPO="https://github.com/Dao-AILab/flash-attention.git" -ARG AITER_BRANCH="4822e675" +ARG AITER_BRANCH="2ab9f4cd" ARG AITER_REPO="https://github.com/ROCm/aiter.git" FROM ${BASE_IMAGE} AS base -ENV PATH=/opt/rocm/llvm/bin:$PATH +ENV PATH=/opt/rocm/llvm/bin:/opt/rocm/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin ENV ROCM_PATH=/opt/rocm ENV LD_LIBRARY_PATH=/opt/rocm/lib:/usr/local/lib: -ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx1100;gfx1101;gfx1200;gfx1201 +ARG PYTORCH_ROCM_ARCH=gfx90a;gfx942;gfx950;gfx1100;gfx1101;gfx1200;gfx1201 ENV PYTORCH_ROCM_ARCH=${PYTORCH_ROCM_ARCH} +ENV AITER_ROCM_ARCH=gfx942;gfx950 ARG PYTHON_VERSION=3.12 @@ -45,29 +43,6 @@ RUN apt-get update -y \ RUN pip install -U packaging 'cmake<4' ninja wheel 'setuptools<80' pybind11 Cython -FROM base AS build_hipblaslt -ARG HIPBLASLT_BRANCH -ARG HIPBLAS_COMMON_BRANCH -# Set to "--legacy_hipblas_direct" for ROCm<=6.2 -ARG LEGACY_HIPBLASLT_OPTION -RUN git clone https://github.com/ROCm/hipBLAS-common.git -RUN apt-get remove -y hipblaslt && apt-get autoremove -y && apt-get autoclean -y -RUN cd hipBLAS-common \ - && git checkout ${HIPBLAS_COMMON_BRANCH} \ - && mkdir build \ - && cd build \ - && cmake .. 
\ - && make package \ - && dpkg -i ./*.deb -RUN git clone https://github.com/ROCm/hipBLASLt -RUN cd hipBLASLt \ - && git checkout ${HIPBLASLT_BRANCH} \ - && apt-get install -y llvm-dev \ - && ./install.sh -dc --architecture ${PYTORCH_ROCM_ARCH} ${LEGACY_HIPBLASLT_OPTION} \ - && cd build/release \ - && make package -RUN mkdir -p /app/install && cp /app/hipBLASLt/build/release/*.deb /app/hipBLAS-common/build/*.deb /app/install - FROM base AS build_triton ARG TRITON_BRANCH ARG TRITON_REPO @@ -121,13 +96,11 @@ RUN cd aiter \ && git checkout ${AITER_BRANCH} \ && git submodule update --init --recursive \ && pip install -r requirements.txt -RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=gfx942 python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl +RUN pip install pyyaml && cd aiter && PREBUILD_KERNELS=1 GPU_ARCHS=${AITER_ROCM_ARCH} python3 setup.py bdist_wheel --dist-dir=dist && ls /app/aiter/dist/*.whl RUN mkdir -p /app/install && cp /app/aiter/dist/*.whl /app/install FROM base AS debs RUN mkdir /app/debs -RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \ - cp /install/*.deb /app/debs RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ cp /install/*.whl /app/debs RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ @@ -138,11 +111,6 @@ RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \ cp /install/*.whl /app/debs FROM base AS final -RUN --mount=type=bind,from=build_hipblaslt,src=/app/install/,target=/install \ - dpkg -i /install/*deb \ - && perl -p -i -e 's/, hipblas-common-dev \([^)]*?\), /, /g' /var/lib/dpkg/status \ - && perl -p -i -e 's/, hipblaslt-dev \([^)]*?\), /, /g' /var/lib/dpkg/status \ - && perl -p -i -e 's/, hipblaslt \([^)]*?\), /, /g' /var/lib/dpkg/status RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ pip install /install/*.whl RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ @@ -153,9 +121,6 @@ RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \ pip install /install/*.whl ARG BASE_IMAGE -ARG HIPBLAS_COMMON_BRANCH -ARG HIPBLASLT_BRANCH -ARG LEGACY_HIPBLASLT_OPTION ARG TRITON_BRANCH ARG TRITON_REPO ARG PYTORCH_BRANCH @@ -167,9 +132,6 @@ ARG FA_REPO ARG AITER_BRANCH ARG AITER_REPO RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ - && echo "HIPBLAS_COMMON_BRANCH: ${HIPBLAS_COMMON_BRANCH}" >> /app/versions.txt \ - && echo "HIPBLASLT_BRANCH: ${HIPBLASLT_BRANCH}" >> /app/versions.txt \ - && echo "LEGACY_HIPBLASLT_OPTION: ${LEGACY_HIPBLASLT_OPTION}" >> /app/versions.txt \ && echo "TRITON_BRANCH: ${TRITON_BRANCH}" >> /app/versions.txt \ && echo "TRITON_REPO: ${TRITON_REPO}" >> /app/versions.txt \ && echo "PYTORCH_BRANCH: ${PYTORCH_BRANCH}" >> /app/versions.txt \ @@ -177,5 +139,6 @@ RUN echo "BASE_IMAGE: ${BASE_IMAGE}" > /app/versions.txt \ && echo "PYTORCH_REPO: ${PYTORCH_REPO}" >> /app/versions.txt \ && echo "PYTORCH_VISION_REPO: ${PYTORCH_VISION_REPO}" >> /app/versions.txt \ && echo "FA_BRANCH: ${FA_BRANCH}" >> /app/versions.txt \ + && echo "FA_REPO: ${FA_REPO}" >> /app/versions.txt \ && echo "AITER_BRANCH: ${AITER_BRANCH}" >> /app/versions.txt \ && echo "AITER_REPO: ${AITER_REPO}" >> /app/versions.txt \ No newline at end of file From bbdc0f2366997536207abc212fcdae7a1b688159 Mon Sep 17 00:00:00 2001 From: Rohan Potdar <66227218+Rohan138@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:46:47 -0500 Subject: [PATCH 120/518] [ROCm][AITER][Bugfix] Switch AITER to 
use PIECEWISE_AND_FULL compilation (#25104) Signed-off-by: Rohan138 --- vllm/v1/attention/backends/rocm_aiter_fa.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index 8eb3505cf274..afb2283c44d3 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -232,7 +232,7 @@ class AiterFlashAttentionMetadata: class AiterFlashAttentionMetadataBuilder( AttentionMetadataBuilder[AiterFlashAttentionMetadata]): - cudagraph_support = AttentionCGSupport.ALWAYS + cudagraph_support = AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): From 505805b645649be6a8e788a1f048b851fa123ef1 Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Thu, 18 Sep 2025 20:57:07 +0300 Subject: [PATCH 121/518] [KV offload][1/N] Introduce an offloading component (#19848) Signed-off-by: Or Ozeri --- .buildkite/test-pipeline.yaml | 1 + tests/v1/offloading/test_worker.py | 152 +++++++++++++++++++++++++ vllm/v1/offloading/abstract.py | 165 ++++++++++++++++++++++++++++ vllm/v1/offloading/mediums.py | 39 +++++++ vllm/v1/offloading/worker/worker.py | 142 ++++++++++++++++++++++++ 5 files changed, 499 insertions(+) create mode 100644 tests/v1/offloading/test_worker.py create mode 100644 vllm/v1/offloading/abstract.py create mode 100644 vllm/v1/offloading/mediums.py create mode 100644 vllm/v1/offloading/worker/worker.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 66dfc990805f..5fd08296625a 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -280,6 +280,7 @@ steps: # split the test to avoid interference - pytest -v -s v1/core - pytest -v -s v1/executor + - pytest -v -s v1/offloading - pytest -v -s v1/sample - pytest -v -s v1/logits_processors - pytest -v -s v1/worker diff --git a/tests/v1/offloading/test_worker.py b/tests/v1/offloading/test_worker.py new file mode 100644 index 000000000000..2391b565773a --- /dev/null +++ b/tests/v1/offloading/test_worker.py @@ -0,0 +1,152 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from vllm.v1.offloading.abstract import LoadStoreSpec +from vllm.v1.offloading.worker.worker import (OffloadingHandler, + OffloadingWorker, TransferResult, + TransferSpec) + + +class LoadStoreSpec1(LoadStoreSpec): + + def __init__(self, + submit_success: bool = True, + async_success: bool = True, + exception: bool = False): + self.finished = False + self.submit_success = submit_success + self.async_success = async_success + self.exception = exception + + @staticmethod + def medium() -> str: + return "1" + + def __repr__(self): + return f"{self.medium()}: {id(self)}" + + +class LoadStoreSpec2(LoadStoreSpec): + + @staticmethod + def medium() -> str: + return "2" + + def __repr__(self): + return f"{self.medium()}: {id(self)}" + + +class OffloadingHandler1To2(OffloadingHandler): + + def __init__(self): + self.transfers: dict[int, LoadStoreSpec1] = {} + + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + src, dst = spec + assert isinstance(src, LoadStoreSpec1) + assert isinstance(dst, LoadStoreSpec2) + + if src.exception: + raise Exception("An expected exception. 
Don't worry!") + if not src.submit_success: + return False + + self.transfers[job_id] = src + return True + + def get_finished(self) -> list[TransferResult]: + finished = [] + for job_id, spec in list(self.transfers.items()): + if spec.finished: + finished.append((job_id, spec.async_success)) + del self.transfers[job_id] + return finished + + +class OffloadingHandler2To1(OffloadingHandler): + + def __init__(self): + self.transfers: dict[int, LoadStoreSpec1] = {} + + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + src, dst = spec + assert isinstance(src, LoadStoreSpec2) + assert isinstance(dst, LoadStoreSpec1) + + self.transfers[job_id] = dst + return True + + def get_finished(self) -> list[TransferResult]: + finished = [] + for job_id, spec in list(self.transfers.items()): + if spec.finished: + finished.append((job_id, spec.async_success)) + del self.transfers[job_id] + return finished + + +def test_offloading_worker(): + """ + Tests OffloadingWorker with 2 handlers. + One handler performs 1->2 transfers, and the other handles 2->1. + """ + worker = OffloadingWorker() + handler1to2 = OffloadingHandler1To2() + handler2to1 = OffloadingHandler2To1() + worker.register_handler(LoadStoreSpec1, LoadStoreSpec2, handler1to2) + worker.register_handler(LoadStoreSpec2, LoadStoreSpec1, handler2to1) + + # 1st transfer 1->2 (exception) + src1 = LoadStoreSpec1(exception=True) + dst1 = LoadStoreSpec2() + assert not worker.transfer_async(1, (src1, dst1)) + + # 2ed transfer 1->2 (failure to submit) + src2 = LoadStoreSpec1(submit_success=False) + dst2 = LoadStoreSpec2() + assert not worker.transfer_async(2, (src2, dst2)) + + # 3rd transfer 1->2 (failure) + src3 = LoadStoreSpec1(async_success=False) + dst3 = LoadStoreSpec2() + assert worker.transfer_async(3, (src3, dst3)) + + # 4th transfer 1->2 (success) + src4 = LoadStoreSpec1() + dst4 = LoadStoreSpec2() + worker.transfer_async(4, (src4, dst4)) + assert set(handler1to2.transfers.keys()) == {3, 4} + + # 5th transfer 2->1 + src5 = LoadStoreSpec2() + dst5 = LoadStoreSpec1() + worker.transfer_async(5, (src5, dst5)) + assert set(handler2to1.transfers.keys()) == {5} + + # no transfer completed yet + assert worker.get_finished() == [] + + # complete 3rd, 4th + src3.finished = True + src4.finished = True + + # 6th transfer 1->2 + src6 = LoadStoreSpec1() + dst6 = LoadStoreSpec2() + worker.transfer_async(6, (src6, dst6)) + + # 7th transfer 2->1 + src7 = LoadStoreSpec2() + dst7 = LoadStoreSpec1() + worker.transfer_async(7, (src7, dst7)) + + # 6th and 7th transfers started + assert 6 in handler1to2.transfers + assert 7 in handler2to1.transfers + + # verify result of 3rd and 4th transfers + assert (sorted(worker.get_finished()) == [(3, False), (4, True)]) + + # complete 6th and 7th transfers + src6.finished = True + dst7.finished = True + assert (sorted(worker.get_finished()) == [(6, True), (7, True)]) diff --git a/vllm/v1/offloading/abstract.py b/vllm/v1/offloading/abstract.py new file mode 100644 index 000000000000..9f9c044ea1c5 --- /dev/null +++ b/vllm/v1/offloading/abstract.py @@ -0,0 +1,165 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +OffloadingManager class for managing KV data offloading in vLLM v1 + +This class runs in the scheduler, tracks which blocks are offloaded +and their address. + +The class provides the following primitives: + lookup() - find the length of the maximal series of blocks, + starting from the first one, that are all offloaded. 
+ prepare_load() - prepare given blocks to be read. + The given blocks will be protected from eviction. + This function returns a LoadSpec which encapsulates + information required for performing the load. + touch() - marks the give blocks as recently used. Can be used + to track block's LRU. This function is separated from the + prepare_load function to allow setting block recency even + for blocks which do not need reading from the cache, such as + blocks that are cached by the GPU prefix cache. + complete_load() - mark blocks which were previously prepared to be + loaded as done loading. This is to re-allow their eviction. + prepare_store() - prepare the given blocks to be written. + Returns a StoreSpec encapsulating offloading information, + as well as a list of blocks that were evicted as a result. + complete_store() - marks a previous store as completed. + Following this call, the given blocks will become loadable. +""" + +from abc import ABC, abstractmethod +from collections.abc import Iterable +from dataclasses import dataclass +from typing import Optional + +from vllm.v1.core.kv_cache_utils import BlockHash + + +class LoadStoreSpec(ABC): + """ + Abstract metadata that encapsulates information allowing a worker + to load, and optionally also to store, blocks of KV data. + """ + + @staticmethod + @abstractmethod + def medium() -> str: + """ + Returns a string representation of the medium type + this store/load targets. + """ + pass + + +@dataclass +class PrepareStoreOutput: + block_hashes_to_store: list[BlockHash] + store_spec: LoadStoreSpec + block_hashes_evicted: list[BlockHash] + + +@dataclass +class OffloadingEvent: + block_hashes: list[BlockHash] + block_size: int + medium: str + # True if blocks are removed, False if stored + removed: bool + + +class OffloadingManager(ABC): + + @abstractmethod + def lookup(self, block_hashes: Iterable[BlockHash]) -> int: + """ + Finds the length of the maximal series of blocks, starting from the + first one, that are all offloaded. + + Args: + block_hashes: the hashes identifying the blocks to lookup. + + Returns: + An integer representing the maximal number of blocks that + are currently offloaded. + """ + pass + + @abstractmethod + def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec: + """ + Prepare the given blocks to be read. + The given blocks will be protected from eviction until + complete_load is called. + It assumes all given blocks are offloaded. + + Args: + block_hashes: the hashes identifying the blocks. + + Returns: + A LoadStoreSpec that can be used by a worker to locate and load + the actual offloaded KV data. + """ + pass + + def touch(self, block_hashes: Iterable[BlockHash]): + """ + Mark the given blocks as recently used. + This could in practice mean moving them to the end of an LRU list. + + Args: + block_hashes: the hashes identifying the blocks. + """ + return + + def complete_load(self, block_hashes: Iterable[BlockHash]): + """ + Marks previous blocks that were prepared to load as done loading. + + Args: + block_hashes: the hashes identifying the blocks. + """ + return + + @abstractmethod + def prepare_store( + self, + block_hashes: Iterable[BlockHash]) -> Optional[PrepareStoreOutput]: + """ + Prepare the given blocks to be offloaded. + The given blocks will be protected from eviction until + complete_store is called. + + Args: + block_hashes: the hashes identifying the blocks. 
+ + Returns: + A PrepareStoreOutput indicating which blocks need storing, + where to store them (LoadStoreSpec), and list of blocks that + were evicted as a result. + None is returned if the blocks cannot be stored. + """ + pass + + def complete_store(self, + block_hashes: Iterable[BlockHash], + success: bool = True): + """ + Marks blocks which were previously prepared to be stored, as stored. + Following this call, the blocks become loadable. + If if_success is False, blocks that were not marked as stored will be + removed. + + Args: + block_hashes: the hashes identifying the blocks. + success: whether the blocks were stored successfully. + """ + return + + def take_events(self) -> Iterable[OffloadingEvent]: + """ + Take the offloading events from the manager. + + Yields: + New OffloadingEvents collected since the last call. + """ + return () diff --git a/vllm/v1/offloading/mediums.py b/vllm/v1/offloading/mediums.py new file mode 100644 index 000000000000..5a1887848c9f --- /dev/null +++ b/vllm/v1/offloading/mediums.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from abc import ABC + +import numpy as np + +from vllm.v1.offloading.abstract import LoadStoreSpec + + +class BlockIDsLoadStoreSpec(LoadStoreSpec, ABC): + """ + Spec for loading/storing KV blocks from given block numbers. + """ + + def __init__(self, block_ids: list[int]): + self.block_ids = np.array(block_ids, dtype=np.int64) + + def __repr__(self) -> str: + return repr(self.block_ids) + + +class GPULoadStoreSpec(BlockIDsLoadStoreSpec): + """ + Spec for loading/storing a KV block to GPU memory. + """ + + @staticmethod + def medium() -> str: + return "GPU" + + +class CPULoadStoreSpec(BlockIDsLoadStoreSpec): + """ + Spec for loading/storing a KV block to CPU memory. + """ + + @staticmethod + def medium() -> str: + return "CPU" diff --git a/vllm/v1/offloading/worker/worker.py b/vllm/v1/offloading/worker/worker.py new file mode 100644 index 000000000000..d2c2045d1f1f --- /dev/null +++ b/vllm/v1/offloading/worker/worker.py @@ -0,0 +1,142 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from abc import ABC, abstractmethod + +from vllm.logger import init_logger +from vllm.v1.offloading.abstract import LoadStoreSpec + +# a single transfer spec (src_blocks_spec, dst_blocks_spec) +TransferSpec = tuple[LoadStoreSpec, LoadStoreSpec] +# transfers are forwarded to workers by (src_medium, dst_medium) +TransferType = tuple[str, str] +# transfer result (job_id, success) +TransferResult = tuple[int, bool] + +logger = init_logger(__name__) + + +class OffloadingHandler(ABC): + """ + OffloadingHandler class for managing asynchronous KV data transfers + + This class runs in the worker. + It kicks off async KV data transfer requests, and allows + collecting back completion statuses. + + The class provides the following primitives: + transfer_async() - kicks off a new transfer job + get_finished() - returns a list of newly finished job IDs. + """ + + @abstractmethod + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + """ + Initiates an asynchronous transfer of KV data. + + Args: + job_id: a unique ID that will be used when notifying back on + transfer completion. + spec: the (src, dst) spec of the KV data transfer. + + Returns: + True if transfer was submitted successfully. 
+ """ + pass + + @abstractmethod + def get_finished(self) -> list[TransferResult]: + """ + Get transfers finished since last call. + + Returns: + A list of (job_id, success) of transfers. + """ + pass + + +class OffloadingWorker: + """ + OffloadingWorker class for managing asynchronous KV data transfers + using multiple OffloadingHandlers + + This class runs in the worker. + It kicks off async KV data transfer requests, by delegating + to one of its registered OffloadingHandlers, based on the transfer type. + + The class provides the following primitives: + register_handler() - registers a new handler to handle + a specific transfer type + transfer_async() - kicks off a new transfer job + using one of the registered handlers. + get_finished() - returns a list of newly finished job IDs + from all handlers. + """ + + def __init__(self): + self.handlers: set[OffloadingHandler] = set() + self.transfer_type_to_handler: dict[TransferType, + OffloadingHandler] = {} + + def register_handler(self, src_cls: type[LoadStoreSpec], + dst_cls: type[LoadStoreSpec], + handler: OffloadingHandler) -> None: + """ + Registers a new handler. + + Args: + src_cls: the source type of transfers handled by this handler. + dst_cls: the destination type of transfers handled by this handler. + handler: the handler that will handle transfers. + """ + transfer_type = (src_cls.medium(), dst_cls.medium()) + assert transfer_type not in self.transfer_type_to_handler + self.handlers.add(handler) + self.transfer_type_to_handler[transfer_type] = handler + + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + """ + Initiates an asynchronous transfer of KV data. + + Args: + job_id: a unique ID that will be used when notifying back on + transfer completion. + spec: the (src, dst) spec of the KV data transfer. + + Returns: + True if transfer was submitted successfully. + """ + src, dst = spec + transfer_type = (src.medium(), dst.medium()) + handler = self.transfer_type_to_handler.get(transfer_type) + assert handler is not None + + try: + success = handler.transfer_async(job_id, spec) + except Exception as e: + logger.warning("Exception in %r transfer %d: %r", + transfer_type, + job_id, + e, + exc_info=True) + return False + + if not success: + logger.warning("Failed to submit %r transfer %d", transfer_type, + job_id) + else: + logger.debug("Submitted %r transfer %d: %r", transfer_type, job_id, + spec) + + return success + + def get_finished(self) -> list[TransferResult]: + """ + Get transfers finished since last call. + + Returns: + A list of (job_id, success) of transfers. 
+ """ + finished = [] + for handler in self.handlers: + finished.extend(handler.get_finished()) + return finished From e19bce40a1660cb7c03b790d0b000db155cf925d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 18 Sep 2025 11:07:42 -0700 Subject: [PATCH 122/518] [V0 Deprecation] Remove AsyncLLMEngine (#25025) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- tests/entrypoints/openai/test_chat.py | 54 +- tests/entrypoints/openai/test_completion.py | 830 ------------- .../test_completion_with_prompt_embeds.py | 3 + .../entrypoints/openai/test_lora_adapters.py | 5 +- tests/entrypoints/openai/test_metrics.py | 2 +- .../openai/test_return_tokens_as_ids.py | 26 +- .../entrypoints/openai/test_skip_tokenizer.py | 8 - tests/v1/test_oracle.py | 18 - vllm/engine/async_llm_engine.py | 1030 +---------------- vllm/entrypoints/launcher.py | 2 - vllm/entrypoints/openai/api_server.py | 69 +- 11 files changed, 78 insertions(+), 1969 deletions(-) delete mode 100644 tests/entrypoints/openai/test_completion.py diff --git a/tests/entrypoints/openai/test_chat.py b/tests/entrypoints/openai/test_chat.py index a827f94cfbfe..3bdfef7b4adb 100644 --- a/tests/entrypoints/openai/test_chat.py +++ b/tests/entrypoints/openai/test_chat.py @@ -28,11 +28,9 @@ def monkeypatch_module(): mpatch.undo() -@pytest.fixture(scope="module", params=[False, True]) -def server(request, monkeypatch_module, zephyr_lora_files): #noqa: F811 - - use_v1 = request.param - monkeypatch_module.setenv('VLLM_USE_V1', '1' if use_v1 else '0') +@pytest.fixture(scope="module") +def server(monkeypatch_module, zephyr_lora_files): #noqa: F811 + monkeypatch_module.setenv('VLLM_USE_V1', '1') args = [ # use half precision for speed and memory savings in CI environment @@ -57,13 +55,6 @@ def server(request, monkeypatch_module, zephyr_lora_files): #noqa: F811 yield remote_server -@pytest.fixture -def is_v1_server(server): - import os - assert os.environ['VLLM_USE_V1'] in ['0', '1'] - return os.environ['VLLM_USE_V1'] == '1' - - @pytest_asyncio.fixture async def client(server): async with server.get_async_client() as async_client: @@ -481,10 +472,9 @@ async def test_chat_completion_stream_options(client: openai.AsyncOpenAI, @pytest.mark.asyncio async def test_structured_outputs_choice_chat( - client: openai.AsyncOpenAI, sample_structured_outputs_choices, - is_v1_server: bool): - if not is_v1_server: - pytest.skip("Structured outputs is only supported in v1 engine") + client: openai.AsyncOpenAI, + sample_structured_outputs_choices, +): messages = [{ "role": "system", "content": "you are a helpful assistant" @@ -522,12 +512,10 @@ async def test_structured_outputs_choice_chat( @pytest.mark.asyncio -async def test_structured_outputs_json_chat(client: openai.AsyncOpenAI, - sample_json_schema, - is_v1_server: bool): - if not is_v1_server: - pytest.skip("Structured outputs is only supported in v1 engine") - +async def test_structured_outputs_json_chat( + client: openai.AsyncOpenAI, + sample_json_schema, +): messages = [{ "role": "system", "content": "you are a helpful assistant" @@ -569,10 +557,10 @@ async def test_structured_outputs_json_chat(client: openai.AsyncOpenAI, @pytest.mark.asyncio -async def test_structured_outputs_regex_chat(client: openai.AsyncOpenAI, - sample_regex, is_v1_server: bool): - if not is_v1_server: - pytest.skip("Structured outputs is only supported in v1 engine") +async def test_structured_outputs_regex_chat( + client: openai.AsyncOpenAI, + sample_regex, +): messages = [{ "role": "system", @@ -660,10 +648,10 @@ async def 
test_structured_outputs_choice_chat_logprobs( @pytest.mark.asyncio -async def test_named_tool_use(client: openai.AsyncOpenAI, sample_json_schema, - is_v1_server: bool): - if not is_v1_server: - pytest.skip("Tool use is only supported in v1 engine") +async def test_named_tool_use( + client: openai.AsyncOpenAI, + sample_json_schema, +): messages = [{ "role": "system", "content": "you are a helpful assistant" @@ -821,11 +809,7 @@ async def test_response_format_json_object(client: openai.AsyncOpenAI): @pytest.mark.asyncio -async def test_response_format_json_schema(client: openai.AsyncOpenAI, - is_v1_server: bool): - if not is_v1_server: - pytest.skip( - "JSON schema response format is only supported in v1 engine") +async def test_response_format_json_schema(client: openai.AsyncOpenAI): prompt = 'what is 1+1? The format is "result": 2' # Check that this prompt cannot lead to a valid JSON without json_schema for _ in range(2): diff --git a/tests/entrypoints/openai/test_completion.py b/tests/entrypoints/openai/test_completion.py deleted file mode 100644 index 0347513befe3..000000000000 --- a/tests/entrypoints/openai/test_completion.py +++ /dev/null @@ -1,830 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -# imports for structured outputs tests -import json -import os -from typing import Optional - -import jsonschema -import openai # use the official client for correctness check -import pytest -import pytest_asyncio -import regex as re -import requests -# downloading lora to test lora requests -from openai import BadRequestError - -from vllm.transformers_utils.tokenizer import get_tokenizer - -from ...utils import RemoteOpenAIServer - -# any model with a chat template should work here -MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" -# technically these adapters use a different base model, -# but we're not testing generation quality here - - -@pytest.fixture(scope="module") -def default_server_args(zephyr_lora_files): - return [ - # use half precision for speed and memory savings in CI environment - "--dtype", - "bfloat16", - "--max-model-len", - "8192", - "--max-num-seqs", - "128", - "--enforce-eager", - # lora config - "--enable-lora", - "--lora-modules", - f"zephyr-lora={zephyr_lora_files}", - "--max-lora-rank", - "64", - "--max-cpu-loras", - "2", - ] - - -@pytest.fixture(scope="module", - params=["", "--disable-frontend-multiprocessing"]) -def server(default_server_args, request): - if request.param: - default_server_args.append(request.param) - - original_value = os.environ.get('VLLM_USE_V1') - os.environ['VLLM_USE_V1'] = '0' - try: - with RemoteOpenAIServer(MODEL_NAME, - default_server_args) as remote_server: - yield remote_server - finally: - # Restore original env value - if original_value is None: - os.environ.pop('VLLM_USE_V1', None) - else: - os.environ['VLLM_USE_V1'] = original_value - - -@pytest.fixture -def is_v1_server(server): - import os - - # For completion tests, we assume v0 since there's no explicit v1 setup - return os.environ.get('VLLM_USE_V1', '0') == '1' - - -@pytest_asyncio.fixture -async def client(server): - async with server.get_async_client() as async_client: - yield async_client - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - # first test base model, then test loras - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_single_completion(client: openai.AsyncOpenAI, model_name: str): - completion = await client.completions.create(model=model_name, - prompt="Hello, my name is", - 
max_tokens=5, - temperature=0.0) - - assert completion.id is not None - assert completion.choices is not None and len(completion.choices) == 1 - - choice = completion.choices[0] - assert len(choice.text) >= 5 - assert choice.finish_reason == "length" - assert completion.usage == openai.types.CompletionUsage( - completion_tokens=5, prompt_tokens=6, total_tokens=11) - - # test using token IDs - completion = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - ) - assert len(completion.choices[0].text) >= 1 - assert completion.choices[0].prompt_logprobs is None - - -@pytest.mark.asyncio -async def test_added_lora_tokens_base_model(client: openai.AsyncOpenAI): - # test using token IDs - with pytest.raises(openai.BadRequestError, match="out of vocabulary"): - # Added tokens should be rejected by the base model - await client.completions.create( - model=MODEL_NAME, - prompt=[0, 0, 32000, 32001, 32002], - echo=True, - max_tokens=5, - temperature=0.0, - ) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - # first test base model, then test loras - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_no_logprobs(client: openai.AsyncOpenAI, model_name: str): - # test using token IDs - completion = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - logprobs=None, - ) - choice = completion.choices[0] - assert choice.logprobs is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - # just test 1 lora - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_zero_logprobs(client: openai.AsyncOpenAI, model_name: str): - # test using token IDs - completion = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - logprobs=0, - ) - choice = completion.choices[0] - assert choice.logprobs is not None - assert choice.logprobs.token_logprobs is not None - assert choice.logprobs.top_logprobs is not None - assert len(choice.logprobs.top_logprobs[0]) == 1 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_some_logprobs(client: openai.AsyncOpenAI, model_name: str): - # test using token IDs - completion = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - logprobs=5, - ) - choice = completion.choices[0] - assert choice.logprobs is not None - assert choice.logprobs.token_logprobs is not None - assert choice.logprobs.top_logprobs is not None - assert 5 <= len(choice.logprobs.top_logprobs[0]) <= 6 - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_too_many_completion_logprobs(client: openai.AsyncOpenAI, - model_name: str): - - with pytest.raises( - (openai.BadRequestError, openai.APIError)): # test using token IDs - await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - # vLLM has higher default max_logprobs (20 instead of 5) to support - # both Completion API and Chat Completion API - logprobs=21, - ) - ... 
- with pytest.raises( - (openai.BadRequestError, openai.APIError)): # test using token IDs - stream = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - # vLLM has higher default max_logprobs (20 instead of 5) to support - # both Completion API and Chat Completion API - logprobs=30, - stream=True, - ) - async for chunk in stream: - ... - - # the server should still work afterwards - completion = await client.completions.create( - model=model_name, - prompt=[0, 0, 0, 0, 0], - max_tokens=5, - temperature=0.0, - ) - assert len(completion.choices[0].text) >= 0 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("model_name, prompt_logprobs", [(MODEL_NAME, -1), - (MODEL_NAME, 0), - (MODEL_NAME, 1), - (MODEL_NAME, None)]) -async def test_prompt_logprobs_completion(client: openai.AsyncOpenAI, - model_name: str, - prompt_logprobs: Optional[int]): - params: dict = { - "prompt": ["A robot may not injure another robot", "My name is"], - "model": model_name, - } - if prompt_logprobs is not None: - params["extra_body"] = {"prompt_logprobs": prompt_logprobs} - - if prompt_logprobs is not None and prompt_logprobs < 0: - with pytest.raises(BadRequestError): - await client.completions.create(**params) - else: - completion = await client.completions.create(**params) - if prompt_logprobs is not None: - assert completion.choices[0].prompt_logprobs is not None - assert len(completion.choices[0].prompt_logprobs) > 0 - - assert completion.choices[1].prompt_logprobs is not None - assert len(completion.choices[1].prompt_logprobs) > 0 - - else: - assert completion.choices[0].prompt_logprobs is None - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_completion_streaming(client: openai.AsyncOpenAI, - model_name: str): - prompt = "What is an LLM?" - - single_completion = await client.completions.create( - model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - ) - single_output = single_completion.choices[0].text - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True) - chunks: list[str] = [] - finish_reason_count = 0 - async for chunk in stream: - chunks.append(chunk.choices[0].text) - if chunk.choices[0].finish_reason is not None: - finish_reason_count += 1 - # finish reason should only return in last block - assert finish_reason_count == 1 - assert chunk.choices[0].finish_reason == "length" - assert chunk.choices[0].text - assert "".join(chunks) == single_output - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_parallel_streaming(client: openai.AsyncOpenAI, model_name: str): - """Streaming for parallel sampling. - The tokens from multiple samples, are flattened into a single stream, - with an index to indicate which sample the token belongs to. - """ - - prompt = "What is an LLM?" 
- n = 3 - max_tokens = 5 - - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=max_tokens, - n=n, - stream=True) - chunks: list[list[str]] = [[] for i in range(n)] - finish_reason_count = 0 - async for chunk in stream: - index = chunk.choices[0].index - text = chunk.choices[0].text - chunks[index].append(text) - if chunk.choices[0].finish_reason is not None: - finish_reason_count += 1 - assert finish_reason_count == n - for chunk in chunks: - assert len(chunk) == max_tokens - print("".join(chunk)) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_completion_stream_options(client: openai.AsyncOpenAI, - model_name: str): - prompt = "What is the capital of France?" - - # Test stream=True, stream_options= - # {"include_usage": False, "continuous_usage_stats": False} - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True, - stream_options={ - "include_usage": False, - "continuous_usage_stats": - False, - }) - - async for chunk in stream: - assert chunk.usage is None - - # Test stream=True, stream_options= - # {"include_usage": False, "continuous_usage_stats": True} - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True, - stream_options={ - "include_usage": False, - "continuous_usage_stats": - True, - }) - async for chunk in stream: - assert chunk.usage is None - - # Test stream=True, stream_options= - # {"include_usage": True, "continuous_usage_stats": False} - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True, - stream_options={ - "include_usage": True, - "continuous_usage_stats": - False, - }) - async for chunk in stream: - if chunk.choices[0].finish_reason is None: - assert chunk.usage is None - else: - assert chunk.usage is None - final_chunk = await stream.__anext__() - assert final_chunk.usage is not None - assert final_chunk.usage.prompt_tokens > 0 - assert final_chunk.usage.completion_tokens > 0 - assert final_chunk.usage.total_tokens == ( - final_chunk.usage.prompt_tokens + - final_chunk.usage.completion_tokens) - assert final_chunk.choices == [] - - # Test stream=True, stream_options= - # {"include_usage": True, "continuous_usage_stats": True} - stream = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=True, - stream_options={ - "include_usage": True, - "continuous_usage_stats": - True, - }) - async for chunk in stream: - assert chunk.usage is not None - assert chunk.usage.prompt_tokens > 0 - assert chunk.usage.completion_tokens > 0 - assert chunk.usage.total_tokens == (chunk.usage.prompt_tokens + - chunk.usage.completion_tokens) - if chunk.choices[0].finish_reason is not None: - final_chunk = await stream.__anext__() - assert final_chunk.usage is not None - assert final_chunk.usage.prompt_tokens > 0 - assert final_chunk.usage.completion_tokens > 0 - assert final_chunk.usage.total_tokens == ( - final_chunk.usage.prompt_tokens + - final_chunk.usage.completion_tokens) - assert final_chunk.choices == [] - - # Test stream=False, stream_options= - # {"include_usage": None} - with pytest.raises(BadRequestError): - await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=False, - stream_options={"include_usage": None}) - - # Test stream=False, 
stream_options= - # {"include_usage": True} - with pytest.raises(BadRequestError): - await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=False, - stream_options={"include_usage": True}) - - # Test stream=False, stream_options= - # {"continuous_usage_stats": None} - with pytest.raises(BadRequestError): - await client.completions.create( - model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=False, - stream_options={"continuous_usage_stats": None}) - - # Test stream=False, stream_options= - # {"continuous_usage_stats": True} - with pytest.raises(BadRequestError): - await client.completions.create( - model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - stream=False, - stream_options={"continuous_usage_stats": True}) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -async def test_batch_completions(client: openai.AsyncOpenAI, model_name: str): - # test both text and token IDs - for prompts in (["Hello, my name is"] * 2, [[0, 0, 0, 0, 0]] * 2): - # test simple list - batch = await client.completions.create( - model=model_name, - prompt=prompts, - max_tokens=5, - temperature=0.0, - ) - assert len(batch.choices) == 2 - assert batch.choices[0].text == batch.choices[1].text - - # test n = 2 - batch = await client.completions.create( - model=model_name, - prompt=prompts, - n=2, - max_tokens=5, - temperature=0.0, - extra_body=dict( - # NOTE: this has to be true for n > 1 in vLLM, but - # not necessary for official client. - use_beam_search=True), - ) - assert len(batch.choices) == 4 - assert batch.choices[0].text != batch.choices[ - 1].text, "beam search should be different" - assert batch.choices[0].text == batch.choices[ - 2].text, "two copies of the same prompt should be the same" - assert batch.choices[1].text == batch.choices[ - 3].text, "two copies of the same prompt should be the same" - - # test streaming - batch = await client.completions.create( - model=model_name, - prompt=prompts, - max_tokens=5, - temperature=0.0, - stream=True, - ) - texts = [""] * 2 - async for chunk in batch: - assert len(chunk.choices) == 1 - choice = chunk.choices[0] - texts[choice.index] += choice.text - assert texts[0] == texts[1] - - -@pytest.mark.asyncio -async def test_logits_bias(client: openai.AsyncOpenAI): - prompt = "Hello, my name is" - max_tokens = 5 - tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME) - - # Test exclusive selection - token_id = 1000 - completion = await client.completions.create( - model=MODEL_NAME, - prompt=prompt, - max_tokens=max_tokens, - temperature=0.0, - logit_bias={str(token_id): 100}, - seed=42, - ) - assert len(completion.choices[0].text) >= 5 - response_tokens = tokenizer(completion.choices[0].text, - add_special_tokens=False)["input_ids"] - expected_tokens = tokenizer(tokenizer.decode([token_id] * 5), - add_special_tokens=False)["input_ids"] - assert all([ - response == expected - for response, expected in zip(response_tokens, expected_tokens) - ]) - - # Test ban - completion = await client.completions.create( - model=MODEL_NAME, - prompt=prompt, - max_tokens=max_tokens, - temperature=0.0, - ) - response_tokens = tokenizer(completion.choices[0].text, - add_special_tokens=False)["input_ids"] - first_response = completion.choices[0].text - completion = await client.completions.create( - model=MODEL_NAME, - prompt=prompt, - max_tokens=max_tokens, - temperature=0.0, - logit_bias={str(token): -100 - for token in 
response_tokens}, - ) - assert first_response != completion.choices[0].text - - -@pytest.mark.asyncio -async def test_allowed_token_ids(client: openai.AsyncOpenAI): - prompt = "Hello, my name is" - max_tokens = 1 - tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME) - - # Test exclusive selection - allowed_ids = [21555, 21557, 21558] - completion = await client.completions.create( - model=MODEL_NAME, - prompt=prompt, - max_tokens=max_tokens, - temperature=0.0, - seed=42, - extra_body=dict(allowed_token_ids=allowed_ids), - logprobs=1, - ) - response_tokens = completion.choices[0].logprobs.tokens - assert len(response_tokens) == 1 - assert tokenizer.convert_tokens_to_ids(response_tokens)[0] in allowed_ids - - -@pytest.mark.asyncio -async def test_structured_outputs_json_completion( - client: openai.AsyncOpenAI, - sample_json_schema, - is_v1_server: bool, -): - if not is_v1_server: - pytest.skip("structured outputs is only supported in v1 engine") - - completion = await client.completions.create( - model=MODEL_NAME, - prompt=f"Give an example JSON for an employee profile " - f"that fits this schema: {sample_json_schema}", - n=3, - temperature=1.0, - max_tokens=500, - extra_body=dict(structured_outputs=dict(json=sample_json_schema))) - - assert completion.id is not None - assert len(completion.choices) == 3 - for i in range(3): - output_json = json.loads(completion.choices[i].text) - jsonschema.validate(instance=output_json, schema=sample_json_schema) - - -@pytest.mark.asyncio -async def test_structured_outputs_regex_completion( - client: openai.AsyncOpenAI, - sample_regex, - is_v1_server: bool, -): - if not is_v1_server: - pytest.skip("structured outputs is only supported in v1 engine") - - completion = await client.completions.create( - model=MODEL_NAME, - prompt=f"Give an example IPv4 address with this regex: {sample_regex}", - n=3, - temperature=1.0, - max_tokens=20, - extra_body=dict(structured_outputs=dict(regex=sample_regex))) - - assert completion.id is not None - assert len(completion.choices) == 3 - for i in range(3): - assert re.fullmatch(sample_regex, - completion.choices[i].text) is not None - - -@pytest.mark.asyncio -async def test_structured_outputs_choice_completion( - client: openai.AsyncOpenAI, - sample_structured_outputs_choices, - is_v1_server: bool, -): - if not is_v1_server: - pytest.skip("structured outputs is only supported in v1 engine") - - completion = await client.completions.create( - model=MODEL_NAME, - prompt="The best language for type-safe systems programming is ", - n=2, - temperature=1.0, - max_tokens=10, - extra_body=dict(structured_outputs=dict( - choice=sample_structured_outputs_choices))) - - assert completion.id is not None - assert len(completion.choices) == 2 - for i in range(2): - assert completion.choices[i].text in sample_structured_outputs_choices - - -@pytest.mark.asyncio -async def test_structured_outputs_grammar(client: openai.AsyncOpenAI, - sample_sql_statements, - is_v1_server: bool): - if not is_v1_server: - pytest.skip("grammar is only supported in v1 engine") - - completion = await client.completions.create( - model=MODEL_NAME, - prompt=("Generate a sql state that select col_1 from " - "table_1 where it is equals to 1"), - temperature=1.0, - max_tokens=500, - extra_body=dict( - structured_outputs=dict(grammar=sample_sql_statements), )) - - content = completion.choices[0].text - - # use Lark to parse the output, and make sure it's a valid parse tree - from lark import Lark - parser = Lark(sample_sql_statements) - parser.parse(content) - - # 
remove spaces for comparison b/c we removed them in the grammar - ground_truth = "SELECT col_1 from table_1 where col_1 = 1".replace(" ", "") - - assert content.strip() == ground_truth - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - # first test base model, then test loras - "model_name", - [MODEL_NAME, "zephyr-lora"], -) -@pytest.mark.parametrize("logprobs_arg", [1, 0]) -async def test_echo_logprob_completion(client: openai.AsyncOpenAI, - model_name: str, logprobs_arg: int): - tokenizer = get_tokenizer(tokenizer_name=MODEL_NAME) - # test using text and token IDs - for prompt in ("Hello, my name is", [0, 0, 0, 0, 0]): - completion = await client.completions.create(model=model_name, - prompt=prompt, - max_tokens=5, - temperature=0.0, - echo=True, - logprobs=logprobs_arg) - - prompt_text = tokenizer.decode(prompt) if isinstance(prompt, - list) else prompt - assert re.search(r"^" + prompt_text, completion.choices[0].text) - logprobs = completion.choices[0].logprobs - assert logprobs is not None - assert len(logprobs.text_offset) > 5 - assert (len(logprobs.token_logprobs) > 5 - and logprobs.token_logprobs[0] is None) - assert (len(logprobs.top_logprobs) > 5 - and logprobs.top_logprobs[0] is None) - for top_logprobs in logprobs.top_logprobs[1:]: - assert max(logprobs_arg, - 1) <= len(top_logprobs) <= logprobs_arg + 1 - assert len(logprobs.tokens) > 5 - - -@pytest.mark.asyncio -async def test_structured_outputs_type_error(client: openai.AsyncOpenAI, - sample_json_schema, sample_regex, - is_v1_server: bool): - if not is_v1_server: - pytest.skip("structured outputs is only supported in v1 engine") - - with pytest.raises(openai.BadRequestError): - _ = await client.completions.create( - model=MODEL_NAME, - prompt="Give an example JSON that fits this schema: 42", - extra_body=dict(structured_outputs=dict(json=42))) - - with pytest.raises(openai.BadRequestError): - _ = await client.completions.create( - model=MODEL_NAME, - prompt="Give an example string that fits this regex", - extra_body=dict(structured_outputs=dict( - regex=sample_regex, - json=sample_json_schema, - ))) - - -@pytest.mark.asyncio -@pytest.mark.parametrize( - "model_name,stream,echo", - [ - (MODEL_NAME, False, False), - (MODEL_NAME, False, True), - (MODEL_NAME, True, False), - (MODEL_NAME, True, True) # should not raise BadRequestError error - ], -) -async def test_echo_stream_completion(client: openai.AsyncOpenAI, - model_name: str, stream: bool, - echo: bool): - saying: str = "Hello, my name is" - result = await client.completions.create(model=model_name, - prompt=saying, - max_tokens=10, - temperature=0.0, - echo=echo, - stream=stream) - - stop_reason = "length" - - if not stream: - completion = result - assert completion.id is not None - assert completion.choices is not None and len(completion.choices) == 1 - - choice = completion.choices[0] - assert len(choice.text) >= 5 - assert choice.finish_reason == stop_reason - - if echo: - assert choice.text is not None and saying in choice.text - else: - assert choice.text is not None and saying not in choice.text - - else: - chunks: list[str] = [] - final_finish_reason = None - async for chunk in result: - if chunk.choices and chunk.choices[0].text: - chunks.append(chunk.choices[0].text) - if chunk.choices and chunk.choices[0].finish_reason: - final_finish_reason = chunk.choices[0].finish_reason - - assert final_finish_reason == stop_reason - content = "".join(chunks) - if echo: - assert content is not None and saying in content - else: - assert content is not None and saying not 
in content - - -@pytest.mark.asyncio -async def test_invocations(server: RemoteOpenAIServer, - client: openai.AsyncOpenAI): - request_args = { - "model": MODEL_NAME, - "prompt": "Hello, my name is", - "max_tokens": 5, - "temperature": 0.0, - "logprobs": None, - } - - completion = await client.completions.create(**request_args) - - invocation_response = requests.post(server.url_for("invocations"), - json=request_args) - invocation_response.raise_for_status() - - completion_output = completion.model_dump() - invocation_output = invocation_response.json() - - assert completion_output.keys() == invocation_output.keys() - assert completion_output["choices"] == invocation_output["choices"] diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index 7b58f851a4d2..3d56291bc793 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -14,6 +14,9 @@ from ...utils import RemoteOpenAIServer +pytest.skip("Skipping prompt_embeds test until V1 supports it.", + allow_module_level=True) + # any model with a chat template should work here MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" diff --git a/tests/entrypoints/openai/test_lora_adapters.py b/tests/entrypoints/openai/test_lora_adapters.py index 10c0cb5f4d15..6f2addd3649d 100644 --- a/tests/entrypoints/openai/test_lora_adapters.py +++ b/tests/entrypoints/openai/test_lora_adapters.py @@ -53,12 +53,13 @@ def monkeypatch_module(): mpatch.undo() -@pytest.fixture(scope="module", params=[False, True]) +@pytest.fixture(scope="module", params=[True]) def server_with_lora_modules_json(request, monkeypatch_module, zephyr_lora_files): use_v1 = request.param - monkeypatch_module.setenv('VLLM_USE_V1', '1' if use_v1 else '0') + assert use_v1 + monkeypatch_module.setenv('VLLM_USE_V1', '1') # Define the json format LoRA module configurations lora_module_1 = { diff --git a/tests/entrypoints/openai/test_metrics.py b/tests/entrypoints/openai/test_metrics.py index 0c9e0f3a5142..8917aa5a5efb 100644 --- a/tests/entrypoints/openai/test_metrics.py +++ b/tests/entrypoints/openai/test_metrics.py @@ -22,7 +22,7 @@ PREV_MINOR_VERSION = version._prev_minor_version() -@pytest.fixture(scope="module", params=[True, False]) +@pytest.fixture(scope="module", params=[True]) def use_v1(request): # Module-scoped variant of run_with_both_engines # diff --git a/tests/entrypoints/openai/test_return_tokens_as_ids.py b/tests/entrypoints/openai/test_return_tokens_as_ids.py index 5f43fdc9588f..ef9d5234f231 100644 --- a/tests/entrypoints/openai/test_return_tokens_as_ids.py +++ b/tests/entrypoints/openai/test_return_tokens_as_ids.py @@ -10,8 +10,30 @@ from vllm.transformers_utils.tokenizer import get_tokenizer from ...utils import RemoteOpenAIServer -from .test_completion import default_server_args # noqa: F401 -from .test_completion import MODEL_NAME + +MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" + + +@pytest.fixture(scope="module") +def default_server_args(zephyr_lora_files): + return [ + # use half precision for speed and memory savings in CI environment + "--dtype", + "bfloat16", + "--max-model-len", + "8192", + "--max-num-seqs", + "128", + "--enforce-eager", + # lora config + "--enable-lora", + "--lora-modules", + f"zephyr-lora={zephyr_lora_files}", + "--max-lora-rank", + "64", + "--max-cpu-loras", + "2", + ] @pytest.fixture(scope="module") diff --git a/tests/entrypoints/openai/test_skip_tokenizer.py 
b/tests/entrypoints/openai/test_skip_tokenizer.py index 840e0dac81c9..b469fc76fc7a 100644 --- a/tests/entrypoints/openai/test_skip_tokenizer.py +++ b/tests/entrypoints/openai/test_skip_tokenizer.py @@ -15,14 +15,6 @@ DTYPE = "float16" -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - # Simple autouse wrapper to run both engines for each test - # This can be promoted up to conftest.py to run for every - # test in a package - pass - - @pytest.fixture(scope="module") def server(): args = [ diff --git a/tests/v1/test_oracle.py b/tests/v1/test_oracle.py index 794c1f68f147..28c24f62895a 100644 --- a/tests/v1/test_oracle.py +++ b/tests/v1/test_oracle.py @@ -7,7 +7,6 @@ import vllm.envs as envs from vllm import LLM from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine MODEL = "meta-llama/Llama-3.2-1B-Instruct" @@ -96,20 +95,3 @@ def test_v1_attn_backend(monkeypatch): _ = AsyncEngineArgs(model=MODEL).create_engine_config() assert envs.VLLM_USE_V1 m.delenv("VLLM_USE_V1") - - -def test_reject_using_constructor_directly(monkeypatch): - with monkeypatch.context() as m: - if os.getenv("VLLM_USE_V1", None): - m.delenv("VLLM_USE_V1") - - # Sets VLLM_USE_V1=1. - vllm_config = AsyncEngineArgs(model=MODEL).create_engine_config() - - # This uses the V0 constructor directly. - with pytest.raises(ValueError): - AsyncLLMEngine(vllm_config, - AsyncLLMEngine._get_executor_cls(vllm_config), - log_stats=True) - - m.delenv("VLLM_USE_V1") diff --git a/vllm/engine/async_llm_engine.py b/vllm/engine/async_llm_engine.py index 6793041abc50..ede027759a8b 100644 --- a/vllm/engine/async_llm_engine.py +++ b/vllm/engine/async_llm_engine.py @@ -1,1032 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import asyncio -import time -import weakref -from functools import partial -from typing import (Any, AsyncGenerator, Callable, Dict, Iterable, List, - Mapping, Optional, Set, Tuple, Type, Union) -from weakref import ReferenceType +from vllm.v1.engine.async_llm import AsyncLLM -import vllm.envs as envs -from vllm.config import (LoRAConfig, ModelConfig, ParallelConfig, - SchedulerConfig, VllmConfig) -from vllm.core.scheduler import SchedulerOutputs -from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_timeout import asyncio_timeout -from vllm.engine.llm_engine import LLMEngine -from vllm.engine.metrics_types import StatLoggerBase -from vllm.engine.protocol import EngineClient -from vllm.executor.executor_base import ExecutorBase -from vllm.inputs import PromptType -from vllm.inputs.preprocess import InputPreprocessor -from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.outputs import PoolingRequestOutput, RequestOutput -from vllm.pooling_params import PoolingParams -from vllm.sampling_params import SamplingParams -from vllm.sequence import ExecuteModelRequest -from vllm.transformers_utils.tokenizer import AnyTokenizer -from vllm.usage.usage_lib import UsageContext -from vllm.utils import Device, deprecate_kwargs, weak_bind - -logger = init_logger(__name__) -ENGINE_ITERATION_TIMEOUT_S = envs.VLLM_ENGINE_ITERATION_TIMEOUT_S - - -class AsyncEngineDeadError(RuntimeError): - pass - - -def _log_task_completion(task: asyncio.Task, - error_callback: Callable[[Exception], None]) -> None: - """This function is only intended for the `engine.run_engine_loop()` task. 
- - In particular, that task runs a `while True` loop that can only exit if - there is an exception. - """ - - exception = None - try: - return_value = task.result() - raise AssertionError( - f"The engine background task should never finish without an " - f"exception. {return_value}") - except asyncio.exceptions.CancelledError: - # We assume that if the task is cancelled, we are gracefully shutting - # down. This should only happen on program exit. - logger.info("Engine is gracefully shutting down.") - except Exception as e: - exception = e - logger.error("Engine background task failed", exc_info=e) - error_callback(exception) - raise AsyncEngineDeadError( - "Task finished unexpectedly. This should never happen! " - "Please open an issue on GitHub. See stack trace above for the " - "actual cause.") from e - - -STOP_ITERATION = Exception() # Sentinel - - -class AsyncStream: - """A stream of RequestOutputs for a request that can be iterated over - asynchronously via an async generator.""" - - def __init__(self, request_id: str, cancel: Callable[[str], None]) -> None: - self.request_id = request_id - self._cancel = cancel - self._queue: asyncio.Queue = asyncio.Queue() - self._finished = False - - def put(self, item: Union[RequestOutput, Exception]) -> None: - if not self._finished: - self._queue.put_nowait(item) - - def finish( - self, - exception: Optional[Union[BaseException, Type[BaseException]]] = None, - ) -> None: - if not self._finished: - self._finished = True - self._queue.put_nowait( - exception if self._is_raisable(exception) else STOP_ITERATION) - - @property - def finished(self) -> bool: - return self._finished - - async def generator(self) -> AsyncGenerator[RequestOutput, None]: - try: - while True: - result = await self._queue.get() - if self._is_raisable(result): - if result == STOP_ITERATION: - return - raise result - yield result - except GeneratorExit: - self._cancel(self.request_id) - raise asyncio.CancelledError from None - - @staticmethod - def _is_raisable(value: Any): - return isinstance(value, BaseException) or \ - (isinstance(value, type) and \ - issubclass(value, BaseException)) - - -class RequestTracker: - """Synchronous abstraction for tracking requests.""" - - def __init__(self) -> None: - self._request_streams: Dict[str, AsyncStream] = {} - self._aborted_requests: asyncio.Queue[str] = asyncio.Queue() - self._new_requests: asyncio.Queue[Tuple[AsyncStream, - dict]] = asyncio.Queue() - self.new_requests_event = asyncio.Event() - - def __contains__(self, item): - return item in self._request_streams - - def __len__(self) -> int: - return len(self._request_streams) - - def propagate_exception(self, - exc: Exception, - request_id: Optional[str] = None) -> None: - """Propagate an exception to request streams - (all if request_id is None).""" - if request_id is not None: - self.abort_request(request_id, exception=exc) - else: - # NB: tuple() used here because self.abort_request pops the stream - # out of self._request_streams, so we can't iterate on it directly - for rid in tuple(self._request_streams.keys()): - self.abort_request(rid, exception=exc) - - def process_request_output(self, - request_output: RequestOutput, - *, - verbose: bool = False) -> None: - """Process a request output from the engine.""" - request_id = request_output.request_id - finished = request_output.finished - - if finished: - stream = self._request_streams.pop(request_id, None) - else: - stream = self._request_streams.get(request_id) - # Guard against a KeyError which can occur if the request 
was aborted - # while the output was generated - if stream is not None: - stream.put(request_output) - if finished: - stream.finish() - - if verbose and finished: - logger.info("Finished request %s.", request_id) - - def process_exception(self, - request_id: str, - exception: BaseException, - *, - verbose: bool = False) -> None: - """Propagate an exception from the engine.""" - if verbose: - logger.info("Finished request %s.", request_id) - self.abort_request(request_id, exception=exception) - - def add_request(self, - request_id: str, - *, - verbose: bool = False, - **engine_add_request_kwargs) -> AsyncStream: - """Add a request to be sent to the engine on the next background - loop iteration.""" - if request_id in self._request_streams: - raise KeyError(f"Request {request_id} already exists.") - - abort_request = partial(self.abort_request, verbose=verbose) - stream = AsyncStream(request_id, abort_request) - self._new_requests.put_nowait((stream, { - "request_id": request_id, - **engine_add_request_kwargs - })) - - self.new_requests_event.set() - - if verbose: - logger.info("Added request %s.", request_id) - - return stream - - def abort_request(self, - request_id: str, - *, - exception: Optional[Union[BaseException, - Type[BaseException]]] = None, - verbose: bool = False) -> None: - """Abort a request during next background loop iteration.""" - if verbose: - logger.info("Aborted request %s.", request_id) - - self._aborted_requests.put_nowait(request_id) - - stream = self._request_streams.pop(request_id, None) - if stream is not None: - stream.finish(exception=exception) - - def get_new_and_aborted_requests(self) -> Tuple[List[Dict], Set[str]]: - """Get the new requests and finished requests to be - sent to the engine.""" - new_requests: List[Dict] = [] - finished_requests: Set[str] = set() - - while not self._aborted_requests.empty(): - request_id = self._aborted_requests.get_nowait() - finished_requests.add(request_id) - - while not self._new_requests.empty(): - stream, new_request = self._new_requests.get_nowait() - request_id = stream.request_id - if request_id in finished_requests: - # The request has already been aborted. - stream.finish(asyncio.CancelledError) - finished_requests.discard(request_id) - else: - self._request_streams[request_id] = stream - new_requests.append(new_request) - - return new_requests, finished_requests - - async def wait_for_new_requests(self): - if not self.has_new_requests(): - await self.new_requests_event.wait() - self.new_requests_event.clear() - - def has_new_requests(self): - return not self._new_requests.empty() - - -class _AsyncLLMEngine(LLMEngine): - """Extension of LLMEngine to add async methods.""" - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - async def step_async(self, virtual_engine: int) -> List[RequestOutput]: - """Performs one decoding iteration and returns newly generated results. - The workers are ran asynchronously if possible. - - This function performs one decoding iteration of the engine. It first - schedules the sequences to be executed in the next iteration and the - token blocks to be swapped in/out/copy. Then, it executes the model - and updates the scheduler with the model outputs. Finally, it decodes - the sequences and returns the newly generated results. - """ - # these are cached outputs from previous iterations. 
None if on first - # iteration - cached_outputs = self.cached_scheduler_outputs[virtual_engine] - seq_group_metadata_list = cached_outputs.seq_group_metadata_list - scheduler_outputs = cached_outputs.scheduler_outputs - allow_async_output_proc = cached_outputs.allow_async_output_proc - - ctx = self.scheduler_contexts[virtual_engine] - - # Clear outputs for each new scheduler iteration - ctx.request_outputs.clear() - - # skip the scheduler if there are any remaining steps in the seq groups. - # This ensures that the scheduler is only called again when the current - # batch has completed. - if not self._has_remaining_steps(seq_group_metadata_list): - - # Schedule iteration - (seq_group_metadata_list, scheduler_outputs, - allow_async_output_proc - ) = self.scheduler[virtual_engine].schedule() - - ctx.seq_group_metadata_list = seq_group_metadata_list - ctx.scheduler_outputs = scheduler_outputs - - if not scheduler_outputs.is_empty(): - # this will cause mamba_cache/minimax_cache failed - # to release finished_requests_ids of the last steps - finished_requests_ids = self.scheduler[ - virtual_engine].get_and_reset_finished_requests_ids() - - # Maybe switch from async mode to sync mode - if not allow_async_output_proc and len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - - else: - finished_requests_ids = list() - - assert seq_group_metadata_list is not None - assert scheduler_outputs is not None - - if not scheduler_outputs.is_empty(): - - # Check if we have a cached last_output from the previous iteration. - # For supporting PP this is probably the best way to pass the - # sampled_token_ids, as a separate broadcast over all the PP stages - # will cause one virtual engine's microbatch to block the pipeline. - last_sampled_token_ids = \ - self._get_last_sampled_token_ids(virtual_engine) - - execute_model_req = ExecuteModelRequest( - seq_group_metadata_list=seq_group_metadata_list, - blocks_to_swap_in=scheduler_outputs.blocks_to_swap_in, - blocks_to_swap_out=scheduler_outputs.blocks_to_swap_out, - blocks_to_copy=scheduler_outputs.blocks_to_copy, - virtual_engine=virtual_engine, - num_lookahead_slots=scheduler_outputs.num_lookahead_slots, - running_queue_size=scheduler_outputs.running_queue_size, - finished_requests_ids=finished_requests_ids, - # We use ExecuteModelRequest to pass the last sampled_token_ids - # to each of the non-last PP stages for in-place prepare_input. - last_sampled_token_ids=last_sampled_token_ids) - - if allow_async_output_proc: - execute_model_req.async_callback = self.async_callbacks[ - virtual_engine] - - # Execute the model. - outputs = await self.model_executor.execute_model_async( - execute_model_req) - - else: - if len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - outputs = [] - - if not self._has_remaining_steps(seq_group_metadata_list): - # is_first_step_output is True only when the num_steps of all - # the sequences are 1. 
- is_first_step_output: bool = False if not seq_group_metadata_list \ - else seq_group_metadata_list[0].state.num_steps == 1 - - ctx.append_output(outputs=outputs, - seq_group_metadata_list=seq_group_metadata_list, - scheduler_outputs=scheduler_outputs, - is_async=allow_async_output_proc, - is_last_step=True, - is_first_step_output=is_first_step_output) - - if outputs and allow_async_output_proc: - assert len( - outputs - ) == 1, "Async postprocessor expects only a single output set" - self._advance_to_next_step( - outputs[0], seq_group_metadata_list, - scheduler_outputs.scheduled_seq_groups) - - if not allow_async_output_proc: - self._process_model_outputs(ctx=ctx) - - # Log stats. - self.do_log_stats(scheduler_outputs, outputs) - - # Tracing - self.do_tracing(scheduler_outputs) - - else: - # Multi-step case - return ctx.request_outputs - - if not self.has_unfinished_requests(): - # Drain async postprocessor (if exists) - if len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - assert len(ctx.output_queue) == 0 - - return ctx.request_outputs - - async def stop_remote_worker_execution_loop_async(self) -> None: - """Stop the remote worker execution loop.""" - await self.model_executor.stop_remote_worker_execution_loop_async() - - async def get_tokenizer_async(self) -> AnyTokenizer: - return self.get_tokenizer() - - async def add_request_async( - self, - request_id: str, - prompt: PromptType, - params: SamplingParams, - arrival_time: Optional[float] = None, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - data_parallel_rank: Optional[int] = None, - tokenization_kwargs: Optional[dict[str, Any]] = None, - ) -> None: - """ - Async version of - [`add_request`][vllm.engine.llm_engine.LLMEngine.add_request]. - """ - if lora_request is not None and not self.lora_config: - raise ValueError(f"Got lora_request {lora_request} but LoRA is " - "not enabled!") - if priority != 0 and not self.scheduler_config.policy == "priority": - raise ValueError(f"Got priority {priority} but " - "Priority scheduling is not enabled.") - if arrival_time is None: - arrival_time = time.time() - - if data_parallel_rank is not None: - raise ValueError("Targeting data_parallel_rank only supported " - "in v1 client.") - - if (isinstance(prompt, dict) - and prompt.get("prompt_embeds", None) is not None - and not prompt.get("prompt_token_ids", None)): - # We use the -2 dimension (instead of 0) in case a batched input - # of batch size 1 is passed in. - prompt["prompt_token_ids"] = [0 - ] * prompt["prompt_embeds"].shape[-2] - - processed_inputs = await self.input_preprocessor.preprocess_async( - prompt, - tokenization_kwargs=tokenization_kwargs, - ) - - self._add_processed_request( - request_id=request_id, - processed_inputs=processed_inputs, - params=params, - arrival_time=arrival_time, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - ) - - async def check_health_async(self) -> None: - self.model_executor.check_health() - - async def collective_rpc_async(self, - method: str, - timeout: Optional[float] = None, - args: tuple = (), - kwargs: Optional[dict] = None): - raise NotImplementedError - - -class AsyncLLMEngine(EngineClient): - """An asynchronous wrapper for [`LLMEngine`][vllm.LLMEngine]. - - This class is used to wrap the [`LLMEngine`][vllm.LLMEngine] class to - make it asynchronous. It uses asyncio to create a background loop that keeps - processing incoming requests. 
The [`LLMEngine`][vllm.LLMEngine] is kicked - by the generate method when there are requests in the waiting queue. The - generate method yields the outputs from the [`LLMEngine`][vllm.LLMEngine] - to the caller. - - Args: - log_requests: Whether to log the requests. - start_engine_loop: If True, the background task to run the engine - will be automatically started in the generate call. - *args: Arguments for [`LLMEngine`][vllm.LLMEngine]. - **kwargs: Arguments for [`LLMEngine`][vllm.LLMEngine]. - """ - - _engine_class: Type[_AsyncLLMEngine] = _AsyncLLMEngine - - def __init__(self, - *args: Any, - log_requests: bool = True, - start_engine_loop: bool = True, - **kwargs: Any) -> None: - if envs.VLLM_USE_V1: - raise ValueError( - "Using V0 AsyncLLMEngine, but envs.VLLM_USE_V1=True. " - "This should not happen. As a workaround, try using " - "AsyncLLMEngine.from_vllm_config(...) or explicitly set " - "VLLM_USE_V1=0 or 1 and report this issue on Github.") - - self.log_requests = log_requests - self.engine = self._engine_class(*args, **kwargs) - - # This ensures quick processing of request outputs - # so the append to asyncio queues is not delayed, - # especially for multi-step. - self.use_process_request_outputs_callback = ( - self.engine.model_config.use_async_output_proc) - - if self.use_process_request_outputs_callback: - self.engine.process_request_outputs_callback = \ - weak_bind(self.process_request_outputs) - - self.background_loop: Optional[asyncio.Future] = None - # We need to keep a reference to unshielded - # task as well to prevent it from being garbage - # collected - self._background_loop_unshielded: Optional[asyncio.Task] = None - self.start_engine_loop = start_engine_loop - self._errored_with: Optional[BaseException] = None - - # Lazy initialized fields - self._request_tracker: RequestTracker - - def __del__(self): - if rt := getattr(self, "request_tracker", None): - # Wake up engine loop so that it will exit cleanly - rt.new_requests_event.set() - - @classmethod - def _get_executor_cls(cls, - engine_config: VllmConfig) -> Type[ExecutorBase]: - return LLMEngine._get_executor_cls(engine_config) - - @classmethod - @deprecate_kwargs( - "disable_log_requests", - additional_message=("This argument will have no effect. 
" - "Use `enable_log_requests` instead."), - ) - def from_vllm_config( - cls, - vllm_config: VllmConfig, - start_engine_loop: bool = True, - usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, - stat_loggers: Optional[dict[str, StatLoggerBase]] = None, - enable_log_requests: bool = False, - disable_log_stats: bool = False, - disable_log_requests: bool = True, # Deprecated, will be removed - ) -> "AsyncLLMEngine": - """Create an AsyncLLMEngine from the EngineArgs.""" - - return cls( - vllm_config=vllm_config, - executor_class=cls._get_executor_cls(vllm_config), - start_engine_loop=start_engine_loop, - log_requests=enable_log_requests, - log_stats=not disable_log_stats, - usage_context=usage_context, - stat_loggers=stat_loggers, - ) - - @classmethod - def from_engine_args( - cls, - engine_args: AsyncEngineArgs, - start_engine_loop: bool = True, - usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, - stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - ) -> "AsyncLLMEngine": - """Creates an async LLM engine from the engine arguments.""" - - vllm_config = engine_args.create_engine_config(usage_context) - - async_engine_cls = cls - if envs.VLLM_USE_V1: - from vllm.v1.engine.async_llm import AsyncLLM as V1AsyncLLMEngine - async_engine_cls = V1AsyncLLMEngine - - return async_engine_cls.from_vllm_config( - vllm_config=vllm_config, - start_engine_loop=start_engine_loop, - usage_context=usage_context, - stat_loggers=stat_loggers, - disable_log_stats=engine_args.disable_log_stats, - enable_log_requests=engine_args.enable_log_requests, - ) - - @property - def is_running(self) -> bool: - return (self.background_loop is not None - and self._background_loop_unshielded is not None - and not self._background_loop_unshielded.done()) - - @property - def is_stopped(self) -> bool: - return self.errored or (self.background_loop is not None and - self._background_loop_unshielded is not None - and self._background_loop_unshielded.done()) - - @property - def errored(self) -> bool: - return self._errored_with is not None - - @property - def dead_error(self) -> BaseException: - return AsyncEngineDeadError( - "Background loop is not running. If it was running, " - "inspect the output to find the stacktrace of the " - "error that caused the background loop to stop " - "(AsyncEngineDeadError).") - - def set_errored(self, exc: Exception) -> None: - self._errored_with = exc - - def _error_callback(self, exc: Exception) -> None: - self.set_errored(exc) - self._request_tracker.propagate_exception(exc) - - async def get_input_preprocessor(self) -> InputPreprocessor: - return self.engine.input_preprocessor - - async def get_tokenizer(self) -> AnyTokenizer: - return self.engine.get_tokenizer() - - def start_background_loop(self) -> None: - """Start the background loop.""" - if self.errored: - raise AsyncEngineDeadError( - "Background loop has errored already.") from self._errored_with - if self.is_running: - raise RuntimeError("Background loop is already running.") - # Initialize the RequestTracker here so it uses the right event loop. - self._request_tracker = RequestTracker() - - self._background_loop_unshielded = asyncio.get_event_loop( - ).create_task(self.run_engine_loop(weakref.ref(self))) - self._background_loop_unshielded.add_done_callback( - partial(_log_task_completion, error_callback=self._error_callback)) - self.background_loop = asyncio.shield(self._background_loop_unshielded) - - def shutdown_background_loop(self) -> None: - """ - Shut down the background loop. 
- - This method needs to be called during cleanup to remove - references to `self` and properly GC the resources held - by the async LLM engine (e.g., the executors as well as - their resources). - """ - if self._background_loop_unshielded is not None: - self._background_loop_unshielded.cancel() - self._background_loop_unshielded = None - self.background_loop = None - - async def engine_step(self, virtual_engine: int) -> bool: - """Kick the engine to process the waiting requests. - - Returns True if there are in-progress requests.""" - - new_requests, aborted_requests = ( - self._request_tracker.get_new_and_aborted_requests()) - - for new_request in new_requests: - # Add the request into the vLLM engine's waiting queue. - try: - await self.engine.add_request_async(**new_request) - except ValueError as e: - # TODO: use a vLLM specific error for failed validation - self._request_tracker.process_exception( - new_request["request_id"], - e, - verbose=self.log_requests, - ) - - if aborted_requests: - await self._engine_abort(aborted_requests) - - request_outputs = await self.engine.step_async(virtual_engine) - - # Put the outputs into the corresponding streams. - # If used as a callback, then already invoked inside - # LLMEngine's _process_model_outputs - if not self.use_process_request_outputs_callback: - all_finished = self.process_request_outputs(request_outputs) - else: - # For callback case, we only need to detect when all - # requests are finished - all_finished = all(request_output.finished - for request_output in request_outputs) - - return not all_finished - - def process_request_outputs(self, request_outputs) -> bool: - # Put the outputs into the corresponding streams. - all_finished = True - for request_output in request_outputs: - self._request_tracker.process_request_output( - request_output, verbose=self.log_requests) - all_finished = all_finished and request_output.finished - - return all_finished - - async def _engine_abort(self, request_ids: Iterable[str]): - self.engine.abort_request(request_ids) - - @staticmethod - async def run_engine_loop(engine_ref: ReferenceType): - """We use a weakref to the engine so that the running loop - doesn't prevent the engine being garbage collected.""" - engine: Optional[AsyncLLMEngine] = engine_ref() - if not engine: - return - - pipeline_parallel_size = \ - engine.engine.parallel_config.pipeline_parallel_size - has_requests_in_progress = [False] * pipeline_parallel_size - while True: - if not any(has_requests_in_progress): - logger.debug("Waiting for new requests...") - # Stop the execute model loop in parallel workers until there - # are more requests to process. This avoids waiting - # indefinitely in torch.distributed ops which may otherwise - # time out, and unblocks the RPC thread in the workers so that - # they can process any other queued control plane messages, - # such as add/remove lora adapters. - await engine.engine.stop_remote_worker_execution_loop_async() - request_tracker = engine._request_tracker - # Allow engine to be garbage collected while - # waiting for new requests - del engine - await asyncio.sleep(0) - if engine_ref() is None: - return - await request_tracker.wait_for_new_requests() - engine = engine_ref() - if not engine: - return - logger.debug("Got new requests!") - requests_in_progress = [ - asyncio.create_task(engine.engine_step(ve)) - for ve in range(pipeline_parallel_size) - ] - has_requests_in_progress = [True] * pipeline_parallel_size - - # Abort if iteration takes too long due to unrecoverable errors - # (eg. 
NCCL timeouts). - try: - async with asyncio_timeout(ENGINE_ITERATION_TIMEOUT_S): - done, _ = await asyncio.wait( - requests_in_progress, - return_when=asyncio.FIRST_COMPLETED) - for _ in range(pipeline_parallel_size): - await asyncio.sleep(0) - for task in done: - result = task.result() - virtual_engine = requests_in_progress.index(task) - has_unfinished_requests = ( - engine.engine. - has_unfinished_requests_for_virtual_engine( - virtual_engine)) - if result or has_unfinished_requests: - requests_in_progress[virtual_engine] = ( - asyncio.create_task( - engine.engine_step(virtual_engine))) - has_requests_in_progress[virtual_engine] = True - else: - has_requests_in_progress[virtual_engine] = False - except asyncio.TimeoutError as exc: - logger.error( - "Engine iteration timed out. This should never happen!") - engine.set_errored(exc) - raise - await asyncio.sleep(0) - - async def add_request( - self, - request_id: str, - prompt: PromptType, - params: SamplingParams, - arrival_time: Optional[float] = None, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - data_parallel_rank: Optional[int] = None, - tokenization_kwargs: Optional[dict[str, Any]] = None, - ) -> AsyncGenerator[RequestOutput, None]: - if not self.is_running: - if self.start_engine_loop: - self.start_background_loop() - else: - raise AsyncEngineDeadError( - "Background loop is not running. If it was running, " - "inspect the output to find the stacktrace of the " - "error that caused the background loop to stop " - "(AsyncEngineDeadError).") - - if (priority != 0 - and not self.engine.scheduler_config.policy == "priority"): - raise ValueError(f"Got priority {priority} but " - "Priority scheduling is not enabled.") - - stream = self._request_tracker.add_request( - request_id, - verbose=self.log_requests, - prompt=prompt, - params=params, - arrival_time=arrival_time or time.time(), - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - data_parallel_rank=data_parallel_rank, - tokenization_kwargs=tokenization_kwargs, - ) - - return stream.generator() - - async def generate( - self, - prompt: PromptType, - sampling_params: SamplingParams, - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - data_parallel_rank: Optional[int] = None, - ) -> AsyncGenerator[RequestOutput, None]: - """Generate outputs for a request. - - Generate outputs for a request. This method is a coroutine. It adds the - request into the waiting queue of the LLMEngine and streams the outputs - from the LLMEngine to the caller. - - Args: - prompt: The prompt to the LLM. See - [`PromptType`][vllm.inputs.PromptType] for more details about - the format of each input. - sampling_params: The sampling parameters of the request. - request_id: The unique id of the request. - lora_request: LoRA request to use for generation, if any. - trace_headers: OpenTelemetry trace headers. - priority: The priority of the request. - Only applicable with priority scheduling. - data_parallel_rank: The (global) data parallel rank that must - handle this request. Only applicable if DP is enabled. - Yields: - The output `RequestOutput` objects from the LLMEngine - for the request. - - Details: - - If the engine is not running, start the background loop, - which iteratively invokes - [`engine_step`][vllm.engine.async_llm_engine.AsyncLLMEngine.engine_step] - to process the waiting requests. 
- - Add the request to the engine's `RequestTracker`. - On the next background loop, this request will be sent to - the underlying engine. - Also, a corresponding `AsyncStream` will be created. - - Wait for the request outputs from `AsyncStream` and yield them. - - Example: - >>> # Please refer to entrypoints/api_server.py for - >>> # the complete example. - >>> - >>> # initialize the engine and the example input - >>> # note that engine_args here is AsyncEngineArgs instance - >>> engine = AsyncLLMEngine.from_engine_args(engine_args) - >>> example_input = { - >>> "prompt": "What is LLM?", - >>> "stream": False, # assume the non-streaming case - >>> "temperature": 0.0, - >>> "request_id": 0, - >>> } - >>> - >>> # start the generation - >>> results_generator = engine.generate( - >>> example_input["prompt"], - >>> SamplingParams(temperature=example_input["temperature"]), - >>> example_input["request_id"]) - >>> - >>> # get the results - >>> final_output = None - >>> async for request_output in results_generator: - >>> if await request.is_disconnected(): - >>> # Abort the request if the client disconnects. - >>> await engine.abort(request_id) - >>> # Return or raise an error - >>> ... - >>> final_output = request_output - >>> - >>> # Process and return the final output - >>> ... - """ - try: - async for output in await self.add_request( - request_id, - prompt, - sampling_params, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - data_parallel_rank=data_parallel_rank, - ): - yield LLMEngine.validate_output(output, RequestOutput) - except asyncio.CancelledError: - await self.abort(request_id) - raise - - def encode( - self, - prompt: PromptType, - pooling_params: PoolingParams, - request_id: str, - lora_request: Optional[LoRARequest] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - tokenization_kwargs: Optional[dict[str, Any]] = None, - ) -> AsyncGenerator[PoolingRequestOutput, None]: - raise NotImplementedError( - "Pooling models are not supported in vLLM V0") - - async def abort(self, request_id: Union[str, Iterable[str]]) -> None: - """Abort a request. - - Abort a submitted request. If the request is finished or not found, - this method will be a no-op. - - Args: - request_id: The unique id of the request. - """ - if not isinstance(request_id, str): - raise RuntimeError("Only single-request abort supported in" - " deprecated V0") - if not self.is_running: - raise AsyncEngineDeadError( - "Background loop is not running. If it was running, " - "inspect the output to find the stacktrace of the " - "error that caused the background loop to stop " - "(AsyncEngineDeadError).") - - return self._abort(request_id) - - def _abort(self, request_id: str) -> None: - """Abort a request. - - Abort a submitted request. If the request is finished or not found, - this method will be a no-op. - - Args: - request_id: The unique id of the request. 
- """ - self._request_tracker.abort_request(request_id, - exception=asyncio.CancelledError, - verbose=self.log_requests) - - async def get_vllm_config(self) -> VllmConfig: - """Get the vllm configuration of the vLLM engine.""" - return self.engine.get_vllm_config() - - async def get_model_config(self) -> ModelConfig: - """Get the model configuration of the vLLM engine.""" - return self.engine.get_model_config() - - async def get_parallel_config(self) -> ParallelConfig: - """Get the parallel configuration of the vLLM engine.""" - return self.engine.get_parallel_config() - - async def get_scheduler_config(self) -> SchedulerConfig: - """Get the scheduling configuration of the vLLM engine.""" - return self.engine.get_scheduler_config() - - async def get_lora_config(self) -> LoRAConfig: - """Get the lora configuration of the vLLM engine.""" - return self.engine.get_lora_config() - - async def do_log_stats( - self, - scheduler_outputs: Optional[SchedulerOutputs] = None, - model_output: Optional[List[SamplerOutput]] = None) -> None: - self.engine.do_log_stats() - - async def check_health(self) -> None: - """Raises an error if engine is unhealthy.""" - t = time.perf_counter() - logger.debug("Starting health check...") - if self.is_stopped: - raise AsyncEngineDeadError("Background loop is stopped.") - - await self.engine.check_health_async() - logger.debug("Health check took %fs", time.perf_counter() - t) - - async def is_tracing_enabled(self) -> bool: - return self.engine.is_tracing_enabled() - - def add_logger(self, logger_name: str, logger: StatLoggerBase) -> None: - self.engine.add_logger(logger_name=logger_name, logger=logger) - - def remove_logger(self, logger_name: str) -> None: - self.engine.remove_logger(logger_name=logger_name) - - async def start_profile(self) -> None: - self.engine.start_profile() - - async def stop_profile(self) -> None: - self.engine.stop_profile() - - async def reset_mm_cache(self) -> None: - self.engine.reset_mm_cache() - - async def reset_prefix_cache(self, - device: Optional[Device] = None) -> None: - self.engine.reset_prefix_cache(device) - - async def sleep(self, level: int = 1) -> None: - await self.reset_prefix_cache() - self.engine.sleep(level) - - async def wake_up(self, tags: Optional[list[str]] = None) -> None: - self.engine.wake_up(tags) - - async def is_sleeping(self) -> bool: - return self.engine.is_sleeping() - - async def add_lora(self, lora_request: LoRARequest) -> bool: - return self.engine.add_lora(lora_request) - - async def collective_rpc(self, - method: str, - timeout: Optional[float] = None, - args: tuple = (), - kwargs: Optional[dict] = None): - """ - Perform a collective RPC call to the given path. - """ - return await self.engine.collective_rpc_async(method, timeout, args, - kwargs) - - -# TODO(v1): Remove this class proxy when V1 goes default. 
-if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1: - from vllm.v1.engine.async_llm import AsyncLLM - - AsyncLLMEngine = AsyncLLM # type: ignore +AsyncLLMEngine = AsyncLLM # type: ignore diff --git a/vllm/entrypoints/launcher.py b/vllm/entrypoints/launcher.py index c3195dbc4697..8b2acedf805c 100644 --- a/vllm/entrypoints/launcher.py +++ b/vllm/entrypoints/launcher.py @@ -11,7 +11,6 @@ from fastapi import FastAPI, Request, Response from vllm import envs -from vllm.engine.async_llm_engine import AsyncEngineDeadError from vllm.engine.protocol import EngineClient from vllm.entrypoints.constants import (H11_MAX_HEADER_COUNT_DEFAULT, H11_MAX_INCOMPLETE_EVENT_SIZE_DEFAULT) @@ -154,7 +153,6 @@ def _add_shutdown_handlers(app: FastAPI, server: uvicorn.Server) -> None: """ @app.exception_handler(RuntimeError) - @app.exception_handler(AsyncEngineDeadError) @app.exception_handler(EngineDeadError) @app.exception_handler(EngineGenerateError) async def runtime_exception_handler(request: Request, __): diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 912e66412092..11031cd616d2 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -38,7 +38,6 @@ import vllm.envs as envs from vllm.config import VllmConfig from vllm.engine.arg_utils import AsyncEngineArgs -from vllm.engine.async_llm_engine import AsyncLLMEngine # type: ignore from vllm.engine.protocol import EngineClient from vllm.entrypoints.chat_utils import (load_chat_template, resolve_hf_chat_template, @@ -201,50 +200,34 @@ async def build_async_engine_client_from_engine_args( vllm_config = engine_args.create_engine_config(usage_context=usage_context) # V1 AsyncLLM. - if envs.VLLM_USE_V1: - if disable_frontend_multiprocessing: - logger.warning( - "V1 is enabled, but got --disable-frontend-multiprocessing. " - "To disable frontend multiprocessing, set VLLM_USE_V1=0.") - - from vllm.v1.engine.async_llm import AsyncLLM - async_llm: Optional[AsyncLLM] = None - client_count = client_config.pop( - "client_count") if client_config else 1 - client_index = client_config.pop( - "client_index") if client_config else 0 - try: - async_llm = AsyncLLM.from_vllm_config( - vllm_config=vllm_config, - usage_context=usage_context, - enable_log_requests=engine_args.enable_log_requests, - disable_log_stats=engine_args.disable_log_stats, - client_addresses=client_config, - client_count=client_count, - client_index=client_index) - - # Don't keep the dummy data in memory - await async_llm.reset_mm_cache() - - yield async_llm - finally: - if async_llm: - async_llm.shutdown() + assert envs.VLLM_USE_V1 - # V0 AsyncLLM. - else: + if disable_frontend_multiprocessing: + logger.warning( + "V1 is enabled, but got --disable-frontend-multiprocessing. 
" + "To disable frontend multiprocessing, set VLLM_USE_V1=0.") - engine_client: Optional[EngineClient] = None - try: - engine_client = AsyncLLMEngine.from_vllm_config( - vllm_config=vllm_config, - usage_context=usage_context, - enable_log_requests=engine_args.enable_log_requests, - disable_log_stats=engine_args.disable_log_stats) - yield engine_client - finally: - if engine_client and hasattr(engine_client, "shutdown"): - engine_client.shutdown() + from vllm.v1.engine.async_llm import AsyncLLM + async_llm: Optional[AsyncLLM] = None + client_count = client_config.pop("client_count") if client_config else 1 + client_index = client_config.pop("client_index") if client_config else 0 + try: + async_llm = AsyncLLM.from_vllm_config( + vllm_config=vllm_config, + usage_context=usage_context, + enable_log_requests=engine_args.enable_log_requests, + disable_log_stats=engine_args.disable_log_stats, + client_addresses=client_config, + client_count=client_count, + client_index=client_index) + + # Don't keep the dummy data in memory + await async_llm.reset_mm_cache() + + yield async_llm + finally: + if async_llm: + async_llm.shutdown() async def validate_json_request(raw_request: Request): From 064cac7bb7251862a841d8057d83581350edf837 Mon Sep 17 00:00:00 2001 From: Nikhil Gupta Date: Thu, 18 Sep 2025 19:15:23 +0100 Subject: [PATCH 123/518] [fix]: remove data type hardcoding from gptoss model implementation (#23807) Signed-off-by: Nikhil Gupta --- vllm/model_executor/models/gpt_oss.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index 990a1d6d883a..b49fd0d8f88a 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -76,7 +76,6 @@ def __init__( self.sinks = torch.nn.Parameter( torch.empty(config.num_attention_heads // tp_size, - dtype=torch.bfloat16, requires_grad=False)) self.q_size = self.num_attention_heads * self.head_dim // tp_size @@ -145,8 +144,7 @@ def __init__( self.experts_per_token = config.num_experts_per_tok self.world_size = dist.get_world_size() if dist.is_initialized() else 1 self.router = torch.nn.Linear(config.hidden_size, - config.num_local_experts, - dtype=torch.bfloat16) + config.num_local_experts) assert config.intermediate_size % self.world_size == 0 self.experts = FusedMoE(num_experts=config.num_local_experts, top_k=config.num_experts_per_tok, From 38db529f66712502a3cf93488229fc9fd2dc76fc Mon Sep 17 00:00:00 2001 From: Aziz Date: Thu, 18 Sep 2025 21:18:56 +0200 Subject: [PATCH 124/518] [feat]: Create interface for model-specific M-RoPE (#24194) Signed-off-by: AzizCode92 Signed-off-by: Aziz Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Cyrus Leung --- vllm/model_executor/models/__init__.py | 11 ++- vllm/model_executor/models/interfaces.py | 68 +++++++++++++ vllm/model_executor/models/qwen2_vl.py | 118 ++++++++++++++++++++++- vllm/v1/worker/gpu_model_runner.py | 33 +++++-- vllm/worker/model_runner.py | 42 +++++--- 5 files changed, 242 insertions(+), 30 deletions(-) diff --git a/vllm/model_executor/models/__init__.py b/vllm/model_executor/models/__init__.py index d3ee6872dd8b..4ccba64f2c11 100644 --- a/vllm/model_executor/models/__init__.py +++ b/vllm/model_executor/models/__init__.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from .interfaces import (HasInnerState, SupportsLoRA, 
SupportsMultiModal, - SupportsPP, SupportsTranscription, SupportsV0Only, - has_inner_state, supports_lora, supports_multimodal, - supports_pp, supports_transcription, supports_v0_only) +from .interfaces import (HasInnerState, SupportsLoRA, SupportsMRoPE, + SupportsMultiModal, SupportsPP, SupportsTranscription, + SupportsV0Only, has_inner_state, supports_lora, + supports_mrope, supports_multimodal, supports_pp, + supports_transcription, supports_v0_only) from .interfaces_base import (VllmModelForPooling, VllmModelForTextGeneration, is_pooling_model, is_text_generation_model) from .registry import ModelRegistry @@ -21,6 +22,8 @@ "supports_lora", "SupportsMultiModal", "supports_multimodal", + "SupportsMRoPE", + "supports_mrope", "SupportsPP", "supports_pp", "SupportsTranscription", diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 8f8e300c84d7..e9c600e36cfa 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -8,6 +8,7 @@ import numpy as np import torch from torch import Tensor +from transformers import PretrainedConfig from transformers.models.whisper.tokenization_whisper import LANGUAGES from typing_extensions import Self, TypeIs @@ -852,3 +853,70 @@ def supports_eagle3( model: Union[type[object], object], ) -> Union[TypeIs[type[SupportsEagle3]], TypeIs[SupportsEagle3]]: return isinstance(model, SupportsEagle3) + + +@runtime_checkable +class SupportsMRoPE(Protocol): + """The interface required for all models that support M-RoPE.""" + + supports_mrope: ClassVar[Literal[True]] = True + """ + A flag that indicates this model supports M-RoPE. + + Note: + There is no need to redefine this flag if this class is in the + MRO of your model class. + """ + + def get_mrope_input_positions( + self, + input_tokens: list[int], + hf_config: PretrainedConfig, + image_grid_thw: Optional[Union[list[list[int]], torch.Tensor]], + video_grid_thw: Optional[Union[list[list[int]], torch.Tensor]], + second_per_grid_ts: Optional[list[float]] = None, + context_len: int = 0, + seq_len: Optional[int] = None, + audio_feature_lengths: Optional[torch.Tensor] = None, + use_audio_in_video: bool = False, + ) -> tuple[torch.Tensor, int]: + """ + Get M-RoPE input positions and delta value for this specific model. + + This method should be implemented by each model that supports M-RoPE + to provide model-specific logic for computing input positions. + + Args: + input_tokens: List of input token IDs + hf_config: HuggingFace model configuration + image_grid_thw: Image grid dimensions (t, h, w) + video_grid_thw: Video grid dimensions (t, h, w) + second_per_grid_ts: Seconds per grid timestep for videos + context_len: Context length + seq_len: Sequence length + audio_feature_lengths: Audio feature lengths for multimodal models + use_audio_in_video: Whether to use audio in video for interleaving + + Returns: + Tuple of (llm_positions, mrope_position_delta) + - llm_positions: Tensor of shape [3, num_tokens] + with T/H/W positions + - mrope_position_delta: Delta for position calculations + """ + ... + + +@overload +def supports_mrope(model: type[object]) -> TypeIs[type[SupportsMRoPE]]: + ... + + +@overload +def supports_mrope(model: object) -> TypeIs[SupportsMRoPE]: + ... 
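
For illustration only (not part of this patch): a minimal class that would satisfy the SupportsMRoPE protocol above, assuming trivial text-only position handling. The runtime check implemented just below only verifies that these members exist, not their signatures; the class name and the simplified position math are invented for this sketch.

    # Hypothetical example, not from the vLLM source tree.
    from typing import Optional, Union

    import torch
    from transformers import PretrainedConfig

    # Exported by this patch via vllm/model_executor/models/__init__.py.
    from vllm.model_executor.models import supports_mrope


    class ToyMRoPEModel:
        # The presence of this flag plus the method below is all the
        # runtime_checkable isinstance() check looks for.
        supports_mrope = True

        def get_mrope_input_positions(
            self,
            input_tokens: list[int],
            hf_config: PretrainedConfig,
            image_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
            video_grid_thw: Optional[Union[list[list[int]], torch.Tensor]],
            second_per_grid_ts: Optional[list[float]] = None,
            context_len: int = 0,
            seq_len: Optional[int] = None,
            audio_feature_lengths: Optional[torch.Tensor] = None,
            use_audio_in_video: bool = False,
        ) -> tuple[torch.Tensor, int]:
            # Text-only toy case: T/H/W positions are all the plain
            # 0..N-1 range, so the position delta is 0.
            positions = torch.arange(
                len(input_tokens),
                dtype=torch.int64).view(1, -1).expand(3, -1)
            return positions[:, context_len:seq_len], 0


    assert supports_mrope(ToyMRoPEModel())

A real model additionally subclasses nn.Module and, as in the Qwen2-VL implementation that follows, derives the T/H/W indices from the image and video grid metadata.
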
+ + +def supports_mrope( + model: Union[type[object], object], +) -> Union[TypeIs[type[SupportsMRoPE]], TypeIs[SupportsMRoPE]]: + return isinstance(model, SupportsMRoPE) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index b6576b783b64..7f361678ba72 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -32,7 +32,7 @@ import torch.nn as nn import torch.nn.functional as F from einops import rearrange, repeat -from transformers import AutoConfig, BatchFeature +from transformers import AutoConfig, BatchFeature, PretrainedConfig from transformers.models.qwen2_vl import (Qwen2VLImageProcessor, Qwen2VLProcessor) from transformers.models.qwen2_vl.configuration_qwen2_vl import ( @@ -73,7 +73,7 @@ from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils.tensor_schema import TensorSchema, TensorShape -from .interfaces import (MultiModalEmbeddings, SupportsLoRA, +from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMRoPE, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, WeightsMapper, init_vllm_registered_model, maybe_prefix, @@ -1096,7 +1096,7 @@ def _get_mm_fields_config( info=Qwen2VLProcessingInfo, dummy_inputs=Qwen2VLDummyInputsBuilder) class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, - SupportsLoRA, SupportsPP): + SupportsLoRA, SupportsPP, SupportsMRoPE): # To ensure correct weight loading and mapping. hf_to_vllm_mapper = WeightsMapper( @@ -1109,6 +1109,118 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, "model.": "language_model.model.", }) + def get_mrope_input_positions( + self, + input_tokens: list[int], + hf_config: PretrainedConfig, + image_grid_thw: Optional[Union[list[list[int]], torch.Tensor]], + video_grid_thw: Optional[Union[list[list[int]], torch.Tensor]], + second_per_grid_ts: Optional[list[float]] = None, + context_len: int = 0, + seq_len: Optional[int] = None, + audio_feature_lengths: Optional[torch.Tensor] = None, + use_audio_in_video: bool = False, + ) -> tuple[torch.Tensor, int]: + """Get M-RoPE input positions for Qwen2-VL model.""" + if image_grid_thw is None: + image_grid_thw = [] + if video_grid_thw is None: + video_grid_thw = [] + if second_per_grid_ts is None: + second_per_grid_ts = [] + + image_token_id = hf_config.image_token_id + video_token_id = hf_config.video_token_id + vision_start_token_id = hf_config.vision_start_token_id + spatial_merge_size = hf_config.vision_config.spatial_merge_size + tokens_per_second = getattr(hf_config.vision_config, + "tokens_per_second", 1.0) + + input_tokens_tensor = torch.tensor(input_tokens) + vision_start_indices = torch.argwhere( + input_tokens_tensor == vision_start_token_id).squeeze(1) + vision_tokens = input_tokens_tensor[vision_start_indices + 1] + image_nums = (vision_tokens == image_token_id).sum() + video_nums = (vision_tokens == video_token_id).sum() + llm_pos_ids_list: list = [] + + st = 0 + remain_images, remain_videos = image_nums, video_nums + + image_index, video_index = 0, 0 + for _ in range(image_nums + video_nums): + video_second_per_grid_t = 0.0 + if remain_images > 0: + try: + ed_image = input_tokens.index(image_token_id, st) + except ValueError: + ed_image = len(input_tokens) + 1 + else: + ed_image = len(input_tokens) + 1 + if remain_videos > 0: + try: + ed_video = input_tokens.index(video_token_id, st) + except ValueError: + ed_video = len(input_tokens) + 1 + else: + ed_video = len(input_tokens) + 1 + if ed_image < ed_video: 
+ t, h, w = ( + image_grid_thw[image_index][0], + image_grid_thw[image_index][1], + image_grid_thw[image_index][2], + ) + image_index += 1 + remain_images -= 1 + ed = ed_image + else: + t, h, w = ( + video_grid_thw[video_index][0], + video_grid_thw[video_index][1], + video_grid_thw[video_index][2], + ) + video_second_per_grid_t = 1.0 + if second_per_grid_ts: + video_second_per_grid_t = second_per_grid_ts[video_index] + video_index += 1 + remain_videos -= 1 + ed = ed_video + + llm_grid_t, llm_grid_h, llm_grid_w = \ + t, h // spatial_merge_size, w // spatial_merge_size + text_len = ed - st + + st_idx = llm_pos_ids_list[-1].max() + 1 if len( + llm_pos_ids_list) > 0 else 0 + llm_pos_ids_list.append( + torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + t_index = (torch.arange(llm_grid_t).view(-1, 1).expand( + -1, llm_grid_h * llm_grid_w) * video_second_per_grid_t * + tokens_per_second).long().flatten() + + h_index = torch.arange(llm_grid_h).view(1, -1, 1).expand( + llm_grid_t, -1, llm_grid_w).flatten() + w_index = torch.arange(llm_grid_w).view(1, 1, -1).expand( + llm_grid_t, llm_grid_h, -1).flatten() + llm_pos_ids_list.append( + torch.stack([t_index, h_index, w_index]) + text_len + st_idx) + st = ed + llm_grid_t * llm_grid_h * llm_grid_w + + if st < len(input_tokens): + st_idx = llm_pos_ids_list[-1].max() + 1 if len( + llm_pos_ids_list) > 0 else 0 + text_len = len(input_tokens) - st + llm_pos_ids_list.append( + torch.arange(text_len).view(1, -1).expand(3, -1) + st_idx) + + llm_positions = torch.cat(llm_pos_ids_list, dim=1).reshape(3, -1) + mrope_position_delta = (llm_positions.max() + 1 - + len(input_tokens)).item() + llm_positions = llm_positions[:, context_len:seq_len] + + return llm_positions, mrope_position_delta + @classmethod def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: if modality.startswith("image"): diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 4873b586724e..053e8f0537ed 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -42,6 +42,7 @@ from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader from vllm.model_executor.models.interfaces import (is_mixture_of_experts, supports_eagle3, + supports_mrope, supports_transcription) from vllm.model_executor.models.interfaces_base import ( VllmModelForPooling, is_pooling_model, is_text_generation_model) @@ -730,16 +731,28 @@ def _init_mrope_positions(self, req_state: CachedRequestState): if mm_input.get("use_audio_in_video") is True: use_audio_in_video = True - req_state.mrope_positions, req_state.mrope_position_delta = \ - MRotaryEmbedding.get_input_positions_tensor( - req_state.prompt_token_ids, - hf_config=self.model_config.hf_config, - image_grid_thw=image_grid_thw, - video_grid_thw=video_grid_thw, - second_per_grid_ts=second_per_grid_ts, - audio_feature_lengths=audio_feature_lengths, - use_audio_in_video=use_audio_in_video, - ) + if supports_mrope(self.model): + req_state.mrope_positions, req_state.mrope_position_delta = \ + self.model.get_mrope_input_positions( + req_state.prompt_token_ids, + hf_config=self.model_config.hf_config, + image_grid_thw=image_grid_thw, + video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, + audio_feature_lengths=audio_feature_lengths, + use_audio_in_video=use_audio_in_video, + ) + else: + req_state.mrope_positions, req_state.mrope_position_delta = \ + MRotaryEmbedding.get_input_positions_tensor( + req_state.prompt_token_ids, + 
hf_config=self.model_config.hf_config, + image_grid_thw=image_grid_thw, + video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, + audio_feature_lengths=audio_feature_lengths, + use_audio_in_video=use_audio_in_video, + ) def _extract_mm_kwargs( self, diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 88f83c9dd7e6..594382650e3c 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -41,7 +41,8 @@ get_sampler) from vllm.model_executor.model_loader import get_model from vllm.model_executor.model_loader.tensorizer import TensorizerConfig -from vllm.model_executor.models import supports_lora, supports_multimodal +from vllm.model_executor.models import (supports_lora, supports_mrope, + supports_multimodal) from vllm.model_executor.models.utils import set_cpu_offload_max_bytes from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs, MultiModalKwargs, MultiModalPlaceholderMap, @@ -670,18 +671,33 @@ def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, inter_data.seq_ids[seq_idx]] token_ids = seq_data.get_token_ids() - mrope_input_positions, mrope_position_delta = \ - MRotaryEmbedding.get_input_positions( - token_ids, - hf_config=hf_config, - image_grid_thw=image_grid_thw, - video_grid_thw=video_grid_thw, - second_per_grid_ts=second_per_grid_ts, - context_len=inter_data.context_lens[seq_idx], - seq_len=inter_data.seq_lens[seq_idx], - audio_feature_lengths=audio_feature_lengths, - use_audio_in_video=use_audio_in_video, - ) + if supports_mrope(self.runner.model): + mrope_input_positions, mrope_position_delta = \ + self.runner.model.get_mrope_input_positions( + token_ids, + hf_config=hf_config, + image_grid_thw=image_grid_thw, + video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, + context_len=inter_data.context_lens[seq_idx], + seq_len=inter_data.seq_lens[seq_idx], + audio_feature_lengths=audio_feature_lengths, + use_audio_in_video=use_audio_in_video, + ) + mrope_input_positions = mrope_input_positions.tolist() + else: + mrope_input_positions, mrope_position_delta = \ + MRotaryEmbedding.get_input_positions( + token_ids, + hf_config=hf_config, + image_grid_thw=image_grid_thw, + video_grid_thw=video_grid_thw, + second_per_grid_ts=second_per_grid_ts, + context_len=inter_data.context_lens[seq_idx], + seq_len=inter_data.seq_lens[seq_idx], + audio_feature_lengths=audio_feature_lengths, + use_audio_in_video=use_audio_in_video, + ) seq_data.mrope_position_delta = mrope_position_delta inter_data.mrope_input_positions[ From 75fb112d80f680624dc99a00e02be6a45661f948 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:32:24 -0400 Subject: [PATCH 125/518] [Bug] Fix `returned_lse` not Defined issue (#25106) Signed-off-by: yewentao256 Co-authored-by: Tyler Michael Smith --- vllm/v1/attention/backends/mla/cutlass_mla.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py index 21be17a750df..ae534f3207b5 100644 --- a/vllm/v1/attention/backends/mla/cutlass_mla.py +++ b/vllm/v1/attention/backends/mla/cutlass_mla.py @@ -206,12 +206,11 @@ def _sm100_cutlass_mla_decode( ) if H < MAX_HEADS: - # Extract the subsets of the outputs - returned_lse = lse[:, :H].contiguous( - ) if self.need_to_return_lse_for_decode else lse out = out[:, :H] + if self.need_to_return_lse_for_decode: + lse = lse[:, :H].contiguous() - return out, returned_lse + 
return out, lse def _forward_decode( self, From d2a30a2d933226d3951ad98cb5de0c74e2e64826 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Thu, 18 Sep 2025 15:38:37 -0400 Subject: [PATCH 126/518] [Bug] Fix torch Compilation Cache Hit Error (#25093) Signed-off-by: yewentao256 --- vllm/config/compilation.py | 12 ------------ vllm/platforms/cuda.py | 17 ++++++++++------- 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index f8ccc2022261..3618f472e742 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -563,18 +563,6 @@ def set_splitting_ops_for_v1(self): self.cudagraph_mode = CUDAGraphMode.FULL self.splitting_ops = [] - if envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput": - # exclude MoE dispatch/combine from capture by ensuring - # piecewise splitting includes them, so communication remains - # outside CUDA graphs while compute can still be graphed. - moe_ops = [ - "vllm.moe_forward", - "vllm.moe_forward_shared", - ] - for op in moe_ops: - if op not in self.splitting_ops: - self.splitting_ops.append(op) - def splitting_ops_contain_attention(self) -> bool: return self.splitting_ops is not None and all( op in self.splitting_ops for op in self._attention_ops) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 8e3436a9e73c..87d8f2b7481b 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -191,14 +191,17 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: compilation_config = vllm_config.compilation_config if (envs.VLLM_ALL2ALL_BACKEND == "deepep_high_throughput" and parallel_config.data_parallel_size > 1 - and compilation_config.cudagraph_mode - not in [CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE]): + and compilation_config.cudagraph_mode != CUDAGraphMode.NONE): + # TODO: Piecewise Cuda graph might be enabled + # if torch compile cache key issue fixed + # See https://github.com/vllm-project/vllm/pull/25093 logger.info( - "Data Parallel with DeepEP high-throughput: using PIECEWISE " - "CUDA graphs and excluding MoE ops from capture. Set " - "VLLM_ALL2ALL_BACKEND=deepep_low_latency if you need MoE " - "graphs captured as well.") - compilation_config.cudagraph_mode = CUDAGraphMode.PIECEWISE + "Data Parallel: disabling cudagraphs since DP " + "with DeepEP high-throughput kernels are not CUDA Graph " + "compatible. The DeepEP low-latency kernels are CUDA Graph " + "compatible. 
Set the all_to_all backend to deepep_low_latency " + "to use those kernels instead.") + compilation_config.cudagraph_mode = CUDAGraphMode.NONE @classmethod def get_current_memory_usage(cls, From 1c3dad22ff92cbf84e0fa8ad1643c560a07944ea Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Thu, 18 Sep 2025 13:35:21 -0700 Subject: [PATCH 127/518] [V0 Deprecation] Remove unused async_timeout.py (#25190) Signed-off-by: Woosuk Kwon --- vllm/engine/async_timeout.py | 173 ----------------------------------- 1 file changed, 173 deletions(-) delete mode 100644 vllm/engine/async_timeout.py diff --git a/vllm/engine/async_timeout.py b/vllm/engine/async_timeout.py deleted file mode 100644 index 3b9c055160c1..000000000000 --- a/vllm/engine/async_timeout.py +++ /dev/null @@ -1,173 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -# Workaround for https://github.com/python/cpython/issues/86296 -# -# From https://github.com/aio-libs/async-timeout/blob/master/async_timeout/__init__.py -# Licensed under the Apache License (Apache-2.0) - -import asyncio -import enum -import sys -from types import TracebackType -from typing import Any, Optional, Type - -if sys.version_info[:2] >= (3, 11): - from asyncio import timeout as asyncio_timeout -else: - - class _State(enum.Enum): - INIT = "INIT" - ENTER = "ENTER" - TIMEOUT = "TIMEOUT" - EXIT = "EXIT" - - class Timeout: - # Internal class, please don't instantiate it directly - # Use timeout() and timeout_at() public factories instead. - # - # Implementation note: `async with timeout()` is preferred - # over `with timeout()`. - # While technically the Timeout class implementation - # doesn't need to be async at all, - # the `async with` statement explicitly points that - # the context manager should be used from async function context. - # - # This design allows to avoid many silly misusages. - # - # TimeoutError is raised immediately when scheduled - # if the deadline is passed. - # The purpose is to time out as soon as possible - # without waiting for the next await expression. - - __slots__ = ("_deadline", "_loop", "_state", "_timeout_handler") - - def __init__(self, deadline: Optional[float], - loop: asyncio.AbstractEventLoop) -> None: - self._loop = loop - self._state = _State.INIT - - self._timeout_handler = None # type: Optional[asyncio.Handle] - if deadline is None: - self._deadline = None # type: Optional[float] - else: - self.update(deadline) - - async def __aenter__(self) -> "Timeout": - self._do_enter() - return self - - async def __aexit__( - self, - exc_type: Optional[Type[BaseException]], - exc_val: Optional[BaseException], - exc_tb: Optional[TracebackType], - ) -> Optional[bool]: - self._do_exit(exc_type) - return None - - @property - def expired(self) -> bool: - """Is timeout expired during execution?""" - return self._state == _State.TIMEOUT - - @property - def deadline(self) -> Optional[float]: - return self._deadline - - def reject(self) -> None: - """Reject scheduled timeout if any.""" - # cancel is maybe better name but - # task.cancel() raises CancelledError in asyncio world. - if self._state not in (_State.INIT, _State.ENTER): - raise RuntimeError(f"invalid state {self._state.value}") - self._reject() - - def _reject(self) -> None: - if self._timeout_handler is not None: - self._timeout_handler.cancel() - self._timeout_handler = None - - def shift(self, delay: float) -> None: - """Advance timeout on delay seconds. - The delay can be negative. 
- Raise RuntimeError if shift is called when deadline is not scheduled - """ - deadline = self._deadline - if deadline is None: - raise RuntimeError( - "cannot shift timeout if deadline is not scheduled") - self.update(deadline + delay) - - def update(self, deadline: float) -> None: - """Set deadline to absolute value. - deadline argument points on the time in the same clock system - as loop.time(). - If new deadline is in the past the timeout is raised immediately. - Please note: it is not POSIX time but a time with - undefined starting base, e.g. the time of the system power on. - """ - if self._state == _State.EXIT: - raise RuntimeError( - "cannot reschedule after exit from context manager") - if self._state == _State.TIMEOUT: - raise RuntimeError("cannot reschedule expired timeout") - if self._timeout_handler is not None: - self._timeout_handler.cancel() - self._deadline = deadline - if self._state != _State.INIT: - self._reschedule() - - def _reschedule(self) -> None: - assert self._state == _State.ENTER - deadline = self._deadline - if deadline is None: - return - - now = self._loop.time() - if self._timeout_handler is not None: - self._timeout_handler.cancel() - - task = asyncio.current_task() - if deadline <= now: - self._timeout_handler = self._loop.call_soon( - self._on_timeout, task) - else: - self._timeout_handler = self._loop.call_at( - deadline, self._on_timeout, task) - - def _do_enter(self) -> None: - if self._state != _State.INIT: - raise RuntimeError(f"invalid state {self._state.value}") - self._state = _State.ENTER - self._reschedule() - - def _do_exit(self, exc_type: Optional[Type[BaseException]]) -> None: - if exc_type is asyncio.CancelledError and \ - self._state == _State.TIMEOUT: - self._timeout_handler = None - raise asyncio.TimeoutError - # timeout has not expired - self._state = _State.EXIT - self._reject() - return None - - def _on_timeout(self, task: "Optional[asyncio.Task[Any]]") -> None: - if task: - task.cancel() - self._state = _State.TIMEOUT - # drop the reference early - self._timeout_handler = None - - def asyncio_timeout(delay: Optional[float]) -> Timeout: - """timeout context manager. - Useful in cases when you want to apply timeout logic around block - of code or in cases when asyncio.wait_for is not suitable. For example: - >>> async with timeout(0.001): - ... async with aiohttp.get('https://github.com') as r: - ... 
await r.text() - delay - value in seconds or None to disable timeout logic - """ - loop = asyncio.get_running_loop() - deadline = loop.time() + delay if delay is not None else None - return Timeout(deadline, loop) From a53ad626d629e79264f0a6ab6820a4b547f3b1c4 Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Thu, 18 Sep 2025 23:53:52 +0300 Subject: [PATCH 128/518] [KV offload][1b/N] rename offloading to kv_offload (#25191) Signed-off-by: Or Ozeri --- .buildkite/test-pipeline.yaml | 2 +- tests/v1/{offloading => kv_offload}/test_worker.py | 4 ++-- vllm/v1/{offloading => kv_offload}/abstract.py | 0 vllm/v1/{offloading => kv_offload}/mediums.py | 2 +- vllm/v1/{offloading => kv_offload}/worker/worker.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) rename tests/v1/{offloading => kv_offload}/test_worker.py (97%) rename vllm/v1/{offloading => kv_offload}/abstract.py (100%) rename vllm/v1/{offloading => kv_offload}/mediums.py (93%) rename vllm/v1/{offloading => kv_offload}/worker/worker.py (98%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 5fd08296625a..c42ec4f2503d 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -280,7 +280,7 @@ steps: # split the test to avoid interference - pytest -v -s v1/core - pytest -v -s v1/executor - - pytest -v -s v1/offloading + - pytest -v -s v1/kv_offload - pytest -v -s v1/sample - pytest -v -s v1/logits_processors - pytest -v -s v1/worker diff --git a/tests/v1/offloading/test_worker.py b/tests/v1/kv_offload/test_worker.py similarity index 97% rename from tests/v1/offloading/test_worker.py rename to tests/v1/kv_offload/test_worker.py index 2391b565773a..6cf8aa0875d6 100644 --- a/tests/v1/offloading/test_worker.py +++ b/tests/v1/kv_offload/test_worker.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from vllm.v1.offloading.abstract import LoadStoreSpec -from vllm.v1.offloading.worker.worker import (OffloadingHandler, +from vllm.v1.kv_offload.abstract import LoadStoreSpec +from vllm.v1.kv_offload.worker.worker import (OffloadingHandler, OffloadingWorker, TransferResult, TransferSpec) diff --git a/vllm/v1/offloading/abstract.py b/vllm/v1/kv_offload/abstract.py similarity index 100% rename from vllm/v1/offloading/abstract.py rename to vllm/v1/kv_offload/abstract.py diff --git a/vllm/v1/offloading/mediums.py b/vllm/v1/kv_offload/mediums.py similarity index 93% rename from vllm/v1/offloading/mediums.py rename to vllm/v1/kv_offload/mediums.py index 5a1887848c9f..896281917845 100644 --- a/vllm/v1/offloading/mediums.py +++ b/vllm/v1/kv_offload/mediums.py @@ -4,7 +4,7 @@ import numpy as np -from vllm.v1.offloading.abstract import LoadStoreSpec +from vllm.v1.kv_offload.abstract import LoadStoreSpec class BlockIDsLoadStoreSpec(LoadStoreSpec, ABC): diff --git a/vllm/v1/offloading/worker/worker.py b/vllm/v1/kv_offload/worker/worker.py similarity index 98% rename from vllm/v1/offloading/worker/worker.py rename to vllm/v1/kv_offload/worker/worker.py index d2c2045d1f1f..b7a52a088fb9 100644 --- a/vllm/v1/offloading/worker/worker.py +++ b/vllm/v1/kv_offload/worker/worker.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from vllm.logger import init_logger -from vllm.v1.offloading.abstract import LoadStoreSpec +from vllm.v1.kv_offload.abstract import LoadStoreSpec # a single transfer spec (src_blocks_spec, dst_blocks_spec) TransferSpec = tuple[LoadStoreSpec, LoadStoreSpec] From 9fac6aa30b669de75d8718164cd99676d3530e7d Mon Sep 17 
00:00:00 2001 From: Lucas Wilkinson Date: Thu, 18 Sep 2025 17:26:28 -0400 Subject: [PATCH 129/518] [BugFix] Fix DeepGEMM warmup, no m.weight_scale_inv (#25206) Signed-off-by: Lucas Wilkinson --- vllm/model_executor/warmup/deep_gemm_warmup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/warmup/deep_gemm_warmup.py b/vllm/model_executor/warmup/deep_gemm_warmup.py index a636a714145c..4d1829cd228c 100644 --- a/vllm/model_executor/warmup/deep_gemm_warmup.py +++ b/vllm/model_executor/warmup/deep_gemm_warmup.py @@ -36,7 +36,7 @@ def _extract_data_from_linear_base_module( assert m.quant_method.quant_config is not None w = m.weight - ws = m.weight_scale_inv + ws = m.weight_scale quant_block_size = m.quant_method.quant_config.weight_block_size assert isinstance(w, torch.Tensor) From 9a4600e4dcbbd13988c31d5198d3ab8b4172ecca Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Thu, 18 Sep 2025 19:03:09 -0500 Subject: [PATCH 130/518] [CORE] Prompt Embeddings Support for v1 Engine (#24278) Signed-off-by: Andrew Sansom Signed-off-by: Andrew Sansom Co-authored-by: Cyrus Leung --- .../test_basic_correctness.py | 10 -- .../test_completion_with_prompt_embeds.py | 1 - .../models/language/generation/test_common.py | 6 -- vllm/engine/arg_utils.py | 24 +++-- vllm/entrypoints/openai/protocol.py | 2 +- vllm/utils/__init__.py | 27 +++++ vllm/v1/core/sched/output.py | 24 +++-- vllm/v1/engine/__init__.py | 3 +- vllm/v1/engine/detokenizer.py | 31 ++++-- vllm/v1/engine/output_processor.py | 25 ++++- vllm/v1/engine/processor.py | 38 +++++-- vllm/v1/request.py | 15 ++- vllm/v1/sample/logits_processor/__init__.py | 2 +- vllm/v1/sample/logits_processor/builtin.py | 6 +- vllm/v1/sample/logits_processor/interface.py | 2 +- vllm/v1/serial_utils.py | 2 +- vllm/v1/worker/gpu_input_batch.py | 55 +++++++++-- vllm/v1/worker/gpu_model_runner.py | 99 ++++++++++++++++++- vllm/v1/worker/tpu_input_batch.py | 6 +- vllm/v1/worker/tpu_model_runner.py | 1 + 20 files changed, 304 insertions(+), 75 deletions(-) diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index fba18f197074..24b1c9a93126 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -76,11 +76,6 @@ def test_models( model_executor: str, enable_prompt_embeds: bool, ) -> None: - - if enable_prompt_embeds and envs.is_set( - "VLLM_USE_V1") and envs.VLLM_USE_V1: - pytest.skip("enable_prompt_embeds is not supported in v1.") - if not envs.VLLM_USE_V1: if async_scheduling: pytest.skip("async_scheduling only supported in v1.") @@ -164,11 +159,6 @@ def test_models_distributed( extra_env: dict[str, str], enable_prompt_embeds: bool, ) -> None: - - if enable_prompt_embeds and envs.is_set( - "VLLM_USE_V1") and envs.VLLM_USE_V1: - pytest.skip("enable_prompt_embeds is not supported in v1.") - if test_suite != TARGET_TEST_SUITE: pytest.skip(f"Skip test for {test_suite}") diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index 3d56291bc793..0e3fc82f0c03 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -36,7 +36,6 @@ def default_server_args() -> list[str]: "--enforce-eager", # Prompt Embeds server args "--enable-prompt-embeds", - "--no-enable-chunked-prefill", ] diff --git a/tests/models/language/generation/test_common.py 
b/tests/models/language/generation/test_common.py index a5aa1e3f4974..c14e71cbdb96 100644 --- a/tests/models/language/generation/test_common.py +++ b/tests/models/language/generation/test_common.py @@ -125,12 +125,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, # in parts of the operators pytest.skip(f"Skipping '{model}' model test with AITER kernel.") - # Note: can be removed when - # https://github.com/vllm-project/vllm/pull/24278 finished - if current_platform.is_cpu() and use_prompt_embeds: - pytest.skip("Skipping use_prompt_embeds=True with " - "V1-only CPU backend.") - with hf_runner(model) as hf_model: hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index fb5beab77b27..63282c425350 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1513,12 +1513,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: recommend_to_remove=False) return False - # No text embedding inputs so far. - if self.enable_prompt_embeds: - _raise_or_fallback(feature_name="--enable-prompt-embeds", - recommend_to_remove=False) - return False - # No Mamba or Encoder-Decoder so far. if not model_config.is_v1_compatible: _raise_or_fallback(feature_name=model_config.architectures, @@ -1651,6 +1645,13 @@ def _set_default_args_v0(self, model_config: ModelConfig) -> None: "models in V0 and has been disabled.") self.enable_prefix_caching = False + if self.enable_prompt_embeds: + logger.warning( + "--enable-prompt-embeds and --enable-prefix-caching " + "are not supported together in V0. Prefix caching has " + "been disabled.") + self.enable_prefix_caching = False + # Set max_num_seqs to 256 for VLLM_V0. if self.max_num_seqs is None: self.max_num_seqs = 256 @@ -1664,6 +1665,17 @@ def _set_default_args_v1(self, usage_context: UsageContext, # For pooling tasks the default is False if model_config.runner_type != "pooling": self.enable_chunked_prefill = True + + # TODO: When prefix caching supports prompt embeds inputs, this + # check can be removed. + if (self.enable_prompt_embeds + and self.enable_prefix_caching is not False): + logger.warning( + "--enable-prompt-embeds and --enable-prefix-caching " + "are not supported together in V1. 
Prefix caching has " + "been disabled.") + self.enable_prefix_caching = False + if self.enable_prefix_caching is None: self.enable_prefix_caching = True else: diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 7ad8e73d89d5..6b54511a66f3 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -973,7 +973,6 @@ class CompletionRequest(OpenAIBaseModel): # https://platform.openai.com/docs/api-reference/completions/create model: Optional[str] = None prompt: Optional[Union[list[int], list[list[int]], str, list[str]]] = None - prompt_embeds: Optional[Union[bytes, list[bytes]]] = None best_of: Optional[int] = None echo: Optional[bool] = False frequency_penalty: Optional[float] = 0.0 @@ -1009,6 +1008,7 @@ class CompletionRequest(OpenAIBaseModel): # --8<-- [end:completion-sampling-params] # --8<-- [start:completion-extra-params] + prompt_embeds: Optional[Union[bytes, list[bytes]]] = None add_special_tokens: bool = Field( default=True, description=( diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index f13381ecd9ff..d4013a69e99f 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -3443,3 +3443,30 @@ def decorate_logs(process_name: Optional[str] = None) -> None: pid = os.getpid() _add_prefix(sys.stdout, process_name, pid) _add_prefix(sys.stderr, process_name, pid) + + +def length_from_prompt_token_ids_or_embeds( + prompt_token_ids: Optional[list[int]], + prompt_embeds: Optional[torch.Tensor], +) -> int: + """Calculate the request length (in number of tokens) give either + prompt_token_ids or prompt_embeds. + """ + prompt_token_len = None if prompt_token_ids is None else len( + prompt_token_ids) + prompt_embeds_len = \ + None if prompt_embeds is None else len(prompt_embeds) + + if prompt_token_len is None: + if prompt_embeds_len is None: + raise ValueError( + "Neither prompt_token_ids nor prompt_embeds were defined.") + return prompt_embeds_len + else: + if (prompt_embeds_len is not None + and prompt_embeds_len != prompt_token_len): + raise ValueError( + "Prompt token ids and prompt embeds had different lengths" + f" prompt_token_ids={prompt_token_len}" + f" prompt_embeds={prompt_embeds_len}") + return prompt_token_len diff --git a/vllm/v1/core/sched/output.py b/vllm/v1/core/sched/output.py index 3ec5b91bf286..209fc2a4404f 100644 --- a/vllm/v1/core/sched/output.py +++ b/vllm/v1/core/sched/output.py @@ -11,6 +11,7 @@ if TYPE_CHECKING: import numpy as np import numpy.typing as npt + import torch from vllm.distributed.kv_transfer.kv_connector.v1.base import ( KVConnectorMetadata) @@ -26,13 +27,14 @@ class NewRequestData: req_id: str - prompt_token_ids: list[int] + prompt_token_ids: Optional[list[int]] mm_features: list[MultiModalFeatureSpec] sampling_params: Optional[SamplingParams] pooling_params: Optional[PoolingParams] block_ids: tuple[list[int], ...] 
num_computed_tokens: int lora_request: Optional[LoRARequest] + prompt_embeds: Optional[torch.Tensor] = None @classmethod def from_request( @@ -49,9 +51,12 @@ def from_request( block_ids=block_ids, num_computed_tokens=request.num_computed_tokens, lora_request=request.lora_request, + prompt_embeds=request.prompt_embeds, ) - def __repr__(self): + def __repr__(self) -> str: + prompt_embeds_shape = (self.prompt_embeds.shape + if self.prompt_embeds else None) return (f"NewRequestData(" f"req_id={self.req_id}," f"prompt_token_ids={self.prompt_token_ids}," @@ -59,19 +64,26 @@ def __repr__(self): f"sampling_params={self.sampling_params}," f"block_ids={self.block_ids}," f"num_computed_tokens={self.num_computed_tokens}," - f"lora_request={self.lora_request}" + f"lora_request={self.lora_request}," + f"prompt_embeds_shape={prompt_embeds_shape}" ")") # Version of __repr__ with the prompt data obfuscated - def anon_repr(self): + def anon_repr(self) -> str: + prompt_token_ids_len = len( + self.prompt_token_ids + ) if self.prompt_token_ids is not None else None + prompt_embeds_shape = (self.prompt_embeds.shape + if self.prompt_embeds else None) return (f"NewRequestData(" f"req_id={self.req_id}," - f"prompt_token_ids_len={len(self.prompt_token_ids)}," + f"prompt_token_ids_len={prompt_token_ids_len}," f"mm_features={self.mm_features}," f"sampling_params={self.sampling_params}," f"block_ids={self.block_ids}," f"num_computed_tokens={self.num_computed_tokens}," - f"lora_request={self.lora_request}" + f"lora_request={self.lora_request}," + f"prompt_embeds_shape={prompt_embeds_shape}" ")") diff --git a/vllm/v1/engine/__init__.py b/vllm/v1/engine/__init__.py index dec4abec519b..345f5a464c2c 100644 --- a/vllm/v1/engine/__init__.py +++ b/vllm/v1/engine/__init__.py @@ -47,7 +47,7 @@ class EngineCoreRequest( gc=False): # type: ignore[call-arg] request_id: str - prompt_token_ids: list[int] + prompt_token_ids: Optional[list[int]] mm_features: Optional[list[MultiModalFeatureSpec]] sampling_params: Optional[SamplingParams] pooling_params: Optional[PoolingParams] @@ -56,6 +56,7 @@ class EngineCoreRequest( lora_request: Optional[LoRARequest] cache_salt: Optional[str] data_parallel_rank: Optional[int] + prompt_embeds: Optional[torch.Tensor] = None # Index of the client, used to ensure outputs are sent back to the same # client for this request when scaling out the front-end. diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index cf4b06db843b..8aa36d6a439c 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -13,6 +13,7 @@ from vllm.logger import init_logger from vllm.transformers_utils.detokenizer_utils import ( AnyTokenizer, convert_prompt_ids_to_tokens, detokenize_incrementally) +from vllm.utils import length_from_prompt_token_ids_or_embeds from vllm.v1.engine import EngineCoreRequest logger = init_logger(__name__) @@ -179,11 +180,12 @@ def __init__(self, tokenizer: PreTrainedTokenizerFast, self.tokenizer: Tokenizer = tokenizer._tokenizer # Find a safe place to start. 
- prompt_suffix = request.prompt_token_ids + prompt_token_ids = request.prompt_token_ids or [] + prompt_suffix = prompt_token_ids prompt_len = len(prompt_suffix) if prompt_len > 4: for i in range(4, min(prompt_len + 1, 24)): - suffix = request.prompt_token_ids[-i:] + suffix = prompt_token_ids[-i:] if '�' not in self.tokenizer.decode(suffix): prompt_suffix = suffix break @@ -260,16 +262,25 @@ def __init__(self, tokenizer: AnyTokenizer, request: EngineCoreRequest): params = request.sampling_params assert params is not None + self.prompt_len = length_from_prompt_token_ids_or_embeds( + request.prompt_token_ids, request.prompt_embeds) + # Metadata for incremental detokenization. - self.tokens, self.prefix_offset, self.read_offset = ( - convert_prompt_ids_to_tokens( - tokenizer=tokenizer, - prompt_ids=request.prompt_token_ids, - skip_special_tokens=params.skip_special_tokens, - )) + if request.prompt_token_ids is not None: + self.tokens, self.prefix_offset, self.read_offset = ( + convert_prompt_ids_to_tokens( + tokenizer=tokenizer, + prompt_ids=request.prompt_token_ids, + skip_special_tokens=params.skip_special_tokens, + )) + else: + # Prompt embedding requests cannot be detokenized, in general. + self.tokens = [""] * self.prompt_len + self.prefix_offset = 0 + self.read_offest = 0 - self.token_ids.extend(request.prompt_token_ids) - self.prompt_len = len(request.prompt_token_ids) + self.token_ids.extend(request.prompt_token_ids + or [0] * self.prompt_len) self.skip_special_tokens = params.skip_special_tokens self.spaces_between_special_tokens = ( diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py index 5dad63988daa..c17dc3e204ec 100644 --- a/vllm/v1/engine/output_processor.py +++ b/vllm/v1/engine/output_processor.py @@ -14,6 +14,7 @@ from vllm.tracing import (SpanAttributes, SpanKind, Tracer, extract_trace_context) from vllm.transformers_utils.tokenizer import AnyTokenizer +from vllm.utils import length_from_prompt_token_ids_or_embeds from vllm.v1.engine import EngineCoreOutput, EngineCoreRequest, FinishReason from vllm.v1.engine.detokenizer import IncrementalDetokenizer from vllm.v1.engine.logprobs import LogprobsProcessor @@ -86,7 +87,8 @@ def __init__( lora_name: Optional[str], output_kind: RequestOutputKind, prompt: Optional[str], - prompt_token_ids: list[int], + prompt_token_ids: Optional[list[int]], + prompt_embeds: Optional[torch.Tensor], logprobs_processor: Optional[LogprobsProcessor], detokenizer: Optional[IncrementalDetokenizer], max_tokens_param: Optional[int], @@ -104,7 +106,9 @@ def __init__( self.output_kind = output_kind self.prompt = prompt self.prompt_token_ids = prompt_token_ids - self.prompt_len = len(prompt_token_ids) + self.prompt_embeds = prompt_embeds + self.prompt_len = length_from_prompt_token_ids_or_embeds( + self.prompt_token_ids, self.prompt_embeds) self.logprobs_processor = logprobs_processor self.detokenizer = detokenizer self.max_tokens_param = max_tokens_param @@ -165,6 +169,7 @@ def from_new_request( output_kind=output_kind, prompt=prompt, prompt_token_ids=request.prompt_token_ids, + prompt_embeds=request.prompt_embeds, logprobs_processor=logprobs_processor, detokenizer=detokenizer, max_tokens_param=max_tokens_param, @@ -223,6 +228,8 @@ def _new_request_output( first_output = outputs[0] if isinstance(first_output, PoolingOutput): assert len(outputs) == 1 + # Prompt embeddings are currently not supported by pooling requests. 
+ assert self.prompt_token_ids is not None return PoolingRequestOutput( request_id=request_id, outputs=first_output, @@ -236,10 +243,15 @@ def _new_request_output( else: prompt_logprobs = self.logprobs_processor.prompt_logprobs + # If prompt embeds were used, put placeholder prompt token ids + prompt_token_ids = self.prompt_token_ids + if prompt_token_ids is None and self.prompt_embeds is not None: + prompt_token_ids = [0] * len(self.prompt_embeds) + return RequestOutput( request_id=request_id, prompt=self.prompt, - prompt_token_ids=self.prompt_token_ids, + prompt_token_ids=prompt_token_ids, prompt_logprobs=prompt_logprobs, outputs=cast(list[CompletionOutput], outputs), finished=finished, @@ -469,6 +481,8 @@ def do_tracing(self, engine_core_output: EngineCoreOutput, arrival_time_nano_seconds = int(req_state.stats.arrival_time * 1e9) trace_context = extract_trace_context(engine_core_output.trace_headers) + prompt_length = length_from_prompt_token_ids_or_embeds( + req_state.prompt_token_ids, req_state.prompt_embeds) with (self.tracer.start_as_current_span( "llm_request", kind=SpanKind.SERVER, @@ -488,7 +502,7 @@ def do_tracing(self, engine_core_output: EngineCoreOutput, span.set_attribute(SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE, queued_time) span.set_attribute(SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS, - len(req_state.prompt_token_ids)) + prompt_length) span.set_attribute(SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS, metrics.num_generation_tokens) span.set_attribute( @@ -544,7 +558,8 @@ def _update_stats_from_finished(self, req_state: RequestState, assert req_state.stats is not None iteration_stats.update_from_finished_request( finish_reason=finish_reason, - num_prompt_tokens=len(req_state.prompt_token_ids), + num_prompt_tokens=length_from_prompt_token_ids_or_embeds( + req_state.prompt_token_ids, req_state.prompt_embeds), max_tokens_param=req_state.max_tokens_param, req_stats=req_state.stats) self.lora_states.finish_request(req_state) diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 71f539583a1b..507e2cd3223f 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -19,6 +19,7 @@ from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingParams from vllm.transformers_utils.tokenizer import AnyTokenizer +from vllm.utils import length_from_prompt_token_ids_or_embeds from vllm.v1.engine import EngineCoreRequest from vllm.v1.structured_output.backend_guidance import ( validate_guidance_grammar) @@ -390,6 +391,16 @@ def process_inputs( self._validate_model_inputs(processed_inputs) encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs) + # Mypy does not always properly infer the types of some elements of + # discriminated unions of TypedDicts, because of how it handles + # inheritance of TypedDict. If we explicitly extract the items we want + # we can avoid type errors from using `dict.get` later in the method. + prompt_str: Optional[str] = None if decoder_inputs[ + "type"] == "embeds" else decoder_inputs.get("prompt") + prompt_token_ids = decoder_inputs[ + "prompt_token_ids"] if decoder_inputs["type"] != "embeds" else None + prompt_embeds = decoder_inputs["prompt_embeds"] if decoder_inputs[ + "type"] == "embeds" else None sampling_params = None pooling_params = None @@ -398,9 +409,10 @@ def process_inputs( sampling_params = params.clone() # If unset max tokens, then generate up to the max_model_len. 
if sampling_params.max_tokens is None: - sampling_params.max_tokens = ( - self.model_config.max_model_len - - len(decoder_inputs["prompt_token_ids"])) + seq_len = length_from_prompt_token_ids_or_embeds( + prompt_token_ids, prompt_embeds) + sampling_params.max_tokens = \ + self.model_config.max_model_len - seq_len sampling_params.update_from_generation_config( self.generation_config_fields, eos_token_id) if self.tokenizer is not None: @@ -430,9 +442,10 @@ def process_inputs( identifier=decoder_mm_hashes[modality][idx], mm_position=decoder_mm_positions[modality][idx])) - return decoder_inputs.get("prompt"), EngineCoreRequest( + return prompt_str, EngineCoreRequest( request_id=request_id, - prompt_token_ids=decoder_inputs["prompt_token_ids"], + prompt_token_ids=prompt_token_ids, + prompt_embeds=prompt_embeds, mm_features=mm_features, sampling_params=sampling_params, pooling_params=pooling_params, @@ -461,10 +474,17 @@ def _validate_model_input( ): model_config = self.model_config - prompt_ids = prompt_inputs["prompt_token_ids"] + prompt_ids = None if prompt_inputs[ + "type"] == "embeds" else prompt_inputs["prompt_token_ids"] + prompt_embeds = prompt_inputs["prompt_embeds"] if prompt_inputs[ + "type"] == "embeds" else None + prompt_len = length_from_prompt_token_ids_or_embeds( + prompt_ids, prompt_embeds) if not prompt_ids: if prompt_type == "encoder" and model_config.is_multimodal_model: pass # Mllama may have empty encoder inputs for text-only data + elif prompt_inputs["type"] == "embeds": + pass # Prompt embeds should not have prompt_ids. else: raise ValueError(f"The {prompt_type} prompt cannot be empty") @@ -472,7 +492,7 @@ def _validate_model_input( tokenizer = None else: tokenizer = self.tokenizer - max_input_id = max(prompt_ids, default=0) + max_input_id = max(prompt_ids or [], default=0) # NOTE: tokenizer.max_token_id is the tokenizer’s vocab size while # self.model_config.get_vocab_size() is the model’s vocab size. @@ -490,7 +510,7 @@ def _validate_model_input( f"Token id {max_input_id} is out of vocabulary") max_prompt_len = self.model_config.max_model_len - if len(prompt_ids) > max_prompt_len: + if prompt_len > max_prompt_len: if prompt_type == "encoder" and model_config.is_multimodal_model: mm_registry = self.input_preprocessor.mm_registry mm_processor = mm_registry.create_processor( @@ -514,7 +534,7 @@ def _validate_model_input( "number of text tokens.") raise ValueError( - f"The {prompt_type} prompt (length {len(prompt_ids)}) is " + f"The {prompt_type} prompt (length {prompt_len}) is " f"longer than the maximum model length of {max_prompt_len}. 
" f"{suggestion}") diff --git a/vllm/v1/request.py b/vllm/v1/request.py index 145af788d237..ff10fa00c1cf 100644 --- a/vllm/v1/request.py +++ b/vllm/v1/request.py @@ -7,9 +7,12 @@ from functools import partial from typing import TYPE_CHECKING, Any, Callable, Optional, Union +import torch + from vllm.multimodal.inputs import MultiModalFeatureSpec from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingParams +from vllm.utils import length_from_prompt_token_ids_or_embeds from vllm.v1.engine import (EngineCoreEvent, EngineCoreEventType, EngineCoreRequest, FinishReason) from vllm.v1.structured_output.request import StructuredOutputRequest @@ -25,12 +28,13 @@ class Request: def __init__( self, request_id: str, - prompt_token_ids: list[int], + prompt_token_ids: Optional[list[int]], sampling_params: Optional[SamplingParams], pooling_params: Optional[PoolingParams], eos_token_id: Optional[int], client_index: int = 0, arrival_time: Optional[float] = None, + prompt_embeds: Optional[torch.Tensor] = None, mm_features: Optional[list[MultiModalFeatureSpec]] = None, lora_request: Optional["LoRARequest"] = None, structured_output_request: Optional["StructuredOutputRequest"] = None, @@ -79,9 +83,13 @@ def __init__( "sampling_params and pooling_params can't both be unset") self.prompt_token_ids = prompt_token_ids - self.num_prompt_tokens = len(self.prompt_token_ids) + self.prompt_embeds = prompt_embeds + self.num_prompt_tokens = length_from_prompt_token_ids_or_embeds( + prompt_token_ids, prompt_embeds) self._output_token_ids: list[int] = [] - self._all_token_ids: list[int] = self.prompt_token_ids.copy() + self._all_token_ids: list[int] = self.prompt_token_ids.copy( + ) if self.prompt_token_ids is not None else [0 + ] * self.num_prompt_tokens self.num_output_placeholders = 0 # Used in async scheduling. 
self.spec_token_ids: list[int] = [] self.num_computed_tokens = 0 @@ -123,6 +131,7 @@ def from_engine_core_request( request_id=request.request_id, client_index=request.client_index, prompt_token_ids=request.prompt_token_ids, + prompt_embeds=request.prompt_embeds, mm_features=request.mm_features, sampling_params=request.sampling_params, pooling_params=request.pooling_params, diff --git a/vllm/v1/sample/logits_processor/__init__.py b/vllm/v1/sample/logits_processor/__init__.py index df944873bcaf..10cad5b53071 100644 --- a/vllm/v1/sample/logits_processor/__init__.py +++ b/vllm/v1/sample/logits_processor/__init__.py @@ -243,7 +243,7 @@ def new_req_logits_processor( def _new_state( self, params: SamplingParams, - prompt_ids: list[int], + prompt_ids: Optional[list[int]], output_ids: list[int], ) -> Optional[partial[torch.Tensor]]: """Return state representation for new request diff --git a/vllm/v1/sample/logits_processor/builtin.py b/vllm/v1/sample/logits_processor/builtin.py index 60f9c0bdb631..fc655d993cb4 100644 --- a/vllm/v1/sample/logits_processor/builtin.py +++ b/vllm/v1/sample/logits_processor/builtin.py @@ -187,7 +187,8 @@ def is_argmax_invariant(self) -> bool: @staticmethod def add_request( - params: SamplingParams, _: list[int], output_tok_ids: list[int] + params: SamplingParams, _: Optional[list[int]], + output_tok_ids: list[int] ) -> Optional[tuple[int, Sequence[int], set[int]]]: min_tokens = params.min_tokens if not min_tokens or len(output_tok_ids) >= min_tokens: @@ -234,7 +235,8 @@ def apply(self, logits: torch.Tensor) -> torch.Tensor: def process_dict_updates( req_entries: dict[int, T], batch_update: Optional[BatchUpdate], - new_state: Callable[[SamplingParams, list[int], list[int]], Optional[T]] + new_state: Callable[[SamplingParams, Optional[list[int]], list[int]], + Optional[T]] ) -> bool: """Utility function to update dict state for sparse LogitsProcessors.""" diff --git a/vllm/v1/sample/logits_processor/interface.py b/vllm/v1/sample/logits_processor/interface.py index 04027359909a..a84afc2f347a 100644 --- a/vllm/v1/sample/logits_processor/interface.py +++ b/vllm/v1/sample/logits_processor/interface.py @@ -26,7 +26,7 @@ class MoveDirectionality(Enum): # (index, params, prompt_tok_ids, output_tok_ids) tuples for new # requests added to the batch. -AddedRequest = tuple[int, SamplingParams, list[int], list[int]] +AddedRequest = tuple[int, SamplingParams, Optional[list[int]], list[int]] # (index 1, index 2, directionality) tuples representing # one-way moves or two-way swaps of requests in batch diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py index c8375d6f1551..50c1470c67ed 100644 --- a/vllm/v1/serial_utils.py +++ b/vllm/v1/serial_utils.py @@ -174,7 +174,7 @@ def _encode_tensor( ) -> tuple[str, tuple[int, ...], Union[int, memoryview]]: assert self.aux_buffers is not None # view the tensor as a contiguous 1D array of bytes - arr = obj.flatten().contiguous().view(torch.uint8).numpy() + arr = obj.flatten().contiguous().cpu().view(torch.uint8).numpy() if obj.nbytes < self.size_threshold: # Smaller tensors are encoded inline, just like ndarrays. 
data = msgpack.Ext(CUSTOM_TYPE_RAW_VIEW, arr.data) diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 6717622efb80..79a392337574 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -13,7 +13,7 @@ from vllm.multimodal.inputs import MultiModalFeatureSpec, MultiModalKwargsItems from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingParams, SamplingType -from vllm.utils import swap_dict_values +from vllm.utils import length_from_prompt_token_ids_or_embeds, swap_dict_values from vllm.v1.outputs import LogprobsTensors from vllm.v1.pool.metadata import PoolingMetadata from vllm.v1.sample.logits_processor import (BatchUpdateBuilder, @@ -29,7 +29,7 @@ class CachedRequestState: req_id: str - prompt_token_ids: list[int] + prompt_token_ids: Optional[list[int]] mm_features: list[MultiModalFeatureSpec] sampling_params: Optional[SamplingParams] pooling_params: Optional[PoolingParams] @@ -43,9 +43,11 @@ class CachedRequestState: mrope_position_delta: Optional[int] = None lora_request: Optional[LoRARequest] = None + prompt_embeds: Optional[torch.Tensor] = None def __post_init__(self): - self.num_prompt_tokens = len(self.prompt_token_ids) + self.num_prompt_tokens = length_from_prompt_token_ids_or_embeds( + self.prompt_token_ids, self.prompt_embeds) @property def num_tokens(self) -> int: @@ -63,6 +65,10 @@ def mm_inputs(self) -> list[MultiModalKwargsItems]: def get_token_id(self, idx: int) -> int: if idx < self.num_prompt_tokens: + if self.prompt_token_ids is None: + raise ValueError( + f"Tried to access token index {idx}, but that token was " + "provided via prompt_embeds, and its ID is unknown.") return self.prompt_token_ids[idx] elif idx - self.num_prompt_tokens < len(self.output_token_ids): return self.output_token_ids[idx - self.num_prompt_tokens] @@ -109,6 +115,14 @@ def __init__( pin_memory=False, ) self.token_ids_cpu = self.token_ids_cpu_tensor.numpy() + self.is_token_ids = torch.zeros((max_num_reqs, max_model_len), + device="cpu", + dtype=bool, + pin_memory=False) + # Store prompt embeddings per request to avoid OOM from large upfront + # allocation if max_model_len is big. + # Maps req_index -> tensor of shape (num_prompt_tokens, hidden_size) + self.req_prompt_embeds: dict[int, torch.Tensor] = {} self.num_tokens = np.zeros(max_num_reqs, dtype=np.int32) self.num_tokens_no_spec = np.zeros(max_num_reqs, dtype=np.int32) self.num_prompt_tokens = np.zeros(max_num_reqs, dtype=np.int32) @@ -310,15 +324,23 @@ def add_request( self.req_id_to_index[req_id] = req_index # Copy the prompt token ids and output token ids. - num_prompt_tokens = len(request.prompt_token_ids) + num_prompt_tokens = length_from_prompt_token_ids_or_embeds( + request.prompt_token_ids, request.prompt_embeds) self.num_prompt_tokens[req_index] = num_prompt_tokens - self.token_ids_cpu[ - req_index, :num_prompt_tokens] = request.prompt_token_ids start_idx = num_prompt_tokens end_idx = start_idx + len(request.output_token_ids) + if request.prompt_token_ids is not None: + self.token_ids_cpu[ + req_index, :num_prompt_tokens] = request.prompt_token_ids + self.is_token_ids[req_index, :num_prompt_tokens] = True + else: + self.is_token_ids[req_index, :num_prompt_tokens] = False + if request.prompt_embeds is not None: + self.req_prompt_embeds[req_index] = request.prompt_embeds self.token_ids_cpu[req_index, start_idx:end_idx] = request.output_token_ids - # Number of token ids in token_ids_cpu. 
+ self.is_token_ids[req_index, start_idx:end_idx] = True + # Number of token ids in prompt (token_ids_cpu or prompt_embeds). # NOTE(woosuk): This may include spec decode tokens. self.num_tokens[req_index] = request.num_tokens # Number of tokens without spec decode tokens. @@ -503,6 +525,20 @@ def swap_states(self, i1: int, i2: int) -> None: self.token_ids_cpu[i1, ...] = self.token_ids_cpu[i2, ...] self.token_ids_cpu[i2, ...] = tmp + self.is_token_ids[[i1, i2], ...] = self.is_token_ids[[i2, i1], ...] + + # Swap prompt embeddings if they exist + embeds_i1 = self.req_prompt_embeds.get(i1) + embeds_i2 = self.req_prompt_embeds.get(i2) + if embeds_i1 is not None: + self.req_prompt_embeds[i2] = embeds_i1 + else: + self.req_prompt_embeds.pop(i2, None) + if embeds_i2 is not None: + self.req_prompt_embeds[i1] = embeds_i2 + else: + self.req_prompt_embeds.pop(i1, None) + self.block_table.swap_row(i1, i2) self.request_lora_mapping[i1], self.request_lora_mapping[i2] = \ @@ -592,6 +628,11 @@ def condense(self) -> None: num_tokens = self.num_tokens[last_req_index] self.token_ids_cpu[empty_index, :num_tokens] = self.token_ids_cpu[ last_req_index, :num_tokens] + self.is_token_ids[empty_index, :num_tokens] = self.is_token_ids[ + last_req_index, :num_tokens] + if last_req_index in self.req_prompt_embeds: + self.req_prompt_embeds[ + empty_index] = self.req_prompt_embeds.pop(last_req_index) self.num_tokens[empty_index] = num_tokens self.num_tokens_no_spec[empty_index] = self.num_tokens_no_spec[ last_req_index] diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 053e8f0537ed..3ee2160a42ff 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -56,7 +56,9 @@ from vllm.tasks import GenerationTask, PoolingTask, SupportedTask from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, GiB_bytes, check_use_alibi, get_dtype_size, - is_pin_memory_available, round_up, supports_dynamo) + is_pin_memory_available, + length_from_prompt_token_ids_or_embeds, round_up, + supports_dynamo) from vllm.v1.attention.backends.flash_attn import AttentionMetadata from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder from vllm.v1.attention.backends.utils import ( @@ -197,6 +199,7 @@ def __init__( cache_config.cache_dtype] self.is_pooling_model = (model_config.runner_type == 'pooling') + self.enable_prompt_embeds = model_config.enable_prompt_embeds self.is_multimodal_raw_input_only_model = ( model_config.is_multimodal_raw_input_only_model) @@ -342,6 +345,8 @@ def __init__( self.hidden_size, dtype=self.dtype, numpy=False) + self.is_token_ids = self._make_buffer(self.max_num_tokens, + dtype=torch.bool) self.discard_request_indices = self._make_buffer(self.max_num_reqs, dtype=torch.int64) self.num_discarded_requests = 0 @@ -574,6 +579,7 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> None: req_state = CachedRequestState( req_id=req_id, prompt_token_ids=new_req_data.prompt_token_ids, + prompt_embeds=new_req_data.prompt_embeds, mm_features=new_req_data.mm_features, sampling_params=sampling_params, pooling_params=pooling_params, @@ -819,6 +825,8 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, if self.input_batch.prev_sampled_token_ids is None: # Normal scheduling case self.input_ids.copy_to_gpu(total_num_scheduled_tokens) + self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) + self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) return # Async scheduling case, where some decode requests 
from the previous @@ -844,6 +852,8 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, # If not all requests are decodes from the last iteration, # We need to copy the input_ids_cpu to the GPU first. self.input_ids.copy_to_gpu(total_num_scheduled_tokens) + self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) + self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) if num_commmon_tokens == 0: # No requests in common with the previous iteration # So input_ids_cpu will have all the input ids. @@ -857,6 +867,7 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, self.input_batch.prev_sampled_token_ids[:num_commmon_tokens, 0], non_blocking=True) + self.is_token_ids.gpu[:num_commmon_tokens] = True return # Upload the index tensors asynchronously # so the scatter can be non-blocking. @@ -947,14 +958,60 @@ def _prepare_inputs( # where M is the max_model_len. token_indices = (positions_np + req_indices * self.input_batch.token_ids_cpu.shape[1]) + token_indices_tensor = torch.from_numpy(token_indices) # NOTE(woosuk): We use torch.index_select instead of np.take here # because torch.index_select is much faster than np.take for large # tensors. torch.index_select(self.input_batch.token_ids_cpu_tensor.flatten(), 0, - torch.from_numpy(token_indices), + token_indices_tensor, out=self.input_ids.cpu[:total_num_scheduled_tokens]) + is_token_ids = self.input_batch.is_token_ids.flatten() + torch.index_select( + is_token_ids, + 0, + token_indices_tensor, + out=self.is_token_ids.cpu[:total_num_scheduled_tokens]) + + # Because we did not pre-allocate a massive prompt_embeds CPU tensor on + # the InputBatch, we need to fill in the prompt embeds into the expected + # spots in the GpuModelRunner's pre-allocated prompt_embeds tensor. + if self.input_batch.req_prompt_embeds: + output_idx = 0 + for req_idx in range(num_reqs): + num_sched = num_scheduled_tokens[req_idx] + + # Skip if this request doesn't have embeddings + if req_idx not in self.input_batch.req_prompt_embeds: + output_idx += num_sched + continue + + # Skip if no tokens scheduled + if num_sched <= 0: + output_idx += num_sched + continue + + req_embeds = self.input_batch.req_prompt_embeds[req_idx] + start_pos = self.input_batch.num_computed_tokens_cpu[req_idx] + + # Skip if trying to read beyond available embeddings + if start_pos >= req_embeds.shape[0]: + output_idx += num_sched + continue + + # Copy available embeddings + end_pos = start_pos + num_sched + actual_end = min(end_pos, req_embeds.shape[0]) + actual_num_sched = actual_end - start_pos + + if actual_num_sched > 0: + self.inputs_embeds.cpu[output_idx:output_idx + + actual_num_sched].copy_( + req_embeds[start_pos:actual_end] + ) + + output_idx += num_sched self.input_batch.block_table.compute_slot_mapping( req_indices, positions_np) @@ -1279,7 +1336,8 @@ def _calc_mrope_positions(self, scheduler_output: "SchedulerOutput"): self.input_batch.num_computed_tokens_cpu[index] num_scheduled_tokens = \ scheduler_output.num_scheduled_tokens[req_id] - num_prompt_tokens = len(req.prompt_token_ids) + num_prompt_tokens = length_from_prompt_token_ids_or_embeds( + req.prompt_token_ids, req.prompt_embeds) if num_computed_tokens + num_scheduled_tokens > num_prompt_tokens: prompt_part_len = max(0, @@ -1845,6 +1903,32 @@ def _preprocess( **self._init_model_kwargs(num_scheduled_tokens), **self._extract_mm_kwargs(scheduler_output), } + elif (self.enable_prompt_embeds and get_pp_group().is_first_rank): + # Get the input embeddings for the tokens that are not input embeds, + # then put 
them into the appropriate positions. + # TODO(qthequartermasterman): Since even when prompt embeds are + # enabled, (a) not all requests will use prompt embeds, and (b) + # after the initial prompt is processed, the rest of the generated + # tokens will be token ids, it is not desirable to have the + # embedding layer outside of the CUDA graph all the time. The v0 + # engine avoids this by "double compiling" the CUDA graph, once + # with input_ids and again with inputs_embeds, for all num_tokens. + # If a batch only has token ids, then including the embedding layer + # in the CUDA graph will be more performant (like in the else case + # below). + token_ids_idx = self.is_token_ids.gpu[:num_scheduled_tokens] \ + .nonzero(as_tuple=False) \ + .squeeze(1) + # Some tokens ids may need to become embeds + if token_ids_idx.numel() > 0: + token_ids = self.input_ids.gpu[token_ids_idx] + tokens_to_embeds = self.model.get_input_embeddings( + input_ids=token_ids) + self.inputs_embeds.gpu[token_ids_idx] = tokens_to_embeds + + inputs_embeds = self.inputs_embeds.gpu[:num_input_tokens] + model_kwargs = self._init_model_kwargs(num_input_tokens) + input_ids = None else: # For text-only models, we use token ids as input. # While it is possible to use embeddings as input just like the @@ -2023,6 +2107,7 @@ def _bookkeeping_sync( self.input_batch.token_ids_cpu[req_idx, start_idx:end_idx] = sampled_ids + self.input_batch.is_token_ids[req_idx, start_idx:end_idx] = True self.input_batch.num_tokens_no_spec[req_idx] = end_idx self.input_batch.num_tokens[req_idx] = end_idx @@ -2570,6 +2655,10 @@ def _get_prompt_logprobs_dict( # Get metadata for this request. request = self.requests[req_id] + if request.prompt_token_ids is None: + # Prompt logprobs is incompatible with prompt embeddings + continue + num_prompt_tokens = len(request.prompt_token_ids) prompt_token_ids = torch.tensor(request.prompt_token_ids).to( self.device, non_blocking=True) @@ -2922,6 +3011,10 @@ def _dummy_run( **model_kwargs, **self._dummy_mm_kwargs(num_reqs), } + elif self.enable_prompt_embeds: + input_ids = None + inputs_embeds = self.inputs_embeds.gpu[:num_tokens] + model_kwargs = self._init_model_kwargs(num_tokens) else: input_ids = self.input_ids.gpu[:num_tokens] inputs_embeds = None diff --git a/vllm/v1/worker/tpu_input_batch.py b/vllm/v1/worker/tpu_input_batch.py index dfa54d0ad83b..4cd0ac352de0 100644 --- a/vllm/v1/worker/tpu_input_batch.py +++ b/vllm/v1/worker/tpu_input_batch.py @@ -9,7 +9,7 @@ from vllm.lora.request import LoRARequest from vllm.sampling_params import SamplingType -from vllm.utils import swap_dict_values +from vllm.utils import length_from_prompt_token_ids_or_embeds, swap_dict_values from vllm.v1.outputs import LogprobsTensors from vllm.v1.worker.block_table import MultiGroupBlockTable from vllm.v1.worker.gpu_input_batch import CachedRequestState @@ -213,7 +213,9 @@ def add_request( self.req_id_to_index[req_id] = req_index # Copy the prompt token ids and output token ids. 
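
For reference, the `_preprocess` branch above mixes the two input kinds: positions flagged in `is_token_ids` still carry token ids and are pushed through the embedding layer, while positions that arrived as prompt embeds are left as-is in the pre-filled `inputs_embeds` buffer. A minimal standalone sketch of that merge, with illustrative tensor names and an `embed_fn` callable standing in for `model.get_input_embeddings` (not the runner's actual CpuGpuBuffer objects):

```python
import torch


def merge_ids_into_embeds(input_ids: torch.Tensor,
                          inputs_embeds: torch.Tensor,
                          is_token_ids: torch.Tensor,
                          embed_fn) -> torch.Tensor:
    # Only positions that hold real token ids need embedding; prompt-embed
    # positions were already scattered into inputs_embeds during input prep.
    token_ids_idx = is_token_ids.nonzero(as_tuple=False).squeeze(1)
    if token_ids_idx.numel() > 0:
        inputs_embeds[token_ids_idx] = embed_fn(input_ids[token_ids_idx])
    return inputs_embeds
```
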
- num_prompt_tokens = len(request.prompt_token_ids) + num_prompt_tokens = length_from_prompt_token_ids_or_embeds( + request.prompt_token_ids, request.prompt_embeds) + # TODO: copy prompt_embeds self.num_prompt_tokens[req_index] = num_prompt_tokens self.token_ids_cpu[ req_index, :num_prompt_tokens] = request.prompt_token_ids diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 43f12912707f..01a8e5c3f0db 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -387,6 +387,7 @@ def _update_states(self, scheduler_output: "SchedulerOutput") -> bool: self.requests[req_id] = CachedRequestState( req_id=req_id, prompt_token_ids=new_req_data.prompt_token_ids, + prompt_embeds=new_req_data.prompt_embeds, mm_features=new_req_data.mm_features, sampling_params=sampling_params, pooling_params=None, From 9d1c50a5ac8726f4af0d4a4e85ad4d26a674ad26 Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Fri, 19 Sep 2025 03:20:51 +0300 Subject: [PATCH 131/518] [KV offload][2/N] Introduce LRU-based CPU offloading management (#20075) Signed-off-by: Or Ozeri --- tests/v1/kv_offload/test_cpu.py | 175 +++++++++++++++++++++++++++++ vllm/v1/kv_offload/backend.py | 96 ++++++++++++++++ vllm/v1/kv_offload/backends/cpu.py | 61 ++++++++++ vllm/v1/kv_offload/lru_manager.py | 132 ++++++++++++++++++++++ 4 files changed, 464 insertions(+) create mode 100644 tests/v1/kv_offload/test_cpu.py create mode 100644 vllm/v1/kv_offload/backend.py create mode 100644 vllm/v1/kv_offload/backends/cpu.py create mode 100644 vllm/v1/kv_offload/lru_manager.py diff --git a/tests/v1/kv_offload/test_cpu.py b/tests/v1/kv_offload/test_cpu.py new file mode 100644 index 000000000000..cdee7811d85b --- /dev/null +++ b/tests/v1/kv_offload/test_cpu.py @@ -0,0 +1,175 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections.abc import Iterable +from dataclasses import dataclass +from typing import Optional + +import numpy as np + +from vllm.v1.core.kv_cache_utils import BlockHash +from vllm.v1.kv_offload.abstract import (LoadStoreSpec, OffloadingEvent, + PrepareStoreOutput) +from vllm.v1.kv_offload.backends.cpu import CPUBackend +from vllm.v1.kv_offload.lru_manager import LRUOffloadingManager +from vllm.v1.kv_offload.mediums import CPULoadStoreSpec + + +@dataclass +class ExpectedPrepareStoreOutput: + block_hashes_to_store: list[int] + store_block_ids: list[int] + block_hashes_evicted: list[int] + + +def to_hashes(int_hashes: list[int]) -> list[BlockHash]: + return [BlockHash(str(i).encode()) for i in int_hashes] + + +def verify_store_output( + prepare_store_output: Optional[PrepareStoreOutput], + expected_prepare_store_output: ExpectedPrepareStoreOutput): + assert prepare_store_output is not None + assert (prepare_store_output.block_hashes_to_store == to_hashes( + expected_prepare_store_output.block_hashes_to_store)) + assert (prepare_store_output.block_hashes_evicted == to_hashes( + expected_prepare_store_output.block_hashes_evicted)) + store_spec = prepare_store_output.store_spec + assert isinstance(store_spec, CPULoadStoreSpec) + expected_array = np.array(expected_prepare_store_output.store_block_ids, + dtype=np.int64) + assert np.array_equal(expected_array, store_spec.block_ids) + + +def verify_load_output(prepare_load_output: LoadStoreSpec, + expected_prepare_load_output: list[int]): + assert isinstance(prepare_load_output, CPULoadStoreSpec) + expected_array = np.array(expected_prepare_load_output, dtype=np.int64) + 
assert np.array_equal(expected_array, prepare_load_output.block_ids) + + +def verify_events(events: Iterable[OffloadingEvent], + block_size: int, + expected_stores: tuple[set[int], ...] = (), + expected_evictions: tuple[set[int], ...] = ()): + stores: list[set[BlockHash]] = [] + evictions: list[set[BlockHash]] = [] + for event in events: + assert event.medium == CPULoadStoreSpec.medium() + assert event.block_size == block_size + if event.removed: + evictions.append(set(event.block_hashes)) + else: + stores.append(set(event.block_hashes)) + + def to_hash_sets( + int_sets: tuple[set[int], ...]) -> tuple[set[BlockHash], ...]: + return tuple([set(to_hashes(list(int_set))) for int_set in int_sets]) + + assert tuple(evictions) == to_hash_sets(expected_evictions) + assert tuple(stores) == to_hash_sets(expected_stores) + + +def test_cpu_manager(): + """ + Tests LRUOffloadingManager with a CPUBackend. + """ + # initialize a CPU backend with a capacity of 4 blocks + block_size = 256 + cpu_backend = CPUBackend(block_size=block_size, num_blocks=4) + cpu_manager = LRUOffloadingManager(cpu_backend, enable_events=True) + + # prepare store [1, 2] + prepare_store_output = cpu_manager.prepare_store(to_hashes([1, 2])) + verify_store_output( + prepare_store_output, + ExpectedPrepareStoreOutput( + block_hashes_to_store=[1, 2], + store_block_ids=[0, 1], + block_hashes_evicted=[], + )) + + # lookup [1, 2] -> not ready + assert cpu_manager.lookup(to_hashes([1, 2])) == 0 + + # no events so far + assert list(cpu_manager.take_events()) == [] + + # complete store [1, 2] + cpu_manager.complete_store(to_hashes([1, 2])) + verify_events(cpu_manager.take_events(), + block_size=block_size, + expected_stores=({1, 2}, )) + + # lookup [1, 2] + assert cpu_manager.lookup(to_hashes([1])) == 1 + assert cpu_manager.lookup(to_hashes([1, 2])) == 2 + assert cpu_manager.lookup(to_hashes([1, 2, 3])) == 2 + + # prepare store [2, 3, 4, 5] -> evicts [1] + prepare_store_output = cpu_manager.prepare_store(to_hashes([2, 3, 4, 5])) + verify_store_output( + prepare_store_output, + ExpectedPrepareStoreOutput( + block_hashes_to_store=[3, 4, 5], + store_block_ids=[2, 3, 0], + block_hashes_evicted=[1], + )) + + # verify eviction event + verify_events(cpu_manager.take_events(), + block_size=block_size, + expected_evictions=({1}, )) + + # prepare store with no space + assert cpu_manager.prepare_store(to_hashes([1, 6])) is None + + # complete store [2, 3, 4, 5] + cpu_manager.complete_store(to_hashes([2, 3, 4, 5])) + + # prepare load [2, 3] + prepare_load_output = cpu_manager.prepare_load(to_hashes([2, 3])) + verify_load_output(prepare_load_output, [1, 2]) + + # prepare store with no space ([2, 3] is being loaded) + assert cpu_manager.prepare_store(to_hashes([6, 7, 8])) is None + + # complete load [2, 3] + cpu_manager.complete_load(to_hashes([2, 3])) + + # prepare store [6, 7, 8] -> evicts [2, 3, 4] (oldest) + prepare_store_output = cpu_manager.prepare_store(to_hashes([6, 7, 8])) + verify_store_output( + prepare_store_output, + ExpectedPrepareStoreOutput( + block_hashes_to_store=[6, 7, 8], + store_block_ids=[3, 2, 1], + block_hashes_evicted=[2, 3, 4], + )) + + # complete store [6, 7, 8] + cpu_manager.complete_store(to_hashes([6, 7, 8])) + + # touch [5, 6, 7] (move to end of LRU order) + cpu_manager.touch(to_hashes([5, 6, 7])) + + # prepare store [7, 9] -> evicts [8] (oldest following previous touch) + prepare_store_output = cpu_manager.prepare_store(to_hashes([9])) + verify_store_output( + prepare_store_output, + ExpectedPrepareStoreOutput( + 
block_hashes_to_store=[9], + store_block_ids=[1], + block_hashes_evicted=[8], + )) + + # complete store [7, 9] with failure + cpu_manager.complete_store(to_hashes([7, 9]), success=False) + + # assert [7] is still stored, but [9] is not + assert cpu_manager.lookup(to_hashes([7])) == 1 + assert cpu_manager.lookup(to_hashes([9])) == 0 + + verify_events(cpu_manager.take_events(), + block_size=block_size, + expected_stores=({3, 4, 5}, {6, 7, 8}), + expected_evictions=({2, 3, 4}, {8})) diff --git a/vllm/v1/kv_offload/backend.py b/vllm/v1/kv_offload/backend.py new file mode 100644 index 000000000000..87a74200116b --- /dev/null +++ b/vllm/v1/kv_offload/backend.py @@ -0,0 +1,96 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import ctypes +from abc import ABC, abstractmethod +from collections.abc import Iterable + +from vllm.v1.core.kv_cache_utils import BlockHash +from vllm.v1.kv_offload.abstract import LoadStoreSpec + + +class BlockStatus(ctypes.Structure): + """ + Offloading status for a single block of KV data. + Holds the following information: + + ref_cnt - the current number of transfers using this block as a source. + A value of -1 indicates the block is not yet ready to be read. + load_store_spec - backend-specific information on how to actually + read/write the block. + """ + _fields_ = [("ref_cnt", ctypes.c_int32)] + + def __init__(self): + super().__init__() + # initialize block as "not ready" (ref_cnt = -1) + self.ref_cnt = -1 + + @property + def is_ready(self) -> bool: + """ + Returns whether the block is ready to be read. + """ + return self.ref_cnt >= 0 + + +class Backend(ABC): + """ + An abstract class for allocating and returning specs for writing + KV blocks to some backend. + """ + + def __init__(self, block_size: int, medium: str): + self.block_size = block_size + self.medium = medium + + @abstractmethod + def get_num_free_blocks(self): + """ + Returns the number of current number of blocks that can be allocated. + """ + pass + + @abstractmethod + def allocate_blocks(self, + block_hashes: list[BlockHash]) -> list[BlockStatus]: + """ + Allocate space for writing blocks. + This method assumes there is enough space for allocation. + It is unsafe to use without checking get_num_free_blocks beforehand. + + Args: + block_hashes: the hashes identifying the blocks to be written. + + Returns: + A list of BlockStatus for the allocated blocks. + The ref_cnt of each returned item will be -1, meaning the block + is not yet ready to be read. + """ + pass + + @abstractmethod + def free(self, block: BlockStatus): + """ + Free a previously allocated block. + You should only call this function with blocks returned by + allocate_blocks, and only once per each block. + + Args: + block: The block to be freed. + """ + pass + + def get_load_store_spec(self, block_hashes: Iterable[BlockHash], + blocks: Iterable[BlockStatus]) -> LoadStoreSpec: + """ + Get backend-specific information on how to read/write blocks. + + Args: + block_hashes: the list of block hashes identifying the blocks. + blocks: the list of blocks. + + Returns: + A LoadStoreSpec that can be used by a worker + to read/write the blocks. 
+ """ + raise NotImplementedError diff --git a/vllm/v1/kv_offload/backends/cpu.py b/vllm/v1/kv_offload/backends/cpu.py new file mode 100644 index 000000000000..eb1123d1d83a --- /dev/null +++ b/vllm/v1/kv_offload/backends/cpu.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import ctypes +from collections.abc import Iterable + +from vllm.v1.core.kv_cache_utils import BlockHash +from vllm.v1.kv_offload.abstract import LoadStoreSpec +from vllm.v1.kv_offload.backend import Backend, BlockStatus +from vllm.v1.kv_offload.mediums import CPULoadStoreSpec + + +class CPUBlockStatus(BlockStatus): + _fields_ = BlockStatus._fields_ + [("block_id", ctypes.c_int64) + ] # type: ignore + + def __init__(self, block_id: int): + super().__init__() + self.block_id = block_id + + +class CPUBackend(Backend): + + def __init__(self, block_size: int, num_blocks: int): + super().__init__(block_size=block_size, + medium=CPULoadStoreSpec.medium()) + + self.num_blocks: int = num_blocks + self.num_allocated_blocks: int = 0 + self.allocated_blocks_free_list: list[int] = [] + + def get_num_free_blocks(self): + return (len(self.allocated_blocks_free_list) + self.num_blocks - + self.num_allocated_blocks) + + def allocate_blocks(self, + block_hashes: list[BlockHash]) -> list[BlockStatus]: + num_fresh_blocks = min(len(block_hashes), + self.num_blocks - self.num_allocated_blocks) + num_reused_blocks = len(block_hashes) - num_fresh_blocks + assert len(self.allocated_blocks_free_list) >= num_reused_blocks + + # allocate fresh blocks + blocks: list[BlockStatus] = [] + for _ in range(num_fresh_blocks): + blocks.append(CPUBlockStatus(self.num_allocated_blocks)) + self.num_allocated_blocks += 1 + + # allocate reused blocks + for _ in range(num_reused_blocks): + block_id = self.allocated_blocks_free_list.pop() + blocks.append(CPUBlockStatus(block_id)) + + return blocks + + def free(self, block: BlockStatus): + assert isinstance(block, CPUBlockStatus) + self.allocated_blocks_free_list.append(block.block_id) + + def get_load_store_spec(self, block_hashes: Iterable[BlockHash], + blocks: Iterable[BlockStatus]) -> LoadStoreSpec: + return CPULoadStoreSpec([block.block_id for block in blocks]) diff --git a/vllm/v1/kv_offload/lru_manager.py b/vllm/v1/kv_offload/lru_manager.py new file mode 100644 index 000000000000..18d3b1d637b3 --- /dev/null +++ b/vllm/v1/kv_offload/lru_manager.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections import OrderedDict +from collections.abc import Iterable +from typing import Optional + +from vllm.v1.core.kv_cache_utils import BlockHash +from vllm.v1.kv_offload.abstract import (LoadStoreSpec, OffloadingEvent, + OffloadingManager, PrepareStoreOutput) +from vllm.v1.kv_offload.backend import Backend, BlockStatus + + +class LRUOffloadingManager(OffloadingManager): + """ + An OffloadingManager with a pluggable backend, which evicts blocks by LRU. 
+ """ + + def __init__(self, backend: Backend, enable_events: bool = False): + self.backend: Backend = backend + # block_hash -> BlockStatus + self.blocks: OrderedDict[BlockHash, BlockStatus] = OrderedDict() + self.events: Optional[list[OffloadingEvent]] = \ + [] if enable_events else None + + def lookup(self, block_hashes: Iterable[BlockHash]) -> int: + hit_count = 0 + for block_hash in block_hashes: + block = self.blocks.get(block_hash) + if block is None or not block.is_ready: + break + hit_count += 1 + return hit_count + + def prepare_load(self, block_hashes: Iterable[BlockHash]) -> LoadStoreSpec: + blocks = [] + for block_hash in block_hashes: + block = self.blocks[block_hash] + assert block.is_ready + block.ref_cnt += 1 + blocks.append(block) + + return self.backend.get_load_store_spec(block_hashes, blocks) + + def touch(self, block_hashes: Iterable[BlockHash]): + for block_hash in reversed(list(block_hashes)): + if self.blocks.get(block_hash): + self.blocks.move_to_end(block_hash) + + def complete_load(self, block_hashes: Iterable[BlockHash]): + for block_hash in block_hashes: + block = self.blocks[block_hash] + assert block.ref_cnt > 0 + block.ref_cnt -= 1 + + def prepare_store( + self, + block_hashes: Iterable[BlockHash]) -> Optional[PrepareStoreOutput]: + # filter out blocks that are already stored + block_hashes_to_store = [ + block_hash for block_hash in block_hashes + if block_hash not in self.blocks + ] + + num_blocks_to_evict = (len(block_hashes_to_store) - + self.backend.get_num_free_blocks()) + + # build list of blocks to evict + to_evict = [] + if num_blocks_to_evict > 0: + for block_hash, block in self.blocks.items(): + if block.ref_cnt == 0: + to_evict.append(block_hash) + num_blocks_to_evict -= 1 + if num_blocks_to_evict == 0: + break + else: + # we could not evict enough blocks + return None + + # evict blocks + for block_hash in to_evict: + self.backend.free(self.blocks.pop(block_hash)) + + if to_evict and self.events is not None: + self.events.append( + OffloadingEvent(block_hashes=to_evict, + block_size=self.backend.block_size, + medium=self.backend.medium, + removed=True)) + + blocks = self.backend.allocate_blocks(block_hashes_to_store) + assert len(blocks) == len(block_hashes_to_store) + + for block_hash, block in zip(block_hashes_to_store, blocks): + self.blocks[block_hash] = block + + # build store specs for allocated blocks + store_spec = self.backend.get_load_store_spec(block_hashes_to_store, + blocks) + + return PrepareStoreOutput(block_hashes_to_store=block_hashes_to_store, + store_spec=store_spec, + block_hashes_evicted=to_evict) + + def complete_store(self, + block_hashes: Iterable[BlockHash], + success: bool = True): + stored_block_hashes: list[BlockHash] = [] + if success: + for block_hash in block_hashes: + block = self.blocks[block_hash] + if not block.is_ready: + block.ref_cnt = 0 + stored_block_hashes.append(block_hash) + else: + for block_hash in block_hashes: + block = self.blocks[block_hash] + if not block.is_ready: + self.backend.free(block) + del self.blocks[block_hash] + + if stored_block_hashes and self.events is not None: + self.events.append( + OffloadingEvent(block_hashes=stored_block_hashes, + block_size=self.backend.block_size, + medium=self.backend.medium, + removed=False)) + + def take_events(self) -> Iterable[OffloadingEvent]: + if self.events is not None: + yield from self.events + self.events.clear() From 6d8246aaffff3ebec84767e373212a7b8da328e2 Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Thu, 18 Sep 2025 19:11:59 -0700 Subject: 
[PATCH 132/518] [gpt-oss] Add ResponseReasoningPartAddedEvent, ResponseReasoningPartDoneEvent for streaming (#24938) Signed-off-by: Andrew Xia --- .../openai/test_response_api_with_harmony.py | 56 +++++++++++- vllm/entrypoints/openai/protocol.py | 88 ++++++++++++++----- vllm/entrypoints/openai/serving_responses.py | 32 ++++--- 3 files changed, 143 insertions(+), 33 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index eceaff672112..8d974d56b445 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -287,6 +287,57 @@ async def test_stateful_multi_turn(client: OpenAI, model_name: str): assert response3.status == "completed" +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +async def test_streaming_types(client: OpenAI, model_name: str): + prompts = [ + "tell me a story about a cat in 20 words", + ] + + # this links the "done" type with the "start" type + # so every "done" type should have a corresponding "start" type + # and every open block should be closed by the end of the stream + pairs_of_event_types = { + "response.completed": "response.created", + "response.output_item.done": "response.output_item.added", + "response.content_part.done": "response.content_part.added", + "response.output_text.done": "response.output_text.delta", + "response.web_search_call.done": "response.web_search_call.added", + "response.reasoning_text.done": "response.reasoning_text.delta", + "response.reasoning_part.done": "response.reasoning_part.added", + } + + for prompt in prompts: + response = await client.responses.create( + model=model_name, + input=prompt, + reasoning={"effort": "low"}, + tools=[], + stream=True, + background=False, + ) + + stack_of_event_types = [] + async for event in response: + if event.type == 'response.created': + stack_of_event_types.append(event.type) + elif event.type == 'response.completed': + assert stack_of_event_types[-1] == pairs_of_event_types[ + event.type] + stack_of_event_types.pop() + if event.type.endswith("added"): + stack_of_event_types.append(event.type) + elif event.type.endswith("delta"): + if stack_of_event_types[-1] == event.type: + continue + stack_of_event_types.append(event.type) + elif event.type.endswith("done"): + assert stack_of_event_types[-1] == pairs_of_event_types[ + event.type] + stack_of_event_types.pop() + assert len(stack_of_event_types) == 0 + + @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) @pytest.mark.parametrize("background", [True, False]) @@ -343,7 +394,10 @@ async def test_streaming(client: OpenAI, model_name: str, background: bool): assert event.item_id == current_item_id # verify content_index_id is correct - if event.type == "response.content_part.added": + if event.type in [ + "response.content_part.added", + "response.reasoning_part.added" + ]: assert event.content_index != current_content_index current_content_index = event.content_index elif event.type in [ diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 6b54511a66f3..05d5d6d964dd 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -31,6 +31,8 @@ ResponseReasoningTextDeltaEvent, ResponseReasoningTextDoneEvent, ResponseStatus, ResponseWebSearchCallCompletedEvent, ResponseWebSearchCallInProgressEvent, ResponseWebSearchCallSearchingEvent) +from 
openai.types.responses.response_reasoning_item import ( + Content as ResponseReasoningTextContent) # Backward compatibility for OpenAI client versions try: # For older openai versions (< 1.100.0) @@ -260,26 +262,6 @@ def get_logits_processors(processors: Optional[LogitsProcessors], ResponseReasoningItem, ResponseFunctionToolCall] -StreamingResponsesResponse: TypeAlias = Union[ - ResponseCreatedEvent, - ResponseInProgressEvent, - ResponseCompletedEvent, - ResponseOutputItemAddedEvent, - ResponseOutputItemDoneEvent, - ResponseContentPartAddedEvent, - ResponseContentPartDoneEvent, - ResponseReasoningTextDeltaEvent, - ResponseReasoningTextDoneEvent, - ResponseCodeInterpreterCallInProgressEvent, - ResponseCodeInterpreterCallCodeDeltaEvent, - ResponseWebSearchCallInProgressEvent, - ResponseWebSearchCallSearchingEvent, - ResponseWebSearchCallCompletedEvent, - ResponseCodeInterpreterCallCodeDoneEvent, - ResponseCodeInterpreterCallInterpretingEvent, - ResponseCodeInterpreterCallCompletedEvent, -] - class ResponsesRequest(OpenAIBaseModel): # Ordered by official OpenAI API documentation @@ -1916,6 +1898,72 @@ def from_request( ) +# TODO: this code can be removed once +# https://github.com/openai/openai-python/issues/2634 has been resolved +class ResponseReasoningPartDoneEvent(OpenAIBaseModel): + content_index: int + """The index of the content part that is done.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: ResponseReasoningTextContent + """The content part that is done.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.reasoning_part.done"] + """The type of the event. Always `response.reasoning_part.done`.""" + + +# TODO: this code can be removed once +# https://github.com/openai/openai-python/issues/2634 has been resolved +class ResponseReasoningPartAddedEvent(OpenAIBaseModel): + content_index: int + """The index of the content part that is done.""" + + item_id: str + """The ID of the output item that the content part was added to.""" + + output_index: int + """The index of the output item that the content part was added to.""" + + part: ResponseReasoningTextContent + """The content part that is done.""" + + sequence_number: int + """The sequence number of this event.""" + + type: Literal["response.reasoning_part.added"] + """The type of the event. 
Always `response.reasoning_part.added`.""" + + +StreamingResponsesResponse: TypeAlias = Union[ + ResponseCreatedEvent, + ResponseInProgressEvent, + ResponseCompletedEvent, + ResponseOutputItemAddedEvent, + ResponseOutputItemDoneEvent, + ResponseContentPartAddedEvent, + ResponseContentPartDoneEvent, + ResponseReasoningTextDeltaEvent, + ResponseReasoningTextDoneEvent, + ResponseReasoningPartAddedEvent, + ResponseReasoningPartDoneEvent, + ResponseCodeInterpreterCallInProgressEvent, + ResponseCodeInterpreterCallCodeDeltaEvent, + ResponseWebSearchCallInProgressEvent, + ResponseWebSearchCallSearchingEvent, + ResponseWebSearchCallCompletedEvent, + ResponseCodeInterpreterCallCodeDoneEvent, + ResponseCodeInterpreterCallInterpretingEvent, + ResponseCodeInterpreterCallCompletedEvent, +] + BatchRequestInputBody = Union[ChatCompletionRequest, EmbeddingRequest, ScoreRequest, RerankRequest] diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 469d74272b0e..4894623aeac2 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -58,6 +58,8 @@ InputTokensDetails, OutputTokensDetails, RequestResponseMetadata, + ResponseReasoningPartAddedEvent, + ResponseReasoningPartDoneEvent, ResponsesRequest, ResponsesResponse, ResponseUsage, StreamingResponsesResponse) @@ -1280,14 +1282,13 @@ async def _process_harmony_streaming_events( # Deal with tool call here pass elif previous_item.channel == "analysis": + content = ResponseReasoningTextContent( + text=previous_item.content[0].text, + type="reasoning_text", + ) reasoning_item = ResponseReasoningItem( type="reasoning", - content=[ - ResponseReasoningTextContent( - text=previous_item.content[0].text, - type="reasoning_text", - ), - ], + content=[content], status="completed", id=current_item_id, summary=[], @@ -1301,6 +1302,15 @@ async def _process_harmony_streaming_events( content_index=current_content_index, text=previous_item.content[0].text, )) + yield _increment_sequence_number_and_return( + ResponseReasoningPartDoneEvent( + type="response.reasoning_part.done", + sequence_number=-1, + item_id=current_item_id, + output_index=current_output_index, + content_index=current_content_index, + part=content, + )) yield _increment_sequence_number_and_return( ResponseOutputItemDoneEvent( type="response.output_item.done", @@ -1412,17 +1422,15 @@ async def _process_harmony_streaming_events( )) current_content_index += 1 yield _increment_sequence_number_and_return( - ResponseContentPartAddedEvent( - type="response.content_part.added", + ResponseReasoningPartAddedEvent( + type="response.reasoning_part.added", sequence_number=-1, output_index=current_output_index, item_id=current_item_id, content_index=current_content_index, - part=ResponseOutputText( - type="output_text", + part=ResponseReasoningTextContent( text="", - annotations=[], - logprobs=[], + type="reasoning_text", ), )) yield _increment_sequence_number_and_return( From 1a0a04dae94b7a768c0d59b4f687bcf5e12d3127 Mon Sep 17 00:00:00 2001 From: Chen Ding Date: Fri, 19 Sep 2025 11:31:16 +0800 Subject: [PATCH 133/518] [Perf] Optimize memory peak during EAGLE model loading. 
(#24585) Signed-off-by: Chen Ding --- vllm/model_executor/models/deepseek_eagle.py | 15 ++++++------- vllm/model_executor/models/llama4_eagle.py | 22 +++++++++----------- vllm/model_executor/models/llama_eagle.py | 15 ++++++------- 3 files changed, 26 insertions(+), 26 deletions(-) diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py index b1d7f24c2f18..2770ddebc48a 100644 --- a/vllm/model_executor/models/deepseek_eagle.py +++ b/vllm/model_executor/models/deepseek_eagle.py @@ -229,14 +229,15 @@ def compute_logits( return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): + + def transform(inputs): + name, loaded_weight = inputs + if "lm_head" not in name: + name = "model." + name + return name, loaded_weight + loader = AutoWeightsLoader( self, skip_prefixes=None, ) - - model_weights = {} - for name, loaded_weight in weights: - if "lm_head" not in name: - name = "model." + name - model_weights[name] = loaded_weight - loader.load_weights(model_weights.items()) + loader.load_weights(map(transform, weights)) diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py index ece490ff2f2a..a203af53205c 100644 --- a/vllm/model_executor/models/llama4_eagle.py +++ b/vllm/model_executor/models/llama4_eagle.py @@ -205,23 +205,21 @@ def forward( def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> None: + + def transform(inputs): + name, loaded_weight = inputs + name, weight = self.permute_qk_weight_for_rotary( + name, loaded_weight) + if "lm_head" not in name: + name = "model." + name + return name, weight + loader = AutoWeightsLoader( self, # lm_head is tied with target model (Llama4ForCausalLM) skip_prefixes=(["lm_head."]), ) - - model_weights = {} - weights = [ - self.permute_qk_weight_for_rotary(name, loaded_weight) - for name, loaded_weight in weights - ] - for name, loaded_weight in weights: - if "lm_head" not in name: - name = "model." + name - model_weights[name] = loaded_weight - - loader.load_weights(model_weights.items()) + loader.load_weights(map(transform, weights)) def get_input_embeddings( self, diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py index a4933b77e3a5..dfae3c3ea543 100644 --- a/vllm/model_executor/models/llama_eagle.py +++ b/vllm/model_executor/models/llama_eagle.py @@ -158,14 +158,15 @@ def forward( return self.model(input_ids, positions, hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): + + def transform(inputs): + name, loaded_weight = inputs + if "lm_head" not in name: + name = "model." + name + return name, loaded_weight + loader = AutoWeightsLoader( self, skip_prefixes=None, ) - - model_weights = {} - for name, loaded_weight in weights: - if "lm_head" not in name: - name = "model." 
+ name - model_weights[name] = loaded_weight - loader.load_weights(model_weights.items()) + loader.load_weights(map(transform, weights)) From 31a8a2a7bccb29612bc58c9a69252bfb78f5abe4 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Thu, 18 Sep 2025 21:46:57 -0700 Subject: [PATCH 134/518] [Misc] Clean up MM profiling warnings (#25222) Signed-off-by: Roger Wang --- vllm/multimodal/profiling.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index fbbc55d3524c..9b463e212bb4 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -234,19 +234,6 @@ def get_decoder_dummy_data( prompt_token_ids = mm_inputs["prompt_token_ids"] total_len = len(prompt_token_ids) - # V0 does not support chunked prefill. - if total_len > seq_len and not envs.VLLM_USE_V1: - # `max_num_batched_tokens` is defined by `SchedulerConfig` - logger.warning_once( - "The sequence length used for profiling (max_num_batched_tokens / max_num_seqs = %d) " # noqa: E501 - "is too short to hold the multi-modal embeddings in the worst case (%d tokens in total, out of which %s are reserved for multi-modal embeddings). " # noqa: E501 - "This may cause certain multi-modal inputs to fail during inference, even when the input text is short. " # noqa: E501 - "To avoid this, you should increase `max_model_len`, reduce `max_num_seqs`, and/or reduce `mm_counts`.", # noqa: E501 - seq_len, - total_len, - str(self._get_mm_num_tokens(mm_inputs)), - ) - if total_len < seq_len: prompt_token_ids.extend([0] * (seq_len - total_len)) @@ -270,22 +257,6 @@ def _get_mm_max_tokens( mm_counts=mm_counts, ) if max_tokens_per_item is not None: - if mm_counts is None: - total_mm_tokens = sum(max_tokens_per_item.values()) - else: - total_mm_tokens = sum(max_tokens_per_item[k] * mm_counts[k] - for k in max_tokens_per_item.keys() - & mm_counts.keys()) - if total_mm_tokens > seq_len: - logger.warning_once( - "The sequence length (%d) is smaller than the pre-defined" - " worst-case total number of multimodal tokens (%d). " - "This may cause certain multi-modal inputs to fail during " - "inference. To avoid this, you should increase " - "`max_model_len` or reduce `mm_counts`.", - seq_len, - total_mm_tokens, - ) return max_tokens_per_item mm_inputs = self._get_dummy_mm_inputs(seq_len, mm_counts) From 6c8a3c099bc12bb00fa3899753e41183f05fe9fc Mon Sep 17 00:00:00 2001 From: Michael Yao Date: Fri, 19 Sep 2025 13:10:44 +0800 Subject: [PATCH 135/518] [Docs] Fix griffe warnings in vllm/multimodal (#25216) Signed-off-by: windsonsea --- vllm/multimodal/inputs.py | 6 +++--- vllm/multimodal/utils.py | 4 +++- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py index 240e34e139cf..e00c10fb66ee 100644 --- a/vllm/multimodal/inputs.py +++ b/vllm/multimodal/inputs.py @@ -569,8 +569,8 @@ def flat_from_sizes(modality: str, Args: modality: The modality of the multi-modal item that uses this keyword argument. - slices: For each multi-modal item, the size of the slice that - is used to extract the data corresponding to it. + size_per_item: For each multi-modal item, the size of the slice + that is used to extract the data corresponding to it. dim: The dimension to slice, default to 0. 
Example: @@ -590,7 +590,7 @@ def flat_from_sizes(modality: str, ``` Given: - slices: [3, 4, 2] + size_per_item: [3, 4, 2] dim: 1 Input: diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index b308366fca28..f4e2ed72e2d7 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -395,7 +395,9 @@ def group_mm_kwargs_by_modality( modality together into the same `MultiModalKwargs` instance. Args: - mm_inputs: List of `MultiModalKwargsItem`. + mm_kwargs: List of `MultiModalKwargsItem`. + device: The device to place the grouped tensors on. + pin_memory: Whether to pin memory for faster host-to-device transfer. Yields: A tuple `(modality, num_items, grouped_kwargs)`. From a6149aa587d6582545b7878a2dffed3a2419605d Mon Sep 17 00:00:00 2001 From: "Chendi.Xue" Date: Fri, 19 Sep 2025 00:41:53 -0500 Subject: [PATCH 136/518] [OOT] Support sync_model_loading for OOT (#25126) Signed-off-by: Chendi Xue --- vllm/model_executor/parameter.py | 6 +++--- vllm/model_executor/utils.py | 17 +++-------------- vllm/platforms/interface.py | 23 +++++++++++++++++++++++ vllm/platforms/tpu.py | 4 ++++ 4 files changed, 33 insertions(+), 17 deletions(-) diff --git a/vllm/model_executor/parameter.py b/vllm/model_executor/parameter.py index 221712ba9a33..03e5e5809b67 100644 --- a/vllm/model_executor/parameter.py +++ b/vllm/model_executor/parameter.py @@ -12,7 +12,6 @@ from vllm.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from vllm.logger import init_logger -from vllm.model_executor.utils import _make_synced_weight_loader __all__ = [ "BasevLLMParameter", "PackedvLLMParameter", "PerTensorScaleParameter", @@ -53,8 +52,9 @@ def __init__(self, data: torch.Tensor, weight_loader: Callable): # This sometimes causes OOM errors during model loading. To avoid this, # we sync the param tensor after its weight loader is called. from vllm.platforms import current_platform - if current_platform.is_tpu(): - weight_loader = _make_synced_weight_loader(weight_loader) + if current_platform.use_sync_weight_loader(): + weight_loader = current_platform.make_synced_weight_loader( + weight_loader) self._weight_loader = weight_loader self.tp_rank = get_tensor_model_parallel_rank() diff --git a/vllm/model_executor/utils.py b/vllm/model_executor/utils.py index 65436786f82a..543918418953 100644 --- a/vllm/model_executor/utils.py +++ b/vllm/model_executor/utils.py @@ -44,23 +44,12 @@ def set_weight_attrs( # TODO(woosuk): Remove this hack once we have a better solution. from vllm.platforms import current_platform - if current_platform.is_tpu() and key == "weight_loader": - value = _make_synced_weight_loader(value) + if current_platform.use_sync_weight_loader( + ) and key == "weight_loader": + value = current_platform.make_synced_weight_loader(value) setattr(weight, key, value) -def _make_synced_weight_loader(original_weight_loader): - - def _synced_weight_loader(param, *args, **kwargs): - out = original_weight_loader(param, *args, **kwargs) - # torch._sync doesn't support, is not needed for CPU tensors. 
- if param.device != torch.device("cpu"): - torch._sync(param) - return out - - return _synced_weight_loader - - def get_packed_modules_mapping(model: torch.nn.Module) -> dict[str, list[str]]: parent_map = getattr(model, "packed_modules_mapping", None) parent_map = copy.deepcopy(parent_map) if parent_map is not None else {} diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 054d08c3a85b..53fc762dce54 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -594,6 +594,29 @@ def support_hybrid_kv_cache(cls) -> bool: """ return False + @classmethod + def use_sync_weight_loader(cls) -> bool: + """ + Returns if the current platform needs to sync weight loader. + """ + return False + + @classmethod + def make_synced_weight_loader(cls, original_weight_loader): + """ + Wrap the original weight loader to make it synced. + """ + if not cls.use_sync_weight_loader(): + return original_weight_loader + + def _synced_weight_loader(param, *args, **kwargs): + out = original_weight_loader(param, *args, **kwargs) + if param.device != torch.device("cpu"): + torch._sync(param) + return out + + return _synced_weight_loader + class UnspecifiedPlatform(Platform): _enum = PlatformEnum.UNSPECIFIED diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 6a061956d814..4e4db116abca 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -226,6 +226,10 @@ def swap_out_blocks_to_host( torch.ops.xla.dynamo_set_buffer_donor_(src_cache, True) dst_cache[dst_block_indices] = src_cache[src_block_indices].cpu() + @classmethod + def use_sync_weight_loader(cls) -> bool: + return True + try: from tpu_commons.platforms import TpuPlatform as TpuCommonsPlatform From 486c5599e3ab7d721c94dd01e89c87742c01e1ac Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Fri, 19 Sep 2025 02:27:17 -0400 Subject: [PATCH 137/518] [Build] Update Xgrammar to 0.1.24 to get a CVE fix (#25188) Signed-off-by: Russell Bryant --- requirements/common.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements/common.txt b/requirements/common.txt index b8665104bd09..7973da080c37 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -24,7 +24,7 @@ outlines_core == 0.2.11 # required for outlines backend disk cache diskcache == 5.6.3 lark == 1.2.2 -xgrammar == 0.1.23; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" +xgrammar == 0.1.24; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs From 8c1d4acbfe70a1dffdb9a3db57b4d12329350295 Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Fri, 19 Sep 2025 15:27:22 +0800 Subject: [PATCH 138/518] [CPU] Disable oneDNN linear on non-x86 platforms (#25166) Signed-off-by: jiang1.li --- vllm/model_executor/layers/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py index d2b135c1e4d4..a1675ffbaa95 100644 --- a/vllm/model_executor/layers/utils.py +++ b/vllm/model_executor/layers/utils.py @@ -7,7 +7,7 @@ from vllm import _custom_ops as ops from vllm import envs -from vllm.platforms import current_platform +from vllm.platforms import CpuArchEnum, current_platform from vllm.utils import direct_register_custom_op @@ -167,7 +167,8 @@ def dispatch_cpu_unquantized_gemm( if 
remove_weight: layer.weight = torch.nn.Parameter(torch.empty(0), requires_grad=False) - elif ops._supports_onednn: + elif (ops._supports_onednn + and current_platform.get_cpu_architecture() == CpuArchEnum.X86): origin_weight = layer.weight if remove_weight: layer.weight = torch.nn.Parameter(torch.empty(0), From 825fdb11add30237e7f592f1a132d3913cd632ec Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Fri, 19 Sep 2025 15:41:12 +0800 Subject: [PATCH 139/518] [Bugfix][CPU] Add placeholder to avoid import errors when using fused_moe ops on platforms without triton (#25137) Signed-off-by: jiang1.li --- vllm/model_executor/layers/fused_moe/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vllm/model_executor/layers/fused_moe/__init__.py b/vllm/model_executor/layers/fused_moe/__init__.py index 6730f051e3d7..75f56cd01a4e 100644 --- a/vllm/model_executor/layers/fused_moe/__init__.py +++ b/vllm/model_executor/layers/fused_moe/__init__.py @@ -78,3 +78,12 @@ def get_config() -> Optional[dict[str, Any]]: "TritonOrDeepGemmExperts", "BatchedTritonOrDeepGemmExperts", ] +else: + # Some model classes directly use the custom ops. Add placeholders + # to avoid import errors. + def _raise_exception(method: str): + raise NotImplementedError( + f"{method} is not implemented as lack of triton.") + + fused_topk = lambda *args, **kwargs: _raise_exception("fused_topk") + fused_experts = lambda *args, **kwargs: _raise_exception("fused_experts") From f2718d2948e83319d83dbbade1883fef2302357e Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 19 Sep 2025 15:44:56 +0800 Subject: [PATCH 140/518] [Misc] Cleanup test conftest for deprecated encoder-decoder models (#25231) Signed-off-by: Isotr0py --- tests/conftest.py | 138 ---------------------------------------------- 1 file changed, 138 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0440e859fe02..9d433dedbf47 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -244,39 +244,6 @@ class DecoderPromptType(Enum): EMPTY_STR = 3 -@pytest.fixture -def example_encoder_decoder_prompts( -) -> dict[DecoderPromptType, list[ExplicitEncoderDecoderPrompt]]: - ''' - Returns an encoder prompt list and a decoder prompt list, wherein each pair - of same-index entries in both lists corresponds to an (encoder prompt, - decoder prompt) tuple. 
- - Returns: - - * Encoder prompt list - * Decoder prompt list (reverse of encoder prompt list) - ''' - - encoder_prompts = [] - for filename in _TEST_PROMPTS: - encoder_prompts += _read_prompts(filename) - - custom_decoder_prompts = encoder_prompts[::-1] - empty_str_decoder_prompts = [""] * len(encoder_prompts) - none_decoder_prompts = [None] * len(encoder_prompts) - - # NONE decoder prompt type - return { - DecoderPromptType.NONE: - zip_enc_dec_prompts(encoder_prompts, none_decoder_prompts), - DecoderPromptType.EMPTY_STR: - zip_enc_dec_prompts(encoder_prompts, empty_str_decoder_prompts), - DecoderPromptType.CUSTOM: - zip_enc_dec_prompts(encoder_prompts, custom_decoder_prompts), - } - - @pytest.fixture def example_long_prompts() -> list[str]: prompts = [] @@ -690,68 +657,6 @@ def generate_greedy_logprobs_limit( return [(output_ids, output_str, output_logprobs) for output_ids, output_str, output_logprobs in outputs] - def generate_encoder_decoder_greedy_logprobs_limit( - self, - encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]], - max_tokens: int, - num_logprobs: Optional[int], - images: Optional[PromptImageInput] = None, - **kwargs: Any, - ) -> list[TokensTextLogprobs]: - ''' - Greedy logprobs generation for vLLM encoder/decoder models - ''' - - all_logprobs: list[list[dict[int, float]]] = [] - all_output_ids: list[list[int]] = [] - all_output_strs: list[str] = [] - - for i, (encoder_prompt, decoder_prompt) in enumerate( - to_enc_dec_tuple_list(encoder_decoder_prompts)): - processor_kwargs: dict[str, Any] = { - "text": encoder_prompt, - "return_tensors": "pt", - } - if images is not None and images[i] is not None: - processor_kwargs["images"] = images[i] - - encoder_inputs = self.processor(**processor_kwargs) - encoder_inputs = self.wrap_device(encoder_inputs) - - if decoder_prompt is None: - decoder_input_ids = None - else: - decoder_inputs = self.tokenizer(decoder_prompt, - return_tensors="pt") - decoder_input_ids = self.wrap_device(decoder_inputs.input_ids) - - output = self.model.generate( - decoder_input_ids=decoder_input_ids, - use_cache=True, - do_sample=False, - max_new_tokens=max_tokens, - output_hidden_states=True, - return_dict_in_generate=True, - **encoder_inputs, - **kwargs, - ) - - ( - seq_logprobs_lst, - output_len, - ) = self._hidden_states_to_logprobs(output.decoder_hidden_states, - num_logprobs) - - all_logprobs.append(seq_logprobs_lst) - seq_ids = output.sequences[0] - output_ids = seq_ids[-output_len:] - all_output_ids.append(output_ids.tolist()) - all_output_strs.append(self.tokenizer.decode(output_ids)) - - outputs = zip(all_output_ids, all_output_strs, all_logprobs) - return [(output_ids, output_str, output_logprobs) - for output_ids, output_str, output_logprobs in outputs] - def encode(self, prompts: list[str], *args, **kwargs) -> list[list[torch.Tensor]]: return self.model.encode(prompts, *args, **kwargs) @@ -940,26 +845,6 @@ def generate_w_logprobs( if sampling_params.prompt_logprobs is None else toks_str_logsprobs_prompt_logprobs) - def generate_encoder_decoder_w_logprobs( - self, - encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]], - sampling_params: SamplingParams, - ) -> Union[list[TokensTextLogprobs], - list[TokensTextLogprobsPromptLogprobs]]: - ''' - Logprobs generation for vLLM encoder/decoder models - ''' - - assert sampling_params.logprobs is not None - req_outputs = self.llm.generate(encoder_decoder_prompts, - sampling_params=sampling_params) - toks_str_logsprobs_prompt_logprobs = ( - 
self._final_steps_generate_w_logprobs(req_outputs)) - # Omit prompt logprobs if not required by sampling params - return ([x[0:-1] for x in toks_str_logsprobs_prompt_logprobs] - if sampling_params.prompt_logprobs is None else - toks_str_logsprobs_prompt_logprobs) - def generate_greedy( self, prompts: Union[list[str], list[torch.Tensor]], @@ -1037,29 +922,6 @@ def generate_prompt_perplexity(self, prompts: list[str]) -> list[float]: return perplexities - def generate_encoder_decoder_greedy_logprobs( - self, - encoder_decoder_prompts: list[ExplicitEncoderDecoderPrompt[str, str]], - max_tokens: int, - num_logprobs: Optional[int], - num_prompt_logprobs: Optional[int] = None, - skip_special_tokens: bool = True, - ) -> Union[list[TokensTextLogprobs], - list[TokensTextLogprobsPromptLogprobs]]: - greedy_logprobs_params = SamplingParams( - temperature=0.0, - max_tokens=max_tokens, - logprobs=num_logprobs, - prompt_logprobs=(num_prompt_logprobs), - skip_special_tokens=skip_special_tokens, - ) - ''' - Greedy logprobs generation for vLLM encoder/decoder models - ''' - - return self.generate_encoder_decoder_w_logprobs( - encoder_decoder_prompts, greedy_logprobs_params) - def generate_beam_search( self, prompts: list[str], From a684c0124cb8ac04984b6fd621d99e1463016eac Mon Sep 17 00:00:00 2001 From: Yan Ma Date: Fri, 19 Sep 2025 16:45:06 +0800 Subject: [PATCH 141/518] [bugfix] fix MHA for models like OpenGVLab/InternVL3_5-38B (#25146) Signed-off-by: Yan Ma Co-authored-by: Isotr0py --- vllm/attention/layer.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 15c0ce33e965..8d5ebd93e063 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -430,9 +430,11 @@ def forward( key: torch.Tensor, value: torch.Tensor, ) -> torch.Tensor: - """Input shape: batch_size x seq_len x hidden_size""" - # TODO(Isotr0py): Use existing backend implementations and support FA3 - bsz, q_len, _ = query.size() + """Input shape: + (batch_size x seq_len x hidden_size) or + (batch_size x seq_len x num_heads x head_size) + """ + bsz, q_len = query.size()[:2] kv_len = key.size(1) query = query.view(bsz, q_len, self.num_heads, self.head_size) From cea91a32f2364d19d5e708026e84ce21a450c53d Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 19 Sep 2025 18:27:49 +0800 Subject: [PATCH 142/518] [Kernel][Performance] Add Triton kernel for Qwen3-VL interleaved MRoPE (#25055) Signed-off-by: Isotr0py --- tests/kernels/core/test_mrope.py | 98 +++++++++++++------ .../layers/rotary_embedding/mrope.py | 36 ++++--- 2 files changed, 88 insertions(+), 46 deletions(-) diff --git a/tests/kernels/core/test_mrope.py b/tests/kernels/core/test_mrope.py index 3f2f330f6dc3..5a903438f5e9 100644 --- a/tests/kernels/core/test_mrope.py +++ b/tests/kernels/core/test_mrope.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import NamedTuple import pytest import torch +from packaging.version import Version from transformers import AutoConfig +from transformers import __version__ as TRANSFORMERS_VERSION from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.platforms import current_platform @@ -15,6 +18,7 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int, head_size: int, max_position_embeddings: int, dtype: torch.dtype, device: torch.device): """Generate test data for given configuration.""" + current_platform.seed_everything(42) # Create 2D 
positions (3, num_tokens) for multimodal case positions = torch.randint(0, max_position_embeddings // 4, (3, num_tokens), @@ -33,22 +37,37 @@ def generate_test_data(num_tokens: int, num_q_heads: int, num_kv_heads: int, return positions, query, key -def unroll_model_tp_dict(model_tp_dict): - return [(model_name, tp_size) - for model_name, tp_sizes in model_tp_dict.items() - for tp_size in tp_sizes] - - -model_tp_dict = { - "Qwen/Qwen2-VL-7B-Instruct": [1, 2], - "Qwen/Qwen2-VL-72B-Instruct": [1, 2], - "Qwen/Qwen2.5-VL-72B-Instruct": [1, 2], - "zai-org/GLM-4.1V-9B-Thinking": [1, 2], -} - -# https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317 -dtype_atol_rtol_list = [ - [torch.bfloat16, 1e-2, 1.6e-2], +class MRoPETestInfo(NamedTuple): + model_name: str + # https://github.com/pytorch/pytorch/blob/main/torch/testing/_comparison.py#L1317 + atol: float = 1e-2 + rtol: float = 1.6e-2 + marks: list[pytest.MarkDecorator] = [] + + +TRANSFORMERS_BASE_VERSION = Version(TRANSFORMERS_VERSION).base_version + +MODELS_TO_TEST = [ + MRoPETestInfo(model_name="zai-org/GLM-4.1V-9B-Thinking"), + MRoPETestInfo(model_name="Qwen/Qwen2-VL-7B-Instruct"), + MRoPETestInfo(model_name="Qwen/Qwen2-VL-72B-Instruct"), + MRoPETestInfo(model_name="Qwen/Qwen2.5-VL-72B-Instruct"), + MRoPETestInfo( + model_name="Qwen/Qwen3-VL-4B-Instruct", + marks=[ + pytest.mark.skipif( + Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"), + reason="Qwen3-VL only available after Transformers v4.57", + ) + ]), + MRoPETestInfo( + model_name="Qwen/Qwen3-VL-30B-A3B-Instruct", + marks=[ + pytest.mark.skipif( + Version(TRANSFORMERS_BASE_VERSION) < Version("4.57.0"), + reason="Qwen3-VL only available after Transformers v4.57", + ) + ]), ] num_tokens_list = [11, 8192] @@ -56,20 +75,29 @@ def unroll_model_tp_dict(model_tp_dict): @pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Skipping CUDA/ROCm only tests.") -@pytest.mark.parametrize("model_name, tp_size", - unroll_model_tp_dict(model_tp_dict)) -@pytest.mark.parametrize("dtype, atol, rtol", dtype_atol_rtol_list) +@pytest.mark.parametrize("model_info, model_name", [ + pytest.param(test_config, test_config.model_name, marks=test_config.marks) + for test_config in MODELS_TO_TEST +]) +@pytest.mark.parametrize("tp_size", [1, 2]) +@pytest.mark.parametrize("dtype", [torch.bfloat16]) @pytest.mark.parametrize("num_tokens", num_tokens_list) -def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens): +def test_mrope(model_name: str, model_info: MRoPETestInfo, tp_size: int, + dtype: torch.dtype, num_tokens: int): + + atol = model_info.atol + rtol = model_info.rtol config = AutoConfig.from_pretrained(model_name) + config = config.get_text_config() # get the model config total_num_kv_heads = config.num_key_value_heads total_num_heads = config.num_attention_heads num_heads = total_num_heads // tp_size num_kv_heads = max(1, total_num_kv_heads // tp_size) - head_dim = config.hidden_size // total_num_heads + head_dim = (config.head_dim if hasattr(config, "head_dim") else + config.hidden_size // total_num_heads) is_neox_style = True rope_theta = config.rope_theta @@ -111,24 +139,30 @@ def test_mrope(model_name, tp_size, dtype, atol, rtol, num_tokens): @pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Skipping CUDA/ROCm only tests.") -@pytest.mark.parametrize( - "model_name, tp_size", - unroll_model_tp_dict({ - "Qwen/Qwen2-VL-7B-Instruct": [1, 2], - "zai-org/GLM-4.1V-9B-Thinking": [1, 2] - })) -@pytest.mark.parametrize("dtype, atol, rtol", 
dtype_atol_rtol_list) -@pytest.mark.parametrize("num_tokens", [4]) -def test_mrope_torch_compile_tracing(model_name, tp_size, dtype, atol, rtol, - num_tokens): +@pytest.mark.parametrize("model_info, model_name", [ + pytest.param(test_config, test_config.model_name, marks=test_config.marks) + for test_config in MODELS_TO_TEST +]) +@pytest.mark.parametrize("tp_size", [1, 2]) +@pytest.mark.parametrize("dtype", [torch.bfloat16]) +@pytest.mark.parametrize("num_tokens", num_tokens_list) +def test_mrope_torch_compile_tracing(model_name: str, + model_info: MRoPETestInfo, tp_size: int, + dtype: torch.dtype, num_tokens: int): + + atol = model_info.atol + rtol = model_info.rtol + config = AutoConfig.from_pretrained(model_name) + config = config.get_text_config() # get the model config total_num_kv_heads = config.num_key_value_heads total_num_heads = config.num_attention_heads num_heads = total_num_heads // tp_size num_kv_heads = max(1, total_num_kv_heads // tp_size) - head_dim = config.hidden_size // total_num_heads + head_dim = (config.head_dim if hasattr(config, "head_dim") else + config.hidden_size // total_num_heads) is_neox_style = True rope_theta = config.rope_theta max_position = config.max_position_embeddings diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py index ef61dbc1a5ab..ccc59bbbe233 100644 --- a/vllm/model_executor/layers/rotary_embedding/mrope.py +++ b/vllm/model_executor/layers/rotary_embedding/mrope.py @@ -15,7 +15,7 @@ @triton.jit -def _triton_qwen2vl_mrope_forward( +def _triton_mrope_forward( q_ptr, k_ptr, cos, @@ -30,12 +30,14 @@ def _triton_qwen2vl_mrope_forward( pad_hd: tl.constexpr, mrope_section_t: tl.constexpr, mrope_section_h: tl.constexpr, + mrope_section_w: tl.constexpr, + is_interleaved: tl.constexpr, ): # Adapted from # https://github.com/linkedin/Liger-Kernel/blob/main/src/liger_kernel/ops/qwen2vl_mrope.py # This version supports flatten input tensors from vllm # and supports cos and sin cache with shape (3, num_tokens, head_dim // 2) - # instead of (3, bsz, seq_len, head_dim) + # instead of (3, bsz, seq_len, head_dim), also supports interleaved rotary pid = tl.program_id(0) # locate start address q_ptr = q_ptr + pid * (n_qh * hd) @@ -47,9 +49,6 @@ def _triton_qwen2vl_mrope_forward( # #################################################################### # Note: cos and sin now have shape (3, num_tokens, head_dim // 2) - t_end = mrope_section_t - h_end = t_end + mrope_section_h - # Updated stride calculation for half head_dim half_rd = rd // 2 t_cos = cos + pid * half_rd @@ -61,9 +60,18 @@ def _triton_qwen2vl_mrope_forward( # Updated offsets for half head_dim cos_offsets = tl.arange(0, pad_hd // 2) - t_mask = cos_offsets < t_end - h_mask = (t_end <= cos_offsets) & (cos_offsets < h_end) - w_mask = (h_end <= cos_offsets) & (cos_offsets < half_rd) + if is_interleaved: + h_mask = (((cos_offsets % 3) == 1) & + (cos_offsets <= 3 * mrope_section_h)) + w_mask = (((cos_offsets % 3) == 2) & + (cos_offsets <= 3 * mrope_section_w)) + t_mask = ~(h_mask | w_mask) + else: + t_end = mrope_section_t + h_end = t_end + mrope_section_h + t_mask = cos_offsets < mrope_section_t + h_mask = (t_end <= cos_offsets) & (cos_offsets < h_end) + w_mask = (h_end <= cos_offsets) & (cos_offsets < half_rd) t_cos_row = tl.load(t_cos + cos_offsets, mask=t_mask, other=0) h_cos_row = tl.load(h_cos + cos_offsets, mask=h_mask, other=0) @@ -131,6 +139,7 @@ def triton_mrope( mrope_section: list[int], head_size: int, rotary_dim: int, + 
mrope_interleaved: bool, ) -> tuple[torch.Tensor, torch.Tensor]: """Qwen2VL mrope kernel. @@ -158,7 +167,7 @@ def triton_mrope( cos = cos.contiguous() sin = sin.contiguous() - _triton_qwen2vl_mrope_forward[(n_row, )]( + _triton_mrope_forward[(n_row, )]( q, k, cos, @@ -173,6 +182,8 @@ def triton_mrope( pad_hd, mrope_section[0], mrope_section[1], + mrope_section[2], + mrope_interleaved, ) return q, k @@ -201,7 +212,7 @@ def __init__( is_neox_style: bool, dtype: torch.dtype, mrope_section: Optional[list[int]] = None, - mrope_interleaved: Optional[bool] = False, + mrope_interleaved: bool = False, ) -> None: # In Qwen2.5-VL, the maximum index value is related to the duration of # the input video. We enlarge max_position_embeddings to 4 times to get @@ -282,10 +293,6 @@ def forward_cuda( assert positions.ndim == 1 or positions.ndim == 2 assert key is not None - if self.mrope_interleaved: - # TODO: add triton implementation to support mrope-interleaved - return self.forward_native(positions, query, key) - num_tokens = positions.shape[-1] cos_sin = self.cos_sin_cache[positions] cos, sin = cos_sin.chunk(2, dim=-1) @@ -302,6 +309,7 @@ def forward_cuda( self.mrope_section, self.head_size, self.rotary_dim, + self.mrope_interleaved, ) return q.reshape(query_shape), k.reshape(key_shape) From 1dfea5f4a95df8d14b46433a479a28d56e60494c Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Fri, 19 Sep 2025 03:46:16 -0700 Subject: [PATCH 143/518] [Bugfix][Perf] Misc fixes for Qwen3 VL (#25238) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_vl.py | 23 ++++++++++------------ vllm/model_executor/models/qwen3_vl_moe.py | 2 ++ 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 2c36dfbce7f6..c224b78e2c27 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -1075,6 +1075,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"): config.text_config.hidden_size) for _ in range(self.deepstack_num_level) ] if self.use_deepstack else None + self.visual_dim = config.vision_config.out_hidden_size + self.multiscale_dim = self.visual_dim * self.deepstack_num_level def _get_deepstack_input_embeds(self, num_tokens: int) -> IntermediateTensors: @@ -1313,12 +1315,8 @@ def _compute_deepstack_embeds( ] multimodal_embeddings_cat = torch.cat(multimodal_embeddings, dim=0) - visual_dim = multimodal_embeddings_cat.shape[-1] // ( - self.deepstack_num_level + 1) - - main_dim, multi_dim = visual_dim, visual_dim * self.deepstack_num_level multimodal_embeddings_main, multimodal_embeddings_multiscale = torch.split( # noqa:E501 - multimodal_embeddings_cat, [main_dim, multi_dim], + multimodal_embeddings_cat, [self.visual_dim, self.multiscale_dim], dim=-1) multimodal_embeddings = torch.split(multimodal_embeddings_main, @@ -1340,10 +1338,8 @@ def _compute_deepstack_embeds( ], ) deepstack_input_embeds = deepstack_input_embeds.view( - inputs_embeds.shape[0], self.deepstack_num_level, - visual_dim).contiguous() - deepstack_input_embeds = deepstack_input_embeds.permute( - 1, 0, 2).contiguous() + inputs_embeds.shape[0], self.deepstack_num_level, self.visual_dim) + deepstack_input_embeds = deepstack_input_embeds.permute(1, 0, 2) return deepstack_input_embeds, multimodal_embeddings def get_input_embeddings( @@ -1353,9 +1349,10 @@ def get_input_embeddings( ) -> torch.Tensor: deepstack_input_embeds = None inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if 
multimodal_embeddings is not None and self.use_deepstack: - deepstack_input_embeds, multimodal_embeddings = self._compute_deepstack_embeds( # noqa:E501 - input_ids, inputs_embeds, multimodal_embeddings) + if multimodal_embeddings is not None: + if self.use_deepstack: + deepstack_input_embeds, multimodal_embeddings = self._compute_deepstack_embeds( # noqa:E501 + input_ids, inputs_embeds, multimodal_embeddings) inputs_embeds = merge_multimodal_embeddings( input_ids, inputs_embeds, multimodal_embeddings, [self.config.image_token_id, self.config.video_token_id]) @@ -1531,4 +1528,4 @@ def get_mm_mapping(self) -> MultiModelKeys: language_model="language_model", connector="model.visual.merger", tower_model="model.visual.", - ) + ) \ No newline at end of file diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index d25bc71dcb59..625f94cf7ad7 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -344,3 +344,5 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): config.text_config.hidden_size) for _ in range(self.deepstack_num_level) ] if self.use_deepstack else None + self.visual_dim = config.vision_config.out_hidden_size + self.multiscale_dim = self.visual_dim * self.deepstack_num_level \ No newline at end of file From 058525b9973cabfe27b7ab34dad6dbcbb6859f74 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:02:55 +0100 Subject: [PATCH 144/518] Move `PoolerConfig` from `config/__init__.py` to `config/pooler.py` (#25181) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/models/pooling_models.md | 4 +- docs/models/supported_models.md | 4 +- .../openai_embedding_long_text/README.md | 2 +- .../openai_embedding_long_text/client.py | 4 +- .../openai_embedding_long_text/service.sh | 2 +- .../openai/test_embedding_long_text.py | 2 +- .../models/language/pooling/test_embedding.py | 2 +- .../pooling/test_mm_classifier_conversion.py | 3 +- ...y => test_pooler_config_init_behaviour.py} | 62 ++++--- tests/test_config.py | 18 +- vllm/config/__init__.py | 155 ++++-------------- vllm/config/pooler.py | 97 +++++++++++ vllm/engine/arg_utils.py | 7 +- vllm/entrypoints/llm.py | 10 +- 14 files changed, 193 insertions(+), 179 deletions(-) rename tests/models/language/pooling/{test_override_pooler_config.py => test_pooler_config_init_behaviour.py} (74%) create mode 100644 vllm/config/pooler.py diff --git a/docs/models/pooling_models.md b/docs/models/pooling_models.md index 0521a22c0702..50982d3d0d0f 100644 --- a/docs/models/pooling_models.md +++ b/docs/models/pooling_models.md @@ -59,7 +59,7 @@ enabling the corresponding APIs: #### Predefined models If the [Pooler][vllm.model_executor.layers.pooler.Pooler] defined by the model accepts `pooler_config`, -you can override some of its attributes via the `--override-pooler-config` option. +you can override some of its attributes via the `--pooler-config` option. #### Converted models @@ -75,7 +75,7 @@ the pooler assigned to each task has the following attributes by default: When loading [Sentence Transformers](https://huggingface.co/sentence-transformers) models, its Sentence Transformers configuration file (`modules.json`) takes priority over the model's defaults. 
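For illustration, a minimal sketch of setting these pooling options from the CLI (the model name and values here are placeholders, mirroring the serve examples elsewhere in this patch):

```bash
vllm serve BAAI/bge-large-en-v1.5 \
  --pooler-config '{"pooling_type": "CLS", "normalize": true}'
```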
-You can further customize this via the `--override-pooler-config` option, +You can further customize this via the `--pooler-config` option, which takes priority over both the model's and Sentence Transformers's defaults. ## Offline Inference diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index b67ebcbe3c81..3a6738a27be0 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -457,7 +457,7 @@ These models primarily support the [`LLM.embed`](./pooling_models.md#llmembed) A !!! note `ssmits/Qwen2-7B-Instruct-embed-base` has an improperly defined Sentence Transformers config. - You need to manually set mean pooling by passing `--override-pooler-config '{"pooling_type": "MEAN"}'`. + You need to manually set mean pooling by passing `--pooler-config '{"pooling_type": "MEAN"}'`. !!! note For `Alibaba-NLP/gte-Qwen2-*`, you need to enable `--trust-remote-code` for the correct tokenizer to be loaded. @@ -552,7 +552,7 @@ If your model is not in the above list, we will try to automatically convert the !!! important For process-supervised reward models such as `peiyi9979/math-shepherd-mistral-7b-prm`, the pooling config should be set explicitly, - e.g.: `--override-pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`. + e.g.: `--pooler-config '{"pooling_type": "STEP", "step_tag_id": 123, "returned_token_ids": [456, 789]}'`. #### Token Classification diff --git a/examples/online_serving/openai_embedding_long_text/README.md b/examples/online_serving/openai_embedding_long_text/README.md index 04edc4680ea0..00d3ded3e41c 100644 --- a/examples/online_serving/openai_embedding_long_text/README.md +++ b/examples/online_serving/openai_embedding_long_text/README.md @@ -42,7 +42,7 @@ python client.py ### Server Configuration -The key parameters for chunked processing are in the `--override-pooler-config`: +The key parameters for chunked processing are in the `--pooler-config`: ```json { diff --git a/examples/online_serving/openai_embedding_long_text/client.py b/examples/online_serving/openai_embedding_long_text/client.py index 6e9838ac6d8d..4a3674bb3f2a 100644 --- a/examples/online_serving/openai_embedding_long_text/client.py +++ b/examples/online_serving/openai_embedding_long_text/client.py @@ -13,7 +13,7 @@ # MEAN pooling (processes all chunks, recommended for complete coverage) vllm serve intfloat/multilingual-e5-large \ - --override-pooler-config \ + --pooler-config \ '{"pooling_type": "MEAN", "normalize": true, ' \ '"enable_chunked_processing": true, "max_embed_len": 3072000}' \ --served-model-name multilingual-e5-large \ @@ -23,7 +23,7 @@ # OR CLS pooling (native CLS within chunks, MEAN aggregation across chunks) vllm serve BAAI/bge-large-en-v1.5 \ - --override-pooler-config \ + --pooler-config \ '{"pooling_type": "CLS", "normalize": true, ' \ '"enable_chunked_processing": true, "max_embed_len": 1048576}' \ --served-model-name bge-large-en-v1.5 \ diff --git a/examples/online_serving/openai_embedding_long_text/service.sh b/examples/online_serving/openai_embedding_long_text/service.sh index 56888c8aa0e4..1577de85f7ff 100644 --- a/examples/online_serving/openai_embedding_long_text/service.sh +++ b/examples/online_serving/openai_embedding_long_text/service.sh @@ -103,7 +103,7 @@ POOLER_CONFIG="{\"pooling_type\": \"$POOLING_TYPE\", \"normalize\": true, \"enab vllm serve "$MODEL_NAME" \ --tensor-parallel-size "$GPU_COUNT" \ --enforce-eager \ - --override-pooler-config "$POOLER_CONFIG" \ + --pooler-config 
"$POOLER_CONFIG" \ --served-model-name ${MODEL_CODE} \ --api-key "$API_KEY" \ --trust-remote-code \ diff --git a/tests/entrypoints/pooling/openai/test_embedding_long_text.py b/tests/entrypoints/pooling/openai/test_embedding_long_text.py index 2d3da238d245..ab5f765c28ed 100644 --- a/tests/entrypoints/pooling/openai/test_embedding_long_text.py +++ b/tests/entrypoints/pooling/openai/test_embedding_long_text.py @@ -216,7 +216,7 @@ def server_with_chunked_processing(): "--enforce-eager", "--max-model-len", "512", # Set smaller max_model_len to trigger chunking mechanism - '--override-pooler-config', + '--pooler-config', ('{"pooling_type": "MEAN", "normalize": true, ' '"enable_chunked_processing": true, "max_embed_len": 10000}'), "--gpu-memory-utilization", diff --git a/tests/models/language/pooling/test_embedding.py b/tests/models/language/pooling/test_embedding.py index d61ac08475e3..17513d1bb20d 100644 --- a/tests/models/language/pooling/test_embedding.py +++ b/tests/models/language/pooling/test_embedding.py @@ -58,7 +58,7 @@ def test_models( vllm_extra_kwargs = {} if model == "ssmits/Qwen2-7B-Instruct-embed-base": - vllm_extra_kwargs["override_pooler_config"] = \ + vllm_extra_kwargs["pooler_config"] = \ PoolerConfig(pooling_type="MEAN", normalize=False) max_model_len: Optional[int] = 512 diff --git a/tests/models/language/pooling/test_mm_classifier_conversion.py b/tests/models/language/pooling/test_mm_classifier_conversion.py index 166b953de43e..9814cad48a80 100644 --- a/tests/models/language/pooling/test_mm_classifier_conversion.py +++ b/tests/models/language/pooling/test_mm_classifier_conversion.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from vllm.config.pooler import PoolerConfig from vllm.platforms import current_platform @@ -99,7 +100,7 @@ def test_gemma_multimodal( convert="classify", load_format="auto", hf_overrides=update_config, - override_pooler_config={"pooling_type": "LAST"}, + pooler_config=PoolerConfig(pooling_type="LAST"), max_model_len=512, enforce_eager=True, tensor_parallel_size=1, diff --git a/tests/models/language/pooling/test_override_pooler_config.py b/tests/models/language/pooling/test_pooler_config_init_behaviour.py similarity index 74% rename from tests/models/language/pooling/test_override_pooler_config.py rename to tests/models/language/pooling/test_pooler_config_init_behaviour.py index 2b1c74652e76..9b3fbd6a6cd0 100644 --- a/tests/models/language/pooling/test_override_pooler_config.py +++ b/tests/models/language/pooling/test_pooler_config_init_behaviour.py @@ -24,18 +24,18 @@ def test_classify_models_using_activation( dtype: str, ) -> None: - with vllm_runner(model, - max_model_len=512, - dtype=dtype, - override_pooler_config=PoolerConfig( - activation=False)) as vllm_model: + with vllm_runner( + model, + max_model_len=512, + dtype=dtype, + pooler_config=PoolerConfig(activation=False)) as vllm_model: wo_activation_out = vllm_model.classify(example_prompts) - with vllm_runner(model, - max_model_len=512, - dtype=dtype, - override_pooler_config=PoolerConfig( - activation=True)) as vllm_model: + with vllm_runner( + model, + max_model_len=512, + dtype=dtype, + pooler_config=PoolerConfig(activation=True)) as vllm_model: w_activation_out = vllm_model.classify(example_prompts) for wo_activation, w_activation in zip(wo_activation_out, @@ -43,9 +43,8 @@ def test_classify_models_using_activation( wo_activation = torch.tensor(wo_activation) w_activation = torch.tensor(w_activation) - assert 
not torch.allclose( - wo_activation, w_activation, - atol=1e-2), "override_pooler_config is not working" + assert not torch.allclose(wo_activation, w_activation, + atol=1e-2), "pooler_config is not working" assert torch.allclose(softmax(wo_activation), w_activation, 1e-3 if dtype == "float" else 1e-2) @@ -65,23 +64,22 @@ def test_embed_models_using_normalize( dtype: str, ) -> None: - with vllm_runner(model, - max_model_len=512, - dtype=dtype, - override_pooler_config=PoolerConfig( - normalize=False)) as vllm_model: - wo_normalize = torch.tensor(vllm_model.embed(example_prompts)) - with vllm_runner( model, max_model_len=512, dtype=dtype, - override_pooler_config=PoolerConfig(normalize=True)) as vllm_model: + pooler_config=PoolerConfig(normalize=False)) as vllm_model: + wo_normalize = torch.tensor(vllm_model.embed(example_prompts)) + + with vllm_runner(model, + max_model_len=512, + dtype=dtype, + pooler_config=PoolerConfig(normalize=True)) as vllm_model: w_normalize = torch.tensor(vllm_model.embed(example_prompts)) assert not torch.allclose( wo_normalize, w_normalize, - atol=1e-2), "override_pooler_config normalize is not working" + atol=1e-2), "pooler_config normalize is not working" assert torch.allclose( F.normalize(wo_normalize, p=2, dim=-1), w_normalize, atol=1e-2), "w_normal should be close to normal(wo_normal)." @@ -102,18 +100,16 @@ def test_reward_models_using_softmax( dtype: str, ) -> None: - with vllm_runner( - model, - max_model_len=1024, - dtype=dtype, - override_pooler_config=PoolerConfig(softmax=False)) as vllm_model: + with vllm_runner(model, + max_model_len=1024, + dtype=dtype, + pooler_config=PoolerConfig(softmax=False)) as vllm_model: wo_softmax = vllm_model.encode(example_prompts) - with vllm_runner( - model, - max_model_len=1024, - dtype=dtype, - override_pooler_config=PoolerConfig(softmax=True)) as vllm_model: + with vllm_runner(model, + max_model_len=1024, + dtype=dtype, + pooler_config=PoolerConfig(softmax=True)) as vllm_model: w_softmax = vllm_model.encode(example_prompts) for wo, w in zip(wo_softmax, w_softmax): @@ -121,7 +117,7 @@ def test_reward_models_using_softmax( w = torch.tensor(w) assert not torch.allclose( - wo, w, atol=1e-2), "override_pooler_config softmax is not working" + wo, w, atol=1e-2), "pooler_config softmax is not working" assert torch.allclose( softmax(wo), w, atol=1e-2), "w_softmax should be close to softmax(wo_softmax)." 
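The tests above drive the renamed argument through the test runner; the same knob is reachable from the public `LLM` API. A minimal offline sketch (the checkpoint choice is illustrative only, not part of this change):

```python
from vllm import LLM
from vllm.config import PoolerConfig

# Any pooling checkpoint works; this one appears in the chunked-processing
# examples above and resolves to the pooling runner automatically.
llm = LLM(
    model="intfloat/multilingual-e5-large",
    pooler_config=PoolerConfig(pooling_type="MEAN", normalize=True),
)

outputs = llm.embed(["vLLM pooler configuration example"])
print(len(outputs[0].outputs.embedding))
```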
diff --git a/tests/test_config.py b/tests/test_config.py index 6e37bdbee59e..0796447c079b 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -207,25 +207,19 @@ def test_get_pooling_config(): model_id = "sentence-transformers/all-MiniLM-L12-v2" model_config = ModelConfig(model_id) - pooling_config = model_config._init_pooler_config() - assert pooling_config is not None - - assert pooling_config.normalize - assert pooling_config.pooling_type == PoolingType.MEAN.name + assert model_config.pooler_config is not None + assert model_config.pooler_config.normalize + assert model_config.pooler_config.pooling_type == PoolingType.MEAN.name @pytest.mark.skipif(current_platform.is_rocm(), reason="Xformers backend is not supported on ROCm.") def test_get_pooling_config_from_args(): model_id = "sentence-transformers/all-MiniLM-L12-v2" - model_config = ModelConfig(model_id) - - override_pooler_config = PoolerConfig(pooling_type='CLS', normalize=True) - model_config.override_pooler_config = override_pooler_config + pooler_config = PoolerConfig(pooling_type="CLS", normalize=True) + model_config = ModelConfig(model_id, pooler_config=pooler_config) - pooling_config = model_config._init_pooler_config() - assert pooling_config is not None - assert asdict(pooling_config) == asdict(override_pooler_config) + assert asdict(model_config.pooler_config) == asdict(pooler_config) @pytest.mark.parametrize( diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 25daca00c02d..45504e010d68 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -40,6 +40,7 @@ MultiModalConfig) from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, ParallelConfig) +from vllm.config.pooler import PoolerConfig from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy from vllm.config.speculative import SpeculativeConfig from vllm.config.structured_outputs import StructuredOutputsConfig @@ -406,13 +407,6 @@ class ModelConfig: hf_overrides: HfOverrides = field(default_factory=dict) """If a dictionary, contains arguments to be forwarded to the Hugging Face config. If a callable, it is called to update the HuggingFace config.""" - pooler_config: Optional["PoolerConfig"] = field(init=False) - """Pooler config which controls the behaviour of output pooling in pooling - models.""" - override_pooler_config: Optional[Union[dict, "PoolerConfig"]] = None - """Initialize non-default pooling config or override default pooling config - for the pooling model. e.g. `{"pooling_type": "mean", "normalize": false}`. - """ logits_processor_pattern: Optional[str] = None """Optional regex pattern specifying valid logits processor qualified names that can be passed with the `logits_processors` extra completion argument. @@ -448,6 +442,14 @@ class ModelConfig: io_processor_plugin: Optional[str] = None """IOProcessor plugin name to load at model startup""" + # Pooler config + pooler_config: Optional[PoolerConfig] = None + """Pooler config which controls the behaviour of output pooling in pooling + models.""" + override_pooler_config: Optional[Union[dict, PoolerConfig]] = None + """[DEPRECATED] Use `pooler_config` instead. This field will be removed in + v0.12.0 or v1.0.0, whichever is sooner.""" + # Multimodal config and init vars multimodal_config: Optional[MultiModalConfig] = None """Configuration for multimodal model. 
If `None`, this will be inferred @@ -709,7 +711,33 @@ def _task_to_convert(task: TaskOption) -> ConvertType: self._architecture = arch logger.info("Resolved architecture: %s", arch) - self.pooler_config = self._init_pooler_config() + # Init pooler config if needed + if self.runner_type == "pooling": + if self.override_pooler_config is not None: + logger.warning_once( + "`override_pooler_config` is deprecated and will be " + "removed in v0.12.0 or v1.0.0, whichever is sooner. " + "Please use `pooler_config` instead.") + + if isinstance(self.override_pooler_config, dict): + self.pooler_config = PoolerConfig( + **self.override_pooler_config) + else: + self.pooler_config = self.override_pooler_config + + if self.pooler_config is None: + self.pooler_config = PoolerConfig() + + base_config = get_pooling_config(self.model, self.revision) + if base_config is not None: + # Only set values that are not overridden by the user + for k, v in base_config.items(): + if getattr(self.pooler_config, k) is None: + setattr(self.pooler_config, k, v) + + default_pooling_type = self._model_info.default_pooling_type + if self.pooler_config.pooling_type is None: + self.pooler_config.pooling_type = default_pooling_type self.dtype: torch.dtype = _get_and_verify_dtype( self.model, @@ -869,29 +897,6 @@ def _get_encoder_config(self): return get_sentence_transformer_tokenizer_config( self.model, self.revision) - def _init_pooler_config(self) -> Optional["PoolerConfig"]: - if self.runner_type == "pooling": - if isinstance(self.override_pooler_config, dict): - self.override_pooler_config = PoolerConfig( - **self.override_pooler_config) - - pooler_config = self.override_pooler_config or PoolerConfig() - - base_config = get_pooling_config(self.model, self.revision) - if base_config is not None: - # Only set values that are not overridden by the user - for k, v in base_config.items(): - if getattr(pooler_config, k) is None: - setattr(pooler_config, k, v) - - default_pooling_type = self._model_info.default_pooling_type - if pooler_config.pooling_type is None: - pooler_config.pooling_type = default_pooling_type - - return pooler_config - - return None - def _verify_tokenizer_mode(self) -> None: tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower()) if tokenizer_mode not in get_args(TokenizerMode): @@ -1833,94 +1838,6 @@ def __post_init__(self): self.device = torch.device(self.device_type) -@config -@dataclass -class PoolerConfig: - """Controls the behavior of output pooling in pooling models.""" - - pooling_type: Optional[str] = None - """ - The pooling method of the pooling model. This should be a key in - [`vllm.model_executor.layers.pooler.PoolingType`][]. - """ - - ## for embeddings models - normalize: Optional[bool] = None - """ - Whether to normalize the embeddings outputs. Defaults to True. - """ - dimensions: Optional[int] = None - """ - Reduce the dimensions of embeddings if model - support matryoshka representation. Defaults to None. - """ - enable_chunked_processing: Optional[bool] = None - """ - Whether to enable chunked processing for long inputs that exceed the model's - maximum position embeddings. When enabled, long inputs will be split into - chunks, processed separately, and then aggregated using weighted averaging. - This allows embedding models to handle arbitrarily long text without CUDA - errors. Defaults to False. - """ - max_embed_len: Optional[int] = None - """ - Maximum input length allowed for embedding generation. 
When set, allows - inputs longer than max_embed_len to be accepted for embedding models. - When an input exceeds max_embed_len, it will be handled according to - the original max_model_len validation logic. - Defaults to None (i.e. set to max_model_len). - """ - - ## for classification models - activation: Optional[bool] = None - """ - Whether to apply activation function to the classification outputs. - Defaults to True. - """ - logit_bias: Optional[float] = None - """ - If provided, apply classification logit biases. Defaults to None. - """ - - ## for reward models - softmax: Optional[bool] = None - """ - Whether to apply softmax to the reward outputs. - Defaults to True. - """ - step_tag_id: Optional[int] = None - """ - If set, only the score corresponding to the ``step_tag_id`` in the - generated sentence should be returned. Otherwise, the scores for all tokens - are returned. - """ - returned_token_ids: Optional[list[int]] = None - """ - A list of indices for the vocabulary dimensions to be extracted, - such as the token IDs of ``good_token`` and ``bad_token`` in the - ``math-shepherd-mistral-7b-prm`` model. - """ - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. - """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), - usedforsecurity=False).hexdigest() - return hash_str - - _STR_DTYPE_TO_TORCH_DTYPE = { "half": torch.float16, "float16": torch.float16, diff --git a/vllm/config/pooler.py b/vllm/config/pooler.py new file mode 100644 index 000000000000..85b5a1ace85f --- /dev/null +++ b/vllm/config/pooler.py @@ -0,0 +1,97 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import hashlib +from typing import Any, Optional + +from pydantic.dataclasses import dataclass + +from vllm.config.utils import config + + +@config +@dataclass +class PoolerConfig: + """Controls the behavior of output pooling in pooling models.""" + + pooling_type: Optional[str] = None + """ + The pooling method of the pooling model. This should be a key in + [`vllm.model_executor.layers.pooler.PoolingType`][]. + """ + + ## for embeddings models + normalize: Optional[bool] = None + """ + Whether to normalize the embeddings outputs. Defaults to True. + """ + dimensions: Optional[int] = None + """ + Reduce the dimensions of embeddings if model + support matryoshka representation. Defaults to None. + """ + enable_chunked_processing: Optional[bool] = None + """ + Whether to enable chunked processing for long inputs that exceed the model's + maximum position embeddings. When enabled, long inputs will be split into + chunks, processed separately, and then aggregated using weighted averaging. + This allows embedding models to handle arbitrarily long text without CUDA + errors. Defaults to False. + """ + max_embed_len: Optional[int] = None + """ + Maximum input length allowed for embedding generation. When set, allows + inputs longer than max_embed_len to be accepted for embedding models. 
+ When an input exceeds max_embed_len, it will be handled according to + the original max_model_len validation logic. + Defaults to None (i.e. set to max_model_len). + """ + + ## for classification models + activation: Optional[bool] = None + """ + Whether to apply activation function to the classification outputs. + Defaults to True. + """ + logit_bias: Optional[float] = None + """ + If provided, apply classification logit biases. Defaults to None. + """ + + ## for reward models + softmax: Optional[bool] = None + """ + Whether to apply softmax to the reward outputs. + Defaults to True. + """ + step_tag_id: Optional[int] = None + """ + If set, only the score corresponding to the ``step_tag_id`` in the + generated sentence should be returned. Otherwise, the scores for all tokens + are returned. + """ + returned_token_ids: Optional[list[int]] = None + """ + A list of indices for the vocabulary dimensions to be extracted, + such as the token IDs of ``good_token`` and ``bad_token`` in the + ``math-shepherd-mistral-7b-prm`` model. + """ + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: list[Any] = [] + hash_str = hashlib.md5(str(factors).encode(), + usedforsecurity=False).hexdigest() + return hash_str diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 63282c425350..27462b8fa0da 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -441,6 +441,7 @@ class EngineArgs: scheduling_policy: SchedulerPolicy = SchedulerConfig.policy scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls + pooler_config: Optional[PoolerConfig] = ModelConfig.pooler_config override_pooler_config: Optional[Union[dict, PoolerConfig]] = \ ModelConfig.override_pooler_config compilation_config: CompilationConfig = \ @@ -579,8 +580,11 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: help=model_kwargs["hf_token"]["help"]) model_group.add_argument("--hf-overrides", **model_kwargs["hf_overrides"]) + model_group.add_argument("--pooler-config", + **model_kwargs["pooler_config"]) model_group.add_argument("--override-pooler-config", - **model_kwargs["override_pooler_config"]) + **model_kwargs["override_pooler_config"], + deprecated=True) model_group.add_argument("--logits-processor-pattern", **model_kwargs["logits_processor_pattern"]) model_group.add_argument("--generation-config", @@ -1031,6 +1035,7 @@ def create_model_config(self) -> ModelConfig: mm_shm_cache_max_object_size_mb=self. mm_shm_cache_max_object_size_mb, mm_encoder_tp_mode=self.mm_encoder_tp_mode, + pooler_config=self.pooler_config, override_pooler_config=self.override_pooler_config, logits_processor_pattern=self.logits_processor_pattern, generation_config=self.generation_config, diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index df6b16c73d6e..e21bfce0ab08 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -151,9 +151,11 @@ class LLM: multi-modal processor obtained from `AutoProcessor.from_pretrained`. The available overrides depend on the model that is being run. 
For example, for Phi-3-Vision: `{"num_crops": 4}`. - override_pooler_config: Initialize non-default pooling config or - override default pooling config for the pooling model. - e.g. `PoolerConfig(pooling_type="mean", normalize=False)`. + pooler_config: Initialize non-default pooling config for the pooling + model. e.g. `PoolerConfig(pooling_type="mean", normalize=False)`. + override_pooler_config: [DEPRECATED] Use `pooler_config` instead. This + argument is deprecated and will be removed in v0.12.0 or v1.0.0, + whichever is sooner. compilation_config: Either an integer or a dictionary. If it is an integer, it is used as the level of compilation optimization. If it is a dictionary, it can specify the full compilation configuration. @@ -191,6 +193,7 @@ def __init__( hf_token: Optional[Union[bool, str]] = None, hf_overrides: Optional[HfOverrides] = None, mm_processor_kwargs: Optional[dict[str, Any]] = None, + pooler_config: Optional[PoolerConfig] = None, override_pooler_config: Optional[PoolerConfig] = None, structured_outputs_config: Optional[Union[dict[ str, Any], StructuredOutputsConfig]] = None, @@ -288,6 +291,7 @@ def __init__( hf_token=hf_token, hf_overrides=hf_overrides, mm_processor_kwargs=mm_processor_kwargs, + pooler_config=pooler_config, override_pooler_config=override_pooler_config, structured_outputs_config=structured_outputs_instance, compilation_config=compilation_config_instance, From a3d087adecadd4f6f83b72181ade40ec2de92aef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Fri, 19 Sep 2025 13:09:14 +0200 Subject: [PATCH 145/518] [P/D][Nixl] Introduce `KVTransferMetrics` and aggregation strategy (#22188) Signed-off-by: NickLucche --- .../kv_connector/unit/test_nixl_connector.py | 211 +++++++++++++++++- .../kv_transfer/kv_connector/utils.py | 21 +- .../kv_transfer/kv_connector/v1/base.py | 22 +- .../kv_transfer/kv_connector/v1/metrics.py | 100 +++++++++ .../kv_connector/v1/multi_connector.py | 68 +++++- .../kv_connector/v1/nixl_connector.py | 68 +++++- vllm/v1/core/sched/scheduler.py | 27 ++- vllm/v1/metrics/loggers.py | 8 +- vllm/v1/metrics/stats.py | 3 +- vllm/v1/outputs.py | 11 +- .../worker/kv_connector_model_runner_mixin.py | 11 +- 11 files changed, 525 insertions(+), 25 deletions(-) create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/metrics.py diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 040b44dc5d2c..6e58d158c3f4 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -18,12 +18,18 @@ from vllm import LLM from vllm.config import KVTransferConfig +from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) +from vllm.distributed.kv_transfer.kv_connector.v1.multi_connector import ( + MultiKVConnectorStats) from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import ( KVConnectorRole, NixlAgentMetadata, NixlConnector, NixlConnectorMetadata, - NixlConnectorWorker) + NixlConnectorWorker, NixlKVConnectorStats) from vllm.forward_context import ForwardContext from vllm.sampling_params import SamplingParams from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend +from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput from .utils import create_request, create_scheduler, create_vllm_config @@ -475,6 +481,209 @@ def 
test_handshake_fails_on_kv_cache_layout_mismatch(self, dist_init): # NOTE: resource cleanup in mp backend is a bit finicky, so the order in which # we put here is important. First run ray, it will clean up the resources, then # the rest of the tests. +@patch( + "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper", + FakeNixlWrapper) +def test_kv_connector_stats(dist_init): + """Test that KV transfer stats are properly recorded and retrieved.""" + vllm_config = create_vllm_config() + + # Test worker role in decode server. + connector = NixlConnector(vllm_config, KVConnectorRole.WORKER) + connector.connector_worker = FakeNixlConnectorWorker(vllm_config, + connector.engine_id, + hand_shake_latency=0) + + # Verify that xfer_stats starts empty + initial_stats = connector.get_kv_connector_stats() + assert initial_stats is None + + # Create transfer metadata + request_id = "test_req_for_stats" + metadata = NixlConnectorMetadata() + metadata.add_new_req(request_id=request_id, + local_block_ids=[1, 2, 3], + kv_transfer_params={ + "remote_block_ids": [4, 5, 6], + "remote_engine_id": + FakeNixlConnectorWorker.REMOTE_ENGINE_ID, + "remote_host": "localhost", + "remote_port": 1234, + "remote_tp_size": 1, + }) + connector.bind_connector_metadata(metadata) + + # Start the transfer + dummy_ctx = ForwardContext( + no_compile_layers={}, + attn_metadata={}, + virtual_engine=0, + ) + connector.start_load_kv(dummy_ctx) + + # Verify stats are recorded after transfer is complete + max_iterations = 2 + # Clear metadata before start_load_kv to prevent reprocessing same request + connector.bind_connector_metadata(NixlConnectorMetadata()) + for _ in range(max_iterations): + # Need to call start_load_kv to process completed handshakes + connector.start_load_kv(dummy_ctx) + _, done_recving = connector.get_finished(finished_req_ids=set()) + if len(done_recving) > 0 and request_id in done_recving: + break + time.sleep( + 0.1) # Small delay to allow background handshake to complete + else: + assert "Transfer did not complete within expected iterations" + + # Now check that stats were recorded + stats_after_transfer = connector.get_kv_connector_stats() + assert isinstance(stats_after_transfer, NixlKVConnectorStats) + + # Verify stats values are recorded + assert not stats_after_transfer.is_empty() + assert stats_after_transfer.data["num_successful_transfers"] == 1 + + # Verify stats are reset after retrieval + stats_after_reset = connector.get_kv_connector_stats() + assert stats_after_reset is None + + +def test_kv_connector_stats_aggregation(): + """ + Test KV transfer stats aggregation across TP ranks using + KVOutputAggregator (used by MultiprocExecutor). 
+ """ + + # Create KVOutputAggregator for 3 workers (simulating TP=3), same thing + # done in MultiprocExecutor.execute_model + aggregator = KVOutputAggregator(world_size=3) + + # Create stats for multiple workers with different transfer patterns + worker1_stats = NixlKVConnectorStats() + worker2_stats = NixlKVConnectorStats() + worker3_stats = NixlKVConnectorStats() + + # Record different transfers on each worker + # Worker 1: 2 transfers + worker1_stats.record_transfer() + worker1_stats.record_transfer() + + # Worker 2: 1 transfer + worker2_stats.record_transfer() + + # Worker 3: 3 transfers + worker3_stats.record_transfer() + worker3_stats.record_transfer() + worker3_stats.record_transfer() + + # Create ModelRunnerOutput instances for each worker + worker_outputs = [] + for i, worker_stats in enumerate( + [worker1_stats, worker2_stats, worker3_stats]): + output = ModelRunnerOutput( + req_ids=[f"req_{i}"], + req_id_to_index={f"req_{i}": 0}, + sampled_token_ids=[[123]], # dummy token + logprobs=None, + prompt_logprobs_dict={}, + pooler_output=[None], + kv_connector_output=KVConnectorOutput( + finished_sending=set([f"req_{i}_send"]) + if i < 2 else None, # Workers 0,1 finished sending + finished_recving=set([f"req_{i}_recv"]) + if i > 0 else None, # Workers 1,2 finished receiving + kv_connector_stats=worker_stats, + )) + worker_outputs.append(output) + + # Use the real aggregation mechanism (like MultiprocExecutor.execute_model) + aggregated_output = aggregator.aggregate(worker_outputs, output_rank=0) + kv_connector_stats = \ + aggregated_output.kv_connector_output.kv_connector_stats + assert isinstance(kv_connector_stats, NixlKVConnectorStats) + # Number of total transfers across all workers. + assert kv_connector_stats.data["num_successful_transfers"] == 6 + + +def test_multi_kv_connector_stats_aggregation(): + """ + Test MultiKVConnectorStats aggregation across TP ranks using + KVOutputAggregator (used by MultiprocExecutor). 
+ """ + + aggregator = KVOutputAggregator(world_size=3) + + from dataclasses import dataclass + + @dataclass + class FooKVConnectorStats(KVConnectorStats): + + def reset(self): + self.data = {"num_foo_transfers": 0} + + def record_transfer(self): + if "num_foo_transfers" not in self.data: + self.data["num_foo_transfers"] = 0 + self.data["num_foo_transfers"] += 1 + + def is_empty(self) -> bool: + return self.data["num_foo_transfers"] == 0 + + def aggregate(self, + other: "FooKVConnectorStats") -> "FooKVConnectorStats": + if not other.is_empty(): + self.data["num_foo_transfers"] += other.data[ + "num_foo_transfers"] + return self + + def make_multi_stats(nixl_count: int, + foo_count: int) -> MultiKVConnectorStats: + data: dict[str, KVConnectorStats] = {} + if nixl_count > 0: + nixl_stats = NixlKVConnectorStats() + for _ in range(nixl_count): + nixl_stats.record_transfer() + data["NixlConnector"] = nixl_stats + if foo_count > 0: + foo_stats = FooKVConnectorStats() + for _ in range(foo_count): + foo_stats.record_transfer() + data["FooConnector"] = foo_stats + return MultiKVConnectorStats(data=data) + + # Create heterogeneous stats across 3 workers + worker_patterns = [(2, 1), (3, 0), (0, 5)] # (Nixl, Foo) + + worker_outputs: list[ModelRunnerOutput] = [] + for i, (nixl, foo) in enumerate(worker_patterns): + stats = make_multi_stats(nixl, foo) + output = ModelRunnerOutput( + req_ids=[f"req_{i}"], + req_id_to_index={f"req_{i}": 0}, + sampled_token_ids=[[123]], + logprobs=None, + prompt_logprobs_dict={}, + pooler_output=[None], + kv_connector_output=KVConnectorOutput( + finished_sending=set([f"req_{i}_send"]) if i < 2 else None, + finished_recving=set([f"req_{i}_recv"]) if i > 0 else None, + kv_connector_stats=stats, + ), + ) + worker_outputs.append(output) + + aggregated_output = aggregator.aggregate(worker_outputs, output_rank=0) + kv_connector_stats = \ + aggregated_output.kv_connector_output.kv_connector_stats + assert isinstance(kv_connector_stats, MultiKVConnectorStats) + + # Validate per-connector totals across workers + assert kv_connector_stats["NixlConnector"].data[ + "num_successful_transfers"] == 5 + assert kv_connector_stats["FooConnector"].data["num_foo_transfers"] == 6 + + @pytest.mark.parametrize("distributed_executor_backend", ["ray", None]) @patch( "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper", diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py index f4dc248a1279..911d77ba36fa 100644 --- a/vllm/distributed/kv_transfer/kv_connector/utils.py +++ b/vllm/distributed/kv_transfer/kv_connector/utils.py @@ -129,7 +129,7 @@ def __init__(self, world_size: int): def aggregate(self, outputs: list[ModelRunnerOutput], output_rank: int = 0) -> ModelRunnerOutput: - # aggregate kv_connector_output from all workers + # Aggregate kv_connector_output from all workers def update_finished_set(req_ids: Optional[set[str]], remaining_count_dict: dict[str, int], @@ -142,8 +142,9 @@ def update_finished_set(req_ids: Optional[set[str]], finished_sending = set[str]() finished_recving = set[str]() - for output in outputs: - output = output.kv_connector_output + aggregated_kv_connector_stats = None + for model_runner_output in outputs: + output = model_runner_output.kv_connector_output if not output: continue update_finished_set(output.finished_sending, @@ -151,12 +152,26 @@ def update_finished_set(req_ids: Optional[set[str]], update_finished_set(output.finished_recving, self._recv_remaining_count, finished_recving) + 
# Aggregate kv_connector_stats from all workers. + if aggregated_kv_connector_stats is None: + # Use the first worker's kv_connector_stats as accumulator. + aggregated_kv_connector_stats = output.kv_connector_stats + elif kv_connector_stats := output.kv_connector_stats: + if aggregated_kv_connector_stats is None: + aggregated_kv_connector_stats = kv_connector_stats + else: + assert isinstance(aggregated_kv_connector_stats, + type(kv_connector_stats)) + aggregated_kv_connector_stats = \ + aggregated_kv_connector_stats.aggregate(kv_connector_stats) + # select output of the worker specified by output_rank output = outputs[output_rank] output.kv_connector_output = KVConnectorOutput( finished_sending=finished_sending or None, finished_recving=finished_recving or None, + kv_connector_stats=aggregated_kv_connector_stats or None, ) return output diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/base.py b/vllm/distributed/kv_transfer/kv_connector/v1/base.py index 70c07eac6304..184d0a62f2c3 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/base.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/base.py @@ -49,6 +49,8 @@ from vllm.attention.backends.abstract import AttentionMetadata from vllm.config import VllmConfig from vllm.distributed.kv_events import KVCacheEvent + from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) from vllm.forward_context import ForwardContext from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.request import Request @@ -235,6 +237,12 @@ def shutdown(self): """ return None + def get_kv_connector_stats(self) -> Optional["KVConnectorStats"]: + """ + Get the KV connector stats collected during the last interval. + """ + return None + # ============================== # Scheduler-side methods # ============================== @@ -365,4 +373,16 @@ def get_finished_count(self) -> Optional[int]: int: expected sending or receiving completion count. """ - return None \ No newline at end of file + return None + + @classmethod + def build_kv_connector_stats( + cls, + data: Optional[dict[str, + Any]] = None) -> Optional["KVConnectorStats"]: + """ + KVConnectorStats resolution method. This method allows dynamically + registered connectors to return their own KVConnectorStats object, + which can implement custom aggregation logic on the data dict. + """ + return None diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py b/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py new file mode 100644 index 000000000000..e40007230ba4 --- /dev/null +++ b/vllm/distributed/kv_transfer/kv_connector/v1/metrics.py @@ -0,0 +1,100 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass, field +from typing import Any, Optional, Union + +from vllm.config.kv_transfer import KVTransferConfig +from vllm.distributed.kv_transfer.kv_connector.factory import ( + KVConnectorFactory) +from vllm.distributed.kv_transfer.kv_transfer_state import ( + has_kv_transfer_group) +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +@dataclass +class KVConnectorStats: + """ + Base class for KV Connector Stats, a container for transfer performance + metrics or otherwise important telemetry from the connector. + All sub-classes need to be serializable as stats are sent from worker to + logger process. 
+ """ + data: dict[str, Any] = field(default_factory=dict) + + def reset(self): + """Reset the stats, clear the state.""" + raise NotImplementedError + + def aggregate(self, other: "KVConnectorStats") -> "KVConnectorStats": + """ + Aggregate stats with another `KVConnectorStats` object. + """ + raise NotImplementedError + + def reduce(self) -> dict[str, Union[int, float]]: + """ + Reduce the observations collected during a time interval to one or + more representative values (eg avg/median/sum of the series). + This is meant to be called by the logger to produce a summary of the + stats for the last time interval. + """ + raise NotImplementedError + + def is_empty(self) -> bool: + """Return True if the stats are empty.""" + raise NotImplementedError + + +class KVConnectorLogging: + + def __init__(self, kv_tranfer_config: KVTransferConfig): + # This should be called on frontend process. + assert not has_kv_transfer_group() + # Instantiate the connector's stats class. + if kv_tranfer_config and kv_tranfer_config.kv_connector: + self.connector_cls = KVConnectorFactory.get_connector_class( + kv_tranfer_config) + self.reset() + + def reset(self): + self.transfer_stats_accumulator: Optional[KVConnectorStats] = None + + def observe(self, transfer_stats_data: dict[str, Any]): + # Should not be called when a KVConnector is not configured. + assert self.connector_cls is not None + # Called periodically when connector syncs with the scheduler. + # Note that this is not the same as the logging interval. + # We expect transfer_stats_data to be aggregated across all workers and + # consist of observations from a single connector or a MultiConnector. + transfer_stats = self.connector_cls.build_kv_connector_stats( + transfer_stats_data) + if transfer_stats is None: + logger.warning_once( + "The connector %s is collecting stats but " + "does not implement the " + "`build_kv_connector_stats` method. " + "Stats will not be logged.", self.connector_cls) + return + + if self.transfer_stats_accumulator is None: + self.transfer_stats_accumulator = transfer_stats + else: + # Accumulate last interval stats. + self.transfer_stats_accumulator = \ + self.transfer_stats_accumulator.aggregate(transfer_stats) + + def log(self, log_fn=logger.info): + """Log transfer metrics periodically, similar to throughput logging""" + if (self.transfer_stats_accumulator + and not self.transfer_stats_accumulator.is_empty()): + # Produce a single cumulative stats object for the last time + # interval from the recorded observations. 
+ xfer_metrics = self.transfer_stats_accumulator.reduce() + xfer_metrics_str = ", ".join(f"{k}={v}" + for k, v in xfer_metrics.items()) + log_fn("KV Transfer metrics: %s", xfer_metrics_str) + + # Reset metrics for next interval + self.reset() \ No newline at end of file diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py index 616d158d6767..6836a71e58d6 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py @@ -9,19 +9,21 @@ from vllm.config import VllmConfig from vllm.config.kv_transfer import KVTransferConfig -from vllm.distributed.kv_events import KVCacheEvent from vllm.distributed.kv_transfer.kv_connector.factory import ( KVConnectorFactory) from vllm.distributed.kv_transfer.kv_connector.v1.base import ( KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole) +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) from vllm.logger import init_logger -from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.outputs import KVConnectorOutput if TYPE_CHECKING: from vllm.attention.backends.abstract import AttentionMetadata + from vllm.distributed.kv_events import KVCacheEvent from vllm.forward_context import ForwardContext + from vllm.v1.core.kv_cache_manager import KVCacheBlocks from vllm.v1.request import Request logger = init_logger(__name__) @@ -33,6 +35,43 @@ class MultiKVConnectorMetadata(KVConnectorMetadata): extra_async_saves: Optional[dict[str, int]] = None +@dataclass +class MultiKVConnectorStats(KVConnectorStats): + """ + Maintain a dict of KVConnectorStats objects, one for each connector. + This is used to aggregate the stats from all connectors separately. + """ + + def aggregate(self, other: KVConnectorStats) -> KVConnectorStats: + for connector_id, stats in other.data.items(): + if connector_id not in self.data: + self[connector_id] = stats + else: + assert isinstance(stats, type(self.data[connector_id])) + self[connector_id] = self[connector_id].aggregate(stats) + return self + + def reset(self): + for stats in self.data.values(): + stats.reset() + + def reduce(self) -> dict[str, Any]: + # TODO (NickLucche) Adjust for logging on separate lines + return { + connector_id: stats.reduce() + for connector_id, stats in self.data.items() + } + + def is_empty(self) -> bool: + return all(stats.is_empty() for stats in self.data.values()) + + def __getitem__(self, connector_id: str) -> KVConnectorStats: + return self.data[connector_id] + + def __setitem__(self, connector_id: str, stats: KVConnectorStats): + self.data[connector_id] = stats + + class MultiConnector(KVConnectorBase_V1): """ A wrapper for using multiple KVConnectors at the same time. 
@@ -46,6 +85,7 @@ class MultiConnector(KVConnectorBase_V1): def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): super().__init__(vllm_config=vllm_config, role=role) self._connectors: list[KVConnectorBase_V1] = [] + self._ktc_kv_transfer_config = [] ktcs = vllm_config.kv_transfer_config.kv_connector_extra_config.get( "connectors") assert ktcs is not None @@ -57,6 +97,7 @@ def __init__(self, vllm_config: "VllmConfig", role: KVConnectorRole): **ktc, engine_id=engine_id) self._connectors.append( KVConnectorFactory.create_connector(temp_config, role)) + self._ktc_kv_transfer_config.append(temp_config.kv_transfer_config) # A mapping from request id to the index of the connector chosen to # load the request from (if any). @@ -227,7 +268,7 @@ def request_finished( return async_saves > 0, kv_txfer_params - def take_events(self) -> Iterable[KVCacheEvent]: + def take_events(self) -> Iterable["KVCacheEvent"]: for c in self._connectors: yield from c.take_events() @@ -264,3 +305,24 @@ def get_required_kvcache_layout( f"({', '.join(layouts) })." f"All connectors must use the same layout.") return next(iter(layouts), None) + + @classmethod + def build_kv_connector_stats( + cls, + data: Optional[dict[str, + Any]] = None) -> Optional[KVConnectorStats]: + return MultiKVConnectorStats(data=data) if data is not None \ + else MultiKVConnectorStats() + + def get_kv_connector_stats(self) -> Optional[MultiKVConnectorStats]: + # Group connector stats by connector type. + stats_by_connector: Optional[MultiKVConnectorStats] = None + for c in self._connectors: + stats = c.get_kv_connector_stats() + if stats is None: + continue + if stats_by_connector is None: + # Lazy init to allow optional return value. + stats_by_connector = MultiKVConnectorStats() + stats_by_connector[c.__class__.__name__] = stats + return stats_by_connector diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 1ff1407aeb99..ff62f60e5a42 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import contextlib +import copy import logging import math import queue @@ -11,7 +12,7 @@ from collections.abc import Iterator from concurrent.futures import Future, ThreadPoolExecutor from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Optional +from typing import TYPE_CHECKING, Any, Optional, Union import msgspec import numpy as np @@ -23,6 +24,8 @@ from vllm.config import VllmConfig from vllm.distributed.kv_transfer.kv_connector.v1.base import ( CopyBlocksOp, KVConnectorBase_V1, KVConnectorMetadata, KVConnectorRole) +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) from vllm.distributed.parallel_state import ( get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, get_tp_group) @@ -33,7 +36,6 @@ from vllm.utils import make_zmq_path, make_zmq_socket from vllm.v1.attention.backends.utils import get_kv_cache_layout from vllm.v1.core.sched.output import SchedulerOutput -from vllm.v1.request import RequestStatus if TYPE_CHECKING: from vllm.attention.backends.abstract import AttentionMetadata @@ -206,6 +208,18 @@ def get_finished(self, assert self.connector_worker is not None return self.connector_worker.get_finished() + def get_kv_connector_stats(self) -> 
Optional[KVConnectorStats]: + assert self.connector_worker is not None + return self.connector_worker.get_kv_connector_stats() + + @classmethod + def build_kv_connector_stats( + cls, + data: Optional[dict[str, + Any]] = None) -> Optional[KVConnectorStats]: + return NixlKVConnectorStats(data=data) if data is not None \ + else NixlKVConnectorStats() + def start_load_kv(self, forward_context: "ForwardContext", **kwargs) -> None: assert self.connector_worker is not None @@ -377,6 +391,7 @@ def request_finished( Once a request is finished, determine whether request blocks should be freed now or will be sent asynchronously and freed later. """ + from vllm.v1.request import RequestStatus params = request.kv_transfer_params logger.debug( @@ -550,6 +565,7 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): # With heterogeneous TP, P must wait for all assigned D TP workers to # finish reading before safely freeing the blocks. self.consumer_notification_counts_by_req = defaultdict[ReqId, int](int) + self.xfer_stats = NixlKVConnectorStats() def __del__(self): """Cleanup background threads on destruction.""" @@ -1097,6 +1113,8 @@ def _pop_done_transfers( xfer_state = self.nixl_wrapper.check_xfer_state(handle) if xfer_state == "DONE": self.nixl_wrapper.release_xfer_handle(handle) + # TODO (NickLucche) Get from NIXL telemetry once integrated + self.xfer_stats.record_transfer() elif xfer_state == "PROC": in_progress = True continue @@ -1248,7 +1266,6 @@ def _read_blocks(self, local_block_ids: list[int], self.nixl_wrapper.transfer(handle) # Use handle to check completion in future step(). - # TODO (NickLucche) surface xfer elapsed time self._recving_transfers[request_id].append( (handle, time.perf_counter())) @@ -1300,6 +1317,15 @@ def get_backend_aware_kv_block_len(self): block_len = self.block_len return block_len + def get_kv_connector_stats(self) -> Optional[KVConnectorStats]: + """ + Get the KV transfer stats for the connector. 
+ """ + # Clear stats for next iteration + if not self.xfer_stats.is_empty(): + return self.xfer_stats.clone_and_reset() + return None + @contextlib.contextmanager def zmq_ctx(socket_type: Any, addr: str) -> Iterator[zmq.Socket]: @@ -1318,3 +1344,39 @@ def zmq_ctx(socket_type: Any, addr: str) -> Iterator[zmq.Socket]: finally: if ctx is not None: ctx.destroy(linger=0) + + +@dataclass +class NixlKVConnectorStats(KVConnectorStats): + """Container for transfer performance metrics""" + + def __post_init__(self): + if "num_successful_transfers" not in self.data: + self.data["num_successful_transfers"] = 0 + + def reset(self): + self.data = {"num_successful_transfers": 0} + + def record_transfer(self): + # TODO: record actual transfer stats when available + self.data["num_successful_transfers"] += 1 + + def clone_and_reset(self) -> "NixlKVConnectorStats": + old = copy.copy(self) + self.reset() + return old + + def is_empty(self) -> bool: + return self.data["num_successful_transfers"] == 0 + + def aggregate(self, other: KVConnectorStats) -> KVConnectorStats: + if not other.is_empty(): + self.data["num_successful_transfers"] += other.data[ + "num_successful_transfers"] + return self + + def reduce(self) -> dict[str, Union[int, float]]: + # TODO: reduce stats to a single value, calculate latency/throughput + return { + "num_successful_transfers": self.data["num_successful_transfers"] + } \ No newline at end of file diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 85ca858ad7bd..b08898d253ca 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -15,6 +15,8 @@ KVConnectorFactory) from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1, KVConnectorRole) +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) from vllm.logger import init_logger from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry from vllm.v1.core.encoder_cache_manager import (EncoderCacheManager, @@ -869,9 +871,12 @@ def update_from_output( num_scheduled_tokens = scheduler_output.num_scheduled_tokens pooler_outputs = model_runner_output.pooler_output num_nans_in_logits = model_runner_output.num_nans_in_logits + kv_connector_output = model_runner_output.kv_connector_output outputs: dict[int, list[EngineCoreOutput]] = defaultdict(list) spec_decoding_stats: Optional[SpecDecodingStats] = None + kv_connector_stats = (kv_connector_output.kv_connector_stats + if kv_connector_output else None) # NOTE(woosuk): As len(num_scheduled_tokens) can be up to 1K or more, # the below loop can be a performance bottleneck. We should do our best @@ -1007,7 +1012,8 @@ def update_from_output( finished_requests=finished_set) finished_req_ids.clear() - if (stats := self.make_stats(spec_decoding_stats)) is not None: + if (stats := self.make_stats(spec_decoding_stats, + kv_connector_stats)) is not None: # Return stats to only one of the front-ends. 
if (eco := next(iter(engine_core_outputs.values()), None)) is None: # We must return the stats even if there are no request @@ -1172,20 +1178,21 @@ def reset_prefix_cache(self) -> bool: def make_stats( self, spec_decoding_stats: Optional[SpecDecodingStats] = None, + kv_connector_stats: Optional[KVConnectorStats] = None, ) -> Optional[SchedulerStats]: if not self.log_stats: return None prefix_cache_stats = self.kv_cache_manager.make_prefix_cache_stats() assert prefix_cache_stats is not None - return SchedulerStats( - num_running_reqs=len(self.running), - num_waiting_reqs=len(self.waiting), - kv_cache_usage=self.kv_cache_manager.usage, - prefix_cache_stats=prefix_cache_stats, - spec_decoding_stats=spec_decoding_stats, - num_corrupted_reqs=sum(req.is_output_corrupted - for req in self.running), - ) + return SchedulerStats(num_running_reqs=len(self.running), + num_waiting_reqs=len(self.waiting), + kv_cache_usage=self.kv_cache_manager.usage, + prefix_cache_stats=prefix_cache_stats, + spec_decoding_stats=spec_decoding_stats, + num_corrupted_reqs=sum(req.is_output_corrupted + for req in self.running), + kv_connector_stats=kv_connector_stats.data + if kv_connector_stats else None) def make_spec_decoding_stats( self, diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index b30036a6f8e8..f0076b2d81db 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -9,6 +9,8 @@ import prometheus_client from vllm.config import SupportsMetricsInfo, VllmConfig +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorLogging) from vllm.logger import init_logger from vllm.v1.core.kv_cache_utils import PrefixCachingMetrics from vllm.v1.engine import FinishReason @@ -59,6 +61,8 @@ def __init__(self, vllm_config: VllmConfig, engine_index: int = 0): # TODO: Make the interval configurable. 
self.prefix_caching_metrics = PrefixCachingMetrics() self.spec_decoding_logging = SpecDecodingLogging() + kv_tranfer_config = self.vllm_config.kv_transfer_config + self.kv_transfer_logging = KVConnectorLogging(kv_tranfer_config) self.last_prompt_throughput: float = 0.0 self.last_generation_throughput: float = 0.0 @@ -97,7 +101,8 @@ def record(self, if scheduler_stats.spec_decoding_stats is not None: self.spec_decoding_logging.observe( scheduler_stats.spec_decoding_stats) - + if kv_connector_stats := scheduler_stats.kv_connector_stats: + self.kv_transfer_logging.observe(kv_connector_stats) self.last_scheduler_stats = scheduler_stats def log(self): @@ -136,6 +141,7 @@ def log(self): self.prefix_caching_metrics.hit_rate * 100, ) self.spec_decoding_logging.log(log_fn=log_fn) + self.kv_transfer_logging.log(log_fn=log_fn) def log_engine_initialized(self): if self.vllm_config.cache_config.num_gpu_blocks: diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index e6c344d193df..0eff557336bc 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -3,7 +3,7 @@ import time from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Optional +from typing import TYPE_CHECKING, Any, Optional from vllm.v1.spec_decode.metrics import SpecDecodingStats @@ -43,6 +43,7 @@ class SchedulerStats: default_factory=PrefixCacheStats) spec_decoding_stats: Optional[SpecDecodingStats] = None + kv_connector_stats: Optional[dict[str, Any]] = None num_corrupted_reqs: int = 0 diff --git a/vllm/v1/outputs.py b/vllm/v1/outputs.py index 1b2da8addb19..e6cc6019b172 100644 --- a/vllm/v1/outputs.py +++ b/vllm/v1/outputs.py @@ -3,10 +3,14 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import NamedTuple, Optional +from typing import TYPE_CHECKING, NamedTuple, Optional import torch +if TYPE_CHECKING: + from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) + class LogprobsLists(NamedTuple): @@ -77,6 +81,11 @@ class KVConnectorOutput: # [req_ids] finished_sending: Optional[set[str]] = None finished_recving: Optional[set[str]] = None + kv_connector_stats: Optional["KVConnectorStats"] = None + + def is_empty(self): + return (not self.finished_sending and not self.finished_recving + and not self.kv_connector_stats) # ModelRunnerOutput is serialized and sent to the scheduler process. 
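Together with the scheduler and logger changes above, this outputs.py hunk is the worker-to-frontend leg of the metrics plumbing: the worker attaches a KVConnectorStats object to KVConnectorOutput, the scheduler forwards its `data` dict inside SchedulerStats.kv_connector_stats, and KVConnectorLogging accumulates it and emits the "KV Transfer metrics" log line each logging interval. A small sketch of the is_empty() contract introduced here, assuming the default field values shown in this hunk:

    from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import (
        NixlKVConnectorStats)
    from vllm.v1.outputs import KVConnectorOutput

    # No finished requests and no stats payload: nothing to report upstream.
    assert KVConnectorOutput().is_empty()

    # A finished-request set makes the output non-empty...
    assert not KVConnectorOutput(finished_sending={"req-1"}).is_empty()

    # ...and so does a stats payload with at least one recorded transfer.
    stats = NixlKVConnectorStats()
    stats.record_transfer()
    assert not KVConnectorOutput(kv_connector_stats=stats).is_empty()
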
diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py index 3eb9f26e9f5b..016a90c196ba 100644 --- a/vllm/v1/worker/kv_connector_model_runner_mixin.py +++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py @@ -13,6 +13,8 @@ get_kv_transfer_group, has_kv_transfer_group) from vllm.distributed.kv_transfer.kv_connector.base import KVConnectorBase +from vllm.distributed.kv_transfer.kv_connector.v1.metrics import ( + KVConnectorStats) from vllm.forward_context import get_forward_context, set_forward_context from vllm.logger import init_logger from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput, @@ -119,4 +121,11 @@ def _get_kv_connector_output( output.finished_sending, output.finished_recving = ( kv_connector.get_finished(scheduler_output.finished_req_ids)) - kv_connector.clear_connector_metadata() + output.kv_connector_stats = KVConnectorModelRunnerMixin.\ + get_kv_connector_stats() + + @staticmethod + def get_kv_connector_stats() -> Optional[KVConnectorStats]: + if has_kv_transfer_group(): + return get_kv_transfer_group().get_kv_connector_stats() + return None From 5089fd749cbe4233a29f29ce706d56c47464c117 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 19 Sep 2025 19:10:52 +0800 Subject: [PATCH 146/518] [V0 Deprecation] Remove V0 logic from `get_input_embeddings` interface (#25242) Signed-off-by: DarkLight1337 --- .../models/hyperclovax_vision.py | 45 +++++++------------ vllm/model_executor/models/interfaces.py | 24 ---------- vllm/model_executor/models/ultravox.py | 19 ++------ vllm/model_executor/models/utils.py | 18 +------- 4 files changed, 22 insertions(+), 84 deletions(-) diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index 53f0585541b1..870addd0dcbc 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -46,7 +46,8 @@ from .clip import CLIPVisionModel from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel -from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix +from .utils import (AutoWeightsLoader, init_vllm_registered_model, + maybe_prefix, merge_multimodal_embeddings) from .vision import get_vision_encoder_info EOT = "<|endofturn|>" @@ -740,33 +741,20 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - **kwargs, ) -> torch.Tensor: inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if (kwargs.get("pixel_values_images") is not None - or kwargs.get("pixel_values_videos") - is not None): # v0 compatibility - multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - if multimodal_embeddings is not None: - multimodal_embeddings = torch.cat(multimodal_embeddings, dim=0) - _mask_image = input_ids == self.config.image_token_id - _mask_video = input_ids == self.config.video_token_id - assert _mask_image.sum() + _mask_video.sum() == len( - multimodal_embeddings) - - if multimodal_embeddings.dtype != inputs_embeds.dtype: - multimodal_embeddings = multimodal_embeddings.to( - dtype=inputs_embeds.dtype) - if multimodal_embeddings.device != inputs_embeds.device: - multimodal_embeddings = multimodal_embeddings.to( - device=inputs_embeds.device) - - if _mask_image.sum() > 0: - inputs_embeds[ - _mask_image] = multimodal_embeddings[:sum(_mask_image)] - if _mask_video.sum() > 0: - inputs_embeds[_mask_video] 
= multimodal_embeddings[ - -sum(_mask_video):] + if multimodal_embeddings is not None \ + and len(multimodal_embeddings) != 0: + inputs_embeds = merge_multimodal_embeddings( + input_ids, + inputs_embeds, + multimodal_embeddings, + placeholder_token_id=[ + self.config.image_token_id, + self.config.video_token_id, + ], + ) + return inputs_embeds def forward( @@ -783,8 +771,9 @@ def forward( # NOTE: In v1, inputs_embeds is always generated at model runner, this # condition is for v0 compatibility. elif inputs_embeds is None: - inputs_embeds = self.get_input_embeddings(input_ids=input_ids, - **kwargs) + multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) + inputs_embeds = self.get_input_embeddings(input_ids, + multimodal_embeddings) input_ids = None hidden_states = self.language_model.model(input_ids, positions, diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index e9c600e36cfa..6be70c4b3b21 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -23,7 +23,6 @@ from .interfaces_base import is_pooling_model if TYPE_CHECKING: - from vllm.attention import AttentionMetadata from vllm.config import VllmConfig from vllm.model_executor.models.utils import WeightsMapper from vllm.sequence import IntermediateTensors @@ -97,33 +96,10 @@ def get_language_model(self) -> torch.nn.Module: """ ... - # Only for models that support v0 chunked prefill - # TODO(ywang96): Remove this overload once v0 is deprecated - @overload def get_input_embeddings( self, input_ids: Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - attn_metadata: Optional["AttentionMetadata"] = None, - ) -> Tensor: - ... - - # TODO: Remove this overload once v0 is deprecated - @overload - def get_input_embeddings( - self, - input_ids: Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> Tensor: - ... - - def get_input_embeddings( - self, - input_ids: Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - # Only necessary so that the v0 overload is valid - # TODO: Remove attn_metadata once v0 is deprecated - attn_metadata: Optional["AttentionMetadata"] = None, ) -> Tensor: """ Returns the input embeddings merged from the text embeddings from diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index 371ca817d5f9..f1f11c5fe8f0 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -13,9 +13,7 @@ from transformers.models.whisper import WhisperFeatureExtractor from transformers.models.whisper.modeling_whisper import WhisperEncoder -from vllm import envs from vllm.config import VllmConfig -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.activation import MulAndSilu, get_act_fn from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.model_loader import DefaultModelLoader @@ -37,8 +35,7 @@ SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings, - merge_multimodal_embeddings_from_map) + merge_multimodal_embeddings) _AUDIO_PLACEHOLDER_OVERRIDE = "<|audio|>" _MAX_ENCODER_BATCH_SIZE = 16 @@ -568,17 +565,9 @@ def get_input_embeddings( safe_input_ids) if multimodal_embeddings is not None and len( multimodal_embeddings) > 0: - - # TODO(ywang96): remove this block after v0 is deprecated. 
- if not envs.VLLM_USE_V1: - attn_metadata = get_forward_context().attn_metadata - merge_multimodal_embeddings_from_map( - inputs_embeds, multimodal_embeddings, - attn_metadata.multi_modal_placeholder_index_maps["audio"]) - else: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.audio_token_index) + inputs_embeds = merge_multimodal_embeddings( + input_ids, inputs_embeds, multimodal_embeddings, + self.config.audio_token_index) return inputs_embeds def forward(self, diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index e716ec582baa..83e381b3b157 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -15,7 +15,7 @@ from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.multimodal import MultiModalPlaceholderMap, NestedTensors +from vllm.multimodal import NestedTensors from vllm.sequence import IntermediateTensors from vllm.utils import (get_cuda_view_from_cpu_tensor, is_pin_memory_available, is_uva_available) @@ -389,22 +389,6 @@ def _embedding_count_expression(embeddings: NestedTensors) -> str: _embedding_count_expression(inner) for inner in embeddings) -def merge_multimodal_embeddings_from_map( - inputs_embeds: torch.Tensor, multimodal_embeddings: NestedTensors, - placeholder_map: MultiModalPlaceholderMap.IndexMap) -> torch.Tensor: - """ - Merge ``multimodal_embeddings`` into ``inputs_embeds`` using the provided - placeholder map . - - Note: - This updates ``inputs_embeds`` in place. - """ - flattened_embeddings = _flatten_embeddings(multimodal_embeddings) - inputs_embeds[placeholder_map.dest] = flattened_embeddings[ - placeholder_map.src].to(dtype=inputs_embeds.dtype) - return inputs_embeds - - def _merge_multimodal_embeddings( inputs_embeds: torch.Tensor, is_multimodal: torch.Tensor, From 838d7116ba59db528647b29f0d000742f4af9d4b Mon Sep 17 00:00:00 2001 From: Icey <1790571317@qq.com> Date: Fri, 19 Sep 2025 20:25:12 +0800 Subject: [PATCH 147/518] [Qwen] Remove cuda hard-code in qwen3 next (#25243) Signed-off-by: Icey <1790571317@qq.com> --- vllm/model_executor/models/qwen3_next.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 0c974ee44eee..98749c160ba4 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -306,7 +306,7 @@ def __init__( eps=self.layer_norm_epsilon, group_size=None, norm_before_gate=True, - device=torch.cuda.current_device(), + device=current_platform.current_device(), dtype=config.torch_dtype, ) From cf278ff3b231b4fca0232db2d1183427dbc200bb Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:12:55 +0100 Subject: [PATCH 148/518] Update CODEOWNERS (#25269) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .github/CODEOWNERS | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 08717cdde643..323675993467 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -66,18 +66,25 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/models/test_transformers.py @hmellor # Docs -/docs @hmellor +/docs/mkdocs @hmellor +/docs/**/*.yml @hmellor +/requirements/docs.txt @hmellor +.readthedocs.yaml @hmellor mkdocs.yaml @hmellor +# 
Linting +.markdownlint.yaml @hmellor +.pre-commit-config.yaml @hmellor + # CPU -/vllm/v1/worker/^cpu @bigPYJ1151 +/vllm/v1/worker/cpu* @bigPYJ1151 /csrc/cpu @bigPYJ1151 /vllm/platforms/cpu.py @bigPYJ1151 /cmake/cpu_extension.cmake @bigPYJ1151 /docker/Dockerfile.cpu @bigPYJ1151 # Intel GPU -/vllm/v1/worker/^xpu @jikunshang +/vllm/v1/worker/xpu* @jikunshang /vllm/platforms/xpu.py @jikunshang /docker/Dockerfile.xpu @jikunshang From aed16879a9191a58adc5b8ac3973454dddefe018 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:22:33 +0100 Subject: [PATCH 149/518] Move `ModelConfig` from `config/__init__.py` to `config/model.py` (#25252) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- tests/conftest.py | 3 +- tests/distributed/test_pipeline_parallel.py | 2 +- tests/models/test_initialization.py | 5 +- tests/v1/sample/test_logprobs.py | 9 +- vllm/config/__init__.py | 2108 +------------------ vllm/config/model.py | 2006 ++++++++++++++++++ vllm/config/scheduler.py | 8 +- vllm/config/utils.py | 100 +- vllm/engine/arg_utils.py | 15 +- vllm/model_executor/model_loader/utils.py | 7 +- vllm/model_executor/models/registry.py | 22 +- vllm/v1/sample/ops/topk_topp_sampler.py | 13 +- vllm/v1/sample/sampler.py | 11 +- 13 files changed, 2160 insertions(+), 2149 deletions(-) create mode 100644 vllm/config/model.py diff --git a/tests/conftest.py b/tests/conftest.py index 9d433dedbf47..3cd93f4ad328 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,7 +39,8 @@ from vllm.assets.audio import AudioAsset from vllm.assets.image import ImageAsset from vllm.assets.video import VideoAsset -from vllm.config import ConvertOption, RunnerOption, _get_and_verify_dtype +from vllm.config.model import (ConvertOption, RunnerOption, + _get_and_verify_dtype) from vllm.connections import global_http_connection from vllm.distributed import (cleanup_dist_env_and_memory, init_distributed_environment, diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index fcd09844c095..073b362b6474 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -14,7 +14,7 @@ import pytest -from vllm.config import _FLOAT16_NOT_SUPPORTED_MODELS, RunnerOption +from vllm.config.model import _FLOAT16_NOT_SUPPORTED_MODELS, RunnerOption from vllm.logger import init_logger from vllm.transformers_utils.config import get_config diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index 56b5d32d1653..9281579b71e7 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -7,7 +7,6 @@ import pytest from vllm import LLM -from vllm.config import ModelImpl from vllm.engine.llm_engine import LLMEngine as V0LLMEngine from vllm.utils import GiB_bytes from vllm.v1.core.kv_cache_utils import get_kv_cache_configs @@ -111,8 +110,8 @@ def _initialize_kv_caches_v1(self, vllm_config): # these tests seem to produce leftover memory gpu_memory_utilization=0.80, load_format="dummy", - model_impl=ModelImpl.TRANSFORMERS - if model_arch in _TRANSFORMERS_BACKEND_MODELS else ModelImpl.VLLM, + model_impl="transformers" + if model_arch in _TRANSFORMERS_BACKEND_MODELS else "vllm", hf_overrides=hf_overrides_fn, max_num_seqs=model_info.max_num_seqs) diff --git a/tests/v1/sample/test_logprobs.py b/tests/v1/sample/test_logprobs.py index 570e330208a3..71aa9e3d379c 100644 --- a/tests/v1/sample/test_logprobs.py +++ 
b/tests/v1/sample/test_logprobs.py @@ -3,6 +3,7 @@ import itertools from collections.abc import Generator +from typing import get_args import pytest import torch @@ -464,7 +465,7 @@ def test_all_logprobs(example_prompts, monkeypatch: pytest.MonkeyPatch): assert len(prompt_logprob) == vocab_size -@pytest.mark.parametrize("logprobs_mode", list(LogprobsMode)) +@pytest.mark.parametrize("logprobs_mode", get_args(LogprobsMode)) def test_logprobs_mode(logprobs_mode: LogprobsMode, monkeypatch: pytest.MonkeyPatch): """Test with LLM engine with different logprobs_mode. @@ -493,14 +494,12 @@ def test_logprobs_mode(logprobs_mode: LogprobsMode, for logprobs in output.logprobs: for token_id in logprobs: logprob = logprobs[token_id] - if logprobs_mode in (LogprobsMode.RAW_LOGPROBS, - LogprobsMode.PROCESSED_LOGPROBS): + if logprobs_mode in ("raw_logprobs", "processed_logprobs"): assert logprob.logprob <= 0 if logprob.logprob > 0: positive_values = positive_values + 1 total_token_with_logprobs = total_token_with_logprobs + 1 assert total_token_with_logprobs >= len(results[0].outputs) - if logprobs_mode in (LogprobsMode.RAW_LOGITS, - LogprobsMode.PROCESSED_LOGITS): + if logprobs_mode in ("raw_logits", "processed_logits"): assert positive_values > 0 del llm diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 45504e010d68..ddd8de4324f6 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -4,27 +4,22 @@ # ruff: noqa: F401 import ast import copy -import enum import hashlib import inspect import json import os import textwrap -import warnings from contextlib import contextmanager -from dataclasses import InitVar, field, fields, is_dataclass, replace +from dataclasses import field, fields, is_dataclass, replace from functools import cached_property, lru_cache -from importlib.util import find_spec -from typing import (TYPE_CHECKING, Any, Callable, Literal, Optional, Protocol, - TypeVar, Union, cast, get_args) +from typing import (TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeVar, + Union, cast) import regex as re import torch -from pydantic import (ConfigDict, SkipValidation, field_validator, - model_validator) +from pydantic import ConfigDict, SkipValidation from pydantic.dataclasses import dataclass -from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE -from typing_extensions import assert_never, runtime_checkable +from typing_extensions import runtime_checkable import vllm.envs as envs from vllm import version @@ -36,45 +31,31 @@ from vllm.config.kv_transfer import KVTransferConfig from vllm.config.load import LoadConfig from vllm.config.lora import LoRAConfig +from vllm.config.model import (ConvertOption, HfOverrides, LogprobsMode, + ModelConfig, ModelDType, ModelImpl, + RunnerOption, TaskOption, TokenizerMode, + iter_architecture_defaults, + try_match_architecture_defaults) from vllm.config.multimodal import (MMCacheType, MMEncoderTPMode, MultiModalConfig) from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, ParallelConfig) from vllm.config.pooler import PoolerConfig -from vllm.config.scheduler import SchedulerConfig, SchedulerPolicy +from vllm.config.scheduler import RunnerType, SchedulerConfig, SchedulerPolicy from vllm.config.speculative import SpeculativeConfig from vllm.config.structured_outputs import StructuredOutputsConfig -from vllm.config.utils import ConfigType, config +from vllm.config.utils import ConfigType, config, get_attr_docs, is_init_field from vllm.logger import init_logger -from 
vllm.model_executor.layers.quantization import QuantizationMethods from vllm.multimodal import MULTIMODAL_REGISTRY -from vllm.platforms import current_platform -from vllm.transformers_utils.config import ( - ConfigFormat, get_config, get_hf_image_processor_config, - get_hf_text_config, get_pooling_config, - get_sentence_transformer_tokenizer_config, is_encoder_decoder, - is_interleaved, maybe_override_with_speculators_target_model, - try_get_generation_config, try_get_safetensors_metadata, - try_get_tokenizer_config, uses_mrope) -from vllm.transformers_utils.runai_utils import (ObjectStorageModel, - is_runai_obj_uri) -from vllm.transformers_utils.utils import maybe_model_redirect -from vllm.utils import (DEFAULT_MAX_NUM_BATCHED_TOKENS, - STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, - LazyLoader, common_broadcastable_dtype, random_uuid) +from vllm.transformers_utils.runai_utils import is_runai_obj_uri +from vllm.utils import random_uuid if TYPE_CHECKING: from _typeshed import DataclassInstance from transformers.configuration_utils import PretrainedConfig - import vllm.model_executor.layers.quantization as me_quant - import vllm.model_executor.models as me_models - from vllm.model_executor.layers.quantization import QuantizationMethods from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) - from vllm.v1.sample.logits_processor import LogitsProcessor - - HfOverrides = Union[dict, Callable[[type], type]] else: DataclassInstance = Any PretrainedConfig = Any @@ -82,83 +63,10 @@ QuantizationMethods = Any BaseModelLoader = Any LogitsProcessor = Any - HfOverrides = Union[dict[str, Any], Callable[[type], type]] - - me_quant = LazyLoader("model_executor", globals(), - "vllm.model_executor.layers.quantization") - me_models = LazyLoader("model_executor", globals(), - "vllm.model_executor.models") logger = init_logger(__name__) DataclassInstanceT = TypeVar("DataclassInstanceT", bound=DataclassInstance) -TaskOption = Literal["auto", "generate", "embedding", "embed", "classify", - "score", "reward", "transcription", "draft"] - -_ResolvedTask = Literal["generate", "transcription", "encode", "embed", - "classify", "reward", "draft"] - -RunnerOption = Literal["auto", "generate", "pooling", "draft"] - -RunnerType = Literal["generate", "pooling", "draft"] - -ConvertOption = Literal["auto", "none", "embed", "classify", "reward"] - -ConvertType = Literal["none", "embed", "classify", "reward"] - -_RUNNER_TASKS: dict[RunnerType, list[TaskOption]] = { - "generate": ["generate", "transcription"], - "pooling": ["embedding", "embed", "classify", "score", "reward"], - "draft": ["draft"], -} - -_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = { - "generate": [], - "pooling": ["embed", "classify", "reward"], - "draft": [], -} - -# Some model suffixes are based on auto classes from Transformers: -# https://huggingface.co/docs/transformers/en/model_doc/auto -# NOTE: Items higher on this list priority over lower ones -_SUFFIX_TO_DEFAULTS: list[tuple[str, tuple[RunnerType, ConvertType]]] = [ - ("ForCausalLM", ("generate", "none")), - ("ForConditionalGeneration", ("generate", "none")), - ("ChatModel", ("generate", "none")), - ("LMHeadModel", ("generate", "none")), - ("ForTextEncoding", ("pooling", "embed")), - ("EmbeddingModel", ("pooling", "embed")), - ("ForSequenceClassification", ("pooling", "classify")), - ("ForAudioClassification", ("pooling", "classify")), - ("ForImageClassification", ("pooling", "classify")), - ("ForVideoClassification", ("pooling", "classify")), - 
("ClassificationModel", ("pooling", "classify")), - ("ForRewardModeling", ("pooling", "reward")), - ("RewardModel", ("pooling", "reward")), - # Let other `*Model`s take priority - ("Model", ("pooling", "embed")), -] - - -def iter_architecture_defaults(): - yield from _SUFFIX_TO_DEFAULTS - - -def try_match_architecture_defaults( - architecture: str, - *, - runner_type: Optional[RunnerType] = None, - convert_type: Optional[ConvertType] = None, -) -> Optional[tuple[str, tuple[RunnerType, ConvertType]]]: - for suffix, (default_runner_type, - default_convert_type) in iter_architecture_defaults(): - if ((runner_type is None or runner_type == default_runner_type) and - (convert_type is None or convert_type == default_convert_type) - and architecture.endswith(suffix)): - return suffix, (default_runner_type, default_convert_type) - - return None - @runtime_checkable class SupportsHash(Protocol): @@ -173,1608 +81,6 @@ def metrics_info(self) -> dict[str, str]: ... -class ModelImpl(str, enum.Enum): - AUTO = "auto" - VLLM = "vllm" - TRANSFORMERS = "transformers" - TERRATORCH = "terratorch" - - -def get_attr_docs(cls: type[Any]) -> dict[str, str]: - """ - Get any docstrings placed after attribute assignments in a class body. - - https://davidism.com/mit-license/ - """ - - def pairwise(iterable): - """ - Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise - - Can be removed when Python 3.9 support is dropped. - """ - iterator = iter(iterable) - a = next(iterator, None) - - for b in iterator: - yield a, b - a = b - - try: - cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] - except (OSError, KeyError, TypeError): - # HACK: Python 3.13+ workaround - set missing __firstlineno__ - # Workaround can be removed after we upgrade to pydantic==2.12.0 - with open(inspect.getfile(cls)) as f: - for i, line in enumerate(f): - if f"class {cls.__name__}" in line and ":" in line: - cls.__firstlineno__ = i + 1 - break - cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] - - if not isinstance(cls_node, ast.ClassDef): - raise TypeError("Given object was not a class.") - - out = {} - - # Consider each pair of nodes. - for a, b in pairwise(cls_node.body): - # Must be an assignment then a constant string. - if (not isinstance(a, (ast.Assign, ast.AnnAssign)) - or not isinstance(b, ast.Expr) - or not isinstance(b.value, ast.Constant) - or not isinstance(b.value.value, str)): - continue - - doc = inspect.cleandoc(b.value.value) - - # An assignment can have multiple targets (a = b = v), but an - # annotated assignment only has one target. - targets = a.targets if isinstance(a, ast.Assign) else [a.target] - - for target in targets: - # Must be assigning to a plain name. - if not isinstance(target, ast.Name): - continue - - out[target.id] = doc - - return out - - -def is_init_field(cls: ConfigType, name: str) -> bool: - return next(f for f in fields(cls) if f.name == name).init - - -TokenizerMode = Literal["auto", "slow", "mistral", "custom"] -ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"] - - -class LogprobsMode(enum.Enum): - RAW_LOGITS = "raw_logits" - RAW_LOGPROBS = "raw_logprobs" - PROCESSED_LOGITS = "processed_logits" - PROCESSED_LOGPROBS = "processed_logprobs" - - -@config -@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) -class ModelConfig: - """Configuration for the model.""" - - model: str = "Qwen/Qwen3-0.6B" - """Name or path of the Hugging Face model to use. 
It is also used as the - content for `model_name` tag in metrics output when `served_model_name` is - not specified.""" - runner: RunnerOption = "auto" - """The type of model runner to use. Each vLLM instance only supports one - model runner, even if the same model can be used for multiple types.""" - convert: ConvertOption = "auto" - """Convert the model using adapters defined in - [vllm.model_executor.models.adapters][]. The most common use case is to - adapt a text generation model to be used for pooling tasks.""" - task: Optional[TaskOption] = None - """[DEPRECATED] The task to use the model for. If the model supports more - than one model runner, this is used to select which model runner to run. - - Note that the model may support other tasks using the same model runner. - """ - tokenizer: SkipValidation[str] = None # type: ignore - """Name or path of the Hugging Face tokenizer to use. If unspecified, model - name or path will be used.""" - tokenizer_mode: TokenizerMode = "auto" - """Tokenizer mode:\n - - "auto" will use the fast tokenizer if available.\n - - "slow" will always use the slow tokenizer.\n - - "mistral" will always use the tokenizer from `mistral_common`.\n - - "custom" will use --tokenizer to select the preregistered tokenizer.""" - trust_remote_code: bool = False - """Trust remote code (e.g., from HuggingFace) when downloading the model - and tokenizer.""" - dtype: Union[ModelDType, torch.dtype] = "auto" - """Data type for model weights and activations:\n - - "auto" will use FP16 precision for FP32 and FP16 models, and BF16 - precision for BF16 models.\n - - "half" for FP16. Recommended for AWQ quantization.\n - - "float16" is the same as "half".\n - - "bfloat16" for a balance between precision and range.\n - - "float" is shorthand for FP32 precision.\n - - "float32" for FP32 precision.""" - seed: Optional[int] = None - """Random seed for reproducibility. Initialized to None in V0, but - initialized to 0 in V1.""" - hf_config_path: Optional[str] = None - """Name or path of the Hugging Face config to use. If unspecified, model - name or path will be used.""" - allowed_local_media_path: str = "" - """Allowing API requests to read local images or videos from directories - specified by the server file system. This is a security risk. Should only - be enabled in trusted environments.""" - revision: Optional[str] = None - """The specific model version to use. It can be a branch name, a tag name, - or a commit id. If unspecified, will use the default version.""" - code_revision: Optional[str] = None - """The specific revision to use for the model code on the Hugging Face Hub. - It can be a branch name, a tag name, or a commit id. If unspecified, will - use the default version.""" - rope_scaling: dict[str, Any] = field(default_factory=dict) - """RoPE scaling configuration. For example, - `{"rope_type":"dynamic","factor":2.0}`.""" - rope_theta: Optional[float] = None - """RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE - theta improves the performance of the scaled model.""" - tokenizer_revision: Optional[str] = None - """The specific revision to use for the tokenizer on the Hugging Face Hub. - It can be a branch name, a tag name, or a commit id. If unspecified, will - use the default version.""" - max_model_len: SkipValidation[int] = None # type: ignore - """Model context length (prompt and output). If unspecified, will be - automatically derived from the model config. 
- - When passing via `--max-model-len`, supports k/m/g/K/M/G in human-readable - format. Examples:\n - - 1k -> 1000\n - - 1K -> 1024\n - - 25.6k -> 25,600""" - spec_target_max_model_len: Optional[int] = None - """Specify the maximum length for spec decoding draft models.""" - quantization: SkipValidation[Optional[QuantizationMethods]] = None - """Method used to quantize the weights. If `None`, we first check the - `quantization_config` attribute in the model config file. If that is - `None`, we assume the model weights are not quantized and use `dtype` to - determine the data type of the weights.""" - enforce_eager: bool = False - """Whether to always use eager-mode PyTorch. If True, we will disable CUDA - graph and always execute the model in eager mode. If False, we will use - CUDA graph and eager execution in hybrid for maximal performance and - flexibility.""" - max_seq_len_to_capture: int = 8192 - """Maximum sequence len covered by CUDA graphs. When a sequence has context - length larger than this, we fall back to eager mode. Additionally for - encoder-decoder models, if the sequence length of the encoder input is - larger than this, we fall back to the eager mode.""" - max_logprobs: int = 20 - """Maximum number of log probabilities to return when `logprobs` is - specified in `SamplingParams`. The default value comes the default for the - OpenAI Chat Completions API. -1 means no cap, i.e. all (output_length * - vocab_size) logprobs are allowed to be returned and it may cause OOM.""" - logprobs_mode: LogprobsMode = LogprobsMode.RAW_LOGPROBS - """Indicates the content returned in the logprobs and prompt_logprobs. - Supported mode: - 1) raw_logprobs, 2) processed_logprobs, 3) raw_logits, 4) processed_logits. - Raw means the values before applying any logit processors, like bad words. - Processed means the values after applying all processors, including - temperature and top_k/top_p. - """ - disable_sliding_window: bool = False - """Whether to disable sliding window. If True, we will disable the sliding - window functionality of the model, capping to sliding window size. If the - model does not support sliding window, this argument is ignored.""" - disable_cascade_attn: bool = False - """Disable cascade attention for V1. While cascade attention does not - change the mathematical correctness, disabling it could be useful for - preventing potential numerical issues. Note that even if this is set to - False, cascade attention will be only used when the heuristic tells that - it's beneficial.""" - skip_tokenizer_init: bool = False - """Skip initialization of tokenizer and detokenizer. Expects valid - `prompt_token_ids` and `None` for prompt from the input. The generated - output will contain token ids.""" - enable_prompt_embeds: bool = False - """If `True`, enables passing text embeddings as inputs via the - `prompt_embeds` key. Note that enabling this will double the time required - for graph compilation.""" - served_model_name: Optional[Union[str, list[str]]] = None - """The model name(s) used in the API. If multiple names are provided, the - server will respond to any of the provided names. The model name in the - model field of a response will be the first name in this list. If not - specified, the model name will be the same as the `--model` argument. 
Noted - that this name(s) will also be used in `model_name` tag content of - prometheus metrics, if multiple names provided, metrics tag will take the - first one.""" - use_async_output_proc: bool = True - """Whether to use async output processor.""" - config_format: Union[str, ConfigFormat] = "auto" - """The format of the model config to load:\n - - "auto" will try to load the config in hf format if available else it - will try to load in mistral format.\n - - "hf" will load the config in hf format.\n - - "mistral" will load the config in mistral format.""" - hf_token: Optional[Union[bool, str]] = None - """The token to use as HTTP bearer authorization for remote files . If - `True`, will use the token generated when running `huggingface-cli login` - (stored in `~/.huggingface`).""" - hf_overrides: HfOverrides = field(default_factory=dict) - """If a dictionary, contains arguments to be forwarded to the Hugging Face - config. If a callable, it is called to update the HuggingFace config.""" - logits_processor_pattern: Optional[str] = None - """Optional regex pattern specifying valid logits processor qualified names - that can be passed with the `logits_processors` extra completion argument. - Defaults to `None`, which allows no processors.""" - generation_config: str = "auto" - """The folder path to the generation config. Defaults to `"auto"`, the - generation config will be loaded from model path. If set to `"vllm"`, no - generation config is loaded, vLLM defaults will be used. If set to a folder - path, the generation config will be loaded from the specified folder path. - If `max_new_tokens` is specified in generation config, then it sets a - server-wide limit on the number of output tokens for all requests.""" - override_generation_config: dict[str, Any] = field(default_factory=dict) - """Overrides or sets generation config. e.g. `{"temperature": 0.5}`. If - used with `--generation-config auto`, the override parameters will be - merged with the default config from the model. If used with - `--generation-config vllm`, only the override parameters are used.""" - enable_sleep_mode: bool = False - """Enable sleep mode for the engine (only cuda platform is supported).""" - model_impl: Union[str, ModelImpl] = ModelImpl.AUTO.value - """Which implementation of the model to use:\n - - "auto" will try to use the vLLM implementation, if it exists, and fall - back to the Transformers implementation if no vLLM implementation is - available.\n - - "vllm" will use the vLLM model implementation.\n - - "transformers" will use the Transformers model implementation.\n - - "terratorch" will use the TerraTorch model implementation. - """ - override_attention_dtype: Optional[str] = None - """Override dtype for attention""" - logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None - """One or more logits processors' fully-qualified class names or class - definitions""" - io_processor_plugin: Optional[str] = None - """IOProcessor plugin name to load at model startup""" - - # Pooler config - pooler_config: Optional[PoolerConfig] = None - """Pooler config which controls the behaviour of output pooling in pooling - models.""" - override_pooler_config: Optional[Union[dict, PoolerConfig]] = None - """[DEPRECATED] Use `pooler_config` instead. This field will be removed in - v0.12.0 or v1.0.0, whichever is sooner.""" - - # Multimodal config and init vars - multimodal_config: Optional[MultiModalConfig] = None - """Configuration for multimodal model. 
If `None`, this will be inferred - from the architecture of `self.model`.""" - limit_mm_per_prompt: InitVar[Optional[dict[str, int]]] = None - media_io_kwargs: InitVar[Optional[dict[str, dict[str, Any]]]] = None - mm_processor_kwargs: InitVar[Optional[dict[str, Any]]] = None - mm_processor_cache_gb: InitVar[Optional[float]] = None - mm_processor_cache_type: InitVar[Optional[MMCacheType]] = None - mm_shm_cache_max_object_size_mb: InitVar[Optional[int]] = None - mm_encoder_tp_mode: InitVar[Optional[MMEncoderTPMode]] = None - interleave_mm_strings: InitVar[Optional[bool]] = None - skip_mm_profiling: InitVar[Optional[bool]] = None - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. - """ - factors: list[Any] = [] - factors.append(self.model) - factors.append(self.dtype) - factors.append(self.quantization) - factors.append(self.revision) - factors.append(self.code_revision) - factors.append(self.max_model_len) - factors.append(self.max_logprobs) - factors.append(self.disable_sliding_window) - factors.append(self.trust_remote_code) - factors.append(self.generation_config) - factors.append(self.model_impl) - factors.append(self.override_generation_config) - factors.append(self.rope_scaling) - factors.append(self.rope_theta) - # hf_config can control how the model looks! - factors.append(self.hf_config.to_json_string()) - str_factors = str(factors) - assert_hashable(str_factors) - return hashlib.sha256(str(factors).encode()).hexdigest() - - def __post_init__( - self, - # Multimodal config init vars - limit_mm_per_prompt: Optional[dict[str, int]], - media_io_kwargs: Optional[dict[str, dict[str, Any]]], - mm_processor_kwargs: Optional[dict[str, Any]], - mm_processor_cache_gb: Optional[float], - mm_processor_cache_type: Optional[MMCacheType], - mm_shm_cache_max_object_size_mb: Optional[int], - mm_encoder_tp_mode: Optional[MMEncoderTPMode], - interleave_mm_strings: Optional[bool], - skip_mm_profiling: Optional[bool]) -> None: - # Set the default seed to 0 in V1. - # NOTE(woosuk): In V0, we set the default seed to None because the - # driver worker shares the same process as the user process, and thus - # setting a seed affects the user process as well. - # In V1, we use separate processes for workers (unless - # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here - # doesn't affect the user process. However, without a consistent seed, - # different tensor parallel workers would sample different tokens, - # leading to inconsistent results. - if envs.VLLM_USE_V1 and self.seed is None: - self.seed = 0 - if not envs.VLLM_ENABLE_V1_MULTIPROCESSING: - logger.warning( - "The global random seed is set to %d. Since " - "VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may " - "affect the random state of the Python process that " - "launched vLLM.", self.seed) - - # Keep set served_model_name before maybe_model_redirect(self.model) - self.served_model_name = get_served_model_name(self.model, - self.served_model_name) - self.model = maybe_model_redirect(self.model) - # The tokenizer is consistent with the model by default. 
- if self.tokenizer is None: - self.tokenizer = self.model - if self.tokenizer_revision is None: - self.tokenizer_revision = self.revision - self.tokenizer = maybe_model_redirect(self.tokenizer) - - if isinstance(self.hf_config_path, str): - self.hf_config_path = maybe_model_redirect(self.hf_config_path) - - if callable(self.hf_overrides): - hf_overrides_kw = {} - hf_overrides_fn = self.hf_overrides - else: - hf_overrides_kw = self.hf_overrides - hf_overrides_fn = None - - if self.rope_scaling: - hf_override: dict[str, Any] = {"rope_scaling": self.rope_scaling} - hf_overrides_kw.update(hf_override) - hf_overrides_str = json.dumps(hf_overrides_kw) - msg = ( - "`--rope-scaling` will be removed in a future release. " - f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") - warnings.warn(DeprecationWarning(msg), stacklevel=2) - if self.rope_theta is not None: - hf_override = {"rope_theta": self.rope_theta} - hf_overrides_kw.update(hf_override) - hf_overrides_str = json.dumps(hf_overrides_kw) - msg = ( - "`--rope-theta` will be removed in a future release. " - f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") - warnings.warn(DeprecationWarning(msg), stacklevel=2) - - self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) - - if self.runner != "draft": - # If we're not running the draft model, check for speculators config - # If speculators config, set model / tokenizer to be target model - self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501 - model=self.model, - tokenizer=self.tokenizer, - revision=self.revision, - trust_remote_code=self.trust_remote_code) - - if (backend := envs.VLLM_ATTENTION_BACKEND - ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: - raise ValueError( - "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer " - "module was not found. 
See " - "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile " # noqa: E501 - "for instructions on how to install it.") - - from vllm.platforms import current_platform - - if (self.override_attention_dtype is not None - and not current_platform.is_rocm()): - warnings.warn( - "override-attention-dtype is set but not using ROCm platform", - stacklevel=2) - - if (self.enable_sleep_mode - and not current_platform.is_sleep_mode_available()): - raise ValueError( - "Sleep mode is not supported on current platform.") - - hf_config = get_config(self.hf_config_path or self.model, - self.trust_remote_code, - self.revision, - self.code_revision, - self.config_format, - hf_overrides_kw=hf_overrides_kw, - hf_overrides_fn=hf_overrides_fn) - - self.hf_config = hf_config - self.hf_text_config = get_hf_text_config(self.hf_config) - self.attention_chunk_size = getattr(self.hf_text_config, - "attention_chunk_size", None) - self.encoder_config = self._get_encoder_config() - self.hf_image_processor_config = get_hf_image_processor_config( - self.model, hf_token=self.hf_token, revision=self.revision) - - architectures = self.architectures - registry = self.registry - is_generative_model = registry.is_text_generation_model( - architectures, self) - is_pooling_model = registry.is_pooling_model(architectures, self) - - def _task_to_convert(task: TaskOption) -> ConvertType: - if task == "embedding" or task == "embed": - return "embed" - if task == "classify": - return "classify" - if task == "reward": - return "reward" - if task == "score": - new_task = self._get_default_pooling_task(architectures) - return "classify" if new_task == "classify" else "embed" - - return "none" - - if self.task is not None: - runner: RunnerOption = "auto" - convert: ConvertOption = "auto" - msg_prefix = ("The 'task' option has been deprecated and will be " - "removed in v0.13.0 or v1.0, whichever comes first.") - msg_hint = "Please remove this option." 
- - is_generative_task = self.task in _RUNNER_TASKS["generate"] - is_pooling_task = self.task in _RUNNER_TASKS["pooling"] - - if is_generative_model and is_pooling_model: - if is_generative_task: - runner = "generate" - convert = "auto" - msg_hint = ("Please replace this option with `--runner " - "generate` to continue using this model " - "as a generative model.") - elif is_pooling_task: - runner = "pooling" - convert = "auto" - msg_hint = ("Please replace this option with `--runner " - "pooling` to continue using this model " - "as a pooling model.") - else: # task == "auto" - pass - elif is_generative_model or is_pooling_model: - if is_generative_task: - runner = "generate" - convert = "auto" - msg_hint = "Please remove this option" - elif is_pooling_task: - runner = "pooling" - convert = _task_to_convert(self.task) - msg_hint = ("Please replace this option with `--convert " - f"{convert}` to continue using this model " - "as a pooling model.") - else: # task == "auto" - pass - else: - raise AssertionError("The model should be a generative or " - "pooling model when task is set to " - f"{self.task!r}.") - - self.runner = runner - self.convert = convert - - msg = f"{msg_prefix} {msg_hint}" - warnings.warn(msg, DeprecationWarning, stacklevel=2) - - self.runner_type = self._get_runner_type(architectures, self.runner) - self.convert_type = self._get_convert_type(architectures, - self.runner_type, - self.convert) - - if self.runner_type == "generate" and not is_generative_model: - generate_converts = _RUNNER_CONVERTS["generate"] - if self.convert_type not in generate_converts: - # Currently we don't have any converters for generative models - raise ValueError( - "This model does not support `--runner generate`.") - if self.runner_type == "pooling" and not is_pooling_model: - pooling_converts = _RUNNER_CONVERTS["pooling"] - if self.convert_type not in pooling_converts: - convert_option = "<" + "|".join(pooling_converts) + ">" - raise ValueError( - "This model does not support `--runner pooling`. " - f"You can pass `--convert {convert_option} to adapt " - "it into a pooling model.") - - self.supported_tasks = self._get_supported_tasks( - architectures, self.runner_type, self.convert_type) - - # Note: Initialize these attributes early because transformers fallback - # may fail to load dynamic modules in child processes - model_info, arch = registry.inspect_model_cls(architectures, self) - self._model_info = model_info - self._architecture = arch - logger.info("Resolved architecture: %s", arch) - - # Init pooler config if needed - if self.runner_type == "pooling": - if self.override_pooler_config is not None: - logger.warning_once( - "`override_pooler_config` is deprecated and will be " - "removed in v0.12.0 or v1.0.0, whichever is sooner. 
" - "Please use `pooler_config` instead.") - - if isinstance(self.override_pooler_config, dict): - self.pooler_config = PoolerConfig( - **self.override_pooler_config) - else: - self.pooler_config = self.override_pooler_config - - if self.pooler_config is None: - self.pooler_config = PoolerConfig() - - base_config = get_pooling_config(self.model, self.revision) - if base_config is not None: - # Only set values that are not overridden by the user - for k, v in base_config.items(): - if getattr(self.pooler_config, k) is None: - setattr(self.pooler_config, k, v) - - default_pooling_type = self._model_info.default_pooling_type - if self.pooler_config.pooling_type is None: - self.pooler_config.pooling_type = default_pooling_type - - self.dtype: torch.dtype = _get_and_verify_dtype( - self.model, - self.hf_config, - self.dtype, - is_pooling_model=self.runner_type == "pooling", - revision=self.revision, - ) - - # Interleaved attention is not supported by some backends in V0 - if (not self.disable_sliding_window - and is_interleaved(self.hf_text_config) - and not envs.VLLM_USE_V1 - and (backend := envs.VLLM_ATTENTION_BACKEND) - in ("XFORMERS", "FLASHINFER")): - logger.warning_once( - "%s has interleaved attention, which is currently not " - "supported by the %s backend. Disabling sliding window and " - "capping the max length to the sliding window size (%d).", - self.hf_text_config.model_type, - backend, - self.hf_text_config.sliding_window, - ) - self.disable_sliding_window = True - - self.original_max_model_len = self.max_model_len - self.max_model_len = self.get_and_verify_max_len(self.max_model_len) - # Init multimodal config if needed - if self._model_info.supports_multimodal: - if (mm_encoder_tp_mode == "data" and - not self._model_info.supports_multimodal_encoder_tp_data): - logger.warning_once( - "This model does not support `--mm-encoder-tp-mode data`. 
" - "Falling back to `--mm-encoder-tp-mode weights`.") - mm_encoder_tp_mode = "weights" - - mm_config_kwargs = dict( - limit_per_prompt=limit_mm_per_prompt, - media_io_kwargs=media_io_kwargs, - mm_processor_kwargs=mm_processor_kwargs, - mm_processor_cache_gb=mm_processor_cache_gb, - mm_processor_cache_type=mm_processor_cache_type, - mm_shm_cache_max_object_size_mb=mm_shm_cache_max_object_size_mb, - mm_encoder_tp_mode=mm_encoder_tp_mode, - interleave_mm_strings=interleave_mm_strings, - skip_mm_profiling=skip_mm_profiling, - ) - - mm_config_kwargs = { - k: v - for k, v in mm_config_kwargs.items() if v is not None - } - - self.multimodal_config = MultiModalConfig(**mm_config_kwargs) - - if self.disable_sliding_window: - # Set after get_and_verify_max_len to ensure that max_model_len - # can be correctly capped to sliding window size - self.hf_text_config.sliding_window = None - - if not self.skip_tokenizer_init: - self._verify_tokenizer_mode() - - # Avoid running try_verify_and_update_config multiple times - self.config_updated = False - - self._verify_quantization() - self._verify_cuda_graph() - self._verify_bnb_config() - - @field_validator("quantization", mode="before") - @classmethod - def validate_quantization_before(cls, value: Any) -> Any: - if isinstance(value, str): - return value.lower() - return value - - @model_validator(mode="after") - def validate_model_config_after(self: "ModelConfig") -> "ModelConfig": - if not isinstance(self.tokenizer, str): - raise ValueError("tokenizer must be a string after __post_init__.") - if not isinstance(self.max_model_len, int): - raise ValueError( - "max_model_len must be an integer after __post_init__.") - return self - - def _get_transformers_backend_cls(self) -> str: - """Determine which Transformers backend class will be used if - `model_impl` is set to `transformers` or `auto`.""" - if getattr(self, "runner_type", self.runner) == "pooling": - return "TransformersModel" - if self.hf_config != self.hf_text_config: - # If 'hf_text_config' is the same as 'hf_config'. If not, it is - # probably a composite config, i.e. multimodal - return "TransformersForMultimodalLM" - return "TransformersForCausalLM" - - def using_transformers_backend(self) -> bool: - """Check if the model is using the Transformers backend class.""" - return self.architecture == self._get_transformers_backend_cls() - - @property - def registry(self): - return me_models.ModelRegistry - - @property - def architectures(self) -> list[str]: - return getattr(self.hf_config, "architectures", []) - - @property - def architecture(self) -> str: - """The architecture vllm actually used.""" - return self._architecture - - def maybe_pull_model_tokenizer_for_runai(self, model: str, - tokenizer: str) -> None: - """Pull model/tokenizer from Object Storage to temporary - directory when needed. 
- - Args: - model: Model name or path - tokenizer: Tokenizer name or path - """ - if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)): - return - - if is_runai_obj_uri(model): - object_storage_model = ObjectStorageModel() - object_storage_model.pull_files( - model, allow_pattern=["*.model", "*.py", "*.json"]) - self.model_weights = model - self.model = object_storage_model.dir - - # If tokenizer is same as model, download to same directory - if model == tokenizer: - object_storage_model.pull_files(model, - ignore_pattern=[ - "*.pt", "*.safetensors", - "*.bin", "*.tensors", - "*.pth" - ]) - self.tokenizer = object_storage_model.dir - return - - # Only download tokenizer if needed and not already handled - if is_runai_obj_uri(tokenizer): - object_storage_tokenizer = ObjectStorageModel() - object_storage_tokenizer.pull_files(model, - ignore_pattern=[ - "*.pt", "*.safetensors", - "*.bin", "*.tensors", - "*.pth" - ]) - self.tokenizer = object_storage_tokenizer.dir - - def _get_encoder_config(self): - return get_sentence_transformer_tokenizer_config( - self.model, self.revision) - - def _verify_tokenizer_mode(self) -> None: - tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower()) - if tokenizer_mode not in get_args(TokenizerMode): - raise ValueError( - f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be " - f"one of {get_args(TokenizerMode)}.") - self.tokenizer_mode = tokenizer_mode - - def _get_default_runner_type( - self, - architectures: list[str], - ) -> RunnerType: - registry = self.registry - - # Some Sentence Transformers models use *ForCausalLM archs - if get_pooling_config(self.model, self.revision): - return "pooling" - - for arch in architectures: - if arch in registry.get_supported_archs(): - if registry.is_pooling_model(architectures, self): - return "pooling" - if registry.is_text_generation_model(architectures, self): - return "generate" - - match = try_match_architecture_defaults(arch) - if match: - _, (runner_type, _) = match - return runner_type - - return "generate" - - def _get_runner_type( - self, - architectures: list[str], - runner: RunnerOption, - ) -> RunnerType: - if runner != "auto": - return runner - - runner_type = self._get_default_runner_type(architectures) - - # Don't log the most common case - if runner_type != "generate": - logger.info( - "Resolved `--runner auto` to `--runner %s`. 
" - "Pass the value explicitly to silence this message.", - runner_type) - - return runner_type - - def _get_default_convert_type( - self, - architectures: list[str], - runner_type: RunnerType, - ) -> ConvertType: - registry = self.registry - - for arch in architectures: - if arch in registry.get_supported_archs(): - if (runner_type == "generate" - and registry.is_text_generation_model( - architectures, self)): - return "none" - if (runner_type == "pooling" - and registry.is_pooling_model(architectures, self)): - return "none" - - match = try_match_architecture_defaults(arch, - runner_type=runner_type) - if match: - _, (_, convert_type) = match - return convert_type - - # This is to handle Sentence Transformers models that use *ForCausalLM - # and also multi-modal pooling models which are not defined as - # Sentence Transformers models - if runner_type == "pooling": - return "embed" - - return "none" - - def _get_convert_type( - self, - architectures: list[str], - runner_type: RunnerType, - convert: ConvertOption, - ) -> ConvertType: - if convert != "auto": - return convert - - convert_type = self._get_default_convert_type(architectures, - runner_type) - - # Don't log the most common case - if convert_type != "none": - logger.info( - "Resolved `--convert auto` to `--convert %s`. " - "Pass the value explicitly to silence this message.", - convert_type) - - return convert_type - - def _get_supported_generation_tasks( - self, - architectures: list[str], - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - registry = self.registry - - if registry.is_transcription_only_model(architectures, self): - return ["transcription"] - - # TODO: Use get_supported_generation_tasks once V0 is removed - supported_tasks = list[_ResolvedTask]() - if (registry.is_text_generation_model(architectures, self) - or convert_type in _RUNNER_CONVERTS["generate"]): - supported_tasks.append("generate") - - if registry.is_transcription_model(architectures, self): - supported_tasks.append("transcription") - - return supported_tasks - - def _get_default_pooling_task( - self, - architectures: list[str], - ) -> Literal["embed", "classify", "reward"]: - if self.registry.is_cross_encoder_model(architectures, self): - return "classify" - - for arch in architectures: - match = try_match_architecture_defaults(arch, - runner_type="pooling") - if match: - _, (_, convert_type) = match - assert convert_type != "none" - return convert_type - - return "embed" - - def _get_supported_pooling_tasks( - self, - architectures: list[str], - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - registry = self.registry - - # TODO: Use get_supported_pooling_tasks once V0 is removed - supported_tasks = list[_ResolvedTask]() - if (registry.is_pooling_model(architectures, self) - or convert_type in _RUNNER_CONVERTS["pooling"]): - supported_tasks.append("encode") - - extra_task = (self._get_default_pooling_task(architectures) - if convert_type == "none" else convert_type) - supported_tasks.append(extra_task) - - return supported_tasks - - def _get_supported_tasks( - self, - architectures: list[str], - runner_type: RunnerType, - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - if runner_type == "generate": - return self._get_supported_generation_tasks( - architectures, convert_type) - if runner_type == "pooling": - return self._get_supported_pooling_tasks(architectures, - convert_type) - if runner_type == "draft": - return ["draft"] - - assert_never(runner_type) - - def _parse_quant_hf_config(self, hf_config: PretrainedConfig): - 
quant_cfg = getattr(hf_config, "quantization_config", None) - if quant_cfg is None: - # compressed-tensors uses a "compression_config" key - quant_cfg = getattr(hf_config, "compression_config", None) - - else: - # Set quant_method for ModelOpt models. - producer_name = quant_cfg.get("producer", {}).get("name") - if producer_name == "modelopt": - quant_algo = quant_cfg.get("quantization", - {}).get("quant_algo") - if quant_algo == "FP8": - quant_cfg["quant_method"] = "modelopt" - elif quant_algo == "NVFP4": - quant_cfg["quant_method"] = "modelopt_fp4" - elif quant_algo is not None: - raise ValueError( - f"Unknown ModelOpt quant algo: {quant_algo}") - - return quant_cfg - - def _verify_quantization(self) -> None: - supported_quantization = me_quant.QUANTIZATION_METHODS - if self.quantization is not None: - self.quantization = cast(me_quant.QuantizationMethods, - self.quantization) - - # Parse quantization method from the HF model config, if available. - quant_cfg = self._parse_quant_hf_config(self.hf_config) - if quant_cfg is None and (text_config := getattr( - self.hf_config, "text_config", None)): - # Check the text config as well for multi-modal models. - quant_cfg = self._parse_quant_hf_config(text_config) - - if quant_cfg is not None: - # Use the community standard 'quant_method' - quant_method = quant_cfg.get("quant_method", "").lower() - - # Normalize library names - quant_method = quant_method.replace("compressed_tensors", - "compressed-tensors") - - quant_cfg["quant_method"] = quant_method - - # Quantization methods which are overrides (i.e. they have a - # `override_quantization_method` method) must be checked in order - # of preference (this is particularly important for GPTQ). - overrides = [ - "bitblas", - "gptq_marlin_24", - "gptq_marlin", - "gptq_bitblas", - "awq_marlin", - "ipex", - "moe_wna16", - "modelopt", - "modelopt_fp4", - "petit_nvfp4", - ] - quantization_methods = [ - q for q in supported_quantization if q not in overrides - ] - # Any custom overrides will be in quantization_methods so we place - # them at the start of the list so custom overrides have preference - # over the built-in ones. - quantization_methods = quantization_methods + overrides - - # Detect which checkpoint is it - for name in quantization_methods: - method = me_quant.get_quantization_config(name) - quantization_override = method.override_quantization_method( - quant_cfg, self.quantization) - if quantization_override is not None: - # Raise error if the override is not custom (custom would - # be in QUANTIZATION_METHODS but not QuantizationMethods) - # and hasn't been added to the overrides list. - if (name in get_args(me_quant.QuantizationMethods) - and name not in overrides): - raise ValueError( - f"Quantization method {name} is an override but " - "is has not been added to the `overrides` list " - "above. This is necessary to ensure that the " - "overrides are checked in order of preference.") - quant_method = quantization_override - self.quantization = quantization_override - break - - # Verify quantization configurations. 
- if self.quantization is None: - self.quantization = quant_method - elif self.quantization != quant_method: - raise ValueError( - "Quantization method specified in the model config " - f"({quant_method}) does not match the quantization " - f"method specified in the `quantization` argument " - f"({self.quantization}).") - - if self.quantization is not None: - if self.quantization not in supported_quantization: - raise ValueError( - f"Unknown quantization method: {self.quantization}. Must " - f"be one of {supported_quantization}.") - from vllm.platforms import current_platform - current_platform.verify_quantization(self.quantization) - - def _verify_cuda_graph(self) -> None: - # The `max_seq_len_to_capture` was incorrectly - # based on the encoder's input length (448) - # but not the decoder's larger input length (1500). - # This change ensures the CUDA Graph captures the correct, - # larger sequence length, allowing it to work as intended. - effective_max_seq_len = self.max_model_len - if self.is_encoder_decoder: - effective_max_seq_len = max( - effective_max_seq_len, - getattr(self.hf_config, "max_source_positions", 0)) - self.max_seq_len_to_capture = min(self.max_seq_len_to_capture, - effective_max_seq_len) - # CUDAGraph capture not supported for encoder-decoder models on ROCm - unsupported_rocm = self.is_encoder_decoder - - if (unsupported_rocm and not self.enforce_eager - and current_platform.is_rocm()): - logger.warning( - "CUDA graph is not supported for %s on ROCm yet, fallback " - "to eager mode.", self.hf_config.model_type) - self.enforce_eager = True - - def _verify_bnb_config(self) -> None: - """ - The current version of bitsandbytes (0.46.1) with 8-bit models does not - yet support CUDA graph. - # TODO Remove this when bitsandbytes supports. 
- """ - is_bitsandbytes = self.quantization == "bitsandbytes" - has_quantization_config = (getattr(self.hf_config, - "quantization_config", None) - is not None) - is_8bit = (self.hf_config.quantization_config.get( - "load_in_8bit", False) if has_quantization_config else False) - if all([ - is_bitsandbytes, - has_quantization_config, - is_8bit, - not self.enforce_eager, - ]): - logger.warning( - "CUDA graph is not supported on BitsAndBytes 8bit yet, " - "fallback to the eager mode.") - - self.enforce_eager = True - - def _verify_with_expert_parallelism(self) -> None: - num_expert_names = [ - "moe_num_experts", # Dbrx - "num_experts", # Jamba - "n_routed_experts", # DeepSeek - "num_local_experts", # Mixtral - ] - num_experts = 0 - for name in num_expert_names: - num_experts = getattr(self.hf_text_config, name, 0) - if num_experts > 0: - break - if num_experts < 1: - raise ValueError( - "Number of experts in the model must be greater than 0 " - "when expert parallelism is enabled.") - - def verify_dual_chunk_attention_config( - self, - load_config: "LoadConfig", - ) -> None: - if hasattr(self.hf_config, "dual_chunk_attention_config"): - # Try loading the sparse attention config - from vllm.model_executor.model_loader.weight_utils import ( - get_sparse_attention_config) - sparse_attn_config = get_sparse_attention_config(self, load_config) - if sparse_attn_config: - self.hf_config.dual_chunk_attention_config[ - "sparse_attention_config"] = sparse_attn_config - if "sparse_attention_enabled" not in \ - self.hf_config.dual_chunk_attention_config: - self.hf_config.dual_chunk_attention_config[ - "sparse_attention_enabled"] = True - - if envs.VLLM_ATTENTION_BACKEND != STR_DUAL_CHUNK_FLASH_ATTN_VAL: - raise ValueError("please set VLLM_ATTENTION_BACKEND to " - f"{STR_DUAL_CHUNK_FLASH_ATTN_VAL}") - - def verify_async_output_proc(self, parallel_config, speculative_config, - device_config) -> None: - if not self.use_async_output_proc: - # Nothing to check - return - - if parallel_config.pipeline_parallel_size > 1: - self.use_async_output_proc = False - return - - # Reminder: Please update docs/features/compatibility_matrix.md - # If the feature combo become valid - from vllm.platforms import current_platform - if not current_platform.is_async_output_supported(self.enforce_eager): - self.use_async_output_proc = False - return - - if envs.VLLM_USE_RAY_SPMD_WORKER: - self.use_async_output_proc = False - return - - # Async postprocessor is not necessary for pooling models - # since there is no token generation - if self.runner_type == "pooling": - self.use_async_output_proc = False - - # Reminder: Please update docs/features/compatibility_matrix.md - # If the feature combo become valid - if speculative_config: - self.use_async_output_proc = False - - def verify_with_parallel_config( - self, - parallel_config: "ParallelConfig", - ) -> None: - - if parallel_config.distributed_executor_backend == "external_launcher": - assert self.seed is not None, ( - "Seed must be set when using external launcher backend to " - "make sure sampling results are the same across workers.") - - total_num_attention_heads = getattr(self.hf_text_config, - "num_attention_heads", 0) - tensor_parallel_size = parallel_config.tensor_parallel_size - if total_num_attention_heads % tensor_parallel_size != 0: - raise ValueError( - f"Total number of attention heads ({total_num_attention_heads})" - " must be divisible by tensor parallel size " - f"({tensor_parallel_size}).") - - if parallel_config.enable_expert_parallel: - 
self._verify_with_expert_parallelism() - - pipeline_parallel_size = parallel_config.pipeline_parallel_size - if pipeline_parallel_size > 1: - if not self.registry.is_pp_supported_model(self.architectures, - self): - raise NotImplementedError( - "Pipeline parallelism is not supported for this model. " - "Supported models implement the `SupportsPP` interface.") - - if self.use_async_output_proc: - self.use_async_output_proc = False - - def get_sliding_window(self) -> Optional[int]: - """Get the sliding window size from the HF text config if present.""" - return getattr(self.hf_text_config, "sliding_window", None) - - def get_vocab_size(self) -> int: - return getattr(self.hf_text_config, "vocab_size", 0) - - def get_hidden_size(self) -> int: - return getattr(self.hf_text_config, "hidden_size", 0) - - @property - def is_deepseek_mla(self) -> bool: - if not hasattr(self.hf_text_config, "model_type"): - return False - elif self.hf_text_config.model_type in \ - ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'kimi_k2'): - return self.hf_text_config.kv_lora_rank is not None - elif self.hf_text_config.model_type == 'eagle': - # if the model is an EAGLE module, check for the - # underlying architecture - return self.hf_text_config.model.model_type in \ - ('deepseek_v2', 'deepseek_v3') \ - and self.hf_text_config.kv_lora_rank is not None - return False - - def get_head_size(self) -> int: - # TODO remove hard code - if self.is_deepseek_mla: - qk_rope_head_dim = getattr(self.hf_text_config, "qk_rope_head_dim", - 0) - if self.use_mla: - return self.hf_text_config.kv_lora_rank + qk_rope_head_dim - else: - qk_nope_head_dim = getattr(self.hf_text_config, - "qk_nope_head_dim", 0) - if qk_rope_head_dim and qk_nope_head_dim: - return qk_rope_head_dim + qk_nope_head_dim - - if hasattr(self.hf_text_config, - "model_type") and (self.hf_text_config.model_type - == "zamba2"): - return self.hf_text_config.attention_head_dim - - if self.is_attention_free: - return 0 - - # NOTE: Some configs may set head_dim=None in the config - if getattr(self.hf_text_config, "head_dim", None) is not None: - return self.hf_text_config.head_dim - - # NOTE: Some models (such as PLaMo2.1) use `hidden_size_per_head` - if getattr(self.hf_text_config, "hidden_size_per_head", - None) is not None: - return self.hf_text_config.hidden_size_per_head - - # FIXME(woosuk): This may not be true for all models. - return (self.hf_text_config.hidden_size // - self.hf_text_config.num_attention_heads) - - def get_total_num_kv_heads(self) -> int: - """Returns the total number of KV heads.""" - # For GPTBigCode & Falcon: - # NOTE: for falcon, when new_decoder_architecture is True, the - # multi_query flag is ignored and we use n_head_kv for the number of - # KV heads. - falcon_model_types = ["falcon", "RefinedWeb", "RefinedWebModel"] - new_decoder_arch_falcon = ( - self.hf_config.model_type in falcon_model_types - and getattr(self.hf_config, "new_decoder_architecture", False)) - if not new_decoder_arch_falcon and getattr(self.hf_text_config, - "multi_query", False): - # Multi-query attention, only one KV head. - # Currently, tensor parallelism is not supported in this case. 
- return 1 - - # For DBRX and MPT - if self.hf_config.model_type == "mpt": - if "kv_n_heads" in self.hf_config.attn_config: - return self.hf_config.attn_config["kv_n_heads"] - return self.hf_config.num_attention_heads - if self.hf_config.model_type == "dbrx": - return getattr(self.hf_config.attn_config, "kv_n_heads", - self.hf_config.num_attention_heads) - - if self.hf_config.model_type == "nemotron-nas": - for block in self.hf_config.block_configs: - if not block.attention.no_op: - return self.hf_config.num_attention_heads \ - // block.attention.n_heads_in_group - - raise RuntimeError("Couldn't determine number of kv heads") - - if self.is_attention_free: - return 0 - - attributes = [ - # For Falcon: - "n_head_kv", - "num_kv_heads", - # For LLaMA-2: - "num_key_value_heads", - # For ChatGLM: - "multi_query_group_num", - ] - for attr in attributes: - num_kv_heads = getattr(self.hf_text_config, attr, None) - if num_kv_heads is not None: - return num_kv_heads - - # For non-grouped-query attention models, the number of KV heads is - # equal to the number of attention heads. - return self.hf_text_config.num_attention_heads - - def get_num_kv_heads(self, parallel_config: "ParallelConfig") -> int: - """Returns the number of KV heads per GPU.""" - if self.use_mla: - # When using MLA during decode it becomes MQA - return 1 - - total_num_kv_heads = self.get_total_num_kv_heads() - # If tensor parallelism is used, we divide the number of KV heads by - # the tensor parallel size. We will replicate the KV heads in the - # case where the number of KV heads is smaller than the tensor - # parallel size so each GPU has at least one KV head. - return max(1, - total_num_kv_heads // parallel_config.tensor_parallel_size) - - def get_num_attention_heads(self, - parallel_config: "ParallelConfig") -> int: - num_heads = getattr(self.hf_text_config, "num_attention_heads", 0) - return num_heads // parallel_config.tensor_parallel_size - - def get_layers_start_end_indices( - self, parallel_config: "ParallelConfig") -> tuple[int, int]: - from vllm.distributed.utils import get_pp_indices - if (self.hf_text_config.model_type == "deepseek_mtp" - or self.hf_config.model_type == "mimo_mtp" - or self.hf_config.model_type == "glm4_moe_mtp" - or self.hf_config.model_type == "ernie_mtp" - or self.hf_config.model_type == "qwen3_next_mtp"): - total_num_hidden_layers = getattr(self.hf_text_config, - "num_nextn_predict_layers", 0) - else: - total_num_hidden_layers = getattr(self.hf_text_config, - "num_hidden_layers", 0) - # the layout order is: DP x PP x TP - pp_rank = (parallel_config.rank // parallel_config.tensor_parallel_size - ) % parallel_config.pipeline_parallel_size - pp_size = parallel_config.pipeline_parallel_size - start, end = get_pp_indices(total_num_hidden_layers, pp_rank, pp_size) - return start, end - - def get_num_layers(self, parallel_config: "ParallelConfig") -> int: - start, end = self.get_layers_start_end_indices(parallel_config) - return end - start - - def get_num_layers_by_block_type( - self, - parallel_config: "ParallelConfig", - block_type: LayerBlockType = LayerBlockType.attention, - ) -> int: - # This function relies on 'layers_block_type' in hf_config, - # for w/o this attribute, we will need to have workarounds like so - attn_block_type = block_type == LayerBlockType.attention - is_transformer = not self.is_hybrid and \ - not self.has_noops and \ - not self.is_attention_free - start, end = self.get_layers_start_end_indices(parallel_config) - - if is_transformer: - # Handle the basic case first - return 
end - start if attn_block_type else 0 - elif self.is_attention_free: - # Attention free - # Note that this code assumes there - # is only one type of attention-free block type. - return 0 if attn_block_type else end - start - elif self.has_noops: - block_configs = self.hf_config.block_configs - return sum(not bc.attention.no_op - for bc in block_configs[start:end]) - else: - # Hybrid model Jamba - layers_block_type_value = getattr(self.hf_text_config, - "layers_block_type", None) - if layers_block_type_value is not None: - if hasattr(self.hf_text_config, - "model_type") and (self.hf_text_config.model_type - == "zamba2"): - if attn_block_type: - return sum(t == "hybrid" - for t in layers_block_type_value[start:end]) - else: - return self.get_num_layers(parallel_config) - return sum(t == block_type.value - for t in layers_block_type_value[start:end]) - - # Hybrid model Minimax - attn_type_list = getattr(self.hf_config, "attn_type_list", None) - if attn_type_list: - return sum(t == 1 for t in attn_type_list[start:end]) - - # Hybrid model Qwen3Next - layer_types_value = getattr(self.hf_config, "layer_types", None) - if layer_types_value is not None: - if getattr(block_type, "value", block_type) == "attention": - return sum(t == "full_attention" - for t in layer_types_value[start:end]) - elif getattr(block_type, "value", - block_type) == "linear_attention": - return sum(t == "linear_attention" - for t in layer_types_value[start:end]) - else: - return sum(t == getattr(block_type, "value", block_type) - for t in layer_types_value[start:end]) - - if (layers_block_type_value is None and attn_type_list is None - and layer_types_value is None): - raise ValueError( - "The model is an hybrid without a" - "layers_block_type or an attn_type_list, or a layer_types " - "in the hf_config, cannot determine the num of " - f"{block_type.value} layers") - - def get_mamba_chunk_size(self) -> Optional[int]: - """ - Returns the mamba chunk size if it exists - """ - # used by e.g. Bamba, FalconH1, Granite, PLaMo2 - chunk_size = getattr(self.hf_text_config, "mamba_chunk_size", None) - if chunk_size is None: - # used by e.g. Mamba2, NemotronH, Zamba - chunk_size = getattr(self.hf_text_config, "chunk_size", None) - return chunk_size - - def get_multimodal_config(self) -> "MultiModalConfig": - """ - Get the multimodal configuration of the model. - - Raises: - ValueError: If the model is not multimodal. - """ - if self.multimodal_config is None: - raise ValueError("The model is not multimodal.") - - return self.multimodal_config - - def try_get_generation_config(self) -> dict[str, Any]: - """ - This method attempts to retrieve the non-default values of the - generation config for this model. - - The generation config can contain information about special tokens, as - well as sampling parameters. Which is why this method exists separately - to `get_diff_sampling_param`. - - Returns: - A dictionary containing the non-default generation config. - """ - if self.generation_config in {"auto", "vllm"}: - config = try_get_generation_config( - self.hf_config_path or self.model, - trust_remote_code=self.trust_remote_code, - revision=self.revision, - ) - else: - config = try_get_generation_config( - self.generation_config, - trust_remote_code=self.trust_remote_code, - ) - - if config is None: - return {} - - return config.to_diff_dict() - - def get_diff_sampling_param(self) -> dict[str, Any]: - """ - This method returns a dictionary containing the non-default sampling - parameters with `override_generation_config` applied. 
- - The default sampling parameters are: - - - vLLM's neutral defaults if `self.generation_config="vllm"` - - the model's defaults if `self.generation_config="auto"` - - as defined in `generation_config.json` if - `self.generation_config="path/to/generation_config/dir"` - - Returns: - A dictionary containing the non-default sampling parameters. - """ - if self.generation_config == "vllm": - config = {} - else: - config = self.try_get_generation_config() - - # Overriding with given generation config - config.update(self.override_generation_config) - - available_params = [ - "repetition_penalty", - "temperature", - "top_k", - "top_p", - "min_p", - "max_new_tokens", - ] - if any(p in config for p in available_params): - diff_sampling_param = { - p: config.get(p) - for p in available_params if config.get(p) is not None - } - # Huggingface definition of max_new_tokens is equivalent - # to vLLM's max_tokens - if "max_new_tokens" in diff_sampling_param: - diff_sampling_param["max_tokens"] = diff_sampling_param.pop( - "max_new_tokens") - else: - diff_sampling_param = {} - - if diff_sampling_param: - logger.warning_once( - "Default sampling parameters have been overridden by the " - "model's Hugging Face generation config recommended from the " - "model creator. If this is not intended, please relaunch " - "vLLM instance with `--generation-config vllm`.") - return diff_sampling_param - - @property - def is_encoder_decoder(self) -> bool: - """Extract the HF encoder/decoder model flag.""" - return is_encoder_decoder(self.hf_config) - - @property - def uses_mrope(self) -> bool: - return uses_mrope(self.hf_config) - - @property - def is_multimodal_model(self) -> bool: - return self.multimodal_config is not None - - @property - def is_multimodal_raw_input_only_model(self) -> bool: - return self._model_info.supports_multimodal_raw_input_only - - @property - def is_cross_encoder(self) -> bool: - return (self._model_info.supports_cross_encoding - or self.convert_type == "classify") - - @property - def is_pp_supported(self) -> bool: - return self._model_info.supports_pp - - @property - def is_attention_free(self) -> bool: - return self._model_info.is_attention_free - - @property - def is_hybrid(self) -> bool: - return self._model_info.is_hybrid - - @property - def has_noops(self) -> bool: - return self._model_info.has_noops - - @property - def has_inner_state(self): - return self._model_info.has_inner_state - - @property - def is_v1_compatible(self) -> bool: - return not self._model_info.supports_v0_only - - @property - def use_mla(self) -> bool: - return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE - - @property - def is_matryoshka(self) -> bool: - return (bool(getattr(self.hf_config, "matryoshka_dimensions", None)) - or getattr(self.hf_config, "is_matryoshka", False)) - - @property - def matryoshka_dimensions(self): - return getattr(self.hf_config, "matryoshka_dimensions", None) - - @property - def use_pad_token(self) -> bool: - # cross_encoder models defaults to using pad_token. - # `llm as reranker` models defaults to not using pad_token. - return getattr(self.hf_config, "use_pad_token", True) - - @property - def head_dtype(self) -> torch.dtype: - """ - "head" refers to the last Linear layer(s) of an LLM, - such as the lm_head in a generation model, - or the score or classifier in a classification model. - - `head_dtype` currently only supports pooling models.\n - - The pooling model defaults to using fp32 head, - you can use --hf-overrides '{"head_dtype": "model"}' to disable it. 
- """ - - head_dtype = _get_head_dtype(config=self.hf_config, - dtype=self.dtype, - runner_type=self.runner_type) - - if self.runner_type != "pooling" and head_dtype != self.dtype: - logger.warning_once( - "`head_dtype` currently only supports pooling models." - "fallback to model dtype [%s].", self.dtype) - return self.dtype - - if head_dtype not in current_platform.supported_dtypes: - logger.warning_once( - "The current platform does not support [%s] head dtype, " - "fallback to model dtype [%s].", head_dtype, self.dtype) - return self.dtype - - logger.debug_once("head dtype: %s", head_dtype) - return head_dtype - - def get_and_verify_max_len(self, max_model_len: int): - # Consider max_model_len in tokenizer_config only when - # pooling models use absolute position_embedding. - tokenizer_config = None - if (self.runner_type == "pooling" and getattr( - self.hf_config, "position_embedding_type", "") == "absolute"): - tokenizer_config = try_get_tokenizer_config( - self.tokenizer, - trust_remote_code=self.trust_remote_code, - revision=self.tokenizer_revision) - max_model_len = _get_and_verify_max_len( - hf_config=self.hf_text_config, - tokenizer_config=tokenizer_config, - max_model_len=max_model_len, - disable_sliding_window=self.disable_sliding_window, - sliding_window=self.get_sliding_window(), - spec_target_max_model_len=self.spec_target_max_model_len, - encoder_config=self.encoder_config) - logger.info("Using max model len %s", max_model_len) - return max_model_len - - Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] @@ -1838,365 +144,6 @@ def __post_init__(self): self.device = torch.device(self.device_type) -_STR_DTYPE_TO_TORCH_DTYPE = { - "half": torch.float16, - "float16": torch.float16, - "float": torch.float32, - "float32": torch.float32, - "bfloat16": torch.bfloat16, -} - -# model_type -> reason -_FLOAT16_NOT_SUPPORTED_MODELS = { - "gemma2": "Numerical instability. Please use bfloat16 or float32 instead.", - "gemma3": "Numerical instability. Please use bfloat16 or float32 instead.", - "gemma3_text": - "Numerical instability. Please use bfloat16 or float32 instead.", - "plamo2": "Numerical instability. Please use bfloat16 or float32 instead.", - "glm4": "Numerical instability. Please use bfloat16 or float32 instead.", -} - - -def _is_valid_dtype(model_type: str, dtype: torch.dtype): - if model_type in _FLOAT16_NOT_SUPPORTED_MODELS and dtype == torch.float16: # noqa: E501, SIM103 - return False - - return True - - -def _check_valid_dtype(model_type: str, dtype: torch.dtype): - if model_type in _FLOAT16_NOT_SUPPORTED_MODELS and dtype == torch.float16: - reason = _FLOAT16_NOT_SUPPORTED_MODELS[model_type] - raise ValueError(f"The model type {model_type!r} " - f"does not support float16. Reason: {reason}") - - return True - - -def _find_dtype( - model_id: str, - config: PretrainedConfig, - *, - revision: Optional[str], -): - # NOTE: getattr(config, "torch_dtype", torch.float32) is not correct - # because config.torch_dtype can be None. 
- config_dtype = getattr(config, "torch_dtype", None) - - # Fallbacks for multi-modal models if the root config - # does not define torch_dtype - if config_dtype is None: - config_dtype = getattr(config.get_text_config(), "torch_dtype", None) - if config_dtype is None and hasattr(config, "vision_config"): - config_dtype = getattr(config.vision_config, "torch_dtype", None) - if config_dtype is None and hasattr(config, "encoder_config"): - config_dtype = getattr(config.encoder_config, "torch_dtype", None) - - # Try to read the dtype of the weights if they are in safetensors format - if config_dtype is None: - repo_mt = try_get_safetensors_metadata(model_id, revision=revision) - - if repo_mt and (files_mt := repo_mt.files_metadata): - param_dtypes: set[torch.dtype] = { - _SAFETENSORS_TO_TORCH_DTYPE[dtype_str] - for file_mt in files_mt.values() - for dtype_str in file_mt.parameter_count - if dtype_str in _SAFETENSORS_TO_TORCH_DTYPE - } - - if param_dtypes: - return common_broadcastable_dtype(param_dtypes) - - if config_dtype is None: - config_dtype = torch.float32 - - return config_dtype - - -def _resolve_auto_dtype( - model_type: str, - config_dtype: torch.dtype, - *, - is_pooling_model: bool, -): - from vllm.platforms import current_platform - - supported_dtypes = [ - dtype for dtype in current_platform.supported_dtypes - if _is_valid_dtype(model_type, dtype) - ] - - if is_pooling_model and torch.float16 in supported_dtypes: - preferred_dtype = torch.float16 - else: - preferred_dtype = supported_dtypes[0] - - # Downcast for float32 models - if config_dtype == torch.float32: - config_dtype = preferred_dtype - - if config_dtype in supported_dtypes: - return config_dtype - - # Ensure device compatibility - device_name = current_platform.get_device_name() - device_capability = current_platform.get_device_capability() - - if device_capability is None: - device_str = f"{device_name!r}" - else: - version_str = device_capability.as_version_str() - device_str = f"{device_name!r} (with compute capability {version_str})" - - logger.warning( - "Your device %s doesn't support %s. " - "Falling back to %s for compatibility.", - device_str, - config_dtype, - preferred_dtype, - ) - - return preferred_dtype - - -def _get_and_verify_dtype( - model_id: str, - config: PretrainedConfig, - dtype: Union[str, torch.dtype], - *, - is_pooling_model: bool, - revision: Optional[str] = None, -) -> torch.dtype: - config_dtype = _find_dtype(model_id, config, revision=revision) - model_type = config.model_type - - if isinstance(dtype, str): - dtype = dtype.lower() - if dtype == "auto": - # Set default dtype from model config - torch_dtype = _resolve_auto_dtype( - model_type, - config_dtype, - is_pooling_model=is_pooling_model, - ) - else: - if dtype not in _STR_DTYPE_TO_TORCH_DTYPE: - raise ValueError(f"Unknown dtype: {dtype!r}") - torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype] - elif isinstance(dtype, torch.dtype): - torch_dtype = dtype - else: - raise ValueError(f"Unknown dtype: {dtype}") - - _check_valid_dtype(model_type, torch_dtype) - - if torch_dtype != config_dtype: - if torch_dtype == torch.float32: - # Upcasting to float32 is allowed. - logger.info("Upcasting %s to %s.", config_dtype, torch_dtype) - elif config_dtype == torch.float32: - # Downcasting from float32 to float16 or bfloat16 is allowed. - logger.info("Downcasting %s to %s.", config_dtype, torch_dtype) - else: - # Casting between float16 and bfloat16 is allowed with a warning. 
- logger.warning("Casting %s to %s.", config_dtype, torch_dtype) - - return torch_dtype - - -def _get_head_dtype(config: PretrainedConfig, dtype: torch.dtype, - runner_type: str) -> torch.dtype: - head_dtype: Optional[Union[str, - torch.dtype]] = getattr(config, "head_dtype", - None) - - if head_dtype == "model": - return dtype - elif isinstance(head_dtype, str): - head_dtype = head_dtype.lower() - if head_dtype not in _STR_DTYPE_TO_TORCH_DTYPE: - raise ValueError(f"Unknown dtype: {head_dtype!r}") - return _STR_DTYPE_TO_TORCH_DTYPE[head_dtype] - elif isinstance(head_dtype, torch.dtype): - return head_dtype - elif head_dtype is None: - if torch.float32 not in current_platform.supported_dtypes: - return dtype - if runner_type == "pooling": - return torch.float32 - return dtype - else: - raise ValueError(f"Unknown dtype: {head_dtype}") - - -def _get_and_verify_max_len( - hf_config: PretrainedConfig, - tokenizer_config: Optional[dict], - max_model_len: Optional[int], - disable_sliding_window: bool, - sliding_window: Optional[int], - spec_target_max_model_len: Optional[int] = None, - encoder_config: Optional[Any] = None, -) -> int: - """Get and verify the model's maximum length.""" - derived_max_model_len = float("inf") - possible_keys = [ - # OPT - "max_position_embeddings", - # GPT-2 - "n_positions", - # MPT - "max_seq_len", - # ChatGLM2 - "seq_length", - # Command-R - "model_max_length", - # Whisper - "max_target_positions", - # Others - "max_sequence_length", - "max_seq_length", - "seq_len", - ] - # Choose the smallest "max_length" from the possible keys - max_len_key = None - for key in possible_keys: - max_len = getattr(hf_config, key, None) - if max_len is not None: - max_len_key = key if max_len < derived_max_model_len \ - else max_len_key - derived_max_model_len = min(derived_max_model_len, max_len) - # For Command-R / Cohere, Cohere2 / Aya Vision models - if tmp_max_len := getattr(hf_config, "model_max_length", None): - max_len_key = "model_max_length" - derived_max_model_len = tmp_max_len - - # If sliding window is manually disabled, max_length should be less - # than the sliding window length in the model config. - if (disable_sliding_window and sliding_window is not None - and sliding_window < derived_max_model_len): - max_len_key = "sliding_window" - derived_max_model_len = sliding_window - - # Consider model_max_length in tokenizer_config - if tokenizer_config: - tokenizer_model_max_length = tokenizer_config.get( - "model_max_length", derived_max_model_len) - derived_max_model_len = min(derived_max_model_len, - tokenizer_model_max_length) - - # If none of the keys were found in the config, use a default and - # log a warning. - if derived_max_model_len == float("inf"): - if max_model_len is not None: - # If max_model_len is specified, we use it. - return max_model_len - - if spec_target_max_model_len is not None: - # If this is a speculative draft model, we use the max model len - # from the target model. - return spec_target_max_model_len - - default_max_len = 2048 - logger.warning( - "The model's config.json does not contain any of the following " - "keys to determine the original maximum length of the model: " - "%s. Assuming the model's maximum length is %d.", possible_keys, - default_max_len) - derived_max_model_len = default_max_len - - rope_scaling = getattr(hf_config, "rope_scaling", None) - # NOTE(woosuk): Gemma3's max_model_len (128K) is already scaled by RoPE - # scaling, so we skip applying the scaling factor again. 
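# Editor's aside (illustrative sketch, not part of this patch): for non-Gemma3
# models the rope_scaling branch below rescales the derived length; for YaRN
# the result is original_max_position_embeddings * factor. Self-contained
# example with made-up numbers:
_example_rope_scaling = {
    "rope_type": "yarn",
    "factor": 4.0,
    "original_max_position_embeddings": 32768,
}
_derived = (_example_rope_scaling["original_max_position_embeddings"]
            * _example_rope_scaling.get("factor", 1.0))
assert _derived == 131072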
- if rope_scaling is not None and "gemma3" not in hf_config.model_type: - # No need to consider "type" key because of patch_rope_scaling when - # loading HF config - rope_type = rope_scaling["rope_type"] - - if rope_type not in ("su", "longrope", "llama3"): - if disable_sliding_window: - # TODO(robertgshaw): Find a model that supports rope_scaling - # with sliding window to see if this case should be allowed. - raise NotImplementedError( - "Disabling sliding window is not supported for models " - "with rope_scaling. Please raise an issue so we can " - "investigate.") - - # NOTE: rope_type == "default" does not define factor - # https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/modeling_rope_utils.py - scaling_factor = rope_scaling.get("factor", 1.0) - - if rope_type == "yarn": - derived_max_model_len = rope_scaling[ - "original_max_position_embeddings"] - derived_max_model_len *= scaling_factor - - if encoder_config and "max_seq_length" in encoder_config: - derived_max_model_len = encoder_config["max_seq_length"] - - # If the user specified a max length, make sure it is smaller than the - # derived length from the HF model config. - if max_model_len is None: - max_model_len = int(derived_max_model_len) - if current_platform.is_tpu(): - logger.warning( - "--max-model-len is not specified, " - "it's currently using model's default length %s, " - "which might be too large." - "Please input with --max-model-len based on your " - "request input length and output length, to avoid " - "unnecessary degradation.", max_model_len) - elif max_model_len > derived_max_model_len: - # Some models might have a separate key for specifying model_max_length - # that will be bigger than derived_max_model_len. We compare user input - # with model_max_length and allow this override when it's smaller. - model_max_length = getattr(hf_config, "model_max_length", None) - if model_max_length is not None and max_model_len <= model_max_length: - if disable_sliding_window: - # TODO(robertgshaw): Find a model that has model_max_length - # with sliding window to see if this case should be allowed. - raise NotImplementedError( - "Disabling sliding window is not supported for models " - "model_max_length in the config. Please raise an issue " - "so we can investigate.") - else: - msg = ( - f"User-specified max_model_len ({max_model_len}) is greater " - f"than the derived max_model_len ({max_len_key}=" - f"{derived_max_model_len} or model_max_length=" - f"{model_max_length} in model's config.json).") - warning = ( - "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme " - "caution. If the model uses relative position encoding (RoPE), " - "positions exceeding derived_max_model_len lead to nan. If the " - "model uses absolute position encoding, positions exceeding " - "derived_max_model_len will cause a CUDA array out-of-bounds " - "error.") - if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN: - logger.warning_once("%s %s", msg, warning) - else: - raise ValueError( - f"{msg} To allow overriding this maximum, set " - f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. {warning}") - return int(max_model_len) - - -def get_served_model_name(model: str, - served_model_name: Optional[Union[str, list[str]]]): - """ - If the input is a non-empty list, the first model_name in - `served_model_name` is taken. - If the input is a non-empty string, it is used directly. - For cases where the input is either an empty string or an - empty list, the fallback is to use `self.model`. 
- """ - if not served_model_name: - return model - if isinstance(served_model_name, list): - return served_model_name[0] - return served_model_name - - DetailedTraceModules = Literal["model", "worker", "all"] @@ -3012,33 +959,6 @@ def get_current_model_prefix() -> str: return _current_prefix -def contains_object_print(text): - """ - Check if the text looks like a printed Python object, e.g. - contains any substring matching the pattern: "at 0xFFFFFFF>" - We match against 0x followed by 2-16 hex chars (there's - a max of 16 on a 64-bit system). - - Args: - text (str): The text to check - - Returns: - result (bool): `True` if a match is found, `False` otherwise. - """ - pattern = r'at 0x[a-fA-F0-9]{2,16}>' - match = re.search(pattern, text) - return match is not None - - -def assert_hashable(text): - if not contains_object_print(text): - return True - raise AssertionError( - f"vLLM tried to hash some configs that may have Python objects ids " - f"in them. This is a bug, please file an issue. " - f"Text being hashed: {text}") - - T = TypeVar("T") diff --git a/vllm/config/model.py b/vllm/config/model.py new file mode 100644 index 000000000000..21457d3660a2 --- /dev/null +++ b/vllm/config/model.py @@ -0,0 +1,2006 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import hashlib +import json +import warnings +from dataclasses import InitVar, field +from importlib.util import find_spec +from typing import (TYPE_CHECKING, Any, Callable, Literal, Optional, Union, + cast, get_args) + +import torch +from pydantic import (ConfigDict, SkipValidation, field_validator, + model_validator) +from pydantic.dataclasses import dataclass +from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE +from typing_extensions import assert_never + +import vllm.envs as envs +from vllm.config.multimodal import (MMCacheType, MMEncoderTPMode, + MultiModalConfig) +from vllm.config.pooler import PoolerConfig +from vllm.config.utils import assert_hashable, config +from vllm.logger import init_logger +from vllm.platforms import current_platform +from vllm.transformers_utils.config import ( + ConfigFormat, get_config, get_hf_image_processor_config, + get_hf_text_config, get_pooling_config, + get_sentence_transformer_tokenizer_config, is_encoder_decoder, + is_interleaved, maybe_override_with_speculators_target_model, + try_get_generation_config, try_get_safetensors_metadata, + try_get_tokenizer_config, uses_mrope) +from vllm.transformers_utils.runai_utils import (ObjectStorageModel, + is_runai_obj_uri) +from vllm.transformers_utils.utils import maybe_model_redirect +from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, + LazyLoader, common_broadcastable_dtype) + +if TYPE_CHECKING: + from transformers import PretrainedConfig + + import vllm.model_executor.layers.quantization as me_quant + import vllm.model_executor.models as me_models + from vllm.config.load import LoadConfig + from vllm.config.parallel import ParallelConfig + from vllm.config.scheduler import RunnerType + from vllm.model_executor.layers.quantization import QuantizationMethods + from vllm.v1.sample.logits_processor import LogitsProcessor +else: + PretrainedConfig = Any + + me_quant = LazyLoader("model_executor", globals(), + "vllm.model_executor.layers.quantization") + me_models = LazyLoader("model_executor", globals(), + "vllm.model_executor.models") + LoadConfig = Any + ParallelConfig = Any + RunnerType = Any + QuantizationMethods = Any + LogitsProcessor = Any + +logger 
= init_logger(__name__) + +RunnerOption = Literal["auto", "generate", "pooling", "draft"] +ConvertType = Literal["none", "embed", "classify", "reward"] +ConvertOption = Literal["auto", ConvertType] +TaskOption = Literal["auto", "generate", "embedding", "embed", "classify", + "score", "reward", "transcription", "draft"] +_ResolvedTask = Literal["generate", "transcription", "encode", "embed", + "classify", "reward", "draft"] +TokenizerMode = Literal["auto", "slow", "mistral", "custom"] +ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"] +LogprobsMode = Literal["raw_logits", "raw_logprobs", "processed_logits", + "processed_logprobs"] +HfOverrides = Union[dict[str, Any], Callable[[type], type]] +ModelImpl = Literal["auto", "vllm", "transformers", "terratorch"] + +_RUNNER_TASKS: dict[RunnerType, list[TaskOption]] = { + "generate": ["generate", "transcription"], + "pooling": ["embedding", "embed", "classify", "score", "reward"], + "draft": ["draft"], +} + +_RUNNER_CONVERTS: dict[RunnerType, list[ConvertType]] = { + "generate": [], + "pooling": ["embed", "classify", "reward"], + "draft": [], +} + + +@config +@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) +class ModelConfig: + """Configuration for the model.""" + + model: str = "Qwen/Qwen3-0.6B" + """Name or path of the Hugging Face model to use. It is also used as the + content for `model_name` tag in metrics output when `served_model_name` is + not specified.""" + runner: RunnerOption = "auto" + """The type of model runner to use. Each vLLM instance only supports one + model runner, even if the same model can be used for multiple types.""" + convert: ConvertOption = "auto" + """Convert the model using adapters defined in + [vllm.model_executor.models.adapters][]. The most common use case is to + adapt a text generation model to be used for pooling tasks.""" + task: Optional[TaskOption] = None + """[DEPRECATED] The task to use the model for. If the model supports more + than one model runner, this is used to select which model runner to run. + + Note that the model may support other tasks using the same model runner. + """ + tokenizer: SkipValidation[str] = None # type: ignore + """Name or path of the Hugging Face tokenizer to use. If unspecified, model + name or path will be used.""" + tokenizer_mode: TokenizerMode = "auto" + """Tokenizer mode:\n + - "auto" will use the fast tokenizer if available.\n + - "slow" will always use the slow tokenizer.\n + - "mistral" will always use the tokenizer from `mistral_common`.\n + - "custom" will use --tokenizer to select the preregistered tokenizer.""" + trust_remote_code: bool = False + """Trust remote code (e.g., from HuggingFace) when downloading the model + and tokenizer.""" + dtype: Union[ModelDType, torch.dtype] = "auto" + """Data type for model weights and activations:\n + - "auto" will use FP16 precision for FP32 and FP16 models, and BF16 + precision for BF16 models.\n + - "half" for FP16. Recommended for AWQ quantization.\n + - "float16" is the same as "half".\n + - "bfloat16" for a balance between precision and range.\n + - "float" is shorthand for FP32 precision.\n + - "float32" for FP32 precision.""" + seed: Optional[int] = None + """Random seed for reproducibility. Initialized to None in V0, but + initialized to 0 in V1.""" + hf_config_path: Optional[str] = None + """Name or path of the Hugging Face config to use. 
If unspecified, model + name or path will be used.""" + allowed_local_media_path: str = "" + """Allowing API requests to read local images or videos from directories + specified by the server file system. This is a security risk. Should only + be enabled in trusted environments.""" + revision: Optional[str] = None + """The specific model version to use. It can be a branch name, a tag name, + or a commit id. If unspecified, will use the default version.""" + code_revision: Optional[str] = None + """The specific revision to use for the model code on the Hugging Face Hub. + It can be a branch name, a tag name, or a commit id. If unspecified, will + use the default version.""" + rope_scaling: dict[str, Any] = field(default_factory=dict) + """RoPE scaling configuration. For example, + `{"rope_type":"dynamic","factor":2.0}`.""" + rope_theta: Optional[float] = None + """RoPE theta. Use with `rope_scaling`. In some cases, changing the RoPE + theta improves the performance of the scaled model.""" + tokenizer_revision: Optional[str] = None + """The specific revision to use for the tokenizer on the Hugging Face Hub. + It can be a branch name, a tag name, or a commit id. If unspecified, will + use the default version.""" + max_model_len: SkipValidation[int] = None # type: ignore + """Model context length (prompt and output). If unspecified, will be + automatically derived from the model config. + + When passing via `--max-model-len`, supports k/m/g/K/M/G in human-readable + format. Examples:\n + - 1k -> 1000\n + - 1K -> 1024\n + - 25.6k -> 25,600""" + spec_target_max_model_len: Optional[int] = None + """Specify the maximum length for spec decoding draft models.""" + quantization: SkipValidation[Optional[QuantizationMethods]] = None + """Method used to quantize the weights. If `None`, we first check the + `quantization_config` attribute in the model config file. If that is + `None`, we assume the model weights are not quantized and use `dtype` to + determine the data type of the weights.""" + enforce_eager: bool = False + """Whether to always use eager-mode PyTorch. If True, we will disable CUDA + graph and always execute the model in eager mode. If False, we will use + CUDA graph and eager execution in hybrid for maximal performance and + flexibility.""" + max_seq_len_to_capture: int = 8192 + """Maximum sequence len covered by CUDA graphs. When a sequence has context + length larger than this, we fall back to eager mode. Additionally for + encoder-decoder models, if the sequence length of the encoder input is + larger than this, we fall back to the eager mode.""" + max_logprobs: int = 20 + """Maximum number of log probabilities to return when `logprobs` is + specified in `SamplingParams`. The default value comes the default for the + OpenAI Chat Completions API. -1 means no cap, i.e. all (output_length * + vocab_size) logprobs are allowed to be returned and it may cause OOM.""" + logprobs_mode: LogprobsMode = "raw_logprobs" + """Indicates the content returned in the logprobs and prompt_logprobs. + Supported mode: + 1) raw_logprobs, 2) processed_logprobs, 3) raw_logits, 4) processed_logits. + Raw means the values before applying any logit processors, like bad words. + Processed means the values after applying all processors, including + temperature and top_k/top_p. + """ + disable_sliding_window: bool = False + """Whether to disable sliding window. If True, we will disable the sliding + window functionality of the model, capping to sliding window size. 
If the + model does not support sliding window, this argument is ignored.""" + disable_cascade_attn: bool = False + """Disable cascade attention for V1. While cascade attention does not + change the mathematical correctness, disabling it could be useful for + preventing potential numerical issues. Note that even if this is set to + False, cascade attention will be only used when the heuristic tells that + it's beneficial.""" + skip_tokenizer_init: bool = False + """Skip initialization of tokenizer and detokenizer. Expects valid + `prompt_token_ids` and `None` for prompt from the input. The generated + output will contain token ids.""" + enable_prompt_embeds: bool = False + """If `True`, enables passing text embeddings as inputs via the + `prompt_embeds` key. Note that enabling this will double the time required + for graph compilation.""" + served_model_name: Optional[Union[str, list[str]]] = None + """The model name(s) used in the API. If multiple names are provided, the + server will respond to any of the provided names. The model name in the + model field of a response will be the first name in this list. If not + specified, the model name will be the same as the `--model` argument. Noted + that this name(s) will also be used in `model_name` tag content of + prometheus metrics, if multiple names provided, metrics tag will take the + first one.""" + use_async_output_proc: bool = True + """Whether to use async output processor.""" + config_format: Union[str, ConfigFormat] = "auto" + """The format of the model config to load:\n + - "auto" will try to load the config in hf format if available else it + will try to load in mistral format.\n + - "hf" will load the config in hf format.\n + - "mistral" will load the config in mistral format.""" + hf_token: Optional[Union[bool, str]] = None + """The token to use as HTTP bearer authorization for remote files . If + `True`, will use the token generated when running `huggingface-cli login` + (stored in `~/.huggingface`).""" + hf_overrides: HfOverrides = field(default_factory=dict) + """If a dictionary, contains arguments to be forwarded to the Hugging Face + config. If a callable, it is called to update the HuggingFace config.""" + logits_processor_pattern: Optional[str] = None + """Optional regex pattern specifying valid logits processor qualified names + that can be passed with the `logits_processors` extra completion argument. + Defaults to `None`, which allows no processors.""" + generation_config: str = "auto" + """The folder path to the generation config. Defaults to `"auto"`, the + generation config will be loaded from model path. If set to `"vllm"`, no + generation config is loaded, vLLM defaults will be used. If set to a folder + path, the generation config will be loaded from the specified folder path. + If `max_new_tokens` is specified in generation config, then it sets a + server-wide limit on the number of output tokens for all requests.""" + override_generation_config: dict[str, Any] = field(default_factory=dict) + """Overrides or sets generation config. e.g. `{"temperature": 0.5}`. If + used with `--generation-config auto`, the override parameters will be + merged with the default config from the model. 
If used with + `--generation-config vllm`, only the override parameters are used.""" + enable_sleep_mode: bool = False + """Enable sleep mode for the engine (only cuda platform is supported).""" + model_impl: Union[str, ModelImpl] = "auto" + """Which implementation of the model to use:\n + - "auto" will try to use the vLLM implementation, if it exists, and fall + back to the Transformers implementation if no vLLM implementation is + available.\n + - "vllm" will use the vLLM model implementation.\n + - "transformers" will use the Transformers model implementation.\n + - "terratorch" will use the TerraTorch model implementation. + """ + override_attention_dtype: Optional[str] = None + """Override dtype for attention""" + logits_processors: Optional[list[Union[str, type[LogitsProcessor]]]] = None + """One or more logits processors' fully-qualified class names or class + definitions""" + io_processor_plugin: Optional[str] = None + """IOProcessor plugin name to load at model startup""" + + # Pooler config + pooler_config: Optional[PoolerConfig] = None + """Pooler config which controls the behaviour of output pooling in pooling + models.""" + override_pooler_config: Optional[Union[dict, PoolerConfig]] = None + """[DEPRECATED] Use `pooler_config` instead. This field will be removed in + v0.12.0 or v1.0.0, whichever is sooner.""" + + # Multimodal config and init vars + multimodal_config: Optional[MultiModalConfig] = None + """Configuration for multimodal model. If `None`, this will be inferred + from the architecture of `self.model`.""" + limit_mm_per_prompt: InitVar[Optional[dict[str, int]]] = None + media_io_kwargs: InitVar[Optional[dict[str, dict[str, Any]]]] = None + mm_processor_kwargs: InitVar[Optional[dict[str, Any]]] = None + mm_processor_cache_gb: InitVar[Optional[float]] = None + mm_processor_cache_type: InitVar[Optional[MMCacheType]] = None + mm_shm_cache_max_object_size_mb: InitVar[Optional[int]] = None + mm_encoder_tp_mode: InitVar[Optional[MMEncoderTPMode]] = None + interleave_mm_strings: InitVar[Optional[bool]] = None + skip_mm_profiling: InitVar[Optional[bool]] = None + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + factors: list[Any] = [] + factors.append(self.model) + factors.append(self.dtype) + factors.append(self.quantization) + factors.append(self.revision) + factors.append(self.code_revision) + factors.append(self.max_model_len) + factors.append(self.max_logprobs) + factors.append(self.disable_sliding_window) + factors.append(self.trust_remote_code) + factors.append(self.generation_config) + factors.append(self.model_impl) + factors.append(self.override_generation_config) + factors.append(self.rope_scaling) + factors.append(self.rope_theta) + # hf_config can control how the model looks! 
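# Editor's aside (illustrative sketch, not part of this patch): the serialized
# hf_config is appended as a factor on the next line, and the whole factor
# list is then reduced to a sha256 hex digest. Standalone equivalent with
# made-up factors:
import hashlib

_factors = ["Qwen/Qwen3-0.6B", "auto", None, '{"model_type": "qwen3"}']
_digest = hashlib.sha256(str(_factors).encode()).hexdigest()
assert len(_digest) == 64  # hex-encoded sha256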
+ factors.append(self.hf_config.to_json_string()) + str_factors = str(factors) + assert_hashable(str_factors) + return hashlib.sha256(str(factors).encode()).hexdigest() + + def __post_init__( + self, + # Multimodal config init vars + limit_mm_per_prompt: Optional[dict[str, int]], + media_io_kwargs: Optional[dict[str, dict[str, Any]]], + mm_processor_kwargs: Optional[dict[str, Any]], + mm_processor_cache_gb: Optional[float], + mm_processor_cache_type: Optional[MMCacheType], + mm_shm_cache_max_object_size_mb: Optional[int], + mm_encoder_tp_mode: Optional[MMEncoderTPMode], + interleave_mm_strings: Optional[bool], + skip_mm_profiling: Optional[bool]) -> None: + # Set the default seed to 0 in V1. + # NOTE(woosuk): In V0, we set the default seed to None because the + # driver worker shares the same process as the user process, and thus + # setting a seed affects the user process as well. + # In V1, we use separate processes for workers (unless + # VLLM_ENABLE_V1_MULTIPROCESSING=0), so setting a seed here + # doesn't affect the user process. However, without a consistent seed, + # different tensor parallel workers would sample different tokens, + # leading to inconsistent results. + if envs.VLLM_USE_V1 and self.seed is None: + self.seed = 0 + if not envs.VLLM_ENABLE_V1_MULTIPROCESSING: + logger.warning( + "The global random seed is set to %d. Since " + "VLLM_ENABLE_V1_MULTIPROCESSING is set to False, this may " + "affect the random state of the Python process that " + "launched vLLM.", self.seed) + + # Keep set served_model_name before maybe_model_redirect(self.model) + self.served_model_name = get_served_model_name(self.model, + self.served_model_name) + self.model = maybe_model_redirect(self.model) + # The tokenizer is consistent with the model by default. + if self.tokenizer is None: + self.tokenizer = self.model + if self.tokenizer_revision is None: + self.tokenizer_revision = self.revision + self.tokenizer = maybe_model_redirect(self.tokenizer) + + if isinstance(self.hf_config_path, str): + self.hf_config_path = maybe_model_redirect(self.hf_config_path) + + if callable(self.hf_overrides): + hf_overrides_kw = {} + hf_overrides_fn = self.hf_overrides + else: + hf_overrides_kw = self.hf_overrides + hf_overrides_fn = None + + if self.rope_scaling: + hf_override: dict[str, Any] = {"rope_scaling": self.rope_scaling} + hf_overrides_kw.update(hf_override) + hf_overrides_str = json.dumps(hf_overrides_kw) + msg = ( + "`--rope-scaling` will be removed in a future release. " + f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") + warnings.warn(DeprecationWarning(msg), stacklevel=2) + if self.rope_theta is not None: + hf_override = {"rope_theta": self.rope_theta} + hf_overrides_kw.update(hf_override) + hf_overrides_str = json.dumps(hf_overrides_kw) + msg = ( + "`--rope-theta` will be removed in a future release. 
" + f"'Please instead use `--hf-overrides '{hf_overrides_str}'`") + warnings.warn(DeprecationWarning(msg), stacklevel=2) + + self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) + + if self.runner != "draft": + # If we're not running the draft model, check for speculators config + # If speculators config, set model / tokenizer to be target model + self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501 + model=self.model, + tokenizer=self.tokenizer, + revision=self.revision, + trust_remote_code=self.trust_remote_code) + + if (backend := envs.VLLM_ATTENTION_BACKEND + ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: + raise ValueError( + "VLLM_ATTENTION_BACKEND is set to FLASHINFER, but flashinfer " + "module was not found. See " + "https://github.com/vllm-project/vllm/blob/main/docker/Dockerfile " # noqa: E501 + "for instructions on how to install it.") + + from vllm.platforms import current_platform + + if (self.override_attention_dtype is not None + and not current_platform.is_rocm()): + warnings.warn( + "override-attention-dtype is set but not using ROCm platform", + stacklevel=2) + + if (self.enable_sleep_mode + and not current_platform.is_sleep_mode_available()): + raise ValueError( + "Sleep mode is not supported on current platform.") + + hf_config = get_config(self.hf_config_path or self.model, + self.trust_remote_code, + self.revision, + self.code_revision, + self.config_format, + hf_overrides_kw=hf_overrides_kw, + hf_overrides_fn=hf_overrides_fn) + + self.hf_config = hf_config + self.hf_text_config = get_hf_text_config(self.hf_config) + self.attention_chunk_size = getattr(self.hf_text_config, + "attention_chunk_size", None) + self.encoder_config = self._get_encoder_config() + self.hf_image_processor_config = get_hf_image_processor_config( + self.model, hf_token=self.hf_token, revision=self.revision) + + architectures = self.architectures + registry = self.registry + is_generative_model = registry.is_text_generation_model( + architectures, self) + is_pooling_model = registry.is_pooling_model(architectures, self) + + def _task_to_convert(task: TaskOption) -> ConvertType: + if task == "embedding" or task == "embed": + return "embed" + if task == "classify": + return "classify" + if task == "reward": + return "reward" + if task == "score": + new_task = self._get_default_pooling_task(architectures) + return "classify" if new_task == "classify" else "embed" + + return "none" + + if self.task is not None: + runner: RunnerOption = "auto" + convert: ConvertOption = "auto" + msg_prefix = ("The 'task' option has been deprecated and will be " + "removed in v0.13.0 or v1.0, whichever comes first.") + msg_hint = "Please remove this option." 
+ + is_generative_task = self.task in _RUNNER_TASKS["generate"] + is_pooling_task = self.task in _RUNNER_TASKS["pooling"] + + if is_generative_model and is_pooling_model: + if is_generative_task: + runner = "generate" + convert = "auto" + msg_hint = ("Please replace this option with `--runner " + "generate` to continue using this model " + "as a generative model.") + elif is_pooling_task: + runner = "pooling" + convert = "auto" + msg_hint = ("Please replace this option with `--runner " + "pooling` to continue using this model " + "as a pooling model.") + else: # task == "auto" + pass + elif is_generative_model or is_pooling_model: + if is_generative_task: + runner = "generate" + convert = "auto" + msg_hint = "Please remove this option" + elif is_pooling_task: + runner = "pooling" + convert = _task_to_convert(self.task) + msg_hint = ("Please replace this option with `--convert " + f"{convert}` to continue using this model " + "as a pooling model.") + else: # task == "auto" + pass + else: + raise AssertionError("The model should be a generative or " + "pooling model when task is set to " + f"{self.task!r}.") + + self.runner = runner + self.convert = convert + + msg = f"{msg_prefix} {msg_hint}" + warnings.warn(msg, DeprecationWarning, stacklevel=2) + + self.runner_type = self._get_runner_type(architectures, self.runner) + self.convert_type = self._get_convert_type(architectures, + self.runner_type, + self.convert) + + if self.runner_type == "generate" and not is_generative_model: + generate_converts = _RUNNER_CONVERTS["generate"] + if self.convert_type not in generate_converts: + # Currently we don't have any converters for generative models + raise ValueError( + "This model does not support `--runner generate`.") + if self.runner_type == "pooling" and not is_pooling_model: + pooling_converts = _RUNNER_CONVERTS["pooling"] + if self.convert_type not in pooling_converts: + convert_option = "<" + "|".join(pooling_converts) + ">" + raise ValueError( + "This model does not support `--runner pooling`. " + f"You can pass `--convert {convert_option} to adapt " + "it into a pooling model.") + + self.supported_tasks = self._get_supported_tasks( + architectures, self.runner_type, self.convert_type) + + # Note: Initialize these attributes early because transformers fallback + # may fail to load dynamic modules in child processes + model_info, arch = registry.inspect_model_cls(architectures, self) + self._model_info = model_info + self._architecture = arch + logger.info("Resolved architecture: %s", arch) + + # Init pooler config if needed + if self.runner_type == "pooling": + if self.override_pooler_config is not None: + logger.warning_once( + "`override_pooler_config` is deprecated and will be " + "removed in v0.12.0 or v1.0.0, whichever is sooner. 
" + "Please use `pooler_config` instead.") + + if isinstance(self.override_pooler_config, dict): + self.pooler_config = PoolerConfig( + **self.override_pooler_config) + else: + self.pooler_config = self.override_pooler_config + + if self.pooler_config is None: + self.pooler_config = PoolerConfig() + + base_config = get_pooling_config(self.model, self.revision) + if base_config is not None: + # Only set values that are not overridden by the user + for k, v in base_config.items(): + if getattr(self.pooler_config, k) is None: + setattr(self.pooler_config, k, v) + + default_pooling_type = self._model_info.default_pooling_type + if self.pooler_config.pooling_type is None: + self.pooler_config.pooling_type = default_pooling_type + + self.dtype: torch.dtype = _get_and_verify_dtype( + self.model, + self.hf_config, + self.dtype, + is_pooling_model=self.runner_type == "pooling", + revision=self.revision, + ) + + # Interleaved attention is not supported by some backends in V0 + if (not self.disable_sliding_window + and is_interleaved(self.hf_text_config) + and not envs.VLLM_USE_V1 + and (backend := envs.VLLM_ATTENTION_BACKEND) + in ("XFORMERS", "FLASHINFER")): + logger.warning_once( + "%s has interleaved attention, which is currently not " + "supported by the %s backend. Disabling sliding window and " + "capping the max length to the sliding window size (%d).", + self.hf_text_config.model_type, + backend, + self.hf_text_config.sliding_window, + ) + self.disable_sliding_window = True + + self.original_max_model_len = self.max_model_len + self.max_model_len = self.get_and_verify_max_len(self.max_model_len) + # Init multimodal config if needed + if self._model_info.supports_multimodal: + if (mm_encoder_tp_mode == "data" and + not self._model_info.supports_multimodal_encoder_tp_data): + logger.warning_once( + "This model does not support `--mm-encoder-tp-mode data`. 
" + "Falling back to `--mm-encoder-tp-mode weights`.") + mm_encoder_tp_mode = "weights" + + mm_config_kwargs = dict( + limit_per_prompt=limit_mm_per_prompt, + media_io_kwargs=media_io_kwargs, + mm_processor_kwargs=mm_processor_kwargs, + mm_processor_cache_gb=mm_processor_cache_gb, + mm_processor_cache_type=mm_processor_cache_type, + mm_shm_cache_max_object_size_mb=mm_shm_cache_max_object_size_mb, + mm_encoder_tp_mode=mm_encoder_tp_mode, + interleave_mm_strings=interleave_mm_strings, + skip_mm_profiling=skip_mm_profiling, + ) + + mm_config_kwargs = { + k: v + for k, v in mm_config_kwargs.items() if v is not None + } + + self.multimodal_config = MultiModalConfig(**mm_config_kwargs) + + if self.disable_sliding_window: + # Set after get_and_verify_max_len to ensure that max_model_len + # can be correctly capped to sliding window size + self.hf_text_config.sliding_window = None + + if not self.skip_tokenizer_init: + self._verify_tokenizer_mode() + + # Avoid running try_verify_and_update_config multiple times + self.config_updated = False + + self._verify_quantization() + self._verify_cuda_graph() + self._verify_bnb_config() + + @field_validator("quantization", mode="before") + @classmethod + def validate_quantization_before(cls, value: Any) -> Any: + if isinstance(value, str): + return value.lower() + return value + + @model_validator(mode="after") + def validate_model_config_after(self: "ModelConfig") -> "ModelConfig": + if not isinstance(self.tokenizer, str): + raise ValueError("tokenizer must be a string after __post_init__.") + if not isinstance(self.max_model_len, int): + raise ValueError( + "max_model_len must be an integer after __post_init__.") + return self + + def _get_transformers_backend_cls(self) -> str: + """Determine which Transformers backend class will be used if + `model_impl` is set to `transformers` or `auto`.""" + if getattr(self, "runner_type", self.runner) == "pooling": + return "TransformersModel" + if self.hf_config != self.hf_text_config: + # If 'hf_text_config' is the same as 'hf_config'. If not, it is + # probably a composite config, i.e. multimodal + return "TransformersForMultimodalLM" + return "TransformersForCausalLM" + + def using_transformers_backend(self) -> bool: + """Check if the model is using the Transformers backend class.""" + return self.architecture == self._get_transformers_backend_cls() + + @property + def registry(self): + return me_models.ModelRegistry + + @property + def architectures(self) -> list[str]: + return getattr(self.hf_config, "architectures", []) + + @property + def architecture(self) -> str: + """The architecture vllm actually used.""" + return self._architecture + + def maybe_pull_model_tokenizer_for_runai(self, model: str, + tokenizer: str) -> None: + """Pull model/tokenizer from Object Storage to temporary + directory when needed. 
+ + Args: + model: Model name or path + tokenizer: Tokenizer name or path + """ + if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)): + return + + if is_runai_obj_uri(model): + object_storage_model = ObjectStorageModel() + object_storage_model.pull_files( + model, allow_pattern=["*.model", "*.py", "*.json"]) + self.model_weights = model + self.model = object_storage_model.dir + + # If tokenizer is same as model, download to same directory + if model == tokenizer: + object_storage_model.pull_files(model, + ignore_pattern=[ + "*.pt", "*.safetensors", + "*.bin", "*.tensors", + "*.pth" + ]) + self.tokenizer = object_storage_model.dir + return + + # Only download tokenizer if needed and not already handled + if is_runai_obj_uri(tokenizer): + object_storage_tokenizer = ObjectStorageModel() + object_storage_tokenizer.pull_files(model, + ignore_pattern=[ + "*.pt", "*.safetensors", + "*.bin", "*.tensors", + "*.pth" + ]) + self.tokenizer = object_storage_tokenizer.dir + + def _get_encoder_config(self): + return get_sentence_transformer_tokenizer_config( + self.model, self.revision) + + def _verify_tokenizer_mode(self) -> None: + tokenizer_mode = cast(TokenizerMode, self.tokenizer_mode.lower()) + if tokenizer_mode not in get_args(TokenizerMode): + raise ValueError( + f"Unknown tokenizer mode: {self.tokenizer_mode}. Must be " + f"one of {get_args(TokenizerMode)}.") + self.tokenizer_mode = tokenizer_mode + + def _get_default_runner_type( + self, + architectures: list[str], + ) -> RunnerType: + registry = self.registry + + # Some Sentence Transformers models use *ForCausalLM archs + if get_pooling_config(self.model, self.revision): + return "pooling" + + for arch in architectures: + if arch in registry.get_supported_archs(): + if registry.is_pooling_model(architectures, self): + return "pooling" + if registry.is_text_generation_model(architectures, self): + return "generate" + + match = try_match_architecture_defaults(arch) + if match: + _, (runner_type, _) = match + return runner_type + + return "generate" + + def _get_runner_type( + self, + architectures: list[str], + runner: RunnerOption, + ) -> RunnerType: + if runner != "auto": + return runner + + runner_type = self._get_default_runner_type(architectures) + + # Don't log the most common case + if runner_type != "generate": + logger.info( + "Resolved `--runner auto` to `--runner %s`. 
" + "Pass the value explicitly to silence this message.", + runner_type) + + return runner_type + + def _get_default_convert_type( + self, + architectures: list[str], + runner_type: RunnerType, + ) -> ConvertType: + registry = self.registry + + for arch in architectures: + if arch in registry.get_supported_archs(): + if (runner_type == "generate" + and registry.is_text_generation_model( + architectures, self)): + return "none" + if (runner_type == "pooling" + and registry.is_pooling_model(architectures, self)): + return "none" + + match = try_match_architecture_defaults(arch, + runner_type=runner_type) + if match: + _, (_, convert_type) = match + return convert_type + + # This is to handle Sentence Transformers models that use *ForCausalLM + # and also multi-modal pooling models which are not defined as + # Sentence Transformers models + if runner_type == "pooling": + return "embed" + + return "none" + + def _get_convert_type( + self, + architectures: list[str], + runner_type: RunnerType, + convert: ConvertOption, + ) -> ConvertType: + if convert != "auto": + return convert + + convert_type = self._get_default_convert_type(architectures, + runner_type) + + # Don't log the most common case + if convert_type != "none": + logger.info( + "Resolved `--convert auto` to `--convert %s`. " + "Pass the value explicitly to silence this message.", + convert_type) + + return convert_type + + def _get_supported_generation_tasks( + self, + architectures: list[str], + convert_type: ConvertType, + ) -> list[_ResolvedTask]: + registry = self.registry + + if registry.is_transcription_only_model(architectures, self): + return ["transcription"] + + # TODO: Use get_supported_generation_tasks once V0 is removed + supported_tasks = list[_ResolvedTask]() + if (registry.is_text_generation_model(architectures, self) + or convert_type in _RUNNER_CONVERTS["generate"]): + supported_tasks.append("generate") + + if registry.is_transcription_model(architectures, self): + supported_tasks.append("transcription") + + return supported_tasks + + def _get_default_pooling_task( + self, + architectures: list[str], + ) -> Literal["embed", "classify", "reward"]: + if self.registry.is_cross_encoder_model(architectures, self): + return "classify" + + for arch in architectures: + match = try_match_architecture_defaults(arch, + runner_type="pooling") + if match: + _, (_, convert_type) = match + assert convert_type != "none" + return convert_type + + return "embed" + + def _get_supported_pooling_tasks( + self, + architectures: list[str], + convert_type: ConvertType, + ) -> list[_ResolvedTask]: + registry = self.registry + + # TODO: Use get_supported_pooling_tasks once V0 is removed + supported_tasks = list[_ResolvedTask]() + if (registry.is_pooling_model(architectures, self) + or convert_type in _RUNNER_CONVERTS["pooling"]): + supported_tasks.append("encode") + + extra_task = (self._get_default_pooling_task(architectures) + if convert_type == "none" else convert_type) + supported_tasks.append(extra_task) + + return supported_tasks + + def _get_supported_tasks( + self, + architectures: list[str], + runner_type: RunnerType, + convert_type: ConvertType, + ) -> list[_ResolvedTask]: + if runner_type == "generate": + return self._get_supported_generation_tasks( + architectures, convert_type) + if runner_type == "pooling": + return self._get_supported_pooling_tasks(architectures, + convert_type) + if runner_type == "draft": + return ["draft"] + + assert_never(runner_type) + + def _parse_quant_hf_config(self, hf_config: PretrainedConfig): + 
quant_cfg = getattr(hf_config, "quantization_config", None) + if quant_cfg is None: + # compressed-tensors uses a "compression_config" key + quant_cfg = getattr(hf_config, "compression_config", None) + + else: + # Set quant_method for ModelOpt models. + producer_name = quant_cfg.get("producer", {}).get("name") + if producer_name == "modelopt": + quant_algo = quant_cfg.get("quantization", + {}).get("quant_algo") + if quant_algo == "FP8": + quant_cfg["quant_method"] = "modelopt" + elif quant_algo == "NVFP4": + quant_cfg["quant_method"] = "modelopt_fp4" + elif quant_algo is not None: + raise ValueError( + f"Unknown ModelOpt quant algo: {quant_algo}") + + return quant_cfg + + def _verify_quantization(self) -> None: + supported_quantization = me_quant.QUANTIZATION_METHODS + if self.quantization is not None: + self.quantization = cast(me_quant.QuantizationMethods, + self.quantization) + + # Parse quantization method from the HF model config, if available. + quant_cfg = self._parse_quant_hf_config(self.hf_config) + if quant_cfg is None and (text_config := getattr( + self.hf_config, "text_config", None)): + # Check the text config as well for multi-modal models. + quant_cfg = self._parse_quant_hf_config(text_config) + + if quant_cfg is not None: + # Use the community standard 'quant_method' + quant_method = quant_cfg.get("quant_method", "").lower() + + # Normalize library names + quant_method = quant_method.replace("compressed_tensors", + "compressed-tensors") + + quant_cfg["quant_method"] = quant_method + + # Quantization methods which are overrides (i.e. they have a + # `override_quantization_method` method) must be checked in order + # of preference (this is particularly important for GPTQ). + overrides = [ + "bitblas", + "gptq_marlin_24", + "gptq_marlin", + "gptq_bitblas", + "awq_marlin", + "ipex", + "moe_wna16", + "modelopt", + "modelopt_fp4", + "petit_nvfp4", + ] + quantization_methods = [ + q for q in supported_quantization if q not in overrides + ] + # Any custom overrides will be in quantization_methods so we place + # them at the start of the list so custom overrides have preference + # over the built-in ones. + quantization_methods = quantization_methods + overrides + + # Detect which checkpoint is it + for name in quantization_methods: + method = me_quant.get_quantization_config(name) + quantization_override = method.override_quantization_method( + quant_cfg, self.quantization) + if quantization_override is not None: + # Raise error if the override is not custom (custom would + # be in QUANTIZATION_METHODS but not QuantizationMethods) + # and hasn't been added to the overrides list. + if (name in get_args(me_quant.QuantizationMethods) + and name not in overrides): + raise ValueError( + f"Quantization method {name} is an override but " + "is has not been added to the `overrides` list " + "above. This is necessary to ensure that the " + "overrides are checked in order of preference.") + quant_method = quantization_override + self.quantization = quantization_override + break + + # Verify quantization configurations. 
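+            # Precedence: if the user did not pass `--quantization`, adopt the
+            # method detected in the checkpoint config; if both are set and
+            # they disagree, the mismatch below is treated as a hard error
+            # rather than silently preferring one of them.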
+ if self.quantization is None: + self.quantization = quant_method + elif self.quantization != quant_method: + raise ValueError( + "Quantization method specified in the model config " + f"({quant_method}) does not match the quantization " + f"method specified in the `quantization` argument " + f"({self.quantization}).") + + if self.quantization is not None: + if self.quantization not in supported_quantization: + raise ValueError( + f"Unknown quantization method: {self.quantization}. Must " + f"be one of {supported_quantization}.") + from vllm.platforms import current_platform + current_platform.verify_quantization(self.quantization) + + def _verify_cuda_graph(self) -> None: + # The `max_seq_len_to_capture` was incorrectly + # based on the encoder's input length (448) + # but not the decoder's larger input length (1500). + # This change ensures the CUDA Graph captures the correct, + # larger sequence length, allowing it to work as intended. + effective_max_seq_len = self.max_model_len + if self.is_encoder_decoder: + effective_max_seq_len = max( + effective_max_seq_len, + getattr(self.hf_config, "max_source_positions", 0)) + self.max_seq_len_to_capture = min(self.max_seq_len_to_capture, + effective_max_seq_len) + # CUDAGraph capture not supported for encoder-decoder models on ROCm + unsupported_rocm = self.is_encoder_decoder + + if (unsupported_rocm and not self.enforce_eager + and current_platform.is_rocm()): + logger.warning( + "CUDA graph is not supported for %s on ROCm yet, fallback " + "to eager mode.", self.hf_config.model_type) + self.enforce_eager = True + + def _verify_bnb_config(self) -> None: + """ + The current version of bitsandbytes (0.46.1) with 8-bit models does not + yet support CUDA graph. + # TODO Remove this when bitsandbytes supports. 
+ """ + is_bitsandbytes = self.quantization == "bitsandbytes" + has_quantization_config = (getattr(self.hf_config, + "quantization_config", None) + is not None) + is_8bit = (self.hf_config.quantization_config.get( + "load_in_8bit", False) if has_quantization_config else False) + if all([ + is_bitsandbytes, + has_quantization_config, + is_8bit, + not self.enforce_eager, + ]): + logger.warning( + "CUDA graph is not supported on BitsAndBytes 8bit yet, " + "fallback to the eager mode.") + + self.enforce_eager = True + + def _verify_with_expert_parallelism(self) -> None: + num_expert_names = [ + "moe_num_experts", # Dbrx + "num_experts", # Jamba + "n_routed_experts", # DeepSeek + "num_local_experts", # Mixtral + ] + num_experts = 0 + for name in num_expert_names: + num_experts = getattr(self.hf_text_config, name, 0) + if num_experts > 0: + break + if num_experts < 1: + raise ValueError( + "Number of experts in the model must be greater than 0 " + "when expert parallelism is enabled.") + + def verify_dual_chunk_attention_config( + self, + load_config: LoadConfig, + ) -> None: + if hasattr(self.hf_config, "dual_chunk_attention_config"): + # Try loading the sparse attention config + from vllm.model_executor.model_loader.weight_utils import ( + get_sparse_attention_config) + sparse_attn_config = get_sparse_attention_config(self, load_config) + if sparse_attn_config: + self.hf_config.dual_chunk_attention_config[ + "sparse_attention_config"] = sparse_attn_config + if "sparse_attention_enabled" not in \ + self.hf_config.dual_chunk_attention_config: + self.hf_config.dual_chunk_attention_config[ + "sparse_attention_enabled"] = True + + if envs.VLLM_ATTENTION_BACKEND != STR_DUAL_CHUNK_FLASH_ATTN_VAL: + raise ValueError("please set VLLM_ATTENTION_BACKEND to " + f"{STR_DUAL_CHUNK_FLASH_ATTN_VAL}") + + def verify_async_output_proc(self, parallel_config, speculative_config, + device_config) -> None: + if not self.use_async_output_proc: + # Nothing to check + return + + if parallel_config.pipeline_parallel_size > 1: + self.use_async_output_proc = False + return + + # Reminder: Please update docs/features/compatibility_matrix.md + # If the feature combo become valid + from vllm.platforms import current_platform + if not current_platform.is_async_output_supported(self.enforce_eager): + self.use_async_output_proc = False + return + + if envs.VLLM_USE_RAY_SPMD_WORKER: + self.use_async_output_proc = False + return + + # Async postprocessor is not necessary for pooling models + # since there is no token generation + if self.runner_type == "pooling": + self.use_async_output_proc = False + + # Reminder: Please update docs/features/compatibility_matrix.md + # If the feature combo become valid + if speculative_config: + self.use_async_output_proc = False + + def verify_with_parallel_config( + self, + parallel_config: ParallelConfig, + ) -> None: + + if parallel_config.distributed_executor_backend == "external_launcher": + assert self.seed is not None, ( + "Seed must be set when using external launcher backend to " + "make sure sampling results are the same across workers.") + + total_num_attention_heads = getattr(self.hf_text_config, + "num_attention_heads", 0) + tensor_parallel_size = parallel_config.tensor_parallel_size + if total_num_attention_heads % tensor_parallel_size != 0: + raise ValueError( + f"Total number of attention heads ({total_num_attention_heads})" + " must be divisible by tensor parallel size " + f"({tensor_parallel_size}).") + + if parallel_config.enable_expert_parallel: + 
self._verify_with_expert_parallelism() + + pipeline_parallel_size = parallel_config.pipeline_parallel_size + if pipeline_parallel_size > 1: + if not self.registry.is_pp_supported_model(self.architectures, + self): + raise NotImplementedError( + "Pipeline parallelism is not supported for this model. " + "Supported models implement the `SupportsPP` interface.") + + if self.use_async_output_proc: + self.use_async_output_proc = False + + def get_sliding_window(self) -> Optional[int]: + """Get the sliding window size from the HF text config if present.""" + return getattr(self.hf_text_config, "sliding_window", None) + + def get_vocab_size(self) -> int: + return getattr(self.hf_text_config, "vocab_size", 0) + + def get_hidden_size(self) -> int: + return getattr(self.hf_text_config, "hidden_size", 0) + + @property + def is_deepseek_mla(self) -> bool: + if not hasattr(self.hf_text_config, "model_type"): + return False + elif self.hf_text_config.model_type in \ + ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'kimi_k2'): + return self.hf_text_config.kv_lora_rank is not None + elif self.hf_text_config.model_type == 'eagle': + # if the model is an EAGLE module, check for the + # underlying architecture + return self.hf_text_config.model.model_type in \ + ('deepseek_v2', 'deepseek_v3') \ + and self.hf_text_config.kv_lora_rank is not None + return False + + def get_head_size(self) -> int: + # TODO remove hard code + if self.is_deepseek_mla: + qk_rope_head_dim = getattr(self.hf_text_config, "qk_rope_head_dim", + 0) + if self.use_mla: + return self.hf_text_config.kv_lora_rank + qk_rope_head_dim + else: + qk_nope_head_dim = getattr(self.hf_text_config, + "qk_nope_head_dim", 0) + if qk_rope_head_dim and qk_nope_head_dim: + return qk_rope_head_dim + qk_nope_head_dim + + if hasattr(self.hf_text_config, + "model_type") and (self.hf_text_config.model_type + == "zamba2"): + return self.hf_text_config.attention_head_dim + + if self.is_attention_free: + return 0 + + # NOTE: Some configs may set head_dim=None in the config + if getattr(self.hf_text_config, "head_dim", None) is not None: + return self.hf_text_config.head_dim + + # NOTE: Some models (such as PLaMo2.1) use `hidden_size_per_head` + if getattr(self.hf_text_config, "hidden_size_per_head", + None) is not None: + return self.hf_text_config.hidden_size_per_head + + # FIXME(woosuk): This may not be true for all models. + return (self.hf_text_config.hidden_size // + self.hf_text_config.num_attention_heads) + + def get_total_num_kv_heads(self) -> int: + """Returns the total number of KV heads.""" + # For GPTBigCode & Falcon: + # NOTE: for falcon, when new_decoder_architecture is True, the + # multi_query flag is ignored and we use n_head_kv for the number of + # KV heads. + falcon_model_types = ["falcon", "RefinedWeb", "RefinedWebModel"] + new_decoder_arch_falcon = ( + self.hf_config.model_type in falcon_model_types + and getattr(self.hf_config, "new_decoder_architecture", False)) + if not new_decoder_arch_falcon and getattr(self.hf_text_config, + "multi_query", False): + # Multi-query attention, only one KV head. + # Currently, tensor parallelism is not supported in this case. 
+ return 1 + + # For DBRX and MPT + if self.hf_config.model_type == "mpt": + if "kv_n_heads" in self.hf_config.attn_config: + return self.hf_config.attn_config["kv_n_heads"] + return self.hf_config.num_attention_heads + if self.hf_config.model_type == "dbrx": + return getattr(self.hf_config.attn_config, "kv_n_heads", + self.hf_config.num_attention_heads) + + if self.hf_config.model_type == "nemotron-nas": + for block in self.hf_config.block_configs: + if not block.attention.no_op: + return self.hf_config.num_attention_heads \ + // block.attention.n_heads_in_group + + raise RuntimeError("Couldn't determine number of kv heads") + + if self.is_attention_free: + return 0 + + attributes = [ + # For Falcon: + "n_head_kv", + "num_kv_heads", + # For LLaMA-2: + "num_key_value_heads", + # For ChatGLM: + "multi_query_group_num", + ] + for attr in attributes: + num_kv_heads = getattr(self.hf_text_config, attr, None) + if num_kv_heads is not None: + return num_kv_heads + + # For non-grouped-query attention models, the number of KV heads is + # equal to the number of attention heads. + return self.hf_text_config.num_attention_heads + + def get_num_kv_heads(self, parallel_config: ParallelConfig) -> int: + """Returns the number of KV heads per GPU.""" + if self.use_mla: + # When using MLA during decode it becomes MQA + return 1 + + total_num_kv_heads = self.get_total_num_kv_heads() + # If tensor parallelism is used, we divide the number of KV heads by + # the tensor parallel size. We will replicate the KV heads in the + # case where the number of KV heads is smaller than the tensor + # parallel size so each GPU has at least one KV head. + return max(1, + total_num_kv_heads // parallel_config.tensor_parallel_size) + + def get_num_attention_heads(self, parallel_config: ParallelConfig) -> int: + num_heads = getattr(self.hf_text_config, "num_attention_heads", 0) + return num_heads // parallel_config.tensor_parallel_size + + def get_layers_start_end_indices( + self, parallel_config: ParallelConfig) -> tuple[int, int]: + from vllm.distributed.utils import get_pp_indices + if (self.hf_text_config.model_type == "deepseek_mtp" + or self.hf_config.model_type == "mimo_mtp" + or self.hf_config.model_type == "glm4_moe_mtp" + or self.hf_config.model_type == "ernie_mtp" + or self.hf_config.model_type == "qwen3_next_mtp"): + total_num_hidden_layers = getattr(self.hf_text_config, + "num_nextn_predict_layers", 0) + else: + total_num_hidden_layers = getattr(self.hf_text_config, + "num_hidden_layers", 0) + # the layout order is: DP x PP x TP + pp_rank = (parallel_config.rank // parallel_config.tensor_parallel_size + ) % parallel_config.pipeline_parallel_size + pp_size = parallel_config.pipeline_parallel_size + start, end = get_pp_indices(total_num_hidden_layers, pp_rank, pp_size) + return start, end + + def get_num_layers(self, parallel_config: ParallelConfig) -> int: + start, end = self.get_layers_start_end_indices(parallel_config) + return end - start + + def get_num_layers_by_block_type( + self, + parallel_config: ParallelConfig, + block_type: LayerBlockType = LayerBlockType.attention, + ) -> int: + # This function relies on 'layers_block_type' in hf_config, + # for w/o this attribute, we will need to have workarounds like so + attn_block_type = block_type == LayerBlockType.attention + is_transformer = not self.is_hybrid and \ + not self.has_noops and \ + not self.is_attention_free + start, end = self.get_layers_start_end_indices(parallel_config) + + if is_transformer: + # Handle the basic case first + return end - start 
if attn_block_type else 0
+        elif self.is_attention_free:
+            # Attention free
+            # Note that this code assumes there
+            # is only one type of attention-free block type.
+            return 0 if attn_block_type else end - start
+        elif self.has_noops:
+            block_configs = self.hf_config.block_configs
+            return sum(not bc.attention.no_op
+                       for bc in block_configs[start:end])
+        else:
+            # Hybrid model Jamba
+            layers_block_type_value = getattr(self.hf_text_config,
+                                              "layers_block_type", None)
+            if layers_block_type_value is not None:
+                if hasattr(self.hf_text_config,
+                           "model_type") and (self.hf_text_config.model_type
+                                              == "zamba2"):
+                    if attn_block_type:
+                        return sum(t == "hybrid"
+                                   for t in layers_block_type_value[start:end])
+                    else:
+                        return self.get_num_layers(parallel_config)
+                return sum(t == block_type.value
+                           for t in layers_block_type_value[start:end])
+
+            # Hybrid model Minimax
+            attn_type_list = getattr(self.hf_config, "attn_type_list", None)
+            if attn_type_list:
+                return sum(t == 1 for t in attn_type_list[start:end])
+
+            # Hybrid model Qwen3Next
+            layer_types_value = getattr(self.hf_config, "layer_types", None)
+            if layer_types_value is not None:
+                if getattr(block_type, "value", block_type) == "attention":
+                    return sum(t == "full_attention"
+                               for t in layer_types_value[start:end])
+                elif getattr(block_type, "value",
+                             block_type) == "linear_attention":
+                    return sum(t == "linear_attention"
+                               for t in layer_types_value[start:end])
+                else:
+                    return sum(t == getattr(block_type, "value", block_type)
+                               for t in layer_types_value[start:end])
+
+            if (layers_block_type_value is None and attn_type_list is None
+                    and layer_types_value is None):
+                raise ValueError(
+                    "The model is a hybrid without a "
+                    "layers_block_type, an attn_type_list, or a layer_types "
+                    "in the hf_config; cannot determine the number of "
+                    f"{block_type.value} layers")
+
+    def get_mamba_chunk_size(self) -> Optional[int]:
+        """
+        Returns the mamba chunk size if it exists
+        """
+        # used by e.g. Bamba, FalconH1, Granite, PLaMo2
+        chunk_size = getattr(self.hf_text_config, "mamba_chunk_size", None)
+        if chunk_size is None:
+            # used by e.g. Mamba2, NemotronH, Zamba
+            chunk_size = getattr(self.hf_text_config, "chunk_size", None)
+        return chunk_size
+
+    def get_multimodal_config(self) -> MultiModalConfig:
+        """
+        Get the multimodal configuration of the model.
+
+        Raises:
+            ValueError: If the model is not multimodal.
+        """
+        if self.multimodal_config is None:
+            raise ValueError("The model is not multimodal.")
+
+        return self.multimodal_config
+
+    def try_get_generation_config(self) -> dict[str, Any]:
+        """
+        This method attempts to retrieve the non-default values of the
+        generation config for this model.
+
+        The generation config can contain information about special tokens, as
+        well as sampling parameters, which is why this method exists separately
+        from `get_diff_sampling_param`.
+
+        Returns:
+            A dictionary containing the non-default generation config.
+        """
+        if self.generation_config in {"auto", "vllm"}:
+            config = try_get_generation_config(
+                self.hf_config_path or self.model,
+                trust_remote_code=self.trust_remote_code,
+                revision=self.revision,
+            )
+        else:
+            config = try_get_generation_config(
+                self.generation_config,
+                trust_remote_code=self.trust_remote_code,
+            )
+
+        if config is None:
+            return {}
+
+        return config.to_diff_dict()
+
+    def get_diff_sampling_param(self) -> dict[str, Any]:
+        """
+        This method returns a dictionary containing the non-default sampling
+        parameters with `override_generation_config` applied.
+ + The default sampling parameters are: + + - vLLM's neutral defaults if `self.generation_config="vllm"` + - the model's defaults if `self.generation_config="auto"` + - as defined in `generation_config.json` if + `self.generation_config="path/to/generation_config/dir"` + + Returns: + A dictionary containing the non-default sampling parameters. + """ + if self.generation_config == "vllm": + config = {} + else: + config = self.try_get_generation_config() + + # Overriding with given generation config + config.update(self.override_generation_config) + + available_params = [ + "repetition_penalty", + "temperature", + "top_k", + "top_p", + "min_p", + "max_new_tokens", + ] + if any(p in config for p in available_params): + diff_sampling_param = { + p: config.get(p) + for p in available_params if config.get(p) is not None + } + # Huggingface definition of max_new_tokens is equivalent + # to vLLM's max_tokens + if "max_new_tokens" in diff_sampling_param: + diff_sampling_param["max_tokens"] = diff_sampling_param.pop( + "max_new_tokens") + else: + diff_sampling_param = {} + + if diff_sampling_param: + logger.warning_once( + "Default sampling parameters have been overridden by the " + "model's Hugging Face generation config recommended from the " + "model creator. If this is not intended, please relaunch " + "vLLM instance with `--generation-config vllm`.") + return diff_sampling_param + + @property + def is_encoder_decoder(self) -> bool: + """Extract the HF encoder/decoder model flag.""" + return is_encoder_decoder(self.hf_config) + + @property + def uses_mrope(self) -> bool: + return uses_mrope(self.hf_config) + + @property + def is_multimodal_model(self) -> bool: + return self.multimodal_config is not None + + @property + def is_multimodal_raw_input_only_model(self) -> bool: + return self._model_info.supports_multimodal_raw_input_only + + @property + def is_cross_encoder(self) -> bool: + return (self._model_info.supports_cross_encoding + or self.convert_type == "classify") + + @property + def is_pp_supported(self) -> bool: + return self._model_info.supports_pp + + @property + def is_attention_free(self) -> bool: + return self._model_info.is_attention_free + + @property + def is_hybrid(self) -> bool: + return self._model_info.is_hybrid + + @property + def has_noops(self) -> bool: + return self._model_info.has_noops + + @property + def has_inner_state(self): + return self._model_info.has_inner_state + + @property + def is_v1_compatible(self) -> bool: + return not self._model_info.supports_v0_only + + @property + def use_mla(self) -> bool: + return self.is_deepseek_mla and not envs.VLLM_MLA_DISABLE + + @property + def is_matryoshka(self) -> bool: + return (bool(getattr(self.hf_config, "matryoshka_dimensions", None)) + or getattr(self.hf_config, "is_matryoshka", False)) + + @property + def matryoshka_dimensions(self): + return getattr(self.hf_config, "matryoshka_dimensions", None) + + @property + def use_pad_token(self) -> bool: + # cross_encoder models defaults to using pad_token. + # `llm as reranker` models defaults to not using pad_token. + return getattr(self.hf_config, "use_pad_token", True) + + @property + def head_dtype(self) -> torch.dtype: + """ + "head" refers to the last Linear layer(s) of an LLM, + such as the lm_head in a generation model, + or the score or classifier in a classification model. + + `head_dtype` currently only supports pooling models.\n + - The pooling model defaults to using fp32 head, + you can use --hf-overrides '{"head_dtype": "model"}' to disable it. 
+ """ + + head_dtype = _get_head_dtype(config=self.hf_config, + dtype=self.dtype, + runner_type=self.runner_type) + + if self.runner_type != "pooling" and head_dtype != self.dtype: + logger.warning_once( + "`head_dtype` currently only supports pooling models." + "fallback to model dtype [%s].", self.dtype) + return self.dtype + + if head_dtype not in current_platform.supported_dtypes: + logger.warning_once( + "The current platform does not support [%s] head dtype, " + "fallback to model dtype [%s].", head_dtype, self.dtype) + return self.dtype + + logger.debug_once("head dtype: %s", head_dtype) + return head_dtype + + def get_and_verify_max_len(self, max_model_len: int): + # Consider max_model_len in tokenizer_config only when + # pooling models use absolute position_embedding. + tokenizer_config = None + if (self.runner_type == "pooling" and getattr( + self.hf_config, "position_embedding_type", "") == "absolute"): + tokenizer_config = try_get_tokenizer_config( + self.tokenizer, + trust_remote_code=self.trust_remote_code, + revision=self.tokenizer_revision) + max_model_len = _get_and_verify_max_len( + hf_config=self.hf_text_config, + tokenizer_config=tokenizer_config, + max_model_len=max_model_len, + disable_sliding_window=self.disable_sliding_window, + sliding_window=self.get_sliding_window(), + spec_target_max_model_len=self.spec_target_max_model_len, + encoder_config=self.encoder_config) + logger.info("Using max model len %s", max_model_len) + return max_model_len + + +def get_served_model_name(model: str, + served_model_name: Optional[Union[str, list[str]]]): + """ + If the input is a non-empty list, the first model_name in + `served_model_name` is taken. + If the input is a non-empty string, it is used directly. + For cases where the input is either an empty string or an + empty list, the fallback is to use `self.model`. 
+ """ + if not served_model_name: + return model + if isinstance(served_model_name, list): + return served_model_name[0] + return served_model_name + + +# Some model suffixes are based on auto classes from Transformers: +# https://huggingface.co/docs/transformers/en/model_doc/auto +# NOTE: Items higher on this list priority over lower ones +_SUFFIX_TO_DEFAULTS: list[tuple[str, tuple[RunnerType, ConvertType]]] = [ + ("ForCausalLM", ("generate", "none")), + ("ForConditionalGeneration", ("generate", "none")), + ("ChatModel", ("generate", "none")), + ("LMHeadModel", ("generate", "none")), + ("ForTextEncoding", ("pooling", "embed")), + ("EmbeddingModel", ("pooling", "embed")), + ("ForSequenceClassification", ("pooling", "classify")), + ("ForAudioClassification", ("pooling", "classify")), + ("ForImageClassification", ("pooling", "classify")), + ("ForVideoClassification", ("pooling", "classify")), + ("ClassificationModel", ("pooling", "classify")), + ("ForRewardModeling", ("pooling", "reward")), + ("RewardModel", ("pooling", "reward")), + # Let other `*Model`s take priority + ("Model", ("pooling", "embed")), +] + + +def iter_architecture_defaults(): + yield from _SUFFIX_TO_DEFAULTS + + +def try_match_architecture_defaults( + architecture: str, + *, + runner_type: Optional[RunnerType] = None, + convert_type: Optional[ConvertType] = None, +) -> Optional[tuple[str, tuple[RunnerType, ConvertType]]]: + for suffix, (default_runner_type, + default_convert_type) in iter_architecture_defaults(): + if ((runner_type is None or runner_type == default_runner_type) and + (convert_type is None or convert_type == default_convert_type) + and architecture.endswith(suffix)): + return suffix, (default_runner_type, default_convert_type) + + return None + + +_STR_DTYPE_TO_TORCH_DTYPE = { + "half": torch.float16, + "float16": torch.float16, + "float": torch.float32, + "float32": torch.float32, + "bfloat16": torch.bfloat16, +} + +# model_type -> reason +_FLOAT16_NOT_SUPPORTED_MODELS = { + "gemma2": "Numerical instability. Please use bfloat16 or float32 instead.", + "gemma3": "Numerical instability. Please use bfloat16 or float32 instead.", + "gemma3_text": + "Numerical instability. Please use bfloat16 or float32 instead.", + "plamo2": "Numerical instability. Please use bfloat16 or float32 instead.", + "glm4": "Numerical instability. Please use bfloat16 or float32 instead.", +} + + +def _is_valid_dtype(model_type: str, dtype: torch.dtype): + if model_type in _FLOAT16_NOT_SUPPORTED_MODELS and dtype == torch.float16: # noqa: E501, SIM103 + return False + + return True + + +def _check_valid_dtype(model_type: str, dtype: torch.dtype): + if model_type in _FLOAT16_NOT_SUPPORTED_MODELS and dtype == torch.float16: + reason = _FLOAT16_NOT_SUPPORTED_MODELS[model_type] + raise ValueError(f"The model type {model_type!r} " + f"does not support float16. Reason: {reason}") + + return True + + +def _find_dtype( + model_id: str, + config: PretrainedConfig, + *, + revision: Optional[str], +): + # NOTE: getattr(config, "torch_dtype", torch.float32) is not correct + # because config.torch_dtype can be None. 
+ config_dtype = getattr(config, "torch_dtype", None) + + # Fallbacks for multi-modal models if the root config + # does not define torch_dtype + if config_dtype is None: + config_dtype = getattr(config.get_text_config(), "torch_dtype", None) + if config_dtype is None and hasattr(config, "vision_config"): + config_dtype = getattr(config.vision_config, "torch_dtype", None) + if config_dtype is None and hasattr(config, "encoder_config"): + config_dtype = getattr(config.encoder_config, "torch_dtype", None) + + # Try to read the dtype of the weights if they are in safetensors format + if config_dtype is None: + repo_mt = try_get_safetensors_metadata(model_id, revision=revision) + + if repo_mt and (files_mt := repo_mt.files_metadata): + param_dtypes: set[torch.dtype] = { + _SAFETENSORS_TO_TORCH_DTYPE[dtype_str] + for file_mt in files_mt.values() + for dtype_str in file_mt.parameter_count + if dtype_str in _SAFETENSORS_TO_TORCH_DTYPE + } + + if param_dtypes: + return common_broadcastable_dtype(param_dtypes) + + if config_dtype is None: + config_dtype = torch.float32 + + return config_dtype + + +def _resolve_auto_dtype( + model_type: str, + config_dtype: torch.dtype, + *, + is_pooling_model: bool, +): + from vllm.platforms import current_platform + + supported_dtypes = [ + dtype for dtype in current_platform.supported_dtypes + if _is_valid_dtype(model_type, dtype) + ] + + if is_pooling_model and torch.float16 in supported_dtypes: + preferred_dtype = torch.float16 + else: + preferred_dtype = supported_dtypes[0] + + # Downcast for float32 models + if config_dtype == torch.float32: + config_dtype = preferred_dtype + + if config_dtype in supported_dtypes: + return config_dtype + + # Ensure device compatibility + device_name = current_platform.get_device_name() + device_capability = current_platform.get_device_capability() + + if device_capability is None: + device_str = f"{device_name!r}" + else: + version_str = device_capability.as_version_str() + device_str = f"{device_name!r} (with compute capability {version_str})" + + logger.warning( + "Your device %s doesn't support %s. " + "Falling back to %s for compatibility.", + device_str, + config_dtype, + preferred_dtype, + ) + + return preferred_dtype + + +def _get_and_verify_dtype( + model_id: str, + config: PretrainedConfig, + dtype: Union[str, torch.dtype], + *, + is_pooling_model: bool, + revision: Optional[str] = None, +) -> torch.dtype: + config_dtype = _find_dtype(model_id, config, revision=revision) + model_type = config.model_type + + if isinstance(dtype, str): + dtype = dtype.lower() + if dtype == "auto": + # Set default dtype from model config + torch_dtype = _resolve_auto_dtype( + model_type, + config_dtype, + is_pooling_model=is_pooling_model, + ) + else: + if dtype not in _STR_DTYPE_TO_TORCH_DTYPE: + raise ValueError(f"Unknown dtype: {dtype!r}") + torch_dtype = _STR_DTYPE_TO_TORCH_DTYPE[dtype] + elif isinstance(dtype, torch.dtype): + torch_dtype = dtype + else: + raise ValueError(f"Unknown dtype: {dtype}") + + _check_valid_dtype(model_type, torch_dtype) + + if torch_dtype != config_dtype: + if torch_dtype == torch.float32: + # Upcasting to float32 is allowed. + logger.info("Upcasting %s to %s.", config_dtype, torch_dtype) + elif config_dtype == torch.float32: + # Downcasting from float32 to float16 or bfloat16 is allowed. + logger.info("Downcasting %s to %s.", config_dtype, torch_dtype) + else: + # Casting between float16 and bfloat16 is allowed with a warning. 
+ logger.warning("Casting %s to %s.", config_dtype, torch_dtype) + + return torch_dtype + + +def _get_head_dtype(config: PretrainedConfig, dtype: torch.dtype, + runner_type: str) -> torch.dtype: + head_dtype: Optional[Union[str, + torch.dtype]] = getattr(config, "head_dtype", + None) + + if head_dtype == "model": + return dtype + elif isinstance(head_dtype, str): + head_dtype = head_dtype.lower() + if head_dtype not in _STR_DTYPE_TO_TORCH_DTYPE: + raise ValueError(f"Unknown dtype: {head_dtype!r}") + return _STR_DTYPE_TO_TORCH_DTYPE[head_dtype] + elif isinstance(head_dtype, torch.dtype): + return head_dtype + elif head_dtype is None: + if torch.float32 not in current_platform.supported_dtypes: + return dtype + if runner_type == "pooling": + return torch.float32 + return dtype + else: + raise ValueError(f"Unknown dtype: {head_dtype}") + + +def _get_and_verify_max_len( + hf_config: PretrainedConfig, + tokenizer_config: Optional[dict], + max_model_len: Optional[int], + disable_sliding_window: bool, + sliding_window: Optional[int], + spec_target_max_model_len: Optional[int] = None, + encoder_config: Optional[Any] = None, +) -> int: + """Get and verify the model's maximum length.""" + derived_max_model_len = float("inf") + possible_keys = [ + # OPT + "max_position_embeddings", + # GPT-2 + "n_positions", + # MPT + "max_seq_len", + # ChatGLM2 + "seq_length", + # Command-R + "model_max_length", + # Whisper + "max_target_positions", + # Others + "max_sequence_length", + "max_seq_length", + "seq_len", + ] + # Choose the smallest "max_length" from the possible keys + max_len_key = None + for key in possible_keys: + max_len = getattr(hf_config, key, None) + if max_len is not None: + max_len_key = key if max_len < derived_max_model_len \ + else max_len_key + derived_max_model_len = min(derived_max_model_len, max_len) + # For Command-R / Cohere, Cohere2 / Aya Vision models + if tmp_max_len := getattr(hf_config, "model_max_length", None): + max_len_key = "model_max_length" + derived_max_model_len = tmp_max_len + + # If sliding window is manually disabled, max_length should be less + # than the sliding window length in the model config. + if (disable_sliding_window and sliding_window is not None + and sliding_window < derived_max_model_len): + max_len_key = "sliding_window" + derived_max_model_len = sliding_window + + # Consider model_max_length in tokenizer_config + if tokenizer_config: + tokenizer_model_max_length = tokenizer_config.get( + "model_max_length", derived_max_model_len) + derived_max_model_len = min(derived_max_model_len, + tokenizer_model_max_length) + + # If none of the keys were found in the config, use a default and + # log a warning. + if derived_max_model_len == float("inf"): + if max_model_len is not None: + # If max_model_len is specified, we use it. + return max_model_len + + if spec_target_max_model_len is not None: + # If this is a speculative draft model, we use the max model len + # from the target model. + return spec_target_max_model_len + + default_max_len = 2048 + logger.warning( + "The model's config.json does not contain any of the following " + "keys to determine the original maximum length of the model: " + "%s. Assuming the model's maximum length is %d.", possible_keys, + default_max_len) + derived_max_model_len = default_max_len + + rope_scaling = getattr(hf_config, "rope_scaling", None) + # NOTE(woosuk): Gemma3's max_model_len (128K) is already scaled by RoPE + # scaling, so we skip applying the scaling factor again. 
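+    # For models that do apply the factor: with rope_type == "yarn",
+    # original_max_position_embeddings == 32768 and factor == 4.0 (numbers
+    # purely for illustration), the derived max length below works out to
+    # 32768 * 4.0 = 131072.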
+ if rope_scaling is not None and "gemma3" not in hf_config.model_type: + # No need to consider "type" key because of patch_rope_scaling when + # loading HF config + rope_type = rope_scaling["rope_type"] + + if rope_type not in ("su", "longrope", "llama3"): + if disable_sliding_window: + # TODO(robertgshaw): Find a model that supports rope_scaling + # with sliding window to see if this case should be allowed. + raise NotImplementedError( + "Disabling sliding window is not supported for models " + "with rope_scaling. Please raise an issue so we can " + "investigate.") + + # NOTE: rope_type == "default" does not define factor + # https://github.com/huggingface/transformers/blob/v4.45.2/src/transformers/modeling_rope_utils.py + scaling_factor = rope_scaling.get("factor", 1.0) + + if rope_type == "yarn": + derived_max_model_len = rope_scaling[ + "original_max_position_embeddings"] + derived_max_model_len *= scaling_factor + + if encoder_config and "max_seq_length" in encoder_config: + derived_max_model_len = encoder_config["max_seq_length"] + + # If the user specified a max length, make sure it is smaller than the + # derived length from the HF model config. + if max_model_len is None: + max_model_len = int(derived_max_model_len) + if current_platform.is_tpu(): + logger.warning( + "--max-model-len is not specified, " + "it's currently using model's default length %s, " + "which might be too large." + "Please input with --max-model-len based on your " + "request input length and output length, to avoid " + "unnecessary degradation.", max_model_len) + elif max_model_len > derived_max_model_len: + # Some models might have a separate key for specifying model_max_length + # that will be bigger than derived_max_model_len. We compare user input + # with model_max_length and allow this override when it's smaller. + model_max_length = getattr(hf_config, "model_max_length", None) + if model_max_length is not None and max_model_len <= model_max_length: + if disable_sliding_window: + # TODO(robertgshaw): Find a model that has model_max_length + # with sliding window to see if this case should be allowed. + raise NotImplementedError( + "Disabling sliding window is not supported for models " + "model_max_length in the config. Please raise an issue " + "so we can investigate.") + else: + msg = ( + f"User-specified max_model_len ({max_model_len}) is greater " + f"than the derived max_model_len ({max_len_key}=" + f"{derived_max_model_len} or model_max_length=" + f"{model_max_length} in model's config.json).") + warning = ( + "VLLM_ALLOW_LONG_MAX_MODEL_LEN must be used with extreme " + "caution. If the model uses relative position encoding (RoPE), " + "positions exceeding derived_max_model_len lead to nan. If the " + "model uses absolute position encoding, positions exceeding " + "derived_max_model_len will cause a CUDA array out-of-bounds " + "error.") + if envs.VLLM_ALLOW_LONG_MAX_MODEL_LEN: + logger.warning_once("%s %s", msg, warning) + else: + raise ValueError( + f"{msg} To allow overriding this maximum, set " + f"the env var VLLM_ALLOW_LONG_MAX_MODEL_LEN=1. 
{warning}") + return int(max_model_len) diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index 93002012799a..f0f67bab9d6f 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -3,7 +3,7 @@ import hashlib from dataclasses import field -from typing import TYPE_CHECKING, Any, Literal, Optional, Union +from typing import Any, Literal, Optional, Union from pydantic import SkipValidation, model_validator from pydantic.dataclasses import dataclass @@ -15,13 +15,9 @@ MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS, POOLING_MODEL_MAX_NUM_BATCHED_TOKENS) -if TYPE_CHECKING: - from vllm.config import RunnerType -else: - RunnerType = Any - logger = init_logger(__name__) +RunnerType = Literal["generate", "pooling", "draft"] PreemptionMode = Literal["swap", "recompute"] SchedulerPolicy = Literal["fcfs", "priority"] diff --git a/vllm/config/utils.py b/vllm/config/utils.py index db8c05ef8be4..91e61b330273 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -1,8 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import ast +import inspect +import textwrap from dataclasses import MISSING, Field, field, fields, is_dataclass -from typing import TYPE_CHECKING, TypeVar +from typing import TYPE_CHECKING, Any, TypeVar + +import regex as re if TYPE_CHECKING: from _typeshed import DataclassInstance @@ -45,3 +50,96 @@ def get_field(cls: ConfigType, name: str) -> Field: return field(default=default) raise ValueError( f"{cls.__name__}.{name} must have a default value or default factory.") + + +def contains_object_print(text: str) -> bool: + """ + Check if the text looks like a printed Python object, e.g. + contains any substring matching the pattern: "at 0xFFFFFFF>" + We match against 0x followed by 2-16 hex chars (there's + a max of 16 on a 64-bit system). + + Args: + text (str): The text to check + + Returns: + result (bool): `True` if a match is found, `False` otherwise. + """ + pattern = r'at 0x[a-fA-F0-9]{2,16}>' + match = re.search(pattern, text) + return match is not None + + +def assert_hashable(text: str) -> bool: + if not contains_object_print(text): + return True + raise AssertionError( + f"vLLM tried to hash some configs that may have Python objects ids " + f"in them. This is a bug, please file an issue. " + f"Text being hashed: {text}") + + +def get_attr_docs(cls: type[Any]) -> dict[str, str]: + """ + Get any docstrings placed after attribute assignments in a class body. + + https://davidism.com/mit-license/ + """ + + def pairwise(iterable): + """ + Manually implement https://docs.python.org/3/library/itertools.html#itertools.pairwise + + Can be removed when Python 3.9 support is dropped. + """ + iterator = iter(iterable) + a = next(iterator, None) + + for b in iterator: + yield a, b + a = b + + try: + cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] + except (OSError, KeyError, TypeError): + # HACK: Python 3.13+ workaround - set missing __firstlineno__ + # Workaround can be removed after we upgrade to pydantic==2.12.0 + with open(inspect.getfile(cls)) as f: + for i, line in enumerate(f): + if f"class {cls.__name__}" in line and ":" in line: + cls.__firstlineno__ = i + 1 + break + cls_node = ast.parse(textwrap.dedent(inspect.getsource(cls))).body[0] + + if not isinstance(cls_node, ast.ClassDef): + raise TypeError("Given object was not a class.") + + out = {} + + # Consider each pair of nodes. + for a, b in pairwise(cls_node.body): + # Must be an assignment then a constant string. 
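+        # e.g. a class-body pair such as
+        #     foo: int = 1
+        #     """Number of foos."""
+        # is an (ast.AnnAssign, ast.Expr) pair and yields
+        # out["foo"] == "Number of foos."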
+ if (not isinstance(a, (ast.Assign, ast.AnnAssign)) + or not isinstance(b, ast.Expr) + or not isinstance(b.value, ast.Constant) + or not isinstance(b.value.value, str)): + continue + + doc = inspect.cleandoc(b.value.value) + + # An assignment can have multiple targets (a = b = v), but an + # annotated assignment only has one target. + targets = a.targets if isinstance(a, ast.Assign) else [a.target] + + for target in targets: + # Must be assigning to a plain name. + if not isinstance(target, ast.Name): + continue + + out[target.id] = doc + + return out + + +def is_init_field(cls: ConfigType, name: str) -> bool: + return next(f for f in fields(cls) if f.name == name).init diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 27462b8fa0da..ecf4e486a016 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -27,11 +27,11 @@ EPLBConfig, HfOverrides, KVEventsConfig, KVTransferConfig, LoadConfig, LogprobsMode, LoRAConfig, MambaDType, MMEncoderTPMode, ModelConfig, - ModelDType, ModelImpl, ObservabilityConfig, - ParallelConfig, PoolerConfig, PrefixCachingHashAlgo, - RunnerOption, SchedulerConfig, SchedulerPolicy, - SpeculativeConfig, StructuredOutputsConfig, - TaskOption, TokenizerMode, VllmConfig, get_attr_docs) + ModelDType, ObservabilityConfig, ParallelConfig, + PoolerConfig, PrefixCachingHashAlgo, RunnerOption, + SchedulerConfig, SchedulerPolicy, SpeculativeConfig, + StructuredOutputsConfig, TaskOption, TokenizerMode, + VllmConfig, get_attr_docs) from vllm.config.multimodal import MMCacheType, MultiModalConfig from vllm.config.parallel import ExpertPlacementStrategy from vllm.config.utils import get_field @@ -548,7 +548,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: model_group.add_argument("--max-logprobs", **model_kwargs["max_logprobs"]) model_group.add_argument("--logprobs-mode", - choices=[f.value for f in LogprobsMode], **model_kwargs["logprobs_mode"]) model_group.add_argument("--disable-sliding-window", **model_kwargs["disable_sliding_window"]) @@ -593,9 +592,7 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **model_kwargs["override_generation_config"]) model_group.add_argument("--enable-sleep-mode", **model_kwargs["enable_sleep_mode"]) - model_group.add_argument("--model-impl", - choices=[f.value for f in ModelImpl], - **model_kwargs["model_impl"]) + model_group.add_argument("--model-impl", **model_kwargs["model_impl"]) model_group.add_argument("--override-attention-dtype", **model_kwargs["override_attention_dtype"]) model_group.add_argument("--logits-processors", diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py index 0c2441a6db44..bd1773c753a9 100644 --- a/vllm/model_executor/model_loader/utils.py +++ b/vllm/model_executor/model_loader/utils.py @@ -13,8 +13,7 @@ from typing_extensions import assert_never from vllm.attention import Attention -from vllm.config import (ModelConfig, ModelImpl, VllmConfig, - set_current_vllm_config) +from vllm.config import ModelConfig, VllmConfig, set_current_vllm_config from vllm.logger import init_logger from vllm.model_executor.layers.linear import QKVCrossParallelLinear from vllm.model_executor.layers.quantization.base_config import ( @@ -176,8 +175,8 @@ def get_model_architecture( ) if arch == model_config._get_transformers_backend_cls(): - assert model_config.model_impl != ModelImpl.VLLM - if model_config.model_impl == ModelImpl.AUTO: + assert model_config.model_impl != "vllm" + if 
model_config.model_impl == "auto": logger.warning_once( "%s has no vLLM implementation, falling back to Transformers " "implementation. Some features may not be supported and " diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 1382fd9e93ea..76f2bd087624 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -19,7 +19,7 @@ import torch.nn as nn import transformers -from vllm.config import (ModelConfig, ModelImpl, iter_architecture_defaults, +from vllm.config import (ModelConfig, iter_architecture_defaults, try_match_architecture_defaults) from vllm.logger import init_logger from vllm.transformers_utils.dynamic_module import ( @@ -587,7 +587,7 @@ def _try_resolve_transformers( if model_module is not None: break else: - if model_config.model_impl != ModelImpl.TRANSFORMERS: + if model_config.model_impl != "transformers": return None raise ValueError( @@ -598,7 +598,7 @@ def _try_resolve_transformers( "'auto_map' (relevant if the model is custom).") if not model_module.is_backend_compatible(): - if model_config.model_impl != ModelImpl.TRANSFORMERS: + if model_config.model_impl != "transformers": return None raise ValueError( @@ -644,20 +644,20 @@ def inspect_model_cls( raise ValueError("No model architectures are specified") # Require transformers impl - if model_config.model_impl == ModelImpl.TRANSFORMERS: + if model_config.model_impl == "transformers": arch = self._try_resolve_transformers(architectures[0], model_config) if arch is not None: model_info = self._try_inspect_model_cls(arch) if model_info is not None: return (model_info, arch) - elif model_config.model_impl == ModelImpl.TERRATORCH: + elif model_config.model_impl == "terratorch": model_info = self._try_inspect_model_cls("Terratorch") return (model_info, "Terratorch") # Fallback to transformers impl (after resolving convert_type) if (all(arch not in self.models for arch in architectures) - and model_config.model_impl == ModelImpl.AUTO + and model_config.model_impl == "auto" and getattr(model_config, "convert_type", "none") == "none"): arch = self._try_resolve_transformers(architectures[0], model_config) @@ -674,7 +674,7 @@ def inspect_model_cls( # Fallback to transformers impl (before resolving runner_type) if (all(arch not in self.models for arch in architectures) - and model_config.model_impl == ModelImpl.AUTO): + and model_config.model_impl == "auto"): arch = self._try_resolve_transformers(architectures[0], model_config) if arch is not None: @@ -695,14 +695,14 @@ def resolve_model_cls( raise ValueError("No model architectures are specified") # Require transformers impl - if model_config.model_impl == ModelImpl.TRANSFORMERS: + if model_config.model_impl == "transformers": arch = self._try_resolve_transformers(architectures[0], model_config) if arch is not None: model_cls = self._try_load_model_cls(arch) if model_cls is not None: return (model_cls, arch) - elif model_config.model_impl == ModelImpl.TERRATORCH: + elif model_config.model_impl == "terratorch": arch = "Terratorch" model_cls = self._try_load_model_cls(arch) if model_cls is not None: @@ -710,7 +710,7 @@ def resolve_model_cls( # Fallback to transformers impl (after resolving convert_type) if (all(arch not in self.models for arch in architectures) - and model_config.model_impl == ModelImpl.AUTO + and model_config.model_impl == "auto" and getattr(model_config, "convert_type", "none") == "none"): arch = self._try_resolve_transformers(architectures[0], model_config) @@ -727,7 +727,7 
@@ def resolve_model_cls( # Fallback to transformers impl (before resolving runner_type) if (all(arch not in self.models for arch in architectures) - and model_config.model_impl == ModelImpl.AUTO): + and model_config.model_impl == "auto"): arch = self._try_resolve_transformers(architectures[0], model_config) if arch is not None: diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index cc5653b10ec1..747e52f2e589 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -29,15 +29,12 @@ class TopKTopPSampler(nn.Module): Implementations may update the logits tensor in-place. """ - def __init__( - self, - logprobs_mode: LogprobsMode = LogprobsMode.RAW_LOGPROBS) -> None: + def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs") -> None: super().__init__() self.logprobs_mode = logprobs_mode # flashinfer optimization does not apply if intermediate # logprobs/logits after top_k/top_p need to be returned - if logprobs_mode not in (LogprobsMode.PROCESSED_LOGITS, - LogprobsMode.PROCESSED_LOGPROBS + if logprobs_mode not in ("processed_logits", "processed_logprobs" ) and current_platform.is_cuda(): if is_flashinfer_available: flashinfer_version = flashinfer.__version__ @@ -90,9 +87,9 @@ def forward_native( """ logits = self.apply_top_k_top_p(logits, k, p) logits_to_return = None - if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS: + if self.logprobs_mode == "processed_logits": logits_to_return = logits - elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS: + elif self.logprobs_mode == "processed_logprobs": logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32) probs = logits.softmax(dim=-1, dtype=torch.float32) return random_sample(probs, generators), logits_to_return @@ -115,7 +112,7 @@ def forward_cuda( "PyTorch-native implementation.") return self.forward_native(logits, generators, k, p) assert self.logprobs_mode not in ( - LogprobsMode.PROCESSED_LOGITS, LogprobsMode.PROCESSED_LOGPROBS + "processed_logits", "processed_logprobs" ), "FlashInfer does not support returning logits/logprobs" # flashinfer sampling functions expect contiguous logits. # In flex_attn/triton_attn fp32 inference, logits can be non-contiguous diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py index 546531a91610..fa2a6e590f22 100644 --- a/vllm/v1/sample/sampler.py +++ b/vllm/v1/sample/sampler.py @@ -60,8 +60,7 @@ class Sampler(nn.Module): 9. Return the final `SamplerOutput`. """ - def __init__(self, - logprobs_mode: LogprobsMode = LogprobsMode.RAW_LOGPROBS): + def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs"): super().__init__() self.topk_topp_sampler = TopKTopPSampler(logprobs_mode) self.pin_memory = is_pin_memory_available() @@ -78,9 +77,9 @@ def forward( # is used for sampling (after penalties and temperature scaling). num_logprobs = sampling_metadata.max_num_logprobs if num_logprobs is not None: - if self.logprobs_mode == LogprobsMode.RAW_LOGPROBS: + if self.logprobs_mode == "raw_logprobs": raw_logprobs = self.compute_logprobs(logits) - elif self.logprobs_mode == LogprobsMode.RAW_LOGITS: + elif self.logprobs_mode == "raw_logits": raw_logprobs = logits.clone() # Use float32 for the logits. 
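Since `LogprobsMode` is now a plain string `Literal` rather than an enum, callers of the sampler simply pass one of the four literal values. A minimal sketch of the resulting usage follows; it is illustrative only and not part of this patch.

```python
# Minimal sketch: LogprobsMode is now a string Literal, so callers pass plain strings.
from vllm.v1.sample.sampler import Sampler

# Accepted values after this change:
# "raw_logprobs", "raw_logits", "processed_logits", "processed_logprobs"
sampler = Sampler(logprobs_mode="processed_logprobs")
assert sampler.logprobs_mode == "processed_logprobs"
```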
@@ -156,9 +155,9 @@ def sample( if sampling_metadata.all_greedy: processed_logprobs = None if sampling_metadata.max_num_logprobs is not None: - if self.logprobs_mode == LogprobsMode.PROCESSED_LOGITS: + if self.logprobs_mode == "processed_logits": processed_logprobs = logits - elif self.logprobs_mode == LogprobsMode.PROCESSED_LOGPROBS: + elif self.logprobs_mode == "processed_logprobs": processed_logprobs = self.compute_logprobs(logits) return greedy_sampled, processed_logprobs From ce75e153735363eca01ce67b9f69e7b9ea440c63 Mon Sep 17 00:00:00 2001 From: samzong Date: Sat, 20 Sep 2025 00:36:52 +0800 Subject: [PATCH 150/518] refactor(benchmarks): add type annotations to wait_for_endpoint parameters (#25218) Signed-off-by: samzong --- vllm/benchmarks/lib/endpoint_request_func.py | 15 +++++++++++++-- vllm/benchmarks/lib/ready_checker.py | 5 +++-- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/vllm/benchmarks/lib/endpoint_request_func.py b/vllm/benchmarks/lib/endpoint_request_func.py index 066b8fe83438..725b7df8b187 100644 --- a/vllm/benchmarks/lib/endpoint_request_func.py +++ b/vllm/benchmarks/lib/endpoint_request_func.py @@ -8,8 +8,9 @@ import sys import time import traceback +from collections.abc import Awaitable from dataclasses import dataclass, field -from typing import Optional, Union +from typing import Optional, Protocol, Union import aiohttp from tqdm.asyncio import tqdm @@ -92,6 +93,16 @@ class RequestFuncOutput: start_time: float = 0.0 +class RequestFunc(Protocol): + def __call__( + self, + request_func_input: RequestFuncInput, + session: aiohttp.ClientSession, + pbar: Optional[tqdm] = None, + ) -> Awaitable[RequestFuncOutput]: + ... + + async def async_request_openai_completions( request_func_input: RequestFuncInput, session: aiohttp.ClientSession, @@ -507,7 +518,7 @@ async def async_request_openai_embeddings( # TODO: Add more request functions for different API protocols. 
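With the `RequestFunc` protocol above, acting on that TODO only needs an async callable with a matching signature; the registry below then stays assignable to `dict[str, RequestFunc]`. A hedged sketch is shown here — the backend name and function are hypothetical and do not exist in vLLM.

```python
# Hypothetical sketch: "my-backend" and async_request_my_backend are illustrative names.
from typing import Optional

import aiohttp
from tqdm.asyncio import tqdm

from vllm.benchmarks.lib.endpoint_request_func import (RequestFuncInput,
                                                       RequestFuncOutput)


async def async_request_my_backend(
    request_func_input: RequestFuncInput,
    session: aiohttp.ClientSession,
    pbar: Optional[tqdm] = None,
) -> RequestFuncOutput:
    output = RequestFuncOutput()
    # Issue the HTTP request via `session`, fill in the latency/TTFT fields,
    # and advance `pbar` if one was provided.
    return output

# Registering it keeps the mapping consistent with dict[str, RequestFunc]:
# ASYNC_REQUEST_FUNCS["my-backend"] = async_request_my_backend
```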
-ASYNC_REQUEST_FUNCS = { +ASYNC_REQUEST_FUNCS: dict[str, RequestFunc] = { "vllm": async_request_openai_completions, "openai": async_request_openai_completions, "openai-chat": async_request_openai_chat_completions, diff --git a/vllm/benchmarks/lib/ready_checker.py b/vllm/benchmarks/lib/ready_checker.py index 7e836158386a..87fc16b55012 100644 --- a/vllm/benchmarks/lib/ready_checker.py +++ b/vllm/benchmarks/lib/ready_checker.py @@ -8,11 +8,12 @@ import aiohttp from tqdm.asyncio import tqdm -from .endpoint_request_func import RequestFuncInput, RequestFuncOutput +from .endpoint_request_func import (RequestFunc, RequestFuncInput, + RequestFuncOutput) async def wait_for_endpoint( - request_func, + request_func: RequestFunc, test_input: RequestFuncInput, session: aiohttp.ClientSession, timeout_seconds: int = 600, From 7ac67ea5255c764e87bdfc5c712bfaa35f491764 Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Fri, 19 Sep 2025 19:53:45 +0300 Subject: [PATCH 151/518] [KV offload][3/N] Add worker-side CPU support (#21448) Signed-off-by: Or Ozeri --- tests/v1/kv_offload/test_cpu_gpu.py | 177 +++++++++++++++++++++++++++ vllm/v1/kv_offload/worker/cpu_gpu.py | 171 ++++++++++++++++++++++++++ 2 files changed, 348 insertions(+) create mode 100644 tests/v1/kv_offload/test_cpu_gpu.py create mode 100644 vllm/v1/kv_offload/worker/cpu_gpu.py diff --git a/tests/v1/kv_offload/test_cpu_gpu.py b/tests/v1/kv_offload/test_cpu_gpu.py new file mode 100644 index 000000000000..0edb9513e3ff --- /dev/null +++ b/tests/v1/kv_offload/test_cpu_gpu.py @@ -0,0 +1,177 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import random +import time + +import pytest +import torch + +from vllm.platforms import current_platform +from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend +from vllm.v1.attention.backends.flashinfer import FlashInferBackend +from vllm.v1.attention.backends.mla.flashattn_mla import FlashAttnMLABackend +from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec +from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandler + +NUM_GPU_BLOCKS = [64] +NUM_CPU_BLOCKS = [256] +GPU_BLOCK_SIZES = [16] +GPU_BLOCKS_PER_CPU_BLOCK = [1, 3] +HEAD_SIZES = [64] +NUM_HEADS = [8] +NUM_LAYERS = [4] +DTYPES = [torch.bfloat16] +SEEDS = [0] +CUDA_DEVICES = ['cuda:0'] +NUM_MAPPINGS = [3] + + +@pytest.mark.parametrize("gpu_to_cpu", [True, False]) +@pytest.mark.parametrize("num_mappings", NUM_MAPPINGS) +@pytest.mark.parametrize("head_size", HEAD_SIZES) +@pytest.mark.parametrize("num_heads", NUM_HEADS) +@pytest.mark.parametrize("gpu_block_size", GPU_BLOCK_SIZES) +@pytest.mark.parametrize("gpu_blocks_per_cpu_block", GPU_BLOCKS_PER_CPU_BLOCK) +@pytest.mark.parametrize("num_gpu_blocks", NUM_GPU_BLOCKS) +@pytest.mark.parametrize("num_cpu_blocks", NUM_CPU_BLOCKS) +@pytest.mark.parametrize("num_layers", NUM_LAYERS) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +@torch.inference_mode() +def test_transfer( + gpu_to_cpu: bool, + num_mappings: int, + head_size: int, + num_heads: int, + gpu_block_size: int, + gpu_blocks_per_cpu_block: int, + num_gpu_blocks: int, + num_cpu_blocks: int, + num_layers: int, + dtype: torch.dtype, + seed: int, + device: str, +) -> None: + current_platform.seed_everything(seed) + + # create per-layer GPU KV caches + attn_backends_list = [ + FlashAttentionBackend, FlashInferBackend, FlashAttnMLABackend + ] + + gpu_caches = {} + attn_backends 
= {} + for i in range(num_layers): + layer_name = f'layer {i}' + + attn_backend = attn_backends_list[i % len(attn_backends_list)] + attn_backends[layer_name] = attn_backend + + gpu_cache_shape = attn_backend.get_kv_cache_shape( + num_gpu_blocks, gpu_block_size, num_heads, head_size) + gpu_caches[layer_name] = torch.rand(gpu_cache_shape, + dtype=dtype, + device=device) + + # create handler + cpu_block_size = gpu_blocks_per_cpu_block * gpu_block_size + handler = CpuGpuOffloadingHandler(attn_backends=attn_backends, + gpu_block_size=gpu_block_size, + cpu_block_size=cpu_block_size, + num_cpu_blocks=num_cpu_blocks, + gpu_caches=gpu_caches) + + # select block mappings + gpu_blocks = random.sample(range(num_gpu_blocks), + num_mappings * gpu_blocks_per_cpu_block) + cpu_blocks = random.sample(range(num_cpu_blocks), num_mappings) + + # convert cpu blocks to gpu block size + cpu_blocks_in_gpu_block_size = [] + for cpu_block in cpu_blocks: + base_block_id = cpu_block * gpu_blocks_per_cpu_block + for i in range(gpu_blocks_per_cpu_block): + cpu_blocks_in_gpu_block_size.append(i + base_block_id) + + # maybe skip a GPU block to test writing to the middle of a CPU block + if gpu_to_cpu: + gpu_blocks = gpu_blocks[gpu_blocks_per_cpu_block - 1:] + cpu_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size[ + gpu_blocks_per_cpu_block - 1:] + + # set transfer direction + if gpu_to_cpu: + src_kv_caches = handler.gpu_tensors + dst_kv_caches = handler.cpu_tensors + src_spec_class = GPULoadStoreSpec + dst_spec_class = CPULoadStoreSpec + src_blocks = gpu_blocks + dst_blocks = cpu_blocks + src_blocks_in_gpu_block_size = gpu_blocks + dst_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size + dst_size_in_gpu_blocks = num_cpu_blocks * gpu_blocks_per_cpu_block + else: + src_kv_caches = handler.cpu_tensors + dst_kv_caches = handler.gpu_tensors + src_spec_class = CPULoadStoreSpec + dst_spec_class = GPULoadStoreSpec + src_blocks = cpu_blocks + dst_blocks = gpu_blocks + src_blocks_in_gpu_block_size = cpu_blocks_in_gpu_block_size + dst_blocks_in_gpu_block_size = gpu_blocks + dst_size_in_gpu_blocks = num_gpu_blocks + + # build dst -> src mapping + dst_to_src = {} + for src_block, dst_block in zip(src_blocks_in_gpu_block_size, + dst_blocks_in_gpu_block_size): + dst_to_src[dst_block] = src_block + + # build transfer specs + src_spec = src_spec_class(src_blocks) + dst_spec = dst_spec_class(dst_blocks) + + # clone src and dst tensors before transfer + orig_src_caches = [x.clone() for x in src_kv_caches] + orig_dst_caches = [x.clone() for x in dst_kv_caches] + + # call transfer function + assert handler.transfer_async(1, (src_spec, dst_spec)) + assert set(handler.transfer_events.keys()) == {1} + + # wait for transfer to complete + end_time = time.time() + 10 + while time.time() < end_time: + finished = handler.get_finished() + if finished: + assert finished == [(1, True)] + break + time.sleep(0.1) + + # verify src tensors did not change + for orig_tensor, tensor in zip(orig_src_caches, src_kv_caches): + assert torch.equal(orig_tensor, tensor) + + # verify dst tensors + for dst_block in range(dst_size_in_gpu_blocks): + src_block_candidate = dst_to_src.get(dst_block) + for src_cache, dst_cache, orig_dst_cache, kv_dim in zip( + src_kv_caches, dst_kv_caches, orig_dst_caches, + handler.kv_dim_before_num_blocks): + if kv_dim: + # iterate over key, value + for i in range(2): + if src_block_candidate is not None: + expected_value = src_cache[i][src_block_candidate] + else: + expected_value = orig_dst_cache[i][dst_block] + 
torch.testing.assert_close(dst_cache[i][dst_block].cpu(), + expected_value.cpu()) + else: + if src_block_candidate is not None: + expected_value = src_cache[src_block_candidate] + else: + expected_value = orig_dst_cache[dst_block] + torch.testing.assert_close(dst_cache[dst_block].cpu(), + expected_value.cpu()) diff --git a/vllm/v1/kv_offload/worker/cpu_gpu.py b/vllm/v1/kv_offload/worker/cpu_gpu.py new file mode 100644 index 000000000000..556c29247e5e --- /dev/null +++ b/vllm/v1/kv_offload/worker/cpu_gpu.py @@ -0,0 +1,171 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import numpy as np +import torch + +from vllm import _custom_ops as ops +from vllm.attention import AttentionBackend +from vllm.logger import init_logger +from vllm.utils import is_pin_memory_available +from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec +from vllm.v1.kv_offload.worker.worker import (OffloadingHandler, + TransferResult, TransferSpec) + +logger = init_logger(__name__) + + +def expand_block_ids(block_ids: np.ndarray, + block_size_factor: int, + output: np.ndarray, + skip_count: int = 0): + """ + Convert a list of block IDs to a list of matching block ids, + assuming each block is composed of actual block_size_factor blocks. + Outputs to output tensor. + The first skip_count blocks will be skipped. + Note that skip_count must be less than block_size_factor. + + For example, if block_ids = [0, 1, 3] and block_size_factor = 4, + then it yields [0, 1, 2, 3, 4, 5, 6, 7, 12, 13, 14, 15] + since 0 maps to [0, 1, 2, 3] + 1 maps to [4, 5, 6, 7] + and 3 maps to [12, 13, 14, 15] + """ + assert skip_count < block_size_factor + + first_range = np.arange(skip_count, block_size_factor) + full_range = np.arange(0, block_size_factor) + + output_idx = 0 + for i, block_id in enumerate(block_ids): + base_block_id = block_id * block_size_factor + indices = first_range if i == 0 else full_range + output_end_idx = output_idx + len(indices) + output[output_idx:output_end_idx] = base_block_id + indices + output_idx = output_end_idx + + +class CpuGpuOffloadingHandler(OffloadingHandler): + + def __init__(self, gpu_block_size: int, cpu_block_size: int, + num_cpu_blocks: int, gpu_caches: dict[str, torch.Tensor], + attn_backends: dict[str, type[AttentionBackend]]): + assert cpu_block_size % gpu_block_size == 0 + self.block_size_factor = cpu_block_size // gpu_block_size + + # cuda streams for gpu->cpu and cpu->gpu + self.d2h_stream = torch.cuda.Stream() + self.h2d_stream = torch.cuda.Stream() + + # job_id -> transfer cuda event + self.transfer_events: dict[int, torch.cuda.Event] = {} + # list of cuda events available for re-use + self.events_pool: list[torch.cuda.Event] = [] + + pin_memory = is_pin_memory_available() + + # allocate cpu tensors + logger.info("Allocating %d CPU tensors...", len(gpu_caches)) + self.gpu_tensors: list[torch.Tensor] = [] + self.cpu_tensors: list[torch.Tensor] = [] + self.kv_dim_before_num_blocks: list[bool] = [] + for layer_name, gpu_tensor in gpu_caches.items(): + self.gpu_tensors.append(gpu_tensor) + + gpu_shape = gpu_tensor.shape + test_shape = attn_backends[layer_name].get_kv_cache_shape( + num_blocks=1234, block_size=16, num_kv_heads=8, head_size=256) + if test_shape[0] == 1234: + # shape is (num_blocks, ...) + num_blocks_idx = 0 + self.kv_dim_before_num_blocks.append(False) + else: + # shape should be (2, num_blocks, ...) 
+ assert test_shape[0] == 2 + assert test_shape[1] == 1234 + assert gpu_shape[0] == 2 + + num_blocks_idx = 1 + self.kv_dim_before_num_blocks.append(True) + + cpu_shape = list(gpu_shape) + cpu_shape[num_blocks_idx] = num_cpu_blocks * self.block_size_factor + + logger.debug("Allocating CPU tensor of shape %r", cpu_shape) + self.cpu_tensors.append( + torch.zeros(cpu_shape, + dtype=gpu_tensor.dtype, + device="cpu", + pin_memory=pin_memory)) + + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + src_spec, dst_spec = spec + if isinstance(src_spec, CPULoadStoreSpec): + assert isinstance(dst_spec, GPULoadStoreSpec) + stream = self.h2d_stream + src_tensors = self.cpu_tensors + dst_tensors = self.gpu_tensors + src_block_size_factor = self.block_size_factor + dst_block_size_factor = 1 + else: + assert isinstance(src_spec, GPULoadStoreSpec) + assert isinstance(dst_spec, CPULoadStoreSpec) + stream = self.d2h_stream + src_tensors = self.gpu_tensors + dst_tensors = self.cpu_tensors + src_block_size_factor = 1 + dst_block_size_factor = self.block_size_factor + + src_blocks = src_spec.block_ids + dst_blocks = dst_spec.block_ids + assert src_blocks.ndim == 1 + assert dst_blocks.ndim == 1 + + dst_sub_blocks_to_skip = (-src_blocks.size % dst_block_size_factor) + src_sub_block_count = src_blocks.size * src_block_size_factor + + assert ( + src_sub_block_count == dst_blocks.size * dst_block_size_factor - + dst_sub_blocks_to_skip) + + src_to_dst = np.empty((src_sub_block_count, 2), dtype=np.int64) + expand_block_ids(src_blocks, src_block_size_factor, src_to_dst[:, 0]) + expand_block_ids(dst_blocks, + dst_block_size_factor, + src_to_dst[:, 1], + skip_count=dst_sub_blocks_to_skip) + src_to_dst_tensor = torch.from_numpy(src_to_dst) + + event = self.events_pool.pop() if self.events_pool \ + else torch.cuda.Event() + with torch.cuda.stream(stream): + for src_tensor, dst_tensor, kv_dim in zip( + src_tensors, dst_tensors, self.kv_dim_before_num_blocks): + if kv_dim: + src_key_cache = src_tensor[0] + dst_key_cache = dst_tensor[0] + ops.swap_blocks(src_key_cache, dst_key_cache, + src_to_dst_tensor) + src_value_cache = src_tensor[1] + dst_value_cache = dst_tensor[1] + ops.swap_blocks(src_value_cache, dst_value_cache, + src_to_dst_tensor) + else: + ops.swap_blocks(src_tensor, dst_tensor, src_to_dst_tensor) + event.record(stream) + + self.transfer_events[job_id] = event + + # success + return True + + def get_finished(self) -> list[TransferResult]: + results: list[TransferResult] = [] + for job_id, event in self.transfer_events.items(): + if event.query(): + results.append((job_id, True)) + self.events_pool.append(event) + for job_id, _ in results: + del self.transfer_events[job_id] + return results From 6c117cff7d0110c74f97f68cda009595a2fdae5e Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 20 Sep 2025 01:15:19 +0800 Subject: [PATCH 152/518] [Frontend] Pass API server count to each process (#23717) Signed-off-by: DarkLight1337 --- .../kernels/benchmark_w8a8_block_fp8.py | 2 +- examples/others/tensorize_vllm_model.py | 9 +-- .../test_api_server_process_manager.py | 2 +- tests/v1/test_external_lb_dp.py | 52 +++++++++++++++-- tests/v1/test_hybrid_lb_dp.py | 54 ++++++++++++++++-- tests/v1/test_internal_lb_dp.py | 57 ++++++++++++++++--- vllm/config/parallel.py | 25 ++++++++ vllm/engine/arg_utils.py | 9 ++- vllm/entrypoints/cli/serve.py | 18 ++---- vllm/entrypoints/openai/api_server.py | 38 ++++++++++--- vllm/multimodal/cache.py | 3 +- vllm/v1/engine/core_client.py | 3 +- 12 files changed, 221 
insertions(+), 51 deletions(-) diff --git a/benchmarks/kernels/benchmark_w8a8_block_fp8.py b/benchmarks/kernels/benchmark_w8a8_block_fp8.py index df2b713e46dc..c6c8e0b0b936 100644 --- a/benchmarks/kernels/benchmark_w8a8_block_fp8.py +++ b/benchmarks/kernels/benchmark_w8a8_block_fp8.py @@ -11,13 +11,13 @@ from typing import Any import torch -import triton from tqdm import tqdm from vllm.model_executor.layers.quantization.utils.fp8_utils import ( _w8a8_block_fp8_matmul, ) from vllm.platforms import current_platform +from vllm.triton_utils import triton from vllm.utils import FlexibleArgumentParser mp.set_start_method("spawn", force=True) diff --git a/examples/others/tensorize_vllm_model.py b/examples/others/tensorize_vllm_model.py index 559c7c493aca..2b7f0beab227 100644 --- a/examples/others/tensorize_vllm_model.py +++ b/examples/others/tensorize_vllm_model.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import argparse -import dataclasses import json import logging import os @@ -327,12 +325,7 @@ def main(): if args.command == "serialize": - eng_args_dict = {f.name: getattr(args, f.name) for f in - dataclasses.fields(EngineArgs)} - - engine_args = EngineArgs.from_cli_args( - argparse.Namespace(**eng_args_dict) - ) + engine_args = EngineArgs.from_cli_args(args) input_dir = tensorizer_dir.rstrip('/') suffix = args.suffix if args.suffix else uuid.uuid4().hex diff --git a/tests/entrypoints/test_api_server_process_manager.py b/tests/entrypoints/test_api_server_process_manager.py index a993e24ff838..34b05ad17b02 100644 --- a/tests/entrypoints/test_api_server_process_manager.py +++ b/tests/entrypoints/test_api_server_process_manager.py @@ -60,7 +60,7 @@ def test_api_server_process_manager_init(api_server_args, with_stats_update): global WORKER_RUNTIME_SECONDS WORKER_RUNTIME_SECONDS = 0.5 - # Copy the args to avoid mutating the + # Copy the args to avoid mutating them args = api_server_args.copy() if not with_stats_update: diff --git a/tests/v1/test_external_lb_dp.py b/tests/v1/test_external_lb_dp.py index 4a5c47fead58..862a76f3c4e2 100644 --- a/tests/v1/test_external_lb_dp.py +++ b/tests/v1/test_external_lb_dp.py @@ -9,6 +9,7 @@ import openai # use the official client for correctness check import pytest import pytest_asyncio +import requests from tests.utils import RemoteOpenAIServer from vllm.platforms import current_platform @@ -70,6 +71,8 @@ def start_server(r: int, sargs: list[str]): sargs, auto_port=False, env_dict={ + "VLLM_SERVER_DEV_MODE": + "1", current_platform.device_control_env_var: ",".join( str( @@ -127,11 +130,19 @@ def default_server_args(): @pytest.fixture(scope="module", params=[1, 4]) -def servers(request, default_server_args): +def server_manager(request, default_server_args): api_server_count = request.param - with ExternalLBServerManager(MODEL_NAME, DP_SIZE, api_server_count, - default_server_args) as server_list: - yield server_list + server_manager = ExternalLBServerManager(MODEL_NAME, DP_SIZE, + api_server_count, + default_server_args) + + with server_manager: + yield server_manager + + +@pytest.fixture +def servers(server_manager): + return server_manager.servers @pytest_asyncio.fixture @@ -144,6 +155,39 @@ async def clients(servers: list[tuple[RemoteOpenAIServer, list[str]]]): ] +def _get_parallel_config(server: RemoteOpenAIServer): + response = requests.get(server.url_for("server_info?config_format=json")) + response.raise_for_status() + + vllm_config = response.json()["vllm_config"] + return 
vllm_config["parallel_config"] + + +def test_external_lb_server_info(server_manager): + servers = server_manager.servers + api_server_count = server_manager.api_server_count + + for i, (server, _) in enumerate(servers): + print(f"Testing {i=}") + + # Each request will hit one of the API servers + # `n_reqs` is set so that there is a good chance each server + # receives at least one request + n_reqs = 2 * api_server_count * api_server_count + parallel_configs = [ + _get_parallel_config(server) for _ in range(n_reqs) + ] + api_process_counts = [ + c["_api_process_count"] for c in parallel_configs + ] + api_process_ranks = [c["_api_process_rank"] for c in parallel_configs] + + assert all(c == api_server_count + for c in api_process_counts), api_process_counts + assert all(0 <= r < api_server_count + for r in api_process_ranks), api_process_ranks + + @pytest.mark.asyncio @pytest.mark.parametrize( "model_name", diff --git a/tests/v1/test_hybrid_lb_dp.py b/tests/v1/test_hybrid_lb_dp.py index 293b1257be6b..552436f818d7 100644 --- a/tests/v1/test_hybrid_lb_dp.py +++ b/tests/v1/test_hybrid_lb_dp.py @@ -9,6 +9,7 @@ import openai # use the official client for correctness check import pytest import pytest_asyncio +import requests from tests.utils import RemoteOpenAIServer from tests.v1.test_utils import check_request_balancing @@ -92,6 +93,8 @@ def start_server(node: int, sargs: list[str]): sargs, auto_port=False, env_dict={ + "VLLM_SERVER_DEV_MODE": + "1", current_platform.device_control_env_var: ",".join( str( @@ -150,12 +153,20 @@ def default_server_args(): @pytest.fixture(scope="module", params=[1, 4]) -def servers(request, default_server_args): +def server_manager(request, default_server_args): api_server_count = request.param - with HybridLBServerManager(MODEL_NAME, DP_SIZE, api_server_count, - default_server_args, DP_SIZE_LOCAL, - TP_SIZE) as server_list: - yield server_list + server_manager = HybridLBServerManager(MODEL_NAME, DP_SIZE, + api_server_count, + default_server_args, DP_SIZE_LOCAL, + TP_SIZE) + + with server_manager: + yield server_manager + + +@pytest.fixture +def servers(server_manager): + return server_manager.servers @pytest_asyncio.fixture @@ -168,6 +179,39 @@ async def clients(servers: list[tuple[RemoteOpenAIServer, list[str]]]): ] +def _get_parallel_config(server: RemoteOpenAIServer): + response = requests.get(server.url_for("server_info?config_format=json")) + response.raise_for_status() + + vllm_config = response.json()["vllm_config"] + return vllm_config["parallel_config"] + + +def test_hybrid_dp_server_info(server_manager): + servers = server_manager.servers + api_server_count = server_manager.api_server_count + + for i, (server, _) in enumerate(servers): + print(f"Testing {i=}") + + # Each request will hit one of the API servers + # `n_reqs` is set so that there is a good chance each server + # receives at least one request + n_reqs = 2 * api_server_count * api_server_count + parallel_configs = [ + _get_parallel_config(server) for _ in range(n_reqs) + ] + api_process_counts = [ + c["_api_process_count"] for c in parallel_configs + ] + api_process_ranks = [c["_api_process_rank"] for c in parallel_configs] + + assert all(c == api_server_count + for c in api_process_counts), api_process_counts + assert all(0 <= r < api_server_count + for r in api_process_ranks), api_process_ranks + + @pytest.mark.asyncio @pytest.mark.parametrize( "model_name", diff --git a/tests/v1/test_internal_lb_dp.py b/tests/v1/test_internal_lb_dp.py index 2b031865cad7..e965645711ee 100644 --- 
a/tests/v1/test_internal_lb_dp.py +++ b/tests/v1/test_internal_lb_dp.py @@ -10,6 +10,7 @@ import openai # use the official client for correctness check import pytest import pytest_asyncio +import requests from tests.utils import RemoteOpenAIServer from tests.v1.test_utils import check_request_balancing @@ -101,6 +102,8 @@ def start_server(sidx: int, r: int, sargs: list[str]): sargs, auto_port=False, env_dict={ + "VLLM_SERVER_DEV_MODE": + "1", current_platform.device_control_env_var: ",".join( str( @@ -214,7 +217,10 @@ def start_api_server(): self.model_name, api_server_args, auto_port=False, - env_dict={}) # No GPUs needed for API-only server + env_dict={ + "VLLM_SERVER_DEV_MODE": "1", + # No GPUs needed for API-only server + }) server.__enter__() print(f"API-only server started successfully with " f"{self.api_server_count} API servers") @@ -293,14 +299,21 @@ def default_server_args(): @pytest.fixture(scope="module", params=[1, 4]) -def servers(request, default_server_args): +def server_manager(request, default_server_args): api_server_count = request.param - with MultinodeInternalLBServerManager(MODEL_NAME, DP_SIZE, - api_server_count, - default_server_args, - DP_SIZE // NUM_NODES, - TP_SIZE) as server_list: - yield server_list + server_manager = MultinodeInternalLBServerManager(MODEL_NAME, DP_SIZE, + api_server_count, + default_server_args, + DP_SIZE // NUM_NODES, + TP_SIZE) + + with server_manager: + yield server_manager + + +@pytest.fixture +def servers(server_manager): + return server_manager.servers @pytest.fixture(scope="module", params=[1, 4]) @@ -331,6 +344,34 @@ async def api_only_client(api_only_servers: list[tuple[RemoteOpenAIServer, yield client +def _get_parallel_config(server: RemoteOpenAIServer): + response = requests.get(server.url_for("server_info?config_format=json")) + response.raise_for_status() + + vllm_config = response.json()["vllm_config"] + return vllm_config["parallel_config"] + + +def test_multinode_dp_server_info(server_manager): + head_server = server_manager.servers[0][0] + api_server_count = server_manager.api_server_count + + # Each request will hit one of the API servers + # `n_reqs` is set so that there is a good chance each server + # receives at least one request + n_reqs = 2 * api_server_count * api_server_count + parallel_configs = [ + _get_parallel_config(head_server) for _ in range(n_reqs) + ] + api_process_counts = [c["_api_process_count"] for c in parallel_configs] + api_process_ranks = [c["_api_process_rank"] for c in parallel_configs] + + assert all(c == api_server_count + for c in api_process_counts), api_process_counts + assert all(0 <= r < api_server_count + for r in api_process_ranks), api_process_ranks + + @pytest.mark.asyncio @pytest.mark.parametrize( "model_name", diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 8e92e54a9678..37a41bf6de71 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -193,6 +193,25 @@ class is dynamically inherited by the worker class. This is used to inject not change by dcp, it simply reuse the GPUs of TP group, and tp_size needs to be divisible by dcp_size.""" + _api_process_count: int = 1 + """ + The number of API processes initialized. + + Note: + This is an internal config that is only valid for and + should only be set by API server scale-out. + """ + + _api_process_rank: int = 0 + """ + The rank of this API process, or `-1` for engine core processes + under API server scale-out. 
+ + Note: + This is an internal config that is only valid for and + should only be set by API server scale-out. + """ + @property def world_size_across_dp(self) -> int: """world_size_across_dp is TPxPPxDP, it is the size of the world @@ -428,6 +447,12 @@ def __post_init__(self) -> None: if self.distributed_executor_backend is None and self.world_size == 1: self.distributed_executor_backend = "uni" + if not -1 <= self._api_process_rank < self._api_process_count: + raise ValueError( + "Invalid value of `_api_process_rank`. " + f"Expected to be `-1` or `[0, {self._api_process_count})`, " + f"but found: {self._api_process_rank}") + @property def use_ray(self) -> bool: return self.distributed_executor_backend == "ray" or ( diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ecf4e486a016..7a4bb0d41d23 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -333,6 +333,8 @@ class EngineArgs: enable_eplb: bool = ParallelConfig.enable_eplb expert_placement_strategy: ExpertPlacementStrategy = \ ParallelConfig.expert_placement_strategy + _api_process_count: int = ParallelConfig._api_process_count + _api_process_rank: int = ParallelConfig._api_process_rank num_redundant_experts: int = EPLBConfig.num_redundant_experts eplb_window_size: int = EPLBConfig.window_size eplb_step_interval: int = EPLBConfig.step_interval @@ -952,7 +954,10 @@ def from_cli_args(cls, args: argparse.Namespace): # Get the list of attributes of this dataclass. attrs = [attr.name for attr in dataclasses.fields(cls)] # Set the attributes from the parsed arguments. - engine_args = cls(**{attr: getattr(args, attr) for attr in attrs}) + engine_args = cls(**{ + attr: getattr(args, attr) + for attr in attrs if hasattr(args, attr) + }) return engine_args def create_model_config(self) -> ModelConfig: @@ -1366,6 +1371,8 @@ def create_engine_config( worker_cls=self.worker_cls, worker_extension_cls=self.worker_extension_cls, decode_context_parallel_size=self.decode_context_parallel_size, + _api_process_count=self._api_process_count, + _api_process_rank=self._api_process_rank, ) speculative_config = self.create_speculative_config( diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index 803a3e004656..de47bf00932e 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -135,23 +135,20 @@ def signal_handler(signum, frame): def run_multi_api_server(args: argparse.Namespace): assert not args.headless - num_api_servers = args.api_server_count + num_api_servers: int = args.api_server_count assert num_api_servers > 0 - orig_mm_processor_cache_gb = args.mm_processor_cache_gb - if num_api_servers > 1: setup_multiprocess_prometheus() - # Not compatible with API server scale-out - args.mm_processor_cache_gb = 0 - listen_address, sock = setup_server(args) engine_args = vllm.AsyncEngineArgs.from_cli_args(args) + engine_args._api_process_count = num_api_servers + engine_args._api_process_rank = -1 + usage_context = UsageContext.OPENAI_API_SERVER vllm_config = engine_args.create_engine_config(usage_context=usage_context) - model_config = vllm_config.model_config if num_api_servers > 1: if not envs.VLLM_USE_V1: @@ -161,10 +158,6 @@ def run_multi_api_server(args: argparse.Namespace): raise ValueError("VLLM_ALLOW_RUNTIME_LORA_UPDATING cannot be used " "with api_server_count > 1") - if model_config.is_multimodal_model and orig_mm_processor_cache_gb > 0: - logger.warning("Multi-modal processor cache is disabled because " - "it is not compatible with `api_server_count > 1`.") - 
executor_class = Executor.get_class(vllm_config) log_stats = not engine_args.disable_log_stats @@ -221,9 +214,10 @@ def run_api_server_worker_proc(listen_address, client_config=None, **uvicorn_kwargs) -> None: """Entrypoint for individual API server worker processes.""" + client_config = client_config or {} + server_index = client_config.get("client_index", 0) # Set process title and add process-specific prefix to stdout and stderr. - server_index = client_config.get("client_index", 0) if client_config else 0 set_process_title("APIServer", str(server_index)) decorate_logs() diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 11031cd616d2..b8ba7e81ef5f 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -17,13 +17,14 @@ from collections.abc import AsyncGenerator, AsyncIterator, Awaitable from contextlib import asynccontextmanager from http import HTTPStatus -from typing import Annotated, Any, Callable, Optional +from typing import Annotated, Any, Callable, Literal, Optional import prometheus_client import pydantic import regex as re import uvloop -from fastapi import APIRouter, Depends, FastAPI, Form, HTTPException, Request +from fastapi import (APIRouter, Depends, FastAPI, Form, HTTPException, Query, + Request) from fastapi.exceptions import RequestValidationError from fastapi.middleware.cors import CORSMiddleware from fastapi.responses import JSONResponse, Response, StreamingResponse @@ -166,6 +167,9 @@ async def build_async_engine_client( # Context manager to handle engine_client lifecycle # Ensures everything is shutdown and cleaned up on error/exit engine_args = AsyncEngineArgs.from_cli_args(args) + if client_config: + engine_args._api_process_count = client_config.get("client_count", 1) + engine_args._api_process_rank = client_config.get("client_index", 0) if disable_frontend_multiprocessing is None: disable_frontend_multiprocessing = bool( @@ -209,8 +213,12 @@ async def build_async_engine_client_from_engine_args( from vllm.v1.engine.async_llm import AsyncLLM async_llm: Optional[AsyncLLM] = None - client_count = client_config.pop("client_count") if client_config else 1 - client_index = client_config.pop("client_index") if client_config else 0 + + # Don't mutate the input client_config + client_config = dict(client_config) if client_config else {} + client_count = client_config.pop("client_count", 1) + client_index = client_config.pop("client_index", 0) + try: async_llm = AsyncLLM.from_vllm_config( vllm_config=vllm_config, @@ -956,9 +964,22 @@ async def do_rerank_v2(request: RerankRequest, raw_request: Request): logger.warning("SECURITY WARNING: Development endpoints are enabled! " "This should NOT be used in production!") + PydanticVllmConfig = pydantic.TypeAdapter(VllmConfig) + @router.get("/server_info") - async def show_server_info(raw_request: Request): - server_info = {"vllm_config": str(raw_request.app.state.vllm_config)} + async def show_server_info( + raw_request: Request, + config_format: Annotated[Literal["text", "json"], + Query()] = "text", + ): + vllm_config: VllmConfig = raw_request.app.state.vllm_config + server_info = { + "vllm_config": + str(vllm_config) + if config_format == "text" else PydanticVllmConfig.dump_python( + vllm_config, mode="json", fallback=str) + # fallback=str is needed to handle e.g. 
torch.dtype + } return JSONResponse(content=server_info) @router.post("/reset_prefix_cache") @@ -1856,8 +1877,6 @@ async def run_server_worker(listen_address, if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3: ToolParserManager.import_tool_parser(args.tool_parser_plugin) - server_index = client_config.get("client_index", 0) if client_config else 0 - # Load logging config for uvicorn if specified log_config = load_log_config(args.log_config_file) if log_config is not None: @@ -1873,7 +1892,8 @@ async def run_server_worker(listen_address, vllm_config = await engine_client.get_vllm_config() await init_app_state(engine_client, vllm_config, app.state, args) - logger.info("Starting vLLM API server %d on %s", server_index, + logger.info("Starting vLLM API server %d on %s", + vllm_config.parallel_config._api_process_rank, listen_address) shutdown_task = await serve_http( app, diff --git a/vllm/multimodal/cache.py b/vllm/multimodal/cache.py index 297b4c7fa7fb..642ec3fd7e3f 100644 --- a/vllm/multimodal/cache.py +++ b/vllm/multimodal/cache.py @@ -494,7 +494,8 @@ def _enable_processor_cache( def _enable_ipc_cache(vllm_config: "VllmConfig") -> bool: parallel_config = vllm_config.parallel_config - supports_ipc_cache = (parallel_config.data_parallel_size == 1 + supports_ipc_cache = ((parallel_config._api_process_count == 1 + and parallel_config.data_parallel_size == 1) or parallel_config.data_parallel_external_lb) return supports_ipc_cache diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index bb0f37c6e026..a84b0e55105b 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -437,7 +437,7 @@ def __init__( self.engines_running = False self.stats_update_address: Optional[str] = None - if client_addresses is not None: + if client_addresses: # Engines are managed externally to this client. 
input_address = client_addresses["input_address"] output_address = client_addresses["output_address"] @@ -774,6 +774,7 @@ def __init__(self, client_addresses=client_addresses, ) + self.client_count = client_count self.client_index = client_index self.outputs_queue = asyncio.Queue[Union[EngineCoreOutputs, Exception]]() From 2821986450bc31869714885ed4203650a42f3cb0 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Sat, 20 Sep 2025 02:01:28 +0800 Subject: [PATCH 153/518] [Core] Modify the initialization parameters of the lora manager (#25249) Signed-off-by: Jee Jee Li --- tests/lora/test_lora_manager.py | 36 ++++++++++++++++++----- tests/lora/utils.py | 2 +- vllm/lora/{lora.py => lora_weights.py} | 0 vllm/lora/models.py | 2 +- vllm/lora/worker_manager.py | 23 ++++++++------- vllm/v1/worker/cpu_model_runner.py | 5 ++-- vllm/v1/worker/gpu_model_runner.py | 5 +--- vllm/v1/worker/lora_model_runner_mixin.py | 15 ++-------- vllm/v1/worker/tpu_model_runner.py | 4 +-- vllm/worker/model_runner.py | 11 ++----- 10 files changed, 51 insertions(+), 52 deletions(-) rename vllm/lora/{lora.py => lora_weights.py} (100%) diff --git a/tests/lora/test_lora_manager.py b/tests/lora/test_lora_manager.py index a5802c108c6b..d7684fbf34ab 100644 --- a/tests/lora/test_lora_manager.py +++ b/tests/lora/test_lora_manager.py @@ -8,11 +8,12 @@ from safetensors.torch import load_file from torch import nn +from vllm.config import ModelConfig, VllmConfig from vllm.config.lora import LoRAConfig from vllm.lora.layers import (ColumnParallelLinearWithLoRA, MergedColumnParallelLinearWithLoRA, RowParallelLinearWithLoRA) -from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights +from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.models import (LoRAMapping, LoRAModel, LoRAModelManager, LRUCacheLoRAModelManager) from vllm.lora.peft_helper import PEFTHelper @@ -435,10 +436,19 @@ def test_lru_cache_worker_adapter_manager(dist_init, dummy_model, device, target_modules=["layer1.dense1", "dense2"], lora_dtype=DEFAULT_DTYPE, ) + + model_config = ModelConfig(max_model_len=16) + vllm_config = VllmConfig(model_config=model_config, + lora_config=lora_config) + + vllm_config.scheduler_config.max_num_seqs = 4 + vllm_config.scheduler_config.max_num_batched_tokens = 2 worker_adapter_manager = LRUCacheWorkerLoRAManager( - 4, 2, - dummy_model.unpadded_vocab_size - lora_config.lora_extra_vocab_size, - lora_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) + vllm_config, device, EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) + + worker_adapter_manager.max_num_seqs = 4 + worker_adapter_manager.max_num_batched_tokens = 2 + worker_adapter_manager.create_lora_manager(dummy_model) mapping = LoRAMapping([], []) @@ -517,10 +527,20 @@ def test_worker_adapter_manager(dist_init, dummy_model_gate_up, device, max_cpu_loras=4, max_loras=4, lora_dtype=DEFAULT_DTYPE) - worker_adapter_manager = WorkerLoRAManager( - 4, 2, dummy_model_gate_up.unpadded_vocab_size - - lora_config.lora_extra_vocab_size, lora_config, device, - EMBEDDING_MODULES, EMBEDDING_PADDING_MODULES) + + model_config = ModelConfig(max_model_len=16) + vllm_config = VllmConfig(model_config=model_config, + lora_config=lora_config) + + vllm_config.scheduler_config.max_num_seqs = 4 + vllm_config.scheduler_config.max_num_batched_tokens = 2 + + worker_adapter_manager = WorkerLoRAManager(vllm_config, device, + EMBEDDING_MODULES, + EMBEDDING_PADDING_MODULES) + worker_adapter_manager.vocab_size = ( + dummy_model_gate_up.unpadded_vocab_size - + 
lora_config.lora_extra_vocab_size) worker_adapter_manager.create_lora_manager(dummy_model_gate_up) dummy_lora_files = f"{tmp_path}/lora_adapter" diff --git a/tests/lora/utils.py b/tests/lora/utils.py index 7cda90787b6f..ab475904d493 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -9,7 +9,7 @@ import torch from safetensors.torch import save_file -from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights +from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights class DummyLoRAManager: diff --git a/vllm/lora/lora.py b/vllm/lora/lora_weights.py similarity index 100% rename from vllm/lora/lora.py rename to vllm/lora/lora_weights.py diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 25f90f2fa932..9ea46be65cff 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -14,7 +14,7 @@ from vllm.config.lora import LoRAConfig from vllm.logger import init_logger from vllm.lora.layers import BaseLayerWithLoRA, LoRAMapping -from vllm.lora.lora import LoRALayerWeights, PackedLoRALayerWeights +from vllm.lora.lora_weights import LoRALayerWeights, PackedLoRALayerWeights from vllm.lora.peft_helper import PEFTHelper from vllm.lora.punica_wrapper import get_punica_wrapper from vllm.lora.utils import (from_layer, from_layer_logits_processor, diff --git a/vllm/lora/worker_manager.py b/vllm/lora/worker_manager.py index e27b7d5fcf22..cdb2f86611d8 100644 --- a/vllm/lora/worker_manager.py +++ b/vllm/lora/worker_manager.py @@ -6,7 +6,7 @@ import torch -from vllm.config.lora import LoRAConfig +from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.lora.models import (LoRAModel, LoRAModelManager, LRUCacheLoRAModelManager, create_lora_manager) @@ -27,25 +27,26 @@ class WorkerLoRAManager: def __init__( self, - max_num_seqs: int, - max_num_batched_tokens: int, - vocab_size: int, - lora_config: LoRAConfig, + vllm_config: VllmConfig, device: torch.device, embedding_modules: dict[str, str], embedding_padding_modules: list[str], lora_model_cls: type[LoRAModel] = LoRAModel, - max_position_embeddings: Optional[int] = None, ): self._lora_model_cls = lora_model_cls self.embedding_modules = embedding_modules self.embedding_padding_modules = embedding_padding_modules self._cached_dummy_lora: Union[None, Literal[False], LoRAModel] = False - self.max_num_seqs = max_num_seqs - self.max_num_batched_tokens = max_num_batched_tokens - self.vocab_size = vocab_size - self.lora_config = lora_config - self.max_position_embeddings = max_position_embeddings + self.max_num_seqs = vllm_config.scheduler_config.max_num_seqs + self.max_num_batched_tokens = ( + vllm_config.scheduler_config.max_num_batched_tokens) + self.vocab_size = vllm_config.model_config.get_vocab_size() + self.lora_config = vllm_config.lora_config + + # Use get_text_config() in case of multimodal models + text_config = vllm_config.model_config.hf_config.get_text_config() + + self.max_position_embeddings = text_config.max_position_embeddings self.device = device # Lazily initialized by create_lora_manager. 
self._adapter_manager: LoRAModelManager diff --git a/vllm/v1/worker/cpu_model_runner.py b/vllm/v1/worker/cpu_model_runner.py index cd0f0af43e7e..6a97f7ebc3fc 100644 --- a/vllm/v1/worker/cpu_model_runner.py +++ b/vllm/v1/worker/cpu_model_runner.py @@ -107,9 +107,8 @@ def load_model(self, eep_scale_up: bool = False) -> None: self.model = get_model(vllm_config=self.vllm_config) if self.lora_config: - self.model = self.load_lora_model(self.model, self.model_config, - self.scheduler_config, - self.lora_config, self.device) + self.model = self.load_lora_model(self.model, self.vllm_config, + self.device) def get_model(self) -> nn.Module: return self.model diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 3ee2160a42ff..9d0f26266f0c 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2552,10 +2552,7 @@ def load_model(self, eep_scale_up: bool = False) -> None: self.model = model_loader.load_model( vllm_config=self.vllm_config, model_config=self.model_config) if self.lora_config: - self.model = self.load_lora_model(self.model, - self.model_config, - self.scheduler_config, - self.lora_config, + self.model = self.load_lora_model(self.model, self.vllm_config, self.device) if hasattr(self, "drafter"): logger.info("Loading drafter model...") diff --git a/vllm/v1/worker/lora_model_runner_mixin.py b/vllm/v1/worker/lora_model_runner_mixin.py index 01d5f0525c4e..e416f50322f4 100644 --- a/vllm/v1/worker/lora_model_runner_mixin.py +++ b/vllm/v1/worker/lora_model_runner_mixin.py @@ -11,7 +11,7 @@ import torch import torch.nn as nn -from vllm.config import ModelConfig, SchedulerConfig +from vllm.config import VllmConfig from vllm.config.lora import LoRAConfig from vllm.logger import init_logger from vllm.lora.layers import LoRAMapping @@ -31,9 +31,7 @@ class LoRAModelRunnerMixin: LORA_WARMUP_RANK = 8 - def load_lora_model(self, model: nn.Module, model_config: ModelConfig, - scheduler_config: SchedulerConfig, - lora_config: LoRAConfig, + def load_lora_model(self, model: nn.Module, vllm_config: VllmConfig, device: torch.device) -> nn.Module: if not supports_lora(model): @@ -44,19 +42,12 @@ def load_lora_model(self, model: nn.Module, model_config: ModelConfig, logger.warning("Regarding multimodal models, vLLM currently " "only supports adding LoRA to language model.") - # Use get_text_config() in case of multimodal models - text_config = model_config.hf_config.get_text_config() - # Add LoRA Manager to the Model Runner self.lora_manager = LRUCacheWorkerLoRAManager( - scheduler_config.max_num_seqs, - scheduler_config.max_num_batched_tokens, - model_config.get_vocab_size(), - lora_config, + vllm_config, device, model.embedding_modules, model.embedding_padding_modules, - max_position_embeddings=text_config.max_position_embeddings, ) return self.lora_manager.create_lora_manager(model) diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 01a8e5c3f0db..48070c1e3e7c 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -1178,9 +1178,7 @@ def load_model(self) -> None: "or sharding the weights on more chips. 
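Across these call sites the construction path now collapses to a single pattern. A minimal sketch mirroring the updated tests is given below; the sizes, device and empty embedding-module mappings are illustrative only.

```python
# Minimal sketch of the new single-argument initialization, mirroring the tests above.
import torch

from vllm.config import ModelConfig, VllmConfig
from vllm.config.lora import LoRAConfig
from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager

lora_config = LoRAConfig(max_lora_rank=8, max_loras=4, max_cpu_loras=4)
vllm_config = VllmConfig(model_config=ModelConfig(max_model_len=16),
                         lora_config=lora_config)
vllm_config.scheduler_config.max_num_seqs = 4
vllm_config.scheduler_config.max_num_batched_tokens = 2

# Sizing, vocab size and max_position_embeddings are all read from vllm_config.
manager = LRUCacheWorkerLoRAManager(vllm_config,
                                    torch.device("cuda:0"),
                                    embedding_modules={},
                                    embedding_padding_modules=[])
```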
" f"See the detailed error: {e}") from e if self.lora_config is not None: - model = self.load_lora_model(model, self.model_config, - self.scheduler_config, - self.lora_config, self.device) + model = self.load_lora_model(model, self.vllm_config, self.device) replace_set_lora(model) # Sync all pending XLA execution during model initialization and weight diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index 594382650e3c..c91c871766cf 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1078,20 +1078,13 @@ def load_model(self) -> None: "Regarding multimodal models, vLLM currently " "only supports adding LoRA to language model.") - # Use get_text_config() in case of multimodal models - text_config = self.model_config.hf_config.get_text_config() - self.lora_manager = LRUCacheWorkerLoRAManager( - self.scheduler_config.max_num_seqs, - self.scheduler_config.max_num_batched_tokens, - self.vocab_size, - self.lora_config, + self.vllm_config, self.device, self.model.embedding_modules, self.model.embedding_padding_modules, - max_position_embeddings=text_config. - max_position_embeddings, ) + self.model = self.lora_manager.create_lora_manager(self.model) time_after_load = time.perf_counter() From d90e212a3a586b8a6ca9a424868abd0e6ef6779a Mon Sep 17 00:00:00 2001 From: LJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com> Date: Sat, 20 Sep 2025 02:15:13 +0800 Subject: [PATCH 154/518] Remove Redundant Assignment in Qwen3_VisionPatchMerger (#25224) Signed-off-by: Junhong Co-authored-by: Junhong Co-authored-by: Roger Wang --- vllm/model_executor/models/qwen3_vl.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index c224b78e2c27..17375ff0959d 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -223,9 +223,7 @@ def __init__( if norm_layer is None: norm_layer = partial(nn.LayerNorm, eps=1e-6) - self.use_postshuffle_norm = use_postshuffle_norm - self.norm = norm_layer( - self.hidden_size if use_postshuffle_norm else context_dim) + self.norm = norm_layer(context_dim) self.linear_fc1 = ColumnParallelLinear(self.hidden_size, self.hidden_size, bias=True, From 12aed7e453aea713495bd7cced6f9e2bb78aaa79 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 19 Sep 2025 19:15:22 +0100 Subject: [PATCH 155/518] Encoder model support for the Transformers backend (#25174) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/models/supported_models.md | 37 ++++++++++----- tests/models/test_transformers.py | 36 ++++++++++++++- vllm/attention/backends/abstract.py | 8 ++-- vllm/model_executor/models/transformers.py | 54 +++++++++++++++++++--- 4 files changed, 111 insertions(+), 24 deletions(-) diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 3a6738a27be0..cbc0a56a645e 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -17,9 +17,24 @@ These models are what we list in [supported-text-models][supported-text-models] ### Transformers -vLLM also supports model implementations that are available in Transformers. This does not currently work for all models, but most decoder language models and common vision language models are supported! Vision-language models currently accept only image inputs. Support for video inputs will be added in future releases. 
+vLLM also supports model implementations that are available in Transformers. You should expect the performance of a Transformers model implementation used in vLLM to be within <1% of the performance of a dedicated vLLM model implementation. We call this feature the "Transformers backend". -To check if the modeling backend is Transformers, you can simply do this: +Currently, the Transformers backend works for the following: + +- Modalities: embedding models, language models and vision-language models* +- Architectures: encoder-only, decoder-only +- Attention types: full attention and/or sliding attention + +_*Vision-language models currently accept only image inputs. Support for video inputs will be added in a future release._ + +If the Transformers model implementation follows all the steps in [writing a custom model](#writing-custom-models) then, when used with the Transformers backend, it will be compatible with the following features of vLLM: + +- All the features listed in the [compatibility matrix](../features/compatibility_matrix.md#feature-x-feature) +- Any combination of the following vLLM parallelisation schemes: + - Pipeline parallel + - Tensor parallel + +Checking if the modeling backend is Transformers is as simple as: ```python from vllm import LLM @@ -27,16 +42,12 @@ llm = LLM(model=...) # Name or path of your model llm.apply_model(lambda model: print(type(model))) ``` -If it is `TransformersForCausalLM` or `TransformersForMultimodalLM` then it means it's based on Transformers! +If the printed type starts with `Transformers...` then it's using the Transformers model implementation! -!!! tip - You can force the use of `TransformersForCausalLM` by setting `model_impl="transformers"` for [offline-inference](../serving/offline_inference.md) or `--model-impl transformers` for the [openai-compatible-server](../serving/openai_compatible_server.md). - -!!! note - vLLM may not fully optimise the Transformers implementation so you may see degraded performance if comparing a native model to a Transformers model in vLLM. +If a model has a vLLM implementation but you would prefer to use the Transformers implementation via the Transformers backend, set `model_impl="transformers"` for [offline inference](../serving/offline_inference.md) or `--model-impl transformers` for the [online serving](../serving/openai_compatible_server.md). !!! note - In case of vision language models if you are loading with `dtype="auto"`, vLLM loads the whole model with config's `dtype` if it exists. In contrast the native Transformers will respect the `dtype` attribute of each backbone in the model. That might cause a slight difference in performance. + For vision-language models, if you are loading with `dtype="auto"`, vLLM loads the whole model with config's `dtype` if it exists. In contrast the native Transformers will respect the `dtype` attribute of each backbone in the model. That might cause a slight difference in performance. #### Custom models @@ -66,10 +77,11 @@ This section details the necessary modifications to make to a Transformers compa To make your model compatible with the Transformers backend, it needs: 1. `kwargs` passed down through all modules from `MyModel` to `MyAttention`. + 1. If your model is encoder-only, you must also add `is_causal = False` to `MyAttention`. 2. `MyAttention` must use `ALL_ATTENTION_FUNCTIONS` to call attention. 3. `MyModel` must contain `_supports_attention_backend = True`. -
+
modeling_my_model.py ```python @@ -78,6 +90,7 @@ from transformers import PreTrainedModel from torch import nn class MyAttention(nn.Module): + is_causal = False # Only do this for encoder-only models def forward(self, hidden_states, **kwargs): ... @@ -101,13 +114,13 @@ Here is what happens in the background when this model is loaded: 1. The config is loaded. 2. `MyModel` Python class is loaded from the `auto_map` in config, and we check that the model `is_backend_compatible()`. -3. `MyModel` is loaded into `TransformersForCausalLM` or `TransformersForMultimodalLM` (see ) which sets `self.config._attn_implementation = "vllm"` so that vLLM's attention layer is used. +3. `MyModel` is loaded into one of the Transformers backend classes in which sets `self.config._attn_implementation = "vllm"` so that vLLM's attention layer is used. That's it! For your model to be compatible with vLLM's tensor parallel and/or pipeline parallel features, you must add `base_model_tp_plan` and/or `base_model_pp_plan` to your model's config class: -
+
configuration_my_model.py ```python diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index ba9c3bebc437..1817d4aeee9f 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -9,7 +9,7 @@ from ..conftest import HfRunner, VllmRunner from ..utils import multi_gpu_test, prep_prompts -from .utils import check_logprobs_close +from .utils import check_embeddings_close, check_logprobs_close def check_implementation( @@ -165,6 +165,40 @@ def test_embed_loading(vllm_runner, model): assert model_config.using_transformers_backend() +@pytest.mark.parametrize( + "model", + [ + # Encoder model + "BAAI/bge-base-en-v1.5", + ]) +def test_embed_correctness(hf_runner, vllm_runner, example_prompts, model): + import transformers + from packaging.version import Version + installed = Version(transformers.__version__) + required = Version("4.57.0.dev0") + if installed < required: + pytest.skip("Encoder models with the Transformers backend require " + f"transformers>={required}, but got {installed}") + + with vllm_runner(model, max_model_len=512, + model_impl="transformers") as vllm_model: + model_config = vllm_model.llm.llm_engine.model_config + assert model_config.using_transformers_backend() + + vllm_outputs = vllm_model.embed(example_prompts) + + with hf_runner(model, is_sentence_transformer=True) as hf_model: + hf_outputs = hf_model.encode(example_prompts) + + check_embeddings_close( + embeddings_0_lst=hf_outputs, + embeddings_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + tol=1e-2, + ) + + @pytest.mark.parametrize( "model", ["jason9693/Qwen2.5-1.5B-apeach"], diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py index 75bcdc4bbcf0..dfde67e1713c 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -23,14 +23,14 @@ class AttentionType: Attention type. Use string to be compatible with `torch.compile`. """ - # Decoder attention between previous layer Q/K/V DECODER = "decoder" - # Encoder attention between previous layer Q/K/V for encoder-decoder + """Decoder attention between previous layer Q/K/V.""" ENCODER = "encoder" - # Encoder attention between previous layer Q/K/V + """Encoder attention between previous layer Q/K/V for encoder-decoder.""" ENCODER_ONLY = "encoder_only" - # Attention between dec. Q and enc. K/V for encoder-decoder + """Encoder attention between previous layer Q/K/V.""" ENCODER_DECODER = "encoder_decoder" + """Attention between dec. Q and enc. 
K/V for encoder-decoder.""" class AttentionBackend(ABC): diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index 4f51441e28ef..f40a20dee63d 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -27,7 +27,7 @@ PreTrainedModel) from transformers.modeling_utils import ALL_ATTENTION_FUNCTIONS -from vllm.attention import Attention +from vllm.attention import Attention, AttentionType from vllm.compilation.decorators import support_torch_compile from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, ParallelConfig, VllmConfig) @@ -452,8 +452,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.pp_rank = self.pp_group.rank_in_group self.tp_size = get_tensor_model_parallel_world_size() - # To be updated in child classes for use in `load_weights` - self.skip_prefixes: Optional[list[str]] = None + # Weights to skip in `self.load_weights` + self.skip_prefixes: list[str] = [] + self.skip_substrs: list[str] = [] # Set correct attn and init on "meta" to delay allocating GPU tensors # TODO: @raushan, use the public `model.set_attn_implementation()` @@ -596,7 +597,10 @@ def _tensor_parallel(module: nn.Module, _tensor_parallel(self.model) - def create_attention_instances(self) -> dict[int, Attention]: + def create_attention_instances( + self, + attn_type: AttentionType = AttentionType.DECODER + ) -> dict[int, Attention]: """ Create `Attention` instances to inform KV cache allocation. """ @@ -625,7 +629,8 @@ def create_attention_instances(self) -> dict[int, Attention]: cache_config=self.cache_config, quant_config=self.quant_config, per_layer_sliding_window=per_layer_sliding_window, - prefix=f"{i}.attn") + prefix=f"{i}.attn", + attn_type=attn_type) return attention_instances def init_parameters(self, module: nn.Module): @@ -685,7 +690,11 @@ def forward( def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: - loader = AutoWeightsLoader(self, skip_prefixes=self.skip_prefixes) + loader = AutoWeightsLoader( + self, + skip_prefixes=self.skip_prefixes, + skip_substrs=self.skip_substrs, + ) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) @@ -700,6 +709,37 @@ class TransformersModel(TransformersBase): "model.score": "score", }) + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + + # Some encoder models have the position_ids buffer in the checkpoint + # vLLM will always pass position_ids as an argument, so we skip loading + # the buffer if it exists + self.skip_substrs.append("position_ids") + + def create_attention_instances( + self, attn_type: AttentionType = AttentionType.DECODER): + # TODO(hmellor): Better way to detect encoder models + # In encoder models, the attention layers will have `is_causal=False` + is_encoder = lambda m: not getattr(m, "is_causal", True) + # vLLM does not support encoder-decoder models, so if any encoder layer + # is found, we assume the whole model is an encoder model + if any(is_encoder(m) for m in self.model.modules()): + attn_type = AttentionType.ENCODER_ONLY + + # Check minimum transformers version for encoder models support + if attn_type == AttentionType.ENCODER_ONLY: + import transformers + from packaging.version import Version + installed = Version(transformers.__version__) + required = Version("4.57.0.dev0") + if installed < required: + raise ValueError( + "Encoder models with the Transformers backend require " + 
f"transformers>={required}, but got {installed}") + + return super().create_attention_instances(attn_type) + @support_torch_compile(enable_if=can_enable_torch_compile) class TransformersForCausalLM(TransformersBase): @@ -710,7 +750,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # Tell `TransformersBase.load_weights` to skip # `lm_head` if the model has tied word embeddings if self.text_config.tie_word_embeddings: - self.skip_prefixes = ["lm_head."] + self.skip_prefixes.append("lm_head.") if get_pp_group().is_last_rank: self.unpadded_vocab_size = self.text_config.vocab_size From 47fd08aaf9fe6616d5daf1f30c0377d8b8b7cf21 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Sat, 20 Sep 2025 02:16:32 +0800 Subject: [PATCH 156/518] [CI/Build] fix test function_calling (#25072) Signed-off-by: chaunceyjiang --- tests/entrypoints/openai/test_response_api_with_harmony.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 8d974d56b445..40a22c04b08a 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -515,6 +515,7 @@ async def test_function_calling(client: OpenAI, model_name: str): model=model_name, input="What's the weather like in Paris today?", tools=tools, + temperature=0.0, ) assert response is not None assert response.status == "completed" From 2506ce5189fa67fea905c1f18ca735191cec0d29 Mon Sep 17 00:00:00 2001 From: Jialin Ouyang Date: Fri, 19 Sep 2025 11:22:53 -0700 Subject: [PATCH 157/518] [Core][Prefix Hash] Fix prefix hash metrics sliding window maintainance (#24990) Signed-off-by: Jialin Ouyang --- tests/v1/core/test_kv_cache_utils.py | 46 +++++++++++++++++++++++----- vllm/v1/core/kv_cache_utils.py | 13 ++++++-- 2 files changed, 50 insertions(+), 9 deletions(-) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index 319e6e84fba1..4bf6bbbfeae2 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -513,27 +513,27 @@ def test_hash_request_tokens_no_mm_inputs(hash_fn): assert block_hashes[1] == hash_fn((block_hashes[0], (3, 4, 5), None)) +def _stats(requests: int, queries: int, hits: int) -> PrefixCacheStats: + return PrefixCacheStats(requests=requests, queries=queries, hits=hits) + + def test_metrics(): """ Test the prefix caching metrics. """ - - def stats(requests, queries, hits): - return PrefixCacheStats(requests=requests, queries=queries, hits=hits) - metrics = PrefixCachingMetrics(max_recent_requests=5) assert metrics.hit_rate == 0.0 - metrics.observe(stats(1, 20, 9)) + metrics.observe(_stats(1, 20, 9)) # 9 / 20 = 0.45 assert metrics.hit_rate == 0.45 - metrics.observe(stats(4, 80, 16)) + metrics.observe(_stats(4, 80, 16)) # 25 / 100 = 0.25 assert metrics.hit_rate == 0.25 - metrics.observe(stats(1, 10, 2)) + metrics.observe(_stats(1, 10, 2)) # Remove (20, 9) and add (10, 2): 18 / 90 = 0.2 assert metrics.aggregated_requests == 5 @@ -549,6 +549,38 @@ def stats(requests, queries, hits): assert not metrics.query_queue +def test_metrics_empty_stats(): + """ + Test the prefix caching metrics with empty stats. 
+    """
+    metrics = PrefixCachingMetrics(max_recent_requests=5)
+    metrics.observe(_stats(0, 0, 0))
+    metrics.observe(_stats(1, 20, 9))
+    metrics.observe(_stats(0, 0, 0))
+    metrics.observe(_stats(4, 80, 16))
+    metrics.observe(_stats(0, 0, 0))
+    metrics.observe(_stats(1, 10, 2))
+    # Remove (20, 9) and add (10, 2): 18 / 90 = 0.2
+    assert metrics.aggregated_requests == 5
+    assert metrics.aggregated_query_total == 90
+    assert metrics.aggregated_query_hit == 18
+    assert metrics.hit_rate == 0.2
+
+    # Only the latest added stats preserved 10 / 20 = 0.5
+    metrics.observe(_stats(11, 20, 10))
+    assert metrics.aggregated_requests == 11
+    assert metrics.aggregated_query_total == 20
+    assert metrics.aggregated_query_hit == 10
+    assert metrics.hit_rate == 0.5
+
+    # Only the latest added stats preserved 30 / 40 = 0.75
+    metrics.observe(_stats(22, 40, 30))
+    assert metrics.aggregated_requests == 22
+    assert metrics.aggregated_query_total == 40
+    assert metrics.aggregated_query_hit == 30
+    assert metrics.hit_rate == 0.75
+
+
 def test_get_kv_cache_configs_multiple_workers():
     model_config = ModelConfig(max_model_len=16)
     vllm_config = VllmConfig(model_config=model_config)
diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py
index 9fab36aba91b..bc2ec5e42ea2 100644
--- a/vllm/v1/core/kv_cache_utils.py
+++ b/vllm/v1/core/kv_cache_utils.py
@@ -127,14 +127,23 @@ def observe(self, stats: PrefixCacheStats):
         if stats.reset:
             self.reset()
 
+        # Do not append empty stats, to avoid having useful info kicked out
+        # by the sliding window.
+        if stats.requests == 0:
+            return
+
         # Update the metrics.
         self.query_queue.append((stats.requests, stats.queries, stats.hits))
         self.aggregated_requests += stats.requests
         self.aggregated_query_total += stats.queries
         self.aggregated_query_hit += stats.hits
 
-        # Remove the oldest stats if the number of requests exceeds.
+        # Remove the oldest stats until number of requests does not exceed
+        # the limit.
+        # NOTE: We preserve the latest added stats regardless.
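+        # e.g. with max_recent_requests=5, observing (requests, queries, hits)
+        # batches of (1, 20, 9), (4, 80, 16) and (1, 10, 2) evicts the oldest
+        # batch, leaving 18 hits out of 90 queries, while a single oversized
+        # batch such as (11, 20, 10) is kept as the sole entry
+        # (see test_metrics and test_metrics_empty_stats above).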
+ while len( + self.query_queue + ) > 1 and self.aggregated_requests > self.max_recent_requests: old_requests, old_queries, old_hits = self.query_queue.popleft() self.aggregated_requests -= old_requests self.aggregated_query_total -= old_queries From 138f0d1e752d3a35cd959f3df8bf00370a2ace7b Mon Sep 17 00:00:00 2001 From: samzong Date: Sat, 20 Sep 2025 02:32:27 +0800 Subject: [PATCH 158/518] [Docs] add __init__.py to vllm/model_executor/layers/quantization/compressed_tensors/transform (#24974) Signed-off-by: samzong --- .../quantization/compressed_tensors/transform/__init__.py | 0 .../compressed_tensors/transform/schemes/__init__.py | 0 vllm/model_executor/models/blip2.py | 2 +- vllm/model_executor/models/llava.py | 2 +- vllm/model_executor/models/llava_next.py | 5 +++-- 5 files changed, 5 insertions(+), 4 deletions(-) create mode 100644 vllm/model_executor/layers/quantization/compressed_tensors/transform/__init__.py create mode 100644 vllm/model_executor/layers/quantization/compressed_tensors/transform/schemes/__init__.py diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/__init__.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/transform/schemes/__init__.py b/vllm/model_executor/layers/quantization/compressed_tensors/transform/schemes/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py index c1e7a7d498b1..a3131aa3812e 100644 --- a/vllm/model_executor/models/blip2.py +++ b/vllm/model_executor/models/blip2.py @@ -680,7 +680,7 @@ def forward( batch. Info: - [Blip2ImageInputs][] + [`Blip2ImageInputs`][vllm.model_executor.models.blip2.Blip2ImageInputs] """ if intermediate_tensors is not None: diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 9591deea06ce..4f15e1b5762e 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -737,7 +737,7 @@ def forward( inputs_embeds: Optional tensor of input embeddings. Info: - [LlavaImageInputs][] + [`LlavaImageInputs`][vllm.model_executor.models.llava.LlavaImageInputs] """ if intermediate_tensors is not None: inputs_embeds = None diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index 5e82f9799e0f..beb3c3310059 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -527,7 +527,8 @@ def forward( Unlike in LLaVA-1.5, the number of image tokens inputted to the language model depends on the original size of the input image. Including the original image token in the input, the required number of image tokens - is given by [get_llava_next_image_feature_size][]. + is given by [`LlavaNextProcessingInfo.get_num_image_tokens`][vllm.\ +model_executor.models.llava_next.LlavaNextProcessingInfo.get_num_image_tokens]. This way, the `positions` and `attn_metadata` are consistent with the `input_ids`. @@ -540,7 +541,7 @@ def forward( inputs_embeds: Optional tensor of input embeddings. 
Info: - [LlavaNextImageInputs][] + [`LlavaNextImageInputs`][vllm.model_executor.models.llava_next.LlavaNextImageInputs] """ if intermediate_tensors is not None: inputs_embeds = None From b716ab93a781156ab178513afc0e407cc72d443b Mon Sep 17 00:00:00 2001 From: Lucia Fang <116399278+luccafong@users.noreply.github.com> Date: Fri, 19 Sep 2025 11:37:57 -0700 Subject: [PATCH 159/518] [bugfix] fix structured outputs key missing issue from #24929 (#25195) Signed-off-by: Lu Fang --- vllm/v1/core/sched/scheduler.py | 4 +++- vllm/v1/structured_output/utils.py | 11 ++++++----- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index b08898d253ca..ef77d9e2d3ff 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -578,8 +578,10 @@ def schedule(self) -> SchedulerOutput: scheduled_spec_decode_tokens, req_to_new_blocks, ) + scheduled_requests = (scheduled_new_reqs + scheduled_running_reqs + + scheduled_resumed_reqs) structured_output_request_ids, grammar_bitmask = ( - self.get_grammar_bitmask(self.running, + self.get_grammar_bitmask(scheduled_requests, scheduled_spec_decode_tokens)) scheduler_output = SchedulerOutput( scheduled_new_reqs=new_reqs_data, diff --git a/vllm/v1/structured_output/utils.py b/vllm/v1/structured_output/utils.py index 127c8876525b..b9b09bea1e80 100644 --- a/vllm/v1/structured_output/utils.py +++ b/vllm/v1/structured_output/utils.py @@ -90,13 +90,14 @@ def apply_grammar_bitmask( seq = sorted(scheduler_output.structured_output_request_ids.items(), key=lambda x: x[1]) for req_id, _ in seq: - logit_index = struct_out_req_batch_indices[req_id] num_spec_tokens = len( scheduler_output.scheduled_spec_decode_tokens.get(req_id, [])) - for i in range(1 + num_spec_tokens): - sorted_bitmask[logit_index + i] = \ - grammar_bitmask[cumulative_index + i] - out_indices.append(logit_index + i) + if req_id in struct_out_req_batch_indices: + logit_index = struct_out_req_batch_indices[req_id] + for i in range(1 + num_spec_tokens): + sorted_bitmask[logit_index + i] = \ + grammar_bitmask[cumulative_index + i] + out_indices.append(logit_index + i) cumulative_index += 1 + num_spec_tokens grammar_bitmask = sorted_bitmask From c59a0eca4204522ecaa28af1f1e38d50b1e2626f Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Fri, 19 Sep 2025 22:07:17 +0300 Subject: [PATCH 160/518] [KV offload][4/N] Offloading KV connector (#22595) Signed-off-by: Or Ozeri --- .../unit/test_offloading_connector.py | 505 ++++++++++++++++++ tests/v1/kv_connector/unit/utils.py | 3 +- .../kv_transfer/kv_connector/factory.py | 5 + .../kv_connector/v1/offloading_connector.py | 485 +++++++++++++++++ vllm/v1/kv_offload/factory.py | 53 ++ vllm/v1/kv_offload/spec.py | 61 +++ 6 files changed, 1111 insertions(+), 1 deletion(-) create mode 100644 tests/v1/kv_connector/unit/test_offloading_connector.py create mode 100644 vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py create mode 100644 vllm/v1/kv_offload/factory.py create mode 100644 vllm/v1/kv_offload/spec.py diff --git a/tests/v1/kv_connector/unit/test_offloading_connector.py b/tests/v1/kv_connector/unit/test_offloading_connector.py new file mode 100644 index 000000000000..f9a4d2fb4de4 --- /dev/null +++ b/tests/v1/kv_connector/unit/test_offloading_connector.py @@ -0,0 +1,505 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import copy +from collections.abc import Iterable, Iterator +from dataclasses import 
dataclass +from typing import Any +from unittest.mock import MagicMock + +import pytest +import torch + +from vllm import SamplingParams +from vllm.config import KVTransferConfig, VllmConfig +from vllm.distributed.kv_events import BlockRemoved, BlockStored +from vllm.distributed.kv_transfer.kv_connector.v1 import KVConnectorRole +from vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector import ( + OffloadingConnector, OffloadingConnectorMetadata) +from vllm.forward_context import ForwardContext +from vllm.utils import sha256 +from vllm.v1.core.kv_cache_utils import (BlockHash, get_request_block_hasher, + init_none_hash) +from vllm.v1.core.sched.scheduler import Scheduler +from vllm.v1.kv_offload.abstract import (LoadStoreSpec, OffloadingEvent, + OffloadingManager, PrepareStoreOutput) +from vllm.v1.kv_offload.mediums import GPULoadStoreSpec +from vllm.v1.kv_offload.spec import OffloadingSpec +from vllm.v1.kv_offload.worker.worker import (OffloadingHandler, + TransferResult, TransferSpec) +from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput +from vllm.v1.request import Request + +from .utils import (EOS_TOKEN_ID, create_model_runner_output, create_scheduler, + create_vllm_config) + + +class MockLoadStoreSpec(LoadStoreSpec): + + def __init__(self, block_hashes: Iterable[BlockHash]): + self.block_hashes: list[BlockHash] = list(block_hashes) + + @staticmethod + def medium() -> str: + return "Mock" + + def __repr__(self) -> str: + return repr(self.block_hashes) + + +class MockOffloadingHandler(OffloadingHandler): + + def __init__(self): + self.completed_transfers: list[TransferResult] = [] + self.completed_specs: list[TransferSpec] = [] + + def get_finished(self) -> list[TransferResult]: + finished = self.completed_transfers + self.completed_transfers = [] + return finished + + def transfer_async(self, job_id: int, spec: TransferSpec) -> bool: + self.completed_specs.append(spec) + self.completed_transfers.append((job_id, True)) + return True + + +class MockOffloadingSpec(OffloadingSpec): + + def __init__(self, vllm_config: VllmConfig): + super().__init__(vllm_config) + + self.manager = MagicMock(spec=OffloadingManager) + self.manager.lookup.return_value = 0 + self.manager.prepare_load = lambda block_hashes: (MockLoadStoreSpec( + block_hashes)) + self.handler = MockOffloadingHandler() + + def get_manager(self) -> OffloadingManager: + return self.manager + + def get_handlers( + self, _ + ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], + OffloadingHandler]]: + + yield GPULoadStoreSpec, MockLoadStoreSpec, self.handler + yield MockLoadStoreSpec, GPULoadStoreSpec, self.handler + + def get_completed_transfers(self) -> list[TransferSpec]: + specs = self.handler.completed_specs + self.handler.completed_specs = [] + return specs + + +@dataclass +class TransferSummary: + gpu_block_indices: list[int] + offload_addresses: list[Any] + + +class RequestRunner: + + def __init__(self, offloaded_block_size: int, gpu_block_size: int, + num_gpu_blocks: int): + self.offloaded_block_size: int = offloaded_block_size + self.gpu_block_size: int = gpu_block_size + self.num_gpu_blocks: int = num_gpu_blocks + + self.req_id: int = -1 + + vllm_config = create_vllm_config(block_size=gpu_block_size, + max_num_batched_tokens=1000) + vllm_config.kv_transfer_config = KVTransferConfig( + kv_connector="OffloadingConnector", + kv_role="kv_both", + kv_connector_extra_config={ + "spec_name": "MockOffloadingSpec", + "spec_module_path": + 
"tests.v1.kv_connector.unit.test_offloading_connector", + "block_size": offloaded_block_size, + }) + + self.scheduler: Scheduler = create_scheduler(vllm_config, + num_blocks=num_gpu_blocks) + self.worker_connector = OffloadingConnector(vllm_config, + KVConnectorRole.WORKER) + + # register worker kv_caches to enable OffloadingWorker creations + self.worker_connector.register_kv_caches( + kv_caches={"a": torch.empty(0)}) + + # extract connector of scheduler + scheduler_connector = self.scheduler.connector + assert scheduler_connector is not None + assert isinstance(scheduler_connector, OffloadingConnector) + self.scheduler_connector: OffloadingConnector = scheduler_connector + + # extract mocked OffloadingManager of scheduler connector + connector_scheduler = scheduler_connector.connector_scheduler + assert connector_scheduler is not None + manager = connector_scheduler.manager + assert isinstance(manager, MagicMock) + self.manager: MagicMock = manager + + assert connector_scheduler.gpu_block_size == gpu_block_size + assert connector_scheduler.offloaded_block_size == offloaded_block_size + + # extract OffloadingSpec of worker_connector + connector_worker = self.worker_connector.connector_worker + assert connector_worker is not None + offloading_spec = connector_worker.spec + assert isinstance(offloading_spec, MockOffloadingSpec) + self.offloading_spec: MockOffloadingSpec = offloading_spec + + # mapping (offloading address) -> gpu_block_index + self.offloaded: dict[Any, int] = {} + + self.pending_loads_count: int = 0 + self.pending_stores_count: int = 0 + + self.completed_loads: list[TransferSummary] = [] + self.completed_stores: list[TransferSummary] = [] + + # maps {block_id: block_offset} + self.gpu_block_index: dict[int, int] = {} + + init_none_hash(sha256) + self._block_hasher = get_request_block_hasher(gpu_block_size, sha256) + + self._dummy_ctx: ForwardContext = ForwardContext(no_compile_layers={}, + attn_metadata={}, + virtual_engine=0) + + def new_request(self, token_ids: list[int]): + assert not self.scheduler.requests + self.req_id += 1 + + req = Request( + request_id=str(self.req_id), + prompt_token_ids=token_ids, + sampling_params=SamplingParams(max_tokens=1000), + pooling_params=None, + eos_token_id=EOS_TOKEN_ID, + block_hasher=self._block_hasher, + ) + + self.scheduler.add_request(req) + + def _wait_for_transfers(self): + block_size_factor = self.offloaded_block_size // self.gpu_block_size + + while self.pending_loads_count or self.pending_stores_count: + for transfer_spec in ( + self.offloading_spec.get_completed_transfers()): + src_spec, dst_spec = transfer_spec + + if isinstance(src_spec, GPULoadStoreSpec): + store = True + gpu_spec = src_spec + offload_spec = dst_spec + else: + store = False + gpu_spec = dst_spec + offload_spec = src_spec + + assert isinstance(offload_spec, MockLoadStoreSpec) + assert isinstance(gpu_spec, GPULoadStoreSpec) + + gpu_block_indices: list[int] = [] + for block_id in gpu_spec.block_ids: + gpu_block_indices.append( + self.gpu_block_index[block_id.item()]) + + # list of (block_hash, sub_block_offset) + offload_addresses: list[Any] = [] + for block_hash in offload_spec.block_hashes: + for sub_block_idx in range(block_size_factor): + offload_addresses.append((block_hash, sub_block_idx)) + + if store: + assert len(gpu_block_indices) == len(offload_addresses) + + self.completed_stores.append( + TransferSummary(gpu_block_indices, offload_addresses)) + self.pending_stores_count -= 1 + else: + remainder_sub_block_count = (len(offload_addresses) - + 
len(gpu_block_indices)) + assert remainder_sub_block_count >= 0 + assert remainder_sub_block_count < block_size_factor + offload_addresses = offload_addresses[ + remainder_sub_block_count:] + + self.completed_loads.append( + TransferSummary(gpu_block_indices, offload_addresses)) + self.pending_loads_count -= 1 + + def _update_gpu_block_idx(self): + for blocks in (self.scheduler.kv_cache_manager.coordinator. + single_type_managers[0].req_to_blocks.values()): + for block_idx, block in enumerate(blocks): + self.gpu_block_index[block.block_id] = block_idx + + def _run(self, decoded_tokens: list[int]): + """ + Runs multiple engine (scheduler + worker) steps. + Assumes a single request is running. + + Args: + decoded_tokens: the tokens to yield at each step. + """ + + tokens_iter = iter(decoded_tokens) + token_id = next(tokens_iter, None) + while token_id is not None: + assert self.scheduler.requests + + scheduler_output = self.scheduler.schedule() + self._update_gpu_block_idx() + + kv_connector_metadata = scheduler_output.kv_connector_metadata + assert kv_connector_metadata is not None + assert isinstance(kv_connector_metadata, + OffloadingConnectorMetadata) + + self.pending_loads_count += len(kv_connector_metadata.reqs_to_load) + self.pending_stores_count += len( + kv_connector_metadata.reqs_to_store) + + self.worker_connector.bind_connector_metadata( + kv_connector_metadata) + self.worker_connector.start_load_kv(self._dummy_ctx) + + if scheduler_output.total_num_scheduled_tokens > 0: + self.worker_connector.wait_for_save() + + finished_sending, finished_recving = ( + self.worker_connector.get_finished( + scheduler_output.finished_req_ids)) + + self.worker_connector.clear_connector_metadata() + + model_runner_output = create_model_runner_output( + reqs=self.scheduler.running, + finished_sending=list(finished_sending), + finished_recving=list(finished_recving), + token_id=token_id) + + if self.scheduler.running: + token_id = next(tokens_iter, None) + + self.scheduler.update_from_output(scheduler_output, + model_runner_output) + + self._wait_for_transfers() + + # run one more step to update finished stored + if EOS_TOKEN_ID in decoded_tokens: + assert not self.scheduler.running + + while self.scheduler.requests: + scheduler_output = self.scheduler.schedule() + + finished_sending, finished_recving = ( + self.worker_connector.get_finished( + scheduler_output.finished_req_ids)) + + assert not finished_recving + + model_runner_output = copy.deepcopy(EMPTY_MODEL_RUNNER_OUTPUT) + model_runner_output.kv_connector_output = KVConnectorOutput( + finished_sending=finished_sending) + + self.scheduler.update_from_output(scheduler_output, + model_runner_output) + + def run( + self, + decoded_tokens: list[int], + expected_stored_gpu_block_indexes: tuple[int, ...] = (), + expected_loaded_gpu_block_indexes: tuple[int, ...] = (), + ): + """ + Runs multiple engine (scheduler + worker) steps. + Assumes a single request is running. + + Args: + decoded_tokens: the tokens to yield at each step. + expected_stored_gpu_block_indexes: GPU block indexes + that are expected to be written during the run. + expected_loaded_gpu_block_indexes: GPU block indexes + that are expected to be loaded during the run. 
+ """ + + self.manager.reset_mock() + self._run(decoded_tokens) + + loaded_gpu_block_indexes: set[int] = set() + for transfer in self.completed_loads: + for gpu_block_idx, offloaded_address in zip( + transfer.gpu_block_indices, transfer.offload_addresses): + loaded_gpu_block_indexes.add(gpu_block_idx) + assert gpu_block_idx == self.offloaded[offloaded_address] + + assert ( + set(expected_loaded_gpu_block_indexes) == loaded_gpu_block_indexes) + self.completed_loads.clear() + + stored_gpu_block_indexes: set[int] = set() + for transfer in self.completed_stores: + for gpu_block_idx, offloaded_address in zip( + transfer.gpu_block_indices, transfer.offload_addresses): + stored_gpu_block_indexes.add(gpu_block_idx) + self.offloaded[offloaded_address] = gpu_block_idx + + assert ( + set(expected_stored_gpu_block_indexes) == stored_gpu_block_indexes) + self.completed_stores.clear() + + +@pytest.fixture +def request_runner(): + runners = [] + + def runner_factory(offloaded_block_size, gpu_block_size, num_gpu_blocks): + runner = RequestRunner(offloaded_block_size=offloaded_block_size, + gpu_block_size=gpu_block_size, + num_gpu_blocks=num_gpu_blocks) + runners.append(runner) + return runner + + yield runner_factory # pass factory to the test + + +def generate_store_output(block_hashes: Iterable[BlockHash]): + block_hashes = list(block_hashes) + return PrepareStoreOutput( + block_hashes_to_store=list(block_hashes), + store_spec=MockLoadStoreSpec(block_hashes), + block_hashes_evicted=[], + ) + + +def test_offloading_connector(request_runner): + offloaded_block_size = 12 + gpu_block_size = 4 + num_gpu_blocks = 100 + block_size_factor = offloaded_block_size // gpu_block_size + + runner = request_runner(offloaded_block_size=offloaded_block_size, + gpu_block_size=gpu_block_size, + num_gpu_blocks=num_gpu_blocks) + + # 3 blocks, store just the middle block (skip first and last) + # blocks = [0, 1, 2], [3, 4, 5], [6, 7, 8] + runner.new_request(token_ids=[0] * offloaded_block_size * 3) + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output(list(block_hashes)[1:2]) + runner.run(decoded_tokens=[0], expected_stored_gpu_block_indexes=(3, 4, 5)) + + # add block missing 1 token -> no offload + runner.run(decoded_tokens=[0] * (offloaded_block_size - 1)) + runner.manager.prepare_store.assert_not_called() + + # +1 token -> single block, fail prepare_store + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: None + runner.run(decoded_tokens=[0]) + runner.manager.prepare_store.assert_called() + + # 1 more block, now set block_hashes_to_store = [] + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output([]) + runner.run(decoded_tokens=[0] * offloaded_block_size) + + # 1 more block, now check touch was called with all 6 blocks + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output(block_hashes) + runner.run(decoded_tokens=[0] * offloaded_block_size, + expected_stored_gpu_block_indexes=(15, 16, 17)) + runner.manager.touch.assert_called() + block_hashes1 = list(runner.manager.touch.call_args.args[0]) + assert len(block_hashes1) == 6 + + # terminate request + runner.run(decoded_tokens=[EOS_TOKEN_ID]) + + # create a new request differing only on the last token + runner.new_request(token_ids=[0] * (offloaded_block_size * 6 - 1) + [1]) + runner.run(decoded_tokens=[0], + expected_stored_gpu_block_indexes=tuple( + range(6 * block_size_factor))) + runner.manager.touch.assert_called() + block_hashes2 = 
list(runner.manager.touch.call_args.args[0]) + assert len(block_hashes2) == 6 + + # verify hashes are the same, except for the last block + assert block_hashes1[:5] == block_hashes2[:5] + assert block_hashes1[5] != block_hashes2[5] + + # terminate request + runner.run(decoded_tokens=[EOS_TOKEN_ID]) + + # full_block_tokens - num_computed_tokens < offloaded_block_size + runner.new_request(token_ids=[0] * gpu_block_size + [1] * + (offloaded_block_size - gpu_block_size)) + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output([]) + runner.run(decoded_tokens=[EOS_TOKEN_ID]) + runner.manager.lookup.assert_not_called() + + # single block lookup with no hits + runner.new_request(token_ids=[1] * offloaded_block_size) + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output([]) + runner.run(decoded_tokens=[EOS_TOKEN_ID]) + runner.manager.lookup.assert_called() + assert len(list(runner.manager.lookup.call_args.args[0])) == 1 + + # single block lookup with a hit + runner.scheduler.reset_prefix_cache() + runner.new_request(token_ids=[0] * offloaded_block_size) + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output([]) + runner.manager.lookup.return_value = 1 + runner.run(decoded_tokens=[EOS_TOKEN_ID], + expected_loaded_gpu_block_indexes=(0, 1, 2)) + + # single block lookup with a hit in a middle block + runner.new_request(token_ids=[0] * offloaded_block_size * 2 + + [1] * offloaded_block_size) + runner.manager.prepare_store.side_effect = \ + lambda block_hashes: generate_store_output([]) + runner.manager.lookup.return_value = 1 + runner.run(decoded_tokens=[EOS_TOKEN_ID], + expected_loaded_gpu_block_indexes=(3, 4, 5)) + + # test take_events + def to_hashes(int_hashes: list[int]) -> list[BlockHash]: + return [BlockHash(str(i).encode()) for i in int_hashes] + + def take_events() -> Iterable[OffloadingEvent]: + yield OffloadingEvent(block_hashes=to_hashes([1, 2, 3]), + block_size=16, + medium="A", + removed=False) + yield OffloadingEvent(block_hashes=to_hashes([4, 5, 6]), + block_size=32, + medium="B", + removed=True) + + runner.manager.take_events.side_effect = take_events + events = list(runner.scheduler_connector.take_events()) + assert len(events) == 2 + event = events[0] + assert isinstance(event, BlockStored) + assert event.block_hashes == to_hashes([1, 2, 3]) + assert event.block_size == 16 + assert event.medium == "A" + assert event.token_ids == [] + assert event.parent_block_hash is None + assert event.lora_id is None + event = events[1] + assert isinstance(event, BlockRemoved) + assert event.block_hashes == to_hashes([4, 5, 6]) + assert event.medium == "B" diff --git a/tests/v1/kv_connector/unit/utils.py b/tests/v1/kv_connector/unit/utils.py index 0cae1c7bc051..de52668e3dcf 100644 --- a/tests/v1/kv_connector/unit/utils.py +++ b/tests/v1/kv_connector/unit/utils.py @@ -176,6 +176,7 @@ def create_model_runner_output( finished_sending: Optional[list[str]] = None, finished_recving: Optional[list[str]] = None, use_eos: bool = False, + token_id: int = 0, ) -> ModelRunnerOutput: """Make dummy model runner output for testing.""" @@ -184,7 +185,7 @@ def create_model_runner_output( req_id_to_index = {req_id: idx for idx, req_id in enumerate(req_ids)} # Make sampled tokens. 
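+    # token_id lets tests control which token is sampled (the offloading
+    # connector test uses this to vary decoded tokens across steps).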
- sampled_token = EOS_TOKEN_ID if use_eos else 0 + sampled_token = EOS_TOKEN_ID if use_eos else token_id sampled_token_ids = [[sampled_token] for _ in req_ids] kv_connector_output = None if ( diff --git a/vllm/distributed/kv_transfer/kv_connector/factory.py b/vllm/distributed/kv_transfer/kv_connector/factory.py index 670f9c26b210..873f130ed827 100644 --- a/vllm/distributed/kv_transfer/kv_connector/factory.py +++ b/vllm/distributed/kv_transfer/kv_connector/factory.py @@ -106,3 +106,8 @@ def get_connector_class( "MultiConnector", "vllm.distributed.kv_transfer.kv_connector.v1.multi_connector", "MultiConnector") + +KVConnectorFactory.register_connector( + "OffloadingConnector", + "vllm.distributed.kv_transfer.kv_connector.v1.offloading_connector", + "OffloadingConnector") diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py new file mode 100644 index 000000000000..c23efa604544 --- /dev/null +++ b/vllm/distributed/kv_transfer/kv_connector/v1/offloading_connector.py @@ -0,0 +1,485 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections import defaultdict +from collections.abc import Iterable, Iterator +from dataclasses import dataclass +from itertools import islice +from typing import Any, Optional + +import torch + +from vllm.attention import AttentionMetadata +from vllm.config import VllmConfig +from vllm.distributed.kv_events import BlockRemoved, BlockStored, KVCacheEvent +from vllm.distributed.kv_transfer.kv_connector.v1 import (KVConnectorBase_V1, + KVConnectorRole) +from vllm.distributed.kv_transfer.kv_connector.v1.base import ( + KVConnectorMetadata) +from vllm.forward_context import ForwardContext +from vllm.logger import init_logger +from vllm.v1.core.kv_cache_manager import KVCacheBlocks +from vllm.v1.core.kv_cache_utils import BlockHash +from vllm.v1.core.sched.output import SchedulerOutput +from vllm.v1.kv_offload.abstract import OffloadingManager +from vllm.v1.kv_offload.factory import OffloadingSpecFactory +from vllm.v1.kv_offload.mediums import GPULoadStoreSpec +from vllm.v1.kv_offload.spec import OffloadingSpec +from vllm.v1.kv_offload.worker.worker import OffloadingWorker, TransferSpec +from vllm.v1.outputs import KVConnectorOutput +from vllm.v1.request import Request + +ReqId = str + +logger = init_logger(__name__) + + +@dataclass +class OffloadingConnectorMetadata(KVConnectorMetadata): + reqs_to_load: dict[ReqId, TransferSpec] + reqs_to_store: dict[ReqId, TransferSpec] + + +class OffloadingConnector(KVConnectorBase_V1): + + def __init__(self, vllm_config: VllmConfig, role: KVConnectorRole): + super().__init__(vllm_config, role) + + spec = OffloadingSpecFactory.create_spec(vllm_config) + + self.connector_scheduler: Optional[OffloadingConnectorScheduler] = None + self.connector_worker: Optional[OffloadingConnectorWorker] = None + if role == KVConnectorRole.SCHEDULER: + self.connector_scheduler = OffloadingConnectorScheduler(spec) + elif role == KVConnectorRole.WORKER: + self.connector_worker = OffloadingConnectorWorker(spec) + + def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): + assert self.connector_worker is not None + self.connector_worker.register_kv_caches(kv_caches) + + def start_load_kv(self, forward_context: "ForwardContext", + **kwargs) -> None: + assert self.connector_worker is not None + assert isinstance(self._connector_metadata, + OffloadingConnectorMetadata) + 
self.connector_worker.start_load_kv(self._connector_metadata) + + def wait_for_layer_load(self, layer_name: str) -> None: + pass + + def save_kv_layer(self, layer_name: str, kv_layer: torch.Tensor, + attn_metadata: "AttentionMetadata", **kwargs) -> None: + pass + + def wait_for_save(self): + assert self.connector_worker is not None + assert isinstance(self._connector_metadata, + OffloadingConnectorMetadata) + self.connector_worker.start_store_kv(self._connector_metadata) + + def get_finished(self, + finished_req_ids: set[str]) -> tuple[set[str], set[str]]: + assert self.connector_worker is not None + return self.connector_worker.get_finished(finished_req_ids) + + def get_num_new_matched_tokens( + self, request: "Request", + num_computed_tokens: int) -> tuple[int, bool]: + assert self.connector_scheduler is not None + return self.connector_scheduler.get_num_new_matched_tokens( + request, num_computed_tokens) + + def update_state_after_alloc(self, request: "Request", + blocks: "KVCacheBlocks", + num_external_tokens: int): + assert self.connector_scheduler is not None + return self.connector_scheduler.update_state_after_alloc( + request, blocks, num_external_tokens) + + def build_connector_meta( + self, scheduler_output: SchedulerOutput) -> KVConnectorMetadata: + assert self.connector_scheduler is not None + return self.connector_scheduler.build_connector_meta(scheduler_output) + + def update_connector_output(self, connector_output: KVConnectorOutput): + assert self.connector_scheduler is not None + self.connector_scheduler.update_connector_output(connector_output) + + def request_finished( + self, + request: "Request", + block_ids: list[int], + ) -> tuple[bool, Optional[dict[str, Any]]]: + assert self.connector_scheduler is not None + return self.connector_scheduler.request_finished(request, block_ids) + + def take_events(self) -> Iterable[KVCacheEvent]: + assert self.connector_scheduler is not None + return self.connector_scheduler.take_events() + + +class OffloadingConnectorScheduler: + """Implementation of Scheduler side methods""" + + def __init__(self, spec: OffloadingSpec): + self.gpu_block_size = spec.gpu_block_size + self.offloaded_block_size = spec.offloaded_block_size + self.block_size_factor = (self.offloaded_block_size // + self.gpu_block_size) + self.manager: OffloadingManager = spec.get_manager() + + self._requests: dict[ReqId, Request] = {} + # list of GPU block IDs per request + self._request_block_ids: dict[ReqId, list[int]] = {} + # requests to load for the current scheduler step + self._reqs_to_load: dict[ReqId, TransferSpec] = {} + # request blocks are stored in order + # index of next block (of size offloaded_block_size) to offload + self._next_stored_block_idx: dict[ReqId, int] = {} + + # request ID -> set(block hashes being stored/load) + self._reqs_being_stored = defaultdict[ReqId, set[BlockHash]](set) + self._reqs_being_loaded = defaultdict[ReqId, set[BlockHash]](set) + + def _get_block_hashes( + self, + req: Request, + start_idx: int = 0, + end_idx: Optional[int] = None, + ) -> Iterable[BlockHash]: + return islice( + req.block_hashes, + self.block_size_factor * start_idx + self.block_size_factor - 1, + self.block_size_factor * end_idx if end_idx else None, + self.block_size_factor) + + def get_num_new_matched_tokens( + self, request: Request, + num_computed_tokens: int) -> tuple[int, bool]: + """ + Get number of new tokens that can be loaded beyond the + num_computed_tokens. + + Args: + request (Request): the request object. 
+ num_computed_tokens (int): the number of locally + computed tokens for this request + + Returns: + A tuple with the following elements: + - The number of tokens that can be loaded beyond what is + already computed. + - `True` if tokens will be loaded asynchronously + (between scheduler steps). + """ + num_blocks = request.num_tokens // self.offloaded_block_size + + assert (len(request.block_hashes) // + self.block_size_factor == num_blocks) + block_hashes = self._get_block_hashes(request) + + self.manager.touch(block_hashes) + + full_block_tokens = self.offloaded_block_size * num_blocks + if full_block_tokens - num_computed_tokens < self.offloaded_block_size: + # we can load less than a block, skip + return 0, False + + start_block_idx = num_computed_tokens // self.offloaded_block_size + hits = self.manager.lookup( + self._get_block_hashes(request, start_idx=start_block_idx)) + if hits == 0: + return 0, False + + num_hit_tokens = (self.offloaded_block_size * + (start_block_idx + hits) - num_computed_tokens) + logger.debug( + "Request %s hit %s offloaded tokens after %s GPU hit tokens", + request.request_id, + num_hit_tokens, + num_computed_tokens, + ) + if num_hit_tokens < self.offloaded_block_size: + return 0, False + + return num_hit_tokens, True + + def update_state_after_alloc(self, request: Request, blocks: KVCacheBlocks, + num_external_tokens: int): + self._requests[request.request_id] = request + # the block ids are updated in _get_reqs_to_store + self._request_block_ids[request.request_id] = [] + + if num_external_tokens == 0: + return + + block_groups = blocks.get_block_ids() + block_ids = block_groups[0] + + num_computed_gpu_blocks = sum(block.block_hash is not None + for block in blocks.blocks[0]) + num_computed_tokens = num_computed_gpu_blocks * self.gpu_block_size + full_block_tokens = num_computed_tokens + num_external_tokens + assert full_block_tokens % self.offloaded_block_size == 0 + + num_pending_gpu_blocks = len(block_ids) - num_computed_gpu_blocks + assert (num_external_tokens == num_pending_gpu_blocks * + self.gpu_block_size) + + start_block_idx = num_computed_tokens // self.offloaded_block_size + num_blocks = full_block_tokens // self.offloaded_block_size + + assert (len(request.block_hashes) // self.block_size_factor + >= num_blocks) + block_hashes = self._get_block_hashes(request, + start_idx=start_block_idx, + end_idx=num_blocks) + + src_spec = self.manager.prepare_load(block_hashes) + dst_spec = GPULoadStoreSpec(block_ids[num_computed_gpu_blocks:]) + + block_hashes = self._get_block_hashes(request, + start_idx=start_block_idx, + end_idx=num_blocks) + + self._reqs_to_load[request.request_id] = (src_spec, dst_spec) + self._reqs_being_loaded[request.request_id].update(block_hashes) + self._next_stored_block_idx[request.request_id] = num_blocks + + def _get_reqs_to_store(self, scheduler_output: SchedulerOutput): + reqs_to_store: dict[ReqId, TransferSpec] = {} + # iterate over both new and cached requests + for req_id, new_block_id_groups, preempted in yield_req_data( + scheduler_output): + + if preempted: + self._request_block_ids[req_id] = [] + + if new_block_id_groups: + new_block_ids = new_block_id_groups[0] + self._request_block_ids[req_id] += new_block_ids + + block_ids = self._request_block_ids[req_id] + + req = self._requests[req_id] + new_tokens = scheduler_output.num_scheduled_tokens[req_id] + total_tokens = req.num_computed_tokens + new_tokens + num_blocks = total_tokens // self.offloaded_block_size + start_block_idx = 
self._next_stored_block_idx.get(req_id, 0) + num_new_blocks = num_blocks - start_block_idx + + if num_new_blocks <= 0: + continue + + num_gpu_blocks = num_blocks * self.block_size_factor + assert len(req.block_hashes) >= num_gpu_blocks + + new_block_hashes = self._get_block_hashes( + req, start_idx=start_block_idx, end_idx=num_blocks) + store_output = self.manager.prepare_store(new_block_hashes) + if store_output is None: + logger.warning("Cannot store %s blocks", num_new_blocks) + break + + self._next_stored_block_idx[req_id] = num_blocks + + if not store_output.block_hashes_to_store: + continue + block_hashes_to_store = set(store_output.block_hashes_to_store) + + block_hashes = self._get_block_hashes(req, end_idx=num_blocks) + self.manager.touch(block_hashes) + + new_block_hashes = self._get_block_hashes( + req, start_idx=start_block_idx, end_idx=num_blocks) + dst_spec = store_output.store_spec + src_block_ids: list[int] = [] + for idx, blk_hash in enumerate(new_block_hashes): + if blk_hash not in block_hashes_to_store: + continue + offloaded_block_idx = start_block_idx + idx + gpu_block_idx = offloaded_block_idx * self.block_size_factor + for i in range(self.block_size_factor): + src_block_ids.append(block_ids[gpu_block_idx + i]) + src_spec = GPULoadStoreSpec(src_block_ids) + + reqs_to_store[req_id] = (src_spec, dst_spec) + self._reqs_being_stored[req_id] |= block_hashes_to_store + + logger.debug( + "Request %s offloading %s blocks starting from block #%d", + req_id, + len(block_hashes_to_store), + start_block_idx, + ) + + return reqs_to_store + + def build_connector_meta( + self, scheduler_output: SchedulerOutput) -> KVConnectorMetadata: + meta = OffloadingConnectorMetadata( + reqs_to_load=self._reqs_to_load, + reqs_to_store=self._get_reqs_to_store(scheduler_output)) + self._reqs_to_load = {} + return meta + + def update_connector_output(self, connector_output: KVConnectorOutput): + """ + Update KVConnector state from worker-side connectors output. + + Args: + connector_output (KVConnectorOutput): the worker-side + connectors output. + """ + for req_id in connector_output.finished_sending or []: + block_hashes = self._reqs_being_stored.pop(req_id, None) + if block_hashes: + self.manager.complete_store(block_hashes) + + for req_id in connector_output.finished_recving or []: + block_hashes = self._reqs_being_loaded.pop(req_id, None) + if block_hashes: + self.manager.complete_load(block_hashes) + + def request_finished( + self, + request: Request, + block_ids: list[int], + ) -> tuple[bool, Optional[dict[str, Any]]]: + """ + Called when a request has finished, before its blocks are freed. + + Returns: + True if the request is being saved/sent asynchronously and blocks + should not be freed until the request_id is returned from + get_finished(). + Optional KVTransferParams to be included in the request outputs + returned by the engine. + """ + req_id = request.request_id + self._requests.pop(req_id, None) + self._request_block_ids.pop(req_id, None) + self._next_stored_block_idx.pop(req_id, None) + + request_being_stored = req_id in self._reqs_being_stored + return request_being_stored, None + + def take_events(self) -> Iterable[KVCacheEvent]: + """Take the KV cache events from the connector. + + Returns: + A list of KV cache events. 
+ """ + for event in self.manager.take_events(): + if event.removed: + yield BlockRemoved(block_hashes=event.block_hashes, + medium=event.medium) + else: + yield BlockStored(block_hashes=event.block_hashes, + parent_block_hash=None, + token_ids=[], + lora_id=None, + block_size=event.block_size, + medium=event.medium) + + +class OffloadingConnectorWorker: + """Implementation of Worker side methods""" + + def __init__(self, spec: OffloadingSpec): + self.spec = spec + self.worker = OffloadingWorker() + + self._job_counter = 0 + + # req_id -> (job_id, store) + self._jobs: dict[int, tuple[ReqId, bool]] = {} + # req_id -> active job IDs + self._load_job: dict[ReqId, int] = {} + # req_id -> set(active job IDs) + self._store_jobs = defaultdict[ReqId, set[int]](set) + + self._finished_reqs_waiting_for_store: set[ReqId] = set() + + def _generate_job_id(self) -> int: + job_id = self._job_counter + self._job_counter = job_id + 1 + return job_id + + def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): + for src_cls, dst_cls, handler in (self.spec.get_handlers(kv_caches)): + self.worker.register_handler(src_cls, dst_cls, handler) + + def start_load_kv(self, metadata: OffloadingConnectorMetadata): + for req_id, transfer_spec in metadata.reqs_to_load.items(): + job_id = self._generate_job_id() + self._jobs[job_id] = (req_id, False) + assert req_id not in self._load_job + self._load_job[req_id] = job_id + assert self.worker.transfer_async(job_id, transfer_spec) + + def start_store_kv(self, metadata: OffloadingConnectorMetadata): + for req_id, transfer_spec in metadata.reqs_to_store.items(): + job_id = self._generate_job_id() + self._jobs[job_id] = (req_id, True) + self._store_jobs[req_id].add(job_id) + assert self.worker.transfer_async(job_id, transfer_spec) + + def get_finished(self, + finished_req_ids: set[str]) -> tuple[set[str], set[str]]: + """ + Notifies worker-side connector ids of requests that have + finished generating tokens. + Returns a list of request IDs that finished loading or storing. + + Returns: + ids of requests that have finished asynchronous transfer + tuple of (sending/saving ids, recving/loading ids). 
+ """ + finished_sending = set() + finished_recving = set() + for job_id, success in self.worker.get_finished(): + # we currently do not support job failures + assert success + req_id, store = self._jobs.pop(job_id) + if store: + req_jobs = self._store_jobs[req_id] + req_jobs.remove(job_id) + if req_jobs: + continue + + if req_id in self._finished_reqs_waiting_for_store: + self._finished_reqs_waiting_for_store.remove(req_id) + finished_sending.add(req_id) + del self._store_jobs[req_id] + else: + req_job = self._load_job[req_id] + assert job_id == req_job + del self._load_job[req_id] + finished_recving.add(req_id) + + for req_id in finished_req_ids: + pending_req_jobs = self._store_jobs.get(req_id) + if pending_req_jobs: + self._finished_reqs_waiting_for_store.add(req_id) + elif pending_req_jobs is not None: + finished_sending.add(req_id) + del self._store_jobs[req_id] + + return finished_sending, finished_recving + + +def yield_req_data( + scheduler_output) -> Iterator[tuple[str, tuple[list[int], ...], bool]]: + """ + Yields: + (req_id, new_block_id_groups, preempted) + """ + # new requests + for req_data in scheduler_output.scheduled_new_reqs: + yield req_data.req_id, req_data.block_ids, False + + # cached requests + cached_reqs = scheduler_output.scheduled_cached_reqs + yield from zip(cached_reqs.req_ids, cached_reqs.new_block_ids, + cached_reqs.resumed_from_preemption) diff --git a/vllm/v1/kv_offload/factory.py b/vllm/v1/kv_offload/factory.py new file mode 100644 index 000000000000..6365ab4a6db7 --- /dev/null +++ b/vllm/v1/kv_offload/factory.py @@ -0,0 +1,53 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import importlib +from typing import TYPE_CHECKING, Callable + +from vllm.logger import init_logger +from vllm.v1.kv_offload.spec import OffloadingSpec + +if TYPE_CHECKING: + from vllm.config import VllmConfig + +logger = init_logger(__name__) + + +class OffloadingSpecFactory: + _registry: dict[str, Callable[[], type[OffloadingSpec]]] = {} + + @classmethod + def register_spec(cls, name: str, module_path: str, + class_name: str) -> None: + """Register a spec with a lazy-loading module and class name.""" + if name in cls._registry: + raise ValueError(f"Connector '{name}' is already registered.") + + def loader() -> type[OffloadingSpec]: + module = importlib.import_module(module_path) + return getattr(module, class_name) + + cls._registry[name] = loader + + @classmethod + def create_spec( + cls, + config: "VllmConfig", + ) -> OffloadingSpec: + kv_transfer_config = config.kv_transfer_config + assert kv_transfer_config is not None + extra_config = kv_transfer_config.kv_connector_extra_config + spec_name = extra_config.get("spec_name", "CPUOffloadingSpec") + if spec_name in cls._registry: + spec_cls = cls._registry[spec_name]() + else: + spec_module_path = extra_config.get("spec_module_path") + if spec_module_path is None: + raise ValueError(f"Unsupported spec type: {spec_name}") + spec_module = importlib.import_module(spec_module_path) + spec_cls = getattr(spec_module, spec_name) + assert issubclass(spec_cls, OffloadingSpec) + logger.info("Creating offloading spec with name: %s", spec_name) + return spec_cls(config) + + +# Register various specs here. 
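For illustration, here is a minimal sketch of how a custom spec could be wired through this factory and selected via `kv_connector_extra_config`. The `MyOffloadingSpec` class and `my_pkg.my_spec` module are hypothetical names, not part of this patch; the config keys mirror those read by `create_spec` above and `OffloadingSpec.__init__` below.

```python
from vllm.config import KVTransferConfig
from vllm.v1.kv_offload.factory import OffloadingSpecFactory

# Register a lazily-loaded spec class (hypothetical module and class names).
OffloadingSpecFactory.register_spec("MyOffloadingSpec", "my_pkg.my_spec",
                                    "MyOffloadingSpec")

# Select it via the connector's extra config; block_size must be a multiple
# of the GPU block size (see OffloadingSpec.__init__ below).
kv_transfer_config = KVTransferConfig(
    kv_connector="OffloadingConnector",
    kv_role="kv_both",
    kv_connector_extra_config={
        "spec_name": "MyOffloadingSpec",
        "block_size": 64,
    },
)
```

Alternatively, `spec_module_path` can be supplied directly in the extra config, as the unit test above does, in which case no explicit registration is needed.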
diff --git a/vllm/v1/kv_offload/spec.py b/vllm/v1/kv_offload/spec.py new file mode 100644 index 000000000000..ed23d5e51934 --- /dev/null +++ b/vllm/v1/kv_offload/spec.py @@ -0,0 +1,61 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from abc import ABC, abstractmethod +from collections.abc import Iterator +from typing import TYPE_CHECKING + +import torch + +from vllm.logger import init_logger +from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager +from vllm.v1.kv_offload.worker.worker import OffloadingHandler + +if TYPE_CHECKING: + from vllm.config import VllmConfig + +logger = init_logger(__name__) + + +class OffloadingSpec(ABC): + """Spec for an offloading connector""" + + def __init__(self, vllm_config: "VllmConfig"): + logger.warning( + "Initializing OffloadingSpec. This API is experimental and " + "subject to change in the future as we iterate the design.") + self.vllm_config = vllm_config + + kv_transfer_config = vllm_config.kv_transfer_config + assert kv_transfer_config is not None + self.extra_config = kv_transfer_config.kv_connector_extra_config + + self.gpu_block_size = vllm_config.cache_config.block_size + self.offloaded_block_size = int( + self.extra_config.get("block_size", self.gpu_block_size)) + + assert self.offloaded_block_size % self.gpu_block_size == 0 + + @abstractmethod + def get_manager(self) -> OffloadingManager: + """ + Get an OffloadingManager that will be used + by the scheduler-side offloading connector to track + offloaded blocks and manage evictions. + """ + pass + + @abstractmethod + def get_handlers( + self, kv_caches: dict[str, torch.Tensor] + ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], + OffloadingHandler]]: + """ + Get offloading handlers along with their respective src and dst types. + + Args: + kv_caches: A dictionary of layer_name -> gpu_kv_cache tensor. + + Yields: + Tuples of (src_type, dst_type, offloading_handler). 
+ """ + pass From a2a5f79e09075f50943750a2376b3b5d95a47ac5 Mon Sep 17 00:00:00 2001 From: qizixi <22851944+zixi-qi@users.noreply.github.com> Date: Fri, 19 Sep 2025 12:07:26 -0700 Subject: [PATCH 161/518] Optimize triton unified attention performance for sliding window attention (#24390) Signed-off-by: zixi-qi --- .../test_triton_unified_attention.py | 2 +- .../attention/ops/triton_unified_attention.py | 26 +++++++++++++++++-- 2 files changed, 25 insertions(+), 3 deletions(-) diff --git a/tests/kernels/attention/test_triton_unified_attention.py b/tests/kernels/attention/test_triton_unified_attention.py index ab91560e995c..5cff29b15aa3 100644 --- a/tests/kernels/attention/test_triton_unified_attention.py +++ b/tests/kernels/attention/test_triton_unified_attention.py @@ -83,7 +83,7 @@ def ref_paged_attn( @pytest.mark.parametrize("num_heads", NUM_HEADS) @pytest.mark.parametrize("head_size", HEAD_SIZES) @pytest.mark.parametrize("block_size", BLOCK_SIZES) -@pytest.mark.parametrize("sliding_window", [None, 256]) +@pytest.mark.parametrize("sliding_window", [None, 64, 128, 256]) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("soft_cap", [None, 50.0]) @pytest.mark.parametrize("num_blocks", NUM_BLOCKS) diff --git a/vllm/attention/ops/triton_unified_attention.py b/vllm/attention/ops/triton_unified_attention.py index 591b68bfa646..9e7cafc17428 100644 --- a/vllm/attention/ops/triton_unified_attention.py +++ b/vllm/attention/ops/triton_unified_attention.py @@ -184,8 +184,30 @@ def kernel_unified_attention_2d( # this prefix can be skipped) num_tiles = cdiv_fn(max_seq_prefix_len, TILE_SIZE) - # iterate through tiles - for j in range(0, num_tiles): + # ---- Sliding-window tile pruning -------------------- + # Default: keep previous global behavior + tile_start = 0 + tile_end = num_tiles + if SLIDING_WINDOW > 0: + # Query rows covered by this Q-block + qpos_lo = q_block_local_idx * BLOCK_Q + qpos_hi = tl.minimum( + qpos_lo + (BLOCK_M - 1) // num_queries_per_kv, + cur_batch_query_len - 1, + ) + # For sliding window, each query position q can only attend to + # keys in the range [q_abs - SLIDING_WINDOW + 1, q_abs] + # where q_abs = context_len + q + # The union of allowed key positions for this Q-block is: + # [context_len + qpos_lo - SLIDING_WINDOW + 1, context_len + qpos_hi] + first_allowed_key = context_len + qpos_lo - SLIDING_WINDOW + 1 + last_allowed_key = context_len + qpos_hi + # Convert to tile indices and clamp + tile_start = tl.maximum(0, first_allowed_key // TILE_SIZE) + tile_end = tl.minimum((last_allowed_key // TILE_SIZE) + 1, num_tiles) + + # iterate through tiles (now limited to the sliding window range) + for j in range(tile_start, tile_end): seq_offset = j * TILE_SIZE + offs_t tile_mask = seq_offset < max_seq_prefix_len From 7852b82b93d189a5374d403f9735d6849ba4c64e Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Fri, 19 Sep 2025 15:14:09 -0400 Subject: [PATCH 162/518] [Bugfix] GPT OSS Attritbute error on H100 (#25228) Signed-off-by: Varun Sundar Rabindranath Co-authored-by: Varun Sundar Rabindranath --- vllm/model_executor/layers/fused_moe/config.py | 4 ++-- vllm/model_executor/layers/quantization/mxfp4.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py index 742df3dbdc6a..b14bc06e913c 100644 --- a/vllm/model_executor/layers/fused_moe/config.py +++ b/vllm/model_executor/layers/fused_moe/config.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: 
Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional, Union +from typing import Optional, Union import torch @@ -14,7 +14,7 @@ from vllm.utils import cdiv, has_triton_kernels from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe -if TYPE_CHECKING and has_triton_kernels: +if has_triton_kernels(): from triton_kernels.matmul_ogs import PrecisionConfig logger = init_logger(__name__) diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 28c1e60ccd08..5c3f8a891276 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -638,8 +638,8 @@ def get_fused_moe_quant_config( return None if self.mxfp4_backend == Mxfp4Backend.TRITON: - w1_scale = layer.w13_precision_config - w2_scale = layer.w2_precision_config + w1_scale = self.w13_precision_config + w2_scale = self.w2_precision_config else: w1_scale = layer.w13_weight_scale w2_scale = layer.w2_weight_scale From 4bdf40021821dfb085e2cbe2f4dc0b7ad7b2e635 Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Fri, 19 Sep 2025 15:42:01 -0400 Subject: [PATCH 163/518] [Bugfix] Fix chunked a2_scales in modular kernels (#25264) Signed-off-by: Bill Nell --- .../model_executor/layers/fused_moe/batched_deep_gemm_moe.py | 1 + .../layers/fused_moe/batched_triton_or_deep_gemm_moe.py | 3 ++- vllm/model_executor/layers/fused_moe/cutlass_moe.py | 4 +++- vllm/model_executor/layers/fused_moe/deep_gemm_moe.py | 3 ++- .../layers/fused_moe/flashinfer_cutlass_moe.py | 1 + vllm/model_executor/layers/fused_moe/fused_batched_moe.py | 4 +++- vllm/model_executor/layers/fused_moe/fused_moe.py | 3 ++- .../layers/fused_moe/gpt_oss_triton_kernels_moe.py | 1 + vllm/model_executor/layers/fused_moe/modular_kernel.py | 5 +++++ vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py | 2 ++ vllm/model_executor/layers/fused_moe/trtllm_moe.py | 1 + 11 files changed, 23 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py index e9dfb22bea27..cf0b965cc8c5 100644 --- a/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py @@ -286,6 +286,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], diff --git a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py index 8b9070f09889..c3c4f4a5d190 100644 --- a/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py @@ -126,6 +126,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -136,5 +137,5 @@ def apply( assert experts is not None experts.apply(output, hidden_states, w1, w2, topk_weights, topk_ids, activation, global_num_experts, expert_map, a1q_scale, - workspace13, workspace2, expert_tokens_meta, + 
a2_scale, workspace13, workspace2, expert_tokens_meta, apply_router_weight_on_input) diff --git a/vllm/model_executor/layers/fused_moe/cutlass_moe.py b/vllm/model_executor/layers/fused_moe/cutlass_moe.py index 957ffca0d124..8c2ff580575f 100644 --- a/vllm/model_executor/layers/fused_moe/cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/cutlass_moe.py @@ -241,6 +241,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -262,7 +263,7 @@ def apply( run_cutlass_moe_fp8( output, hidden_states, w1, w2, topk_ids, activation_callable, global_num_experts, expert_map, self.w1_scale, self.w2_scale, - a1q_scale, self.a2_scale, self.ab_strides1, self.ab_strides2, + a1q_scale, a2_scale, self.ab_strides1, self.ab_strides2, self.c_strides1, self.c_strides2, workspace13, workspace2, expert_num_tokens, self.out_dtype if self.out_dtype is not None else in_dtype, @@ -705,6 +706,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], # unused + a2_scale: Optional[torch.Tensor], # unused workspace13: Optional[torch.Tensor], workspace2: Optional[torch.Tensor], expert_tokens_meta: Optional[mk.ExpertTokensMetadata], diff --git a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py index 8830b95df7cf..51a4f275e98c 100644 --- a/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/deep_gemm_moe.py @@ -214,13 +214,14 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): assert a1q_scale is not None - assert self.a2_scale is None + assert a2_scale is None assert self.block_shape is not None assert self.w1_scale is not None assert self.w2_scale is not None diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py index 6eeec18a6ec8..a074da883088 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py @@ -129,6 +129,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: Optional[torch.Tensor], workspace2: Optional[torch.Tensor], expert_tokens_meta: Optional[mk.ExpertTokensMetadata], diff --git a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py index fe6ac458a959..660bae314602 100644 --- a/vllm/model_executor/layers/fused_moe/fused_batched_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_batched_moe.py @@ -688,6 +688,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -879,6 +880,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: 
torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -970,7 +972,7 @@ def apply( intermediate_cache1.view(-1, N)) qintermediate_cache2, a2q_scale = batched_moe_kernel_quantize_input( - intermediate_cache2, self.a2_scale, max_num_tokens, E, N, + intermediate_cache2, a2_scale, max_num_tokens, E, N, expert_num_tokens, self.quant_dtype, self.per_act_token_quant, self.block_shape) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index d4de3f640865..6c2a5bda7cba 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1598,6 +1598,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -1690,7 +1691,7 @@ def apply( a2q_scale: Optional[torch.Tensor] = None qintermediate_cache2, a2q_scale = moe_kernel_quantize_input( - intermediate_cache2, self.a2_scale, self.quant_dtype, + intermediate_cache2, a2_scale, self.quant_dtype, self.per_act_token_quant, self.block_shape) invoke_fused_moe_kernel( diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py index 614a83ad1158..08a9b34a4245 100644 --- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py +++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py @@ -179,6 +179,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 729f8e39cf0f..a16c254fadf6 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -519,6 +519,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[ExpertTokensMetadata], @@ -634,6 +635,7 @@ def _do_fused_experts( local_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], expert_tokens_meta: Optional[ExpertTokensMetadata], apply_router_weight_on_input: bool, ) -> torch.Tensor: @@ -671,6 +673,7 @@ def _do_fused_experts( global_num_experts=global_num_experts, expert_map=expert_map, a1q_scale=a1q_scale, + a2_scale=a2_scale, workspace13=workspace13, workspace2=workspace2, expert_tokens_meta=expert_tokens_meta, @@ -718,6 +721,7 @@ def _maybe_chunk_fused_experts( local_num_experts=local_num_experts, expert_map=expert_map, a1q_scale=a1q_scale, + a2_scale=self.fused_experts.a2_scale, expert_tokens_meta=expert_tokens_meta, apply_router_weight_on_input=apply_router_weight_on_input, ) @@ -803,6 +807,7 @@ def slice_expert_tokens_metadata( local_num_experts=local_num_experts, expert_map=expert_map, a1q_scale=c_a1q_scale, + a2_scale=c_a2_scale, expert_tokens_meta=c_expert_tokens_meta, apply_router_weight_on_input=apply_router_weight_on_input, ) diff --git a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py 
b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py index b2dbc306a614..3de80ff85747 100644 --- a/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py @@ -111,6 +111,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], @@ -134,6 +135,7 @@ def apply( global_num_experts, expert_map, a1q_scale, + a2_scale, workspace13, workspace2, expert_tokens_meta, diff --git a/vllm/model_executor/layers/fused_moe/trtllm_moe.py b/vllm/model_executor/layers/fused_moe/trtllm_moe.py index 8e5f6acc9df6..05ed93c942c8 100644 --- a/vllm/model_executor/layers/fused_moe/trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/trtllm_moe.py @@ -103,6 +103,7 @@ def apply( global_num_experts: int, expert_map: Optional[torch.Tensor], a1q_scale: Optional[torch.Tensor], + a2_scale: Optional[torch.Tensor], workspace13: torch.Tensor, workspace2: torch.Tensor, expert_tokens_meta: Optional[mk.ExpertTokensMetadata], From e57fc15971dd3c3aff9b2bdcb43741b5f34c329e Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Fri, 19 Sep 2025 20:43:33 +0100 Subject: [PATCH 164/518] Specify platform in `pip-compile` `pre-commit` hook so it runs on MacOS (#25273) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- requirements/test.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 13ad3af97d83..a4ea888af3f3 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -49,7 +49,7 @@ repos: rev: 0.6.17 hooks: - id: pip-compile - args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128] + args: [requirements/test.in, -o, requirements/test.txt, --index-strategy, unsafe-best-match, --torch-backend, cu128, --python-platform, x86_64-manylinux_2_28] files: ^requirements/test\.(in|txt)$ - repo: local hooks: diff --git a/requirements/test.txt b/requirements/test.txt index 39040f210b2f..3519aa524f41 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -1,5 +1,5 @@ # This file was autogenerated by uv via the following command: -# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128 +# uv pip compile requirements/test.in -o requirements/test.txt --index-strategy unsafe-best-match --torch-backend cu128 --python-platform x86_64-manylinux_2_28 absl-py==2.1.0 # via rouge-score accelerate==1.0.1 From 48ecb4438b2845f757edf228c5455ca6095938af Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 19 Sep 2025 16:06:49 -0400 Subject: [PATCH 165/518] [Perf] Use FlashInfer RoPE for RotaryEmbedding.forward_cuda when available (#21126) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: mgoin Signed-off-by: Michael Goin Co-authored-by: Luka Govedič --- .../layers/rotary_embedding/base.py | 38 +++++++++++---- .../layers/rotary_embedding/common.py | 46 +++++++++++++++++++ .../rotary_embedding/deepseek_scaling_rope.py | 4 +- .../rotary_embedding/llama4_vision_rope.py | 2 +- .../layers/rotary_embedding/mrope.py | 2 + 5 files changed, 78 insertions(+), 14 deletions(-) diff --git 
a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py index db50eb08db3f..3dc249ae9adb 100644 --- a/vllm/model_executor/layers/rotary_embedding/base.py +++ b/vllm/model_executor/layers/rotary_embedding/base.py @@ -6,6 +6,8 @@ import torch from vllm.model_executor.custom_op import CustomOp +from vllm.platforms import current_platform +from vllm.utils.flashinfer import has_flashinfer from .common import apply_rotary_emb_torch @@ -30,9 +32,17 @@ def __init__( self.base = base self.is_neox_style = is_neox_style self.dtype = dtype + # Flashinfer only supports head_size=64, 128, 256, 512. + # https://github.com/flashinfer-ai/flashinfer/blob/ebfd655efe830048dba5d582aaa61d61d1cf9a87/include/flashinfer/utils.cuh#L174-L202 + self.use_flashinfer = (self.enabled() + and dtype in (torch.float16, torch.bfloat16) + and current_platform.is_cuda() + and has_flashinfer() + and self.head_size in [64, 128, 256, 512]) cache = self._compute_cos_sin_cache() - cache = cache.to(dtype) + if not self.use_flashinfer: + cache = cache.to(dtype) self.cos_sin_cache: torch.Tensor self.register_buffer("cos_sin_cache", cache, persistent=False) @@ -57,6 +67,14 @@ def _compute_cos_sin_cache(self) -> torch.Tensor: cache = torch.cat((cos, sin), dim=-1) return cache + def _match_cos_sin_cache_dtype(self, query: torch.Tensor) -> None: + # __setattr__ in nn.Module (called by `self.cos_sin_cache = ...`) + # is expensive, so avoid calling it if possible + if self.cos_sin_cache.device != query.device or \ + self.cos_sin_cache.dtype != query.dtype: + self.cos_sin_cache = self.cos_sin_cache.to(query.device, + dtype=query.dtype) + def forward_native( self, positions: torch.Tensor, @@ -94,15 +112,16 @@ def forward_cuda( query: torch.Tensor, key: Optional[torch.Tensor] = None, ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: - from vllm import _custom_ops as ops + if self.use_flashinfer: + torch.ops.vllm.flashinfer_rotary_embedding(positions, query, key, + self.head_size, + self.cos_sin_cache, + self.is_neox_style) + return query, key - # __setattr__ in nn.Module (called by `self.cos_sin_cache = ...`) - # is expensive, so avoid calling it if possible - if self.cos_sin_cache.device != query.device or \ - self.cos_sin_cache.dtype != query.dtype: - self.cos_sin_cache = self.cos_sin_cache.to(query.device, - dtype=query.dtype) + from vllm import _custom_ops as ops + self._match_cos_sin_cache_dtype(query) # ops.rotary_embedding() is an in-place operation # that updates the query and key tensors. ops.rotary_embedding(positions, query, key, self.head_size, @@ -117,8 +136,7 @@ def forward_xpu( ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: from vllm._ipex_ops import ipex_ops as ops - self.cos_sin_cache = self.cos_sin_cache.to(positions.device, - dtype=query.dtype) + self._match_cos_sin_cache_dtype(query) # ops.rotary_embedding() is an in-place operation # that updates the query and key tensors. 
if key is None: diff --git a/vllm/model_executor/layers/rotary_embedding/common.py b/vllm/model_executor/layers/rotary_embedding/common.py index 8d821bea19e3..e3cd0a8e788e 100644 --- a/vllm/model_executor/layers/rotary_embedding/common.py +++ b/vllm/model_executor/layers/rotary_embedding/common.py @@ -6,6 +6,7 @@ import torch from vllm.platforms import current_platform +from vllm.utils import direct_register_custom_op if current_platform.is_cuda(): from vllm.vllm_flash_attn.layers.rotary import apply_rotary_emb @@ -103,3 +104,48 @@ def yarn_get_mscale(scale: float = 1) -> float: if scale <= 1: return 1.0 return 0.1 * math.log(scale) + 1.0 + + +def _flashinfer_rotary_embedding( + positions: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + head_size: int, + cos_sin_cache: torch.Tensor, + is_neox: bool, +) -> None: + """Custom op wrapper for flashinfer's rotary embedding. + + This is an in-place operation that modifies query and key tensors directly. + """ + from flashinfer.rope import apply_rope_with_cos_sin_cache_inplace + + apply_rope_with_cos_sin_cache_inplace( + positions=positions, + query=query, + key=key, + head_size=head_size, + cos_sin_cache=cos_sin_cache, + is_neox=is_neox, + ) + + +def _flashinfer_rotary_embedding_fake( + positions: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + head_size: int, + cos_sin_cache: torch.Tensor, + is_neox: bool, +) -> None: + return + + +# Register flashinfer rotary embedding custom op +direct_register_custom_op( + op_name="flashinfer_rotary_embedding", + op_func=_flashinfer_rotary_embedding, + mutates_args=["query", "key"], # These tensors are modified in-place + fake_impl=_flashinfer_rotary_embedding_fake, + dispatch_key=current_platform.dispatch_key, +) diff --git a/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py index 7ac2e4bb6c34..736ec2c1dd3a 100644 --- a/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py +++ b/vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py @@ -97,15 +97,13 @@ def forward_native( ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: """PyTorch-native implementation equivalent to forward().""" assert key is not None + self._match_cos_sin_cache_dtype(query) query_rot = query[..., :self.rotary_dim] key_rot = key[..., :self.rotary_dim] if self.rotary_dim < self.head_size: query_pass = query[..., self.rotary_dim:] key_pass = key[..., self.rotary_dim:] - if self.cos_sin_cache.device != positions.device: - self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to( - positions.device) cos_sin = self.cos_sin_cache[torch.add(positions, offsets) if offsets is not None else positions] cos, sin = cos_sin.chunk(2, dim=-1) diff --git a/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py index 37ead43e22bc..871728035306 100644 --- a/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py +++ b/vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py @@ -59,7 +59,7 @@ def forward_native( # type: ignore[override] key: Optional[torch.Tensor] = None, ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: assert key is not None - self.cos_sin_cache: torch.Tensor = self.cos_sin_cache.to(query.device) + self._match_cos_sin_cache_dtype(query) query_ = torch.view_as_complex(query.float().reshape( *query.shape[:-1], -1, 2)) key_ = torch.view_as_complex(key.float().reshape( diff --git 
a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py index ccc59bbbe233..17d04a1ad715 100644 --- a/vllm/model_executor/layers/rotary_embedding/mrope.py +++ b/vllm/model_executor/layers/rotary_embedding/mrope.py @@ -245,6 +245,7 @@ def forward_native( assert positions.ndim == 1 or positions.ndim == 2 assert key is not None + self._match_cos_sin_cache_dtype(query) num_tokens = positions.shape[-1] cos_sin = self.cos_sin_cache[positions] cos, sin = cos_sin.chunk(2, dim=-1) @@ -293,6 +294,7 @@ def forward_cuda( assert positions.ndim == 1 or positions.ndim == 2 assert key is not None + self._match_cos_sin_cache_dtype(query) num_tokens = positions.shape[-1] cos_sin = self.cos_sin_cache[positions] cos, sin = cos_sin.chunk(2, dim=-1) From b1a63d1b3be996babec6411e3abe559796f76ca9 Mon Sep 17 00:00:00 2001 From: nvjullin Date: Sat, 20 Sep 2025 04:36:34 +0800 Subject: [PATCH 166/518] [BugFix] Make FlashInferMetadataBuilder non-blocking (#25040) Signed-off-by: Julien Lin Co-authored-by: Michael Goin --- vllm/v1/attention/backends/flashinfer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index dda6dd4fbea7..cb092aa74e7f 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -585,9 +585,10 @@ def build(self, kv_data_type=self.kv_cache_dtype, ) else: - attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to(self.device) + attn_metadata.qo_indptr_gpu = qo_indptr_cpu.to( + self.device, non_blocking=True) attn_metadata.paged_kv_indptr_gpu = paged_kv_indptr_cpu.to( - self.device) + self.device, non_blocking=True) if num_decodes > 0: pure_decode = num_prefills == 0 From ddc9048394ae6294d0db7fd67270efea59c3a065 Mon Sep 17 00:00:00 2001 From: David-Wen <18927700430@163.com> Date: Sat, 20 Sep 2025 04:44:24 +0800 Subject: [PATCH 167/518] Fix: Correct FusedMoE layer reference in auto_round quantization (#24818) Signed-off-by: David-Wen <18927700430@163.com> Signed-off-by: Michael Goin Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Michael Goin Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/model_executor/layers/quantization/auto_round.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index 1ca92273430d..bf5141fa4894 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -241,7 +241,7 @@ def apply_awq_quant_layer(self, layer, prefix: str, backend: str = "auto"): if isinstance(layer, FusedMoE): if use_marlin: - return AWQMoEMethod(quant_args_marlin, layer.moe) + return AWQMoEMethod(quant_args_marlin, layer.moe_config) from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config) @@ -327,7 +327,7 @@ def apply_gptq_quant_layer(self, if isinstance(layer, FusedMoE): if use_marlin: - return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe) + return GPTQMarlinMoEMethod(quant_args_marlin, layer.moe_config) else: from vllm.model_executor.layers.quantization.moe_wna16 import ( MoeWNA16Config) From e69e0b8b5fc5ef2958f7b1fc159119e9c4c0e2d2 Mon Sep 17 00:00:00 2001 From: Alec S <10566873+alecsolder@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:40:16 -0400 Subject: [PATCH 168/518] [Frontend] 
Responses API messages out, just harmony for now (#24985) Signed-off-by: Alec Solder Co-authored-by: Alec Solder Co-authored-by: Ye (Charlotte) Qi --- .../openai/test_response_api_with_harmony.py | 15 +++++++++++++++ vllm/entrypoints/openai/protocol.py | 17 ++++++++++++++++- vllm/entrypoints/openai/serving_responses.py | 13 +++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 40a22c04b08a..f3c3148577b8 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -744,3 +744,18 @@ async def test_function_calling_full_history(client: OpenAI, model_name: str): assert response_2 is not None assert response_2.status == "completed" assert response_2.output_text is not None + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +async def test_output_messages_enabled(client: OpenAI, model_name: str, + server): + response = await client.responses.create( + model=model_name, + input="What is the capital of South Korea?", + extra_body={"enable_response_messages": True}) + + assert response is not None + assert response.status == "completed" + assert len(response.input_messages) > 0 + assert len(response.output_messages) > 0 diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index 05d5d6d964dd..c30681318f69 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -328,6 +328,13 @@ class ResponsesRequest(OpenAIBaseModel): "access by 3rd parties, and long enough to be " "unpredictable (e.g., 43 characters base64-encoded, corresponding " "to 256 bit). Not supported by vLLM engine V0.")) + + enable_response_messages: bool = Field( + default=False, + description=( + "Dictates whether or not to return messages as part of the " + "response object. Currently only supported for non-streaming " + "non-background and gpt-oss only. ")) # --8<-- [end:responses-extra-params] _DEFAULT_SAMPLING_PARAMS = { @@ -1831,6 +1838,11 @@ class ResponsesResponse(OpenAIBaseModel): model: str object: Literal["response"] = "response" output: list[ResponseOutputItem] + # These are populated when enable_response_messages is set to True + # TODO: Currently an issue where content of harmony messages + # is not available when these are serialized. 
Metadata is available + input_messages: Optional[list[ChatCompletionMessageParam]] = None + output_messages: Optional[list[ChatCompletionMessageParam]] = None parallel_tool_calls: bool temperature: float tool_choice: ToolChoice @@ -1860,6 +1872,8 @@ def from_request( output: list[ResponseOutputItem], status: ResponseStatus, usage: Optional[ResponseUsage] = None, + input_messages: Optional[list[ChatCompletionMessageParam]] = None, + output_messages: Optional[list[ChatCompletionMessageParam]] = None, ) -> "ResponsesResponse": incomplete_details: Optional[IncompleteDetails] = None @@ -1868,7 +1882,6 @@ def from_request( # TODO: implement the other reason for incomplete_details, # which is content_filter # incomplete_details = IncompleteDetails(reason='content_filter') - return cls( id=request.request_id, created_at=created_time, @@ -1877,6 +1890,8 @@ def from_request( metadata=request.metadata, model=model_name, output=output, + input_messages=input_messages, + output_messages=output_messages, parallel_tool_calls=request.parallel_tool_calls, temperature=sampling_params.temperature, tool_choice=request.tool_choice, diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 4894623aeac2..6e243671af24 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -475,9 +475,14 @@ async def responses_full_generator( # "completed" is implemented as the "catch-all" for now. status: ResponseStatus = "completed" + input_messages = None + output_messages = None if self.use_harmony: assert isinstance(context, HarmonyContext) output = self._make_response_output_items_with_harmony(context) + if request.enable_response_messages: + input_messages = context.messages[:context.num_init_messages] + output_messages = context.messages[context.num_init_messages:] num_tool_output_tokens = context.num_tool_output_tokens if len(output) > 0: if context.finish_reason == "length": @@ -496,6 +501,12 @@ async def responses_full_generator( output = self._make_response_output_items(request, final_output, tokenizer) + # TODO: context for non-gptoss models doesn't use messages + # so we can't get them out yet + if request.enable_response_messages: + raise NotImplementedError( + "enable_response_messages is currently" + " only supported for gpt-oss") # Calculate usage. 
assert final_res.prompt_token_ids is not None num_tool_output_tokens = 0 @@ -519,6 +530,8 @@ async def responses_full_generator( response = ResponsesResponse.from_request( request, sampling_params, + input_messages=input_messages, + output_messages=output_messages, model_name=model_name, created_time=created_time, output=output, From 711e912946d23f4ccc1f554b1524c960553c5e28 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Fri, 19 Sep 2025 18:23:19 -0400 Subject: [PATCH 169/518] [Compile] Fix Compile Warning for Ignoring `MIN_BLOCK_PER_SM` (#25193) Signed-off-by: yewentao256 --- csrc/launch_bounds_utils.h | 38 +++++++++++++++++++ .../activation_nvfp4_quant_fusion_kernels.cu | 6 ++- csrc/quantization/fp4/nvfp4_experts_quant.cu | 10 +++-- csrc/quantization/fp4/nvfp4_quant_kernels.cu | 8 ++-- 4 files changed, 53 insertions(+), 9 deletions(-) create mode 100644 csrc/launch_bounds_utils.h diff --git a/csrc/launch_bounds_utils.h b/csrc/launch_bounds_utils.h new file mode 100644 index 000000000000..d5a89690111b --- /dev/null +++ b/csrc/launch_bounds_utils.h @@ -0,0 +1,38 @@ +#pragma once + +#include +#include + +// maximum blocks per SM cap +#ifndef VLLM_LAUNCH_BLOCKS_CAP + #define VLLM_LAUNCH_BLOCKS_CAP 4 +#endif + +// compile-time estimate of max threads per SM for launch bounds. +#ifndef VLLM_MAX_THREADS_PER_SM + #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 300 + #define VLLM_MAX_THREADS_PER_SM 1536 + #else + #define VLLM_MAX_THREADS_PER_SM 2048 + #endif +#endif + +// compute the number of blocks per SM to request in __launch_bounds__ +#define VLLM_BLOCKS_DIV(VAL) (VLLM_MAX_THREADS_PER_SM / (VAL)) +#define VLLM_CLAMP_BLOCKS_PER_SM(VAL) \ + (((VAL) <= 0) \ + ? 1 \ + : (((VAL) < VLLM_LAUNCH_BLOCKS_CAP) ? (VAL) : VLLM_LAUNCH_BLOCKS_CAP)) +#define VLLM_BLOCKS_PER_SM(BLOCK_THREADS) \ + VLLM_CLAMP_BLOCKS_PER_SM(VLLM_BLOCKS_DIV(BLOCK_THREADS)) + +// runtime-time helper to compute blocks/SM +static inline int vllm_runtime_blocks_per_sm(int block_threads) { + int device = -1; + cudaGetDevice(&device); + int max_threads_per_sm = VLLM_MAX_THREADS_PER_SM; + cudaDeviceGetAttribute(&max_threads_per_sm, + cudaDevAttrMaxThreadsPerMultiProcessor, device); + int blocks = (block_threads > 0) ? (max_threads_per_sm / block_threads) : 1; + return VLLM_CLAMP_BLOCKS_PER_SM(blocks); +} diff --git a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu b/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu index 74fde23782ce..7539f836ecf3 100644 --- a/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu +++ b/csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu @@ -26,6 +26,7 @@ #include "dispatch_utils.h" #include "cuda_utils.h" +#include "launch_bounds_utils.h" #include "nvfp4_utils.cuh" namespace vllm { @@ -63,7 +64,7 @@ __inline__ __device__ PackedVec compute_silu_mul(PackedVec& vec, // Use UE4M3 by default. 
template -__global__ void __launch_bounds__(1024, 4) +__global__ void __launch_bounds__(1024, VLLM_BLOCKS_PER_SM(1024)) silu_mul_cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, float const* SFScale, uint32_t* out, uint32_t* SFout) { @@ -131,7 +132,8 @@ void silu_and_mul_nvfp4_quant_sm1xxa(torch::Tensor& output, // [..., d] const at::cuda::OptionalCUDAGuard device_guard(device_of(input)); auto stream = at::cuda::getCurrentCUDAStream(input.get_device()); dim3 block(std::min(int(n / ELTS_PER_THREAD), 1024)); - int const numBlocksPerSM = 2048 / block.x; + int const numBlocksPerSM = + vllm_runtime_blocks_per_sm(static_cast(block.x)); dim3 grid(std::min(int(m), multiProcessorCount * numBlocksPerSM)); VLLM_DISPATCH_HALF_TYPES( diff --git a/csrc/quantization/fp4/nvfp4_experts_quant.cu b/csrc/quantization/fp4/nvfp4_experts_quant.cu index ce3ba2c19b9e..6d385e0dd94e 100644 --- a/csrc/quantization/fp4/nvfp4_experts_quant.cu +++ b/csrc/quantization/fp4/nvfp4_experts_quant.cu @@ -26,12 +26,13 @@ #include "dispatch_utils.h" #include "nvfp4_utils.cuh" +#include "launch_bounds_utils.h" namespace vllm { // Use UE4M3 by default. template -__global__ void __launch_bounds__(512, 4) +__global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512)) cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, float const* SFScale, uint32_t* out, uint32_t* SFout, uint32_t* input_offset_by_experts, @@ -129,7 +130,7 @@ __global__ void __launch_bounds__(512, 4) // Kernel for LARGE_M_TOPK = true (large m_topk optimized version) template -__global__ void __launch_bounds__(1024, 4) +__global__ void __launch_bounds__(1024, VLLM_BLOCKS_PER_SM(1024)) cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, float const* SFScale, uint32_t* out, uint32_t* SFout, uint32_t* input_offset_by_experts, @@ -233,8 +234,9 @@ void quant_impl(void* output, void* output_scale, void* input, int const workSizePerRow = k / ELTS_PER_THREAD; int const totalWorkSize = m_topk * workSizePerRow; dim3 block(std::min(workSizePerRow, 512)); - // Get number of blocks per SM (assume we can fully utilize the SM). - int const numBlocksPerSM = 2048 / block.x; + // Get number of blocks per SM + int const numBlocksPerSM = + vllm_runtime_blocks_per_sm(static_cast(block.x)); dim3 grid(std::min(static_cast((totalWorkSize + block.x - 1) / block.x), multiProcessorCount * numBlocksPerSM)); while (grid.x <= multiProcessorCount && block.x > 64) { diff --git a/csrc/quantization/fp4/nvfp4_quant_kernels.cu b/csrc/quantization/fp4/nvfp4_quant_kernels.cu index 0c1b9ef0664d..5575ee8e4197 100644 --- a/csrc/quantization/fp4/nvfp4_quant_kernels.cu +++ b/csrc/quantization/fp4/nvfp4_quant_kernels.cu @@ -26,13 +26,14 @@ #include "dispatch_utils.h" #include "cuda_utils.h" +#include "launch_bounds_utils.h" #include "nvfp4_utils.cuh" namespace vllm { // Use UE4M3 by default. template -__global__ void __launch_bounds__(512, 4) +__global__ void __launch_bounds__(512, VLLM_BLOCKS_PER_SM(512)) cvt_fp16_to_fp4(int32_t numRows, int32_t numCols, Type const* in, float const* SFScale, uint32_t* out, uint32_t* SFout) { using PackedVec = PackedVec; @@ -75,8 +76,9 @@ void invokeFP4Quantization(int m, int n, T const* input, float const* SFScale, // Grid, Block size. // Each thread converts 8 values. dim3 block(std::min(int(n / ELTS_PER_THREAD), 512)); - // Get number of blocks per SM (assume we can fully utilize the SM). 
- int const numBlocksPerSM = 2048 / block.x; + // Get number of blocks per SM + int const numBlocksPerSM = + vllm_runtime_blocks_per_sm(static_cast(block.x)); dim3 grid(std::min(int(m), multiProcessorCount * numBlocksPerSM)); // Launch the cvt kernel. From 431535b522c62f1422848e22dfd83bec2d04111a Mon Sep 17 00:00:00 2001 From: Zhiyu Date: Fri, 19 Sep 2025 15:40:33 -0700 Subject: [PATCH 170/518] Enable modelopt gemma3 nvfp4/fp8, make workflow more robust (#22771) Signed-off-by: Zhiyu Cheng Signed-off-by: Michael Goin Co-authored-by: Michael Goin --- .../moe/test_modular_kernel_combinations.py | 3 +- vllm/compilation/backends.py | 7 ++- vllm/config/model.py | 3 ++ .../fused_moe/gpt_oss_triton_kernels_moe.py | 4 +- .../layers/quantization/modelopt.py | 53 +++++++++++++------ vllm/model_executor/models/gemma3.py | 16 ++++++ vllm/model_executor/models/siglip.py | 18 ++++++- 7 files changed, 82 insertions(+), 22 deletions(-) diff --git a/tests/kernels/moe/test_modular_kernel_combinations.py b/tests/kernels/moe/test_modular_kernel_combinations.py index 19c4301bd23d..1c7e62d7aa4c 100644 --- a/tests/kernels/moe/test_modular_kernel_combinations.py +++ b/tests/kernels/moe/test_modular_kernel_combinations.py @@ -11,7 +11,8 @@ import torch import vllm.model_executor.layers.fused_moe.modular_kernel as mk -from vllm.config import VllmConfig, current_platform, set_current_vllm_config +from vllm.config import VllmConfig, set_current_vllm_config +from vllm.platforms import current_platform from vllm.utils import has_deep_ep, has_deep_gemm, has_pplx from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 3cc0fc3106f5..d6bdb31a3c63 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -31,8 +31,11 @@ def make_compiler(compilation_config: CompilationConfig) -> CompilerInterface: if compilation_config.use_inductor: - if envs.VLLM_USE_STANDALONE_COMPILE and is_torch_equal_or_newer( - "2.8.0.dev"): + # Use standalone compile only if requested, version is new enough, + # and the symbol actually exists in this PyTorch build. + if (envs.VLLM_USE_STANDALONE_COMPILE + and is_torch_equal_or_newer("2.8.0.dev") + and hasattr(torch._inductor, "standalone_compile")): logger.debug("Using InductorStandaloneAdaptor") return InductorStandaloneAdaptor() else: diff --git a/vllm/config/model.py b/vllm/config/model.py index 21457d3660a2..4e847922b61e 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -964,6 +964,9 @@ def _verify_quantization(self) -> None: "modelopt", "modelopt_fp4", "petit_nvfp4", + # Ensure heavy backends are probed last to avoid unnecessary + # imports during override detection (e.g., MXFP4 imports Triton) + "mxfp4", ] quantization_methods = [ q for q in supported_quantization if q not in overrides diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py index 08a9b34a4245..f12d3807517f 100644 --- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py +++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py @@ -20,10 +20,10 @@ from triton_kernels.matmul_ogs import (FnSpecs, FusedActivation, matmul_ogs) from triton_kernels.routing import routing - except ModuleNotFoundError: + except (ModuleNotFoundError, AttributeError) as e: logger.error( "Failed to import Triton kernels. Please make sure your triton " - "version is compatible.") + "version is compatible. 
Error: %s", e) def triton_kernel_moe_forward( diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 7eac40825ac3..1083f398a3a2 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -160,6 +160,7 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptFp8Config": def is_layer_excluded(self, prefix: str) -> bool: """ Check if a layer should be excluded from quantization. + Handles both exact matching (for fused layers) and substring matching. This method handles both regular models and multimodal models that use the language_model prefix. For multimodal models, it checks if the @@ -168,11 +169,18 @@ def is_layer_excluded(self, prefix: str) -> bool: if self.exclude_modules is None: return False - # Check if any excluded module matches the prefix + # First check exact matching with fused layer support + if is_layer_skipped(prefix, self.exclude_modules, + self.packed_modules_mapping): + return True + + # Then check substring matching for patterns not caught by exact match for module in self.exclude_modules: - if (module in prefix - or (prefix.startswith("language_model.") - and module in prefix.removeprefix("language_model."))): + # Skip exact matches already handled above + if (module != prefix and + (module in prefix or + (prefix.startswith("language_model.") + and module in prefix.removeprefix("language_model.")))): return True return False @@ -180,9 +188,10 @@ def get_quant_method(self, layer: torch.nn.Module, prefix: str) -> Optional["QuantizeMethodBase"]: from vllm.attention.layer import Attention # Avoid circular import if isinstance(layer, LinearBase): - if (is_layer_skipped(prefix, self.exclude_modules, - self.packed_modules_mapping) - or self.is_layer_excluded(prefix)): + if self.is_layer_excluded(prefix): + return UnquantizedLinearMethod() + # Check if this is a vision model layer that should not be quantized + if ("vision_tower" in prefix or "vision_model" in prefix): return UnquantizedLinearMethod() return ModelOptFp8LinearMethod(self) elif isinstance(layer, Attention): @@ -778,22 +787,34 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptNvFp4Config": return cls(is_checkpoint_nvfp4_serialized, kv_cache_quant_algo, exclude_modules, group_size) - def is_layer_excluded(self, prefix: str, - exclude_modules: list[str]) -> bool: + def is_layer_excluded(self, prefix: str) -> bool: + """ + Check if a layer should be excluded from quantization. + Handles both exact matching (for fused layers) and pattern matching. + """ + # First check exact matching with fused layer support + if is_layer_skipped(prefix, self.exclude_modules, + self.packed_modules_mapping): + return True + + # Check regex pattern matching for patterns not caught by exact match import regex as re - for pattern in exclude_modules: - regex_str = pattern.replace('.', r'\.').replace('*', r'.*') - if re.fullmatch(regex_str, prefix): - return True + for pattern in self.exclude_modules: + # Skip patterns that would be caught by exact matching + if '*' in pattern or '.' 
in pattern: + regex_str = pattern.replace('.', r'\.').replace('*', r'.*') + if re.fullmatch(regex_str, prefix): + return True return False def get_quant_method(self, layer: torch.nn.Module, prefix: str) -> Optional["QuantizeMethodBase"]: from vllm.attention.layer import Attention # Avoid circular import if isinstance(layer, LinearBase): - if (is_layer_skipped(prefix, self.exclude_modules, - self.packed_modules_mapping) - or self.is_layer_excluded(prefix, self.exclude_modules)): + if self.is_layer_excluded(prefix): + return UnquantizedLinearMethod() + # Check if this is a vision model layer that should not be quantized + if ("vision_tower" in prefix or "vision_model" in prefix): return UnquantizedLinearMethod() return ModelOptNvFp4LinearMethod(self) elif isinstance(layer, Attention): diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py index 1263e3049a14..7246308d5902 100644 --- a/vllm/model_executor/models/gemma3.py +++ b/vllm/model_executor/models/gemma3.py @@ -446,6 +446,22 @@ def load_weights(self, weights: Iterable[tuple[str, weight_loader(param, loaded_weight) loaded_params.add(scale_name) continue + + # Check if this is a scale parameter that needs remapping first + if name.endswith( + (".k_scale", ".v_scale", ".q_scale", ".prob_scale")): + # Try to remap the scale name first + remapped_name = maybe_remap_kv_scale_name(name, params_dict) + if remapped_name is not None and remapped_name in params_dict: + # Successfully remapped, use the remapped name + param = params_dict[remapped_name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(remapped_name) + continue + # If remapping failed, continue with normal processing + for (param_name, shard_name, shard_id) in stacked_params_mapping: if shard_name not in name: continue diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index 3630f59f53e0..eb49d6d2c335 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -20,7 +20,8 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) -from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.model_loader.weight_utils import ( + default_weight_loader, maybe_remap_kv_scale_name) from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs @@ -506,6 +507,21 @@ def load_weights(self, weights: Iterable[tuple[str, if layer_idx >= layer_count: continue + # Check if this is a scale parameter that needs remapping first + if name.endswith( + (".k_scale", ".v_scale", ".q_scale", ".prob_scale")): + # Try to remap the scale name first + remapped_name = maybe_remap_kv_scale_name(name, params_dict) + if remapped_name is not None and remapped_name in params_dict: + # Successfully remapped, use the remapped name + param = params_dict[remapped_name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(remapped_name) + continue + # If remapping failed, continue with normal processing + for (param_name, weight_name, shard_id) in stacked_params_mapping: if weight_name not in name: continue From ee7a66dd9a5ead46f062502af33766f45076f05d Mon Sep 17 00:00:00 2001 From: Lucia Fang <116399278+luccafong@users.noreply.github.com> Date: Fri, 19 Sep 2025 15:59:41 -0700 Subject: [PATCH 
171/518] allow disable flashinfer prefill (#25276) Signed-off-by: Lu Fang --- vllm/envs.py | 3 +++ vllm/v1/attention/backends/mla/common.py | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/vllm/envs.py b/vllm/envs.py index 19e2f8635275..294a0b920fb7 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -32,6 +32,7 @@ VLLM_CONFIG_ROOT: str = os.path.expanduser("~/.config/vllm") VLLM_USAGE_STATS_SERVER: str = "https://stats.vllm.ai" VLLM_NO_USAGE_STATS: bool = False + VLLM_DISABLE_FLASHINFER_PREFILL: bool = False VLLM_DO_NOT_TRACK: bool = False VLLM_USAGE_SOURCE: str = "" VLLM_CONFIGURE_LOGGING: int = 1 @@ -479,6 +480,8 @@ def get_vllm_port() -> Optional[int]: lambda: os.environ.get("VLLM_USAGE_STATS_SERVER", "https://stats.vllm.ai"), "VLLM_NO_USAGE_STATS": lambda: os.environ.get("VLLM_NO_USAGE_STATS", "0") == "1", + "VLLM_DISABLE_FLASHINFER_PREFILL": + lambda: os.environ.get("VLLM_DISABLE_FLASHINFER_PREFILL", "0") == "1", "VLLM_DO_NOT_TRACK": lambda: (os.environ.get("VLLM_DO_NOT_TRACK", None) or os.environ.get( "DO_NOT_TRACK", None) or "0") == "1", diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index a990cb2f1a97..5b307810de93 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -412,7 +412,8 @@ def __post_init__(self): def use_flashinfer_prefill() -> bool: # For blackwell default to flashinfer prefill if it's available since # it is faster than FA2. - return (flashinfer_available and not envs.VLLM_USE_CUDNN_PREFILL + return (not envs.VLLM_DISABLE_FLASHINFER_PREFILL and flashinfer_available + and not envs.VLLM_USE_CUDNN_PREFILL and current_platform.is_device_capability(100)) From 14c1432789c9c1b66308481b2c37439d3ee6661a Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 19 Sep 2025 16:34:07 -0700 Subject: [PATCH 172/518] [BugFix] Fix async scheduling CPU tensor race take 2 (#25279) Signed-off-by: Nick Hill --- vllm/v1/worker/gpu_model_runner.py | 54 ++++++++++++++++++------------ 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 9d0f26266f0c..3539f7561205 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1903,7 +1903,7 @@ def _preprocess( **self._init_model_kwargs(num_scheduled_tokens), **self._extract_mm_kwargs(scheduler_output), } - elif (self.enable_prompt_embeds and get_pp_group().is_first_rank): + elif self.enable_prompt_embeds and get_pp_group().is_first_rank: # Get the input embeddings for the tokens that are not input embeds, # then put them into the appropriate positions. # TODO(qthequartermasterman): Since even when prompt embeds are @@ -2125,6 +2125,21 @@ def _bookkeeping_sync( invalid_req_indices, ) + @contextmanager + def synchronize_input_prep(self): + if self.prepare_inputs_event is None: + yield + return + + # Ensure prior step has finished with reused CPU tensors. + # This is required in the async scheduling case because + # the CPU->GPU transfer happens async. 
+ self.prepare_inputs_event.synchronize() + try: + yield + finally: + self.prepare_inputs_event.record() + @torch.inference_mode() def execute_model( self, @@ -2132,33 +2147,28 @@ def execute_model( intermediate_tensors: Optional[IntermediateTensors] = None, ) -> Union[ModelRunnerOutput, AsyncModelRunnerOutput, IntermediateTensors]: with record_function_or_nullcontext("Preprocess"): - self._update_states(scheduler_output) - if not scheduler_output.total_num_scheduled_tokens: - if not has_kv_transfer_group(): - # Return empty ModelRunnerOutput if there's no work to do. - return EMPTY_MODEL_RUNNER_OUTPUT - return self.kv_connector_no_forward(scheduler_output, - self.vllm_config) - if self.cache_config.kv_sharing_fast_prefill: - assert not self.input_batch.num_prompt_logprobs, ( - "--kv-sharing-fast-prefill produces incorrect logprobs for " - "prompt tokens, tokens, please disable it when the requests" - " need prompt logprobs") - - if self.prepare_inputs_event is not None: - # Ensure prior step has finished with reused CPU tensors. - self.prepare_inputs_event.synchronize() - try: + with self.synchronize_input_prep(): + # Update persistent batch states. + self._update_states(scheduler_output) + + if not scheduler_output.total_num_scheduled_tokens: + if not has_kv_transfer_group(): + # Return empty ModelRunnerOutput if no work to do. + return EMPTY_MODEL_RUNNER_OUTPUT + return self.kv_connector_no_forward( + scheduler_output, self.vllm_config) + if self.cache_config.kv_sharing_fast_prefill: + assert not self.input_batch.num_prompt_logprobs, ( + "--kv-sharing-fast-prefill produces incorrect " + "logprobs for prompt tokens, tokens, please disable " + "it when the requests need prompt logprobs") + # Prepare the decoder inputs. (attn_metadata, logits_indices, spec_decode_metadata, num_scheduled_tokens_np, spec_decode_common_attn_metadata, max_query_len, ubatch_slices, num_tokens_after_padding ) = self._prepare_inputs(scheduler_output) - finally: - if self.prepare_inputs_event is not None: - self.prepare_inputs_event.record() - ( num_scheduled_tokens, num_input_tokens, From 3da17c2cc2c2e1d750020e033535f942f156f64c Mon Sep 17 00:00:00 2001 From: Lucas Kabela Date: Fri, 19 Sep 2025 17:27:21 -0700 Subject: [PATCH 173/518] [Bugfix] Remove VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE #2969 (#25090) Signed-off-by: Lucas Kabela --- tests/compile/test_basic_correctness.py | 16 ++-------------- tests/compile/test_full_graph.py | 4 +--- vllm/compilation/wrapper.py | 10 ++++------ vllm/envs.py | 5 ----- vllm/v1/worker/gpu_model_runner.py | 4 +--- vllm/worker/model_runner.py | 8 +++----- 6 files changed, 11 insertions(+), 36 deletions(-) diff --git a/tests/compile/test_basic_correctness.py b/tests/compile/test_basic_correctness.py index fd2b1866e62e..a1e5127ebeeb 100644 --- a/tests/compile/test_basic_correctness.py +++ b/tests/compile/test_basic_correctness.py @@ -20,7 +20,6 @@ class TestSetting: tp_size: int attn_backend: str method: str - fullgraph: bool # we cannot afford testing the full Cartesian product @@ -36,7 +35,6 @@ class TestSetting: tp_size=2, attn_backend="FLASH_ATTN", method="generate", - fullgraph=True, ), # llama model with quantization TestSetting( @@ -46,7 +44,6 @@ class TestSetting: tp_size=1, attn_backend="FLASH_ATTN", method="generate", - fullgraph=True, ), # MoE model TestSetting( @@ -56,7 +53,6 @@ class TestSetting: tp_size=2, attn_backend="FLASH_ATTN", method="generate", - fullgraph=True, ), # embedding model TestSetting( @@ -73,7 +69,6 @@ class TestSetting: tp_size=1, attn_backend="FLASH_ATTN", 
method="encode", - fullgraph=True, ), TestSetting( model="BAAI/bge-base-en-v1.5", @@ -82,7 +77,6 @@ class TestSetting: tp_size=1, attn_backend="FLASH_ATTN", method="encode", - fullgraph=True, ), # vision language model TestSetting( @@ -92,7 +86,6 @@ class TestSetting: tp_size=1, attn_backend="FLASH_ATTN", method="generate_with_image", - fullgraph=False, ), ], ) @@ -109,9 +102,8 @@ def test_compile_correctness( tp_size = test_setting.tp_size attn_backend = test_setting.attn_backend method = test_setting.method - fullgraph = test_setting.fullgraph - if cuda_device_count_stateless() != pp_size * tp_size: - pytest.skip(f"Need exactly {pp_size}*{tp_size} CUDA gpus but got " + if cuda_device_count_stateless() < pp_size * tp_size: + pytest.skip(f"Need at least {pp_size}*{tp_size} CUDA gpus but got " f"{cuda_device_count_stateless()}") with monkeypatch.context() as m: @@ -149,9 +141,5 @@ def test_compile_correctness( ]: all_args.append(final_args + [f"-O{level}"]) all_envs.append({}) - if level != CompilationLevel.DYNAMO_ONCE and not fullgraph: - # "DYNAMO_ONCE" will always use fullgraph - all_envs[-1][ - "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE"] = "0" # type: ignore compare_all_settings(model, all_args * 3, all_envs, method=method) diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py index 84178344a5f3..3439a1b29038 100644 --- a/tests/compile/test_full_graph.py +++ b/tests/compile/test_full_graph.py @@ -79,9 +79,7 @@ def test_full_graph( ): model, model_kwargs = model_info - with monkeypatch.context() as m: - # make sure these models can be captured in full graph mode - m.setenv("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") + with monkeypatch.context(): print(f"MODEL={model}") run_model(optimization_level, model, model_kwargs) diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py index 96d4eae2ee9a..930e4d27b410 100644 --- a/vllm/compilation/wrapper.py +++ b/vllm/compilation/wrapper.py @@ -10,7 +10,6 @@ import torch -import vllm.envs as envs from vllm.config import (CompilationLevel, CUDAGraphMode, get_current_vllm_config) from vllm.logger import init_logger @@ -47,11 +46,10 @@ def __init__(self, options = get_current_vllm_config( ).compilation_config.inductor_compile_config - compiled_callable = torch.compile( - self.forward, - fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE, - backend=backend, - options=options) + compiled_callable = torch.compile(self.forward, + fullgraph=True, + backend=backend, + options=options) self.compiled_callable = compiled_callable self.original_code_object = self.__class__.forward.__code__ diff --git a/vllm/envs.py b/vllm/envs.py index 294a0b920fb7..3991a789d80f 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -434,11 +434,6 @@ def get_vllm_port() -> Optional[int]: "VLLM_FLASH_ATTN_VERSION": lambda: maybe_convert_int(os.environ.get("VLLM_FLASH_ATTN_VERSION", None)), - # Internal flag to enable Dynamo fullgraph capture - "VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE": - lambda: bool( - os.environ.get("VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE", "1") != "0"), - # Feature flag to enable/disable Inductor standalone compile. # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is # enabled by default. 
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 3539f7561205..dffadd1d769b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2602,9 +2602,7 @@ def load_model(self, eep_scale_up: bool = False) -> None: backend = self.vllm_config.compilation_config.init_backend( self.vllm_config) compilation_counter.dynamo_as_is_count += 1 - self.model.compile( - fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE, - backend=backend) + self.model.compile(fullgraph=True, backend=backend) return # for other compilation levels, cudagraph behavior is controlled by # CudagraphWraper and CudagraphDispatcher of vllm. diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index c91c871766cf..f662f5a85eff 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -18,7 +18,6 @@ import torch.nn as nn from tqdm.auto import tqdm -import vllm.envs as envs from vllm.attention import AttentionMetadata, get_attn_backend from vllm.attention.backends.abstract import AttentionState from vllm.attention.backends.utils import CommonAttentionState @@ -1099,10 +1098,9 @@ def load_model(self) -> None: backend = self.vllm_config.compilation_config.init_backend( self.vllm_config) compilation_counter.dynamo_as_is_count += 1 - self.model = torch.compile( - self.model, - fullgraph=envs.VLLM_TEST_DYNAMO_FULLGRAPH_CAPTURE, - backend=backend) + self.model = torch.compile(self.model, + fullgraph=True, + backend=backend) def get_model(self) -> nn.Module: return self.model From a36c675817867235d368faf7e8d81e0ed3333d9c Mon Sep 17 00:00:00 2001 From: Maximilien de Bayser Date: Fri, 19 Sep 2025 21:33:25 -0300 Subject: [PATCH 174/518] Don't skip special tokens with hermes-style tool calling (#25281) Signed-off-by: Max de Bayser --- .../openai/tool_parsers/hermes_tool_parser.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index a6ce33af6bd0..e74c420da1d3 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -98,6 +98,15 @@ def tool_call_delta_buffer(self, delta_text: str): else: return delta_text + def adjust_request( + self, request: ChatCompletionRequest) -> ChatCompletionRequest: + if request.tools and request.tool_choice != 'none': + # do not skip special tokens because the tool_call tokens are + # marked "special" in some models. Since they are skipped + # prior to the call to the tool parser, it breaks tool calling. 
+ request.skip_special_tokens = False + return request + def extract_tool_calls( self, model_output: str, From c7e713616a53a097809609d5a7b536e8bfad4ab8 Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Fri, 19 Sep 2025 19:33:40 -0500 Subject: [PATCH 175/518] test: Remove vestigial skip for prompt embeds tests after landing v1 Prompt Embeds support (#25291) Signed-off-by: Andrew Sansom --- tests/entrypoints/openai/test_completion_with_prompt_embeds.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index 0e3fc82f0c03..176c1825530e 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -14,9 +14,6 @@ from ...utils import RemoteOpenAIServer -pytest.skip("Skipping prompt_embeds test until V1 supports it.", - allow_module_level=True) - # any model with a chat template should work here MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" From b8a287a0a8a035073d67b5101687e3a753dd02ac Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Fri, 19 Sep 2025 19:46:23 -0500 Subject: [PATCH 176/518] [docs] Prompt Embedding feature support (#25288) Signed-off-by: Andrew Sansom --- docs/features/README.md | 34 ++++++++++++++++++---------------- docs/features/prompt_embeds.md | 3 --- 2 files changed, 18 insertions(+), 19 deletions(-) diff --git a/docs/features/README.md b/docs/features/README.md index d8e26ec02aec..10cc448cc2ee 100644 --- a/docs/features/README.md +++ b/docs/features/README.md @@ -36,22 +36,23 @@ th:not(:first-child) { } -| Feature | [CP][chunked-prefill] | [APC](automatic_prefix_caching.md) | [LoRA](lora.md) | [SD](spec_decode.md) | CUDA graph | [pooling](../models/pooling_models.md) | enc-dec | logP | prmpt logP | async output | multi-step | mm | best-of | beam-search | -|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| -| [CP][chunked-prefill] | ✅ | | | | | | | | | | | | | | -| [APC](automatic_prefix_caching.md) | ✅ | ✅ | | | | | | | | | | | | | -| [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | | -| [SD](spec_decode.md) | ✅ | ✅ | ❌ | ✅ | | | | | | | | | | | -| CUDA graph | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | | -| [pooling](../models/pooling_models.md) | 🟠\* | 🟠\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | | -| enc-dec | ❌ | [❌](gh-issue:7366) | ❌ | [❌](gh-issue:7366) | ✅ | ✅ | ✅ | | | | | | | | -| logP | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | | | | | | -| prmpt logP | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | | | | | | -| async output | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | | | | | -| multi-step | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | | | | -| [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194)^ | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | | -| best-of | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ✅ | ✅ | | -| beam-search | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ❔ | ✅ | ✅ | +| Feature | [CP][chunked-prefill] | [APC](automatic_prefix_caching.md) | [LoRA](lora.md) | [SD](spec_decode.md) | CUDA graph | [pooling](../models/pooling_models.md) | enc-dec | logP | prmpt logP | async output | multi-step | mm | best-of | beam-search | [prompt-embeds](prompt_embeds.md) | +|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---| +| [CP][chunked-prefill] | ✅ | | | | | | | | | | | | | | | +| [APC](automatic_prefix_caching.md) | ✅ | ✅ | | | | | | | | | | | | | | +| [LoRA](lora.md) | ✅ | ✅ | ✅ | | | | | | | | | | | 
| | +| [SD](spec_decode.md) | ✅ | ✅ | ❌ | ✅ | | | | | | | | | | | | +| CUDA graph | ✅ | ✅ | ✅ | ✅ | ✅ | | | | | | | | | | | +| [pooling](../models/pooling_models.md) | 🟠\* | 🟠\* | ✅ | ❌ | ✅ | ✅ | | | | | | | | | | +| enc-dec | ❌ | [❌](gh-issue:7366) | ❌ | [❌](gh-issue:7366) | ✅ | ✅ | ✅ | | | | | | | | | +| logP | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | | | | | | | | +| prmpt logP | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ✅ | | | | | | | +| async output | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | | | | | | +| multi-step | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❌ | ✅ | ✅ | ✅ | ✅ | | | | | +| [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194)^ | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | | | +| best-of | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ✅ | ✅ | | | +| beam-search | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ❔ | ✅ | ✅ | | +| [prompt-embeds](prompt_embeds.md) | ✅ | [❌](gh-issue:25096) | ? | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ? | ? | ❌ | ? | ? | ✅ | \* Chunked prefill and prefix caching are only applicable to last-token pooling. ^ LoRA is only applicable to the language backbone of multimodal models. @@ -76,3 +77,4 @@ th:not(:first-child) { | multi-step | ✅ | ✅ | ✅ | ✅ | ✅ | [❌](gh-issue:8477) | ✅ | ❌ | | best-of | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | | beam-search | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❌ | +| [prompt-embeds](prompt_embeds.md) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ? | [❌](gh-issue:25097) | diff --git a/docs/features/prompt_embeds.md b/docs/features/prompt_embeds.md index 83993bd0140f..f9d3c1fb6c23 100644 --- a/docs/features/prompt_embeds.md +++ b/docs/features/prompt_embeds.md @@ -6,9 +6,6 @@ This page teaches you how to pass prompt embedding inputs to vLLM. The traditional flow of text data for a Large Language Model goes from text to token ids (via a tokenizer) then from token ids to prompt embeddings. For a traditional decoder-only model (such as meta-llama/Llama-3.1-8B-Instruct), this step of converting token ids to prompt embeddings happens via a look-up from a learned embedding matrix, but the model is not limited to processing only the embeddings corresponding to its token vocabulary. -!!! note - Prompt embeddings are currently only supported in the v0 engine. 
- ## Offline Inference To input multi-modal data, follow this schema in [vllm.inputs.EmbedsPrompt][]: From 8945b001db3202f882108e50d16b6f9c5e6f01ed Mon Sep 17 00:00:00 2001 From: Boyuan Feng Date: Fri, 19 Sep 2025 18:02:15 -0700 Subject: [PATCH 177/518] [torch.compile] CUDAGraph Inductor partition integration (#24281) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Boyuan Feng Signed-off-by: Boyuan Feng Signed-off-by: boyuanfeng Co-authored-by: Luka Govedič --- tests/compile/piecewise/test_simple.py | 71 ++++++++++++++++++---- tests/compile/silly_attention.py | 1 + tests/compile/test_full_graph.py | 59 +++++++++++++++++- tests/compile/test_fusion_attn.py | 16 ++++- vllm/attention/layer.py | 2 + vllm/compilation/backends.py | 10 ++- vllm/compilation/decorators.py | 57 ++++++++++++++++- vllm/config/compilation.py | 84 ++++++++++++++++++++++---- vllm/v1/cudagraph_dispatcher.py | 12 ++-- 9 files changed, 280 insertions(+), 32 deletions(-) diff --git a/tests/compile/piecewise/test_simple.py b/tests/compile/piecewise/test_simple.py index 84f4945c8272..41055f431569 100644 --- a/tests/compile/piecewise/test_simple.py +++ b/tests/compile/piecewise/test_simple.py @@ -15,6 +15,7 @@ VllmConfig, set_current_vllm_config) from vllm.envs import VLLM_USE_V1 from vllm.forward_context import BatchDescriptor, set_forward_context +from vllm.utils import is_torch_equal_or_newer # This import automatically registers `torch.ops.silly.attention` from ..silly_attention import get_global_counter, reset_global_counter @@ -50,16 +51,21 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return x -@pytest.mark.parametrize("use_inductor", [True, False]) -@torch.inference_mode() -def test_simple_piecewise_compile(use_inductor): - assert VLLM_USE_V1 - +def _run_simple_model( + splitting_ops, + use_inductor_graph_partition, + use_inductor, + expected_num_piecewise_graphs_seen, + expected_num_piecewise_capturable_graphs_seen, + expected_num_backend_compilations, + expected_num_cudagraph_captured, +): vllm_config = VllmConfig(compilation_config=CompilationConfig( level=CompilationLevel.PIECEWISE, use_cudagraph=True, use_inductor=use_inductor, - splitting_ops=["silly.attention"], + splitting_ops=splitting_ops, + use_inductor_graph_partition=use_inductor_graph_partition, cudagraph_copy_inputs=True, cudagraph_capture_sizes=[1, 2], )) @@ -70,11 +76,11 @@ def test_simple_piecewise_compile(use_inductor): with compilation_counter.expect( num_graphs_seen=1, # one graph for the model - num_piecewise_graphs_seen=5, # 2 * num_layers + 1 - num_piecewise_capturable_graphs_seen=3, # 1 + num_layers - num_backend_compilations=3, # num_piecewise_capturable_graphs_seen - num_cudagraph_captured= - 6, # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen + num_piecewise_graphs_seen=expected_num_piecewise_graphs_seen, + num_piecewise_capturable_graphs_seen= + expected_num_piecewise_capturable_graphs_seen, + num_backend_compilations=expected_num_backend_compilations, + num_cudagraph_captured=expected_num_cudagraph_captured, ), set_forward_context(None, vllm_config=vllm_config): # background context # warm up with background context @@ -104,3 +110,46 @@ def test_simple_piecewise_compile(use_inductor): output = model(input) assert get_global_counter() == 2 assert torch.allclose(output.cpu(), torch.tensor([19.0, 19.0])) + + +@pytest.mark.parametrize("use_inductor", [True, False]) +@torch.inference_mode() +def test_simple_piecewise_compile(use_inductor): + assert VLLM_USE_V1 + 
_run_simple_model( + splitting_ops=["silly.attention"], + use_inductor_graph_partition=False, + use_inductor=use_inductor, + expected_num_piecewise_graphs_seen=5, # 2 * num_layers + 1 + expected_num_piecewise_capturable_graphs_seen=3, # 1 + num_layers + expected_num_backend_compilations= + 3, # num_piecewise_capturable_graphs_seen + expected_num_cudagraph_captured= + 6, # num_cudagraph_sizes * num_piecewise_capturable_graphs_seen + ) + + +@torch.inference_mode() +@pytest.mark.parametrize("splitting_ops", [["silly.attention"], []]) +def test_simple_inductor_graph_partition(splitting_ops): + assert VLLM_USE_V1 + if not is_torch_equal_or_newer("2.9.0.dev"): + pytest.skip("inductor graph partition is only available " + "in PyTorch 2.9+") + + _run_simple_model( + # inductor graph partition automatically resets splitting_ops + # to be an empty list + splitting_ops=splitting_ops, + use_inductor_graph_partition=True, + use_inductor=True, + expected_num_piecewise_graphs_seen= + 1, # since not splitting at fx graph level + expected_num_piecewise_capturable_graphs_seen= + 1, # since not splitting at fx graph level + expected_num_backend_compilations= + 1, # since not splitting at fx graph level + expected_num_cudagraph_captured= + 6, # inductor graph partition still captures 6 + # graph, same as fx graph partition. + ) diff --git a/tests/compile/silly_attention.py b/tests/compile/silly_attention.py index 13eb0bf4b1fa..baedafbae99f 100644 --- a/tests/compile/silly_attention.py +++ b/tests/compile/silly_attention.py @@ -60,4 +60,5 @@ def silly_attention_fake(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, mutates_args=["out"], fake_impl=silly_attention_fake, target_lib=silly_lib, + tags=(torch._C.Tag.cudagraph_unsafe, ), ) diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py index 3439a1b29038..870aa553ca62 100644 --- a/tests/compile/test_full_graph.py +++ b/tests/compile/test_full_graph.py @@ -3,6 +3,7 @@ from __future__ import annotations +import logging import tempfile from typing import Any, Optional, Union @@ -10,9 +11,13 @@ import torch from tests.quantization.utils import is_quant_method_supported +from tests.v1.attention.utils import _Backend from vllm import LLM, SamplingParams -from vllm.config import CompilationConfig, CompilationLevel, PassConfig +from vllm.attention.selector import global_force_attn_backend_context_manager +from vllm.config import (CompilationConfig, CompilationLevel, CUDAGraphMode, + PassConfig) from vllm.platforms import current_platform +from vllm.utils import is_torch_equal_or_newer from ..utils import create_new_process_for_each_test @@ -105,6 +110,18 @@ def test_full_graph( (CompilationConfig(level=CompilationLevel.PIECEWISE, debug_dump_path=tempfile.gettempdir()), ("facebook/opt-125m", {})), + ] + [ + # graph inductor partition + ( + CompilationConfig( + level=CompilationLevel.PIECEWISE, + # inductor graph partition uses + # torch._C.Tag.cudagraph_unsafe to specify splitting ops + use_inductor_graph_partition=True, + cudagraph_mode=CUDAGraphMode.PIECEWISE, + compile_sizes=[1, 2]), + model) for model in models_list(all=False) + if is_torch_equal_or_newer("2.9.0.dev") ]) # only test some of the models @create_new_process_for_each_test() @@ -112,11 +129,51 @@ def test_custom_compile_config( compilation_config: CompilationConfig, model_info: tuple[str, dict[str, Any]], ): + if (compilation_config.use_inductor_graph_partition + and not is_torch_equal_or_newer("2.9.0.dev")): + pytest.skip("inductor graph partition is only available " + "in 
PyTorch 2.9+") + model, model_kwargs = model_info print(f"MODEL={model}") run_model(compilation_config, model, model_kwargs) +def test_inductor_graph_partition_attn_fusion(caplog_vllm): + if not is_torch_equal_or_newer("2.9.0.dev"): + pytest.skip("inductor graph partition is only available " + "in PyTorch 2.9+") + + model = "nvidia/Llama-4-Scout-17B-16E-Instruct-FP8" + compilation_config = CompilationConfig( + level=CompilationLevel.PIECEWISE, + use_inductor_graph_partition=True, + cudagraph_mode=CUDAGraphMode.PIECEWISE, + custom_ops=["+quant_fp8"], + pass_config=PassConfig(enable_attn_fusion=True, enable_noop=True), + ) + model_kwargs = { + "kv_cache_dtype": "fp8", + "max_model_len": 1024, + } + with caplog_vllm.at_level( + logging.DEBUG), global_force_attn_backend_context_manager( + _Backend.FLASHINFER): + run_model(compilation_config, model, model_kwargs) + + try: + assert ("Fused quantization onto 48 attention nodes" + in caplog_vllm.text), caplog_vllm.text + except AssertionError: + # Note: this message is only triggered when the compilation goes + # through the custom pass. Due to multiple layers of cache on + # PyTorch side, the compilation of a graph may be cached such + # that custom pass directly goes through cache. In this case, + # we go through this branch and assert that the pass is not + # triggered. + assert "Fused quantization" not in caplog_vllm.text + + def run_model(compile_config: Union[int, CompilationConfig], model: str, model_kwargs: dict[str, Any]): prompts = [ diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index 6baf4bf83f49..022f183b3193 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -27,6 +27,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( Fp8LinearOp) from vllm.platforms import current_platform +from vllm.utils import is_torch_equal_or_newer from vllm.v1.kv_cache_interface import AttentionSpec FP8_DTYPE = current_platform.fp8_dtype() @@ -339,6 +340,10 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor): @pytest.mark.parametrize( "split_attention", [False, True] if current_platform.is_rocm() else [False]) +# TODO(boyuan): test inductor graph partition on rocm +@pytest.mark.parametrize( + "use_inductor_graph_partition", + [False] if current_platform.is_rocm() else [False, True]) @pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Only test ROCm or CUDA") @pytest.mark.skipif(not current_platform.supports_fp8(), reason="Need FP8") @@ -352,9 +357,15 @@ def test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, dtype: torch.dtype, model_name: str, model_class: type[AttentionQuantPatternModel], backend: _Backend, split_attention: bool, - monkeypatch, dist_init): + use_inductor_graph_partition: bool, + monkeypatch, dist_init, caplog_vllm): """Test AttentionStaticQuantPattern fusion pass""" + if use_inductor_graph_partition and not is_torch_equal_or_newer( + "2.9.0.dev"): + pytest.skip("inductor graph partition is only available " + "in PyTorch 2.9+") + monkeypatch.setenv("VLLM_USE_V1", "1") if split_attention: monkeypatch.setenv("VLLM_V1_USE_PREFILL_DECODE_ATTENTION", "1") @@ -372,6 +383,7 @@ def test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, compilation_config=CompilationConfig( level=CompilationLevel.PIECEWISE, custom_ops=["+quant_fp8"], + use_inductor_graph_partition=use_inductor_graph_partition, ), cache_config=CacheConfig(cache_dtype="fp8")) @@ -444,6 +456,7 @@ def 
test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, backend=test_backend, fullgraph=True) assert model_compiled.attn._o_scale_float is None + result_fused_1 = model_compiled(q, k, v) if backend == _Backend.FLASHINFER: @@ -453,6 +466,7 @@ def test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, # _o_scale_float assert model_compiled.attn._o_scale_float is not None result_fused_2 = model_compiled(q, k, v) + assert model_compiled.attn._o_scale_float is not None torch.testing.assert_close(result_unfused, diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 8d5ebd93e063..3d1269c0ecea 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -577,6 +577,7 @@ def unified_attention_fake( mutates_args=[], fake_impl=unified_attention_fake, dispatch_key=current_platform.dispatch_key, + tags=(torch._C.Tag.cudagraph_unsafe, ), ) @@ -627,4 +628,5 @@ def unified_attention_with_output_fake( mutates_args=["output", "output_block_scale"], fake_impl=unified_attention_with_output_fake, dispatch_key=current_platform.dispatch_key, + tags=(torch._C.Tag.cudagraph_unsafe, ), ) diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index d6bdb31a3c63..17fc727b8fc7 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -329,6 +329,7 @@ def call_module(self, target: torch.fx.node.Target, i for i, x in enumerate(args) if isinstance(x, torch.SymInt) ] global compilation_start_time + compiled_graph_for_dynamic_shape = self.vllm_backend.\ compiler_manager.compile( submod, @@ -339,7 +340,6 @@ def call_module(self, target: torch.fx.node.Target, num_graphs=len(self.compile_submod_names), runtime_shape=None) # Lazy import here to avoid circular import - from .cuda_graph import CUDAGraphOptions from .cuda_piecewise_backend import PiecewiseBackend piecewise_backend = PiecewiseBackend( @@ -347,7 +347,13 @@ def call_module(self, target: torch.fx.node.Target, len(self.compile_submod_names), sym_shape_indices, compiled_graph_for_dynamic_shape, self.vllm_backend) - if self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE: + if (self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE + and + not self.compilation_config.use_inductor_graph_partition): + # We're using Dynamo-based piecewise splitting, so we wrap + # the whole subgraph with a static graph wrapper. + from .cuda_graph import CUDAGraphOptions + # resolve the static graph wrapper class (e.g. CUDAGraphWrapper # class) as platform dependent. 
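
A simplified sketch of what a static graph wrapper does conceptually: capture a callable once into a CUDA graph against fixed buffers, then replay the recorded kernels on later calls. This is illustrative only; it assumes a CUDA device and omits the memory-pool handling, per-batch-size caches, and warmup policy of the real `CUDAGraphWrapper`.

```python
# Conceptual sketch of capture-once / replay-many CUDA graph wrapping.
import torch


class StaticGraphSketch:

    def __init__(self, fn, example_input: torch.Tensor):
        self.static_input = example_input.clone()
        # Warm up on a side stream before capture, as PyTorch docs recommend.
        stream = torch.cuda.Stream()
        stream.wait_stream(torch.cuda.current_stream())
        with torch.cuda.stream(stream):
            for _ in range(3):
                self.static_output = fn(self.static_input)
        torch.cuda.current_stream().wait_stream(stream)
        # Record the kernels once against the static buffers.
        self.graph = torch.cuda.CUDAGraph()
        with torch.cuda.graph(self.graph):
            self.static_output = fn(self.static_input)

    def __call__(self, x: torch.Tensor) -> torch.Tensor:
        self.static_input.copy_(x)  # refresh the captured input buffer
        self.graph.replay()         # replay the recorded kernels
        return self.static_output


if torch.cuda.is_available():
    layer = torch.nn.Linear(32, 32, device="cuda")
    wrapped = StaticGraphSketch(layer, torch.randn(8, 32, device="cuda"))
    print(wrapped(torch.randn(8, 32, device="cuda")).shape)
```
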
static_graph_wrapper_class = resolve_obj_by_qualname( diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 41d9fcb824b0..b7a6e23c1aa7 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import contextlib import inspect from typing import Callable, Optional, TypeVar, Union, overload from unittest.mock import patch @@ -14,7 +15,7 @@ from vllm.config import CompilationLevel, VllmConfig from vllm.logger import init_logger from vllm.sequence import IntermediateTensors -from vllm.utils import supports_dynamo +from vllm.utils import resolve_obj_by_qualname, supports_dynamo from .monitor import start_monitoring_torch_compile @@ -301,8 +302,11 @@ def patched_inline_call(parent, func, args, kwargs): with patch.object(InliningInstructionTranslator, 'inline_call', patched_inline_call), torch._dynamo.config.patch( - **dynamo_config_patches): + **dynamo_config_patches + ), maybe_use_cudagraph_partition_wrapper( + self.vllm_config): output = self.compiled_callable(*args, **kwargs) + return output # usually, capturing the model once is enough, and then we can @@ -314,3 +318,52 @@ def patched_inline_call(parent, func, args, kwargs): cls.__call__ = __call__ return cls + + +@contextlib.contextmanager +def maybe_use_cudagraph_partition_wrapper(vllm_config: VllmConfig): + """ + Context manager to set/unset customized cudagraph partition wrappers. + + If we're using Inductor-based graph partitioning, we currently have the + whole `fx.Graph` before Inductor lowering and and the piecewise + splitting happens after all graph passes and fusions. Here, we add + a custom hook for Inductor to wrap each partition with our static + graph wrapper class to maintain more control over static graph + capture and replay. + """ + from vllm.config import CUDAGraphMode + + compilation_config = vllm_config.compilation_config + if (compilation_config.cudagraph_mode != CUDAGraphMode.NONE + and compilation_config.use_inductor_graph_partition): + from torch._inductor.utils import CUDAGraphWrapperMetadata + + from vllm.compilation.cuda_graph import CUDAGraphOptions + from vllm.platforms import current_platform + + static_graph_wrapper_class = resolve_obj_by_qualname( + current_platform.get_static_graph_wrapper_cls()) + + def customized_cudagraph_wrapper(f, + metadata: CUDAGraphWrapperMetadata): + partition_id = metadata.partition_index + num_partitions = metadata.num_partitions + return static_graph_wrapper_class( + runnable=f, + vllm_config=vllm_config, + runtime_mode=CUDAGraphMode.PIECEWISE, + cudagraph_options=CUDAGraphOptions( + debug_log_enable=partition_id == 0, + gc_disable=partition_id != 0, + weak_ref_output=partition_id == num_partitions - 1, + )) + + torch._inductor.utils.set_customized_partition_wrappers( + customized_cudagraph_wrapper) + + yield + + if (compilation_config.cudagraph_mode != CUDAGraphMode.NONE + and compilation_config.use_inductor_graph_partition): + torch._inductor.utils.set_customized_partition_wrappers(None) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 3618f472e742..22b38daf46c3 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -299,6 +299,26 @@ class CompilationConfig: minor release, i.e. v0.11.0 or v1.0.0. Please use cudagraph_mode instead. """ + use_inductor_graph_partition: bool = False + """Use inductor graph partition to split the graph at cudagraph_unsafe ops. 
+ This partition happens at inductor codegen time after all passes and fusions + are finished. It generates a single `call` function which wraps + cudagraph-safe ops into partition functions and leave cudagraph-unsafe ops + outside the partition functions. For a graph with N cudagraph-unsafe ops + (e.g., Attention), there would be N+1 partitions. To mark an op as + cudagraph unsafe, we can add `tags=(torch._C.Tag.cudagraph_unsafe)` when + register the custom op. + + This config supports both full cudagraph and piecewise cudagraph without + compiling twice. For piecewise cudagraph, it applies vLLM CUDAGraph wrapper + to each partition. For N+1 partitions, there would be N+1 + CUDAGraph wrapper instances. + + For full CUDAGraph, we always apply a single CUDAGraph wrapper outside the + inductor `call` function in the model runner. The top-level full cudagraph + capture ignores all partitioning. + """ + pass_config: PassConfig = field(default_factory=PassConfig) """Custom inductor passes, see PassConfig for more details""" @@ -461,6 +481,12 @@ def __post_init__(self) -> None: "since full_cuda_graph is deprecated.") self.cudagraph_mode = CUDAGraphMode.FULL + if (self.use_inductor_graph_partition + and not is_torch_equal_or_newer("2.9.0.dev")): + raise ValueError("use_inductor_graph_partition is only " + "supported with torch>=2.9.0.dev. Set " + "use_inductor_graph_partition=False instead.") + def init_backend(self, vllm_config: "VllmConfig") -> Union[str, Callable]: if self.level == CompilationLevel.NO_COMPILATION: raise ValueError("No compilation level is set.") @@ -540,19 +566,36 @@ def set_splitting_ops_for_v1(self): "set_splitting_ops_for_v1 should only be called when " "level is CompilationLevel.PIECEWISE") + use_inductor_graph_partition_msg = ( + "When use_inductor_graph_partition=True, splitting_ops " + "are ignored and set to an empty list. Instead, " + "\"tags=(torch._C.Tag.cudagraph_unsafe, ),\" is " + "used to annotate custom ops for graph partition.") + if self.splitting_ops is None: - # NOTE: When using full cudagraph, instead of setting an empty - # list and capture the full cudagraph inside the flattened fx - # graph, we keep the piecewise fx graph structure but capture the - # full cudagraph outside the fx graph. This reduces some cpu - # overhead when the runtime batch_size is not cudagraph captured. - # see https://github.com/vllm-project/vllm/pull/20059 for details. - # make a copy to avoid mutating the class-level list via reference. - self.splitting_ops = list(self._attention_ops) + if self.use_inductor_graph_partition: + # When using inductor graph partition, we set splitting_ops + # to be empty and rely on torch._C.Tag.cudagraph_unsafe to + # annotate custom ops as splitting ops. + logger.warning_once(use_inductor_graph_partition_msg) + self.splitting_ops = [] + else: + # NOTE: When using full cudagraph, instead of setting an empty + # list and capture the full cudagraph inside the flattened fx + # graph, we keep the piecewise fx graph structure but capture + # the full cudagraph outside the fx graph. This reduces some + # cpu overhead when the runtime batch_size is not cudagraph + # captured. see https://github.com/vllm-project/vllm/pull/20059 + # for details. make a copy to avoid mutating the class-level + # list via reference. 
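
A standalone sketch of the tagging mechanism the docstring above describes: a custom op registered with `torch._C.Tag.cudagraph_unsafe` becomes a partition boundary for inductor graph partition. The `demo` namespace and `toy_attention` op below are made up, and the tag only exists on new enough PyTorch builds, so it is looked up defensively.

```python
# Illustrative only: register a toy custom op tagged as cudagraph-unsafe.
import torch
from torch.library import Library

demo_lib = Library("demo", "FRAGMENT")  # hypothetical op namespace

# torch._C.Tag.cudagraph_unsafe is only defined on sufficiently new PyTorch.
cudagraph_unsafe = getattr(torch._C.Tag, "cudagraph_unsafe", None)
tags = (cudagraph_unsafe, ) if cudagraph_unsafe is not None else ()

demo_lib.define("toy_attention(Tensor q, Tensor k, Tensor v) -> Tensor",
                tags=tags)


def toy_attention(q: torch.Tensor, k: torch.Tensor,
                  v: torch.Tensor) -> torch.Tensor:
    return torch.softmax(q @ k.transpose(-1, -2), dim=-1) @ v


demo_lib.impl("toy_attention", toy_attention, "CompositeExplicitAutograd")

q = k = v = torch.randn(2, 4, 8)
print(torch.ops.demo.toy_attention(q, k, v).shape)  # torch.Size([2, 4, 8])
```
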
+ self.splitting_ops = list(self._attention_ops) elif len(self.splitting_ops) == 0: - logger.warning_once("Using piecewise compilation with empty " - "splitting_ops.") - if self.cudagraph_mode == CUDAGraphMode.PIECEWISE: + logger.warning_once( + "Using piecewise compilation with empty " + "splitting_ops and use_inductor_graph_partition" + f"={self.use_inductor_graph_partition}.") + if (self.cudagraph_mode == CUDAGraphMode.PIECEWISE + and not self.use_inductor_graph_partition): logger.warning_once( "When compilation level is piecewise with empty " "splitting_ops, PIECEWISE cudagraph_mode will be " @@ -562,7 +605,26 @@ def set_splitting_ops_for_v1(self): "any problems.") self.cudagraph_mode = CUDAGraphMode.FULL self.splitting_ops = [] + elif self.use_inductor_graph_partition: + logger.warning_once(use_inductor_graph_partition_msg) + self.splitting_ops = [] def splitting_ops_contain_attention(self) -> bool: return self.splitting_ops is not None and all( op in self.splitting_ops for op in self._attention_ops) + + def is_attention_compiled_piecewise(self) -> bool: + use_fx_graph_piecewise_compilation = ( + self.level == CompilationLevel.PIECEWISE + and self.splitting_ops_contain_attention()) + + inductor_used = (self.level == CompilationLevel.PIECEWISE + and self.use_inductor) or ( + self.level >= CompilationLevel.DYNAMO_AS_IS + and self.backend == "inductor") + use_inductor_piecewise_compilation = ( + inductor_used and self.use_inductor_graph_partition + and not self.splitting_ops_contain_attention()) + + return use_fx_graph_piecewise_compilation or \ + use_inductor_piecewise_compilation diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py index d2db7dcb3f09..ea4fba8eeea6 100644 --- a/vllm/v1/cudagraph_dispatcher.py +++ b/vllm/v1/cudagraph_dispatcher.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from typing import Optional -from vllm.config import CompilationLevel, CUDAGraphMode, VllmConfig +from vllm.config import CUDAGraphMode, VllmConfig from vllm.forward_context import BatchDescriptor from vllm.logger import init_logger @@ -39,11 +39,15 @@ def __init__(self, vllm_config: VllmConfig): CUDAGraphMode.FULL: set(), } - assert not self.cudagraph_mode.requires_piecewise_compilation() or \ - (self.compilation_config.level == CompilationLevel.PIECEWISE and - self.compilation_config.splitting_ops_contain_attention()), \ + not_use_piecewise_compilation = ( + not self.cudagraph_mode.requires_piecewise_compilation()) + + assert not_use_piecewise_compilation or \ + self.compilation_config.is_attention_compiled_piecewise(), \ "Compilation level should be CompilationLevel.PIECEWISE when "\ "cudagraph_mode piecewise cudagraphs is used, "\ + "and attention should be in splitting_ops or "\ + "inductor splitting should be used. 
" \ f"cudagraph_mode={self.cudagraph_mode}, "\ f"compilation_level={self.compilation_config.level}, "\ f"splitting_ops={self.compilation_config.splitting_ops}" From a25ade5d473fc00107bd3950141d8211331d3377 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 19 Sep 2025 18:06:34 -0700 Subject: [PATCH 178/518] [BugFix] Ensure appropriate guards in destructors (#25284) Signed-off-by: Nick Hill Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/compilation/collective_fusion.py | 2 +- .../kv_transfer/kv_connector/v1/nixl_connector.py | 9 +++++---- vllm/executor/executor_base.py | 3 --- vllm/v1/worker/gpu_worker.py | 3 ++- 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py index 71274420c342..0658b59a2e21 100644 --- a/vllm/compilation/collective_fusion.py +++ b/vllm/compilation/collective_fusion.py @@ -1183,7 +1183,7 @@ def __call__(self, graph: fx.Graph): self.end_and_log() def __del__(self): - if self.disabled: + if getattr(self, "disabled", True): return if flashinfer_comm is not None: flashinfer_comm.trtllm_destroy_ipc_workspace_for_all_reduce( diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index ff62f60e5a42..d3a08af088c1 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -569,9 +569,10 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): def __del__(self): """Cleanup background threads on destruction.""" - self._handshake_initiation_executor.shutdown(wait=False) - if self._nixl_handshake_listener_t: - self._nixl_handshake_listener_t.join(timeout=0) + if executor := getattr(self, "_handshake_initiation_executor", None): + executor.shutdown(wait=False) + if listener_t := getattr(self, "_nixl_handshake_listener_t", None): + listener_t.join(timeout=0) @staticmethod def _nixl_handshake_listener(metadata: NixlAgentMetadata, @@ -1379,4 +1380,4 @@ def reduce(self) -> dict[str, Union[int, float]]: # TODO: reduce stats to a single value, calculate latency/throughput return { "num_successful_transfers": self.data["num_successful_transfers"] - } \ No newline at end of file + } diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index d18bef1256af..42aa8d14a21e 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -235,9 +235,6 @@ def shutdown(self) -> None: """Shutdown the executor.""" self.collective_rpc("shutdown") - def __del__(self): - self.shutdown() - async def execute_model_async( self, execute_model_req: ExecuteModelRequest) -> List[SamplerOutput]: diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 6855526583f0..8b1e1bb8f45c 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -683,7 +683,8 @@ def save_tensorized_model( tensorizer_config=tensorizer_config, ) def shutdown(self) -> None: - self.model_runner.ensure_kv_transfer_shutdown() + if runner := getattr(self, "model_runner", None): + runner.ensure_kv_transfer_shutdown() def init_worker_distributed_environment( From 535d80056b72443e68a96c1e4a1049cd9a85587d Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 19 Sep 2025 19:02:38 -0700 Subject: [PATCH 179/518] [Misc] Support more collective_rpc return types (#25294) Signed-off-by: Nick Hill --- tests/v1/engine/test_engine_core_client.py | 
203 ++++++++++++++++++++- vllm/v1/serial_utils.py | 60 ++++-- 2 files changed, 246 insertions(+), 17 deletions(-) diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py index 625a3470e802..992c4e01386e 100644 --- a/tests/v1/engine/test_engine_core_client.py +++ b/tests/v1/engine/test_engine_core_client.py @@ -8,7 +8,7 @@ import uuid from dataclasses import dataclass from threading import Thread -from typing import Optional, Union +from typing import Any, Optional, Union from unittest.mock import MagicMock import pytest @@ -331,6 +331,46 @@ def echo_dc( return [val for _ in range(3)] if return_list else val +# Dummy utility function to test dict serialization with custom types. +def echo_dc_dict( + self, + msg: str, + return_dict: bool = False, +) -> Union[MyDataclass, dict[str, MyDataclass]]: + print(f"echo dc dict util function called: {msg}") + val = None if msg is None else MyDataclass(msg) + # Return dict of dataclasses to verify support for returning dicts + # with custom value types. + if return_dict: + return {"key1": val, "key2": val, "key3": val} + else: + return val + + +# Dummy utility function to test nested structures with custom types. +def echo_dc_nested( + self, + msg: str, + structure_type: str = "list_of_dicts", +) -> Any: + print(f"echo dc nested util function called: {msg}, " + f"structure: {structure_type}") + val = None if msg is None else MyDataclass(msg) + + if structure_type == "list_of_dicts": # noqa + # Return list of dicts: [{"a": val, "b": val}, {"c": val, "d": val}] + return [{"a": val, "b": val}, {"c": val, "d": val}] + elif structure_type == "dict_of_lists": + # Return dict of lists: {"list1": [val, val], "list2": [val, val]} + return {"list1": [val, val], "list2": [val, val]} + elif structure_type == "deep_nested": + # Return deeply nested: {"outer": [{"inner": [val, val]}, + # {"inner": [val]}]} + return {"outer": [{"inner": [val, val]}, {"inner": [val]}]} + else: + return val + + @pytest.mark.asyncio(loop_scope="function") async def test_engine_core_client_util_method_custom_return( monkeypatch: pytest.MonkeyPatch): @@ -384,6 +424,167 @@ async def test_engine_core_client_util_method_custom_return( client.shutdown() +@pytest.mark.asyncio(loop_scope="function") +async def test_engine_core_client_util_method_custom_dict_return( + monkeypatch: pytest.MonkeyPatch): + + with monkeypatch.context() as m: + m.setenv("VLLM_USE_V1", "1") + + # Must set insecure serialization to allow returning custom types. + m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + + # Monkey-patch core engine utility function to test. + m.setattr(EngineCore, "echo_dc_dict", echo_dc_dict, raising=False) + + engine_args = EngineArgs(model=MODEL_NAME, enforce_eager=True) + vllm_config = engine_args.create_engine_config( + usage_context=UsageContext.UNKNOWN_CONTEXT) + executor_class = Executor.get_class(vllm_config) + + with set_default_torch_num_threads(1): + client = EngineCoreClient.make_client( + multiprocess_mode=True, + asyncio_mode=True, + vllm_config=vllm_config, + executor_class=executor_class, + log_stats=True, + ) + + try: + # Test utility method returning custom / non-native data type. 
+ core_client: AsyncMPClient = client + + # Test single object return + result = await core_client.call_utility_async( + "echo_dc_dict", "testarg3", False) + assert isinstance(result, + MyDataclass) and result.message == "testarg3" + + # Test dict return with custom value types + result = await core_client.call_utility_async( + "echo_dc_dict", "testarg3", True) + assert isinstance(result, dict) and len(result) == 3 + for key, val in result.items(): + assert key in ["key1", "key2", "key3"] + assert isinstance(val, + MyDataclass) and val.message == "testarg3" + + # Test returning dict with None values + result = await core_client.call_utility_async( + "echo_dc_dict", None, True) + assert isinstance(result, dict) and len(result) == 3 + for key, val in result.items(): + assert key in ["key1", "key2", "key3"] + assert val is None + + finally: + client.shutdown() + + +@pytest.mark.asyncio(loop_scope="function") +async def test_engine_core_client_util_method_nested_structures( + monkeypatch: pytest.MonkeyPatch): + + with monkeypatch.context() as m: + m.setenv("VLLM_USE_V1", "1") + + # Must set insecure serialization to allow returning custom types. + m.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + + # Monkey-patch core engine utility function to test. + m.setattr(EngineCore, "echo_dc_nested", echo_dc_nested, raising=False) + + engine_args = EngineArgs(model=MODEL_NAME, enforce_eager=True) + vllm_config = engine_args.create_engine_config( + usage_context=UsageContext.UNKNOWN_CONTEXT) + executor_class = Executor.get_class(vllm_config) + + with set_default_torch_num_threads(1): + client = EngineCoreClient.make_client( + multiprocess_mode=True, + asyncio_mode=True, + vllm_config=vllm_config, + executor_class=executor_class, + log_stats=True, + ) + + try: + core_client: AsyncMPClient = client + + # Test list of dicts: [{"a": val, "b": val}, {"c": val, "d": val}] + result = await core_client.call_utility_async( + "echo_dc_nested", "nested1", "list_of_dicts") + assert isinstance(result, list) and len(result) == 2 + for i, item in enumerate(result): + assert isinstance(item, dict) + if i == 0: + assert "a" in item and "b" in item + assert isinstance( + item["a"], + MyDataclass) and item["a"].message == "nested1" + assert isinstance( + item["b"], + MyDataclass) and item["b"].message == "nested1" + else: + assert "c" in item and "d" in item + assert isinstance( + item["c"], + MyDataclass) and item["c"].message == "nested1" + assert isinstance( + item["d"], + MyDataclass) and item["d"].message == "nested1" + + # Test dict of lists: {"list1": [val, val], "list2": [val, val]} + result = await core_client.call_utility_async( + "echo_dc_nested", "nested2", "dict_of_lists") + assert isinstance(result, dict) and len(result) == 2 + assert "list1" in result and "list2" in result + for key, lst in result.items(): + assert isinstance(lst, list) and len(lst) == 2 + for item in lst: + assert isinstance( + item, MyDataclass) and item.message == "nested2" + + # Test deeply nested: {"outer": [{"inner": [val, val]}, + # {"inner": [val]}]} + result = await core_client.call_utility_async( + "echo_dc_nested", "nested3", "deep_nested") + assert isinstance(result, dict) and "outer" in result + outer_list = result["outer"] + assert isinstance(outer_list, list) and len(outer_list) == 2 + + # First dict in outer list should have "inner" with 2 items + inner_dict1 = outer_list[0] + assert isinstance(inner_dict1, dict) and "inner" in inner_dict1 + inner_list1 = inner_dict1["inner"] + assert isinstance(inner_list1, list) and 
len(inner_list1) == 2 + for item in inner_list1: + assert isinstance(item, + MyDataclass) and item.message == "nested3" + + # Second dict in outer list should have "inner" with 1 item + inner_dict2 = outer_list[1] + assert isinstance(inner_dict2, dict) and "inner" in inner_dict2 + inner_list2 = inner_dict2["inner"] + assert isinstance(inner_list2, list) and len(inner_list2) == 1 + assert isinstance( + inner_list2[0], + MyDataclass) and inner_list2[0].message == "nested3" + + # Test with None values in nested structures + result = await core_client.call_utility_async( + "echo_dc_nested", None, "list_of_dicts") + assert isinstance(result, list) and len(result) == 2 + for item in result: + assert isinstance(item, dict) + for val in item.values(): + assert val is None + + finally: + client.shutdown() + + @pytest.mark.parametrize( "multiprocessing_mode,publisher_config", [(True, "tcp"), (False, "inproc")], diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py index 50c1470c67ed..c812a2ec6427 100644 --- a/vllm/v1/serial_utils.py +++ b/vllm/v1/serial_utils.py @@ -7,7 +7,7 @@ from collections.abc import Sequence from inspect import isclass from types import FunctionType -from typing import Any, Optional, Union +from typing import Any, Callable, Optional, Union import cloudpickle import msgspec @@ -59,6 +59,42 @@ def _typestr(val: Any) -> Optional[tuple[str, str]]: return t.__module__, t.__qualname__ +def _encode_type_info_recursive(obj: Any) -> Any: + """Recursively encode type information for nested structures of + lists/dicts.""" + if obj is None: + return None + if type(obj) is list: + return [_encode_type_info_recursive(item) for item in obj] + if type(obj) is dict: + return {k: _encode_type_info_recursive(v) for k, v in obj.items()} + return _typestr(obj) + + +def _decode_type_info_recursive( + type_info: Any, data: Any, convert_fn: Callable[[Sequence[str], Any], + Any]) -> Any: + """Recursively decode type information for nested structures of + lists/dicts.""" + if type_info is None: + return data + if isinstance(type_info, dict): + assert isinstance(data, dict) + return { + k: _decode_type_info_recursive(type_info[k], data[k], convert_fn) + for k in type_info + } + if isinstance(type_info, list) and ( + # Exclude serialized tensors/numpy arrays. + len(type_info) != 2 or not isinstance(type_info[0], str)): + assert isinstance(data, list) + return [ + _decode_type_info_recursive(ti, d, convert_fn) + for ti, d in zip(type_info, data) + ] + return convert_fn(type_info, data) + + class MsgpackEncoder: """Encoder with custom torch tensor and numpy array serialization. @@ -129,12 +165,10 @@ def enc_hook(self, obj: Any) -> Any: result = obj.result if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION: return None, result - # Since utility results are not strongly typed, we also encode - # the type (or a list of types in the case it's a list) to - # help with correct msgspec deserialization. - return _typestr(result) if type(result) is not list else [ - _typestr(v) for v in result - ], result + # Since utility results are not strongly typed, we recursively + # encode type information for nested structures of lists/dicts + # to help with correct msgspec deserialization. 
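
A standalone sketch of the recursive type-tagging idea used here: walk nested lists/dicts, record each leaf's type alongside the data, and use the recorded structure to rebuild the leaves after an untyped msgpack round trip (which would otherwise hand back plain dicts). The names below are illustrative, not the vLLM helpers themselves.

```python
# Illustrative sketch of per-leaf type recording for nested lists/dicts.
from dataclasses import dataclass


@dataclass
class Payload:
    message: str


def encode_types(obj):
    if obj is None:
        return None
    if type(obj) is list:
        return [encode_types(v) for v in obj]
    if type(obj) is dict:
        return {k: encode_types(v) for k, v in obj.items()}
    return (type(obj).__module__, type(obj).__qualname__)


def decode_with_types(type_info, data, rebuild):
    if type_info is None:
        return data
    if isinstance(type_info, dict):
        return {
            k: decode_with_types(type_info[k], data[k], rebuild)
            for k in type_info
        }
    if isinstance(type_info, list):
        return [
            decode_with_types(t, d, rebuild) for t, d in zip(type_info, data)
        ]
    return rebuild(type_info, data)  # leaf: (module, qualname) plus raw data


nested = {"outer": [Payload("a"), None, [Payload("b")]]}
type_info = encode_types(nested)
raw = {"outer": [{"message": "a"}, None, [{"message": "b"}]]}  # post round trip
rebuilt = decode_with_types(type_info, raw, lambda _t, d: Payload(**d))
print(rebuilt)  # {'outer': [Payload(message='a'), None, [Payload(message='b')]]}
```
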
+ return _encode_type_info_recursive(result), result if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION: raise TypeError(f"Object of type {type(obj)} is not serializable" @@ -288,15 +322,9 @@ def _decode_utility_result(self, obj: Any) -> UtilityResult: if not envs.VLLM_ALLOW_INSECURE_SERIALIZATION: raise TypeError("VLLM_ALLOW_INSECURE_SERIALIZATION must " "be set to use custom utility result types") - assert isinstance(result_type, list) - if len(result_type) == 2 and isinstance(result_type[0], str): - result = self._convert_result(result_type, result) - else: - assert isinstance(result, list) - result = [ - self._convert_result(rt, r) - for rt, r in zip(result_type, result) - ] + # Use recursive decoding to handle nested structures + result = _decode_type_info_recursive(result_type, result, + self._convert_result) return UtilityResult(result) def _convert_result(self, result_type: Sequence[str], result: Any) -> Any: From c308501cb6a922af8c4183bd65be0094dd73de9a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Sat, 20 Sep 2025 04:11:03 +0100 Subject: [PATCH 180/518] Improve weight loading for encoder models in Transformers backend (#25289) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/model_executor/models/transformers.py | 28 ++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index f40a20dee63d..3bd4d10316ec 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -702,21 +702,45 @@ def load_weights(self, weights: Iterable[tuple[str, class TransformersModel(TransformersBase): hf_to_vllm_mapper = WeightsMapper( orig_to_new_prefix={ + # Handle BERT-like models + "bert": "model", # Add `model.` prefix for base model checkpoints "": "model.", - # Remove `model.` from places it should not be + # Remove `model.` prefix if it was already there "model.model.": "model.", + # Pooling adapters will be adjacent to `model` + "model.pooler": "pooler", "model.score": "score", + # Classifier adapter's classifier layer is renamed to score + "model.classifier": "score", + }, + orig_to_new_suffix={ + # Replace legacy suffixes used for norms + ".gamma": ".weight", + ".beta": ".bias", }) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__(vllm_config=vllm_config, prefix=prefix) - # Some encoder models have the position_ids buffer in the checkpoint + # After creating a pooling model, `pooler` will be duplicated. + # The one inside `model` comes from the Transformers modelling code. + # The one after `model` is an adapter from vLLM. + # We want to use the adapter so we nullify the original pooler. + if getattr(self.model, "pooler", None) is not None: + self.skip_prefixes.append("pooler.") + self.model.pooler = torch.nn.Identity() + + # Some encoder models have the position_ids buffer in the checkpoint. # vLLM will always pass position_ids as an argument, so we skip loading # the buffer if it exists self.skip_substrs.append("position_ids") + # Some encoder models have the bias of the final classifier layer + # in the checkpoint. 
vLLM does not use this bias, so we skip loading + # it if it exists + self.skip_substrs.append("score.bias") + def create_attention_instances( self, attn_type: AttentionType = AttentionType.DECODER): # TODO(hmellor): Better way to detect encoder models From 36429096171ff8785645c40c662d859dddedd829 Mon Sep 17 00:00:00 2001 From: JartX Date: Sat, 20 Sep 2025 05:18:13 +0200 Subject: [PATCH 181/518] [BUGFIX] GPTQ quantization compatibility for Qwen3 Next MOE models (AutoGPTQ and AutoRound-GPTQ) (#25268) Signed-off-by: JartX --- vllm/model_executor/models/qwen3_next.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 98749c160ba4..ce917f92bd2e 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -148,9 +148,11 @@ def __init__( def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid gate quantization. - # See: https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4 - if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): + # seems to avoid gate quantization while AutoRound does. + if isinstance( + quant_config, + (GPTQConfig, + GPTQMarlinConfig)) and not quant_config.autoround_version: return None return quant_config From b7f186bbb3101e97bb9ad42b7ffb3cdb4bb590fd Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Fri, 19 Sep 2025 21:28:31 -0700 Subject: [PATCH 182/518] [BugFix] Exclude self when checking for port collision (#25286) Signed-off-by: Nick Hill --- vllm/utils/__init__.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index d4013a69e99f..fd1c0af31269 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -987,8 +987,10 @@ def find_process_using_port(port: int) -> Optional[psutil.Process]: if sys.platform.startswith("darwin"): return None + our_pid = os.getpid() for conn in psutil.net_connections(): - if conn.laddr.port == port: + if conn.laddr.port == port and (conn.pid is not None + and conn.pid != our_pid): try: return psutil.Process(conn.pid) except psutil.NoSuchProcess: From 6c5f82e5aa87cd73ce03ce10fc44138f75ee1aea Mon Sep 17 00:00:00 2001 From: "Chendi.Xue" Date: Fri, 19 Sep 2025 23:41:23 -0500 Subject: [PATCH 183/518] [BUG FIX][NON-CUDA]quick fix to avoid call cudagraph_unsafe in attention (#25298) Signed-off-by: Chendi Xue --- vllm/attention/layer.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 3d1269c0ecea..544a72052442 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -29,6 +29,10 @@ logger = init_logger(__name__) USE_XFORMERS_OPS = None +try: + tag_cudagraph_unsafe = (torch._C.Tag.cudagraph_unsafe, ) +except AttributeError: + tag_cudagraph_unsafe = () # type: ignore[assignment] def check_xformers_availability(): @@ -577,7 +581,7 @@ def unified_attention_fake( mutates_args=[], fake_impl=unified_attention_fake, dispatch_key=current_platform.dispatch_key, - tags=(torch._C.Tag.cudagraph_unsafe, ), + tags=tag_cudagraph_unsafe, ) @@ -628,5 +632,5 @@ def unified_attention_with_output_fake( mutates_args=["output", "output_block_scale"], fake_impl=unified_attention_with_output_fake, dispatch_key=current_platform.dispatch_key, - tags=(torch._C.Tag.cudagraph_unsafe, ), + tags=tag_cudagraph_unsafe, ) From 
f91480b2d44c263fb600b5cba5b0e6c7a195f742 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Sat, 20 Sep 2025 13:29:54 +0800 Subject: [PATCH 184/518] [Bugfix] fix tool call arguments is empty (#25223) Signed-off-by: chaunceyjiang Co-authored-by: xin.li --- .../test_completion_with_function_calling.py | 60 +++++++++++++++++++ vllm/entrypoints/chat_utils.py | 8 ++- 2 files changed, 65 insertions(+), 3 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_function_calling.py b/tests/entrypoints/openai/test_completion_with_function_calling.py index 3649cefa9bf4..4355603fcd70 100644 --- a/tests/entrypoints/openai/test_completion_with_function_calling.py +++ b/tests/entrypoints/openai/test_completion_with_function_calling.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import datetime from typing import Union import openai # use the official client for correctness check @@ -284,3 +285,62 @@ async def test_tool_id_kimi_k2(k2_client: openai.AsyncOpenAI, model_name: str, output.extend(chunk.choices[0].delta.tool_calls) for o in output: assert o.id is None or o.id == 'functions.get_current_weather:0' + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("arguments", ["{}", '']) +async def test_no_args_tool_call(client: openai.AsyncOpenAI, model_name: str, + arguments: str): + # Step 1: Define a tool that requires no parameters + tools = [{ + "type": "function", + "function": { + "name": "get_current_time", + "description": + "Get the current date and time. No parameters needed.", + "parameters": { + "type": "object", + "properties": {}, # No parameters + "required": [] # No required fields + } + } + }] + messages = [{"role": "user", "content": "What time is it now?"}] + # Step 2: Send user message and let model decide whether to call the tool + response = await client.chat.completions.create( + model=model_name, + messages=messages, + tools=tools, + tool_choice="auto" # Let model choose automatically + ) + + # Step 3: Check if model wants to call a tool + message = response.choices[0].message + if message.tool_calls: + # Get the first tool call + tool_call = message.tool_calls[0] + tool_name = tool_call.function.name + # Step 4: Execute the tool locally (no parameters) + if tool_name == "get_current_time": + # Test both empty string and "{}" for no-arg tool calls + tool_call.function.arguments = arguments + messages.append(message) + current_time = datetime.datetime.now() + result = current_time.isoformat() + messages.append({ + "role": "tool", + "tool_call_id": tool_call.id, + "content": result, + }) + # Step 5: Send tool result back to model to continue conversation + final_response = await client.chat.completions.create( + model=model_name, + messages=messages, + ) + # Output final natural language response + assert final_response.choices[0].message.content is not None + + else: + # No tool called — just print model's direct reply + assert message.content is not None diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 00ef39f13465..c2c0ad74ef43 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -1450,9 +1450,11 @@ def _postprocess_messages(messages: list[ConversationMessage]) -> None: and isinstance(message["tool_calls"], list) ): for item in message["tool_calls"]: - item["function"]["arguments"] = json.loads( - item["function"]["arguments"] - ) + # if arguments is None or empty string, set to 
{} + if content := item["function"].get("arguments"): + item["function"]["arguments"] = json.loads(content) + else: + item["function"]["arguments"] = {} def parse_chat_messages( From c60e6137f0bf2034853919b3a9d705d7e06b93cf Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 20 Sep 2025 13:30:22 +0800 Subject: [PATCH 185/518] [Optimization] Avoid repeated model architecture conversion for pooling models (#25261) Signed-off-by: DarkLight1337 --- vllm/config/model.py | 22 +++++++++++++++++++++- vllm/model_executor/model_loader/utils.py | 17 ++++++++++++++++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/vllm/config/model.py b/vllm/config/model.py index 4e847922b61e..921322bb475c 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -322,8 +322,28 @@ def compute_hash(self) -> str: factors.append(self.override_generation_config) factors.append(self.rope_scaling) factors.append(self.rope_theta) + # hf_config can control how the model looks! - factors.append(self.hf_config.to_json_string()) + try: + hf_config_json = self.hf_config.to_json_string(use_diff=False) + except TypeError: + from transformers import PretrainedConfig + + from vllm.utils.jsontree import json_map_leaves + + # Handle nested HF configs with unserializable values gracefully + hf_config_json = json.dumps( + json_map_leaves( + lambda v: v.to_dict() + if isinstance(v, PretrainedConfig) else str(v), + self.hf_config.to_dict(), + ), + indent=2, + sort_keys=True, + ) + "\n" + + factors.append(hf_config_json) + str_factors = str(factors) assert_hashable(str_factors) return hashlib.sha256(str(factors).encode()).hexdigest() diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py index bd1773c753a9..e007d431880e 100644 --- a/vllm/model_executor/model_loader/utils.py +++ b/vllm/model_executor/model_loader/utils.py @@ -165,7 +165,11 @@ def device_loading_context(module: torch.nn.Module, # New parameters or parameters already on target device are untouched -def get_model_architecture( +_MODEL_ARCH_BY_HASH = dict[str, tuple[type[nn.Module], str]]() +"""Caches the outputs of `_get_model_architecture`.""" + + +def _get_model_architecture( model_config: ModelConfig) -> tuple[type[nn.Module], str]: architectures = getattr(model_config.hf_config, "architectures", []) @@ -209,6 +213,17 @@ def get_model_architecture( return model_cls, arch +def get_model_architecture( + model_config: ModelConfig) -> tuple[type[nn.Module], str]: + key = model_config.compute_hash() + if key in _MODEL_ARCH_BY_HASH: + return _MODEL_ARCH_BY_HASH[key] + + model_arch = _get_model_architecture(model_config) + _MODEL_ARCH_BY_HASH[key] = model_arch + return model_arch + + def get_model_cls(model_config: ModelConfig) -> type[nn.Module]: return get_model_architecture(model_config)[0] From 9607d5eb449711b349d4c2bee0a9c94afcc7ed14 Mon Sep 17 00:00:00 2001 From: Chen Zhang Date: Fri, 19 Sep 2025 23:43:59 -0700 Subject: [PATCH 186/518] [Hybrid Allocator] Support full attention with different hidden size (#25101) Signed-off-by: Chen Zhang --- tests/v1/core/test_kv_cache_utils.py | 118 +++++++++++++++++++--- vllm/v1/core/kv_cache_utils.py | 146 ++++++++++++++++++++------- vllm/v1/engine/core.py | 16 ++- vllm/v1/kv_cache_interface.py | 70 +++++++++++++ vllm/v1/worker/gpu_model_runner.py | 65 ++++++------ vllm/v1/worker/utils.py | 3 +- 6 files changed, 325 insertions(+), 93 deletions(-) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index 4bf6bbbfeae2..4cb7ed6ce382 100644 
--- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -18,12 +18,14 @@ from vllm.v1.core.kv_cache_utils import ( BlockHash, FreeKVCacheBlockQueue, KVCacheBlock, PrefixCachingMetrics, estimate_max_model_len, generate_block_hash_extra_keys, - get_kv_cache_configs, get_max_concurrency_for_kv_cache_config, - get_request_block_hasher, hash_block_tokens, init_none_hash, - is_kv_cache_type_uniform, make_block_hash_with_group_id) + generate_scheduler_kv_cache_config, get_kv_cache_configs, + get_max_concurrency_for_kv_cache_config, get_request_block_hasher, + hash_block_tokens, init_none_hash, is_kv_cache_spec_uniform, + make_block_hash_with_group_id) from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, KVCacheSpec, - KVCacheTensor, SlidingWindowSpec) + KVCacheTensor, SlidingWindowSpec, + UniformTypeKVCacheSpecs) from vllm.v1.metrics.stats import PrefixCacheStats from vllm.v1.request import Request @@ -927,36 +929,36 @@ def test_merge_kv_cache_spec(): assert merged_layer_spec.sliding_window == 1 -def test_is_kv_cache_type_uniform(): +def test_is_kv_cache_spec_uniform(): kv_cache_spec = { "layer_1": new_kv_cache_spec(num_kv_heads=32), "layer_2": new_kv_cache_spec(num_kv_heads=32), } - assert is_kv_cache_type_uniform(kv_cache_spec) + assert is_kv_cache_spec_uniform(kv_cache_spec) kv_cache_spec = { "layer_1": new_kv_cache_spec(num_kv_heads=32), "layer_2": new_kv_cache_spec(num_kv_heads=32, sliding_window=1), } - assert is_kv_cache_type_uniform(kv_cache_spec) + assert is_kv_cache_spec_uniform(kv_cache_spec) kv_cache_spec = { "layer_1": new_kv_cache_spec(num_kv_heads=32), "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=1), } - assert not is_kv_cache_type_uniform(kv_cache_spec) + assert not is_kv_cache_spec_uniform(kv_cache_spec) kv_cache_spec = { "layer_1": new_sliding_window_spec(num_kv_heads=32, sliding_window=1), "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=1), } - assert is_kv_cache_type_uniform(kv_cache_spec) + assert is_kv_cache_spec_uniform(kv_cache_spec) kv_cache_spec = { "layer_1": new_sliding_window_spec(num_kv_heads=32, sliding_window=1), "layer_2": new_sliding_window_spec(num_kv_heads=32, sliding_window=2), } - assert not is_kv_cache_type_uniform(kv_cache_spec) + assert not is_kv_cache_spec_uniform(kv_cache_spec) @pytest.mark.parametrize( @@ -1286,14 +1288,28 @@ def test_get_kv_cache_config_one_worker(): ], ) - # different hidden size, unimplemented + # different hidden size kv_cache_specs_hybrid = { 'layer_1': new_kv_cache_spec(head_size=128), - 'layer_2': new_kv_cache_spec(), + 'layer_2': new_kv_cache_spec(head_size=64), } - with pytest.raises(NotImplementedError): - get_kv_cache_configs(vllm_config, [kv_cache_specs_hybrid], - [mem_per_block_per_layer * 2 * 32])[0] + kv_cache_config_hybrid = get_kv_cache_configs( + vllm_config, [kv_cache_specs_hybrid], + [mem_per_block_per_layer * 3 * 32])[0] + assert kv_cache_config_hybrid == KVCacheConfig( + num_blocks=32, + kv_cache_tensors=[ + KVCacheTensor(size=mem_per_block_per_layer * 32 * 2, + shared_by=["layer_1"]), + KVCacheTensor(size=mem_per_block_per_layer * 32, + shared_by=["layer_2"]), + ], + kv_cache_groups=[ + KVCacheGroupSpec(["layer_1", "layer_2"], + UniformTypeKVCacheSpecs( + block_size=16, + kv_cache_specs=kv_cache_specs_hybrid)) + ]) # Test num_gpu_blocks_override vllm_config.cache_config.num_gpu_blocks_override = 16 @@ -1324,3 +1340,75 @@ def test_get_kv_cache_configs_attention_free(): kv_cache_groups=[], ) ] + + +def 
test_generate_uniform_type_kv_cache_specs(): + # All layers are full attention, can be merged + kv_cache_specs = { + 'layer_1': new_kv_cache_spec(), + 'layer_2': new_kv_cache_spec(head_size=128), + } + uniform_spec = UniformTypeKVCacheSpecs.from_specs(kv_cache_specs) + assert uniform_spec == UniformTypeKVCacheSpecs( + block_size=16, kv_cache_specs=kv_cache_specs) + + # Full attention + sliding window, cannot be merged + kv_cache_specs = { + 'layer_1': new_kv_cache_spec(), + 'layer_2': new_sliding_window_spec(sliding_window=1), + } + uniform_spec = UniformTypeKVCacheSpecs.from_specs(kv_cache_specs) + assert uniform_spec is None + + # different order of full attention + sliding window, cannot be merged + kv_cache_specs = { + 'layer_1': new_sliding_window_spec(sliding_window=1), + 'layer_2': new_kv_cache_spec(), + } + uniform_spec = UniformTypeKVCacheSpecs.from_specs(kv_cache_specs) + assert uniform_spec is None + + # Same-size sliding window, can be merged + kv_cache_specs = { + 'layer_1': new_sliding_window_spec(sliding_window=1), + 'layer_2': new_sliding_window_spec(sliding_window=1, head_size=128), + } + uniform_spec = UniformTypeKVCacheSpecs.from_specs(kv_cache_specs) + assert uniform_spec == UniformTypeKVCacheSpecs( + block_size=16, kv_cache_specs=kv_cache_specs) + + # different block sizes, cannot be merged + kv_cache_specs = { + 'layer_1': new_kv_cache_spec(block_size=16), + 'layer_2': new_kv_cache_spec(block_size=32), + } + uniform_spec = UniformTypeKVCacheSpecs.from_specs(kv_cache_specs) + assert uniform_spec is None + + +def test_generate_scheduler_kv_cache_config(): + kv_cache_specs = { + 'layer_1': new_kv_cache_spec(), + 'layer_2': new_kv_cache_spec(head_size=128), + } + kv_cache_configs = [ + KVCacheConfig( + num_blocks=10, + kv_cache_tensors=[], + kv_cache_groups=[ + KVCacheGroupSpec(['layer_1', 'layer_2'], + UniformTypeKVCacheSpecs( + block_size=16, + kv_cache_specs=kv_cache_specs)), + ], + ) + ] + scheduler_kv_cache_config = generate_scheduler_kv_cache_config( + kv_cache_configs) + assert scheduler_kv_cache_config == KVCacheConfig( + num_blocks=10, + kv_cache_tensors=[], + kv_cache_groups=[ + KVCacheGroupSpec(['layer_1', 'layer_2'], new_kv_cache_spec()) + ], + ) diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index bc2ec5e42ea2..3ccd00121f8e 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """KV-Cache Utilities.""" +import copy import os from collections import defaultdict, deque from collections.abc import Iterable, Sequence @@ -15,7 +16,8 @@ from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec, FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, KVCacheSpec, - KVCacheTensor, SlidingWindowSpec) + KVCacheTensor, SlidingWindowSpec, + UniformTypeKVCacheSpecs) from vllm.v1.metrics.stats import PrefixCacheStats from vllm.v1.request import Request @@ -750,7 +752,7 @@ def create_kv_cache_group_specs( return kv_cache_groups -def is_kv_cache_type_uniform(kv_cache_spec: dict[str, KVCacheSpec]) -> bool: +def is_kv_cache_spec_uniform(kv_cache_spec: dict[str, KVCacheSpec]) -> bool: """ Whether all layers in the given KVCacheSpec have the same KV cache spec. 
Note that we regard FullAttentionSpec with and without sliding window as @@ -793,6 +795,21 @@ def get_max_concurrency_for_kv_cache_config( return max_concurrency +def may_override_num_blocks(vllm_config: VllmConfig, num_blocks: int) -> int: + """ + Override the number of kv cache blocks if `num_gpu_blocks_override` is set. + """ + if vllm_config.cache_config.num_gpu_blocks_override is not None: + num_gpu_blocks_override = \ + vllm_config.cache_config.num_gpu_blocks_override + logger.info( + "Overriding num_gpu_blocks=%d with " + "num_gpu_blocks_override=%d", num_blocks, num_gpu_blocks_override) + num_blocks = num_gpu_blocks_override + + return num_blocks + + def get_num_blocks(vllm_config: VllmConfig, num_layers: int, available_memory: int, page_size: int) -> int: """ @@ -806,13 +823,7 @@ def get_num_blocks(vllm_config: VllmConfig, num_layers: int, """ num_blocks = int(available_memory // page_size // num_layers) num_blocks = max(num_blocks, 0) - if vllm_config.cache_config.num_gpu_blocks_override is not None: - num_gpu_blocks_override = \ - vllm_config.cache_config.num_gpu_blocks_override - logger.info( - "Overriding num_gpu_blocks=%d with " - "num_gpu_blocks_override=%d", num_blocks, num_gpu_blocks_override) - num_blocks = num_gpu_blocks_override + num_blocks = may_override_num_blocks(vllm_config, num_blocks) return num_blocks @@ -825,11 +836,11 @@ def get_uniform_page_size(kv_cache_spec: dict[str, KVCacheSpec]) -> int: return page_sizes.pop() -def _get_kv_cache_groups_uniform_type( +def _get_kv_cache_groups_uniform_spec( kv_cache_specs: dict[str, KVCacheSpec]) -> list[KVCacheGroupSpec]: """ - Generates the KV cache configuration for a model with one type of KV cache. - Divide the available memory equally among all layers. + Generates the KV cache configuration for a model with the same KV cache + spec for all layers. Args: kv_cache_specs: The kv cache spec of each attention layer in the model @@ -842,6 +853,22 @@ def _get_kv_cache_groups_uniform_type( [list(kv_cache_specs.keys())]) +def _get_kv_cache_groups_uniform_type( + spec: UniformTypeKVCacheSpecs) -> list[KVCacheGroupSpec]: + """ + Generates the KV cache configuration for a model with one type of KV cache + but different hidden sizes. All layers are merged into one group. + + Args: + spec: The UniformTypeKVCacheSpecs of the model + + Returns: + The generated KVCacheGroupSpecs + """ + + return [KVCacheGroupSpec(list(spec.kv_cache_specs.keys()), spec)] + + def is_kv_cache_page_size_uniform( kv_cache_spec: dict[str, KVCacheSpec]) -> bool: """ @@ -1000,28 +1027,45 @@ def get_kv_cache_config_from_groups(vllm_config: VllmConfig, ) # Determine how model runners should initialize the KV cache tensors. - # We will have group_size memory pools, each is shared by one layer from - # each group. As layers of different groups have different block table, - # they will use different parts of the shared Tensor. 
- # The memory layout for 3 groups (full.0, full.1), (sw.0, sw.2), - # (sw.1, padding) will be: (group_size = 2) - # full.0, sw.0, sw.1: share a Tensor with size=available_memory//2 - # full.1, sw.2: share another Tensor with size=available_memory//2 - group_size = max(len(group.layer_names) for group in kv_cache_groups) - - page_size = get_uniform_page_size(kv_cache_specs) - assert group_size > 0, "group_size must be greater than 0" - num_blocks = get_num_blocks(vllm_config, group_size, available_memory, - page_size) - per_memory_pool_size = page_size * num_blocks - kv_cache_tensors = [] - for i in range(group_size): - shared_by = [] - for j in range(len(kv_cache_groups)): - if i < len(kv_cache_groups[j].layer_names): - shared_by.append(kv_cache_groups[j].layer_names[i]) - kv_cache_tensors.append( - KVCacheTensor(size=per_memory_pool_size, shared_by=shared_by)) + if len(kv_cache_groups) == 1 and \ + isinstance(kv_cache_groups[0].kv_cache_spec, UniformTypeKVCacheSpecs): + # Special case: all layers have the same type of KV cache but with + # different hidden size. Allocate different amount of memory for each + # layer based on its hidden size. + num_blocks = available_memory // kv_cache_groups[ + 0].kv_cache_spec.page_size_bytes + num_blocks = may_override_num_blocks(vllm_config, num_blocks) + per_layer_specs = kv_cache_groups[0].kv_cache_spec.kv_cache_specs + kv_cache_tensors = [ + KVCacheTensor(size=per_layer_specs[layer_name].page_size_bytes * + num_blocks, + shared_by=[layer_name]) + for layer_name in kv_cache_groups[0].layer_names + ] + else: + # General case: + # We will have group_size memory pools, each is shared by one layer from + # each group. As layers of different groups have different block table, + # they will use different parts of the shared Tensor. + # The memory layout for 3 groups (full.0, full.1), (sw.0, sw.2), + # (sw.1, padding) will be: (group_size = 2) + # full.0, sw.0, sw.1: share a Tensor with size=available_memory//2 + # full.1, sw.2: share another Tensor with size=available_memory//2 + group_size = max(len(group.layer_names) for group in kv_cache_groups) + + page_size = get_uniform_page_size(kv_cache_specs) + assert group_size > 0, "group_size must be greater than 0" + num_blocks = get_num_blocks(vllm_config, group_size, available_memory, + page_size) + kv_cache_tensors = [] + for i in range(group_size): + shared_by = [] + for j in range(len(kv_cache_groups)): + if i < len(kv_cache_groups[j].layer_names): + shared_by.append(kv_cache_groups[j].layer_names[i]) + kv_cache_tensors.append( + KVCacheTensor(size=page_size * num_blocks, + shared_by=shared_by)) kv_cache_config = KVCacheConfig( num_blocks=num_blocks, @@ -1059,7 +1103,7 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]): kv_cache_spec: The kv cache spec of each attention layer in the model """ - if is_kv_cache_type_uniform(kv_cache_spec): + if is_kv_cache_spec_uniform(kv_cache_spec): return logger.warning( @@ -1097,7 +1141,7 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]): attention_chunk_size=spec.attention_chunk_size, ) - if not is_kv_cache_type_uniform(kv_cache_spec): + if not is_kv_cache_spec_uniform(kv_cache_spec): raise ValueError("Hybrid KV cache manager is disabled but failed to " "convert the KV cache specs to one unified type.") @@ -1122,11 +1166,16 @@ def get_kv_cache_groups( # This returns an empty list to allow for the KVCacheManager to handle # attention free models. 
return [] - elif is_kv_cache_type_uniform(kv_cache_spec): + elif is_kv_cache_spec_uniform(kv_cache_spec): # KV cache of all layers are the same, which is true for # most models. Allocate the same amount of memory for # each layer. - return _get_kv_cache_groups_uniform_type(kv_cache_spec) + return _get_kv_cache_groups_uniform_spec(kv_cache_spec) + elif uniform_spec := UniformTypeKVCacheSpecs.from_specs(kv_cache_spec): + # All layers need the same number of token slots (e.g., all layers are + # full attention, or all layers are sliding window attention with the + # same window size). Put all layers into one group. + return _get_kv_cache_groups_uniform_type(uniform_spec) elif is_kv_cache_page_size_uniform(kv_cache_spec): # Model contains multiple attention types, but KV cache of all layers # have the same physical memory per block per layer. Split the layers @@ -1137,6 +1186,27 @@ def get_kv_cache_groups( raise NotImplementedError +def generate_scheduler_kv_cache_config( + kv_cache_configs: list[KVCacheConfig]) -> KVCacheConfig: + """ + Generate the KV cache configuration for the scheduler. + """ + assert all([ + cfg.num_blocks == kv_cache_configs[0].num_blocks + for cfg in kv_cache_configs + ]) + # All workers have the same kv_cache_config except layer names, so use + # an arbitrary one to initialize the scheduler. + cfg = copy.deepcopy(kv_cache_configs[0]) + for group in cfg.kv_cache_groups: + if isinstance(group.kv_cache_spec, UniformTypeKVCacheSpecs): + # All layers in the UniformTypeKVCacheSpecs have the same type, + # so use an arbitrary one to initialize the scheduler. + group.kv_cache_spec = next( + iter(group.kv_cache_spec.kv_cache_specs.values())) + return cfg + + def get_kv_cache_configs(vllm_config: VllmConfig, kv_cache_specs: list[dict[str, KVCacheSpec]], available_memory: list[int]) -> list[KVCacheConfig]: diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index a022e9c0d705..a43042a5510a 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -29,7 +29,9 @@ maybe_register_config_serialize_by_value) from vllm.utils import (decorate_logs, get_hash_fn_by_name, make_zmq_socket, resolve_obj_by_qualname, set_process_title) -from vllm.v1.core.kv_cache_utils import (BlockHash, get_kv_cache_configs, +from vllm.v1.core.kv_cache_utils import (BlockHash, + generate_scheduler_kv_cache_config, + get_kv_cache_configs, get_request_block_hasher, init_none_hash) from vllm.v1.core.sched.interface import SchedulerInterface @@ -196,16 +198,10 @@ def _initialize_kv_caches( kv_cache_configs = get_kv_cache_configs(vllm_config, kv_cache_specs, available_gpu_memory) - - # All workers have the same kv_cache_config except layer names, so use - # an arbitrary one to initialize the scheduler. 
- assert all([ - cfg.num_blocks == kv_cache_configs[0].num_blocks - for cfg in kv_cache_configs - ]) - num_gpu_blocks = kv_cache_configs[0].num_blocks + scheduler_kv_cache_config = generate_scheduler_kv_cache_config( + kv_cache_configs) + num_gpu_blocks = scheduler_kv_cache_config.num_blocks num_cpu_blocks = 0 - scheduler_kv_cache_config = kv_cache_configs[0] # Initialize kv cache and warmup the execution self.model_executor.initialize_from_config(kv_cache_configs) diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py index 0cf92a680a68..f72cc8f93a6c 100644 --- a/vllm/v1/kv_cache_interface.py +++ b/vllm/v1/kv_cache_interface.py @@ -234,6 +234,76 @@ def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int: return cdiv(max_encoder_len, self.block_size) * self.page_size_bytes +@dataclass(frozen=True) +class UniformTypeKVCacheSpecs(KVCacheSpec): + """ + A KV cache spec for multiple layers with the same type of attention. Here, + same types means always need the same number of token slots. For example, + sliding window attentions with different window sizes are not the same type + and should not be merged into one UniformTypeKVCacheSpecs. + """ + kv_cache_specs: dict[str, KVCacheSpec] + + @property + def page_size_bytes(self) -> int: + return sum(spec.page_size_bytes + for spec in self.kv_cache_specs.values()) + + def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int: + max_num_pages = max( + cdiv(spec.max_memory_usage_bytes(vllm_config), + spec.page_size_bytes) + for spec in self.kv_cache_specs.values()) + return max_num_pages * self.page_size_bytes + + @classmethod + def is_uniform_type(cls, kv_cache_specs: dict[str, KVCacheSpec]) -> bool: + """ + Whether all layers have the same type of KV cache spec. + """ + block_sizes = set(spec.block_size for spec in kv_cache_specs.values()) + if len(block_sizes) > 1: + # Different block sizes, not uniform. + return False + one_spec = next(iter(kv_cache_specs.values())) + if isinstance(one_spec, (FullAttentionSpec, CrossAttentionSpec)): + return all( + isinstance(spec, type(one_spec)) + for spec in kv_cache_specs.values()) + elif isinstance(one_spec, SlidingWindowSpec): + return all( + isinstance(spec, SlidingWindowSpec) + and spec.sliding_window == one_spec.sliding_window + for spec in kv_cache_specs.values()) + elif isinstance(one_spec, ChunkedLocalAttentionSpec): + return all( + isinstance(spec, ChunkedLocalAttentionSpec) + and spec.attention_chunk_size == one_spec.attention_chunk_size + for spec in kv_cache_specs.values()) + elif isinstance(one_spec, MambaSpec): + return all( + isinstance(spec, MambaSpec) and spec.num_speculative_blocks == + one_spec.num_speculative_blocks + for spec in kv_cache_specs.values()) + else: + # NOTE(Chen): Please add new branches for new KV cache spec types. + raise NotImplementedError( + f"Unsupported KV cache spec type: {type(one_spec)}") + + @classmethod + def from_specs(cls, kv_cache_specs: dict[str, + KVCacheSpec]) -> Optional[Self]: + """ + Return a SameTypeKVCacheSpecs object if all layers have the same type + of KV cache spec. Return None if not. 
+ """ + if cls.is_uniform_type(kv_cache_specs): + block_size = next(iter(kv_cache_specs.values())).block_size + return cls(block_size=block_size, kv_cache_specs=kv_cache_specs) + else: + return None + + @dataclass class KVCacheTensor: """ diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index dffadd1d769b..233df8f1b0e9 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -8,7 +8,7 @@ from collections.abc import Iterator from contextlib import contextmanager from copy import deepcopy -from typing import TYPE_CHECKING, Any, Optional, Union, cast +from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union, cast import numpy as np import torch @@ -74,7 +74,8 @@ EncoderOnlyAttentionSpec, FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, KVCacheSpec, - MambaSpec, SlidingWindowSpec) + MambaSpec, SlidingWindowSpec, + UniformTypeKVCacheSpecs) # yapf: enable from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput, DraftTokenIds, LogprobsLists, LogprobsTensors, @@ -1187,7 +1188,7 @@ def _prepare_inputs( common_prefix_len = self._compute_cascade_attn_prefix_len( num_scheduled_tokens, num_common_prefix_blocks, - kv_cache_group_spec.kv_cache_spec, + attn_group.kv_cache_spec, builder, ) @@ -3453,12 +3454,16 @@ def initialize_attn_backend(self, kv_cache_config: KVCacheConfig) -> None: assert len(self.attn_groups) == 0, \ "Attention backends are already initialized" - def get_attn_backends_for_layers( - layer_names: list[str] - ) -> dict[type[AttentionBackend], list[str]]: - layers = get_layers_from_vllm_config(self.vllm_config, - AttentionLayerBase, - layer_names) + class AttentionGroupKey(NamedTuple): + attn_backend: type[AttentionBackend] + kv_cache_spec: KVCacheSpec + + def get_attn_backends_for_group( + kv_cache_group_spec: KVCacheGroupSpec, + ) -> dict[AttentionGroupKey, list[str]]: + layers = get_layers_from_vllm_config( + self.vllm_config, AttentionLayerBase, + kv_cache_group_spec.layer_names) attn_backends = {} attn_backend_layers = defaultdict(list) # Dedupe based on full class name; this is a bit safer than @@ -3466,7 +3471,7 @@ def get_attn_backends_for_layers( # attention backend subclasses (e.g. ChunkedLocalAttention) unless # they are cached correctly, there will be different objects per # layer. 
- for layer_name in layer_names: + for layer_name in kv_cache_group_spec.layer_names: attn_backend = layers[layer_name].get_attn_backend() if layer_name in self.kv_sharing_fast_prefill_eligible_layers: @@ -3475,8 +3480,14 @@ def get_attn_backends_for_layers( attn_backend, ) - key = attn_backend.full_cls_name() - attn_backends[key] = attn_backend + full_cls_name = attn_backend.full_cls_name() + layer_kv_cache_spec = kv_cache_group_spec.kv_cache_spec + if isinstance(layer_kv_cache_spec, UniformTypeKVCacheSpecs): + layer_kv_cache_spec = layer_kv_cache_spec.kv_cache_specs[ + layer_name] + key = (full_cls_name, layer_kv_cache_spec) + attn_backends[key] = AttentionGroupKey(attn_backend, + layer_kv_cache_spec) attn_backend_layers[key].append(layer_name) return { attn_backends[k]: v @@ -3484,11 +3495,11 @@ def get_attn_backends_for_layers( } def create_attn_groups( - attn_backends_map: dict[AttentionBackend, list[str]], - kv_cache_spec: KVCacheSpec, + attn_backends_map: dict[AttentionGroupKey, list[str]], ) -> list[AttentionGroup]: attn_groups: list[AttentionGroup] = [] - for attn_backend, layer_names in attn_backends_map.items(): + for (attn_backend, + kv_cache_spec), layer_names in attn_backends_map.items(): attn_metadata_builders = [] attn_metadata_builders.append(attn_backend.get_builder_cls()( kv_cache_spec, @@ -3506,16 +3517,13 @@ def create_attn_groups( )) attn_group = AttentionGroup(attn_backend, attn_metadata_builders, - layer_names) + layer_names, kv_cache_spec) attn_groups.append(attn_group) return attn_groups for kv_cache_group_spec in kv_cache_config.kv_cache_groups: - kv_cache_spec = kv_cache_group_spec.kv_cache_spec - attn_backends = get_attn_backends_for_layers( - kv_cache_group_spec.layer_names) - self.attn_groups.append( - create_attn_groups(attn_backends, kv_cache_spec)) + attn_backends = get_attn_backends_for_group(kv_cache_group_spec) + self.attn_groups.append(create_attn_groups(attn_backends)) # Calculate reorder batch threshold (if needed) self.calculate_reorder_batch_threshold() @@ -3680,14 +3688,11 @@ def _allocate_kv_cache_tensors( def _attn_group_iterator(self) -> Iterator[AttentionGroup]: return itertools.chain.from_iterable(self.attn_groups) - def _kv_cache_spec_attn_group_iterator( - self) -> Iterator[tuple[KVCacheSpec, AttentionGroup]]: + def _kv_cache_spec_attn_group_iterator(self) -> Iterator[AttentionGroup]: if not self.kv_cache_config.kv_cache_groups: return - for kv_cache_spec_id, attn_groups in enumerate(self.attn_groups): - for attn_group in attn_groups: - yield self.kv_cache_config.kv_cache_groups[ - kv_cache_spec_id].kv_cache_spec, attn_group + for attn_groups in self.attn_groups: + yield from attn_groups def _reshape_kv_cache_tensors( self, @@ -3707,7 +3712,8 @@ def _reshape_kv_cache_tensors( """ kv_caches: dict[str, torch.Tensor] = {} has_attn, has_mamba = False, False - for kv_cache_spec, group in self._kv_cache_spec_attn_group_iterator(): + for group in self._kv_cache_spec_attn_group_iterator(): + kv_cache_spec = group.kv_cache_spec attn_backend = group.backend for layer_name in group.layer_names: if layer_name in self.runner_only_attn_layers: @@ -3787,7 +3793,8 @@ def _update_hybrid_attention_mamba_layout( kv_caches: The KV cache buffer of each layer. 
""" - for kv_cache_spec, group in self._kv_cache_spec_attn_group_iterator(): + for group in self._kv_cache_spec_attn_group_iterator(): + kv_cache_spec = group.kv_cache_spec for layer_name in group.layer_names: kv_cache = kv_caches[layer_name] if (isinstance(kv_cache_spec, AttentionSpec) diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index b76ac633892f..021d18b2500f 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -15,7 +15,7 @@ from vllm.platforms import current_platform from vllm.v1.attention.backends.utils import AttentionMetadataBuilder from vllm.v1.core.encoder_cache_manager import compute_mm_encoder_budget -from vllm.v1.kv_cache_interface import KVCacheGroupSpec +from vllm.v1.kv_cache_interface import KVCacheGroupSpec, KVCacheSpec if TYPE_CHECKING: from vllm.attention.layer import Attention @@ -132,6 +132,7 @@ class AttentionGroup: backend: type[AttentionBackend] metadata_builders: list[AttentionMetadataBuilder] layer_names: list[str] + kv_cache_spec: KVCacheSpec def get_metadata_builder(self, ubatch_id: Optional[int] = None From be874c020196080305baf988ed8c1c82047323be Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 20 Sep 2025 00:04:05 -0700 Subject: [PATCH 187/518] [Bugfix] Fix Qwen3-VL-MoE weight loading for EP (#25300) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_vl_moe.py | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 625f94cf7ad7..7912cf3ea52b 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -122,9 +122,10 @@ def forward( def load_fused_expert_weights(self, name: str, params_dict: dict, loaded_weight: torch.Tensor, shard_id: str, - num_experts: int): + num_experts: int) -> bool: param = params_dict[name] weight_loader = typing.cast(Callable[..., bool], param.weight_loader) + loaded_local_expert = False for expert_id in range(num_experts): curr_expert_weight = loaded_weight[expert_id] success = weight_loader(param, @@ -133,9 +134,10 @@ def load_fused_expert_weights(self, name: str, params_dict: dict, shard_id, expert_id, return_success=True) - if not success: - return False - return True + if success: + loaded_local_expert = True + + return loaded_local_expert def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: @@ -345,4 +347,4 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): for _ in range(self.deepstack_num_level) ] if self.use_deepstack else None self.visual_dim = config.vision_config.out_hidden_size - self.multiscale_dim = self.visual_dim * self.deepstack_num_level \ No newline at end of file + self.multiscale_dim = self.visual_dim * self.deepstack_num_level From 3d9a1d2de5091455bb2fbf6b21fc9188fd4612a4 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 20 Sep 2025 15:14:35 +0800 Subject: [PATCH 188/518] [V1] Support `LLM.apply_model` (#18465) Signed-off-by: DarkLight1337 --- tests/conftest.py | 12 +--- tests/kernels/moe/test_mxfp4_moe.py | 37 ++++++---- .../multimodal/generation/test_qwen2_vl.py | 46 ++++++------ tests/models/quantization/test_awq.py | 2 +- tests/quantization/test_compressed_tensors.py | 18 +++-- tests/quantization/test_fp8.py | 8 +-- tests/quantization/test_gptq_dynamic.py | 71 ++++++++++--------- tests/quantization/test_lm_head.py | 4 +- tests/quantization/test_modelopt.py | 10 +-- tests/quantization/test_ptpc_fp8.py | 47 +++++++----- 
tests/quantization/test_quark.py | 26 +++---- .../test_register_quantization_config.py | 17 +++-- vllm/engine/llm_engine.py | 7 +- vllm/entrypoints/llm.py | 9 ++- vllm/executor/executor_base.py | 33 +++++---- vllm/v1/engine/llm_engine.py | 7 +- vllm/worker/worker_base.py | 9 ++- 17 files changed, 194 insertions(+), 169 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3cd93f4ad328..e8e95357ff5b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -987,17 +987,7 @@ def score( return [req_output.outputs.score for req_output in req_outputs] def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: - if hasattr(self.llm.llm_engine, "model_executor"): - # This works either in V0 or in V1 with - # VLLM_ENABLE_V1_MULTIPROCESSING=0 - executor = self.llm.llm_engine.model_executor - return executor.apply_model(func) - - # This works in V1 with VLLM_ALLOW_INSECURE_SERIALIZATION=1 - def _apply_model(self): - return func(self.get_model()) - - return self.llm.llm_engine.collective_rpc(_apply_model) + return self.llm.apply_model(func) def get_llm(self) -> LLM: return self.llm diff --git a/tests/kernels/moe/test_mxfp4_moe.py b/tests/kernels/moe/test_mxfp4_moe.py index a3b8f07638d9..61d3311cc162 100644 --- a/tests/kernels/moe/test_mxfp4_moe.py +++ b/tests/kernels/moe/test_mxfp4_moe.py @@ -1,21 +1,24 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import importlib import importlib.metadata from dataclasses import dataclass +from importlib.util import find_spec from typing import Optional import pytest import torch from packaging import version +from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501 + QuarkLinearMethod, QuarkW4A4MXFP4) +from vllm.model_executor.layers.quantization.quark.quark_moe import ( # noqa: E501 + QuarkW4A4MXFp4MoEMethod) from vllm.platforms import current_platform from vllm.utils.flashinfer import has_flashinfer -QUARK_MXFP4_AVAILABLE = importlib.util.find_spec( - "quark") is not None and version.parse( - importlib.metadata.version("amd-quark")) >= version.parse('0.8.99') +QUARK_MXFP4_AVAILABLE = find_spec("quark") is not None and version.parse( + importlib.metadata.version("amd-quark")) >= version.parse('0.8.99') TRTLLM_GEN_MXFP4_AVAILABLE = current_platform.is_cuda( ) and current_platform.is_device_capability(100) @@ -39,6 +42,12 @@ class ModelCase: tp: int +@pytest.fixture(scope="function", autouse=True) +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + + @pytest.mark.parametrize('model_case', [ ModelCase("fxmarty/qwen_1.5-moe-a2.7b-mxfp4", tp=1), ModelCase("fxmarty/deepseek_r1_3_layers_mxfp4", tp=8), @@ -55,21 +64,19 @@ def test_mxfp4_loading_and_execution_moe(vllm_runner, model_case: ModelCase): tensor_parallel_size=model_case.tp, load_format="dummy") as llm: - # TODO: llm.apply_model(check_model) currently relies on V0 internals. - # Re-enable this later. 
- # def check_model(model): - # layer = model.model.layers[0] + def check_model(model): + layer = model.model.layers[0] - # qkv_proj = layer.self_attn.qkv_proj + qkv_proj = layer.self_attn.qkv_proj - # assert isinstance(qkv_proj.quant_method, QuarkLinearMethod) - # assert isinstance(qkv_proj.scheme, QuarkW4A4MXFP4) + assert isinstance(qkv_proj.quant_method, QuarkLinearMethod) + assert isinstance(qkv_proj.scheme, QuarkW4A4MXFP4) - # assert isinstance(layer.mlp.experts.quant_method, - # QuarkW4A4MXFp4MoEMethod) + assert isinstance(layer.mlp.experts.quant_method, + QuarkW4A4MXFp4MoEMethod) - # if model_case.model_id == "fxmarty/qwen_1.5-moe-a2.7b-mxfp4": - # llm.apply_model(check_model) + if model_case.model_id == "fxmarty/qwen_1.5-moe-a2.7b-mxfp4": + llm.apply_model(check_model) output = llm.generate_greedy("Today I am in the French Alps and", max_tokens=20) diff --git a/tests/models/multimodal/generation/test_qwen2_vl.py b/tests/models/multimodal/generation/test_qwen2_vl.py index a81f5e7ec887..e56f4e4075be 100644 --- a/tests/models/multimodal/generation/test_qwen2_vl.py +++ b/tests/models/multimodal/generation/test_qwen2_vl.py @@ -10,6 +10,7 @@ from vllm.multimodal.image import rescale_image_size from vllm.multimodal.video import rescale_video_size, sample_frames_from_video +from vllm.utils import set_default_torch_num_threads from ....conftest import (IMAGE_ASSETS, VIDEO_ASSETS, PromptImageInput, PromptVideoInput, VllmRunner) @@ -17,11 +18,9 @@ @pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - V1 Test: batch_make_xxxxx_embeddings calls a V0 internal - """ - monkeypatch.setenv('VLLM_USE_V1', '0') +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") models = ["Qwen/Qwen2-VL-2B-Instruct"] @@ -126,9 +125,8 @@ def get_image_embeds(model): image_grid_thw_on_device = image_grid_thw.to(visual.device, dtype=torch.int64) return visual(pixel_values_on_device, - grid_thw=image_grid_thw_on_device) + grid_thw=image_grid_thw_on_device).cpu() - # V1 Test: this calls a V0 internal. image_embeds = torch.concat(llm.apply_model(get_image_embeds)) # split into original batches @@ -210,7 +208,7 @@ def get_image_embeds(model): video_grid_thw_on_device = video_grid_thw.to(visual.device, dtype=torch.int64) return visual(pixel_values_on_device, - grid_thw=video_grid_thw_on_device) + grid_thw=video_grid_thw_on_device).cpu() # V1 Test: this calls a V0 internal. 
video_embeds = torch.concat(llm.apply_model(get_image_embeds)) @@ -266,19 +264,22 @@ def run_embedding_input_test( processor = AutoProcessor.from_pretrained(model) # max_model_len should be greater than image_feature_size - with vllm_runner(model, - runner="generate", - max_model_len=4000, - max_num_seqs=3, - dtype=dtype, - limit_mm_per_prompt={ - "image": mm_limit, - "video": mm_limit - }, - tensor_parallel_size=tensor_parallel_size, - distributed_executor_backend=distributed_executor_backend - ) as vllm_model: + with set_default_torch_num_threads(1): + vllm_model = vllm_runner( + model, + runner="generate", + max_model_len=4000, + max_num_seqs=3, + dtype=dtype, + limit_mm_per_prompt={ + "image": mm_limit, + "video": mm_limit + }, + tensor_parallel_size=tensor_parallel_size, + distributed_executor_backend=distributed_executor_backend, + ) + with vllm_model: outputs_per_case_for_original_input = [ vllm_model.generate_greedy_logprobs(prompts, max_tokens, @@ -329,9 +330,8 @@ def run_embedding_input_test( @pytest.mark.parametrize("max_tokens", [128]) @pytest.mark.parametrize("num_logprobs", [10]) def test_qwen2_vl_image_embeddings_input(vllm_runner, image_assets, model, - size_factors, dtype: str, - max_tokens: int, - num_logprobs: int) -> None: + size_factors, dtype, max_tokens, + num_logprobs, monkeypatch) -> None: images = [asset.pil_image for asset in image_assets] inputs_per_case: list[tuple[ diff --git a/tests/models/quantization/test_awq.py b/tests/models/quantization/test_awq.py index bd696198931f..7005e435ecf4 100644 --- a/tests/models/quantization/test_awq.py +++ b/tests/models/quantization/test_awq.py @@ -112,7 +112,7 @@ def test_awq_models(vllm_runner, image_assets, source_model, quant_model, monkeypatch) -> None: # Test V1: this test hangs during setup on single-scale input. - # TODO: fixure out why and re-enable this on V1. + # TODO: figure out why and re-enable this on V1. monkeypatch.setenv("VLLM_USE_V1", "0") run_awq_test( vllm_runner, diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index 484f53246f34..b7949a488ad0 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -43,12 +43,9 @@ @pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module relies on V0 internals, so set VLLM_USE_V1=0. 
- """ - if not current_platform.is_cpu(): - monkeypatch.setenv('VLLM_USE_V1', '0') +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") @pytest.mark.parametrize( @@ -176,10 +173,11 @@ def test_compressed_tensors_w8a8_logprobs( dtype = "bfloat16" - # skip language translation prompt for the static per tensor asym model - if (model_path == - "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym" - ): # noqa: E501 + # skip language translation prompt for the static per tensor models + if model_path in ( + "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Sym", + "nm-testing/Meta-Llama-3-8B-Instruct-W8A8-Static-Per-Tensor-Asym", + ): example_prompts = example_prompts[0:-1] with hf_runner(model_path, dtype=dtype) as hf_model: diff --git a/tests/quantization/test_fp8.py b/tests/quantization/test_fp8.py index d781f462b4ad..db53061cf2d1 100644 --- a/tests/quantization/test_fp8.py +++ b/tests/quantization/test_fp8.py @@ -60,8 +60,8 @@ def test_kv_cache_model_load_and_run(vllm_runner, model_id: str, if use_rocm_aiter: monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1") - # vllm_runner.apply_model() relies on V0 internals. - monkeypatch.setenv("VLLM_USE_V1", "0") + # `LLM.apply_model` requires pickling a function. + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") with vllm_runner(model_id, kv_cache_dtype="fp8") as llm: def check_model(model): @@ -104,8 +104,8 @@ def test_load_fp16_model(vllm_runner, kv_cache_dtype: str, force_marlin: bool, if use_rocm_aiter: monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1") - # vllm_runner.apply_model() relies on V0 internals. - monkeypatch.setenv("VLLM_USE_V1", "0") + # `LLM.apply_model` requires pickling a function. + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") if force_marlin: monkeypatch.setenv("VLLM_TEST_FORCE_FP8_MARLIN", "1") diff --git a/tests/quantization/test_gptq_dynamic.py b/tests/quantization/test_gptq_dynamic.py index aea50e99c1dd..00a5946ed015 100644 --- a/tests/quantization/test_gptq_dynamic.py +++ b/tests/quantization/test_gptq_dynamic.py @@ -31,41 +31,46 @@ @pytest.mark.parametrize("model_id, use_marlin_kernel", MODEL_QUANT) def test_gptq_with_dynamic(vllm_runner, model_id: str, use_marlin_kernel: bool, monkeypatch): - # vllm_runner.apply_model() relies on V0 internals. - monkeypatch.setenv("VLLM_USE_V1", "0") - - vllm_model = vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) + # `LLM.apply_model` requires pickling a function. + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") linear_method_cls = GPTQMarlinLinearMethod if use_marlin_kernel else ( GPTQLinearMethod) - for name, submodule in (vllm_model.llm.llm_engine.model_executor. 
- driver_worker.model_runner.model.named_modules()): - if name == "lm_head": - assert isinstance(submodule.quant_method, linear_method_cls) - elif name == 'model.layers.0.self_attn.qkv_proj': - # The first layer is quantized using bits=4, group_size=128 - # desc_act=True - assert isinstance(submodule.quant_method, linear_method_cls) - config = submodule.quant_method.quant_config - assert config.weight_bits == 4 - assert config.group_size == 128 - assert config.desc_act - elif name == 'model.layers.1.self_attn.qkv_proj': - # The second layer is quantized using bits=8, group_size=32 - # desc_act=False - assert isinstance(submodule.quant_method, linear_method_cls) - config = submodule.quant_method.quant_config - assert get_dynamic_override(config, layer_name=name, - key="bits") == 8 - assert get_dynamic_override(config, - layer_name=name, - key="group_size") == 32 - assert not get_dynamic_override( - config, layer_name=name, key="desc_act") - elif (name == 'model.layers.2.self_attn.qkv_proj' - or name == 'model.layers.2.mlp.gate_up_proj'): - # All other layers (layer index >= 2) are not quantized - assert isinstance(submodule.quant_method, UnquantizedLinearMethod) + with vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) as llm: + + def check_model(model): + for name, submodule in model.named_modules(): + if name == "lm_head": + assert isinstance(submodule.quant_method, + linear_method_cls) + elif name == 'model.layers.0.self_attn.qkv_proj': + # The first layer is quantized using bits=4, group_size=128 + # desc_act=True + assert isinstance(submodule.quant_method, + linear_method_cls) + config = submodule.quant_method.quant_config + assert config.weight_bits == 4 + assert config.group_size == 128 + assert config.desc_act + elif name == 'model.layers.1.self_attn.qkv_proj': + # The second layer is quantized using bits=8, group_size=32 + # desc_act=False + assert isinstance(submodule.quant_method, + linear_method_cls) + config = submodule.quant_method.quant_config + assert get_dynamic_override(config, + layer_name=name, + key="bits") == 8 + assert get_dynamic_override(config, + layer_name=name, + key="group_size") == 32 + assert not get_dynamic_override( + config, layer_name=name, key="desc_act") + elif (name == 'model.layers.2.self_attn.qkv_proj' + or name == 'model.layers.2.mlp.gate_up_proj'): + # All other layers (layer index >= 2) are not quantized + assert isinstance(submodule.quant_method, + UnquantizedLinearMethod) - del vllm_model + llm.apply_model(check_model) diff --git a/tests/quantization/test_lm_head.py b/tests/quantization/test_lm_head.py index b24964a9d0a9..e69d4ad349c3 100644 --- a/tests/quantization/test_lm_head.py +++ b/tests/quantization/test_lm_head.py @@ -29,8 +29,8 @@ def test_lm_head( lm_head_quantized: bool, monkeypatch, ) -> None: - # vllm_runner.apply_model() relies on V0 internals. - monkeypatch.setenv("VLLM_USE_V1", "0") + # `LLM.apply_model` requires pickling a function. 
+ monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") with vllm_runner(model_id, dtype=torch.float16, max_model_len=2048) as vllm_model: diff --git a/tests/quantization/test_modelopt.py b/tests/quantization/test_modelopt.py index c60a03f44bae..e7174be73626 100644 --- a/tests/quantization/test_modelopt.py +++ b/tests/quantization/test_modelopt.py @@ -11,16 +11,12 @@ import torch from tests.quantization.utils import is_quant_method_supported -from vllm.platforms import current_platform @pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module relies on V0 internals, so set VLLM_USE_V1=0. - """ - if not current_platform.is_cpu(): - monkeypatch.setenv('VLLM_USE_V1', '0') +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") @pytest.mark.skipif(not is_quant_method_supported("modelopt"), diff --git a/tests/quantization/test_ptpc_fp8.py b/tests/quantization/test_ptpc_fp8.py index 5f78bc30504c..088b68510cff 100644 --- a/tests/quantization/test_ptpc_fp8.py +++ b/tests/quantization/test_ptpc_fp8.py @@ -13,6 +13,16 @@ PTPCFp8LinearMethod) from vllm.platforms import current_platform +UNSUPPORTED_STR = ( + "Currently torch._scaled_mm (hipBLASLt) rowwise gemm only " + "support output dtype of bfloat16. torch.float16 is specified.") + + +@pytest.fixture(scope="function", autouse=True) +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + @pytest.mark.skipif(not is_quant_method_supported("ptpc_fp8"), reason="PTPC FP8 is not supported on this GPU type.") @@ -21,14 +31,22 @@ @pytest.mark.parametrize("dtype", ["auto", "bfloat16", "float16"]) @pytest.mark.parametrize("kv_cache_dtype", ["auto", "fp8", "fp8_e4m3"]) def test_ptpc_fp8_rocm(vllm_runner, dtype: str, kv_cache_dtype: str) -> None: - try: - with vllm_runner("facebook/opt-125m", - dtype=dtype, - quantization="ptpc_fp8", - kv_cache_dtype=kv_cache_dtype) as llm: + llm = vllm_runner("facebook/opt-125m", + dtype=dtype, + quantization="ptpc_fp8", + kv_cache_dtype=kv_cache_dtype) + except AssertionError as e: + if str(e) == UNSUPPORTED_STR: + # If the error message matches, the test passes + return + else: + # If the error message does not match, re-raise the exception + raise + + with llm: - model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model # noqa: E501 + def check_model(model): fc1 = model.model.decoder.layers[0].fc1 assert isinstance(fc1.quant_method, PTPCFp8LinearMethod) if kv_cache_dtype == "ptpc_fp8": @@ -40,17 +58,8 @@ def test_ptpc_fp8_rocm(vllm_runner, dtype: str, kv_cache_dtype: str) -> None: if current_platform.has_device_capability(94): # For GPUs with hardware support, we keep weights in fp8 assert fc1.weight.dtype == torch.float8_e4m3fnuz - else: - pytest.skip() - output = llm.generate_greedy("Hello my name is", max_tokens=20) - assert output - except AssertionError as e: - if str( - e - ) == "Currently torch._scaled_mm (hipBLASLt) rowwise gemm only support output dtype of bfloat16. 
torch.float16 is specified.": # noqa: E501 - # If the error message matches, the test passes - pass - else: - # If the error message does not match, re-raise the exception - raise + llm.apply_model(check_model) + + output = llm.generate_greedy("Hello my name is", max_tokens=20) + assert output diff --git a/tests/quantization/test_quark.py b/tests/quantization/test_quark.py index c09931971e6f..930f4acb328f 100644 --- a/tests/quantization/test_quark.py +++ b/tests/quantization/test_quark.py @@ -7,10 +7,10 @@ See also `tests/kernels/moe/test_mxfp4_moe.py`. """ -import importlib import importlib.metadata import os from dataclasses import dataclass +from importlib.util import find_spec import huggingface_hub import lm_eval @@ -24,9 +24,8 @@ from .reference_mxfp4 import dq_mxfp4_torch, qdq_mxfp4_torch -QUARK_MXFP4_AVAILABLE = importlib.util.find_spec( - "quark") is not None and version.parse( - importlib.metadata.version("amd-quark")) >= version.parse('0.8.99') +QUARK_MXFP4_AVAILABLE = find_spec("quark") is not None and version.parse( + importlib.metadata.version("amd-quark")) >= version.parse('0.8.99') if QUARK_MXFP4_AVAILABLE: from quark.torch.export.nn.modules.realquantizer import ( @@ -43,11 +42,9 @@ @pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - This module relies on V0 internals, so set VLLM_USE_V1=0. - """ - monkeypatch.setenv('VLLM_USE_V1', '0') +def enable_pickle(monkeypatch): + """`LLM.apply_model` requires pickling a function.""" + monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") @pytest.mark.parametrize('kv_cache_dtype', ['auto', 'fp8']) @@ -132,13 +129,12 @@ def test_quark_fp8_parity(vllm_runner): } with (vllm_runner(quark_model_id, **llm_kwargs) as quark_handle, vllm_runner(fp8_model_id, **llm_kwargs) as fp8_handle): - quark_model = (quark_handle.llm.llm_engine.model_executor. - driver_worker.model_runner.model) - quark_state_dict = quark_model.state_dict() - fp8_model = (fp8_handle.llm.llm_engine.model_executor.driver_worker. - model_runner.model) - fp8_state_dict = fp8_model.state_dict() + def get_state_dict(model): + return {k: v.cpu() for k, v in model.state_dict().items()} + + quark_state_dict, = quark_handle.apply_model(get_state_dict) + fp8_state_dict, = fp8_handle.apply_model(get_state_dict) assert fp8_state_dict.keys() == quark_state_dict.keys() diff --git a/tests/quantization/test_register_quantization_config.py b/tests/quantization/test_register_quantization_config.py index 84705e92c85b..03fe59d7e3bf 100644 --- a/tests/quantization/test_register_quantization_config.py +++ b/tests/quantization/test_register_quantization_config.py @@ -105,18 +105,21 @@ def test_register_quantization_config(): ]) def test_custom_quant(vllm_runner, model, monkeypatch): """Test infer with the custom quantization method.""" - # vllm_runner.apply_model() relies on V0 internals. - monkeypatch.setenv("VLLM_USE_V1", "0") + # `LLM.apply_model` requires pickling a function. 
+ monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + with vllm_runner(model_name=model, quantization="custom_quant", enforce_eager=True) as llm: - model = llm.llm.llm_engine.model_executor.driver_worker.model_runner.model # noqa: E501 - layer = model.model.layers[0] - qkv_proj = layer.self_attn.qkv_proj + def check_model(model): + layer = model.model.layers[0] + qkv_proj = layer.self_attn.qkv_proj + + # Check the quantization method is FakeQuantLinearMethod + assert isinstance(qkv_proj.quant_method, FakeQuantLinearMethod) - # Check the quantization method is FakeQuantLinearMethod - assert isinstance(qkv_proj.quant_method, FakeQuantLinearMethod) + llm.apply_model(check_model) output = llm.generate_greedy("Hello my name is", max_tokens=20) assert output diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 708f3bbeeff1..014bc56bc8ec 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -13,6 +13,7 @@ from typing import Set, Type, Union, cast import torch +import torch.nn as nn from typing_extensions import TypeVar import vllm.envs as envs @@ -55,6 +56,7 @@ from vllm.utils import Counter, Device, resolve_obj_by_qualname, weak_bind from vllm.version import __version__ as VLLM_VERSION from vllm.worker.model_runner_base import InputProcessingError +from vllm.worker.worker_base import WorkerBase logger = init_logger(__name__) _LOCAL_LOGGING_INTERVAL_SEC = 5 @@ -1817,13 +1819,16 @@ def _build_logits_processors( return sampling_params def collective_rpc(self, - method: Union[str, Callable[..., _R]], + method: Union[str, Callable[[WorkerBase], _R]], timeout: Optional[float] = None, args: tuple = (), kwargs: Optional[dict[str, Any]] = None) -> list[_R]: return self.model_executor.collective_rpc(method, timeout, args, kwargs) + def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: + return self.collective_rpc("apply_model", args=(func, )) + if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1: from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index e21bfce0ab08..f2282c40f707 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -522,9 +522,14 @@ def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: """ Run a function directly on the model inside each worker, returning the result for each of them. + + !!! warning + To reduce the overhead of data transfer, avoid returning large + arrays or tensors from this method. If you must return them, + make sure you move them to CPU first to avoid taking up additional + VRAM! 
""" - executor = self.llm_engine.model_executor - return executor.apply_model(func) + return self.llm_engine.apply_model(func) def _get_beam_search_lora_requests( self, diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index 42aa8d14a21e..b75b94ad0acc 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -5,11 +5,10 @@ import time from abc import ABC, abstractmethod from functools import cached_property -from typing import (Any, Awaitable, Callable, Dict, List, Optional, Set, Tuple, - Union) +from typing import Any, Awaitable, Callable, List, Optional, Set, Union import torch.nn as nn -from typing_extensions import TypeVar +from typing_extensions import TypeVar, deprecated import vllm.platforms from vllm.config import VllmConfig @@ -63,10 +62,10 @@ def _init_executor(self) -> None: @abstractmethod def collective_rpc(self, - method: Union[str, Callable[..., _R]], + method: Union[str, Callable[[WorkerBase], _R]], timeout: Optional[float] = None, - args: Tuple = (), - kwargs: Optional[Dict[str, Any]] = None) -> List[_R]: + args: tuple = (), + kwargs: Optional[dict[str, Any]] = None) -> list[_R]: """ Execute an RPC call on all workers. @@ -91,7 +90,7 @@ def collective_rpc(self, """ raise NotImplementedError - def determine_num_available_blocks(self) -> Tuple[int, int]: + def determine_num_available_blocks(self) -> tuple[int, int]: """Determine the number of available blocks for the GPU KV cache and swappable CPU KV cache. @@ -99,9 +98,10 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: ExecutorBase may require modification of the result, e.g. to ensure the selected cache sizes are compatible with all workers. - Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks - are blocks that are "active" on the device and can be appended to. - num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be + Returns a tuple `(num_gpu_blocks, num_cpu_blocks)`, where + `num_gpu_blocks` are blocks that are "active" on the device and can be + appended to. + `num_cpu_blocks` refers to "swapped" blocks in CPU memory and cannot be appended to. """ results = self.collective_rpc("determine_num_available_blocks") @@ -127,16 +127,15 @@ def initialize_cache(self, num_gpu_blocks: int, num_cpu_blocks) -> None: self.collective_rpc("initialize_cache", args=(num_gpu_blocks, num_cpu_blocks)) + @deprecated("`llm_engine.model_executor.apply_model` will no longer work " + "in V1 Engine. Please replace with `llm_engine.apply_model` " + "and set `VLLM_ALLOW_INSECURE_SERIALIZATION=1`.") def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: """ Run a function directly on the model inside each worker, returning the result for each of them. 
""" - - def rpc_func(worker: WorkerBase) -> _R: - return func(worker.get_model()) - - return self.collective_rpc(rpc_func) + return self.collective_rpc("apply_model", args=(func, )) @cached_property # Avoid unnecessary RPC calls def supported_tasks(self) -> tuple[SupportedTask, ...]: @@ -308,8 +307,8 @@ def _driver_execute_model( def collective_rpc(self, method: Union[str, Callable], timeout: Optional[float] = None, - args: Tuple = (), - kwargs: Optional[Dict] = None) -> List[Any]: + args: tuple = (), + kwargs: Optional[dict[str, Any]] = None) -> list[Any]: return self._run_workers(method, *args, **(kwargs or {})) @abstractmethod diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index c93bfc35f0ae..907656d1b24c 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -5,6 +5,7 @@ from copy import copy from typing import Any, Callable, Optional, Union +import torch.nn as nn from typing_extensions import TypeVar import vllm.envs as envs @@ -33,6 +34,7 @@ StatLoggerFactory) from vllm.v1.metrics.reader import Metric, get_metrics_snapshot from vllm.v1.metrics.stats import IterationStats +from vllm.v1.worker.worker_base import WorkerBase logger = init_logger(__name__) @@ -319,12 +321,15 @@ def pin_lora(self, lora_id: int) -> bool: return self.engine_core.pin_lora(lora_id) def collective_rpc(self, - method: Union[str, Callable[..., _R]], + method: Union[str, Callable[[WorkerBase], _R]], timeout: Optional[float] = None, args: tuple = (), kwargs: Optional[dict[str, Any]] = None) -> list[_R]: return self.engine_core.collective_rpc(method, timeout, args, kwargs) + def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: + return self.collective_rpc("apply_model", args=(func, )) + def __del__(self): if dp_group := getattr(self, "dp_group", None): stateless_destroy_torch_distributed_process_group(dp_group) diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index aa76d21f0fca..d0a56f6ff463 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -5,7 +5,8 @@ import os import time from abc import abstractmethod -from typing import Any, Dict, List, Optional, Set, Tuple, Type, Union +from typing import (Any, Callable, Dict, List, Optional, Set, Tuple, Type, + TypeVar, Union) import cloudpickle import torch @@ -28,6 +29,8 @@ logger = init_logger(__name__) +_R = TypeVar("_R") + @warn_for_unimplemented_methods class WorkerBase: @@ -70,6 +73,10 @@ def initialize_cache(self, num_gpu_blocks: int, def get_model(self) -> nn.Module: raise NotImplementedError + def apply_model(self, fn: Callable[[nn.Module], _R]) -> _R: + """Apply a function on the model inside this worker.""" + return fn(self.get_model()) + def load_model(self) -> None: """Load model onto target device.""" raise NotImplementedError From e08a3a3fdbdb5408f904a237b31ff2447a336b2f Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Sat, 20 Sep 2025 04:16:56 -0400 Subject: [PATCH 189/518] [CI Failure] Disable FlashInfer RoPE to unblock CI (#25299) Signed-off-by: mgoin --- .../model_executor/layers/rotary_embedding/base.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py index 3dc249ae9adb..1c3576bee539 100644 --- a/vllm/model_executor/layers/rotary_embedding/base.py +++ b/vllm/model_executor/layers/rotary_embedding/base.py @@ -6,8 +6,6 @@ import torch from vllm.model_executor.custom_op import CustomOp -from vllm.platforms 
import current_platform -from vllm.utils.flashinfer import has_flashinfer from .common import apply_rotary_emb_torch @@ -32,13 +30,15 @@ def __init__( self.base = base self.is_neox_style = is_neox_style self.dtype = dtype + # TODO(mgoin): disabled for now due to failures # Flashinfer only supports head_size=64, 128, 256, 512. # https://github.com/flashinfer-ai/flashinfer/blob/ebfd655efe830048dba5d582aaa61d61d1cf9a87/include/flashinfer/utils.cuh#L174-L202 - self.use_flashinfer = (self.enabled() - and dtype in (torch.float16, torch.bfloat16) - and current_platform.is_cuda() - and has_flashinfer() - and self.head_size in [64, 128, 256, 512]) + # self.use_flashinfer = (self.enabled() + # and dtype in (torch.float16, torch.bfloat16) + # and current_platform.is_cuda() + # and has_flashinfer() + # and self.head_size in [64, 128, 256, 512]) + self.use_flashinfer = False cache = self._compute_cos_sin_cache() if not self.use_flashinfer: From 032d661d27db873ddd71ffb609c79c581b3f27b9 Mon Sep 17 00:00:00 2001 From: Wenlong Wang Date: Sat, 20 Sep 2025 04:45:18 -0700 Subject: [PATCH 190/518] [Docs] Fix warnings in mkdocs build (continued) (#25042) Signed-off-by: wwl2755 --- vllm/multimodal/__init__.py | 2 +- vllm/utils/__init__.py | 6 +++--- vllm/v1/core/kv_cache_utils.py | 4 ++-- vllm/v1/sample/rejection_sampler.py | 10 +++++----- vllm/v1/worker/gpu_model_runner.py | 2 +- vllm/v1/worker/utils.py | 3 ++- vllm/worker/model_runner.py | 12 ++++++++++-- 7 files changed, 24 insertions(+), 15 deletions(-) diff --git a/vllm/multimodal/__init__.py b/vllm/multimodal/__init__.py index b7d4cd298e24..7ffa732cf370 100644 --- a/vllm/multimodal/__init__.py +++ b/vllm/multimodal/__init__.py @@ -15,7 +15,7 @@ model. Info: - [mm_processing](../../../design/mm_processing.html) + [mm_processing](../../../design/mm_processing.md) """ __all__ = [ diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index fd1c0af31269..968bba664f0a 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -3216,7 +3216,7 @@ def cprofile_context(save_file: Optional[str] = None): Args: save_file: path to save the profile result. "1" or - None will result in printing to stdout. + None will result in printing to stdout. """ import cProfile @@ -3273,7 +3273,7 @@ def check_use_alibi(model_config: ModelConfig) -> bool: and getattr(cfg.attn_config, "alibi", False))))) -def sha256(input) -> bytes: +def sha256(input: Any) -> bytes: """Hash any picklable Python object using SHA-256. The input is serialized using pickle before hashing, which allows @@ -3290,7 +3290,7 @@ def sha256(input) -> bytes: return hashlib.sha256(input_bytes).digest() -def sha256_cbor(input) -> bytes: +def sha256_cbor(input: Any) -> bytes: """ Hash objects using CBOR serialization and SHA-256. diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 3ccd00121f8e..47a41322c423 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -1229,8 +1229,8 @@ def get_kv_cache_configs(vllm_config: VllmConfig, Args: vllm_config: The global VllmConfig kv_cache_specs: List of dict[layer_name, KVCacheSpec] for each worker. - available_memory: Memory available for KV cache in bytes for each - worker. + available_memory: Memory available for KV cache in bytes for each + worker. Returns: The generated KVCacheConfigs for each worker. 
diff --git a/vllm/v1/sample/rejection_sampler.py b/vllm/v1/sample/rejection_sampler.py index 3d5e59addfcf..ced5c7a97038 100644 --- a/vllm/v1/sample/rejection_sampler.py +++ b/vllm/v1/sample/rejection_sampler.py @@ -351,17 +351,17 @@ def generate_uniform_probs( without a seed. Args: - num_tokens : int + num_tokens: int Total number of tokens. - num_draft_tokens : List[List[int]] + num_draft_tokens: List[List[int]] Number of draft tokens per request. - generators : Optional[Dict[int, torch.Generator]] + generators: Optional[Dict[int, torch.Generator]] A dictionary mapping indices in the batch to `torch.Generator` objects. - device : torch.device + device: torch.device The device on which to allocate the tensor. Returns: - uniform_rand : torch.Tensor + uniform_rand: torch.Tensor A tensor of shape `(num_tokens, )` containing uniform random values in the range [0, 1). """ diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 233df8f1b0e9..d0946e8c5d7d 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1479,7 +1479,7 @@ def _batch_mm_kwargs_from_scheduler( Args: scheduler_output: The scheduler output containing scheduled encoder - inputs. + inputs. Returns: A tuple of (mm_kwargs, req_ids_pos) where: diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 021d18b2500f..af922f9979d1 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -205,7 +205,8 @@ def gather_mm_placeholders( """ Reconstructs the embeddings from the placeholder tokens. - This is the operation of [scatter_mm_placeholders][]. + This is the operation of [`scatter_mm_placeholders`] + [vllm.v1.worker.utils.scatter_mm_placeholders]. """ if is_embed is None: return placeholders diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py index f662f5a85eff..bab89586b0f2 100644 --- a/vllm/worker/model_runner.py +++ b/vllm/worker/model_runner.py @@ -1810,7 +1810,8 @@ def execute_model( return [output] - def need_recv_kv(self, model_input, kv_caches) -> bool: + def need_recv_kv(self, model_input: ModelInputForGPUWithSamplingMetadata, + kv_caches: List[torch.Tensor]) -> bool: """Check if we need to receive kv-cache from the other worker. We need to receive KV when 1. current vLLM instance is KV cache consumer/decode vLLM instance @@ -1825,6 +1826,9 @@ def need_recv_kv(self, model_input, kv_caches) -> bool: if self.vllm_config.kv_transfer_config is None: return False + if model_input.attn_metadata is None: + raise ValueError("model_input.attn_metadata cannot be None") + prefill_meta = model_input.attn_metadata.prefill_metadata # check if the current run is profiling @@ -1835,7 +1839,8 @@ def need_recv_kv(self, model_input, kv_caches) -> bool: return self.vllm_config.kv_transfer_config.is_kv_consumer and ( not is_profile_run) and is_prefill_run - def need_send_kv(self, model_input, kv_caches) -> bool: + def need_send_kv(self, model_input: ModelInputForGPUWithSamplingMetadata, + kv_caches: List[torch.Tensor]) -> bool: """Check if we need to send kv-cache to the other worker. We need to send KV when 1. 
current vLLM instance is KV cache producer/prefill vLLM instance @@ -1850,6 +1855,9 @@ def need_send_kv(self, model_input, kv_caches) -> bool: if self.vllm_config.kv_transfer_config is None: return False + if model_input.attn_metadata is None: + raise ValueError("model_input.attn_metadata cannot be None") + prefill_meta = model_input.attn_metadata.prefill_metadata # check if the current run is profiling From bf8b26cad17f21f142de6c06ee657f01ccb2b816 Mon Sep 17 00:00:00 2001 From: Manoel Marques Date: Sat, 20 Sep 2025 07:51:13 -0400 Subject: [PATCH 191/518] Generate _ModelInfo properties file when loading to improve loading speed (#23558) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Manoel Marques Signed-off-by: Manoel Marques Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Luka Govedič --- vllm/logging_utils/__init__.py | 2 + vllm/logging_utils/log_time.py | 32 +++++++ .../model_loader/weight_utils.py | 44 +++++++++ vllm/model_executor/models/registry.py | 92 ++++++++++++++++++- 4 files changed, 167 insertions(+), 3 deletions(-) create mode 100644 vllm/logging_utils/log_time.py diff --git a/vllm/logging_utils/__init__.py b/vllm/logging_utils/__init__.py index cf690a89ae9b..7202259ca21a 100644 --- a/vllm/logging_utils/__init__.py +++ b/vllm/logging_utils/__init__.py @@ -2,7 +2,9 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from vllm.logging_utils.formatter import NewLineFormatter +from vllm.logging_utils.log_time import logtime __all__ = [ "NewLineFormatter", + "logtime", ] diff --git a/vllm/logging_utils/log_time.py b/vllm/logging_utils/log_time.py new file mode 100644 index 000000000000..013dd144beaf --- /dev/null +++ b/vllm/logging_utils/log_time.py @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Provides a timeslice logging decorator +""" + +import functools +import time + + +def logtime(logger, msg=None): + """ + Logs the execution time of the decorated function. + Always place it beneath other decorators. + """ + + def _inner(func): + + @functools.wraps(func) + def _wrapper(*args, **kwargs): + start = time.perf_counter() + result = func(*args, **kwargs) + elapsed = time.perf_counter() - start + + prefix = f"Function '{func.__module__}.{func.__qualname__}'" \ + if msg is None else msg + logger.debug("%s: Elapsed time %.7f secs", prefix, elapsed) + return result + + return _wrapper + + return _inner diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index f2c66763d081..a72086da18c4 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -11,6 +11,7 @@ import time from collections import defaultdict from collections.abc import Generator +from contextlib import contextmanager from pathlib import Path from typing import Any, Callable, Optional, Union @@ -98,6 +99,49 @@ def get_lock(model_name_or_path: Union[str, Path], return lock +@contextmanager +def atomic_writer(filepath: Union[str, Path], + mode: str = 'w', + encoding: Optional[str] = None): + """ + Context manager that provides an atomic file writing routine. + + The context manager writes to a temporary file and, if successful, + atomically replaces the original file. + + Args: + filepath (str or Path): The path to the file to write. + mode (str): The file mode for the temporary file (e.g., 'w', 'wb'). 
+ encoding (str): The encoding for text mode. + + Yields: + file object: A handle to the temporary file. + """ + # Create a temporary file in the same directory as the target file + # to ensure it's on the same filesystem for an atomic replace. + temp_dir = os.path.dirname(filepath) + temp_fd, temp_path = tempfile.mkstemp(dir=temp_dir) + + try: + # Open the temporary file for writing + with os.fdopen(temp_fd, mode=mode, encoding=encoding) as temp_file: + yield temp_file + + # If the 'with' block completes successfully, + # perform the atomic replace. + os.replace(temp_path, filepath) + + except Exception: + logger.exception( + "Error during atomic write. Original file '%s' not modified", + filepath) + raise + finally: + # Clean up the temporary file if it still exists. + if os.path.exists(temp_path): + os.remove(temp_path) + + def maybe_download_from_modelscope( model: str, revision: Optional[str] = None, diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 76f2bd087624..5dc5d545bb9c 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -4,7 +4,9 @@ Whenever you add an architecture to this page, please also update `tests/models/registry.py` with example HuggingFace models for it. """ +import hashlib import importlib +import json import os import pickle import subprocess @@ -12,16 +14,19 @@ import tempfile from abc import ABC, abstractmethod from collections.abc import Set -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from functools import lru_cache +from pathlib import Path from typing import Callable, Optional, TypeVar, Union import torch.nn as nn import transformers +from vllm import envs from vllm.config import (ModelConfig, iter_architecture_defaults, try_match_architecture_defaults) from vllm.logger import init_logger +from vllm.logging_utils import logtime from vllm.transformers_utils.dynamic_module import ( try_get_class_from_dynamic_module) @@ -421,10 +426,91 @@ class _LazyRegisteredModel(_BaseRegisteredModel): module_name: str class_name: str - # Performed in another process to avoid initializing CUDA + @staticmethod + def _get_cache_dir() -> Path: + return Path(envs.VLLM_CACHE_ROOT) / "modelinfos" + + def _get_cache_filename(self) -> str: + cls_name = f"{self.module_name}-{self.class_name}".replace(".", "-") + return f"{cls_name}.json" + + def _load_modelinfo_from_cache(self, + module_hash: str) -> _ModelInfo | None: + try: + try: + modelinfo_path = self._get_cache_dir( + ) / self._get_cache_filename() + with open(modelinfo_path, encoding="utf-8") as file: + mi_dict = json.load(file) + except FileNotFoundError: + logger.debug(("Cached model info file " + "for class %s.%s not found"), self.module_name, + self.class_name) + return None + + if mi_dict["hash"] != module_hash: + logger.debug(("Cached model info file " + "for class %s.%s is stale"), self.module_name, + self.class_name) + return None + + # file not changed, use cached _ModelInfo properties + return _ModelInfo(**mi_dict["modelinfo"]) + except Exception: + logger.exception(("Cached model info " + "for class %s.%s error. 
"), self.module_name, + self.class_name) + return None + + def _save_modelinfo_to_cache(self, mi: _ModelInfo, + module_hash: str) -> None: + """save dictionary json file to cache""" + from vllm.model_executor.model_loader.weight_utils import atomic_writer + try: + modelinfo_dict = { + "hash": module_hash, + "modelinfo": asdict(mi), + } + cache_dir = self._get_cache_dir() + cache_dir.mkdir(parents=True, exist_ok=True) + modelinfo_path = cache_dir / self._get_cache_filename() + with atomic_writer(modelinfo_path, encoding='utf-8') as f: + json.dump(modelinfo_dict, f, indent=2) + except Exception: + logger.exception("Error saving model info cache.") + + @logtime(logger=logger, msg="Registry inspect model class") def inspect_model_cls(self) -> _ModelInfo: - return _run_in_subprocess( + model_path = Path( + __file__).parent / f"{self.module_name.split('.')[-1]}.py" + + assert model_path.exists(), \ + f"Model {self.module_name} expected to be on path {model_path}" + with open(model_path, "rb") as f: + module_hash = hashlib.md5(f.read()).hexdigest() + + mi = self._load_modelinfo_from_cache(module_hash) + if mi is not None: + logger.debug(("Loaded model info " + "for class %s.%s from cache"), self.module_name, + self.class_name) + return mi + else: + logger.debug(("Cache model info " + "for class %s.%s miss. " + "Loading model instead."), self.module_name, + self.class_name) + + # Performed in another process to avoid initializing CUDA + mi = _run_in_subprocess( lambda: _ModelInfo.from_model_cls(self.load_model_cls())) + logger.debug("Loaded model info for class %s.%s", self.module_name, + self.class_name) + + # save cache file + self._save_modelinfo_to_cache(mi, module_hash) + + return mi def load_model_cls(self) -> type[nn.Module]: mod = importlib.import_module(self.module_name) From 3c713a97116f34ffdc75ba10c9dfbb2850437984 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sat, 20 Sep 2025 20:46:24 +0800 Subject: [PATCH 192/518] [Model] Cleanup InternViT's data parallel implementation (#25306) Signed-off-by: Isotr0py --- vllm/model_executor/models/intern_vit.py | 158 ++++++----------------- 1 file changed, 37 insertions(+), 121 deletions(-) diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py index 118cce810a1f..892188c04722 100644 --- a/vllm/model_executor/models/intern_vit.py +++ b/vllm/model_executor/models/intern_vit.py @@ -25,7 +25,6 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, QKVParallelLinear, - ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -164,23 +163,15 @@ def __init__( self.tp_size) self.scale = self.head_dim**-0.5 - if use_data_parallel: - self.qkv = ReplicatedLinear( - self.embed_dim, - 3 * self.head_dim * self.num_heads, - bias=config.qkv_bias, - quant_config=quant_config, - prefix=f"{prefix}.qkv", - ) - else: - self.qkv = QKVParallelLinear( - self.embed_dim, - self.head_dim, - num_dummy_heads + self.num_heads, - bias=config.qkv_bias, - quant_config=quant_config, - prefix=f"{prefix}.qkv", - ) + self.qkv = QKVParallelLinear( + self.embed_dim, + self.head_dim, + num_dummy_heads + self.num_heads, + bias=config.qkv_bias, + quant_config=quant_config, + prefix=f"{prefix}.qkv", + disable_tp=use_data_parallel, + ) self.qk_normalization = config.qk_normalization @@ -192,20 +183,13 @@ def __init__( eps=config.layer_norm_eps, 
var_hidden_size=self.embed_dim) - if use_data_parallel: - self.proj = ReplicatedLinear( - self.dummy_dim, - self.embed_dim, - quant_config=quant_config, - prefix=f"{prefix}.proj", - ) - else: - self.proj = RowParallelLinear( - self.dummy_dim, - self.embed_dim, - quant_config=quant_config, - prefix=f"{prefix}.proj", - ) + self.proj = RowParallelLinear( + self.dummy_dim, + self.embed_dim, + quant_config=quant_config, + prefix=f"{prefix}.proj", + disable_tp=use_data_parallel, + ) self.attn = MultiHeadAttention(self.num_heads_per_partition, self.head_dim, self.scale) @@ -236,72 +220,6 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: return out -class InternSdpaAttention(nn.Module): - """Multi-headed attention from 'Attention Is All You Need' paper""" - - def __init__( - self, - config: PretrainedConfig, - *, - num_dummy_heads: int = 0, - ) -> None: - super().__init__() - - self.config = config - self.embed_dim = config.hidden_size - self.num_heads = config.num_attention_heads - self.head_dim = self.embed_dim // self.num_heads - if self.head_dim * self.num_heads != self.embed_dim: - raise ValueError( - f'embed_dim must be divisible by num_heads ' - f'(got `embed_dim`: {self.embed_dim} and `num_heads`:' - f' {self.num_heads}).') - - # Additional dummy heads are used to enable TP for common GPU counts. - self.dummy_dim = (num_dummy_heads + self.num_heads) * self.head_dim - - self.scale = self.head_dim**-0.5 - self.qkv = nn.Linear(self.embed_dim, - 3 * self.dummy_dim, - bias=config.qkv_bias) - - self.qk_normalization = config.qk_normalization - - if self.qk_normalization: - self.q_norm = RMSNorm(self.dummy_dim, - eps=config.layer_norm_eps, - var_hidden_size=self.embed_dim) - self.k_norm = RMSNorm(self.dummy_dim, - eps=config.layer_norm_eps, - var_hidden_size=self.embed_dim) - - self.proj = nn.Linear(self.dummy_dim, self.embed_dim) - - # Use unified MultiHeadAttention with automatic backend selection - self.attn = MultiHeadAttention(self.num_heads, self.head_dim, - self.scale) - - def forward(self, x: torch.Tensor) -> torch.Tensor: - B, N, C = x.shape - qkv = self.qkv(x) - q, k, v = qkv.chunk(3, dim=-1) - - q = q.view(B, N, self.num_heads, self.head_dim) - k = k.view(B, N, self.num_heads, self.head_dim) - v = v.view(B, N, self.num_heads, self.head_dim) - - if self.qk_normalization: - B_, N_, H_, D_ = q.shape - q = self.q_norm(q.flatten(-2, -1)).view(B_, N_, H_, D_) - k = self.k_norm(k.flatten(-2, -1)).view(B_, N_, H_, D_) - - # Use unified MultiHeadAttention with automatic backend selection - x = self.attn(q, k, v) - - x = self.proj(x) - return x - - class InternMLP(nn.Module): def __init__( @@ -315,20 +233,18 @@ def __init__( self.config = config self.activation_fn = get_act_fn(config.hidden_act) - cls_fc1 = (ReplicatedLinear - if use_data_parallel else ColumnParallelLinear) - self.fc1 = cls_fc1(config.hidden_size, - config.intermediate_size, - bias=True, - quant_config=quant_config, - prefix=f"{prefix}.fc1") - cls_fc2 = (ReplicatedLinear - if use_data_parallel else RowParallelLinear) - self.fc2 = cls_fc2(config.intermediate_size, - config.hidden_size, - bias=True, - quant_config=quant_config, - prefix=f"{prefix}.fc2") + self.fc1 = ColumnParallelLinear(config.hidden_size, + config.intermediate_size, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.fc1", + disable_tp=use_data_parallel) + self.fc2 = RowParallelLinear(config.intermediate_size, + config.hidden_size, + bias=True, + quant_config=quant_config, + prefix=f"{prefix}.fc2", + disable_tp=use_data_parallel) def forward(self, 
hidden_states: torch.Tensor) -> torch.Tensor: hidden_states, _ = self.fc1(hidden_states) @@ -385,19 +301,19 @@ def _init_attn( use_data_parallel: bool = False, ): # fallback to sdpa attention if tp unavailable - # tp_size = get_tensor_model_parallel_world_size() tp_size = (1 if use_data_parallel else get_tensor_model_parallel_world_size()) num_heads = config.num_attention_heads - if (num_heads + num_dummy_heads) % tp_size == 0: - return InternParallelAttention(config, - quant_config=quant_config, - num_dummy_heads=num_dummy_heads, - prefix=prefix, - use_data_parallel=use_data_parallel) - - return InternSdpaAttention(config, num_dummy_heads=num_dummy_heads) + # if the number of heads is not divisible by tp_size, + # we also disable Attention's TP + use_data_parallel = (use_data_parallel + or (num_heads + num_dummy_heads) % tp_size != 0) + return InternParallelAttention(config, + quant_config=quant_config, + num_dummy_heads=num_dummy_heads, + prefix=prefix, + use_data_parallel=use_data_parallel) def forward( self, From d88918e4c2d189eb25724a6cff9a9028c67daa07 Mon Sep 17 00:00:00 2001 From: lirong <56789630+lirong-lirong@users.noreply.github.com> Date: Sat, 20 Sep 2025 21:15:22 +0800 Subject: [PATCH 193/518] [Core] Enable sharded state loader for V1 engine and enhance test coverage (#25308) Signed-off-by: pengdrumli --- tests/test_sharded_state_loader.py | 20 ++++++++++++-------- vllm/engine/arg_utils.py | 6 ------ 2 files changed, 12 insertions(+), 14 deletions(-) diff --git a/tests/test_sharded_state_loader.py b/tests/test_sharded_state_loader.py index 42afdfa3c746..fd5b5fad0999 100644 --- a/tests/test_sharded_state_loader.py +++ b/tests/test_sharded_state_loader.py @@ -57,10 +57,19 @@ def llama_3p2_1b_files(): def _run_writer(input_dir, output_dir, weights_patterns, **kwargs): llm_sharded_writer = LLM(model=input_dir, **kwargs) - + # Check which engine version is being used + is_v1_engine = hasattr(llm_sharded_writer.llm_engine, "engine_core") # Dump worker states to output directory - llm_sharded_writer.llm_engine.model_executor.save_sharded_state( - path=output_dir) + if is_v1_engine: + # For V1 engine, we need to use engine_core.save_sharded_state + print("Using V1 engine save path") + llm_sharded_writer.llm_engine.engine_core.save_sharded_state( + path=output_dir) + else: + # For V0 engine + print("Using V0 engine save path") + model_executor = llm_sharded_writer.llm_engine.model_executor + model_executor.save_sharded_state(path=output_dir) # Copy metadata files to output directory for file in os.listdir(input_dir): @@ -91,8 +100,6 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, gpu_memory_utilization = 0.8 input_dir = llama_3p2_1b_files ctx = mp.get_context("spawn") - # The interface in v1 engine has changed, run in v1 engine will hang. 
- monkeypatch.setenv("VLLM_USE_V1", "0") # Run in separate processes for memory & CUDA isolation with TemporaryDirectory() as output_dir: @@ -100,7 +107,6 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, args=(input_dir, output_dir, weights_patterns), kwargs=dict( tensor_parallel_size=tp_size, - distributed_executor_backend="mp", gpu_memory_utilization=gpu_memory_utilization, enforce_eager=True, )) @@ -112,7 +118,6 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, p = ctx.Process(target=_run_generate, args=(input_dir, queue), kwargs=dict( - distributed_executor_backend="mp", enable_lora=enable_lora, gpu_memory_utilization=gpu_memory_utilization, tensor_parallel_size=tp_size, @@ -133,7 +138,6 @@ def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, p = ctx.Process(target=_run_generate, args=(output_dir, queue), kwargs=dict( - distributed_executor_backend="mp", enable_lora=enable_lora, gpu_memory_utilization=gpu_memory_utilization, tensor_parallel_size=tp_size, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7a4bb0d41d23..8912ff8bad42 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1486,12 +1486,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: ############################################################# # Unsupported Feature Flags on V1. - if self.load_format == "sharded_state": - _raise_or_fallback( - feature_name=f"--load_format {self.load_format}", - recommend_to_remove=False) - return False - if (self.logits_processor_pattern != EngineArgs.logits_processor_pattern): _raise_or_fallback(feature_name="--logits-processor-pattern", From bef180f00978186fcc84f7ca6328a6ae6c39676d Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sun, 21 Sep 2025 01:50:58 +0800 Subject: [PATCH 194/518] [V0 Deprecation] Enable the remaining multimodal tests in V1 (#25307) Signed-off-by: DarkLight1337 --- tests/conftest.py | 76 ++++++--- .../multimodal/generation/test_common.py | 158 +++++++++--------- .../multimodal/generation/test_pixtral.py | 49 +----- .../multimodal/generation/test_qwen2_vl.py | 9 +- .../multimodal/pooling/test_prithvi_mae.py | 26 ++- tests/models/quantization/test_awq.py | 38 +++-- .../models/quantization/test_bitsandbytes.py | 28 ++-- tests/models/test_terratorch.py | 25 ++- 8 files changed, 195 insertions(+), 214 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index e8e95357ff5b..ce9de3bf94b5 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -19,6 +19,7 @@ import tempfile import threading from collections.abc import Generator +from contextlib import nullcontext from enum import Enum from typing import Any, Callable, Optional, TypedDict, TypeVar, Union, cast @@ -45,14 +46,14 @@ from vllm.distributed import (cleanup_dist_env_and_memory, init_distributed_environment, initialize_model_parallel) -from vllm.inputs import (ExplicitEncoderDecoderPrompt, TextPrompt, - to_enc_dec_tuple_list, zip_enc_dec_prompts) +from vllm.inputs import TextPrompt from vllm.logger import init_logger from vllm.multimodal.utils import fetch_image from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams from vllm.sequence import Logprob from vllm.transformers_utils.utils import maybe_model_redirect +from vllm.utils import set_default_torch_num_threads logger = init_logger(__name__) @@ -306,6 +307,35 @@ def __init__( is_cross_encoder: bool = False, skip_tokenizer_init: bool = False, auto_cls: type[_BaseAutoModelClass] = 
AutoModelForCausalLM, + # Set this to avoid hanging issue + default_torch_num_threads: Optional[int] = None, + ) -> None: + init_ctx = (nullcontext() if default_torch_num_threads is None else + set_default_torch_num_threads(default_torch_num_threads)) + + with init_ctx: + self._init( + model_name=model_name, + dtype=dtype, + model_kwargs=model_kwargs, + trust_remote_code=trust_remote_code, + is_sentence_transformer=is_sentence_transformer, + is_cross_encoder=is_cross_encoder, + skip_tokenizer_init=skip_tokenizer_init, + auto_cls=auto_cls, + ) + + def _init( + self, + model_name: str, + dtype: str = "auto", + *, + model_kwargs: Optional[dict[str, Any]] = None, + trust_remote_code: bool = True, + is_sentence_transformer: bool = False, + is_cross_encoder: bool = False, + skip_tokenizer_init: bool = False, + auto_cls: type[_BaseAutoModelClass] = AutoModelForCausalLM, ) -> None: model_name = maybe_model_redirect(model_name) self.model_name = model_name @@ -714,26 +744,32 @@ def __init__( enable_chunked_prefill: Optional[bool] = False, swap_space: int = 4, enforce_eager: Optional[bool] = False, + # Set this to avoid hanging issue + default_torch_num_threads: Optional[int] = None, **kwargs, ) -> None: - self.llm = LLM( - model=model_name, - runner=runner, - convert=convert, - tokenizer=tokenizer_name, - tokenizer_mode=tokenizer_mode, - trust_remote_code=trust_remote_code, - dtype=dtype, - seed=seed, - swap_space=swap_space, - enforce_eager=enforce_eager, - disable_log_stats=disable_log_stats, - tensor_parallel_size=tensor_parallel_size, - max_model_len=max_model_len, - block_size=block_size, - enable_chunked_prefill=enable_chunked_prefill, - **kwargs, - ) + init_ctx = (nullcontext() if default_torch_num_threads is None else + set_default_torch_num_threads(default_torch_num_threads)) + + with init_ctx: + self.llm = LLM( + model=model_name, + runner=runner, + convert=convert, + tokenizer=tokenizer_name, + tokenizer_mode=tokenizer_mode, + trust_remote_code=trust_remote_code, + dtype=dtype, + seed=seed, + swap_space=swap_space, + enforce_eager=enforce_eager, + disable_log_stats=disable_log_stats, + tensor_parallel_size=tensor_parallel_size, + max_model_len=max_model_len, + block_size=block_size, + enable_chunked_prefill=enable_chunked_prefill, + **kwargs, + ) def get_inputs( self, diff --git a/tests/models/multimodal/generation/test_common.py b/tests/models/multimodal/generation/test_common.py index 79f9d607f338..e76b58e61ec1 100644 --- a/tests/models/multimodal/generation/test_common.py +++ b/tests/models/multimodal/generation/test_common.py @@ -32,13 +32,6 @@ if current_platform.is_rocm(): os.environ["VLLM_USE_TRITON_FLASH_ATTN"] = "0" -REQUIRES_V0_MODELS = [ - # V1 Test: not enough KV cache space in C1. 
- "fuyu", - # V1 Test: Deadlock issue when processing mm_inputs - "llava-onevision-transformers", -] - # yapf: disable COMMON_BROADCAST_SETTINGS = { "test_type": VLMTestType.IMAGE, @@ -186,8 +179,11 @@ image_size_factors=[(0.25, 0.5, 1.0)], vllm_runner_kwargs={ "model_impl": "transformers", + "default_torch_num_threads": 1, }, - marks=[pytest.mark.core_model], + # FIXME: Investigate why the test hangs + # when processing the 3rd prompt in vLLM + marks=[pytest.mark.core_model, pytest.mark.skip(reason="Test hangs")], ), "idefics3-transformers": VLMTestInfo( models=["HuggingFaceTB/SmolVLM-256M-Instruct"], @@ -320,6 +316,7 @@ vllm_output_post_proc=model_utils.fuyu_vllm_to_hf_output, num_logprobs=10, image_size_factors=[(), (0.25,), (0.25, 0.25, 0.25), (0.25, 0.2, 0.15)], + marks=[large_gpu_mark(min_gb=32)], ), "gemma3": VLMTestInfo( models=["google/gemma-3-4b-it"], @@ -861,13 +858,14 @@ def _mark_splits( test_type=VLMTestType.IMAGE, create_new_process_for_each_test=False, )) -def test_single_image_models(tmp_path: PosixPath, model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_single_image_models( + tmp_path: PosixPath, + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_single_image_test( tmp_path=tmp_path, @@ -886,13 +884,14 @@ def test_single_image_models(tmp_path: PosixPath, model_type: str, test_type=VLMTestType.MULTI_IMAGE, create_new_process_for_each_test=False, )) -def test_multi_image_models(tmp_path: PosixPath, model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_multi_image_models( + tmp_path: PosixPath, + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_multi_image_test( tmp_path=tmp_path, @@ -911,13 +910,13 @@ def test_multi_image_models(tmp_path: PosixPath, model_type: str, test_type=VLMTestType.EMBEDDING, create_new_process_for_each_test=False, )) -def test_image_embedding_models(model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_image_embedding_models( + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_embedding_test( model_test_info=model_test_info, @@ -935,11 +934,13 @@ def test_image_embedding_models(model_type: str, test_type=VLMTestType.VIDEO, create_new_process_for_each_test=False, )) -def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - video_assets: VideoTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_video_models( + model_type: str, + test_case: 
ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + video_assets: VideoTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_video_test( model_test_info=model_test_info, @@ -957,11 +958,13 @@ def test_video_models(model_type: str, test_case: ExpandableVLMTestArgs, test_type=VLMTestType.AUDIO, create_new_process_for_each_test=False, )) -def test_audio_models(model_type: str, test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - audio_assets: AudioTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_audio_models( + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + audio_assets: AudioTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_audio_test( model_test_info=model_test_info, @@ -984,10 +987,7 @@ def test_custom_inputs_models( test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - monkeypatch, ): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_custom_inputs_test( model_test_info=model_test_info, @@ -1006,13 +1006,14 @@ def test_custom_inputs_models( create_new_process_for_each_test=True, )) @create_new_process_for_each_test() -def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_single_image_models_heavy( + tmp_path: PosixPath, + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_single_image_test( tmp_path=tmp_path, @@ -1032,13 +1033,14 @@ def test_single_image_models_heavy(tmp_path: PosixPath, model_type: str, create_new_process_for_each_test=True, )) @create_new_process_for_each_test() -def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_multi_image_models_heavy( + tmp_path: PosixPath, + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_multi_image_test( tmp_path=tmp_path, @@ -1058,14 +1060,13 @@ def test_multi_image_models_heavy(tmp_path: PosixPath, model_type: str, create_new_process_for_each_test=True, )) @create_new_process_for_each_test() -def test_image_embedding_models_heavy(model_type: str, - test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - image_assets: ImageTestAssets, - monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_image_embedding_models_heavy( + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + image_assets: ImageTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_embedding_test( 
model_test_info=model_test_info, @@ -1083,12 +1084,13 @@ def test_image_embedding_models_heavy(model_type: str, test_type=VLMTestType.VIDEO, create_new_process_for_each_test=True, )) -def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - video_assets: VideoTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_video_models_heavy( + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + video_assets: VideoTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_video_test( model_test_info=model_test_info, @@ -1106,12 +1108,13 @@ def test_video_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, test_type=VLMTestType.AUDIO, create_new_process_for_each_test=True, )) -def test_audio_models_heavy(model_type: str, test_case: ExpandableVLMTestArgs, - hf_runner: type[HfRunner], - vllm_runner: type[VllmRunner], - audio_assets: AudioTestAssets, monkeypatch): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") +def test_audio_models_heavy( + model_type: str, + test_case: ExpandableVLMTestArgs, + hf_runner: type[HfRunner], + vllm_runner: type[VllmRunner], + audio_assets: AudioTestAssets, +): model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_audio_test( model_test_info=model_test_info, @@ -1135,10 +1138,7 @@ def test_custom_inputs_models_heavy( test_case: ExpandableVLMTestArgs, hf_runner: type[HfRunner], vllm_runner: type[VllmRunner], - monkeypatch, ): - if model_type in REQUIRES_V0_MODELS: - monkeypatch.setenv("VLLM_USE_V1", "0") model_test_info = VLM_TEST_SETTINGS[model_type] runners.run_custom_inputs_test( model_test_info=model_test_info, diff --git a/tests/models/multimodal/generation/test_pixtral.py b/tests/models/multimodal/generation/test_pixtral.py index a4e21aface41..cb3cc1d3d330 100644 --- a/tests/models/multimodal/generation/test_pixtral.py +++ b/tests/models/multimodal/generation/test_pixtral.py @@ -12,13 +12,12 @@ from mistral_common.tokens.tokenizers.multimodal import image_from_chunk from transformers import AutoProcessor -from vllm import RequestOutput, SamplingParams, TextPrompt, TokensPrompt +from vllm import SamplingParams, TextPrompt, TokensPrompt from vllm.multimodal import MultiModalDataBuiltins -from vllm.multimodal.inputs import PlaceholderRange from vllm.sequence import Logprob, SampleLogprobs from ....utils import VLLM_PATH, large_gpu_test -from ...utils import check_logprobs_close, dummy_hf_overrides +from ...utils import check_logprobs_close if TYPE_CHECKING: from _typeshed import StrPath @@ -185,47 +184,3 @@ def test_chat(vllm_runner, max_model_len: int, model: str, dtype: str, outputs_1_lst=logprobs, name_0="h100_ref", name_1="output") - - -@pytest.mark.parametrize( - "image_urls,expected_ranges", - [(IMG_URLS[:1], [PlaceholderRange(offset=11, length=494)]), - (IMG_URLS[1:4], [ - PlaceholderRange(offset=11, length=266), - PlaceholderRange(offset=277, length=1056), - PlaceholderRange(offset=1333, length=418) - ])]) -def test_multi_modal_placeholders(vllm_runner, image_urls: list[str], - expected_ranges: list[PlaceholderRange], - local_asset_server, monkeypatch) -> None: - local_image_urls = [local_asset_server.url_for(u) for u in image_urls] - prompt = _create_engine_inputs_hf(local_image_urls) - - # This placeholder checking test only works with V0 engine - # where `multi_modal_placeholders` is 
returned with `RequestOutput` - monkeypatch.setenv("VLLM_USE_V1", "0") - with vllm_runner( - "mistral-community/pixtral-12b", - max_model_len=8192, - limit_mm_per_prompt=LIMIT_MM_PER_PROMPT, - load_format="dummy", - hf_overrides=dummy_hf_overrides, - ) as vllm_model: - outputs = vllm_model.llm.generate(prompt) - - assert len(outputs) == 1, f"{len(outputs)=}" - output: RequestOutput = outputs[0] - assert hasattr(output, - "multi_modal_placeholders"), f"{output.__dict__=}" - assert "image" in output.multi_modal_placeholders, \ - f"{output.multi_modal_placeholders.keys()=}" - image_placeholder_ranges: list[ - PlaceholderRange] = output.multi_modal_placeholders["image"] - assert len(image_placeholder_ranges) == len( - expected_ranges), f"{image_placeholder_ranges=}" - for real_range, expected_range in zip(image_placeholder_ranges, - expected_ranges): - assert real_range.offset == expected_range.offset, \ - f"{real_range=} {expected_range=}" - assert real_range.length == expected_range.length, \ - f"{real_range=} {expected_range=}" diff --git a/tests/models/multimodal/generation/test_qwen2_vl.py b/tests/models/multimodal/generation/test_qwen2_vl.py index e56f4e4075be..8336ebc0d59c 100644 --- a/tests/models/multimodal/generation/test_qwen2_vl.py +++ b/tests/models/multimodal/generation/test_qwen2_vl.py @@ -10,7 +10,6 @@ from vllm.multimodal.image import rescale_image_size from vllm.multimodal.video import rescale_video_size, sample_frames_from_video -from vllm.utils import set_default_torch_num_threads from ....conftest import (IMAGE_ASSETS, VIDEO_ASSETS, PromptImageInput, PromptVideoInput, VllmRunner) @@ -264,8 +263,7 @@ def run_embedding_input_test( processor = AutoProcessor.from_pretrained(model) # max_model_len should be greater than image_feature_size - with set_default_torch_num_threads(1): - vllm_model = vllm_runner( + with vllm_runner( model, runner="generate", max_model_len=4000, @@ -277,9 +275,8 @@ def run_embedding_input_test( }, tensor_parallel_size=tensor_parallel_size, distributed_executor_backend=distributed_executor_backend, - ) - - with vllm_model: + default_torch_num_threads=1, + ) as vllm_model: outputs_per_case_for_original_input = [ vllm_model.generate_greedy_logprobs(prompts, max_tokens, diff --git a/tests/models/multimodal/pooling/test_prithvi_mae.py b/tests/models/multimodal/pooling/test_prithvi_mae.py index b503d4256702..7309660ea526 100644 --- a/tests/models/multimodal/pooling/test_prithvi_mae.py +++ b/tests/models/multimodal/pooling/test_prithvi_mae.py @@ -4,8 +4,6 @@ import pytest import torch -from vllm.utils import set_default_torch_num_threads - from ....conftest import VllmRunner @@ -30,19 +28,17 @@ def _run_test( } for _ in range(10) ] - with ( - set_default_torch_num_threads(1), - vllm_runner( - model, - runner="pooling", - dtype=torch.float16, - enforce_eager=True, - skip_tokenizer_init=True, - # Limit the maximum number of sequences to avoid the - # test going OOM during the warmup run - max_num_seqs=32, - ) as vllm_model, - ): + with vllm_runner( + model, + runner="pooling", + dtype="half", + enforce_eager=True, + skip_tokenizer_init=True, + # Limit the maximum number of sequences to avoid the + # test going OOM during the warmup run + max_num_seqs=32, + default_torch_num_threads=1, + ) as vllm_model: vllm_model.encode(prompt) diff --git a/tests/models/quantization/test_awq.py b/tests/models/quantization/test_awq.py index 7005e435ecf4..e741e4ad90a0 100644 --- a/tests/models/quantization/test_awq.py +++ b/tests/models/quantization/test_awq.py @@ -45,12 +45,15 
@@ def run_awq_test( # will hurt multiprocessing backend with fork method (the default method). # max_model_len should be greater than image_feature_size - with vllm_runner(source_model, - max_model_len=4096, - dtype=dtype, - tensor_parallel_size=tensor_parallel_size, - distributed_executor_backend=distributed_executor_backend, - enforce_eager=True) as vllm_model: + with vllm_runner( + source_model, + max_model_len=4096, + dtype=dtype, + tensor_parallel_size=tensor_parallel_size, + distributed_executor_backend=distributed_executor_backend, + enforce_eager=True, + default_torch_num_threads=1, + ) as vllm_model: source_outputs_per_image = [ vllm_model.generate_greedy_logprobs(prompts, max_tokens, @@ -59,13 +62,16 @@ def run_awq_test( for prompts, images in inputs_per_image ] - with vllm_runner(quant_model, - quantization="awq", - max_model_len=4096, - dtype=dtype, - tensor_parallel_size=tensor_parallel_size, - distributed_executor_backend=distributed_executor_backend, - enforce_eager=True) as vllm_model: + with vllm_runner( + quant_model, + quantization="awq", + max_model_len=4096, + dtype=dtype, + tensor_parallel_size=tensor_parallel_size, + distributed_executor_backend=distributed_executor_backend, + enforce_eager=True, + default_torch_num_threads=1, + ) as vllm_model: quant_outputs_per_image = [ vllm_model.generate_greedy_logprobs(prompts, max_tokens, @@ -108,12 +114,8 @@ def run_awq_test( @pytest.mark.parametrize("num_logprobs", [5]) @torch.inference_mode() def test_awq_models(vllm_runner, image_assets, source_model, quant_model, - size_factors, dtype, max_tokens, num_logprobs, - monkeypatch) -> None: + size_factors, dtype, max_tokens, num_logprobs) -> None: - # Test V1: this test hangs during setup on single-scale input. - # TODO: figure out why and re-enable this on V1. - monkeypatch.setenv("VLLM_USE_V1", "0") run_awq_test( vllm_runner, image_assets, diff --git a/tests/models/quantization/test_bitsandbytes.py b/tests/models/quantization/test_bitsandbytes.py index e0e919b62b21..25fc44fee90d 100644 --- a/tests/models/quantization/test_bitsandbytes.py +++ b/tests/models/quantization/test_bitsandbytes.py @@ -5,10 +5,7 @@ Run `pytest tests/quantization/test_bitsandbytes.py`. 
''' -import gc - import pytest -import torch from transformers import BitsAndBytesConfig from tests.quantization.utils import is_quant_method_supported @@ -131,12 +128,15 @@ def test_4bit_bnb_moe_model(hf_runner, vllm_runner, example_prompts, )) with vllm_runner(model_name, quantization='bitsandbytes', - enforce_eager=False) as llm: + enforce_eager=False, + default_torch_num_threads=1) as llm: vllm_outputs = llm.generate_greedy_logprobs(example_prompts, max_tokens=32, num_logprobs=5) - with hf_runner(model_name, model_kwargs=hf_model_kwargs) as llm: + with hf_runner(model_name, + model_kwargs=hf_model_kwargs, + default_torch_num_threads=1) as llm: transformers_outputs = llm.generate_greedy_logprobs_limit( example_prompts, max_tokens=32, num_logprobs=5) check_logprobs_close( @@ -174,7 +174,8 @@ def test_4bit_bnb_embedding_model( runner="pooling", dtype=dtype, gpu_memory_utilization=0.5, - quantization="bitsandbytes") as vllm_model: + quantization="bitsandbytes", + default_torch_num_threads=1) as vllm_model: vllm_outputs = vllm_model.embed(example_prompts) hf_model_kwargs = dict(quantization_config=BitsAndBytesConfig( @@ -184,6 +185,7 @@ def test_4bit_bnb_embedding_model( dtype=dtype, model_kwargs=hf_model_kwargs, is_sentence_transformer=True, + default_torch_num_threads=1, ) as hf_model: hf_outputs = hf_model.encode(example_prompts) @@ -222,26 +224,22 @@ def validate_generated_texts(hf_runner, with vllm_runner(model_name, quantization=None if pre_quant else 'bitsandbytes', tensor_parallel_size=vllm_tp_size, - enforce_eager=False) as llm: + enforce_eager=False, + default_torch_num_threads=1) as llm: vllm_outputs = llm.generate_greedy(prompts, max_tokens) vllm_logs = log_generated_texts(prompts, vllm_outputs, "VllmRunner") - # Clean up the GPU memory for the next test - gc.collect() - torch.cuda.empty_cache() - if hf_model_kwargs is None: hf_model_kwargs = {} # Run with HF runner - with hf_runner(model_name, model_kwargs=hf_model_kwargs) as llm: + with hf_runner(model_name, + model_kwargs=hf_model_kwargs, + default_torch_num_threads=1) as llm: hf_outputs = llm.generate_greedy(prompts, max_tokens) hf_logs = log_generated_texts(prompts, hf_outputs, "HfRunner") - # Clean up the GPU memory for the next test - gc.collect() - torch.cuda.empty_cache() # Compare the generated strings for hf_log, vllm_log in zip(hf_logs, vllm_logs): hf_str = hf_log["generated_text"] diff --git a/tests/models/test_terratorch.py b/tests/models/test_terratorch.py index d6d43ca2f7e1..842e37ea26f6 100644 --- a/tests/models/test_terratorch.py +++ b/tests/models/test_terratorch.py @@ -5,7 +5,6 @@ import torch from tests.conftest import VllmRunner -from vllm.utils import set_default_torch_num_threads @pytest.mark.parametrize( @@ -25,19 +24,17 @@ def test_inference( prompt = dict(prompt_token_ids=[1], multi_modal_data=dict(pixel_values=pixel_values, location_coords=location_coords)) - with ( - set_default_torch_num_threads(1), - vllm_runner( - model, - runner="pooling", - dtype=torch.float16, - enforce_eager=True, - skip_tokenizer_init=True, - # Limit the maximum number of sequences to avoid the - # test going OOM during the warmup run - max_num_seqs=32, - ) as vllm_model, - ): + with vllm_runner( + model, + runner="pooling", + dtype="half", + enforce_eager=True, + skip_tokenizer_init=True, + # Limit the maximum number of sequences to avoid the + # test going OOM during the warmup run + max_num_seqs=32, + default_torch_num_threads=1, + ) as vllm_model: vllm_output = vllm_model.llm.encode(prompt) assert torch.equal( From 
367a480bd3534edf27a8dac3c6f7ea8af9d1ed45 Mon Sep 17 00:00:00 2001 From: Michael Yao Date: Sun, 21 Sep 2025 07:39:47 +0800 Subject: [PATCH 195/518] [Docs] Fix warnings in vllm/profiler and vllm/transformers_utils (#25220) Signed-off-by: windsonsea --- mkdocs.yaml | 1 + vllm/profiler/layerwise_profile.py | 4 ++-- vllm/transformers_utils/configs/jais.py | 3 +-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mkdocs.yaml b/mkdocs.yaml index 6f2be65a18af..1535fcc622cd 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -102,6 +102,7 @@ plugins: - https://numpy.org/doc/stable/objects.inv - https://pytorch.org/docs/stable/objects.inv - https://psutil.readthedocs.io/en/stable/objects.inv + - https://huggingface.co/docs/transformers/main/en/objects.inv markdown_extensions: - attr_list diff --git a/vllm/profiler/layerwise_profile.py b/vllm/profiler/layerwise_profile.py index 2f9ebe531cbb..41136f738c28 100644 --- a/vllm/profiler/layerwise_profile.py +++ b/vllm/profiler/layerwise_profile.py @@ -353,8 +353,8 @@ def __init__(self, num_running_seqs: Optional[int] = None): Args: num_running_seqs (Optional[int], optional): When given, - num_running_seqs will be passed to LayerProfileResults for metadata - update. Defaults to None. + num_running_seqs will be passed to LayerProfileResults + for metadata update. Defaults to None. """ super().__init__( activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], diff --git a/vllm/transformers_utils/configs/jais.py b/vllm/transformers_utils/configs/jais.py index d5ca2c7b4751..3f50638f16b5 100644 --- a/vllm/transformers_utils/configs/jais.py +++ b/vllm/transformers_utils/configs/jais.py @@ -74,8 +74,7 @@ class JAISConfig(PretrainedConfig): use_cache (`bool`, *optional*, defaults to `True`): Whether or not the model should return the last key/values attentions (not used by all models). - scale_attn_by_inverse_layer_idx - (`bool`, *optional*, defaults to `False`): + scale_attn_by_inverse_layer_idx (`bool`, *optional*, default `True`): Whether to additionally scale attention weights by `1 / layer_idx + 1`. 
reorder_and_upcast_attn (`bool`, *optional*, defaults to `False`): From 52c2a8d4adccf72c2cf59a18da31e44a61c43b41 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 17:56:30 -0700 Subject: [PATCH 196/518] [V0 Deprecation] Remove LLMEngine (#25033) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- .../scripts/hardware_ci/run-amd-test.sh | 4 - .buildkite/test-pipeline.yaml | 8 +- .github/CODEOWNERS | 1 - examples/offline_inference/profiling.py | 510 ----- .../test_basic_correctness.py | 16 +- tests/basic_correctness/test_cumem.py | 5 +- tests/compile/test_fusion_attn.py | 3 +- tests/conftest.py | 20 - tests/entrypoints/llm/test_generate.py | 6 - .../entrypoints/llm/test_prompt_validation.py | 8 - tests/entrypoints/openai/test_metrics.py | 2 +- .../attention/test_attention_selector.py | 91 +- tests/lora/test_lora_functions.py | 2 +- .../models/language/generation/test_common.py | 6 +- .../models/language/generation/test_hybrid.py | 122 +- tests/models/language/pooling/test_reward.py | 3 +- tests/models/quantization/test_fp8.py | 12 - tests/models/test_initialization.py | 13 +- tests/models/test_oot_registration.py | 1 + tests/plugins_tests/test_platform_plugins.py | 9 - tests/plugins_tests/test_scheduler_plugins.py | 37 +- tests/samplers/test_beam_search.py | 7 - tests/samplers/test_ignore_eos.py | 7 - tests/samplers/test_ranks.py | 6 - tests/tokenization/test_detokenize.py | 55 - vllm/engine/arg_utils.py | 8 - vllm/engine/llm_engine.py | 1833 +---------------- vllm/entrypoints/llm.py | 11 +- .../model_executor/model_loader/tensorizer.py | 24 +- 29 files changed, 66 insertions(+), 2764 deletions(-) delete mode 100644 examples/offline_inference/profiling.py diff --git a/.buildkite/scripts/hardware_ci/run-amd-test.sh b/.buildkite/scripts/hardware_ci/run-amd-test.sh index 7f90181048d0..aa4cc7b35a54 100755 --- a/.buildkite/scripts/hardware_ci/run-amd-test.sh +++ b/.buildkite/scripts/hardware_ci/run-amd-test.sh @@ -86,10 +86,6 @@ if [[ $commands == *"pytest -v -s models/test_registry.py"* ]]; then commands=${commands//"pytest -v -s models/test_registry.py"/"pytest -v -s models/test_registry.py -k 'not BambaForCausalLM and not GritLM and not Mamba2ForCausalLM and not Zamba2ForCausalLM'"} fi -if [[ $commands == *"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"* ]]; then - commands=${commands//"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2'"/"VLLM_USE_V1=0 pytest -v -s models/test_initialization.py -k 'not llama4 and not plamo2 and not BambaForCausalLM and not Gemma2ForCausalLM and not Grok1ModelForCausalLM and not Zamba2ForCausalLM and not Gemma2Model and not GritLM'"} -fi - if [[ $commands == *"pytest -v -s compile/test_basic_correctness.py"* ]]; then commands=${commands//"pytest -v -s compile/test_basic_correctness.py"/"VLLM_USE_TRITON_FLASH_ATTN=0 pytest -v -s compile/test_basic_correctness.py"} fi diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c42ec4f2503d..1e7ce6ef0a66 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -110,7 +110,7 @@ steps: - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s entrypoints/llm --ignore=entrypoints/llm/test_generate.py --ignore=entrypoints/llm/test_collective_rpc.py - pytest -v -s entrypoints/llm/test_generate.py # it needs a clean process - - VLLM_USE_V1=0 pytest -v -s entrypoints/offline_mode # Needs to avoid interference with other tests + - pytest -v -s entrypoints/offline_mode # Needs 
to avoid interference with other tests - label: Entrypoints Integration Test (API Server) # 100min timeout_in_minutes: 130 @@ -163,7 +163,6 @@ steps: - tests/v1/engine/test_engine_core_client.py commands: # test with tp=2 and external_dp=2 - - VLLM_USE_V1=0 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py # test with tp=2 and pp=2 - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py @@ -314,12 +313,11 @@ steps: - python3 offline_inference/vision_language.py --seed 0 - python3 offline_inference/vision_language_pooling.py --seed 0 - python3 offline_inference/vision_language_multi_image.py --seed 0 - - VLLM_USE_V1=0 python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors + - python3 others/tensorize_vllm_model.py --model facebook/opt-125m serialize --serialized-directory /tmp/ --suffix v1 && python3 others/tensorize_vllm_model.py --model facebook/opt-125m deserialize --path-to-tensors /tmp/vllm/facebook/opt-125m/v1/model.tensors - python3 offline_inference/encoder_decoder_multimodal.py --model-type whisper --seed 0 - python3 offline_inference/basic/classify.py - python3 offline_inference/basic/embed.py - python3 offline_inference/basic/score.py - - VLLM_USE_V1=0 python3 offline_inference/profiling.py --model facebook/opt-125m run_num_steps --num-steps 2 - label: Platform Tests (CUDA) # 4min timeout_in_minutes: 15 @@ -894,7 +892,7 @@ steps: - pytest -v -s distributed/test_sequence_parallel.py # this test fails consistently. # TODO: investigate and fix - - VLLM_USE_V1=0 CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py + - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - pytest -v -s models/multimodal/generation/test_maverick.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 323675993467..f58256d38b9d 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -5,7 +5,6 @@ /vllm/attention @LucasWilkinson /vllm/attention/backends/abstract.py @WoosukKwon @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill /vllm/core @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill -/vllm/engine/llm_engine.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill /vllm/executor/executor_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn /vllm/worker/worker_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn /vllm/worker/worker.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill diff --git a/examples/offline_inference/profiling.py b/examples/offline_inference/profiling.py deleted file mode 100644 index 392fba8fc5ea..000000000000 --- a/examples/offline_inference/profiling.py +++ /dev/null @@ -1,510 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import inspect -import json -import os -import sys -from argparse import RawTextHelpFormatter -from collections.abc import Generator -from dataclasses import asdict, dataclass -from typing import Any, Optional, TypeAlias - -import torch -import tqdm - -from vllm import LLM, SamplingParams -from vllm.engine.arg_utils import EngineArgs -from vllm.profiler.layerwise_profile import layerwise_profile -from vllm.utils import 
FlexibleArgumentParser - -BATCH_SIZE_DEFAULT = 1 -PROMPT_LEN_DEFAULT = 256 - - -@dataclass -class ProfileContext: - engine_args: EngineArgs - prompt_len: int - batch_size: int - - # The profiler can run in 2 modes, - # 1. Run profiler for user specified num_steps - num_steps: Optional[int] = None - # 2. Run profiler until all requests complete - complete_num_requests_per_step: Optional[int] = None - - save_chrome_traces_folder: Optional[str] = None - - -def get_dtype(dtype: str): - if dtype == "torch.float": - return torch.float - else: - return dtype - - -OutputLen_NumReqs_Map: TypeAlias = dict[int, int] - - -def compute_request_output_lengths( - batch_size: int, step_requests: list[int] -) -> OutputLen_NumReqs_Map: - """ - Given the number of requests, batch_size, and the number of requests - that each engine-step should process, step_requests, determine the - output lengths of the requests such that step_request is honoured. - - Example: - if batch size = 128 and step_request = [128, 128, 96, 64, 32, 1] - then return, - {2 : 32, 3 : 32, 4 : 32, 5 : 31, 6 : 1}, meaning, - 32 requests should have output length 2, - 32 requests should have output length 3, - 32 requests should have output length 4, - 31 requests should have output length 5, - 1 request should have output length 6. - - Args: - batch_size (int): Number of requests submitted for profile. This is - args.batch_size. - step_requests (list[int]): step_requests[i] is the number of requests - that the ith engine step should process. - - Returns: - OutputLen_NumReqs_Map : A dictionary with output-length as keys and the - number of requests required to have that output-length as values. - """ - ol_nr: OutputLen_NumReqs_Map = {} - - # Number of request that are assigned an output-length - num_reqs_assigned: int = 0 - num_steps: int = len(step_requests) - - # sanity check. The first step (prefill-step), must process all requests. - assert step_requests[0] == batch_size - - # Begin assignments from the last step. - output_length: int = num_steps - for num_requests_at_step in reversed(step_requests): - if num_reqs_assigned == batch_size: - break - - assert num_reqs_assigned < batch_size - - # Remove the number of requests that have been determined - # to participate in this step and beyond. - num_reqs_unassigned_at_step = num_requests_at_step - num_reqs_assigned - assert num_reqs_unassigned_at_step >= 0 - - if num_reqs_unassigned_at_step > 0: - ol_nr[output_length] = num_reqs_unassigned_at_step - num_reqs_assigned += num_reqs_unassigned_at_step - - output_length -= 1 - - # sanity checks. - assert sum(ol_nr.values()) == batch_size, ( - "Number of requests in output-length assignment does not match " - f"batch-size.\n batch size {batch_size} - " - f"step requests {step_requests} - assignments {ol_nr}" - ) - - # Check that the output-length is in [1, num-steps]. Output length must be - # at least 1 as all requests must participate in the prefill-step. - assert all(ol >= 1 and ol <= num_steps for ol in ol_nr), ( - "Output lengths of requests should be in range " - f"[1, num-engine-steps].\n batch size {batch_size} - " - f"step requests {step_requests} - assignments {ol_nr}" - ) - - return ol_nr - - -def determine_requests_per_step(context: ProfileContext) -> list[int]: - """ - Determine number of requests each engine step should process. - If context.num_steps is set, then all engine steps process the - same number of requests and the output list is of length - context.num_steps. 
- - If context.complete_num_requests_per_step is set, then each decode step - processes fewer and fewer requests until there are no requests to process. - In this case, the output list is as big as the number of steps - required to process all requests. - - Args: - context: ProfileContext object. - - Returns: - list[int]: Number of requests to process for all engine-steps. - output[i], contains the number of requests that the ith step - should process. - """ - if context.num_steps: - # All requests must run until num_engine_steps. This implies - # that their output lengths must be equal to num_engine_steps. - return [context.batch_size] * context.num_steps - - assert ( - context.complete_num_requests_per_step - and context.complete_num_requests_per_step > 0 - ), ( - f"Expected a positive complete_num_requests_per_step argument." - f"Instead got {context.complete_num_requests_per_step}" - ) - - # We start dropping after the first decode step. - step_requests = [ - context.batch_size, # prefill - context.batch_size, # decode - ] - - num_running_requests = context.batch_size - num_running_requests -= context.complete_num_requests_per_step - while num_running_requests > 0: - step_requests.append(num_running_requests) - num_running_requests -= context.complete_num_requests_per_step - - if step_requests[-1] != 1: - # have 1 request running at the last step. This is often - # useful - step_requests.append(1) - - return step_requests - - -def run_profile( - context: ProfileContext, csv_output: Optional[str], json_output: Optional[str] -): - print("Run profile with:") - for key, value in asdict(context).items(): - print(f" {key} = {value}") - - requests_per_step: list[int] = determine_requests_per_step(context) - - ol_nr: OutputLen_NumReqs_Map = compute_request_output_lengths( - context.batch_size, requests_per_step - ) - - num_steps_to_profile: int = len(requests_per_step) - max_output_len: int = max(ol_nr.keys()) - assert max_output_len >= 1 - - # Create sampling params - sampling_params = SamplingParams( - temperature=0.8, - top_p=0.95, - # max_tokens is set on a per-request basis. 
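(Editorial sketch, not part of the patch: the two deleted profiling helpers above describe how per-step request counts are chosen and how output lengths are assigned so each engine step processes the intended number of requests. The standalone Python below mirrors only what their docstrings state; the function and variable names are local to this sketch, and it reproduces the documented example of batch_size=128 with 32 requests completing per decode step.)

def requests_per_step(batch_size: int, complete_per_step: int) -> list[int]:
    # One prefill step plus a first full decode step, then drop
    # `complete_per_step` requests after every subsequent decode step.
    steps = [batch_size, batch_size]
    running = batch_size - complete_per_step
    while running > 0:
        steps.append(running)
        running -= complete_per_step
    if steps[-1] != 1:
        steps.append(1)  # keep a single request alive for one extra step
    return steps


def output_lengths(batch_size: int, step_requests: list[int]) -> dict[int, int]:
    # Walk the step counts backwards, assigning the longest output lengths
    # to the requests that must survive the most steps.
    assert step_requests[0] == batch_size, "prefill must see every request"
    lengths: dict[int, int] = {}
    assigned = 0
    out_len = len(step_requests)
    for n_at_step in reversed(step_requests):
        if assigned == batch_size:
            break
        extra = n_at_step - assigned
        if extra > 0:
            lengths[out_len] = extra
            assigned += extra
        out_len -= 1
    return lengths


steps = requests_per_step(batch_size=128, complete_per_step=32)
assert steps == [128, 128, 96, 64, 32, 1]
assert output_lengths(128, steps) == {2: 32, 3: 32, 4: 32, 5: 31, 6: 1}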
- max_tokens=None, - ignore_eos=True, - ) - - # Create LLM - llm = LLM(**asdict(context.engine_args)) - batch_size = context.batch_size - prompt_len = context.prompt_len - - scheduler_config = llm.llm_engine.vllm_config.scheduler_config - max_model_len = llm.llm_engine.model_config.max_model_len - max_num_batched_tokens = scheduler_config.max_num_batched_tokens - max_num_seqs = scheduler_config.max_num_seqs - - if batch_size * prompt_len > max_num_batched_tokens: - print( - f"ERROR: chosen batch_size * prompt_len " - f"({batch_size} * {prompt_len} = {batch_size * prompt_len}) is " - f"larger than max_num_batched_tokens ({max_num_batched_tokens}) " - f"and therefore cannot be run in a single profile step, please " - f"choose a smaller batch size or prompt length, or increase " - f"--max-num-batched-tokens" - ) - sys.exit(-1) - if batch_size > max_num_seqs: - print( - f"ERROR: chosen batch_size ({batch_size}) is larger than " - f"max_num_seqs ({max_num_seqs}) and therefore cannot be run in a " - f"single profile step, please choose a smaller batch size" - ) - sys.exit(-1) - print( - "llm.llm_engine.model_config.max_model_len: ", - llm.llm_engine.model_config.max_model_len, - ) - if prompt_len + max_output_len > llm.llm_engine.model_config.max_model_len: - print( - f"ERROR: chosen prompt_len + max_output_len ({prompt_len} + " - f"{max_output_len} = {prompt_len + max_output_len}) is larger " - f"than the model's max_model_len ({max_model_len}), please " - f"choose a smaller prompt_len or max_output_len, or increase " - f"--max-model-len" - ) - sys.exit(-1) - - def add_requests(): - def get_output_len_generator() -> Generator[int, Any, Any]: - for output_len, num_reqs in ol_nr.items(): - for _ in range(num_reqs): - yield output_len - - output_len_generator = get_output_len_generator() - for i in range(batch_size): - sampling_params.max_tokens = next(output_len_generator) - assert isinstance(sampling_params.max_tokens, int) - - prompt_token_ids = torch.randint( - llm.get_tokenizer().vocab_size, size=(prompt_len,) - ).tolist() - - llm.llm_engine.add_request( - request_id=f"seq{i}", - prompt={"prompt_token_ids": prompt_token_ids}, - params=sampling_params, - ) - - def abort_requests(): - for i in range(batch_size): - llm.llm_engine.abort_request(f"seq{i}") - - # Warm up run - print("Warm up run ...") - add_requests() - llm.llm_engine.step() # Prefill - llm.llm_engine.step() # Decode - abort_requests() - - print("Profile run ...") - add_requests() - - with layerwise_profile() as prefill_prof: - llm.llm_engine.step() # First step is prefill - - decode_profs = [] - for _ in tqdm.tqdm(range(num_steps_to_profile - 1)): - num_running_seqs = llm.llm_engine.scheduler[0].get_num_unfinished_seq_groups() - with layerwise_profile(num_running_seqs=num_running_seqs) as decode_prof: - llm.llm_engine.step() - decode_profs.append(decode_prof) - - decode_results_list = [prof.results for prof in decode_profs] - prefill_results = prefill_prof.results - has_decode = len(decode_results_list) > 0 - - LINE_WIDTH = 80 - print("=" * LINE_WIDTH) - print(f"= Prefill Model Table (prompt_len={prompt_len}, batch_size={batch_size})") - print("=" * LINE_WIDTH) - print() - prefill_results.print_model_table() - - if has_decode: - print() - print("=" * LINE_WIDTH) - print( - f"= First Decode Step Model Table " - f"(prompt_len={prompt_len}, batch_size={batch_size})" - ) - print("=" * LINE_WIDTH) - print() - decode_results_list[0].print_model_table() - - print() - print("=" * LINE_WIDTH) - print(f"= Prefill Summary Table 
(prompt_len={prompt_len}, batch_size={batch_size})") - print("=" * LINE_WIDTH) - print() - prefill_results.print_summary_table() - - if has_decode: - print() - print("=" * LINE_WIDTH) - print( - f"= First Decode Step Summary Table " - f"(prompt_len={prompt_len}, batch_size={batch_size})" - ) - print("=" * LINE_WIDTH) - print() - decode_results_list[0].print_summary_table() - - if csv_output: - csv_filename_base = ( - csv_output[:-4] if csv_output.endswith(".csv") else csv_output - ) - prefill_results.export_model_stats_table_csv( - csv_filename_base + "_prefill_model_table.csv" - ) - prefill_results.export_summary_stats_table_csv( - csv_filename_base + "_prefill_summary_table.csv" - ) - - if has_decode: - decode_results_list[0].export_model_stats_table_csv( - csv_filename_base + "_decode_model_table.csv" - ) - decode_results_list[0].export_summary_stats_table_csv( - csv_filename_base + "_decode_summary_table.csv" - ) - - if json_output: - cuda_devices = [ - torch.cuda.get_device_properties(dev_idx) - for dev_idx in range(torch.cuda.device_count()) - ] - - json_dict = { - "context": { - "python_version": f"{sys.version}", - "torch_version": f"{torch.__version__}", - "torch_cuda_version": f"{torch.version.cuda}", - "cuda_devices": f"{cuda_devices}", - **asdict(context), - }, - "prefill": prefill_results.convert_stats_to_dict(), - } - - if has_decode: - for idx, dr in enumerate(decode_results_list): - json_dict[f"decode_{idx + 1}"] = dr.convert_stats_to_dict() - - # Add .json to json_output filename if it doesn't exist already. - json_output_file = ( - json_output if json_output.endswith(".json") else json_output + ".json" - ) - with open(json_output_file, "w+") as f: - json.dump(json_dict, f, indent=2) - pass - - if context.save_chrome_traces_folder is not None: - os.makedirs(context.save_chrome_traces_folder, exist_ok=True) - prefill_prof.profiler.export_chrome_trace( - context.save_chrome_traces_folder + "/prefill.json" - ) - for idx, decode_prof in enumerate(decode_profs): - decode_prof.profiler.export_chrome_trace( - context.save_chrome_traces_folder + f"/decode_{idx + 1}.json" - ) - print( - "Traces saved as prefill.json and decode_1.json, etc." - f" in folder {context.save_chrome_traces_folder}" - ) - - -def parse_args(): - parser = FlexibleArgumentParser( - description=""" -Profile a model - - example: - ``` - python examples/offline_inference/profiling.py \\ - --model neuralmagic/Meta-Llama-3.1-8B-Instruct-FP8 --batch-size 4 \\ - --prompt-len 512 --max-num-batched-tokens 8196 --json Llama31-8b-FP8 \\ - --enforce-eager run_num_steps -n 2 - ``` - - then you can use various tools to analyze the json output - terminal ascii tables: - ``` - python tools/profiler/print_layerwise_table.py \\ - --json-trace Llama31-8b-FP8.json --phase prefill --table summary - ``` - or create matplotlib stacked bar charts: - ``` - python tools/profiler/visualize_layerwise_profile.py \\ - --json-trace Llama31-8b-FP8.json \\ - --output-directory profile_breakdown --plot-metric pct_cuda_time - ``` -""", - formatter_class=RawTextHelpFormatter, - ) - parser.add_argument( - "--csv", - type=str, - default=None, - help="Export the results as multiple csv file. This should be the root " - "filename, will create _prefill_model_table.csv, " - "_prefill_summary_table.csv, " - "_decode_model_table.csv, and " - "_decode_summary_table.csv", - ) - parser.add_argument( - "--json", - type=str, - default=None, - help="Export the results as a json file. 
This should be the filename", - ) - parser.add_argument( - "--save-chrome-traces-folder", - type=str, - help="Save chrome traces for the prefill and decode " - "will save traces as prefill.json and decode_1.json, " - "etc. inside this folder", - ) - parser.add_argument( - "--prompt-len", - type=int, - default=PROMPT_LEN_DEFAULT, - help=f"Length of the random prompt to use when profiling, all batched " - f"requests use the same prompt_len, default={PROMPT_LEN_DEFAULT}", - ) - parser.add_argument( - "--batch-size", - type=int, - default=BATCH_SIZE_DEFAULT, - help=f"Number of requests to run as a single batch, " - f"default={BATCH_SIZE_DEFAULT}", - ) - - subparsers = parser.add_subparsers(dest="cmd") - - run_num_steps_parser = subparsers.add_parser( - "run_num_steps", help="This variation profiles n engine.step() invocations." - ) - run_num_steps_parser.add_argument( - "-n", - "--num-steps", - type=int, - help="Number of engine steps to profile.\n" - "Setting it to 1, profiles only the prefill step.\n" - "Setting it to 2, profiles the prefill and first decode step\n" - "Setting it to 3, profiles the prefill, 1st and 2nd decode steps\n" - "and so on ...", - ) - - run_to_completion_parser = subparsers.add_parser( - "run_to_completion", - help="This variation profiles all the engine.step() invocations" - "until the engine exhausts all submitted requests.", - ) - run_to_completion_parser.add_argument( - "-n", - "--complete-num-requests-per-step", - type=int, - help="Complete complete_num_requests_per_step requests every decode step." - "For e.g., with batch_size 128 and complete_num_requests_per_step 32," - "the profiler is run for 6 engine steps, with the steps processing, " - "128, 128, 96, 64, 32, 1 requests respectively.\n" - "Note that we tack-on a one-request step at the end as it is often " - "useful.", - ) - - EngineArgs.add_cli_args(parser) - - return parser.parse_args() - - -def main(args): - context = ProfileContext( - engine_args=EngineArgs.from_cli_args(args), - **{ - k: v - for k, v in vars(args).items() - if k in inspect.signature(ProfileContext).parameters - }, - ) - run_profile(context, csv_output=args.csv, json_output=args.json) - - -if __name__ == "__main__": - args = parse_args() - main(args) diff --git a/tests/basic_correctness/test_basic_correctness.py b/tests/basic_correctness/test_basic_correctness.py index 24b1c9a93126..411f3e01bc2c 100644 --- a/tests/basic_correctness/test_basic_correctness.py +++ b/tests/basic_correctness/test_basic_correctness.py @@ -11,7 +11,7 @@ import pytest import torch -from vllm import LLM, envs +from vllm import LLM from vllm.v1.engine.llm_engine import LLMEngine as LLMEngineV1 from ..conftest import HfRunner, VllmRunner @@ -26,14 +26,6 @@ TARGET_TEST_SUITE = os.environ.get("TARGET_TEST_SUITE", "L4") -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - # Simple autouse wrapper to run both engines for each test - # This can be promoted up to conftest.py to run for every - # test in a package - pass - - def test_vllm_gc_ed(): """Verify vllm instance is GC'ed when it is deleted""" llm = LLM("distilbert/distilgpt2") @@ -76,12 +68,6 @@ def test_models( model_executor: str, enable_prompt_embeds: bool, ) -> None: - if not envs.VLLM_USE_V1: - if async_scheduling: - pytest.skip("async_scheduling only supported in v1.") - if model_executor != "uni": - pytest.skip("only test uniproc executor for v0.") - if backend == "XFORMERS" and model == "google/gemma-2-2b-it": pytest.skip( f"{backend} does not support gemma2 with full context length.") diff 
--git a/tests/basic_correctness/test_cumem.py b/tests/basic_correctness/test_cumem.py index f3ad680b72b5..508740ab2938 100644 --- a/tests/basic_correctness/test_cumem.py +++ b/tests/basic_correctness/test_cumem.py @@ -122,11 +122,12 @@ def model(x): # sleep mode with safetensors ("meta-llama/Llama-3.2-1B", True), # sleep mode with pytorch checkpoint - ("facebook/opt-125m", False), + ("facebook/opt-125m", True), ]) def test_end_to_end(monkeypatch: pytest.MonkeyPatch, model: str, use_v1: bool): with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + assert use_v1 + m.setenv("VLLM_USE_V1", "1") free, total = torch.cuda.mem_get_info() used_bytes_baseline = total - free # in case other process is running llm = LLM(model, enable_sleep_mode=True) diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index 022f183b3193..b6bebbba915b 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -54,8 +54,7 @@ def test_attention_fusion_v0(example_prompts, monkeypatch, model: str, # Use global backends global backend, backend_unfused - use_v1 = False # can be made a param once V1 support added - monkeypatch.setenv("VLLM_USE_V1", str(int(use_v1))) + monkeypatch.setenv("VLLM_USE_V1", "1") monkeypatch.setenv("VLLM_USE_TRITON_FLASH_ATTN", str(int(use_triton_fa))) # Prompt 4 seems too open-ended, differs between fused and unfused diff --git a/tests/conftest.py b/tests/conftest.py index ce9de3bf94b5..f14b1e8780ad 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -160,26 +160,6 @@ def cleanup_VLLM_USE_V1(monkeypatch): monkeypatch.delenv("VLLM_USE_V1") -@pytest.fixture(params=[True, False]) -def run_with_both_engines(request, monkeypatch): - # Automatically runs tests twice, once with V1 and once without - use_v1 = request.param - # Tests decorated with `@skip_v1` are only run without v1 - skip_v0 = request.node.get_closest_marker("skip_v0") - skip_v1 = request.node.get_closest_marker("skip_v1") - - if use_v1: - if skip_v1: - pytest.skip("Skipping test on vllm V1") - monkeypatch.setenv('VLLM_USE_V1', '1') - else: - if skip_v0: - pytest.skip("Skipping test on vllm V0") - monkeypatch.setenv('VLLM_USE_V1', '0') - - yield - - @pytest.fixture(autouse=True) def init_test_http_connection(): # pytest_asyncio may use a different event loop per test diff --git a/tests/entrypoints/llm/test_generate.py b/tests/entrypoints/llm/test_generate.py index 3bbbcc755d13..e0ecb02d4f56 100644 --- a/tests/entrypoints/llm/test_generate.py +++ b/tests/entrypoints/llm/test_generate.py @@ -25,12 +25,6 @@ ] -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - """We can run both engines for this test.""" - pass - - @pytest.fixture(scope="module") def llm(): # pytest caches the fixture so we use weakref.proxy to diff --git a/tests/entrypoints/llm/test_prompt_validation.py b/tests/entrypoints/llm/test_prompt_validation.py index 1b7be15d5d69..b219b33d1760 100644 --- a/tests/entrypoints/llm/test_prompt_validation.py +++ b/tests/entrypoints/llm/test_prompt_validation.py @@ -6,14 +6,6 @@ from vllm import LLM -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - # Simple autouse wrapper to run both engines for each test - # This can be promoted up to conftest.py to run for every - # test in a package - pass - - def test_empty_prompt(): llm = LLM(model="openai-community/gpt2", enforce_eager=True) with pytest.raises(ValueError, match='decoder prompt cannot be empty'): diff --git a/tests/entrypoints/openai/test_metrics.py 
b/tests/entrypoints/openai/test_metrics.py index 8917aa5a5efb..f0b61902eb56 100644 --- a/tests/entrypoints/openai/test_metrics.py +++ b/tests/entrypoints/openai/test_metrics.py @@ -432,7 +432,7 @@ def test_metrics_exist_run_batch(use_v1: bool): "--port", port, ], - env={"VLLM_USE_V1": "1" if use_v1 else "0"}) + env={"VLLM_USE_V1": "1"}) def is_server_up(url): try: diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 190c92e1251c..f8454ad0a4c4 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -69,28 +69,20 @@ def generate_params(): @pytest.mark.parametrize("device, name, use_mla, block_size", generate_params()) -@pytest.mark.parametrize("use_v1", [True, False]) def test_env( device: str, name: str, use_mla: bool, block_size: int, - use_v1: bool, monkeypatch: pytest.MonkeyPatch, ): """Test attention backend selection with valid device-backend pairs.""" with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + m.setenv("VLLM_USE_V1", "1") m.setenv(STR_BACKEND_ENV_VAR, name) m.setenv("VLLM_MLA_DISABLE", "1" if use_mla else "0") - if name == "FLASHINFER" and not use_v1: - pytest.skip("FlashInfer backend is only available on V1 engine") - if device == "cpu": - if not use_v1: - pytest.skip("CPU backend only supports V1") - with patch("vllm.attention.selector.current_platform", CpuPlatform()): backend = get_attn_backend(16, torch.float16, None, block_size, @@ -137,7 +129,7 @@ def test_env( block_size, False, use_mla=use_mla) - expected = f"{name}_VLLM_V1" if use_v1 else name + expected = f"{name}_VLLM_V1" assert backend.get_name() == expected else: backend = get_attn_backend(16, @@ -146,7 +138,7 @@ def test_env( block_size, False, use_mla=use_mla) - expected = "TRITON_ATTN_VLLM_V1" if use_v1 else "ROCM_FLASH" + expected = "TRITON_ATTN_VLLM_V1" assert backend.get_name() == expected elif device == "cuda": @@ -163,11 +155,7 @@ def test_env( # - TRITON_MLA: fallback for other cases if name == "CUTLASS_MLA": - if not use_v1: - # CUTLASS_MLA only supported on V1 engine - pytest.skip( - "CUTLASS_MLA only supported on V1 engine") - elif block_size != 128: + if block_size != 128: # CUTLASS_MLA only supports block_size == 128 pytest.skip( "CUTLASS_MLA only supports block_size 128") @@ -181,11 +169,7 @@ def test_env( expected = "CUTLASS_MLA_VLLM_V1" assert backend.get_name() == expected elif name == "FLASHINFER_MLA": - if not use_v1: - # FlashInfer MLA only supported on V1 engine - pytest.skip( - "FlashInfer MLA only supported on V1 engine") - elif block_size not in [32, 64]: + if block_size not in [32, 64]: # FlashInfer MLA only supports block_size 32 or 64 pytest.skip( "FlashInfer MLA only supports block_size 32 " @@ -217,23 +201,17 @@ def test_env( block_size, False, use_mla=use_mla) - expected = f"{name}_VLLM_V1" if use_v1 else name + expected = f"{name}_VLLM_V1" assert backend.get_name() == expected elif name == "FLASH_ATTN_MLA": - if not use_v1: - # FlashAttention MLA only supported on V1 engine - pytest.skip( - "FlashAttention MLA only supported on V1 engine" - ) - else: - backend = get_attn_backend(16, - torch.float16, - None, - block_size, - False, - use_mla=use_mla) - expected = "FLASH_ATTN_MLA" - assert backend.get_name() == expected + backend = get_attn_backend(16, + torch.float16, + None, + block_size, + False, + use_mla=use_mla) + expected = "FLASH_ATTN_MLA" + assert backend.get_name() == expected else: # TRITON_MLA or other 
fallback backend = get_attn_backend(16, @@ -242,8 +220,7 @@ def test_env( block_size, False, use_mla=use_mla) - expected = ("TRITON_MLA_VLLM_V1" - if use_v1 else "TRITON_MLA") + expected = "TRITON_MLA_VLLM_V1" assert backend.get_name() == expected elif name == "FLASHINFER": backend = get_attn_backend(16, @@ -252,7 +229,7 @@ def test_env( block_size, False, use_mla=use_mla) - expected = "FLASHINFER_VLLM_V1" if use_v1 else name + expected = "FLASHINFER_VLLM_V1" assert backend.get_name() == expected else: backend = get_attn_backend(32, @@ -261,36 +238,30 @@ def test_env( block_size, False, use_mla=use_mla) - expected = "FLASH_ATTN_VLLM_V1" if use_v1 else name + expected = "FLASH_ATTN_VLLM_V1" assert backend.get_name() == expected - if use_v1: - backend = get_attn_backend(16, - torch.float16, - None, - block_size, - False, - use_mla=use_mla) - assert backend.get_name() == "FLEX_ATTENTION", ( - "Should fallback to FlexAttention if head size is " - "not supported by FlashAttention") + backend = get_attn_backend(16, + torch.float16, + None, + block_size, + False, + use_mla=use_mla) + assert backend.get_name() == "FLEX_ATTENTION", ( + "Should fallback to FlexAttention if head size is " + "not supported by FlashAttention") @pytest.mark.parametrize("device", ["cpu", "cuda"]) -@pytest.mark.parametrize("use_v1", [True, False]) def test_fp32_fallback( device: str, - use_v1: bool, monkeypatch: pytest.MonkeyPatch, ): """Test attention backend selection with fp32.""" with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + m.setenv("VLLM_USE_V1", "1") if device == "cpu": - if not use_v1: - pytest.skip("CPU backend only supports V1") - with patch("vllm.attention.selector.current_platform", CpuPlatform()): backend = get_attn_backend(16, torch.float32, None, 16, False) @@ -300,8 +271,7 @@ def test_fp32_fallback( with patch("vllm.attention.selector.current_platform", CudaPlatform()): backend = get_attn_backend(16, torch.float32, None, 16, False) - assert (backend.get_name() == "FLEX_ATTENTION" - if use_v1 else "XFORMERS") + assert backend.get_name() == "FLEX_ATTENTION" def test_flash_attn(monkeypatch: pytest.MonkeyPatch): @@ -357,12 +327,11 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch): assert backend.get_name() != STR_FLASH_ATTN_VAL -@pytest.mark.parametrize("use_v1", [True, False]) -def test_invalid_env(use_v1: bool, monkeypatch: pytest.MonkeyPatch): +def test_invalid_env(monkeypatch: pytest.MonkeyPatch): """Test that invalid attention backend names raise ValueError.""" with monkeypatch.context() as m, patch( "vllm.attention.selector.current_platform", CudaPlatform()): - m.setenv("VLLM_USE_V1", "1" if use_v1 else "0") + m.setenv("VLLM_USE_V1", "1") m.setenv(STR_BACKEND_ENV_VAR, STR_INVALID_VAL) # Should raise ValueError for invalid backend diff --git a/tests/lora/test_lora_functions.py b/tests/lora/test_lora_functions.py index 50c60341f0d8..221d5237823c 100644 --- a/tests/lora/test_lora_functions.py +++ b/tests/lora/test_lora_functions.py @@ -6,10 +6,10 @@ import pytest from vllm.engine.arg_utils import AsyncEngineArgs, EngineArgs -from vllm.engine.llm_engine import LLMEngine from vllm.entrypoints.openai.api_server import ( build_async_engine_client_from_engine_args) from vllm.lora.request import LoRARequest +from vllm.v1.engine.llm_engine import LLMEngine MODEL_PATH = "meta-llama/Llama-2-7b-hf" LORA_MODULE_PATH = "yard1/llama-2-7b-sql-lora-test" diff --git a/tests/models/language/generation/test_common.py b/tests/models/language/generation/test_common.py index 
c14e71cbdb96..39c4dd735b72 100644 --- a/tests/models/language/generation/test_common.py +++ b/tests/models/language/generation/test_common.py @@ -15,7 +15,8 @@ # have a clean way to fall back, so we fail with # a clear msg when it happens. # https://github.com/vllm-project/vllm/issues/14524 -REQUIRES_V0 = ["microsoft/phi-2", "stabilityai/stablelm-3b-4e1t"] +# NOTE(woosuk): Skipping these tests until V1 supports them. +# REQUIRES_V0 = ["microsoft/phi-2", "stabilityai/stablelm-3b-4e1t"] # This list contains the model that are using AITER kernel. # Skip model that are not using AITER tests. @@ -113,9 +114,6 @@ def test_models(hf_runner, vllm_runner, example_prompts, model: str, model_info.check_available_online(on_fail="skip") model_info.check_transformers_version(on_fail="skip") - if model in REQUIRES_V0: - monkeypatch.setenv("VLLM_USE_V1", "0") - if use_rocm_aiter and (model in AITER_MODEL_LIST): monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1") elif use_rocm_aiter and model not in AITER_MODEL_LIST: diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py index 206ad1352e06..0b1f90e27db8 100644 --- a/tests/models/language/generation/test_hybrid.py +++ b/tests/models/language/generation/test_hybrid.py @@ -8,7 +8,7 @@ from vllm.engine.arg_utils import EngineArgs from vllm.sampling_params import SamplingParams -from ...utils import check_logprobs_close, check_outputs_equal +from ...utils import check_logprobs_close # Mark all tests as hybrid pytestmark = pytest.mark.hybrid_model @@ -88,15 +88,6 @@ def test_models( hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - if model not in V0_UNSUPPORTED_MODELS: - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: - vllm_v0_outputs = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - else: - vllm_v0_outputs = None - if model in V1_SUPPORTED_MODELS: with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: vllm_v1_outputs = vllm_model.generate_greedy_logprobs( @@ -104,14 +95,6 @@ def test_models( else: vllm_v1_outputs = None - if vllm_v0_outputs is not None: - check_logprobs_close( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v0_outputs, - name_0="hf", - name_1="vllm-v0", - ) - if model in V1_SUPPORTED_MODELS: check_logprobs_close( outputs_0_lst=hf_outputs, @@ -157,45 +140,6 @@ def test_batching( ) -@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) -@pytest.mark.parametrize("max_tokens", [32]) -@pytest.mark.parametrize("num_logprobs", [5]) -@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 16]) -def test_chunked_prefill( - vllm_runner, - example_prompts, - model: str, - max_tokens: int, - num_logprobs: int, - chunked_prefill_token_size: int, - monkeypatch, -) -> None: - max_num_seqs = chunked_prefill_token_size - max_num_batched_tokens = chunked_prefill_token_size - - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - with vllm_runner(model, - enable_chunked_prefill=True, - max_num_batched_tokens=max_num_batched_tokens, - max_num_seqs=max_num_seqs) as vllm_model: - chunked = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - - with vllm_runner(model, - enable_chunked_prefill=False, - max_num_seqs=max_num_seqs) as vllm_model: - non_chunked = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - - check_logprobs_close( - 
outputs_0_lst=chunked, - outputs_1_lst=non_chunked, - name_0="chunked", - name_1="non_chunked", - ) - - @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) @pytest.mark.parametrize("max_tokens", [10]) def test_chunked_prefill_with_parallel_sampling( @@ -257,38 +201,6 @@ def test_mamba_cache_cg_padding( "Could be related to mamba cache not padded correctly") -@pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) -@pytest.mark.parametrize("max_tokens", [20]) -def test_models_preemption_recompute( - vllm_runner, - example_prompts, - model: str, - max_tokens: int, - monkeypatch, -) -> None: - """ - Tests that outputs are identical with and w/o preemptions (recompute). - """ - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: - scheduler = vllm_model.llm.llm_engine.scheduler[0] - scheduler.ENABLE_ARTIFICIAL_PREEMPT = True - preempt_vllm_outputs = vllm_model.generate_greedy( - example_prompts, max_tokens) - - scheduler.ENABLE_ARTIFICIAL_PREEMPT = False - vllm_outputs = vllm_model.generate_greedy(example_prompts, - max_tokens) - - check_outputs_equal( - outputs_0_lst=preempt_vllm_outputs, - outputs_1_lst=vllm_outputs, - name_0="vllm_preepmtions", - name_1="vllm", - ) - - @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) def test_fail_upon_inc_requests_and_finished_requests_lt_available_blocks( vllm_runner, @@ -386,27 +298,10 @@ def test_full_cuda_graph( hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - if model not in V0_UNSUPPORTED_MODELS: - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: - vllm_v0_outputs = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - else: - vllm_v0_outputs = None - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: vllm_v1_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) - if vllm_v0_outputs is not None: - check_logprobs_close( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v0_outputs, - name_0="hf", - name_1="vllm-v0", - ) - check_logprobs_close( outputs_0_lst=hf_outputs, outputs_1_lst=vllm_v1_outputs, @@ -442,27 +337,12 @@ def test_fp32_cache_state( hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - with vllm_runner(model, - max_num_seqs=MAX_NUM_SEQS, - **{cache_dtype_param: "float32"}) as vllm_model: - vllm_v0_outputs = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS, **{cache_dtype_param: "float32"}) as vllm_model: vllm_v1_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) - check_logprobs_close( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v0_outputs, - name_0="hf", - name_1="vllm-v0", - ) - check_logprobs_close( outputs_0_lst=hf_outputs, outputs_1_lst=vllm_v1_outputs, diff --git a/tests/models/language/pooling/test_reward.py b/tests/models/language/pooling/test_reward.py index 08722ac98b7e..4ac91b5aed50 100644 --- a/tests/models/language/pooling/test_reward.py +++ b/tests/models/language/pooling/test_reward.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import pytest import torch @@ 
-82,7 +81,7 @@ def test_prm_models( check_transformers_version("Qwen/Qwen2.5-Math-PRM-7B", max_transformers_version="4.53.2") - if current_platform.is_cpu() and os.environ.get("VLLM_USE_V1", "0") == "0": + if current_platform.is_cpu(): pytest.skip("CPU only supports V1") if current_platform.is_rocm(): diff --git a/tests/models/quantization/test_fp8.py b/tests/models/quantization/test_fp8.py index afc27b6e0566..97dd4d6135ac 100644 --- a/tests/models/quantization/test_fp8.py +++ b/tests/models/quantization/test_fp8.py @@ -36,9 +36,6 @@ # NOTE: Increasing this in this suite will fail CI because we currently cannot # reset distributed env properly. Use a value > 1 just when you test. @pytest.mark.parametrize("tensor_parallel_size", [1]) -# Due to low-precision numerical divergence, this test is too sensitive for -# the async postprocessor -@pytest.mark.parametrize("disable_async_output_proc", [True]) def test_models( vllm_runner, example_prompts, @@ -49,7 +46,6 @@ def test_models( enforce_eager: bool, backend: str, tensor_parallel_size: int, - disable_async_output_proc: bool, monkeypatch: pytest.MonkeyPatch, ) -> None: """ @@ -74,7 +70,6 @@ def test_models( tensor_parallel_size=tensor_parallel_size, enforce_eager=enforce_eager, kv_cache_dtype="auto", - disable_async_output_proc=disable_async_output_proc, ) as vllm_model: baseline_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, NUM_LOG_PROBS) @@ -85,7 +80,6 @@ def test_models( tensor_parallel_size=tensor_parallel_size, enforce_eager=enforce_eager, kv_cache_dtype=kv_cache_dtype, - disable_async_output_proc=disable_async_output_proc, ) as vllm_model: test_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, NUM_LOG_PROBS) @@ -110,9 +104,6 @@ def test_models( ]) # Due to low-precision numerical divergence, we only test logprob of 4 tokens @pytest.mark.parametrize("max_tokens", [4]) -# Due to low-precision numerical divergence, this test is too sensitive for -# the async postprocessor -@pytest.mark.parametrize("disable_async_output_proc", [True]) def test_cpu_models( vllm_runner, example_prompts, @@ -120,7 +111,6 @@ def test_cpu_models( base_model: str, test_model: str, max_tokens: int, - disable_async_output_proc: bool, monkeypatch: pytest.MonkeyPatch, ) -> None: """ @@ -138,7 +128,6 @@ def test_cpu_models( max_model_len=MAX_MODEL_LEN, dtype="bfloat16", kv_cache_dtype="auto", - disable_async_output_proc=disable_async_output_proc, ) as vllm_model: baseline_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, NUM_LOG_PROBS) @@ -148,7 +137,6 @@ def test_cpu_models( max_model_len=MAX_MODEL_LEN, dtype="bfloat16", kv_cache_dtype=kv_cache_dtype, - disable_async_output_proc=disable_async_output_proc, ) as vllm_model: test_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, NUM_LOG_PROBS) diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index 9281579b71e7..b9601114a318 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -7,7 +7,6 @@ import pytest from vllm import LLM -from vllm.engine.llm_engine import LLMEngine as V0LLMEngine from vllm.utils import GiB_bytes from vllm.v1.core.kv_cache_utils import get_kv_cache_configs from vllm.v1.engine.core import EngineCore as V1EngineCore @@ -61,10 +60,6 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch, False)) # Avoid calling model.forward() - def _initialize_kv_caches_v0(self) -> None: - self.cache_config.num_gpu_blocks 
= 0 - self.cache_config.num_cpu_blocks = 0 - def _initialize_kv_caches_v1(self, vllm_config): kv_cache_specs = self.model_executor.get_kv_cache_specs() scheduler_kv_cache_config = get_kv_cache_configs( @@ -76,12 +71,12 @@ def _initialize_kv_caches_v1(self, vllm_config): # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config return 1, 0, scheduler_kv_cache_config - with (patch.object(V0LLMEngine, "_initialize_kv_caches", - _initialize_kv_caches_v0), - patch.object(V1EngineCore, "_initialize_kv_caches", + with (patch.object(V1EngineCore, "_initialize_kv_caches", _initialize_kv_caches_v1), monkeypatch.context() as m): if model_info.v0_only: - m.setenv("VLLM_USE_V1", "0") + # NOTE(woosuk): skip the test for V0-only models + return + if model_arch in ("Phi4FlashForCausalLM", "MotifForCausalLM"): # Phi4FlashForCausalLM and MotifForCausalLM # only supports DIFFERENTIAL_FLASH_ATTN backend diff --git a/tests/models/test_oot_registration.py b/tests/models/test_oot_registration.py index 4aa7bb729789..cb30d77c4f0e 100644 --- a/tests/models/test_oot_registration.py +++ b/tests/models/test_oot_registration.py @@ -42,6 +42,7 @@ def test_oot_registration_text_generation( assert rest == "" +@pytest.mark.skip(reason="This test is skipped because it failed on V1.") @create_new_process_for_each_test() def test_oot_registration_embedding( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/plugins_tests/test_platform_plugins.py b/tests/plugins_tests/test_platform_plugins.py index 6e2089ea2e0e..1d7e4475011d 100644 --- a/tests/plugins_tests/test_platform_plugins.py +++ b/tests/plugins_tests/test_platform_plugins.py @@ -7,15 +7,6 @@ from vllm.plugins import load_general_plugins -@pytest.fixture(scope="function", autouse=True) -def use_v0_only(monkeypatch): - """ - Since this module is V0 only, set VLLM_USE_V1=0 for - all tests in the module. 
- """ - monkeypatch.setenv('VLLM_USE_V1', '0') - - def test_platform_plugins(): # simulate workload by running an example import runpy diff --git a/tests/plugins_tests/test_scheduler_plugins.py b/tests/plugins_tests/test_scheduler_plugins.py index 8c2121610868..099869a82ad2 100644 --- a/tests/plugins_tests/test_scheduler_plugins.py +++ b/tests/plugins_tests/test_scheduler_plugins.py @@ -3,47 +3,18 @@ import pytest -from vllm.core.scheduler import Scheduler from vllm.engine.arg_utils import EngineArgs -from vllm.engine.llm_engine import LLMEngine from vllm.sampling_params import SamplingParams -from vllm.v1.core.sched.scheduler import Scheduler as V1Scheduler -from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine +from vllm.v1.core.sched.scheduler import Scheduler +from vllm.v1.engine.llm_engine import LLMEngine -class DummyV0Scheduler(Scheduler): - - def schedule(self): - raise Exception("Exception raised by DummyV0Scheduler") - - -class DummyV1Scheduler(V1Scheduler): +class DummyV1Scheduler(Scheduler): def schedule(self): raise Exception("Exception raised by DummyV1Scheduler") -def test_scheduler_plugins_v0(monkeypatch: pytest.MonkeyPatch): - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "0") - with pytest.raises(Exception) as exception_info: - - engine_args = EngineArgs( - model="facebook/opt-125m", - enforce_eager=True, # reduce test time - scheduler_cls=DummyV0Scheduler, - ) - - engine = LLMEngine.from_engine_args(engine_args=engine_args) - - sampling_params = SamplingParams(max_tokens=1) - engine.add_request("0", "foo", sampling_params) - engine.step() - - assert str( - exception_info.value) == "Exception raised by DummyV0Scheduler" - - def test_scheduler_plugins_v1(monkeypatch: pytest.MonkeyPatch): with monkeypatch.context() as m: m.setenv("VLLM_USE_V1", "1") @@ -59,7 +30,7 @@ def test_scheduler_plugins_v1(monkeypatch: pytest.MonkeyPatch): scheduler_cls=DummyV1Scheduler, ) - engine = V1LLMEngine.from_engine_args(engine_args=engine_args) + engine = LLMEngine.from_engine_args(engine_args=engine_args) sampling_params = SamplingParams(max_tokens=1) engine.add_request("0", "foo", sampling_params) diff --git a/tests/samplers/test_beam_search.py b/tests/samplers/test_beam_search.py index 0320a5ef31a6..2960ffcbd9ea 100644 --- a/tests/samplers/test_beam_search.py +++ b/tests/samplers/test_beam_search.py @@ -10,13 +10,6 @@ from vllm.assets.audio import AudioAsset - -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - """We can run both engines for this test.""" - pass - - # FIXME(zhuohan): The test can not pass if we: # 1. Increase max_tokens to 256. # 2. Increase beam_width to 8. diff --git a/tests/samplers/test_ignore_eos.py b/tests/samplers/test_ignore_eos.py index ea4a17dd2306..1d77d37a5d58 100644 --- a/tests/samplers/test_ignore_eos.py +++ b/tests/samplers/test_ignore_eos.py @@ -9,13 +9,6 @@ from vllm import SamplingParams - -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - """We can run both engines for this test.""" - pass - - # We also test with llama because it has generation_config to specify EOS # (past regression). 
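(Editorial sketch, not part of the patch: with the V0 engine removed, the surviving entry point is the V1 LLMEngine, exercised by the updated scheduler-plugin test earlier in this commit. The snippet below mirrors that test's calls; the model name, request id, and prompt are arbitrary, and the drain loop assumes the engine's usual has_unfinished_requests()/step() interface.)

from vllm.engine.arg_utils import EngineArgs
from vllm.sampling_params import SamplingParams
from vllm.v1.engine.llm_engine import LLMEngine

engine_args = EngineArgs(
    model="facebook/opt-125m",
    enforce_eager=True,  # reduce startup time, as the tests do
)
engine = LLMEngine.from_engine_args(engine_args=engine_args)
engine.add_request("0", "Hello, my name is", SamplingParams(max_tokens=8))

while engine.has_unfinished_requests():
    for request_output in engine.step():
        if request_output.finished:
            print(request_output.outputs[0].text)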
MODELS = ["distilbert/distilgpt2", "meta-llama/Llama-3.2-1B"] diff --git a/tests/samplers/test_ranks.py b/tests/samplers/test_ranks.py index 86fc14dc85f8..220a4a53f467 100644 --- a/tests/samplers/test_ranks.py +++ b/tests/samplers/test_ranks.py @@ -8,12 +8,6 @@ MODELS = ["distilbert/distilgpt2"] -@pytest.fixture(autouse=True) -def v1(run_with_both_engines): - """We can run both engines for this test.""" - pass - - @pytest.mark.parametrize("model", MODELS) @pytest.mark.parametrize("dtype", ["half"]) def test_ranks( diff --git a/tests/tokenization/test_detokenize.py b/tests/tokenization/test_detokenize.py index 15ea55afe963..bd2b91073d56 100644 --- a/tests/tokenization/test_detokenize.py +++ b/tests/tokenization/test_detokenize.py @@ -352,58 +352,3 @@ def test_decode_prompt_logprobs(complete_sequence: str, logprobs[token_id + 1].decoded_token for token_id, logprobs in zip(token_ids[1:], decoded_prompt_logprobs) ]) - - -@pytest.mark.parametrize("model", ["facebook/opt-125m"]) -@pytest.mark.parametrize("chunked_prefill_token_size", [1, 4, 7, 16, -1]) -def test_decode_prompt_logprobs_chunked_prefill( - vllm_runner, - model, - chunked_prefill_token_size: int, - example_prompts, - monkeypatch, -): - # VLLM V1 does not use incremental detokenization for - # prompt logprobs, so this test strategy is irrelevant. - monkeypatch.setenv("VLLM_USE_V1", "0") - - max_num_seqs = 256 - enable_chunked_prefill = False - max_num_batched_tokens = None - if chunked_prefill_token_size != -1: - enable_chunked_prefill = True - max_num_seqs = min(chunked_prefill_token_size, max_num_seqs) - max_num_batched_tokens = chunked_prefill_token_size - - with vllm_runner(model, - dtype="half", - max_logprobs=5, - gpu_memory_utilization=0.5, - enable_chunked_prefill=enable_chunked_prefill, - max_num_batched_tokens=max_num_batched_tokens, - max_num_seqs=max_num_seqs) as vllm_model: - - vllm_sampling_params = SamplingParams(max_tokens=10, - logprobs=5, - prompt_logprobs=5, - temperature=0.0) - vllm_results = vllm_model.llm.generate( - example_prompts, sampling_params=vllm_sampling_params) - - for idx, result in enumerate(vllm_results): - assert result.prompt_logprobs is not None - assert result.prompt_logprobs[0] is None - - # Compared detokenized prompts ids to original prompt. - generated_string = "" - for (prompt_token, - prompt_logprobs) in zip(result.prompt_token_ids[1:], - result.prompt_logprobs[1:]): - # prompt_logprobs is a dict of the token_id: logprob - # We select the token_id corresponding to the actual prompt - # Decoded token in the detokenized string corresponding to this - # prompt token. - generated_string += prompt_logprobs[prompt_token].decoded_token - - assert generated_string == example_prompts[idx], ( - "Detokenized prompt logprobs do not match original prompt") diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8912ff8bad42..242fcf501bfc 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1508,14 +1508,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: recommend_to_remove=True) return False - if self.kv_cache_dtype != "auto": - supported = current_platform.is_kv_cache_dtype_supported( - self.kv_cache_dtype, model_config) - if not supported: - _raise_or_fallback(feature_name="--kv-cache-dtype", - recommend_to_remove=False) - return False - # No Mamba or Encoder-Decoder so far. 
if not model_config.is_v1_compatible: _raise_or_fallback(feature_name=model_config.architectures, diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 014bc56bc8ec..a0fe38eb320d 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1,1835 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import time -from collections import Counter as collectionsCounter -from collections import deque -from contextlib import contextmanager -from dataclasses import dataclass -from functools import partial -from typing import (TYPE_CHECKING, Any, Callable, ClassVar, Deque, Dict, - Iterable, List, Literal, Mapping, NamedTuple, Optional) -from typing import Sequence as GenericSequence -from typing import Set, Type, Union, cast +from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine -import torch -import torch.nn as nn -from typing_extensions import TypeVar - -import vllm.envs as envs -from vllm.config import (LoRAConfig, ModelConfig, ObservabilityConfig, - ParallelConfig, SchedulerConfig, VllmConfig) -from vllm.core.scheduler import ScheduledSequenceGroup, SchedulerOutputs -from vllm.engine.arg_utils import EngineArgs -from vllm.engine.metrics_types import StatLoggerBase, Stats -from vllm.engine.output_processor.interfaces import ( - SequenceGroupOutputProcessor) -from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.entrypoints.openai.logits_processors import ( - get_logits_processors as get_openai_logits_processors) -from vllm.executor.executor_base import ExecutorBase -from vllm.inputs import ProcessorInputs, PromptType, SingletonInputs -from vllm.inputs.parse import split_enc_dec_inputs -from vllm.inputs.preprocess import InputPreprocessor -from vllm.logger import init_logger -from vllm.logits_process import get_bad_words_logits_processors -from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalRegistry -from vllm.multimodal.cache import processor_only_cache_from_config -from vllm.multimodal.processing import EncDecMultiModalProcessor -from vllm.outputs import (PoolingRequestOutput, RequestOutput, - RequestOutputFactory) -from vllm.reasoning import ReasoningParser, ReasoningParserManager -from vllm.sampling_params import RequestOutputKind, SamplingParams -from vllm.sequence import (ExecuteModelRequest, ParallelSampleSequenceGroup, - Sequence, SequenceGroup, SequenceGroupBase, - SequenceGroupMetadata, SequenceGroupOutput, - SequenceStatus) -from vllm.tracing import (SpanAttributes, SpanKind, extract_trace_context, - init_tracer) -from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.transformers_utils.tokenizer import (AnyTokenizer, - init_tokenizer_from_configs) -from vllm.usage.usage_lib import (UsageContext, is_usage_stats_enabled, - usage_message) -from vllm.utils import Counter, Device, resolve_obj_by_qualname, weak_bind -from vllm.version import __version__ as VLLM_VERSION -from vllm.worker.model_runner_base import InputProcessingError -from vllm.worker.worker_base import WorkerBase - -logger = init_logger(__name__) -_LOCAL_LOGGING_INTERVAL_SEC = 5 - -_O = TypeVar("_O", RequestOutput, PoolingRequestOutput) -_R = TypeVar("_R", default=Any) - - -@dataclass -class SchedulerOutputState: - """Caches the scheduler outputs for a virtual engine. 
Used for Multi-Step""" - seq_group_metadata_list: Optional[List[SequenceGroupMetadata]] = None - scheduler_outputs: Optional[SchedulerOutputs] = None - allow_async_output_proc: bool = False - last_output: Optional[SamplerOutput] = None - - -class OutputData(NamedTuple): - outputs: List[SamplerOutput] - seq_group_metadata_list: List[SequenceGroupMetadata] - scheduler_outputs: SchedulerOutputs - is_async: bool - is_last_step: bool - # Indicates if this output is from the first step of the - # multi-step. When multi-step is disabled, this is always - # set to True. - # is_first_step_output is invalid when `outputs` has - # outputs from multiple steps. - is_first_step_output: Optional[bool] - skip: List[int] - - -class SchedulerContext: - - def __init__(self) -> None: - self.output_queue: Deque[OutputData] = deque() - self.request_outputs: List[RequestOutput] = [] - self.seq_group_metadata_list: Optional[ - List[SequenceGroupMetadata]] = None - self.scheduler_outputs: Optional[SchedulerOutputs] = None - - def append_output(self, outputs: List[SamplerOutput], - seq_group_metadata_list: List[SequenceGroupMetadata], - scheduler_outputs: SchedulerOutputs, is_async: bool, - is_last_step: bool, - is_first_step_output: Optional[bool]): - self.output_queue.append( - OutputData(outputs=outputs, - seq_group_metadata_list=seq_group_metadata_list, - scheduler_outputs=scheduler_outputs, - is_async=is_async, - is_last_step=is_last_step, - is_first_step_output=is_first_step_output, - skip=[])) - - -class LLMEngine: - """An LLM engine that receives requests and generates texts. - - This is the main class for the vLLM engine. It receives requests - from clients and generates texts from the LLM. It includes a tokenizer, a - language model (possibly distributed across multiple GPUs), and GPU memory - space allocated for intermediate states (aka KV cache). This class utilizes - iteration-level scheduling and efficient memory management to maximize the - serving throughput. - - The [`LLM`][vllm.LLM] class wraps this class for offline batched inference - and the [`AsyncLLMEngine`][vllm.engine.async_llm_engine.AsyncLLMEngine] - class wraps this class for online serving. - - The config arguments are derived from [`EngineArgs`][vllm.EngineArgs]. - - Args: - vllm_config: The configuration for initializing and running vLLM. - executor_class: The model executor class for managing distributed - execution. - log_stats: Whether to log statistics. - usage_context: Specified entry point, used for usage info collection. 
- """ - - DO_VALIDATE_OUTPUT: ClassVar[bool] = False - """A flag to toggle whether to validate the type of request output.""" - - @classmethod - @contextmanager - def enable_output_validation(cls): - cls.DO_VALIDATE_OUTPUT = True - - yield - - cls.DO_VALIDATE_OUTPUT = False - - @classmethod - def validate_output( - cls, - output: object, - output_type: Type[_O], - ) -> _O: - do_validate = cls.DO_VALIDATE_OUTPUT - - if ((TYPE_CHECKING or do_validate) - and not isinstance(output, output_type)): - raise TypeError(f"Expected output of type {output_type}, " - f"but found type {type(output)}") - - return cast(_O, output) - - @classmethod - def validate_outputs( - cls, - outputs: GenericSequence[object], - output_type: Type[_O], - ) -> List[_O]: - do_validate = cls.DO_VALIDATE_OUTPUT - - outputs_: List[_O] - if TYPE_CHECKING or do_validate: - outputs_ = [] - for output in outputs: - if not isinstance(output, output_type): - raise TypeError(f"Expected output of type {output_type}, " - f"but found type {type(output)}") - - outputs_.append(output) - else: - outputs_ = outputs - - return outputs_ - - tokenizer: Optional[AnyTokenizer] - - def __init__( - self, - vllm_config: VllmConfig, - executor_class: Type[ExecutorBase], - log_stats: bool, - usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, - stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, - use_cached_outputs: bool = False, - ) -> None: - if envs.VLLM_USE_V1: - raise ValueError( - "Using V0 LLMEngine, but envs.VLLM_USE_V1=True. " - "This should not happen. As a workaround, try using " - "LLMEngine.from_vllm_config(...) or explicitly set " - "VLLM_USE_V1=0 or 1 and report this issue on Github.") - - self.vllm_config = vllm_config - self.model_config = vllm_config.model_config - self.cache_config = vllm_config.cache_config - self.lora_config = vllm_config.lora_config - self.parallel_config = vllm_config.parallel_config - self.scheduler_config = vllm_config.scheduler_config - self.device_config = vllm_config.device_config - self.speculative_config = vllm_config.speculative_config # noqa - self.load_config = vllm_config.load_config - self.structured_outputs_config = vllm_config.structured_outputs_config - self.observability_config = vllm_config.observability_config or ObservabilityConfig( # noqa - ) - - logger.info( - "Initializing a V0 LLM engine (v%s) with config: %s, " - "use_cached_outputs=%s, ", - VLLM_VERSION, - vllm_config, - use_cached_outputs, - ) - - self.log_stats = log_stats - self.use_cached_outputs = use_cached_outputs - - if self.model_config.skip_tokenizer_init: - self.tokenizer = None - self.detokenizer = None - else: - self.tokenizer = self._init_tokenizer() - self.detokenizer = Detokenizer(self.tokenizer) - - self.seq_counter = Counter() - self.generation_config_fields = ( - self.model_config.try_get_generation_config()) - - self.input_preprocessor = InputPreprocessor( - self.model_config, - self.tokenizer, - mm_registry, - mm_processor_cache=processor_only_cache_from_config( - self.model_config, mm_registry), - ) - - self.model_executor = executor_class(vllm_config=vllm_config) - - self._initialize_kv_caches() - - # If usage stat is enabled, collect relevant info. 
- if is_usage_stats_enabled(): - from vllm.model_executor.model_loader import ( - get_architecture_class_name) - usage_message.report_usage( - get_architecture_class_name(self.model_config), - usage_context, - extra_kvs={ - # Common configuration - "dtype": - str(self.model_config.dtype), - "tensor_parallel_size": - self.parallel_config.tensor_parallel_size, - "block_size": - self.cache_config.block_size, - "gpu_memory_utilization": - self.cache_config.gpu_memory_utilization, - "kv_cache_memory_bytes": - self.cache_config.kv_cache_memory_bytes, - # Quantization - "quantization": - self.model_config.quantization, - "kv_cache_dtype": - str(self.cache_config.cache_dtype), - - # Feature flags - "enable_lora": - bool(self.lora_config), - "enable_prefix_caching": - self.cache_config.enable_prefix_caching, - "enforce_eager": - self.model_config.enforce_eager, - "disable_custom_all_reduce": - self.parallel_config.disable_custom_all_reduce, - }) - - self.cached_scheduler_outputs = [ - SchedulerOutputState() - for _ in range(self.parallel_config.pipeline_parallel_size) - ] - - self.scheduler_contexts = [ - SchedulerContext() - for _ in range(self.parallel_config.pipeline_parallel_size) - ] - - if self.model_config.use_async_output_proc: - process_model_outputs = weak_bind(self._process_model_outputs) - - self.async_callbacks = [ - partial(process_model_outputs, - ctx=self.scheduler_contexts[v_id]) - for v_id in range(self.parallel_config.pipeline_parallel_size) - ] - else: - self.async_callbacks = [] - - # Currently used by AsyncLLMEngine to ensure quick append - # of request outputs to asyncio queues - self.process_request_outputs_callback: Optional[Callable] = None - - # Create the scheduler. - # NOTE: the cache_config here have been updated with the numbers of - # GPU and CPU blocks, which are profiled in the distributed executor. - if isinstance(self.vllm_config.scheduler_config.scheduler_cls, str): - Scheduler = resolve_obj_by_qualname( - self.vllm_config.scheduler_config.scheduler_cls) - else: - Scheduler = self.vllm_config.scheduler_config.scheduler_cls - self.scheduler = [ - Scheduler( - self.scheduler_config, self.cache_config, self.lora_config, - self.parallel_config.pipeline_parallel_size, - self.async_callbacks[v_id] - if self.model_config.use_async_output_proc else None) - for v_id in range(self.parallel_config.pipeline_parallel_size) - ] - - # Metric Logging. - if self.log_stats: - if stat_loggers is not None: - self.stat_loggers = stat_loggers - else: - # Lazy import for prometheus multiprocessing. - # We need to set PROMETHEUS_MULTIPROC_DIR environment variable - # before prometheus_client is imported. - # See https://prometheus.github.io/client_python/multiprocess/ - from vllm.engine.metrics import (LoggingStatLogger, - PrometheusStatLogger) - - self.stat_loggers = { - "logging": - LoggingStatLogger( - local_interval=_LOCAL_LOGGING_INTERVAL_SEC, - vllm_config=vllm_config), - "prometheus": - PrometheusStatLogger( - local_interval=_LOCAL_LOGGING_INTERVAL_SEC, - labels=dict( - model_name=self.model_config.served_model_name), - vllm_config=vllm_config), - } - self.stat_loggers["prometheus"].info("cache_config", - self.cache_config) - - self.tracer = None - if self.observability_config.otlp_traces_endpoint: - self.tracer = init_tracer( - "vllm.llm_engine", - self.observability_config.otlp_traces_endpoint) - - # Initialize reasoning parser if reasoning backend is set. 
- if self.structured_outputs_config.reasoning_parser and self.tokenizer: - reasoner_class = ReasoningParserManager.get_reasoning_parser( - self.structured_outputs_config.reasoning_parser) - self.reasoner: ReasoningParser = reasoner_class( - self.tokenizer.get_lora_tokenizer()) - - # Create sequence output processor, e.g. for beam search or - # speculative decoding. - self.output_processor = ( - SequenceGroupOutputProcessor.create_output_processor( - self.scheduler_config, - self.detokenizer, - self.scheduler, - self.seq_counter, - stop_checker=StopChecker( - self.scheduler_config.max_model_len, - self.reasoner - if self.structured_outputs_config.reasoning_parser - and self.tokenizer else None, - ), - )) - - self.seq_id_to_seq_group: Dict[str, SequenceGroupBase] = {} - - # Flag to set when an input fails to process and the engine should run - # the next step without re-scheduling. - self._skip_scheduling_next_step = False - - # Don't keep the dummy data in memory - self.reset_mm_cache() - - def _initialize_kv_caches(self) -> None: - """Initialize the KV cache in the worker(s). - - The workers will determine the number of blocks in both the GPU cache - and the swap CPU cache. - """ - start = time.time() - num_gpu_blocks, num_cpu_blocks = ( - self.model_executor.determine_num_available_blocks()) - - if self.cache_config.num_gpu_blocks_override is not None: - num_gpu_blocks_override = self.cache_config.num_gpu_blocks_override - logger.info( - "Overriding num_gpu_blocks=%d with " - "num_gpu_blocks_override=%d", num_gpu_blocks, - num_gpu_blocks_override) - num_gpu_blocks = num_gpu_blocks_override - - self.cache_config.num_gpu_blocks = num_gpu_blocks - self.cache_config.num_cpu_blocks = num_cpu_blocks - - self.model_executor.initialize_cache(num_gpu_blocks, num_cpu_blocks) - elapsed = time.time() - start - logger.info(("init engine (profile, create kv cache, " - "warmup model) took %.2f seconds"), elapsed) - - @classmethod - def _get_executor_cls(cls, - engine_config: VllmConfig) -> Type[ExecutorBase]: - # distributed_executor_backend must be set in VllmConfig.__post_init__ - distributed_executor_backend = ( - engine_config.parallel_config.distributed_executor_backend) - # Initialize the cluster and specify the executor class. - if isinstance(distributed_executor_backend, type): - if not issubclass(distributed_executor_backend, ExecutorBase): - raise TypeError( - "distributed_executor_backend must be a subclass of " - f"ExecutorBase. Got {distributed_executor_backend}.") - executor_class = distributed_executor_backend - elif distributed_executor_backend == "ray": - from vllm.executor.ray_distributed_executor import ( - RayDistributedExecutor) - executor_class = RayDistributedExecutor - elif distributed_executor_backend == "mp": - from vllm.executor.mp_distributed_executor import ( - MultiprocessingDistributedExecutor) - assert not envs.VLLM_USE_RAY_SPMD_WORKER, ( - "multiprocessing distributed executor backend does not " - "support VLLM_USE_RAY_SPMD_WORKER=1") - executor_class = MultiprocessingDistributedExecutor - elif distributed_executor_backend == "uni": - # JAX-style, single-process, multi-device executor. 
- from vllm.executor.uniproc_executor import UniProcExecutor - executor_class = UniProcExecutor - elif distributed_executor_backend == "external_launcher": - # executor with external launcher - from vllm.executor.uniproc_executor import ( # noqa - ExecutorWithExternalLauncher) - executor_class = ExecutorWithExternalLauncher - else: - raise ValueError("unrecognized distributed_executor_backend: " - f"{distributed_executor_backend}") - return executor_class - - @classmethod - def from_vllm_config( - cls, - vllm_config: VllmConfig, - usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, - stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - disable_log_stats: bool = False, - ) -> "LLMEngine": - return cls( - vllm_config=vllm_config, - executor_class=cls._get_executor_cls(vllm_config), - log_stats=(not disable_log_stats), - usage_context=usage_context, - stat_loggers=stat_loggers, - ) - - @classmethod - def from_engine_args( - cls, - engine_args: EngineArgs, - usage_context: UsageContext = UsageContext.ENGINE_CONTEXT, - stat_loggers: Optional[Dict[str, StatLoggerBase]] = None, - ) -> "LLMEngine": - """Creates an LLM engine from the engine arguments.""" - # Create the engine configs. - vllm_config = engine_args.create_engine_config(usage_context) - - engine_cls = cls - if envs.VLLM_USE_V1: - from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine - engine_cls = V1LLMEngine - - return engine_cls.from_vllm_config( - vllm_config=vllm_config, - usage_context=usage_context, - stat_loggers=stat_loggers, - disable_log_stats=engine_args.disable_log_stats, - ) - - def __reduce__(self): - # This is to ensure that the LLMEngine is not referenced in - # the closure used to initialize Ray worker actors - raise RuntimeError("LLMEngine should not be pickled!") - - def __del__(self): - # Shutdown model executor when engine is garbage collected - # Use getattr since __init__ can fail before the field is set - if model_executor := getattr(self, "model_executor", None): - model_executor.shutdown() - - def get_tokenizer(self) -> AnyTokenizer: - if self.tokenizer is None: - raise ValueError("Unable to get tokenizer because " - "skip_tokenizer_init is True") - - return self.tokenizer - - def _init_tokenizer(self) -> AnyTokenizer: - return init_tokenizer_from_configs(model_config=self.model_config) - - def _verify_args(self) -> None: - self.model_config.verify_with_parallel_config(self.parallel_config) - self.cache_config.verify_with_parallel_config(self.parallel_config) - if self.lora_config: - self.lora_config.verify_with_model_config(self.model_config) - self.lora_config.verify_with_scheduler_config( - self.scheduler_config) - - def _add_processed_request( - self, - request_id: str, - processed_inputs: ProcessorInputs, - params: SamplingParams, - arrival_time: float, - lora_request: Optional[LoRARequest], - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - ) -> Optional[SequenceGroup]: - """Add a processed request to the engine's request pool. - return the created sequence group. - """ - if isinstance(params, SamplingParams) and params.n > 1: - ParallelSampleSequenceGroup.add_request( - request_id, - self, - params, - processed_inputs=processed_inputs, - arrival_time=arrival_time, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - ) - return None - - self._validate_model_inputs(processed_inputs) - # Create the sequences. 
- block_size = self.cache_config.block_size - seq_id = next(self.seq_counter) - eos_token_id = self.input_preprocessor.get_eos_token_id() - - encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs) - - seq = Sequence(seq_id, decoder_inputs, block_size, eos_token_id, - lora_request) - - encoder_seq = (None if encoder_inputs is None else Sequence( - seq_id, encoder_inputs, block_size, eos_token_id, lora_request)) - - # Create a SequenceGroup based on SamplingParams - if isinstance(params, SamplingParams): - seq_group = self._create_sequence_group_with_sampling( - request_id, - seq, - params, - arrival_time=arrival_time, - lora_request=lora_request, - trace_headers=trace_headers, - encoder_seq=encoder_seq, - priority=priority) - else: - raise ValueError("SamplingParams must be provided.") - - # Add the sequence group to the scheduler with least unfinished seqs. - costs = [ - scheduler.get_num_unfinished_seq_groups() - for scheduler in self.scheduler - ] - min_cost_scheduler = self.scheduler[costs.index(min(costs))] - min_cost_scheduler.add_seq_group(seq_group) - - return seq_group - - def stop_remote_worker_execution_loop(self) -> None: - self.model_executor.stop_remote_worker_execution_loop() - - def add_request( - self, - request_id: str, - prompt: PromptType, - params: SamplingParams, - arrival_time: Optional[float] = None, - lora_request: Optional[LoRARequest] = None, - tokenization_kwargs: Optional[dict[str, Any]] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - ) -> None: - """Add a request to the engine's request pool. - - The request is added to the request pool and will be processed by the - scheduler as `engine.step()` is called. The exact scheduling policy is - determined by the scheduler. - - Args: - request_id: The unique ID of the request. - prompt: The prompt to the LLM. See - [PromptType][vllm.inputs.PromptType] - for more details about the format of each input. - params: Parameters for sampling. - [SamplingParams][vllm.SamplingParams] for text generation. - arrival_time: The arrival time of the request. If None, we use - the current monotonic time. - lora_request: The LoRA request to add. - trace_headers: OpenTelemetry trace headers. - priority: The priority of the request. - Only applicable with priority scheduling. - - Details: - - Set arrival_time to the current time if it is None. - - Set prompt_token_ids to the encoded prompt if it is None. - - Create `n` number of [Sequence][vllm.sequence.Sequence] objects. - - Create a [SequenceGroup][vllm.sequence.SequenceGroup] object - from the list of [Sequence][vllm.sequence.Sequence]. - - Add the [SequenceGroup][vllm.sequence.SequenceGroup] object to the - scheduler. - - Example: - >>> # initialize engine - >>> engine = LLMEngine.from_engine_args(engine_args) - >>> # set request arguments - >>> example_prompt = "Who is the president of the United States?" - >>> sampling_params = SamplingParams(temperature=0.0) - >>> request_id = 0 - >>> - >>> # add the request to the engine - >>> engine.add_request( - >>> str(request_id), - >>> example_prompt, - >>> SamplingParams(temperature=0.0)) - >>> # continue the request processing - >>> ... 
- """ - if not isinstance(request_id, str): - raise TypeError( - f"request_id must be a string, got {type(request_id)}") - - if lora_request is not None and not self.lora_config: - raise ValueError(f"Got lora_request {lora_request} but LoRA is " - "not enabled!") - - if priority != 0 and not self.scheduler_config.policy == "priority": - raise ValueError(f"Got priority {priority} but " - "Priority scheduling is not enabled.") - - if isinstance(params, SamplingParams) \ - and params.logits_processors: - raise ValueError( - "Logits processors are not supported in multi-step decoding") - - if arrival_time is None: - arrival_time = time.time() - - if (isinstance(prompt, dict) - and prompt.get("prompt_embeds", None) is not None): - if not prompt.get("prompt_token_ids", None): - seq_len = prompt["prompt_embeds"].shape[0] - prompt["prompt_token_ids"] = [0] * seq_len - if params.prompt_logprobs is not None: - raise ValueError( - "prompt_logprobs is not compatible with prompt embeds.") - - processed_inputs = self.input_preprocessor.preprocess( - prompt, - tokenization_kwargs=tokenization_kwargs, - ) - - self._add_processed_request( - request_id=request_id, - processed_inputs=processed_inputs, - params=params, - arrival_time=arrival_time, - lora_request=lora_request, - trace_headers=trace_headers, - priority=priority, - ) - - def _create_sequence_group_with_sampling( - self, - request_id: str, - seq: Sequence, - sampling_params: SamplingParams, - arrival_time: float, - lora_request: Optional[LoRARequest], - trace_headers: Optional[Mapping[str, str]] = None, - encoder_seq: Optional[Sequence] = None, - priority: int = 0, - ) -> SequenceGroup: - """Creates a SequenceGroup with SamplingParams.""" - max_logprobs = self.get_model_config().max_logprobs - if (sampling_params.logprobs - and sampling_params.logprobs > max_logprobs) or ( - sampling_params.prompt_logprobs - and sampling_params.prompt_logprobs > max_logprobs): - raise ValueError(f"Cannot request more than " - f"{max_logprobs} logprobs.") - - sampling_params = self._build_logits_processors( - sampling_params, lora_request) - - # Defensive copy of SamplingParams, which are used by the sampler, - # this doesn't deep-copy LogitsProcessor objects - sampling_params = sampling_params.clone() - - sampling_params.update_from_generation_config( - self.generation_config_fields, seq.eos_token_id) - - # Create the sequence group. - draft_size = 1 - if self.vllm_config.speculative_config is not None: - draft_size = \ - self.vllm_config.speculative_config.num_speculative_tokens + 1 - seq_group = SequenceGroup(request_id=request_id, - seqs=[seq], - arrival_time=arrival_time, - sampling_params=sampling_params, - lora_request=lora_request, - trace_headers=trace_headers, - encoder_seq=encoder_seq, - priority=priority, - draft_size=draft_size) - - return seq_group - - def abort_request(self, request_id: Union[str, Iterable[str]]) -> None: - """Aborts a request(s) with the given ID. - - Args: - request_id: The ID(s) of the request to abort. - - Details: - - Refer to [vllm.core.scheduler.Scheduler.abort_seq_group][]. 
- - Example: - >>> # initialize engine and add a request with request_id - >>> request_id = str(0) - >>> # abort the request - >>> engine.abort_request(request_id) - """ - for scheduler in self.scheduler: - scheduler.abort_seq_group( - request_id, seq_id_to_seq_group=self.seq_id_to_seq_group) - - def get_vllm_config(self) -> VllmConfig: - """Gets the vllm configuration.""" - return self.vllm_config - - def get_model_config(self) -> ModelConfig: - """Gets the model configuration.""" - return self.model_config - - def get_parallel_config(self) -> ParallelConfig: - """Gets the parallel configuration.""" - return self.parallel_config - - def get_scheduler_config(self) -> SchedulerConfig: - """Gets the scheduler configuration.""" - return self.scheduler_config - - def get_lora_config(self) -> LoRAConfig: - """Gets the LoRA configuration.""" - return self.lora_config - - def get_num_unfinished_requests(self) -> int: - """Gets the number of unfinished requests.""" - return sum(scheduler.get_num_unfinished_seq_groups() - for scheduler in self.scheduler) - - def has_unfinished_requests(self) -> bool: - """Returns True if there are unfinished requests.""" - return any(scheduler.has_unfinished_seqs() - for scheduler in self.scheduler) - - def has_unfinished_requests_for_virtual_engine( - self, virtual_engine: int) -> bool: - """ - Returns True if there are unfinished requests for the virtual engine. - """ - return self.scheduler[virtual_engine].has_unfinished_seqs() - - def reset_mm_cache(self) -> bool: - """Reset the multi-modal cache.""" - self.input_preprocessor.clear_cache() - return True - - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - """Reset prefix cache for all devices.""" - - success = True - for scheduler in self.scheduler: - success = success and scheduler.reset_prefix_cache(device) - return success - - def _process_model_outputs(self, - ctx: SchedulerContext, - request_id: Optional[str] = None) -> None: - """Apply the model output to the sequences in the scheduled seq groups - and return responses. 
- - ctx: The virtual engine context to work on - request_id: If provided, then only this request is going to be processed - """ - - now = time.time() - - if len(ctx.output_queue) == 0: - return None - - # Get pending async postprocessor - if request_id: - # When we process only one request, no pop is required - # (since later we will process all of the rest) - (outputs, seq_group_metadata_list, scheduler_outputs, is_async, - is_last_step, is_first_step_output, skip) = ctx.output_queue[0] - else: - (outputs, seq_group_metadata_list, scheduler_outputs, is_async, - is_last_step, is_first_step_output, - skip) = ctx.output_queue.popleft() - - # Sanity check - assert len(seq_group_metadata_list) == len( - scheduler_outputs.scheduled_seq_groups) - - has_multiple_outputs: bool = len(outputs) > 1 - outputs_by_sequence_group: List[List[SequenceGroupOutput]] - assert not has_multiple_outputs - outputs_by_sequence_group = outputs - - # Determine the requests we need to operate on - if request_id: - indices = [] - for i, seq_group_meta in enumerate(seq_group_metadata_list): - if seq_group_meta.request_id == request_id: - assert i not in skip # Cannot be called twice - indices.append(i) - break - - # If the request_id was not found, then it means that - # this is a new request that has no pending async - # postprocessor - if not indices: - return - else: - indices = range(len(seq_group_metadata_list)) # type: ignore - - finished_before: List[int] = [] - finished_now: List[int] = [] - for i in indices: - if i in skip: - continue - - seq_group_meta = seq_group_metadata_list[i] - scheduled_seq_group = scheduler_outputs.scheduled_seq_groups[i] - - seq_group: SequenceGroup = scheduled_seq_group.seq_group - - if seq_group.is_finished(): - finished_before.append(i) - continue - - output: List[SequenceGroupOutput] - if has_multiple_outputs: - output = outputs_by_sequence_group[i] - else: - output = [outputs_by_sequence_group[0][i]] - - if not is_async: - seq_group.update_num_computed_tokens( - seq_group_meta.token_chunk_size or 0) - - if outputs: - for o in outputs: - if (isinstance(o, SamplerOutput) - and seq_group.metrics is not None): - if seq_group.metrics.model_forward_time is not None: - seq_group.metrics.model_forward_time += ( - o.model_forward_time or 0) - else: - seq_group.metrics.model_forward_time = ( - o.model_forward_time) - if seq_group.metrics.model_execute_time is not None: - seq_group.metrics.model_execute_time += ( - o.model_execute_time or 0) - else: - seq_group.metrics.model_execute_time = ( - o.model_execute_time) - - self.output_processor.process_prompt_logprob(seq_group, output) - if seq_group_meta.do_sample: - self.output_processor.process_outputs(seq_group, output, - is_async) - - if seq_group.is_finished(): - finished_now.append(i) - - # Generate outputs for the requests that finished this iteration - for i in finished_now: - scheduled_seq_group = scheduler_outputs.scheduled_seq_groups[i] - - seq_group = scheduled_seq_group.seq_group - seq_group.maybe_set_first_token_time(now) - if not seq_group.is_prefill(): - seq_group.set_last_token_time(now) - request_output = RequestOutputFactory.create( - seq_group, - self.seq_id_to_seq_group, - use_cache=self.use_cached_outputs) - if request_output: - ctx.request_outputs.append(request_output) - - # When we process a single request, we skip it for the next time, - # and invoke the request output callback (if there was final output) - if request_id: - assert len(indices) == 1 - skip.append(indices[0]) - - if (finished_now - and 
self.process_request_outputs_callback is not None): - self.process_request_outputs_callback(ctx.request_outputs) - ctx.request_outputs.clear() - return - - # Free currently finished requests - if finished_now: - for scheduler in self.scheduler: - scheduler.free_finished_seq_groups() - - # Create the outputs - for i in indices: - if i in skip or i in finished_before or i in finished_now: - continue # Avoids double processing - - scheduled_seq_group = scheduler_outputs.scheduled_seq_groups[i] - - seq_group = scheduled_seq_group.seq_group - seq_group.maybe_set_first_token_time(now) - if not seq_group.is_prefill(): - seq_group.set_last_token_time(now) - request_output = RequestOutputFactory.create( - seq_group, - self.seq_id_to_seq_group, - use_cache=self.use_cached_outputs) - if request_output: - ctx.request_outputs.append(request_output) - - # Create outputs only after processing the scheduler's results - - for seq_group in scheduler_outputs.ignored_seq_groups: - params = seq_group.sampling_params - if params is not None and params.output_kind == ( - RequestOutputKind.DELTA) and not seq_group.is_finished(): - continue - - request_output = RequestOutputFactory.create( - seq_group, - self.seq_id_to_seq_group, - use_cache=self.use_cached_outputs, - ) - if request_output: - ctx.request_outputs.append(request_output) - - # Immediately process request outputs here (if callback is given) - if (ctx.request_outputs - and self.process_request_outputs_callback is not None): - self.process_request_outputs_callback(ctx.request_outputs) - ctx.request_outputs.clear() - - # For async case, we need to record the stats here. - # For non-async case, the stats are done in the - # LLMEngine/AsyncLLMEngine directly - if is_async: - # Log stats. - self.do_log_stats(scheduler_outputs, outputs, finished_before, - skip) - - # Tracing - self.do_tracing(scheduler_outputs, finished_before) - - return None - - def _advance_to_next_step( - self, output: SamplerOutput, - seq_group_metadata_list: List[SequenceGroupMetadata], - scheduled_seq_groups: List[ScheduledSequenceGroup]) -> None: - """Given model output from a single run, append the tokens to the - sequences. This is normally done inside output processor, but it is - required if the worker is to perform async forward pass to next step. - """ - for seq_group_metadata, sequence_group_outputs, scheduled_seq_group in \ - zip(seq_group_metadata_list, output, scheduled_seq_groups): - seq_group = scheduled_seq_group.seq_group - - if seq_group.is_finished(): - continue - - token_chunk_size = (seq_group_metadata.token_chunk_size - if seq_group_metadata.token_chunk_size - is not None else 0) - seq_group.update_num_computed_tokens(token_chunk_size) - - if seq_group_metadata.do_sample: - assert len(sequence_group_outputs.samples) == 1, ( - "Async output processor expects a single sample" - " (i.e sampling_params.n == 1)") - sample = sequence_group_outputs.samples[0] - - assert len(seq_group.seqs) == 1 - seq = seq_group.seqs[0] - - seq.append_token_id(sample.output_token, sample.logprobs, - sample.output_embed) - - def step(self) -> List[RequestOutput]: - """Performs one decoding iteration and returns newly generated results. - -
-        ![Overview of the step function](https://i.imgur.com/sv2HssD.png)
-
-        Details:
-            - Step 1: Schedules the sequences to be executed in the next
-              iteration and the token blocks to be swapped in/out/copied.
-
-                - Depending on the scheduling policy,
-                  sequences may be `preempted/reordered`.
-                - A Sequence Group (SG) refers to a group of sequences
-                  that are generated from the same prompt.
-
-            - Step 2: Calls the distributed executor to execute the model.
-            - Step 3: Processes the model output. This mainly includes:
-
-                - Decodes the relevant outputs.
-                - Updates the scheduled sequence groups with model outputs
-                  based on its `sampling parameters` (`use_beam_search` or not).
-                - Frees the finished sequence groups.
-
-            - Finally, it creates and returns the newly generated results.
-
-        Example:
-            ```
-            # Please see the examples/ folder for more detailed examples.
-
-            # initialize engine and request arguments
-            engine = LLMEngine.from_engine_args(engine_args)
-            example_inputs = [(0, "What is LLM?",
-                               SamplingParams(temperature=0.0))]
-
-            # Start the engine with an event loop
-            while True:
-                if example_inputs:
-                    req_id, prompt, sampling_params = example_inputs.pop(0)
-                    engine.add_request(str(req_id), prompt, sampling_params)
-
-                # continue the request processing
-                request_outputs = engine.step()
-                for request_output in request_outputs:
-                    if request_output.finished:
-                        # return or show the request output
-
-                if not (engine.has_unfinished_requests() or example_inputs):
-                    break
-            ```
-        """
-        if self.parallel_config.pipeline_parallel_size > 1:
-            raise NotImplementedError(
-                "Pipeline parallelism is only supported through AsyncLLMEngine "
-                "as performance will be severely degraded otherwise.")
-
-        # For llm_engine, there is no pipeline parallel support, so the engine
-        # used is always 0.
-        virtual_engine = 0
-
-        # These are cached outputs from previous iterations. None if on first
-        # iteration
-        cached_outputs = self.cached_scheduler_outputs[virtual_engine]
-        seq_group_metadata_list = cached_outputs.seq_group_metadata_list
-        scheduler_outputs = cached_outputs.scheduler_outputs
-        allow_async_output_proc = cached_outputs.allow_async_output_proc
-
-        ctx = self.scheduler_contexts[virtual_engine]
-
-        # Clear outputs for each new scheduler iteration
-        ctx.request_outputs.clear()
-
-        # Skip the scheduler if there are any remaining steps in the seq groups.
-        # This ensures that the scheduler is only called again when the current
-        # batch has completed.
-        # The scheduler is also skipped if a single request caused the last
-        # engine step to fail, and the previous schedule needs to be rerun.
-        if not self._has_remaining_steps(
-                seq_group_metadata_list
-        ) and not self._skip_scheduling_next_step:
-            # Schedule iteration
-            (seq_group_metadata_list, scheduler_outputs,
-             allow_async_output_proc
-             ) = self.scheduler[virtual_engine].schedule()
-
-            ctx.seq_group_metadata_list = seq_group_metadata_list
-            ctx.scheduler_outputs = scheduler_outputs
-
-            finished_requests_ids = self.scheduler[
-                virtual_engine].get_and_reset_finished_requests_ids()
-            # When n>1, elements in self.seq_id_to_seq_group should be deleted
-            # here, otherwise memory leaks.
- for finished_request_id in finished_requests_ids: - if finished_request_id in self.seq_id_to_seq_group: - del self.seq_id_to_seq_group[finished_request_id] - - # Maybe switch from async mode to sync mode - if not allow_async_output_proc and len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - - else: - finished_requests_ids = list() - - assert seq_group_metadata_list is not None - assert scheduler_outputs is not None - - if not scheduler_outputs.is_empty(): - - # Check if we have a cached last_output from the previous iteration. - # For supporting PP this is probably the best way to pass the - # sampled_token_ids, as a separate broadcast over all the PP stages - # will cause one virtual engine's microbatch to block the pipeline. - last_sampled_token_ids = \ - self._get_last_sampled_token_ids(virtual_engine) - - execute_model_req = ExecuteModelRequest( - seq_group_metadata_list=seq_group_metadata_list, - blocks_to_swap_in=scheduler_outputs.blocks_to_swap_in, - blocks_to_swap_out=scheduler_outputs.blocks_to_swap_out, - blocks_to_copy=scheduler_outputs.blocks_to_copy, - num_lookahead_slots=scheduler_outputs.num_lookahead_slots, - running_queue_size=scheduler_outputs.running_queue_size, - finished_requests_ids=finished_requests_ids, - # We use ExecuteModelRequest to pass the last sampled_token_ids - # to each of the non-last PP stages for in-place prepare_input. - last_sampled_token_ids=last_sampled_token_ids) - - if allow_async_output_proc: - execute_model_req.async_callback = self.async_callbacks[ - virtual_engine] - - try: - outputs = self.model_executor.execute_model( - execute_model_req=execute_model_req) - self._skip_scheduling_next_step = False - except InputProcessingError as e: - # The input for this request cannot be processed, so we must - # abort it. If there are remaining requests in the batch that - # have been scheduled, they will be retried on the next step. - invalid_request_id = e.request_id - self._abort_and_cache_schedule( - request_id=invalid_request_id, - virtual_engine=virtual_engine, - seq_group_metadata_list=seq_group_metadata_list, - scheduler_outputs=scheduler_outputs, - allow_async_output_proc=allow_async_output_proc) - # Raise so the caller is notified that this request failed - raise - - else: - # Nothing scheduled => If there is pending async postprocessor, - # then finish it here. - if len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - # No outputs in this case - outputs = [] - - if not self._has_remaining_steps(seq_group_metadata_list): - # is_first_step_output is True only when the num_steps of all - # the sequences are 1. - is_first_step_output: bool = False if not seq_group_metadata_list \ - else seq_group_metadata_list[0].state.num_steps == 1 - - # Add results to the output_queue - ctx.append_output(outputs=outputs, - seq_group_metadata_list=seq_group_metadata_list, - scheduler_outputs=scheduler_outputs, - is_async=allow_async_output_proc, - is_last_step=True, - is_first_step_output=is_first_step_output) - - if outputs and allow_async_output_proc: - assert len(outputs) == 1, ( - "Async postprocessor expects only a single output set") - - self._advance_to_next_step( - outputs[0], seq_group_metadata_list, - scheduler_outputs.scheduled_seq_groups) - - # Check if need to run the usual non-async path - if not allow_async_output_proc: - self._process_model_outputs(ctx=ctx) - - # Log stats. 
- self.do_log_stats(scheduler_outputs, outputs) - - # Tracing - self.do_tracing(scheduler_outputs) - else: - # Multi-step case - return ctx.request_outputs - - if not self.has_unfinished_requests(): - # Drain async postprocessor (if exists) - if len(ctx.output_queue) > 0: - self._process_model_outputs(ctx=ctx) - assert len(ctx.output_queue) == 0 - - # Stop the execute model loop in parallel workers until there are - # more requests to process. This avoids waiting indefinitely in - # torch.distributed ops which may otherwise time out, and unblocks - # the RPC thread in the workers so that they can process any other - # queued control plane messages, such as add/remove lora adapters. - logger.debug("Stopping remote worker execution loop.") - self.model_executor.stop_remote_worker_execution_loop() - - return ctx.request_outputs - - def _abort_and_cache_schedule( - self, request_id: str, virtual_engine: int, - seq_group_metadata_list: List[SequenceGroupMetadata], - scheduler_outputs: SchedulerOutputs, - allow_async_output_proc: bool) -> None: - """Aborts a single request, and caches the scheduler outputs minus that - request. This allows the next step to continue processing the remaining - requests without having to re-run the scheduler.""" - - # Abort the request and remove its sequence group from the current - # schedule - self.abort_request(request_id) - for i, metadata in enumerate(seq_group_metadata_list): - if metadata.request_id == request_id: - del seq_group_metadata_list[i] - break - for i, group in enumerate(scheduler_outputs.scheduled_seq_groups): - if group.seq_group.request_id == request_id: - del scheduler_outputs.scheduled_seq_groups[i] - break - - # If there are still other sequence groups left in the schedule, cache - # them and flag the engine to reuse the schedule. - if len(seq_group_metadata_list) > 0: - self._skip_scheduling_next_step = True - # Reuse multi-step caching logic - self._cache_scheduler_outputs_for_multi_step( - virtual_engine=virtual_engine, - scheduler_outputs=scheduler_outputs, - seq_group_metadata_list=seq_group_metadata_list, - allow_async_output_proc=allow_async_output_proc) - - def _has_remaining_steps( - self, seq_group_metadata_list: Optional[List[SequenceGroupMetadata]] - ) -> bool: - return False - - def _cache_scheduler_outputs_for_multi_step( - self, virtual_engine: int, - seq_group_metadata_list: Optional[List[SequenceGroupMetadata]], - scheduler_outputs: SchedulerOutputs, - allow_async_output_proc: bool) -> None: - co = self.cached_scheduler_outputs[virtual_engine] - - co.seq_group_metadata_list = seq_group_metadata_list - co.scheduler_outputs = scheduler_outputs - co.allow_async_output_proc = allow_async_output_proc - co.last_output = None - - def _update_cached_scheduler_output( - self, virtual_engine: int, - output: List[Optional[SamplerOutput]]) -> None: - if (self.parallel_config.pipeline_parallel_size > 1 and len(output) > 0 - and output[0] is not None): - last_output = output[-1] - assert last_output is not None - assert last_output.sampled_token_ids_cpu is not None - assert last_output.sampled_token_ids is None - assert last_output.sampled_token_probs is None - self.cached_scheduler_outputs[ - virtual_engine].last_output = last_output - - def _get_last_sampled_token_ids( - self, virtual_engine: int) -> Optional[torch.Tensor]: - return None - - def add_logger(self, logger_name: str, logger: StatLoggerBase) -> None: - if not self.log_stats: - raise RuntimeError( - "Stat logging is disabled. 
Set `disable_log_stats=False` " - "argument to enable.") - if logger_name in self.stat_loggers: - raise KeyError(f"Logger with name {logger_name} already exists.") - self.stat_loggers[logger_name] = logger - - def remove_logger(self, logger_name: str) -> None: - if not self.log_stats: - raise RuntimeError( - "Stat logging is disabled. Set `disable_log_stats=False` " - "argument to enable.") - if logger_name not in self.stat_loggers: - raise KeyError(f"Logger with name {logger_name} does not exist.") - del self.stat_loggers[logger_name] - - def do_log_stats(self, - scheduler_outputs: Optional[SchedulerOutputs] = None, - model_output: Optional[List[SamplerOutput]] = None, - finished_before: Optional[List[int]] = None, - skip: Optional[List[int]] = None) -> None: - """Forced log when no requests active.""" - if self.log_stats: - stats = self._get_stats(scheduler_outputs, model_output, - finished_before, skip) - for logger in self.stat_loggers.values(): - logger.log(stats) - - def _get_stats(self, - scheduler_outputs: Optional[SchedulerOutputs], - model_output: Optional[List[SamplerOutput]] = None, - finished_before: Optional[List[int]] = None, - skip: Optional[List[int]] = None) -> Stats: - """Get Stats to be Logged to Prometheus. - - Args: - scheduler_outputs: Optional, used to populate metrics related to - the scheduled batch, - model_output: Optional, used to emit speculative decoding metrics - which are created by the workers. - finished_before: Optional, indices of sequences that were finished - before. These sequences will be ignored. - skip: Optional, indices of sequences that were preempted. These - sequences will be ignored. - """ - now = time.time() - - # System State - # Scheduler State - num_running_sys = sum( - len(scheduler.running) for scheduler in self.scheduler) - num_swapped_sys = sum( - len(scheduler.swapped) for scheduler in self.scheduler) - num_waiting_sys = sum( - len(scheduler.waiting) for scheduler in self.scheduler) - - # KV Cache Usage in % - num_total_gpu = self.cache_config.num_gpu_blocks - gpu_cache_usage_sys = 0. - if num_total_gpu: # Guard against both None and 0 - num_free_gpu = sum( - scheduler.block_manager.get_num_free_gpu_blocks() - for scheduler in self.scheduler) - gpu_cache_usage_sys = 1.0 - (num_free_gpu / num_total_gpu) - - num_total_cpu = self.cache_config.num_cpu_blocks - cpu_cache_usage_sys = 0. - if num_total_cpu: # Guard against both None and 0 - num_free_cpu = sum( - scheduler.block_manager.get_num_free_cpu_blocks() - for scheduler in self.scheduler) - cpu_cache_usage_sys = 1.0 - (num_free_cpu / num_total_cpu) - - # Prefix Cache Hit Rate. Note that we always use - # the cache hit rate of the first virtual engine. - cpu_prefix_cache_hit_rate = self.scheduler[ - 0].get_prefix_cache_hit_rate(Device.CPU) - gpu_prefix_cache_hit_rate = self.scheduler[ - 0].get_prefix_cache_hit_rate(Device.GPU) - - # Exchange the uasge and cache hit stats between gpu and cpu when - # running on cpu because the cpu_worker.py intentionally reports the - # number of cpu blocks as gpu blocks in favor of cache management. 
- if self.device_config.device_type == "cpu": - num_total_gpu, num_total_cpu = num_total_cpu, num_total_gpu - gpu_cache_usage_sys, cpu_cache_usage_sys = ( - cpu_cache_usage_sys, - gpu_cache_usage_sys, - ) - gpu_prefix_cache_hit_rate, cpu_prefix_cache_hit_rate = ( - cpu_prefix_cache_hit_rate, - gpu_prefix_cache_hit_rate, - ) - - # Iteration stats - num_prompt_tokens_iter = 0 - num_generation_tokens_iter = 0 - num_tokens_iter = 0 - time_to_first_tokens_iter: List[float] = [] - inter_token_latencies_iter: List[float] = [] - num_preemption_iter = (0 if scheduler_outputs is None else - scheduler_outputs.preempted) - - # Request stats - # Latency - time_e2e_requests: List[float] = [] - time_queue_requests: List[float] = [] - time_inference_requests: List[float] = [] - time_prefill_requests: List[float] = [] - time_decode_requests: List[float] = [] - # Metadata - num_prompt_tokens_requests: List[int] = [] - num_generation_tokens_requests: List[int] = [] - n_requests: List[int] = [] - max_num_generation_tokens_requests: List[int] = [] - max_tokens_requests: List[int] = [] - finished_reason_requests: List[str] = [] - - # LoRA requests - running_lora_adapters = dict( - collectionsCounter([ - running_request.lora_request.lora_name - for scheduler in self.scheduler - for running_request in scheduler.running - if running_request.lora_request - ])) - waiting_lora_adapters = dict( - collectionsCounter([ - waiting_request.lora_request.lora_name - for scheduler in self.scheduler - for waiting_request in scheduler.waiting - if waiting_request.lora_request - ])) - max_lora_stat = "0" - if self.lora_config: - max_lora_stat = str(self.lora_config.max_loras) - - # NOTE: This loop assumes prefill seq_groups are before - # decode seq_groups in scheduled_seq_groups. - if scheduler_outputs is not None: - # For async postprocessor, already finished sequences need to be - # not counted (to avoid double counting) - actual_num_batched_tokens = scheduler_outputs.num_batched_tokens # type: ignore - - num_generation_tokens_from_prefill_groups = 0 - # NOTE: if scheduler_outputs.num_prefill_groups > 0 and - # the len of scheduler_outputs.scheduled_seq_groups is != - # scheduler_outputs.num_prefill_groups, this means that - # chunked prefills have been detected. - - for idx, scheduled_seq_group in enumerate( - scheduler_outputs.scheduled_seq_groups): - # Skip double logging when using async output proc - if finished_before and idx in finished_before: - actual_num_batched_tokens -= 1 - continue - - # Currently, skip == preempted sequences, so we need to skip - # their log stats - if skip and idx in skip: - continue - - group_was_prefill = idx < scheduler_outputs.num_prefill_groups - seq_group = scheduled_seq_group.seq_group - - # NOTE: a seq_group that completed all of its prefill tokens - # in the last iteration will have seq_group.is_prefill() = False - # with group_was_prefill = True - if group_was_prefill: - # Number of prompt tokens. - num_prompt_tokens_iter += ( - scheduled_seq_group.token_chunk_size) - - # If the seq_group just finished the prefill state - # get TTFT. - if not seq_group.is_prefill(): - latency = seq_group.get_last_token_latency() - time_to_first_tokens_iter.append(latency) - - # One generation token per finished prefill. 
- num_generation_tokens_from_prefill_groups += ( - seq_group.num_seqs()) - else: - # ITLs - latency = seq_group.get_last_token_latency() - inter_token_latencies_iter.append(latency) - if seq_group.state.current_step == 0: - # For async_output_proc, the do_log_stats() - # is called following init_multi_step(), which - # sets the current_step to zero. - actual_num_batched_tokens +=\ - seq_group.state.num_steps - 1 - else: - actual_num_batched_tokens +=\ - seq_group.state.current_step - 1 - - # Because of chunked prefill, we can have a single sequence - # group that does multiple prompt_runs. To prevent logging - # the same metadata more than once per request, we standardize - # on logging request level information for finished requests, - # which can only happen once. - if seq_group.is_finished(): - # Latency timings - time_e2e_requests.append(now - - seq_group.metrics.arrival_time) - if (seq_group.metrics.first_scheduled_time is not None and - seq_group.metrics.first_token_time is not None): - time_queue_requests.append( - seq_group.metrics.first_scheduled_time - - seq_group.metrics.arrival_time) - time_prefill_requests.append( - seq_group.metrics.first_token_time - - seq_group.metrics.first_scheduled_time) - time_decode_requests.append( - now - seq_group.metrics.first_token_time) - time_inference_requests.append( - now - seq_group.metrics.first_scheduled_time) - # Metadata - num_prompt_tokens_requests.append( - len(seq_group.prompt_token_ids)) - num_generation_tokens_requests.extend([ - seq.get_output_len() - for seq in seq_group.get_finished_seqs() - ]) - max_num_generation_tokens_requests.append( - max(seq.get_output_len() - for seq in seq_group.get_seqs())) - if seq_group.sampling_params is not None: - n_requests.append(seq_group.sampling_params.n) - max_tokens_requests.append( - seq_group.sampling_params.max_tokens) - finished_reason_requests.extend([ - SequenceStatus.get_finished_reason(seq.status) - for seq in seq_group.get_finished_seqs() - ]) - - # Number of generation tokens. - # num_batched_tokens equals the number of prompt_tokens plus the - # number of decode_tokens in a single iteration. So, - # num_generation_tokens = num_batched_tokens - num_prompt_tokens - # + num_generation_tokens_from_prefill_groups (since we generate - # one token on prefills on iters where the prefill finishes). 
- num_generation_tokens_iter = ( - actual_num_batched_tokens - num_prompt_tokens_iter + - num_generation_tokens_from_prefill_groups) - num_tokens_iter = (num_generation_tokens_iter + - num_prompt_tokens_iter) - - return Stats( - now=now, - # System stats - # Scheduler State - num_running_sys=num_running_sys, - num_swapped_sys=num_swapped_sys, - num_waiting_sys=num_waiting_sys, - # KV Cache Usage in % - gpu_cache_usage_sys=gpu_cache_usage_sys, - cpu_cache_usage_sys=cpu_cache_usage_sys, - # Prefix Cache Hit Rate - cpu_prefix_cache_hit_rate=cpu_prefix_cache_hit_rate, - gpu_prefix_cache_hit_rate=gpu_prefix_cache_hit_rate, - - # Iteration stats - num_prompt_tokens_iter=num_prompt_tokens_iter, - num_generation_tokens_iter=num_generation_tokens_iter, - num_tokens_iter=num_tokens_iter, - time_to_first_tokens_iter=time_to_first_tokens_iter, - inter_token_latencies_iter=inter_token_latencies_iter, - num_preemption_iter=num_preemption_iter, - - # Request stats - # Latency - time_e2e_requests=time_e2e_requests, - time_queue_requests=time_queue_requests, - time_inference_requests=time_inference_requests, - time_prefill_requests=time_prefill_requests, - time_decode_requests=time_decode_requests, - # Metadata - num_prompt_tokens_requests=num_prompt_tokens_requests, - num_generation_tokens_requests=num_generation_tokens_requests, - max_num_generation_tokens_requests= - max_num_generation_tokens_requests, - n_requests=n_requests, - max_tokens_requests=max_tokens_requests, - finished_reason_requests=finished_reason_requests, - max_lora=str(max_lora_stat), - waiting_lora_adapters=list(waiting_lora_adapters.keys()), - running_lora_adapters=list(running_lora_adapters.keys())) - - def add_lora(self, lora_request: LoRARequest) -> bool: - return self.model_executor.add_lora(lora_request) - - def remove_lora(self, lora_id: int) -> bool: - return self.model_executor.remove_lora(lora_id) - - def list_loras(self) -> Set[int]: - return self.model_executor.list_loras() - - def pin_lora(self, lora_id: int) -> bool: - return self.model_executor.pin_lora(lora_id) - - def start_profile(self) -> None: - self.model_executor.start_profile() - - def stop_profile(self) -> None: - self.model_executor.stop_profile() - - def sleep(self, level: int = 1) -> None: - assert self.vllm_config.model_config.enable_sleep_mode, ( - "Sleep mode is not enabled in the model config") - self.model_executor.sleep(level=level) - - def wake_up(self, tags: Optional[list[str]] = None) -> None: - assert self.vllm_config.model_config.enable_sleep_mode, ( - "Sleep mode is not enabled in the model config") - self.model_executor.wake_up(tags) - - def is_sleeping(self) -> bool: - return self.model_executor.is_sleeping - - def check_health(self) -> None: - self.model_executor.check_health() - - def is_tracing_enabled(self) -> bool: - return self.tracer is not None - - def do_tracing(self, - scheduler_outputs: SchedulerOutputs, - finished_before: Optional[List[int]] = None) -> None: - if self.tracer is None: - return - - for idx, scheduled_seq_group in enumerate( - scheduler_outputs.scheduled_seq_groups): - # Skip double tracing when using async output proc - if finished_before and idx in finished_before: - continue - - seq_group = scheduled_seq_group.seq_group - if seq_group.is_finished(): - self.create_trace_span(seq_group) - - def create_trace_span(self, seq_group: SequenceGroup) -> None: - if self.tracer is None or seq_group.sampling_params is None: - return - arrival_time_nano_seconds = int(seq_group.metrics.arrival_time * 1e9) - - trace_context = 
extract_trace_context(seq_group.trace_headers) - - with self.tracer.start_as_current_span( - "llm_request", - kind=SpanKind.SERVER, - context=trace_context, - start_time=arrival_time_nano_seconds) as seq_span: - metrics = seq_group.metrics - - # Handle potential None values for cancelled/aborted requests - ttft = (metrics.first_token_time - metrics.arrival_time - if metrics.first_token_time is not None else None) - - e2e_time = (metrics.finished_time - metrics.arrival_time - if metrics.finished_time is not None else None) - - seq_span.set_attribute(SpanAttributes.GEN_AI_RESPONSE_MODEL, - self.model_config.model) - seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_ID, - seq_group.request_id) - seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_TEMPERATURE, - seq_group.sampling_params.temperature) - seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_TOP_P, - seq_group.sampling_params.top_p) - seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_MAX_TOKENS, - seq_group.sampling_params.max_tokens) - seq_span.set_attribute(SpanAttributes.GEN_AI_REQUEST_N, - seq_group.sampling_params.n) - seq_span.set_attribute(SpanAttributes.GEN_AI_USAGE_NUM_SEQUENCES, - seq_group.num_seqs()) - seq_span.set_attribute(SpanAttributes.GEN_AI_USAGE_PROMPT_TOKENS, - len(seq_group.prompt_token_ids)) - seq_span.set_attribute( - SpanAttributes.GEN_AI_USAGE_COMPLETION_TOKENS, - sum([ - seq.get_output_len() - for seq in seq_group.get_finished_seqs() - ])) - - # Only set timing attributes if the values are available - if metrics.time_in_queue is not None: - seq_span.set_attribute( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_QUEUE, - metrics.time_in_queue) - if ttft is not None: - seq_span.set_attribute( - SpanAttributes.GEN_AI_LATENCY_TIME_TO_FIRST_TOKEN, ttft) - if e2e_time is not None: - seq_span.set_attribute(SpanAttributes.GEN_AI_LATENCY_E2E, - e2e_time) - if metrics.scheduler_time is not None: - seq_span.set_attribute( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_SCHEDULER, - metrics.scheduler_time) - if metrics.model_forward_time is not None: - seq_span.set_attribute( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_MODEL_FORWARD, - metrics.model_forward_time / 1000.0) - if metrics.model_execute_time is not None: - seq_span.set_attribute( - SpanAttributes.GEN_AI_LATENCY_TIME_IN_MODEL_EXECUTE, - metrics.model_execute_time) - - def _validate_model_inputs(self, inputs: ProcessorInputs): - encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs) - - if encoder_inputs is not None: - self._validate_model_input(encoder_inputs, prompt_type="encoder") - - self._validate_model_input(decoder_inputs, prompt_type="decoder") - - def _validate_model_input( - self, - prompt_inputs: SingletonInputs, - *, - prompt_type: Literal["encoder", "decoder"], - ): - model_config = self.model_config - tokenizer = self.tokenizer - - prompt_ids = prompt_inputs.get("prompt_token_ids", []) - if not prompt_ids: - if prompt_type == "encoder" and model_config.is_multimodal_model: - pass # Mllama may have empty encoder inputs for text-only data - elif prompt_inputs["type"] == "embeds": - pass - else: - raise ValueError(f"The {prompt_type} prompt cannot be empty") - - if tokenizer is not None: - max_input_id = max(prompt_ids, default=0) - if max_input_id > tokenizer.max_token_id: - raise ValueError( - f"Token id {max_input_id} is out of vocabulary") - - max_prompt_len = self.model_config.max_model_len - if len(prompt_ids) > max_prompt_len: - if prompt_type == "encoder" and model_config.is_multimodal_model: - mm_registry = self.input_preprocessor.mm_registry 
- mm_processor = mm_registry.create_processor( - model_config, - tokenizer=tokenizer or object(), # Dummy if no tokenizer - ) - assert isinstance(mm_processor, EncDecMultiModalProcessor) - - if mm_processor.pad_dummy_encoder_prompt: - return # Skip encoder length check for Whisper - - if model_config.is_multimodal_model: - suggestion = ( - "Make sure that `max_model_len` is no smaller than the " - "number of text tokens plus multimodal tokens. For image " - "inputs, the number of image tokens depends on the number " - "of images, and possibly their aspect ratios as well.") - else: - suggestion = ( - "Make sure that `max_model_len` is no smaller than the " - "number of text tokens.") - - raise ValueError( - f"The {prompt_type} prompt (length {len(prompt_ids)}) is " - f"longer than the maximum model length of {max_prompt_len}. " - f"{suggestion}") - - # TODO: Find out how many placeholder tokens are there so we can - # check that chunked prefill does not truncate them - # max_batch_len = self.scheduler_config.max_num_batched_tokens - - def _build_logits_processors( - self, sampling_params: SamplingParams, - lora_request: Optional[LoRARequest]) -> SamplingParams: - """Constructs logits processors based on the logits_bias, and - allowed_token_ids fields in sampling_params. Deletes those fields and - adds the constructed logits processors to the logits_processors field. - Returns the modified sampling params.""" - - logits_processors = [] - - if (sampling_params.logit_bias or sampling_params.allowed_token_ids): - tokenizer = self.get_tokenizer() - - processors = get_openai_logits_processors( - logit_bias=sampling_params.logit_bias, - allowed_token_ids=sampling_params.allowed_token_ids, - tokenizer=tokenizer) - logits_processors.extend(processors) - - # Unset so these don't get passed down to the model - sampling_params.logit_bias = None - sampling_params.allowed_token_ids = None - - if len(sampling_params.bad_words) > 0: - tokenizer = self.get_tokenizer() - processors = get_bad_words_logits_processors( - bad_words=sampling_params.bad_words, tokenizer=tokenizer) - logits_processors.extend(processors) - - if logits_processors: - if sampling_params.logits_processors is None: - sampling_params.logits_processors = logits_processors - else: - sampling_params.logits_processors.extend(logits_processors) - - return sampling_params - - def collective_rpc(self, - method: Union[str, Callable[[WorkerBase], _R]], - timeout: Optional[float] = None, - args: tuple = (), - kwargs: Optional[dict[str, Any]] = None) -> list[_R]: - return self.model_executor.collective_rpc(method, timeout, args, - kwargs) - - def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: - return self.collective_rpc("apply_model", args=(func, )) - - -if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1: - from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine - LLMEngine = V1LLMEngine # type: ignore +LLMEngine = V1LLMEngine # type: ignore diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index f2282c40f707..0ab806fcb8b5 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -11,7 +11,6 @@ from tqdm.auto import tqdm from typing_extensions import TypeVar -import vllm.envs as envs from vllm.beam_search import (BeamSearchInstance, BeamSearchOutput, BeamSearchSequence, create_sort_beams_key_function) @@ -19,7 +18,6 @@ StructuredOutputsConfig, TokenizerMode, is_init_field) from vllm.engine.arg_utils import (ConvertOption, EngineArgs, HfOverrides, PoolerConfig, RunnerOption) -from 
vllm.engine.llm_engine import LLMEngine from vllm.entrypoints.chat_utils import (ChatCompletionMessageParam, ChatTemplateContentFormatOption, apply_hf_chat_template, @@ -54,6 +52,7 @@ get_cached_tokenizer) from vllm.usage.usage_lib import UsageContext from vllm.utils import Counter, Device, as_iter, is_list_of +from vllm.v1.engine.llm_engine import LLMEngine from vllm.v1.sample.logits_processor import LogitsProcessor if TYPE_CHECKING: @@ -309,11 +308,7 @@ def __init__( self.request_counter = Counter() self.default_sampling_params: Union[dict[str, Any], None] = None - if envs.VLLM_USE_V1: - supported_tasks = self.llm_engine \ - .get_supported_tasks() # type: ignore - else: - supported_tasks = self.llm_engine.model_config.supported_tasks + supported_tasks = self.llm_engine.get_supported_tasks() # type: ignore logger.info("Supported_tasks: %s", supported_tasks) @@ -1473,8 +1468,6 @@ def get_metrics(self) -> list["Metric"]: Note: This method is only available with the V1 LLM engine. """ - from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine - assert isinstance(self.llm_engine, V1LLMEngine) return self.llm_engine.get_metrics() def _validate_and_add_requests( diff --git a/vllm/model_executor/model_loader/tensorizer.py b/vllm/model_executor/model_loader/tensorizer.py index 58296131fadb..13f4eebf1038 100644 --- a/vllm/model_executor/model_loader/tensorizer.py +++ b/vllm/model_executor/model_loader/tensorizer.py @@ -672,21 +672,15 @@ def tensorize_vllm_model(engine_args: "EngineArgs", ) as stream: stream.write(encryption_params.key) - from vllm import LLMEngine - from vllm.v1.engine.llm_engine import LLMEngine as V1LLMEngine - - if not envs.VLLM_USE_V1: - engine = LLMEngine.from_engine_args(engine_args) - engine.model_executor.collective_rpc( - "save_tensorized_model", - kwargs={"tensorizer_config": tensorizer_config.to_serializable()}, - ) - else: - engine = V1LLMEngine.from_vllm_config(engine_config) - engine.collective_rpc( - "save_tensorized_model", - kwargs={"tensorizer_config": tensorizer_config.to_serializable()}, - ) + assert envs.VLLM_USE_V1 + + from vllm.v1.engine.llm_engine import LLMEngine + + engine = LLMEngine.from_vllm_config(engine_config) + engine.collective_rpc( + "save_tensorized_model", + kwargs={"tensorizer_config": tensorizer_config.to_serializable()}, + ) def tensorize_lora_adapter(lora_path: str, From 86647d1cd0f3c82c7d678324db7e925654ac5665 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 17:57:20 -0700 Subject: [PATCH 197/518] [V0 Deprecation] Remove V0 Output Processor (#25320) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- tests/build_cython.py | 39 ----- vllm/engine/output_processor/__init__.py | 0 vllm/engine/output_processor/interfaces.py | 59 -------- vllm/engine/output_processor/single_step.py | 145 ------------------- vllm/engine/output_processor/stop_checker.py | 139 ------------------ vllm/v1/engine/detokenizer.py | 42 +++++- 6 files changed, 40 insertions(+), 384 deletions(-) delete mode 100644 tests/build_cython.py delete mode 100644 vllm/engine/output_processor/__init__.py delete mode 100644 vllm/engine/output_processor/interfaces.py delete mode 100644 vllm/engine/output_processor/single_step.py delete mode 100644 vllm/engine/output_processor/stop_checker.py diff --git a/tests/build_cython.py b/tests/build_cython.py deleted file mode 100644 index 444434e8f0a7..000000000000 --- a/tests/build_cython.py +++ /dev/null @@ -1,39 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to 
the vLLM project -import Cython.Compiler.Options -from Cython.Build import cythonize -from setuptools import setup - -Cython.Compiler.Options.annotate = True - -infiles = [] - -infiles += [ - "vllm/engine/llm_engine.py", - "vllm/transformers_utils/detokenizer.py", - "vllm/engine/output_processor/single_step.py", - "vllm/outputs.py", - "vllm/engine/output_processor/stop_checker.py", -] - -infiles += [ - "vllm/core/scheduler.py", - "vllm/sequence.py", - "vllm/core/block_manager.py", -] - -infiles += [ - "vllm/model_executor/layers/sampler.py", - "vllm/sampling_params.py", - "vllm/utils/__init__.py", -] - -setup(ext_modules=cythonize(infiles, - annotate=False, - force=True, - compiler_directives={ - 'language_level': "3", - 'infer_types': True - })) - -# example usage: python3 build_cython.py build_ext --inplace diff --git a/vllm/engine/output_processor/__init__.py b/vllm/engine/output_processor/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/engine/output_processor/interfaces.py b/vllm/engine/output_processor/interfaces.py deleted file mode 100644 index 587a9221e32c..000000000000 --- a/vllm/engine/output_processor/interfaces.py +++ /dev/null @@ -1,59 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from abc import ABC, abstractmethod -from typing import List - -from vllm.config import SchedulerConfig -from vllm.core.scheduler import Scheduler -from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.sequence import SequenceGroup, SequenceGroupOutput -from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.utils import Counter - - -class SequenceGroupOutputProcessor(ABC): - """Interface for logic that processes new token ids in sequence groups, - managing detokenization, stop checking, and freeing/forking sequences with - the scheduler. - - This is highly coupled with the LLMEngine and should be seen as an extension - of it. The logic is separated to simplify the LLMEngine class and allow - separate implementations for single-step decoding (which supports beam - search sequence forking) and multi-step decoding (which does not support - beam search, but does support speculative decoding). - """ - - @staticmethod - def create_output_processor( - scheduler_config: SchedulerConfig, - detokenizer: Detokenizer, - scheduler: List[Scheduler], - seq_counter: Counter, - stop_checker: "StopChecker", - ): - """Create an output processor. - - Multi-step scheduling is no longer supported. Always return a - single-step output processor. - """ - from vllm.engine.output_processor.single_step import ( - SingleStepOutputProcessor) - return SingleStepOutputProcessor(scheduler_config, detokenizer, - scheduler, seq_counter, stop_checker) - - @abstractmethod - def process_outputs(self, sequence_group: SequenceGroup, - outputs: List[SequenceGroupOutput], - is_async: bool) -> None: - """Process new token ids for the sequence group. Handles logic such as - detokenization, stop checking, and freeing/forking sequences in the - scheduler. 
- """ - pass - - @abstractmethod - def process_prompt_logprob(self, seq_group: SequenceGroup, - outputs: List[SequenceGroupOutput]) -> None: - """Update prompt logprobs received from outputs to seq_group.""" - pass diff --git a/vllm/engine/output_processor/single_step.py b/vllm/engine/output_processor/single_step.py deleted file mode 100644 index dbf6a371d050..000000000000 --- a/vllm/engine/output_processor/single_step.py +++ /dev/null @@ -1,145 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import List - -from vllm.config import SchedulerConfig -from vllm.core.scheduler import Scheduler -from vllm.engine.output_processor.interfaces import ( - SequenceGroupOutputProcessor) -from vllm.engine.output_processor.stop_checker import StopChecker -from vllm.logger import init_logger -from vllm.sequence import (CompletionSequenceGroupOutput, SequenceGroup, - SequenceGroupOutput) -from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.utils import Counter - -logger = init_logger(__name__) - - -def single_step_process_prompt_logprob( - sg_output_proc: SequenceGroupOutputProcessor, seq_group: SequenceGroup, - output: CompletionSequenceGroupOutput) -> None: - """Process prompt logprobs associated with the - [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] for a given step. - - Do nothing if the output has no prompt logprobs. - - Account for the fact that transformers do not compute first-token logprobs. - - Args: - sg_output_proc: - [`SequenceGroupOutputProcessor`][vllm.engine.output_processor.interfaces.SequenceGroupOutputProcessor] - instance - seq_group: the output is associated with this - [`SequenceGroup`][vllm.sequence.SequenceGroup] - output: the [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] - for a single scheduler step - """ - prompt_logprobs = output.prompt_logprobs - - # If this is the first (or only) "chunk" of the prefill, we need - # to prepend None to the list of prompt logprobs. The reason for this - # is that for N prompt tokens, the Sampler will generate N-1 total - # prompt logprobs during prefill since the token at idx 0 will not - # have a logprob associated with it. - if prompt_logprobs is not None: - if not seq_group.prompt_logprobs: - prompt_logprobs = [None] + prompt_logprobs - seq_group.prompt_logprobs = [] - - assert hasattr(sg_output_proc, 'detokenizer') - if (seq_group.sampling_params.detokenize - and sg_output_proc.detokenizer): - sg_output_proc.detokenizer.decode_prompt_logprobs_inplace( - seq_group, - prompt_logprobs, - position_offset=len(seq_group.prompt_logprobs)) - - seq_group.prompt_logprobs.extend(prompt_logprobs) - - -class SingleStepOutputProcessor(SequenceGroupOutputProcessor): - """SequenceGroupOutputProcessor which handles "output processing" logic, - which happens after the model returns generated token ids and before - scheduling of the next batch. Output processing logic includes - detokenization, and determining if a sequence is finished (e.g. via max len - or eos token). - - The SingleStepOutputProcessor is specialized to the case where the model - emits at most a single token per invocation, which precludes configurations - such as speculative decoding or multi-step decoding. This enables beam - search sampling, which requires forking/finishing/freeing sequences in a way - that is currently difficult to schedule multiple steps ahead of time. 
- """ - - def __init__(self, scheduler_config: SchedulerConfig, - detokenizer: Detokenizer, scheduler: List[Scheduler], - seq_counter: Counter, stop_checker: StopChecker): - self.scheduler_config = scheduler_config - self.detokenizer = detokenizer - self.scheduler = scheduler - self.seq_counter = seq_counter - self.stop_checker = stop_checker - - def process_outputs(self, sequence_group: SequenceGroup, - outputs: List[SequenceGroupOutput], - is_async: bool) -> None: - """Append all new tokens to sequences in the sequence group. Fork any - surviving beam candidates; free any unsurviving ones. - - Invokes detokenizer to detokenize new tokens, and also marks sequences - as finished if they meet stop conditions. - - is_async - Indicates whether this postprocessor runs in - parallel with the GPU forward pass and is processing - tokens from the previous step. If this is true, then - no tokens need to be appended since it is already done - externally (before the next schedule() call) - """ - assert (len(outputs) == 1 - ), f"{type(self)} does not support multiple outputs per step" - return self._process_sequence_group_outputs(sequence_group, outputs[0], - is_async) - - def process_prompt_logprob(self, seq_group: SequenceGroup, - outputs: List[SequenceGroupOutput]) -> None: - """Process prompt logprobs associated with one step of a single-step- - scheduled computation. - - Args: - seq_group: the output is associated with this - [`SequenceGroup`][vllm.sequence.SequenceGroup] - outputs: the - [`SequenceGroupOutput`][vllm.sequence.SequenceGroupOutput] - for a single scheduler step - """ - assert len(outputs) == 1, "Single step should only have 1 output." - output = outputs[0] - assert isinstance(output, CompletionSequenceGroupOutput) - single_step_process_prompt_logprob(self, seq_group, output) - - def _process_sequence_group_outputs(self, seq_group: SequenceGroup, - outputs: SequenceGroupOutput, - is_async: bool) -> None: - sampling_params = seq_group.sampling_params - - sample = outputs.samples[0] - seq = seq_group.first_seq - if not is_async: - seq.append_token_id(sample.output_token, sample.logprobs, - sample.output_embed) - if sampling_params.detokenize and self.detokenizer: - new_char_count = self.detokenizer.decode_sequence_inplace( - seq, sampling_params) - else: - new_char_count = 0 - self.stop_checker.maybe_stop_sequence( - seq, - new_char_count, - sampling_params, - lora_req=seq_group.lora_request, - ) - if seq.is_finished(): - for scheduler in self.scheduler: - scheduler.free_seq(seq) diff --git a/vllm/engine/output_processor/stop_checker.py b/vllm/engine/output_processor/stop_checker.py deleted file mode 100644 index 0916f1c918c8..000000000000 --- a/vllm/engine/output_processor/stop_checker.py +++ /dev/null @@ -1,139 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import List, Optional, Tuple - -from vllm.lora.request import LoRARequest -from vllm.reasoning import ReasoningParser -from vllm.sampling_params import SamplingParams -from vllm.sequence import Sequence, SequenceStatus - - -class StopChecker: - """LLMEngine helper class which separates out the logic involving stop - checking. This checks things such as: whether the eos token was emitted, - whether the max_tokens has been consumed, whether a stop string has been - emitted, or if we have exceeded the max model len. 
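    Illustrative only (this example is not part of the original docstring):
    assuming the final token contributed the entire 7-character stop string,
    so that new_char_count == 7, the stop-string helper below behaves
    roughly as

        >>> StopChecker.check_stop_strings(
        ...     "Hello world<|end|>", 7, ["<|end|>"], include_in_output=False)
        ('<|end|>', 11)
        >>> StopChecker.check_stop_strings(
        ...     "Hello world<|end|>", 7, ["<|end|>"], include_in_output=True)
        ('<|end|>', -1)

    i.e. the caller truncates output_text to offset 11 in the first case and
    leaves it untouched (offset -1) in the second.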
- """ - - def __init__( - self, - max_model_len: int, - reasoner: Optional[ReasoningParser] = None, - ): - # Do not use it directly, but use `self._get_max_model_len`. - self._max_model_len = max_model_len - self.reasoner = reasoner - - def _get_max_model_len(self, lora_req: Optional[LoRARequest]): - if lora_req and lora_req.long_lora_max_len: - return lora_req.long_lora_max_len - else: - return self._max_model_len - - def maybe_stop_sequence( - self, - seq: Sequence, - new_char_count: int, - sampling_params: SamplingParams, - lora_req: Optional[LoRARequest] = None, - ) -> None: - """Stop the finished sequences. - - new_char_count is the number of chars added to the - sequence's output text for the newly generated token - """ - - # Check if the minimum number of tokens has been generated yet; - # skip the stop string/token checks if not - if seq.get_output_len() < sampling_params.min_tokens: - return - - # Check if the sequence has generated the EOS token. - if ((not sampling_params.ignore_eos) - and seq.get_last_token_id() == seq.eos_token_id): - # Remove the last EOS token unless explicitly specified - # This prevents unintended exposure of the EOS token - if new_char_count and ( - not sampling_params.include_stop_str_in_output): - seq.output_text = seq.output_text[:-new_char_count] - seq.status = SequenceStatus.FINISHED_STOPPED - return - - # Skip stop string/token checks if in reasoning content generation - if self.reasoner is not None and \ - not self.reasoner.is_reasoning_end(seq.get_token_ids()): - return - - # Check if a stop token was encountered. - # This assumes a single token produced per step. - last_token_id = seq.get_last_token_id() - if last_token_id in (sampling_params.stop_token_ids or ()): - if new_char_count and ( - not sampling_params.include_stop_str_in_output): - # Remove last token - seq.output_text = seq.output_text[:-new_char_count] - seq.status = SequenceStatus.FINISHED_STOPPED - seq.stop_reason = last_token_id - return - - # Check if any stop strings are matched. - stop = self.check_stop_strings( - seq.output_text, new_char_count, sampling_params.stop, - sampling_params.include_stop_str_in_output) - if stop is not None: - stop_str, truncate_to = stop - if truncate_to != -1: - seq.output_text = seq.output_text[:truncate_to] - seq.status = SequenceStatus.FINISHED_STOPPED - seq.stop_reason = stop_str - return - - # Check if the sequence has reached max_model_len. - if seq.get_len() >= self._get_max_model_len(lora_req): - seq.status = SequenceStatus.FINISHED_LENGTH_CAPPED - return - - # Check if the sequence has reached max_tokens. - if seq.get_output_len() == sampling_params.max_tokens: - seq.status = SequenceStatus.FINISHED_LENGTH_CAPPED - return - - @staticmethod - def check_stop_strings( - output_text: str, - new_char_count: int, - stop: List[str], - include_in_output: bool, - ) -> Optional[Tuple[str, int]]: - """Check if any stop strings are matched and truncate sequence - output text accordingly. - - Returns tuple (stop_string, offset) if matched or else None. - - Where stop_string is the matched stop string and offset is the - length to which output_text should be truncated, or -1 for no - truncation. - """ - if not new_char_count or not stop: - return None - - for stop_str in stop: - stop_string_len = len(stop_str) - # Avoid searching already-searched text. - stop_index = output_text.find(stop_str, - 1 - new_char_count - stop_string_len) - if stop_index == -1: - continue - - if include_in_output: - # Truncate to end of stop string. 
- stop_index += stop_string_len - if stop_index >= len(output_text): - # No truncation required. - return stop_str, -1 - - # Truncate the output text to either the beginning - # or end of the stop string. - return stop_str, stop_index - return None diff --git a/vllm/v1/engine/detokenizer.py b/vllm/v1/engine/detokenizer.py index 8aa36d6a439c..0f993a74c810 100644 --- a/vllm/v1/engine/detokenizer.py +++ b/vllm/v1/engine/detokenizer.py @@ -9,7 +9,6 @@ from tokenizers.decoders import DecodeStream from transformers import PreTrainedTokenizerFast -from vllm.engine.output_processor.stop_checker import StopChecker from vllm.logger import init_logger from vllm.transformers_utils.detokenizer_utils import ( AnyTokenizer, convert_prompt_ids_to_tokens, detokenize_incrementally) @@ -129,7 +128,7 @@ def update(self, new_token_ids: list[int], # 2) Evaluate stop strings. stop_string = None if self.stop and len(self.output_token_ids) > self.min_tokens: - stop = StopChecker.check_stop_strings( + stop = check_stop_strings( output_text=self.output_text, new_char_count=len(self.output_text) - stop_check_offset, stop=self.stop, @@ -309,3 +308,42 @@ def decode_next(self, next_token_id: int) -> str: self.read_offset = read_offset return decoded_text + + +def check_stop_strings( + output_text: str, + new_char_count: int, + stop: list[str], + include_in_output: bool, +) -> Optional[tuple[str, int]]: + """Check if any stop strings are matched and truncate sequence + output text accordingly. + + Returns tuple (stop_string, offset) if matched or else None. + + Where stop_string is the matched stop string and offset is the + length to which output_text should be truncated, or -1 for no + truncation. + """ + if not new_char_count or not stop: + return None + + for stop_str in stop: + stop_string_len = len(stop_str) + # Avoid searching already-searched text. + stop_index = output_text.find(stop_str, + 1 - new_char_count - stop_string_len) + if stop_index == -1: + continue + + if include_in_output: + # Truncate to end of stop string. + stop_index += stop_string_len + if stop_index >= len(output_text): + # No truncation required. + return stop_str, -1 + + # Truncate the output text to either the beginning + # or end of the stop string. 
+ return stop_str, stop_index + return None From 572ddf83ce1fe8d52670e01a7a6cc0d8a99fa90c Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 19:53:20 -0700 Subject: [PATCH 198/518] [Chore] Remove unused sampler in models (#25324) Signed-off-by: Woosuk Kwon --- tests/lora/conftest.py | 3 --- vllm/model_executor/models/ernie_mtp.py | 10 ---------- vllm/model_executor/models/plamo2.py | 10 ---------- vllm/model_executor/models/step3_text.py | 10 ---------- vllm/model_executor/models/step3_vl.py | 16 ---------------- 5 files changed, 49 deletions(-) diff --git a/tests/lora/conftest.py b/tests/lora/conftest.py index 3475993ff8f0..b539a7bf5d76 100644 --- a/tests/lora/conftest.py +++ b/tests/lora/conftest.py @@ -17,7 +17,6 @@ MergedColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.sampler import Sampler from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.models.interfaces import SupportsLoRA from vllm.platforms import current_platform @@ -97,7 +96,6 @@ def dummy_model() -> nn.Module: # Special handling for lm_head & sampler ("lm_head", ParallelLMHead(512, 10)), ("logits_processor", LogitsProcessor(512)), - ("sampler", Sampler()) ])) model.config = MagicMock() model.embedding_modules = {"lm_head": "lm_head"} @@ -125,7 +123,6 @@ def dummy_model_gate_up() -> nn.Module: # Special handling for lm_head & sampler ("lm_head", ParallelLMHead(512, 10)), ("logits_processor", LogitsProcessor(512)), - ("sampler", Sampler()) ])) model.config = MagicMock() model.packed_modules_mapping = { diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index 57c534887437..c44626523031 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -33,7 +33,6 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -160,7 +159,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.lm_head = ParallelLMHead(self.config.vocab_size, self.config.hidden_size, prefix=maybe_prefix(prefix, "lm_head")) - self.sampler = get_sampler() if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight @@ -188,14 +186,6 @@ def compute_logits( return self.model.compute_logits(hidden_states, self.lm_head, sampling_metadata, spec_step_idx) - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: stacked_params_mapping = [ diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index ef96d272adfb..9f1ee36366fd 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -41,7 +41,6 @@ mamba_chunk_scan_combined) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope -from 
vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( @@ -932,7 +931,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, self.config.vocab_size) - self.sampler = get_sampler() self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) @@ -1030,14 +1028,6 @@ def compute_logits( sampling_metadata) return logits - def sample( - self, - logits: Optional[torch.Tensor], - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): params_dict = dict(self.named_parameters()) for name, loaded_weight in weights: diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index b8733fa5e612..6a5b540fc817 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -26,7 +26,6 @@ from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -391,7 +390,6 @@ def __init__( ) self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) - self.sampler = get_sampler() else: self.lm_head = PPMissingLayer() @@ -413,14 +411,6 @@ def compute_logits(self, hidden_states: torch.Tensor, sampling_metadata) return logits - def sample( - self, - logits: Optional[torch.Tensor], - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - next_tokens = self.sampler(logits, sampling_metadata) - return next_tokens - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: qkv_params_mapping = [ diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py index 2ba5f94ea3b8..c2940f8e4445 100644 --- a/vllm/model_executor/models/step3_vl.py +++ b/vllm/model_executor/models/step3_vl.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import math from collections.abc import Iterable, Mapping, Sequence -from functools import cached_property from itertools import product from math import ceil, sqrt from typing import Any, Literal, Optional, TypedDict, Union @@ -24,7 +23,6 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -897,13 +895,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors) - @cached_property - def sampler(self): - if hasattr(self.language_model, "sampler"): - return self.language_model.sampler - - return 
get_sampler() - @property def device(self): return next(self.parameters()).device @@ -1069,13 +1060,6 @@ def compute_logits( return self.language_model.compute_logits(hidden_states, sampling_metadata) - def sample( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - return self.language_model.sample(logits, sampling_metadata) - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): skip_prefixes = [] From 72dd1595b466383c8c9c9e39cb519814df1a105f Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 19:57:46 -0700 Subject: [PATCH 199/518] [CI] Skip tests failing on main (#25326) Signed-off-by: Woosuk Kwon --- .../entrypoints/openai/test_completion_with_prompt_embeds.py | 1 + tests/models/quantization/test_fp8.py | 5 ++++- tests/models/test_oot_registration.py | 1 + tests/quantization/test_compressed_tensors.py | 5 ++++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index 176c1825530e..9c62595ad280 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -60,6 +60,7 @@ def create_dummy_embeds(num_tokens: int = 5) -> str: return base64.b64encode(buffer.getvalue()).decode('utf-8') +@pytest.mark.skip("This test is skipped because it is flaky.") @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_completions_with_prompt_embeds( diff --git a/tests/models/quantization/test_fp8.py b/tests/models/quantization/test_fp8.py index 97dd4d6135ac..bb8ae741b614 100644 --- a/tests/models/quantization/test_fp8.py +++ b/tests/models/quantization/test_fp8.py @@ -32,7 +32,7 @@ # Due to low-precision numerical divergence, we only test logprob of 4 tokens @pytest.mark.parametrize("max_tokens", [4]) @pytest.mark.parametrize("enforce_eager", [True]) -@pytest.mark.parametrize("backend", ["FLASH_ATTN", "XFORMERS"]) +@pytest.mark.parametrize("backend", ["FLASH_ATTN"]) # NOTE: Increasing this in this suite will fail CI because we currently cannot # reset distributed env properly. Use a value > 1 just when you test. 
@pytest.mark.parametrize("tensor_parallel_size", [1]) @@ -57,6 +57,9 @@ def test_models( pytest.skip( f"{kv_cache_dtype} is currently not supported on ROCm/HIP.") + if not current_platform.is_kv_cache_dtype_supported(kv_cache_dtype, None): + pytest.skip(f"{kv_cache_dtype} is not supported on this platform.") + with monkeypatch.context() as m: m.setenv("TOKENIZERS_PARALLELISM", 'true') m.setenv(STR_BACKEND_ENV_VAR, backend) diff --git a/tests/models/test_oot_registration.py b/tests/models/test_oot_registration.py index cb30d77c4f0e..9b376f2a260a 100644 --- a/tests/models/test_oot_registration.py +++ b/tests/models/test_oot_registration.py @@ -63,6 +63,7 @@ def test_oot_registration_embedding( image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB") +@pytest.mark.skip(reason="This test is skipped because it failed on V1.") @create_new_process_for_each_test() def test_oot_registration_multimodal( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index b7949a488ad0..c0ab3fbb1062 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -357,6 +357,9 @@ def check_model(model): assert output +@pytest.mark.skipif( + not current_platform.is_kv_cache_dtype_supported("fp8", None), + reason="FP8 KV cache is not supported on this device.") @pytest.mark.skipif(not current_platform.is_cuda(), reason="This test is skipped on non-CUDA platform.") def test_compressed_tensors_kv_cache(vllm_runner): @@ -738,4 +741,4 @@ def test_compressed_tensors_transforms_perplexity(vllm_runner, model, prompt, with vllm_runner(model, enforce_eager=True) as llm: perplexity = llm.generate_prompt_perplexity([prompt])[0] print(perplexity) - assert perplexity <= exp_perplexity \ No newline at end of file + assert perplexity <= exp_perplexity From c99db8c8ddd50c0d30c14057012bd27b42d32b2d Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 19:58:26 -0700 Subject: [PATCH 200/518] [V0 Deprecation] Remove V0 core (#25321) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- .buildkite/test-pipeline.yaml | 3 - .github/CODEOWNERS | 2 - pyproject.toml | 2 - .../backends/differential_flash_attn.py | 12 +- .../backends/dual_chunk_flash_attn.py | 10 +- vllm/attention/backends/flash_attn.py | 12 +- vllm/attention/backends/mla/common.py | 13 +- vllm/attention/backends/placeholder_attn.py | 11 +- vllm/attention/backends/rocm_aiter_mla.py | 7 +- vllm/attention/backends/utils.py | 9 +- vllm/core/__init__.py | 0 vllm/core/block/__init__.py | 0 vllm/core/block/block_table.py | 399 ---- vllm/core/block/common.py | 371 --- vllm/core/block/cpu_gpu_block_allocator.py | 439 ---- vllm/core/block/interfaces.py | 319 --- vllm/core/block/naive_block.py | 466 ---- vllm/core/block/prefix_caching_block.py | 1135 --------- vllm/core/block/utils.py | 28 - vllm/core/block_manager.py | 523 ----- vllm/core/evictor.py | 157 -- vllm/core/interfaces.py | 139 -- vllm/core/placeholder_block_space_manager.py | 103 - vllm/core/scheduler.py | 2028 ---------------- vllm/engine/protocol.py | 8 +- vllm/v1/engine/async_llm.py | 6 +- vllm/worker/cache_engine.py | 145 -- vllm/worker/model_runner.py | 2031 ----------------- vllm/worker/worker.py | 666 ------ 29 files changed, 24 insertions(+), 9020 deletions(-) delete mode 100644 vllm/core/__init__.py delete mode 100644 vllm/core/block/__init__.py delete mode 100644 vllm/core/block/block_table.py delete mode 100644 vllm/core/block/common.py delete 
mode 100644 vllm/core/block/cpu_gpu_block_allocator.py delete mode 100644 vllm/core/block/interfaces.py delete mode 100644 vllm/core/block/naive_block.py delete mode 100644 vllm/core/block/prefix_caching_block.py delete mode 100644 vllm/core/block/utils.py delete mode 100644 vllm/core/block_manager.py delete mode 100644 vllm/core/evictor.py delete mode 100644 vllm/core/interfaces.py delete mode 100644 vllm/core/placeholder_block_space_manager.py delete mode 100644 vllm/core/scheduler.py delete mode 100644 vllm/worker/cache_engine.py delete mode 100644 vllm/worker/model_runner.py delete mode 100644 vllm/worker/worker.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 1e7ce6ef0a66..9d38e571324b 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -148,7 +148,6 @@ steps: num_gpus: 4 source_file_dependencies: - vllm/distributed/ - - vllm/core/ - tests/distributed/test_utils - tests/distributed/test_pynccl - tests/distributed/test_events @@ -867,8 +866,6 @@ steps: - tests/distributed/ - vllm/compilation - vllm/worker/worker_base.py - - vllm/worker/worker.py - - vllm/worker/model_runner.py - entrypoints/llm/test_collective_rpc.py - tests/v1/test_async_llm_dp.py - tests/v1/test_external_lb_dp.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index f58256d38b9d..37bd0ace98a9 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -4,10 +4,8 @@ # This lists cover the "core" components of vLLM that require careful review /vllm/attention @LucasWilkinson /vllm/attention/backends/abstract.py @WoosukKwon @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill -/vllm/core @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill /vllm/executor/executor_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn /vllm/worker/worker_base.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @22quinn -/vllm/worker/worker.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill /vllm/model_executor/layers/fused_moe @mgoin /vllm/model_executor/layers/sampler.py @zhuohan123 @youkaichao @alexm-redhat @comaniac @njhill @NickLucche /vllm/model_executor/layers/quantization @mgoin @robertgshaw2-redhat @tlrmchlsmth @yewentao256 diff --git a/pyproject.toml b/pyproject.toml index fe55461db00b..f43ae69e00bd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,6 @@ line-length = 80 "vllm/_version.py" = ["ALL"] # Python 3.8 typing - skip V0 code "vllm/attention/**/*.py" = ["UP006", "UP035"] -"vllm/core/**/*.py" = ["UP006", "UP035"] "vllm/engine/**/*.py" = ["UP006", "UP035"] "vllm/executor/**/*.py" = ["UP006", "UP035"] "vllm/worker/**/*.py" = ["UP006", "UP035"] @@ -117,7 +116,6 @@ files = [ "vllm/*.py", "vllm/assets", "vllm/entrypoints", - "vllm/core", "vllm/inputs", "vllm/logging_utils", "vllm/multimodal", diff --git a/vllm/attention/backends/differential_flash_attn.py b/vllm/attention/backends/differential_flash_attn.py index a7d0e3afb517..87a4558e377d 100644 --- a/vllm/attention/backends/differential_flash_attn.py +++ b/vllm/attention/backends/differential_flash_attn.py @@ -4,7 +4,7 @@ from collections import defaultdict from dataclasses import dataclass from itertools import accumulate -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type +from typing import Any, Dict, List, Optional, Tuple, Type import torch from einops import rearrange @@ -34,9 +34,6 @@ from vllm.vllm_flash_attn import (flash_attn_varlen_func, flash_attn_with_kvcache) -if TYPE_CHECKING: - from vllm.worker.model_runner import 
ModelInputForGPUBuilder - logger = init_logger(__name__) @@ -329,7 +326,7 @@ def decode_metadata( class DifferentialFlashAttentionMetadataBuilder( AttentionMetadataBuilder[DifferentialFlashAttentionMetadata]): - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): self.input_builder = input_builder self.runner = input_builder.runner self.sliding_window = input_builder.sliding_window @@ -350,9 +347,8 @@ def prepare(self): self.num_decode_tokens = 0 self.has_prefix_cache_hit = False - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool, prefix_cache_hit: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, + prefix_cache_hit: bool): """Add a sequence group to the metadata. Specifically update/append 1. context length. 2. block table. diff --git a/vllm/attention/backends/dual_chunk_flash_attn.py b/vllm/attention/backends/dual_chunk_flash_attn.py index 85957bea1e26..de47bb8ebd8f 100644 --- a/vllm/attention/backends/dual_chunk_flash_attn.py +++ b/vllm/attention/backends/dual_chunk_flash_attn.py @@ -4,7 +4,7 @@ """ import math from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type +from typing import Any, Dict, List, Optional, Tuple, Type import torch import torch.distributed @@ -22,9 +22,6 @@ from vllm.vllm_flash_attn import (flash_attn_varlen_func, flash_attn_with_kvcache, sparse_attn_func) -if TYPE_CHECKING: - from vllm.worker.model_runner import ModelInputForGPUBuilder - logger = init_logger(__name__) @@ -224,9 +221,8 @@ def prepare(self): super().prepare() self.orig_seq_lens: List[int] = [] - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool, prefix_cache_hit: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, + prefix_cache_hit: bool): super()._add_seq_group(inter_data, chunked_prefill_enabled, prefix_cache_hit) for prompt_len, seq_len in zip(inter_data.prompt_lens, diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py index 78c768f92d3c..edb3afb4aa07 100755 --- a/vllm/attention/backends/flash_attn.py +++ b/vllm/attention/backends/flash_attn.py @@ -4,7 +4,7 @@ from collections import defaultdict from dataclasses import dataclass from itertools import accumulate -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type import torch @@ -31,9 +31,6 @@ from vllm.vllm_flash_attn import (flash_attn_varlen_func, flash_attn_with_kvcache) -if TYPE_CHECKING: - from vllm.worker.model_runner import ModelInputForGPUBuilder - logger = init_logger(__name__) @@ -312,7 +309,7 @@ def decode_metadata(self) -> Optional["FlashAttentionMetadata"]: class FlashAttentionMetadataBuilder( AttentionMetadataBuilder[FlashAttentionMetadata]): - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): self.input_builder = input_builder self.runner = input_builder.runner self.sliding_window = input_builder.sliding_window @@ -332,9 +329,8 @@ def prepare(self): self.num_decode_tokens = 0 self.has_prefix_cache_hit = False - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool, prefix_cache_hit: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, + prefix_cache_hit: bool): """Add a sequence group to the metadata. 
Specifically update/append 1. context length. 2. block table. diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py index 789393eb39a7..826b63e1ccda 100644 --- a/vllm/attention/backends/mla/common.py +++ b/vllm/attention/backends/mla/common.py @@ -193,8 +193,7 @@ from contextlib import contextmanager from dataclasses import dataclass from itertools import accumulate -from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Tuple, - Type, TypeVar) +from typing import Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar import torch @@ -233,9 +232,6 @@ except ImportError: flash_attn_varlen_func = None -if TYPE_CHECKING: - from vllm.worker.model_runner import ModelInputForGPUBuilder - is_hip = current_platform.is_rocm() @@ -638,7 +634,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[T], Generic[T]): """ BLOCK_TABLE_EXTENDER: list[list[int]] = [] - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): self.input_builder = input_builder self.runner = input_builder.runner self.sliding_window = input_builder.sliding_window @@ -668,9 +664,8 @@ def prepare(self): self.num_decode_tokens = 0 self.has_prefix_cache_hit = False - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool, prefix_cache_hit: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, + prefix_cache_hit: bool): """Add a sequence group to the metadata. Specifically update/append 1. context length. 2. block table. diff --git a/vllm/attention/backends/placeholder_attn.py b/vllm/attention/backends/placeholder_attn.py index e630a6c6de8c..f82d28938f45 100644 --- a/vllm/attention/backends/placeholder_attn.py +++ b/vllm/attention/backends/placeholder_attn.py @@ -4,7 +4,7 @@ from collections import defaultdict from dataclasses import dataclass from itertools import accumulate -from typing import TYPE_CHECKING, Dict, List, Optional, Tuple, Type +from typing import Dict, List, Optional, Tuple, Type import torch @@ -13,9 +13,6 @@ AttentionMetadataBuilder) from vllm.attention.backends.utils import CommonAttentionState from vllm.multimodal import MultiModalPlaceholderMap - -if TYPE_CHECKING: - from vllm.worker.model_runner import (ModelInputForGPUBuilder) from vllm.utils import async_tensor_h2d # Placeholder attention backend for models like Mamba and pooling models that @@ -204,7 +201,7 @@ def decode_metadata(self) -> Optional["PlaceholderAttentionMetadata"]: class PlaceholderAttentionMetadataBuilder( AttentionMetadataBuilder[PlaceholderAttentionMetadata]): - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): self.input_builder = input_builder self.runner = input_builder.runner @@ -220,9 +217,7 @@ def prepare(self): self.num_prefill_tokens = 0 self.num_decode_tokens = 0 - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): """Add a sequence group to the metadata. Specifically update/append 1. context length. 
""" diff --git a/vllm/attention/backends/rocm_aiter_mla.py b/vllm/attention/backends/rocm_aiter_mla.py index a2e9710437d9..587d08858b92 100644 --- a/vllm/attention/backends/rocm_aiter_mla.py +++ b/vllm/attention/backends/rocm_aiter_mla.py @@ -3,7 +3,7 @@ from contextlib import contextmanager from dataclasses import dataclass -from typing import TYPE_CHECKING, Optional, Type, Union +from typing import Optional, Type, Union import torch @@ -19,9 +19,6 @@ from vllm.attention.ops.rocm_aiter_mla import (aiter_mla_decode_fwd, get_aiter_mla_metadata) -if TYPE_CHECKING: - from vllm.worker.model_runner import ModelInputForGPUBuilder - def is_aiter_mla_enabled() -> bool: return envs.VLLM_ROCM_USE_AITER \ @@ -110,7 +107,7 @@ def decode_metadata(self): class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): BLOCK_TABLE_EXTENDER: list[list[int]] = [[]] - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): super().__init__(input_builder) assert self.block_size == 1, "AITER MLA requires only block size 1." diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 7b6c426b0f85..289cfa217743 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -35,9 +35,6 @@ # if we have at least this many elements. Could be tuned further. _COMPUTE_SLOT_MAPPING_NUMPY_NUMEL = 256 -if TYPE_CHECKING: - from vllm.worker.model_runner import ModelInputForGPUBuilder - def is_block_tables_empty(block_tables: Union[None, Dict]): """ @@ -129,7 +126,7 @@ class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]): _metadata_cls: Type[TAttentionMetadata] - def __init__(self, input_builder: "ModelInputForGPUBuilder"): + def __init__(self, input_builder): self.input_builder = input_builder self.runner = input_builder.runner @@ -149,9 +146,7 @@ def prepare(self): self.num_prefill_tokens = 0 self.num_decode_tokens = 0 - def _add_seq_group( - self, inter_data: "ModelInputForGPUBuilder.InterDataForSeqGroup", - chunked_prefill_enabled: bool): + def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): is_prompt = inter_data.is_prompt block_tables = inter_data.block_tables diff --git a/vllm/core/__init__.py b/vllm/core/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/core/block/__init__.py b/vllm/core/block/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/core/block/block_table.py b/vllm/core/block/block_table.py deleted file mode 100644 index 444bb25f2830..000000000000 --- a/vllm/core/block/block_table.py +++ /dev/null @@ -1,399 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import math -from typing import List, Optional - -from vllm.core.block.common import BlockList -from vllm.core.block.interfaces import Block, DeviceAwareBlockAllocator -from vllm.utils import Device, cdiv, chunk_list - - -class BlockTable: - """A class to manage blocks for a specific sequence. - - The BlockTable maps a sequence of tokens to a list of blocks, where each - block represents a contiguous memory allocation for a portion of the - sequence. The blocks are managed by a DeviceAwareBlockAllocator, which is - responsible for allocating and freeing memory for the blocks. - - Args: - block_size (int): The maximum number of tokens that can be stored in a - single block. 
- block_allocator (DeviceAwareBlockAllocator): The block allocator used to - manage memory for the blocks. - _blocks (Optional[List[Block]], optional): An optional list of existing - blocks to initialize the BlockTable with. If not provided, an empty - BlockTable is created. - max_block_sliding_window (Optional[int], optional): The number of - blocks to keep around for each sequence. If None, all blocks - are kept (eg., when sliding window is not used). - It should at least fit the sliding window size of the model. - - Attributes: - _block_size (int): The maximum number of tokens that can be stored in a - single block. - _allocator (DeviceAwareBlockAllocator): The block allocator used to - manage memory for the blocks. - _blocks (Optional[List[Block]]): The list of blocks managed by this - BlockTable. - _num_full_slots (int): The number of tokens currently stored in the - blocks. - """ - - def __init__( - self, - block_size: int, - block_allocator: DeviceAwareBlockAllocator, - _blocks: Optional[List[Block]] = None, - max_block_sliding_window: Optional[int] = None, - ): - self._block_size = block_size - self._allocator = block_allocator - if _blocks is None: - _blocks = [] - self._blocks: BlockList = BlockList(_blocks) - - self._max_block_sliding_window = max_block_sliding_window - self._num_full_slots = self._get_num_token_ids() - - @staticmethod - def get_num_required_blocks(token_ids: List[int], - block_size: int, - num_lookahead_slots: int = 0) -> int: - """Calculates the minimum number of blocks required to store a given - sequence of token IDs along with any look-ahead slots that may be - required (like in multi-step + chunked-prefill). - - This assumes worst-case scenario, where every block requires a new - allocation (e.g. ignoring prefix caching). - - Args: - token_ids (List[int]): The sequence of token IDs to be stored. - block_size (int): The maximum number of tokens that can be stored in - a single block. - num_lookahead_slots (int): look-ahead slots that the sequence may - require. - - Returns: - int: The minimum number of blocks required to store the given - sequence of token IDs along with any required look-ahead slots. - """ - return cdiv(len(token_ids) + num_lookahead_slots, block_size) - - def allocate(self, - token_ids: List[int], - device: Device = Device.GPU, - extra_hash: Optional[int] = None) -> None: - """Allocates memory blocks for storing the given sequence of token IDs. - - This method allocates the required number of blocks to store the given - sequence of token IDs. - - Args: - token_ids (List[int]): The sequence of token IDs to be stored. - device (Device, optional): The device on which the blocks should be - allocated. Defaults to Device.GPU. - extra_hash (Optional[int]): The hash value of additional - factors, such as adapters, that influence the block hash - in the prefixcaching block. - """ - assert not self._is_allocated - assert token_ids - blocks = self._allocate_blocks_for_token_ids(prev_block=None, - token_ids=token_ids, - device=device, - extra_hash=extra_hash) - self.update(blocks) - self._num_full_slots = len(token_ids) - - def update(self, blocks: List[Block]) -> None: - """Resets the table to the newly provided blocks - (with their corresponding block ids) - """ - self._blocks.update(blocks) - - def append_token_ids(self, - token_ids: List[int], - num_lookahead_slots: int = 0, - num_computed_slots: Optional[int] = None, - extra_hash: Optional[int] = None) -> None: - """Appends a sequence of token IDs to the existing blocks in the - BlockTable. 
- - This method appends the given sequence of token IDs to the existing - blocks in the BlockTable. If there is not enough space in the existing - blocks, new blocks are allocated using the `ensure_num_empty_slots` - method to accommodate the additional tokens. - - The token IDs are divided into chunks of size `block_size` (except for - the first chunk, which may be smaller), and each chunk is appended to a - separate block. - - Args: - token_ids (List[int]): The sequence of token IDs to be appended. - num_computed_slots (Optional[int]): The number of KV cache slots - that are already filled (computed). - When sliding window is enabled, this is used to compute how many - blocks to drop at the front of the sequence. - Without sliding window, None can be passed. - Without chunked prefill, it should be the same as - _num_full_slots. - extra_hash (Optional[int]): The hash value of additional - factors such as adapters that influence the block, apart - from the token_ids. - """ - assert self._is_allocated, "no blocks have been allocated" - assert len(self._blocks) > 0 - - # Drop blocks that are no longer needed due to sliding window - if self._max_block_sliding_window is not None: - null_block = self._allocator.allocate_or_get_null_block() - assert num_computed_slots is not None - end_block_idx = (num_computed_slots // - self._block_size) - self._max_block_sliding_window - for idx in range(0, end_block_idx): - b = self._blocks[idx] - if b is not null_block: - self._allocator.free(b) - self._blocks[idx] = null_block - - # Ensure there are enough empty slots for the new tokens plus - # lookahead slots - self.ensure_num_empty_slots(num_empty_slots=len(token_ids) + - num_lookahead_slots, - extra_hash=extra_hash) - - # Update the blocks with the new tokens - first_block_idx = self._num_full_slots // self._block_size - token_blocks = self._chunk_token_blocks_for_append(token_ids) - - for i, token_block in enumerate(token_blocks): - self._blocks.append_token_ids(first_block_idx + i, token_block) - - self._num_full_slots += len(token_ids) - - def ensure_num_empty_slots(self, - num_empty_slots: int, - extra_hash: Optional[int] = None) -> None: - """Ensures that the BlockTable has at least the specified number of - empty slots available. - - This method checks if the BlockTable has enough empty slots (i.e., - available space) to accommodate the requested number of tokens. If not, - it allocates additional blocks on the GPU to ensure that the required - number of empty slots is available. - - Args: - num_empty_slots (int): The minimum number of empty slots required. - extra_hash (Optional[int]): The hash value of additional - factors such as adapters that influence the block, apart - from the token_ids. - """ - # Currently the block table only supports - # appending tokens to GPU blocks. - device = Device.GPU - assert self._is_allocated - - if self._num_empty_slots >= num_empty_slots: - return - - slots_to_allocate = num_empty_slots - self._num_empty_slots - blocks_to_allocate = cdiv(slots_to_allocate, self._block_size) - - for _ in range(blocks_to_allocate): - assert len(self._blocks) > 0 - self._blocks.append( - self._allocator.allocate_mutable_block( - prev_block=self._blocks[-1], - device=device, - extra_hash=extra_hash)) - - def fork(self) -> "BlockTable": - """Creates a new BlockTable instance with a copy of the blocks from the - current instance. - - This method creates a new BlockTable instance with the same block size, - block allocator, and a copy of the blocks from the current instance. 
The - new BlockTable has its own independent set of blocks, but shares the - same underlying memory allocation with the original BlockTable. - - Returns: - BlockTable: A new BlockTable instance with a copy of the blocks from - the current instance. - """ - assert self._is_allocated - assert len(self._blocks) > 0 - forked_blocks = self._allocator.fork(self._blocks[-1]) - return BlockTable( - block_size=self._block_size, - block_allocator=self._allocator, - _blocks=forked_blocks, - max_block_sliding_window=self._max_block_sliding_window, - ) - - def free(self) -> None: - """Frees the memory occupied by the blocks in the BlockTable. - - This method iterates over all the blocks in the `_blocks` list and calls - the `free` method of the `_allocator` object to release the memory - occupied by each block. After freeing all the blocks, the `_blocks` list - is set to `None`. - """ - for block in self.blocks: - self._allocator.free(block) - self._blocks.reset() - - @property - def physical_block_ids(self) -> List[int]: - """Returns a list of physical block indices for the blocks in the - BlockTable. - - This property returns a list of integers, where each integer represents - the physical block index of a corresponding block in the `_blocks` list. - The physical block index is a unique identifier for the memory location - occupied by the block. - - Returns: - List[int]: A list of physical block indices for the blocks in the - BlockTable. - """ - return self._blocks.ids() - - def get_unseen_token_ids(self, sequence_token_ids: List[int]) -> List[int]: - """Get the number of "unseen" tokens in the sequence. - - Unseen tokens are tokens in the sequence corresponding to this block - table, but are not yet appended to this block table. - - Args: - sequence_token_ids (List[int]): The list of token ids in the - sequence. - - Returns: - List[int]: The postfix of sequence_token_ids that has not yet been - appended to the block table. - """ - - # Since the block table is append-only, the unseen token ids are the - # ones after the appended ones. - return sequence_token_ids[self.num_full_slots:] - - def _allocate_blocks_for_token_ids( - self, - prev_block: Optional[Block], - token_ids: List[int], - device: Device, - extra_hash: Optional[int] = None) -> List[Block]: - blocks: List[Block] = [] - - block_token_ids = [] - tail_token_ids = [] - for cur_token_ids in chunk_list(token_ids, self._block_size): - if len(cur_token_ids) == self._block_size: - block_token_ids.append(cur_token_ids) - else: - tail_token_ids.append(cur_token_ids) - - if block_token_ids: - blocks.extend( - self._allocator.allocate_immutable_blocks( - prev_block, - block_token_ids=block_token_ids, - device=device, - extra_hash=extra_hash)) - prev_block = blocks[-1] - - if tail_token_ids: - assert len(tail_token_ids) == 1 - cur_token_ids = tail_token_ids[0] - - block = self._allocator.allocate_mutable_block( - prev_block=prev_block, device=device, extra_hash=extra_hash) - block.append_token_ids(cur_token_ids) - - blocks.append(block) - - return blocks - - def _get_all_token_ids(self) -> List[int]: - # NOTE: This function is O(seq_len); use sparingly. 
- token_ids: List[int] = [] - - if not self._is_allocated: - return token_ids - - for block in self.blocks: - token_ids.extend(block.token_ids) - - return token_ids - - def _get_num_token_ids(self) -> int: - res = 0 - for block in self.blocks: - res += len(block.token_ids) - - return res - - @property - def _is_allocated(self) -> bool: - return len(self._blocks) > 0 - - @property - def blocks(self) -> List[Block]: - return self._blocks.list() - - @property - def _num_empty_slots(self) -> int: - assert self._is_allocated - return len(self._blocks) * self._block_size - self._num_full_slots - - @property - def num_full_slots(self) -> int: - """Returns the total number of tokens currently stored in the - BlockTable. - - Returns: - int: The total number of tokens currently stored in the BlockTable. - """ - return self._num_full_slots - - def get_num_blocks_touched_by_append_slots( - self, token_ids: List[int], num_lookahead_slots: int) -> int: - """Determine how many blocks will be "touched" by appending the token - ids. - - This is required for the scheduler to determine whether a sequence can - continue generation, or if it must be preempted. - """ - # Math below is equivalent to: - # all_token_ids = token_ids + [-1] * num_lookahead_slots - # token_blocks = self._chunk_token_blocks_for_append(all_token_ids) - # return len(token_blocks) - - num_token_ids = len(token_ids) + num_lookahead_slots - first_chunk_size = self._block_size - (self._num_full_slots % - self._block_size) - num_token_blocks = (1 + math.ceil( - (num_token_ids - first_chunk_size) / self._block_size)) - return num_token_blocks - - def _chunk_token_blocks_for_append( - self, token_ids: List[int]) -> List[List[int]]: - """Split the token ids into block-sized chunks so they can be easily - appended to blocks. The first such "token block" may have less token ids - than the block size, since the last allocated block may be partially - full. - - If no token ids are provided, then no chunks are returned. - """ - - if not token_ids: - return [] - - first_chunk_size = self._block_size - (self._num_full_slots % - self._block_size) - token_blocks = [token_ids[:first_chunk_size]] - token_blocks.extend( - chunk_list(token_ids[first_chunk_size:], self._block_size)) - return token_blocks diff --git a/vllm/core/block/common.py b/vllm/core/block/common.py deleted file mode 100644 index a337007a9eaa..000000000000 --- a/vllm/core/block/common.py +++ /dev/null @@ -1,371 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from collections import deque -from dataclasses import dataclass -from typing import Deque, Dict, Iterable, List, Optional, Protocol, Tuple - -from vllm.core.block.interfaces import Block, BlockAllocator - -BlockId = int -RefCount = int - - -class RefCounterProtocol(Protocol): - - def incr(self, block_id: BlockId) -> RefCount: - raise NotImplementedError - - def decr(self, block_id: BlockId) -> RefCount: - raise NotImplementedError - - def get(self, block_id: BlockId) -> RefCount: - raise NotImplementedError - - -class RefCounter(RefCounterProtocol): - """A class for managing reference counts for a set of block indices. - - The RefCounter class maintains a dictionary that maps block indices to their - corresponding reference counts. It provides methods to increment, decrement, - and retrieve the reference count for a given block index. - - Args: - all_block_indices (Iterable[BlockId]): An iterable of block indices - to initialize the reference counter with. 
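    A minimal usage sketch (illustrative only, not taken from the original
    source):

        >>> rc = RefCounter(all_block_indices=range(3))
        >>> rc.incr(0)
        1
        >>> rc.incr(0)
        2
        >>> rc.decr(0)
        1
        >>> rc.get(1)
        0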
- """ - - def __init__(self, all_block_indices: Iterable[BlockId]): - deduped = set(all_block_indices) - self._refcounts: Dict[BlockId, RefCount] = { - index: 0 - for index in deduped - } - - def incr(self, block_id: BlockId) -> RefCount: - assert block_id in self._refcounts - pre_incr_refcount = self._refcounts[block_id] - - assert pre_incr_refcount >= 0 - - post_incr_refcount = pre_incr_refcount + 1 - self._refcounts[block_id] = post_incr_refcount - return post_incr_refcount - - def decr(self, block_id: BlockId) -> RefCount: - assert block_id in self._refcounts - refcount = self._refcounts[block_id] - - assert refcount > 0 - refcount -= 1 - - self._refcounts[block_id] = refcount - - return refcount - - def get(self, block_id: BlockId) -> RefCount: - assert block_id in self._refcounts - return self._refcounts[block_id] - - def as_readonly(self) -> "ReadOnlyRefCounter": - return ReadOnlyRefCounter(self) - - -class ReadOnlyRefCounter(RefCounterProtocol): - """A read-only view of the RefCounter class. - - The ReadOnlyRefCounter class provides a read-only interface to access the - reference counts maintained by a RefCounter instance. It does not allow - modifications to the reference counts. - - Args: - refcounter (RefCounter): The RefCounter instance to create a read-only - view for. - """ - - def __init__(self, refcounter: RefCounter): - self._refcounter = refcounter - - def incr(self, block_id: BlockId) -> RefCount: - raise ValueError("Incr not allowed") - - def decr(self, block_id: BlockId) -> RefCount: - raise ValueError("Decr not allowed") - - def get(self, block_id: BlockId) -> RefCount: - return self._refcounter.get(block_id) - - -class CopyOnWriteTracker: - """A class for tracking and managing copy-on-write operations for blocks. - - The CopyOnWriteTracker class maintains a mapping of source block indices to - their corresponding copy-on-write destination block indices. It works in - conjunction with a RefCounter. - - Args: - refcounter (RefCounter): The reference counter used to track block - reference counts. - """ - - def __init__(self, refcounter: RefCounterProtocol): - self._copy_on_writes: List[Tuple[BlockId, BlockId]] = [] - self._refcounter = refcounter - - def is_appendable(self, block: Block) -> bool: - """Checks if the block is shared or not. If shared, then it cannot - be appended and needs to be duplicated via copy-on-write - """ - block_id = block.block_id - if block_id is None: - return True - - refcount = self._refcounter.get(block_id) - return refcount <= 1 - - def record_cow(self, src_block_id: Optional[BlockId], - trg_block_id: Optional[BlockId]) -> None: - """Records a copy-on-write operation from source to target block id - Args: - src_block_id (BlockId): The source block id from which to copy - the data - trg_block_id (BlockId): The target block id to which the data - is copied - """ - assert src_block_id is not None - assert trg_block_id is not None - self._copy_on_writes.append((src_block_id, trg_block_id)) - - def clear_cows(self) -> List[Tuple[BlockId, BlockId]]: - """Clears the copy-on-write tracking information and returns the current - state. - - This method returns a list mapping source block indices to - destination block indices for the current copy-on-write operations. - It then clears the internal tracking information. - - Returns: - List[Tuple[BlockId, BlockId]]: A list mapping source - block indices to destination block indices for the - current copy-on-write operations. 
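        For example (hypothetical block ids, assuming refcounter is an
        existing RefCounter instance):

            >>> tracker = CopyOnWriteTracker(refcounter)
            >>> tracker.record_cow(src_block_id=1, trg_block_id=4)
            >>> tracker.clear_cows()
            [(1, 4)]
            >>> tracker.clear_cows()
            []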
- """ - cows = self._copy_on_writes - self._copy_on_writes = [] - return cows - - -class BlockPool: - """Used to pre-allocate block objects, in order to avoid excessive python - object allocations/deallocations. - The pool starts from "pool_size" objects and will increase to more objects - if necessary - - Note that multiple block objects may point to the same physical block id, - which is why this pool is needed, so that it will be easier to support - prefix caching and more complicated sharing of physical blocks. - """ - - def __init__(self, block_size: int, create_block: Block.Factory, - allocator: BlockAllocator, pool_size: int): - self._block_size = block_size - self._create_block = create_block - self._allocator = allocator - self._pool_size = pool_size - assert self._pool_size >= 0 - - self._free_ids: Deque[int] = deque(range(self._pool_size)) - self._pool = [] - for i in range(self._pool_size): - self._pool.append( - self._create_block(prev_block=None, - token_ids=[], - block_size=self._block_size, - allocator=self._allocator, - block_id=None, - extra_hash=None)) - - def increase_pool(self): - """Doubles the internal pool size - """ - cur_pool_size = self._pool_size - new_pool_size = cur_pool_size * 2 - self._pool_size = new_pool_size - - self._free_ids += deque(range(cur_pool_size, new_pool_size)) - - for i in range(cur_pool_size, new_pool_size): - self._pool.append( - self._create_block(prev_block=None, - token_ids=[], - block_size=self._block_size, - allocator=self._allocator, - block_id=None, - extra_hash=None)) - - def init_block(self, - prev_block: Optional[Block], - token_ids: List[int], - block_size: int, - physical_block_id: Optional[int], - extra_hash: Optional[int] = None) -> Block: - if len(self._free_ids) == 0: - self.increase_pool() - assert len(self._free_ids) > 0 - - pool_id = self._free_ids.popleft() - - block = self._pool[pool_id] - block.__init__( # type: ignore[misc] - prev_block=prev_block, - token_ids=token_ids, - block_size=block_size, - allocator=block._allocator, # type: ignore[attr-defined] - block_id=physical_block_id, - extra_hash=extra_hash) - block.pool_id = pool_id # type: ignore[attr-defined] - return block - - def free_block(self, block: Block) -> None: - self._free_ids.appendleft(block.pool_id) # type: ignore[attr-defined] - - -class BlockList: - """This class is an optimization to allow fast-access to physical - block ids. 
It maintains a block id list that is updated with the - block list and this avoids the need to reconstruct the block id - list on every iteration of the block manager - """ - - def __init__(self, blocks: List[Block]): - self._blocks: List[Block] = [] - self._block_ids: List[int] = [] - - self.update(blocks) - - def _add_block_id(self, block_id: Optional[BlockId]) -> None: - assert block_id is not None - self._block_ids.append(block_id) - - def _update_block_id(self, block_index: int, - new_block_id: Optional[BlockId]) -> None: - assert new_block_id is not None - self._block_ids[block_index] = new_block_id - - def update(self, blocks: List[Block]): - self._blocks = blocks - - # Cache block ids for fast query - self._block_ids = [] - for block in self._blocks: - self._add_block_id(block.block_id) - - def append_token_ids(self, block_index: int, token_ids: List[int]) -> None: - block = self._blocks[block_index] - prev_block_id = block.block_id - - block.append_token_ids(token_ids) - - # CoW or promotion may update the internal block_id - if prev_block_id != block.block_id: - self._update_block_id(block_index, block.block_id) - - def append(self, new_block: Block): - self._blocks.append(new_block) - self._add_block_id(new_block.block_id) - - def __len__(self) -> int: - return len(self._blocks) - - def __getitem__(self, block_index: int) -> Block: - return self._blocks[block_index] - - def __setitem__(self, block_index: int, new_block: Block) -> None: - self._blocks[block_index] = new_block - self._update_block_id(block_index, new_block.block_id) - - def reset(self): - self._blocks = [] - self._block_ids = [] - - def list(self) -> List[Block]: - return self._blocks - - def ids(self) -> List[int]: - return self._block_ids - - -@dataclass -class CacheMetricData: - """A utility dataclass to maintain cache metric. - To avoid overflow, we maintain the hit rate in block granularity, so that - we can maintain a single hit rate for n_completed_block x block_size, - and calculate the real time hit rate by the following: - BS = The number of queries per block. - nB = The number of completed blocks. - HR = hit rate of (nB x BS) queries. - Q = current number of queries (< BS). - H = current number of hits (< BS). - hit rate = ((HR x nB) + (H / Q) x (Q / BS)) / (nB + Q / BS) - """ - num_completed_blocks: int = 0 - completed_block_cache_hit_rate: float = 0.0 - num_incompleted_block_queries: int = 0 - num_incompleted_block_hit: int = 0 - block_size: int = 1000 - - def query(self, hit: bool): - self.num_incompleted_block_queries += 1 - self.num_incompleted_block_hit += 1 if hit else 0 - - # When a block is completed, update the cache hit rate - # and reset the incomplete numbers. 
- if self.num_incompleted_block_queries == self.block_size: - hit_rate = (self.num_incompleted_block_hit / - self.num_incompleted_block_queries) - self.completed_block_cache_hit_rate = ( - self.completed_block_cache_hit_rate * self.num_completed_blocks - + hit_rate) / (self.num_completed_blocks + 1) - self.num_incompleted_block_queries = 0 - self.num_incompleted_block_hit = 0 - self.num_completed_blocks += 1 - - def get_hit_rate(self): - incomplete_ratio = self.num_incompleted_block_queries / self.block_size - total_blocks = self.num_completed_blocks + incomplete_ratio - if total_blocks == 0: - return 0.0 - - completed_block_hit, incompleted_block_hit = 0.0, 0.0 - if self.num_completed_blocks > 0: - completed_block_hit = (self.completed_block_cache_hit_rate * - self.num_completed_blocks) - if self.num_incompleted_block_queries > 0: - incompleted_hit_rate = (self.num_incompleted_block_hit / - self.num_incompleted_block_queries) - incompleted_block_hit = (incompleted_hit_rate * incomplete_ratio) - return (completed_block_hit + incompleted_block_hit) / total_blocks - - -def get_all_blocks_recursively(last_block: Block) -> List[Block]: - """Retrieves all the blocks in a sequence starting from the last block. - - This function recursively traverses the sequence of blocks in reverse order, - starting from the given last block, and returns a list of all the blocks in - the sequence. - - Args: - last_block (Block): The last block in the sequence. - - Returns: - List[Block]: A list of all the blocks in the sequence, in the order they - appear. - """ - - def recurse(block: Block, lst: List[Block]) -> None: - if block.prev_block is not None: - recurse(block.prev_block, lst) - lst.append(block) - - all_blocks: List[Block] = [] - recurse(last_block, all_blocks) - return all_blocks diff --git a/vllm/core/block/cpu_gpu_block_allocator.py b/vllm/core/block/cpu_gpu_block_allocator.py deleted file mode 100644 index 92bc5e157e14..000000000000 --- a/vllm/core/block/cpu_gpu_block_allocator.py +++ /dev/null @@ -1,439 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import Dict, FrozenSet, List, Optional, Tuple - -from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId, - DeviceAwareBlockAllocator) -from vllm.core.block.naive_block import NaiveBlock, NaiveBlockAllocator -from vllm.core.block.prefix_caching_block import PrefixCachingBlockAllocator -from vllm.utils import Device - - -class CpuGpuBlockAllocator(DeviceAwareBlockAllocator): - """A block allocator that can allocate blocks on both CPU and GPU memory. - - This class implements the `DeviceAwareBlockAllocator` interface and provides - functionality for allocating and managing blocks of memory on both CPU and - GPU devices. - - The `CpuGpuBlockAllocator` maintains separate memory pools for CPU and GPU - blocks, and allows for allocation, deallocation, forking, and swapping of - blocks across these memory pools. - """ - - @staticmethod - def create( - allocator_type: str, - num_gpu_blocks: int, - num_cpu_blocks: int, - block_size: int, - ) -> DeviceAwareBlockAllocator: - """Creates a CpuGpuBlockAllocator instance with the specified - configuration. - - This static method creates and returns a CpuGpuBlockAllocator instance - based on the provided parameters. It initializes the CPU and GPU block - allocators with the specified number of blocks, block size, and - allocator type. 
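# Standalone sketch: the block-granular hit-rate formula from the
# CacheMetricData docstring above, checked against the plain hits/queries
# ratio on a tiny example (block size BS = 4).
BS = 4
nB, HR = 2, 0.75            # 2 completed blocks with 6 hits out of 8 queries
H, Q = 1, 2                 # current partial block: 1 hit out of 2 queries

blockwise = (HR * nB + (H / Q) * (Q / BS)) / (nB + Q / BS)
direct = (HR * nB * BS + H) / (nB * BS + Q)
assert abs(blockwise - direct) < 1e-12      # both give 7 hits / 10 queries = 0.7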
- - Args: - allocator_type (str): The type of block allocator to use for CPU - and GPU blocks. Currently supported values are "naive" and - "prefix_caching". - num_gpu_blocks (int): The number of blocks to allocate for GPU - memory. - num_cpu_blocks (int): The number of blocks to allocate for CPU - memory. - block_size (int): The size of each block in number of tokens. - - Returns: - DeviceAwareBlockAllocator: A CpuGpuBlockAllocator instance with the - specified configuration. - - Notes: - - The block IDs are assigned contiguously, with GPU block IDs coming - before CPU block IDs. - """ - reserved_blocks = 0 - block_ids = list( - range(reserved_blocks, num_gpu_blocks + num_cpu_blocks)) - num_gpu_blocks -= reserved_blocks - gpu_block_ids = block_ids[:num_gpu_blocks] - cpu_block_ids = block_ids[num_gpu_blocks:] - - if allocator_type == "naive": - gpu_allocator: BlockAllocator = NaiveBlockAllocator( - create_block=NaiveBlock, # type: ignore - num_blocks=num_gpu_blocks, - block_size=block_size, - block_ids=gpu_block_ids, - ) - - cpu_allocator: BlockAllocator = NaiveBlockAllocator( - create_block=NaiveBlock, # type: ignore - num_blocks=num_cpu_blocks, - block_size=block_size, - block_ids=cpu_block_ids, - ) - elif allocator_type == "prefix_caching": - gpu_allocator = PrefixCachingBlockAllocator( - num_blocks=num_gpu_blocks, - block_size=block_size, - block_ids=gpu_block_ids, - ) - - cpu_allocator = PrefixCachingBlockAllocator( - num_blocks=num_cpu_blocks, - block_size=block_size, - block_ids=cpu_block_ids, - ) - else: - raise ValueError(f"Unknown allocator type {allocator_type=}") - - return CpuGpuBlockAllocator( - cpu_block_allocator=cpu_allocator, - gpu_block_allocator=gpu_allocator, - ) - - def __init__(self, cpu_block_allocator: BlockAllocator, - gpu_block_allocator: BlockAllocator): - assert not ( - cpu_block_allocator.all_block_ids - & gpu_block_allocator.all_block_ids - ), "cpu and gpu block allocators can't have intersection of block ids" - - self._allocators = { - Device.CPU: cpu_block_allocator, - Device.GPU: gpu_block_allocator, - } - - self._swap_mapping: Dict[int, int] = {} - self._null_block: Optional[Block] = None - - self._block_ids_to_allocator: Dict[int, BlockAllocator] = {} - for _, allocator in self._allocators.items(): - for block_id in allocator.all_block_ids: - self._block_ids_to_allocator[block_id] = allocator - - def allocate_or_get_null_block(self) -> Block: - if self._null_block is None: - self._null_block = NullBlock( - self.allocate_mutable_block(None, Device.GPU)) - return self._null_block - - def allocate_mutable_block(self, - prev_block: Optional[Block], - device: Device, - extra_hash: Optional[int] = None) -> Block: - """Allocates a new mutable block on the specified device. - - Args: - prev_block (Optional[Block]): The previous block to in the sequence. - Used for prefix hashing. - device (Device): The device on which to allocate the new block. - extra_hash (Optional[int]): The hash value of additional - factors, such as adapters, that influence the block hash - in the prefix caching block. - - Returns: - Block: The newly allocated mutable block. - """ - return self._allocators[device].allocate_mutable_block( - prev_block, extra_hash=extra_hash) - - def allocate_immutable_blocks( - self, - prev_block: Optional[Block], - block_token_ids: List[List[int]], - device: Device, - extra_hash: Optional[int] = None) -> List[Block]: - """Allocates a new group of immutable blocks with the provided block - token IDs on the specified device. 
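# Standalone sketch: the contiguous id split performed by create() above --
# GPU block ids come first, CPU block ids follow, so one flat id space can be
# routed to the right per-device allocator later.
def split_block_ids(num_gpu_blocks: int, num_cpu_blocks: int):
    block_ids = list(range(num_gpu_blocks + num_cpu_blocks))
    return block_ids[:num_gpu_blocks], block_ids[num_gpu_blocks:]

gpu_ids, cpu_ids = split_block_ids(num_gpu_blocks=3, num_cpu_blocks=2)
assert gpu_ids == [0, 1, 2] and cpu_ids == [3, 4]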
- - Args: - prev_block (Optional[Block]): The previous block in the sequence. - Used for prefix hashing. - block_token_ids (List[int]): The list of block token IDs to be - stored in the new blocks. - device (Device): The device on which to allocate the new block. - extra_hash (Optional[int]): The hash value of additional - factors, such as adapters, that influence the block hash - in the prefix caching block. - - Returns: - List[Block]: The newly allocated list of immutable blocks - containing the provided block token IDs. - """ - return self._allocators[device].allocate_immutable_blocks( - prev_block, block_token_ids, extra_hash=extra_hash) - - def allocate_immutable_block(self, - prev_block: Optional[Block], - token_ids: List[int], - device: Device, - extra_hash: Optional[int] = None) -> Block: - """Allocates a new immutable block with the provided token IDs on the - specified device. - - Args: - prev_block (Optional[Block]): The previous block in the sequence. - Used for prefix hashing. - token_ids (List[int]): The list of token IDs to be stored in the new - block. - device (Device): The device on which to allocate the new block. - extra_hash (Optional[int]): The hash value of additional - factors, such as adapters, that influence the block hash - in the prefix caching block. - - Returns: - Block: The newly allocated immutable block containing the provided - token IDs. - """ - return self._allocators[device].allocate_immutable_block( - prev_block, token_ids, extra_hash=extra_hash) - - def free(self, block: Block) -> None: - """Frees the memory occupied by the given block. - - Args: - block (Block): The block to be freed. - """ - # Null block should never be freed - if isinstance(block, NullBlock): - return - block_id = block.block_id - assert block_id is not None - allocator = self._block_ids_to_allocator[block_id] - allocator.free(block) - - def fork(self, last_block: Block) -> List[Block]: - """Creates a new sequence of blocks that shares the same underlying - memory as the original sequence. - - Args: - last_block (Block): The last block in the original sequence. - - Returns: - List[Block]: A new list of blocks that shares the same memory as the - original sequence. - """ - # do not attempt to fork the null block - assert not isinstance(last_block, NullBlock) - block_id = last_block.block_id - assert block_id is not None - allocator = self._block_ids_to_allocator[block_id] - return allocator.fork(last_block) - - def get_num_free_blocks(self, device: Device) -> int: - """Returns the number of free blocks available on the specified device. - - Args: - device (Device): The device for which to query the number of free - blocks. AssertionError is raised if None is passed. - - Returns: - int: The number of free blocks available on the specified device. - """ - return self._allocators[device].get_num_free_blocks() - - def get_num_total_blocks(self, device: Device) -> int: - return self._allocators[device].get_num_total_blocks() - - def get_physical_block_id(self, device: Device, absolute_id: int) -> int: - """Returns the zero-offset block id on certain device given the - absolute block id. - - Args: - device (Device): The device for which to query relative block id. - absolute_id (int): The absolute block id for the block in - whole allocator. - - Returns: - int: The zero-offset block id on certain device. 
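# Standalone sketch (simplified, illustrative device keys): the routing idea
# behind the allocator above -- every block id belongs to exactly one
# per-device allocator, so free() and similar calls can be dispatched from
# the block id alone.
class CountingAllocator:
    def __init__(self, ids):
        self.ids, self.freed = set(ids), []

    def free(self, block_id):
        self.freed.append(block_id)

allocators = {"gpu": CountingAllocator({0, 1, 2}), "cpu": CountingAllocator({3, 4})}
id_to_allocator = {bid: alloc for alloc in allocators.values() for bid in alloc.ids}

id_to_allocator[4].free(4)                 # routed to the CPU allocator
assert allocators["cpu"].freed == [4] and allocators["gpu"].freed == []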
- """ - return self._allocators[device].get_physical_block_id(absolute_id) - - def swap(self, blocks: List[Block], src_device: Device, - dst_device: Device) -> Dict[int, int]: - """Execute the swap for the given blocks from source_device - on to dest_device, save the current swap mapping and append - them to the accumulated `self._swap_mapping` for each - scheduling move. - - Args: - blocks: List of blocks to be swapped. - src_device (Device): Device to swap the 'blocks' from. - dst_device (Device): Device to swap the 'blocks' to. - - Returns: - Dict[int, int]: Swap mapping from source_device - on to dest_device. - """ - src_block_ids = [block.block_id for block in blocks] - self._allocators[src_device].swap_out(blocks) - self._allocators[dst_device].swap_in(blocks) - dst_block_ids = [block.block_id for block in blocks] - - current_swap_mapping: Dict[int, int] = {} - for src_block_id, dst_block_id in zip(src_block_ids, dst_block_ids): - if src_block_id is not None and dst_block_id is not None: - self._swap_mapping[src_block_id] = dst_block_id - current_swap_mapping[src_block_id] = dst_block_id - return current_swap_mapping - - def get_num_full_blocks_touched(self, blocks: List[Block], - device: Device) -> int: - """Returns the number of full blocks that will be touched by - swapping in/out the given blocks on to the 'device'. - - Args: - blocks: List of blocks to be swapped. - device (Device): Device to swap the 'blocks' on. - - Returns: - int: the number of full blocks that will be touched by - swapping in/out the given blocks on to the 'device'. - Non full blocks are ignored when deciding the number - of blocks to touch. - """ - return self._allocators[device].get_num_full_blocks_touched(blocks) - - def clear_copy_on_writes(self) -> List[Tuple[int, int]]: - """Clears the copy-on-write (CoW) state and returns the mapping of - source to destination block IDs. - - Returns: - List[Tuple[int, int]]: A list mapping source block IDs to - destination block IDs. - """ - # CoW only supported on GPU - device = Device.GPU - return self._allocators[device].clear_copy_on_writes() - - def mark_blocks_as_accessed(self, block_ids: List[int], - now: float) -> None: - """Mark blocks as accessed, only use for prefix caching.""" - # Prefix caching only supported on GPU. - device = Device.GPU - return self._allocators[device].mark_blocks_as_accessed(block_ids, now) - - def mark_blocks_as_computed(self, block_ids: List[int]) -> None: - """Mark blocks as accessed, only use for prefix caching.""" - # Prefix caching only supported on GPU. - device = Device.GPU - return self._allocators[device].mark_blocks_as_computed(block_ids) - - def get_common_computed_block_ids( - self, computed_seq_block_ids: List[List[int]]) -> List[int]: - # Prefix caching only supported on GPU. - device = Device.GPU - return self._allocators[device].get_common_computed_block_ids( - computed_seq_block_ids) - - @property - def all_block_ids(self) -> FrozenSet[int]: - return frozenset(self._block_ids_to_allocator.keys()) - - def get_prefix_cache_hit_rate(self, device: Device) -> float: - """Prefix cache hit rate. 
-1 means not supported or disabled.""" - assert device in self._allocators - return self._allocators[device].get_prefix_cache_hit_rate() - - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - """Reset prefix cache for specified or all devices.""" - if device: - return self._allocators[device].reset_prefix_cache() - success = True - for allocator in self._allocators.values(): - success = success and allocator.reset_prefix_cache() - return success - - def get_and_reset_swaps(self) -> List[Tuple[int, int]]: - """Returns and clears the mapping of source to destination block IDs. - Will be called after every swapping operations for now, and after every - schedule when BlockManagerV2 become default. Currently not useful. - - Returns: - List[Tuple[int, int]]: A mapping of source to destination block IDs. - """ - mapping = self._swap_mapping.copy() - self._swap_mapping.clear() - return list(mapping.items()) - - def find_cached_blocks_prefix( - self, - block_hashes: List[int], - device: Device = Device.GPU, - ) -> List[int]: - return self._allocators[device].find_cached_blocks_prefix(block_hashes) - - -class NullBlock(Block): - """ - Null blocks are used as a placeholders for KV cache blocks that have - been dropped due to sliding window. - This implementation just wraps an ordinary block and prevents it from - being modified. It also allows for testing if a block is NullBlock - via isinstance(). - """ - - def __init__(self, proxy: Block): - super().__init__() - self._proxy = proxy - - def append_token_ids(self, token_ids: List[BlockId]): - raise ValueError("null block should not be modified") - - @property - def block_id(self): - return self._proxy.block_id - - @block_id.setter - def block_id(self, value: Optional[BlockId]): - raise ValueError("null block should not be modified") - - @property - def token_ids(self) -> List[BlockId]: - return self._proxy.token_ids - - @property - def num_tokens_total(self) -> int: - raise NotImplementedError( - "num_tokens_total is not used for null block") - - @property - def num_empty_slots(self) -> BlockId: - return self._proxy.num_empty_slots - - @property - def is_full(self): - return self._proxy.is_full - - @property - def prev_block(self): - return self._proxy.prev_block - - @property - def extra_hash(self): - return None - - @property - def computed(self): - return self._proxy.computed - - @computed.setter - def computed(self, value): - self._proxy.computed = value - - @property - def last_accessed(self) -> float: - return self._proxy.last_accessed - - @last_accessed.setter - def last_accessed(self, last_accessed_ts: float): - self._proxy.last_accessed = last_accessed_ts - - @property - def content_hash(self): - return self._proxy.content_hash diff --git a/vllm/core/block/interfaces.py b/vllm/core/block/interfaces.py deleted file mode 100644 index 1a05881f7c00..000000000000 --- a/vllm/core/block/interfaces.py +++ /dev/null @@ -1,319 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from abc import ABC, abstractmethod -from typing import Dict, FrozenSet, List, Optional, Protocol, Tuple - -from vllm.utils import Device - -BlockId = int - - -class Block(ABC): - - @abstractmethod - def append_token_ids(self, token_ids: List[int]) -> None: - pass - - @property - @abstractmethod - def block_id(self) -> Optional[int]: - pass - - @block_id.setter - @abstractmethod - def block_id(self, value: Optional[int]) -> None: - """NOTE: Do not use this API outside Block.""" - 
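# Standalone sketch (hypothetical names): the placeholder/proxy pattern used
# by NullBlock above -- reads are forwarded to a wrapped block, writes are
# rejected, and callers can test for the placeholder via isinstance().
class ReadOnlyProxy:
    def __init__(self, proxied):
        self._proxied = proxied

    @property
    def token_ids(self):
        return self._proxied.token_ids

    def append_token_ids(self, token_ids):
        raise ValueError("placeholder block should not be modified")

class DummyBlock:
    def __init__(self):
        self.token_ids = [1, 2, 3]

null = ReadOnlyProxy(DummyBlock())
assert null.token_ids == [1, 2, 3]
try:
    null.append_token_ids([4])
except ValueError:
    pass  # writes are rejected, as with NullBlock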
self._block_id = value - - @property - @abstractmethod - def token_ids(self) -> List[int]: - pass - - @property - @abstractmethod - def num_tokens_total(self) -> int: - """The number of tokens till the current block (inclusive) - """ - pass - - @property - @abstractmethod - def num_empty_slots(self) -> int: - pass - - @property - @abstractmethod - def is_full(self) -> bool: - pass - - @property - @abstractmethod - def prev_block(self) -> Optional["Block"]: - pass - - @property - @abstractmethod - def extra_hash(self) -> Optional[int]: - return None - - @property - @abstractmethod - def computed(self) -> bool: - raise NotImplementedError - - @computed.setter - @abstractmethod - def computed(self, value) -> bool: - """Should be only used by PrefixCacingAllocator""" - raise NotImplementedError - - @property - @abstractmethod - def last_accessed(self) -> float: - raise NotImplementedError - - @last_accessed.setter - @abstractmethod - def last_accessed(self, last_accessed_ts: float): - raise NotImplementedError - - class Factory(Protocol): - - @abstractmethod - def __call__( - self, - prev_block: Optional["Block"], - token_ids: List[int], - block_size: int, - allocator: "BlockAllocator", - block_id: Optional[int] = None, - computed: bool = False, - extra_hash: Optional[int] = None, - ) -> "Block": - pass - - @property - @abstractmethod - def content_hash(self) -> Optional[int]: - """Return the content-based hash of the current block, or None if it is - not yet defined or not supported. - - For the content-based hash to be defined, the current block must be - full. - """ - return None - - -class BlockAllocator(ABC): - - @abstractmethod - def allocate_mutable_block(self, prev_block: Optional[Block], - extra_hash: Optional[int]) -> Block: - pass - - @abstractmethod - def allocate_immutable_block(self, prev_block: Optional[Block], - token_ids: List[int], - extra_hash: Optional[int]) -> Block: - pass - - @abstractmethod - def allocate_immutable_blocks(self, prev_block: Optional[Block], - block_token_ids: List[List[int]], - extra_hash: Optional[int]) -> List[Block]: - pass - - @abstractmethod - def free(self, block: Block) -> None: - pass - - @abstractmethod - def fork(self, last_block: Block) -> List[Block]: - pass - - @abstractmethod - def get_num_total_blocks(self) -> int: - pass - - @abstractmethod - def get_num_free_blocks(self) -> int: - pass - - @abstractmethod - def get_physical_block_id(self, absolute_id: int) -> int: - pass - - @abstractmethod - def swap_out(self, blocks: List[Block]) -> None: - pass - - @abstractmethod - def swap_in(self, blocks: List[Block]) -> None: - pass - - @property - @abstractmethod - def all_block_ids(self) -> FrozenSet[int]: - pass - - @abstractmethod - def clear_copy_on_writes(self) -> List[Tuple[int, int]]: - pass - - @abstractmethod - def mark_blocks_as_accessed(self, block_ids: List[int], - now: float) -> None: - pass - - @abstractmethod - def mark_blocks_as_computed(self, block_ids: List[int]) -> None: - pass - - @abstractmethod - def get_common_computed_block_ids( - self, computed_seq_block_ids: List[List[int]]) -> List[int]: - pass - - @abstractmethod - def cow_block_if_not_appendable(self, block: Block) -> BlockId: - """NOTE: This should not be used besides Block""" - pass - - @abstractmethod - def promote_to_immutable_block(self, block: Block) -> BlockId: - """NOTE: This should not be used besides Block""" - pass - - @abstractmethod - def get_num_full_blocks_touched(self, blocks: List[Block]) -> int: - pass - - @abstractmethod - def 
get_prefix_cache_hit_rate(self) -> float: - """Prefix cache hit rate. -1 means not supported or disabled.""" - pass - - @abstractmethod - def reset_prefix_cache(self) -> bool: - """Reset prefix cache.""" - pass - - class NoFreeBlocksError(ValueError): - pass - - @abstractmethod - def find_cached_blocks_prefix( - self, - block_hashes: List[int], - ) -> List[int]: - pass - - -class DeviceAwareBlockAllocator(ABC): - - @abstractmethod - def allocate_mutable_block(self, - prev_block: Optional[Block], - device: Device, - extra_hash: Optional[int] = None) -> Block: - pass - - @abstractmethod - def allocate_immutable_block(self, - prev_block: Optional[Block], - token_ids: List[int], - device: Device, - extra_hash: Optional[int] = None) -> Block: - pass - - @abstractmethod - def allocate_immutable_blocks( - self, - prev_block: Optional[Block], - block_token_ids: List[List[int]], - device: Device, - extra_hash: Optional[int] = None, - ) -> List[Block]: - pass - - @abstractmethod - def get_num_free_blocks(self, device: Device) -> int: - pass - - @abstractmethod - def get_num_total_blocks(self, device: Device) -> int: - pass - - @abstractmethod - def free(self, block: Block) -> None: - pass - - @abstractmethod - def fork(self, last_block: Block) -> List[Block]: - pass - - @property - @abstractmethod - def all_block_ids(self) -> FrozenSet[int]: - pass - - @abstractmethod - def clear_copy_on_writes(self) -> List[Tuple[int, int]]: - pass - - @abstractmethod - def mark_blocks_as_accessed(self, block_ids: List[int], - now: float) -> None: - pass - - @abstractmethod - def mark_blocks_as_computed(self, block_ids: List[int]) -> None: - pass - - @abstractmethod - def get_common_computed_block_ids( - self, computed_seq_block_ids: List[List[int]]) -> List[int]: - pass - - @abstractmethod - def get_num_full_blocks_touched(self, blocks: List[Block], - device: Device) -> int: - pass - - @abstractmethod - def swap(self, blocks: List[Block], src_device: Device, - dst_device: Device) -> Dict[int, int]: - pass - - @abstractmethod - def get_physical_block_id(self, device: Device, absolute_id: int) -> int: - pass - - @abstractmethod - def allocate_or_get_null_block(self) -> Block: - """ - Null blocks are used as a placeholders for KV cache blocks that have - been dropped due to sliding window. - There is at most one null block per allocator. - """ - pass - - @abstractmethod - def get_prefix_cache_hit_rate(self, device: Device) -> float: - """Prefix cache hit rate. 
-1 means not supported or disabled.""" - pass - - @abstractmethod - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - """Reset prefix cache.""" - pass - - @abstractmethod - def find_cached_blocks_prefix( - self, - block_hashes: List[int], - device: Device = Device.GPU, - ) -> List[int]: - pass diff --git a/vllm/core/block/naive_block.py b/vllm/core/block/naive_block.py deleted file mode 100644 index ae876d131eb6..000000000000 --- a/vllm/core/block/naive_block.py +++ /dev/null @@ -1,466 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from collections import deque -from typing import Deque, FrozenSet, Iterable, List, Optional, Tuple, Union - -from vllm.core.block.common import (BlockPool, CopyOnWriteTracker, RefCounter, - get_all_blocks_recursively) -from vllm.core.block.interfaces import Block, BlockAllocator, BlockId, Device - -Refcount = int - - -class NaiveBlockAllocator(BlockAllocator): - """A simple block allocator that manages blocks of memory without prefix - caching. - - Args: - create_block (Block.Factory): A factory function for creating new - blocks. This is used when a NaiveBlockAllocator is composed within - a prefix caching allocator -- the naive block allocator must - construct prefix caching blocks (but shouldn't know anything else - about them). - num_blocks (int): The total number of blocks to manage. - block_size (int): The size of each block in tokens. - block_ids (Optional[Iterable[int]], optional): An optional iterable of - block IDs. If not provided, block IDs will be assigned sequentially - from 0 to num_blocks - 1. - """ - - def __init__( - self, - create_block: Block.Factory, - num_blocks: int, - block_size: int, - block_ids: Optional[Iterable[int]] = None, - block_pool: Optional[BlockPool] = None, - ): - if block_ids is None: - block_ids = range(num_blocks) - - self._free_block_indices: Deque[BlockId] = deque(block_ids) - self._all_block_indices = frozenset(block_ids) - assert len(self._all_block_indices) == num_blocks - - self._refcounter = RefCounter( - all_block_indices=self._free_block_indices) - self._block_size = block_size - - self._cow_tracker = CopyOnWriteTracker( - refcounter=self._refcounter.as_readonly()) - - if block_pool is None: - extra_factor = 4 - # Pre-allocate "num_blocks * extra_factor" block objects. - # The "* extra_factor" is a buffer to allow more block objects - # than physical blocks - self._block_pool = BlockPool(self._block_size, create_block, self, - num_blocks * extra_factor) - else: - # In this case, the block pool is provided by the caller, - # which means that there is most likely a need to share - # a block pool between allocators - self._block_pool = block_pool - - def allocate_immutable_block(self, - prev_block: Optional[Block], - token_ids: List[int], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> Block: - """Allocates a new immutable block with the given token IDs, linked to - the previous block. - - Args: - prev_block (Optional[Block]): The previous block in the sequence. If - None, then the block to be allocated is the first block in the - sequence. - token_ids (List[int]): The token IDs to be stored in the new block. - - Returns: - Block: The newly allocated immutable block. 
- """ - assert device is None - block = self.allocate_mutable_block(prev_block=prev_block) - block.append_token_ids(token_ids) - return block - - def allocate_immutable_blocks( - self, - prev_block: Optional[Block], - block_token_ids: List[List[int]], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> List[Block]: - assert device is None - num_blocks = len(block_token_ids) - - block_ids = [] - for i in range(num_blocks): - block_ids.append(self._allocate_block_id()) - - blocks = [] - for i in range(num_blocks): - prev_block = self._block_pool.init_block( - prev_block=prev_block, - token_ids=block_token_ids[i], - block_size=self._block_size, - physical_block_id=block_ids[i]) - blocks.append(prev_block) - - return blocks - - def allocate_mutable_block(self, - prev_block: Optional[Block], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> Block: - """Allocates a new mutable block, linked to the previous block. - - Args: - prev_block (Optional[Block]): The previous block in the sequence. If - None, then the block to be allocated is the first block in the - sequence. - - Returns: - Block: The newly allocated mutable block. - """ - assert device is None - block_id = self._allocate_block_id() - block = self._block_pool.init_block(prev_block=prev_block, - token_ids=[], - block_size=self._block_size, - physical_block_id=block_id) - return block - - def _allocate_block_id(self) -> BlockId: - if not self._free_block_indices: - raise BlockAllocator.NoFreeBlocksError() - - block_id = self._free_block_indices.popleft() - self._refcounter.incr(block_id) - return block_id - - def _free_block_id(self, block: Union[Block, BlockId]) -> None: - if isinstance(block, Block): - block_id = block.block_id - block.block_id = None - else: - block_id = block - assert block_id is not None - - refcount = self._refcounter.decr(block_id) - if refcount == 0: - self._free_block_indices.appendleft(block_id) - - def free(self, block: Block, keep_block_object: bool = False) -> None: - # Release the physical block id - self._free_block_id(block) - - # Release the block object - if not keep_block_object: - self._block_pool.free_block(block) - - def free_block_id(self, block_id: BlockId) -> None: - self._free_block_id(block_id) - - def fork(self, last_block: Block) -> List[Block]: - """Creates a new sequence of blocks that shares the same underlying - memory as the original sequence. - - Args: - last_block (Block): The last block in the original sequence. - - Returns: - List[Block]: The new sequence of blocks that shares the same memory - as the original sequence. - """ - source_blocks = get_all_blocks_recursively(last_block) - - forked_blocks: List[Block] = [] - prev_block = None - for block in source_blocks: - - # Increment refcount for each block. - assert block.block_id is not None - refcount = self._refcounter.incr(block.block_id) - assert refcount != 1, "can't fork freed block" - - forked_block = self._block_pool.init_block( - prev_block=prev_block, - token_ids=block.token_ids, - block_size=self._block_size, - physical_block_id=block.block_id) - - forked_blocks.append(forked_block) - prev_block = forked_blocks[-1] - - return forked_blocks - - def get_num_free_blocks(self) -> int: - return len(self._free_block_indices) - - def get_num_total_blocks(self) -> int: - return len(self._all_block_indices) - - def get_physical_block_id(self, absolute_id: int) -> int: - """Returns the zero-offset block id on certain block allocator - given the absolute block id. 
- - Args: - absolute_id (int): The absolute block id for the block - in whole allocator. - - Returns: - int: The zero-offset block id on certain device. - """ - return sorted(self._all_block_indices).index(absolute_id) - - @property - def refcounter(self): - return self._refcounter - - @property - def all_block_ids(self) -> FrozenSet[int]: - return self._all_block_indices - - def cow_block_if_not_appendable(self, block: Block) -> BlockId: - """Performs a copy-on-write operation on the given block if it is not - appendable. - - Args: - block (Block): The block to check for copy-on-write. - - Returns: - BlockId: The block index of the new block if a copy-on-write - operation was performed, or the original block index if - no copy-on-write was necessary. - """ - src_block_id = block.block_id - assert src_block_id is not None - - if self._cow_tracker.is_appendable(block): - return src_block_id - - self._free_block_id(block) - trg_block_id = self._allocate_block_id() - - self._cow_tracker.record_cow(src_block_id, trg_block_id) - - return trg_block_id - - def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]: - """Returns the copy-on-write source->destination mapping and clears it. - - Returns: - List[Tuple[BlockId, BlockId]]: A list mapping source - block indices to destination block indices. - """ - return self._cow_tracker.clear_cows() - - def mark_blocks_as_accessed(self, block_ids: List[int], - now: float) -> None: - """Mark blocks as accessed, used in prefix caching. - - Since the naive allocator does not implement prefix caching, we do - nothing. - """ - pass - - def mark_blocks_as_computed(self, block_ids: List[int]) -> None: - """Mark blocks as computed, used in prefix caching. - - Since the naive allocator does not implement prefix caching, we do - nothing. - """ - pass - - def get_common_computed_block_ids( - self, computed_seq_block_ids: List[List[int]]) -> List[int]: - """Determine blocks that can be skipped in prefill. - - Since the naive allocator does not support prefix caching, always return - an empty list. - """ - return [] - - def promote_to_immutable_block(self, block: Block) -> BlockId: - raise NotImplementedError("There is no promotion for naive blocks") - - def get_num_full_blocks_touched(self, blocks: List[Block]) -> int: - """Returns the number of full blocks that will be touched by - swapping in/out. - - Args: - blocks: List of blocks to be swapped. - Returns: - int: the number of full blocks that will be touched by - swapping in/out the given blocks. Non full blocks are ignored - when deciding the number of blocks to touch. - """ - # NOTE: for naive block, we use set to eliminate common blocks among - # seqs, also we compare the empty slots in the mutable blocks with - # lookahead slots to get the number of unique new block that are - # needed. - old_block_set = set() - for block in blocks: - if block.is_full: - old_block_set.add(block) - return len(old_block_set) - - def swap_out(self, blocks: List[Block]) -> None: - for block in blocks: - self._free_block_id(block) - - def swap_in(self, blocks: List[Block]) -> None: - for block in blocks: - # Here we allocate either immutable or mutable block and then - # extract its block_id. 
Note that the block object is released - # and the block_id is assigned to "block" to allow reusing the - # existing "block" object - if block.is_full: - tmp_block = self.allocate_immutable_block( - prev_block=block.prev_block, token_ids=block.token_ids) - else: - tmp_block = self.allocate_mutable_block( - prev_block=block.prev_block) - tmp_block.append_token_ids(block.token_ids) - - block_id = tmp_block.block_id - tmp_block.block_id = None - self._block_pool.free_block(tmp_block) - - block.block_id = block_id # Assign block_id - - def get_prefix_cache_hit_rate(self) -> float: - return -1 - - def reset_prefix_cache(self) -> bool: - """No prefix cache for naive block allocator.""" - return True - - def find_cached_blocks_prefix(self, block_hashes: List[int]) -> List[int]: - # Not applicable for naive block allocator. - return [] - - -class NaiveBlock(Block): - """An implementation of the Block class that does not support prefix - caching. - - The NaiveBlock class represents a block of token IDs with a fixed size. It - provides methods for appending token IDs to the block and manages copy-on - -write operations when necessary. - - Args: - prev_block (Block): The previous block in the sequence. - token_ids (List[int]): The initial token IDs to be stored in the block. - block_size (int): The maximum number of token IDs that can be stored in - the block. - allocator (BlockAllocator): The block allocator associated with this - block. - block_id (Optional[int], optional): The physical block index - of this block. Defaults to None, which means no allocation has been - made. - _cow_target (Optional[Block], optional): The copy-on-write target block. - If not provided, it defaults to self. - """ - - def __init__(self, - prev_block: Optional[Block], - token_ids: List[int], - block_size: int, - allocator: BlockAllocator, - block_id: Optional[int] = None, - _cow_target: Optional[Block] = None, - extra_hash: Optional[int] = None): - self._token_ids: List[int] = [] - self._block_size = block_size - self._prev_block = prev_block - self._block_id = block_id - self._allocator = allocator - self._cow_target = _cow_target if _cow_target is not None else self - - self._append_token_ids_no_cow(token_ids) - - def append_token_ids(self, token_ids: List[int]) -> None: - """Appends the given token IDs to the block and performs a - copy-on-write if necessary. - - Args: - token_ids (Optional[List[int]]): The token IDs to be appended - to the block. - """ - self._append_token_ids_no_cow(token_ids) - - if self._block_id is not None: - self._block_id = (self._allocator.cow_block_if_not_appendable( - self._cow_target)) - - def _append_token_ids_no_cow(self, token_ids: List[int]) -> None: - """Appends the given token IDs to the block - - Args: - token_ids (List[int]): The token IDs to be appended to the block. 
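# Standalone sketch (hypothetical TokenChunk, not the removed NaiveBlock):
# the append path above -- a fixed-capacity token block that only accepts an
# append when it fits in the remaining empty slots.
from typing import List

class TokenChunk:
    def __init__(self, block_size: int):
        self.block_size = block_size
        self.token_ids: List[int] = []

    @property
    def num_empty_slots(self) -> int:
        return self.block_size - len(self.token_ids)

    def append_token_ids(self, token_ids: List[int]) -> None:
        if not token_ids:
            return
        assert len(token_ids) <= self.num_empty_slots
        self.token_ids.extend(token_ids)

chunk = TokenChunk(block_size=4)
chunk.append_token_ids([10, 11, 12])
assert chunk.num_empty_slots == 1
chunk.append_token_ids([13])
assert chunk.num_empty_slots == 0          # block is now full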
- """ - if len(token_ids) == 0: - return - - assert len(token_ids) <= self.num_empty_slots - - self._token_ids.extend(token_ids) - - @property - def computed(self) -> bool: - raise NotImplementedError - - @computed.setter - def computed(self, value) -> None: - raise NotImplementedError - - @property - def last_accessed(self) -> float: - raise NotImplementedError - - @last_accessed.setter - def last_accessed(self, last_accessed_ts: float): - raise NotImplementedError - - @property - def block_id(self) -> Optional[int]: - return self._block_id - - @block_id.setter - def block_id(self, value: Optional[int]) -> None: - self._block_id = value - - @property - def is_full(self) -> bool: - return self.num_empty_slots == 0 - - @property - def num_empty_slots(self) -> int: - return self._block_size - len(self.token_ids) - - @property - def token_ids(self) -> List[int]: - return self._token_ids - - @property - def num_tokens_total(self) -> int: - raise NotImplementedError( - "num_tokens_total is not used for naive block") - - @property - def block_size(self) -> int: - return self._block_size - - @property - def prev_block(self) -> Optional["Block"]: - return self._prev_block - - @property - def extra_hash(self): - return None - - @property - def content_hash(self) -> Optional[int]: - return None diff --git a/vllm/core/block/prefix_caching_block.py b/vllm/core/block/prefix_caching_block.py deleted file mode 100644 index a21d69323abb..000000000000 --- a/vllm/core/block/prefix_caching_block.py +++ /dev/null @@ -1,1135 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Token blocks.""" -import sys -from bisect import bisect_left -from os.path import commonprefix -from typing import (Callable, Dict, FrozenSet, Iterable, List, Optional, Set, - Tuple) - -from vllm.core.block.common import (CacheMetricData, CopyOnWriteTracker, - get_all_blocks_recursively) -from vllm.core.block.interfaces import (Block, BlockAllocator, BlockId, Device, - DeviceAwareBlockAllocator) -from vllm.core.block.naive_block import (BlockPool, NaiveBlock, - NaiveBlockAllocator) -from vllm.core.evictor import EvictionPolicy, Evictor, make_evictor -from vllm.logger import init_logger -from vllm.sequence import Sequence - -PrefixHash = int - -# By default, we init our block access time as _DEFAULT_LAST_ACCESSED_TIME -# so that if we find one block is still hold _DEFAULT_LAST_ACCESSED_TIME, -# then we know this block hasn't been accessed yet. -_DEFAULT_LAST_ACCESSED_TIME = -1 - -logger = init_logger(__name__) - - -class BlockTracker: - """Used to track the status of a block inside the prefix caching allocator - """ - __slots__ = ("active", "last_accessed", "computed") - - def reset(self): - self.last_accessed: float = _DEFAULT_LAST_ACCESSED_TIME - self.computed: bool = False - - def __init__(self): - self.active: bool = False - self.reset() - - def enable(self): - assert not self.active - self.active = True - self.reset() - - def disable(self): - assert self.active - self.active = False - self.reset() - - -class PrefixCachingBlockAllocator(BlockAllocator): - """A block allocator that implements prefix caching. - - The PrefixCachingBlockAllocator maintains a cache of blocks based on their - content hash. It reuses blocks with the same content hash to avoid redundant - memory allocation. The allocator also supports copy-on-write operations. - - Args: - num_blocks (int): The total number of blocks to manage. - block_size (int): The size of each block in tokens. 
- block_ids (Optional[Iterable[int]], optional): An optional iterable of - block IDs. If not provided, block IDs will be assigned sequentially - from 0 to num_blocks - 1. - """ - - # Note that we use 'None' as a string here instead of None because - # as of Python 3.12, hash(None) returns a constant predictable value. - # This could possibly make it easier to find and exploit hash - # collisions. 'None' as a string will be hashed differently per process, - # but consistently within the same process. This is the same as the - # behavior of None prior to Python 3.12. - _none_hash: int = hash('None') - - # Implements Block.Factory. - def __init__( - self, - num_blocks: int, - block_size: int, - block_ids: Optional[Iterable[int]] = None, - eviction_policy: EvictionPolicy = EvictionPolicy.LRU, - ): - if block_ids is None: - block_ids = range(num_blocks) - - self._block_size = block_size - - # A mapping of prefix hash to block index. All blocks which have a - # prefix hash will be in this dict, even if they have refcount 0. - self._cached_blocks: Dict[PrefixHash, BlockId] = {} - - # A list of immutable block IDs that have been touched by scheduler - # and should be marked as computed after an entire batch of sequences - # are scheduled. - self._touched_blocks: Set[BlockId] = set() - - # Used to track status of each physical block id - self._block_tracker: Dict[BlockId, BlockTracker] = {} - for block_id in block_ids: - self._block_tracker[block_id] = BlockTracker() - - # Pre-allocate "num_blocks * extra_factor" block objects. - # The "* extra_factor" is a buffer to allow more block objects - # than physical blocks - extra_factor = 4 - self._block_pool = BlockPool(self._block_size, self._create_block, - self, num_blocks * extra_factor) - - # An allocator for blocks that do not have prefix hashes. - self._hashless_allocator = NaiveBlockAllocator( - create_block=self._create_block, # type: ignore - num_blocks=num_blocks, - block_size=block_size, - block_ids=block_ids, - block_pool=self._block_pool, # Share block pool here - ) - - # Evitor used to maintain how we want to handle those computed blocks - # if we find memory pressure is high. - self.eviction_policy = eviction_policy - self.evictor: Evictor = make_evictor(self.eviction_policy) - - # We share the refcounter between allocators. This allows us to promote - # blocks originally allocated in the hashless allocator to immutable - # blocks. - self._refcounter = self._hashless_allocator.refcounter - - self._cow_tracker = CopyOnWriteTracker( - refcounter=self._refcounter.as_readonly()) - - self.metric_data = CacheMetricData() - - def _create_block( - self, - prev_block: Optional[Block], - token_ids: List[int], - block_size: int, - allocator: BlockAllocator, - block_id: Optional[int] = None, - computed: bool = False, - extra_hash: Optional[int] = None, - ) -> Block: - # Bind block to self. - allocator = self - - return PrefixCachingBlock( - prev_block=prev_block, - token_ids=token_ids, - block_size=block_size, - block_id=block_id, - allocator=allocator, - computed=computed, - extra_hash=extra_hash, - ) - - def allocate_immutable_block(self, - prev_block: Optional[Block], - token_ids: List[int], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> Block: - """Allocates an immutable block with the given token IDs, reusing cached - blocks if possible. - - Args: - prev_block (Optional[Block]): The previous block in the sequence. - token_ids (List[int]): The token IDs to be stored in the block. 
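# Standalone sketch (simplified; the real content hash chains the previous
# block's hash with the token ids): the lookup above is essentially a dict
# from content hash to the physical block id that already holds that content.
# A hit shares the existing block; a miss allocates and registers a new one.
from typing import Tuple

cached_blocks = {}            # content hash -> physical block id
next_free = iter(range(100))  # stand-in for a real allocator

def get_or_allocate(content_hash: int) -> Tuple[int, bool]:
    if content_hash in cached_blocks:
        return cached_blocks[content_hash], True      # cache hit: share the block
    block_id = next(next_free)                        # cache miss: new physical block
    cached_blocks[content_hash] = block_id
    return block_id, False

first, hit = get_or_allocate(hash((None, (1, 2, 3))))
again, hit2 = get_or_allocate(hash((None, (1, 2, 3))))
assert not hit and hit2 and first == again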
- - Returns: - Block: The allocated immutable block. - """ - assert device is None - assert_prefix_caching_block_or_none(prev_block) - - # First, try to create a block that points to cached data - block = self._block_pool.init_block(prev_block=prev_block, - token_ids=token_ids, - block_size=self._block_size, - physical_block_id=None, - extra_hash=extra_hash) - assert block.content_hash is not None - - cached_block_id = self._cached_blocks.get(block.content_hash, None) - if cached_block_id is not None: - self.metric_data.query(hit=True) - block.block_id = cached_block_id - self._incr_refcount_cached_block(block) - return block - self.metric_data.query(hit=False) - self._block_pool.free_block(block) - - # No cached block => Allocate a new block - block = self.allocate_mutable_block(prev_block, extra_hash=extra_hash) - block.append_token_ids(token_ids) - return block - - def allocate_immutable_blocks( - self, - prev_block: Optional[Block], - block_token_ids: List[List[int]], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> List[Block]: - blocks = [] - for token_ids in block_token_ids: - prev_block = self.allocate_immutable_block(prev_block=prev_block, - token_ids=token_ids, - device=device, - extra_hash=extra_hash) - blocks.append(prev_block) - return blocks - - def allocate_mutable_block(self, - prev_block: Optional[Block], - extra_hash: Optional[int] = None, - device: Optional[Device] = None) -> Block: - """Allocates a mutable block. If there are no free blocks, this will - evict unused cached blocks. - - Args: - prev_block (Block): The previous block in the sequence. - None is not allowed unlike it is super class. - - Returns: - Block: The allocated mutable block. - """ - assert device is None - assert_prefix_caching_block_or_none(prev_block) - - block_id = self._allocate_block_id() - block = self._block_pool.init_block(prev_block=prev_block, - token_ids=[], - block_size=self._block_size, - physical_block_id=block_id, - extra_hash=extra_hash) - assert not block.computed - assert block.content_hash is None - return block - - def _incr_refcount_cached_block(self, block: Block) -> None: - # Set this block to be "computed" since it is pointing to a - # cached block id (which was already computed) - block.computed = True - - block_id = block.block_id - assert block_id is not None - - refcount = self._refcounter.incr(block_id) - if refcount == 1: - # In case a cached block was evicted, restore its tracking - if block_id in self.evictor: - self.evictor.remove(block_id) - - self._track_block_id(block_id, computed=True) - - def _decr_refcount_cached_block(self, block: Block) -> None: - # Ensure this is immutable/cached block - assert block.content_hash is not None - - block_id = block.block_id - assert block_id is not None - - refcount = self._refcounter.decr(block_id) - if refcount > 0: - block.block_id = None - return - else: - assert refcount == 0 - - # No longer used - assert block.content_hash in self._cached_blocks - - # Add the cached block to the evictor - # (This keeps the cached block around so it can be reused) - self.evictor.add(block_id, block.content_hash, block.num_tokens_total, - self._block_tracker[block_id].last_accessed) - - # Stop tracking the block - self._untrack_block_id(block_id) - - block.block_id = None - - def _decr_refcount_hashless_block(self, block: Block) -> None: - block_id = block.block_id - assert block_id is not None - - # We may have a fork case where block is shared, - # in which case, we cannot remove it from tracking - refcount = 
self._refcounter.get(block_id) - if refcount == 1: - self._untrack_block_id(block_id) - - # Decrement refcount of the block_id, but do not free the block object - # itself (will be handled by the caller) - self._hashless_allocator.free(block, keep_block_object=True) - - def _allocate_block_id(self) -> BlockId: - """First tries to allocate a block id from the hashless allocator, - and if there are no blocks, then tries to evict an unused cached block. - """ - hashless_block_id = self._maybe_allocate_hashless_block_id() - if hashless_block_id is not None: - return hashless_block_id - - evicted_block_id = self._maybe_allocate_evicted_block_id() - if evicted_block_id is not None: - return evicted_block_id - - # No block available in hashless allocator, nor in unused cache blocks. - raise BlockAllocator.NoFreeBlocksError() - - def _maybe_allocate_hashless_block_id(self) -> Optional[BlockId]: - try: - # Allocate mutable block and extract its block_id - block = self._hashless_allocator.allocate_mutable_block( - prev_block=None) - block_id = block.block_id - self._block_pool.free_block(block) - - self._track_block_id(block_id, computed=False) - return block_id - except BlockAllocator.NoFreeBlocksError: - return None - - def _maybe_allocate_evicted_block_id(self) -> Optional[BlockId]: - if self.evictor.num_blocks == 0: - return None - - # Here we get an evicted block, which is only added - # into evictor if its ref counter is 0 - # and since its content would be changed, we need - # to remove it from _cached_blocks's tracking list - block_id, content_hash_to_evict = self.evictor.evict() - - # Sanity checks - assert content_hash_to_evict in self._cached_blocks - _block_id = self._cached_blocks[content_hash_to_evict] - assert self._refcounter.get(_block_id) == 0 - assert _block_id == block_id - - self._cached_blocks.pop(content_hash_to_evict) - - self._refcounter.incr(block_id) - self._track_block_id(block_id, computed=False) - - return block_id - - def _free_block_id(self, block: Block) -> None: - """Decrements the refcount of the block. The block may be in two - possible states: (1) immutable/cached or (2) mutable/hashless. - In the first case, the refcount is decremented directly and the block - may be possibly added to the evictor. In other case, hashless - allocator free(..) with keep_block_object=True is called to only free - the block id (since the block object may be reused by the caller) - """ - block_id = block.block_id - assert block_id is not None, "Freeing unallocated block is undefined" - - if block.content_hash is not None: - # Immutable: This type of block is always cached, and we want to - # keep it in the evictor for future reuse - self._decr_refcount_cached_block(block) - else: - # Mutable: This type of block is not cached, so we release it - # directly to the hashless allocator - self._decr_refcount_hashless_block(block) - - assert block.block_id is None - - def free(self, block: Block, keep_block_object: bool = False) -> None: - """Release the block (look at free_block_id(..) docs) - """ - # Release the physical block index - self._free_block_id(block) - - # Release the block object to the pool - if not keep_block_object: - self._block_pool.free_block(block) - - def fork(self, last_block: Block) -> List[Block]: - """Creates a new sequence of blocks that shares the same underlying - memory as the original sequence. - - Args: - last_block (Block): The last block in the original sequence. 
- - Returns: - List[Block]: The new sequence of blocks that shares the same memory - as the original sequence. - """ - source_blocks = get_all_blocks_recursively(last_block) - - forked_blocks: List[Block] = [] - prev_block = None - for block in source_blocks: - block_id = block.block_id - assert block_id is not None - - refcount = self._refcounter.incr(block_id) - assert refcount != 1, "can't fork free'd block_id = {}".format( - block_id) - - forked_block = self._block_pool.init_block( - prev_block=prev_block, - token_ids=block.token_ids, - block_size=self._block_size, - physical_block_id=block_id, - extra_hash=block.extra_hash) - - forked_blocks.append(forked_block) - prev_block = forked_blocks[-1] - - return forked_blocks - - def get_num_free_blocks(self, device: Optional[Device] = None) -> int: - assert device is None - # The number of free blocks is the number of hashless free blocks - # plus the number of blocks evictor could free from its list. - return self._hashless_allocator.get_num_free_blocks( - ) + self.evictor.num_blocks - - def get_num_total_blocks(self) -> int: - return self._hashless_allocator.get_num_total_blocks() - - def get_physical_block_id(self, absolute_id: int) -> int: - """Returns the zero-offset block id on certain block allocator - given the absolute block id. - - Args: - absolute_id (int): The absolute block id for the block - in whole allocator. - - Returns: - int: The rzero-offset block id on certain device. - """ - return sorted(self.all_block_ids).index(absolute_id) - - @property - def all_block_ids(self) -> FrozenSet[int]: - return self._hashless_allocator.all_block_ids - - def get_prefix_cache_hit_rate(self) -> float: - return self.metric_data.get_hit_rate() - - def reset_prefix_cache(self) -> bool: - """Reset prefix cache. This function may be used in RLHF - flows to invalid prefix caching after the weights are updated, - or used for resetting prefix caching status for benchmarking. - - Returns: - bool: True if the prefix cache is successfully reset, - False otherwise. - """ - num_used_blocks = (self.get_num_total_blocks() - - self.get_num_free_blocks()) - if num_used_blocks > 0: - logger.warning( - "Failed to reset prefix cache because some " - "blocks (%d) are not freed yet", num_used_blocks) - return False - - # Free all blocks in the evictor. - while (block_id := - self._maybe_allocate_evicted_block_id()) is not None: - self._hashless_allocator.free_block_id(block_id) - - # Should not have any cached blocks because all blocks are evicted. - assert not self._cached_blocks - - # Reset the evictor. - self.evictor = make_evictor(self.eviction_policy) - - # Reset the block tracker. - for block_id in self._block_tracker: - self._block_tracker[block_id] = BlockTracker() - - # Reset the metrics. - self.metric_data = CacheMetricData() - - logger.info("Successfully reset prefix cache") - return True - - def is_block_cached(self, block: Block) -> bool: - assert block.content_hash is not None - return block.content_hash in self._cached_blocks - - def promote_to_immutable_block(self, block: Block) -> BlockId: - """Once a mutable block is full, it can be promoted to an immutable - block. This means that its content can be referenced by future blocks - having the same prefix. - - Note that if we already have a cached block with the same content, we - will replace the newly-promoted block's mapping with the existing cached - block id. - - Args: - block: The mutable block to be promoted. 
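# Standalone sketch of the promotion step described above (refcount and
# evictor bookkeeping omitted): when a mutable block fills up, either register
# its content hash, or -- if identical content is already cached -- remap the
# block onto the existing physical id.
cached = {}                                  # content hash -> physical block id

def promote(block_id: int, content_hash: int) -> int:
    if content_hash not in cached:
        cached[content_hash] = block_id      # first block with this content
        return block_id
    return cached[content_hash]              # duplicate content: reuse cached block

assert promote(5, content_hash=1234) == 5    # becomes the cached copy
assert promote(9, content_hash=1234) == 5    # block 9's content already cached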
- - Returns: - BlockId: Either the original block index, or the block index of - the previously cached block matching the same content. - """ - # Ensure block can be promoted - assert block.content_hash is not None - assert block.block_id is not None - assert self._refcounter.get(block.block_id) > 0 - - if block.content_hash not in self._cached_blocks: - # No cached content hash => Set this block as cached. - # Note that this block cannot be marked as computed yet - # because other sequences in the same batch cannot reuse - # this block. - self._cached_blocks[block.content_hash] = block.block_id - # Mark this block as touched so that it can be marked as - # computed after the entire batch of sequences are scheduled. - self._touched_blocks.add(block.block_id) - return block.block_id - - # Reuse the cached content hash - self._decr_refcount_hashless_block(block) - block.block_id = self._cached_blocks[block.content_hash] - - # Increment refcount of the cached block and (possibly) restore - # it from the evictor. - # Note that in this case, the block is marked as computed - self._incr_refcount_cached_block(block) - - return block.block_id - - def cow_block_if_not_appendable(self, block: Block) -> BlockId: - """Performs a copy-on-write operation on the given block if it is not - appendable. - - Args: - block (Block): The block to check for copy-on-write. - - Returns: - BlockId: The block index of the new block if a copy-on-write - operation was performed, or the original block index if - no copy-on-write was necessary. - """ - src_block_id = block.block_id - assert src_block_id is not None - - if self._cow_tracker.is_appendable(block): - return src_block_id - - self._free_block_id(block) - trg_block_id = self._allocate_block_id() - - self._cow_tracker.record_cow(src_block_id, trg_block_id) - - return trg_block_id - - def clear_copy_on_writes(self) -> List[Tuple[BlockId, BlockId]]: - """Returns the copy-on-write source->destination mapping and clears it. - - Returns: - List[Tuple[BlockId, BlockId]]: A list mapping source - block indices to destination block indices. - """ - return self._cow_tracker.clear_cows() - - def mark_blocks_as_accessed(self, block_ids: List[int], - now: float) -> None: - """Mark blocks as accessed, used in prefix caching. - - If the block is added into evictor, we need to update corresponding - info in evictor's metadata. - """ - - for block_id in block_ids: - if self._block_tracker[block_id].active: - self._block_tracker[block_id].last_accessed = now - elif block_id in self.evictor: - self.evictor.update(block_id, now) - else: - raise ValueError( - "Mark block as accessed which is not belonged to GPU") - - def mark_blocks_as_computed(self, block_ids: List[int]) -> None: - # Mark all touched blocks as computed. 
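The promotion path above is the core of prefix caching: a full mutable block either registers itself under its content hash or is dropped in favour of an already-cached block with the same hash. A minimal, self-contained sketch of that dedup decision, using plain dicts in place of the refcounter and the _cached_blocks map (not the vLLM classes themselves):

# Toy illustration of content-hash deduplication on promotion.
# `cached` maps content_hash -> canonical block_id; `refcount` maps
# block_id -> reference count. Both are plain dicts, not vLLM objects.
def promote(block_id: int, content_hash: int,
            cached: dict, refcount: dict) -> int:
    if content_hash not in cached:
        # First block with this content: it becomes the canonical copy.
        cached[content_hash] = block_id
        return block_id
    # Duplicate content: release our physical block and share the cached one.
    refcount[block_id] -= 1
    canonical = cached[content_hash]
    refcount[canonical] = refcount.get(canonical, 0) + 1
    return canonical

cached, refcount = {}, {7: 1, 9: 1}
assert promote(7, content_hash=0xABC, cached=cached, refcount=refcount) == 7
# A second sequence fills a block with identical content; it is deduped to 7.
assert promote(9, content_hash=0xABC, cached=cached, refcount=refcount) == 7
assert refcount == {7: 2, 9: 0}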
- for block_id in self._touched_blocks: - self._block_tracker[block_id].computed = True - self._touched_blocks.clear() - - def _track_block_id(self, block_id: Optional[BlockId], - computed: bool) -> None: - assert block_id is not None - self._block_tracker[block_id].enable() - self._block_tracker[block_id].computed = computed - - def _untrack_block_id(self, block_id: Optional[BlockId]) -> None: - assert block_id is not None - self._block_tracker[block_id].disable() - - def block_is_computed(self, block_id: int) -> bool: - if self._block_tracker[block_id].active: - return self._block_tracker[block_id].computed - else: - return block_id in self.evictor - - def get_common_computed_block_ids( - self, computed_seq_block_ids: List[List[int]]) -> List[int]: - """Return the block ids that are common for a given sequence group. - - Only those blocks that are immutable and already be marked - compyted would be taken consideration. - """ - - # NOTE We exclude the last block to avoid the case where the entire - # prompt is cached. This would cause erroneous behavior in model - # runner. - - # It returns a list of int although type annotation says list of string. - if len(computed_seq_block_ids) == 1: - return computed_seq_block_ids[0] - - return commonprefix([ - ids for ids in computed_seq_block_ids # type: ignore - if ids - ]) - - def get_num_full_blocks_touched(self, blocks: List[Block]) -> int: - """Returns the number of full blocks that will be touched by - swapping in/out. - - Args: - blocks: List of blocks to be swapped. - Returns: - int: the number of full blocks that will be touched by - swapping in/out the given blocks. Non full blocks are ignored - when deciding the number of blocks to touch. - """ - num_touched_blocks: int = 0 - for block in blocks: - # If the block has a match in the cache and the cached - # block is not referenced, then we still count it as a - # touched block - if block.is_full and (not self.is_block_cached(block) or \ - (block.content_hash is not None and \ - self._cached_blocks[block.content_hash] in \ - self.evictor)): - num_touched_blocks += 1 - return num_touched_blocks - - def swap_out(self, blocks: List[Block]) -> None: - """Execute the swap out actions. Basically just free the - given blocks. - - Args: - blocks: List of blocks to be swapped out. - """ - for block in blocks: - self._free_block_id(block) - - def swap_in(self, blocks: List[Block]) -> None: - """Execute the swap in actions. Change the block id from - old allocator to current allocator for each block to finish - the block table update. - - Args: - blocks: List of blocks to be swapped in. - """ - for block in blocks: - # Here we allocate either immutable or mutable block and then - # extract its block_id. Note that the block object is released - # and the block_id is assigned to "block" to allow reusing the - # existing "block" object - if block.is_full: - tmp_block = self.allocate_immutable_block( - prev_block=block.prev_block, - token_ids=block.token_ids, - extra_hash=block.extra_hash) - else: - tmp_block = self.allocate_mutable_block( - prev_block=block.prev_block, extra_hash=block.extra_hash) - tmp_block.append_token_ids(block.token_ids) - - block_id = tmp_block.block_id - self._block_pool.free_block(tmp_block) - - block.block_id = block_id # Assign block_id - - def find_cached_blocks_prefix(self, block_hashes: List[int]) -> List[int]: - """ - Given a list of block hashes, return the prefix of the block hashes that - are all cached. 
- - Since a block's block hash includes the hashes of all previous blocks, - and we only allocate/deallocate blocks in the entire sequence, so if a - block is cached, then all previous blocks are also cached. With this - property, we can use binary search to find the prefix of cached blocks. - - Args: - block_hashes (List[int]): The list of block hashes. - - Returns: - List[int]: The prefix of the `block_hashes` that are cached. - """ - - def _block_is_cached(block_hash: PrefixHash) -> bool: - if block_hash not in self._cached_blocks: - return False - - cached_block_id = self._cached_blocks[block_hash] - # We only consider the blocks that are marked as computed. - return self.block_is_computed(cached_block_id) - - def _bisect_left(a, x, key: Callable[[PrefixHash], bool]) -> int: - - # python <= 3.10 don't have the key argument - if sys.version_info < (3, 10): - a = [key(e) for e in a] - return bisect_left(a, x) - else: - return bisect_left(a, x, key=key) - - # Look for the first block that's not cached, and returns the prefix - # i.e. blocks that are cached. - idx = _bisect_left(block_hashes, - True, - key=lambda x: not _block_is_cached(x)) - return block_hashes[:idx] - - -class PrefixCachingBlock(Block): - """A block implementation that supports prefix caching. - - The PrefixCachingBlock class represents a block of token IDs with prefix - caching capabilities. It wraps a NaiveBlock internally and provides - additional functionality for content hashing and promoting immutable blocks - with the prefix caching allocator. - - Args: - prev_block (Optional[PrefixCachingBlock]): The previous block in the - sequence. - token_ids (List[int]): The initial token IDs to be stored in the block. - block_size (int): The maximum number of token IDs that can be stored in - the block. - allocator (BlockAllocator): The prefix - caching block allocator associated with this block. - block_id (Optional[int], optional): The physical block index - of this block. Defaults to None. - extra_hash (Optional[int]): The hash value of additional factors - such as adapters that influence the block, apart from the token_ids. - """ - - # Note that we use 'None' as a string here instead of None because - # as of Python 3.12, hash(None) returns a constant predictable value. - # This could possibly make it easier to find and exploit hash - # collisions. 'None' as a string will be hashed differently per process, - # but consistently within the same process. This is the same as the - # behavior of None prior to Python 3.12. - _none_hash: int = hash('None') - - def __init__( - self, - prev_block: Optional[Block], - token_ids: List[int], - block_size: int, - allocator: BlockAllocator, - block_id: Optional[int] = None, - computed: bool = False, - extra_hash: Optional[int] = None, - ): - assert isinstance(allocator, PrefixCachingBlockAllocator), ( - "Currently this class is only tested with " - "PrefixCachingBlockAllocator. 
Got instead allocator = {}".format( - allocator)) - assert_prefix_caching_block_or_none(prev_block) - - self._prev_block = prev_block - self._cached_content_hash: Optional[int] = None - self._cached_num_tokens_total: int = 0 - self._allocator = allocator - self._last_accessed: float = _DEFAULT_LAST_ACCESSED_TIME - self._computed = computed - self._extra_hash = extra_hash - - # On the first time, we create the block object, and next we only - # reinitialize it - if hasattr(self, "_block"): - self._block.__init__( # type: ignore[has-type] - prev_block=prev_block, - token_ids=token_ids, - block_size=block_size, - block_id=block_id, - allocator=self._allocator) - else: - self._block = NaiveBlock(prev_block=prev_block, - token_ids=token_ids, - block_size=block_size, - block_id=block_id, - allocator=self._allocator) - - self._update_num_tokens_total() - - def _update_num_tokens_total(self): - """Incrementally computes the number of tokens that there is - till the current block (included) - """ - res = 0 - - # Add all previous blocks - if self._prev_block is not None: - res += self._prev_block.num_tokens_total - - # Add current block - res += len(self.token_ids) - - self._cached_num_tokens_total = res - - @property - def computed(self) -> bool: - return self._computed - - @computed.setter - def computed(self, value) -> None: - self._computed = value - - @property - def last_accessed(self) -> float: - return self._last_accessed - - @last_accessed.setter - def last_accessed(self, last_accessed_ts: float): - self._last_accessed = last_accessed_ts - - def append_token_ids(self, token_ids: List[int]) -> None: - """Appends the given token IDs to the block and registers the block as - immutable if the block becomes full. - - Args: - token_ids (List[int]): The token IDs to be appended to the block. - """ - # Ensure this is mutable block (not promoted) - assert self.content_hash is None - assert not self.computed - - if len(token_ids) == 0: - return - - # Ensure there are input tokens - assert token_ids, "Got token_ids = {}".format(token_ids) - - # Naive block handles CoW. - self._block.append_token_ids(token_ids) - self._update_num_tokens_total() - - # If the content hash is present, then the block can be made immutable. - # Register ourselves with the allocator, potentially replacing the - # physical block index. - if self.content_hash is not None: - self.block_id = self._allocator.promote_to_immutable_block(self) - - @property - def block_id(self) -> Optional[int]: - return self._block.block_id - - @block_id.setter - def block_id(self, value) -> None: - self._block.block_id = value - - @property - def is_full(self) -> bool: - return self._block.is_full - - @property - def num_empty_slots(self) -> int: - return self._block.num_empty_slots - - @property - def num_tokens_total(self) -> int: - return self._cached_num_tokens_total - - @property - def block_size(self) -> int: - return self._block.block_size - - @property - def token_ids(self) -> List[int]: - return self._block.token_ids - - @property - def prev_block(self) -> Optional[Block]: - return self._prev_block - - @property - def extra_hash(self) -> Optional[int]: - return self._extra_hash - - @property - def content_hash(self) -> Optional[int]: - """Return the content-based hash of the current block, or None if it is - not yet defined. - - For the content-based hash to be defined, the current block must be - full. - """ - # If the hash is already computed, return it. 
- if self._cached_content_hash is not None: - return self._cached_content_hash - - # We cannot compute a hash for the current block because it is not full. - if not self.is_full: - return None - - is_first_block = self._prev_block is None - prev_block_hash = ( - self._none_hash if is_first_block else - self._prev_block.content_hash # type: ignore - ) - - # Previous block exists but does not yet have a hash. - # Return no hash in this case. - if prev_block_hash == self._none_hash and not is_first_block: - return None - - self._cached_content_hash = PrefixCachingBlock.hash_block_tokens( - is_first_block, - prev_block_hash, - cur_block_token_ids=self.token_ids, - extra_hash=self._extra_hash) - return self._cached_content_hash - - @classmethod - def hash_block_tokens(cls, - is_first_block: bool, - prev_block_hash: Optional[int], - cur_block_token_ids: List[int], - extra_hash: Optional[int] = None) -> int: - """Computes a hash value corresponding to the contents of a block and - the contents of the preceding block(s). The hash value is used for - prefix caching. - - Parameters: - - is_first_block (bool): A flag indicating if the block is the first in - the sequence. - - prev_block_hash (Optional[int]): The hash of the previous block. None - if this is the first block. - - cur_block_token_ids (List[int]): A list of token ids in the current - block. The current block is assumed to be full. - - extra_hash (Optional[int]): The hash value of additional factors - such as adapters that influence the block, apart from the token_ids. - - Returns: - - int: The computed hash value for the block. - """ - if is_first_block and prev_block_hash is None: - prev_block_hash = cls._none_hash - return hash((is_first_block, prev_block_hash, *cur_block_token_ids, - extra_hash)) - - -class ComputedBlocksTracker: - """ - Tracks the computed blocks for each sequence. - - Internally, it maintains a map from sequence id to the list of block hashes - for the sequence. We cache the hashes of the full blocks for each sequence, - and make sure the hash is calculated in the same way as the allocator. - When a sequence is being decoded, we also update the sequence's hash - accordingly and incrementally. - - From the sequence hash, with prefix caching enabled, we could also calculate - the number of cached tokens for the sequence by looking up the number of - cached block hashes in the allocator. - """ - - # Note that we use 'None' as a string here instead of None because - # as of Python 3.12, hash(None) returns a constant predictable value. - # This could possibly make it easier to find and exploit hash - # collisions. 'None' as a string will be hashed differently per process, - # but consistently within the same process. This is the same as the - # behavior of None prior to Python 3.12. - _none_hash: int = hash('None') - - def __init__( - self, - allocator: DeviceAwareBlockAllocator, - block_size: int, - enable_caching: bool, - ): - self._allocator = allocator - self._block_size = block_size - self._enable_caching = enable_caching - - # A map from seq_id to the list of block hashes for the - # sequence. This is so that we don't have to recompute the block hashes - # for the sequence when we need to check if the sequence is cached. - # Note a block that's not full will not have its hash calculated and - # recorded. - self._seq_id_to_blocks_hashes: Dict[int, List[int]] = {} - - # A map from seq_id to the number of tokens that are cached for the - # sequence. 
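Because each block hash folds in the previous block's hash, two blocks with identical token ids only share a hash when their entire prefixes match, which is what makes the hash usable as a prefix-cache key. A small sketch of that chaining with Python's built-in hash(), mirroring the (is_first_block, prev_hash, *token_ids, extra_hash) tuple scheme above; the token values are made up:

NONE_HASH = hash('None')  # sentinel for "no previous block", as above

def hash_block(prev_hash: int, tokens: list, extra_hash=None) -> int:
    is_first = prev_hash == NONE_HASH
    return hash((is_first, prev_hash, *tokens, extra_hash))

# Identical second block [4, 5, 6, 7], but different first blocks:
h_a = hash_block(hash_block(NONE_HASH, [0, 1, 2, 3]), [4, 5, 6, 7])
h_b = hash_block(hash_block(NONE_HASH, [9, 9, 9, 9]), [4, 5, 6, 7])
assert h_a != h_b  # different prefixes -> different cache keys
# Recomputing the same chain reproduces the same key within one process.
assert h_a == hash_block(hash_block(NONE_HASH, [0, 1, 2, 3]), [4, 5, 6, 7])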
- # We need this so that a sequence in continuous prefill doesn't - # accidentally see its cached token count change. See comments in - # `get_num_cached_tokens` for more details. - self._seq_id_to_num_tokens_computed: Dict[int, int] = {} - - def _update_seq_hashes(self, seq: Sequence) -> None: - """Incrementally update the sequence's block hashes and record them.""" - assert self._enable_caching - - block_hashes_recorded = self._seq_id_to_blocks_hashes.get( - seq.seq_id, []) - cur_num_blocks_recorded = len(block_hashes_recorded) - token_ids = seq.get_token_ids() - assert len(token_ids) >= cur_num_blocks_recorded * self._block_size, ( - f"The sequence has {len(token_ids)} tokens, but" - f" already recorded {cur_num_blocks_recorded} blocks. " - "This should not happen since we assume blocks are " - "only appended other than recomputation. When the sequence is " - "recomputed, we should have removed the info of the old blocks.") - # Update the computed block hashes for the sequence. Since only full - # blocks are considered as "computed", we take floor here. - num_computed_blocks = len(token_ids) // self._block_size - - # We need to know the hash of the previous block to compute the hash of - # the current block so that blocks could be uniquely identified across - # sequences of prefixes. - prev_block_hash = (self._none_hash if cur_num_blocks_recorded == 0 else - block_hashes_recorded[-1]) - # Only update the computed block hashes for the new blocks - for i in range(cur_num_blocks_recorded, num_computed_blocks): - assert len(token_ids) >= (i + 1) * self._block_size - block_token_ids = token_ids[i * self._block_size:(i + 1) * - self._block_size] - - # NOTE: If there are any factors affecting the block besides - # token_ids, they should be added as input to extra_hash. - extra_hash = seq.extra_hash() - - # This has to be kept in sync with the allocator's hash - # calculation. - block_hash = PrefixCachingBlock.hash_block_tokens( - is_first_block=prev_block_hash == self._none_hash, - prev_block_hash=prev_block_hash, - cur_block_token_ids=block_token_ids, - extra_hash=extra_hash, - ) - block_hashes_recorded.append(block_hash) - prev_block_hash = block_hash - - self._seq_id_to_blocks_hashes[seq.seq_id] = block_hashes_recorded - - def get_num_cached_tokens(self, seq: Sequence) -> int: - if not self._enable_caching: - return 0 - - # We always try to update the sequence hashes on the fly. - # This is to ensure that we don't miss any cached tokens for the - # sequence during decode. - # This routine should only update hash for any new blocks too. - self._update_seq_hashes(seq) - - num_computed_tokens_prev = self._seq_id_to_num_tokens_computed.get( - seq.seq_id, None) - - # TODO(rickyx): This hack could be removed once we mark blocks as - # computed correctly with chunked prefills. - if num_computed_tokens_prev is not None and seq.is_prefill(): - # For a sequence that is still in prefill, we don't - # recompute the number of cached tokens. - # This also handles correctly chunked prefill since currently - # we mark blocks as computed even if the sequence is still partially - # prefilled. So a continuously prefilled sequence should not - # see its cached token count change while running. - return num_computed_tokens_prev - - block_hashes = self._seq_id_to_blocks_hashes[seq.seq_id] - - # This is O(logN), where N is the number of blocks. 
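The O(log N) comment above refers to find_cached_blocks_prefix, defined earlier: because a cached block implies that all earlier blocks are cached, the "is cached" predicate over the hash list is monotone and bisect_left can locate the boundary. A toy version with a plain set standing in for the allocator's computed-block bookkeeping (Python 3.10+ is assumed for the key= argument):

from bisect import bisect_left

def cached_prefix(block_hashes: list, cached_hashes: set) -> list:
    # The key maps the list to [False, ..., False, True, ..., True];
    # bisect_left then returns the index of the first *uncached* block.
    idx = bisect_left(block_hashes, True,
                      key=lambda h: h not in cached_hashes)
    return block_hashes[:idx]

hashes = [101, 202, 303, 404]
assert cached_prefix(hashes, cached_hashes={101, 202}) == [101, 202]
assert cached_prefix(hashes, cached_hashes=set()) == []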
- num_cached_blocks = len( - self._allocator.find_cached_blocks_prefix(block_hashes)) - num_cached_tokens = num_cached_blocks * self._block_size - self._seq_id_to_num_tokens_computed[seq.seq_id] = num_cached_tokens - return num_cached_tokens - - def remove_seq(self, seq_id: int) -> None: - """Stop tracking the sequence.""" - if not self._enable_caching: - return - assert seq_id in self._seq_id_to_blocks_hashes - del self._seq_id_to_blocks_hashes[seq_id] - - assert seq_id in self._seq_id_to_num_tokens_computed - del self._seq_id_to_num_tokens_computed[seq_id] - - -class LastAccessBlocksTracker: - """Manages the last access time of the tracked sequences, in order to allow - an efficient update of allocator's block last access times - """ - - def __init__(self, allocator): - self._allocator = allocator - self._seq_last_access: Dict[int, Optional[float]] = {} - - def add_seq(self, seq_id: int) -> None: - """Start tracking seq_id - """ - assert seq_id not in self._seq_last_access - self._seq_last_access[seq_id] = None - - def remove_seq(self, seq_id: int) -> None: - """Stop tracking seq_id - """ - assert seq_id in self._seq_last_access - del self._seq_last_access[seq_id] - - def update_last_access(self, seq_id: int, time: float) -> None: - assert seq_id in self._seq_last_access - self._seq_last_access[seq_id] = time - - def update_seq_blocks_last_access(self, seq_id: int, - block_ids: List[int]) -> None: - assert seq_id in self._seq_last_access - - ts = self._seq_last_access[seq_id] - - if ts is None: - # No last access was recorded, no need to update. - return - - self._allocator.mark_blocks_as_accessed(block_ids, ts) - - -def assert_prefix_caching_block_or_none(block: Optional[Block]): - if block is None: - return - assert isinstance(block, - PrefixCachingBlock), "Got block = {}".format(block) diff --git a/vllm/core/block/utils.py b/vllm/core/block/utils.py deleted file mode 100644 index e933c6ee7c8b..000000000000 --- a/vllm/core/block/utils.py +++ /dev/null @@ -1,28 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Block manager utils.""" -from vllm.sequence import SequenceGroup -from vllm.utils import (STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, - STR_NOT_IMPL_ENC_DEC_SWA) - - -def check_no_caching_or_swa_for_blockmgr_encdec( - block_mgr, seq_group: SequenceGroup) -> None: - ''' - Enforce that prefix caching & sliding-window attention (SWA) - are currently unsupported *specifically* for encoder/decoder models. - - Raises NotImplementedError if unsupported scenario is detected. 
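LastAccessBlocksTracker above is a small optimization: the scheduler records one timestamp per sequence per step, and the per-block update is deferred until the sequence is freed, when its block ids are about to enter the evictor. A minimal usage sketch against a stand-in allocator; MockAllocator below is hypothetical and only records the call order:

import time

class MockAllocator:  # hypothetical stand-in, records calls only
    def __init__(self):
        self.calls = []

    def mark_blocks_as_accessed(self, block_ids, ts):
        self.calls.append((tuple(block_ids), ts))

allocator = MockAllocator()
last_access = {}              # seq_id -> latest recorded timestamp

seq_id, block_ids = 0, [3, 4, 5]
last_access[seq_id] = None    # add_seq()
last_access[seq_id] = time.time()   # update_last_access(), cheap, once per step

# free(): flush the single recorded timestamp to all blocks of the sequence.
if (ts := last_access.pop(seq_id)) is not None:
    allocator.mark_blocks_as_accessed(block_ids, ts)
assert len(allocator.calls) == 1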
- - Arguments: - - * block_mgr: BlockSpaceManager instance - * seq_group: SequenceGroup passed to block_mgr - ''' - - if seq_group.is_encoder_decoder(): - if block_mgr.max_block_sliding_window is not None: - raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_SWA) - - if block_mgr.enable_caching: - raise NotImplementedError(STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE) diff --git a/vllm/core/block_manager.py b/vllm/core/block_manager.py deleted file mode 100644 index cbfa4d7ff3c4..000000000000 --- a/vllm/core/block_manager.py +++ /dev/null @@ -1,523 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""A block manager that manages token blocks.""" -from typing import Dict, List, Optional -from typing import Sequence as GenericSequence -from typing import Tuple - -from vllm.core.block.block_table import BlockTable -from vllm.core.block.cpu_gpu_block_allocator import CpuGpuBlockAllocator -from vllm.core.block.interfaces import Block -from vllm.core.block.prefix_caching_block import (ComputedBlocksTracker, - LastAccessBlocksTracker) -from vllm.core.block.utils import check_no_caching_or_swa_for_blockmgr_encdec -from vllm.core.interfaces import AllocStatus, BlockSpaceManager -from vllm.sequence import Sequence, SequenceGroup, SequenceStatus -from vllm.utils import Device - -SeqId = int -EncoderSeqId = str - - -class SelfAttnBlockSpaceManager(BlockSpaceManager): - """BlockSpaceManager which manages the allocation of KV cache. - - It owns responsibility for allocation, swapping, allocating memory for - autoregressively-generated tokens, and other advanced features such as - prefix caching, forking/copy-on-write, and sliding-window memory allocation. - - This class implements the design described in - https://github.com/vllm-project/vllm/pull/3492. - - Lookahead slots - The block manager has the notion of a "lookahead slot". These are slots - in the KV cache that are allocated for a sequence. Unlike the other - allocated slots, the content of these slots is undefined -- the worker - may use the memory allocations in any way. - - In practice, a worker could use these lookahead slots to run multiple - forward passes for a single scheduler invocation. Each successive - forward pass would write KV activations to the corresponding lookahead - slot. This allows low inter-token latency use-cases, where the overhead - of continuous batching scheduling is amortized over >1 generated tokens. - - Speculative decoding uses lookahead slots to store KV activations of - proposal tokens. - - See https://github.com/vllm-project/vllm/pull/3250 for more information - on lookahead scheduling. - - Args: - block_size (int): The size of each memory block. - num_gpu_blocks (int): The number of memory blocks allocated on GPU. - num_cpu_blocks (int): The number of memory blocks allocated on CPU. - watermark (float, optional): The threshold used for memory swapping. - Defaults to 0.01. - sliding_window (Optional[int], optional): The size of the sliding - window. Defaults to None. - enable_caching (bool, optional): Flag indicating whether caching is - enabled. Defaults to False. 
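The watermark parameter feeds the three-way admission decision implemented in can_allocate just below: NEVER if the request could not fit even in an empty cache, OK if it fits while leaving the watermark headroom free, LATER otherwise. A worked toy version of that decision (the block counts are made up):

from enum import Enum, auto

class AllocStatus(Enum):
    OK = auto()
    LATER = auto()
    NEVER = auto()

def can_allocate(required: int, total: int, free: int,
                 watermark: float = 0.01) -> AllocStatus:
    watermark_blocks = int(watermark * total)   # e.g. 10 for 1000 GPU blocks
    if total - required < watermark_blocks:
        return AllocStatus.NEVER
    if free - required >= watermark_blocks:
        return AllocStatus.OK
    return AllocStatus.LATER

assert can_allocate(required=100, total=1000, free=500) is AllocStatus.OK
assert can_allocate(required=100, total=1000, free=105) is AllocStatus.LATER
assert can_allocate(required=995, total=1000, free=1000) is AllocStatus.NEVER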
- """ - - def __init__( - self, - block_size: int, - num_gpu_blocks: int, - num_cpu_blocks: int, - watermark: float = 0.01, - sliding_window: Optional[int] = None, - enable_caching: bool = False, - ) -> None: - self.block_size = block_size - self.num_total_gpu_blocks = num_gpu_blocks - self.num_total_cpu_blocks = num_cpu_blocks - - self.sliding_window = sliding_window - # max_block_sliding_window is the max number of blocks that need to be - # allocated - self.max_block_sliding_window = None - if sliding_window is not None: - # +1 here because // rounds down - num_blocks = sliding_window // block_size + 1 - # +1 here because the last block may not be full, - # and so the sequence stretches one more block at the beginning - # For example, if sliding_window is 3 and block_size is 4, - # we may need 2 blocks when the second block only holds 1 token. - self.max_block_sliding_window = num_blocks + 1 - - self.watermark = watermark - assert watermark >= 0.0 - - self.enable_caching = enable_caching - - self.watermark_blocks = int(watermark * num_gpu_blocks) - - self.block_allocator = CpuGpuBlockAllocator.create( - allocator_type="prefix_caching" if enable_caching else "naive", - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=num_cpu_blocks, - block_size=block_size, - ) - - self.block_tables: Dict[SeqId, BlockTable] = {} - self.cross_block_tables: Dict[EncoderSeqId, BlockTable] = {} - - self._computed_blocks_tracker = ComputedBlocksTracker( - self.block_allocator, self.block_size, self.enable_caching) - self._last_access_blocks_tracker = LastAccessBlocksTracker( - self.block_allocator) - - def can_allocate(self, - seq_group: SequenceGroup, - num_lookahead_slots: int = 0) -> AllocStatus: - # FIXME(woosuk): Here we assume that all sequences in the group share - # the same prompt. This may not be true for preempted sequences. - - check_no_caching_or_swa_for_blockmgr_encdec(self, seq_group) - - seq = seq_group.get_seqs(status=SequenceStatus.WAITING)[0] - num_required_blocks = BlockTable.get_num_required_blocks( - seq.get_token_ids(), - block_size=self.block_size, - num_lookahead_slots=num_lookahead_slots, - ) - - if seq_group.is_encoder_decoder(): - encoder_seq = seq_group.get_encoder_seq() - assert encoder_seq is not None - num_required_blocks += BlockTable.get_num_required_blocks( - encoder_seq.get_token_ids(), - block_size=self.block_size, - ) - - if self.max_block_sliding_window is not None: - num_required_blocks = min(num_required_blocks, - self.max_block_sliding_window) - - num_free_gpu_blocks = self.block_allocator.get_num_free_blocks( - device=Device.GPU) - - # Use watermark to avoid frequent cache eviction. - if (self.num_total_gpu_blocks - num_required_blocks - < self.watermark_blocks): - return AllocStatus.NEVER - if num_free_gpu_blocks - num_required_blocks >= self.watermark_blocks: - return AllocStatus.OK - else: - return AllocStatus.LATER - - def _allocate_sequence(self, seq: Sequence) -> BlockTable: - block_table = BlockTable( - block_size=self.block_size, - block_allocator=self.block_allocator, - max_block_sliding_window=self.max_block_sliding_window, - ) - if seq.get_token_ids(): - # NOTE: If there are any factors affecting the block besides - # token_ids, they should be added as input to extra_hash. - extra_hash = seq.extra_hash() - - # Add blocks to the block table only if the sequence is non empty. 
- block_table.allocate(token_ids=seq.get_token_ids(), - extra_hash=extra_hash) - - return block_table - - def allocate(self, seq_group: SequenceGroup) -> None: - - # Allocate self-attention block tables for decoder sequences - waiting_seqs = seq_group.get_seqs(status=SequenceStatus.WAITING) - assert not (set(seq.seq_id for seq in waiting_seqs) - & self.block_tables.keys()), "block table already exists" - - # NOTE: Here we assume that all sequences in the group have the same - # prompt. - seq = waiting_seqs[0] - block_table: BlockTable = self._allocate_sequence(seq) - self.block_tables[seq.seq_id] = block_table - - # Track seq - self._last_access_blocks_tracker.add_seq(seq.seq_id) - - # Assign the block table for each sequence. - for seq in waiting_seqs[1:]: - self.block_tables[seq.seq_id] = block_table.fork() - - # Track seq - self._last_access_blocks_tracker.add_seq(seq.seq_id) - - # Allocate cross-attention block table for encoder sequence - # - # NOTE: Here we assume that all sequences in the group have the same - # encoder prompt. - request_id = seq_group.request_id - - assert (request_id - not in self.cross_block_tables), \ - "block table already exists" - - check_no_caching_or_swa_for_blockmgr_encdec(self, seq_group) - - if seq_group.is_encoder_decoder(): - encoder_seq = seq_group.get_encoder_seq() - assert encoder_seq is not None - block_table = self._allocate_sequence(encoder_seq) - self.cross_block_tables[request_id] = block_table - - def can_append_slots(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> bool: - """Determine if there is enough space in the GPU KV cache to continue - generation of the specified sequence group. - - We use a worst-case heuristic: assume each touched block will require a - new allocation (either via CoW or new block). We can append slots if the - number of touched blocks is less than the number of free blocks. - - "Lookahead slots" are slots that are allocated in addition to the slots - for known tokens. The contents of the lookahead slots are not defined. - This is used by speculative decoding when speculating future tokens. - """ - - num_touched_blocks = 0 - for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): - block_table = self.block_tables[seq.seq_id] - - num_touched_blocks += ( - block_table.get_num_blocks_touched_by_append_slots( - token_ids=block_table.get_unseen_token_ids( - seq.get_token_ids()), - num_lookahead_slots=num_lookahead_slots, - )) - - num_free_gpu_blocks = self.block_allocator.get_num_free_blocks( - Device.GPU) - return num_touched_blocks <= num_free_gpu_blocks - - def append_slots( - self, - seq: Sequence, - num_lookahead_slots: int, - ) -> List[Tuple[int, int]]: - - block_table = self.block_tables[seq.seq_id] - - block_table.append_token_ids( - token_ids=block_table.get_unseen_token_ids(seq.get_token_ids()), - num_lookahead_slots=num_lookahead_slots, - num_computed_slots=seq.data.get_num_computed_tokens(), - extra_hash=seq.extra_hash(), - ) - # Return any new copy-on-writes. - new_cows = self.block_allocator.clear_copy_on_writes() - return new_cows - - def free(self, seq: Sequence) -> None: - seq_id = seq.seq_id - - if seq_id not in self.block_tables: - # Already freed or haven't been scheduled yet. 
- return - - # Update seq block ids with the latest access time - self._last_access_blocks_tracker.update_seq_blocks_last_access( - seq_id, self.block_tables[seq.seq_id].physical_block_ids) - - # Untrack seq - self._last_access_blocks_tracker.remove_seq(seq_id) - self._computed_blocks_tracker.remove_seq(seq_id) - - # Free table/blocks - self.block_tables[seq_id].free() - del self.block_tables[seq_id] - - def remove_seq_from_computed_blocks_tracker(self, seq: Sequence) -> None: - seq_id = seq.seq_id - self._computed_blocks_tracker.remove_seq(seq_id) - - def free_cross(self, seq_group: SequenceGroup) -> None: - request_id = seq_group.request_id - if request_id not in self.cross_block_tables: - # Already freed or hasn't been scheduled yet. - return - self.cross_block_tables[request_id].free() - del self.cross_block_tables[request_id] - - def get_block_table(self, seq: Sequence) -> List[int]: - block_ids = self.block_tables[seq.seq_id].physical_block_ids - return block_ids # type: ignore - - def get_cross_block_table(self, seq_group: SequenceGroup) -> List[int]: - request_id = seq_group.request_id - assert request_id in self.cross_block_tables - block_ids = self.cross_block_tables[request_id].physical_block_ids - assert all(b is not None for b in block_ids) - return block_ids # type: ignore - - def access_all_blocks_in_seq(self, seq: Sequence, now: float): - if self.enable_caching: - # Record the latest access time for the sequence. The actual update - # of the block ids is deferred to the sequence free(..) call, since - # only during freeing of block ids, the blocks are actually added to - # the evictor (which is when the most updated time is required) - # (This avoids expensive calls to mark_blocks_as_accessed(..)) - self._last_access_blocks_tracker.update_last_access( - seq.seq_id, now) - - def mark_blocks_as_computed(self, seq_group: SequenceGroup, - token_chunk_size: int): - # If prefix caching is enabled, mark immutable blocks as computed - # right after they have been scheduled (for prefill). This assumes - # the scheduler is synchronous so blocks are actually computed when - # scheduling the next batch. - self.block_allocator.mark_blocks_as_computed([]) - - def get_common_computed_block_ids( - self, seqs: List[Sequence]) -> GenericSequence[int]: - """Determine which blocks for which we skip prefill. - - With prefix caching we can skip prefill for previously-generated blocks. - Currently, the attention implementation only supports skipping cached - blocks if they are a contiguous prefix of cached blocks. - - This method determines which blocks can be safely skipped for all - sequences in the sequence group. - """ - computed_seq_block_ids = [] - for seq in seqs: - all_blocks = self.block_tables[seq.seq_id].physical_block_ids - num_cached_tokens = ( - self._computed_blocks_tracker.get_num_cached_tokens(seq)) - assert num_cached_tokens % self.block_size == 0 - num_cached_blocks = num_cached_tokens // self.block_size - computed_block_ids = all_blocks[:num_cached_blocks] - computed_seq_block_ids.append(computed_block_ids) - - # NOTE(sang): This assumes seq_block_ids doesn't contain any None. - return self.block_allocator.get_common_computed_block_ids( - computed_seq_block_ids) # type: ignore - - def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None: - if parent_seq.seq_id not in self.block_tables: - # Parent sequence has either been freed or never existed. 
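The allocator-level get_common_computed_block_ids shown earlier reduces this to os.path.commonprefix, which despite its name compares element-wise and therefore works on lists of block ids just as well as on path strings. A tiny illustration with made-up block ids:

from os.path import commonprefix

computed_seq_block_ids = [
    [10, 11, 12, 13],   # sequence A: four computed blocks
    [10, 11, 12, 40],   # sequence B: diverges at the fourth block
    [10, 11, 12],       # sequence C: only three computed blocks
]
assert commonprefix(computed_seq_block_ids) == [10, 11, 12]

# With a single sequence the allocator short-circuits and returns its list
# directly, which commonprefix would also produce:
assert commonprefix([[10, 11, 12, 13]]) == [10, 11, 12, 13]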
- return - src_block_table = self.block_tables[parent_seq.seq_id] - self.block_tables[child_seq.seq_id] = src_block_table.fork() - - # Track child seq - self._last_access_blocks_tracker.add_seq(child_seq.seq_id) - - def can_swap_in(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> AllocStatus: - """Returns the AllocStatus for the given sequence_group - with num_lookahead_slots. - - Args: - seq_group (SequenceGroup): The sequence group to swap in. - num_lookahead_slots (int): Number of lookahead slots used in - speculative decoding, default to 0. - - Returns: - AllocStatus: The AllocStatus for the given sequence group. - """ - return self._can_swap(seq_group, Device.GPU, SequenceStatus.SWAPPED, - num_lookahead_slots) - - def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - """Returns the block id mapping (from CPU to GPU) generated by - swapping in the given seq_group with num_lookahead_slots. - - Args: - seq_group (SequenceGroup): The sequence group to swap in. - - Returns: - List[Tuple[int, int]]: The mapping of swapping block from CPU - to GPU. - """ - physical_block_id_mapping = [] - for seq in seq_group.get_seqs(status=SequenceStatus.SWAPPED): - blocks = self.block_tables[seq.seq_id].blocks - if len(blocks) == 0: - continue - - seq_swap_mapping = self.block_allocator.swap(blocks=blocks, - src_device=Device.CPU, - dst_device=Device.GPU) - - # Refresh the block ids of the table (post-swap) - self.block_tables[seq.seq_id].update(blocks) - - seq_physical_block_id_mapping = { - self.block_allocator.get_physical_block_id( - Device.CPU, cpu_block_id): - self.block_allocator.get_physical_block_id( - Device.GPU, gpu_block_id) - for cpu_block_id, gpu_block_id in seq_swap_mapping.items() - } - - physical_block_id_mapping.extend( - list(seq_physical_block_id_mapping.items())) - - return physical_block_id_mapping - - def can_swap_out(self, seq_group: SequenceGroup) -> bool: - """Returns whether we can swap out the given sequence_group - with num_lookahead_slots. - - Args: - seq_group (SequenceGroup): The sequence group to swap out. - - Returns: - bool: Whether it's possible to swap out current sequence group. - """ - alloc_status = self._can_swap(seq_group, Device.CPU, - SequenceStatus.RUNNING) - return alloc_status == AllocStatus.OK - - def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - """Returns the block id mapping (from GPU to CPU) generated by - swapping out the given sequence_group with num_lookahead_slots. - - Args: - seq_group (SequenceGroup): The sequence group to swap out. - - Returns: - List[Tuple[int, int]]: The mapping of swapping block from - GPU to CPU. 
- """ - physical_block_id_mapping = [] - for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): - blocks = self.block_tables[seq.seq_id].blocks - if len(blocks) == 0: - continue - - seq_swap_mapping = self.block_allocator.swap(blocks=blocks, - src_device=Device.GPU, - dst_device=Device.CPU) - - # Refresh the block ids of the table (post-swap) - self.block_tables[seq.seq_id].update(blocks) - - seq_physical_block_id_mapping = { - self.block_allocator.get_physical_block_id( - Device.GPU, gpu_block_id): - self.block_allocator.get_physical_block_id( - Device.CPU, cpu_block_id) - for gpu_block_id, cpu_block_id in seq_swap_mapping.items() - } - - physical_block_id_mapping.extend( - list(seq_physical_block_id_mapping.items())) - - return physical_block_id_mapping - - def get_num_free_gpu_blocks(self) -> int: - return self.block_allocator.get_num_free_blocks(Device.GPU) - - def get_num_free_cpu_blocks(self) -> int: - return self.block_allocator.get_num_free_blocks(Device.CPU) - - def get_prefix_cache_hit_rate(self, device: Device) -> float: - return self.block_allocator.get_prefix_cache_hit_rate(device) - - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - return self.block_allocator.reset_prefix_cache(device) - - def _can_swap(self, - seq_group: SequenceGroup, - device: Device, - status: SequenceStatus, - num_lookahead_slots: int = 0) -> AllocStatus: - """Returns the AllocStatus for swapping in/out the given sequence_group - on to the 'device'. - - Args: - seq_group (SequenceGroup): The sequence group to swap in/out. - device (Device): device to swap the 'seq_group' on. - status (SequenceStatus): The status of sequence which is needed - for action. RUNNING for swap out and SWAPPED for swap in - num_lookahead_slots (int): Number of lookahead slots used in - speculative decoding, default to 0. - - Returns: - AllocStatus: The AllocStatus for swapping in/out the given - sequence_group on to the 'device'. - """ - # First determine the number of blocks that will be touched by this - # swap. Then verify if there are available blocks in the device - # to perform the swap. - num_blocks_touched = 0 - blocks: List[Block] = [] - for seq in seq_group.get_seqs(status=status): - block_table = self.block_tables[seq.seq_id] - if block_table.blocks is not None: - # Compute the number blocks to touch for the tokens to be - # appended. This does NOT include the full blocks that need - # to be touched for the swap. - num_blocks_touched += \ - block_table.get_num_blocks_touched_by_append_slots( - block_table.get_unseen_token_ids(seq.get_token_ids()), - num_lookahead_slots=num_lookahead_slots) - blocks.extend(block_table.blocks) - # Compute the number of full blocks to touch and add it to the - # existing count of blocks to touch. - num_blocks_touched += self.block_allocator.get_num_full_blocks_touched( - blocks, device=device) - - watermark_blocks = 0 - if device == Device.GPU: - watermark_blocks = self.watermark_blocks - - if self.block_allocator.get_num_total_blocks( - device) < num_blocks_touched: - return AllocStatus.NEVER - elif self.block_allocator.get_num_free_blocks( - device) - num_blocks_touched >= watermark_blocks: - return AllocStatus.OK - else: - return AllocStatus.LATER - - def get_num_cached_tokens(self, seq: Sequence) -> int: - """Get the number of tokens in blocks that are already computed and - cached in the block manager for the sequence. 
- """ - return self._computed_blocks_tracker.get_num_cached_tokens(seq) diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py deleted file mode 100644 index 85ff6bc9ca61..000000000000 --- a/vllm/core/evictor.py +++ /dev/null @@ -1,157 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import enum -import heapq -from abc import ABC, abstractmethod -from typing import Dict, List, Tuple - - -class EvictionPolicy(enum.Enum): - """Enum for eviction policy used by make_evictor to instantiate the correct - Evictor subclass. - """ - LRU = enum.auto() - - -class Evictor(ABC): - """The Evictor subclasses should be used by the BlockAllocator class to - handle eviction of freed Blocks. - """ - - @abstractmethod - def __init__(self): - pass - - @abstractmethod - def __contains__(self, block_id: int) -> bool: - pass - - @abstractmethod - def evict(self) -> Tuple[int, int]: - """Runs the eviction algorithm and returns the evicted block's - content hash along with physical block id along with physical block id - """ - pass - - @abstractmethod - def add(self, block_id: int, content_hash: int, num_hashed_tokens: int, - last_accessed: float): - """Adds block to the evictor, making it a candidate for eviction""" - pass - - @abstractmethod - def update(self, block_id: int, last_accessed: float): - """Update corresponding block's access time in metadata""" - pass - - @abstractmethod - def remove(self, block_id: int): - """Remove a given block id from the cache.""" - pass - - @property - @abstractmethod - def num_blocks(self) -> int: - pass - - -class BlockMetaData: - """Data structure for storing key data describe cached block, so that - evictor could use to make its decision which one to choose for eviction - - Here we use physical block id as the dict key, as there maybe several - blocks with the same content hash, but their physical id is unique. - """ - - def __init__(self, content_hash: int, num_hashed_tokens: int, - last_accessed: float): - self.content_hash = content_hash - self.num_hashed_tokens = num_hashed_tokens - self.last_accessed = last_accessed - - -class LRUEvictor(Evictor): - """Evicts in a least-recently-used order using the last_accessed timestamp - that's recorded in the Block. If there are multiple blocks with - the same last_accessed time, then the one with the largest num_hashed_tokens - will be evicted. If two blocks each have the lowest last_accessed time and - highest num_hashed_tokens value, then one will be chosen arbitrarily - """ - - # CLEANUP_THRESHOLD determines the maximum allowable size of the priority - # queue relative to the free table size. When this threshold is exceeded, - # a cleanup operation is triggered to reduce memory usage. - CLEANUP_THRESHOLD = 50 - - def __init__(self): - self.free_table: Dict[int, BlockMetaData] = {} - self.priority_queue = [] - - def __contains__(self, block_id: int) -> bool: - return block_id in self.free_table - - def evict(self) -> Tuple[int, int]: - if len(self.free_table) == 0: - raise ValueError("No usable cache memory left") - - while self.priority_queue: - # We do not remove outdated entries from the priority queue at the - # time of updating the last_accessed timestamp. Instead, outdated - # entries are filtered out here during eviction. Outdated entries - # would either not in the free table, or have older last accessed - # time. 
- last_accessed, _, block_id, content_hash = heapq.heappop( - self.priority_queue) - if (block_id in self.free_table and - self.free_table[block_id].last_accessed == last_accessed): - self.free_table.pop(block_id) - return block_id, content_hash - - raise ValueError("No usable cache memory left") - - def add(self, block_id: int, content_hash: int, num_hashed_tokens: int, - last_accessed: float): - self.free_table[block_id] = BlockMetaData(content_hash, - num_hashed_tokens, - last_accessed) - heapq.heappush( - self.priority_queue, - (last_accessed, -num_hashed_tokens, block_id, content_hash)) - self._cleanup_if_necessary() - - def update(self, block_id: int, last_accessed: float): - self.free_table[block_id].last_accessed = last_accessed - - def _cleanup_if_necessary(self): - if len(self.priority_queue) > LRUEvictor.CLEANUP_THRESHOLD * len( - self.free_table): - self._cleanup() - - def _cleanup(self): - new_priority_queue: List[Tuple[float, int, int, int]] = [] - - for block_id, block in self.free_table.items(): - new_priority_queue.append( - (block.last_accessed, -block.num_hashed_tokens, block_id, - block.content_hash)) - heapq.heapify(new_priority_queue) - - self.priority_queue = new_priority_queue - - def remove(self, block_id: int): - if block_id not in self.free_table: - raise ValueError( - "Attempting to remove block that's not in the evictor") - self.free_table.pop(block_id) - - @property - def num_blocks(self) -> int: - return len(self.free_table) - - -def make_evictor(eviction_policy: EvictionPolicy) -> Evictor: - if eviction_policy == EvictionPolicy.LRU: - return LRUEvictor() - else: - raise ValueError(f"Unknown cache eviction policy: {eviction_policy}") diff --git a/vllm/core/interfaces.py b/vllm/core/interfaces.py deleted file mode 100644 index 69b9169ddd8a..000000000000 --- a/vllm/core/interfaces.py +++ /dev/null @@ -1,139 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import enum -from abc import ABC, abstractmethod -from typing import List, Optional -from typing import Sequence as GenericSequence -from typing import Tuple - -from vllm.sequence import Sequence, SequenceGroup -from vllm.utils import Device - - -class AllocStatus(enum.Enum): - """Result for BlockSpaceManager.can_allocate - - 1. Ok: seq_group can be allocated now. - 2. Later: seq_group cannot be allocated. - The capacity of allocator is larger than seq_group required. - 3. Never: seq_group can never be allocated. - The seq_group is too large to allocated in GPU. 
- """ - OK = enum.auto() - LATER = enum.auto() - NEVER = enum.auto() - - -class BlockSpaceManager(ABC): - - @staticmethod - def get_block_space_manager_class(version: str): - version = version.lower() - - if version == "selfattn": - from vllm.core.block_manager import SelfAttnBlockSpaceManager - return SelfAttnBlockSpaceManager - - if version == "placeholder": - from vllm.core.placeholder_block_space_manager import ( - PlaceholderBlockSpaceManager) - return PlaceholderBlockSpaceManager - - raise ValueError(f"Unknown version {version=}") - - @abstractmethod - def can_allocate(self, - seq_group: SequenceGroup, - num_lookahead_slots: int = 0) -> AllocStatus: - pass - - @abstractmethod - def allocate(self, seq_group: SequenceGroup) -> None: - pass - - @abstractmethod - def can_append_slots(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> bool: - pass - - @abstractmethod - def append_slots( - self, - seq: Sequence, - num_lookahead_slots: int, - ) -> List[Tuple[int, int]]: - pass - - @abstractmethod - def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None: - pass - - @abstractmethod - def can_swap_in(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> AllocStatus: - pass - - @abstractmethod - def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - pass - - @abstractmethod - def can_swap_out(self, seq_group: SequenceGroup) -> bool: - pass - - @abstractmethod - def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - pass - - @abstractmethod - def free(self, seq: Sequence) -> None: - pass - - @abstractmethod - def get_block_table(self, seq: Sequence) -> List[int]: - pass - - @abstractmethod - def get_num_free_gpu_blocks(self) -> int: - pass - - @abstractmethod - def get_num_free_cpu_blocks(self) -> int: - pass - - @abstractmethod - def access_all_blocks_in_seq( - self, - seq: Sequence, - access_time: float, - ) -> None: - pass - - @abstractmethod - def get_common_computed_block_ids( - self, seqs: List[Sequence]) -> GenericSequence[int]: - pass - - @abstractmethod - def mark_blocks_as_computed(self, seq_group: SequenceGroup, - token_chunk_size: int): - pass - - @abstractmethod - def get_prefix_cache_hit_rate(self, device: Device) -> float: - """Prefix cache hit rate. -1 means not supported or disabled.""" - pass - - @abstractmethod - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - """Reset prefix cache for specified or all devices.""" - pass - - @abstractmethod - def get_num_cached_tokens(self, seq: Sequence) -> int: - pass - - @abstractmethod - def remove_seq_from_computed_blocks_tracker(self, seq: Sequence) -> None: - pass \ No newline at end of file diff --git a/vllm/core/placeholder_block_space_manager.py b/vllm/core/placeholder_block_space_manager.py deleted file mode 100644 index 679515924e85..000000000000 --- a/vllm/core/placeholder_block_space_manager.py +++ /dev/null @@ -1,103 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import List, Optional, Tuple - -from vllm.core.interfaces import AllocStatus, BlockSpaceManager -from vllm.sequence import Sequence, SequenceGroup -from vllm.utils import Device - - -class PlaceholderBlockSpaceManager(BlockSpaceManager): - """A version of BlockSpaceManager for use in environments - where block management is not required. - For example: pooling models or attention-free models like Mamba. 
- - This class provides the same interface as BlockSpaceManager, but its - methods perform no actions or return simple values like True in specific - actions. It's designed to be used in scenarios where the overhead of - block management is unnecessary, such as in an embedding environment. - """ - - def __init__( - self, - **kwargs, - ) -> None: - pass - - def can_allocate(self, - seq_group: SequenceGroup, - num_lookahead_slots: int = 0) -> AllocStatus: - # Always return OK for dummy purposes - return AllocStatus.OK - - def allocate(self, seq_group: SequenceGroup) -> None: - # No actual allocation logic needed - pass - - def can_append_slots(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> bool: - return True - - def append_slots( - self, - seq: Sequence, - num_lookahead_slots: int, - ) -> List[Tuple[int, int]]: - return [] - - def fork(self, parent_seq: Sequence, child_seq: Sequence) -> None: - pass - - def can_swap_in(self, seq_group: SequenceGroup, - num_lookahead_slots: int) -> AllocStatus: - return AllocStatus.OK - - def swap_in(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - return None # type: ignore - - def can_swap_out(self, seq_group: SequenceGroup) -> bool: - return True - - def swap_out(self, seq_group: SequenceGroup) -> List[Tuple[int, int]]: - return None # type: ignore - - def free(self, seq: Sequence) -> None: - # No operation on free - return - - def get_block_table(self, seq: Sequence) -> List[int]: - return None # type: ignore - - def get_num_free_gpu_blocks(self) -> int: - return 1 - - def get_num_free_cpu_blocks(self) -> int: - return 1 - - def access_all_blocks_in_seq( - self, - seq: Sequence, - access_time: float, - ) -> None: - pass - - def get_common_computed_block_ids(self, - seq_group: List[Sequence]) -> List[int]: - return [] - - def mark_blocks_as_computed(self, seq_group: SequenceGroup, - token_chunk_size: int): - pass - - def get_prefix_cache_hit_rate(self, device: Device) -> float: - return -1 - - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - return True - - def get_num_cached_tokens(self, seq: Sequence) -> int: - return 0 - - def remove_seq_from_computed_blocks_tracker(self, seq: Sequence) -> None: - return diff --git a/vllm/core/scheduler.py b/vllm/core/scheduler.py deleted file mode 100644 index 92ebad778ea4..000000000000 --- a/vllm/core/scheduler.py +++ /dev/null @@ -1,2028 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import enum -import os -import random -import time -from collections import deque -from dataclasses import dataclass, field -from typing import Callable, Deque, Dict, Iterable, List, Optional -from typing import Sequence as GenericSequence -from typing import Set, Tuple, Union - -from vllm.config import CacheConfig, SchedulerConfig -from vllm.config.lora import LoRAConfig -from vllm.core.interfaces import AllocStatus, BlockSpaceManager -from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.sequence import (Sequence, SequenceData, SequenceGroup, - SequenceGroupBase, SequenceGroupMetadata, - SequenceGroupMetadataDelta, SequenceStage, - SequenceStatus) -from vllm.utils import Device, PyObjectCache - -logger = init_logger(__name__) - -# Test-only. If configured, decode is preempted with -# ARTIFICIAL_PREEMPTION_PROB% probability. 
-ENABLE_ARTIFICIAL_PREEMPT = bool( - os.getenv("VLLM_TEST_ENABLE_ARTIFICIAL_PREEMPT", False)) # noqa -ARTIFICIAL_PREEMPTION_PROB = 0.5 -ARTIFICIAL_PREEMPTION_MAX_CNT = 500 - - -class PreemptionMode(enum.Enum): - """Preemption modes. - - 1. Swapping: Swap out the blocks of the preempted sequences to CPU memory - and swap them back in when the sequences are resumed. - 2. Recomputation: Discard the blocks of the preempted sequences and - recompute them when the sequences are resumed, treating the sequences as - new prompts. - """ - - SWAP = enum.auto() - RECOMPUTE = enum.auto() - - -@dataclass -class SchedulingBudget: - """The available slots for scheduling. - - TODO(sang): Right now, the budget is request_id-aware meaning it can ignore - budget update from the same request_id. It is because in normal scheduling - path, we update RUNNING num_seqs ahead of time, meaning it could be - updated more than once when scheduling RUNNING requests. Since this won't - happen if we only have chunked prefill scheduling, we can remove this - feature from the API when chunked prefill is enabled by default. - """ - - token_budget: int - max_num_seqs: int - _request_ids_num_batched_tokens: Set[str] = field(default_factory=set) - _request_ids_num_curr_seqs: Set[str] = field(default_factory=set) - # Number of cached tokens in the batch. - _num_cached_tokens: int = 0 - # Number of actual non-cached tokens in the batch. - _num_batched_tokens: int = 0 - _num_curr_seqs: int = 0 - - def can_schedule(self, *, num_new_tokens: int, num_new_seqs: int): - # We allow num_new_tokens to be 0 when the entire sequence has - # been cached. - assert num_new_tokens >= 0 - assert num_new_seqs != 0 - return (self.num_batched_tokens + num_new_tokens <= self.token_budget - and self.num_curr_seqs + num_new_seqs <= self.max_num_seqs) - - def remaining_token_budget(self): - return self.token_budget - self.num_batched_tokens - - def add_num_batched_tokens(self, - req_id: str, - num_batched_tokens: int, - num_cached_tokens: int = 0): - if req_id in self._request_ids_num_batched_tokens: - return - assert num_cached_tokens >= 0 - assert num_batched_tokens >= 0 - - self._request_ids_num_batched_tokens.add(req_id) - self._num_batched_tokens += num_batched_tokens - self._num_cached_tokens += num_cached_tokens - - def subtract_num_batched_tokens(self, req_id: str, - num_batched_tokens: int): - if req_id in self._request_ids_num_batched_tokens: - self._request_ids_num_batched_tokens.remove(req_id) - self._num_batched_tokens -= num_batched_tokens - - def add_num_seqs(self, req_id: str, num_curr_seqs: int): - if req_id in self._request_ids_num_curr_seqs: - return - - self._request_ids_num_curr_seqs.add(req_id) - self._num_curr_seqs += num_curr_seqs - - def subtract_num_seqs(self, req_id: str, num_curr_seqs: int): - if req_id in self._request_ids_num_curr_seqs: - self._request_ids_num_curr_seqs.remove(req_id) - self._num_curr_seqs -= num_curr_seqs - - @property - def num_batched_tokens(self): - return self._num_batched_tokens - - @property - def num_curr_seqs(self): - return self._num_curr_seqs - - @property - def num_cached_tokens(self): - return self._num_cached_tokens - - -@dataclass -class ScheduledSequenceGroup: - # A sequence group that's scheduled. - seq_group: SequenceGroup - # The total chunk size (number of tokens) to process for next iteration. - # 1 for decoding. Same as prompt tokens for prefill, but if prefill is - # chunked, it can be smaller than that. 
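SchedulingBudget's request-id bookkeeping is what makes it safe to call the add/subtract helpers more than once for the same request within a scheduling pass: only the first call changes the totals. A stripped-down sketch of that deduplication (a toy class, not the scheduler's own):

class ToyBudget:
    """Minimal re-creation of the request-id deduplication shown above."""

    def __init__(self, token_budget: int):
        self.token_budget = token_budget
        self.num_batched_tokens = 0
        self._seen = set()

    def add_num_batched_tokens(self, req_id: str, tokens: int) -> None:
        if req_id in self._seen:   # RUNNING requests may be updated twice
            return
        self._seen.add(req_id)
        self.num_batched_tokens += tokens

    def can_schedule(self, num_new_tokens: int) -> bool:
        return self.num_batched_tokens + num_new_tokens <= self.token_budget

budget = ToyBudget(token_budget=8192)
budget.add_num_batched_tokens("req-0", 512)
budget.add_num_batched_tokens("req-0", 512)   # duplicate update is a no-op
assert budget.num_batched_tokens == 512
assert budget.can_schedule(4096)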
- token_chunk_size: int - - -@dataclass -class SchedulerOutputs: - """The scheduling decision made from a scheduler.""" - - # Scheduled sequence groups. - scheduled_seq_groups: GenericSequence[ScheduledSequenceGroup] - # Number of prefill groups scheduled. - num_prefill_groups: int - # Total number of batched tokens. - num_batched_tokens: int - # Blocks to swap in. List of CPU -> GPU block number. - blocks_to_swap_in: List[Tuple[int, int]] - # Blocks to swap out. List of GPU -> CPU block number. - blocks_to_swap_out: List[Tuple[int, int]] - # Blocks to copy. Source to dest block. - blocks_to_copy: List[Tuple[int, int]] - # Sequence groups that are going to be ignored. - ignored_seq_groups: List[SequenceGroup] - # The number of slots for lookahead decoding. - num_lookahead_slots: int - # The number of requests in the running queue - running_queue_size: int - preempted: int - - def __post_init__(self): - # Swap in and swap out should never happen at the same time. - assert not (self.blocks_to_swap_in and self.blocks_to_swap_out) - - self.num_loras: int = len(self.lora_requests) - if self.num_loras > 0: - self._sort_by_lora_ids() - - def is_empty(self) -> bool: - # NOTE: We do not consider the ignored sequence groups. - return (not self.scheduled_seq_groups and not self.blocks_to_swap_in - and not self.blocks_to_swap_out and not self.blocks_to_copy) - - def _sort_by_lora_ids(self): - assert 0 <= self.num_prefill_groups <= len(self.scheduled_seq_groups) - - def key_fn(group: ScheduledSequenceGroup): - key = (group.seq_group.lora_int_id, group.seq_group.request_id) - if 0 < self.num_prefill_groups < len(self.scheduled_seq_groups): - # Sort sequence groups so that all prefills come before all - # decodes as required by chunked prefill. - return (not group.seq_group.is_prefill(), *key) - return key - - self.scheduled_seq_groups = sorted(self.scheduled_seq_groups, - key=key_fn) - - @property - def lora_requests(self) -> Set[LoRARequest]: - return { - g.seq_group.lora_request - for g in self.scheduled_seq_groups - if g.seq_group.lora_request is not None - } - - -@dataclass -class SchedulerRunningOutputs: - """The requests that are scheduled from a running queue. - - Could contain prefill (prefill that's chunked) or decodes. If there's not - enough memory, it can be preempted (for recompute) or swapped out. - """ - - # Selected sequences that are running and in a decoding phase. - decode_seq_groups: List[ScheduledSequenceGroup] - # Selected sequences that are running and in a prefill phase. - # I.e., it means the prefill has been chunked. - prefill_seq_groups: List[ScheduledSequenceGroup] - # The preempted sequences. - preempted: List[SequenceGroup] - # Sequences that are swapped out. - swapped_out: List[SequenceGroup] - # The blocks to swap out. - blocks_to_swap_out: List[Tuple[int, int]] - # The blocks to copy. - blocks_to_copy: List[Tuple[int, int]] - # The number of slots for lookahead decoding. - num_lookahead_slots: int - - # Optimization for fast-access to seq_group lists - decode_seq_groups_list: List[SequenceGroup] - prefill_seq_groups_list: List[SequenceGroup] - - @classmethod - def create_empty(cls) -> "SchedulerRunningOutputs": - return SchedulerRunningOutputs( - decode_seq_groups=[], - prefill_seq_groups=[], - preempted=[], - swapped_out=[], - blocks_to_swap_out=[], - blocks_to_copy=[], - num_lookahead_slots=0, - decode_seq_groups_list=[], - prefill_seq_groups_list=[], - ) - - -@dataclass -class SchedulerSwappedInOutputs: - """The requests that are scheduled from a swap queue. 
- - Could contain prefill (prefill that's chunked) or decodes. - """ - - # Selected sequences that are going to be swapped in and is in a - # decoding phase. - decode_seq_groups: List[ScheduledSequenceGroup] - # Selected sequences that are going to be swapped in and in a prefill - # phase. I.e., it means the prefill has been chunked. - prefill_seq_groups: List[ScheduledSequenceGroup] - # The blocks to swap in. - blocks_to_swap_in: List[Tuple[int, int]] - # The blocks to copy. - blocks_to_copy: List[Tuple[int, int]] - # The number of slots for lookahead decoding. - num_lookahead_slots: int - # Infeasible sequence groups. - infeasible_seq_groups: List[SequenceGroup] - - @classmethod - def create_empty(cls) -> "SchedulerSwappedInOutputs": - return SchedulerSwappedInOutputs( - decode_seq_groups=[], - prefill_seq_groups=[], - blocks_to_swap_in=[], - blocks_to_copy=[], - num_lookahead_slots=0, - infeasible_seq_groups=[], - ) - - -@dataclass -class SchedulerPrefillOutputs: - """The requests that are scheduled from a waiting queue. - - Could contain a fresh prefill requests or preempted requests that need - to be recomputed from scratch. - """ - - # Selected sequences for prefill. - seq_groups: List[ScheduledSequenceGroup] - # Ignored sequence groups. - ignored_seq_groups: List[SequenceGroup] - num_lookahead_slots: int - - @classmethod - def create_empty(cls) -> "SchedulerPrefillOutputs": - return SchedulerPrefillOutputs( - seq_groups=[], - ignored_seq_groups=[], - num_lookahead_slots=0, - ) - - -def seq_group_metadata_builder(): - return SequenceGroupMetadata(request_id="", - is_prompt=False, - seq_data={}, - sampling_params=None, - block_tables={}) - - -def scheduler_running_outputs_builder(): - return SchedulerRunningOutputs(decode_seq_groups=[], - prefill_seq_groups=[], - preempted=[], - swapped_out=[], - blocks_to_swap_out=[], - blocks_to_copy=[], - num_lookahead_slots=0, - prefill_seq_groups_list=[], - decode_seq_groups_list=[]) - - -def scheduled_seq_group_builder(): - return ScheduledSequenceGroup(SequenceGroup.__new__(SequenceGroup), - token_chunk_size=0) - # return ScheduledSequenceGroup(seq_group=None, token_chunk_size=0) - - -@dataclass -class PartialPrefillMetadata: - """Holds information about the partial prefills that are currently running - during a single iteration of the Scheduler. - When chunked prefill is enabled, we allow a certain number of seqs to be - partially prefilled during each iteration. Having multiple partial prefills - in flight allows us to minimize TTFT and avoid decode starvation in cases - where a single sequence group with a very large prompt blocks the queue for - too many iterations. - The number of long prefill requests is limited so that smaller - requests may jump the queue in front of them and get to the decode - phase faster. 
- """ - - # A minimum bound on the total number of prefills to be scheduled during - # this iteration - schedulable_prefills: int - - # The number of long prefill requests currently running - long_prefills: int - - scheduler_config: SchedulerConfig - - def can_schedule(self, seq_group: SequenceGroup) -> bool: - """When concurrent partial prefills are enabled, - we limit the number of long requests and only accept - shorter requests from the queue while running them - concurrently""" - return not (seq_group.first_seq.get_num_new_tokens() - > self.scheduler_config.long_prefill_token_threshold - and self.long_prefills - >= self.scheduler_config.max_long_partial_prefills - and self.scheduler_config.max_num_partial_prefills > 1) - - def maybe_increment_partial_prefills(self, - seq_group: SequenceGroup) -> None: - # When a new prefill is scheduled, we need to know if it is a - # long request - if (seq_group.first_seq.get_num_new_tokens() - > self.scheduler_config.long_prefill_token_threshold): - self.long_prefills += 1 - - @classmethod - def from_queues( - cls, - running: Deque[SequenceGroup], - waiting: Deque[SequenceGroup], - scheduler_config: SchedulerConfig, - ) -> "PartialPrefillMetadata": - """Create a PartialPrefillMetadata object from the current state of - the scheduler's queues. - This accounts for the currently running prefill requests, and peeks into - the waiting queue to see if there are more prefills to potentially be - scheduled during this iteration.""" - prefills = 0 - long_prefills = 0 - - waiting_long_prefills = 0 - - for sg in running: - if sg.first_seq.data.stage == SequenceStage.PREFILL: - prefills += 1 - if (sg.first_seq.get_num_new_tokens() - > scheduler_config.long_prefill_token_threshold): - long_prefills += 1 - - for sg in waiting: - # Don't bother looping through the rest of the queue if we know - # there are already at - # least max_partial_prefills requests to fill - if prefills >= scheduler_config.max_num_partial_prefills: - break - - # Don't count long requests from the waiting queue if we aren't - # going to schedule them anyway - if (sg.first_seq.get_num_new_tokens() - > scheduler_config.long_prefill_token_threshold): - if (long_prefills + waiting_long_prefills - >= scheduler_config.max_long_partial_prefills): - continue - waiting_long_prefills += 1 - prefills += 1 - - # NB: long_prefills and waiting_long_prefills are tracked separately. - # We don't account for the waiting requests here because we need to use - # this metadata to track how many have actually been scheduled. - return PartialPrefillMetadata( - schedulable_prefills=min( - prefills, scheduler_config.max_num_partial_prefills), - long_prefills=long_prefills, - scheduler_config=scheduler_config, - ) - - -class Scheduler: - - def __init__( - self, - scheduler_config: SchedulerConfig, - cache_config: CacheConfig, - lora_config: Optional[LoRAConfig], - pipeline_parallel_size: int = 1, - output_proc_callback: Optional[Callable] = None, - ) -> None: - self.scheduler_config = scheduler_config - self.cache_config = cache_config - # Note for LoRA scheduling: the current policy is extremely - # simple and NOT fair. It can lead to starvation of some - # LoRAs. This should be improved in the future. 
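For reference, a minimal standalone sketch of the long-prefill admission rule encoded by `PartialPrefillMetadata.can_schedule` above. The function name and threshold values are illustrative only; the sketch just mirrors the boolean condition in the deleted code and does not import vLLM.

```python
# Hypothetical stand-alone version of the admission rule: a request is held
# back only when it is "long", the long-prefill slots are already full, and
# concurrent partial prefills are enabled at all.
def can_schedule_prefill(num_new_tokens: int,
                         long_prefills_running: int,
                         long_prefill_token_threshold: int,
                         max_long_partial_prefills: int,
                         max_num_partial_prefills: int) -> bool:
    is_long = num_new_tokens > long_prefill_token_threshold
    long_slots_full = long_prefills_running >= max_long_partial_prefills
    concurrent_prefills_enabled = max_num_partial_prefills > 1
    return not (is_long and long_slots_full and concurrent_prefills_enabled)


# A short request is always admitted; a long one waits once the long-prefill
# slots are exhausted (numbers chosen only for illustration).
assert can_schedule_prefill(100, 1, 2048, 1, 4)
assert not can_schedule_prefill(4096, 1, 2048, 1, 4)
```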
- self.lora_config = lora_config - - version = "selfattn" - if (self.scheduler_config.runner_type == "pooling" - or self.cache_config.is_attention_free): - version = "placeholder" - - BlockSpaceManagerImpl = BlockSpaceManager.get_block_space_manager_class( - version) - - num_gpu_blocks = cache_config.num_gpu_blocks - if num_gpu_blocks: - num_gpu_blocks //= pipeline_parallel_size - - num_cpu_blocks = cache_config.num_cpu_blocks - if num_cpu_blocks: - num_cpu_blocks //= pipeline_parallel_size - - # Create the block space manager. - self.block_manager = BlockSpaceManagerImpl( - block_size=self.cache_config.block_size, - num_gpu_blocks=num_gpu_blocks, - num_cpu_blocks=num_cpu_blocks, - sliding_window=self.cache_config.sliding_window, - enable_caching=self.cache_config.enable_prefix_caching, - ) - - # Sequence groups in the WAITING state. - # Contain new prefill or preempted requests. - self.waiting: Deque[SequenceGroup] = deque() - # Sequence groups in the RUNNING state. - # Contain decode requests. - self.running: Deque[SequenceGroup] = deque() - # Sequence groups in the SWAPPED state. - # Contain decode requests that are swapped out. - self.swapped: Deque[SequenceGroup] = deque() - # Sequence groups finished requests ids since last step iteration. - # It lets the model know that any state associated with these requests - # can and must be released after the current step. - # This is used to evict the finished requests from the Mamba cache. - self._finished_requests_ids: List[str] = list() - # Time at previous scheduling step - self.prev_time = 0.0 - # Did we schedule a prompt at previous step? - self.prev_prompt = False - # Latency of the last prompt step - self.last_prompt_latency = 0.0 - # preemption mode, RECOMPUTE or SWAP - self.user_specified_preemption_mode = scheduler_config.preemption_mode - - # The following field is test-only. It is used to inject artificial - # preemption. - self.enable_artificial_preemption = ENABLE_ARTIFICIAL_PREEMPT - self.artificial_preempt_cnt = (ARTIFICIAL_PREEMPTION_MAX_CNT - if self.enable_artificial_preemption - else 0) - self.num_cumulative_preemption: int = 0 - - # Used to cache python objects - self._seq_group_metadata_cache: List[PyObjectCache] = [] - self._scheduler_running_outputs_cache: List[PyObjectCache] = [] - self._scheduled_seq_group_cache: List[PyObjectCache] = [] - - # For async output processing, we need to swap cache buffers between - # iterations. I.e. since the output processing is lagged one step, - # we cannot reuse the cached objects immediately when the schedule() - # is called again, but only when schedule() is called the second time. - self.output_proc_callback = output_proc_callback - self.use_async_output_proc = self.output_proc_callback is not None - self.num_cache_iters = 2 if self.use_async_output_proc else 1 - - self.cache_id = 0 - for i in range(self.num_cache_iters): - self._seq_group_metadata_cache.append( - PyObjectCache(seq_group_metadata_builder)) - self._scheduler_running_outputs_cache.append( - PyObjectCache(scheduler_running_outputs_builder)) - self._scheduled_seq_group_cache.append( - PyObjectCache(scheduled_seq_group_builder)) - - # For async postprocessor, the extra decode run cannot be done - # when the request reaches max_model_len. 
In this case, the request - # will be stopped during schedule() call and added to this stop list - # for processing and deallocation by the free_finished_seq_groups() - self._async_stopped: List[SequenceGroup] = [] - - # List with the chunk sizes to hand out to each sequence depending - # on how many partial prefills are running. This is slightly faster than - # running an integer division every time a prefill is scheduled. - # This splits the budget evenly among all prefills. - self.partial_prefill_budget_lookup_list = [0] * ( - self.scheduler_config.max_num_partial_prefills + 1) - self.partial_prefill_budget_lookup_list[0] = ( - scheduler_config.max_num_batched_tokens) - for i in range(1, self.scheduler_config.max_num_partial_prefills + 1): - self.partial_prefill_budget_lookup_list[i] = ( - scheduler_config.max_num_batched_tokens // i) - - @property - def next_cache_id(self): - return (self.cache_id + 1) % self.num_cache_iters - - @property - def lora_enabled(self) -> bool: - return bool(self.lora_config) - - @property - def num_decoding_tokens_per_seq(self) -> int: - """The number of new tokens.""" - return 1 - - def add_seq_group(self, seq_group: SequenceGroup) -> None: - # Add sequence groups to the waiting queue. - self.waiting.append(seq_group) - - def _add_seq_group_to_running(self, seq_group: SequenceGroup) -> None: - # Add sequence groups to the running queue. - # Only for testing purposes. - self.running.append(seq_group) - - def _add_seq_group_to_swapped(self, seq_group: SequenceGroup) -> None: - # Add sequence groups to the swapped queue. - # Only for testing purposes. - self.swapped.append(seq_group) - - def abort_seq_group( - self, - request_id: Union[str, Iterable[str]], - seq_id_to_seq_group: Optional[Dict[str, SequenceGroupBase]] = None, - ) -> None: - """Aborts a sequence group with the given ID. - - Check if the sequence group with the given ID - is present in any of the state queue. - If present, remove the sequence group from the state queue. - Also, if any of the sequences in the sequence group is not finished, - free the sequence with status `FINISHED_ABORTED`. - Otherwise, do nothing. - - Args: - request_id: The ID(s) of the sequence group to abort. - seq_id_to_seq_group: helper for groups with n>1 - """ - if isinstance(request_id, str): - request_id = (request_id, ) - request_ids = set(request_id) - seq_id_to_seq_group = seq_id_to_seq_group or {} - for state_queue in [self.waiting, self.running, self.swapped]: - aborted_groups: List[SequenceGroup] = [] - for seq_group in state_queue: - # When n>1, seq_group.request_id looks like - # foo_parallel_sample_0, while request_ids is just foo, and we - # should resolve it as real_request_id to match. - if seq_group.request_id in seq_id_to_seq_group: - real_request_id = seq_id_to_seq_group[ - seq_group.request_id].group_id - else: - real_request_id = seq_group.request_id - if real_request_id in request_ids: - # Appending aborted group into pending list. - aborted_groups.append(seq_group) - # We can't remove real_request_id in request_ids here, - # because there may be other seq groups sharing the same - # real_request_id - for aborted_group in aborted_groups: - # Remove the sequence group from the state queue. - state_queue.remove(aborted_group) - # Remove the aborted request from the Mamba cache. 
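For reference, a minimal standalone sketch of the `partial_prefill_budget_lookup_list` built in `__init__` above: index `i` holds the per-prefill token budget when `i` partial prefills are in flight, so the hot scheduling path can skip an integer division. The concrete numbers below are illustrative, not vLLM defaults.

```python
# Toy reconstruction of the lookup table: index 0 keeps the full budget,
# index i (i >= 1) splits the batched-token budget evenly among i prefills.
max_num_batched_tokens = 8192
max_num_partial_prefills = 4

lookup = [0] * (max_num_partial_prefills + 1)
lookup[0] = max_num_batched_tokens
for i in range(1, max_num_partial_prefills + 1):
    lookup[i] = max_num_batched_tokens // i

assert lookup == [8192, 8192, 4096, 2730, 2048]
```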
- self._finished_requests_ids.append(aborted_group.request_id) - for seq in aborted_group.get_seqs(): - if seq.is_finished(): - continue - seq.status = SequenceStatus.FINISHED_ABORTED - self.free_seq(seq) - if aborted_group.request_id in seq_id_to_seq_group: - del seq_id_to_seq_group[aborted_group.request_id] - - self._free_seq_group_cross_attn_blocks(aborted_group) - - def _free_seq_group_cross_attn_blocks( - self, - seq_group: SequenceGroup, - ) -> None: - """ - Free a sequence group from a cross-attention block table. - Has no effect on decoder-only models. - """ - if seq_group.is_encoder_decoder(): - self.block_manager.free_cross(seq_group) - - def has_unfinished_seqs(self) -> bool: - return (len(self.waiting) != 0 or len(self.running) != 0 - or len(self.swapped) != 0) - - def get_prefix_cache_hit_rate(self, device: Device) -> float: - return self.block_manager.get_prefix_cache_hit_rate(device) - - def reset_prefix_cache(self, device: Optional[Device] = None) -> bool: - return self.block_manager.reset_prefix_cache(device) - - def get_num_unfinished_seq_groups(self) -> int: - return len(self.waiting) + len(self.running) + len(self.swapped) - - def get_and_reset_finished_requests_ids(self) -> List[str]: - """Flushes the list of request ids of previously finished seq_groups.""" - finished_requests_ids = self._finished_requests_ids - self._finished_requests_ids = list() - return finished_requests_ids - - def _schedule_running( - self, - budget: SchedulingBudget, - curr_loras: Optional[Set[int]], - enable_chunking: bool = False, - partial_prefill_metadata: Optional[PartialPrefillMetadata] = None, - ) -> SchedulerRunningOutputs: - """Schedule sequence groups that are running. - - Running queue should include decode and chunked prefill requests. - - Args: - budget: The scheduling budget. The argument is in-place updated - when any decodes are preempted. - curr_loras: Currently batched lora request ids. The argument is - in-place updated when any decodes are preempted. - enable_chunking: If True, seq group can be chunked and only a - chunked number of tokens are scheduled if - `budget.num_batched_tokens` has not enough capacity to schedule - all tokens. - partial_prefill_metadata: information about the partial prefills - that are currently running - - Returns: - SchedulerRunningOutputs. - """ - ret: SchedulerRunningOutputs = self._scheduler_running_outputs_cache[ - self.cache_id].get_object() - ret.blocks_to_swap_out.clear() - ret.blocks_to_copy.clear() - ret.decode_seq_groups.clear() - ret.prefill_seq_groups.clear() - ret.preempted.clear() - ret.swapped_out.clear() - - ret.num_lookahead_slots = self._get_num_lookahead_slots( - is_prefill=False, enable_chunking=enable_chunking) - - ret.decode_seq_groups_list.clear() - ret.prefill_seq_groups_list.clear() - - # Blocks that need to be swapped or copied before model execution. - blocks_to_swap_out: List[Tuple[int, int]] = ret.blocks_to_swap_out - blocks_to_copy: List[Tuple[int, int]] = ret.blocks_to_copy - - decode_seq_groups: List[ScheduledSequenceGroup] = ret.decode_seq_groups - prefill_seq_groups: List[ - ScheduledSequenceGroup] = ret.prefill_seq_groups - preempted: List[SequenceGroup] = ret.preempted - swapped_out: List[SequenceGroup] = ret.swapped_out - - running_queue = self.running - assert len(self._async_stopped) == 0 - while running_queue: - seq_group = running_queue[0] - # We discard the cached tokens info here because we don't need it - # for running sequence: - # 1. 
If a sequence is running with chunked prefill, the cached - # tokens info was already used for the first prefill. - # 2. If a sequence is running with non-chunked prefill, then - # there it's a decoding sequence, and the cached tokens info is - # irrelevant. - num_uncached_new_tokens, _ = \ - self._get_num_new_uncached_and_cached_tokens( - seq_group, - SequenceStatus.RUNNING, - enable_chunking, - budget, - partial_prefill_metadata, - ) - - num_running_tokens = num_uncached_new_tokens - if num_running_tokens == 0: - # No budget => Stop - break - - running_queue.popleft() - - # With async postprocessor, an extra decode run is done - # to process the final tokens. The check below avoids this extra - # decode run when the model max len is reached, in order to avoid - # a memory overflow. - if (self.use_async_output_proc and seq_group.seqs[0].get_len() - > self.scheduler_config.max_model_len): - self._async_stopped.append(seq_group) - continue - - # NOTE(woosuk): Preemption happens only when there is no available - # slot to keep all the sequence groups in the RUNNING state. - while not self._can_append_slots(seq_group, enable_chunking): - budget.subtract_num_batched_tokens(seq_group.request_id, - num_running_tokens) - num_running_seqs = seq_group.get_max_num_running_seqs() - budget.subtract_num_seqs(seq_group.request_id, - num_running_seqs) - - if (curr_loras is not None and seq_group.lora_int_id > 0 - and seq_group.lora_int_id in curr_loras): - curr_loras.remove(seq_group.lora_int_id) - - # Determine victim sequence - cont_loop = True - if running_queue: - # Preempt the lowest-priority sequence group. - victim_seq_group = running_queue.pop() - else: - # No other sequence group can be preempted. - # Preempt the current sequence group. - # Note: This is also where we stop this loop - # (since there is nothing else to preempt) - victim_seq_group = seq_group - cont_loop = False - - # With async postprocessor, before preempting a sequence - # we need to ensure it has no pending async postprocessor - do_preempt = True - if self.use_async_output_proc: - assert self.output_proc_callback is not None - self.output_proc_callback( - request_id=victim_seq_group.request_id) - - # It may be that the async pending "victim_seq_group" - # becomes finished, in which case we simply free it. - if victim_seq_group.is_finished(): - self._free_finished_seq_group(victim_seq_group) - do_preempt = False - - # Do preemption - if do_preempt: - preempted_mode = self._preempt(victim_seq_group, - blocks_to_swap_out) - if preempted_mode == PreemptionMode.RECOMPUTE: - preempted.append(victim_seq_group) - else: - swapped_out.append(victim_seq_group) - - if not cont_loop: - break - else: - self._append_slots(seq_group, blocks_to_copy, enable_chunking) - is_prefill = seq_group.is_prefill() - - scheduled_seq_group: ScheduledSequenceGroup = ( - self._scheduled_seq_group_cache[ - self.cache_id].get_object()) - scheduled_seq_group.seq_group = seq_group - if is_prefill: - scheduled_seq_group.token_chunk_size = num_running_tokens - prefill_seq_groups.append(scheduled_seq_group) - ret.prefill_seq_groups_list.append(seq_group) - else: - scheduled_seq_group.token_chunk_size = 1 - decode_seq_groups.append(scheduled_seq_group) - ret.decode_seq_groups_list.append(seq_group) - - budget.add_num_batched_tokens(seq_group.request_id, - num_running_tokens) - # OPTIMIZATION: Note that get_max_num_running_seqs is - # expensive. 
For the default scheduling chase where - # enable_chunking is False, num_seqs are updated before running - # this method, so we don't have to update it again here. - if enable_chunking: - num_running_seqs = seq_group.get_max_num_running_seqs() - budget.add_num_seqs(seq_group.request_id, num_running_seqs) - if curr_loras is not None and seq_group.lora_int_id > 0: - curr_loras.add(seq_group.lora_int_id) - - self._scheduler_running_outputs_cache[self.next_cache_id].reset() - self._scheduled_seq_group_cache[self.next_cache_id].reset() - - return ret - - def _schedule_swapped( - self, - budget: SchedulingBudget, - curr_loras: Optional[Set[int]], - enable_chunking: bool = False, - ) -> SchedulerSwappedInOutputs: - """Schedule sequence groups that are swapped out. - - It schedules swapped requests as long as it fits `budget` and - curr_loras <= max_lora from the scheduling config. The input arguments - `budget` and `curr_loras` are updated based on scheduled seq_groups. - - Args: - budget: The scheduling budget. The argument is in-place updated - when any requests are swapped in. - curr_loras: Currently batched lora request ids. The argument is - in-place updated when any requests are swapped in. - enable_chunking: If True, seq group can be chunked and only a - chunked number of tokens are scheduled if - `budget.num_batched_tokens` has not enough capacity to schedule - all tokens. - - Returns: - SchedulerSwappedInOutputs. - """ - # Blocks that need to be swapped or copied before model execution. - blocks_to_swap_in: List[Tuple[int, int]] = [] - blocks_to_copy: List[Tuple[int, int]] = [] - decode_seq_groups: List[ScheduledSequenceGroup] = [] - prefill_seq_groups: List[ScheduledSequenceGroup] = [] - infeasible_seq_groups: List[SequenceGroup] = [] - - swapped_queue = self.swapped - - leftover_swapped: Deque[SequenceGroup] = deque() - while swapped_queue: - seq_group = swapped_queue[0] - - # If the sequence group cannot be swapped in, stop. - is_prefill = seq_group.is_prefill() - alloc_status = self.block_manager.can_swap_in( - seq_group, - self._get_num_lookahead_slots(is_prefill, enable_chunking)) - if alloc_status == AllocStatus.LATER: - break - elif alloc_status == AllocStatus.NEVER: - logger.warning( - "Failing the request %s because there's not enough kv " - "cache blocks to run the entire sequence.", - seq_group.request_id, - ) - for seq in seq_group.get_seqs(): - seq.status = SequenceStatus.FINISHED_IGNORED - infeasible_seq_groups.append(seq_group) - swapped_queue.popleft() - continue - - lora_int_id = 0 - if self.lora_enabled: - lora_int_id = seq_group.lora_int_id - assert curr_loras is not None - assert self.lora_config is not None - if (lora_int_id > 0 and (lora_int_id not in curr_loras) - and len(curr_loras) >= self.lora_config.max_loras): - # We don't have a space for another LoRA, so - # we ignore this request for now. - leftover_swapped.appendleft(seq_group) - swapped_queue.popleft() - continue - - # The total number of sequences in the RUNNING state should not - # exceed the maximum number of sequences. 
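For reference, a minimal standalone sketch of the budget check that gates admission here and in the other scheduling paths: token and sequence counts are deduplicated by request id, and a request is admitted only if both the token budget and the sequence cap hold. `TinyBudget` is a toy stand-in written for this note, not vLLM's `SchedulingBudget`.

```python
# Toy budget with the same two invariants as the scheduler's budget object:
# per-request-id deduplication and a joint token/sequence admission check.
from dataclasses import dataclass, field
from typing import Set


@dataclass
class TinyBudget:
    token_budget: int
    max_num_seqs: int
    num_batched_tokens: int = 0
    num_curr_seqs: int = 0
    _token_req_ids: Set[str] = field(default_factory=set)
    _seq_req_ids: Set[str] = field(default_factory=set)

    def can_schedule(self, *, num_new_tokens: int, num_new_seqs: int) -> bool:
        return (self.num_batched_tokens + num_new_tokens <= self.token_budget
                and self.num_curr_seqs + num_new_seqs <= self.max_num_seqs)

    def add_num_batched_tokens(self, req_id: str, n: int) -> None:
        if req_id in self._token_req_ids:  # already counted for this request
            return
        self._token_req_ids.add(req_id)
        self.num_batched_tokens += n

    def add_num_seqs(self, req_id: str, n: int) -> None:
        if req_id in self._seq_req_ids:
            return
        self._seq_req_ids.add(req_id)
        self.num_curr_seqs += n


budget = TinyBudget(token_budget=2048, max_num_seqs=8)
budget.add_num_batched_tokens("req-0", 512)
budget.add_num_batched_tokens("req-0", 512)  # ignored: same request id
assert budget.num_batched_tokens == 512
assert budget.can_schedule(num_new_tokens=1536, num_new_seqs=1)
```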
- num_new_seqs = seq_group.get_max_num_running_seqs() - num_new_tokens_uncached, num_new_tokens_cached = ( - self._get_num_new_uncached_and_cached_tokens( - seq_group, SequenceStatus.SWAPPED, enable_chunking, - budget)) - - if num_new_tokens_uncached == 0 or not budget.can_schedule( - num_new_tokens=num_new_tokens_uncached, - num_new_seqs=num_new_seqs, - ): - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.SWAPPED) - break - - if lora_int_id > 0 and curr_loras is not None: - curr_loras.add(lora_int_id) - swapped_queue.popleft() - self._swap_in(seq_group, blocks_to_swap_in) - self._append_slots(seq_group, blocks_to_copy, enable_chunking) - if is_prefill: - prefill_seq_groups.append( - ScheduledSequenceGroup( - seq_group, - token_chunk_size=num_new_tokens_uncached + - num_new_tokens_cached, - )) - else: - decode_seq_groups.append( - ScheduledSequenceGroup(seq_group, token_chunk_size=1)) - budget.add_num_batched_tokens( - seq_group.request_id, - num_batched_tokens=num_new_tokens_uncached, - num_cached_tokens=num_new_tokens_cached, - ) - budget.add_num_seqs(seq_group.request_id, num_new_seqs) - - swapped_queue.extendleft(leftover_swapped) - - return SchedulerSwappedInOutputs( - decode_seq_groups=decode_seq_groups, - prefill_seq_groups=prefill_seq_groups, - blocks_to_swap_in=blocks_to_swap_in, - blocks_to_copy=blocks_to_copy, - num_lookahead_slots=self._get_num_lookahead_slots( - is_prefill=False, enable_chunking=enable_chunking), - infeasible_seq_groups=infeasible_seq_groups, - ) - - def _get_prompt_limit(self, seq_group: SequenceGroup) -> int: - if self.scheduler_config.chunked_prefill_enabled: - prompt_limit = self.scheduler_config.max_model_len - else: - prompt_limit = min( - self.scheduler_config.max_model_len, - self.scheduler_config.max_num_batched_tokens, - ) - - # Model is fine tuned with long context. Return the fine tuned max_len. - if seq_group.lora_request and seq_group.lora_request.long_lora_max_len: - assert prompt_limit <= seq_group.lora_request.long_lora_max_len - return seq_group.lora_request.long_lora_max_len - else: - return prompt_limit - - def _get_priority(self, - seq_group: SequenceGroup) -> Tuple[Optional[int], float]: - """Get the priority of the sequence group. - Highest preference to user-defined priority, followed by arrival time. - Args: - seq_group: The sequence group input. - Returns: - The priority of the sequence group. - """ - return seq_group.priority, seq_group.arrival_time - - def _schedule_priority_preemption( - self, - budget: SchedulingBudget, - ) -> int: - """Sorts waiting and running queue. Also, force preempt requests - from the running queue if their priority is lower. - Priority-based preemption is used with the priority policy. - Args: - budget: The scheduling budget. The argument is in-place updated - when any requests are scheduled. - Returns: - A count of priority-based preemptions. 
- """ - - waiting_queue = self.waiting - - running_queue = deque(sorted(self.running, key=self._get_priority)) - - blocks_to_swap_out: List[Tuple[int, int]] = [] - force_preemption_count = 0 - - if waiting_queue: - seq_group = waiting_queue.popleft() - num_new_seqs = seq_group.get_max_num_running_seqs() - num_new_tokens_uncached, _ = \ - self._get_num_new_uncached_and_cached_tokens( - seq_group, SequenceStatus.WAITING, False, budget) - - # Only preempt if priority inversion exists - while running_queue and self._get_priority( - running_queue[-1]) > self._get_priority(seq_group): - # Only preempt if waiting sequence cannot be allocated - can_allocate = self.block_manager.can_allocate(seq_group) - if (num_new_tokens_uncached > 0 - and can_allocate == AllocStatus.OK - and budget.can_schedule( - num_new_tokens=num_new_tokens_uncached, - num_new_seqs=num_new_seqs, - )): - break - - # Adjust budget to remove the victim sequence group - vseq_group = running_queue.pop() - num_running_tokens_uncached, _ = ( - self._get_num_new_uncached_and_cached_tokens( - vseq_group, SequenceStatus.RUNNING, False, budget)) - budget.subtract_num_batched_tokens( - vseq_group.request_id, num_running_tokens_uncached) - num_running_seqs = vseq_group.get_max_num_running_seqs() - budget.subtract_num_seqs(vseq_group.request_id, - num_running_seqs) - - # Preempt out the victim sequence group - self._preempt(vseq_group, blocks_to_swap_out) - waiting_queue.appendleft(vseq_group) - force_preemption_count += 1 - # Put the sequence back into the waiting queue - waiting_queue.appendleft(seq_group) - - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - - waiting_queue = deque(sorted(waiting_queue, key=self._get_priority)) - - self.waiting = waiting_queue - self.running = running_queue - return force_preemption_count - - def _schedule_prefills( - self, - budget: SchedulingBudget, - curr_loras: Optional[Set[int]], - enable_chunking: bool = False, - partial_prefill_metadata: Optional[PartialPrefillMetadata] = None, - ) -> SchedulerPrefillOutputs: - """Schedule sequence groups that are in prefill stage. - - Note that the current scheduler treats PREEMPTED_FOR_RECOMPUTE - as a new prefill (that starts from beginning -> most recently generated - tokens). - - It schedules waiting requests as long as it fits `budget` and - curr_loras <= max_lora from the scheduling config. The input arguments - `budget` and `curr_loras` are updated based on scheduled seq_groups. - - Args: - budget: The scheduling budget. The argument is in-place updated - when any requests are scheduled. - curr_loras: Currently batched lora request ids. The argument is - in-place updated when any requests are scheduled. - enable_chunking: If True, seq group can be chunked and only a - chunked number of tokens are scheduled if - `budget.num_batched_tokens` has not enough capacity to schedule - all tokens. - partial_prefill_metadata: information about the partial prefills - that are currently running - - Returns: - SchedulerPrefillOutputs. 
- """ - if budget.remaining_token_budget() == 0: - # Do nothing: Can't add any more prefill anyway - return SchedulerPrefillOutputs( - seq_groups=[], - ignored_seq_groups=[], - num_lookahead_slots=self._get_num_lookahead_slots( - is_prefill=True, enable_chunking=enable_chunking), - ) - ignored_seq_groups: List[SequenceGroup] = [] - seq_groups: List[ScheduledSequenceGroup] = [] - using_prompt_embeds: bool = False - - waiting_queue = self.waiting - - leftover_waiting_sequences: Deque[SequenceGroup] = deque() - while self._passed_delay(time.time()) and waiting_queue: - seq_group = waiting_queue[0] - - waiting_seqs = seq_group.get_seqs(status=SequenceStatus.WAITING) - assert len(waiting_seqs) == 1, ( - "Waiting sequence group should have only one prompt " - "sequence.") - if (partial_prefill_metadata is not None - and not partial_prefill_metadata.can_schedule(seq_group)): - leftover_waiting_sequences.appendleft(seq_group) - waiting_queue.popleft() - continue - num_new_tokens_uncached, num_new_tokens_cached = ( - self._get_num_new_uncached_and_cached_tokens( - seq_group, - SequenceStatus.WAITING, - enable_chunking, - budget, - partial_prefill_metadata=partial_prefill_metadata, - )) - num_new_tokens = num_new_tokens_uncached + num_new_tokens_cached - - if not enable_chunking: - num_prompt_tokens = waiting_seqs[0].get_len() - assert num_new_tokens == num_prompt_tokens - - prompt_limit = self._get_prompt_limit(seq_group) - if num_new_tokens > prompt_limit: - logger.warning( - "Input prompt (%d tokens) is too long" - " and exceeds limit of %d", - num_new_tokens, - prompt_limit, - ) - for seq in waiting_seqs: - seq.status = SequenceStatus.FINISHED_IGNORED - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.FINISHED_IGNORED) - ignored_seq_groups.append(seq_group) - waiting_queue.popleft() - continue - - num_lookahead_slots: int = 0 - - # If the sequence group cannot be allocated, stop. - can_allocate = self.block_manager.can_allocate( - seq_group, num_lookahead_slots=num_lookahead_slots) - if can_allocate == AllocStatus.LATER: - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - break - elif can_allocate == AllocStatus.NEVER: - logger.warning( - "Input prompt (%d tokens) + lookahead slots (%d) is " - "too long and exceeds the capacity of block_manager", - num_new_tokens, - num_lookahead_slots, - ) - for seq in waiting_seqs: - seq.status = SequenceStatus.FINISHED_IGNORED - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.FINISHED_IGNORED) - ignored_seq_groups.append(seq_group) - waiting_queue.popleft() - continue - - # We cannot mix sequence groups that use prompt embeds and - # those that do not. - if len(seq_groups) == 0: - using_prompt_embeds = seq_group.uses_prompt_embeds() - if using_prompt_embeds != seq_group.uses_prompt_embeds(): - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - leftover_waiting_sequences.appendleft(seq_group) - waiting_queue.popleft() - continue - - lora_int_id = 0 - if self.lora_enabled: - lora_int_id = seq_group.lora_int_id - assert curr_loras is not None - assert self.lora_config is not None - if (self.lora_enabled and lora_int_id > 0 - and lora_int_id not in curr_loras - and len(curr_loras) >= self.lora_config.max_loras): - # We don't have a space for another LoRA, so - # we ignore this request for now. 
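For reference, a minimal standalone sketch of the prompt-length limit computed by `_get_prompt_limit` and applied in the check above: with chunked prefill a prompt may be as long as the model context, otherwise it must also fit into a single batch. The long-LoRA override in the deleted code is omitted here, and the numbers are illustrative.

```python
# Simplified prompt limit: prompts longer than this are ignored rather than
# scheduled (the deleted code marks them FINISHED_IGNORED).
def prompt_limit(max_model_len: int, max_num_batched_tokens: int,
                 chunked_prefill_enabled: bool) -> int:
    if chunked_prefill_enabled:
        return max_model_len
    return min(max_model_len, max_num_batched_tokens)


assert prompt_limit(4096, 2048, chunked_prefill_enabled=False) == 2048
assert prompt_limit(4096, 2048, chunked_prefill_enabled=True) == 4096
```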
- self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - leftover_waiting_sequences.appendleft(seq_group) - waiting_queue.popleft() - continue - - if (budget.num_batched_tokens - >= self.scheduler_config.max_num_batched_tokens): - # We've reached the budget limit - since there might be - # continuous prefills in the running queue, we should break - # to avoid scheduling any new prefills. - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - break - - num_new_seqs = seq_group.get_max_num_running_seqs() - if num_new_tokens_uncached == 0 or not budget.can_schedule( - num_new_tokens=num_new_tokens_uncached, - num_new_seqs=num_new_seqs, - ): - self.remove_seq_from_computed_blocks_tracker( - seq_group, SequenceStatus.WAITING) - break - - # Can schedule this request. - if curr_loras is not None and lora_int_id > 0: - curr_loras.add(lora_int_id) - waiting_queue.popleft() - self._allocate_and_set_running(seq_group) - - if partial_prefill_metadata is not None: - partial_prefill_metadata.maybe_increment_partial_prefills( - seq_group) - - seq_groups.append( - ScheduledSequenceGroup(seq_group=seq_group, - token_chunk_size=num_new_tokens)) - budget.add_num_batched_tokens( - seq_group.request_id, - num_batched_tokens=num_new_tokens_uncached, - num_cached_tokens=num_new_tokens_cached, - ) - budget.add_num_seqs(seq_group.request_id, num_new_seqs) - - # Queue requests that couldn't be scheduled. - waiting_queue.extendleft(leftover_waiting_sequences) - if len(seq_groups) > 0: - self.prev_prompt = True - - return SchedulerPrefillOutputs( - seq_groups=seq_groups, - ignored_seq_groups=ignored_seq_groups, - num_lookahead_slots=self._get_num_lookahead_slots( - is_prefill=True, enable_chunking=enable_chunking), - ) - - def _schedule_default(self) -> SchedulerOutputs: - """Schedule queued requests. - - The current policy is designed to optimize the throughput. First, - it batches as many prefill requests as possible. And it schedules - decodes. If there's a pressure on GPU memory, decode requests can - be swapped or preempted. - """ - # Include running requests to the budget. - budget = SchedulingBudget( - token_budget=self.scheduler_config.max_num_batched_tokens, - max_num_seqs=self.scheduler_config.max_num_seqs, - ) - # Make sure we include num running seqs before scheduling prefill, - # so that we don't schedule beyond max_num_seqs for prefill. - for seq_group in self.running: - budget.add_num_seqs(seq_group.request_id, - seq_group.get_max_num_running_seqs()) - curr_loras = (set( - seq_group.lora_int_id for seq_group in self.running - if seq_group.lora_int_id > 0) if self.lora_enabled else None) - - prefills = SchedulerPrefillOutputs.create_empty() - running_scheduled = SchedulerRunningOutputs.create_empty() - swapped_in = SchedulerSwappedInOutputs.create_empty() - - # If any requests are swapped, prioritized swapped requests. - if not self.swapped: - prefills = self._schedule_prefills(budget, - curr_loras, - enable_chunking=False) - - if len(prefills.seq_groups - ) == 0 and self.scheduler_config.policy == "priority": - self._schedule_priority_preemption(budget) - - # Don't schedule decodes if prefills are scheduled. - # NOTE: If `_schedule_prefills` doesn't enable chunking, self.running - # only contains decode requests, not chunked prefills. - if len(prefills.seq_groups) == 0: - running_scheduled = self._schedule_running(budget, - curr_loras, - enable_chunking=False) - - # If any sequence group is preempted, do not swap in any sequence - # group. 
because it means there's no slot for new running requests. - if (len(running_scheduled.preempted) + - len(running_scheduled.swapped_out) == 0): - swapped_in = \ - self._schedule_swapped(budget, curr_loras) - - assert (budget.num_batched_tokens - <= self.scheduler_config.max_num_batched_tokens) - assert budget.num_curr_seqs <= self.scheduler_config.max_num_seqs - - # Update waiting requests. - self.waiting.extendleft(running_scheduled.preempted) - # Update new running requests. - if len(prefills.seq_groups) > 0: - self.running.extend([s.seq_group for s in prefills.seq_groups]) - - self.running.extend(running_scheduled.decode_seq_groups_list) - - if len(swapped_in.decode_seq_groups) > 0: - self.running.extend( - [s.seq_group for s in swapped_in.decode_seq_groups]) - - # Update swapped requests. - self.swapped.extend(running_scheduled.swapped_out) - preempted = len(running_scheduled.preempted) + len( - running_scheduled.swapped_out) - - # There should be no prefill from running queue because this policy - # doesn't allow chunked prefills. - assert len(running_scheduled.prefill_seq_groups) == 0 - assert len(swapped_in.prefill_seq_groups) == 0 - - # Merge lists - num_prefill_groups = len(prefills.seq_groups) - ignored_seq_groups_for_embeds = list[SequenceGroup]() - if num_prefill_groups > 0: - scheduled_seq_groups = prefills.seq_groups - scheduled_seq_groups.extend(running_scheduled.decode_seq_groups) - ignored_seq_groups_for_embeds.clear() - else: - scheduled_seq_groups = running_scheduled.decode_seq_groups - if len(scheduled_seq_groups) > 0: - using_prompt_embeds = scheduled_seq_groups[ - 0].seq_group.uses_prompt_embeds() - ignored_seq_groups_for_embeds.clear() - indices_ignored = list[int]() - for i, schedule_seq_group in enumerate(scheduled_seq_groups): - if using_prompt_embeds !=\ - schedule_seq_group.seq_group.uses_prompt_embeds(): - ignored_seq_groups_for_embeds.append( - schedule_seq_group.seq_group) - indices_ignored.append(i) - if len(ignored_seq_groups_for_embeds) > 0: - scheduled_seq_groups = [ - group for i, group in enumerate(scheduled_seq_groups) - if i not in indices_ignored - ] - else: - ignored_seq_groups_for_embeds.clear() - - scheduled_seq_groups.extend(swapped_in.decode_seq_groups) - - blocks_to_copy = running_scheduled.blocks_to_copy - blocks_to_copy.extend(swapped_in.blocks_to_copy) - - ignored_seq_groups = prefills.ignored_seq_groups - ignored_seq_groups.extend(ignored_seq_groups_for_embeds) - ignored_seq_groups.extend(swapped_in.infeasible_seq_groups) - - return SchedulerOutputs( - scheduled_seq_groups=scheduled_seq_groups, - num_prefill_groups=num_prefill_groups, - num_batched_tokens=budget.num_batched_tokens + - budget.num_cached_tokens, - blocks_to_swap_in=swapped_in.blocks_to_swap_in, - blocks_to_swap_out=running_scheduled.blocks_to_swap_out, - blocks_to_copy=blocks_to_copy, - ignored_seq_groups=ignored_seq_groups, - num_lookahead_slots=running_scheduled.num_lookahead_slots, - running_queue_size=len(self.running), - preempted=preempted, - ) - - def _schedule_chunked_prefill(self) -> SchedulerOutputs: - """Schedule queued requests. - - Chunked prefill allows to chunk prefill requests, batch them together - with decode requests. This policy 1. schedule as many decoding requests - as possible. 2. schedule chunked prefill requests that are not - finished. 3. schedule swapped request. 4. schedule new prefill - requests. 
- - The policy can sustain the high GPU utilization because it can put - prefill and decodes requests to the same batch, while it improves - inter token latency because decodes requests don't need to be blocked - by prefill requests. - """ - budget = SchedulingBudget( - token_budget=self.scheduler_config.max_num_batched_tokens, - max_num_seqs=self.scheduler_config.max_num_seqs, - ) - curr_loras: Set[int] = set() - - prefills = SchedulerPrefillOutputs.create_empty() - swapped_in = SchedulerSwappedInOutputs.create_empty() - - # Create partial prefill metadata - partial_prefill_metadata = PartialPrefillMetadata.from_queues( - running=self.running, - waiting=self.waiting, - scheduler_config=self.scheduler_config, - ) - - # Decoding should be always scheduled first by fcfs. - running_scheduled = self._schedule_running( - budget, - curr_loras, - enable_chunking=True, - partial_prefill_metadata=partial_prefill_metadata, - ) - - # Schedule swapped out requests. - # If preemption happens, it means we don't have space for swap-in. - if len(running_scheduled.preempted) + len( - running_scheduled.swapped_out) == 0: - swapped_in = self._schedule_swapped(budget, curr_loras) - - prefills = self._schedule_prefills( - budget, - curr_loras, - enable_chunking=True, - partial_prefill_metadata=partial_prefill_metadata, - ) - - assert (budget.num_batched_tokens - <= self.scheduler_config.max_num_batched_tokens) - assert budget.num_curr_seqs <= self.scheduler_config.max_num_seqs - - # Update waiting requests. - self.waiting.extendleft(running_scheduled.preempted) - - # Update new running requests. - # By default, vLLM scheduler prioritizes prefills. - # Once chunked prefill is enabled, - # the policy is changed to prioritize decode requests. - self.running.extend( - [s.seq_group for s in swapped_in.decode_seq_groups]) - self.running.extend( - [s.seq_group for s in swapped_in.prefill_seq_groups]) - self.running.extend( - [s.seq_group for s in running_scheduled.decode_seq_groups]) - # Because multiple prefills may be running concurrently, we need to - # make sure that prefills which are scheduled to finish are listed - # before those that won't. This is so that on the next scheduling - # iteration when they have transitioned to the decode stage, they are - # properly prioritized over sequences that are still in the prefill - # stage. - self.running.extend( - self._order_finishing_prefills_first( - running_scheduled.prefill_seq_groups)) - self.running.extend([s.seq_group for s in prefills.seq_groups]) - - # Update swapped requests. - self.swapped.extend(running_scheduled.swapped_out) - # Put prefills first due to Attention backend ordering assumption. 
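For reference, a minimal standalone sketch of how the scheduled batch is assembled just below: every prefill group (newly scheduled, running chunked, or swapped in) is listed before every decode group, and the prefill count marks the prefix boundary that the attention backends rely on. The list contents are placeholders.

```python
# Toy batch assembly: prefills first, then decodes, with num_prefill_groups
# recording how many leading entries are prefills.
new_prefills = ["p0", "p1"]
running_chunked_prefills = ["p2"]
swapped_in_prefills: list = []
running_decodes = ["d0", "d1"]
swapped_in_decodes = ["d2"]

scheduled = (new_prefills + running_chunked_prefills + swapped_in_prefills
             + running_decodes + swapped_in_decodes)
num_prefill_groups = (len(new_prefills) + len(running_chunked_prefills)
                      + len(swapped_in_prefills))

assert scheduled[:num_prefill_groups] == ["p0", "p1", "p2"]
assert scheduled[num_prefill_groups:] == ["d0", "d1", "d2"]
```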
- scheduled_seq_groups = (prefills.seq_groups + - running_scheduled.prefill_seq_groups + - swapped_in.prefill_seq_groups + - running_scheduled.decode_seq_groups + - swapped_in.decode_seq_groups) - num_prefill_groups = (len(prefills.seq_groups) + - len(swapped_in.prefill_seq_groups) + - len(running_scheduled.prefill_seq_groups)) - return SchedulerOutputs( - scheduled_seq_groups=scheduled_seq_groups, - num_prefill_groups=num_prefill_groups, - num_batched_tokens=budget.num_batched_tokens + - budget.num_cached_tokens, - blocks_to_swap_in=swapped_in.blocks_to_swap_in, - blocks_to_swap_out=running_scheduled.blocks_to_swap_out, - blocks_to_copy=running_scheduled.blocks_to_copy + - swapped_in.blocks_to_copy, - ignored_seq_groups=prefills.ignored_seq_groups + - swapped_in.infeasible_seq_groups, - num_lookahead_slots=0, - running_queue_size=len(self.running), - preempted=(len(running_scheduled.preempted) + - len(running_scheduled.swapped_out)), - ) - - def _order_finishing_prefills_first( - self, scheduled_prefill_seqs: List[ScheduledSequenceGroup] - ) -> List[SequenceGroup]: - """Returns a list of prefilling SequenceGroups where sequences that are - scheduled to finish prefilling are listed first""" - finishing = [ - s.seq_group for s in scheduled_prefill_seqs - if s.seq_group.get_num_uncomputed_tokens() == s.token_chunk_size - ] - not_finishing = [ - s.seq_group for s in scheduled_prefill_seqs - if s.seq_group.get_num_uncomputed_tokens() != s.token_chunk_size - ] - return finishing + not_finishing - - def _schedule(self) -> SchedulerOutputs: - """Schedule queued requests.""" - if self.scheduler_config.chunked_prefill_enabled: - return self._schedule_chunked_prefill() - else: - return self._schedule_default() - - def _can_append_slots(self, seq_group: SequenceGroup, - enable_chunking: bool) -> bool: - """Determine whether or not we have enough space in the KV cache to - continue generation of the sequence group. - """ - # It is True only for testing case to trigger artificial preemption. - if (self.enable_artificial_preemption - and random.uniform(0, 1) < ARTIFICIAL_PREEMPTION_PROB - and self.artificial_preempt_cnt > 0): - self.artificial_preempt_cnt -= 1 - return False - - is_prefill = seq_group.is_prefill() - num_lookahead_slots = self._get_num_lookahead_slots( - is_prefill, enable_chunking) - - return self.block_manager.can_append_slots( - seq_group=seq_group, num_lookahead_slots=num_lookahead_slots) - - def _allow_async_output_proc(self, seq_group: SequenceGroup) -> bool: - # async_output_proc is allowed only when we have a single sequence - # in the sequence group - no_single_seq = seq_group.sampling_params is None or ( - seq_group.sampling_params.n == 1) - return no_single_seq - - def schedule( - self - ) -> Tuple[List[SequenceGroupMetadata], SchedulerOutputs, bool]: - # Schedule sequence groups. - # This function call changes the internal states of the scheduler - # such as self.running, self.swapped, and self.waiting. - scheduler_start_time = time.perf_counter() - - scheduler_outputs: SchedulerOutputs = self._schedule() - now = time.time() - - if not self.cache_config.enable_prefix_caching: - common_computed_block_nums = [] - - allow_async_output_proc: bool = self.use_async_output_proc - - # Create input data structures. 
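For reference, a minimal standalone sketch of the ordering rule in `_order_finishing_prefills_first` above: prefills whose scheduled chunk covers all of their remaining uncomputed tokens are listed before prefills that still have tokens left, so on the next iteration they are prioritized as decodes. The tuple representation is an illustration, not the real sequence-group type.

```python
# Each entry stands for a prefill as (remaining_uncomputed_tokens, chunk_size);
# a prefill "finishes" this step when the chunk covers everything remaining.
def order_finishing_first(prefills):
    finishing = [p for p in prefills if p[0] == p[1]]
    not_finishing = [p for p in prefills if p[0] != p[1]]
    return finishing + not_finishing


assert order_finishing_first([(900, 512), (512, 512)]) == [(512, 512),
                                                           (900, 512)]
```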
- seq_group_metadata_list: List[SequenceGroupMetadata] = [] - for i, scheduled_seq_group in enumerate( - scheduler_outputs.scheduled_seq_groups): - seq_group = scheduled_seq_group.seq_group - token_chunk_size = scheduled_seq_group.token_chunk_size - seq_group.maybe_set_first_scheduled_time(now) - - seq_group_metadata = self._seq_group_metadata_cache[ - self.cache_id].get_object() - seq_group_metadata.seq_data.clear() - seq_group_metadata.block_tables.clear() - - # seq_id -> SequenceData - seq_data: Dict[int, SequenceData] = {} - # seq_id -> physical block numbers - block_tables: Dict[int, List[int]] = {} - - if seq_group.is_encoder_decoder(): - # Encoder associated with SequenceGroup - encoder_seq = seq_group.get_encoder_seq() - assert encoder_seq is not None - encoder_seq_data = encoder_seq.data - # Block table for cross-attention - # Also managed at SequenceGroup level - cross_block_table = self.block_manager.get_cross_block_table( - seq_group) - else: - encoder_seq_data = None - cross_block_table = None - - for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): - seq_id = seq.seq_id - seq_data[seq_id] = seq.data - block_tables[seq_id] = self.block_manager.get_block_table(seq) - self.block_manager.access_all_blocks_in_seq(seq, now) - - if self.cache_config.enable_prefix_caching: - common_computed_block_nums = ( - self.block_manager.get_common_computed_block_ids( - seq_group.get_seqs(status=SequenceStatus.RUNNING))) - - do_sample = True - is_prompt = seq_group.is_prefill() - # We should send the metadata to workers when the first prefill - # is sent. Subsequent requests could be chunked prefill or decode. - is_first_prefill = False - if is_prompt: - seqs = seq_group.get_seqs() - # Prefill has only 1 sequence. - assert len(seqs) == 1 - num_computed_tokens = seqs[0].data.get_num_computed_tokens() - is_first_prefill = num_computed_tokens == 0 - # In the next iteration, all prompt tokens are not computed. - # It means the prefill is chunked, and we don't need sampling. - # NOTE: We use get_len instead of get_prompt_len because when - # a sequence is preempted, prefill includes previous generated - # output tokens. - if (token_chunk_size + num_computed_tokens - < seqs[0].data.get_len()): - do_sample = False - - # It assumes the scheduled_seq_groups is ordered by - # prefill < decoding. - if is_first_prefill or not self.scheduler_config.send_delta_data: - seq_group_metadata = SequenceGroupMetadata( - request_id=seq_group.request_id, - is_prompt=is_prompt, - seq_data=seq_data, - sampling_params=seq_group.sampling_params, - block_tables=block_tables, - do_sample=do_sample, - pooling_params=seq_group.pooling_params, - token_chunk_size=token_chunk_size, - lora_request=seq_group.lora_request, - computed_block_nums=common_computed_block_nums, - encoder_seq_data=encoder_seq_data, - cross_block_table=cross_block_table, - state=seq_group.state, - # `multi_modal_data` will only be present for the 1st comm - # between engine and worker. - # the subsequent comms can still use delta, but - # `multi_modal_data` will be None. - multi_modal_data=(seq_group.multi_modal_data - if scheduler_outputs.num_prefill_groups - > 0 else None), - multi_modal_placeholders=( - seq_group.multi_modal_placeholders - if scheduler_outputs.num_prefill_groups > 0 else None), - ) - else: - # When SPMD mode is enabled, we only send delta data except for - # the first request to reduce serialization cost. 
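For reference, a minimal standalone sketch of the `do_sample` decision made while building the metadata above: a chunked prefill step that does not yet reach the end of the prompt produces no sample, and only the final chunk (or a decode step) does. The helper name and the numbers are illustrative.

```python
# Mirrors the deleted condition: sampling is skipped while
# token_chunk_size + num_computed_tokens < total sequence length.
def needs_sampling(token_chunk_size: int, num_computed_tokens: int,
                   seq_len: int) -> bool:
    return token_chunk_size + num_computed_tokens >= seq_len


assert not needs_sampling(512, 0, 2000)   # first chunk of a long prompt
assert needs_sampling(976, 1024, 2000)    # final chunk reaches the end
```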
- seq_data_delta = {} - for id, data in seq_data.items(): - seq_data_delta[id] = data.get_delta_and_reset() - seq_group_metadata = SequenceGroupMetadataDelta( - seq_data_delta, - seq_group.request_id, - block_tables, - is_prompt, - do_sample=do_sample, - token_chunk_size=token_chunk_size, - computed_block_nums=common_computed_block_nums, - ) - seq_group_metadata_list.append(seq_group_metadata) - - if allow_async_output_proc: - allow_async_output_proc = self._allow_async_output_proc( - seq_group) - - # Now that the batch has been created, we can assume all blocks in the - # batch will have been computed before the next scheduling invocation. - # This is because the engine assumes that a failure in model execution - # will crash the vLLM instance / will not retry. - for scheduled_seq_group in scheduler_outputs.scheduled_seq_groups: - self.block_manager.mark_blocks_as_computed( - scheduled_seq_group.seq_group, - scheduled_seq_group.token_chunk_size) - - self._seq_group_metadata_cache[self.next_cache_id].reset() - - scheduler_time = time.perf_counter() - scheduler_start_time - # Add this to scheduler time to all the sequences that are currently - # running. This will help estimate if the scheduler is a significant - # component in the e2e latency. - for seq_group in self.running: - if seq_group is not None and seq_group.metrics is not None: - if seq_group.metrics.scheduler_time is not None: - seq_group.metrics.scheduler_time += scheduler_time - else: - seq_group.metrics.scheduler_time = scheduler_time - - # Move to next cache (if exists) - self.cache_id = self.next_cache_id - - # Return results - return (seq_group_metadata_list, scheduler_outputs, - allow_async_output_proc) - - def fork_seq(self, parent_seq: Sequence, child_seq: Sequence) -> None: - self.block_manager.fork(parent_seq, child_seq) - - def free_seq(self, seq: Sequence) -> None: - """Free a sequence from a block table.""" - self.block_manager.free(seq) - - def remove_seq_from_computed_blocks_tracker( - self, seq_group: SequenceGroup, - status: Optional[SequenceStatus]) -> None: - seqs = seq_group.get_seqs(status=status) - for seq in seqs: - self._remove_seq_from_computed_blocks_tracker(seq) - - def _remove_seq_from_computed_blocks_tracker(self, seq: Sequence) -> None: - """ - Free a sequence computed blocks tracker _seq_id_to_blocks_hashes - and _seq_id_to_num_tokens_computed. - """ - self.block_manager.remove_seq_from_computed_blocks_tracker(seq) - - def _free_finished_seqs(self, seq_group: SequenceGroup) -> None: - """Free finished seqs in a sequence group.""" - for seq in seq_group.get_seqs(): - if seq.is_finished(): - self.free_seq(seq) - - def _free_finished_seq_group(self, seq_group: SequenceGroup) -> None: - if seq_group.is_finished(): - # Free cross-attention block table, if it exists - self._free_seq_group_cross_attn_blocks(seq_group) - - # Add the finished requests to the finished requests list. - # This list will be used to update the Mamba cache in the - # next step. 
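For reference, a minimal standalone sketch of the finished-request bookkeeping used here and flushed by `get_and_reset_finished_requests_ids`: finished (or aborted) request ids are collected and handed out exactly once, so per-request state such as the Mamba cache can be evicted on the next step. `FinishedIdTracker` is a toy class written for this note.

```python
# Toy tracker with the same flush-once semantics as the scheduler's
# _finished_requests_ids list.
class FinishedIdTracker:
    def __init__(self) -> None:
        self._finished_requests_ids: list = []

    def mark_finished(self, request_id: str) -> None:
        self._finished_requests_ids.append(request_id)

    def get_and_reset_finished_requests_ids(self) -> list:
        finished = self._finished_requests_ids
        self._finished_requests_ids = []
        return finished


tracker = FinishedIdTracker()
tracker.mark_finished("req-7")
assert tracker.get_and_reset_finished_requests_ids() == ["req-7"]
assert tracker.get_and_reset_finished_requests_ids() == []
```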
- self._finished_requests_ids.append(seq_group.request_id) - - # Free finished seqs - self._free_finished_seqs(seq_group) - - def free_finished_seq_groups(self) -> None: - remaining: Deque[SequenceGroup] = deque() - for seq_group in self.running: - self._free_finished_seq_group(seq_group) - if not seq_group.is_finished(): - remaining.append(seq_group) - - self.running = remaining - - # Handle async stopped sequence groups - # (ones that reached max model len) - if self._async_stopped: - for seq_group in self._async_stopped: - self._free_seq_group_cross_attn_blocks(seq_group) - self._finished_requests_ids.append(seq_group.request_id) - - # Free finished seqs - self._free_finished_seqs(seq_group) - - self._async_stopped.clear() - - def _allocate_and_set_running(self, seq_group: SequenceGroup) -> None: - self.block_manager.allocate(seq_group) - for seq in seq_group.get_seqs(status=SequenceStatus.WAITING): - seq.status = SequenceStatus.RUNNING - - def _append_slots( - self, - seq_group: SequenceGroup, - blocks_to_copy: List[Tuple[int, int]], - enable_chunking: bool = False, - ) -> None: - """Appends new slots to the sequences in the given sequence group. - - Args: - seq_group (SequenceGroup): The sequence group containing the - sequences to append slots to. - blocks_to_copy (List[Tuple[int, int]]): A list of tuple of two - ints, the first int is the source block index, and the second - int is the destination block index. This list is updated with - the new source and destination block indices for the appended - slots. - enable_chunking (bool): True if chunked prefill is enabled. - """ - is_prefill: bool = seq_group.is_prefill() - num_lookahead_slots: int = self._get_num_lookahead_slots( - is_prefill, enable_chunking) - - seq_status: Optional[SequenceStatus] = SequenceStatus.RUNNING - for seq in seq_group.get_seqs(status=seq_status): - cows = self.block_manager.append_slots(seq, num_lookahead_slots) - if len(cows) > 0: - blocks_to_copy.extend(cows) - - def _preempt(self, seq_group: SequenceGroup, - blocks_to_swap_out: List[Tuple[int, int]]) -> PreemptionMode: - # If preemption mode is not specified, we determine the mode as follows: - # We use recomputation by default since it incurs lower overhead than - # swapping. However, when the sequence group has multiple sequences - # (e.g., beam search), recomputation is not currently supported. In - # such a case, we use swapping instead. - # FIXME(woosuk): This makes our scheduling policy a bit bizarre. - # As swapped sequences are prioritized over waiting sequences, - # sequence groups with multiple sequences are implicitly prioritized - # over sequence groups with a single sequence. - # TODO(woosuk): Support recomputation for sequence groups with multiple - # sequences. This may require a more sophisticated CUDA kernel. - if self.user_specified_preemption_mode is None: - if seq_group.get_max_num_running_seqs() == 1: - preemption_mode = PreemptionMode.RECOMPUTE - else: - preemption_mode = PreemptionMode.SWAP - - elif self.user_specified_preemption_mode == "swap": - preemption_mode = PreemptionMode.SWAP - else: - preemption_mode = PreemptionMode.RECOMPUTE - - if self.num_cumulative_preemption % 50 == 0: - logger.warning( - "Sequence group %s is preempted by %s mode because there is " - "not enough KV cache space. This can affect the end-to-end " - "performance. Increase gpu_memory_utilization or " - "tensor_parallel_size to provide more KV cache memory. 
" - "total_num_cumulative_preemption=%d", - seq_group.request_id, - preemption_mode, - self.num_cumulative_preemption + 1, - ) - self.num_cumulative_preemption += 1 - - if preemption_mode == PreemptionMode.RECOMPUTE: - self._preempt_by_recompute(seq_group) - elif preemption_mode == PreemptionMode.SWAP: - self._preempt_by_swap(seq_group, blocks_to_swap_out) - else: - raise AssertionError("Invalid preemption mode.") - return preemption_mode - - def _preempt_by_recompute( - self, - seq_group: SequenceGroup, - ) -> None: - seqs = seq_group.get_seqs(status=SequenceStatus.RUNNING) - assert len(seqs) == 1 - for seq in seqs: - seq.status = SequenceStatus.WAITING - self.free_seq(seq) - seq.reset_state_for_recompute() - self._free_seq_group_cross_attn_blocks(seq_group) - - def _preempt_by_swap( - self, - seq_group: SequenceGroup, - blocks_to_swap_out: List[Tuple[int, int]], - ) -> None: - self._swap_out(seq_group, blocks_to_swap_out) - - def _swap_in( - self, - seq_group: SequenceGroup, - blocks_to_swap_in: List[Tuple[int, int]], - ) -> None: - mapping = self.block_manager.swap_in(seq_group) - blocks_to_swap_in.extend(mapping) - for seq in seq_group.get_seqs(status=SequenceStatus.SWAPPED): - seq.status = SequenceStatus.RUNNING - - def _swap_out( - self, - seq_group: SequenceGroup, - blocks_to_swap_out: List[Tuple[int, int]], - ) -> None: - if not self.block_manager.can_swap_out(seq_group): - # FIXME(woosuk): Abort the sequence group instead of aborting the - # entire engine. - raise RuntimeError( - "Aborted due to the lack of CPU swap space. Please increase " - "the swap space to avoid this error.") - mapping = self.block_manager.swap_out(seq_group) - blocks_to_swap_out.extend(mapping) - for seq in seq_group.get_seqs(status=SequenceStatus.RUNNING): - seq.status = SequenceStatus.SWAPPED - - def _passed_delay(self, now: float) -> bool: - if self.prev_prompt: - self.last_prompt_latency = now - self.prev_time - self.prev_time, self.prev_prompt = now, False - # Delay scheduling prompts to let waiting queue fill up - if self.scheduler_config.delay_factor > 0 and self.waiting: - earliest_arrival_time = min( - [e.metrics.arrival_time for e in self.waiting]) - passed_delay = ((now - earliest_arrival_time) - > (self.scheduler_config.delay_factor * - self.last_prompt_latency) or not self.running) - else: - passed_delay = True - return passed_delay - - def _get_num_lookahead_slots(self, is_prefill: bool, - enable_chunking: bool) -> int: - """The number of slots to allocate per sequence per step, beyond known - token ids. Speculative decoding uses these slots to store KV activations - of tokens which may or may not be accepted. - """ - return 0 - - def _get_num_new_uncached_and_cached_tokens( - self, - seq_group: SequenceGroup, - status: SequenceStatus, - enable_chunking: bool, - budget: SchedulingBudget, - partial_prefill_metadata: Optional[PartialPrefillMetadata] = None, - ) -> Tuple[int, int]: - """ - Returns the number of new uncached and cached tokens to schedule for a - given sequence group that's in a given `status`. - - The API could chunk the number of tokens to compute based on `budget` - if `enable_chunking` is True. If a sequence group has multiple - sequences (e.g., running beam search), it means it is in decoding - phase, so chunking doesn't happen. - - Returns (0, 0) if the new token cannot be computed due to token budget. - - The cached tokens's blocks are already computed, and the attention - backend will reuse the cached blocks rather than recomputing them. 
So - the scheduler could schedule these cached tokens "for free". - - Args: - seq_group: The sequence group to get the number of new tokens to - schedule. - status: The status of the sequences to get the number of new tokens - to schedule. - enable_chunking: Whether to chunk the number of tokens to compute. - budget: The budget to chunk the number of tokens to compute. - partial_prefill_metadata: information about the partial prefills - that are currently running - - - Returns: - A tuple of two ints. The first int is the number of new uncached - tokens to schedule. The second int is the number of cached tokens. - If no more new tokens can be scheduled, returns (0, 0). - """ - num_cached_new_tokens = 0 - num_uncached_new_tokens = 0 - - seqs = seq_group.get_seqs(status=status) - # Compute the number of new uncached and cached tokens for - # each sequence. - for seq in seqs: - if not seq.is_prefill(): - # Decode sequences should always just have 1 uncached token - # TODO(rickyx): Actually is this still correct for multi-step? - num_uncached_new_tokens += 1 - continue - - num_computed_tokens_seq = seq.get_num_computed_tokens() - all_num_new_tokens_seq = seq.get_len() - num_computed_tokens_seq - if not self.cache_config.enable_prefix_caching: - # If prefix caching is not enabled, all new tokens are uncached. - num_uncached_new_tokens += all_num_new_tokens_seq - continue - - # NOTE: the cache token might be currently in a block that's in an - # evictor meaning that it's not yet allocated. However, we don't - # exclude such tokens in the cache count because it will be - # guaranteed to be allocated later if the sequence can be allocated. - num_cached_tokens_seq = self.block_manager.get_num_cached_tokens( - seq) - - # Sanity check. - if num_cached_tokens_seq < num_computed_tokens_seq: - # This should only happen with chunked prefill, and - # the seq is still in prefill. The `num_cached_tokens_seq` - # is the value we calculated on scheduling the first prefill. - # For subsequent continuous prefill steps, we cached the - # number of cache tokens for the sequence so the cached token - # count could be less than the number of computed tokens. - # See comments on `ComputedBlocksTracker` for more details. - assert ( - seq.is_prefill() and seq.status == SequenceStatus.RUNNING - and self.scheduler_config.chunked_prefill_enabled - ), ("Number of cached tokens should not be less than the " - "number of computed tokens for a sequence that's still " - f"in prefill. But there are {num_cached_tokens_seq} cached " - f"tokens and {num_computed_tokens_seq} computed tokens " - f"for sequence {seq.seq_id}.") - - num_cached_new_tokens_seq = max( - 0, num_cached_tokens_seq - num_computed_tokens_seq) - num_uncached_new_tokens_seq = (all_num_new_tokens_seq - - num_cached_new_tokens_seq) - - num_uncached_new_tokens += num_uncached_new_tokens_seq - num_cached_new_tokens += num_cached_new_tokens_seq - - if num_uncached_new_tokens == 0 and num_cached_new_tokens > 0: - # For a fully cached hit sequence, we actually need to recompute the - # last token. So we need at least 1 uncached token to schedule. - # See ModelRunner._compute_for_prefix_cache_hit for more details. - num_uncached_new_tokens = 1 - num_cached_new_tokens -= 1 - - if enable_chunking and len(seqs) == 1: - # Chunk if a running request cannot fit in the given budget. - # If number of seq > 1, it means it is doing beam search - # in a decode phase. Do not chunk. 
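# Editor's sketch (illustrative only, not part of this patch): a simplified,
# single-sequence version of the cached/uncached accounting described above,
# including the rule that a fully cached prefill still schedules one uncached
# token so the last token gets recomputed.
def split_new_tokens(seq_len: int, num_computed: int,
                     num_cached: int) -> tuple[int, int]:
    all_new = seq_len - num_computed
    cached_new = max(0, num_cached - num_computed)
    uncached_new = all_new - cached_new
    if uncached_new == 0 and cached_new > 0:
        # Full prefix-cache hit: keep one token uncached for recomputation.
        uncached_new, cached_new = 1, cached_new - 1
    return uncached_new, cached_new


# A 1024-token prompt, nothing computed yet, 768 tokens found in the prefix
# cache: 256 uncached + 768 "free" cached tokens are scheduled.
assert split_new_tokens(1024, 0, 768) == (256, 768)
assert split_new_tokens(1024, 0, 1024) == (1, 1023)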
- num_uncached_new_tokens = self._chunk_new_tokens_to_schedule( - self.scheduler_config, - self.cache_config, - budget, - self._get_prompt_limit(seq_group), - num_uncached_new_tokens, - self.partial_prefill_budget_lookup_list, - partial_prefill_metadata, - ) - - return num_uncached_new_tokens, num_cached_new_tokens - - @staticmethod - def _chunk_new_tokens_to_schedule( - scheduler_config: SchedulerConfig, - cache_config: CacheConfig, - budget: SchedulingBudget, - prompt_limit: int, - num_new_tokens: int, - partial_prefill_budget_lookup_list: List[int], - partial_prefill_metadata: Optional[PartialPrefillMetadata] = None, - ) -> int: - """ - Chunks the number of new tokens to schedule based on the budget when - chunked prefill is enabled. - - Args: - scheduler_config: The scheduler config. - cache_config: The cache config. - budget: The budget to chunk the number of tokens to compute. - prompt_limit: The maximum number of tokens allowed in a prompt. - num_new_tokens: The number of new tokens to schedule. - - Returns: - The number of new tokens to schedule after chunking. - """ - remaining_token_budget = budget.remaining_token_budget() - - # Get the number of tokens to allocate to this prefill slot - prefill_slot_budget = ( - remaining_token_budget if partial_prefill_metadata is None else - partial_prefill_budget_lookup_list[ - partial_prefill_metadata.schedulable_prefills]) - - if cache_config.enable_prefix_caching: - # When prefix caching is enabled and we're partially prefilling - # a sequence, we always allocate a number of new tokens that is - # divisible by the block size to avoid partial block matching. - block_size = cache_config.block_size - # Don't exceed either the total budget or slot budget. - # Take min of those and get the next lowest multiple of the - # block size: - remaining_token_budget = ( - min(remaining_token_budget, prefill_slot_budget) // - block_size) * block_size - # NB: In the case where num_new_tokens < budget, we are - # finishing prefill for this sequence, so we do not need to - # allocate a full block. - - num_new_tokens = min(num_new_tokens, remaining_token_budget, - prefill_slot_budget) - - return num_new_tokens diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index c345f17e6614..e828ac04364f 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -7,13 +7,11 @@ from vllm.beam_search import BeamSearchSequence, create_sort_beams_key_function from vllm.config import ModelConfig, VllmConfig -from vllm.core.scheduler import SchedulerOutputs from vllm.inputs.data import PromptType, TokensPrompt from vllm.inputs.parse import is_explicit_encoder_decoder_prompt from vllm.inputs.preprocess import InputPreprocessor from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput from vllm.outputs import CompletionOutput, PoolingRequestOutput, RequestOutput from vllm.plugins.io_processors.interface import IOProcessor from vllm.pooling_params import PoolingParams @@ -266,11 +264,7 @@ async def is_tracing_enabled(self) -> bool: ... @abstractmethod - async def do_log_stats( - self, - scheduler_outputs: Optional[SchedulerOutputs] = None, - model_output: Optional[list[SamplerOutput]] = None, - ) -> None: + async def do_log_stats(self) -> None: ... 
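# Editor's sketch (illustrative only, not part of this patch): a stripped-down
# version of the block-alignment rule in _chunk_new_tokens_to_schedule above.
# It ignores the per-slot partial-prefill budget and only shows how, with
# prefix caching enabled, a partial prefill chunk is rounded down to a whole
# number of blocks.
def chunk_prefill(num_new_tokens: int, token_budget: int, block_size: int,
                  prefix_caching: bool) -> int:
    budget = token_budget
    if prefix_caching:
        budget = (budget // block_size) * block_size
    # If the remaining prompt fits, it is scheduled as-is (no full block is
    # needed to finish the prefill).
    return min(num_new_tokens, budget, token_budget)


# 500 prompt tokens left, a 300-token budget and 16-token blocks: schedule
# 288 tokens (18 blocks) so the next chunk starts block-aligned.
assert chunk_prefill(500, 300, 16, prefix_caching=True) == 288
assert chunk_prefill(100, 300, 16, prefix_caching=True) == 100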
@abstractmethod diff --git a/vllm/v1/engine/async_llm.py b/vllm/v1/engine/async_llm.py index 73165c7e4c0a..757baecea9ce 100644 --- a/vllm/v1/engine/async_llm.py +++ b/vllm/v1/engine/async_llm.py @@ -601,11 +601,7 @@ async def get_tokenizer(self) -> AnyTokenizer: async def is_tracing_enabled(self) -> bool: return self.observability_config.otlp_traces_endpoint is not None - async def do_log_stats( - self, - scheduler_outputs=None, - model_output=None, - ) -> None: + async def do_log_stats(self) -> None: if self.logger_manager: self.logger_manager.log() diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py deleted file mode 100644 index 530907012f70..000000000000 --- a/vllm/worker/cache_engine.py +++ /dev/null @@ -1,145 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""CacheEngine class for managing the KV cache.""" -from typing import List - -import torch - -from vllm.attention import get_attn_backend -from vllm.config import CacheConfig, DeviceConfig, ModelConfig, ParallelConfig -from vllm.logger import init_logger -from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, LayerBlockType, - get_dtype_size, is_pin_memory_available) - -logger = init_logger(__name__) - - -class CacheEngine: - """Manages the KV cache. - - This class is responsible for initializing and managing the GPU and CPU KV - caches. It also provides methods for performing KV cache operations, such - as swapping and copying. - """ - - def __init__( - self, - cache_config: CacheConfig, - model_config: ModelConfig, - parallel_config: ParallelConfig, - device_config: DeviceConfig, - ) -> None: - self.cache_config = cache_config - self.model_config = model_config - self.parallel_config = parallel_config - self.device_config = device_config - - self.head_size = model_config.get_head_size() - # Models like Jamba, have mixed typed layers, E.g Mamba - self.num_attention_layers = model_config.get_num_layers_by_block_type( - parallel_config, LayerBlockType.attention) - self.num_kv_heads = model_config.get_num_kv_heads(parallel_config) - - self.block_size = cache_config.block_size - self.num_gpu_blocks = cache_config.num_gpu_blocks - if self.num_gpu_blocks: - self.num_gpu_blocks //= parallel_config.pipeline_parallel_size - self.num_cpu_blocks = cache_config.num_cpu_blocks - if self.num_cpu_blocks: - self.num_cpu_blocks //= parallel_config.pipeline_parallel_size - - if cache_config.cache_dtype == "auto": - self.dtype = model_config.dtype - else: - self.dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_config.cache_dtype] - - # Get attention backend. - self.attn_backend = get_attn_backend(self.head_size, - model_config.dtype, - cache_config.cache_dtype, - self.block_size, - model_config.is_attention_free, - use_mla=model_config.use_mla) - - # Initialize the cache. 
- self.gpu_cache = self._allocate_kv_cache( - self.num_gpu_blocks, self.device_config.device_type) - self.cpu_cache = self._allocate_kv_cache(self.num_cpu_blocks, "cpu") - - def _allocate_kv_cache( - self, - num_blocks: int, - device: str, - ) -> List[torch.Tensor]: - """Allocates KV cache on the specified device.""" - kv_cache_generic_shape = self.attn_backend.get_kv_cache_shape( - num_blocks, self.block_size, self.num_kv_heads, self.head_size) - pin_memory = is_pin_memory_available() if device == "cpu" else False - kv_cache: List[torch.Tensor] = [] - try: - kv_cache_stride_order = self.attn_backend.get_kv_cache_stride_order( - ) - except (AttributeError, NotImplementedError): - kv_cache_stride_order = tuple(range(len(kv_cache_generic_shape))) - - # The allocation respects the backend-defined stride order to ensure - # the semantic remains consistent for each backend. We first obtain the - # generic kv cache shape and then permute it according to the stride - # order which could result in a non-contiguous tensor. - kv_cache_allocation_shape = tuple(kv_cache_generic_shape[i] - for i in kv_cache_stride_order) - - for _ in range(self.num_attention_layers): - # null block in CpuGpuBlockAllocator requires at least that - # block to be zeroed-out. - # We zero-out everything for simplicity. - layer_kv_cache = torch.zeros( - kv_cache_allocation_shape, - dtype=self.dtype, - pin_memory=pin_memory, - device=device).permute(*kv_cache_stride_order) - - # view back to (TOTAL_PAGES, PAGE_SIZE, entry_shape...) for cases - # when entry_shape is higher than 1D - kv_cache.append(layer_kv_cache) - return kv_cache - - def swap_in(self, src_to_dst: torch.Tensor) -> None: - for i in range(self.num_attention_layers): - self.attn_backend.swap_blocks(self.cpu_cache[i], self.gpu_cache[i], - src_to_dst) - - def swap_out(self, src_to_dst: torch.Tensor) -> None: - for i in range(self.num_attention_layers): - self.attn_backend.swap_blocks(self.gpu_cache[i], self.cpu_cache[i], - src_to_dst) - - def copy(self, src_to_dsts: torch.Tensor) -> None: - self.attn_backend.copy_blocks(self.gpu_cache, src_to_dsts) - - @staticmethod - def get_cache_block_size( - cache_config: CacheConfig, - model_config: ModelConfig, - parallel_config: ParallelConfig, - ) -> int: - head_size = model_config.get_head_size() - num_heads = model_config.get_num_kv_heads(parallel_config) - num_attention_layers = model_config.get_num_layers_by_block_type( - parallel_config, LayerBlockType.attention) - - if cache_config.cache_dtype == "auto": - dtype = model_config.dtype - else: - dtype = STR_DTYPE_TO_TORCH_DTYPE[cache_config.cache_dtype] - - key_cache_entry = num_heads * head_size - - # For MLA there is no value cache, since the latent vector - # is joint keys and values. 
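# Editor's sketch (illustrative only, not part of this patch): the layout
# trick used by _allocate_kv_cache above, shown with made-up sizes and a
# hypothetical stride order. The buffer is allocated in the backend's
# preferred physical order and then permuted back to the generic KV-cache
# view, which may leave the tensor non-contiguous.
import torch

generic_shape = (2, 128, 16, 8, 64)   # made-up (K/V, blocks, block, heads, head)
stride_order = (1, 0, 2, 3, 4)        # assumed backend preference
alloc_shape = tuple(generic_shape[i] for i in stride_order)

layer_kv_cache = torch.zeros(alloc_shape).permute(*stride_order)
assert tuple(layer_kv_cache.shape) == generic_shape
assert not layer_kv_cache.is_contiguous()   # permuted view, as noted above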
- value_cache_entry = key_cache_entry if not model_config.use_mla else 0 - total = num_attention_layers * cache_config.block_size * \ - (key_cache_entry + value_cache_entry) - - dtype_size = get_dtype_size(dtype) - return dtype_size * total diff --git a/vllm/worker/model_runner.py b/vllm/worker/model_runner.py deleted file mode 100644 index bab89586b0f2..000000000000 --- a/vllm/worker/model_runner.py +++ /dev/null @@ -1,2031 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import dataclasses -import gc -import inspect -import itertools -import time -import weakref -from contextlib import contextmanager -from dataclasses import dataclass -from typing import (TYPE_CHECKING, Any, Callable, Dict, List, Optional, Set, - Tuple, Type, TypeVar, Union) - -import numpy as np -import torch -import torch.distributed -import torch.nn as nn -from tqdm.auto import tqdm - -from vllm.attention import AttentionMetadata, get_attn_backend -from vllm.attention.backends.abstract import AttentionState -from vllm.attention.backends.utils import CommonAttentionState -from vllm.compilation.counter import compilation_counter -from vllm.config import CompilationLevel, VllmConfig -from vllm.core.scheduler import SchedulerOutputs -from vllm.distributed import broadcast_tensor_dict, get_pp_group -from vllm.distributed.kv_transfer import get_kv_transfer_group -from vllm.distributed.parallel_state import (get_tensor_model_parallel_rank, - graph_capture) -from vllm.forward_context import get_forward_context, set_forward_context -from vllm.inputs import INPUT_REGISTRY, InputRegistry -from vllm.logger import init_logger -from vllm.lora.layers import LoRAMapping -from vllm.lora.request import LoRARequest -from vllm.lora.worker_manager import LRUCacheWorkerLoRAManager -from vllm.model_executor import SamplingMetadata, SamplingMetadataCache -from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding -from vllm.model_executor.layers.sampler import (Sampler, SamplerOutput, - get_sampler) -from vllm.model_executor.model_loader import get_model -from vllm.model_executor.model_loader.tensorizer import TensorizerConfig -from vllm.model_executor.models import (supports_lora, supports_mrope, - supports_multimodal) -from vllm.model_executor.models.utils import set_cpu_offload_max_bytes -from vllm.multimodal import (MULTIMODAL_REGISTRY, BatchedTensorInputs, - MultiModalKwargs, MultiModalPlaceholderMap, - MultiModalRegistry) -from vllm.sampling_params import SamplingParams -from vllm.sequence import IntermediateTensors, SequenceGroupMetadata -from vllm.utils import (DeviceMemoryProfiler, GiB_bytes, PyObjectCache, - async_tensor_h2d, flatten_2d_lists, - is_pin_memory_available, supports_dynamo, - weak_ref_tensor) -from vllm.worker.model_runner_base import ( - InputProcessingError, ModelRunnerBase, ModelRunnerInputBase, - ModelRunnerInputBuilderBase, _add_attn_metadata_broadcastable_dict, - _add_sampling_metadata_broadcastable_dict, - _init_attn_metadata_from_tensor_dict, - _init_sampling_metadata_from_tensor_dict) - -if TYPE_CHECKING: - from vllm.attention.backends.abstract import AttentionBackend - -logger = init_logger(__name__) - -LORA_WARMUP_RANK = 8 - -_NUM_WARMUP_ITERS = 2 - -TModelInputForGPU = TypeVar('TModelInputForGPU', bound="ModelInputForGPU") - -# For now, bump up cache limits for recompilations during CUDA graph warmups. 
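# Editor's worked example (illustrative only, not part of this patch) of the
# per-block size formula in get_cache_block_size above, using hypothetical
# model dimensions: 32 attention layers, 8 KV heads, head_size 128, 16-token
# blocks, fp16 cache.
num_layers, num_kv_heads, head_size, block_size = 32, 8, 128, 16
dtype_size = 2                          # bytes per fp16/bf16 element
key_entry = num_kv_heads * head_size    # per-token key elements in one layer
value_entry = key_entry                 # would be 0 for MLA (no value cache)
block_bytes = dtype_size * num_layers * block_size * (key_entry + value_entry)
assert block_bytes == 2 * 1024 * 1024   # 2 MiB of KV cache per block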
-torch._dynamo.config.cache_size_limit = 128 -torch._dynamo.config.accumulated_cache_size_limit = 128 - - -@dataclass(frozen=True) -class ModelInputForGPU(ModelRunnerInputBase): - """ - This base class contains metadata needed for the base model forward pass - but not metadata for possible additional steps, e.g., sampling. Model - runners that run additional steps should subclass this method to add - additional fields. - """ - input_tokens: Optional[torch.Tensor] = None - inputs_embeds: Optional[torch.Tensor] = None - input_positions: Optional[torch.Tensor] = None - seq_lens: Optional[List[int]] = None - query_lens: Optional[List[int]] = None - lora_mapping: Optional["LoRAMapping"] = None - lora_requests: Optional[Set[LoRARequest]] = None - attn_metadata: Optional["AttentionMetadata"] = None - multi_modal_kwargs: Optional[BatchedTensorInputs] = None - request_ids_to_seq_ids: Optional[Dict[str, List[int]]] = None - finished_requests_ids: Optional[List[str]] = None - virtual_engine: int = 0 - async_callback: Optional[Callable] = None - scheduler_outputs: Optional[SchedulerOutputs] = None - previous_hidden_states: Optional[torch.Tensor] = None - - def as_broadcastable_tensor_dict(self) -> Dict[str, Any]: - tensor_dict = { - "input_tokens": self.input_tokens, - "inputs_embeds": self.inputs_embeds, - "input_positions": self.input_positions, - "lora_requests": self.lora_requests, - "lora_mapping": self.lora_mapping, - "multi_modal_kwargs": self.multi_modal_kwargs, - "virtual_engine": self.virtual_engine, - "request_ids_to_seq_ids": self.request_ids_to_seq_ids, - "finished_requests_ids": self.finished_requests_ids, - } - _add_attn_metadata_broadcastable_dict(tensor_dict, self.attn_metadata) - return tensor_dict - - @classmethod - def from_broadcasted_tensor_dict( - cls: Type[TModelInputForGPU], - tensor_dict: Dict[str, Any], - attn_backend: Optional["AttentionBackend"] = None, - ) -> TModelInputForGPU: - if attn_backend is not None: - tensor_dict = _init_attn_metadata_from_tensor_dict( - attn_backend, tensor_dict) - return cls(**tensor_dict) - - # Exclude `async_callback` to be able to pickle this object - def __getstate__(self): - state = self.__dict__.copy() - del state["async_callback"] - return state - - # TODO: What happens when we depickle this object? - # How can we update this callback to properly pass it to the engine? - def __setstate__(self, state): - self.__dict__.update(state) - self.__dict__.update({'async_callback': None}) - - -@dataclass(frozen=True) -class ModelInputForGPUWithSamplingMetadata(ModelInputForGPU): - """ - Used by the ModelRunner. - """ - sampling_metadata: Optional["SamplingMetadata"] = None - # Used for speculative decoding. We do not broadcast it because it is only - # used by the driver worker. 
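# Editor's sketch (illustrative only, not part of this patch): the pickling
# behaviour implemented by ModelInputForGPU.__getstate__/__setstate__ above,
# reduced to a toy class. The non-picklable async_callback is dropped on
# serialization and restored as None after unpickling.
import pickle


class ToyModelInput:

    def __init__(self, input_tokens, async_callback=None):
        self.input_tokens = input_tokens
        self.async_callback = async_callback

    def __getstate__(self):
        state = self.__dict__.copy()
        del state["async_callback"]       # not picklable / broadcastable
        return state

    def __setstate__(self, state):
        self.__dict__.update(state)
        self.async_callback = None        # must be re-attached by the engine


restored = pickle.loads(pickle.dumps(ToyModelInput([1, 2, 3], print)))
assert restored.input_tokens == [1, 2, 3] and restored.async_callback is None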
- is_prompt: Optional[bool] = None - - def as_broadcastable_tensor_dict(self) -> Dict[str, Any]: - tensor_dict = { - "input_tokens": self.input_tokens, - "inputs_embeds": self.inputs_embeds, - "input_positions": self.input_positions, - "lora_requests": self.lora_requests, - "lora_mapping": self.lora_mapping, - "multi_modal_kwargs": self.multi_modal_kwargs, - "virtual_engine": self.virtual_engine, - "request_ids_to_seq_ids": self.request_ids_to_seq_ids, - "finished_requests_ids": self.finished_requests_ids, - } - _add_attn_metadata_broadcastable_dict(tensor_dict, self.attn_metadata) - _add_sampling_metadata_broadcastable_dict(tensor_dict, - self.sampling_metadata) - return tensor_dict - - @classmethod - def from_broadcasted_tensor_dict( - cls, - tensor_dict: Dict[str, Any], - attn_backend: Optional["AttentionBackend"] = None, - ) -> "ModelInputForGPUWithSamplingMetadata": - tensor_dict = _init_sampling_metadata_from_tensor_dict(tensor_dict) - if attn_backend is not None: - tensor_dict = _init_attn_metadata_from_tensor_dict( - attn_backend, tensor_dict) - return cls(**tensor_dict) - - -class ModelInputForGPUBuilder(ModelRunnerInputBuilderBase[ModelInputForGPU]): - """Build ModelInputForGPU from SequenceGroupMetadata.""" - - # Note: ideally we would be using a dataclass(kw_only=True) - # here, so that this can be subclassed easily, - # but kw_only is not supported in python<3.10. - class InterDataForSeqGroup: - """Intermediate data for the current sequence group.""" - - def simple_reinit(self): - self.input_tokens[0].clear() # type: ignore - self.inputs_embeds = None # type: ignore - self.input_positions[0].clear() # type: ignore - self.mrope_input_positions = None # type: ignore - self.seq_lens[0] = 0 # type: ignore - self.orig_seq_lens[0] = 0 # type: ignore - self.prompt_lens[0] = 0 # type: ignore - self.query_lens[0] = 0 # type: ignore - self.context_lens[0] = 0 # type: ignore - self.curr_sliding_window_blocks[0] = 0 # type: ignore - self.lora_index_mapping.clear() # type: ignore - self.lora_prompt_mapping.clear() # type: ignore - self.lora_requests.clear() # type: ignore - - def __init__( - self, - *, - # From sequence group metadata. - request_id: str, - seq_ids: List[int], - is_prompt: bool, - block_tables: Optional[Dict[int, List[int]]], - computed_block_nums: List[int], - n_seqs: int = 0, - - # Input tokens and positions. - input_tokens: Optional[List[List[int]]] = None, - inputs_embeds: Optional[torch.Tensor] = None, - input_positions: Optional[List[List[int]]] = None, - mrope_input_positions: Optional[List[List[List[int]]]] = None, - - # The sequence length (may be capped to the sliding window). - seq_lens: Optional[List[int]] = None, - # The original sequence length (before applying sliding window). - # This is used to compute slot mapping. - orig_seq_lens: Optional[List[int]] = None, - # This is used in the dual-chunk flash attention backend. - prompt_lens: Optional[List[int]] = None, - # The query length. - query_lens: Optional[List[int]] = None, - # The number of tokens that are already computed. - context_lens: Optional[List[int]] = None, - # The current sliding window block. - curr_sliding_window_blocks: Optional[List[int]] = None, - - # LoRA inputs. - lora_index_mapping: Optional[List[List[int]]] = None, - lora_prompt_mapping: Optional[List[List[int]]] = None, - lora_requests: Optional[Set[LoRARequest]] = None, - - # Multi-modal inputs. 
- multi_modal_kwargs: Optional[MultiModalKwargs] = None, - multi_modal_placeholder_maps: Optional[Dict[ - str, MultiModalPlaceholderMap]] = None, - - # Whether the prefix cache is hit (prefill only). - prefix_cache_hit: bool = False, - reinit: bool = False, - reinit_use_defaults: bool = False, - encoder_seq_len: int = 0, - ): - if reinit: - assert len(self.seq_ids) == len(seq_ids) # type: ignore - for i, seq_id in enumerate(seq_ids): - self.seq_ids[i] = seq_id # type: ignore - else: - self.seq_ids = seq_ids - - self.request_id = request_id - self.is_prompt = is_prompt - self.block_tables = block_tables - self.computed_block_nums = computed_block_nums - self.n_seqs = n_seqs - self.encoder_seq_len = encoder_seq_len - - if reinit: - if len(self.seq_ids) == 1 and reinit_use_defaults: - self.simple_reinit() - else: - if input_tokens: - self.input_tokens = input_tokens - else: - for seq_id in range(len(self.seq_ids)): - self.input_tokens[seq_id].clear() - - self.inputs_embeds = inputs_embeds - - if input_positions: - self.input_positions = input_positions - else: - for seq_id in range(len(self.seq_ids)): - self.input_positions[seq_id].clear() - - self.mrope_input_positions = None - - if seq_lens: - self.seq_lens = seq_lens - else: - for seq_id in range(len(self.seq_ids)): - self.seq_lens[seq_id] = 0 - - if orig_seq_lens: - self.orig_seq_lens = orig_seq_lens - else: - for seq_id in range(len(self.seq_ids)): - self.orig_seq_lens[seq_id] = 0 - - if prompt_lens: - self.prompt_lens = prompt_lens - else: - for seq_id in range(len(self.seq_ids)): - self.prompt_lens[seq_id] = 0 - - if query_lens: - self.query_lens = query_lens - else: - for seq_id in range(len(self.seq_ids)): - self.query_lens[seq_id] = 0 - - if context_lens: - self.context_lens = context_lens - else: - for seq_id in range(len(self.seq_ids)): - self.context_lens[seq_id] = 0 - - if curr_sliding_window_blocks: - self.curr_sliding_window_blocks = \ - curr_sliding_window_blocks - else: - for seq_id in range(len(self.seq_ids)): - self.curr_sliding_window_blocks[seq_id] = 0 - - if lora_index_mapping: - self.lora_index_mapping = lora_index_mapping - else: - self.lora_index_mapping.clear() - - if lora_prompt_mapping: - self.lora_prompt_mapping = lora_prompt_mapping - else: - self.lora_prompt_mapping.clear() - - if lora_requests: - self.lora_requests = lora_requests - else: - self.lora_requests.clear() - - else: - self.input_tokens = input_tokens or [] - self.inputs_embeds = inputs_embeds - self.input_positions = input_positions or [] - self.mrope_input_positions = mrope_input_positions or None - self.seq_lens = seq_lens or [] - self.orig_seq_lens = orig_seq_lens or [] - self.prompt_lens = prompt_lens or [] - self.query_lens = query_lens or [] - self.context_lens = context_lens or [] - self.curr_sliding_window_blocks = \ - curr_sliding_window_blocks or [] - - self.lora_index_mapping = lora_index_mapping or [] - self.lora_prompt_mapping = lora_prompt_mapping or [] - self.lora_requests = lora_requests or set() - - self.multi_modal_kwargs = multi_modal_kwargs - self.multi_modal_placeholder_maps = multi_modal_placeholder_maps - self.prefix_cache_hit = prefix_cache_hit - - self.n_seqs = len(self.seq_ids) - - if not reinit: - self.__post_init__() - - def __post_init__(self): - self.n_seqs = len(self.seq_ids) - - self.input_tokens = [[] for _ in range(self.n_seqs)] - self.input_positions = [[] for _ in range(self.n_seqs)] - self.mrope_input_positions = None - self.seq_lens = [0] * self.n_seqs - self.orig_seq_lens = [0] * self.n_seqs - self.prompt_lens 
= [0] * self.n_seqs - self.query_lens = [0] * self.n_seqs - self.context_lens = [0] * self.n_seqs - self.curr_sliding_window_blocks = [0] * self.n_seqs - - self.lora_index_mapping = [] - self.lora_prompt_mapping = [] - - def __repr__(self) -> str: - return (f"InterDataForSeqGroup(" - f"request_id={self.request_id}, " - f"seq_ids={self.seq_ids}, " - f"is_prompt={self.is_prompt}, " - f"block_tables={self.block_tables}, " - f"computed_block_nums={self.computed_block_nums}, " - f"n_seqs={self.n_seqs}, " - f"input_tokens={self.input_tokens}, " - f"inputs_embeds.shape=" - f"{getattr(self.inputs_embeds, 'shape', None)}, " - f"input_positions={self.input_positions}, " - f"mrope_input_positions={self.mrope_input_positions}, " - f"seq_lens={self.seq_lens}, " - f"orig_seq_lens={self.orig_seq_lens}, " - f"query_lens={self.query_lens}, " - f"context_lens={self.context_lens}, " - f"multi_modal_kwargs={self.multi_modal_kwargs}") - - def gen_inter_data_builder(self, num_seqs: int): - return lambda: ModelInputForGPUBuilder.InterDataForSeqGroup( - request_id="", - seq_ids=[0] * num_seqs, - is_prompt=True, - block_tables=None, - computed_block_nums=[]) - - def init_cached_inter_data(self, *args, **kwargs): - assert len(args) == 0 - assert "seq_ids" in kwargs - seq_ids = kwargs["seq_ids"] - num_seqs = len(seq_ids) - - # The inter-data cache is per model_runner - inter_data_cache = self.runner.inter_data_cache - if num_seqs not in inter_data_cache: - inter_data_cache[num_seqs] = PyObjectCache( - self.gen_inter_data_builder(num_seqs)) - - obj = inter_data_cache[num_seqs].get_object() - obj.__init__(*args, **kwargs) - return obj - - def reset_cached_inter_data(self): - for cache in self.runner.inter_data_cache.values(): - cache.reset() - - def __init__(self, - runner: "GPUModelRunnerBase", - finished_requests_ids: Optional[List[str]] = None): - super().__init__() - # Compute functions for each sequence in a sequence group. - # WARNING: The order of the functions matters! - self.per_seq_compute_fns = [ - self._compute_lens, - self._compute_for_prefix_cache_hit, - self._compute_for_sliding_window, - self._compute_lora_input, - ] - # Compute functions for each sequence group. - # WARNING: The order of the functions matters! - self.per_seq_group_compute_fns = [ - self._compute_multi_modal_input, - ] - - self.runner = runner - self.model_input_cls = self.runner._model_input_cls - self.attn_backend = self.runner.attn_backend - self.scheduler_config = self.runner.scheduler_config - self.sliding_window = self.runner.sliding_window - self.block_size = self.runner.block_size - self.enable_lora = self.runner.lora_config is not None - - # Attention metadata inputs. - if self.attn_backend is not None: - # spec decode (e.g. Medusa) does not have atten backend - self.attn_metadata_builder = self.attn_backend.get_builder_cls()( - weakref.proxy(self)) - - # Engine/Model configurations. - self.chunked_prefill_enabled = ( - self.scheduler_config is not None - and self.scheduler_config.chunked_prefill_enabled) - if self.sliding_window is not None: - self.sliding_window_blocks = ( - self.sliding_window + self.block_size - 1) // self.block_size - self.block_aligned_sliding_window = \ - self.sliding_window_blocks * self.block_size - - def prepare(self, - finished_requests_ids: Optional[List[str]] = None) -> None: - self.finished_requests_ids = finished_requests_ids - - # if the current batch is decode-only. - # will be set to False if there is any non-decode request. 
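# Editor's worked example (illustrative only, not part of this patch) of the
# block-aligned sliding window computed in the builder's __init__ above: the
# window is rounded up to whole blocks so the block table can cover it.
sliding_window, block_size = 1000, 16
sliding_window_blocks = (sliding_window + block_size - 1) // block_size
block_aligned_sliding_window = sliding_window_blocks * block_size
# A 1000-token window with 16-token blocks -> 63 blocks, 1008 token slots.
assert (sliding_window_blocks, block_aligned_sliding_window) == (63, 1008)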
- self.decode_only = True - - # Intermediate data (data in CPU before going to GPU) for - # the current sequence group. - self.inter_data_list: List[ - ModelInputForGPUBuilder.InterDataForSeqGroup] = [] - - self.attn_metadata_builder.prepare() - - def _compute_lens(self, inter_data: InterDataForSeqGroup, seq_idx: int, - seq_group_metadata: SequenceGroupMetadata): - """Compute context length, sequence length and tokens - for the given sequence data. - """ - seq_data = seq_group_metadata.seq_data[inter_data.seq_ids[seq_idx]] - token_chunk_size = seq_group_metadata.token_chunk_size - - # Compute context length (the number of tokens that are - # already computed) and sequence length (total number of tokens). - - seq_len = seq_data.get_len() - if inter_data.is_prompt: - context_len = seq_data.get_num_computed_tokens() - seq_len = min(seq_len, context_len + token_chunk_size) - elif self.runner.model_config.is_encoder_decoder: - context_len = seq_len - 1 - else: - context_len = seq_data.get_num_computed_tokens() - - # Compute tokens. - if seq_data.prompt_embeds is None: - tokens = seq_data.get_token_ids()[context_len:seq_len] - prompt_embeds = None - else: - tokens = [0] * (seq_len - context_len) - prompt_embeds = seq_data.get_token_embeddings( - )[context_len:seq_len] - - inter_data.seq_lens[seq_idx] = seq_len - inter_data.orig_seq_lens[seq_idx] = seq_len - inter_data.prompt_lens[seq_idx] = seq_data.get_prompt_len() - inter_data.context_lens[seq_idx] = context_len - inter_data.input_tokens[seq_idx].extend(tokens) - inter_data.inputs_embeds = prompt_embeds - inter_data.input_positions[seq_idx].extend(range(context_len, seq_len)) - inter_data.query_lens[seq_idx] = seq_len - context_len - - if seq_data.mrope_position_delta is not None: - if inter_data.mrope_input_positions is None: - inter_data.mrope_input_positions = [None] * inter_data.n_seqs - - inter_data.mrope_input_positions[ - seq_idx] = MRotaryEmbedding.get_next_input_positions( - seq_data.mrope_position_delta, - context_len, - seq_len, - ) - - def _compute_for_prefix_cache_hit( - self, inter_data: InterDataForSeqGroup, seq_idx: int, - seq_group_metadata: SequenceGroupMetadata): - """Check if hit prefix cache (i.e., some blocks are already computed). - If hit, update input tokens and positions to only compute the - remaining blocks. - """ - computed_block_nums = inter_data.computed_block_nums - - # Note that prefix caching does not support sliding window. - prefix_cache_hit = (computed_block_nums is not None - and len(computed_block_nums) > 0 - and self.sliding_window is None - and inter_data.is_prompt) - inter_data.prefix_cache_hit = prefix_cache_hit - - if not prefix_cache_hit: - return - - assert computed_block_nums is not None - # The cache hit prompt tokens in this sequence. Note that - # this may be larger than the sequence length if chunked - # prefill is enabled. - prefix_cache_len = len(computed_block_nums) * self.block_size - seq_group_metadata.seq_data[inter_data.seq_ids[ - seq_idx]].update_num_cached_tokens(prefix_cache_len) - - # The number of so far computed prompt tokens in this sequence. - context_len = inter_data.context_lens[seq_idx] - # The total number of prompt tokens in this sequence. - # When chunked prefill is enabled, this is the token number of - # computed chunks + current chunk. - seq_len = inter_data.seq_lens[seq_idx] - if prefix_cache_len <= context_len: - # We already passed the cache hit region, - # so do normal computation. - pass - elif context_len < prefix_cache_len < seq_len: - # Partial hit. 
Compute the missing part. - uncomputed_start = prefix_cache_len - context_len - inter_data.input_tokens[seq_idx] = inter_data.input_tokens[ - seq_idx][uncomputed_start:] - inter_data.input_positions[seq_idx] = inter_data.input_positions[ - seq_idx][uncomputed_start:] - context_len = prefix_cache_len - - inter_data.context_lens[seq_idx] = context_len - inter_data.query_lens[ - seq_idx] = inter_data.seq_lens[seq_idx] - context_len - elif seq_len <= prefix_cache_len: - # Full hit. Only compute the last token to avoid - # erroneous behavior. FIXME: Ideally we should directly - # mark all tokens as computed in the scheduler and do not - # schedule this sequence, so this case should not happen. - inter_data.input_tokens[seq_idx] = inter_data.input_tokens[ - seq_idx][-1:] - inter_data.input_positions[seq_idx] = inter_data.input_positions[ - seq_idx][-1:] - inter_data.query_lens[seq_idx] = 1 - inter_data.context_lens[seq_idx] = inter_data.seq_lens[seq_idx] - 1 - - def _compute_for_sliding_window(self, inter_data: InterDataForSeqGroup, - seq_idx: int, - seq_group_metadata: SequenceGroupMetadata): - """Update seq_len and curr_sliding_window_block for the given - sequence data (only required by decoding) if sliding window is enabled. - """ - curr_sliding_window_block = 0 - sliding_seq_len = inter_data.seq_lens[seq_idx] - if not inter_data.is_prompt and self.sliding_window is not None: - # TODO(sang): This is a hack to make sliding window work with - # paged attn. We can remove it if we make paged attn kernel - # to properly handle slinding window attn. - curr_sliding_window_block = self.sliding_window_blocks - # number of elements in last block - suff_len = inter_data.seq_lens[seq_idx] % self.block_size - sliding_seq_len = min(inter_data.seq_lens[seq_idx], - self.block_aligned_sliding_window + suff_len) - if suff_len > 0: - curr_sliding_window_block += 1 - - inter_data.curr_sliding_window_blocks[ - seq_idx] = curr_sliding_window_block - inter_data.seq_lens[seq_idx] = sliding_seq_len - - def _compute_lora_input(self, inter_data: InterDataForSeqGroup, - seq_idx: int, - seq_group_metadata: SequenceGroupMetadata): - """If LoRA is enabled, compute LoRA index and prompt mapping.""" - if not self.enable_lora: - return - - lora_id = seq_group_metadata.lora_int_id - if lora_id > 0: - inter_data.lora_requests.add(seq_group_metadata.lora_request) - query_len = inter_data.query_lens[seq_idx] - inter_data.lora_index_mapping.append([lora_id] * query_len) - sampling_params = seq_group_metadata.sampling_params - if sampling_params and sampling_params.prompt_logprobs is not None: - inter_data.lora_prompt_mapping.append([lora_id] * query_len) - elif not self.chunked_prefill_enabled or seq_group_metadata.do_sample: - inter_data.lora_prompt_mapping.append([lora_id]) - else: - inter_data.lora_prompt_mapping.append([]) - - def _compute_multi_modal_input(self, inter_data: InterDataForSeqGroup, - seq_group_metadata: SequenceGroupMetadata): - """If multi-modal data is given, add it to the input.""" - # NOTE: mm_kwargs only includes the subset of multi-modal items that - # intersect with the current prefill positions. - positions = inter_data.input_positions[0] - mm_kwargs, placeholder_maps = MultiModalPlaceholderMap.from_seq_group( - seq_group_metadata, - range(positions[0], positions[0] + len(positions))) - - # M-RoPE requires mrope_positions even for plain text; return early - # when mm_kwargs is empty only if inter_data.is_prompt is False. 
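# Editor's sketch (illustrative only, not part of this patch): the partial
# prefix-cache hit handling of _compute_for_prefix_cache_hit above, reduced
# to a standalone helper. When the cached prefix reaches past the already
# computed context, the cached-but-not-yet-consumed leading tokens are
# dropped from this step's inputs and the context length is advanced.
def trim_for_partial_hit(tokens, positions, context_len, prefix_cache_len):
    assert context_len < prefix_cache_len < context_len + len(tokens)
    uncomputed_start = prefix_cache_len - context_len
    return (tokens[uncomputed_start:], positions[uncomputed_start:],
            prefix_cache_len)


# 8 prompt tokens, nothing computed yet, the first 4 tokens (one cached
# block) already hit: only tokens 4..7 are fed to the model this step.
toks, pos, new_context_len = trim_for_partial_hit(
    list("abcdefgh"), list(range(8)), context_len=0, prefix_cache_len=4)
assert (toks, pos, new_context_len) == (list("efgh"), [4, 5, 6, 7], 4)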
- if not mm_kwargs and not inter_data.is_prompt: - return - - inter_data.multi_modal_kwargs = mm_kwargs - inter_data.multi_modal_placeholder_maps = placeholder_maps - - # special processing for mrope position deltas. - if self.runner.model_config.uses_mrope: - image_grid_thw = mm_kwargs.get("image_grid_thw", None) - video_grid_thw = mm_kwargs.get("video_grid_thw", None) - audio_feature_lengths = mm_kwargs.get("audio_feature_lengths", - None) - - second_per_grid_ts = mm_kwargs.get("second_per_grid_ts", None) - use_audio_in_video = mm_kwargs.get("use_audio_in_video", False) - hf_config = self.runner.model_config.hf_config - - inter_data.mrope_input_positions = [None] * inter_data.n_seqs - for seq_idx in range(inter_data.n_seqs): - seq_data = seq_group_metadata.seq_data[ - inter_data.seq_ids[seq_idx]] - token_ids = seq_data.get_token_ids() - - if supports_mrope(self.runner.model): - mrope_input_positions, mrope_position_delta = \ - self.runner.model.get_mrope_input_positions( - token_ids, - hf_config=hf_config, - image_grid_thw=image_grid_thw, - video_grid_thw=video_grid_thw, - second_per_grid_ts=second_per_grid_ts, - context_len=inter_data.context_lens[seq_idx], - seq_len=inter_data.seq_lens[seq_idx], - audio_feature_lengths=audio_feature_lengths, - use_audio_in_video=use_audio_in_video, - ) - mrope_input_positions = mrope_input_positions.tolist() - else: - mrope_input_positions, mrope_position_delta = \ - MRotaryEmbedding.get_input_positions( - token_ids, - hf_config=hf_config, - image_grid_thw=image_grid_thw, - video_grid_thw=video_grid_thw, - second_per_grid_ts=second_per_grid_ts, - context_len=inter_data.context_lens[seq_idx], - seq_len=inter_data.seq_lens[seq_idx], - audio_feature_lengths=audio_feature_lengths, - use_audio_in_video=use_audio_in_video, - ) - - seq_data.mrope_position_delta = mrope_position_delta - inter_data.mrope_input_positions[ - seq_idx] = mrope_input_positions - - def add_seq_group(self, seq_group_metadata: SequenceGroupMetadata): - """Add a sequence group to the builder.""" - seq_ids = seq_group_metadata.seq_data.keys() - n_seqs = len(seq_ids) - is_prompt = seq_group_metadata.is_prompt - - if is_prompt: - assert n_seqs == 1 - self.decode_only = False - - encoder_seq_len = 0 - - if self.runner.model_config.is_encoder_decoder: - encoder_seq_len = seq_group_metadata.encoder_seq_data.get_len() - - inter_data = self.init_cached_inter_data( - request_id=seq_group_metadata.request_id, - seq_ids=seq_ids, - is_prompt=is_prompt, - block_tables=seq_group_metadata.block_tables, - computed_block_nums=seq_group_metadata.computed_block_nums, - reinit=True, - reinit_use_defaults=True, - encoder_seq_len=encoder_seq_len) - - self.inter_data_list.append(inter_data) - - for seq_idx in range(n_seqs): - for per_seq_fn in self.per_seq_compute_fns: - per_seq_fn(inter_data, seq_idx, seq_group_metadata) - for per_seq_group_fn in self.per_seq_group_compute_fns: - per_seq_group_fn(inter_data, seq_group_metadata) - - def _use_captured_graph(self, - batch_size: int, - decode_only: bool, - max_decode_seq_len: int, - max_encoder_seq_len: int = 0) -> bool: - return (decode_only and not self.runner.model_config.enforce_eager - and max_decode_seq_len <= self.runner.max_seq_len_to_capture - and max_encoder_seq_len <= self.runner.max_seq_len_to_capture - and batch_size <= self.runner.max_batchsize_to_capture) - - def _get_cuda_graph_pad_size(self, - num_seqs: int, - max_decode_seq_len: int, - max_encoder_seq_len: int = 0) -> int: - """ - Determine the number of padding sequences required for running 
in - CUDA graph mode. Returns -1 if CUDA graphs cannot be used. - - In the multi-step + chunked-prefill case, only the first step - has Prefills (if any). The rest of the steps are guaranteed to be all - decodes. In this case, we set up the padding as if all the sequences - are decodes so we may run all steps except the first step in CUDA graph - mode. - - Args: - num_seqs (int): Number of sequences scheduled to run. - max_decode_seq_len (int): Greatest of all the decode sequence - lengths. Used only in checking the viablility of using - CUDA graphs. - max_encoder_seq_len (int, optional): Greatest of all the encode - sequence lengths. Defaults to 0. Used only in checking the - viability of using CUDA graphs. - Returns: - int: Returns the determined number of padding sequences. If - CUDA graphs is not viable, returns -1. - """ - decode_only = self.decode_only - if not decode_only: - # Early exit so we can treat num_seqs as the batch_size below. - return -1 - - # batch_size out of this function refers to the number of input - # tokens being scheduled. This conflation of num_seqs as batch_size - # is valid as this is a decode-only case. - batch_size = num_seqs - if not self._use_captured_graph(batch_size, decode_only, - max_decode_seq_len, - max_encoder_seq_len): - return -1 - - graph_batch_size = self.runner.vllm_config.pad_for_cudagraph( - batch_size) - assert graph_batch_size >= batch_size - return graph_batch_size - batch_size - - def build(self) -> ModelInputForGPU: - """Finalize the builder intermediate data and - create on-device tensors. - """ - # Combine and flatten intermediate data. - input_tokens = list[int]() - inputs_embeds_list = list[torch.Tensor]() - for inter_data in self.inter_data_list: - for cur_input_tokens in inter_data.input_tokens: - input_tokens.extend(cur_input_tokens) - if inter_data.inputs_embeds is not None: - inputs_embeds_list.append( - inter_data.inputs_embeds.to( - dtype=self.runner.model_config.dtype, - device=self.runner.device)) - inputs_embeds: Optional[torch.Tensor] - if len(inputs_embeds_list) == 0: - inputs_embeds = None - else: - inputs_embeds = torch.cat(inputs_embeds_list, dim=0).to( - dtype=self.runner.model_config.dtype, - device=self.runner.device) - assert len(inputs_embeds) == len(input_tokens) - - if not input_tokens and inputs_embeds is None: - # This may happen when all prefill requests hit - # prefix caching and there is no decode request. 
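# Editor's sketch (illustrative only, not part of this patch): the padding
# rule in _get_cuda_graph_pad_size above, assuming a hypothetical list of
# captured graph batch sizes (vLLM derives the real sizes from its
# compilation config via pad_for_cudagraph).
import bisect

CAPTURED_BATCH_SIZES = [1, 2, 4, 8, 16, 32]      # assumed capture sizes


def cuda_graph_pad_size(num_decode_seqs: int) -> int:
    """Return -1 if no captured graph fits, else the number of pad seqs."""
    idx = bisect.bisect_left(CAPTURED_BATCH_SIZES, num_decode_seqs)
    if idx == len(CAPTURED_BATCH_SIZES):
        return -1
    return CAPTURED_BATCH_SIZES[idx] - num_decode_seqs


assert cuda_graph_pad_size(5) == 3     # padded up to the batch-size-8 graph
assert cuda_graph_pad_size(33) == -1   # too large; fall back to eager mode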
- return self.model_input_cls() - - mrope_input_positions: Optional[List[List[int]]] = None - if any(inter_data.mrope_input_positions is not None - for inter_data in self.inter_data_list): - mrope_input_positions = [[] for _ in range(3)] - for idx in range(3): - for inter_data in self.inter_data_list: - msections = inter_data.mrope_input_positions - if msections is None: - for _seq_input_positions in inter_data.input_positions: - mrope_input_positions[idx].extend( - _seq_input_positions) - else: - for _seq_mrope_input_positions in msections: - mrope_input_positions[idx].extend( - _seq_mrope_input_positions[idx]) - input_positions = None - else: - input_positions = [] - for inter_data in self.inter_data_list: - for cur_input_positions in inter_data.input_positions: - input_positions.extend(cur_input_positions) - - seq_lens = [] - query_lens = [] - max_decode_seq_len = 0 - max_encoder_seq_len = 0 - for inter_data in self.inter_data_list: - seq_lens.extend(inter_data.seq_lens) - query_lens.extend(inter_data.query_lens) - if not inter_data.is_prompt: - max_decode_seq_len = max(max_decode_seq_len, - max(inter_data.seq_lens)) - if self.runner.model_config.is_encoder_decoder: - max_encoder_seq_len = max(max_encoder_seq_len, - inter_data.encoder_seq_len) - - # Mapping from request IDs to sequence IDs. Used for Jamba models - # that manages the cache by itself. - request_ids_to_seq_ids = { - data.request_id: data.seq_ids - for data in self.inter_data_list - } - - cuda_graph_pad_size = self._get_cuda_graph_pad_size( - num_seqs=len(seq_lens), - max_decode_seq_len=max_decode_seq_len, - max_encoder_seq_len=max_encoder_seq_len) - - batch_size = len(input_tokens) - if cuda_graph_pad_size != -1: - # If cuda graph can be used, pad tensors accordingly. - # See `capture_model` API for more details. - # vLLM uses cuda graph only for decoding requests. - batch_size += cuda_graph_pad_size - - # Tokens and positions. - if cuda_graph_pad_size: - input_tokens.extend(itertools.repeat(0, cuda_graph_pad_size)) - assert self.runner.device is not None - input_tokens_tensor = async_tensor_h2d(input_tokens, torch.long, - self.runner.device, - self.runner.pin_memory) - - if mrope_input_positions is not None: - for idx in range(3): - mrope_input_positions[idx].extend( - itertools.repeat(0, cuda_graph_pad_size)) - input_positions_tensor = async_tensor_h2d(mrope_input_positions, - torch.long, - self.runner.device, - self.runner.pin_memory) - else: - input_positions.extend(itertools.repeat(0, cuda_graph_pad_size)) - input_positions_tensor = async_tensor_h2d(input_positions, - torch.long, - self.runner.device, - self.runner.pin_memory) - # Sequence and query lengths. - if cuda_graph_pad_size: - seq_lens.extend(itertools.repeat(1, cuda_graph_pad_size)) - - # Attention metadata. - attn_metadata = self.attn_metadata_builder.build( - seq_lens, query_lens, cuda_graph_pad_size, batch_size) - - # LoRA data. 
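# Editor's sketch (illustrative only, not part of this patch): a simplified
# version of how build() above flattens M-RoPE positions. Three parallel
# position streams are concatenated sequence by sequence, and text-only
# sequences (no mrope sections) reuse their ordinary 1-D positions for all
# three streams.
def flatten_mrope(per_seq_mrope, per_seq_positions):
    flat = [[], [], []]
    for mrope, positions in zip(per_seq_mrope, per_seq_positions):
        for idx in range(3):
            flat[idx].extend(positions if mrope is None else mrope[idx])
    return flat


flat = flatten_mrope(
    per_seq_mrope=[None, [[0, 0], [0, 1], [1, 0]]],   # text-only, then mm
    per_seq_positions=[[0, 1, 2], []],
)
assert flat == [[0, 1, 2, 0, 0], [0, 1, 2, 0, 1], [0, 1, 2, 1, 0]]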
- lora_requests = set() - lora_mapping = None - if self.enable_lora: - lora_requests = set(r for data in self.inter_data_list - for r in data.lora_requests) - lora_index_mapping = flatten_2d_lists([ - flatten_2d_lists(inter_data.lora_index_mapping) - for inter_data in self.inter_data_list - ]) - if cuda_graph_pad_size: - lora_index_mapping.extend( - itertools.repeat(0, cuda_graph_pad_size)) - lora_prompt_mapping = flatten_2d_lists([ - flatten_2d_lists(inter_data.lora_prompt_mapping) - for inter_data in self.inter_data_list - ]) - - lora_mapping = LoRAMapping( - **dict(index_mapping=lora_index_mapping, - prompt_mapping=lora_prompt_mapping, - is_prefill=not self.decode_only)) - - # Multi-modal data. - multi_modal_kwargs_list = [ - data.multi_modal_kwargs for data in self.inter_data_list - if data.multi_modal_kwargs is not None - ] - multi_modal_kwargs = MultiModalKwargs.batch(multi_modal_kwargs_list) - - return self.model_input_cls( - input_tokens=input_tokens_tensor, - inputs_embeds=inputs_embeds, - input_positions=input_positions_tensor, - attn_metadata=attn_metadata, - seq_lens=seq_lens, - query_lens=query_lens, - lora_mapping=lora_mapping, - lora_requests=lora_requests, - multi_modal_kwargs=multi_modal_kwargs, - request_ids_to_seq_ids=request_ids_to_seq_ids, - finished_requests_ids=self.finished_requests_ids) - - -class GPUModelRunnerBase(ModelRunnerBase[TModelInputForGPU]): - """ - Helper class for shared methods between GPU model runners. - """ - _model_input_cls: Type[TModelInputForGPU] - _builder_cls: Type[ModelInputForGPUBuilder] - builder: ModelInputForGPUBuilder - - def __init__( - self, - vllm_config: VllmConfig, - kv_cache_dtype: Optional[str] = "auto", - is_driver_worker: bool = False, - return_hidden_states: bool = False, - input_registry: InputRegistry = INPUT_REGISTRY, - mm_registry: MultiModalRegistry = MULTIMODAL_REGISTRY, - ): - - ModelRunnerBase.__init__(self, vllm_config) - model_config = self.model_config - cache_config = self.cache_config - - self.is_driver_worker = is_driver_worker - self.return_hidden_states = return_hidden_states - - self.device = self.device_config.device - self.pin_memory = is_pin_memory_available() - - self.kv_cache_dtype = kv_cache_dtype - self.sliding_window = model_config.get_sliding_window() - self.block_size = cache_config.block_size - self.max_seq_len_to_capture = self.model_config.max_seq_len_to_capture - self.max_batchsize_to_capture = \ - self.vllm_config.compilation_config.max_capture_size - - # - self.graph_runners: List[Dict[Tuple[int, bool], CUDAGraphRunner]] = [ - {} for _ in range(self.parallel_config.pipeline_parallel_size) - ] - self.graph_memory_pool: Optional[Tuple[ - int, int]] = None # Set during graph capture. - - self.has_inner_state = model_config.has_inner_state - - self.in_profile_run = False - - # When using CUDA graph, the input block tables must be padded to - # max_seq_len_to_capture. However, creating the block table in - # Python can be expensive. To optimize this, we cache the block table - # in numpy and only copy the actual input content at every iteration. - # The shape of the cached block table will be - # (max batch size to capture, max seq len to capture / block size). 
- self.graph_block_tables = np.zeros( - (self.max_batchsize_to_capture, self.get_max_block_per_batch()), - dtype=np.int32) - - self.cross_layer_shared_graph_block_tables = np.zeros( - (self.max_batchsize_to_capture, self.get_max_block_per_batch()), - dtype=np.int32) - - # Attention-free but stateful models like Mamba need a placeholder attn - # backend, as the attention metadata is needed to manage internal state. - # However we must bypass attention selection altogether for some models - # used for speculative decoding to avoid a divide-by-zero in - # model_config.get_head_size() - num_attn_heads = self.model_config.get_num_attention_heads( - self.parallel_config) - needs_attn_backend = (num_attn_heads != 0 - or self.model_config.is_attention_free) - - self.attn_backend = get_attn_backend( - self.model_config.get_head_size(), - self.model_config.dtype, - self.kv_cache_dtype, - self.block_size, - self.model_config.is_attention_free, - use_mla=self.model_config.use_mla, - ) if needs_attn_backend else None - if self.attn_backend: - self.attn_state = self.attn_backend.get_state_cls()( - weakref.proxy(self)) - else: - self.attn_state = CommonAttentionState(weakref.proxy(self)) - - # Multi-modal data support - self.input_registry = input_registry - self.mm_registry = mm_registry - - # Lazy initialization - self.model: nn.Module # Set after load_model - # Set after load_model. - self.lora_manager: Optional[LRUCacheWorkerLoRAManager] = None - self.sampler = get_sampler() - - set_cpu_offload_max_bytes( - int(self.cache_config.cpu_offload_gb * 1024**3)) - - # Used to cache python objects - self.inter_data_cache: Dict[int, PyObjectCache] = {} - - # Using the PythonizationCache in Pipeline-Parallel clobbers the - # SequenceGroupToSample object. In Pipeline-Parallel, we have - # more than 1 Scheduler, resulting in a potential back-to-back - # prepare_model_inputs() call. This clobbers the cached - # SequenceGroupToSample objects, as we reset the cache during - # every prepare_model_inputs() call. - self.sampling_metadata_cache: SamplingMetadataCache = \ - SamplingMetadataCache() \ - if self.parallel_config.pipeline_parallel_size == 1 else None - - if hasattr(self, "_builder_cls"): - # multi-step model runner does not have `_builder_cls` - self.builder = self._builder_cls(weakref.proxy(self)) - - def load_model(self) -> None: - logger.info("Starting to load model %s...", self.model_config.model) - with DeviceMemoryProfiler(self.device) as m: - time_before_load = time.perf_counter() - self.model = get_model(vllm_config=self.vllm_config) - if self.lora_config: - assert supports_lora( - self.model - ), f"{self.model.__class__.__name__} does not support LoRA yet." 
- - if supports_multimodal(self.model): - logger.warning( - "Regarding multimodal models, vLLM currently " - "only supports adding LoRA to language model.") - - self.lora_manager = LRUCacheWorkerLoRAManager( - self.vllm_config, - self.device, - self.model.embedding_modules, - self.model.embedding_padding_modules, - ) - - self.model = self.lora_manager.create_lora_manager(self.model) - time_after_load = time.perf_counter() - - self.model_memory_usage = m.consumed_memory - logger.info("Model loading took %.4f GiB and %.6f seconds", - self.model_memory_usage / GiB_bytes, - time_after_load - time_before_load) - - - if self.vllm_config.compilation_config.level ==\ - CompilationLevel.DYNAMO_AS_IS and supports_dynamo(): - backend = self.vllm_config.compilation_config.init_backend( - self.vllm_config) - compilation_counter.dynamo_as_is_count += 1 - self.model = torch.compile(self.model, - fullgraph=True, - backend=backend) - - def get_model(self) -> nn.Module: - return self.model - - def save_sharded_state( - self, - path: str, - pattern: Optional[str] = None, - max_size: Optional[int] = None, - ) -> None: - from vllm.model_executor.model_loader import ShardedStateLoader - ShardedStateLoader.save_model( - self.model, - path, - pattern=pattern, - max_size=max_size, - ) - - def save_tensorized_model( - self, - tensorizer_config: TensorizerConfig, - ) -> None: - from vllm.model_executor.model_loader import TensorizerLoader - TensorizerLoader.save_model( - self.model, - tensorizer_config=tensorizer_config, - model_config=self.model_config, - ) - - def get_max_block_per_batch(self) -> int: - block_size = self.block_size - return (self.max_seq_len_to_capture + block_size - 1) // block_size - - def _prepare_model_input_tensors( - self, - seq_group_metadata_list: List[SequenceGroupMetadata], - finished_requests_ids: Optional[List[str]] = None - ) -> TModelInputForGPU: - """Helper method to prepare the model input based on a given sequence - group. Prepares metadata needed for the base model forward pass but not - metadata for possible additional steps, e.g., sampling. - - The API assumes seq_group_metadata_list is sorted by prefill -> decode. - - The result tensors and data structure also batches input in prefill - -> decode order. For example, - - - input_tokens[:num_prefill_tokens] contains prefill tokens. - - input_tokens[num_prefill_tokens:] contains decode tokens. - - If cuda graph is required, this API automatically pads inputs. 
- """ - self.builder.prepare(finished_requests_ids) - for seq_group_metadata in seq_group_metadata_list: - try: - self.builder.add_seq_group(seq_group_metadata) - except Exception as e: - # Raise an exception that tracks the ID of the bad request - raise InputProcessingError(seq_group_metadata.request_id, - str(e)) from e - - self.builder.reset_cached_inter_data() - - return self.builder.build() # type: ignore - - @contextmanager - def set_in_profile_run(self): - self.in_profile_run = True - try: - yield - finally: - self.in_profile_run = False - - @torch.inference_mode() - def profile_run(self) -> None: - max_num_batched_tokens = \ - self.scheduler_config.max_num_batched_tokens - max_num_seqs = self.scheduler_config.max_num_seqs - self._dummy_run(max_num_batched_tokens, max_num_seqs) - - def _add_dummy_loras(self, num_loras: int) -> list[LoRARequest]: - assert num_loras > 0 - assert self.lora_manager is not None - - dummy_lora_requests: list[LoRARequest] = [] - with self.lora_manager.dummy_lora_cache(): - for idx in range(num_loras): - lora_id = idx + 1 - dummy_lora_request = LoRARequest( - lora_name=f"warmup_{lora_id}", - lora_int_id=lora_id, - lora_path="/not/a/real/path", - ) - self.lora_manager.add_dummy_lora(dummy_lora_request, - rank=LORA_WARMUP_RANK) - dummy_lora_requests.append(dummy_lora_request) - return dummy_lora_requests - - def _remove_dummy_loras(self): - # Remove dummy loras. - assert self.lora_manager is not None - self.remove_all_loras() - - def _dummy_run(self, - max_num_batched_tokens: int, - max_num_seqs: int = 1) -> None: - with self.set_in_profile_run(): - # Enable top-k sampling to reflect the accurate memory usage. - sampling_params = \ - SamplingParams(top_p=0.99, top_k=self.vocab_size - 1) - - # This represents the maximum number of different requests - # that will have unique loras, and therefore the max amount of - # memory consumption. Create dummy lora request copies from the - # lora request passed in, which contains a lora from the lora - # warmup path. - dummy_lora_requests: List[LoRARequest] = [] - dummy_lora_requests_per_seq: List[LoRARequest] = [] - if self.lora_config: - dummy_lora_requests = self._add_dummy_loras( - self.lora_config.max_loras) - assert len(dummy_lora_requests) == self.lora_config.max_loras - dummy_lora_requests_per_seq = [ - dummy_lora_requests[idx % len(dummy_lora_requests)] - for idx in range(max_num_seqs) - ] - - # Profile memory usage with max_num_sequences sequences and the - # total number of tokens equal to max_num_batched_tokens. - seqs: List[SequenceGroupMetadata] = [] - # Additional GPU memory may be needed for multi-modal encoding, - # which needs to be accounted for when calculating the GPU blocks - # for vLLM blocker manager. - # To exercise the worst scenario for GPU memory consumption, - # the number of seqs (batch_size) is chosen to maximize the number - # of images processed. - - max_mm_tokens = self.mm_registry.get_max_multimodal_tokens( - self.model_config) - if max_mm_tokens > 0: - max_num_seqs_orig = max_num_seqs - max_num_seqs = min(max_num_seqs, - max_num_batched_tokens // max_mm_tokens) - if max_num_seqs < 1: - expr = (f"min({max_num_seqs_orig}, " - f"{max_num_batched_tokens} // {max_mm_tokens})") - logger.warning( - "Computed max_num_seqs (%s) to be less than 1. 
" - "Setting it to the minimum value of 1.", expr) - max_num_seqs = 1 - - batch_size = 0 - for group_id in range(max_num_seqs): - seq_len = (max_num_batched_tokens // max_num_seqs + - (group_id < max_num_batched_tokens % max_num_seqs)) - batch_size += seq_len - - dummy_data = self.input_registry \ - .dummy_data_for_profiling(self.model_config, - seq_len, - self.mm_registry) - - seq = SequenceGroupMetadata( - request_id=str(group_id), - is_prompt=True, - seq_data={group_id: dummy_data.seq_data}, - sampling_params=sampling_params, - block_tables=None, - lora_request=dummy_lora_requests_per_seq[group_id] - if dummy_lora_requests_per_seq else None, - multi_modal_data=dummy_data.multi_modal_data, - multi_modal_placeholders=dummy_data. - multi_modal_placeholders, - ) - seqs.append(seq) - - # Run the model with the dummy inputs. - num_layers = self.model_config.get_num_layers(self.parallel_config) - # use an empty tensor instead of `None`` to force Dynamo to pass - # it by reference, rather by specializing on the value ``None``. - # the `dtype` argument does not matter, and we use `float32` as - # a placeholder (it has wide hardware support). - # it is important to create tensors inside the loop, rather than - # multiplying the list, to avoid Dynamo from treating them as - # tensor aliasing. - kv_caches = [ - torch.tensor([], dtype=torch.float32, device=self.device) - for _ in range(num_layers) - ] - finished_requests_ids = [seq.request_id for seq in seqs] - model_input = self.prepare_model_input( - seqs, finished_requests_ids=finished_requests_ids) - intermediate_tensors = None - if not get_pp_group().is_first_rank: - intermediate_tensors = \ - self.model.make_empty_intermediate_tensors( - batch_size=batch_size, - dtype=self.model_config.dtype, - device=self.device) - - # Disable KV Scale Calculation for dummy data during profile run - if model_input.attn_metadata is not None: - model_input.attn_metadata.enable_kv_scales_calculation = False - - self.execute_model(model_input, kv_caches, intermediate_tensors) - torch.cuda.synchronize() - if self.lora_config: - self._remove_dummy_loras() - - return - - def remove_all_loras(self): - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - self.lora_manager.remove_all_adapters() - - def set_active_loras(self, lora_requests: Set[LoRARequest], - lora_mapping: LoRAMapping) -> None: - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - self.lora_manager.set_active_adapters(lora_requests, lora_mapping) - - def add_lora(self, lora_request: LoRARequest) -> bool: - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - return self.lora_manager.add_adapter(lora_request) - - def remove_lora(self, lora_id: int) -> bool: - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - return self.lora_manager.remove_adapter(lora_id) - - def pin_lora(self, lora_id: int) -> bool: - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - return self.lora_manager.pin_adapter(lora_id) - - def list_loras(self) -> Set[int]: - if not self.lora_manager: - raise RuntimeError("LoRA is not enabled.") - return self.lora_manager.list_adapters() - - @torch.inference_mode() - def capture_model(self, kv_caches: List[List[torch.Tensor]]) -> int: - """Cuda graph capture a model and return cudagraph memory - consumption in bytes. - - Note that CUDA graph's performance gain is negligible if number - of batched tokens are larger than 200. 
And since CUDA graph - requires fixed sized tensors, supporting large/variable batch - size requires high GPU memory overhead. Thus, vLLM only captures - decoding requests. Mixed batch (chunked prefill + decoding) or - prefill requests are not captured. - - Since it is used for decoding-only, it assumes there's only 1 token - per sequence in the batch. - """ - assert not self.model_config.enforce_eager - logger.info("Capturing cudagraphs for decoding. This may lead to " - "unexpected consequences if the model is not static. To " - "run the model in eager mode, set 'enforce_eager=True' or " - "use '--enforce-eager' in the CLI. " - "If out-of-memory error occurs during cudagraph capture," - " consider decreasing `gpu_memory_utilization` or " - "switching to eager mode. You can also reduce the " - "`max_num_seqs` as needed to decrease memory usage.") - start_time = time.perf_counter() - start_free_gpu_memory = torch.cuda.mem_get_info()[0] - - # Prepare dummy inputs. These will be reused for all batch sizes. - max_batch_size = self.max_batchsize_to_capture - input_tokens = torch.zeros(max_batch_size, - dtype=torch.long, - device=self.device) - input_positions = torch.zeros(max_batch_size, - dtype=torch.long, - device=self.device) - inputs_embeds = torch.zeros( - (max_batch_size, self.model_config.get_hidden_size()), - dtype=self.model_config.dtype, - device=self.device) - if self.model_config.uses_mrope: - input_positions = torch.tile(input_positions, - (3, 1)).cuda(device=self.device) - # Prepare dummy previous_hidden_states only if needed by the model. - # This is used by draft models such as EAGLE. - previous_hidden_states = None - if "previous_hidden_states" in inspect.signature( - self.model.forward).parameters: - previous_hidden_states = torch.empty( - [max_batch_size, - self.model_config.get_hidden_size()], - dtype=self.model_config.dtype, - device=self.device) - - intermediate_inputs = None - if not get_pp_group().is_first_rank: - intermediate_inputs = self.model.make_empty_intermediate_tensors( - batch_size=max_batch_size, - dtype=self.model_config.dtype, - device=self.device) - - dummy_lora_id: Optional[int] = None - dummy_lora_request: LoRARequest = [] - if self.lora_config: - # The goal is to capture the LoRA kernels in cuda graphs. - # for this purpose, as single dummy lora is sufficient. - dummy_lora_requests = self._add_dummy_loras(num_loras=1) - assert len(dummy_lora_requests) == 1 - dummy_lora_request = dummy_lora_requests[0] - dummy_lora_id = dummy_lora_request.lora_int_id - - with self.attn_state.graph_capture(max_batch_size), graph_capture( - self.device) as graph_capture_context: - # NOTE: Capturing the largest batch size first may help reduce the - # memory usage of CUDA graph. - for virtual_engine in range( - self.parallel_config.pipeline_parallel_size): - # We need to not only iterate over batch sizes, but also whether - # to use inputs_embeds or not, hence we use the cartesian - # product. 
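For reference, a minimal self-contained sketch of the case enumeration described in the comment above; the capture sizes and the prompt-embeds flag are illustrative placeholders rather than values read from a real vLLM config:

import itertools

# Largest batch size first may reduce the memory footprint of CUDA graph
# capture, so the list is kept in descending order.
cudagraph_capture_sizes = [256, 128, 64, 32, 16, 8, 4, 2, 1]
enable_prompt_embeds = True

# With prompt embeddings enabled, every batch size is captured twice: once
# reading token IDs and once reading precomputed input embeddings.
inputs_embeds_options = (True, False) if enable_prompt_embeds else (False, )

compilation_cases = list(
    itertools.product(cudagraph_capture_sizes, inputs_embeds_options))

for batch_size, use_inputs_embeds in compilation_cases:
    # Each (batch_size, use_inputs_embeds) pair becomes the key of one
    # captured graph in the runner's graph table.
    print(f"capturing graph for batch_size={batch_size}, "
          f"inputs_embeds={use_inputs_embeds}")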
- cudagraph_capture_sizes = self.vllm_config.compilation_config\ - .cudagraph_capture_sizes - cudagraph_inputs_embeds = (( - True, False) if self.model_config.enable_prompt_embeds else - (False, )) - compilation_cases = itertools.product( - cudagraph_capture_sizes, - cudagraph_inputs_embeds, - ) - # Only rank 0 should print progress bar during capture - if get_tensor_model_parallel_rank() == 0: - compilation_cases = tqdm( - list(compilation_cases), - disable=not self.load_config.use_tqdm_on_load, - desc="Capturing CUDA graph shapes") - for batch_size, use_inputs_embeds in compilation_cases: - attn_metadata = ( - self.attn_state.graph_capture_get_metadata_for_batch( - batch_size, - is_encoder_decoder_model=self.model_config. - is_encoder_decoder)) - # Disable KV Scale Calculation for graph capture - attn_metadata.enable_kv_scales_calculation = False - if self.lora_config: - lora_mapping = LoRAMapping( - **dict(index_mapping=[dummy_lora_id] * batch_size, - prompt_mapping=[dummy_lora_id] * batch_size, - is_prefill=False)) - self.set_active_loras(set([dummy_lora_request]), - lora_mapping) - - graph_runner = CUDAGraphRunner( - self.model, self.attn_backend.get_name(), - self.attn_state.graph_clone(batch_size), - self.model_config.is_encoder_decoder) - - capture_inputs = { - "input_ids": - input_tokens[:batch_size], - "inputs_embeds": - inputs_embeds[:batch_size] - if use_inputs_embeds else None, - "positions": - input_positions[..., :batch_size], - "intermediate_inputs": - intermediate_inputs[:batch_size] - if intermediate_inputs is not None else None, - "kv_caches": - kv_caches[virtual_engine], - "attn_metadata": - attn_metadata, - "memory_pool": - self.graph_memory_pool, - "stream": - graph_capture_context.stream - } - if previous_hidden_states is not None: - capture_inputs[ - "previous_hidden_states"] = previous_hidden_states[: - batch_size] - - if self.has_inner_state: - # Only used by Mamba-based models CUDA graph atm (Jamba) - capture_inputs.update({ - "seqlen_agnostic_capture_inputs": - self.model.get_seqlen_agnostic_capture_inputs( - batch_size) - }) - if self.model_config.is_encoder_decoder: - # add the additional inputs to capture for - # encoder-decoder models. - self._update_inputs_to_capture_for_enc_dec_model( - capture_inputs) - - with set_forward_context(attn_metadata, self.vllm_config, - virtual_engine): - graph_runner.capture(**capture_inputs) - self.graph_memory_pool = graph_runner.graph.pool() - self.graph_runners[virtual_engine][( - batch_size, use_inputs_embeds)] = graph_runner - - if self.lora_config: - self._remove_dummy_loras() - - end_time = time.perf_counter() - end_free_gpu_memory = torch.cuda.mem_get_info()[0] - elapsed_time = end_time - start_time - cuda_graph_size = start_free_gpu_memory - end_free_gpu_memory - # This usually takes < 10 seconds. - logger.info("Graph capturing finished in %.0f secs, took %.2f GiB", - elapsed_time, cuda_graph_size / GiB_bytes) - return cuda_graph_size - - def _update_inputs_to_capture_for_enc_dec_model(self, - capture_inputs: Dict[str, - Any]): - """ - Updates the set of input tensors needed for CUDA graph capture in an - encoder-decoder model. - - This method modifies the provided `capture_inputs` dictionary by - adding tensors specific to encoder-decoder specific models that - need to be captured for CUDA Graph replay. - """ - # During the decode phase encoder_input_ids and encoder_positions are - # unset. Do the same thing for graph capture. 
- capture_inputs["encoder_input_ids"] = torch.tensor([], - dtype=torch.long, - device=self.device) - capture_inputs["encoder_positions"] = torch.tensor([], - dtype=torch.long, - device=self.device) - - @property - def vocab_size(self) -> int: - return self.model_config.get_vocab_size() - - -class ModelRunner(GPUModelRunnerBase[ModelInputForGPUWithSamplingMetadata]): - """ - GPU model runner with sampling step. - """ - _model_input_cls: Type[ModelInputForGPUWithSamplingMetadata] = ( - ModelInputForGPUWithSamplingMetadata) - _builder_cls: Type[ModelInputForGPUBuilder] = ModelInputForGPUBuilder - - def make_model_input_from_broadcasted_tensor_dict( - self, - tensor_dict: Dict[str, Any], - ) -> ModelInputForGPUWithSamplingMetadata: - model_input = \ - ModelInputForGPUWithSamplingMetadata.from_broadcasted_tensor_dict( - tensor_dict, - attn_backend=self.attn_backend, - ) - return model_input - - def prepare_model_input( - self, - seq_group_metadata_list: List[SequenceGroupMetadata], - virtual_engine: int = 0, - finished_requests_ids: Optional[List[str]] = None, - ) -> ModelInputForGPUWithSamplingMetadata: - """Prepare the model input based on a given sequence group, including - metadata for the sampling step. - - The API assumes seq_group_metadata_list is sorted by prefill -> decode. - - The result tensors and data structure also batches input in prefill - -> decode order. For example, - - - input_tokens[:num_prefill_tokens] contains prefill tokens. - - input_tokens[num_prefill_tokens:] contains decode tokens. - - If cuda graph is required, this API automatically pads inputs. - """ - model_input = self._prepare_model_input_tensors( - seq_group_metadata_list, finished_requests_ids) - if get_pp_group().is_last_rank: - # Sampling metadata is only required for the final pp group - generators = self.get_generators(finished_requests_ids) - sampling_metadata = SamplingMetadata.prepare( - seq_group_metadata_list, model_input.seq_lens, - model_input.query_lens, self.device, self.pin_memory, - generators, self.sampling_metadata_cache) - else: - sampling_metadata = None - is_prompt = (seq_group_metadata_list[0].is_prompt - if seq_group_metadata_list else None) - return dataclasses.replace(model_input, - sampling_metadata=sampling_metadata, - is_prompt=is_prompt, - virtual_engine=virtual_engine) - - @torch.inference_mode() - def execute_model( - self, - model_input: ModelInputForGPUWithSamplingMetadata, - kv_caches: List[torch.Tensor], - intermediate_tensors: Optional[IntermediateTensors] = None, - num_steps: int = 1, - **kwargs, - ) -> Optional[Union[List[SamplerOutput], IntermediateTensors]]: - if num_steps > 1: - raise ValueError("num_steps > 1 is not supported in ModelRunner") - - if self.lora_config: - assert model_input.lora_requests is not None - assert model_input.lora_mapping is not None - self.set_active_loras(model_input.lora_requests, - model_input.lora_mapping) - - self.attn_state.begin_forward(model_input) - - # Currently cuda graph is only supported by the decode phase. - assert model_input.attn_metadata is not None - prefill_meta = model_input.attn_metadata.prefill_metadata - decode_meta = model_input.attn_metadata.decode_metadata - # TODO(andoorve): We can remove this once all - # virtual engines share the same kv cache. 
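As an aside, a toy illustration (not vLLM's actual classes) of the frozen-dataclass plus dataclasses.replace pattern that prepare_model_input uses above: the tensor-only input is built first, and per-step fields such as sampling metadata are attached afterwards without mutating the original object.

import dataclasses
from typing import Optional


@dataclasses.dataclass(frozen=True)
class ToyModelInput:
    input_tokens: list
    sampling_metadata: Optional[dict] = None
    is_prompt: Optional[bool] = None
    virtual_engine: int = 0


base = ToyModelInput(input_tokens=[1, 2, 3])
# replace() returns a new frozen instance with the extra fields filled in.
final = dataclasses.replace(base,
                            sampling_metadata={"selected_token_indices": [2]},
                            is_prompt=True,
                            virtual_engine=0)
print(final)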
- virtual_engine = model_input.virtual_engine - previous_hidden_states = kwargs.get("previous_hidden_states") - if prefill_meta is None and decode_meta.use_cuda_graph: - assert model_input.input_tokens is not None - graph_batch_size = model_input.input_tokens.shape[0] - use_inputs_embeds = model_input.inputs_embeds is not None - model_executable = self.graph_runners[virtual_engine][( - graph_batch_size, use_inputs_embeds)] - if previous_hidden_states is not None: - previous_hidden_states = torch.cat([ - previous_hidden_states, - torch.empty([ - graph_batch_size - previous_hidden_states.shape[0], - *previous_hidden_states.shape[1:] - ], - dtype=previous_hidden_states.dtype, - device=previous_hidden_states.device) - ]) - else: - model_executable = self.model - - # Receive KV cache in distributed KV cache transfer setting - # In disagg prefill setting, it will also recv hidden states and bypass - # model forwarding - # In KV cache database setting, it will change the model input so that - # we can skip prefilling on tokens that successfully received KV caches - # NOTE: The receive operation is blocking - bypass_model_exec = False - if self.need_recv_kv(model_input, kv_caches): - hidden_or_intermediate_states, bypass_model_exec, model_input = \ - get_kv_transfer_group().recv_kv_caches_and_hidden_states( - # model is used to know which layer the current worker - # is working on, so that we can receive KV for only those - # layers. - model_executable, - model_input, - kv_caches=kv_caches - ) - - multi_modal_kwargs = model_input.multi_modal_kwargs or {} - seqlen_agnostic_kwargs = { - "finished_requests_ids": model_input.finished_requests_ids, - "request_ids_to_seq_ids": model_input.request_ids_to_seq_ids, - } if self.has_inner_state else {} - model_kwargs = {} - if previous_hidden_states is not None: - model_kwargs["previous_hidden_states"] = previous_hidden_states - if (self.observability_config is not None - and self.observability_config.collect_model_forward_time): - model_forward_start = torch.cuda.Event(enable_timing=True) - model_forward_end = torch.cuda.Event(enable_timing=True) - model_forward_start.record() - - if not bypass_model_exec: - with set_forward_context(model_input.attn_metadata, - self.vllm_config, virtual_engine): - hidden_or_intermediate_states = model_executable( - input_ids=model_input.input_tokens, - inputs_embeds=model_input.inputs_embeds, - positions=model_input.input_positions, - intermediate_tensors=intermediate_tensors, - **MultiModalKwargs.as_kwargs( - multi_modal_kwargs, - device=self.device, - ), - **seqlen_agnostic_kwargs, - **model_kwargs, - ) - - if (self.observability_config is not None - and self.observability_config.collect_model_forward_time): - model_forward_end.record() - - # Sending KV cache in distributed KV cache transfer setting - # NOTE: the send operation is non-blocking - if self.need_send_kv(model_input, kv_caches): - get_kv_transfer_group().send_kv_caches_and_hidden_states( - # model_executable is used to know which layer the current - # worker is working on, so that we can send KV for only those - # layers. - model_executable, - model_input, - kv_caches, - hidden_or_intermediate_states, - ) - - # Compute the logits in the last pipeline stage. 
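A minimal sketch of the padding step above, using arbitrary example shapes: CUDA graphs replay with a fixed batch size, so draft-model hidden states for a smaller live batch are padded with uninitialized rows up to the captured size before replay.

import torch

graph_batch_size = 8
previous_hidden_states = torch.randn(5, 16)  # 5 live sequences, hidden dim 16

if previous_hidden_states.shape[0] < graph_batch_size:
    padding = torch.empty(
        (graph_batch_size - previous_hidden_states.shape[0],
         *previous_hidden_states.shape[1:]),
        dtype=previous_hidden_states.dtype,
        device=previous_hidden_states.device)
    previous_hidden_states = torch.cat([previous_hidden_states, padding])

assert previous_hidden_states.shape[0] == graph_batch_size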
- if not get_pp_group().is_last_rank: - if (self.is_driver_worker - and hidden_or_intermediate_states is not None - and isinstance(hidden_or_intermediate_states, - IntermediateTensors) - and self.observability_config is not None - and self.observability_config.collect_model_forward_time): - model_forward_end.synchronize() - model_forward_time = model_forward_start.elapsed_time( - model_forward_end) - orig_model_forward_time = 0.0 - if intermediate_tensors is not None: - orig_model_forward_time = intermediate_tensors.tensors.get( - "model_forward_time", torch.tensor(0.0)).item() - hidden_or_intermediate_states.tensors["model_forward_time"] = ( - torch.tensor(model_forward_time + orig_model_forward_time)) - return hidden_or_intermediate_states - - logits = self.model.compute_logits(hidden_or_intermediate_states, - model_input.sampling_metadata) - - if self.is_driver_worker: - if model_input.async_callback is not None: - model_input.async_callback() - - # Sample the next token. - assert isinstance(self.sampler, Sampler) - orig_include_gpu_probs = self.sampler.include_gpu_probs_tensor - if model_input.inputs_embeds is not None: - self.sampler.include_gpu_probs_tensor = True - - output: SamplerOutput = self.sampler( - logits=logits, - sampling_metadata=model_input.sampling_metadata, - ) - if (self.observability_config is not None - and self.observability_config.collect_model_forward_time - and output is not None): - model_forward_end.synchronize() - model_forward_time = model_forward_start.elapsed_time( - model_forward_end) - orig_model_forward_time = 0.0 - if intermediate_tensors is not None: - orig_model_forward_time = intermediate_tensors.tensors.get( - "model_forward_time", torch.tensor(0.0)).item() - # If there are multiple workers, we are still tracking the - # latency from the start time of the driver worker to the end - # time of the driver worker. The model forward time will then - # end up covering the communication time as well. 
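For context, a small stand-alone sketch of the CUDA-event timing pattern used for collect_model_forward_time above (requires a CUDA device; the matmul is a stand-in for the real model forward, and elapsed_time() reports milliseconds):

import torch

model_forward_start = torch.cuda.Event(enable_timing=True)
model_forward_end = torch.cuda.Event(enable_timing=True)

x = torch.randn(1024, 1024, device="cuda")
model_forward_start.record()
_ = x @ x  # stand-in for the model forward pass
model_forward_end.record()

# Both events must have completed on the GPU before elapsed_time() is valid.
model_forward_end.synchronize()
print("forward took "
      f"{model_forward_start.elapsed_time(model_forward_end):.3f} ms")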
- output.model_forward_time = (orig_model_forward_time + - model_forward_time) - - if model_input.inputs_embeds is not None: - if self.is_driver_worker: - sampled_token_ids = [] - valid_outputs = [] - for sequence_group_output in output.outputs: - if len(sequence_group_output.samples) == 0: - continue - assert len(sequence_group_output.samples) == 1 - valid_outputs.append(sequence_group_output) - sampled_token_ids.append( - sequence_group_output.samples[0].output_token) - sampled_token_ids = torch.tensor(sampled_token_ids).to( - self.device) - sampled_token_ids = broadcast_tensor_dict( - {"sampled_token_ids": - sampled_token_ids})["sampled_token_ids"] - else: - sampled_token_ids = broadcast_tensor_dict( - )["sampled_token_ids"] - if len(sampled_token_ids) > 0: - sampled_token_embeds = \ - self.model.get_input_embeddings(sampled_token_ids) - if self.is_driver_worker: - self.sampler.include_gpu_probs_tensor = \ - orig_include_gpu_probs - for i, sequence_group_output in enumerate(valid_outputs): - sequence_group_output.samples[0].output_embed = \ - sampled_token_embeds[i] - - if not self.is_driver_worker: - return [] - - if self.return_hidden_states: - # we only need to pass hidden states of most recent token - assert model_input.sampling_metadata is not None - indices = model_input.sampling_metadata.selected_token_indices - if model_input.is_prompt: - hidden_states = hidden_or_intermediate_states.index_select( - 0, indices) - output.prefill_hidden_states = hidden_or_intermediate_states - elif decode_meta.use_cuda_graph: - hidden_states = hidden_or_intermediate_states[:len(indices)] - else: - hidden_states = hidden_or_intermediate_states - - output.hidden_states = hidden_states - - return [output] - - def need_recv_kv(self, model_input: ModelInputForGPUWithSamplingMetadata, - kv_caches: List[torch.Tensor]) -> bool: - """Check if we need to receive kv-cache from the other worker. - We need to receive KV when - 1. current vLLM instance is KV cache consumer/decode vLLM instance - 2. this batch is not a profiling run - 3. this batch is a prefill run - - Args: - model_input: input to the model executable - kv_caches: vLLM's paged memory - """ - - if self.vllm_config.kv_transfer_config is None: - return False - - if model_input.attn_metadata is None: - raise ValueError("model_input.attn_metadata cannot be None") - - prefill_meta = model_input.attn_metadata.prefill_metadata - - # check if the current run is profiling - is_profile_run = (kv_caches[0].numel() == 0) - # check if the current run is prefill - is_prefill_run = prefill_meta is not None - - return self.vllm_config.kv_transfer_config.is_kv_consumer and ( - not is_profile_run) and is_prefill_run - - def need_send_kv(self, model_input: ModelInputForGPUWithSamplingMetadata, - kv_caches: List[torch.Tensor]) -> bool: - """Check if we need to send kv-cache to the other worker. - We need to send KV when - 1. current vLLM instance is KV cache producer/prefill vLLM instance - 2. this batch is not a profiling run - 3. 
this batch is a prefill run - - Args: - model_input: input to the model executable - kv_caches: vLLM's paged memory - """ - - if self.vllm_config.kv_transfer_config is None: - return False - - if model_input.attn_metadata is None: - raise ValueError("model_input.attn_metadata cannot be None") - - prefill_meta = model_input.attn_metadata.prefill_metadata - - # check if the current run is profiling - is_profile_run = (kv_caches[0].numel() == 0) - # check if the current run is prefill - is_prefill_run = prefill_meta is not None - - return self.vllm_config.kv_transfer_config.is_kv_producer and ( - not is_profile_run) and is_prefill_run - - -# NOTE: this is nn.Module so the profiler can properly capture/group -# kernels calls made within the graph -class CUDAGraphRunner(nn.Module): - - def __init__(self, model: nn.Module, backend_name: str, - attn_state: AttentionState, is_encoder_decoder_model: bool): - super().__init__() - self.model = model - self.backend_name = backend_name - self.attn_state = attn_state - - self.input_buffers: Dict[str, torch.Tensor] = {} - self.output_buffers: Dict[str, torch.Tensor] = {} - - self._graph: Optional[torch.cuda.CUDAGraph] = None - self._is_encoder_decoder_model = is_encoder_decoder_model - - @property - def graph(self): - assert self._graph is not None - return self._graph - - def capture( - self, - input_ids: torch.Tensor, - inputs_embeds: Optional[torch.Tensor], - positions: torch.Tensor, - intermediate_inputs: Optional[IntermediateTensors], - kv_caches: List[torch.Tensor], - attn_metadata: AttentionMetadata, - memory_pool: Optional[Tuple[int, int]], - stream: torch.cuda.Stream, - **kwargs, - ): - assert self._graph is None - # Run the model a few times without capturing the graph. - # This is to make sure that the captured graph does not include the - # kernel launches for initial benchmarking (e.g., Triton autotune). - # Note one iteration is not enough for torch.compile - for _ in range(_NUM_WARMUP_ITERS): - self.model( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - positions=positions, - intermediate_tensors=intermediate_inputs, - **kwargs, - ) - # Wait for the warm up operations to finish before proceeding with - # Graph Capture. - torch.cuda.synchronize() - # Capture the graph. - self._graph = torch.cuda.CUDAGraph() - with torch.cuda.graph(self._graph, pool=memory_pool, stream=stream): - output_hidden_or_intermediate_states = self.model( - input_ids=input_ids, - **({ - "inputs_embeds": inputs_embeds, - } if inputs_embeds is not None else {}), - positions=positions, - intermediate_tensors=intermediate_inputs, - **kwargs, - ) - - if isinstance(output_hidden_or_intermediate_states, torch.Tensor): - hidden_or_intermediate_states = weak_ref_tensor( - output_hidden_or_intermediate_states) - elif isinstance(output_hidden_or_intermediate_states, - IntermediateTensors): - hidden_or_intermediate_states = IntermediateTensors( - tensors={ - key: weak_ref_tensor(value) - for key, value in - output_hidden_or_intermediate_states.tensors.items() - }) - - del output_hidden_or_intermediate_states - # make sure `output_hidden_or_intermediate_states` is deleted - # in the graph's memory pool - gc.collect() - torch.cuda.synchronize() - - # Save the input and output buffers. 
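A stripped-down version of the warm-up-then-capture flow in CUDAGraphRunner.capture() above, using only the public torch.cuda.CUDAGraph API on a toy module (requires a CUDA device). The essential points: warm-up iterations run before capture so one-time kernel launches are not recorded, and the tensors used during capture become the static input/output buffers reused at replay time.

import torch

model = torch.nn.Linear(16, 16).cuda()
static_x = torch.zeros(8, 16, device="cuda")

# Warm up on a side stream so autotuning / lazy-init kernels are not captured.
warmup_stream = torch.cuda.Stream()
warmup_stream.wait_stream(torch.cuda.current_stream())
with torch.cuda.stream(warmup_stream):
    for _ in range(3):
        model(static_x)
torch.cuda.current_stream().wait_stream(warmup_stream)

graph = torch.cuda.CUDAGraph()
with torch.cuda.graph(graph):
    static_y = model(static_x)

# Replay: copy fresh data into the static input buffer, then replay the
# graph; the result appears in the captured output tensor.
static_x.copy_(torch.randn(8, 16, device="cuda"))
graph.replay()
print(static_y.sum().item())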
- self.input_buffers = { - "input_ids": - input_ids, - **({ - "inputs_embeds": inputs_embeds, - } if inputs_embeds is not None else {}), - "positions": - positions, - "kv_caches": - kv_caches, - **self.attn_state.get_graph_input_buffers( - attn_metadata, self._is_encoder_decoder_model), - **kwargs, - } - if intermediate_inputs is not None: - self.input_buffers.update(intermediate_inputs.tensors) - if get_pp_group().is_last_rank: - self.output_buffers = { - "hidden_states": hidden_or_intermediate_states - } - else: - self.output_buffers = hidden_or_intermediate_states - - def forward( - self, - input_ids: torch.Tensor, - inputs_embeds: Optional[torch.Tensor], - positions: torch.Tensor, - intermediate_tensors: Optional[IntermediateTensors], - **kwargs, - ) -> torch.Tensor: - attn_metadata: AttentionMetadata = get_forward_context().attn_metadata - - # Copy the input tensors to the input buffers. - self.input_buffers["input_ids"].copy_(input_ids, non_blocking=True) - if positions is not None: - # in some case like MLA, it will reuse positions in metadata - # but truncate them to the original size - # so the shape is not padded, we need to copy partial only - self.input_buffers["positions"][:positions.shape[0]].copy_( - positions, non_blocking=True) - if inputs_embeds is not None: - self.input_buffers["inputs_embeds"][:inputs_embeds.shape[0]].copy_( - inputs_embeds, non_blocking=True) - - if self.backend_name != "NO_ATTENTION": - self.input_buffers["slot_mapping"].copy_( - attn_metadata.slot_mapping, non_blocking=True) - - self.attn_state.prepare_graph_input_buffers( - self.input_buffers, attn_metadata, self._is_encoder_decoder_model) - - if "seqlen_agnostic_capture_inputs" in self.input_buffers: - self.model.copy_inputs_before_cuda_graphs(self.input_buffers, - **kwargs) - - if "previous_hidden_states" in self.input_buffers: - self.input_buffers["previous_hidden_states"].copy_( - kwargs["previous_hidden_states"], non_blocking=True) - - if intermediate_tensors is not None: - for key in intermediate_tensors.tensors: - if key != "model_execute_time" and key != "model_forward_time": - self.input_buffers[key].copy_(intermediate_tensors[key], - non_blocking=True) - if self._is_encoder_decoder_model: - self.input_buffers["encoder_input_ids"].copy_( - kwargs['encoder_input_ids'], non_blocking=True) - self.input_buffers["encoder_positions"].copy_( - kwargs['encoder_positions'], non_blocking=True) - - # Run the graph. - self.graph.replay() - # Return the output tensor. 
- if get_pp_group().is_last_rank: - return self.output_buffers["hidden_states"] - - return self.output_buffers diff --git a/vllm/worker/worker.py b/vllm/worker/worker.py deleted file mode 100644 index 12047bc39073..000000000000 --- a/vllm/worker/worker.py +++ /dev/null @@ -1,666 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""A GPU worker class.""" -import gc -import os -from contextlib import nullcontext -from typing import Dict, List, Optional, Set, Tuple, Type, Union - -import torch -import torch.distributed - -import vllm.envs as envs -from vllm.attention.layer import Attention -from vllm.config import VllmConfig, get_layers_from_vllm_config -from vllm.device_allocator.cumem import CuMemAllocator -from vllm.distributed import (ensure_model_parallel_initialized, - init_distributed_environment, - set_custom_all_reduce) -from vllm.distributed.kv_transfer import ensure_kv_transfer_initialized -from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.model_executor import set_random_seed -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.model_executor.model_loader.tensorizer import TensorizerConfig -from vllm.platforms import current_platform -from vllm.sequence import (ExecuteModelRequest, IntermediateTensors, - SequenceGroupMetadata, SequenceGroupMetadataDelta) -from vllm.utils import (GiB_bytes, MemorySnapshot, bind_kv_cache, - memory_profiling) -from vllm.worker.cache_engine import CacheEngine -from vllm.worker.model_runner import GPUModelRunnerBase, ModelRunner -from vllm.worker.worker_base import (LocalOrDistributedWorkerBase, WorkerBase, - WorkerInput) - -logger = init_logger(__name__) - - -class Worker(LocalOrDistributedWorkerBase): - """A worker class that executes (a partition of) the model on a GPU. - - Each worker is associated with a single GPU. The worker is responsible for - maintaining the KV cache and executing the model on the GPU. In case of - distributed inference, each worker is assigned a partition of the model. 
- """ - - def __init__( - self, - vllm_config: VllmConfig, - local_rank: int, - rank: int, - distributed_init_method: str, - is_driver_worker: bool = False, - model_runner_cls: Optional[Type[GPUModelRunnerBase]] = None, - ) -> None: - WorkerBase.__init__(self, vllm_config) - self.parallel_config.rank = rank - self.local_rank = local_rank - self.rank = rank - self.distributed_init_method = distributed_init_method - self.is_driver_worker = is_driver_worker - if self.model_config.trust_remote_code: - # note: lazy import to avoid importing torch before initializing - from vllm.utils import init_cached_hf_modules - init_cached_hf_modules() - - # Return hidden states from target model if the draft model is an - # mlp_speculator - speculative_config = self.speculative_config - model_config = self.model_config - speculative_args = {} if speculative_config is None \ - or (speculative_config.draft_model_config.hf_config.model_type == - model_config.hf_config.model_type) \ - or (speculative_config.draft_model_config.hf_config.model_type - not in ("medusa", - "mlp_speculator", - "eagle", - "deepseek_mtp", - "glm4_moe_mtp", - "mimo_mtp", - "ernie_mtp", - "qwen3_next_mtp")) \ - else {"return_hidden_states": True} - - self.model_runner: GPUModelRunnerBase = ModelRunner( - vllm_config=self.vllm_config, - kv_cache_dtype=self.cache_config.cache_dtype, - is_driver_worker=is_driver_worker, - **speculative_args, - ) - if model_runner_cls is not None: - self.model_runner = model_runner_cls(self.model_runner) - - # Uninitialized cache engine. Will be initialized by - # initialize_cache. - self.cache_engine: List[CacheEngine] - self.gpu_cache: Optional[List[List[torch.Tensor]]] = None - self._seq_group_metadata_cache: Dict[str, SequenceGroupMetadata] = {} - - # Buffers saved before sleep - self._sleep_saved_buffers: Dict[str, torch.Tensor] = {} - - # Torch profiler. Enabled and configured through env vars: - # VLLM_TORCH_PROFILER_DIR=/path/to/save/trace - if envs.VLLM_TORCH_PROFILER_DIR: - torch_profiler_trace_dir = envs.VLLM_TORCH_PROFILER_DIR - logger.info("Profiling enabled. Traces will be saved to: %s", - torch_profiler_trace_dir) - self.profiler = torch.profiler.profile( - activities=[ - torch.profiler.ProfilerActivity.CPU, - torch.profiler.ProfilerActivity.CUDA, - ], - with_stack=True, - on_trace_ready=torch.profiler.tensorboard_trace_handler( - torch_profiler_trace_dir, use_gzip=True)) - else: - self.profiler = None - - def start_profile(self): - if self.profiler is None: - raise RuntimeError("Profiler is not enabled.") - self.profiler.start() - - def stop_profile(self): - if self.profiler is None: - raise RuntimeError("Profiler is not enabled.") - self.profiler.stop() - # only print profiler results on rank 0 - if self.local_rank == 0: - print(self.profiler.key_averages().table( - sort_by="self_cuda_time_total")) - - def sleep(self, level: int = 1) -> None: - free_bytes_before_sleep = torch.cuda.mem_get_info()[0] - - # Save the buffers before level 2 sleep - if level == 2: - model = self.model_runner.model - self._sleep_saved_buffers = { - name: buffer.cpu().clone() - for name, buffer in model.named_buffers() - } - - allocator = CuMemAllocator.get_instance() - allocator.sleep(offload_tags=("weights", ) if level == 1 else tuple()) - free_bytes_after_sleep, total = torch.cuda.mem_get_info() - freed_bytes = free_bytes_after_sleep - free_bytes_before_sleep - used_bytes = total - free_bytes_after_sleep - assert freed_bytes >= 0, "Memory usage increased after sleeping." 
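A compact sketch of the buffer save/restore idea used around level-2 sleep above, on a toy module so it runs anywhere: named buffers are cloned to CPU before device memory is released and copied back after wake-up.

import torch

model = torch.nn.BatchNorm1d(4)  # has running_mean / running_var buffers
model(torch.randn(8, 4))         # mutate the buffers a bit

# Before sleeping: snapshot every named buffer on CPU.
saved = {name: buf.cpu().clone() for name, buf in model.named_buffers()}

# ... sleep / wake_up would discard and re-allocate device memory here ...

# After waking up: copy the snapshots back into the live buffers.
for name, buf in model.named_buffers():
    if name in saved:
        buf.data.copy_(saved[name].data)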
- logger.info( - "Sleep mode freed %.2f GiB memory, " - "%.2f GiB memory is still in use.", freed_bytes / GiB_bytes, - used_bytes / GiB_bytes) - - def wake_up(self, tags: Optional[list[str]] = None) -> None: - allocator = CuMemAllocator.get_instance() - allocator.wake_up(tags=tags) - - # Restore the buffers after level 2 sleep - if len(self._sleep_saved_buffers): - model = self.model_runner.model - for name, buffer in model.named_buffers(): - if name in self._sleep_saved_buffers: - buffer.data.copy_(self._sleep_saved_buffers[name].data) - self._sleep_saved_buffers = {} - - def init_device(self) -> None: - if self.device_config.device.type == "cuda": - # torch.distributed.all_reduce does not free the input tensor until - # the synchronization point. This causes the memory usage to grow - # as the number of all_reduce calls increases. This env var disables - # this behavior. - # Related issue: - # https://discuss.pytorch.org/t/cuda-allocation-lifetime-for-inputs-to-distributed-all-reduce/191573 - os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1" - - # This env var set by Ray causes exceptions with graph building. - os.environ.pop("NCCL_ASYNC_ERROR_HANDLING", None) - self.device = torch.device(f"cuda:{self.local_rank}") - torch.cuda.set_device(self.device) - - _check_if_gpu_supports_dtype(self.model_config.dtype) - gc.collect() - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats() - self.baseline_snapshot = MemorySnapshot() - else: - raise RuntimeError( - f"Not support device type: {self.device_config.device}") - # Initialize the distributed environment. - init_worker_distributed_environment(self.vllm_config, self.rank, - self.distributed_init_method, - self.local_rank) - # Set random seed. - set_random_seed(self.model_config.seed) - - def load_model(self): - if self.vllm_config.model_config.enable_sleep_mode: - allocator = CuMemAllocator.get_instance() - assert allocator.get_current_usage() == 0, ( - "Sleep mode can only be " - "used for one instance per process.") - context = allocator.use_memory_pool(tag="weights") - else: - context = nullcontext() - with context: - self.model_runner.load_model() - - def save_sharded_state( - self, - path: str, - pattern: Optional[str] = None, - max_size: Optional[int] = None, - ) -> None: - self.model_runner.save_sharded_state( - path, - pattern=pattern, - max_size=max_size, - ) - - def save_tensorized_model( - self, - tensorizer_config: TensorizerConfig, - ) -> None: - self.model_runner.save_tensorized_model( - tensorizer_config=tensorizer_config, ) - - @torch.inference_mode() - def determine_available_kv_cache_memory(self, - total_gpu_memory: int) -> float: - if kv_cache_memory_bytes := self.cache_config.kv_cache_memory_bytes: - # still need a profile run which compiles the model for - # max_num_batched_tokens - self.model_runner.profile_run() - - GiB = lambda b: b / GiB_bytes - msg = ( - f"Initial free memory " - f"{GiB(self.baseline_snapshot.free_memory):.2f} " - f"GiB, reserved {GiB(kv_cache_memory_bytes):.2f}GiB memory for " - "KV Cache as specified by kv_cache_memory_bytes config and " - "skipped memory profiling. This does does not respect the " - "gpu_memory_utilization config. Only use kv_cache_memory_bytes " - "config when you want manual control of KV cache memory " - "size. 
If OOM'ed, check the difference of initial free " - "memory between the current run and the previous run " - "where kv_cache_memory_bytes is suggested and update it " - "correspondingly.") - logger.info(msg) - return self.cache_config.kv_cache_memory_bytes - - # Execute a forward pass with dummy inputs to profile the memory usage - # of the model. - with memory_profiling( - self.baseline_snapshot, - weights_memory=self.model_runner.model_memory_usage) as result: - self.model_runner.profile_run() - - self.non_torch_memory = result.non_torch_increase - self.peak_activation_memory = result.torch_peak_increase - - self._assert_memory_footprint_increased_during_profiling() - - self.requested_memory = total_gpu_memory * \ - self.cache_config.gpu_memory_utilization - - self.available_kv_cache_memory = (self.requested_memory - - result.non_kv_cache_memory) - - msg = (f"Memory profiling takes {result.profile_time:.2f} seconds\n" - "the current vLLM instance can use " - "total_gpu_memory " - f"({(total_gpu_memory / GiB_bytes):.2f}GiB)" - " x gpu_memory_utilization " - f"({self.cache_config.gpu_memory_utilization:.2f})" - f" = {(self.requested_memory / GiB_bytes):.2f}GiB\n" - "model weights take " - f"{(result.weights_memory / GiB_bytes):.2f}GiB;" - " non_torch_memory takes " - f"{(result.non_torch_increase / GiB_bytes):.2f}GiB;" - " PyTorch activation peak memory takes " - f"{(result.torch_peak_increase / GiB_bytes):.2f}GiB;" - " the rest of the memory reserved for KV Cache is " - f"{(self.available_kv_cache_memory / GiB_bytes):.2f}GiB.") - - logger.info(msg) - return self.available_kv_cache_memory - - @torch.inference_mode() - def determine_num_available_blocks(self) -> Tuple[int, int]: - """Profiles the peak memory usage of the model to determine how many - KV blocks may be allocated without OOMs. - - The engine will first conduct a profiling of the existing memory usage. - Then, it calculates the maximum possible number of GPU and CPU blocks - that can be allocated with the remaining free memory. - - Tip: - You may limit the usage of GPU memory - by adjusting the `gpu_memory_utilization` parameter. - """ - # Profile the memory usage of the model and get the maximum number of - # cache blocks that can be allocated with the remaining free memory. - torch.cuda.empty_cache() - torch.cuda.reset_peak_memory_stats() - - free_memory_pre_profile, total_gpu_memory = torch.cuda.mem_get_info() - available_kv_cache_memory = self.determine_available_kv_cache_memory( - total_gpu_memory) - - # Calculate the number of blocks that can be allocated with the - # profiled peak memory. - cache_block_size = self.get_cache_block_size_bytes() - if cache_block_size == 0: - num_gpu_blocks = 0 - num_cpu_blocks = 0 - else: - num_gpu_blocks = int(available_kv_cache_memory // cache_block_size) - num_cpu_blocks = int(self.cache_config.swap_space_bytes // - cache_block_size) - num_gpu_blocks = max(num_gpu_blocks, 0) - num_cpu_blocks = max(num_cpu_blocks, 0) - - # Final cleanup - gc.collect() - - return num_gpu_blocks, num_cpu_blocks - - def _assert_memory_footprint_increased_during_profiling(self): - # NOTE(woosuk): Here we assume that the other processes using the same - # GPU did not change their memory usage during the profiling. - free_gpu_memory, total = torch.cuda.mem_get_info() - cuda_memory = total - free_gpu_memory - assert self.baseline_snapshot.cuda_memory < cuda_memory, ( - "Error in memory profiling. " - f"Initial used memory {self.baseline_snapshot.cuda_memory}, " - f"currently used memory {cuda_memory}. 
" - f"This happens when the GPU memory was " - "not properly cleaned up before initializing the vLLM instance.") - - def initialize_cache(self, num_gpu_blocks: int, - num_cpu_blocks: int) -> None: - """Allocate GPU and CPU KV cache with the specified number of blocks. - - This also warms up the model, which may record CUDA graphs. - """ - raise_if_cache_size_invalid( - num_gpu_blocks, self.cache_config.block_size, - self.cache_config.is_attention_free, - self.model_config.max_model_len, - self.parallel_config.pipeline_parallel_size) - - self.cache_config.num_gpu_blocks = num_gpu_blocks - self.cache_config.num_cpu_blocks = num_cpu_blocks - - if self.vllm_config.model_config.enable_sleep_mode: - allocator = CuMemAllocator.get_instance() - context = allocator.use_memory_pool(tag="kv_cache") - else: - context = nullcontext() - with context: - self._init_cache_engine() - self._warm_up_model() - - def _init_cache_engine(self): - assert self.cache_config.num_gpu_blocks is not None - self.cache_engine = [ - CacheEngine(self.cache_config, self.model_config, - self.parallel_config, self.device_config) - for _ in range(self.parallel_config.pipeline_parallel_size) - ] - self.gpu_cache = [ - self.cache_engine[ve].gpu_cache - for ve in range(self.parallel_config.pipeline_parallel_size) - ] - - # Layer pairings for cross-layer KV sharing. - # If an Attention layer `layer_name` is in the keys of this dict, it - # means this layer will perform attention using the keys and values - # from the KV cache of `shared_kv_cache_layers[layer_name]`. - shared_kv_cache_layers: dict[str, str] = {} - - attn_layers = get_layers_from_vllm_config(self.vllm_config, Attention) - - for layer_name, attn_module in attn_layers.items(): - if (kv_tgt_layer := - attn_module.kv_sharing_target_layer_name) is not None: - # The layer doesn't need its own KV cache and will use that of - # the target layer. We skip creating a KVCacheSpec for it, so - # that KV cache management logic will act as this layer does - # not exist, and doesn't allocate KV cache for the layer. This - # enables the memory saving of cross-layer kv sharing, allowing - # a given amount of memory to accommodate longer context lengths - # or enable more requests to be processed simultaneously. - shared_kv_cache_layers[layer_name] = kv_tgt_layer - - bind_kv_cache(self.compilation_config.static_forward_context, - self.gpu_cache, shared_kv_cache_layers) - - def _warm_up_model(self) -> None: - # warm up sizes that are not in cudagraph capture sizes, - # but users still want to compile for better performance, - # e.g. for the max-num-batched token size in chunked prefill. - warmup_sizes = self.vllm_config.compilation_config.compile_sizes.copy() - if not self.model_config.enforce_eager: - warmup_sizes = [ - x for x in warmup_sizes if x not in - self.vllm_config.compilation_config.cudagraph_capture_sizes - ] - for size in sorted(warmup_sizes, reverse=True): - logger.info("Compile and warming up model for size %d", size) - self.model_runner._dummy_run(size) - - cuda_graph_memory_bytes = 0 - if not self.model_config.enforce_eager: - cuda_graph_memory_bytes = self.model_runner.capture_model( - self.gpu_cache) - - if (self.cache_config.kv_cache_memory_bytes is None - and hasattr(self, "peak_activation_memory")): - # Suggests optimal kv cache memory size if we rely on - # memory_profiling to guess the kv cache memory size which - # provides peak_activation_memory and a few other memory - # consumption. 
`memory_profiling` does not consider - # CUDAGraph memory size and may not utilize all gpu memory. - # Users may want fine-grained control to specify kv cache - # memory size. - GiB = lambda b: round(b / GiB_bytes, 2) - non_kv_cache_memory = (self.model_runner.model_memory_usage + - self.peak_activation_memory + - self.non_torch_memory + - cuda_graph_memory_bytes) - - # empirically observed that the memory profiling may - # slightly underestimate the memory consumption. - # So leave a small buffer (=150MiB) to avoid OOM. - redundancy_buffer_memory = 150 * (1 << 20) - kv_cache_memory_bytes_to_gpu_limit = ( - self.baseline_snapshot.free_memory - non_kv_cache_memory - - redundancy_buffer_memory) - kv_cache_memory_bytes_to_requested_limit = ( - int(self.requested_memory) - non_kv_cache_memory - - redundancy_buffer_memory) - - msg = ( - f"Free memory on device " - f"({GiB(self.baseline_snapshot.free_memory)}/" - f"{GiB(self.baseline_snapshot.total_memory)} GiB) on startup. " - f"Desired GPU memory utilization is " - f"({self.cache_config.gpu_memory_utilization}, " - f"{GiB(self.requested_memory)} GiB). " - f"Actual usage is {GiB(self.model_runner.model_memory_usage)} " - f"GiB for weight, {GiB(self.peak_activation_memory)} GiB " - f"for peak activation, {GiB(self.non_torch_memory)} GiB " - f"for non-torch memory, and {GiB(cuda_graph_memory_bytes)} " - f"GiB for CUDAGraph memory. Replace gpu_memory_utilization " - f"config with `--kv-cache-memory=" - f"{kv_cache_memory_bytes_to_requested_limit}` to fit into " - f"requested memory, or `--kv-cache-memory=" - f"{kv_cache_memory_bytes_to_gpu_limit}` to fully " - f"utilize gpu memory. Current kv cache memory in use is " - f"{int(self.available_kv_cache_memory)} bytes.") - logger.info(msg) - - # Reset the seed to ensure that the random state is not affected by - # the model initialization and profiling. - set_random_seed(self.model_config.seed) - - @property - def do_metadata_broadcast(self) -> bool: - return self.parallel_config.tensor_parallel_size > 1 - - @property - def kv_cache(self) -> Optional[List[List[torch.Tensor]]]: - return self.gpu_cache - - @torch.inference_mode() - def prepare_worker_input( - self, execute_model_req: ExecuteModelRequest) -> WorkerInput: - virtual_engine = execute_model_req.virtual_engine - num_steps = execute_model_req.num_steps - num_seq_groups = len(execute_model_req.seq_group_metadata_list) - # `blocks_to_swap_in` and `blocks_to_swap_out` are cpu tensors. - # they contain parameters to launch cudamemcpyasync. - blocks_to_swap_in = torch.tensor(execute_model_req.blocks_to_swap_in, - device="cpu", - dtype=torch.int64).view(-1, 2) - blocks_to_swap_out = torch.tensor(execute_model_req.blocks_to_swap_out, - device="cpu", - dtype=torch.int64).view(-1, 2) - # `blocks_to_copy` is a gpu tensor. The src and tgt of - # blocks to copy are in the same device, and `blocks_to_copy` - # can be used directly within cuda kernels. - blocks_to_copy = torch.tensor(execute_model_req.blocks_to_copy, - device=self.device, - dtype=torch.int64).view(-1, 2) - - return WorkerInput( - num_seq_groups=num_seq_groups, - blocks_to_swap_in=blocks_to_swap_in, - blocks_to_swap_out=blocks_to_swap_out, - blocks_to_copy=blocks_to_copy, - virtual_engine=virtual_engine, - num_steps=num_steps, - ) - - @torch.inference_mode() - def execute_worker(self, worker_input: WorkerInput) -> None: - virtual_engine = worker_input.virtual_engine - # Issue cache operations. 
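As a small illustration of the tensors prepared in prepare_worker_input above (block numbers here are arbitrary examples): each swap or copy request is normalized into an (N, 2) int64 tensor whose rows are (source block, destination block) pairs, and the numel() check mirrors the guard used before issuing each cache operation.

import torch

blocks_to_swap_in = [(3, 0), (7, 1), (9, 2)]  # (src, dst) block pairs
swap_in = torch.tensor(blocks_to_swap_in, device="cpu",
                       dtype=torch.int64).view(-1, 2)

print(swap_in.shape)     # torch.Size([3, 2])
if swap_in.numel() > 0:  # only issue the cache op when there is work to do
    print(swap_in.tolist())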
- if (worker_input.blocks_to_swap_in is not None - and worker_input.blocks_to_swap_in.numel() > 0): - self.cache_engine[virtual_engine].swap_in( - worker_input.blocks_to_swap_in) - if (worker_input.blocks_to_swap_out is not None - and worker_input.blocks_to_swap_out.numel() > 0): - self.cache_engine[virtual_engine].swap_out( - worker_input.blocks_to_swap_out) - if (worker_input.blocks_to_copy is not None - and worker_input.blocks_to_copy.numel() > 0): - self.cache_engine[virtual_engine].copy(worker_input.blocks_to_copy) - - def _get_cached_seq_group_metadata( - self, - seq_group_metadata_list: List[Union[SequenceGroupMetadata, - SequenceGroupMetadataDelta]], - finished_request_ids: List[str]) -> List[SequenceGroupMetadata]: - """Return a list of cached Sequence Group Metadata after updating its - state. - - It is used because scheduler only sends delta to workers to reduce - the data payload size. The function also cleans up cache based on - a given `finished_request_ids`. - """ - new_seq_group_metadata_list = [] - for metadata_or_delta in seq_group_metadata_list: - request_id = metadata_or_delta.request_id - if request_id not in self._seq_group_metadata_cache: - # The first prefill. - assert isinstance(metadata_or_delta, SequenceGroupMetadata) - self._seq_group_metadata_cache[request_id] = metadata_or_delta - else: - # The first prefill is already cached. - if isinstance(metadata_or_delta, SequenceGroupMetadataDelta): - self._seq_group_metadata_cache[request_id].apply_delta( - metadata_or_delta) - else: - # If metadata snapshot is sent again, it is - # preempted. Reset the cache because we need to start - # from scratch. - assert isinstance(metadata_or_delta, SequenceGroupMetadata) - self._seq_group_metadata_cache[ - request_id] = metadata_or_delta - - new_seq_group_metadata_list.append( - self._seq_group_metadata_cache[request_id]) - - # Clean up finished ids - for finished_id in finished_request_ids: - del self._seq_group_metadata_cache[finished_id] - - return new_seq_group_metadata_list - - def _execute_model_spmd( - self, - execute_model_req: ExecuteModelRequest, - intermediate_tensors: Optional[IntermediateTensors] = None, - ) -> Optional[List[SamplerOutput]]: - if execute_model_req is not None: - new_seq_group_metadata_list = self._get_cached_seq_group_metadata( - execute_model_req.seq_group_metadata_list, - execute_model_req.finished_requests_ids) - - execute_model_req.seq_group_metadata_list = ( - new_seq_group_metadata_list) - output = super()._execute_model_spmd(execute_model_req, - intermediate_tensors) - return output - - def add_lora(self, lora_request: LoRARequest) -> bool: - return self.model_runner.add_lora(lora_request) - - def remove_lora(self, lora_id: int) -> bool: - return self.model_runner.remove_lora(lora_id) - - def pin_lora(self, lora_id: int) -> bool: - return self.model_runner.pin_lora(lora_id) - - def list_loras(self) -> Set[int]: - return self.model_runner.list_loras() - - @property - def max_model_len(self) -> int: - return self.model_config.max_model_len - - @property - def vocab_size(self) -> int: - return self.model_runner.vocab_size - - def get_cache_block_size_bytes(self) -> int: - """Get the size of the KV cache block size in bytes. 
- """ - return CacheEngine.get_cache_block_size(self.cache_config, - self.model_config, - self.parallel_config) - - -def init_worker_distributed_environment( - vllm_config: VllmConfig, - rank: int, - distributed_init_method: Optional[str] = None, - local_rank: int = -1, -) -> None: - """Initialize the distributed environment.""" - parallel_config = vllm_config.parallel_config - set_custom_all_reduce(not parallel_config.disable_custom_all_reduce) - - init_distributed_environment(parallel_config.world_size, rank, - distributed_init_method, local_rank, - current_platform.dist_backend) - ensure_model_parallel_initialized( - parallel_config.tensor_parallel_size, - parallel_config.pipeline_parallel_size, - parallel_config.decode_context_parallel_size) - - ensure_kv_transfer_initialized(vllm_config) - - -def _check_if_gpu_supports_dtype(torch_dtype: torch.dtype): - # Check if the GPU supports the dtype. - if torch_dtype == torch.bfloat16: # noqa: SIM102 - if not current_platform.has_device_capability(80): - capability = current_platform.get_device_capability() - gpu_name = current_platform.get_device_name() - - if capability is None: - compute_str = "does not have a compute capability" - else: - version_str = capability.as_version_str() - compute_str = f"has compute capability {version_str}" - - raise ValueError( - "Bfloat16 is only supported on GPUs with compute capability " - f"of at least 8.0. Your {gpu_name} GPU {compute_str}. " - "You can use float16 instead by explicitly setting the " - "`dtype` flag in CLI, for example: --dtype=half.") - - -def raise_if_cache_size_invalid(num_gpu_blocks, block_size, is_attention_free, - max_model_len, pipeline_parallel_size) -> None: - if is_attention_free and num_gpu_blocks != 0: - raise ValueError("No memory should be allocated for the cache blocks " - f"for an attention-free model, but {num_gpu_blocks} " - "blocks are allocated.") - if not is_attention_free and num_gpu_blocks <= 0: - raise ValueError("No available memory for the cache blocks. " - "Try increasing `gpu_memory_utilization` when " - "initializing the engine.") - max_seq_len = block_size * (num_gpu_blocks // pipeline_parallel_size) - if not is_attention_free and max_model_len > max_seq_len: - raise ValueError( - f"The model's max seq len ({max_model_len}) " - "is larger than the maximum number of tokens that can be " - f"stored in KV cache ({max_seq_len}). Try increasing " - "`gpu_memory_utilization` or decreasing `max_model_len` when " - "initializing the engine.") From 62b38dc8322f9f31fff7c680028b0b88306e1c39 Mon Sep 17 00:00:00 2001 From: Huamin Li <3ericli@gmail.com> Date: Sat, 20 Sep 2025 20:29:12 -0700 Subject: [PATCH 201/518] [Doc] improve test-pipeline.yaml documentation (#25305) Signed-off-by: Huamin Li <3ericli@gmail.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 32 ++++++++++++++++++-------------- 1 file changed, 18 insertions(+), 14 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 9d38e571324b..fe4796b35786 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -6,24 +6,28 @@ # to generate the final pipeline yaml file. # Documentation -# label(str): the name of the test. emoji allowed. -# fast_check(bool): whether to run this on each commit on fastcheck pipeline. -# torch_nightly(bool): whether to run this on vllm against torch nightly pipeline. 
-# fast_check_only(bool): run this test on fastcheck pipeline only -# optional(bool): never run this test by default (i.e. need to unblock manually) unless it's scheduled nightly run. +# label(str): the name of the test. emojis allowed. +# fast_check(bool): whether to run this on each commit on the fastcheck pipeline. +# torch_nightly(bool): whether to run this on vllm against the torch nightly pipeline. +# fast_check_only(bool): run this test on the fastcheck pipeline only +# optional(bool): never run this test by default (i.e. need to unblock manually) unless it's a scheduled nightly run. +# soft_fail(bool): allow this step to fail without failing the entire pipeline (useful for flaky or experimental tests). # command(str): the single command to run for tests. incompatible with commands. -# commands(list): the list of commands to run for test. incompatbile with command. -# mirror_hardwares(list): the list of hardwares to run the test on as well. currently only supports [amd] -# gpu(str): override the GPU selection for the test. default is on L4 GPUs. currently only supports a100 -# num_gpus(int): override the number of GPUs for the test. default to 1 GPU. currently support 2,4. -# num_nodes(int): whether to simulate multi-node setup by launch multiple containers on one host, -# in this case, commands must be specified. the first command runs on first host, the second +# commands(list): the list of commands to run for the test. incompatible with command. +# mirror_hardwares(list): the list of hardware to run the test on as well. currently only supports [amdexperimental] +# gpu(str): override the GPU selection for the test. default is L4 GPUs. supports a100, b200, h200 +# num_gpus(int): override the number of GPUs for the test. defaults to 1 GPU. currently supports 2,4. +# num_nodes(int): whether to simulate multi-node setup by launching multiple containers on one host, +# in this case, commands must be specified. the first command runs on the first host, the second # command runs on the second host. -# working_dir(str): specify the place where command should execute, default to /vllm-workspace/tests -# source_file_dependencies(list): the list of prefix to opt-in the test for, if empty, the test will always run. +# timeout_in_minutes(int): sets a timeout for the step in minutes. if not specified, uses the default timeout. +# parallelism(int): number of parallel jobs to run for this step. enables test sharding using $$BUILDKITE_PARALLEL_JOB +# and $$BUILDKITE_PARALLEL_JOB_COUNT environment variables. +# working_dir(str): specify the place where the command should execute, default to /vllm-workspace/tests +# source_file_dependencies(list): the list of prefixes to opt-in the test for, if empty, the test will always run. # When adding a test -# - If the test belong to an existing group, add it there +# - If the test belongs to an existing group, add it there # - If the test is short, add to any existing step # - If the test takes more than 10min, then it is okay to create a new step. # Note that all steps execute in parallel. 
From 1cd885bd540c3765e4c2d378b3167102dfa26ab5 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 20:49:09 -0700 Subject: [PATCH 202/518] [V0 Deprecation] Remove V0 model runner base & simplify worker base (#25328) Signed-off-by: Woosuk Kwon --- vllm/attention/backends/abstract.py | 15 +- vllm/attention/backends/utils.py | 8 +- vllm/worker/model_runner_base.py | 307 ---------------------- vllm/worker/worker_base.py | 387 +--------------------------- 4 files changed, 11 insertions(+), 706 deletions(-) delete mode 100644 vllm/worker/model_runner_base.py diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py index dfde67e1713c..ab7ef2112b08 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -4,19 +4,14 @@ from abc import ABC, abstractmethod from contextlib import contextmanager from dataclasses import dataclass, fields -from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, - Protocol, Set, Tuple, Type, TypeVar) +from typing import (Any, Dict, Generic, List, Optional, Protocol, Set, Tuple, + Type, TypeVar) import torch from vllm.model_executor.layers.quantization.utils.quant_utils import QuantKey from vllm.multimodal import MultiModalPlaceholderMap -if TYPE_CHECKING: - from vllm.worker.model_runner_base import (ModelRunnerBase, - ModelRunnerInputBase, - ModelRunnerInputBuilderBase) - class AttentionType: """ @@ -170,7 +165,7 @@ class AttentionState(ABC, Generic[T]): lifetime of the model runner.""" @abstractmethod - def __init__(self, runner: "ModelRunnerBase"): + def __init__(self, runner: Any): ... @abstractmethod @@ -210,7 +205,7 @@ def prepare_graph_input_buffers( ... @abstractmethod - def begin_forward(self, model_input: "ModelRunnerInputBase") -> None: + def begin_forward(self, model_input) -> None: """Prepare state for forward pass.""" ... 
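A minimal illustration of the decoupling pattern this hunk applies (class and method names below are toy stand-ins, not vLLM's): rather than importing the V0 model-runner types under TYPE_CHECKING just to annotate them, the interface accepts Any and relies on duck typing.

from typing import Any


class ToyAttentionState:
    """Holds attention-backend state for the lifetime of a model runner."""

    def __init__(self, runner: Any):
        # Only a few attributes of the runner are actually used, so a
        # structural (duck-typed) dependency is enough and avoids importing
        # the concrete runner class.
        self.runner = runner

    def begin_forward(self, model_input) -> None:
        # Prepare per-step state from whatever input object the runner built.
        ...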
@@ -219,7 +214,7 @@ class AttentionMetadataBuilder(ABC, Generic[T]): """Abstract class for attention metadata builders.""" @abstractmethod - def __init__(self, input_builder: "ModelRunnerInputBuilderBase") -> None: + def __init__(self, input_builder) -> None: """Create the builder, remember some configuration and parameters.""" raise NotImplementedError diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 289cfa217743..b28e6a4237cb 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -5,8 +5,7 @@ from contextlib import contextmanager from dataclasses import dataclass from itertools import accumulate -from typing import (TYPE_CHECKING, Any, Dict, List, Optional, Tuple, Type, - TypeVar, Union) +from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union import numpy as np import torch @@ -21,9 +20,6 @@ logger = init_logger(__name__) -if TYPE_CHECKING: - from vllm.worker.model_runner_base import ModelRunnerBase - # Error string(s) for encoder/decoder # unsupported attention scenarios STR_NOT_IMPL_ENC_DEC_ROCM_HIP = ("ROCm/HIP is not currently supported " @@ -286,7 +282,7 @@ def build(self, seq_lens: List[int], query_lens: List[int], class CommonAttentionState(AttentionState): - def __init__(self, runner: "ModelRunnerBase"): + def __init__(self, runner): self.runner = runner self._is_graph_capturing = False diff --git a/vllm/worker/model_runner_base.py b/vllm/worker/model_runner_base.py deleted file mode 100644 index 1008b743619a..000000000000 --- a/vllm/worker/model_runner_base.py +++ /dev/null @@ -1,307 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import dataclasses -from abc import ABC, abstractmethod -from typing import (TYPE_CHECKING, Any, Dict, Generic, List, Optional, Type, - TypeVar) - -import torch -import torch.nn as nn - -from vllm.config import VllmConfig -from vllm.logger import init_logger -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.model_executor.models.interfaces import supports_transcription -from vllm.model_executor.models.interfaces_base import is_text_generation_model -from vllm.sequence import IntermediateTensors, SequenceGroupMetadata -from vllm.tasks import GenerationTask, SupportedTask - -if TYPE_CHECKING: - from vllm.attention import AttentionMetadata - from vllm.attention.backends.abstract import AttentionBackend - from vllm.model_executor import SamplingMetadata - -logger = init_logger(__name__) - -T = TypeVar('T', bound="BroadcastableModelInput") - - -def _add_attn_metadata_broadcastable_dict( - tensor_dict: Dict[str, Any], - attn_metadata: Optional["AttentionMetadata"]) -> None: - """ - Helper method to update tensor_dict with broadcastable - AttentionMetadata fields. - """ - if attn_metadata is not None: - tensor_dict.update(attn_metadata.asdict_zerocopy()) - - -def _init_attn_metadata_from_tensor_dict( - attn_backend: "AttentionBackend", - tensor_dict: Dict[str, Any], -) -> Dict[str, Any]: - """ - Helper method to initialize AttentionMetadata based on an - AttentionBackend and broadcastable AttentionMetadata fields. - """ - # Extract the fields used to create AttentionMetadata. 
- valid_attn_kwargs = {} - for field in dataclasses.fields(attn_backend.get_metadata_cls()): - if field.name in tensor_dict: - if field.name == "input_positions": - valid_attn_kwargs[field.name] = tensor_dict[field.name] - else: - valid_attn_kwargs[field.name] = tensor_dict.pop(field.name) - - attn_metadata = attn_backend.make_metadata(**valid_attn_kwargs) - tensor_dict["attn_metadata"] = attn_metadata - return tensor_dict - - -def _init_sampling_metadata_from_tensor_dict( # type: ignore - tensor_dict: Dict[str, Any]) -> Dict[str, Any]: - """ - Helper method to initialize SamplingMetadata based on broadcastable - SamplingMetadata fields. - """ - from vllm.model_executor import SamplingMetadata - - selected_token_indices = tensor_dict.pop("selected_token_indices", None) - # An empty SamplingMetadata to signal that the worker should skip - # sampling. - if selected_token_indices is not None: - tensor_dict["sampling_metadata"] = SamplingMetadata( - seq_groups=None, - selected_token_indices=selected_token_indices, - categorized_sample_indices=None, - num_prompts=0, - ) - return tensor_dict - - -def _add_sampling_metadata_broadcastable_dict( - tensor_dict: Dict[str, Any], - sampling_metadata: Optional["SamplingMetadata"]) -> None: - """ - Helper method to update tensor_dict with broadcastable - SamplingMetadata fields. - """ - if sampling_metadata is not None: - tensor_dict["selected_token_indices"] = ( - sampling_metadata.selected_token_indices) - - -def _init_frozen_model_input_from_tensor_dict( - frozen_model_input_cls: Type["ModelRunnerInputBase"], - tensor_dict: Dict[str, Any]) -> Dict[str, Any]: - """ - Helper method to initialize a frozen ModelInput based on broadcastable - """ - valid_tensor_kwargs = {} - for field in dataclasses.fields(frozen_model_input_cls): - val = tensor_dict.pop(field.name, None) - if val is not None: - valid_tensor_kwargs[field.name] = val - - frozen_model_input = frozen_model_input_cls(**valid_tensor_kwargs) - tensor_dict["frozen_model_input"] = frozen_model_input - return tensor_dict - - -class BroadcastableModelInput(ABC): - - @abstractmethod - def as_broadcastable_tensor_dict(self) -> Dict[str, Any]: - """ - Extract broadcastable fields. Override for fields that require some - custom deserialization. - """ - raise NotImplementedError - - @classmethod - @abstractmethod - def from_broadcasted_tensor_dict( - cls: Type[T], - tensor_dict: Dict[str, Any], - attn_backend: Optional["AttentionBackend"] = None, - ) -> T: - """ - Pop fields from the given tensor_dict and populate a new instance of - BroadcastableModelInput. - """ - raise NotImplementedError - - -@dataclasses.dataclass(frozen=True) -class ModelRunnerInputBase(BroadcastableModelInput): - """Local inputs to each worker's model runner. May contain - device-specific data. Different worker backends may have different methods - of converting from the global ExecuteModelRequest produced by the LLM - engine to the worker-local ModelRunnerInputBase objects. - - Model runners that support multi-GPU execution should define a - ModelRunnerInputBase subclass, add their required fields, and specify how to - serialize/deserialize a ModelInput for broadcast between workers. - """ - pass - - -class ModelRunnerInputBuilderBase(ABC, Generic[T]): - """A builder to create ModelRunnerInputBase objects. 
- """ - - @abstractmethod - def prepare(self, - finished_requests_ids: Optional[List[str]] = None) -> None: - raise NotImplementedError - - @abstractmethod - def add_seq_group(self, seq_group_metadata): - """TBA""" - raise NotImplementedError - - @abstractmethod - def build(self, *args, **kwargs) -> T: - """Build metadata with on-device tensors.""" - raise NotImplementedError - - -class ModelRunnerBase(ABC, Generic[T]): - """ - Model runner interface that abstracts a particular hardware and/or type of - model. Model execution may communicate data with model runners in other - processes, but it should not include control plane metadata communication. - - Each ModelRunnerBase subclass should define a corresponding - ModelRunnerInputBase subclass. - """ - - def __init__( - self, - vllm_config: VllmConfig, - ) -> None: - self.vllm_config = vllm_config - self.model_config = vllm_config.model_config - self.cache_config = vllm_config.cache_config - self.lora_config = vllm_config.lora_config - self.load_config = vllm_config.load_config - self.parallel_config = vllm_config.parallel_config - self.scheduler_config = vllm_config.scheduler_config - self.device_config = vllm_config.device_config - self.speculative_config = vllm_config.speculative_config - self.observability_config = vllm_config.observability_config - - # Map of request_id -> generator used for seeded random sampling - generators: Dict[str, torch.Generator] = {} - - @abstractmethod - def make_model_input_from_broadcasted_tensor_dict( - self, - tensor_dict: Dict[str, Any], - ) -> T: - """ - Make an instance of a ModelRunnerInputBase from the broadcasted tensor - dict. - """ - raise NotImplementedError - - @abstractmethod - def prepare_model_input( - self, - seq_group_metadata_list: List[SequenceGroupMetadata], - virtual_engine: int = 0, - finished_requests_ids: Optional[List[str]] = None, - ) -> T: - """ - Prepare the inputs to ModelRunnerBase.execute_model from an execution - request. This method may move data to the worker's local device. It is - not allowed to communicate with other workers or devices. - """ - raise NotImplementedError - - @abstractmethod - def get_model(self) -> nn.Module: - raise NotImplementedError - - def get_supported_generation_tasks(self) -> list[GenerationTask]: - model = self.get_model() - supported_tasks = list[GenerationTask]() - - if is_text_generation_model(model): - supported_tasks.append("generate") - - if supports_transcription(model): - if model.supports_transcription_only: - return ["transcription"] - - supported_tasks.append("transcription") - - return supported_tasks - - def get_supported_tasks(self) -> tuple[SupportedTask, ...]: - tasks = list[SupportedTask]() - - if self.model_config.runner_type == "generate": - tasks.extend(self.get_supported_generation_tasks()) - - return tuple(tasks) - - def execute_model( - self, - model_input: T, - kv_caches: Optional[List[torch.Tensor]], - intermediate_tensors: Optional[IntermediateTensors] = None, - num_steps: int = 1, - **kwargs, - ) -> Optional[List[SamplerOutput]]: - """ - Execute the model on the given input. - """ - raise NotImplementedError - - def get_generators(self, finished_request_ids: Optional[List[str]] = None): - """ - Return dict of per-request generators used for random sampling. 
- """ - - # Clean up generators from completed requests - if finished_request_ids: - for request_id in finished_request_ids: - self.generators.pop(request_id, None) - - return self.generators - - -class ModelRunnerWrapperBase: - """ - The whole point of this class is to lazily initialize the model_runner. - """ - - def __init__( - self, - model_runner: ModelRunnerBase, - ) -> None: - self.model_runner: ModelRunnerBase = model_runner - - def __getattr__(self, attr): - return getattr(self.model_runner, attr) - - -class InputProcessingError(Exception): - """This exception is raised when an error occurs preparing the inputs for - a single sequence group. - This allows the engine to gracefully handle errors with a single sequence - group without having to fail the entire batch. - """ - - def __init__(self, request_id, message): - """request_id is the id of the offending sequence group""" - self.request_id = request_id - self.message = message - super().__init__(self.message) - - def __str__(self): - return "Failed to prepare inputs for sequence group with request id: " \ - f"{self.request_id}, Error: {self.message}" diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index d0a56f6ff463..eaab976bf7f7 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -1,31 +1,22 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import dataclasses import os -import time -from abc import abstractmethod -from typing import (Any, Callable, Dict, List, Optional, Set, Tuple, Type, - TypeVar, Union) +from typing import (Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar, + Union) import cloudpickle -import torch import torch.nn as nn -from vllm.config import (ObservabilityConfig, VllmConfig, - set_current_vllm_config) -from vllm.distributed import broadcast_tensor_dict, get_pp_group, get_tp_group +from vllm.config import VllmConfig, set_current_vllm_config from vllm.logger import init_logger from vllm.lora.request import LoRARequest from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.sequence import ExecuteModelRequest, IntermediateTensors +from vllm.sequence import ExecuteModelRequest from vllm.utils import (enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) -from vllm.worker.model_runner_base import (BroadcastableModelInput, - ModelRunnerBase, - ModelRunnerInputBase) logger = init_logger(__name__) @@ -141,356 +132,6 @@ def shutdown(self) -> None: return -class DelegateWorkerBase(WorkerBase): - """ - A class that delegates all methods to another WorkerBase instance. This is - useful for creating a WorkerBase that wraps another WorkerBase instance, - e.g. speculative decoding. 
- """ - worker: WorkerBase - - def __init__( - self, - *args, - **kwargs, - ) -> None: - vllm_config: VllmConfig = kwargs.get("vllm_config") - cls = resolve_obj_by_qualname(vllm_config.parallel_config.worker_cls) - self.worker = cls(*args, **kwargs) - - def init_device(self) -> None: - self.worker.init_device() - - def determine_num_available_blocks(self) -> Tuple[int, int]: - return self.worker.determine_num_available_blocks() - - def initialize_cache(self, num_gpu_blocks: int, - num_cpu_blocks: int) -> None: - self.worker.initialize_cache(num_gpu_blocks, num_cpu_blocks) - - def load_model(self) -> None: - """Load model onto target device.""" - self.worker.load_model() - - def get_model(self) -> nn.Module: - return self.worker.get_model() - - def execute_model( - self, - execute_model_req: Optional[ExecuteModelRequest] = None - ) -> Optional[List[SamplerOutput]]: - return self.worker.execute_model(execute_model_req) - - def get_cache_block_size_bytes(self) -> int: - return self.worker.get_cache_block_size_bytes() - - def add_lora(self, lora_request: LoRARequest) -> bool: - return self.worker.add_lora(lora_request) - - def remove_lora(self, lora_id: int) -> bool: - return self.worker.remove_lora(lora_id) - - def pin_lora(self, lora_id: int) -> bool: - return self.worker.pin_lora(lora_id) - - def list_loras(self) -> Set[int]: - return self.worker.list_loras() - - def __getattr__(self, attr): - return getattr(self.worker, attr) - - -class LoRANotSupportedWorkerBase(WorkerBase): - """Partial implementation of WorkerBase that raises exceptions when LoRA - methods are invoked. - """ - - def add_lora(self, lora_request: LoRARequest) -> bool: - raise ValueError(f"{type(self)} does not support LoRA") - - def remove_lora(self, lora_id: int) -> bool: - raise ValueError(f"{type(self)} does not support LoRA") - - def pin_lora(self, lora_id: int) -> bool: - raise ValueError(f"{type(self)} does not support LoRA") - - def list_loras(self) -> Set[int]: - raise ValueError(f"{type(self)} does not support LoRA") - - -@dataclasses.dataclass(frozen=True) -class WorkerInput: - """Local inputs to each worker. May contain device-specific data. These - fields should be broadcastable to other workers. - """ - - num_seq_groups: Optional[int] = None - blocks_to_swap_in: Optional[torch.Tensor] = None - blocks_to_swap_out: Optional[torch.Tensor] = None - blocks_to_copy: Optional[torch.Tensor] = None - virtual_engine: int = 0 - num_steps: int = 1 - - @classmethod - def from_broadcasted_tensor_dict( - cls: Type["WorkerInput"], - tensor_dict: Dict[str, Any], - ) -> "WorkerInput": - """ - Pop fields from the given tensor_dict and populate a new instance of - WorkerInput. - """ - return cls( - num_seq_groups=tensor_dict.pop("num_seq_groups"), - blocks_to_swap_in=tensor_dict.pop("blocks_to_swap_in"), - blocks_to_swap_out=tensor_dict.pop("blocks_to_swap_out"), - blocks_to_copy=tensor_dict.pop("blocks_to_copy"), - virtual_engine=tensor_dict["virtual_engine"], - num_steps=tensor_dict.pop("num_steps"), - ) - - def as_broadcastable_tensor_dict( - self) -> Dict[str, Union[int, torch.Tensor]]: - """ - Extract broadcastable fields. 
- """ - tensor_dict = { - "num_seq_groups": self.num_seq_groups, - "blocks_to_swap_in": self.blocks_to_swap_in, - "blocks_to_swap_out": self.blocks_to_swap_out, - "blocks_to_copy": self.blocks_to_copy, - "virtual_engine": self.virtual_engine, - "num_steps": self.num_steps, - } - - return tensor_dict - - -class LocalOrDistributedWorkerBase(WorkerBase): - """ - Partial implementation of WorkerBase that has a default `execute_model` - definition to perform metadata transfer between workers when in distributed - mode. Subclasses of this interface should use model runners that inherit - from ModelRunnerBase, and should only need to implement worker-local logic. - If custom control plane logic is needed to transfer metadata, or if the - model runner cannot inherit from ModelRunnerBase, use WorkerBase instead. - """ - is_driver_worker: bool - model_runner: ModelRunnerBase - observability_config: Optional[ObservabilityConfig] = None - - @property - @abstractmethod - def do_metadata_broadcast(self) -> bool: - """ - Used by the default `execute_model` to check whether broadcast is - needed to transfer request inputs from the driver worker to other - workers in the TP group. If WorkerBase subclass only supports - single-worker execution, then this method should return False. - """ - raise NotImplementedError - - @property - @abstractmethod - def kv_cache(self) -> Optional[List[List[torch.Tensor]]]: - """ - Gets the list of kv caches to pass to the worker's model runner. Each - element in the list is a kv cache corresponding to a particular virtual - engine (PP stream). Used by the default `execute_model`. If the worker's - model runner does not follow the ModelRunnerBase interface, then inherit - from WorkerBase instead. - """ - raise NotImplementedError - - @abstractmethod - def prepare_worker_input( - self, execute_model_req: ExecuteModelRequest) -> WorkerInput: - """ - Prepare the inputs to WorkerBase.execute_worker from an execution - request. This method may move data to the worker's local device. It is - not allowed to communicate with other workers or devices. - """ - raise NotImplementedError - - @abstractmethod - def execute_worker(self, worker_input: WorkerInput) -> None: - """ - Process an execution request. - """ - raise NotImplementedError - - def _get_worker_input_from_broadcast( - self - ) -> Optional[Tuple[BroadcastableModelInput, WorkerInput, Dict[ - str, torch.Tensor]]]: - """ Get the worker input from the broadcasted tensor dict. """ - assert self.do_metadata_broadcast - assert not self.is_driver_worker - broadcast_data = broadcast_tensor_dict(src=0) - if not broadcast_data: - return None - - worker_input = WorkerInput.from_broadcasted_tensor_dict(broadcast_data) - model_input = ( - self.model_runner.make_model_input_from_broadcasted_tensor_dict( - broadcast_data)) - - kwargs = extract_previous_hidden_states(broadcast_data) - - return model_input, worker_input, kwargs - - def _get_driver_input_and_broadcast( - self, execute_model_req: ExecuteModelRequest - ) -> Tuple[BroadcastableModelInput, WorkerInput, Dict[str, torch.Tensor]]: - """ Get the driver input and broadcast it to other workers. 
""" - assert self.is_driver_worker - - worker_input: WorkerInput = self.prepare_worker_input( - execute_model_req=execute_model_req) - model_input: ModelRunnerInputBase = ( - self.model_runner.prepare_model_input( - execute_model_req.seq_group_metadata_list, - execute_model_req.virtual_engine, - execute_model_req.finished_requests_ids)) - - kwargs = extract_previous_hidden_states(execute_model_req) - - if self.do_metadata_broadcast: - broadcast_data = worker_input.as_broadcastable_tensor_dict() - broadcast_data.update(model_input.as_broadcastable_tensor_dict()) - broadcast_data.update(kwargs) - broadcast_tensor_dict(broadcast_data, src=0) - - if execute_model_req.async_callback: - model_input = dataclasses.replace( # type: ignore - model_input, - async_callback=execute_model_req.async_callback) - - return model_input, worker_input, kwargs - - def prepare_input( - self, - execute_model_req: Optional[ExecuteModelRequest] = None - ) -> Optional[Tuple[BroadcastableModelInput, WorkerInput, Dict[ - str, torch.Tensor]]]: - """ - Prepare the inputs to ModelRunner and workers. - """ - if self.is_driver_worker: - if execute_model_req is None: - if self.do_metadata_broadcast: - # This signals that there's no more requests to process for - # now. All workers are running infinite loop with - # broadcast_tensor_dict, and it stops the loop when the - # driver broadcasts an empty input. Send an empty input to - # notify all other workers to stop their execution loop. - broadcast_tensor_dict({}, src=0) - return None - return self._get_driver_input_and_broadcast(execute_model_req) - else: - return self._get_worker_input_from_broadcast() - - def get_model(self) -> nn.Module: - return self.model_runner.get_model() - - def execute_model( - self, - execute_model_req: Optional[ExecuteModelRequest] = None, - ) -> Optional[List[SamplerOutput]]: - """Executes at least one model step on the given sequences, unless no - sequences are provided.""" - start_time = time.perf_counter() - - inputs = self.prepare_input(execute_model_req) - if inputs is None: - return None - - model_input, worker_input, kwargs = inputs - num_steps = worker_input.num_steps - - self.execute_worker(worker_input) - - # If there is no input, we don't need to execute the model. 
- if worker_input.num_seq_groups == 0: - return [] - - intermediate_tensors = None - orig_model_execute_time = 0.0 - if not get_pp_group().is_first_rank: - intermediate_tensors = IntermediateTensors( - get_pp_group().recv_tensor_dict( - all_gather_group=get_tp_group())) - if (self.observability_config is not None - and self.observability_config.collect_model_execute_time): - orig_model_execute_time = intermediate_tensors.tensors.get( - "model_execute_time", torch.tensor(0)).item() - - output = self.model_runner.execute_model( - model_input=model_input, - kv_caches=self.kv_cache[worker_input.virtual_engine] - if self.kv_cache is not None else None, - intermediate_tensors=intermediate_tensors, - num_steps=num_steps, - **kwargs, - ) - - model_execute_time = time.perf_counter() - start_time - if not get_pp_group().is_last_rank: - # output is IntermediateTensors - assert isinstance(output, IntermediateTensors) - if (self.observability_config is not None - and self.observability_config.collect_model_execute_time): - output.tensors["model_execute_time"] = torch.tensor( - model_execute_time + orig_model_execute_time) - get_pp_group().send_tensor_dict(output.tensors, - all_gather_group=get_tp_group()) - return [None] - if (self.observability_config is not None - and self.observability_config.collect_model_execute_time - and output is not None): - for o in output: - o.model_execute_time = (orig_model_execute_time + - model_execute_time) - - # output is List[SamplerOutput] - return output - - def _execute_model_spmd( - self, - execute_model_req: ExecuteModelRequest, - intermediate_tensors: Optional[IntermediateTensors] = None - ) -> Optional[List[SamplerOutput]]: - """ - Execute model in Single Program Multiple Data (SPMD) fashion. - All workers take the same request, prepare the input and - execute the model. - """ - assert execute_model_req is not None, ( - "_execute_model_spmd() requires each worker to take in an " - "ExecuteModelRequest") - worker_input: WorkerInput = self.prepare_worker_input( - execute_model_req=execute_model_req) - model_input: ModelRunnerInputBase = ( - self.model_runner.prepare_model_input( - execute_model_req.seq_group_metadata_list)) - - self.execute_worker(worker_input) - - # If there is no input, we don't need to execute the model. - if worker_input.num_seq_groups == 0: - return [] - - kwargs = extract_previous_hidden_states(execute_model_req) - - return self.model_runner.execute_model( - model_input=model_input, - kv_caches=self.kv_cache[worker_input.virtual_engine] - if self.kv_cache is not None else None, - intermediate_tensors=intermediate_tensors, - **kwargs, - ) - - class WorkerWrapperBase: """ This class represents one process in an executor/engine. It is responsible @@ -636,23 +277,3 @@ def execute_method(self, method: Union[str, bytes], *args, **kwargs): def __getattr__(self, attr): return getattr(self.worker, attr) - - -def extract_previous_hidden_states( - data: Union[ExecuteModelRequest, Dict[str, torch.Tensor]]) -> \ - Dict[str, torch.Tensor]: - """If data contains previous_hidden_states, extract it. This returns a dict - which can be used directly as additional kwargs in any following - execute_model calls. This is used in draft models like EAGLE.""" - output = {} - - # When called from non-driver worker, data is dict but when called from - # driver worker, data is ExecuteModelRequest. 
- if isinstance(data, dict): - if "previous_hidden_states" in data: - output["previous_hidden_states"] = data["previous_hidden_states"] - elif data.previous_hidden_states is not None: - output["previous_hidden_states"] = data.previous_hidden_states\ - .hidden_states - - return output From 035fd2bd2cd2fb70f5834f5ca6c2ea30cdae9187 Mon Sep 17 00:00:00 2001 From: Wenlong Wang Date: Sat, 20 Sep 2025 20:55:10 -0700 Subject: [PATCH 203/518] [Multi Modal][Performance] Fused Q,K's apply_rope in more models (#25005) Signed-off-by: wwl2755 Co-authored-by: Roger Wang --- vllm/model_executor/models/ernie45_vl.py | 15 +++++++++------ vllm/model_executor/models/glm4_1v.py | 17 ++++++++++------- vllm/model_executor/models/qwen2_vl.py | 16 ++++++++++------ 3 files changed, 29 insertions(+), 19 deletions(-) diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py index 3396c67f42b7..0d4aced93ca1 100644 --- a/vllm/model_executor/models/ernie45_vl.py +++ b/vllm/model_executor/models/ernie45_vl.py @@ -234,8 +234,9 @@ def forward( q, k, v = (rearrange(x, "s b ... -> b s ...").contiguous() for x in (q, k, v)) if rotary_pos_emb is not None: - q = apply_rotary_pos_emb_vision(q, rotary_pos_emb) - k = apply_rotary_pos_emb_vision(k, rotary_pos_emb) + qk_concat = torch.cat([q, k], dim=0) + qk_rotated = apply_rotary_pos_emb_vision(qk_concat, rotary_pos_emb) + q, k = torch.chunk(qk_rotated, 2, dim=0) if self.is_flash_attn_backend: # from vllm_flash_attn.flash_attn_interface import ( @@ -261,8 +262,8 @@ def forward( causal=False) context_layer = rearrange(output, - "(b s) ... -> b s ...", - b=batch_size) + "(b s) h d -> s b (h d)", + b=batch_size).contiguous() elif self.attn_backend == _Backend.TORCH_SDPA: # Execute attention entry by entry for speed & less VRAM. outputs = [] @@ -281,6 +282,8 @@ def forward( output_i = rearrange(output_i, "b h s d -> b s h d ") outputs.append(output_i) context_layer = torch.cat(outputs, dim=1) + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() elif self.attn_backend == _Backend.XFORMERS: from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalMask @@ -291,8 +294,8 @@ def forward( context_layer = xops.memory_efficient_attention_forward( q, k, v, attn_bias=attn_bias, p=0, scale=None) - context_layer = rearrange(context_layer, - "b s h d -> s b (h d)").contiguous() + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() output, _ = self.proj(context_layer) return output diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index cbf327ce02b6..308b0cb602bc 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -315,8 +315,10 @@ def forward( q, k, v = (rearrange(x, "s b ... -> b s ...").contiguous() for x in (q, k, v)) if rotary_pos_emb is not None: - q = apply_rotary_pos_emb_vision(q, rotary_pos_emb) - k = apply_rotary_pos_emb_vision(k, rotary_pos_emb) + # [2 * b, s, heads, head_dim] + qk_concat = torch.cat([q, k], dim=0) + qk_rotated = apply_rotary_pos_emb_vision(qk_concat, rotary_pos_emb) + q, k = torch.chunk(qk_rotated, 2, dim=0) if self.attn_backend == _Backend.FLASH_ATTN: # from vllm_flash_attn.flash_attn_interface import ( @@ -341,8 +343,8 @@ def forward( ) context_layer = rearrange(output, - "(b s) ... 
-> b s ...", - b=batch_size) + "(b s) h d -> s b (h d)", + b=batch_size).contiguous() elif self.attn_backend == _Backend.TORCH_SDPA: # Execute attention entry by entry for speed & less VRAM. outputs = [] @@ -361,6 +363,8 @@ def forward( output_i = rearrange(output_i, "b h s d -> b s h d ") outputs.append(output_i) context_layer = torch.cat(outputs, dim=1) + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() elif self.attn_backend == _Backend.XFORMERS: from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalMask @@ -371,9 +375,8 @@ def forward( context_layer = xops.memory_efficient_attention_forward( q, k, v, attn_bias=attn_bias, p=0, scale=None) - - context_layer = rearrange(context_layer, - "b s h d -> s b (h d)").contiguous() + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() output, _ = self.proj(context_layer) return output diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 7f361678ba72..dd4e7731e0b0 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -377,8 +377,10 @@ def forward( q, k, v = (rearrange(x, "s b ... -> b s ...").contiguous() for x in (q, k, v)) if rotary_pos_emb is not None: - q = apply_rotary_pos_emb_vision(q, rotary_pos_emb) - k = apply_rotary_pos_emb_vision(k, rotary_pos_emb) + # [2 * b, s, heads, head_dim] + qk_concat = torch.cat([q, k], dim=0) + qk_rotated = apply_rotary_pos_emb_vision(qk_concat, rotary_pos_emb) + q, k = torch.chunk(qk_rotated, 2, dim=0) if self.is_flash_attn_backend: if self.attn_backend == _Backend.ROCM_AITER_FA: @@ -402,8 +404,8 @@ def forward( causal=False) context_layer = rearrange(output, - "(b s) ... -> b s ...", - b=batch_size) + "(b s) h d -> s b (h d)", + b=batch_size).contiguous() elif self.attn_backend == _Backend.TORCH_SDPA: # Execute attention entry by entry for speed & less VRAM. 
outputs = [] @@ -422,6 +424,8 @@ def forward( output_i = rearrange(output_i, "b h s d -> b s h d ") outputs.append(output_i) context_layer = torch.cat(outputs, dim=1) + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() elif self.attn_backend == _Backend.XFORMERS: from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalMask @@ -432,8 +436,8 @@ def forward( context_layer = xops.memory_efficient_attention_forward( q, k, v, attn_bias=attn_bias, p=0, scale=None) - context_layer = rearrange(context_layer, - "b s h d -> s b (h d)").contiguous() + context_layer = rearrange(context_layer, + "b s h d -> s b (h d)").contiguous() output, _ = self.proj(context_layer) return output From 12dbd834cf33b539a50d65516e44327caff9d824 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 21:10:48 -0700 Subject: [PATCH 204/518] [V0 Deprecation] Remove from_seq_group methods (#25330) Signed-off-by: Woosuk Kwon --- vllm/multimodal/base.py | 122 +------------------------ vllm/outputs.py | 195 +--------------------------------------- 2 files changed, 2 insertions(+), 315 deletions(-) diff --git a/vllm/multimodal/base.py b/vllm/multimodal/base.py index ef8f1b2e17b4..e0edb3e883ed 100644 --- a/vllm/multimodal/base.py +++ b/vllm/multimodal/base.py @@ -2,14 +2,8 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from abc import ABC, abstractmethod -from collections.abc import Sequence from pathlib import Path -from typing import TYPE_CHECKING, Generic, NamedTuple, TypeVar - -if TYPE_CHECKING: - from vllm.sequence import SequenceGroupMetadata - -from .inputs import MultiModalKwargs, PlaceholderRange +from typing import Generic, NamedTuple, TypeVar _T = TypeVar("_T") @@ -53,120 +47,6 @@ def __init__(self): self.dest_ranges = [] self.dest_len = 0 - @classmethod - def from_seq_group( - cls, seq_group: "SequenceGroupMetadata", positions: range - ) -> tuple[MultiModalKwargs, dict[str, "MultiModalPlaceholderMap"]]: - """ - Returns the multi-modal items that intersect with the portion of a - prompt (``seq_group``) represented by ``positions``, as well as a - ``MultiModalPlaceholderMap`` that relates the multi-modal embedding - vectors to their corresponding placeholders. - - Examples: - - ``` - Prompt: |AAAA BBBB What's in these images?| - Positions: |.................................| - - images = [A, B] - src_ranges = [(0, 4), (4, 8)] - dest_ranges = [(0, 4), (5, 9)] - - Prompt: |AAAA BBBB What's in these images?| - Positions: | ..... | - - images = [A, B] - src_ranges = [(2, 4), (4, 6)] - dest_ranges = [(0, 2), (3, 5)] - - Prompt: |AAAA BBBB What's in these images?| - Positions: | ......... 
| - - images = [B] - src_ranges = [(0, 4)] - dest_ranges = [(0, 4)] - - Prompt: |AAAA BBBB What's in these images?| - Positions: | .......................| - - images = [] - src_ranges = [] - dest_ranges = [] - ``` - """ - seq_mm_data = seq_group.multi_modal_data - seq_mm_placeholders = seq_group.multi_modal_placeholders - - if not seq_mm_data or not seq_mm_placeholders: - return MultiModalKwargs(), {} - - placeholder_maps = dict[str, MultiModalPlaceholderMap]() - - for modality, placeholders in seq_mm_placeholders.items(): - placeholder_map = MultiModalPlaceholderMap() - - if positions: - placeholder_map.append_items_from_seq_group( - positions, - # Dummy, since we don't care about intersecting items - [None] * len(placeholders), - placeholders, - ) - - placeholder_maps[modality] = placeholder_map - - return seq_mm_data, placeholder_maps - - def append_items_from_seq_group( - self, - positions: range, - multi_modal_items: list[_T], - multi_modal_placeholders: Sequence[PlaceholderRange], - ) -> list[_T]: - """ - Adds the multi-modal items that intersect ```positions`` to this - placeholder map and returns the intersecting items. - """ - intersecting_items = [] - - if len(multi_modal_items) != len(multi_modal_placeholders): - raise ValueError( - "Multi-modal placeholders and items must have the same length." - ) - for placeholder_dict, mm_item in zip(multi_modal_placeholders, - multi_modal_items): - placeholder = range( - placeholder_dict.offset, - placeholder_dict.offset + placeholder_dict.length, - ) - intersection = range( - max(positions.start, placeholder.start), - min(positions.stop, placeholder.stop), - ) - - if not intersection: - # Skip this multi-modal item. - continue - - token_embedding_range = range( - intersection.start - positions.start, - intersection.stop - positions.start, - ) - - multimodal_embedding_range = range( - intersection.start - placeholder.start + self.src_len, - intersection.stop - placeholder.start + self.src_len, - ) - - intersecting_items.append(mm_item) - self.dest_ranges.append(token_embedding_range) - self.src_ranges.append(multimodal_embedding_range) - self.src_len += len(placeholder) - - self.dest_len += len(positions) - return intersecting_items - def extend(self, other: "MultiModalPlaceholderMap"): """ Adds the placeholders from another ``MultiModalPlaceholderMap`` to this diff --git a/vllm/outputs.py b/vllm/outputs.py index 64bcfd472f2a..4d8206bb2d83 100644 --- a/vllm/outputs.py +++ b/vllm/outputs.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import time from collections.abc import MutableSequence from collections.abc import Sequence as GenericSequence from dataclasses import dataclass @@ -14,9 +13,7 @@ from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.lora.request import LoRARequest from vllm.multimodal.inputs import MultiModalPlaceholderDict -from vllm.sampling_params import RequestOutputKind -from vllm.sequence import (RequestMetrics, SequenceGroup, SequenceGroupBase, - SequenceStatus) +from vllm.sequence import RequestMetrics logger = init_logger(__name__) @@ -171,170 +168,6 @@ def add(self, next_output: "RequestOutput", aggregate: bool) -> None: else: self.outputs.append(next_completion) - @classmethod - def from_seq_group( - cls, seq_group: SequenceGroup, use_cache: bool, - seq_id_to_seq_group: dict[str, SequenceGroupBase] - ) -> Optional["RequestOutput"]: - finished = seq_group.is_finished() - - if seq_group.request_id in seq_id_to_seq_group: - 
group: SequenceGroupBase = seq_id_to_seq_group[ - seq_group.request_id] - assembled_seq_group = group.maybe_assemble_group(seq_group) - if finished: - group.finish_seq(seq_group) - if assembled_seq_group is None: - return None - - # clear finished seq in seq_id_to_seq_group - if len(group.to_be_finished) == 0: - for sub_request_id in list(group.seq_id_to_index.keys()): - if sub_request_id in seq_id_to_seq_group: - del seq_id_to_seq_group[sub_request_id] - - return cls.from_seq_group(assembled_seq_group, use_cache, - seq_id_to_seq_group) - - sampling_params = seq_group.sampling_params - if sampling_params is None: - raise ValueError( - "Sampling parameters are missing for a CompletionRequest.") - - if sampling_params.output_kind == RequestOutputKind.FINAL_ONLY and ( - not finished): - return None - - # Init cache (if needed) - if use_cache and seq_group.cached_request_output is None: - seq_group.cached_request_output = RequestOutput( # type: ignore - request_id="", - prompt=None, - prompt_token_ids=[], - prompt_logprobs=None, - outputs=[], - finished=False) - - top_n_seqs = seq_group.get_seqs() - - # Create the outputs. - # NOTE: We need omit logprobs here explicitly because the sequence - # always has the logprobs of the sampled tokens even if the - # logprobs are not requested. - include_logprobs = sampling_params.logprobs is not None - text_buffer_length = sampling_params.output_text_buffer_length - delta = sampling_params.output_kind == RequestOutputKind.DELTA - - outputs = [] - include_prompt = True - # num_cached_tokens should be the same for all the sequences - num_cached_tokens = None - for i, seq in enumerate(top_n_seqs): - output_text = seq.get_output_text_to_return( - text_buffer_length, delta) - - output_token_ids = seq.get_output_token_ids_to_return(delta) - num_output_tokens = 1 if isinstance(output_token_ids, - int) else len(output_token_ids) - num_cached_tokens = seq.data.get_num_cached_tokens() - - output_logprobs = seq.output_logprobs if include_logprobs else None - - if delta: - # Slice logprobs delta if applicable - if output_logprobs: - # num_output_tokens can be 0 when n > 1 and request finishes - # before the others - if num_output_tokens > 0: - output_logprobs = output_logprobs[-num_output_tokens:] - else: - output_logprobs = None - # Don't include prompt if this is after the first output - # containing decode token ids - if include_prompt and seq.get_output_len() > num_output_tokens: - include_prompt = False - - if use_cache: - # Get cached output object - cached_outputs = seq_group.cached_request_output.outputs # type: ignore - if i >= len(cached_outputs): - cached_outputs.append( - CompletionOutput(index=i, - text="", - token_ids=[], - cumulative_logprob=None, - logprobs=None, - finish_reason=None, - stop_reason=None)) - output = cached_outputs[i] - - # Init cached output object - assert output.index == i - output.text = output_text - - if isinstance(output_token_ids, int): - output.token_ids.clear() - output.token_ids.append(output_token_ids) - else: - output.token_ids = output_token_ids - - output.cumulative_logprob = seq.get_cumulative_logprob() \ - if include_logprobs else None - output.logprobs = output_logprobs - output.finish_reason = SequenceStatus.get_finished_reason( - seq.status) - output.stop_reason = seq.stop_reason - - else: - output = CompletionOutput( - top_n_seqs.index(seq), output_text, [output_token_ids] - if isinstance(output_token_ids, int) else output_token_ids, - seq.get_cumulative_logprob() if include_logprobs else None, - output_logprobs, 
- SequenceStatus.get_finished_reason(seq.status), - seq.stop_reason) - - outputs.append(output) - - # Every sequence in the sequence group should have the same prompt. - if include_prompt: - prompt = seq_group.prompt - prompt_token_ids = seq_group.prompt_token_ids - encoder_prompt = seq_group.encoder_prompt - encoder_prompt_token_ids = seq_group.encoder_prompt_token_ids - prompt_logprobs = seq_group.prompt_logprobs - else: - prompt = None - prompt_token_ids = None - encoder_prompt = None - encoder_prompt_token_ids = None - prompt_logprobs = None - finished_time = time.time() if finished else None - seq_group.set_finished_time(finished_time) - - init_kwargs = { - "request_id": seq_group.request_id, - "prompt": prompt, - "prompt_token_ids": prompt_token_ids, - "prompt_logprobs": prompt_logprobs, - "outputs": outputs, - "finished": finished, - "metrics": seq_group.metrics, - "lora_request": seq_group.lora_request, - "encoder_prompt": encoder_prompt, - "encoder_prompt_token_ids": encoder_prompt_token_ids, - "num_cached_tokens": num_cached_tokens, - "multi_modal_placeholders": seq_group.multi_modal_placeholders - } - - if use_cache: - request_output = seq_group.cached_request_output - request_output.__init__(**init_kwargs) # type: ignore - else: - request_output = cls(**init_kwargs) # type: ignore - - return request_output - def __repr__(self) -> str: return (f"RequestOutput(request_id={self.request_id}, " f"prompt={self.prompt!r}, " @@ -371,19 +204,6 @@ def __init__(self, request_id: str, outputs: _O, self.finished = finished self.outputs = outputs - @staticmethod - def from_seq_group(seq_group: SequenceGroup) -> "PoolingRequestOutput": - pooled_data = seq_group.pooled_data - assert pooled_data is not None - - data = pooled_data.to(dtype=torch.float32, device="cpu") - output = PoolingOutput(data) - prompt_token_ids = seq_group.prompt_token_ids - finished = seq_group.is_finished() - - return PoolingRequestOutput(seq_group.request_id, output, - prompt_token_ids, finished) - def __repr__(self): return (f"{type(self).__name__}(request_id={self.request_id!r}, " f"outputs={self.outputs!r}, " @@ -391,19 +211,6 @@ def __repr__(self): f"finished={self.finished})") -class RequestOutputFactory: - - @staticmethod - def create(seq_group: SequenceGroup, - seq_id_to_seq_group: dict[str, SequenceGroupBase], - use_cache: bool = False): - if seq_group.pooled_data is not None: - return PoolingRequestOutput.from_seq_group(seq_group) - else: - return RequestOutput.from_seq_group(seq_group, use_cache, - seq_id_to_seq_group) - - @dataclass class EmbeddingOutput: """The output data of one embedding output of a request. 
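The rope-fusion change in #25005 above replaces two rotary-embedding calls (one for Q, one for K) with a single call over their concatenation along the batch dimension, halving the kernel launches for that step. A minimal, self-contained sketch of the pattern is below; the rotate-half rotary used here is a stand-in for illustration and is not vLLM's apply_rotary_pos_emb_vision.

import torch

def rotate_half(x: torch.Tensor) -> torch.Tensor:
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def apply_rope(x: torch.Tensor, cos: torch.Tensor,
               sin: torch.Tensor) -> torch.Tensor:
    # x: [batch, seq, heads, head_dim]; cos/sin: [seq, head_dim]
    return x * cos[None, :, None, :] + rotate_half(x) * sin[None, :, None, :]

def apply_rope_fused(q, k, cos, sin):
    # Concatenating Q and K along the batch dim lets a single rotary call
    # cover both tensors, then chunk splits them back apart.
    qk = torch.cat([q, k], dim=0)
    qk = apply_rope(qk, cos, sin)
    return torch.chunk(qk, 2, dim=0)

# Tiny check that the fused path matches the two separate calls.
b, s, h, d = 2, 5, 3, 8
q, k = torch.randn(b, s, h, d), torch.randn(b, s, h, d)
inv_freq = 1.0 / (10000.0 ** (torch.arange(0, d, 2).float() / d))
angles = torch.outer(torch.arange(s).float(), inv_freq)
cos = torch.cat([angles.cos(), angles.cos()], dim=-1)
sin = torch.cat([angles.sin(), angles.sin()], dim=-1)
q_fused, k_fused = apply_rope_fused(q, k, cos, sin)
assert torch.allclose(q_fused, apply_rope(q, cos, sin))
assert torch.allclose(k_fused, apply_rope(k, cos, sin))

The numerics are identical to applying the embedding separately, since the rotation is elementwise per position; only the number of launched kernels changes.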
From 7ed82d1974837957a3bfb6d576b9cffba24d31ae Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sat, 20 Sep 2025 21:26:35 -0700 Subject: [PATCH 205/518] [V0 Deprecation] Remove V0 MP executor (#25329) Signed-off-by: Woosuk Kwon --- vllm/executor/mp_distributed_executor.py | 244 -------------------- vllm/executor/multiproc_worker_utils.py | 279 ----------------------- vllm/v1/executor/multiproc_executor.py | 40 +++- 3 files changed, 33 insertions(+), 530 deletions(-) delete mode 100644 vllm/executor/mp_distributed_executor.py delete mode 100644 vllm/executor/multiproc_worker_utils.py diff --git a/vllm/executor/mp_distributed_executor.py b/vllm/executor/mp_distributed_executor.py deleted file mode 100644 index 136dca54e6e5..000000000000 --- a/vllm/executor/mp_distributed_executor.py +++ /dev/null @@ -1,244 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -import os -from typing import Any, Callable, List, Optional, Union - -import cloudpickle - -from vllm.executor.executor_base import DistributedExecutorBase -from vllm.executor.multiproc_worker_utils import ( - ProcessWorkerWrapper, ResultHandler, WorkerMonitor, - set_multiprocessing_worker_envs) -from vllm.logger import init_logger -from vllm.model_executor.layers.sampler import SamplerOutput -from vllm.sequence import ExecuteModelRequest -from vllm.utils import (_run_task_with_lock, cuda_device_count_stateless, - get_distributed_init_method, get_ip, get_open_port, - make_async, run_method, update_environment_variables) -from vllm.worker.worker_base import WorkerWrapperBase - -logger = init_logger(__name__) - - -class MultiprocessingDistributedExecutor(DistributedExecutorBase): - """Python multiprocessing-based distributed executor""" - - uses_ray: bool = False - - def _check_cuda(self) -> None: - """Check that the number of GPUs is sufficient for the parallel - configuration. Separate from _init_executor to reduce the number of - indented blocks. - """ - parallel_config = self.parallel_config - world_size = parallel_config.world_size - tensor_parallel_size = parallel_config.tensor_parallel_size - - cuda_device_count = cuda_device_count_stateless() - # Use confusing message for more common TP-only case. - if tensor_parallel_size > cuda_device_count: - raise RuntimeError( - f"please set tensor_parallel_size ({tensor_parallel_size}) " - f"to less than max local gpu count ({cuda_device_count})") - - if world_size > cuda_device_count: - raise RuntimeError( - f"please ensure that world_size ({world_size}) " - f"is less than than max local gpu count ({cuda_device_count})") - - # Set CUDA_VISIBLE_DEVICES for the driver, inherited by workers - if "CUDA_VISIBLE_DEVICES" not in os.environ: - update_environment_variables({ - "CUDA_VISIBLE_DEVICES": (",".join(map(str, range(world_size)))) - }) - - def _init_executor(self) -> None: - - from vllm.platforms import current_platform - if current_platform.is_cuda_alike(): - self._check_cuda() - - # Create the parallel GPU workers. - world_size = self.parallel_config.world_size - tensor_parallel_size = self.parallel_config.tensor_parallel_size - - # Set multiprocessing envs that are common to V0 and V1 - set_multiprocessing_worker_envs(self.parallel_config) - - # Multiprocessing-based executor does not support multi-node setting. - # Since it only works for single node, we can use the loopback address - # 127.0.0.1 for communication. 
- distributed_init_method = get_distributed_init_method( - "127.0.0.1", get_open_port()) - - self.workers: List[ProcessWorkerWrapper] = [] - # This is the list of workers that are rank 0 of each TP group EXCEPT - # global rank 0. These are the workers that will broadcast to the - # rest of the workers. - self.tp_driver_workers: List[ProcessWorkerWrapper] = [] - # This is the list of workers that are not drivers and not the first - # worker in a TP group. These are the workers that will be - # broadcasted to. - self.non_driver_workers: List[ProcessWorkerWrapper] = [] - - if world_size == 1: - self.worker_monitor = None - else: - result_handler = ResultHandler() - for rank in range(1, world_size): - worker = ProcessWorkerWrapper(result_handler, - WorkerWrapperBase, - self.vllm_config, rank) - self.workers.append(worker) - if rank % tensor_parallel_size == 0: - self.tp_driver_workers.append(worker) - else: - self.non_driver_workers.append(worker) - - self.worker_monitor = WorkerMonitor(self.workers, result_handler) - result_handler.start() - self.worker_monitor.start() - - # Set up signal handlers to shut down the executor cleanly - # sometimes gc does not work well - - self.driver_worker = WorkerWrapperBase(self.vllm_config, 0) - - all_kwargs = [] - distributed_init_method = get_distributed_init_method( - get_ip(), get_open_port()) - for i in range(world_size): - local_rank = i - rank = i - kwargs = dict( - vllm_config=self.vllm_config, - local_rank=local_rank, - rank=rank, - distributed_init_method=distributed_init_method, - is_driver_worker=(not self.parallel_config) - or (rank % self.parallel_config.tensor_parallel_size == 0), - ) - all_kwargs.append(kwargs) - self._run_workers("init_worker", all_kwargs) - self._run_workers("init_device") - self._run_workers("load_model", - max_concurrent_workers=self.parallel_config. - max_parallel_loading_workers) - self.driver_exec_model = make_async(self.driver_worker.execute_model) - self.pp_locks: Optional[List[asyncio.Lock]] = None - - def shutdown(self): - if (worker_monitor := getattr(self, "worker_monitor", - None)) is not None: - worker_monitor.close() - - def _driver_execute_model( - self, execute_model_req: Optional[ExecuteModelRequest] - ) -> Optional[List[SamplerOutput]]: - """Run execute_model in the driver worker. - - Passing None will cause the driver to stop the model execution - loop running in each of the remote workers. - """ - return self.driver_worker.execute_model(execute_model_req) - - def _run_workers( - self, - method: Union[str, Callable], - *args, - async_run_tensor_parallel_workers_only: bool = False, - max_concurrent_workers: Optional[int] = None, - **kwargs, - ) -> List[Any]: - """Runs the given method on all workers. - - Args: - async_run_tensor_parallel_workers_only: If True the method will be - run only in the remote TP workers, not the driver worker. - It will also be run asynchronously and return a list of futures - rather than blocking on the results. - """ - if isinstance(method, str): - sent_method = method - else: - sent_method = cloudpickle.dumps(method) - del method - - if max_concurrent_workers: - raise NotImplementedError( - "max_concurrent_workers is not supported yet.") - - if async_run_tensor_parallel_workers_only: - # Run only non-driver workers and just return futures. - return [ - worker.execute_method(sent_method, *args, **kwargs) - for worker in self.non_driver_workers - ] - - # Start all remote workers first. 
- worker_outputs = [ - worker.execute_method(sent_method, *args, **kwargs) - for worker in self.workers - ] - - driver_worker_output = run_method(self.driver_worker, sent_method, - args, kwargs) - - # Get the results of the workers. - return [driver_worker_output - ] + [output.get() for output in worker_outputs] - - def check_health(self) -> None: - """Raises an error if engine is unhealthy.""" - if self.worker_monitor is not None and not self.worker_monitor.is_alive( - ): - raise RuntimeError("Worker processes are not running") - - def _wait_for_tasks_completion(self, parallel_worker_tasks: Any) -> None: - """Wait for futures returned from _run_workers() with - async_run_remote_workers_only to complete.""" - for result in parallel_worker_tasks: - result.get() - - async def _driver_execute_model_async( - self, - execute_model_req: Optional[ExecuteModelRequest] = None - ) -> List[SamplerOutput]: - if not self.tp_driver_workers: - return await self.driver_exec_model(execute_model_req) - - if self.pp_locks is None: - # This locks each pipeline parallel stage so multiple virtual - # engines can't execute on the same stage at the same time - # We create the locks here to avoid creating them in the constructor - # which uses a different asyncio loop. - self.pp_locks = [ - asyncio.Lock() - for _ in range(self.parallel_config.pipeline_parallel_size) - ] - - tasks = [ - asyncio.create_task( - _run_task_with_lock(self.driver_exec_model, self.pp_locks[0], - execute_model_req)) - ] - for pp_rank, driver_worker in enumerate(self.tp_driver_workers, - start=1): - tasks.append( - asyncio.create_task( - _run_task_with_lock(driver_worker.execute_method_async, - self.pp_locks[pp_rank], - "execute_model", execute_model_req))) - results = await asyncio.gather(*tasks) - - # Only the last PP stage has the final results. 
- return results[-1] - - async def _start_worker_execution_loop(self): - coros = [ - worker.execute_method_async("start_worker_execution_loop") - for worker in self.non_driver_workers - ] - return await asyncio.gather(*coros) diff --git a/vllm/executor/multiproc_worker_utils.py b/vllm/executor/multiproc_worker_utils.py deleted file mode 100644 index 48b3479ed799..000000000000 --- a/vllm/executor/multiproc_worker_utils.py +++ /dev/null @@ -1,279 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import asyncio -import os -import threading -import uuid -from dataclasses import dataclass -from multiprocessing import Queue -from multiprocessing.connection import wait -from multiprocessing.process import BaseProcess -from typing import Any, Callable, Dict, Generic, List, Optional, TypeVar, Union - -import torch - -from vllm.config import VllmConfig -from vllm.logger import init_logger -from vllm.utils import (_maybe_force_spawn, decorate_logs, get_mp_context, - run_method) - -logger = init_logger(__name__) - -T = TypeVar('T') - -_TERMINATE = "TERMINATE" # sentinel - -JOIN_TIMEOUT_S = 2 - - -@dataclass -class Result(Generic[T]): - """Result of task dispatched to worker""" - - task_id: uuid.UUID - value: Optional[T] = None - exception: Optional[BaseException] = None - - -class ResultFuture(threading.Event, Generic[T]): - """Synchronous future for non-async case""" - - def __init__(self): - super().__init__() - self.result: Optional[Result[T]] = None - - def set_result(self, result: Result[T]): - self.result = result - self.set() - - def get(self) -> T: - self.wait() - assert self.result is not None - if self.result.exception is not None: - raise self.result.exception - return self.result.value # type: ignore[return-value] - - -def _set_future_result(future: Union[ResultFuture, asyncio.Future], - result: Result): - if isinstance(future, ResultFuture): - future.set_result(result) - return - loop = future.get_loop() - if not loop.is_closed(): - if result.exception is not None: - loop.call_soon_threadsafe(future.set_exception, result.exception) - else: - loop.call_soon_threadsafe(future.set_result, result.value) - - -class ResultHandler(threading.Thread): - """Handle results from all workers (in background thread)""" - - def __init__(self) -> None: - super().__init__(daemon=True) - self.result_queue = get_mp_context().Queue() - self.tasks: Dict[uuid.UUID, Union[ResultFuture, asyncio.Future]] = {} - - def run(self): - for result in iter(self.result_queue.get, _TERMINATE): - future = self.tasks.pop(result.task_id) - _set_future_result(future, result) - # Ensure that all waiters will receive an exception - for task_id, future in self.tasks.items(): - _set_future_result( - future, - Result(task_id=task_id, - exception=ChildProcessError("worker died"))) - - def close(self): - self.result_queue.put(_TERMINATE) - - -class WorkerMonitor(threading.Thread): - """Monitor worker status (in background thread)""" - - def __init__(self, workers: List['ProcessWorkerWrapper'], - result_handler: ResultHandler): - super().__init__(daemon=True) - self.workers = workers - self.result_handler = result_handler - self._close = False - - def run(self) -> None: - # Blocks until any worker exits - dead_sentinels = wait([w.process.sentinel for w in self.workers]) - if not self._close: - self._close = True - - # Kill / cleanup all workers - for worker in self.workers: - process = worker.process - if process.sentinel in dead_sentinels: - 
process.join(JOIN_TIMEOUT_S) - if process.exitcode is not None and process.exitcode != 0: - logger.error("Worker %s pid %s died, exit code: %s", - process.name, process.pid, process.exitcode) - # Cleanup any remaining workers - if logger: - logger.info("Killing local vLLM worker processes") - for worker in self.workers: - worker.kill_worker() - # Must be done after worker task queues are all closed - self.result_handler.close() - - for worker in self.workers: - worker.process.join(JOIN_TIMEOUT_S) - - def close(self): - if self._close: - return - self._close = True - logger.info("Terminating local vLLM worker processes") - for worker in self.workers: - worker.terminate_worker() - # Must be done after worker task queues are all closed - self.result_handler.close() - - -class ProcessWorkerWrapper: - """Local process wrapper for vllm.worker.Worker, - for handling single-node multi-GPU tensor parallel.""" - - def __init__(self, result_handler: ResultHandler, - worker_factory: Callable[[VllmConfig, int], Any], - vllm_config: VllmConfig, rank: int) -> None: - self.mp = get_mp_context() - self._task_queue = self.mp.Queue() - self.result_queue = result_handler.result_queue - self.tasks = result_handler.tasks - self.process: BaseProcess = self.mp.Process( # type: ignore[attr-defined] - target=_run_worker_process, - name="VllmWorkerProcess", - kwargs=dict( - worker_factory=worker_factory, - task_queue=self._task_queue, - result_queue=self.result_queue, - vllm_config=vllm_config, - rank=rank, - ), - daemon=True) - - self.process.start() - - def _enqueue_task(self, future: Union[ResultFuture, asyncio.Future], - method: Union[str, bytes], args, kwargs): - task_id = uuid.uuid4() - self.tasks[task_id] = future - try: - self._task_queue.put((task_id, method, args, kwargs)) - except SystemExit: - raise - except BaseException as e: - del self.tasks[task_id] - raise ChildProcessError("worker died") from e - - def execute_method(self, method: Union[str, bytes], *args, **kwargs): - future: ResultFuture = ResultFuture() - self._enqueue_task(future, method, args, kwargs) - return future - - async def execute_method_async(self, method: Union[str, bytes], *args, - **kwargs): - future = asyncio.get_running_loop().create_future() - self._enqueue_task(future, method, args, kwargs) - return await future - - def terminate_worker(self): - try: - self._task_queue.put(_TERMINATE) - except ValueError: - self.process.kill() - self._task_queue.close() - - def kill_worker(self): - self._task_queue.close() - self.process.kill() - - -def _run_worker_process( - worker_factory: Callable[[VllmConfig, int], Any], - task_queue: Queue, - result_queue: Queue, - vllm_config: VllmConfig, - rank: int, -) -> None: - """Worker process event loop""" - - # Add process-specific prefix to stdout and stderr - process_name = get_mp_context().current_process().name - decorate_logs(process_name) - - # Initialize worker - worker = worker_factory(vllm_config, rank) - del worker_factory - - # Accept tasks from the engine in task_queue - # and return task output in result_queue - logger.info("Worker ready; awaiting tasks") - try: - for items in iter(task_queue.get, _TERMINATE): - output = None - exception = None - task_id, method, args, kwargs = items - try: - output = run_method(worker, method, args, kwargs) - except SystemExit: - raise - except KeyboardInterrupt: - break - except BaseException as e: - logger.exception( - "Exception in worker %s while processing method %s.", - process_name, method) - exception = e - result_queue.put( - 
Result(task_id=task_id, value=output, exception=exception)) - except KeyboardInterrupt: - pass - except Exception: - logger.exception("Worker failed") - - # Flush TunableOp results when TunableOp is enabled and - # online (in situ) tuning is enabled. - # Offline tuning API (record_untuned_is_enabled()) only - # available in PyTorch 2.6 or later. - if torch.cuda.is_available(): - import torch.cuda.tunable as tunable - if (tunable.is_enabled() and tunable.tuning_is_enabled() - and not tunable.record_untuned_is_enabled()): - tunable.write_file() - - logger.info("Worker exiting") - - -def set_multiprocessing_worker_envs(parallel_config): - """ Set up environment variables that should be used when there are workers - in a multiprocessing environment. This should be called by the parent - process before worker processes are created""" - - _maybe_force_spawn() - - # Configure thread parallelism if OMP_NUM_THREADS isn't set - # - # Helps to avoid CPU contention. The default of spawning a thread per - # core combined with multiprocessing for each GPU can have a negative - # impact on performance. The contention is amplified when running in a - # container where CPU limits can cause throttling. - default_omp_num_threads = 1 - if "OMP_NUM_THREADS" not in os.environ and ( - current_parallelism := - torch.get_num_threads()) > default_omp_num_threads: - logger.warning( - "Reducing Torch parallelism from %d threads to %d to avoid " - "unnecessary CPU contention. Set OMP_NUM_THREADS in the " - "external environment to tune this value as needed.", - current_parallelism, default_omp_num_threads) - os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads) - torch.set_num_threads(default_omp_num_threads) diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index 3aa373f12b60..2aa732f34bcc 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import multiprocessing +import os import pickle import queue import signal @@ -19,6 +20,7 @@ from typing import Any, Callable, Optional, Union, cast import cloudpickle +import torch import vllm.envs as envs from vllm.config import VllmConfig @@ -28,14 +30,12 @@ MessageQueue) from vllm.distributed.parallel_state import (get_dp_group, get_ep_group, get_pp_group, get_tp_group) -from vllm.executor.multiproc_worker_utils import ( - set_multiprocessing_worker_envs) from vllm.logger import init_logger from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import worker_receiver_cache_from_config -from vllm.utils import (decorate_logs, get_distributed_init_method, - get_loopback_ip, get_mp_context, get_open_port, - set_process_title) +from vllm.utils import (_maybe_force_spawn, decorate_logs, + get_distributed_init_method, get_loopback_ip, + get_mp_context, get_open_port, set_process_title) from vllm.v1.core.sched.output import SchedulerOutput from vllm.v1.executor.abstract import Executor, FailureCallback from vllm.v1.executor.utils import get_and_update_mm_cache @@ -67,8 +67,8 @@ def _init_executor(self) -> None: f"tensor_parallel_size ({tensor_parallel_size}) x pipeline" f"_parallel_size ({pp_parallel_size}). 
") - # Set multiprocessing envs that are common to V0 and V1 - set_multiprocessing_worker_envs(self.parallel_config) + # Set multiprocessing envs + set_multiprocessing_worker_envs() # Multiprocessing-based executor does not support multi-node setting. # Since it only works for single node, we can use the loopback address @@ -698,3 +698,29 @@ def setup_proc_title_and_log_prefix(enable_ep: bool) -> None: process_name += f"_EP{ep_rank}" set_process_title(name=process_name) decorate_logs(process_name) + + +def set_multiprocessing_worker_envs(): + """ Set up environment variables that should be used when there are workers + in a multiprocessing environment. This should be called by the parent + process before worker processes are created""" + + _maybe_force_spawn() + + # Configure thread parallelism if OMP_NUM_THREADS isn't set + # + # Helps to avoid CPU contention. The default of spawning a thread per + # core combined with multiprocessing for each GPU can have a negative + # impact on performance. The contention is amplified when running in a + # container where CPU limits can cause throttling. + default_omp_num_threads = 1 + if "OMP_NUM_THREADS" not in os.environ and ( + current_parallelism := + torch.get_num_threads()) > default_omp_num_threads: + logger.warning( + "Reducing Torch parallelism from %d threads to %d to avoid " + "unnecessary CPU contention. Set OMP_NUM_THREADS in the " + "external environment to tune this value as needed.", + current_parallelism, default_omp_num_threads) + os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads) + torch.set_num_threads(default_omp_num_threads) From cf56cf78b47e5f9b6a81ce0d50a94f9291922315 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sun, 21 Sep 2025 13:08:07 +0800 Subject: [PATCH 206/518] [V1] Add sliding window support to Flex Attention backend (#24089) Signed-off-by: Isotr0py --- tests/v1/attention/test_attention_backends.py | 208 +++++++++++++----- vllm/v1/attention/backends/flex_attention.py | 90 ++++++-- 2 files changed, 229 insertions(+), 69 deletions(-) diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py index 0b7e103beca6..8a4fc15791b0 100644 --- a/tests/v1/attention/test_attention_backends.py +++ b/tests/v1/attention/test_attention_backends.py @@ -1,15 +1,20 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Tests for v1 attention backends without GPUModelRunner dependency.""" +from functools import partial +from typing import Optional, Union import pytest import torch +from torch.nn.attention.flex_attention import create_block_mask, flex_attention from tests.v1.attention.utils import (BatchSpec, _Backend, create_common_attn_metadata, create_standard_kv_cache_spec, create_vllm_config, get_attention_backend) +from vllm.config import ModelConfig +from vllm.platforms import current_platform from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, cdiv, is_torch_equal_or_newer from vllm.v1.attention.backends.utils import (CommonAttentionMetadata, set_kv_cache_layout) @@ -183,13 +188,19 @@ def __init__(self, device: torch.device): self._v_scale_float = 1.0 -def run_attention_backend(backend: _Backend, kv_cache_spec: FullAttentionSpec, - layer_names: list[str], vllm_config, - device: torch.device, - common_attn_metadata: CommonAttentionMetadata, - query: torch.Tensor, key: torch.Tensor, - value: torch.Tensor, - kv_cache: torch.Tensor) -> torch.Tensor: +def run_attention_backend( + backend: _Backend, + kv_cache_spec: 
FullAttentionSpec, + layer_names: list[str], + vllm_config, + device: torch.device, + common_attn_metadata: CommonAttentionMetadata, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + kv_cache: torch.Tensor, + sliding_window: Optional[int] = None, +) -> torch.Tensor: """Run attention computation using the specified backend's AttentionImpl.""" # Handle special case for FLEX_ATTENTION_SLOW @@ -253,7 +264,7 @@ def mock_get_per_layer_parameters(vllm_config, layer_names, impl_cls): scale=scale, num_kv_heads=num_kv_heads, alibi_slopes=None, - sliding_window=None, + sliding_window=sliding_window, kv_cache_dtype="auto", ) @@ -275,13 +286,16 @@ def mock_get_per_layer_parameters(vllm_config, layer_names, impl_cls): return output -@pytest.mark.parametrize("batch_spec_name", [ - "small_decode", "small_prefill", "mixed_small", "medium_decode", - "medium_prefill", "mixed_medium", "large_decode", "large_prefill", - "single_decode", "single_prefill" -]) -@pytest.mark.parametrize("model", ["meta-llama/Meta-Llama-3-8B"]) -def test_backend_correctness(batch_spec_name: str, model: str): +def _test_backend_correctness( + batch_spec: BatchSpec, + model: str, + backend_to_test: list[Union[_Backend, str]], + mask_mod, + *, + block_size: int = 16, + atol: float = 1e-2, + rtol: float = 1e-2, +): """ Test that all backends produce similar outputs to a reference implementation using torch.nn.functional.scaled_dot_product_attention. @@ -297,9 +311,10 @@ def test_backend_correctness(batch_spec_name: str, model: str): simulated paged KV cache. 5. Comparing the vLLM backend's output to the ground-truth SDPA output. """ - batch_spec = BATCH_SPECS[batch_spec_name] + current_platform.seed_everything(42) vllm_config = create_vllm_config(model_name=model, max_model_len=max(batch_spec.seq_lens), + block_size=block_size, num_gpu_blocks=8192) device = torch.device("cuda:0") @@ -314,6 +329,7 @@ def test_backend_correctness(batch_spec_name: str, model: str): num_kv_heads = vllm_config.model_config.get_num_kv_heads( vllm_config.parallel_config) head_size = vllm_config.model_config.get_head_size() + sliding_window = vllm_config.model_config.get_sliding_window() dtype = _convert_dtype_to_torch(vllm_config.model_config.dtype) block_size = vllm_config.cache_config.block_size scale = 1.0 / (head_size**0.5) @@ -361,22 +377,21 @@ def test_backend_correctness(batch_spec_name: str, model: str): # Create causal mask: query token i attends to positions 0 to # (context_len + i) kv_len = s_len - offset = context_len - attn_mask = torch.full((q_len, kv_len), - float('-inf'), - device=device, - dtype=dtype) - for i in range(q_len): - attn_mask[i, :offset + i + 1] = 0.0 - - sdpa_out_i = torch.nn.functional.scaled_dot_product_attention( - q_sdpa_in, - k_sdpa_in, - v_sdpa_in, - attn_mask=attn_mask, - scale=scale, - enable_gqa=True) - # Convert back to (L, H, D) + + final_mask_mod = partial(mask_mod, context_len=context_len) + block_mask = create_block_mask(final_mask_mod, + B=None, + H=None, + Q_LEN=q_len, + KV_LEN=kv_len, + device=device) + sdpa_out_i = flex_attention(q_sdpa_in, + k_sdpa_in, + v_sdpa_in, + block_mask=block_mask, + scale=scale, + enable_gqa=True) + all_sdpa_outputs.append(sdpa_out_i.transpose(1, 2).squeeze(0)) # Inputs for vLLM backends are just the new tokens @@ -412,7 +427,7 @@ def test_backend_correctness(batch_spec_name: str, model: str): # 4. 
Run vLLM backends and compare # Note: flex_attention has known Triton kernel compatibility issues # with test infrastructures - for backend_name in BACKENDS_TO_TEST: + for backend_name in backend_to_test: # FlashAttentionm + FlexAttention: # [2, num_blocks, block_size, num_kv_heads, head_size] # FlashInfer: @@ -427,12 +442,19 @@ def test_backend_correctness(batch_spec_name: str, model: str): 2, 3).contiguous().transpose(2, 3) set_kv_cache_layout("HND") - backend_output = run_attention_backend(backend_name, kv_cache_spec, - ["placeholder"], vllm_config, - device, common_attn_metadata, - query_vllm, key_vllm, - value_vllm, - kv_cache_for_backend) + backend_output = run_attention_backend( + backend_name, + kv_cache_spec, + ["placeholder"], + vllm_config, + device, + common_attn_metadata, + query_vllm, + key_vllm, + value_vllm, + kv_cache_for_backend, + sliding_window=sliding_window, + ) # Check shape and dtype consistency assert backend_output.shape == sdpa_output.shape, ( @@ -446,18 +468,102 @@ def test_backend_correctness(batch_spec_name: str, model: str): f"[{backend_name}] produced non-finite values") # Check numerical similarity - rtol = 1e-2 - atol = 5e-3 - - max_diff = torch.max(torch.abs(backend_output - sdpa_output)).item() - max_rel_diff = torch.max( - torch.abs(backend_output - sdpa_output) / - torch.abs(sdpa_output)).item() - all_close = torch.allclose(backend_output, + def error_msg(msg: str, backend_name: str): + return (f"[{backend_name}] output differs from SDPA baseline. " + f"{msg}") + + torch.testing.assert_close(backend_output, sdpa_output, rtol=rtol, - atol=atol) + atol=atol, + msg=partial(error_msg, + backend_name=backend_name)) - assert all_close, ( - f"[{backend_name}] output differs from SDPA baseline. " - f"Max diff: {max_diff:.6f}, max rel diff: {max_rel_diff:.6f})") \ No newline at end of file + +@pytest.mark.parametrize("batch_spec_name", [ + "small_decode", "small_prefill", "mixed_small", "medium_decode", + "medium_prefill", "mixed_medium", "large_decode", "large_prefill", + "single_decode", "single_prefill" +]) +@pytest.mark.parametrize("model", ["meta-llama/Meta-Llama-3-8B"]) +def test_causal_backend_correctness(batch_spec_name: str, model: str): + """Test backend's correctness with causal attention.""" + + def causal_mask_mod( + b: torch.Tensor, + h: torch.Tensor, + q_idx: torch.Tensor, + kv_idx: torch.Tensor, + *, + context_len: int, + ): + return (q_idx + context_len) >= kv_idx + + batch_spec = BATCH_SPECS[batch_spec_name] + LARGE_BLOCK_BACKENDS = ([_Backend.FLEX_ATTENTION] + if is_torch_equal_or_newer("2.9.0.dev0") else []) + SMALL_BLOCK_BACKENDS = [ + x for x in BACKENDS_TO_TEST if x not in LARGE_BLOCK_BACKENDS + ] + _test_backend_correctness(batch_spec, model, SMALL_BLOCK_BACKENDS, + causal_mask_mod) + + # Fast FlexAttention needs to run with block_size=128 + if LARGE_BLOCK_BACKENDS: + _test_backend_correctness(batch_spec, + model, + LARGE_BLOCK_BACKENDS, + causal_mask_mod, + block_size=128) + + +SLIDING_WINDOW_BACKENDS_TO_TEST = [ + _Backend.FLASH_ATTN_VLLM_V1, _Backend.FLEX_ATTENTION, + _Backend.TRITON_ATTN_VLLM_V1, "FLEX_ATTENTION_SLOW" +] + + +@pytest.mark.parametrize("batch_spec_name", [ + "small_decode", "small_prefill", "mixed_medium", "large_decode", + "large_prefill" +]) +@pytest.mark.parametrize("model", ["microsoft/Phi-tiny-MoE-instruct"]) +def test_sliding_window_backend_correctness(batch_spec_name: str, model: str): + """Test backend's correctness with sliding window attention.""" + + def sliding_window_mask_mod( + b: torch.Tensor, + h: 
torch.Tensor, + q_idx: torch.Tensor, + kv_idx: torch.Tensor, + *, + context_len: int, + sliding_window: int, + ): + causal_mask = q_idx + context_len >= kv_idx + window_mask = q_idx + context_len - kv_idx < sliding_window + return causal_mask & window_mask + + batch_spec = BATCH_SPECS[batch_spec_name] + model_config = ModelConfig(model=model, + max_model_len=max(batch_spec.seq_lens)) + sliding_window = model_config.get_sliding_window() + sliding_window_mask_mod_fn = partial(sliding_window_mask_mod, + sliding_window=sliding_window) + + LARGE_BLOCK_BACKENDS = ([_Backend.FLEX_ATTENTION] + if is_torch_equal_or_newer("2.9.0.dev0") else []) + SMALL_BLOCK_BACKENDS = [ + x for x in SLIDING_WINDOW_BACKENDS_TO_TEST + if x not in LARGE_BLOCK_BACKENDS + ] + _test_backend_correctness(batch_spec, model, SMALL_BLOCK_BACKENDS, + sliding_window_mask_mod_fn) + + # Fast FlexAttention needs to run with block_size=128 + if LARGE_BLOCK_BACKENDS: + _test_backend_correctness(batch_spec, + model, + LARGE_BLOCK_BACKENDS, + sliding_window_mask_mod_fn, + block_size=128) diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py index 662d3984554a..c3358bfa74e9 100644 --- a/vllm/v1/attention/backends/flex_attention.py +++ b/vllm/v1/attention/backends/flex_attention.py @@ -9,7 +9,7 @@ import torch._dynamo.decorators import torch.nn.functional as F from torch.nn.attention.flex_attention import (BlockMask, _mask_mod_signature, - _score_mod_signature, + _score_mod_signature, and_masks, create_block_mask, flex_attention) @@ -292,6 +292,7 @@ class FlexAttentionMetadata: q_block_size: int = 16 kv_block_size: int = 16 transformed_score_mod: Optional[_score_mod_signature] = None + sliding_window: Optional[int] = None def _convert_physical_to_logical( self, @@ -380,6 +381,53 @@ def final_mask_mod( return final_mask_mod + def get_sliding_window_mask_mod(self) -> _mask_mod_signature: + """Creates the sliding window mask_mod function for FlexAttention. + + Note that the sliding window mask here is bidirectional, we need + to mask it with the bidirectional/causal mask for encoder/decoder. + """ + + if self.sliding_window is None: + raise ValueError( + "sliding_window must be set for sliding window attention") + + def sliding_window_mask_mod(b: torch.Tensor, h: torch.Tensor, + q_idx: torch.Tensor, kv_idx: torch.Tensor): + return torch.abs(q_idx - kv_idx) < self.sliding_window + + def final_mask_mod( + b: torch.Tensor, + h: torch.Tensor, + q_idx: torch.Tensor, + physical_kv_idx: torch.Tensor, + ) -> torch.Tensor: + (is_valid, logical_q_idx, + logical_kv_idx) = self._convert_physical_to_logical( + self.doc_ids, q_idx, physical_kv_idx) + return torch.where( + is_valid, + sliding_window_mask_mod(b, h, logical_q_idx, logical_kv_idx), + False, + ) + + return final_mask_mod if self.causal else sliding_window_mask_mod + + def get_mask_mod(self): + # Stage-1: initialize the base mask_mod + # (causal mask for decoder or bidirectional mask for encoder) + if self.causal: + mask_mod = self.get_causal_mask_mod() + else: + mask_mod = self.get_bidirectional_mask_mod() + # stage-2: add external mask_mod for special attention during + # forwarding runtime to create the combined mask_mod. 
+ if self.sliding_window is not None: + # Add sliding window mask for sliding window attention + sliding_window_mask_mod = self.get_sliding_window_mask_mod() + mask_mod = and_masks(mask_mod, sliding_window_mask_mod) + return mask_mod + def get_transformed_score_mod(self) -> Optional[_score_mod_signature]: """Creates the transformed score_mod function for FlexAttention. @@ -472,12 +520,9 @@ def _build_block_mask_direct(self) -> BlockMask: return BlockMask.from_kv_blocks(**block_mask_kwargs) def build_block_mask(self) -> BlockMask: - if self.causal: - mask_mod = self.get_causal_mask_mod() - kv_len = self.total_cache_tokens - else: - mask_mod = self.get_bidirectional_mask_mod() - kv_len = self.num_actual_tokens + mask_mod = self.get_mask_mod() + kv_len = (self.total_cache_tokens + if self.causal else self.num_actual_tokens) return create_block_mask_compiled( mask_mod, None, @@ -498,11 +543,7 @@ def __post_init__(self): self.doc_ids = _offsets_to_doc_ids_tensor(self.query_start_loc) self.num_blocks = self.total_cache_tokens // self.block_size - if self.causal: - self.mask_mod = self.get_causal_mask_mod() - else: - self.mask_mod = self.get_bidirectional_mask_mod() - + self.mask_mod = self.get_mask_mod() self.transformed_score_mod = self.get_transformed_score_mod() if self.direct_build and self.causal: @@ -607,7 +648,7 @@ def use_cascade_attention(self, *args, **kwargs) -> bool: class FlexAttentionImpl(AttentionImpl): - sliding_window: Optional[tuple[int, int]] + sliding_window: Optional[int] alibi_slopes: Optional[torch.Tensor] logits_soft_cap: Optional[float] @@ -641,11 +682,9 @@ def __init__( "FlexAttention does not support alibi slopes yet.") else: self.alibi_slopes = None - if sliding_window is not None: - raise NotImplementedError( - "FlexAttention does not support sliding window yet.") - else: - self.sliding_window = (-1, -1) + + self.sliding_window = sliding_window + self.kv_cache_dtype = kv_cache_dtype self.logits_soft_cap = logits_soft_cap if self.logits_soft_cap is not None: @@ -712,6 +751,21 @@ def forward( num_actual_tokens = attn_metadata.num_actual_tokens + if attn_metadata.sliding_window != self.sliding_window: + attn_metadata.sliding_window = self.sliding_window + if attn_metadata.direct_build: + # TODO: Support skipping the computation of sliding window + # in direct block mask building code path. + logger.warning_once( + "Using direct block mask building with sliding window, " + "which is suboptimal now. 
Performance may be degraded.") + # update mask mod in attention metadata + attn_metadata.mask_mod = attn_metadata.get_mask_mod() + attn_metadata.block_mask = ( + attn_metadata._build_block_mask_direct()) + else: + attn_metadata.block_mask = attn_metadata.build_block_mask() + if not attn_metadata.causal: assert self.attn_type == AttentionType.ENCODER_ONLY From 30d08911f7cf78287f8da003ddcc99f6ef196f9f Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 21 Sep 2025 04:05:20 -0700 Subject: [PATCH 207/518] [MM][Perf] Minor Optimization on Qwen3-VL `fast_pos_embed_interpolate` (#25337) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_vl.py | 135 +++++++++++-------------- 1 file changed, 60 insertions(+), 75 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 17375ff0959d..ca232e03767b 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -270,6 +270,7 @@ def __init__( self.temporal_patch_size = vision_config.temporal_patch_size self.deepstack_visual_indexes = vision_config.deepstack_visual_indexes self.use_data_parallel = use_data_parallel + self.num_grid_per_side = int(self.num_position_embeddings**0.5) # NOTE: This is used for creating empty tensor for all_gather for # DP ViT. Here out_hidden_size is enlarged due to deepstack @@ -377,82 +378,68 @@ def rot_pos_emb(self, grid_thw): rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(1) return rotary_pos_emb - def fast_pos_embed_interpolate(self, grid_thw): - num_grid_per_side = int(self.num_position_embeddings**0.5) + def fast_pos_embed_interpolate(self, + grid_thw: list[list[int]]) -> torch.Tensor: - idx_list = [[] for _ in range(4)] - weight_list = [[] for _ in range(4)] + num_grid_per_side = self.num_grid_per_side + m_size = self.spatial_merge_size + hidden_dim = self.pos_embed.embedding_dim + outputs = [] for t, h, w in grid_thw: h_idxs = torch.linspace(0, num_grid_per_side - 1, h, - dtype=torch.float32) + dtype=torch.float32, + device=self.device) w_idxs = torch.linspace(0, num_grid_per_side - 1, w, - dtype=torch.float32) - - h_idxs_floor = h_idxs.to(torch.long) - w_idxs_floor = w_idxs.to(torch.long) - h_idxs_ceil = torch.clamp(h_idxs.to(torch.long) + 1, - max=num_grid_per_side - 1) - w_idxs_ceil = torch.clamp(w_idxs.to(torch.long) + 1, - max=num_grid_per_side - 1) - - dh = h_idxs - h_idxs_floor - dw = w_idxs - w_idxs_floor - - idx_list[0].extend(((h_idxs_floor * num_grid_per_side)[None].T + - w_idxs_floor[None]).flatten().tolist() * t) - idx_list[1].extend(((h_idxs_floor * num_grid_per_side)[None].T + - w_idxs_ceil[None]).flatten().tolist() * t) - idx_list[2].extend(((h_idxs_ceil * num_grid_per_side)[None].T + - w_idxs_floor[None]).flatten().tolist() * t) - idx_list[3].extend(((h_idxs_ceil * num_grid_per_side)[None].T + - w_idxs_ceil[None]).flatten().tolist() * t) - - weight_list[0].extend( - ((1 - dh)[None].T * (1 - dw)[None]).flatten().tolist() * t) - weight_list[1].extend( - ((1 - dh)[None].T * dw[None]).flatten().tolist() * t) - weight_list[2].extend( - (dh[None].T * (1 - dw)[None]).flatten().tolist() * t) - weight_list[3].extend( - (dh[None].T * dw[None]).flatten().tolist() * t) - - device = self.pos_embed.weight.device - dtype = self.pos_embed.weight.dtype - - p0 = self.pos_embed( - torch.tensor( - idx_list[0], dtype=torch.long, device=device)) * torch.tensor( - weight_list[0], dtype=dtype, device=device)[:, None] - p1 = self.pos_embed( - torch.tensor( - idx_list[1], dtype=torch.long, device=device)) * 
torch.tensor( - weight_list[1], dtype=dtype, device=device)[:, None] - p2 = self.pos_embed( - torch.tensor( - idx_list[2], dtype=torch.long, device=device)) * torch.tensor( - weight_list[2], dtype=dtype, device=device)[:, None] - p3 = self.pos_embed( - torch.tensor( - idx_list[3], dtype=torch.long, device=device)) * torch.tensor( - weight_list[3], dtype=dtype, device=device)[:, None] - - patch_pos_embeds = p0 + p1 + p2 + p3 - patch_pos_embeds = patch_pos_embeds.split( - [t * h * w for t, h, w in grid_thw]) - patch_pos_embeds_permute = [] - m_size = self.spatial_merge_size - for pos_embed, (t, h, w) in zip(patch_pos_embeds, grid_thw): - pos_embed = pos_embed.view(t, h // m_size, m_size, w // m_size, - m_size, -1).permute(0, 1, 3, 2, 4, - 5).flatten(0, 4) - patch_pos_embeds_permute.append(pos_embed) - patch_pos_embeds = torch.cat(patch_pos_embeds_permute) - return patch_pos_embeds + dtype=torch.float32, + device=self.device) + + h_floor = h_idxs.to(torch.long) + w_floor = w_idxs.to(torch.long) + h_ceil = torch.clamp(h_floor + 1, max=num_grid_per_side - 1) + w_ceil = torch.clamp(w_floor + 1, max=num_grid_per_side - 1) + + dh = h_idxs - h_floor + dw = w_idxs - w_floor + + w00 = ((1 - dh)[:, None] * (1 - dw)[None, :]).reshape(-1) + w01 = ((1 - dh)[:, None] * dw[None, :]).reshape(-1) + w10 = (dh[:, None] * (1 - dw)[None, :]).reshape(-1) + w11 = (dh[:, None] * dw[None, :]).reshape(-1) + + idx00 = (h_floor[:, None] * num_grid_per_side + + w_floor[None, :]).reshape(-1) + idx01 = (h_floor[:, None] * num_grid_per_side + + w_ceil[None, :]).reshape(-1) + idx10 = (h_ceil[:, None] * num_grid_per_side + + w_floor[None, :]).reshape(-1) + idx11 = (h_ceil[:, None] * num_grid_per_side + + w_ceil[None, :]).reshape(-1) + + indices = torch.stack([idx00, idx01, idx10, idx11], dim=0) + weights = torch.stack([w00, w01, w10, w11], + dim=0).to(dtype=self.dtype, + device=self.device) + weights = weights.unsqueeze(-1) + + embeds = self.pos_embed(indices) + weighted_embeds = embeds * weights + p0, p1, p2, p3 = weighted_embeds.unbind(dim=0) + combined = p0 + p1 + p2 + p3 + + combined = combined.view(h * w, hidden_dim) + repeated = combined.unsqueeze(0).expand(t, -1, -1).contiguous() + repeated = repeated.view(t, h // m_size, m_size, w // m_size, + m_size, hidden_dim) + repeated = repeated.permute(0, 1, 3, 2, 4, + 5).reshape(-1, hidden_dim) + outputs.append(repeated) + + return torch.cat(outputs, dim=0) def compute_attn_mask_seqlen( self, @@ -477,12 +464,9 @@ def forward( hidden_states = hidden_states + pos_embeds rotary_pos_emb = self.rot_pos_emb(grid_thw) - if isinstance(grid_thw, list): - grid_thw_tensor = torch.tensor(grid_thw, - device=hidden_states.device, - dtype=torch.int32) - else: - grid_thw_tensor = grid_thw + grid_thw_tensor = torch.tensor(grid_thw, + device=self.device, + dtype=torch.int32) cu_seqlens = torch.repeat_interleave( grid_thw_tensor[:, 1] * grid_thw_tensor[:, 2], @@ -1224,7 +1208,8 @@ def _process_image_input( grid_thw_list, rope_type="rope_3d") else: - image_embeds = self.visual(pixel_values, grid_thw=grid_thw) + image_embeds = self.visual(pixel_values, + grid_thw=grid_thw_list) # Split concatenated embeddings for each image item. 
# Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync @@ -1526,4 +1511,4 @@ def get_mm_mapping(self) -> MultiModelKeys: language_model="language_model", connector="model.visual.merger", tower_model="model.visual.", - ) \ No newline at end of file + ) From 9aea7373fffe71c239b08a89edbbc46bbb560f45 Mon Sep 17 00:00:00 2001 From: Simon Danielsson <70206058+simondanielsson@users.noreply.github.com> Date: Sun, 21 Sep 2025 13:36:47 +0200 Subject: [PATCH 208/518] [Bugfix] Typos in error message for missing model config file (#25339) Signed-off-by: simondanielsson --- vllm/transformers_utils/config.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index cafc43f6b767..52e2c18a7784 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -524,10 +524,10 @@ def get_config( else: raise ValueError( "Could not detect config format for no config file found. " - "With config_format 'auto', ensure your model has either" - "config.json (HF format) or params.json (Mistral format)." - "Otherwise please specify your_custom_config_format" - "in engine args for customized config parser") + "With config_format 'auto', ensure your model has either " + "config.json (HF format) or params.json (Mistral format). " + "Otherwise please specify your_custom_config_format " + "in engine args for customized config parser.") except Exception as e: error_message = ( From 65a5910ce35f889740bddb2e19dad35c83278873 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sun, 21 Sep 2025 19:41:02 +0800 Subject: [PATCH 209/518] [Optimization] Cache chat template result when processor fails to be loaded (#25341) Signed-off-by: DarkLight1337 --- vllm/entrypoints/chat_utils.py | 71 +++++++++++++++++++++++----------- 1 file changed, 49 insertions(+), 22 deletions(-) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index c2c0ad74ef43..df49119d8642 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -421,6 +421,51 @@ def resolve_mistral_chat_template( return None +_PROCESSOR_CHAT_TEMPLATES = dict[tuple[str, bool], Optional[str]]() +""" +Used in `_try_get_processor_chat_template` to avoid calling +`cached_get_processor` again if the processor fails to be loaded. + +This is needed because `lru_cache` does not cache when an exception happens. 
+""" + + +def _try_get_processor_chat_template( + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + model_config: ModelConfig, +) -> Optional[str]: + cache_key = (tokenizer.name_or_path, model_config.trust_remote_code) + if cache_key in _PROCESSOR_CHAT_TEMPLATES: + return _PROCESSOR_CHAT_TEMPLATES[cache_key] + + try: + processor = cached_get_processor( + tokenizer.name_or_path, + processor_cls=( + PreTrainedTokenizer, + PreTrainedTokenizerFast, + ProcessorMixin, + ), + trust_remote_code=model_config.trust_remote_code, + ) + if ( + isinstance(processor, ProcessorMixin) + and hasattr(processor, "chat_template") + and (chat_template := processor.chat_template) is not None + ): + _PROCESSOR_CHAT_TEMPLATES[cache_key] = chat_template + return chat_template + except Exception: + logger.debug( + "Failed to load AutoProcessor chat template for %s", + tokenizer.name_or_path, + exc_info=True, + ) + + _PROCESSOR_CHAT_TEMPLATES[cache_key] = None + return None + + def resolve_hf_chat_template( tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], chat_template: Optional[str], @@ -434,28 +479,10 @@ def resolve_hf_chat_template( # 2nd priority: AutoProcessor chat template, unless tool calling is enabled if tools is None: - try: - processor = cached_get_processor( - tokenizer.name_or_path, - processor_cls=( - PreTrainedTokenizer, - PreTrainedTokenizerFast, - ProcessorMixin, - ), - trust_remote_code=model_config.trust_remote_code, - ) - if ( - isinstance(processor, ProcessorMixin) - and hasattr(processor, "chat_template") - and processor.chat_template is not None - ): - return processor.chat_template - except Exception: - logger.debug( - "Failed to load AutoProcessor chat template for %s", - tokenizer.name_or_path, - exc_info=True, - ) # noqa: E501 + chat_template = _try_get_processor_chat_template(tokenizer, + model_config) + if chat_template is not None: + return chat_template # 3rd priority: AutoTokenizer chat template try: From 26e673fe9303ad759a47e19a087764393c69109f Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 21 Sep 2025 08:52:15 -0700 Subject: [PATCH 210/518] [V0 Deprecation] Remove V0 Sequence class & Sampler (#25332) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- tests/conftest.py | 2 +- .../generation/test_granite_speech.py | 2 +- .../multimodal/generation/test_phi4mm.py | 2 +- .../multimodal/generation/test_pixtral.py | 2 +- .../generation/vlm_utils/model_utils.py | 2 +- .../multimodal/generation/vlm_utils/types.py | 2 +- tests/models/utils.py | 2 +- tests/tokenization/test_detokenize.py | 140 +- tests/tool_use/test_jamba_tool_parser.py | 2 +- tests/tool_use/test_qwen3coder_tool_parser.py | 2 +- tests/tool_use/test_seed_oss_tool_parser.py | 2 +- tests/tool_use/test_xlam_tool_parser.py | 2 +- tests/v1/engine/test_output_processor.py | 2 +- vllm/executor/executor_base.py | 2 +- vllm/executor/ray_distributed_executor.py | 2 +- vllm/inputs/__init__.py | 13 +- vllm/inputs/registry.py | 67 +- vllm/model_executor/__init__.py | 4 +- .../model_executor/layers/logits_processor.py | 91 -- vllm/model_executor/layers/sampler.py | 1198 --------------- vllm/model_executor/models/medusa.py | 60 +- vllm/model_executor/models/mlp_speculator.py | 80 +- vllm/model_executor/models/phi4flash.py | 6 +- vllm/model_executor/sampling_metadata.py | 594 +------- vllm/sequence.py | 1322 +---------------- vllm/transformers_utils/detokenizer.py | 162 -- vllm/worker/worker_base.py | 2 +- 27 files changed, 70 insertions(+), 3697 deletions(-) delete mode 100644 
vllm/model_executor/layers/sampler.py delete mode 100644 vllm/transformers_utils/detokenizer.py diff --git a/tests/conftest.py b/tests/conftest.py index f14b1e8780ad..dc70c9835959 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -48,10 +48,10 @@ initialize_model_parallel) from vllm.inputs import TextPrompt from vllm.logger import init_logger +from vllm.logprobs import Logprob from vllm.multimodal.utils import fetch_image from vllm.outputs import RequestOutput from vllm.sampling_params import BeamSearchParams -from vllm.sequence import Logprob from vllm.transformers_utils.utils import maybe_model_redirect from vllm.utils import set_default_torch_num_threads diff --git a/tests/models/multimodal/generation/test_granite_speech.py b/tests/models/multimodal/generation/test_granite_speech.py index f2e6fbfad6e8..c1305e0ae31c 100644 --- a/tests/models/multimodal/generation/test_granite_speech.py +++ b/tests/models/multimodal/generation/test_granite_speech.py @@ -7,8 +7,8 @@ import pytest from transformers import AutoModelForSpeechSeq2Seq +from vllm.logprobs import SampleLogprobs from vllm.lora.request import LoRARequest -from vllm.sequence import SampleLogprobs from ....conftest import (AudioTestAssets, HfRunner, PromptAudioInput, VllmRunner) diff --git a/tests/models/multimodal/generation/test_phi4mm.py b/tests/models/multimodal/generation/test_phi4mm.py index 67d35213d642..77e2b90dd5e9 100644 --- a/tests/models/multimodal/generation/test_phi4mm.py +++ b/tests/models/multimodal/generation/test_phi4mm.py @@ -12,10 +12,10 @@ from transformers import AutoTokenizer from vllm.assets.image import ImageAsset +from vllm.logprobs import SampleLogprobs from vllm.lora.request import LoRARequest from vllm.multimodal.image import convert_image_mode, rescale_image_size from vllm.platforms import current_platform -from vllm.sequence import SampleLogprobs from ....conftest import (IMAGE_ASSETS, HfRunner, PromptAudioInput, PromptImageInput, VllmRunner) diff --git a/tests/models/multimodal/generation/test_pixtral.py b/tests/models/multimodal/generation/test_pixtral.py index cb3cc1d3d330..715b08ef90e5 100644 --- a/tests/models/multimodal/generation/test_pixtral.py +++ b/tests/models/multimodal/generation/test_pixtral.py @@ -13,8 +13,8 @@ from transformers import AutoProcessor from vllm import SamplingParams, TextPrompt, TokensPrompt +from vllm.logprobs import Logprob, SampleLogprobs from vllm.multimodal import MultiModalDataBuiltins -from vllm.sequence import Logprob, SampleLogprobs from ....utils import VLLM_PATH, large_gpu_test from ...utils import check_logprobs_close diff --git a/tests/models/multimodal/generation/vlm_utils/model_utils.py b/tests/models/multimodal/generation/vlm_utils/model_utils.py index 8b7d051218f1..ba55450ec8a9 100644 --- a/tests/models/multimodal/generation/vlm_utils/model_utils.py +++ b/tests/models/multimodal/generation/vlm_utils/model_utils.py @@ -19,7 +19,7 @@ GenerationConfig, GenerationMixin) from transformers.video_utils import VideoMetadata -from vllm.sequence import SampleLogprobs +from vllm.logprobs import SampleLogprobs from vllm.utils import is_list_of from .....conftest import HfRunner, ImageAsset, ImageTestAssets diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py index 945113196088..e39ca40fbbf5 100644 --- a/tests/models/multimodal/generation/vlm_utils/types.py +++ b/tests/models/multimodal/generation/vlm_utils/types.py @@ -12,7 +12,7 @@ from transformers.models.auto.auto_factory import 
_BaseAutoModelClass from vllm.config import RunnerOption -from vllm.sequence import SampleLogprobs +from vllm.logprobs import SampleLogprobs from vllm.transformers_utils.tokenizer import AnyTokenizer from .....conftest import (AUDIO_ASSETS, IMAGE_ASSETS, HfRunner, ImageAsset, diff --git a/tests/models/utils.py b/tests/models/utils.py index 76c6e4823a12..5da2382cef81 100644 --- a/tests/models/utils.py +++ b/tests/models/utils.py @@ -12,7 +12,7 @@ from vllm.config import ModelConfig, ModelDType, RunnerOption from vllm.inputs import InputContext -from vllm.sequence import Logprob, PromptLogprobs, SampleLogprobs +from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs from .registry import HF_EXAMPLE_MODELS diff --git a/tests/tokenization/test_detokenize.py b/tests/tokenization/test_detokenize.py index bd2b91073d56..fe6c313d2966 100644 --- a/tests/tokenization/test_detokenize.py +++ b/tests/tokenization/test_detokenize.py @@ -8,10 +8,7 @@ from transformers import (AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast) -from vllm.inputs import token_inputs -from vllm.sequence import Logprob, SamplingParams, Sequence, SequenceGroup -from vllm.transformers_utils.detokenizer import Detokenizer -from vllm.transformers_utils.tokenizer import get_tokenizer +from vllm.sampling_params import SamplingParams from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer from vllm.v1.engine import EngineCoreRequest from vllm.v1.engine.detokenizer import (FastIncrementalDetokenizer, @@ -217,138 +214,3 @@ def test_oov_decode(tokenizer, fast): assert decoded_text == '' assert out_ids == [len(tokenizer)] - - -@pytest.fixture -def detokenizer(tokenizer_name: str) -> Detokenizer: - tokenizer = get_tokenizer( - tokenizer_name, - tokenizer_mode="mistral" if "mistral" in tokenizer_name else "auto", - trust_remote_code=False, - revision=None, - ) - - return Detokenizer(tokenizer) - - -@pytest.fixture(name="complete_sequence_token_ids") -def create_complete_sequence_token_ids(complete_sequence: str, - tokenizer) -> list[int]: - return tokenizer(complete_sequence, add_special_tokens=False).input_ids - - -def create_sequence(prompt_token_ids=None): - prompt_token_ids = prompt_token_ids or [] - return Sequence( - seq_id=0, - inputs=token_inputs(prompt_token_ids), - block_size=16, - ) - - -def create_dummy_logprobs( - complete_sequence_token_ids: list[int]) -> list[dict[int, Logprob]]: - return [{ - token_id: Logprob(logprob=0.0), - token_id + 1: Logprob(logprob=0.1) - } for token_id in complete_sequence_token_ids] - - -def create_dummy_prompt_logprobs( - complete_sequence_token_ids: list[int] -) -> list[Optional[dict[int, Any]]]: - # logprob for the first prompt token is None. - logprobs: list[Optional[dict[int, Any]]] = [None] - logprobs.extend(create_dummy_logprobs(complete_sequence_token_ids)[1:]) - return logprobs - - -@pytest.mark.parametrize("complete_sequence", TRUTH) -@pytest.mark.parametrize("tokenizer_name", TOKENIZERS) -@pytest.mark.parametrize("skip_special_tokens", [True, False], indirect=True) -def test_decode_sequence_logprobs(complete_sequence: str, - complete_sequence_token_ids: list[int], - detokenizer: Detokenizer, - skip_special_tokens: bool): - """Verify Detokenizer decodes logprobs correctly.""" - sampling_params = SamplingParams(skip_special_tokens=skip_special_tokens, - logprobs=2) - - # Run sequentially. 
- seq = create_sequence() - dummy_logprobs = create_dummy_logprobs(complete_sequence_token_ids) - sequential_logprobs_text_chosen_token: list[str] = [] - sequential_logprobs_text_other_token: list[str] = [] - for new_token, logprobs in zip(complete_sequence_token_ids, - dummy_logprobs): - seq.append_token_id(new_token, logprobs) - detokenizer.decode_sequence_inplace(seq, sampling_params) - sequential_logprobs_text_chosen_token.append( - seq.output_logprobs[-1][new_token].decoded_token) - sequential_logprobs_text_other_token.append( - seq.output_logprobs[-1][new_token + 1].decoded_token) - sequential_result = seq.output_text - - assert sequential_result == "".join(sequential_logprobs_text_chosen_token) - assert sequential_result != "".join(sequential_logprobs_text_other_token) - - if not skip_special_tokens: - # Text for logprobs for the chosen token should be the same as the - # generated text. Note that this will only be true if we skip - # special tokens. - assert sequential_result == complete_sequence - - -@pytest.mark.parametrize("complete_sequence", TRUTH) -@pytest.mark.parametrize("tokenizer_name", TOKENIZERS) -def test_decode_prompt_logprobs(complete_sequence: str, - complete_sequence_token_ids: list[int], - detokenizer: Detokenizer): - - # We want to use skip_special_tokens=False here but Mistral tokenizers - # don't support that. - if complete_sequence not in SPECIAL_TOKS_TRUTH: - skip_special_tokens = True - elif not isinstance(detokenizer.tokenizer, MistralTokenizer): - skip_special_tokens = False - else: - pytest.skip("MistralTokenizers don't support " - "skip_special_tokens=False") - return - """Verify Detokenizer decodes prompt logprobs correctly.""" - sampling_params = SamplingParams(skip_special_tokens=skip_special_tokens, - prompt_logprobs=1) - - # Run sequentially. - seq = create_sequence(complete_sequence_token_ids) - seq_group = SequenceGroup(request_id="1", - seqs=[seq], - sampling_params=sampling_params, - arrival_time=0.0) - dummy_logprobs = create_dummy_prompt_logprobs(complete_sequence_token_ids) - detokenizer.decode_prompt_logprobs_inplace(seq_group, - dummy_logprobs, - position_offset=0) - # First logprob is None. - decoded_prompt_logprobs: list[dict[int, Any]] = dummy_logprobs[ - 1:] # type: ignore - - # decoded_prompt_logprobs doesn't contain the first token. - token_ids = complete_sequence_token_ids - tokenizer = detokenizer.tokenizer - text_full = tokenizer.decode(token_ids, - skip_special_tokens=skip_special_tokens) - text_first = tokenizer.decode(token_ids[0], - skip_special_tokens=skip_special_tokens) - text = text_full[len(text_first):] - - # Text for logprobs for the chosen token should be the same as the - # prompt text. Note that the first logprob is None. 
- assert text == "".join([ - logprobs[token_id].decoded_token - for token_id, logprobs in zip(token_ids[1:], decoded_prompt_logprobs) - ]) - assert text != "".join([ - logprobs[token_id + 1].decoded_token - for token_id, logprobs in zip(token_ids[1:], decoded_prompt_logprobs) - ]) diff --git a/tests/tool_use/test_jamba_tool_parser.py b/tests/tool_use/test_jamba_tool_parser.py index 35153139350b..57ace1fa22ac 100644 --- a/tests/tool_use/test_jamba_tool_parser.py +++ b/tests/tool_use/test_jamba_tool_parser.py @@ -12,7 +12,7 @@ from vllm.entrypoints.openai.protocol import (DeltaMessage, FunctionCall, ToolCall) from vllm.entrypoints.openai.tool_parsers import JambaToolParser -from vllm.transformers_utils.detokenizer import detokenize_incrementally +from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer MODEL = "ai21labs/Jamba-tiny-dev" diff --git a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_use/test_qwen3coder_tool_parser.py index ccb2acf512ca..f06fb2b9f2f0 100644 --- a/tests/tool_use/test_qwen3coder_tool_parser.py +++ b/tests/tool_use/test_qwen3coder_tool_parser.py @@ -13,7 +13,7 @@ ToolCall) from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import ( Qwen3CoderToolParser) -from vllm.transformers_utils.detokenizer import detokenize_incrementally +from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" diff --git a/tests/tool_use/test_seed_oss_tool_parser.py b/tests/tool_use/test_seed_oss_tool_parser.py index c276a598aa68..118c7534622e 100644 --- a/tests/tool_use/test_seed_oss_tool_parser.py +++ b/tests/tool_use/test_seed_oss_tool_parser.py @@ -13,7 +13,7 @@ DeltaMessage, FunctionCall, ToolCall) from vllm.entrypoints.openai.tool_parsers import SeedOssToolParser -from vllm.transformers_utils.detokenizer import detokenize_incrementally +from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer # Use a common model that is likely to be available diff --git a/tests/tool_use/test_xlam_tool_parser.py b/tests/tool_use/test_xlam_tool_parser.py index 0bc22e4f1031..c07ca0f56d6b 100644 --- a/tests/tool_use/test_xlam_tool_parser.py +++ b/tests/tool_use/test_xlam_tool_parser.py @@ -11,7 +11,7 @@ DeltaMessage, FunctionCall, ToolCall) from vllm.entrypoints.openai.tool_parsers import xLAMToolParser -from vllm.transformers_utils.detokenizer import detokenize_incrementally +from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer # Use a common model that is likely to be available diff --git a/tests/v1/engine/test_output_processor.py b/tests/v1/engine/test_output_processor.py index a9632ce54eac..bdb40be99aa3 100644 --- a/tests/v1/engine/test_output_processor.py +++ b/tests/v1/engine/test_output_processor.py @@ -12,9 +12,9 @@ STOP_STRINGS, DummyOutputProcessorTestVectors, MockEngineCore) +from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.outputs import CompletionOutput, RequestOutput from vllm.sampling_params import RequestOutputKind, SamplingParams -from vllm.sequence import PromptLogprobs, SampleLogprobs from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.v1.engine import EngineCoreRequest from 
vllm.v1.engine.output_processor import (OutputProcessor, diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index b75b94ad0acc..fd4b992c3821 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -15,10 +15,10 @@ from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest, PoolerOutput from vllm.tasks import SupportedTask from vllm.utils import make_async +from vllm.v1.outputs import SamplerOutput from vllm.worker.worker_base import WorkerBase logger = init_logger(__name__) diff --git a/vllm/executor/ray_distributed_executor.py b/vllm/executor/ray_distributed_executor.py index 78d0ee6c1e3f..84747575b496 100644 --- a/vllm/executor/ray_distributed_executor.py +++ b/vllm/executor/ray_distributed_executor.py @@ -17,12 +17,12 @@ from vllm.executor.ray_utils import (RayWorkerWrapper, initialize_ray_cluster, ray) from vllm.logger import init_logger -from vllm.model_executor.layers.sampler import SamplerOutput from vllm.platforms import current_platform from vllm.ray.ray_env import get_env_vars_to_copy from vllm.sequence import ExecuteModelRequest from vllm.utils import (_run_task_with_lock, get_distributed_init_method, get_ip, get_open_port, make_async) +from vllm.v1.outputs import SamplerOutput if ray is not None: from ray.actor import ActorHandle diff --git a/vllm/inputs/__init__.py b/vllm/inputs/__init__.py index e9db2a0dc13a..46f49aaa013d 100644 --- a/vllm/inputs/__init__.py +++ b/vllm/inputs/__init__.py @@ -7,15 +7,7 @@ SingletonPrompt, TextPrompt, TokenInputs, TokensPrompt, build_explicit_enc_dec_prompt, embeds_inputs, to_enc_dec_tuple_list, token_inputs, zip_enc_dec_prompts) -from .registry import (DummyData, InputContext, InputProcessingContext, - InputRegistry) - -INPUT_REGISTRY = InputRegistry() -""" -The global [`InputRegistry`][vllm.inputs.registry.InputRegistry] which is used -by [`LLMEngine`][vllm.LLMEngine] to dispatch data processing according to the -target model. 
-""" +from .registry import InputContext, InputProcessingContext __all__ = [ "DataPrompt", @@ -36,9 +28,6 @@ "build_explicit_enc_dec_prompt", "to_enc_dec_tuple_list", "zip_enc_dec_prompts", - "INPUT_REGISTRY", - "DummyData", "InputContext", "InputProcessingContext", - "InputRegistry", ] diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py index f0b392e9767a..b5316b6d0574 100644 --- a/vllm/inputs/registry.py +++ b/vllm/inputs/registry.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Mapping from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, NamedTuple, Optional, Union +from typing import TYPE_CHECKING, Any, Union import torch from transformers import BatchFeature, PretrainedConfig, ProcessorMixin @@ -15,16 +15,9 @@ if TYPE_CHECKING: from vllm.config import ModelConfig - from vllm.multimodal import (MultiModalDataDict, MultiModalPlaceholderDict, - MultiModalRegistry) - from vllm.sequence import SequenceData from vllm.transformers_utils.tokenizer import AnyTokenizer else: ModelConfig = Any - MultiModalDataDict = Any - MultiModalPlaceholderDict = Any - MultiModalRegistry = Any - SequenceData = Any AnyTokenizer = Any _T = TypeVar("_T") @@ -191,61 +184,3 @@ def maybe_cast_dtype(x): f"on data={data} with kwargs={allowed_kwargs}") raise ValueError(msg) from exc - - -class DummyData(NamedTuple): - """ - Dummy data used for profiling. - - Note: This is only used in V0. - """ - - seq_data: SequenceData - multi_modal_data: Optional[MultiModalDataDict] = None - multi_modal_placeholders: Optional[MultiModalPlaceholderDict] = None - - -class InputRegistry: - """ - Note: This is only used in V0. - """ - - def dummy_data_for_profiling( - self, - model_config: ModelConfig, - seq_len: int, - mm_registry: MultiModalRegistry, - is_encoder_data: bool = False, - ) -> DummyData: - """ - Create dummy data for profiling the memory usage of a model. - - The model is identified by ``model_config``. 
- """ - # Avoid circular import - from vllm.multimodal.cache import processor_only_cache_from_config - from vllm.sequence import SequenceData - - if not model_config.is_multimodal_model: - seq_data = SequenceData.from_prompt_token_counts((0, seq_len)) - return DummyData(seq_data=seq_data) - - cache = processor_only_cache_from_config(model_config, mm_registry) - - # Encoder dummy data does not contain multi-modal data - if is_encoder_data: - enc_data = mm_registry.get_encoder_dummy_data(model_config, - seq_len, - cache=cache) - seq_data = SequenceData.from_seqs(enc_data.prompt_token_ids) - return DummyData(seq_data=seq_data) - - dec_data = mm_registry.get_decoder_dummy_data(model_config, - seq_len, - cache=cache) - - return DummyData( - seq_data=SequenceData.from_seqs(dec_data.prompt_token_ids), - multi_modal_data=dec_data.multi_modal_data.get_data(), - multi_modal_placeholders=dec_data.multi_modal_placeholders, - ) diff --git a/vllm/model_executor/__init__.py b/vllm/model_executor/__init__.py index 55dfe8088c8f..a59aebfac4ff 100644 --- a/vllm/model_executor/__init__.py +++ b/vllm/model_executor/__init__.py @@ -3,13 +3,11 @@ from vllm.model_executor.parameter import (BasevLLMParameter, PackedvLLMParameter) -from vllm.model_executor.sampling_metadata import (SamplingMetadata, - SamplingMetadataCache) +from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_random_seed __all__ = [ "SamplingMetadata", - "SamplingMetadataCache", "set_random_seed", "BasevLLMParameter", "PackedvLLMParameter", diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py index 8a4ac214443e..8226437cb189 100644 --- a/vllm/model_executor/layers/logits_processor.py +++ b/vllm/model_executor/layers/logits_processor.py @@ -1,13 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """A layer that compute logits from hidden_stats.""" -import inspect -from concurrent.futures import ThreadPoolExecutor from typing import Optional import torch -import vllm.envs as envs from vllm.distributed import (tensor_model_parallel_all_gather, tensor_model_parallel_gather) from vllm.model_executor.custom_op import CustomOp @@ -16,11 +13,6 @@ from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.platforms import current_platform -_logits_processor_threadpool: Optional[ThreadPoolExecutor] = None -if envs.VLLM_LOGITS_PROCESSOR_THREADS is not None: - _logits_processor_threadpool = ThreadPoolExecutor( - envs.VLLM_LOGITS_PROCESSOR_THREADS) - @CustomOp.register("logits_processor") class LogitsProcessor(CustomOp): @@ -60,15 +52,10 @@ def forward( hidden_states: torch.Tensor, sampling_metadata: Optional[SamplingMetadata] = None, embedding_bias: Optional[torch.Tensor] = None, - prune_hidden_states: bool = True, ) -> Optional[torch.Tensor]: if self.logits_as_input: logits = hidden_states else: - if sampling_metadata is not None and prune_hidden_states: - hidden_states = _prune_hidden_states(hidden_states, - sampling_metadata) - # Get the logits for the next tokens. logits = self._get_logits(hidden_states, lm_head, embedding_bias) if logits is not None: @@ -79,12 +66,6 @@ def forward( if self.scale != 1.0: logits *= self.scale - - # Apply logits processors (if any). 
- if sampling_metadata is not None and \ - sampling_metadata.seq_groups is not None: - logits = _apply_logits_processors(logits, sampling_metadata) - return logits def _gather_logits(self, logits: torch.Tensor) -> torch.Tensor: @@ -125,75 +106,3 @@ def extra_repr(self) -> str: s += f", org_vocab_size={self.org_vocab_size}" s += f", scale={self.scale}, logits_as_input={self.logits_as_input}" return s - - -def _prune_hidden_states( - hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, -) -> torch.Tensor: - # NOTE(kzawora): The if guard is needed for Gaudi - in some scenarios - # (warmup, profile_run) we might not have selected_token_indices, - # so we skip pruning. - if sampling_metadata.selected_token_indices is not None: - return hidden_states.index_select( - 0, sampling_metadata.selected_token_indices) - else: - return hidden_states - - -def _apply_logits_processors( - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, -) -> torch.Tensor: - found_logits_processors = False - logits_processed = 0 - logits_row_ids_and_logits_row_futures = [] - for seq_group in sampling_metadata.seq_groups: - seq_ids = seq_group.seq_ids - sampling_params = seq_group.sampling_params - logits_processors = sampling_params.logits_processors - if logits_processors: - found_logits_processors = True - - for seq_id, logits_row_idx in zip(seq_ids, - seq_group.sample_indices): - logits_row = logits[logits_row_idx] - past_tokens_ids = seq_group.seq_data[seq_id].output_token_ids - prompt_tokens_ids = seq_group.seq_data[seq_id].prompt_token_ids - - if _logits_processor_threadpool is not None: - logits_row_ids_and_logits_row_futures.append( - (logits_row_idx, - _logits_processor_threadpool.submit( - _apply_logits_processors_single_seq, logits_row, - logits_processors, past_tokens_ids, - prompt_tokens_ids))) - else: - logits[logits_row_idx] = \ - _apply_logits_processors_single_seq( - logits_row, logits_processors, past_tokens_ids, - prompt_tokens_ids) - - logits_processed += len(seq_group.sample_indices) + len( - seq_group.prompt_logprob_indices) - - for logits_row_idx, future in logits_row_ids_and_logits_row_futures: - logits[logits_row_idx] = future.result() - - if found_logits_processors: - # verifies that no rows in logits were missed unexpectedly - assert logits_processed == logits.shape[0] - return logits - - -def _apply_logits_processors_single_seq(logits_row, logits_processors, - past_tokens_ids, - prompt_tokens_ids) -> torch.Tensor: - for logits_processor in logits_processors: - parameters = inspect.signature(logits_processor).parameters - if len(parameters) == 3: - logits_row = logits_processor(prompt_tokens_ids, past_tokens_ids, - logits_row) - else: - logits_row = logits_processor(past_tokens_ids, logits_row) - return logits_row diff --git a/vllm/model_executor/layers/sampler.py b/vllm/model_executor/layers/sampler.py deleted file mode 100644 index 9d93cad2420a..000000000000 --- a/vllm/model_executor/layers/sampler.py +++ /dev/null @@ -1,1198 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""A layer that samples the next tokens from the model's outputs.""" -import itertools -from collections.abc import Iterator -from dataclasses import dataclass -from importlib.util import find_spec -from math import inf -from typing import Optional, Union - -import msgspec -import torch -import torch.nn as nn - -import vllm.envs as envs -from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs -from 
vllm.model_executor.layers.utils import apply_penalties -from vllm.model_executor.sampling_metadata import (SamplingMetadata, - SamplingTensors, - SequenceGroupToSample) -from vllm.sampling_params import SamplingType -from vllm.sequence import (VLLM_INVALID_TOKEN_ID, - CompletionSequenceGroupOutput, SequenceOutput) - -if envs.VLLM_USE_FLASHINFER_SAMPLER and find_spec("flashinfer"): - # yapf: disable - from flashinfer.sampling import ( - top_k_top_p_sampling_from_probs as flashinfer_top_k_top_p_sampling) - - # yapf: enable -else: - flashinfer_top_k_top_p_sampling = None - -from vllm.logger import init_logger - -logger = init_logger(__name__) - - -def get_sampler() -> torch.nn.Module: - if envs.VLLM_USE_V1: - # Lazy import: the v1 package isn't distributed - from vllm.v1.sample.sampler import Sampler as V1Sampler - return V1Sampler() - return Sampler() - - -# (num_token_ids, num_parent_ids) per sequence group. -SampleResultType = list[tuple[list[int], list[int]]] - -# Types of temporary data structures used for -# computing sample_result -SampleMetadataType = dict[SamplingType, tuple[list[int], - list[SequenceGroupToSample]]] -MultinomialSamplesType = dict[SamplingType, torch.Tensor] -SampleResultsDictType = dict[int, tuple[list[int], list[int]]] - - -# Encapsulates temporary data structures for computing -# sample_result. -# -# * For multi-step scheduling: must be returned -# by `Sampler.forward()` and used later to compute the pythonized -# sample_result -# -# * For single-step scheduling: consumed immediately -# inside `Sampler.forward()` to compute pythonized sample_result. -@dataclass -class SampleResultArgsType: - sample_metadata: SampleMetadataType - multinomial_samples: MultinomialSamplesType - sample_results_dict: SampleResultsDictType - sampling_metadata: SamplingMetadata - greedy_samples: Optional[torch.Tensor] - - -# Union of non-deferred (single-step scheduling) -# vs deferred (multi-step scheduling) -# sample result types -MaybeDeferredSampleResultType = Union[SampleResultType, SampleResultArgsType] - -# Abbreviation of the _sample() return type -SampleReturnType = tuple[MaybeDeferredSampleResultType, Optional[torch.Tensor]] - - -class SamplerOutput( - msgspec.Struct, - omit_defaults=True, # type: ignore[call-arg] - array_like=True): # type: ignore[call-arg] - """For each sequence group, we generate a list of SequenceOutput object, - each of which contains one possible candidate for the next token. - - This data structure implements methods, so it can be used like a list, but - also has optional fields for device tensors. - """ - - outputs: list[CompletionSequenceGroupOutput] - - # On-device tensor containing probabilities of each token. - sampled_token_probs: Optional[torch.Tensor] = None - - # On-device tensor containing the logprobs of each token. - logprobs: Optional["torch.Tensor"] = None - - # Holds either (1) the pythonized sampler result (single-step scheduling) - # or (2) what will be arguments for later deferred pythonization of the - # sampler result (muliti-step scheduling) - deferred_sample_results_args: Optional[SampleResultArgsType] = None - - # On-device tensor containing the sampled token ids. - sampled_token_ids: Optional[torch.Tensor] = None - # CPU tensor containing the sampled token ids. Used during multi-step to - # return the sampled token ids from last rank to AsyncLLMEngine to be - # 'broadcasted' to all other PP ranks for next step. 
- sampled_token_ids_cpu: Optional[torch.Tensor] = None - - # On-device tensor containing the sampled token embeddings (embeddings - # corresponding to the sampled token ids). Used when prompt embeddings are - # specified in lieu of prompt token ids or text. - sampled_token_embeds: Optional[torch.Tensor] = None - - # Optional last hidden states from the model. - hidden_states: Optional[torch.Tensor] = None - - # Optional prefill hidden states from the model - # (used for models like EAGLE). - prefill_hidden_states: Optional[torch.Tensor] = None - - # Time taken in the forward pass for this across all workers - model_forward_time: Optional[float] = None - - # Time taken in the model execute function. This will include model forward, - # block/sync across workers, cpu-gpu sync time and sampling time. - model_execute_time: Optional[float] = None - - def __getitem__(self, idx: int) -> CompletionSequenceGroupOutput: - return self.outputs[idx] - - def __setitem__(self, idx: int, value): - self.outputs[idx] = value - - def __iter__(self) -> Iterator[CompletionSequenceGroupOutput]: - return iter(self.outputs) - - def __len__(self): - return len(self.outputs) - - def __eq__(self, other: object): - return isinstance(other, - self.__class__) and self.outputs == other.outputs - - def __repr__(self) -> str: - """Show the shape of a tensor instead of its values to reduce noise. - """ - sampled_token_probs_repr = ("None" if self.sampled_token_probs is None - else self.sampled_token_probs.shape) - sampled_token_ids_repr = ("None" if self.sampled_token_ids is None else - self.sampled_token_ids.shape) - return (f"SamplerOutput(outputs={self.outputs}, " - f"sampled_token_probs={sampled_token_probs_repr}, " - f"sampled_token_ids={sampled_token_ids_repr})") - - -class Sampler(nn.Module): - """Samples the next tokens from the model's outputs. - - This layer does the following: - 1. Discard the hidden states that are not used for sampling (i.e., all - tokens except the final one in each prompt). - 2. Compute the logits for the next tokens. - 3. Apply presence, frequency and repetition penalties. - 4. Apply temperature scaling. - 5. Apply top-p and top-k truncation. - 6. Sample the next tokens. - Here, each sequence group within the batch can have different sampling - parameters (e.g., sampling method, temperature, top-p, top-k, etc.). - - The structure of the logits tensor is coupled with the seq_groups in - sampling_metadata. Typically, each sequence in each seq_group has one row in - logits for the next token to be sampled; however, for a seq_group with a - prompt request with the prompt_logprobs sampling parameter, there are rows - in logits for each token in the input prompt. - """ - - def __init__(self): - super().__init__() - - # Whether or not the SamplerOutput should have on-device tensors - # containing the sampled token ids and probabilities. This is used by - # speculative decoding and when prompt embeddings are specified. - self.include_gpu_probs_tensor = False - self.should_modify_greedy_probs_inplace = False - - def _init_sampling_tensors( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ): - """The goal here is to reuse sampling tensors between similar decode - runs. This is possible because sampling logic does not change between - decodes of the same sequences. - """ - _, vocab_size = logits.shape - - # First free any existing stored sampling tensors. - # This is necessary because some sampling tensors may - # have pinned memory. 
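The Sampler docstring above lists the order of operations the deleted V0 path implemented: prune hidden states, compute logits, apply penalties, scale by temperature, truncate with top-p/top-k, then sample. A compressed, self-contained sketch of steps 4 to 6 on a toy batch; it mirrors the intent, not the exact kernels (pruning, penalties and greedy handling omitted):

import torch

torch.manual_seed(0)
logits = torch.randn(2, 16)               # (num_query_tokens, vocab_size)
temperature = torch.tensor([0.7, 1.0])
top_k, top_p = 4, 0.9

# 4. Temperature scaling in float32 (greedy rows would use temperature 1.0).
logits = logits.float() / temperature.unsqueeze(1)
# 5a. Top-k: keep only the k largest logits per row.
kth_largest = logits.topk(top_k, dim=-1).values[:, -1:]
logits = logits.masked_fill(logits < kth_largest, -float("inf"))
# 5b. Top-p: drop the tail of the sorted cumulative distribution, keeping at least one token.
sorted_logits, sorted_idx = logits.sort(dim=-1, descending=True)
sorted_probs = sorted_logits.softmax(dim=-1)
tail = (sorted_probs.cumsum(dim=-1) - sorted_probs) > top_p
sorted_logits = sorted_logits.masked_fill(tail, -float("inf"))
logits = torch.full_like(logits, -float("inf")).scatter(-1, sorted_idx, sorted_logits)
# 6. Sample the next token for every row.
next_token_ids = torch.multinomial(logits.softmax(dim=-1), num_samples=1)
print(next_token_ids.shape)               # torch.Size([2, 1])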
- self._sampling_tensors = None - - # Initialize new sampling tensors - (sampling_tensors, do_penalties, do_top_p_top_k, - do_min_p) = SamplingTensors.from_sampling_metadata( - sampling_metadata, vocab_size, logits.device, logits.dtype) - - self._sampling_tensors = sampling_tensors - self._do_penalties = do_penalties - self._do_top_p_top_k = do_top_p_top_k - self._do_min_p = do_min_p - - def forward( - self, - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[SamplerOutput]: - """ - Single-step scheduling: - * Perform GPU-side sampling computation & compute - GPU-side logprobs tensor - * Pythonize sampling result & logprobs tensor - - Multi-step scheduling: - * Perform GPU-side sampling computation & compute - GPU-side logprobs tensor - * Defer Pythonization of sampling result & logprobs - tensor - * Encapsulate arguments required for deferred Pythonization - in the - [`SamplerOutput`][vllm.model_executor.layers.sampler.SamplerOutput] - structure - - Args: - logits: (num_tokens, vocab_size). - sampling_metadata: Metadata for sampling. - """ - assert logits is not None - _, vocab_size = logits.shape - - # Prepare sampling tensors with pinned memory to avoid blocking. - if not sampling_metadata.reuse_sampling_tensors: - self._init_sampling_tensors(logits, sampling_metadata) - elif self._do_penalties: - # In this case, the sampling tensors logic depends on - # "output_tokens" of a sequence. As a result, we cannot - # reuse sampling tensors, since "output_tokens" changes - # between decode runs. - self._init_sampling_tensors(logits, sampling_metadata) - - assert self._sampling_tensors is not None - sampling_tensors = self._sampling_tensors - do_penalties = self._do_penalties - do_top_p_top_k = self._do_top_p_top_k - do_min_p = self._do_min_p - - logits = _apply_min_tokens_penalty(logits, sampling_metadata) - - # Apply presence and frequency penalties. - if do_penalties: - logits = apply_penalties(logits, sampling_tensors.prompt_tokens, - sampling_tensors.output_tokens, - sampling_tensors.presence_penalties, - sampling_tensors.frequency_penalties, - sampling_tensors.repetition_penalties) - - # Use float32 to apply temperature scaling. - # Use in-place division to avoid creating a new tensor. - logits = logits.to(torch.float) - logits.div_(sampling_tensors.temperatures.unsqueeze(dim=1)) - - if do_top_p_top_k and flashinfer_top_k_top_p_sampling is None: - logits = _apply_top_k_top_p(logits, sampling_tensors.top_ps, - sampling_tensors.top_ks) - - if do_min_p: - logits = _apply_min_p(logits, sampling_tensors.min_ps) - - # We use float32 for probabilities and log probabilities. - # Compute the probabilities. - probs = torch.softmax(logits, dim=-1, dtype=torch.float) - # Compute the log probabilities. - logprobs = torch.log_softmax(logits, dim=-1, dtype=torch.float) - - # Sample the next tokens. - maybe_deferred_sample_results, maybe_sampled_tokens_tensor = _sample( - probs, - logprobs, - sampling_metadata, - sampling_tensors, - include_gpu_probs_tensor=self.include_gpu_probs_tensor, - modify_greedy_probs=self._should_modify_greedy_probs_inplace, - ) - - if self.include_gpu_probs_tensor: - # Since we will defer sampler result Pythonization, - # preserve GPU-side tensors in support of later - # deferred pythonization of logprobs - assert maybe_sampled_tokens_tensor is not None - on_device_tensors = (probs, logprobs, maybe_sampled_tokens_tensor) - else: - # Since Pythonization has already happened, don't preserve - # GPU-side tensors. 
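`apply_penalties`, imported from `vllm.model_executor.layers.utils` and untouched by this change, folds presence, frequency and repetition penalties into the logits before temperature scaling. A generic single-row sketch of how such penalties are conventionally computed, shown only for orientation and not claiming to match the vLLM kernel:

import torch

def penalize_row(logits_row: torch.Tensor, output_token_ids: list[int],
                 presence: float, frequency: float,
                 repetition: float) -> torch.Tensor:
    counts = torch.bincount(torch.tensor(output_token_ids),
                            minlength=logits_row.numel()).float()
    seen = counts > 0
    # Repetition penalty: shrink positive logits and amplify negative ones for seen tokens.
    logits_row = torch.where(seen & (logits_row > 0), logits_row / repetition, logits_row)
    logits_row = torch.where(seen & (logits_row < 0), logits_row * repetition, logits_row)
    # Frequency penalty scales with the count, presence penalty is a flat subtraction.
    return logits_row - frequency * counts - presence * seen.float()

row = penalize_row(torch.randn(32), [1, 1, 5], presence=0.5, frequency=0.2, repetition=1.1)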
- on_device_tensors = None - - # Get the logprobs query results. - prompt_logprobs = None - sample_logprobs = None - if not sampling_metadata.skip_sampler_cpu_output: - # Pythonize logprobs now (GPU -> CPU); do not defer. - assert not isinstance(maybe_deferred_sample_results, - SampleResultArgsType) - prompt_logprobs, sample_logprobs = get_logprobs( - logprobs, sampling_metadata, maybe_deferred_sample_results) - - return _build_sampler_output( - maybe_deferred_sample_results, - sampling_metadata, - prompt_logprobs, - sample_logprobs, - on_device_tensors=on_device_tensors, - skip_sampler_cpu_output=sampling_metadata.skip_sampler_cpu_output) - - @property - def _should_modify_greedy_probs_inplace(self) -> bool: - """Whether or not the sampler should modify the probability distribution - of greedily-sampled tokens such that multinomial sampling would sample - the greedily-sampled token. - - In other words, if True then we set the probability of the greedily- - sampled token to 1. - - This is used by speculative decoding, which requires that the sampling - method be encoded into the probability distribution. - """ - return self.should_modify_greedy_probs_inplace - - -def _apply_min_tokens_penalty( - logits: torch.Tensor, - sampling_metadata: SamplingMetadata, -) -> torch.Tensor: - """Apply min_tokens penalty which sets stop tokens to -inf if min_tokens - have not been generated yet - """ - # list of indices in logits that will be set to -inf - logits_to_penalize: list[tuple[int, int]] = [] - logits_applied = 0 - for seq_group in sampling_metadata.seq_groups: - seq_ids = seq_group.seq_ids - sampling_params = seq_group.sampling_params - - sample_indices = seq_group.sample_indices - logits_applied += len(sample_indices) + len( - seq_group.prompt_logprob_indices) - if not seq_group.do_sample: - continue - - start_idx = sample_indices[0] - min_tokens = sampling_params.min_tokens - token_ids_to_penalize = sampling_params.all_stop_token_ids - if min_tokens > 0 and token_ids_to_penalize: - seqs_to_penalize: list[int] = [] - for j, seq_id in enumerate(seq_ids): - seq_data = seq_group.seq_data[seq_id] - if len(seq_data.output_token_ids_array) < min_tokens: - seqs_to_penalize.append(j) - - if seqs_to_penalize: - # convert to the index into logits - seqs_to_penalize = [start_idx + j for j in seqs_to_penalize] - # itertools.product pairs each seq index with every token id - logits_to_penalize.extend( - itertools.product(seqs_to_penalize, token_ids_to_penalize)) - - if logits_to_penalize: - # use zip and * to group indices along each dimension - # eg. [ (1,2), (1,3), (5,6) ] -> ( (1,1,5), (2,3,6) ) - logits[tuple(zip(*logits_to_penalize))] = -float("inf") - - # verifies that no rows in logits were missed unexpectedly - assert logits_applied == logits.shape[0] - return logits - - -def _apply_top_k_top_p( - logits: torch.Tensor, - p: torch.Tensor, - k: torch.Tensor, -) -> torch.Tensor: - logits_sort, logits_idx = logits.sort(dim=-1, descending=False) - - # Apply top-k. - top_k_mask = logits_sort.size(1) - k.to(torch.long) - # Get all the top_k values. - top_k_mask = logits_sort.gather(1, top_k_mask.unsqueeze(dim=1)) - top_k_mask = logits_sort < top_k_mask - logits_sort.masked_fill_(top_k_mask, -float("inf")) - - # Apply top-p. - probs_sort = logits_sort.softmax(dim=-1) - probs_sum = probs_sort.cumsum(dim=-1) - top_p_mask = probs_sum <= 1 - p.unsqueeze(dim=1) - # at least one - top_p_mask[:, -1] = False - logits_sort.masked_fill_(top_p_mask, -float("inf")) - - # Re-sort the probabilities. 
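`_apply_min_tokens_penalty` above collects `(row, stop_token_id)` pairs and masks them in one shot with the `zip(*pairs)` idiom, which regroups the pairs into the per-dimension index tuples that PyTorch advanced indexing expects. A tiny sketch:

import torch

logits = torch.zeros(6, 10)
pairs = [(1, 2), (1, 3), (5, 6)]        # (row of the sequence, stop token id)
rows, cols = zip(*pairs)                # (1, 1, 5) and (2, 3, 6)
logits[rows, cols] = -float("inf")
assert logits[1, 3] == -float("inf") and logits[5, 6] == -float("inf")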
- logits = torch.empty_like(logits_sort).scatter_(dim=-1, - index=logits_idx, - src=logits_sort) - return logits - - -def _apply_min_p( - logits: torch.Tensor, - min_p: torch.Tensor, -) -> torch.Tensor: - """ - Adapted from - https://github.com/oobabooga/text-generation-webui/blob/3146124ec01f02c8fb1650a6517cf1b60b537aaf/modules/sampler_hijack.py#L16C17-L16C17 - """ - probs = torch.softmax(logits, dim=-1) - top_probs, _ = probs.max(dim=-1, keepdim=True) - scaled_min_p = min_p.unsqueeze_(dim=1) * top_probs - tokens_to_remove = probs < scaled_min_p - logits = logits.masked_fill_(tokens_to_remove, -float("inf")) - - return logits - - -def _greedy_sample( - selected_seq_groups: list[SequenceGroupToSample], - samples: torch.Tensor, -) -> SampleResultType: - """Run greedy sampling on a given samples. - - Args: - selected_seq_groups: A list of sequence groups batched. - samples: (num_selected_samples,) A tensor of samples. The length of - samples could be smaller than selected_seq_groups if - seq_group.do_sample is False. - Returns: - Tuple of (next_token_ids, parent_ids). The length of returned list is - same as the length of selected_seq_groups. If the corresponding - seq_group has do_sample=False, tuple contains ([], []) - """ - samples_lst = samples.tolist() - sample_idx = 0 - results: SampleResultType = [] - for seq_group in selected_seq_groups: - if not seq_group.do_sample: - results.append(([], [])) - continue - - seq_ids = seq_group.seq_ids - num_parent_seqs = len(seq_ids) - assert num_parent_seqs == 1, ( - "Greedy sampling should have only one seq.") - parent_ids = list(range(num_parent_seqs)) - next_token_ids = [samples_lst[sample_idx]] - results.append((next_token_ids, parent_ids)) - sample_idx += num_parent_seqs - return results - - -def _random_sample( - selected_seq_groups: list[SequenceGroupToSample], - random_samples: torch.Tensor, -) -> SampleResultType: - """Run random sampling on a given samples. - - Args: - selected_seq_groups: A list of sequence groups batched. - random_samples: (num_selected_samples,) A tensor of samples. The - length of samples could be smaller than selected_seq_groups if - seq_group.do_sample is False. - Returns: - Tuple of (next_token_ids, parent_ids). The length of returned list is - same as the length of selected_seq_groups. If the corresponding - seq_group has do_sample=False, tuple contains ([], []) - """ - # Find the maximum n value of the prompt phase requests. - random_samples = random_samples.cpu() - sample_idx = 0 - results: SampleResultType = [] - for seq_group in selected_seq_groups: - if not seq_group.do_sample: - results.append(([], [])) - continue - - seq_ids = seq_group.seq_ids - sampling_params = seq_group.sampling_params - is_prompt = seq_group.is_prompt - num_parent_seqs = len(seq_ids) - if is_prompt: - # Prompt phase. - parent_ids = [0] * sampling_params.n - next_token_ids = random_samples[ - sample_idx, :sampling_params.n].tolist() - else: - # Generation phase. - parent_ids = list(range(num_parent_seqs)) - next_token_ids = random_samples[sample_idx:sample_idx + - num_parent_seqs, 0].tolist() - results.append((next_token_ids, parent_ids)) - sample_idx += num_parent_seqs - return results - - -# torch.multinomial forces a GPU<->CPU sync. -# Therefore, we use an optimized implementation instead. -# Note that we always sample with replacement. -# probs will be modified in place, but this is fine, as we pass -# in a copy already. 
-def _multinomial( - probs: torch.Tensor, - num_samples: int, - seq_groups: Optional[list[SequenceGroupToSample]] = None, -) -> torch.Tensor: - if num_samples > 1: - probs = probs.repeat_interleave(num_samples, dim=0) - q = torch.empty_like(probs) - if seq_groups is None: - q.exponential_() - else: - sample_idx = 0 - for seq_group in seq_groups: - seq_ids = seq_group.seq_ids - stride = len(seq_ids) * num_samples - assert seq_group.generator is not None - q[sample_idx:sample_idx + - stride].exponential_(generator=seq_group.generator) - sample_idx += stride - return probs.div_(q).argmax(dim=1).view(-1, num_samples) - - -def _top_k_top_p_multinomial_with_flashinfer( - probs: torch.Tensor, top_ks: torch.Tensor, top_ps: torch.Tensor, - num_samples: int, seq_groups: Optional[list[SequenceGroupToSample]]): - if num_samples > 1: - probs = probs.repeat_interleave(num_samples, dim=0) - top_ks = top_ks.repeat_interleave(num_samples) - top_ps = top_ps.repeat_interleave(num_samples) - batch_next_token_ids = flashinfer_top_k_top_p_sampling( - probs, - top_ks, - top_ps, - ) - return batch_next_token_ids.view(-1, num_samples) - - -def get_pythonized_sample_results( - sample_result_args: SampleResultArgsType) -> SampleResultType: - '''This function consumes GPU-side sampler results and computes - Pythonized CPU-side sampler results (GPU -> CPU sync.) - - Single-step scheduling: this function is invoked at sampling-time - for immediate Pythonization. - - Multi-step scheduling: Pythonization is deferred until after multiple - GPU-side steps have been completed. - - Args: - sample_result_args: GPU-side inputs to the Pythonization process - - Returns: - Pythonized sampler results - ''' - - ( - sample_metadata, - sampling_metadata, - greedy_samples, - multinomial_samples, - sample_results_dict, - ) = ( - sample_result_args.sample_metadata, - sample_result_args.sampling_metadata, - sample_result_args.greedy_samples, - sample_result_args.multinomial_samples, - sample_result_args.sample_results_dict, - ) - - for sampling_type in SamplingType: - if sampling_type not in sample_metadata: - continue - (seq_group_id, seq_groups) = sample_metadata[sampling_type] - if sampling_type == SamplingType.GREEDY: - sample_results = _greedy_sample(seq_groups, greedy_samples) - elif sampling_type in (SamplingType.RANDOM, SamplingType.RANDOM_SEED): - sample_results = _random_sample(seq_groups, - multinomial_samples[sampling_type]) - sample_results_dict.update(zip(seq_group_id, sample_results)) - - return [ - sample_results_dict.get(i, ([], [])) - for i in range(len(sampling_metadata.seq_groups)) - ] - - -def _sample_with_torch( - probs: torch.Tensor, - logprobs: torch.Tensor, - sampling_metadata: SamplingMetadata, - sampling_tensors: SamplingTensors, - include_gpu_probs_tensor: bool, - modify_greedy_probs: bool, -) -> SampleReturnType: - '''Torch-oriented _sample() implementation. 
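`_multinomial` above avoids `torch.multinomial` (and its device sync) with the exponential-race trick: divide the probabilities by i.i.d. Exp(1) noise and take the argmax, which samples from the same categorical distribution. A standalone sketch:

import torch

torch.manual_seed(0)
probs = torch.tensor([[0.1, 0.2, 0.7],
                      [0.5, 0.5, 0.0]])
q = torch.empty_like(probs).exponential_()   # Exp(1) noise; per-request generators slot in here
samples = probs.div(q).argmax(dim=1)         # distributed like torch.multinomial(probs, 1)
print(samples)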
- - Single-step scheduling: - * Perform GPU-side sampling computation - * Immediately Pythonize sampling result - - Multi-step scheduling: - * Perform GPU-side sampling computation - * Defer Pythonization & preserve GPU-side - tensors required for Pythonization - ''' - - categorized_seq_group_ids: dict[SamplingType, list[int]] = { - t: [] - for t in SamplingType - } - categorized_sample_indices = sampling_metadata.categorized_sample_indices - for i, seq_group in enumerate(sampling_metadata.seq_groups): - sampling_params = seq_group.sampling_params - sampling_type = sampling_params.sampling_type - categorized_seq_group_ids[sampling_type].append(i) - - sample_results_dict: SampleResultsDictType = {} - sample_metadata: SampleMetadataType = {} - multinomial_samples: MultinomialSamplesType = {} - greedy_samples: Optional[torch.Tensor] = None - - # Create output tensor for sampled token ids. - if include_gpu_probs_tensor: - sampled_token_ids_tensor = torch.full((logprobs.shape[0], 1), - VLLM_INVALID_TOKEN_ID, - dtype=torch.long, - device=logprobs.device) - else: - sampled_token_ids_tensor = None - - # Counterintuitively, having two loops here is actually faster. - # The first loop can run without waiting on GPU<->CPU sync. - for sampling_type in SamplingType: - sample_indices = categorized_sample_indices[sampling_type] - num_tokens = len(sample_indices) - if num_tokens == 0: - continue - - seq_group_id = categorized_seq_group_ids[sampling_type] - seq_groups = [sampling_metadata.seq_groups[i] for i in seq_group_id] - sample_metadata[sampling_type] = (seq_group_id, seq_groups) - long_sample_indices = sample_indices.long() - if sampling_type == SamplingType.GREEDY: - greedy_samples = torch.argmax(logprobs[long_sample_indices], - dim=-1) - - if sampled_token_ids_tensor is not None: - # Store sampled tokens in output tensor. - sampled_token_ids_tensor[ - long_sample_indices] = greedy_samples.unsqueeze(-1) - - if modify_greedy_probs: - # If required, modify the probabilities such that sampling from - # the modified distribution would always sample the argmax - # token id. - _modify_greedy_probs_inplace(logprobs, probs, - long_sample_indices, - greedy_samples) - - elif sampling_type in (SamplingType.RANDOM, SamplingType.RANDOM_SEED): - max_n_in_batch = 1 - for seq_group in seq_groups: - if seq_group.is_prompt: - sampling_params = seq_group.sampling_params - max_n_in_batch = max(max_n_in_batch, sampling_params.n) - seq_groups_arg = (None if sampling_type == SamplingType.RANDOM else - seq_groups) - - if flashinfer_top_k_top_p_sampling is not None: - logger.warning("FlashInfer 0.2.3+ does not support " - "per-request generators. Falling back to " - "PyTorch-native implementation.") - - multinomial_samples[sampling_type] = _multinomial( - probs[long_sample_indices], - max_n_in_batch, - seq_groups=seq_groups_arg) - - if sampled_token_ids_tensor is not None: - # Store sampled tokens in output tensor. - sampled_token_ids_tensor[long_sample_indices] = \ - multinomial_samples[sampling_type].to(torch.long) - - else: - raise ValueError(f"Unsupported sampling type: {sampling_type}") - - # Encapsulate arguments for computing Pythonized sampler - # results, whether deferred or otherwise. - maybe_deferred_args = SampleResultArgsType( - sampling_metadata=sampling_metadata, - sample_metadata=sample_metadata, - multinomial_samples=multinomial_samples, - greedy_samples=greedy_samples, - sample_results_dict=sample_results_dict) - - if not sampling_metadata.skip_sampler_cpu_output: - # GPU<->CPU sync happens here. 
- # This also converts the sampler output to a Python object. - # Return Pythonized sampler result & sampled token ids - return get_pythonized_sample_results( - maybe_deferred_args), sampled_token_ids_tensor - else: - # Defer sampler result Pythonization; return deferred - # Pythonization args & sampled token ids - return ( - maybe_deferred_args, - sampled_token_ids_tensor, - ) - - -def _sample( - probs: torch.Tensor, - logprobs: torch.Tensor, - sampling_metadata: SamplingMetadata, - sampling_tensors: SamplingTensors, - include_gpu_probs_tensor: bool, - modify_greedy_probs: bool, -) -> SampleReturnType: - """ - Args: - probs: (num_query_tokens_in_batch, num_vocab) - logprobs: (num_query_tokens_in_batch, num_vocab) - sampling_metadata: The metadata for a batch for sampling. - sampling_tensors: Tensors that include sampling related metadata. - - Returns: - (next_token_ids, parent_seq_ids) for each seq group in a batch. - If sampling is skipped, it returns ([], []) - sampled_token_ids_tensor: A tensor of sampled token ids. - """ - return _sample_with_torch( - probs, - logprobs, - sampling_metadata, - sampling_tensors, - include_gpu_probs_tensor=include_gpu_probs_tensor, - modify_greedy_probs=modify_greedy_probs, - ) - - -def _get_ranks(x: torch.Tensor, indices: torch.Tensor) -> torch.Tensor: - """ - This function calculates the ranks of the chosen tokens in a logprob tensor. - - Args: - x (torch.Tensor): 2D logprob tensor of shape (N, M) - where N is the no. of tokens and M is the vocab dim. - indices (torch.Tensor): List of chosen token indices. - - Returns: - torch.Tensor: 1D tensor of shape (N,) where N is the no. of tokens. - Each element in the returned tensor represents the rank - of the chosen token in the input logprob tensor. - """ - vals = x[torch.arange(0, len(x), device=x.device, dtype=indices.dtype), - indices] - result = (x > vals[:, None]) - del vals - return result.sum(1).add_(1) - - -def get_logprobs( - logprobs: torch.Tensor, - sampling_metadata: SamplingMetadata, - sample_results: SampleResultType, -) -> tuple[list[Optional[PromptLogprobs]], list[SampleLogprobs]]: - """Return sample logprobs and prompt logprobs. - - The logic consists of 3 parts. - - Select indices to compute logprob from, ranks of token ids, and - the top k token ids from logprobs. - - Compute prompt logprobs if required. - - Compute sample logprobs if required. - - Args: - logprobs: (num_query_tokens_across_batch, num_vocab). Each query token's - logprob per vocab. Sequence groups' query tokens are batched in a - single flattened tensor. For example, assuming there are N - seq groups, it is sorted by prefill tokens for seq_group_1 (if - prompt logprob is enabled), decode tokens for seq_group_1 (if - sampling is required), prefill tokens for seq_group_2, ... - sampling_metadata: The sampling metadata. - sample_results: (num_seq_groups) The tuple of (next_token_ids, - parent_ids) for each sequence group. When beam search is enabled, - sample_results can contain different number of seq_ids from - sampling_metadata.seq_groups. It is because beam search creates - 2 * BEAM_WIDTH number of samples (whereas there are only up to - BEAM_WIDTH number of seq_ids). - - Returns: - A tuple of prompt and sample logprobs per sequence group in a batch. - """ - # The index of query token to calculate logprobs. It includes both - # prompt and sample logprob indices. - query_indices: list[int] = [] - # The next token ids to get the logprob value from. 
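`_get_ranks` above defines a token's rank as one plus the number of vocabulary entries with a strictly larger value in its row. The same computation in a few lines:

import torch

logprobs = torch.log_softmax(torch.randn(4, 50), dim=-1)
chosen = torch.tensor([3, 7, 0, 49])
vals = logprobs[torch.arange(4), chosen]
ranks = (logprobs > vals[:, None]).sum(dim=1) + 1   # rank 1 means the chosen token was the argmax
print(ranks)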
- next_token_ids: list[int] = [] - # The largest requested number of logprobs. We find logprobs as many as the - # largest num logprobs in this API. If every logprobs is None, it will be - # set to -1. - largest_num_logprobs = -1 - - # Select indices to compute logprob from, ranks of token ids, and the top - # k token ids from logprobs. - for (seq_group, sample_result) in zip(sampling_metadata.seq_groups, - sample_results): - sampling_params = seq_group.sampling_params - - # Update indices and tokens for prompt logprobs. - if (seq_group.is_prompt - and sampling_params.prompt_logprobs is not None): - largest_num_logprobs = max(largest_num_logprobs, - sampling_params.prompt_logprobs) - next_prompt_tokens = _get_next_prompt_tokens(seq_group) - query_indices.extend(seq_group.prompt_logprob_indices) - next_token_ids.extend(next_prompt_tokens) - - # Update indices and next tokenes for sample logprob. - if seq_group.do_sample: - token_ids, parent_seq_ids = sample_result - # NOTE: We cannot directly use sample_indices because - # sample_indices only contain parent seq_ids of a previous step. - # The current step may have different number of seq_ids, and - # we can obtain it from `sample_result[1]`. - query_idx = seq_group.sample_indices[0] - query_indices.extend( - [query_idx + parent_id for parent_id in parent_seq_ids]) - next_token_ids.extend(token_ids) - - if sampling_params.logprobs is not None: - largest_num_logprobs = max(largest_num_logprobs, - sampling_params.logprobs) - - assert len(next_token_ids) == len(query_indices) - - if len(query_indices) == 0: - empty_sampled_logprob: SampleLogprobs = [] - empty_prompt_logprob: Optional[PromptLogprobs] = None - num_seq_groups = len(sampling_metadata.seq_groups) - return [empty_prompt_logprob - ] * num_seq_groups, [empty_sampled_logprob] * num_seq_groups - - selected_logprobs, ranks = None, None - top_logprobs, top_token_ids = None, None - - # If largest_num_logprobs == -1, i.e. no logprobs are requested, we can - # skip the whole logprob calculation. - if largest_num_logprobs >= 0: - query_indices_gpu = torch.tensor(query_indices, device=logprobs.device) - next_token_ids_gpu = torch.tensor(next_token_ids, - device=logprobs.device) - - # (num_selected_query_tokens, num_logprobs). Note that query_indices can - # contain duplicates if beam search is enabled. - selected_logprobs = logprobs[[ - query_indices_gpu, - next_token_ids_gpu, - ]] - ranks = _get_ranks( - logprobs[query_indices_gpu], - next_token_ids_gpu, - ) - assert selected_logprobs.shape[0] == ranks.shape[0] - - # We need to compute top k only if there exists logprobs > 0. - if largest_num_logprobs > 0: - # Logprobs of topk tokens for a batch of sequence groups. - # (num_query_tokens_across_batch). - top_logprobs, top_token_ids = torch.topk(logprobs, - largest_num_logprobs, - dim=-1) - top_logprobs = top_logprobs.to('cpu') - top_token_ids = top_token_ids.to('cpu') - - selected_logprobs = selected_logprobs.to('cpu') - ranks = ranks.to('cpu') - - # Find prompt/sample logprobs. 
- prompt_logprobs_per_seq_group: list[Optional[PromptLogprobs]] = [] - sample_logprobs_per_seq_group: list[SampleLogprobs] = [] - top_logprob_idx = 0 - selected_logprobs_idx = 0 - - for seq_group, sample_result in zip(sampling_metadata.seq_groups, - sample_results): - (prompt_logprobs, top_logprob_idx, - selected_logprobs_idx) = _get_prompt_logprob_if_needed( - seq_group, selected_logprobs, ranks, top_token_ids, top_logprobs, - selected_logprobs_idx, top_logprob_idx) - prompt_logprobs_per_seq_group.append(prompt_logprobs) - - (sampled_logprobs, top_logprob_idx, - selected_logprobs_idx) = _get_sampled_logprob_if_needed( - seq_group, sample_result, selected_logprobs, ranks, top_token_ids, - top_logprobs, selected_logprobs_idx, top_logprob_idx) - sample_logprobs_per_seq_group.append(sampled_logprobs) - - return prompt_logprobs_per_seq_group, sample_logprobs_per_seq_group - - -def _get_prompt_logprob_if_needed( - seq_group: SequenceGroupToSample, - selected_logprobs: torch.Tensor, - ranks: torch.Tensor, - top_token_ids: torch.Tensor, - top_logprobs: torch.Tensor, - selected_logprobs_idx: int, - top_logprob_idx: int, -): - """Compute the prompt logprob from a sequence group if needed.""" - sampling_params = seq_group.sampling_params - is_prompt = seq_group.is_prompt - - # Find prompt logprobs - prompt_logprobs: Optional[PromptLogprobs] = None - if is_prompt and sampling_params.prompt_logprobs is not None: - prompt_logprobs = [] - num_logprobs = sampling_params.prompt_logprobs - next_prompt_tokens = _get_next_prompt_tokens(seq_group) - # Pre-select indexes and create a list. It is faster than calling .item - # repetitively. - selected_logprob_items = selected_logprobs[ - selected_logprobs_idx:selected_logprobs_idx + - len(next_prompt_tokens)].tolist() - rank_items = ranks[selected_logprobs_idx:selected_logprobs_idx + - len(next_prompt_tokens)].tolist() - - for idx, token_id in enumerate(next_prompt_tokens): - # Calculate the prompt logprob of the real prompt tokens. - # {token_id: (logprob, rank_from_vocab)} - prompt_logprobs_dict: dict[int, tuple[float, int]] = { - token_id: (selected_logprob_items[idx], rank_items[idx]) - } - - # Add top K prompt logprobs along with its rank. - if num_logprobs > 0: - top_ids = top_token_ids[ - top_logprob_idx, :num_logprobs].tolist() - top_probs = top_logprobs[ - top_logprob_idx, :num_logprobs].tolist() - # Top K is already sorted by rank, so we can use 1 ~ - # num_logprobs + 1 for rank. - top_ranks = range(1, num_logprobs + 1) - prompt_logprobs_dict.update({ - top_id: (top_prob, rank) - for top_id, top_prob, rank in zip(top_ids, top_probs, - top_ranks) - }) - prompt_logprobs.append({ - token_id: Logprob(*logprob_and_rank) - for token_id, logprob_and_rank in prompt_logprobs_dict.items() - }) - # + 1 to go to the next prompt token. - top_logprob_idx += 1 - - # + len(next_prompt_tokens) to go to the next prompt. 
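Both `_get_prompt_logprob_if_needed` above and `_get_sampled_logprob_if_needed` below merge, per position, the actually chosen token with the top-k candidates into one `{token_id: (logprob, rank)}` mapping, reusing 1..k as the ranks of the already sorted top-k entries. A plain-Python sketch of that merge, with the `Logprob` wrapping left out and all numbers made up:

chosen_id, chosen_logprob, chosen_rank = 1234, -2.31, 17
top_ids = [5, 42, 7]               # already sorted best-first
top_logprobs = [-0.4, -1.1, -1.9]

entry = {chosen_id: (chosen_logprob, chosen_rank)}
entry.update({
    tok: (lp, rank)
    for tok, lp, rank in zip(top_ids, top_logprobs, range(1, len(top_ids) + 1))
})
# If the chosen token also appears in the top-k, the update simply overwrites
# it with its rank among the top-k (<= k).
print(entry)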
- selected_logprobs_idx += len(next_prompt_tokens) - return prompt_logprobs, top_logprob_idx, selected_logprobs_idx - - -def _get_sampled_logprob_if_needed( - seq_group: SequenceGroupToSample, - sample_result: tuple[list[int], list[int]], - selected_logprobs: torch.Tensor, - ranks: torch.Tensor, - top_token_ids: torch.Tensor, - top_logprobs: torch.Tensor, - selected_logprobs_idx: int, - top_logprob_idx: int, -): - """Compute the sample logprob if needed.""" - seq_ids = seq_group.seq_ids - num_logprobs = seq_group.sampling_params.logprobs - sampled_logprobs: SampleLogprobs = [] - next_token_ids, parent_seq_ids = sample_result - - if seq_group.do_sample: - assert len(next_token_ids) > 0 - if num_logprobs is None: - for next_token_id in next_token_ids: - # Use a dummy logprob - sampled_logprobs.append({next_token_id: Logprob(inf)}) - else: - # Pre-select items from tensor. tolist() is faster than repetitive - # `.item()` calls. - selected_logprob_items = selected_logprobs[ - selected_logprobs_idx:selected_logprobs_idx + - len(next_token_ids)].tolist() - rank_items = ranks[selected_logprobs_idx:selected_logprobs_idx + - len(next_token_ids)].tolist() - for idx, (next_token_id, parent_id) in enumerate( - zip(next_token_ids, parent_seq_ids)): - # Get the logprob of a sampled token. - sampled_logprobs_dict = { - next_token_id: - (selected_logprob_items[idx], rank_items[idx]) - } - if num_logprobs is not None and num_logprobs > 0: - # Get top K logprobs. - top_ids = top_token_ids[top_logprob_idx + - parent_id, :num_logprobs].tolist() - top_probs = top_logprobs[ - top_logprob_idx + parent_id, :num_logprobs].tolist() - # Top K is already sorted by rank, so we can use 1 ~ - # num_logprobs + 1 for rank. - top_ranks = range(1, num_logprobs + 1) - sampled_logprobs_dict.update({ - top_id: (top_prob, rank) - for top_id, top_prob, rank in zip( - top_ids, top_probs, top_ranks) - }) - - sampled_logprobs.append({ - token_id: Logprob(*logprob_and_rank) - for token_id, logprob_and_rank in - sampled_logprobs_dict.items() - }) - - # NOTE: This part of code is not intuitive. `selected_logprobs` include - # logprobs for the current step, which has len(next_token_ids) tokens - # per sequence group. `logprobs` includes logprobs from the previous - # steps, which has len(seq_ids) tokens per sequence group. - - # Iterate to the next sequence group in a batch. - selected_logprobs_idx += len(next_token_ids) - # Iterate to the next sequence group in a batch. - top_logprob_idx += len(seq_ids) - return sampled_logprobs, top_logprob_idx, selected_logprobs_idx - - -def _modify_greedy_probs_inplace(logprobs: torch.Tensor, probs: torch.Tensor, - sample_indices: torch.Tensor, - greedy_samples: torch.Tensor) -> None: - """Modify the probability distributions of the greedily-sampled tokens such - that each sampled token has a "probability" of 1.0. This is required by - speculative decoding, which depends on the sampling method being encoded - within the probability distribution for correctness. - - # Why do we only need to do this for greedy sampling? - - vLLM's sampler performs the following steps for greedy or multinomial - (random) sampling: - 1. Get logits from model. - 2. Modify logits according to per-sequence sampling parameters. - - Multiply by temperature, top-k and top-p masking, penalize tokens - according to their frequency, etc. - 3. Sample a token. - - Random sampling simply samples from the modified probability - distribution. - - Greedy sampling performs `argmax` to obtain the token with the - highest likelihood. 
- - Ignoring greedy sampling for a moment, we find that the computed probability - distribution has the following property: we can sample from it independently - and find that the token sampled by the Sampler has a frequency corresponding - to how often we see it in our sampling. In other words, for tokens sampled - with vLLM's random SamplingType, the computed probability distribution - encodes the sampling methodology completely. - - Greedy sampling does not normally have this property. vLLM modifies logits - according to sampling params, then performs `argmax`, then returns the - sampled token and the computed probability distribution. If we sample from - the distribution, we'll find the likelihood of the greedily-sampled token - is not always 1.0. - - Since lossless speculative decoding requires that the sampling methodology - be encoded within the probability distribution, we are motivated to modify - the probability distribution such that the sampled token has probability 1 - when speculative decoding is used. - - NOTE: Alternatively, we could use an extremely low temperature to achieve - greedy sampling using multinomial computation and unite the codepaths. This - has implications on the overall design of the sampler, e.g. how to record - accurate logprobs for the user, so this improvement is deferred to later. - """ - # NOTE: logprobs are not modified so they can be returned to the user. - probs[sample_indices, :] = 0 - probs[sample_indices, greedy_samples] = 1.0 - - -def _build_sampler_output( - maybe_deferred_sample_results: MaybeDeferredSampleResultType, - sampling_metadata: SamplingMetadata, - prompt_logprobs: Optional[list[Optional[PromptLogprobs]]], - sample_logprobs: Optional[list[SampleLogprobs]], - on_device_tensors: Optional[tuple[torch.Tensor, torch.Tensor, - torch.Tensor]], - skip_sampler_cpu_output: bool = False, -) -> SamplerOutput: - """Construct Python objects with the output of sampling. - - Args: - on_device_tensors: Tuple containing on-device tensors with the - probabilities used in sampling and the sampled token ids. This - allows post-processing without copies to CPU/serialization, e.g. in - speculative decoding rejection sampling. - """ - sampler_output: list[CompletionSequenceGroupOutput] = [] - - if skip_sampler_cpu_output: - assert isinstance(maybe_deferred_sample_results, SampleResultArgsType) - deferred_sample_results_args = maybe_deferred_sample_results - else: - assert prompt_logprobs is not None - assert sample_logprobs is not None - assert not isinstance(maybe_deferred_sample_results, - SampleResultArgsType) - assert len(sampling_metadata.seq_groups) \ - == len(maybe_deferred_sample_results) \ - == len(prompt_logprobs) \ - == len(sample_logprobs) - deferred_sample_results_args = None - - for (seq_group, sample_result, group_prompt_logprobs, - group_sample_logprobs) in zip(sampling_metadata.seq_groups, - maybe_deferred_sample_results, - prompt_logprobs, sample_logprobs): - seq_ids = seq_group.seq_ids - next_token_ids, parent_ids = sample_result - seq_outputs: list[SequenceOutput] = [] - for parent_id, next_token_id, logprobs in zip( - parent_ids, next_token_ids, group_sample_logprobs): - seq_outputs.append( - SequenceOutput(seq_ids[parent_id], next_token_id, - logprobs)) - sampler_output.append( - CompletionSequenceGroupOutput(seq_outputs, - group_prompt_logprobs)) - - # If not specified, store None values in SamplerOutput. 
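The one-hot rewrite described by `_modify_greedy_probs_inplace` above comes down to two indexed assignments; a toy example of the effect (the logprobs tensor is deliberately left untouched so it can still be reported to the user):

import torch

probs = torch.softmax(torch.randn(4, 8), dim=-1)
sample_indices = torch.tensor([0, 2])               # rows that were greedily sampled
greedy_samples = probs[sample_indices].argmax(dim=-1)

probs[sample_indices, :] = 0.0
probs[sample_indices, greedy_samples] = 1.0         # the greedy token now has probability 1
assert torch.allclose(probs[sample_indices].sum(dim=-1), torch.ones(2))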
- if on_device_tensors is not None: - (sampled_token_probs, logprobs_tensor, - sampled_token_ids) = on_device_tensors - else: - sampled_token_probs, logprobs_tensor, sampled_token_ids = (None, None, - None) - - return SamplerOutput( - outputs=sampler_output, - sampled_token_probs=sampled_token_probs, - sampled_token_ids=sampled_token_ids, - logprobs=logprobs_tensor, - deferred_sample_results_args=deferred_sample_results_args) - - -def _get_next_prompt_tokens( - seq_group: SequenceGroupToSample) -> tuple[int, ...]: - """Get a list of next prompt tokens to compute logprob from a - given sequence group. - - It is used to compute prompt logprob. Imagine you have logprob for each - query token. Query token needs to know the next prompt token id to compute - prompt logprob. This is a helper to obtain next prompt token ids. - - This API has to be used only when the caller knows seq_group is in prefill - stage. - - Returns: - A list of next prompt tokens to compute logprob. - """ - assert seq_group.is_prompt, ( - "Caller should ensure the sequence group is in a prefill stage.") - seq_ids = seq_group.seq_ids - query_len = seq_group.query_len - assert query_len is not None - # prompt has only 1 seq id. - assert len(seq_ids) == 1 - seq_data = seq_group.seq_data[seq_ids[0]] - computed_len = seq_data.get_num_computed_tokens() - prompt_tokens = seq_data.prompt_token_ids - # +1 because we are looking for a next prompt token. - next_token_index_start = computed_len + 1 - next_token_index_end = min(computed_len + query_len + 1, - len(prompt_tokens)) - next_prompt_tokens = prompt_tokens[ - next_token_index_start:next_token_index_end] - return next_prompt_tokens diff --git a/vllm/model_executor/models/medusa.py b/vllm/model_executor/models/medusa.py index 6ba8ad372c95..b0a96fca2ff8 100644 --- a/vllm/model_executor/models/medusa.py +++ b/vllm/model_executor/models/medusa.py @@ -2,18 +2,15 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable -from typing import Optional import torch import torch.nn as nn from vllm.config import VllmConfig from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.sampler import SamplerOutput from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from .utils import maybe_prefix @@ -105,8 +102,10 @@ def forward(self, hidden_states: torch.Tensor) -> list[torch.Tensor]: return [block(hidden_states) for block in self.blocks] def compute_logits( - self, hidden_states: list[torch.Tensor], - sampling_metadata: SamplingMetadata) -> list[torch.Tensor]: + self, + hidden_states: list[torch.Tensor], + sampling_metadata, + ) -> list[torch.Tensor]: logits_lst: list[torch.Tensor] = [] for hs, lm_head in zip(hidden_states, self.lm_heads): @@ -130,57 +129,6 @@ def compute_logits( return logits_lst - def sample( - self, - logits: list[torch.Tensor], - sampling_metadata: SamplingMetadata, - ) -> list[SamplerOutput]: - logits = torch.stack(logits, dim=0).float() - logprobs = torch.log_softmax(logits, dim=-1) - token_ids = logits.argmax(-1) # support only top-1 for now - probs = torch.softmax(logits, dim=-1) - - token_id_list = [] - token_prob_list = [] - token_logprob_list = [] - - for idx, seq_group in enumerate(sampling_metadata.seq_groups): - token_id_list.append(token_ids[:, 
seq_group.sample_indices]) - token_prob_list.append(probs[:, seq_group.sample_indices]) - token_logprob_list.append(logprobs[:, seq_group.sample_indices]) - - outputs: list[Optional[SamplerOutput]] = [] - for idx in range(len(sampling_metadata.seq_groups)): - outputs.append( - SamplerOutput( - outputs=None, - sampled_token_probs=token_prob_list[idx].squeeze(1), - logprobs=token_logprob_list[idx].squeeze(1), - sampled_token_ids=token_id_list[idx].squeeze(1), - )) - - return outputs - - def generate_proposals( - self, - previous_hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, - ) -> Optional[list[SamplerOutput]]: - # During preemption, we may receive an empty tensor (batch_size=0) - if previous_hidden_states.size(0) == 0: - # Return None to signal the Top1Proposer that no proposals - # were generated for this batch, allowing it to handle this - # special case appropriately - return None - - return self.sample( - logits=self.compute_logits( - hidden_states=self.forward(previous_hidden_states), - sampling_metadata=sampling_metadata, - ), - sampling_metadata=sampling_metadata, - ) - def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: params_dict = dict(self.named_parameters()) diff --git a/vllm/model_executor/models/mlp_speculator.py b/vllm/model_executor/models/mlp_speculator.py index c6a97388dc18..d057eb49a62d 100644 --- a/vllm/model_executor/models/mlp_speculator.py +++ b/vllm/model_executor/models/mlp_speculator.py @@ -8,9 +8,7 @@ import torch.nn as nn from vllm.config import VllmConfig -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.sampler import SamplerOutput, get_sampler from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -141,55 +139,57 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.config = config self.logits_processor = LogitsProcessor(config.vocab_size, config.vocab_size, 1.0) - self.sampler = get_sampler() - def generate_proposals( - self, - input_ids: torch.Tensor, - previous_hidden_states: torch.Tensor, - num_predict_tokens: int, - sampling_metadata: SamplingMetadata, - ) -> list[SamplerOutput]: - if num_predict_tokens > self.max_speculative_tokens: - raise ValueError(f"Max speculative tokens for model is " - f"{self.max_speculative_tokens}, but " - f"{num_predict_tokens} were requested") - - # b x 1 x d - previous_hidden_states = previous_hidden_states.unsqueeze(1) + # NOTE(woosuk): This method is commented out because it is old code + # using V0. We should either port it to V1 or remove it. 
- if self.scale_input: - previous_hidden_states = self.ln0(previous_hidden_states) / SQRT2 + # def generate_proposals( + # self, + # input_ids: torch.Tensor, + # previous_hidden_states: torch.Tensor, + # num_predict_tokens: int, + # sampling_metadata: SamplingMetadata, + # ) -> list[SamplerOutput]: + # if num_predict_tokens > self.max_speculative_tokens: + # raise ValueError(f"Max speculative tokens for model is " + # f"{self.max_speculative_tokens}, but " + # f"{num_predict_tokens} were requested") + + # # b x 1 x d + # previous_hidden_states = previous_hidden_states.unsqueeze(1) + + # if self.scale_input: + # previous_hidden_states = self.ln0(previous_hidden_states) / SQRT2 - # b x 1 - last_tokens = input_ids.unsqueeze(1) + # # b x 1 + # last_tokens = input_ids.unsqueeze(1) - next_tokens = [] + # next_tokens = [] - for head_index in range(num_predict_tokens): + # for head_index in range(num_predict_tokens): - # Project and predict - z = self.emb[head_index](last_tokens) # b k d - states = self.proj[head_index](previous_hidden_states) + # # Project and predict + # z = self.emb[head_index](last_tokens) # b k d + # states = self.proj[head_index](previous_hidden_states) - # Weighted add of state_weight*state and emb_weight*z - # Let subsequent LN take care of denominator - # state_weight is close to 1, so shouldn't be any precision issues - states.add_(z, alpha=self.emb_weight / self.state_weight) + # # Weighted add of state_weight*state and emb_weight*z + # # Let subsequent LN take care of denominator + # # state_weight is close to 1, so shouldn't be any precision issues + # states.add_(z, alpha=self.emb_weight / self.state_weight) - states = self.activation(self.ln[head_index](states)) # b k d - previous_hidden_states = states - # TODO: not yet supporting top_k_tokens_per_head - states = states.flatten(0, 1) + # states = self.activation(self.ln[head_index](states)) # b k d + # previous_hidden_states = states + # # TODO: not yet supporting top_k_tokens_per_head + # states = states.flatten(0, 1) - logits = self.logits_processor(self.head[head_index], states, - sampling_metadata) + # logits = self.logits_processor(self.head[head_index], states, + # sampling_metadata) - output = self.sampler(logits, sampling_metadata) - last_tokens = output.sampled_token_ids - next_tokens.append(output) + # output = self.sampler(logits, sampling_metadata) + # last_tokens = output.sampled_token_ids + # next_tokens.append(output) - return next_tokens + # return next_tokens def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py index c4548ee168bd..aa7c434a44ae 100644 --- a/vllm/model_executor/models/phi4flash.py +++ b/vllm/model_executor/models/phi4flash.py @@ -697,16 +697,12 @@ def compute_logits( hidden_states: torch.Tensor, sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - # If the shape is the same, it means that we have already - # prune hidden states manually. 
- prune_hidden_states = hidden_states.size( - 0) != sampling_metadata.selected_token_indices.size(0) processed_logits = self.logits_processor( self.lm_head, hidden_states, sampling_metadata, self.embedding_bias, - prune_hidden_states=prune_hidden_states) + ) return processed_logits def load_weights( diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py index 2315f9dad5a5..8c4548ff7f7d 100644 --- a/vllm/model_executor/sampling_metadata.py +++ b/vllm/model_executor/sampling_metadata.py @@ -1,597 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from array import array -from dataclasses import dataclass -from typing import Optional - -import torch - -from vllm.sampling_params import SamplingParams, SamplingType -from vllm.sequence import (VLLM_TOKEN_ID_ARRAY_TYPE, SequenceData, - SequenceGroupMetadata) -from vllm.utils import (PyObjectCache, async_tensor_h2d, - is_pin_memory_available, make_tensor_with_pad) - -_SAMPLING_EPS = 1e-5 - - -@dataclass -class SequenceGroupToSample: - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ----------------------| - # |-- query_len ---| - - # Sequence ids for the sequence group in a previous step. - seq_ids: list[int] - sampling_params: SamplingParams - # seq_id -> sequence data. - seq_data: dict[int, SequenceData] - # The length of the sequence (all tokens seen in the past + new token to - # compute attention) of the sequence group. None if it is in a decode - # stage. - seq_len: Optional[int] - # The length of new query tokens to compute in the current step. None if it - # is in a decode stage. The length of query_len <= seq_len if chunked - # prefill is enabled. - query_len: Optional[int] - # A random number generator for sampling. - generator: Optional[torch.Generator] - # True if the sequence group is in prefill stage. False if it is in a - # decode stage. - is_prompt: bool - # Query token indices from logits. to compute prompt logprob. Empty if - # prompt logprob is not required. - prompt_logprob_indices: list[int] - # Sample token indices from logits. Empty if sampling is not required. 
- sample_indices: list[int] - - @property - def do_sample(self): - return len(self.sample_indices) > 0 - - def __post_init__(self): - if len(self.prompt_logprob_indices) > 0: - assert self.sampling_params.prompt_logprobs is not None - if self.is_prompt: - assert self.seq_len is not None - assert self.query_len is not None - - -def gen_seq_group_to_sample_builder(num_seqs: int): - return lambda: SequenceGroupToSample( - seq_ids=[0] * num_seqs, - sampling_params=None, - seq_data=None, # type: ignore - seq_len=0, - query_len=0, - generator=None, - is_prompt=True, - prompt_logprob_indices=[], - sample_indices=[], - ) - - -class SamplingMetadataCache: - """Used to cache SamplingMetadata objects between scheduler iterations""" - - def __init__(self): - self._seq_group_to_sample_cache: dict[int, PyObjectCache] = {} - - def get_cached_seq_group_to_sample(self, num_seqs): - if num_seqs not in self._seq_group_to_sample_cache: - self._seq_group_to_sample_cache[num_seqs] = PyObjectCache( - gen_seq_group_to_sample_builder(num_seqs)) - - obj = self._seq_group_to_sample_cache[num_seqs].get_object() - return obj - - def reset(self): - for cache in self._seq_group_to_sample_cache.values(): - cache.reset() - class SamplingMetadata: - """Metadata for input sequences. Used in sampler. - - The usage is as follows; - ``` - hidden_states = execute_model(...) - logits = hidden_states[sampling_metadata.selected_token_indices] - sample(logits) - - def sample(logits): - # Use categorized_sample_indices for sampling.... - ``` - - Args: - seq_groups: List of batched sequence groups. - selected_token_indices: (num_query_tokens_to_logprob). Indices to find - logits from the initial model output hidden states. - categorized_sample_indices: SamplingType -> token indices to sample. - Each token indices is 2D tensor of (num_indices, num_indices) where - the first item means the sample index within the returned logit - (before pruning padding), and the second item means the sample - index after pruning using selected_token_indices. - For example, if the returned logit is [1, 2, 3], and we select - [1, 2] for sampling, the pruned logit will be [2, 3]. In this case, - The first tuple is [1, 2] (sampled index within original logit), - and the second tuple is [0, 1] (sampled index within pruned logit). - num_prompts: Number of prompt sequence groups in seq_groups. - skip_sampler_cpu_output: Indicates if we want to skip the GPU=>CPU - serialization of token outputs. - reuse_sampling_tensors: Indicates if we want to reuse sampling - tensors that are part of the sampler forward pass. Currently, - it is mainly used for multi-step decode. 
- - """ - - def __init__( - self, - seq_groups: list[SequenceGroupToSample], - selected_token_indices: torch.Tensor, - categorized_sample_indices: dict[SamplingType, torch.Tensor], - num_prompts: int, - skip_sampler_cpu_output: bool = False, - reuse_sampling_tensors: bool = False, - ) -> None: - self.seq_groups = seq_groups - self.selected_token_indices = selected_token_indices - self.categorized_sample_indices = categorized_sample_indices - self.num_prompts = num_prompts - self.skip_sampler_cpu_output = skip_sampler_cpu_output - self.reuse_sampling_tensors = reuse_sampling_tensors - - @staticmethod - def prepare( - seq_group_metadata_list: list[SequenceGroupMetadata], - seq_lens: list[int], - query_lens: list[int], - device: str, - pin_memory: bool, - generators: Optional[dict[str, torch.Generator]] = None, - cache: Optional[SamplingMetadataCache] = None, - ) -> "SamplingMetadata": - ( - seq_groups, - selected_token_indices, - categorized_sample_indices, - num_prompts, - ) = _prepare_seq_groups(seq_group_metadata_list, seq_lens, query_lens, - device, generators, cache) - selected_token_indices = async_tensor_h2d( - selected_token_indices, - dtype=torch.long, - target_device=device, - pin_memory=pin_memory, - ) - categorized_sample_indices = { - t: - async_tensor_h2d( - seq_ids, - dtype=torch.int, - target_device=device, - pin_memory=pin_memory, - ) - for t, seq_ids in categorized_sample_indices.items() - } - - sampling_metadata = SamplingMetadata( - seq_groups=seq_groups, - selected_token_indices=selected_token_indices, - categorized_sample_indices=categorized_sample_indices, - num_prompts=num_prompts, - ) - return sampling_metadata - - def __repr__(self) -> str: - return ( - "SamplingMetadata(" - f"seq_groups={self.seq_groups}, " - f"selected_token_indices={self.selected_token_indices}, " - f"categorized_sample_indices={self.categorized_sample_indices})") - - -def _prepare_seq_groups( - seq_group_metadata_list: list[SequenceGroupMetadata], - seq_lens: list[int], - query_lens: list[int], - device: str, - generators: Optional[dict[str, torch.Generator]] = None, - cache: Optional[SamplingMetadataCache] = None, -) -> tuple[ - list[SequenceGroupToSample], - list[int], - dict[SamplingType, list[int]], - int, -]: - """Prepare sequence groups and indices for sampling. - - Args: - seq_group_metadata_list: A list of sequence group to batch. - seq_lens: A list of sequence lens per sequence group. - Index of prompt len should match with seq_group_metadata_list. - query_lens: A list of query lengths. Prompt lens include the length - of entire prompt tokens, and it could be shorter. - device: A device to use for random number generators, - `SequenceGroupToSample.generator`. - generators: A store of per-request random number generators used - for seeded requests. - - Returns: - seq_groups: A list of sequence group to sample. - selected_token_indices: See the definition from `SamplingMetadata`. - categorized_sample_indices: See the definition from `SamplingMetadata`. - num_prompts: Total number of prompts from `seq_group_metadata_list`. - """ - # Batched sequence groups for the current model forward stsep. - seq_groups: list[SequenceGroupToSample] = [] - # A list of token indices to sample/compute logprob. It is used to - # prune the outcome logits from the model for the performance. - selected_token_indices: list[int] = [] - # Used for selected_token_indices. 
- model_output_idx = 0 - - # Sampling type -> ( - # indices to sample/prompt logprob within pruned output logits, - # indices to sample within pruned logits) - categorized_sample_indices: dict[SamplingType, list[int]] = { - t: [] - for t in SamplingType - } - # Index of logits to compute logprob. Logits include both prompt logprob - # and sample logprob indices. - logit_idx = 0 - # Total number of prompts from given sequence groups. - num_prompts = 0 - - for i, seq_group_metadata in enumerate(seq_group_metadata_list): - seq_ids = seq_group_metadata.seq_data.keys() - - if cache is not None: - sample_obj = cache.get_cached_seq_group_to_sample(len(seq_ids)) - - for j, seq_id in enumerate(seq_ids): - sample_obj.seq_ids[j] = seq_id - - sample_obj.prompt_logprob_indices.clear() - sample_obj.sample_indices.clear() - - sampling_params = seq_group_metadata.sampling_params - is_prompt = seq_group_metadata.is_prompt - generator: Optional[torch.Generator] = None - # If the current seq group is in decode stage, it is None. - seq_len: Optional[int] = None - query_len: Optional[int] = None - prompt_logprob_indices: list[int] = (sample_obj.prompt_logprob_indices - if cache is not None else []) - sample_indices: list[int] = (sample_obj.sample_indices - if cache is not None else []) - do_sample = seq_group_metadata.do_sample - - if seq_group_metadata.is_prompt: - if sampling_params.seed is not None: - generator = torch.Generator(device=device).manual_seed( - sampling_params.seed) - if generators is not None: - generators[seq_group_metadata.request_id] = generator - - num_prompts += 1 - num_prefill_sample = len(seq_ids) - assert num_prefill_sample == 1 - assert query_lens is not None and seq_lens is not None - query_len, seq_len = query_lens[i], seq_lens[i] - # If we need sampling, exclude num_prefill_sample tokens from - # prompt logprob. - prompt_logprob_len = (query_len - num_prefill_sample - if do_sample else query_len) - sample_len = num_prefill_sample if do_sample else 0 - else: - # Decode - prompt_logprob_len = 0 - query_len = query_lens[i] if query_lens is not None and len( - query_lens) > 0 else 1 - sample_len = len(seq_ids) * query_len if do_sample else 0 - - if sampling_params.seed is not None and generators is not None: - generator = generators.get(seq_group_metadata.request_id) - - # Update indices to select from the model output. - """ - This blocks computes selected_token_indices which is used in the - following way. - - hidden_states = model(...) - logits = hidden_states[selected_token_indices] - """ - - if sampling_params.prompt_logprobs is not None: - selected_token_indices.extend( - range(model_output_idx, model_output_idx + prompt_logprob_len)) - model_output_idx += prompt_logprob_len - if do_sample: - selected_token_indices.extend( - range(model_output_idx, model_output_idx + sample_len)) - model_output_idx += sample_len - - # We now find indices for logprob computation and sampling. - """ - This block computes categorized_sample_indices which is used in the - following way. - - hidden_states = model(...) - logits = hidden_states[selected_token_indices] - def sample(logits): - # Use categorized_sample_indices for sampling. - # prompt_logprob_indices to find prompt logprob indices. - # sample_indices to find sample indices. 
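A worked example of the prefill bookkeeping above, assuming a single prompt with `query_len` 5, prompt logprobs requested, and sampling enabled:

query_len = 5
num_prefill_sample = 1        # exactly one token is sampled per prompt
do_sample = True

prompt_logprob_len = (query_len - num_prefill_sample
                      if do_sample else query_len)      # 4
sample_len = num_prefill_sample if do_sample else 0     # 1

# selected_token_indices then covers rows 0..3 for prompt logprobs and row 4
# for the sampled token, i.e. all 5 query positions of this prompt are kept.
print(prompt_logprob_len, sample_len)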
- """ - - if sampling_params.prompt_logprobs is not None: - prompt_logprob_indices.extend( - range(logit_idx, logit_idx + prompt_logprob_len)) - logit_idx += prompt_logprob_len - if do_sample: - sample_indices.extend(range(logit_idx, logit_idx + sample_len)) - categorized_sample_indices[sampling_params.sampling_type].extend( - list(range(logit_idx, logit_idx + sample_len))) - logit_idx += sample_len - - if cache is not None: - sample_obj.sampling_params = sampling_params - sample_obj.seq_data = seq_group_metadata.seq_data - sample_obj.seq_len = seq_len - sample_obj.query_len = query_len - sample_obj.generator = generator - sample_obj.is_prompt = is_prompt - else: - sample_obj = SequenceGroupToSample( - seq_ids=list(seq_ids), - sampling_params=sampling_params, - seq_data=seq_group_metadata.seq_data, - seq_len=seq_len, - query_len=query_len, - generator=generator, - is_prompt=is_prompt, - prompt_logprob_indices=list(prompt_logprob_indices), - sample_indices=list(sample_indices), - ) - - seq_groups.append(sample_obj) - - if cache is not None: - cache.reset() - - return (seq_groups, selected_token_indices, categorized_sample_indices, - num_prompts) - - -@dataclass -class SamplingTensors: - """Tensors for sampling.""" - - temperatures: torch.Tensor - top_ps: torch.Tensor - top_ks: torch.Tensor - min_ps: torch.Tensor - presence_penalties: torch.Tensor - frequency_penalties: torch.Tensor - repetition_penalties: torch.Tensor - prompt_tokens: torch.Tensor - output_tokens: torch.Tensor - - @classmethod - def from_sampling_metadata( - cls, - sampling_metadata: "SamplingMetadata", - vocab_size: int, - device: torch.device, - dtype: torch.dtype, - ) -> tuple["SamplingTensors", bool, bool, bool]: - prompt_tokens: list[array] = [] - output_tokens: list[array] = [] - top_ks: list[int] = [] - temperatures: list[float] = [] - top_ps: list[float] = [] - min_ps: list[float] = [] - presence_penalties: list[float] = [] - frequency_penalties: list[float] = [] - repetition_penalties: list[float] = [] - do_penalties = False - do_top_p_top_k = False - do_min_p = False - - assert sampling_metadata.seq_groups is not None - for seq_group in sampling_metadata.seq_groups: - seq_ids = seq_group.seq_ids - sampling_params = seq_group.sampling_params - temperature = sampling_params.temperature - p = sampling_params.presence_penalty - f = sampling_params.frequency_penalty - r = sampling_params.repetition_penalty - top_p = sampling_params.top_p - min_p = sampling_params.min_p - - # k should not be greater than the vocab size. - top_k = min(sampling_params.top_k, vocab_size) - top_k = vocab_size if top_k < 1 else top_k - if temperature < _SAMPLING_EPS: - # NOTE: Zero temperature means deterministic sampling - # (i.e., greedy sampling or beam search). - # Set the temperature to 1 to avoid division by zero. 
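A small illustration of the zero-temperature convention noted in the comment above (a sketch with toy values, not code from the patch): once sampling is greedy, the temperature no longer affects the chosen token, so rewriting it to 1.0 only protects the later division of logits by temperature.

import torch

logits = torch.tensor([2.0, 0.5, -1.0])  # assumed toy logits
temperature = 1.0  # zero temperature is rewritten to 1.0 upstream
scaled = logits / temperature  # a no-op here, but never divides by zero
greedy_token = int(torch.argmax(scaled))
assert greedy_token == 0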
- temperature = 1.0 - if not do_top_p_top_k and (top_p < 1.0 - _SAMPLING_EPS - or top_k != vocab_size): - do_top_p_top_k = True - if not do_min_p and min_p > _SAMPLING_EPS: - do_min_p = True - if not do_penalties and (abs(p) >= _SAMPLING_EPS - or abs(f) >= _SAMPLING_EPS - or abs(r - 1.0) >= _SAMPLING_EPS): - do_penalties = True - - is_prompt = seq_group.is_prompt - if is_prompt and sampling_params.prompt_logprobs is not None: - # For tokens in the prompt that we only need to get - # their logprobs - query_len = seq_group.query_len - assert query_len is not None - prefill_len = len(seq_group.prompt_logprob_indices) - temperatures += [temperature] * prefill_len - top_ps += [top_p] * prefill_len - top_ks += [top_k] * prefill_len - min_ps += [min_p] * prefill_len - presence_penalties += [0] * prefill_len - frequency_penalties += [0] * prefill_len - repetition_penalties += [1] * prefill_len - - if seq_group.do_sample: - sample_lens = len(seq_group.sample_indices) - assert sample_lens >= len(seq_ids) - temperatures += [temperature] * sample_lens - top_ps += [top_p] * sample_lens - top_ks += [top_k] * sample_lens - min_ps += [min_p] * sample_lens - presence_penalties += [p] * sample_lens - frequency_penalties += [f] * sample_lens - repetition_penalties += [r] * sample_lens - - if do_penalties: - for seq_group in sampling_metadata.seq_groups: - seq_ids = seq_group.seq_ids - sampling_params = seq_group.sampling_params - if (seq_group.is_prompt - and sampling_params.prompt_logprobs is not None): - prefill_len = len(seq_group.prompt_logprob_indices) - prompt_tokens.extend( - array(VLLM_TOKEN_ID_ARRAY_TYPE) - for _ in range(prefill_len)) - output_tokens.extend( - array(VLLM_TOKEN_ID_ARRAY_TYPE) - for _ in range(prefill_len)) - if seq_group.do_sample: - for seq_id in seq_ids: - seq_data = seq_group.seq_data[seq_id] - prompt_tokens.append(seq_data.prompt_token_ids_array) - output_tokens.append(seq_data.output_token_ids_array) - - sampling_tensors = SamplingTensors.from_lists( - temperatures, - top_ps, - top_ks, - min_ps, - presence_penalties, - frequency_penalties, - repetition_penalties, - prompt_tokens, - output_tokens, - vocab_size, - device, - dtype, - ) - return (sampling_tensors, do_penalties, do_top_p_top_k, do_min_p) - - @classmethod - def from_lists( - cls, - temperatures: list[float], - top_ps: list[float], - top_ks: list[int], - min_ps: list[float], - presence_penalties: list[float], - frequency_penalties: list[float], - repetition_penalties: list[float], - prompt_tokens: list[array], - output_tokens: list[array], - vocab_size: int, - device: torch.device, - dtype: torch.dtype, - ) -> "SamplingTensors": - # Note that the performance will be very bad without - # pinned memory. 
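For background on the pinned-memory note that closes the block above, here is an illustrative sketch (not code from the patch) of the pattern used below: page-locked host tensors are what allow the later `non_blocking=True` copies to overlap with other host work.

import torch

use_cuda = torch.cuda.is_available()  # assumption: pinning only matters with a GPU present
temperatures = torch.tensor([0.7, 1.0, 0.9],
                            dtype=torch.float32,
                            device="cpu",
                            pin_memory=use_cuda)
if use_cuda:
    # From pageable (unpinned) memory, non_blocking would not actually overlap.
    temperatures_gpu = temperatures.to(device="cuda", non_blocking=True)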
- pin_memory = is_pin_memory_available() - - do_penalties = prompt_tokens or output_tokens - - if do_penalties: - prompt_t = make_tensor_with_pad( - prompt_tokens, - vocab_size, - device="cpu", - dtype=torch.int64, - pin_memory=pin_memory, - ) - output_t = make_tensor_with_pad( - output_tokens, - vocab_size, - device="cpu", - dtype=torch.int64, - pin_memory=pin_memory, - ) - else: - empty_tensor = torch.empty(0, device=device, dtype=torch.long) - prompt_t = empty_tensor - output_t = empty_tensor - - temperatures_t = torch.tensor( - temperatures, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - top_ps_t = torch.tensor( - top_ps, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - min_ps_t = torch.tensor( - min_ps, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - presence_penalties_t = torch.tensor( - presence_penalties, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - frequency_penalties_t = torch.tensor( - frequency_penalties, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - repetition_penalties_t = torch.tensor( - repetition_penalties, - device="cpu", - dtype=dtype, - pin_memory=pin_memory, - ) - top_ks_t = torch.tensor( - top_ks, - device="cpu", - dtype=torch.int, - pin_memory=pin_memory, - ) - # Because the memory is pinned, we can do non-blocking - # transfer to device. - - return cls( - temperatures=temperatures_t.to(device=device, non_blocking=True), - top_ps=top_ps_t.to(device=device, non_blocking=True), - top_ks=top_ks_t.to(device=device, non_blocking=True), - min_ps=min_ps_t.to(device=device, non_blocking=True), - presence_penalties=presence_penalties_t.to(device=device, - non_blocking=True), - frequency_penalties=frequency_penalties_t.to(device=device, - non_blocking=True), - repetition_penalties=repetition_penalties_t.to(device=device, - non_blocking=True), - prompt_tokens=prompt_t.to(device=device, non_blocking=True), - output_tokens=output_t.to(device=device, non_blocking=True), - ) + # Placeholder until it can be safely removed. 
+ pass diff --git a/vllm/sequence.py b/vllm/sequence.py index 24114c0bb792..a6c194fbac0b 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -1,28 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Sequence and its related classes.""" -import copy -import enum -from abc import ABC, abstractmethod -from array import array -from collections import defaultdict -from collections.abc import Mapping -from collections.abc import Sequence as GenericSequence -from dataclasses import dataclass, field -from functools import reduce -from typing import TYPE_CHECKING, Any, Callable, Optional, Union +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Optional, Union import msgspec import torch -from vllm.inputs import SingletonInputs -from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs -from vllm.multimodal import MultiModalKwargs, MultiModalPlaceholderDict -from vllm.pooling_params import PoolingParams -from vllm.sampling_params import RequestOutputKind, SamplingParams - if TYPE_CHECKING: - from vllm.lora.request import LoRARequest from vllm.v1.worker.kv_connector_model_runner_mixin import ( KVConnectorOutput) else: @@ -34,50 +19,6 @@ VLLM_INVALID_TOKEN_ID = -1 -def array_full(token_id: int, count: int): - """[`array`][] equivalent of [numpy.full][].""" - return array(VLLM_TOKEN_ID_ARRAY_TYPE, [token_id]) * count - - -class SequenceStatus(enum.IntEnum): - """Status of a sequence.""" - WAITING = 0 - RUNNING = 1 - SWAPPED = 2 - # Note: anything after SWAPPED (2) will be considered - # as a finished status. - FINISHED_STOPPED = 3 - FINISHED_LENGTH_CAPPED = 4 - FINISHED_ABORTED = 5 - FINISHED_IGNORED = 6 - - @staticmethod - def is_finished(status: "SequenceStatus") -> bool: - return status > SequenceStatus.SWAPPED - - @staticmethod - def get_finished_reason(status: "SequenceStatus") -> Union[str, None]: - if status == SequenceStatus.FINISHED_STOPPED: - finish_reason = "stop" - elif status == SequenceStatus.FINISHED_LENGTH_CAPPED: - finish_reason = "length" - elif status == SequenceStatus.FINISHED_ABORTED: - finish_reason = "abort" - elif status == SequenceStatus.FINISHED_IGNORED: - # The ignored sequences are the sequences whose prompt lengths - # are longer than the model's length cap. Therefore, the stop - # reason should also be "length" as in OpenAI API. - finish_reason = "length" - else: - finish_reason = None - return finish_reason - - -class SequenceStage(enum.Enum): - PREFILL = enum.auto() - DECODE = enum.auto() - - @dataclass class RequestMetrics: """Metrics associated with a request. @@ -107,971 +48,12 @@ class RequestMetrics: model_execute_time: Optional[float] = None -class SequenceDataDelta( - msgspec.Struct, - array_like=True, # type: ignore[call-arg] - omit_defaults=True): # type: ignore[call-arg] - """Delta SequenceData to send to workers per step.""" - # A new token to be appended to existing SequenceData. - new_output_token_ids: list[int] - # Overwriting existing `cumulative_logprob` - new_cumulative_logprob: float - # Overwriting existing `num_computed_tokens`. - new_num_computed_tokens: int - # Overwriting existing `stage`. - new_stage: SequenceStage - - -class SequenceData(msgspec.Struct, - omit_defaults=True): # type: ignore[call-arg] - """Data associated with a sequence.""" - # NOTE: we cannot use Union[list, array] because msgspec cannot support - # union of 2 list types. 
- _prompt_token_ids: array - _output_token_ids: array = msgspec.field( - default_factory=lambda: array(VLLM_TOKEN_ID_ARRAY_TYPE, [])) - - _prompt_embeds: Optional[torch.Tensor] = None - _output_embeds: Optional[torch.Tensor] = None - - ### The below fields should not be passed as an argument ### - _cumulative_logprob: float = 0.0 - _prompt_token_ids_tuple: tuple[int, - ...] = msgspec.field(default_factory=tuple) - # The number of tokens that are computed (that run against the model). - _num_computed_tokens: int = 0 - # The number of tokens with prefix cache hit. - _num_cached_tokens: int = 0 - _stage: SequenceStage = SequenceStage.PREFILL - _cached_all_token_ids: list[int] = msgspec.field(default_factory=list) - _cached_all_token_embeds: Optional[torch.Tensor] = None - - # It is used to get delta input. It is reset when `get_delta_and_reset` - # is called. - _new_appended_tokens: list[int] = msgspec.field(default_factory=list) - - # It is used to compute mrope_position_ids. - _mrope_position_delta: Optional[int] = None - - @staticmethod - def from_prompt_token_counts( - *token_counts: tuple[int, int]) -> "SequenceData": - """ - Construct a [`SequenceData`][vllm.sequence.SequenceData] instance - by concatenating prompt token sequences. - - Each tuple represents one token sequence, expressed in the form - `(token_id, count)`. - """ - if len(token_counts) == 0: - return SequenceData.from_seqs([]) - - prompt_token_ids_arr = reduce( - array.__iadd__, - (array_full(token_id, count) for token_id, count in token_counts), - ) - - return SequenceData(prompt_token_ids_arr) - - @staticmethod - def from_seqs( - prompt_token_ids: GenericSequence[int], - output_token_ids: Optional[GenericSequence[int]] = None, - *, - prompt_embeds: Optional[torch.Tensor] = None, - ) -> "SequenceData": - """ - Construct a [`SequenceData`][vllm.sequence.SequenceData] instance - from prompt and output token sequences. - """ - prompt_token_ids_arr = array(VLLM_TOKEN_ID_ARRAY_TYPE, - prompt_token_ids) - - if output_token_ids is None: - return SequenceData(prompt_token_ids_arr, - _prompt_embeds=prompt_embeds) - - output_token_ids_arr = array(VLLM_TOKEN_ID_ARRAY_TYPE, - output_token_ids) - - return SequenceData(prompt_token_ids_arr, - _output_token_ids=output_token_ids_arr, - _prompt_embeds=prompt_embeds) - - def __post_init__(self) -> None: - assert self._prompt_token_ids.typecode == "l" - assert self._output_token_ids.typecode == "l" - self._prompt_token_ids_tuple: tuple[int, ...] 
= tuple( - self._prompt_token_ids) - self._update_cached_all_tokens() - if self._prompt_embeds is not None: - self._update_cached_all_token_embeds() - - def _update_cached_all_tokens(self): - assert isinstance(self._prompt_token_ids, array) - assert isinstance(self._output_token_ids, array) - self._cached_all_token_ids: list[int] = list(self._prompt_token_ids + - self._output_token_ids) - - def _update_cached_all_token_embeds(self): - assert isinstance(self._prompt_embeds, torch.Tensor) - self._cached_all_token_embeds: torch.Tensor = self._prompt_embeds - if self._output_embeds is not None: - self._cached_all_token_embeds = torch.cat( - (self._cached_all_token_embeds, self._output_embeds), dim=0) - - @property - def cumulative_logprob(self) -> float: - """The cumulative log probability of the output.""" - return self._cumulative_logprob - - @property - def prompt_token_ids(self) -> tuple[int, ...]: - """The token IDs of the prompt.""" - return self._prompt_token_ids_tuple - - @prompt_token_ids.setter - def prompt_token_ids(self, new_prompt_token_ids) -> None: - raise NotImplementedError - - @property - def prompt_token_ids_array(self) -> array: - """Return the prompt token ids in array type. - - Note that the array is in "I" type, and it is not compatible - with torch.long (2 bytes vs 4 bytes). So beware of the usage. - """ - return self._prompt_token_ids - - @property - def output_token_ids(self) -> tuple[int, ...]: - """The token IDs of the output.""" - return tuple(self._output_token_ids) - - @output_token_ids.setter - def output_token_ids(self, - new_output_token_ids: GenericSequence[int]) -> None: - self._output_token_ids = array(VLLM_TOKEN_ID_ARRAY_TYPE, - new_output_token_ids) - self._update_cached_all_tokens() - - @property - def output_embeds(self) -> Optional[torch.Tensor]: - return self._output_embeds - - @output_embeds.setter - def output_embeds(self, new_output_token_embeds: torch.Tensor) -> None: - self._output_token_embeds = new_output_token_embeds - self._update_cached_all_token_embeds() - - @property - def output_token_ids_array(self) -> array: - """Return the prompt token ids in array type. - - Note that the array is in "I" type, and it is not compatible - with torch.long (2 bytes vs 4 bytes). So beware of the usage. 
- """ - assert isinstance(self._output_token_ids, array) - return self._output_token_ids - - @property - def prompt_embeds(self) -> Optional[torch.Tensor]: - return self._prompt_embeds - - @prompt_embeds.setter - def prompt_embeds(self, prompt_embeds: torch.Tensor) -> None: - self._prompt_embeds = prompt_embeds - self._update_cached_all_token_embeds() - - @property - def mrope_position_delta(self) -> Optional[int]: - return self._mrope_position_delta - - @mrope_position_delta.setter - def mrope_position_delta(self, new_mrope_position_delta): - self._mrope_position_delta = new_mrope_position_delta - - def append_token_id(self, - token_id: int, - logprob: float, - token_embed: Optional[torch.Tensor] = None) -> None: - self._output_token_ids.append(token_id) - self._new_appended_tokens.append(token_id) - self._cached_all_token_ids.append(token_id) - self._cumulative_logprob += logprob - if token_embed is not None: - # Do not pass in with batch or sequence dimensions - assert token_embed.ndim == 1 - token_embed = token_embed.detach().cpu().unsqueeze(0) - if self._output_embeds is None: - self._output_embeds = token_embed - else: - self._output_embeds = torch.cat( - (self._output_embeds, token_embed), dim=0) - assert self._cached_all_token_embeds is not None - self._cached_all_token_embeds = torch.cat( - (self._cached_all_token_embeds, - token_embed.to(device=self._cached_all_token_embeds.device)), - dim=0) - - def get_len(self) -> int: - return len(self._output_token_ids) + len(self._prompt_token_ids) - - def get_prompt_len(self) -> int: - return len(self._prompt_token_ids) - - def get_output_len(self) -> int: - return len(self._output_token_ids) - - def get_token_ids(self) -> list[int]: - return self._cached_all_token_ids - - def get_token_embeddings(self) -> Optional[torch.Tensor]: - return self._cached_all_token_embeds - - def get_prefix_token_ids( - self, num_tokens: int - ) -> tuple[tuple[int, ...], Optional[tuple[int, ...]]]: - """Get prefix tokens, and make the return value hashable""" - prompt_length = self.get_prompt_len() - if num_tokens > prompt_length: - return (self._prompt_token_ids_tuple, - tuple(self._output_token_ids[:num_tokens - prompt_length])) - else: - return (self._prompt_token_ids_tuple[:num_tokens], None) - - def get_num_computed_tokens(self) -> int: - """Return the number of prefill tokens that are already computed.""" - return self._num_computed_tokens - - def update_num_computed_tokens(self, num_new_computed_tokens: int): - """Update number of tokens computed so far.""" - self._num_computed_tokens += num_new_computed_tokens - assert self._num_computed_tokens <= self.get_len(), ( - self._num_computed_tokens, self.get_len()) - # If all tokens are computed, it means it is in decoding phase. - if self.get_num_uncomputed_tokens() == 0: - self._stage = SequenceStage.DECODE - - def get_num_cached_tokens(self) -> int: - """Return the number of tokens with prefix cache hit.""" - return self._num_cached_tokens - - def update_num_cached_tokens(self, num_cached_tokens: int): - """Update the number of tokens with prefix cache hit.""" - self._num_cached_tokens = num_cached_tokens - - def reset_state_for_recompute(self) -> None: - """Reset the number of computed tokens from this sequence. It is - supposed to be called when a sequence needs to be started from - the beginning again (e.g., sequence is preempted). 
- """ - self._num_computed_tokens = 0 - self._stage = SequenceStage.PREFILL - self._new_appended_tokens = [] - - def get_num_uncomputed_tokens(self) -> int: - """Return the number of prefill tokens that are not computed.""" - # we use `get_len()` which includes prompt_len + output_len instead - # of prompt_len here. This is because during recompute we need to - # prefill for both prompt and output. - return self.get_len() - self.get_num_computed_tokens() - - def get_last_token_id(self) -> int: - if not self._output_token_ids: - return self._prompt_token_ids[-1] - return self._output_token_ids[-1] - - def get_prompt_token_ids(self) -> tuple[int, ...]: - return self.prompt_token_ids - - def get_output_token_ids(self) -> tuple[int, ...]: - return self.output_token_ids - - def get_delta_and_reset(self) -> SequenceDataDelta: - delta = SequenceDataDelta(self._new_appended_tokens, - self._cumulative_logprob, - self.get_num_computed_tokens(), self.stage) - # Reset delta state. - self._new_appended_tokens = [] - return delta - - def apply_delta(self, delta: SequenceDataDelta): - self._num_computed_tokens = delta.new_num_computed_tokens - self._cumulative_logprob = delta.new_cumulative_logprob - self._stage = delta.new_stage - self._output_token_ids.extend(delta.new_output_token_ids) - self._cached_all_token_ids.extend(delta.new_output_token_ids) - - @property - def stage(self) -> SequenceStage: - return self._stage - - def __repr__(self) -> str: - return (f"SequenceData(" - f"prompt_token_ids={self._prompt_token_ids}, " - f"prompt_embeds.shape=" - f"{getattr(self._prompt_embeds, 'shape', None)}, " - f"output_token_ids={self.output_token_ids}, " - f"cumulative_logprob={self.cumulative_logprob}, " - f"get_num_computed_tokens={self.get_num_computed_tokens()})") - - -class Sequence: - """Stores the data, status, and block information of a sequence. - - The sequence is constructed from the - [`DecoderOnlyInputs`][vllm.inputs.data.DecoderOnlyInputs] (for decoder-only) - or [`EncoderDecoderInputs`][vllm.inputs.data.EncoderDecoderInputs] - (for encoder-decoder) instance passed in through the `inputs` - constructor argument. - - Args: - seq_id: The ID of the sequence. - inputs: The inputs of the sequence. - block_size: The block size of the sequence. Should be the same as the - block size used by the block manager and cache engine. - eos_token_id: The end-of-sequence (EOS) token id recognized by this LLM. - lora_request: LoRA request. 
- """ - - def __init__( - self, - seq_id: int, - inputs: SingletonInputs, - block_size: int, - eos_token_id: Optional[int] = None, - lora_request: Optional[LoRARequest] = None, - ) -> None: - self.seq_id = seq_id - self.inputs = inputs - self.block_size = block_size - self.eos_token_id = eos_token_id - self.lora_request = lora_request - - self.data = SequenceData.from_seqs( - self.prompt_token_ids, - prompt_embeds=self.inputs["prompt_embeds"] - if self.inputs["type"] == "embeds" else None) - self.output_logprobs: SampleLogprobs = [] - self.output_text = "" - - self.status = SequenceStatus.WAITING - self.stop_reason: Union[int, str, None] = None - - # These are used to keep track of delta outputs - self._last_output_token_ids_offset: int = 0 - self._last_output_text_offset: int = 0 - - # Used for incremental detokenization - self.prefix_offset = 0 - self.read_offset = 0 - # Input + output tokens - self.tokens: Optional[list[str]] = None - - @property - def n_blocks(self) -> int: - return (self.get_len() + self.block_size - 1) // self.block_size - - @property - def prompt(self) -> Optional[str]: - if self.inputs["type"] == "embeds": - return None - return self.inputs.get("prompt") - - @property - def prompt_token_ids(self) -> list[int]: - if self.inputs["type"] == "embeds": - return [0] * len(self.inputs["prompt_embeds"]) - return self.inputs["prompt_token_ids"] - - @property - def multi_modal_data(self) -> MultiModalKwargs: - if self.inputs["type"] == "multimodal": - return self.inputs["mm_kwargs"].get_data() - - return MultiModalKwargs() - - @property - def multi_modal_placeholders(self) -> MultiModalPlaceholderDict: - if self.inputs["type"] == "multimodal": - return self.inputs["mm_placeholders"] - - return {} - - @property - def lora_int_id(self) -> int: - return self.lora_request.lora_int_id if self.lora_request else 0 - - def get_output_text_to_return(self, buffer_length: int, - delta: bool) -> str: - """If delta is True, only new text since the last call to - this method is returned""" - - # We return the full output text if the sequence is finished. - truncate = buffer_length and not self.is_finished() - if not delta: - return self.output_text[:-buffer_length] if truncate else ( - self.output_text) - length = len(self.output_text) - if truncate: - length -= buffer_length - last_offset = self._last_output_text_offset - if last_offset < length: - self._last_output_text_offset = length - return self.output_text[last_offset:length] - return "" - - def get_output_token_ids_to_return( - self, delta: bool) -> Union[GenericSequence[int], int]: - """If delta is True, only new tokens since the last call to - this method are returned""" - if not delta: - return self.get_output_token_ids() - - output_len = self.get_output_len() - - # Get the number of new tokens - num_new_tokens = output_len - self._last_output_token_ids_offset - self._last_output_token_ids_offset = output_len - - # Return new tokens - if num_new_tokens == 1: - # Optimization for single decode token case - # (which is what we have most of the time) - return self.data._cached_all_token_ids[-1] - - if num_new_tokens == 0: - return [] - - return self.data._cached_all_token_ids[-num_new_tokens:] - - def hash_of_block(self, logical_idx: int) -> int: - # TODO This can produce incorrect hash when block size > prompt size - - # Compute the number of tokens in the sequence - # TODO: The current hashing function is O(L^2). We should optimize - # this in the future. 
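To make the O(L^2) remark in the comment above concrete (a sketch with assumed values, not part of the patch): each block hashes the entire token prefix up to and including itself, so hashing n blocks touches on the order of 1 + 2 + ... + n blocks' worth of tokens.

# Assumed toy values for illustration.
block_size = 4
token_ids = list(range(16))  # four full blocks
num_blocks = len(token_ids) // block_size

block_hashes = []
for logical_idx in range(num_blocks):
    # Mirrors num_hashed_tokens_of_block below: the hashed prefix grows with the index.
    num_tokens = logical_idx * block_size + block_size
    block_hashes.append(hash(tuple(token_ids[:num_tokens])))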
- num_tokens = self.num_hashed_tokens_of_block(logical_idx) - hashed_tokens = self.data.get_prefix_token_ids(num_tokens) - return hash((hashed_tokens, self.lora_int_id)) - - def extra_hash(self) -> Optional[int]: - """ - This function computes an extra hash for a sequence, specifically - designed for prefix caching mode. The final sequence hash is determined - by applying token_ids from the sequence's blocks. - """ - if self.lora_int_id == 0: - return None - - # NOTE: If there are additional factors influencing the block aside from - # token_ids, include them as input parameters to the hash. - return hash(self.lora_int_id) - - def num_hashed_tokens_of_block(self, logical_idx: int): - return logical_idx * self.block_size + self.block_size - - def reset_state_for_recompute(self): - """Reset the sequence states for recomputation.""" - self.data.reset_state_for_recompute() - - def append_token_id(self, - token_id: int, - logprobs: dict[int, Logprob], - token_embed: Optional[torch.Tensor] = None) -> None: - assert token_id in logprobs - self.output_logprobs.append(logprobs) - self.data.append_token_id(token_id, logprobs[token_id].logprob, - token_embed) - - def get_len(self) -> int: - return self.data.get_len() - - def get_prompt_len(self) -> int: - return self.data.get_prompt_len() - - def get_output_len(self) -> int: - return self.data.get_output_len() - - def get_token_ids(self) -> list[int]: - return self.data.get_token_ids() - - def get_prompt_token_ids(self) -> tuple[int, ...]: - return self.data.get_prompt_token_ids() - - def get_last_token_id(self) -> int: - return self.data.get_last_token_id() - - def get_output_token_ids(self) -> tuple[int, ...]: - return self.data.get_output_token_ids() - - def get_cumulative_logprob(self) -> float: - return self.data.cumulative_logprob - - def is_finished(self) -> bool: - return SequenceStatus.is_finished(self.status) - - def fork(self, new_seq_id: int) -> "Sequence": - new_seq = copy.deepcopy(self) - new_seq.seq_id = new_seq_id - return new_seq - - def get_num_new_tokens(self) -> int: - """Get the number of new tokens to be computed. - - Returns: - The new number of tokens to be computed. I.e., 1 for decode, or - the remaining prompt size for prefill. - """ - if self.data.stage == SequenceStage.DECODE: - return 1 - return self.data.get_num_uncomputed_tokens() - - def get_num_computed_tokens(self) -> int: - return self.data.get_num_computed_tokens() - - def is_prefill(self) -> bool: - return self.data.stage == SequenceStage.PREFILL - - def __repr__(self) -> str: - return (f"Sequence(seq_id={self.seq_id}, " - f"status={self.status.name}, " - f"num_blocks={self.n_blocks})") - - -class SequenceGroupState(msgspec.Struct, - omit_defaults=True): # type: ignore[call-arg] - """Mutable state tied to a specific sequence group""" - - # for multi-step decoding - num_steps: int = 1 - current_step: int = 0 - - @property - def remaining_steps(self) -> int: - return self.num_steps - self.current_step - - -class SequenceGroup: - """A group of sequences that are generated from the same prompt. - - Args: - request_id: The ID of the request. - seqs: The list of sequences. - sampling_params: The sampling parameters used to generate the outputs. - arrival_time: The arrival time of the request. - lora_request: LoRA request. - pooling_params: The parameters used to generate the pooler - for a pooling model. - pooled_data: The extracted hidden states from a pooling model. - encoder_seq: Optional, the single encoder sequence. 
Should be None - unless you are working with an encoder/decoder model. - trace_headers: OpenTelemetry trace headers. - priority: User-defined priority of the request. - draft_size: The number of speculative tokens plus one from the target - model; equal to max number of tokens a step can generate - for single-draft speculative decoding but larger than - that for multi-draft SD (currently not supported). - """ - - def __init__(self, - request_id: str, - seqs: list[Sequence], - arrival_time: float, - sampling_params: Optional[SamplingParams] = None, - lora_request: Optional[LoRARequest] = None, - pooling_params: Optional[PoolingParams] = None, - pooled_data: Optional[torch.Tensor] = None, - encoder_seq: Optional[Sequence] = None, - trace_headers: Optional[Mapping[str, str]] = None, - priority: int = 0, - draft_size: int = 1) -> None: - self.request_id = request_id - self.seqs = seqs - self.first_seq = seqs[0] - self.arrival_time = arrival_time - self.is_single_seq = len(seqs) == 1 - self.seqs_dict = {seq.seq_id: seq for seq in seqs} - - self.sampling_params = sampling_params - self.metrics = RequestMetrics(arrival_time=arrival_time, - last_token_time=arrival_time, - first_scheduled_time=None, - first_token_time=None, - time_in_queue=None) - self.last_token_latency = 0.0 - self.lora_request = lora_request - self.prompt_logprobs: Optional[PromptLogprobs] = None - self.state = SequenceGroupState() - self.pooling_params = pooling_params - self.pooled_data = pooled_data - self.encoder_seq = encoder_seq - self.trace_headers = trace_headers - self.priority = priority - - self.cached_request_output = None - - @property - def prompt(self) -> Optional[str]: - return self.first_seq.prompt - - @property - def prompt_token_ids(self) -> list[int]: - return self.first_seq.prompt_token_ids - - @property - def encoder_prompt(self) -> Optional[str]: - # There are either 0 or 1 encoder sequences - # If one is present, its prompt is distinct - # from the decoder's. - return (self.encoder_seq.prompt - if self.encoder_seq is not None else None) - - @property - def encoder_prompt_token_ids(self) -> Optional[list[int]]: - # There are either 0 or 1 encoder sequences - # If one is present, its prompt token ids are - # distinct from the decoder's. - return (self.encoder_seq.prompt_token_ids - if self.encoder_seq is not None else None) - - @property - def multi_modal_data(self) -> MultiModalKwargs: - if self.first_seq.multi_modal_data: - return self.first_seq.multi_modal_data - elif self.encoder_seq is not None: - return self.encoder_seq.multi_modal_data - return MultiModalKwargs() - - @property - def multi_modal_placeholders(self) -> MultiModalPlaceholderDict: - if self.first_seq.multi_modal_data: - return self.first_seq.multi_modal_placeholders - elif self.encoder_seq is not None: - return self.encoder_seq.multi_modal_placeholders - return {} - - @property - def lora_int_id(self) -> int: - return self.lora_request.lora_int_id if self.lora_request else 0 - - def set_last_token_time(self, now: float) -> None: - """Sets the last token time for Request level timings.""" - # If still in prefill phase, assertion fails. 
- assert not self.is_prefill(), ( - "seq_group.set_last_token_time() should not be called " - "if the seq_group is in prefill phase.") - self.last_token_latency = now - self.metrics.last_token_time - self.metrics.last_token_time = now - - def get_last_token_latency(self) -> float: - """Returns the latency of the last token.""" - assert not self.is_prefill(), ( - "seq_group.get_last_token_latency() should not be called " - "if the seq_group is in prefill phase.") - return self.last_token_latency - - def maybe_set_first_token_time(self, time: float) -> None: - """Sets the first token time for Request level timings.""" - # Note: in a case where a sequence_group is swapped and - # recomputed, the time between iterations is counted - # in TPOT, rather than recalculating TTFT (since from the ) - # POV of the user, there is simply a long generation delay. - if (self.metrics.first_token_time is None - and self.first_seq.get_output_len() == 1): - self.metrics.first_token_time = time - - def maybe_set_first_scheduled_time(self, time: float) -> None: - """Sets the first scheduled time and time in queue for Request - level timings.""" - if self.metrics.first_scheduled_time is None: - self.metrics.first_scheduled_time = time - self.metrics.time_in_queue = time - self.metrics.arrival_time - - def set_finished_time(self, time: Optional[float]) -> None: - """Sets the finished time for Request level timings.""" - self.metrics.finished_time = time - - def get_max_num_running_seqs(self) -> int: - """The maximum number of sequences running in parallel in the remaining - lifetime of the request.""" - if self.is_single_seq: - return 0 if self.first_seq.is_finished() else 1 - return self.num_seqs() - self.num_finished_seqs() - - def get_seqs( - self, - status: Optional[SequenceStatus] = None, - ) -> list[Sequence]: - if status is None: - return self.seqs - - if self.is_single_seq: - return self.seqs if self.first_seq.status == status else [] - - return [seq for seq in self.seqs if seq.status == status] - - def is_encoder_decoder(self) -> bool: - return self.encoder_seq is not None - - def get_encoder_seq(self) -> Optional[Sequence]: - return self.encoder_seq - - def get_finished_seqs(self) -> list[Sequence]: - if self.is_single_seq: - return self.seqs if self.first_seq.is_finished() else [] - - return [seq for seq in self.seqs if seq.is_finished()] - - def update_num_computed_tokens(self, num_new_computed_tokens: int): - """Update number of tokens computed so far.""" - for seq in self.seqs: - if not seq.is_finished(): - seq.data.update_num_computed_tokens(num_new_computed_tokens) - - def get_num_uncomputed_tokens(self) -> int: - num_uncomputed_tokens = 0 - for seq in self.seqs: - if not seq.is_finished(): - num_uncomputed_tokens += seq.data.get_num_uncomputed_tokens() - return num_uncomputed_tokens - - def num_seqs(self, status: Optional[SequenceStatus] = None) -> int: - # Optimization. We don't need to call get_seqs if we don't need to - # filter by states. 
- if status is None: - return len(self.seqs) - - if self.is_single_seq: - return 1 if self.seqs[0].status == status else 0 - - return len(self.get_seqs(status)) - - def num_finished_seqs(self) -> int: - if self.is_single_seq: - return 1 if self.seqs[0].is_finished() else 0 - return len(self.get_finished_seqs()) - - def is_finished(self) -> bool: - if self.is_single_seq: - return self.first_seq.is_finished() - return all(seq.is_finished() for seq in self.seqs) - - def is_prefill(self) -> bool: - return self.first_seq.is_prefill() - - def __repr__(self) -> str: - return (f"SequenceGroup(request_id={self.request_id}, " - f"sampling_params={self.sampling_params}, " - f"num_seqs={len(self.seqs)})") - - def uses_prompt_embeds(self) -> bool: - """Returns True if the sequence group uses input embeds.""" - return any(seq.data.prompt_embeds is not None for seq in self.seqs) - - -class SequenceGroupMetadataDelta( - msgspec.Struct, - tag=True, # type: ignore[call-arg] - array_like=True, # type: ignore[call-arg] - omit_defaults=True): # type: ignore[call-arg] - """Delta of SequenceGroupMetadata. - - After sending the first SequenceGroupMetadata, vLLM scheduler - only sends delta to reduce the data payload size. - """ - seq_data_delta: dict[int, SequenceDataDelta] - request_id: str - block_tables: dict[int, list[int]] - is_prompt: bool - do_sample: bool = True - token_chunk_size: Optional[int] = None - computed_block_nums: Optional[list[int]] = None - state: Optional[SequenceGroupState] = msgspec.field( - default_factory=lambda: SequenceGroupState()) - - -class SequenceGroupMetadata( - msgspec.Struct, - tag=True, # type: ignore[call-arg] - array_like=True, # type: ignore[call-arg] - omit_defaults=True): # type: ignore[call-arg] - """Metadata for a sequence group. Used to create `AttentionMetadata`. - - Attributes: - request_id: The ID of the request. - is_prompt: Whether the request is at prompt stage. - seq_data: The sequence data. (Seq id -> sequence data) - sampling_params: The sampling parameters used to generate the outputs. - block_tables: The block tables. (Seq id -> list of physical block - numbers) - do_sample: True if sampling is required. Sampling is not required when - e.g., prefill is chunked, and the current iteration only computes - query tokens for prefill, we don't need sampling. - pooling_params: Pooling parameters. - lora_request: LoRA request. - computed_block_nums: The block numbers that are already computed, - used in prefix caching. - state: Internal state tied to this sequence group. - token_type_ids: Token type IDs. - multi_modal_data: Multi modal data. - multi_modal_placeholders: Multi modal placeholders. - encoder_seq_data: Optional sequence data for encoder prompt - (SequenceGroup.encoder_seq). Should be None - unless you are working with an encoder/decoder - model. - cross_block_table: Optional cross-attention block table associated - with the encoder prompt - (SequenceGroup.encoder_seq). Should be None - unless you are working with an encoder/decoder - model. 
- """ - - request_id: str - is_prompt: bool - seq_data: dict[int, SequenceData] - sampling_params: Optional[SamplingParams] - block_tables: dict[int, list[int]] - do_sample: bool = True - pooling_params: Optional[PoolingParams] = None - lora_request: Optional[LoRARequest] = None - computed_block_nums: Optional[list[int]] = None - state: Optional[SequenceGroupState] = msgspec.field( - default_factory=lambda: SequenceGroupState()) - multi_modal_data: Optional[MultiModalKwargs] = None - multi_modal_placeholders: Optional[MultiModalPlaceholderDict] = None - encoder_seq_data: Optional[SequenceData] = None - cross_block_table: Optional[list[int]] = None - token_chunk_size: Optional[int] = None - - ### Stateful fields that are lazily defined. ### - # The number of speculative tokens adopted in this request. - # None means specuative decoding is not used. - # Zero means speculative decoding is disabled for some reasons. - # TODO: We should maintain this states out of the sequence group. - num_speculative_tokens: Optional[int] = None - - def __post_init__(self): - if self.seq_data is not None and self.token_chunk_size is None: - if self.is_prompt: - self.token_chunk_size = next(iter( - self.seq_data.values())).get_len() - else: - self.token_chunk_size = 1 - - @property - def lora_int_id(self) -> int: - return self.lora_request.lora_int_id if self.lora_request else 0 - - # Multi-Step Chunked-Prefill property - @property - def is_single_step_prompt(self) -> bool: - # do_sample is true, only when the token_chunk_size matches the - # num_uncomputed_tokens of the sequence. This indicates that - # the prompt will finish processing in a single `execute_model` - # step. - return self.is_prompt and self.do_sample - - def get_first_seq_id(self) -> int: - # This is an efficient way of fetching the seq_id when - # we know this SequenceGroup has only one sequence. - return next(iter(self.seq_data)) - - def apply_delta(self, - sequence_group_metadata_delta: SequenceGroupMetadataDelta): - for id, delta in sequence_group_metadata_delta.seq_data_delta.items(): - self.seq_data[id].apply_delta(delta) - assert self.request_id == sequence_group_metadata_delta.request_id - self.block_tables = sequence_group_metadata_delta.block_tables - self.token_chunk_size = sequence_group_metadata_delta.token_chunk_size - self.do_sample = sequence_group_metadata_delta.do_sample - self.is_prompt = sequence_group_metadata_delta.is_prompt - - def finish_step(self) -> None: - assert self.state is not None - assert self.state.current_step < self.state.num_steps, \ - f"current step {self.state.current_step}, num_steps {self.state.num_steps}" # noqa - self.state.current_step += 1 - - -class SequenceOutput( - msgspec.Struct, - omit_defaults=True, # type: ignore[call-arg] - array_like=True): # type: ignore[call-arg] - """The model output associated with a sequence. - - Attributes: - parent_seq_id: The ID of the parent sequence (for forking in beam - search). - output_token: The output token ID. - logprobs: The logprobs of the output token. - (Token id -> logP(x_i+1 | x_0, ..., x_i)) - output_embed: Optional output embedding tensor. 
- """ - parent_seq_id: int - output_token: int - logprobs: dict[int, Logprob] - output_embed: Optional[torch.Tensor] = None - - def __repr__(self) -> str: - output_embed_shape = \ - self.output_embed.shape if self.output_embed is not None else None - return (f"SequenceOutput(parent_seq_id={self.parent_seq_id}, " - f"output_token={self.output_token}, " - f"output_embed.shape={output_embed_shape}, " - f"logprobs={self.logprobs})") - - def __eq__(self, other: object) -> bool: - if not isinstance(other, SequenceOutput): - raise NotImplementedError() - equal = (self.parent_seq_id == other.parent_seq_id - and self.output_token == other.output_token) - log_probs_equal = other.logprobs == self.logprobs - return equal and log_probs_equal - - -class SequenceGroupOutput(ABC): - """The base class for model outputs associated with a sequence group.""" - - @abstractmethod - def __repr__(self) -> str: - pass - - @abstractmethod - def __eq__(self, other: object) -> bool: - pass - - -class CompletionSequenceGroupOutput( - msgspec.Struct, - omit_defaults=True, # type: ignore[call-arg] - array_like=True): # type: ignore[call-arg] - """The model output associated with a completion sequence group.""" - __metaclass__ = SequenceGroupOutput - samples: list[SequenceOutput] - # Prompt logprob for each prompt query token. - prompt_logprobs: Optional[PromptLogprobs] - step_index: Optional[int] = 0 - - def __repr__(self) -> str: - return (f"CompletionSequenceGroupOutput(samples={self.samples}, " - f"prompt_logprobs={self.prompt_logprobs})") - - def __eq__(self, other: object) -> bool: - if not isinstance(other, CompletionSequenceGroupOutput): - raise NotImplementedError() - return (self.samples == other.samples - and self.prompt_logprobs == other.prompt_logprobs) - - class PoolingSequenceGroupOutput( msgspec.Struct, omit_defaults=True, # type: ignore[call-arg] array_like=True, # type: ignore[call-arg] ): """The model output associated with a pooling sequence group.""" - __metaclass__ = SequenceGroupOutput # Annotated as Any to be compatible with msgspec # The actual type is in SequenceGroup.pooled_data data: Any @@ -1161,305 +143,9 @@ def __eq__(self, other: object): self.__class__) and self.outputs == other.outputs -def get_all_seq_ids( - seq_group_metadata_list: list[SequenceGroupMetadata]) -> list[int]: - """Given a list of SequenceGroupMetadata, create a list of all - sequence ids. - """ - return [seq_id for sg in seq_group_metadata_list for seq_id in sg.seq_data] - - -def get_all_seq_ids_and_request_ids( - seq_group_metadata_list: list[SequenceGroupMetadata] -) -> tuple[list[int], dict[str, set[int]]]: - """Given a list of SequenceGroupMetadata, create a list of all - sequence ids. - """ - seq_ids: list[int] = [] - request_id_seq_ids_mapping: defaultdict[str, set[int]] = defaultdict(set) - for sg in seq_group_metadata_list: - for seq_id in sg.seq_data: - seq_ids.append(seq_id) - request_id_seq_ids_mapping[sg.request_id].add(seq_id) - return seq_ids, request_id_seq_ids_mapping - - -class HiddenStates(msgspec.Struct, array_like=True, - omit_defaults=True): # type: ignore[call-arg] - """Hidden states corresponding to in-progress sequences. - Used in speculative decoding to pass hidden states from - the target model to the proposer model. - - seq_ids are the sequence ids of each entry of the batch - dimension of the hidden_states tensor""" - # Scorer hidden states. For prefill step, it is used for hidden states of - # all tokens, whereas for decode step, it is used for last accepted tokens. 
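A toy illustration of the index-based bookkeeping these speculative-decoding hidden states rely on (see `prune` further below); the batch size, hidden size, and sequence ids here are invented and the snippet is not part of the patch.

import torch

# Assumed: four sequences in the current batch, hidden size 8.
hidden_states = torch.randn(4, 8)
seq_ids = [11, 12, 13, 14]
# Only sequences 12 and 14 remain scheduled for the next decode step.
kept_seq_ids = [12, 14]
index = [seq_ids.index(seq_id) for seq_id in kept_seq_ids]
hidden_states = hidden_states[index]
seq_ids = kept_seq_ids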
- hidden_states: torch.Tensor - # The sequence group metadata list. Only needed for decode step. - seq_group_metadata_list: Optional[list[SequenceGroupMetadata]] = None - # Scorer hidden states of the 2nd last token proposed by the proposer ( - # irrespective of whether it was accepted or not). Only used for cases when - # last proposed token is accepted (i.e., in case of bonus tokens). For the - # case of no bonus tokens, these are ignored. - second_last_token_hidden_states: Optional[torch.Tensor] = None - - _seq_ids: list[int] = msgspec.field(default_factory=list) - - def __post_init__(self): - if self.seq_group_metadata_list is not None: - assert len(self.seq_group_metadata_list) == len(self.hidden_states) - self._seq_ids = get_all_seq_ids(self.seq_group_metadata_list) - - @property - def seq_ids(self) -> list[int]: - return self._seq_ids - - def update(self, - hidden_states: torch.Tensor, - seq_group_metadata_list: list[SequenceGroupMetadata], - second_last_token_hidden_states: Optional[torch.Tensor] = None): - """Update hidden states from target model invocation. Only used for - decode steps""" - assert len(seq_group_metadata_list) == len(hidden_states) - self._seq_ids.extend(get_all_seq_ids(seq_group_metadata_list)) - self.hidden_states = torch.cat([self.hidden_states, hidden_states]) - - if self.second_last_token_hidden_states is not None: - # Adding dummy hidden_states to this to maintain same shape - self.second_last_token_hidden_states = torch.cat([ - self.second_last_token_hidden_states, - torch.zeros_like(hidden_states) - if second_last_token_hidden_states is None else - second_last_token_hidden_states - ]) - - def prune(self, - seq_group_metadata_list: list[SequenceGroupMetadata]) -> None: - """Prune to provided list of sequence ids. Only used for decode steps. - """ - # Currently this prunes all seq_ids not present in - # seq_group_metadata_list which might cause problems where a sequence - # may be "paused" then "resumed" later. This should only prune sequences - # which are confirmed to be aborted. - seq_ids = get_all_seq_ids(seq_group_metadata_list) - # Only keep sequence IDs that exist in self._seq_ids - seq_ids = [seq_id for seq_id in seq_ids if seq_id in self._seq_ids] - if seq_ids != self._seq_ids: - # Batch contents changed - prune removed sequences. - index = [self._seq_ids.index(seq_id) for seq_id in seq_ids] - self.hidden_states = self.hidden_states[index] - if self.second_last_token_hidden_states is not None: - self.second_last_token_hidden_states = self\ - .second_last_token_hidden_states[index] - self._seq_ids = seq_ids - - def expand_with_bonus_tokens( - self, seq_with_bonus_token_in_last_step: set) -> None: - """Expand hidden states for sequences with bonus tokens. This is in - alignment with `MultiStepWorker._expand_execute_model_request`.""" - if self.second_last_token_hidden_states is None \ - or not seq_with_bonus_token_in_last_step: - return - - index = [] - for seq_id in self._seq_ids: - i = self._seq_ids.index(seq_id) - if seq_id in seq_with_bonus_token_in_last_step: - index.append(i + len(self._seq_ids)) - index.append(i) - - self.hidden_states = torch.cat( - [self.hidden_states, self.second_last_token_hidden_states])[index] - - class ExecuteModelRequest( msgspec.Struct, array_like=True, # type: ignore[call-arg] omit_defaults=True): # type: ignore[call-arg] - """The model execution request, containing CPU metadata only. The LLM - engine should create an instance of this class for each request batch.""" - # The sequence group metadata list. 
- seq_group_metadata_list: list[Union[SequenceGroupMetadata, - SequenceGroupMetadataDelta]] - # Blocks to swap in. List of CPU -> GPU block number. - blocks_to_swap_in: list[tuple[int, - int]] = msgspec.field(default_factory=list) - # Blocks to swap out. List of GPU -> CPU block number. - blocks_to_swap_out: list[tuple[int, - int]] = msgspec.field(default_factory=list) - # Blocks to copy. Source to dest block. - blocks_to_copy: list[tuple[int, int]] = msgspec.field(default_factory=list) - # Virtual engine ID for pipeline parallel. - virtual_engine: int = 0 - # The number of slots for lookahead decoding. - num_lookahead_slots: int = 0 - # The number of requests in the running queue. - running_queue_size: int = 0 - # Optional hidden states from prior step. - previous_hidden_states: Optional[HiddenStates] = None - # The number of forward steps to run. - num_steps: int = 1 - # Finished request ids since last step. - finished_requests_ids: list[str] = msgspec.field(default_factory=list) - # The last sampled token ids for multi step decoding. - last_sampled_token_ids: Optional[torch.Tensor] = None - # Async callback - async_callback: Optional[Callable] = None - - @property - def is_last_step(self) -> bool: - # TODO(will) make this be able to handle batches with variable number of - # steps - assert len(self.seq_group_metadata_list) > 0 - first_seq_group = self.seq_group_metadata_list[0] - assert first_seq_group.state is not None - return first_seq_group.state.remaining_steps == 1 - - @property - def current_step(self) -> int: - # TODO(will) make this be able to handle batches with variable number of - # steps - assert len(self.seq_group_metadata_list) > 0 - state = self.seq_group_metadata_list[0].state - assert state is not None - return state.current_step - - def clone( - self, seq_group_metadata_list: list[Union[SequenceGroupMetadata, - SequenceGroupMetadataDelta]] - ) -> "ExecuteModelRequest": - """Clone the request with a new sequence group metadata list.""" - return ExecuteModelRequest( - seq_group_metadata_list=seq_group_metadata_list, - blocks_to_swap_in=self.blocks_to_swap_in.copy(), - blocks_to_swap_out=self.blocks_to_swap_out.copy(), - blocks_to_copy=self.blocks_to_copy.copy(), - virtual_engine=self.virtual_engine, - num_lookahead_slots=self.num_lookahead_slots, - running_queue_size=self.running_queue_size, - previous_hidden_states=self.previous_hidden_states, - num_steps=self.num_steps, - finished_requests_ids=self.finished_requests_ids, - last_sampled_token_ids=self.last_sampled_token_ids.clone() - if self.last_sampled_token_ids is not None else None, - async_callback=self.async_callback) - - -@dataclass -class SequenceGroupBase: - group_id: str # the original request id before splitting - - assembled_seq_group: Optional[SequenceGroup] = None - - # seq id to a unique index inside this group - seq_id_to_index: dict[str, int] = field(default_factory=dict) - - # seq ids to be finished - to_be_finished: dict[str, SequenceGroup] = field(default_factory=dict) - - # seq id to finished sequences - finished_reqs: dict[str, SequenceGroup] = field(default_factory=dict) - - streaming: bool = False - - output_produced: bool = False - - @staticmethod - def add_request(request_id: str, engine, params, *args, **kwargs): - """When we are ready to add a request with request_id and params - into the engine, we can split the request into multiple requests. - """ - raise NotImplementedError - - def finish_seq(self, seq: SequenceGroup): - """The sequence `seq` finishes, we should record the information. 
- """ - del self.to_be_finished[seq.request_id] - self.finished_reqs[seq.request_id] = seq - - def maybe_assemble_group( - self, seq_group: SequenceGroup) -> Optional[SequenceGroup]: - """Assemble the sequence group, for producing the final - output, or adding request in the engine again. - """ - raise NotImplementedError - - -class ParallelSampleSequenceGroup(SequenceGroupBase): - - @staticmethod - def add_request(request_id: str, engine, params, **kwargs): - original_params = params - group = ParallelSampleSequenceGroup(request_id) - seqs = [] - for i in range(original_params.n): - request_id_i = f"{request_id}_parallel_sample_{i}" - group.seq_id_to_index[request_id_i] = i - params = original_params.clone() - params.n = 1 - if params.seed is not None: - params.seed += i - seq_group = engine._add_processed_request( - request_id_i, - params=params, - **kwargs, - ) # type: ignore - assert seq_group is not None - engine.seq_id_to_seq_group[request_id_i] = group - group.to_be_finished[request_id_i] = seq_group - seqs.append(seq_group.seqs[0]) - - # for parallel sampling, the `assembled_seq_group` is always - # available, since we have all the sequences ready, and they - # will not change. - group.assembled_seq_group = SequenceGroup( - request_id=request_id, - seqs=seqs, - arrival_time=seq_group.arrival_time, - sampling_params=original_params, - lora_request=seq_group.lora_request, - pooling_params=seq_group.pooling_params, - pooled_data=seq_group.pooled_data, - encoder_seq=seq_group.encoder_seq, - trace_headers=seq_group.trace_headers, - priority=seq_group.priority, - ) - - group.streaming = params.output_kind == RequestOutputKind.DELTA - group.output_produced = False - - def maybe_assemble_group( - self, seq_group: SequenceGroup) -> Optional[SequenceGroup]: - - # in the streaming mode, we will return the assembled sequence - # for the first remaining sequence, and then return None for the - # rest of sequences - if self.streaming: - first_remaining_id = next(iter(self.to_be_finished)) - if seq_group.request_id == first_remaining_id: - return self.assembled_seq_group - return None - - # in the non-streaming mode, we will return the assembled sequence - # when the last sequences finishes, and then return None for the - # rest of the time - if (len(self.to_be_finished) == 1 - and seq_group.request_id in self.to_be_finished - and seq_group.is_finished()): - assert self.assembled_seq_group is not None - params = self.assembled_seq_group.sampling_params - assert isinstance(params, SamplingParams) - if not self.output_produced: - self.output_produced = True - if params._real_n is not None: - # Get the top-n sequences. - n = params._real_n or params.n - seqs = self.assembled_seq_group.seqs - sorting_key = lambda seq: seq.get_cumulative_logprob() - sorted_seqs = sorted(seqs, key=sorting_key, reverse=True) - top_n_seqs = sorted_seqs[:n] - self.assembled_seq_group.seqs = top_n_seqs - return self.assembled_seq_group - if self.output_produced: - return None - return None + # Placeholder. Remove. 
+ pass diff --git a/vllm/transformers_utils/detokenizer.py b/vllm/transformers_utils/detokenizer.py deleted file mode 100644 index e2d2846a2807..000000000000 --- a/vllm/transformers_utils/detokenizer.py +++ /dev/null @@ -1,162 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import Optional - -from vllm.logprobs import Logprob -from vllm.sequence import (VLLM_INVALID_TOKEN_ID, SamplingParams, Sequence, - SequenceGroup) - -from .detokenizer_utils import (convert_prompt_ids_to_tokens, - detokenize_incrementally) -from .tokenizer import AnyTokenizer - - -class Detokenizer: - """Provides methods to decode the output of a model into text.""" - - def __init__(self, tokenizer: AnyTokenizer): - self.tokenizer = tokenizer - - def decode_prompt_logprobs_inplace(self, seq_group: SequenceGroup, - prompt_logprobs: list[Optional[dict[ - int, Logprob]]], - position_offset: int) -> None: - """Decodes the logprobs for the prompt of a sequence group. - - Args: - seq_group: The sequence group to decode. - prompt_logprobs: The logprobs to decode. - position_offset: Offset of the first index of the logprobs - relative to the start of the sequence (for chunked prefill). - - Returns: - The prompt logprobs with the decoded tokens. - """ - prms = seq_group.sampling_params - assert prms is not None - - # We can pick any sequence for the prompt. - seq = seq_group.get_seqs()[0] - # Only prompt, without the generated token. - all_token_ids = seq.get_token_ids() - prompt_token_ids = all_token_ids[:-1] - prefix_offset = 0 - read_offset = 0 - next_iter_prefix_offset = 0 - next_iter_read_offset = 0 - next_iter_tokens: list[str] = [] - prev_tokens = None - - for token_position_in_logprob, prompt_logprobs_for_token in enumerate( - prompt_logprobs): - - # Absolute token position equals the index in the logprobs - # list plus the offset of the entire logprobs list relative - # to the start of the sequence. - token_position = token_position_in_logprob + position_offset - if not prompt_logprobs_for_token: - continue - for token_id, sample_logprob in prompt_logprobs_for_token.items(): - if (sample_logprob.decoded_token is None - and token_id != VLLM_INVALID_TOKEN_ID): - prompt_token_ids_with_token = ( - prompt_token_ids[:token_position] + [token_id]) - (new_tokens, new_text, new_prefix_offset, - new_read_offset) = detokenize_incrementally( - tokenizer=self.tokenizer, - all_input_ids=prompt_token_ids_with_token, - prev_tokens=prev_tokens, - prefix_offset=prefix_offset, - read_offset=read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms. - spaces_between_special_tokens, - ) - - sample_logprob.decoded_token = new_text - - # Use the offsets & prev tokens corresponding to - # real tokens to ensure detokenization is consistent - # actual with prompt. - if token_id == all_token_ids[token_position]: - next_iter_prefix_offset = new_prefix_offset - next_iter_read_offset = new_read_offset - next_iter_tokens = new_tokens - - # Advance to the next token position. - prefix_offset = next_iter_prefix_offset - read_offset = next_iter_read_offset - if prev_tokens is None: - prev_tokens = next_iter_tokens.copy() - else: - prev_tokens.extend(next_iter_tokens) - - def decode_sequence_inplace(self, seq: Sequence, - prms: SamplingParams) -> int: - """Decodes the new token for a sequence. In-place operation. - - Args: - seq: The sequence to decode. - prms: The sampling parameters used to generate the sequence. 
- - Returns: - The number of characters added to the output text. - """ - all_input_ids = seq.get_token_ids() - token_id_generated_this_iteration = all_input_ids[-1] - - # Convert prompt token IDs to tokens if necessary. - # Do it here so that we don't have to repeat this - # computation for each logprob. - if seq.tokens is None: - (seq.tokens, seq.prefix_offset, - seq.read_offset) = convert_prompt_ids_to_tokens( - tokenizer=self.tokenizer, - prompt_ids=all_input_ids[:-1], - skip_special_tokens=prms.skip_special_tokens, - ) - - (new_tokens, new_decoded_token_text, prefix_offset, - read_offset) = detokenize_incrementally( - tokenizer=self.tokenizer, - all_input_ids=all_input_ids, - prev_tokens=seq.tokens, - prefix_offset=seq.prefix_offset, - read_offset=seq.read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms.spaces_between_special_tokens, - ) - - # Decode logprobs - logprobs = seq.output_logprobs[-1] - if logprobs: - previous_tokens = all_input_ids[:-1] - for token_id, sample_logprob in logprobs.items(): - # If the token was generated this iteration, - # use the provided text. - if token_id == token_id_generated_this_iteration: - sample_logprob.decoded_token = new_decoded_token_text - continue - - if (sample_logprob.decoded_token is None - and token_id != VLLM_INVALID_TOKEN_ID): - all_input_ids_with_logprob = previous_tokens + [token_id] - (_, new_text, _, _) = detokenize_incrementally( - tokenizer=self.tokenizer, - all_input_ids=all_input_ids_with_logprob, - prev_tokens=seq.tokens, - prefix_offset=seq.prefix_offset, - read_offset=seq.read_offset, - skip_special_tokens=prms.skip_special_tokens, - spaces_between_special_tokens=prms. - spaces_between_special_tokens, - ) - sample_logprob.decoded_token = new_text - - seq.tokens.extend(new_tokens) - seq.prefix_offset = prefix_offset - seq.read_offset = read_offset - seq.output_text += new_decoded_token_text - - return len(new_decoded_token_text) diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py index eaab976bf7f7..20fabef4f19b 100644 --- a/vllm/worker/worker_base.py +++ b/vllm/worker/worker_base.py @@ -11,12 +11,12 @@ from vllm.config import VllmConfig, set_current_vllm_config from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.model_executor.layers.sampler import SamplerOutput from vllm.sequence import ExecuteModelRequest from vllm.utils import (enable_trace_function_call_for_thread, resolve_obj_by_qualname, run_method, update_environment_variables, warn_for_unimplemented_methods) +from vllm.v1.outputs import SamplerOutput logger = init_logger(__name__) From 0ff8ebb2d700b2e39457a661ef979b0da2ad73b3 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 21 Sep 2025 08:52:32 -0700 Subject: [PATCH 211/518] [V0 Deprecation] Remove async_output_proc, preemption mode, delay factor (#25334) Signed-off-by: Woosuk Kwon --- tests/detokenizer/test_stop_strings.py | 46 ++---------------- .../test_processor_multi_modal_uuids.py | 10 ---- tests/v1/test_oracle.py | 18 ------- vllm/config/__init__.py | 4 -- vllm/config/model.py | 48 +++---------------- vllm/config/scheduler.py | 15 +----- vllm/engine/arg_utils.py | 34 ------------- vllm/entrypoints/llm.py | 4 -- vllm/executor/uniproc_executor.py | 4 -- vllm/platforms/cpu.py | 4 -- vllm/platforms/cuda.py | 10 ---- vllm/platforms/interface.py | 7 --- vllm/platforms/rocm.py | 10 ---- vllm/platforms/tpu.py | 4 -- vllm/platforms/xpu.py | 4 -- 15 files changed, 12 insertions(+), 210 deletions(-) diff --git 
a/tests/detokenizer/test_stop_strings.py b/tests/detokenizer/test_stop_strings.py index cb87c44cc399..46f7d58c438c 100644 --- a/tests/detokenizer/test_stop_strings.py +++ b/tests/detokenizer/test_stop_strings.py @@ -32,10 +32,6 @@ def _test_stopping(llm: LLM, assert output.stop_reason == expected_reason -def _set_async_mode(llm, is_async): - llm.llm_engine.scheduler[0].use_async_output_proc = is_async - - def _stop_basic(llm): _test_stopping(llm, stop=["."], @@ -103,40 +99,8 @@ def test_stop_strings(): # async output processing below. llm = LLM(MODEL, enforce_eager=envs.VLLM_USE_V1) - if envs.VLLM_USE_V1: - _stop_basic(llm) - else: - _set_async_mode(llm, True) - _stop_basic(llm) - - _set_async_mode(llm, False) - _stop_basic(llm) - - if envs.VLLM_USE_V1: - _stop_multi_tokens(llm) - else: - _set_async_mode(llm, True) - _stop_multi_tokens(llm) - - _set_async_mode(llm, False) - _stop_multi_tokens(llm) - - if envs.VLLM_USE_V1: - _stop_partial_token(llm) - else: - _set_async_mode(llm, True) - _stop_partial_token(llm) - - _set_async_mode(llm, False) - _stop_partial_token(llm) - - if envs.VLLM_USE_V1: - # FIXME: this does not respect include_in_output=False - # _stop_token_id(llm) - pass - else: - _set_async_mode(llm, True) - _stop_token_id(llm) - - _set_async_mode(llm, False) - _stop_token_id(llm) + _stop_basic(llm) + _stop_multi_tokens(llm) + _stop_partial_token(llm) + # FIXME: this does not respect include_in_output=False + # _stop_token_id(llm) diff --git a/tests/v1/engine/test_processor_multi_modal_uuids.py b/tests/v1/engine/test_processor_multi_modal_uuids.py index bdd41eece231..3a7bcb957182 100644 --- a/tests/v1/engine/test_processor_multi_modal_uuids.py +++ b/tests/v1/engine/test_processor_multi_modal_uuids.py @@ -6,7 +6,6 @@ from vllm.assets.image import ImageAsset from vllm.assets.video import VideoAsset from vllm.config import CacheConfig, DeviceConfig, ModelConfig, VllmConfig -from vllm.platforms.interface import UnspecifiedPlatform from vllm.sampling_params import SamplingParams from vllm.v1.engine import processor as processor_mod from vllm.v1.engine.processor import Processor @@ -33,15 +32,6 @@ def _mk_processor(monkeypatch, "__post_init__", lambda self, *args: None, raising=True) - monkeypatch.setattr(UnspecifiedPlatform, - "is_async_output_supported", - classmethod(lambda cls, enforce_eager: True), - raising=True) - monkeypatch.setattr( - ModelConfig, - "verify_async_output_proc", - lambda self, parallel_config, speculative_config, device_config: None, - raising=True) monkeypatch.setattr(ModelConfig, "verify_with_parallel_config", lambda self, parallel_config: None, diff --git a/tests/v1/test_oracle.py b/tests/v1/test_oracle.py index 28c24f62895a..f6b8a18dd7c2 100644 --- a/tests/v1/test_oracle.py +++ b/tests/v1/test_oracle.py @@ -29,24 +29,6 @@ def test_unsupported_configs(monkeypatch): }, ).create_engine_config() - with pytest.raises(NotImplementedError): - AsyncEngineArgs( - model=MODEL, - preemption_mode="swap", - ).create_engine_config() - - with pytest.raises(NotImplementedError): - AsyncEngineArgs( - model=MODEL, - disable_async_output_proc=True, - ).create_engine_config() - - with pytest.raises(NotImplementedError): - AsyncEngineArgs( - model=MODEL, - scheduler_delay_factor=1.2, - ).create_engine_config() - def test_enable_by_default_fallback(monkeypatch): with monkeypatch.context() as m: diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index ddd8de4324f6..e31a78ba33ba 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -454,9 +454,6 @@ def 
__post_init__(self): self.try_verify_and_update_config() if self.model_config is not None: - self.model_config.verify_async_output_proc(self.parallel_config, - self.speculative_config, - self.device_config) self.model_config.verify_with_parallel_config(self.parallel_config) self.model_config.verify_dual_chunk_attention_config( self.load_config) @@ -877,7 +874,6 @@ def __str__(self): f"served_model_name={self.model_config.served_model_name}, " f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, " f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, " # noqa - f"use_async_output_proc={self.model_config.use_async_output_proc}, " f"pooler_config={self.model_config.pooler_config!r}, " f"compilation_config={self.compilation_config!r}") diff --git a/vllm/config/model.py b/vllm/config/model.py index 921322bb475c..b53029dc8c3e 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -223,8 +223,6 @@ class ModelConfig: that this name(s) will also be used in `model_name` tag content of prometheus metrics, if multiple names provided, metrics tag will take the first one.""" - use_async_output_proc: bool = True - """Whether to use async output processor.""" config_format: Union[str, ConfigFormat] = "auto" """The format of the model config to load:\n - "auto" will try to load the config in hf format if available else it @@ -1119,37 +1117,6 @@ def verify_dual_chunk_attention_config( raise ValueError("please set VLLM_ATTENTION_BACKEND to " f"{STR_DUAL_CHUNK_FLASH_ATTN_VAL}") - def verify_async_output_proc(self, parallel_config, speculative_config, - device_config) -> None: - if not self.use_async_output_proc: - # Nothing to check - return - - if parallel_config.pipeline_parallel_size > 1: - self.use_async_output_proc = False - return - - # Reminder: Please update docs/features/compatibility_matrix.md - # If the feature combo become valid - from vllm.platforms import current_platform - if not current_platform.is_async_output_supported(self.enforce_eager): - self.use_async_output_proc = False - return - - if envs.VLLM_USE_RAY_SPMD_WORKER: - self.use_async_output_proc = False - return - - # Async postprocessor is not necessary for pooling models - # since there is no token generation - if self.runner_type == "pooling": - self.use_async_output_proc = False - - # Reminder: Please update docs/features/compatibility_matrix.md - # If the feature combo become valid - if speculative_config: - self.use_async_output_proc = False - def verify_with_parallel_config( self, parallel_config: ParallelConfig, @@ -1173,15 +1140,12 @@ def verify_with_parallel_config( self._verify_with_expert_parallelism() pipeline_parallel_size = parallel_config.pipeline_parallel_size - if pipeline_parallel_size > 1: - if not self.registry.is_pp_supported_model(self.architectures, - self): - raise NotImplementedError( - "Pipeline parallelism is not supported for this model. " - "Supported models implement the `SupportsPP` interface.") - - if self.use_async_output_proc: - self.use_async_output_proc = False + if (pipeline_parallel_size > 1 + and not self.registry.is_pp_supported_model( + self.architectures, self)): + raise NotImplementedError( + "Pipeline parallelism is not supported for this model. 
" + "Supported models implement the `SupportsPP` interface.") def get_sliding_window(self) -> Optional[int]: """Get the sliding window size from the HF text config if present.""" diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index f0f67bab9d6f..daf094d2df5c 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -3,7 +3,7 @@ import hashlib from dataclasses import field -from typing import Any, Literal, Optional, Union +from typing import Any, Literal, Union from pydantic import SkipValidation, model_validator from pydantic.dataclasses import dataclass @@ -18,7 +18,6 @@ logger = init_logger(__name__) RunnerType = Literal["generate", "pooling", "draft"] -PreemptionMode = Literal["swap", "recompute"] SchedulerPolicy = Literal["fcfs", "priority"] @@ -78,10 +77,6 @@ class SchedulerConfig: 3. more than one value (e.g. 1 2 128) is provided, then the capture list will follow the provided list.""" - delay_factor: float = 0.0 - """Apply a delay (of delay factor multiplied by previous - prompt latency) before scheduling next prompt.""" - enable_chunked_prefill: SkipValidation[bool] = None # type: ignore """If True, prefill requests can be chunked based on the remaining max_num_batched_tokens.""" @@ -103,14 +98,6 @@ class SchedulerConfig: NOTE: This is not currently configurable. It will be overridden by max_num_batched_tokens in case max multimodal embedding size is larger.""" - preemption_mode: Optional[PreemptionMode] = None - """Whether to perform preemption by swapping or - recomputation. If not specified, we determine the mode as follows: - We use recomputation by default since it incurs lower overhead than - swapping. However, when the sequence group has multiple sequences - (e.g., beam search), recomputation is not currently supported. In - such a case, we use swapping instead.""" - send_delta_data: bool = False """Private API. If used, scheduler sends delta data to workers instead of an entire data. It should be enabled only diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 242fcf501bfc..fef4177b3a33 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -409,9 +409,7 @@ class EngineArgs: get_field(LoadConfig, "model_loader_extra_config") ignore_patterns: Optional[Union[str, List[str]]] = LoadConfig.ignore_patterns - preemption_mode: Optional[str] = SchedulerConfig.preemption_mode - scheduler_delay_factor: float = SchedulerConfig.delay_factor enable_chunked_prefill: Optional[ bool] = SchedulerConfig.enable_chunked_prefill disable_chunked_mm_input: bool = SchedulerConfig.disable_chunked_mm_input @@ -439,7 +437,6 @@ class EngineArgs: ObservabilityConfig.otlp_traces_endpoint collect_detailed_traces: Optional[list[DetailedTraceModules]] = \ ObservabilityConfig.collect_detailed_traces - disable_async_output_proc: bool = not ModelConfig.use_async_output_proc scheduling_policy: SchedulerPolicy = SchedulerConfig.policy scheduler_cls: Union[str, Type[object]] = SchedulerConfig.scheduler_cls @@ -561,14 +558,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **model_kwargs["enable_prompt_embeds"]) model_group.add_argument("--served-model-name", **model_kwargs["served_model_name"]) - # This one is a special case because it is the - # opposite of ModelConfig.use_async_output_proc - model_group.add_argument( - "--disable-async-output-proc", - action="store_true", - default=EngineArgs.disable_async_output_proc, - help="Disable async output processing. 
This may result in " - "lower performance.") model_group.add_argument("--config-format", **model_kwargs["config_format"]) # This one is a special case because it can bool @@ -897,10 +886,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **scheduler_kwargs["long_prefill_token_threshold"]) scheduler_group.add_argument("--num-lookahead-slots", **scheduler_kwargs["num_lookahead_slots"]) - scheduler_group.add_argument("--scheduler-delay-factor", - **scheduler_kwargs["delay_factor"]) - scheduler_group.add_argument("--preemption-mode", - **scheduler_kwargs["preemption_mode"]) # multi-step scheduling has been removed; corresponding arguments # are no longer supported. scheduler_group.add_argument("--scheduling-policy", @@ -1029,7 +1014,6 @@ def create_model_config(self) -> ModelConfig: interleave_mm_strings=self.interleave_mm_strings, media_io_kwargs=self.media_io_kwargs, skip_mm_profiling=self.skip_mm_profiling, - use_async_output_proc=not self.disable_async_output_proc, config_format=self.config_format, mm_processor_kwargs=self.mm_processor_kwargs, mm_processor_cache_gb=self.mm_processor_cache_gb, @@ -1395,11 +1379,9 @@ def create_engine_config( max_model_len=model_config.max_model_len, cuda_graph_sizes=self.cuda_graph_sizes, num_lookahead_slots=num_lookahead_slots, - delay_factor=self.scheduler_delay_factor, enable_chunked_prefill=self.enable_chunked_prefill, disable_chunked_mm_input=self.disable_chunked_mm_input, is_multimodal_model=model_config.is_multimodal_model, - preemption_mode=self.preemption_mode, send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER and parallel_config.use_ray), policy=self.scheduling_policy, @@ -1492,22 +1474,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: recommend_to_remove=False) return False - if self.preemption_mode != SchedulerConfig.preemption_mode: - _raise_or_fallback(feature_name="--preemption-mode", - recommend_to_remove=True) - return False - - if (self.disable_async_output_proc - != EngineArgs.disable_async_output_proc): - _raise_or_fallback(feature_name="--disable-async-output-proc", - recommend_to_remove=True) - return False - - if self.scheduler_delay_factor != SchedulerConfig.delay_factor: - _raise_or_fallback(feature_name="--scheduler-delay-factor", - recommend_to_remove=True) - return False - # No Mamba or Encoder-Decoder so far. if not model_config.is_v1_compatible: _raise_or_fallback(feature_name=model_config.architectures, diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 0ab806fcb8b5..092d3f276d1c 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -137,8 +137,6 @@ class LLM: back to the eager mode. disable_custom_all_reduce: See [ParallelConfig][vllm.config.ParallelConfig]. - disable_async_output_proc: Disable async output processing. - This may result in lower performance. hf_token: The token to use as HTTP bearer authorization for remote files . If `True`, will use the token generated when running `huggingface-cli login` (stored in `~/.huggingface`). 
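For reviewers tracking the user-facing impact, a minimal sketch of constructing the engine after this change (not taken from the patch; the model id is only a placeholder): the `disable_async_output_proc` keyword simply disappears from `LLM(...)`, and the matching CLI flags (`--disable-async-output-proc`, `--preemption-mode`, `--scheduler-delay-factor`) are dropped from the argument parser.

from vllm import LLM, SamplingParams

# Placeholder model id -- any supported checkpoint works the same way.
llm = LLM(model="facebook/opt-125m", enforce_eager=True)

# No async-output-proc / preemption-mode / delay-factor knobs are passed (or
# accepted) any more; the V1 engine manages output processing on its own.
outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(temperature=0.0, max_tokens=16))
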
@@ -188,7 +186,6 @@ def __init__( enforce_eager: bool = False, max_seq_len_to_capture: int = 8192, disable_custom_all_reduce: bool = False, - disable_async_output_proc: bool = False, hf_token: Optional[Union[bool, str]] = None, hf_overrides: Optional[HfOverrides] = None, mm_processor_kwargs: Optional[dict[str, Any]] = None, @@ -286,7 +283,6 @@ def __init__( enforce_eager=enforce_eager, max_seq_len_to_capture=max_seq_len_to_capture, disable_custom_all_reduce=disable_custom_all_reduce, - disable_async_output_proc=disable_async_output_proc, hf_token=hf_token, hf_overrides=hf_overrides, mm_processor_kwargs=mm_processor_kwargs, diff --git a/vllm/executor/uniproc_executor.py b/vllm/executor/uniproc_executor.py index 3b566e88a9ec..7a753d608a43 100644 --- a/vllm/executor/uniproc_executor.py +++ b/vllm/executor/uniproc_executor.py @@ -137,10 +137,6 @@ class ExecutorWithExternalLauncher(UniProcExecutor): def _init_executor(self) -> None: """Initialize the worker and load the model. """ - assert self.vllm_config.scheduler_config.delay_factor == 0.0, \ - ("ExecutorWithExternalLauncher needs deterministic " - "execution, so it" - "does not support delay_factor in scheduling") if envs.VLLM_USE_V1: assert not envs.VLLM_ENABLE_V1_MULTIPROCESSING, \ ("To get deterministic execution in V1, " diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 544e091491bf..cd41832bc2ea 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -126,10 +126,6 @@ def set_device(cls, device: torch.device) -> None: """ torch.cpu.set_device(device) - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - return False - @classmethod def inference_mode(cls): return torch.no_grad() diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 87d8f2b7481b..c263e2afe83b 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -96,16 +96,6 @@ def get_device_name(cls, device_id: int = 0) -> str: def get_device_total_memory(cls, device_id: int = 0) -> int: raise NotImplementedError - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - if enforce_eager and not envs.VLLM_USE_V1: - logger.warning( - "To see benefits of async output processing, enable CUDA " - "graph. Since, enforce-eager is enabled, async output " - "processor cannot be used") - return False - return True - @classmethod def is_fully_connected(cls, device_ids: list[int]) -> bool: raise NotImplementedError diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 53fc762dce54..c43580ac5da1 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -275,13 +275,6 @@ def get_device_total_memory(cls, device_id: int = 0) -> int: """Get the total memory of a device in bytes.""" raise NotImplementedError - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - """ - Check if the current platform supports async output. - """ - raise NotImplementedError - @classmethod def inference_mode(cls): """A device-specific wrapper of `torch.inference_mode`. 
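The same `is_async_output_supported` override is deleted from each platform in this commit. As a sketch of the slimmer contract an out-of-tree platform now targets (the class and device name below are hypothetical, not part of this series):

import torch

from vllm.platforms.interface import Platform


class MyAcceleratorPlatform(Platform):
    # Hypothetical plugin platform: there is no is_async_output_supported()
    # hook left to override; only hooks such as the two below remain.

    @classmethod
    def get_device_name(cls, device_id: int = 0) -> str:
        return "my-accelerator"

    @classmethod
    def inference_mode(cls):
        # Mirrors the CPU/XPU implementations kept by this diff.
        return torch.no_grad()
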
diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 4f540fe965e2..dce2924ac7a9 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -310,16 +310,6 @@ def get_device_total_memory(cls, device_id: int = 0) -> int: device_props = torch.cuda.get_device_properties(device_id) return device_props.total_memory - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - if enforce_eager and not envs.VLLM_USE_V1: - logger.warning( - "To see benefits of async output processing, enable CUDA " - "graph. Since, enforce-eager is enabled, async output " - "processor cannot be used") - return False - return True - @classmethod def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: from vllm.config.compilation import CUDAGraphMode diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 4e4db116abca..9852d948bc4b 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -75,10 +75,6 @@ def get_device_name(cls, device_id: int = 0) -> str: def get_device_total_memory(cls, device_id: int = 0) -> int: raise NotImplementedError - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - return False - @classmethod def get_punica_wrapper(cls) -> str: return "vllm.lora.punica_wrapper.punica_tpu.PunicaWrapperTPU" diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 67ef058df10f..4d3bef4b4294 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -98,10 +98,6 @@ def get_device_total_memory(cls, device_id: int = 0) -> int: device_props = torch.xpu.get_device_properties(device_id) return device_props.total_memory - @classmethod - def is_async_output_supported(cls, enforce_eager: Optional[bool]) -> bool: - return True - @classmethod def inference_mode(cls): return torch.no_grad() From c438b2951c5adafd4db831605696d097ab3e513a Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Sun, 21 Sep 2025 22:34:45 +0530 Subject: [PATCH 212/518] feat: Enable engine-level arguments with speculators models (#25250) Signed-off-by: Rahul Tuli Co-authored-by: Claude --- .../speculators/test_eagle3.py | 54 ++++++++++++------- vllm/config/model.py | 12 +---- vllm/engine/arg_utils.py | 35 +++++------- vllm/transformers_utils/config.py | 46 +++++++++++++--- .../configs/speculators/base.py | 52 ++++++++++++------ 5 files changed, 121 insertions(+), 78 deletions(-) diff --git a/tests/speculative_decoding/speculators/test_eagle3.py b/tests/speculative_decoding/speculators/test_eagle3.py index 45ddb2178722..368238b3a720 100644 --- a/tests/speculative_decoding/speculators/test_eagle3.py +++ b/tests/speculative_decoding/speculators/test_eagle3.py @@ -3,38 +3,52 @@ import pytest import torch +from vllm.config import SpeculativeConfig from vllm.model_executor.models.interfaces import supports_eagle3 -@pytest.mark.parametrize( - "model_path", - [("nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized")]) -def test_llama(vllm_runner, example_prompts, model_path, monkeypatch): +@pytest.mark.parametrize("model_path", [ + pytest.param( + "nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized", + id="llama3-eagle3-speculator"), + pytest.param( + "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized", + id="qwen3-eagle3-speculator"), +]) +def test_eagle3_speculators_model(vllm_runner, example_prompts, model_path, + monkeypatch): + """ + Test Eagle3 speculators models properly initialize speculative decoding. + + This test verifies: + 1. Eagle3 support is detected for the model + 2. 
Speculative config is automatically initialized from embedded config + 3. The draft model path is correctly set to the speculators model + 4. Speculative tokens count is valid + 5. Text generation works with speculative decoding enabled + """ # Set environment variable for V1 engine serialization monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model: + # Verify Eagle3 support is detected eagle3_supported = vllm_model.apply_model(supports_eagle3) - assert eagle3_supported + assert eagle3_supported, f"Eagle3 should be supported for {model_path}" - vllm_outputs = vllm_model.generate_greedy(example_prompts, - max_tokens=20) - print(vllm_outputs) - assert vllm_outputs + vllm_config = vllm_model.llm.llm_engine.vllm_config + assert isinstance(vllm_config.speculative_config, SpeculativeConfig), \ + "Speculative config should be initialized for speculators model" -@pytest.mark.parametrize( - "model_path", - [("nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized")]) -def test_qwen(vllm_runner, example_prompts, model_path, monkeypatch): - # Set environment variable for V1 engine serialization - monkeypatch.setenv("VLLM_ALLOW_INSECURE_SERIALIZATION", "1") + spec_config = vllm_config.speculative_config + assert spec_config.num_speculative_tokens > 0, \ + (f"Expected positive speculative tokens, " + f"got {spec_config.num_speculative_tokens}") - with vllm_runner(model_path, dtype=torch.bfloat16) as vllm_model: - eagle3_supported = vllm_model.apply_model(supports_eagle3) - assert eagle3_supported + assert spec_config.model == model_path, \ + f"Draft model should be {model_path}, got {spec_config.model}" vllm_outputs = vllm_model.generate_greedy(example_prompts, max_tokens=20) - print(vllm_outputs) - assert vllm_outputs + assert vllm_outputs, \ + f"No outputs generated for speculators model {model_path}" diff --git a/vllm/config/model.py b/vllm/config/model.py index b53029dc8c3e..95fe52883db0 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -27,8 +27,7 @@ ConfigFormat, get_config, get_hf_image_processor_config, get_hf_text_config, get_pooling_config, get_sentence_transformer_tokenizer_config, is_encoder_decoder, - is_interleaved, maybe_override_with_speculators_target_model, - try_get_generation_config, try_get_safetensors_metadata, + is_interleaved, try_get_generation_config, try_get_safetensors_metadata, try_get_tokenizer_config, uses_mrope) from vllm.transformers_utils.runai_utils import (ObjectStorageModel, is_runai_obj_uri) @@ -416,15 +415,6 @@ def __post_init__( self.maybe_pull_model_tokenizer_for_runai(self.model, self.tokenizer) - if self.runner != "draft": - # If we're not running the draft model, check for speculators config - # If speculators config, set model / tokenizer to be target model - self.model, self.tokenizer = maybe_override_with_speculators_target_model( # noqa: E501 - model=self.model, - tokenizer=self.tokenizer, - revision=self.revision, - trust_remote_code=self.trust_remote_code) - if (backend := envs.VLLM_ATTENTION_BACKEND ) and backend == "FLASHINFER" and find_spec("flashinfer") is None: raise ValueError( diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index fef4177b3a33..7e00260caa39 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -41,7 +41,8 @@ from vllm.ray.lazy_utils import is_ray_initialized from vllm.reasoning import ReasoningParserManager from vllm.test_utils import MODEL_WEIGHTS_S3_BUCKET, MODELS_ON_S3 -from 
vllm.transformers_utils.config import get_model_path, is_interleaved +from vllm.transformers_utils.config import (get_model_path, is_interleaved, + maybe_override_with_speculators) from vllm.transformers_utils.utils import check_gguf_file from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, FlexibleArgumentParser, GiB_bytes, get_ip, is_in_ray_actor) @@ -1082,29 +1083,8 @@ def create_speculative_config( provided as a JSON string input via CLI arguments or directly as a dictionary from the engine. """ - - from vllm.transformers_utils.config import get_config - from vllm.transformers_utils.configs.speculators.base import ( - SpeculatorsConfig) - if self.speculative_config is None: - hf_config = get_config( - self.hf_config_path or target_model_config.model, - self.trust_remote_code, self.revision, self.code_revision, - self.config_format) - - # if loading a SpeculatorsConfig, load the speculative_config - # details from the config directly - # no user input required / expected - if isinstance(hf_config, SpeculatorsConfig): - # We create one since we don't create one - self.speculative_config = {} - self.speculative_config[ - "num_speculative_tokens"] = hf_config.num_lookahead_tokens - self.speculative_config["model"] = target_model_config.model - self.speculative_config["method"] = hf_config.method - else: - return None + return None # Note(Shangming): These parameters are not obtained from the cli arg # '--speculative-config' and must be passed in when creating the engine @@ -1139,6 +1119,15 @@ def create_engine_config( device_config = DeviceConfig( device=cast(Device, current_platform.device_type)) + + (self.model, self.tokenizer, + self.speculative_config) = maybe_override_with_speculators( + model=self.model, + tokenizer=self.tokenizer, + revision=self.revision, + trust_remote_code=self.trust_remote_code, + vllm_speculative_config=self.speculative_config, + ) model_config = self.create_model_config() # * If VLLM_USE_V1 is unset, we enable V1 for "supported features" diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 52e2c18a7784..9eed46678866 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -463,15 +463,29 @@ def _maybe_remap_hf_config_attrs(config: PretrainedConfig) -> PretrainedConfig: return config -def maybe_override_with_speculators_target_model( +def maybe_override_with_speculators( model: str, tokenizer: str, trust_remote_code: bool, revision: Optional[str] = None, + vllm_speculative_config: Optional[dict[str, Any]] = None, **kwargs, -) -> tuple[str, str]: +) -> tuple[str, str, Optional[dict[str, Any]]]: """ - If running a speculators config, override running model with target model + Resolve model configuration when speculators are detected. + + Checks if the provided model is a speculators model and if so, extracts + the target model configuration and builds the speculative config. 
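For orientation, a caller-side sketch of this resolver (illustrative only, not part of the diff; the checkpoint is the one already exercised by the Eagle3 test above):

from vllm.transformers_utils.config import maybe_override_with_speculators

model, tokenizer, spec = maybe_override_with_speculators(
    model="nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
    tokenizer="nm-testing/SpeculatorLlama3-1-8B-Eagle3-converted-0717-quantized",
    trust_remote_code=False,
)
# For a speculators checkpoint, `model`/`tokenizer` now point at the verifier
# (target) model, while `spec` carries the draft settings ("method",
# "num_speculative_tokens", and "model" set to the speculators path).  For a
# plain checkpoint the inputs are returned unchanged.
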
+ + Args: + model: Model name or path + tokenizer: Tokenizer name or path + trust_remote_code: Whether to trust remote code + revision: Model revision + vllm_speculative_config: Existing vLLM speculative config + + Returns: + Tuple of (resolved_model, resolved_tokenizer, speculative_config) """ is_gguf = check_gguf_file(model) if is_gguf: @@ -487,11 +501,27 @@ def maybe_override_with_speculators_target_model( token=_get_hf_token(), **kwargs, ) - spec_config = config_dict.get("speculators_config", None) - # Return the target model - if spec_config is not None: - model = tokenizer = spec_config["verifier"]["name_or_path"] - return model, tokenizer + speculators_config = config_dict.get("speculators_config") + + if speculators_config is None: + # No speculators config found, return original values + return model, tokenizer, vllm_speculative_config + + # Speculators format detected - process overrides + from vllm.transformers_utils.configs.speculators.base import ( + SpeculatorsConfig) + + vllm_speculative_config = SpeculatorsConfig.extract_vllm_speculative_config( + config_dict=config_dict) + + # Set the draft model to the speculators model + vllm_speculative_config["model"] = model + + # Override model and tokenizer with the verifier model from config + verifier_model = speculators_config["verifier"]["name_or_path"] + model = tokenizer = verifier_model + + return model, tokenizer, vllm_speculative_config def get_config( diff --git a/vllm/transformers_utils/configs/speculators/base.py b/vllm/transformers_utils/configs/speculators/base.py index d7c16e180c70..53128b4eecb0 100644 --- a/vllm/transformers_utils/configs/speculators/base.py +++ b/vllm/transformers_utils/configs/speculators/base.py @@ -24,6 +24,12 @@ def from_pretrained( config_dict, _ = cls.get_config_dict(pretrained_model_name_or_path, **kwargs) + vllm_config = cls.extract_vllm_speculative_config(config_dict) + return cls(**vllm_config) + + @classmethod + def extract_vllm_speculative_config( + cls, config_dict: dict[str, Any]) -> dict[str, Any]: speculators_model_type = config_dict.get("speculators_model_type") if speculators_model_type not in SUPPORTED_SPECULATORS_TYPES: raise ValueError( @@ -34,11 +40,12 @@ def from_pretrained( # TODO: @dsikka - use speculators pydantic model to validate cls.validate_speculators_config(config_dict=config_dict) # Convert from speculators config -> format that can be ingested by vLLM - vllm_config = cls.convert_speculators_to_vllm(config_dict=config_dict) + vllm_config = cls.build_vllm_speculative_config( + config_dict=config_dict) # Apply anything specific to the supported algorithm algo_updater = SUPPORTED_SPECULATORS_TYPES[speculators_model_type] algo_updater(config_dict=config_dict, vllm_config=vllm_config) - return cls(**vllm_config) + return vllm_config @classmethod def validate_speculators_config(cls, config_dict: dict[str, Any]) -> None: @@ -60,32 +67,45 @@ def validate_speculators_config(cls, config_dict: dict[str, Any]) -> None: "'transformer_layer_config' must be a dictionary if provided") @classmethod - def convert_speculators_to_vllm( + def build_vllm_speculative_config( cls, config_dict: dict[str, Any]) -> dict[str, Any]: """ - Convert speculators config format to vLLM format. - - This method handles the translation of field names and structure - between speculators and vLLM formats. - + Build vLLM-compatible speculative configuration from speculators format. 
+ + This method extracts and transforms speculative configuration from the + speculators format into the structure expected by vLLM. + + Args: + config_dict: Configuration dictionary in speculators format + Returns: - Dictionary with vLLM-compatible configuration + Dictionary with vLLM-compatible speculative configuration """ - # Currently we only support one proposal method + # Extract speculators configuration spec_config = config_dict["speculators_config"] - first_method = spec_config.get("proposal_methods")[0] - num_lookahead_tokens = first_method.get("speculative_tokens") - if num_lookahead_tokens is None: + # Currently we only support one proposal method + proposal_methods = spec_config.get("proposal_methods") + if not proposal_methods: + raise ValueError("No proposal methods found in speculators config") + + first_method = proposal_methods[0] + num_speculative_tokens = first_method.get("speculative_tokens") + + if num_speculative_tokens is None: raise ValueError( "Missing 'speculative_tokens' in proposal method. " f"Got: {first_method}") - # Build base vLLM config + # Build base vLLM speculative configuration vllm_config = { "method": config_dict.get("speculators_model_type"), - "num_lookahead_tokens": num_lookahead_tokens, + "num_speculative_tokens": num_speculative_tokens, "target_model": spec_config.get("verifier")["name_or_path"] } - vllm_config.update(config_dict["transformer_layer_config"]) + + # Merge transformer layer configuration if present + transformer_config = config_dict.get("transformer_layer_config", {}) + vllm_config.update(transformer_config) + return vllm_config From 1c3ffdbeccaf75f7b885b1700a4011fb4f63a7ab Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 21 Sep 2025 10:37:11 -0700 Subject: [PATCH 213/518] [V0 Deprecation] Remove V0 sampling metadata (#25345) Signed-off-by: Woosuk Kwon --- .../vllm_add_dummy_model/my_llava.py | 8 +++----- .../vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py | 8 +++----- vllm/model_executor/__init__.py | 2 -- vllm/model_executor/layers/logits_processor.py | 2 -- vllm/model_executor/models/apertus.py | 5 +---- vllm/model_executor/models/arcee.py | 7 +++---- vllm/model_executor/models/arctic.py | 5 +---- vllm/model_executor/models/aria.py | 7 ++----- vllm/model_executor/models/aya_vision.py | 5 +---- vllm/model_executor/models/baichuan.py | 5 +---- vllm/model_executor/models/bailing_moe.py | 5 +---- vllm/model_executor/models/bamba.py | 5 +---- vllm/model_executor/models/blip2.py | 5 +---- vllm/model_executor/models/bloom.py | 5 +---- vllm/model_executor/models/chameleon.py | 5 +---- vllm/model_executor/models/chatglm.py | 5 +---- vllm/model_executor/models/cohere2_vision.py | 5 +---- vllm/model_executor/models/commandr.py | 6 ++---- vllm/model_executor/models/dbrx.py | 5 +---- vllm/model_executor/models/deepseek.py | 5 +---- vllm/model_executor/models/deepseek_eagle.py | 5 +---- vllm/model_executor/models/deepseek_mtp.py | 9 ++------- vllm/model_executor/models/deepseek_v2.py | 5 +---- vllm/model_executor/models/deepseek_vl2.py | 5 +---- vllm/model_executor/models/dots1.py | 5 +---- vllm/model_executor/models/ernie45_moe.py | 5 +---- vllm/model_executor/models/ernie45_vl.py | 5 +---- vllm/model_executor/models/ernie45_vl_moe.py | 5 +---- vllm/model_executor/models/ernie_mtp.py | 8 ++------ vllm/model_executor/models/exaone.py | 5 +---- vllm/model_executor/models/exaone4.py | 5 +---- vllm/model_executor/models/falcon.py | 5 +---- vllm/model_executor/models/falcon_h1.py | 5 +---- vllm/model_executor/models/fuyu.py | 4 +--- 
vllm/model_executor/models/gemma.py | 5 +---- vllm/model_executor/models/gemma2.py | 5 +---- vllm/model_executor/models/gemma3.py | 5 +---- vllm/model_executor/models/gemma3_mm.py | 5 +---- vllm/model_executor/models/gemma3n.py | 5 +---- vllm/model_executor/models/gemma3n_mm.py | 5 +---- vllm/model_executor/models/glm4.py | 5 +---- vllm/model_executor/models/glm4_1v.py | 5 +---- vllm/model_executor/models/glm4_moe.py | 5 +---- vllm/model_executor/models/glm4_moe_mtp.py | 9 ++------- vllm/model_executor/models/gpt2.py | 5 +---- vllm/model_executor/models/gpt_bigcode.py | 5 +---- vllm/model_executor/models/gpt_j.py | 4 +--- vllm/model_executor/models/gpt_neox.py | 5 +---- vllm/model_executor/models/gpt_oss.py | 7 ++----- vllm/model_executor/models/granite.py | 9 +++------ vllm/model_executor/models/granite_speech.py | 7 +------ vllm/model_executor/models/granitemoe.py | 9 +++------ vllm/model_executor/models/granitemoehybrid.py | 5 +---- vllm/model_executor/models/granitemoeshared.py | 9 +++------ vllm/model_executor/models/grok1.py | 5 +---- vllm/model_executor/models/hunyuan_v1.py | 5 +---- vllm/model_executor/models/hyperclovax_vision.py | 5 +---- vllm/model_executor/models/idefics3.py | 7 ++----- vllm/model_executor/models/interfaces_base.py | 3 --- vllm/model_executor/models/internlm2.py | 5 +---- vllm/model_executor/models/interns1.py | 5 +---- vllm/model_executor/models/internvl.py | 5 +---- vllm/model_executor/models/jais.py | 5 +---- vllm/model_executor/models/jamba.py | 5 +---- vllm/model_executor/models/keye.py | 5 +---- vllm/model_executor/models/kimi_vl.py | 5 +---- vllm/model_executor/models/lfm2.py | 7 ++----- vllm/model_executor/models/llama.py | 5 +---- vllm/model_executor/models/llama_eagle3.py | 5 +---- vllm/model_executor/models/llava.py | 5 +---- vllm/model_executor/models/llava_next.py | 5 +---- vllm/model_executor/models/llava_next_video.py | 5 +---- vllm/model_executor/models/llava_onevision.py | 5 +---- vllm/model_executor/models/mamba.py | 7 ++----- vllm/model_executor/models/mamba2.py | 7 ++----- vllm/model_executor/models/medusa.py | 3 +-- vllm/model_executor/models/midashenglm.py | 4 +--- vllm/model_executor/models/mimo.py | 5 +---- vllm/model_executor/models/mimo_mtp.py | 8 ++------ vllm/model_executor/models/minicpm.py | 5 +---- vllm/model_executor/models/minicpm_eagle.py | 5 +---- vllm/model_executor/models/minicpmv.py | 4 +--- vllm/model_executor/models/minimax_text_01.py | 7 ++----- vllm/model_executor/models/minimax_vl_01.py | 5 +---- vllm/model_executor/models/mistral3.py | 5 +---- vllm/model_executor/models/mixtral.py | 5 +---- vllm/model_executor/models/mllama4.py | 5 +---- vllm/model_executor/models/molmo.py | 7 ++----- vllm/model_executor/models/mpt.py | 5 +---- vllm/model_executor/models/nano_nemotron_vl.py | 5 +---- vllm/model_executor/models/nemotron.py | 5 +---- vllm/model_executor/models/nemotron_h.py | 5 +---- vllm/model_executor/models/nemotron_nas.py | 5 +---- vllm/model_executor/models/nemotron_vl.py | 5 +---- vllm/model_executor/models/olmo.py | 5 +---- vllm/model_executor/models/olmo2.py | 5 +---- vllm/model_executor/models/olmoe.py | 7 ++----- vllm/model_executor/models/opt.py | 5 +---- vllm/model_executor/models/orion.py | 5 +---- vllm/model_executor/models/ovis.py | 4 +--- vllm/model_executor/models/ovis2_5.py | 4 +--- vllm/model_executor/models/paligemma.py | 5 +---- vllm/model_executor/models/persimmon.py | 5 +---- vllm/model_executor/models/phi.py | 4 +--- vllm/model_executor/models/phi3v.py | 5 +---- 
vllm/model_executor/models/phi4_multimodal.py | 5 +---- vllm/model_executor/models/phi4flash.py | 3 --- vllm/model_executor/models/phi4mm.py | 5 +---- vllm/model_executor/models/phimoe.py | 7 ++----- vllm/model_executor/models/pixtral.py | 5 +---- vllm/model_executor/models/plamo2.py | 5 +---- vllm/model_executor/models/qwen.py | 5 +---- vllm/model_executor/models/qwen2.py | 5 +---- vllm/model_executor/models/qwen2_5_omni_thinker.py | 5 +---- vllm/model_executor/models/qwen2_5_vl.py | 5 +---- vllm/model_executor/models/qwen2_audio.py | 5 +---- vllm/model_executor/models/qwen2_moe.py | 5 +---- vllm/model_executor/models/qwen2_vl.py | 5 +---- vllm/model_executor/models/qwen3.py | 5 +---- vllm/model_executor/models/qwen3_moe.py | 5 +---- vllm/model_executor/models/qwen3_next.py | 5 +---- vllm/model_executor/models/qwen3_next_mtp.py | 5 +---- vllm/model_executor/models/qwen3_vl.py | 5 +---- vllm/model_executor/models/seed_oss.py | 5 +---- vllm/model_executor/models/skyworkr1v.py | 5 +---- vllm/model_executor/models/solar.py | 7 ++----- vllm/model_executor/models/stablelm.py | 5 +---- vllm/model_executor/models/starcoder2.py | 5 +---- vllm/model_executor/models/step3_text.py | 7 ++----- vllm/model_executor/models/step3_vl.py | 5 +---- vllm/model_executor/models/tarsier.py | 5 +---- vllm/model_executor/models/transformers.py | 5 +---- vllm/model_executor/models/ultravox.py | 7 ++----- vllm/model_executor/models/voxtral.py | 5 +---- vllm/model_executor/models/whisper.py | 7 ++----- vllm/model_executor/models/zamba2.py | 5 +---- vllm/model_executor/sampling_metadata.py | 7 ------- vllm/v1/spec_decode/eagle.py | 9 +++------ vllm/v1/spec_decode/medusa.py | 2 +- vllm/v1/worker/gpu_model_runner.py | 9 ++++----- vllm/v1/worker/tpu_model_runner.py | 2 +- 141 files changed, 172 insertions(+), 583 deletions(-) delete mode 100644 vllm/model_executor/sampling_metadata.py diff --git a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py index da97cf7e2b40..b431ad1ed092 100644 --- a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py +++ b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_llava.py @@ -9,7 +9,6 @@ LlavaForConditionalGeneration, LlavaMultiModalProcessor, LlavaProcessingInfo) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY @@ -18,11 +17,10 @@ dummy_inputs=LlavaDummyInputsBuilder) class MyLlava(LlavaForConditionalGeneration): - def compute_logits( - self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]: + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: # this dummy model always predicts the first token - logits = super().compute_logits(hidden_states, sampling_metadata) + logits = super().compute_logits(hidden_states) if logits is not None: logits.zero_() logits[:, 0] += 1.0 diff --git a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py index 8c34407e3e07..a6fafff98e9c 100644 --- a/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py +++ b/tests/plugins/vllm_add_dummy_model/vllm_add_dummy_model/my_opt.py @@ -6,16 +6,14 @@ import torch from vllm.model_executor.models.opt import OPTForCausalLM -from vllm.model_executor.sampling_metadata import SamplingMetadata class MyOPTForCausalLM(OPTForCausalLM): - def compute_logits( - self, hidden_states: 
torch.Tensor, - sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]: + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: # this dummy model always predicts the first token - logits = super().compute_logits(hidden_states, sampling_metadata) + logits = super().compute_logits(hidden_states) if logits is not None: logits.zero_() logits[:, 0] += 1.0 diff --git a/vllm/model_executor/__init__.py b/vllm/model_executor/__init__.py index a59aebfac4ff..3c094cfdb553 100644 --- a/vllm/model_executor/__init__.py +++ b/vllm/model_executor/__init__.py @@ -3,11 +3,9 @@ from vllm.model_executor.parameter import (BasevLLMParameter, PackedvLLMParameter) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_random_seed __all__ = [ - "SamplingMetadata", "set_random_seed", "BasevLLMParameter", "PackedvLLMParameter", diff --git a/vllm/model_executor/layers/logits_processor.py b/vllm/model_executor/layers/logits_processor.py index 8226437cb189..2110aa2769b9 100644 --- a/vllm/model_executor/layers/logits_processor.py +++ b/vllm/model_executor/layers/logits_processor.py @@ -10,7 +10,6 @@ from vllm.model_executor.custom_op import CustomOp from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.platforms import current_platform @@ -50,7 +49,6 @@ def forward( self, lm_head: VocabParallelEmbedding, hidden_states: torch.Tensor, - sampling_metadata: Optional[SamplingMetadata] = None, embedding_bias: Optional[torch.Tensor] = None, ) -> Optional[torch.Tensor]: if self.logits_as_input: diff --git a/vllm/model_executor/models/apertus.py b/vllm/model_executor/models/apertus.py index f6400b05e110..6dab4ed14345 100644 --- a/vllm/model_executor/models/apertus.py +++ b/vllm/model_executor/models/apertus.py @@ -48,7 +48,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -566,10 +565,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py index be82c2fd5964..1ee378af76c9 100644 --- a/vllm/model_executor/models/arcee.py +++ b/vllm/model_executor/models/arcee.py @@ -399,11 +399,10 @@ def forward( inputs_embeds=inputs_embeds) return model_output - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata) -> Optional[torch.Tensor]: + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: # Compute final logits from hidden states (last pipeline rank only) - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index b6dd55996841..55d16fd75ceb 100644 --- 
a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -30,7 +30,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -456,10 +455,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index a7cb6b35a4ab..35c1adbdd00b 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -19,7 +19,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -644,10 +643,8 @@ def forward( return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index 687c82ded9d0..0f05f9b4efcd 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -16,7 +16,6 @@ get_optimal_tiled_canvas) from vllm.config import VllmConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalDataDict, MultiModalKwargsItems from vllm.multimodal.parse import (ImageProcessorItems, ImageSize, @@ -464,7 +463,5 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index ae2503341040..db8d0a871047 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -46,7 +46,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, row_parallel_weight_loader) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP, SupportsQuant @@ -421,10 +420,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> 
Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py index 5f6025abf315..82cd4a26a1ba 100644 --- a/vllm/model_executor/models/bailing_moe.py +++ b/vllm/model_executor/models/bailing_moe.py @@ -51,7 +51,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -623,10 +622,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index 397089f31cdf..584981ef3ebf 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -34,7 +34,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType @@ -571,10 +570,8 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py index a3131aa3812e..b7455fba62c0 100644 --- a/vllm/model_executor/models/blip2.py +++ b/vllm/model_executor/models/blip2.py @@ -12,7 +12,6 @@ from vllm.config import CacheConfig, VllmConfig from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -704,10 +703,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 4c37622b049c..30816f72a267 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from 
vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP, SupportsQuant @@ -355,10 +354,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index 7a5623648374..79d648d749c6 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -28,7 +28,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, row_parallel_weight_loader) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -1046,10 +1045,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) # Disallow image tokens which does not include special # begin-image and end-image tokens diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 1fc2da3e4d7c..879508400222 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -27,7 +27,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import ChatGLMConfig @@ -437,10 +436,8 @@ def __init__( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/cohere2_vision.py b/vllm/model_executor/models/cohere2_vision.py index 179cc2af8eb3..6d67eb68d51a 100644 --- a/vllm/model_executor/models/cohere2_vision.py +++ b/vllm/model_executor/models/cohere2_vision.py @@ -21,7 +21,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization.awq import AWQConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalDataDict, MultiModalKwargsItems from vllm.multimodal.parse import (ImageProcessorItems, ImageSize, @@ -478,7 +477,5 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return 
self.language_model.compute_logits(hidden_states) diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index 7f87e31abdcd..f3929ef3b593 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -46,7 +46,6 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name, row_parallel_weight_loader) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -448,15 +447,14 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: is_not_lora = hasattr(self.model.embed_tokens, 'weight') if is_not_lora: logits = self.logits_processor(self.model.embed_tokens, - hidden_states, sampling_metadata) + hidden_states) else: logits = self.logits_processor(self.model.embed_tokens.base_layer, - hidden_states, sampling_metadata) + hidden_states) return logits diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py index 003cf4563a22..f863b1da5505 100644 --- a/vllm/model_executor/models/dbrx.py +++ b/vllm/model_executor/models/dbrx.py @@ -24,7 +24,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -462,10 +461,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 59c992188149..ffc843fe033c 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -49,7 +49,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -488,10 +487,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py index 2770ddebc48a..ed7e7614800f 100644 --- a/vllm/model_executor/models/deepseek_eagle.py +++ b/vllm/model_executor/models/deepseek_eagle.py @@ -19,7 +19,6 @@ default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.deepseek_v2 import (DeepseekV2DecoderLayer, DeepseekV3ForCausalLM) -from vllm.model_executor.sampling_metadata import SamplingMetadata from .utils import AutoWeightsLoader, maybe_prefix @@ -222,10 +221,8 @@ 
def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index 8fbf16d206a8..92f311ab465b 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -15,7 +15,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .deepseek_v2 import (DeepseekV2DecoderLayer, @@ -124,15 +123,13 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> torch.Tensor: current_step_idx = (spec_step_idx % self.num_mtp_layers) mtp_layer = self.layers[str(self.mtp_start_layer_idx + current_step_idx)] logits = self.logits_processor(mtp_layer.shared_head.head, - mtp_layer.shared_head(hidden_states), - sampling_metadata) + mtp_layer.shared_head(hidden_states)) return logits @@ -161,11 +158,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> Optional[torch.Tensor]: - return self.model.compute_logits(hidden_states, sampling_metadata, - spec_step_idx) + return self.model.compute_logits(hidden_states, spec_step_idx) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 636554bd648f..a99a6679a569 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -56,7 +56,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors from vllm.utils import cdiv, direct_register_custom_op @@ -914,10 +913,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index d7ae8206baca..c8ed759d2e97 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -15,7 +15,6 @@ from vllm.config import VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.utils import set_default_torch_dtype from vllm.model_executor.models.transformers import replace_linear_class @@ -647,10 +646,8 @@ def forward(self, def compute_logits( self, hidden_states: 
torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py index 20555e48b73d..2a09234b59ed 100644 --- a/vllm/model_executor/models/dots1.py +++ b/vllm/model_executor/models/dots1.py @@ -52,7 +52,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -534,10 +533,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index ebab018ed67e..d262e9e9da50 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -49,7 +49,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -591,10 +590,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py index 0d4aced93ca1..74b358034ef3 100644 --- a/vllm/model_executor/models/ernie45_vl.py +++ b/vllm/model_executor/models/ernie45_vl.py @@ -39,7 +39,6 @@ from vllm.distributed import parallel_state from vllm.distributed import utils as dist_utils from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.activation import QuickGELU from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, @@ -1292,11 +1291,9 @@ def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: """compute logits""" - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def _vision_forward( self, diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index 7f791852ceb9..f55016f7ccb3 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -48,7 +48,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, 
maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .ernie45_moe import Ernie4_5_MoeMLP @@ -587,10 +586,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index c44626523031..288fbe736c32 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -36,7 +36,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -138,12 +137,10 @@ def compute_logits( self, hidden_states: torch.Tensor, lm_head: ParallelLMHead, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> torch.Tensor: self.layers[str(self.mtp_start_layer_idx + spec_step_idx)] - logits = self.logits_processor(lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(lm_head, hidden_states) return logits @@ -180,11 +177,10 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> Optional[torch.Tensor]: return self.model.compute_logits(hidden_states, self.lm_head, - sampling_metadata, spec_step_idx) + spec_step_idx) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py index f503fb0f9364..5dafcd595e4a 100644 --- a/vllm/model_executor/models/exaone.py +++ b/vllm/model_executor/models/exaone.py @@ -49,7 +49,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -534,10 +533,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py index 9f7d57d93814..c78eedff6670 100644 --- a/vllm/model_executor/models/exaone4.py +++ b/vllm/model_executor/models/exaone4.py @@ -45,7 +45,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -517,10 +516,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> 
Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 42c378e5c389..0c50056d1c52 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -46,7 +46,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import RWConfig @@ -496,10 +495,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/falcon_h1.py b/vllm/model_executor/models/falcon_h1.py index 757051b3b144..83efdd2e433f 100644 --- a/vllm/model_executor/models/falcon_h1.py +++ b/vllm/model_executor/models/falcon_h1.py @@ -33,7 +33,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP @@ -675,10 +674,8 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py index 90af859ab92e..53e9e6fe6e46 100644 --- a/vllm/model_executor/models/fuyu.py +++ b/vllm/model_executor/models/fuyu.py @@ -29,7 +29,6 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.linear import ColumnParallelLinear from vllm.model_executor.models.persimmon import PersimmonForCausalLM -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -389,10 +388,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: logits = self.language_model.logits_processor( - self.language_model.lm_head, hidden_states, sampling_metadata) + self.language_model.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 12eb27503870..c19425b6cb6d 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader 
-from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -412,10 +411,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.model.embed_tokens, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.model.embed_tokens, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py index 0bdb6c6bf7ae..3f76e1e7d42a 100644 --- a/vllm/model_executor/models/gemma2.py +++ b/vllm/model_executor/models/gemma2.py @@ -41,7 +41,6 @@ VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -409,10 +408,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.model.embed_tokens, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.model.embed_tokens, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py index 7246308d5902..77c0ef8cb91d 100644 --- a/vllm/model_executor/models/gemma3.py +++ b/vllm/model_executor/models/gemma3.py @@ -41,7 +41,6 @@ VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from ...attention.layers.encoder_only_attention import EncoderOnlyAttention @@ -542,10 +541,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.model.embed_tokens, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.model.embed_tokens, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index bee9fbd2c084..0630ee07c347 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -14,7 +14,6 @@ from vllm.logger import init_logger from vllm.model_executor.layers.layernorm import GemmaRMSNorm from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -704,10 +703,8 @@ def prepare_attn_masks( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py index 
ffec3408702c..f4d288fd887e 100644 --- a/vllm/model_executor/models/gemma3n.py +++ b/vllm/model_executor/models/gemma3n.py @@ -43,7 +43,6 @@ VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsQuant @@ -814,10 +813,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: Optional[SamplingMetadata], ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.model.embed_tokens, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.model.embed_tokens, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py index 8d3079aee0df..2acdba54a257 100644 --- a/vllm/model_executor/models/gemma3n_mm.py +++ b/vllm/model_executor/models/gemma3n_mm.py @@ -25,7 +25,6 @@ from vllm.model_executor.models.gemma3n import Gemma3nForCausalLM from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.whisper import ISO639_1_SUPPORTED_LANGS -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -685,10 +684,8 @@ def forward(self, def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/glm4.py b/vllm/model_executor/models/glm4.py index 5e2908a82c41..b9d5e24e9f6f 100644 --- a/vllm/model_executor/models/glm4.py +++ b/vllm/model_executor/models/glm4.py @@ -40,7 +40,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -289,10 +288,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index 308b0cb602bc..56ec63438690 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -52,7 +52,6 @@ parallel_state) from vllm.distributed import utils as dist_utils from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, @@ -1654,10 +1653,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: 
- return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index 1acbd18091fb..947c6ce62f55 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -51,7 +51,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -703,10 +702,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py index 322c5619c178..c572978e6220 100644 --- a/vllm/model_executor/models/glm4_moe_mtp.py +++ b/vllm/model_executor/models/glm4_moe_mtp.py @@ -38,7 +38,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .glm4_moe import Glm4MoeDecoderLayer, get_spec_layer_idx_from_weight_name @@ -155,15 +154,13 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> torch.Tensor: current_step_idx = (spec_step_idx % self.num_mtp_layers) mtp_layer = self.layers[str(self.mtp_start_layer_idx + current_step_idx)] logits = self.logits_processor(mtp_layer.shared_head.head, - mtp_layer.shared_head(hidden_states), - sampling_metadata) + mtp_layer.shared_head(hidden_states)) return logits @@ -192,11 +189,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> Optional[torch.Tensor]: - return self.model.compute_logits(hidden_states, sampling_metadata, - spec_step_idx) + return self.model.compute_logits(hidden_states, spec_step_idx) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index 0f6521e44e6b..24274db148bd 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from ..layers.pooler import DispatchPooler, Pooler @@ -307,10 +306,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return 
logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index 745d0b775999..162018450e7c 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -329,10 +328,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index 77df6ae6f30c..698387fab946 100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -41,7 +41,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -329,10 +328,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata, self.lm_head.bias) + self.lm_head.bias) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py index e97db188e27e..7570aefb6e96 100644 --- a/vllm/model_executor/models/gpt_neox.py +++ b/vllm/model_executor/models/gpt_neox.py @@ -40,7 +40,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -321,10 +320,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.embed_out, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.embed_out, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index b49fd0d8f88a..4fe59f91124d 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -24,7 +24,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import cdiv @@ -670,10 +669,8 @@ def forward(self, return self.model(input_ids, positions, intermediate_tensors, inputs_embeds) - def 
compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index 4f9cc2532bd8..795b38e724ea 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -48,7 +48,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -463,11 +462,9 @@ def forward( inputs_embeds) return model_output - def compute_logits( - self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def make_empty_intermediate_tensors( diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py index 221023f1fb65..a5849184339b 100644 --- a/vllm/model_executor/models/granite_speech.py +++ b/vllm/model_executor/models/granite_speech.py @@ -37,7 +37,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -776,12 +775,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits( - hidden_states, - sampling_metadata, - ) + return self.language_model.compute_logits(hidden_states) def load_weights( self, diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index da16c72000c0..07200fef4799 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -48,7 +48,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -511,11 +510,9 @@ def forward( inputs_embeds) return hidden_states - def compute_logits( - self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def make_empty_intermediate_tensors( diff --git a/vllm/model_executor/models/granitemoehybrid.py 
b/vllm/model_executor/models/granitemoehybrid.py index 79c6d8146ba9..e89a1a4a0f7d 100644 --- a/vllm/model_executor/models/granitemoehybrid.py +++ b/vllm/model_executor/models/granitemoehybrid.py @@ -32,7 +32,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType @@ -672,10 +671,8 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index 0b568a4b2268..a5d118f084e6 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -25,7 +25,6 @@ QuantizationConfig) from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .granitemoe import GraniteMoeAttention, GraniteMoeModel, GraniteMoeMoE @@ -311,11 +310,9 @@ def forward( inputs_embeds) return hidden_states - def compute_logits( - self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, + hidden_states: torch.Tensor) -> Optional[torch.Tensor]: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def make_empty_intermediate_tensors( diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py index a59113438337..996e41fe84ff 100644 --- a/vllm/model_executor/models/grok1.py +++ b/vllm/model_executor/models/grok1.py @@ -46,7 +46,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -528,10 +527,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index 4110c8a1fd08..8a23a6b45bc7 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -54,7 +54,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import 
MixtureOfExperts, SupportsLoRA, SupportsPP @@ -1004,10 +1003,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def make_empty_intermediate_tensors( diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index 870addd0dcbc..54167f9f1099 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -31,7 +31,6 @@ from vllm.config import VllmConfig from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -962,10 +961,8 @@ def _prepare_multimodal_kwargs(self, **kwargs: object): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights( self, diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index 9153a0e2c1e5..18446d126b51 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -31,7 +31,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -738,10 +737,8 @@ def forward( return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py index 19a3ef1a3b80..8fdf70e35a2b 100644 --- a/vllm/model_executor/models/interfaces_base.py +++ b/vllm/model_executor/models/interfaces_base.py @@ -13,11 +13,9 @@ if TYPE_CHECKING: from vllm.config import VllmConfig from vllm.model_executor.layers.pooler import Pooler - from vllm.model_executor.sampling_metadata import SamplingMetadata else: VllmConfig = Any Pooler = Any - SamplingMetadata = Any logger = init_logger(__name__) @@ -100,7 +98,6 @@ class VllmModelForTextGeneration(VllmModel[T], Protocol[T]): def compute_logits( self, hidden_states: T, - sampling_metadata: SamplingMetadata, ) -> Optional[T]: """Return `None` if TP rank > 0.""" ... 
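To make the recurring change in this series concrete: after the `interfaces_base.py` hunk above, `VllmModelForTextGeneration.compute_logits` accepts only the hidden states, and each model's implementation calls its logits processor without a `SamplingMetadata` argument. The following is a minimal, self-contained sketch, not code from this patch; `TinyLM` and its toy head are hypothetical stand-ins used only to illustrate the updated call shape.

    # Illustrative sketch only -- "TinyLM" is a hypothetical stand-in, not a
    # model touched by this patch. It mirrors the updated protocol: no
    # SamplingMetadata parameter on compute_logits.
    from typing import Optional

    import torch
    from torch import nn


    class TinyLM(nn.Module):

        def __init__(self, hidden_size: int = 16, vocab_size: int = 32) -> None:
            super().__init__()
            # Stand-in for the lm_head + logits processor used by the real models.
            self.lm_head = nn.Linear(hidden_size, vocab_size, bias=False)

        def compute_logits(
            self,
            hidden_states: torch.Tensor,
        ) -> Optional[torch.Tensor]:
            # Logits now depend only on the hidden states; sampling metadata is
            # no longer threaded through this call.
            return self.lm_head(hidden_states)


    logits = TinyLM().compute_logits(torch.randn(2, 16))  # callers pass hidden states only

In the real models above, the body instead delegates to vLLM's `LogitsProcessor`, which after this series is invoked as `logits_processor(lm_head, hidden_states)` (plus an optional bias, as in the `gpt_j.py` hunk) rather than with a `SamplingMetadata` argument.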
diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index ce94328797ed..221ff08b4384 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -29,7 +29,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -358,10 +357,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.output, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.output, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py index b59d1b88cf5c..ba72c288b2b1 100644 --- a/vllm/model_executor/models/interns1.py +++ b/vllm/model_executor/models/interns1.py @@ -21,7 +21,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.interns1_vit import InternS1VisionModel from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -812,10 +811,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index 6a5c565b52e8..f4004e518e3b 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -25,7 +25,6 @@ from vllm.model_executor.models.intern_vit import (InternVisionModel, InternVisionPatchModel) from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.image import convert_image_mode from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -1399,10 +1398,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index 4fee8c32fd58..0eb1578b4361 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -42,7 +42,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence 
import IntermediateTensors from vllm.transformers_utils.configs import JAISConfig @@ -332,10 +331,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 5b8fbc722686..12a49029195f 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -32,7 +32,6 @@ from vllm.model_executor.models.llama import LlamaMLP as JambaMLP from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType @@ -581,10 +580,8 @@ def get_mamba_state_shape_from_config( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py index afe33b4d4ad2..2e5e276cc1c7 100644 --- a/vllm/model_executor/models/keye.py +++ b/vllm/model_executor/models/keye.py @@ -21,7 +21,6 @@ from vllm.config import VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.linear import (ColumnParallelLinear, QKVParallelLinear, RowParallelLinear) @@ -1556,10 +1555,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index 94a5933a6141..f554077935bf 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -67,7 +67,6 @@ SupportsPP) from vllm.model_executor.models.moonvit import MoonVitPretrainedModel from vllm.model_executor.models.utils import merge_multimodal_embeddings -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -484,10 +483,8 @@ def forward( return hidden_states def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, **kwargs) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata, **kwargs) + logits = self.logits_processor(self.lm_head, hidden_states, **kwargs) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py index 927f78c4e4b4..dd97afbeb668 100644 --- a/vllm/model_executor/models/lfm2.py +++ b/vllm/model_executor/models/lfm2.py @@ -27,7 +27,6 @@ 
from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, @@ -542,10 +541,8 @@ def forward( inputs_embeds) return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index f8ea2111fed5..1b03cbef501b 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -48,7 +48,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsEagle3, SupportsLoRA, SupportsPP @@ -601,10 +600,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index 7027138dfcb1..fb10af6c53c9 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -21,7 +21,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.llama import (LlamaDecoderLayer, LlamaForCausalLM) -from vllm.v1.sample.metadata import SamplingMetadata from .utils import AutoWeightsLoader, maybe_prefix @@ -244,10 +243,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) if self.draft_id_to_target_id is None: assert logits.shape[1] == self.config.vocab_size, \ "Expected logits to have shape " \ diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 4f15e1b5762e..e2d7b9f23b28 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -20,7 +20,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -760,10 +759,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) 
-> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index beb3c3310059..c9133fde1455 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -13,7 +13,6 @@ get_anyres_image_grid_shape, unpad_image) from vllm.config import VllmConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalFieldConfig from vllm.multimodal.parse import ImageSize @@ -563,10 +562,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index cf9852de633f..610fb188d57d 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -13,7 +13,6 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.models.clip import CLIPVisionModel -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -464,10 +463,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py index 46d54452a52d..cee9ddaf94cc 100644 --- a/vllm/model_executor/models/llava_onevision.py +++ b/vllm/model_executor/models/llava_onevision.py @@ -14,7 +14,6 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.activation import get_act_fn -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -934,10 +933,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py index 9d1017dac8aa..36141a5d5064 100644 --- a/vllm/model_executor/models/mamba.py +++ b/vllm/model_executor/models/mamba.py @@ -26,7 +26,6 @@ IsAttentionFree, SupportsPP) from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import 
SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType @@ -299,10 +298,8 @@ def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): def get_seqlen_agnostic_capture_inputs(self, batch_size: int): return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index b1a4138cb8f6..9c3108146d2e 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -30,7 +30,6 @@ IsAttentionFree) from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.utils import LayerBlockType @@ -335,10 +334,8 @@ def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): def get_seqlen_agnostic_capture_inputs(self, batch_size: int): return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/medusa.py b/vllm/model_executor/models/medusa.py index b0a96fca2ff8..0ae59dc8dfc2 100644 --- a/vllm/model_executor/models/medusa.py +++ b/vllm/model_executor/models/medusa.py @@ -104,12 +104,11 @@ def forward(self, hidden_states: torch.Tensor) -> list[torch.Tensor]: def compute_logits( self, hidden_states: list[torch.Tensor], - sampling_metadata, ) -> list[torch.Tensor]: logits_lst: list[torch.Tensor] = [] for hs, lm_head in zip(hidden_states, self.lm_heads): - _logits = self.logits_processor(lm_head, hs, sampling_metadata) + _logits = self.logits_processor(lm_head, hs) if _logits is None: # _logits should only be None on rank > 0, in which case diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py index 140800dd41c7..82648ba668ca 100644 --- a/vllm/model_executor/models/midashenglm.py +++ b/vllm/model_executor/models/midashenglm.py @@ -42,7 +42,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.utils import set_default_torch_dtype -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -784,9 +783,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.decoder.compute_logits(hidden_states, sampling_metadata) + return self.decoder.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git 
a/vllm/model_executor/models/mimo.py b/vllm/model_executor/models/mimo.py index ea5292d0df20..d256c1f3eed7 100644 --- a/vllm/model_executor/models/mimo.py +++ b/vllm/model_executor/models/mimo.py @@ -41,7 +41,6 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM, Qwen2Model -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .utils import PPMissingLayer, is_pp_missing_parameter, maybe_prefix @@ -183,9 +182,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: hidden_states = self.model.norm(hidden_states) - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits diff --git a/vllm/model_executor/models/mimo_mtp.py b/vllm/model_executor/models/mimo_mtp.py index 09194e9f95d0..b4abe458e477 100644 --- a/vllm/model_executor/models/mimo_mtp.py +++ b/vllm/model_executor/models/mimo_mtp.py @@ -34,7 +34,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.qwen2 import Qwen2DecoderLayer -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .utils import maybe_prefix @@ -140,12 +139,10 @@ def compute_logits( self, hidden_states: torch.Tensor, lm_head: ParallelLMHead, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> torch.Tensor: self.mtp_layers[str(self.mtp_start_layer_idx + spec_step_idx)] - logits = self.logits_processor(lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(lm_head, hidden_states) return logits @@ -178,11 +175,10 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> Optional[torch.Tensor]: return self.model.compute_logits(hidden_states, self.lm_head, - sampling_metadata, spec_step_idx) + spec_step_idx) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index 240c23ea2b25..0986ea07406a 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -51,7 +51,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -583,10 +582,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/minicpm_eagle.py b/vllm/model_executor/models/minicpm_eagle.py index 848a97b8bb2a..2af0d546ce63 100644 --- 
a/vllm/model_executor/models/minicpm_eagle.py +++ b/vllm/model_executor/models/minicpm_eagle.py @@ -39,7 +39,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -376,10 +375,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py index 9b2d84e32151..a17c4f004d75 100644 --- a/vllm/model_executor/models/minicpmv.py +++ b/vllm/model_executor/models/minicpmv.py @@ -50,7 +50,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM from vllm.model_executor.models.qwen3 import Qwen3ForCausalLM -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -1194,9 +1193,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.llm.compute_logits(hidden_states, sampling_metadata) + return self.llm.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index 6ce883be0a83..1d2c7dea811e 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -41,7 +41,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.utils import maybe_prefix -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid @@ -742,10 +741,8 @@ def forward(self, return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states.float(), - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states.float()) return logits diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py index cc7db849a28b..b2f020f3323e 100644 --- a/vllm/model_executor/models/minimax_vl_01.py +++ b/vllm/model_executor/models/minimax_vl_01.py @@ -14,7 +14,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalFieldConfig from vllm.sequence 
import IntermediateTensors @@ -420,10 +419,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index d15776a39362..94e3d7234b6f 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -20,7 +20,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -606,10 +605,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 8b3474d80953..bebf0b5adac5 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -49,7 +49,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP @@ -594,10 +593,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py index 2f0e8a2a5e57..131a66b71323 100644 --- a/vllm/model_executor/models/mllama4.py +++ b/vllm/model_executor/models/mllama4.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.model_loader.utils import initialize_model from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -856,10 +855,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def separate_weights( self, diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index 2475fe131609..201bf83cac58 100644 --- 
a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -26,7 +26,6 @@ get_tensor_model_parallel_world_size, split_tensor_along_last_dim, tensor_model_parallel_all_gather) -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.activation import (MulAndSilu, QuickGELU, SiluAndMul) from vllm.model_executor.layers.layernorm import RMSNorm @@ -1527,10 +1526,8 @@ def forward( return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index 48ac91fa6dde..64d669e8ac3e 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -25,7 +25,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -320,10 +319,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index 4f8652c00694..ae50f1aefc6f 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -37,7 +37,6 @@ init_vllm_registered_model, maybe_prefix, merge_multimodal_embeddings) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargs, MultiModalKwargsItems, @@ -1192,10 +1191,8 @@ def get_mm_mapping(self) -> MultiModelKeys: def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): adapter_dict = dict(self.mlp1.named_parameters()) diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py index 21f785e4b91a..6bb2f7392cb4 100644 --- a/vllm/model_executor/models/nemotron.py +++ b/vllm/model_executor/models/nemotron.py @@ -45,7 +45,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import NemotronConfig @@ -498,10 +497,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - 
logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py index 1e1f0524bd06..ff571541a60a 100644 --- a/vllm/model_executor/models/nemotron_h.py +++ b/vllm/model_executor/models/nemotron_h.py @@ -54,7 +54,6 @@ from vllm.model_executor.models.utils import ( AutoWeightsLoader, WeightsMapper, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import NemotronHConfig from vllm.utils import LayerBlockType @@ -622,10 +621,8 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py index f8e38dcd80b5..d474c8db41b2 100644 --- a/vllm/model_executor/models/nemotron_nas.py +++ b/vllm/model_executor/models/nemotron_nas.py @@ -44,7 +44,6 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.llama import LlamaAttention, LlamaMLP -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import HasNoOps, SupportsLoRA, SupportsPP @@ -468,10 +467,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/nemotron_vl.py b/vllm/model_executor/models/nemotron_vl.py index acda2027401d..3abbff8c717d 100644 --- a/vllm/model_executor/models/nemotron_vl.py +++ b/vllm/model_executor/models/nemotron_vl.py @@ -26,7 +26,6 @@ BaseInternVLProcessingInfo, InternVLImageEmbeddingInputs, InternVLImageInputs, InternVLImagePixelInputs, InternVLProcessor) from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.image import convert_image_mode from vllm.multimodal.inputs import NestedTensors @@ -632,10 +631,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 7be3c16528b5..9fa8760073c1 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -45,7 +45,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( 
ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -391,10 +390,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/olmo2.py b/vllm/model_executor/models/olmo2.py index 3e4c580a1121..2e0b1fb2a13f 100644 --- a/vllm/model_executor/models/olmo2.py +++ b/vllm/model_executor/models/olmo2.py @@ -54,7 +54,6 @@ from vllm.model_executor.models.utils import ( AutoWeightsLoader, extract_layer_index, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import Olmo3Config @@ -427,10 +426,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py index 892e967e4a21..77ece544d490 100644 --- a/vllm/model_executor/models/olmoe.py +++ b/vllm/model_executor/models/olmoe.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -471,10 +470,8 @@ def forward( inputs_embeds) return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 365aab205b21..4c3ce9f61efb 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -399,10 +398,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: 
Iterable[tuple[str, diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index 944a9151d75d..586fea343d6f 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -28,7 +28,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -339,10 +338,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index f1bb18716b40..052e143b27f6 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -39,7 +39,6 @@ from vllm.model_executor.models.utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, maybe_prefix) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -558,9 +557,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.llm.compute_logits(hidden_states, sampling_metadata) + logits = self.llm.compute_logits(hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index 5e4758ef8ea5..f18e38ce154d 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -19,7 +19,6 @@ from vllm.model_executor.models.utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, maybe_prefix) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -630,9 +629,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.llm.compute_logits(hidden_states, sampling_metadata) + logits = self.llm.compute_logits(hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py index d6eec77ebcee..aef510230461 100644 --- a/vllm/model_executor/models/paligemma.py +++ b/vllm/model_executor/models/paligemma.py @@ -9,7 +9,6 @@ from vllm.config import VllmConfig from vllm.logger import init_logger -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalInputs, MultiModalKwargsItems, @@ -403,10 +402,8 @@ def forward(self, def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - 
sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/persimmon.py b/vllm/model_executor/models/persimmon.py index 3e854e4d561f..23fb7bb85215 100644 --- a/vllm/model_executor/models/persimmon.py +++ b/vllm/model_executor/models/persimmon.py @@ -44,7 +44,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -334,10 +333,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index 6f39afbecf35..9cf288e85005 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -59,7 +59,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -346,10 +345,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata, self.lm_head.bias) + self.lm_head.bias) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py index 4522c7043d01..a2b201fe4228 100644 --- a/vllm/model_executor/models/phi3v.py +++ b/vllm/model_executor/models/phi3v.py @@ -29,7 +29,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -681,10 +680,8 @@ def forward(self, def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py index 25df9e9261d9..d2a3a8cc0496 100644 --- a/vllm/model_executor/models/phi4_multimodal.py +++ b/vllm/model_executor/models/phi4_multimodal.py @@ -27,7 +27,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import 
SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -1451,10 +1450,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py index aa7c434a44ae..ae153558e37a 100644 --- a/vllm/model_executor/models/phi4flash.py +++ b/vllm/model_executor/models/phi4flash.py @@ -29,7 +29,6 @@ SupportsV0Only) from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .utils import make_layers, maybe_prefix @@ -695,12 +694,10 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: processed_logits = self.logits_processor( self.lm_head, hidden_states, - sampling_metadata, self.embedding_bias, ) return processed_logits diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py index b3fc55dab6ec..47b5ad55ab2d 100644 --- a/vllm/model_executor/models/phi4mm.py +++ b/vllm/model_executor/models/phi4mm.py @@ -18,7 +18,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead) from vllm.model_executor.models.llama import LlamaModel from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -1257,10 +1256,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index 01d16f1f2c38..3ce67ce37a7a 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -47,7 +47,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -667,10 +666,8 @@ def forward( inputs_embeds) return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py 
index 142d3251bc67..7b197844c8b6 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -32,7 +32,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalUUIDDict, NestedTensors) @@ -480,10 +479,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 9f1ee36366fd..33ee1cf44afd 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -52,7 +52,6 @@ from vllm.model_executor.models.utils import ( is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -1022,10 +1021,8 @@ def get_mamba_state_shape_from_config( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index 747094849900..e0c08a6a8827 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -30,7 +30,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -282,10 +281,8 @@ def __init__( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index e13e87b93429..c536b0f60c30 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -49,7 +49,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import is_interleaved @@ -510,10 +509,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, 
) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py index a7e71309b607..5f27230c913b 100644 --- a/vllm/model_executor/models/qwen2_5_omni_thinker.py +++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py @@ -50,7 +50,6 @@ from vllm.model_executor.models.qwen2_audio import ( Qwen2AudioProcessingInfo, _get_feat_extract_output_lengths) from vllm.model_executor.models.qwen2_vl import Qwen2VLMultiModalDataParser -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (ImageItem, ModalityData, MultiModalDataDict, MultiModalFieldConfig, @@ -955,10 +954,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index dbf486374bcf..73b27572a8eb 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -43,7 +43,6 @@ from vllm.distributed import parallel_state from vllm.distributed import utils as dist_utils from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.activation import get_act_and_mul_fn from vllm.model_executor.layers.layernorm import RMSNorm # yapf: disable @@ -1256,10 +1255,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py index c797b71b5d2e..762ab42e5929 100644 --- a/vllm/model_executor/models/qwen2_audio.py +++ b/vllm/model_executor/models/qwen2_audio.py @@ -34,7 +34,6 @@ from transformers.models.whisper import WhisperFeatureExtractor from vllm.config import VllmConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (AudioItem, ModalityData, MultiModalDataDict, MultiModalFieldConfig, @@ -481,10 +480,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index 6c6276a93045..6a9acaf2c3fe 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -51,7 +51,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, 
VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -546,10 +545,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index dd4e7731e0b0..b3c42c257256 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -46,7 +46,6 @@ from vllm.distributed import parallel_state, tensor_model_parallel_all_gather from vllm.distributed import utils as dist_utils from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.activation import QuickGELU from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) @@ -1527,10 +1526,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen3.py b/vllm/model_executor/models/qwen3.py index dddb47048a1f..ae72fd30c399 100644 --- a/vllm/model_executor/models/qwen3.py +++ b/vllm/model_executor/models/qwen3.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsEagle3, SupportsLoRA, SupportsPP @@ -328,10 +327,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 029309c49efd..0661b3707ff4 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -54,7 +54,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP @@ -690,10 +689,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, 
diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index ce917f92bd2e..24cebc5bfdd8 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -53,7 +53,6 @@ default_weight_loader, sharded_weight_loader) from vllm.model_executor.models.mamba_cache import MambaCacheParams from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -1208,10 +1207,8 @@ def get_mamba_state_shape_from_config( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + return self.logits_processor(self.lm_head, hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py index c755eeb9b4ea..c054339842e6 100644 --- a/vllm/model_executor/models/qwen3_next_mtp.py +++ b/vllm/model_executor/models/qwen3_next_mtp.py @@ -19,7 +19,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.qwen3_next import (Qwen3NextDecoderLayer, Qwen3NextRMSNorm) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import Qwen3NextConfig @@ -266,11 +265,9 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, spec_step_idx: int = 0, ) -> Optional[torch.Tensor]: - return self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + return self.logits_processor(self.lm_head, hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index ca232e03767b..aa28c07ddceb 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -45,7 +45,6 @@ from vllm.config import VllmConfig from vllm.distributed import get_pp_group from vllm.logger import init_logger -from vllm.model_executor import SamplingMetadata from vllm.model_executor.layers.activation import _ACTIVATION_REGISTRY from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) @@ -1493,10 +1492,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/seed_oss.py b/vllm/model_executor/models/seed_oss.py index e3c7c700f8fa..a217c820fedf 100644 --- a/vllm/model_executor/models/seed_oss.py +++ b/vllm/model_executor/models/seed_oss.py @@ -47,7 +47,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import 
SupportsLoRA, SupportsPP @@ -472,10 +471,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/skyworkr1v.py b/vllm/model_executor/models/skyworkr1v.py index 9857ccdcbe2d..893ce4497c31 100644 --- a/vllm/model_executor/models/skyworkr1v.py +++ b/vllm/model_executor/models/skyworkr1v.py @@ -22,7 +22,6 @@ from vllm.model_executor.layers.quantization.awq import AWQConfig from vllm.model_executor.models.intern_vit import (InternVisionModel, InternVisionPatchModel) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.image import convert_image_mode from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, @@ -897,10 +896,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/solar.py b/vllm/model_executor/models/solar.py index 94c862258b7a..c774171b9dcd 100644 --- a/vllm/model_executor/models/solar.py +++ b/vllm/model_executor/models/solar.py @@ -47,7 +47,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -495,10 +494,8 @@ def forward( inputs_embeds) return model_output - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index 9e880ebd5081..e4dfe8d5a9a3 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -42,7 +42,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -332,10 +331,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index 62ff9b618275..7f379ab95a03 100644 --- 
a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -43,7 +43,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -339,10 +338,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 6a5b540fc817..0cce0c78f8dc 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -29,7 +29,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import SupportsPP @@ -405,10 +404,8 @@ def forward(self, inputs_embeds) return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py index c2940f8e4445..f667266b77bf 100644 --- a/vllm/model_executor/models/step3_vl.py +++ b/vllm/model_executor/models/step3_vl.py @@ -23,7 +23,6 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -1055,10 +1054,8 @@ def forward( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index c66867315e55..67cf3ccf315d 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -23,7 +23,6 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.llava import LlavaDummyInputsBuilder -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import MultiModalFieldConfig, MultiModalKwargsItems @@ -638,10 +637,8 @@ def forward( def compute_logits( self, 
hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index 3bd4d10316ec..475a68bc642b 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -41,7 +41,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalInputs, MultiModalUUIDDict, @@ -798,10 +797,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index f1f11c5fe8f0..12ae9487ad9d 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -18,7 +18,6 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.model_loader import DefaultModelLoader from vllm.model_executor.models.module_mapping import MultiModelKeys -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -616,10 +615,8 @@ def forward(self, inputs_embeds=inputs_embeds) return hidden_states - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + return self.language_model.compute_logits(hidden_states) def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py index 16a97389cd21..b33e8d09c4be 100644 --- a/vllm/model_executor/models/voxtral.py +++ b/vllm/model_executor/models/voxtral.py @@ -30,7 +30,6 @@ # yapf: disable from vllm.model_executor.models.whisper import WhisperEncoder # yapf: enable -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, MultiModalUUIDDict, @@ -454,10 +453,8 @@ def _parse_and_validate_audio_arrays( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: - return self.language_model.compute_logits(hidden_states, - sampling_metadata) + return self.language_model.compute_logits(hidden_states) @classmethod def get_speech_to_text_config(cls, model_config: ModelConfig, diff --git a/vllm/model_executor/models/whisper.py 
b/vllm/model_executor/models/whisper.py index 41ae7b129782..de3e4f0592a6 100644 --- a/vllm/model_executor/models/whisper.py +++ b/vllm/model_executor/models/whisper.py @@ -31,7 +31,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.model_loader.utils import set_default_torch_dtype from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.multimodal import MULTIMODAL_REGISTRY, NestedTensors from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems) @@ -936,10 +935,8 @@ def _parse_and_validate_audio_input( return WhisperAudioInputs(input_features=input_features) - def compute_logits(self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata) -> torch.Tensor: - logits = self.logits_processor(self.proj_out, hidden_states, - sampling_metadata) + def compute_logits(self, hidden_states: torch.Tensor) -> torch.Tensor: + logits = self.logits_processor(self.proj_out, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/zamba2.py b/vllm/model_executor/models/zamba2.py index e601bc3adb6e..4350e38e02f9 100644 --- a/vllm/model_executor/models/zamba2.py +++ b/vllm/model_executor/models/zamba2.py @@ -41,7 +41,6 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.mamba_cache import (MambaCacheManager, MambaCacheParams) -from vllm.model_executor.sampling_metadata import SamplingMetadata from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid @@ -1036,7 +1035,6 @@ def get_seqlen_agnostic_capture_inputs( def compute_logits( self, hidden_states: torch.Tensor, - sampling_metadata: SamplingMetadata, ) -> Optional[torch.Tensor]: """Compute logits for next token prediction. @@ -1047,8 +1045,7 @@ def compute_logits( Returns: Logits for next token prediction """ - logits = self.logits_processor(self.lm_head, hidden_states, - sampling_metadata) + logits = self.logits_processor(self.lm_head, hidden_states) return logits def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/sampling_metadata.py b/vllm/model_executor/sampling_metadata.py deleted file mode 100644 index 8c4548ff7f7d..000000000000 --- a/vllm/model_executor/sampling_metadata.py +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - - -class SamplingMetadata: - # Placeholder until it can be safely removed. - pass diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 2a178ddf4877..5dacf6088696 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -239,7 +239,7 @@ def propose( else: last_hidden_states, hidden_states = ret_hidden_states sample_hidden_states = last_hidden_states[last_token_indices] - logits = self.model.compute_logits(sample_hidden_states, None) + logits = self.model.compute_logits(sample_hidden_states) # Early exit if there is only one draft token to be generated. 
if self.num_speculative_tokens == 1: @@ -367,8 +367,7 @@ def propose( else: last_hidden_states, hidden_states = ret_hidden_states hidden_states = hidden_states[:batch_size] - logits = self.model.compute_logits(last_hidden_states[:batch_size], - None) + logits = self.model.compute_logits(last_hidden_states[:batch_size]) draft_token_ids = logits.argmax(dim=-1) draft_token_ids_list.append(draft_token_ids) @@ -678,9 +677,7 @@ def propose_tree( # Get the output logits for the draft tokens. logits = self.model.compute_logits( draft_last_hidden_states.reshape(batch_size * level_num_drafts, - -1), - None, - ) + -1)) # Sample a draft token for each child at the next tree level. num_children = self.child_drafts_per_level[level + 1] diff --git a/vllm/v1/spec_decode/medusa.py b/vllm/v1/spec_decode/medusa.py index 3e90179e78d9..70b29c05c2a5 100644 --- a/vllm/v1/spec_decode/medusa.py +++ b/vllm/v1/spec_decode/medusa.py @@ -41,7 +41,7 @@ def propose( ) -> list[list[int]]: # Generate blocks and compute logits blocks = self.model(target_hidden_states) - logits = self.model.compute_logits(blocks, None) + logits = self.model.compute_logits(blocks) # Get draft tokens and transpose the result # TODO(woosuk): OPTIMIZATION: Return GPU tensor without GPU-CPU diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index d0946e8c5d7d..b0cd0f413307 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2240,7 +2240,7 @@ def execute_model( return output sample_hidden_states = hidden_states[logits_indices] - logits = self.model.compute_logits(sample_hidden_states, None) + logits = self.model.compute_logits(sample_hidden_states) else: # Rare case. assert not self.is_pooling_model @@ -2258,8 +2258,7 @@ def execute_model( logits = None else: sample_hidden_states = hidden_states[logits_indices] - logits = self.model.compute_logits(sample_hidden_states, - None) + logits = self.model.compute_logits(sample_hidden_states) model_output_broadcast_data = {} if logits is not None: @@ -2706,7 +2705,7 @@ def _get_prompt_logprobs_dict( req_idx = self.input_batch.req_id_to_index[req_id] offset = self.query_start_loc.np[req_idx].item() prompt_hidden_states = hidden_states[offset:offset + num_logits] - logits = self.model.compute_logits(prompt_hidden_states, None) + logits = self.model.compute_logits(prompt_hidden_states) # Get the "target" tokens for each index. For prompt at index i, # the token at prompt index i+1 is the "sampled" token we want @@ -3105,7 +3104,7 @@ def _dummy_sampler_run( # To avoid breaking the sampler, we use a random tensor here instead. hidden_states = torch.rand_like(hidden_states) - logits = self.model.compute_logits(hidden_states, None) + logits = self.model.compute_logits(hidden_states) num_reqs = logits.size(0) dummy_tensors = lambda v: torch.full( diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 48070c1e3e7c..dd11b1dcbe94 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -1692,7 +1692,7 @@ def select_hidden_states(self, hidden_states, indices_do_sample): @torch.compile(backend="openxla", fullgraph=True, dynamic=False) def compute_logits(self, sample_hidden_states: torch.Tensor) -> torch.Tensor: - return self.model.compute_logits(sample_hidden_states, None) + return self.model.compute_logits(sample_hidden_states) # TODO: Under SPMD mode, sample_from_logits has correctness issue. # Re-enable the torch.compile once the issue is fixed in torchxla. 
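The hunks above all make the same mechanical change: the unused `sampling_metadata` argument is dropped from every model's `compute_logits` and from the `LogitsProcessor` call, the placeholder `SamplingMetadata` class is deleted, and callers (spec-decode proposers, the GPU/TPU model runners) now pass only the sampled hidden states. A minimal sketch of the resulting shape of a model's `compute_logits`, assuming a hypothetical `MyModel` wrapper around an existing `lm_head` and logits processor (the two-argument processor call matches the diffs above; everything else here is illustrative, not vLLM source):

```python
from typing import Callable, Optional

import torch


class MyModel:
    """Illustrative stand-in for a vLLM model after the signature cleanup."""

    def __init__(self, lm_head: torch.nn.Module,
                 logits_processor: Callable[..., torch.Tensor]):
        self.lm_head = lm_head
        self.logits_processor = logits_processor

    def compute_logits(
        self,
        hidden_states: torch.Tensor,
    ) -> Optional[torch.Tensor]:
        # No sampling_metadata parameter anymore: the processor is called
        # with just the head and the hidden states, mirroring the hunks above.
        return self.logits_processor(self.lm_head, hidden_states)
```

Call sites simplify in the same way, e.g. `logits = model.compute_logits(sample_hidden_states)` instead of passing a trailing `None`.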
From af7dfb0d1a95fb097a93de156ddd6c1eb5c72796 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 22 Sep 2025 04:12:45 +0800 Subject: [PATCH 214/518] [Perf] Further optimization for Qwen3-VL `fast_pos_embed_interpolate` (#25347) Signed-off-by: Isotr0py --- vllm/model_executor/models/qwen3_vl.py | 50 ++++++++++++++++---------- 1 file changed, 32 insertions(+), 18 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index aa28c07ddceb..98d65dea2739 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -405,25 +405,39 @@ def fast_pos_embed_interpolate(self, dh = h_idxs - h_floor dw = w_idxs - w_floor - w00 = ((1 - dh)[:, None] * (1 - dw)[None, :]).reshape(-1) - w01 = ((1 - dh)[:, None] * dw[None, :]).reshape(-1) - w10 = (dh[:, None] * (1 - dw)[None, :]).reshape(-1) - w11 = (dh[:, None] * dw[None, :]).reshape(-1) - - idx00 = (h_floor[:, None] * num_grid_per_side + - w_floor[None, :]).reshape(-1) - idx01 = (h_floor[:, None] * num_grid_per_side + - w_ceil[None, :]).reshape(-1) - idx10 = (h_ceil[:, None] * num_grid_per_side + - w_floor[None, :]).reshape(-1) - idx11 = (h_ceil[:, None] * num_grid_per_side + - w_ceil[None, :]).reshape(-1) - - indices = torch.stack([idx00, idx01, idx10, idx11], dim=0) + # Create meshgrid view for all h, w vars + dh_grid, dw_grid = torch.meshgrid(dh, dw, indexing='ij') + h_floor_grid, w_floor_grid = torch.meshgrid(h_floor, + w_floor, + indexing='ij') + h_ceil_grid, w_ceil_grid = torch.meshgrid(h_ceil, + w_ceil, + indexing='ij') + h_floor_grid_idx = h_floor_grid * num_grid_per_side + h_ceil_grid_idx = h_ceil_grid * num_grid_per_side + + # original computation of weights + # w00 = (1 - dh_grid) * (1 - dw_grid) + # w01 = (1 - dh_grid) * dw_grid + # w10 = dh_grid * (1 - dw_grid) + # w11 = dh_grid * dw_grid + # we reuse w11 here to avoid duplicate + # dh_grid * dw_grid computation + w11 = dh_grid * dw_grid + w10 = dh_grid - w11 + w01 = dw_grid - w11 + w00 = 1 - dh_grid - dw_grid + w11 + + idx00 = h_floor_grid_idx + w_floor_grid + idx01 = h_floor_grid_idx + w_ceil_grid + idx10 = h_ceil_grid_idx + w_floor_grid + idx11 = h_ceil_grid_idx + w_ceil_grid + + indices = torch.stack([idx00, idx01, idx10, idx11], + dim=0).reshape(4, -1) weights = torch.stack([w00, w01, w10, w11], - dim=0).to(dtype=self.dtype, - device=self.device) - weights = weights.unsqueeze(-1) + dim=0).reshape(4, -1, 1) + weights = weights.to(dtype=self.dtype, device=self.device) embeds = self.pos_embed(indices) weighted_embeds = embeds * weights From bc6e542d9fc400d7002853b3878c81b14fbdc998 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Sun, 21 Sep 2025 16:03:28 -0700 Subject: [PATCH 215/518] Remove V0 attention backends (#25351) Signed-off-by: Woosuk Kwon --- examples/offline_inference/qwen_1m.py | 1 - tests/compile/test_fusion_attn.py | 5 +- tests/kernels/attention/test_attention.py | 6 +- .../attention/test_attention_selector.py | 1 + .../kernels/attention/test_prefix_prefill.py | 6 +- .../attention/test_rocm_attention_selector.py | 1 + tests/kernels/utils.py | 66 +- tests/models/test_initialization.py | 5 +- .../backends/differential_flash_attn.py | 931 ---------- .../backends/dual_chunk_flash_attn.py | 1495 ----------------- vllm/attention/backends/flash_attn.py | 929 ---------- vllm/attention/backends/flashmla.py | 227 --- vllm/attention/backends/mla/__init__.py | 0 vllm/attention/backends/mla/common.py | 1305 -------------- vllm/attention/backends/rocm_aiter_mla.py | 407 ----- 
vllm/attention/backends/rocm_flash_attn.py | 953 ----------- vllm/attention/backends/triton_mla.py | 111 -- vllm/attention/backends/utils.py | 14 +- vllm/attention/backends/xformers.py | 805 --------- vllm/config/model.py | 7 +- .../kv_transfer/kv_connector/utils.py | 2 +- vllm/engine/arg_utils.py | 15 +- vllm/envs.py | 1 - .../layers/mamba/mamba2_metadata.py | 19 +- vllm/model_executor/models/deepseek_v2.py | 3 +- vllm/platforms/cuda.py | 139 +- vllm/platforms/rocm.py | 61 +- vllm/utils/__init__.py | 2 - 28 files changed, 142 insertions(+), 7375 deletions(-) delete mode 100644 vllm/attention/backends/differential_flash_attn.py delete mode 100644 vllm/attention/backends/dual_chunk_flash_attn.py delete mode 100755 vllm/attention/backends/flash_attn.py delete mode 100644 vllm/attention/backends/flashmla.py delete mode 100644 vllm/attention/backends/mla/__init__.py delete mode 100644 vllm/attention/backends/mla/common.py delete mode 100644 vllm/attention/backends/rocm_aiter_mla.py delete mode 100644 vllm/attention/backends/rocm_flash_attn.py delete mode 100644 vllm/attention/backends/triton_mla.py delete mode 100644 vllm/attention/backends/xformers.py diff --git a/examples/offline_inference/qwen_1m.py b/examples/offline_inference/qwen_1m.py index d8d61667f688..c8d0d91ce7b5 100644 --- a/examples/offline_inference/qwen_1m.py +++ b/examples/offline_inference/qwen_1m.py @@ -5,7 +5,6 @@ from vllm import LLM, SamplingParams -os.environ["VLLM_ATTENTION_BACKEND"] = "DUAL_CHUNK_FLASH_ATTN" os.environ["VLLM_ALLOW_LONG_MAX_MODEL_LEN"] = "1" diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index b6bebbba915b..c3f1c7481d1b 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -334,8 +334,9 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor): [7, 256, 533] if current_platform.is_cuda() else [8]) @pytest.mark.parametrize("dtype", [torch.bfloat16, torch.float16]) @pytest.mark.parametrize("model_name, model_class", MODELS) -@pytest.mark.parametrize("backend", [_Backend.FLASHINFER] if - current_platform.is_cuda() else [_Backend.ROCM_FLASH]) +@pytest.mark.parametrize("backend", + [_Backend.FLASHINFER] if current_platform.is_cuda() + else [_Backend.TRITON_ATTN_VLLM_V1]) @pytest.mark.parametrize( "split_attention", [False, True] if current_platform.is_rocm() else [False]) diff --git a/tests/kernels/attention/test_attention.py b/tests/kernels/attention/test_attention.py index 7083661575ef..c7abf652f111 100644 --- a/tests/kernels/attention/test_attention.py +++ b/tests/kernels/attention/test_attention.py @@ -18,7 +18,7 @@ from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalCausalMask - from vllm.attention.backends.xformers import _make_alibi_bias + from tests.kernels.utils import make_alibi_bias FLOAT32_BYTES = torch.finfo(torch.float).bits // 8 # This will change depending on the compute capability. @@ -429,8 +429,8 @@ def test_multi_query_kv_attention( alibi_bias = None if use_alibi: alibi_slopes = torch.randn(num_query_heads, dtype=torch.float) - attn_bias = _make_alibi_bias(alibi_slopes, num_kv_heads, dtype, - seq_lens) + attn_bias = make_alibi_bias(alibi_slopes, num_kv_heads, dtype, + seq_lens) output = torch.empty_like(query) start = 0 # Dynamic sequence length not supported with custom attn_bias. 
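The weight reordering in the Qwen3-VL `fast_pos_embed_interpolate` patch above rests on a small algebraic identity: since `(1 - dh)(1 - dw) = 1 - dh - dw + dh*dw`, only one elementwise product (`w11 = dh*dw`) is needed and the other three bilinear weights follow by subtraction. A minimal standalone sketch, using made-up tensor sizes rather than the module's internals, that checks the reordered weights against the original formulation:

```python
import torch

# Illustrative fractional offsets along height and width.
dh = torch.rand(7)
dw = torch.rand(5)
dh_grid, dw_grid = torch.meshgrid(dh, dw, indexing="ij")

# Original formulation: four elementwise products.
w00_ref = (1 - dh_grid) * (1 - dw_grid)
w01_ref = (1 - dh_grid) * dw_grid
w10_ref = dh_grid * (1 - dw_grid)
w11_ref = dh_grid * dw_grid

# Reordered formulation from the patch: one product, then subtractions.
w11 = dh_grid * dw_grid
w10 = dh_grid - w11          # dh*(1 - dw)
w01 = dw_grid - w11          # dw*(1 - dh)
w00 = 1 - dh_grid - dw_grid + w11  # (1 - dh)*(1 - dw)

for ref, new in [(w00_ref, w00), (w01_ref, w01),
                 (w10_ref, w10), (w11_ref, w11)]:
    torch.testing.assert_close(ref, new)
```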
diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index f8454ad0a4c4..38ab40f88ae0 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -67,6 +67,7 @@ def generate_params(): return params +@pytest.mark.skip(reason="Skipped for now. Should be revisited.") @pytest.mark.parametrize("device, name, use_mla, block_size", generate_params()) def test_env( diff --git a/tests/kernels/attention/test_prefix_prefill.py b/tests/kernels/attention/test_prefix_prefill.py index 8544eab3accc..0695f84aea1a 100644 --- a/tests/kernels/attention/test_prefix_prefill.py +++ b/tests/kernels/attention/test_prefix_prefill.py @@ -11,7 +11,7 @@ from xformers import ops as xops from xformers.ops.fmha.attn_bias import BlockDiagonalCausalFromBottomRightMask -from vllm.attention.backends.xformers import _make_alibi_bias +from tests.kernels.utils import make_alibi_bias from vllm.attention.ops.chunked_prefill_paged_decode import ( chunked_prefill_paged_decode) from vllm.attention.ops.prefix_prefill import context_attention_fwd @@ -470,7 +470,7 @@ def _get_alibi_slopes(total_num_heads: int) -> torch.Tensor: key = key.unsqueeze(0) value = value.unsqueeze(0) - attn_bias = _make_alibi_bias(alibi_slopes, num_kv_heads, dtype, seq_lens) + attn_bias = make_alibi_bias(alibi_slopes, num_kv_heads, dtype, seq_lens) output_ref = torch.empty_like(output) seq_start = 0 query_start = 0 @@ -479,7 +479,7 @@ def _get_alibi_slopes(total_num_heads: int) -> torch.Tensor: # FIXME(DefTruth): Because xformers does not support dynamic sequence # lengths with custom attention bias, we process each prompt one by # one. This is inefficient, especially when we have many short prompts. - # modified from: vllm/attention/backends/xformers.py#L343 + # modified from: vllm/v1/attention/backends/xformers.py#L343 for i, (query_len, seq_len) in enumerate(zip(query_lens, seq_lens)): seq_end = seq_start + seq_len query_end = query_start + query_len diff --git a/tests/kernels/attention/test_rocm_attention_selector.py b/tests/kernels/attention/test_rocm_attention_selector.py index d56d3f4638f1..af301d9de435 100644 --- a/tests/kernels/attention/test_rocm_attention_selector.py +++ b/tests/kernels/attention/test_rocm_attention_selector.py @@ -16,6 +16,7 @@ def clear_cache(): _cached_get_attn_backend.cache_clear() +@pytest.mark.skip(reason="Skipped for now. Should be revisited.") def test_selector(monkeypatch: pytest.MonkeyPatch): with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, "ROCM_FLASH") diff --git a/tests/kernels/utils.py b/tests/kernels/utils.py index c9bf85f6e2a5..8d6ce381976b 100644 --- a/tests/kernels/utils.py +++ b/tests/kernels/utils.py @@ -513,10 +513,6 @@ def make_backend(backend_name: str) -> AttentionBackend: Construct the backend instance determined by the backend_name string argument. - "XFORMERS" -> construct xformers backend - - TODO: other backends - Note: at time of writing the Attention wrapper automatically selects its own backend for Attention.forward(); so the backend instance which you generate with this function is not meant to be used for *running* @@ -528,18 +524,68 @@ def make_backend(backend_name: str) -> AttentionBackend: * Backend instance ''' - if backend_name == STR_XFORMERS_ATTN_VAL: - # NOTE: xFormers backend cannot be imported for CPU and AMD GPUs. 
- from vllm.attention.backends.xformers import XFormersBackend - return XFormersBackend() - elif backend_name == STR_FLASH_ATTN_VAL: - from vllm.attention.backends.flash_attn import FlashAttentionBackend + if backend_name in (STR_XFORMERS_ATTN_VAL, "XFORMERS_VLLM_V1"): + from vllm.v1.attention.backends.xformers import ( + XFormersAttentionBackend) + return XFormersAttentionBackend() + if backend_name in (STR_FLASH_ATTN_VAL, "FLASH_ATTN_VLLM_V1"): + from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend return FlashAttentionBackend() + if backend_name == "TRITON_ATTN_VLLM_V1": + from vllm.v1.attention.backends.triton_attn import ( + TritonAttentionBackend) + return TritonAttentionBackend() + if backend_name == "FLEX_ATTENTION": + from vllm.v1.attention.backends.flex_attention import ( + FlexAttentionBackend) + return FlexAttentionBackend() + if backend_name in ("TORCH_SDPA", "TORCH_SDPA_VLLM_V1"): + from vllm.v1.attention.backends.cpu_attn import TorchSDPABackend + return TorchSDPABackend() + if backend_name == "FLASHINFER": + from vllm.v1.attention.backends.flashinfer import FlashInferBackend + return FlashInferBackend() raise AssertionError( f"Unrecognized backend_name {backend_name} for unit test") +def make_alibi_bias( + alibi_slopes: torch.Tensor, + num_kv_heads: int, + dtype: torch.dtype, + seq_lens: list[int], +) -> list[Any]: + """Create ALiBi biases compatible with xFormers attention tests.""" + from xformers.ops.fmha.attn_bias import LowerTriangularMaskWithTensorBias + + if alibi_slopes is None: + return [None for _ in seq_lens] + + attn_biases: list[Any] = [] + num_heads = alibi_slopes.shape[0] + assert num_heads >= num_kv_heads, ( + "ALiBi slopes expect at least as many heads as KV heads") + + for seq_len in seq_lens: + bias = torch.arange(seq_len, dtype=dtype, device=alibi_slopes.device) + bias = bias[None, :] - bias[:, None] + + padded_len = (seq_len + 7) // 8 * 8 + bias_tensor = torch.empty( + 1, + num_heads, + seq_len, + padded_len, + device=alibi_slopes.device, + dtype=dtype, + )[:, :, :, :seq_len].copy_(bias) + bias_tensor.mul_(alibi_slopes[:, None, None]) + attn_biases.append(LowerTriangularMaskWithTensorBias(bias_tensor)) + + return attn_biases + + def _make_metadata_tensors( seq_lens: Optional[list[int]], context_lens: Optional[list[int]], diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index b9601114a318..bfde6e20a3b1 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -78,9 +78,8 @@ def _initialize_kv_caches_v1(self, vllm_config): return if model_arch in ("Phi4FlashForCausalLM", "MotifForCausalLM"): - # Phi4FlashForCausalLM and MotifForCausalLM - # only supports DIFFERENTIAL_FLASH_ATTN backend - m.setenv("VLLM_ATTENTION_BACKEND", "DIFFERENTIAL_FLASH_ATTN") + pytest.skip( + "Differential Flash Attention backend has been removed.") if model_arch == "GptOssForCausalLM": # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU # has cc==8.9 which hasn't supported FA3 yet. 
Remove this hack when diff --git a/vllm/attention/backends/differential_flash_attn.py b/vllm/attention/backends/differential_flash_attn.py deleted file mode 100644 index 87a4558e377d..000000000000 --- a/vllm/attention/backends/differential_flash_attn.py +++ /dev/null @@ -1,931 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""" An implementation of https://arxiv.org/pdf/2410.05258 """ -from collections import defaultdict -from dataclasses import dataclass -from itertools import accumulate -from typing import Any, Dict, List, Optional, Tuple, Type - -import torch -from einops import rearrange - -from vllm import _custom_ops as ops -# yapf conflicts with isort for this block -# yapf: disable -from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, - AttentionLayer, - AttentionMetadata, - AttentionMetadataBuilder, - AttentionType, - is_quantized_kv_cache) -from vllm.attention.backends.flash_attn import FlashAttentionBackend -# yapf: enable -from vllm.attention.backends.utils import (PAD_SLOT_ID, CommonAttentionState, - compute_slot_mapping, - compute_slot_mapping_start_idx, - is_all_cross_attn_metadata_set, - is_all_encoder_attn_metadata_set, - is_block_tables_empty) -from vllm.attention.utils.fa_utils import (flash_attn_supports_fp8, - get_flash_attn_version) -from vllm.logger import init_logger -from vllm.multimodal import MultiModalPlaceholderMap -from vllm.utils import async_tensor_h2d, make_tensor_with_pad -from vllm.vllm_flash_attn import (flash_attn_varlen_func, - flash_attn_with_kvcache) - -logger = init_logger(__name__) - - -class DifferentialFlashAttentionBackend(AttentionBackend): - accept_output_buffer = False - - @staticmethod - def get_supported_head_sizes() -> List[int]: - return [32, 64, 96, 128, 160, 192, 224, 256] - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> Tuple[int, ...]: - if block_size % 16 != 0: - raise ValueError("Block size must be a multiple of 16.") - assert num_kv_heads % 2 == 0, "num_kv_heads must be divisible by 2" - return (2, 2, num_blocks, block_size, num_kv_heads // 2, head_size) - - @staticmethod - def get_name() -> str: - return "DIFFERENTIAL_FLASH_ATTN" - - @staticmethod - def get_impl_cls() -> Type["DifferentialFlashAttentionImpl"]: - return DifferentialFlashAttentionImpl - - @staticmethod - def get_metadata_cls() -> Type["DifferentialFlashAttentionMetadata"]: - return DifferentialFlashAttentionMetadata - - @staticmethod - def get_builder_cls() -> Type["DifferentialFlashAttentionMetadataBuilder"]: - return DifferentialFlashAttentionMetadataBuilder - - @staticmethod - def get_state_cls() -> Type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - src_key_cache = src_kv_cache[0] - dst_key_cache = dst_kv_cache[0] - ops.swap_blocks(src_key_cache, dst_key_cache, src_to_dst) - src_value_cache = src_kv_cache[1] - dst_value_cache = dst_kv_cache[1] - ops.swap_blocks(src_value_cache, dst_value_cache, src_to_dst) - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - key_caches = [kv_cache[0] for kv_cache in kv_caches] - value_caches = [kv_cache[1] for kv_cache in kv_caches] - - ops.copy_blocks(key_caches, value_caches, src_to_dists) - - -@dataclass -class 
DifferentialFlashAttentionMetadata(AttentionMetadata): - """Metadata for FlashAttentionBackend. - - NOTE: Any python object stored here is not updated when it is - cuda-graph replayed. If you have values that need to be changed - dynamically, it should be stored in tensor. The tensor has to be - updated from `CUDAGraphRunner.forward` API. - """ - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - - # NOTE(sang): Definition of context_len, query_len, and seq_len. - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ---------------------| - # |-- query_len ---| - - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). - context_lens_tensor: Optional[torch.Tensor] - - # (batch_size, max_blocks_per_seq). - # Block addresses per sequence. (Seq id -> list of physical block) - # E.g., [0, 1, 2] means tokens are stored in 0th, 1st, and 2nd blocks - # in the kv cache. Each block can contain up to block_size tokens. - # 2nd dimensions are padded up to max_blocks_per_seq if it is cuda-graph - # captured. - block_tables: Optional[torch.Tensor] - - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - - use_cuda_graph: bool - - # Maximum query length in the batch. - max_query_len: Optional[int] = None - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] = None - - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - seq_start_loc: Optional[torch.Tensor] = None - - _cached_prefill_metadata: Optional[ - "DifferentialFlashAttentionMetadata"] = None - _cached_decode_metadata: Optional[ - "DifferentialFlashAttentionMetadata"] = None - - # Begin encoder attn & enc/dec cross-attn fields... - - # Encoder sequence lengths representation - encoder_seq_lens: Optional[List[int]] = None - encoder_seq_lens_tensor: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. 
- encoder_seq_start_loc: Optional[torch.Tensor] = None - # Maximum sequence length among encoder sequences - max_encoder_seq_len: Optional[int] = None - # Number of tokens input to encoder - num_encoder_tokens: Optional[int] = None - - # Cross-attention memory-mapping data structures: slot mapping - # and block tables - cross_slot_mapping: Optional[torch.Tensor] = None - cross_block_tables: Optional[torch.Tensor] = None - - # Cross-layer shared attention block tables - cross_layer_shared_block_tables: Optional[torch.Tensor] = None - - @property - def is_all_encoder_attn_metadata_set(self): - ''' - All attention metadata required for encoder attention is set. - ''' - return is_all_encoder_attn_metadata_set(self) - - @property - def is_all_cross_attn_metadata_set(self): - ''' - All attention metadata required for enc/dec cross-attention is set. - - Superset of encoder attention required metadata. - ''' - return is_all_cross_attn_metadata_set(self) - - @property - def prefill_metadata( - self) -> Optional["DifferentialFlashAttentionMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - assert ((self.seq_lens is not None) - or (self.encoder_seq_lens is not None)) - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - query_start_loc = (None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1]) - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[:self.num_prefill_tokens]) - seq_lens = (None if self.seq_lens is None else - self.seq_lens[:self.num_prefills]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[:self.num_prefills]) - seq_start_loc = (None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1]) - context_lens_tensor = (None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills]) - block_tables = (None if self.block_tables is None else - self.block_tables[:self.num_prefills]) - cross_layer_shared_block_tables = ( - None if self.cross_layer_shared_block_tables is None else - self.cross_layer_shared_block_tables[:self.num_prefills]) - - self._cached_prefill_metadata = DifferentialFlashAttentionMetadata( - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=self. - multi_modal_placeholder_index_maps, - enable_kv_scales_calculation=self.enable_kv_scales_calculation, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_query_len=0, - max_decode_seq_len=0, - query_start_loc=query_start_loc, - seq_start_loc=seq_start_loc, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - cross_layer_shared_block_tables=cross_layer_shared_block_tables, - use_cuda_graph=False, - # Begin encoder & cross attn fields below... 
- encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - encoder_seq_start_loc=self.encoder_seq_start_loc, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_prefill_metadata - - @property - def decode_metadata( - self) -> Optional["DifferentialFlashAttentionMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[self.num_prefill_tokens:]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[self.num_prefills:]) - block_tables = (None if self.block_tables is None else - self.block_tables[self.num_prefills:]) - cross_layer_shared_block_tables = ( - None if self.cross_layer_shared_block_tables is None else - self.cross_layer_shared_block_tables[self.num_prefills:]) - self._cached_decode_metadata = DifferentialFlashAttentionMetadata( - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=True, - seq_lens=None, - seq_lens_tensor=seq_lens_tensor, - max_decode_query_len=self.max_decode_query_len, - max_query_len=self.max_query_len, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - # Batch may be composed of prefill|decodes, adjust query start - # indices to refer to the start of decodes. E.g. - # in tokens:[3 prefills|6 decodes], query_start_loc=[3,9] => [0,6]. - query_start_loc=(self.query_start_loc[self.num_prefills:] - - self.query_start_loc[self.num_prefills]) - if self.query_start_loc is not None else None, - seq_start_loc=self.seq_start_loc[self.num_prefills:] - if self.seq_start_loc is not None else None, - context_lens_tensor=None, - block_tables=block_tables, - cross_layer_shared_block_tables=cross_layer_shared_block_tables, - use_cuda_graph=self.use_cuda_graph, - # Begin encoder & cross attn fields below... 
- encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - encoder_seq_start_loc=self.encoder_seq_start_loc, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_decode_metadata - - -class DifferentialFlashAttentionMetadataBuilder( - AttentionMetadataBuilder[DifferentialFlashAttentionMetadata]): - - def __init__(self, input_builder): - self.input_builder = input_builder - self.runner = input_builder.runner - self.sliding_window = input_builder.sliding_window - self.block_size = input_builder.block_size - - def prepare(self): - self.slot_mapping: List[int] = [] - self.prefill_seq_lens: List[int] = [] - self.context_lens: List[int] = [] - self.block_tables: List[List[int]] = [] - self.cross_layer_shared_block_tables: List[List[int]] = [] - self.curr_seq_lens: List[int] = [] - self.multimodal_placeholder_maps: Dict[ - str, - MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap) - self.num_prefills = 0 - self.num_prefill_tokens = 0 - self.num_decode_tokens = 0 - self.has_prefix_cache_hit = False - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, - prefix_cache_hit: bool): - """Add a sequence group to the metadata. Specifically update/append - 1. context length. - 2. block table. - 3. slot mapping. - """ - # TODO: add support for chunked prefill and prefix caching. - assert not chunked_prefill_enabled, \ - "chunked prefill is not supported for now" - assert not prefix_cache_hit, "prefix caching is not supported for now" - - is_prompt = inter_data.is_prompt - block_tables = inter_data.block_tables - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - - if is_prompt: - mm_maps = inter_data.multi_modal_placeholder_maps - if mm_maps: - for modality, placeholders in mm_maps.items(): - self.multimodal_placeholder_maps[modality].extend( - placeholders) - - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - # Compute block table. - # TODO(sang): Combine chunked prefill and prefix caching by - # only allowing multiple of block_size chunk size. - # NOTE: This only works for oooooooxxx style attention. - block_table = [] - if prefix_cache_hit: - # NOTE(woosuk): For flash-attn, the block table should - # include the entries for the incoming prefill tokens. 
- block_table = block_tables[seq_id] - elif ((chunked_prefill_enabled or not is_prompt) - and block_tables is not None): - if curr_sliding_window_block == 0: - block_table = block_tables[seq_id] - else: - block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.block_tables.append(block_table) - - cross_layer_shared_block_table = [] - if prefix_cache_hit: - cross_layer_shared_block_table = block_tables[seq_id] - elif block_tables is not None: - if curr_sliding_window_block == 0: - cross_layer_shared_block_table = block_tables[seq_id] - else: - cross_layer_shared_block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.cross_layer_shared_block_tables.append( - cross_layer_shared_block_table) - - # Compute slot mapping. - is_profile_run = is_block_tables_empty(block_tables) - start_idx = compute_slot_mapping_start_idx(is_prompt, query_len, - context_len, - self.sliding_window) - compute_slot_mapping(is_profile_run, self.slot_mapping, seq_id, - seq_len, context_len, start_idx, - self.block_size, inter_data.block_tables) - - def _get_graph_runner_block_tables(self, num_seqs: int, - block_tables: List[List[int]], - graph_block_tables) -> torch.Tensor: - # The shape of graph_block_tables is - # [max batch size, max context len // block size]. - # max_batch_size, max_blocks = self.runner.graph_block_tables.shape - max_batch_size, max_blocks = graph_block_tables.shape - assert max_batch_size >= num_seqs - - # graph_block_tables = self.runner.graph_block_tables[:num_seqs] - graph_block_tables = graph_block_tables[:num_seqs] - for i, block_table in enumerate(block_tables): - if block_table: - num_blocks = len(block_table) - if num_blocks <= max_blocks: - graph_block_tables[i, :num_blocks] = block_table - else: - # It may be possible to have more blocks allocated due - # to lookahead slots of multi-step, however, they are - # not used anyway, so can be safely ignored. - graph_block_tables[ - i, :max_blocks] = block_table[:max_blocks] - - return torch.from_numpy(graph_block_tables).to( - device=self.runner.device, non_blocking=True) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - """Build attention metadata with on-device tensors. - - Args: - seq_lens: The maybe padded sequence lengths of the input sequences. - query_lens: The query lengths of the input sequences. - cuda_graph_pad_size: The padding size for cuda graph. - -1 if cuda graph is not used. - batch_size: The maybe padded batch size. 
- """ - prefix_cache_hit = any([ - inter_data.prefix_cache_hit - for inter_data in self.input_builder.inter_data_list - ]) - for inter_data in self.input_builder.inter_data_list: - self._add_seq_group(inter_data, - self.input_builder.chunked_prefill_enabled, - prefix_cache_hit) - - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - max_query_len = max(query_lens) - decode_query_lens = query_lens[self.num_prefills:] - if len(decode_query_lens) > 0: - max_decode_query_len = max(decode_query_lens) - else: - max_decode_query_len = 1 - max_prefill_seq_len = max(self.prefill_seq_lens, default=0) - max_decode_seq_len = max(self.curr_seq_lens, default=0) - num_decode_tokens = self.num_decode_tokens - query_start_loc = list(accumulate(query_lens, initial=0)) - seq_start_loc = list(accumulate(seq_lens, initial=0)) - - num_seqs = len(seq_lens) - if use_captured_graph: - self.slot_mapping.extend([PAD_SLOT_ID] * cuda_graph_pad_size) - self.block_tables.extend([] * cuda_graph_pad_size) - - self.cross_layer_shared_block_tables.extend([] * - cuda_graph_pad_size) - - num_decode_tokens = batch_size - self.num_prefill_tokens - block_tables = self._get_graph_runner_block_tables( - num_seqs, self.block_tables, self.runner.graph_block_tables) - cross_layer_shared_block_tables = \ - self._get_graph_runner_block_tables( - num_seqs, self.cross_layer_shared_block_tables, - self.runner.cross_layer_shared_graph_block_tables) - else: - block_tables = make_tensor_with_pad( - self.block_tables, - pad=0, - dtype=torch.int, - device=device, - ) - cross_layer_shared_block_tables = make_tensor_with_pad( - self.cross_layer_shared_block_tables, - pad=0, - dtype=torch.int, - device=device, - ) - assert max_query_len > 0, ("query_lens: {}".format(query_lens)) - - assert device is not None - context_lens_tensor = async_tensor_h2d(self.context_lens, torch.int, - device, self.runner.pin_memory) - seq_lens_tensor = async_tensor_h2d(seq_lens, torch.int, device, - self.runner.pin_memory) - slot_mapping_tensor = async_tensor_h2d(self.slot_mapping, torch.long, - device, self.runner.pin_memory) - query_start_loc_tensor = async_tensor_h2d(query_start_loc, torch.int32, - device, - self.runner.pin_memory) - seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, - device, self.runner.pin_memory) - placeholder_index_maps = { - modality: placeholder_map.index_map() - for modality, placeholder_map in - self.multimodal_placeholder_maps.items() - } - - return DifferentialFlashAttentionMetadata( - num_prefills=self.num_prefills, - slot_mapping=slot_mapping_tensor, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=num_decode_tokens, - seq_lens=seq_lens, - multi_modal_placeholder_index_maps=placeholder_index_maps, - enable_kv_scales_calculation=True, - seq_lens_tensor=seq_lens_tensor, - max_query_len=max_query_len, - max_decode_query_len=max_decode_query_len, - max_prefill_seq_len=max_prefill_seq_len, - max_decode_seq_len=max_decode_seq_len, - query_start_loc=query_start_loc_tensor, - seq_start_loc=seq_start_loc_tensor, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - cross_layer_shared_block_tables=cross_layer_shared_block_tables, - use_cuda_graph=use_captured_graph, - ) - - -class DifferentialFlashAttentionImpl(AttentionImpl): - """ - If the input tensors contain prompt tokens, the layout is as follows: - |<--------------- num_prefill_tokens ----------------->| - |<--prefill_0-->|<--prefill_1-->|...|<--prefill_N-1--->| - - Otherwise, the layout is as follows: - 
|<----------------- num_decode_tokens ------------------>| - |<--decode_0-->|..........|<--decode_M-1-->|<--padding-->| - - Generation tokens can contain padding when cuda-graph is used. - Currently, prompt tokens don't contain any padding. - - The prompts might have different lengths, while the generation tokens - always have length 1. - - If chunked prefill is enabled, prefill tokens and decode tokens can be - batched together in a flattened 1D query. - - |<----- num_prefill_tokens ---->|<------- num_decode_tokens --------->| - |<-prefill_0->|...|<-prefill_N-1->|<--decode_0-->|...|<--decode_M-1-->| - - Currently, cuda graph is disabled for chunked prefill, meaning there's no - padding between prefill and decode tokens. - """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float] = None, - attn_type: str = AttentionType.DECODER, - kv_sharing_target_layer_name: Optional[str] = None, - use_irope: bool = False, - differential_flash_attention_config: Optional[Dict[str, Any]] = None, - ) -> None: - if differential_flash_attention_config is None: - differential_flash_attention_config = {} - self.differential_flash_attention_config = \ - differential_flash_attention_config - self.used_shared_kv_cache = kv_sharing_target_layer_name is not None - self.kv_sharing_target_layer_name = kv_sharing_target_layer_name - if use_irope: - logger.warning( - "Using irope in V0 is not supported yet, it will fall back " - "to global attention for long context.") - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - if alibi_slopes is not None: - alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.alibi_slopes = alibi_slopes - self.sliding_window = ((sliding_window - 1, - 0) if sliding_window is not None else (-1, -1)) - self.kv_cache_dtype = kv_cache_dtype - self.vllm_flash_attn_version = get_flash_attn_version( - requires_alibi=self.alibi_slopes is not None) - if is_quantized_kv_cache(self.kv_cache_dtype) and ( - not self.kv_cache_dtype.startswith("fp8") - or not flash_attn_supports_fp8()): - raise NotImplementedError( - f"FlashAttention does not support {self.kv_cache_dtype} " - "kv-cache on this device " - f"(FA supports fp8 = {flash_attn_supports_fp8()}).") - if logits_soft_cap is None: - # In flash-attn, setting logits_soft_cap as 0 means no soft cap. - logits_soft_cap = 0 - self.logits_soft_cap = logits_soft_cap - - assert self.num_heads % self.num_kv_heads == 0 - self.num_queries_per_kv = self.num_heads // self.num_kv_heads - - support_head_sizes = FlashAttentionBackend.get_supported_head_sizes() - if head_size not in support_head_sizes: - raise ValueError( - f"Head size {head_size} is not supported by FlashAttention. " - f"Supported head sizes are: {support_head_sizes}.") - self.attn_type = attn_type - - self.lambda_full = None - self.subln = self.differential_flash_attention_config["subln"] - - def split_heads(self, x): - # split by num_heads, the stripe pattern is friendly to tensor parallel. - x = rearrange(x, "... (H two) D -> ... H two D", two=2) - x1 = x[..., 0, :] - x2 = x[..., 1, :] - return x1.contiguous(), x2.contiguous() - - def split_kv_cache(self, x): - # split by num_heads, the stripe pattern is friendly to tensor parallel. 
- if x.numel() == 0: - return torch.empty(0), torch.empty(0) - - x1, x2 = x[0], x[1] - return x1, x2 - - def populate_kv_cache(self, layer: AttentionLayer, key: torch.Tensor, - value: torch.Tensor, kv_cache: torch.Tensor, - attn_metadata: DifferentialFlashAttentionMetadata): - if kv_cache.numel() > 0 and key is not None and value is not None: - updated_slot_mapping = attn_metadata.slot_mapping - torch.ops._C_cache_ops.reshape_and_cache_flash( - key, - value, - kv_cache[0], - kv_cache[1], - updated_slot_mapping.flatten(), - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) - - def forward_generate_kv_cache( - self, query: torch.Tensor, key: Optional[torch.Tensor], - value: Optional[torch.Tensor], k_cache: torch.Tensor, - v_cache: torch.Tensor, - attn_metadata: DifferentialFlashAttentionMetadata) -> torch.Tensor: - - head_size = self.head_size - num_heads = self.num_heads // 2 - num_kv_heads = self.num_kv_heads // 2 - - query = query.view(-1, num_heads, head_size) - if key is not None: - assert value is not None - key = key.view(-1, num_kv_heads, head_size) - value = value.view(-1, num_kv_heads, head_size) - else: - assert value is None - - num_prefill_tokens = attn_metadata.num_prefill_tokens - num_decode_tokens = attn_metadata.num_decode_tokens - assert key.shape[ - 0] == num_prefill_tokens + num_decode_tokens, "key shape mismatch" - assert value.shape[ - 0] == num_prefill_tokens + num_decode_tokens, "value shape mismatch" - - output = torch.empty_like(query) - # Query for decode. KV is not needed because it is already cached. - decode_query = query[num_prefill_tokens:] - # QKV for prefill. - query = query[:num_prefill_tokens] - if key is not None and value is not None: - key = key[:num_prefill_tokens] - value = value[:num_prefill_tokens] - - assert query.shape[0] == num_prefill_tokens, "query shape mismatch" - assert decode_query.shape[ - 0] == num_decode_tokens, "decode query shape mismatch" - - if prefill_meta := attn_metadata.prefill_metadata: - # Prompt run. - if k_cache.numel() == 0 \ - or prefill_meta.block_tables is None \ - or prefill_meta.block_tables.numel() == 0: - # normal attention - prefill_output = flash_attn_varlen_func( - q=query, - k=key, - v=value, - cu_seqlens_q=prefill_meta.seq_start_loc, - cu_seqlens_k=prefill_meta.seq_start_loc, - max_seqlen_q=prefill_meta.max_prefill_seq_len, - max_seqlen_k=prefill_meta.max_prefill_seq_len, - softmax_scale=self.scale, - causal=True, - window_size=self.sliding_window, - alibi_slopes=self.alibi_slopes, - softcap=self.logits_soft_cap, - fa_version=self.vllm_flash_attn_version, - ) - assert prefill_output.shape == output[: - num_prefill_tokens].shape - output[:num_prefill_tokens] = prefill_output - else: - raise Exception("prefix caching not supported") - - if decode_meta := attn_metadata.decode_metadata: - block_tables_arg = decode_meta.block_tables - try: - output[num_prefill_tokens:] = flash_attn_with_kvcache( - q=decode_query.unsqueeze(1), - k_cache=k_cache, - v_cache=v_cache, - block_table=block_tables_arg, - cache_seqlens=decode_meta.seq_lens_tensor, - softmax_scale=self.scale, - causal=True, - window_size=self.sliding_window, - alibi_slopes=self.alibi_slopes, - softcap=self.logits_soft_cap, - fa_version=self.vllm_flash_attn_version, - ).squeeze(1) - except Exception as e: - logger.error("Error in PagedAttention.forward_decode: %s", - str(e)) - raise e - - # Reshape the output tensor. 
- return output.view(-1, num_heads, head_size) - - def forward_with_kv_cache_only( - self, - query: torch.Tensor, - k_cache: torch.Tensor, - v_cache: torch.Tensor, - attn_metadata: DifferentialFlashAttentionMetadata, - ): - if not attn_metadata.decode_metadata: - block_tables_arg = attn_metadata.cross_layer_shared_block_tables - else: - block_tables_arg = attn_metadata.block_tables - - output = flash_attn_with_kvcache( - q=query.unsqueeze(1), - k_cache=k_cache, - v_cache=v_cache, - block_table=block_tables_arg, - cache_seqlens=attn_metadata.seq_lens_tensor, - softmax_scale=self.scale, - causal=True, - window_size=self.sliding_window, - alibi_slopes=self.alibi_slopes, - softcap=self.logits_soft_cap, - fa_version=self.vllm_flash_attn_version, - ).squeeze(1) - return output - - def forward( - self, - layer: AttentionLayer, - q: torch.Tensor, - k: torch.Tensor, - v: torch.Tensor, - kv_cache: torch.Tensor, - attn_metadata: DifferentialFlashAttentionMetadata, - output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward pass with FlashAttention. - - Args: - layer: Attention layer instance. - q: Query tensor with shape = [num_tokens, num_heads, head_size] - k: Key tensor with shape = [num_tokens, num_kv_heads, head_size] - v: Value tensor with shape = [num_tokens, num_kv_heads, head_size] - kv_cache: KV cache tensor with shape - [2, num_blocks, block_size, num_kv_heads, head_size]. - NOTE: kv_cache will be an empty tensor with shape [0] - for profiling run. - attn_metadata: Metadata for attention. - output: Output tensor with shape [num_tokens, num_heads, head_size] - output_scale: Optional output scale tensor. - output_block_scale: Optional output block scale tensor. - NOTE: It in-place updates the output tensor. - NOTE: FP8 quantization, flash-attn expect the size of - {q,k,v}_descale to be (num_sequences, num_kv_heads). - We use torch's .expand() to avoid duplicating values - """ - if output_scale is not None or output_block_scale is not None: - raise NotImplementedError( - "fused output quantization is not yet supported" - " for DifferentialFlashAttentionImpl") - - if self.lambda_full is None: - self.lambda_init = self.differential_flash_attention_config[ - "lambda_init"] - lambda_q1 = self.differential_flash_attention_config["lambda_q1"] - lambda_k1 = self.differential_flash_attention_config["lambda_k1"] - lambda_q2 = self.differential_flash_attention_config["lambda_q2"] - lambda_k2 = self.differential_flash_attention_config["lambda_k2"] - lambda_1 = torch.exp( - torch.sum(lambda_q1 * lambda_k1, dim=-1).float()).type_as(q) - lambda_2 = torch.exp( - torch.sum(lambda_q2 * lambda_k2, dim=-1).float()).type_as(q) - self.lambda_full = lambda_1 - lambda_2 + self.lambda_init - - if not self.used_shared_kv_cache: # need to generate kv-cache - q = q.view(-1, self.num_heads, self.head_size) - k = k.view(-1, self.num_kv_heads, self.head_size) - v = v.view(-1, self.num_kv_heads, self.head_size) - - q1, q2 = self.split_heads(q) - k1, k2 = self.split_heads(k) - v1, v2 = self.split_heads(v) - - # kv_cache shape is (2, 2, num_blocks, block_size, num_kv_heads // 2, head_size) # noqa: E501 - # Split by half along the first dimension. 
- kv_cache1, kv_cache2 = self.split_kv_cache(kv_cache) - assert kv_cache1.is_contiguous(), "kv_cache1 is not contiguous" - assert kv_cache2.is_contiguous(), "kv_cache2 is not contiguous" - - if kv_cache1.numel() != 0: - self.populate_kv_cache(layer, k1, v1, kv_cache1, attn_metadata) - self.populate_kv_cache(layer, k2, v2, kv_cache2, attn_metadata) - - key_cache1, value_cache1 = self.split_kv_cache(kv_cache1) - key_cache2, value_cache2 = self.split_kv_cache(kv_cache2) - else: - key_cache1, value_cache1 = torch.empty(0), torch.empty(0) - key_cache2, value_cache2 = torch.empty(0), torch.empty(0) - attn11 = self.forward_generate_kv_cache(q1, k1, v1, key_cache1, - value_cache1, - attn_metadata) - attn12 = self.forward_generate_kv_cache(q1, k1, v2, key_cache1, - value_cache2, - attn_metadata) - attn11 = attn11.view(q1.shape) - attn12 = attn12.view(q1.shape) - attn1 = torch.cat([attn11, attn12], dim=-1) - - attn21 = self.forward_generate_kv_cache(q2, k2, v1, key_cache2, - value_cache1, - attn_metadata) - attn22 = self.forward_generate_kv_cache(q2, k2, v2, key_cache2, - value_cache2, - attn_metadata) - attn21 = attn21.view(q2.shape) - attn22 = attn22.view(q2.shape) - attn2 = torch.cat([attn21, attn22], dim=-1) - - attn = attn1 - self.lambda_full * attn2 - # attn shape (-1, self.num_heads // 2, 2 * self.head_dim) - attn = self.subln(attn) - attn = attn * (1 - self.lambda_init) - # reshape back to 2 * num_head - attn_output = rearrange(attn, - "... H (two D) -> ... (H two) D", - two=2) - - else: # reuse the kv cache, full attention - q = q.view(-1, self.num_heads, self.head_size) - q1, q2 = self.split_heads(q) - # kv_cache shape is (2, num_blocks, block_size, num_kv_heads, head_size) # noqa: E501 - kv_cache1, kv_cache2 = self.split_kv_cache(kv_cache) - key_cache1, value_cache1 = kv_cache1[0], kv_cache1[1] - key_cache2, value_cache2 = kv_cache2[0], kv_cache2[1] - - attn11 = self.forward_with_kv_cache_only(q1, key_cache1, - value_cache1, - attn_metadata) - attn12 = self.forward_with_kv_cache_only(q1, key_cache1, - value_cache2, - attn_metadata) - attn11 = attn11.view(q1.shape) - attn12 = attn12.view(q1.shape) - attn1 = torch.cat([attn11, attn12], dim=-1) - - attn21 = self.forward_with_kv_cache_only(q2, key_cache2, - value_cache1, - attn_metadata) - attn22 = self.forward_with_kv_cache_only(q2, key_cache2, - value_cache2, - attn_metadata) - attn21 = attn21.view(q2.shape) - attn22 = attn22.view(q2.shape) - attn2 = torch.cat([attn21, attn22], dim=-1) - - attn = attn1 - self.lambda_full * attn2 - attn = self.subln(attn) - attn = attn * (1 - self.lambda_init) - # reshape back to 2 * num_head - attn_output = rearrange(attn, - "... H (two D) -> ... (H two) D", - two=2) - attn_output = attn_output.view(-1, self.num_heads * self.head_size) - return attn_output diff --git a/vllm/attention/backends/dual_chunk_flash_attn.py b/vllm/attention/backends/dual_chunk_flash_attn.py deleted file mode 100644 index de47bb8ebd8f..000000000000 --- a/vllm/attention/backends/dual_chunk_flash_attn.py +++ /dev/null @@ -1,1495 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Attention layer with Dual chunk flash attention and sparse attention. 
-""" -import math -from dataclasses import dataclass -from typing import Any, Dict, List, Optional, Tuple, Type - -import torch -import torch.distributed -import torch.nn.functional as F - -from vllm import _custom_ops as ops -from vllm.attention.backends.abstract import AttentionLayer, AttentionType -from vllm.attention.backends.flash_attn import (FlashAttentionBackend, - FlashAttentionImpl, - FlashAttentionMetadata, - FlashAttentionMetadataBuilder) -from vllm.distributed.parallel_state import get_tensor_model_parallel_rank -from vllm.logger import init_logger -from vllm.utils import async_tensor_h2d -from vllm.vllm_flash_attn import (flash_attn_varlen_func, - flash_attn_with_kvcache, sparse_attn_func) - -logger = init_logger(__name__) - - -class DualChunkFlashAttentionBackend(FlashAttentionBackend): - - accept_output_buffer: bool = False - - @staticmethod - def get_name() -> str: - return "DUAL_CHUNK_FLASH_ATTN" - - @staticmethod - def get_impl_cls() -> Type["DualChunkFlashAttentionImpl"]: - return DualChunkFlashAttentionImpl - - @staticmethod - def get_metadata_cls() -> Type["DualChunkFlashAttentionMetadata"]: - return DualChunkFlashAttentionMetadata - - @staticmethod - def get_builder_cls() -> Type["DualChunkFlashAttentionMetadataBuilder"]: - return DualChunkFlashAttentionMetadataBuilder - - -@dataclass -class DualChunkFlashAttentionMetadata(FlashAttentionMetadata): - # Block size of the paged kv cache. - block_size: int = 16 - - # Original max position embeddings. - original_max_position_embeddings: int = 0 - - # Chunk size - chunk_size: int = 8192 - - # Local size - local_size: int = 1024 - - # (batch_size,). The orig sequence length per sequence. - orig_seq_lens: Optional[List[int]] = None - - # orig_seq_lens stored as a tensor. - orig_seq_lens_tensor: Optional[torch.Tensor] = None - - # Length scaling factor - scaling_factor: Optional[torch.Tensor] = None - - # (batch_size,). Sequence lengths for intra attention. - seq_lens_intra: Optional[torch.Tensor] = None - - # Max sequence length for intra attention. - max_seq_len_intra: Optional[int] = None - - # (batch_size, num_blocks). Block table for intra attention. - block_tables_intra: Optional[torch.Tensor] = None - - # (batch_size,). Sequence lengths for succ attention. - seq_lens_succ: Optional[torch.Tensor] = None - - # Max sequence length for succ attention. - max_seq_len_succ: Optional[int] = None - - # (batch_size, num_blocks). Block table for succ attention. - block_tables_succ: Optional[torch.Tensor] = None - - # (batch_size,). Sequence lengths for inter attention. - seq_lens_inter: Optional[torch.Tensor] = None - - # Max sequence length for inter attention. 
- max_seq_len_inter: Optional[int] = None - - _cached_prefill_metadata: Optional[ - "DualChunkFlashAttentionMetadata"] = None - _cached_decode_metadata: Optional["DualChunkFlashAttentionMetadata"] = None - - @property - def prefill_metadata(self) -> Optional["DualChunkFlashAttentionMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - prefill_metadata = super().prefill_metadata - if prefill_metadata is None: - return None - - prefill_metadata = DualChunkFlashAttentionMetadata( - **prefill_metadata.asdict_zerocopy()) - - prefill_metadata.orig_seq_lens = ( - None if self.orig_seq_lens is None else - self.orig_seq_lens[:self.num_prefills]) - prefill_metadata.orig_seq_lens_tensor = ( - None if self.orig_seq_lens_tensor is None else - self.orig_seq_lens_tensor[:self.num_prefills]) - - if self.original_max_position_embeddings > 0: - assert prefill_metadata.orig_seq_lens_tensor is not None - prefill_metadata.scaling_factor = ( - 0.1 * torch.log(prefill_metadata.orig_seq_lens_tensor / - self.original_max_position_embeddings) + - 1.0).clip(min=1) - - self._cached_prefill_metadata = prefill_metadata - return prefill_metadata - - @property - def decode_metadata(self) -> Optional["DualChunkFlashAttentionMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - - decode_metadata = super().decode_metadata - if decode_metadata is None: - return None - - decode_metadata = DualChunkFlashAttentionMetadata( - **decode_metadata.asdict_zerocopy()) - - decode_metadata.orig_seq_lens_tensor = ( - None if self.orig_seq_lens_tensor is None else - self.orig_seq_lens_tensor[self.num_prefills:]) - - assert decode_metadata.orig_seq_lens_tensor is not None - assert decode_metadata.block_tables is not None - - cache_seq_lens = decode_metadata.orig_seq_lens_tensor - chunk_len = self.chunk_size - self.local_size - chunk_num_curr = (cache_seq_lens - 1) // chunk_len - batch_size = decode_metadata.num_decode_tokens - - if self.original_max_position_embeddings > 0: - decode_metadata.scaling_factor = (0.1 * torch.log( - cache_seq_lens / self.original_max_position_embeddings) + - 1.0).clip(min=1) - - seq_lens_intra = cache_seq_lens - chunk_num_curr * chunk_len - max_seq_len_intra = seq_lens_intra.max().item() - decode_metadata.seq_lens_intra = seq_lens_intra - decode_metadata.max_seq_len_intra = max_seq_len_intra - - block_tables_intra = torch.zeros( - batch_size, - (max_seq_len_intra - 1) // self.block_size + 1, - dtype=decode_metadata.block_tables.dtype, - device=decode_metadata.block_tables.device, - ) - for i in range(batch_size): - st = chunk_num_curr[i] * chunk_len // self.block_size - ed = min( - st + (max_seq_len_intra - 1) // self.block_size + 1, - (cache_seq_lens[i] - 1) // self.block_size + 1, - ) - block_tables_intra[i, :ed - - st] = decode_metadata.block_tables[i, st:ed] - decode_metadata.block_tables_intra = block_tables_intra - - seq_lens_succ = (chunk_num_curr - - (chunk_num_curr - 1).clip(min=0)) * chunk_len - max_seq_len_succ = seq_lens_succ.max().item() - decode_metadata.seq_lens_succ = seq_lens_succ - decode_metadata.max_seq_len_succ = max_seq_len_succ - if max_seq_len_succ: - block_tables_succ = torch.zeros( - batch_size, - (max_seq_len_succ - 1) // self.block_size + 1, - dtype=decode_metadata.block_tables.dtype, - device=decode_metadata.block_tables.device, - ) - for i in range(batch_size): - start = ((chunk_num_curr[i] - 
1).clip(min=0) * chunk_len // - self.block_size) - end = min( - start + (max_seq_len_succ - 1) // self.block_size + 1, - (cache_seq_lens[i] - 1) // self.block_size + 1, - ) - block_tables_succ[ - i, :end - start] = decode_metadata.block_tables[i, - start:end] - decode_metadata.block_tables_succ = block_tables_succ - - seq_lens_inter = (chunk_num_curr - 1).clip(min=0) * chunk_len - max_seq_len_inter = seq_lens_inter.max().item() - decode_metadata.seq_lens_inter = seq_lens_inter - decode_metadata.max_seq_len_inter = max_seq_len_inter - - self._cached_decode_metadata = decode_metadata - return decode_metadata - - -class DualChunkFlashAttentionMetadataBuilder(FlashAttentionMetadataBuilder): - - def prepare(self): - super().prepare() - self.orig_seq_lens: List[int] = [] - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, - prefix_cache_hit: bool): - super()._add_seq_group(inter_data, chunked_prefill_enabled, - prefix_cache_hit) - for prompt_len, seq_len in zip(inter_data.prompt_lens, - inter_data.seq_lens): - self.orig_seq_lens.append(max(prompt_len, seq_len)) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - attn_metadata = super().build(seq_lens, query_lens, - cuda_graph_pad_size, batch_size) - attn_metadata = DualChunkFlashAttentionMetadata( - **attn_metadata.asdict_zerocopy()) - - device = self.runner.device - attn_metadata.orig_seq_lens = self.orig_seq_lens - attn_metadata.orig_seq_lens_tensor = async_tensor_h2d( - self.orig_seq_lens, torch.int, device, self.runner.pin_memory) - - attn_metadata.block_size = self.runner.block_size - dual_chunk_attn_config = getattr(self.runner.model_config.hf_config, - "dual_chunk_attention_config", {}) - attn_metadata.original_max_position_embeddings = \ - dual_chunk_attn_config.get("original_max_position_embeddings", 0) - attn_metadata.chunk_size = dual_chunk_attn_config.get( - "chunk_size", 8192) - attn_metadata.local_size = dual_chunk_attn_config.get( - "local_size", 1024) - - return attn_metadata - - -class DualChunkFlashAttentionImpl(FlashAttentionImpl): - """ - If the input tensors contain prompt tokens, the layout is as follows: - |<--------------- num_prefill_tokens ----------------->| - |<--prefill_0-->|<--prefill_1-->|...|<--prefill_N-1--->| - Otherwise, the layout is as follows: - |<----------------- num_decode_tokens ------------------>| - |<--decode_0-->|..........|<--decode_M-1-->|<--padding-->| - Generation tokens can contain padding when cuda-graph is used. - Currently, prompt tokens don't contain any padding. - The prompts might have different lengths, while the generation tokens - always have length 1. - If chunked prefill is enabled, prefill tokens and decode tokens can be - batched together in a flattened 1D query. - |<----- num_prefill_tokens ---->|<------- num_decode_tokens --------->| - |<-prefill_0->|...|<-prefill_N-1->|<--decode_0-->|...|<--decode_M-1-->| - Currently, cuda graph is disabled for chunked prefill, meaning there's no - padding between prefill and decode tokens. 
- """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float] = None, - attn_type: str = AttentionType.DECODER, - kv_sharing_target_layer_name: Optional[str] = None, - layer_idx: int = -1, - dual_chunk_attention_config: Optional[Dict[str, Any]] = None, - ) -> None: - if kv_sharing_target_layer_name is not None: - raise NotImplementedError("KV sharing is not supported in V0 " - "DUAL_CHUNK_FLASH_ATTN backend.") - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - if alibi_slopes is not None: - alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.alibi_slopes = alibi_slopes - self.sliding_window = ((sliding_window, sliding_window) - if sliding_window is not None else (-1, -1)) - self.kv_cache_dtype = kv_cache_dtype - - self.num_queries_per_kv = self.num_heads // self.num_kv_heads - if sliding_window is not None: - # NOTE(woosuk): flash-attn's sliding window does not work with - # paged KV cache. - raise ValueError( - "Sliding window is not supported in FlashAttention.") - - support_head_sizes = ( - DualChunkFlashAttentionBackend.get_supported_head_sizes()) - - if head_size not in support_head_sizes: - raise ValueError( - f"Head size {head_size} is not supported by FlashAttention. " - f"Supported head sizes are: {support_head_sizes}.") - - assert dual_chunk_attention_config is not None - self.chunk_size = dual_chunk_attention_config.get("chunk_size", 8192) - self.local_size = dual_chunk_attention_config.get("local_size", 1024) - self.original_max_position_embeddings = dual_chunk_attention_config.get( - "original_max_position_embeddings", 0) - self.sparse_attention_config = dual_chunk_attention_config.get( - "sparse_attention_config", None) - if not self.sparse_attention_config: - logger.warning_once("Sparse attention will not be enabled as " - "sparse attention config is not provided.") - self.sparse_attention_enabled = dual_chunk_attention_config.get( - "sparse_attention_enabled", self.sparse_attention_config - is not None) - self.sparse_attention_threshold = dual_chunk_attention_config.get( - "sparse_attention_threshold", 32768) - self.sparse_attention_last_q = dual_chunk_attention_config.get( - "sparse_attention_last_q", 64) - self.layer_idx = layer_idx - self.dual_chunk_attention_config = dual_chunk_attention_config - - if self.sparse_attention_config: - self.sparse_attention_config = { - int(i): j - for i, j in self.sparse_attention_config[ - self.layer_idx].items() - } - start_head = self.num_heads * get_tensor_model_parallel_rank() - end_head = start_head + self.num_heads - self.sparse_attention_config = [ - self.sparse_attention_config[i] - for i in range(start_head, end_head) - ] - - if self.sparse_attention_enabled: - self.arange = torch.arange(self.sparse_attention_last_q, - device="cuda") - self.last_q_mask = (self.arange[None, None, :, None] - >= self.arange[None, None, None, :]) - - def forward( # type: ignore - self, - layer: AttentionLayer, - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - kv_cache: torch.Tensor, - attn_metadata: DualChunkFlashAttentionMetadata, - output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward pass with DualChunkFlashAttention. 
- Args: - query: shape = [num_tokens, num_heads * head_size] - query_succ: shape = [num_tokens, num_heads * head_size] - query_inter: shape = [num_tokens, num_heads * head_size] - key: shape = [num_tokens, num_kv_heads * head_size] - value: shape = [num_tokens, num_kv_heads * head_size] - kv_cache = [2, num_blocks, block_size, num_kv_heads * head_size] - attn_metadata: Metadata for attention. - Returns: - shape = [num_tokens, num_heads * head_size] - """ - assert output is None, "Output tensor not supported for DualChunk" - - if output_scale is not None or output_block_scale is not None: - raise NotImplementedError( - "fused output quantization is not yet supported" - " for FlashAttentionImpl") - - ( - query, - query_succ, - query_inter, - query_succ_critical, - query_inter_critical, - ) = torch.split(query, query.shape[-1] // 5, dim=-1) - - assert ( - query_succ is not None and query_inter is not None - ), "query_succ and query_inter are required in Dual Chunk Attention." - - num_tokens, hidden_size = query.shape - - # Reshape the query, key, and value tensors. - query = query.view(-1, self.num_heads, self.head_size) - query_succ = query_succ.view(-1, self.num_heads, self.head_size) - query_inter = query_inter.view(-1, self.num_heads, self.head_size) - query_succ_critical = query_succ_critical.view(-1, self.num_heads, - self.head_size) - query_inter_critical = query_inter_critical.view( - -1, self.num_heads, self.head_size) - key = key.view(-1, self.num_kv_heads, self.head_size) - value = value.view(-1, self.num_kv_heads, self.head_size) - - if self.original_max_position_embeddings > 0: - if prefill_meta := attn_metadata.prefill_metadata: - assert prefill_meta.scaling_factor is not None - assert prefill_meta.query_start_loc is not None - assert prefill_meta.orig_seq_lens is not None - current_start = 0 - query_start_loc_cpu = prefill_meta.query_start_loc.cpu() - for i in range(len(prefill_meta.orig_seq_lens)): - current_end = (current_start + - (query_start_loc_cpu[i + 1] - - query_start_loc_cpu[i]).item()) - key[current_start:current_end].mul_( - prefill_meta.scaling_factor[i]) - current_start = current_end - assert current_end <= attn_metadata.num_prefill_tokens - if decode_meta := attn_metadata.decode_metadata: - assert decode_meta.scaling_factor is not None - scaling_factor = decode_meta.scaling_factor - key[attn_metadata.num_prefill_tokens:].mul_( - scaling_factor.unsqueeze(-1).unsqueeze(-1)) - - if kv_cache is not None and kv_cache.numel() > 0: - key_cache = kv_cache[0] - value_cache = kv_cache[1] - - # Reshape the input keys and values and store them in the cache. - # If kv_cache is not provided, the new key and value tensors are - # not cached. This happens during the initial memory profiling run. - ops.reshape_and_cache_flash( - key, - value, - key_cache, - value_cache, - attn_metadata.slot_mapping.flatten(), - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) - - num_prefill_tokens = attn_metadata.num_prefill_tokens - num_decode_tokens = attn_metadata.num_decode_tokens - assert key.shape[0] == num_prefill_tokens + num_decode_tokens - assert value.shape[0] == num_prefill_tokens + num_decode_tokens - output = torch.empty_like(query) - - # Query for decode. KV is not needed because it is already cached. - decode_query = query[num_prefill_tokens:] - decode_query_succ = query_succ[num_prefill_tokens:] - decode_query_inter = query_inter[num_prefill_tokens:] - - # QKV for prefill. 
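A self-contained sketch of the per-request key rescaling done in the prefill branch above, where each request's keys are multiplied by its scaling factor 0.1 * log(orig_seq_len / original_max_position_embeddings) + 1, clipped to at least 1. The batch shape and sequence lengths here are made up, and the deleted code walks query_start_loc with a running offset rather than slicing it directly.

import torch

# Two prefill requests with 3 and 4 tokens (illustrative numbers).
query_start_loc = torch.tensor([0, 3, 7])
orig_seq_lens = torch.tensor([9000.0, 40000.0])
original_max_position_embeddings = 32768

scaling_factor = (0.1 * torch.log(
    orig_seq_lens / original_max_position_embeddings) + 1.0).clip(min=1)

# (num_prefill_tokens, num_kv_heads, head_size)
key = torch.randn(7, 2, 8)
for i in range(len(orig_seq_lens)):
    start, end = query_start_loc[i].item(), query_start_loc[i + 1].item()
    key[start:end].mul_(scaling_factor[i])   # in-place, as in the deleted code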
- query = query[:num_prefill_tokens] - query_succ = query_succ[:num_prefill_tokens] - query_inter = query_inter[:num_prefill_tokens] - query_succ_critical = query_succ_critical[:num_prefill_tokens] - query_inter_critical = query_inter_critical[:num_prefill_tokens] - key = key[:num_prefill_tokens] - value = value[:num_prefill_tokens] - assert query.shape[0] == num_prefill_tokens - assert decode_query.shape[0] == num_decode_tokens - - if prefill_meta := attn_metadata.prefill_metadata: - # Prompt run. - if (kv_cache is None or prefill_meta.block_tables is None - or prefill_meta.block_tables.numel() == 0): - # normal attention, called during the profiling run. - out = flash_attn_varlen_func( - q=query, - k=key, - v=value, - cu_seqlens_q=prefill_meta.seq_start_loc, - cu_seqlens_k=prefill_meta.seq_start_loc, - max_seqlen_q=prefill_meta.max_prefill_seq_len, - max_seqlen_k=prefill_meta.max_prefill_seq_len, - softmax_scale=self.scale, - causal=True, - window_size=self.sliding_window, - alibi_slopes=self.alibi_slopes, - ) - assert output[:num_prefill_tokens].shape == out.shape - output[:num_prefill_tokens] = out - else: - # prefix-enabled attention - assert prefill_meta.seq_lens is not None - assert prefill_meta.orig_seq_lens is not None - output[:num_prefill_tokens] = ( - self._dual_chunk_flash_attn_prefill( - q=query, - q_succ=query_succ, - q_inter=query_inter, - q_succ_critical=query_succ_critical, - q_inter_critical=query_inter_critical, - k=key_cache, - v=value_cache, - cu_seqlens_q=prefill_meta.query_start_loc, - cu_seqlens_k=prefill_meta.seq_start_loc, - orig_seq_lens=prefill_meta.orig_seq_lens, - scaling_factor=prefill_meta.scaling_factor, - softmax_scale=self.scale, - causal=True, - window_size=(-1, -1), - alibi_slopes=self.alibi_slopes, - block_table=prefill_meta.block_tables, - chunk_size=self.chunk_size, - local_size=self.local_size, - )) - - if decode_meta := attn_metadata.decode_metadata: - # Decoding run. - output[num_prefill_tokens:] = ( - self._dual_chunk_flash_attn_decoding( - decode_query.unsqueeze(1), - decode_query_succ.unsqueeze(1), - decode_query_inter.unsqueeze(1), - key_cache, - value_cache, - block_table=decode_meta.block_tables, - cache_seqlens=decode_meta.seq_lens_tensor, - softmax_scale=self.scale, - causal=True, - alibi_slopes=self.alibi_slopes, - chunk_size=self.chunk_size, - local_size=self.local_size, - original_max_position_embeddings=self. - original_max_position_embeddings, - decode_meta=decode_meta, - ).squeeze(1)) - # Reshape the output tensor. 
- return output.view(num_tokens, hidden_size) - - def _dual_chunk_flash_attn_prefill( - self, - q, - q_succ, - q_inter, - q_succ_critical, - q_inter_critical, - k, - v, - cu_seqlens_q, - cu_seqlens_k, - orig_seq_lens: List[int], - scaling_factor: torch.Tensor, - softmax_scale: float, - causal: Optional[bool] = True, - window_size: Tuple[int, int] = (-1, -1), - alibi_slopes: Optional[torch.Tensor] = None, - block_table: Optional[torch.Tensor] = None, - chunk_size: int = 8192, - local_size: int = 1024, - ): - if alibi_slopes is not None: - raise ValueError( - "Dual Chunk Attention does not support alibi_slopes") - if not causal: - raise ValueError( - "Dual Chunk Attention does not support causal=False") - if window_size != (-1, -1): - raise ValueError( - "Dual Chunk Attention does not support window_size") - - cu_seqlens_q_cpu = cu_seqlens_q.cpu().tolist() - cu_seqlens_k_cpu = cu_seqlens_k.cpu().tolist() - all_outputs = [] - - for i in range(0, len(cu_seqlens_q_cpu) - 1): - qs = cu_seqlens_q_cpu[i] - qe = cu_seqlens_q_cpu[i:i + 2][-1] - ks = cu_seqlens_k_cpu[i] - ke = cu_seqlens_k_cpu[i:i + 2][-1] - - current_q = q[qs:qe] - current_q_succ = q_succ[qs:qe] - current_q_inter = q_inter[qs:qe] - current_q_succ_critical = q_succ_critical[qs:qe] - current_q_inter_critical = q_inter_critical[qs:qe] - - if block_table is None: - current_k = k[ks:ke] - current_v = v[ks:ke] - current_block_table = None - current_orig_seq_len = orig_seq_lens[i] - else: - current_block_table = block_table[i] - current_orig_seq_len = orig_seq_lens[i] - current_k = k - current_v = v - sparse_attn_enabled = (self.sparse_attention_enabled - and current_orig_seq_len - > self.sparse_attention_threshold) - - if current_q.shape[0] == 0: - continue - - if current_k.shape[0] == 0: - all_outputs.append( - torch.zeros( - (current_q.shape[0], current_q.shape[1], v.shape[2]), - device=q.device, - dtype=q.dtype, - )) - continue - - current_output = torch.empty_like(current_q) - group_size = int(current_q.size(-2) / current_k.size(-2)) - - if sparse_attn_enabled: - num_device_q_heads = current_q.size(-2) - heads_vertical_size = torch.empty(size=(num_device_q_heads, ), - dtype=torch.int32) - heads_slash_size = torch.empty(size=(num_device_q_heads, ), - dtype=torch.int32) - for head_id in range(current_q.size(-2)): - ( - ty, - vertical_size, - slash_size, - _, - ) = self.sparse_attention_config[head_id] - assert ty == "vertical_and_slash", "only support slash mode" - - if vertical_size == 30: - vertical_size += 100 - heads_vertical_size[head_id] = vertical_size - heads_slash_size[head_id] = slash_size - - current_output = self._dual_chunk_flash_attn_prefill_func( - current_q, # allheads - current_q_succ, - current_q_inter, - current_q_succ_critical, - current_q_inter_critical, - current_k, - current_v, - current_block_table, - softmax_scale, - chunk_size, - local_size, - scaling_factor[i].item(), - ke - ks, - sparse_attn_enabled=sparse_attn_enabled, - heads_vertical_size=heads_vertical_size, - heads_slash_size=heads_slash_size, - group_size=group_size) - else: - for head_id in range(current_q.size(-2)): - # (seq_len, num_heads, head_size) - current_q_head = current_q[:, head_id, :].unsqueeze(1) - current_q_succ_head = \ - current_q_succ[:, head_id, :].unsqueeze(1) - current_q_inter_head = \ - current_q_inter[:, head_id, :].unsqueeze(1) - current_q_succ_head_critical = \ - current_q_succ_critical[:, head_id, :].unsqueeze(1) - current_q_inter_head_critical = \ - current_q_inter_critical[:, head_id, :].unsqueeze(1) - if block_table is not 
None: - current_k_head = current_k[..., head_id // - group_size, :].unsqueeze(2) - current_v_head = current_v[..., head_id // - group_size, :].unsqueeze(2) - - else: - current_k_head = current_k[:, head_id, :].unsqueeze(1) - current_v_head = current_v[:, head_id, :].unsqueeze(1) - - current_out = self._dual_chunk_flash_attn_prefill_func( - current_q_head, - current_q_succ_head, - current_q_inter_head, - current_q_succ_head_critical, - current_q_inter_head_critical, - current_k_head, - current_v_head, - current_block_table, - softmax_scale, - chunk_size, - local_size, - scaling_factor[i].item(), - ke - ks, - sparse_attn_enabled=sparse_attn_enabled, - ) - current_output[:, head_id:head_id + 1, :] = current_out - all_outputs.append(current_output) - return torch.cat(all_outputs, dim=0) - - def _dual_chunk_flash_attn_prefill_func( - self, - q, - q_succ, - q_inter, - q_succ_critical, - q_inter_critical, - k, - v, - block_table, - softmax_scale: float, - chunk_size: int, - local_size: int, - scaling_factor: float, - k_length: int, - sparse_attn_enabled: Optional[bool] = True, - heads_vertical_size=None, - heads_slash_size=None, - group_size=None, - ): - flash_results = [] - chunk_len = chunk_size - local_size - - if block_table is not None: - block_size = v.shape[1] - if chunk_len % block_size != 0: - raise ValueError("chunk_len must be divisible by block_size.") - else: - block_size = 1 - - if self.original_max_position_embeddings > 0: - softmax_scale = softmax_scale * scaling_factor - - begin = k_length - q.shape[0] - while begin < k_length: - flash_per_chunk = [] - - prev_chunk_end_pos = (begin // chunk_len) * chunk_len - next_chunk_end_pos = prev_chunk_end_pos + chunk_len - end = min(next_chunk_end_pos, k_length) - qbegin = begin - (k_length - q.shape[0]) - qend = end - (k_length - q.shape[0]) - - qk_chunks = [] - q_states_intra = q[qbegin:qend] - # choose critical token - if block_table is not None: - block_tables_intra = _get_block(block_table, block_size, - prev_chunk_end_pos, end) - k_states_intra = k[block_tables_intra].view( - -1, *k.shape[-2:])[:(end - prev_chunk_end_pos)] - v_states_intra = v[block_tables_intra].view( - -1, *v.shape[-2:])[:(end - prev_chunk_end_pos)] - else: - block_tables_intra = None - k_states_intra = k[prev_chunk_end_pos:end] - v_states_intra = v[prev_chunk_end_pos:end] - - if sparse_attn_enabled: - last_q_size = min(qend - qbegin, self.sparse_attention_last_q) - _, num_device_k_heads, head_dim = k_states_intra.shape - k_states_intra = (k_states_intra.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, head_dim)) - v_states_intra = (v_states_intra.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, head_dim)) - qk_chunks.append( - (q_states_intra.transpose(0, 1)[:, -last_q_size:] * - softmax_scale) @ k_states_intra.permute(1, 2, 0)) - - if prev_chunk_end_pos - chunk_len >= 0: - q_states_succ = q_succ[qbegin:qend] - q_states_succ_critical = q_succ_critical[qbegin:qend] - if block_table is not None: - block_tables_succ = _get_block( - block_table, block_size, - prev_chunk_end_pos - chunk_len, prev_chunk_end_pos) - k_states_succ = k[block_tables_succ].view( - -1, *k.shape[-2:])[:chunk_len] - v_states_succ = v[block_tables_succ].view( - -1, *v.shape[-2:])[:chunk_len] - else: - k_states_succ = k[prev_chunk_end_pos - - chunk_len:prev_chunk_end_pos] - v_states_succ = v[prev_chunk_end_pos - - chunk_len:prev_chunk_end_pos] - - if sparse_attn_enabled: - k_states_succ = 
(k_states_succ.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, - head_dim)) - v_states_succ = (v_states_succ.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, - head_dim)) - qk_chunks.append((q_states_succ_critical.transpose( - 0, 1)[:, -last_q_size:] * softmax_scale) - @ k_states_succ.permute(1, 2, 0)) - - if prev_chunk_end_pos - chunk_len * 2 >= 0: - q_states_inter = q_inter[qbegin:qend] - q_states_inter_critical = q_inter_critical[qbegin:qend] - if block_table is not None: - block_tables_inter = _get_block( - block_table, block_size, 0, - prev_chunk_end_pos - chunk_len) - k_states_inter = k[block_tables_inter].view( - -1, *k.shape[-2:])[:(prev_chunk_end_pos - chunk_len)] - v_states_inter = v[block_tables_inter].view( - -1, *v.shape[-2:])[:(prev_chunk_end_pos - chunk_len)] - else: - k_states_inter = k[:prev_chunk_end_pos - chunk_len] - v_states_inter = v[:prev_chunk_end_pos - chunk_len] - - if sparse_attn_enabled: - k_states_inter = (k_states_inter.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, - head_dim)) - v_states_inter = (v_states_inter.unsqueeze(2).repeat( - 1, 1, group_size, - 1).reshape(-1, num_device_k_heads * group_size, - head_dim)) - qk_chunks.append((q_states_inter_critical.transpose( - 0, 1)[:, -last_q_size:] * softmax_scale) - @ k_states_inter.permute(1, 2, 0)) - - if sparse_attn_enabled: - reversed_qk = qk_chunks[::-1] - qk = torch.cat(reversed_qk, dim=-1) - - qk[:, :, -last_q_size:] = torch.where( - self.last_q_mask[..., -last_q_size:, - -last_q_size:].to(qk.device), - qk[:, :, -last_q_size:], -torch.inf) - qk = F.softmax(qk, dim=-1, dtype=torch.float32) - - vertical = qk.sum(-2, keepdim=True) - vertical[..., :30] = torch.inf - - # Avoid sorting by using the min/max ints to fill the indexer - # buffers. 
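The buffer packing that follows can be pictured on its own: per-head, variable-length vertical/slash index lists are copied into fixed-size buffers pre-filled with int32 max/min sentinels, while small per-head count tensors record how many entries are real. The head count, buffer width, and index values below are invented for illustration.

import torch

n_heads, max_topk = 2, 8
int32_max = torch.iinfo(torch.int32).max
int32_min = torch.iinfo(torch.int32).min

vertical_buffer = torch.full((n_heads, max_topk), int32_max, dtype=torch.int64)
slash_buffer = torch.full((n_heads, max_topk), int32_min, dtype=torch.int64)
vertical_sizes = torch.empty(n_heads, dtype=torch.int32)
slash_sizes = torch.empty(n_heads, dtype=torch.int32)

per_head_vertical = [torch.tensor([3, 17, 21]), torch.tensor([5])]
per_head_slash = [torch.tensor([40, 2]), torch.tensor([11, 9, 1])]
for h in range(n_heads):
    v, s = per_head_vertical[h], per_head_slash[h]
    vertical_sizes[h], slash_sizes[h] = v.numel(), s.numel()
    vertical_buffer[h, :v.numel()].copy_(v)   # valid entries at the front
    slash_buffer[h, :s.numel()].copy_(s)      # sentinel padding stays at the back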
- int32_max = torch.iinfo(torch.int32).max - int32_min = torch.iinfo(torch.int32).min - n_heads = qk.size()[0] - max_slash_topk = torch.max(heads_slash_size).item() - max_vertical_topk = torch.max(heads_vertical_size).item() - # store each head's slash topk, vertical topk - vertical = vertical.reshape((n_heads, -1)) - # prevent out of range when prompt size < max_vertical_topk - max_vertical_topk = min(vertical.shape[-1], max_vertical_topk) - vertical_topk_buffer = torch.topk(vertical, max_vertical_topk, - -1).indices - slash_topk_buffer = torch.empty(size=(n_heads, max_slash_topk), - dtype=torch.int64, - device=qk.device) - for head_i in range(n_heads): - # (nqheads=1, lastq, k_len) - head_score = qk[head_i:head_i + 1, :, :] - slash_scores = _sum_all_diagonal_matrix(head_score) - if head_score.size(1) != 1: - # drop right up corner - slash_scores = slash_scores[..., :-last_q_size + 1] - slash_scores[..., -100:] = torch.inf - - head_slash_size = heads_slash_size[head_i] - head_slash_size = min(head_slash_size, vertical.size(-1)) - slash_topk = torch.topk(slash_scores, head_slash_size, - -1).indices - #(nheads, max_topk) - slash_topk_buffer[head_i, :head_slash_size] = slash_topk - - # reset heads topk - heads_slash_size[head_i] = head_slash_size - heads_vertical_size[head_i] = min( - heads_vertical_size[head_i], max_vertical_topk) - - # store - vertical_buffer = torch.full((n_heads, max_vertical_topk), - int32_max, - dtype=torch.int64, - device=q.device) - slash_buffer = torch.full((n_heads, max_slash_topk), - int32_min, - dtype=torch.int64, - device=q.device) - succ_vertical_buffer = torch.full((n_heads, max_vertical_topk), - int32_max, - dtype=torch.int64, - device=q.device) - succ_slash_buffer = torch.full((n_heads, max_slash_topk), - int32_min, - dtype=torch.int64, - device=q.device) - inter_vertical_buffer = torch.full( - (n_heads, max_vertical_topk), - int32_max, - dtype=torch.int64, - device=q.device) - inter_slash_buffer = torch.full((n_heads, max_slash_topk), - int32_min, - dtype=torch.int64, - device=q.device) - - vertical_size_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - slash_sizes_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - succ_vertical_size_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - succ_slash_sizes_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - inter_vertical_size_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - inter_slash_sizes_buffer = torch.empty(size=(n_heads, ), - dtype=torch.int32, - device=q.device) - - for head_i in range(n_heads): - vertical_topk = vertical_topk_buffer[ - head_i, :heads_vertical_size[head_i]] - # intra - intra_vertical_indices = vertical_topk[ - vertical_topk >= - prev_chunk_end_pos] - prev_chunk_end_pos - if intra_vertical_indices.nelement() == 0: - intra_vertical_indices = torch.cat([ - intra_vertical_indices, - torch.arange(0, - k_states_intra.size(0), - max(1, - k_states_intra.size(0) / 5), - dtype=torch.int32, - device=intra_vertical_indices.device) - ]) - slash_topk = slash_topk_buffer[ - head_i, :heads_slash_size[head_i]] - intra_slash_indices = ( - (qk.size(-1) - 1) - - slash_topk[slash_topk >= prev_chunk_end_pos]) - # fill buffer - v_count = intra_vertical_indices.nelement() - s_count = intra_slash_indices.nelement() - vertical_size_buffer[head_i] = v_count - slash_sizes_buffer[head_i] = s_count - vertical_buffer[head_i, :v_count].copy_( - intra_vertical_indices) - 
slash_buffer[head_i, :s_count].copy_(intra_slash_indices) - # succ - if prev_chunk_end_pos - chunk_len >= 0: - succ_vertical_indices = vertical_topk[ - (vertical_topk < prev_chunk_end_pos) - & (vertical_topk >= prev_chunk_end_pos - - chunk_len)] - (prev_chunk_end_pos - chunk_len) - # TODO: support no vertical - if succ_vertical_indices.nelement() == 0: - succ_vertical_indices = torch.cat([ - succ_vertical_indices, - torch.arange( - 0, - k_states_succ.size(0), - max(1, - k_states_succ.size(0) / 5), - dtype=torch.int32, - device=intra_vertical_indices.device) - ]) - succ_slash_indices = ( - (prev_chunk_end_pos + (qend - qbegin) - 1) - - slash_topk[((slash_topk >= - (prev_chunk_end_pos - chunk_len)) & - (slash_topk < (prev_chunk_end_pos + - (qend - qbegin))))]) - if succ_slash_indices.nelement() == 0: - succ_slash_indices = torch.cat([ - succ_slash_indices, - torch.arange( - 0, - k_states_succ.size(0), - max(1, - k_states_succ.size(0) / 5), - dtype=torch.int32, - device=intra_vertical_indices.device) - ]) - # fill buffer - v_count = succ_vertical_indices.nelement() - s_count = succ_slash_indices.nelement() - succ_vertical_size_buffer[head_i] = v_count - succ_slash_sizes_buffer[head_i] = s_count - succ_vertical_buffer[head_i, :v_count].copy_( - succ_vertical_indices) - succ_slash_buffer[head_i, :s_count].copy_( - succ_slash_indices) - - if prev_chunk_end_pos - 2 * chunk_len >= 0: - inter_vertical_indices = vertical_topk[ - vertical_topk < prev_chunk_end_pos - chunk_len] - - if inter_vertical_indices.nelement() == 0: - inter_vertical_indices = torch.cat([ - inter_vertical_indices, - torch.arange( - 0, - k_states_inter.size(0), - max(1, - k_states_inter.size(0) / 5), - dtype=torch.int32, - device=intra_vertical_indices.device) - ]) - inter_slash_indices = ( - (prev_chunk_end_pos - chunk_len + - (qend - qbegin) - 1) - - slash_topk[slash_topk < (prev_chunk_end_pos - - chunk_len + - (qend - qbegin))]) - if inter_slash_indices.nelement() == 0: - inter_slash_indices = torch.cat([ - inter_slash_indices, - torch.arange( - 0, - k_states_inter.size(0), - max(1, - k_states_inter.size(0) / 5), - dtype=torch.int32, - device=intra_vertical_indices.device) - ]) - # fill buffer - v_count = inter_vertical_indices.nelement() - s_count = inter_slash_indices.nelement() - inter_vertical_size_buffer[head_i] = v_count - inter_slash_sizes_buffer[head_i] = s_count - inter_vertical_buffer[head_i, :v_count].copy_( - inter_vertical_indices) - inter_slash_buffer[head_i, :s_count].copy_( - inter_slash_indices) - else: - intra_vertical_indices, intra_slash_indices = None, None - succ_vertical_indices, succ_slash_indices = None, None - inter_vertical_indices, inter_slash_indices = None, None - - if sparse_attn_enabled: - flash_result = self._do_flash_attn( - q_states_intra, - k_states_intra, - v_states_intra, - softmax_scale=softmax_scale, - causal=True, - stage="intra", - vertical_indices=vertical_buffer, - slash_indices=slash_buffer, - vertical_indices_count=vertical_size_buffer, - slash_indices_count=slash_sizes_buffer, - mergehead_softmax_scale=softmax_scale, - sparse_attn_enabled=sparse_attn_enabled) - else: - flash_result = self._do_flash_attn( - q_states_intra, - k_states_intra, - v_states_intra, - softmax_scale=softmax_scale, - causal=True, - stage="intra", - vertical_indices=intra_vertical_indices, - slash_indices=intra_slash_indices, - sparse_attn_enabled=sparse_attn_enabled) - flash_per_chunk.append(flash_result) - - if prev_chunk_end_pos - chunk_len >= 0: - if sparse_attn_enabled: - flash_result = 
self._do_flash_attn( - q_states_succ, - k_states_succ, - v_states_succ, - softmax_scale=softmax_scale, - causal=False, - stage="succ", - vertical_indices=succ_vertical_buffer, - slash_indices=succ_slash_buffer, - vertical_indices_count=succ_vertical_size_buffer, - slash_indices_count=succ_slash_sizes_buffer, - mergehead_softmax_scale=softmax_scale, - sparse_attn_enabled=sparse_attn_enabled) - else: - flash_result = self._do_flash_attn( - q_states_succ, - k_states_succ, - v_states_succ, - softmax_scale=softmax_scale, - causal=False, - stage="succ", - vertical_indices=succ_vertical_indices, - slash_indices=succ_slash_indices, - sparse_attn_enabled=sparse_attn_enabled) - flash_per_chunk.append(flash_result) - - if prev_chunk_end_pos - chunk_len * 2 >= 0: - if sparse_attn_enabled: - flash_result = self._do_flash_attn( - q_states_inter, - k_states_inter, - v_states_inter, - softmax_scale=softmax_scale, - causal=False, - stage="inter", - vertical_indices=inter_vertical_buffer, - slash_indices=inter_slash_buffer, - vertical_indices_count=inter_vertical_size_buffer, - slash_indices_count=inter_slash_sizes_buffer, - mergehead_softmax_scale=softmax_scale, - sparse_attn_enabled=sparse_attn_enabled) - else: - flash_result = self._do_flash_attn( - q_states_inter, - k_states_inter, - v_states_inter, - softmax_scale=softmax_scale, - causal=False, - stage="inter", - vertical_indices=inter_vertical_indices, - slash_indices=inter_slash_indices, - sparse_attn_enabled=sparse_attn_enabled) - flash_per_chunk.append(flash_result) - - flash_results.append(flash_per_chunk) - begin = end - - attn_output = self._merge_attn_outputs(flash_results) - del flash_results - return attn_output - - def _do_flash_attn( - self, - query_states: torch.Tensor, - key_states: torch.Tensor, - value_states: torch.Tensor, - softmax_scale: float, - causal: bool = True, - max_seqlen_k: Optional[int] = None, - stage: str = "intra", - vertical_indices: Optional[torch.Tensor] = None, - slash_indices: Optional[torch.Tensor] = None, - vertical_indices_count: Optional[torch.Tensor] = None, - slash_indices_count: Optional[torch.Tensor] = None, - mergehead_softmax_scale: Optional[float] = None, - sparse_attn_enabled: Optional[bool] = False, - ): - if max_seqlen_k is None: - max_seqlen_k = key_states.shape[0] - - q_len = query_states.shape[0] - q_heads = query_states.shape[1] - h_dim = query_states.shape[-1] - - if sparse_attn_enabled: - assert slash_indices is not None - if stage == "intra": - assert causal - else: - assert not causal - - query_states = query_states.unsqueeze(0).transpose(1, 2) - key_states = key_states.unsqueeze(0).transpose(1, 2) - value_states = value_states.unsqueeze(0).transpose(1, 2) - - q = query_states - k = key_states - v = value_states - - if (vertical_indices_count is not None and \ - slash_indices_count is not None): - assert mergehead_softmax_scale is not None - - res, s_lse = _vertical_slash_sparse_attention( - q, - k, - v, - vertical_indices, - slash_indices, - mergehead_softmax_scale, - causal=causal, - stage=stage, - vertical_indices_count=vertical_indices_count, - slash_indices_count=slash_indices_count) - res = res.view(q_heads, q_len, - h_dim).transpose(0, 1) # (qlen,nhead,h_dim) - s_lse = s_lse.view( - q_heads, q_len, - 1).squeeze(-1).unsqueeze(0).float() # (1, nhead,qlen) - else: - res, s_lse = _vertical_slash_sparse_attention(q, - k, - v, - vertical_indices, - slash_indices, - softmax_scale, - causal=causal, - stage=stage) - res = res.view(q_len, q_heads, h_dim) - s_lse = s_lse.view(q_len, q_heads, 
1).transpose(0, 2).float() - return res, s_lse - - output, softmax_lse = flash_attn_varlen_func( - q=query_states, - k=key_states, - v=value_states, - softmax_scale=softmax_scale, - cu_seqlens_q=torch.tensor([0, query_states.shape[0]], - dtype=torch.int32, - device=query_states.device), - max_seqlen_q=query_states.shape[0], - cu_seqlens_k=torch.tensor([0, max_seqlen_k], - dtype=torch.int32, - device=query_states.device), - max_seqlen_k=max_seqlen_k, - causal=causal, - return_softmax_lse=True, - ) - softmax_lse = softmax_lse.view(q_len, q_heads, 1).transpose(0, - 2).float() - return output, softmax_lse - - def _merge_attn_outputs( - self, - flash_results: List[List[Tuple[torch.Tensor, torch.Tensor]]], - return_lse: Optional[bool] = False, - ) -> torch.Tensor: - attn_outputs_all = [] - logits_all = [] - - for flash_per_chunk in flash_results: - if len(flash_per_chunk) == 1: - attn_outputs_all.append(flash_per_chunk[0][0]) - if return_lse: - logits_all.append(flash_per_chunk[0][1]) - continue - - attn_outputs = torch.stack([ - flash_attn_output[0] for flash_attn_output in flash_per_chunk - ]) - logits = torch.stack([ - flash_attn_output[1] for flash_attn_output in flash_per_chunk - ]) - logits = logits.to(torch.float32) - - if return_lse: - max_val = torch.max(logits, dim=0).values - diff = torch.abs(logits[0] - logits[1]) - log_sum_exp = max_val + torch.log1p(torch.exp(-diff)) - logits_all.append(log_sum_exp) - - max_logits = torch.max(logits, dim=0).values - stable_logits = logits - max_logits.unsqueeze(0) - lse_s = torch.exp(stable_logits).detach() - lse_sum = torch.sum(lse_s, dim=0) - lse_s /= lse_sum - attn_outputs *= lse_s.unsqueeze(-1).transpose(2, 3).squeeze(1) - attn_outputs_all.append(attn_outputs.sum(dim=0)) - - if return_lse: - return (torch.cat(attn_outputs_all, - dim=0), torch.cat(logits_all, dim=-1)) - else: - return torch.cat(attn_outputs_all, dim=0) - - def _dual_chunk_flash_attn_decoding( - self, - query: torch.Tensor, - query_succ: torch.Tensor, - query_inter: torch.Tensor, - key_cache: torch.Tensor, - value_cache: torch.Tensor, - block_table: torch.Tensor, - cache_seqlens: torch.Tensor, - softmax_scale: float, - causal: bool, - alibi_slopes: Optional[torch.Tensor], - chunk_size: int, - local_size: int, - original_max_position_embeddings: int, - decode_meta: DualChunkFlashAttentionMetadata, - ): - if not causal: - raise ValueError( - "Dual Chunk Attention does not support causal=False") - - block_size = value_cache.shape[1] - chunk_len = chunk_size - local_size - if chunk_len % block_size != 0: - raise ValueError("chunk_len must be divisible by block_size.") - if original_max_position_embeddings > 0: - assert decode_meta.scaling_factor is not None - scaling_factor = decode_meta.scaling_factor - query = (query * scaling_factor.view(-1, 1, 1, 1)).to( - query.dtype - ) # possible for numerical issue, need to fused in the kernel - query_succ = (query_succ * scaling_factor.view(-1, 1, 1, 1)).to( - query.dtype) - query_inter = (query_inter * scaling_factor.view(-1, 1, 1, 1)).to( - query.dtype) - outputs_list = [] - softmax_lses_list = [] - - # intra-attention - intra_output, intra_softmax_lse = ( - self._dual_chunk_flash_attn_decoding_with_exp_sums( - query, - key_cache, - value_cache, - decode_meta.block_tables_intra, - decode_meta.seq_lens_intra, - softmax_scale, - alibi_slopes, - causal=False, - )) - outputs_list.append(intra_output) - softmax_lses_list.append(intra_softmax_lse) - - # succ-attention - if decode_meta.max_seq_len_succ: - succ_output, succ_softmax_lse = ( - 
self._dual_chunk_flash_attn_decoding_with_exp_sums( - query_succ, - key_cache, - value_cache, - decode_meta.block_tables_succ, - decode_meta.seq_lens_succ, - softmax_scale, - alibi_slopes, - causal=False, - )) - outputs_list.append(succ_output) - softmax_lses_list.append(succ_softmax_lse) - - # inter-attention - if decode_meta.max_seq_len_inter: - inter_output, inter_softmax_lse = ( - self._dual_chunk_flash_attn_decoding_with_exp_sums( - query_inter, - key_cache, - value_cache, - block_table[:, :decode_meta.max_seq_len_inter], - decode_meta.seq_lens_inter, - softmax_scale, - alibi_slopes, - causal=False, - )) - outputs_list.append(inter_output) - softmax_lses_list.append(inter_softmax_lse) - outputs = torch.stack(outputs_list, dim=0) - del outputs_list - softmax_lses = torch.stack(softmax_lses_list, dim=0).to(torch.float32) - del softmax_lses_list - max_logits = torch.max(softmax_lses, dim=0).values - stable_logits = softmax_lses - max_logits.unsqueeze(0) - lse_s = torch.exp(stable_logits).detach() - lse_sum = torch.sum(lse_s, dim=0) - lse_s /= lse_sum - outputs *= lse_s.unsqueeze(-1).transpose(2, 3) - return outputs.sum(0) - - def _dual_chunk_flash_attn_decoding_with_exp_sums( - self, - query: torch.Tensor, - key_cache: torch.Tensor, - value_cache: torch.Tensor, - block_table: torch.Tensor, - cache_seqlens: torch.Tensor, - softmax_scale: float, - alibi_slopes: Optional[torch.Tensor], - causal: bool, - ): - out, softmax_lse = flash_attn_with_kvcache( - q=query, - k_cache=key_cache, - v_cache=value_cache, - block_table=block_table, - cache_seqlens=cache_seqlens, - softmax_scale=softmax_scale, - alibi_slopes=alibi_slopes, - causal=causal, - return_softmax_lse=True, - ) - mask = (cache_seqlens == 0) - out[mask] = 0 - softmax_lse[mask] = -float("inf") - return out, softmax_lse - - -def _vertical_slash_sparse_attention( - query: torch.Tensor, # [BATCH, N_HEADS, N_CTX, D_HEAD] - key: torch.Tensor, # [BATCH, N_HEADS, N_KV_CTX, D_HEAD] - value: torch.Tensor, # [BATCH, N_HEADS, N_KV_CTX, D_HEAD] - v_idx: torch.Tensor, # [BATCH, N_HEADS, NNZ_V] - s_idx: torch.Tensor, # [BATCH, N_HEADS, NNZ_S] - softmax_scale: float, - causal: bool = True, - stage: str = "intra", - block_size_M: int = 64, - block_size_N: int = 64, - vertical_indices_count: torch.Tensor = None, # [N_HEADS,] - slash_indices_count: torch.Tensor = None, -): - if stage == "intra": - assert causal - else: - assert not causal - - batch_size, num_heads, context_size, head_dim = query.shape - _, _, kv_seq_len, _ = key.shape - - if head_dim not in [16, 32, 64, 128, 256, 512]: - target_dim = 2**math.ceil(math.log2(head_dim)) - head_dim - query = F.pad(query, [0, target_dim, 0, 0, 0, 0, 0, 0]) - key = F.pad(key, [0, target_dim, 0, 0, 0, 0, 0, 0]) - value = F.pad(value, [0, target_dim, 0, 0, 0, 0, 0, 0]) - - v_idx = v_idx.to(torch.int32).reshape( - (batch_size, num_heads, -1)).sort(dim=-1, descending=False)[0] - s_idx = s_idx.to(torch.int32).reshape( - (batch_size, num_heads, -1)).sort(dim=-1, descending=True)[0] - q_seqlens = torch.tensor([context_size], - dtype=torch.int32, - device=query.device) - kv_seqlens = torch.tensor([kv_seq_len], - dtype=torch.int32, - device=query.device) - - if vertical_indices_count is not None and slash_indices_count is not None: - ( - block_count, - block_offset, - column_count, - column_index, - ) = ops.convert_vertical_slash_indexes_mergehead( - q_seqlens, kv_seqlens, v_idx, s_idx, vertical_indices_count, - slash_indices_count, context_size, block_size_M, block_size_N, - causal) - else: - ( - block_count, - 
block_offset, - column_count, - column_index, - ) = ops.convert_vertical_slash_indexes(q_seqlens, kv_seqlens, v_idx, - s_idx, context_size, - block_size_M, block_size_N, - causal) - - q = query.transpose(1, 2).contiguous() - k = key.transpose(1, 2).contiguous() - v = value.transpose(1, 2).contiguous() - out, lse = sparse_attn_func( - q, - k, - v, - block_count, - block_offset, - column_count, - column_index, - causal=causal, - softmax_scale=softmax_scale, - return_softmax_lse=True, - ) - out = out.transpose(1, 2).contiguous() - softmax_lse = lse.reshape(*lse.shape, 1) - return (out[..., :context_size, :head_dim], - softmax_lse[..., :context_size, :]) - - -def _sum_all_diagonal_matrix(mat: torch.tensor): - h, n, m = mat.shape - # Zero matrix used for padding - zero_mat = torch.zeros((h, n, n), device=mat.device) - # pads the matrix on left and right - mat_padded = torch.cat((zero_mat, mat, zero_mat), -1) - # Change the strides - mat_strided = mat_padded.as_strided((1, n, n + m), - (n * (2 * n + m), 2 * n + m + 1, 1)) - # Sums the resulting matrix's columns - sum_diags = torch.sum(mat_strided, 1) - return sum_diags[:, 1:] # drop left bottom corner - - -def _get_block(block_table: torch.Tensor, block_size: int, begin: int, - end: int): - begin_block = begin // block_size - end_block = (end - 1) // block_size + 1 - return block_table[begin_block:end_block] diff --git a/vllm/attention/backends/flash_attn.py b/vllm/attention/backends/flash_attn.py deleted file mode 100755 index edb3afb4aa07..000000000000 --- a/vllm/attention/backends/flash_attn.py +++ /dev/null @@ -1,929 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Attention layer with FlashAttention.""" -from collections import defaultdict -from dataclasses import dataclass -from itertools import accumulate -from typing import Dict, List, Optional, Tuple, Type - -import torch - -from vllm import _custom_ops as ops -# yapf conflicts with isort for this block -# yapf: disable -from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, - AttentionLayer, - AttentionMetadata, - AttentionMetadataBuilder, - AttentionType, - is_quantized_kv_cache) -# yapf: enable -from vllm.attention.backends.utils import ( - PAD_SLOT_ID, CommonAttentionState, compute_slot_mapping, - compute_slot_mapping_start_idx, get_num_prefill_decode_query_kv_tokens, - get_seq_len_block_table_args, is_all_cross_attn_metadata_set, - is_all_encoder_attn_metadata_set, is_block_tables_empty) -from vllm.attention.utils.fa_utils import (flash_attn_supports_fp8, - get_flash_attn_version) -from vllm.logger import init_logger -from vllm.multimodal import MultiModalPlaceholderMap -from vllm.utils import async_tensor_h2d, make_tensor_with_pad -from vllm.vllm_flash_attn import (flash_attn_varlen_func, - flash_attn_with_kvcache) - -logger = init_logger(__name__) - - -class FlashAttentionBackend(AttentionBackend): - - accept_output_buffer: bool = True - - @staticmethod - def get_supported_head_sizes() -> List[int]: - return [32, 64, 96, 128, 160, 192, 224, 256] - - @staticmethod - def get_name() -> str: - return "FLASH_ATTN" - - @staticmethod - def get_impl_cls() -> Type["FlashAttentionImpl"]: - return FlashAttentionImpl - - @staticmethod - def get_metadata_cls() -> Type["AttentionMetadata"]: - return FlashAttentionMetadata - - @staticmethod - def get_builder_cls() -> Type["FlashAttentionMetadataBuilder"]: - return FlashAttentionMetadataBuilder - - @staticmethod - def get_state_cls() -> 
Type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> Tuple[int, ...]: - if block_size % 16 != 0: - raise ValueError("Block size must be a multiple of 16.") - return (2, num_blocks, block_size, num_kv_heads, head_size) - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - src_key_cache = src_kv_cache[0] - dst_key_cache = dst_kv_cache[0] - ops.swap_blocks(src_key_cache, dst_key_cache, src_to_dst) - src_value_cache = src_kv_cache[1] - dst_value_cache = dst_kv_cache[1] - ops.swap_blocks(src_value_cache, dst_value_cache, src_to_dst) - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - key_caches = [kv_cache[0] for kv_cache in kv_caches] - value_caches = [kv_cache[1] for kv_cache in kv_caches] - - ops.copy_blocks(key_caches, value_caches, src_to_dists) - - -@dataclass -class FlashAttentionMetadata(AttentionMetadata): - """Metadata for FlashAttentionBackend. - - NOTE: Any python object stored here is not updated when it is - cuda-graph replayed. If you have values that need to be changed - dynamically, it should be stored in tensor. The tensor has to be - updated from `CUDAGraphRunner.forward` API. - """ - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - - # NOTE(sang): Definition of context_len, query_len, and seq_len. - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ---------------------| - # |-- query_len ---| - - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). - context_lens_tensor: Optional[torch.Tensor] - - # (batch_size, max_blocks_per_seq). - # Block addresses per sequence. (Seq id -> list of physical block) - # E.g., [0, 1, 2] means tokens are stored in 0th, 1st, and 2nd blocks - # in the kv cache. Each block can contain up to block_size tokens. - # 2nd dimensions are padded up to max_blocks_per_seq if it is cuda-graph - # captured. - block_tables: Optional[torch.Tensor] - - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - - use_cuda_graph: bool - - # Maximum query length in the batch. - max_query_len: Optional[int] = None - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] = None - - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. 
- seq_start_loc: Optional[torch.Tensor] = None - - _cached_prefill_metadata: Optional["FlashAttentionMetadata"] = None - _cached_decode_metadata: Optional["FlashAttentionMetadata"] = None - - # Begin encoder attn & enc/dec cross-attn fields... - - # Encoder sequence lengths representation - encoder_seq_lens: Optional[List[int]] = None - encoder_seq_lens_tensor: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - encoder_seq_start_loc: Optional[torch.Tensor] = None - # Maximum sequence length among encoder sequences - max_encoder_seq_len: Optional[int] = None - # Number of tokens input to encoder - num_encoder_tokens: Optional[int] = None - - # Cross-attention memory-mapping data structures: slot mapping - # and block tables - cross_slot_mapping: Optional[torch.Tensor] = None - cross_block_tables: Optional[torch.Tensor] = None - - @property - def is_all_encoder_attn_metadata_set(self): - ''' - All attention metadata required for encoder attention is set. - ''' - return is_all_encoder_attn_metadata_set(self) - - @property - def is_all_cross_attn_metadata_set(self): - ''' - All attention metadata required for enc/dec cross-attention is set. - - Superset of encoder attention required metadata. - ''' - return is_all_cross_attn_metadata_set(self) - - @property - def prefill_metadata(self) -> Optional["FlashAttentionMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - assert ((self.seq_lens is not None) - or (self.encoder_seq_lens is not None)) - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - query_start_loc = (None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1]) - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[:self.num_prefill_tokens]) - seq_lens = (None if self.seq_lens is None else - self.seq_lens[:self.num_prefills]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[:self.num_prefills]) - seq_start_loc = (None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1]) - context_lens_tensor = (None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills]) - block_tables = (None if self.block_tables is None else - self.block_tables[:self.num_prefills]) - - self._cached_prefill_metadata = FlashAttentionMetadata( - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=self. - multi_modal_placeholder_index_maps, - enable_kv_scales_calculation=self.enable_kv_scales_calculation, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_query_len=0, - max_decode_seq_len=0, - query_start_loc=query_start_loc, - seq_start_loc=seq_start_loc, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=False, - # Begin encoder & cross attn fields below... 
- encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - encoder_seq_start_loc=self.encoder_seq_start_loc, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_prefill_metadata - - @property - def decode_metadata(self) -> Optional["FlashAttentionMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[self.num_prefill_tokens:]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[self.num_prefills:]) - block_tables = (None if self.block_tables is None else - self.block_tables[self.num_prefills:]) - - self._cached_decode_metadata = FlashAttentionMetadata( - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=True, - seq_lens=None, - seq_lens_tensor=seq_lens_tensor, - max_decode_query_len=self.max_decode_query_len, - max_query_len=self.max_query_len, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - # Batch may be composed of prefill|decodes, adjust query start - # indices to refer to the start of decodes. E.g. - # in tokens:[3 prefills|6 decodes], query_start_loc=[3,9] => [0,6]. - query_start_loc=(self.query_start_loc[self.num_prefills:] - - self.query_start_loc[self.num_prefills]) - if self.query_start_loc is not None else None, - seq_start_loc=self.seq_start_loc[self.num_prefills:] - if self.seq_start_loc is not None else None, - context_lens_tensor=None, - block_tables=block_tables, - use_cuda_graph=self.use_cuda_graph, - # Begin encoder & cross attn fields below... - encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - encoder_seq_start_loc=self.encoder_seq_start_loc, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_decode_metadata - - -class FlashAttentionMetadataBuilder( - AttentionMetadataBuilder[FlashAttentionMetadata]): - - def __init__(self, input_builder): - self.input_builder = input_builder - self.runner = input_builder.runner - self.sliding_window = input_builder.sliding_window - self.block_size = input_builder.block_size - - def prepare(self): - self.slot_mapping: List[int] = [] - self.prefill_seq_lens: List[int] = [] - self.context_lens: List[int] = [] - self.block_tables: List[List[int]] = [] - self.curr_seq_lens: List[int] = [] - self.multimodal_placeholder_maps: Dict[ - str, - MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap) - self.num_prefills = 0 - self.num_prefill_tokens = 0 - self.num_decode_tokens = 0 - self.has_prefix_cache_hit = False - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, - prefix_cache_hit: bool): - """Add a sequence group to the metadata. Specifically update/append - 1. context length. - 2. block table. - 3. slot mapping. 
- """ - is_prompt = inter_data.is_prompt - block_tables = inter_data.block_tables - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - - if is_prompt: - mm_maps = inter_data.multi_modal_placeholder_maps - if mm_maps: - for modality, placeholders in mm_maps.items(): - self.multimodal_placeholder_maps[modality].extend( - placeholders) - - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - # Compute block table. - # TODO(sang): Combine chunked prefill and prefix caching by - # only allowing multiple of block_size chunk size. - # NOTE: This only works for oooooooxxx style attention. - block_table = [] - if prefix_cache_hit: - # NOTE(woosuk): For flash-attn, the block table should - # include the entries for the incoming prefill tokens. - block_table = block_tables[seq_id] - elif ((chunked_prefill_enabled or not is_prompt) - and block_tables is not None): - if curr_sliding_window_block == 0: - block_table = block_tables[seq_id] - else: - block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.block_tables.append(block_table) - - # Compute slot mapping. - is_profile_run = is_block_tables_empty(block_tables) - start_idx = compute_slot_mapping_start_idx(is_prompt, query_len, - context_len, - self.sliding_window) - compute_slot_mapping(is_profile_run, self.slot_mapping, seq_id, - seq_len, context_len, start_idx, - self.block_size, inter_data.block_tables) - - def _get_graph_runner_block_tables( - self, num_seqs: int, - block_tables: List[List[int]]) -> torch.Tensor: - # The shape of graph_block_tables is - # [max batch size, max context len // block size]. - max_batch_size, max_blocks = self.runner.graph_block_tables.shape - assert max_batch_size >= num_seqs - - graph_block_tables = self.runner.graph_block_tables[:num_seqs] - for i, block_table in enumerate(block_tables): - if block_table: - num_blocks = len(block_table) - if num_blocks <= max_blocks: - graph_block_tables[i, :num_blocks] = block_table - else: - # It may be possible to have more blocks allocated due - # to lookahead slots of multi-step, however, they are - # not used anyway, so can be safely ignored. - graph_block_tables[ - i, :max_blocks] = block_table[:max_blocks] - - return torch.from_numpy(graph_block_tables).to( - device=self.runner.device, non_blocking=True) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - """Build attention metadata with on-device tensors. - - Args: - seq_lens: The maybe padded sequence lengths of the input sequences. - query_lens: The query lengths of the input sequences. - cuda_graph_pad_size: The padding size for cuda graph. - -1 if cuda graph is not used. - batch_size: The maybe padded batch size. 
- """ - prefix_cache_hit = any([ - inter_data.prefix_cache_hit - for inter_data in self.input_builder.inter_data_list - ]) - for inter_data in self.input_builder.inter_data_list: - self._add_seq_group(inter_data, - self.input_builder.chunked_prefill_enabled, - prefix_cache_hit) - - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - max_query_len = max(query_lens) - decode_query_lens = query_lens[self.num_prefills:] - if len(decode_query_lens) > 0: - max_decode_query_len = max(decode_query_lens) - else: - max_decode_query_len = 1 - max_prefill_seq_len = max(self.prefill_seq_lens, default=0) - max_decode_seq_len = max(self.curr_seq_lens, default=0) - num_decode_tokens = self.num_decode_tokens - query_start_loc = list(accumulate(query_lens, initial=0)) - seq_start_loc = list(accumulate(seq_lens, initial=0)) - - num_seqs = len(seq_lens) - if use_captured_graph: - self.slot_mapping.extend([PAD_SLOT_ID] * cuda_graph_pad_size) - self.block_tables.extend([] * cuda_graph_pad_size) - num_decode_tokens = batch_size - self.num_prefill_tokens - block_tables = self._get_graph_runner_block_tables( - num_seqs, self.block_tables) - else: - block_tables = make_tensor_with_pad( - self.block_tables, - pad=0, - dtype=torch.int, - device=device, - ) - assert max_query_len > 0, ("query_lens: {}".format(query_lens)) - - assert device is not None - context_lens_tensor = async_tensor_h2d(self.context_lens, torch.int, - device, self.runner.pin_memory) - seq_lens_tensor = async_tensor_h2d(seq_lens, torch.int, device, - self.runner.pin_memory) - slot_mapping_tensor = async_tensor_h2d(self.slot_mapping, torch.long, - device, self.runner.pin_memory) - query_start_loc_tensor = async_tensor_h2d(query_start_loc, torch.int32, - device, - self.runner.pin_memory) - seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, - device, self.runner.pin_memory) - placeholder_index_maps = { - modality: placeholder_map.index_map() - for modality, placeholder_map in - self.multimodal_placeholder_maps.items() - } - - return FlashAttentionMetadata( - num_prefills=self.num_prefills, - slot_mapping=slot_mapping_tensor, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=num_decode_tokens, - seq_lens=seq_lens, - multi_modal_placeholder_index_maps=placeholder_index_maps, - enable_kv_scales_calculation=True, - seq_lens_tensor=seq_lens_tensor, - max_query_len=max_query_len, - max_decode_query_len=max_decode_query_len, - max_prefill_seq_len=max_prefill_seq_len, - max_decode_seq_len=max_decode_seq_len, - query_start_loc=query_start_loc_tensor, - seq_start_loc=seq_start_loc_tensor, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=use_captured_graph, - ) - - -class FlashAttentionImpl(AttentionImpl): - """ - If the input tensors contain prompt tokens, the layout is as follows: - |<--------------- num_prefill_tokens ----------------->| - |<--prefill_0-->|<--prefill_1-->|...|<--prefill_N-1--->| - - Otherwise, the layout is as follows: - |<----------------- num_decode_tokens ------------------>| - |<--decode_0-->|..........|<--decode_M-1-->|<--padding-->| - - Generation tokens can contain padding when cuda-graph is used. - Currently, prompt tokens don't contain any padding. - - The prompts might have different lengths, while the generation tokens - always have length 1. - - If chunked prefill is enabled, prefill tokens and decode tokens can be - batched together in a flattened 1D query. 
- - |<----- num_prefill_tokens ---->|<------- num_decode_tokens --------->| - |<-prefill_0->|...|<-prefill_N-1->|<--decode_0-->|...|<--decode_M-1-->| - - Currently, cuda graph is disabled for chunked prefill, meaning there's no - padding between prefill and decode tokens. - """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float] = None, - attn_type: str = AttentionType.DECODER, - kv_sharing_target_layer_name: Optional[str] = None, - use_irope: bool = False, - ) -> None: - if kv_sharing_target_layer_name is not None: - raise NotImplementedError("KV sharing is not supported in V0 " - "FLASH_ATTN backend.") - if use_irope: - logger.warning( - "Using irope in V0 is not supported yet, it will fall back " - "to global attention for long context.") - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - if alibi_slopes is not None: - alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.alibi_slopes = alibi_slopes - self.sliding_window = ((sliding_window - 1, - 0) if sliding_window is not None else (-1, -1)) - self.kv_cache_dtype = kv_cache_dtype - self.vllm_flash_attn_version = get_flash_attn_version( - requires_alibi=self.alibi_slopes is not None) - if is_quantized_kv_cache(self.kv_cache_dtype) and ( - not self.kv_cache_dtype.startswith("fp8") - or not flash_attn_supports_fp8()): - raise NotImplementedError( - f"FlashAttention does not support {self.kv_cache_dtype} " - "kv-cache on this device " - f"(FA supports fp8 = {flash_attn_supports_fp8()}).") - if logits_soft_cap is None: - # In flash-attn, setting logits_soft_cap as 0 means no soft cap. - logits_soft_cap = 0 - self.logits_soft_cap = logits_soft_cap - - self.num_queries_per_kv = self.num_heads // self.num_kv_heads - - support_head_sizes = FlashAttentionBackend.get_supported_head_sizes() - if head_size not in support_head_sizes: - raise ValueError( - f"Head size {head_size} is not supported by FlashAttention. " - f"Supported head sizes are: {support_head_sizes}.") - self.attn_type = attn_type - - def forward( - self, - layer: AttentionLayer, - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - kv_cache: torch.Tensor, - attn_metadata: FlashAttentionMetadata, - output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward pass with FlashAttention. - - Args: - query: shape = [num_tokens, num_heads, head_size] - key: shape = [num_tokens, num_kv_heads, head_size] - value: shape = [num_tokens, num_kv_heads, head_size] - output: shape = [num_tokens, num_heads, head_size] - kv_cache: KV cache tensor with shape - [2, num_blocks, block_size, num_kv_heads, head_size]. - NOTE: kv_cache will be an empty tensor with shape [0] - for profiling run. - attn_metadata: Metadata for attention. - NOTE: It in-place updates the output tensor. - NOTE: FP8 quantization, flash-attn expect the size of - {q,k,v}_descale to be (num_sequences, num_kv_heads). - We use torch's .expand() to avoid duplicating values - """ - assert output is not None, "Output tensor must be provided." 
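# A standalone sketch (hypothetical sizes, not values from the code above) of
# the descale handling mentioned in the docstring: a per-tensor scale held in a
# 1-element tensor is broadcast to the (num_sequences, num_kv_heads) shape the
# kernel expects via .expand(), which creates a view without copying data.
import torch

k_scale = torch.tensor([0.02])              # per-tensor scale, 1 element
descale_shape = (8, 4)                      # (num_sequences, num_kv_heads), made up
k_descale = k_scale.expand(descale_shape)   # zero-copy broadcast view
assert k_descale.shape == descale_shape
assert k_descale.data_ptr() == k_scale.data_ptr()  # same storage, nothing duplicated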
- - if output_scale is not None or output_block_scale is not None: - raise NotImplementedError( - "fused output quantization is not yet supported" - " for FlashAttentionImpl") - - # NOTE(woosuk): FlashAttention2 does not support FP8 KV cache. - if not flash_attn_supports_fp8() or output.dtype != torch.bfloat16: - assert ( - layer._k_scale_float == 1.0 and layer._v_scale_float == 1.0), ( - "key/v_scale is only supported in FlashAttention 3 with " - "base dtype bfloat16") - - attn_type = self.attn_type - if (attn_type == AttentionType.ENCODER - and (not attn_metadata.is_all_encoder_attn_metadata_set)): - raise AttributeError("Encoder attention requires setting " - "encoder metadata attributes.") - elif (attn_type == AttentionType.ENCODER_DECODER - and (not attn_metadata.is_all_cross_attn_metadata_set)): - raise AttributeError("Encoder/decoder cross-attention " - "requires setting cross-attention " - "metadata attributes.") - - kv_cache_dtype: str = self.kv_cache_dtype - softmax_scale: float = self.scale - window_size = self.sliding_window - alibi_slopes: Optional[torch.Tensor] = self.alibi_slopes - logits_soft_cap: Optional[float] = self.logits_soft_cap - fp8_attention = kv_cache_dtype.startswith("fp8") - - if fp8_attention and not flash_attn_supports_fp8(): - raise NotImplementedError( - "FlashAttention does not support FP8 kv-cache on this device.") - - if kv_cache.numel() > 0: - key_cache = kv_cache[0] - value_cache = kv_cache[1] - # We skip updating the KV cache under two conditions: - # a. When the Attention Type is ENCODER. In this phase, we compute - # only the encoder attention without updating the cache. - # b. When both Key and Value are None. This occurs during - # cross-attention computation in the decoding phase, where the - # KV cache is already populated with the cross-attention - # tensor. Thus, we skip cache updates during this time. - if (attn_type != AttentionType.ENCODER) and (key is not None) and ( - value is not None): - if attn_type == AttentionType.ENCODER_DECODER: - # Update cross-attention KV cache (prefill-only) - updated_slot_mapping = attn_metadata.cross_slot_mapping - else: - # Update self-attention KV cache (prefill/decode) - updated_slot_mapping = attn_metadata.slot_mapping - - # Reshape the input keys and values and store them in the cache. - # If kv_cache is not provided, the new key and value tensors are - # not cached. This happens during the initial memory - # profiling run. - torch.ops._C_cache_ops.reshape_and_cache_flash( - key, - value, - kv_cache[0], - kv_cache[1], - updated_slot_mapping.flatten(), # type: ignore[union-attr] - kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) - - if fp8_attention: - kv_cache = kv_cache.view(torch.float8_e4m3fn) - key_cache = key_cache.view(torch.float8_e4m3fn) - value_cache = value_cache.view(torch.float8_e4m3fn) - - if fp8_attention: - num_tokens, num_heads, head_size = query.shape - query, _ = ops.scaled_fp8_quant( - query.reshape( - (num_tokens, num_heads * head_size)).contiguous(), - layer._q_scale) - query = query.reshape((num_tokens, num_heads, head_size)) - - (num_prefill_query_tokens, num_prefill_kv_tokens, - num_decode_query_tokens) = \ - get_num_prefill_decode_query_kv_tokens(attn_metadata, attn_type) - decode_query = query[num_prefill_query_tokens:] - decode_output = output[num_prefill_query_tokens:] - # QKV for prefill. 
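# A standalone numeric sketch (made-up lengths, not the actual batch) of how the
# flattened 1D query is split into its prefill and decode parts, and of the
# cumulative start offsets the varlen kernels consume.
import torch

query_lens = [4, 3, 1, 1]            # 2 prefill requests (4 and 3 tokens) + 2 decodes
num_prefill_query_tokens = 4 + 3     # -> 7
query_start_loc = torch.tensor([0, 4, 7, 8, 9], dtype=torch.int32)  # cumsum with leading 0
assert query_start_loc[2].item() == num_prefill_query_tokens

query = torch.randn(sum(query_lens), 8, 64)         # [num_tokens, num_heads, head_size]
prefill_query = query[:num_prefill_query_tokens]    # tokens of all prefill requests
decode_query = query[num_prefill_query_tokens:]     # one token per decode request
assert prefill_query.shape[0] == 7 and decode_query.shape[0] == 2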
- query = query[:num_prefill_query_tokens] - prefill_output = output[:num_prefill_query_tokens] - assert query.shape[0] == num_prefill_query_tokens - assert decode_query.shape[0] == num_decode_query_tokens - - if prefill_meta := attn_metadata.prefill_metadata: - # Prompt run. - if (kv_cache.numel() == 0 or prefill_meta.block_tables is None - or prefill_meta.block_tables.numel() == 0): - # normal attention - # When block_tables are not filled, it means q and k are the - # prompt, and they have the same length. - q_seq_start_loc, q_seq_len, k_seq_start_loc, k_seq_len = \ - _get_query_key_seq_metadata(prefill_meta, True, attn_type) - - key = key[:num_prefill_kv_tokens] - value = value[:num_prefill_kv_tokens] - - if fp8_attention: - num_kv_tokens, num_kv_heads, head_size = key.shape - - key, _ = ops.scaled_fp8_quant( - key.reshape((num_kv_tokens, - num_kv_heads * head_size)).contiguous(), - layer._k_scale) - key = key.reshape((num_kv_tokens, num_kv_heads, head_size)) - - value, _ = ops.scaled_fp8_quant( - value.reshape((num_kv_tokens, - num_kv_heads * head_size)).contiguous(), - layer._v_scale) - value = value.reshape( - (num_kv_tokens, num_kv_heads, head_size)) - - descale_shape = (q_seq_start_loc.shape[0] - 1, key.shape[1]) - flash_attn_varlen_func( - q=query, - k=key, - v=value, - cu_seqlens_q=q_seq_start_loc, - cu_seqlens_k=k_seq_start_loc, - max_seqlen_q=q_seq_len, - max_seqlen_k=k_seq_len, - softmax_scale=softmax_scale, - causal=_get_causal_option(attn_type), - window_size=window_size, - alibi_slopes=alibi_slopes, - softcap=logits_soft_cap, - out=prefill_output, - fa_version=self.vllm_flash_attn_version, - q_descale=layer._q_scale.expand(descale_shape), - k_descale=layer._k_scale.expand(descale_shape), - v_descale=layer._v_scale.expand(descale_shape), - ) - else: - # prefix-enabled attention - assert attn_type == AttentionType.DECODER, ( - "Only decoder-only models support prefix caching") - assert prefill_meta.seq_lens is not None - assert prefill_meta.query_start_loc is not None - max_seq_len = max(prefill_meta.seq_lens) - descale_shape = (prefill_meta.query_start_loc.shape[0] - 1, - key.shape[1]) - flash_attn_varlen_func( # noqa - q=query, - k=key_cache, - v=value_cache, - cu_seqlens_q=prefill_meta.query_start_loc, - max_seqlen_q=prefill_meta.max_query_len, - seqused_k=prefill_meta.seq_lens_tensor, - max_seqlen_k=max_seq_len, - softmax_scale=softmax_scale, - causal=True, - window_size=window_size, - alibi_slopes=alibi_slopes, - block_table=prefill_meta.block_tables, - softcap=logits_soft_cap, - out=prefill_output, - fa_version=self.vllm_flash_attn_version, - q_descale=layer._q_scale.expand(descale_shape), - k_descale=layer._k_scale.expand(descale_shape), - v_descale=layer._v_scale.expand(descale_shape), - ) - - if decode_meta := attn_metadata.decode_metadata: - # Decoding run. - # Use flash_attn_varlen_func kernel for speculative decoding - # because different queries might have different lengths. 
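# (Illustrative made-up numbers: with 3 decode requests carrying different
# numbers of draft tokens, say query lengths [3, 1, 2], the decode varlen call
# would see cu_seqlens_q = [0, 3, 4, 6] and max_decode_query_len = 3.)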
- - assert decode_meta.max_decode_query_len is not None - # use only for actual varlen decoding - if decode_meta.max_decode_query_len > 1: - assert attn_type == AttentionType.DECODER, ( - "Only decoder-only models support max_decode_query_len > 1" - ) - assert decode_meta.query_start_loc is not None - descale_shape = (decode_meta.query_start_loc.shape[0] - 1, - key.shape[1]) - flash_attn_varlen_func( - q=decode_query, - k=key_cache, - v=value_cache, - cu_seqlens_q=decode_meta.query_start_loc, - max_seqlen_q=decode_meta.max_decode_query_len, - seqused_k=decode_meta.seq_lens_tensor, - max_seqlen_k=decode_meta.max_decode_seq_len, - softmax_scale=softmax_scale, - causal=True, - window_size=window_size, - alibi_slopes=alibi_slopes, - softcap=logits_soft_cap, - block_table=decode_meta.block_tables, - out=decode_output, - fa_version=self.vllm_flash_attn_version, - q_descale=layer._q_scale.expand(descale_shape), - k_descale=layer._k_scale.expand(descale_shape), - v_descale=layer._v_scale.expand(descale_shape), - ) - else: - # Use flash_attn_with_kvcache for normal decoding. - ( - seq_lens_arg, - _, - block_tables_arg, - ) = get_seq_len_block_table_args(decode_meta, False, attn_type) - descale_shape = (seq_lens_arg.shape[0], key_cache.shape[-2]) - flash_attn_with_kvcache( - q=decode_query.unsqueeze(1), - k_cache=key_cache, - v_cache=value_cache, - block_table=block_tables_arg, - cache_seqlens=seq_lens_arg, - softmax_scale=softmax_scale, - causal=True, - window_size=window_size, - alibi_slopes=alibi_slopes, - softcap=logits_soft_cap, - out=decode_output.unsqueeze(1), - fa_version=self.vllm_flash_attn_version, - q_descale=layer._q_scale.expand(descale_shape), - k_descale=layer._k_scale.expand(descale_shape), - v_descale=layer._v_scale.expand(descale_shape), - ) - return output - - -def _get_query_key_seq_metadata( - attn_metadata: FlashAttentionMetadata, - is_prompt: bool, - attn_type: str, -) -> tuple: - """ - Returns sequence metadata for key and query based on the specified - attention type and whether input is a prompt. - - This function computes the starting locations and maximum sequence lengths - for key and query sequences for different attention types. - - Args: - attn_metadata: The attention metadata object - is_prompt (bool): A flag indicating if the input is a prompt - attn_type (AttentionType): The type of attention being used. - - Returns: - tuple: A tuple containing four integers: - - Starting location for the query sequence. - - Maximum sequence length for the query sequence. - - Starting location for the key sequence. - - Maximum sequence length for the key sequence. - - Raises: - AttributeError: If an invalid attention type is provided. - """ - if attn_type == AttentionType.DECODER: - # Decoder self-attention - # Choose max_seq_len based on whether we are in prompt_run - if is_prompt: - max_seq_len = attn_metadata.max_prefill_seq_len - else: - max_seq_len = attn_metadata.max_decode_seq_len - return (attn_metadata.seq_start_loc, max_seq_len, - attn_metadata.seq_start_loc, max_seq_len) - - elif attn_type == AttentionType.ENCODER_DECODER: - # This is cross attention between the where the key - # is the precomputed encoder attention and query - # is the input sequence. - # Choose query max length based on whether it is prompt - # or not. 
- if is_prompt: - max_seq_len = attn_metadata.max_prefill_seq_len - else: - max_seq_len = attn_metadata.max_decode_seq_len - return (attn_metadata.seq_start_loc, max_seq_len, - attn_metadata.encoder_seq_start_loc, - attn_metadata.max_encoder_seq_len) - elif attn_type == AttentionType.ENCODER: - # For encoder attention both the query and the key are same i.e. the - # encoder sequence. - return (attn_metadata.encoder_seq_start_loc, - attn_metadata.max_encoder_seq_len, - attn_metadata.encoder_seq_start_loc, - attn_metadata.max_encoder_seq_len) - elif attn_type == AttentionType.ENCODER_ONLY: - assert is_prompt, "Should not have decode for encoder only model." - return (attn_metadata.seq_start_loc, attn_metadata.max_prefill_seq_len, - attn_metadata.seq_start_loc, attn_metadata.max_prefill_seq_len) - else: - raise AttributeError(f"Invalid attention type {str(attn_type)}") - - -def _get_causal_option(attn_type: str) -> bool: - """ - Determine whether the given attention type is suitable for causal - attention mechanisms. - - Args: - attn_type (AttentionType): The type of attention being evaluated - - Returns: - bool: Returns `True` if the attention type is suitable for causal - attention (i.e., not encoder, encoder-only, or encoder-decoder), - otherwise returns `False`. - """ - return not (attn_type == AttentionType.ENCODER - or attn_type == AttentionType.ENCODER_ONLY - or attn_type == AttentionType.ENCODER_DECODER) diff --git a/vllm/attention/backends/flashmla.py b/vllm/attention/backends/flashmla.py deleted file mode 100644 index aeaa0ab631cf..000000000000 --- a/vllm/attention/backends/flashmla.py +++ /dev/null @@ -1,227 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from contextlib import contextmanager -from dataclasses import dataclass -from typing import List, Optional, Tuple, Type - -import torch - -from vllm.attention.backends.abstract import (AttentionType, - is_quantized_kv_cache) -from vllm.attention.backends.mla.common import (MLACommonBackend, - MLACommonImpl, - MLACommonMetadata, - MLACommonMetadataBuilder, - MLACommonState) -from vllm.attention.ops.flashmla import (flash_mla_with_kvcache, - get_mla_metadata, - is_flashmla_supported) - - -class FlashMLABackend(MLACommonBackend): - - @staticmethod - def get_name() -> str: - return "FLASHMLA" - - @staticmethod - def get_impl_cls() -> Type["FlashMLAImpl"]: - return FlashMLAImpl - - @staticmethod - def get_metadata_cls() -> Type["FlashMLAMetadata"]: - return FlashMLAMetadata - - @staticmethod - def get_builder_cls() -> Type["FlashMLAMetadataBuilder"]: - return FlashMLAMetadataBuilder - - @staticmethod - def get_state_cls() -> Type["FlashMLAState"]: - return FlashMLAState - - -@dataclass -class FlashMLAMetadata(MLACommonMetadata): - decode_tile_scheduler_metadata: Optional[Tuple[torch.Tensor, - torch.Tensor]] = None - decode_num_splits: Optional[torch.Tensor] = None - - @property - def decode_metadata(self): - decode_metadata = super().decode_metadata - # TODO: cache assignment? 
- if decode_metadata is not None: - decode_metadata.decode_tile_scheduler_metadata=\ - self.decode_tile_scheduler_metadata - decode_metadata.decode_num_splits=\ - self.decode_num_splits - return decode_metadata - - -class FlashMLAMetadataBuilder(MLACommonMetadataBuilder[FlashMLAMetadata]): - - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - self.num_q_heads = self.runner.model_config.get_num_attention_heads( - self.runner.parallel_config) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - m = super().build(seq_lens, query_lens, cuda_graph_pad_size, - batch_size) - - if m.num_decode_tokens > 0: - m.decode_tile_scheduler_metadata, m.decode_num_splits = \ - get_mla_metadata( - m.seq_lens_tensor[m.num_prefills:], - self.num_q_heads, - 1, # MQA for the decode path - ) - - return m - - -class FlashMLAState(MLACommonState[FlashMLAMetadata]): - - def __init__(self, *args, **kwds): - super().__init__(*args, **kwds) - - self.num_q_heads = self.runner.model_config.get_num_attention_heads( - self.runner.parallel_config) - - @contextmanager - def graph_capture(self, max_batch_size: int): - # Run a dummy `get_mla_metadata` so we can get the right shapes - self._graph_decoder_tile_scheduler_metadata, \ - self._graph_decode_num_splits = get_mla_metadata( - torch.ones( - max_batch_size, dtype=torch.int32, device=self.runner.device), - self.num_q_heads, - 1, # MQA for the decode path - ) - - with super().graph_capture(max_batch_size): - yield - - del self._graph_decoder_tile_scheduler_metadata - del self._graph_decode_num_splits - - def graph_capture_get_metadata_for_batch( - self, batch_size: int, is_encoder_decoder_model: bool = False): - metadata = super().graph_capture_get_metadata_for_batch( - batch_size, is_encoder_decoder_model) - assert metadata.num_decode_tokens > 0 - - decoder_tile_scheduler_metadata, decode_num_splits = get_mla_metadata( - self._graph_seq_lens[:batch_size], - self.num_q_heads, - 1, # MQA for the decode path - ) - - self._graph_decoder_tile_scheduler_metadata.copy_( - decoder_tile_scheduler_metadata) - self._graph_decode_num_splits[:batch_size + 1].copy_(decode_num_splits) - - metadata.decode_tile_scheduler_metadata=\ - self._graph_decoder_tile_scheduler_metadata - metadata.decode_num_splits=\ - self._graph_decode_num_splits[:batch_size + 1] - - return metadata - - def get_graph_input_buffers(self, - attn_metadata, - is_encoder_decoder_model: bool = False): - input_buffers = super().get_graph_input_buffers( - attn_metadata, is_encoder_decoder_model) - input_buffers["decode_tile_scheduler_metadata"] = \ - attn_metadata.decode_metadata.decode_tile_scheduler_metadata - input_buffers["decode_num_splits"] = \ - attn_metadata.decode_metadata.decode_num_splits - - return input_buffers - - def prepare_graph_input_buffers(self, - input_buffers, - attn_metadata, - is_encoder_decoder_model: bool = False): - super().prepare_graph_input_buffers(input_buffers, attn_metadata, - is_encoder_decoder_model) - - input_buffers["decode_tile_scheduler_metadata"].copy_( - attn_metadata.decode_metadata.decode_tile_scheduler_metadata) - input_buffers["decode_num_splits"].copy_( - attn_metadata.decode_metadata.decode_num_splits) - - -class FlashMLAImpl(MLACommonImpl[FlashMLAMetadata]): - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float], - 
attn_type: str, - kv_sharing_target_layer_name: Optional[str] = None, - # MLA Specific Arguments - **mla_args) -> None: - super().__init__(num_heads, head_size, scale, num_kv_heads, - alibi_slopes, sliding_window, kv_cache_dtype, - logits_soft_cap, attn_type, - kv_sharing_target_layer_name, **mla_args) - - is_supported, reason = is_flashmla_supported() - assert is_supported, reason - - unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap] - if any(unsupported_features): - raise NotImplementedError( - "FlashMLAImpl does not support one of the following: " - "alibi_slopes, sliding_window, logits_soft_cap") - - if attn_type != AttentionType.DECODER: - raise NotImplementedError("Encoder self-attention and " - "encoder/decoder cross-attention " - "are not implemented for " - "FlashMLAImpl") - - if is_quantized_kv_cache(self.kv_cache_dtype): - raise NotImplementedError( - "FlashMLA with FP8 KV cache not yet supported") - - def _forward_decode( - self, - q_nope: torch.Tensor, - q_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: FlashMLAMetadata, - ) -> torch.Tensor: - assert kv_c_and_k_pe_cache.numel() > 0 - - decode_meta = attn_metadata.decode_metadata - assert decode_meta is not None - - q = torch.cat([q_nope, q_pe], dim=-1)\ - .unsqueeze(1) # Add seqlen dim of 1 (decode) - - o, _ = flash_mla_with_kvcache( - q=q, - k_cache=kv_c_and_k_pe_cache.unsqueeze(-2), # Add head dim of 1 - block_table=decode_meta.block_tables, - cache_seqlens=decode_meta.seq_lens_tensor, - head_dim_v=self.kv_lora_rank, - tile_scheduler_metadata=decode_meta.decode_tile_scheduler_metadata, - num_splits=decode_meta.decode_num_splits, - softmax_scale=self.scale, - causal=True, - ) - - return self._v_up_proj(o) diff --git a/vllm/attention/backends/mla/__init__.py b/vllm/attention/backends/mla/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/attention/backends/mla/common.py b/vllm/attention/backends/mla/common.py deleted file mode 100644 index 826b63e1ccda..000000000000 --- a/vllm/attention/backends/mla/common.py +++ /dev/null @@ -1,1305 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -""" -# MLA Common Components - -This file implements common components for MLA implementations. - -First we define: - -Sq as Q sequence length -Skv as KV sequence length - -MLA has two possible ways of computing, a data-movement friendly approach and a -compute friendly approach, we generally want to use the compute friendly -approach for "prefill" (i.e. the ratio Sq / Skv is "small", is near 1) -and the data-movement friendly approach for "decode" (i.e. the ratio -Sq / Skv is "large"). - -NOTE what we deem small and large is currently determined by if its labelled -prefill or decode by the scheduler, but this is something we should probably -tune. - -Main reference: DeepseekV2 paper, and FlashInfer Implementation -(https://arxiv.org/abs/2405.04434 and https://github.com/flashinfer-ai/flashinfer/pull/551). - -Deepseek's MLA attention works the following way: -* Use a single latent vector to represent the per-token entry of the KV cache. -* For decode (i.e. the memory friendly approach) the attention "simulates" a -multi-head attention, while the compute is similar to multi-query attention. 
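# A back-of-the-envelope sketch of why the latent cache entry is small, using
# the DSV3 dimensions listed below (Lkv=512, R=64, P=128, V=128) and assuming
# N=128 attention heads; counts are elements per cached token.
Lkv, R, P, V, N = 512, 64, 128, 128, 128
mla_entry = Lkv + R               # 576: one shared latent vector + decoupled rope k
mha_entry = N * (P + R) + N * V   # 40960: full per-head K and V would be ~71x larger
print(mla_entry, mha_entry, round(mha_entry / mla_entry))  # 576 40960 71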
- -Below is example of both paths assuming batchsize = 1 - -## More Extent Definitions: - -C Context length, `Skv - Sq` -H hidden size -N number of attention heads -Lq latent dimension for Q 1536 in DSV3 -Lkv latent dimension for K/V 512 in DSV3 -P nope dimension, no rope. 128 in DSV3 -R rope dimension, goes through rope. 64 in DSV3 -V V head dim. 128 in DSV3 - -## Vector/Matrix Definitions - -h_t hidden states (input to attention) shape [Sq, H] -q_c latent/compressed Q shape [Sq, Lq] -q_nope uncompressed Q (no-rope) shape [Sq, N, P] -q_pe uncompressed Q (rope) shape [Sq, N, R] -kv_c latent/compressed KV shape [Skv, Lkv] -k_pe decoupled k position embeddings shape [Skv, R] -new_kv_c new kv_c from current iter shape [Sq, Lkv] -new_k_pe new k_pe from current iter shape [Sq, R] -cache_kv_c cached k_c from previous iters shape [C, Lkv] -cache_k_pe cached k_pe from previous iters shape [C, R] -W_DQ project h_t to q_c shape [H, Lq] -W_UQ project q_c to q_nope shape [Lq, N * P] -W_QR project q_c to q_pe shape [Lq, N * R] -W_DKV project h_t to kv_c shape [H, Lkv] -W_UK project kv_c to k_nope shape [Lkv, N, P] -W_KR project h_t to k_pe shape [H, R] -W_UV project kv_c to v shape [Lkv, N, V] -W_O project v to h_t shape [N * V, H] - - -## Compute Friendly Approach (i.e. "_forward_prefill"): - -q_c = h_t @ W_DQ -q_nope = (q_c @ W_UQ).view(Sq, N, P) -q_pe = RoPE(q_c @ W_QR).view(Sq, N, R) -new_kv_c = h_t @ W_DKV -new_k_pe = RoPE(h_t @ W_KR) -kv_c = torch.cat([new_kv_c, cache_kv_c], dim=0) -k_pe = torch.cat([new_k_pe, cache_k_pe], dim=0) -k_nope = (kv_c @ W_UK.view(Lkv, N * P)).view(Skv, N, P) -v = (kv_c @ W_UV.view(Lkv, N * V)).view(Skv, N, V) - -// MHA with QK headdim = P + R -// V headdim = V -// spda_o shape [Sq, N, V] -spda_o = scaled_dot_product_attention( - torch.cat([q_nope, q_pe], dim=-1), - torch.cat([k_nope, k_pe.unsqueeze(1).expand(-1, N, -1)], dim=-1), - v -) -return spda_o @ W_O - -NOTE: in the actual code, - `kv_b_proj` is [W_UK; W_UV] concatenated per head - `q_b_proj` is [W_UQ; W_QR] concatenated per head - `out_proj` is W_O - - -## Data-Movement Friendly Approach (i.e. "_forward_decode"): - -Runtime -q_c = h_t @ W_DQ -q_nope = (q_c @ W_UQ).view(-1, N, P) -ql_nope = einsum("snh,lnh->snl", q, W_UK) -q_pe = RoPE(q_c @ W_QR).view(Sq, N, R) -new_kv_c = h_t @ W_DKV -new_k_pe = RoPE(h_t @ W_KR) -kv_c = torch.cat([new_kv_c, cache_kv_c], dim=0) -k_pe = torch.cat([new_k_pe, cache_k_pe], dim=0) - -// MQA with QK headdim = Lkv + R -// V headdim = Lkv -// spda_o shape [Sq, N, Lkv] -// NOTE: this is less compute-friendly since Lkv > P -// but is more data-movement friendly since its MQA vs MHA -spda_o = scaled_dot_product_attention( - torch.cat([ql_nope, q_pe], dim=-1), - torch.cat([kv_c, k_pe], dim=-1), - kv_c -) - -o = einsum("snl,lnv->snv", spda_o.reshape(-1, N, Lkv), W_UV) -return o.view(-1, N * V) @ self.num_heads @ W_O - - -## Chunked Prefill - -For chunked prefill we want to use the compute friendly algorithm. We are -assuming sufficiently large Sq / Skv ratio, in the future may want to switch to -the data-movement friendly approach if the chunk (i.e. `Sq`) is small. - -However, the compute-friendly approach can potentially run out of memory if Skv -is large due to: `k_nope = (kv_c @ W_UK).view(Skv, N, P)` - -To mitigate this, we chunk the computation of attention with respect to the -current context (i.e. `cache_kv_c` and `cache_k_pe`) so that we can used a -fixed workspace size. 
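# A quick sizing sketch of the problem the chunking solves: assuming fp16
# (2 bytes), N=128 heads and a 192-dim QK head (P + R), the up-projected keys
# alone take N*(P+R)*2 = 48 KiB per context token, so Skv = 64k tokens needs
# roughly 3 GiB, while one chunk of e.g. MCC = 4096 tokens stays under 200 MiB.
N, P, R, bytes_per_el = 128, 128, 64, 2
per_token = N * (P + R) * bytes_per_el
print(per_token * 64 * 1024 / 2**30)   # 3.0  GiB for the full 64k-token context
print(per_token * 4096 / 2**20)        # 192.0 MiB for a single 4096-token chunk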
- -The chunked prefill approach is as follows: - -MCC Max chunk of context to process per iter, computed dynamically, - used to bound the memory usage - -q_c = h_t @ W_DQ -q_nope = (q_c @ W_UQ).view(Sq, N, P) -q_pe = RoPE(q_c @ W_QR).view(Sq, N, R) -new_kv_c = h_t @ W_DKV -new_k_pe = RoPE(h_t @ W_KR) -new_k_nope = (new_kv_c @ W_UK.view(Lkv, N * P)).view(Sq, N, P) -new_v = (new_kv_c @ W_UV.view(Lkv, N * V)).view(Sq, N, V) - -// MHA between queries and new KV -// with QK headdim = P + R -// V headdim = V -// curr_o shape [Sq, N, V] -// curr_lse shape [N, Sq], this is just order FA returns -curr_o, curr_lse = scaled_dot_product_attention( - torch.cat([q_nope, q_pe], dim=-1), - torch.cat([new_k_nope, new_k_pe.unsqueeze(1).expand(-1, N, -1)], dim=-1), - new_v, - casual=True, - return_softmax_lse=True -) - -// Compute attention with the already existing context -for chunk_idx in range(cdiv(C, MCC)): - chunk_start = chunk_idx * MCC - chunk_end = min(chunk_start + MCC, C) - Sc = chunk_end - chunk_start - cache_kv_c_chunk = cache_kv_c[chunk_start:chunk_end] - cache_k_pe_chunk = cache_k_pe[chunk_start:chunk_end] - cache_k_nope_chunk = (cache_kv_c_chunk @ W_UK).view(-1, N, P) - cache_v_chunk = (cache_kv_c_chunk @ W_UV).view(-1, N, V) - - chunk_o, chunk_lse = scaled_dot_product_attention( - torch.cat([q_nope, q_pe], dim=-1), - torch.cat([cache_k_nope_chunk, - cache_k_pe_chunk.unsqueeze(1).expand(-1, N, -1)], - dim=-1), - cache_v_chunk, - casual=False, - return_softmax_lse=True - ) - - curr_o, curr_lse = merge_attn_states( - suffix_output=curr_o, - suffix_lse=curr_lse, - prefix_output=chunk_o, - prefix_lse=chunk_lse, - ) - -return curr_o @ W_O -""" - -import functools -from abc import abstractmethod -from collections import defaultdict -from contextlib import contextmanager -from dataclasses import dataclass -from itertools import accumulate -from typing import Any, Dict, Generic, List, Optional, Tuple, Type, TypeVar - -import torch - -from vllm import _custom_ops as ops -from vllm import envs -from vllm.attention.backends.abstract import (AttentionBackend, AttentionLayer, - AttentionMetadata, - AttentionMetadataBuilder, - AttentionState, MLAAttentionImpl) -from vllm.attention.backends.utils import (PAD_SLOT_ID, compute_slot_mapping, - compute_slot_mapping_start_idx, - is_block_tables_empty) -from vllm.attention.ops.merge_attn_states import merge_attn_states -from vllm.attention.utils.fa_utils import get_flash_attn_version -from vllm.model_executor.layers.linear import (ColumnParallelLinear, - LinearBase, - UnquantizedLinearMethod) -from vllm.multimodal import MultiModalPlaceholderMap -from vllm.platforms import current_platform -from vllm.triton_utils import HAS_TRITON -from vllm.utils import async_tensor_h2d, cdiv, make_tensor_with_pad, round_down - -if HAS_TRITON: - from vllm.attention.ops.triton_flash_attention import triton_attention -else: - triton_attention = None - -try: - from vllm.vllm_flash_attn import flash_attn_varlen_func - is_vllm_fa = True -except ImportError: - is_vllm_fa = False - try: - # For rocm use upstream flash attention - from flash_attn import flash_attn_varlen_func - except ImportError: - flash_attn_varlen_func = None - -is_hip = current_platform.is_rocm() - - -class MLACommonBackend(AttentionBackend): - - @staticmethod - def get_name() -> str: - return "TRITON_MLA" - - @staticmethod - def get_metadata_cls() -> Type["AttentionMetadata"]: - return MLACommonMetadata - - @staticmethod - def get_builder_cls() -> Type["MLACommonMetadataBuilder"]: - return MLACommonMetadataBuilder 
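# A minimal, self-contained sketch (assumed tensor layout, not the vLLM
# merge_attn_states kernel) of the log-sum-exp merge used by the chunked
# prefill loop in the module docstring above: two partial attention results
# over disjoint key sets are combined into the result over their union.
import torch

def merge_partial_attn(o1, lse1, o2, lse2):
    # o1, o2:     [Sq, N, V] partial attention outputs
    # lse1, lse2: [Sq, N]    log-sum-exp of each part's attention logits
    #             (the FA kernels return lse as [N, Sq]; transpose accordingly)
    lse = torch.logaddexp(lse1, lse2)
    w1 = torch.exp(lse1 - lse).unsqueeze(-1)
    w2 = torch.exp(lse2 - lse).unsqueeze(-1)
    return w1 * o1 + w2 * o2, lse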
- - @staticmethod - def get_state_cls() -> Type["MLACommonState"]: - return MLACommonState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, # assumed to be 1 for MLA - head_size: int, - ) -> Tuple[int, ...]: - return (num_blocks, block_size, head_size) - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - ops.swap_blocks(src_kv_cache, dst_kv_cache, src_to_dst) - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - ops.copy_blocks_mla(kv_caches, src_to_dists) - - @staticmethod - def get_supported_head_sizes() -> List[int]: - return [576] - - -T = TypeVar("T", bound="MLACommonMetadata") - - -class MLACommonState(AttentionState, Generic[T]): - - def __init__(self, runner): - self.runner = runner - self._is_graph_capturing = False - - scheduler_config = runner.scheduler_config - self.model_config = runner.model_config - cache_config = runner.cache_config - - self.chunked_prefill_enabled = scheduler_config.chunked_prefill_enabled - self.enable_prefix_caching = cache_config.enable_prefix_caching - - if self.chunked_prefill_enabled or self.enable_prefix_caching: - self.context_chunk_workspace_size = min( - # Max sure there is enough for 8 full length request or at least - # 4 pages of cache per request - max( - 8 * self.model_config.max_model_len, 4 * - scheduler_config.max_num_seqs * cache_config.block_size), - # For long-context models try not to over-allocate limiting - # kv-cache space, limiting it to 64k tokens, - # which would result in the workspace being: - # 2*(576)*(64*1024) = 144mb - # (assuming 576 MLA head dim, and fp16) - # which would result in up-projected context being - # 2*(192*128)*(64*1024) = 3gb - # (assuming 192 QK head dim, 128 heads, and fp16) - 128 * 1024) - assert self.context_chunk_workspace_size >= \ - scheduler_config.max_num_seqs * cache_config.block_size - - @contextmanager - def graph_capture(self, max_batch_size: int): - self._is_graph_capturing = True - - self._graph_slot_mapping = torch.full((max_batch_size, ), - PAD_SLOT_ID, - dtype=torch.long, - device=self.runner.device) - self._graph_seq_lens = torch.ones(max_batch_size, - dtype=torch.int32, - device=self.runner.device) - self._graph_block_tables = torch.from_numpy( - self.runner.graph_block_tables).to(device=self.runner.device) - - self._positions = torch.zeros((max_batch_size, ), - dtype=torch.long, - device=self.runner.device) - - yield - - self._is_graph_capturing = False - del self._graph_slot_mapping - del self._graph_seq_lens - del self._graph_block_tables - del self._positions - - def graph_clone(self, batch_size: int): - assert self._is_graph_capturing - return self.__class__(self.runner) - - def graph_capture_get_metadata_for_batch( - self, - batch_size: int, - is_encoder_decoder_model: bool = False) -> T: - assert self._is_graph_capturing - - attn_metadata = self.runner.attn_backend.make_metadata( - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=False, - use_cuda_graph=True, - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=batch_size, - slot_mapping=self._graph_slot_mapping[:batch_size], - seq_lens=None, - seq_lens_tensor=self._graph_seq_lens[:batch_size], - max_query_len=1, - max_decode_query_len=1, - max_prefill_seq_len=0, - max_decode_seq_len=self.runner.max_seq_len_to_capture, - query_start_loc=None, - seq_start_loc=None, - context_lens_tensor=None, - 
block_tables=self._graph_block_tables[:batch_size], - head_dim=self.runner.model_config.get_head_size()) - - if is_encoder_decoder_model: - raise NotImplementedError( - "MLACommonState does not support encoder/decoder yet") - - return attn_metadata - - def get_graph_input_buffers(self, - attn_metadata, - is_encoder_decoder_model: bool = False): - input_buffers = { - "slot_mapping": attn_metadata.slot_mapping, - "seq_lens_tensor": attn_metadata.decode_metadata.seq_lens_tensor, - "block_tables": attn_metadata.decode_metadata.block_tables, - } - if is_encoder_decoder_model: - raise NotImplementedError( - "MLACommonState does not support encoder/decoder yet") - - return input_buffers - - def prepare_graph_input_buffers(self, - input_buffers, - attn_metadata, - is_encoder_decoder_model: bool = False): - input_buffers["seq_lens_tensor"].copy_( - attn_metadata.decode_metadata.seq_lens_tensor, non_blocking=True) - input_buffers["block_tables"].copy_( - attn_metadata.decode_metadata.block_tables, non_blocking=True) - if is_encoder_decoder_model: - raise NotImplementedError( - "TritonMLAState does not support encoder/decoder yet") - - def begin_forward(self, model_input): - if self.chunked_prefill_enabled or self.enable_prefix_caching: - if not hasattr(self, "context_chunk_workspace"): - # not self.runner.device does not return the correct device - # for this process, (init_device sets the correct device but - # only on the Worker). The only way Ive figured out to get the - # correct device is to allocate the workspace on the first call - # to begin_forward and use the device of the input tokens - assert model_input.input_tokens is not None - self.context_chunk_workspace = torch.empty( - (self.context_chunk_workspace_size, - self.model_config.get_head_size()), - dtype=self.model_config.dtype, - device=model_input.input_tokens.device, - ) - - model_input.attn_metadata.context_chunk_workspace = \ - self.context_chunk_workspace - - -@dataclass -class MLACommonMetadata(AttentionMetadata): - """Metadata for MLACommon. - - NOTE: Please read the comment at the top of the file before trying to - understand this class - - NOTE: Any python object stored here is not updated when it is - cuda-graph replayed. If you have values that need to be changed - dynamically, it should be stored in tensor. The tensor has to be - updated from `CUDAGraphRunner.forward` API. - """ - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - use_cuda_graph: bool - - # NOTE(sang): Definition of context_len, query_len, and seq_len. - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ---------------------| - # |-- query_len ---| - - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). 
- context_lens_tensor: Optional[torch.Tensor] - - # (batch_size, max_blocks_per_seq). - # Block addresses per sequence. (Seq id -> list of physical block) - # E.g., [0, 1, 2] means tokens are stored in 0th, 1st, and 2nd blocks - # in the kv cache. Each block can contain up to block_size tokens. - # 2nd dimensions are padded up to max_blocks_per_seq if it is cuda-graph - # captured. - block_tables: Optional[torch.Tensor] - - # Maximum query length in the batch. - max_query_len: Optional[int] = None - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] = None - - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - seq_start_loc: Optional[torch.Tensor] = None - - _cached_prefill_metadata: Optional[Any] = None - _cached_decode_metadata: Optional[Any] = None - - num_prefill_tokens: int - - # The dimension of the attention heads - head_dim: Optional[int] = None - - # Used when chunked prefill is enabled to simulate worst case workspace - # allocations, hopefully to avoid going OOM - is_profile_run: bool = False - - # New for MLA (compared to FlashAttention) - # For chunked prefill - context_chunk_cu_seq_lens: Optional[torch.Tensor] = None - context_chunk_starts: Optional[torch.Tensor] = None - context_chunk_seq_tot: Optional[List[int]] = None - context_chunk_max_seq_lens: Optional[List[int]] = None - # Set by MLAAttentionState in `begin_forward` so it doesn't get broadcasted - context_chunk_workspace: Optional[torch.Tensor] = None - - def __post_init__(self): - supported_head_sizes = MLACommonBackend.get_supported_head_sizes() - if self.head_dim is not None and self.head_dim \ - not in supported_head_sizes: - raise ValueError( - f"Only {supported_head_sizes} are supported for head_dim,", - f" received {self.head_dim}.") - - @property - def prefill_metadata(self): - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - assert self.seq_lens is not None - assert self.seq_lens_tensor is not None - - # Compute some attn_metadata fields which default to None - query_start_loc = (None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1]) - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[:self.num_prefill_tokens]) - seq_lens = (None if self.seq_lens is None else - self.seq_lens[:self.num_prefills]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[:self.num_prefills]) - seq_start_loc = (None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1]) - context_lens_tensor = (None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills]) - block_tables = (None if self.block_tables is None else - self.block_tables[:self.num_prefills]) - - self._cached_prefill_metadata = self.__class__( - # Required by ModelRunner - use_cuda_graph=False, # Not Attention Related - # Required by Attention Metadata - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=slot_mapping, - # Required by Attention Metadata (not used) - 
multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=False, - # MLACommonMetadata - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_query_len=0, - max_decode_seq_len=0, - query_start_loc=query_start_loc, - seq_start_loc=seq_start_loc, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - head_dim=self.head_dim, - is_profile_run=self.is_profile_run, - # MLACommonMetadata Chunk prefill specific - context_chunk_cu_seq_lens=self.context_chunk_cu_seq_lens, - context_chunk_starts=self.context_chunk_starts, - context_chunk_seq_tot=self.context_chunk_seq_tot, - context_chunk_max_seq_lens=self.context_chunk_max_seq_lens, - ) - return self._cached_prefill_metadata - - @property - def decode_metadata(self): - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - assert self.seq_lens_tensor is not None - - # Compute some attn_metadata fields which default to None - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[self.num_prefill_tokens:]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[self.num_prefills:]) - block_tables = (None if self.block_tables is None else - self.block_tables[self.num_prefills:]) - - self._cached_decode_metadata = self.__class__( - # Required by ModelRunner - use_cuda_graph=self.use_cuda_graph, # Not Attention Related - # Required by Attention Metadata - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=slot_mapping, - # Required by Attention Metadata (not used) - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=False, - # MLACommonMetadata - seq_lens=None, - seq_lens_tensor=seq_lens_tensor, - max_decode_query_len=self.max_decode_query_len, - max_query_len=self.max_query_len, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - # Batch may be composed of prefill|decodes, adjust query start - # indices to refer to the start of decodes. E.g. - # in tokens:[3 prefills|6 decodes], query_start_loc=[3,9] => [0,6]. 
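# A fuller made-up example of the re-basing below: with query_lens
# [4, 3, 2, 1, 1, 1] (3 prefills, 3 decodes), the full query_start_loc is
# [0, 4, 7, 9, 10, 11, 12]; slicing from index num_prefills=3 gives
# [9, 10, 11, 12], and subtracting its first element yields [0, 1, 2, 3],
# i.e. cumulative query starts relative to the first decode token.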
- query_start_loc=(self.query_start_loc[self.num_prefills:] - - self.query_start_loc[self.num_prefills]) - if self.query_start_loc is not None else None, - seq_start_loc=self.seq_start_loc[self.num_prefills:] - if self.seq_start_loc is not None else None, - context_lens_tensor=None, - block_tables=block_tables, - head_dim=self.head_dim, - is_profile_run=self.is_profile_run) - return self._cached_decode_metadata - - -class MLACommonMetadataBuilder(AttentionMetadataBuilder[T], Generic[T]): - """ - NOTE: Please read the comment at the top of the file before trying to - understand this class - """ - BLOCK_TABLE_EXTENDER: list[list[int]] = [] - - def __init__(self, input_builder): - self.input_builder = input_builder - self.runner = input_builder.runner - self.sliding_window = input_builder.sliding_window - self.block_size = input_builder.block_size - self.chunked_prefill_enabled = \ - self.runner.scheduler_config.chunked_prefill_enabled - self.enable_prefix_caching = \ - self.runner.cache_config.enable_prefix_caching - - if self.chunked_prefill_enabled or self.enable_prefix_caching: - attn_state = self.input_builder.runner.attn_state - self.context_chunk_workspace_size = \ - attn_state.context_chunk_workspace_size - self.page_size = self.runner.block_size - - def prepare(self): - self.slot_mapping: List[int] = [] - self.prefill_seq_lens: List[int] = [] - self.context_lens: List[int] = [] - self.block_tables: List[List[int]] = [] - self.curr_seq_lens: List[int] = [] - self.multimodal_placeholder_maps: Dict[ - str, - MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap) - self.num_prefills = 0 - self.num_prefill_tokens = 0 - self.num_decode_tokens = 0 - self.has_prefix_cache_hit = False - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, - prefix_cache_hit: bool): - """Add a sequence group to the metadata. Specifically update/append - 1. context length. - 2. block table. - 3. slot mapping. - """ - is_prompt = inter_data.is_prompt - block_tables = inter_data.block_tables - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - if is_prompt: - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - # Compute block table. - # TODO(sang): Combine chunked prefill and prefix caching by - # only allowing multiple of block_size chunk size. - # NOTE: This only works for oooooooxxx style attention. - block_table = [] - if prefix_cache_hit: - # NOTE(woosuk): For flash-attn, the block table should - # include the entries for the incoming prefill tokens. - block_table = block_tables[seq_id] - elif ((chunked_prefill_enabled or not is_prompt) - and block_tables is not None): - if curr_sliding_window_block == 0: - block_table = block_tables[seq_id] - else: - block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.block_tables.append(block_table) - - # Compute slot mapping. 
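# A conceptual, standalone sketch (not the compute_slot_mapping helper itself)
# of what the slot mapping holds: each token position of a sequence maps to a
# flat index into the paged KV cache through its block table entry.
def token_slot(block_table, token_pos, block_size):
    # block_table: physical block number of each logical block of the sequence
    block_number = block_table[token_pos // block_size]
    return block_number * block_size + token_pos % block_size

# e.g. block_size=16, block_table=[7, 3]: token 0 -> slot 112, token 20 -> slot 52
assert token_slot([7, 3], 0, 16) == 112 and token_slot([7, 3], 20, 16) == 52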
- is_profile_run = is_block_tables_empty(block_tables) - start_idx = compute_slot_mapping_start_idx(is_prompt, query_len, - context_len, - self.sliding_window) - compute_slot_mapping(is_profile_run, self.slot_mapping, seq_id, - seq_len, context_len, start_idx, - self.block_size, inter_data.block_tables) - - def _get_graph_runner_block_tables( - self, num_seqs: int, - block_tables: List[List[int]]) -> torch.Tensor: - # The shape of graph_block_tables is - # [max batch size, max context len // block size]. - max_batch_size, max_blocks = self.runner.graph_block_tables.shape - assert max_batch_size >= num_seqs - - graph_block_tables = self.runner.graph_block_tables[:num_seqs] - for i, block_table in enumerate(block_tables): - if block_table: - num_blocks = len(block_table) - if num_blocks <= max_blocks: - graph_block_tables[i, :num_blocks] = block_table - else: - # It may be possible to have more blocks allocated due - # to lookahead slots of multi-step, however, they are - # not used anyway, so can be safely ignored. - graph_block_tables[ - i, :max_blocks] = block_table[:max_blocks] - - return torch.from_numpy(graph_block_tables).to( - device=self.runner.device, non_blocking=True) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - """Build attention metadata with on-device tensors. - - Args: - seq_lens: The maybe padded sequence lengths of the input sequences. - query_lens: The query lengths of the input sequences. - cuda_graph_pad_size: The padding size for cuda graph. - -1 if cuda graph is not used. - batch_size: The maybe padded batch size. - """ - prefix_cache_hit = any([ - inter_data.prefix_cache_hit - for inter_data in self.input_builder.inter_data_list - ]) - - for inter_data in self.input_builder.inter_data_list: - self._add_seq_group(inter_data, - self.input_builder.chunked_prefill_enabled, - prefix_cache_hit) - - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - max_query_len = max(query_lens) - decode_query_lens = query_lens[self.num_prefills:] - if len(decode_query_lens) > 0: - max_decode_query_len = max(decode_query_lens) - else: - max_decode_query_len = 1 - max_prefill_seq_len = max(self.prefill_seq_lens, default=0) - max_decode_seq_len = max(self.curr_seq_lens, default=0) - num_decode_tokens = self.num_decode_tokens - query_start_loc = list(accumulate(query_lens, initial=0)) - seq_start_loc = list(accumulate(seq_lens, initial=0)) - - num_seqs = len(seq_lens) - if use_captured_graph: - self.slot_mapping.extend([PAD_SLOT_ID] * cuda_graph_pad_size) - self.block_tables.extend(self.__class__.BLOCK_TABLE_EXTENDER * - cuda_graph_pad_size) - num_decode_tokens = batch_size - self.num_prefill_tokens - - block_tables = self._get_graph_runner_block_tables( - num_seqs, self.block_tables) - else: - block_tables = make_tensor_with_pad( - self.block_tables, - pad=0, - dtype=torch.int, - device=device, - ) - assert max_query_len > 0, ("query_lens: {}".format(query_lens)) - - assert device is not None - context_lens_tensor = async_tensor_h2d(self.context_lens, torch.int, - device, self.runner.pin_memory) - seq_lens_tensor = async_tensor_h2d(seq_lens, torch.int, device, - self.runner.pin_memory) - slot_mapping_tensor = async_tensor_h2d(self.slot_mapping, torch.long, - device, self.runner.pin_memory) - query_start_loc_tensor = async_tensor_h2d(query_start_loc, torch.int32, - device, - self.runner.pin_memory) - seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, - device, 
self.runner.pin_memory) - - context_chunk_cu_seq_lens = None - context_chunk_starts = None - context_chunk_seq_tot = None - context_chunk_max_seq_lens = None - - if (self.chunked_prefill_enabled or self.enable_prefix_caching) \ - and self.num_prefills > 0 \ - and context_lens_tensor is not None \ - and context_lens_tensor[:self.num_prefills].max() > 0: - - # NOTE: it is recommended you read the `Chunked Prefill` section in - # the comment at the top of the file before trying to understand - # the following code - - num_prefills_with_context = \ - (context_lens_tensor[:self.num_prefills] > 0).sum().item() - - # currently we allocate an equal amount of workspace for each - # prefill in the batch, we could probably use a more advanced - # algorithm here and allocate more workspace to prefills with - # longer context lengths - max_context_chunk = \ - self.context_chunk_workspace_size // num_prefills_with_context - - # align max_context_chunk to page_size by rounding down, - # currently the `gather_and_maybe_dequant_cache` kernel cannot - # handle `context_chunk_starts` that are not aligned to page_size - max_context_chunk = round_down(max_context_chunk, self.page_size) - assert max_context_chunk > 0 - num_chunks = cdiv(context_lens_tensor.max(), max_context_chunk) - - # if `max_context_chunk = 256`, `num_chunks = 3`, and - # `num_prefills_with_context = 4`, create a tensor that looks like - # [[0, 0, 0, 0], [256, 256, 256, 256], [512, 512, 512, 512]] - context_chunk_starts = \ - torch.arange(num_chunks, device=device, dtype=torch.int32)\ - .unsqueeze(1).expand(-1, self.num_prefills)\ - * max_context_chunk - chunk_ends = torch.min(context_lens_tensor[:self.num_prefills]\ - .unsqueeze(0), context_chunk_starts + max_context_chunk) - chunk_seq_lens = (chunk_ends - context_chunk_starts).clamp(min=0) - _context_chunk_cu_seq_lens = chunk_seq_lens.cumsum(dim=1).to( - torch.int32) - zero = torch.zeros(num_chunks, dtype=torch.int32, device=device)\ - .unsqueeze(-1) - context_chunk_cu_seq_lens = \ - torch.cat([zero, _context_chunk_cu_seq_lens], dim=1) - context_chunk_max_seq_lens = \ - chunk_seq_lens.max(dim=1).values.tolist() - context_chunk_seq_tot = chunk_seq_lens.sum(dim=1).tolist() - assert max(context_chunk_seq_tot) <= \ - self.context_chunk_workspace_size - - return self.runner.attn_backend.make_metadata( - # Required by ModelRunner - use_cuda_graph=use_captured_graph, # Not Attention Related - # Required by Attention Metadata - num_prefills=self.num_prefills, - slot_mapping=slot_mapping_tensor, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=num_decode_tokens, - # Required by Attention Metadata (not used) - multi_modal_placeholder_index_maps=None, # Not Attention Related - enable_kv_scales_calculation=False, - # MLACommonMetadata - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=max_query_len, - max_decode_query_len=max_decode_query_len, - max_prefill_seq_len=max_prefill_seq_len, - max_decode_seq_len=max_decode_seq_len, - query_start_loc=query_start_loc_tensor, - seq_start_loc=seq_start_loc_tensor, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - head_dim=self.runner.model_config.get_head_size(), - is_profile_run=self.runner.in_profile_run, - # MLACommonMetadata Chunk prefill specific - context_chunk_cu_seq_lens=context_chunk_cu_seq_lens, - context_chunk_starts=context_chunk_starts, - context_chunk_seq_tot=context_chunk_seq_tot, - context_chunk_max_seq_lens=context_chunk_max_seq_lens, - ) - - -class 
MLACommonImpl(MLAAttentionImpl[T], Generic[T]): - """ - NOTE: Please read the comment at the top of the file before trying to - understand this class - """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float], - attn_type: str, - kv_sharing_target_layer_name: Optional[str], - # MLA Specific Arguments - q_lora_rank: Optional[int], - kv_lora_rank: int, - qk_nope_head_dim: int, - qk_rope_head_dim: int, - qk_head_dim: int, - v_head_dim: int, - kv_b_proj: ColumnParallelLinear, - ) -> None: - if kv_sharing_target_layer_name is not None: - raise NotImplementedError("KV sharing not supported in V0.") - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - self.kv_cache_dtype = kv_cache_dtype - - self.q_lora_rank = q_lora_rank - self.kv_lora_rank = kv_lora_rank - self.qk_nope_head_dim = qk_nope_head_dim - self.qk_rope_head_dim = qk_rope_head_dim - self.qk_head_dim = qk_head_dim - self.v_head_dim = v_head_dim - self.kv_b_proj = kv_b_proj - - self.triton_fa_func = triton_attention - # Handle the differences between the flash_attn_varlen from flash_attn - # and the one from vllm_flash_attn. The former is used on RoCM and the - # latter has an additional parameter to control FA2 vs FA3 - self.flash_attn_varlen_func = flash_attn_varlen_func - self.vllm_flash_attn_version = get_flash_attn_version() - if self.vllm_flash_attn_version is not None: - self.flash_attn_varlen_func = \ - functools.partial(flash_attn_varlen_func, - fa_version=self.vllm_flash_attn_version) - - # For MLA the v head dim is smaller than qk head dim so we pad out - # v with 0s to match the qk head dim for attention backends that do - # not support different headdims - # We don't need to pad V if we are on a hopper system with FA3 - self._pad_v = self.vllm_flash_attn_version is None or not ( - self.vllm_flash_attn_version == 3 - and current_platform.get_device_capability()[0] == 9) - - def _flash_attn_varlen_diff_headdims(self, q, k, v, softmax_scale, - return_softmax_lse, **kwargs): - maybe_padded_v = v - if self._pad_v: - maybe_padded_v = torch.nn.functional.pad( - v, [0, q.shape[-1] - v.shape[-1]], value=0) - - if is_hip and envs.VLLM_USE_TRITON_FLASH_ATTN \ - and not return_softmax_lse: - attn_out = self.triton_fa_func( - q, - k, - maybe_padded_v, - None, # output - kwargs["cu_seqlens_q"], - kwargs["cu_seqlens_k"], - kwargs["max_seqlen_q"], - kwargs["max_seqlen_k"], - kwargs["causal"], - softmax_scale, - None, # bias - ) - elif is_vllm_fa: - attn_out = self.flash_attn_varlen_func( - q=q, - k=k, - v=maybe_padded_v, - return_softmax_lse=return_softmax_lse, - softmax_scale=softmax_scale, - **kwargs, - ) - else: - # Use return_attn_probs instead of return_softmax_lse for RoCM - attn_out = self.flash_attn_varlen_func( - q=q, - k=k, - v=maybe_padded_v, - return_attn_probs=return_softmax_lse, - softmax_scale=softmax_scale, - **kwargs, - ) - - # Unpack the output if there is multiple results, - # triton always returns (output, softmax_lse), - # vllm_flash_attn returns (output, softmax_lse) when - # `return_softmax_lse = True` - # flash_attn (RoCM) returns (output, softmax_lse, ...) 
when - # `return_attn_probs = True` - rest = None - if isinstance(attn_out, tuple): - attn_out, *rest = attn_out - - # Remain consistent with old `flash_attn_varlen_func` where there - # is only one output tensor if `return_softmax_lse` is False. - if return_softmax_lse: - assert rest is not None - return attn_out, rest[0] - return attn_out - - def _v_up_proj(self, x): - # Convert from (B, N, L) to (N, B, L) - x = x.view(-1, self.num_heads, self.kv_lora_rank).transpose(0, 1) - # Multiply (N, B, L) x (N, L, V) -> (N, B, V) - x = torch.bmm(x, self.W_UV) - # Convert from (N, B, V) to (B, N * V) - return x.transpose(0, 1).reshape(-1, self.num_heads * self.v_head_dim) - - def process_weights_after_loading(self, act_dtype: torch.dtype): - - def get_layer_weight(layer): - WEIGHT_NAMES = ("weight", "qweight", "weight_packed") - for attr in WEIGHT_NAMES: - if hasattr(layer, attr): - return getattr(layer, attr) - raise AttributeError( - f"Layer '{layer}' has no recognized weight attribute:" - f" {WEIGHT_NAMES}.") - - def get_and_maybe_dequant_weights(layer: LinearBase): - if not isinstance(layer.quant_method, UnquantizedLinearMethod): - # NOTE: This should only be used offline, since it's O(N^3) - eye = torch.eye(layer.input_size_per_partition, - dtype=act_dtype, - device=get_layer_weight(layer).device) - dequant_weights = layer.quant_method.apply(layer, - eye, - bias=None) - del eye - # standardize to (output, input) - return dequant_weights.T - return layer.weight - - # we currently do not have quantized bmm's which are needed for - # `W_UV` and `W_UK_T`, we just store fp16/bf16 copies and perform - # the bmm's in 16-bit, the extra memory overhead of this is fairly low - kv_b_proj_weight = get_and_maybe_dequant_weights(self.kv_b_proj).T - assert kv_b_proj_weight.shape == ( - self.kv_lora_rank, - self.num_heads * (self.qk_nope_head_dim + self.v_head_dim)), ( - f"{kv_b_proj_weight.shape=}, " - f"{self.kv_lora_rank=}, " - f"{self.num_heads=}, " - f"{self.qk_nope_head_dim=}, " - f"{self.v_head_dim=}") - kv_b_proj_weight = kv_b_proj_weight.view( - self.kv_lora_rank, - self.num_heads, - self.qk_nope_head_dim + self.v_head_dim, - ) - - W_UK, W_UV = kv_b_proj_weight.split( - [self.qk_nope_head_dim, self.v_head_dim], dim=-1) - - # Convert from (L, N, V) to (N, L, V) - self.W_UV = W_UV.transpose(0, 1) - # Convert from (L, N, P) to (N, P, L) - self.W_UK_T = W_UK.permute(1, 2, 0) - - def _compute_prefill_context( - self, - q: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: MLACommonMetadata, - k_scale: torch.Tensor, - ): - prefill_metadata = attn_metadata.prefill_metadata - assert prefill_metadata is not None - assert prefill_metadata.context_chunk_seq_tot is not None - assert prefill_metadata.context_chunk_cu_seq_lens is not None - assert prefill_metadata.context_chunk_starts is not None - assert prefill_metadata.context_chunk_max_seq_lens is not None - assert prefill_metadata.context_lens_tensor is not None - - output = None - iters = len(prefill_metadata.context_chunk_seq_tot) - - # Fetch from attn_metadata directly, since it late bound by - # MLAAttentionState, grabbing it directly `attn_metadata` can avoid - # any weirdness around prefill_metadata caching - assert attn_metadata.context_chunk_workspace is not None - workspace = attn_metadata.context_chunk_workspace - - for i in range(iters): - toks = prefill_metadata.context_chunk_seq_tot[i] - - ops.gather_and_maybe_dequant_cache( - src_cache=kv_c_and_k_pe_cache, - dst=workspace, - block_table=prefill_metadata.block_tables, - 
cu_seq_lens=prefill_metadata.context_chunk_cu_seq_lens[i], - batch_size=prefill_metadata.num_prefills, - kv_cache_dtype=self.kv_cache_dtype, - scale=k_scale, - seq_starts=prefill_metadata.context_chunk_starts[i], - ) - - kv_c_normed = workspace[:toks]\ - [..., :self.kv_lora_rank] - k_pe = workspace[:toks]\ - [..., self.kv_lora_rank:].unsqueeze(1) - - kv_nope = self.kv_b_proj(kv_c_normed)[0].view( \ - -1, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) - k_nope, v = kv_nope\ - .split([self.qk_nope_head_dim, self.v_head_dim], dim=-1) - - k = torch.cat((k_nope, k_pe.expand((*k_nope.shape[:-1], -1))), - dim=-1) - - attn_output, attn_softmax_lse = \ - self._flash_attn_varlen_diff_headdims( - q=q, - k=k, - v=v, - cu_seqlens_q=prefill_metadata.query_start_loc, - cu_seqlens_k=prefill_metadata.context_chunk_cu_seq_lens[i], - max_seqlen_q=prefill_metadata.max_query_len, - max_seqlen_k=prefill_metadata.context_chunk_max_seq_lens[i], - softmax_scale=self.scale, - causal=False, # Context is unmasked - return_softmax_lse=True, - ) - - if output is None: - output = attn_output - output_lse = attn_softmax_lse - else: - output_tmp = torch.empty_like(output) - output_lse_tmp = torch.empty_like(output_lse) - merge_attn_states( - output=output_tmp, - output_lse=output_lse_tmp, - prefix_output=output, - prefix_lse=output_lse, - suffix_output=attn_output, - suffix_lse=attn_softmax_lse, - ) - output = output_tmp - output_lse = output_lse_tmp - - return output, output_lse - - def _forward_prefill( - self, - q: torch.Tensor, - kv_c_normed: torch.Tensor, - k_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: MLACommonMetadata, - k_scale: torch.Tensor, - ) -> torch.Tensor: - - prefill_metadata = attn_metadata.prefill_metadata - assert prefill_metadata is not None - - has_context = prefill_metadata.context_lens_tensor is not None \ - and prefill_metadata.context_lens_tensor.max() > 0 - - kv_nope = self.kv_b_proj(kv_c_normed)[0].view(\ - -1, self.num_heads, self.qk_nope_head_dim + self.v_head_dim) - k_nope, v = kv_nope\ - .split([self.qk_nope_head_dim, self.v_head_dim], dim=-1) - - k = torch.cat((k_nope, k_pe.expand((*k_nope.shape[:-1], -1))), dim=-1) - - output = self._flash_attn_varlen_diff_headdims( - q=q, - k=k, - v=v, - cu_seqlens_q=prefill_metadata.query_start_loc, - cu_seqlens_k=prefill_metadata.query_start_loc, - max_seqlen_q=prefill_metadata.max_prefill_seq_len, - max_seqlen_k=prefill_metadata.max_prefill_seq_len, - softmax_scale=self.scale, - causal=True, - return_softmax_lse=has_context, - ) - - if has_context: - # ROCm flash_attn_varlen_func will return 3 objects instead of 2 - suffix_output, suffix_lse = output - context_output, context_lse = self._compute_prefill_context( \ - q, kv_c_and_k_pe_cache, attn_metadata, k_scale) - - output = torch.empty_like(suffix_output) - merge_attn_states( - output=output, - prefix_output=context_output, - prefix_lse=context_lse, - suffix_output=suffix_output, - suffix_lse=suffix_lse, - ) - - # unpad if necessary - if self._pad_v: - output = output[..., :v.shape[-1]] - - return output.flatten(start_dim=-2) - - @abstractmethod - def _forward_decode( - self, - ql_nope: torch.Tensor, - q_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: T, - ) -> torch.Tensor: - raise NotImplementedError - - def forward( - self, - layer: AttentionLayer, - q: torch.Tensor, # query in unified attn - k_c_normed: torch.Tensor, # key in unified attn - k_pe: torch.Tensor, # value in unified attn - kv_cache: torch.Tensor, - attn_metadata: T, - 
output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - if output is not None: - raise NotImplementedError( - "output is not yet supported for MLAImplBase") - - if output_scale is not None or output_block_scale is not None: - raise NotImplementedError( - "fused output quantization is not yet supported" - " for MLAImplBase") - - if attn_metadata.is_profile_run and \ - attn_metadata.context_chunk_workspace is not None: - # During the profile run try to simulate to worse case output size - # for `self.kv_b_proj(kv_c_normed)` in `_compute_prefill_context` - # since this can be large - _ = torch.empty( - (attn_metadata.context_chunk_workspace.shape[0], - self.num_heads, self.qk_nope_head_dim + self.v_head_dim), - device=k_c_normed.device, - dtype=k_c_normed.dtype, - ) - - has_decode = attn_metadata.decode_metadata is not None - has_prefill = attn_metadata.prefill_metadata is not None - - num_prefill_tokens: int = attn_metadata.num_prefill_tokens - q = q.view(-1, self.num_heads, self.qk_head_dim) - - decode_q = q[num_prefill_tokens:] - - prefill_q = q[:num_prefill_tokens] - prefill_k_pe = k_pe[:num_prefill_tokens] - prefill_k_c_normed = k_c_normed[:num_prefill_tokens] - - # write the latent and rope to kv cache - if kv_cache.numel() > 0: - ops.concat_and_cache_mla( - k_c_normed, - k_pe.squeeze(1), - kv_cache, - attn_metadata.slot_mapping.flatten(), - kv_cache_dtype=self.kv_cache_dtype, - scale=layer._k_scale, - ) - - output = torch.empty(attn_metadata.num_prefill_tokens + - attn_metadata.num_decode_tokens, - self.v_head_dim * self.num_heads, - device=q.device, - dtype=q.dtype) - if has_prefill: - output[:num_prefill_tokens] = self._forward_prefill( - prefill_q, prefill_k_c_normed, prefill_k_pe, kv_cache, - attn_metadata, layer._k_scale) - - if has_decode: - decode_q_nope, decode_q_pe = decode_q.split( - [self.qk_nope_head_dim, self.qk_rope_head_dim], dim=-1) - # Convert from (B, N, P) to (N, B, P) - decode_q_nope = decode_q_nope.transpose(0, 1) - # Multiply (N, B, P) x (N, P, L) -> (N, B, L) - decode_ql_nope = torch.bmm(decode_q_nope, self.W_UK_T) - # Convert from (N, B, L) to (B, N, L) - decode_ql_nope = decode_ql_nope.transpose(0, 1) - - output[num_prefill_tokens:] = self._forward_decode( - decode_ql_nope, decode_q_pe, kv_cache, attn_metadata) - - return output diff --git a/vllm/attention/backends/rocm_aiter_mla.py b/vllm/attention/backends/rocm_aiter_mla.py deleted file mode 100644 index 587d08858b92..000000000000 --- a/vllm/attention/backends/rocm_aiter_mla.py +++ /dev/null @@ -1,407 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from contextlib import contextmanager -from dataclasses import dataclass -from typing import Optional, Type, Union - -import torch - -import vllm.envs as envs -from vllm.attention.backends.mla.common import (MLACommonBackend, - MLACommonImpl, - MLACommonMetadata, - MLACommonMetadataBuilder, - MLACommonState) -from vllm.attention.backends.utils import (compute_slot_mapping, - compute_slot_mapping_start_idx, - is_block_tables_empty) -from vllm.attention.ops.rocm_aiter_mla import (aiter_mla_decode_fwd, - get_aiter_mla_metadata) - - -def is_aiter_mla_enabled() -> bool: - return envs.VLLM_ROCM_USE_AITER \ - and envs.VLLM_ROCM_USE_AITER_MLA - - -class AiterMLABackend(MLACommonBackend): - - @staticmethod - def get_name() -> str: - return "ROCM_AITER_MLA" - - @staticmethod - def get_impl_cls() -> 
Type["AiterMLAImpl"]: - return AiterMLAImpl - - @staticmethod - def get_metadata_cls() -> Type["AiterMLAMetadata"]: - return AiterMLAMetadata - - @staticmethod - def get_builder_cls() -> Type["AiterMLAMetadataBuilder"]: - return AiterMLAMetadataBuilder - - @staticmethod - def get_state_cls() -> Type["AiterMLAState"]: - return AiterMLAState - - -@dataclass -class AiterMLAMetadata(MLACommonMetadata): - # The following 5 tensors are for current version of AITER MLA - block_table_bound: Optional[torch.Tensor] = None - # The indptr of the paged kv cache, shape: [batch_size + 1] - paged_kv_indptr: Optional[torch.Tensor] = None - # The page indices of the paged kv cache - paged_kv_indices: Optional[torch.Tensor] = None - # The number of entries in the last page of each request in - # the paged kv cache, shape: [batch_size] - paged_kv_last_page_lens: Optional[torch.Tensor] = None - - # This is just to make new AITER MLA API work - # -- MTP support is not added yet. - qo_indptr: Optional[torch.Tensor] = None - - @property - def prefill_metadata(self): - prefill_metadata = super().prefill_metadata - self._cached_prefill_metadata = prefill_metadata - - if prefill_metadata is not None: - prefill_metadata.paged_kv_indptr = self.paged_kv_indptr - prefill_metadata.paged_kv_indices = self.paged_kv_indices - prefill_metadata\ - .paged_kv_last_page_lens = self.paged_kv_last_page_lens - prefill_metadata.block_table_bound = self.block_table_bound - prefill_metadata.qo_indptr = self.qo_indptr - - # update the cache - self._cached_prefill_metadata = self.__class__( - **prefill_metadata.__dict__) - - return self._cached_prefill_metadata - - @property - def decode_metadata(self): - decode_metadata = super().decode_metadata - - self._cached_decode_metadata = decode_metadata - - if decode_metadata is not None: - decode_metadata.paged_kv_indptr = self.paged_kv_indptr - decode_metadata.paged_kv_indices = self.paged_kv_indices - decode_metadata\ - .paged_kv_last_page_lens = self.paged_kv_last_page_lens - decode_metadata.block_table_bound = self.block_table_bound - decode_metadata.qo_indptr = self.qo_indptr - - # update the cache - self._cached_decode_metadata = self.__class__( - **decode_metadata.__dict__) - - return self._cached_decode_metadata - - -class AiterMLAMetadataBuilder(MLACommonMetadataBuilder[AiterMLAMetadata]): - BLOCK_TABLE_EXTENDER: list[list[int]] = [[]] - - def __init__(self, input_builder): - super().__init__(input_builder) - assert self.block_size == 1, "AITER MLA requires only block size 1." - - def prepare(self): - super().prepare() - self.paged_kv_indices: list[int] = [] - self.paged_kv_indptr: list[int] = [0] - self.paged_kv_last_page_lens: list[int] = [] - self.total_blocks = 0 - self.qo_indptr: list[int] = [0] - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool, - prefix_cache_hit: bool): - """Add a sequence group to the metadata. Specifically update/append - 1. context length. - 2. block table. - 3. slot mapping. 
- """ - is_prompt = inter_data.is_prompt - block_tables = inter_data.block_tables - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - if is_prompt: - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - # Compute block table. - # TODO(sang): Combine chunked prefill and prefix caching by - # only allowing multiple of block_size chunk size. - # NOTE: This only works for oooooooxxx style attention. - block_table = [] - if prefix_cache_hit: - # NOTE(woosuk): For flash-attn, the block table should - # include the entries for the incoming prefill tokens. - block_table = block_tables[seq_id] - elif ((chunked_prefill_enabled or not is_prompt) - and block_tables is not None): - if curr_sliding_window_block == 0: - block_table = block_tables[seq_id] - else: - block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.block_tables.append(block_table) - - # Compute slot mapping. - is_profile_run = is_block_tables_empty(block_tables) - start_idx = compute_slot_mapping_start_idx(is_prompt, query_len, - context_len, - self.sliding_window) - compute_slot_mapping(is_profile_run, self.slot_mapping, seq_id, - seq_len, context_len, start_idx, - self.block_size, inter_data.block_tables) - if is_profile_run: - return - - # Update paged_kv_* tensors only for non-profile run - block_table = block_tables[seq_id] - self._update_paged_kv_tensors(block_table, seq_len) - - def _update_paged_kv_tensors(self, block_table: list[int], seq_len: int): - # Get the number of valid blocks based on sequence length. - # If seq_len = 16, block_size = 16, - # block_table_bound is 1 with 1 valid block. - # If seq_len = 15, block_size = 16, - # block_table_bound is 0 + 1 with 1 valid block. 
- self.total_blocks += len(block_table) - block_table_bound = seq_len // self.block_size + 1 \ - if seq_len % self.block_size != 0 \ - else seq_len // self.block_size - self.paged_kv_indices.extend(block_table[:block_table_bound]) - self.paged_kv_indptr.append(self.paged_kv_indptr[-1] + - block_table_bound) - self.qo_indptr.append(self.qo_indptr[-1] + 1) - - last_page_len = seq_len % self.block_size - if last_page_len == 0: - last_page_len = self.block_size - self.paged_kv_last_page_lens.append(last_page_len) - - def build(self, seq_lens: list[int], query_lens: list[int], - cuda_graph_pad_size: int, batch_size: int) -> AiterMLAMetadata: - metadata = super().build(seq_lens, query_lens, cuda_graph_pad_size, - batch_size) - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - if use_captured_graph: - last_paged_kv_indptr = self.paged_kv_indptr[-1] - self.paged_kv_indptr.extend([last_paged_kv_indptr] * - cuda_graph_pad_size) - self.paged_kv_last_page_lens.extend([0] * cuda_graph_pad_size) - last_qo_indptr = self.qo_indptr[-1] - self.qo_indptr.extend([last_qo_indptr] * cuda_graph_pad_size) - - # For current version of AITER MLA - if len(self.paged_kv_indptr) > 0: - # extend to the maximum number of blocks as returned by the - # scheduler - self.paged_kv_indices.extend( - [0] * (self.total_blocks - len(self.paged_kv_indices))) - paged_kv_indices_tensor = torch.tensor(self.paged_kv_indices, - device=device, - dtype=torch.int) - paged_kv_indptr_tensor = torch.tensor(self.paged_kv_indptr, - device=device, - dtype=torch.int) - paged_kv_last_page_lens_tensor = torch.tensor( - self.paged_kv_last_page_lens, device=device, dtype=torch.int) - block_table_bound_tensor = torch.zeros(len(self.paged_kv_indptr) - - 1, - device=device, - dtype=torch.int) - - qo_indptr = torch.tensor(self.qo_indptr, - device=device, - dtype=torch.int) - else: - paged_kv_indices_tensor = None - paged_kv_indptr_tensor = None - paged_kv_last_page_lens_tensor = None - block_table_bound_tensor = None - qo_indptr = None - - metadata.paged_kv_indptr = paged_kv_indptr_tensor - metadata.paged_kv_indices = paged_kv_indices_tensor - metadata.paged_kv_last_page_lens = paged_kv_last_page_lens_tensor - metadata.block_table_bound = block_table_bound_tensor - metadata.qo_indptr = qo_indptr - - return metadata - - -class AiterMLAState(MLACommonState[AiterMLAMetadata]): - - @contextmanager - def graph_capture(self, max_batch_size: int): - kv_indices, kv_indptr, last_page_lens, qo_indptr = \ - get_aiter_mla_metadata( - max_batch_size=max_batch_size, - block_size=self.runner.block_size, - max_block_per_batch=\ - self.runner.get_max_block_per_batch(), - device=self.runner.device) - self._paged_kv_indices_tensor = kv_indices - self._paged_kv_indptr_tensor = kv_indptr - self._paged_kv_last_page_lens_tensor = last_page_lens - self._qo_indptr_tensor = qo_indptr - - with super().graph_capture(max_batch_size): - yield - - del self._paged_kv_indices_tensor - del self._paged_kv_indptr_tensor - del self._paged_kv_last_page_lens_tensor - del self._qo_indptr_tensor - - def graph_capture_get_metadata_for_batch( - self, - batch_size: int, - is_encoder_decoder_model: bool = False) -> AiterMLAMetadata: - - metadata = super().graph_capture_get_metadata_for_batch( - batch_size, is_encoder_decoder_model) - - paged_kv_indptr = self._paged_kv_indptr_tensor[:batch_size + 1] - paged_kv_indices = self._paged_kv_indices_tensor - paged_kv_last_page_lens = self._paged_kv_last_page_lens_tensor[: - batch_size] - qo_indptr = 
self._qo_indptr_tensor[:batch_size + 1] - - metadata.paged_kv_indptr = paged_kv_indptr - metadata.paged_kv_indices = paged_kv_indices - metadata.paged_kv_last_page_lens = paged_kv_last_page_lens - metadata.qo_indptr = qo_indptr - - return metadata - - def get_graph_input_buffers(self, - attn_metadata: AiterMLAMetadata, - is_encoder_decoder_model: bool = False): - input_buffers = super().get_graph_input_buffers( - attn_metadata, is_encoder_decoder_model) - input_buffers[ - 'paged_kv_indptr'] = attn_metadata.decode_metadata.paged_kv_indptr - input_buffers[ - "paged_kv_indices"] = attn_metadata.\ - decode_metadata.paged_kv_indices - input_buffers[ - "paged_kv_last_page_lens"] = attn_metadata.\ - decode_metadata.paged_kv_last_page_lens - input_buffers['qo_indptr'] = attn_metadata.qo_indptr - - return input_buffers - - def prepare_graph_input_buffers(self, - input_buffers, - attn_metadata: AiterMLAMetadata, - is_encoder_decoder_model: bool = False): - super().prepare_graph_input_buffers(input_buffers, attn_metadata, - is_encoder_decoder_model) - - num_total_blocks = attn_metadata.decode_metadata.paged_kv_indices.shape[ - 0] - input_buffers["paged_kv_indptr"].copy_( - attn_metadata.decode_metadata.paged_kv_indptr, non_blocking=True) - input_buffers["paged_kv_indices"][:num_total_blocks].copy_( - attn_metadata.decode_metadata.paged_kv_indices, non_blocking=True) - input_buffers["paged_kv_last_page_lens"].copy_( - attn_metadata.decode_metadata.paged_kv_last_page_lens, - non_blocking=True) - input_buffers["qo_indptr"].copy_( - attn_metadata.decode_metadata.qo_indptr, non_blocking=True) - - -class AiterMLAImpl(MLACommonImpl[AiterMLAMetadata]): - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[list[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float], - attn_type: str, - kv_sharing_target_layer_name: Optional[str], - # MLA Specific Arguments - **mla_args) -> None: - super().__init__(num_heads, head_size, scale, num_kv_heads, - alibi_slopes, sliding_window, kv_cache_dtype, - logits_soft_cap, attn_type, - kv_sharing_target_layer_name, **mla_args) - - unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap] - if any(unsupported_features): - raise NotImplementedError( - "Aiter MLA does not support one of the following: " - "alibi_slopes, sliding_window, logits_soft_cap") - - from aiter import flash_attn_varlen_func - self.flash_attn_varlen_func = flash_attn_varlen_func - - def _flash_attn_varlen_diff_headdims( - self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, - softmax_scale: float, return_softmax_lse: bool, - **kwargs) -> Union[tuple[torch.Tensor, ...], torch.Tensor]: - output = self.flash_attn_varlen_func( - q, - k, - v, - **kwargs, - ) - - return output - - def _forward_decode( - self, - q_nope: torch.Tensor, - q_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: AiterMLAMetadata, - ) -> torch.Tensor: - assert kv_c_and_k_pe_cache.numel() > 0 - - decode_meta = attn_metadata.decode_metadata - assert decode_meta is not None - B = q_nope.shape[0] - - q = torch.cat([q_nope, q_pe], dim=-1) - o = torch.empty(B, - self.num_heads, - self.kv_lora_rank, - dtype=q.dtype, - device=q.device) - - kv_buffer = kv_c_and_k_pe_cache.unsqueeze(2) - - aiter_mla_decode_fwd(q, kv_buffer, o, self.scale, - attn_metadata.qo_indptr, - attn_metadata.max_query_len, - attn_metadata.paged_kv_indptr, - attn_metadata.paged_kv_indices, - 
attn_metadata.paged_kv_last_page_lens) - - return self._v_up_proj(o) diff --git a/vllm/attention/backends/rocm_flash_attn.py b/vllm/attention/backends/rocm_flash_attn.py deleted file mode 100644 index 9262144e37b5..000000000000 --- a/vllm/attention/backends/rocm_flash_attn.py +++ /dev/null @@ -1,953 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Attention layer ROCm GPUs.""" -import itertools -from dataclasses import dataclass -from functools import cache -from typing import List, Optional, Tuple, Type - -import torch - -import vllm.envs as envs -from vllm import _custom_ops as ops -from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, - AttentionLayer, - AttentionMetadata, AttentionType) -from vllm.attention.backends.utils import (CommonAttentionState, - CommonMetadataBuilder) -from vllm.attention.ops.paged_attn import (PagedAttention, - PagedAttentionMetadata) -from vllm.config import get_current_vllm_config -from vllm.logger import init_logger -from vllm.model_executor.layers.quantization.utils.quant_utils import ( - QuantKey, kFp8StaticTensorSym) -from vllm.platforms import current_platform - -logger = init_logger(__name__) -_PARTITION_SIZE_ROCM = 256 - - -@cache -def is_rocm_aiter_paged_attn_enabled() -> bool: - return envs.VLLM_ROCM_USE_AITER_PAGED_ATTN \ - and envs.VLLM_ROCM_USE_AITER \ - - -@cache -def _get_paged_attn_module() -> PagedAttention: - """ - Initializes the appropriate PagedAttention module from `attention/ops`, - which is used as helper function - by `ROCmFlashAttentionImpl` and `ROCmFlashAttentionBackend`. - - The choice of attention module depends on whether - AITER paged attention is enabled: - - If enabled, `ROCmFlashAttentionImpl` uses `AITERPagedAttention`. - - Otherwise, it defaults to using the original `PagedAttention`. 
- """ - if is_rocm_aiter_paged_attn_enabled(): - # Import AITERPagedAttention only when the flag is enabled - from vllm.attention.ops.rocm_aiter_paged_attn import ( - AITERPagedAttention) - return AITERPagedAttention() - return PagedAttention() - - -class ROCmFlashAttentionBackend(AttentionBackend): - accept_output_buffer: bool = True - - @staticmethod - def get_name() -> str: - return "ROCM_FLASH" - - @staticmethod - def get_impl_cls() -> Type["ROCmFlashAttentionImpl"]: - return ROCmFlashAttentionImpl - - @staticmethod - def get_metadata_cls() -> Type["AttentionMetadata"]: - return ROCmFlashAttentionMetadata - - @staticmethod - def get_builder_cls() -> Type["ROCmFlashAttentionMetadataBuilder"]: - return ROCmFlashAttentionMetadataBuilder - - @staticmethod - def get_state_cls() -> Type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> Tuple[int, ...]: - paged_attn = _get_paged_attn_module() - return paged_attn.get_kv_cache_shape(num_blocks, block_size, - num_kv_heads, head_size) - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - paged_attn = _get_paged_attn_module() - paged_attn.swap_blocks(src_kv_cache, dst_kv_cache, src_to_dst) - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - paged_attn = _get_paged_attn_module() - paged_attn.copy_blocks(kv_caches, src_to_dists) - - -@dataclass -class ROCmFlashAttentionMetadata(AttentionMetadata, PagedAttentionMetadata): - """Metadata for FlashAttentionBackend. - - NOTE: Any python object stored here is not updated when it is - cuda-graph replayed. If you have values that need to be changed - dynamically, it should be stored in tensor. The tensor has to be - updated from `CUDAGraphRunner.forward` API. - """ - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - use_cuda_graph: bool - - # NOTE(sang): Definition of context_len, query_len, and seq_len. - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ----------------------| - # |-- query_len ---| - - # Maximum query length in the batch. None for decoding. - max_query_len: Optional[int] = None - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. 
- seq_start_loc: Optional[torch.Tensor] = None - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). - context_lens_tensor: Optional[torch.Tensor] = None - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] = None - - _cached_prefill_metadata: Optional["ROCmFlashAttentionMetadata"] = None - _cached_decode_metadata: Optional["ROCmFlashAttentionMetadata"] = None - - # Begin encoder attn & enc/dec cross-attn fields... - - # Encoder sequence lengths representation - encoder_seq_lens: Optional[List[int]] = None - encoder_seq_lens_tensor: Optional[torch.Tensor] = None - - # Maximum sequence length among encoder sequences - max_encoder_seq_len: Optional[int] = None - - # Number of tokens input to encoder - num_encoder_tokens: Optional[int] = None - - # Cross-attention memory-mapping data structures: slot mapping - # and block tables - cross_slot_mapping: Optional[torch.Tensor] = None - cross_block_tables: Optional[torch.Tensor] = None - - @property - def prefill_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - assert self.seq_lens is not None - assert self.seq_lens_tensor is not None - assert self.block_tables is not None - - self._cached_prefill_metadata = ROCmFlashAttentionMetadata( - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=self.slot_mapping[:self.num_prefill_tokens], - multi_modal_placeholder_index_maps=self. - multi_modal_placeholder_index_maps, - enable_kv_scales_calculation=self.enable_kv_scales_calculation, - seq_lens=self.seq_lens[:self.num_prefills], - seq_lens_tensor=self.seq_lens_tensor[:self.num_prefills], - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_seq_len=0, - query_start_loc=None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1], - seq_start_loc=None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1], - context_lens_tensor=None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills], - block_tables=self.block_tables[:self.num_prefills], - use_cuda_graph=False, - # Begin encoder & cross attn fields below... 
- encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_prefill_metadata - - @property - def decode_metadata(self) -> Optional["ROCmFlashAttentionMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - assert self.block_tables is not None - assert self.seq_lens_tensor is not None - - self._cached_decode_metadata = ROCmFlashAttentionMetadata( - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=self.slot_mapping[self.num_prefill_tokens:], - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=True, - seq_lens=None, - seq_lens_tensor=self.seq_lens_tensor[self.num_prefills:], - max_query_len=None, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - query_start_loc=None, - seq_start_loc=None, - context_lens_tensor=None, - block_tables=self.block_tables[self.num_prefills:], - use_cuda_graph=self.use_cuda_graph, - # Begin encoder & cross attn fields below... - encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - # Batch may be composed of prefill|decodes, adjust query start indices - # to refer to the start of decodes when the two are split apart. - # E.g. in tokens:[3 prefills|6 decodes], query_start_loc=[3,9] => [0,6]. - if self._cached_decode_metadata.query_start_loc is not None: - qs = self._cached_decode_metadata.query_start_loc - self._cached_decode_metadata.query_start_loc = qs - qs[0] - return self._cached_decode_metadata - - -class ROCmFlashAttentionMetadataBuilder( - CommonMetadataBuilder[ROCmFlashAttentionMetadata]): - - _metadata_cls = ROCmFlashAttentionMetadata - - -def _make_alibi_bias(alibi_slopes: torch.Tensor, - dtype: torch.dtype, - seq_lens: Optional[List[int]], - make_attn_mask: bool = True) -> List[torch.Tensor]: - attn_biases = [] - if seq_lens: - for seq_len in seq_lens: - bias = torch.arange(seq_len, dtype=dtype) - # NOTE(zhuohan): HF uses - # `bias = bias[None, :].repeat(seq_len, 1)` - # here. We find that both biases give the same results, but - # the bias below more accurately follows the original ALiBi - # paper. - bias = bias[None, :] - bias[:, None] - - num_heads = alibi_slopes.shape[0] - bias = bias[None, :].repeat( - (num_heads, 1, 1)).to(alibi_slopes.device) - bias.mul_(alibi_slopes[:, None, None]) - if make_attn_mask: - inf_mask = torch.empty( - (1, seq_len, seq_len), - dtype=bias.dtype).fill_(-torch.inf).triu_(diagonal=1).to( - alibi_slopes.device) - attn_biases.append((bias + inf_mask).to(dtype)) - else: - attn_biases.append(bias.to(dtype)) - - return attn_biases - - -def _get_seq_len_block_table_args( - attn_metadata: ROCmFlashAttentionMetadata, - attn_type: str, -) -> tuple: - ''' - The particular choice of sequence-length - attributes which should be extracted from attn_metadata is dependent - on the type of attention operation. 
- - Decoder attn -> select entirely decoder self-attention-related fields - Encoder/decoder cross-attn -> select encoder sequence lengths - Encoder attn -> select encoder sequence lengths fields - Encoder-only attn -> select prefill sequence lengths with - bidirectional attention - - Arguments: - - * attn_metadata: Attention metadata structure associated with attention op - * attn_type: encoder attention, decoder self-attention, - encoder/decoder cross-attention, encoder-only - - Returns: - - * Appropriate sequence-lengths tensors for query and key - * Appropriate max sequence-length scalar - * Causal masking flag - ''' - - if attn_type == AttentionType.ENCODER: - assert attn_metadata.encoder_seq_lens is not None - assert attn_metadata.encoder_seq_lens_tensor is not None - query_seq_start_loc = torch.tensor( - list(itertools.accumulate([0] + attn_metadata.encoder_seq_lens)), - device=attn_metadata.encoder_seq_lens_tensor.device, - dtype=attn_metadata.encoder_seq_lens_tensor.dtype) - causal_mask = False - - # No block tables associated with encoder attention - return (query_seq_start_loc, attn_metadata.max_encoder_seq_len, - query_seq_start_loc, attn_metadata.max_encoder_seq_len, - attn_metadata.encoder_seq_lens, causal_mask) - - elif attn_type == AttentionType.ENCODER_ONLY: - # For encoder-only models, we use the prefill sequence lengths - assert attn_metadata.seq_lens is not None - assert attn_metadata.seq_lens_tensor is not None - query_seq_start_loc = torch.tensor( - list(itertools.accumulate([0] + attn_metadata.seq_lens)), - device=attn_metadata.seq_lens_tensor.device, - dtype=attn_metadata.seq_lens_tensor.dtype) - max_seq_len = attn_metadata.max_prefill_seq_len - # Encoder-only models typically use bidirectional attention - causal_mask = False - - return (query_seq_start_loc, max_seq_len, query_seq_start_loc, - max_seq_len, attn_metadata.seq_lens, causal_mask) - - elif attn_type == AttentionType.DECODER: - # Decoder self-attention - # Choose max_seq_len based on whether we are in prompt_run - assert attn_metadata.seq_lens is not None - assert attn_metadata.seq_lens_tensor is not None - query_seq_start_loc = torch.tensor( - list(itertools.accumulate([0] + attn_metadata.seq_lens)), - device=attn_metadata.seq_lens_tensor.device, - dtype=attn_metadata.seq_lens_tensor.dtype) - max_seq_len = attn_metadata.max_prefill_seq_len - causal_mask = True - - return (query_seq_start_loc, max_seq_len, query_seq_start_loc, - max_seq_len, attn_metadata.seq_lens, causal_mask) - elif attn_type == AttentionType.ENCODER_DECODER: - assert attn_metadata.seq_lens is not None - assert attn_metadata.encoder_seq_lens_tensor is not None - query_start_loc = torch.tensor( - list(itertools.accumulate([0] + attn_metadata.seq_lens)), - device=attn_metadata.encoder_seq_lens_tensor.device, - dtype=attn_metadata.encoder_seq_lens_tensor.dtype) - - assert attn_metadata.encoder_seq_lens is not None - assert attn_metadata.seq_lens_tensor is not None - key_seq_start_loc = torch.tensor( - list(itertools.accumulate([0] + attn_metadata.encoder_seq_lens)), - device=attn_metadata.seq_lens_tensor.device, - dtype=attn_metadata.seq_lens_tensor.dtype) - causal_mask = False - - # Enc/dec cross-attention KVs match encoder sequence length; - # cross-attention utilizes special "cross" block tables - return (query_start_loc, attn_metadata.max_prefill_seq_len, - key_seq_start_loc, attn_metadata.max_encoder_seq_len, - attn_metadata.seq_lens, causal_mask) - else: - raise AttributeError(f"Invalid attention type {str(attn_type)}") - - -class 
ROCmFlashAttentionImpl(AttentionImpl): - """ - If the input tensors contain prompt tokens, the layout is as follows: - |<--------------- num_prompt_tokens -------------->| - |<--prompt_0-->|<--prompt_1-->|...|<--prompt_N-1-->| - - Otherwise, the layout is as follows: - |<------------------ num_generation_tokens (M) ----------------->| - |<--generation_0-->|..........|<--generation_M-1-->|<--padding-->| - - Generation tokens can contain padding when cuda-graph is used. - Currently, prompt tokens don't contain any padding. - - The prompts might have different lengths, while the generation tokens - always have length 1. - - If chunked prefill is enabled, prefill tokens and decode tokens can be - batched together in a flattened 1D query. - - |<----- num_prefill_tokens ---->|<------- num_decode_tokens ----------->| - |<-prompt_0->|...|<-prompt_N-1->|<-generation_0->|...|<-generation_M-1->| - - Currently, cuda graph is disabled for chunked prefill, meaning there's no - padding between prefill and decode tokens. - """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float] = None, - attn_type: str = AttentionType.DECODER, - kv_sharing_target_layer_name: Optional[str] = None, - use_irope: bool = False, - ) -> None: - if kv_sharing_target_layer_name is not None: - raise NotImplementedError("KV sharing is not supported in V0 " - "ROCM_FLASH backend.") - if use_irope: - logger.warning_once( - "Using irope in ROCm Flash Attention is not supported yet, it " - "will fail back to global attention for long context.") - if use_irope: - logger.warning( - "Using irope in V0 is not supported yet, it will fall back " - "to global attention for long context.") - if logits_soft_cap is None: - # In flash-attn, setting logits_soft_cap as 0 means no soft cap. - self.logits_soft_cap = 0.0 - else: - self.logits_soft_cap = logits_soft_cap - self.attn_type = attn_type - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - if alibi_slopes is not None: - alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.alibi_slopes = alibi_slopes - self.sliding_window = ((sliding_window, sliding_window) - if sliding_window is not None else (-1, -1)) - self.kv_cache_dtype = kv_cache_dtype - - self.num_queries_per_kv = self.num_heads // self.num_kv_heads - - self.paged_attn_module = _get_paged_attn_module() - supported_head_sizes = self.paged_attn_module.get_supported_head_sizes( - ) - - if head_size not in supported_head_sizes: - raise ValueError( - f"Head size {head_size} is not supported by PagedAttention. " - f"Supported head sizes are: {supported_head_sizes}.") - - self.use_naive_attn = False - # NOTE: Allow for switching between Triton and CK. Defaulting to triton. - self.use_triton_flash_attn = envs.VLLM_USE_TRITON_FLASH_ATTN - if self.use_triton_flash_attn: - if logits_soft_cap is not None: - raise ValueError( - "ROCm Triton FlashAttention does not support attention" - " logits soft capping." 
- " please try using the ROCm CK " - "FA backend instead by setting the env var " - "`VLLM_USE_TRITON_FLASH_ATTN=0`") - - from vllm.attention.ops.triton_flash_attention import ( # noqa: F401 - triton_attention) - self.triton_attn_func = triton_attention - logger.debug("Using Triton FA in ROCmBackend") - if self.sliding_window != (-1, -1): - logger.warning("ROCm Triton FA does not currently support " - "sliding window attention. If using half " - "precision, please try using the ROCm CK " - "FA backend instead by setting the env var " - "`VLLM_USE_TRITON_FLASH_ATTN=0`") - else: - # if not using triton, navi3x/navi21/navi10 do not use flash-attn - # either - if not current_platform.has_device_capability(90): - self.use_naive_attn = True - else: - try: - from flash_attn import flash_attn_varlen_func # noqa: F401 - self.fa_attn_func = flash_attn_varlen_func - logger.debug("Using CK FA in ROCmBackend") - except ModuleNotFoundError: - self.use_naive_attn = True - - if self.use_naive_attn: - if logits_soft_cap is not None: - raise ValueError( - "ROCm Naive FlashAttention does not support " - "attention logits soft capping.") - - self.sdpa_attn_func = _sdpa_attention - logger.debug("Using naive (SDPA) attention in ROCmBackend") - - self.aiter_kv_scales_initialized = False - self.force_fp8_attention = ( - get_current_vllm_config() is not None - and get_current_vllm_config().model_config.override_attention_dtype - == "fp8") - - def repeat_kv(self, x: torch.Tensor, n_rep: int) -> torch.Tensor: - """torch.repeat_interleave(x, dim=1, repeats=n_rep)""" - tokens, n_kv_heads, head_dim = x.shape - return (x[:, :, - None, :].expand(tokens, n_kv_heads, n_rep, - head_dim).reshape(tokens, n_kv_heads * n_rep, - head_dim)) - - def fused_output_quant_supported(self, quant_key: QuantKey): - if self.use_triton_flash_attn: - return quant_key == kFp8StaticTensorSym - - # Only supported in the Triton backend - return False - - def forward( - self, - layer: AttentionLayer, - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - kv_cache: torch.Tensor, - attn_metadata: ROCmFlashAttentionMetadata, - output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward pass with FlashAttention and PagedAttention. - - For decoder-only models: query, key and value must be non-None. - - For encoder/decoder models: - * ROCmFlashAttentionImpl.forward() may be invoked for both self- and - cross-attention layers. - * For self-attention: query, key and value must be non-None. - * For cross-attention: - * Query must be non-None - * During prefill, key and value must be non-None; key and value - get cached for use during decode. 
- * During decode, key and value may be None, since: - (1) key and value tensors were cached during prefill, and - (2) cross-attention key and value tensors do not grow during - decode - - A note on how the attn_type (attention type enum) argument impacts - attention forward() behavior: - - * DECODER: normal decoder-only behavior; - use decoder self-attention block table - * ENCODER: no KV caching; pass encoder sequence - attributes (encoder_seq_lens/encoder_seq_lens_tensor/ - max_encoder_seq_len) to kernel, in lieu of decoder - sequence attributes (seq_lens/seq_lens_tensor/max_seq_len) - * ENCODER_DECODER: cross-attention behavior; - use cross-attention block table for caching KVs derived - from encoder hidden states; since KV sequence lengths - will match encoder sequence lengths, pass encoder sequence - attributes to kernel (encoder_seq_lens/encoder_seq_lens_tensor/ - max_encoder_seq_len) - * ENCODER_ONLY: bidirectional attention with no KV caching; - use prefill sequence attributes - - Args: - layer: Attention layer instance. - query: shape = [num_tokens, num_heads * head_size] - key: shape = [num_tokens, num_kv_heads * head_size] - value: shape = [num_tokens, num_kv_heads * head_size] - kv_cache: KV cache tensor with shape - [2, num_blocks, block_size * num_kv_heads * head_size]. - NOTE: kv_cache will be an empty tensor with shape [0] - for profiling run. - attn_metadata: Metadata for attention. - output: Optional output tensor. - output_scale: Optional output scale tensor. - output_block_scale: Optional output block scale tensor. - Returns: - shape = [num_tokens, num_heads * head_size] - """ - assert output is not None, "Output tensor must be provided." - - if output_scale is not None and not self.use_triton_flash_attn: - raise NotImplementedError( - "fused output quantization only supported for Triton" - " implementation in ROCMFlashAttentionImpl for now") - - if output_block_scale is not None: - raise NotImplementedError( - "fused nvfp4 output quantization is not supported" - " for ROCMFlashAttentionImpl") - - query = query.view(-1, self.num_heads, self.head_size) - if key is not None: - assert value is not None - key = key.view(-1, self.num_kv_heads, self.head_size) - value = value.view(-1, self.num_kv_heads, self.head_size) - else: - assert value is None - - paged_attn = self.paged_attn_module - - # Reshaping kv tensors is required for AITER paged attention kernel - # because it works on a different tensor shape, - # when the size of one element is one byte (int8/fp8 dtypes). - # This reshaping is only required on the first forward call - # and the kv cache must not be empty. 
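Before the guarded branch below, a quick illustrative sketch (all sizes made up) of the one-time expansion it performs for 1-byte KV caches: the per-layer scalar k/v scale is materialized into the per-(kv-head, slot) fp32 table that this branch hands to the AITER paged attention path:

import torch

num_kv_heads, num_blocks, block_size = 8, 4, 16
scalar_k_scale = torch.tensor(0.02)  # stand-in for layer._k_scale

k_scale_table = torch.empty((num_kv_heads, num_blocks * block_size),
                            dtype=torch.float32)
k_scale_table.fill_(scalar_k_scale.item())

assert k_scale_table.shape == (8, 64)
assert torch.allclose(k_scale_table, scalar_k_scale)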
- if (is_rocm_aiter_paged_attn_enabled() and kv_cache.dtype.itemsize == 1 - and not self.aiter_kv_scales_initialized - and kv_cache.shape != torch.Size([0])): - num_blocks = kv_cache.shape[1] - block_size = kv_cache.shape[2] // (self.num_kv_heads * - self.head_size) - k_scale = torch.empty((self.num_kv_heads, num_blocks * block_size), - dtype=torch.float32, - device=kv_cache.device) - v_scale = torch.empty((self.num_kv_heads, num_blocks * block_size), - dtype=torch.float32, - device=kv_cache.device) - self.aiter_kv_scales_initialized = True - k_scale.fill_(layer._k_scale.item()) - v_scale.fill_(layer._v_scale.item()) - layer._k_scale = k_scale - layer._v_scale = v_scale - - # Only update KV cache for decoder self-attention - # and encoder-decoder cross-attention - if self.attn_type not in [ - AttentionType.ENCODER, AttentionType.ENCODER_ONLY - ] and kv_cache.numel() > 0: - key_cache, value_cache = paged_attn.split_kv_cache( - kv_cache, self.num_kv_heads, self.head_size) - - if key is not None and value is not None: - # Reshape the input keys and values and store them in the - # cache. If kv_cache is not provided, the new key and value - # tensors are not cached. This happens during the initial - # memory profiling run. - paged_attn.write_to_paged_cache( - key, - value, - key_cache, - value_cache, - attn_metadata.slot_mapping - if self.attn_type != AttentionType.ENCODER_DECODER else - attn_metadata.cross_slot_mapping, - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) - - if self.attn_type != AttentionType.ENCODER: - num_prefill_tokens = attn_metadata.num_prefill_tokens - elif self.attn_type == AttentionType.ENCODER_ONLY: - # For encoder-only models, all tokens are processed in one go - num_prefill_tokens = query.shape[0] - else: - assert attn_metadata.num_encoder_tokens is not None - num_prefill_tokens = attn_metadata.num_encoder_tokens - - # Query for decode. KV is not needed because it is already cached. - decode_query = query[num_prefill_tokens:] - # QKV for prefill. - query = query[:num_prefill_tokens] - - # For encoder-only and encoder models, - # we process all tokens at once - # For decoder and encoder-decoder, - # we may need to limit key/value to prefill tokens - if key is not None and value is not None \ - and self.attn_type not in [AttentionType.ENCODER_DECODER, - AttentionType.ENCODER_ONLY]: - key = key[:num_prefill_tokens] - value = value[:num_prefill_tokens] - - if prefill_meta := attn_metadata.prefill_metadata: - # Prompt run. - # normal attention and DECODER - if self.attn_type == AttentionType.DECODER and ( - kv_cache.numel() == 0 or prefill_meta.block_tables is None - or prefill_meta.block_tables.numel() == 0): - (query_seq_start_loc, query_max_seq_len, key_seq_start_loc, - key_max_seq_len, seq_lens, - causal_mask) = (prefill_meta.seq_start_loc, - prefill_meta.max_prefill_seq_len, - prefill_meta.seq_start_loc, - prefill_meta.max_prefill_seq_len, - attn_metadata.seq_lens, True) - # prefix-enabled attention and ENCODER/ENCODER_DECODER - else: - (query_seq_start_loc, query_max_seq_len, key_seq_start_loc, - key_max_seq_len, seq_lens, - causal_mask) = _get_seq_len_block_table_args( - prefill_meta, self.attn_type) - # Prompt run. - if kv_cache.numel() == 0 or prefill_meta.block_tables.numel() == 0: - # triton attention - # When block_tables are not filled, it means q and k are the - # prompt, and they have the same length. 
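Since this prompt-only path may build ALiBi masks via _make_alibi_bias (defined earlier in this file), here is a small illustrative sketch of the bias it produces per sequence: a relative-distance matrix scaled by each head's slope, plus an upper-triangular -inf mask when a causal mask is requested (sizes and slope values below are made up):

import torch

seq_len, num_heads = 4, 2
slopes = torch.tensor([0.5, 0.25])

pos = torch.arange(seq_len)
bias = (pos[None, :] - pos[:, None]).float()       # (seq_len, seq_len), 0 on the diagonal
bias = bias[None, :, :].repeat(num_heads, 1, 1)    # one copy per head
bias = bias * slopes[:, None, None]
inf_mask = torch.full((1, seq_len, seq_len), float("-inf")).triu_(diagonal=1)
attn_bias = bias + inf_mask                        # broadcasts over heads

assert attn_bias.shape == (num_heads, seq_len, seq_len)
assert attn_bias[0, 2, 1].item() == -0.5           # one position back, slope 0.5
assert attn_bias[0, 1, 2].item() == float("-inf")  # future position is masked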
- attn_masks = None - if self.use_triton_flash_attn: - if self.alibi_slopes is not None: - attn_masks = _make_alibi_bias( - self.alibi_slopes, - query.dtype, - seq_lens, - make_attn_mask=causal_mask) # type: ignore - - use_fp8_scales = (layer._q_scale and layer._k_scale - and layer._v_scale and layer._prob_scale - and (self.kv_cache_dtype == "fp8" - or self.force_fp8_attention)) - - full_scales = ( - layer._q_scale.item(), layer._k_scale.item(), - layer._v_scale.item(), - layer._prob_scale.item()) if use_fp8_scales else None - self.triton_attn_func( - query, - key, - value, - output[:num_prefill_tokens], - query_seq_start_loc, - key_seq_start_loc, - query_max_seq_len, - key_max_seq_len, - causal_mask, - self.scale, - attn_masks[0][None] - if attn_masks is not None else None, - full_scales, - output_scale, - ) - elif self.use_naive_attn: - if self.num_kv_heads != self.num_heads: - # Interleave for MQA workaround. - key = self.repeat_kv(key, self.num_queries_per_kv) - value = self.repeat_kv(value, self.num_queries_per_kv) - if self.alibi_slopes is not None: - attn_masks = _make_alibi_bias( - self.alibi_slopes, - query.dtype, - attn_metadata.seq_lens, - make_attn_mask=causal_mask) # type: ignore - query = query.movedim(0, query.dim() - 2) - key = key.movedim(0, key.dim() - 2) - value = value.movedim(0, value.dim() - 2) - # sdpa math backend attention - self.sdpa_attn_func( - query, - key, - value, - output[:num_prefill_tokens], - query_seq_start_loc, - num_prefill_tokens, - self.num_heads, - self.head_size, - self.scale, - attn_masks, - ) - else: - # upstream FA does not support an output arg, copy - output[:num_prefill_tokens] = self.fa_attn_func( - q=query, - k=key, - v=value, - cu_seqlens_q=query_seq_start_loc, - cu_seqlens_k=key_seq_start_loc, - max_seqlen_q=prefill_meta.max_prefill_seq_len, - max_seqlen_k=key_max_seq_len, - softmax_scale=self.scale, - causal=causal_mask, - window_size=self.sliding_window, - alibi_slopes=self.alibi_slopes, - softcap=self.logits_soft_cap, - ) - - else: - # prefix-enabled attention - - # not applicable for encoder-only models - if self.attn_type != AttentionType.ENCODER_ONLY: - output[:num_prefill_tokens] = paged_attn.forward_prefix( - query, - key, - value, - self.kv_cache_dtype, - key_cache, - value_cache, - prefill_meta.block_tables, - prefill_meta.query_start_loc, - prefill_meta.seq_lens_tensor, - prefill_meta.max_query_len, - self.alibi_slopes, - self.sliding_window[0], - layer._k_scale, - layer._v_scale, - ) - # Skip decode phase for encoder-only models - if (decode_meta := attn_metadata.decode_metadata) and ( - self.attn_type != AttentionType.ENCODER_ONLY): - # Decoding run. 
- # Whether to use rocm custom paged attention or not - num_seqs, num_heads, head_size = decode_query.shape - block_size = value_cache.shape[3] - gqa_ratio = num_heads // self.num_kv_heads - from vllm.platforms.rocm import use_rocm_custom_paged_attention - use_custom = use_rocm_custom_paged_attention( - decode_query.dtype, head_size, block_size, gqa_ratio, - decode_meta.max_decode_seq_len, self.sliding_window, - self.kv_cache_dtype, self.alibi_slopes) - - if use_custom: - max_seq_len = (decode_meta.max_decode_seq_len if self.attn_type - != AttentionType.ENCODER_DECODER else - decode_meta.max_encoder_seq_len) - assert max_seq_len is not None - max_num_partitions = ( - (max_seq_len + _PARTITION_SIZE_ROCM - 1) // - _PARTITION_SIZE_ROCM) - assert _PARTITION_SIZE_ROCM % block_size == 0 - tmp_output = torch.empty( - size=(num_seqs, num_heads, max_num_partitions, head_size), - dtype=query.dtype, - device=output.device, - ) - exp_sums = torch.empty( - size=(num_seqs, num_heads, max_num_partitions), - dtype=torch.float32, - device=output.device, - ) - max_logits = torch.empty_like(exp_sums) - - query_start_loc = None - ops.paged_attention_rocm( - output[num_prefill_tokens:], - exp_sums, - max_logits, - tmp_output, - decode_query, - key_cache, - value_cache, - self.num_kv_heads, - self.scale, - decode_meta.block_tables - if self.attn_type != AttentionType.ENCODER_DECODER else - decode_meta.cross_block_tables, - decode_meta.seq_lens_tensor - if self.attn_type != AttentionType.ENCODER_DECODER else - decode_meta.encoder_seq_lens_tensor, - query_start_loc, - block_size, - max_seq_len, - self.alibi_slopes, - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - output_scale, - ) - else: - # PagedAttention does not support fused quant, manually quantize - if output_scale is None: - out_pa = output[num_prefill_tokens:] - else: - out_pa = torch.empty_like(output[num_prefill_tokens:], - dtype=query.dtype) - - out_pa[:] = paged_attn.forward_decode( - decode_query, - key_cache, - value_cache, - decode_meta.block_tables - if self.attn_type != AttentionType.ENCODER_DECODER else - decode_meta.cross_block_tables, - decode_meta.seq_lens_tensor - if self.attn_type != AttentionType.ENCODER_DECODER else - decode_meta.encoder_seq_lens_tensor, - decode_meta.max_decode_seq_len - if self.attn_type != AttentionType.ENCODER_DECODER else - decode_meta.max_encoder_seq_len, - self.kv_cache_dtype, - self.num_kv_heads, - self.scale, - self.alibi_slopes, - layer._k_scale, - layer._v_scale, - ) - - # Manually perform quantization - if output_scale is not None: - out_uq = out_pa.view(-1, self.num_heads * self.head_size) - out_q = output.view(-1, self.num_heads * self.head_size) - ops.scaled_fp8_quant(out_uq, - output_scale, - output=out_q[num_prefill_tokens:]) - - # Reshape the output tensor. 
- return output.view(-1, self.num_heads * self.head_size) - - -def _sdpa_attention( - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - output: torch.Tensor, - seq_lens: torch.Tensor, - num_tokens: int, - num_heads: int, - head_size: int, - scale: float, - attn_masks: Optional[List[torch.Tensor]] = None, -) -> torch.Tensor: - start = 0 - assert output.shape == (num_tokens, num_heads, head_size) - assert output.dtype == query.dtype - assert output.device == query.device - - for i, seq_len in enumerate(seq_lens): - end = start + seq_len - with torch.nn.attention.sdpa_kernel( - torch.nn.attention.SDPBackend.MATH): - sub_out = torch.nn.functional.scaled_dot_product_attention( - query[:, start:end, :], - key[:, start:end, :], - value[:, start:end, :], - dropout_p=0.0, - is_causal=attn_masks is None, - attn_mask=attn_masks[i] if attn_masks else None, - scale=scale).movedim(query.dim() - 2, 0) - output[start:end, :, :] = sub_out - start = end - - return output diff --git a/vllm/attention/backends/triton_mla.py b/vllm/attention/backends/triton_mla.py deleted file mode 100644 index fba5b5f6bca8..000000000000 --- a/vllm/attention/backends/triton_mla.py +++ /dev/null @@ -1,111 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from typing import List, Optional, Type - -import torch - -from vllm.attention.backends.abstract import (AttentionType, - is_quantized_kv_cache) -from vllm.attention.backends.mla.common import (MLACommonBackend, - MLACommonImpl, - MLACommonMetadata) -from vllm.attention.ops.triton_decode_attention import decode_attention_fwd - - -class TritonMLABackend(MLACommonBackend): - - @staticmethod - def get_name() -> str: - return "TRITON_MLA" - - @staticmethod - def get_impl_cls() -> Type["TritonMLAImpl"]: - return TritonMLAImpl - - -class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]): - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float], - attn_type: str, - kv_sharing_target_layer_name: Optional[str], - # MLA Specific Arguments - **mla_args) -> None: - super().__init__(num_heads, head_size, scale, num_kv_heads, - alibi_slopes, sliding_window, kv_cache_dtype, - logits_soft_cap, attn_type, - kv_sharing_target_layer_name, **mla_args) - - unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap] - if any(unsupported_features): - raise NotImplementedError( - "TritonMLAImpl does not support one of the following: " - "alibi_slopes, sliding_window, logits_soft_cap") - - if attn_type != AttentionType.DECODER: - raise NotImplementedError("Encoder self-attention and " - "encoder/decoder cross-attention " - "are not implemented for " - "TritonMLAImpl") - - if is_quantized_kv_cache(self.kv_cache_dtype): - raise NotImplementedError( - "TritonMLA with FP8 KV cache not yet supported") - - def _forward_decode( - self, - q_nope: torch.Tensor, - q_pe: torch.Tensor, - kv_c_and_k_pe_cache: torch.Tensor, - attn_metadata: MLACommonMetadata, - ) -> torch.Tensor: - assert kv_c_and_k_pe_cache.numel() > 0 - - decode_meta = attn_metadata.decode_metadata - assert decode_meta is not None - B = q_nope.shape[0] - - q = torch.cat([q_nope, q_pe], dim=-1) - o = torch.zeros(B, - self.num_heads, - self.kv_lora_rank, - dtype=q.dtype, - device=q.device) - - num_kv_splits = 4 # TODO: heuristic - - # TODO(lucas) Allocate ahead of time - attn_logits = 
torch.empty( - ( - B, - self.num_heads, - num_kv_splits, - # NOTE(lucas) idk why the +1 is here but sglang has it so we - # just mirror that - self.kv_lora_rank + 1, - ), - dtype=torch.float32, - device=q.device, - ) - - # Add a head dim of 1 - kv_c_and_k_pe_cache = kv_c_and_k_pe_cache.unsqueeze(2) - kv_c_cache = kv_c_and_k_pe_cache[..., :self.kv_lora_rank] - PAGE_SIZE = kv_c_and_k_pe_cache.size(1) - - # Run MQA - decode_attention_fwd(q, kv_c_and_k_pe_cache, kv_c_cache, o, - decode_meta.block_tables, - decode_meta.seq_lens_tensor, attn_logits, - num_kv_splits, self.scale, PAGE_SIZE) - - return self._v_up_proj(o) diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index b28e6a4237cb..3f15580872c7 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -338,10 +338,9 @@ def graph_capture_get_metadata_for_batch( # The encoder decoder model works only with XFormers and # Flash Attention backend. Assert the same. assert self.runner.attn_backend.get_name() in \ - ["XFORMERS", "FLASH_ATTN", "ROCM_FLASH"], \ - f"Expected attn_backend name to be either 'XFORMERS'," \ - f"'ROCM_FLASH', or 'FLASH_ATTN', but " \ - f"got '{self.runner.attn_backend.get_name()}'" + ["XFORMERS", "FLASH_ATTN"], \ + f"Expected attn_backend name to be either 'XFORMERS' or " \ + f"'FLASH_ATTN', but got '{self.runner.attn_backend.get_name()}'" self._update_captured_metadata_for_enc_dec_model( batch_size=batch_size, attn_metadata=attn_metadata) @@ -360,10 +359,9 @@ def get_graph_input_buffers( # The encoder decoder model works only with XFormers and # Flash Attention backend. Assert the same. assert self.runner.attn_backend.get_name() in \ - ["XFORMERS", "FLASH_ATTN", "ROCM_FLASH"], \ - f"Expected attn_backend name to be either 'XFORMERS'," \ - f"'ROCM_FLASH', or 'FLASH_ATTN', but " \ - f"got '{self.runner.attn_backend.get_name()}'" + ["XFORMERS", "FLASH_ATTN"], \ + f"Expected attn_backend name to be either 'XFORMERS' or " \ + f"'FLASH_ATTN', but got '{self.runner.attn_backend.get_name()}'" self._add_additional_input_buffers_for_enc_dec_model( attn_metadata=attn_metadata, input_buffers=input_buffers) return input_buffers diff --git a/vllm/attention/backends/xformers.py b/vllm/attention/backends/xformers.py deleted file mode 100644 index 302d3d7ea903..000000000000 --- a/vllm/attention/backends/xformers.py +++ /dev/null @@ -1,805 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Attention layer with xFormers and PagedAttention.""" -from dataclasses import dataclass -from typing import Dict, List, Optional, Tuple, Type - -import torch -from xformers import ops as xops -from xformers.ops.fmha.attn_bias import (AttentionBias, - BlockDiagonalCausalMask, - BlockDiagonalMask, - LowerTriangularMaskWithTensorBias) - -from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, - AttentionLayer, - AttentionMetadata, AttentionType) -from vllm.attention.backends.utils import ( - CommonAttentionState, CommonMetadataBuilder, - get_num_prefill_decode_query_kv_tokens, get_seq_len_block_table_args, - is_all_cross_attn_metadata_set, is_all_encoder_attn_metadata_set) -from vllm.attention.ops.paged_attn import (PagedAttention, - PagedAttentionMetadata) -from vllm.logger import init_logger - -logger = init_logger(__name__) - - -class XFormersBackend(AttentionBackend): - - @staticmethod - def get_name() -> str: - return "XFORMERS" - - @staticmethod - def get_impl_cls() -> Type["XFormersImpl"]: - return 
XFormersImpl - - @staticmethod - def get_metadata_cls() -> Type["AttentionMetadata"]: - return XFormersMetadata - - @staticmethod - def get_builder_cls() -> Type["XFormersMetadataBuilder"]: - return XFormersMetadataBuilder - - @staticmethod - def get_state_cls() -> Type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> Tuple[int, ...]: - return PagedAttention.get_kv_cache_shape(num_blocks, block_size, - num_kv_heads, head_size) - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: Dict[int, int], - ) -> None: - PagedAttention.swap_blocks(src_kv_cache, dst_kv_cache, src_to_dst) - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - PagedAttention.copy_blocks(kv_caches, src_to_dists) - - -@dataclass -class XFormersMetadata(AttentionMetadata, PagedAttentionMetadata): - """Metadata for XFormersbackend. - - NOTE: Any python object stored here is not updated when it is - cuda-graph replayed. If you have values that need to be changed - dynamically, it should be stored in tensor. The tensor has to be - updated from `CUDAGraphRunner.forward` API. - """ - - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ----------------------| - # |-- query_len ---| - - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - - # FIXME: It is for flash attn. - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - use_cuda_graph: bool - - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] = None - - # FIXME: It is for flash attn. - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - seq_start_loc: Optional[torch.Tensor] = None - - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). - context_lens_tensor: Optional[torch.Tensor] = None - - # Maximum query length in the batch. None for decoding. - max_query_len: Optional[int] = None - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] = None - - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - - # Self-attention prefill/decode metadata cache - _cached_prefill_metadata: Optional["XFormersMetadata"] = None - _cached_decode_metadata: Optional["XFormersMetadata"] = None - - # Begin encoder attn & enc/dec cross-attn fields... 
- - # Encoder sequence lengths representation - encoder_seq_lens: Optional[List[int]] = None - encoder_seq_lens_tensor: Optional[torch.Tensor] = None - # FIXME: It is for flash attn. - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - encoder_seq_start_loc: Optional[torch.Tensor] = None - - # Maximum sequence length among encoder sequences - max_encoder_seq_len: Optional[int] = None - - # Number of tokens input to encoder - num_encoder_tokens: Optional[int] = None - - # Cross-attention memory-mapping data structures: slot mapping - # and block tables - cross_slot_mapping: Optional[torch.Tensor] = None - cross_block_tables: Optional[torch.Tensor] = None - - def __post_init__(self): - # Set during the execution of the first attention op. - # It is a list because it is needed to set per prompt - # when alibi slopes is used. It is because of the limitation - # from xformer API. - # will not appear in the __repr__ and __init__ - self.attn_bias: Optional[List[AttentionBias]] = None - self.encoder_attn_bias: Optional[List[AttentionBias]] = None - self.cross_attn_bias: Optional[List[AttentionBias]] = None - - @property - def is_all_encoder_attn_metadata_set(self): - ''' - All attention metadata required for encoder attention is set. - ''' - return is_all_encoder_attn_metadata_set(self) - - @property - def is_all_cross_attn_metadata_set(self): - ''' - All attention metadata required for enc/dec cross-attention is set. - - Superset of encoder attention required metadata. - ''' - return is_all_cross_attn_metadata_set(self) - - @property - def prefill_metadata(self) -> Optional["XFormersMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - # Recover cached prefill-phase attention - # metadata structure - return self._cached_prefill_metadata - - assert ((self.seq_lens is not None) - or (self.encoder_seq_lens is not None)) - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - query_start_loc = (None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1]) - seq_start_loc = (None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1]) - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[:self.num_prefill_tokens]) - seq_lens = (None if self.seq_lens is None else - self.seq_lens[:self.num_prefills]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[:self.num_prefills]) - context_lens_tensor = (None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills]) - block_tables = (None if self.block_tables is None else - self.block_tables[:self.num_prefills]) - - # Construct & cache prefill-phase attention metadata structure - self._cached_prefill_metadata = XFormersMetadata( - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=self. 
- multi_modal_placeholder_index_maps, - enable_kv_scales_calculation=self.enable_kv_scales_calculation, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_seq_len=0, - query_start_loc=query_start_loc, - seq_start_loc=seq_start_loc, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=False, - # Begin encoder & cross attn fields below... - encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - return self._cached_prefill_metadata - - @property - def decode_metadata(self) -> Optional["XFormersMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - # Recover cached decode-phase attention - # metadata structure - return self._cached_decode_metadata - assert ((self.seq_lens_tensor is not None) - or (self.encoder_seq_lens_tensor is not None)) - - # Compute some attn_metadata fields which default to None - slot_mapping = (None if self.slot_mapping is None else - self.slot_mapping[self.num_prefill_tokens:]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[self.num_prefills:]) - block_tables = (None if self.block_tables is None else - self.block_tables[self.num_prefills:]) - - # Construct & cache decode-phase attention metadata structure - self._cached_decode_metadata = XFormersMetadata( - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=None, - enable_kv_scales_calculation=True, - seq_lens_tensor=seq_lens_tensor, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - block_tables=block_tables, - use_cuda_graph=self.use_cuda_graph, - # Begin encoder & cross attn fields below... - encoder_seq_lens=self.encoder_seq_lens, - encoder_seq_lens_tensor=self.encoder_seq_lens_tensor, - max_encoder_seq_len=self.max_encoder_seq_len, - cross_slot_mapping=self.cross_slot_mapping, - cross_block_tables=self.cross_block_tables) - - # Batch may be composed of prefill|decodes, adjust query start indices - # to refer to the start of decodes when the two are split apart. - # E.g. in tokens:[3 prefills|6 decodes], query_start_loc=[3,9] => [0,6]. - if self._cached_decode_metadata.query_start_loc is not None: - qs = self._cached_decode_metadata.query_start_loc - self._cached_decode_metadata.query_start_loc = qs - qs[0] - return self._cached_decode_metadata - - -def _get_attn_bias( - attn_metadata: XFormersMetadata, - attn_type: str, -) -> Optional[AttentionBias]: - ''' - Extract appropriate attention bias from attention metadata - according to attention type. 
- - Arguments: - - * attn_metadata: Attention metadata structure associated with attention - * attn_type: encoder attention, decoder self-attention, - encoder/decoder cross-attention - - Returns: - * Appropriate attention bias value given the attention type - ''' - - if (attn_type == AttentionType.DECODER - or attn_type == AttentionType.ENCODER_ONLY): - return attn_metadata.attn_bias - elif attn_type == AttentionType.ENCODER: - return attn_metadata.encoder_attn_bias - elif attn_type == AttentionType.ENCODER_DECODER: - return attn_metadata.cross_attn_bias - else: - raise AttributeError(f"Invalid attention type {str(attn_type)}") - - -def _set_attn_bias( - attn_metadata: XFormersMetadata, - attn_bias: List[Optional[AttentionBias]], - attn_type: str, -) -> None: - ''' - Update appropriate attention bias field of attention metadata, - according to attention type. - - Arguments: - - * attn_metadata: Attention metadata structure associated with attention - * attn_bias: The desired attention bias value - * attn_type: encoder attention, decoder self-attention, - encoder/decoder cross-attention - ''' - - if (attn_type == AttentionType.DECODER - or attn_type == AttentionType.ENCODER_ONLY): - attn_metadata.attn_bias = attn_bias - elif attn_type == AttentionType.ENCODER: - attn_metadata.encoder_attn_bias = attn_bias - elif attn_type == AttentionType.ENCODER_DECODER: - attn_metadata.cross_attn_bias = attn_bias - else: - raise AttributeError(f"Invalid attention type {str(attn_type)}") - - -class XFormersMetadataBuilder(CommonMetadataBuilder[XFormersMetadata]): - - _metadata_cls = XFormersMetadata - - -class XFormersImpl(AttentionImpl[XFormersMetadata]): - """ - If the input tensors contain prompt tokens, the layout is as follows: - |<--------------- num_prefill_tokens ----------------->| - |<--prefill_0-->|<--prefill_1-->|...|<--prefill_N-1--->| - - Otherwise, the layout is as follows: - |<----------------- num_decode_tokens ------------------>| - |<--decode_0-->|..........|<--decode_M-1-->|<--padding-->| - - Generation tokens can contain padding when cuda-graph is used. - Currently, prompt tokens don't contain any padding. - - The prompts might have different lengths, while the generation tokens - always have length 1. - - If chunked prefill is enabled, prefill tokens and decode tokens can be - batched together in a flattened 1D query. - - |<----- num_prefill_tokens ---->|<------- num_decode_tokens --------->| - |<-prefill_0->|...|<-prefill_N-1->|<--decode_0-->|...|<--decode_M-1-->| - - Currently, cuda graph is disabled for chunked prefill, meaning there's no - padding between prefill and decode tokens. - """ - - def __init__( - self, - num_heads: int, - head_size: int, - scale: float, - num_kv_heads: int, - alibi_slopes: Optional[List[float]], - sliding_window: Optional[int], - kv_cache_dtype: str, - logits_soft_cap: Optional[float] = None, - attn_type: str = AttentionType.DECODER, - kv_sharing_target_layer_name: Optional[str] = None, - use_irope: bool = False, - ) -> None: - if kv_sharing_target_layer_name is not None: - raise NotImplementedError("KV sharing is not supported in V0 " - "XFORMERS backend.") - if logits_soft_cap is not None: - logger.warning_once("XFormers does not support logits soft cap. 
" - "Outputs may be slightly off.") - if use_irope: - logger.warning_once( - "Using irope in XFormers is not supported yet, it will fall" - " back to global attention for long context.") - self.num_heads = num_heads - self.head_size = head_size - self.scale = float(scale) - self.num_kv_heads = num_kv_heads - if alibi_slopes is not None: - alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) - self.alibi_slopes = alibi_slopes - self.sliding_window = sliding_window - self.kv_cache_dtype = kv_cache_dtype - - self.num_queries_per_kv = self.num_heads // self.num_kv_heads - - supported_head_sizes = PagedAttention.get_supported_head_sizes() - if head_size not in supported_head_sizes: - raise ValueError( - f"Head size {head_size} is not supported by PagedAttention. " - f"Supported head sizes are: {supported_head_sizes}.") - - self.attn_type = attn_type - - def forward( - self, - layer: AttentionLayer, - query: torch.Tensor, - key: Optional[torch.Tensor], - value: Optional[torch.Tensor], - kv_cache: torch.Tensor, - attn_metadata: "XFormersMetadata", - output: Optional[torch.Tensor] = None, - output_scale: Optional[torch.Tensor] = None, - output_block_scale: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - """Forward pass with xFormers and PagedAttention. - - For decoder-only models: query, key and value must be non-None. - - For encoder/decoder models: - * XFormersImpl.forward() may be invoked for both self- and cross- - attention layers. - * For self-attention: query, key and value must be non-None. - * For cross-attention: - * Query must be non-None - * During prefill, key and value must be non-None; key and value - get cached for use during decode. - * During decode, key and value may be None, since: - (1) key and value tensors were cached during prefill, and - (2) cross-attention key and value tensors do not grow during - decode - - A note on how the attn_type (attention type enum) argument impacts - attention forward() behavior: - - * DECODER: normal decoder-only behavior; - use decoder self-attention block table - * ENCODER: no KV caching; pass encoder sequence - attributes (encoder_seq_lens/encoder_seq_lens_tensor/ - max_encoder_seq_len) to kernel, in lieu of decoder - sequence attributes (seq_lens/seq_lens_tensor/max_seq_len). - Used for encoder branch of encoder-decoder models. - * ENCODER_ONLY: no kv_caching, uses the normal attention - attributes (seq_lens/seq_lens_tensor/max_seq_len). - * ENCODER_DECODER: cross-attention behavior; - use cross-attention block table for caching KVs derived - from encoder hidden states; since KV sequence lengths - will match encoder sequence lengths, pass encoder sequence - attributes to kernel (encoder_seq_lens/encoder_seq_lens_tensor/ - max_encoder_seq_len) - - Args: - layer: Attention layer instance. - query: shape = [num_tokens, num_heads * head_size] - key: shape = [num_tokens, num_kv_heads * head_size] - value: shape = [num_tokens, num_kv_heads * head_size] - kv_cache: KV cache tensor with shape - [2, num_blocks, block_size * num_kv_heads * head_size]. - NOTE: kv_cache will be an empty tensor with shape [0] - for profiling run. - attn_metadata: Metadata for attention. - output: Optional output tensor. - output_scale: Optional output scale tensor. - output_block_scale: Optional output block scale tensor. 
- Returns: - shape = [num_tokens, num_heads * head_size] - """ - if output_scale is not None or output_block_scale is not None: - raise NotImplementedError( - "fused output quantization is not yet supported" - " for XFormersImpl") - - attn_type = self.attn_type - # Check that appropriate attention metadata attributes are - # selected for the desired attention type - if (attn_type == AttentionType.ENCODER - and (not attn_metadata.is_all_encoder_attn_metadata_set)): - raise AttributeError("Encoder attention requires setting " - "encoder metadata attributes.") - - elif (attn_type == AttentionType.ENCODER_DECODER - and (not attn_metadata.is_all_cross_attn_metadata_set)): - raise AttributeError("Encoder/decoder cross-attention " - "requires setting cross-attention " - "metadata attributes.") - - query = query.view(-1, self.num_heads, self.head_size) - if key is not None: - assert value is not None - key = key.view(-1, self.num_kv_heads, self.head_size) - value = value.view(-1, self.num_kv_heads, self.head_size) - else: - assert value is None - - # Self-attention vs. cross-attention will impact - # which KV cache memory-mapping & which - # seqlen datastructures we utilize - - if (attn_type != AttentionType.ENCODER and kv_cache.numel() > 0): - # KV-cache during decoder-self- or - # encoder-decoder-cross-attention, but not - # during encoder attention. - # - # Even if there are no new key/value pairs to cache, - # we still need to break out key_cache and value_cache - # i.e. for later use by paged attention - key_cache, value_cache = PagedAttention.split_kv_cache( - kv_cache, self.num_kv_heads, self.head_size) - - if (key is not None) and (value is not None): - - if attn_type == AttentionType.ENCODER_DECODER: - # Update cross-attention KV cache (prefill-only) - # During cross-attention decode, key & value will be None, - # preventing this IF-statement branch from running - updated_slot_mapping = attn_metadata.cross_slot_mapping - else: - # Update self-attention KV cache (prefill/decode) - updated_slot_mapping = attn_metadata.slot_mapping - - # Reshape the input keys and values and store them in the cache. - # If kv_cache is not provided, the new key and value tensors are - # not cached. This happens during the initial memory - # profiling run. - PagedAttention.write_to_paged_cache( - key, value, key_cache, value_cache, updated_slot_mapping, - self.kv_cache_dtype, layer._k_scale, layer._v_scale) - (num_prefill_query_tokens, num_prefill_kv_tokens, - num_decode_query_tokens) = \ - get_num_prefill_decode_query_kv_tokens(attn_metadata, attn_type) - - output = torch.empty_like(query) - # Query for decode. KV is not needed because it is already cached. - decode_query = query[num_prefill_query_tokens:] - # QKV for prefill. - query = query[:num_prefill_query_tokens] - if key is not None and value is not None: - key = key[:num_prefill_kv_tokens] - value = value[:num_prefill_kv_tokens] - - assert query.shape[0] == num_prefill_query_tokens - assert decode_query.shape[0] == num_decode_query_tokens - - if prefill_meta := attn_metadata.prefill_metadata: - # Prompt run. - if kv_cache.numel() == 0 or prefill_meta.block_tables.numel() == 0: - # normal attention. - # block tables are empty if the prompt does not have a cached - # prefix. 
- out = self._run_memory_efficient_xformers_forward( - query, key, value, prefill_meta, attn_type=attn_type) - assert out.shape == output[:num_prefill_query_tokens].shape - output[:num_prefill_query_tokens] = out - else: - assert attn_type != AttentionType.ENCODER_ONLY, ( - "Encoder-only models should not have prefix attention.") - - assert prefill_meta.query_start_loc is not None - assert prefill_meta.max_query_len is not None - - # prefix-enabled attention - # TODO(Hai) this triton kernel has regression issue (broke) to - # deal with different data types between KV and FP8 KV cache, - # to be addressed separately. - out = PagedAttention.forward_prefix( - query, - key, - value, - self.kv_cache_dtype, - key_cache, - value_cache, - prefill_meta.block_tables, - prefill_meta.query_start_loc, - prefill_meta.seq_lens_tensor, - prefill_meta.max_query_len, - self.alibi_slopes, - self.sliding_window, - layer._k_scale, - layer._v_scale, - ) - assert output[:num_prefill_query_tokens].shape == out.shape - output[:num_prefill_query_tokens] = out - - if decode_meta := attn_metadata.decode_metadata: - assert attn_type != AttentionType.ENCODER_ONLY, ( - "Encoder-only models should not have decode metadata.") - - ( - seq_lens_arg, - max_seq_len_arg, - block_tables_arg, - ) = get_seq_len_block_table_args(decode_meta, False, attn_type) - - output[num_prefill_query_tokens:] = PagedAttention.forward_decode( - decode_query, - key_cache, - value_cache, - block_tables_arg, - seq_lens_arg, - max_seq_len_arg, - self.kv_cache_dtype, - self.num_kv_heads, - self.scale, - self.alibi_slopes, - layer._k_scale, - layer._v_scale, - ) - - # Reshape the output tensor. - return output.view(-1, self.num_heads * self.head_size) - - def _run_memory_efficient_xformers_forward( - self, - query: torch.Tensor, - key: torch.Tensor, - value: torch.Tensor, - attn_metadata: XFormersMetadata, - attn_type: str = AttentionType.DECODER, - ) -> torch.Tensor: - """Attention for 1D query of multiple prompts. Multiple prompt - tokens are flattened in to `query` input. - - See https://facebookresearch.github.io/xformers/components/ops.html - for API spec. - - Args: - query: shape = [num_prefill_tokens, num_heads, head_size] - key: shape = [num_prefill_tokens, num_kv_heads, head_size] - value: shape = [num_prefill_tokens, num_kv_heads, head_size] - attn_metadata: Metadata for attention. - attn_type: Select attention type, between encoder attention, - decoder self-attention, or encoder/decoder cross- - attention. Defaults to decoder self-attention, - which is the vLLM default generally - """ - - original_query = query - if self.num_kv_heads != self.num_heads: - # GQA/MQA requires the shape [B, M, G, H, K]. - # Note that the output also has the same shape (which is different - # from a spec from the doc). - query = query.view(query.shape[0], self.num_kv_heads, - self.num_queries_per_kv, query.shape[-1]) - key = key[:, :, - None, :].expand(key.shape[0], self.num_kv_heads, - self.num_queries_per_kv, key.shape[-1]) - value = value[:, :, - None, :].expand(value.shape[0], self.num_kv_heads, - self.num_queries_per_kv, - value.shape[-1]) - - # Set attention bias if not provided. This typically happens at - # the very attention layer of every iteration. - # FIXME(woosuk): This is a hack. 
- attn_bias = _get_attn_bias(attn_metadata, attn_type) - if attn_bias is None: - if self.alibi_slopes is None: - - # Cross attention block of decoder branch of encoder-decoder - # model uses seq_lens for dec / encoder_seq_lens for enc - if (attn_type == AttentionType.ENCODER_DECODER): - assert attn_metadata.seq_lens is not None - assert attn_metadata.encoder_seq_lens is not None - - # Cross-attention mask is non-causal - attn_bias = BlockDiagonalMask.from_seqlens( - attn_metadata.seq_lens, - attn_metadata.encoder_seq_lens, - device=query.device) - - # Encoder branch of encoder-decoder model uses - # attn_metadata.encoder_seq_lens - elif attn_type == AttentionType.ENCODER: - - assert attn_metadata.encoder_seq_lens is not None - - # Encoder self-attention mask is non-causal - attn_bias = BlockDiagonalMask.from_seqlens( - attn_metadata.encoder_seq_lens, device=query.device) - - # Self-attention block of encoder-only model just - # uses the seq_lens directly. - elif attn_type == AttentionType.ENCODER_ONLY: - assert attn_metadata.seq_lens is not None - - # Encoder self-attention mask is non-causal - attn_bias = BlockDiagonalMask.from_seqlens( - attn_metadata.seq_lens, device=query.device) - - # Self-attention block of decoder branch just - # uses the seq_lens directly - elif attn_type == AttentionType.DECODER: - assert attn_metadata.seq_lens is not None - - # Decoder self-attention mask is causal - attn_bias = BlockDiagonalCausalMask.from_seqlens( - attn_metadata.seq_lens, device=query.device) - else: - raise ValueError("Unknown AttentionType: %s", attn_type) - - if self.sliding_window is not None: - attn_bias = attn_bias.make_local_attention( - self.sliding_window) - attn_bias = [attn_bias] - else: - assert attn_type == AttentionType.DECODER - assert attn_metadata.seq_lens is not None - attn_bias = _make_alibi_bias(self.alibi_slopes, - self.num_kv_heads, query.dtype, - attn_metadata.seq_lens) - - _set_attn_bias(attn_metadata, attn_bias, attn_type) - - # No alibi slopes. - # TODO(woosuk): Too many view operations. Let's try to reduce - # them in the future for code readability. - if self.alibi_slopes is None: - # Add the batch dimension. - query = query.unsqueeze(0) - key = key.unsqueeze(0) - value = value.unsqueeze(0) - out = xops.memory_efficient_attention_forward( - query, - key, - value, - attn_bias=attn_bias[0], - p=0.0, - scale=self.scale) - return out.view_as(original_query) - - # Attention with alibi slopes. - # FIXME(woosuk): Because xformers does not support dynamic sequence - # lengths with custom attention bias, we process each prompt one by - # one. This is inefficient, especially when we have many short prompts. - assert attn_metadata.seq_lens is not None - output = torch.empty_like(original_query) - start = 0 - for i, seq_len in enumerate(attn_metadata.seq_lens): - end = start + seq_len - out = xops.memory_efficient_attention_forward( - query[None, start:end], - key[None, start:end], - value[None, start:end], - attn_bias=attn_bias[i], - p=0.0, - scale=self.scale) - # TODO(woosuk): Unnecessary copy. Optimize. - output[start:end].copy_(out.view_as(original_query[start:end])) - start += seq_len - return output - - -def _make_alibi_bias( - alibi_slopes: torch.Tensor, - num_kv_heads: int, - dtype: torch.dtype, - seq_lens: List[int], -) -> List[AttentionBias]: - attn_biases: List[AttentionBias] = [] - for seq_len in seq_lens: - bias = torch.arange(seq_len, dtype=dtype) - # NOTE(zhuohan): HF uses - # `bias = bias[None, :].repeat(seq_len, 1)` - # here. 
We find that both biases give the same results, but - # the bias below more accurately follows the original ALiBi - # paper. - # Calculate a matrix where each element represents ith element- jth - # element. - bias = bias[None, :] - bias[:, None] - - padded_len = (seq_len + 7) // 8 * 8 - num_heads = alibi_slopes.shape[0] - bias = torch.empty( - 1, # batch size - num_heads, - seq_len, - padded_len, - device=alibi_slopes.device, - dtype=dtype, - )[:, :, :, :seq_len].copy_(bias) - bias.mul_(alibi_slopes[:, None, None]) - attn_biases.append(LowerTriangularMaskWithTensorBias(bias)) - - return attn_biases diff --git a/vllm/config/model.py b/vllm/config/model.py index 95fe52883db0..33e5d3ea04a4 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -32,8 +32,7 @@ from vllm.transformers_utils.runai_utils import (ObjectStorageModel, is_runai_obj_uri) from vllm.transformers_utils.utils import maybe_model_redirect -from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, LayerBlockType, - LazyLoader, common_broadcastable_dtype) +from vllm.utils import LayerBlockType, LazyLoader, common_broadcastable_dtype if TYPE_CHECKING: from transformers import PretrainedConfig @@ -1103,10 +1102,6 @@ def verify_dual_chunk_attention_config( self.hf_config.dual_chunk_attention_config[ "sparse_attention_enabled"] = True - if envs.VLLM_ATTENTION_BACKEND != STR_DUAL_CHUNK_FLASH_ATTN_VAL: - raise ValueError("please set VLLM_ATTENTION_BACKEND to " - f"{STR_DUAL_CHUNK_FLASH_ATTN_VAL}") - def verify_with_parallel_config( self, parallel_config: ParallelConfig, diff --git a/vllm/distributed/kv_transfer/kv_connector/utils.py b/vllm/distributed/kv_transfer/kv_connector/utils.py index 911d77ba36fa..efa4c9abf47f 100644 --- a/vllm/distributed/kv_transfer/kv_connector/utils.py +++ b/vllm/distributed/kv_transfer/kv_connector/utils.py @@ -44,7 +44,7 @@ def get_model_args(self, model_executable: torch.nn.Module): # When VLLM_MLA_DISABLE=1, standard FA is used instead, leading # to a kv_cache shape of [2, num_blks, blk_size, # num_key_value_heads / tp, qk_nope_head_dim + qk_rope_head_dim]. - # For more details, see vllm/attention/backends/mla/common.py. + # For more details, see vllm/v1/attention/backends/mla/common.py. if self.is_deepseek_mla and self.use_mla_opt: head_size = model_config.kv_lora_rank + \ model_config.qk_rope_head_dim diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7e00260caa39..b09d43f70558 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -44,8 +44,8 @@ from vllm.transformers_utils.config import (get_model_path, is_interleaved, maybe_override_with_speculators) from vllm.transformers_utils.utils import check_gguf_file -from vllm.utils import (STR_DUAL_CHUNK_FLASH_ATTN_VAL, FlexibleArgumentParser, - GiB_bytes, get_ip, is_in_ray_actor) +from vllm.utils import (FlexibleArgumentParser, GiB_bytes, get_ip, + is_in_ray_actor) from vllm.v1.sample.logits_processor import LogitsProcessor # yapf: enable @@ -1163,17 +1163,6 @@ def create_engine_config( self._set_default_args_v0(model_config) assert self.enable_chunked_prefill is not None - if envs.VLLM_ATTENTION_BACKEND in [STR_DUAL_CHUNK_FLASH_ATTN_VAL]: - assert self.enforce_eager, ( - "Cuda graph is not supported with DualChunkFlashAttention. 
" - "To run the model in eager mode, set 'enforce_eager=True' " - "or use '--enforce-eager' in the CLI.") - assert current_platform.is_cuda(), ( - "DualChunkFlashAttention is only supported on CUDA platform.") - assert not use_v1, ( - "DualChunkFlashAttention is not supported on V1 engine. " - "To run the model in V0 engine, try set 'VLLM_USE_V1=0'") - sliding_window: Optional[int] = None if not is_interleaved(model_config.hf_text_config): # Only set CacheConfig.sliding_window if the model is all sliding diff --git a/vllm/envs.py b/vllm/envs.py index 3991a789d80f..cbd1d5474e60 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -529,7 +529,6 @@ def get_vllm_port() -> Optional[int]: # - "TORCH_SDPA": use torch.nn.MultiheadAttention # - "FLASH_ATTN": use FlashAttention # - "XFORMERS": use XFormers - # - "ROCM_FLASH": use ROCmFlashAttention # - "FLASHINFER": use flashinfer # - "FLASHMLA": use FlashMLA # - "FLASH_ATTN_MLA": use FlashAttention for MLA diff --git a/vllm/model_executor/layers/mamba/mamba2_metadata.py b/vllm/model_executor/layers/mamba/mamba2_metadata.py index c926e17a2c19..7f376b70a7ae 100644 --- a/vllm/model_executor/layers/mamba/mamba2_metadata.py +++ b/vllm/model_executor/layers/mamba/mamba2_metadata.py @@ -53,13 +53,18 @@ class Mamba2Metadata: def get_platform_metadata_classes() -> tuple[type[AttentionMetadata], ...]: """Returns the appropriate metadata classes for the current platform.""" if current_platform.is_rocm(): - from vllm.attention.backends.rocm_flash_attn import ( - ROCmFlashAttentionMetadata) - return (ROCmFlashAttentionMetadata, PlaceholderAttentionMetadata) - elif current_platform.is_cuda(): - from vllm.attention.backends.flash_attn import FlashAttentionMetadata - from vllm.attention.backends.xformers import XFormersMetadata - return (FlashAttentionMetadata, XFormersMetadata, + from vllm.v1.attention.backends.rocm_aiter_fa import ( + AiterFlashAttentionMetadata) + from vllm.v1.attention.backends.triton_attn import ( + TritonAttentionMetadata) + return (AiterFlashAttentionMetadata, TritonAttentionMetadata, + PlaceholderAttentionMetadata) + if current_platform.is_cuda(): + from vllm.v1.attention.backends.flash_attn import ( + FlashAttentionMetadata) + from vllm.v1.attention.backends.xformers import ( + XFormersAttentionMetadata) + return (FlashAttentionMetadata, XFormersAttentionMetadata, PlaceholderAttentionMetadata) raise ValueError( f"Unsupported platform for Mamba2: {current_platform.device_type}") diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index a99a6679a569..415d36c681d8 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -478,7 +478,8 @@ class DeepseekV2MLAAttention(nn.Module): Main reference: DeepseekV2 paper, and FlashInfer Implementation (https://arxiv.org/abs/2405.04434 and https://github.com/flashinfer-ai/flashinfer/pull/551). 
- For more info see MLACommonImpl in: vllm/attention/backends/mla/utils.py + For more info see MLACommonImpl in: + vllm/v1/attention/backends/mla/utils.py """ def __init__( diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index c263e2afe83b..05f129f513a0 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -226,8 +226,10 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, has_sink) -> str: if use_mla: - # TODO(lucas): refactor to be more concise - # we should probably consider factoring out V1 here + if not use_v1: + raise RuntimeError( + "MLA attention backends require the V1 engine. " + "Set VLLM_USE_V1=1 to enable them.") from vllm.attention.ops.flashmla import is_flashmla_supported from vllm.attention.utils.fa_utils import flash_attn_supports_mla @@ -246,35 +248,17 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, use_triton = selected_backend == _Backend.TRITON_MLA or ( selected_backend is None) - def _get_version(name, import_suffix) -> str: - if use_v1: - logger.info_once(f"Using {name} backend on V1 engine.") - return f"vllm.v1.attention.backends.mla.{import_suffix}" - else: - logger.info_once(f"Using {name} backend.") - return f"vllm.attention.backends.{import_suffix}" - if use_cutlassmla: - if use_v1: - logger.info_once("Using Cutlass MLA backend on V1 engine.") - return ("vllm.v1.attention.backends.mla." - "cutlass_mla.CutlassMLABackend") - else: - logger.warning( - "Cutlass MLA backend is only supported on V1 engine") + logger.info_once("Using Cutlass MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla." + "cutlass_mla.CutlassMLABackend") if use_flashinfermla: - if use_v1: - from vllm.v1.attention.backends.utils import ( - set_kv_cache_layout) - set_kv_cache_layout("HND") - logger.info_once( - "Using FlashInfer MLA backend on V1 engine.") - return ("vllm.v1.attention.backends.mla." - "flashinfer_mla.FlashInferMLABackend") - else: - logger.warning( - "FlashInfer MLA backend is only supported on V1 engine" - ) + from vllm.v1.attention.backends.utils import ( + set_kv_cache_layout) + set_kv_cache_layout("HND") + logger.info_once("Using FlashInfer MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla." + "flashinfer_mla.FlashInferMLABackend") if use_flashmla: if block_size != 64: logger.warning( @@ -282,20 +266,18 @@ def _get_version(name, import_suffix) -> str: " (currently only supports block size 64).", block_size) else: - return _get_version("FlashMLA", "flashmla.FlashMLABackend") - if use_flashattn: - if use_v1: - logger.info_once( - "Using FlashAttention MLA backend on V1 engine.") + logger.info_once("Using FlashMLA backend on V1 engine.") return ("vllm.v1.attention.backends.mla." - "flashattn_mla.FlashAttnMLABackend") - else: - logger.warning( - "FlashAttention MLA backend is only supported on V1 " - "engine.") + "flashmla.FlashMLABackend") + if use_flashattn: + logger.info_once( + "Using FlashAttention MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla." + "flashattn_mla.FlashAttnMLABackend") if use_triton: - return _get_version("Triton MLA", - "triton_mla.TritonMLABackend") + logger.info_once("Using Triton MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla." 
+ "triton_mla.TritonMLABackend") if use_v1: FLASHINFER_V1 = "vllm.v1.attention.backends.flashinfer.FlashInferBackend" # noqa: E501 FLEX_ATTENTION_V1 = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend" # noqa: E501 @@ -382,78 +364,9 @@ def _get_version(name, import_suffix) -> str: ) return FLEX_ATTENTION_V1 - # Backends for V0 engine - if selected_backend == _Backend.XFORMERS: - logger.info("Using XFormers backend.") - return "vllm.attention.backends.xformers.XFormersBackend" - elif selected_backend == _Backend.DUAL_CHUNK_FLASH_ATTN: - logger.info("Using DualChunkFlashAttention backend.") - return ("vllm.attention.backends.dual_chunk_flash_attn." - "DualChunkFlashAttentionBackend") - elif selected_backend == _Backend.DIFFERENTIAL_FLASH_ATTN: - logger.info("Using DifferentialFlashAttention backend.") - return ("vllm.attention.backends.differential_flash_attn." - "DifferentialFlashAttentionBackend") - elif selected_backend == _Backend.FLASH_ATTN: - pass - elif selected_backend: - raise ValueError( - f"Invalid attention backend for {cls.device_name}, " - f"with use_v1: {use_v1} use_mla: {use_mla}") - - target_backend = _Backend.FLASH_ATTN - if not cls.has_device_capability(80): - # Volta and Turing NVIDIA GPUs. - logger.info( - "Cannot use FlashAttention-2 backend for Volta and Turing " - "GPUs.") - target_backend = _Backend.XFORMERS - elif dtype not in (torch.float16, torch.bfloat16): - logger.info( - "Cannot use FlashAttention-2 backend for dtype other than " - "torch.float16 or torch.bfloat16.") - target_backend = _Backend.XFORMERS - elif block_size % 16 != 0: - logger.info( - "Cannot use FlashAttention-2 backend for block size not " - "divisible by 16.") - target_backend = _Backend.XFORMERS - - # FlashAttn is valid for the model, checking if the package is - # installed. - if target_backend == _Backend.FLASH_ATTN: - try: - import vllm.vllm_flash_attn # noqa: F401 - from vllm.attention.backends.flash_attn import ( # noqa: F401 - FlashAttentionBackend, flash_attn_supports_fp8) - - supported_sizes = \ - FlashAttentionBackend.get_supported_head_sizes() - if head_size not in supported_sizes: - logger.info( - "Cannot use FlashAttention-2 backend for head size %d.", - head_size) - target_backend = _Backend.XFORMERS - fp8_kv_cache = (kv_cache_dtype is not None - and kv_cache_dtype.startswith("fp8")) - if (fp8_kv_cache and not flash_attn_supports_fp8()): - logger.info( - "Cannot use FlashAttention backend for FP8 KV cache.") - target_backend = _Backend.XFORMERS - except ImportError: - logger.info( - "Cannot use FlashAttention-2 backend because the " - "vllm.vllm_flash_attn package is not found. " - "Make sure that vllm_flash_attn was built and installed " - "(on by default).") - target_backend = _Backend.XFORMERS - - if target_backend == _Backend.XFORMERS: - logger.info("Using XFormers backend.") - return "vllm.attention.backends.xformers.XFormersBackend" - - logger.info("Using Flash Attention backend.") - return "vllm.attention.backends.flash_attn.FlashAttentionBackend" + raise RuntimeError( + "V0 attention backends have been removed. 
Set VLLM_USE_V1=1 " + "to select a supported backend.") @classmethod def get_punica_wrapper(cls) -> str: diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index dce2924ac7a9..9470434aa428 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -191,6 +191,11 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, has_sink) -> str: if use_mla: + if not use_v1: + raise RuntimeError( + "MLA attention backends require the V1 engine. " + "Set VLLM_USE_V1=1 to enable them.") + from vllm.v1.attention.backends.mla.rocm_aiter_mla import ( is_aiter_mla_enabled) @@ -201,39 +206,24 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, if selected_backend == _Backend.TRITON_MLA: if block_size != 1: - if use_v1: - logger.info_once( - "Using Triton MLA backend on V1 engine.") - return ("vllm.v1.attention.backends.mla." - "triton_mla.TritonMLABackend") - else: - logger.info("Using Triton MLA backend.") - return "vllm.attention.backends.triton_mla.TritonMLABackend" # noqa: E501 - else: - raise ValueError( - f" The selected backend, {selected_backend.name}," - f"does not support block size {block_size}.") - elif selected_backend == _Backend.ROCM_AITER_MLA \ - or selected_backend == _Backend.ROCM_AITER_MLA_VLLM_V1: + logger.info_once("Using Triton MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla." + "triton_mla.TritonMLABackend") + raise ValueError( + f" The selected backend, {selected_backend.name}," + f"does not support block size {block_size}.") + if selected_backend in (_Backend.ROCM_AITER_MLA, + _Backend.ROCM_AITER_MLA_VLLM_V1): if block_size == 1: - if use_v1: - logger.info("Using AITER MLA backend on V1 engine.") - return "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend" # noqa: E501 - else: - logger.info("Using AITER MLA backend") - return "vllm.attention.backends.rocm_aiter_mla.AiterMLABackend" # noqa: E501 - else: - raise ValueError( - f" The selected backend, {selected_backend.name}," - f"does not support block size {block_size}." - "(currently only supports block size 1)") - else: + logger.info("Using AITER MLA backend on V1 engine.") + return "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend" # noqa: E501 raise ValueError( f" The selected backend, {selected_backend.name}," - f"is not MLA type while requested for MLA backend.") - - if selected_backend is None or selected_backend == _Backend.FLASH_ATTN: - selected_backend = _Backend.ROCM_FLASH + f"does not support block size {block_size}." + "(currently only supports block size 1)") + raise ValueError( + f" The selected backend, {selected_backend.name}," + f"is not MLA type while requested for MLA backend.") if envs.VLLM_USE_V1: if envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_MHA \ @@ -245,14 +235,9 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, logger.info("Using Triton Attention backend on V1 engine.") return ("vllm.v1.attention.backends." "triton_attn.TritonAttentionBackend") - if selected_backend == _Backend.ROCM_FLASH: - if not cls.has_device_capability(90): - # not Instinct series GPUs. - logger.info("flash_attn is not supported on NAVI GPUs.") - else: - logger.info("%s is not supported in AMD GPUs.", selected_backend) - logger.info("Using ROCmFlashAttention backend.") - return "vllm.attention.backends.rocm_flash_attn.ROCmFlashAttentionBackend" # noqa: E501 + raise RuntimeError( + "V0 attention backends have been removed. 
Set VLLM_USE_V1=1 " + "to select a supported backend.") @classmethod def set_device(cls, device: torch.device) -> None: diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 968bba664f0a..834ec9b1d30b 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -157,10 +157,8 @@ # register, corresponding to possible backends STR_FLASHINFER_ATTN_VAL: str = "FLASHINFER" STR_TORCH_SDPA_ATTN_VAL: str = "TORCH_SDPA" -STR_ROCM_FLASH_ATTN_VAL: str = "ROCM_FLASH" STR_XFORMERS_ATTN_VAL: str = "XFORMERS" STR_FLASH_ATTN_VAL: str = "FLASH_ATTN" -STR_DUAL_CHUNK_FLASH_ATTN_VAL: str = "DUAL_CHUNK_FLASH_ATTN" STR_INVALID_VAL: str = "INVALID" MB_bytes = 1_000_000 From 04d3752329e39d11674862be341faaab42ac4d11 Mon Sep 17 00:00:00 2001 From: Yang Liu <127183760+KKSK-DON@users.noreply.github.com> Date: Sun, 21 Sep 2025 16:06:16 -0700 Subject: [PATCH 216/518] [Bugfix][V0 Deprecation][CI] use async mock and await for async method (#25325) Signed-off-by: Yang --- .../entrypoints/openai/test_lora_resolvers.py | 29 +++++++++++++------ 1 file changed, 20 insertions(+), 9 deletions(-) diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/test_lora_resolvers.py index e2c83b9c4004..9d5ee84a1956 100644 --- a/tests/entrypoints/openai/test_lora_resolvers.py +++ b/tests/entrypoints/openai/test_lora_resolvers.py @@ -5,7 +5,7 @@ from dataclasses import dataclass, field from http import HTTPStatus from typing import Optional -from unittest.mock import MagicMock +from unittest.mock import AsyncMock, MagicMock import pytest @@ -83,20 +83,31 @@ def register_mock_resolver(): def mock_serving_setup(): """Provides a mocked engine and serving completion instance.""" mock_engine = MagicMock(spec=AsyncLLM) - mock_engine.get_tokenizer.return_value = get_tokenizer(MODEL_NAME) mock_engine.errored = False - def mock_add_lora_side_effect(lora_request: LoRARequest): + tokenizer = get_tokenizer(MODEL_NAME) + mock_engine.get_tokenizer = AsyncMock(return_value=tokenizer) + + async def mock_add_lora_side_effect(lora_request: LoRARequest): """Simulate engine behavior when adding LoRAs.""" if lora_request.lora_name == "test-lora": # Simulate successful addition - return - elif lora_request.lora_name == "invalid-lora": + return True + if lora_request.lora_name == "invalid-lora": # Simulate failure during addition (e.g. 
invalid format) raise ValueError(f"Simulated failure adding LoRA: " f"{lora_request.lora_name}") + return True + + mock_engine.add_lora = AsyncMock(side_effect=mock_add_lora_side_effect) + + async def mock_generate(*args, **kwargs): + for _ in []: + yield _ + + mock_engine.generate = MagicMock(spec=AsyncLLM.generate, + side_effect=mock_generate) - mock_engine.add_lora.side_effect = mock_add_lora_side_effect mock_engine.generate.reset_mock() mock_engine.add_lora.reset_mock() @@ -131,7 +142,7 @@ async def test_serving_completion_with_lora_resolver(mock_serving_setup, with suppress(Exception): await serving_completion.create_completion(req_found) - mock_engine.add_lora.assert_called_once() + mock_engine.add_lora.assert_awaited_once() called_lora_request = mock_engine.add_lora.call_args[0][0] assert isinstance(called_lora_request, LoRARequest) assert called_lora_request.lora_name == lora_model_name @@ -157,7 +168,7 @@ async def test_serving_completion_resolver_not_found(mock_serving_setup, response = await serving_completion.create_completion(req) - mock_engine.add_lora.assert_not_called() + mock_engine.add_lora.assert_not_awaited() mock_engine.generate.assert_not_called() assert isinstance(response, ErrorResponse) @@ -181,7 +192,7 @@ async def test_serving_completion_resolver_add_lora_fails( response = await serving_completion.create_completion(req) # Assert add_lora was called before the failure - mock_engine.add_lora.assert_called_once() + mock_engine.add_lora.assert_awaited_once() called_lora_request = mock_engine.add_lora.call_args[0][0] assert isinstance(called_lora_request, LoRARequest) assert called_lora_request.lora_name == invalid_model From 5aeb9254521023f97aca292b3478aa7ff485ffb2 Mon Sep 17 00:00:00 2001 From: Deboleina Date: Sun, 21 Sep 2025 19:07:11 -0400 Subject: [PATCH 217/518] Multimodal - audio tests (#25285) Signed-off-by: Debolina Roy --- tests/multimodal/test_audio.py | 140 +++++++++++++++++++++++++++++++++ 1 file changed, 140 insertions(+) create mode 100644 tests/multimodal/test_audio.py diff --git a/tests/multimodal/test_audio.py b/tests/multimodal/test_audio.py new file mode 100644 index 000000000000..ba39af845041 --- /dev/null +++ b/tests/multimodal/test_audio.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# test_audio.py +import base64 +from pathlib import Path +from unittest.mock import patch + +import numpy as np +import pytest + +from vllm.multimodal.audio import (AudioMediaIO, AudioResampler, + resample_audio_librosa, + resample_audio_scipy) + + +@pytest.fixture +def dummy_audio(): + return np.array([0.0, 0.1, 0.2, 0.3, 0.4], dtype=float) + + +def test_resample_audio_librosa(dummy_audio): + with patch("vllm.multimodal.audio.librosa.resample") as mock_resample: + mock_resample.return_value = dummy_audio * 2 + out = resample_audio_librosa(dummy_audio, + orig_sr=44100, + target_sr=22050) + mock_resample.assert_called_once_with(dummy_audio, + orig_sr=44100, + target_sr=22050) + assert np.all(out == dummy_audio * 2) + + +def test_resample_audio_scipy(dummy_audio): + out_down = resample_audio_scipy(dummy_audio, orig_sr=4, target_sr=2) + out_up = resample_audio_scipy(dummy_audio, orig_sr=2, target_sr=4) + out_same = resample_audio_scipy(dummy_audio, orig_sr=4, target_sr=4) + + assert len(out_down) == 3 + assert len(out_up) == 10 + assert np.all(out_same == dummy_audio) + + +@pytest.mark.xfail( + reason="resample_audio_scipy is buggy for non-integer ratios") +def 
test_resample_audio_scipy_non_integer_ratio(dummy_audio): + out = resample_audio_scipy(dummy_audio, orig_sr=5, target_sr=3) + + expected_len = int(round(len(dummy_audio) * 3 / 5)) + assert len(out) == expected_len + + assert isinstance(out, np.ndarray) + assert np.isfinite(out).all() + + +def test_audio_resampler_librosa_calls_resample(dummy_audio): + resampler = AudioResampler(target_sr=22050, method="librosa") + with patch( + "vllm.multimodal.audio.resample_audio_librosa") as mock_resample: + mock_resample.return_value = dummy_audio + out = resampler.resample(dummy_audio, orig_sr=44100) + mock_resample.assert_called_once_with(dummy_audio, + orig_sr=44100, + target_sr=22050) + assert np.all(out == dummy_audio) + + +def test_audio_resampler_scipy_calls_resample(dummy_audio): + resampler = AudioResampler(target_sr=22050, method="scipy") + with patch("vllm.multimodal.audio.resample_audio_scipy") as mock_resample: + mock_resample.return_value = dummy_audio + out = resampler.resample(dummy_audio, orig_sr=44100) + mock_resample.assert_called_once_with(dummy_audio, + orig_sr=44100, + target_sr=22050) + assert np.all(out == dummy_audio) + + +def test_audio_resampler_invalid_method(dummy_audio): + resampler = AudioResampler(target_sr=22050, method="invalid") + with pytest.raises(ValueError): + resampler.resample(dummy_audio, orig_sr=44100) + + +def test_audio_resampler_no_target_sr(dummy_audio): + resampler = AudioResampler(target_sr=None) + with pytest.raises(RuntimeError): + resampler.resample(dummy_audio, orig_sr=44100) + + +@pytest.fixture +def dummy_audio_bytes(): + return b"FAKEAUDIOBYTES" + + +def test_audio_media_io_load_bytes(dummy_audio_bytes): + audio_io = AudioMediaIO() + with patch("vllm.multimodal.audio.librosa.load") as mock_load: + mock_load.return_value = (np.array([0.1, 0.2]), 16000) + out = audio_io.load_bytes(dummy_audio_bytes) + mock_load.assert_called_once() + assert isinstance(out[0], np.ndarray) + assert out[1] == 16000 + + +def test_audio_media_io_load_base64(dummy_audio_bytes): + audio_io = AudioMediaIO() + encoded = base64.b64encode(dummy_audio_bytes).decode("utf-8") + with patch.object(AudioMediaIO, "load_bytes") as mock_load_bytes: + mock_load_bytes.return_value = (np.array([0.1, 0.2]), 16000) + out = audio_io.load_base64("audio/wav", encoded) + mock_load_bytes.assert_called_once() + assert isinstance(out[0], np.ndarray) + assert out[1] == 16000 + + +def test_audio_media_io_load_file(): + audio_io = AudioMediaIO() + path = Path("/fake/path.wav") + with patch("vllm.multimodal.audio.librosa.load") as mock_load: + mock_load.return_value = (np.array([0.1, 0.2]), 16000) + out = audio_io.load_file(path) + mock_load.assert_called_once_with(path, sr=None) + assert isinstance(out[0], np.ndarray) + assert out[1] == 16000 + + +def test_audio_media_io_encode_base64(dummy_audio): + audio_io = AudioMediaIO() + media = (dummy_audio, 16000) + with patch("vllm.multimodal.audio.soundfile.write") as mock_write: + + def write_to_buffer(buffer, *_args, **_kwargs): + buffer.write(b"dummy_wav_data") + + mock_write.side_effect = write_to_buffer + + out = audio_io.encode_base64(media) + decoded = base64.b64decode(out) + assert decoded == b"dummy_wav_data" + mock_write.assert_called_once() From 7b57a433daf94dc075ccf52267b82aae359d0d53 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 21 Sep 2025 19:24:40 -0700 Subject: [PATCH 218/518] [Model] Support Dots OCR (#24645) Signed-off-by: Roger Wang Co-authored-by: yinz-aizip --- docs/models/supported_models.md | 1 + 
examples/offline_inference/vision_language.py | 18 + tests/models/registry.py | 2 + vllm/model_executor/models/dots_ocr.py | 824 ++++++++++++++++++ vllm/model_executor/models/registry.py | 1 + vllm/transformers_utils/configs/__init__.py | 2 + vllm/transformers_utils/configs/dotsocr.py | 69 ++ 7 files changed, 917 insertions(+) create mode 100644 vllm/model_executor/models/dots_ocr.py create mode 100644 vllm/transformers_utils/configs/dotsocr.py diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index cbc0a56a645e..9d288667a318 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -352,6 +352,7 @@ th { | `DeepseekV2ForCausalLM` | DeepSeek-V2 | `deepseek-ai/DeepSeek-V2`, `deepseek-ai/DeepSeek-V2-Chat`, etc. | ✅︎ | ✅︎ | ✅︎ | | `DeepseekV3ForCausalLM` | DeepSeek-V3 | `deepseek-ai/DeepSeek-V3`, `deepseek-ai/DeepSeek-R1`, `deepseek-ai/DeepSeek-V3.1`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Dots1ForCausalLM` | dots.llm1 | `rednote-hilab/dots.llm1.base`, `rednote-hilab/dots.llm1.inst`, etc. | | ✅︎ | ✅︎ | +| `DotsOCRForCausalLM` | dots_ocr | `rednote-hilab/dots.ocr` | | ✅︎ | ✅︎ | | `Ernie4_5ForCausalLM` | Ernie4.5 | `baidu/ERNIE-4.5-0.3B-PT`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Ernie4_5_MoeForCausalLM` | Ernie4.5MoE | `baidu/ERNIE-4.5-21B-A3B-PT`, `baidu/ERNIE-4.5-300B-A47B-PT`, etc. |✅︎| ✅︎ | ✅︎ | | `ExaoneForCausalLM` | EXAONE-3 | `LGAI-EXAONE/EXAONE-3.0-7.8B-Instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py index de3f3afc1794..f8ddb5a22b31 100644 --- a/examples/offline_inference/vision_language.py +++ b/examples/offline_inference/vision_language.py @@ -126,6 +126,23 @@ def run_chameleon(questions: list[str], modality: str) -> ModelRequestData: ) +# Dots-OCR +def run_dots_ocr(questions: list[str], modality: str) -> ModelRequestData: + assert modality == "image" + + prompts = [f"<|img|><|imgpad|><|endofimg|>{question}" for question in questions] + engine_args = EngineArgs( + model="rednote-hilab/dots.ocr", + limit_mm_per_prompt={modality: 1}, + trust_remote_code=True, + ) + + return ModelRequestData( + engine_args=engine_args, + prompts=prompts, + ) + + def run_command_a_vision(questions: list[str], modality: str) -> ModelRequestData: assert modality == "image" @@ -1676,6 +1693,7 @@ def run_tarsier2(questions: list[str], modality: str) -> ModelRequestData: "aya_vision": run_aya_vision, "blip-2": run_blip2, "chameleon": run_chameleon, + "dots_ocr": run_dots_ocr, "command_a_vision": run_command_a_vision, "deepseek_vl_v2": run_deepseek_vl2, "ernie45_vl": run_ernie45_vl, diff --git a/tests/models/registry.py b/tests/models/registry.py index e9cc5170ade7..29b6980aaa42 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -448,6 +448,8 @@ def check_available_online( max_transformers_version="4.48", # noqa: E501 transformers_version_reason="HF model is not compatible.", # noqa: E501 hf_overrides={"architectures": ["DeepseekVLV2ForCausalLM"]}), # noqa: E501 + "DotsOCRForCausalLM": _HfExamplesInfo("rednote-hilab/dots.ocr", + trust_remote_code=True), "Emu3ForConditionalGeneration": _HfExamplesInfo("BAAI/Emu3-Chat-hf"), "Ernie4_5_VLMoeForConditionalGeneration": _HfExamplesInfo("baidu/ERNIE-4.5-VL-28B-A3B-PT", # noqa: E501 trust_remote_code=True), diff --git a/vllm/model_executor/models/dots_ocr.py b/vllm/model_executor/models/dots_ocr.py new file mode 100644 index 000000000000..04fa5584199a --- /dev/null +++ b/vllm/model_executor/models/dots_ocr.py @@ -0,0 +1,824 @@ +# 
SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections.abc import Iterable, Mapping +from typing import Literal, Optional, TypedDict, Union + +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.nn import LayerNorm +from transformers.modeling_utils import PreTrainedModel +from transformers.models.qwen2_vl import Qwen2VLProcessor + +from vllm.attention.layer import check_upstream_fa_availability +from vllm.config import VllmConfig +from vllm.model_executor.layers.activation import SiluAndMul +from vllm.model_executor.layers.layernorm import RMSNorm +from vllm.model_executor.layers.linear import (ColumnParallelLinear, + MergedColumnParallelLinear, + QKVParallelLinear, + RowParallelLinear) +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.models.interfaces import (MultiModalEmbeddings, + SupportsMultiModal, + SupportsPP) +from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM +from vllm.model_executor.models.qwen2_vl import (Qwen2VLDummyInputsBuilder, + Qwen2VLMultiModalProcessor, + Qwen2VLProcessingInfo) +from vllm.model_executor.models.utils import (AutoWeightsLoader, WeightsMapper, + init_vllm_registered_model, + maybe_prefix, + merge_multimodal_embeddings) +from vllm.model_executor.models.vision import get_vit_attn_backend +from vllm.multimodal import MULTIMODAL_REGISTRY +from vllm.multimodal.inputs import MultiModalDataDict +from vllm.platforms import _Backend +from vllm.sequence import IntermediateTensors +from vllm.transformers_utils.configs.dotsocr import (DotsOCRConfig, + DotsVisionConfig) + +IMAGE_TOKEN = "<|imgpad|>" + + +class DotsOCRImagePixelInputs(TypedDict): + type: Literal["pixel_values", "image_grid_thw"] + + pixel_values: torch.Tensor + image_grid_thw: torch.Tensor + + +class DotsOCRImageEmbeddingInputs(TypedDict): + type: Literal["image_embeds", "image_grid_thw"] + image_embeds: torch.Tensor + """Supported types: + - List[`torch.Tensor`]: A list of tensors holding all images' features. + Each tensor holds an image's features. + - `torch.Tensor`: A tensor holding all images' features + (concatenation of all images' feature tensors). + Tensor shape: `(num_image_features, hidden_size)` + - `num_image_features` varies based on + the number and resolution of the images. + - `hidden_size` must match the hidden size of language model backbone. 
+ """ + + image_grid_thw: torch.Tensor + + +DotsOCRImageInputs = Union[DotsOCRImagePixelInputs, + DotsOCRImageEmbeddingInputs] + + +class DotsOCRDummyInputsBuilder(Qwen2VLDummyInputsBuilder): + + def get_dummy_text(self, mm_counts: Mapping[str, int]) -> str: + num_images = mm_counts.get("image", 0) + return IMAGE_TOKEN * num_images + + def get_dummy_mm_data( + self, + seq_len: int, + mm_counts: Mapping[str, int], + ) -> MultiModalDataDict: + num_images = mm_counts.get("image", 0) + + target_width, target_height = self.info.get_image_size_with_most_features( # noqa: E501 + ) + + return { + "image": + self._get_dummy_images(width=target_width, + height=target_height, + num_images=num_images), + } + + +class DotsOCRProcessingInfo(Qwen2VLProcessingInfo): + + def get_hf_config(self) -> DotsOCRConfig: + config = self.ctx.get_hf_config() + if not config.__class__.__name__ == 'DotsOCRConfig': + raise TypeError(f"Expected DotsOCRConfig, got {type(config)}") + + if hasattr(config, "vision_config") and isinstance( + config.vision_config, dict): + config.vision_config = DotsVisionConfig(**config.vision_config) + + return config + + def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]: + return {"image": None} + + def get_mm_max_tokens_per_item( + self, + seq_len: int, + mm_counts: Mapping[str, int], + ) -> Mapping[str, int]: + max_image_tokens = self.get_max_image_tokens() + return {"image": max_image_tokens} + + def get_hf_processor( + self, + **kwargs: object, + ) -> Qwen2VLProcessor: + self.get_tokenizer( + ).image_token = IMAGE_TOKEN # Ensure image token is set + processor = self.ctx.get_hf_processor( + Qwen2VLProcessor, + **kwargs, + ) + processor.image_token = IMAGE_TOKEN + processor.video_token = "<|video_pad|>" + return processor + + +def rotate_half(x): + """Rotates half the hidden dims of the input.""" + x1 = x[..., :x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2:] + return torch.cat((-x2, x1), dim=-1) + + +def apply_rotary_pos_emb_vision(tensor: torch.Tensor, + freqs: torch.Tensor) -> torch.Tensor: + orig_dtype = tensor.dtype + tensor = tensor.float() + + cos = freqs.cos() + sin = freqs.sin() + + cos = cos.unsqueeze(1).repeat(1, 1, 2).unsqueeze(0).float() + sin = sin.unsqueeze(1).repeat(1, 1, 2).unsqueeze(0).float() + + output = (tensor * cos) + (rotate_half(tensor) * sin) + + output = output.to(orig_dtype) + + return output + + +class VisionRotaryEmbedding(nn.Module): + + def __init__(self, dim: int, theta: float = 10000.0) -> None: + super().__init__() + inv_freq = 1.0 / (theta + **(torch.arange(0, dim, 2, dtype=torch.float) / dim)) + self.register_buffer("inv_freq", inv_freq, persistent=False) + + def forward(self, seqlen: int) -> torch.Tensor: + seq = torch.arange(seqlen, + device=self.inv_freq.device, + dtype=self.inv_freq.dtype) + freqs = torch.outer(seq, self.inv_freq) + return freqs + + +class PatchMerger(nn.Module): + + def __init__( + self, + dim: int, + context_dim: int, + spatial_merge_size: int = 2, + pre_norm="layernorm", + ) -> None: + super().__init__() + self.hidden_size = context_dim * (spatial_merge_size**2) + self.pre_norm = pre_norm + if self.pre_norm == "layernorm": + self.ln_q = LayerNorm(context_dim, eps=1e-6) + elif self.pre_norm == "rmsnorm": + self.ln_q = RMSNorm(context_dim, eps=1e-6) + else: + print("no norm in patch merger") + + self.mlp = nn.Sequential( + ColumnParallelLinear(self.hidden_size, + self.hidden_size, + bias=True, + return_bias=False, + disable_tp=True), + nn.GELU(), + RowParallelLinear(self.hidden_size, + dim, + bias=True, + 
return_bias=False, + disable_tp=True), + ) + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if self.pre_norm: + x = self.mlp(self.ln_q(x).view(-1, self.hidden_size)) + else: + x = self.mlp(x.view(-1, self.hidden_size)) + return x + + +class DotsVisionAttention(nn.Module): + + def __init__(self, + config, + dim: int, + num_heads: int = 16, + bias: bool = True, + *, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "") -> None: + super().__init__() + from vllm.distributed import (parallel_state, + tensor_model_parallel_all_gather) + from vllm.distributed import utils as dist_utils + + self.embed_dim = dim + self.num_heads = num_heads + self.head_dim = dim // num_heads + self.tp_size = parallel_state.get_tensor_model_parallel_world_size() + self.tp_rank = parallel_state.get_tensor_model_parallel_rank() + self.num_heads_per_partition = dist_utils.divide( + num_heads, self.tp_size) + + # qkv/proj follow Qwen2-VL style; bias controlled by arg + self.qkv = QKVParallelLinear(hidden_size=dim, + head_size=dim // num_heads, + total_num_heads=num_heads, + bias=bias, + quant_config=quant_config, + prefix=f"{prefix}.qkv") + self.proj = RowParallelLinear(input_size=dim, + output_size=dim, + bias=bias, + quant_config=quant_config, + prefix=f"{prefix}.proj") + self._all_gather = tensor_model_parallel_all_gather + self._split_last = dist_utils.split_tensor_along_last_dim + + # Select attention backend + self.attn_backend = get_vit_attn_backend(self.head_dim, + torch.get_default_dtype()) + self.use_upstream_fa = False + if self.attn_backend != _Backend.FLASH_ATTN and \ + check_upstream_fa_availability(torch.get_default_dtype()): + self.attn_backend = _Backend.FLASH_ATTN + self.use_upstream_fa = True + if self.attn_backend not in { + _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS, + _Backend.ROCM_AITER_FA + }: + raise RuntimeError( + f"Unsupported vision attention backend: {self.attn_backend}") + self.is_flash_attn_backend = self.attn_backend in { + _Backend.FLASH_ATTN, _Backend.ROCM_AITER_FA + } + + def _split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]: + # qkv: [S, B, 3*dim] + seq_len, bs, _ = qkv.shape + if self.tp_size > 1: + qkv = self._all_gather(qkv) + q, k, v = qkv.chunk(3, dim=2) + if self.tp_size > 1: + q = self._split_last(q, num_partitions=self.tp_size)[self.tp_rank] + k = self._split_last(k, num_partitions=self.tp_size)[self.tp_rank] + v = self._split_last(v, num_partitions=self.tp_size)[self.tp_rank] + new_shape = (seq_len, bs, self.num_heads_per_partition, self.head_dim) + return (q.view(*new_shape), k.view(*new_shape), v.view(*new_shape)) + + def forward( + self, + hidden_states: torch.Tensor, + cu_seqlens: torch.Tensor, + rotary_pos_emb: Optional[torch.Tensor] = None, + *, + max_seqlen: Optional[int] = None, + seqlens: Optional[list[int]] = None, + ) -> torch.Tensor: + # [S, C] -> [S, B=1, C] + x = hidden_states.unsqueeze(1) + x, _ = self.qkv(x) + q, k, v = self._split_qkv(x) + bs = q.shape[1] + # [S,B,H,D] -> [B,S,H,D] + q = q.permute(1, 0, 2, 3).contiguous() + k = k.permute(1, 0, 2, 3).contiguous() + v = v.permute(1, 0, 2, 3).contiguous() + + if rotary_pos_emb is not None: + qk_concat = torch.cat([q, k], dim=0) + qk_rotated = apply_rotary_pos_emb_vision(qk_concat, rotary_pos_emb) + q, k = torch.chunk(qk_rotated, 2, dim=0) + + if self.is_flash_attn_backend: + if self.attn_backend == _Backend.ROCM_AITER_FA: + from aiter import flash_attn_varlen_func + else: + if self.use_upstream_fa: + from flash_attn import flash_attn_varlen_func + else: 
+ from vllm.vllm_flash_attn import flash_attn_varlen_func + q_ = q.reshape(bs * q.shape[1], q.shape[2], q.shape[3]) + k_ = k.reshape(bs * k.shape[1], k.shape[2], k.shape[3]) + v_ = v.reshape(bs * v.shape[1], v.shape[2], v.shape[3]) + output = flash_attn_varlen_func(q_, + k_, + v_, + cu_seqlens_q=cu_seqlens, + cu_seqlens_k=cu_seqlens, + max_seqlen_q=max_seqlen, + max_seqlen_k=max_seqlen, + dropout_p=0.0, + causal=False) + context_layer = output.view(bs, -1, self.num_heads_per_partition, + self.head_dim) + elif self.attn_backend == _Backend.TORCH_SDPA: + outputs = [] + for i in range(1, len(cu_seqlens)): + s = int(cu_seqlens[i - 1]) + e = int(cu_seqlens[i]) + q_i = q[:, s:e].permute(0, 2, 1, 3) + k_i = k[:, s:e].permute(0, 2, 1, 3) + v_i = v[:, s:e].permute(0, 2, 1, 3) + out_i = F.scaled_dot_product_attention(q_i, + k_i, + v_i, + dropout_p=0.0) + out_i = out_i.permute(0, 2, 1, 3) + outputs.append(out_i) + context_layer = torch.cat(outputs, dim=1) if outputs else q[:, :0] + elif self.attn_backend == _Backend.XFORMERS: + from xformers import ops as xops + from xformers.ops.fmha.attn_bias import BlockDiagonalMask + attn_bias = BlockDiagonalMask.from_seqlens(q_seqlen=seqlens, + kv_seqlen=None, + device=q.device) + context_layer = xops.memory_efficient_attention_forward( + q, k, v, attn_bias=attn_bias, p=0, scale=None) + else: + raise RuntimeError("Unsupported attention backend") + + # [B,S,H,D] -> [S,B,H*D] -> [S, C] + context_layer = context_layer.permute(1, 0, 2, 3).contiguous() + context_layer = context_layer.view(context_layer.shape[0], bs, -1) + out, _ = self.proj(context_layer) + return out.squeeze(1) + + +class DotsSwiGLUFFN(nn.Module): + + def __init__(self, + config, + *, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = ""): + super().__init__() + hidden_features = config.intermediate_size + in_features = config.embed_dim + bias = config.use_bias + + # Referenced aimv2.py AIMv2SwiGLUFFN + self.fc13 = MergedColumnParallelLinear(in_features, + [hidden_features] * 2, + bias=bias, + quant_config=quant_config, + prefix=f"{prefix}.fc13", + disable_tp=True) + self.fc2 = RowParallelLinear(hidden_features, + in_features, + bias=bias, + quant_config=quant_config, + prefix=f"{prefix}.fc2", + disable_tp=True) + self.act_fn = SiluAndMul() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + x, _ = self.fc13(x) + x = self.act_fn(x) + x, _ = self.fc2(x) + return x + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + params = dict(self.named_parameters()) + loaded: set[str] = set() + for name, w in weights: + # Map fc1 -> fc13 (shard 0) + if name.startswith("fc1."): + tgt = name.replace("fc1.", "fc13.") + if tgt in params: + params[tgt].weight_loader(params[tgt], w, 0) + loaded.add(tgt) + continue + # Map fc3 -> fc13 (shard 1) + if name.startswith("fc3."): + tgt = name.replace("fc3.", "fc13.") + if tgt in params: + params[tgt].weight_loader(params[tgt], w, 1) + loaded.add(tgt) + continue + # Pass-through for fc2 and others + if name in params: + params[name].weight_loader(params[name], w) + loaded.add(name) + return loaded + + +class DotsPatchEmbed(nn.Module): + + def __init__(self, config): + super().__init__() + self.num_channels = config.num_channels + self.patch_size = config.patch_size + self.temporal_patch_size = config.temporal_patch_size + self.embed_dim = config.embed_dim + self.config = config + self.proj = nn.Conv2d( + config.num_channels, + config.embed_dim, + kernel_size=(config.patch_size, config.patch_size), + 
stride=(config.patch_size, config.patch_size), + ) + self.norm = RMSNorm(config.embed_dim, eps=config.rms_norm_eps) + + def forward(self, x: torch.Tensor, grid_thw=None) -> torch.Tensor: + x = x.view(-1, self.num_channels, self.temporal_patch_size, + self.patch_size, self.patch_size)[:, :, 0] + x = self.proj(x).view(-1, self.embed_dim) + x = self.norm(x) + return x + + +class DotsViTPreprocessor(nn.Module): + + def __init__(self, config): + super().__init__() + self.patch_h = config.patch_size + self.patch_w = config.patch_size + self.embed_dim = config.embed_dim + self.config = config + self.patchifier = DotsPatchEmbed(config) + + def forward(self, x: torch.Tensor, grid_thw=None) -> torch.Tensor: + tokens = self.patchifier(x, grid_thw) + return tokens + + +class DotsVisionBlock(nn.Module): + + def __init__(self, + config, + *, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = ""): + super().__init__() + + self.attn = DotsVisionAttention( + config, + config.embed_dim, + num_heads=config.num_attention_heads, + bias=config.use_bias, + quant_config=quant_config, + prefix=f"{prefix}.attn", + ) + self.norm1 = RMSNorm(config.embed_dim, eps=config.rms_norm_eps) + self.mlp = DotsSwiGLUFFN(config, + quant_config=quant_config, + prefix=f"{prefix}.mlp") + self.norm2 = RMSNorm(config.embed_dim, eps=config.rms_norm_eps) + + def forward(self, + hidden_states: torch.Tensor, + *, + cu_seqlens: torch.Tensor, + rotary_pos_emb: torch.Tensor, + max_seqlen: Optional[int] = None, + seqlens: Optional[list[int]] = None) -> torch.Tensor: + hidden_states = hidden_states + self.attn( + self.norm1(hidden_states), + cu_seqlens=cu_seqlens, + rotary_pos_emb=rotary_pos_emb, + max_seqlen=max_seqlen, + seqlens=seqlens, + ) + hidden_states = hidden_states + self.mlp(self.norm2(hidden_states)) + return hidden_states + + +class DotsVisionTransformer(PreTrainedModel): + + def __init__( + self, + config: DotsVisionConfig, + quant_config: Optional[QuantizationConfig] = None, + *, + num_hidden_layers_override: Optional[int] = None, + require_post_norm: Optional[bool] = None, + prefix: str = "", + ) -> None: + super().__init__(config) + self.config = config + self.spatial_merge_size = config.spatial_merge_size + + self.patch_embed = DotsViTPreprocessor(config) + + head_dim = config.embed_dim // config.num_attention_heads + self.rotary_pos_emb = VisionRotaryEmbedding(head_dim // 2) + self.attn_backend = get_vit_attn_backend( + head_size=head_dim, dtype=torch.get_default_dtype()) + if self.attn_backend != _Backend.FLASH_ATTN and \ + check_upstream_fa_availability(torch.get_default_dtype()): + self.attn_backend = _Backend.FLASH_ATTN + + # Keep blocks for compatibility with other vision towers + num_layers = (config.num_hidden_layers if num_hidden_layers_override + is None else num_hidden_layers_override) + self.blocks = nn.ModuleList([ + DotsVisionBlock(config, + quant_config=quant_config, + prefix=f"{prefix}.blocks.{i}") + for i in range(num_layers) + ]) + if require_post_norm is None: + require_post_norm = (len(self.blocks) == config.num_hidden_layers) + if require_post_norm and self.config.post_norm: + self.post_trunk_norm = RMSNorm(config.embed_dim, + eps=config.rms_norm_eps) + else: + self.post_trunk_norm = None + + self.merger = PatchMerger( + dim=config.hidden_size, + context_dim=config.embed_dim, + spatial_merge_size=config.spatial_merge_size, + ) + + @property + def dtype(self) -> torch.dtype: + return self.patch_embed.patchifier.proj.weight.dtype + + @property + def device(self) -> torch.device: + return 
self.patch_embed.patchifier.proj.weight.device + + def get_pos_ids_by_grid(self, grid_thw): + pos_ids = [] + for t, h, w in grid_thw: + hpos_ids = torch.arange(h).unsqueeze(1).expand(-1, w) + hpos_ids = hpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + hpos_ids = hpos_ids.permute(0, 2, 1, 3) + hpos_ids = hpos_ids.flatten() + + wpos_ids = torch.arange(w).unsqueeze(0).expand(h, -1) + wpos_ids = wpos_ids.reshape( + h // self.spatial_merge_size, + self.spatial_merge_size, + w // self.spatial_merge_size, + self.spatial_merge_size, + ) + wpos_ids = wpos_ids.permute(0, 2, 1, 3) + wpos_ids = wpos_ids.flatten() + pos_ids.append( + torch.stack([hpos_ids, wpos_ids], dim=-1).repeat(t, 1)) + + return pos_ids + + def rot_pos_emb(self, grid_thw): + pos_ids = self.get_pos_ids_by_grid(grid_thw) + pos_ids = torch.cat(pos_ids, dim=0) + max_grid_size = grid_thw[:, 1:].max() + rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size) + rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(1) + return rotary_pos_emb + + def compute_attn_mask_seqlen( + self, cu_seqlens: torch.Tensor + ) -> tuple[Optional[int], Optional[list[int]]]: + max_seqlen, seqlens = None, None + if self.attn_backend == _Backend.FLASH_ATTN: + max_seqlen = (cu_seqlens[1:] - cu_seqlens[:-1]).max().item() + elif self.attn_backend == _Backend.XFORMERS: + seqlens = (cu_seqlens[1:] - cu_seqlens[:-1]).tolist() + return max_seqlen, seqlens + + def forward(self, hidden_states: torch.Tensor, + grid_thw: torch.Tensor) -> torch.Tensor: + hidden_states = hidden_states.to(self.dtype) + hidden_states = self.patch_embed(hidden_states, grid_thw) + + rotary_pos_emb = self.rot_pos_emb(grid_thw) + + cu_seqlens = torch.repeat_interleave( + grid_thw[:, 1] * grid_thw[:, 2], grid_thw[:, 0]).cumsum( + dim=0, + dtype=grid_thw.dtype + if torch.jit.is_tracing() else torch.int32, + ) + cu_seqlens = F.pad(cu_seqlens, (1, 0), value=0) + + max_seqlen, seqlens = self.compute_attn_mask_seqlen(cu_seqlens) + for blk in self.blocks: + hidden_states = blk(hidden_states, + cu_seqlens=cu_seqlens, + rotary_pos_emb=rotary_pos_emb, + max_seqlen=max_seqlen, + seqlens=seqlens) + + if self.post_trunk_norm is not None: + hidden_states = self.post_trunk_norm(hidden_states) + + hidden_states = self.merger(hidden_states) + return hidden_states + + +@MULTIMODAL_REGISTRY.register_processor( + Qwen2VLMultiModalProcessor, + info=DotsOCRProcessingInfo, + dummy_inputs=DotsOCRDummyInputsBuilder, +) +class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): + hf_to_vllm_mapper = WeightsMapper( + orig_to_new_substr={ + ".attn.qkv_proj.": ".attn.qkv.", + ".attn.out_proj.": ".attn.proj.", + }, + orig_to_new_prefix={ + "lm_head.": "language_model.lm_head.", + "model.": "language_model.model.", + }, + ) + + @classmethod + def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: + if modality.startswith("image"): + return "<|img|><|imgpad|><|endofimg|>" + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + + self.config: DotsOCRConfig = vllm_config.model_config.hf_config + self.quant_config = vllm_config.quant_config + self.multimodal_config = vllm_config.model_config.multimodal_config + + if isinstance(self.config.vision_config, dict): + vision_config = DotsVisionConfig(**self.config.vision_config) + self.config.vision_config = vision_config + else: + vision_config = self.config.vision_config + + self.vision_tower = DotsVisionTransformer( + 
vision_config, + quant_config=self.quant_config, + prefix=maybe_prefix(prefix, "vision_tower"), + ) + self.language_model: Qwen2ForCausalLM = init_vllm_registered_model( + vllm_config=vllm_config, + hf_config=self.config, + prefix=maybe_prefix(prefix, "language_model"), + architectures=["Qwen2ForCausalLM"], + ) + + def _validate_and_reshape_mm_tensor(self, mm_input: object, + name: str) -> torch.Tensor: + if not isinstance(mm_input, (torch.Tensor, list)): + raise ValueError(f"Incorrect type of {name}. " + f"Got type: {type(mm_input)}") + if isinstance(mm_input, torch.Tensor): + if mm_input.ndim == 2: + return mm_input + if mm_input.ndim != 3: + raise ValueError(f"{name} should be 2D or batched 3D tensor. " + f"Got ndim: {mm_input.ndim} " + f"(shape={mm_input.shape})") + return torch.concat(list(mm_input)) + else: + return torch.concat(mm_input) + + def _parse_and_validate_image_input( + self, **kwargs: object) -> Optional[DotsOCRImageInputs]: + pixel_values = kwargs.pop("pixel_values", None) + image_embeds = kwargs.pop("image_embeds", None) + image_grid_thw = kwargs.pop("image_grid_thw", None) + + if pixel_values is None and image_embeds is None: + return None + + if pixel_values is not None: + pixel_values = self._validate_and_reshape_mm_tensor( + pixel_values, "image pixel values") + image_grid_thw = self._validate_and_reshape_mm_tensor( + image_grid_thw, "image grid_thw") + + if not isinstance(pixel_values, (torch.Tensor, list)): + raise ValueError("Incorrect type of image pixel values. " + f"Got type: {type(pixel_values)}") + + return DotsOCRImagePixelInputs(type="pixel_values", + pixel_values=pixel_values, + image_grid_thw=image_grid_thw) + + if image_embeds is not None: + image_embeds = self._validate_and_reshape_mm_tensor( + image_embeds, "image embeds") + image_grid_thw = self._validate_and_reshape_mm_tensor( + image_grid_thw, "image grid_thw") + + if not isinstance(image_embeds, torch.Tensor): + raise ValueError("Incorrect type of image embeddings. " + f"Got type: {type(image_embeds)}") + return DotsOCRImageEmbeddingInputs(type="image_embeds", + image_embeds=image_embeds, + image_grid_thw=image_grid_thw) + + def _process_image_input( + self, image_input: DotsOCRImageInputs) -> tuple[torch.Tensor, ...]: + grid_thw = image_input["image_grid_thw"] + assert grid_thw.ndim == 2 + grid_thw_list = grid_thw.tolist() + + if image_input["type"] == "image_embeds": + image_embeds = image_input["image_embeds"].type( + self.vision_tower.dtype) + else: + pixel_values = image_input["pixel_values"].type( + self.vision_tower.dtype) + image_embeds = self.vision_tower( + pixel_values, grid_thw)[:, :self.config.hidden_size] + + # Split concatenated embeddings for each image item. 
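+        # Each image contributes t * h * w patches, and the PatchMerger folds
+        # every spatial_merge_size**2 of them into one embedding, so the
+        # per-image split size below is prod(grid_thw) // merge_size**2.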
+ merge_size = self.vision_tower.spatial_merge_size + sizes = (torch.tensor(grid_thw_list, dtype=torch.long).prod(-1) // + (merge_size * merge_size)).tolist() + + return image_embeds.split(sizes) + + def get_language_model(self) -> torch.nn.Module: + return self.language_model + + def get_multimodal_embeddings( + self, **kwargs: object) -> Optional[MultiModalEmbeddings]: + image_input = self._parse_and_validate_image_input(**kwargs) + if image_input is None: + return [] + vision_embeddings = self._process_image_input(image_input) + return vision_embeddings + + def get_input_embeddings( + self, + input_ids: torch.Tensor, + multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + ) -> torch.Tensor: + inputs_embeds = self.language_model.get_input_embeddings(input_ids) + if multimodal_embeddings is not None: + inputs_embeds = merge_multimodal_embeddings( + input_ids, + inputs_embeds, + multimodal_embeddings, + self.config.image_token_id, + ) + + return inputs_embeds + + def forward( + self, + input_ids: Optional[torch.Tensor], + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + **kwargs, + ) -> Union[torch.Tensor, IntermediateTensors]: + if intermediate_tensors is not None: + inputs_embeds = None + elif inputs_embeds is None and kwargs.get("pixel_values") is not None: + image_input = self._parse_and_validate_image_input(**kwargs) + if image_input is None: + inputs_embeds = None + else: + assert input_ids is not None + inputs_embeds = self.get_multimodal_embeddings( + input_ids, + image_input=image_input, + ) + input_ids = None + + hidden_states = self.language_model( + input_ids=input_ids, + positions=positions, + intermediate_tensors=intermediate_tensors, + inputs_embeds=inputs_embeds, + ) + + return hidden_states + + def compute_logits( + self, + hidden_states: torch.Tensor, + ) -> Optional[torch.Tensor]: + return self.language_model.compute_logits(hidden_states) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader(self) + return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 5dc5d545bb9c..86123bc092b9 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -219,6 +219,7 @@ "ChameleonForConditionalGeneration": ("chameleon", "ChameleonForConditionalGeneration"), # noqa: E501 "Cohere2VisionForConditionalGeneration": ("cohere2_vision", "Cohere2VisionForConditionalGeneration"), # noqa: E501 "DeepseekVLV2ForCausalLM": ("deepseek_vl2", "DeepseekVLV2ForCausalLM"), + "DotsOCRForCausalLM": ("dots_ocr", "DotsOCRForCausalLM"), "Ernie4_5_VLMoeForConditionalGeneration": ("ernie45_vl", "Ernie4_5_VLMoeForConditionalGeneration"), # noqa: E501 "FuyuForCausalLM": ("fuyu", "FuyuForCausalLM"), "Gemma3ForConditionalGeneration": ("gemma3_mm", "Gemma3ForConditionalGeneration"), # noqa: E501 diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index 91bfeb8c55ee..52fa49ad302b 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -9,6 +9,7 @@ from vllm.transformers_utils.configs.chatglm import ChatGLMConfig from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config +from vllm.transformers_utils.configs.dotsocr import DotsOCRConfig from vllm.transformers_utils.configs.eagle import 
EAGLEConfig # RWConfig is for the original tiiuae/falcon-40b(-instruct) and # tiiuae/falcon-7b(-instruct) models. Newer Falcon models will use the @@ -36,6 +37,7 @@ __all__ = [ "ChatGLMConfig", "DeepseekVLV2Config", + "DotsOCRConfig", "EAGLEConfig", "RWConfig", "JAISConfig", diff --git a/vllm/transformers_utils/configs/dotsocr.py b/vllm/transformers_utils/configs/dotsocr.py new file mode 100644 index 000000000000..6bb3c12d9c7e --- /dev/null +++ b/vllm/transformers_utils/configs/dotsocr.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Any, Optional + +from transformers.configuration_utils import PretrainedConfig +from transformers.models.qwen2 import Qwen2Config + + +class DotsVisionConfig(PretrainedConfig): + model_type: str = "dots_vit" + + def __init__( + self, + embed_dim: int = 1536, # vision encoder embed size + hidden_size: int = 1536, # after merger hidden size + intermediate_size: int = 4224, + num_hidden_layers: int = 42, + num_attention_heads: int = 12, + num_channels: int = 3, + patch_size: int = 14, + spatial_merge_size: int = 2, + temporal_patch_size: int = 1, + rms_norm_eps: float = 1e-5, + use_bias: bool = False, + attn_implementation="flash_attention_2", + initializer_range=0.02, + init_merger_std=0.02, + is_causal=False, # ve causal forward + post_norm=True, + gradient_checkpointing=False, + **kwargs: Any, + ): + super().__init__(**kwargs) + self.embed_dim = embed_dim + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.num_channels = num_channels + self.patch_size = patch_size + self.spatial_merge_size = spatial_merge_size + self.temporal_patch_size = temporal_patch_size + self.rms_norm_eps = rms_norm_eps + self.use_bias = use_bias + self.attn_implementation = attn_implementation + self.initializer_range = initializer_range + self.init_merger_std = init_merger_std + self.is_causal = is_causal + self.post_norm = post_norm + self.gradient_checkpointing = gradient_checkpointing + + +class DotsOCRConfig(Qwen2Config): + model_type = "dots_ocr" + + def __init__(self, + image_token_id=151665, + video_token_id=151656, + vision_config: Optional[dict] = None, + *args, + **kwargs): + super().__init__(*args, **kwargs) + self.image_token_id = image_token_id + self.video_token_id = video_token_id + self.vision_config = DotsVisionConfig(**(vision_config or {})) + + def save_pretrained(self, save_directory, **kwargs): + self._auto_class = None + super().save_pretrained(save_directory, **kwargs) From 793be8d0579f11b3326f7e2099432c1433033571 Mon Sep 17 00:00:00 2001 From: WeiQing Chen <40507679+david6666666@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:49:13 +0800 Subject: [PATCH 219/518] [Docs] GSM8K Accuracy Evaluation doc update (#25360) Signed-off-by: David Chen <530634352@qq.com> --- tests/evals/gsm8k/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/evals/gsm8k/README.md b/tests/evals/gsm8k/README.md index 58572c3a6fbc..29c5199e1e87 100644 --- a/tests/evals/gsm8k/README.md +++ b/tests/evals/gsm8k/README.md @@ -19,7 +19,7 @@ pytest -s -v tests/gsm8k/test_gsm8k_correctness.py \ vllm serve Qwen/Qwen2.5-1.5B-Instruct --port 8000 # Run evaluation -python tests/gsm8k/gsm8k_eval.py --port 8000 +python tests/evals/gsm8k/gsm8k_eval.py --port 8000 ``` ## Configuration Format From 0eecb3166365a29db117c2aff6ca441b484b514d Mon Sep 17 
00:00:00 2001 From: WeiQing Chen <40507679+david6666666@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:35:39 +0800 Subject: [PATCH 220/518] [Bugfix] Fix hermes tool parser handling of non-string argument types (#22002) Signed-off-by: wangzi <3220100013@zju.edu.cn> Signed-off-by: David Chen <530634352@qq.com> Co-authored-by: wangzi <3220100013@zju.edu.cn> Co-authored-by: Chauncey --- .../tool_parsers/test_hermes_tool_parser.py | 131 ++++++++++++++++++ .../openai/tool_parsers/hermes_tool_parser.py | 42 +++++- 2 files changed, 166 insertions(+), 7 deletions(-) diff --git a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py index 4bab849f47c2..e0e6b2c07e17 100644 --- a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py +++ b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py @@ -45,8 +45,39 @@ }, }] +PRODUCT_TOOLS = [{ + "type": "function", + "function": { + "name": "get_product_info", + "description": "Get detailed information of a product based on its " + "product ID.", + "parameters": { + "type": "object", + "properties": { + "inserted": { + "type": "boolean", + "description": "inserted.", + }, + "product_id": { + "type": "integer", + "description": "The product ID of the product.", + }, + }, + "required": ["product_id", "inserted"], + }, + }, +}] + MESSAGES = [{"role": "user", "content": "What's the weather like in Boston?"}] +PRODUCT_MESSAGES = [{ + "role": + "user", + "content": + "Hi! Do you have any detailed information about the product id " + "7355608 and inserted true?" +}] + @pytest.mark.asyncio async def test_non_streaming_tool_call(): @@ -127,3 +158,103 @@ async def test_streaming_tool_call(): print("\n[Streaming Test Passed]") print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") print(f"Reconstructed Arguments: {arguments}") + + +@pytest.mark.asyncio +async def test_non_streaming_product_tool_call(): + """Test tool call integer and boolean parameters in non-streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = server.get_async_client() + + response = await client.chat.completions.create( + model=LORA_MODEL, + messages=PRODUCT_MESSAGES, + tools=PRODUCT_TOOLS, + tool_choice="auto", + temperature=0.66, + ) + + assert response.choices + choice = response.choices[0] + message = choice.message + + assert choice.finish_reason == "tool_calls" + assert message.tool_calls is not None + + tool_call = message.tool_calls[0] + assert tool_call.type == "function" + assert tool_call.function.name == "get_product_info" + + arguments = json.loads(tool_call.function.arguments) + assert "product_id" in arguments + assert "inserted" in arguments + + product_id = arguments.get("product_id") + inserted = arguments.get("inserted") + + assert isinstance(product_id, int) + assert product_id == 7355608 + assert isinstance(inserted, bool) + assert inserted is True + + print("\n[Non-Streaming Product Test Passed]") + print(f"Tool Call: {tool_call.function.name}") + print(f"Arguments: {arguments}") + + +@pytest.mark.asyncio +async def test_streaming_product_tool_call(): + """Test tool call integer and boolean parameters in streaming mode.""" + with RemoteOpenAIServer(MODEL_NAME, SERVER_ARGS) as server: + client = server.get_async_client() + + stream = await client.chat.completions.create( + model=LORA_MODEL, + messages=PRODUCT_MESSAGES, + tools=PRODUCT_TOOLS, + tool_choice="auto", + temperature=0.66, + stream=True, + ) + + tool_call_chunks = 
{} + async for chunk in stream: + if not chunk.choices: + continue + + delta = chunk.choices[0].delta + if not delta or not delta.tool_calls: + continue + + for tool_chunk in delta.tool_calls: + index = tool_chunk.index + if index not in tool_call_chunks: + tool_call_chunks[index] = {"name": "", "arguments": ""} + + if tool_chunk.function.name: + tool_call_chunks[index]["name"] += tool_chunk.function.name + if tool_chunk.function.arguments: + tool_call_chunks[index][ + "arguments"] += tool_chunk.function.arguments + + assert len(tool_call_chunks) == 1 + reconstructed_tool_call = tool_call_chunks[0] + + assert reconstructed_tool_call["name"] == "get_product_info" + + arguments = json.loads(reconstructed_tool_call["arguments"]) + assert "product_id" in arguments + assert "inserted" in arguments + + # Handle type coercion for streaming test as well + product_id = arguments.get("product_id") + inserted = arguments.get("inserted") + + assert isinstance(product_id, int) + assert product_id == 7355608 + assert isinstance(inserted, bool) + assert inserted is True + + print("\n[Streaming Product Test Passed]") + print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") + print(f"Reconstructed Arguments: {arguments}") diff --git a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py index e74c420da1d3..87595953da06 100644 --- a/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py @@ -368,16 +368,32 @@ def extract_tool_calls_streaming( # case -- we now have the first info about arguments available from # autocompleting the JSON elif cur_arguments and not prev_arguments: + # extract the content after {"name": ..., "arguments": + # directly from tool_call_portion as cur_arguments_json, + # since cur_arguments may differ from the original text + # due to partial JSON parsing + # for example, tool_call_portion = + # {"name": "search", "arguments": {"search_request": {" + # but cur_arguments = + # {"search_request": {}} + function_name = current_tool_call.get("name") + match = re.search( + r'\{"name":\s*"' + + re.escape(function_name) + r'"\s*,\s*"arguments":\s*(.*)', + tool_call_portion.strip(), re.DOTALL) + if match: + cur_arguments_json = match.group(1) + else: + cur_arguments_json = json.dumps(cur_arguments, + ensure_ascii=False) - cur_arguments_json = json.dumps(cur_arguments, - ensure_ascii=False) logger.debug("finding %s in %s", delta_text, cur_arguments_json) - # get the location where previous args differ from current - if (delta_text not in cur_arguments_json[:-2]): + # get the location where previous args differ from current. + if (delta_text not in cur_arguments_json): return None - args_delta_start_loc = cur_arguments_json[:-2]. \ + args_delta_start_loc = cur_arguments_json. \ rindex(delta_text) + \ len(delta_text) @@ -397,8 +413,20 @@ def extract_tool_calls_streaming( # last case -- we have an update to existing arguments. 
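            # In the branch below, only the incremental argument text is
            # streamed; the added complete-JSON check strips a trailing '}'
            # only when it closes the whole tool call, so integer/boolean
            # argument values are no longer truncated.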
elif cur_arguments and prev_arguments: - if isinstance(delta_text, str) and len(delta_text.rstrip( - )) >= 1 and delta_text.rstrip()[-1] == '}': + # judge whether the tool_call_portion is a complete JSON + try: + json.loads(tool_call_portion) + is_complete_json = True + except Exception: + is_complete_json = False + + # if the delta_text ends with a '}' and tool_call_portion is a + # complete JSON, then the last '}' does not belong to the + # arguments, so we should trim it off + if isinstance(delta_text, str) \ + and len(delta_text.rstrip()) >= 1 \ + and delta_text.rstrip()[-1] == '}' \ + and is_complete_json: delta_text = delta_text.rstrip()[:-1] logger.debug("got diff %s", delta_text) From 6d0b827cbd0510173f6a9e77549d917828e80c76 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 22 Sep 2025 13:58:26 +0800 Subject: [PATCH 221/518] [V0 Deprecation] Remove V0-only methods in multi-modal registry (#25362) Signed-off-by: DarkLight1337 --- .../multimodal/generation/test_qwen2_vl.py | 1 - vllm/multimodal/registry.py | 32 +------------------ 2 files changed, 1 insertion(+), 32 deletions(-) diff --git a/tests/models/multimodal/generation/test_qwen2_vl.py b/tests/models/multimodal/generation/test_qwen2_vl.py index 8336ebc0d59c..c8a3513ac7ad 100644 --- a/tests/models/multimodal/generation/test_qwen2_vl.py +++ b/tests/models/multimodal/generation/test_qwen2_vl.py @@ -209,7 +209,6 @@ def get_image_embeds(model): return visual(pixel_values_on_device, grid_thw=video_grid_thw_on_device).cpu() - # V1 Test: this calls a V0 internal. video_embeds = torch.concat(llm.apply_model(get_image_embeds)) # split into original batches diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 38adbf8f3536..5d485bc361d1 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -12,8 +12,7 @@ cached_tokenizer_from_config) from vllm.utils import ClassRegistry -from .cache import (BaseMultiModalProcessorCache, - processor_only_cache_from_config) +from .cache import BaseMultiModalProcessorCache from .processing import BaseMultiModalProcessor, BaseProcessingInfo from .profiling import (BaseDummyInputsBuilder, DummyDecoderData, DummyEncoderData, MultiModalProfiler) @@ -176,35 +175,6 @@ def get_max_tokens_per_item_by_nonzero_modality( if mm_limits[key] > 0 } - # TODO: Remove once V0 is gone - def get_max_tokens_by_modality( - self, - model_config: "ModelConfig", - ) -> Mapping[str, int]: - """ - Get the maximum number of tokens from each modality - for profiling the memory usage of a model. - """ - cache = processor_only_cache_from_config(model_config, self) - mm_limits = self.get_mm_limits_per_prompt(model_config, cache=cache) - max_tokens_per_item = self.get_max_tokens_per_item_by_modality( - model_config, - cache=cache, - ) - - return { - key: mm_limits[key] * max_tokens_per_mm_item - for key, max_tokens_per_mm_item in max_tokens_per_item.items() - } - - # TODO: Remove once V0 is gone - def get_max_multimodal_tokens(self, model_config: "ModelConfig") -> int: - """ - Get the maximum number of multi-modal tokens - for profiling the memory usage of a model. 
- """ - return sum(self.get_max_tokens_by_modality(model_config).values()) - def get_mm_limits_per_prompt( self, model_config: "ModelConfig", From f92d952632541889c38f107f0425a5bd4707d9d9 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 22 Sep 2025 16:49:19 +0800 Subject: [PATCH 222/518] [V0 Deprecation] Remove `MultiModalPlaceholderMap` (#25366) Signed-off-by: DarkLight1337 --- tests/kernels/utils.py | 2 - vllm/attention/backends/abstract.py | 10 --- vllm/attention/backends/placeholder_attn.py | 23 +------ vllm/attention/backends/utils.py | 18 ----- vllm/multimodal/__init__.py | 2 - vllm/multimodal/base.py | 74 +-------------------- vllm/v1/attention/backends/cpu_attn.py | 1 - 7 files changed, 2 insertions(+), 128 deletions(-) diff --git a/tests/kernels/utils.py b/tests/kernels/utils.py index 8d6ce381976b..39ea07309134 100644 --- a/tests/kernels/utils.py +++ b/tests/kernels/utils.py @@ -959,7 +959,6 @@ def make_test_metadata( return attn_backend_obj.make_metadata( num_prefills=num_prefills, slot_mapping=(None if kv_mmap is None else kv_mmap.slot_mapping), - multi_modal_placeholder_index_maps=None, enable_kv_scales_calculation=True, num_prefill_tokens=num_prefill_tokens, num_decode_tokens=num_decode_tokens, @@ -1009,7 +1008,6 @@ def make_test_metadata( return attn_backend_obj.make_metadata( num_prefills=num_prefills, slot_mapping=kv_mmap.slot_mapping, - multi_modal_placeholder_index_maps=None, enable_kv_scales_calculation=True, num_prefill_tokens=num_prefill_tokens, num_decode_tokens=num_decode_tokens, diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py index ab7ef2112b08..1b392cd7c88d 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -10,7 +10,6 @@ import torch from vllm.model_executor.layers.quantization.utils.quant_utils import QuantKey -from vllm.multimodal import MultiModalPlaceholderMap class AttentionType: @@ -116,15 +115,6 @@ class AttentionMetadata: # in block 0, and 1st slot in block 1, respectively. slot_mapping: torch.Tensor - # The index maps that relate multi-modal embeddings to the corresponding - # placeholders. - # - # N.B. These aren't really related to attention and don't belong on this - # type -- this is just a temporary solution to make them available to - # `model_executable`. - multi_modal_placeholder_index_maps: Optional[Dict[ - str, MultiModalPlaceholderMap.IndexMap]] - # Enable/disable KV scales calculation. This is so that we can disable the # calculation until after prefill and cuda graph capture. 
enable_kv_scales_calculation: bool diff --git a/vllm/attention/backends/placeholder_attn.py b/vllm/attention/backends/placeholder_attn.py index f82d28938f45..cddeb2cf39bf 100644 --- a/vllm/attention/backends/placeholder_attn.py +++ b/vllm/attention/backends/placeholder_attn.py @@ -1,10 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from collections import defaultdict from dataclasses import dataclass from itertools import accumulate -from typing import Dict, List, Optional, Tuple, Type +from typing import List, Optional, Tuple, Type import torch @@ -12,7 +11,6 @@ AttentionMetadata, AttentionMetadataBuilder) from vllm.attention.backends.utils import CommonAttentionState -from vllm.multimodal import MultiModalPlaceholderMap from vllm.utils import async_tensor_h2d # Placeholder attention backend for models like Mamba and pooling models that @@ -141,8 +139,6 @@ def prefill_metadata(self) -> Optional["PlaceholderAttentionMetadata"]: num_prefill_tokens=self.num_prefill_tokens, num_decode_tokens=0, slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=self. - multi_modal_placeholder_index_maps, enable_kv_scales_calculation=self.enable_kv_scales_calculation, seq_lens=seq_lens, seq_lens_tensor=seq_lens_tensor, @@ -178,7 +174,6 @@ def decode_metadata(self) -> Optional["PlaceholderAttentionMetadata"]: num_prefill_tokens=0, num_decode_tokens=self.num_decode_tokens, slot_mapping=slot_mapping, - multi_modal_placeholder_index_maps=None, enable_kv_scales_calculation=True, seq_lens=None, seq_lens_tensor=seq_lens_tensor, @@ -210,9 +205,6 @@ def prepare(self): self.prefill_seq_lens: List[int] = [] self.context_lens: List[int] = [] self.curr_seq_lens: List[int] = [] - self.multimodal_placeholder_maps: Dict[ - str, - MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap) self.num_prefills = 0 self.num_prefill_tokens = 0 self.num_decode_tokens = 0 @@ -232,12 +224,6 @@ def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): self.context_lens.append(context_len) if is_prompt: - mm_maps = inter_data.multi_modal_placeholder_maps - if mm_maps: - for modality, placeholders in mm_maps.items(): - self.multimodal_placeholder_maps[modality].extend( - placeholders) - self.num_prefills += 1 self.num_prefill_tokens += token_len self.prefill_seq_lens.append(seq_len) @@ -295,12 +281,6 @@ def build(self, seq_lens: List[int], query_lens: List[int], seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, device, self.runner.pin_memory) - placeholder_index_maps = { - modality: placeholder_map.index_map() - for modality, placeholder_map in - self.multimodal_placeholder_maps.items() - } - # Placeholders slot_mapping_tensor = torch.empty(0) block_tables = torch.empty(0) @@ -308,7 +288,6 @@ def build(self, seq_lens: List[int], query_lens: List[int], return PlaceholderAttentionMetadata( num_prefills=self.num_prefills, slot_mapping=slot_mapping_tensor, - multi_modal_placeholder_index_maps=placeholder_index_maps, enable_kv_scales_calculation=True, num_prefill_tokens=self.num_prefill_tokens, num_decode_tokens=num_decode_tokens, diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 3f15580872c7..33d8168f8a13 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Attention backend utils""" -from collections import defaultdict from contextlib 
import contextmanager from dataclasses import dataclass from itertools import accumulate @@ -15,7 +14,6 @@ from vllm.attention.backends.abstract import AttentionType from vllm.config import ModelConfig from vllm.logger import init_logger -from vllm.multimodal import MultiModalPlaceholderMap from vllm.utils import async_tensor_h2d, make_tensor_with_pad logger = init_logger(__name__) @@ -135,9 +133,6 @@ def prepare(self): self.context_lens: List[int] = [] self.block_tables: List[List[int]] = [] self.curr_seq_lens: List[int] = [] - self.multimodal_placeholder_maps: Dict[ - str, - MultiModalPlaceholderMap] = defaultdict(MultiModalPlaceholderMap) self.num_prefills = 0 self.num_prefill_tokens = 0 self.num_decode_tokens = 0 @@ -154,12 +149,6 @@ def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): inter_data.curr_sliding_window_blocks): self.context_lens.append(context_len) if is_prompt: - mm_maps = inter_data.multi_modal_placeholder_maps - if mm_maps: - for modality, placeholders in mm_maps.items(): - self.multimodal_placeholder_maps[modality].extend( - placeholders) - self.num_prefills += 1 self.num_prefill_tokens += token_len self.prefill_seq_lens.append(seq_len) @@ -254,16 +243,10 @@ def build(self, seq_lens: List[int], query_lens: List[int], self.runner.pin_memory) seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, device, self.runner.pin_memory) - placeholder_index_maps = { - modality: placeholder_map.index_map() - for modality, placeholder_map in - self.multimodal_placeholder_maps.items() - } return self._metadata_cls( # type: ignore num_prefills=self.num_prefills, slot_mapping=slot_mapping_tensor, - multi_modal_placeholder_index_maps=placeholder_index_maps, enable_kv_scales_calculation=True, num_prefill_tokens=self.num_prefill_tokens, num_decode_tokens=num_decode_tokens, @@ -320,7 +303,6 @@ def graph_capture_get_metadata_for_batch( num_prefill_tokens=0, num_decode_tokens=batch_size, slot_mapping=self._graph_slot_mapping[:batch_size], - multi_modal_placeholder_index_maps=None, enable_kv_scales_calculation=True, seq_lens=None, seq_lens_tensor=self._graph_seq_lens[:batch_size], diff --git a/vllm/multimodal/__init__.py b/vllm/multimodal/__init__.py index 7ffa732cf370..8ea79078465e 100644 --- a/vllm/multimodal/__init__.py +++ b/vllm/multimodal/__init__.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from .base import MultiModalPlaceholderMap from .hasher import MultiModalHasher from .inputs import (BatchedTensorInputs, ModalityData, MultiModalDataBuiltins, MultiModalDataDict, MultiModalKwargs, @@ -27,7 +26,6 @@ "MultiModalKwargs", "MultiModalKwargsItems", "MultiModalPlaceholderDict", - "MultiModalPlaceholderMap", "MultiModalUUIDDict", "NestedTensors", "MULTIMODAL_REGISTRY", diff --git a/vllm/multimodal/base.py b/vllm/multimodal/base.py index e0edb3e883ed..faffddd57199 100644 --- a/vllm/multimodal/base.py +++ b/vllm/multimodal/base.py @@ -3,83 +3,11 @@ from abc import ABC, abstractmethod from pathlib import Path -from typing import Generic, NamedTuple, TypeVar +from typing import Generic, TypeVar _T = TypeVar("_T") -class MultiModalPlaceholderMap: - """ - Relates multi-modal embeddings to their corresponding placeholders. - - Note: This is only used in V0. 
- """ - - class IndexMap(NamedTuple): - src: list[int] - dest: list[int] - - src_ranges: list[range] - """ - The indices of the multi-modal embeddings that will replace the - corresponding placeholder embeddings pointed to by ``dest_ranges``. - """ - - src_len: int - """ - The total number of flattened multi-modal embeddings. - """ - - dest_ranges: list[range] - """ - The indices of the placeholder embeddings that will be replaced by the - multimodal embeddings. - """ - - dest_len: int - """ - The total number of embeddings in the destination tensor. - """ - - def __init__(self): - self.src_ranges = [] - self.src_len = 0 - self.dest_ranges = [] - self.dest_len = 0 - - def extend(self, other: "MultiModalPlaceholderMap"): - """ - Adds the placeholders from another ``MultiModalPlaceholderMap`` to this - instance based on the source and destination tensors being - concatenated. - """ - - self.src_ranges.extend( - range(self.src_len + r.start, self.src_len + r.stop) - for r in other.src_ranges) - self.src_len += other.src_len - self.dest_ranges.extend( - range(self.dest_len + r.start, self.dest_len + r.stop) - for r in other.dest_ranges) - self.dest_len += other.dest_len - - def index_map(self) -> "IndexMap": - """ - Finalizes the placeholder map into lists of indices that can be used to - index the source and destination tensors. - """ - - src_indices = [i for r in self.src_ranges for i in r] - dest_indices = [i for r in self.dest_ranges for i in r] - - if len(src_indices) != len(dest_indices): - raise ValueError( - f"The number of source ({len(src_indices)}) and destination " - f"indices ({len(dest_indices)}) must be the same.") - - return self.IndexMap(src=src_indices, dest=dest_indices) - - class MediaIO(ABC, Generic[_T]): @abstractmethod diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index 6627164c9879..7e485fea2689 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -425,7 +425,6 @@ def build(self, num_prompt_req], # prefill query_start_loc=query_start_loc_cpu[:num_reqs + 1], # for logits index - multi_modal_placeholder_index_maps=None, enable_kv_scales_calculation=False, ) From 21467f9a1c6219cbd66640e539b1f23221cff375 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eldar=20Kurti=C4=87?= <8884008+eldarkurtic@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:50:39 +0200 Subject: [PATCH 223/518] Enable Eagle3 speculative decoding for GPT-OSS model (#25246) Signed-off-by: Eldar Kurtic <8884008+eldarkurtic@users.noreply.github.com> --- vllm/config/speculative.py | 2 +- vllm/model_executor/models/gpt_oss.py | 19 ++++++++++++++-- vllm/v1/spec_decode/eagle.py | 32 +++++++++++++++++++-------- 3 files changed, 41 insertions(+), 12 deletions(-) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 2c861723c396..d533930e1c7a 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -527,7 +527,7 @@ def _verify_args(self) -> Self: "speculative decoding is > 1, but got " f"{self.disable_by_batch_size=}") - eagle3_target_supported = ["llama", "qwen"] + eagle3_target_supported = ["llama", "qwen", "gpt_oss"] if self.method == "eagle3" and self.target_model_config and not any( supported_model in self.target_model_config.hf_text_config.model_type diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index 4fe59f91124d..7c755a00e1c9 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -27,7 +27,7 @@ 
from vllm.sequence import IntermediateTensors from vllm.utils import cdiv -from .interfaces import SupportsPP +from .interfaces import SupportsEagle3, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, extract_layer_index, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, @@ -238,6 +238,7 @@ def __init__( self.make_empty_intermediate_tensors = ( make_empty_intermediate_tensors_factory( ["hidden_states", "residual"], self.config.hidden_size)) + self.aux_hidden_state_layers = tuple[int, ...]() def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embedding(input_ids) @@ -261,8 +262,12 @@ def forward( x = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] + aux_hidden_states = [] for i in range(self.start_layer, self.end_layer): layer = self.layers[i] + if i in self.aux_hidden_state_layers: + aux_hidden_states.append(x if residual is None else x + + residual) x, residual = layer(x, positions, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -270,6 +275,9 @@ def forward( "residual": residual }) x, _ = self.norm(x, residual) + + if len(aux_hidden_states) > 0: + return x, aux_hidden_states return x def _load_weights_mxfp4( @@ -610,7 +618,7 @@ def load_weights(self, weights: Iterable[tuple[str, weights, stacked_params_mapping) -class GptOssForCausalLM(nn.Module, SupportsPP): +class GptOssForCausalLM(nn.Module, SupportsPP, SupportsEagle3): packed_modules_mapping = {"qkv": ["q_proj", "k_proj", "v_proj"]} hf_to_vllm_mapper = WeightsMapper( @@ -658,6 +666,13 @@ def __init__( self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None: + self.model.aux_hidden_state_layers = layers + + def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]: + num_layers = len(self.model.layers) + return (2, num_layers // 2, num_layers - 3) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.model.get_input_embeddings(input_ids) diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 5dacf6088696..dc97d5c8f39d 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -823,15 +823,29 @@ def load_model(self, target_model: nn.Module) -> None: else: target_language_model = target_model # share embed_tokens with the target model if needed - if get_pp_group().world_size == 1 \ - and self.model.model.embed_tokens.weight.shape \ - == target_language_model.model.embed_tokens.weight.shape: - logger.info( - "Assuming the EAGLE head shares the same vocab embedding" - " with the target model.") - del self.model.model.embed_tokens - self.model.model.embed_tokens = ( - target_language_model.model.embed_tokens) + if get_pp_group().world_size == 1: + if hasattr(target_language_model.model, 'embed_tokens'): + target_embed_tokens = target_language_model.model.embed_tokens + elif hasattr(target_language_model.model, 'embedding'): + target_embed_tokens = target_language_model.model.embedding + else: + raise AttributeError( + "Target model does not have 'embed_tokens' or 'embedding' " + "attribute") + + # Check if shapes match and we found the embedding + eagle_shape = self.model.model.embed_tokens.weight.shape + target_shape = target_embed_tokens.weight.shape + if eagle_shape == target_shape: + logger.info( + "Assuming the EAGLE head shares the same vocab embedding" + " with the target model.") + del 
self.model.model.embed_tokens + self.model.model.embed_tokens = target_embed_tokens + else: + logger.info( + "The EAGLE head's vocab embedding will be loaded separately" + " from the target model.") else: logger.info( "The EAGLE head's vocab embedding will be loaded separately" From a66d13138137147d5b3fbdfe64afb103fbc52f86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Mon, 22 Sep 2025 11:55:04 +0200 Subject: [PATCH 224/518] [TPU][Bugfix][CI] Fix broken tests/build dependency (#25255) Signed-off-by: NickLucche --- .buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh | 2 +- .buildkite/scripts/hardware_ci/run-tpu-v1-test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh index 1073a4ee30af..e76528a17820 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test-part2.sh @@ -62,7 +62,7 @@ echo "--- Installing Python dependencies ---" python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \ && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \ && python3 -m pip install --progress-bar off "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d" \ - && python3 -m pip install --progress-bar off hf-transfer + && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0 echo "--- Python dependencies installed ---" export VLLM_USE_V1=1 export VLLM_XLA_CHECK_RECOMPILATION=1 diff --git a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh index 505664f3aecd..69366cd50321 100755 --- a/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh +++ b/.buildkite/scripts/hardware_ci/run-tpu-v1-test.sh @@ -62,7 +62,7 @@ echo "--- Installing Python dependencies ---" python3 -m pip install --progress-bar off git+https://github.com/thuml/depyf.git \ && python3 -m pip install --progress-bar off pytest pytest-asyncio tpu-info \ && python3 -m pip install --progress-bar off "lm-eval @ git+https://github.com/EleutherAI/lm-evaluation-harness.git@206b7722158f58c35b7ffcd53b035fdbdda5126d" \ - && python3 -m pip install --progress-bar off hf-transfer + && python3 -m pip install --progress-bar off hf-transfer tblib==3.1.0 echo "--- Python dependencies installed ---" export VLLM_USE_V1=1 export VLLM_XLA_CHECK_RECOMPILATION=1 From 4cf71cc88a19f97bf6f5db9802395edd646375ef Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Mon, 22 Sep 2025 12:12:57 +0200 Subject: [PATCH 225/518] [TPU] Deprecate `xm.mark_step` in favor of ``torch_xla.sync` (#25254) Signed-off-by: NickLucche Co-authored-by: Ye (Charlotte) Qi --- tests/tpu/test_moe_pallas.py | 3 +- tests/v1/tpu/test_topk_topp_sampler.py | 11 ++++--- vllm/lora/punica_wrapper/punica_tpu.py | 4 +-- .../model_loader/default_loader.py | 9 +++-- vllm/v1/worker/tpu_model_runner.py | 33 ++++++++++--------- 5 files changed, 31 insertions(+), 29 deletions(-) diff --git a/tests/tpu/test_moe_pallas.py b/tests/tpu/test_moe_pallas.py index 407a824d8174..1e5d9d923d00 100644 --- a/tests/tpu/test_moe_pallas.py +++ b/tests/tpu/test_moe_pallas.py @@ -6,6 +6,7 @@ """ import pytest import torch +import torch_xla # yapf conflicts with isort for this block # yapf: disable @@ -77,7 +78,7 @@ def test_pallas_moe( expert_map=e_map, renormalize=False, ) - xm.mark_step() + torch_xla.sync(wait=False) # Compare 
outputs torch.testing.assert_close( diff --git a/tests/v1/tpu/test_topk_topp_sampler.py b/tests/v1/tpu/test_topk_topp_sampler.py index 05751badc761..665cf8cd2629 100644 --- a/tests/v1/tpu/test_topk_topp_sampler.py +++ b/tests/v1/tpu/test_topk_topp_sampler.py @@ -4,6 +4,7 @@ import pytest import torch +import torch_xla from vllm.platforms import current_platform from vllm.v1.sample.ops.topk_topp_sampler import apply_top_k_top_p @@ -63,7 +64,7 @@ def test_topp_result_sums_past_p(): probs.masked_fill_(logits_masked.isinf(), 0) masked_prob_sum = probs.sum(dim=-1) - xm.mark_step() + torch_xla.sync() # Perform assertion on CPU. assert torch.all(torch.ge(masked_prob_sum.cpu() + TOLERANCE, p.cpu())) @@ -82,7 +83,7 @@ def test_topp_basic(): k=torch.tensor([3, 3]), p=torch.tensor([0.79, 0.79])) - xm.mark_step() + torch_xla.sync() # Expect the smallest elements to be dropped. expected_result = logits.clone().cpu() @@ -104,7 +105,7 @@ def test_topp_select_all(): k=torch.tensor([3, 3]), p=torch.tensor([1.0, 1.0])) - xm.mark_step() + torch_xla.sync() assert torch.allclose(logits.cpu(), result.cpu()) @@ -122,7 +123,7 @@ def test_topp_with_ties(): k=torch.tensor([4]), p=torch.tensor([0.2])) - xm.mark_step() + torch_xla.sync() # All tie values are included in the top-p set. Tie breaking is left # to be done during final sampling (all tie tokens have equal @@ -146,7 +147,7 @@ def test_both_topk_topp(): k=torch.tensor([1, 3]), p=torch.tensor([0.79, 0.79])) - xm.mark_step() + torch_xla.sync() # Since for the first batch k=1, expect only the largest element gets # selected. diff --git a/vllm/lora/punica_wrapper/punica_tpu.py b/vllm/lora/punica_wrapper/punica_tpu.py index 07dc337a1cc8..5896da516540 100644 --- a/vllm/lora/punica_wrapper/punica_tpu.py +++ b/vllm/lora/punica_wrapper/punica_tpu.py @@ -6,7 +6,7 @@ import torch import torch.nn.functional as F -import torch_xla.core.xla_model as xm +import torch_xla from vllm.lora.ops.xla_ops import bgmv_expand, bgmv_expand_slice, bgmv_shrink from vllm.lora.punica_wrapper.utils import convert_mapping @@ -323,7 +323,7 @@ def _update_base_metadata( extra_vocab_size: int, ): # Make sure we don't accidentally collect outside operations - xm.mark_step() + torch_xla.sync() # Pad the prompt mapping to avoid running into recompiles on the TPU # TODO: Should this happen inside mapping internally? If so how can we diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index d1bdec21fd97..4b7bcd37d4bc 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -211,16 +211,15 @@ def _get_weights_iterator( from vllm.platforms.tpu import USE_TPU_COMMONS if not USE_TPU_COMMONS: - # In PyTorch XLA, we should call `xm.mark_step` + # In PyTorch XLA, we should call `torch_xla.sync` # frequently so that not too many ops are accumulated - # in the XLA program. import torch_xla.core.xla_model - # as xm - import torch_xla.core.xla_model as xm + # in the XLA program. 
+ import torch_xla def _xla_weights_iterator(iterator: Generator): for weights in iterator: yield weights - xm.mark_step() + torch_xla.sync(wait=False) weights_iterator = _xla_weights_iterator(weights_iterator) diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index dd11b1dcbe94..4cbf991a14c1 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -10,6 +10,7 @@ import torch import torch.nn as nn # TPU XLA related +import torch_xla import torch_xla.core.xla_model as xm import torch_xla.distributed.spmd as xs import torch_xla.runtime as xr @@ -846,10 +847,10 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # 2. A list or tuple (length: num_items) of tensors, each of shape # (feature_size, hidden_size) in case the feature size is dynamic # depending on the input multimodal items. - xm.mark_step() + torch_xla.sync(wait=False) curr_group_outputs = self.model.get_multimodal_embeddings( **mm_kwargs_group) - xm.mark_step() + torch_xla.sync(wait=False) sanity_check_mm_encoder_outputs( curr_group_outputs, @@ -952,7 +953,7 @@ def execute_model( mm_embeds = self._gather_mm_embeddings(scheduler_output) else: mm_embeds = [] - xm.mark_step() + torch_xla.sync(wait=False) # Prepare inputs, the requests might be split into multiple # executions, combine the result of each execution. start_index = 0 @@ -969,7 +970,7 @@ def execute_model( end_index = self._prepare_inputs(scheduler_output, start_index) input_ids, inputs_embeds = self._get_model_inputs( self.input_ids, mm_embeds) - xm.mark_step() + torch_xla.sync(wait=False) # Run the decoder with set_forward_context( attn_metadata, @@ -1183,7 +1184,7 @@ def load_model(self) -> None: # Sync all pending XLA execution during model initialization and weight # loading. - xm.mark_step() + torch_xla.sync(wait=False) xm.wait_device_ops() if not hasattr(self, "model"): self.model = model @@ -1267,10 +1268,10 @@ def _dummy_run(self, num_tokens: int, num_reqs: int, def _set_active_loras(self, prompt_lora_mapping, token_lora_mapping, lora_requests) -> None: - xm.mark_step() # Captures input updates + torch_xla.sync(wait=False) # Captures input updates super()._set_active_loras(prompt_lora_mapping, token_lora_mapping, lora_requests) - xm.mark_step() # Captures metadata updates + torch_xla.sync(wait=False) # Captures metadata updates def _precompile_mm_encoder(self) -> None: if not self.supports_mm_inputs: @@ -1297,10 +1298,10 @@ def _precompile_mm_encoder(self) -> None: num_items, ) # Run multimodal encoder. - xm.mark_step() + torch_xla.sync(wait=False) mm_embeds = self.model.get_multimodal_embeddings( **batched_dummy_mm_inputs) - xm.mark_step() + torch_xla.sync(wait=False) num_patches = mm_embeds[0].shape[0] items_size = num_patches * num_items @@ -1325,7 +1326,7 @@ def _precompile_mm_encoder(self) -> None: a, b = self._get_model_inputs(placeholders_ids, [mm_embeds]) assert a is None - xm.mark_step() + torch_xla.sync(wait=False) # Pre-compile `get_input_embeddings` when mm_embeddings are not # present. Chunk is only made of text, no mm_placeholders. @@ -1336,7 +1337,7 @@ def _precompile_mm_encoder(self) -> None: placeholders_ids = placeholders_ids.to(self.device) a, b = self._get_model_inputs(placeholders_ids, []) assert a is None - xm.mark_step() + torch_xla.sync(wait=False) xm.wait_device_ops() end = time.perf_counter() @@ -1532,11 +1533,11 @@ def profile_run( # Isolate encoder graph from post-processing to minimize # impact of recompilation until it's fixed. 
start = time.perf_counter() - xm.mark_step() + torch_xla.sync(wait=False) dummy_encoder_outputs = \ self.model.get_multimodal_embeddings( **batched_dummy_mm_inputs) - xm.mark_step() + torch_xla.sync(wait=False) xm.wait_device_ops() end = time.perf_counter() logger.info( @@ -1559,7 +1560,7 @@ def profile_run( self._dummy_run(num_tokens, self.num_reqs_most_model_len, self.num_blocks_per_most_len_req) - xm.mark_step() + torch_xla.sync(wait=False) xm.wait_device_ops() self.encoder_cache.clear() gc.collect() @@ -1927,11 +1928,11 @@ def _tpu_set_lora( # to a tensor doesn't seem to work anymore. This might be fixed with a # later release of torch_xla. self._original_set_lora(index, lora_a, lora_b, embeddings_tensor, bias) - xm.mark_step() + torch_xla.sync(wait=False) def _tpu_reset_lora(self, index: int): self._original_reset_lora(index) - xm.mark_step() + torch_xla.sync(wait=False) for _, module in model.named_modules(): if isinstance(module, BaseLayerWithLoRA): From b6f01bd9a7941c3c06175feaf7588fa55c9db646 Mon Sep 17 00:00:00 2001 From: Yizhou <136800916+yiz-liu@users.noreply.github.com> Date: Mon, 22 Sep 2025 18:22:29 +0800 Subject: [PATCH 226/518] refactor: abstract graph mode support into platform interface (#25161) Signed-off-by: Yizhou Liu --- vllm/config/__init__.py | 2 +- vllm/platforms/cuda.py | 4 ++++ vllm/platforms/interface.py | 7 +++++++ vllm/platforms/rocm.py | 4 ++++ vllm/platforms/xpu.py | 13 +++++++------ 5 files changed, 23 insertions(+), 7 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index e31a78ba33ba..9bb8087a511d 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -503,7 +503,7 @@ def __post_init__(self): if self.compilation_config.pass_config.enable_sequence_parallelism: self.compilation_config.custom_ops.append("+rms_norm") - if current_platform.is_cuda_alike() or current_platform.is_xpu(): + if current_platform.support_static_graph_mode(): # if cudagraph_mode is not explicitly set by users, set default # value if self.compilation_config.cudagraph_mode is None: diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 05f129f513a0..7baa5a9742f4 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -498,6 +498,10 @@ def check_if_supports_dtype(cls, torch_dtype: torch.dtype): def support_hybrid_kv_cache(cls) -> bool: return True + @classmethod + def support_static_graph_mode(cls) -> bool: + return True + # NVML utils # Note that NVML is not affected by `CUDA_VISIBLE_DEVICES`, diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index c43580ac5da1..8a05c84d4242 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -587,6 +587,13 @@ def support_hybrid_kv_cache(cls) -> bool: """ return False + @classmethod + def support_static_graph_mode(cls) -> bool: + """ + Returns if the graph mode is supported by the current platform. 
+ """ + return False + @classmethod def use_sync_weight_loader(cls) -> bool: """ diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 9470434aa428..0c7b9c2a4abf 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -477,3 +477,7 @@ def check_if_supports_dtype(cls, torch_dtype: torch.dtype): @classmethod def support_hybrid_kv_cache(cls) -> bool: return True + + @classmethod + def support_static_graph_mode(cls) -> bool: + return True diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 4d3bef4b4294..eb591ae4454e 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -113,12 +113,9 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # lazy import to avoid circular import from vllm.config import CompilationLevel, CUDAGraphMode compilation_config = vllm_config.compilation_config - if compilation_config.cudagraph_mode is None or \ - compilation_config.cudagraph_mode.max_cudagraph_mode() \ - != CUDAGraphMode.NONE: - logger.info("[XPU] CUDA graph is not supported on XPU, disabling " - "cudagraphs. Fallback to cudagraph_mode=NONE") - compilation_config.cudagraph_mode = CUDAGraphMode.NONE + + assert compilation_config.cudagraph_mode == CUDAGraphMode.NONE, \ + "CUDA graph mode should be NONE on XPU" if vllm_config.lora_config is not None: compilation_config.level = CompilationLevel.NO_COMPILATION @@ -169,6 +166,10 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: def support_hybrid_kv_cache(cls) -> bool: return True + @classmethod + def support_static_graph_mode(cls) -> bool: + return False + @classmethod def is_pin_memory_available(cls): return True From 417a164af66a11978f11a9ba64d4d76b4cf3e72a Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Mon, 22 Sep 2025 19:04:32 +0800 Subject: [PATCH 227/518] [Misc] Remove unused encoder-decoder error strings (#25374) Signed-off-by: DarkLight1337 --- vllm/attention/backends/utils.py | 5 --- vllm/utils/__init__.py | 58 -------------------------------- 2 files changed, 63 deletions(-) diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 33d8168f8a13..63ee8f50825c 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -18,11 +18,6 @@ logger = init_logger(__name__) -# Error string(s) for encoder/decoder -# unsupported attention scenarios -STR_NOT_IMPL_ENC_DEC_ROCM_HIP = ("ROCm/HIP is not currently supported " - "with encoder/decoder models.") - PAD_SLOT_ID = -1 # Switch to numpy implementation of compute_slot_mapping diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 834ec9b1d30b..b74b746a3583 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -88,64 +88,6 @@ POOLING_MODEL_MAX_NUM_BATCHED_TOKENS = 32768 MULTIMODAL_MODEL_MAX_NUM_BATCHED_TOKENS = 5120 -# Exception strings for non-implemented encoder/decoder scenarios - -# Reminder: Please update docs/features/compatibility_matrix.md -# If the feature combo become valid - -STR_NOT_IMPL_ENC_DEC_SWA = \ - "Sliding window attention for encoder/decoder models " + \ - "is not currently supported." - -STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE = \ - "Prefix caching for encoder/decoder models " + \ - "is not currently supported." - -STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL = \ - "Chunked prefill for encoder/decoder models " + \ - "is not currently supported." 
- -STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP = ( - "Models with logits_soft_cap " - "require FlashInfer backend, which is " - "currently not supported for encoder/decoder " - "models.") - -STR_NOT_IMPL_ENC_DEC_LORA = ("LoRA is not currently " - "supported with encoder/decoder " - "models.") - -STR_NOT_IMPL_ENC_DEC_PP = ("Pipeline parallelism is not " - "currently supported with " - "encoder/decoder models.") - -STR_NOT_IMPL_ENC_DEC_MM = ("Multimodal is not currently " - "supported with encoder/decoder " - "models.") - -STR_NOT_IMPL_ENC_DEC_SPEC_DEC = ("Speculative decoding is not " - "currently supported with encoder/" - "decoder models.") - -STR_NOT_IMPL_ENC_DEC_BACKEND = ("XFormers and Flash-Attention are the only " - "backends currently supported with encoder/" - "decoder models.") - -# Efficiently import all enc/dec error strings -# rather than having to import all of the above -STR_NOT_IMPL_ENC_DEC_ERR_STRS = { - "STR_NOT_IMPL_ENC_DEC_SWA": STR_NOT_IMPL_ENC_DEC_SWA, - "STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE": STR_NOT_IMPL_ENC_DEC_PREFIX_CACHE, - "STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL": - STR_NOT_IMPL_ENC_DEC_CHUNKED_PREFILL, - "STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP": STR_NOT_IMPL_ENC_DEC_LOGIT_SOFTCAP, - "STR_NOT_IMPL_ENC_DEC_LORA": STR_NOT_IMPL_ENC_DEC_LORA, - "STR_NOT_IMPL_ENC_DEC_PP": STR_NOT_IMPL_ENC_DEC_PP, - "STR_NOT_IMPL_ENC_DEC_MM": STR_NOT_IMPL_ENC_DEC_MM, - "STR_NOT_IMPL_ENC_DEC_SPEC_DEC": STR_NOT_IMPL_ENC_DEC_SPEC_DEC, - "STR_NOT_IMPL_ENC_DEC_BACKEND": STR_NOT_IMPL_ENC_DEC_BACKEND, -} - # Constants related to forcing the attention backend selection # String name of register which may be set in order to From 64c824cd787dd610008e1fc91dfe7c06debf909b Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 22 Sep 2025 12:08:25 +0100 Subject: [PATCH 228/518] Make pickle import check fast (#25379) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .pre-commit-config.yaml | 5 +- .../{ => pre_commit}/check_pickle_imports.py | 79 ++++--------------- 2 files changed, 18 insertions(+), 66 deletions(-) rename tools/{ => pre_commit}/check_pickle_imports.py (61%) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index a4ea888af3f3..bf36db7d15ed 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -155,11 +155,10 @@ repos: additional_dependencies: [regex] - id: check-pickle-imports name: Prevent new pickle/cloudpickle imports - entry: python tools/check_pickle_imports.py + entry: python tools/pre_commit/check_pickle_imports.py language: python types: [python] - pass_filenames: false - additional_dependencies: [pathspec, regex] + additional_dependencies: [regex] - id: validate-config name: Validate configuration has default values and that each field has a docstring entry: python tools/validate_config.py diff --git a/tools/check_pickle_imports.py b/tools/pre_commit/check_pickle_imports.py similarity index 61% rename from tools/check_pickle_imports.py rename to tools/pre_commit/check_pickle_imports.py index fe717121db40..acbbc1f181d6 100644 --- a/tools/check_pickle_imports.py +++ b/tools/pre_commit/check_pickle_imports.py @@ -1,20 +1,10 @@ #!/usr/bin/env python3 # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import sys import regex as re -try: - import pathspec -except ImportError: - print( - "ERROR: The 'pathspec' library is required. 
" - "Install it with 'pip install pathspec'.", - file=sys.stderr) - sys.exit(2) - # List of files (relative to repo root) that are allowed to import pickle or # cloudpickle # @@ -25,7 +15,7 @@ # Before adding new uses of pickle/cloudpickle, please consider safer # alternatives like msgpack or pydantic that are already in use in vLLM. Only # add to this list if absolutely necessary and after careful security review. -ALLOWED_FILES = set([ +ALLOWED_FILES = { # pickle 'vllm/v1/serial_utils.py', 'vllm/v1/executor/multiproc_executor.py', @@ -36,11 +26,9 @@ 'tests/tokenization/test_cached_tokenizer.py', 'vllm/distributed/utils.py', 'vllm/distributed/parallel_state.py', - 'vllm/engine/multiprocessing/client.py', 'vllm/distributed/device_communicators/all_reduce_utils.py', 'vllm/distributed/device_communicators/shm_broadcast.py', 'vllm/distributed/device_communicators/shm_object_storage.py', - 'vllm/engine/multiprocessing/engine.py', 'benchmarks/kernels/graph_machete_bench.py', 'benchmarks/kernels/benchmark_lora.py', 'benchmarks/kernels/benchmark_machete.py', @@ -55,65 +43,30 @@ 'tests/utils.py', # pickle and cloudpickle 'vllm/utils/__init__.py', - 'vllm/v1/serial_utils.py', - 'vllm/v1/executor/multiproc_executor.py', - 'vllm/transformers_utils/config.py', - 'vllm/model_executor/models/registry.py', - 'vllm/engine/multiprocessing/client.py', - 'vllm/engine/multiprocessing/engine.py', -]) +} PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)" r"|from\s+(pickle|cloudpickle)\s+import\b)") -def is_python_file(path): - return path.endswith('.py') - - -def scan_file(path): +def scan_file(path: str) -> int: with open(path, encoding='utf-8') as f: - for line in f: + for i, line in enumerate(f, 1): if PICKLE_RE.match(line): - return True - return False - - -def load_gitignore(repo_root): - gitignore_path = os.path.join(repo_root, '.gitignore') - patterns = [] - if os.path.exists(gitignore_path): - with open(gitignore_path, encoding='utf-8') as f: - patterns = f.read().splitlines() - # Always ignore .git directory - patterns.append('.git/') - return pathspec.PathSpec.from_lines('gitwildmatch', patterns) + print(f"{path}:{i}: " + "\033[91merror:\033[0m " # red color + "Found pickle/cloudpickle import") + return 1 + return 0 def main(): - repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - spec = load_gitignore(repo_root) - bad_files = [] - for dirpath, _, filenames in os.walk(repo_root): - for filename in filenames: - if not is_python_file(filename): - continue - abs_path = os.path.join(dirpath, filename) - rel_path = os.path.relpath(abs_path, repo_root) - # Skip ignored files - if spec.match_file(rel_path): - continue - if scan_file(abs_path) and rel_path not in ALLOWED_FILES: - bad_files.append(rel_path) - if bad_files: - print("\nERROR: The following files import 'pickle' or 'cloudpickle' " - "but are not in the allowed list:") - for f in bad_files: - print(f" {f}") - print("\nIf this is intentional, update the allowed list in " - "tools/check_pickle_imports.py.") - sys.exit(1) - sys.exit(0) + returncode = 0 + for filename in sys.argv[1:]: + if filename in ALLOWED_FILES: + continue + returncode |= scan_file(filename) + return returncode def test_regex(): @@ -149,4 +102,4 @@ def test_regex(): if '--test-regex' in sys.argv: test_regex() else: - main() + sys.exit(main()) From 3d2c56b7a958aa2c92f61424e828b1d3c3c933d4 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Mon, 22 Sep 2025 13:23:45 +0100 Subject: [PATCH 229/518] 
Make `mypy` behave like a proper pre-commit hook (#25313) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .github/CODEOWNERS | 1 + .pre-commit-config.yaml | 34 ++++----- pyproject.toml | 21 ------ tools/mypy.sh | 35 --------- tools/pre_commit/mypy.py | 140 +++++++++++++++++++++++++++++++++++ vllm/entrypoints/llm.py | 4 +- vllm/entrypoints/renderer.py | 2 +- vllm/utils/__init__.py | 9 ++- vllm/utils/tensor_schema.py | 7 +- 9 files changed, 166 insertions(+), 87 deletions(-) delete mode 100755 tools/mypy.sh create mode 100755 tools/pre_commit/mypy.py diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 37bd0ace98a9..9d749fe8d323 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -72,6 +72,7 @@ mkdocs.yaml @hmellor # Linting .markdownlint.yaml @hmellor .pre-commit-config.yaml @hmellor +/tools/pre_commit @hmellor # CPU /vllm/v1/worker/cpu* @bigPYJ1151 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bf36db7d15ed..8ca414ee4269 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -60,38 +60,32 @@ repos: files: ^requirements/test\.(in|txt)$ - id: mypy-local name: Run mypy for local Python installation - entry: tools/mypy.sh 0 "local" - language: python - types: [python] - additional_dependencies: &mypy_deps [mypy==1.11.1, types-cachetools, types-setuptools, types-PyYAML, types-requests, pydantic] + entry: python tools/pre_commit/mypy.py 0 "local" stages: [pre-commit] # Don't run in CI + <<: &mypy_common + language: python + types_or: [python, pyi] + require_serial: true + additional_dependencies: [mypy==1.11.1, regex, types-cachetools, types-setuptools, types-PyYAML, types-requests, types-torch, pydantic] - id: mypy-3.9 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward name: Run mypy for Python 3.9 - entry: tools/mypy.sh 1 "3.9" - language: python - types: [python] - additional_dependencies: *mypy_deps + entry: python tools/pre_commit/mypy.py 1 "3.9" + <<: *mypy_common stages: [manual] # Only run in CI - id: mypy-3.10 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward name: Run mypy for Python 3.10 - entry: tools/mypy.sh 1 "3.10" - language: python - types: [python] - additional_dependencies: *mypy_deps + entry: python tools/pre_commit/mypy.py 1 "3.10" + <<: *mypy_common stages: [manual] # Only run in CI - id: mypy-3.11 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward name: Run mypy for Python 3.11 - entry: tools/mypy.sh 1 "3.11" - language: python - types: [python] - additional_dependencies: *mypy_deps + entry: python tools/pre_commit/mypy.py 1 "3.11" + <<: *mypy_common stages: [manual] # Only run in CI - id: mypy-3.12 # TODO: Use https://github.com/pre-commit/mirrors-mypy when mypy setup is less awkward name: Run mypy for Python 3.12 - entry: tools/mypy.sh 1 "3.12" - language: python - types: [python] - additional_dependencies: *mypy_deps + entry: python tools/pre_commit/mypy.py 1 "3.12" + <<: *mypy_common stages: [manual] # Only run in CI - id: shellcheck name: Lint shell scripts diff --git a/pyproject.toml b/pyproject.toml index f43ae69e00bd..88c5c4067f5a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -110,27 +110,6 @@ ignore_missing_imports = true check_untyped_defs = true follow_imports = "silent" -# After fixing type errors resulting from follow_imports: "skip" -> "silent", -# move the directory 
here and remove it from tools/mypy.sh -files = [ - "vllm/*.py", - "vllm/assets", - "vllm/entrypoints", - "vllm/inputs", - "vllm/logging_utils", - "vllm/multimodal", - "vllm/platforms", - "vllm/transformers_utils", - "vllm/triton_utils", - "vllm/usage", -] -# TODO(woosuk): Include the code from Megatron and HuggingFace. -exclude = [ - "vllm/model_executor/parallel_utils/|vllm/model_executor/models/", - # Ignore triton kernels in ops. - 'vllm/attention/ops/.*\.py$' -] - [tool.isort] skip_glob = [ ".buildkite/*", diff --git a/tools/mypy.sh b/tools/mypy.sh deleted file mode 100755 index 63e3b9a91663..000000000000 --- a/tools/mypy.sh +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/bash - -CI=${1:-0} -PYTHON_VERSION=${2:-local} - -if [ "$CI" -eq 1 ]; then - set -e -fi - -if [ $PYTHON_VERSION == "local" ]; then - PYTHON_VERSION=$(python -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') -fi - -run_mypy() { - echo "Running mypy on $1" - if [ "$CI" -eq 1 ] && [ -z "$1" ]; then - mypy --python-version "${PYTHON_VERSION}" "$@" - return - fi - mypy --follow-imports skip --python-version "${PYTHON_VERSION}" "$@" -} - -run_mypy # Note that this is less strict than CI -run_mypy tests -run_mypy vllm/attention -run_mypy vllm/compilation -run_mypy vllm/distributed -run_mypy vllm/engine -run_mypy vllm/executor -run_mypy vllm/inputs -run_mypy vllm/lora -run_mypy --exclude 'vllm/model_executor/layers/fla/ops' vllm/model_executor -run_mypy vllm/plugins -run_mypy vllm/worker -run_mypy vllm/v1 diff --git a/tools/pre_commit/mypy.py b/tools/pre_commit/mypy.py new file mode 100755 index 000000000000..039cf6075f63 --- /dev/null +++ b/tools/pre_commit/mypy.py @@ -0,0 +1,140 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Run mypy on changed files. + +This script is designed to be used as a pre-commit hook. It runs mypy +on files that have been changed. It groups files into different mypy calls +based on their directory to avoid import following issues. + +Usage: + python tools/pre_commit/mypy.py + +Args: + ci: "1" if running in CI, "0" otherwise. In CI, follow_imports is set to + "silent" for the main group of files. + python_version: Python version to use (e.g., "3.10") or "local" to use + the local Python version. + changed_files: List of changed files to check. +""" + +import subprocess +import sys +from typing import Optional + +import regex as re + +FILES = [ + "vllm/*.py", + "vllm/assets", + "vllm/entrypoints", + "vllm/inputs", + "vllm/logging_utils", + "vllm/multimodal", + "vllm/platforms", + "vllm/transformers_utils", + "vllm/triton_utils", + "vllm/usage", +] + +# After fixing errors resulting from changing follow_imports +# from "skip" to "silent", move the following directories to FILES +SEPARATE_GROUPS = [ + "tests", + "vllm/attention", + "vllm/compilation", + "vllm/distributed", + "vllm/engine", + "vllm/executor", + "vllm/inputs", + "vllm/lora", + "vllm/model_executor", + "vllm/plugins", + "vllm/worker", + "vllm/v1", +] + +# TODO(woosuk): Include the code from Megatron and HuggingFace. +EXCLUDE = [ + "vllm/model_executor/parallel_utils", + "vllm/model_executor/models", + "vllm/model_executor/layers/fla/ops", + # Ignore triton kernels in ops. + "vllm/attention/ops", +] + + +def group_files(changed_files: list[str]) -> dict[str, list[str]]: + """ + Group changed files into different mypy calls. + + Args: + changed_files: List of changed files. + + Returns: + A dictionary mapping file group names to lists of changed files. 
+ """ + exclude_pattern = re.compile(f"^{'|'.join(EXCLUDE)}.*") + files_pattern = re.compile(f"^({'|'.join(FILES)}).*") + file_groups = {"": []} + file_groups.update({k: [] for k in SEPARATE_GROUPS}) + for changed_file in changed_files: + # Skip files which should be ignored completely + if exclude_pattern.match(changed_file): + continue + # Group files by mypy call + if files_pattern.match(changed_file): + file_groups[""].append(changed_file) + continue + else: + for directory in SEPARATE_GROUPS: + if re.match(f"^{directory}.*", changed_file): + file_groups[directory].append(changed_file) + break + return file_groups + + +def mypy(targets: list[str], python_version: Optional[str], + follow_imports: Optional[str], file_group: str) -> int: + """ + Run mypy on the given targets. + + Args: + targets: List of files or directories to check. + python_version: Python version to use (e.g., "3.10") or None to use + the default mypy version. + follow_imports: Value for the --follow-imports option or None to use + the default mypy behavior. + file_group: The file group name for logging purposes. + + Returns: + The return code from mypy. + """ + args = ["mypy"] + if python_version is not None: + args += ["--python-version", python_version] + if follow_imports is not None: + args += ["--follow-imports", follow_imports] + print(f"$ {' '.join(args)} {file_group}") + return subprocess.run(args + targets, check=False).returncode + + +def main(): + ci = sys.argv[1] == "1" + python_version = sys.argv[2] + file_groups = group_files(sys.argv[3:]) + + if python_version == "local": + python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + + returncode = 0 + for file_group, changed_files in file_groups.items(): + follow_imports = None if ci and file_group == "" else "skip" + if changed_files: + returncode |= mypy(changed_files, python_version, follow_imports, + file_group) + return returncode + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index 092d3f276d1c..c41f44aa4718 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -1468,7 +1468,7 @@ def get_metrics(self) -> list["Metric"]: def _validate_and_add_requests( self, - prompts: Union[PromptType, Sequence[PromptType]], + prompts: Union[PromptType, Sequence[PromptType], DataPrompt], params: Union[SamplingParams, Sequence[SamplingParams], PoolingParams, Sequence[PoolingParams]], *, @@ -1478,7 +1478,7 @@ def _validate_and_add_requests( ) -> None: if isinstance(prompts, (str, dict)): # Convert a single prompt to a list. - prompts = [prompts] + prompts = [prompts] # type: ignore[list-item] num_requests = len(prompts) if isinstance(params, Sequence) and len(params) != num_requests: diff --git a/vllm/entrypoints/renderer.py b/vllm/entrypoints/renderer.py index fb859d57be9f..d7ce57c728ba 100644 --- a/vllm/entrypoints/renderer.py +++ b/vllm/entrypoints/renderer.py @@ -280,7 +280,7 @@ def _validate_and_normalize_truncate_tokens( if truncate_prompt_tokens < 0: truncate_prompt_tokens = self.model_config.max_model_len - if max_length is not None and truncate_prompt_tokens > max_length: + if max_length is not None and truncate_prompt_tokens > max_length: # type: ignore[operator] raise ValueError( f"truncate_prompt_tokens ({truncate_prompt_tokens}) " f"cannot be greater than max_length ({max_length}). 
" diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index b74b746a3583..022e35a399c5 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -551,9 +551,10 @@ async def _batch_encode_loop(self, queue: asyncio.Queue, can_batch: bool): # If every request uses identical kwargs we can run a single # batched tokenizer call for a big speed-up. if can_batch and len(prompts) > 1: - encode_fn = partial(self.tokenizer, prompts, **kwargs) + batch_encode_fn = partial(self.tokenizer, prompts, + **kwargs) results = await self._loop.run_in_executor( - self._executor, encode_fn) + self._executor, batch_encode_fn) for i, fut in enumerate(result_futures): if not fut.done(): @@ -889,7 +890,7 @@ def get_open_port() -> int: def get_open_ports_list(count: int = 5) -> list[int]: """Get a list of open ports.""" - ports = set() + ports = set[int]() while len(ports) < count: ports.add(get_open_port()) return list(ports) @@ -1279,7 +1280,7 @@ def as_list(maybe_list: Iterable[T]) -> list[T]: def as_iter(obj: Union[T, Iterable[T]]) -> Iterable[T]: if isinstance(obj, str) or not isinstance(obj, Iterable): - obj = [obj] + return [obj] # type: ignore[list-item] return obj diff --git a/vllm/utils/tensor_schema.py b/vllm/utils/tensor_schema.py index 21d3249fe154..d75dbcd5401b 100644 --- a/vllm/utils/tensor_schema.py +++ b/vllm/utils/tensor_schema.py @@ -22,9 +22,8 @@ def __init__( self.dims = dims self.dynamic_dims = dynamic_dims if dynamic_dims else set() - def resolve(self, **bindings: dict[str, - int]) -> tuple[Union[int, str], ...]: - resolved = [] + def resolve(self, **bindings: int) -> tuple[Union[int, str], ...]: + resolved = list[Union[int, str]]() for dim in self.dims: if isinstance(dim, str) and dim in bindings: resolved.append(bindings[dim]) @@ -159,7 +158,7 @@ def _validate_tensor_shape_expected( def validate(self) -> None: type_hints = get_type_hints(self.__class__, include_extras=True) - shape_env = {} + shape_env = dict[str, int]() for field_name, field_type in type_hints.items(): # Check if field is missing From ac243886b02399d327314b2d8bbf9989fd07f247 Mon Sep 17 00:00:00 2001 From: Sara-KS <50249410+Sara-KS@users.noreply.github.com> Date: Mon, 22 Sep 2025 09:29:54 -0500 Subject: [PATCH 230/518] [Kernel] MI-300X triton moe configs (#23445) Signed-off-by: Sara Kokkila Schumacher --- ...N=128,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ ...N=256,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ ...N=512,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ ...N=192,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ ...N=384,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ ...N=768,device_name=AMD_Instinct_MI300X.json | 200 ++++++++++++++++++ 6 files changed, 1200 insertions(+) create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=62,N=128,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=72,N=192,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=AMD_Instinct_MI300X.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=AMD_Instinct_MI300X.json diff --git 
a/vllm/model_executor/layers/fused_moe/configs/E=62,N=128,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=62,N=128,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..40d86ff8ba32 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=62,N=128,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1536": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 4, + "num_warps": 8, + 
"num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "3072": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..6014d827d741 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=62,N=256,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + 
"BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1536": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..3622659f3e91 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=62,N=512,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "8": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "16": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + 
"num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1536": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=72,N=192,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=72,N=192,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..311d2e829a05 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=72,N=192,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + 
"kpack": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "1536": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "3072": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "4096": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..91c4b916b864 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=72,N=384,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 1, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + 
"GROUP_SIZE_M": 4, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=AMD_Instinct_MI300X.json b/vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=AMD_Instinct_MI300X.json new file mode 100644 index 000000000000..8fee30ec7066 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=72,N=768,device_name=AMD_Instinct_MI300X.json @@ -0,0 +1,200 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + 
"matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "8": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 1 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "2048": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 4, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0, + "matrix_instr_nonkdim": 16, + "kpack": 2 + } +} From c10101a3eba053d22842edd95941d49e4324c727 Mon Sep 17 00:00:00 2001 From: Csrayz Date: Mon, 22 Sep 2025 22:53:13 +0800 Subject: [PATCH 231/518] [Bugfix] Fix several issues with p2p xPyD in GET type (#23993) Signed-off-by: Csrayz Signed-off-by: ivyilike Co-authored-by: ivyilike --- .../kv_connector/v1/p2p/p2p_nccl_connector.py | 5 +++- .../kv_connector/v1/p2p/p2p_nccl_engine.py | 26 ++++++++++++------- 2 files changed, 
21 insertions(+), 10 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py index ec72905a0d3e..3dadfa595ef1 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py @@ -178,6 +178,9 @@ def inject_kv_into_layer( # Load the KV for each request each layer for request in metadata.requests: + request_id = request.request_id + ip, port = self.parse_request_id(request_id, False) + remote_address = ip + ":" + str(port + self._rank) for layer_name in forward_context.no_compile_layers: layer = forward_context.no_compile_layers[layer_name] @@ -191,7 +194,7 @@ def inject_kv_into_layer( layer = kv_cache[forward_context.virtual_engine] kv_cache = self.p2p_nccl_engine.recv_tensor( - request.request_id + "#" + layer_name) + request.request_id + "#" + layer_name, remote_address) if kv_cache is None: logger.warning("🚧kv_cache is None, %s", request.request_id) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py index fa7cc66ab654..959bf0277a3f 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py @@ -134,7 +134,6 @@ def __init__(self, # PUT or PUT_ASYNC # tensor_id: torch.Tensor self.send_queue: deque[SendQueueItem] = deque() - self.send_request_id_to_tensor_ids: dict[str, set[str]] = {} if self.send_type == "PUT_ASYNC": self._send_thread = threading.Thread(target=self.send_async, daemon=True) @@ -143,6 +142,7 @@ def __init__(self, # tensor_id: torch.Tensor/(addr, dtype, shape) self.recv_store: dict[str, Any] = {} self.recv_request_id_to_tensor_ids: dict[str, set[str]] = {} + self.send_request_id_to_tensor_ids: dict[str, set[str]] = {} self.socks: dict[str, Any] = {} # remote_address: client socket self.comms: dict[str, Any] = {} # remote_address: (ncclComm_t, rank) @@ -223,18 +223,26 @@ def send_tensor( # GET with self.send_store_cv: tensor_size = tensor.element_size() * tensor.numel() + if tensor_size > self.buffer_size_threshold: + logger.warning( + "❗[GET]tensor_id:%s, tensor_size:%d, is greater than" + "buffer size threshold :%d, skip send to %s, rank:%d", + tensor_id, tensor_size, self.buffer_size_threshold, + remote_address, self.rank) + return False while (self.buffer_size + tensor_size > self.buffer_size_threshold): - oldest_tenser_id = next(iter(self.send_store)) - oldest_tenser = self.send_store.pop(oldest_tenser_id) - oldest_tenser_size = oldest_tenser.element_size( - ) * oldest_tenser.numel() - self.buffer_size -= oldest_tenser_size - logger.info( + assert len(self.send_store) > 0 + oldest_tensor_id = next(iter(self.send_store)) + oldest_tensor = self.send_store.pop(oldest_tensor_id) + oldest_tensor_size = oldest_tensor.element_size( + ) * oldest_tensor.numel() + self.buffer_size -= oldest_tensor_size + logger.debug( "⛔[GET]Send to %s, tensor_id:%s, tensor_size:%d," - " buffer_size:%d, oldest_tenser_size:%d, rank:%d", + " buffer_size:%d, oldest_tensor_size:%d, rank:%d", remote_address, tensor_id, tensor_size, self.buffer_size, - oldest_tenser_size, self.rank) + oldest_tensor_size, self.rank) self.send_store[tensor_id] = tensor self.buffer_size += tensor_size From 175811e3b53f8f13eb3e8ac6aae02050f6e1412f Mon Sep 17 00:00:00 2001 From: Burkhard Ringlein Date: Mon, 22 Sep 2025 17:20:28 
+0200 Subject: [PATCH 232/518] [V1][Attention] Split triton_attn in triton-only and rocm specific backends (#24648) Signed-off-by: Burkhard Ringlein --- vllm/engine/arg_utils.py | 1 + vllm/platforms/interface.py | 1 + vllm/platforms/rocm.py | 10 + vllm/v1/attention/backends/rocm_attn.py | 426 ++++++++++++++++++++++ vllm/v1/attention/backends/triton_attn.py | 169 +++------ 5 files changed, 483 insertions(+), 124 deletions(-) create mode 100644 vllm/v1/attention/backends/rocm_attn.py diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index b09d43f70558..d4d801b155e1 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1494,6 +1494,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: "FLEX_ATTENTION", "TREE_ATTN", "XFORMERS_VLLM_V1", + "ROCM_ATTN_VLLM_V1", ] if (envs.is_set("VLLM_ATTENTION_BACKEND") and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS): diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 8a05c84d4242..cad04ea14c01 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -67,6 +67,7 @@ class _Backend(enum.Enum): FLEX_ATTENTION = enum.auto() TREE_ATTN = enum.auto() XFORMERS_VLLM_V1 = enum.auto() + ROCM_ATTN_VLLM_V1 = enum.auto() class PlatformEnum(enum.Enum): diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 0c7b9c2a4abf..6a49bd4a3386 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -231,7 +231,17 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, logger.info("Using Flash Attention backend on V1 engine.") return ("vllm.v1.attention.backends." "rocm_aiter_fa.AiterFlashAttentionBackend") + elif (envs.VLLM_ROCM_USE_AITER and + envs.VLLM_USE_AITER_UNIFIED_ATTENTION) or \ + envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION or \ + selected_backend == _Backend.ROCM_ATTN_VLLM_V1: + # rocm specific backend, with aiter and/or + # triton prefix-prefill + logger.info("Using Rocm/Aiter Attention backend on V1 engine.") + return ("vllm.v1.attention.backends." + "rocm_attn.RocmAttentionBackend") else: + # default case, using triton unified attention logger.info("Using Triton Attention backend on V1 engine.") return ("vllm.v1.attention.backends." 
"triton_attn.TritonAttentionBackend") diff --git a/vllm/v1/attention/backends/rocm_attn.py b/vllm/v1/attention/backends/rocm_attn.py new file mode 100644 index 000000000000..365df5f0d6ec --- /dev/null +++ b/vllm/v1/attention/backends/rocm_attn.py @@ -0,0 +1,426 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Attention layer with PagedAttention and Triton prefix prefill.""" +from dataclasses import dataclass +from functools import cache +from typing import ClassVar, Optional + +import torch + +from vllm import _custom_ops as ops +from vllm import envs +from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, + AttentionMetadata, AttentionType) +from vllm.attention.ops.chunked_prefill_paged_decode import ( + chunked_prefill_paged_decode) +from vllm.attention.ops.paged_attn import PagedAttention +from vllm.config import VllmConfig +from vllm.logger import init_logger +from vllm.model_executor.layers.quantization.utils.quant_utils import ( + QuantKey, kFp8StaticTensorSym) +from vllm.platforms import current_platform +from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata +from vllm.v1.attention.backends.utils import (AttentionCGSupport, + AttentionMetadataBuilder, + CommonAttentionMetadata) +from vllm.v1.kv_cache_interface import AttentionSpec + +logger = init_logger(__name__) + + +@dataclass +class RocmAttentionMetadata: + # NOTE(sang): Definition of context_len, query_len, and seq_len. + # |---------- N-1 iteration --------| + # |---------------- N iteration ---------------------| + # |- tokenA -|......................|-- newTokens ---| + # |---------- context_len ----------| + # |-------------------- seq_len ---------------------| + # |-- query_len ---| + + num_actual_tokens: int # Number of tokens excluding padding. + max_query_len: int + query_start_loc: torch.Tensor + max_seq_len: int + seq_lens: torch.Tensor + block_table: torch.Tensor + slot_mapping: torch.Tensor + + # For cascade attention. + use_cascade: bool + common_prefix_len: int + cu_prefix_query_lens: Optional[torch.Tensor] + prefix_kv_lens: Optional[torch.Tensor] + suffix_kv_lens: Optional[torch.Tensor] + + # Optional aot scheduling + scheduler_metadata: Optional[torch.Tensor] = None + prefix_scheduler_metadata: Optional[torch.Tensor] = None + + +class RocmAttentionMetadataBuilder( + AttentionMetadataBuilder[RocmAttentionMetadata]): + cudagraph_support: ClassVar[AttentionCGSupport] = AttentionCGSupport.ALWAYS + + def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], + vllm_config: VllmConfig, device: torch.device): + super().__init__(kv_cache_spec, layer_names, vllm_config, device) + + self.block_size = kv_cache_spec.block_size + + model_config = vllm_config.model_config + self.num_heads_q = model_config.get_num_attention_heads( + vllm_config.parallel_config) + self.num_heads_kv = model_config.get_num_kv_heads( + vllm_config.parallel_config) + self.headdim = model_config.get_head_size() + + def build_for_cudagraph_capture( + self, common_attn_metadata: CommonAttentionMetadata + ) -> RocmAttentionMetadata: + attn_metadata = self.build(0, common_attn_metadata) + # When doing full graph capture, setting seq_lens to + # max_model_len will cause graph capture to be extremely + # slow, so here we set it to 1. 
+ attn_metadata.seq_lens.fill_(1) + return attn_metadata + + def build(self, + common_prefix_len: int, + common_attn_metadata: CommonAttentionMetadata, + fast_build: bool = False) -> RocmAttentionMetadata: + num_actual_tokens = common_attn_metadata.num_actual_tokens + max_query_len = common_attn_metadata.max_query_len + + max_seq_len = common_attn_metadata.max_seq_len + query_start_loc = common_attn_metadata.query_start_loc + seq_lens = common_attn_metadata.seq_lens + block_table_tensor = common_attn_metadata.block_table_tensor + slot_mapping = common_attn_metadata.slot_mapping + + use_cascade = common_prefix_len > 0 + + if use_cascade: + cu_prefix_query_lens = torch.tensor([0, num_actual_tokens], + dtype=torch.int32, + device=self.device) + prefix_kv_lens = torch.tensor([common_prefix_len], + dtype=torch.int32, + device=self.device) + suffix_kv_lens = (common_attn_metadata.seq_lens_cpu - + common_prefix_len) + suffix_kv_lens = suffix_kv_lens.to(self.device) + else: + cu_prefix_query_lens = None + prefix_kv_lens = None + suffix_kv_lens = None + prefix_scheduler_metadata = None + + attn_metadata = RocmAttentionMetadata( + num_actual_tokens=num_actual_tokens, + max_query_len=max_query_len, + query_start_loc=query_start_loc, + max_seq_len=max_seq_len, + seq_lens=seq_lens, + block_table=block_table_tensor, + slot_mapping=slot_mapping, + use_cascade=use_cascade, + common_prefix_len=common_prefix_len, + cu_prefix_query_lens=cu_prefix_query_lens, + prefix_kv_lens=prefix_kv_lens, + suffix_kv_lens=suffix_kv_lens, + prefix_scheduler_metadata=prefix_scheduler_metadata, + ) + return attn_metadata + + +class RocmAttentionBackend(AttentionBackend): + + accept_output_buffer: bool = True + + @classmethod + def get_supported_dtypes(cls) -> list[torch.dtype]: + return [torch.float16, torch.bfloat16] + + @classmethod + def get_supported_head_sizes(cls) -> list[int]: + return [32, 64, 96, 128, 160, 192, 224, 256] + + @classmethod + def validate_head_size(cls, head_size: int) -> None: + supported_head_sizes = cls.get_supported_head_sizes() + if head_size not in supported_head_sizes: + attn_type = cls.__name__.removesuffix("Backend") + raise ValueError( + f"Head size {head_size} is not supported by {attn_type}. " + f"Supported head sizes are: {supported_head_sizes}. 
" + "Set VLLM_ATTENTION_BACKEND=FLEX_ATTENTION to use " + "FlexAttention backend which supports all head sizes.") + + @staticmethod + def get_name() -> str: + return "ROCM_ATTN_VLLM_V1" + + @staticmethod + def get_impl_cls() -> type["RocmAttentionImpl"]: + return RocmAttentionImpl + + @staticmethod + def get_metadata_cls() -> type["AttentionMetadata"]: + return RocmAttentionMetadata + + @staticmethod + def get_kv_cache_shape( + num_blocks: int, + block_size: int, + num_kv_heads: int, + head_size: int, + ) -> tuple[int, ...]: + if block_size % 16 != 0: + raise ValueError("Block size must be a multiple of 16.") + return (2, num_blocks, block_size, num_kv_heads, head_size) + + @staticmethod + def use_cascade_attention(*args, **kwargs) -> bool: + return False + + @staticmethod + def get_builder_cls() -> type["RocmAttentionMetadataBuilder"]: + return RocmAttentionMetadataBuilder + + +@cache +def use_aiter_unified_attention() -> bool: + """Check if aiter unified attention should be used.""" + # VLLM_ROCM_USE_AITER_MHA needs to set to 0 as well as it is set + # to 1 as default + return envs.VLLM_ROCM_USE_AITER \ + and envs.VLLM_USE_AITER_UNIFIED_ATTENTION + + +class RocmAttentionImpl(AttentionImpl): + + def fused_output_quant_supported(self, quant_key: QuantKey): + return quant_key == kFp8StaticTensorSym + + def __init__( + self, + num_heads: int, + head_size: int, + scale: float, + num_kv_heads: int, + alibi_slopes: Optional[list[float]], + sliding_window: Optional[int], + kv_cache_dtype: str, + logits_soft_cap: Optional[float] = None, + attn_type: AttentionType = AttentionType.DECODER, + kv_sharing_target_layer_name: Optional[int] = None, + sinks: Optional[torch.Tensor] = None, + ) -> None: + self.num_heads = num_heads + self.head_size = head_size + self.scale = float(scale) + self.num_kv_heads = num_kv_heads + if alibi_slopes is not None: + alibi_slopes = torch.tensor(alibi_slopes, dtype=torch.float32) + self.alibi_slopes = alibi_slopes + if sliding_window is None: + self.sliding_window = (-1, -1) + else: + self.sliding_window = (sliding_window - 1, 0) + self.kv_cache_dtype = kv_cache_dtype + if logits_soft_cap is None: + # In flash-attn, setting logits_soft_cap as 0 means no soft cap. + logits_soft_cap = 0 + self.logits_soft_cap = logits_soft_cap + self.kv_sharing_target_layer_name = kv_sharing_target_layer_name + + self.num_queries_per_kv = self.num_heads // self.num_kv_heads + + RocmAttentionBackend.validate_head_size(head_size) + + if attn_type != AttentionType.DECODER: + raise NotImplementedError("Encoder self-attention and " + "encoder/decoder cross-attention " + "are not implemented for " + "RocmAttentionImpl") + + self.fp8_dtype = current_platform.fp8_dtype() + self.force_prefill_decode_attn = \ + envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION + + if not self.force_prefill_decode_attn: + # If not using prefill decode attention, we use the Triton + # unified attention implementation. + if use_aiter_unified_attention(): + logger.info_once( + "Using aiter unified attention for RocmAttentionImpl") + from aiter.ops.triton.unified_attention import ( + unified_attention) + self.unified_attention = unified_attention + else: + logger.info_once( + "Using vllm unified attention for RocmAttentionImpl") + from vllm.attention.ops.triton_unified_attention import ( + unified_attention) + self.unified_attention = unified_attention + + self.sinks = sinks + if sinks is not None: + assert sinks.shape[0] == num_heads, ( + "Sinks must have the same number of heads as the number of " + f"heads in the layer. 
Sinks shape: {sinks.shape}, " + f"num_heads: {num_heads}.") + + def forward( + self, + layer: torch.nn.Module, + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + kv_cache: torch.Tensor, + attn_metadata: FlashAttentionMetadata, + output: Optional[torch.Tensor] = None, + output_scale: Optional[torch.Tensor] = None, + output_block_scale: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + """Forward pass with FlashAttention. + + Args: + query: shape = [num_tokens, num_heads, head_size] + key: shape = [num_tokens, num_kv_heads, head_size] + value: shape = [num_tokens, num_kv_heads, head_size] + kv_cache: shape = + [2, num_blocks, block_size, num_kv_heads, head_size] + attn_metadata: Metadata for attention. + Returns: + shape = [num_tokens, num_heads * head_size] + """ + assert output is not None, "Output tensor must be provided." + + if output_block_scale is not None: + raise NotImplementedError( + "fused block_scale output quantization is not yet supported" + " for RocmAttentionImpl") + + if attn_metadata is None: + # Profiling run. + return output + + assert attn_metadata.use_cascade is False + + # IMPORTANT! + # NOTE(woosuk): With piece-wise CUDA graphs, this method is executed in + # eager-mode PyTorch. Thus, we need to be careful about any CPU overhead + # in this method. For example, `view` and `slice` (or `[:n]`) operations + # are surprisingly slow even in the case they do not invoke any GPU ops. + # Minimize the PyTorch ops in this method as much as possible. + # Whenever making a change in this method, please benchmark the + # performance to make sure it does not introduce any overhead. + + use_prefill_decode_attn = self.force_prefill_decode_attn + num_actual_tokens = attn_metadata.num_actual_tokens + + if use_prefill_decode_attn: + key_cache, value_cache = PagedAttention.split_kv_cache( + kv_cache, self.num_kv_heads, self.head_size) + else: + key_cache, value_cache = kv_cache.unbind(0) + + if self.kv_sharing_target_layer_name is None: + # Reshape the input keys and values and store them in the cache. + # Skip this if sharing KV cache with an earlier attention layer. + if use_prefill_decode_attn: + PagedAttention.write_to_paged_cache( + key, + value, + key_cache, + value_cache, + attn_metadata.slot_mapping, + self.kv_cache_dtype, + layer._k_scale, + layer._v_scale, + ) + else: + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + attn_metadata.slot_mapping, + self.kv_cache_dtype, + layer._k_scale, + layer._v_scale, + ) + + if self.kv_cache_dtype.startswith("fp8"): + key_cache = key_cache.view(self.fp8_dtype) + value_cache = value_cache.view(self.fp8_dtype) + num_tokens, num_heads, head_size = query.shape + assert layer._q_scale_float == 1.0, \ + "A non 1.0 q_scale is not currently supported." + if current_platform.is_cuda(): + # Skip Q quantization on ROCm and XPU, enable this on cuda + # only, since dequantizing back to f32 in the attention kernel + # is not supported. + query, _ = ops.scaled_fp8_quant( + query.reshape( + (num_tokens, num_heads * head_size)).contiguous(), + layer._q_scale) + query = query.reshape((num_tokens, num_heads, head_size)) + + cu_seqlens_q = attn_metadata.query_start_loc + seqused_k = attn_metadata.seq_lens + max_seqlen_q = attn_metadata.max_query_len + max_seqlen_k = attn_metadata.max_seq_len + block_table = attn_metadata.block_table + + if use_prefill_decode_attn: + # Compute attention and update output up to `num_actual_tokens`. 
+ chunked_prefill_paged_decode( + query=query[:num_actual_tokens], + key=key[:num_actual_tokens], + value=value[:num_actual_tokens], + output=output[:num_actual_tokens], + kv_cache_dtype=self.kv_cache_dtype, + key_cache=key_cache, + value_cache=value_cache, + block_table=block_table, + query_start_loc=cu_seqlens_q, + seq_lens=seqused_k, + max_seq_len=max_seqlen_k, + max_query_len=max_seqlen_q, + k_scale=layer._k_scale, + v_scale=layer._v_scale, + alibi_slopes=self.alibi_slopes, + sliding_window=self.sliding_window[0], + sm_scale=self.scale, + output_scale=output_scale, + sinks=self.sinks, + ) + + else: + descale_shape = (cu_seqlens_q.shape[0] - 1, key.shape[1]) + + self.unified_attention( + q=query[:num_actual_tokens], + k=key_cache, + v=value_cache, + out=output[:num_actual_tokens], + cu_seqlens_q=cu_seqlens_q, + max_seqlen_q=max_seqlen_q, + seqused_k=seqused_k, + max_seqlen_k=max_seqlen_k, + softmax_scale=self.scale, + causal=True, + alibi_slopes=self.alibi_slopes, + window_size=self.sliding_window, + block_table=block_table, + softcap=self.logits_soft_cap, + q_descale=None, # Not supported + k_descale=layer._k_scale.expand(descale_shape), + v_descale=layer._v_scale.expand(descale_shape), + sinks=self.sinks, + output_scale=output_scale) + + return output diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index 784912a122f6..722c23f150cd 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -1,24 +1,19 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -"""Attention layer with PagedAttention and Triton prefix prefill.""" +"""High-Performance Triton-only Attention layer.""" from dataclasses import dataclass -from functools import cache from typing import ClassVar, Optional import torch -from vllm import envs from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType) -from vllm.attention.ops.chunked_prefill_paged_decode import ( - chunked_prefill_paged_decode) -from vllm.attention.ops.paged_attn import PagedAttention +from vllm.attention.ops.triton_unified_attention import unified_attention from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.model_executor.layers.quantization.utils.quant_utils import ( QuantKey, kFp8StaticTensorSym) from vllm.platforms import current_platform -from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata from vllm.v1.attention.backends.utils import (AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata) @@ -144,20 +139,15 @@ class TritonAttentionBackend(AttentionBackend): @classmethod def get_supported_dtypes(cls) -> list[torch.dtype]: - return [torch.float16, torch.bfloat16] - - @classmethod - def get_supported_head_sizes(cls) -> list[int]: - return [32, 64, 96, 128, 160, 192, 224, 256] + return [torch.float16, torch.bfloat16, torch.float32] @classmethod def validate_head_size(cls, head_size: int) -> None: - supported_head_sizes = cls.get_supported_head_sizes() - if head_size not in supported_head_sizes: - attn_type = cls.__name__.removesuffix("Backend") + # Triton Attention supports any head size above 32 + if head_size < 32: raise ValueError( - f"Head size {head_size} is not supported by {attn_type}. " - f"Supported head sizes are: {supported_head_sizes}. " + f"Head size {head_size} is not supported by TritonAttention." + f"Head sizes need to be larger or equal 32 for this backend. 
" "Set VLLM_ATTENTION_BACKEND=FLEX_ATTENTION to use " "FlexAttention backend which supports all head sizes.") @@ -182,7 +172,7 @@ def get_kv_cache_shape( ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") - return (2, num_blocks, block_size, num_kv_heads, head_size) + return (num_blocks, 2, block_size, num_kv_heads, head_size) @staticmethod def use_cascade_attention(*args, **kwargs) -> bool: @@ -193,15 +183,6 @@ def get_builder_cls() -> type["TritonAttentionMetadataBuilder"]: return TritonAttentionMetadataBuilder -@cache -def use_aiter_unified_attention() -> bool: - """Check if aiter unified attention should be used.""" - # VLLM_ROCM_USE_AITER_MHA needs to set to 0 as well as it is set - # to 1 as default - return envs.VLLM_ROCM_USE_AITER \ - and envs.VLLM_USE_AITER_UNIFIED_ATTENTION - - class TritonAttentionImpl(AttentionImpl): def fused_output_quant_supported(self, quant_key: QuantKey): @@ -250,24 +231,6 @@ def __init__( "TritonAttentionImpl") self.fp8_dtype = current_platform.fp8_dtype() - self.force_prefill_decode_attn = \ - envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION - - if not self.force_prefill_decode_attn: - # If not using prefill decode attention, we use the Triton - # unified attention implementation. - if use_aiter_unified_attention(): - logger.info_once( - "Using aiter unified attention for TritonAttentionImpl") - from aiter.ops.triton.unified_attention import ( - unified_attention) - self.unified_attention = unified_attention - else: - logger.info_once( - "Using vllm unified attention for TritonAttentionImpl") - from vllm.attention.ops.triton_unified_attention import ( - unified_attention) - self.unified_attention = unified_attention self.sinks = sinks if sinks is not None: @@ -283,19 +246,19 @@ def forward( key: torch.Tensor, value: torch.Tensor, kv_cache: torch.Tensor, - attn_metadata: FlashAttentionMetadata, + attn_metadata: TritonAttentionMetadata, output: Optional[torch.Tensor] = None, output_scale: Optional[torch.Tensor] = None, output_block_scale: Optional[torch.Tensor] = None, ) -> torch.Tensor: - """Forward pass with FlashAttention. + """Forward pass with Paged Attention impl. in Triton. Args: query: shape = [num_tokens, num_heads, head_size] key: shape = [num_tokens, num_kv_heads, head_size] value: shape = [num_tokens, num_kv_heads, head_size] kv_cache: shape = - [2, num_blocks, block_size, num_kv_heads, head_size] + [num_blocks, 2, block_size, num_kv_heads, head_size] attn_metadata: Metadata for attention. Returns: shape = [num_tokens, num_heads * head_size] @@ -322,40 +285,22 @@ def forward( # Whenever making a change in this method, please benchmark the # performance to make sure it does not introduce any overhead. - use_prefill_decode_attn = self.force_prefill_decode_attn num_actual_tokens = attn_metadata.num_actual_tokens - - if use_prefill_decode_attn: - key_cache, value_cache = PagedAttention.split_kv_cache( - kv_cache, self.num_kv_heads, self.head_size) - else: - key_cache, value_cache = kv_cache.unbind(0) + key_cache, value_cache = kv_cache.unbind(1) if self.kv_sharing_target_layer_name is None: # Reshape the input keys and values and store them in the cache. # Skip this if sharing KV cache with an earlier attention layer. 
- if use_prefill_decode_attn: - PagedAttention.write_to_paged_cache( - key, - value, - key_cache, - value_cache, - attn_metadata.slot_mapping, - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) - else: - ops.reshape_and_cache_flash( - key, - value, - key_cache, - value_cache, - attn_metadata.slot_mapping, - self.kv_cache_dtype, - layer._k_scale, - layer._v_scale, - ) + ops.reshape_and_cache_flash( + key, + value, + key_cache, + value_cache, + attn_metadata.slot_mapping, + self.kv_cache_dtype, + layer._k_scale, + layer._v_scale, + ) if self.kv_cache_dtype.startswith("fp8"): key_cache = key_cache.view(self.fp8_dtype) @@ -379,52 +324,28 @@ def forward( max_seqlen_k = attn_metadata.max_seq_len block_table = attn_metadata.block_table - if use_prefill_decode_attn: - # Compute attention and update output up to `num_actual_tokens`. - chunked_prefill_paged_decode( - query=query[:num_actual_tokens], - key=key[:num_actual_tokens], - value=value[:num_actual_tokens], - output=output[:num_actual_tokens], - kv_cache_dtype=self.kv_cache_dtype, - key_cache=key_cache, - value_cache=value_cache, - block_table=block_table, - query_start_loc=cu_seqlens_q, - seq_lens=seqused_k, - max_seq_len=max_seqlen_k, - max_query_len=max_seqlen_q, - k_scale=layer._k_scale, - v_scale=layer._v_scale, - alibi_slopes=self.alibi_slopes, - sliding_window=self.sliding_window[0], - sm_scale=self.scale, - output_scale=output_scale, - sinks=self.sinks, - ) - - else: - descale_shape = (cu_seqlens_q.shape[0] - 1, key.shape[1]) - - self.unified_attention( - q=query[:num_actual_tokens], - k=key_cache, - v=value_cache, - out=output[:num_actual_tokens], - cu_seqlens_q=cu_seqlens_q, - max_seqlen_q=max_seqlen_q, - seqused_k=seqused_k, - max_seqlen_k=max_seqlen_k, - softmax_scale=self.scale, - causal=True, - alibi_slopes=self.alibi_slopes, - window_size=self.sliding_window, - block_table=block_table, - softcap=self.logits_soft_cap, - q_descale=None, # Not supported - k_descale=layer._k_scale.expand(descale_shape), - v_descale=layer._v_scale.expand(descale_shape), - sinks=self.sinks, - output_scale=output_scale) + descale_shape = (cu_seqlens_q.shape[0] - 1, key.shape[1]) + + unified_attention( + q=query[:num_actual_tokens], + k=key_cache, + v=value_cache, + out=output[:num_actual_tokens], + cu_seqlens_q=cu_seqlens_q, + max_seqlen_q=max_seqlen_q, + seqused_k=seqused_k, + max_seqlen_k=max_seqlen_k, + softmax_scale=self.scale, + causal=True, + alibi_slopes=self.alibi_slopes, + window_size=self.sliding_window, + block_table=block_table, + softcap=self.logits_soft_cap, + q_descale=None, # Not supported + k_descale=layer._k_scale.expand(descale_shape), + v_descale=layer._v_scale.expand(descale_shape), + sinks=self.sinks, + output_scale=output_scale, + ) return output From 06a41334c7e5c23b9a053f866c02524945d7d1db Mon Sep 17 00:00:00 2001 From: Bowen Wang Date: Mon, 22 Sep 2025 09:31:05 -0700 Subject: [PATCH 233/518] [EPLB] Reduce EPLB Inference Overhead (#24573) Signed-off-by: Bowen Wang Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Tyler Michael Smith --- .../layers/fused_moe/fused_moe.py | 73 +++++++++++++++++++ vllm/model_executor/layers/fused_moe/layer.py | 69 +++++------------- 2 files changed, 92 insertions(+), 50 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 6c2a5bda7cba..0e334fdf2404 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ 
b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1017,6 +1017,79 @@ def grouped_topk( return topk_weights.to(torch.float32), topk_ids.to(torch.int32) +@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend) +def eplb_map_to_physical_and_record( + topk_ids: torch.Tensor, + expert_load_view: torch.Tensor, + logical_to_physical_map: torch.Tensor, + logical_replica_count: torch.Tensor, + indices_type: Optional[torch.dtype] = None) -> torch.Tensor: + ''' + Map the logical expert ids to physical expert ids + and record the expert load metrics. + + This will select a pseudo-random replica for each logical expert. + Only used for EPLB. + + Args: + topk_ids: The logical expert ids. + expert_load_view: The expert load view. + logical_to_physical_map: The logical to physical map. + logical_replica_count: The logical replica count. + indices_type: The indices type. + + Returns: + The physical expert ids. + ''' + + # 1. Convert the logical expert ids to physical expert ids + # Directly select a random replica for each logical expert + + # In case `indices_type` is not `torch.long` or `torch.int`, + # e.g. `torch.uint32` as required by dispatch/combine kernels + topk_ids_long = topk_ids.long() + # Use (token position) modulo (replica count) + # to deterministically choose a replica + replica_count = logical_replica_count[topk_ids_long] + # Flatten-position based index, reshaped back to `topk_ids` shape + pos_indices = torch.arange(topk_ids.numel(), + device=topk_ids.device, + dtype=torch.long).reshape_as(topk_ids) + # Compute pseudo-random indices by modulo + replica_indices = (pos_indices % replica_count).unsqueeze(-1) + physical_ids = logical_to_physical_map[topk_ids_long].gather( + -1, replica_indices).squeeze(-1) + + topk_ids = physical_ids + + # 2. Record expert load metrics. + + # TODO(bowen): When using `FusedMoEModularKernel`, this + # can be done in a more unified way, since + # `FusedMoEPrepareAndFinalize` will return the expert + # token count, in some cases directly from the kernel. + # However, now there are many code paths not using + # the modular kernel, e.g. calling `fused_experts`, + # so we decide to keep the logic here. + # + # If later refactor moved all the MoE kernel calls + # to the modular kernel, we can move this logic there + # to achieve better efficiency. + + # `expert_load_view`: (num_physical_experts,) + + # `torch.bincount` is not compilable, so use `scatter_add_` instead. 
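# For intuition, a tiny sketch of the mapping above with made-up numbers
# (none of these values come from the change itself): logical expert 0 has
# physical replicas [0, 1], logical expert 1 has the single replica [2], and
# three routed slots pick experts [0, 0, 1]:
#
#     import torch
#     l2p = torch.tensor([[0, 1], [2, 2]])   # logical -> physical (padded)
#     counts = torch.tensor([2, 1])          # replicas per logical expert
#     topk = torch.tensor([0, 0, 1])
#     pos = torch.arange(topk.numel())
#     phys = l2p[topk].gather(-1, (pos % counts[topk]).unsqueeze(-1)).squeeze(-1)
#     # phys == tensor([0, 1, 2]): the position-modulo choice spreads tokens
#     # over the replicas deterministically.
#
# The scatter_add_ below then bumps expert_load_view at those physical ids,
# giving the same counts torch.bincount would, while staying compilable.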
+ topk_ids_flatten = topk_ids.flatten() + expert_load_view.scatter_add_( + dim=0, + index=topk_ids_flatten.long(), + src=torch.ones_like(topk_ids_flatten).to(expert_load_view)) + + if indices_type is not None: + topk_ids = topk_ids.to(dtype=indices_type) + return topk_ids + + def fused_grouped_topk( hidden_states: torch.Tensor, gating_output: torch.Tensor, diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index da513d75da4d..17ad75584a3f 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -43,7 +43,8 @@ if current_platform.is_cuda_alike(): from .fused_batched_moe import BatchedTritonExperts - from .fused_moe import TritonExperts, fused_experts + from .fused_moe import (TritonExperts, eplb_map_to_physical_and_record, + fused_experts) if has_pplx(): from .pplx_prepare_finalize import (PplxPrepareAndFinalize, pplx_hidden_dim_scale_bytes) @@ -55,6 +56,16 @@ fused_experts = None # type: ignore FusedMoEPermuteExpertsUnpermute = None # type: ignore FusedMoEPrepareAndFinalize = None # type: ignore + + def eplb_map_to_physical_and_record( + topk_ids: torch.Tensor, expert_load_view: torch.Tensor, + logical_to_physical_map: torch.Tensor, + logical_replica_count: torch.Tensor, + indices_type: Optional[torch.dtype]) -> torch.Tensor: + # CPU fallback: no EPLB so just return as is + return topk_ids + + if is_rocm_aiter_moe_enabled(): from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501 rocm_aiter_grouped_topk as grouped_topk) @@ -1616,55 +1627,13 @@ def select_experts( assert logical_to_physical_map is not None assert logical_replica_count is not None - # 1. Convert the logical expert ids to physical expert ids - # Directly select a random replica for each logical expert - - # TODO: maybe optimize this by using specified kernels, - # or compute pseudo-random indices by modulo - - # In case `indices_type` is not `torch.long` or `torch.int`, - # e.g. `torch.uint32` as required by dispatch/combine kernels - topk_ids_long = topk_ids.long() - replica_indices = ( - torch.rand_like(topk_ids, dtype=torch.float) * - logical_replica_count[topk_ids_long]).long().unsqueeze(-1) - physical_ids = logical_to_physical_map[topk_ids_long].gather( - -1, replica_indices).squeeze(-1) - - topk_ids = physical_ids - - # 2. Record expert load metrics. - - # TODO(bowen): When using `FusedMoEModularKernel`, this - # can be done in a more unified way, since - # `FusedMoEPrepareAndFinalize` will return the expert - # token count, in some cases directly from the kernel. - # However, now there are many code paths not using - # the modular kernel, e.g. calling `fused_experts`, - # so we decide to keep the logic here. - # - # If later refactor moved all the MoE kernel calls - # to the modular kernel, we can move this logic there - # to achieve better efficiency. 
- - # `expert_load_view`: (num_physical_experts,) - - topk_ids_flatten = topk_ids.flatten() - - # Performance optimization: - # `masked_fill` is significantly faster than `masked_select` - invalid_mask = topk_ids_flatten < 0 - # Replace invalid expert ids with 0 (just a dummy position) - # to avoid out-of-bounds errors in scatter_add_ - index = topk_ids_flatten.masked_fill_(invalid_mask, 0) - # `src` is the valid mask, which is 1 for valid and 0 for invalid - src = ~invalid_mask - - expert_load_view.scatter_add_(dim=0, - index=index.long(), - src=src.to(expert_load_view)) - - topk_ids = topk_ids.to(dtype=indices_type) + topk_ids = eplb_map_to_physical_and_record( + topk_ids=topk_ids, + expert_load_view=expert_load_view, + logical_to_physical_map=logical_to_physical_map, + logical_replica_count=logical_replica_count, + indices_type=indices_type, + ) assert topk_ids.dtype == indices_type or indices_type is None From cfbee3d0e72582b0b0d910ecbd9a7c1028a7205c Mon Sep 17 00:00:00 2001 From: Daisy-Ma-coder Date: Mon, 22 Sep 2025 10:37:43 -0700 Subject: [PATCH 234/518] [CLI env var] Add VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH in env variables (#25274) Signed-off-by: qqma Signed-off-by: DarkLight1337 Co-authored-by: qqma Co-authored-by: Cyrus Leung --- tests/compile/piecewise/test_full_cudagraph.py | 11 +++++++++-- tests/v1/cudagraph/test_cudagraph_mode.py | 11 +++++++++-- vllm/envs.py | 8 ++++++++ vllm/v1/attention/backends/flash_attn.py | 7 +++---- vllm/v1/attention/backends/mla/flashattn_mla.py | 8 +++----- 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/tests/compile/piecewise/test_full_cudagraph.py b/tests/compile/piecewise/test_full_cudagraph.py index 2454f85342eb..780a0d6b5c0e 100644 --- a/tests/compile/piecewise/test_full_cudagraph.py +++ b/tests/compile/piecewise/test_full_cudagraph.py @@ -46,7 +46,10 @@ class BackendConfig: # FA3 on Hopper "FA3": BackendConfig(name="FA3", - env_vars={"VLLM_FLASH_ATTN_VERSION": "3"}, + env_vars={ + "VLLM_FLASH_ATTN_VERSION": "3", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, comp_config={ "cudagraph_mode": "FULL", }, @@ -66,6 +69,7 @@ class BackendConfig: BackendConfig(name="FlashAttentionMLA", env_vars={ "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", }, comp_config={ "cudagraph_mode": "FULL_DECODE_ONLY", @@ -89,7 +93,10 @@ class BackendConfig: # FA2 "FA2": BackendConfig(name="FA2", - env_vars={"VLLM_FLASH_ATTN_VERSION": "2"}, + env_vars={ + "VLLM_FLASH_ATTN_VERSION": "2", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, comp_config={ "cudagraph_mode": "FULL", }), diff --git a/tests/v1/cudagraph/test_cudagraph_mode.py b/tests/v1/cudagraph/test_cudagraph_mode.py index 25e01806f495..1ae9185fafbd 100644 --- a/tests/v1/cudagraph/test_cudagraph_mode.py +++ b/tests/v1/cudagraph/test_cudagraph_mode.py @@ -47,7 +47,10 @@ class BackendConfig: # FA3 on Hopper "FA3": BackendConfig(name="FA3", - env_vars={"VLLM_FLASH_ATTN_VERSION": "3"}, + env_vars={ + "VLLM_FLASH_ATTN_VERSION": "3", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, comp_config={ "cudagraph_mode": "FULL", }, @@ -67,6 +70,7 @@ class BackendConfig: BackendConfig(name="FlashAttentionMLA", env_vars={ "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", }, comp_config={ "cudagraph_mode": "FULL_DECODE_ONLY", @@ -75,7 +79,10 @@ class BackendConfig: # FA2 "FA2": BackendConfig(name="FA2", - env_vars={"VLLM_FLASH_ATTN_VERSION": "2"}, + env_vars={ + 
"VLLM_FLASH_ATTN_VERSION": "2", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, comp_config={ "cudagraph_mode": "FULL_AND_PIECEWISE", }), diff --git a/vllm/envs.py b/vllm/envs.py index cbd1d5474e60..e517088c5290 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -119,6 +119,7 @@ VLLM_SERVER_DEV_MODE: bool = False VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128 VLLM_MLA_DISABLE: bool = False + VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH: int = 16 VLLM_RAY_PER_WORKER_GPUS: float = 1.0 VLLM_RAY_BUNDLE_INDICES: str = "" VLLM_CUDART_SO_PATH: Optional[str] = None @@ -946,6 +947,12 @@ def get_vllm_port() -> Optional[int]: "VLLM_MLA_DISABLE": lambda: bool(int(os.getenv("VLLM_MLA_DISABLE", "0"))), + # If set, vLLM will pick up the provided Flash Attention MLA + # max number splits for cuda graph decode + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": + lambda: int(os.getenv("VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH", + "16")), + # Number of GPUs per worker in Ray, if it is set to be a fraction, # it allows ray to schedule multiple actors on a single GPU, # so that users can colocate other actors on the same GPUs as vLLM. @@ -1379,6 +1386,7 @@ def compute_hash() -> str: environment_variables_to_hash = [ "VLLM_PP_LAYER_PARTITION", "VLLM_MLA_DISABLE", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH", "VLLM_USE_TRITON_FLASH_ATTN", "VLLM_USE_TRITON_AWQ", "VLLM_DP_RANK", diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 20f1904b3be6..d564cf9988ea 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -8,6 +8,7 @@ import torch from vllm import _custom_ops as ops +from vllm import envs from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType, is_quantized_kv_cache) @@ -33,9 +34,6 @@ logger = init_logger(__name__) -# NOTE(woosuk): This is an arbitrary number. Tune it if needed. -_DEFAULT_MAX_NUM_SPLITS_FOR_CUDA_GRAPH = 16 - class FlashAttentionBackend(AttentionBackend): @@ -215,7 +213,8 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], # When using cuda graph, we need to set the upper bound of the # number of splits so that large enough intermediate buffers are # pre-allocated during capture. - self.max_num_splits = _DEFAULT_MAX_NUM_SPLITS_FOR_CUDA_GRAPH + self.max_num_splits = ( + envs.VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH) # Sliding window size to be used with the AOT scheduler will be # populated on first build() call. diff --git a/vllm/v1/attention/backends/mla/flashattn_mla.py b/vllm/v1/attention/backends/mla/flashattn_mla.py index 472095e13615..4ad9a13b61d8 100644 --- a/vllm/v1/attention/backends/mla/flashattn_mla.py +++ b/vllm/v1/attention/backends/mla/flashattn_mla.py @@ -6,6 +6,7 @@ import torch +from vllm import envs from vllm.attention.backends.abstract import (AttentionLayer, AttentionType, is_quantized_kv_cache) from vllm.attention.utils.fa_utils import (flash_attn_supports_mla, @@ -24,10 +25,6 @@ logger = init_logger(__name__) -# NOTE(matt): This is an arbitrary number, copied from -# woosuk's implementation in standard FlashAttention backend -_DEFAULT_MAX_NUM_SPLITS_FOR_CUDA_GRAPH = 16 - class FlashAttnMLABackend(MLACommonBackend): @@ -97,7 +94,8 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], # When using cuda graph, we need to set the upper bound of the # number of splits so that large enough intermediate buffers are # pre-allocated during capture. 
- self.max_num_splits = _DEFAULT_MAX_NUM_SPLITS_FOR_CUDA_GRAPH + self.max_num_splits = ( + envs.VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH) # TODO(lucas): Until we add support for the DCP custom masking we need # to restrict decodes to q_len == 1 when DCP is enabled. From 1d7f95b85c9b1bdaa576a5275851995b75a9420d Mon Sep 17 00:00:00 2001 From: ElizaWszola Date: Mon, 22 Sep 2025 19:37:46 +0200 Subject: [PATCH 235/518] [Compiler] Disable Inductor standalone compile by default (#25391) Signed-off-by: ElizaWszola --- vllm/envs.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index e517088c5290..8941be125ed0 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -126,6 +126,7 @@ VLLM_DP_RANK: int = 0 VLLM_DP_RANK_LOCAL: int = -1 VLLM_DP_SIZE: int = 1 + VLLM_USE_STANDALONE_COMPILE: bool = False VLLM_DP_MASTER_IP: str = "" VLLM_DP_MASTER_PORT: int = 0 VLLM_MOE_DP_CHUNK_SIZE: int = 256 @@ -437,9 +438,9 @@ def get_vllm_port() -> Optional[int]: # Feature flag to enable/disable Inductor standalone compile. # In torch <= 2.7 we ignore this flag; in torch >= 2.8 this is - # enabled by default. + # disabled by default. "VLLM_USE_STANDALONE_COMPILE": - lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "1") == "1", + lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "0") == "1", # local rank of the process in the distributed setting, used to determine # the GPU device id From 239ef0c1ac0dfe68d8d2e28c54ecf9aa9bcd945b Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 22 Sep 2025 14:27:51 -0400 Subject: [PATCH 236/518] [CI Failure] Fix fp8 kv cache on --- vllm/platforms/cuda.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 7baa5a9742f4..b10bc03ee16c 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -286,6 +286,9 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, TREE_ATTN_V1 = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend" # noqa: E501 XFORMERS_V1 = "vllm.v1.attention.backends.xformers.XFormersAttentionBackend" # noqa: E501 + use_fp8_kv_cache = (kv_cache_dtype is not None + and kv_cache_dtype.startswith("fp8")) + if selected_backend == _Backend.FLASHINFER: logger.info_once("Using FlashInfer backend on V1 engine.") if cls.has_device_capability(100): @@ -334,10 +337,11 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, # FlashAttention is the default for SM 8.0+ GPUs if cls.has_device_capability(80): - if has_sink and not cls.is_device_capability(90): + if (has_sink or + use_fp8_kv_cache) and not cls.is_device_capability(90): logger.info_once("Using Triton backend on V1 engine.") return TRITON_ATTN_VLLM_V1 - if is_default_backend_supported := is_attn_backend_supported( + elif is_default_backend_supported := is_attn_backend_supported( FLASH_ATTN_V1, head_size, dtype, allow_import_error=False): logger.info_once("Using Flash Attention backend on " From 922979bfcc461cf0069789728f4afdf1284265fb Mon Sep 17 00:00:00 2001 From: Lucia Fang <116399278+luccafong@users.noreply.github.com> Date: Mon, 22 Sep 2025 12:06:05 -0700 Subject: [PATCH 237/518] [DP] support torchrun external launcher with Data Parallelism (#24899) Signed-off-by: Lu Fang Signed-off-by: Zhuohan Li Co-authored-by: Zhuohan Li --- .buildkite/test-pipeline.yaml | 12 ++- .../offline_inference/torchrun_dp_example.py | 81 +++++++++++++++++++ .../distributed/test_torchrun_example_moe.py | 81 +++++++++++++++++++ vllm/config/parallel.py | 13 ++- 
vllm/distributed/parallel_state.py | 4 +- vllm/v1/engine/llm_engine.py | 18 ++++- 6 files changed, 202 insertions(+), 7 deletions(-) create mode 100644 examples/offline_inference/torchrun_dp_example.py create mode 100644 tests/distributed/test_torchrun_example_moe.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index fe4796b35786..c4ea4b675649 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -165,10 +165,18 @@ steps: - tests/v1/test_hybrid_lb_dp.py - tests/v1/engine/test_engine_core_client.py commands: - # test with tp=2 and external_dp=2 + # test with torchrun tp=2 and external_dp=2 - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py - # test with tp=2 and pp=2 + # test with torchrun tp=2 and pp=2 - PP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example.py + # test with torchrun tp=4 and dp=1 + - TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + # test with torchrun tp=2, pp=2 and dp=1 + - PP_SIZE=2 TP_SIZE=2 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + # test with torchrun tp=1 and dp=4 with ep + - DP_SIZE=4 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py + # test with torchrun tp=2 and dp=2 with ep + - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py # test with internal dp - python3 ../examples/offline_inference/data_parallel.py --enforce-eager - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py diff --git a/examples/offline_inference/torchrun_dp_example.py b/examples/offline_inference/torchrun_dp_example.py new file mode 100644 index 000000000000..8e888a100254 --- /dev/null +++ b/examples/offline_inference/torchrun_dp_example.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +experimental support for data-parallel inference with torchrun +Note the data load balancing and distribution is done out of the vllm engine, +no internal lb supported in external_launcher mode. +""" + +from vllm import LLM, SamplingParams + +# Create prompts, the same across all ranks +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", +] * 50 + +# Create sampling parameters, the same across all ranks +sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + +# Use `distributed_executor_backend="external_launcher"` so that +# this llm engine/instance only creates one worker. +# it is important to set an explicit seed to make sure that +# all ranks have the same random seed, so that sampling can be +# deterministic across ranks. 
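# To run this example, launch one process per rank with torchrun. With the
# settings below (TP=1, PP=1, DP=2) the world size is 2, so on a node with
# two GPUs (an assumption of this sketch, not a requirement of the change):
#
#     torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py
#
# --nproc-per-node must equal TP * PP * DP, mirroring the torchrun
# invocations added to the CI pipeline above.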
+llm = LLM( + model="microsoft/Phi-mini-MoE-instruct", + tensor_parallel_size=1, + data_parallel_size=2, + pipeline_parallel_size=1, + enable_expert_parallel=False, + distributed_executor_backend="external_launcher", + max_model_len=4096, + gpu_memory_utilization=0.6, + seed=1, +) + +dp_rank = llm.llm_engine.vllm_config.parallel_config.data_parallel_rank +dp_size = llm.llm_engine.vllm_config.parallel_config.data_parallel_size + +prompts = [ + f"{idx}.{prompt}" for idx, prompt in enumerate(prompts) if idx % dp_size == dp_rank +] + +outputs = llm.generate(prompts, sampling_params) + + +# all ranks will have the same outputs +print("-" * 50) +for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n") + print("-" * 50) +""" +Further tips: + +1. to communicate control messages across all ranks, use the cpu group, +a PyTorch ProcessGroup with GLOO backend. + +```python +from vllm.distributed.parallel_state import get_world_group +cpu_group = get_world_group().cpu_group +torch_rank = dist.get_rank(group=cpu_group) +if torch_rank == 0: + # do something for rank 0, e.g. saving the results to disk. +``` + +2. to communicate data across all ranks, use the model's device group, +a PyTorch ProcessGroup with NCCL backend. +```python +from vllm.distributed.parallel_state import get_world_group +device_group = get_world_group().device_group +``` + +3. to access the model directly in every rank, use the following code: +```python +llm.llm_engine.model_executor.driver_worker.worker.model_runner.model +``` +""" diff --git a/tests/distributed/test_torchrun_example_moe.py b/tests/distributed/test_torchrun_example_moe.py new file mode 100644 index 000000000000..2d6b930fcc07 --- /dev/null +++ b/tests/distributed/test_torchrun_example_moe.py @@ -0,0 +1,81 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +# unit test for `examples/offline_inference/torchrun_example.py` +import os +import random + +import torch.distributed as dist + +from vllm import LLM, SamplingParams +from vllm.distributed.parallel_state import get_tp_group, get_world_group + +dist.init_process_group(backend="gloo") + +# Create prompts +prompts = [ + "Hello, my name is", + "The president of the United States is", + "The capital of France is", + "The future of AI is", +] * 10 +dp_size = int(os.getenv("DP_SIZE", "1")) +dp_rank = int(os.getenv("DP_RANK", "0")) + +if dp_size > 1: + # distribute the prompts across the data parallel ranks + prompts = [ + prompt for idx, prompt in enumerate(prompts) + if idx % dp_size == dp_rank + ] + +sampling_params = SamplingParams(temperature=0.8, top_p=0.95) + +# set different `gpu_memory_utilization` and `swap_space` for different ranks, +# to test if all ranks agree on the same kv cache configuration. 
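# The CI entries added to .buildkite/test-pipeline.yaml above drive this
# script in several layouts, for example:
#
#     TP_SIZE=4 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
#     TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py
#
# With the external_launcher backend the engine derives its data-parallel
# rank from the torchrun RANK (see the ParallelConfig change further below);
# the DP_SIZE/DP_RANK variables read here only shard the prompt list.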
+llm = LLM(model="microsoft/Phi-mini-MoE-instruct", + tensor_parallel_size=int(os.getenv("TP_SIZE", "1")), + pipeline_parallel_size=int(os.getenv("PP_SIZE", "1")), + enable_expert_parallel=int(os.getenv("ENABLE_EP", "0")) == 1, + distributed_executor_backend="external_launcher", + gpu_memory_utilization=random.uniform(0.7, 0.9), + swap_space=random.randint(1, 4), + seed=0) + +outputs = llm.generate(prompts, sampling_params) + +group = get_world_group() if dp_size == 1 else get_tp_group() +cpu_group = group.cpu_group +group_rank = dist.get_rank(group=cpu_group) + + +def test_consistent_across_ranks(obj): + if group_rank == 0: + dist.broadcast_object_list([obj], src=group.ranks[0], group=cpu_group) + else: + container = [None] + dist.broadcast_object_list(container, + src=group.ranks[0], + group=cpu_group) + assert container[0] == obj + + +test_consistent_across_ranks( + llm.llm_engine.vllm_config.cache_config.num_cpu_blocks) +test_consistent_across_ranks( + llm.llm_engine.vllm_config.cache_config.num_gpu_blocks) + +# make sure we can access the model parameters from the calling process +# of the `LLM` instance. +params = list(llm.llm_engine.model_executor.driver_worker.worker.model_runner. + model.parameters()) +test_consistent_across_ranks(len(params)) + +# all ranks should have the same outputs +for output in outputs: + prompt = output.prompt + generated_text = output.outputs[0].text + test_consistent_across_ranks(prompt) + test_consistent_across_ranks(generated_text) + print(f"Rank {group_rank}, Prompt: {prompt!r}, " + f"Generated text: {generated_text!r}") diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 37a41bf6de71..a84d88243016 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import hashlib +import os from dataclasses import field from typing import TYPE_CHECKING, Any, Literal, Optional, Union @@ -351,6 +352,10 @@ def __post_init__(self) -> None: self.world_size = self.pipeline_parallel_size * \ self.tensor_parallel_size + if self.distributed_executor_backend == "external_launcher": + logger.info("Using external launcher for distributed inference.") + self.world_size *= self.data_parallel_size + if self.data_parallel_size_local > self.data_parallel_size: raise ValueError( f"data_parallel_size_local ({self.data_parallel_size_local}) " @@ -358,6 +363,13 @@ def __post_init__(self) -> None: if self.data_parallel_size > 1 or self.data_parallel_size_local == 0: # Data parallel was specified in the engine args. 
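# A worked example for the rank derivation added below, with illustrative
# numbers: for TP=2, PP=1, DP=2 the world_size above has already been scaled
# to 2 * 1 * 2 = 4, so world_size // data_parallel_size == 2 and torchrun
# RANK 0-1 map to data_parallel_rank 0 while RANK 2-3 map to rank 1. This
# assumes the usual layout in which TP/PP ranks are contiguous within each
# DP replica.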
+ if self.distributed_executor_backend == "external_launcher": + # For external launcher, + # we need to set the data parallel rank automatically + self.data_parallel_rank = int(os.environ["RANK"]) \ + // (self.world_size // self.data_parallel_size) + logger.info("Set data_parallel_rank to %d automatically.", + self.data_parallel_rank) if not self._data_parallel_master_port_list: self._data_parallel_master_port_list = get_open_ports_list(5) self.data_parallel_master_port = \ @@ -380,7 +392,6 @@ def __post_init__(self) -> None: "be set when data_parallel_size > 1") if self.distributed_executor_backend == "external_launcher": - import os os.environ["VLLM_ENABLE_V1_MULTIPROCESSING"] = "0" logger.info("Disabling V1 multiprocessing for external launcher.") diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index 12571afaa4c1..895971893a66 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -1032,7 +1032,9 @@ def init_distributed_environment(world_size: int = -1, distributed_init_method, backend) from vllm.config import get_current_vllm_config config = get_current_vllm_config() - if config is not None and config.parallel_config.data_parallel_size > 1: + if config is not None and config.parallel_config.data_parallel_size > 1 \ + and config.parallel_config.distributed_executor_backend \ + != "external_launcher": parallel_config = config.parallel_config # adjust to take into account data parallelism # offset the rank by the data parallel rank diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 907656d1b24c..92c861d9e91f 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -11,6 +11,7 @@ import vllm.envs as envs from vllm.config import ParallelConfig, VllmConfig from vllm.distributed import stateless_destroy_torch_distributed_process_group +from vllm.distributed.parallel_state import get_dp_group from vllm.engine.arg_utils import EngineArgs from vllm.inputs import PromptType from vllm.logger import init_logger @@ -77,10 +78,15 @@ def __init__( if self.log_stats: self.stat_logger = PrometheusStatLogger(vllm_config) + executor_backend = ( + self.vllm_config.parallel_config.distributed_executor_backend) + parallel_config = vllm_config.parallel_config + self.external_launcher_dp = (parallel_config.data_parallel_size > 1 and + executor_backend == "external_launcher") # important: init dp group before init the engine_core # In the decoupled engine case this is handled in EngineCoreProc. - parallel_config = vllm_config.parallel_config - if not multiprocess_mode and parallel_config.data_parallel_size > 1: + if not multiprocess_mode and parallel_config.data_parallel_size > 1 \ + and not self.external_launcher_dp: self.dp_group = parallel_config.stateless_init_dp_group() else: self.dp_group = None @@ -120,6 +126,11 @@ def __init__( # for v0 compatibility self.model_executor = self.engine_core.engine_core.model_executor # type: ignore + if self.external_launcher_dp: + # If we use DP in external launcher mode, we reuse the + # existing DP group used for data communication. 
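# Reusing the CPU group that was created during distributed initialization
# avoids building a second, stateless DP process group per engine (the
# non-external-launcher path above still does that), and the matching
# change in __del__ below skips destroying the borrowed group on shutdown.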
+ self.dp_group = get_dp_group().cpu_group + # Don't keep the dummy data in memory self.reset_mm_cache() @@ -331,5 +342,6 @@ def apply_model(self, func: Callable[[nn.Module], _R]) -> list[_R]: return self.collective_rpc("apply_model", args=(func, )) def __del__(self): - if dp_group := getattr(self, "dp_group", None): + if dp_group := getattr(self, "dp_group", + None) and not self.external_launcher_dp: stateless_destroy_torch_distributed_process_group(dp_group) From 8d0ee5a56452bfb671ae00e96d10841a8054b823 Mon Sep 17 00:00:00 2001 From: Simon Mo Date: Mon, 22 Sep 2025 12:16:59 -0700 Subject: [PATCH 238/518] [misc] Remove RFC review hours reference (#25416) --- .github/ISSUE_TEMPLATE/750-RFC.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/ISSUE_TEMPLATE/750-RFC.yml b/.github/ISSUE_TEMPLATE/750-RFC.yml index 7ee57c42895c..c0e009855964 100644 --- a/.github/ISSUE_TEMPLATE/750-RFC.yml +++ b/.github/ISSUE_TEMPLATE/750-RFC.yml @@ -43,10 +43,6 @@ body: Any other things you would like to mention. validations: required: false -- type: markdown - attributes: - value: > - Thanks for contributing 🎉! The vLLM core team hosts a biweekly RFC review session at 9:30AM Pacific Time, while most RFCs can be discussed online, you can optionally sign up for a slot to discuss your RFC online [here](https://docs.google.com/document/d/1CiLVBZeIVfR7_PNAKVSusxpceywkoOOB78qoWqHvSZc/edit). - type: checkboxes id: askllm attributes: From d5e0fca264f7d277ed372f7c075827ed9a0c5e7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Govedi=C4=8D?= Date: Mon, 22 Sep 2025 15:30:05 -0400 Subject: [PATCH 239/518] [torch.compile] Cleanup compilation tests and custom passes, add debug utils, fix DCE bug (#23091), fix test (#24376), and prep for custom op matching (#24604) (#24542) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luka Govedič Signed-off-by: luka Signed-off-by: Luka Govedič --- tests/compile/backend.py | 28 +- tests/compile/test_async_tp.py | 2 + tests/compile/test_config.py | 10 +- tests/compile/test_functionalization.py | 10 +- tests/compile/test_fusion.py | 17 +- tests/compile/test_fusion_all_reduce.py | 6 +- tests/compile/test_fusion_attn.py | 20 +- tests/compile/test_sequence_parallelism.py | 21 +- tests/compile/test_silu_mul_quant_fusion.py | 13 +- vllm/compilation/activation_quant_fusion.py | 18 +- vllm/compilation/collective_fusion.py | 29 +- vllm/compilation/fix_functionalization.py | 9 +- vllm/compilation/fusion.py | 279 +++----------------- vllm/compilation/fusion_attn.py | 22 +- vllm/compilation/multi_output_match.py | 109 -------- vllm/compilation/noop_elimination.py | 5 +- vllm/compilation/pass_manager.py | 49 +++- vllm/compilation/post_cleanup.py | 20 ++ vllm/compilation/sequence_parallelism.py | 18 +- vllm/compilation/vllm_inductor_pass.py | 110 +++++++- vllm/config/__init__.py | 7 +- vllm/config/compilation.py | 48 +++- vllm/envs.py | 6 + vllm/utils/__init__.py | 15 +- 24 files changed, 407 insertions(+), 464 deletions(-) delete mode 100644 vllm/compilation/multi_output_match.py create mode 100644 vllm/compilation/post_cleanup.py diff --git a/tests/compile/backend.py b/tests/compile/backend.py index 2c4287950dcf..f25c367433f4 100644 --- a/tests/compile/backend.py +++ b/tests/compile/backend.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import weakref from collections.abc import Sequence from copy import deepcopy from typing import Callable, Union @@ 
-10,7 +11,26 @@ from vllm.compilation.fx_utils import find_op_nodes from vllm.compilation.inductor_pass import InductorPass -from vllm.config import get_current_vllm_config +from vllm.compilation.pass_manager import with_pattern_match_debug +from vllm.compilation.vllm_inductor_pass import VllmInductorPass +from vllm.config import VllmConfig, get_current_vllm_config + + +class LazyInitPass(InductorPass): + """ + If there's a pass that we want to initialize lazily in a test, + we can wrap it in LazyInitPass, which will initialize the pass when invoked + and then immediately invoke it. + """ + + def __init__(self, pass_cls: type[VllmInductorPass], + vllm_config: VllmConfig): + self.pass_cls = pass_cls + self.vllm_config = weakref.proxy(vllm_config) # avoid cycle + + def __call__(self, graph: fx.Graph) -> None: + self.pass_ = self.pass_cls(self.vllm_config) + self.pass_(graph) class TestBackend: @@ -40,10 +60,16 @@ def __call__(self, graph: fx.GraphModule, example_inputs): example_inputs, config_patches=self.inductor_config) + @with_pattern_match_debug def post_pass(self, graph: fx.Graph): self.graph_pre_pass = deepcopy(graph) + + VllmInductorPass.dump_prefix = 0 for pass_ in self.custom_passes: pass_(graph) + VllmInductorPass.dump_prefix += 1 + + VllmInductorPass.dump_prefix = None self.graph_post_pass = deepcopy(graph) # assign by reference, will reflect the final state of the graph diff --git a/tests/compile/test_async_tp.py b/tests/compile/test_async_tp.py index 9a51e6b3514f..1dc21365d557 100644 --- a/tests/compile/test_async_tp.py +++ b/tests/compile/test_async_tp.py @@ -294,6 +294,8 @@ def async_tp_pass_on_test_model(local_rank: int, world_size: int, compiled_model = torch.compile(model, backend=backend) compiled_model(hidden_states) + assert async_tp_pass.matched_count == 1 + # In pre-nodes, all gather or reduce scatter should exist, # fused_matmul_reduce_scatter or fused_all_gather_matmul should not backend.check_before_ops(model.ops_in_model_before(), fully_replaced=False) diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py index 90e8e0ff9585..7afd6251bbbd 100644 --- a/tests/compile/test_config.py +++ b/tests/compile/test_config.py @@ -4,7 +4,7 @@ import vllm from vllm.compilation.counter import compilation_counter -from vllm.config import VllmConfig +from vllm.config import CompilationConfig, VllmConfig from vllm.utils import _is_torch_equal_or_newer @@ -26,6 +26,14 @@ def test_use_cudagraphs_dynamic(monkeypatch): assert not vllm_config.compilation_config.use_cudagraph +def test_custom_op(): + # proper syntax + _ = CompilationConfig(custom_ops=["+quant_fp8", "-silu_and_mul"]) + + with pytest.raises(ValueError, match="Invalid syntax '"): + _ = CompilationConfig(custom_ops=["quant_fp8"]) + + # forked needed to workaround https://github.com/vllm-project/vllm/issues/21073 @pytest.mark.forked # NB: We don't test VLLM_DISABLE_COMPILE_CACHE=0 because that depends diff --git a/tests/compile/test_functionalization.py b/tests/compile/test_functionalization.py index 0c7e6fbccf20..2ee9aa7476be 100644 --- a/tests/compile/test_functionalization.py +++ b/tests/compile/test_functionalization.py @@ -8,9 +8,10 @@ from vllm import LLM, SamplingParams from vllm.compilation.activation_quant_fusion import ActivationQuantFusionPass from vllm.compilation.fix_functionalization import FixFunctionalizationPass -from vllm.compilation.fusion import FUSED_OPS, FusionPass +from vllm.compilation.fusion import FUSED_OPS, RMSNormQuantFusionPass from vllm.compilation.fx_utils import find_auto_fn, 
find_auto_fn_maybe, is_func from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.config import CompilationConfig, PassConfig, VllmConfig from vllm.model_executor.layers.quantization.utils.quant_utils import ( QuantKey, kFp8DynamicTokenSym, kFp8StaticTensorSym) @@ -58,11 +59,12 @@ def test_fix_functionalization(model: str, quant_key: QuantKey, vllm_config.compilation_config = CompilationConfig( pass_config=PassConfig(enable_fusion=do_fusion, enable_noop=True)) noop_pass = NoOpEliminationPass(vllm_config) - fusion_pass = FusionPass.instance(vllm_config) + fusion_pass = RMSNormQuantFusionPass(vllm_config) + cleanup_pass = PostCleanupPass(vllm_config) act_quant_fusion_pass = ActivationQuantFusionPass(vllm_config) - passes = [noop_pass, fusion_pass, act_quant_fusion_pass - ] if do_fusion else [noop_pass] + passes = [noop_pass, fusion_pass, act_quant_fusion_pass, cleanup_pass + ] if do_fusion else [noop_pass, cleanup_pass] func_pass = FixFunctionalizationPass(vllm_config) backend_func = TestBackend(*passes, func_pass) backend_no_func = TestBackend(*passes) diff --git a/tests/compile/test_fusion.py b/tests/compile/test_fusion.py index eedb9bdcd529..3d8897d3f18b 100644 --- a/tests/compile/test_fusion.py +++ b/tests/compile/test_fusion.py @@ -4,11 +4,11 @@ import pytest import torch -import vllm.envs as envs import vllm.plugins from vllm.compilation.fusion import (FUSED_OPS, QUANT_OPS, FusedRMSQuantKey, - FusionPass) + RMSNormQuantFusionPass) from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.config import (CompilationConfig, CompilationLevel, PassConfig, VllmConfig) from vllm.model_executor.layers.layernorm import RMSNorm @@ -79,15 +79,15 @@ def ops_in_model_after(self): @pytest.mark.parametrize("dtype", [torch.float16, torch.bfloat16]) -@pytest.mark.parametrize("hidden_size", [64, 3392, 4096]) -@pytest.mark.parametrize("num_tokens", [7, 256, 533, 2048, 2049]) +@pytest.mark.parametrize("hidden_size", [64]) +@pytest.mark.parametrize("num_tokens", [257]) @pytest.mark.parametrize("eps", [1e-5, 1e-6]) @pytest.mark.parametrize("static", [True, False]) # cuda_force_torch used to test torch code path on platforms that # cutlass_fp8_supported() == True. 
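The compile tests touched in this commit all follow one shape: build the custom passes, hand them to TestBackend, compile a small module, then assert on the pass's matched_count. A condensed sketch of that flow (run_pass_test and its toy arguments are illustrative, not something the diff adds):

import torch

from tests.compile.backend import TestBackend
from vllm.compilation.noop_elimination import NoOpEliminationPass
from vllm.compilation.post_cleanup import PostCleanupPass


def run_pass_test(vllm_config, fusion_pass, model, *example_inputs,
                  expected_matches: int) -> TestBackend:
    backend = TestBackend(NoOpEliminationPass(vllm_config), fusion_pass,
                          PostCleanupPass(vllm_config))
    compiled = torch.compile(model, backend=backend)
    compiled(*example_inputs)
    # VllmPatternMatcherPass subclasses record how many patterns they replaced
    assert fusion_pass.matched_count == expected_matches
    return backend  # keeps graph_pre_pass / graph_post_pass for inspection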
@pytest.mark.parametrize("cuda_force_torch", [True, False] if cutlass_fp8_supported() else [True]) -@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda", "rocm"], +@pytest.mark.skipif(not current_platform.is_cuda_alike(), reason="Only test on CUDA and ROCm") def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps, static, cuda_force_torch): @@ -104,9 +104,10 @@ def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps, static, with vllm.config.set_current_vllm_config(vllm_config): # Reshape pass is needed for the fusion pass to work noop_pass = NoOpEliminationPass(vllm_config) - fusion_pass = FusionPass.instance(vllm_config) + fusion_pass = RMSNormQuantFusionPass(vllm_config) + cleanup_pass = PostCleanupPass(vllm_config) - backend = TestBackend(noop_pass, fusion_pass) + backend = TestBackend(noop_pass, fusion_pass, cleanup_pass) model = TestModel(hidden_size, eps, static, cuda_force_torch) # First dimension dynamic @@ -128,6 +129,8 @@ def test_fusion_rmsnorm_quant(dtype, hidden_size, num_tokens, eps, static, torch.testing.assert_close(result, result2, atol=ATOL, rtol=RTOL) + assert fusion_pass.matched_count == 2 + # In pre-nodes, fp8 quant should be there and fused kernels should not backend.check_before_ops(model.ops_in_model_before()) diff --git a/tests/compile/test_fusion_all_reduce.py b/tests/compile/test_fusion_all_reduce.py index dd31e0db1f59..60f32c863208 100644 --- a/tests/compile/test_fusion_all_reduce.py +++ b/tests/compile/test_fusion_all_reduce.py @@ -9,6 +9,7 @@ from vllm.compilation.collective_fusion import AllReduceFusionPass from vllm.compilation.fix_functionalization import FixFunctionalizationPass from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.config import (CompilationConfig, CompilationLevel, DeviceConfig, ModelConfig, PassConfig, VllmConfig) from vllm.distributed import tensor_model_parallel_all_reduce @@ -215,8 +216,10 @@ def all_reduce_fusion_pass_on_test_model(local_rank: int, world_size: int, all_reduce_fusion_pass = AllReduceFusionPass(vllm_config) noop_pass = NoOpEliminationPass(vllm_config) func_pass = FixFunctionalizationPass(vllm_config) + cleanup_pass = PostCleanupPass(vllm_config) - backend = TestBackend(all_reduce_fusion_pass, noop_pass, func_pass) + backend = TestBackend(all_reduce_fusion_pass, noop_pass, func_pass, + cleanup_pass) token_num = batch_size * seq_len model = test_model_cls(hidden_size, token_num) @@ -227,6 +230,7 @@ def all_reduce_fusion_pass_on_test_model(local_rank: int, world_size: int, compiled_model = torch.compile(model, backend=backend) compiled_model(hidden_states, residual) + assert all_reduce_fusion_pass.matched_count == 1 backend.check_before_ops(model.ops_in_model_before(), fully_replaced=False) backend.check_after_ops(model.ops_in_model_after()) del all_reduce_fusion_pass diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index c3f1c7481d1b..c4cac9553192 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -6,18 +6,19 @@ import pytest import torch._dynamo -from tests.compile.backend import TestBackend +from tests.compile.backend import LazyInitPass, TestBackend from tests.models.utils import check_outputs_equal from tests.v1.attention.utils import (BatchSpec, _Backend, create_common_attn_metadata) from vllm import LLM, SamplingParams from vllm._custom_ops import cutlass_scaled_fp4_mm, scaled_fp4_quant -from vllm.attention import Attention +from 
vllm.attention import Attention, AttentionMetadata from vllm.attention.selector import global_force_attn_backend_context_manager from vllm.compilation.fusion import QUANT_OPS from vllm.compilation.fusion_attn import ATTN_OP, AttnFusionPass from vllm.compilation.fx_utils import find_op_nodes from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.config import (CacheConfig, CompilationConfig, CompilationLevel, ModelConfig, PassConfig, SchedulerConfig, VllmConfig, set_current_vllm_config) @@ -104,7 +105,7 @@ def test_attention_fusion_v0(example_prompts, monkeypatch, model: str, # AttnFusionPass needs attention layers to be registered in config upon init # so we initialize it during compilation. - attn_pass = lambda *args, **kw: AttnFusionPass(vllm_config)(*args, **kw) + attn_pass = LazyInitPass(AttnFusionPass, vllm_config) backend = TestBackend(NoOpEliminationPass(vllm_config), attn_pass) llm2 = LLM(model, enforce_eager=True, @@ -197,7 +198,8 @@ def __init__(self, num_qo_heads: int, num_kv_heads: int, head_size: int, device=self.device, ) - def build_attn_metadata(self, batch_size: int, use_hnd: bool): + def build_attn_metadata(self, batch_size: int, use_hnd: bool) \ + -> AttentionMetadata: """Initialize attention metadata.""" # Create common attn metadata @@ -447,9 +449,10 @@ def test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, # Create test backend with fusion passes enabled noop_pass = NoOpEliminationPass(vllm_config) - attn_pass = lambda *args, **kw: AttnFusionPass(vllm_config)(*args, **kw - ) - test_backend = TestBackend(noop_pass, attn_pass) + attn_pass = LazyInitPass(AttnFusionPass, vllm_config) + cleanup_pass = PostCleanupPass(vllm_config) + + test_backend = TestBackend(noop_pass, attn_pass, cleanup_pass) # Compile model with fusion enabled model_compiled = torch.compile(model_fused, @@ -485,6 +488,9 @@ def test_attention_quant_pattern(num_qo_heads: int, num_kv_heads: int, test_backend.check_before_ops([QUANT_OPS[quant_key]], fully_replaced=True) + # access the underlying `AttnFusionPass` on the `LazyInitPass` + assert attn_pass.pass_.matched_count == sum(attn_fusion_supported) + # Check attention ops in the graph before and after fusion attn_nodes_pre = list(find_op_nodes(ATTN_OP, test_backend.graph_pre_pass)) attn_nodes_post = list(find_op_nodes(ATTN_OP, diff --git a/tests/compile/test_sequence_parallelism.py b/tests/compile/test_sequence_parallelism.py index fb9f9dde2279..b2734e915bbb 100644 --- a/tests/compile/test_sequence_parallelism.py +++ b/tests/compile/test_sequence_parallelism.py @@ -6,10 +6,12 @@ import vllm.envs as envs from vllm.compilation.fix_functionalization import FixFunctionalizationPass -from vllm.compilation.fusion import FusionPass +from vllm.compilation.fusion import RMSNormQuantFusionPass from vllm.compilation.fx_utils import find_auto_fn, find_auto_fn_maybe, is_func from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.compilation.sequence_parallelism import SequenceParallelismPass +from vllm.compilation.vllm_inductor_pass import VllmInductorPass from vllm.config import (CompilationConfig, DeviceConfig, ModelConfig, PassConfig, VllmConfig) from vllm.distributed import tensor_model_parallel_all_reduce @@ -104,7 +106,7 @@ def __init__(self, # Initialize weights torch.nn.init.normal_(self.gate_proj, std=0.02) - self.fp8_linear = Fp8LinearOp(use_per_token_if_dynamic=False) + 
self.fp8_linear = Fp8LinearOp(act_quant_static=True) self.scale = torch.rand(1, dtype=torch.float32) # Create a weight that is compatible with torch._scaled_mm, @@ -137,8 +139,7 @@ def forward(self, hidden_states, residual): # layer normalization norm_output, residual_output = self.norm(all_reduce, residual) - # for static input quantization - # self.fp8_linear is initialized with use_per_token_if_dynamic=False + # scaled_mm with static input quantization fp8_linear_result = self.fp8_linear.apply(norm_output, self.w, self.wscale, @@ -253,16 +254,20 @@ def sequence_parallelism_pass_on_test_model( dtype=dtype, seed=42) - sequence_parallelism_pass = SequenceParallelismPass(vllm_config) noop_pass = NoOpEliminationPass(vllm_config) + sequence_parallelism_pass = SequenceParallelismPass(vllm_config) func_pass = FixFunctionalizationPass(vllm_config) + cleanup_pass = PostCleanupPass(vllm_config) - passes_for_backend = [noop_pass, sequence_parallelism_pass] + passes_for_backend: list[VllmInductorPass] = \ + [noop_pass, sequence_parallelism_pass] if enable_fusion: - fusion_pass = FusionPass.instance(vllm_config) + fusion_pass = RMSNormQuantFusionPass(vllm_config) passes_for_backend.append(fusion_pass) + passes_for_backend.append(cleanup_pass) + backend_no_func = TestBackend(*passes_for_backend) backend_func = TestBackend(*passes_for_backend, func_pass) @@ -279,6 +284,8 @@ def sequence_parallelism_pass_on_test_model( compiled_model_func = torch.compile(model, backend=backend_func) compiled_model_func(hidden_states, residual) + assert sequence_parallelism_pass.matched_count == 1 + # In pre-nodes, all reduce should be there, # reduce scatter and all gather should not backend_no_func.check_before_ops(model.ops_in_model_before()) diff --git a/tests/compile/test_silu_mul_quant_fusion.py b/tests/compile/test_silu_mul_quant_fusion.py index ae190d25cad6..c445f4dde2cc 100644 --- a/tests/compile/test_silu_mul_quant_fusion.py +++ b/tests/compile/test_silu_mul_quant_fusion.py @@ -15,6 +15,7 @@ # yapf: enable from vllm.compilation.fusion import QUANT_OPS from vllm.compilation.noop_elimination import NoOpEliminationPass +from vllm.compilation.post_cleanup import PostCleanupPass from vllm.config import CompilationConfig, PassConfig, VllmConfig from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.quantization.utils.quant_utils import ( @@ -69,6 +70,10 @@ class TestSiluMulNvfp4QuantModel(torch.nn.Module): def __init__(self, hidden_size: int, x: torch.Tensor, **kwargs): super().__init__() + from vllm.compilation.activation_quant_fusion import ( + silu_and_mul_nvfp4_quant_supported) + assert silu_and_mul_nvfp4_quant_supported + self.silu_and_mul = SiluAndMul() # create nvfp4 weight @@ -127,7 +132,11 @@ def test_fusion_silu_and_mul_quant(num_tokens, hidden_size, dtype, model_class, pass_config=PassConfig(enable_fusion=True, enable_noop=True)) fusion_pass = ActivationQuantFusionPass(config) - backend = TestBackend(NoOpEliminationPass(config), fusion_pass) + passes = [ + NoOpEliminationPass(config), fusion_pass, + PostCleanupPass(config) + ] + backend = TestBackend(*passes) model = model_class(hidden_size=hidden_size, cuda_force_torch=cuda_force_torch, x=x) @@ -151,6 +160,8 @@ def test_fusion_silu_and_mul_quant(num_tokens, hidden_size, dtype, model_class, atol=atol, rtol=rtol) + assert fusion_pass.matched_count == 1 + # In pre-nodes, quant op should be present and fused kernels should not backend.check_before_ops(model.ops_in_model_before()) diff --git 
a/vllm/compilation/activation_quant_fusion.py b/vllm/compilation/activation_quant_fusion.py index f2fbb1200eec..74462fb37ca9 100644 --- a/vllm/compilation/activation_quant_fusion.py +++ b/vllm/compilation/activation_quant_fusion.py @@ -17,7 +17,7 @@ from .fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32 from .inductor_pass import enable_fake_mode -from .vllm_inductor_pass import VllmInductorPass +from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass logger = init_logger(__name__) @@ -152,7 +152,7 @@ def replacement(result: torch.Tensor, output_scale: torch.Tensor, register_replacement(pattern, replacement, inputs, fwd_only, pm_pass) -class ActivationQuantFusionPass(VllmInductorPass): +class ActivationQuantFusionPass(VllmPatternMatcherPass): """ This pass fuses a pre-defined set of custom ops into fused ops. It uses the torch pattern matcher to find the patterns and replace them. @@ -176,16 +176,12 @@ def __init__(self, config: VllmConfig): pattern_silu_mul_nvfp4 = SiluMulNvfp4QuantPattern() pattern_silu_mul_nvfp4.register(self.patterns) - def __call__(self, graph: torch.fx.Graph): - self.begin() - self.dump_graph(graph, "before_act_quant_fusion") - - count = self.patterns.apply(graph) - logger.debug("Replaced %s patterns in ActivationQuantFusionPass", - count) + self.dump_patterns(config, self.patterns) - self.dump_graph(graph, "after_act_quant_fusion") - self.end_and_log() + @VllmInductorPass.time_and_log + def __call__(self, graph: torch.fx.Graph): + self.matched_count = self.patterns.apply(graph) + logger.debug("Replaced %s patterns", self.matched_count) def uuid(self): return VllmInductorPass.hash_source(self, ActivationQuantPattern, diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py index 0658b59a2e21..331cd8a87392 100644 --- a/vllm/compilation/collective_fusion.py +++ b/vllm/compilation/collective_fusion.py @@ -20,7 +20,7 @@ from vllm.utils import direct_register_custom_op from .inductor_pass import enable_fake_mode -from .vllm_inductor_pass import VllmInductorPass +from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass FP8_DTYPE = current_platform.fp8_dtype() @@ -348,7 +348,7 @@ def replacement(x: torch.Tensor, weight: torch.Tensor, pm.fwd_only, pm_pass) -class AsyncTPPass(VllmInductorPass): +class AsyncTPPass(VllmPatternMatcherPass): @enable_fake_mode def __init__(self, config: VllmConfig): @@ -378,18 +378,17 @@ def __init__(self, config: VllmConfig): AllGatherCutlassScaledMMPattern( self.model_dtype, self.device).register(self.patterns) + self.dump_patterns(config, self.patterns) + def is_applicable_for_shape(self, shape: Optional[int]) -> bool: # only do replace for specific shapes tp_size = get_tensor_model_parallel_world_size() return shape is not None and shape % tp_size == 0 + @VllmInductorPass.time_and_log def __call__(self, graph: fx.Graph): - self.begin() - self.dump_graph(graph, "before_async_tp_pass") - count = self.patterns.apply(graph) - logger.debug("Replaced %s patterns with async TP pass.", count) - self.dump_graph(graph, "after_async_tp_pass") - self.end_and_log() + self.matched_count = self.patterns.apply(graph) + logger.debug("Replaced %s patterns", self.matched_count) if flashinfer_comm is not None: @@ -1068,7 +1067,7 @@ def replacement(quant_result: torch.Tensor, residual: torch.Tensor, pm.fwd_only, pm_pass) -class AllReduceFusionPass(VllmInductorPass): +class AllReduceFusionPass(VllmPatternMatcherPass): def __init__(self, config: VllmConfig): super().__init__(config) @@ 
-1124,6 +1123,7 @@ def __init__(self, config: VllmConfig): fuse_rms_quant=config.compilation_config.pass_config.enable_fusion) self.register_patterns() + self.dump_patterns(config, self.patterns) @enable_fake_mode def register_patterns(self): @@ -1172,15 +1172,14 @@ def register_patterns(self): self.disabled = False + @VllmInductorPass.time_and_log def __call__(self, graph: fx.Graph): if self.disabled: + logger.debug("AllReduceFusionPass disabled") return - self.begin() - self.dump_graph(graph, "before_all_reduce_fusion_pass") - count = self.patterns.apply(graph) - logger.debug("Replaced %s patterns", count) - self.dump_graph(graph, "after_all_reduce_fusion_pass") - self.end_and_log() + + self.matched_count = self.patterns.apply(graph) + logger.debug("Replaced %s patterns", self.matched_count) def __del__(self): if getattr(self, "disabled", True): diff --git a/vllm/compilation/fix_functionalization.py b/vllm/compilation/fix_functionalization.py index 6bc721eec3d4..54403c1f7ca3 100644 --- a/vllm/compilation/fix_functionalization.py +++ b/vllm/compilation/fix_functionalization.py @@ -26,6 +26,7 @@ class FixFunctionalizationPass(VllmInductorPass): To add new nodes to defunctionalize, add to the if-elif chain in __call__. """ + @VllmInductorPass.time_and_log def __call__(self, graph: torch.fx.Graph): # XPU does not support auto-functionalization yet. # Will enable this when switch to vllm-xpu-kernels. @@ -34,9 +35,6 @@ def __call__(self, graph: torch.fx.Graph): "pass currently.") return - self.begin() - self.dump_graph(graph, "before_fix_functionalization") - self.nodes_to_remove: list[torch.fx.Node] = [] count = 0 for node in graph.nodes: @@ -111,7 +109,7 @@ def __call__(self, graph: torch.fx.Graph): count += 1 - self.dump_graph(graph, "before_fix_functionalization_cleanup") + self.dump_graph(graph, "before_cleanup") # Remove the nodes all at once count_removed = len(self.nodes_to_remove) @@ -120,8 +118,7 @@ def __call__(self, graph: torch.fx.Graph): logger.debug("De-functionalized %s nodes, removed %s nodes", count, count_removed) - self.dump_graph(graph, "after_fix_functionalization") - self.end_and_log() + self.nodes_to_remove.clear() def _remove(self, node_or_nodes: Union[torch.fx.Node, Iterable[torch.fx.Node]]): diff --git a/vllm/compilation/fusion.py b/vllm/compilation/fusion.py index afa739c966a5..3034b6eaeaca 100644 --- a/vllm/compilation/fusion.py +++ b/vllm/compilation/fusion.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Callable, NamedTuple, Optional +from typing import Any, NamedTuple import torch import torch._inductor.pattern_matcher as pm @@ -16,10 +16,8 @@ kFp8StaticTensorSym, kNvfp4Quant, kStaticTensorScale) from vllm.platforms import current_platform -from .fx_utils import find_getitem_maybe from .inductor_pass import enable_fake_mode -from .multi_output_match import MultiOutputMatch -from .vllm_inductor_pass import VllmInductorPass +from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass logger = init_logger(__name__) FP8_DTYPE = current_platform.fp8_dtype() @@ -50,8 +48,7 @@ def empty_i32(*args, **kwargs): torch.ops._C.dynamic_per_token_scaled_fp8_quant.default, # noqa: E501 } if current_platform.is_cuda() and hasattr(torch.ops._C, "scaled_fp4_quant"): - QUANT_OPS[ - kNvfp4Quant] = torch.ops._C.scaled_fp4_quant.default # noqa: E501 + QUANT_OPS[kNvfp4Quant] = torch.ops._C.scaled_fp4_quant.default class FusedRMSQuantKey(NamedTuple): @@ -80,68 +77,6 @@ def 
__str__(self): } -class QuantMultiOutputMatch(MultiOutputMatch): - - def __init__(self, match: pm.Match, quant_op, fused_op): - super().__init__(match) - assert isinstance(quant_op, OpOverload) - assert isinstance(fused_op, OpOverload) - self.QUANT_OP = quant_op # in-place quant op - self.FUSED_OP = fused_op # in-place fused quant op - - def insert_fused_node(self, fused_return_mapping: dict[int, tuple[fx.Node, - int]], - **kwargs): - """ - This utility function inserts an auto-functionalized node for FUSED_OP. - It also correctly sets its meta value and rebinds the users of the - unfused nodes to use the fused node instead. - - :param fused_return_mapping: A dictionary, mapping from getitem indices - of the fused node result to a tuple of the old node and a getitem index. - :param kwargs: kwargs that get directly forwarded to the auto_fn node - - Example: - If we want to replace this graph: - _, x1, x2 = auto_fn(op1) - _, y1, y2 = auto_fn(op2) - - with - _, x1, y2, x2 = auto_fn(FUSED_OP) - - we would call: - insert_fused_node({1: (op1_node, 1), 2: (op2_node, 2), 3: (op1_node, 2)} - - Note that the 0th element is None for auto-functionalized in-place ops. - Hence, others appear 1-indexed. - """ - fused_node = self.insert_auto_fn(self.FUSED_OP, kwargs) - indices = fused_return_mapping.keys() - getitem_nodes = self.insert_getitems(fused_node, indices) - - # Prepare the meta value, use a list so it's mutable - meta_val = [None] * (max(indices) + 1) - - # Iterate through elements of the tuple produced by fused_node - for idx, getitem_node in zip(indices, getitem_nodes): - old_node, old_idx = fused_return_mapping[idx] - - # If the old value was never used, the old_getitem might not exist - old_getitem = find_getitem_maybe(old_node, old_idx) - if old_getitem is not None: - # Rebind the users of match getitem nodes to use the new nodes. - # The old nodes will be removed by DCE at the end of the pass. - old_getitem.replace_all_uses_with(getitem_node) - getitem_node.meta["val"] = old_getitem.meta["val"] - - # Extract the appropriate meta value - # It is present even if the getitem node does not exist - meta_val[idx] = old_node.meta["val"][old_idx] - - # Fix the meta value on the new fused node - fused_node.meta["val"] = tuple(meta_val) - - class RMSNormQuantPattern: def __init__(self, epsilon: float, key: FusedRMSQuantKey): @@ -224,8 +159,7 @@ def __init__(self, symmetric=symmetric)) super().__init__(epsilon, key) - def register(self, pm_pass: PatternMatcherPass, - record_match: Callable[[MultiOutputMatch], bool]): + def register(self, pm_pass: PatternMatcherPass): def pattern(result: torch.Tensor, input: torch.Tensor, residual: torch.Tensor, weight: torch.Tensor, @@ -271,36 +205,7 @@ def replacement(result: torch.Tensor, input: torch.Tensor, inputs, pm.fwd_only, pm_pass, - extra_check=lambda m: record_match( - self.Match(m, self.QUANT_OP, self.FUSED_OP))) - - class Match(QuantMultiOutputMatch): - - def process(self): - # Find the nodes in the match that we need to rebind - rms_node = self.find_auto_fn(RMS_ADD_OP) - quant_node = self.find_auto_fn(self.QUANT_OP) - - assert len(rms_node.users) == 2 - assert len(quant_node.users) == 1 - - # First, insert a new auto_functionalized node for the fused op, - # as well as getitem nodes to extract the result and residual. - # The auto_fn node returns a tuple of (None, result, residual). - # - # The resulting graph looks like this: - # at = auto_functionalized(torch.ops._C.fused_add_rms_norm_static_fp8_quant.default, ...) 
# noqa - # result_node_new = at[1] - # residual_node_new = at[2] - with self.inserting_after_match(): - # Missing epsilon, scalars cannot be inputs to the pattern - kwargs = self.match.kwargs.copy() - - # 0 is always None - fused_return_mapping = {1: (quant_node, 1), 2: (rms_node, 2)} - self.insert_fused_node(fused_return_mapping, - **kwargs, - epsilon=rms_node.kwargs["epsilon"]) + ) class RMSNormDynamicQuantPattern(RMSNormQuantPattern): @@ -317,8 +222,7 @@ def __init__(self, symmetric=symmetric)) super().__init__(epsilon, key) - def register(self, pm_pass: PatternMatcherPass, - record_match: Callable[[MultiOutputMatch], bool]): + def register(self, pm_pass: PatternMatcherPass): def pattern(result: torch.Tensor, result_rms: torch.Tensor, input: torch.Tensor, weight: torch.Tensor, @@ -366,39 +270,7 @@ def replacement(result: torch.Tensor, result_rms: torch.Tensor, inputs, pm.fwd_only, pm_pass, - extra_check=lambda m: record_match( - self.Match(m, self.QUANT_OP, self.FUSED_OP))) - - class Match(QuantMultiOutputMatch): - - def process(self): - # Find the nodes in the match that we need to rebind - rms_node = self.find_auto_fn(RMS_OP) - quant_node = self.find_auto_fn(self.QUANT_OP) - - assert len(rms_node.users) == 1 - assert len(quant_node.users) == 2 - - # First, insert a new auto_functionalized node for the fused op, - # as well as getitem nodes to extract the result and scale. - # The auto_fn node returns a tuple of (None, result, scale). - # - # The resulting graph looks like this: - # at = auto_functionalized(torch.ops._C.rms_norm_dynamic_per_token_quant.default, ...) # noqa - # result_node_new = at[1] - # scale_node_new = at[2] - with self.inserting_after_match(): - # Missing epsilon, scalars cannot be inputs to the pattern - kwargs = self.match.kwargs.copy() - del kwargs["result_rms"] # not used in the fused op - - fused_return_mapping = {1: (quant_node, 1), 2: (quant_node, 2)} - self.insert_fused_node( - fused_return_mapping, - epsilon=rms_node.kwargs["epsilon"], - scale_ub=None, # not used but required - residual=None, # not used but required - **kwargs) + ) class FusedAddRMSNormDynamicQuantPattern(RMSNormQuantPattern): @@ -415,8 +287,7 @@ def __init__(self, symmetric=symmetric)) super().__init__(epsilon, key) - def register(self, pm_pass: PatternMatcherPass, - record_match: Callable[[MultiOutputMatch], bool]): + def register(self, pm_pass: PatternMatcherPass): def pattern(result: torch.Tensor, input: torch.Tensor, residual: torch.Tensor, weight: torch.Tensor, @@ -464,137 +335,49 @@ def replacement(result: torch.Tensor, input: torch.Tensor, inputs, pm.fwd_only, pm_pass, - extra_check=lambda m: record_match( - self.Match(m, self.QUANT_OP, self.FUSED_OP))) - - class Match(QuantMultiOutputMatch): - - def process(self): - # Find the nodes in the match that we need to rebind - rms_node = self.find_auto_fn(RMS_ADD_OP) - quant_node = self.find_auto_fn(self.QUANT_OP) - - assert len(rms_node.users) == 2 - assert len(quant_node.users) == 2 - - # First, insert a new auto_functionalized node for the fused op, - # as well as getitem nodes to extract result, scale, and residual. - # The auto_fn node returns a tuple (None, result, scale, residual). - # - # The resulting graph looks like this: - # at = auto_functionalized(torch.ops._C.rms_norm_dynamic_per_token_quant.default, ...) 
# noqa - # result_node_new = at[1] - # scale_node_new = at[2] - # residual_node_new = at[3] - with self.inserting_after_match(): - # Missing epsilon, scalars cannot be inputs to the pattern - kwargs = self.match.kwargs.copy() - - fused_return_mapping = { - 1: (quant_node, 1), # result - 2: (quant_node, 2), # scale - 3: (rms_node, 2), # residual - } - self.insert_fused_node( - fused_return_mapping, - epsilon=rms_node.kwargs["epsilon"], - scale_ub=None, # not used but required - **kwargs) - - -class FusionPass(VllmInductorPass): + ) + + +class RMSNormQuantFusionPass(VllmPatternMatcherPass): """ - This pass fuses a pre-defined set of custom ops into fused ops. - It uses the torch pattern matcher to find the patterns and replace them. - It also manually processes multi-output matches, as those are broken in - the torch pattern matcher. - - Because patterns can only be registered once, the pass is a singleton. - This will be addressed in a future version of PyTorch: - https://github.com/pytorch/pytorch/pull/139321#issuecomment-2452354980 + This pass fuses rms_norm & quant custom ops into a fused rms_norm_quant op. + It also supports fused_add_rms_norm. """ - _instance: 'Optional[FusionPass]' = None - - @classmethod - def instance(cls, config: VllmConfig): - """ - Get the singleton instance of the FusionPass. - If the instance exists, the config is updated but - initialization is not repeated. - """ - if cls._instance is None: - cls._instance = FusionPass(config) - else: - cls._instance.pass_config = config.compilation_config.pass_config - return cls._instance - @enable_fake_mode def __init__(self, config: VllmConfig): - assert self.__class__._instance is None, \ - "FusionPass singleton instance already exists" super().__init__(config) - self.matches: list[MultiOutputMatch] = [] self.patterns: PatternMatcherPass = PatternMatcherPass( - pass_name="fusion_pass") + pass_name="rmsnorm_quant_fusion_pass") for epsilon in [1e-5, 1e-6]: # Fuse rms_norm + static fp8 quant RMSNormStaticQuantPattern(epsilon, FP8_DTYPE).register(self.patterns) - # Matches for patterns below have 2 or more outputs, - # so we need to process them manually (see process_matches) - - # Fuse rms_norm + static fp8 quant + # Fuse fused_add_rms_norm + static fp8 quant FusedAddRMSNormStaticQuantPattern(epsilon, FP8_DTYPE).register( - self.patterns, self.record_match) + self.patterns) # Fuse rms_norm + dynamic per-token fp8 quant - RMSNormDynamicQuantPattern(epsilon, FP8_DTYPE).register( - self.patterns, self.record_match) + RMSNormDynamicQuantPattern(epsilon, + FP8_DTYPE).register(self.patterns) # Fuse fused_add_rms_norm + dynamic per-token fp8 quant FusedAddRMSNormDynamicQuantPattern(epsilon, FP8_DTYPE).register( - self.patterns, self.record_match) - - # WARNING: This is a hack to clear the pattern matcher cache - # and allow multiple values of epsilon. - torch._inductor.pattern_matcher._seen_patterns.clear() - - def record_match(self, match: MultiOutputMatch) -> bool: - # Hijack the extra_check to record the match and - # save it for post-processing. - self.matches.append(match) - - # Return False to prevent automatic replacement. - return False - - def process_matches(self, graph: fx.Graph): - """ - Manually process multi-output matches and replace them with fused nodes. - See MultiOutputMatch for more details. 
- """ - for match in self.matches: - match.process() + self.patterns) - # Finally, remove matched nodes - graph.eliminate_dead_code() - assert all(node not in graph.nodes for match in self.matches - for node in match.match.nodes) + self.dump_patterns(config, self.patterns) + @VllmInductorPass.time_and_log def __call__(self, graph: fx.Graph): - self.begin() - self.dump_graph(graph, "before_fusion") - - count = self.patterns.apply(graph) - logger.debug("Replaced %s patterns", count) - self.dump_graph(graph, "after_pattern_match") - - # Manually process multi-output matches (and run DCE) - self.process_matches(graph) - logger.debug("Post-processed %s matches", len(self.matches)) - self.dump_graph(graph, "after_fusion") - self.matches.clear() - self.end_and_log() + self.matched_count = self.patterns.apply(graph) + logger.debug("Replaced %s patterns", self.matched_count) + + def uuid(self) -> Any: + return self.hash_source(self, RMSNormQuantPattern, + RMSNormStaticQuantPattern, + RMSNormDynamicQuantPattern, + FusedAddRMSNormStaticQuantPattern, + FusedAddRMSNormDynamicQuantPattern) diff --git a/vllm/compilation/fusion_attn.py b/vllm/compilation/fusion_attn.py index e3677b3dd62d..2c6cf8f12fdc 100644 --- a/vllm/compilation/fusion_attn.py +++ b/vllm/compilation/fusion_attn.py @@ -18,7 +18,7 @@ from .fusion import QUANT_OPS, empty_bf16, empty_fp32, empty_i32 from .inductor_pass import enable_fake_mode -from .vllm_inductor_pass import VllmInductorPass +from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass logger = init_logger(__name__) @@ -245,7 +245,7 @@ def replacement(q: torch.Tensor, k: torch.Tensor, v: torch.Tensor, pm_pass) -class AttnFusionPass(VllmInductorPass): +class AttnFusionPass(VllmPatternMatcherPass): """ This pass fuses post-attention quantization onto attention if supported. @@ -282,20 +282,12 @@ def __init__(self, config: VllmConfig): "were found in CompilationConfig.static_forward_context " "so no fusion patterns were registered.") - def __call__(self, graph: torch.fx.graph.Graph) -> None: - self.begin() - self.dump_graph(graph, "before_attn_fusion") - - count = self.patterns.apply(graph) + self.dump_patterns(config, self.patterns) - # TODO: Move this to pass_manager.py after the fx graph broken issue - # has been resolved. 
- # see https://github.com/vllm-project/vllm/issues/23091 - graph.eliminate_dead_code() - - logger.debug("Fused quantization onto %s attention nodes", count) - self.dump_graph(graph, "after_attn_fusion") - self.end_and_log() + @VllmInductorPass.time_and_log + def __call__(self, graph: torch.fx.graph.Graph) -> None: + self.matched_count = self.patterns.apply(graph) + logger.debug("Fused quant onto %s attention nodes", self.matched_count) def uuid(self): return VllmInductorPass.hash_source(self, AttentionQuantPattern, diff --git a/vllm/compilation/multi_output_match.py b/vllm/compilation/multi_output_match.py deleted file mode 100644 index 6d1893777cec..000000000000 --- a/vllm/compilation/multi_output_match.py +++ /dev/null @@ -1,109 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import abc -import operator -from abc import abstractmethod -from collections.abc import Iterable - -from torch import fx -from torch._higher_order_ops.auto_functionalize import auto_functionalized -from torch._inductor import pattern_matcher as pm -from torch._ops import OpOverload -from torch.fx import Node - -from vllm.compilation.fx_utils import find_auto_fn - - -class MultiOutputMatch(abc.ABC): - """ - This class provides utilities to process multi-output matches and - manually insert replacements. - - This is necessary because the automatic replacement for multi-output - matches is broken: https://github.com/pytorch/pytorch/issues/137280 - """ - - def __init__(self, match: pm.Match): - self.match = match - - @abstractmethod - def process(self): - """ - Process a multi-output match and manually insert the replacement. - - This method should: - 1. Insert the replacement nodes after the last node in the match. - 2. Rebind the users of nodes in the match to use the new nodes. - 3. Set meta["val"] for de-functionalization. - - The result of an auto-functionalized node is a tuple of tensors. - The first element is the return value of the function, usually None. - The remaining elements are the mutated args of the function. - - All auto-functionalized nodes must contain a proper meta["val"], - as it is used by de-functionalization. meta["val"] has to contain the - value of the node (tuple of tensors) that would be returned by the - functionalized node during tracing. - - Existing nodes in the graph all have this property set, but we have - to set it manually for new nodes we insert. - - Example: - # op schema: foo(a: Tensor!, b: Tensor, c: Tensor!) -> None - at = auto_functionalized(torch.ops._C.foo.default, a, b, c) - # at.meta["val"] = (None, a, c) - """ - raise NotImplementedError - - @property - def nodes(self) -> list[fx.Node]: - return self.match.nodes - - @property - def graph(self) -> fx.Graph: - return self.match.graph - - def find_auto_fn(self, op) -> fx.Node: - """ - Find the first auto_functionalized node with the given op in the match. - """ - return find_auto_fn(self.nodes, op) - - def inserting_after_match(self): - """ - Insert nodes after the last node in the match. - This is done to avoid use-before-definition errors after inserting - replacement nodes. - """ - - # match.nodes is not guaranteed to be sorted. - # Find the last node in the match. 
- for last_node_in_match in reversed(self.graph.nodes): - if last_node_in_match in self.match.nodes: - break - else: - raise ValueError("No nodes in graph") - - return self.graph.inserting_after(last_node_in_match) - - def insert_getitems(self, tuple_node: fx.Node, - indices: Iterable[int]) -> tuple[fx.Node, ...]: - """ - Insert operator.getitem nodes to extract elements from a tuple node. - - :param tuple_node: The tuple node to extract elements from. - :param indices: The indices of the elements to extract. - :return: Tuple of the new getitem nodes, corresponding to the indices. - """ - with self.graph.inserting_after(tuple_node): - return tuple( - self.graph.call_function(operator.getitem, (tuple_node, idx)) - for idx in indices) - - def insert_auto_fn(self, op: OpOverload, kwargs) -> Node: - """ - Insert an auto_functionalized node with the given op and kwargs. - """ - return self.graph.call_function(auto_functionalized, (op, ), - kwargs=kwargs) diff --git a/vllm/compilation/noop_elimination.py b/vllm/compilation/noop_elimination.py index 17e85e70218d..2c453daf873d 100644 --- a/vllm/compilation/noop_elimination.py +++ b/vllm/compilation/noop_elimination.py @@ -64,9 +64,8 @@ class NoOpEliminationPass(VllmInductorPass): out: "f16[s0, 4096]" = at[1] """ + @VllmInductorPass.time_and_log def __call__(self, graph: torch.fx.Graph): - self.begin() - self.dump_graph(graph, "before_noop_elimination") count = 0 # Remove no-op reshapes/views: for node in graph.nodes: @@ -121,8 +120,6 @@ def __call__(self, graph: torch.fx.Graph): count += 1 logger.debug("Removed %s no-op reshapes and slices", count) - self.dump_graph(graph, "after_noop_elimination") - self.end_and_log() # ---------------------- Reshape helpers ---------------------- def reshape_dims_equivalent(self, dim: Union[int, torch.fx.Node], diff --git a/vllm/compilation/pass_manager.py b/vllm/compilation/pass_manager.py index 1b1cbe4fa12c..e323fa1f7734 100644 --- a/vllm/compilation/pass_manager.py +++ b/vllm/compilation/pass_manager.py @@ -1,15 +1,21 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import functools from torch import fx as fx +from vllm import envs from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.platforms import current_platform +from vllm.utils import set_env_var + +from .post_cleanup import PostCleanupPass +from .vllm_inductor_pass import VllmInductorPass if current_platform.is_cuda_alike(): from .activation_quant_fusion import ActivationQuantFusionPass - from .fusion import FusionPass + from .fusion import RMSNormQuantFusionPass from .fusion_attn import AttnFusionPass if current_platform.is_cuda(): @@ -19,11 +25,28 @@ from .inductor_pass import CustomGraphPass, InductorPass, get_pass_context from .noop_elimination import NoOpEliminationPass from .sequence_parallelism import SequenceParallelismPass -from .vllm_inductor_pass import VllmInductorPass logger = init_logger(__name__) +def with_pattern_match_debug(fn): + """ + Function decorator that turns on inductor pattern match debug + for the duration of the call. + Used to avoid logging builtin Inductor pattern matching. 
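As a usage sketch (editorial; the node name is an assumed example), debugging a pattern that fails to match a particular node would look like:

import os

from vllm import LLM

# VLLM_PATTERN_MATCH_DEBUG takes the fx.Node name the pattern should have
# matched, e.g. copied from a dumped post-grad graph.
os.environ["VLLM_PATTERN_MATCH_DEBUG"] = "scaled_mm_3"

# While the custom post-grad passes run, TORCHINDUCTOR_PATTERN_MATCH_DEBUG is
# set to the same name and restored afterwards, so Inductor's pattern-match
# debug output is limited to vLLM's own passes.
llm = LLM(model="meta-llama/Llama-3.2-1B-Instruct")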
+ """ + + @functools.wraps(fn) + def wrapper(*args, **kwargs): + if (debug_val := envs.VLLM_PATTERN_MATCH_DEBUG) is not None: + # optionally check rank here + with set_env_var("TORCHINDUCTOR_PATTERN_MATCH_DEBUG", debug_val): + return fn(*args, **kwargs) + return fn(*args, **kwargs) + + return wrapper + + class PostGradPassManager(CustomGraphPass): """ The pass manager for post-grad passes. @@ -40,16 +63,26 @@ class PostGradPassManager(CustomGraphPass): """ def __init__(self): - self.passes: list[VllmInductorPass] = [] + self.passes: list[InductorPass] = [] + @with_pattern_match_debug def __call__(self, graph: fx.Graph): + VllmInductorPass.dump_prefix = 0 # reset dump index + shape = get_pass_context().runtime_shape for pass_ in self.passes: if pass_.is_applicable_for_shape(shape): pass_(graph) + VllmInductorPass.dump_prefix += 1 + + # post-cleanup goes before fix_functionalization + # because it requires a functional graph + self.post_cleanup(graph) + VllmInductorPass.dump_prefix += 1 # always run fix_functionalization last self.fix_functionalization(graph) + VllmInductorPass.dump_prefix = None # Cleanup index def configure(self, config: VllmConfig): self.pass_config = config.compilation_config.pass_config @@ -61,14 +94,18 @@ def configure(self, config: VllmConfig): if self.pass_config.enable_async_tp: self.passes += [AsyncTPPass(config)] + if self.pass_config.enable_fi_allreduce_fusion: + self.passes += [AllReduceFusionPass(config)] + if self.pass_config.enable_fusion: - self.passes += [FusionPass.instance(config)] + self.passes += [RMSNormQuantFusionPass(config)] self.passes += [ActivationQuantFusionPass(config)] if self.pass_config.enable_attn_fusion: self.passes += [AttnFusionPass(config)] - if self.pass_config.enable_fi_allreduce_fusion: - self.passes += [AllReduceFusionPass(config)] + + # needs a functional graph + self.post_cleanup = PostCleanupPass(config) self.fix_functionalization = FixFunctionalizationPass(config) def add(self, pass_: InductorPass): diff --git a/vllm/compilation/post_cleanup.py b/vllm/compilation/post_cleanup.py new file mode 100644 index 000000000000..6a31f3935da7 --- /dev/null +++ b/vllm/compilation/post_cleanup.py @@ -0,0 +1,20 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from torch import fx + +from vllm.compilation.vllm_inductor_pass import VllmInductorPass + + +class PostCleanupPass(VllmInductorPass): + """ + This pass performs cleanup after custom passes. + It topologically sorts the graph and removes unused nodes. + This is needed because the pattern matcher does not guarantee producing + a topologically sorted graph, and there may be unused nodes left around. 
+ """ + + @VllmInductorPass.time_and_log + def __call__(self, graph: fx.Graph) -> None: + from torch._inductor.pattern_matcher import stable_topological_sort + stable_topological_sort(graph) + graph.eliminate_dead_code() diff --git a/vllm/compilation/sequence_parallelism.py b/vllm/compilation/sequence_parallelism.py index 1758ed4c86d2..a6ca50c925a2 100644 --- a/vllm/compilation/sequence_parallelism.py +++ b/vllm/compilation/sequence_parallelism.py @@ -15,7 +15,7 @@ from vllm.platforms import current_platform from .inductor_pass import enable_fake_mode -from .vllm_inductor_pass import VllmInductorPass +from .vllm_inductor_pass import VllmInductorPass, VllmPatternMatcherPass logger = init_logger(__name__) @@ -417,7 +417,7 @@ def replacement( pm.fwd_only, pm_pass) -class SequenceParallelismPass(VllmInductorPass): +class SequenceParallelismPass(VllmPatternMatcherPass): """ This pass enables sequence parallelism for models. It identifies patterns where an AllReduce operation is followed by @@ -466,19 +466,13 @@ def __init__(self, config: VllmConfig): LastAllReduceRMSNormPattern(epsilon, self.model_dtype, self.device).register(self.patterns) - - # WARNING: This is a hack to clear the pattern matcher cache - # and allow multiple values of epsilon. - torch._inductor.pattern_matcher._seen_patterns.clear() + self.dump_patterns(config, self.patterns) def is_applicable_for_shape(self, shape: Optional[int]) -> bool: tp_size = get_tensor_model_parallel_world_size() return shape is not None and shape % tp_size == 0 + @VllmInductorPass.time_and_log def __call__(self, graph: fx.Graph): - self.begin() - self.dump_graph(graph, "before_sequence_parallelism_pass") - count = self.patterns.apply(graph) - logger.debug("Replaced %s patterns with sequence parallelism", count) - self.dump_graph(graph, "after_sequence_parallelism_pass") - self.end_and_log() + self.matched_count = self.patterns.apply(graph) + logger.debug("Replaced %s patterns", self.matched_count) diff --git a/vllm/compilation/vllm_inductor_pass.py b/vllm/compilation/vllm_inductor_pass.py index b822b05b0f1e..837770d18199 100644 --- a/vllm/compilation/vllm_inductor_pass.py +++ b/vllm/compilation/vllm_inductor_pass.py @@ -1,10 +1,16 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project - +import functools +import operator import time +from pathlib import Path +from typing import ClassVar, Optional +import regex as re import torch from torch._dynamo.utils import lazy_format_graph_code +from torch._inductor.pattern_matcher import (PatternMatcherPass, + PatternPrettyPrinter) from vllm.config import VllmConfig from vllm.logger import init_logger @@ -19,6 +25,8 @@ class VllmInductorPass(InductorPass): An inductor pass with access to vLLM PassConfig. It provides timing, logging, and dumping utilities. 
""" + dump_prefix: ClassVar[Optional[int]] = None + """Keep track of pass index for debug dump ordering.""" def __init__(self, config: VllmConfig): self.pass_config = config.compilation_config.pass_config @@ -28,8 +36,24 @@ def __init__(self, config: VllmConfig): else None self.pass_name = self.__class__.__name__ + @staticmethod + def time_and_log(call_fn): + + @functools.wraps(call_fn) + def wrapped(self: VllmInductorPass, graph: torch.fx.Graph): + self.begin() + self.dump_graph(graph, "before") + call_fn(self, graph) + self.dump_graph(graph, "after") + self.end_and_log() + + return wrapped + def dump_graph(self, graph: torch.fx.Graph, stage: str): - lazy_format_graph_code(stage, graph.owning_module) + i = VllmInductorPass.dump_prefix + i_str = "" if i is None else f".{i}" + lazy_format_graph_code(f"post_grad{i_str}.{self.pass_name}.{stage}", + graph.owning_module) def begin(self): self._start_time = time.perf_counter_ns() @@ -40,6 +64,88 @@ def end_and_log(self): logger.debug("%s completed in %.1f ms", self.pass_name, duration_ms) +class VllmPatternMatcherPass(VllmInductorPass): + """ + A VllmInductorPass that uses the Inductor pattern matcher. + Its main use is providing the dump_patterns utility that dumps the + Inductor pattern matcher patterns into a file, which greatly aids debugging. + + TODO(luka) move more utilities to this pass. + """ + matched_count: int = 0 + """The number of matched patterns in the pass.""" + + _OP_OVERLOAD_PATTERN: ClassVar[re.Pattern] = re.compile( + r"") + + def _replace_op_overloads(self, string: str) -> str: + """Replace with nicer formulations""" + return self._OP_OVERLOAD_PATTERN.sub( + lambda m: f"torch.ops.{m.group(1)}.{m.group(2)}", + string, + ) + + def dump_patterns(self, config: VllmConfig, pm_pass: PatternMatcherPass): + """ + If debug dumping is enabled, dump the Inductor pattern-matcher patterns + into the debug_dump_path folder next to the dumped fx graphs. + + This method does its best to print something that looks like Python code + for easier debugging and potentially navigation. If any errors appear in + the output, please add to this method. + + TODO(luka): use pattern object to manually produce pattern graph + """ + debug_dump_path = config.compilation_config.debug_dump_path + if not debug_dump_path: + return + + rank = config.parallel_config.rank + debug_dump_path = Path(debug_dump_path) / f"rank_{rank}" + debug_dump_path.mkdir(parents=True, exist_ok=True) + + from vllm.utils import unique_filepath + file_path = unique_filepath( + lambda i: debug_dump_path / f"patterns.{self.pass_name}.{i}.py") + + with file_path.open("w") as f: + print( + f'# This file was produced by VllmPatternMatcherPass.' 
+ f'dump_patterns for {self.pass_name}.\n' + f'# It does its best to produce valid-Python-looking code but' + f' please add to dump_patterns if there are any errors.\n\n' + f'from torch._higher_order_ops.auto_functionalize import ' + f'auto_functionalized as auto_functionalized\n' + f'from torch._inductor.pattern_matcher import *', + file=f) + + for node, patterns in pm_pass.patterns.items(): + # fix the operator.getitem repr + if node[1] == operator.getitem: + node_repr = f"({repr(node[0])}, operator.getitem)" + else: + node_repr = repr(node) + + node_repr = self._replace_op_overloads(node_repr) + + print(f"\n\n# Patterns for op: {node_repr}", file=f) + for i, pattern in enumerate(patterns): + # reserve auto_functionalized ahead of time + pp = PatternPrettyPrinter() + pp.namespace.create_name("auto_functionalized", None) + + # Assemble pattern + out_node = pp.pretty_print(pattern.pattern) + pattern_repr = "\n".join([f"def pattern_{i}():"] + [ + f"{pp.memoized_objs_names[key]} = " + f"{pp.memoized_objs_pp[key]}" + for key in pp.memoized_objs_names + ] + [f"return {out_node}"]).replace("\n", "\n ") + + pattern_repr = self._replace_op_overloads(pattern_repr) + print(f"{pattern_repr}\n", file=f) + + class PrinterInductorPass(VllmInductorPass): def __init__(self, name: str, config: VllmConfig): diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 9bb8087a511d..92fc68f8927c 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -905,10 +905,9 @@ def set_current_vllm_config(vllm_config: VllmConfig, except Exception: raise else: - logger.debug("enabled custom ops: %s", - vllm_config.compilation_config.enabled_custom_ops) - logger.debug("disabled custom ops: %s", - vllm_config.compilation_config.disabled_custom_ops) + if check_compile: + vllm_config.compilation_config.custom_op_log_check() + if check_compile and \ vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \ and compilation_counter.num_models_seen == num_models_seen: diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 22b38daf46c3..34fa7fcfe7e8 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -487,6 +487,12 @@ def __post_init__(self) -> None: "supported with torch>=2.9.0.dev. Set " "use_inductor_graph_partition=False instead.") + for op in self.custom_ops: + if op[0] not in {'+', '-'} and op not in {'all', 'none'}: + raise ValueError(f"Invalid syntax '{op}' for custom op, " + "must be 'all', 'none', '+op' or '-op' " + "(where 'op' is the registered op name)") + def init_backend(self, vllm_config: "VllmConfig") -> Union[str, Callable]: if self.level == CompilationLevel.NO_COMPILATION: raise ValueError("No compilation level is set.") @@ -532,8 +538,8 @@ def init_with_cudagraph_sizes(self, for x in self.compile_sizes: if isinstance(x, str): assert x == "cudagraph_capture_sizes", \ - "Unrecognized size type in compile_sizes, " \ - f"expect 'cudagraph_capture_sizes', got {x}" + "Unrecognized size type in compile_sizes, " \ + f"expect 'cudagraph_capture_sizes', got {x}" computed_compile_sizes.extend(self.cudagraph_capture_sizes) else: assert isinstance(x, int) @@ -628,3 +634,41 @@ def is_attention_compiled_piecewise(self) -> bool: return use_fx_graph_piecewise_compilation or \ use_inductor_piecewise_compilation + + def custom_op_log_check(self): + """ + This method logs the enabled/disabled custom ops and checks that the + passed custom_ops field only contains relevant ops. 
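Concretely, the custom_ops syntax accepted after this change (an editorial example mirroring the test_config.py case above):

from vllm.config import CompilationConfig

CompilationConfig(custom_ops=["+quant_fp8", "-silu_and_mul"])  # accepted
CompilationConfig(custom_ops=["all", "-rms_norm"])             # accepted

try:
    CompilationConfig(custom_ops=["quant_fp8"])  # no '+'/'-' prefix
except ValueError as err:
    print(err)  # Invalid syntax 'quant_fp8' for custom op, must be ...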
+ It is called at the end of set_current_vllm_config, + after the custom ops have been instantiated. + """ + + if len(self.enabled_custom_ops) + len(self.disabled_custom_ops) == 0: + logger.debug("No custom ops found in model.") + return + + logger.debug("enabled custom ops: %s", self.enabled_custom_ops) + logger.debug("disabled custom ops: %s", self.disabled_custom_ops) + + all_ops_in_model = (self.enabled_custom_ops | self.disabled_custom_ops) + for op in self.custom_ops: + if op in {"all", "none"}: + continue + + assert op[0] in {'+', '-'}, "Invalid custom op syntax " \ + "(should be checked during init)" + + # check if op name exists in model + op_name = op[1:] + if op_name not in all_ops_in_model: + from vllm.model_executor.custom_op import CustomOp + + # Does op exist at all or is it just not present in this model? + # Note: Only imported op classes appear in the registry. + missing_str = "doesn't exist (or wasn't imported/registered)" \ + if op_name not in CustomOp.op_registry \ + else "not present in model" + + enable_str = "enabling" if op[0] == '+' else "disabling" + logger.warning_once("Op '%s' %s, %s with '%s' has no effect", + op_name, missing_str, enable_str, op) diff --git a/vllm/envs.py b/vllm/envs.py index 8941be125ed0..eaee2f6cc771 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -190,6 +190,7 @@ VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" + VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None def get_default_cache_root(): @@ -442,6 +443,11 @@ def get_vllm_port() -> Optional[int]: "VLLM_USE_STANDALONE_COMPILE": lambda: os.environ.get("VLLM_USE_STANDALONE_COMPILE", "0") == "1", + # Debug pattern matching inside custom passes. + # Should be set to the fx.Node name (e.g. 'getitem_34' or 'scaled_mm_3'). + "VLLM_PATTERN_MATCH_DEBUG": + lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None), + # local rank of the process in the distributed setting, used to determine # the GPU device id "LOCAL_RANK": diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 022e35a399c5..3399d00fbabb 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -3392,7 +3392,7 @@ def length_from_prompt_token_ids_or_embeds( prompt_token_ids: Optional[list[int]], prompt_embeds: Optional[torch.Tensor], ) -> int: - """Calculate the request length (in number of tokens) give either + """Calculate the request length (in number of tokens) give either prompt_token_ids or prompt_embeds. 
""" prompt_token_len = None if prompt_token_ids is None else len( @@ -3413,3 +3413,16 @@ def length_from_prompt_token_ids_or_embeds( f" prompt_token_ids={prompt_token_len}" f" prompt_embeds={prompt_embeds_len}") return prompt_token_len + + +@contextlib.contextmanager +def set_env_var(key, value): + old = os.environ.get(key) + os.environ[key] = value + try: + yield + finally: + if old is None: + del os.environ[key] + else: + os.environ[key] = old From 8db29392899b12843787b0a03880f70a6e6ac88e Mon Sep 17 00:00:00 2001 From: Or Ozeri Date: Mon, 22 Sep 2025 22:30:36 +0300 Subject: [PATCH 240/518] [KV offload][5/N] Add `CPUOffloadingSpec` (#24251) Signed-off-by: Or Ozeri --- docs/features/disagg_prefill.md | 6 ++ .../{test_cpu.py => test_cpu_manager.py} | 0 tests/v1/kv_offload/test_cpu_offloading.py | 62 +++++++++++++++ vllm/v1/kv_offload/cpu.py | 75 +++++++++++++++++++ vllm/v1/kv_offload/factory.py | 3 + 5 files changed, 146 insertions(+) rename tests/v1/kv_offload/{test_cpu.py => test_cpu_manager.py} (100%) create mode 100644 tests/v1/kv_offload/test_cpu_offloading.py create mode 100644 vllm/v1/kv_offload/cpu.py diff --git a/docs/features/disagg_prefill.md b/docs/features/disagg_prefill.md index 996ef00a6b96..69f70b8ff5ac 100644 --- a/docs/features/disagg_prefill.md +++ b/docs/features/disagg_prefill.md @@ -31,6 +31,12 @@ Now supports 5 types of connectors: --kv-transfer-config '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"SharedStorageConnector","kv_role":"kv_both","kv_connector_extra_config":{"shared_storage_path":"local_storage"}}]}}' ``` +- **OffloadingConnector**: enable offloading of KV data to CPU memory, customizing the CPU block size (in tokens) and number of blocks to allocate (per worker): + + ```bash + --kv-transfer-config '{"kv_connector":"OffloadingConnector","kv_role":"kv_both","kv_connector_extra_config":{"block_size": 64, "num_cpu_blocks": 1000}}' + ``` + ## Benchmarks Please refer to for disaggregated prefilling benchmarks. diff --git a/tests/v1/kv_offload/test_cpu.py b/tests/v1/kv_offload/test_cpu_manager.py similarity index 100% rename from tests/v1/kv_offload/test_cpu.py rename to tests/v1/kv_offload/test_cpu_manager.py diff --git a/tests/v1/kv_offload/test_cpu_offloading.py b/tests/v1/kv_offload/test_cpu_offloading.py new file mode 100644 index 000000000000..fc8ca09bea3d --- /dev/null +++ b/tests/v1/kv_offload/test_cpu_offloading.py @@ -0,0 +1,62 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import time + +import pytest + +from vllm import LLM, SamplingParams +from vllm.config import KVTransferConfig + +CPU_BLOCK_SIZES = [16, 48] + + +@pytest.mark.parametrize("cpu_block_size", CPU_BLOCK_SIZES) +def test_cpu_offloading(cpu_block_size: int) -> None: + """ + Tests OffloadingConnector with CPUOffloadingSpec. 
+ """ + + # configure OffloadingConnector (spec_name=CPUOffloadingSpec by default) + kv_transfer_config = KVTransferConfig( + kv_connector="OffloadingConnector", + kv_role="kv_both", + kv_connector_extra_config={ + "num_cpu_blocks": 100, + "block_size": cpu_block_size + }, + ) + + llm = LLM( + model="meta-llama/Llama-3.2-1B-Instruct", + gpu_memory_utilization=0.5, + kv_transfer_config=kv_transfer_config, + ) + + prompts = ["Hi " * 100] + sampling_params = SamplingParams(temperature=0, max_tokens=20) + + # run generation - this should trigger saving KV cache + start_time = time.time() + llm.generate(prompts, sampling_params, use_tqdm=False) + cold_time = time.time() - start_time + + # run generation again - should hit the GPU prefix cache + start_time = time.time() + llm.generate(prompts, sampling_params, use_tqdm=False) + gpu_hit_time = time.time() - start_time + + # reset prefix cache to avoid GPU hit. + llm.reset_prefix_cache() + + # sleep for a sec to make sure CPU finished storing + time.sleep(1) + + # run generation again - this should trigger loading from CPU + start_time = time.time() + llm.generate(prompts, sampling_params, use_tqdm=False) + cpu_hit_time = time.time() - start_time + + print("Generation times:") + print(f" Cold: {cold_time * 1000:.2f}ms") + print(f" GPU hit: {gpu_hit_time * 1000:.2f}ms") + print(f" CPU hit: {cpu_hit_time * 1000:.2f}ms") diff --git a/vllm/v1/kv_offload/cpu.py b/vllm/v1/kv_offload/cpu.py new file mode 100644 index 000000000000..b85d375fe63e --- /dev/null +++ b/vllm/v1/kv_offload/cpu.py @@ -0,0 +1,75 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections.abc import Iterator +from typing import Optional + +import torch + +from vllm.config import VllmConfig, get_layers_from_vllm_config +from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase +from vllm.platforms import current_platform +from vllm.v1.kv_offload.abstract import LoadStoreSpec, OffloadingManager +from vllm.v1.kv_offload.backends.cpu import CPUBackend +from vllm.v1.kv_offload.lru_manager import LRUOffloadingManager +from vllm.v1.kv_offload.mediums import CPULoadStoreSpec, GPULoadStoreSpec +from vllm.v1.kv_offload.spec import OffloadingSpec +from vllm.v1.kv_offload.worker.cpu_gpu import CpuGpuOffloadingHandler +from vllm.v1.kv_offload.worker.worker import OffloadingHandler + + +class CPUOffloadingSpec(OffloadingSpec): + + def __init__(self, vllm_config: VllmConfig): + super().__init__(vllm_config) + + num_cpu_blocks = self.extra_config.get("num_cpu_blocks") + if not num_cpu_blocks: + raise Exception("num_cpu_blocks must be specified " + "in kv_connector_extra_config") + self.num_cpu_blocks: int = num_cpu_blocks + + # scheduler-side + self._manager: Optional[OffloadingManager] = None + + # worker-side + self._handler: Optional[OffloadingHandler] = None + + def get_manager(self) -> OffloadingManager: + if not self._manager: + kv_events_config = self.vllm_config.kv_events_config + enable_events = (kv_events_config is not None + and kv_events_config.enable_kv_cache_events) + self._manager = LRUOffloadingManager(CPUBackend( + block_size=self.offloaded_block_size, + num_blocks=self.num_cpu_blocks), + enable_events=enable_events) + return self._manager + + def get_handlers( + self, kv_caches: dict[str, torch.Tensor] + ) -> Iterator[tuple[type[LoadStoreSpec], type[LoadStoreSpec], + OffloadingHandler]]: + if not self._handler: + if not current_platform.is_cuda(): + raise Exception("CPU Offloading is 
currently only supported" + " on CUDA GPUs") + + layer_names = list(kv_caches.keys()) + layers = get_layers_from_vllm_config(self.vllm_config, + AttentionLayerBase, + layer_names) + attn_backends = { + layer_name: layers[layer_name].get_attn_backend() + for layer_name in layer_names + } + + self._handler = CpuGpuOffloadingHandler( + attn_backends=attn_backends, + gpu_block_size=self.gpu_block_size, + cpu_block_size=self.offloaded_block_size, + num_cpu_blocks=self.num_cpu_blocks, + gpu_caches=kv_caches) + + assert self._handler is not None + yield GPULoadStoreSpec, CPULoadStoreSpec, self._handler + yield CPULoadStoreSpec, GPULoadStoreSpec, self._handler diff --git a/vllm/v1/kv_offload/factory.py b/vllm/v1/kv_offload/factory.py index 6365ab4a6db7..f9bef6cea903 100644 --- a/vllm/v1/kv_offload/factory.py +++ b/vllm/v1/kv_offload/factory.py @@ -51,3 +51,6 @@ def create_spec( # Register various specs here. +OffloadingSpecFactory.register_spec("CPUOffloadingSpec", + "vllm.v1.kv_offload.cpu", + "CPUOffloadingSpec") From f552d5e578077574276aa9d83139b91e1d5ae163 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 23 Sep 2025 04:18:24 +0800 Subject: [PATCH 241/518] [CI/Build] Skip Qwen3-VL initialization tests until models are actually released (#25394) Signed-off-by: DarkLight1337 --- tests/models/registry.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 29b6980aaa42..6047a7a3e98d 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -562,10 +562,12 @@ def check_available_online( "Qwen2_5OmniForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen2.5-Omni-7B-AWQ"), # noqa: E501 "Qwen3VLForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-4B-Instruct", # noqa: E501 max_model_len=4096, - min_transformers_version="4.57"), # noqa: E501 + min_transformers_version="4.57", + is_available_online=False), "Qwen3VLMoeForConditionalGeneration": _HfExamplesInfo("Qwen/Qwen3-VL-30B-A3B-Instruct", # noqa: E501 - max_model_len=4096, - min_transformers_version="4.57"), + max_model_len=4096, + min_transformers_version="4.57", + is_available_online=False), "RForConditionalGeneration": _HfExamplesInfo("YannQi/R-4B", trust_remote_code=True), "SkyworkR1VChatModel": _HfExamplesInfo("Skywork/Skywork-R1V-38B", From 8bed1791091510012c33f563ee8ff02e13bf6297 Mon Sep 17 00:00:00 2001 From: Johnny Yang <24908445+jcyang43@users.noreply.github.com> Date: Mon, 22 Sep 2025 16:14:44 -0700 Subject: [PATCH 242/518] [TPU] update torch_xla dependency for PyPI compatibility (#25278) Signed-off-by: Johnny Yang Co-authored-by: Chengji Yao --- requirements/tpu.txt | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) diff --git a/requirements/tpu.txt b/requirements/tpu.txt index 7ea239b48ea2..4241cbb2b033 100644 --- a/requirements/tpu.txt +++ b/requirements/tpu.txt @@ -14,14 +14,4 @@ nixl==0.3.0 tpu_info==0.4.0 # Install torch_xla ---pre ---extra-index-url https://download.pytorch.org/whl/nightly/cpu ---find-links https://storage.googleapis.com/libtpu-wheels/index.html ---find-links https://storage.googleapis.com/libtpu-releases/index.html ---find-links https://storage.googleapis.com/jax-releases/jax_nightly_releases.html ---find-links https://storage.googleapis.com/jax-releases/jaxlib_nightly_releases.html -torch==2.9.0.dev20250730 -torchvision==0.24.0.dev20250730 -torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250730-cp311-cp311-linux_x86_64.whl ; 
python_version == "3.11" -torch_xla[tpu, pallas] @ https://storage.googleapis.com/pytorch-xla-releases/wheels/tpuvm/torch_xla-2.9.0.dev20250730-cp312-cp312-linux_x86_64.whl ; python_version == "3.12" - +torch_xla[tpu, pallas]==2.8.0 \ No newline at end of file From 45d7d852d362b055b7c02eb967c4ea00d6f9e130 Mon Sep 17 00:00:00 2001 From: Alec S <10566873+alecsolder@users.noreply.github.com> Date: Mon, 22 Sep 2025 19:38:19 -0400 Subject: [PATCH 243/518] [Frontend] Responses API MCP tools for built in tools and to pass through headers (#24628) Signed-off-by: Alec Solder Signed-off-by: Alec S <10566873+alecsolder@users.noreply.github.com> Co-authored-by: Alec Solder Co-authored-by: Ye (Charlotte) Qi --- .../openai/test_response_api_mcp_tools.py | 106 +++++++++ .../openai/test_response_api_with_harmony.py | 9 +- tests/test_envs.py | 216 ++++++++++++++++++ vllm/entrypoints/context.py | 37 ++- vllm/entrypoints/harmony_utils.py | 4 +- vllm/entrypoints/openai/serving_responses.py | 25 +- vllm/entrypoints/tool_server.py | 29 ++- vllm/envs.py | 66 +++++- 8 files changed, 463 insertions(+), 29 deletions(-) create mode 100644 tests/entrypoints/openai/test_response_api_mcp_tools.py create mode 100644 tests/test_envs.py diff --git a/tests/entrypoints/openai/test_response_api_mcp_tools.py b/tests/entrypoints/openai/test_response_api_mcp_tools.py new file mode 100644 index 000000000000..b0eb84712c19 --- /dev/null +++ b/tests/entrypoints/openai/test_response_api_mcp_tools.py @@ -0,0 +1,106 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +import pytest_asyncio +from openai import OpenAI + +from ...utils import RemoteOpenAIServer + +MODEL_NAME = "openai/gpt-oss-20b" + + +@pytest.fixture(scope="module") +def monkeypatch_module(): + from _pytest.monkeypatch import MonkeyPatch + mpatch = MonkeyPatch() + yield mpatch + mpatch.undo() + + +@pytest.fixture(scope="module") +def mcp_disabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", "--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest.fixture(scope="function") +def mcp_enabled_server(monkeypatch_module: pytest.MonkeyPatch): + args = ["--enforce-eager", "--tool-server", "demo"] + + with monkeypatch_module.context() as m: + m.setenv("VLLM_ENABLE_RESPONSES_API_STORE", "1") + m.setenv("PYTHON_EXECUTION_BACKEND", "dangerously_use_uv") + m.setenv("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", + "code_interpreter,container") + with RemoteOpenAIServer(MODEL_NAME, args) as remote_server: + yield remote_server + + +@pytest_asyncio.fixture +async def mcp_disabled_client(mcp_disabled_server): + async with mcp_disabled_server.get_async_client() as async_client: + yield async_client + + +@pytest_asyncio.fixture +async def mcp_enabled_client(mcp_enabled_server): + async with mcp_enabled_server.get_async_client() as async_client: + yield async_client + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_enabled(mcp_enabled_client: OpenAI, + model_name: str): + response = await mcp_enabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not 
currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 + + +@pytest.mark.asyncio +@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.skip(reason="Code interpreter tool is not available in CI yet.") +async def test_mcp_tool_env_flag_disabled(mcp_disabled_client: OpenAI, + model_name: str): + response = await mcp_disabled_client.responses.create( + model=model_name, + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. The python interpreter is not stateful " + "and you must print to see the output."), + tools=[{ + "type": "mcp", + "server_label": "code_interpreter", + # URL unused for DemoToolServer + "server_url": "http://localhost:8888" + }], + ) + assert response is not None + assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens == 0 diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index f3c3148577b8..23d8373d9780 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -454,7 +454,13 @@ async def test_web_search(client: OpenAI, model_name: str): async def test_code_interpreter(client: OpenAI, model_name: str): response = await client.responses.create( model=model_name, - input="Multiply 64548*15151 using builtin python interpreter.", + # TODO: Ideally should be able to set max tool calls + # to prevent multi-turn, but it is not currently supported + # would speed up the test + input=("What's the first 4 digits after the decimal point of " + "cube root of `19910212 * 20250910`? " + "Show only the digits. 
The python interpreter is not stateful " + "and you must print to see the output."), tools=[{ "type": "code_interpreter", "container": { @@ -464,6 +470,7 @@ async def test_code_interpreter(client: OpenAI, model_name: str): ) assert response is not None assert response.status == "completed" + assert response.usage.output_tokens_details.tool_output_tokens > 0 def get_weather(latitude, longitude): diff --git a/tests/test_envs.py b/tests/test_envs.py new file mode 100644 index 000000000000..f81a6e2e415c --- /dev/null +++ b/tests/test_envs.py @@ -0,0 +1,216 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import os +from unittest.mock import patch + +import pytest + +from vllm.envs import env_list_with_choices, env_with_choices + + +class TestEnvWithChoices: + """Test cases for env_with_choices function.""" + + def test_default_value_returned_when_env_not_set(self): + """Test default is returned when env var is not set.""" + env_func = env_with_choices("NONEXISTENT_ENV", "default", + ["option1", "option2"]) + assert env_func() == "default" + + def test_none_default_returned_when_env_not_set(self): + """Test that None is returned when env not set and default is None.""" + env_func = env_with_choices("NONEXISTENT_ENV", None, + ["option1", "option2"]) + assert env_func() is None + + def test_valid_value_returned_case_sensitive(self): + """Test that valid value is returned in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + assert env_func() == "option1" + + def test_valid_lowercase_value_returned_case_insensitive(self): + """Test that lowercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["OPTION1", "OPTION2"], + case_sensitive=False) + assert env_func() == "option1" + + def test_valid_uppercase_value_returned_case_insensitive(self): + """Test that uppercase value is accepted in case insensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + assert env_func() == "OPTION1" + + def test_invalid_value_raises_error_case_sensitive(self): + """Test that invalid value raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def test_case_mismatch_raises_error_case_sensitive(self): + """Test that case mismatch raises ValueError in case sensitive mode.""" + with patch.dict(os.environ, {"TEST_ENV": "OPTION1"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION1' for TEST_ENV"): + env_func() + + def test_invalid_value_raises_error_case_insensitive(self): + """Test that invalid value raises ValueError when case insensitive.""" + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", + "default", ["option1", "option2"], + case_sensitive=False) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + def 
test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + assert env_func() == "dynamic1" + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "invalid"}): + env_func = env_with_choices("TEST_ENV", "default", get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' for TEST_ENV"): + env_func() + + +class TestEnvListWithChoices: + """Test cases for env_list_with_choices function.""" + + def test_default_list_returned_when_env_not_set(self): + """Test that default list is returned when env var is not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", + ["default1", "default2"], + ["option1", "option2"]) + assert env_func() == ["default1", "default2"] + + def test_empty_default_list_returned_when_env_not_set(self): + """Test that empty default list is returned when env not set.""" + env_func = env_list_with_choices("NONEXISTENT_ENV", [], + ["option1", "option2"]) + assert env_func() == [] + + def test_single_valid_value_parsed_correctly(self): + """Test that single valid value is parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1"] + + def test_multiple_valid_values_parsed_correctly(self): + """Test that multiple valid values are parsed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_values_with_whitespace_trimmed(self): + """Test that values with whitespace are trimmed correctly.""" + with patch.dict(os.environ, {"TEST_ENV": " option1 , option2 "}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_values_filtered_out(self): + """Test that empty values are filtered out.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,,option2,"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option2"] + + def test_empty_string_returns_default(self): + """Test that empty string returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ""}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + ["option1", "option2"]) + assert env_func() == ["default"] + + def test_only_commas_returns_default(self): + """Test that string with only commas returns default.""" + with patch.dict(os.environ, {"TEST_ENV": ",,,"}): + env_func = env_list_with_choices("TEST_ENV", ["default"], + ["option1", "option2"]) + assert env_func() == ["default"] + + def test_case_sensitive_validation(self): + """Test case sensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,OPTION2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=True) + with pytest.raises(ValueError, + match="Invalid value 'OPTION2' in TEST_ENV"): + env_func() + + def test_case_insensitive_validation(self): + """Test case insensitive validation.""" + with patch.dict(os.environ, {"TEST_ENV": 
"OPTION1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"], + case_sensitive=False) + assert env_func() == ["OPTION1", "option2"] + + def test_invalid_value_in_list_raises_error(self): + """Test that invalid value in list raises ValueError.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,invalid,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_callable_choices_resolved_correctly(self): + """Test that callable choices are resolved correctly.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,dynamic2"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + assert env_func() == ["dynamic1", "dynamic2"] + + def test_callable_choices_with_invalid_value(self): + """Test that callable choices raise error for invalid values.""" + + def get_choices(): + return ["dynamic1", "dynamic2"] + + with patch.dict(os.environ, {"TEST_ENV": "dynamic1,invalid"}): + env_func = env_list_with_choices("TEST_ENV", [], get_choices) + with pytest.raises(ValueError, + match="Invalid value 'invalid' in TEST_ENV"): + env_func() + + def test_duplicate_values_preserved(self): + """Test that duplicate values in the list are preserved.""" + with patch.dict(os.environ, {"TEST_ENV": "option1,option1,option2"}): + env_func = env_list_with_choices("TEST_ENV", [], + ["option1", "option2"]) + assert env_func() == ["option1", "option1", "option2"] diff --git a/vllm/entrypoints/context.py b/vllm/entrypoints/context.py index 8619452f2445..ea81fdbcd825 100644 --- a/vllm/entrypoints/context.py +++ b/vllm/entrypoints/context.py @@ -8,6 +8,7 @@ from contextlib import AsyncExitStack from typing import TYPE_CHECKING, Optional, Union +from openai.types.responses.tool import Mcp from openai_harmony import Author, Message, Role, StreamState, TextContent from vllm.entrypoints.harmony_utils import ( @@ -21,6 +22,24 @@ logger = logging.getLogger(__name__) +# This is currently needed as the tool type doesn't 1:1 match the +# tool namespace, which is what is used to look up the +# connection to the tool server +_TOOL_NAME_TO_TYPE_MAP = { + "browser": "web_search_preview", + "python": "code_interpreter", + "container": "container", +} + + +def _map_tool_name_to_tool_type(tool_name: str) -> str: + if tool_name not in _TOOL_NAME_TO_TYPE_MAP: + available_tools = ', '.join(_TOOL_NAME_TO_TYPE_MAP.keys()) + raise ValueError( + f"Built-in tool name '{tool_name}' not defined in mapping. 
" + f"Available tools: {available_tools}") + return _TOOL_NAME_TO_TYPE_MAP[tool_name] + class TurnTokens: """Tracks token counts for a single conversation turn.""" @@ -59,8 +78,8 @@ def render_for_completion(self) -> list[int]: @abstractmethod async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass @abstractmethod @@ -96,8 +115,8 @@ def render_for_completion(self) -> list[int]: raise NotImplementedError("Should not be called.") async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]) -> None: pass async def cleanup_session(self) -> None: @@ -318,13 +337,17 @@ async def call_python_tool(self, tool_session: Union["ClientSession", ] async def init_tool_sessions(self, tool_server: Optional[ToolServer], - exit_stack: AsyncExitStack, - request_id: str) -> None: + exit_stack: AsyncExitStack, request_id: str, + mcp_tools: dict[str, Mcp]): if tool_server: for tool_name in self.available_tools: if tool_name not in self._tool_sessions: + tool_type = _map_tool_name_to_tool_type(tool_name) + headers = mcp_tools[ + tool_type].headers if tool_type in mcp_tools else None tool_session = await exit_stack.enter_async_context( - tool_server.new_session(tool_name, request_id)) + tool_server.new_session(tool_name, request_id, + headers)) self._tool_sessions[tool_name] = tool_session exit_stack.push_async_exit(self.cleanup_session) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 1364a41be950..57e4bb1e1da5 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -126,8 +126,10 @@ def get_developer_message( function_tools: list[Union[Tool, ChatCompletionToolsParam]] = [] for tool in tools: if tool.type in ("web_search_preview", "code_interpreter", - "container"): + "container", "mcp"): # These are built-in tools that are added to the system message. + # Adding in MCP for now until we support MCP tools executed + # server side pass elif tool.type == "function": diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 6e243671af24..99bb464db1d1 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -460,8 +460,12 @@ async def responses_full_generator( async with AsyncExitStack() as exit_stack: try: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) async for _ in result_generator: pass except asyncio.CancelledError: @@ -748,11 +752,16 @@ def _construct_input_messages_with_harmony( # New conversation. 
reasoning_effort = (request.reasoning.effort if request.reasoning else None) - # Temporary: OpenAI types doesn't have container tool - # so we used MCP to cover that, up for change tool_types = [tool.type for tool in request.tools] - if envs.VLLM_GPT_OSS_USE_CONTAINER_TOOL: - tool_types.append("container") + + # Allow the MCP Tool type to enable built in tools if the + # server_label is allowlisted in + # envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS + if envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS: + for tool in request.tools: + if (tool.type == "mcp" and tool.server_label + in envs.GPT_OSS_SYSTEM_TOOL_MCP_LABELS): + tool_types.append(tool.server_label) enable_browser = ("web_search_preview" in tool_types and self.tool_server is not None and self.tool_server.has_tool("browser")) @@ -1653,8 +1662,12 @@ def _increment_sequence_number_and_return( async with AsyncExitStack() as exit_stack: processer = None if self.use_harmony: + mcp_tools = { + tool.server_label: tool + for tool in request.tools if tool.type == "mcp" + } await context.init_tool_sessions(self.tool_server, exit_stack, - request.request_id) + request.request_id, mcp_tools) processer = self._process_harmony_streaming_events else: processer = self._process_simple_streaming_events diff --git a/vllm/entrypoints/tool_server.py b/vllm/entrypoints/tool_server.py index 056a571fb2fd..4c627b865ef9 100644 --- a/vllm/entrypoints/tool_server.py +++ b/vllm/entrypoints/tool_server.py @@ -18,7 +18,6 @@ async def list_server_and_tools(server_url: str): from mcp import ClientSession from mcp.client.sse import sse_client - async with sse_client(url=server_url) as streams, ClientSession( *streams) as session: initialize_response = await session.initialize() @@ -86,8 +85,12 @@ def get_tool_description(self, pass @abstractmethod - def new_session(self, tool_name: str, - session_id: str) -> AbstractAsyncContextManager[Any]: + def new_session( + self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None + ) -> AbstractAsyncContextManager[Any]: """ Create a session for the tool. 
""" @@ -144,16 +147,21 @@ def get_tool_description(self, tool_name: str): return self.harmony_tool_descriptions.get(tool_name) @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): from mcp import ClientSession from mcp.client.sse import sse_client url = self.urls.get(tool_name) - headers = {"x-session-id": session_id} + request_headers = {"x-session-id": session_id} + if headers is not None: + request_headers.update(headers) if not url: raise KeyError(f"Tool '{tool_name}' is not supported") - async with sse_client(url=url, - headers=headers) as streams, ClientSession( - *streams) as session: + async with sse_client( + url=url, headers=request_headers) as streams, ClientSession( + *streams) as session: await session.initialize() yield session @@ -189,7 +197,10 @@ def get_tool_description(self, raise ValueError(f"Unknown tool {tool_name}") @asynccontextmanager - async def new_session(self, tool_name: str, session_id: str): + async def new_session(self, + tool_name: str, + session_id: str, + headers: Optional[dict[str, str]] = None): if tool_name not in self.tools: raise KeyError(f"Tool '{tool_name}' is not supported") yield self.tools[tool_name] diff --git a/vllm/envs.py b/vllm/envs.py index eaee2f6cc771..ee5efff8bcd9 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -185,11 +185,11 @@ VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False - VLLM_GPT_OSS_USE_CONTAINER_TOOL: bool = False VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" + GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None @@ -261,6 +261,58 @@ def _get_validated_env() -> Optional[str]: return _get_validated_env +def env_list_with_choices( + env_name: str, + default: list[str], + choices: Union[list[str], Callable[[], list[str]]], + case_sensitive: bool = True) -> Callable[[], list[str]]: + """ + Create a lambda that validates environment variable + containing comma-separated values against allowed choices + + Args: + env_name: Name of the environment variable + default: Default list of values if not set + choices: List of valid string options or callable that returns list + case_sensitive: Whether validation should be case sensitive + + Returns: + Lambda function for environment_variables + dict that returns list of strings + """ + + def _get_validated_env_list() -> list[str]: + value = os.getenv(env_name) + if value is None: + return default + + # Split comma-separated values and strip whitespace + values = [v.strip() for v in value.split(",") if v.strip()] + + if not values: + return default + + # Resolve choices if it's a callable (for lazy loading) + actual_choices = choices() if callable(choices) else choices + + # Validate each value + for val in values: + if not case_sensitive: + check_value = val.lower() + check_choices = [choice.lower() for choice in actual_choices] + else: + check_value = val + check_choices = actual_choices + + if check_value not in check_choices: + raise ValueError(f"Invalid value '{val}' in {env_name}. 
" + f"Valid options: {actual_choices}.") + + return values + + return _get_validated_env_list + + def get_vllm_port() -> Optional[int]: """Get the port from VLLM_PORT environment variable. @@ -1320,10 +1372,6 @@ def get_vllm_port() -> Optional[int]: "VLLM_TUNED_CONFIG_FOLDER": lambda: os.getenv("VLLM_TUNED_CONFIG_FOLDER", None), - # Allows vllm use container tool - "VLLM_GPT_OSS_USE_CONTAINER_TOOL": - lambda: bool(int(os.getenv("VLLM_GPT_OSS_USE_CONTAINER_TOOL", "0"))), - # Allows harmony instructions to be injected on system messages "VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS": lambda: bool( @@ -1343,6 +1391,14 @@ def get_vllm_port() -> Optional[int]: "VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME": lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + + # Valid values are container,code_interpreter,web_search_preview + # ex GPT_OSS_SYSTEM_TOOL_MCP_LABELS=container,code_interpreter + "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": + env_list_with_choices("GPT_OSS_SYSTEM_TOOL_MCP_LABELS", [], + ["container", + "code_interpreter", + "web_search_preview"]), } # --8<-- [end:env-vars-definition] From d588cd24061011f76da721c89f9e2171a2b2c4c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luka=20Govedi=C4=8D?= Date: Mon, 22 Sep 2025 20:07:43 -0400 Subject: [PATCH 244/518] [Bugfix] fix custom op test (#25429) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Luka Govedič --- .../model_executor/test_enabled_custom_ops.py | 22 ++++++++++--------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py index 86139d598582..92ce10a9efc0 100644 --- a/tests/model_executor/test_enabled_custom_ops.py +++ b/tests/model_executor/test_enabled_custom_ops.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from typing import Optional import pytest import torch @@ -34,15 +35,15 @@ class Relu3(ReLUSquaredActivation): [ # Default values based on compile level # - All by default (no Inductor compilation) - ("", 0, False, [True] * 4, True), - ("", 1, True, [True] * 4, True), - ("", 2, False, [True] * 4, True), + (None, 0, False, [True] * 4, True), + (None, 1, True, [True] * 4, True), + (None, 2, False, [True] * 4, True), # - None by default (with Inductor) - ("", 3, True, [False] * 4, False), - ("", 4, True, [False] * 4, False), + (None, 3, True, [False] * 4, False), + (None, 4, True, [False] * 4, False), # - All by default (without Inductor) - ("", 3, False, [True] * 4, True), - ("", 4, False, [True] * 4, True), + (None, 3, False, [True] * 4, True), + (None, 4, False, [True] * 4, True), # Explicitly enabling/disabling # # Default: all @@ -54,7 +55,7 @@ class Relu3(ReLUSquaredActivation): # All but SiluAndMul ("all,-silu_and_mul", 2, True, [1, 0, 1, 1], True), # All but ReLU3 (even if ReLU2 is on) - ("-relu3,relu2", 3, False, [1, 1, 1, 0], True), + ("-relu3,+relu2", 3, False, [1, 1, 1, 0], True), # RMSNorm and SiluAndMul ("none,-relu3,+rms_norm,+silu_and_mul", 4, False, [1, 1, 0, 0], False), # All but RMSNorm @@ -67,12 +68,13 @@ class Relu3(ReLUSquaredActivation): # All but RMSNorm ("all,-rms_norm", 4, True, [0, 1, 1, 1], True), ]) -def test_enabled_ops(env: str, torch_level: int, use_inductor: bool, +def test_enabled_ops(env: Optional[str], torch_level: int, use_inductor: bool, ops_enabled: list[int], default_on: bool): + custom_ops = env.split(',') if env else [] 
vllm_config = VllmConfig( compilation_config=CompilationConfig(use_inductor=bool(use_inductor), level=torch_level, - custom_ops=env.split(","))) + custom_ops=custom_ops)) with set_current_vllm_config(vllm_config): assert CustomOp.default_on() == default_on From f31ff874607c311dda7aebe52f73c20f5cad923d Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Mon, 22 Sep 2025 20:09:52 -0400 Subject: [PATCH 245/518] [Core] Drop overly aggressive whisper assertion (#25408) Signed-off-by: Russell Bryant --- vllm/v1/core/sched/scheduler.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index ef77d9e2d3ff..7fc4776b0261 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -463,10 +463,6 @@ def schedule(self) -> SchedulerOutput: # always padded to the maximum length. If we support other # encoder-decoder models, this will need to be updated if we # want to only allocate what is needed. - assert ("whisper" - in self.vllm_config.model_config.model.lower()), ( - "Whisper is the only supported " - "encoder-decoder model.") num_encoder_tokens =\ self.scheduler_config.max_num_encoder_input_tokens else: From 090197034faf3b193c4467cedeb9281e3078892d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Tue, 23 Sep 2025 02:10:59 +0200 Subject: [PATCH 246/518] [Bugfix] Fix missing `clear_connector_metadata` (#25397) Signed-off-by: NickLucche --- .../unit/test_kv_connector_lifecyle.py | 59 +++++++++++++++++++ .../worker/kv_connector_model_runner_mixin.py | 1 + 2 files changed, 60 insertions(+) create mode 100644 tests/v1/kv_connector/unit/test_kv_connector_lifecyle.py diff --git a/tests/v1/kv_connector/unit/test_kv_connector_lifecyle.py b/tests/v1/kv_connector/unit/test_kv_connector_lifecyle.py new file mode 100644 index 000000000000..fe6296cf12ea --- /dev/null +++ b/tests/v1/kv_connector/unit/test_kv_connector_lifecyle.py @@ -0,0 +1,59 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from vllm.distributed.kv_transfer.kv_connector.v1.shared_storage_connector import ( # noqa: E501 + SharedStorageConnectorMetadata) +from vllm.distributed.kv_transfer.kv_transfer_state import ( + ensure_kv_transfer_initialized, get_kv_transfer_group) +from vllm.v1.core.sched.output import CachedRequestData, SchedulerOutput +from vllm.v1.worker.kv_connector_model_runner_mixin import ( + KVConnectorModelRunnerMixin) + +# Importing utils registers TestSharedStorageConnector with the factory +from .utils import create_vllm_config + + +def _make_empty_scheduler_output(): + return SchedulerOutput( + scheduled_new_reqs=[], + scheduled_cached_reqs=CachedRequestData.make_empty(), + num_scheduled_tokens={}, + total_num_scheduled_tokens=0, + scheduled_spec_decode_tokens={}, + scheduled_encoder_inputs={}, + num_common_prefix_blocks=[], + finished_req_ids=set(), + free_encoder_mm_hashes=[], + structured_output_request_ids={}, + grammar_bitmask=None, + kv_connector_metadata=SharedStorageConnectorMetadata(), + ) + + +def test_kv_connector_mixin_clears_metadata(): + vllm_config = create_vllm_config() + vllm_config.kv_transfer_config.kv_connector = "TestSharedStorageConnector" + vllm_config.kv_transfer_config.kv_role = "kv_both" + vllm_config.kv_transfer_config.kv_connector_extra_config["name"] = ("unit") + + # Initialize the global connector instance + ensure_kv_transfer_initialized(vllm_config) + + try: + # Minimal scheduler output with empty metadata; mixin 
should still + # bind/clear metadata even if no loads happen + scheduler_output = _make_empty_scheduler_output() + + # Invoke the no-forward path which uses the mixin context manager + KVConnectorModelRunnerMixin.kv_connector_no_forward( + scheduler_output, vllm_config) + + # Verify clear_connector_metadata was called on the connector + connector = get_kv_transfer_group() + assert connector._connector_metadata is None + # Test connector wrapper records method calls + assert connector.call_record.get("bind_connector_metadata", 0) == 1 + assert connector.call_record.get("clear_connector_metadata", 0) == 1 + finally: + # Ensure we clean up the global connector between tests + KVConnectorModelRunnerMixin.ensure_kv_transfer_shutdown() diff --git a/vllm/v1/worker/kv_connector_model_runner_mixin.py b/vllm/v1/worker/kv_connector_model_runner_mixin.py index 016a90c196ba..7eaff924ecc1 100644 --- a/vllm/v1/worker/kv_connector_model_runner_mixin.py +++ b/vllm/v1/worker/kv_connector_model_runner_mixin.py @@ -123,6 +123,7 @@ def _get_kv_connector_output( output.kv_connector_stats = KVConnectorModelRunnerMixin.\ get_kv_connector_stats() + kv_connector.clear_connector_metadata() @staticmethod def get_kv_connector_stats() -> Optional[KVConnectorStats]: From ac0048c0aea977e520aa19f3b9155fc19d696df7 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni Date: Mon, 22 Sep 2025 20:26:17 -0400 Subject: [PATCH 247/518] [BugFix] [DP/EP] Fix slow execution when BS <= DP (#25407) Signed-off-by: Matthew Bonanni Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Co-authored-by: Chris Bamford --- vllm/v1/worker/gpu_model_runner.py | 7 ++++--- vllm/v1/worker/gpu_worker.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index b0cd0f413307..89b9a3c34f2a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -55,7 +55,7 @@ from vllm.sequence import IntermediateTensors, PoolerOutput from vllm.tasks import GenerationTask, PoolingTask, SupportedTask from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, - GiB_bytes, check_use_alibi, get_dtype_size, + GiB_bytes, cdiv, check_use_alibi, get_dtype_size, is_pin_memory_available, length_from_prompt_token_ids_or_embeds, round_up, supports_dynamo) @@ -2913,12 +2913,13 @@ def _dummy_run( # Note: Overriding max_query_len to be the prefill tokens max_query_len = num_prefill_tokens elif uniform_decode: - num_reqs = num_tokens // max_query_len + assert not create_mixed_batch + num_reqs = cdiv(num_tokens, max_query_len) assert num_reqs <= max_num_reqs, \ "Do not capture num_reqs > max_num_reqs for uniform batch" num_scheduled_tokens_list = [max_query_len] * num_reqs if num_tokens % max_query_len != 0: - num_scheduled_tokens_list[-1] += num_tokens % max_query_len + num_scheduled_tokens_list[-1] = num_tokens % max_query_len else: num_reqs = min(num_tokens, max_num_reqs) min_tokens_per_req = num_tokens // num_reqs diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 8b1e1bb8f45c..ca8734d28b45 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -487,7 +487,7 @@ def profile(self, is_start: bool = True): sort_by="self_cuda_time_total")) def execute_dummy_batch(self) -> None: - self.model_runner._dummy_run(1) + self.model_runner._dummy_run(1, uniform_decode=True) def add_lora(self, lora_request: LoRARequest) -> bool: return self.model_runner.add_lora(lora_request) From 
0b7bed9c386d41945b990d3ac5311ac7d9c8b499 Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Date: Mon, 22 Sep 2025 21:20:53 -0400 Subject: [PATCH 248/518] [Performance] Remove input pads in cutlass_mla and optimize v_proj output handling (#25184) Signed-off-by: Alexander Matveev --- vllm/v1/attention/backends/mla/common.py | 45 ++++++++++++++++--- vllm/v1/attention/backends/mla/cutlass_mla.py | 30 +++++++------ 2 files changed, 55 insertions(+), 20 deletions(-) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 5b307810de93..9bca87c81d17 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -942,6 +942,7 @@ def __init__( qk_head_dim: int, v_head_dim: int, kv_b_proj: ColumnParallelLinear, + q_pad_num_heads: Optional[int] = None, ) -> None: if kv_sharing_target_layer_name is not None: raise NotImplementedError("KV sharing is not supported for MLA") @@ -959,6 +960,7 @@ def __init__( self.qk_head_dim = qk_head_dim self.v_head_dim = v_head_dim self.kv_b_proj = kv_b_proj + self.q_pad_num_heads = q_pad_num_heads if use_flashinfer_prefill(): logger.debug_once("Using FlashInfer prefill for MLA") @@ -1134,7 +1136,7 @@ def _run_prefill_context_chunk_cudnn(self, True, #Indicates actual_seq_lens are on GPU or CPU. ) - def _v_up_proj(self, x): + def _v_up_proj(self, x: torch.Tensor, out: torch.Tensor): # Convert from (B, N, L) to (N, B, L) x = x.view(-1, self.num_heads, self.kv_lora_rank).transpose(0, 1) if is_rocm_aiter_fp8bmm_enabled(): @@ -1146,12 +1148,23 @@ def _v_up_proj(self, x): transpose_bm=True) # Convert from (B, N, V) to (B, N * V) x = x.reshape(-1, self.num_heads * self.v_head_dim) + # Copy result + out.copy_(x) else: + # Convert from (B, N * V) to (N, B, V) + out = out.view(-1, self.num_heads, self.v_head_dim).transpose(0, 1) + # Multiply (N, B, L) x (N, L, V) -> (N, B, V) - x = torch.bmm(x, self.W_UV) + torch.bmm(x, self.W_UV, out=out) # Reuse "out" to make it "hot" + # Convert from (N, B, V) to (B, N * V) - x = x.transpose(0, 1).reshape(-1, self.num_heads * self.v_head_dim) - return x + out_new = out.transpose(0, 1).reshape( + -1, self.num_heads * self.v_head_dim) + + # Adjust output buffer shape back to the original (B, N * V) + N, B, V = out.shape + out.resize_((B, N * V)) + out.copy_(out_new) # Copy result def process_weights_after_loading(self, act_dtype: torch.dtype): @@ -1559,6 +1572,15 @@ def forward( # Convert from (B, N, P) to (N, B, P) decode_q_nope = decode_q_nope.transpose(0, 1) + # Pads the head_dim if necessary (for the underlying kernel) + if self.q_pad_num_heads is not None: + B, N, L = decode_q_pe.shape + decode_pe_padded = decode_q_pe.new_empty( + (B, self.q_pad_num_heads, L)) + decode_pe_padded.resize_((B, N, L)) + decode_pe_padded.copy_(decode_q_pe) + decode_q_pe = decode_pe_padded + if is_rocm_aiter_fp8bmm_enabled(): # Multiply+Transpose (N, B, P)x(N, P, L)->(N, B, L)->(B, N, L) decode_ql_nope = aiter_triton_fp8_bmm(decode_q_nope, @@ -1567,8 +1589,19 @@ def forward( group_size=128, transpose_bm=True) else: + # Pads the head_dim if necessary (for the underlying kernel) + N, B, P = decode_q_nope.shape + _, _, L = self.W_UK_T.shape + if self.q_pad_num_heads is not None: + decode_ql_nope = decode_q_nope.new_empty( + (self.q_pad_num_heads, B, L)) + decode_ql_nope.resize_((N, B, L)) + + else: + decode_ql_nope = decode_q_nope.new_empty((N, B, L)) + # Multiply (N, B, P) x (N, P, L) -> (N, B, L) - decode_ql_nope = 
torch.bmm(decode_q_nope, self.W_UK_T) + torch.bmm(decode_q_nope, self.W_UK_T, out=decode_ql_nope) # Convert from (N, B, L) to (B, N, L) decode_ql_nope = decode_ql_nope.transpose(0, 1) @@ -1603,5 +1636,5 @@ def forward( attn_out = cp_lse_ag_out_rs(attn_out, lse, get_dcp_group()) # v_up projection - output[:num_decode_tokens] = self._v_up_proj(attn_out) + self._v_up_proj(attn_out, out=output[:num_decode_tokens]) return output_padded diff --git a/vllm/v1/attention/backends/mla/cutlass_mla.py b/vllm/v1/attention/backends/mla/cutlass_mla.py index ae534f3207b5..d44e20f2cb6b 100644 --- a/vllm/v1/attention/backends/mla/cutlass_mla.py +++ b/vllm/v1/attention/backends/mla/cutlass_mla.py @@ -74,6 +74,8 @@ def ensure_size(self, attn_metadata: MLACommonMetadata, g_sm100_workspace = SM100Workspace(128 * 1024 * 1024) # 128MB +MAX_HEADS = 128 + class CutlassMLAImpl(MLACommonImpl[MLACommonMetadata]): can_return_lse_for_decode: bool = True @@ -92,10 +94,18 @@ def __init__( kv_sharing_target_layer_name: Optional[str], # MLA Specific Arguments **mla_args) -> None: - super().__init__(num_heads, head_size, scale, num_kv_heads, - alibi_slopes, sliding_window, kv_cache_dtype, - logits_soft_cap, attn_type, - kv_sharing_target_layer_name, **mla_args) + super().__init__(num_heads, + head_size, + scale, + num_kv_heads, + alibi_slopes, + sliding_window, + kv_cache_dtype, + logits_soft_cap, + attn_type, + kv_sharing_target_layer_name, + q_pad_num_heads=MAX_HEADS, + **mla_args) unsupported_features = [alibi_slopes, sliding_window, logits_soft_cap] if any(unsupported_features): @@ -157,14 +167,6 @@ def _sm100_cutlass_mla_decode( MAX_HEADS = 128 assert H <= MAX_HEADS, f"H must be <= {MAX_HEADS}, but got {H}" - if H < MAX_HEADS: - q_nope_padded = q_nope.new_empty((B_q, MAX_HEADS, D_q_nope)) - q_nope_padded[:, :H] = q_nope - q_nope = q_nope_padded - - q_pe_padded = q_pe.new_empty((B_q, MAX_HEADS, D_q_pe)) - q_pe_padded[:, :H] = q_pe - q_pe = q_pe_padded assert len(page_table.shape) == 2 B_block_table, block_num = page_table.shape @@ -206,9 +208,9 @@ def _sm100_cutlass_mla_decode( ) if H < MAX_HEADS: + # Extract the subsets of the outputs + lse = lse[:, :H] if self.need_to_return_lse_for_decode else lse out = out[:, :H] - if self.need_to_return_lse_for_decode: - lse = lse[:, :H].contiguous() return out, lse From 9949aa2ef1cc77721dea9077e0f76b9deb6fd066 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Mon, 22 Sep 2025 21:42:45 -0400 Subject: [PATCH 249/518] [Perf] Apply torch.compile for `per_block_cast_to_fp8` (#24611) Signed-off-by: yewentao256 --- vllm/utils/deep_gemm.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index 38d92f01192b..4083193d7650 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -135,7 +135,7 @@ def _align(x: int, y: int) -> int: # Taken from https://github.com/deepseek-ai/DeepGEMM/blob/dd6ed14acbc7445dcef224248a77ab4d22b5f240/deep_gemm/utils/math.py#L38 -# TODO(wentao): optimize this function, using triton or cuda kernel +@torch.compile(dynamic=True, backend=current_platform.simple_compile_backend) def per_block_cast_to_fp8( x: torch.Tensor, block_size: list[int] = DEFAULT_BLOCK_SIZE, @@ -187,4 +187,4 @@ def should_use_deepgemm_for_fp8_linear(output_dtype: torch.dtype, "is_deep_gemm_e8m0_used", "is_deep_gemm_supported", "should_use_deepgemm_for_fp8_linear", -] +] \ No newline at end of file From 6fa78d8f23b88da47ce6e13815e5e82226f2af00 Mon Sep 17 00:00:00 2001 From: 
Isotr0py Date: Tue, 23 Sep 2025 09:48:12 +0800 Subject: [PATCH 250/518] [V0 deprecation] Remove platform v1 controling interface (#25410) Signed-off-by: Isotr0py --- tests/v1/test_async_llm_dp.py | 5 ----- vllm/engine/arg_utils.py | 27 --------------------------- vllm/platforms/cpu.py | 17 ----------------- vllm/platforms/cuda.py | 4 ---- vllm/platforms/interface.py | 14 -------------- vllm/platforms/rocm.py | 5 ----- vllm/platforms/tpu.py | 5 ----- vllm/platforms/xpu.py | 4 ---- 8 files changed, 81 deletions(-) diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/test_async_llm_dp.py index 32da58011be9..cef0f362cff8 100644 --- a/tests/v1/test_async_llm_dp.py +++ b/tests/v1/test_async_llm_dp.py @@ -13,7 +13,6 @@ from vllm.config import VllmConfig from vllm.engine.arg_utils import AsyncEngineArgs from vllm.inputs import PromptType -from vllm.platforms import current_platform from vllm.sampling_params import RequestOutputKind from vllm.v1.engine.async_llm import AsyncLLM from vllm.v1.engine.core_client import DPAsyncMPClient @@ -29,10 +28,6 @@ data_parallel_size=DP_SIZE, ) -if not current_platform.supports_v1(engine_args.create_model_config()): - pytest.skip(reason="Requires V1-supporting platform.", - allow_module_level=True) - async def generate( engine: AsyncLLM, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d4d801b155e1..17df82c081ae 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1502,12 +1502,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: _raise_or_fallback(feature_name=name, recommend_to_remove=True) return False - # Platforms must decide if they can support v1 for this model - if not current_platform.supports_v1(model_config=model_config): - _raise_or_fallback( - feature_name=f"device type={current_platform.device_type}", - recommend_to_remove=False) - return False ############################################################# # Experimental Features - allow users to opt in. @@ -1524,12 +1518,6 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: recommend_to_remove=False) return False - # The platform may be supported on V1, but off by default for now. - if not current_platform.default_v1( # noqa: SIM103 - model_config=model_config) and _warn_or_fallback( - current_platform.device_name): - return False - if (current_platform.is_cpu() and model_config.get_sliding_window() is not None): _raise_or_fallback(feature_name="sliding window (CPU backend)", @@ -1796,21 +1784,6 @@ def _raise_or_fallback(feature_name: str, recommend_to_remove: bool): logger.warning(msg) -def _warn_or_fallback(feature_name: str) -> bool: - if envs.is_set("VLLM_USE_V1") and envs.VLLM_USE_V1: - logger.warning( - "Detected VLLM_USE_V1=1 with %s. Usage should " - "be considered experimental. Please report any " - "issues on Github.", feature_name) - should_exit = False - else: - logger.info( - "%s is experimental on VLLM_USE_V1=1. " - "Falling back to V0 Engine.", feature_name) - should_exit = True - return should_exit - - def human_readable_int(value): """Parse human-readable integers like '1k', '2M', etc. Including decimal values with decimal multipliers. 
diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index cd41832bc2ea..1e15dc6a91aa 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -328,23 +328,6 @@ def get_device_communicator_cls(cls) -> str: def supports_structured_output(cls) -> bool: return True - @classmethod - def supports_v1(cls, model_config) -> bool: - """Returns whether the current platform can support v1 for the supplied - model configuration. - """ - return True - - @classmethod - def default_v1(cls, model_config) -> bool: - """Returns whether the current platform can use v1 by default for the - supplied model configuration. - """ - arch = cls.get_cpu_architecture() - return (cls.supports_v1(model_config) - and arch in (CpuArchEnum.X86, CpuArchEnum.POWERPC, - CpuArchEnum.ARM, CpuArchEnum.S390X)) - @classmethod def opaque_attention_op(cls) -> bool: return True diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index b10bc03ee16c..d5f3599acb1c 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -384,10 +384,6 @@ def get_device_communicator_cls(cls) -> str: def supports_fp8(cls) -> bool: return cls.has_device_capability(89) - @classmethod - def supports_v1(cls, model_config: "ModelConfig") -> bool: - return True - @classmethod def use_custom_allreduce(cls) -> bool: return True diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index cad04ea14c01..3f13ae72fe4d 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -482,20 +482,6 @@ def use_all_gather(cls) -> bool: or parallel_config.distributed_executor_backend == "external_launcher") - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - """Returns whether the current platform can support v1 for the supplied - model configuration. - """ - return False - - @classmethod - def default_v1(cls, model_config: ModelConfig) -> bool: - """ - Returns whether the current platform supports v1 by default. 
- """ - return cls.supports_v1(model_config) - @classmethod def use_custom_allreduce(cls) -> bool: """ diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 6a49bd4a3386..878718489fa8 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -396,11 +396,6 @@ def fp8_dtype(cls) -> torch.dtype: else: return torch.float8_e4m3fn - @classmethod - def supports_v1(cls, model_config: "ModelConfig") -> bool: - # V1 support on AMD gpus is experimental - return True - @classmethod def use_custom_allreduce(cls) -> bool: # We only enable custom allreduce for MI300 series diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index 9852d948bc4b..e4c73b1bae6f 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -174,11 +174,6 @@ def get_device_communicator_cls(cls) -> str: def use_all_gather(cls) -> bool: return True - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - # V1 support on TPU is experimental - return True - @classmethod def validate_request( cls, diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index eb591ae4454e..034e039006c4 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -194,10 +194,6 @@ def is_data_center_gpu(cls) -> bool: def get_device_communicator_cls(cls) -> str: return "vllm.distributed.device_communicators.xpu_communicator.XpuCommunicator" # noqa - @classmethod - def supports_v1(cls, model_config: ModelConfig) -> bool: - return True - @classmethod def device_count(cls) -> int: return torch.xpu.device_count() From c625f9043c5beb5921e94c6a5b3ac18372bab4db Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 23 Sep 2025 09:52:09 +0800 Subject: [PATCH 251/518] [V0 deprecation] Remove `_set_default_args_v0` function (#25409) Signed-off-by: Isotr0py --- vllm/engine/arg_utils.py | 83 ++++++---------------------------------- 1 file changed, 11 insertions(+), 72 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 17df82c081ae..8c7a1b413cdb 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1147,20 +1147,15 @@ def create_engine_config( else: envs.set_vllm_use_v1(use_v1) - # Set default arguments for V0 or V1 Engine. - if use_v1: - self._set_default_args_v1(usage_context, model_config) - # Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1 - if current_platform.is_cpu( - ) and current_platform.get_cpu_architecture() in ( - CpuArchEnum.POWERPC, CpuArchEnum.S390X, CpuArchEnum.ARM): - logger.info( - "Chunked prefill is not supported for ARM and POWER " - "and S390X CPUs; " - "disabling it for V1 backend.") - self.enable_chunked_prefill = False - else: - self._set_default_args_v0(model_config) + # Set default arguments for V1 Engine. 
+ self._set_default_args(usage_context, model_config) + # Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1 + if current_platform.is_cpu() and current_platform.get_cpu_architecture( + ) in (CpuArchEnum.POWERPC, CpuArchEnum.S390X, CpuArchEnum.ARM): + logger.info("Chunked prefill is not supported for ARM and POWER " + "and S390X CPUs; " + "disabling it for V1 backend.") + self.enable_chunked_prefill = False assert self.enable_chunked_prefill is not None sliding_window: Optional[int] = None @@ -1528,64 +1523,8 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: return True - def _set_default_args_v0(self, model_config: ModelConfig) -> None: - """Set Default Arguments for V0 Engine.""" - - max_model_len = model_config.max_model_len - use_long_context = max_model_len > 32768 - if self.enable_chunked_prefill is None: - # Chunked prefill not supported for Multimodal or MLA in V0. - if model_config.is_multimodal_model or model_config.use_mla: - self.enable_chunked_prefill = False - - # Enable chunked prefill by default for long context (> 32K) - # models to avoid OOM errors in initial memory profiling phase. - elif use_long_context: - is_gpu = current_platform.is_cuda() - use_sliding_window = (model_config.get_sliding_window() - is not None) - use_spec_decode = self.speculative_config is not None - - if (is_gpu and not use_sliding_window and not use_spec_decode - and not self.enable_lora): - self.enable_chunked_prefill = True - logger.warning( - "Chunked prefill is enabled by default for models " - "with max_model_len > 32K. Chunked prefill might " - "not work with some features or models. If you " - "encounter any issues, please disable by launching " - "with --enable-chunked-prefill=False.") - - if self.enable_chunked_prefill is None: - self.enable_chunked_prefill = False - - if not self.enable_chunked_prefill and use_long_context: - logger.warning( - "The model has a long context length (%s). This may cause" - "OOM during the initial memory profiling phase, or result " - "in low performance due to small KV cache size. Consider " - "setting --max-model-len to a smaller value.", max_model_len) - - # Disable prefix caching for multimodal models for VLLM_V0. - if self.enable_prefix_caching and model_config.is_multimodal_model: - logger.warning( - "--enable-prefix-caching is not supported for multimodal " - "models in V0 and has been disabled.") - self.enable_prefix_caching = False - - if self.enable_prompt_embeds: - logger.warning( - "--enable-prompt-embeds and --enable-prefix-caching " - "are not supported together in V0. Prefix caching has " - "been disabled.") - self.enable_prefix_caching = False - - # Set max_num_seqs to 256 for VLLM_V0. 
- if self.max_num_seqs is None: - self.max_num_seqs = 256 - - def _set_default_args_v1(self, usage_context: UsageContext, - model_config: ModelConfig) -> None: + def _set_default_args(self, usage_context: UsageContext, + model_config: ModelConfig) -> None: """Set Default Arguments for V1 Engine.""" # V1 always uses chunked prefills and prefix caching From 4741239db7b75949d577f99eb1c3a9e463dc77f3 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Mon, 22 Sep 2025 22:04:15 -0400 Subject: [PATCH 252/518] [Bug] Fix Long Context OOM Issue (#25290) Signed-off-by: yewentao256 --- vllm/v1/attention/backends/mla/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 9bca87c81d17..a177117a50bd 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -481,7 +481,7 @@ def __init__(self, # which would result in up-projected context being # 2*(192*128)*(64*1024) = 3gb # (assuming 192 QK head dim, 128 heads, and fp16) - 128 * 1024) + 64 * 1024) assert self.chunked_prefill_workspace_size >= \ scheduler_config.max_num_seqs * cache_config.block_size if self.dcp_world_size > 1: From fc97733da801f1d6464e706f41ebd5180f2abb51 Mon Sep 17 00:00:00 2001 From: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:04:47 +0800 Subject: [PATCH 253/518] [feat] Support MRoPE + YaRN (#25384) Signed-off-by: liuye.hj Co-authored-by: liuye.hj --- .../layers/rotary_embedding/__init__.py | 22 ++++++++++--- .../layers/rotary_embedding/mrope.py | 31 +++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/layers/rotary_embedding/__init__.py b/vllm/model_executor/layers/rotary_embedding/__init__.py index c9653aa9e440..3576368981c7 100644 --- a/vllm/model_executor/layers/rotary_embedding/__init__.py +++ b/vllm/model_executor/layers/rotary_embedding/__init__.py @@ -153,11 +153,23 @@ def get_rope( if k in ("extrapolation_factor", "attn_factor", "beta_fast", "beta_slow") } - rotary_emb = YaRNScalingRotaryEmbedding(head_size, rotary_dim, - original_max_position, - base, is_neox_style, - scaling_factor, dtype, - **extra_kwargs) + if "mrope_section" in rope_scaling: + rotary_emb = MRotaryEmbedding( + head_size, + rotary_dim, + original_max_position, + base, + is_neox_style, + dtype, + mrope_section=rope_scaling["mrope_section"], + mrope_interleaved=rope_scaling.get("mrope_interleaved", + False), + scaling_factor=scaling_factor, + **extra_kwargs) + else: + rotary_emb = YaRNScalingRotaryEmbedding( + head_size, rotary_dim, original_max_position, base, + is_neox_style, scaling_factor, dtype, **extra_kwargs) elif scaling_type == "deepseek_yarn": scaling_factor = rope_scaling["factor"] original_max_position = rope_scaling[ diff --git a/vllm/model_executor/layers/rotary_embedding/mrope.py b/vllm/model_executor/layers/rotary_embedding/mrope.py index 17d04a1ad715..9bf0d6bd15e7 100644 --- a/vllm/model_executor/layers/rotary_embedding/mrope.py +++ b/vllm/model_executor/layers/rotary_embedding/mrope.py @@ -12,6 +12,7 @@ from .base import RotaryEmbedding from .common import apply_rotary_emb_dispatch +from .yarn_scaling_rope import YaRNScalingRotaryEmbedding, yarn_get_mscale @triton.jit @@ -213,7 +214,27 @@ def __init__( dtype: torch.dtype, mrope_section: Optional[list[int]] = None, mrope_interleaved: bool = False, + # YaRN parameters. 
+ *, + scaling_factor: Optional[float] = None, + extrapolation_factor: float = 1, + attn_factor: float = 1, + beta_fast: int = 32, + beta_slow: int = 1, ) -> None: + + self.scaling_factor = scaling_factor + self.extrapolation_factor = extrapolation_factor + self.attn_factor = attn_factor + self.beta_fast = beta_fast + self.beta_slow = beta_slow + if self.scaling_factor is not None: + # Get n-d magnitude scaling corrected for interpolation + self.mscale = float( + yarn_get_mscale(self.scaling_factor) * attn_factor) + else: + self.mscale = 1.0 + # In Qwen2.5-VL, the maximum index value is related to the duration of # the input video. We enlarge max_position_embeddings to 4 times to get # a larger the cos and sin cache. @@ -226,6 +247,16 @@ def __init__( if self.mrope_section: assert sum(self.mrope_section) == rotary_dim // 2 + def _compute_inv_freq(self, base: float) -> torch.Tensor: + if self.scaling_factor is None: + return super()._compute_inv_freq(base) + return YaRNScalingRotaryEmbedding._compute_inv_freq(self, base) + + def _compute_cos_sin_cache(self) -> torch.Tensor: + if self.scaling_factor is None: + return super()._compute_cos_sin_cache() + return YaRNScalingRotaryEmbedding._compute_cos_sin_cache(self) + def forward_native( self, positions: torch.Tensor, From f225ea7dd98e9f29752e5c032cd4a8ee1d712f16 Mon Sep 17 00:00:00 2001 From: Kunshang Ji Date: Tue, 23 Sep 2025 11:09:00 +0800 Subject: [PATCH 254/518] [XPU] Fix `compile_size` is `None` case. (#25433) Signed-off-by: Kunshang Ji --- vllm/platforms/xpu.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index 034e039006c4..af61db5e312a 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -113,6 +113,8 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: # lazy import to avoid circular import from vllm.config import CompilationLevel, CUDAGraphMode compilation_config = vllm_config.compilation_config + if compilation_config.compile_sizes is None: + compilation_config.compile_sizes = [] assert compilation_config.cudagraph_mode == CUDAGraphMode.NONE, \ "CUDA graph mode should be NONE on XPU" From eea178398997b94b6df5560b597e2b7895b2ae3a Mon Sep 17 00:00:00 2001 From: Lucia Fang <116399278+luccafong@users.noreply.github.com> Date: Mon, 22 Sep 2025 20:21:48 -0700 Subject: [PATCH 255/518] [benchmarks]allow skip ready check for bench serve (#25420) Signed-off-by: Lu Fang Signed-off-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Co-authored-by: Lucia (Lu) Fang --- vllm/benchmarks/serve.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/vllm/benchmarks/serve.py b/vllm/benchmarks/serve.py index 7382782f1165..2a042802d0d5 100644 --- a/vllm/benchmarks/serve.py +++ b/vllm/benchmarks/serve.py @@ -531,18 +531,22 @@ async def benchmark( extra_body=extra_body, ) - test_output = await wait_for_endpoint( - request_func, - test_input, - session, - timeout_seconds=ready_check_timeout_sec, - ) - if not test_output.success: - raise ValueError( - "Initial test run failed - Please make sure benchmark arguments " - f"are correctly specified. Error: {test_output.error}") + if ready_check_timeout_sec > 0: + test_output = await wait_for_endpoint( + request_func, + test_input, + session, + timeout_seconds=ready_check_timeout_sec, + ) + if not test_output.success: + raise ValueError( + "Initial test run failed - Please make sure benchmark " + "arguments are correctly specified. 
" + f"Error: {test_output.error}") + else: + print("Initial test run completed. Starting main benchmark run...") else: - print("Initial test run completed. Starting main benchmark run...") + print("Skipping endpoint ready check.") if lora_modules: # For each input request, choose a LoRA module at random. @@ -1151,7 +1155,8 @@ def add_cli_args(parser: argparse.ArgumentParser): type=int, default=600, help="Maximum time to wait for the endpoint to become ready " - "in seconds (default: 600 seconds / 10 minutes).", + "in seconds (default: 600 seconds / 10 minutes). If set to 0, " + "the ready check will be skipped." ) From 78237e43bf3ac70f31b27684dc27d7eca0fa3677 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Mon, 22 Sep 2025 23:26:32 -0400 Subject: [PATCH 256/518] [Bugfix] Remove contiguous output req for context parallel MLA (#25414) Signed-off-by: Michael Goin --- vllm/attention/ops/common.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/attention/ops/common.py b/vllm/attention/ops/common.py index 189b57e8e8b8..6253e1e56b0f 100644 --- a/vllm/attention/ops/common.py +++ b/vllm/attention/ops/common.py @@ -134,6 +134,5 @@ def cp_lse_ag_out_rs(cp_attn_out: torch.Tensor, cp_attn_lse = cp_attn_lse.contiguous() lses = cp_group.all_gather(cp_attn_lse, dim=0).view_as(lses) out, _ = correct_attn_out(cp_attn_out, lses, cp_group.rank_in_group, ctx) - assert out.is_contiguous() out = cp_group.reduce_scatter(out, dim=1) return out From fafbe11af4f211abe02ef874f113bc28232eb0e8 Mon Sep 17 00:00:00 2001 From: Michael Yao Date: Tue, 23 Sep 2025 11:42:58 +0800 Subject: [PATCH 257/518] [Docs] Fix griffe warnings in vllm/lora/ops (#25369) Signed-off-by: windsonsea --- vllm/lora/ops/triton_ops/lora_kernel_metadata.py | 6 +++--- vllm/lora/ops/xla_ops/lora_ops.py | 1 - 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/vllm/lora/ops/triton_ops/lora_kernel_metadata.py b/vllm/lora/ops/triton_ops/lora_kernel_metadata.py index 39e647b9b88a..e27604728ed0 100644 --- a/vllm/lora/ops/triton_ops/lora_kernel_metadata.py +++ b/vllm/lora/ops/triton_ops/lora_kernel_metadata.py @@ -83,8 +83,8 @@ def prepare_tensors(self, token_lora_mapping: torch.Tensor) -> None: Prepare kernel metadata tensors for the current forward pass. Args: - token_lora_tensor (torch.Tensor): Tensor containing lora indices - for each input token. + token_lora_mapping (torch.Tensor): Tensor containing lora indices + for each input token. """ self._reset() @@ -136,7 +136,7 @@ def meta_args( Args: token_nums (int): Number of input tokens in the current forward - pass. + pass of the kernel. """ return ( self.token_lora_mapping[:token_nums], diff --git a/vllm/lora/ops/xla_ops/lora_ops.py b/vllm/lora/ops/xla_ops/lora_ops.py index 9118f3351ef0..29bfd5753a58 100644 --- a/vllm/lora/ops/xla_ops/lora_ops.py +++ b/vllm/lora/ops/xla_ops/lora_ops.py @@ -93,7 +93,6 @@ def bgmv_shrink( inputs (torch.Tensor): Input tensor of shape [num_tokens, hidden_size]. lora_b_weights (torch.Tensor): LoRA weights of shape [num_loras, lora_rank, hidden_size]. - output_tensor (torch.Tensor): (Unused) output tensor (placeholder). lora_indices_tensor (torch.Tensor): Tensor of shape [num_tokens] indicating which LoRA matrix to use for each token. scaling (float, optional): Scalar multiplier applied to the output. 
From e8db44f8834161259b25ebb13300bd502d37af9f Mon Sep 17 00:00:00 2001 From: Varun Sundar Rabindranath Date: Tue, 23 Sep 2025 00:01:09 -0400 Subject: [PATCH 258/518] [DP/EP][GPTOSS] Use triton matmul-ogs kernels for GPTOSS DP/EP (#24588) Signed-off-by: Varun Sundar Rabindranath Co-authored-by: Varun Sundar Rabindranath --- .../model_executor/layers/fused_moe/config.py | 22 ++- .../fused_moe/deepep_ht_prepare_finalize.py | 18 ++ .../fused_moe/gpt_oss_triton_kernels_moe.py | 174 ++++++++++++++---- vllm/model_executor/layers/fused_moe/layer.py | 82 ++++++--- .../layers/fused_moe/modular_kernel.py | 2 +- .../layers/quantization/mxfp4.py | 51 +++-- 6 files changed, 274 insertions(+), 75 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py index b14bc06e913c..3052bdb4dc1e 100644 --- a/vllm/model_executor/layers/fused_moe/config.py +++ b/vllm/model_executor/layers/fused_moe/config.py @@ -288,7 +288,11 @@ def use_int4_w4a16(self) -> bool: @property def use_mxfp4_w4a4(self) -> bool: - return self.quant_dtype == "mxfp4" + return (self._a1.dtype == "mxfp4" and self._w1.dtype == "mxfp4") + + @property + def use_mxfp4_w4a16(self) -> bool: + return (self._a1.dtype is None and self._w1.dtype == "mxfp4") @property def use_nvfp4_w4a4(self) -> bool: @@ -453,6 +457,22 @@ def int8_w8a8_moe_quant_config( ) +def mxfp4_w4a16_moe_quant_config( + w1_scale: Union[torch.Tensor, "PrecisionConfig"], + w2_scale: Union[torch.Tensor, "PrecisionConfig"], + w1_bias: Optional[torch.Tensor] = None, + w2_bias: Optional[torch.Tensor] = None) -> FusedMoEQuantConfig: + """ + Construct a quant config for unquantized activations and mxfp4 weights. + """ + return FusedMoEQuantConfig( + _a1=FusedMoEQuantDesc(), + _a2=FusedMoEQuantDesc(), + _w1=FusedMoEQuantDesc("mxfp4", None, w1_scale, None, None, w1_bias), + _w2=FusedMoEQuantDesc("mxfp4", None, w2_scale, None, None, w2_bias), + ) + + def mxfp4_w4a4_moe_quant_config( w1_scale: Union[torch.Tensor, "PrecisionConfig"], w2_scale: Union[torch.Tensor, "PrecisionConfig"], diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index f390f0a25875..a250a6218715 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -11,6 +11,7 @@ TopKWeightAndReduceContiguous, TopKWeightAndReduceDelegate) from vllm.model_executor.layers.fused_moe.utils import ( moe_kernel_quantize_input) +from vllm.utils import round_up class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize): @@ -18,6 +19,23 @@ class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize): Prepare/Finalize using DeepEP High-Throughput kernels. """ + @staticmethod + def maybe_roundup_layer_hidden_size(hidden_size: int, + dtype: torch.dtype) -> int: + # Round up hidden size so it is compatible with DeepEP High Throughput + # kernels. + # DeepEP intranode kernels make copies in units of, + # 32(warp-size) int4 elements. Round up hidden size to respect this. + # For example, an input hidden size of 2880 with dtype torch.bfloat16 + # will be rounded up to 3072. 
+ hidden_size_bytes = hidden_size * dtype.itemsize + xfer_atom_size = 512 # 32 * 16 (size(int4)) + if hidden_size_bytes % xfer_atom_size == 0: + return hidden_size + + hidden_size_bytes = round_up(hidden_size_bytes, xfer_atom_size) + return hidden_size_bytes // dtype.itemsize + def __init__(self, buffer: deep_ep.Buffer, num_dispatchers: int, dp_size: int, rank_expert_offset: int): super().__init__() diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py index f12d3807517f..0e84a9241e90 100644 --- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py +++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py @@ -9,7 +9,8 @@ from vllm.model_executor.layers.fused_moe.config import ( FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEQuantConfig) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( - TopKWeightAndReduceDelegate) + TopKWeightAndReduceNoOP) +from vllm.triton_utils import tl, triton from vllm.utils import has_triton_kernels logger = init_logger(__name__) @@ -19,13 +20,55 @@ import triton_kernels.swiglu from triton_kernels.matmul_ogs import (FnSpecs, FusedActivation, matmul_ogs) - from triton_kernels.routing import routing + from triton_kernels.routing import (RoutingData, routing, + routing_from_bitmatrix) + from triton_kernels.tensor import Bitmatrix except (ModuleNotFoundError, AttributeError) as e: logger.error( "Failed to import Triton kernels. Please make sure your triton " "version is compatible. Error: %s", e) +@triton.jit +def pack_bitmatrix( + bitmatrix, + topk_ids, + n_rows, # n_rows in bitmatrix / topk_ids + bm_cols: tl.constexpr, # n int32_t bitpacks in bitmatrix + n_expts_act, # num_topk + BLOCK_SIZE_M: tl.constexpr, + BLOCK_SIZE_K: tl.constexpr, +): + """ + Packs topk_ids into a bitmatrix. + code reference: + https://github.com/triton-lang/triton/blob/dd1bbc52b34d202dfe5ffea1e04fb16166c5c04e/python/triton_kernels/bench/distributed.py#L264 + """ + pid_m = tl.program_id(0) + offsets_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + offsets_k = tl.arange(0, BLOCK_SIZE_K) + offsets = offsets_m[:, None] * n_expts_act + offsets_k[None, :] + mask = (offsets_m < n_rows)[:, None] & (offsets_k < n_expts_act)[None, :] + indices = tl.load(topk_ids + offsets, mask=mask, other=-1) + div = indices // 32 + rem = indices % 32 + one = tl.cast(1, tl.uint32) + + # Iterate through all the relevant bitmatrix columns. + for i in range(bm_cols): + # When BLOCK_SIZE_K=32, offs is just the column index. + offs = tl.arange(0, BLOCK_SIZE_K // 32) + i * (BLOCK_SIZE_K // 32) + # All topks that need to go into this column has the correct bit set. + # Other bits are 0. x is a 2D tensor. + x = tl.where(div[:, :, None] == offs[None, None, :], + (one << rem)[:, :, None], 0) + # Reduce x to get a single int32_t bitpack. 
+ y = tl.reduce_or(x, axis=1) + bitmatrix_ptrs = bitmatrix + offsets_m[:, + None] * bm_cols + offs[None, :] + tl.store(bitmatrix_ptrs, y, mask=offsets_m[:, None] < n_rows) + + def triton_kernel_moe_forward( hidden_states: torch.Tensor, w1, # Tensor or triton_kernels.Tensor @@ -124,34 +167,88 @@ def triton_kernel_fused_experts( return intermediate_cache3 -class BatchedOAITritonExperts(mk.FusedMoEPermuteExpertsUnpermute): +def make_routing_data( + topk_ids: torch.Tensor, + topk_weights: torch.Tensor, + num_local_experts: int, +) -> tuple["RoutingData", torch.Tensor, torch.Tensor]: + + topk_ids = topk_ids.to(torch.int16) + topk_weights = topk_weights.to(torch.bfloat16) + + n_rows, num_topk = topk_ids.size() + + BLOCK_SIZE_M = 512 + BLOCK_SIZE_K = 32 + + bm_cols = triton.cdiv(num_local_experts, BLOCK_SIZE_K) # n_bitpacks + bitmatrix = torch.zeros((n_rows, bm_cols), + dtype=torch.uint32, + device=topk_ids.device) + + grid = (triton.cdiv(n_rows, BLOCK_SIZE_M), ) + pack_bitmatrix[grid]( + bitmatrix, + topk_ids, + n_rows, + bm_cols, + num_topk, + BLOCK_SIZE_M=BLOCK_SIZE_M, + BLOCK_SIZE_K=BLOCK_SIZE_K, + ) + + bitmatrix_shape = [n_rows, bm_cols * 32] + bitmatrix_shape_max = [n_rows, None] + bitmatrix = Bitmatrix(bitmatrix, + shape=bitmatrix_shape, + shape_max=bitmatrix_shape_max, + scratchpad=None) + + # matmul_ogs expects invalid topk_weights to be -1s + topk_weights = torch.where(topk_ids == -1, -1.0, topk_weights) + routing_data, gather_indx, scatter_indx = routing_from_bitmatrix( + bitmatrix, topk_weights, topk_ids, num_local_experts, num_topk) + + return routing_data, gather_indx, scatter_indx + + +class BaseOAITritonExperts(mk.FusedMoEPermuteExpertsUnpermute): + + def __init__(self, quant_config: FusedMoEQuantConfig): + super().__init__(quant_config) + + def supports_expert_map(self) -> bool: + return True + + def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce: + # Weight application and reduction happens in the fused_experts kernel. + return TopKWeightAndReduceNoOP() - def __init__( + def _make_routing_data( self, - max_num_tokens: int, - num_dispatchers: int, - quant_config: FusedMoEQuantConfig, - ): + topk_ids: torch.Tensor, + topk_weights: torch.Tensor, + num_local_experts: int, + ) -> tuple["RoutingData", torch.Tensor, torch.Tensor]: + return make_routing_data(topk_ids, topk_weights, num_local_experts) + + +class OAITritonExperts(BaseOAITritonExperts): + + def __init__(self, quant_config: FusedMoEQuantConfig): + # TODO (varun) : Enable activation quantization + assert quant_config.use_mxfp4_w4a16, "Supports only mxfp4_w4a16" super().__init__(quant_config) - self.max_num_tokens = max_num_tokens - self.num_dispatchers = num_dispatchers @property def activation_formats( self ) -> tuple[mk.FusedMoEActivationFormat, mk.FusedMoEActivationFormat]: - return (mk.FusedMoEActivationFormat.BatchedExperts, - mk.FusedMoEActivationFormat.BatchedExperts) + return (mk.FusedMoEActivationFormat.Standard, + mk.FusedMoEActivationFormat.Standard) def supports_chunking(self) -> bool: - return False - - def supports_expert_map(self) -> bool: - return False - - def finalize_weight_and_reduce_impl(self) -> mk.TopKWeightAndReduce: - # Let PrepareAndFinalize::finalize() decide the impl. 
- return TopKWeightAndReduceDelegate() + return True def workspace_shapes( self, a: torch.Tensor, aq: torch.Tensor, M: int, N: int, K: int, @@ -159,13 +256,10 @@ def workspace_shapes( expert_tokens_meta: Optional[mk.ExpertTokensMetadata] ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...], torch.dtype]: # workspace are allocated inside the kernel - assert a.dim() == 2 - num_dp = self.num_dispatchers - num_experts = local_num_experts - max_num_tokens = self.max_num_tokens - workspace2 = (0, 0, 0) - output = (num_experts, max_num_tokens * num_dp, N) - return (output, workspace2, output, a.dtype) + workspace1 = (M, K) + workspace2 = (0, 0) + output = (M, K) + return (workspace1, workspace2, output, a.dtype) def apply( self, @@ -185,17 +279,29 @@ def apply( expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: bool, ): - return triton_kernel_fused_experts( - output, + if expert_map is not None: + topk_ids = expert_map[topk_ids] + + local_num_experts = w1.size(0) + if global_num_experts == -1: + global_num_experts = local_num_experts + + routing_data, gather_indx, scatter_indx = self._make_routing_data( + topk_ids, topk_weights, local_num_experts) + + experts_output = triton_kernel_fused_experts( + None, hidden_states, w1, w2, - routing_data=None, - gather_indx=None, - scatter_indx=None, + routing_data, + gather_indx, + scatter_indx, activation=activation, quant_config=self.quant_config, apply_router_weight_on_input=False, - global_num_experts=global_num_experts, - expert_map=expert_map, + global_num_experts=local_num_experts, + expert_map=None, # applied already a1q_scale=a1q_scale) + + output.copy_(experts_output, non_blocking=True) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 17ad75584a3f..1f80e972b7f0 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -800,6 +800,49 @@ def get_compressed_expert_map(expert_map: torch.Tensor) -> str: for local_index, global_index in zip(local_indices, global_indices)) +def maybe_roundup_hidden_size( + hidden_size: int, act_dtype: torch.dtype, + quant_config: Optional[QuantizationConfig], + moe_parallel_config: FusedMoEParallelConfig) -> int: + """ + Given layer hidden size and MoE configurations, round up hidden_size + if necessary. + + Args: + hidden_size(int): Layer hidden-size + act_dtype: Data type of the layer activations. + quant_config(FusedMoEQuantConfig): Fused MoE quantization configuration. + moe_parallel_config(FusedMoEParallelConfig): Fused MoE parallelization + strategy configuration. + + Return: + Rounded up hidden_size if rounding up is required based on the configs. + Original hidden size otherwise. 
+ """ + + if (moe_parallel_config.use_deepep_ht_kernels): + hidden_size = ( + DeepEPHTPrepareAndFinalize.maybe_roundup_layer_hidden_size( + hidden_size, act_dtype)) + + # we are padding globally so EP buffer allocation works + if quant_config and quant_config.get_name() == "mxfp4": + + from vllm.model_executor.layers.quantization.mxfp4 import ( + Mxfp4Backend, get_mxfp4_backend) + current_mxfp4_backend = get_mxfp4_backend() + if (current_mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16 + or current_mxfp4_backend + == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS): + hidden_size = round_up(hidden_size, 128) + elif (current_platform.is_rocm() or current_mxfp4_backend + == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM + or current_mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16): + hidden_size = round_up(hidden_size, 256) + + return hidden_size + + @CustomOp.register("fused_moe") class FusedMoE(CustomOp): """FusedMoE layer for MoE models. @@ -856,6 +899,18 @@ def __init__( params_dtype = torch.get_default_dtype() self.params_dtype = params_dtype + vllm_config = get_current_vllm_config() + + # FIXME (varun): We should have a better way of inferring the activation + # datatype. This works for now as the tensor datatype entering the MoE + # operation is typically unquantized (i.e. float16/bfloat16). + if vllm_config.model_config is not None: + moe_in_dtype = vllm_config.model_config.dtype + else: + # TODO (bnell): This is a hack to get test_mixtral_moe to work + # since model_config is not set in the pytest test. + moe_in_dtype = params_dtype + tp_size_ = (tp_size if tp_size is not None else get_tensor_model_parallel_world_size()) dp_size_ = (dp_size @@ -865,7 +920,6 @@ def __init__( if self.is_sequence_parallel: self.sp_size = tp_size_ - vllm_config = get_current_vllm_config() self.moe_parallel_config: FusedMoEParallelConfig = ( FusedMoEParallelConfig.make( tp_size_=tp_size_, @@ -874,19 +928,10 @@ def __init__( self.global_num_experts = num_experts + num_redundant_experts - # we are padding globally so EP buffer allocation works - if quant_config and quant_config.get_name() == "mxfp4": - from vllm.model_executor.layers.quantization.mxfp4 import ( - Mxfp4Backend, get_mxfp4_backend) - current_mxfp4_backend = get_mxfp4_backend() - if (current_mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16 - or current_mxfp4_backend - == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS): - hidden_size = round_up(hidden_size, 128) - elif (current_platform.is_rocm() or current_mxfp4_backend - == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM or - current_mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16): - hidden_size = round_up(hidden_size, 256) + # Round up hidden size if needed. + hidden_size = maybe_roundup_hidden_size(hidden_size, moe_in_dtype, + quant_config, + self.moe_parallel_config) # For smuggling this layer into the fused moe custom op compilation_config = vllm_config.compilation_config @@ -967,20 +1012,13 @@ def __init__( raise ValueError("Only softmax scoring function is supported for " "non-grouped topk.") - if vllm_config.model_config is not None: - model_dtype = vllm_config.model_config.dtype - else: - # TODO (bnell): This is a hack to get test_mixtral_moe to work - # since model_config is not set in the pytest test. 
- model_dtype = params_dtype - moe = FusedMoEConfig( num_experts=self.global_num_experts, experts_per_token=top_k, hidden_dim=hidden_size, num_local_experts=self.local_num_experts, moe_parallel_config=self.moe_parallel_config, - in_dtype=model_dtype, + in_dtype=moe_in_dtype, max_num_tokens=envs.VLLM_MOE_DP_CHUNK_SIZE, has_bias=has_bias, ) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index a16c254fadf6..5fce24018e64 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -76,7 +76,7 @@ def _moe_problem_size( """ assert w1.dim() == 3 and w2.dim() == 3 E, N, _ = w1.size() - K = w2.size(1) + K = a1.size(-1) if a1.dim() == 2: # Make sure we are using the correct a1 (pre-permute). diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 5c3f8a891276..a71c8d32a22c 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -13,7 +13,10 @@ FusedMoEMethodBase) from vllm.model_executor.layers.fused_moe import modular_kernel as mk from vllm.model_executor.layers.fused_moe.config import ( - FusedMoEQuantConfig, mxfp4_w4a4_moe_quant_config) + FusedMoEQuantConfig, mxfp4_w4a4_moe_quant_config, + mxfp4_w4a16_moe_quant_config) +from vllm.model_executor.layers.fused_moe.gpt_oss_triton_kernels_moe import ( + OAITritonExperts) from vllm.model_executor.layers.fused_moe.trtllm_moe import TrtLlmGenExperts from vllm.model_executor.layers.linear import (LinearBase, UnquantizedLinearMethod) @@ -578,9 +581,14 @@ def _interleave_mxfp4_cutlass_sm90(w): layer.w13_bias = Parameter(w13_bias, requires_grad=False) layer.w2_bias = Parameter(w2_bias, requires_grad=False) - # FIXME warp need to be adjusted based on batch size - # only apply to batched mode - if self.moe.use_ep: + # Ideally we'd use FusedMoEModularKernel.prepare_finalize object + # (stored in self.fused_experts) to determine if the MoE has a + # batched activation format. As self.fused_experts is not + # initialized at this point, we resort to checking the MoE config + # directly. 
+ is_batched_moe = (self.moe.use_pplx_kernels + or self.moe.use_deepep_ll_kernels) + if is_batched_moe: num_warps = 4 if envs.VLLM_MOE_DP_CHUNK_SIZE <= 512 else 8 else: num_warps = 8 @@ -640,16 +648,21 @@ def get_fused_moe_quant_config( if self.mxfp4_backend == Mxfp4Backend.TRITON: w1_scale = self.w13_precision_config w2_scale = self.w2_precision_config + return mxfp4_w4a16_moe_quant_config( + w1_bias=layer.w13_bias, + w2_bias=layer.w2_bias, + w1_scale=w1_scale, + w2_scale=w2_scale, + ) else: w1_scale = layer.w13_weight_scale w2_scale = layer.w2_weight_scale - - return mxfp4_w4a4_moe_quant_config( - w1_bias=layer.w13_bias, - w2_bias=layer.w2_bias, - w1_scale=w1_scale, - w2_scale=w2_scale, - ) + return mxfp4_w4a4_moe_quant_config( + w1_bias=layer.w13_bias, + w2_bias=layer.w2_bias, + w1_scale=w1_scale, + w2_scale=w2_scale, + ) def select_gemm_impl( self, @@ -661,6 +674,7 @@ def select_gemm_impl( raise NotImplementedError( "Mxfp4 does not support batched experts format for EP") else: + assert self.moe_quant_config is not None if (self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_TRTLLM or self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_BF16): # B200 code-path @@ -671,13 +685,10 @@ def select_gemm_impl( # TODO(bnell): part of quant_config "max_capture_size": self.max_capture_size, } - assert self.moe_quant_config is not None return TrtLlmGenExperts(self.moe, self.moe_quant_config, **kwargs) else: - # Use matmul_ogs from triton_kernels here! - raise NotImplementedError( - "Mxfp4 does not support non-batched experts format for EP") + return OAITritonExperts(self.moe_quant_config) def _route_and_experts( self, @@ -722,10 +733,16 @@ def _route_and_experts( logical_to_physical_map=logical_to_physical_map, logical_replica_count=logical_replica_count) + w13_weight = (self.w13_weight_triton_tensor + if layer.w13_weight is None else layer.w13_weight) + w2_weight = (self.w2_weight_triton_tensor + if layer.w2_weight is None else layer.w2_weight) + assert all([w is not None for w in [w13_weight, w2_weight]]) + return self.fused_experts( hidden_states=x, - w1=layer.w13_weight, - w2=layer.w2_weight, + w1=w13_weight, + w2=w2_weight, topk_weights=topk_weights, topk_ids=topk_ids, inplace=True, From 5774b0a1da93492e60cc446e6e783a43da89e105 Mon Sep 17 00:00:00 2001 From: "Chendi.Xue" Date: Mon, 22 Sep 2025 23:17:42 -0500 Subject: [PATCH 259/518] [NIXL][OOT platform] support nixl_connector with oot platform and other nixl_backend (#25121) Signed-off-by: Chendi Xue --- docs/features/disagg_prefill.md | 6 +++ docs/serving/expert_parallel_deployment.md | 2 +- .../kv_connector/unit/test_nixl_connector.py | 52 ++++++++++++++++++- .../kv_connector/v1/nixl_connector.py | 33 +++++++++--- vllm/platforms/interface.py | 15 ++++++ 5 files changed, 99 insertions(+), 9 deletions(-) diff --git a/docs/features/disagg_prefill.md b/docs/features/disagg_prefill.md index 69f70b8ff5ac..cb62213cc7af 100644 --- a/docs/features/disagg_prefill.md +++ b/docs/features/disagg_prefill.md @@ -31,6 +31,12 @@ Now supports 5 types of connectors: --kv-transfer-config '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"NixlConnector","kv_role":"kv_both"},{"kv_connector":"SharedStorageConnector","kv_role":"kv_both","kv_connector_extra_config":{"shared_storage_path":"local_storage"}}]}}' ``` +For NixlConnector, you may also specify one or multiple NIXL_Backend. 
Such as: + + ```bash + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_buffer_device":"cuda", "kv_connector_extra_config":{"backend":["UCX", "GDS"]}' + ``` + - **OffloadingConnector**: enable offloading of KV data to CPU memory, customizing the CPU block size (in tokens) and number of blocks to allocate (per worker): ```bash diff --git a/docs/serving/expert_parallel_deployment.md b/docs/serving/expert_parallel_deployment.md index 7489fc260983..f823d33df80e 100644 --- a/docs/serving/expert_parallel_deployment.md +++ b/docs/serving/expert_parallel_deployment.md @@ -193,7 +193,7 @@ For production deployments requiring strict SLA guarantees for time-to-first-tok 1. **Install gdrcopy/ucx/nixl**: For maximum performance, run the [install_gdrcopy.sh](gh-file:tools/install_gdrcopy.sh) script to install `gdrcopy` (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`). You can find available OS versions [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/). If `gdrcopy` is not installed, things will still work with a plain `pip install nixl`, just with lower performance. `nixl` and `ucx` are installed as dependencies via pip. -2. **Configure Both Instances**: Add this flag to both prefill and decode instances `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}` +2. **Configure Both Instances**: Add this flag to both prefill and decode instances `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}`. Noted, you may also specify one or multiple NIXL_Backend. Such as: `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_connector_extra_config":{"backend":["UCX", "GDS"]}'` 3. **Client Orchestration**: Use the client-side script below to coordinate prefill/decode operations. We are actively working on routing solutions. diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 6e58d158c3f4..fa698a2eabd9 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -27,6 +27,7 @@ KVConnectorRole, NixlAgentMetadata, NixlConnector, NixlConnectorMetadata, NixlConnectorWorker, NixlKVConnectorStats) from vllm.forward_context import ForwardContext +from vllm.platforms.interface import Platform from vllm.sampling_params import SamplingParams from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput @@ -56,7 +57,7 @@ def __init__(self, agent_name: str, *args, **kwargs): def get_reg_descs(self, caches_data, memory_type: str) -> list: return [str(uuid.uuid4()) for _ in caches_data] - def register_memory(self, descs) -> None: + def register_memory(self, descs, backends) -> None: pass def get_xfer_descs(self, blocks_data, memory_type: str) -> list: @@ -855,3 +856,52 @@ def test_register_kv_caches(dist_init): assert block_len == expected_block_len, \ f"Block entry {i}: Expected block len {expected_block_len}, " \ f"got {block_len}" + + +class FakePlatform(Platform): + device_type: str = "oot" + + @classmethod + def get_nixl_supported_devices(cls) -> dict[str, tuple[str, ...]]: + """ + Returns a mapping from device_type to a tuple of supported + kv_buffer_device for nixl. + """ + return {'oot': ('oot', )} + + @classmethod + def get_nixl_memory_type(cls) -> Optional[str]: + """ + Returns the nixl memory type for the current platform. 
+ """ + return 'VRAM' + + +@pytest.mark.parametrize("kv_buffer_device, nixl_memory_type", [ + ("oot", "VRAM"), +]) +def test_kv_buffer_to_nixl_memory_types(dist_init, kv_buffer_device, + nixl_memory_type): + """ + Test that register_kv_caches() passes the correct memory types from the + config to the nixl_wrapper. + """ + vllm_config = create_vllm_config() + # Override the default memory types in the config + vllm_config.kv_transfer_config.kv_buffer_device = kv_buffer_device + from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import ( + _NIXL_SUPPORTED_DEVICE) + _NIXL_SUPPORTED_DEVICE.update(FakePlatform.get_nixl_supported_devices()) + + with patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper"), \ + patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.threading.Event"), \ + patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.threading.Thread"), \ + patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.current_platform", FakePlatform), \ + patch("vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector._NIXL_SUPPORTED_DEVICE", _NIXL_SUPPORTED_DEVICE): # noqa: E501 + + # Create connector and replace its worker with a fake one for isolation + connector = NixlConnector(vllm_config, KVConnectorRole.WORKER) + + # Verify get_reg_descs was called with the correct memory_type + assert connector.connector_worker.kv_buffer_device == kv_buffer_device + assert connector.connector_worker.nixl_memory_type == nixl_memory_type diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index d3a08af088c1..82b483447e33 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -58,6 +58,12 @@ logger.warning("NIXL is not available") NixlWrapper = None +try: + from nixl._api import nixl_agent_config +except ImportError: + nixl_agent_config = None + logger.warning("NIXL agent config is not available") + # Supported platforms and types of kv transfer buffer. # {device: tuple of supported kv buffer types} _NIXL_SUPPORTED_DEVICE = { @@ -65,6 +71,8 @@ "tpu": ("cpu", ), "xpu": ("cpu", ), } +# support for oot platform by providing mapping in current_platform +_NIXL_SUPPORTED_DEVICE.update(current_platform.get_nixl_supported_devices()) class NixlAgentMetadata( @@ -448,8 +456,15 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): self.vllm_config = vllm_config self.block_size = vllm_config.cache_config.block_size + self.nixl_backends = \ + vllm_config.kv_transfer_config.get_from_extra_config( + "backends", ["UCX"]) # Agent. - self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), None) + non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"] + config = nixl_agent_config(backends=self.nixl_backends) if len( + non_ucx_backends) > 0 and nixl_agent_config is not None else None + + self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config) # Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}. 
self._remote_agents: dict[EngineId, dict[int, str]] = defaultdict(dict) @@ -486,11 +501,15 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): # used when device memory can not be registered under nixl self.host_xfer_buffers: dict[str, torch.Tensor] = {} self.use_host_buffer = self.kv_buffer_device == "cpu" - if self.kv_buffer_device == "cuda": - self.nixl_memory_type = "VRAM" - elif self.kv_buffer_device == "cpu": - self.nixl_memory_type = "DRAM" - else: + # support for oot platform which can't register nixl memory + # type based on kv_buffer_device + self.nixl_memory_type = current_platform.get_nixl_memory_type() + if self.nixl_memory_type is None: + if self.kv_buffer_device == "cuda": + self.nixl_memory_type = "VRAM" + elif self.kv_buffer_device == "cpu": + self.nixl_memory_type = "DRAM" + if self.nixl_memory_type is None: raise RuntimeError( f"{self.device_type} with {self.kv_buffer_device} kv_buffer " "is not supported.") @@ -766,7 +785,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): descs = self.nixl_wrapper.get_reg_descs(caches_data, self.nixl_memory_type) logger.debug("Registering descs: %s", caches_data) - self.nixl_wrapper.register_memory(descs) + self.nixl_wrapper.register_memory(descs, backends=self.nixl_backends) logger.debug("Done registering descs") self._registered_descs.append(descs) diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 3f13ae72fe4d..7dd935d2eb31 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -604,6 +604,21 @@ def _synced_weight_loader(param, *args, **kwargs): return _synced_weight_loader + @classmethod + def get_nixl_supported_devices(cls) -> dict[str, tuple[str, ...]]: + """ + Returns a mapping from device_type to a tuple of supported + kv_buffer_device for nixl. + """ + return {} + + @classmethod + def get_nixl_memory_type(cls) -> Optional[str]: + """ + Returns the nixl memory type for the current platform. 
+ """ + return None + class UnspecifiedPlatform(Platform): _enum = PlatformEnum.UNSPECIFIED From c98be0a232764fb68353e2c9e26d3495a979044d Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 23 Sep 2025 13:17:10 +0800 Subject: [PATCH 260/518] [Model] Enable DP for ViT in Qwen2-VL (#25445) Signed-off-by: DarkLight1337 --- vllm/model_executor/models/qwen2_vl.py | 78 +++++++++++++++++++------- 1 file changed, 59 insertions(+), 19 deletions(-) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index b3c42c257256..88813490c0fb 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -66,6 +66,7 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder +from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model from vllm.platforms import _Backend, current_platform from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope @@ -217,17 +218,20 @@ def __init__( act_layer: type[nn.Module] = QuickGELU, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ): super().__init__() self.fc1 = ColumnParallelLinear(in_features, hidden_features, quant_config=quant_config, - prefix=f"{prefix}.fc1") + prefix=f"{prefix}.fc1", + disable_tp=use_data_parallel) self.act = act_layer() self.fc2 = RowParallelLinear(hidden_features, in_features, quant_config=quant_config, - prefix=f"{prefix}.fc2") + prefix=f"{prefix}.fc2", + disable_tp=use_data_parallel) def forward(self, x: torch.Tensor) -> torch.Tensor: x_parallel, _ = self.fc1(x) @@ -293,25 +297,28 @@ def __init__( projection_size: int, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() # Per attention head and per partition values. - world_size = parallel_state.get_tensor_model_parallel_world_size() - self.tp_size = world_size + self.tp_size = (1 if use_data_parallel else + parallel_state.get_tensor_model_parallel_world_size()) self.tp_rank = parallel_state.get_tensor_model_parallel_rank() self.hidden_size_per_attention_head = dist_utils.divide( projection_size, num_heads) self.num_attention_heads_per_partition = dist_utils.divide( - num_heads, world_size) + num_heads, self.tp_size) self.qkv = ColumnParallelLinear(input_size=embed_dim, output_size=3 * projection_size, quant_config=quant_config, - prefix=f"{prefix}.qkv") + prefix=f"{prefix}.qkv", + disable_tp=use_data_parallel) self.proj = RowParallelLinear(input_size=projection_size, output_size=embed_dim, quant_config=quant_config, - prefix=f"{prefix}.proj") + prefix=f"{prefix}.proj", + disable_tp=use_data_parallel) # Detect attention implementation. 
self.attn_backend = get_vit_attn_backend( @@ -453,6 +460,7 @@ def __init__( norm_layer: Optional[Callable[[int], nn.Module]] = None, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() if norm_layer is None: @@ -465,12 +473,14 @@ def __init__( num_heads=num_heads, projection_size=dim, quant_config=quant_config, - prefix=f"{prefix}.attn") + prefix=f"{prefix}.attn", + use_data_parallel=use_data_parallel) self.mlp = Qwen2VisionMLP(dim, mlp_hidden_dim, act_layer=act_layer, quant_config=quant_config, - prefix=f"{prefix}.mlp") + prefix=f"{prefix}.mlp", + use_data_parallel=use_data_parallel) def forward( self, @@ -531,6 +541,7 @@ def __init__( spatial_merge_size: int = 2, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.hidden_size = context_dim * (spatial_merge_size**2) @@ -542,13 +553,15 @@ def __init__( self.hidden_size, bias=True, quant_config=quant_config, - prefix=f"{prefix}.mlp.0"), + prefix=f"{prefix}.mlp.0", + disable_tp=use_data_parallel), nn.GELU(), RowParallelLinear(self.hidden_size, d_model, bias=True, quant_config=quant_config, - prefix=f"{prefix}.mlp.2"), + prefix=f"{prefix}.mlp.2", + disable_tp=use_data_parallel), ]) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -600,6 +613,7 @@ def __init__( norm_eps: float = 1e-6, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() @@ -613,6 +627,9 @@ def __init__( num_heads = vision_config.num_heads mlp_ratio = vision_config.mlp_ratio + self.use_data_parallel = use_data_parallel + self.out_hidden_size = vision_config.hidden_size + self.spatial_merge_size = spatial_merge_size self.num_heads = num_heads self.embed_dim = embed_dim @@ -634,7 +651,8 @@ def __init__( mlp_ratio=mlp_ratio, norm_layer=norm_layer, quant_config=quant_config, - prefix=f"{prefix}.blocks.{layer_idx}") + prefix=f"{prefix}.blocks.{layer_idx}", + use_data_parallel=use_data_parallel) for layer_idx in range(depth) ]) self.merger = Qwen2VisionPatchMerger( @@ -643,6 +661,7 @@ def __init__( norm_layer=norm_layer, quant_config=quant_config, prefix=f"{prefix}.merger", + use_data_parallel=use_data_parallel, ) self.attn_backend = get_vit_attn_backend( head_size=head_dim, dtype=torch.get_default_dtype()) @@ -659,8 +678,9 @@ def dtype(self) -> torch.dtype: def device(self) -> torch.device: return self.patch_embed.proj.weight.device - def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor: + def rot_pos_emb(self, grid_thw: list[list[int]]) -> torch.Tensor: pos_ids = [] + max_grid_size = 0 for t, h, w in grid_thw: hpos_ids = torch.arange(h).unsqueeze(1).expand(-1, w) wpos_ids = torch.arange(w).unsqueeze(0).expand(h, -1) @@ -678,8 +698,8 @@ def rot_pos_emb(self, grid_thw: torch.Tensor) -> torch.Tensor: ).permute(0, 2, 1, 3).flatten() pos_ids.append( torch.stack([hpos_ids, wpos_ids], dim=-1).repeat(t, 1)) + max_grid_size = max(max_grid_size, h, w) pos_ids = torch.cat(pos_ids, dim=0) - max_grid_size = grid_thw[:, 1:].max() rotary_pos_emb_full = self.rotary_pos_emb(max_grid_size) rotary_pos_emb = rotary_pos_emb_full[pos_ids].flatten(1) return rotary_pos_emb @@ -698,7 +718,7 @@ def compute_attn_mask_seqlen( def forward( self, x: torch.Tensor, - grid_thw: torch.Tensor, + grid_thw: list[list[int]], ) -> torch.Tensor: # patchify x = x.to(device=self.device, dtype=self.dtype) @@ -708,8 +728,9 @@ def forward( rotary_pos_emb = 
self.rot_pos_emb(grid_thw) # compute cu_seqlens - cu_seqlens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2], - grid_thw[:, 0]).cumsum( + grid_thw_ = torch.tensor(grid_thw) + cu_seqlens = torch.repeat_interleave(grid_thw_[:, 1] * grid_thw_[:, 2], + grid_thw_[:, 0]).cumsum( dim=0, dtype=torch.int32) cu_seqlens = F.pad(cu_seqlens, (1, 0), "constant", 0) @@ -1112,6 +1133,8 @@ class Qwen2VLForConditionalGeneration(nn.Module, SupportsMultiModal, "model.": "language_model.model.", }) + supports_encoder_tp_data = True + def get_mrope_input_positions( self, input_tokens: list[int], @@ -1239,6 +1262,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): quant_config = vllm_config.quant_config multimodal_config = vllm_config.model_config.multimodal_config + self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" self.config = config self.multimodal_config = multimodal_config @@ -1249,6 +1273,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): norm_eps=getattr(config, "rms_norm_eps", 1e-6), quant_config=self._maybe_ignore_quant_config(quant_config), prefix=maybe_prefix(prefix, "visual"), + use_data_parallel=self.use_data_parallel, ) else: self.visual = None @@ -1357,7 +1382,15 @@ def _process_image_input( image_embeds = image_input["image_embeds"] else: pixel_values = image_input["pixel_values"] - image_embeds = self.visual(pixel_values, grid_thw=grid_thw) + + if self.use_data_parallel: + return run_dp_sharded_mrope_vision_model(self.visual, + pixel_values, + grid_thw_list, + rope_type="rope_3d") + else: + image_embeds = self.visual(pixel_values, + grid_thw=grid_thw_list) # Split concatenated embeddings for each image item. merge_size = self.visual.spatial_merge_size @@ -1377,7 +1410,14 @@ def _process_video_input( video_embeds = video_input["video_embeds"] else: pixel_values_videos = video_input["pixel_values_videos"] - video_embeds = self.visual(pixel_values_videos, grid_thw=grid_thw) + if self.use_data_parallel: + return run_dp_sharded_mrope_vision_model(self.visual, + pixel_values_videos, + grid_thw_list, + rope_type="rope_3d") + else: + video_embeds = self.visual(pixel_values_videos, + grid_thw=grid_thw_list) # Split concatenated embeddings for each video item. merge_size = self.visual.spatial_merge_size From ba8d2165b655793ff24cf074d041e0bddfc5a0cc Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 23 Sep 2025 00:56:00 -0700 Subject: [PATCH 261/518] Handle triton kernel import exception (#25319) Signed-off-by: Ming Yang --- vllm/model_executor/layers/fused_moe/config.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/config.py b/vllm/model_executor/layers/fused_moe/config.py index 3052bdb4dc1e..34bfe1c16aac 100644 --- a/vllm/model_executor/layers/fused_moe/config.py +++ b/vllm/model_executor/layers/fused_moe/config.py @@ -14,11 +14,16 @@ from vllm.utils import cdiv, has_triton_kernels from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe -if has_triton_kernels(): - from triton_kernels.matmul_ogs import PrecisionConfig - logger = init_logger(__name__) +if has_triton_kernels(): + try: + from triton_kernels.matmul_ogs import PrecisionConfig + except ImportError: + logger.error( + "Failed to import Triton kernels. 
Please make sure your triton " + "version is compatible.") + def _get_config_dtype_str( dtype: torch.dtype, From 9383cd6f10c55875de7f932e1bf7b0c43f01fc67 Mon Sep 17 00:00:00 2001 From: Zhikaiiii <55917203+Zhikaiiii@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:07:27 +0800 Subject: [PATCH 262/518] [Frontend] Add a new xml-based tool parser for qwen3-coder (#25028) Signed-off-by: Zhikaiiii <1658973216@qq.com> --- docs/features/tool_calling.md | 9 + tests/tool_use/test_qwen3coder_tool_parser.py | 112 +- .../openai/tool_parsers/__init__.py | 2 + .../tool_parsers/qwen3xml_tool_parser.py | 1137 +++++++++++++++++ 4 files changed, 1238 insertions(+), 22 deletions(-) create mode 100644 vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md index 2a48596571d1..291c313cd57a 100644 --- a/docs/features/tool_calling.md +++ b/docs/features/tool_calling.md @@ -319,6 +319,15 @@ Supported models: Flags: `--tool-call-parser glm45` +### Qwen3-Coder Models (`qwen3_xml`) + +Supported models: + +* `Qwen/Qwen3-480B-A35B-Instruct` +* `Qwen/Qwen3-Coder-30B-A3B-Instruct` + +Flags: `--tool-call-parser qwen3_xml` + ### Models with Pythonic Tool Calls (`pythonic`) A growing number of models output a python list to represent tool calls instead of using JSON. This has the advantage of inherently supporting parallel tool calls and removing ambiguity around the JSON schema required for tool calls. The `pythonic` tool parser can support such models. diff --git a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_use/test_qwen3coder_tool_parser.py index f06fb2b9f2f0..57eaf84d36f2 100644 --- a/tests/tool_use/test_qwen3coder_tool_parser.py +++ b/tests/tool_use/test_qwen3coder_tool_parser.py @@ -13,6 +13,8 @@ ToolCall) from vllm.entrypoints.openai.tool_parsers.qwen3coder_tool_parser import ( Qwen3CoderToolParser) +from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import ( + Qwen3XMLToolParser) from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer @@ -29,6 +31,21 @@ def qwen3_tool_parser(qwen3_tokenizer): return Qwen3CoderToolParser(qwen3_tokenizer) +@pytest.fixture +def qwen3_xml_tool_parser(qwen3_tokenizer): + return Qwen3XMLToolParser(qwen3_tokenizer) + + +@pytest.fixture(params=["original", "xml"]) +def qwen3_tool_parser_parametrized(qwen3_tool_parser, qwen3_xml_tool_parser, + request): + """Parameterized fixture that provides both parser types for testing""" + if request.param == "original": + return qwen3_tool_parser + else: + return qwen3_xml_tool_parser + + @pytest.fixture def sample_tools(): return [ @@ -95,7 +112,7 @@ def assert_tool_calls(actual_tool_calls: list[ToolCall], def stream_delta_message_generator( - qwen3_tool_parser: Qwen3CoderToolParser, + qwen3_tool_parser, qwen3_tokenizer: AnyTokenizer, model_output: str, request: Optional[ChatCompletionRequest] = None @@ -144,9 +161,9 @@ def stream_delta_message_generator( read_offset = new_read_offset -def test_extract_tool_calls_no_tools(qwen3_tool_parser): +def test_extract_tool_calls_no_tools(qwen3_tool_parser_parametrized): model_output = "This is a test response without any tool calls" - extracted_tool_calls = qwen3_tool_parser.extract_tool_calls( + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( model_output, request=None) # type: ignore[arg-type] assert not extracted_tool_calls.tools_called assert extracted_tool_calls.tool_calls == 
[] @@ -294,12 +311,13 @@ def test_extract_tool_calls_no_tools(qwen3_tool_parser): ], "Let me calculate that area for you."), ], ) -def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output, - expected_tool_calls, expected_content): +def test_extract_tool_calls(qwen3_tool_parser_parametrized, sample_tools, + model_output, expected_tool_calls, + expected_content): request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) - extracted_tool_calls = qwen3_tool_parser.extract_tool_calls( + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( model_output, request=request) assert extracted_tool_calls.tools_called @@ -308,7 +326,8 @@ def test_extract_tool_calls(qwen3_tool_parser, sample_tools, model_output, assert extracted_tool_calls.content == expected_content -def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools): +def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser_parametrized, + sample_tools): """Test fallback parsing when XML tags are missing""" model_output = ''' @@ -322,7 +341,7 @@ def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools): request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) - extracted_tool_calls = qwen3_tool_parser.extract_tool_calls( + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( model_output, request=request) assert extracted_tool_calls.tools_called @@ -331,7 +350,7 @@ def test_extract_tool_calls_fallback_no_tags(qwen3_tool_parser, sample_tools): "get_current_weather") -def test_extract_tool_calls_type_conversion(qwen3_tool_parser): +def test_extract_tool_calls_type_conversion(qwen3_tool_parser_parametrized): """Test parameter type conversion based on tool schema""" tools = [ ChatCompletionToolsParam(type="function", @@ -381,7 +400,7 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser): ''' request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) - extracted_tool_calls = qwen3_tool_parser.extract_tool_calls( + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( model_output, request=request) args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) @@ -536,9 +555,10 @@ def test_extract_tool_calls_type_conversion(qwen3_tool_parser): ], "Let me calculate that area for you."), ], ) -def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer, - sample_tools, model_output, - expected_tool_calls, expected_content): +def test_extract_tool_calls_streaming(qwen3_tool_parser_parametrized, + qwen3_tokenizer, sample_tools, + model_output, expected_tool_calls, + expected_content): """Test incremental streaming behavior including typed parameters""" request = ChatCompletionRequest(model=MODEL, messages=[], @@ -548,7 +568,8 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer, tool_states = {} # Track state per tool index for delta_message in stream_delta_message_generator( - qwen3_tool_parser, qwen3_tokenizer, model_output, request): + qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, + request): # role should never be streamed from tool parser assert not delta_message.role @@ -609,7 +630,7 @@ def test_extract_tool_calls_streaming(qwen3_tool_parser, qwen3_tokenizer, def test_extract_tool_calls_missing_closing_parameter_tag( - qwen3_tool_parser, sample_tools): + qwen3_tool_parser_parametrized, sample_tools): """Test handling of missing closing tag""" # Using get_current_weather from sample_tools but with 
malformed XML model_output = '''Let me check the weather for you: @@ -629,7 +650,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag( request = ChatCompletionRequest(model=MODEL, messages=[], tools=sample_tools) - extracted_tool_calls = qwen3_tool_parser.extract_tool_calls( + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( model_output, request=request) # The parser should handle the malformed XML gracefully @@ -652,7 +673,7 @@ def test_extract_tool_calls_missing_closing_parameter_tag( def test_extract_tool_calls_streaming_missing_closing_tag( - qwen3_tool_parser, qwen3_tokenizer, sample_tools): + qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools): """Test streaming with missing closing tag""" # Using get_current_weather from sample_tools but with malformed XML model_output = '''Let me check the weather for you: @@ -677,7 +698,8 @@ def test_extract_tool_calls_streaming_missing_closing_tag( tool_states = {} for delta_message in stream_delta_message_generator( - qwen3_tool_parser, qwen3_tokenizer, model_output, request): + qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, + request): if delta_message.content: other_content += delta_message.content @@ -727,9 +749,8 @@ def test_extract_tool_calls_streaming_missing_closing_tag( assert args["unit"] == "fahrenheit" -def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser, - qwen3_tokenizer, - sample_tools): +def test_extract_tool_calls_streaming_incremental( + qwen3_tool_parser_parametrized, qwen3_tokenizer, sample_tools): """Test that streaming is truly incremental""" model_output = '''I'll check the weather. @@ -748,7 +769,8 @@ def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser, chunks = [] for delta_message in stream_delta_message_generator( - qwen3_tool_parser, qwen3_tokenizer, model_output, request): + qwen3_tool_parser_parametrized, qwen3_tokenizer, model_output, + request): chunks.append(delta_message) # Should have multiple chunks @@ -784,3 +806,49 @@ def test_extract_tool_calls_streaming_incremental(qwen3_tool_parser, parsed_args = json.loads(full_args) assert parsed_args["city"] == "Dallas" assert parsed_args["state"] == "TX" + + +def test_extract_tool_calls_complex_type_with_single_quote( + qwen3_tool_parser_parametrized): + """Test parameter type conversion based on tool schema""" + tools = [ + ChatCompletionToolsParam(type="function", + function={ + "name": "test_types", + "parameters": { + "type": "object", + "properties": { + "int_param": { + "type": "integer" + }, + "float_param": { + "type": "float" + }, + "bool_param": { + "type": "boolean" + }, + "str_param": { + "type": "string" + }, + "obj_param": { + "type": "object" + } + } + } + }) + ] + + model_output = ''' + + +{'key': 'value'} + + +''' + + request = ChatCompletionRequest(model=MODEL, messages=[], tools=tools) + extracted_tool_calls = qwen3_tool_parser_parametrized.extract_tool_calls( + model_output, request=request) + + args = json.loads(extracted_tool_calls.tool_calls[0].function.arguments) + assert args["obj_param"] == {"key": "value"} diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py index 35096b046136..5e77c406b8d9 100644 --- a/vllm/entrypoints/openai/tool_parsers/__init__.py +++ b/vllm/entrypoints/openai/tool_parsers/__init__.py @@ -20,6 +20,7 @@ from .phi4mini_tool_parser import Phi4MiniJsonToolParser from .pythonic_tool_parser import PythonicToolParser from .qwen3coder_tool_parser import Qwen3CoderToolParser 
+from .qwen3xml_tool_parser import Qwen3XMLToolParser from .seed_oss_tool_parser import SeedOssToolParser from .step3_tool_parser import Step3ToolParser from .xlam_tool_parser import xLAMToolParser @@ -45,6 +46,7 @@ "HunyuanA13BToolParser", "Glm4MoeModelToolParser", "Qwen3CoderToolParser", + "Qwen3XMLToolParser", "SeedOssToolParser", "Step3ToolParser", "OpenAIToolParser", diff --git a/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py new file mode 100644 index 000000000000..4ab67dfea104 --- /dev/null +++ b/vllm/entrypoints/openai/tool_parsers/qwen3xml_tool_parser.py @@ -0,0 +1,1137 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import ast +import json +import uuid +from collections.abc import Sequence +from typing import Any, Optional, Union +from xml.parsers.expat import ParserCreate + +import regex as re + +from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, + ChatCompletionToolsParam, + DeltaFunctionCall, DeltaMessage, + DeltaToolCall, + ExtractedToolCallInformation, + FunctionCall, ToolCall) +from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( + ToolParser, ToolParserManager) +from vllm.logger import init_logger +from vllm.transformers_utils.tokenizer import AnyTokenizer + +logger = init_logger(__name__) + + +class StreamingXMLToolCallParser: + """ + Simplified streaming XML tool call parser + Supports streaming input, parsing, and output + """ + + def __init__(self): + self.reset_streaming_state() + + # Tool configuration information + self.tools: Union[list[ChatCompletionToolsParam], None] = None + self.tool_call_start_token: str = '' + self.tool_call_end_token: str = '' + self.function_start_token: str = ' DeltaMessage: + """ + Parse single streaming XML chunk and return Delta response + This is the actual streaming interface that receives chunks + one by one and maintains internal state + + Args: + xml_chunk: Single XML chunk string + Returns: + DeltaMessage: Contains delta information generated by this chunk, + returns empty response if no complete elements + """ + # Record delta count before processing + initial_delta_count = len(self.deltas) + + self.streaming_buffer += xml_chunk + + found_elements = self._process_complete_xml_elements() + + if found_elements: + # If complete elements found, check if end events were missed + # some tags may not have been triggered + try: + new_deltas = self.deltas[initial_delta_count:] + # If this chunk contains + # but didn't generate '}', then complete it + if (self.current_call_id is not None + and self.function_end_token in xml_chunk): + + # - Added '}' (non-empty parameter ending) + # - Added '{}' (empty parameter function) + has_function_close = any((td.tool_calls and any( + (tc.function and tc.id == self.current_call_id + and isinstance(tc.function.arguments, str) and + (tc.function.arguments in ('}', '{}'))) + for tc in td.tool_calls)) for td in new_deltas) + if not has_function_close: + # Close potentially unclosed element + if self.current_param_name: + self._end_element('parameter') + if self.current_function_name: + self._end_element('function') + # If this chunk contains + # but didn't generate final empty delta, then complete it + if (self.current_call_id is not None + and self.tool_call_end_token in xml_chunk): + has_toolcall_close = any((td.tool_calls and any( + (tc.type == 'function' and tc.function and tc.function. 
+ arguments == '' and tc.id == self.current_call_id) + for tc in td.tool_calls)) for td in new_deltas) + if not has_toolcall_close: + # Close potentially unclosed element + if self.current_param_name: + self._end_element('parameter') + if self.current_function_name: + self._end_element('function') + self._end_element('tool_call') + except Exception as e: + logger.warning("Error with fallback parsing: %s", e) + # Merge newly generated deltas into single response + result_delta = self._merge_new_deltas_to_single_response( + initial_delta_count) + return result_delta + else: + # No complete elements, check if there's unoutput text content + if self.text_content_buffer and self.tool_call_index == 0: + # Has text content but no tool_call yet, output text content + text_delta = DeltaMessage(content=self.text_content_buffer) + self._emit_delta(text_delta) + # Clear buffer to avoid duplicate output + self.text_content_buffer = '' + return text_delta + + # If this chunk contains end tags but wasn't triggered by parser, + # manually complete end events + # Only execute when still on the same call as when entered, + # to prevent accidentally closing new calls + # in multi scenarios + if (self.current_call_id is not None + and (self.function_end_token in xml_chunk + or self.tool_call_end_token in xml_chunk)): + # Close potentially unclosed element + if self.current_param_name: + self._end_element('parameter') + if self.function_end_token in xml_chunk and \ + self.current_function_name: + self._end_element('function') + if self.tool_call_end_token in xml_chunk: + self._end_element('tool_call') + # Return the merged delta result generated by this fallback + result_delta = self._merge_new_deltas_to_single_response( + initial_delta_count) + return result_delta + + # No complete elements, return empty response + return DeltaMessage(content=None) + + def _escape_xml_special_chars(self, text: str) -> str: + """ + Escape XML special characters + Args: + text: Original text + Returns: + Escaped text + """ + xml_escapes = { + '&': '&', + '<': '<', + '>': '>', + '"': '"', + "'": ''' + } + + for char, escape in xml_escapes.items(): + text = text.replace(char, escape) + + return text + + def _process_complete_xml_elements(self) -> bool: + """ + Process complete XML elements in buffer + + Returns: + bool: Whether complete elements were found and processed + """ + found_any = False + + while self.last_processed_pos < len(self.streaming_buffer): + # Find next complete xml element + element, end_pos = self._find_next_complete_element( + self.last_processed_pos) + if element is None: + # No complete element found, wait for more data + break + + # Check if this element should be skipped + if self._should_skip_element(element): + self.last_processed_pos = end_pos + continue + + # Found complete XML element, process it + try: + preprocessed_element = self._preprocess_xml_chunk(element) + # Check if this is the first tool_call start + if ((preprocessed_element.strip().startswith('') or + preprocessed_element.strip().startswith('') + and self.tool_call_index > 0 and self.current_call_id): + # Reset parser state but preserve generated deltas + if self.current_param_name: + self._end_element('parameter') + if self.current_function_open or self.current_function_name: + self._end_element('function') + # Output final tool_call tail delta + final_delta = DeltaMessage( + role=None, + content=None, + reasoning_content=None, + tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', 
+ function=DeltaFunctionCall( + name=None, arguments='')) + ]) + self._emit_delta(final_delta) + # Reset XML parser and current call state + self._reset_xml_parser_after_tool_call() + # Parse preprocessed element + self.parser.Parse(preprocessed_element, False) + found_any = True + + except Exception as e: + logger.warning("Error when parsing XML elements: %s", e) + + # Update processed position + self.last_processed_pos = end_pos + + return found_any + + def _should_skip_element(self, element: str) -> bool: + """ + Determine whether an element should be skipped + + Args: + element: Element to evaluate + + Returns: + bool: True means should skip, False means should process + """ + + # If it's a tool_call XML tag, don't skip + if element.startswith( + self.tool_call_start_token) or element.startswith( + self.function_start_token) or element.startswith( + self.parameter_start_token): + return False + + # If currently not parsing tool calls and not blank, + # collect this text instead of skipping + # Only process other XML elements after tool_call appears, + # otherwise treat as plain text + if self.current_call_id is None and element: + # Collect text content to buffer + self.text_content_buffer += element + return True # Still skip, but content has been collected + + # If currently parsing tool calls, + # this might be parameter value, don't skip + if self.current_call_id is not None: + return False + + # Skip blank content + return not element + + def _find_next_complete_element( + self, start_pos: int) -> tuple[Optional[str], int]: + """ + Find next complete XML element from specified position + + Args: + start_pos: Position to start searching + + Returns: + (Complete element string, element end position), + returns (None, start_pos) if no complete element found + """ + buffer = self.streaming_buffer[start_pos:] + + if not buffer: + return None, start_pos + + if buffer.startswith('<'): + # Need to ensure no new < appears, + # find the nearest one between < and > + tag_end = buffer.find('<', 1) + tag_end2 = buffer.find('>', 1) + if tag_end != -1 and tag_end2 != -1: + # Next nearest is < + if tag_end < tag_end2: + return buffer[:tag_end], start_pos + tag_end + # Next nearest is >, means found XML element + else: + return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1 + elif tag_end != -1: + return buffer[:tag_end], start_pos + tag_end + elif tag_end2 != -1: + return buffer[:tag_end2 + 1], start_pos + tag_end2 + 1 + else: + # If currently not parsing tool calls (entering a tool_call), + # check if starts with + if self.current_call_id is None: + # Check if might be start of + if buffer == ''[:len(buffer)]: + # Might be start of , wait for more data + return None, start_pos + else: + # Not start of , treat as text + return buffer, start_pos + len(buffer) + else: + # When parsing tool calls, + # wait for more data to get complete tag + return None, start_pos + else: + # Find text content (until next < or buffer end) + next_tag_pos = buffer.find('<') + if next_tag_pos != -1: + # Found text content + text_content = buffer[:next_tag_pos] + return text_content, start_pos + next_tag_pos + else: + # Buffer end is all text, process + # (no longer wait for more data) + remaining = buffer + return remaining, start_pos + len(remaining) + + def _merge_new_deltas_to_single_response( + self, initial_count: int) -> DeltaMessage: + """ + Merge newly generated deltas from this processing + into a single DeltaMessage + + Args: + initial_count: Delta count before processing + + Returns: + Merged DeltaMessage 
containing all newly generated delta information + """ + if len(self.deltas) <= initial_count: + return DeltaMessage(content=None) + + # Get newly generated deltas + new_deltas = self.deltas[initial_count:] + + if len(new_deltas) == 1: + # Only one new delta, return directly + return new_deltas[0] + + # Merge multiple new deltas + merged_tool_calls: list[DeltaToolCall] = [] + merged_content: str = '' + + for delta in new_deltas: + if delta.content: + merged_content += delta.content + if delta.tool_calls: + # For tool_calls, we need to intelligently merge arguments + for tool_call in delta.tool_calls: + # Find if there's already a tool_call with the same call_id + existing_call = None + for existing in merged_tool_calls: + if existing.id == tool_call.id: + existing_call = existing + break + + if existing_call and existing_call.function: + # Merge to existing tool_call + if tool_call.function and tool_call.function.name: + existing_call.function.name = \ + tool_call.function.name + if tool_call.function \ + and tool_call.function.arguments is not None: + if existing_call.function.arguments is None: + existing_call.function.arguments = '' + + # For streaming JSON parameters, + # simply concatenate in order + new_args = tool_call.function.arguments + existing_call.function.arguments += new_args + if tool_call.type: + existing_call.type = tool_call.type + else: + # Add new tool_call + merged_tool_calls.append(tool_call) + + return DeltaMessage(content=merged_content if merged_content else None, + tool_calls=merged_tool_calls) + + def _preprocess_xml_chunk(self, chunk: str) -> str: + """ + Preprocess XML chunk, handle non-standard formats, + and escape special characters + + Args: + chunk: Original XML chunk + + Returns: + Processed XML chunk + """ + + # Check if this is a tool_call related element + is_tool_call = False + if chunk.startswith(self.tool_call_start_token) or chunk.startswith( + self.tool_call_end_token): + is_tool_call = True + if chunk.startswith(self.function_start_token) or chunk.startswith( + self.function_end_token): + is_tool_call = True + if chunk.startswith(self.parameter_start_token) or chunk.startswith( + self.parameter_end_token): + is_tool_call = True + # Handle format -> + processed = re.sub(r']+)>', r'', + chunk) + # Handle format -> + processed = re.sub(r']+)>', r'', + processed) + + original_chunk = chunk + # If in parameter value accumulation mode + if self._pre_inside_parameter: + # Parameter end: output accumulated raw text + # safely then return + if processed.startswith(''): + body_text = self._pre_param_buffer + # Trigger deferred parsing mode + # literal_eval+json output in end_element + self.defer_current_parameter = True + self.deferred_param_raw_value = body_text + # Clean up state + self._pre_inside_parameter = False + self._pre_param_buffer = "" + self._pre_current_param_name = None + safe_text = self._escape_xml_special_chars(body_text) + return f"{safe_text}" + else: + # If this is the first block of content after entering parameter + # evaluate if deferred parsing is needed; + # If not needed, exit accumulation mode + # and pass through directly + if self._pre_param_buffer == "": + # Get current parameter type + param_type = self._get_param_type( + self._pre_current_param_name + ) if self._pre_current_param_name else 'string' + # Only these types need deferred parsing to + # handle Python literals containing single quotes + is_object_type = param_type in ["object"] + is_complex_type = (param_type + in ["array", "arr", "sequence"] + or 
param_type.startswith("dict") + or param_type.startswith("list")) + + # Only delay when contains container symbols + # and has single quotes and is complex type + has_container_hint = ('[' in original_chunk) or ( + '{' in original_chunk) or ('(' in original_chunk) + + # Determine if deferred parsing is needed + need_defer = False + if is_complex_type: + # Complex type, always need deferred parsing + need_defer = True + elif is_object_type and has_container_hint and ( + "'" in original_chunk): + # Object type with container symbols + # and single quotes, need deferred parsing + need_defer = True + + if not need_defer: + # No need for deferred parsing, + # exit parameter mode directly + self._pre_inside_parameter = False + return self._escape_xml_special_chars(original_chunk) + self._pre_param_buffer += original_chunk + return "" + + # Parameter start: enable accumulation + if processed.startswith('', processed) + if m: + self._pre_current_param_name = m.group(1) + self._pre_inside_parameter = True + self._pre_param_buffer = "" + return processed + + # If processed doesn't contain special_token, escape processed + # This is because XML parsing encounters special characters + # and reports errors, so escaping is needed + if not is_tool_call: + processed = self._escape_xml_special_chars(processed) + return processed + + def _emit_delta(self, delta: DeltaMessage): + """Emit Delta response (streaming output)""" + self.deltas.append(delta) + + def _auto_close_open_parameter_if_needed(self, + incoming_tag: Optional[str] = None + ): + """Before starting to process new elements, + if there are unclosed tags from before, + automatically complete their endings to the parser. + - If there are unclosed parameters, + it's equivalent to feeding `` + - When about to start a new function or tool_call, + if there are unclosed functions, complete ``. + - When about to start a new tool_call, + if there are unclosed tool_calls, complete ``. 
+ """ + # First close unclosed parameters + if self.current_param_name: + self._end_element('parameter') + + # If about to start new function or tool_call, + # and there are unclosed functions, close function first + if incoming_tag in ('function', + 'tool_call') and self.current_function_name: + self._end_element('function') + + # If about to start new tool_call, + # and there are unclosed tool_calls, close tool_call first + if incoming_tag == 'tool_call' and self.current_call_id: + self._end_element('tool_call') + + def _start_element(self, name: str, attrs: dict[str, str]): + """Handle XML start element events""" + + if name == 'root': + return + + if name == 'tool_call': + # Before opening new tool_call, + # automatically complete previous unclosed tags + self._auto_close_open_parameter_if_needed('tool_call') + + self.parameters = {} + self.current_call_id = self._get_next_call_id() + self.current_param_is_first = True + self.tool_call_index += 1 + elif name.startswith('function') or (name == 'function'): + # If missing tool_call, manually complete + if not self.current_call_id: + self._start_element('tool_call', {}) + # Before opening new function, + # automatically complete previous unclosed tags (parameter/function) + self._auto_close_open_parameter_if_needed('function') + function_name = self._extract_function_name(name, attrs) + self.current_function_name = function_name + self.current_function_open = True + if function_name: + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=function_name, arguments='')) + ]) + self._emit_delta(delta) + elif name.startswith('parameter') or (name == 'parameter'): + # If previous parameter hasn't ended normally, + # complete its end first, then start new parameter + self._auto_close_open_parameter_if_needed('parameter') + param_name = self._extract_parameter_name(name, attrs) + self.current_param_name = param_name + self.current_param_value = '' + self.current_param_value_converted = '' + self.start_quote_emitted = False # Reset start quote flag + + # Only output parameter name and colon, + # don't output quotes + # decide after parameter value type is determined + if param_name: + if not self.parameters: + # First parameter + # start JSON, only output parameter name and colon + json_start = f'{{"{param_name}": ' + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=None, arguments=json_start)) + ]) + self._emit_delta(delta) + self.current_param_is_first = True + else: + # Subsequent parameters + # add comma and parameter name, no quotes + json_continue = f', "{param_name}": ' + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=None, arguments=json_continue)) + ]) + self._emit_delta(delta) + self.current_param_is_first = False + + def _char_data(self, data: str): + """Handle XML character data events""" + if data and self.current_param_name: + # If preprocessing stage determines deferred parsing is needed, + # only cache character data, no streaming output + if self.defer_current_parameter: + original_data = data + if self.should_emit_end_newline: + original_data = '\n' + original_data + self.should_emit_end_newline = False + if original_data.endswith('\n'): + self.should_emit_end_newline = True + original_data 
= original_data[:-1] + self.current_param_value += original_data + return + + param_type = self._get_param_type(self.current_param_name) + + # Check if this is the first time receiving data for this parameter + # If this is the first packet of data and starts with \n, remove \n + if not self.current_param_value and data.startswith('\n'): + data = data[1:] + + # Output start quote for string type (if not already output) + if (param_type + in ['string', 'str', 'text', 'varchar', 'char', 'enum'] + and not self.start_quote_emitted): + quote_delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall(name=None, + arguments='"')) + ]) + self._emit_delta(quote_delta) + self.start_quote_emitted = True + + if not data: + return + + original_data = data + # Delay output of trailing newline + if self.should_emit_end_newline: + original_data = '\n' + original_data + self.should_emit_end_newline = False + if original_data.endswith('\n'): + self.should_emit_end_newline = True + original_data = original_data[:-1] + self.current_param_value += original_data + + # convert parameter value by param_type + converted_value = self._convert_param_value( + self.current_param_value, param_type) + output_data = self._convert_for_json_streaming( + converted_value, param_type) + + delta_data = output_data[len(self.current_param_value_converted):] + self.current_param_value_converted = output_data + + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall(name=None, + arguments=delta_data)) + ]) + self._emit_delta(delta) + + def _end_element(self, name: str): + """Handle XML end element events""" + + if name == 'root': + return + + # If function or tool_call ends and there are still unclosed parameters, + # complete parameter end first + if (name.startswith('function') or name == 'function' + or name == 'tool_call') and self.current_param_name: + self._auto_close_open_parameter_if_needed() + + if (name.startswith('parameter') + or name == 'parameter') and self.current_param_name: + # End current parameter + param_name = self.current_param_name + param_value = self.current_param_value + + # If in deferred parsing mode, + # perform overall parsing on raw content + # accumulated in preprocessing stage and output once + if self.defer_current_parameter: + raw_text = self.deferred_param_raw_value \ + if self.deferred_param_raw_value else param_value + parsed_value = None + output_arguments = None + try: + # If previously delayed trailing newline, + # add it back before parsing + if self.should_emit_end_newline: + raw_for_parse = raw_text + '\n' + else: + raw_for_parse = raw_text + parsed_value = ast.literal_eval(raw_for_parse) + output_arguments = json.dumps(parsed_value, + ensure_ascii=False) + except Exception: + # Fallback: output as string as-is + output_arguments = json.dumps(raw_text, ensure_ascii=False) + parsed_value = raw_text + + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=None, arguments=output_arguments)) + ]) + self._emit_delta(delta) + + # Clean up and store + self.should_emit_end_newline = False + self.parameters[param_name] = parsed_value + self.current_param_name = None + self.current_param_value = "" + self.current_param_value_converted = "" + self.start_quote_emitted = False + 
self.defer_current_parameter = False + self.deferred_param_raw_value = "" + return + + param_type = self._get_param_type(param_name) + + # convert complete parameter value by param_type + converted_value = self._convert_param_value( + param_value, param_type) + + # Decide whether to add end quote based on parameter type + if param_type in [ + 'string', 'str', 'text', 'varchar', 'char', 'enum' + ]: + # For empty string parameters, need special handling + if not param_value and not self.start_quote_emitted: + # No start quote output, + # directly output complete empty string + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=None, arguments='""')) + ]) + self._emit_delta(delta) + else: + # Non-empty parameter value, output end quote + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall( + name=None, arguments='"')) + ]) + self._emit_delta(delta) + + self.should_emit_end_newline = False + # Store converted value + self.parameters[param_name] = converted_value + self.current_param_name = None + self.current_param_value = '' + self.current_param_value_converted = '' + self.start_quote_emitted = False + + elif name.startswith('function') or name == 'function': + # if there are parameters, close JSON object + if self.parameters: + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall(name=None, + arguments='}')) + ]) + self._emit_delta(delta) + # return empty object + else: + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall(name=None, + arguments='{}')) + ]) + self._emit_delta(delta) + self.current_function_open = False + + elif name == 'tool_call': + # Before ending tool_call, + # ensure function is closed to complete missing right brace + if self.current_function_open: + # If there are still unclosed parameters, close them first + if self.current_param_name: + self._end_element('parameter') + # Close function, ensure output '}' or '{}' + self._end_element('function') + # Final Delta + delta = DeltaMessage(tool_calls=[ + DeltaToolCall(index=self.tool_call_index - 1, + id=self.current_call_id, + type='function', + function=DeltaFunctionCall(name=None, + arguments='')) + ]) + self._emit_delta(delta) + + # Check if there's text content to output (between tool_calls) + if self.text_content_buffer.strip(): + text_delta = DeltaMessage(content=self.text_content_buffer) + self._emit_delta(text_delta) + + self._reset_xml_parser_after_tool_call() + + def setup_parser(self): + """Set up XML parser event handlers""" + self.parser.buffer_text = True + self.parser.StartElementHandler = self._start_element + self.parser.EndElementHandler = self._end_element + self.parser.CharacterDataHandler = self._char_data + + def set_tools(self, tools: Union[list[ChatCompletionToolsParam], None]): + """Set tool configuration information""" + self.tools = tools + + def _get_next_call_id(self): + """Generate unique call ID""" + return f'call_{uuid.uuid4().hex[:24]}' + + def _extract_function_name(self, name: str, + attrs: dict[str, str]) -> Optional[str]: + """Extract function name from various formats""" + if attrs and 'name' in attrs: + return attrs['name'] + + if '=' in name: + parts = 
name.split('=', 1) + if len(parts) == 2 and parts[0] == 'function': + return parts[1] + + return None + + def _extract_parameter_name(self, name: str, + attrs: dict[str, str]) -> Optional[str]: + """Extract parameter name from various formats""" + if attrs and 'name' in attrs: + return attrs['name'] + + if '=' in name: + parts = name.split('=', 1) + if len(parts) == 2 and parts[0] == 'parameter': + return parts[1] + + return None + + def _get_param_type(self, param_name: str) -> str: + """Get parameter type based on tool configuration, defaults to string + Args: + param_name: Parameter name + + Returns: + Parameter type + """ + if not self.tools or not self.current_function_name: + return 'string' + + for tool in self.tools: + if not hasattr(tool, 'type') or not (hasattr( + tool, 'function') and hasattr(tool.function, 'name')): + continue + if tool.type == 'function' and \ + tool.function.name == self.current_function_name: + if not hasattr(tool.function, 'parameters'): + return 'string' + params = tool.function.parameters + if isinstance(params, dict) and 'properties' in params: + properties = params['properties'] + if param_name in properties and isinstance( + properties[param_name], dict): + return self.repair_param_type( + str(properties[param_name].get('type', 'string'))) + elif isinstance(params, dict) and param_name in params: + param_config = params[param_name] + if isinstance(param_config, dict): + return self.repair_param_type( + str(param_config.get('type', 'string'))) + break + return 'string' + + def repair_param_type(self, param_type: str) -> str: + """Repair unknown parameter types by treating them as string + Args: + param_type: Parameter type + + Returns: + Repaired parameter type + """ + if param_type in [ + 'string', 'str', 'text', 'varchar', 'char', 'enum' + ] or param_type.startswith('int') or param_type.startswith( + 'uint' + ) or param_type.startswith('long') or param_type.startswith( + 'short' + ) or param_type.startswith('unsigned') or param_type.startswith( + 'num') or param_type.startswith('float') or param_type in [ + 'boolean', 'bool', 'binary' + ] or (param_type in ["object", "array", "arr", "sequence"] + or param_type.startswith("dict") + or param_type.startswith("list")): + return param_type + else: + return 'string' + + def _convert_param_value(self, param_value: str, param_type: str) -> Any: + """Convert value based on parameter type + Args: + param_value: Parameter value + param_type: Parameter type + + Returns: + Converted value + """ + if param_value.lower() == 'null': + return None + + param_type = param_type.strip().lower() + if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']: + return param_value + elif (param_type.startswith('int') or param_type.startswith('uint') + or param_type.startswith('long') + or param_type.startswith('short') + or param_type.startswith('unsigned')): + try: + return int(param_value) + except (ValueError, TypeError): + logger.warning( + "Parsed value '%s' of parameter '%s' is not an integer " + "in tool '%s', degenerating to string.", param_value) + return param_value + elif param_type.startswith('num') or param_type.startswith('float'): + try: + float_param_value: float = float(param_value) + return float_param_value if float_param_value - int( + float_param_value) != 0 else int(float_param_value) + except (ValueError, TypeError): + logger.warning( + "Parsed value '%s' of parameter '%s' is not a float " + "in tool '%s', degenerating to string.", param_value) + return param_value + elif param_type in 
['boolean', 'bool', 'binary']: + param_value = param_value.lower() + return param_value == 'true' + else: + return param_value + + def _convert_for_json_streaming(self, converted_value: Any, + param_type: str) -> str: + """Convert converted_value based on + whether it's empty and if type is string + Args: + converted_value: Converted value + param_type: Parameter type + + Returns: + Converted string for streaming output + """ + # Check if value is empty, but exclude numeric 0 + if converted_value is None or converted_value == '': + return '' + + if param_type in ['string', 'str', 'text', 'varchar', 'char', 'enum']: + # String type, remove double quotes + return json.dumps(converted_value, ensure_ascii=False)[1:-1] + else: + # Non-string type, return complete JSON string + if not isinstance(converted_value, str): + return json.dumps(converted_value, ensure_ascii=False) + else: + return converted_value + + def _reset_xml_parser_after_tool_call(self): + """ + Each tool_call is treated as a separate XML document, + so we need to reset the parser after each tool_call. + """ + + # recreate XML parser + self.parser = ParserCreate() + self.setup_parser() + + # Reset current tool_call state + if self.current_call_id: + self.last_completed_call_id = self.current_call_id + self.current_call_id = None + self.current_function_name = None + self.current_function_open = False + self.parameters = {} + self.current_param_name = None + self.current_param_value = '' + self.current_param_value_converted = '' + self.current_param_is_first = False + self.should_emit_end_newline = False + self.start_quote_emitted = False + self.text_content_buffer = '' + + # Reset preprocessing and deferred parsing state + self._pre_inside_parameter = False + self._pre_param_buffer = "" + self._pre_current_param_name = None + self.defer_current_parameter = False + self.deferred_param_raw_value = "" + + +@ToolParserManager.register_module("qwen3_xml") +class Qwen3XMLToolParser(ToolParser): + + def __init__(self, tokenizer: AnyTokenizer): + super().__init__(tokenizer) + self.parser = StreamingXMLToolCallParser() + + logger.info("vLLM Successfully import tool parser %s !", + self.__class__.__name__) + + def extract_tool_calls( + self, + model_output: str, + request: ChatCompletionRequest, + ) -> ExtractedToolCallInformation: + self.parser.reset_streaming_state() + if request: + self.parser.set_tools(request.tools) + result = self.parser.parse_single_streaming_chunks(model_output) + if not result.tool_calls: + return ExtractedToolCallInformation( + tool_calls=[], + tools_called=False, + content=result.content, + ) + else: + tool_calls = [] + for tool_call in result.tool_calls: + if tool_call.function and tool_call.function.name: + tool_calls.append( + ToolCall( + id=tool_call.id, + type=tool_call.type, + function=FunctionCall( + name=tool_call.function.name, + arguments=tool_call.function.arguments, + ), + )) + return ExtractedToolCallInformation( + tool_calls=tool_calls, + tools_called=len(tool_calls) > 0, + content=result.content, + ) + + def extract_tool_calls_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + request: ChatCompletionRequest, + ) -> Union[DeltaMessage, None]: + if not previous_text: + self.parser.reset_streaming_state() + if request: + self.parser.set_tools(request.tools) + + # Model sometimes outputs separately causing delta_text to be empty. 
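The wrapper class above reuses the streaming state machine for the one-shot path: `extract_tool_calls` resets the parser, feeds the whole completion as a single chunk, and repackages the resulting deltas as `ToolCall` objects. A minimal sketch of exercising that path directly (the tokenizer handle, model name, and output string are placeholders; in practice the parser is driven by the OpenAI-compatible serving layer rather than called by hand):

from vllm.entrypoints.openai.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.tool_parsers.qwen3xml_tool_parser import (
    Qwen3XMLToolParser)


def demo_extract(tokenizer, model_output: str):
    # Non-streaming extraction: one call over the full model output.
    parser = Qwen3XMLToolParser(tokenizer)
    request = ChatCompletionRequest(model="some-qwen3-model", messages=[])
    info = parser.extract_tool_calls(model_output, request=request)
    # tools_called is False when no tool call was parsed; in that case
    # the raw text comes back unchanged in info.content.
    return info.tool_calls, info.content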
+ # If there were tool_calls before and all current tool_calls have ended, + # return an empty tool_call for outer streaming output + # to correctly output tool_call field + if not delta_text and delta_token_ids: + open_calls = current_text.count( + self.parser.tool_call_start_token) - current_text.count( + self.parser.tool_call_end_token) + if open_calls == 0 and self.parser.tool_call_index > 0: + # If current_call_id is None, use last_completed_call_id + call_id = self.parser.current_call_id or \ + self.parser.last_completed_call_id + return DeltaMessage(tool_calls=[ + DeltaToolCall( + index=self.parser.tool_call_index - 1, + id=call_id, + function=DeltaFunctionCall(arguments=''), + type='function', + ) + ]) + + return self.parser.parse_single_streaming_chunks(delta_text) From babad6e5dde380b453be716c7ebc2b6928658c18 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 23 Sep 2025 17:20:52 +0800 Subject: [PATCH 263/518] [Misc] Move DP for ViT code inside model executor dir (#25459) Signed-off-by: DarkLight1337 --- tests/models/test_vision.py | 424 ++++++++++++++++- tests/multimodal/test_utils.py | 426 +----------------- vllm/model_executor/models/glm4_1v.py | 3 +- .../models/idefics2_vision_model.py | 3 +- vllm/model_executor/models/intern_vit.py | 3 +- vllm/model_executor/models/kimi_vl.py | 2 +- vllm/model_executor/models/mllama4.py | 2 +- vllm/model_executor/models/qwen2_5_vl.py | 3 +- vllm/model_executor/models/qwen2_vl.py | 3 +- vllm/model_executor/models/qwen3_vl.py | 6 +- vllm/model_executor/models/step3_vl.py | 2 +- vllm/model_executor/models/vision.py | 281 +++++++++++- vllm/multimodal/utils.py | 293 +----------- 13 files changed, 721 insertions(+), 730 deletions(-) diff --git a/tests/models/test_vision.py b/tests/models/test_vision.py index 310d3a3719b6..8744bcbd3a2a 100644 --- a/tests/models/test_vision.py +++ b/tests/models/test_vision.py @@ -1,10 +1,20 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import math import pytest import torch +import torch.multiprocessing as mp -from vllm.model_executor.models.vision import resolve_visual_encoder_outputs +from tests.utils import multi_gpu_test +from vllm.distributed import get_tensor_model_parallel_world_size +from vllm.distributed.parallel_state import (init_distributed_environment, + initialize_model_parallel) +from vllm.model_executor.models.vision import ( + get_load_balance_assignment, resolve_visual_encoder_outputs, + run_dp_sharded_mrope_vision_model, run_dp_sharded_vision_model) +from vllm.platforms import current_platform +from vllm.utils import get_open_port, update_environment_variables @pytest.mark.parametrize( @@ -33,3 +43,415 @@ def test_resolve_visual_encoder_outputs(feature_sample_layers, post_layer_norm=None, max_possible_layers=max_possible_layers) assert torch.equal(torch.tensor(expected_features), output_tensor) + + +class SimpleLinearModel(torch.nn.Module): + """A simple linear vision model for testing.""" + + def __init__(self, input_dim: int = 3 * 224 * 224, output_dim: int = 32): + super().__init__() + self.flatten = torch.nn.Flatten() + self.linear = torch.nn.Linear(input_dim, output_dim) + + def forward(self, x: torch.Tensor): + # Flatten the input and apply linear transformation + x = self.flatten(x) + return self.linear(x) + + +@multi_gpu_test(num_gpus=2) +@pytest.mark.parametrize( + "batch_size", + [ + 1, # Single image + 4, # Small batch + 5, # Odd batch size (for testing padding) + ], +) +def test_run_dp_sharded_vision_model(batch_size: 
int): + world_size = 2 + # Launch processes + mp.spawn( + run_dp_sharded_vision_model_vs_direct, + args=( + world_size, + batch_size, + get_open_port(), + ), + nprocs=world_size, + ) + + +def run_dp_sharded_vision_model_vs_direct(local_rank: int, world_size: int, + batch_size: int, master_port: int): + """ + Test that run_dp_sharded_vision_model produces the same results as + calling the model directly. + """ + + # Set random seed for reproducibility + current_platform.seed_everything(0) + + device = f"{current_platform.device_name}:{local_rank}" + current_platform.set_device(device) + torch.set_default_device(device) + + update_environment_variables({ + 'RANK': str(local_rank), + 'LOCAL_RANK': str(local_rank), + 'WORLD_SIZE': str(world_size), + 'MASTER_ADDR': 'localhost', + 'MASTER_PORT': str(master_port), + }) + + # initialize distributed + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) + + # Create a test input tensor + image_input = torch.randn(batch_size, 3, 224, 224) + + # Create a simple linear model + vision_model = SimpleLinearModel() + + # Run the model directly on the full input + with torch.inference_mode(): + direct_output = vision_model(image_input) + + # Run the model through the sharded function + with torch.inference_mode(): + sharded_output = run_dp_sharded_vision_model(image_input, vision_model) + + # Check that the world size is set up correctly + assert get_tensor_model_parallel_world_size() == world_size + + # Check that the outputs have the same shape + assert direct_output.shape == sharded_output.shape + + # Check that the outputs are close (they should be identical) + assert torch.allclose(direct_output, sharded_output, rtol=1e-5, atol=1e-5) + + +@pytest.mark.parametrize( + "sizes,num_gpus,expected_shuffle_indices,expected_gpu_sample_counts," + "expected_grouped_sizes_per_gpu,test_description", + [ + # Empty input + ([], 2, [], [0, 0], [0, 0], "empty input"), + + # Fewer samples than GPUs + ([100, 200], 4, [1, 0], [1, 1, 0, 0], [200, 100, 0, 0 + ], "fewer samples than GPUs"), + + # Single GPU + ([100, 200, 300], 1, [2, 1, 0], [3], [600], "single GPU"), + + # Balanced assignment + ([100, 100, 100, 100 + ], 2, [0, 2, 1, 3], [2, 2], [200, 200], "balanced assignment"), + + # Unbalanced sizes - this one is trickier since the algorithm is greedy + ([1000, 100, 200, 50], 2, [0, 2, 1, 3 + ], [1, 3], [1000, 350], "unbalanced sizes"), + ], +) +def test_get_load_balance_assignment_cases(sizes, num_gpus, + expected_shuffle_indices, + expected_gpu_sample_counts, + expected_grouped_sizes_per_gpu, + test_description): + """Test get_load_balance_assignment with various input cases.""" + result = get_load_balance_assignment(sizes, num_gpus=num_gpus) + (shuffle_indices, gpu_sample_counts, grouped_sizes_per_gpu) = result + + # Common assertions for all cases + assert len(shuffle_indices) == len(sizes) + assert len(gpu_sample_counts) == num_gpus + assert len(grouped_sizes_per_gpu) == num_gpus + assert sum(gpu_sample_counts) == len(sizes) + + assert shuffle_indices == expected_shuffle_indices + + assert gpu_sample_counts == expected_gpu_sample_counts + assert grouped_sizes_per_gpu == expected_grouped_sizes_per_gpu + + +class SimpleMRopeVisionModel(torch.nn.Module): + """A simple vision model for testing mrope functionality.""" + + def __init__(self, spatial_merge_size: int = 2, out_hidden_size: int = 64): + super().__init__() + self.spatial_merge_size = spatial_merge_size + self.out_hidden_size = out_hidden_size + self.linear = 
torch.nn.Linear(768, out_hidden_size) + + def forward(self, pixel_values: torch.Tensor, + grid_thw_list: list[list[int]]): + """Simple forward pass that simulates spatial merging.""" + # Apply linear transformation + embeddings = self.linear(pixel_values) + + # Simulate spatial merging by reducing the number of patches + merge_factor = self.spatial_merge_size * self.spatial_merge_size + + # Group patches and merge spatially + merged_embeddings = [] + start_idx = 0 + + for grid_thw in grid_thw_list: + num_patches = math.prod(grid_thw) + end_idx = start_idx + num_patches + + # Get patches for this image + image_patches = embeddings[start_idx:end_idx] + + # Simulate spatial merging by averaging groups of patches + merged_patches = num_patches // merge_factor + if merged_patches > 0: + # Reshape and average to simulate merging + reshaped = image_patches[:merged_patches * merge_factor].view( + merged_patches, merge_factor, -1) + merged = reshaped.mean(dim=1) + merged_embeddings.append(merged) + + start_idx = end_idx + + if merged_embeddings: + return torch.cat(merged_embeddings, dim=0) + else: + return torch.empty((0, self.out_hidden_size), + device=pixel_values.device, + dtype=pixel_values.dtype) + + +@multi_gpu_test(num_gpus=2) +@pytest.mark.parametrize( + "batch_size", + [ + 1, # Single image + 3, # Small batch + 5, # Odd batch size (for testing padding) + ], +) +def test_run_dp_sharded_mrope_vision_model(batch_size: int): + world_size = 2 + # Launch processes + mp.spawn( + run_dp_sharded_mrope_vision_model_vs_direct, + args=( + world_size, + batch_size, + get_open_port(), + ), + nprocs=world_size, + ) + + +def run_dp_sharded_mrope_vision_model_vs_direct(local_rank: int, + world_size: int, + batch_size: int, + master_port: int): + """ + Test that run_dp_sharded_mrope_vision_model produces the same results as + calling the model directly. 
+ """ + # Set random seed for reproducibility + current_platform.seed_everything(0) + device = f"{current_platform.device_name}:{local_rank}" + current_platform.set_device(device) + torch.set_default_device(device) + + update_environment_variables({ + 'RANK': str(local_rank), + 'LOCAL_RANK': str(local_rank), + 'WORLD_SIZE': str(world_size), + 'MASTER_ADDR': 'localhost', + 'MASTER_PORT': str(master_port), + }) + + # initialize distributed + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) + + # Create test data + grid_thw_list = [] + pixel_values_list = [] + + for i in range(batch_size): + # Varying image sizes for better testing + t, h, w = 1, 4 + i, 4 + i + grid_thw_list.append([t, h, w]) + + num_patches = t * h * w + # Create random pixel values for this image + image_pixels = torch.randn(num_patches, 768) + pixel_values_list.append(image_pixels) + + # Concatenate all pixel values + pixel_values = torch.cat(pixel_values_list, dim=0) + + # Create a simple mrope vision model + vision_model = SimpleMRopeVisionModel() + + # Run the model directly on the full input (only on rank 0) + if local_rank == 0: + with torch.inference_mode(): + direct_output = vision_model(pixel_values, grid_thw_list) + + # Run the model through the sharded function + with torch.inference_mode(): + sharded_output = run_dp_sharded_mrope_vision_model(vision_model, + pixel_values, + grid_thw_list, + rope_type="rope_3d") + sharded_output = torch.cat(sharded_output, dim=0) + + # Check that the world size is set up correctly + assert get_tensor_model_parallel_world_size() == world_size + + # Compare outputs (only on rank 0) + if local_rank == 0: + # Check that the outputs have the same shape + assert direct_output.shape == sharded_output.shape + # Check that the outputs are close (they should be identical) + assert torch.allclose(direct_output, + sharded_output, + rtol=1e-5, + atol=1e-5) + + +@multi_gpu_test(num_gpus=2) +def test_run_dp_sharded_mrope_vision_model_empty_input(): + world_size = 2 + mp.spawn( + run_dp_sharded_mrope_vision_model_empty_input_worker, + args=(world_size, get_open_port()), + nprocs=world_size, + ) + + +def run_dp_sharded_mrope_vision_model_empty_input_worker( + local_rank: int, world_size: int, master_port: int): + """Test run_dp_sharded_mrope_vision_model with empty input.""" + # Set up distributed environment + device = f"{current_platform.device_name}:{local_rank}" + current_platform.set_device(device) + torch.set_default_device(device) + + update_environment_variables({ + 'RANK': str(local_rank), + 'LOCAL_RANK': str(local_rank), + 'WORLD_SIZE': str(world_size), + 'MASTER_ADDR': 'localhost', + 'MASTER_PORT': str(master_port), + }) + + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) + + # Create empty inputs + pixel_values = torch.empty((0, 768)) + grid_thw_list: list[list[int]] = [] + + vision_model = SimpleMRopeVisionModel() + + # Should handle empty input gracefully + with torch.inference_mode(): + output = run_dp_sharded_mrope_vision_model(vision_model, + pixel_values, + grid_thw_list, + rope_type="rope_3d") + + assert len(output) == 0 + + +@multi_gpu_test(num_gpus=4) +def test_run_dp_sharded_mrope_vision_model_uneven_load(): + world_size = 4 + mp.spawn( + run_dp_sharded_mrope_vision_model_uneven_load_worker, + args=(world_size, get_open_port()), + nprocs=world_size, + ) + + +def run_dp_sharded_mrope_vision_model_uneven_load_worker( + local_rank: int, world_size: int, master_port: int): + 
"""Test run_dp_sharded_mrope_vision_model with uneven load distribution.""" + # Set up distributed environment + current_platform.seed_everything(123) + device = f"{current_platform.device_name}:{local_rank}" + current_platform.set_device(device) + torch.set_default_device(device) + + update_environment_variables({ + 'RANK': str(local_rank), + 'LOCAL_RANK': str(local_rank), + 'WORLD_SIZE': str(world_size), + 'MASTER_ADDR': 'localhost', + 'MASTER_PORT': str(master_port), + }) + + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) + + # Create images with very different sizes + grid_thw_list = [ + [1, 2, 2], # Small: 4 patches + [1, 8, 8], # Large: 64 patches + [1, 3, 3], # Medium: 9 patches + ] + + pixel_values_list = [] + for grid_thw in grid_thw_list: + num_patches = math.prod(grid_thw) + image_pixels = torch.randn(num_patches, 768) + pixel_values_list.append(image_pixels) + + pixel_values = torch.cat(pixel_values_list, dim=0) + vision_model = SimpleMRopeVisionModel() + + # Should handle uneven distribution without errors + with torch.inference_mode(): + output_tuple = run_dp_sharded_mrope_vision_model(vision_model, + pixel_values, + grid_thw_list, + rope_type="rope_3d") + + # Verify output shape is reasonable + merge_factor = vision_model.spatial_merge_size**2 + expected_output_patches = list( + math.prod(grid_thw) // merge_factor for grid_thw in grid_thw_list) + + for i, output in enumerate(output_tuple): + assert output.shape[0] == expected_output_patches[i] + assert output.shape[1] == vision_model.out_hidden_size + + +@pytest.mark.parametrize("spatial_merge_size", [2, 4]) +def test_simple_mrope_vision_model_spatial_merge(spatial_merge_size: int): + """Test SimpleMRopeVisionModel with different spatial merge sizes.""" + device = current_platform.device_type + + grid_thw_list = [[1, 4, 4], [1, 6, 6]] # Two images + pixel_values_list = [] + + for grid_thw in grid_thw_list: + num_patches = math.prod(grid_thw) + image_pixels = torch.randn(num_patches, 768, device=device) + pixel_values_list.append(image_pixels) + + pixel_values = torch.cat(pixel_values_list, dim=0) + vision_model = SimpleMRopeVisionModel( + spatial_merge_size=spatial_merge_size).to(device) + + with torch.inference_mode(): + output = vision_model(pixel_values, grid_thw_list) + + # Verify output dimensions based on spatial merging + total_patches = sum(math.prod(grid_thw) for grid_thw in grid_thw_list) + merge_factor = spatial_merge_size**2 + expected_output_patches = total_patches // merge_factor + + assert output.shape[0] == expected_output_patches + assert output.shape[1] == vision_model.out_hidden_size diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index e1e8282dd66d..f36d94ca0155 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -2,7 +2,6 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import base64 -import math import mimetypes import os from tempfile import NamedTemporaryFile, TemporaryDirectory @@ -10,22 +9,11 @@ import numpy as np import pytest -import torch -import torch.multiprocessing as mp from PIL import Image, ImageChops -from tests.utils import multi_gpu_test -from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.distributed.parallel_state import (init_distributed_environment, - initialize_model_parallel) from vllm.multimodal.image import convert_image_mode from vllm.multimodal.inputs import PlaceholderRange -from vllm.multimodal.utils import 
(MediaConnector, argsort_mm_positions, - get_load_balance_assignment, - run_dp_sharded_mrope_vision_model, - run_dp_sharded_vision_model) -from vllm.platforms import current_platform -from vllm.utils import get_open_port, update_environment_variables +from vllm.multimodal.utils import MediaConnector, argsort_mm_positions if TYPE_CHECKING: from vllm.multimodal.inputs import MultiModalPlaceholderDict @@ -404,415 +392,3 @@ def test_argsort_mm_positions(): modality_idxs = argsort_mm_positions(mm_positions) assert modality_idxs == expected_modality_idxs - - -class SimpleLinearModel(torch.nn.Module): - """A simple linear vision model for testing.""" - - def __init__(self, input_dim: int = 3 * 224 * 224, output_dim: int = 32): - super().__init__() - self.flatten = torch.nn.Flatten() - self.linear = torch.nn.Linear(input_dim, output_dim) - - def forward(self, x: torch.Tensor): - # Flatten the input and apply linear transformation - x = self.flatten(x) - return self.linear(x) - - -@multi_gpu_test(num_gpus=2) -@pytest.mark.parametrize( - "batch_size", - [ - 1, # Single image - 4, # Small batch - 5, # Odd batch size (for testing padding) - ], -) -def test_run_dp_sharded_vision_model(batch_size: int): - world_size = 2 - # Launch processes - mp.spawn( - run_dp_sharded_vision_model_vs_direct, - args=( - world_size, - batch_size, - get_open_port(), - ), - nprocs=world_size, - ) - - -def run_dp_sharded_vision_model_vs_direct(local_rank: int, world_size: int, - batch_size: int, master_port: int): - """ - Test that run_dp_sharded_vision_model produces the same results as - calling the model directly. - """ - - # Set random seed for reproducibility - current_platform.seed_everything(0) - - device = f"{current_platform.device_name}:{local_rank}" - current_platform.set_device(device) - torch.set_default_device(device) - - update_environment_variables({ - 'RANK': str(local_rank), - 'LOCAL_RANK': str(local_rank), - 'WORLD_SIZE': str(world_size), - 'MASTER_ADDR': 'localhost', - 'MASTER_PORT': str(master_port), - }) - - # initialize distributed - init_distributed_environment() - initialize_model_parallel(tensor_model_parallel_size=world_size) - - # Create a test input tensor - image_input = torch.randn(batch_size, 3, 224, 224) - - # Create a simple linear model - vision_model = SimpleLinearModel() - - # Run the model directly on the full input - with torch.inference_mode(): - direct_output = vision_model(image_input) - - # Run the model through the sharded function - with torch.inference_mode(): - sharded_output = run_dp_sharded_vision_model(image_input, vision_model) - - # Check that the world size is set up correctly - assert get_tensor_model_parallel_world_size() == world_size - - # Check that the outputs have the same shape - assert direct_output.shape == sharded_output.shape - - # Check that the outputs are close (they should be identical) - assert torch.allclose(direct_output, sharded_output, rtol=1e-5, atol=1e-5) - - -@pytest.mark.parametrize( - "sizes,num_gpus,expected_shuffle_indices,expected_gpu_sample_counts," - "expected_grouped_sizes_per_gpu,test_description", - [ - # Empty input - ([], 2, [], [0, 0], [0, 0], "empty input"), - - # Fewer samples than GPUs - ([100, 200], 4, [1, 0], [1, 1, 0, 0], [200, 100, 0, 0 - ], "fewer samples than GPUs"), - - # Single GPU - ([100, 200, 300], 1, [2, 1, 0], [3], [600], "single GPU"), - - # Balanced assignment - ([100, 100, 100, 100 - ], 2, [0, 2, 1, 3], [2, 2], [200, 200], "balanced assignment"), - - # Unbalanced sizes - this one is trickier since the algorithm is 
greedy - ([1000, 100, 200, 50], 2, [0, 2, 1, 3 - ], [1, 3], [1000, 350], "unbalanced sizes"), - ], -) -def test_get_load_balance_assignment_cases(sizes, num_gpus, - expected_shuffle_indices, - expected_gpu_sample_counts, - expected_grouped_sizes_per_gpu, - test_description): - """Test get_load_balance_assignment with various input cases.""" - result = get_load_balance_assignment(sizes, num_gpus=num_gpus) - (shuffle_indices, gpu_sample_counts, grouped_sizes_per_gpu) = result - - # Common assertions for all cases - assert len(shuffle_indices) == len(sizes) - assert len(gpu_sample_counts) == num_gpus - assert len(grouped_sizes_per_gpu) == num_gpus - assert sum(gpu_sample_counts) == len(sizes) - - assert shuffle_indices == expected_shuffle_indices - - assert gpu_sample_counts == expected_gpu_sample_counts - assert grouped_sizes_per_gpu == expected_grouped_sizes_per_gpu - - -class SimpleMRopeVisionModel(torch.nn.Module): - """A simple vision model for testing mrope functionality.""" - - def __init__(self, spatial_merge_size: int = 2, out_hidden_size: int = 64): - super().__init__() - self.spatial_merge_size = spatial_merge_size - self.out_hidden_size = out_hidden_size - self.linear = torch.nn.Linear(768, out_hidden_size) - - def forward(self, pixel_values: torch.Tensor, - grid_thw_list: list[list[int]]): - """Simple forward pass that simulates spatial merging.""" - # Apply linear transformation - embeddings = self.linear(pixel_values) - - # Simulate spatial merging by reducing the number of patches - merge_factor = self.spatial_merge_size * self.spatial_merge_size - - # Group patches and merge spatially - merged_embeddings = [] - start_idx = 0 - - for grid_thw in grid_thw_list: - num_patches = math.prod(grid_thw) - end_idx = start_idx + num_patches - - # Get patches for this image - image_patches = embeddings[start_idx:end_idx] - - # Simulate spatial merging by averaging groups of patches - merged_patches = num_patches // merge_factor - if merged_patches > 0: - # Reshape and average to simulate merging - reshaped = image_patches[:merged_patches * merge_factor].view( - merged_patches, merge_factor, -1) - merged = reshaped.mean(dim=1) - merged_embeddings.append(merged) - - start_idx = end_idx - - if merged_embeddings: - return torch.cat(merged_embeddings, dim=0) - else: - return torch.empty((0, self.out_hidden_size), - device=pixel_values.device, - dtype=pixel_values.dtype) - - -@multi_gpu_test(num_gpus=2) -@pytest.mark.parametrize( - "batch_size", - [ - 1, # Single image - 3, # Small batch - 5, # Odd batch size (for testing padding) - ], -) -def test_run_dp_sharded_mrope_vision_model(batch_size: int): - world_size = 2 - # Launch processes - mp.spawn( - run_dp_sharded_mrope_vision_model_vs_direct, - args=( - world_size, - batch_size, - get_open_port(), - ), - nprocs=world_size, - ) - - -def run_dp_sharded_mrope_vision_model_vs_direct(local_rank: int, - world_size: int, - batch_size: int, - master_port: int): - """ - Test that run_dp_sharded_mrope_vision_model produces the same results as - calling the model directly. 
- """ - # Set random seed for reproducibility - current_platform.seed_everything(0) - device = f"{current_platform.device_name}:{local_rank}" - current_platform.set_device(device) - torch.set_default_device(device) - - update_environment_variables({ - 'RANK': str(local_rank), - 'LOCAL_RANK': str(local_rank), - 'WORLD_SIZE': str(world_size), - 'MASTER_ADDR': 'localhost', - 'MASTER_PORT': str(master_port), - }) - - # initialize distributed - init_distributed_environment() - initialize_model_parallel(tensor_model_parallel_size=world_size) - - # Create test data - grid_thw_list = [] - pixel_values_list = [] - - for i in range(batch_size): - # Varying image sizes for better testing - t, h, w = 1, 4 + i, 4 + i - grid_thw_list.append([t, h, w]) - - num_patches = t * h * w - # Create random pixel values for this image - image_pixels = torch.randn(num_patches, 768) - pixel_values_list.append(image_pixels) - - # Concatenate all pixel values - pixel_values = torch.cat(pixel_values_list, dim=0) - - # Create a simple mrope vision model - vision_model = SimpleMRopeVisionModel() - - # Run the model directly on the full input (only on rank 0) - if local_rank == 0: - with torch.inference_mode(): - direct_output = vision_model(pixel_values, grid_thw_list) - - # Run the model through the sharded function - with torch.inference_mode(): - sharded_output = run_dp_sharded_mrope_vision_model(vision_model, - pixel_values, - grid_thw_list, - rope_type="rope_3d") - sharded_output = torch.cat(sharded_output, dim=0) - - # Check that the world size is set up correctly - assert get_tensor_model_parallel_world_size() == world_size - - # Compare outputs (only on rank 0) - if local_rank == 0: - # Check that the outputs have the same shape - assert direct_output.shape == sharded_output.shape - # Check that the outputs are close (they should be identical) - assert torch.allclose(direct_output, - sharded_output, - rtol=1e-5, - atol=1e-5) - - -@multi_gpu_test(num_gpus=2) -def test_run_dp_sharded_mrope_vision_model_empty_input(): - world_size = 2 - mp.spawn( - run_dp_sharded_mrope_vision_model_empty_input_worker, - args=(world_size, get_open_port()), - nprocs=world_size, - ) - - -def run_dp_sharded_mrope_vision_model_empty_input_worker( - local_rank: int, world_size: int, master_port: int): - """Test run_dp_sharded_mrope_vision_model with empty input.""" - # Set up distributed environment - device = f"{current_platform.device_name}:{local_rank}" - current_platform.set_device(device) - torch.set_default_device(device) - - update_environment_variables({ - 'RANK': str(local_rank), - 'LOCAL_RANK': str(local_rank), - 'WORLD_SIZE': str(world_size), - 'MASTER_ADDR': 'localhost', - 'MASTER_PORT': str(master_port), - }) - - init_distributed_environment() - initialize_model_parallel(tensor_model_parallel_size=world_size) - - # Create empty inputs - pixel_values = torch.empty((0, 768)) - grid_thw_list: list[list[int]] = [] - - vision_model = SimpleMRopeVisionModel() - - # Should handle empty input gracefully - with torch.inference_mode(): - output = run_dp_sharded_mrope_vision_model(vision_model, - pixel_values, - grid_thw_list, - rope_type="rope_3d") - - assert len(output) == 0 - - -@multi_gpu_test(num_gpus=4) -def test_run_dp_sharded_mrope_vision_model_uneven_load(): - world_size = 4 - mp.spawn( - run_dp_sharded_mrope_vision_model_uneven_load_worker, - args=(world_size, get_open_port()), - nprocs=world_size, - ) - - -def run_dp_sharded_mrope_vision_model_uneven_load_worker( - local_rank: int, world_size: int, master_port: int): - 
"""Test run_dp_sharded_mrope_vision_model with uneven load distribution.""" - # Set up distributed environment - current_platform.seed_everything(123) - device = f"{current_platform.device_name}:{local_rank}" - current_platform.set_device(device) - torch.set_default_device(device) - - update_environment_variables({ - 'RANK': str(local_rank), - 'LOCAL_RANK': str(local_rank), - 'WORLD_SIZE': str(world_size), - 'MASTER_ADDR': 'localhost', - 'MASTER_PORT': str(master_port), - }) - - init_distributed_environment() - initialize_model_parallel(tensor_model_parallel_size=world_size) - - # Create images with very different sizes - grid_thw_list = [ - [1, 2, 2], # Small: 4 patches - [1, 8, 8], # Large: 64 patches - [1, 3, 3], # Medium: 9 patches - ] - - pixel_values_list = [] - for grid_thw in grid_thw_list: - num_patches = math.prod(grid_thw) - image_pixels = torch.randn(num_patches, 768) - pixel_values_list.append(image_pixels) - - pixel_values = torch.cat(pixel_values_list, dim=0) - vision_model = SimpleMRopeVisionModel() - - # Should handle uneven distribution without errors - with torch.inference_mode(): - output_tuple = run_dp_sharded_mrope_vision_model(vision_model, - pixel_values, - grid_thw_list, - rope_type="rope_3d") - - # Verify output shape is reasonable - merge_factor = vision_model.spatial_merge_size**2 - expected_output_patches = list( - math.prod(grid_thw) // merge_factor for grid_thw in grid_thw_list) - - for i, output in enumerate(output_tuple): - assert output.shape[0] == expected_output_patches[i] - assert output.shape[1] == vision_model.out_hidden_size - - -@pytest.mark.parametrize("spatial_merge_size", [2, 4]) -def test_simple_mrope_vision_model_spatial_merge(spatial_merge_size: int): - """Test SimpleMRopeVisionModel with different spatial merge sizes.""" - device = current_platform.device_type - - grid_thw_list = [[1, 4, 4], [1, 6, 6]] # Two images - pixel_values_list = [] - - for grid_thw in grid_thw_list: - num_patches = math.prod(grid_thw) - image_pixels = torch.randn(num_patches, 768, device=device) - pixel_values_list.append(image_pixels) - - pixel_values = torch.cat(pixel_values_list, dim=0) - vision_model = SimpleMRopeVisionModel( - spatial_merge_size=spatial_merge_size).to(device) - - with torch.inference_mode(): - output = vision_model(pixel_values, grid_thw_list) - - # Verify output dimensions based on spatial merging - total_patches = sum(math.prod(grid_thw) for grid_thw in grid_thw_list) - merge_factor = spatial_merge_size**2 - expected_output_patches = total_patches // merge_factor - - assert output.shape[0] == expected_output_patches - assert output.shape[1] == vision_model.out_hidden_size diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index 56ec63438690..b088e0c0dd24 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -69,7 +69,6 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model from vllm.platforms import _Backend from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope @@ -83,7 +82,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper, init_vllm_registered_model, maybe_prefix, merge_multimodal_embeddings) -from .vision import get_vit_attn_backend +from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) diff --git 
a/vllm/model_executor/models/idefics2_vision_model.py b/vllm/model_executor/models/idefics2_vision_model.py index 76737a442823..2f0c4240413b 100644 --- a/vllm/model_executor/models/idefics2_vision_model.py +++ b/vllm/model_executor/models/idefics2_vision_model.py @@ -34,7 +34,8 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.multimodal.utils import run_dp_sharded_vision_model + +from .vision import run_dp_sharded_vision_model class Idefics2VisionEmbeddings(nn.Module): diff --git a/vllm/model_executor/models/intern_vit.py b/vllm/model_executor/models/intern_vit.py index 892188c04722..2c341d283971 100644 --- a/vllm/model_executor/models/intern_vit.py +++ b/vllm/model_executor/models/intern_vit.py @@ -28,7 +28,8 @@ RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.multimodal.utils import run_dp_sharded_vision_model + +from .vision import run_dp_sharded_vision_model NORM2FN = { 'rms_norm': RMSNorm, diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index f554077935bf..503627865c4a 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -76,13 +76,13 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import KimiVLConfig, MoonViTConfig from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekV2Config from vllm.utils.tensor_schema import TensorSchema, TensorShape from .utils import PPMissingLayer, is_pp_missing_parameter, maybe_prefix +from .vision import run_dp_sharded_mrope_vision_model # For dummy input only diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py index 131a66b71323..50521b593786 100644 --- a/vllm/model_executor/models/mllama4.py +++ b/vllm/model_executor/models/mllama4.py @@ -50,7 +50,6 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.multimodal.utils import run_dp_sharded_vision_model from vllm.sequence import IntermediateTensors from vllm.utils.tensor_schema import TensorSchema, TensorShape @@ -58,6 +57,7 @@ from .llama4 import Llama4ForCausalLM from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix, merge_multimodal_embeddings) +from .vision import run_dp_sharded_vision_model class Llama4ImagePatchInputs(TensorSchema): diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 73b27572a8eb..b740e6d87b74 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -59,7 +59,6 @@ from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalFieldConfig -from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model from vllm.platforms import _Backend from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope @@ -74,7 +73,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper, cast_overflow_tensors, init_vllm_registered_model, maybe_prefix, 
merge_multimodal_embeddings) -from .vision import get_vit_attn_backend +from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 88813490c0fb..472e8b061a9e 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -66,7 +66,6 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.multimodal.utils import run_dp_sharded_mrope_vision_model from vllm.platforms import _Backend, current_platform from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope @@ -78,7 +77,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper, init_vllm_registered_model, maybe_prefix, merge_multimodal_embeddings) -from .vision import get_vit_attn_backend +from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 98d65dea2739..ee6703f7229e 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -83,7 +83,7 @@ from .qwen3 import Qwen3ForCausalLM, Qwen3Model from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper, maybe_prefix, merge_multimodal_embeddings) -from .vision import get_vit_attn_backend +from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) @@ -1214,8 +1214,6 @@ def _process_image_input( else: pixel_values = image_input["pixel_values"].type(self.visual.dtype) if self.use_data_parallel: - from vllm.multimodal.utils import ( - run_dp_sharded_mrope_vision_model) return run_dp_sharded_mrope_vision_model(self.visual, pixel_values, grid_thw_list, @@ -1245,8 +1243,6 @@ def _process_video_input( pixel_values_videos = video_input["pixel_values_videos"].type( self.visual.dtype) if self.use_data_parallel: - from vllm.multimodal.utils import ( - run_dp_sharded_mrope_vision_model) return run_dp_sharded_mrope_vision_model(self.visual, pixel_values_videos, grid_thw_list, diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py index f667266b77bf..5f6ad5885043 100644 --- a/vllm/model_executor/models/step3_vl.py +++ b/vllm/model_executor/models/step3_vl.py @@ -31,7 +31,6 @@ BaseProcessingInfo, PromptReplacement, PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.multimodal.utils import run_dp_sharded_vision_model from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import Step3VisionEncoderConfig from vllm.transformers_utils.tokenizer import AnyTokenizer @@ -40,6 +39,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, init_vllm_registered_model, maybe_prefix, merge_multimodal_embeddings) +from .vision import run_dp_sharded_vision_model class Step3VLImagePixelInputs(TypedDict): diff --git a/vllm/model_executor/models/vision.py b/vllm/model_executor/models/vision.py index 81f86db7e187..08ad8fbeb424 100644 --- a/vllm/model_executor/models/vision.py +++ b/vllm/model_executor/models/vision.py @@ -1,12 +1,17 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import itertools +import math from abc import ABC, abstractmethod -from typing import Final, Generic, Optional, Protocol, 
TypeVar, Union +from typing import Final, Generic, Literal, Optional, Protocol, TypeVar, Union import torch from transformers import PretrainedConfig +from vllm.distributed import (get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.logger import init_logger from vllm.platforms import _Backend, current_platform @@ -123,3 +128,277 @@ def resolve_visual_encoder_outputs( if post_layer_norm is not None and uses_last_layer: hs_pool[-1] = post_layer_norm(encoder_outputs) return torch.cat(hs_pool, dim=-1) + + +def run_dp_sharded_vision_model(image_input: torch.Tensor, + vision_model: torch.nn.Module) -> torch.Tensor: + """Run a vision model with data parallelism (DP) sharding. The function + will shard the input image tensor on the first dimension and run the vision + model + + Args: + image_input (torch.Tensor): Image input tensor. + vision_model (torch.nn.Module): Vision model. + Returns: + torch.Tensor: Output image embeddings + """ + + num_chunks = image_input.shape[0] + mp_world_size = get_tensor_model_parallel_world_size() + num_chunks_per_rank = (num_chunks + mp_world_size - 1) // mp_world_size + num_padded_chunks = num_chunks_per_rank * mp_world_size - num_chunks + pad = (0, ) * (2 * (image_input.dim() - 1)) + (0, num_padded_chunks) + image_input_padded = torch.nn.functional.pad(image_input, pad) + rank = get_tensor_model_parallel_rank() + image_input_per_rank = image_input_padded[rank * + num_chunks_per_rank:(rank + 1) * + num_chunks_per_rank, ...] + + vision_embeddings = vision_model(image_input_per_rank) + # Ensure tensor is contiguous before all_gather + vision_embeddings = vision_embeddings.contiguous() + vision_embeddings = tensor_model_parallel_all_gather(vision_embeddings, + dim=0) + vision_embeddings = vision_embeddings[:num_chunks, ...] + return vision_embeddings + + +def get_load_balance_assignment( + sizes: list[int], + num_gpus: int = 2, +) -> tuple[list[int], list[int], list[int]]: + """ + Generate load balancing assignment and metadata + for distributing data across GPUs. + The load is determined by the total image sizes, + not the number of images. 
+ + Args: + sizes: The size of each image + num_gpus: Number of GPUs to balance across + + Returns: + shuffle_indices: + Indices to reorder data for balanced loading + gpu_sample_counts: + Number of samples assigned to each GPU + grouped_sizes_per_gpu: + Total size assigned to each GPU + + Example: + ``` + sizes = [1000, 100, 200, 50] + num_gpus=2 + ``` + + """ + + n_samples = len(sizes) + + # Handle edge cases + if n_samples == 0: + return [], [0] * num_gpus, [0] * num_gpus + + # Use greedy algorithm - balance by total size, not sample count + gpu_assignments = [list[int]() for _ in range(num_gpus)] + gpu_loads = [0] * num_gpus # This tracks total SIZE, not sample count + + # Sort indices by size (largest first for better load balancing) + # sizes = [1000, 100, 200, 50] + # large_to_small_indices = [0, 2, 1, 3] + large_to_small_indices = sorted(range(n_samples), + key=lambda i: sizes[i], + reverse=True) + + for idx in large_to_small_indices: + # Find GPU with minimum current load (by total size) + min_gpu = min(range(num_gpus), key=lambda i: gpu_loads[i]) + gpu_assignments[min_gpu].append(idx) + gpu_loads[min_gpu] += sizes[idx] + + # Create shuffle indices and counts + shuffle_indices = list[int]() + gpu_sample_counts = list[int]() + for gpu_id in range(num_gpus): + # GPU_0 = [1000] = [0] + # GPU_1 = [200, 100, 50] = [2, 1, 3] + # shuffle_indices = [0, 2, 1, 3] + shuffle_indices.extend(gpu_assignments[gpu_id]) + # GPU_0 = [1] + # GPU_1 = [3] + # gpu_sample_counts = [1, 3] + gpu_sample_counts.append(len(gpu_assignments[gpu_id])) + + return (shuffle_indices, gpu_sample_counts, gpu_loads) + + +def run_dp_sharded_mrope_vision_model( + vision_model: torch.nn.Module, + pixel_values: torch.Tensor, + grid_thw_list: list[list[int]], + *, + rope_type: Literal["rope_3d", "rope_2d"], +) -> tuple[torch.Tensor, ...]: + """Run a vision model with data parallelism (DP) sharding. + The function will shard the input image tensor on the + first dimension and run the vision model. + This function is used to run the vision model with mrope. + + Args: + vision_model (torch.nn.Module): Vision model. + pixel_values (torch.Tensor): Image/Video input tensor. + grid_thw_list: List of grid dimensions for each image + rope_type: Type of rope used in the vision model. + Different rope types have different dimension to do ViT. 
+ "rope_3d" for 3D rope (e.g., Qwen2.5-VL) + "rope_2d" for 2D rope (e.g., Kimi-VL) + Returns: + torch.Tensor: Output image embeddings + + Example: + ``` + vision_model.out_hidden_size = 64 + vision_model.spatial_merge_size = 2 + pixel_values.shape = (1350, channel) + grid_thw_list = [[1, 10, 100], [1, 10, 10], [1, 10, 20], [1, 50]] + tp_size=2 + ``` + + """ + tp_size = get_tensor_model_parallel_world_size() + + # GPU_0 tp_rank_local = 0 + # GPU_1 tp_rank_local = 1 + tp_rank_local = get_tensor_model_parallel_rank() + + # patches_per_image = [1000, 100, 200, 50] + patches_per_image = [math.prod(grid_thw) for grid_thw in grid_thw_list] + # patches_per_image = [0, 1000, 1100, 1300, 1350] + cum_patches_per_image = [0, *itertools.accumulate(patches_per_image)] + + # Get load balancing assignment with all metadata + # image_to_tp_rank = [0, 2, 1, 3] + # gpu_sample_counts = [1, 3] + # grouped_pixel_values_len = [1000, 350] + (image_to_tp_rank, gpu_sample_counts, + grouped_pixel_values_len) = get_load_balance_assignment( + patches_per_image, tp_size) + + # cu_gpu_sample_counts = [0, 1, 4] + cum_gpu_sample_counts = [0, *itertools.accumulate(gpu_sample_counts)] + + # GPU_0 image_idxs_local = [0] + # GPU_1 image_idxs_local = [2, 1, 3] + image_idxs_local = image_to_tp_rank[cum_gpu_sample_counts[tp_rank_local]: + cum_gpu_sample_counts[tp_rank_local + + 1]] + + # Get the pixel values for the local images based on the image_idxs_local + if len(image_idxs_local) > 0: + pixel_values_local = torch.cat([ + pixel_values[cum_patches_per_image[i]:cum_patches_per_image[i + 1]] + for i in image_idxs_local + ]) + else: + # Handle case where this rank has no images + pixel_values_local = torch.empty((0, pixel_values.shape[1]), + device=pixel_values.device, + dtype=pixel_values.dtype) + # embed_dim_reduction_factor = 2 * 2 + if rope_type == "rope_2d": + embed_dim_reduction_factor = (vision_model.merge_kernel_size[0] * + vision_model.merge_kernel_size[1]) + else: + embed_dim_reduction_factor = (vision_model.spatial_merge_size * + vision_model.spatial_merge_size) + + # Find the max length across all ranks + # The output embedding of every DP rank has to be + # padded to this length for tensor_model_parallel_all_gather + # to work + max_len_per_rank = max( + grouped_pixel_values_len) // embed_dim_reduction_factor + local_grid_thw_list = [grid_thw_list[i] for i in image_idxs_local] + + # Run the vision model on the local pixel_values_local + if rope_type == "rope_2d": + if pixel_values_local.shape[0] > 0: + image_embeds_local = vision_model( + pixel_values_local, torch.tensor(local_grid_thw_list)) + if isinstance(image_embeds_local, list): + image_embeds_local = torch.cat(image_embeds_local, dim=0) + else: + out_dim = getattr(vision_model.config, "hidden_size", None) + image_embeds_local = torch.empty( + (0, embed_dim_reduction_factor, out_dim), + device=pixel_values.device, + dtype=pixel_values.dtype) + else: + if pixel_values_local.shape[0] > 0: + image_embeds_local = vision_model(pixel_values_local, + local_grid_thw_list) + else: + # Handle empty case + image_embeds_local = torch.empty((0, vision_model.out_hidden_size), + device=pixel_values.device, + dtype=pixel_values.dtype) + + # Pad the output based on max_len_per_rank + # for tensor_model_parallel_all_gather to work + current_len = image_embeds_local.shape[0] + if current_len < max_len_per_rank: + padding_size = max_len_per_rank - current_len + if rope_type == "rope_2d": + padding = torch.empty((padding_size, image_embeds_local.shape[1], + 
image_embeds_local.shape[2]), + dtype=image_embeds_local.dtype, + device=image_embeds_local.device) + else: + padding = torch.empty((padding_size, image_embeds_local.shape[1]), + dtype=image_embeds_local.dtype, + device=image_embeds_local.device) + image_embeds_local_padded = torch.cat([image_embeds_local, padding], + dim=0) + else: + image_embeds_local_padded = image_embeds_local + + # Do all_gather to collect embeddings from all ranks + gathered_embeds = tensor_model_parallel_all_gather( + image_embeds_local_padded, dim=0) + + # Remove padding and reconstruct per-rank embeddings + rank_embeddings = list[torch.Tensor]() + for rank in range(tp_size): + start_idx = rank * max_len_per_rank + end_idx = start_idx + (grouped_pixel_values_len[rank] // + embed_dim_reduction_factor) + rank_embeddings.append(gathered_embeds[start_idx:end_idx]) + + patches_per_output_image = [(patch_size // embed_dim_reduction_factor) + for patch_size in patches_per_image] + + # Reconstruct embeddings in the original order + original_order_embeddings = [None] * len(grid_thw_list) + current_idx = 0 + for rank in range(tp_size): + count = gpu_sample_counts[rank] + if count > 0: + # Get images assigned to this rank in shuffled order + # GPU_0 = image_idxs_local [0] + # GPU_1 = image_idxs_local [2, 1, 3] + rank_images = image_to_tp_rank[current_idx:current_idx + count] + + rank_embed = rank_embeddings[rank] + # Split rank embeddings back to individual images + embed_start = 0 + for img_idx in rank_images: + img_patches = patches_per_output_image[img_idx] + original_order_embeddings[img_idx] = rank_embed[ + embed_start:embed_start + img_patches] + embed_start += img_patches + current_idx += count + out_embeddings = tuple(embed for embed in original_order_embeddings + if embed is not None) + assert len(out_embeddings) == len( + original_order_embeddings), "Found unassigned embeddings" + return out_embeddings diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index f4e2ed72e2d7..0f8aeceb3944 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -3,13 +3,11 @@ import asyncio import atexit -import itertools -import math from collections.abc import Iterable from concurrent.futures import ThreadPoolExecutor from itertools import groupby from pathlib import Path -from typing import TYPE_CHECKING, Any, Literal, Optional, TypeVar, Union +from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union from urllib.parse import ParseResult, urlparse from urllib.request import url2pathname @@ -21,9 +19,6 @@ import vllm.envs as envs from vllm.connections import HTTPConnection, global_http_connection -from vllm.distributed import (get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size, - tensor_model_parallel_all_gather) from .audio import AudioMediaIO from .base import MediaIO @@ -33,12 +28,10 @@ _M = TypeVar("_M") if TYPE_CHECKING: - from .inputs import (BatchedTensorInputs, MultiModalKwargs, - MultiModalKwargsItem, MultiModalKwargsItems, - MultiModalPlaceholderDict) + from .inputs import (BatchedTensorInputs, MultiModalKwargsItem, + MultiModalKwargsItems, MultiModalPlaceholderDict) else: BatchedTensorInputs = Any - MultiModalKwargs = Any MultiModalKwargsItem = Any MultiModalKwargsItems = Any MultiModalPlaceholderDict = Any @@ -93,7 +86,7 @@ def _load_data_url( self, url_spec: ParseResult, media_io: MediaIO[_M], - ) -> _M: + ) -> _M: # type: ignore[type-var] data_spec, data = url_spec.path.split(",", 1) media_type, data_type = data_spec.split(";", 1) @@ -107,7 +100,7 @@ def 
_load_file_url( self, url_spec: ParseResult, media_io: MediaIO[_M], - ) -> _M: + ) -> _M: # type: ignore[type-var] allowed_local_media_path = self.allowed_local_media_path if allowed_local_media_path is None: raise RuntimeError("Cannot load local files without " @@ -127,7 +120,7 @@ def load_from_url( media_io: MediaIO[_M], *, fetch_timeout: Optional[int] = None, - ) -> _M: + ) -> _M: # type: ignore[type-var] url_spec = urlparse(url) if url_spec.scheme.startswith("http"): @@ -434,280 +427,6 @@ def group_mm_kwargs_by_modality( yield modality, len(items_lst), mm_kwargs_group -def run_dp_sharded_vision_model(image_input: torch.Tensor, - vision_model: torch.nn.Module) -> torch.Tensor: - """Run a vision model with data parallelism (DP) sharding. The function - will shard the input image tensor on the first dimension and run the vision - model - - Args: - image_input (torch.Tensor): Image input tensor. - vision_model (torch.nn.Module): Vision model. - Returns: - torch.Tensor: Output image embeddings - """ - - num_chunks = image_input.shape[0] - mp_world_size = get_tensor_model_parallel_world_size() - num_chunks_per_rank = (num_chunks + mp_world_size - 1) // mp_world_size - num_padded_chunks = num_chunks_per_rank * mp_world_size - num_chunks - pad = (0, ) * (2 * (image_input.dim() - 1)) + (0, num_padded_chunks) - image_input_padded = torch.nn.functional.pad(image_input, pad) - rank = get_tensor_model_parallel_rank() - image_input_per_rank = image_input_padded[rank * - num_chunks_per_rank:(rank + 1) * - num_chunks_per_rank, ...] - - vision_embeddings = vision_model(image_input_per_rank) - # Ensure tensor is contiguous before all_gather - vision_embeddings = vision_embeddings.contiguous() - vision_embeddings = tensor_model_parallel_all_gather(vision_embeddings, - dim=0) - vision_embeddings = vision_embeddings[:num_chunks, ...] - return vision_embeddings - - -def get_load_balance_assignment( - sizes: list[int], - num_gpus: int = 2, -) -> tuple[list[int], list[int], list[int]]: - """ - Generate load balancing assignment and metadata - for distributing data across GPUs. - The load is determined by the total image sizes, - not the number of images. 
- - Args: - sizes: The size of each image - num_gpus: Number of GPUs to balance across - - Returns: - shuffle_indices: - Indices to reorder data for balanced loading - gpu_sample_counts: - Number of samples assigned to each GPU - grouped_sizes_per_gpu: - Total size assigned to each GPU - - Example: - ``` - sizes = [1000, 100, 200, 50] - num_gpus=2 - ``` - - """ - - n_samples = len(sizes) - - # Handle edge cases - if n_samples == 0: - return [], [0] * num_gpus, [0] * num_gpus - - # Use greedy algorithm - balance by total size, not sample count - gpu_assignments = [list[int]() for _ in range(num_gpus)] - gpu_loads = [0] * num_gpus # This tracks total SIZE, not sample count - - # Sort indices by size (largest first for better load balancing) - # sizes = [1000, 100, 200, 50] - # large_to_small_indices = [0, 2, 1, 3] - large_to_small_indices = sorted(range(n_samples), - key=lambda i: sizes[i], - reverse=True) - - for idx in large_to_small_indices: - # Find GPU with minimum current load (by total size) - min_gpu = min(range(num_gpus), key=lambda i: gpu_loads[i]) - gpu_assignments[min_gpu].append(idx) - gpu_loads[min_gpu] += sizes[idx] - - # Create shuffle indices and counts - shuffle_indices = list[int]() - gpu_sample_counts = list[int]() - for gpu_id in range(num_gpus): - # GPU_0 = [1000] = [0] - # GPU_1 = [200, 100, 50] = [2, 1, 3] - # shuffle_indices = [0, 2, 1, 3] - shuffle_indices.extend(gpu_assignments[gpu_id]) - # GPU_0 = [1] - # GPU_1 = [3] - # gpu_sample_counts = [1, 3] - gpu_sample_counts.append(len(gpu_assignments[gpu_id])) - - return (shuffle_indices, gpu_sample_counts, gpu_loads) - - -def run_dp_sharded_mrope_vision_model( - vision_model: torch.nn.Module, - pixel_values: torch.Tensor, - grid_thw_list: list[list[int]], - *, - rope_type: Literal["rope_3d", "rope_2d"], -) -> tuple[torch.Tensor, ...]: - """Run a vision model with data parallelism (DP) sharding. - The function will shard the input image tensor on the - first dimension and run the vision model. - This function is used to run the vision model with mrope. - - Args: - vision_model (torch.nn.Module): Vision model. - pixel_values (torch.Tensor): Image/Video input tensor. - grid_thw_list: List of grid dimensions for each image - rope_type: Type of rope used in the vision model. - Different rope types have different dimension to do ViT. 
- "rope_3d" for 3D rope (e.g., Qwen2.5-VL) - "rope_2d" for 2D rope (e.g., Kimi-VL) - Returns: - torch.Tensor: Output image embeddings - - Example: - ``` - vision_model.out_hidden_size = 64 - vision_model.spatial_merge_size = 2 - pixel_values.shape = (1350, channel) - grid_thw_list = [[1, 10, 100], [1, 10, 10], [1, 10, 20], [1, 50]] - tp_size=2 - ``` - - """ - tp_size = get_tensor_model_parallel_world_size() - - # GPU_0 tp_rank_local = 0 - # GPU_1 tp_rank_local = 1 - tp_rank_local = get_tensor_model_parallel_rank() - - # patches_per_image = [1000, 100, 200, 50] - patches_per_image = [math.prod(grid_thw) for grid_thw in grid_thw_list] - # patches_per_image = [0, 1000, 1100, 1300, 1350] - cum_patches_per_image = [0, *itertools.accumulate(patches_per_image)] - - # Get load balancing assignment with all metadata - # image_to_tp_rank = [0, 2, 1, 3] - # gpu_sample_counts = [1, 3] - # grouped_pixel_values_len = [1000, 350] - (image_to_tp_rank, gpu_sample_counts, - grouped_pixel_values_len) = get_load_balance_assignment( - patches_per_image, tp_size) - - # cu_gpu_sample_counts = [0, 1, 4] - cum_gpu_sample_counts = [0, *itertools.accumulate(gpu_sample_counts)] - - # GPU_0 image_idxs_local = [0] - # GPU_1 image_idxs_local = [2, 1, 3] - image_idxs_local = image_to_tp_rank[cum_gpu_sample_counts[tp_rank_local]: - cum_gpu_sample_counts[tp_rank_local + - 1]] - - # Get the pixel values for the local images based on the image_idxs_local - if len(image_idxs_local) > 0: - pixel_values_local = torch.cat([ - pixel_values[cum_patches_per_image[i]:cum_patches_per_image[i + 1]] - for i in image_idxs_local - ]) - else: - # Handle case where this rank has no images - pixel_values_local = torch.empty((0, pixel_values.shape[1]), - device=pixel_values.device, - dtype=pixel_values.dtype) - # embed_dim_reduction_factor = 2 * 2 - if rope_type == "rope_2d": - embed_dim_reduction_factor = (vision_model.merge_kernel_size[0] * - vision_model.merge_kernel_size[1]) - else: - embed_dim_reduction_factor = (vision_model.spatial_merge_size * - vision_model.spatial_merge_size) - - # Find the max length across all ranks - # The output embedding of every DP rank has to be - # padded to this length for tensor_model_parallel_all_gather - # to work - max_len_per_rank = max( - grouped_pixel_values_len) // embed_dim_reduction_factor - local_grid_thw_list = [grid_thw_list[i] for i in image_idxs_local] - - # Run the vision model on the local pixel_values_local - if rope_type == "rope_2d": - if pixel_values_local.shape[0] > 0: - image_embeds_local = vision_model( - pixel_values_local, torch.tensor(local_grid_thw_list)) - if isinstance(image_embeds_local, list): - image_embeds_local = torch.cat(image_embeds_local, dim=0) - else: - out_dim = getattr(vision_model.config, "hidden_size", None) - image_embeds_local = torch.empty( - (0, embed_dim_reduction_factor, out_dim), - device=pixel_values.device, - dtype=pixel_values.dtype) - else: - if pixel_values_local.shape[0] > 0: - image_embeds_local = vision_model(pixel_values_local, - local_grid_thw_list) - else: - # Handle empty case - image_embeds_local = torch.empty((0, vision_model.out_hidden_size), - device=pixel_values.device, - dtype=pixel_values.dtype) - - # Pad the output based on max_len_per_rank - # for tensor_model_parallel_all_gather to work - current_len = image_embeds_local.shape[0] - if current_len < max_len_per_rank: - padding_size = max_len_per_rank - current_len - if rope_type == "rope_2d": - padding = torch.empty((padding_size, image_embeds_local.shape[1], - 
image_embeds_local.shape[2]), - dtype=image_embeds_local.dtype, - device=image_embeds_local.device) - else: - padding = torch.empty((padding_size, image_embeds_local.shape[1]), - dtype=image_embeds_local.dtype, - device=image_embeds_local.device) - image_embeds_local_padded = torch.cat([image_embeds_local, padding], - dim=0) - else: - image_embeds_local_padded = image_embeds_local - - # Do all_gather to collect embeddings from all ranks - gathered_embeds = tensor_model_parallel_all_gather( - image_embeds_local_padded, dim=0) - - # Remove padding and reconstruct per-rank embeddings - rank_embeddings = list[torch.Tensor]() - for rank in range(tp_size): - start_idx = rank * max_len_per_rank - end_idx = start_idx + (grouped_pixel_values_len[rank] // - embed_dim_reduction_factor) - rank_embeddings.append(gathered_embeds[start_idx:end_idx]) - - patches_per_output_image = [(patch_size // embed_dim_reduction_factor) - for patch_size in patches_per_image] - - # Reconstruct embeddings in the original order - original_order_embeddings = [None] * len(grid_thw_list) - current_idx = 0 - for rank in range(tp_size): - count = gpu_sample_counts[rank] - if count > 0: - # Get images assigned to this rank in shuffled order - # GPU_0 = image_idxs_local [0] - # GPU_1 = image_idxs_local [2, 1, 3] - rank_images = image_to_tp_rank[current_idx:current_idx + count] - - rank_embed = rank_embeddings[rank] - # Split rank embeddings back to individual images - embed_start = 0 - for img_idx in rank_images: - img_patches = patches_per_output_image[img_idx] - original_order_embeddings[img_idx] = rank_embed[ - embed_start:embed_start + img_patches] - embed_start += img_patches - current_idx += count - out_embeddings = tuple(embed for embed in original_order_embeddings - if embed is not None) - assert len(out_embeddings) == len( - original_order_embeddings), "Found unassigned embeddings" - return out_embeddings - - def fetch_audio( audio_url: str, audio_io_kwargs: Optional[dict[str, Any]] = None, From 4322c553a64c3a6c95a39216f31b022eda04a5ad Mon Sep 17 00:00:00 2001 From: Andreas Hartel Date: Tue, 23 Sep 2025 11:56:31 +0200 Subject: [PATCH 264/518] [Test]: Hermes tool parser stream output error in Qwen3 case (#25203) Signed-off-by: Andreas Hartel --- .../tool_parsers/test_hermes_tool_parser.py | 209 +++++++++++++++++- 1 file changed, 203 insertions(+), 6 deletions(-) diff --git a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py index e0e6b2c07e17..1da06be2eba9 100644 --- a/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py +++ b/tests/entrypoints/openai/tool_parsers/test_hermes_tool_parser.py @@ -5,6 +5,11 @@ import pytest +from vllm.entrypoints.openai.protocol import ChatCompletionRequest +from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import ( + Hermes2ProToolParser) +from vllm.transformers_utils.tokenizer import AnyTokenizer + from ....utils import RemoteOpenAIServer MODEL_NAME = "meta-llama/Llama-3.2-1B-Instruct" @@ -37,7 +42,7 @@ }, "unit": { "type": "string", - "enum": ["celsius", "fahrenheit"] + "enum": ["celsius", "fahrenheit"], }, }, "required": ["location"], @@ -75,7 +80,7 @@ "user", "content": "Hi! Do you have any detailed information about the product id " - "7355608 and inserted true?" 
+ "7355608 and inserted true?", }] @@ -144,8 +149,8 @@ async def test_streaming_tool_call(): if tool_chunk.function.name: tool_call_chunks[index]["name"] += tool_chunk.function.name if tool_chunk.function.arguments: - tool_call_chunks[index][ - "arguments"] += tool_chunk.function.arguments + tool_call_chunks[index]["arguments"] += ( + tool_chunk.function.arguments) assert len(tool_call_chunks) == 1 reconstructed_tool_call = tool_call_chunks[0] @@ -234,8 +239,8 @@ async def test_streaming_product_tool_call(): if tool_chunk.function.name: tool_call_chunks[index]["name"] += tool_chunk.function.name if tool_chunk.function.arguments: - tool_call_chunks[index][ - "arguments"] += tool_chunk.function.arguments + tool_call_chunks[index]["arguments"] += ( + tool_chunk.function.arguments) assert len(tool_call_chunks) == 1 reconstructed_tool_call = tool_call_chunks[0] @@ -258,3 +263,195 @@ async def test_streaming_product_tool_call(): print("\n[Streaming Product Test Passed]") print(f"Reconstructed Tool Call: {reconstructed_tool_call['name']}") print(f"Reconstructed Arguments: {arguments}") + + +@pytest.fixture +def qwen_tokenizer() -> AnyTokenizer: + from vllm.transformers_utils.tokenizer import get_tokenizer + + return get_tokenizer("Qwen/Qwen3-32B") + + +@pytest.fixture +def hermes_parser(qwen_tokenizer: AnyTokenizer) -> Hermes2ProToolParser: + return Hermes2ProToolParser(qwen_tokenizer) + + +@pytest.fixture +def any_chat_request() -> ChatCompletionRequest: + return ChatCompletionRequest( + seed=42, + model="Qwen/Qwen3-32B", + messages=[], + ) + + +def test_hermes_parser_streaming_just_forward_text( + qwen_tokenizer: AnyTokenizer, + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = ( + """This is some prior text that has nothing to do with tool calling.""" + ) + tokens = qwen_tokenizer.encode(text) + previous_text = "" + delta_messages = [] + for token in tokens: + delta_text = qwen_tokenizer.decode([token]) + current_text = previous_text + delta_text + delta = hermes_parser.extract_tool_calls_streaming( + previous_text=previous_text, + current_text=current_text, + delta_text=delta_text, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=any_chat_request, + ) + previous_text = current_text + delta_messages.append(delta) + + for delta in delta_messages: + assert delta is not None + assert not delta.tool_calls + + print(delta_messages) + assert "".join([delta.content for delta in delta_messages]) == text + + +def test_hermes_parser_streaming_failure_case_bug_19056( + qwen_tokenizer: AnyTokenizer, + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = """ +{"name": "final_answer", "arguments": {"trigger": true}} +""" + tokens = qwen_tokenizer.encode(text) + previous_text = "" + delta_messages = [] + for token in tokens: + text = qwen_tokenizer.decode([token]) + current_text = previous_text + text + delta = hermes_parser.extract_tool_calls_streaming( + previous_text=previous_text, + current_text=current_text, + delta_text=text, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=any_chat_request, + ) + previous_text = current_text + if delta is not None: + delta_messages.append(delta) + + assert delta_messages[0].tool_calls[0].function.name == "final_answer" + tool_call_args = "".join(delta.tool_calls[0].function.arguments or "" + for delta in delta_messages) + assert tool_call_args == '{"trigger": true}' + + +def test_hermes_parser_streaming( + 
qwen_tokenizer: AnyTokenizer, + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = '\ +{"name": "get_current_temperature",\ +"arguments": {"location":\ +"San Francisco, California, United States", "unit": "celsius"}}\ +' + + tokens = qwen_tokenizer.encode(text) + previous_text = "" + delta_messages = [] + for token in tokens: + text = qwen_tokenizer.decode([token]) + current_text = previous_text + text + delta = hermes_parser.extract_tool_calls_streaming( + previous_text=previous_text, + current_text=current_text, + delta_text=text, + previous_token_ids=[], + current_token_ids=[], + delta_token_ids=[], + request=any_chat_request, + ) + previous_text = current_text + if delta is not None: + delta_messages.append(delta) + print(delta_messages) + assert (delta_messages[0].tool_calls[0].function.name == + "get_current_temperature") + tool_call_args = "".join(delta.tool_calls[0].function.arguments or "" + for delta in delta_messages) + assert tool_call_args == ( + '{"location":"San Francisco, California, United States", ' + '"unit": "celsius"}') + + +def test_hermes_parser_non_streaming_no_tool_call( + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = """This is not a tool call.""" + tool_call = hermes_parser.extract_tool_calls( + model_output=text, + request=any_chat_request, + ) + + assert tool_call is not None + assert not tool_call.tools_called + + +def test_hermes_parser_non_streaming_tool_call_between_tags( + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = """ +{"name": "final_answer", "arguments": {"trigger": true}} +""" + tool_call = hermes_parser.extract_tool_calls( + model_output=text, + request=any_chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.tool_calls[0].function.name == "final_answer" + assert tool_call.tool_calls[0].function.arguments == '{"trigger": true}' + + +def test_hermes_parser_non_streaming_tool_call_until_eos( + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + text = """ +{"name": "final_answer", "arguments": {"trigger": true}}""" + tool_call = hermes_parser.extract_tool_calls( + model_output=text, + request=any_chat_request, + ) + + assert tool_call is not None + assert tool_call.tools_called + assert tool_call.tool_calls[0].function.name == "final_answer" + assert tool_call.tool_calls[0].function.arguments == '{"trigger": true}' + + +def test_hermes_parser_non_streaming_tool_call_invalid_json( + hermes_parser: Hermes2ProToolParser, + any_chat_request: ChatCompletionRequest, +) -> None: + # Missing closing brace to trigger exception + text = """ +{"name": "final_answer", "arguments": {"trigger": true}""" + tool_call = hermes_parser.extract_tool_calls( + model_output=text, + request=any_chat_request, + ) + + assert tool_call is not None + assert not tool_call.tools_called From 231c2c63e4decd0cbf863690dfffe88e1d97a003 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 23 Sep 2025 18:06:48 +0800 Subject: [PATCH 265/518] [Bugfix] Fix idefics3 `tie_word_embeddings` (#25454) Signed-off-by: Isotr0py --- vllm/model_executor/models/idefics3.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index 18446d126b51..79e130119ae8 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -608,7 
+608,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "lm_head"), ) if self.config.text_config.tie_word_embeddings: - self.lm_head.weight = self.model.text_model.wte.weight + self.lm_head.weight = self.model.text_model.embed_tokens.weight self.logits_processor = LogitsProcessor(config.text_config.vocab_size) def _parse_and_validate_image_input( From 273690a50ac2a5fa79fa7acc5077e49aa1af427e Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Tue, 23 Sep 2025 18:19:45 +0800 Subject: [PATCH 266/518] [Core] Optimize LoRA weight loading (#25403) Signed-off-by: Jee Jee Li --- tests/lora/test_layers.py | 26 ++++---- tests/lora/test_lora_manager.py | 12 ++-- tests/lora/utils.py | 8 +-- vllm/lora/layers/base_linear.py | 10 +-- vllm/lora/layers/column_parallel_linear.py | 67 ++++++++++---------- vllm/lora/layers/logits_processor.py | 8 +-- vllm/lora/layers/row_parallel_linear.py | 4 +- vllm/lora/layers/vocal_parallel_embedding.py | 10 +-- vllm/lora/lora_weights.py | 4 +- vllm/lora/models.py | 17 ++--- 10 files changed, 83 insertions(+), 83 deletions(-) diff --git a/tests/lora/test_layers.py b/tests/lora/test_layers.py index 6735b7cd9e43..ced0afc50cb9 100644 --- a/tests/lora/test_layers.py +++ b/tests/lora/test_layers.py @@ -164,8 +164,8 @@ def populate_loras( weight=layer_weights, generate_embeddings_tensor=generate_embeddings_tensor, ) - sublora.lora_b = sublora.lora_b[:, (sublora_len * - i):(sublora_len * (i + 1))] + sublora.lora_b = sublora.lora_b[(sublora_len * + i):(sublora_len * (i + 1)), :] sublora.optimize() subloras.append(sublora) @@ -304,9 +304,9 @@ def create_random_embedding_layer(): result = embedding(input_) after_a = F.embedding( input_, - lora.lora_a, + lora.lora_a.T, ) - result += (after_a @ lora.lora_b) + result += (after_a @ lora.lora_b.T) expected_results.append(result) expected_result = torch.cat(expected_results) @@ -445,9 +445,9 @@ def create_random_embedding_layer(): result = expanded_embedding(input_) after_a = F.embedding( original_input_, - lora.lora_a, + lora.lora_a.T, ) - result += (after_a @ lora.lora_b) + result += (after_a @ lora.lora_b.T) expected_results.append(result) expected_result = torch.cat(expected_results) @@ -575,7 +575,7 @@ def _pretest(): lm_head=linear, embedding_bias=None) result[:, vocab_size + embeddings_tensor_len:] = float("-inf") - result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling + result += input_ @ lora.lora_a.T @ lora.lora_b.T * lora.scaling expected_results.append(result) expected_result = torch.cat(expected_results) logits_processor.org_vocab_size = vocab_size @@ -692,9 +692,10 @@ def create_random_linear_replicated_layer(): expected_results: list[torch.Tensor] = [] for input_, lora_id in zip(inputs, prompt_mapping): + lora = lora_dict[lora_id] result = linear(input_)[0] - result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling + result += input_ @ lora.lora_a.T @ lora.lora_b.T * lora.scaling expected_results.append(result) expected_result = torch.cat(expected_results) @@ -817,7 +818,7 @@ def create_random_linear_parallel_layer(): for input_, lora_id in zip(inputs, prompt_mapping): lora = lora_dict[lora_id] result = linear(input_)[0] - result += input_ @ lora.lora_a @ lora.lora_b * lora.scaling + result += input_ @ lora.lora_a.T @ lora.lora_b.T * lora.scaling expected_results.append(result) expected_result = torch.cat(expected_results) @@ -965,9 +966,10 @@ class FakeConfig: result = linear(input_)[0] subloras = sublora_dict[lora_id] for i, sublora in enumerate(subloras): - result[:, 
sublora.lora_b.shape[1] * i:sublora.lora_b.shape[1] * - (i + 1)] += (input_ @ sublora.lora_a @ sublora.lora_b * - sublora.scaling) + result[:, sublora.lora_b.shape[0] * i:sublora.lora_b.shape[0] * + (i + 1)] += ( + input_ @ sublora.lora_a.T @ sublora.lora_b.T * + sublora.scaling) expected_results.append(result) expected_result = torch.cat(expected_results) diff --git a/tests/lora/test_lora_manager.py b/tests/lora/test_lora_manager.py index d7684fbf34ab..6f0a85231408 100644 --- a/tests/lora/test_lora_manager.py +++ b/tests/lora/test_lora_manager.py @@ -63,9 +63,9 @@ def test_from_lora_tensors(sql_lora_files, device): assert lora.lora_b is not None assert lora.lora_a.device == torch.device(device) assert lora.lora_b.device == torch.device(device) - assert (lora.lora_a.shape[1] == lora.lora_b.shape[0] + assert (lora.lora_a.shape[0] == lora.lora_b.shape[1] ), f"{lora.lora_a.shape=}, {lora.lora_b.shape=}" - assert lora.lora_a.shape[1] == 8 + assert lora.lora_a.shape[0] == 8 embeddings_module = next( (k for k in EMBEDDING_MODULES if k in module_name), None) if embeddings_module: @@ -86,8 +86,8 @@ def create_lora(lora_id: int, model: nn.Module, sub_modules: list[str], name, 8, 16, - torch.rand([w.shape[1], 8], device=device), - torch.rand([8, w.shape[0]], device=device), + torch.rand([8, w.shape[1]], device=device), + torch.rand([w.shape[0], 8], device=device), ) return LoRAModel(lora_id, 8, loras) @@ -109,8 +109,8 @@ def create_packed_lora( replaced_module_name, 8, 16, - torch.rand([w.shape[1], 8], device=device), - torch.rand([8, w.shape[0] // len(replaced_module_names)], + torch.rand([8, w.shape[1]], device=device), + torch.rand([w.shape[0] // len(replaced_module_names), 8], device=device), ) return LoRAModel(lora_id, 8, loras) diff --git a/tests/lora/utils.py b/tests/lora/utils.py index ab475904d493..0432a1a9bba0 100644 --- a/tests/lora/utils.py +++ b/tests/lora/utils.py @@ -36,10 +36,10 @@ def init_random_lora( module_name, rank=rank, lora_alpha=1, - lora_a=torch.rand([weight.shape[1], rank], + lora_a=torch.rand([rank, weight.shape[1]], dtype=weight.dtype, device=self._device), - lora_b=torch.rand([rank, weight.shape[0]], + lora_b=torch.rand([weight.shape[0], rank], dtype=weight.dtype, device=self._device), ) @@ -67,8 +67,8 @@ def init_lora( module_name, rank=rank, lora_alpha=1, - lora_a=torch.rand([input_dim, rank], device="cuda"), - lora_b=torch.rand([rank, output_dim], device="cuda"), + lora_a=torch.rand([rank, input_dim], device="cuda"), + lora_b=torch.rand([output_dim, input_dim], device="cuda"), embeddings_tensor=embeddings_tensor, ) self.set_module_lora(module_name, lora) diff --git a/vllm/lora/layers/base_linear.py b/vllm/lora/layers/base_linear.py index 85a1f86ce6bf..6cf5815ef12d 100644 --- a/vllm/lora/layers/base_linear.py +++ b/vllm/lora/layers/base_linear.py @@ -121,18 +121,18 @@ def set_lora( lora_bias = self.slice_bias(lora_bias) self.lora_a_stacked[0][index, - 0, :lora_a.shape[1], :lora_a.shape[0]].copy_( - lora_a.T, non_blocking=True) + 0, :lora_a.shape[0], :lora_a.shape[1]].copy_( + lora_a, non_blocking=True) self.lora_b_stacked[0][index, - 0, :lora_b.shape[1], :lora_b.shape[0]].copy_( - lora_b.T, non_blocking=True) + 0, :lora_b.shape[0], :lora_b.shape[1]].copy_( + lora_b, non_blocking=True) if lora_bias is not None: self.lora_bias_stacked = cast(tuple[torch.Tensor, ...], self.lora_bias_stacked) assert len(self.lora_bias_stacked) self.lora_bias_stacked[0][index, 0, :lora_bias.shape[0]].copy_( - lora_bias.T, non_blocking=True) + lora_bias, non_blocking=True) def apply(self, 
x: torch.Tensor, diff --git a/vllm/lora/layers/column_parallel_linear.py b/vllm/lora/layers/column_parallel_linear.py index 658fd23165da..fa4eb272a69f 100644 --- a/vllm/lora/layers/column_parallel_linear.py +++ b/vllm/lora/layers/column_parallel_linear.py @@ -99,13 +99,13 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: if self.is_merged_col_linear: tp_rank = get_tensor_model_parallel_rank() shard_size = self.output_size // 2 - offset = lora_b.shape[-1] // 2 + offset = lora_b.shape[0] // 2 - left_weight = lora_b[:, tp_rank * shard_size:(tp_rank + 1) * - shard_size] - right_weight = lora_b[:, offset + tp_rank * shard_size:offset + - (tp_rank + 1) * shard_size] - lora_b = torch.cat([left_weight, right_weight], dim=1) + left_weight = lora_b[tp_rank * shard_size:(tp_rank + 1) * + shard_size, :] + right_weight = lora_b[offset + tp_rank * shard_size:offset + + (tp_rank + 1) * shard_size, :] + lora_b = torch.cat([left_weight, right_weight], dim=0) # Applicable to cases where the base_layer is # ColumnParallelLinear. else: @@ -113,7 +113,7 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: shard_size = self.output_size start_idx = tensor_model_parallel_rank * shard_size end_idx = (tensor_model_parallel_rank + 1) * shard_size - lora_b = lora_b[:, start_idx:end_idx] + lora_b = lora_b[start_idx:end_idx, :] return lora_b def slice_bias(self, bias: torch.Tensor) -> torch.Tensor: @@ -251,9 +251,8 @@ def slice_lora_b( for i, (shard_id, shard_size) in enumerate( zip(self.output_ids, self.output_slices)): if (lora_b_i := lora_b[i]) is not None: - sliced_lora_b[i] = lora_b_i[:, - shard_size * shard_id:shard_size * - (shard_id + 1)] + sliced_lora_b[i] = lora_b_i[shard_size * shard_id:shard_size * + (shard_id + 1), :] return sliced_lora_b def slice_bias( @@ -285,12 +284,12 @@ def set_lora( for i in range(self.n_slices): if (lora_a_i := lora_a[i]) is not None: self.lora_a_stacked[i][ - index, 0, :lora_a_i.shape[1], :lora_a_i.shape[0]].copy_( - lora_a_i.T, non_blocking=True) + index, 0, :lora_a_i.shape[0], :lora_a_i.shape[1]].copy_( + lora_a_i, non_blocking=True) if (lora_b_i := lora_b[i]) is not None: self.lora_b_stacked[i][ - index, 0, :lora_b_i.shape[1], :lora_b_i.shape[0]].copy_( - lora_b_i.T, non_blocking=True) + index, 0, :lora_b_i.shape[0], :lora_b_i.shape[1]].copy_( + lora_b_i, non_blocking=True) if lora_bias is not None: self.lora_bias_stacked = cast(tuple[torch.Tensor, ...], @@ -299,7 +298,7 @@ def set_lora( if (lora_bias_i := lora_bias[i]) is not None: self.lora_bias_stacked[i][index, 0, :lora_bias_i.shape[0]].copy_( - lora_bias_i.T, + lora_bias_i, non_blocking=True) @classmethod @@ -345,18 +344,18 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: tp_rank = get_tensor_model_parallel_rank() self.q_shard_id = tp_rank self.kv_shard_id = tp_rank // self.base_layer.num_kv_head_replicas - lora_b_q = lora_b[:, self.q_proj_shard_size * + lora_b_q = lora_b[self.q_proj_shard_size * self.q_shard_id:self.q_proj_shard_size * - (self.q_shard_id + 1)] + (self.q_shard_id + 1), :] k_offset = self.q_proj_total_size - lora_b_k = lora_b[:, k_offset + + lora_b_k = lora_b[k_offset + self.kv_proj_shard_size * self.kv_shard_id:k_offset + - self.kv_proj_shard_size * (self.kv_shard_id + 1)] + self.kv_proj_shard_size * (self.kv_shard_id + 1), :] v_offset = k_offset + self.kv_proj_total_size - lora_b_v = lora_b[:, v_offset + + lora_b_v = lora_b[v_offset + self.kv_proj_shard_size * self.kv_shard_id:v_offset + - self.kv_proj_shard_size * (self.kv_shard_id + 1)] - lora_b = 
torch.cat([lora_b_q, lora_b_k, lora_b_v], dim=1) + self.kv_proj_shard_size * (self.kv_shard_id + 1), :] + lora_b = torch.cat([lora_b_q, lora_b_k, lora_b_v], dim=0) return lora_b def slice_bias(self, bias: torch.Tensor) -> torch.Tensor: @@ -465,7 +464,7 @@ def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: tp_rank = get_tensor_model_parallel_rank() shard_size = self.lora_a_stacked[0].shape[2] start_idx = tp_rank * shard_size - lora_a = lora_a[:, start_idx:start_idx + shard_size] + lora_a = lora_a[start_idx:start_idx + shard_size, :] return lora_a def apply(self, @@ -508,10 +507,10 @@ def slice_lora_a( output_shard_size = self.lora_a_stacked[0].shape[2] output_start_idx = self.tp_rank * output_shard_size lora_a = [ - lora_a[0][:, output_start_idx:output_start_idx + - output_shard_size] if lora_a[0] is not None else None, - lora_a[1][:, output_start_idx:output_start_idx + - output_shard_size] if lora_a[1] is not None else None, + lora_a[0][output_start_idx:output_start_idx + + output_shard_size, :] if lora_a[0] is not None else None, + lora_a[1][output_start_idx:output_start_idx + + output_shard_size, :] if lora_a[1] is not None else None, ] return lora_a @@ -551,7 +550,7 @@ def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: tp_rank = get_tensor_model_parallel_rank() shard_size = self.lora_a_stacked[0].shape[2] start_idx = tp_rank * shard_size - lora_a = lora_a[:, start_idx:start_idx + shard_size] + lora_a = lora_a[start_idx:start_idx + shard_size, :] return lora_a def apply(self, @@ -589,12 +588,12 @@ def slice_lora_a( shard_size = [self.lora_a_stacked[i].shape[2] for i in range(3)] start_idx = [self.tp_rank * shard_size[i] for i in range(3)] lora_a = [ - lora_a[0][:, start_idx[0]:start_idx[0] + - shard_size[0]] if lora_a[0] is not None else None, - lora_a[1][:, start_idx[1]:start_idx[1] + - shard_size[1]] if lora_a[1] is not None else None, - lora_a[2][:, start_idx[2]:start_idx[2] + - shard_size[2]] if lora_a[2] is not None else None, + lora_a[0][start_idx[0]:start_idx[0] + + shard_size[0], :] if lora_a[0] is not None else None, + lora_a[1][start_idx[1]:start_idx[1] + + shard_size[1], :] if lora_a[1] is not None else None, + lora_a[2][start_idx[2]:start_idx[2] + + shard_size[2], :] if lora_a[2] is not None else None, ] return lora_a diff --git a/vllm/lora/layers/logits_processor.py b/vllm/lora/layers/logits_processor.py index a50dcfa748f2..b8fbad3a4af0 100644 --- a/vllm/lora/layers/logits_processor.py +++ b/vllm/lora/layers/logits_processor.py @@ -140,11 +140,11 @@ def set_lora( ): self.reset_lora(index) self.lora_a_stacked[index, - 0, :lora_a.shape[1], :lora_a.shape[0]].copy_( - lora_a.T, non_blocking=True) + 0, :lora_a.shape[0], :lora_a.shape[1]].copy_( + lora_a, non_blocking=True) self.lora_b_stacked[index, - 0, :lora_b.shape[1], :lora_b.shape[0]].copy_( - lora_b.T, non_blocking=True) + 0, :lora_b.shape[0], :lora_b.shape[1]].copy_( + lora_b, non_blocking=True) if embeddings_tensor is not None: self.embeddings_tensors[ index, diff --git a/vllm/lora/layers/row_parallel_linear.py b/vllm/lora/layers/row_parallel_linear.py index 18ef6fd1ddd7..cac2c92136dc 100644 --- a/vllm/lora/layers/row_parallel_linear.py +++ b/vllm/lora/layers/row_parallel_linear.py @@ -39,7 +39,7 @@ def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: shard_size = self.input_size start_idx = self.tp_rank * shard_size end_idx = (self.tp_rank + 1) * shard_size - lora_a = lora_a[start_idx:end_idx, :] + lora_a = lora_a[:,start_idx:end_idx] return lora_a def slice_lora_b(self, lora_b: 
torch.Tensor) -> torch.Tensor: @@ -122,7 +122,7 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: shard_size = self.lora_b_stacked[0].shape[2] start_idx = self.tp_rank * shard_size end_idx = (self.tp_rank + 1) * shard_size - lora_b = lora_b[:, start_idx:end_idx] + lora_b = lora_b[ start_idx:end_idx,:] return lora_b def slice_bias(self, bias: torch.Tensor) -> torch.Tensor: diff --git a/vllm/lora/layers/vocal_parallel_embedding.py b/vllm/lora/layers/vocal_parallel_embedding.py index 4d6218d97097..ca01c7e17fff 100644 --- a/vllm/lora/layers/vocal_parallel_embedding.py +++ b/vllm/lora/layers/vocal_parallel_embedding.py @@ -95,11 +95,13 @@ def set_lora( bias: Optional[torch.Tensor] = None, ): self.reset_lora(index) - self.lora_a_stacked[index, :lora_a.shape[0], :lora_a.shape[1]].copy_( - lora_a, non_blocking=True) + # NOTE self.lora_a_stacked is row-major, and lora_a is col-major, + # so we need transpose here + self.lora_a_stacked[index, :lora_a.shape[1], :lora_a.shape[0]].copy_( + lora_a.T, non_blocking=True) self.lora_b_stacked[index, - 0, :lora_b.shape[1], :lora_b.shape[0]].copy_( - lora_b.T, non_blocking=True) + 0, :lora_b.shape[0], :lora_b.shape[1]].copy_( + lora_b, non_blocking=True) if embeddings_tensor is not None: self.embeddings_tensors[ index, diff --git a/vllm/lora/lora_weights.py b/vllm/lora/lora_weights.py index 958364fca592..e3198fb3d3ae 100644 --- a/vllm/lora/lora_weights.py +++ b/vllm/lora/lora_weights.py @@ -86,11 +86,11 @@ def create_dummy_lora_weights( embeddings_tensor_dim: Optional[int] = None, bias_enabled: Optional[bool] = False) -> "LoRALayerWeights": pin_memory = str(device) == "cpu" and is_pin_memory_available() - lora_a = torch.zeros([input_dim, rank], + lora_a = torch.zeros([rank, input_dim], dtype=dtype, device=device, pin_memory=pin_memory) - lora_b = torch.zeros([rank, output_dim], + lora_b = torch.zeros([output_dim, rank], dtype=dtype, device=device, pin_memory=pin_memory) diff --git a/vllm/lora/models.py b/vllm/lora/models.py index 9ea46be65cff..cc64cc78affa 100644 --- a/vllm/lora/models.py +++ b/vllm/lora/models.py @@ -152,30 +152,29 @@ def from_lora_tensors( module_name, peft_helper, lora_embeddings_tensor) if is_bias: - loras[module_name].bias = tensor.to(device=device, - dtype=dtype).t() - bias = tensor.to(device=device, dtype=dtype).t() + loras[module_name].bias = tensor.to(device=device, dtype=dtype) + bias = tensor.to(device=device, dtype=dtype) if pin_memory: bias = bias.pin_memory() loras[module_name].bias = bias elif is_lora_a: loras[module_name].lora_a = tensor.to(device=device, - dtype=dtype).t() + dtype=dtype) if pin_memory: loras[module_name].lora_a = loras[ module_name].lora_a.pin_memory() else: loras[module_name].lora_b = tensor.to(device=device, - dtype=dtype).t() + dtype=dtype) assert embedding_padding_modules is not None if any(name in module_name for name in embedding_padding_modules ) and target_embedding_padding is not None: lora_b = loras[module_name].lora_b - assert target_embedding_padding >= lora_b.shape[1] - addition = target_embedding_padding - lora_b.shape[1] + assert target_embedding_padding >= lora_b.shape[0] + addition = target_embedding_padding - lora_b.shape[0] loras[module_name].lora_b = torch.nn.functional.pad( - lora_b, (0, addition)) + lora_b, (0, 0, 0, addition)) if pin_memory: loras[module_name].lora_b = loras[ module_name].lora_b.pin_memory() @@ -585,7 +584,6 @@ def create_dummy_lora( "cpu", bias_enabled=bias_enabled, ) - lora.optimize() else: parts = module_name.split(".") replacements = 
self.packed_modules_mapping[parts[-1]] @@ -600,7 +598,6 @@ def create_dummy_lora( "cpu", bias_enabled=bias_enabled, ) - lora.optimize() subloras.append(lora) lora = PackedLoRALayerWeights.pack(subloras) model.loras[module_name] = lora From 0d9fe260dda994646b1e74f424b2c5e32190a78f Mon Sep 17 00:00:00 2001 From: vllmellm Date: Tue, 23 Sep 2025 21:05:11 +0800 Subject: [PATCH 267/518] [docs] Benchmark Serving Incorrect Arg (#25474) Signed-off-by: vllmellm --- docs/contributing/benchmarks.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md index 2a03ce1dffd6..a97d1fa6a3a5 100644 --- a/docs/contributing/benchmarks.md +++ b/docs/contributing/benchmarks.md @@ -680,7 +680,7 @@ vllm bench serve \ --save-result \ --result-dir ~/vllm_benchmark_results \ --save-detailed \ - --endpoint /v1/chat/completion + --endpoint /v1/chat/completions ``` ##### Videos (ShareGPT4Video) @@ -707,7 +707,7 @@ vllm bench serve \ --save-result \ --result-dir ~/vllm_benchmark_results \ --save-detailed \ - --endpoint /v1/chat/completion + --endpoint /v1/chat/completions ``` ##### Synthetic Random Images (random-mm) From b6a136b58c10dde773045c471db762ed09fc7468 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 23 Sep 2025 21:05:46 +0800 Subject: [PATCH 268/518] [CI/Build] Fix disabled v1 attention backend selection test (#25471) Signed-off-by: Isotr0py --- tests/kernels/attention/test_attention_selector.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 38ab40f88ae0..a4e200775c09 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -67,7 +67,6 @@ def generate_params(): return params -@pytest.mark.skip(reason="Skipped for now. Should be revisited.") @pytest.mark.parametrize("device, name, use_mla, block_size", generate_params()) def test_env( @@ -189,7 +188,7 @@ def test_env( # FlashMLA only supports block_size == 64 pytest.skip("FlashMLA only supports block_size 64") else: - from vllm.attention.backends.flashmla import ( + from vllm.v1.attention.backends.mla.flashmla import ( # noqa: E501 is_flashmla_supported) is_supported, _ = is_flashmla_supported() if not is_supported: From 61d1b35561c07589331fedf5eba58eaaedcd4423 Mon Sep 17 00:00:00 2001 From: Joel Date: Tue, 23 Sep 2025 21:49:13 +0800 Subject: [PATCH 269/518] [BugFix] Register expert_map as named buffer for wake_up and sleep (#25458) Signed-off-by: wuxibin Signed-off-by: youkaichao Co-authored-by: youkaichao --- vllm/model_executor/layers/fused_moe/layer.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 1f80e972b7f0..71cc2bcf174d 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -972,12 +972,15 @@ def __init__( "experts. 
Falling back to linear expert placement.") expert_placement_strategy = "linear" - self.local_num_experts, self.expert_map = determine_expert_map( + self.expert_map: Optional[torch.Tensor] + local_num_experts, expert_map = determine_expert_map( ep_size=self.ep_size, ep_rank=self.ep_rank, global_num_experts=self.global_num_experts, expert_placement_strategy=expert_placement_strategy, ) + self.local_num_experts = local_num_experts + self.register_buffer("expert_map", expert_map) logger.info_once( "[EP Rank %s/%s] Expert parallelism is enabled. Expert " "placement strategy: %s. Local/global" @@ -1154,10 +1157,12 @@ def update_expert_map(self): # ep_size and ep_rank should already be updated assert self.expert_map is not None with self.expert_map.device: - self.local_num_experts, self.expert_map = determine_expert_map( + local_num_experts, expert_map = determine_expert_map( ep_size=self.ep_size, ep_rank=self.ep_rank, global_num_experts=self.global_num_experts) + self.local_num_experts = local_num_experts + self.register_buffer("expert_map", expert_map) def _load_per_tensor_weight_scale(self, shard_id: str, param: torch.nn.Parameter, From f05a4f0e345bbfd4a7cb3f421bd9412e1cc53e74 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Tue, 23 Sep 2025 22:08:02 +0800 Subject: [PATCH 270/518] [P/D] Support NIXL connector to disconnect during a clean shutdown (#24423) Signed-off-by: chaunceyjiang Co-authored-by: Mark McLoughlin --- .../kv_connector/unit/test_nixl_connector.py | 52 +++++++++++++++++++ .../kv_connector/v1/nixl_connector.py | 35 ++++++++++--- 2 files changed, 80 insertions(+), 7 deletions(-) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index fa698a2eabd9..24cc83c28614 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -60,6 +60,9 @@ def get_reg_descs(self, caches_data, memory_type: str) -> list: def register_memory(self, descs, backends) -> None: pass + def deregister_memory(self, descs) -> None: + pass + def get_xfer_descs(self, blocks_data, memory_type: str) -> list: return [str(uuid.uuid4()) for _ in blocks_data] @@ -86,6 +89,12 @@ def check_xfer_state(self, handle: int) -> str: def release_xfer_handle(self, handle: int) -> None: pass + def release_dlist_handle(self, handle: int) -> None: + pass + + def remove_remote_agent(self, agent: str) -> None: + pass + def send_notif(self, agent_name: str, notif_msg: bytes) -> None: pass @@ -905,3 +914,46 @@ def test_kv_buffer_to_nixl_memory_types(dist_init, kv_buffer_device, # Verify get_reg_descs was called with the correct memory_type assert connector.connector_worker.kv_buffer_device == kv_buffer_device assert connector.connector_worker.nixl_memory_type == nixl_memory_type + + +@patch( + "vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector.NixlWrapper", + FakeNixlWrapper) +def test_shutdown_cleans_up_resources(dist_init): + """Test that shutdown() properly cleans up all resources.""" + vllm_config = create_vllm_config() + + worker = NixlConnectorWorker(vllm_config, + vllm_config.kv_transfer_config.engine_id) + nixl_wrapper = worker.nixl_wrapper + + with patch.object(worker, '_handshake_initiation_executor') as mock_exec, \ + patch.object(worker, '_nixl_handshake_listener_t') as mock_listener, \ + patch.object(nixl_wrapper, 'release_xfer_handle') as mock_rel_xfer, \ + patch.object(nixl_wrapper, 'release_dlist_handle') as mock_rel_dlist, \ + patch.object(nixl_wrapper, 'remove_remote_agent') as 
mock_rem_agent, \ + patch.object(nixl_wrapper, 'deregister_memory') as mock_dereg: + + worker._recving_transfers = {"req1": [(123, time.perf_counter())]} + worker.src_xfer_side_handle = 456 + worker.dst_xfer_side_handles = {"engine1": 789} + worker._remote_agents = {"engine1": {0: "agent1"}} + worker._registered_descs = ["desc1", "desc2"] + + worker.shutdown() + + # Test idempotency + worker.shutdown() + worker.shutdown() + + mock_exec.shutdown.assert_called_with(wait=False) + mock_listener.join.assert_called_once_with(timeout=0) + + mock_rel_xfer.assert_called_once_with(123) + assert mock_rel_dlist.call_count == 2 + mock_rel_dlist.assert_any_call(456) # src handle + mock_rel_dlist.assert_any_call(789) # dst handle + mock_rem_agent.assert_called_once_with("agent1") + assert mock_dereg.call_count == 2 + mock_dereg.assert_any_call("desc1") + mock_dereg.assert_any_call("desc2") diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 82b483447e33..64feddb591c2 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -250,6 +250,10 @@ def wait_for_save(self): self.connector_worker.copy_blocks: self.connector_worker.save_kv_to_host(self._connector_metadata) + def shutdown(self): + if self.connector_worker is not None: + self.connector_worker.shutdown() + class NixlConnectorScheduler: """Implementation of Scheduler side methods""" @@ -586,13 +590,6 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): self.consumer_notification_counts_by_req = defaultdict[ReqId, int](int) self.xfer_stats = NixlKVConnectorStats() - def __del__(self): - """Cleanup background threads on destruction.""" - if executor := getattr(self, "_handshake_initiation_executor", None): - executor.shutdown(wait=False) - if listener_t := getattr(self, "_nixl_handshake_listener_t", None): - listener_t.join(timeout=0) - @staticmethod def _nixl_handshake_listener(metadata: NixlAgentMetadata, ready_event: threading.Event, base_port: int, @@ -1346,6 +1343,30 @@ def get_kv_connector_stats(self) -> Optional[KVConnectorStats]: return self.xfer_stats.clone_and_reset() return None + def shutdown(self): + """Shutdown the connector worker.""" + self._handshake_initiation_executor.shutdown(wait=False) + if self._nixl_handshake_listener_t is not None: + self._nixl_handshake_listener_t.join(timeout=0) + self._nixl_handshake_listener_t = None + for handles in self._recving_transfers.values(): + for handle, _ in handles: + self.nixl_wrapper.release_xfer_handle(handle) + self._recving_transfers.clear() + if self.src_xfer_side_handle: + self.nixl_wrapper.release_dlist_handle(self.src_xfer_side_handle) + self.src_xfer_side_handle = 0 + for dst_xfer_side_handle in self.dst_xfer_side_handles.values(): + self.nixl_wrapper.release_dlist_handle(dst_xfer_side_handle) + self.dst_xfer_side_handles.clear() + for remote_agents in self._remote_agents.values(): + for agent_name in remote_agents.values(): + self.nixl_wrapper.remove_remote_agent(agent_name) + self._remote_agents.clear() + for desc in self._registered_descs: + self.nixl_wrapper.deregister_memory(desc) + self._registered_descs.clear() + @contextlib.contextmanager def zmq_ctx(socket_type: Any, addr: str) -> Iterator[zmq.Socket]: From da5e7e432979254eb0d8a528c40ae73e121cca20 Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Tue, 23 Sep 2025 22:23:22 +0800 Subject: [PATCH 271/518] [Docs] NixlConnector quickstart 
guide (#24249) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Peter Pan Signed-off-by: Peter Pan Signed-off-by: Nicolò Lucchesi Co-authored-by: Nicolò Lucchesi --- docs/features/disagg_prefill.md | 2 +- docs/features/nixl_connector_usage.md | 159 ++++++++++++++++++ .../nixl_integration/run_accuracy_test.sh | 10 +- 3 files changed, 168 insertions(+), 3 deletions(-) create mode 100644 docs/features/nixl_connector_usage.md diff --git a/docs/features/disagg_prefill.md b/docs/features/disagg_prefill.md index cb62213cc7af..2c69304db339 100644 --- a/docs/features/disagg_prefill.md +++ b/docs/features/disagg_prefill.md @@ -23,7 +23,7 @@ Now supports 5 types of connectors: - **SharedStorageConnector**: refer to for the example usage of SharedStorageConnector disaggregated prefilling. - **LMCacheConnectorV1**: refer to for the example usage of LMCacheConnectorV1 disaggregated prefilling which uses NIXL as the underlying KV transmission. -- **NixlConnector**: refer to for the example usage of NixlConnector disaggregated prefilling which support fully async send/recv. +- **NixlConnector**: refer to for the example usage of NixlConnector disaggregated prefilling which support fully async send/recv. For detailed usage guide, see [NixlConnector Usage Guide](nixl_connector_usage.md). - **P2pNcclConnector**: refer to for the example usage of P2pNcclConnector disaggregated prefilling. - **MultiConnector**: take advantage of the kv_connector_extra_config: dict[str, Any] already present in KVTransferConfig to stash all the connectors we want in an ordered list of kwargs.such as: diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md new file mode 100644 index 000000000000..de50f091df42 --- /dev/null +++ b/docs/features/nixl_connector_usage.md @@ -0,0 +1,159 @@ +# NixlConnector Usage Guide + +NixlConnector is a high-performance KV cache transfer connector for vLLM's disaggregated prefilling feature. It provides fully asynchronous send/receive operations using the NIXL library for efficient cross-process KV cache transfer. + +## Prerequisites + +### Installation + +Install the NIXL library: `uv pip install nixl`, as a quick start. + +- Refer to [NIXL official repository](https://github.com/ai-dynamo/nixl) for more installation instructions +- The specified required NIXL version can be found in [requirements/kv_connectors.txt](../../requirements/kv_connectors.txt) and other relevant config files + +### Transport Configuration + +NixlConnector uses NIXL library for underlying communication, which supports multiple transport backends. UCX (Unified Communication X) is the primary default transport library used by NIXL. Configure transport environment variables: + +```bash +# Example UCX configuration, adjust according to your enviroment +export UCX_TLS=all # or specify specific transports like "rc,ud,sm,^cuda_ipc" ..etc +export UCX_NET_DEVICES=all # or specify network devices like "mlx5_0:1,mlx5_1:1" +``` + +!!! tip + When using UCX as the transport backend, NCCL environment variables (like `NCCL_IB_HCA`, `NCCL_SOCKET_IFNAME`) are not applicable to NixlConnector, so configure UCX-specific environment variables instead of NCCL variables. 
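Before launching any instances, it can help to confirm the NIXL Python package is actually importable in the serving environment. A minimal sketch, assuming the package is distributed under the name `nixl` (matching the `uv pip install nixl` step above); compare the printed version against `requirements/kv_connectors.txt`:

```python
# Sanity-check that the NIXL Python package is installed and report its version.
# This is an illustrative snippet, not part of vLLM itself.
import importlib.util
from importlib import metadata

if importlib.util.find_spec("nixl") is None:
    raise SystemExit("NIXL is not installed; run `uv pip install nixl` first.")

try:
    print("nixl version:", metadata.version("nixl"))
except metadata.PackageNotFoundError:
    # Module is importable but has no distribution metadata (e.g. a source build).
    print("nixl is importable, but no distribution metadata was found.")
```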
+ +## Basic Usage (on the same host) + +### Producer (Prefiller) Configuration + +Start a prefiller instance that produces KV caches + +```bash +# 1st GPU as prefiller +CUDA_VISIBLE_DEVICES=0 \ +UCX_NET_DEVICES=all \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5600 \ +vllm serve Qwen/Qwen3-0.6B \ + --port 8100 \ + --enforce-eager \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' +``` + +### Consumer (Decoder) Configuration + +Start a decoder instance that consumes KV caches: + +```bash +# 2nd GPU as decoder +CUDA_VISIBLE_DEVICES=1 \ +UCX_NET_DEVICES=all \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5601 \ +vllm serve Qwen/Qwen3-0.6B \ + --port 8200 \ + --enforce-eager \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}' +``` + +### Proxy Server + +Use a proxy server to route requests between prefiller and decoder: + +```bash +python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \ + --port 8192 \ + --prefiller-hosts localhost \ + --prefiller-ports 8100 \ + --decoder-hosts localhost \ + --decoder-ports 8200 +``` + +## Environment Variables + +- `VLLM_NIXL_SIDE_CHANNEL_PORT`: Port for NIXL handshake communication + - Default: 5600 + - **Required for both prefiller and decoder instances** + - Each vLLM worker needs a unique port on its host; using the same port number across different hosts is fine + - For TP/DP deployments, each worker's port on a node is computed as: base_port + dp_rank * tp_size + tp_rank (e.g., with `--tensor-parallel-size=4` and base_port=5600, tp_rank 0..3 use ports 5600, 5601, 5602, 5603 on that node). + - Used for the initial NIXL handshake between the prefiller and the decoder + +- `VLLM_NIXL_SIDE_CHANNEL_HOST`: Host for side channel communication + - Default: "localhost" + - Set when prefiller and decoder are on different machines + - Connection info is passed via KVTransferParams from prefiller to decoder for handshake + +- `VLLM_NIXL_ABORT_REQUEST_TIMEOUT`: Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. (Optional) + - Default: 120 + - If a request is aborted and the decoder has not yet read the KV-cache blocks through the nixl channel, the prefill instance will release its KV-cache blocks after this timeout to avoid holding them indefinitely. 
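As a worked example of the per-node port layout described above, the sketch below applies the `base_port + dp_rank * tp_size + tp_rank` formula; it is only meant for planning firewall rules and avoiding port clashes on a single node (the same base port can be reused on other hosts):

```python
# Side-channel port assignment per worker on one node, following the formula above.
BASE_PORT = 5600  # VLLM_NIXL_SIDE_CHANNEL_PORT

def side_channel_port(base_port: int, dp_rank: int, tp_size: int, tp_rank: int) -> int:
    return base_port + dp_rank * tp_size + tp_rank

# --tensor-parallel-size=4 with a single DP rank on this node:
for tp_rank in range(4):
    port = side_channel_port(BASE_PORT, dp_rank=0, tp_size=4, tp_rank=tp_rank)
    print(f"dp_rank=0 tp_rank={tp_rank} -> port {port}")
# Expected: ports 5600, 5601, 5602, 5603
```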
+ +## Multi-Instance Setup + +### Multiple Prefiller Instances on Different Machines + +```bash +# Prefiller 1 on Machine A (example IP: ${IP1}) +VLLM_NIXL_SIDE_CHANNEL_HOST=${IP1} \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5600 \ +UCX_NET_DEVICES=all \ +vllm serve Qwen/Qwen3-0.6B --port 8000 \ + --tensor-parallel-size 8 \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_producer"}' + +# Prefiller 2 on Machine B (example IP: ${IP2}) +VLLM_NIXL_SIDE_CHANNEL_HOST=${IP2} \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5600 \ +UCX_NET_DEVICES=all \ +vllm serve Qwen/Qwen3-0.6B --port 8000 \ + --tensor-parallel-size 8 \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_producer"}' +``` + +### Multiple Decoder Instances on Different Machines + +```bash +# Decoder 1 on Machine C (example IP: ${IP3}) +VLLM_NIXL_SIDE_CHANNEL_HOST=${IP3} \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5600 \ +UCX_NET_DEVICES=all \ +vllm serve Qwen/Qwen3-0.6B --port 8000 \ + --tensor-parallel-size 8 \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_consumer"}' + +# Decoder 2 on Machine D (example IP: ${IP4}) +VLLM_NIXL_SIDE_CHANNEL_HOST=${IP4} \ +VLLM_NIXL_SIDE_CHANNEL_PORT=5600 \ +UCX_NET_DEVICES=all \ +vllm serve Qwen/Qwen3-0.6B --port 8000 \ + --tensor-parallel-size 8 \ + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_consumer"}' +``` + +### Proxy for Multiple Instances + +```bash +python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \ + --port 8192 \ + --prefiller-hosts ${IP1} ${IP2} \ + --prefiller-ports 8000 8000 \ + --decoder-hosts ${IP3} ${IP4} \ + --decoder-ports 8000 8000 +``` + +### KV Role Options + +- **kv_producer**: For prefiller instances that generate KV caches +- **kv_consumer**: For decoder instances that consume KV caches from prefiller +- **kv_both**: Enables symmetric functionality where the connector can act as both producer and consumer. This provides flexibility for experimental setups and scenarios where the role distinction is not predetermined. + +!!! tip + NixlConnector currently does not distinguish `kv_role`; the actual prefiller/decoder roles are determined by the upper-level proxy (e.g., `toy_proxy_server.py` using `--prefiller-hosts` and `--decoder-hosts`). + Therefore, `kv_role` in `--kv-transfer-config` is effectively a placeholder and does not affect NixlConnector's behavior. 
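Once the proxy and both instance pools are up, a quick end-to-end smoke test can be sent through the proxy. A minimal sketch using only the standard library, assuming the toy proxy from the earlier example listens on port 8192 and forwards the OpenAI-compatible `/v1/completions` route; adjust the model name and prompt to your deployment:

```python
# Send one completion request through the disaggregated-prefill proxy.
import json
import urllib.request

payload = {
    "model": "Qwen/Qwen3-0.6B",
    "prompt": "The capital of France is",
    "max_tokens": 16,
    "temperature": 0.0,
}
req = urllib.request.Request(
    "http://localhost:8192/v1/completions",
    data=json.dumps(payload).encode("utf-8"),
    headers={"Content-Type": "application/json"},
)
with urllib.request.urlopen(req) as resp:
    print(json.loads(resp.read())["choices"][0]["text"])
```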
+ +## Example Scripts/Code + +Refer to these example scripts in the vLLM repository: + +- [run_accuracy_test.sh](../../tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) +- [toy_proxy_server.py](../../tests/v1/kv_connector/nixl_integration/toy_proxy_server.py) +- [test_accuracy.py](../../tests/v1/kv_connector/nixl_integration/test_accuracy.py) diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh index 9322410ec99e..bc8837079109 100755 --- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh +++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh @@ -85,7 +85,10 @@ run_tests_for_model() { echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT" # Build the command with or without model-specific args - BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ + BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID \ + UCX_NET_DEVICES=all \ + VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \ + vllm serve $model_name \ --port $PORT \ --enforce-eager \ --gpu-memory-utilization 0.2 \ @@ -117,7 +120,10 @@ run_tests_for_model() { echo "Starting decode instance $i on GPU $GPU_ID, port $PORT" # Build the command with or without model-specific args - BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT vllm serve $model_name \ + BASE_CMD="CUDA_VISIBLE_DEVICES=$GPU_ID \ + UCX_NET_DEVICES=all \ + VLLM_NIXL_SIDE_CHANNEL_PORT=$SIDE_CHANNEL_PORT \ + vllm serve $model_name \ --port $PORT \ --enforce-eager \ --gpu-memory-utilization 0.2 \ From 4c966e440e1b70ff22348b23559b410c06e7b9a8 Mon Sep 17 00:00:00 2001 From: Fanli Lin Date: Tue, 23 Sep 2025 22:32:57 +0800 Subject: [PATCH 272/518] [XPU] Fix MOE DP accuracy issue on XPU (#25465) --- examples/offline_inference/data_parallel.py | 11 ++++++++++- .../device_communicators/xpu_communicator.py | 19 +++++++++++++++++++ 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/examples/offline_inference/data_parallel.py b/examples/offline_inference/data_parallel.py index 98fe36d0fb79..0076d4d30ee8 100644 --- a/examples/offline_inference/data_parallel.py +++ b/examples/offline_inference/data_parallel.py @@ -101,6 +101,13 @@ def parse_args(): "--quantization", type=str, ) + parser.add_argument( + "--disable-expert-parallel", + dest="enable_expert_parallel", + action="store_false", + help="Disable expert parallel (default: enabled).", + ) + parser.set_defaults(enable_expert_parallel=True) return parser.parse_args() @@ -113,6 +120,7 @@ def main( dp_master_port, GPUs_per_dp_rank, enforce_eager, + enable_expert_parallel, trust_remote_code, max_num_seqs, max_model_len, @@ -168,7 +176,7 @@ def start(rank): model=model, tensor_parallel_size=GPUs_per_dp_rank, enforce_eager=enforce_eager, - enable_expert_parallel=True, + enable_expert_parallel=enable_expert_parallel, trust_remote_code=trust_remote_code, max_num_seqs=max_num_seqs, max_model_len=max_model_len, @@ -229,6 +237,7 @@ def start(rank): dp_master_port, tp_size, args.enforce_eager, + args.enable_expert_parallel, args.trust_remote_code, args.max_num_seqs, args.max_model_len, diff --git a/vllm/distributed/device_communicators/xpu_communicator.py b/vllm/distributed/device_communicators/xpu_communicator.py index 067315deb773..b236bae261e0 100644 --- a/vllm/distributed/device_communicators/xpu_communicator.py +++ b/vllm/distributed/device_communicators/xpu_communicator.py @@ -25,6 +25,12 @@ def __init__(self, 
super().__init__(cpu_group, device, device_group, unique_name) if self.use_all2all: all2all_backend = envs.VLLM_ALL2ALL_BACKEND + if all2all_backend != "naive": + logger.warning( + "`%s` all2all manager is not supported on XPU." + "Falling back to `naive` all2all manager for XPU.", + all2all_backend) + all2all_backend = "naive" if all2all_backend == "naive": from .all2all import NaiveAll2AllManager self.all2all_manager = NaiveAll2AllManager(self.cpu_group) @@ -67,3 +73,16 @@ def gather(self, def broadcast(self, input_: torch.Tensor, src: int = 0) -> None: dist.broadcast(input_, src=src, group=self.device_group) + + def dispatch( + self, hidden_states: torch.Tensor, + router_logits: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + assert self.all2all_manager is not None + hidden_states, router_logits = self.all2all_manager.dispatch( + hidden_states, router_logits) + return hidden_states, router_logits + + def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + assert self.all2all_manager is not None + hidden_states = self.all2all_manager.combine(hidden_states) + return hidden_states From 2c58742dff8613a3bd7496f2008ce927e18d38d1 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 23 Sep 2025 11:01:24 -0400 Subject: [PATCH 273/518] [UX] Change kv-cache-memory log level to debug (#25479) Signed-off-by: Michael Goin --- vllm/v1/worker/gpu_worker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index ca8734d28b45..ffea9bb35513 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -389,7 +389,7 @@ def compile_or_warm_up_model(self) -> None: f"utilize gpu memory. Current kv cache memory in use is " f"{int(self.available_kv_cache_memory_bytes)} bytes.") - logger.info(msg) + logger.debug(msg) # Warm up sampler and preallocate memory buffer for logits and other # sampling related tensors of max possible shape to avoid memory From a903669e10cc98507fa5c2ae099b7161f7140cf7 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Tue, 23 Sep 2025 17:26:13 +0200 Subject: [PATCH 274/518] [V1] Remove V0 code paths for Hybrid models (#25400) Signed-off-by: Thomas Parnell --- .../models/language/generation/test_hybrid.py | 55 +- tests/models/registry.py | 13 +- vllm/model_executor/layers/mamba/abstract.py | 5 +- .../layers/mamba/linear_attn.py | 107 +-- .../layers/mamba/mamba2_metadata.py | 177 ----- .../layers/mamba/mamba_mixer.py | 149 ++-- .../layers/mamba/mamba_mixer2.py | 172 ++--- .../layers/mamba/mamba_utils.py | 20 +- .../layers/mamba/ops/causal_conv1d.py | 3 - .../model_executor/layers/mamba/short_conv.py | 21 +- vllm/model_executor/models/bamba.py | 72 +- .../models/constant_size_cache.py | 137 ---- vllm/model_executor/models/falcon_h1.py | 65 +- .../model_executor/models/granitemoehybrid.py | 79 +- vllm/model_executor/models/jamba.py | 53 +- vllm/model_executor/models/lfm2.py | 7 - vllm/model_executor/models/mamba.py | 45 +- vllm/model_executor/models/mamba2.py | 63 +- vllm/model_executor/models/mamba_cache.py | 83 -- vllm/model_executor/models/minimax_cache.py | 36 - vllm/model_executor/models/minimax_text_01.py | 39 - vllm/model_executor/models/nemotron_h.py | 72 +- vllm/model_executor/models/phi4flash.py | 731 ------------------ vllm/model_executor/models/plamo2.py | 240 ++---- vllm/model_executor/models/qwen3_next.py | 36 +- vllm/model_executor/models/registry.py | 1 - vllm/model_executor/models/zamba2.py | 93 --- vllm/v1/attention/backends/gdn_attn.py | 8 +- 
vllm/v1/attention/backends/mamba2_attn.py | 15 +- vllm/v1/attention/backends/short_conv_attn.py | 14 +- vllm/v1/attention/backends/utils.py | 51 ++ 31 files changed, 359 insertions(+), 2303 deletions(-) delete mode 100644 vllm/model_executor/layers/mamba/mamba2_metadata.py delete mode 100644 vllm/model_executor/models/constant_size_cache.py delete mode 100644 vllm/model_executor/models/mamba_cache.py delete mode 100644 vllm/model_executor/models/minimax_cache.py delete mode 100644 vllm/model_executor/models/phi4flash.py diff --git a/tests/models/language/generation/test_hybrid.py b/tests/models/language/generation/test_hybrid.py index 0b1f90e27db8..e60a86075b8b 100644 --- a/tests/models/language/generation/test_hybrid.py +++ b/tests/models/language/generation/test_hybrid.py @@ -20,7 +20,9 @@ SSM_MODELS = [ "state-spaces/mamba-130m-hf", "tiiuae/falcon-mamba-tiny-dev", - "yujiepan/mamba2-codestral-v0.1-tiny-random", + # mamba2-codestral in transformers is broken pending: + # https://github.com/huggingface/transformers/pull/40861 + #"yujiepan/mamba2-codestral-v0.1-tiny-random", ] HYBRID_MODELS = [ @@ -31,18 +33,7 @@ "ibm-granite/granite-4.0-tiny-preview", "tiiuae/Falcon-H1-0.5B-Base", "LiquidAI/LFM2-1.2B", -] - -V1_SUPPORTED_MODELS = [ - "state-spaces/mamba-130m-hf", - "ai21labs/Jamba-tiny-dev", - "pfnet/plamo-2-1b", - "yujiepan/mamba2-codestral-v0.1-tiny-random", - "Zyphra/Zamba2-1.2B-instruct", - "hmellor/tiny-random-BambaForCausalLM", - "ibm-granite/granite-4.0-tiny-preview", - "tiiuae/Falcon-H1-0.5B-Base", - "LiquidAI/LFM2-1.2B", + "tiny-random/qwen3-next-moe", ] FULL_CUDA_GRAPH_MODELS = [ @@ -51,10 +42,6 @@ "Zyphra/Zamba2-1.2B-instruct", ] -V0_UNSUPPORTED_MODELS = [ - "LiquidAI/LFM2-1.2B", -] - FP32_STATE_MODELS = [ "state-spaces/mamba-130m-hf", "Zyphra/Zamba2-1.2B-instruct", @@ -88,20 +75,16 @@ def test_models( hf_outputs = hf_model.generate_greedy_logprobs_limit( example_prompts, max_tokens, num_logprobs) - if model in V1_SUPPORTED_MODELS: - with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: - vllm_v1_outputs = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - else: - vllm_v1_outputs = None + with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: + vllm_outputs = vllm_model.generate_greedy_logprobs( + example_prompts, max_tokens, num_logprobs) - if model in V1_SUPPORTED_MODELS: - check_logprobs_close( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v1_outputs, - name_0="hf", - name_1="vllm-v1", - ) + check_logprobs_close( + outputs_0_lst=hf_outputs, + outputs_1_lst=vllm_outputs, + name_0="hf", + name_1="vllm", + ) @pytest.mark.parametrize("model", [SSM_MODELS[0], HYBRID_MODELS[0]]) @@ -299,14 +282,14 @@ def test_full_cuda_graph( example_prompts, max_tokens, num_logprobs) with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS) as vllm_model: - vllm_v1_outputs = vllm_model.generate_greedy_logprobs( + vllm_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) check_logprobs_close( outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v1_outputs, + outputs_1_lst=vllm_outputs, name_0="hf", - name_1="vllm-v1", + name_1="vllm", ) @@ -340,12 +323,12 @@ def test_fp32_cache_state( with vllm_runner(model, max_num_seqs=MAX_NUM_SEQS, **{cache_dtype_param: "float32"}) as vllm_model: - vllm_v1_outputs = vllm_model.generate_greedy_logprobs( + vllm_outputs = vllm_model.generate_greedy_logprobs( example_prompts, max_tokens, num_logprobs) check_logprobs_close( outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_v1_outputs, + 
outputs_1_lst=vllm_outputs, name_0="hf", - name_1="vllm-v1", + name_1="vllm", ) diff --git a/tests/models/registry.py b/tests/models/registry.py index 6047a7a3e98d..8b62952ad590 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -312,14 +312,12 @@ def check_available_online( "PersimmonForCausalLM": _HfExamplesInfo("adept/persimmon-8b-chat"), "PhiForCausalLM": _HfExamplesInfo("microsoft/phi-2"), "Phi3ForCausalLM": _HfExamplesInfo("microsoft/Phi-3-mini-4k-instruct"), - "Phi4FlashForCausalLM": _HfExamplesInfo("microsoft/Phi-4-mini-flash-reasoning", # noqa: E501 - trust_remote_code=True, - v0_only=True, - max_model_len=10240), "PhiMoEForCausalLM": _HfExamplesInfo("microsoft/Phi-3.5-MoE-instruct", trust_remote_code=True), "Plamo2ForCausalLM": _HfExamplesInfo("pfnet/plamo-2-1b", - trust_remote_code=True), + max_transformers_version="4.55.4", + transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers", # noqa: E501 + trust_remote_code=True), "QWenLMHeadModel": _HfExamplesInfo("Qwen/Qwen-7B-Chat", max_transformers_version="4.53", transformers_version_reason="HF model uses remote code that is not compatible with latest Transformers", # noqa: E501 @@ -330,7 +328,8 @@ def check_available_online( "Qwen3ForCausalLM": _HfExamplesInfo("Qwen/Qwen3-8B"), "Qwen3MoeForCausalLM": _HfExamplesInfo("Qwen/Qwen3-30B-A3B"), "Qwen3NextForCausalLM": _HfExamplesInfo("Qwen/Qwen3-Next-80B-A3B-Instruct", - min_transformers_version="4.56.2"), + extras={"tiny-random": "tiny-random/qwen3-next-moe"}, # noqa: E501 + min_transformers_version="4.56.3"), "RWForCausalLM": _HfExamplesInfo("tiiuae/falcon-40b"), "SeedOssForCausalLM": _HfExamplesInfo("ByteDance-Seed/Seed-OSS-36B-Instruct", # noqa: E501 trust_remote_code=True, @@ -644,7 +643,7 @@ def check_available_online( trust_remote_code=True, speculative_model="XiaomiMiMo/MiMo-7B-RL"), "Qwen3NextMTP": _HfExamplesInfo("Qwen/Qwen3-Next-80B-A3B-Instruct", - min_transformers_version="4.56.2"), + min_transformers_version="4.56.3"), } _TRANSFORMERS_BACKEND_MODELS = { diff --git a/vllm/model_executor/layers/mamba/abstract.py b/vllm/model_executor/layers/mamba/abstract.py index a524e1340580..6da62b5426bb 100644 --- a/vllm/model_executor/layers/mamba/abstract.py +++ b/vllm/model_executor/layers/mamba/abstract.py @@ -20,10 +20,7 @@ class MambaBase(AttentionLayerBase): # Contains the KV cache (mamba state) for the layer # in the shape specified by `self.get_state_shape`. - # The outer list is for v0 PP virtual engine. Though this code path - # only runs for v1, we have to do this to unify with the interface - # of Attention + v0 PP. - kv_cache: list[Iterable[torch.Tensor]] + kv_cache: tuple[torch.Tensor, ...] 
@abstractmethod def get_state_shape(self) -> Iterable[tuple[int, ...]]: diff --git a/vllm/model_executor/layers/mamba/linear_attn.py b/vllm/model_executor/layers/mamba/linear_attn.py index 5fe37a6289e0..6a901b47b8b6 100644 --- a/vllm/model_executor/layers/mamba/linear_attn.py +++ b/vllm/model_executor/layers/mamba/linear_attn.py @@ -15,7 +15,6 @@ from einops import rearrange from torch import nn -from vllm import envs from vllm.attention import AttentionMetadata from vllm.config import CacheConfig, ModelConfig, get_current_vllm_config from vllm.distributed.communication_op import tensor_model_parallel_all_reduce @@ -42,8 +41,6 @@ import torch import torch.distributed -from vllm.model_executor.models.minimax_cache import MinimaxCacheParams - class MiniMaxText01RMSNormTP(CustomOp): name = "MiniMaxText01RMSNormTP" @@ -225,11 +222,10 @@ def __init__( self.tp_heads:(self.tp_rank + 1) * self.tp_heads].contiguous() - if envs.VLLM_USE_V1: - compilation_config = get_current_vllm_config().compilation_config - if prefix in compilation_config.static_forward_context: - raise ValueError(f"Duplicate layer name: {prefix}") - compilation_config.static_forward_context[prefix] = self + compilation_config = get_current_vllm_config().compilation_config + if prefix in compilation_config.static_forward_context: + raise ValueError(f"Duplicate layer name: {prefix}") + compilation_config.static_forward_context[prefix] = self @staticmethod def weight_direct_load(param: torch.Tensor, @@ -268,8 +264,7 @@ def _prefill_and_mix_infer(self, q, k, v, kv_cache, state_indices_tensor, break if _prefill_idx >= len(state_indices_tensor): break - # prefills are packed at end of batch in V1 - offset = attn_metadata.num_decode_tokens if envs.VLLM_USE_V1 else 0 + offset = attn_metadata.num_decode_tokens _start = attn_metadata.query_start_loc[offset + _prefill_idx] _end = attn_metadata.query_start_loc[offset + _prefill_idx + 1] slot_id = state_indices_tensor[offset + _prefill_idx] @@ -291,10 +286,7 @@ def _prefill_and_mix_infer(self, q, k, v, kv_cache, state_indices_tensor, hidden_decode = self._decode_infer(q, k, v, kv_cache, state_indices_tensor, attn_metadata) - if envs.VLLM_USE_V1: - hidden.insert(0, hidden_decode) - else: - hidden.append(hidden_decode) + hidden.insert(0, hidden_decode) if not hidden: return torch.empty((0, q.size(-1)), device=q.device, dtype=q.dtype) @@ -304,40 +296,28 @@ def _prefill_and_mix_infer(self, q, k, v, kv_cache, state_indices_tensor, def _decode_infer(self, q, k, v, kv_cache, state_indices_tensor, attn_metadata): - if not envs.VLLM_USE_V1: - q = q[attn_metadata.num_prefill_tokens:].unsqueeze(2).contiguous() - k = k[attn_metadata.num_prefill_tokens:].unsqueeze(2).contiguous() - v = v[attn_metadata.num_prefill_tokens:].unsqueeze(2).contiguous() - num_prefills = getattr(attn_metadata, "num_prefills", 0) - slot_id = state_indices_tensor[num_prefills:] - else: - q = q[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() - k = k[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() - v = v[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() - slot_id = state_indices_tensor[:attn_metadata.num_decodes] + q = q[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() + k = k[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() + v = v[:attn_metadata.num_decode_tokens].unsqueeze(2).contiguous() + slot_id = state_indices_tensor[:attn_metadata.num_decodes] hidden = linear_decode_forward_triton(q, k, v, kv_cache, self.tp_slope, slot_id, 32) return hidden def forward(self, 
hidden_states: torch.Tensor, output: torch.Tensor, - positions: torch.Tensor, - kv_caches: MinimaxCacheParams) -> None: - if not envs.VLLM_USE_V1: - self._forward(hidden_states, output, positions, kv_caches) - else: - torch.ops.vllm.linear_attention( - hidden_states, - output, - positions, - self.prefix, - ) + positions: torch.Tensor) -> None: + torch.ops.vllm.linear_attention( + hidden_states, + output, + positions, + self.prefix, + ) def _forward(self, hidden_states: torch.Tensor, output: torch.Tensor, - positions: torch.Tensor, - kv_caches: Optional[MinimaxCacheParams]) -> None: + positions: torch.Tensor) -> None: forward_context = get_forward_context() attn_metadata: AttentionMetadata = forward_context.attn_metadata - if envs.VLLM_USE_V1 and attn_metadata is not None: + if attn_metadata is not None: assert isinstance(attn_metadata, dict) attn_metadata = attn_metadata[self.prefix] assert isinstance(attn_metadata, LinearAttentionMetadata) @@ -351,32 +331,26 @@ def _forward(self, hidden_states: torch.Tensor, output: torch.Tensor, qkvact = torch.nn.functional.silu(qkv32) qkvact = qkvact.view((qkv.shape[0], self.tp_heads, -1)) q, k, v = torch.split(qkvact, [self.head_dim] * 3, dim=-1) - if envs.VLLM_USE_V1: - if attn_metadata is not None: - kv_cache = self.kv_cache[forward_context.virtual_engine][0] - state_indices_tensor = attn_metadata.state_indices_tensor - - num_prefills = getattr(attn_metadata, "num_prefills", 0) - if num_prefills > 0: - num_decode_tokens = getattr(attn_metadata, - "num_decode_tokens", 0) - for prefill_idx in range(num_prefills): - q_start = attn_metadata.query_start_loc[ - num_decode_tokens + prefill_idx] - q_end = attn_metadata.query_start_loc[num_decode_tokens - + prefill_idx + - 1] - query_len = q_end - q_start - context_len = attn_metadata.seq_lens[ - num_decode_tokens + prefill_idx] - query_len - if context_len == 0: - block_to_clear = state_indices_tensor[ - num_decode_tokens + prefill_idx] - kv_cache[block_to_clear, ...] = 0 - else: - assert kv_caches is not None - kv_cache = kv_caches.minimax_cache - state_indices_tensor = kv_caches.state_indices_tensor + if attn_metadata is not None: + kv_cache = self.kv_cache[forward_context.virtual_engine][0] + state_indices_tensor = attn_metadata.state_indices_tensor + + num_prefills = getattr(attn_metadata, "num_prefills", 0) + if num_prefills > 0: + num_decode_tokens = getattr(attn_metadata, "num_decode_tokens", + 0) + for prefill_idx in range(num_prefills): + q_start = attn_metadata.query_start_loc[num_decode_tokens + + prefill_idx] + q_end = attn_metadata.query_start_loc[num_decode_tokens + + prefill_idx + 1] + query_len = q_end - q_start + context_len = attn_metadata.seq_lens[ + num_decode_tokens + prefill_idx] - query_len + if context_len == 0: + block_to_clear = state_indices_tensor[num_decode_tokens + + prefill_idx] + kv_cache[block_to_clear, ...] 
= 0 decode_only = getattr(attn_metadata, "num_prefills", 0) == 0 if attn_metadata is None: @@ -410,8 +384,7 @@ def linear_attention( self = forward_context.no_compile_layers[layer_name] self._forward(hidden_states=hidden_states, output=output, - positions=positions, - kv_caches=None) + positions=positions) def linear_attention_fake( diff --git a/vllm/model_executor/layers/mamba/mamba2_metadata.py b/vllm/model_executor/layers/mamba/mamba2_metadata.py deleted file mode 100644 index 7f376b70a7ae..000000000000 --- a/vllm/model_executor/layers/mamba/mamba2_metadata.py +++ /dev/null @@ -1,177 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from dataclasses import dataclass -from typing import Optional, Union - -import numpy as np -import torch - -from vllm.attention.backends.abstract import AttentionMetadata -from vllm.attention.backends.placeholder_attn import ( - PlaceholderAttentionMetadata) -from vllm.attention.backends.utils import PAD_SLOT_ID -from vllm.platforms import current_platform -from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadata -from vllm.v1.attention.backends.mamba2_attn import ( - Mamba2AttentionMetadata, _query_start_loc_to_chunk_indices_offsets) - - -@dataclass -class Mamba2Metadata: - prep_initial_states: bool - chunk_size: int - - has_initial_states_p: torch.Tensor - seq_idx_p: torch.Tensor - chunk_indices_p: torch.Tensor - chunk_offsets_p: torch.Tensor - """ - With continuous batching layout of `x` in vLLM, to enable a Triton program - to handle a request in parallel, two supporting tensors are used - (batch_ptr, token_chunk_offset_ptr) - BLOCK_M = the # tokens to be handled by a Triton program - (can be customized for different hardware) - - nums_dict: - tracks the data associated with a given value of BLOCK_M - BLOCK_M = #tokens handled by a Triton program - cu_seqlen: total tokens per batch - (used as flag to update other data at each new input) - batch_ptr: tracks batch-id handled by the Triton program - token_chunk_offset_ptr: tracks token group_idx handled by the Triton program - (Triton implementation of causal_conv1d handles parallelism in 3-axes - - feature-axis - - batch-axis - - sequence-axis) - """ - nums_dict: Optional[dict] = None - cu_seqlen: Optional[int] = None - batch_ptr: Optional[torch.Tensor] = None - token_chunk_offset_ptr: Optional[torch.Tensor] = None - - -def get_platform_metadata_classes() -> tuple[type[AttentionMetadata], ...]: - """Returns the appropriate metadata classes for the current platform.""" - if current_platform.is_rocm(): - from vllm.v1.attention.backends.rocm_aiter_fa import ( - AiterFlashAttentionMetadata) - from vllm.v1.attention.backends.triton_attn import ( - TritonAttentionMetadata) - return (AiterFlashAttentionMetadata, TritonAttentionMetadata, - PlaceholderAttentionMetadata) - if current_platform.is_cuda(): - from vllm.v1.attention.backends.flash_attn import ( - FlashAttentionMetadata) - from vllm.v1.attention.backends.xformers import ( - XFormersAttentionMetadata) - return (FlashAttentionMetadata, XFormersAttentionMetadata, - PlaceholderAttentionMetadata) - raise ValueError( - f"Unsupported platform for Mamba2: {current_platform.device_type}") - - -def prepare_mamba2_metadata( - chunk_size: int, - attn_metadata: AttentionMetadata, -) -> Mamba2Metadata: - - # compute number of prefill and decode requests - # NOTE: in V0 we assume prefills are before decodes - num_prefills = attn_metadata.num_prefills - num_prefill_tokens = 
attn_metadata.num_prefill_tokens - - seq_idx_p = None - chunk_indices_p, chunk_offsets_p = None, None - # Need flags to indicate if there are initial states - # currently we really only support the FlashAttention backend - has_initial_states_p = None - prep_initial_states = False - - # Compute seq_idx, chunk_indices and chunk_offsets for prefill only - if num_prefills > 0: - attn_metadata_instances = get_platform_metadata_classes() - if (isinstance(attn_metadata, attn_metadata_instances) - and attn_metadata.context_lens_tensor is not None): - # precompute flag to avoid device syncs later in mamba2 layer - # forwards - # prep is only needed for mamba2 ssd prefill processing - has_initial_states_p = ( - attn_metadata.context_lens_tensor[:num_prefills] > 0) - prep_initial_states = torch.any(has_initial_states_p).item() - query_start_loc_p = attn_metadata.query_start_loc[:num_prefills + 1] - seq_idx_p = torch.repeat_interleave(torch.arange( - num_prefills, dtype=torch.int32, device=query_start_loc_p.device), - query_start_loc_p.diff(), - output_size=num_prefill_tokens) - seq_idx_p.unsqueeze_(0) - - # We compute metadata for chunked prefill once at the top level model - # forward and reuse them in mamba layers. If not needed, they will be - # ignored inside mamba kernels. - if prep_initial_states: - chunk_indices_p, chunk_offsets_p = \ - _query_start_loc_to_chunk_indices_offsets( - query_start_loc_p, chunk_size, num_prefill_tokens) - - return Mamba2Metadata(has_initial_states_p=has_initial_states_p, - prep_initial_states=prep_initial_states, - chunk_size=chunk_size, - seq_idx_p=seq_idx_p, - chunk_indices_p=chunk_indices_p, - chunk_offsets_p=chunk_offsets_p) - - -def update_metadata(x: torch.Tensor, query_start_loc: torch.Tensor, - mamba2_metadata: Union[Mamba2Metadata, - Mamba2AttentionMetadata, - GDNAttentionMetadata]): - """ - this is triggered upon handling a new input at the first layer - """ - dim, cu_seqlen = x.shape - mamba2_metadata.cu_seqlen = cu_seqlen - seqlens = np.diff(query_start_loc.to('cpu')) - nums_dict = {} # type: ignore - for BLOCK_M in [8]: # cover all BLOCK_M values - nums = -(-seqlens // BLOCK_M) - nums_dict[BLOCK_M] = {} - nums_dict[BLOCK_M]['nums'] = nums - nums_dict[BLOCK_M]['tot'] = nums.sum().item() - mlist = torch.from_numpy(np.repeat(np.arange(len(nums)), nums)) - nums_dict[BLOCK_M]['mlist'] = mlist - mlist_len = len(nums_dict[BLOCK_M]['mlist']) - nums_dict[BLOCK_M]['mlist_len'] = mlist_len - MAX_NUM_PROGRAMS = max(1024, mlist_len) * 2 - offsetlist = [] # type: ignore - for idx, num in enumerate(nums): - offsetlist.extend(range(num)) - offsetlist = torch.tensor(offsetlist, dtype=torch.int32) - nums_dict[BLOCK_M]['offsetlist'] = offsetlist - - if mamba2_metadata.batch_ptr is None: - # Update default value after class definition - #mamba2_metadata.MAX_NUM_PROGRAMS *= 2 - mamba2_metadata.batch_ptr = torch.full((MAX_NUM_PROGRAMS, ), - PAD_SLOT_ID, - dtype=torch.int32, - device='cuda') - mamba2_metadata.token_chunk_offset_ptr = torch.full( - (MAX_NUM_PROGRAMS, ), - PAD_SLOT_ID, - dtype=torch.int32, - device='cuda') - else: - if mamba2_metadata.batch_ptr.nelement() < MAX_NUM_PROGRAMS: - mamba2_metadata.batch_ptr.resize_(MAX_NUM_PROGRAMS).fill_( - PAD_SLOT_ID) - mamba2_metadata.token_chunk_offset_ptr.resize_( # type: ignore - MAX_NUM_PROGRAMS).fill_(PAD_SLOT_ID) - - mamba2_metadata.batch_ptr[0:mlist_len].copy_(mlist) - mamba2_metadata.token_chunk_offset_ptr[ # type: ignore - 0:mlist_len].copy_(offsetlist) - nums_dict[BLOCK_M]['batch_ptr'] = mamba2_metadata.batch_ptr - 
nums_dict[BLOCK_M]['token_chunk_offset_ptr'] = ( - mamba2_metadata.token_chunk_offset_ptr) # type: ignore - mamba2_metadata.nums_dict = nums_dict - return mamba2_metadata diff --git a/vllm/model_executor/layers/mamba/mamba_mixer.py b/vllm/model_executor/layers/mamba/mamba_mixer.py index e704bfd451bc..a56ee13a6380 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer.py @@ -10,8 +10,6 @@ from torch import nn from torch.nn.parameter import Parameter -from vllm import envs -from vllm.attention.backends.abstract import AttentionMetadata from vllm.config import CacheConfig, ModelConfig, get_current_vllm_config from vllm.distributed.parallel_state import ( get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) @@ -28,7 +26,6 @@ causal_conv1d_fn, causal_conv1d_update) from vllm.model_executor.layers.mamba.ops.mamba_ssm import ( selective_scan_fn, selective_state_update) -from vllm.model_executor.models.mamba_cache import MambaCacheParams from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op @@ -149,16 +146,12 @@ def A_weight_loader(param: Parameter, loaded_weight: torch.Tensor): has_weight=rms_norm_has_weight, ) if use_rms_norm else None - if envs.VLLM_USE_V1: - compilation_config = get_current_vllm_config().compilation_config - if prefix in compilation_config.static_forward_context: - raise ValueError(f"Duplicate layer name: {prefix}") - compilation_config.static_forward_context[prefix] = self - # The outer list is for v0 PP virtual engine. Though this code path - # only runs for v1, we have to do this to unify with the interface - # of Attention + v0 PP. - # The inner tuple is (conv_state, ssm_state) - self.kv_cache = [(torch.tensor([]), torch.tensor([]))] + compilation_config = get_current_vllm_config().compilation_config + if prefix in compilation_config.static_forward_context: + raise ValueError(f"Duplicate layer name: {prefix}") + compilation_config.static_forward_context[prefix] = self + # The inner tuple is (conv_state, ssm_state) + self.kv_cache = (torch.tensor([]), torch.tensor([])) self.model_config = model_config self.cache_config = cache_config @@ -186,29 +179,18 @@ def _ssm_transform( discrete_time_step = self.dt_proj(time_step)[0].transpose(-2, -1) return discrete_time_step, B, C - def forward(self, - hidden_states: torch.Tensor, - output: torch.Tensor, - mamba_cache_params: Optional[MambaCacheParams] = None): - if not envs.VLLM_USE_V1: - CustomOp.forward(self, hidden_states, output, mamba_cache_params) - else: - torch.ops.vllm.mamba_mixer( - hidden_states, - output, - self.prefix, - ) - - def forward_native(self, - hidden_states: torch.Tensor, - output: torch.Tensor, - mamba_cache_params: Optional[MambaCacheParams] = None): + def forward(self, hidden_states: torch.Tensor, output: torch.Tensor): + torch.ops.vllm.mamba_mixer( + hidden_states, + output, + self.prefix, + ) + + def forward_native(self, hidden_states: torch.Tensor, + output: torch.Tensor): pass - def forward_cuda(self, - hidden_states: torch.Tensor, - output: torch.Tensor, - mamba_cache_params: Optional[MambaCacheParams] = None): + def forward_cuda(self, hidden_states: torch.Tensor, output: torch.Tensor): """ Run the Mamba-1 SSM pipeline. 
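# Editor's note: a minimal, self-contained sketch (not vLLM's actual API) of the
# dispatch pattern the V1-only path above relies on: each mixer registers itself
# in a forward-context registry keyed by its `prefix`, and a free function --
# exposed to the model as a custom op such as torch.ops.vllm.mamba_mixer --
# looks the layer up by name and writes its result into a caller-provided
# `output` buffer. Names here (LAYER_REGISTRY, ToyMixer, toy_mamba_mixer) are
# illustrative only.
#
# import torch
# from torch import nn
#
# LAYER_REGISTRY: dict[str, nn.Module] = {}  # stands in for the static forward context
#
# class ToyMixer(nn.Module):
#     def __init__(self, hidden_size: int, prefix: str):
#         super().__init__()
#         self.proj = nn.Linear(hidden_size, hidden_size)
#         self.prefix = prefix
#         LAYER_REGISTRY[prefix] = self  # registration happens once at init
#
#     def forward_cuda(self, hidden_states: torch.Tensor, output: torch.Tensor) -> None:
#         # Real layers also read per-layer attn_metadata and the mamba KV cache here.
#         output.copy_(self.proj(hidden_states))
#
# def toy_mamba_mixer(hidden_states: torch.Tensor, output: torch.Tensor, layer_name: str) -> None:
#     # Mutating `output` in place (instead of returning a tensor) keeps the op
#     # opaque to compilation / CUDA-graph capture while still producing results.
#     LAYER_REGISTRY[layer_name].forward_cuda(hidden_states, output)
#
# layer = ToyMixer(hidden_size=8, prefix="model.layers.0.mixer")
# x = torch.randn(4, 8)
# out = torch.empty_like(x)
# toy_mamba_mixer(x, out, "model.layers.0.mixer")
# print(out.shape)  # torch.Size([4, 8])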
@@ -234,31 +216,18 @@ def forward_cuda(self, forward_context: ForwardContext = get_forward_context() attn_metadata = forward_context.attn_metadata - if envs.VLLM_USE_V1: - if attn_metadata is not None: - assert isinstance(attn_metadata, dict) - attn_metadata = attn_metadata[self.prefix] - mamba1_metadata = attn_metadata - assert isinstance(mamba1_metadata, Mamba1AttentionMetadata) - query_start_loc = mamba1_metadata.query_start_loc - state_indices_tensor = mamba1_metadata.state_indices_tensor - self_kv_cache = self.kv_cache[forward_context.virtual_engine] - conv_state = self_kv_cache[0].transpose(-1, -2) - ssm_state = self_kv_cache[1] - has_initial_states = mamba1_metadata.has_initial_states - num_padded_decodes = mamba1_metadata.num_padded_decodes - else: - assert isinstance(attn_metadata, AttentionMetadata) - assert mamba_cache_params is not None - conv_state = mamba_cache_params.conv_state - ssm_state = mamba_cache_params.ssm_state - state_indices_tensor = mamba_cache_params.state_indices_tensor - query_start_loc = attn_metadata.query_start_loc - context_lens_tensor = attn_metadata.context_lens_tensor - has_initial_states = None - if context_lens_tensor is not None: - has_initial_states = context_lens_tensor > 0 - num_padded_decodes = attn_metadata.num_decode_tokens + if attn_metadata is not None: + assert isinstance(attn_metadata, dict) + attn_metadata = attn_metadata[self.prefix] + mamba1_metadata = attn_metadata + assert isinstance(mamba1_metadata, Mamba1AttentionMetadata) + query_start_loc = mamba1_metadata.query_start_loc + state_indices_tensor = mamba1_metadata.state_indices_tensor + self_kv_cache = self.kv_cache[forward_context.virtual_engine] + conv_state = self_kv_cache[0].transpose(-1, -2) + ssm_state = self_kv_cache[1] + has_initial_states = mamba1_metadata.has_initial_states + num_padded_decodes = mamba1_metadata.num_padded_decodes # 1. Gated MLP's linear projection projected_states = self.in_proj(hidden_states)[0].transpose(-2, -1) @@ -267,7 +236,7 @@ def forward_cuda(self, conv_weights = self.conv1d.weight.view(self.conv1d.weight.size(0), self.conv1d.weight.size(2)) - if envs.VLLM_USE_V1 and attn_metadata is None: + if attn_metadata is None: # V1 profile run hidden_states_BC = hidden_states_BC.contiguous() return self.out_proj(hidden_states_BC.transpose(-2, -1))[0] @@ -368,10 +337,7 @@ def forward_cuda(self, out=scan_outputs_d) scan_outputs_d = scan_outputs_d.transpose(0, 1) - if envs.VLLM_USE_V1: - ssm_outputs.insert(0, scan_outputs_d) - else: - ssm_outputs.append(scan_outputs_d) + ssm_outputs.insert(0, scan_outputs_d) scan_outputs_combined = ssm_outputs[0] if len( ssm_outputs) == 1 else torch.cat(ssm_outputs, dim=-1) @@ -441,40 +407,27 @@ def split_batch_to_prefill_and_decode( num_decodes: int, num_padded_decodes: int, ) -> PrefillDecodeSplit: + num_actual_tokens = num_prefill_tokens + num_padded_decodes - if envs.VLLM_USE_V1: - # In v1, decode tokens come first, then prefill tokens. 
- hidden_states_BC_d, hidden_states_BC_p = torch.split( - hidden_states_BC[..., :num_actual_tokens], - [num_padded_decodes, num_prefill_tokens], - dim=-1) - gate_d, gate_p = torch.split(gate[..., :num_actual_tokens], - [num_padded_decodes, num_prefill_tokens], - dim=-1) - - # num_padded_decodes accounts for CUDA graph padding when applicable - state_indices_tensor_d, state_indices_tensor_p = torch.split( - state_indices_tensor[:num_padded_decodes + num_prefills], - [num_padded_decodes, num_prefills], - dim=0) - query_start_loc_p = (query_start_loc[-num_prefills - 1:] - - num_padded_decodes if num_prefills > 0 else None) - has_initial_states_p = has_initial_states[-num_prefills:] if ( - has_initial_states is not None and num_prefills > 0) else None - else: - # In v0, prefill tokens come first, then decode tokens. - hidden_states_BC_p, hidden_states_BC_d = torch.split( - hidden_states_BC, [num_prefill_tokens, num_decode_tokens], dim=-1) - gate_p, gate_d = torch.split(gate, - [num_prefill_tokens, num_decode_tokens], - dim=-1) - state_indices_tensor_p, state_indices_tensor_d = torch.split( - state_indices_tensor, [num_prefills, num_decodes], dim=0) - query_start_loc_p = (query_start_loc[:num_prefills + - 1] if num_prefills > 0 else None) - has_initial_states_p = has_initial_states[:num_prefills] if ( - has_initial_states is not None and num_prefills > 0) else None + # In v1, decode tokens come first, then prefill tokens. + hidden_states_BC_d, hidden_states_BC_p = torch.split( + hidden_states_BC[..., :num_actual_tokens], + [num_padded_decodes, num_prefill_tokens], + dim=-1) + gate_d, gate_p = torch.split(gate[..., :num_actual_tokens], + [num_padded_decodes, num_prefill_tokens], + dim=-1) + + # num_padded_decodes accounts for CUDA graph padding when applicable + state_indices_tensor_d, state_indices_tensor_p = torch.split( + state_indices_tensor[:num_padded_decodes + num_prefills], + [num_padded_decodes, num_prefills], + dim=0) + query_start_loc_p = (query_start_loc[-num_prefills - 1:] - + num_padded_decodes if num_prefills > 0 else None) + has_initial_states_p = has_initial_states[-num_prefills:] if ( + has_initial_states is not None and num_prefills > 0) else None return PrefillDecodeSplit( hidden_states_BC_p=hidden_states_BC_p, @@ -495,9 +448,7 @@ def mamba_mixer( ) -> None: forward_context: ForwardContext = get_forward_context() self = forward_context.no_compile_layers[layer_name] - self.forward_cuda(hidden_states=hidden_states, - output=output, - mamba_cache_params=None) + self.forward_cuda(hidden_states=hidden_states, output=output) def mamba_mixer_fake( diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 02e6a9138c05..047ce4c4c43d 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -9,7 +9,6 @@ import torch from torch import nn -from vllm import envs from vllm.attention.backends.abstract import AttentionMetadata from vllm.config import CacheConfig, ModelConfig, get_current_vllm_config from vllm.distributed import (divide, get_tensor_model_parallel_rank, @@ -22,8 +21,6 @@ MergedColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.mamba.abstract import MambaBase -from vllm.model_executor.layers.mamba.mamba2_metadata import (Mamba2Metadata, - update_metadata) from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( @@ 
-36,7 +33,6 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import ( LoaderFunction, composed_weight_loader, sharded_weight_loader) -from vllm.model_executor.models.mamba_cache import MambaCacheParams from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op @@ -449,16 +445,12 @@ def __init__(self, self.use_rms_norm, eps=rms_norm_eps) - if envs.VLLM_USE_V1: - compilation_config = get_current_vllm_config().compilation_config - if prefix in compilation_config.static_forward_context: - raise ValueError(f"Duplicate layer name: {prefix}") - compilation_config.static_forward_context[prefix] = self - # The outer list is for v0 PP virtual engine. Though this code path - # only runs for v1, we have to do this to unify with the interface - # of Attention + v0 PP. - # The inner tuple is (conv_state, ssm_state) - self.kv_cache = [(torch.tensor([]), torch.tensor([]))] + compilation_config = get_current_vllm_config().compilation_config + if prefix in compilation_config.static_forward_context: + raise ValueError(f"Duplicate layer name: {prefix}") + compilation_config.static_forward_context[prefix] = self + # The tuple is (conv_state, ssm_state) + self.kv_cache = (torch.tensor([]), torch.tensor([])) self.model_config = model_config self.cache_config = cache_config @@ -468,8 +460,6 @@ def forward_native( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, mup_vector: Optional[torch.Tensor] = None, ): pass @@ -478,59 +468,43 @@ def forward( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, mup_vector: Optional[torch.Tensor] = None, ): - if not envs.VLLM_USE_V1: - CustomOp.forward(self, hidden_states, output, mamba_cache_params, - mamba2_metadata, mup_vector) - else: - torch.ops.vllm.mamba_mixer2( - hidden_states, - output, - self.prefix, - mup_vector, - ) + torch.ops.vllm.mamba_mixer2( + hidden_states, + output, + self.prefix, + mup_vector, + ) def forward_cuda( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, mup_vector: Optional[torch.Tensor] = None, ): forward_context = get_forward_context() - # mamba2_metadata contains metadata necessary for the mamba2 triton + # attn_metadata contains metadata necessary for the mamba2 triton # kernels to operate in continuous batching and in chunked prefill # modes; they are computed at top-level model forward since they # stay the same and reused for all mamba layers in the same iteration attn_metadata: AttentionMetadata = forward_context.attn_metadata - if envs.VLLM_USE_V1: - if attn_metadata is not None: - assert isinstance(attn_metadata, dict) - attn_metadata = attn_metadata[self.prefix] - mamba2_metadata = attn_metadata - assert isinstance(attn_metadata, Mamba2AttentionMetadata) - self_kv_cache = self.kv_cache[forward_context.virtual_engine] - # conv_state = (..., dim, width-1) yet contiguous along 'dim' - conv_state = self_kv_cache[0].transpose(-1, -2) - ssm_state = self_kv_cache[1] - state_indices_tensor = attn_metadata.state_indices_tensor - else: - conv_state = mamba_cache_params.conv_state - ssm_state = mamba_cache_params.ssm_state - state_indices_tensor = mamba_cache_params.state_indices_tensor - - # Common members between V1 metadata and V0 metadata - 
if mamba2_metadata is not None: - has_initial_states_p = mamba2_metadata.has_initial_states_p - prep_initial_states = mamba2_metadata.prep_initial_states - chunk_size = mamba2_metadata.chunk_size - seq_idx_p = mamba2_metadata.seq_idx_p - chunk_indices_p = mamba2_metadata.chunk_indices_p - chunk_offsets_p = mamba2_metadata.chunk_offsets_p + + if attn_metadata is not None: + assert isinstance(attn_metadata, dict) + attn_metadata = attn_metadata[self.prefix] + assert isinstance(attn_metadata, Mamba2AttentionMetadata) + self_kv_cache = self.kv_cache[forward_context.virtual_engine] + # conv_state = (..., dim, width-1) yet contiguous along 'dim' + conv_state = self_kv_cache[0].transpose(-1, -2) + ssm_state = self_kv_cache[1] + state_indices_tensor = attn_metadata.state_indices_tensor + has_initial_states_p = attn_metadata.has_initial_states_p + prep_initial_states = attn_metadata.prep_initial_states + chunk_size = attn_metadata.chunk_size + seq_idx_p = attn_metadata.seq_idx_p + chunk_indices_p = attn_metadata.chunk_indices_p + chunk_offsets_p = attn_metadata.chunk_offsets_p # 1. Gated MLP's linear projection projected_states, _ = self.in_proj(hidden_states) @@ -562,8 +536,8 @@ def forward_cuda( dim=-1, ) - if envs.VLLM_USE_V1 and attn_metadata is None: - # V1 profile run + if attn_metadata is None: + # profile run hidden_states_B_C = (hidden_states_B_C.transpose( 0, 1).clone().transpose(0, 1)).contiguous() hidden_states, _B, _C = split_hidden_states_B_C_fn( @@ -579,49 +553,27 @@ def forward_cuda( has_decode = num_decodes > 0 num_actual_tokens = num_prefill_tokens + num_decodes - # NOTE: V0 put prefill before decode, v1 puts decode before prefill # Separate prefill and decode by splitting varlen input # Split along token dimension - if envs.VLLM_USE_V1: - hidden_states_B_C_d, hidden_states_B_C_p = torch.split( - hidden_states_B_C[:num_actual_tokens], - [num_decodes, num_prefill_tokens], - dim=0, - ) - dt_d, dt_p = torch.split( - dt[:num_actual_tokens], - [num_decodes, num_prefill_tokens], - dim=0, - ) - # Split along batch dimension - state_indices_tensor_d, state_indices_tensor_p = torch.split( - state_indices_tensor[:num_actual_tokens], - [num_decodes, num_prefills], - dim=0, - ) - query_start_loc_p = ( - attn_metadata.query_start_loc[-num_prefills - 1:] - - num_decodes if has_prefill else None) - else: - hidden_states_B_C_p, hidden_states_B_C_d = torch.split( - hidden_states_B_C, - [num_prefill_tokens, num_decodes], - dim=0, - ) - dt_p, dt_d = torch.split( - dt, - [num_prefill_tokens, num_decodes], - dim=0, - ) - # Split along batch dimension - state_indices_tensor_p, state_indices_tensor_d = torch.split( - state_indices_tensor, - [num_prefills, num_decodes], - dim=0, - ) - query_start_loc_p = (attn_metadata.query_start_loc[:num_prefills + - 1] - if has_prefill else None) + hidden_states_B_C_d, hidden_states_B_C_p = torch.split( + hidden_states_B_C[:num_actual_tokens], + [num_decodes, num_prefill_tokens], + dim=0, + ) + dt_d, dt_p = torch.split( + dt[:num_actual_tokens], + [num_decodes, num_prefill_tokens], + dim=0, + ) + # Split along batch dimension + state_indices_tensor_d, state_indices_tensor_p = torch.split( + state_indices_tensor[:num_actual_tokens], + [num_decodes, num_prefills], + dim=0, + ) + query_start_loc_p = ( + attn_metadata.query_start_loc[-num_prefills - 1:] - + num_decodes if has_prefill else None) # Preallocate output tensor to avoid memcpy cost for merging prefill # and decode outputs @@ -633,18 +585,11 @@ def forward_cuda( dtype=hidden_states.dtype, 
device=hidden_states.device, ) - if envs.VLLM_USE_V1: - preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split( - preallocated_ssm_out, - [num_decodes, num_prefill_tokens], - dim=0, - ) - else: - preallocated_ssm_out_p, preallocated_ssm_out_d = torch.split( - preallocated_ssm_out, - [num_prefill_tokens, num_decodes], - dim=0, - ) + preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split( + preallocated_ssm_out, + [num_decodes, num_prefill_tokens], + dim=0, + ) # Process prefill requests if has_prefill: @@ -653,9 +598,6 @@ def forward_cuda( # pointed to by "state_indices_tensor" x = hidden_states_B_C_p.transpose( 0, 1) # this is the form that causal-conv see - if mamba2_metadata.cu_seqlen is None: - mamba2_metadata = update_metadata(x, query_start_loc_p, - mamba2_metadata) hidden_states_B_C_p = causal_conv1d_fn( x, conv_weights, @@ -664,7 +606,7 @@ def forward_cuda( conv_states=conv_state, has_initial_state=has_initial_states_p, cache_indices=state_indices_tensor_p, - metadata=mamba2_metadata, + metadata=attn_metadata, query_start_loc=query_start_loc_p).transpose( 0, 1)[:num_prefill_tokens] @@ -806,8 +748,6 @@ def mamba_mixer2( self = forward_context.no_compile_layers[layer_name] self.forward_cuda(hidden_states=hidden_states, output=output, - mamba_cache_params=None, - mamba2_metadata=None, mup_vector=mup_vector) diff --git a/vllm/model_executor/layers/mamba/mamba_utils.py b/vllm/model_executor/layers/mamba/mamba_utils.py index a6c1af91de42..677a4b9d87fc 100644 --- a/vllm/model_executor/layers/mamba/mamba_utils.py +++ b/vllm/model_executor/layers/mamba/mamba_utils.py @@ -100,7 +100,6 @@ def mamba1_state_shape( intermediate_size: int, state_size: int, conv_kernel: int, - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int]]: conv_state_shape = (divide(intermediate_size, tp_world_size), conv_kernel - 1) @@ -108,11 +107,7 @@ def mamba1_state_shape( temporal_state_shape = (divide(intermediate_size, tp_world_size), state_size) - # In V0, the conv_state shape was swapped during allocation in - # MambaCacheManager, but in V1 it needs to be determined here at the - # calculation level - if use_v1: - conv_state_shape = conv_state_shape[1], conv_state_shape[0] + conv_state_shape = conv_state_shape[1], conv_state_shape[0] return conv_state_shape, temporal_state_shape @@ -126,7 +121,6 @@ def mamba2_state_shape( head_dim: int, state_size: int, conv_kernel: int, - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: # if n_groups is not divisible by world_size, need to extend the shards # to ensure all groups needed by a head is sharded along with it @@ -137,8 +131,6 @@ def mamba2_state_shape( # contiguous along 'dim' axis conv_state_shape = (conv_kernel - 1, divide(conv_dim, tp_world_size)) - if not use_v1: - conv_state_shape = conv_state_shape[1], conv_state_shape[0] # These are not TP-ed as they depend on A, dt_bias, D # - they are typically small @@ -153,12 +145,9 @@ def short_conv_state_shape( tp_world_size: int, intermediate_size: int, conv_kernel: int, - use_v1: bool = True, ) -> tuple[tuple[int, int]]: conv_dim = divide(intermediate_size, tp_world_size) conv_state_shape = (conv_kernel - 1, conv_dim) - if not use_v1: - conv_state_shape = conv_state_shape[1], conv_state_shape[0] return (conv_state_shape, ) @classmethod @@ -183,7 +172,6 @@ def gated_delta_net_state_shape( head_v_dim: int, conv_kernel_size: int, num_spec: int = 0, - use_v1: bool = True, ): conv_dim = (head_k_dim * num_k_heads * 2 + head_v_dim * num_v_heads) conv_state_shape = ( @@ -191,11 
+179,7 @@ def gated_delta_net_state_shape( conv_kernel_size - 1 + num_spec, ) - # In V0, the conv_state shape was swapped during allocation in - # MambaCacheManager, but in V1 it needs to be determined here at the - # calculation level - if use_v1: - conv_state_shape = conv_state_shape[1], conv_state_shape[0] + conv_state_shape = conv_state_shape[1], conv_state_shape[0] temporal_state_shape = (divide(num_v_heads, tp_world_size), head_k_dim, head_v_dim) diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 8cfd0962c5bf..010fcdda156c 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -420,9 +420,7 @@ def causal_conv1d_fn( x = x.to(conv_states.dtype) out = torch.empty_like(x) if metadata is not None: - cu_seqlen = metadata.cu_seqlen nums_dict = metadata.nums_dict - #x = metadata.x args = nums_dict batch_ptr = metadata.batch_ptr token_chunk_offset_ptr = metadata.token_chunk_offset_ptr @@ -926,7 +924,6 @@ def causal_conv1d_update( query_start_loc: Optional[torch.Tensor] = None, max_query_len: int = -1, pad_slot_id: int = PAD_SLOT_ID, - metadata=None, validate_data=False, ): """ diff --git a/vllm/model_executor/layers/mamba/short_conv.py b/vllm/model_executor/layers/mamba/short_conv.py index 335191a5c82c..ffdcd702aab4 100644 --- a/vllm/model_executor/layers/mamba/short_conv.py +++ b/vllm/model_executor/layers/mamba/short_conv.py @@ -8,7 +8,6 @@ import torch -from vllm import envs from vllm.attention.backends.abstract import AttentionMetadata from vllm.config import CacheConfig, ModelConfig, get_current_vllm_config from vllm.distributed import get_tensor_model_parallel_world_size @@ -18,7 +17,6 @@ MergedColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.mamba.abstract import MambaBase -from vllm.model_executor.layers.mamba.mamba2_metadata import update_metadata from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( @@ -71,15 +69,11 @@ def __init__(self, prefix=f"{prefix}.out_proj", ) - assert envs.VLLM_USE_V1, ("ShortConv layers are only supported in V1") compilation_config = get_current_vllm_config().compilation_config if prefix in compilation_config.static_forward_context: raise ValueError(f"Duplicate layer name: {prefix}") compilation_config.static_forward_context[prefix] = self - # The outer list is for v0 PP virtual engine. Though this code path - # only runs for v1, we have to do this to unify with the interface - # of Attention + v0 PP. 
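The net effect of the shape helpers above is that every conv state is now handed out as (conv_kernel - 1, dim), and the layers that consume it (MambaMixer2, ShortConv) transpose it back so the kernel sees (dim, width - 1) while the memory stays contiguous along 'dim'. A small illustration of that layout trick, with invented sizes, not part of the patch:

import torch

conv_kernel, dim = 4, 8          # invented sizes, for illustration only
# Allocated as (width - 1, dim): 'dim' is the fastest-varying axis.
conv_state = torch.zeros(conv_kernel - 1, dim)

# The layer views it as (dim, width - 1); the transpose only swaps strides ...
view = conv_state.transpose(-1, -2)
assert view.shape == (dim, conv_kernel - 1)
# ... so neighbouring elements along 'dim' are still adjacent in memory.
assert view.stride(0) == 1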
- self.kv_cache = [(torch.tensor([]), )] + self.kv_cache = (torch.tensor([]), ) self.model_config = model_config self.cache_config = cache_config @@ -89,7 +83,6 @@ def forward_native( self, hidden_states: torch.Tensor, output: torch.Tensor, - conv_metadata: ShortConvAttentionMetadata, ): return @@ -97,7 +90,6 @@ def forward( self, hidden_states: torch.Tensor, output: torch.Tensor, - conv_metadata: ShortConvAttentionMetadata, ): torch.ops.vllm.short_conv( hidden_states, @@ -109,7 +101,6 @@ def forward_cuda( self, hidden_states: torch.Tensor, output: torch.Tensor, - conv_metadata: ShortConvAttentionMetadata, ): forward_context = get_forward_context() # ShortConvAttentionMetadata contains metadata necessary for the @@ -121,7 +112,6 @@ def forward_cuda( if attn_metadata is not None: assert isinstance(attn_metadata, dict) attn_metadata = attn_metadata[self.prefix] - conv_metadata = attn_metadata assert isinstance(attn_metadata, ShortConvAttentionMetadata) self_kv_cache = self.kv_cache[forward_context.virtual_engine] conv_state = self_kv_cache[0].transpose(-1, -2) @@ -181,9 +171,6 @@ def forward_cuda( if has_prefill: Bx_p = (B_p * x_p).transpose(0, 1) - if conv_metadata.cu_seqlen is None: - conv_metadata = update_metadata(Bx_p, query_start_loc_p, - conv_metadata) Bx = causal_conv1d_fn(Bx_p, conv_weights, self.conv.bias, @@ -191,7 +178,7 @@ def forward_cuda( conv_states=conv_state, has_initial_state=has_initial_states_p, cache_indices=state_indices_tensor_p, - metadata=conv_metadata, + metadata=attn_metadata, query_start_loc=query_start_loc_p).transpose( 0, 1)[:num_prefill_tokens] @@ -248,9 +235,7 @@ def short_conv( ) -> None: forward_context: ForwardContext = get_forward_context() self = forward_context.no_compile_layers[layer_name] - self.forward_cuda(hidden_states=hidden_states, - output=output, - conv_metadata=None) + self.forward_cuda(hidden_states=hidden_states, output=output) def short_conv_fake( diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index 584981ef3ebf..4a6154dc548a 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -9,21 +9,17 @@ from torch import nn from transformers import BambaConfig -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size from vllm.distributed.parallel_state import get_pp_group -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -32,10 +28,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors -from 
vllm.utils import LayerBlockType from .interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, SupportsQuant) @@ -115,8 +108,6 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): if residual is None: @@ -127,7 +118,7 @@ def forward( hidden_states, residual) output = torch.empty_like(hidden_states) - self.mamba(hidden_states, output, mamba_cache_params, mamba2_metadata) + self.mamba(hidden_states, output) # Fully Connected hidden_states, residual = self.pre_ff_layernorm(output, residual) hidden_states = self.feed_forward(hidden_states) @@ -315,22 +306,10 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - attn_metadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.mamba_chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - if get_pp_group().is_first_rank: if inputs_embeds is not None: hidden_states = inputs_embeds @@ -343,23 +322,11 @@ def forward( residual = intermediate_tensors["residual"] residual = None - num_attn = 0 for i, layer in enumerate(self.layers): - if isinstance(layer, BambaAttentionDecoderLayer): - num_attn += 1 - - layer_mamba_cache_params = None - if isinstance(layer, - BambaMixerDecoderLayer) and mamba_cache_params: - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - i - num_attn) - hidden_states, residual = layer( positions=positions, hidden_states=hidden_states, residual=residual, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) if not get_pp_group().is_last_rank: @@ -457,13 +424,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -482,7 +447,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.mamba_d_head, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): @@ -515,8 +479,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if not lora_config else lora_config.lora_vocab_padding_size, prefix=maybe_prefix(prefix, "lm_head"), ) - # Used to track and store by the Mamba cache between steps. 
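To make the token layout concrete, here is a toy run of the decode-first split that MambaMixer2.forward_cuda performs in the hunk further up: decode tokens sit at the front of the varlen batch, prefill tokens follow, and the prefill query offsets are rebased onto the prefill-only slice by dropping the decode entries and subtracting the decode token count. The numbers are invented and each decode request is assumed to contribute exactly one token; this is a sketch, not part of the patch:

import torch

num_decodes, num_prefills = 2, 2
num_prefill_tokens = 3 + 4                       # two prefills of 3 and 4 tokens
num_actual_tokens = num_decodes + num_prefill_tokens

hidden_states = torch.arange(num_actual_tokens).unsqueeze(-1)    # [tokens, 1]
query_start_loc = torch.tensor([0, 1, 2, 5, 9])  # cumulative starts, all requests

# Decode tokens come first, so a single split separates the two groups.
hs_d, hs_p = torch.split(hidden_states, [num_decodes, num_prefill_tokens], dim=0)
assert hs_d.shape[0] == 2 and hs_p.shape[0] == 7

# Keep only the prefill entries of query_start_loc and shift them so they
# index into the prefill-only slice.
query_start_loc_p = query_start_loc[-num_prefills - 1:] - num_decodes
assert query_start_loc_p.tolist() == [0, 3, 7]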
- self.mamba_cache: Optional[MambaCacheManager] = None self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -534,39 +496,11 @@ def forward(self, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_mamba_layers = \ - self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, - LayerBlockType.mamba - ) - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - hidden_states = self.model(input_ids, positions, mamba_cache_params, - intermediate_tensors, inputs_embeds) + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) return hidden_states - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/model_executor/models/constant_size_cache.py b/vllm/model_executor/models/constant_size_cache.py deleted file mode 100644 index f03c58a12932..000000000000 --- a/vllm/model_executor/models/constant_size_cache.py +++ /dev/null @@ -1,137 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from abc import ABC, abstractmethod -from typing import Any - -import torch - -from vllm.attention.backends.utils import PAD_SLOT_ID - - -class ConstantSizeCache(ABC): - """ - Abstract base class for managing constant size caches - like Mamba and Minimax. - """ - - def __init__(self, max_batch_size: int): - # Maps between the request id and a dict that maps between the seq_id - # and its index inside the cache - self.cache_indices_mapping: dict[str, dict[int, int]] = {} - self.free_cache_indices = list(range(max_batch_size)) - - @property - @abstractmethod - def cache(self) -> Any: - """Return the underlying cache tensor(s)""" - pass - - @abstractmethod - def _copy_cache(self, from_index: int, to_index: int): - """Copy cache data from one index to another""" - pass - - def current_run_tensors(self, **kwargs) -> tuple: - """ - Return the tensors for the current run's conv and ssm state. 
- """ - if "seqlen_agnostic_capture_inputs" not in kwargs: - # We get here only on Prefill/Eager mode runs - request_ids_to_seq_ids = kwargs["request_ids_to_seq_ids"] - finished_requests_ids = kwargs["finished_requests_ids"] - - self._release_finished_requests(finished_requests_ids) - state_indices = self._prepare_current_run_cache( - request_ids_to_seq_ids, finished_requests_ids) - - state_indices_tensor = torch.as_tensor(state_indices, - dtype=torch.int32, - device="cuda") - cache_tensors = self.cache - else: - # CUDA graph capturing runs - cache_tensors, state_indices_tensor = kwargs[ - "seqlen_agnostic_capture_inputs"] - - return (cache_tensors, state_indices_tensor) - - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - """ - Copy the relevant state_indices into the CUDA graph input buffer - """ - assert all( - key in kwargs - for key in ["request_ids_to_seq_ids", "finished_requests_ids"]) - finished_requests_ids = kwargs["finished_requests_ids"] - request_ids_to_seq_ids = kwargs["request_ids_to_seq_ids"] - assert "seqlen_agnostic_capture_inputs" in input_buffers - _, input_state_indices_buffer = input_buffers[ - "seqlen_agnostic_capture_inputs"] - - self._release_finished_requests(finished_requests_ids) - state_indices = self._prepare_current_run_cache( - request_ids_to_seq_ids, finished_requests_ids) - cuda_graph_pad_len = input_state_indices_buffer.shape[0] - len( - state_indices) - state_indices.extend([PAD_SLOT_ID] * cuda_graph_pad_len) - - input_state_indices_buffer.copy_( - torch.as_tensor(state_indices, dtype=torch.int32, device="cuda")) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - """ - Provide the CUDA graph capture runs with a buffer in adjusted size. - The buffer is used to maintain the Cache during the CUDA graph replay - runs. - """ - state_indices_tensor = torch.as_tensor([PAD_SLOT_ID] * batch_size, - dtype=torch.int32, - device="cuda") - return (self.cache, state_indices_tensor) - - def _assign_seq_id_to_cache_index(self, cur_rid: str, seq_id: int, - finished_requests_ids) -> int: - """ - Assign (req_id,seq_id) pair to a `destination_index` index, if - already occupied, move the occupying index to a free index. 
- """ - if cur_rid in finished_requests_ids: - # set as pad, do not allocate destination index - return PAD_SLOT_ID - elif cur_rid not in self.cache_indices_mapping: - destination_index = self.free_cache_indices.pop() - self.cache_indices_mapping[cur_rid] = {seq_id: destination_index} - return destination_index - elif seq_id not in (seq_ids2indices := - self.cache_indices_mapping[cur_rid]): - # parallel sampling , where n > 1, assume prefill have - # already happened, so we copy the - # existing cache into the siblings seq_ids caches - index_exists = next(iter(seq_ids2indices.values())) - # case of decoding n>1, copy prefill cache to decoding indices - destination_index = self.free_cache_indices.pop() - self._copy_cache(from_index=index_exists, - to_index=destination_index) - self.cache_indices_mapping[cur_rid][seq_id] = destination_index - return destination_index - else: - return self.cache_indices_mapping[cur_rid][seq_id] - - def _prepare_current_run_cache( - self, request_ids_to_seq_ids: dict[str, list[int]], - finished_requests_ids: list[str]) -> list[int]: - return [ - self._assign_seq_id_to_cache_index(req_id, seq_id, - finished_requests_ids) - for req_id, seq_ids in request_ids_to_seq_ids.items() - for seq_id in seq_ids - ] - - def _release_finished_requests(self, - finished_seq_groups_req_ids: list[str]): - for req_id in finished_seq_groups_req_ids: - if req_id in self.cache_indices_mapping: - for seq_id in self.cache_indices_mapping[req_id]: - self.free_cache_indices.append( - self.cache_indices_mapping[req_id][seq_id]) - self.cache_indices_mapping.pop(req_id) diff --git a/vllm/model_executor/models/falcon_h1.py b/vllm/model_executor/models/falcon_h1.py index 83efdd2e433f..f382018e2222 100644 --- a/vllm/model_executor/models/falcon_h1.py +++ b/vllm/model_executor/models/falcon_h1.py @@ -8,21 +8,17 @@ from torch import nn from transformers import FalconH1Config -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size from vllm.distributed.parallel_state import get_pp_group -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -31,8 +27,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP @@ -179,16 +173,12 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): output = torch.empty_like(hidden_states) self.mamba( 
hidden_states, output, - mamba_cache_params, - mamba2_metadata=mamba2_metadata, mup_vector=self.mup_vector, ) return output, residual @@ -364,8 +354,6 @@ def forward( self, positions: torch.Tensor, hidden_states: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): residual = hidden_states @@ -382,12 +370,10 @@ def forward( # Process input through the SSM branch. # FalconH1SSMDecoderLayer expects hidden_states, attn_metadata, - # residual, mamba_cache_params, and sequence_idx. + # residual, and sequence_idx. ssm_hidden, _ = self.mamba( hidden_states=hidden_states * self.ssm_in_multiplier, residual=residual, - mamba_cache_params=mamba_cache_params, - mamba2_metadata=mamba2_metadata, **kwargs, ) # Sum the outputs from both branches. @@ -464,25 +450,10 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - # pass a sequence index tensor, that is required for - # proper continuous batching computation including - # chunked prefill - attn_metadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.mamba_chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - if get_pp_group().is_first_rank: if inputs_embeds is not None: hidden_states = inputs_embeds * self.embedding_multiplier @@ -495,14 +466,9 @@ def forward( for i in range(self.start_layer, self.end_layer): layer = self.layers[i] - layer_mamba_cache_params = None - if mamba_cache_params: - layer_mamba_cache_params = mamba_cache_params.at_layer_idx(i) hidden_states = layer( positions=positions, hidden_states=hidden_states, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -541,13 +507,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. 
Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -570,7 +534,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.mamba_d_head, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): @@ -592,7 +555,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.tie_word_embeddings = config.tie_word_embeddings self.unpadded_vocab_size = config.vocab_size - self.mamba_cache: Optional[MambaCacheManager] = None if lora_config: self.unpadded_vocab_size += lora_config.lora_extra_vocab_size if get_pp_group().is_last_rank: @@ -637,40 +599,15 @@ def forward( **kwargs, ): - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager( - self.vllm_config, - self.config.num_hidden_layers, - *mamba_state_shape, - *mamba_state_dtype, - ) - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - hidden_states = self.model( input_ids, positions, - mamba_cache_params, intermediate_tensors, inputs_embeds, ) return hidden_states - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/model_executor/models/granitemoehybrid.py b/vllm/model_executor/models/granitemoehybrid.py index e89a1a4a0f7d..f5751fe47bb8 100644 --- a/vllm/model_executor/models/granitemoehybrid.py +++ b/vllm/model_executor/models/granitemoehybrid.py @@ -9,19 +9,15 @@ from torch import nn from transformers import GraniteMoeHybridConfig -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size from vllm.distributed.parallel_state import get_pp_group -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -30,10 +26,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors -from vllm.utils import LayerBlockType from .granitemoe import GraniteMoeMoE from .granitemoeshared import GraniteMoeSharedMLP @@ -102,14 +95,12 @@ def forward( self, hidden_states: torch.Tensor, residual: 
Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): residual = hidden_states hidden_states = self.input_layernorm(hidden_states) output = torch.empty_like(hidden_states) - self.mamba(hidden_states, output, mamba_cache_params, mamba2_metadata) + self.mamba(hidden_states, output) hidden_states = residual + output * self.residual_multiplier residual = hidden_states @@ -182,8 +173,6 @@ def forward( positions: torch.Tensor, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, ) -> torch.Tensor: residual = hidden_states hidden_states = self.input_layernorm(hidden_states) @@ -366,22 +355,10 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - attn_metadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.mamba_chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - if get_pp_group().is_first_rank: if inputs_embeds is not None: hidden_states = inputs_embeds @@ -399,20 +376,9 @@ def forward( for i, layer in enumerate(self.layers): if isinstance(layer, GraniteMoeHybridAttentionDecoderLayer): num_attn += 1 - - layer_mamba_cache_params = None - if isinstance( - layer, - GraniteMoeHybridMambaDecoderLayer) and mamba_cache_params: - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - i - num_attn) - - hidden_states, residual = layer( - positions=positions, - hidden_states=hidden_states, - residual=residual, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata) + hidden_states, residual = layer(positions=positions, + hidden_states=hidden_states, + residual=residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -552,13 +518,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -577,7 +541,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.mamba_d_head, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): @@ -620,9 +583,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): scale=1 / self.config.logits_scaling) - # Used to track and store by the Mamba cache between steps. 
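The decoder-layer call sites collapse to self.mamba(hidden_states, output) because each mixer registers itself under its prefix at construction time, and the torch.ops.vllm custom op resolves the layer from the forward context and lets it pull its own kv_cache and per-layer attention metadata when it runs. A stripped-down sketch of that name-keyed dispatch, with plain dicts standing in for vLLM's registry and forward context (illustrative only, not vLLM's actual API):

import torch

LAYERS = {}            # stands in for compilation_config.static_forward_context
FORWARD_CONTEXT = {}   # stands in for get_forward_context().attn_metadata


class ToyMixer:

    def __init__(self, prefix: str):
        self.prefix = prefix
        LAYERS[prefix] = self               # register once, keyed by prefix

    def forward(self, hidden_states: torch.Tensor, output: torch.Tensor):
        # The call site passes activations only; per-layer metadata is
        # looked up by name, the way forward_cuda reads attn_metadata[prefix].
        metadata = FORWARD_CONTEXT.get(self.prefix)
        scale = 1.0 if metadata is None else metadata["scale"]
        output.copy_(hidden_states * scale)


def toy_mixer_op(hidden_states, output, layer_name):
    # Mirrors the custom-op body: resolve the layer by name, then run it.
    LAYERS[layer_name].forward(hidden_states, output)


layer = ToyMixer("model.layers.0.mixer")
FORWARD_CONTEXT["model.layers.0.mixer"] = {"scale": 2.0}
x = torch.ones(2, 4)
out = torch.empty_like(x)
toy_mixer_op(x, out, "model.layers.0.mixer")    # no cache or metadata arguments
assert torch.equal(out, 2 * x)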
- self.mamba_cache: Optional[MambaCacheManager] = None - self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) @@ -636,38 +596,11 @@ def forward(self, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_mamba_layers = ( - self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, - LayerBlockType.mamba)) - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - hidden_states = self.model(input_ids, positions, mamba_cache_params, - intermediate_tensors, inputs_embeds) + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) return hidden_states - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 12a49029195f..e8277e259bc5 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -9,7 +9,6 @@ from torch import nn from transformers import JambaConfig -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig @@ -30,10 +29,7 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.llama import LlamaMLP as JambaMLP -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors -from vllm.utils import LayerBlockType from .interfaces import HasInnerState, IsHybrid, SupportsLoRA, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, is_pp_missing_parameter, @@ -145,7 +141,6 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, **kwargs, ): if residual is None: @@ -156,7 +151,7 @@ def forward( hidden_states, residual) output = torch.empty_like(hidden_states) - self.mamba(hidden_states, output, mamba_cache_params) + self.mamba(hidden_states, output) # Fully Connected hidden_states, residual = self.pre_ff_layernorm(output, residual) hidden_states = self.feed_forward(hidden_states) @@ -333,7 +328,6 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: @@ -348,24 +342,11 @@ def forward( hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - kv_cache_index = 0 - mamba_cache_index = 0 for layer in islice(self.layers, self.start_layer, self.end_layer): - layer_mamba_cache_params = None - if isinstance(layer, JambaAttentionDecoderLayer): - kv_cache_index += 
1 - if isinstance(layer, - JambaMambaDecoderLayer) and mamba_cache_params: - current_state_layer = mamba_cache_index - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - current_state_layer) - mamba_cache_index += 1 - - hidden_states, residual = layer( - positions=positions, - hidden_states=hidden_states, - residual=residual, - mamba_cache_params=layer_mamba_cache_params) + hidden_states, residual = layer(positions=positions, + hidden_states=hidden_states, + residual=residual) + if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, @@ -503,8 +484,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if not lora_config else lora_config.lora_vocab_padding_size, prefix=maybe_prefix(prefix, "lm_head"), ) - # Used to track and store by the Mamba cache between steps. - self.mamba_cache: Optional[MambaCacheManager] = None self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -521,24 +500,9 @@ def forward(self, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - # NOTE: mamba_cache_params is not needed for v1 - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_layers = self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, LayerBlockType.mamba) - state_shape = self.get_mamba_state_shape_from_config( - self.vllm_config) - state_dtype = self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_layers, *state_shape, - *state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - hidden_states = self.model(input_ids, positions, mamba_cache_params, - intermediate_tensors, inputs_embeds) + + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) return hidden_states def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): @@ -574,7 +538,6 @@ def get_mamba_state_shape_from_config( intermediate_size=hf_config.mamba_expand * hidden_size, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=envs.VLLM_USE_V1, ) def compute_logits( diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py index dd97afbeb668..53c36e4e52d8 100644 --- a/vllm/model_executor/models/lfm2.py +++ b/vllm/model_executor/models/lfm2.py @@ -8,7 +8,6 @@ import torch.nn as nn from transformers import Lfm2Config -from vllm import envs from vllm.attention import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig @@ -297,7 +296,6 @@ def forward( self.conv( hidden_states, output, - conv_metadata=None, ) hidden_states, residual = self.ffn_norm(output, residual) hidden_states = self.feed_forward(hidden_states) @@ -459,13 +457,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int]]: """ Calculate shapes for LFM2's convolutional cache. 
Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -478,7 +474,6 @@ def get_mamba_state_shape_from_config( tp_world_size=parallel_config.tensor_parallel_size, intermediate_size=hf_config.conv_dim, conv_kernel=hf_config.conv_L_cache, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: @@ -489,8 +484,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: scheduler_config = vllm_config.scheduler_config assert (not cache_config.enable_prefix_caching ), "Lfm2 currently does not support prefix caching" - assert envs.VLLM_USE_V1, ( - "Lfm2ForCausalLM doesn't support vLLM v0. Please enable v1") super().__init__() self.config = config diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py index 36141a5d5064..5bd268291c7d 100644 --- a/vllm/model_executor/models/mamba.py +++ b/vllm/model_executor/models/mamba.py @@ -8,7 +8,6 @@ from torch import nn from transformers import MambaConfig -from vllm import envs from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed.parallel_state import get_pp_group @@ -24,10 +23,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.interfaces import (HasInnerState, IsAttentionFree, SupportsPP) -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors -from vllm.utils import LayerBlockType from .utils import (AutoWeightsLoader, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, @@ -72,7 +68,6 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, **kwargs, ): if residual is None: @@ -82,7 +77,7 @@ def forward( hidden_states, residual = self.norm(hidden_states, residual) output = torch.empty_like(hidden_states) - self.mixer(hidden_states, output, mamba_cache_params) + self.mixer(hidden_states, output) return output, residual @@ -134,7 +129,6 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: Optional[MambaCacheParams] = None, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: @@ -151,17 +145,9 @@ def forward( for i in range(self.start_layer, self.end_layer): layer = self.layers[i] - - layer_cache_params = None - if mamba_cache_params is not None: - layer_cache_params = mamba_cache_params.at_layer_idx( - i - self.start_layer) - - hidden_states, residual = layer( - positions=positions, - hidden_states=hidden_states, - residual=residual, - mamba_cache_params=layer_cache_params) + hidden_states, residual = layer(positions=positions, + hidden_states=hidden_states, + residual=residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, @@ -225,9 +211,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "lm_head"), ) - # Used to track and store by the Mamba cache between steps. 
- self.mamba_cache: Optional[MambaCacheManager] = None - self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -244,22 +227,7 @@ def forward(self, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_layers = self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, LayerBlockType.mamba) - state_shape = self.get_mamba_state_shape_from_config( - self.vllm_config) - state_dtype = self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_layers, *state_shape, - *state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - hidden_states = self.backbone(input_ids, positions, mamba_cache_params, + hidden_states = self.backbone(input_ids, positions, intermediate_tensors, inputs_embeds) return hidden_states @@ -288,8 +256,7 @@ def get_mamba_state_shape_from_config( tp_world_size=parallel_config.tensor_parallel_size, intermediate_size=hf_config.intermediate_size, state_size=hf_config.state_size, - conv_kernel=hf_config.conv_kernel, - use_v1=envs.VLLM_USE_V1) + conv_kernel=hf_config.conv_kernel) def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): return self.mamba_cache.copy_inputs_before_cuda_graphs( diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index 9c3108146d2e..97e9c5785e72 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -8,16 +8,11 @@ from torch import nn from transformers import MambaConfig -from vllm import envs -from vllm.attention.backends.abstract import AttentionMetadata from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed.parallel_state import get_pp_group -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -28,10 +23,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.interfaces import (HasInnerState, IsAttentionFree) -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors -from vllm.utils import LayerBlockType from .utils import (AutoWeightsLoader, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, @@ -74,8 +66,6 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): if residual is None: @@ -85,7 +75,7 @@ def forward( hidden_states, residual = self.norm(hidden_states, residual) output = torch.empty_like(hidden_states) - self.mixer(hidden_states, output, mamba_cache_params, mamba2_metadata) + self.mixer(hidden_states, output) return output, residual @@ -137,7 +127,6 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, 
inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: @@ -152,25 +141,10 @@ def forward( hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - attn_metadata: AttentionMetadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - for i, layer in enumerate(self.layers): - hidden_states, residual = layer( - positions=positions, - hidden_states=hidden_states, - residual=residual, - mamba_cache_params=mamba_cache_params.at_layer_idx( - i - self.start_layer) if mamba_cache_params else None, - mamba2_metadata=mamba2_metadata) + hidden_states, residual = layer(positions=positions, + hidden_states=hidden_states, + residual=residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -222,13 +196,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -247,7 +219,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.head_dim, state_size=hf_config.state_size, conv_kernel=hf_config.conv_kernel, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): @@ -282,9 +253,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.backbone.embeddings) - # Used to track and store by the Mamba cache between steps. 
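For reference, the Mamba1 cache shapes returned by mamba1_state_shape (called by the Mamba and Jamba models above) now come out the same way on every path: the helper builds (intermediate/tp, conv_kernel - 1), then unconditionally swaps the conv shape, while the temporal shape is left alone. Worked through with invented config values, as a sketch only:

def divide(numerator: int, denominator: int) -> int:
    assert numerator % denominator == 0
    return numerator // denominator

tp_world_size = 2
intermediate_size, state_size, conv_kernel = 4096, 16, 4   # invented values

conv_state_shape = (divide(intermediate_size, tp_world_size), conv_kernel - 1)
temporal_state_shape = (divide(intermediate_size, tp_world_size), state_size)
# The swap is now unconditional: width - 1 leads, the sharded dim goes last.
conv_state_shape = conv_state_shape[1], conv_state_shape[0]

assert conv_state_shape == (3, 2048)
assert temporal_state_shape == (2048, 16)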
- self.mamba_cache: Optional[MambaCacheManager] = None - self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -300,29 +268,8 @@ def forward(self, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_mamba_layers = ( - self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, - LayerBlockType.mamba)) - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - else: - # NOTE: mamba_cache_params is not needed for v1 - mamba_cache_params = None - hidden_states = self.backbone(input_ids, positions, mamba_cache_params, + hidden_states = self.backbone(input_ids, positions, intermediate_tensors, inputs_embeds) return hidden_states diff --git a/vllm/model_executor/models/mamba_cache.py b/vllm/model_executor/models/mamba_cache.py deleted file mode 100644 index 6b16e3ce7d98..000000000000 --- a/vllm/model_executor/models/mamba_cache.py +++ /dev/null @@ -1,83 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from dataclasses import dataclass - -import torch - -from vllm.attention.backends.utils import PAD_SLOT_ID -from vllm.config import VllmConfig -from vllm.model_executor.models.constant_size_cache import ConstantSizeCache - - -@dataclass -class MambaCacheParams: - conv_state: torch.Tensor = torch.Tensor() - ssm_state: torch.Tensor = torch.Tensor() - state_indices_tensor: torch.Tensor = torch.Tensor() - - def at_layer_idx(self, layer_idx): - return MambaCacheParams(self.conv_state[layer_idx], - self.ssm_state[layer_idx], - self.state_indices_tensor) - - -class MambaCacheManager(ConstantSizeCache): - - def __init__(self, vllm_config: VllmConfig, num_mamba_layers: int, - conv_state_shape: tuple[int, int], - temporal_state_shape: tuple[int, int], - conv_state_dtype: torch.dtype, - temporal_state_dtype: torch.dtype): - - self.conv_state_dtype = conv_state_dtype - self.temporal_state_dtype = temporal_state_dtype - - # Determine max batch size to set size of MambaCache - max_batch_size = vllm_config.scheduler_config.max_num_seqs - if not vllm_config.model_config.enforce_eager: - max_batch_size = vllm_config.pad_for_cudagraph(max_batch_size) - - # Initialize parent class - super().__init__(max_batch_size) - - # assume conv_state = (dim, state_len) - assert conv_state_shape[0] > conv_state_shape[1] - conv_state = torch.empty(size=(num_mamba_layers, max_batch_size) + - (conv_state_shape[1], conv_state_shape[0]), - dtype=self.conv_state_dtype, - device="cuda").transpose(-1, -2) - temporal_state = torch.empty(size=(num_mamba_layers, max_batch_size) + - temporal_state_shape, - dtype=self.temporal_state_dtype, - device="cuda") - - self._mamba_cache = (conv_state, temporal_state) - - @property - def cache(self): - return self._mamba_cache - - def _copy_cache(self, from_index: int, to_index: int): - for cache_t in self.cache: - cache_t[:, to_index].copy_(cache_t[:, from_index], - non_blocking=True) - - def current_run_tensors(self, **kwargs) -> MambaCacheParams: - """ - Return the tensors for the current run's conv and ssm state. 
- """ - cache_tensors, state_indices_tensor = super().current_run_tensors( - **kwargs) - return MambaCacheParams(cache_tensors[0], cache_tensors[1], - state_indices_tensor) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - """ - Provide the CUDA graph capture runs with a buffer in adjusted size. - The buffer is used to maintain the Mamba Cache during the CUDA graph - replay runs. - """ - return self._mamba_cache, torch.as_tensor([PAD_SLOT_ID] * batch_size, - dtype=torch.int32, - device="cuda") diff --git a/vllm/model_executor/models/minimax_cache.py b/vllm/model_executor/models/minimax_cache.py deleted file mode 100644 index 9164ac06a3b0..000000000000 --- a/vllm/model_executor/models/minimax_cache.py +++ /dev/null @@ -1,36 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from dataclasses import dataclass - -import torch - -from vllm.model_executor.models.constant_size_cache import ConstantSizeCache - - -@dataclass -class MinimaxCacheParams: - minimax_cache: torch.Tensor = torch.Tensor() - state_indices_tensor: torch.Tensor = torch.Tensor() - - def at_layer_idx(self, layer_idx): - return MinimaxCacheParams(self.minimax_cache[layer_idx, ...], - self.state_indices_tensor) - - -class MinimaxCacheManager(ConstantSizeCache): - - def __init__(self, dtype, cache_shape): - super().__init__(cache_shape[1]) # max_batch_size is cache_shape[1] - self._minimax_cache = torch.empty(size=cache_shape, - dtype=dtype, - device="cuda") - - @property - def cache(self): - return self._minimax_cache - - def _copy_cache(self, from_index: int, to_index: int): - assert len(self.cache) > 0 - for cache_t in self.cache: - cache_t[:, to_index].copy_(cache_t[:, from_index], - non_blocking=True) diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index 1d2c7dea811e..cc9a959f6331 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -14,7 +14,6 @@ from torch import nn from transformers import MiniMaxConfig -from vllm import envs from vllm.attention import Attention, AttentionMetadata from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig @@ -44,7 +43,6 @@ from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid -from .minimax_cache import MinimaxCacheManager, MinimaxCacheParams from .utils import PPMissingLayer, is_pp_missing_parameter, make_layers @@ -404,7 +402,6 @@ def __init__( def forward(self, hidden_states: torch.Tensor, positions: torch.Tensor, - kv_caches: Union[list[dict], Optional[torch.Tensor]], attn_metadata: AttentionMetadata, residual: Optional[torch.Tensor], is_warmup: bool = False, @@ -418,7 +415,6 @@ def forward(self, hidden_states=layernorm_output, output=self_attention_output, positions=positions, - kv_caches=kv_caches, ) residual = residual * self.layernorm_attention_alpha @@ -563,10 +559,6 @@ def layer_fn(prefix): self._dtype = _dummy.dtype del _dummy - if not envs.VLLM_USE_V1: - self.minimax_cache = MinimaxCacheManager( - dtype=torch.float32, cache_shape=self.cache_shape) - norm_kwargs = {} if hasattr(config, "rms_norm_eps"): norm_kwargs["eps"] = config.rms_norm_eps @@ -614,25 +606,6 @@ def forward(self, **kwargs) -> Union[torch.Tensor, IntermediateTensors]: forward_context = get_forward_context() attn_metadata = forward_context.attn_metadata - if not envs.VLLM_USE_V1 and attn_metadata is None: - 
return None - if not envs.VLLM_USE_V1: - if "request_ids_to_seq_ids" not in kwargs: - kwargs["request_ids_to_seq_ids"] = {} - if "finished_requests_ids" not in kwargs: - kwargs["finished_requests_ids"] = [] - ( - minimax_cache_tensors, - state_indices_tensor, - ) = self.minimax_cache.current_run_tensors(**kwargs) - if getattr(attn_metadata, "num_prefills", 0) > 0: - self._clear_prefill_cache(attn_metadata, minimax_cache_tensors, - **kwargs) - - minimax_cache_params = MinimaxCacheParams(minimax_cache_tensors, - state_indices_tensor) - else: - minimax_cache_params = None if get_pp_group().is_first_rank: if inputs_embeds is None: @@ -645,20 +618,10 @@ def forward(self, hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - minimax_cache_index = 0 - for layer in islice(self.layers, self.start_layer, self.end_layer): - _caches = None - if not envs.VLLM_USE_V1 and isinstance( - layer.self_attn, MiniMaxText01LinearAttention): - current_state_layer = minimax_cache_index - _caches = minimax_cache_params.at_layer_idx( - current_state_layer) - minimax_cache_index += 1 hidden_states, residual = layer( hidden_states=hidden_states, positions=positions, - kv_caches=_caches, attn_metadata=attn_metadata, residual=residual, ) @@ -1003,13 +966,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, ...], ...]: """Calculate shape for MiniMaxText01LinearAttention cache. Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py index ff571541a60a..987920ecc331 100644 --- a/vllm/model_executor/models/nemotron_h.py +++ b/vllm/model_executor/models/nemotron_h.py @@ -23,21 +23,17 @@ import torch from torch import nn -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size from vllm.distributed.parallel_state import get_pp_group -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.activation import ReLUSquaredActivation from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -49,14 +45,11 @@ from vllm.model_executor.models.interfaces import (HasInnerState, IsHybrid, SupportsLoRA, SupportsPP, SupportsQuant) -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.model_executor.models.utils import ( AutoWeightsLoader, WeightsMapper, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) from vllm.sequence import IntermediateTensors from vllm.transformers_utils.configs import NemotronHConfig -from vllm.utils import LayerBlockType class NemotronHMLP(nn.Module): @@ -181,8 +174,6 @@ def forward( self, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: 
MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): if residual is None: @@ -192,7 +183,7 @@ def forward( hidden_states, residual = self.norm(hidden_states, residual) output = torch.empty_like(hidden_states) - self.mixer(hidden_states, output, mamba_cache_params, mamba2_metadata) + self.mixer(hidden_states, output) return output, residual @@ -370,22 +361,10 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - attn_metadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - if get_pp_group().is_first_rank: if inputs_embeds is not None: hidden_states = inputs_embeds @@ -398,22 +377,11 @@ def forward( residual = intermediate_tensors["residual"] residual = None - num_non_mamba_layers = 0 for i, layer in enumerate(self.layers): - layer_mamba_cache_params = None - if isinstance(layer, - NemotronHMambaDecoderLayer) and mamba_cache_params: - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - i - num_non_mamba_layers) - else: - num_non_mamba_layers += 1 - hidden_states, residual = layer( positions=positions, hidden_states=hidden_states, residual=residual, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) if not get_pp_group().is_last_rank: @@ -508,13 +476,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -533,7 +499,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.mamba_head_dim, state_size=hf_config.ssm_state_size, conv_kernel=hf_config.conv_kernel, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): @@ -566,8 +531,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if not lora_config else lora_config.lora_vocab_padding_size, prefix=maybe_prefix(prefix, "lm_head"), ) - # Used to track and store by the Mamba cache between steps. 
- self.mamba_cache: Optional[MambaCacheManager] = None self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -584,40 +547,11 @@ def forward(self, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - - num_mamba_layers = \ - self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, - LayerBlockType.mamba - ) - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - hidden_states = self.model(input_ids, positions, mamba_cache_params, - intermediate_tensors, inputs_embeds) + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) return hidden_states - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/model_executor/models/phi4flash.py b/vllm/model_executor/models/phi4flash.py deleted file mode 100644 index ae153558e37a..000000000000 --- a/vllm/model_executor/models/phi4flash.py +++ /dev/null @@ -1,731 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import math -from collections.abc import Iterable -from typing import Optional, Union - -import torch -import torch.nn as nn -from transformers.activations import ACT2FN - -import vllm.envs as envs -from vllm.attention import Attention, AttentionMetadata, AttentionType -from vllm.attention.selector import _Backend -from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size -from vllm.forward_context import ForwardContext, get_forward_context -from vllm.logger import init_logger -from vllm.model_executor.layers.linear import (ColumnParallelLinear, - MergedColumnParallelLinear, - RowParallelLinear) -from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( - causal_conv1d_fn, causal_conv1d_update) -from vllm.model_executor.layers.mamba.ops.mamba_ssm import ( - selective_scan_fn, selective_state_update) -from vllm.model_executor.layers.vocab_parallel_embedding import ( - DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) -from vllm.model_executor.models.interfaces import (HasInnerState, IsHybrid, - SupportsV0Only) -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) -from vllm.sequence import IntermediateTensors - -from .utils import make_layers, maybe_prefix - -logger = init_logger(__name__) - - -class SwiGLUActivation(nn.Module): - - def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor: - return x1 * nn.functional.silu(x2) - - -class SambaYMLP(nn.Module): - """Gated Linear Unit. - - Reference: - Language Modeling with Gated Convolutional Networks. - https://arxiv.org/pdf/1612.08083v3.pdf. 
- - """ - - def __init__(self, config): - super().__init__() - - self.config = config - self.fc1 = nn.Linear(config.hidden_size, - 2 * config.intermediate_size, - bias=False) - self.fc2 = nn.Linear(config.intermediate_size, - config.hidden_size, - bias=False) - - self.activation_fn = ACT2FN[config.hidden_act] - - def forward(self, hidden_states): - y = self.fc1(hidden_states) - gate, y = y.chunk(2, dim=-1) - y = y * self.activation_fn(gate) - return self.fc2(y) - - -def get_virtual_engine(): - forward_context: ForwardContext = get_forward_context() - return forward_context.virtual_engine - - -class SambaYAttention(nn.Module): - - def __init__(self, - config, - layer_idx: Optional[int] = None, - yoco_cross: bool = False, - cache_config: Optional[CacheConfig] = None, - prefix: str = ""): - super().__init__() - if layer_idx is None: - logger.warning_once( - f"Instantiating {self.__class__.__name__} without passing " - "a `layer_idx` is not recommended and will lead to errors " - "during the forward call if caching is used. Please make " - "sure to provide a `layer_idx` when creating this class.") - self.hidden_size = config.hidden_size - self.num_heads = config.num_attention_heads - self.head_dim = self.hidden_size // self.num_heads - self.num_key_value_heads = config.num_key_value_heads - self.yoco_cross = yoco_cross - - if (self.head_dim * self.num_heads) != self.hidden_size: - raise ValueError("hidden_size must be divisible by num_heads " - f"(got `hidden_size`: {self.hidden_size} and " - f"`num_heads`: {self.num_heads}).") - - op_size = self.num_heads * self.head_dim + 2 * ( - self.num_key_value_heads * self.head_dim) - self.out_proj = nn.Linear(self.num_heads * self.head_dim, - self.hidden_size, - bias=True) - if yoco_cross: - self.Wqkv = nn.Linear(self.hidden_size, - self.num_heads * self.head_dim, - bias=True) - else: - self.Wqkv = nn.Linear(self.hidden_size, op_size, bias=True) - - # disable sliding window for the second half of the model - is_sliding = config.layer_types[layer_idx] == "sliding_attention" - sliding_window = config.sliding_window if is_sliding else None - - assert self.num_heads % 2 == 0, 'num_heads should be even' - assert self.num_key_value_heads % 2 == 0, 'num_heads should be even' - - self.lambda_init = self.lambda_init_fn(layer_idx) - self.lambda_q1 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_k1 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_q2 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_k2 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.subln = nn.RMSNorm(2 * self.head_dim, - eps=1e-5, - elementwise_affine=True) - - params = { - 'differential_flash_attention_config': { - 'lambda_init': self.lambda_init, - 'lambda_q1': self.lambda_q1, - 'lambda_k1': self.lambda_k1, - 'lambda_q2': self.lambda_q2, - 'lambda_k2': self.lambda_k2, - "subln": self.subln, - } - } - - if yoco_cross: - kv_shared_layer_index = config.num_hidden_layers // 2 + 1 - kv_sharing_target_layer_name = \ - f"model.layers.{kv_shared_layer_index}.self_attn.attn" - else: - kv_sharing_target_layer_name = None - - self.attn = Attention( - self.num_heads, - self.head_dim, - self.head_dim**-0.5, - num_kv_heads=self.num_key_value_heads, - cache_config=cache_config, - per_layer_sliding_window=sliding_window, - prefix=f"{prefix}.attn", - attn_type=AttentionType.DECODER, - 
kv_sharing_target_layer_name=kv_sharing_target_layer_name, - **params) - assert self.attn.backend == _Backend.DIFFERENTIAL_FLASH_ATTN,\ - "DIFFERENTIAL_FLASH_ATTN required" - - def lambda_init_fn(self, depth): - return 0.8 - 0.6 * math.exp(-0.3 * depth) - - def forward( - self, - hidden_states: torch.Tensor, - ): - - if not self.yoco_cross: # need to generate kv-cache - qkv = self.Wqkv(hidden_states) - q, k, v = qkv.split([ - self.hidden_size, self.num_key_value_heads * self.head_dim, - self.num_key_value_heads * self.head_dim - ], - dim=-1) - attn_output = self.attn(q, k, v) - else: # reuse the kv cache, full attention - q = self.Wqkv(hidden_states) - attn_output = self.attn(q, None, None) - attn_output = attn_output.view(-1, self.num_heads * self.head_dim) - return self.out_proj(attn_output) - - -class Phi4Mamba(nn.Module): - - def __init__( - self, - d_model, - d_state=16, - d_conv=4, - expand=2, - dt_rank="auto", - dt_min=0.001, - dt_max=0.1, - dt_init="random", # difference - dt_scale=1.0, # difference - dt_init_floor=1e-4, - conv_bias=True, - bias=False, - use_fast_path=True, # Fused kernel options - layer_idx=None, - device=None, - dtype=None, - yoco_cross=False, - yoco_kv=False, - ): - factory_kwargs = {"params_dtype": dtype} # difference - super().__init__() - self.yoco_cross = yoco_cross - self.yoco_kv = yoco_kv - self.d_model = d_model - self.d_state = d_state - self.d_conv = d_conv - self.expand = expand - self.d_inner = int(self.expand * self.d_model) - self.dt_rank = math.ceil(self.d_model / - 16) if dt_rank == "auto" else dt_rank - self.use_fast_path = use_fast_path - self.layer_idx = layer_idx - self.swiGluActivation = SwiGLUActivation() - if self.yoco_cross: - self.in_proj = MergedColumnParallelLinear(self.d_model, - [self.d_inner], - bias=bias, - **factory_kwargs) - self.out_proj = RowParallelLinear(self.d_inner, - self.d_model, - bias=bias, - **factory_kwargs) - return - self.conv1d = ColumnParallelLinear( - input_size=d_conv, - output_size=self.d_inner, - bias=conv_bias, - params_dtype=dtype, - ) - # unsqueeze to fit conv1d weights shape into the linear weights shape. - # Can't do this in `weight_loader` since it already exists in - # `ColumnParallelLinear` and `set_weight_attrs` - # doesn't allow to override it - self.conv1d.weight.data = self.conv1d.weight.data.unsqueeze(1) - - self.in_proj = MergedColumnParallelLinear( - self.d_model, - [self.d_inner] * 2, - bias=bias, - params_dtype=dtype, - ) - - # selective projection used to make dt, B and C input dependent - self.x_proj = RowParallelLinear( - self.d_inner, - self.dt_rank + self.d_state * 2, - bias=False, - params_dtype=dtype, - ) - - # time step projection (discretization) - - # In the forward we need to apply dt_proj without the bias, - # as the bias is added in the selective scan kernel. 
- self.dt_proj = ColumnParallelLinear( - self.dt_rank, - self.d_inner, - bias=True, - skip_bias_add=True, - params_dtype=dtype, - ) - - # # D "skip" parameter - # self.D = nn.Parameter(torch.ones(self.d_inner)) # Keep in fp32 - self.A = nn.Parameter( - torch.empty( - self.d_inner, - self.d_state, - dtype=torch.float32, - )) - self.D = nn.Parameter(torch.ones(self.d_inner, dtype=torch.float32)) - - self.out_proj = RowParallelLinear( - self.d_inner, - self.d_model, - bias=bias, - input_is_parallel=True, - params_dtype=dtype, - ) - self.activation = "silu" - - def forward(self, - hidden_states: torch.Tensor, - attn_metadata: AttentionMetadata, - mamba_cache_params: MambaCacheParams, - yoco_key_values=None) -> torch.Tensor: - - if self.yoco_cross: - out = self.in_proj(hidden_states)[0] - out = self.swiGluActivation(yoco_key_values, out) - out = self.out_proj(out) - return out[0], yoco_key_values - - # 1. Gated MLP's linear projection - # projected_states = self.in_proj(hidden_states)[0].transpose(-2, -1) - projected_states = self.in_proj( - hidden_states.to(self.in_proj.weight.dtype))[0].transpose(-2, -1) - hidden_states, gate = projected_states.chunk(2, dim=-2) - - # 2. Convolution sequence transformation - conv_weights = self.conv1d.weight.view(self.conv1d.weight.size(0), - self.conv1d.weight.size(2)) - - if attn_metadata.query_start_loc is not None \ - and attn_metadata.context_lens_tensor is not None: - # |---------- N-1 iteration --------| - # |---------------- N iteration ---------------------| - # |- tokenA -|......................|-- newTokens ---| - # |---------- context_len ----------| - # |-------------------- seq_len ---------------------| - # |-- query_len ---| - hidden_states = causal_conv1d_fn( - hidden_states, - conv_weights, - self.conv1d.bias, - activation=self.activation, - conv_states=mamba_cache_params.conv_state, - has_initial_state=attn_metadata.context_lens_tensor > 0, - cache_indices=mamba_cache_params.state_indices_tensor, - query_start_loc=attn_metadata.query_start_loc) - else: - hidden_states = causal_conv1d_update( - hidden_states.transpose(0, 1), - mamba_cache_params.conv_state, - conv_weights, - self.conv1d.bias, - self.activation, - conv_state_indices=mamba_cache_params.state_indices_tensor) - hidden_states = hidden_states.transpose(0, 1) - - # 3. State Space Model sequence transformation - # 3.a. input varying initialization of time_step, B and C - ssm_parameters = self.x_proj(hidden_states.transpose(-2, -1))[0] - - time_step, B, C = torch.split( - ssm_parameters, - [self.dt_rank, self.d_state, self.d_state], - dim=-1, - ) - - # Note that Jamba normalizes B, C, and time_step here but Mamba doesn't. 
- - discrete_time_step = self.dt_proj(time_step)[0].transpose(-2, -1) - # 3.c perform the recurrence y ← SSM(A, B, C)(x) - time_proj_bias = (self.dt_proj.bias.float() if hasattr( - self.dt_proj, "bias") else None) - - if attn_metadata.query_start_loc is not None \ - and attn_metadata.context_lens_tensor is not None: - scan_outputs = selective_scan_fn( - hidden_states, - mamba_cache_params.ssm_state, - discrete_time_step, - self.A, - B.transpose(-2, -1), - C.transpose(-2, -1), - self.D.float(), - # z, - None if self.yoco_kv else gate, - time_proj_bias, - delta_softplus=True, - cache_indices=mamba_cache_params.state_indices_tensor, - has_initial_state=attn_metadata.context_lens_tensor > 0, - query_start_loc=attn_metadata.query_start_loc) - else: - scan_outputs = torch.empty_like(hidden_states.transpose(0, 1)) - selective_state_update( - mamba_cache_params.ssm_state, - hidden_states.transpose(0, 1), - discrete_time_step.transpose(0, 1), - self.A, - B, - C, - self.D, - # z - # gate.transpose(0, 1), - None if self.yoco_kv else gate.transpose(0, 1), - time_proj_bias, - dt_softplus=True, - state_batch_indices=mamba_cache_params.state_indices_tensor, - out=scan_outputs) - scan_outputs = scan_outputs.transpose(0, 1) - - # 4. Final linear projection - if self.yoco_kv: - # gate = gate.transpose(-1,-2).contiguous() - yoco_key_values = scan_outputs.transpose(-2, -1) - scan_outputs = self.swiGluActivation(scan_outputs, gate) - - contextualized_states = self.out_proj(scan_outputs.transpose(-2, - -1))[0] - - return contextualized_states, yoco_key_values - - -class SambaYDecoderLayer(nn.Module): - - def __init__( - self, - config, - layer_idx, - cache_config, - prefix: str = "", - ) -> None: - super().__init__() - - self.config = config - self.layer_idx = layer_idx - - self.mlp = SambaYMLP(config) - self.input_layernorm = nn.LayerNorm(config.hidden_size, - eps=config.layer_norm_eps) - - self.yoco_mb = False - self.yoco_cross = False - if layer_idx >= config.num_hidden_layers // 2: - self.yoco_mb = True - self.yoco_cross = (layer_idx - >= (config.num_hidden_layers // 2 + 2)) - self.use_mamba = config.mb_per_layer > 0 and \ - layer_idx % config.mb_per_layer == 0 - if self.use_mamba: - factory_kwargs = {"dtype": None} - self.attn = Phi4Mamba(config.hidden_size, - layer_idx=layer_idx, - yoco_cross=self.yoco_cross, - yoco_kv=self.yoco_mb, - **factory_kwargs) - else: - self.attn = SambaYAttention(config, - layer_idx=layer_idx, - yoco_cross=self.yoco_cross, - cache_config=cache_config, - prefix=f"{prefix}.self_attn") - self.post_attention_layernorm = nn.LayerNorm(config.hidden_size, - eps=config.layer_norm_eps) - - def forward( - self, - hidden_states: torch.Tensor, - positions: torch.Tensor, - attn_metadata: AttentionMetadata, - mamba_cache_params: MambaCacheParams, - ssm_output: Optional[torch.LongTensor] = None, - ) -> Union[torch.Tensor, IntermediateTensors]: - if self.use_mamba: - assert mamba_cache_params is not None - else: - assert mamba_cache_params is None - - residual = hidden_states - hidden_states = self.input_layernorm( - hidden_states.to(dtype=self.input_layernorm.weight.dtype)) - - if self.use_mamba: - attn_outputs, ssm_output = self.attn(hidden_states, - attn_metadata, - mamba_cache_params, - yoco_key_values=ssm_output) - residual = residual.to(torch.float32) - else: - attn_outputs = self.attn(hidden_states, ) - hidden_states = residual + attn_outputs - residual = hidden_states - hidden_states = self.post_attention_layernorm( - hidden_states.to(dtype=self.post_attention_layernorm.weight.dtype)) - 
hidden_states = self.mlp(hidden_states) - hidden_states = residual + hidden_states - - return hidden_states, ssm_output - - -class SambaYModel(nn.Module): - - def __init__(self, - config, - cache_config=None, - quant_config=None, - lora_config=None, - prefix: str = "") -> None: - super().__init__() - self.config = config - self.vocab_size = config.vocab_size - self.embed_tokens = VocabParallelEmbedding( - self.vocab_size, - config.hidden_size, - org_num_embeddings=config.vocab_size, - ) - - # Pipeline parallel is not supported since the second half of - # the layers share the kv cache. - if get_pp_group().world_size != 1: - raise ValueError("Pipeline Parallel not supported") - - self.start_layer, self.end_layer, self.layers = make_layers( - config.num_hidden_layers, - lambda prefix: SambaYDecoderLayer(config, - int(prefix.split('.')[-1]), - cache_config, - prefix=prefix), - prefix=f"{prefix}.layers") - self.final_layernorm = nn.LayerNorm(config.hidden_size, - eps=config.layer_norm_eps) - - def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: - return self.embed_tokens(input_ids) - - def forward( - self, - input_ids: Optional[torch.Tensor], - positions: torch.Tensor, - attn_metadata: AttentionMetadata, - mamba_cache_params: MambaCacheParams, - intermediate_tensors: Optional[IntermediateTensors] = None, - inputs_embeds: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, IntermediateTensors]: - - if get_pp_group().is_first_rank: - if inputs_embeds is not None: - hidden_states = inputs_embeds - else: - hidden_states = self.get_input_embeddings(input_ids) - else: - assert intermediate_tensors is not None - hidden_states = intermediate_tensors["hidden_states"] - - mamba_state_idx = 0 - ssm_output = None - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] - if i == self.config.num_hidden_layers // 2 + 2: - # profile run - kv_cache_idx = self.config.num_hidden_layers // 2 + 1 - cache_layer = self.layers[kv_cache_idx] - kv_cache = cache_layer.attn.attn.kv_cache - if kv_cache[0].numel() == 0: - break - - # Starting from this layer, we do not need to calculate - # the kv cache since we reuse the kv cache from last layer. - # If in prefill phase, we can prune> truncate - # the hidden state to save computation cost. 
- if attn_metadata.prefill_metadata and not envs.VLLM_USE_V1: - selected_token_indices = torch.cumsum( - attn_metadata.seq_lens_tensor, dim=0) - 1 - hidden_states = hidden_states.index_select( - 0, selected_token_indices) - ssm_output = ssm_output.index_select( - 0, selected_token_indices) - - if layer.use_mamba: - if i < self.config.num_hidden_layers // 2 or \ - not layer.yoco_cross: - mamba_cache = mamba_cache_params.at_layer_idx( - mamba_state_idx) - mamba_state_idx += 1 - else: - mamba_cache = mamba_cache_params.at_layer_idx( - mamba_state_idx - 1) - - hidden_states, ssm_output = layer(hidden_states, - positions, - attn_metadata, - mamba_cache, - ssm_output=ssm_output) - else: - hidden_states, ssm_output = layer( - hidden_states, - positions, - attn_metadata, - None, # mamba_cache_params - ssm_output=ssm_output) - - hidden_states = self.final_layernorm( - hidden_states.to(dtype=self.final_layernorm.weight.dtype)) - return hidden_states - - -class Phi4FlashForCausalLM(nn.Module, HasInnerState, IsHybrid, SupportsV0Only): - - def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): - config = vllm_config.model_config.hf_config - cache_config = vllm_config.cache_config - lora_config = vllm_config.lora_config - quant_config = vllm_config.quant_config - scheduler_config = vllm_config.scheduler_config - self.compilation_config = vllm_config.compilation_config - self.vllm_config = vllm_config - # Prefix caching and chunked prefill is not supported for this model. - assert not cache_config.enable_prefix_caching, \ - "Phi4flash currently does not support prefix caching" - assert not scheduler_config.chunked_prefill_enabled, \ - "Phi4Flash currently does not support prefix caching" - super().__init__() - self.config = config - self.model_config = vllm_config.model_config - self.scheduler_config = scheduler_config - self.model = SambaYModel(config, - cache_config=cache_config, - prefix=maybe_prefix(prefix, "model")) - self.unpadded_vocab_size = config.vocab_size - if lora_config: - self.unpadded_vocab_size += lora_config.lora_extra_vocab_size - self.lm_head = ParallelLMHead( - self.unpadded_vocab_size, - config.hidden_size, - org_num_embeddings=config.vocab_size, - padding_size=( - DEFAULT_VOCAB_PADDING_SIZE - # We need bigger padding if using lora for kernel - # compatibility - if not lora_config else lora_config.lora_vocab_padding_size), - quant_config=quant_config, - prefix=maybe_prefix(prefix, "lm_head"), - ) - self.embedding_bias = None - # Used to track and store by the Mamba cache between steps. - self.mamba_cache: Optional[MambaCacheManager] = None - self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, - config.vocab_size, - logits_as_input=False) - - def forward( - self, - input_ids: torch.Tensor, - positions: torch.Tensor, - intermediate_tensors: Optional[IntermediateTensors] = None, - inputs_embeds: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, IntermediateTensors]: - if self.mamba_cache is None: - num_mamba_layers = self.config.num_hidden_layers \ - // 2 // self.config.mb_per_layer + 1 - self.mamba_cache = MambaCacheManager( - self.vllm_config, - num_mamba_layers, - *self._get_mamba_cache_shape(), - self.lm_head.weight.dtype, - self.lm_head.weight.dtype, - ) - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - - attn_metadata = get_forward_context().attn_metadata - # input_ids and hidden_states isn't a one-to-one mapping in prefill - # stage due to YOCO optimization. 
- hidden_states = self.model(input_ids, positions, attn_metadata, - mamba_cache_params, intermediate_tensors, - inputs_embeds) - return hidden_states - - def _get_mamba_cache_shape( - self - ) -> tuple[Optional[tuple[int, int]], Optional[tuple[int, int]]]: - world_size = get_tensor_model_parallel_world_size() - hidden_size = self.config.hidden_size - mamba_expand = self.config.mamba_expand # 2 - mamba_d_conv = self.config.mamba_d_conv # 4 - mamba_d_state = self.config.mamba_d_state # 16 - conv_state_shape = ( - mamba_expand * hidden_size // world_size, - mamba_d_conv - 1, - ) - temporal_state_shape = ( - mamba_expand * hidden_size // world_size, - mamba_d_state, - ) - return conv_state_shape, temporal_state_shape - - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - - def compute_logits( - self, - hidden_states: torch.Tensor, - ) -> Optional[torch.Tensor]: - processed_logits = self.logits_processor( - self.lm_head, - hidden_states, - self.embedding_bias, - ) - return processed_logits - - def load_weights( - self, - weights: Iterable[tuple[str, torch.Tensor]], - ): - weights = {name: weight for name, weight in weights} - adjusted_weights = {} - for name, weight in weights.items(): - if "A_log" in name: - name = name.replace("A_log", "A") - weight = -torch.exp(weight.float()) - if "inner_cross_attn." in name: - name = name.replace("inner_cross_attn.", "") - adjusted_weights[name] = weight - adjusted_weights["lm_head.weight"] = weights[ - "model.embed_tokens.weight"] - loaded_params: set[str] = set() - for name, param in self.named_parameters(): - weight = adjusted_weights.get(name) - if weight is not None and weight.shape != param.shape: - logger.warning("Shape mismatch: %s %s %s", name, weight.shape, - param.shape) - loaded_params.add(name) - missing_keys, unexpected_keys = self.load_state_dict(adjusted_weights, - strict=False) - assert len(unexpected_keys) == 0, f"Unexpected keys: {unexpected_keys}" - assert len(missing_keys) == 0, f"Missing keys: {missing_keys}" - return loaded_params diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 33ee1cf44afd..0292f3bf8317 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -12,7 +12,6 @@ from torch import nn from transformers import PretrainedConfig -from vllm import envs from vllm.attention.backends.abstract import AttentionMetadata from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile @@ -29,8 +28,6 @@ RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.mamba.abstract import MambaBase -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata, update_metadata) from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( @@ -47,15 +44,13 @@ composed_weight_loader, default_weight_loader, sharded_weight_loader) from vllm.model_executor.models.interfaces import (HasInnerState, IsHybrid, SupportsPP) -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.model_executor.models.utils import ( 
is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors -from vllm.utils import LayerBlockType, direct_register_custom_op +from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata @@ -194,17 +189,13 @@ def __init__(self, self.chunk_size = self.config.mamba_chunk_size - if envs.VLLM_USE_V1: - compilation_config = get_current_vllm_config().compilation_config - if prefix in compilation_config.static_forward_context: - raise ValueError(f"Duplicate layer name: {prefix}") - compilation_config.static_forward_context[prefix] = self - # The outer list is for v0 PP virtual engine. Though this code path - # only runs for v1, we have to do this to unify with the interface - # of Attention + v0 PP. - # The inner tuple is (conv_state, ssm_state) - self.kv_cache = [(torch.tensor([]), torch.tensor([]))] - assert self.chunk_size != -1, "chunk_size must be set for v1" + compilation_config = get_current_vllm_config().compilation_config + if prefix in compilation_config.static_forward_context: + raise ValueError(f"Duplicate layer name: {prefix}") + compilation_config.static_forward_context[prefix] = self + # The tuple is (conv_state, ssm_state) + self.kv_cache = (torch.tensor([]), torch.tensor([])) + assert self.chunk_size != -1, "chunk_size must be set for v1" self.prefix = prefix @@ -227,8 +218,6 @@ def forward_native( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): pass @@ -237,59 +226,43 @@ def forward( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): - if not envs.VLLM_USE_V1: - CustomOp.forward(self, hidden_states, output, mamba_cache_params, - mamba2_metadata) - else: - torch.ops.vllm.plamo2_mamba_mixer( - hidden_states, - output, - self.prefix, - ) + torch.ops.vllm.plamo2_mamba_mixer( + hidden_states, + output, + self.prefix, + ) def forward_cuda( self, hidden_states: torch.Tensor, output: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): forward_context = get_forward_context() - # mamba2_metadata contains metadata necessary for the mamba2 triton + # attn_metadata contains metadata necessary for the mamba2 triton # kernels to operate in continuous batching and in chunked prefill # modes; they are computed at top-level model forward since they # stay the same and reused for all mamba layers in the same iteration attn_metadata: AttentionMetadata = forward_context.attn_metadata - if envs.VLLM_USE_V1: - if attn_metadata is not None: - assert isinstance(attn_metadata, dict) - attn_metadata = attn_metadata[self.prefix] - mamba2_metadata = attn_metadata - assert isinstance(attn_metadata, Mamba2AttentionMetadata) - self_kv_cache = self.kv_cache[forward_context.virtual_engine] - # conv_state = (..., dim, width-1) yet contiguous along 'dim' - conv_state = self_kv_cache[0].transpose(-1, -2) - ssm_state = self_kv_cache[1] - state_indices_tensor = attn_metadata.state_indices_tensor - else: - conv_state = mamba_cache_params.conv_state - ssm_state = mamba_cache_params.ssm_state - state_indices_tensor = mamba_cache_params.state_indices_tensor - - # Common members between V1 metadata and V0 metadata - if mamba2_metadata is not None: 
- has_initial_states_p = mamba2_metadata.has_initial_states_p - prep_initial_states = mamba2_metadata.prep_initial_states - chunk_size = mamba2_metadata.chunk_size - seq_idx_p = mamba2_metadata.seq_idx_p - chunk_indices_p = mamba2_metadata.chunk_indices_p - chunk_offsets_p = mamba2_metadata.chunk_offsets_p + + if attn_metadata is not None: + assert isinstance(attn_metadata, dict) + attn_metadata = attn_metadata[self.prefix] + assert isinstance(attn_metadata, Mamba2AttentionMetadata) + self_kv_cache = self.kv_cache[forward_context.virtual_engine] + # conv_state = (..., dim, width-1) yet contiguous along 'dim' + conv_state = self_kv_cache[0].transpose(-1, -2) + ssm_state = self_kv_cache[1] + state_indices_tensor = attn_metadata.state_indices_tensor + has_initial_states_p = attn_metadata.has_initial_states_p + prep_initial_states = attn_metadata.prep_initial_states + chunk_size = attn_metadata.chunk_size + seq_idx_p = attn_metadata.seq_idx_p + chunk_indices_p = attn_metadata.chunk_indices_p + chunk_offsets_p = attn_metadata.chunk_offsets_p # 1. Gated MLP's linear projection projected_states = self.in_proj(hidden_states) @@ -299,8 +272,8 @@ def forward_cuda( conv_weights = self.conv1d.weight.view(self.conv1d.weight.size(0), self.conv1d.weight.size(2)) - if envs.VLLM_USE_V1 and attn_metadata is None: - # V1 profile run + if attn_metadata is None: + # profile run hidden_states = (hidden_states.transpose(0, 1).clone().transpose( 0, 1)).contiguous() output[:] = self.out_proj(hidden_states) @@ -316,42 +289,23 @@ def forward_cuda( # NOTE: V0 put prefill before decode, v1 puts decode before prefill # Separate prefill and decode by splitting varlen input # Split along token dimension - if envs.VLLM_USE_V1: - hidden_states_d, hidden_states_p = torch.split( - hidden_states[:num_actual_tokens], - [num_decodes, num_prefill_tokens], - dim=0, - ) - gate_d, gate_p = torch.split(gate[:num_actual_tokens], - [num_decodes, num_prefill_tokens], - dim=0) - # Split along batch dimension - state_indices_tensor_d, state_indices_tensor_p = torch.split( - state_indices_tensor, - [num_decodes, num_prefills], - dim=0, - ) - query_start_loc_p = ( - attn_metadata.query_start_loc[-num_prefills - 1:] - - num_decodes if has_prefill else None) - else: - hidden_states_p, hidden_states_d = torch.split( - hidden_states, - [num_prefill_tokens, num_decodes], - dim=0, - ) - gate_p, gate_d = torch.split(gate, - [num_prefill_tokens, num_decodes], - dim=0) - # Split along batch dimension - state_indices_tensor_p, state_indices_tensor_d = torch.split( - state_indices_tensor, - [num_prefills, num_decodes], - dim=0, - ) - query_start_loc_p = (attn_metadata.query_start_loc[:num_prefills + - 1] - if has_prefill else None) + hidden_states_d, hidden_states_p = torch.split( + hidden_states[:num_actual_tokens], + [num_decodes, num_prefill_tokens], + dim=0, + ) + gate_d, gate_p = torch.split(gate[:num_actual_tokens], + [num_decodes, num_prefill_tokens], + dim=0) + # Split along batch dimension + state_indices_tensor_d, state_indices_tensor_p = torch.split( + state_indices_tensor, + [num_decodes, num_prefills], + dim=0, + ) + query_start_loc_p = ( + attn_metadata.query_start_loc[-num_prefills - 1:] - + num_decodes if has_prefill else None) # Preallocate output tensor to avoid memcpy cost for merging prefill # and decode outputs @@ -363,18 +317,11 @@ def forward_cuda( dtype=hidden_states.dtype, device=hidden_states.device, ) - if envs.VLLM_USE_V1: - preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split( - preallocated_ssm_out, - 
[num_decodes, num_prefill_tokens], - dim=0, - ) - else: - preallocated_ssm_out_p, preallocated_ssm_out_d = torch.split( - preallocated_ssm_out, - [num_prefill_tokens, num_decodes], - dim=0, - ) + preallocated_ssm_out_d, preallocated_ssm_out_p = torch.split( + preallocated_ssm_out, + [num_decodes, num_prefill_tokens], + dim=0, + ) # Process prefill requests if has_prefill: @@ -383,9 +330,6 @@ def forward_cuda( # pointed to by "state_indices_tensor" x = hidden_states_p.transpose( 0, 1) # this is the form that causal-conv see - if mamba2_metadata.cu_seqlen is None: - mamba2_metadata = update_metadata(x, query_start_loc_p, - mamba2_metadata) hidden_states_p = causal_conv1d_fn( x, conv_weights, @@ -394,7 +338,7 @@ def forward_cuda( conv_states=conv_state, has_initial_state=has_initial_states_p, cache_indices=state_indices_tensor_p, - metadata=mamba2_metadata, + metadata=attn_metadata, query_start_loc=query_start_loc_p) hidden_states_p = hidden_states_p.transpose(0, 1) hidden_states_p = hidden_states_p[:num_prefill_tokens] @@ -470,7 +414,7 @@ def forward_cuda( -1, self.num_heads // self.tp_size, self.head_dim) # - the hidden is reshaped into (bs, num_heads, head_dim) - # - mamba_cache_params.ssm_state's slots will be selected + # - ssm_state's slots will be selected # using state_indices_tensor_d # NOTE: final output is an in-place update of out tensor @@ -530,10 +474,7 @@ def plamo2_mamba_mixer( ) -> None: forward_context: ForwardContext = get_forward_context() self = forward_context.no_compile_layers[layer_name] - self.forward_cuda(hidden_states=hidden_states, - output=output, - mamba_cache_params=None, - mamba2_metadata=None) + self.forward_cuda(hidden_states=hidden_states, output=output) def plamo2_mamba_mixer_fake( @@ -731,8 +672,6 @@ def forward( positions: torch.Tensor, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, **kwargs, ): if residual is None: @@ -747,8 +686,6 @@ def forward( output = torch.empty_like(hidden_states) mixer_kwargs = { "output": output, - "mamba_cache_params": mamba_cache_params, - "mamba2_metadata": mamba2_metadata, } else: mixer_kwargs = { @@ -790,23 +727,12 @@ def forward( positions: torch.Tensor, hidden_states: torch.Tensor, residual: Optional[torch.Tensor], - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, ) -> torch.Tensor: - mamba_cache_index = 0 for layer in islice(self.layers, self.start_layer, self.end_layer): - layer_mamba_cache_params = None - if layer.is_mamba and mamba_cache_params is not None: - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - mamba_cache_index) - mamba_cache_index += 1 - hidden_states, residual = layer( positions=positions, hidden_states=hidden_states, residual=residual, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) return hidden_states, residual @@ -844,7 +770,6 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: @@ -859,23 +784,10 @@ def forward( hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - if not envs.VLLM_USE_V1: - attn_metadata: AttentionMetadata = get_forward_context( - ).attn_metadata - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.mamba_chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get 
mamba2_metadata from forward_context - mamba2_metadata = None - hidden_states, residual = self.layers( positions=positions, hidden_states=hidden_states, residual=residual, - mamba_cache_params=mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -925,9 +837,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: if self.config.tie_word_embeddings: self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) - # Used to track and store by the Mamba cache between steps. - self.mamba_cache: Optional[MambaCacheManager] = None - self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, self.config.vocab_size) self.make_empty_intermediate_tensors = ( @@ -942,39 +851,11 @@ def forward(self, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, **kwargs): - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_mamba_layers = ( - self.model_config.get_num_layers_by_block_type( - self.vllm_config.parallel_config, - LayerBlockType.mamba)) - - mamba_state_shape = self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - else: - # NOTE: mamba_cache_params is not needed for v1 - mamba_cache_params = None - hidden_states = self.model(input_ids, positions, mamba_cache_params, - intermediate_tensors, inputs_embeds) + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) return hidden_states - def copy_inputs_before_cuda_graphs(self, input_buffers, **kwargs): - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs(self, batch_size: int): - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - @classmethod def get_mamba_state_dtype_from_config( cls, @@ -991,12 +872,10 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. 
Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: - conv_state_shape: Shape for convolutional state cache @@ -1015,7 +894,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.hidden_size_per_head, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=use_v1, ) def compute_logits( diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 24cebc5bfdd8..ab23b494e561 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -11,7 +11,6 @@ from torch import nn from transformers.activations import ACT2FN -from vllm import envs from vllm.attention import Attention, AttentionBackend, AttentionMetadata from vllm.compilation.decorators import support_torch_compile from vllm.config import (CacheConfig, ModelConfig, SpeculativeConfig, @@ -35,7 +34,6 @@ RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.mamba.abstract import MambaBase -from vllm.model_executor.layers.mamba.mamba2_metadata import update_metadata from vllm.model_executor.layers.mamba.mamba_mixer2 import ( mamba_v2_sharded_weight_loader) from vllm.model_executor.layers.mamba.mamba_utils import ( @@ -51,7 +49,6 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, sharded_weight_loader) -from vllm.model_executor.models.mamba_cache import MambaCacheParams from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform @@ -198,14 +195,8 @@ def get_state_dtype(self) -> tuple[torch.dtype, torch.dtype]: def get_state_shape(self) -> tuple[tuple[int, ...], tuple[int, ...]]: return MambaStateShapeCalculator.gated_delta_net_state_shape( - self.tp_size, - self.num_k_heads, - self.num_v_heads, - self.head_k_dim, - self.head_v_dim, - self.conv_kernel_size, - self.num_spec, - use_v1=True) + self.tp_size, self.num_k_heads, self.num_v_heads, self.head_k_dim, + self.head_v_dim, self.conv_kernel_size, self.num_spec) def __init__( self, @@ -394,7 +385,6 @@ def forward( self, hidden_states: torch.Tensor, output: torch.Tensor, - cache_params: Optional[MambaCacheParams] = None, ): return torch.ops.vllm.gdn_attention( hidden_states, @@ -416,7 +406,6 @@ def _forward( assert isinstance(attn_metadata, dict) attn_metadata = attn_metadata[self.prefix] - conv_metadata = attn_metadata assert isinstance(attn_metadata, GDNAttentionMetadata) has_initial_state = attn_metadata.has_initial_state spec_query_start_loc = attn_metadata.spec_query_start_loc @@ -479,12 +468,8 @@ def _forward( # 2.2: process the remaining part if attn_metadata.num_prefills > 0: mixed_qkv_non_spec_T = mixed_qkv_non_spec.transpose(0, 1) - if conv_metadata.cu_seqlen is None: - conv_metadata = update_metadata(mixed_qkv_non_spec_T, - non_spec_query_start_loc, - conv_metadata) # - "cache_indices" updates the conv_state cache in positions - # pointed to by "mamba_cache_params.state_indices_tensor" + # pointed to by "state_indices_tensor" mixed_qkv_non_spec = causal_conv1d_fn( mixed_qkv_non_spec_T, conv_weights, @@ -494,7 +479,7 @@ def _forward( has_initial_state=has_initial_state, cache_indices=non_spec_state_indices_tensor, query_start_loc=non_spec_query_start_loc, - metadata=conv_metadata, + metadata=attn_metadata, ).transpose(0, 1) elif 
attn_metadata.num_decodes > 0: mixed_qkv_non_spec = causal_conv1d_update( @@ -1075,7 +1060,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): scheduler_config = vllm_config.scheduler_config assert not cache_config.enable_prefix_caching, \ "Qwen3Next currently does not support prefix caching" - assert envs.VLLM_USE_V1, "Qwen3Next requires VLLM_USE_V1" self.quant_config = vllm_config.quant_config super().__init__() @@ -1195,14 +1179,10 @@ def get_mamba_state_shape_from_config( num_spec = (vllm_config.speculative_config.num_speculative_tokens if vllm_config.speculative_config else 0) return MambaStateShapeCalculator.gated_delta_net_state_shape( - tp_size, - hf_config.linear_num_key_heads, - hf_config.linear_num_value_heads, - hf_config.linear_key_head_dim, - hf_config.linear_value_head_dim, - hf_config.linear_conv_kernel_dim, - num_spec, - use_v1=True) + tp_size, hf_config.linear_num_key_heads, + hf_config.linear_num_value_heads, hf_config.linear_key_head_dim, + hf_config.linear_value_head_dim, hf_config.linear_conv_kernel_dim, + num_spec) def compute_logits( self, diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 86123bc092b9..6ab3fa902c38 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -134,7 +134,6 @@ "PhiForCausalLM": ("phi", "PhiForCausalLM"), "Phi3ForCausalLM": ("phi3", "Phi3ForCausalLM"), "PhiMoEForCausalLM": ("phimoe", "PhiMoEForCausalLM"), - "Phi4FlashForCausalLM": ("phi4flash", "Phi4FlashForCausalLM"), "Plamo2ForCausalLM": ("plamo2", "Plamo2ForCausalLM"), "QWenLMHeadModel": ("qwen", "QWenLMHeadModel"), "Qwen2ForCausalLM": ("qwen2", "Qwen2ForCausalLM"), diff --git a/vllm/model_executor/models/zamba2.py b/vllm/model_executor/models/zamba2.py index 4350e38e02f9..a0d93045b74c 100644 --- a/vllm/model_executor/models/zamba2.py +++ b/vllm/model_executor/models/zamba2.py @@ -15,12 +15,10 @@ from torch import nn from transformers import Zamba2Config -from vllm import envs from vllm.attention.layer import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ModelConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.forward_context import get_forward_context from vllm.model_executor.layers.activation import GeluAndMul from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, @@ -29,8 +27,6 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.mamba.mamba2_metadata import ( - Mamba2Metadata, prepare_mamba2_metadata) from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) @@ -39,8 +35,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.models.mamba_cache import (MambaCacheManager, - MambaCacheParams) from vllm.sequence import IntermediateTensors from .interfaces import HasInnerState, IsHybrid @@ -515,8 +509,6 @@ def __init__(self, def forward( self, hidden_states: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, transformer_hidden_states: Optional[torch.Tensor] = None, 
positions: Optional[torch.Tensor] = None, original_hidden_states: Optional[torch.Tensor] = None, @@ -525,8 +517,6 @@ def forward( Args: hidden_states: Input tensor [batch_size, seq_len, hidden_size] - mamba_cache_params: Parameters for Mamba's state caches - (one for conv, one for ssm) transformer_hidden_states: Optional output from transformer path Added to input if provided (used in hybrid architecture) positions: Optional position IDs (unused in Mamba) @@ -555,8 +545,6 @@ def forward( self.mamba( hidden_states, output, - mamba_cache_params=mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) # residual connection after mamba @@ -607,8 +595,6 @@ def forward( hidden_states: torch.Tensor, original_hidden_states: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, - mamba2_metadata: Mamba2Metadata, ) -> torch.Tensor: """Forward pass through the hybrid layer. @@ -623,8 +609,6 @@ def forward( original_hidden_states: Original input for transformer residual connection positions: Position IDs for positional embeddings - mamba_cache_params: Parameters for Mamba's state caches - (one for conv, one for ssm) Returns: Output tensor combining transformer and Mamba representations @@ -644,8 +628,6 @@ def forward( layer_outputs = self.mamba_decoder( hidden_states, transformer_hidden_states=transformer_hidden_states, - mamba_cache_params=mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) return layer_outputs @@ -752,7 +734,6 @@ def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, - mamba_cache_params: MambaCacheParams, inputs_embeds: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, IntermediateTensors]: """Forward pass through the model. @@ -760,8 +741,6 @@ def forward( Args: input_ids: Input token IDs positions: Position IDs for embeddings - mamba_cache_params: Parameters for Mamba's state caches - (one for conv, one for ssm) inputs_embeds: Optional pre-computed input embeddings Returns: @@ -773,33 +752,13 @@ def forward( inputs_embeds = self.get_input_embeddings(input_ids) hidden_states = inputs_embeds - attn_metadata = get_forward_context().attn_metadata - - if not envs.VLLM_USE_V1: - mamba2_metadata = prepare_mamba2_metadata( - chunk_size=self.config.chunk_size, - attn_metadata=attn_metadata, - ) - else: - # v1 get mamba2_metadata from forward_context - mamba2_metadata = None - # Process through layers original_hidden_states = torch.clone(hidden_states) for layer_idx, layer in enumerate(self.layers): - - layer_mamba_cache_params = None - if (isinstance(layer, (Zamba2HybridLayer, Zamba2MambaDecoderLayer)) - and mamba_cache_params): - layer_mamba_cache_params = mamba_cache_params.at_layer_idx( - layer_idx) - layer_outputs = layer( hidden_states, original_hidden_states=original_hidden_states, positions=positions, - mamba_cache_params=layer_mamba_cache_params, - mamba2_metadata=mamba2_metadata, ) hidden_states = layer_outputs @@ -870,13 +829,11 @@ def get_mamba_state_dtype_from_config( def get_mamba_state_shape_from_config( cls, vllm_config: "VllmConfig", - use_v1: bool = True, ) -> tuple[tuple[int, int], tuple[int, int, int]]: """Calculate shapes for Mamba's convolutional and state caches. 
Args: vllm_config: vLLM config - use_v1: Get shapes for V1 (or V0) Returns: Tuple containing: @@ -896,7 +853,6 @@ def get_mamba_state_shape_from_config( head_dim=hf_config.mamba_headdim, state_size=hf_config.mamba_d_state, conv_kernel=hf_config.mamba_d_conv, - use_v1=use_v1, ) def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: @@ -945,9 +901,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: # Tie weights with input embeddings if using same dimensions self.lm_head = self.lm_head.tie_weights(self.model.embed_tokens) - # Used to track and store by the Mamba cache between steps. - self.mamba_cache: Optional[MambaCacheManager] = None - # Initialize logits processing and sampling self.logits_processor = LogitsProcessor(self.unpadded_vocab_size, config.vocab_size) @@ -977,61 +930,15 @@ def forward(self, Returns: Output hidden states """ - # Initialize Mamba cache if needed - mamba_cache_params = None - if not envs.VLLM_USE_V1: - if self.mamba_cache is None: - num_mamba_layers = self.config.num_hidden_layers - mamba_state_shape = \ - self.get_mamba_state_shape_from_config( - self.vllm_config, use_v1=False) - mamba_state_dtype = \ - self.get_mamba_state_dtype_from_config( - self.vllm_config) - self.mamba_cache = MambaCacheManager(self.vllm_config, - num_mamba_layers, - *mamba_state_shape, - *mamba_state_dtype) - - # Get cache parameters for current run - mamba_cache_params = self.mamba_cache.current_run_tensors(**kwargs) - # Forward pass through model hidden_states = self.model( input_ids, positions, - mamba_cache_params, inputs_embeds, ) return hidden_states - def copy_inputs_before_cuda_graphs( - self, input_buffers: dict[str, torch.Tensor], - **kwargs: Any) -> dict[str, torch.Tensor]: - """Copy inputs before CUDA graph capture. - - Args: - input_buffers: Dictionary of input tensors - **kwargs: Additional arguments passed to cache manager - - Returns: - Updated input buffers - """ - return self.mamba_cache.copy_inputs_before_cuda_graphs( - input_buffers, **kwargs) - - def get_seqlen_agnostic_capture_inputs( - self, batch_size: int) -> dict[str, torch.Tensor]: - """Get inputs for sequence-length-agnostic graph capture. 
- - Args: - batch_size: Size of batch to capture - Returns: - Dictionary of capture inputs - """ - return self.mamba_cache.get_seqlen_agnostic_capture_inputs(batch_size) - def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index 5dadc52d0fb1..06a87a4a3c8b 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -12,6 +12,7 @@ from vllm.v1.attention.backends.utils import (AttentionCGSupport, AttentionMetadataBuilder, CommonAttentionMetadata, + compute_causal_conv1d_metadata, split_decodes_and_prefills) from vllm.v1.kv_cache_interface import AttentionSpec, MambaSpec @@ -52,7 +53,6 @@ class GDNAttentionMetadata: # The following attributes are for triton implementation of causal_conv1d nums_dict: Optional[dict] = None - cu_seqlen: Optional[int] = None batch_ptr: Optional[torch.Tensor] = None token_chunk_offset_ptr: Optional[torch.Tensor] = None @@ -134,6 +134,7 @@ def build( # type: ignore[override] context_lens = m.num_computed_tokens_cpu context_lens_tensor = context_lens.to(query_start_loc.device) seq_lens_tensor = m.seq_lens + nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None if (not self.use_spec_decode or num_draft_tokens is None or num_draft_tokens.sum().item() == 0): @@ -210,6 +211,8 @@ def build( # type: ignore[override] has_initial_state = context_lens_tensor > 0 if spec_sequence_masks is not None: has_initial_state = has_initial_state[~spec_sequence_masks] + nums_dict, batch_ptr, token_chunk_offset_ptr = \ + compute_causal_conv1d_metadata(non_spec_query_start_loc) else: has_initial_state = None num_actual_tokens = num_prefill_tokens + num_decode_tokens + \ @@ -297,6 +300,9 @@ def build( # type: ignore[override] spec_sequence_masks=spec_sequence_masks, spec_token_masks=spec_token_masks, num_accepted_tokens=num_accepted_tokens, + nums_dict=nums_dict, + batch_ptr=batch_ptr, + token_chunk_offset_ptr=token_chunk_offset_ptr, ) return attn_metadata diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py index 2fe1f14ca1db..f45fc75334a2 100644 --- a/vllm/v1/attention/backends/mamba2_attn.py +++ b/vllm/v1/attention/backends/mamba2_attn.py @@ -7,11 +7,12 @@ import torch from vllm.attention.backends.abstract import AttentionBackend -from vllm.attention.backends.utils import PAD_SLOT_ID from vllm.config import VllmConfig from vllm.v1.attention.backends.mamba_attn import ( BaseMambaAttentionMetadataBuilder) -from vllm.v1.attention.backends.utils import (CommonAttentionMetadata, +from vllm.v1.attention.backends.utils import (PAD_SLOT_ID, + CommonAttentionMetadata, + compute_causal_conv1d_metadata, split_decodes_and_prefills) from vllm.v1.kv_cache_interface import AttentionSpec @@ -131,7 +132,6 @@ class Mamba2AttentionMetadata: # The following attributes are for triton implementation of causal_conv1d nums_dict: Optional[dict] = None - cu_seqlen: Optional[int] = None batch_ptr: Optional[torch.Tensor] = None token_chunk_offset_ptr: Optional[torch.Tensor] = None @@ -161,6 +161,9 @@ def build(self, has_initial_states_p = None prep_initial_states = False + # for causal_conv1d + nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None + state_indices_tensor = common_attn_metadata.block_table_tensor[:, 0] num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( @@ -198,6 +201,9 @@ def build(self, query_start_loc_p, self.chunk_size, num_prefill_tokens)) + nums_dict, batch_ptr, 
token_chunk_offset_ptr = \ + compute_causal_conv1d_metadata(query_start_loc_p) + elif num_decodes <= self.decode_cudagraph_max_bs: # Pad state tensor for CUDA graph num_input_tokens = self.vllm_config.pad_for_cudagraph(num_decodes) @@ -220,5 +226,8 @@ def build(self, chunk_indices_p=chunk_indices_p, chunk_offsets_p=chunk_offsets_p, state_indices_tensor=state_indices_tensor, + nums_dict=nums_dict, + batch_ptr=batch_ptr, + token_chunk_offset_ptr=token_chunk_offset_ptr, ) return attn_metadata diff --git a/vllm/v1/attention/backends/short_conv_attn.py b/vllm/v1/attention/backends/short_conv_attn.py index 717c40b37ecf..428e40965979 100644 --- a/vllm/v1/attention/backends/short_conv_attn.py +++ b/vllm/v1/attention/backends/short_conv_attn.py @@ -9,6 +9,7 @@ from vllm.config import VllmConfig from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder, CommonAttentionMetadata, + compute_causal_conv1d_metadata, split_decodes_and_prefills) from vllm.v1.kv_cache_interface import AttentionSpec, MambaSpec @@ -33,7 +34,6 @@ class ShortConvAttentionMetadata: # For causal_conv1d nums_dict: Optional[dict] = None - cu_seqlen: Optional[int] = None batch_ptr: Optional[torch.Tensor] = None token_chunk_offset_ptr: Optional[torch.Tensor] = None @@ -57,6 +57,9 @@ def build(self, state_indices_tensor = common_attn_metadata.block_table_tensor[:, 0] + # for causal_conv1d + nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( split_decodes_and_prefills( common_attn_metadata, @@ -70,6 +73,12 @@ def build(self, has_initial_states = has_initial_states_cpu.to( query_start_loc.device) + query_start_loc_p = common_attn_metadata.query_start_loc[ + -num_prefills - 1:] - num_decode_tokens + + nums_dict, batch_ptr, token_chunk_offset_ptr = \ + compute_causal_conv1d_metadata(query_start_loc_p) + attn_metadata = ShortConvAttentionMetadata( num_prefills=num_prefills, num_prefill_tokens=num_prefill_tokens, @@ -78,5 +87,8 @@ def build(self, query_start_loc=query_start_loc, has_initial_states=has_initial_states, state_indices_tensor=state_indices_tensor, + nums_dict=nums_dict, + batch_ptr=batch_ptr, + token_chunk_offset_ptr=token_chunk_offset_ptr, ) return attn_metadata diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index 63326d19194f..6ef489f5a7a2 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -34,6 +34,8 @@ KVCacheLayoutType = Literal["NHD", "HND"] _KV_CACHE_LAYOUT_OVERRIDE: Union[KVCacheLayoutType, None] = None +PAD_SLOT_ID = -1 + def is_valid_kv_cache_layout(value: str) -> bool: return value in get_args(KVCacheLayoutType) @@ -838,3 +840,52 @@ def __init__(self, metadata, common_attn_metadata): builder_cls=FastPrefillAttentionBuilder) return attn_backend + + +def compute_causal_conv1d_metadata(query_start_loc_p: torch.Tensor): + + # Needed for causal_conv1d + seqlens = query_start_loc_p.diff().to('cpu') + nums_dict = {} # type: ignore + batch_ptr = None + token_chunk_offset_ptr = None + for BLOCK_M in [8]: # cover all BLOCK_M values + nums = -(-seqlens // BLOCK_M) + nums_dict[BLOCK_M] = {} + nums_dict[BLOCK_M]['nums'] = nums + nums_dict[BLOCK_M]['tot'] = nums.sum().item() + mlist = torch.from_numpy(np.repeat(np.arange(len(nums)), nums)) + nums_dict[BLOCK_M]['mlist'] = mlist + mlist_len = len(nums_dict[BLOCK_M]['mlist']) + nums_dict[BLOCK_M]['mlist_len'] = mlist_len + MAX_NUM_PROGRAMS = max(1024, mlist_len) * 2 + offsetlist = [] # type: ignore + for 
idx, num in enumerate(nums): + offsetlist.extend(range(num)) + offsetlist = torch.tensor(offsetlist, dtype=torch.int32) + nums_dict[BLOCK_M]['offsetlist'] = offsetlist + + if batch_ptr is None: + # Update default value after class definition + batch_ptr = torch.full((MAX_NUM_PROGRAMS, ), + PAD_SLOT_ID, + dtype=torch.int32, + device='cuda') + token_chunk_offset_ptr = torch.full((MAX_NUM_PROGRAMS, ), + PAD_SLOT_ID, + dtype=torch.int32, + device='cuda') + else: + if batch_ptr.nelement() < MAX_NUM_PROGRAMS: + batch_ptr.resize_(MAX_NUM_PROGRAMS).fill_(PAD_SLOT_ID) + token_chunk_offset_ptr.resize_( # type: ignore + MAX_NUM_PROGRAMS).fill_(PAD_SLOT_ID) + + batch_ptr[0:mlist_len].copy_(mlist) + token_chunk_offset_ptr[ # type: ignore + 0:mlist_len].copy_(offsetlist) + nums_dict[BLOCK_M]['batch_ptr'] = batch_ptr + nums_dict[BLOCK_M]['token_chunk_offset_ptr'] = (token_chunk_offset_ptr + ) # type: ignore + + return nums_dict, batch_ptr, token_chunk_offset_ptr From cc1dc7ed6d2d901df46847724ad90147872a9d79 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 23 Sep 2025 12:02:10 -0400 Subject: [PATCH 275/518] [Core/DBO][2/N] Dual-Batch Overlap add DeepEP High Throughput support and Prefill support (#24845) Signed-off-by: Sage Moore Signed-off-by: Lucas Wilkinson Signed-off-by: yewentao256 Signed-off-by: Lucas Wilkinson Signed-off-by: Tyler Michael Smith Co-authored-by: Sage Moore Co-authored-by: yewentao256 Co-authored-by: Tyler Michael Smith --- .../v1/attention/test_attention_splitting.py | 83 ++++++++- tests/v1/spec_decode/test_eagle.py | 6 +- vllm/config/__init__.py | 12 +- vllm/config/parallel.py | 14 +- .../device_communicators/all2all.py | 28 ++- .../base_device_communicator.py | 6 + vllm/engine/arg_utils.py | 6 + vllm/envs.py | 11 ++ .../fused_moe/deepep_ht_prepare_finalize.py | 76 +++++--- .../fused_moe/deepep_ll_prepare_finalize.py | 10 +- .../layers/fused_moe/modular_kernel.py | 131 +++++++++---- vllm/v1/attention/backends/utils.py | 44 ++++- vllm/v1/spec_decode/eagle.py | 9 +- vllm/v1/worker/gpu_model_runner.py | 173 ++++++++---------- vllm/v1/worker/gpu_ubatch_wrapper.py | 87 ++++++++- vllm/v1/worker/ubatch_splitting.py | 78 +++++--- vllm/v1/worker/ubatch_utils.py | 8 + vllm/v1/worker/ubatching.py | 35 ++-- vllm/v1/worker/utils.py | 25 ++- 19 files changed, 604 insertions(+), 238 deletions(-) diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py index c74dbb3ebb17..7d7a46910be8 100644 --- a/tests/v1/attention/test_attention_splitting.py +++ b/tests/v1/attention/test_attention_splitting.py @@ -5,11 +5,12 @@ import torch from tests.v1.attention.test_attention_backends import BATCH_SPECS -from tests.v1.attention.utils import create_common_attn_metadata +from tests.v1.attention.utils import BatchSpec, create_common_attn_metadata from vllm.v1.attention.backends.utils import (UBatchSlice, _make_metadata_with_slice, slice_query_start_locs, split_attn_metadata) +from vllm.v1.worker.ubatch_utils import create_ubatch_slices @pytest.fixture @@ -155,3 +156,83 @@ def test_split_attn_metadata_decode_batch(large_decode_metadata): assert results[1].num_reqs == mid_point assert results[1].num_actual_tokens == mid_point assert torch.equal(results[1].seq_lens, torch.tensor([2048] * mid_point)) + + +@pytest.mark.parametrize( + "seq_lens,query_lens,split_point,expected_first_reqs,expected_second_reqs", + [ + # Split in the middle of request 1 + ([32, 40], [8, 8], 12, 2, 1), + # Split inside the first request + ([32, 40], [8, 8], 4, 1, 2), + ], +) +def 
test_prefill_split_across_ubatches(seq_lens, query_lens, split_point, + expected_first_reqs, + expected_second_reqs): + """Test splitting a prefill across ubatches""" + import numpy as np + + device = torch.device("cpu") + batch_spec = BatchSpec(seq_lens=seq_lens, query_lens=query_lens) + common = create_common_attn_metadata(batch_spec, + block_size=16, + device=device) + + num_scheduled_tokens = np.array(query_lens, dtype=np.int32) + qsl_np = common.query_start_loc_cpu.numpy() + num_tokens = common.num_actual_tokens + + ubatch_slices = create_ubatch_slices(num_scheduled_tokens, split_point) + assert len(ubatch_slices) == 2 + + first_meta = _make_metadata_with_slice(ubatch_slices[0], common) + second_meta = _make_metadata_with_slice(ubatch_slices[1], common) + + # Token counts match the split + assert first_meta.num_actual_tokens == split_point + assert second_meta.num_actual_tokens == num_tokens - split_point + + # Number of requests per ubatch + assert first_meta.num_reqs == expected_first_reqs + assert second_meta.num_reqs == expected_second_reqs + + # Identify which request is split and how many tokens are in the first chunk + split_req_idx = int(np.searchsorted(qsl_np, split_point, side="right") - 1) + tokens_in_first_chunk = split_point - int(qsl_np[split_req_idx]) + orig_q_lens = (common.query_start_loc_cpu[1:] - + common.query_start_loc_cpu[:-1]) + + # Check query length continuity: first-chunk + second-chunk == original qlen + # First ubatch last request query length + qlen_first_last = int(first_meta.query_start_loc_cpu[-1] - + first_meta.query_start_loc_cpu[-2]) + # Second ubatch first request query length + qlen_second_first = int(second_meta.query_start_loc_cpu[1] - + second_meta.query_start_loc_cpu[0]) + assert qlen_first_last == tokens_in_first_chunk + assert qlen_first_last + qlen_second_first == int( + orig_q_lens[split_req_idx]) + + # Check seq_lens adjustments + # Context lengths per original request + context_lens = [s - q for s, q in zip(seq_lens, query_lens)] + + # First ubatch: last request's seq_len should be + # context + tokens_in_first_chunk + expected_seqlen = context_lens[split_req_idx] + tokens_in_first_chunk + assert int(first_meta.seq_lens[-1]) == expected_seqlen + + # For full preceding requests in first ubatch, seq_lens should match + # originals + for i in range(first_meta.num_reqs - 1): + assert int(first_meta.seq_lens[i]) == seq_lens[i] + + # Second ubatch: first request (continuation) seq_len should be full + # original + assert int(second_meta.seq_lens[0]) == seq_lens[split_req_idx] + # Any following full requests in second ubatch should match originals + for j in range(1, second_meta.num_reqs): + # Map to original request index + orig_idx = split_req_idx + j + assert int(second_meta.seq_lens[j]) == seq_lens[orig_idx] diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index e7f6b68fc3f7..23bfabfcf89b 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -532,9 +532,8 @@ def create_deterministic_logits(token_ids): # Mock runner for attention metadata building proposer.runner = mock.MagicMock() proposer.runner.attn_groups.append([mock.MagicMock()]) - proposer.runner.attn_groups[0][0].metadata_builders = [ + proposer.runner.attn_groups[0][0].get_metadata_builder.return_value = \ attn_metadata_builder - ] result = proposer.propose(target_token_ids=target_token_ids, target_positions=target_positions, @@ -659,9 +658,8 @@ def create_deterministic_logits(token_ids, k: int): # Mock runner 
for attention metadata building. proposer.runner = mock.MagicMock() proposer.runner.attn_groups.append([mock.MagicMock()]) - proposer.runner.attn_groups[0][0].metadata_builders = [ + proposer.runner.attn_groups[0][0].get_metadata_builder.return_value = \ attn_metadata_builder - ] # Setup inputs for the proposer. target_token_ids = torch.randint(0, diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 92fc68f8927c..a2562a10b45a 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -638,11 +638,13 @@ def __post_init__(self): if self.parallel_config.enable_dbo: a2a_backend = envs.VLLM_ALL2ALL_BACKEND - assert a2a_backend == "deepep_low_latency", \ - "Microbatching currently only supports the deepep_low_latency "\ - f"all2all backend. {a2a_backend} is not supported. To fix set "\ - "the VLLM_ALL2ALL_BACKEND environment variable to "\ - "deepep_low_latency and install the DeepEP kerenls." + assert a2a_backend in \ + ["deepep_low_latency", "deepep_high_throughput"], \ + "Microbatching currently only supports the deepep_low_latency and "\ + f"deepep_high_throughput all2all backend. {a2a_backend} is not "\ + "supported. To fix set the VLLM_ALL2ALL_BACKEND environment "\ + "variable to deepep_low_latency or deepep_high_throughput and "\ + "install the DeepEP kernels." if not self.instance_id: self.instance_id = random_uuid()[:5] diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index a84d88243016..f80eb1adc7fd 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -139,12 +139,18 @@ class ParallelConfig: """Disable the custom all-reduce kernel and fall back to NCCL.""" enable_dbo: bool = False - """Enable microbatching for the model executor.""" + """Enable dual batch overlap for the model executor.""" dbo_decode_token_threshold: int = 32 - """The threshold for microbatching. If the number of tokens in the - request is greater than this threshold, microbatching will be used. - Otherwise, the request will be processed in a single batch.""" + """The threshold for dual batch overlap for batches only containing decodes. + If the number of tokens in the request is greater than this threshold, + microbatching will be used. Otherwise, the request will be processed in a + single batch.""" + dbo_prefill_token_threshold: int = 512 # TODO(lucas): tune + """The threshold for dual batch overlap for batches that contain one or more + prefills. If the number of tokens in the request is greater than this + threshold, microbatching will be used. 
Otherwise, the request will be + processed in a single batch.""" ray_workers_use_nsight: bool = False """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.""" diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index 149df73d8667..ae18429f6251 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Any +from typing import Any, Optional import torch import torch.distributed as dist +import vllm.envs as envs from vllm.distributed import get_dp_group from vllm.forward_context import get_forward_context from vllm.logger import init_logger @@ -200,12 +201,12 @@ def __init__(self, cpu_group): def _make_all2all_kwargs(self) -> dict[Any, Any]: # Defaults for internode and intranode are taken from DeepEP tests. - num_nvl_bytes = 1024 * 1024 * 1024 + num_nvl_bytes = envs.VLLM_DEEPEP_BUFFER_SIZE_MB * 1024 * 1024 num_rdma_bytes = None num_qps_per_rank = None if self.internode: - num_rdma_bytes = 1024 * 1024 * 1024 + num_rdma_bytes = envs.VLLM_DEEPEP_BUFFER_SIZE_MB * 1024 * 1024 num_qps_per_rank = self.num_sms // 2 else: num_rdma_bytes = 0 @@ -230,13 +231,18 @@ def get_handle(self, kwargs): logger.debug("DeepEP all2all args %s", buffer_kwargs) handle: deep_ep.Buffer = self.handle_cache.get_or_create( buffer_kwargs, deep_ep.Buffer) - # It is dangerous to set num sms outside this function. num_sms is not - # a part of the hash-key that identifies this object. If we are in a - # situation where we make objects with different num_sms, the hash key - # in get_or_create must be updated. - handle.set_num_sms(self.num_sms) return handle + def set_num_sms(self, num_sms: int): + import deep_ep + + # Right now the buffers are sized for only what the kernels were + # created with. So we can only reduce the number of SMS used + # but not increase it. + if num_sms > self.num_sms: + num_sms = self.num_sms + deep_ep.Buffer.set_num_sms(num_sms) + class DeepEPLLAll2AllManager(DeepEPAll2AllManagerBase): """ @@ -265,7 +271,7 @@ def _make_all2all_kwargs( import deep_ep # Defaults for internode and intranode are taken from DeepEP tests. - num_nvl_bytes = 1024 * 1024 * 1024 + num_nvl_bytes = envs.VLLM_DEEPEP_BUFFER_SIZE_MB * 1024 * 1024 num_qps_per_rank = num_local_experts num_rdma_bytes = deep_ep.Buffer.get_low_latency_rdma_size_hint( num_max_dispatch_tokens_per_rank=max_num_tokens_per_dp_rank, @@ -291,3 +297,7 @@ def get_handle(self, kwargs): handle: deep_ep.Buffer = self.handle_cache.get_or_create( buffer_kwargs, deep_ep.Buffer) return handle + + # DeepEP LL uses RDMA so no SMs are used for communication + def max_sms_used(self) -> Optional[int]: + return 0 \ No newline at end of file diff --git a/vllm/distributed/device_communicators/base_device_communicator.py b/vllm/distributed/device_communicators/base_device_communicator.py index 01f59b44a0e6..586441c91783 100644 --- a/vllm/distributed/device_communicators/base_device_communicator.py +++ b/vllm/distributed/device_communicators/base_device_communicator.py @@ -60,6 +60,12 @@ def get_handle(self, kwargs): # and reuse it for the same config. 
raise NotImplementedError + def set_num_sms(self, num_sms: int): + pass + + def max_sms_used(self) -> Optional[int]: + return None # None means it could use the whole GPU + def dispatch(self, hidden_states: torch.Tensor, router_logits: torch.Tensor): raise NotImplementedError diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8c7a1b413cdb..556a490ffa10 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -330,6 +330,8 @@ class EngineArgs: enable_dbo: bool = ParallelConfig.enable_dbo dbo_decode_token_threshold: int = \ ParallelConfig.dbo_decode_token_threshold + dbo_prefill_token_threshold: int = \ + ParallelConfig.dbo_prefill_token_threshold eplb_config: EPLBConfig = get_field(ParallelConfig, "eplb_config") enable_eplb: bool = ParallelConfig.enable_eplb expert_placement_strategy: ExpertPlacementStrategy = \ @@ -698,6 +700,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parallel_group.add_argument( "--dbo-decode-token-threshold", **parallel_kwargs["dbo_decode_token_threshold"]) + parallel_group.add_argument( + "--dbo-prefill-token-threshold", + **parallel_kwargs["dbo_prefill_token_threshold"]) parallel_group.add_argument("--enable-eplb", **parallel_kwargs["enable_eplb"]) parallel_group.add_argument("--eplb-config", @@ -1316,6 +1321,7 @@ def create_engine_config( enable_expert_parallel=self.enable_expert_parallel, enable_dbo=self.enable_dbo, dbo_decode_token_threshold=self.dbo_decode_token_threshold, + dbo_prefill_token_threshold=self.dbo_prefill_token_threshold, enable_eplb=self.enable_eplb, eplb_config=self.eplb_config, expert_placement_strategy=self.expert_placement_strategy, diff --git a/vllm/envs.py b/vllm/envs.py index ee5efff8bcd9..f6eafe892ef2 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -189,6 +189,8 @@ VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" + VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024 + VLLM_DBO_COMM_SMS: int = 20 GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None @@ -1392,6 +1394,15 @@ def get_vllm_port() -> Optional[int]: lambda: os.getenv("VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME", "VLLM_OBJECT_STORAGE_SHM_BUFFER"), + # The size in MB of the buffers (NVL and RDMA) used by DeepEP + "VLLM_DEEPEP_BUFFER_SIZE_MB": + lambda: int(os.getenv("VLLM_DEEPEP_BUFFER_SIZE_MB", "1024")), + + # The number of SMs to allocate for communication kernels when running DBO + # the rest of the SMs on the device will be allocated to compute + "VLLM_DBO_COMM_SMS": + lambda: int(os.getenv("VLLM_DBO_COMM_SMS", "20")), + # Valid values are container,code_interpreter,web_search_preview # ex GPT_OSS_SYSTEM_TOOL_MCP_LABELS=container,code_interpreter "GPT_OSS_SYSTEM_TOOL_MCP_LABELS": diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index a250a6218715..9e9a9afc18a0 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -12,6 +12,11 @@ from vllm.model_executor.layers.fused_moe.utils import ( moe_kernel_quantize_input) from vllm.utils import round_up +from vllm.v1.worker.ubatching import ( + dbo_current_ubatch_id, dbo_enabled, dbo_switch_to_comm, + dbo_switch_to_compute, dbo_switch_to_compute_sync, + dbo_yield_and_switch_from_comm_to_compute, + 
dbo_yield_and_switch_from_compute_to_comm) class DeepEPHTPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize): @@ -46,9 +51,9 @@ def __init__(self, buffer: deep_ep.Buffer, num_dispatchers: int, self.async_prepare = True # The dispatch function returns a handle that the combine function - # requires. We store the handle here so it is available to the - # combine function. - self.handle = None + # requires. Under DBO microbatching we must track one handle per + # micro-batch to avoid races between threads. + self.handles = [None, None] # From https://github.com/deepseek-ai/DeepEP/blob/9fe9021f29c9083cd1808ab36b740208524d9f63/deep_ep/buffer.py#L164 self.available_rank_configs = [2, 4, 8, 16, 24, 32, 64, 128, 144, 160] @@ -89,6 +94,11 @@ def _do_dispatch( has_scales = token_scales is not None + # We yield before launching the dispatch kernel since the dispatch + # kernel will block the CPU so we want to queue up all the compute + # for the other ubatch before the dispatch kernel starts. + dbo_yield_and_switch_from_compute_to_comm() + (num_tokens_per_rank, num_tokens_per_rdma_rank, dispatch_expert_num_tokens, is_token_in_rank, event) = self.buffer.get_dispatch_layout( @@ -104,7 +114,7 @@ def _do_dispatch( ( token_data, expert_topk_ids, expert_topk_weights, - expert_num_tokens_per_expert_list, self.handle, event + expert_num_tokens_per_expert_list, handle, event ) = self.buffer.dispatch( x=token_data, handle=None, @@ -119,9 +129,15 @@ def _do_dispatch( expert_alignment=1, config=self._get_dispatch_config(), previous_event=None, - async_finish=self.async_prepare, + async_finish=self.async_prepare and not dbo_enabled(), allocate_on_comm_stream=False) + # record the handle for this ubatch + a2a_idx = dbo_current_ubatch_id() + self.handles[a2a_idx] = handle + + dbo_switch_to_compute_sync() + return lambda: self._receiver( event, has_scales, @@ -146,7 +162,7 @@ def _receiver( a1_scale: Optional[torch.Tensor], quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - if self.async_prepare: + if event.event is not None: event.current_stream_wait() if has_scales: @@ -207,7 +223,7 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> tuple[Callable, mk.ReceiverType]: + ) -> mk.ReceiverType: if apply_router_weight_on_input: topk = topk_ids.size(1) @@ -233,14 +249,13 @@ def prepare_async( a1q_scale = None a1_post_scale = quant_config.a1_scale - return (lambda *args: None, - self._do_dispatch(tokens=a1q, - token_scales=a1q_scale, - rank_topk_ids=topk_ids, - rank_topk_weights=topk_weights, - num_experts=num_experts, - a1_scale=a1_post_scale, - quant_config=quant_config)) + return self._do_dispatch(tokens=a1q, + token_scales=a1q_scale, + rank_topk_ids=topk_ids, + rank_topk_weights=topk_weights, + num_experts=num_experts, + a1_scale=a1_post_scale, + quant_config=quant_config) def prepare( self, @@ -252,10 +267,9 @@ def prepare( apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - (_, receiver) = self.prepare_async(a1, topk_weights, topk_ids, - num_experts, expert_map, - apply_router_weight_on_input, - quant_config) + receiver = self.prepare_async(a1, topk_weights, topk_ids, num_experts, + expert_map, apply_router_weight_on_input, + quant_config) return receiver() def _finalize( @@ -269,7 +283,9 @@ def _finalize( do_async: bool, ) -> Optional[Callable]: - assert self.handle is not None + a2a_idx = dbo_current_ubatch_id() + handle = self.handles[a2a_idx] + assert handle is not None 
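        # Why the handle is tracked per micro-batch: under DBO the two ubatches
        # run on separate CPU threads, so a single shared `self.handle` written
        # by one ubatch's dispatch could be clobbered before the other ubatch
        # combines. A minimal sketch of the pattern, assuming
        # dbo_current_ubatch_id() returns 0 for the first micro-batch and 1 for
        # the second:
        #
        #     handles = [None, None]
        #     handles[dbo_current_ubatch_id()] = handle   # set during dispatch
        #     ...
        #     handle = handles[dbo_current_ubatch_id()]   # read back in combine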
# fused_expert_output can have 0 tokens - This happens when none of the # tokens from the all2all reach this EP rank. @@ -283,25 +299,35 @@ def _finalize( topk_ids=topk_ids, apply_router_weight_on_input=apply_router_weight_on_input, ) - + dbo_yield_and_switch_from_compute_to_comm() combined_x, _, event = self.buffer.combine( x=fused_expert_output, - handle=self.handle, + handle=handle, topk_weights=None, config=self._get_combine_config(), previous_event=None, - async_finish=do_async, + async_finish=do_async and not dbo_enabled(), allocate_on_comm_stream=False) + dbo_switch_to_compute() + if do_async: def _receiver(): - event.current_stream_wait() + if event.event is not None: + event.current_stream_wait() + dbo_switch_to_comm() # Respect inplace outputs. output.copy_(combined_x, non_blocking=True) - return lambda: _receiver() + # TODO(lucas): refactor the modular kernel so this will be + # handled there + dbo_yield_and_switch_from_comm_to_compute() + + return _receiver else: + # TODO(lucas): support this case with the refactored modular kernel + assert not dbo_enabled() # Respect inplace outputs. output.copy_(combined_x, non_blocking=True) return None diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index 101fc8798c42..a9554291db69 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -206,7 +206,7 @@ def _finalize( apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, do_async: bool, - ) -> Optional[Callable]: + ) -> tuple[Callable, Callable]: assert isinstance( weight_and_reduce_impl, TopKWeightAndReduceDelegate ), ("Weight application and reduction happens in the combine kernel.") @@ -233,7 +233,7 @@ def _finalize( return_recv_hook=do_recv_hook, out=output) - return recv_hook + return recv_hook, lambda: None def finalize_async( self, @@ -243,8 +243,8 @@ def finalize_async( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> Callable: - recv_hook = self._finalize( + ) -> tuple[Callable, Callable]: + return self._finalize( output, fused_expert_output, topk_weights, @@ -253,8 +253,6 @@ def finalize_async( weight_and_reduce_impl, do_async=True, ) - assert recv_hook is not None - return recv_hook def finalize( self, diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 5fce24018e64..4ba14196682a 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -13,7 +13,8 @@ from vllm.model_executor.layers.fused_moe.utils import ( # yapf: disable _resize_cache, count_expert_num_tokens) from vllm.utils import cdiv -from vllm.v1.worker.ubatching import (dbo_enabled, dbo_maybe_run_recv_hook, +from vllm.v1.worker.ubatching import (dbo_current_ubatch_id, dbo_enabled, + dbo_maybe_run_recv_hook, dbo_register_recv_hook, dbo_yield) # @@ -223,7 +224,7 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> tuple[Callable, ReceiverType]: + ) -> Union[tuple[Callable, ReceiverType], ReceiverType]: """ Perform any quantization (and/or) dispatching needed for this kernel but do not wait for results from other workers. 
@@ -239,10 +240,21 @@ def prepare_async( - apply_router_weight_on_input: When True, apply the weights to the activations, before quantization + dispatching. - Returns a callback that when invoked waits for results from other - workers and has the same return signature as `prepare`, e.g. - - receiver = obj.prepare_async(...) + Returns a callback or a hook callback pair that when invoked waits for + results from other workers and has the same return signature as + `prepare`, if a hook is returned this is more lightweight check that + the recv is complete without doing extra work (used by DBO, will be + refactored in the very near future) + + e.g. + + ret = obj.prepare_async(...) + + if isinstance(ret, tuple): + hook, receiver = ret + hook() + + if hook is not None: a, a_scales, expert_meta, topk_ids, topk_weights = receiver() is equivalent to: @@ -284,7 +296,7 @@ def finalize_async( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: TopKWeightAndReduce, - ) -> Callable: + ) -> Union[tuple[Callable, Callable], Callable]: """ Perform any combine plus apply weights and perform a reduction on the fused experts output but do not wait for results from other workers. @@ -298,11 +310,17 @@ def finalize_async( - weight_and_reduce_impl: An optional TopKWeightAndReduce implementation. - Returns a callback that when invoked waits for results from other - workers and has the same return signature as `finalize`, e.g. + Returns a callback or a hook callback pair that when invoked waits for + results from other workers and has the same return signature as + `finalize`, if a hook is returned this is more lightweight check that + the recv is complete without doing extra work (used by DBO, will be + refactored in the very near future) - receiver = obj.finalize_async(output, ...) + ret = obj.finalize_async(output, ...) ... output not valid yet ... + if isinstance(ret, tuple): + hook, receiver = ret + hook() receiver() ... output valid here ... @@ -600,9 +618,23 @@ class FusedMoEModularKernel(torch.nn.Module): layer due to any layer specific state that may be used by the component objects. """ - fused_out_buffer = SharedResizableBuffer() - workspace13_buffer = SharedResizableBuffer() - workspace2_buffer = SharedResizableBuffer() + + class SharedBuffers: + + def __init__(self) -> None: + self.fused_out = SharedResizableBuffer() + self.workspace13 = SharedResizableBuffer() + self.workspace2 = SharedResizableBuffer() + + # Persistent buffers that are shared across `FusedMoEModularKernel` + # instances (layers), to save memory and allocattions. + # + # We have two sets of buffers to support dual batch overlap (DBO) where each + # microbatch (ubatch) should use its own set of buffers to avoid + # cross-ubatch contimination. + # NOTE that memory is lazily allocated for these buffers, meaning that if + # DBO isn't being used, the second SharedBuffers will be empty. + shared_buffers: list[SharedBuffers] = [SharedBuffers(), SharedBuffers()] def __init__( self, @@ -647,14 +679,18 @@ def _do_fused_experts( a1, a1q, M, N, K, top_k, global_num_experts, local_num_experts, expert_tokens_meta) + # select per-ubatch buffers to avoid cross-ubatch reuse under DBO + ubatch_idx = dbo_current_ubatch_id() + buffers = self.shared_buffers[ubatch_idx] + # We can reuse the memory between cache1 and cache3 because by the # time we need cache3, we're done with cache1. 
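        # The per-ubatch selection above also drives the class-level buffer
        # pair. A minimal sketch of the layout, assuming at most two ubatches
        # and lazy allocation inside SharedResizableBuffer.get():
        #
        #     shared_buffers = [SharedBuffers(), SharedBuffers()]  # ubatch 0, 1
        #     buffers = shared_buffers[dbo_current_ubatch_id()]
        #     workspace13 = buffers.workspace13.get(shape, device=dev, dtype=dt)
        #
        # so when DBO is disabled only index 0 is ever touched and the second
        # SharedBuffers stays empty.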
- workspace13 = self.workspace13_buffer.get(workspace13_shape, - device=a1.device, - dtype=workspace_dtype) - workspace2 = self.workspace2_buffer.get(workspace2_shape, - device=a1.device, - dtype=workspace_dtype) + workspace13 = buffers.workspace13.get(workspace13_shape, + device=a1.device, + dtype=workspace_dtype) + workspace2 = buffers.workspace2.get(workspace2_shape, + device=a1.device, + dtype=workspace_dtype) assert fused_out is None or fused_out.shape == fused_out_shape, ( f"fused_out {fused_out.shape} but expected {fused_out_shape}") @@ -733,9 +769,11 @@ def _maybe_chunk_fused_experts( (_, _, fused_out_shape, _) = self.fused_experts.workspace_shapes( a1, a1q, M, N, K, top_k, global_num_experts, local_num_experts, expert_tokens_meta) - fused_out = self.fused_out_buffer.get(fused_out_shape, - device=a1q.device, - dtype=a1.dtype) + ubatch_idx = dbo_current_ubatch_id() + buffers = self.shared_buffers[ubatch_idx] + fused_out = buffers.fused_out.get(fused_out_shape, + device=a1q.device, + dtype=a1.dtype) def slice_input_tensors( chunk_idx: int @@ -868,6 +906,7 @@ def forward( if not self.prepare_finalize.supports_async(): # We shouldn't be running an a2a kernel that doesn't # support async prepare/finalize + # TODO(lucas): enable in follow-up assert not dbo_enabled() (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, @@ -883,7 +922,7 @@ def forward( else: # Overlap shared expert compute with all2all dispatch. dbo_maybe_run_recv_hook() - hook, receiver = self.prepare_finalize.prepare_async( + prepare_ret = self.prepare_finalize.prepare_async( a1, topk_weights, topk_ids, @@ -893,13 +932,21 @@ def forward( self.fused_experts.quant_config, ) - # If DBO is being used, register the hook with the ubatch context - # and call it in dbo_maybe_run_recv_hook instead of passing it to - # the receiver. - dbo_register_recv_hook(hook) - dbo_yield() - if not dbo_enabled(): - hook() + # TODO(lucas): refactor this in the alternative schedules followup + # currently unpack if we have hook + receiver pair or just + # receiver (see finalize_async docstring) + hook, receiver = prepare_ret \ + if isinstance(prepare_ret, tuple) else (None, prepare_ret) + + if hook is not None: + if dbo_enabled(): + # If DBO is being used, register the hook with the ubatch + # context and call it in dbo_maybe_run_recv_hook instead of + # passing it to the receiver. + dbo_register_recv_hook(hook) + dbo_yield() + else: + hook() (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, _expert_topk_weights) = receiver() @@ -952,7 +999,7 @@ def forward( if self.shared_experts is not None: shared_output = self.shared_experts(a1) else: - recv_hook = self.prepare_finalize.finalize_async( + finalize_ret = self.prepare_finalize.finalize_async( output, fused_out, topk_weights, @@ -964,11 +1011,23 @@ def forward( if self.shared_experts is not None: shared_output = self.shared_experts(a1) - assert recv_hook is not None - dbo_register_recv_hook(recv_hook) - dbo_yield() - if not dbo_enabled(): - recv_hook() + # TODO(lucas): refactor this in the alternative schedules followup + # currently unpack if we have hook + receiver pair or just + # receiver (see finalize_async docstring) + hook, receiver = finalize_ret \ + if isinstance(finalize_ret, tuple) else (None, finalize_ret) + + if hook is not None: + if dbo_enabled(): + # If DBO is being used, register the hook with the ubatch + # context and call it in dbo_maybe_run_recv_hook instead of + # passing it to the receiver. 
+ dbo_register_recv_hook(hook) + dbo_yield() + else: + hook() + + receiver() if self.shared_experts is None: return output diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index 6ef489f5a7a2..f837439f953e 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -107,19 +107,57 @@ def _make_metadata_with_slice( the requests included in ubatch_slice """ + assert not ubatch_slice.is_empty(), ( + f"Ubatch slice {ubatch_slice} is empty") + request_slice = ubatch_slice.request_slice token_slice = ubatch_slice.token_slice + start_locs = attn_metadata.query_start_loc_cpu + first_req = request_slice.start + first_tok = token_slice.start + last_req = request_slice.stop - 1 + last_tok = token_slice.stop - 1 + + assert start_locs[first_req] <= first_tok < start_locs[first_req + 1], \ + "Token slice start outside of first request" + assert start_locs[last_req] <= last_tok < start_locs[last_req+1], \ + "Token slice end outside of last request" + + # If the "middle" request has tokens in both ubatches, we have to split it. + # If ubatch_slice is the first ubatch then we will be splitting the last + # request. If it's the second microbatch, then we will be splitting the + # first request + splits_first_request = first_tok > start_locs[first_req] + splits_last_request = last_tok < start_locs[last_req + 1] - 1 + + query_start_loc_cpu = slice_query_start_locs(start_locs, request_slice) query_start_loc = slice_query_start_locs(attn_metadata.query_start_loc, request_slice) + assert len(query_start_loc) >= 2, ( f"query_start_loc must have at least 2 elements, " f"got {len(query_start_loc)}") - query_start_loc_cpu = slice_query_start_locs( - attn_metadata.query_start_loc_cpu, request_slice) + if splits_first_request: + tokens_skipped = first_tok - start_locs[first_req] + query_start_loc[1:] -= tokens_skipped + query_start_loc_cpu[1:] -= tokens_skipped seq_lens = attn_metadata.seq_lens[request_slice] seq_lens_cpu = attn_metadata.seq_lens_cpu[request_slice] + + if splits_last_request: + tokens_skipped = query_start_loc_cpu[-1] - token_slice.stop + query_start_loc[-1] -= tokens_skipped + query_start_loc_cpu[-1] -= tokens_skipped + + # Make sure we don't modify the seq_lens tensors + # (not cudagraph compatible) + seq_lens = seq_lens.clone() + seq_lens_cpu = seq_lens_cpu.clone() + seq_lens[-1] -= tokens_skipped + seq_lens_cpu[-1] -= tokens_skipped + max_seq_len = int(seq_lens_cpu.max()) num_computed_tokens_cpu = attn_metadata.num_computed_tokens_cpu[ request_slice] @@ -167,6 +205,7 @@ def split_attn_metadata( for ubatch_slice in ubatch_slices: results.append( _make_metadata_with_slice(ubatch_slice, common_attn_metadata)) + return results @@ -696,7 +735,6 @@ def split_decodes_and_prefills( return num_reqs, 0, num_tokens, 0 first_prefill = is_prefill.int().argmax(dim=-1).item() - assert torch.all(query_lens[first_prefill:] > decode_threshold) assert torch.all(query_lens[:first_prefill] <= decode_threshold) num_decodes = first_prefill num_prefills = num_reqs - num_decodes diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index dc97d5c8f39d..a0f40828d42f 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -30,7 +30,6 @@ from vllm.v1.spec_decode.metadata import SpecDecodeMetadata from vllm.v1.utils import CpuGpuBuffer from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch -from vllm.v1.worker.ubatching import dbo_current_ubatch_id logger = init_logger(__name__) @@ -192,9 +191,8 
@@ def propose( assert self.runner is not None # FIXME: need to consider multiple kv_cache_groups - ubatch_id = dbo_current_ubatch_id() attn_metadata_builder = \ - self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + self.runner.attn_groups[0][0].get_metadata_builder() attn_metadata = attn_metadata_builder.build_for_drafting( common_attn_metadata=common_attn_metadata, draft_index=0) @@ -330,7 +328,7 @@ def propose( # Rebuild attention metadata attn_metadata_builder = \ - self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + self.runner.attn_groups[0][0].get_metadata_builder() attn_metadata = attn_metadata_builder\ .build_for_drafting(common_attn_metadata=common_attn_metadata, draft_index=token_index + 1) @@ -538,9 +536,8 @@ def propose_tree( hidden_states: torch.Tensor, common_attn_metadata: CommonAttentionMetadata, ) -> list[torch.Tensor]: - ubatch_id = dbo_current_ubatch_id() tree_attn_metadata_builder = \ - self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + self.runner.attn_groups[0][0].get_metadata_builder() assert isinstance(tree_attn_metadata_builder, TreeAttentionMetadataBuilder) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 89b9a3c34f2a..ed324138c6fe 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -96,7 +96,8 @@ from vllm.v1.worker.kv_connector_model_runner_mixin import ( KVConnectorModelRunnerMixin) from vllm.v1.worker.lora_model_runner_mixin import LoRAModelRunnerMixin -from vllm.v1.worker.ubatch_splitting import get_dp_padding_ubatch, ubatch_split +from vllm.v1.worker.ubatch_splitting import (check_ubatch_thresholds, + ubatch_split) from vllm.v1.worker.ubatch_utils import UBatchSlice, UBatchSlices from vllm.v1.worker.utils import is_residual_scattered_for_sp @@ -1032,7 +1033,7 @@ def _prepare_inputs( num_tokens_padded = num_tokens_unpadded + self.get_local_padding( num_tokens_unpadded) ubatch_slices, num_tokens_after_padding = \ - ubatch_split(max_num_scheduled_tokens, + ubatch_split(num_scheduled_tokens, num_tokens_unpadded, num_tokens_padded, self.vllm_config) @@ -1206,7 +1207,6 @@ def _prepare_inputs( ubatch_slices, common_attn_metadata) for ubid, common_attn_metadata in enumerate( common_attn_metadata_list): - assert common_attn_metadata.max_query_len == 1 attn_metadata_i = (attn_group.get_metadata_builder( ubatch_id=ubid).build( common_prefix_len=common_prefix_len, @@ -2182,9 +2182,6 @@ def execute_model( ) = self._preprocess(scheduler_output, intermediate_tensors, ubatch_slices, num_tokens_after_padding) - if ubatch_slices is not None: - num_input_tokens = num_input_tokens // 2 - uniform_decode = (max_query_len == self.uniform_decode_query_len) and ( num_scheduled_tokens @@ -2194,6 +2191,11 @@ def execute_model( cudagraph_runtime_mode, batch_descriptor = \ self.cudagraph_dispatcher.dispatch(batch_descriptor) + # This is currently to get around the assert in the DPMetadata + # where it wants `num_tokens_across_dp` to align with `num_tokens` + if ubatch_slices is not None: + num_input_tokens = ubatch_slices[0].num_tokens + # Run the model. # Use persistent buffers for CUDA graphs. 
with (set_forward_context( @@ -2821,7 +2823,7 @@ def _dummy_run( cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, force_attention: bool = False, uniform_decode: bool = False, - allow_microbatching: bool = False, + allow_microbatching: bool = True, skip_eplb: bool = False, is_profile: bool = False, create_mixed_batch: bool = False, @@ -2847,32 +2849,10 @@ def _dummy_run( (1 token) and prefill (multiple tokens) requests. remove_lora: If False, dummy LoRAs are not destroyed after the run """ - ubatch_enabled = self.parallel_config.enable_dbo - num_tokens_across_dp = None - num_pad = 0 - should_ubatch = False - if ubatch_enabled: - should_ubatch = num_tokens >= \ - self.parallel_config.dbo_decode_token_threshold and \ - allow_microbatching - - (should_ubatch, num_tokens_across_dp) = get_dp_padding_ubatch( - num_tokens, num_tokens, should_ubatch, self.vllm_config) - - # Currently the dummy run should only be ubatching during - # cuda graph capture, meaning all DP ranks should already - # have the same batch size - if num_tokens_across_dp is not None: - assert int(num_tokens_across_dp[0]) == num_tokens // 2 - assert cudagraph_runtime_mode in { CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL } - if not should_ubatch: - num_pad, num_tokens_across_dp = self.get_dp_padding(num_tokens) - num_tokens += num_pad - # If cudagraph_mode.decode_mode() == FULL and # cudagraph_mode.separate_routine(). This means that we are using # different graphs and/or modes for mixed prefill-decode batches vs. @@ -2888,10 +2868,6 @@ def _dummy_run( # for GQA/MQA. max_query_len = self.uniform_decode_query_len if uniform_decode else \ num_tokens - if allow_microbatching: - assert self.uniform_decode_query_len == 1 - assert uniform_decode is True - assert max_query_len == 1 # Set num_scheduled_tokens based on num_tokens and max_num_seqs # for dummy run with LoRA so that the num_reqs collectively @@ -2930,20 +2906,31 @@ def _dummy_run( assert len(num_scheduled_tokens_list) == num_reqs num_scheduled_tokens = np.array(num_scheduled_tokens_list, dtype=np.int32) + total_num_scheduled_tokens = int(num_scheduled_tokens.sum()) ubatch_slices = None + num_tokens_after_padding = None + # We currently only microbatch if the number of tokens is # over a certain threshold. - if should_ubatch: - # We only support decode-only cudagraphs - assert num_reqs == num_tokens - assert num_tokens % 2 == 0 - ubatch_slices = [ - UBatchSlice(slice(0, num_reqs // 2), slice(0, - num_tokens // 2)), - UBatchSlice(slice(num_reqs // 2, num_reqs), - slice(num_tokens // 2, num_tokens)) - ] + if self.parallel_config.enable_dbo and allow_microbatching: + ubatch_slices, num_tokens_after_padding = ubatch_split( + num_scheduled_tokens, + total_num_scheduled_tokens, + total_num_scheduled_tokens, + self.vllm_config, + ) + + # If we failed to microbatch, currently need to resynchronize + # TODO(lucas,sage): we should be able to avoid this second sync by + # refactoring `get_dp_padding_ubatch` and `get_dp_padding` into + # a single `coordinate_batch_across_dp` function. 
+ if num_tokens_after_padding is None: + num_pad, num_tokens_across_dp = self.get_dp_padding(num_tokens) + num_tokens_after_padding = num_tokens + num_pad + else: + num_tokens_across_dp = num_tokens_after_padding + num_tokens_after_padding = int(num_tokens_after_padding[0].item()) attn_metadata: Optional[PerLayerAttnMetadata] = None @@ -2966,6 +2953,11 @@ def _dummy_run( self.seq_lens.np[num_reqs:] = 0 self.seq_lens.copy_to_gpu() + cum_num_tokens, _ = self._get_cumsum_and_arange( + num_scheduled_tokens) + self.query_start_loc.np[1:num_reqs + 1] = cum_num_tokens + self.query_start_loc.copy_to_gpu() + for kv_cache_group_id, kv_cache_group_spec in enumerate( self.kv_cache_config.kv_cache_groups): common_attn_metadata = CommonAttentionMetadata( @@ -3060,7 +3052,7 @@ def _dummy_run( with self.maybe_randomize_inputs(input_ids), set_forward_context( attn_metadata, self.vllm_config, - num_tokens=num_tokens, + num_tokens=num_tokens_after_padding, num_tokens_across_dp=num_tokens_across_dp, cudagraph_runtime_mode=cudagraph_runtime_mode, batch_descriptor=batch_descriptor, @@ -3395,56 +3387,51 @@ def _capture_cudagraphs(self, compilation_cases: list[int], desc="Capturing CUDA graphs ({}, {})".format( "decode" if uniform_decode else "mixed prefill-decode", cudagraph_runtime_mode.name)) - enable_dbo = self.parallel_config.enable_dbo - # DBO Only supports running Full cudagraphs with uniform - # decode lengths - if enable_dbo and uniform_decode: - for num_tokens in compilation_cases: - # If the number of tokens is greater than the microbatching - # threshold, don't generate a microbatched cudagraph - if (num_tokens - < self.parallel_config.dbo_decode_token_threshold): - continue - # Warmup + # We skip EPLB here since we don't want to record dummy metrics + for num_tokens in compilation_cases: + # We currently only capture ubatched graphs when its a FULL + # cudagraph and for uniform decode batches. + capture_ubatched_graph = self.parallel_config.enable_dbo \ + and cudagraph_runtime_mode == CUDAGraphMode.FULL \ + and uniform_decode \ + and check_ubatch_thresholds( + config=self.vllm_config.parallel_config, + num_tokens=num_tokens, + uniform_decode=uniform_decode, + ) + + # Currently we capture both microbatched and non-microbatched + # graphs when capture_ubatched_graph is True, this is because + # occasionally we will be forced out of microbatching due to other + # DP ranks not microbatching (usually caused by an empty second + # microbatch; once we resolve this, we can remove the + # non-microbatched graph capture). + allow_microbatching_options = [True, False] if \ + capture_ubatched_graph else [False] + for allow_microbatching in allow_microbatching_options: for _ in range( self.compilation_config.cudagraph_num_of_warmups): + # Use CUDAGraphRuntimeStyle.NONE (default) for warmup. + # But be careful, warm up with `NONE`is orthogonal to + # if we want to warm up attention or not. This is + # different from the case where `FULL` implies capture + # attention while `PIECEWISE` implies no attention. 
force_attention = ( cudagraph_runtime_mode == CUDAGraphMode.FULL) self._dummy_run(num_tokens, cudagraph_runtime_mode=CUDAGraphMode.NONE, force_attention=force_attention, - uniform_decode=True, - allow_microbatching=True, - skip_eplb=True) - - # Graph Capture + uniform_decode=uniform_decode, + allow_microbatching=allow_microbatching, + skip_eplb=True, + remove_lora=False) self._dummy_run(num_tokens, - cudagraph_runtime_mode=CUDAGraphMode.FULL, - uniform_decode=True, - allow_microbatching=True, - skip_eplb=True) - # We skip EPLB here since we don't want to record dummy metrics - for num_tokens in compilation_cases: - for _ in range(self.compilation_config.cudagraph_num_of_warmups): - # Use CUDAGraphRuntimeStyle.NONE (default) for warmup. - # But be careful, warm up with `NONE`is orthogonal to - # if we want to warm up attention or not. This is - # different from the case where `FULL` implies capture - # attention while `PIECEWISE` implies no attention. - force_attention = ( - cudagraph_runtime_mode == CUDAGraphMode.FULL) - self._dummy_run(num_tokens, - cudagraph_runtime_mode=CUDAGraphMode.NONE, - force_attention=force_attention, + cudagraph_runtime_mode=cudagraph_runtime_mode, uniform_decode=uniform_decode, + allow_microbatching=allow_microbatching, skip_eplb=True, remove_lora=False) - self._dummy_run(num_tokens, - cudagraph_runtime_mode=cudagraph_runtime_mode, - uniform_decode=uniform_decode, - skip_eplb=True, - remove_lora=False) self.maybe_remove_all_loras(self.lora_config) def initialize_attn_backend(self, kv_cache_config: KVCacheConfig) -> None: @@ -3500,24 +3487,16 @@ def create_attn_groups( attn_groups: list[AttentionGroup] = [] for (attn_backend, kv_cache_spec), layer_names in attn_backends_map.items(): - attn_metadata_builders = [] - attn_metadata_builders.append(attn_backend.get_builder_cls()( - kv_cache_spec, + attn_group = AttentionGroup.create_with_metadata_builders( + attn_backend, layer_names, + kv_cache_spec, self.vllm_config, self.device, - )) - if self.parallel_config.enable_dbo: - attn_metadata_builders.append( - attn_backend.get_builder_cls()( - kv_cache_spec, - layer_names, - self.vllm_config, - self.device, - )) - attn_group = AttentionGroup(attn_backend, - attn_metadata_builders, - layer_names, kv_cache_spec) + num_metadata_builders=1 + if not self.parallel_config.enable_dbo else 2, + ) + attn_groups.append(attn_group) return attn_groups diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index 5012ad0483c8..bfc3743ea417 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -1,25 +1,28 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import dataclasses import threading +from dataclasses import dataclass from typing import Any, Callable, Optional import torch +import vllm.envs as envs from vllm.compilation.cuda_graph import CUDAGraphWrapper from vllm.config import CUDAGraphMode, VllmConfig +from vllm.distributed import get_ep_group from vllm.forward_context import (create_forward_context, get_forward_context, override_forward_context) from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors +from vllm.utils import has_deep_gemm from vllm.v1.worker.ubatching import UBatchContext, make_ubatch_contexts logger = init_logger(__name__) -@dataclasses.dataclass +@dataclass class UbatchMetadata: context: UBatchContext input_ids: torch.Tensor @@ -29,13 +32,55 @@ class 
UbatchMetadata: num_tokens: int -@dataclasses.dataclass +@dataclass class CUDAGraphMetaData: cudagraph: torch.cuda.CUDAGraph ubatch_metadata: UbatchMetadata outputs: Optional[Any] = None +class SMControlContextManager: + + def __init__(self, comm_sms: int, set_comm_sms: Callable[[int], None], + set_compute_sms: Callable[[int], None]): + """ + Context manager for controlling SM (Streaming Multiprocessor) + allocation. Upon entering the context, it sets the number of SMs + allocated for communication and computation to comm_sms and + total_sms - comm_sms respectively. Upon exiting, it restores the + allocation to use all available SMs (i.e. total_sms). + + Args: + comm_sms (int): The number of SMs to allocate for communication. + (The remainder will be used for computation.) + set_comm_sms (Callable[[int], None]): + A function that sets the number of SMs for communication. + set_compute_sms (Callable[[int], None]): + A function that sets the number of SMs for computation. + """ + + assert current_platform.is_cuda(), \ + "SM control is currently only supported on CUDA" + + props = torch.cuda.get_device_properties(torch.cuda.current_device()) + total_sms = props.multi_processor_count + + assert comm_sms < total_sms + self.total_sms = total_sms + self.compute_sms = total_sms - comm_sms + self.comm_sms = comm_sms + self.set_comm_sms = set_comm_sms + self.set_compute_sms = set_compute_sms + + def __enter__(self): + self.set_comm_sms(self.comm_sms) + self.set_compute_sms(self.compute_sms) + + def __exit__(self, exc_type, exc_value, traceback): + self.set_comm_sms(self.total_sms) + self.set_compute_sms(self.total_sms) + + class UBatchWrapper: def __init__(self, runnable: Callable, vllm_config: VllmConfig, @@ -56,6 +101,35 @@ def __init__(self, runnable: Callable, vllm_config: VllmConfig, runnable, vllm_config, runtime_mode=runtime_mode) self.graph_pool = current_platform.get_global_graph_pool() + self.sm_control = self._create_sm_control_context(vllm_config) + + @staticmethod + def _create_sm_control_context(vllm_config: VllmConfig): + comm_sms = envs.VLLM_DBO_COMM_SMS + + set_comm_sms = lambda sms: None + if vllm_config.parallel_config.enable_expert_parallel: + # Currently only DeepEP highthroughput supports SM control so this + # only affects that case. + all2all_manager = get_ep_group( + ).device_communicator.all2all_manager + + if all2all_manager.max_sms_used() is not None: + comm_sms = min(comm_sms, all2all_manager.max_sms_used()) + + if comm_sms > 0: + set_comm_sms = lambda sms: all2all_manager.set_num_sms(sms) + + # TODO(lucas): support other kernels besides DeepGEMM + set_compute_sms = lambda sms: None + if has_deep_gemm() and comm_sms > 0: + import deep_gemm as dg + set_compute_sms = lambda sms: dg.set_num_sms(sms) + + return SMControlContextManager(comm_sms=comm_sms, + set_comm_sms=set_comm_sms, + set_compute_sms=set_compute_sms) + def __getattr__(self, key: str): # allow accessing the attributes of the runnable. 
if hasattr(self.runnable, key): @@ -282,8 +356,8 @@ def __call__(self, *args, **kwargs): dp_metadata=dp_metadata, batch_descriptor=batch_descriptor, cudagraph_runtime_mode=CUDAGraphMode.NONE) - - return self._capture_ubatches(ubatch_metadata, self.model) + with self.sm_control: + return self._capture_ubatches(ubatch_metadata, self.model) elif num_tokens in self.cudagraphs: cudagraph_metadata = self.cudagraphs[num_tokens] cudagraph_metadata.cudagraph.replay() @@ -300,4 +374,5 @@ def __call__(self, *args, **kwargs): dp_metadata=dp_metadata, batch_descriptor=batch_descriptor, cudagraph_runtime_mode=CUDAGraphMode.NONE) - return self._run_ubatches(ubatch_metadata, self.model) + with self.sm_control: + return self._run_ubatches(ubatch_metadata, self.model) diff --git a/vllm/v1/worker/ubatch_splitting.py b/vllm/v1/worker/ubatch_splitting.py index 650f0ec5138d..30acb14ff58a 100644 --- a/vllm/v1/worker/ubatch_splitting.py +++ b/vllm/v1/worker/ubatch_splitting.py @@ -3,9 +3,10 @@ from typing import Optional +import numpy as np import torch -from vllm.config import VllmConfig +from vllm.config import ParallelConfig, VllmConfig from vllm.forward_context import DPMetadata from vllm.logger import init_logger from vllm.utils import round_up @@ -29,6 +30,16 @@ def should_ubatch_with_num_tokens( dp_size, dp_rank) +def check_ubatch_thresholds(config: ParallelConfig, num_tokens: int, + uniform_decode: bool) -> bool: + if not config.enable_dbo: + return False + if uniform_decode: + return num_tokens >= config.dbo_decode_token_threshold + else: + return num_tokens >= config.dbo_prefill_token_threshold + + def get_dp_padding_ubatch( num_tokens_unpadded: int, num_tokens_padded: int, should_attempt_ubatching: bool, @@ -95,9 +106,37 @@ def get_dp_padding_ubatch( dtype=torch.int32) return should_ubatch, num_tokens_after_padding +def create_ubatch_slices(num_scheduled_tokens: np.ndarray, split_point: int) \ + -> UBatchSlices: + # TODO(lucas): Refactor the gpu_model_runner.py so we can pass + # in cu_num_tokens directly (i.e. 
query_start_loc) + cu_num_tokens = np.zeros(len(num_scheduled_tokens) + 1, dtype=np.int32) + np.cumsum(num_scheduled_tokens, dtype=np.int32, out=cu_num_tokens[1:]) + + first_ubatch_token_slice = slice(0, split_point) + second_ubatch_token_slice = slice(split_point, cu_num_tokens[-1]) + + # Determine request slices using exclusive stop semantics + # First ubatch includes requests whose tokens overlap [0, split_point) + first_ubatch_req_stop = int( + np.searchsorted(cu_num_tokens, split_point, side="left")) + first_ubatch_req_slice = slice(0, first_ubatch_req_stop) + + # Second ubatch starts at the request that contains the split_point + # or the request starting exactly at split_point (if on boundary) + second_ubatch_req_start = int( + np.searchsorted(cu_num_tokens, split_point, side="right") - 1) + second_ubatch_req_slice = slice(second_ubatch_req_start, + len(cu_num_tokens) - 1) + + return [ + UBatchSlice(first_ubatch_req_slice, first_ubatch_token_slice), + UBatchSlice(second_ubatch_req_slice, second_ubatch_token_slice) + ] + def ubatch_split( - max_num_scheduled_tokens: int, + num_scheduled_tokens_per_request: np.ndarray, num_tokens_unpadded: int, num_tokens_padded: int, vllm_config: VllmConfig, @@ -122,17 +161,20 @@ def ubatch_split( return (None, None) # Check preconditions for microbatching - should_attempt_ubatching = \ - parallel_config.enable_dbo and \ - num_tokens_unpadded >= \ - parallel_config.dbo_decode_token_threshold \ - and max_num_scheduled_tokens == 1 + should_attempt_ubatching = check_ubatch_thresholds( + parallel_config, + num_tokens_unpadded, + vllm_config, + ) # Don't microbatch unless every other DP worker is also microbatching - num_tokens_after_padding = None - (should_ubatch, num_tokens_after_padding) = get_dp_padding_ubatch( - num_tokens_unpadded, num_tokens_padded, should_attempt_ubatching, - vllm_config) + should_ubatch, num_tokens_after_padding = get_dp_padding_ubatch( + num_tokens_unpadded, + num_tokens_padded, + should_attempt_ubatching, + vllm_config, + ) + if not should_ubatch: return (None, None) @@ -141,15 +183,9 @@ def ubatch_split( # to the second ubatch in pad_out_ubatch_slice after attention # metadata creation assert num_tokens_after_padding is not None - total_num_tokens_per_ubatch = int(num_tokens_after_padding[0].item()) - padded_first_ubatch_slice = slice(0, total_num_tokens_per_ubatch) - padded_second_ubatch_slice = slice(total_num_tokens_per_ubatch, - num_tokens_unpadded) - - # Note there's an assumption here that there's 1 token per request - ubatch_slices = [ - UBatchSlice(padded_first_ubatch_slice, padded_first_ubatch_slice), - UBatchSlice(padded_second_ubatch_slice, padded_second_ubatch_slice) - ] + token_split_point = int(num_tokens_after_padding[0].item()) + + ubatch_slices = create_ubatch_slices(num_scheduled_tokens_per_request, + token_split_point) return (ubatch_slices, num_tokens_after_padding) diff --git a/vllm/v1/worker/ubatch_utils.py b/vllm/v1/worker/ubatch_utils.py index 6716d171cc70..33d58aa94843 100644 --- a/vllm/v1/worker/ubatch_utils.py +++ b/vllm/v1/worker/ubatch_utils.py @@ -10,6 +10,14 @@ class UBatchSlice: request_slice: slice token_slice: slice + def is_empty(self) -> bool: + return self.request_slice.start == self.request_slice.stop \ + or self.token_slice.start == self.token_slice.stop + + @property + def num_tokens(self) -> int: + return self.token_slice.stop - self.token_slice.start + UBatchSlices: TypeAlias = list[UBatchSlice] diff --git a/vllm/v1/worker/ubatching.py b/vllm/v1/worker/ubatching.py index 
9aeaa9909dc8..c26cb07123a5 100644
--- a/vllm/v1/worker/ubatching.py
+++ b/vllm/v1/worker/ubatching.py
@@ -51,8 +51,8 @@ def __enter__(self):
         self.cpu_wait_event.wait()
         self.cpu_wait_event.clear()
         self._restore_context()
-        # Assume we start on the compute stream
-        assert current_stream() == self.compute_stream
+        # Assume we want to start on the compute stream
+        self.update_stream(self.compute_stream)
         return self

     def __exit__(self, exc_type, exc_val, exc_tb):
@@ -62,17 +62,15 @@ def __exit__(self, exc_type, exc_val, exc_tb):
         self.maybe_run_recv_hook()
         self.cpu_signal_event.set()
         self.cpu_wait_event.clear()
-        self.current_stream = self.compute_stream
-        torch.cuda.set_stream(self.current_stream)
         return False

     def _restore_context(self):
         forward_context._forward_context = self.forward_context
-        torch.cuda.set_stream(self.current_stream)

     def update_stream(self, stream):
         self.current_stream = stream
-        torch.cuda.set_stream(self.current_stream)
+        if current_stream() != self.current_stream:
+            torch.cuda.set_stream(self.current_stream)

     def _signal_comm_done(self):
         self.gpu_comm_done_event.record(self.comm_stream)
@@ -99,9 +97,20 @@ def _cpu_yield(self):
         self.cpu_wait_event.clear()
         self._restore_context()

+    def switch_to_comm(self):
+        self.update_stream(self.comm_stream)
+
+    def switch_to_compute(self):
+        self.update_stream(self.compute_stream)
+
     def switch_to_comm_sync(self):
         self._signal_compute_done()
         self.update_stream(self.comm_stream)
+        self._wait_compute_done()
+
+    def switch_to_compute_sync(self):
+        self._signal_comm_done()
+        self.update_stream(self.compute_stream)
         self._wait_comm_done()

     def maybe_run_recv_hook(self):
@@ -112,8 +121,7 @@ def maybe_run_recv_hook(self):
     def yield_(self):
         self.current_stream = current_stream()
         self._cpu_yield()
-        if self.current_stream != current_stream():
-            self.update_stream(self.current_stream)
+        self.update_stream(self.current_stream)

     def yield_and_switch_from_compute_to_comm(self):
         assert current_stream() == self.compute_stream
@@ -153,15 +161,20 @@ def wrapper(*args, **kwargs):
     return wrapper


+dbo_maybe_run_recv_hook = _register_ubatch_function(
+    UBatchContext.maybe_run_recv_hook)
+dbo_yield = _register_ubatch_function(UBatchContext.yield_)
 dbo_yield_and_switch_from_compute_to_comm = _register_ubatch_function(
     UBatchContext.yield_and_switch_from_compute_to_comm)
 dbo_yield_and_switch_from_comm_to_compute = _register_ubatch_function(
     UBatchContext.yield_and_switch_from_comm_to_compute)
-dbo_yield = _register_ubatch_function(UBatchContext.yield_)
-dbo_maybe_run_recv_hook = _register_ubatch_function(
-    UBatchContext.maybe_run_recv_hook)
+dbo_switch_to_comm = _register_ubatch_function(UBatchContext.switch_to_comm)
+dbo_switch_to_compute = _register_ubatch_function(
+    UBatchContext.switch_to_compute)
 dbo_switch_to_comm_sync = _register_ubatch_function(
     UBatchContext.switch_to_comm_sync)
+dbo_switch_to_compute_sync = _register_ubatch_function(
+    UBatchContext.switch_to_compute_sync)


 def dbo_register_recv_hook(recv_hook):
diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py
index af922f9979d1..553d33e27203 100644
--- a/vllm/v1/worker/utils.py
+++ b/vllm/v1/worker/utils.py
@@ -130,15 +130,32 @@ def get_max_items(

 @dataclass
 class AttentionGroup:
     backend: type[AttentionBackend]
+    # When ubatching is enabled we will have a metadata builder for each ubatch
+    # so that, if they use internal persistent buffers for cudagraphs, they
+    # won't have to worry about conflicting with the other ubatches.
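+    # A builder is selected per ubatch via get_metadata_builder(ubatch_id),
+    # defined below.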
metadata_builders: list[AttentionMetadataBuilder] layer_names: list[str] kv_cache_spec: KVCacheSpec + @staticmethod + def create_with_metadata_builders( + backend: type[AttentionBackend], + layer_names: list[str], + kv_cache_spec: KVCacheSpec, + vllm_config: VllmConfig, + device: torch.device, + num_metadata_builders: int = 1, + ) -> 'AttentionGroup': + metadata_builders = [ + backend.get_builder_cls()(kv_cache_spec, layer_names, vllm_config, + device) + for _ in range(num_metadata_builders) + ] + return AttentionGroup(backend, metadata_builders, layer_names, + kv_cache_spec) + def get_metadata_builder(self, - ubatch_id: Optional[int] = None - ) -> AttentionMetadataBuilder: - if ubatch_id is None: - return self.metadata_builders[0] + ubatch_id: int = 0) -> AttentionMetadataBuilder: assert len(self.metadata_builders) > ubatch_id return self.metadata_builders[ubatch_id] From 875d6def90f737f5f39eb484a29a940b117a9c79 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:07:30 +0100 Subject: [PATCH 276/518] Add backward compatibility for `GuidedDecodingParams` (#25422) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../llm/test_struct_output_generate.py | 24 ++++++++++++- vllm/sampling_params.py | 36 +++++++++++++++++++ 2 files changed, 59 insertions(+), 1 deletion(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index 4b0f3b2d9967..e2c686928cea 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -5,6 +5,7 @@ from __future__ import annotations import json +from dataclasses import fields from enum import Enum from typing import TYPE_CHECKING, Any @@ -21,7 +22,8 @@ from vllm.outputs import RequestOutput from vllm.platforms import current_platform from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager -from vllm.sampling_params import SamplingParams, StructuredOutputsParams +from vllm.sampling_params import (GuidedDecodingParams, SamplingParams, + StructuredOutputsParams) if TYPE_CHECKING: from vllm.config import TokenizerMode @@ -89,6 +91,26 @@ def _load_json(s: str, backend: str) -> str: return json.loads(s) +def test_guided_decoding_deprecated(): + with pytest.warns(DeprecationWarning, + match="GuidedDecodingParams is deprecated.*"): + guided_decoding = GuidedDecodingParams(json_object=True) + + structured_outputs = StructuredOutputsParams(json_object=True) + assert fields(guided_decoding) == fields(structured_outputs) + + with pytest.warns(DeprecationWarning, + match="guided_decoding is deprecated.*"): + sp1 = SamplingParams(guided_decoding=guided_decoding) + + with pytest.warns(DeprecationWarning, + match="guided_decoding is deprecated.*"): + sp2 = SamplingParams.from_optional(guided_decoding=guided_decoding) + + assert sp1 == sp2 + assert sp1.structured_outputs == guided_decoding + + @pytest.mark.skip_global_cleanup @pytest.mark.parametrize( "model_name, backend, tokenizer_mode, speculative_config", diff --git a/vllm/sampling_params.py b/vllm/sampling_params.py index efe70d019ccc..f424682f9dfa 100644 --- a/vllm/sampling_params.py +++ b/vllm/sampling_params.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Sampling parameters for text generation.""" import copy +import warnings from dataclasses import field from enum import Enum, IntEnum from functools import cached_property @@ -59,6 
+60,19 @@ def __post_init__(self):
             f"but multiple are specified: {self.__dict__}")


+@dataclass
+class GuidedDecodingParams(StructuredOutputsParams):
+
+    def __post_init__(self):
+        warnings.warn(
+            "GuidedDecodingParams is deprecated. This will be removed in "
+            "v0.12.0 or v1.0.0, whichever is soonest. Please use "
+            "StructuredOutputsParams instead.",
+            DeprecationWarning,
+            stacklevel=2)
+        return super().__post_init__()
+
+
 class RequestOutputKind(Enum):
     # Return entire output so far in every RequestOutput
     CUMULATIVE = 0
@@ -179,6 +193,8 @@ class SamplingParams(
     # Fields used to construct logits processors
     structured_outputs: Optional[StructuredOutputsParams] = None
     """Parameters for configuring structured outputs."""
+    guided_decoding: Optional[GuidedDecodingParams] = None
+    """Deprecated alias for structured_outputs."""
     logit_bias: Optional[dict[int, float]] = None
     """If provided, the engine will construct a logits processor that applies
     these logit biases."""
@@ -227,6 +243,7 @@ def from_optional(
                                               ge=-1)]] = None,
         output_kind: RequestOutputKind = RequestOutputKind.CUMULATIVE,
         structured_outputs: Optional[StructuredOutputsParams] = None,
+        guided_decoding: Optional[GuidedDecodingParams] = None,
         logit_bias: Optional[Union[dict[int, float], dict[str, float]]] = None,
         allowed_token_ids: Optional[list[int]] = None,
         extra_args: Optional[dict[str, Any]] = None,
@@ -238,6 +255,15 @@ def from_optional(
             int(token): min(100.0, max(-100.0, bias))
             for token, bias in logit_bias.items()
         }
+        if guided_decoding is not None:
+            warnings.warn(
+                "guided_decoding is deprecated. This will be removed in "
+                "v0.12.0 or v1.0.0, whichever is soonest. Please use "
+                "structured_outputs instead.",
+                DeprecationWarning,
+                stacklevel=2)
+            structured_outputs = guided_decoding
+            guided_decoding = None

         return SamplingParams(
             n=1 if n is None else n,
@@ -334,6 +360,16 @@ def __post_init__(self) -> None:
         # eos_token_id is added to this by the engine
         self._all_stop_token_ids.update(self.stop_token_ids)

+        if self.guided_decoding is not None:
+            warnings.warn(
+                "guided_decoding is deprecated. This will be removed in "
+                "v0.12.0 or v1.0.0, whichever is soonest. 
Please use " + "structured_outputs instead.", + DeprecationWarning, + stacklevel=2) + self.structured_outputs = self.guided_decoding + self.guided_decoding = None + def _verify_args(self) -> None: if not isinstance(self.n, int): raise ValueError(f"n must be an int, but is of " From f11e3c516be3d88733ea4b0c79f47e33cc319197 Mon Sep 17 00:00:00 2001 From: bnellnm <49004751+bnellnm@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:11:34 -0400 Subject: [PATCH 277/518] [Kernels] Support blocked fp8 quantization for compressed tensors MoE (#25219) Signed-off-by: Bill Nell Co-authored-by: Michael Goin --- .../compressed_tensors_moe.py | 109 +++++++++++++++--- .../model_executor/layers/quantization/fp8.py | 22 ++-- .../layers/quantization/utils/fp8_utils.py | 6 + .../model_executor/warmup/deep_gemm_warmup.py | 4 +- 4 files changed, 112 insertions(+), 29 deletions(-) diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 85adae32f4cd..10f9085be4d1 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -13,6 +13,7 @@ import vllm.envs as envs import vllm.model_executor.layers.fused_moe.modular_kernel as mk from vllm import _custom_ops as ops +from vllm.distributed import get_tensor_model_parallel_world_size from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe import ( FusedMoE, FusedMoEActivationFormat, FusedMoEConfig, FusedMoEMethodBase, @@ -31,6 +32,9 @@ from vllm.model_executor.layers.quantization.utils.flashinfer_fp4_moe import ( build_flashinfer_fp4_cutlass_moe_prepare_finalize, reorder_w1w3_to_w3w1, select_nvfp4_gemm_impl) +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + expert_weight_is_col_major, get_col_major_tma_aligned_tensor, + requant_weight_ue8m0_inplace) from vllm.model_executor.layers.quantization.utils.marlin_utils import ( check_moe_marlin_supports_layer, marlin_make_workspace_new, marlin_moe_permute_scales) @@ -45,6 +49,7 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.scalar_type import scalar_types +from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used logger = init_logger(__name__) @@ -505,10 +510,12 @@ def __init__( self.weight_quant.strategy == QuantizationStrategy.CHANNEL and self.input_quant.strategy == QuantizationStrategy.TOKEN) if not (per_tensor or per_channel): - raise ValueError( - "For FP8 Fused MoE layers, we require per tensor " - "or channelwise, dynamic per token quantization. 
Found " - f"{self.weight_quant}, {self.input_quant}") + assert self.weight_quant.strategy == QuantizationStrategy.BLOCK + self.weight_block_size = self.weight_quant.block_structure + assert self.weight_quant.dynamic is not None + else: + self.weight_block_size = None + self.block_quant = self.weight_block_size is not None self.static_input_scales = not self.input_quant.dynamic if self.static_input_scales and per_channel: @@ -519,7 +526,8 @@ def __init__( # For GPUs that lack FP8 hardware support, we can leverage the Marlin # kernel for fast weight-only FP8 quantization self.use_marlin = (not current_platform.has_device_capability(89) - or envs.VLLM_TEST_FORCE_FP8_MARLIN) + or envs.VLLM_TEST_FORCE_FP8_MARLIN + and not self.block_quant) # Disable marlin for rocm if current_platform.is_rocm(): self.use_marlin = False @@ -531,8 +539,9 @@ def __init__( # cutlass path self.is_fp8_w8a8_sm100 = quant_config._is_fp8_w8a8_sm100( self.weight_quant, self.input_quant) - self.use_cutlass = (quant_config._is_fp8_w8a8_sm90( - self.weight_quant, self.input_quant) or self.is_fp8_w8a8_sm100) + self.use_cutlass = not self.block_quant and ( + quant_config._is_fp8_w8a8_sm90(self.weight_quant, self.input_quant) + or self.is_fp8_w8a8_sm100) self.disable_expert_map = False def create_weights(self, layer: torch.nn.Module, num_experts: int, @@ -547,6 +556,31 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, params_dtype = torch.float8_e4m3fn + if self.block_quant: + assert self.weight_block_size is not None + layer.weight_block_size = self.weight_block_size + tp_size = get_tensor_model_parallel_world_size() + block_n, block_k = ( + self.weight_block_size[0], + self.weight_block_size[1], + ) + # NOTE: To ensure proper alignment of the block-wise quantization + # scales, the output_size of the weights for both the gate and up + # layers must be divisible by block_n. + # Required by column parallel or enabling merged weights + if intermediate_size_per_partition % block_n != 0: + raise ValueError( + f"The output_size of gate's and up's weight = " + f"{intermediate_size_per_partition} is not divisible by " + f"weight quantization block_n = {block_n}.") + if (tp_size > 1 + and intermediate_size_per_partition % block_k != 0): + # Required by row parallel + raise ValueError( + f"The input_size of down's weight = " + f"{intermediate_size_per_partition} is not divisible by " + f"weight quantization block_k = {block_k}.") + # WEIGHTS w13_weight = torch.nn.Parameter(torch.empty( num_experts, @@ -602,6 +636,27 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, set_weight_attrs(w13_weight_scale, extra_weight_attrs) set_weight_attrs(w2_weight_scale, extra_weight_attrs) + elif self.weight_quant.strategy == QuantizationStrategy.BLOCK: + w13_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, + 2 * + ((intermediate_size_per_partition + block_n - 1) // block_n), + (hidden_size + block_k - 1) // block_k, + dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w13_weight_scale", w13_weight_scale) + w2_weight_scale = torch.nn.Parameter(torch.ones( + num_experts, (hidden_size + block_n - 1) // block_n, + (intermediate_size_per_partition + block_k - 1) // block_k, + dtype=torch.float32), + requires_grad=False) + layer.register_parameter("w2_weight_scale", w2_weight_scale) + # Add PER-CHANNEL quantization for FusedMoE.weight_loader. 
+ extra_weight_attrs.update( + {"quant_method": FusedMoeWeightScaleSupported.BLOCK.value}) + set_weight_attrs(w13_weight_scale, extra_weight_attrs) + set_weight_attrs(w2_weight_scale, extra_weight_attrs) + # INPUT_SCALES if self.static_input_scales: w13_input_scale = torch.nn.Parameter(torch.ones( @@ -706,6 +761,7 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: del layer.w2_input_scale if self.use_cutlass: + assert self.weight_quant.strategy != QuantizationStrategy.BLOCK device = layer.w13_weight.device # ab_strides1 and c_strides2 are the same self.ab_strides1_c_strides2 = torch.full( @@ -724,6 +780,29 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: device=device, dtype=torch.int64) + if is_deep_gemm_e8m0_used() and self.block_quant: + assert layer.weight_block_size is not None + # Re-quantise the expert weights so their scales are UE8M0. + block_sz = tuple(layer.weight_block_size) + requant_weight_ue8m0_inplace( + layer.w13_weight.data, + layer.w13_weight_scale.data, + block_sz, + ) + requant_weight_ue8m0_inplace( + layer.w2_weight.data, + layer.w2_weight_scale.data, + block_sz, + ) + + # Ensure column-major TMA alignment expected by DeepGEMM. + if expert_weight_is_col_major(layer.w13_weight_scale): + layer.w13_weight_scale = get_col_major_tma_aligned_tensor( + layer.w13_weight_scale) + if expert_weight_is_col_major(layer.w2_weight_scale): + layer.w2_weight_scale = get_col_major_tma_aligned_tensor( + layer.w2_weight_scale) + def maybe_make_prepare_finalize( self) -> Optional[mk.FusedMoEPrepareAndFinalize]: if self.use_marlin or self.rocm_aiter_moe_enabled: @@ -777,9 +856,10 @@ def select_gemm_impl( return experts # triton path - from vllm.model_executor.layers.fused_moe import TritonExperts - from vllm.model_executor.layers.fused_moe.fused_batched_moe import ( - BatchedTritonExperts) + from vllm.model_executor.layers.fused_moe.batched_triton_or_deep_gemm_moe import ( # noqa: E501 + BatchedTritonOrDeepGemmExperts) + from vllm.model_executor.layers.fused_moe.triton_deep_gemm_moe import ( + TritonOrDeepGemmExperts) assert not self.rocm_aiter_moe_enabled and not self.use_marlin @@ -790,14 +870,16 @@ def select_gemm_impl( assert max_num_tokens_per_rank is not None logger.debug("BatchedTritonExperts(%s)", self.__class__.__name__) - return BatchedTritonExperts( + return BatchedTritonOrDeepGemmExperts( max_num_tokens=max_num_tokens_per_rank, num_dispatchers=prepare_finalize.num_dispatchers(), quant_config=self.moe_quant_config, ) else: - logger.debug("TritonExperts(%s)", self.__class__.__name__) - return TritonExperts(self.moe_quant_config) + logger.debug("TritonOrDeepGemmExperts(%s)", + self.__class__.__name__) + return TritonOrDeepGemmExperts(self.moe_quant_config, + allow_deep_gemm=True) def get_fused_moe_quant_config( self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: @@ -816,6 +898,7 @@ def get_fused_moe_quant_config( a2_scale=layer.w2_input_scale, per_act_token_quant=per_act_token, per_out_ch_quant=per_channel_quant, + block_shape=layer.weight_block_size, ) def apply( diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index aec9c79f1ea8..2b24e052053c 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -33,10 +33,10 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import ( apply_fp8_block_linear, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, - 
create_fp8_weight_parameter, get_col_major_tma_aligned_tensor, - maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, - process_fp8_weight_tensor_strategy, requant_weight_ue8m0_inplace, - validate_fp8_block_shape) + create_fp8_weight_parameter, expert_weight_is_col_major, + get_col_major_tma_aligned_tensor, maybe_post_process_fp8_weight_block, + process_fp8_weight_block_strategy, process_fp8_weight_tensor_strategy, + requant_weight_ue8m0_inplace, validate_fp8_block_shape) from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin, prepare_moe_fp8_layer_for_marlin) @@ -64,12 +64,6 @@ logger = init_logger(__name__) -def _is_col_major(x: torch.Tensor) -> bool: - assert x.dim() == 3 - b, m, n = x.shape - return x.stride(0) == m * n and x.stride(1) == 1 and x.stride(2) == m - - class Fp8Config(QuantizationConfig): """Config class for FP8.""" @@ -660,10 +654,10 @@ def process_weights_after_loading(self, layer: Module) -> None: # DeepGemm scales need to be transposed and aligned. We try to do # it ahead of time for performance reasons. if self.allow_deep_gemm and not is_deep_gemm_e8m0_used(): - if _is_col_major(layer.w13_weight_scale_inv): + if expert_weight_is_col_major(layer.w13_weight_scale_inv): layer.w13_weight_scale_inv = \ get_col_major_tma_aligned_tensor(layer.w13_weight_scale_inv) - if _is_col_major(layer.w2_weight_scale_inv): + if expert_weight_is_col_major(layer.w2_weight_scale_inv): layer.w2_weight_scale_inv = \ get_col_major_tma_aligned_tensor(layer.w2_weight_scale_inv) @@ -811,10 +805,10 @@ def process_weights_after_loading(self, layer: Module) -> None: ) # Ensure column-major TMA alignment expected by DeepGEMM. - if _is_col_major(layer.w13_weight_scale_inv): + if expert_weight_is_col_major(layer.w13_weight_scale_inv): layer.w13_weight_scale_inv = get_col_major_tma_aligned_tensor( layer.w13_weight_scale_inv) - if _is_col_major(layer.w2_weight_scale_inv): + if expert_weight_is_col_major(layer.w2_weight_scale_inv): layer.w2_weight_scale_inv = get_col_major_tma_aligned_tensor( layer.w2_weight_scale_inv) diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index fc12483de0c0..d1d87b7ba12e 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -1014,3 +1014,9 @@ def apply_fp8_block_linear(layer: torch.nn.Module, input: torch.Tensor, cutlass_block_fp8_supported=cutlass_block_fp8_supported, use_aiter_and_is_supported=use_aiter_and_is_supported, ) + + +def expert_weight_is_col_major(x: torch.Tensor) -> bool: + assert x.dim() == 3 + b, m, n = x.shape + return x.stride(0) == m * n and x.stride(1) == 1 and x.stride(2) == m diff --git a/vllm/model_executor/warmup/deep_gemm_warmup.py b/vllm/model_executor/warmup/deep_gemm_warmup.py index 4d1829cd228c..f6df85a50238 100644 --- a/vllm/model_executor/warmup/deep_gemm_warmup.py +++ b/vllm/model_executor/warmup/deep_gemm_warmup.py @@ -53,9 +53,9 @@ def _extract_data_from_fused_moe_module( """ assert isinstance(m, FusedMoE) w13 = m.w13_weight - w13_s = m.w13_weight_scale_inv + w13_s = getattr(m, "w13_weight_scale_inv", m.w13_weight_scale) w2 = m.w2_weight - w2_s = m.w2_weight_scale_inv + w2_s = getattr(m, "w2_weight_scale_inv", m.w2_weight_scale) num_topk = m.top_k assert isinstance(w13, torch.Tensor) From 2357480b1abefb2b7177046133080917e1449324 Mon Sep 17 00:00:00 2001 From: rivos-shreeasish Date: 
Tue, 23 Sep 2025 09:14:22 -0700 Subject: [PATCH 278/518] [BugFix] Fix UB in per_token_group_quant.cu (#24913) Signed-off-by: Shreeasish Kumar --- csrc/quantization/fp8/per_token_group_quant.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/csrc/quantization/fp8/per_token_group_quant.cu b/csrc/quantization/fp8/per_token_group_quant.cu index f5b40e35b6e5..91d489fdef86 100644 --- a/csrc/quantization/fp8/per_token_group_quant.cu +++ b/csrc/quantization/fp8/per_token_group_quant.cu @@ -12,8 +12,8 @@ #include "../vectorization_utils.cuh" #include "../../dispatch_utils.h" -__device__ __forceinline__ float GroupReduceMax(float val, const int tid) { - unsigned mask = 0xffff; +__device__ __forceinline__ float GroupReduceMax(float val) { + unsigned mask = threadIdx.x % 32 >= 16 ? 0xffff0000 : 0x0000ffff; val = fmaxf(val, __shfl_xor_sync(mask, val, 8)); val = fmaxf(val, __shfl_xor_sync(mask, val, 4)); @@ -86,7 +86,7 @@ __global__ void per_token_group_quant_8bit_kernel( threads_per_group, // stride in group scalar_op_cache); // scalar handler - local_absmax = GroupReduceMax(local_absmax, lane_id); + local_absmax = GroupReduceMax(local_absmax); float y_s = local_absmax / max_8bit; if constexpr (SCALE_UE8M0) { From 846197f505fb7b7861a9688e2a09b514b9d3133d Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:44:37 -0400 Subject: [PATCH 279/518] [Log] Optimize kv cache memory log from Bytes to GiB (#25204) Signed-off-by: yewentao256 --- vllm/v1/worker/gpu_worker.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index ffea9bb35513..f59dacf13d85 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -383,11 +383,13 @@ def compile_or_warm_up_model(self) -> None: f"for non-torch memory, and {GiB(cuda_graph_memory_bytes)} " f"GiB for CUDAGraph memory. Replace gpu_memory_utilization " f"config with `--kv-cache-memory=" - f"{kv_cache_memory_bytes_to_requested_limit}` to fit into " - f"requested memory, or `--kv-cache-memory=" - f"{kv_cache_memory_bytes_to_gpu_limit}` to fully " + f"{kv_cache_memory_bytes_to_requested_limit}` " + f"({GiB(kv_cache_memory_bytes_to_requested_limit)} GiB) to fit " + f"into requested memory, or `--kv-cache-memory=" + f"{kv_cache_memory_bytes_to_gpu_limit}` " + f"({GiB(kv_cache_memory_bytes_to_gpu_limit)} GiB) to fully " f"utilize gpu memory. 
Current kv cache memory in use is " - f"{int(self.available_kv_cache_memory_bytes)} bytes.") + f"{GiB(self.available_kv_cache_memory_bytes)} GiB.") logger.debug(msg) From 527821d191e05e0414695fa8aea24ecaeb3d609c Mon Sep 17 00:00:00 2001 From: Ming Yang Date: Tue, 23 Sep 2025 09:45:39 -0700 Subject: [PATCH 280/518] Use macro guard CUDA functions for back compatibility in grouped_topk_kernel.cu (#25346) Signed-off-by: Ming Yang Signed-off-by: Rahul Tuli Co-authored-by: Rahul Tuli Co-authored-by: Claude Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Lu Fang <30275821+houseroad@users.noreply.github.com> Co-authored-by: Ye (Charlotte) Qi --- csrc/moe/grouped_topk_kernels.cu | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/csrc/moe/grouped_topk_kernels.cu b/csrc/moe/grouped_topk_kernels.cu index b5321f748e6b..c93f9d54d780 100644 --- a/csrc/moe/grouped_topk_kernels.cu +++ b/csrc/moe/grouped_topk_kernels.cu @@ -418,6 +418,15 @@ __device__ inline T neg_inf() { return cuda_cast(-cuda::std::numeric_limits::infinity()); } +template +__device__ inline bool is_finite(const T val) { +#if (__CUDACC_VER_MAJOR__ * 10000 + __CUDACC_VER_MINOR__ * 100 >= 120800) + return cuda::std::isfinite(val); +#else + return isfinite(cuda_cast(val)); +#endif +} + template __device__ void topk_with_k2(T* output, T const* input, cg::thread_block_tile<32> const& tile, @@ -533,7 +542,7 @@ __global__ void group_idx_and_topk_idx_kernel( // calculate group_idx int32_t target_num_min = WARP_SIZE - n_group + topk_group; // The check is necessary to avoid abnormal input - if (lane_id < n_group && cuda::std::isfinite(group_scores[lane_id])) { + if (lane_id < n_group && is_finite(group_scores[lane_id])) { value = group_scores[lane_id]; } @@ -568,11 +577,10 @@ __global__ void group_idx_and_topk_idx_kernel( int32_t offset = i_group * num_experts_per_group; for (int32_t i = lane_id; i < align_num_experts_per_group; i += WARP_SIZE) { - T candidates = - (i < num_experts_per_group) && - cuda::std::isfinite(scores_with_bias[offset + i]) - ? scores_with_bias[offset + i] - : neg_inf(); + T candidates = (i < num_experts_per_group) && + is_finite(scores_with_bias[offset + i]) + ? 
scores_with_bias[offset + i] + : neg_inf(); queue.add(candidates, offset + i); } if (group_scores[i_group] == topk_group_value) { From 100b630a604aff9586bb5493c22b571ad1a19570 Mon Sep 17 00:00:00 2001 From: Burkhard Ringlein Date: Tue, 23 Sep 2025 18:52:40 +0200 Subject: [PATCH 281/518] [V1][Kernel] Add triton implementation for `reshape_and_cache_flash` (#24503) Signed-off-by: Burkhard Ringlein Co-authored-by: Chih-Chieh Yang Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> --- .../benchmark_reshape_and_cache_flash.py | 78 ++++++-- tests/kernels/attention/test_cache.py | 27 ++- .../ops/triton_reshape_and_cache_flash.py | 176 ++++++++++++++++++ vllm/v1/attention/backends/triton_attn.py | 15 +- 4 files changed, 276 insertions(+), 20 deletions(-) create mode 100644 vllm/attention/ops/triton_reshape_and_cache_flash.py diff --git a/benchmarks/kernels/benchmark_reshape_and_cache_flash.py b/benchmarks/kernels/benchmark_reshape_and_cache_flash.py index d4648c18f31d..0aace571064a 100644 --- a/benchmarks/kernels/benchmark_reshape_and_cache_flash.py +++ b/benchmarks/kernels/benchmark_reshape_and_cache_flash.py @@ -9,6 +9,9 @@ from tabulate import tabulate from vllm import _custom_ops as ops +from vllm.attention.ops.triton_reshape_and_cache_flash import ( + triton_reshape_and_cache_flash, +) from vllm.logger import init_logger from vllm.platforms import current_platform from vllm.utils import ( @@ -31,6 +34,8 @@ def run_benchmark( kv_cache_dtype: str, kv_cache_layout: str, num_iters: int, + implementation: str, + benchmark_mode: str, device: str = "cuda", ) -> float: """Return latency (seconds) for given num_tokens.""" @@ -38,6 +43,14 @@ def run_benchmark( if kv_cache_dtype == "fp8" and head_size % 16: raise ValueError("fp8 kv-cache requires head_size to be a multiple of 16.") + if implementation not in ("cuda", "triton"): + raise ValueError( + f"Unsupported implementation: {implementation}. " + "Only 'cuda' and 'triton' are supported." + ) + if implementation == "triton" and kv_cache_layout == "HND": + return float("nan") # Triton does not support HND layout yet. + current_platform.seed_everything(42) torch.set_default_device(device) @@ -65,27 +78,49 @@ def run_benchmark( cache_layout=kv_cache_layout, ) key_cache, value_cache = key_caches[0], value_caches[0] + # to free unused memory + del key_caches, value_caches # compute per-kernel scaling factors for fp8 conversion (if used). 
k_scale = (key.amax() / 64.0).to(torch.float32) v_scale = (value.amax() / 64.0).to(torch.float32) + if implementation == "cuda": + function_under_test = lambda: ops.reshape_and_cache_flash( + key, # noqa: F821 + value, # noqa: F821 + key_cache, # noqa: F821 + value_cache, # noqa: F821 + slot_mapping, # noqa: F821 + kv_cache_dtype, + k_scale, + v_scale, + ) + else: + function_under_test = lambda: triton_reshape_and_cache_flash( + key, # noqa: F821 + value, # noqa: F821 + key_cache, # noqa: F821 + value_cache, # noqa: F821 + slot_mapping, # noqa: F821 + kv_cache_dtype, + k_scale, + v_scale, + ) + if benchmark_mode == "cudagraph": + g = torch.cuda.CUDAGraph() + with torch.cuda.graph(g): + function_under_test() + torch.cuda.synchronize() + function_under_test = lambda: g.replay() + def run_cuda_benchmark(n_iters: int) -> float: nonlocal key, value, key_cache, value_cache, slot_mapping torch.cuda.synchronize() start = time.perf_counter() for _ in range(n_iters): - ops.reshape_and_cache_flash( - key, - value, - key_cache, - value_cache, - slot_mapping, - kv_cache_dtype, - k_scale, - v_scale, - ) - torch.cuda.synchronize() + function_under_test() + torch.cuda.synchronize() end = time.perf_counter() return (end - start) / n_iters @@ -116,10 +151,16 @@ def main(args): kv_cache_dtype=args.kv_cache_dtype, kv_cache_layout=layout, num_iters=args.iters, + implementation=args.implementation, + benchmark_mode=args.mode, device="cuda", ) rows.append([n_tok, layout, f"{lat * 1e6:.3f}"]) + print( + f"Benchmark results for implementation {args.implementation}" + f" (measuring with {args.mode}):" + ) print(tabulate(rows, headers=["num_tokens", "layout", "latency (µs)"])) @@ -151,6 +192,21 @@ def main(args): ) parser.add_argument("--iters", type=int, default=100) + + parser.add_argument( + "--implementation", + type=str, + choices=["cuda", "triton"], + default="cuda", + ) + + parser.add_argument( + "--mode", + type=str, + choices=["cudagraph", "no_graph"], + default="cudagraph", + ) + args = parser.parse_args() main(args) diff --git a/tests/kernels/attention/test_cache.py b/tests/kernels/attention/test_cache.py index 69e96dfd2cb1..1325e6883132 100644 --- a/tests/kernels/attention/test_cache.py +++ b/tests/kernels/attention/test_cache.py @@ -39,6 +39,8 @@ # We assume fp8 is always enabled for testing. KV_CACHE_DTYPE = ["auto", "fp8"] +RESHAPE_FLASH_IMPLEMENTATIONS = ["cuda", "triton"] + @pytest.mark.parametrize("num_mappings", NUM_MAPPINGS) @pytest.mark.parametrize("num_layers", NUM_LAYERS) @@ -223,6 +225,7 @@ def test_reshape_and_cache( @pytest.mark.parametrize("device", CUDA_DEVICES) @pytest.mark.parametrize("kv_cache_dtype", KV_CACHE_DTYPE) @pytest.mark.parametrize("kv_cache_layout", CACHE_LAYOUTS) +@pytest.mark.parametrize("implementation", RESHAPE_FLASH_IMPLEMENTATIONS) @torch.inference_mode() def test_reshape_and_cache_flash( kv_cache_factory_flashinfer, @@ -236,9 +239,13 @@ def test_reshape_and_cache_flash( device: str, kv_cache_dtype: str, kv_cache_layout: str, + implementation: str, ) -> None: current_platform.seed_everything(seed) torch.set_default_device(device) + assert implementation in ["cuda", "triton"] + if implementation == "triton" and kv_cache_layout == "HND": + pytest.skip("Triton implementation only supports NHD layout.") # fp8 conversion requires continugous memory buffer. Reduce the number of # blocks and tokens to consume less memory. 
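[Editor's note] A minimal usage sketch of the Triton path exercised by this test, defined later in this patch in vllm/attention/ops/triton_reshape_and_cache_flash.py. It is not part of the diff; it assumes the patch is applied and a CUDA device is available, and all shapes and values below are illustrative only.

import torch
from vllm.attention.ops.triton_reshape_and_cache_flash import (
    triton_reshape_and_cache_flash)

# Illustrative NHD-layout tensors (arbitrary sizes, chosen for the sketch).
num_tokens, num_heads, head_size = 16, 8, 128
num_blocks, block_size = 32, 16
key = torch.randn(num_tokens, num_heads, head_size,
                  dtype=torch.float16, device="cuda")
value = torch.randn_like(key)
key_cache = torch.zeros(num_blocks, block_size, num_heads, head_size,
                        dtype=torch.float16, device="cuda")
value_cache = torch.zeros_like(key_cache)
slot_mapping = torch.arange(num_tokens, dtype=torch.int64, device="cuda")
k_scale = v_scale = torch.ones(1, dtype=torch.float32, device="cuda")

# "auto" keeps the cache in the activation dtype; "fp8" quantizes on store.
triton_reshape_and_cache_flash(key, value, key_cache, value_cache,
                               slot_mapping, "auto", k_scale, v_scale)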
@@ -298,12 +305,20 @@ def permute_and_compact(x): cloned_key_cache = key_cache_compact.clone() cloned_value_cache = value_cache_compact.clone() # Call the reshape_and_cache kernel. - opcheck(torch.ops._C_cache_ops.reshape_and_cache_flash, - (key, value, key_cache, value_cache, slot_mapping, kv_cache_dtype, - k_scale, v_scale), - cond=(head_size == HEAD_SIZES[0])) - ops.reshape_and_cache_flash(key, value, key_cache, value_cache, - slot_mapping, kv_cache_dtype, k_scale, v_scale) + if implementation == "cuda": + opcheck(torch.ops._C_cache_ops.reshape_and_cache_flash, + (key, value, key_cache, value_cache, slot_mapping, + kv_cache_dtype, k_scale, v_scale), + cond=(head_size == HEAD_SIZES[0])) + ops.reshape_and_cache_flash(key, value, key_cache, value_cache, + slot_mapping, kv_cache_dtype, k_scale, + v_scale) + elif implementation == "triton": + from vllm.attention.ops.triton_reshape_and_cache_flash import ( + triton_reshape_and_cache_flash) + triton_reshape_and_cache_flash(key, value, key_cache, value_cache, + slot_mapping, kv_cache_dtype, k_scale, + v_scale) key_cache_compact = permute_and_compact(key_cache) value_cache_compact = permute_and_compact(value_cache) diff --git a/vllm/attention/ops/triton_reshape_and_cache_flash.py b/vllm/attention/ops/triton_reshape_and_cache_flash.py new file mode 100644 index 000000000000..0b0c706626af --- /dev/null +++ b/vllm/attention/ops/triton_reshape_and_cache_flash.py @@ -0,0 +1,176 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import torch +import triton +import triton.language as tl + +from vllm.platforms import current_platform + + +@triton.jit +def reshape_and_cache_kernel_flash( + key_ptr, # [num_tokens, num_heads, head_size] + value_ptr, # [num_tokens, num_heads, head_size] + key_cache_ptr, # [num_blocks, block_size, num_heads, head_size] + value_cache_ptr, # [num_blocks, block_size, num_heads, head_size] + slot_mapping_ptr, # [num_tokens] + k_scale, # float32 + v_scale, # float32 + # strides + key_stride: tl.int64, + value_stride: tl.int64, + block_stride: tl.int64, + page_stride: tl.int64, + num_heads: tl.constexpr, + head_size: tl.constexpr, + block_size: tl.constexpr, + # FP8 flags + FP8_KV_CACHE: tl.constexpr, + # tune parameters + TILE_SIZE: tl.constexpr, +): + + token_idx = tl.program_id(axis=0) + slot_idx = tl.load(slot_mapping_ptr + token_idx).to(tl.int64) + if slot_idx < 0: + # Padding token that should be ignored. 
+        return
+
+    tile_i = tl.program_id(axis=1)
+    tile_offs = tl.arange(0, TILE_SIZE)
+    tile_pos = tile_i * TILE_SIZE + tile_offs
+
+    block_idx = slot_idx // block_size
+    block_offset = slot_idx % block_size
+
+    src_key_idx = token_idx * key_stride
+    src_value_idx = token_idx * value_stride
+
+    tgt_idx = block_idx * block_stride + block_offset * page_stride
+
+    # [TILE_SIZE]
+    key_load = tl.load(key_ptr + src_key_idx + tile_pos,
+                       mask=tile_pos < (num_heads * head_size))
+    if FP8_KV_CACHE:
+        if key_load.dtype.is_fp8():
+            key_tile = key_load
+        else:
+            # tl.store will do the correct implicit cast to fp8,
+            # based on the key_cache_ptr.dtype.element_ty
+            key_tile = key_load / tl.load(k_scale)
+    else:
+        key_tile = key_load
+
+    # [TILE_SIZE]
+    value_load = tl.load(value_ptr + src_value_idx + tile_pos,
+                         mask=tile_pos < (num_heads * head_size))
+    if FP8_KV_CACHE:
+        if value_load.dtype.is_fp8():
+            value_tile = value_load
+        else:
+            # tl.store will do the correct implicit cast to fp8,
+            # based on the value_cache_ptr.dtype.element_ty
+            value_tile = value_load / tl.load(v_scale)
+    else:
+        value_tile = value_load
+
+    tl.store(
+        key_cache_ptr + tgt_idx + tile_pos,
+        key_tile,
+        mask=tile_pos < (num_heads * head_size),
+    )
+    tl.store(
+        value_cache_ptr + tgt_idx + tile_pos,
+        value_tile,
+        mask=tile_pos < (num_heads * head_size),
+    )
+    return
+
+
+def triton_reshape_and_cache_flash(
+    key: torch.Tensor,  # [num_tokens, num_heads, head_size]
+    value: torch.Tensor,  # [num_tokens, num_heads, head_size]
+    # [num_blocks, block_size, num_heads, head_size]
+    key_cache: torch.Tensor,
+    # [num_blocks, block_size, num_heads, head_size]
+    value_cache: torch.Tensor,
+    slot_mapping: torch.Tensor,  # [num_tokens]
+    kv_cache_dtype: str,  # "auto", "fp8"
+    k_scale: torch.Tensor,  # float32
+    v_scale: torch.Tensor,  # float32
+):
+    num_tokens = key.shape[0]
+    num_heads = key.shape[1]
+    head_size = key.shape[2]
+    block_size = key_cache.shape[1]
+    n = num_heads * head_size
+
+    key_stride = key.stride()[0]
+    value_stride = value.stride()[0]
+    block_stride = key_cache.stride()[0]
+    page_stride = key_cache.stride()[1]
+
+    head_stride = key_cache.stride()[2]
+    assert head_stride == head_size, "only contiguous heads are supported"
+
+    assert kv_cache_dtype == "auto" or kv_cache_dtype.startswith("fp8"), \
+        f"unsupported kv_cache_dtype (str), got {kv_cache_dtype}."
+    kv_cache_torch_dtype = current_platform.fp8_dtype() if \
+        kv_cache_dtype.startswith("fp8") else key_cache.dtype
+
+    if key_cache.dtype != kv_cache_torch_dtype and kv_cache_dtype.startswith(
+            "fp8"):
+        # to avoid erroneous implicit cast in triton kernel (tl.store to uint8)
+        # (e.g. explicit cast to fp8e4m3fnuz is not supported in triton 3.4)
+        key_cache = key_cache.view(kv_cache_torch_dtype)
+        value_cache = value_cache.view(kv_cache_torch_dtype)
+    assert kv_cache_dtype != torch.uint8, "explicit fp8 cast and store to "\
+        "uint8 is not supported by triton reshape_and_cache_flash"
+
+    FP8_KV_CACHE = kv_cache_dtype.startswith("fp8")
+    assert (not FP8_KV_CACHE) or kv_cache_torch_dtype in [
+        torch.float8_e4m3fn, torch.float8_e5m2, torch.uint8,
+        torch.float8_e4m3fnuz], \
+        "unsupported dtype of KV cache tensor, got "\
+        f"{kv_cache_torch_dtype}. Supported kv cache dtypes: fp8e4m3fn, " \
+        "fp8e5m2, uint8, bfloat16, float16, float32, fp8e4m3fnuz."
+ + # heuristics instead of autotuning + TILE_SIZE = min(2048, triton.next_power_of_2(n)) + if torch.version.hip: + num_stages = 4 + num_warps = 8 + else: # cuda + num_stages = 10 + num_warps = 16 + if torch.cuda.get_device_capability(key.device)[0] < 9: + TILE_SIZE = min(512, TILE_SIZE) + + # TODO(ngl): maybe replace with static launch grid to avoid overhead if + # using cudagraphs + grid = lambda meta: (int(num_tokens), triton.cdiv(n, meta["TILE_SIZE"])) + + reshape_and_cache_kernel_flash[grid]( + key_ptr=key, + value_ptr=value, + key_cache_ptr=key_cache, + value_cache_ptr=value_cache, + slot_mapping_ptr=slot_mapping, + k_scale=k_scale, + v_scale=v_scale, + # strides + key_stride=key_stride, + value_stride=value_stride, + block_stride=block_stride, + page_stride=page_stride, + num_heads=num_heads, + head_size=head_size, + block_size=block_size, + # FP8 flags + FP8_KV_CACHE=FP8_KV_CACHE, + # autotune parameters + TILE_SIZE=TILE_SIZE, + num_warps=num_warps, + num_stages=num_stages, + ) diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index 722c23f150cd..f9fbd05efc67 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -8,6 +8,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType) +from vllm.attention.ops.triton_reshape_and_cache_flash import ( + triton_reshape_and_cache_flash) from vllm.attention.ops.triton_unified_attention import unified_attention from vllm.config import VllmConfig from vllm.logger import init_logger @@ -291,7 +293,13 @@ def forward( if self.kv_sharing_target_layer_name is None: # Reshape the input keys and values and store them in the cache. # Skip this if sharing KV cache with an earlier attention layer. - ops.reshape_and_cache_flash( + if self.kv_cache_dtype.startswith("fp8"): + key_cache = key_cache.view(self.fp8_dtype) + value_cache = value_cache.view(self.fp8_dtype) + # triton kernel does not support uint8 kv_cache + # (because some explicit casts (e.g. float8_e4m3fnuz) + # are not supported) + triton_reshape_and_cache_flash( key, value, key_cache, @@ -303,8 +311,9 @@ def forward( ) if self.kv_cache_dtype.startswith("fp8"): - key_cache = key_cache.view(self.fp8_dtype) - value_cache = value_cache.view(self.fp8_dtype) + if key_cache.dtype != self.fp8_dtype: + key_cache = key_cache.view(self.fp8_dtype) + value_cache = value_cache.view(self.fp8_dtype) num_tokens, num_heads, head_size = query.shape assert layer._q_scale_float == 1.0, \ "A non 1.0 q_scale is not currently supported." From 24e822274562790a8e7cd7fde2972be719eb38cb Mon Sep 17 00:00:00 2001 From: Weida Hong Date: Wed, 24 Sep 2025 01:34:58 +0800 Subject: [PATCH 282/518] [Misc] Reduce initialization time of auto_tune (#23682) Signed-off-by: Weida Hong --- benchmarks/auto_tune/auto_tune.sh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/auto_tune/auto_tune.sh b/benchmarks/auto_tune/auto_tune.sh index ed3679b66f80..b333ba9cd8e9 100644 --- a/benchmarks/auto_tune/auto_tune.sh +++ b/benchmarks/auto_tune/auto_tune.sh @@ -103,10 +103,15 @@ start_server() { VLLM_USE_V1=1 VLLM_SERVER_DEV_MODE=1 \ vllm serve "${common_args_array[@]}" > "$vllm_log" 2>&1 & fi + local server_pid=$! # wait for 10 minutes... server_started=0 for i in {1..60}; do + # This line checks whether the server is still alive or not, + # since that we should always have permission to send signal to the server process. 
+ kill -0 $server_pid 2> /dev/null || break + RESPONSE=$(curl -s -X GET "http://0.0.0.0:8004/health" -w "%{http_code}" -o /dev/stdout) STATUS_CODE=$(echo "$RESPONSE" | tail -n 1) if [[ "$STATUS_CODE" -eq 200 ]]; then @@ -118,7 +123,7 @@ start_server() { done if (( ! server_started )); then - echo "server did not start within 10 minutes. Please check server log at $vllm_log". + echo "server did not start within 10 minutes or crashed. Please check server log at $vllm_log". return 1 else return 0 From 867ecdd1c87ba17cecf8a97b262e1a8117795e94 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Date: Tue, 23 Sep 2025 13:46:40 -0400 Subject: [PATCH 283/518] [Spec Decode][CI] Add e2e test for `examples/spec_decode.py` and prevent breaking Acceptance Length (#24531) Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Co-authored-by: Roger Wang --- .buildkite/test-pipeline.yaml | 2 ++ examples/offline_inference/spec_decode.py | 42 ++++++++++++++++++++--- 2 files changed, 40 insertions(+), 4 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c4ea4b675649..49316eb4f607 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -329,6 +329,8 @@ steps: - python3 offline_inference/basic/classify.py - python3 offline_inference/basic/embed.py - python3 offline_inference/basic/score.py + - python3 offline_inference/spec_decode.py --test --method eagle --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048 + - python3 offline_inference/spec_decode.py --test --method eagle3 --num_spec_tokens 3 --dataset-name hf --dataset-path philschmid/mt-bench --num-prompts 80 --temp 0 --top-p 1.0 --top-k -1 --tp 1 --enable-chunked-prefill --max-model-len 2048 - label: Platform Tests (CUDA) # 4min timeout_in_minutes: 15 diff --git a/examples/offline_inference/spec_decode.py b/examples/offline_inference/spec_decode.py index 004e75b20464..ce078bce0b75 100644 --- a/examples/offline_inference/spec_decode.py +++ b/examples/offline_inference/spec_decode.py @@ -49,6 +49,7 @@ def get_custom_mm_prompts(num_prompts): def parse_args(): parser = FlexibleArgumentParser() add_dataset_parser(parser) + parser.add_argument("--test", action="store_true") parser.add_argument( "--method", type=str, @@ -60,6 +61,7 @@ def parse_args(): parser.add_argument("--tp", type=int, default=1) parser.add_argument("--enforce-eager", action="store_true") parser.add_argument("--enable-chunked-prefill", action="store_true") + parser.add_argument("--max-model-len", type=int, default=16384) parser.add_argument("--temp", type=float, default=0) parser.add_argument("--top-p", type=float, default=1.0) parser.add_argument("--top-k", type=int, default=-1) @@ -71,8 +73,7 @@ def parse_args(): return parser.parse_args() -def main(): - args = parse_args() +def main(args): args.endpoint_type = "openai-chat" model_dir = args.model_dir @@ -134,7 +135,7 @@ def main(): gpu_memory_utilization=0.8, speculative_config=speculative_config, disable_log_stats=False, - max_model_len=16384, + max_model_len=args.max_model_len, limit_mm_per_prompt={"image": 5}, disable_chunked_mm_input=True, ) @@ -198,6 +199,39 @@ def main(): acceptance_rate = acceptance_counts[i] / num_drafts if num_drafts > 0 else 0 print(f"acceptance at token {i}: {acceptance_rate:.2f}") + return acceptance_length + if __name__ == "__main__": - main() + args = 
parse_args() + acceptance_length = main(args) + + if args.test: + # takes ~30s to run on 1xH100 + assert args.method in ["eagle", "eagle3"] + assert args.tp == 1 + assert args.num_spec_tokens == 3 + assert args.dataset_name == "hf" + assert args.dataset_path == "philschmid/mt-bench" + assert args.num_prompts == 80 + assert args.temp == 0 + assert args.top_p == 1.0 + assert args.top_k == -1 + assert args.enable_chunked_prefill + + # check acceptance length is within 2% of expected value + rtol = 0.02 + expected_acceptance_length = 2.296 if args.method == "eagle" else 2.811 + + assert ( + acceptance_length <= (1 + rtol) * expected_acceptance_length + and acceptance_length >= (1 - rtol) * expected_acceptance_length + ), ( + f"acceptance_length {acceptance_length} is not " + f"within {rtol * 100}% of {expected_acceptance_length}" + ) + + print( + f"Test passed! Expected AL: " + f"{expected_acceptance_length}, got {acceptance_length}" + ) From 5abb1179012268a587043dbc1e5afccb4e41c3a5 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Wed, 24 Sep 2025 02:19:25 +0800 Subject: [PATCH 284/518] [Core] Ensure LoRA linear respect the base_layer's tp_size and tp_rank (#25487) Signed-off-by: Jee Jee Li --- vllm/lora/layers/base_linear.py | 5 +-- vllm/lora/layers/column_parallel_linear.py | 40 ++++++++-------------- vllm/lora/layers/replicated_linear.py | 1 - vllm/lora/layers/row_parallel_linear.py | 15 +++----- vllm/lora/lora_weights.py | 4 +-- 5 files changed, 24 insertions(+), 41 deletions(-) diff --git a/vllm/lora/layers/base_linear.py b/vllm/lora/layers/base_linear.py index 6cf5815ef12d..ed294b0aedaf 100644 --- a/vllm/lora/layers/base_linear.py +++ b/vllm/lora/layers/base_linear.py @@ -24,11 +24,12 @@ def __init__(self, base_layer: LinearBase): super().__init__() self.base_layer = base_layer self.input_size = self.base_layer.input_size + # Ensure tp_size and tp_rank consistency with the base_layer. + self.tp_size = self.base_layer.tp_size + self.tp_rank = self.base_layer.tp_rank self.device = _get_lora_device(self.base_layer) self.lora_bias_stacked: Optional[tuple[torch.Tensor, ...]] = None - self.output_slices: tuple[int, ...] - self.tp_size: int self.output_size: int self.n_slices: int diff --git a/vllm/lora/layers/column_parallel_linear.py b/vllm/lora/layers/column_parallel_linear.py index fa4eb272a69f..6284576446c8 100644 --- a/vllm/lora/layers/column_parallel_linear.py +++ b/vllm/lora/layers/column_parallel_linear.py @@ -8,9 +8,7 @@ from transformers import PretrainedConfig from vllm.config.lora import LoRAConfig -from vllm.distributed import (get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size, - tensor_model_parallel_all_gather) +from vllm.distributed import tensor_model_parallel_all_gather from vllm.distributed.utils import divide from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, @@ -85,7 +83,6 @@ def __init__(self, base_layer: ColumnParallelLinear) -> None: # inconsistent when TP is greater than 1. self.is_merged_col_linear = type( base_layer) is MergedColumnParallelLinear - self.tp_size = get_tensor_model_parallel_world_size() self.output_size = self.base_layer.output_size_per_partition # There is only one LoRA layer self.n_slices = 1 @@ -97,22 +94,20 @@ def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: # Applicable to cases where the base_layer is # MergedColumnParallelLinear. 
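        # (Illustration, not in the patch: for a merged projection, e.g. gate_up,
        #  with per-partition output_size = 2 * s, rank r keeps rows
        #  [r*s, (r+1)*s) of the first half of lora_b and the matching rows of
        #  the second half.)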
if self.is_merged_col_linear: - tp_rank = get_tensor_model_parallel_rank() shard_size = self.output_size // 2 offset = lora_b.shape[0] // 2 - left_weight = lora_b[tp_rank * shard_size:(tp_rank + 1) * + left_weight = lora_b[self.tp_rank * shard_size:(self.tp_rank + 1) * shard_size, :] - right_weight = lora_b[offset + tp_rank * shard_size:offset + - (tp_rank + 1) * shard_size, :] + right_weight = lora_b[offset + self.tp_rank * shard_size:offset + + (self.tp_rank + 1) * shard_size, :] lora_b = torch.cat([left_weight, right_weight], dim=0) # Applicable to cases where the base_layer is # ColumnParallelLinear. else: - tensor_model_parallel_rank = get_tensor_model_parallel_rank() shard_size = self.output_size - start_idx = tensor_model_parallel_rank * shard_size - end_idx = (tensor_model_parallel_rank + 1) * shard_size + start_idx = self.tp_rank * shard_size + end_idx = (self.tp_rank + 1) * shard_size lora_b = lora_b[start_idx:end_idx, :] return lora_b @@ -120,10 +115,9 @@ def slice_bias(self, bias: torch.Tensor) -> torch.Tensor: # TODO: Fix the slicing logic of bias. if bias is None: return bias - tensor_model_parallel_rank = get_tensor_model_parallel_rank() shard_size = self.output_size - start_idx = tensor_model_parallel_rank * shard_size - end_idx = (tensor_model_parallel_rank + 1) * shard_size + start_idx = self.tp_rank * shard_size + end_idx = (self.tp_rank + 1) * shard_size bias = bias[start_idx:end_idx] return bias @@ -144,7 +138,7 @@ def forward( # Matrix multiply. output_parallel = self.apply(input_, bias) - if self.base_layer.gather_output: + if self.base_layer.gather_output and self.tp_size > 1: # All-gather across the partitions. output = tensor_model_parallel_all_gather(output_parallel) else: @@ -185,8 +179,6 @@ def __init__( QKVParallelLinear]) -> None: super().__init__(base_layer) # There are two LoRA layers - self.tp_size = get_tensor_model_parallel_world_size() - self.tp_rank = get_tensor_model_parallel_rank() # the output_sizes in MergedColumnParallelLinear is not sharded by tp # we need to divide it by the tp_size to get correct slices size output_sizes = self.base_layer.output_sizes @@ -341,9 +333,9 @@ def __init__(self, base_layer: QKVParallelLinear) -> None: self.n_slices = 1 def slice_lora_b(self, lora_b: torch.Tensor) -> torch.Tensor: - tp_rank = get_tensor_model_parallel_rank() - self.q_shard_id = tp_rank - self.kv_shard_id = tp_rank // self.base_layer.num_kv_head_replicas + + self.q_shard_id = self.tp_rank + self.kv_shard_id = self.tp_rank // self.base_layer.num_kv_head_replicas lora_b_q = lora_b[self.q_proj_shard_size * self.q_shard_id:self.q_proj_shard_size * (self.q_shard_id + 1), :] @@ -397,8 +389,6 @@ def __init__(self, base_layer: QKVParallelLinear) -> None: super().__init__(base_layer) # There are three LoRA layer. self.n_slices = len(self.base_layer.output_sizes) - self.tp_size = get_tensor_model_parallel_world_size() - self.tp_rank = get_tensor_model_parallel_rank() self.q_proj_shard_size = (self.base_layer.num_heads * self.base_layer.head_size) @@ -461,9 +451,8 @@ class ColumnParallelLinearWithShardedLoRA(ColumnParallelLinearWithLoRA): # Therefore, the sharding of `lora_a` only needs to correspond with the # gather operation. 
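    # (Illustration, not in the patch: if each rank's lora_a shard holds 16
    #  rows, tp_rank == 2 keeps rows [32, 48) of the full lora_a.)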
def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: - tp_rank = get_tensor_model_parallel_rank() shard_size = self.lora_a_stacked[0].shape[2] - start_idx = tp_rank * shard_size + start_idx = self.tp_rank * shard_size lora_a = lora_a[start_idx:start_idx + shard_size, :] return lora_a @@ -547,9 +536,8 @@ class QKVParallelLinearWithShardedLoRA(QKVParallelLinearWithLoRA): """ def slice_lora_a(self, lora_a: torch.Tensor) -> torch.Tensor: - tp_rank = get_tensor_model_parallel_rank() shard_size = self.lora_a_stacked[0].shape[2] - start_idx = tp_rank * shard_size + start_idx = self.tp_rank * shard_size lora_a = lora_a[start_idx:start_idx + shard_size, :] return lora_a diff --git a/vllm/lora/layers/replicated_linear.py b/vllm/lora/layers/replicated_linear.py index 3356297c1537..18a8f13ed942 100644 --- a/vllm/lora/layers/replicated_linear.py +++ b/vllm/lora/layers/replicated_linear.py @@ -18,7 +18,6 @@ class ReplicatedLinearWithLoRA(BaseLinearLayerWithLoRA): def __init__(self, base_layer: ReplicatedLinear) -> None: super().__init__(base_layer, ) # To ensure interface compatibility, set to 1 always. - self.tp_size = 1 self.output_size = self.base_layer.output_size self.n_slices = 1 diff --git a/vllm/lora/layers/row_parallel_linear.py b/vllm/lora/layers/row_parallel_linear.py index cac2c92136dc..d468655e629a 100644 --- a/vllm/lora/layers/row_parallel_linear.py +++ b/vllm/lora/layers/row_parallel_linear.py @@ -8,9 +8,7 @@ from transformers import PretrainedConfig from vllm.config.lora import LoRAConfig -from vllm.distributed import (get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size, - split_tensor_along_last_dim, +from vllm.distributed import (split_tensor_along_last_dim, tensor_model_parallel_all_reduce) # yapf: disable from vllm.model_executor.layers.linear import RowParallelLinear @@ -25,12 +23,9 @@ class RowParallelLinearWithLoRA(BaseLinearLayerWithLoRA): def __init__(self, base_layer: RowParallelLinear) -> None: super().__init__(base_layer) - self.tp_size = get_tensor_model_parallel_world_size() # reset input_size self.input_size = self.base_layer.input_size_per_partition self.output_size = self.base_layer.output_size - - self.tp_rank = get_tensor_model_parallel_rank() # There is only one LoRA layer. self.n_slices = 1 @@ -68,12 +63,12 @@ def forward( else: # TODO: simplify code below splitted_input = split_tensor_along_last_dim( - input_, num_partitions=self.base_layer.tp_size) + input_, num_partitions=self.tp_size) input_parallel = splitted_input[self.tp_rank].contiguous() # Matrix multiply. 
output_parallel = self.apply(input_parallel) - if self.base_layer.reduce_results and self.base_layer.tp_size > 1: + if self.base_layer.reduce_results and self.tp_size > 1: output_ = tensor_model_parallel_all_reduce(output_parallel) else: output_ = output_parallel @@ -154,8 +149,8 @@ def apply(self, buffer, x, self.lora_a_stacked, 1.0) if not current_platform.can_update_inplace(): buffer = shrunk_buffer - - buffer = tensor_model_parallel_all_reduce(buffer) + if self.tp_size>1: + buffer = tensor_model_parallel_all_reduce(buffer) # following S-LoRA, allows the fusing of all_gather and all_reduce # by adding the column partitioned lora output to a slice of output diff --git a/vllm/lora/lora_weights.py b/vllm/lora/lora_weights.py index e3198fb3d3ae..90e18217d28b 100644 --- a/vllm/lora/lora_weights.py +++ b/vllm/lora/lora_weights.py @@ -48,11 +48,11 @@ def optimize(self) -> "LoRALayerWeights": @property def input_dim(self) -> int: - return self.lora_a.shape[0] + return self.lora_a.shape[1] @property def output_dim(self) -> int: - return self.lora_b.shape[1] + return self.lora_b.shape[0] @property def is_packed(self) -> bool: From a3a7828010f4ab8e608fa257d09ab31846e83a9a Mon Sep 17 00:00:00 2001 From: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> Date: Tue, 23 Sep 2025 11:31:45 -0700 Subject: [PATCH 285/518] [ROCm] Add skinny gemm bias support for dtypes fp16,bf16,fp8 (#24988) Signed-off-by: Hashem Hashemi Signed-off-by: Hashem Hashemi <159079214+amd-hhashemi@users.noreply.github.com> --- csrc/rocm/ops.h | 9 +- csrc/rocm/skinny_gemms.cu | 181 ++++++++++++++---- csrc/rocm/torch_bindings.cpp | 5 +- .../quantization/test_rocm_skinny_gemms.py | 80 ++++++-- vllm/_custom_ops.py | 23 ++- .../layers/quantization/utils/w8a8_utils.py | 8 +- vllm/model_executor/layers/utils.py | 6 +- 7 files changed, 233 insertions(+), 79 deletions(-) diff --git a/csrc/rocm/ops.h b/csrc/rocm/ops.h index b6ee2656746c..edf7aff1abaa 100644 --- a/csrc/rocm/ops.h +++ b/csrc/rocm/ops.h @@ -5,11 +5,14 @@ torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b, const int64_t rows_per_block); -torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, +torch::Tensor wvSplitK(const at::Tensor& in_a, const at::Tensor& in_b, + const c10::optional& in_bias, const int64_t CuCount); -void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, - at::Tensor& scale_a, at::Tensor& scale_b, const int64_t CuCount); +void wvSplitKQ(const at::Tensor& in_a, const at::Tensor& in_b, + const c10::optional& in_bias, at::Tensor& out_c, + const at::Tensor& scale_a, const at::Tensor& scale_b, + const int64_t CuCount); void paged_attention( torch::Tensor& out, torch::Tensor& exp_sums, torch::Tensor& max_logits, diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu index eb47139208c9..52119d52f6d1 100644 --- a/csrc/rocm/skinny_gemms.cu +++ b/csrc/rocm/skinny_gemms.cu @@ -292,8 +292,9 @@ torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b, template __global__ void __launch_bounds__(WvPrGrp* THRDS) - wvSplitK_hf_sml_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, + wvSplitK_hf_sml_(const int K, const int M, const int Bx, const int By, + const scalar_t* B, const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { constexpr int max_lds_len = LDS_SIZE / 2; #if defined(__HIP__MI3XX__) @@ -484,7 +485,14 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { 
for (int i = 0; i < YTILE; i++) { - // if (commitColumn[i]) C[m + i + n * M] = __float2half(sum[n][i]); + if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += __half2float(BIAS[(m + i) % Bx + (n % By) * M]); + } else if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); + } C[m + i + n * M] = __float2s(sum[n][i]); } } @@ -529,7 +537,9 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { for (int i = 0; i < YTILE; i++) { - // if (commitColumn[i]) C[n + i + m * N] = __float2half(sum[n][i]); + if (BIAS) + sum4[n][i][0] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); C[m + i + n * M] = __float2bfloat16(sum4[n][i][0]); } } @@ -541,8 +551,10 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) #else // !defined(__HIP__GFX9__) TODO: Add NAVI support template -__global__ void wvSplitK_hf_sml_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, +__global__ void wvSplitK_hf_sml_(const int K, const int M, const int Bx, + const int By, const scalar_t* B, + const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { UNREACHABLE_CODE } @@ -553,8 +565,9 @@ __global__ void wvSplitK_hf_sml_(const int K, const int M, const scalar_t* B, template __global__ void __launch_bounds__(WvPrGrp* THRDS) - wvSplitK_hf_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, + wvSplitK_hf_(const int K, const int M, const int Bx, const int By, + const scalar_t* B, const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { constexpr int max_lds_len = LDS_SIZE / 2; #if defined(__HIP__MI3XX__) @@ -772,8 +785,17 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { for (int i = 0; i < YTILE; i++) { - if (commitColumn[i]) + if (commitColumn[i]) { + if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += __half2float(BIAS[(m + i) % Bx + (n % By) * M]); + } else if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); + } C[m + i + n * M] = __float2s(sum[n][i]); + } } } } @@ -818,8 +840,12 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { for (int i = 0; i < YTILE; i++) { - // if (commitColumn[i]) C[n + i + m * N] = __float2half(sum[n][i]); - C[m + i + n * M] = __float2bfloat16(sum4[n][i][0]); + if (commitColumn[i]) { + if (BIAS) + sum4[n][i][0] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); + C[m + i + n * M] = __float2bfloat16(sum4[n][i][0]); + } } } } @@ -842,8 +868,10 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) #else // !defined(__HIP__GFX9__) TODO: Add NAVI support template -__global__ void wvSplitK_hf_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, +__global__ void wvSplitK_hf_(const int K, const int M, const int Bx, + const int By, const scalar_t* B, + const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { UNREACHABLE_CODE } @@ -854,8 +882,9 @@ __global__ void wvSplitK_hf_(const int K, const int M, const scalar_t* B, template __global__ void __launch_bounds__(WvPrGrp* THRDS) - wvSplitK_hf_big_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, + 
wvSplitK_hf_big_(const int K, const int M, const int Bx, const int By, + const scalar_t* B, const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { constexpr int max_lds_len = LDS_SIZE / 2; #if defined(__HIP__MI3XX__) @@ -1124,8 +1153,17 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { for (int i = 0; i < YTILE; i++) { - if (commitColumn[i]) + if (commitColumn[i]) { + if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += __half2float(BIAS[(m + i) % Bx + (n % By) * M]); + } else if constexpr (std::is_same_v) { + if (BIAS) + sum[n][i] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); + } C[m + i + n * M] = __float2s(sum[n][i]); + } } } } @@ -1166,8 +1204,12 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 63) { for (int n = 0; n < N; n++) { for (int i = 0; i < YTILE; i++) { - // if (commitColumn[i]) C[n + i + m * N] = __float2half(sum[n][i]); - C[m + i + n * M] = __float2bfloat16(sum4[n][i][0]); + if (commitColumn[i]) { + if (BIAS) + sum4[n][i][0] += + __bfloat162float(BIAS[(m + i) % Bx + (n % By) * M]); + C[m + i + n * M] = __float2bfloat16(sum4[n][i][0]); + } } } } @@ -1190,8 +1232,10 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) #else // !defined(__HIP__GFX9__) TODO: Add NAVI support template -__global__ void wvSplitK_hf_big_(const int K, const int M, const scalar_t* B, - const scalar_t* __restrict__ A, scalar_t* C, +__global__ void wvSplitK_hf_big_(const int K, const int M, const int Bx, + const int By, const scalar_t* B, + const scalar_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const int _WvPrGrp, const int CuCount) { UNREACHABLE_CODE } @@ -1226,11 +1270,20 @@ int mindiv(int N, int div1, int div2) { return rtn; } -torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, +torch::Tensor wvSplitK(const at::Tensor& in_a, const at::Tensor& in_b, + const c10::optional& in_bias, const int64_t CuCount) { auto M_in = in_a.size(0); auto K_in = in_a.size(1); auto N_in = in_b.size(0); + auto Bx_in = + (in_bias.has_value() && in_bias->numel() > 0) + ? (in_bias->sizes().size() == 2) ? in_bias->size(1) : in_bias->size(0) + : 1; + auto By_in = (in_bias.has_value() && in_bias->numel() > 0 && + in_bias->sizes().size() == 2) + ? 
in_bias->size(0) + : 1; TORCH_CHECK(in_a.dtype() == in_b.dtype()); TORCH_CHECK(K_in % 8 == 0, "k % 8 == 0"); @@ -1254,18 +1307,18 @@ torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, if ((K_in * N_in <= max_lds_len) && (M_in % _YTILEs == 0)) { \ int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \ wvSplitK_hf_sml_ \ - <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ - CuCount); \ + <<>>(K_in, M_in, Bx_in, By_in, af4, bf4, \ + biasf4, c, __wvPrGrp, CuCount); \ } else if (K_in * N_in <= max_lds_len * 1.2) { \ int __wvPrGrp = mindiv(M_in, CuCount * _YTILEm, _WvPrGrp); \ wvSplitK_hf_ \ - <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ - CuCount); \ + <<>>(K_in, M_in, Bx_in, By_in, af4, bf4, \ + biasf4, c, __wvPrGrp, CuCount); \ } else { \ int __wvPrGrp = mindiv(M_in, CuCount * _YTILEb, _WvPrGrp); \ wvSplitK_hf_big_ \ - <<>>(K_in, M_in, af4, bf4, c, __wvPrGrp, \ - CuCount); \ + <<>>(K_in, M_in, Bx_in, By_in, af4, bf4, \ + biasf4, c, __wvPrGrp, CuCount); \ } \ } @@ -1273,6 +1326,10 @@ torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, using fptype = typename scalar::type; fptype* af4 = reinterpret_cast(in_a.data_ptr()); const fptype* bf4 = reinterpret_cast(in_b.data_ptr()); + const fptype* biasf4 = + (in_bias.has_value() && in_bias->numel() > 0) + ? reinterpret_cast(in_bias->data_ptr()) + : nullptr; fptype* c = reinterpret_cast(out_c.data_ptr()); switch (N_in) { case 1: @@ -1300,8 +1357,9 @@ torch::Tensor wvSplitK(at::Tensor& in_a, at::Tensor& in_b, template __global__ void __launch_bounds__(WvPrGrp* THRDS) - wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, const fp8_t* B, - const fp8_t* __restrict__ A, scalar_t* C, + wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, const int Bx, + const int By, const fp8_t* B, const fp8_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const float* __restrict__ s_A, const float* __restrict__ s_B, const int _WvPrGrp, const int CuCount) { @@ -1453,7 +1511,17 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) if (threadIdx.x == 0) { for (int n = 0; n < N; n++) { for (int y = 0; y < YTILE; y++) { - C[m + y + n * M] = __float2s(sum[n][y][0] * sA * sB); + if (y + m >= M) break; // To avoid mem access fault. 
+ sum[n][y][0] *= sA * sB; + if constexpr (std::is_same_v) { + if (BIAS) + sum[n][y][0] += __half2float(BIAS[(m + y) % Bx + (n % By) * M]); + } else if constexpr (std::is_same_v) { + if (BIAS) + sum[n][y][0] += + __bfloat162float(BIAS[(m + y) % Bx + (n % By) * M]); + } + C[m + y + n * M] = __float2s(sum[n][y][0]); // * sA * sB); } } } @@ -1465,7 +1533,9 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) template __global__ void wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, - const fp8_t* B, const fp8_t* __restrict__ A, + const int Bx, const int By, const fp8_t* B, + const fp8_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const float* __restrict__ s_A, const float* __restrict__ s_B, const int _WvPrGrp, const int CuCount) { @@ -1477,8 +1547,9 @@ __global__ void wvSplitKQ_hf_sml_(const int K, const int Kp, const int M, template __global__ void __launch_bounds__(WvPrGrp* THRDS) - wvSplitKQ_hf_(const int K, const int Kp, const int M, const fp8_t* B, - const fp8_t* __restrict__ A, scalar_t* C, + wvSplitKQ_hf_(const int K, const int Kp, const int M, const int Bx, + const int By, const fp8_t* B, const fp8_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, const float* __restrict__ s_A, const float* __restrict__ s_B, const int _WvPrGrp, const int CuCount) { constexpr int max_lds_len = LDS_SIZE; @@ -1626,7 +1697,16 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) for (int n = 0; n < N; n++) { for (int y = 0; y < YTILE; y++) { if (y + m >= M) break; // To avoid mem access fault. - C[m + y + n * M] = __float2s(sum[n][y][0] * sA * sB); + sum[n][y][0] *= sA * sB; + if constexpr (std::is_same_v) { + if (BIAS) + sum[n][y][0] += __half2float(BIAS[(m + y) % Bx + (n % By) * M]); + } else if constexpr (std::is_same_v) { + if (BIAS) + sum[n][y][0] += + __bfloat162float(BIAS[(m + y) % Bx + (n % By) * M]); + } + C[m + y + n * M] = __float2s(sum[n][y][0]); } } } @@ -1638,16 +1718,19 @@ __global__ void __launch_bounds__(WvPrGrp* THRDS) template __global__ void wvSplitKQ_hf_(const int K, const int Kp, const int M, - const fp8_t* B, const fp8_t* __restrict__ A, - scalar_t* C, const float* __restrict__ s_A, + const int Bx, const int By, const fp8_t* B, + const fp8_t* __restrict__ A, + const scalar_t* __restrict__ BIAS, scalar_t* C, + const float* __restrict__ s_A, const float* __restrict__ s_B, const int _WvPrGrp, const int CuCount) { UNREACHABLE_CODE } #endif // defined(__HIP__MI3XX__) TODO: Add NAVI support -void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, - at::Tensor& scale_a, at::Tensor& scale_b, +void wvSplitKQ(const at::Tensor& in_a, const at::Tensor& in_b, + const c10::optional& in_bias, at::Tensor& out_c, + const at::Tensor& scale_a, const at::Tensor& scale_b, const int64_t CuCount) { static c10::ScalarType kFp8Type = is_fp8_ocp() ? c10::ScalarType::Float8_e4m3fn @@ -1656,6 +1739,15 @@ void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, auto K_in = in_a.size(1); auto N_in = in_b.size(0); auto Kp_in = in_a.stride(0); + auto Bx_in = + (in_bias.has_value() && in_bias->numel() > 0) + ? (in_bias->sizes().size() == 2) ? in_bias->size(1) : in_bias->size(0) + : 1; + auto By_in = (in_bias.has_value() && in_bias->numel() > 0 && + in_bias->sizes().size() == 2) + ? 
in_bias->size(0) + : 1; + TORCH_CHECK(K_in % 16 == 0, "k % 16 == 0"); TORCH_CHECK(in_a.dtype() == in_b.dtype() && in_a.dtype() == kFp8Type); TORCH_CHECK(out_c.dtype() == torch::kFloat16 || @@ -1673,13 +1765,15 @@ void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, if ((K_in * N_in <= max_lds_len) && (M_in % _YTILEs == 0)) { \ int __wvPrGrp = mindiv(M_in, CuCount * _YTILEs, _WvPrGrp); \ wvSplitKQ_hf_sml_ \ - <<>>(K_in, Kp_in, M_in, a_ptr, b_ptr, c_ptr, \ - s_a, s_b, __wvPrGrp, CuCount); \ + <<>>(K_in, Kp_in, M_in, Bx_in, By_in, a_ptr, \ + b_ptr, bias_ptr, c_ptr, s_a, s_b, \ + __wvPrGrp, CuCount); \ } else { \ int __wvPrGrp = mindiv(M_in, CuCount * _YTILEm, _WvPrGrp); \ wvSplitKQ_hf_ \ - <<>>(K_in, Kp_in, M_in, a_ptr, b_ptr, c_ptr, \ - s_a, s_b, __wvPrGrp, CuCount); \ + <<>>(K_in, Kp_in, M_in, Bx_in, By_in, a_ptr, \ + b_ptr, bias_ptr, c_ptr, s_a, s_b, \ + __wvPrGrp, CuCount); \ } \ } @@ -1691,6 +1785,9 @@ void wvSplitKQ(at::Tensor& in_a, at::Tensor& in_b, at::Tensor& out_c, VLLM_DISPATCH_FP8_TYPES(in_a.scalar_type(), "wvSplitKQ", [&] { auto a_ptr = in_a.data_ptr(); auto b_ptr = in_b.data_ptr(); + auto bias_ptr = (in_bias.has_value() && in_bias->numel() > 0) + ? reinterpret_cast(in_bias->data_ptr()) + : nullptr; switch (N_in) { case 1: WVSPLITKQ(16, 2, 2, 2, 2, 2, 2, 1) diff --git a/csrc/rocm/torch_bindings.cpp b/csrc/rocm/torch_bindings.cpp index c0c4daef64f0..518486b1ca5d 100644 --- a/csrc/rocm/torch_bindings.cpp +++ b/csrc/rocm/torch_bindings.cpp @@ -22,13 +22,14 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, rocm_ops) { // Custom gemm op for skinny matrix-matrix multiplication rocm_ops.def( - "wvSplitK(Tensor in_a, Tensor in_b, int CuCount) -> " + "wvSplitK(Tensor in_a, Tensor in_b, Tensor? in_bias, int CuCount) -> " "Tensor"); rocm_ops.impl("wvSplitK", torch::kCUDA, &wvSplitK); // wvSplitK for fp8 rocm_ops.def( - "wvSplitKQ(Tensor in_a, Tensor in_b, Tensor! out_c, Tensor scale_a, " + "wvSplitKQ(Tensor in_a, Tensor in_b, Tensor? in_bias, Tensor! out_c, " + "Tensor scale_a, " " Tensor scale_b, int CuCount) -> ()"); rocm_ops.impl("wvSplitKQ", torch::kCUDA, &wvSplitKQ); diff --git a/tests/kernels/quantization/test_rocm_skinny_gemms.py b/tests/kernels/quantization/test_rocm_skinny_gemms.py index a9b1c71ef071..6de5fc9c5601 100644 --- a/tests/kernels/quantization/test_rocm_skinny_gemms.py +++ b/tests/kernels/quantization/test_rocm_skinny_gemms.py @@ -1,12 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import math + import pytest import torch import vllm._custom_ops as ops from tests.kernels.quant_utils import ref_dynamic_per_tensor_fp8_quant -from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( - rocm_per_tensor_w8a8_scaled_mm_impl) from vllm.platforms import current_platform DTYPES = [torch.bfloat16, torch.float16] @@ -49,6 +49,7 @@ (2, 512, 512), (3, 2048, 2048), (4, 4096, 4096), + (4, 16400, 2048), # Extended FP8 dimensions not covered by WVSPLITK (1, 14336, 1024), (2, 24576, 2048), @@ -67,6 +68,9 @@ @torch.inference_mode() def test_rocm_llmm1_kernel(n, k, m, dtype, rows_per_block, seed): torch.manual_seed(seed) + #TODO: Zero-centering the inputs causes errors for LLMM1! + # Without that the numbers quickly saturate, and may + # be giving false matches. 
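The Bx/By values threaded through the kernels and host wrappers above encode the bias layout that the bias tests below exercise: for a 1D bias of length m (the kernel's M), Bx = M and By = 1, so BIAS[(m + i) % Bx + (n % By) * M] reduces to BIAS[m + i], i.e. the same bias value for every output row; for a 2D bias of shape (n, m), Bx = M and By = N, giving the row-major element BIAS[n][m + i]. A minimal Python decode of the same arithmetic (illustrative only, not part of the patch):

    def bias_index(row, col, M, Bx, By):
        # row: output row n, col: output column m + i, bias viewed as a flat buffer
        return (col % Bx) + (row % By) * M

    # 1D bias (Bx == M, By == 1): index == col            -> broadcast across rows
    # 2D bias (Bx == M, By == N): index == col + row * M  -> per-element bias
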
A = torch.rand(n, k, dtype=dtype, device="cuda") B = torch.rand(m, k, dtype=dtype, device="cuda") @@ -85,11 +89,51 @@ def test_rocm_wvsplitk_kernel(n, k, m, dtype, seed): torch.manual_seed(seed) cu_count = current_platform.get_cu_count() - A = torch.rand(n, k, dtype=dtype, device="cuda") - B = torch.rand(m, k, dtype=dtype, device="cuda") + A = torch.rand(n, k, dtype=dtype, device="cuda") - .5 + B = torch.rand(m, k, dtype=dtype, device="cuda") - .5 - ref_out = torch.matmul(A, B.t()) - out = ops.wvSplitK(B, A, cu_count) + ref_out = torch.nn.functional.linear(A, B) + out = ops.wvSplitK(B, A.view(-1, A.size(-1)), cu_count) + + assert torch.allclose(out, ref_out, rtol=0.01) + + +@pytest.mark.parametrize("n,k,m", NKM_FACTORS_WVSPLITK) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.skipif(not current_platform.is_rocm(), + reason="only test for rocm") +def test_rocm_wvsplitk_bias1D_kernel(n, k, m, dtype, seed): + torch.manual_seed(seed) + cu_count = current_platform.get_cu_count() + + xavier = math.sqrt(2 / k) # normalize to avoid large output-bias deltas + A = (torch.rand(n, k, dtype=dtype, device="cuda") - .5) * xavier + B = (torch.rand(m, k, dtype=dtype, device="cuda") - .5) * xavier + BIAS = torch.rand(m, dtype=dtype, device="cuda") - .5 + + ref_out = torch.nn.functional.linear(A, B, BIAS) + out = ops.wvSplitK(B, A.view(-1, A.size(-1)), cu_count, BIAS) + + assert torch.allclose(out, ref_out, rtol=0.01) + + +@pytest.mark.parametrize("n,k,m", NKM_FACTORS_WVSPLITK) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.skipif(not current_platform.is_rocm(), + reason="only test for rocm") +def test_rocm_wvsplitk_bias2D_kernel(n, k, m, dtype, seed): + torch.manual_seed(seed) + cu_count = current_platform.get_cu_count() + + xavier = math.sqrt(2 / k) # normalize to avoid large output-bias deltas + A = (torch.rand(n, k, dtype=dtype, device="cuda") - .5) * xavier + B = (torch.rand(m, k, dtype=dtype, device="cuda") - .5) * xavier + BIAS = torch.rand(n, m, dtype=dtype, device="cuda") - .5 + + ref_out = torch.nn.functional.linear(A, B, BIAS) + out = ops.wvSplitK(B, A.view(-1, A.size(-1)), cu_count, BIAS) assert torch.allclose(out, ref_out, rtol=0.01) @@ -103,8 +147,8 @@ def test_rocm_wvsplitk_kernel(n, k, m, dtype, seed): def test_rocm_wvsplitk_fp8_kernel(n, k, m, dtype, seed): torch.manual_seed(seed) - A = torch.rand(n, k, device="cuda") - B = torch.rand(m, k, device="cuda") + A = torch.rand(n, k, device="cuda") - 0.5 + B = torch.rand(m, k, device="cuda") - 0.5 A, scale_a = ref_dynamic_per_tensor_fp8_quant(A) B, scale_b = ref_dynamic_per_tensor_fp8_quant(B) @@ -123,27 +167,27 @@ def test_rocm_wvsplitk_fp8_kernel(n, k, m, dtype, seed): @pytest.mark.parametrize("n,k,m", NKM_FACTORS_WVSPLITK_FP8) @pytest.mark.parametrize("dtype", DTYPES) @pytest.mark.parametrize("seed", SEEDS) -@pytest.mark.parametrize("use_bias", [True, False]) @pytest.mark.skipif( not (current_platform.is_rocm() and current_platform.supports_fp8()), reason="only test for rocm fp8") -def test_rocm_per_tensor_w8a8_scaled_mm_impl(n, k, m, dtype, seed, use_bias): +def test_rocm_wvsplitk_fp8_bias1D_kernel(n, k, m, dtype, seed): torch.manual_seed(seed) - A = torch.rand(n, k, device="cuda") - B = torch.rand(m, k, device="cuda") + xavier = math.sqrt(2 / k) # normalize to avoid large output-bias deltas + A = (torch.rand(n, k, device="cuda") - .5) * xavier + B = (torch.rand(m, k, device="cuda") - .5) * xavier + BIAS = torch.rand(m, dtype=dtype, 
device="cuda") - .5 A, scale_a = ref_dynamic_per_tensor_fp8_quant(A) B, scale_b = ref_dynamic_per_tensor_fp8_quant(B) - bias = torch.rand(1, m, dtype=dtype, device="cuda") if use_bias else None - - output = rocm_per_tensor_w8a8_scaled_mm_impl(A, B.t(), dtype, scale_a, - scale_b, bias) ref_out = torch._scaled_mm(A, B.t(), out_dtype=dtype, scale_a=scale_a, scale_b=scale_b, - bias=bias) - assert torch.allclose(output, ref_out, rtol=0.01) + bias=BIAS) + out = ops.wvSplitKQ(B, A, dtype, scale_a, scale_b, + current_platform.get_cu_count(), BIAS) + + assert torch.allclose(out, ref_out, rtol=0.01) diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index 712295aa9288..a108542e1436 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1447,17 +1447,24 @@ def LLMM1(a: torch.Tensor, b: torch.Tensor, return torch.ops._rocm_C.LLMM1(a, b, rows_per_block) -def wvSplitK(a: torch.Tensor, b: torch.Tensor, cu_count: int) -> torch.Tensor: - return torch.ops._rocm_C.wvSplitK(a, b, cu_count) - - -def wvSplitKQ(a: torch.Tensor, b: torch.Tensor, out_dtype: torch.dtype, - scale_a: torch.Tensor, scale_b: torch.Tensor, - cu_count: int) -> torch.Tensor: +def wvSplitK(a: torch.Tensor, + b: torch.Tensor, + cu_count: int, + bias: torch.Tensor = None) -> torch.Tensor: + return torch.ops._rocm_C.wvSplitK(a, b, bias, cu_count) + + +def wvSplitKQ(a: torch.Tensor, + b: torch.Tensor, + out_dtype: torch.dtype, + scale_a: torch.Tensor, + scale_b: torch.Tensor, + cu_count: int, + bias: torch.Tensor = None) -> torch.Tensor: out = torch.empty((b.shape[0], a.shape[0]), dtype=out_dtype, device=b.device) - torch.ops._rocm_C.wvSplitKQ(a, b, out, scale_a, scale_b, cu_count) + torch.ops._rocm_C.wvSplitKQ(a, b, bias, out, scale_a, scale_b, cu_count) return out diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py index 8cda1789e6c9..6ed482db4700 100644 --- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py @@ -178,10 +178,12 @@ def rocm_per_tensor_w8a8_scaled_mm_impl(qinput: torch.Tensor, scale_b: torch.Tensor, bias: torch.Tensor) -> torch.Tensor: from vllm.platforms.rocm import on_mi3xx - if envs.VLLM_ROCM_USE_SKINNY_GEMM and on_mi3xx( - ) and qinput.shape[0] == 1 and qinput.shape[1] % 16 == 0 and bias is None: + if envs.VLLM_ROCM_USE_SKINNY_GEMM and on_mi3xx() and \ + qinput.shape[0] == 1 and \ + qinput.shape[1] % 16 == 0 and \ + ((bias is None) or (bias.dtype == out_dtype)) : output = ops.wvSplitKQ(weight.t(), qinput, out_dtype, scale_a, scale_b, - current_platform.get_cu_count()) + current_platform.get_cu_count(), bias) else: output = torch._scaled_mm(qinput, weight, diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py index a1675ffbaa95..d7a65d43c210 100644 --- a/vllm/model_executor/layers/utils.py +++ b/vllm/model_executor/layers/utils.py @@ -100,7 +100,7 @@ def rocm_unquantized_gemm_impl( k = weight.shape[1] use_skinny = (envs.VLLM_ROCM_USE_SKINNY_GEMM and on_gfx9() and \ x.dtype in [torch.float16, torch.bfloat16] \ - and k % 8 == 0 and bias is None) + and k % 8 == 0) if use_skinny is not True: return torch.nn.functional.linear(x, weight, bias) @@ -111,9 +111,9 @@ def rocm_unquantized_gemm_impl( cu_count = current_platform.get_cu_count() if m > 8 and 0 < n <= 4: - out = ops.wvSplitK(weight, x_view, cu_count) + out = ops.wvSplitK(weight, x_view, cu_count, bias) return out.view(*x.shape[:-1], weight.shape[0]) - elif m % 4 == 0 and n 
== 1 and k <= 8192: + elif m % 4 == 0 and n == 1 and k <= 8192 and bias is None: out = ops.LLMM1(weight, x_view, 4) return out.view(*x.shape[:-1], weight.shape[0]) return torch.nn.functional.linear(x, weight, bias) From 8c1c81a3de344fba340cb2efc82f29d2c1563a2d Mon Sep 17 00:00:00 2001 From: Amir Samani Date: Tue, 23 Sep 2025 11:33:06 -0700 Subject: [PATCH 286/518] [core] add nccl symmetric memory for all reduce (#24532) Signed-off-by: Amir Samani Signed-off-by: Michael Goin Co-authored-by: Michael Goin --- .buildkite/test-pipeline.yaml | 1 + .../kernels/benchmark_device_communicators.py | 26 ++- .../test_nccl_symm_mem_allreduce.py | 94 +++++++++ vllm/compilation/cuda_graph.py | 6 + .../device_communicators/all_reduce_utils.py | 27 ++- .../device_communicators/cuda_communicator.py | 15 ++ .../device_communicators/pynccl.py | 53 ++++- .../device_communicators/pynccl_allocator.py | 186 ++++++++++++++++++ .../device_communicators/pynccl_wrapper.py | 38 +++- vllm/envs.py | 11 ++ vllm/utils/__init__.py | 32 +++ vllm/v1/worker/gpu_ubatch_wrapper.py | 6 + 12 files changed, 489 insertions(+), 6 deletions(-) create mode 100644 tests/distributed/test_nccl_symm_mem_allreduce.py create mode 100644 vllm/distributed/device_communicators/pynccl_allocator.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 49316eb4f607..cf32087ed3b9 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -1039,3 +1039,4 @@ steps: num_gpus: 2 commands: - pytest -v -s tests/distributed/test_context_parallel.py + - pytest -v -s tests/distributed/test_nccl_symm_mem_allreduce.py diff --git a/benchmarks/kernels/benchmark_device_communicators.py b/benchmarks/kernels/benchmark_device_communicators.py index a61c17edc1e2..4cbdde5a5b2c 100644 --- a/benchmarks/kernels/benchmark_device_communicators.py +++ b/benchmarks/kernels/benchmark_device_communicators.py @@ -7,6 +7,10 @@ CustomAllreduce (oneshot, twoshot), PyNcclCommunicator, and SymmMemCommunicator (multimem, two-shot). +for NCCL symmetric memory you need to set the environment variables +NCCL_NVLS_ENABLE=1 NCCL_CUMEM_ENABLE=1 VLLM_USE_NCCL_SYMM_MEM=1, otherwise NCCL does +not use fast NVLS implementation for all reduce. 
+ Usage: torchrun --nproc_per_node= benchmark_device_communicators.py [options] @@ -26,7 +30,13 @@ from torch.distributed import ProcessGroup from vllm.distributed.device_communicators.custom_all_reduce import CustomAllreduce -from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator +from vllm.distributed.device_communicators.pynccl import ( + PyNcclCommunicator, + register_nccl_symmetric_ops, +) +from vllm.distributed.device_communicators.pynccl_allocator import ( + set_graph_pool_id, +) from vllm.distributed.device_communicators.symm_mem import SymmMemCommunicator from vllm.logger import init_logger from vllm.utils import FlexibleArgumentParser @@ -98,6 +108,7 @@ def _init_communicators(self): ) if not self.pynccl_comm.disabled: logger.info("Rank %s: PyNcclCommunicator initialized", self.rank) + register_nccl_symmetric_ops(self.pynccl_comm) else: logger.info("Rank %s: PyNcclCommunicator disabled", self.rank) self.pynccl_comm = None @@ -194,6 +205,15 @@ def benchmark_allreduce( None, # no env variable needed ) ) + communicators.append( + ( + "pynccl-symm", + lambda t: torch.ops.vllm.all_reduce_symmetric_with_copy(t), + lambda t: True, # Always available if initialized + nullcontext(), + None, # no env variable needed + ) + ) if self.symm_mem_comm_multimem is not None: comm = self.symm_mem_comm_multimem @@ -271,7 +291,9 @@ def benchmark_allreduce_single( # Capture the graph using context manager with context: graph = torch.cuda.CUDAGraph() - with torch.cuda.graph(graph): + graph_pool = torch.cuda.graph_pool_handle() + set_graph_pool_id(graph_pool) + with torch.cuda.graph(graph, pool=graph_pool): for _ in range(CUDA_GRAPH_CAPTURE_CYCLES): allreduce_fn(graph_input) diff --git a/tests/distributed/test_nccl_symm_mem_allreduce.py b/tests/distributed/test_nccl_symm_mem_allreduce.py new file mode 100644 index 000000000000..ffc913742620 --- /dev/null +++ b/tests/distributed/test_nccl_symm_mem_allreduce.py @@ -0,0 +1,94 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import random +import typing + +import pytest +import torch +import torch.distributed as dist +import torch.multiprocessing as mp + +import vllm.envs as envs +from vllm.distributed import cleanup_dist_env_and_memory +from vllm.distributed.device_communicators.cuda_communicator import ( + CudaCommunicator) +from vllm.distributed.device_communicators.pynccl import ( + register_nccl_symmetric_ops) +from vllm.distributed.device_communicators.pynccl_allocator import ( + get_nccl_mem_pool, is_symmetric_memory_enabled) +from vllm.distributed.parallel_state import (get_tp_group, + init_distributed_environment, + initialize_model_parallel) +from vllm.platforms import current_platform +from vllm.utils import update_environment_variables + +torch.manual_seed(42) +random.seed(44) + +test_size_elements = 4 * 1024 * 1024 + + +def nccl_symm_mem_allreduce_worker(local_rank: int, world_size: int): + monkeypatch = pytest.MonkeyPatch() + with monkeypatch.context() as m: + m.delenv("CUDA_VISIBLE_DEVICES", raising=False) + dtype = torch.bfloat16 + device = torch.device(f"cuda:{local_rank}") + torch.cuda.set_device(device) + torch.set_default_device(device) + torch.set_default_dtype(dtype) + update_environment_variables({ + "RANK": str(local_rank), + "LOCAL_RANK": str(local_rank), + "WORLD_SIZE": str(world_size), + "MASTER_ADDR": "localhost", + "MASTER_PORT": "12345", + }) + + init_distributed_environment() + initialize_model_parallel(tensor_model_parallel_size=world_size) 
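For context, the custom op exercised by this test is a thin wrapper added in this patch; condensed from register_nccl_symmetric_ops, it does roughly the following:

    register_nccl_symmetric_ops(pynccl_comm)  # one-time registration per process

    # torch.ops.vllm.all_reduce_symmetric_with_copy(input_tensor) then runs:
    with nccl_symm_mem_context(pynccl_comm):
        symm_input = torch.empty_like(input_tensor)   # served from the
        symm_output = torch.empty_like(input_tensor)  # ncclMemAlloc-backed MemPool
        symm_input.copy_(input_tensor)
        symm_output = pynccl_comm.all_reduce(symm_input, symm_output)

New pool segments are window-registered via ncclCommWindowRegister by the context, which is what allows NCCL to use its fast NVLS all-reduce (per the benchmark notes above). In serving, CudaCommunicator.all_reduce only takes this route when should_nccl_symm_mem_allreduce passes (world size at least 4, and the tensor at least 2 MiB at TP=4 or 1 MiB at TP=8, always above TP=8); this test calls the op directly instead.
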
+ + cuda_communicator = typing.cast(CudaCommunicator, + get_tp_group().device_communicator) + pynccl_comm = cuda_communicator.pynccl_comm + if get_nccl_mem_pool() is None: + pytest.skip("NCCL allocator compilation failed " + "(probably missing NCCL headers).") + if not is_symmetric_memory_enabled(): + pytest.skip("NCCL symmetric memory allreduce is disabled.") + + register_nccl_symmetric_ops(pynccl_comm) + input = torch.randint(1, + 23, (test_size_elements, ), + dtype=dtype, + device=device) + input_clone = input.clone() + output = torch.ops.vllm.all_reduce_symmetric_with_copy(input) + assert output is not None + + group = get_tp_group().device_group + dist.all_reduce(input_clone, group=group) + torch.testing.assert_close(output, input_clone, atol=2.5, rtol=0.1) + + +@pytest.mark.skipif( + not current_platform.is_cuda(), + reason="NCCLSymmMemAllreduce is only available for CUDA platforms.", +) +@pytest.mark.parametrize("world_size", [2]) +@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda"], + reason="Only test on CUDA") +def test_nccl_symm_mem_allreduce(monkeypatch: pytest.MonkeyPatch, world_size): + if world_size > torch.cuda.device_count(): + pytest.skip("Not enough GPUs to run the test.") + + # Enable SymmMemCommunicator + monkeypatch.setenv("VLLM_USE_NCCL_SYMM_MEM", "1") + monkeypatch.setenv("NCCL_NVLS_ENABLE", "1") + monkeypatch.setenv("NCCL_CUMEM_ENABLE", "1") + + mp.spawn(nccl_symm_mem_allreduce_worker, + args=(world_size, ), + nprocs=world_size) + cleanup_dist_env_and_memory() diff --git a/vllm/compilation/cuda_graph.py b/vllm/compilation/cuda_graph.py index e233f959c0a4..befb7736d75a 100644 --- a/vllm/compilation/cuda_graph.py +++ b/vllm/compilation/cuda_graph.py @@ -12,6 +12,8 @@ from vllm.compilation.counter import compilation_counter from vllm.compilation.monitor import validate_cudagraph_capturing_enabled from vllm.config import CUDAGraphMode, VllmConfig +from vllm.distributed.device_communicators.pynccl_allocator import ( + set_graph_pool_id) from vllm.forward_context import BatchDescriptor, get_forward_context from vllm.logger import init_logger from vllm.platforms import current_platform @@ -154,6 +156,10 @@ def __call__(self, *args, **kwargs): stack.enter_context( patch("torch.cuda.empty_cache", lambda: None)) + if self.graph_pool is not None: + set_graph_pool_id(self.graph_pool) + else: + set_graph_pool_id(current_platform.graph_pool_handle()) # mind-exploding: carefully manage the reference and memory. 
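A note on the set_graph_pool_id calls added in cuda_graph.py just above: nccl_symm_mem_context (introduced later in this patch) has to know which pool a CUDA graph is capturing into, because under capture it temporarily redirects allocations away from that pool so symmetric buffers come from the NCCL pool instead. Simplified from its __enter__/__exit__:

    torch._C._cuda_endAllocateToPool(device, graph_pool_id)                 # pause the graph pool
    # ... allocate symmetric buffers from the ncclMemAlloc-backed MemPool ...
    torch._C._cuda_beginAllocateCurrentThreadToPool(device, graph_pool_id)  # resume

This is why both CUDAGraphWrapper here and the ubatch wrapper later in the patch record the pool id before entering torch.cuda.graph.
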
with torch.cuda.graph(cudagraph, pool=self.graph_pool): # `output` is managed by pytorch's cudagraph pool diff --git a/vllm/distributed/device_communicators/all_reduce_utils.py b/vllm/distributed/device_communicators/all_reduce_utils.py index 805a88854b77..87e0f8e1a967 100644 --- a/vllm/distributed/device_communicators/all_reduce_utils.py +++ b/vllm/distributed/device_communicators/all_reduce_utils.py @@ -10,8 +10,9 @@ import tempfile from collections.abc import Sequence from itertools import product -from typing import Optional +from typing import Any, Optional +import torch import torch.distributed as dist import torch.multiprocessing as mp @@ -56,6 +57,30 @@ } } +NCCL_SYMM_MEM_ALL_REDUCE_CONFIG: dict[str, Any] = { + "min_world_size": 4, + "thresholds": { + 4: 2 * MiB, # 2 MB + 8: 1 * MiB, # 1 MB + }, + "always_use_above_world_size": 8 # Always use symm mem for world_size > 8 +} + + +def should_nccl_symm_mem_allreduce(world_size: int, + input_tensor: torch.Tensor) -> bool: + from vllm.distributed.device_communicators.pynccl_allocator import ( + is_symmetric_memory_enabled) + if not is_symmetric_memory_enabled(): + return False + if world_size < NCCL_SYMM_MEM_ALL_REDUCE_CONFIG["min_world_size"]: + return False + threshold = NCCL_SYMM_MEM_ALL_REDUCE_CONFIG["thresholds"].get(world_size) + if threshold is not None and input_tensor.nbytes >= threshold: + return True + return (world_size + > NCCL_SYMM_MEM_ALL_REDUCE_CONFIG["always_use_above_world_size"]) + def producer(batch_src: Sequence[int], producer_queue, diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py index b2bf3bc3cc2e..6c25bf3cd95c 100644 --- a/vllm/distributed/device_communicators/cuda_communicator.py +++ b/vllm/distributed/device_communicators/cuda_communicator.py @@ -7,6 +7,12 @@ from torch.distributed import ProcessGroup import vllm.envs as envs +from vllm.distributed.device_communicators.all_reduce_utils import ( + should_nccl_symm_mem_allreduce) +from vllm.distributed.device_communicators.pynccl import ( + register_nccl_symmetric_ops) +from vllm.distributed.device_communicators.pynccl_allocator import ( + is_symmetric_memory_enabled) from vllm.logger import init_logger from vllm.platforms import current_platform @@ -53,6 +59,8 @@ def __init__(self, group=self.cpu_group, device=self.device, ) + if is_symmetric_memory_enabled(): + register_nccl_symmetric_ops(self.pynccl_comm) self.ca_comm: Optional[CustomAllreduce] = None self.qr_comm: Optional[QuickAllReduce] = None @@ -107,6 +115,13 @@ def __init__(self, raise ValueError(f"Unknown all2all backend: {all2all_backend}") def all_reduce(self, input_): + # since currently we perform copy input -> symm_input -> out-of-place AR + # return symm_output, we don't need to check if input is symmetric + if self.pynccl_comm is not None and \ + should_nccl_symm_mem_allreduce(self.pynccl_comm.world_size,input_): + out = torch.ops.vllm.all_reduce_symmetric_with_copy(input_) + if out is not None: + return out # always try quick reduce first, then custom allreduce, # and then pynccl. 
(quick reduce just for ROCM MI3*) qr_comm = self.qr_comm diff --git a/vllm/distributed/device_communicators/pynccl.py b/vllm/distributed/device_communicators/pynccl.py index 3e4d0d250af9..75de85e1b0ab 100644 --- a/vllm/distributed/device_communicators/pynccl.py +++ b/vllm/distributed/device_communicators/pynccl.py @@ -17,6 +17,39 @@ logger = init_logger(__name__) +_NCCL_SYMM_OPS_REGISTERED = False + + +def register_nccl_symmetric_ops(pynccl_comm): + from vllm.distributed.device_communicators.pynccl_allocator import ( + nccl_symm_mem_context) + from vllm.utils import direct_register_custom_op + + global _NCCL_SYMM_OPS_REGISTERED + if _NCCL_SYMM_OPS_REGISTERED: + return + _NCCL_SYMM_OPS_REGISTERED = True + + def all_reduce_symmetric_with_copy_impl( + input_tensor: torch.Tensor) -> torch.Tensor: + with nccl_symm_mem_context(pynccl_comm): + symm_input = torch.empty_like(input_tensor) + symm_output = torch.empty_like(input_tensor) + symm_input.copy_(input_tensor) + symm_output = pynccl_comm.all_reduce(symm_input, symm_output) + return symm_output + + def all_reduce_symmetric_with_copy_fake( + input_tensor: torch.Tensor) -> torch.Tensor: + return torch.empty_like(input_tensor) + + direct_register_custom_op( + op_name="all_reduce_symmetric_with_copy", + op_func=all_reduce_symmetric_with_copy_impl, + mutates_args=[], + fake_impl=all_reduce_symmetric_with_copy_fake, + ) + class PyNcclCommunicator: @@ -67,6 +100,7 @@ def __init__( self.available = True self.disabled = False + self.nccl_version = self.nccl.ncclGetRawVersion() logger.info("vLLM is using nccl==%s", self.nccl.ncclGetVersion()) if self.rank == 0: @@ -109,6 +143,7 @@ def __init__( def all_reduce(self, in_tensor: torch.Tensor, + out_tensor: torch.Tensor = None, op: ReduceOp = ReduceOp.SUM, stream=None) -> torch.Tensor: if self.disabled: @@ -120,7 +155,8 @@ def all_reduce(self, f"this nccl communicator is created to work on {self.device}, " f"but the input tensor is on {in_tensor.device}") - out_tensor = torch.empty_like(in_tensor) + if out_tensor is None: + out_tensor = torch.empty_like(in_tensor) if stream is None: stream = current_stream() @@ -288,3 +324,18 @@ def group_start(self): def group_end(self): self.nccl.ncclGroupEnd() + + def register_comm_window(self, tensor: torch.Tensor): + return self.nccl.ncclCommWindowRegister( + self.comm, + buffer_type(tensor.data_ptr()), + tensor.numel() * tensor.element_size(), + 1, + ) + + def register_comm_window_raw(self, ptr: int, size: int): + return self.nccl.ncclCommWindowRegister(self.comm, buffer_type(ptr), + size, 1) + + def deregister_comm_window(self, window): + return self.nccl.ncclCommWindowDeregister(self.comm, window) diff --git a/vllm/distributed/device_communicators/pynccl_allocator.py b/vllm/distributed/device_communicators/pynccl_allocator.py new file mode 100644 index 000000000000..bc874c1e197e --- /dev/null +++ b/vllm/distributed/device_communicators/pynccl_allocator.py @@ -0,0 +1,186 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import atexit +import contextlib +import tempfile +from typing import Any, Optional + +import torch +from packaging import version +from torch.cuda.memory import CUDAPluggableAllocator +from torch.utils.cpp_extension import load_inline + +from vllm import envs +from vllm.distributed.device_communicators.pynccl import PyNcclCommunicator +from vllm.logger import init_logger +from vllm.platforms import current_platform +from vllm.utils import find_nccl_include_paths + +logger = 
init_logger(__name__) + +nccl_allocator_source = """ +#include +extern "C" { + +void* nccl_alloc_plug(size_t size, int device, void* stream) { + void* ptr; + ncclResult_t err = ncclMemAlloc(&ptr, size); + return ptr; + +} + +void nccl_free_plug(void* ptr, size_t size, int device, void* stream) { + ncclResult_t err = ncclMemFree(ptr); +} + +} +""" + +_allocator = None +_allocator_wrapper = None +_mem_pool = None +_registered_base_addrs = set() +_graph_pool_id = None +_nccl_allocator_failed_to_compile = False +_cached_pool_snapshot = None + + +def is_symmetric_memory_enabled(): + global _nccl_allocator_failed_to_compile + return envs.VLLM_USE_NCCL_SYMM_MEM and not _nccl_allocator_failed_to_compile + + +def is_symmetric_memory_tensor(tensor: torch.Tensor): + if not is_symmetric_memory_enabled() or _cached_pool_snapshot is None: + return False + for segment in _cached_pool_snapshot: + for block in segment["blocks"]: + if block["address"] == tensor.untyped_storage().data_ptr(): + return True + return False + + +def set_graph_pool_id(graph_pool_id): + global _graph_pool_id + _graph_pool_id = graph_pool_id + + +def compile_nccl_allocator(): + global _allocator, _allocator_wrapper, _nccl_allocator_failed_to_compile + if not current_platform.is_cuda(): + _nccl_allocator_failed_to_compile = True + return + try: + out_dir = tempfile.gettempdir() + nccl_allocator_libname = "nccl_allocator" + nccl_include_paths = find_nccl_include_paths() + load_inline( + name=nccl_allocator_libname, + cpp_sources=nccl_allocator_source, + with_cuda=True, + extra_ldflags=["-lnccl"], + verbose=envs.VLLM_LOGGING_LEVEL == "DEBUG", + is_python_module=False, + build_directory=out_dir, + extra_include_paths=nccl_include_paths, + ) + _allocator_wrapper = CUDAPluggableAllocator( + f"{out_dir}/{nccl_allocator_libname}.so", + "nccl_alloc_plug", + "nccl_free_plug", + ) + _allocator = _allocator_wrapper.allocator() + except Exception as e: + _nccl_allocator_failed_to_compile = True + logger.warning( + "Failed to compile NCCL memory allocator. " + "Symmetric memory will be disabled. " + "This is expected if NCCL headers are not available. " + "optionally set VLLM_NCCL_INCLUDE_PATH to point to a directory " + "containing the NCCL header. 
" + "Error: %s", str(e)) + + +def get_nccl_mem_pool(): + global _mem_pool, _nccl_allocator_failed_to_compile + if _mem_pool is None and not _nccl_allocator_failed_to_compile: + compile_nccl_allocator() + if _allocator is not None: + _mem_pool = torch.cuda.MemPool(_allocator) + return _mem_pool + + +def _cleanup_nccl_mem_pool(): + global _mem_pool + _mem_pool = None + + +def _cleanup_nccl_allocator_wrapper(): + global _allocator_wrapper + _allocator_wrapper = None + + +atexit.register(_cleanup_nccl_mem_pool) +atexit.register(_cleanup_nccl_allocator_wrapper) + + +class nccl_symm_mem_context: + + def __init__( + self, + pynccl_comm: PyNcclCommunicator, + disabled: bool = False, + ): + self.disabled = (disabled or not is_symmetric_memory_enabled() + or pynccl_comm.world_size == 1 + or not current_platform.is_cuda() + or get_nccl_mem_pool() is None or version.parse( + torch.__version__) < version.parse("2.8.0.a0")) + if self.disabled: + self.pynccl_comm: Optional[PyNcclCommunicator] = None + self._mem_pool_ctx: contextlib.AbstractContextManager[ + Any] = contextlib.nullcontext() + self.is_graph_capture = None + self.device = None + else: + self.pynccl_comm = pynccl_comm + self._mem_pool_ctx = torch.cuda.use_mem_pool(get_nccl_mem_pool()) + self.is_graph_capture = torch.cuda.is_current_stream_capturing() + self.device = torch.cuda.current_device() + + def __enter__(self): + if self.disabled: + return self + assert ( + self.pynccl_comm + is not None), "Symmetric memory requires pynccl to be initalized" + assert ( + self.pynccl_comm.nccl_version >= 22703 + ), "NCCL version 2.27.3 or higher is required for NCCL symmetric memory" + if self.is_graph_capture: + assert ( + _graph_pool_id + is not None), "graph_pool_id is not set under graph capture" + # Pause graph memory pool to use symmetric memory with cuda graph + torch._C._cuda_endAllocateToPool(self.device, _graph_pool_id) + self._mem_pool_ctx.__enter__() + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.disabled: + return + global _cached_pool_snapshot + global _registered_base_addrs + self._mem_pool_ctx.__exit__(exc_type, exc_val, exc_tb) + _pool = get_nccl_mem_pool() + assert _pool is not None + _cached_pool_snapshot = _pool.snapshot() + assert self.pynccl_comm is not None + for segment in _cached_pool_snapshot: + if segment["address"] not in _registered_base_addrs: + self.pynccl_comm.register_comm_window_raw( + segment["address"], segment["total_size"]) + _registered_base_addrs.add(segment["address"]) + if self.is_graph_capture: + torch._C._cuda_beginAllocateCurrentThreadToPool( + self.device, _graph_pool_id) diff --git a/vllm/distributed/device_communicators/pynccl_wrapper.py b/vllm/distributed/device_communicators/pynccl_wrapper.py index a930b63bc26f..c3e99e177e2d 100644 --- a/vllm/distributed/device_communicators/pynccl_wrapper.py +++ b/vllm/distributed/device_communicators/pynccl_wrapper.py @@ -41,6 +41,7 @@ ncclResult_t = ctypes.c_int ncclComm_t = ctypes.c_void_p +ncclWindow_t = ctypes.c_void_p class ncclUniqueId(ctypes.Structure): @@ -222,6 +223,24 @@ class NCCLLibrary: Function("ncclGroupStart", ncclResult_t, []), # ncclResult_t ncclGroupEnd(); Function("ncclGroupEnd", ncclResult_t, []), + # ncclResult_t ncclCommWindowRegister( + # ncclComm_t comm, void* buff, size_t size, + # ncclWindow_t* win, int winFlags); + Function( + "ncclCommWindowRegister", + ncclResult_t, + [ + ncclComm_t, + buffer_type, + ctypes.c_size_t, + ctypes.POINTER(ncclWindow_t), + ctypes.c_int, + ], + ), + # ncclResult_t 
ncclCommWindowDeregister( + # ncclComm_t comm, ncclWindow_t win); + Function("ncclCommWindowDeregister", ncclResult_t, + [ncclComm_t, ncclWindow_t]), ] # class attribute to store the mapping from the path to the library @@ -271,10 +290,14 @@ def NCCL_CHECK(self, result: ncclResult_t) -> None: error_str = self.ncclGetErrorString(result) raise RuntimeError(f"NCCL error: {error_str}") - def ncclGetVersion(self) -> str: + def ncclGetRawVersion(self) -> int: version = ctypes.c_int() self.NCCL_CHECK(self._funcs["ncclGetVersion"](ctypes.byref(version))) - version_str = str(version.value) + # something like 21903 + return version.value + + def ncclGetVersion(self) -> str: + version_str = str(self.ncclGetRawVersion()) # something like 21903 --> "2.19.3" major = version_str[0].lstrip("0") minor = version_str[1:3].lstrip("0") @@ -375,6 +398,17 @@ def ncclGroupStart(self) -> None: def ncclGroupEnd(self) -> None: self.NCCL_CHECK(self._funcs["ncclGroupEnd"]()) + def ncclCommWindowRegister(self, comm: ncclComm_t, buff: buffer_type, + size: int, win_flags: int) -> ncclWindow_t: + window = ncclWindow_t() + self.NCCL_CHECK(self._funcs["ncclCommWindowRegister"]( + comm, buff, size, ctypes.byref(window), win_flags)) + return window + + def ncclCommWindowDeregister(self, comm: ncclComm_t, + window: ncclWindow_t) -> None: + self.NCCL_CHECK(self._funcs["ncclCommWindowDeregister"](comm, window)) + __all__ = [ "NCCLLibrary", "ncclDataTypeEnum", "ncclRedOpTypeEnum", "ncclUniqueId", diff --git a/vllm/envs.py b/vllm/envs.py index f6eafe892ef2..fa6f14d6b037 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -193,6 +193,8 @@ VLLM_DBO_COMM_SMS: int = 20 GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None + VLLM_USE_NCCL_SYMM_MEM: bool = False + VLLM_NCCL_INCLUDE_PATH: Optional[str] = None def get_default_cache_root(): @@ -1410,6 +1412,15 @@ def get_vllm_port() -> Optional[int]: ["container", "code_interpreter", "web_search_preview"]), + + # Flag to enable NCCL symmetric memory allocation and registration + "VLLM_USE_NCCL_SYMM_MEM": + lambda: bool(int(os.getenv("VLLM_USE_NCCL_SYMM_MEM", "0"))), + + # NCCL header path + "VLLM_NCCL_INCLUDE_PATH": + lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None), + } # --8<-- [end:env-vars-definition] diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 3399d00fbabb..5d165f166238 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -1383,6 +1383,38 @@ def find_nccl_library() -> str: return so_file +def find_nccl_include_paths() -> Optional[list[str]]: + """ + We either use the nccl.h specified by the `VLLM_NCCL_INCLUDE_PATH` + environment variable, or we find the library file brought by + nvidia-nccl-cuXX. 
load_inline by default uses + torch.utils.cpp_extension.include_paths + """ + paths: list[str] = [] + inc = envs.VLLM_NCCL_INCLUDE_PATH + if inc and os.path.isdir(inc): + paths.append(inc) + + try: + import importlib.util + spec = importlib.util.find_spec("nvidia.nccl") + if spec and getattr(spec, "submodule_search_locations", None): + for loc in spec.submodule_search_locations: + inc_dir = os.path.join(loc, "include") + if os.path.exists(os.path.join(inc_dir, "nccl.h")): + paths.append(inc_dir) + except Exception: + pass + + seen = set() + out: list[str] = [] + for p in paths: + if p and p not in seen: + out.append(p) + seen.add(p) + return out or None + + prev_set_stream = torch.cuda.set_stream _current_stream_tls = threading.local() diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index bfc3743ea417..d636e7af72ea 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -11,6 +11,8 @@ from vllm.compilation.cuda_graph import CUDAGraphWrapper from vllm.config import CUDAGraphMode, VllmConfig from vllm.distributed import get_ep_group +from vllm.distributed.device_communicators.pynccl_allocator import ( + set_graph_pool_id) from vllm.forward_context import (create_forward_context, get_forward_context, override_forward_context) from vllm.logger import init_logger @@ -206,6 +208,10 @@ def _capture_ubatch_thread(results, ubatch_metadata): cudagraph=torch.cuda.CUDAGraph(), ubatch_metadata=ubatch_metadata, ) + if self.graph_pool is not None: + set_graph_pool_id(self.graph_pool) + else: + set_graph_pool_id(current_platform.graph_pool_handle()) with torch.cuda.graph(cudagraph_metadata.cudagraph, stream=compute_stream, pool=self.graph_pool): From 63400259d05485330fb23635af892b3fce160dea Mon Sep 17 00:00:00 2001 From: ElizaWszola Date: Tue, 23 Sep 2025 21:03:10 +0200 Subject: [PATCH 287/518] [Performance] Move apply_w8a8_block_fp8_linear to an op class (#24666) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: ElizaWszola Signed-off-by: ElizaWszola Signed-off-by: Luka Govedič Signed-off-by: Luka Govedič Co-authored-by: Luka Govedič Co-authored-by: Michael Goin Co-authored-by: Luka Govedič --- .../cutlass_benchmarks/w8a8_benchmarks.py | 4 +- .../benchmark_fp8_block_dense_gemm.py | 4 +- tests/kernels/quantization/test_block_fp8.py | 5 +- .../quantization/test_fp8_quant_group.py | 26 +- .../model_executor/test_enabled_custom_ops.py | 30 -- tests/quantization/test_compressed_tensors.py | 35 ++ vllm/config/__init__.py | 17 + .../compressed_tensors/compressed_tensors.py | 8 + .../schemes/compressed_tensors_w8a8_fp8.py | 37 ++- .../layers/quantization/deepgemm.py | 10 +- .../model_executor/layers/quantization/fp8.py | 40 ++- .../layers/quantization/input_quant_fp8.py | 24 +- .../layers/quantization/utils/fp8_utils.py | 298 +++++++++++------- vllm/utils/deep_gemm.py | 12 +- 14 files changed, 345 insertions(+), 205 deletions(-) diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py index a5a5b52f6039..02f8c593392c 100644 --- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py +++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py @@ -17,7 +17,7 @@ from vllm import _custom_ops as ops from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - w8a8_block_fp8_matmul, + w8a8_triton_block_scaled_mm, ) from vllm.utils import FlexibleArgumentParser, cdiv @@ -158,7 +158,7 @@ def bench_fp8( 
"cutlass_fp8_fp8_fp16_scaled_mm_bias": lambda: ops.cutlass_scaled_mm( a, b, scale_a, scale_b, torch.float16, bias.to(dtype=torch.float16) ), - "triton_fp8_fp8_fp16_scaled_mm_blockwise": lambda: w8a8_block_fp8_matmul( + "triton_fp8_fp8_fp16_scaled_mm_blockwise": lambda: w8a8_triton_block_scaled_mm( a_cont, b.t(), block_scale_a, block_scale_b.t(), (128, 128) ), "cutlass_fp8_fp8_fp16_scaled_mm_blockwise": lambda: ops.cutlass_scaled_mm( diff --git a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py index b99c2099f2c3..b3c3742825de 100644 --- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py +++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py @@ -10,7 +10,7 @@ from vllm.model_executor.layers.quantization.utils.fp8_utils import ( get_col_major_tma_aligned_tensor, per_token_group_quant_fp8, - w8a8_block_fp8_matmul, + w8a8_triton_block_scaled_mm, ) from vllm.triton_utils import triton from vllm.utils.deep_gemm import calc_diff, fp8_gemm_nt, per_block_cast_to_fp8 @@ -59,7 +59,7 @@ def deepgemm_gemm(): # === vLLM Triton Implementation === def vllm_triton_gemm(): - return w8a8_block_fp8_matmul(A_vllm, + return w8a8_triton_block_scaled_mm(A_vllm, B_vllm, A_scale_vllm, B_scale_vllm, diff --git a/tests/kernels/quantization/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py index c440747316b8..c0b934fc55ae 100644 --- a/tests/kernels/quantization/test_block_fp8.py +++ b/tests/kernels/quantization/test_block_fp8.py @@ -12,7 +12,7 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.quantization.utils.fp8_utils import ( cutlass_scaled_mm, get_col_major_tma_aligned_tensor, - per_token_group_quant_fp8, w8a8_block_fp8_matmul) + per_token_group_quant_fp8, w8a8_triton_block_scaled_mm) from vllm.platforms import current_platform from vllm.utils import has_deep_gemm from vllm.utils.deep_gemm import fp8_gemm_nt, per_block_cast_to_fp8 @@ -90,7 +90,8 @@ def test_w8a8_block_fp8_matmul(M, N, K, block_size, out_dtype, seed): ref_out = native_w8a8_block_matmul(A_fp8, B_fp8, As, Bs, block_size, out_dtype) - out = w8a8_block_fp8_matmul(A_fp8, B_fp8, As, Bs, block_size, out_dtype) + out = w8a8_triton_block_scaled_mm(A_fp8, B_fp8, As, Bs, block_size, + out_dtype) rel_diff = (torch.mean( torch.abs(out.to(torch.float32) - ref_out.to(torch.float32))) / diff --git a/tests/kernels/quantization/test_fp8_quant_group.py b/tests/kernels/quantization/test_fp8_quant_group.py index 720eee62760d..3d4c851a9b88 100644 --- a/tests/kernels/quantization/test_fp8_quant_group.py +++ b/tests/kernels/quantization/test_fp8_quant_group.py @@ -20,9 +20,11 @@ (8, 513, 64), # Non-divisible (native only) ]) @pytest.mark.parametrize("seed", [42]) +@pytest.mark.parametrize("use_ue8m0", [True, False]) @torch.inference_mode() def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, - group_size: int, seed: int) -> None: + group_size: int, seed: int, + use_ue8m0: bool) -> None: """Test QuantFP8 group quantization with various configurations. Tests both CUDA and native implementations, column-major scales, @@ -38,7 +40,8 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, group_shape = GroupShape(1, group_size) quant_op = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=False) + column_major_scales=False, + use_ue8m0=use_ue8m0) # 1. 
Test native implementation (always available) x_quant_native, scales_native = quant_op.forward_native(x.clone()) @@ -48,9 +51,15 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, # 2. Test column-major scales configuration quant_op_col = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=True) + column_major_scales=True, + use_ue8m0=use_ue8m0) _, scales_col = quant_op_col.forward_native(x.clone()) - assert scales_col.shape == (expected_num_groups, batch_size) + assert scales_col.shape == (batch_size, expected_num_groups) + assert scales_col.stride(0) == 1 + assert scales_col.stride(1) == batch_size + + # Test column-major scales consistency + assert torch.allclose(scales_col, scales_native, rtol=1e-9, atol=1e-8) # 3. Test CUDA implementation (only for divisible dimensions) if is_divisible: @@ -68,8 +77,9 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, @pytest.mark.parametrize("seed", [42]) +@pytest.mark.parametrize("use_ue8m0", [True, False]) @torch.inference_mode() -def test_quantfp8_group_multidimensional(seed: int) -> None: +def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None: current_platform.seed_everything(seed) group_size = 64 @@ -82,7 +92,8 @@ def test_quantfp8_group_multidimensional(seed: int) -> None: group_shape = GroupShape(1, group_size) quant_op = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=False) + column_major_scales=False, + use_ue8m0=use_ue8m0) x_quant, scales = quant_op.forward_native(x_3d.clone()) assert x_quant.shape == x_3d.shape @@ -91,7 +102,8 @@ def test_quantfp8_group_multidimensional(seed: int) -> None: # Test column_major_scales with multi-dim quant_op_col = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=True) + column_major_scales=True, + use_ue8m0=use_ue8m0) _, scales_col = quant_op_col.forward_native(x_3d.clone()) assert scales_col.shape == (batch1, hidden_dim // group_size, batch2) diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py index 92ce10a9efc0..200b6ecd5852 100644 --- a/tests/model_executor/test_enabled_custom_ops.py +++ b/tests/model_executor/test_enabled_custom_ops.py @@ -17,8 +17,6 @@ from vllm.model_executor.layers.layernorm import (RMSNorm, dispatch_rocm_rmsnorm_func, fused_add_rms_norm, rms_norm) -from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - cutlass_scaled_mm, dispatch_w8a8_blockscale_func, w8a8_block_fp8_matmul) from vllm.platforms import current_platform RMS_NORM_SUPPORTED_DTYPES = [torch.float16, torch.bfloat16] @@ -111,34 +109,6 @@ def test_enabled_ops_invalid(env: str): RMSNorm(1024).enabled() -@pytest.mark.skipif( - not current_platform.is_rocm() or not current_platform.is_fp8_fnuz(), - reason="AITER is a feature exclusive for ROCm and FP8_FNUZ") -@pytest.mark.parametrize("use_cutlass", [True, False]) -@pytest.mark.parametrize("use_rocm_aiter", ["0", "1"]) -@pytest.mark.parametrize("use_rocm_aiter_gemm_w8a8_blockscale", ["0", "1"]) -def test_w8a8_blockscale_dispatch(use_cutlass: bool, use_rocm_aiter: str, - use_rocm_aiter_gemm_w8a8_blockscale: str, - monkeypatch): - - monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) - monkeypatch.setenv("VLLM_ROCM_USE_AITER_LINEAR", - use_rocm_aiter_gemm_w8a8_blockscale) - - use_aiter_and_is_supported = (bool(int(use_rocm_aiter)) and bool( - int(use_rocm_aiter_gemm_w8a8_blockscale))) - block_scale_func = dispatch_w8a8_blockscale_func( - use_cutlass, 
use_aiter_and_is_supported=use_aiter_and_is_supported) - if use_cutlass: - assert block_scale_func == cutlass_scaled_mm - elif current_platform.is_rocm() and int(use_rocm_aiter) and int( - use_rocm_aiter_gemm_w8a8_blockscale): - assert block_scale_func == ( - torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale) - else: - assert block_scale_func == w8a8_block_fp8_matmul - - @pytest.mark.parametrize("use_rocm_aiter", ["0", "1"]) def test_topk_dispatch(use_rocm_aiter: str, monkeypatch): monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index c0ab3fbb1062..af8c7ec3b482 100644 --- a/tests/quantization/test_compressed_tensors.py +++ b/tests/quantization/test_compressed_tensors.py @@ -18,6 +18,9 @@ CompressedTensorsW4A16Fp4, CompressedTensorsW4A16Sparse24, CompressedTensorsW8A8Fp8, CompressedTensorsW8A8Int8, CompressedTensorsW8A16Fp8, CompressedTensorsWNA16) +from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + W8A8BlockFp8LinearOp) from vllm.model_executor.layers.quantization.utils.quant_utils import ( cutlass_fp4_supported) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( @@ -742,3 +745,35 @@ def test_compressed_tensors_transforms_perplexity(vllm_runner, model, prompt, perplexity = llm.generate_prompt_perplexity([prompt])[0] print(perplexity) assert perplexity <= exp_perplexity + + +def test_compressed_tensors_fp8_block_enabled(vllm_runner): + model_path = "RedHatAI/Qwen3-0.6B-FP8-BLOCK" + with vllm_runner(model_path) as llm: + + fp8_dtype = current_platform.fp8_dtype() + + def check_model(model): + layer = model.model.layers[0] + + qkv_proj = layer.self_attn.qkv_proj + assert isinstance(qkv_proj.quant_method, + CompressedTensorsLinearMethod) + assert isinstance(qkv_proj.scheme, CompressedTensorsW8A8Fp8) + assert isinstance(qkv_proj.scheme.w8a8_block_fp8_linear, + W8A8BlockFp8LinearOp) + + assert qkv_proj.weight.dtype is fp8_dtype + assert qkv_proj.weight_scale.dtype is torch.float32 + assert len(qkv_proj.weight.shape) == 2 + assert len(qkv_proj.weight_scale.shape) == 2 + + input_quant_op = \ + qkv_proj.scheme.w8a8_block_fp8_linear.input_quant_op + assert isinstance(input_quant_op, QuantFP8) + assert input_quant_op._forward_method == input_quant_op.forward_cuda + + llm.apply_model(check_model) + + output = llm.generate_greedy("Hello my name is", max_tokens=20) + assert output diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index a2562a10b45a..50e8cad23617 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -687,6 +687,23 @@ def __post_init__(self): # local attention. 
self.scheduler_config.disable_hybrid_kv_cache_manager = True + def has_blocked_weights(): + if self.quant_config is not None: + if hasattr(self.quant_config, "weight_block_size"): + return self.quant_config.weight_block_size is not None + elif hasattr(self.quant_config, "has_blocked_weights"): + return self.quant_config.has_blocked_weights() + return False + + # Enable quant_fp8 CUDA ops (TODO disable in follow up) + # On H100 the CUDA kernel is faster than + # native implementation + # https://github.com/vllm-project/vllm/issues/25094 + if has_blocked_weights(): + custom_ops = self.compilation_config.custom_ops + if "none" not in custom_ops and "-quant_fp8" not in custom_ops: + custom_ops.append("+quant_fp8") + def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list: # remove the sizes that not multiple of tp_size when diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index d6550dd16892..3f771ea2abd1 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -644,6 +644,14 @@ def get_cache_scale(self, name: str) -> Optional[str]: # If no matches, return None return None + def has_blocked_weights(self) -> bool: + for scheme in self.target_scheme_map.values(): + weight_quant = scheme.get("weights") + if (weight_quant is not None + and weight_quant.strategy == QuantizationStrategy.BLOCK): + return True + return False + @staticmethod def supports_cutlass_24( weight_quant: Optional[QuantizationArgs], diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py index d42ae22c5139..fa0816959fcd 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py @@ -11,7 +11,7 @@ from vllm.model_executor.layers.quantization.compressed_tensors.schemes import ( CompressedTensorsScheme) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - apply_fp8_block_linear, check_aiter_fp8_linear_support, + W8A8BlockFp8LinearOp, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, create_fp8_weight_parameter, maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, process_fp8_weight_channel_strategy, @@ -41,16 +41,30 @@ def __init__(self, weight_quant: QuantizationArgs, self.strategy = weight_quant.strategy self.out_dtype = torch.get_default_dtype() self.is_static_input_scheme = is_static_input_scheme - self.act_q_group_shape = GroupShape.PER_TENSOR \ - if is_static_input_scheme else GroupShape.PER_TOKEN - self.fp8_linear = Fp8LinearOp( - act_quant_static=self.is_static_input_scheme, - act_quant_group_shape=self.act_q_group_shape) self.weight_block_size = self.weight_quant.block_structure + if self.weight_block_size is not None: + self.act_q_group_shape = GroupShape(1, self.weight_block_size[0]) + else: + self.act_q_group_shape = GroupShape.PER_TENSOR \ + if is_static_input_scheme else GroupShape.PER_TOKEN + self.cutlass_block_fp8_supported = cutlass_block_fp8_supported() self.use_aiter_and_is_supported = check_aiter_fp8_linear_support() + if self.weight_block_size is not None: + assert 
not self.is_static_input_scheme + self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp( + weight_group_shape=GroupShape(*self.weight_block_size), + act_quant_group_shape=self.act_q_group_shape, + cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, + use_aiter_and_is_supported=self.use_aiter_and_is_supported, + ) + else: + self.fp8_linear = Fp8LinearOp( + act_quant_static=self.is_static_input_scheme, + act_quant_group_shape=self.act_q_group_shape) + @classmethod def get_min_capability(cls) -> int: # lovelace and up @@ -141,13 +155,14 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: - if layer.weight_block_size is not None: - return apply_fp8_block_linear( - layer, + if self.weight_block_size is not None: + return self.w8a8_block_fp8_linear.apply( input=x, + weight=layer.weight, + weight_scale=layer.weight_scale, + input_scale=layer.input_scale, bias=bias, - cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, - use_aiter_and_is_supported=self.use_aiter_and_is_supported) + ) return self.fp8_linear.apply(input=x, weight=layer.weight, diff --git a/vllm/model_executor/layers/quantization/deepgemm.py b/vllm/model_executor/layers/quantization/deepgemm.py index d26a932eddb2..c2b3ccf19fca 100644 --- a/vllm/model_executor/layers/quantization/deepgemm.py +++ b/vllm/model_executor/layers/quantization/deepgemm.py @@ -43,7 +43,7 @@ def prepare_block_fp8_matmul_inputs( return M, N, K, C -def w8a8_block_fp8_matmul_deepgemm( +def w8a8_deepgemm_block_scaled_mm( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -59,7 +59,7 @@ def w8a8_block_fp8_matmul_deepgemm( return C -def w8a8_block_fp8_matmul_deepgemm_fake( +def w8a8_deepgemm_block_scaled_mm_fake( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -73,9 +73,9 @@ def w8a8_block_fp8_matmul_deepgemm_fake( direct_register_custom_op( - op_name="w8a8_block_fp8_matmul_deepgemm", - op_func=w8a8_block_fp8_matmul_deepgemm, + op_name="w8a8_deepgemm_block_scaled_mm", + op_func=w8a8_deepgemm_block_scaled_mm, mutates_args=[], - fake_impl=w8a8_block_fp8_matmul_deepgemm_fake, + fake_impl=w8a8_deepgemm_block_scaled_mm_fake, dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 2b24e052053c..c4951712baa7 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -31,7 +31,7 @@ register_moe_scaling_factors, rotate_flashinfer_fp8_moe_weights, select_cutlass_fp8_gemm_impl, swap_w13_to_w31) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - apply_fp8_block_linear, check_aiter_fp8_linear_support, + W8A8BlockFp8LinearOp, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, create_fp8_weight_parameter, expert_weight_is_col_major, get_col_major_tma_aligned_tensor, maybe_post_process_fp8_weight_block, @@ -234,15 +234,28 @@ def __init__(self, quant_config: Fp8Config): self.weight_block_size = self.quant_config.weight_block_size self.block_quant = self.weight_block_size is not None self.act_q_static = self.quant_config.activation_scheme == "static" - # Use per-token quantization for better perf if dynamic and cutlass - if not self.act_q_static and cutlass_fp8_supported(): - self.act_q_group_shape = GroupShape.PER_TOKEN + if self.weight_block_size: + self.act_q_group_shape = GroupShape(1, self.weight_block_size[0]) else: - self.act_q_group_shape = GroupShape.PER_TENSOR + # Use per-token quantization 
for better perf if dynamic and cutlass + if not self.act_q_static and cutlass_fp8_supported(): + self.act_q_group_shape = GroupShape.PER_TOKEN + else: + self.act_q_group_shape = GroupShape.PER_TENSOR - self.fp8_linear = Fp8LinearOp( - act_quant_static=self.act_q_static, - act_quant_group_shape=self.act_q_group_shape) + if self.block_quant: + assert not self.act_q_static + assert self.weight_block_size is not None + self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp( + weight_group_shape=GroupShape(*self.weight_block_size), + act_quant_group_shape=self.act_q_group_shape, + cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, + use_aiter_and_is_supported=self.use_aiter_and_is_supported, + ) + else: + self.fp8_linear = Fp8LinearOp( + act_quant_static=self.act_q_static, + act_quant_group_shape=self.act_q_group_shape) def create_weights( self, @@ -391,12 +404,15 @@ def apply(self, bias=bias) if self.block_quant: - return apply_fp8_block_linear( - layer, + assert self.weight_block_size is not None + + return self.w8a8_block_fp8_linear.apply( input=x, + weight=layer.weight, + weight_scale=layer.weight_scale, + input_scale=layer.input_scale, bias=bias, - cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, - use_aiter_and_is_supported=self.use_aiter_and_is_supported) + ) return self.fp8_linear.apply(input=x, weight=layer.weight, diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py index 31182f40b48f..ece3e5817116 100644 --- a/vllm/model_executor/layers/quantization/input_quant_fp8.py +++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py @@ -27,11 +27,14 @@ class QuantFP8(CustomOp): This CustomOp supports both static and dynamic quantization. """ - def __init__(self, - static: bool, - group_shape: GroupShape, - num_token_padding: Optional[int] = None, - column_major_scales: bool = False): + def __init__( + self, + static: bool, + group_shape: GroupShape, + num_token_padding: Optional[int] = None, + column_major_scales: bool = False, + use_ue8m0: Optional[bool] = None, # for Torch compile + ): """ :param static: static or dynamic quantization :param group_shape: quantization group shape (PER_TOKEN, PER_TENSOR, @@ -46,6 +49,7 @@ def __init__(self, self.group_shape = group_shape self.num_token_padding = num_token_padding self.column_major_scales = column_major_scales + self.use_ue8m0 = use_ue8m0 self.is_group_quant = group_shape.is_per_group() if self.is_group_quant: @@ -70,7 +74,8 @@ def forward_cuda( x, group_size=self.group_size, column_major_scales=self.column_major_scales, - dtype=_FP8_DTYPE) + dtype=_FP8_DTYPE, + use_ue8m0=self.use_ue8m0) assert (scale is not None) == self.static assert scale_ub is None or (not self.static and self.group_shape @@ -137,7 +142,10 @@ def _quantize_group_native( x_grouped = x.view(-1, num_groups, self.group_size) absmax = x_grouped.abs().max(dim=-1, keepdim=True)[0].float() - scales = (absmax / _FP8_MAX).clamp(min=_FP8_MIN_SCALING_FACTOR) + scales_raw = absmax / _FP8_MAX + if self.use_ue8m0: + scales_raw = torch.exp2(torch.ceil(torch.log2(scales_raw))) + scales = (scales_raw).clamp(min=_FP8_MIN_SCALING_FACTOR) x_scaled = x_grouped / scales x_quant = x_scaled.clamp(_FP8_MIN, _FP8_MAX).to(_FP8_DTYPE) @@ -151,6 +159,6 @@ def _quantize_group_native( scales = scales.reshape(orig_shape[:-1] + (num_groups, )) if self.column_major_scales: - scales = scales.transpose(-2, -1).contiguous() + scales = scales.transpose(-2, -1).contiguous().transpose(-1, -2) return x_quant, 
scales diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index d1d87b7ba12e..2098086bf240 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -13,8 +13,9 @@ import vllm.envs as envs from vllm import _custom_ops as ops from vllm.logger import init_logger +from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.utils.quant_utils import ( - group_broadcast) + GroupShape, group_broadcast) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( CUTLASS_BLOCK_FP8_SUPPORTED) from vllm.model_executor.parameter import (BlockQuantScaleParameter, @@ -24,6 +25,7 @@ from vllm.triton_utils import tl, triton from vllm.utils import cdiv, direct_register_custom_op from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used, + is_deep_gemm_supported, should_use_deepgemm_for_fp8_linear) logger = init_logger(__name__) @@ -35,6 +37,8 @@ def is_fp8(x: Union[torch.dtype, torch.Tensor]) -> bool: return x == torch.float8_e4m3fn or x == torch.float8_e4m3fnuz +# We need to pass in the is_hopper flag as argument because the function +# current_platform.is_device_capability() is not supported by Torch compiler. def cutlass_scaled_mm( A: torch.Tensor, B: torch.Tensor, @@ -42,15 +46,17 @@ def cutlass_scaled_mm( Bs: torch.Tensor, block_size: list[int], output_dtype: torch.dtype = torch.float16, + is_hopper: Optional[bool] = None, ) -> torch.Tensor: + if is_hopper is None: + is_hopper = current_platform.is_device_capability(90) return ops.cutlass_scaled_mm( A, B.T, out_dtype=output_dtype, scale_a=As, # SM90 block FP8 requires row-major scale_b, which we do ahead of time - scale_b=Bs if block_size is not None - and current_platform.is_device_capability(90) else Bs.T) + scale_b=Bs if block_size is not None and is_hopper else Bs.T) def rocm_aiter_gemm_w8a8_blockscale_impl( @@ -98,122 +104,189 @@ def rocm_aiter_gemm_w8a8_blockscale_fake( aiter_per1x128_quant = get_hip_quant(rocm_aiter.QuantType.per_1x128) -def dispatch_w8a8_blockscale_func( - use_cutlass: bool, use_aiter_and_is_supported: bool -) -> Callable[[ - torch.Tensor, - torch.Tensor, - torch.Tensor, - torch.Tensor, - list[int], - torch.dtype, -], torch.Tensor]: - if use_cutlass: - return cutlass_scaled_mm - if (use_aiter_and_is_supported): - return torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale - return w8a8_block_fp8_matmul +# TODO we should be able to change the type of block_size to GroupShape +# after we resolve GroupShape compilation issue +# https://github.com/vllm-project/vllm/issues/25270 +def _w8a8_triton_block_scaled_mm_func( + qx: torch.Tensor, + weight: torch.Tensor, + x_scale: torch.Tensor, + weight_scale: torch.Tensor, + block_size: list[int], + output_dtype: torch.dtype, +) -> torch.Tensor: + return w8a8_triton_block_scaled_mm(qx, weight, x_scale, weight_scale, + block_size, output_dtype) -# TODO fix ROCm->Triton custom path: -# https://github.com/vllm-project/vllm/issues/14397 -def apply_w8a8_block_fp8_linear( - input: torch.Tensor, +def _w8a8_triton_block_scaled_mm_fake( + qx: torch.Tensor, weight: torch.Tensor, - block_size: list[int], + x_scale: torch.Tensor, weight_scale: torch.Tensor, - input_scale: Optional[torch.Tensor] = None, - bias: Optional[torch.Tensor] = None, - cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, - use_aiter_and_is_supported: bool = False, + block_size: list[int], + 
output_dtype: torch.dtype, ) -> torch.Tensor: - assert input_scale is None - # View input as 2D matrix for fp8 methods - input_2d = input.view(-1, input.shape[-1]) - output_shape = [*input.shape[:-1], weight.shape[0]] - output_dtype = input.dtype + return torch.empty((qx.size(0), weight.size(0)), + dtype=output_dtype, + device=qx.device) + - if should_use_deepgemm_for_fp8_linear(output_dtype, weight): +direct_register_custom_op( + "w8a8_triton_block_scaled_mm_func", + _w8a8_triton_block_scaled_mm_func, + mutates_args=[], + fake_impl=_w8a8_triton_block_scaled_mm_fake, + dispatch_key="CUDA", +) + +# TODO fix ROCm->Triton custom path: +# https://github.com/vllm-project/vllm/issues/14397 +class W8A8BlockFp8LinearOp: + """ + This class executes a Blocked FP8 linear layer using cutlass if supported + and torch.scaled_mm otherwise. + """ + + def __init__( + self, + weight_group_shape: GroupShape, + act_quant_group_shape: GroupShape, + cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, + use_aiter_and_is_supported: bool = False, + ): + self.weight_group_shape = weight_group_shape + self.act_quant_group_shape = act_quant_group_shape + self.is_deep_gemm_supported = is_deep_gemm_supported() + self.is_hopper = current_platform.is_device_capability(90) + + # Get the correct blockscale mul and input quant operations. + # We can't use _dispatch_w8a8_blockscale_op to figure out if we want + # to use deepgemm because we don't know the shape of weights (and + # whether deepgemm supports it) at the init time. + self.w8a8_blockscale_op, self.input_quant_op = \ + self._dispatch_w8a8_blockscale_op( + cutlass_block_fp8_supported, use_aiter_and_is_supported) + self.deepgemm_input_quant_op = (QuantFP8( + False, + self.act_quant_group_shape, + column_major_scales=True, + use_ue8m0=is_deep_gemm_e8m0_used()) if self.is_deep_gemm_supported + else None) + + def apply( + self, + input: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + input_scale: Optional[torch.Tensor] = None, + bias: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + assert input_scale is None + # View input as 2D matrix for fp8 methods input_2d = input.view(-1, input.shape[-1]) output_shape = [*input.shape[:-1], weight.shape[0]] + output_dtype = input.dtype - q_input, x_scale = per_token_group_quant_fp8( - input_2d, - block_size[1], - column_major_scales=True, - ) + if should_use_deepgemm_for_fp8_linear(output_dtype, weight, + self.is_deep_gemm_supported): + output = self._run_deepgemm(input, weight, weight_scale) + if bias is not None: + output = output + bias + return output.to(dtype=input.dtype).view(*output_shape) + output = self.w8a8_blockscale_op(input_2d, weight, weight_scale) + if bias is not None: + output = output + bias + return output.to(dtype=input.dtype).view(*output_shape) + + def _run_deepgemm( + self, + input_2d: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + ) -> torch.Tensor: # ensure DeepGEMM-backed custom op is registered before use import vllm.model_executor.layers.quantization.deepgemm # noqa: F401 - output = torch.ops.vllm.w8a8_block_fp8_matmul_deepgemm( + assert self.deepgemm_input_quant_op is not None + q_input, x_scale = self.deepgemm_input_quant_op(input_2d) + return torch.ops.vllm.w8a8_deepgemm_block_scaled_mm( q_input, weight, x_scale, weight_scale, - block_size, - output_dtype=output_dtype) - if bias is not None: - output += bias - return output.to(dtype=output_dtype).view(*output_shape) - - w8a8_blockscale_func = dispatch_w8a8_blockscale_func( - 
cutlass_block_fp8_supported, use_aiter_and_is_supported) - if cutlass_block_fp8_supported: - num_pad = 0 - if current_platform.is_device_capability(90): - # pad first dimension to be divisible by 4 due to - # cutlass blockwise gemm limitation for hopper - num_pad = 4 - (input_2d.shape[0] % 4) - if num_pad > 0: - input_2d = torch.nn.functional.pad(input_2d, - (0, 0, 0, num_pad), - "constant", 0) - q_input, x_scale = per_token_group_quant_fp8(input_2d, - block_size[1], - column_major_scales=True) - output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale, - block_size, input.dtype) - if num_pad > 0: - output = output[:-num_pad] - else: - if use_aiter_and_is_supported: - q_input, x_scale = aiter_per1x128_quant( - input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8) - else: - q_input, x_scale = per_token_group_quant_fp8( - input_2d, block_size[1], column_major_scales=False) - - output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale, - block_size, input.dtype) - - if bias is not None: - output = output + bias - return output.to(dtype=input.dtype).view(*output_shape) - - -def apply_w8a8_block_fp8_linear_fake( - input: torch.Tensor, - weight: torch.Tensor, - block_size: list[int], - weight_scale: torch.Tensor, - input_scale: Optional[torch.Tensor] = None, - bias: Optional[torch.Tensor] = None, - cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, - use_aiter_and_is_supported: bool = False, -) -> torch.Tensor: - output_shape = [*input.shape[:-1], weight.shape[0]] - return torch.empty(output_shape, dtype=input.dtype, device=input.device) - + self.weight_group_shape, + output_dtype=input_2d.dtype) -if not current_platform.is_cpu(): - direct_register_custom_op( - op_name="apply_w8a8_block_fp8_linear", - op_func=apply_w8a8_block_fp8_linear, - mutates_args=[], - fake_impl=apply_w8a8_block_fp8_linear_fake, - ) + def _run_cutlass( + self, + input_2d: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + ) -> torch.Tensor: + assert self.input_quant_op is not None + if self.is_hopper: + # We pad unconditionally (even if shape is already divisible by 4) + # to support dynamic shape for input_2d.shape[0] in torch.compile + x = torch.nn.functional.pad(input_2d, + (0, 0, 0, -input_2d.shape[0] % 4)) + else: + x = input_2d + + q_input, x_scale = self.input_quant_op(x) + output = cutlass_scaled_mm(q_input, weight, x_scale, weight_scale, + list(self.weight_group_shape), + input_2d.dtype, self.is_hopper) + output = output[0:input_2d.shape[0], ...] 
+ return output + + def _run_aiter( + self, + input_2d: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + ) -> torch.Tensor: + assert self.act_quant_group_shape == GroupShape(1, 128) + q_input, x_scale = aiter_per1x128_quant( + input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8) + return torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale( + q_input, weight, x_scale, weight_scale, self.weight_group_shape, + input_2d.dtype) + + def _run_triton( + self, + input_2d: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + ) -> torch.Tensor: + assert self.input_quant_op is not None + q_input, x_scale = self.input_quant_op(input_2d) + return torch.ops.vllm.w8a8_triton_block_scaled_mm_func( + q_input, weight, x_scale, weight_scale, self.weight_group_shape, + input_2d.dtype) + + def _dispatch_w8a8_blockscale_op( + self, + use_cutlass: bool, + use_aiter_and_is_supported: bool, + ) -> tuple[Callable[[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + ], torch.Tensor], Optional[QuantFP8]]: + if use_cutlass: + return self._run_cutlass, (QuantFP8(False, + self.act_quant_group_shape, + column_major_scales=True, + use_ue8m0=False)) + if use_aiter_and_is_supported: + return self._run_aiter, None + return self._run_triton, (QuantFP8(False, + self.act_quant_group_shape, + column_major_scales=False, + use_ue8m0=False)) def input_to_float8( @@ -465,7 +538,7 @@ def per_token_group_quant_fp8( @triton.jit -def _w8a8_block_fp8_matmul( +def _w8a8_triton_block_scaled_mm( # Pointers to inputs and output A, B, @@ -590,7 +663,7 @@ def get_w8a8_block_fp8_configs(N: int, K: int, block_n: int, return None -def w8a8_block_fp8_matmul( +def w8a8_triton_block_scaled_mm( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -650,7 +723,7 @@ def grid(META): return (triton.cdiv(M, META["BLOCK_SIZE_M"]) * triton.cdiv(N, META["BLOCK_SIZE_N"]), ) - _w8a8_block_fp8_matmul[grid]( + _w8a8_triton_block_scaled_mm[grid]( A, B, C, @@ -997,25 +1070,6 @@ def maybe_post_process_fp8_weight_block(layer: torch.nn.Module, layer.weight_scale.data.T.contiguous(), requires_grad=False) -def apply_fp8_block_linear(layer: torch.nn.Module, input: torch.Tensor, - bias: Optional[torch.Tensor], - cutlass_block_fp8_supported: bool, - use_aiter_and_is_supported: bool) -> torch.Tensor: - """Apply block-wise FP8 linear operation.""" - assert layer.weight_block_size is not None - - return torch.ops.vllm.apply_w8a8_block_fp8_linear( - input=input, - weight=layer.weight, - block_size=layer.weight_block_size, - weight_scale=layer.weight_scale, - input_scale=layer.input_scale, - bias=bias, - cutlass_block_fp8_supported=cutlass_block_fp8_supported, - use_aiter_and_is_supported=use_aiter_and_is_supported, - ) - - def expert_weight_is_col_major(x: torch.Tensor) -> bool: assert x.dim() == 3 b, m, n = x.shape diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index 4083193d7650..2f533ca0639f 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -9,7 +9,7 @@ import functools import importlib import os -from typing import Any, Callable, NoReturn +from typing import Any, Callable, NoReturn, Optional import torch @@ -172,9 +172,13 @@ def calc_diff(x: torch.Tensor, y: torch.Tensor): return 1 - sim -def should_use_deepgemm_for_fp8_linear(output_dtype: torch.dtype, - weight: torch.Tensor): - return (is_deep_gemm_supported() and output_dtype == torch.bfloat16 +def should_use_deepgemm_for_fp8_linear( + output_dtype: torch.dtype, + weight: torch.Tensor, + supports_deep_gemm: Optional[bool] = None): + if 
supports_deep_gemm is None: + supports_deep_gemm = is_deep_gemm_supported() + return (supports_deep_gemm and output_dtype == torch.bfloat16 and weight.shape[0] % 128 == 0 and weight.shape[1] % 128 == 0) From 24fab45d96a91b491db338ee02cd24e55b7fbb5f Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 23 Sep 2025 15:29:26 -0400 Subject: [PATCH 288/518] [Perf] Change default CUDAGraphMode from PIECEWISE to FULL_AND_PIECEWISE (#25444) Signed-off-by: mgoin --- vllm/config/__init__.py | 9 ++++++++- vllm/config/compilation.py | 7 +++---- vllm/v1/worker/gpu_model_runner.py | 23 +++++++++++++++++++++-- 3 files changed, 32 insertions(+), 7 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 50e8cad23617..d786d3e289b3 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -509,8 +509,15 @@ def __post_init__(self): if self.compilation_config.cudagraph_mode is None: if envs.VLLM_USE_V1 and self.compilation_config.level \ == CompilationLevel.PIECEWISE: + # default to full and piecewise for most models self.compilation_config.cudagraph_mode = \ - CUDAGraphMode.PIECEWISE + CUDAGraphMode.FULL_AND_PIECEWISE + + # pooling model does not support full cudagraphs + if self.model_config is not None and \ + self.model_config.pooler_config is not None: + self.compilation_config.cudagraph_mode = \ + CUDAGraphMode.PIECEWISE else: self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 34fa7fcfe7e8..0441745e8b36 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -228,15 +228,14 @@ class CompilationConfig: The mode of the cudagraph: - NONE, no cudagraph capture. - - PIECEWISE. (v1 default) + - PIECEWISE. - FULL. - FULL_DECODE_ONLY. - - FULL_AND_PIECEWISE. + - FULL_AND_PIECEWISE. (v1 default) PIECEWISE mode build piecewise cudagraph only, keeping the cudagraph incompatible ops (i.e. some attention ops) outside the cudagraph for general flexibility. - This is the default mode. FULL mode: Capture full cudagraph for all batches. Can be good for small models or workloads with small prompts; not supported by many backends. @@ -249,7 +248,7 @@ class CompilationConfig: FULL_AND_PIECEWISE mode: Capture full cudagraph for decode batches and piecewise cudagraph for prefill and mixed prefill-decode batches. - This is like the most performant mode for most models. + This is the most performant mode for most models and is the default. Currently, the cudagraph mode is only used for the v1 engine. Note that the cudagraph logic is generally orthogonal to the diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index ed324138c6fe..df4f76958586 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2947,8 +2947,7 @@ def _dummy_run( # TODO(luka) better system for describing dummy batches seq_lens = [1] * num_decode_tokens + [num_prefill_tokens + 1] else: - # Make sure max_model_len is used at the graph capture time. 
- seq_lens = self.max_model_len + seq_lens = max_query_len self.seq_lens.np[:num_reqs] = seq_lens self.seq_lens.np[num_reqs:] = 0 self.seq_lens.copy_to_gpu() @@ -3541,6 +3540,26 @@ def initialize_cudagraph_capture(self) -> None: CUDAGraphMode.FULL_DECODE_ONLY logger.warning(msg) + # check that if we are doing decode full-cudagraphs it is supported + if (cudagraph_mode.decode_mode() == CUDAGraphMode.FULL + and min_cg_support == AttentionCGSupport.NEVER): + msg = (f"CUDAGraphMode.{cudagraph_mode.name} is not supported " + f"with {min_cg_builder_name} backend (support: " + f"{min_cg_support})") + if (self.compilation_config.level == CompilationLevel.PIECEWISE and + (self.compilation_config.splitting_ops_contain_attention() + or self.compilation_config.use_inductor_graph_partition)): + msg += "; setting cudagraph_mode=PIECEWISE because "\ + "attention is compiled piecewise" + cudagraph_mode = self.compilation_config.cudagraph_mode = \ + CUDAGraphMode.PIECEWISE + else: + msg += "; setting cudagraph_mode=NONE because "\ + "attention is not compiled piecewise" + cudagraph_mode = self.compilation_config.cudagraph_mode = \ + CUDAGraphMode.NONE + logger.warning(msg) + # check that if we are doing spec-decode + decode full-cudagraphs it is # supported if (cudagraph_mode.decode_mode() == CUDAGraphMode.FULL From d5944d5146d9a6847badc7018b328de079cc7e80 Mon Sep 17 00:00:00 2001 From: jiahanc <173873397+jiahanc@users.noreply.github.com> Date: Tue, 23 Sep 2025 12:44:35 -0700 Subject: [PATCH 289/518] [Speculators][Speculative Decoding] Fix gpt-oss eagle3 accuracy issue (#25406) Signed-off-by: jiahanc <173873397+jiahanc@users.noreply.github.com> --- tests/v1/spec_decode/test_eagle.py | 4 ++ vllm/config/model.py | 1 + vllm/model_executor/models/llama_eagle.py | 5 ++ vllm/model_executor/models/llama_eagle3.py | 5 ++ vllm/v1/spec_decode/eagle.py | 70 +++++++++++++++++----- vllm/v1/worker/gpu_model_runner.py | 11 +++- 6 files changed, 79 insertions(+), 17 deletions(-) diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index 23bfabfcf89b..5096f9fd647b 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -534,6 +534,8 @@ def create_deterministic_logits(token_ids): proposer.runner.attn_groups.append([mock.MagicMock()]) proposer.runner.attn_groups[0][0].get_metadata_builder.return_value = \ attn_metadata_builder + proposer._get_attention_metadata_builder = mock.MagicMock( + return_value=attn_metadata_builder) result = proposer.propose(target_token_ids=target_token_ids, target_positions=target_positions, @@ -660,6 +662,8 @@ def create_deterministic_logits(token_ids, k: int): proposer.runner.attn_groups.append([mock.MagicMock()]) proposer.runner.attn_groups[0][0].get_metadata_builder.return_value = \ attn_metadata_builder + proposer._get_attention_metadata_builder = mock.MagicMock( + return_value=attn_metadata_builder) # Setup inputs for the proposer. target_token_ids = torch.randint(0, diff --git a/vllm/config/model.py b/vllm/config/model.py index 33e5d3ea04a4..d8a8fe20fd03 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -1003,6 +1003,7 @@ def _verify_quantization(self) -> None: self.quantization = quantization_override break + quant_method = quant_method if quant_method != "" else None # Verify quantization configurations. 
if self.quantization is None: self.quantization = quant_method diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py index dfae3c3ea543..2ff2d54a83aa 100644 --- a/vllm/model_executor/models/llama_eagle.py +++ b/vllm/model_executor/models/llama_eagle.py @@ -134,6 +134,11 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): nn.Module.__init__(self) self.config = vllm_config. \ speculative_config.draft_model_config.hf_config + # Ensure draft_vocab_size is set + # default to the base vocab size when absent + if getattr(self.config, "draft_vocab_size", None) is None: + base_vocab_size = getattr(self.config, "vocab_size", None) + self.config.draft_vocab_size = base_vocab_size target_layer_num = vllm_config.model_config.get_num_layers( vllm_config.parallel_config) self.model = LlamaModel(vllm_config=vllm_config, diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index fb10af6c53c9..b99a1547918e 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -203,6 +203,11 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): nn.Module.__init__(self) self.config = vllm_config. \ speculative_config.draft_model_config.hf_config + # Ensure draft_vocab_size is set + # default to the base vocab size when absent + if getattr(self.config, "draft_vocab_size", None) is None: + base_vocab_size = getattr(self.config, "vocab_size", None) + self.config.draft_vocab_size = base_vocab_size target_layer_num = vllm_config.model_config.get_num_layers( vllm_config.parallel_config) diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index a0f40828d42f..a9e0a38fe341 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -9,6 +9,7 @@ import torch import torch.nn as nn +from vllm.attention.backends.abstract import AttentionMetadataBuilder from vllm.attention.layer import Attention from vllm.config import (CompilationLevel, VllmConfig, get_layers_from_vllm_config) @@ -77,6 +78,8 @@ def __init__( self.is_multimodal_model = vllm_config.model_config \ .is_multimodal_model + self.attn_metadata_builder: Optional[AttentionMetadataBuilder] = None + self.use_cuda_graph = (self.vllm_config.compilation_config.level == CompilationLevel.PIECEWISE and not self.vllm_config.model_config.enforce_eager) @@ -117,7 +120,7 @@ def __init__( with_numpy=True) # Determine allowed attention backends once during initialization. - self.allowed_attn_types: tuple[type[EagleAttentionMetadata], ...] + self.allowed_attn_types: tuple[type, ...] if current_platform.is_rocm(): rocm_types = [TritonAttentionMetadata, FlashAttentionMetadata] # vllm.v1.attention.backends.rocm_aiter_fa is an optional backend @@ -190,10 +193,12 @@ def propose( assert self.runner is not None - # FIXME: need to consider multiple kv_cache_groups - attn_metadata_builder = \ - self.runner.attn_groups[0][0].get_metadata_builder() - attn_metadata = attn_metadata_builder.build_for_drafting( + # Select the correct attention metadata builders for EAGLE layers. + # Get the attention metadata builders once and reuse for later. 
+ builder = (self._get_attention_metadata_builder() + if self.attn_metadata_builder is None else + self.attn_metadata_builder) + attn_metadata = builder.build_for_drafting( common_attn_metadata=common_attn_metadata, draft_index=0) # At this moment, we assume all eagle layers belong to the same KV @@ -327,11 +332,9 @@ def propose( exceeds_max_model_len, PADDING_SLOT_ID) # Rebuild attention metadata - attn_metadata_builder = \ - self.runner.attn_groups[0][0].get_metadata_builder() - attn_metadata = attn_metadata_builder\ - .build_for_drafting(common_attn_metadata=common_attn_metadata, - draft_index=token_index + 1) + attn_metadata = builder.build_for_drafting( + common_attn_metadata=common_attn_metadata, + draft_index=token_index + 1) for layer_name in self.attn_layer_names: per_layer_attn_metadata[layer_name] = attn_metadata @@ -851,10 +854,24 @@ def load_model(self, target_model: nn.Module) -> None: # share lm_head with the target model if needed # some model definition do not define lm_head explicitly # and reuse embed_tokens for lm_head, e.g., CohereForCausalLM - if self.vllm_config.speculative_config.method != "eagle3" and \ - hasattr(target_language_model, "lm_head"): - logger.info("Loading EAGLE LM head weights from the target model.") - self.model.lm_head = target_language_model.lm_head + if self.vllm_config.speculative_config.method != "eagle3": + if hasattr(target_language_model, "lm_head"): + logger.info( + "Loading EAGLE LM head weights from the target model.") + self.model.lm_head = target_language_model.lm_head + else: + if (hasattr(self.model, "lm_head") + and hasattr(target_language_model, "lm_head") + and self.model.lm_head.weight.shape + == target_language_model.lm_head.weight.shape): + logger.info("Assuming the EAGLE head shares the same lm_head" + " with the target model.") + del self.model.lm_head + self.model.lm_head = target_language_model.lm_head + else: + logger.info( + "The EAGLE head's lm_head will be loaded separately" + " from the target model.") @torch.inference_mode() def dummy_run( @@ -877,6 +894,31 @@ def dummy_run( inputs_embeds=inputs_embeds, ) + def _get_attention_metadata_builder( + self) -> list[AttentionMetadataBuilder]: + """Find and return the attention metadata builders for EAGLE layers. + + Returns: + The metadata builders for EAGLE layers. + + Raises: + AssertionError: If no metadata builders are found for EAGLE layers. 
+ """ + builder = None + chosen_layer = self.attn_layer_names[0] + + for kv_cache_group in self.runner.attn_groups: + for attn_group in kv_cache_group: + if chosen_layer in attn_group.layer_names: + builder = attn_group.get_metadata_builder() + break + if builder is not None: + break + + assert builder is not None, ( + "Failed to find attention metadata builder for EAGLE layers.") + return builder + def validate_same_kv_cache_group(self, kv_cache_config: KVCacheConfig) -> None: """ diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index df4f76958586..d0fe5e25d6ab 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1177,9 +1177,14 @@ def _prepare_inputs( encoder_seq_lens=encoder_seq_lens, ) - if self.speculative_config and \ - spec_decode_common_attn_metadata is None: - spec_decode_common_attn_metadata = common_attn_metadata + if (self.speculative_config + and spec_decode_common_attn_metadata is None): + if isinstance(self.drafter, EagleProposer): + if (self.drafter.attn_layer_names[0] + in kv_cache_group_spec.layer_names): + spec_decode_common_attn_metadata = common_attn_metadata + else: + spec_decode_common_attn_metadata = common_attn_metadata for attn_group in self.attn_groups[kv_cache_group_id]: # Prepare for cascade attention if enabled & beneficial. From a8ffc4f0f2d02aa4e505dcc3c974d5ec6e00737c Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 23 Sep 2025 15:49:55 -0400 Subject: [PATCH 290/518] [Bugfix] Lower gpt-oss max cudagraph size to 992 to be compatible with FA3 (#25508) Signed-off-by: mgoin --- vllm/model_executor/models/config.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index ce3d23763ed6..aa7bcf5b65ad 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -266,24 +266,24 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None: if structured_outputs_config.reasoning_parser == "": structured_outputs_config.reasoning_parser = "openai_gptoss" - # Increase the max capture size from 512 to 1024 for performance. + # Increase the max capture size from 512 to 992 for performance. # NOTE(woosuk): This will increase the number of CUDA graphs - # from 67 to 83. + # from 67 to 81. scheduler_config = vllm_config.scheduler_config if len(scheduler_config.cuda_graph_sizes) == 1: max_capture_size = scheduler_config.cuda_graph_sizes[0] # FIXME(woosuk): When using full cuda graph with FA3, the max # supported size is 992. 
- if max_capture_size < 1024: + if max_capture_size < 992: cuda_graph_sizes = [1, 2, 4] # Step size 8 for small batch sizes cuda_graph_sizes += [i for i in range(8, 256, 8)] # Step size 16 for larger batch sizes - cuda_graph_sizes += [i for i in range(256, 1025, 16)] + cuda_graph_sizes += [i for i in range(256, 993, 16)] scheduler_config.cuda_graph_sizes = cuda_graph_sizes logger.info( "Overriding max cuda graph capture size to " - "%d for performance.", 1024) + "%d for performance.", 992) class MambaModelConfig(VerifyAndUpdateConfig): From 8bdd8b5c5155114f22af6d99f97176a15b6f8bf5 Mon Sep 17 00:00:00 2001 From: Ilya Markov Date: Tue, 23 Sep 2025 21:53:00 +0200 Subject: [PATCH 291/518] Enable symmetric memory all reduce by default only enabling for TP (#25070) Signed-off-by: ilmarkov Co-authored-by: Michael Goin --- .buildkite/test-pipeline.yaml | 2 + tests/distributed/test_symm_mem_allreduce.py | 59 +++++++++++++++---- .../device_communicators/cuda_communicator.py | 7 ++- vllm/envs.py | 4 +- 4 files changed, 56 insertions(+), 16 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index cf32087ed3b9..aef6d709722f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -164,6 +164,7 @@ steps: - tests/v1/test_internal_lb_dp.py - tests/v1/test_hybrid_lb_dp.py - tests/v1/engine/test_engine_core_client.py + - tests/distributed/test_symm_mem_allreduce.py commands: # test with torchrun tp=2 and external_dp=2 - torchrun --nproc-per-node=4 distributed/test_torchrun_example.py @@ -188,6 +189,7 @@ steps: - pytest -v -s compile/test_basic_correctness.py - pytest -v -s distributed/test_pynccl.py - pytest -v -s distributed/test_events.py + - pytest -v -s distributed/test_symm_mem_allreduce.py # TODO: create a dedicated test section for multi-GPU example tests # when we have multiple distributed example tests - pushd ../examples/offline_inference diff --git a/tests/distributed/test_symm_mem_allreduce.py b/tests/distributed/test_symm_mem_allreduce.py index 5a804a389123..83e1fe47aeec 100644 --- a/tests/distributed/test_symm_mem_allreduce.py +++ b/tests/distributed/test_symm_mem_allreduce.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import queue import random import typing @@ -10,26 +11,31 @@ import torch.multiprocessing as mp import vllm.envs as envs +from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config from vllm.distributed import cleanup_dist_env_and_memory from vllm.distributed.communication_op import tensor_model_parallel_all_reduce from vllm.distributed.device_communicators.cuda_communicator import ( CudaCommunicator) -from vllm.distributed.parallel_state import (get_tensor_model_parallel_group, - get_tp_group, +from vllm.distributed.parallel_state import (get_tp_group, init_distributed_environment, initialize_model_parallel) +from vllm.engine.arg_utils import EngineArgs +from vllm.engine.llm_engine import LLMEngine from vllm.platforms import current_platform from vllm.utils import update_environment_variables torch.manual_seed(42) random.seed(44) -test_size_elements = 4 * 1024 * 1024 +test_size_elements = 1024 * 1024 -def symm_mem_allreduce_worker(local_rank: int, world_size: int): +def symm_mem_allreduce_worker(local_rank: int, world_size: int, q: mp.Queue): monkeypatch = pytest.MonkeyPatch() - with monkeypatch.context() as m: + config = VllmConfig(parallel_config=ParallelConfig( + tensor_parallel_size=world_size)) + + with 
monkeypatch.context() as m, set_current_vllm_config(config): m.delenv("CUDA_VISIBLE_DEVICES", raising=False) dtype = torch.bfloat16 device = torch.device(f"cuda:{local_rank}") @@ -51,22 +57,26 @@ def symm_mem_allreduce_worker(local_rank: int, world_size: int): get_tp_group().device_communicator) symm_mem_comm = cuda_communicator.symm_mem_comm if symm_mem_comm is None or symm_mem_comm.disabled: - pytest.skip("SymmMemCommunicator is not available or disabled.") + # can't use skip under multiprocessing + q.put("SymmMemCommunicator is not available or disabled.") + return inp_direct_symm_mem = torch.randint(1, 23, (test_size_elements, ), dtype=dtype, device=device) if not symm_mem_comm.should_use_symm_mem(inp_direct_symm_mem): - pytest.skip( + # can't use skip under multiprocessing + q.put( "SymmMemCommunicator isn't used for this world and input size." ) + return original_inp_direct_symm_mem = inp_direct_symm_mem.clone() out_direct_symm_mem = symm_mem_comm.all_reduce(inp_direct_symm_mem) assert out_direct_symm_mem is not None - group = get_tensor_model_parallel_group().device_group + group = get_tp_group().device_group dist.all_reduce(original_inp_direct_symm_mem, group=group) torch.testing.assert_close(out_direct_symm_mem, original_inp_direct_symm_mem, @@ -100,9 +110,34 @@ def test_symm_mem_allreduce(monkeypatch: pytest.MonkeyPatch, tp_size, world_size = tp_size * pipeline_parallel_size if world_size > torch.cuda.device_count(): pytest.skip("Not enough GPUs to run the test.") + q = mp.get_context('spawn').Queue() + mp.spawn(symm_mem_allreduce_worker, + args=(world_size, q), + nprocs=world_size) + try: + val = q.get(timeout=1) + except queue.Empty: + val = None + finally: + cleanup_dist_env_and_memory() + if val is not None: + pytest.skip(val) - # Enable SymmMemCommunicator - monkeypatch.setenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "1") - mp.spawn(symm_mem_allreduce_worker, args=(world_size, ), nprocs=world_size) - cleanup_dist_env_and_memory() +@pytest.mark.skipif( + not current_platform.is_cuda(), + reason="SymmMemAllreduce is only available for CUDA platforms.") +@pytest.mark.skipif(envs.VLLM_TARGET_DEVICE not in ["cuda"], + reason="Only test on CUDA") +def test_dp_with_symm_mem_allreduce(monkeypatch: pytest.MonkeyPatch): + world_size = 4 + if world_size > torch.cuda.device_count(): + pytest.skip("Not enough GPUs to run the test.") + # Verify that the DataParallel runs without error + engine_args = EngineArgs(model="distilbert/distilgpt2", + enforce_eager=True, + enable_prefix_caching=True, + data_parallel_size=2, + tensor_parallel_size=2, + data_parallel_backend="mp") + LLMEngine.from_engine_args(engine_args) diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py index 6c25bf3cd95c..b20e79f577c3 100644 --- a/vllm/distributed/device_communicators/cuda_communicator.py +++ b/vllm/distributed/device_communicators/cuda_communicator.py @@ -30,18 +30,21 @@ def __init__(self, unique_name: str = ""): super().__init__(cpu_group, device, device_group, unique_name) if "tp" not in unique_name: - # only tp uses custom allreduce + # custom allreduce or torch symm mem can be used only by tp use_custom_allreduce = False + use_torch_symm_mem = False else: from vllm.distributed.parallel_state import ( _ENABLE_CUSTOM_ALL_REDUCE) use_custom_allreduce = _ENABLE_CUSTOM_ALL_REDUCE + use_torch_symm_mem = envs.VLLM_ALLREDUCE_USE_SYMM_MEM # ep does not use pynccl use_pynccl = "ep" not in unique_name self.use_pynccl = use_pynccl 
self.use_custom_allreduce = use_custom_allreduce + self.use_torch_symm_mem = use_torch_symm_mem # lazy import to avoid documentation build error from vllm.distributed.device_communicators.custom_all_reduce import ( @@ -65,7 +68,7 @@ def __init__(self, self.ca_comm: Optional[CustomAllreduce] = None self.qr_comm: Optional[QuickAllReduce] = None self.symm_mem_comm: Optional[SymmMemCommunicator] = None - if envs.VLLM_ALLREDUCE_USE_SYMM_MEM and current_platform.is_cuda(): + if use_torch_symm_mem and current_platform.is_cuda(): self.symm_mem_comm = SymmMemCommunicator( group=self.cpu_group, device=self.device, diff --git a/vllm/envs.py b/vllm/envs.py index fa6f14d6b037..50d58c5468f9 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -182,7 +182,7 @@ VLLM_USE_FLASHINFER_MOE_MXFP4_BF16: bool = False VLLM_ROCM_FP8_MFMA_PAGE_ATTN: bool = False VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS: bool = False - VLLM_ALLREDUCE_USE_SYMM_MEM: bool = False + VLLM_ALLREDUCE_USE_SYMM_MEM: bool = True VLLM_TUNED_CONFIG_FOLDER: Optional[str] = None VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False @@ -1370,7 +1370,7 @@ def get_vllm_port() -> Optional[int]: # Whether to use pytorch symmetric memory for allreduce "VLLM_ALLREDUCE_USE_SYMM_MEM": - lambda: bool(int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "0"))), + lambda: bool(int(os.getenv("VLLM_ALLREDUCE_USE_SYMM_MEM", "1"))), # Allows vllm to find tuned config under customized folder "VLLM_TUNED_CONFIG_FOLDER": From 8b8a8afc895ab200cf9aa27ea47c5fc6dbef443b Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:09:37 -0400 Subject: [PATCH 292/518] [CI] Fix Pre-commit Issue (#25497) Signed-off-by: yewentao256 --- vllm/v1/worker/gpu_model_runner.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index d0fe5e25d6ab..f78582495814 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2367,7 +2367,7 @@ def propose_draft_token_ids( sampling_metadata: SamplingMetadata, hidden_states: torch.Tensor, sample_hidden_states: torch.Tensor, - aux_hidden_states: Optional[torch.Tensor], + aux_hidden_states: Optional[list[torch.Tensor]], spec_decode_metadata: Optional[SpecDecodeMetadata], common_attn_metadata: CommonAttentionMetadata, ) -> Union[list[list[int]], torch.Tensor]: @@ -2387,6 +2387,7 @@ def propose_draft_token_ids( else: indices = [] offset = 0 + assert spec_decode_metadata is not None for num_draft, tokens in zip( spec_decode_metadata.num_draft_tokens, sampled_token_ids): @@ -2437,6 +2438,7 @@ def propose_draft_token_ids( # TODO(woosuk): Support M-RoPE. target_positions = self.positions.gpu[:num_scheduled_tokens] if self.use_aux_hidden_state_outputs: + assert aux_hidden_states is not None target_hidden_states = torch.cat( [h[:num_scheduled_tokens] for h in aux_hidden_states], dim=-1) @@ -2462,6 +2464,7 @@ def propose_draft_token_ids( # TODO(woosuk): Support M-RoPE. 
target_positions = self.positions.gpu[token_indices] if self.use_aux_hidden_state_outputs: + assert aux_hidden_states is not None target_hidden_states = torch.cat( [h[token_indices] for h in aux_hidden_states], dim=-1) else: @@ -2897,7 +2900,9 @@ def _dummy_run( assert not create_mixed_batch num_reqs = cdiv(num_tokens, max_query_len) assert num_reqs <= max_num_reqs, \ - "Do not capture num_reqs > max_num_reqs for uniform batch" + f"Do not capture num_reqs {num_reqs} > max_num_reqs " \ + f"{max_num_reqs} for uniform batch. Num tokens: " \ + f"{num_tokens}, max_query_len: {max_query_len}" num_scheduled_tokens_list = [max_query_len] * num_reqs if num_tokens % max_query_len != 0: num_scheduled_tokens_list[-1] = num_tokens % max_query_len From c828d1bf987bbfe412c1fcd46736b1598d647053 Mon Sep 17 00:00:00 2001 From: Alec S <10566873+alecsolder@users.noreply.github.com> Date: Tue, 23 Sep 2025 16:43:45 -0400 Subject: [PATCH 293/518] [Bugfix] gpt-oss container tool output bug (#25485) Signed-off-by: Alec Solder Co-authored-by: Alec Solder --- vllm/entrypoints/harmony_utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/entrypoints/harmony_utils.py b/vllm/entrypoints/harmony_utils.py index 57e4bb1e1da5..0c1c9c3192fc 100644 --- a/vllm/entrypoints/harmony_utils.py +++ b/vllm/entrypoints/harmony_utils.py @@ -317,7 +317,8 @@ def parse_output_message(message: Message) -> list[ResponseOutputItem]: ) output_items.append(response_item) elif recipient is not None and (recipient.startswith("python") - or recipient.startswith("browser")): + or recipient.startswith("browser") + or recipient.startswith("container")): for content in message.content: reasoning_item = ResponseReasoningItem( id=f"rs_{random_uuid()}", From 08275ec0a2ba48fbc1054bdbdda2f1e0dfcb20b3 Mon Sep 17 00:00:00 2001 From: Chauncey Date: Wed, 24 Sep 2025 05:25:46 +0800 Subject: [PATCH 294/518] [Build] Update Xgrammar to 0.1.25 (#25467) Signed-off-by: chaunceyjiang --- requirements/common.txt | 2 +- vllm/v1/structured_output/backend_xgrammar.py | 8 ++++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/requirements/common.txt b/requirements/common.txt index 7973da080c37..a52745f69870 100644 --- a/requirements/common.txt +++ b/requirements/common.txt @@ -24,7 +24,7 @@ outlines_core == 0.2.11 # required for outlines backend disk cache diskcache == 5.6.3 lark == 1.2.2 -xgrammar == 0.1.24; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" +xgrammar == 0.1.25; platform_machine == "x86_64" or platform_machine == "aarch64" or platform_machine == "arm64" typing_extensions >= 4.10 filelock >= 3.16.1 # need to contain https://github.com/tox-dev/filelock/pull/317 partial-json-parser # used for parsing partial JSON outputs diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py index 55b4792fe010..a853e6540719 100644 --- a/vllm/v1/structured_output/backend_xgrammar.py +++ b/vllm/v1/structured_output/backend_xgrammar.py @@ -108,7 +108,9 @@ def compile_grammar(self, request_type: StructuredOutputOptions, end=s["end"], ) for s in s_tag["structures"] ] - ctx = self.compiler.compile_structural_tag(tags, s_tag["triggers"]) + structural_tag = xgr.StructuralTag.from_legacy_structural_tag( + tags, s_tag["triggers"]) + ctx = self.compiler.compile_structural_tag(structural_tag) else: logger.error( "Validation should have already occurred. Please file an issue." 
@@ -318,6 +320,8 @@ def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None: end=s["end"], ) for s in s_tag["structures"] ] - xgr.Grammar.from_structural_tag(tags, s_tag["triggers"]) + structural_tag = xgr.StructuralTag.from_legacy_structural_tag( + tags, s_tag["triggers"]) + xgr.Grammar.from_structural_tag(structural_tag) except Exception as e: raise ValueError("Invalid structural tag specification.") from e From 690f948e4a440baa85a7327c7e24f297d25b9bb2 Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Tue, 23 Sep 2025 17:31:08 -0400 Subject: [PATCH 295/518] [Bugfix] Fix for the import error from #24588 (#25481) Signed-off-by: Gregory Shtrasberg --- .../layers/fused_moe/gpt_oss_triton_kernels_moe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py index 0e84a9241e90..18de75851934 100644 --- a/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py +++ b/vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py @@ -23,7 +23,7 @@ from triton_kernels.routing import (RoutingData, routing, routing_from_bitmatrix) from triton_kernels.tensor import Bitmatrix - except (ModuleNotFoundError, AttributeError) as e: + except (AttributeError, ImportError) as e: logger.error( "Failed to import Triton kernels. Please make sure your triton " "version is compatible. Error: %s", e) From ae002924e96bd17cfc690c266623c340ff28a70f Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Wed, 24 Sep 2025 05:58:25 +0800 Subject: [PATCH 296/518] [CI/Build] Fix and re-enable v1 PP test on CI (#25496) Signed-off-by: Isotr0py --- tests/distributed/test_pipeline_parallel.py | 3 --- vllm/model_executor/models/granite.py | 7 ------- vllm/model_executor/models/granitemoe.py | 7 ------- vllm/model_executor/models/granitemoeshared.py | 7 ------- 4 files changed, 24 deletions(-) diff --git a/tests/distributed/test_pipeline_parallel.py b/tests/distributed/test_pipeline_parallel.py index 073b362b6474..aa28ed9ce25e 100644 --- a/tests/distributed/test_pipeline_parallel.py +++ b/tests/distributed/test_pipeline_parallel.py @@ -382,7 +382,6 @@ def test_tp_language_generation( test_options: PPTestOptions, num_gpus_available, ): - pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, @@ -410,7 +409,6 @@ def test_tp_language_embedding( test_options: PPTestOptions, num_gpus_available, ): - pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, @@ -438,7 +436,6 @@ def test_tp_multimodal_generation( test_options: PPTestOptions, num_gpus_available, ): - pytest.skip("Skipping the test until V1 passes it.") _compare_tp(model_id, parallel_setup, distributed_backend, diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index 795b38e724ea..2c619396e6c0 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -308,13 +308,11 @@ def forward( hidden_states = inputs_embeds else: hidden_states = self.get_input_embeddings(input_ids) - residual = None hidden_states *= self.config.embedding_multiplier else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - residual = intermediate_tensors["residual"] for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = 
layer(positions, hidden_states) @@ -322,7 +320,6 @@ def forward( if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, - "residual": residual }) hidden_states = self.norm(hidden_states) @@ -475,10 +472,6 @@ def make_empty_intermediate_tensors( torch.zeros((batch_size, self.config.hidden_size), dtype=dtype, device=device), - "residual": - torch.zeros((batch_size, self.config.hidden_size), - dtype=dtype, - device=device), }) def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 07200fef4799..47ac22c4aeaa 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -298,17 +298,14 @@ def forward( else: hidden_states = self.get_input_embeddings(input_ids) hidden_states *= self.embedding_multiplier - residual = None else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - residual = intermediate_tensors["residual"] for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, - "residual": residual }) hidden_states = self.norm(hidden_states) return hidden_states @@ -523,10 +520,6 @@ def make_empty_intermediate_tensors( torch.zeros((batch_size, self.config.hidden_size), dtype=dtype, device=device), - "residual": - torch.zeros((batch_size, self.config.hidden_size), - dtype=dtype, - device=device), }) def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index a5d118f084e6..b434822bff0a 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -195,17 +195,14 @@ def forward( else: hidden_states = self.get_input_embeddings(input_ids) hidden_states *= self.embedding_multiplier - residual = None else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - residual = intermediate_tensors["residual"] for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, - "residual": residual }) hidden_states = self.norm(hidden_states) return hidden_states @@ -323,10 +320,6 @@ def make_empty_intermediate_tensors( torch.zeros((batch_size, self.config.hidden_size), dtype=dtype, device=device), - "residual": - torch.zeros((batch_size, self.config.hidden_size), - dtype=dtype, - device=device), }) def load_weights(self, weights: Iterable[tuple[str, From 4f8c4b890ac1ca57b2f80278641fbe3f9a6abfba Mon Sep 17 00:00:00 2001 From: Jialin Ouyang Date: Tue, 23 Sep 2025 15:11:14 -0700 Subject: [PATCH 297/518] [Core] Use KVCacheBlock as much as possible instead of dict[block_id, KVCacheBlock] (#24830) Signed-off-by: Jialin Ouyang --- tests/v1/core/test_prefix_caching.py | 178 +++++++++++++----- .../core/test_single_type_kv_cache_manager.py | 18 +- vllm/v1/core/block_pool.py | 138 +++++++++++--- 3 files changed, 247 insertions(+), 87 deletions(-) diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index 3cf9d9369676..37b4f9a08e40 100644 --- a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -14,10 +14,11 @@ MultiModalKwargsItem, 
PlaceholderRange) from vllm.sampling_params import SamplingParams from vllm.utils import sha256, sha256_cbor -from vllm.v1.core.block_pool import BlockPool +from vllm.v1.core.block_pool import BlockHashToBlockMap, BlockPool from vllm.v1.core.kv_cache_manager import KVCacheManager, Request -from vllm.v1.core.kv_cache_utils import (BlockHash, KVCacheBlock, - get_block_hash, get_group_id, +from vllm.v1.core.kv_cache_utils import (BlockHash, BlockHashWithGroupId, + KVCacheBlock, get_block_hash, + get_group_id, get_request_block_hasher, hash_block_tokens, init_none_hash, make_block_hash_with_group_id) @@ -138,7 +139,7 @@ def test_prefill(hash_fn): blocks = manager.allocate_slots(req0, 55, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([1, 2, 3, 4], ) + assert blocks is not None and blocks.get_block_ids() == ([1, 2, 3, 4], ) # Check full block metadata parent_block_hash = None @@ -171,7 +172,7 @@ def test_prefill(hash_fn): blocks = manager.allocate_slots(req1, num_new_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([5], ) + assert blocks is not None and blocks.get_block_ids() == ([5], ) for block in computed_blocks.blocks[0]: assert block.ref_cnt == 2 @@ -207,7 +208,7 @@ def test_prefill(hash_fn): blocks = manager.allocate_slots(req2, num_new_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([6], ) + assert blocks is not None and blocks.get_block_ids() == ([6], ) # Although we only have 6 free blocks, we have 8 blocks in # the free block queue due to lazy removal. @@ -227,7 +228,9 @@ def test_prefill(hash_fn): len(computed_blocks.blocks[0]) * 16, computed_blocks) # This block ID order also checks the eviction order. 
- assert blocks.get_block_ids() == ([7, 8, 9, 10, 4, 5, 6, 3, 2, 1], ) + assert blocks is not None and blocks.get_block_ids() == ([ + 7, 8, 9, 10, 4, 5, 6, 3, 2, 1 + ], ) assert free_block_queue.num_free_blocks == 0 assert (free_block_queue.fake_free_list_head.next_free_block @@ -261,8 +264,9 @@ def test_prefill_hybrid_model(): blocks = manager.allocate_slots(req0, 55, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([1, 2, 3, 4], [5, 6, 7, - 8], [9, 10, 11, 12]) + assert blocks is not None and blocks.get_block_ids() == ([1, 2, 3, 4], [ + 5, 6, 7, 8 + ], [9, 10, 11, 12]) # Check full block metadata parent_block_hash = None @@ -298,7 +302,7 @@ def test_prefill_hybrid_model(): blocks = manager.allocate_slots(req1, num_new_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([13], [14], [15]) + assert blocks is not None and blocks.get_block_ids() == ([13], [14], [15]) for block_per_group in computed_blocks.blocks: for block in block_per_group: if block != manager.block_pool.null_block: @@ -309,14 +313,15 @@ def test_prefill_hybrid_model(): manager.free(req1) cached_block_hash_to_block_bak = copy.copy( - manager.block_pool.cached_block_hash_to_block) + manager.block_pool.cached_block_hash_to_block._cache) - def test_partial_request_hit(request_id: str, hash_to_evict: list[bytes], + def test_partial_request_hit(request_id: str, + hash_to_evict: list[BlockHashWithGroupId], expect_hit_length: int): req = make_request(request_id, common_token_ids + unique_token_ids, block_size, sha256) for hash_with_group_id in hash_to_evict: - manager.block_pool.cached_block_hash_to_block.pop( + manager.block_pool.cached_block_hash_to_block._cache.pop( hash_with_group_id) computed_blocks, num_computed_tokens = manager.get_computed_blocks(req) assert len(req.block_hashes) == 3 @@ -324,7 +329,7 @@ def test_partial_request_hit(request_id: str, hash_to_evict: list[bytes], for block_per_group in computed_blocks.blocks: assert len(block_per_group) == num_computed_tokens // block_size for hash_with_group_id in hash_to_evict: - manager.block_pool.cached_block_hash_to_block[ + manager.block_pool.cached_block_hash_to_block._cache[ hash_with_group_id] = cached_block_hash_to_block_bak[ hash_with_group_id] manager.free(req) @@ -362,7 +367,8 @@ def test_partial_request_hit(request_id: str, hash_to_evict: list[bytes], # total cache miss. # The cache hit length of full attention is 1 * block_size. # The cache hit length of sliding window is 2 * block_size. - # Then it is cache miss as the two type of layers have different hit length. + # Then it is cache miss as the two type of layers + # have different hit length. 
test_partial_request_hit("8", [ make_block_hash_with_group_id(block_hashes[2], 0), make_block_hash_with_group_id(block_hashes[0], 1), @@ -406,7 +412,7 @@ def test_prefill_plp(): blocks = manager.allocate_slots(req0, 55, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([1, 2, 3, 4], ) + assert blocks is not None and blocks.get_block_ids() == ([1, 2, 3, 4], ) req0_block_hashes = [b.block_hash for b in blocks.blocks[0]] # Check full block metadata @@ -441,7 +447,7 @@ def test_prefill_plp(): blocks = manager.allocate_slots(req1, num_new_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([5], ) + assert blocks is not None and blocks.get_block_ids() == ([5], ) for block in computed_blocks.blocks[0]: assert block.ref_cnt == 2 @@ -478,6 +484,7 @@ def test_prefill_plp(): blocks = manager.allocate_slots(req2, 55, len(computed_blocks.blocks[0]) * 16, computed_blocks) + assert blocks is not None block_ids = blocks.get_block_ids() # Duplicate cached blocks have different ids but same hashes vs request #0 assert [b.block_hash for b in blocks.blocks[0]] == req0_block_hashes @@ -513,7 +520,7 @@ def test_decode(): blocks = manager.allocate_slots(req0, 55, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([1, 2, 3, 4], ) + assert blocks is not None and blocks.get_block_ids() == ([1, 2, 3, 4], ) # Append slots without allocating a new block. req0.num_computed_tokens = 55 @@ -558,7 +565,8 @@ def test_evict(): blocks = manager.allocate_slots(req0, 5 * 16 + 7, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 6 # 5 full + 1 partial + # 5 full + 1 partial + assert blocks is not None and len(blocks.blocks[0]) == 6 # 3 blocks. req1 = make_request("1", list(range(last_token_id, @@ -570,7 +578,7 @@ def test_evict(): blocks = manager.allocate_slots(req1, 3 * 16, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 3 # 3 full blocks + assert blocks is not None and len(blocks.blocks[0]) == 3 # 3 full blocks last_token_id += 3 * 16 # 10 - (6 + 3) == 1 @@ -592,7 +600,7 @@ def test_evict(): blocks = manager.allocate_slots(req2, 3, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([10], ) + assert blocks is not None and blocks.get_block_ids() == ([10], ) assert manager.block_pool.free_block_queue.num_free_blocks == 7 @@ -617,7 +625,7 @@ def test_hash_block_correct_reuse(): blocks = manager.allocate_slots(req, num_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 1 + assert blocks is not None and len(blocks.blocks[0]) == 1 # Deallocate the block. manager.free(req) @@ -631,7 +639,7 @@ def test_hash_block_correct_reuse(): blocks = manager.allocate_slots(req, num_tokens - 1, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 1 + assert blocks is not None and len(blocks.blocks[0]) == 1 assert manager.block_pool.blocks[blocks.blocks[0] [0].block_id].block_hash is None @@ -658,7 +666,7 @@ def test_computed_blocks_not_evicted(): blocks = manager.allocate_slots(req0, num_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 1 + assert blocks is not None and len(blocks.blocks[0]) == 1 assert blocks.blocks[0][0].block_id == 1 # Allocate another block. 
@@ -670,7 +678,7 @@ def test_computed_blocks_not_evicted(): blocks = manager.allocate_slots(req1, num_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 1 + assert blocks is not None and len(blocks.blocks[0]) == 1 assert blocks.blocks[0][0].block_id == 2 # Free the blocks. @@ -688,7 +696,7 @@ def test_computed_blocks_not_evicted(): blocks = manager.allocate_slots(req2, num_tokens * 2 - num_tokens, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 1 + assert blocks is not None and len(blocks.blocks[0]) == 1 assert blocks.blocks[0][0].block_id == 2 @@ -712,7 +720,7 @@ def test_basic_prefix_caching_disabled(): blocks = manager.allocate_slots(req1, 10, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 3 + assert blocks is not None and len(blocks.blocks[0]) == 3 # Free the blocks. manager.free(req1) @@ -726,7 +734,7 @@ def test_basic_prefix_caching_disabled(): blocks = manager.allocate_slots(req2, 16, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert len(blocks.blocks[0]) == 4 + assert blocks is not None and len(blocks.blocks[0]) == 4 # New requests should not have any blocks. req3 = make_request("3", list(range(4)), block_size, sha256) @@ -773,7 +781,8 @@ def test_cache_blocks(hash_fn): assert len(block_pool.cached_block_hash_to_block) == 2 assert all([block.block_hash is not None for block in blocks]) - # Test that blocks that don't start from the beginning are cached correctly. + # Test that blocks that don't start from the beginning are cached + # correctly. blocks += [KVCacheBlock(block_id=2)] block_pool.cache_full_blocks( request=req, @@ -1101,7 +1110,7 @@ def test_reset_prefix_cache(): all_token_ids = full_block_token_ids + unique_token_ids req0 = make_request("0", all_token_ids, block_size, sha256) blocks = manager.allocate_slots(req0, 55) - assert blocks.get_block_ids() == ([1, 2, 3, 4], ) + assert blocks is not None and blocks.get_block_ids() == ([1, 2, 3, 4], ) unique_token_ids = [4] * 7 all_token_ids = full_block_token_ids + unique_token_ids @@ -1112,7 +1121,7 @@ def test_reset_prefix_cache(): blocks = manager.allocate_slots(req1, 7, len(computed_blocks.blocks[0]) * 16, computed_blocks) - assert blocks.get_block_ids() == ([5], ) + assert blocks is not None and blocks.get_block_ids() == ([5], ) # Failed to reset prefix cache because some blocks are not freed yet. 
assert not manager.reset_prefix_cache() @@ -1168,49 +1177,41 @@ def test_maybe_evict_cached_block(): # Manually add all blocks to cached_blocks for block, block_hash in zip(pool.blocks, block_hashes): block.block_hash = block_hash - pool.cached_block_hash_to_block[block_hash][block.block_id] = block + pool.cached_block_hash_to_block.insert(block_hash, block) block0, block1, block2, block3 = pool.blocks - assert pool.cached_block_hash_to_block == { + assert pool.cached_block_hash_to_block._cache == { block_hash0: { block0.block_id: block0, - block3.block_id: block3 - }, - block_hash1: { - block1.block_id: block1 + block3.block_id: block3, }, - block_hash2: { - block2.block_id: block2 - } + block_hash1: block1, + block_hash2: block2, } # Evict block1 pool._maybe_evict_cached_block(block1) - assert pool.cached_block_hash_to_block == { + assert pool.cached_block_hash_to_block._cache == { block_hash0: { block0.block_id: block0, block3.block_id: block3 }, - block_hash2: { - block2.block_id: block2 - } + block_hash2: block2, } # Evict block0: block_hash0 entry should NOT be removed, as block3 # also use the same hash pool._maybe_evict_cached_block(block0) - assert pool.cached_block_hash_to_block == { + assert pool.cached_block_hash_to_block._cache == { block_hash0: { block3.block_id: block3 }, - block_hash2: { - block2.block_id: block2 - } + block_hash2: block2, } # Evict block2 pool._maybe_evict_cached_block(block2) - assert pool.cached_block_hash_to_block == {block_hash0: {3: block3}} + assert pool.cached_block_hash_to_block._cache == {block_hash0: {3: block3}} # Evict block3 pool._maybe_evict_cached_block(block3) - assert pool.cached_block_hash_to_block == {} + assert pool.cached_block_hash_to_block._cache == {} @pytest.mark.parametrize("blocks_to_cache", [2, 3, 10]) @@ -1374,7 +1375,7 @@ def test_eagle_with_sliding_window(): # Evict the first block in the request assert manager.block_pool.get_cached_block( block_hash_first_block, kv_cache_group_ids=[0]) is not None - manager.block_pool.cached_block_hash_to_block.pop( + manager.block_pool.cached_block_hash_to_block._cache.pop( make_block_hash_with_group_id(block_hash_first_block, 0)) # New request @@ -1386,3 +1387,78 @@ def test_eagle_with_sliding_window(): # there will be no matched prefix. 
assert len(computed_blocks.blocks[0]) == 0 assert num_tokens == 0 + + +def test_block_lookup_cache_single_block_per_key(): + cache = BlockHashToBlockMap() + key0 = BlockHashWithGroupId(b"hash0") + key1 = BlockHashWithGroupId(b"hash1") + key2 = BlockHashWithGroupId(b"hash2") + block0 = KVCacheBlock(0) + block1 = KVCacheBlock(1) + + assert cache.get_one_block(key0) is None + assert cache.get_one_block(key1) is None + assert cache.get_one_block(key2) is None + # key0 inserted + cache.insert(key0, block0) + assert cache.get_one_block(key0) is block0 + assert cache.get_one_block(key1) is None + assert cache.get_one_block(key2) is None + # key1 inserted + cache.insert(key1, block1) + assert cache.get_one_block(key0) is block0 + assert cache.get_one_block(key1) is block1 + assert cache.get_one_block(key2) is None + # No block poped due to block_id mismatch + assert cache.pop(key0, 100) is None + assert cache.get_one_block(key0) is block0 + assert cache.get_one_block(key1) is block1 + assert cache.get_one_block(key2) is None + # block poped with (key0, block ID 0) + assert cache.pop(key0, 0) is block0 + assert cache.get_one_block(key0) is None + assert cache.get_one_block(key1) is block1 + assert cache.get_one_block(key2) is None + # No block poped due to block_id mismatch + assert cache.pop(key0, 1) is None + assert cache.get_one_block(key0) is None + assert cache.get_one_block(key1) is block1 + assert cache.get_one_block(key2) is None + # block poped with (key1, block ID 1) + assert cache.pop(key1, 1) is block1 + assert cache.get_one_block(key0) is None + assert cache.get_one_block(key1) is None + assert cache.get_one_block(key2) is None + + +def test_block_lookup_cache_multi_blocks_per_key(): + cache = BlockHashToBlockMap() + key0 = BlockHashWithGroupId(b"hash0") + key1 = BlockHashWithGroupId(b"hash1") + block00 = KVCacheBlock(0) + block01 = KVCacheBlock(1) + block10 = KVCacheBlock(10) + block11 = KVCacheBlock(11) + + assert cache.get_one_block(key0) is None + assert cache.get_one_block(key1) is None + + cache.insert(key0, block00) + cache.insert(key0, block01) + cache.insert(key1, block10) + cache.insert(key1, block11) + + assert cache.get_one_block(key0) is block00 + assert cache.pop(key0, 0) is block00 + assert cache.get_one_block(key0) is block01 + assert cache.pop(key0, 1) is block01 + assert cache.get_one_block(key0) is None + assert cache.pop(key0, 2) is None + + assert cache.get_one_block(key1) is block10 + assert cache.pop(key1, 10) is block10 + assert cache.get_one_block(key1) is block11 + assert cache.pop(key1, 11) is block11 + assert cache.get_one_block(key1) is None + assert cache.pop(key1, 12) is None diff --git a/tests/v1/core/test_single_type_kv_cache_manager.py b/tests/v1/core/test_single_type_kv_cache_manager.py index b70850a9bcff..01b54ae56e90 100644 --- a/tests/v1/core/test_single_type_kv_cache_manager.py +++ b/tests/v1/core/test_single_type_kv_cache_manager.py @@ -47,16 +47,15 @@ def run_one_case(block_is_cached, tail_token, expect_length): BlockHash(str(i).encode()) for i in range(len(block_is_cached)) ] - block_pool.cached_block_hash_to_block.clear() + block_pool.cached_block_hash_to_block._cache.clear() # Mock the block pool with the cached blocks for i, (block_hash, is_cached) in enumerate(zip(block_hash_list, block_is_cached)): if is_cached: - block_pool.cached_block_hash_to_block[ - make_block_hash_with_group_id(block_hash, 0)] = { - i: block_pool.blocks[i + 10], - } + block_pool.cached_block_hash_to_block.insert( + make_block_hash_with_group_id(block_hash, 0), + 
block_pool.blocks[i + 10]) computed_blocks = manager.find_longest_cache_hit( block_hashes=block_hash_list, @@ -112,16 +111,15 @@ def run_one_case(block_is_cached, expect_length): BlockHash(str(i).encode()) for i in range(len(block_is_cached)) ] - block_pool.cached_block_hash_to_block.clear() + block_pool.cached_block_hash_to_block._cache.clear() # Mock the block pool with the cached blocks for i, (block_hash, is_cached) in enumerate(zip(block_hash_list, block_is_cached)): if is_cached: - block_pool.cached_block_hash_to_block[ - make_block_hash_with_group_id(block_hash, 0)] = { - i: block_pool.blocks[i + 10], - } + block_pool.cached_block_hash_to_block.insert( + make_block_hash_with_group_id(block_hash, 0), + block_pool.blocks[i + 10]) computed_blocks = manager.find_longest_cache_hit( block_hashes=block_hash_list, diff --git a/vllm/v1/core/block_pool.py b/vllm/v1/core/block_pool.py index d1e1c1c8d038..3cc738304821 100644 --- a/vllm/v1/core/block_pool.py +++ b/vllm/v1/core/block_pool.py @@ -1,8 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from collections import defaultdict from collections.abc import Iterable -from typing import Optional +from typing import Any, Optional, Union from vllm.distributed.kv_events import (MEDIUM_GPU, AllBlocksCleared, BlockRemoved, BlockStored, @@ -19,6 +18,103 @@ logger = init_logger(__name__) +class BlockHashToBlockMap: + """ + Cache of blocks that are used for prefix caching. It caches blocks + from hash directly to a block or multiple blocks + (i.e. {block_hash: KVCacheBlocks}) + - Mostly block_hash maps to a single KVCacheBlock, and KVCacheBlocks + would simply be a KVCacheBlock. + - Otherwise, KVCacheBlocks is a dict from {block_id: KVCacheBlock} + + A cached block is a full block with a block hash that can be used + for prefix caching. + The cached block may be used by running requests or in the + free_block_queue that could potentially be evicted. + + NOTE #1: We currently don't de-duplicate the blocks in the cache, + meaning that if a block becomes full and is cached, we don't check + if there is already an identical block in the cache. This is because + we want to make sure the allocated block IDs won't change so that + block tables are append-only. + NOTE #2: The union type is introduced in order to reduce GC costs + from the inner dict. + """ + + def __init__(self): + self._cache: dict[BlockHashWithGroupId, + Union[KVCacheBlock, dict[int, KVCacheBlock]]] = {} + + def get_one_block(self, + key: BlockHashWithGroupId) -> Optional[KVCacheBlock]: + """ + Gets any block with the given block hash key. 
+ """ + blocks = self._cache.get(key) + if blocks is not None: + if isinstance(blocks, KVCacheBlock): + return blocks + if isinstance(blocks, dict): + return next(iter(blocks.values())) + self._unexpected_blocks_type(blocks) + return None + + def insert(self, key: BlockHashWithGroupId, block: KVCacheBlock) -> None: + """ + Inserts the KVCacheBlock to the cache + """ + blocks = self._cache.get(key) + if blocks is None: + # When key is not found, attach a single block to the key + self._cache[key] = block + elif isinstance(blocks, KVCacheBlock): + # If there's a block with the same key, merge the original block + # and the new block into a dict + self._cache[key] = {blocks.block_id: blocks, block.block_id: block} + elif isinstance(blocks, dict): + # If it's already a dict, simply insert the block + blocks[block.block_id] = block + else: + self._unexpected_blocks_type(blocks) + + def pop(self, key: BlockHashWithGroupId, + block_id: int) -> Optional[KVCacheBlock]: + """ + Checks if block_hash exists and pop block_id from the cache + """ + blocks = self._cache.pop(key, None) + if blocks is None: + # block_hash not found in the cache + return None + # TODO(Jialin): If key is found, block_id should always present + # in blocks. We currently keep the original behaviour for safety. + # + # Will add block_id == blocks.block_id assertion and + # use del blocks[block_id] instead as followup. + if isinstance(blocks, KVCacheBlock): + if blocks.block_id == block_id: + return blocks + # If the single block ID doesn't match, we should put the + # block back (it should happen rarely) + self._cache[key] = blocks + return None + if isinstance(blocks, dict): + # Try to pop block_id from the block dict, and if dict still + # contain blocks, put back to the cache. + block = blocks.pop(block_id, None) + if len(blocks) > 0: + self._cache[key] = blocks + return block + self._unexpected_blocks_type(blocks) + return None + + def __len__(self) -> int: + return len(self._cache) + + def _unexpected_blocks_type(self, blocks: Any) -> None: + raise AssertionError(f"Invalid KV cache block type {type(blocks)}") + + class BlockPool: """BlockPool that manages KVCacheBlocks. It provides methods to allocate, free and cache the kv cache blocks. The @@ -51,17 +147,9 @@ def __init__( # enabled). self.free_block_queue = FreeKVCacheBlockQueue(self.blocks) - # {block_hash: {block ID: block}}. A cached block is - # a full block with a block hash that can be used for prefix caching. - # The cached block may be used by running requests or in the - # free_block_queue that could potentially be evicted. - # NOTE: We currently don't de-duplicate the blocks in the cache, - # meaning that if a block becomes full and is cached, we don't check - # if there is already an identical block in the cache. This is because - # we want to make sure the allocated block IDs won't change so that - # block tables are append-only. - self.cached_block_hash_to_block: dict[BlockHashWithGroupId, dict[ - int, KVCacheBlock]] = defaultdict(dict) + # Cache for block lookup + self.cached_block_hash_to_block: BlockHashToBlockMap = \ + BlockHashToBlockMap() # To represent a placeholder block with block_id=0. 
# The ref_cnt of null_block is not maintained, needs special care to @@ -90,12 +178,11 @@ def get_cached_block( for group_id in kv_cache_group_ids: block_hash_with_group_id = make_block_hash_with_group_id( block_hash, group_id) - cached_blocks_one_group = self.cached_block_hash_to_block.get( + block = self.cached_block_hash_to_block.get_one_block( block_hash_with_group_id) - if not cached_blocks_one_group: + if not block: return None - first_block = next(iter(cached_blocks_one_group.values())) - cached_blocks.append(first_block) + cached_blocks.append(block) return cached_blocks def cache_full_blocks( @@ -140,8 +227,8 @@ def cache_full_blocks( block_hash_with_group_id = make_block_hash_with_group_id( block_hash, kv_cache_group_id) blk.block_hash = block_hash_with_group_id - self.cached_block_hash_to_block[block_hash_with_group_id][ - blk.block_id] = blk + self.cached_block_hash_to_block.insert(block_hash_with_group_id, + blk) if new_hashes is not None: new_hashes.append(maybe_convert_block_hash(block_hash)) @@ -211,15 +298,14 @@ def _maybe_evict_cached_block(self, block: KVCacheBlock) -> bool: if block_hash is None: # The block doesn't have hash, eviction is not needed return False - blocks_by_id = self.cached_block_hash_to_block.get(block_hash) - if blocks_by_id is None: - # block_hash not found in cached_block_hash_to_block, + + if self.cached_block_hash_to_block.pop(block_hash, + block.block_id) is None: + # block not found in cached_block_hash_to_block, # eviction is not needed return False + block.reset_hash() - blocks_by_id.pop(block.block_id, None) - if len(blocks_by_id) == 0: - del self.cached_block_hash_to_block[block_hash] if self.enable_kv_cache_events: # FIXME (Chen): Not sure whether we should return `hash_value` @@ -283,7 +369,7 @@ def reset_prefix_cache(self) -> bool: return False # Remove all hashes so that no new blocks will hit. - self.cached_block_hash_to_block = defaultdict(dict) + self.cached_block_hash_to_block = BlockHashToBlockMap() # Remove all hashes from all blocks. 
for block in self.blocks: From 969b4da3a6ab737f72cb33db502b4c0bb70d4139 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Wed, 24 Sep 2025 00:12:14 +0200 Subject: [PATCH 298/518] [V0 Deprecation] Remove placeholder attn (#25510) Signed-off-by: Thomas Parnell --- .../attention/test_attention_selector.py | 37 +-- vllm/attention/backends/placeholder_attn.py | 314 ------------------ vllm/attention/layer.py | 3 - vllm/attention/selector.py | 9 - .../kv_connector/v1/nixl_connector.py | 1 - 5 files changed, 10 insertions(+), 354 deletions(-) delete mode 100644 vllm/attention/backends/placeholder_attn.py diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index a4e200775c09..730514eb5a56 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -85,8 +85,7 @@ def test_env( if device == "cpu": with patch("vllm.attention.selector.current_platform", CpuPlatform()): - backend = get_attn_backend(16, torch.float16, None, block_size, - False) + backend = get_attn_backend(16, torch.float16, None, block_size) assert backend.get_name() == "TORCH_SDPA_VLLM_V1" elif device == "hip": @@ -106,7 +105,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) assert f"The selected backend, {name}" in str( exc_info.value) @@ -117,7 +115,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) assert f"The selected backend, {name}" in str( exc_info.value) @@ -127,7 +124,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = f"{name}_VLLM_V1" assert backend.get_name() == expected @@ -136,7 +132,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "TRITON_ATTN_VLLM_V1" assert backend.get_name() == expected @@ -164,7 +159,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "CUTLASS_MLA_VLLM_V1" assert backend.get_name() == expected @@ -179,7 +173,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "FLASHINFER_MLA" assert backend.get_name() == expected @@ -199,7 +192,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = f"{name}_VLLM_V1" assert backend.get_name() == expected @@ -208,7 +200,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "FLASH_ATTN_MLA" assert backend.get_name() == expected @@ -218,7 +209,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "TRITON_MLA_VLLM_V1" assert backend.get_name() == expected @@ -227,7 +217,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "FLASHINFER_VLLM_V1" assert backend.get_name() == expected @@ -236,7 +225,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) expected = "FLASH_ATTN_VLLM_V1" assert backend.get_name() == expected @@ -245,7 +233,6 @@ def test_env( torch.float16, None, block_size, - False, use_mla=use_mla) assert backend.get_name() == "FLEX_ATTENTION", ( "Should fallback to FlexAttention if head size is " @@ -264,13 +251,13 @@ def test_fp32_fallback( if device == "cpu": with patch("vllm.attention.selector.current_platform", CpuPlatform()): - backend = get_attn_backend(16, torch.float32, None, 16, False) + backend = get_attn_backend(16, torch.float32, None, 16) assert backend.get_name() == "TORCH_SDPA_VLLM_V1" elif device == "cuda": with 
patch("vllm.attention.selector.current_platform", CudaPlatform()): - backend = get_attn_backend(16, torch.float32, None, 16, False) + backend = get_attn_backend(16, torch.float32, None, 16) assert backend.get_name() == "FLEX_ATTENTION" @@ -286,29 +273,29 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch): monkeypatch.setattr(torch.cuda, "get_device_capability", lambda _=None: (7, 5)) - backend = get_attn_backend(16, torch.float16, None, 16, False) + backend = get_attn_backend(16, torch.float16, None, 16) assert backend.get_name() != STR_FLASH_ATTN_VAL # Reset the monkeypatch for subsequent tests monkeypatch.undo() # Unsupported data type - backend = get_attn_backend(16, torch.float8_e4m3fn, None, 16, False) + backend = get_attn_backend(16, torch.float8_e4m3fn, None, 16) assert backend.get_name() != STR_FLASH_ATTN_VAL # Unsupported kv cache data type - backend = get_attn_backend(16, torch.float16, "fp8", 16, False) + backend = get_attn_backend(16, torch.float16, "fp8", 16) assert backend.get_name() != STR_FLASH_ATTN_VAL # Unsupported block size - backend = get_attn_backend(16, torch.float16, None, 8, False) + backend = get_attn_backend(16, torch.float16, None, 8) assert backend.get_name() != STR_FLASH_ATTN_VAL # flash-attn is not installed import sys original_module = sys.modules.get('vllm_flash_attn') monkeypatch.setitem(sys.modules, 'vllm_flash_attn', None) - backend = get_attn_backend(16, torch.float16, None, 16, False) + backend = get_attn_backend(16, torch.float16, None, 16) assert backend.get_name() != STR_FLASH_ATTN_VAL # Restore the original module if it existed @@ -319,11 +306,7 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch): monkeypatch.delitem(sys.modules, 'vllm_flash_attn', raising=False) # Unsupported head size - backend = get_attn_backend(17, torch.float16, None, 16, False) - assert backend.get_name() != STR_FLASH_ATTN_VAL - - # Attention-free models should bypass env and use PlaceholderAttention - backend = get_attn_backend(16, torch.float16, None, 16, True) + backend = get_attn_backend(17, torch.float16, None, 16) assert backend.get_name() != STR_FLASH_ATTN_VAL @@ -336,5 +319,5 @@ def test_invalid_env(monkeypatch: pytest.MonkeyPatch): # Should raise ValueError for invalid backend with pytest.raises(ValueError) as exc_info: - get_attn_backend(32, torch.float16, None, 16, False) + get_attn_backend(32, torch.float16, None, 16) assert "Invalid value 'INVALID'" in str(exc_info.value) diff --git a/vllm/attention/backends/placeholder_attn.py b/vllm/attention/backends/placeholder_attn.py deleted file mode 100644 index cddeb2cf39bf..000000000000 --- a/vllm/attention/backends/placeholder_attn.py +++ /dev/null @@ -1,314 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -from dataclasses import dataclass -from itertools import accumulate -from typing import List, Optional, Tuple, Type - -import torch - -from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, - AttentionMetadata, - AttentionMetadataBuilder) -from vllm.attention.backends.utils import CommonAttentionState -from vllm.utils import async_tensor_h2d - -# Placeholder attention backend for models like Mamba and pooling models that -# lack attention. 
- - -class PlaceholderAttentionBackend(AttentionBackend): - """Placeholder backend for when no attention is needed.""" - - @staticmethod - def get_name() -> str: - return "NO_ATTENTION" - - @staticmethod - def get_impl_cls() -> Type["PlaceholderAttentionImpl"]: - return PlaceholderAttentionImpl - - @staticmethod - def get_builder_cls() -> Type["PlaceholderAttentionMetadataBuilder"]: - return PlaceholderAttentionMetadataBuilder - - @staticmethod - def get_metadata_cls() -> Type["PlaceholderAttentionMetadata"]: - return PlaceholderAttentionMetadata - - @staticmethod - def get_state_cls() -> Type["CommonAttentionState"]: - return CommonAttentionState - - @staticmethod - def get_kv_cache_shape( - num_blocks: int, - block_size: int, - num_kv_heads: int, - head_size: int, - ) -> Tuple[int, ...]: - return (1, 1, 1, 1, 1) - - @staticmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - return - - @staticmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - return - - -@dataclass -class PlaceholderAttentionMetadata(AttentionMetadata): - """Attention metadata for prefill and decode batched together.""" - # (batch_size,). The sequence length per sequence. Sequence length means - # the computed tokens + new tokens None if it is a decoding. - seq_lens: Optional[List[int]] - # seq_lens stored as a tensor. - seq_lens_tensor: Optional[torch.Tensor] - - # Maximum sequence length among prefill batch. 0 if there are decoding - # requests only. - max_prefill_seq_len: int - # Maximum sequence length among decode batch. 0 if there are prefill - # requests only. - max_decode_seq_len: int - # (batch_size,) A tensor of context lengths (tokens that are computed - # so far). - context_lens_tensor: Optional[torch.Tensor] - - # Whether or not if cuda graph is enabled. - # Cuda-graph is currently enabled for decoding only. - # TODO(woosuk): Move `use_cuda_graph` out since it's unrelated to attention. - use_cuda_graph: bool - - # Maximum query length in the batch. - max_query_len: Optional[int] - - # Max number of query tokens among request in the batch. - max_decode_query_len: Optional[int] - - # (batch_size + 1,). The cumulative subquery lengths of the sequences in - # the batch, used to index into subquery. E.g., if the subquery length - # is [4, 6], it is [0, 4, 10]. - query_start_loc: Optional[torch.Tensor] = None - # (batch_size + 1,). The cumulative sequence lengths of the sequences in - # the batch, used to index into sequence. E.g., if the sequence length is - # [4, 6], it is [0, 4, 10]. - seq_start_loc: Optional[torch.Tensor] = None - - # Placeholder. 
- block_tables: Optional[torch.Tensor] = None - - _cached_prefill_metadata: Optional["PlaceholderAttentionMetadata"] = None - _cached_decode_metadata: Optional["PlaceholderAttentionMetadata"] = None - - @property - def prefill_metadata(self) -> Optional["PlaceholderAttentionMetadata"]: - if self.num_prefills == 0: - return None - - if self._cached_prefill_metadata is not None: - return self._cached_prefill_metadata - - # Compute some attn_metadata fields which default to None - query_start_loc = (None if self.query_start_loc is None else - self.query_start_loc[:self.num_prefills + 1]) - seq_lens = (None if self.seq_lens is None else - self.seq_lens[:self.num_prefills]) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[:self.num_prefills]) - seq_start_loc = (None if self.seq_start_loc is None else - self.seq_start_loc[:self.num_prefills + 1]) - context_lens_tensor = (None if self.context_lens_tensor is None else - self.context_lens_tensor[:self.num_prefills]) - - # Placeholders - slot_mapping = torch.empty(0) - block_tables = torch.empty(0) - - self._cached_prefill_metadata = PlaceholderAttentionMetadata( - num_prefills=self.num_prefills, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=0, - slot_mapping=slot_mapping, - enable_kv_scales_calculation=self.enable_kv_scales_calculation, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_decode_query_len=0, - max_query_len=self.max_query_len, - max_prefill_seq_len=self.max_prefill_seq_len, - max_decode_seq_len=0, - query_start_loc=query_start_loc, - seq_start_loc=seq_start_loc, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=False, - ) - return self._cached_prefill_metadata - - @property - def decode_metadata(self) -> Optional["PlaceholderAttentionMetadata"]: - if self.num_decode_tokens == 0: - return None - - if self._cached_decode_metadata is not None: - return self._cached_decode_metadata - assert self.seq_lens_tensor is not None - - # Placeholders - slot_mapping = torch.empty(0) - block_tables = torch.empty(0) - seq_lens_tensor = (None if self.seq_lens_tensor is None else - self.seq_lens_tensor[self.num_prefills:]) - - self._cached_decode_metadata = PlaceholderAttentionMetadata( - num_prefills=0, - num_prefill_tokens=0, - num_decode_tokens=self.num_decode_tokens, - slot_mapping=slot_mapping, - enable_kv_scales_calculation=True, - seq_lens=None, - seq_lens_tensor=seq_lens_tensor, - max_decode_query_len=self.max_decode_query_len, - max_query_len=None, - max_prefill_seq_len=0, - max_decode_seq_len=self.max_decode_seq_len, - query_start_loc=(self.query_start_loc[self.num_prefills:] - - self.query_start_loc[self.num_prefills]) - if self.query_start_loc is not None else None, - seq_start_loc=self.seq_start_loc[self.num_prefills:] - if self.seq_start_loc is not None else None, - context_lens_tensor=None, - block_tables=block_tables, - use_cuda_graph=self.use_cuda_graph, - ) - return self._cached_decode_metadata - - -class PlaceholderAttentionMetadataBuilder( - AttentionMetadataBuilder[PlaceholderAttentionMetadata]): - - def __init__(self, input_builder): - - self.input_builder = input_builder - self.runner = input_builder.runner - - def prepare(self): - self.prefill_seq_lens: List[int] = [] - self.context_lens: List[int] = [] - self.curr_seq_lens: List[int] = [] - self.num_prefills = 0 - self.num_prefill_tokens = 0 - self.num_decode_tokens = 0 - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): - """Add a sequence group to 
the metadata. Specifically update/append - 1. context length. - """ - is_prompt = inter_data.is_prompt - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - - if is_prompt: - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - """Build attention metadata with on-device tensors. - - Args: - seq_lens: The maybe padded sequence lengths of the input sequences. - query_lens: The query lengths of the input sequences. - cuda_graph_pad_size: The padding size for cuda graph. - -1 if cuda graph is not used. - batch_size: The maybe padded batch size. - """ - - # Some input builders such as ModelInputForCPUBuilder do not have the - # "inter_data_list" attribute. - # Let's check inter_data_list exists before we reference it. - if hasattr(self.input_builder, "inter_data_list"): - for inter_data in self.input_builder.inter_data_list: - self._add_seq_group(inter_data, - self.input_builder.chunked_prefill_enabled) - - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - max_query_len = max(query_lens) - decode_query_lens = query_lens[self.num_prefills:] - if len(decode_query_lens) > 0: - max_decode_query_len = max(decode_query_lens) - else: - max_decode_query_len = 1 - max_prefill_seq_len = max(self.prefill_seq_lens, default=0) - max_decode_seq_len = max(self.curr_seq_lens, default=0) - num_decode_tokens = self.num_decode_tokens - query_start_loc = list(accumulate(query_lens, initial=0)) - seq_start_loc = list(accumulate(seq_lens, initial=0)) - - if use_captured_graph: - num_decode_tokens = batch_size - self.num_prefill_tokens - assert max_query_len > 0, ("query_lens: {}".format(query_lens)) - - assert device is not None - context_lens_tensor = async_tensor_h2d(self.context_lens, torch.int, - device, self.runner.pin_memory) - seq_lens_tensor = async_tensor_h2d(seq_lens, torch.int, device, - self.runner.pin_memory) - query_start_loc_tensor = async_tensor_h2d(query_start_loc, torch.int32, - device, - self.runner.pin_memory) - seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, - device, self.runner.pin_memory) - - # Placeholders - slot_mapping_tensor = torch.empty(0) - block_tables = torch.empty(0) - - return PlaceholderAttentionMetadata( - num_prefills=self.num_prefills, - slot_mapping=slot_mapping_tensor, - enable_kv_scales_calculation=True, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=num_decode_tokens, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=max_query_len, - max_decode_query_len=max_decode_query_len, - max_prefill_seq_len=max_prefill_seq_len, - max_decode_seq_len=max_decode_seq_len, - query_start_loc=query_start_loc_tensor, - seq_start_loc=seq_start_loc_tensor, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=use_captured_graph, - ) - - -class PlaceholderAttentionImpl(AttentionImpl): - - def __init__(self, *args, **kwargs) -> None: - return - - def forward(self, *args, **kwargs) -> torch.Tensor: - raise NotImplementedError 
diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 544a72052442..0ed20b3b7151 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -115,12 +115,10 @@ def __init__( if cache_config is not None: kv_cache_dtype = cache_config.cache_dtype block_size = cache_config.block_size - is_attention_free = cache_config.is_attention_free calculate_kv_scales = cache_config.calculate_kv_scales else: kv_cache_dtype = "auto" block_size = 16 - is_attention_free = False calculate_kv_scales = False if num_kv_heads is None: num_kv_heads = num_heads @@ -185,7 +183,6 @@ def __init__( dtype, kv_cache_dtype, block_size, - is_attention_free, use_mla=use_mla, has_sink=self.has_sink) else: diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py index 3a235ba6e0b4..b651fc3eaee3 100644 --- a/vllm/attention/selector.py +++ b/vllm/attention/selector.py @@ -142,7 +142,6 @@ def get_attn_backend( dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, - is_attention_free: bool = False, use_mla: bool = False, has_sink: bool = False, ) -> type[AttentionBackend]: @@ -156,7 +155,6 @@ def get_attn_backend( dtype=dtype, kv_cache_dtype=kv_cache_dtype, block_size=block_size, - is_attention_free=is_attention_free, use_v1=envs.VLLM_USE_V1, use_mla=use_mla, has_sink=has_sink, @@ -169,17 +167,10 @@ def _cached_get_attn_backend( dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, - is_attention_free: bool, use_v1: bool = False, use_mla: bool = False, has_sink: bool = False, ) -> type[AttentionBackend]: - # If there are no attention layers (e.g. we are running Mamba), - # use the placeholder NO_ATTENTION - if is_attention_free: - from vllm.attention.backends.placeholder_attn import ( - PlaceholderAttentionBackend) - return PlaceholderAttentionBackend # Check whether a particular choice of backend was # previously forced. 
diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 64feddb591c2..528d4022bd17 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -574,7 +574,6 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): self.model_config.dtype, self.cache_config.cache_dtype, self.block_size, - self.model_config.is_attention_free, use_mla=self.use_mla) self.backend_name = backend.get_name() attn_backend = backend_name_to_enum(self.backend_name) From eca7be9077aa22e70da5c2ef04ff056e3c7bdc58 Mon Sep 17 00:00:00 2001 From: rouchenzi <40842833+rouchenzi@users.noreply.github.com> Date: Tue, 23 Sep 2025 15:17:49 -0700 Subject: [PATCH 299/518] =?UTF-8?q?Add=20VLLM=5FENABLE=5FINDUCTOR=5FMAX=5F?= =?UTF-8?q?AUTOTUNE=20&=20VLLM=5FENABLE=5FINDUCTOR=5FCOORDINA=E2=80=A6=20(?= =?UTF-8?q?#25493)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: rouchenzi Signed-off-by: rouchenzi <40842833+rouchenzi@users.noreply.github.com> --- vllm/compilation/compiler_interface.py | 5 +++-- vllm/envs.py | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/vllm/compilation/compiler_interface.py b/vllm/compilation/compiler_interface.py index 7158fd685964..eeca14d1296f 100644 --- a/vllm/compilation/compiler_interface.py +++ b/vllm/compilation/compiler_interface.py @@ -551,8 +551,9 @@ def set_inductor_config(config, runtime_shape): if isinstance(runtime_shape, int): # for a specific batchsize, tuning triton kernel parameters # can be beneficial - config["max_autotune"] = True - config["coordinate_descent_tuning"] = True + config["max_autotune"] = envs.VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE + config["coordinate_descent_tuning"] = ( + envs.VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING) class EagerAdaptor(CompilerInterface): diff --git a/vllm/envs.py b/vllm/envs.py index 50d58c5468f9..1c6c1e78ac9b 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -193,6 +193,8 @@ VLLM_DBO_COMM_SMS: int = 20 GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None + VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True + VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True VLLM_USE_NCCL_SYMM_MEM: bool = False VLLM_NCCL_INCLUDE_PATH: Optional[str] = None @@ -1413,6 +1415,17 @@ def get_vllm_port() -> Optional[int]: "code_interpreter", "web_search_preview"]), + # Enable max_autotune & coordinate_descent_tuning in inductor_config + # to compile static shapes passed from compile_sizes in compilation_config + # If set to 1, enable max_autotune; By default, this is enabled (1) + "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE": + lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "1"))), + # If set to 1, enable coordinate_descent_tuning; + # By default, this is enabled (1) + "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING": + lambda: bool(int(os.getenv("VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING", + "1"))), + # Flag to enable NCCL symmetric memory allocation and registration "VLLM_USE_NCCL_SYMM_MEM": lambda: bool(int(os.getenv("VLLM_USE_NCCL_SYMM_MEM", "0"))), @@ -1513,6 +1526,8 @@ def compute_hash() -> str: "VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16", "VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB", "VLLM_ROCM_FP8_MFMA_PAGE_ATTN", + "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", + "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING", ] for key in 
environment_variables_to_hash: # if this goes out of sync with environment_variables, From 4f2954f7240b2b8f14a680e55c3caae163a404a9 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 23 Sep 2025 18:26:10 -0400 Subject: [PATCH 300/518] Fix triton_reshape_and_cache_flash.py triton import (#25522) Signed-off-by: mgoin --- vllm/attention/ops/triton_reshape_and_cache_flash.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/attention/ops/triton_reshape_and_cache_flash.py b/vllm/attention/ops/triton_reshape_and_cache_flash.py index 0b0c706626af..883052cb46aa 100644 --- a/vllm/attention/ops/triton_reshape_and_cache_flash.py +++ b/vllm/attention/ops/triton_reshape_and_cache_flash.py @@ -2,10 +2,9 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import torch -import triton -import triton.language as tl from vllm.platforms import current_platform +from vllm.triton_utils import tl, triton @triton.jit From 95bc60e4cb42029dbe7f1b7c568d5147f0ea1f2d Mon Sep 17 00:00:00 2001 From: Andrew Xia Date: Tue, 23 Sep 2025 15:46:46 -0700 Subject: [PATCH 301/518] [gpt-oss][bugfix] remove logic to require resp_ in ResponseAPI (#25428) Signed-off-by: Andrew Xia --- .../openai/test_response_api_with_harmony.py | 1 + vllm/entrypoints/openai/serving_responses.py | 15 --------------- 2 files changed, 1 insertion(+), 15 deletions(-) diff --git a/tests/entrypoints/openai/test_response_api_with_harmony.py b/tests/entrypoints/openai/test_response_api_with_harmony.py index 23d8373d9780..c28970afc731 100644 --- a/tests/entrypoints/openai/test_response_api_with_harmony.py +++ b/tests/entrypoints/openai/test_response_api_with_harmony.py @@ -523,6 +523,7 @@ async def test_function_calling(client: OpenAI, model_name: str): input="What's the weather like in Paris today?", tools=tools, temperature=0.0, + extra_body={"request_id": "test_function_calling_non_resp"}, ) assert response is not None assert response.status == "completed" diff --git a/vllm/entrypoints/openai/serving_responses.py b/vllm/entrypoints/openai/serving_responses.py index 99bb464db1d1..c70baba88d43 100644 --- a/vllm/entrypoints/openai/serving_responses.py +++ b/vllm/entrypoints/openai/serving_responses.py @@ -235,8 +235,6 @@ async def create_responses( # Handle the previous response ID. prev_response_id = request.previous_response_id if prev_response_id is not None: - if not prev_response_id.startswith("resp_"): - return self._make_invalid_id_error(prev_response_id) async with self.response_store_lock: prev_response = self.response_store.get(prev_response_id) if prev_response is None: @@ -924,9 +922,6 @@ async def retrieve_responses( stream: Optional[bool], ) -> Union[ErrorResponse, ResponsesResponse, AsyncGenerator[ StreamingResponsesResponse, None]]: - if not response_id.startswith("resp_"): - return self._make_invalid_id_error(response_id) - async with self.response_store_lock: response = self.response_store.get(response_id) @@ -944,9 +939,6 @@ async def cancel_responses( self, response_id: str, ) -> Union[ErrorResponse, ResponsesResponse]: - if not response_id.startswith("resp_"): - return self._make_invalid_id_error(response_id) - async with self.response_store_lock: response = self.response_store.get(response_id) if response is None: @@ -972,13 +964,6 @@ async def cancel_responses( response_id) return response - def _make_invalid_id_error(self, response_id: str) -> ErrorResponse: - return self.create_error_response( - err_type="invalid_request_error", - message=(f"Invalid 'response_id': '{response_id}'. 
" - "Expected an ID that begins with 'resp'."), - ) - def _make_not_found_error(self, response_id: str) -> ErrorResponse: return self.create_error_response( err_type="invalid_request_error", From 7361ab379f817727173691224e41e0175423b795 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Tue, 23 Sep 2025 18:48:40 -0400 Subject: [PATCH 302/518] Remove redundant mutates_args and dispatch_key for direct_register_custom_op (#25512) Signed-off-by: mgoin --- vllm/attention/layer.py | 3 --- vllm/compilation/collective_fusion.py | 1 - vllm/distributed/device_communicators/pynccl.py | 1 - vllm/distributed/parallel_state.py | 7 ------- vllm/lora/ops/triton_ops/lora_expand_op.py | 2 -- vllm/lora/ops/triton_ops/lora_shrink_op.py | 2 -- .../layers/fused_moe/flashinfer_trtllm_moe.py | 1 - .../layers/fused_moe/fused_marlin_moe.py | 1 - vllm/model_executor/layers/fused_moe/fused_moe.py | 1 - vllm/model_executor/layers/fused_moe/layer.py | 2 -- .../layers/fused_moe/rocm_aiter_fused_moe.py | 7 ------- vllm/model_executor/layers/layernorm.py | 4 ---- vllm/model_executor/layers/mamba/linear_attn.py | 2 -- vllm/model_executor/layers/mamba/mamba_mixer.py | 2 -- vllm/model_executor/layers/mamba/mamba_mixer2.py | 2 -- vllm/model_executor/layers/mamba/short_conv.py | 2 -- vllm/model_executor/layers/quantization/deepgemm.py | 3 --- vllm/model_executor/layers/quantization/gguf.py | 3 --- .../layers/quantization/kernels/scaled_mm/aiter.py | 2 -- .../layers/quantization/utils/fp8_utils.py | 3 --- .../layers/quantization/utils/mxfp4_utils.py | 2 -- .../layers/quantization/utils/w8a8_utils.py | 2 -- vllm/model_executor/layers/rotary_embedding/common.py | 1 - vllm/model_executor/layers/utils.py | 2 -- vllm/model_executor/models/deepseek_v2.py | 3 --- vllm/model_executor/models/plamo2.py | 2 -- vllm/model_executor/models/qwen3_next.py | 1 - vllm/utils/__init__.py | 11 +++++++++-- 28 files changed, 9 insertions(+), 66 deletions(-) diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 0ed20b3b7151..baa83e29bdd0 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -575,9 +575,7 @@ def unified_attention_fake( direct_register_custom_op( op_name="unified_attention", op_func=unified_attention, - mutates_args=[], fake_impl=unified_attention_fake, - dispatch_key=current_platform.dispatch_key, tags=tag_cudagraph_unsafe, ) @@ -628,6 +626,5 @@ def unified_attention_with_output_fake( op_func=unified_attention_with_output, mutates_args=["output", "output_block_scale"], fake_impl=unified_attention_with_output_fake, - dispatch_key=current_platform.dispatch_key, tags=tag_cudagraph_unsafe, ) diff --git a/vllm/compilation/collective_fusion.py b/vllm/compilation/collective_fusion.py index 331cd8a87392..04b76a9c2d22 100644 --- a/vllm/compilation/collective_fusion.py +++ b/vllm/compilation/collective_fusion.py @@ -547,7 +547,6 @@ def call_trtllm_fused_allreduce_norm_fake( "scale_out", ], fake_impl=call_trtllm_fused_allreduce_norm_fake, - dispatch_key=current_platform.dispatch_key, ) flashinfer_trtllm_fused_allreduce_norm = ( torch.ops.vllm.flashinfer_trtllm_fused_allreduce_norm.default) diff --git a/vllm/distributed/device_communicators/pynccl.py b/vllm/distributed/device_communicators/pynccl.py index 75de85e1b0ab..76fe9a93259f 100644 --- a/vllm/distributed/device_communicators/pynccl.py +++ b/vllm/distributed/device_communicators/pynccl.py @@ -46,7 +46,6 @@ def all_reduce_symmetric_with_copy_fake( direct_register_custom_op( op_name="all_reduce_symmetric_with_copy", op_func=all_reduce_symmetric_with_copy_impl, - 
mutates_args=[], fake_impl=all_reduce_symmetric_with_copy_fake, ) diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index 895971893a66..69f98eb54f36 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -149,29 +149,22 @@ def all_gather_fake(tensor: torch.Tensor, dim: int, world_size: int, if supports_custom_op(): - from vllm.platforms import current_platform direct_register_custom_op( op_name="all_reduce", op_func=all_reduce, - mutates_args=[], fake_impl=all_reduce_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( op_name="reduce_scatter", op_func=reduce_scatter, - mutates_args=[], fake_impl=reduce_scatter_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( op_name="all_gather", op_func=all_gather, - mutates_args=[], fake_impl=all_gather_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/lora/ops/triton_ops/lora_expand_op.py b/vllm/lora/ops/triton_ops/lora_expand_op.py index b1ab84e08ba7..467cbaa8af48 100644 --- a/vllm/lora/ops/triton_ops/lora_expand_op.py +++ b/vllm/lora/ops/triton_ops/lora_expand_op.py @@ -11,7 +11,6 @@ from vllm.lora.ops.triton_ops.kernel_utils import do_expand_kernel from vllm.lora.ops.triton_ops.utils import _get_lora_b_ptr -from vllm.platforms import current_platform from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -283,7 +282,6 @@ def _lora_expand_fake( op_func=_lora_expand, mutates_args=["output_tensor"], fake_impl=_lora_expand_fake, - dispatch_key=current_platform.dispatch_key, ) lora_expand = torch.ops.vllm.lora_expand diff --git a/vllm/lora/ops/triton_ops/lora_shrink_op.py b/vllm/lora/ops/triton_ops/lora_shrink_op.py index 1e7075ab0715..57da93c226d2 100644 --- a/vllm/lora/ops/triton_ops/lora_shrink_op.py +++ b/vllm/lora/ops/triton_ops/lora_shrink_op.py @@ -11,7 +11,6 @@ from vllm.lora.ops.triton_ops.kernel_utils import do_shrink_kernel from vllm.lora.ops.triton_ops.utils import _get_lora_a_ptr -from vllm.platforms import current_platform from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op @@ -237,7 +236,6 @@ def _lora_shrink_fake( op_func=_lora_shrink, mutates_args=["output_tensor"], fake_impl=_lora_shrink_fake, - dispatch_key=current_platform.dispatch_key, ) lora_shrink = torch.ops.vllm.lora_shrink diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py index e358143fac7c..fe586a22e250 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py @@ -92,7 +92,6 @@ def flashinfer_fused_moe_blockscale_fp8_fake( direct_register_custom_op( op_name="flashinfer_fused_moe_blockscale_fp8", op_func=flashinfer_fused_moe_blockscale_fp8, - mutates_args=[], fake_impl=flashinfer_fused_moe_blockscale_fp8_fake, tags=(torch.Tag.needs_fixed_stride_order, ), ) diff --git a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py index 1e3ac6cd79f6..eb12a9b0a233 100644 --- a/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_marlin_moe.py @@ -235,6 +235,5 @@ def fused_marlin_moe_fake(hidden_states: torch.Tensor, direct_register_custom_op( op_name="fused_marlin_moe", op_func=fused_marlin_moe, - mutates_args=[], fake_impl=fused_marlin_moe_fake, ) diff --git 
a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 0e334fdf2404..611df357265b 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1256,7 +1256,6 @@ def outplace_fused_experts_fake( direct_register_custom_op( op_name="outplace_fused_experts", op_func=outplace_fused_experts, - mutates_args=[], fake_impl=outplace_fused_experts_fake, tags=(() if is_torch_equal_or_newer("2.7.0") else (torch.Tag.needs_fixed_stride_order, )), diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 71cc2bcf174d..2bf3bf96baf1 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -2040,7 +2040,6 @@ def moe_forward_fake( op_func=moe_forward, mutates_args=["hidden_states"], fake_impl=moe_forward_fake, - dispatch_key=current_platform.dispatch_key, tags=(torch.Tag.needs_fixed_stride_order, ), ) @@ -2071,7 +2070,6 @@ def moe_forward_shared_fake( op_func=moe_forward_shared, mutates_args=["hidden_states"], fake_impl=moe_forward_shared_fake, - dispatch_key=current_platform.dispatch_key, tags=(torch.Tag.needs_fixed_stride_order, ), ) diff --git a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py index f4972ff5f9cb..2764af5fc532 100644 --- a/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py @@ -223,17 +223,13 @@ def rocm_aiter_fused_moe_fake( direct_register_custom_op( op_name="rocm_aiter_asm_moe_tkw1", op_func=rocm_aiter_asm_moe_tkw1_impl, - mutates_args=[], fake_impl=rocm_aiter_asm_moe_tkw1_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( op_name="rocm_aiter_fused_moe", op_func=rocm_aiter_fused_moe_impl, - mutates_args=[], fake_impl=rocm_aiter_fused_moe_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( @@ -241,7 +237,6 @@ def rocm_aiter_fused_moe_fake( op_func=rocm_aiter_topk_softmax_impl, mutates_args=["topk_weights", "topk_indices", "token_expert_indices"], fake_impl=rocm_aiter_topk_softmax_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( @@ -249,7 +244,6 @@ def rocm_aiter_fused_moe_fake( op_func=rocm_aiter_biased_grouped_topk_impl, mutates_args=["topk_weights", "topk_ids"], fake_impl=rocm_aiter_biased_grouped_topk_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( @@ -257,7 +251,6 @@ def rocm_aiter_fused_moe_fake( op_func=rocm_aiter_grouped_topk_impl, mutates_args=["topk_weights", "topk_ids"], fake_impl=rocm_aiter_grouped_topk_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/layernorm.py b/vllm/model_executor/layers/layernorm.py index f875f712ba9c..8123259d037b 100644 --- a/vllm/model_executor/layers/layernorm.py +++ b/vllm/model_executor/layers/layernorm.py @@ -103,17 +103,13 @@ def rocm_aiter_rmsnorm2d_fwd_with_add_fake( direct_register_custom_op( op_name="rocm_aiter_rms_norm", op_func=rocm_aiter_rms_norm_impl, - mutates_args=[], fake_impl=rocm_aiter_rms_norm_fake, - dispatch_key=current_platform.dispatch_key, ) direct_register_custom_op( op_name="rocm_aiter_rmsnorm2d_fwd_with_add", op_func=rocm_aiter_rmsnorm2d_fwd_with_add_impl, - mutates_args=[], fake_impl=rocm_aiter_rmsnorm2d_fwd_with_add_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git 
a/vllm/model_executor/layers/mamba/linear_attn.py b/vllm/model_executor/layers/mamba/linear_attn.py index 6a901b47b8b6..410cbef4f6bc 100644 --- a/vllm/model_executor/layers/mamba/linear_attn.py +++ b/vllm/model_executor/layers/mamba/linear_attn.py @@ -31,7 +31,6 @@ MambaStateDtypeCalculator, MambaStateShapeCalculator) from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) -from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.linear_attn import LinearAttentionMetadata @@ -401,5 +400,4 @@ def linear_attention_fake( op_func=linear_attention, mutates_args=["output"], fake_impl=linear_attention_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/mamba/mamba_mixer.py b/vllm/model_executor/layers/mamba/mamba_mixer.py index a56ee13a6380..d64854cdb381 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer.py @@ -27,7 +27,6 @@ from vllm.model_executor.layers.mamba.ops.mamba_ssm import ( selective_scan_fn, selective_state_update) from vllm.model_executor.utils import set_weight_attrs -from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.mamba1_attn import Mamba1AttentionMetadata @@ -464,5 +463,4 @@ def mamba_mixer_fake( op_func=mamba_mixer, mutates_args=["output"], fake_impl=mamba_mixer_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 047ce4c4c43d..908ea6e0025f 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -34,7 +34,6 @@ from vllm.model_executor.model_loader.weight_utils import ( LoaderFunction, composed_weight_loader, sharded_weight_loader) from vllm.model_executor.utils import set_weight_attrs -from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata @@ -765,5 +764,4 @@ def mamba_mixer2_fake( op_func=mamba_mixer2, mutates_args=["output"], fake_impl=mamba_mixer2_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/mamba/short_conv.py b/vllm/model_executor/layers/mamba/short_conv.py index ffdcd702aab4..cc424760e229 100644 --- a/vllm/model_executor/layers/mamba/short_conv.py +++ b/vllm/model_executor/layers/mamba/short_conv.py @@ -21,7 +21,6 @@ MambaStateDtypeCalculator, MambaStateShapeCalculator) from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( causal_conv1d_fn, causal_conv1d_update) -from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.short_conv_attn import ( ShortConvAttentionMetadata) @@ -251,5 +250,4 @@ def short_conv_fake( op_func=short_conv, mutates_args=["output"], fake_impl=short_conv_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/quantization/deepgemm.py b/vllm/model_executor/layers/quantization/deepgemm.py index c2b3ccf19fca..8452f686b3ac 100644 --- a/vllm/model_executor/layers/quantization/deepgemm.py +++ b/vllm/model_executor/layers/quantization/deepgemm.py @@ -4,7 +4,6 @@ import torch -from vllm.platforms import current_platform from vllm.triton_utils import triton from vllm.utils import direct_register_custom_op from vllm.utils.deep_gemm 
import fp8_gemm_nt @@ -75,7 +74,5 @@ def w8a8_deepgemm_block_scaled_mm_fake( direct_register_custom_op( op_name="w8a8_deepgemm_block_scaled_mm", op_func=w8a8_deepgemm_block_scaled_mm, - mutates_args=[], fake_impl=w8a8_deepgemm_block_scaled_mm_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/quantization/gguf.py b/vllm/model_executor/layers/quantization/gguf.py index a631dfdab654..de25ee84d081 100644 --- a/vllm/model_executor/layers/quantization/gguf.py +++ b/vllm/model_executor/layers/quantization/gguf.py @@ -161,7 +161,6 @@ def _fused_mul_mat_gguf_fake( direct_register_custom_op( op_name="_fused_mul_mat_gguf", op_func=_fused_mul_mat_gguf, - mutates_args=[], fake_impl=_fused_mul_mat_gguf_fake, ) fused_mul_mat_gguf = torch.ops.vllm._fused_mul_mat_gguf @@ -273,7 +272,6 @@ def _fused_moe_gguf_fake( direct_register_custom_op( op_name="_fused_moe_gguf", op_func=_fused_moe_gguf, - mutates_args=[], fake_impl=_fused_moe_gguf_fake, ) fused_moe_gguf = torch.ops.vllm._fused_moe_gguf @@ -319,7 +317,6 @@ def _apply_gguf_embedding_fake( direct_register_custom_op( op_name="_apply_gguf_embedding", op_func=_apply_gguf_embedding, - mutates_args=[], fake_impl=_apply_gguf_embedding_fake, ) apply_gguf_embedding = torch.ops.vllm._apply_gguf_embedding diff --git a/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py b/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py index 7f808fa92a9a..e8e950a4bb7b 100644 --- a/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py +++ b/vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py @@ -51,9 +51,7 @@ def rocm_aiter_gemm_w8a8_fake( direct_register_custom_op( op_name="rocm_aiter_gemm_w8a8", op_func=rocm_aiter_gemm_w8a8_impl, - mutates_args=[], fake_impl=rocm_aiter_gemm_w8a8_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index 2098086bf240..0bc69fe7f930 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -91,9 +91,7 @@ def rocm_aiter_gemm_w8a8_blockscale_fake( direct_register_custom_op( op_name="rocm_aiter_gemm_w8a8_blockscale", op_func=rocm_aiter_gemm_w8a8_blockscale_impl, - mutates_args=[], fake_impl=rocm_aiter_gemm_w8a8_blockscale_fake, - dispatch_key=current_platform.dispatch_key, ) if (envs.VLLM_ROCM_USE_AITER and envs.VLLM_ROCM_USE_AITER_LINEAR and current_platform.is_fp8_fnuz()): @@ -135,7 +133,6 @@ def _w8a8_triton_block_scaled_mm_fake( direct_register_custom_op( "w8a8_triton_block_scaled_mm_func", _w8a8_triton_block_scaled_mm_func, - mutates_args=[], fake_impl=_w8a8_triton_block_scaled_mm_fake, dispatch_key="CUDA", ) diff --git a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py index 3de928fea720..d61ca7ad5dc4 100644 --- a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py +++ b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py @@ -113,7 +113,6 @@ def _quant_dequant_mxfp4_fake(x: torch.Tensor, direct_register_custom_op( op_name="dequant_mxfp4", op_func=_dequant_mxfp4, - mutates_args=[], fake_impl=_dequant_mxfp4_fake, ) dequant_mxfp4 = torch.ops.vllm.dequant_mxfp4 @@ -124,7 +123,6 @@ def _quant_dequant_mxfp4_fake(x: torch.Tensor, direct_register_custom_op( op_name="quant_dequant_mxfp4", op_func=_quant_dequant_mxfp4, - mutates_args=[], 
fake_impl=_quant_dequant_mxfp4_fake, ) quant_dequant_mxfp4 = torch.ops.vllm.quant_dequant_mxfp4 diff --git a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py index 6ed482db4700..b434b7acfea8 100644 --- a/vllm/model_executor/layers/quantization/utils/w8a8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/w8a8_utils.py @@ -218,9 +218,7 @@ def rocm_per_tensor_w8a8_scaled_mm(*, qinput: torch.Tensor, direct_register_custom_op( op_name="rocm_per_tensor_w8a8_scaled_mm_impl", op_func=rocm_per_tensor_w8a8_scaled_mm_impl, - mutates_args=[], fake_impl=rocm_per_tensor_w8a8_scaled_mm_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/rotary_embedding/common.py b/vllm/model_executor/layers/rotary_embedding/common.py index e3cd0a8e788e..861965106774 100644 --- a/vllm/model_executor/layers/rotary_embedding/common.py +++ b/vllm/model_executor/layers/rotary_embedding/common.py @@ -147,5 +147,4 @@ def _flashinfer_rotary_embedding_fake( op_func=_flashinfer_rotary_embedding, mutates_args=["query", "key"], # These tensors are modified in-place fake_impl=_flashinfer_rotary_embedding_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/layers/utils.py b/vllm/model_executor/layers/utils.py index d7a65d43c210..96dd58c0e4d2 100644 --- a/vllm/model_executor/layers/utils.py +++ b/vllm/model_executor/layers/utils.py @@ -136,9 +136,7 @@ def rocm_unquantized_gemm(layer: torch.nn.Module, direct_register_custom_op( op_name="rocm_unquantized_gemm_impl", op_func=rocm_unquantized_gemm_impl, - mutates_args=[], fake_impl=rocm_unquantized_gemm_impl_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 415d36c681d8..9895ebbcdefe 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -56,7 +56,6 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors from vllm.utils import cdiv, direct_register_custom_op @@ -141,9 +140,7 @@ def sequence_parallel_chunk_fake(x: torch.Tensor) -> torch.Tensor: direct_register_custom_op( op_name="sequence_parallel_chunk", op_func=sequence_parallel_chunk, - mutates_args=[], fake_impl=sequence_parallel_chunk_fake, - dispatch_key=current_platform.dispatch_key, tags=(torch.Tag.needs_fixed_stride_order, ), ) diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 0292f3bf8317..a7acf64f302b 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -48,7 +48,6 @@ is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) from vllm.model_executor.utils import set_weight_attrs -from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.mamba2_attn import Mamba2AttentionMetadata @@ -490,7 +489,6 @@ def plamo2_mamba_mixer_fake( op_func=plamo2_mamba_mixer, mutates_args=["output"], fake_impl=plamo2_mamba_mixer_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index ab23b494e561..356b5001a7dc 100644 --- 
a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -1225,7 +1225,6 @@ def gdn_attention_fake( op_func=gdn_attention, mutates_args=["output"], fake_impl=gdn_attention_fake, - dispatch_key=current_platform.dispatch_key, ) diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 5d165f166238..0a7af79f7a17 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -2546,10 +2546,10 @@ def __getattr__(self, key: str): def direct_register_custom_op( op_name: str, op_func: Callable, - mutates_args: list[str], + mutates_args: Optional[list[str]] = None, fake_impl: Optional[Callable] = None, target_lib: Optional[Library] = None, - dispatch_key: str = "CUDA", + dispatch_key: Optional[str] = None, tags: tuple[torch.Tag, ...] = (), ): """ @@ -2577,6 +2577,13 @@ def direct_register_custom_op( "the required dependencies.") return + if mutates_args is None: + mutates_args = [] + + if dispatch_key is None: + from vllm.platforms import current_platform + dispatch_key = current_platform.dispatch_key + import torch.library if hasattr(torch.library, "infer_schema"): schema_str = torch.library.infer_schema(op_func, From abad204be6eaaae3630db4287ebdca004197f819 Mon Sep 17 00:00:00 2001 From: kourosh hakhamaneshi <31483498+kouroshHakha@users.noreply.github.com> Date: Tue, 23 Sep 2025 15:49:09 -0700 Subject: [PATCH 303/518] [BugFix] Fix OOM in vLLM replicas by ensuring consistent NCCL memory accounting (#25359) Signed-off-by: Kourosh Hakhamaneshi --- .buildkite/test-pipeline.yaml | 3 + .../v1/worker/test_worker_memory_snapshot.py | 174 ++++++++++++++++++ vllm/v1/worker/gpu_worker.py | 21 ++- 3 files changed, 191 insertions(+), 7 deletions(-) create mode 100644 tests/v1/worker/test_worker_memory_snapshot.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index aef6d709722f..245d21b778c0 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -887,6 +887,8 @@ steps: - tests/v1/test_external_lb_dp.py - tests/v1/entrypoints/openai/test_multi_api_servers.py - vllm/v1/engine/ + - vllm/v1/worker/ + - tests/v1/worker/test_worker_memory_snapshot.py commands: - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py @@ -908,6 +910,7 @@ steps: - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - pytest -v -s models/multimodal/generation/test_maverick.py + - pytest -v -s v1/worker/test_worker_memory_snapshot.py - label: Plugin Tests (2 GPUs) # 40min timeout_in_minutes: 60 diff --git a/tests/v1/worker/test_worker_memory_snapshot.py b/tests/v1/worker/test_worker_memory_snapshot.py new file mode 100644 index 000000000000..6faa6bcc591c --- /dev/null +++ b/tests/v1/worker/test_worker_memory_snapshot.py @@ -0,0 +1,174 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import multiprocessing as mp +import os +import tempfile +from multiprocessing import Queue +from typing import Optional +from unittest.mock import patch + +import pytest +import torch + +from vllm.engine.arg_utils import EngineArgs +from vllm.utils import MemorySnapshot +from vllm.v1.worker.gpu_worker import (Worker, + init_worker_distributed_environment) + +# Global queue to track operation order across processes +_QUEUE: Optional[Queue] = None + + +def track_operation(operation: str, rank: int): + """Track when an operation happens and its rank.""" + if _QUEUE 
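With mutates_args now defaulting to an empty list and dispatch_key falling back to current_platform.dispatch_key, call sites for pure out-of-place ops shrink to three arguments. A minimal sketch of such a registration — my_identity and its fake impl are hypothetical placeholders, not ops from this patch:

    import torch
    from vllm.utils import direct_register_custom_op

    def my_identity(x: torch.Tensor) -> torch.Tensor:
        # Real kernel body (placeholder for illustration).
        return x.clone()

    def my_identity_fake(x: torch.Tensor) -> torch.Tensor:
        # Fake impl used during tracing; only shapes/dtypes must match.
        return torch.empty_like(x)

    # mutates_args defaults to [] and dispatch_key to the current platform's
    # key, so neither needs to be spelled out any more.
    direct_register_custom_op(
        op_name="my_identity",
        op_func=my_identity,
        fake_impl=my_identity_fake,
    )

Ops that write into their arguments, such as lora_expand or moe_forward above, still pass mutates_args explicitly.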
is not None: + _QUEUE.put((operation, rank)) + + +def make_operation_tracker(operation_name: str, original_func): + """Create a mock function that tracks when an operation is called. + + Args: + operation_name: Name to use when tracking this operation + original_func: The original function to wrap + + Returns: + A wrapper function that tracks the operation and calls the original + """ + + def wrapper(*args, **kwargs): + rank = int(os.environ.get("RANK", "-1")) + track_operation(operation_name, rank) + return original_func(*args, **kwargs) + + return wrapper + + +def worker_process(rank: int, world_size: int, distributed_init_method: str, + queue: Queue, error_queue: Queue): + """Worker process that initializes a GPU worker with proper tracking.""" + global _QUEUE + _QUEUE = queue + + try: + # Set environment variables + os.environ["RANK"] = str(rank) + os.environ["LOCAL_RANK"] = str(rank) + os.environ["WORLD_SIZE"] = str(world_size) + + # Create vLLM config with small model + vllm_config = EngineArgs(model="facebook/opt-125m", + tensor_parallel_size=2, + load_format="dummy").create_engine_config() + + # Create worker + worker = Worker( + vllm_config=vllm_config, + local_rank=rank, + rank=rank, + distributed_init_method=distributed_init_method, + ) + + # Get original functions before patching + original_init_worker = init_worker_distributed_environment + original_memory_snapshot_init = MemorySnapshot.__init__ + original_all_reduce = torch.distributed.all_reduce + + # Apply minimal patches to track operation order + init_patch = patch( + 'vllm.v1.worker.gpu_worker.init_worker_distributed_environment', + side_effect=make_operation_tracker("init_distributed", + original_init_worker)) + memory_patch = patch.object( + MemorySnapshot, '__init__', + make_operation_tracker("memory_snapshot", + original_memory_snapshot_init)) + all_reduce_patch = patch('torch.distributed.all_reduce', + side_effect=make_operation_tracker( + "nccl_all_reduce", original_all_reduce)) + + with init_patch, memory_patch, all_reduce_patch: + + # Initialize device (this is where we test the order) + worker.init_device() + + # Load model to ensure everything works + worker.load_model() + + # Signal success + queue.put(("success", rank)) + + except Exception as e: + error_queue.put((rank, str(e), type(e).__name__)) + raise + + +@pytest.mark.skipif(torch.cuda.device_count() < 2, + reason="Need at least 2 GPUs for tensor parallelism") +def test_init_distributed_is_called_before_memory_snapshot(): + """Test that distributed env is setup before memory snapshot. + + This test makes sure during worker initialization, the initial memory + snapshot is taken after distributed env is setup to include all the buffers + allocated by distributed env. 
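The tracking helpers above reduce to one reusable idea — wrap a callable so every invocation records (operation, rank) on a multiprocessing queue before delegating, then patch the wrapped version in with unittest.mock. A stripped-down sketch of just that idea (names are illustrative, not the test's fixtures):

    import os
    from multiprocessing import Queue
    from typing import Any, Callable

    def tracked(name: str, fn: Callable[..., Any],
                queue: Queue) -> Callable[..., Any]:
        # Record which operation ran, and on which rank, before delegating;
        # the parent process can later assert on the global ordering.
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            queue.put((name, int(os.environ.get("RANK", "-1"))))
            return fn(*args, **kwargs)
        return wrapper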
+ """ + world_size = 2 + + # Create a temporary file for distributed init + with tempfile.NamedTemporaryFile(delete=False) as f: + distributed_init_method = f"file://{f.name}" + + # Create queues for inter-process communication + ctx = mp.get_context("spawn") + operation_queue = ctx.Queue() + error_queue = ctx.Queue() + + # Start worker processes + processes = [] + for rank in range(world_size): + p = ctx.Process(target=worker_process, + args=(rank, world_size, distributed_init_method, + operation_queue, error_queue)) + p.start() + processes.append(p) + + # Wait for all processes to complete + for p in processes: + p.join(timeout=60) # 60 second timeout + + # Check for errors + errors = [] + while not error_queue.empty(): + rank, error_msg, error_type = error_queue.get() + errors.append(f"Rank {rank}: {error_type}: {error_msg}") + + if errors: + pytest.fail("Worker processes failed:\n" + "\n".join(errors)) + + # Collect all operations from the queue + operations = [] + while not operation_queue.empty(): + operations.append(operation_queue.get()) + + # Verify we got operations from both ranks + print(f"Collected operations: {operations}") + + # Check operations for each rank + for rank in range(world_size): + rank_ops = [op for op, r in operations if r == rank] + print(f"\nRank {rank} operations: {rank_ops}") + + # Raises ValueError if the operation is not found + init_distributed = rank_ops.index("init_distributed") + nccl_all_reduce = rank_ops.index("nccl_all_reduce") + memory_snapshot = rank_ops.index("memory_snapshot") + + # Verify order: init_distributed should happen before memory_snapshot + assert init_distributed < nccl_all_reduce < memory_snapshot, ( + f"Rank {rank}: init_distributed (index {init_distributed}) " + f"must happen before nccl_all_reduce (index {nccl_all_reduce}) " + f"and memory_snapshot (index {memory_snapshot})") + + # Clean up + os.unlink(distributed_init_method.replace("file://", "")) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index f59dacf13d85..9082bbfd8f8e 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -169,6 +169,20 @@ def init_device(self): current_platform.set_device(self.device) current_platform.check_if_supports_dtype(self.model_config.dtype) + + # Initialize the distributed environment BEFORE taking + # memory snapshot + # This ensures NCCL buffers are allocated before we measure + # available memory + init_worker_distributed_environment(self.vllm_config, self.rank, + self.distributed_init_method, + self.local_rank, + current_platform.dist_backend) + + # Set random seed. + set_random_seed(self.model_config.seed) + + # Now take memory snapshot after NCCL is initialized gc.collect() torch.cuda.empty_cache() @@ -190,13 +204,6 @@ def init_device(self): else: raise RuntimeError( f"Not support device type: {self.device_config.device}") - # Initialize the distributed environment. - init_worker_distributed_environment(self.vllm_config, self.rank, - self.distributed_init_method, - self.local_rank, - current_platform.dist_backend) - # Set random seed. 
- set_random_seed(self.model_config.seed) # Construct the model runner self.model_runner: GPUModelRunner = GPUModelRunner( From c85d75cf08330f16f6c076d9cb1a586fa6c6fbb8 Mon Sep 17 00:00:00 2001 From: Corey Lowman Date: Tue, 23 Sep 2025 18:50:09 -0400 Subject: [PATCH 304/518] Add `VLLM_NVTX_SCOPES_FOR_PROFILING=1` to enable `nvtx.annotate` scopes (#25501) Signed-off-by: Corey Lowman --- vllm/envs.py | 5 +++++ vllm/v1/utils.py | 20 +++++++++++++++++--- 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 1c6c1e78ac9b..33dae0be05f8 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -187,6 +187,7 @@ VLLM_DISABLE_PAD_FOR_CUDAGRAPH: bool = False VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS: bool = False VLLM_CUSTOM_SCOPES_FOR_PROFILING: bool = False + VLLM_NVTX_SCOPES_FOR_PROFILING: bool = False VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES: bool = True VLLM_OBJECT_STORAGE_SHM_BUFFER_NAME: str = "VLLM_OBJECT_STORAGE_SHM_BUFFER" VLLM_DEEPEP_BUFFER_SIZE_MB: int = 1024 @@ -1387,6 +1388,10 @@ def get_vllm_port() -> Optional[int]: "VLLM_CUSTOM_SCOPES_FOR_PROFILING": lambda: bool(int(os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING", "0"))), + # Add optional nvtx scopes for profiling, disable to avoid overheads + "VLLM_NVTX_SCOPES_FOR_PROFILING": + lambda: bool(int(os.getenv("VLLM_NVTX_SCOPES_FOR_PROFILING", "0"))), + # Represent block hashes in KV cache events as 64-bit integers instead of # raw bytes. Defaults to True for backward compatibility. "VLLM_KV_EVENTS_USE_INT_BLOCK_HASHES": diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py index fd84b4a111f5..ec4417290f61 100644 --- a/vllm/v1/utils.py +++ b/vllm/v1/utils.py @@ -375,8 +375,22 @@ def report_usage_stats( }) +_PROFILER_FUNC = None + + def record_function_or_nullcontext(name: str) -> AbstractContextManager: + global _PROFILER_FUNC + + # fast path assume it is set + if _PROFILER_FUNC is not None: + return _PROFILER_FUNC(name) + + func = contextlib.nullcontext if envs.VLLM_CUSTOM_SCOPES_FOR_PROFILING: - return record_function(name) - else: - return contextlib.nullcontext() + func = record_function + elif envs.VLLM_NVTX_SCOPES_FOR_PROFILING: + import nvtx + func = nvtx.annotate + + _PROFILER_FUNC = func + return func(name) From 5e25b122366a639f6da9fa568b532d6932c45aff Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Wed, 24 Sep 2025 01:23:30 +0200 Subject: [PATCH 305/518] [Kernel] [Mamba] Remove BLOCK_H=1 from list of tuneable configurations for `_chunk_cumsum_fwd_kernel` (#25197) Signed-off-by: Thomas Parnell Co-authored-by: Chih-Chieh-Yang --- vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py | 1 - 1 file changed, 1 deletion(-) diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py index a7b3c814859c..2e657426143b 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py @@ -17,7 +17,6 @@ @triton.autotune( configs=[ - triton.Config({'BLOCK_SIZE_H': 1}), triton.Config({'BLOCK_SIZE_H': 2}), triton.Config({'BLOCK_SIZE_H': 4}), triton.Config({'BLOCK_SIZE_H': 8}), From bde2a1a8a442cd594eec35f5e356bddb5982de45 Mon Sep 17 00:00:00 2001 From: Juan Villamizar <100237675+jpvillam-amd@users.noreply.github.com> Date: Tue, 23 Sep 2025 18:39:50 -0500 Subject: [PATCH 306/518] [ROCm] Small functional changes for gptoss (#25201) Signed-off-by: jpvillam Co-authored-by: jpvillam --- .../model_executor/layers/quantization/mxfp4.py | 9 ++++++--- .../layers/quantization/utils/mxfp4_utils.py | 17 
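The cached-factory pattern behind record_function_or_nullcontext above is worth stating on its own: choose the scope implementation once from the environment flags, then reuse it on every call so the common no-profiling path stays a single check. A standalone sketch, assuming the same two flags (the real code reads them through vllm.envs rather than os.getenv):

    import contextlib
    import os
    from contextlib import AbstractContextManager
    from typing import Callable, Optional

    _SCOPE_FACTORY: Optional[Callable[[str], AbstractContextManager]] = None

    def profiling_scope(name: str) -> AbstractContextManager:
        global _SCOPE_FACTORY
        if _SCOPE_FACTORY is None:
            if os.getenv("VLLM_CUSTOM_SCOPES_FOR_PROFILING") == "1":
                from torch.profiler import record_function
                _SCOPE_FACTORY = record_function
            elif os.getenv("VLLM_NVTX_SCOPES_FOR_PROFILING") == "1":
                import nvtx  # only imported when the flag requests it
                _SCOPE_FACTORY = nvtx.annotate
            else:
                _SCOPE_FACTORY = lambda _: contextlib.nullcontext()
        return _SCOPE_FACTORY(name)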
++++++++++++++--- vllm/platforms/rocm.py | 6 ++++++ 3 files changed, 26 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index a71c8d32a22c..b710f6ee249b 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -212,12 +212,15 @@ def create_weights(self, layer: torch.nn.Module, num_experts: int, intermediate_size_per_partition_after_pad = round_up( intermediate_size_per_partition, 256) hidden_size = round_up(hidden_size, 256) - elif current_platform.is_rocm() or ( - self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS - or self.mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16): + elif (self.mxfp4_backend == Mxfp4Backend.SM100_FI_MXFP4_MXFP8_CUTLASS + or self.mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16): intermediate_size_per_partition_after_pad = round_up( intermediate_size_per_partition, 128) hidden_size = round_up(hidden_size, 128) + elif current_platform.is_rocm(): + intermediate_size_per_partition_after_pad = round_up( + intermediate_size_per_partition, 256) + hidden_size = round_up(hidden_size, 256) else: intermediate_size_per_partition_after_pad = round_up( intermediate_size_per_partition, 64) diff --git a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py index d61ca7ad5dc4..fb1d041f3449 100644 --- a/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py +++ b/vllm/model_executor/layers/quantization/utils/mxfp4_utils.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Callable, Optional +from typing import Any, Callable, Optional import torch @@ -21,6 +21,10 @@ def _swizzle_mxfp4(quant_tensor, scale, num_warps): from triton_kernels.tensor import FP4, convert_layout, wrap_torch_tensor from triton_kernels.tensor_details import layout from triton_kernels.tensor_details.layout import StridedLayout + + value_layout_opts: dict[str, Any] = {} + scale_layout_opts: dict[str, Any] = {} + if (current_platform.is_cuda() and current_platform.is_device_capability(90) and not is_torch_equal_or_newer("2.8.1")): @@ -28,8 +32,15 @@ def _swizzle_mxfp4(quant_tensor, scale, num_warps): "Mxfp4 on hopper is running on torch < 2.8.1, " "this cause swizling to be disabled, which may " "cause performance degradation. 
Please upgrade to torch nightly") - value_layout, value_layout_opts = StridedLayout, dict() - scale_layout, scale_layout_opts = StridedLayout, dict() + value_layout = StridedLayout + scale_layout = StridedLayout + elif current_platform.is_rocm(): + from triton_kernels.tensor_details.layout import (GFX950MXScaleLayout, + StridedLayout) + + from vllm.platforms.rocm import on_gfx950 + value_layout = StridedLayout + scale_layout = GFX950MXScaleLayout if on_gfx950() else StridedLayout else: value_layout, value_layout_opts = \ layout.make_default_matmul_mxfp4_w_layout(mx_axis=1) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 878718489fa8..942fd1973f4f 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -118,6 +118,12 @@ def on_gfx9() -> bool: return any(arch in GPU_ARCH for arch in ["gfx90a", "gfx942", "gfx950"]) +@cache +def on_gfx950() -> bool: + GPU_ARCH = torch.cuda.get_device_properties("cuda").gcnArchName + return any(arch in GPU_ARCH for arch in ["gfx950"]) + + @cache def use_rocm_custom_paged_attention( qtype: torch.dtype, From e0b24ea0305e0ead2c1cb1e0488744b5a91d524e Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 23 Sep 2025 19:53:34 -0400 Subject: [PATCH 307/518] [Perf] Increase default max splits for FA3 full cudagraphs (#25495) Signed-off-by: Lucas Wilkinson --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 33dae0be05f8..689428ec5910 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -119,7 +119,7 @@ VLLM_SERVER_DEV_MODE: bool = False VLLM_V1_OUTPUT_PROC_CHUNK_SIZE: int = 128 VLLM_MLA_DISABLE: bool = False - VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH: int = 16 + VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH: int = 32 VLLM_RAY_PER_WORKER_GPUS: float = 1.0 VLLM_RAY_BUNDLE_INDICES: str = "" VLLM_CUDART_SO_PATH: Optional[str] = None @@ -1017,7 +1017,7 @@ def get_vllm_port() -> Optional[int]: # max number splits for cuda graph decode "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": lambda: int(os.getenv("VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH", - "16")), + "32")), # Number of GPUs per worker in Ray, if it is set to be a fraction, # it allows ray to schedule multiple actors on a single GPU, From 1210e4d95b515507cd151ee76ef947999fdcbe59 Mon Sep 17 00:00:00 2001 From: Alexander Matveev <59768536+alexm-redhat@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:57:55 -0400 Subject: [PATCH 308/518] [Bugfix] [B200] cutlass_mla - ensure kv_split == 1 for batch size > 1 (#25509) Signed-off-by: Alexander Matveev --- csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp index fbbc2e588c32..297d94dcc063 100644 --- a/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp +++ b/csrc/attention/mla/cutlass_sm100_mla/device/sm100_mla.hpp @@ -135,10 +135,10 @@ class MLA { max_splits = min(16, max_splits); // TODO: This avoids a hang when the batch size larger than 1 and - // there is more than 4 kv_splits. + // there is more than 1 kv_splits. // Discuss with NVIDIA how this can be fixed. 
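The new on_gfx950 helper above mirrors the existing on_gfx9 check: probe the device architecture string once and let functools.cache answer every subsequent call. A generic sketch of that pattern (the helper name and the example arch lists are illustrative):

    from functools import cache

    import torch

    @cache
    def gcn_arch_matches(*archs: str) -> bool:
        # Query gcnArchName once per distinct arch tuple; later calls are
        # memoized and never touch the HIP/CUDA runtime again.
        name = torch.cuda.get_device_properties("cuda").gcnArchName
        return any(arch in name for arch in archs)

    # e.g. gcn_arch_matches("gfx950") or gcn_arch_matches("gfx90a", "gfx942", "gfx950")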
if (B > 1) { - max_splits = min(2, max_splits); + max_splits = min(1, max_splits); } // printf(" max_splits = %d\n", max_splits); From dc464a3d3937e30267514e1fc5b988a35dd9dbdf Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 23 Sep 2025 20:00:29 -0400 Subject: [PATCH 309/518] [BugFix] AssertionError: Do not capture num_reqs > max_num_reqs for uniform batch (#25505) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_model_runner.py | 32 +++++++++++++++--------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f78582495814..eebdbcc621c6 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2828,7 +2828,7 @@ def _get_mm_dummy_batch( def _dummy_run( self, num_tokens: int, - cudagraph_runtime_mode: CUDAGraphMode = CUDAGraphMode.NONE, + cudagraph_runtime_mode: Optional[CUDAGraphMode] = None, force_attention: bool = False, uniform_decode: bool = False, allow_microbatching: bool = True, @@ -2844,6 +2844,8 @@ def _dummy_run( Args: num_tokens: Number of tokens to run the dummy forward pass. cudagraph_runtime_mode: used to control the behavior. + - if not set will determine the cudagraph mode based on using + the self.cudagraph_dispatcher. - CUDAGraphMode.NONE: No cudagraph, for warm up and profile run - CUDAGraphMode.PIECEWISE: Piecewise cudagraph. - CUDAGraphMode.FULL: Full cudagraph, attention metadata is @@ -2857,7 +2859,7 @@ def _dummy_run( (1 token) and prefill (multiple tokens) requests. remove_lora: If False, dummy LoRAs are not destroyed after the run """ - assert cudagraph_runtime_mode in { + assert cudagraph_runtime_mode is None or cudagraph_runtime_mode in { CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL } @@ -2899,10 +2901,6 @@ def _dummy_run( elif uniform_decode: assert not create_mixed_batch num_reqs = cdiv(num_tokens, max_query_len) - assert num_reqs <= max_num_reqs, \ - f"Do not capture num_reqs {num_reqs} > max_num_reqs " \ - f"{max_num_reqs} for uniform batch. Num tokens: " \ - f"{num_tokens}, max_query_len: {max_query_len}" num_scheduled_tokens_list = [max_query_len] * num_reqs if num_tokens % max_query_len != 0: num_scheduled_tokens_list[-1] = num_tokens % max_query_len @@ -3043,18 +3041,20 @@ def _dummy_run( intermediate_tensors = self.sync_and_slice_intermediate_tensors( num_tokens, None, False) - if cudagraph_runtime_mode == CUDAGraphMode.NONE: - batch_descriptor = None - else: - # filter out the valid batch descriptor - _cg_mode, batch_descriptor = \ - self.cudagraph_dispatcher.dispatch( - BatchDescriptor(num_tokens=num_tokens, - uniform_decode=uniform_decode)) - # sanity check - assert cudagraph_runtime_mode == _cg_mode, ( + + # filter out the valid batch descriptor + _cg_mode, batch_descriptor = self.cudagraph_dispatcher.dispatch( + BatchDescriptor(num_tokens=num_tokens, + uniform_decode=uniform_decode)) + if cudagraph_runtime_mode is not None: + # we allow forcing NONE when the dispatcher disagrees to support + # warm ups for cudagraph capture + assert cudagraph_runtime_mode == CUDAGraphMode.NONE or \ + cudagraph_runtime_mode == _cg_mode, ( f"Cudagraph runtime mode mismatch at dummy_run. 
" f"Expected {_cg_mode}, but got {cudagraph_runtime_mode}.") + else: + cudagraph_runtime_mode = _cg_mode if ubatch_slices is not None: num_tokens = num_tokens // 2 From 7ad5e50adf96826acca509747952452524e9422f Mon Sep 17 00:00:00 2001 From: Doug Smith Date: Tue, 23 Sep 2025 20:03:31 -0400 Subject: [PATCH 310/518] Improve output when failing json.loads() on structured output test (#25483) Signed-off-by: dougbtv --- .../llm/test_struct_output_generate.py | 31 +++++++++++-------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/tests/v1/entrypoints/llm/test_struct_output_generate.py b/tests/v1/entrypoints/llm/test_struct_output_generate.py index e2c686928cea..5017c83025ba 100644 --- a/tests/v1/entrypoints/llm/test_struct_output_generate.py +++ b/tests/v1/entrypoints/llm/test_struct_output_generate.py @@ -81,16 +81,6 @@ class CarDescription(BaseModel): car_type: CarType -def _load_json(s: str, backend: str) -> str: - if backend != "xgrammar": - return json.loads(s) - - # xgrammar specific workarounds - # https://github.com/mlc-ai/xgrammar/issues/286 - s = re.sub(r'[\x00-\x1F\x7F-\xFF]', '', s) - return json.loads(s) - - def test_guided_decoding_deprecated(): with pytest.warns(DeprecationWarning, match="GuidedDecodingParams is deprecated.*"): @@ -177,7 +167,12 @@ def test_structured_output( if backend != 'lm-format-enforcer': assert "\n" not in generated_text print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - output_json = json.loads(generated_text) + try: + output_json = json.loads(generated_text) + except json.JSONDecodeError as e: + pytest.fail( + f"Invalid JSON from backend={backend}: {generated_text!r}\n" + f"Schema: {sample_json_schema}\nError: {e}") jsonschema.validate(instance=output_json, schema=sample_json_schema) # @@ -425,7 +420,12 @@ def test_structured_output( generated_text = output.outputs[0].text assert generated_text is not None print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - output_json = json.loads(generated_text) + try: + output_json = json.loads(generated_text) + except json.JSONDecodeError as e: + pytest.fail( + f"Invalid JSON from backend={backend}: {generated_text!r}\n" + f"Schema: {json_schema}\nError: {e}") jsonschema.validate(instance=output_json, schema=json_schema) # @@ -468,7 +468,12 @@ def test_structured_output( generated_text = output.outputs[0].text assert generated_text is not None print(f"Prompt: {prompt!r}, Generated text: {generated_text!r}") - output_json = json.loads(generated_text) + try: + output_json = json.loads(generated_text) + except json.JSONDecodeError as e: + pytest.fail( + f"Invalid JSON from backend={backend}: {generated_text!r}\n" + f"Schema: {json_schema}\nError: {e}") jsonschema.validate(instance=output_json, schema=json_schema) if backend not in ["outlines", "lm-format-enforcer"]: From 0d235b874aae47b3214b3354bb70fcefb91c4fce Mon Sep 17 00:00:00 2001 From: Chenxi Yang Date: Tue, 23 Sep 2025 17:07:42 -0700 Subject: [PATCH 311/518] Add CUTLASS FP8 MOE benchmark scripts and kernel config (#25302) Signed-off-by: Chenxi Yang Co-authored-by: Chenxi Yang --- .../kernels/benchmark_cutlass_moe_fp8.py | 406 ++++++++++++++++++ ...evice_name=NVIDIA_H100,dtype=fp8_w8a8.json | 123 ++++++ 2 files changed, 529 insertions(+) create mode 100644 benchmarks/kernels/benchmark_cutlass_moe_fp8.py create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=NVIDIA_H100,dtype=fp8_w8a8.json diff --git a/benchmarks/kernels/benchmark_cutlass_moe_fp8.py 
b/benchmarks/kernels/benchmark_cutlass_moe_fp8.py new file mode 100644 index 000000000000..b419b2fa0e3e --- /dev/null +++ b/benchmarks/kernels/benchmark_cutlass_moe_fp8.py @@ -0,0 +1,406 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +""" +Benchmark the performance of the cutlass_moe_fp8 kernel vs the triton_moe +kernel. Both kernels take in fp8 quantized weights and 16-bit activations, +but use different quantization strategies and backends. +""" + +import nvtx +import torch + +from vllm import _custom_ops as ops +from vllm.model_executor.layers.fused_moe.config import fp8_w8a8_moe_quant_config +from vllm.model_executor.layers.fused_moe.cutlass_moe import cutlass_moe_fp8 +from vllm.model_executor.layers.fused_moe.fused_moe import fused_experts, fused_topk +from vllm.platforms import current_platform +from vllm.utils import FlexibleArgumentParser + +# Weight shapes for different models: [num_experts, topk, hidden_size, +# intermediate_size] +WEIGHT_SHAPES_MOE = { + "mixtral-8x7b": [ + [8, 2, 4096, 14336], + ], + "deepseek-v2": [ + [160, 6, 5120, 12288], + ], + "custom-small": [ + [8, 2, 2048, 7168], + ], + "glm45-fp8": [ + [128, 8, 4096, 1408], + ], + "Llama-4-Maverick-17B-128E-Instruct-FP8": [ + [128, 1, 5120, 8192], + ], +} + +DEFAULT_MODELS = [ + "mixtral-8x7b", +] + +DEFAULT_BATCH_SIZES = [4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048] +DEFAULT_TP_SIZES = [1] + +PER_ACT_TOKEN_OPTS = [False, True] +PER_OUT_CH_OPTS = [False, True] + +FP8_DTYPE = current_platform.fp8_dtype() + + +def bench_run( + results: list, + model: str, + num_experts: int, + topk: int, + per_act_token: bool, + per_out_ch: bool, + mkn: tuple[int, int, int], +): + (m, k, n) = mkn + + dtype = torch.half + device = "cuda" + + # Create input activations + a = torch.randn((m, k), device=device, dtype=dtype) / 10 + + # Create weights + w1 = torch.randn((num_experts, 2 * n, k), device=device, dtype=dtype) / 10 + w2 = torch.randn((num_experts, k, n), device=device, dtype=dtype) / 10 + + # Create FP8 quantized weights and scales for both kernels + w1_fp8q = torch.empty((num_experts, 2 * n, k), device=device, dtype=FP8_DTYPE) + w2_fp8q = torch.empty((num_experts, k, n), device=device, dtype=FP8_DTYPE) + + # Create scales based on quantization strategy + if per_out_ch: + # Per-channel quantization + w1_scale = torch.empty( + (num_experts, 2 * n, 1), device=device, dtype=torch.float32 + ) + w2_scale = torch.empty((num_experts, k, 1), device=device, dtype=torch.float32) + else: + # Per-tensor quantization + w1_scale = torch.empty((num_experts, 1, 1), device=device, dtype=torch.float32) + w2_scale = torch.empty((num_experts, 1, 1), device=device, dtype=torch.float32) + + # Quantize weights + for expert in range(num_experts): + if per_out_ch: + # Per-channel quantization - not yet implemented properly + # For now, fall back to per-tensor quantization + w1_fp8q[expert], w1_scale_temp = ops.scaled_fp8_quant(w1[expert]) + w2_fp8q[expert], w2_scale_temp = ops.scaled_fp8_quant(w2[expert]) + # Expand scalar scales to the expected per-channel shape + w1_scale[expert] = w1_scale_temp.expand(2 * n, 1) + w2_scale[expert] = w2_scale_temp.expand(k, 1) + else: + # Per-tensor quantization + w1_fp8q[expert], w1_scale_temp = ops.scaled_fp8_quant(w1[expert]) + w2_fp8q[expert], w2_scale_temp = ops.scaled_fp8_quant(w2[expert]) + # Store scalar scales in [1, 1] tensors + w1_scale[expert, 0, 0] = w1_scale_temp + w2_scale[expert, 0, 0] = w2_scale_temp + + # Prepare weights for CUTLASS (no 
transpose needed) + w1_fp8q_cutlass = w1_fp8q # Keep original [E, 2N, K] + w2_fp8q_cutlass = w2_fp8q # Keep original [E, K, N] + + # Create router scores and get topk + score = torch.randn((m, num_experts), device=device, dtype=dtype) + topk_weights, topk_ids, _ = fused_topk(a, score, topk, renormalize=False) + + # WORKAROUND: CUTLASS MoE FP8 has issues with per-token quantization + # Force per-tensor quantization for all cases to match working e2e setup + a1_scale = torch.full((), 1e-2, device=device, dtype=torch.float32) + a2_scale = torch.full((), 1e-2, device=device, dtype=torch.float32) + + # Force per-tensor quantization for all cases + per_act_token = False + + # Create stride tensors for CUTLASS + ab_strides1 = torch.full((num_experts,), k, dtype=torch.int64, device=device) + ab_strides2 = torch.full((num_experts,), n, dtype=torch.int64, device=device) + c_strides1 = torch.full((num_experts,), 2 * n, dtype=torch.int64, device=device) + c_strides2 = torch.full((num_experts,), k, dtype=torch.int64, device=device) + + def run_triton_moe( + a: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + a1_scale: torch.Tensor, + a2_scale: torch.Tensor, + num_repeats: int, + ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + per_act_token_quant=per_act_token, + per_out_ch_quant=per_out_ch, + ) + + for _ in range(num_repeats): + fused_experts( + a, + w1, + w2, + topk_weights, + topk_ids, + quant_config=quant_config, + ) + + def run_cutlass_moe_fp8( + a: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + ab_strides1: torch.Tensor, + ab_strides2: torch.Tensor, + c_strides1: torch.Tensor, + c_strides2: torch.Tensor, + w1_scale: torch.Tensor, + w2_scale: torch.Tensor, + a1_scale: torch.Tensor, + a2_scale: torch.Tensor, + num_repeats: int, + ): + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + per_act_token_quant=per_act_token, + per_out_ch_quant=per_out_ch, + ) + + for _ in range(num_repeats): + with nvtx.annotate("cutlass_moe_fp8", color="blue"): + cutlass_moe_fp8( + a=a, + w1_q=w1, + w2_q=w2, + topk_weights=topk_weights, + topk_ids=topk_ids, + ab_strides1=ab_strides1, + ab_strides2=ab_strides2, + c_strides1=c_strides1, + c_strides2=c_strides2, + quant_config=quant_config, + activation="silu", + global_num_experts=num_experts, + ) + + # Pre-create quantization config to avoid creating it inside CUDA graph + quant_config = fp8_w8a8_moe_quant_config( + w1_scale=w1_scale, + w2_scale=w2_scale, + a1_scale=a1_scale, + a2_scale=a2_scale, + per_act_token_quant=per_act_token, + per_out_ch_quant=per_out_ch, + ) + + # Create CUDA graphs for CUTLASS (match benchmark_moe.py pattern exactly) + cutlass_stream = torch.cuda.Stream() + cutlass_graph = torch.cuda.CUDAGraph() + with torch.cuda.graph(cutlass_graph, stream=cutlass_stream): + # Capture 10 invocations like benchmark_moe.py + for _ in range(10): + cutlass_moe_fp8( + a=a, + w1_q=w1_fp8q_cutlass, + w2_q=w2_fp8q_cutlass, + topk_weights=topk_weights, + topk_ids=topk_ids, + ab_strides1=ab_strides1, + ab_strides2=ab_strides2, + c_strides1=c_strides1, + c_strides2=c_strides2, + quant_config=quant_config, + activation="silu", + global_num_experts=num_experts, + ) + torch.cuda.synchronize() + + # Create CUDA graphs for Triton (match 
benchmark_moe.py pattern exactly) + triton_stream = torch.cuda.Stream() + triton_graph = torch.cuda.CUDAGraph() + with torch.cuda.graph(triton_graph, stream=triton_stream): + # Capture 10 invocations like benchmark_moe.py + for _ in range(10): + fused_experts( + a, + w1_fp8q, + w2_fp8q, + topk_weights, + topk_ids, + quant_config=quant_config, + ) + torch.cuda.synchronize() + + def bench_cuda_graph(graph, num_warmup=5, num_iters=100): + """Benchmark CUDA graph using events like benchmark_moe.py""" + # Warmup + for _ in range(num_warmup): + graph.replay() + torch.cuda.synchronize() + + # Timing + start_event = torch.cuda.Event(enable_timing=True) + end_event = torch.cuda.Event(enable_timing=True) + + latencies = [] + for _ in range(num_iters): + torch.cuda.synchronize() + start_event.record() + graph.replay() + end_event.record() + end_event.synchronize() + latencies.append(start_event.elapsed_time(end_event)) + + # Divide by 10 since graph contains 10 calls + return sum(latencies) / (num_iters * 10) + + # Benchmark parameters + num_warmup = 5 + num_iters = 100 + + # Benchmark only CUDA graphs (more reliable and faster) + # Benchmark Triton MoE with CUDA graphs + triton_graph_time = bench_cuda_graph( + triton_graph, num_warmup=num_warmup, num_iters=num_iters + ) + + # Benchmark CUTLASS MoE with CUDA graphs + cutlass_graph_time = bench_cuda_graph( + cutlass_graph, num_warmup=num_warmup, num_iters=num_iters + ) + + # Convert ms to us and return results + triton_time_us = triton_graph_time * 1000 + cutlass_time_us = cutlass_graph_time * 1000 + + return { + "batch_size": m, + "triton_time_us": triton_time_us, + "cutlass_time_us": cutlass_time_us, + } + + +def main(args): + print("Benchmarking models:") + for i, model in enumerate(args.models): + print(f"[{i}] {model}") + + all_results = [] + + for model in args.models: + for tp in args.tp_sizes: + for layer in WEIGHT_SHAPES_MOE[model]: + num_experts = layer[0] + topk = layer[1] + size_k = layer[2] + size_n = layer[3] // tp + + if len(args.limit_k) > 0 and size_k not in args.limit_k: + continue + + if len(args.limit_n) > 0 and size_n not in args.limit_n: + continue + + for per_act_token in args.per_act_token_opts: + for per_out_ch in args.per_out_ch_opts: + print( + f"\n=== {model}, experts={num_experts}, topk={topk}," + f"per_act={per_act_token}, per_out_ch={per_out_ch} ===" + ) + + config_results = [] + for size_m in args.batch_sizes: + mkn = (size_m, size_k, size_n) + result = bench_run( + [], # Not used anymore + model, + num_experts, + topk, + per_act_token, + per_out_ch, + mkn, + ) + if result: + config_results.append(result) + + # Print results table for this configuration + if config_results: + print( + f"\n{'Batch Size':<12}" + f"{'Triton (us)':<15}" + f"{'CUTLASS (us)':<15}" + ) + print("-" * 45) + for result in config_results: + print( + f"{result['batch_size']:<12}" + f"{result['triton_time_us']:<15.2f}" + f"{result['cutlass_time_us']:<15.2f}" + ) + + all_results.extend(config_results) + + print(f"\nTotal benchmarks completed: {len(all_results)}") + + +if __name__ == "__main__": + parser = FlexibleArgumentParser( + description="""Benchmark CUTLASS FP8 MOE vs Triton FP8 FUSED MOE + across specified models/shapes/batches + + Example usage: + python benchmark_cutlass_moe_fp8.py \ + --model "Llama-4-Maverick-17B-128E-Instruct-FP8" \ + --tp-sizes 8 \ + --batch-size 2 4 8 \ + --per-act-token-opts false \ + --per-out-ch-opts false + + """ + ) + parser.add_argument( + "--models", + nargs="+", + type=str, + default=DEFAULT_MODELS, + 
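The timing scheme used by bench_cuda_graph above is a general recipe for benchmarking CUDA graphs: capture several invocations in one graph, replay it between CUDA events, and divide by the number of captured calls. A condensed sketch (fn is a stand-in for the kernel invocation under test):

    import torch

    def time_cuda_graph(fn, calls_per_graph: int = 10,
                        warmup: int = 5, iters: int = 100) -> float:
        # Capture `calls_per_graph` invocations so per-replay launch overhead
        # is amortized, mirroring the pattern in the benchmark above.
        stream = torch.cuda.Stream()
        graph = torch.cuda.CUDAGraph()
        with torch.cuda.graph(graph, stream=stream):
            for _ in range(calls_per_graph):
                fn()
        torch.cuda.synchronize()

        for _ in range(warmup):
            graph.replay()
        torch.cuda.synchronize()

        start = torch.cuda.Event(enable_timing=True)
        end = torch.cuda.Event(enable_timing=True)
        total_ms = 0.0
        for _ in range(iters):
            start.record()
            graph.replay()
            end.record()
            end.synchronize()
            total_ms += start.elapsed_time(end)
        # Average time of a single captured call, in milliseconds.
        return total_ms / (iters * calls_per_graph)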
choices=WEIGHT_SHAPES_MOE.keys(), + ) + parser.add_argument("--tp-sizes", nargs="+", type=int, default=DEFAULT_TP_SIZES) + parser.add_argument( + "--batch-sizes", nargs="+", type=int, default=DEFAULT_BATCH_SIZES + ) + parser.add_argument("--limit-k", nargs="+", type=int, default=[]) + parser.add_argument("--limit-n", nargs="+", type=int, default=[]) + parser.add_argument( + "--per-act-token-opts", + nargs="+", + type=lambda x: x.lower() == "true", + default=[False, True], + help="Per-activation token quantization options (true/false)", + ) + parser.add_argument( + "--per-out-ch-opts", + nargs="+", + type=lambda x: x.lower() == "true", + default=[False, True], + help="Per-output channel quantization options (true/false)", + ) + + args = parser.parse_args() + main(args) diff --git a/vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=NVIDIA_H100,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=NVIDIA_H100,dtype=fp8_w8a8.json new file mode 100644 index 000000000000..600bd4444535 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=128,N=1024,device_name=NVIDIA_H100,dtype=fp8_w8a8.json @@ -0,0 +1,123 @@ +{ + "triton_version": "3.4.0", + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "512": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "1024": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "2048": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "8192": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "16384": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + } +} From 88d7bdbd2337917fbdd65bdfe33e6af7ecdb45cd Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 23 Sep 2025 20:07:51 -0400 Subject: [PATCH 312/518] [Bug] Fix 
AttributeError: 'FusedMoE' object has no attribute 'w13_weight_scale'. Did you mean: 'w13_weight_scale_inv' (#25519) Signed-off-by: yewentao256 Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/model_executor/warmup/deep_gemm_warmup.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/warmup/deep_gemm_warmup.py b/vllm/model_executor/warmup/deep_gemm_warmup.py index f6df85a50238..e495f9ee4472 100644 --- a/vllm/model_executor/warmup/deep_gemm_warmup.py +++ b/vllm/model_executor/warmup/deep_gemm_warmup.py @@ -53,9 +53,11 @@ def _extract_data_from_fused_moe_module( """ assert isinstance(m, FusedMoE) w13 = m.w13_weight - w13_s = getattr(m, "w13_weight_scale_inv", m.w13_weight_scale) + w13_s = m.w13_weight_scale_inv if hasattr( + m, "w13_weight_scale_inv") else m.w13_weight_scale w2 = m.w2_weight - w2_s = getattr(m, "w2_weight_scale_inv", m.w2_weight_scale) + w2_s = m.w2_weight_scale_inv if hasattr( + m, "w2_weight_scale_inv") else m.w2_weight_scale num_topk = m.top_k assert isinstance(w13, torch.Tensor) From c8bde93367fb252eca1e9a6ae78650caa4a9a951 Mon Sep 17 00:00:00 2001 From: ahao-anyscale Date: Tue, 23 Sep 2025 17:13:32 -0700 Subject: [PATCH 313/518] [BUG] Allows for RunAI Streamer and Torch.compile cache to be used together (#24922) Signed-off-by: ahao-anyscale --- tests/test_config.py | 107 +++++++++++++++++++++++++ vllm/config/model.py | 5 +- vllm/transformers_utils/runai_utils.py | 11 ++- 3 files changed, 119 insertions(+), 4 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 0796447c079b..9e2bfb9e1b0e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import os from dataclasses import MISSING, Field, asdict, dataclass, field +from unittest.mock import patch import pytest @@ -388,3 +390,108 @@ def test_get_and_verify_max_len(model_id, max_model_len, expected_max_len, else: actual_max_len = model_config.get_and_verify_max_len(max_model_len) assert actual_max_len == expected_max_len + + +class MockConfig: + """Simple mock object for testing maybe_pull_model_tokenizer_for_runai""" + + def __init__(self, model: str, tokenizer: str): + self.model = model + self.tokenizer = tokenizer + self.model_weights = None + + +@pytest.mark.parametrize("s3_url", [ + "s3://example-bucket-1/model/", + "s3://example-bucket-2/model/", +]) +@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files') +def test_s3_url_model_tokenizer_paths(mock_pull_files, s3_url): + """Test that S3 URLs create deterministic local directories for model and + tokenizer.""" + # Mock pull_files to avoid actually downloading files during tests + mock_pull_files.return_value = None + + # Create first mock and run the method + config1 = MockConfig(model=s3_url, tokenizer=s3_url) + ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url, s3_url) + + # Check that model and tokenizer point to existing directories + assert os.path.exists( + config1.model), f"Model directory does not exist: {config1.model}" + assert os.path.isdir( + config1.model), f"Model path is not a directory: {config1.model}" + assert os.path.exists( + config1.tokenizer + ), f"Tokenizer directory does not exist: {config1.tokenizer}" + assert os.path.isdir( + config1.tokenizer + ), f"Tokenizer path is not a 
directory: {config1.tokenizer}" + + # Verify that the paths are different from the original S3 URL + assert config1.model != s3_url, ( + "Model path should be converted to local directory") + assert config1.tokenizer != s3_url, ( + "Tokenizer path should be converted to local directory") + + # Store the original paths + created_model_dir = config1.model + create_tokenizer_dir = config1.tokenizer + + # Create a new mock and run the method with the same S3 URL + config2 = MockConfig(model=s3_url, tokenizer=s3_url) + ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url, s3_url) + + # Check that the new directories exist + assert os.path.exists( + config2.model), f"Model directory does not exist: {config2.model}" + assert os.path.isdir( + config2.model), f"Model path is not a directory: {config2.model}" + assert os.path.exists( + config2.tokenizer + ), f"Tokenizer directory does not exist: {config2.tokenizer}" + assert os.path.isdir( + config2.tokenizer + ), f"Tokenizer path is not a directory: {config2.tokenizer}" + + # Verify that the paths are deterministic (same as before) + assert config2.model == created_model_dir, ( + f"Model paths are not deterministic. " + f"Original: {created_model_dir}, New: {config2.model}") + assert config2.tokenizer == create_tokenizer_dir, ( + f"Tokenizer paths are not deterministic. " + f"Original: {create_tokenizer_dir}, New: {config2.tokenizer}") + + +@patch('vllm.transformers_utils.runai_utils.ObjectStorageModel.pull_files') +def test_s3_url_different_models_create_different_directories(mock_pull_files): + """Test that different S3 URLs create different local directories.""" + # Mock pull_files to avoid actually downloading files during tests + mock_pull_files.return_value = None + + s3_url1 = "s3://example-bucket-1/model/" + s3_url2 = "s3://example-bucket-2/model/" + + # Create mocks with different S3 URLs and run the method + config1 = MockConfig(model=s3_url1, tokenizer=s3_url1) + ModelConfig.maybe_pull_model_tokenizer_for_runai(config1, s3_url1, s3_url1) + + config2 = MockConfig(model=s3_url2, tokenizer=s3_url2) + ModelConfig.maybe_pull_model_tokenizer_for_runai(config2, s3_url2, s3_url2) + + # Verify that different URLs produce different directories + assert config1.model != config2.model, ( + f"Different S3 URLs should create different model directories. " + f"URL1 model: {config1.model}, URL2 model: {config2.model}") + assert config1.tokenizer != config2.tokenizer, ( + f"Different S3 URLs should create different tokenizer directories. 
" + f"URL1 tokenizer: {config1.tokenizer}, " + f"URL2 tokenizer: {config2.tokenizer}") + + # Verify that both sets of directories exist + assert os.path.exists(config1.model) and os.path.isdir(config1.model) + assert os.path.exists(config1.tokenizer) and os.path.isdir( + config1.tokenizer) + assert os.path.exists(config2.model) and os.path.isdir(config2.model) + assert os.path.exists(config2.tokenizer) and os.path.isdir( + config2.tokenizer) diff --git a/vllm/config/model.py b/vllm/config/model.py index d8a8fe20fd03..d75bd5fa47b3 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -699,11 +699,12 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str, model: Model name or path tokenizer: Tokenizer name or path """ + if not (is_runai_obj_uri(model) or is_runai_obj_uri(tokenizer)): return if is_runai_obj_uri(model): - object_storage_model = ObjectStorageModel() + object_storage_model = ObjectStorageModel(url=model) object_storage_model.pull_files( model, allow_pattern=["*.model", "*.py", "*.json"]) self.model_weights = model @@ -722,7 +723,7 @@ def maybe_pull_model_tokenizer_for_runai(self, model: str, # Only download tokenizer if needed and not already handled if is_runai_obj_uri(tokenizer): - object_storage_tokenizer = ObjectStorageModel() + object_storage_tokenizer = ObjectStorageModel(url=tokenizer) object_storage_tokenizer.pull_files(model, ignore_pattern=[ "*.pt", "*.safetensors", diff --git a/vllm/transformers_utils/runai_utils.py b/vllm/transformers_utils/runai_utils.py index b7bee1974de5..08466ca19b8a 100644 --- a/vllm/transformers_utils/runai_utils.py +++ b/vllm/transformers_utils/runai_utils.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import hashlib import os import shutil import signal @@ -56,12 +57,18 @@ class ObjectStorageModel: pull_files(): Pull model from object storage to the temporary directory. 
""" - def __init__(self) -> None: + def __init__(self, url: str) -> None: for sig in (signal.SIGINT, signal.SIGTERM): existing_handler = signal.getsignal(sig) signal.signal(sig, self._close_by_signal(existing_handler)) - self.dir = tempfile.mkdtemp() + dir_name = os.path.join( + tempfile.gettempdir(), + hashlib.sha256(str(url).encode()).hexdigest()[:8]) + if os.path.exists(dir_name): + shutil.rmtree(dir_name) + os.makedirs(dir_name) + self.dir = dir_name def __del__(self): self._close() From be0bb568c92069a1ef3250307c818a46702255de Mon Sep 17 00:00:00 2001 From: 0xNullPath Date: Wed, 24 Sep 2025 08:15:51 +0800 Subject: [PATCH 314/518] [Model] Support SeedOss Reason Parser (#24263) Signed-off-by: Yan Lu Co-authored-by: Michael Goin --- .../test_base_thinking_reasoning_parser.py | 392 ++++++++++++++++++ .../test_seedoss_reasoning_parser.py | 237 +++++++++++ vllm/reasoning/__init__.py | 4 + vllm/reasoning/abs_reasoning_parsers.py | 8 +- vllm/reasoning/basic_parsers.py | 156 +++++++ .../reasoning/deepseek_r1_reasoning_parser.py | 160 ++----- vllm/reasoning/mistral_reasoning_parser.py | 19 +- vllm/reasoning/qwen3_reasoning_parser.py | 129 ++---- vllm/reasoning/seedoss_reasoning_parser.py | 28 ++ 9 files changed, 887 insertions(+), 246 deletions(-) create mode 100644 tests/reasoning/test_base_thinking_reasoning_parser.py create mode 100644 tests/reasoning/test_seedoss_reasoning_parser.py create mode 100644 vllm/reasoning/basic_parsers.py create mode 100644 vllm/reasoning/seedoss_reasoning_parser.py diff --git a/tests/reasoning/test_base_thinking_reasoning_parser.py b/tests/reasoning/test_base_thinking_reasoning_parser.py new file mode 100644 index 000000000000..6a939dcfc2c9 --- /dev/null +++ b/tests/reasoning/test_base_thinking_reasoning_parser.py @@ -0,0 +1,392 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +from transformers import AutoTokenizer + +from tests.reasoning.utils import run_reasoning_extraction +from vllm.entrypoints.openai.protocol import ChatCompletionRequest +from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser + + +# Create a concrete test implementation of BaseThinkingReasoningParser +class TestThinkingReasoningParser(BaseThinkingReasoningParser): + """Test implementation of BaseThinkingReasoningParser.""" + + @property + def start_token(self) -> str: + return "" + + @property + def end_token(self) -> str: + return "" + + +class TestThinkingReasoningParserAlt(BaseThinkingReasoningParser): + """Alternative test implementation with different tokens.""" + + @property + def start_token(self) -> str: + return "" + + @property + def end_token(self) -> str: + return "" + + +# Use a test model +REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + + +@pytest.fixture(scope="module") +def test_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME) + # Add custom test tokens + test_tokens = ["", "", "", ""] + existing_tokens = set(tokenizer.get_vocab().keys()) + new_tokens = [ + token for token in test_tokens if token not in existing_tokens + ] + if new_tokens: + tokenizer.add_tokens(new_tokens) + return tokenizer + + +class TestBaseThinkingReasoningParserInit: + """ + Test initialization and basic properties of + BaseThinkingReasoningParser. 
+ """ + + def test_successful_initialization(self, test_tokenizer): + """Test successful initialization with valid tokens.""" + parser = TestThinkingReasoningParser(test_tokenizer) + assert parser.start_token == "" + assert parser.end_token == "" + assert parser.start_token_id is not None + assert parser.end_token_id is not None + + def test_initialization_with_missing_tokenizer(self): + """Test that initialization fails without tokenizer.""" + with pytest.raises(ValueError, match="model tokenizer must be passed"): + TestThinkingReasoningParser(None) + + def test_initialization_with_missing_tokens(self, test_tokenizer): + """Test that initialization fails when tokens are not in vocabulary.""" + + # Create a parser with tokens not in vocabulary + class MissingTokenParser(BaseThinkingReasoningParser): + + @property + def start_token(self) -> str: + return "" + + @property + def end_token(self) -> str: + return "" + + with pytest.raises(RuntimeError, + match="could not locate think start/end tokens"): + MissingTokenParser(test_tokenizer) + + def test_initialization_with_empty_tokens(self, test_tokenizer): + """Test that initialization fails with empty token strings.""" + + class EmptyTokenParser(BaseThinkingReasoningParser): + + @property + def start_token(self) -> str: + return "" + + @property + def end_token(self) -> str: + return "" + + with pytest.raises(ValueError, + match="start_token and end_token must be defined"): + EmptyTokenParser(test_tokenizer) + + +class TestBaseThinkingReasoningParserMethods: + """Test the methods of BaseThinkingReasoningParser.""" + + def test_is_reasoning_end(self, test_tokenizer): + """Test the is_reasoning_end method.""" + parser = TestThinkingReasoningParser(test_tokenizer) + end_token_id = parser.end_token_id + + # Test with end token present + assert parser.is_reasoning_end([1, 2, end_token_id, 4]) is True + + # Test without end token + assert parser.is_reasoning_end([1, 2, 3, 4]) is False + + # Test with empty list + assert parser.is_reasoning_end([]) is False + + def test_extract_content_ids(self, test_tokenizer): + """Test the extract_content_ids method.""" + parser = TestThinkingReasoningParser(test_tokenizer) + end_token_id = parser.end_token_id + + # Test with end token in the middle + input_ids = [1, 2, end_token_id, 4, 5] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [4, 5] + + # Test with end token at the end + input_ids = [1, 2, 3, end_token_id] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [] + + # Test without end token + input_ids = [1, 2, 3, 4] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [] + + # Test with end token as last element (should not extract) + input_ids = [1, 2, 3, end_token_id] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [] + + +class TestBaseThinkingReasoningParserExtraction: + """Test reasoning content extraction methods.""" + + def test_extract_reasoning_content_with_both_tokens(self, test_tokenizer): + """Test extraction when both start and end tokens are present.""" + parser = TestThinkingReasoningParser(test_tokenizer) + request = ChatCompletionRequest(messages=[], model="test-model") + + model_output = ("This is reasoning" + "This is content") + reasoning, content = parser.extract_reasoning_content( + model_output, request) + + assert reasoning == "This is reasoning" + assert content == "This is content" + + def test_extract_reasoning_content_only_end_token(self, test_tokenizer): + """Test 
extraction when only end token is present.""" + parser = TestThinkingReasoningParser(test_tokenizer) + request = ChatCompletionRequest(messages=[], model="test-model") + + model_output = ("This is reasoningThis is content") + reasoning, content = parser.extract_reasoning_content( + model_output, request) + + assert reasoning == "This is reasoning" + assert content == "This is content" + + def test_extract_reasoning_content_no_end_token(self, test_tokenizer): + """Test extraction when no end token is present.""" + parser = TestThinkingReasoningParser(test_tokenizer) + request = ChatCompletionRequest(messages=[], model="test-model") + + model_output = "This is just content" + reasoning, content = parser.extract_reasoning_content( + model_output, request) + + assert reasoning == "This is just content" + assert content is None + + def test_extract_reasoning_content_empty_output(self, test_tokenizer): + """Test extraction with empty output.""" + parser = TestThinkingReasoningParser(test_tokenizer) + request = ChatCompletionRequest(messages=[], model="test-model") + + model_output = "" + reasoning, content = parser.extract_reasoning_content( + model_output, request) + + assert reasoning == "" + assert content is None + + def test_extract_reasoning_content_only_tokens(self, test_tokenizer): + """Test extraction with only tokens and no content.""" + parser = TestThinkingReasoningParser(test_tokenizer) + request = ChatCompletionRequest(messages=[], model="test-model") + + model_output = ("") + reasoning, content = parser.extract_reasoning_content( + model_output, request) + + assert reasoning == "" + assert content is None + + +class TestBaseThinkingReasoningParserStreaming: + """Test streaming functionality of BaseThinkingReasoningParser.""" + + @pytest.mark.parametrize("streaming", [True, False]) + def test_simple_reasoning_extraction(self, test_tokenizer, streaming): + """ + Test basic reasoning extraction in both + streaming and non-streaming modes. 
+ """ + parser = TestThinkingReasoningParser(test_tokenizer) + + model_output = [ + "", "Some ", "reasoning ", "content", "", + "Final ", "answer" + ] + + reasoning, content = run_reasoning_extraction(parser, + model_output, + streaming=streaming) + + assert reasoning == "Some reasoning content" + assert content == "Final answer" + + def test_streaming_with_incremental_deltas(self, test_tokenizer): + """Test streaming processing with small incremental deltas.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + deltas = [ + "", + "Some ", + "reasoning ", + "content", + "", + "Final ", + "answer", + ] + + reasoning, content = run_reasoning_extraction(parser, + deltas, + streaming=True) + + assert reasoning == "Some reasoning content" + assert content == "Final answer" + + def test_streaming_with_start_token(self, test_tokenizer): + """Test streaming with start token included.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + deltas = [ + "", + "Some ", + "reasoning", + "", + "Answer", + ] + + reasoning, content = run_reasoning_extraction(parser, + deltas, + streaming=True) + + assert reasoning == "Some reasoning" + assert content == "Answer" + + def test_streaming_no_end_token(self, test_tokenizer): + """Test streaming when no end token is encountered.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + deltas = [ + "", + "Some ", + "reasoning ", + "without ", + "end", + ] + + reasoning, content = run_reasoning_extraction(parser, + deltas, + streaming=True) + + assert reasoning == "Some reasoning without end" + assert content is None + + def test_streaming_only_end_token(self, test_tokenizer): + """Test streaming when only end token appears.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + deltas = [ + "", + "Reasoning ", + "content", + "", + "Final", + ] + + reasoning, content = run_reasoning_extraction(parser, + deltas, + streaming=True) + + assert reasoning == "Reasoning content" + assert content == "Final" + + +class TestBaseThinkingReasoningParserMultipleImplementations: + """ + Test that multiple implementations of + BaseThinkingReasoningParser work correctly. + """ + + def test_different_token_implementations(self, test_tokenizer): + """ + Test that different implementations + with different tokens work independently. 
+ """ + parser1 = TestThinkingReasoningParser(test_tokenizer) + parser2 = TestThinkingReasoningParserAlt(test_tokenizer) + + # Test parser1 + model_output1 = ("Reasoning1Content1") + reasoning1, content1 = run_reasoning_extraction( + parser1, [model_output1]) + assert reasoning1 == "Reasoning1" + assert content1 == "Content1" + + # Test parser2 + model_output2 = "Reasoning2Content2" + reasoning2, content2 = run_reasoning_extraction( + parser2, [model_output2]) + assert reasoning2 == "Reasoning2" + assert content2 == "Content2" + + # Verify tokens are different + assert parser1.start_token != parser2.start_token + assert parser1.end_token != parser2.end_token + assert parser1.start_token_id != parser2.start_token_id + assert parser1.end_token_id != parser2.end_token_id + + +class TestBaseThinkingReasoningParserEdgeCases: + """Test edge cases and error conditions.""" + + def test_multiple_end_tokens(self, test_tokenizer): + """Test behavior with multiple end tokens.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + model_output = ("FirstMiddleLast") + reasoning, content = run_reasoning_extraction(parser, [model_output]) + + # Should stop at first end token + assert reasoning == "First" + assert content == "MiddleLast" + + def test_nested_tokens(self, test_tokenizer): + """Test behavior with nested-like token patterns.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + model_output = ("Outer" + "InnerContent") + reasoning, content = run_reasoning_extraction(parser, [model_output]) + + # Should process normally, start from first start token + assert reasoning == "OuterInner" + assert content == "Content" + + def test_malformed_tokens(self, test_tokenizer): + """Test behavior with malformed token-like strings.""" + parser = TestThinkingReasoningParser(test_tokenizer) + + model_output = ("Not a real token" + "Content") + reasoning, content = run_reasoning_extraction(parser, [model_output]) + + # Should treat as regular content since tokens don't match exactly + assert reasoning == ("Not a real token" + "Content") + assert content is None diff --git a/tests/reasoning/test_seedoss_reasoning_parser.py b/tests/reasoning/test_seedoss_reasoning_parser.py new file mode 100644 index 000000000000..bb5dc0f4ffe4 --- /dev/null +++ b/tests/reasoning/test_seedoss_reasoning_parser.py @@ -0,0 +1,237 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Any, cast + +import pytest +from transformers import AutoTokenizer + +from tests.reasoning.utils import run_reasoning_extraction +from vllm.reasoning import ReasoningParser, ReasoningParserManager + +parser_name = "seed_oss" +start_token = "" +end_token = "" + +# Use a test model that contains our custom tokens +REASONING_MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B" + + +@pytest.fixture(scope="module") +def seedoss_tokenizer(): + tokenizer = AutoTokenizer.from_pretrained(REASONING_MODEL_NAME) + # Add custom SeedOSS tokens if they don't exist + if start_token not in tokenizer.get_vocab(): + tokenizer.add_tokens([start_token, end_token]) + return tokenizer + + +SIMPLE_REASONING: dict[str, Any] = { + "output": "This is a reasoning sectionThis is the rest", + "reasoning_content": "This is a reasoning section", + "content": "This is the rest", + "is_reasoning_end": True, +} +COMPLETE_REASONING: dict[str, Any] = { + "output": "This is a reasoning section", + "reasoning_content": "This is a reasoning section", + "content": None, + "is_reasoning_end": True, 
+} +NO_CONTENT: dict[str, Any] = { + "output": "This is content", + "reasoning_content": "This is content", + "content": None, + "is_reasoning_end": False, +} +NO_REASONING_STREAMING: dict[str, Any] = { + "output": "This is a reasoning section", + "reasoning_content": "This is a reasoning section", + "content": None, + "is_reasoning_end": False, +} +MULTIPLE_LINES: dict[str, Any] = { + "output": "This\nThatThis is the rest\nThat", + "reasoning_content": "This\nThat", + "content": "This is the rest\nThat", + "is_reasoning_end": True, +} +WITH_START_TOKEN: dict[str, Any] = { + "output": ("This is a reasoning section" + "This is the rest"), + "reasoning_content": + "This is a reasoning section", + "content": + "This is the rest", + "is_reasoning_end": + True, +} +ONLY_END_TOKEN: dict[str, Any] = { + "output": "Some reasoningThis is the rest", + "reasoning_content": "Some reasoning", + "content": "This is the rest", + "is_reasoning_end": True, +} +NO_TOKENS: dict[str, Any] = { + "output": "This is just content without any reasoning tokens", + "reasoning_content": "This is just content without any reasoning tokens", + "content": None, + "is_reasoning_end": False, +} + + +def test_seedoss_reasoning_parser_creation(seedoss_tokenizer): + """Test that the SeedOSS reasoning parser can be created and registered.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + assert isinstance(parser, ReasoningParser) + assert parser.start_token == start_token + assert parser.end_token == end_token + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_simple_reasoning(seedoss_tokenizer, streaming): + """Test basic reasoning extraction with both tokens.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, SIMPLE_REASONING["output"])], streaming=streaming) + + assert reasoning == SIMPLE_REASONING["reasoning_content"] + assert content == SIMPLE_REASONING["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_complete_reasoning(seedoss_tokenizer, streaming): + """Test reasoning extraction when there's no content after reasoning.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, COMPLETE_REASONING["output"])], streaming=streaming) + + assert reasoning == COMPLETE_REASONING["reasoning_content"] + assert content == COMPLETE_REASONING["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_no_content(seedoss_tokenizer, streaming): + """Test when there's no end token - everything is reasoning content.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, NO_CONTENT["output"])], streaming=streaming) + + assert reasoning == NO_CONTENT["reasoning_content"] + assert content == NO_CONTENT["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_multiple_lines(seedoss_tokenizer, streaming): + """Test reasoning extraction with multiline content.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, MULTIPLE_LINES["output"])], streaming=streaming) + + 
assert reasoning == MULTIPLE_LINES["reasoning_content"] + assert content == MULTIPLE_LINES["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_with_start_token(seedoss_tokenizer, streaming): + """Test reasoning extraction with both start and end tokens.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, WITH_START_TOKEN["output"])], streaming=streaming) + + assert reasoning == WITH_START_TOKEN["reasoning_content"] + assert content == WITH_START_TOKEN["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_only_end_token(seedoss_tokenizer, streaming): + """ + Test reasoning extraction with only end token + (SeedOSS typical behavior). + """ + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, ONLY_END_TOKEN["output"])], streaming=streaming) + + assert reasoning == ONLY_END_TOKEN["reasoning_content"] + assert content == ONLY_END_TOKEN["content"] + + +@pytest.mark.parametrize("streaming", [True, False]) +def test_no_tokens(seedoss_tokenizer, streaming): + """Test when there are no reasoning tokens at all.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + reasoning, content = run_reasoning_extraction( + parser, [cast(str, NO_TOKENS["output"])], streaming=streaming) + + assert reasoning == NO_TOKENS["reasoning_content"] + assert content == NO_TOKENS["content"] + + +def test_is_reasoning_end(seedoss_tokenizer): + """Test the is_reasoning_end method.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + # Test with end token present + end_token_id = parser.end_token_id + assert parser.is_reasoning_end([1, 2, end_token_id, 4]) is True + + # Test without end token + assert parser.is_reasoning_end([1, 2, 3, 4]) is False + + +def test_extract_content_ids(seedoss_tokenizer): + """Test the extract_content_ids method.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + end_token_id = parser.end_token_id + + # Test with end token in the middle + input_ids = [1, 2, end_token_id, 4, 5] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [4, 5] + + # Test with end token at the end + input_ids = [1, 2, 3, end_token_id] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [] + + # Test without end token + input_ids = [1, 2, 3, 4] + content_ids = parser.extract_content_ids(input_ids) + assert content_ids == [] + + +def test_streaming_delta_processing(seedoss_tokenizer): + """Test streaming processing with small deltas.""" + parser_cls = ReasoningParserManager.get_reasoning_parser(parser_name) + parser = parser_cls(seedoss_tokenizer) + + # Test streaming with incremental tokens + deltas = [ + "Some ", "reasoning ", "content", "", "Final ", "answer" + ] + + reasoning, content = run_reasoning_extraction(parser, + deltas, + streaming=True) + + assert reasoning == "Some reasoning content" + assert content == "Final answer" diff --git a/vllm/reasoning/__init__.py b/vllm/reasoning/__init__.py index b987adeb6428..3c8a9c6ae0d3 100644 --- a/vllm/reasoning/__init__.py +++ b/vllm/reasoning/__init__.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright 
contributors to the vLLM project from .abs_reasoning_parsers import ReasoningParser, ReasoningParserManager +from .basic_parsers import BaseThinkingReasoningParser from .deepseek_r1_reasoning_parser import DeepSeekR1ReasoningParser from .glm4_moe_reasoning_parser import Glm4MoeModelReasoningParser from .gptoss_reasoning_parser import GptOssReasoningParser @@ -9,10 +10,12 @@ from .hunyuan_a13b_reasoning_parser import HunyuanA13BReasoningParser from .mistral_reasoning_parser import MistralReasoningParser from .qwen3_reasoning_parser import Qwen3ReasoningParser +from .seedoss_reasoning_parser import SeedOSSReasoningParser from .step3_reasoning_parser import Step3ReasoningParser __all__ = [ "ReasoningParser", + "BaseThinkingReasoningParser", "ReasoningParserManager", "DeepSeekR1ReasoningParser", "GraniteReasoningParser", @@ -22,4 +25,5 @@ "MistralReasoningParser", "Step3ReasoningParser", "GptOssReasoningParser", + "SeedOSSReasoningParser", ] diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index df9e84163f16..39b08ec11107 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -7,7 +7,7 @@ from abc import abstractmethod from collections.abc import Sequence from functools import cached_property -from typing import TYPE_CHECKING, Any, Callable, Optional, Union +from typing import TYPE_CHECKING, Any, Callable, Union from vllm.logger import init_logger from vllm.utils import import_from_path, is_list_of @@ -77,7 +77,7 @@ def extract_reasoning_content( self, model_output: str, request: Union[ChatCompletionRequest, ResponsesRequest], - ) -> tuple[Optional[str], Optional[str]]: + ) -> tuple[str | None, str | None]: """ Extract reasoning content from a complete model-generated string. @@ -135,7 +135,7 @@ def get_reasoning_parser(cls, name: str | None) -> type[ReasoningParser]: def _register_module( cls, module: type, - module_name: Optional[Union[str, list[str]]] = None, + module_name: Union[str, list[str]] | None = None, force: bool = True, ) -> None: if not issubclass(module, ReasoningParser): @@ -155,7 +155,7 @@ def _register_module( @classmethod def register_module( cls, - name: Optional[Union[str, list[str]]] = None, + name: Union[str, list[str]] | None = None, force: bool = True, module: Union[type, None] = None, ) -> Union[type, Callable]: diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py new file mode 100644 index 000000000000..03cb882c2693 --- /dev/null +++ b/vllm/reasoning/basic_parsers.py @@ -0,0 +1,156 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from abc import abstractmethod +from collections.abc import Sequence +from typing import Optional, Union + +from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, + DeltaMessage, ResponsesRequest) +from vllm.reasoning.abs_reasoning_parsers import ReasoningParser +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +class BaseThinkingReasoningParser(ReasoningParser): + """ + Base class for reasoning parsers that use thinking tokens. + + This class provides common functionality for parsers that use start and end + tokens to delimit reasoning content ( + e.g., ..., ...). + + Subclasses must implement the start and end tokens via abstract + properties. 
+ """ + + @property + @abstractmethod + def start_token(self) -> str: + """The token that starts reasoning content.""" + raise NotImplementedError + + @property + @abstractmethod + def end_token(self) -> str: + """The token that ends reasoning content.""" + raise NotImplementedError + + def __init__(self, tokenizer: AnyTokenizer): + super().__init__(tokenizer) + + if not self.model_tokenizer: + raise ValueError( + "The model tokenizer must be passed to the ReasoningParser " + "constructor during construction.") + + if not self.start_token or not self.end_token: + raise ValueError( + "start_token and end_token must be defined in subclasses") + + self.start_token_id = self.vocab.get(self.start_token) + self.end_token_id = self.vocab.get(self.end_token) + if self.start_token_id is None or self.end_token_id is None: + raise RuntimeError( + f"{self.__class__.__name__} reasoning parser could not locate " + "think start/end tokens in the tokenizer!") + + def is_reasoning_end(self, input_ids: list[int]) -> bool: + return self.end_token_id in input_ids + + def extract_content_ids(self, input_ids: list[int]) -> list[int]: + """ + Extract the content after the end tokens + """ + if self.end_token_id not in input_ids[:-1]: + return [] + else: + return input_ids[input_ids.index(self.end_token_id) + 1:] + + def extract_reasoning_content_streaming( + self, + previous_text: str, + current_text: str, + delta_text: str, + previous_token_ids: Sequence[int], + current_token_ids: Sequence[int], + delta_token_ids: Sequence[int], + ) -> Union[DeltaMessage, None]: + """ + Extract reasoning content from a delta message. + Handles streaming output where previous + delta = current. + Uses token IDs for faster processing. + """ + # Skip single special tokens + if len(delta_token_ids) == 1 and (delta_token_ids[0] in [ + self.start_token_id, self.end_token_id + ]): + return None + + # Check if start token is present in previous or delta. + # Keep compatibility with models that don't generate start tokens. 
+ if self.start_token_id in previous_token_ids: + if self.end_token_id in delta_token_ids: + # start token in previous, end token in delta, + # extract reasoning content + end_index = delta_text.find(self.end_token) + reasoning_content = delta_text[:end_index] + content = delta_text[end_index + len(self.end_token):] + return DeltaMessage( + reasoning_content=reasoning_content, + content=content if content else None, + ) + elif self.end_token_id in previous_token_ids: + # start token in previous, end token in previous, + # reasoning content continues + return DeltaMessage(content=delta_text) + else: + # start token in previous, no end token in previous or delta, + # reasoning content continues + return DeltaMessage(reasoning_content=delta_text) + elif self.start_token_id in delta_token_ids: + if self.end_token_id in delta_token_ids: + # start token in delta, end token in delta, + # extract reasoning content + start_index = delta_text.find(self.start_token) + end_index = delta_text.find(self.end_token) + reasoning_content = delta_text[start_index + + len(self.start_token):end_index] + content = delta_text[end_index + len(self.end_token):] + return DeltaMessage( + reasoning_content=reasoning_content, + content=content if content else None, + ) + else: + # start token in delta, no end token in delta, + # reasoning content continues + return DeltaMessage(reasoning_content=delta_text) + else: + # not find thinking start token + return DeltaMessage(content=delta_text) + + def extract_reasoning_content( + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] + ) -> tuple[Optional[str], Optional[str]]: + """ + Extract reasoning content from the model output. + + This is the base implementation that works for most models. + Subclasses can override this method for specific behavior. + """ + # Check if the start token is present in the model output, remove it + # if it is present. + model_output_parts = model_output.partition(self.start_token) + model_output = model_output_parts[2] if model_output_parts[ + 1] else model_output_parts[0] + + # For models that may not generate start token, + # assume the reasoning content is always at the start. 
+        if self.end_token not in model_output:
+            return model_output, None
+        else:
+            reasoning_content, _, content = model_output.partition(
+                self.end_token)
+            # If generation stops right after end-of-think, return null content
+            final_content = content or None
+            return reasoning_content, final_content
diff --git a/vllm/reasoning/deepseek_r1_reasoning_parser.py b/vllm/reasoning/deepseek_r1_reasoning_parser.py
index 1a5ca46a60f1..76d2959e1c9a 100644
--- a/vllm/reasoning/deepseek_r1_reasoning_parser.py
+++ b/vllm/reasoning/deepseek_r1_reasoning_parser.py
@@ -2,20 +2,15 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project

 from collections.abc import Sequence
-from typing import Optional, Union
+from typing import Union

-from transformers import PreTrainedTokenizerBase
-
-from vllm.entrypoints.openai.protocol import (ChatCompletionRequest,
-                                              DeltaMessage)
-from vllm.logger import init_logger
-from vllm.reasoning import ReasoningParser, ReasoningParserManager
-
-logger = init_logger(__name__)
+from vllm.entrypoints.openai.protocol import DeltaMessage
+from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser


 @ReasoningParserManager.register_module("deepseek_r1")
-class DeepSeekR1ReasoningParser(ReasoningParser):
+class DeepSeekR1ReasoningParser(BaseThinkingReasoningParser):
     """
     Reasoning parser for DeepSeek R1 model.

@@ -23,38 +18,15 @@ class DeepSeekR1ReasoningParser(ReasoningParser):
     text. This parser extracts the reasoning content from the model output.
     """

-    start_token_id: int
-    end_token_id: int
-
-    start_token: str = "<think>"
-    end_token: str = "</think>"
-
-    def __init__(self, tokenizer: PreTrainedTokenizerBase):
-        super().__init__(tokenizer)
-
-        if not self.model_tokenizer:
-            raise ValueError(
-                "The model tokenizer must be passed to the ReasoningParser "
-                "constructor during construction.")
+    @property
+    def start_token(self) -> str:
+        """The token that starts reasoning content."""
+        return "<think>"

-        self.start_token_id = self.vocab.get(self.start_token)
-        self.end_token_id = self.vocab.get(self.end_token)
-        if self.start_token_id is None or self.end_token_id is None:
-            raise RuntimeError(
-                "DeepSeek R1 reasoning parser could not locate think start/end "
-                "tokens in the tokenizer!")
-
-    def is_reasoning_end(self, input_ids: list[int]) -> bool:
-        return self.end_token_id in input_ids
-
-    def extract_content_ids(self, input_ids: list[int]) -> list[int]:
-        """
-        Extract the content after the end tokens
-        """
-        if self.end_token_id not in input_ids[:-1]:
-            return []
-        else:
-            return input_ids[input_ids.index(self.end_token_id) + 1:]
+    @property
+    def end_token(self) -> str:
+        """The token that ends reasoning content."""
+        return "</think>"

     def extract_reasoning_content_streaming(
         self,
@@ -65,63 +37,18 @@ def extract_reasoning_content_streaming(
         current_token_ids: Sequence[int],
         delta_token_ids: Sequence[int],
     ) -> Union[DeltaMessage, None]:
-        """
-        Extract reasoning content from a delta message.
-        Handles streaming output where previous + delta = current.
-        Uses token IDs for faster processing.
-        For text <think>abc</think>xyz:
-        - 'abc' goes to reasoning_content
-        - 'xyz' goes to content
-        """
-        # Skip single special tokens
-        if len(delta_token_ids) == 1 and (delta_token_ids[0] in [
-                self.start_token_id, self.end_token_id
-        ]):
-            return None
-
-        # Check if <think> is present in previous or delta.
-        # Keep compatibility with models that don't generate <think> tokens.
- if self.start_token_id in previous_token_ids: + ret = super().extract_reasoning_content_streaming( + previous_text, + current_text, + delta_text, + previous_token_ids, + current_token_ids, + delta_token_ids, + ) + if (ret is not None and self.start_token_id not in previous_token_ids + and self.start_token_id not in delta_token_ids): if self.end_token_id in delta_token_ids: - # in previous, in delta, - # extract reasoning content - end_index = delta_text.find(self.end_token) - reasoning_content = delta_text[:end_index] - content = delta_text[end_index + len(self.end_token):] - return DeltaMessage( - reasoning_content=reasoning_content, - content=content if content else None, - ) - elif self.end_token_id in previous_token_ids: - # in previous, in previous, - # reasoning content continues - return DeltaMessage(content=delta_text) - else: - # in previous, no in previous or delta, - # reasoning content continues - return DeltaMessage(reasoning_content=delta_text) - elif self.start_token_id in delta_token_ids: - if self.end_token_id in delta_token_ids: - # in delta, in delta, extract reasoning content - start_index = delta_text.find(self.start_token) - end_index = delta_text.find(self.end_token) - reasoning_content = delta_text[start_index + - len(self.start_token):end_index] - content = delta_text[end_index + len(self.end_token):] - return DeltaMessage( - reasoning_content=reasoning_content, - content=content if content else None, - ) - else: - # in delta, no in delta, - # reasoning content continues - return DeltaMessage(reasoning_content=delta_text) - else: - # No in previous or delta, also need to check for . - # Because the model may have generated without - # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f - if self.end_token_id in delta_token_ids: - # in delta with more tokens, + # end token in delta with more tokens, # extract reasoning content and content end_index = delta_text.find(self.end_token) reasoning_content = delta_text[:end_index] @@ -131,43 +58,10 @@ def extract_reasoning_content_streaming( content=content if content else None, ) elif self.end_token_id in previous_token_ids: - # in previous, thinking content ends + # end token in previous, thinking content ends return DeltaMessage(content=delta_text) else: - # no in previous or delta, reasoning content continues + # no end token in previous or delta, reasoning content continues return DeltaMessage(reasoning_content=delta_text) - def extract_reasoning_content( - self, model_output: str, request: ChatCompletionRequest - ) -> tuple[Optional[str], Optional[str]]: - """ - Extract reasoning content from the model output. - - For text abcxyz: - - 'abc' goes to reasoning_content - - 'xyz' goes to content - - Returns: - tuple[Optional[str], Optional[str]]: reasoning content and content - """ - - # Check if the start token is present in the model output, remove it - # if it is present. - model_output_parts = model_output.partition(self.start_token) - model_output = model_output_parts[2] if model_output_parts[ - 1] else model_output_parts[0] - - # DeepSeek R1 doesn't generate now. - # Thus we assume the reasoning content is always at the start. - # Ref https://huggingface.co/deepseek-ai/DeepSeek-R1/commit/8a58a132790c9935686eb97f042afa8013451c9f - if self.end_token not in model_output: - return model_output, None - else: - reasoning_content, _, content = model_output.partition( - self.end_token) - # If the end token is not found, return the model output as is. 
- # It should not happen since we already checked for the presence - # of the end token. - # If generation stops right after end-of-think, return null content - final_content = content or None - return reasoning_content, final_content + return ret diff --git a/vllm/reasoning/mistral_reasoning_parser.py b/vllm/reasoning/mistral_reasoning_parser.py index 6c707a4079fa..5cb54e6acbb3 100644 --- a/vllm/reasoning/mistral_reasoning_parser.py +++ b/vllm/reasoning/mistral_reasoning_parser.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from functools import cached_property + from vllm.logger import init_logger from vllm.reasoning import ReasoningParser, ReasoningParserManager from vllm.reasoning.deepseek_r1_reasoning_parser import ( @@ -31,11 +33,6 @@ def __init__(self, tokenizer: MistralTokenizer): "The model tokenizer must be passed to the ReasoningParser " "constructor during construction.") - from mistral_common.tokens.tokenizers.base import SpecialTokens - - self.start_token = SpecialTokens.begin_think - self.end_token = SpecialTokens.end_think - self.start_token_id = tokenizer.tokenizer.get_control_token( self.start_token) self.end_token_id = tokenizer.tokenizer.get_control_token( @@ -45,3 +42,15 @@ def __init__(self, tokenizer: MistralTokenizer): raise RuntimeError( "Mistral reasoning parser could not locate think start/end " "tokens in the tokenizer!") + + @cached_property + def start_token(self) -> str: + """The token that starts reasoning content.""" + from mistral_common.tokens.tokenizers.base import SpecialTokens + return SpecialTokens.begin_think + + @cached_property + def end_token(self) -> str: + """The token that ends reasoning content.""" + from mistral_common.tokens.tokenizers.base import SpecialTokens + return SpecialTokens.end_think diff --git a/vllm/reasoning/qwen3_reasoning_parser.py b/vllm/reasoning/qwen3_reasoning_parser.py index 61bafc724c17..3e3c7f32796b 100644 --- a/vllm/reasoning/qwen3_reasoning_parser.py +++ b/vllm/reasoning/qwen3_reasoning_parser.py @@ -1,21 +1,16 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from collections.abc import Sequence from typing import Optional, Union -from transformers import PreTrainedTokenizerBase - from vllm.entrypoints.openai.protocol import (ChatCompletionRequest, - DeltaMessage) -from vllm.logger import init_logger -from vllm.reasoning import ReasoningParser, ReasoningParserManager - -logger = init_logger(__name__) + ResponsesRequest) +from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager +from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser @ReasoningParserManager.register_module("qwen3") -class Qwen3ReasoningParser(ReasoningParser): +class Qwen3ReasoningParser(BaseThinkingReasoningParser): """ Reasoning parser for the Qwen3 model. @@ -26,100 +21,25 @@ class Qwen3ReasoningParser(ReasoningParser): output. 
""" - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) - self.think_start_token = "" - self.think_end_token = "" - - if not self.model_tokenizer: - raise ValueError( - "The model tokenizer must be passed to the ReasoningParser " - "constructor during construction.") - - self.think_start_token_id = self.vocab.get(self.think_start_token) - self.think_end_token_id = self.vocab.get(self.think_end_token) - if (self.think_start_token_id is None - or self.think_end_token_id is None): - raise RuntimeError( - "Qwen3 reasoning parser could not locate think start/end " - "tokens in the tokenizer!") - - def is_reasoning_end(self, input_ids: list[int]) -> bool: - return self.think_end_token_id in input_ids - - def extract_content_ids(self, input_ids: list[int]) -> list[int]: - """ - Extract the content after the end tokens - """ - if self.think_end_token_id not in input_ids[:-1]: - return [] - else: - return input_ids[input_ids.index(self.think_end_token_id) + 1:] - - def extract_reasoning_content_streaming( - self, - previous_text: str, - current_text: str, - delta_text: str, - previous_token_ids: Sequence[int], - current_token_ids: Sequence[int], - delta_token_ids: Sequence[int], - ) -> Union[DeltaMessage, None]: - """ - Extract reasoning content from a delta message. - Handles streaming output where previous + delta = current. - Uses token IDs for faster processing. - For text abcxyz: - - 'abc' goes to reasoning_content - - 'xyz' goes to content - """ - # Skip single special tokens - if len(delta_token_ids) == 1 and (delta_token_ids[0] in [ - self.think_start_token_id, self.think_end_token_id - ]): - return None + @property + def start_token(self) -> str: + """The token that starts reasoning content.""" + return "" - if self.think_start_token_id in previous_token_ids: - if self.think_end_token_id in delta_token_ids: - # in previous, in delta, - # extract reasoning content - end_index = delta_text.find(self.think_end_token) - reasoning_content = delta_text[:end_index] - content = delta_text[end_index + len(self.think_end_token):] - return DeltaMessage(reasoning_content=reasoning_content, - content=content if content else None) - elif self.think_end_token_id in previous_token_ids: - # in previous, in previous, - # reasoning content continues - return DeltaMessage(content=delta_text) - else: - # in previous, no in previous or delta, - # reasoning content continues - return DeltaMessage(reasoning_content=delta_text) - elif self.think_start_token_id in delta_token_ids: - if self.think_end_token_id in delta_token_ids: - # in delta, in delta, extract reasoning content - start_index = delta_text.find(self.think_start_token) - end_index = delta_text.find(self.think_end_token) - reasoning_content = delta_text[start_index + - len(self.think_start_token - ):end_index] - content = delta_text[end_index + len(self.think_end_token):] - return DeltaMessage(reasoning_content=reasoning_content, - content=content if content else None) - else: - # in delta, no in delta, - # reasoning content continues - return DeltaMessage(reasoning_content=delta_text) - else: - # thinking is disabled, just content - return DeltaMessage(content=delta_text) + @property + def end_token(self) -> str: + """The token that ends reasoning content.""" + return "" def extract_reasoning_content( - self, model_output: str, request: ChatCompletionRequest + self, model_output: str, request: Union[ChatCompletionRequest, + ResponsesRequest] ) -> tuple[Optional[str], Optional[str]]: """ Extract reasoning content 
from the model output.
+
+        Qwen3 has stricter requirements - it needs both start and end tokens
+        to be present, unlike other models that work with just the end token.

         For text <think>abc</think>xyz:
         - 'abc' goes to reasoning_content
@@ -129,23 +49,24 @@ def extract_reasoning_content(
             tuple[Optional[str], Optional[str]]: reasoning content and content
         """

-        # Check if the model output contains the <think> and </think> tokens.
-        if (self.think_start_token not in model_output
-                or self.think_end_token not in model_output):
+        # Check if the model output contains both <think> and </think> tokens.
+        if (self.start_token not in model_output
+                or self.end_token not in model_output):
             return None, model_output
+
         # Check if the <think> is present in the model output, remove it
         # if it is present.
-        model_output_parts = model_output.partition(self.think_start_token)
+        model_output_parts = model_output.partition(self.start_token)
         model_output = model_output_parts[2] if model_output_parts[
             1] else model_output_parts[0]
+
         # Check if the model output contains the </think> tokens.
         # If the end token is not found, return the model output as is.
-        if self.think_end_token not in model_output:
+        if self.end_token not in model_output:
             return None, model_output

         # Extract reasoning content from the model output.
-        reasoning_content, _, content = model_output.partition(
-            self.think_end_token)
+        reasoning_content, _, content = model_output.partition(self.end_token)

         final_content = content or None
         return reasoning_content, final_content
diff --git a/vllm/reasoning/seedoss_reasoning_parser.py b/vllm/reasoning/seedoss_reasoning_parser.py
new file mode 100644
index 000000000000..5f4bbbf1557e
--- /dev/null
+++ b/vllm/reasoning/seedoss_reasoning_parser.py
@@ -0,0 +1,28 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager
+from vllm.reasoning.basic_parsers import BaseThinkingReasoningParser
+
+
+@ReasoningParserManager.register_module("seed_oss")
+class SeedOSSReasoningParser(BaseThinkingReasoningParser):
+    """
+    Reasoning parser for SeedOSS model.
+
+    The SeedOSS model uses <seed:think>...</seed:think> tokens to
+    denote reasoning content text. This parser extracts
+    the reasoning content from the model output.
+    Similar to DeepSeek R1, it supports cases
+    where the model doesn't generate the start token.
+ """ + + @property + def start_token(self) -> str: + """The token that starts reasoning content.""" + return "" + + @property + def end_token(self) -> str: + """The token that ends reasoning content.""" + return "" From d06b5a95cbd5114478ca87dabac3156a33bb6bca Mon Sep 17 00:00:00 2001 From: baxingpiaochong <771405853@qq.com> Date: Wed, 24 Sep 2025 08:19:04 +0800 Subject: [PATCH 315/518] [V1][Metrics] Add per-request TPOT histogram (#24015) Signed-off-by: baxingpiaochong <771405853@qq.com> --- vllm/v1/metrics/loggers.py | 15 +++++++++++++++ vllm/v1/metrics/stats.py | 10 +++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index f0076b2d81db..52264e41e7a1 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -411,6 +411,19 @@ def __init__(self, self.histogram_inter_token_latency = make_per_engine( histogram_inter_token_latency, engine_indexes, model_name) + histogram_request_time_per_output_token = self._histogram_cls( + name="vllm:request_time_per_output_token_seconds", + documentation= + "Histogram of time_per_output_token_seconds per request.", + buckets=[ + 0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2, 0.3, 0.4, 0.5, 0.75, + 1.0, 2.5, 5.0, 7.5, 10.0, 20.0, 40.0, 80.0 + ], + labelnames=labelnames) + self.histogram_request_time_per_output_token = make_per_engine( + histogram_request_time_per_output_token, engine_indexes, + model_name) + request_latency_buckets = [ 0.3, 0.5, 0.8, 1.0, 1.5, 2.0, 2.5, 5.0, 10.0, 15.0, 20.0, 30.0, 40.0, 50.0, 60.0, 120.0, 240.0, 480.0, 960.0, 1920.0, 7680.0 @@ -583,6 +596,8 @@ def record(self, finished_request.num_prompt_tokens) self.histogram_num_generation_tokens_request[engine_idx].observe( finished_request.num_generation_tokens) + self.histogram_request_time_per_output_token[engine_idx].observe( + finished_request.mean_time_per_output_token) if finished_request.max_tokens_param: self.histogram_max_tokens_request[engine_idx].observe( finished_request.max_tokens_param) diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index 0eff557336bc..296c39e8cdb5 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -86,6 +86,7 @@ class FinishedRequestStats: prefill_time: float = 0.0 inference_time: float = 0.0 decode_time: float = 0.0 + mean_time_per_output_token: float = 0.0 class IterationStats: @@ -177,6 +178,12 @@ def update_from_finished_request(self, finish_reason: "FinishReason", # Any preemptions during prefill or decode are included inference_time = req_stats.last_token_ts - req_stats.scheduled_ts + # Do not count the token generated by the prefill phase + mean_time_per_output_token = (decode_time / + (req_stats.num_generation_tokens - 1) + if req_stats.num_generation_tokens - + 1 > 0 else 0) + finished_req = \ FinishedRequestStats(finish_reason=finish_reason, e2e_latency=e2e_latency, @@ -186,7 +193,8 @@ def update_from_finished_request(self, finish_reason: "FinishReason", queued_time=queued_time, prefill_time=prefill_time, inference_time=inference_time, - decode_time=decode_time) + decode_time=decode_time, + mean_time_per_output_token=mean_time_per_output_token) self.finished_requests.append(finished_req) From 1983609239caaab24ab1ed2bfa2aa92e8c76c1b1 Mon Sep 17 00:00:00 2001 From: Benjamin Chislett Date: Tue, 23 Sep 2025 20:19:56 -0400 Subject: [PATCH 316/518] [Bugfix] Use a separate FlashInfer workspace buffer for trtllm-gen (#25520) --- vllm/v1/attention/backends/flashinfer.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 
deletions(-) diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index cb092aa74e7f..1a5c171430bc 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -48,6 +48,16 @@ logger = init_logger(__name__) +trtllm_gen_workspace_buffer = None + + +def _get_trtllm_gen_workspace_buffer(): + global trtllm_gen_workspace_buffer + if trtllm_gen_workspace_buffer is None: + trtllm_gen_workspace_buffer = torch.zeros( + FLASHINFER_WORKSPACE_BUFFER_SIZE, dtype=torch.uint8, device='cuda') + return trtllm_gen_workspace_buffer + @triton.jit def _trtllm_prefill_attn_kvfp8_dequant( @@ -862,7 +872,7 @@ def forward( else: # prefill_query may be non-contiguous prefill_query = prefill_query.contiguous() - workspace_buffer = prefill_wrapper._float_workspace_buffer + workspace_buffer = _get_trtllm_gen_workspace_buffer() block_tables_prefill = attn_metadata.block_table_tensor[ num_decode_tokens:] seq_lens_prefill = attn_metadata.seq_lens[num_decode_tokens:] @@ -943,7 +953,7 @@ def forward( else: # decode_query may be non-contiguous decode_query = decode_query.contiguous() - workspace_buffer = decode_wrapper._float_workspace_buffer + workspace_buffer = _get_trtllm_gen_workspace_buffer() block_tables_decode = attn_metadata.\ block_table_tensor[:num_decode_tokens] seq_lens_decode = attn_metadata.seq_lens[:num_decode_tokens] From de94289a98d7ec52a5ef02719e01a1db8b505170 Mon Sep 17 00:00:00 2001 From: Kyle Sayers Date: Wed, 24 Sep 2025 01:30:26 +0100 Subject: [PATCH 317/518] [Core] Support weight_loader_v2 for `UnquantizedLinearMethod` (#23036) Signed-off-by: Kyle Sayers --- vllm/compilation/decorators.py | 43 ++++++++++++++++++++++++---- vllm/model_executor/layers/linear.py | 16 +++++++---- vllm/model_executor/parameter.py | 23 ++++++++++++++- 3 files changed, 70 insertions(+), 12 deletions(-) diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index b7a6e23c1aa7..6e9a36a2b0b9 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -8,6 +8,7 @@ import torch import torch.nn as nn +from packaging import version from torch._dynamo.symbolic_convert import InliningInstructionTranslator from vllm.compilation.counter import compilation_counter @@ -300,13 +301,13 @@ def patched_inline_call(parent, func, args, kwargs): logger.debug( "enable_cpp_symbolic_shape_guards config not available") - with patch.object(InliningInstructionTranslator, 'inline_call', - patched_inline_call), torch._dynamo.config.patch( - **dynamo_config_patches - ), maybe_use_cudagraph_partition_wrapper( - self.vllm_config): + with patch.object( + InliningInstructionTranslator, "inline_call", + patched_inline_call), torch._dynamo.config.patch( + **dynamo_config_patches + ), maybe_use_cudagraph_partition_wrapper( + self.vllm_config), _torch27_patch_tensor_subclasses(): output = self.compiled_callable(*args, **kwargs) - return output # usually, capturing the model once is enough, and then we can @@ -367,3 +368,33 @@ def customized_cudagraph_wrapper(f, if (compilation_config.cudagraph_mode != CUDAGraphMode.NONE and compilation_config.use_inductor_graph_partition): torch._inductor.utils.set_customized_partition_wrappers(None) + + +@contextlib.contextmanager +def _torch27_patch_tensor_subclasses(): + """ + Add support for using tensor subclasses (ie `BasevLLMParameter`, ect) when + using torch 2.7.0. 
This enables using weight_loader_v2 and the use of + `BasevLLMParameters` without having to replace them with regular tensors + before `torch.compile`-time. + """ + from vllm.model_executor.parameter import (BasevLLMParameter, + ModelWeightParameter, + RowvLLMParameter, + _ColumnvLLMParameter) + + def return_false(*args, **kwargs): + return False + + if version.parse("2.7") <= version.parse( + torch.__version__) < version.parse("2.8"): + yield + return + + with (torch._dynamo.config.patch("traceable_tensor_subclasses", [ + BasevLLMParameter, ModelWeightParameter, _ColumnvLLMParameter, + RowvLLMParameter + ]), + patch("torch._dynamo.variables.torch.can_dispatch_torch_function", + return_false)): + yield diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index 5bf96398bc71..df5bced6b228 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -22,6 +22,7 @@ # yapf: disable from vllm.model_executor.parameter import (BasevLLMParameter, BlockQuantScaleParameter, + ModelWeightParameter, PackedColumnParameter, PackedvLLMParameter, PerTensorScaleParameter, @@ -34,6 +35,7 @@ logger = init_logger(__name__) WEIGHT_LOADER_V2_SUPPORTED = [ + "UnquantizedLinearMethod", "CompressedTensorsLinearMethod", "CompressedTensorsLinearTransformMethod", "BitBLASLinearMethod", @@ -196,10 +198,14 @@ def create_weights(self, layer: torch.nn.Module, # The amount of memory allocated for the weights is # sum(output_partition_sizes) * input_size_per_partition. try: - weight = Parameter(torch.empty(sum(output_partition_sizes), - input_size_per_partition, - dtype=params_dtype), - requires_grad=False) + weight_loader = extra_weight_attrs.pop("weight_loader") + weight = ModelWeightParameter(data=torch.empty( + sum(output_partition_sizes), + input_size_per_partition, + dtype=params_dtype), + input_dim=1, + output_dim=0, + weight_loader=weight_loader) except torch.cuda.OutOfMemoryError as e: logger.error("Failed to create unquantized linear weights: %s", e) if torch.cuda.is_available(): @@ -212,7 +218,7 @@ def create_weights(self, layer: torch.nn.Module, "Failed to create unquantized linear weights. " "This may be caused by insufficient memory to allocate " "the weight.") from e - set_weight_attrs(weight, {"input_dim": 1, "output_dim": 0}) + layer.register_parameter("weight", weight) set_weight_attrs(weight, extra_weight_attrs) diff --git a/vllm/model_executor/parameter.py b/vllm/model_executor/parameter.py index 03e5e5809b67..66add98dab44 100644 --- a/vllm/model_executor/parameter.py +++ b/vllm/model_executor/parameter.py @@ -61,9 +61,24 @@ def __init__(self, data: torch.Tensor, weight_loader: Callable): self.tp_size = get_tensor_model_parallel_world_size() @property - def weight_loader(self): + def weight_loader(self) -> Callable: + # NOTE(@ksayers) some models such as mamba_mixer2 override the + # weight loader to support custom loading. In the future, model-specific + # weight loading should be implemented via Model.load_weights. 
In the + # meantime, support deleting and overriding `weight_loader`` attribute + if self._weight_loader is None: + raise AttributeError(f"{self.__class__.__name__} weight_loader " + "attribute has been deleted") return self._weight_loader + @weight_loader.setter + def weight_loader(self, value: Callable): + self._weight_loader = value + + @weight_loader.deleter + def weight_loader(self): + self._weight_loader = None # type: ignore[assignment] + def _is_1d_and_scalar(self, loaded_weight: torch.Tensor): cond1 = self.data.ndim == 1 and self.data.numel() == 1 cond2 = loaded_weight.ndim == 0 and loaded_weight.numel() == 1 @@ -97,6 +112,12 @@ def _shard_id_as_int(self, shard_id: Union[str, int]) -> int: assert shard_id in qkv_idxs return qkv_idxs[shard_id] + @classmethod + def __torch_function__(cls, func, types, args=(), kwargs=None): + if kwargs is None: + kwargs = {} + return super().__torch_function__(func, types, args, kwargs) + class _ColumnvLLMParameter(BasevLLMParameter): """ From bf68fd76a91bd00f1693a453368688540bf22305 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Tue, 23 Sep 2025 20:42:48 -0400 Subject: [PATCH 318/518] [Compile] Fix AMD Compile Error (#25518) Signed-off-by: yewentao256 --- csrc/quantization/activation_kernels.cu | 7 ++++++- csrc/rocm/attention.cu | 6 ++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/csrc/quantization/activation_kernels.cu b/csrc/quantization/activation_kernels.cu index 9aa1411b4a25..b94cc9ce5086 100644 --- a/csrc/quantization/activation_kernels.cu +++ b/csrc/quantization/activation_kernels.cu @@ -23,9 +23,14 @@ typedef __hip_bfloat162 __nv_bfloat162; typedef __hip_bfloat16 __nv_bfloat16; typedef __hip_bfloat16_raw __nv_bfloat16_raw; - + #if defined(HIP_FP8_TYPE_OCP) typedef __hip_fp8_e4m3 __nv_fp8_e4m3; typedef __hip_fp8x4_e4m3 __nv_fp8x4_e4m3; + #else +// ROCm 6.2 fallback: only *_fnuz types exist +typedef __hip_fp8_e4m3_fnuz __nv_fp8_e4m3; +typedef __hip_fp8x4_e4m3_fnuz __nv_fp8x4_e4m3; + #endif #endif #include "core/registration.h" diff --git a/csrc/rocm/attention.cu b/csrc/rocm/attention.cu index dac9df6048f2..133a545045b1 100644 --- a/csrc/rocm/attention.cu +++ b/csrc/rocm/attention.cu @@ -25,6 +25,12 @@ #include "../attention/dtype_fp8.cuh" #include "../quantization/fp8/amd/quant_utils.cuh" +// ROCm 6.2 compatibility: map OCP fp8 types to FNUZ variants if OCP is absent +#if !defined(HIP_FP8_TYPE_OCP) +using __hip_fp8_e4m3 = __hip_fp8_e4m3_fnuz; +using __hip_fp8_e5m2 = __hip_fp8_e5m2_fnuz; +#endif + #if defined(__HIPCC__) && \ (defined(__gfx90a__) || defined(__gfx942__) || defined(__gfx950__)) #define __HIP__GFX9__ From 9df8da548e920f83d4f7710e481f106c2ac92d31 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 23 Sep 2025 21:09:43 -0400 Subject: [PATCH 319/518] [BugFix] Fix MLA assert with CUTLASS MLA (#25478) Signed-off-by: Lucas Wilkinson --- vllm/v1/attention/backends/mla/common.py | 64 +++++++++++++++++------- 1 file changed, 46 insertions(+), 18 deletions(-) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index a177117a50bd..e84f2d89943e 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -204,7 +204,7 @@ from vllm.attention.ops.common import cp_lse_ag_out_rs from vllm.attention.ops.merge_attn_states import merge_attn_states from vllm.attention.utils.fa_utils import get_flash_attn_version -from vllm.config import VllmConfig +from vllm.config import VllmConfig, 
get_current_vllm_config from vllm.distributed.parallel_state import get_dcp_group, is_global_first_rank from vllm.logger import init_logger from vllm.model_executor.layers.linear import (ColumnParallelLinear, @@ -436,6 +436,34 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]): """ reorder_batch_threshold: ClassVar[int] = 1 + @staticmethod + def determine_chunked_prefill_workspace_size( + vllm_config: VllmConfig) -> int: + scheduler_config = vllm_config.scheduler_config + cache_config = vllm_config.cache_config + model_config = vllm_config.model_config + + chunked_prefill_workspace_size = min( + # Try for 8 full length request or at least 4 pages per-request + max(8 * model_config.max_model_len, + 4 * scheduler_config.max_num_seqs * cache_config.block_size), + # For long-context models try not to over-allocate limiting + # kv-cache space, limiting it to 64k tokens, + # which would result in the workspace being: + # 2*(576)*(64*1024) = 144mb + # (assuming 576 MLA head dim, and fp16) + # which would result in up-projected context being + # 2*(192*128)*(64*1024) = 3gb + # (assuming 192 QK head dim, 128 heads, and fp16) + 64 * 1024) + + # Enforce that we enough for at least 1 page per request + chunked_prefill_workspace_size = max( + chunked_prefill_workspace_size, + scheduler_config.max_num_seqs * cache_config.block_size) + + return chunked_prefill_workspace_size + def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], @@ -448,7 +476,6 @@ def __init__(self, scheduler_config = vllm_config.scheduler_config self.model_config = vllm_config.model_config parallel_config = vllm_config.parallel_config - cache_config = vllm_config.cache_config self.compilation_config = vllm_config.compilation_config self.device = device @@ -468,22 +495,9 @@ def __init__(self, if self.aot_schedule: self.page_size = self.kv_cache_spec.block_size - self.chunked_prefill_workspace_size = min( - # Max sure there is enough for 8 full length request or at least - # 4 pages of cache per request - max(8 * self.model_config.max_model_len, - 4 * scheduler_config.max_num_seqs * cache_config.block_size), - # For long-context models try not to over-allocate limiting - # kv-cache space, limiting it to 64k tokens, - # which would result in the workspace being: - # 2*(576)*(64*1024) = 144mb - # (assuming 576 MLA head dim, and fp16) - # which would result in up-projected context being - # 2*(192*128)*(64*1024) = 3gb - # (assuming 192 QK head dim, 128 heads, and fp16) - 64 * 1024) - assert self.chunked_prefill_workspace_size >= \ - scheduler_config.max_num_seqs * cache_config.block_size + self.chunked_prefill_workspace_size = \ + self.determine_chunked_prefill_workspace_size(vllm_config) + if self.dcp_world_size > 1: # Note(hc): The local kvcache is incomplete when DCP is triggered, # an additional kvcache allgather across the DCP group is therefore @@ -999,6 +1013,10 @@ def __init__( self.dcp_world_size: Optional[int] = None + self.chunked_prefill_workspace_size = \ + MLACommonMetadataBuilder.determine_chunked_prefill_workspace_size( + get_current_vllm_config()) + def _flash_attn_varlen_diff_headdims(self, q, k, @@ -1513,6 +1531,16 @@ def forward( " for MLACommonImpl") if attn_metadata is None: + # During the profile run try to simulate to worse case output size + # for `self.kv_b_proj(kv_c_normed)` in `_compute_prefill_context` + # since this can be large + _ = torch.empty( + (self.chunked_prefill_workspace_size, self.num_heads, + self.qk_nope_head_dim + self.v_head_dim), + device=k_c_normed.device, + 
dtype=k_c_normed.dtype, + ) + # The zero fill is required when used with DP + EP # to ensure all ranks within a DP group compute the # same expert outputs. From 359d2930063038c78b9a54a1b0176fbabb1c204b Mon Sep 17 00:00:00 2001 From: Nikhil Gupta Date: Wed, 24 Sep 2025 02:32:22 +0100 Subject: [PATCH 320/518] [fix]: add Arm 4bit fused moe support (#23809) Signed-off-by: Nikhil Gupta --- cmake/cpu_extension.cmake | 3 +- csrc/cpu/torch_bindings.cpp | 10 + csrc/moe/dynamic_4bit_int_moe_cpu.cpp | 156 +++++++++ csrc/ops.h | 6 + .../layers/fused_moe/cpu_fused_moe.py | 15 +- vllm/model_executor/layers/fused_moe/layer.py | 2 - .../compressed_tensors_moe.py | 307 +++++++++++++++++- 7 files changed, 488 insertions(+), 11 deletions(-) create mode 100644 csrc/moe/dynamic_4bit_int_moe_cpu.cpp diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 06494463223b..2a2ec08f8695 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -258,7 +258,8 @@ set(VLLM_EXT_SRC "csrc/cpu/layernorm.cpp" "csrc/cpu/mla_decode.cpp" "csrc/cpu/pos_encoding.cpp" - "csrc/cpu/torch_bindings.cpp") + "csrc/cpu/torch_bindings.cpp" + "csrc/moe/dynamic_4bit_int_moe_cpu.cpp") if (AVX512_FOUND AND NOT AVX512_DISABLED) set(VLLM_EXT_SRC diff --git a/csrc/cpu/torch_bindings.cpp b/csrc/cpu/torch_bindings.cpp index 98c3ebc5a75f..d279c03e0b59 100644 --- a/csrc/cpu/torch_bindings.cpp +++ b/csrc/cpu/torch_bindings.cpp @@ -88,8 +88,18 @@ TORCH_LIBRARY_EXPAND(TORCH_EXTENSION_NAME, ops) { " int tp_rank, int blocksparse_local_blocks," " int blocksparse_vert_stride, int blocksparse_block_size," " int blocksparse_head_sliding_step) -> ()"); + ops.impl("paged_attention_v1", torch::kCPU, &paged_attention_v1); + ops.def( + "dynamic_4bit_int_moe(" + "Tensor x, Tensor topk_ids, Tensor topk_weights," + "Tensor w13_packed, Tensor w2_packed, int H, int I, int I2," + "int group_size, bool apply_router_weight_on_input, int activation_kind" + ") -> Tensor"); + + ops.impl("dynamic_4bit_int_moe", torch::kCPU, &dynamic_4bit_int_moe_cpu); + // PagedAttention V2. ops.def( "paged_attention_v2(" diff --git a/csrc/moe/dynamic_4bit_int_moe_cpu.cpp b/csrc/moe/dynamic_4bit_int_moe_cpu.cpp new file mode 100644 index 000000000000..1d06fc6b5b0a --- /dev/null +++ b/csrc/moe/dynamic_4bit_int_moe_cpu.cpp @@ -0,0 +1,156 @@ +#include +#include +#include + +// _dyn_quant_matmul_4bit is only available on AArch64. 
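The schema registered in torch_bindings.cpp above surfaces the new kernel as a custom op on the Python side. A minimal sketch of the call, assuming the CPU extension is built for AArch64; the wrapper name is made up for illustration, while argument order, the int64 expert ids, and the activation_kind encoding follow the schema above and the enum just below:

    import torch

    def run_dynamic_4bit_int_moe(x, topk_ids, topk_weights,
                                 w13_packed, w2_packed,
                                 H, I, group_size,
                                 apply_router_weight_on_input=False,
                                 activation_kind=2):
        # H = hidden size, I = intermediate size per partition, I2 = 2 * I.
        # activation_kind: 0 == SwiGLU_Gu, 1 == SwiGLUOAI, 2 == SiLU
        # (see the ActivationKind enum below).
        return torch.ops._C.dynamic_4bit_int_moe(
            x, topk_ids.to(torch.long), topk_weights,
            w13_packed, w2_packed, H, I, 2 * I, group_size,
            apply_router_weight_on_input, activation_kind)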
+#if defined(__aarch64__) + #include +#endif + +inline torch::Tensor mm(const torch::Tensor& a, const torch::Tensor& packed_w, + int64_t group_size_eff, int64_t in_features, + int64_t out_features) { +#if defined(__aarch64__) + return at::_ops::_dyn_quant_matmul_4bit::call(a, packed_w, group_size_eff, + in_features, out_features); +#else + TORCH_CHECK(false, + "dynamic 4-bit int MoE path requires AArch64 (ARM64); " + "_dyn_quant_matmul_4bit is unavailable on this architecture"); + return {}; +#endif +} + +enum ActivationKind : int64_t { + SwiGLU_Gu = 0, // act = SiLU(g) * u + SwiGLUOAI = 1, // act = SiLU(u) * g + SiLU = 2 // SiLU +}; + +torch::Tensor dynamic_4bit_int_moe_cpu( + torch::Tensor x, torch::Tensor topk_ids, torch::Tensor topk_weights, + torch::Tensor w13_packed, torch::Tensor w2_packed, int64_t H, int64_t I, + int64_t I2, int64_t group_size, bool apply_router_weight_on_input, + int64_t activation_kind) { + TORCH_CHECK(x.dim() == 2, "x must be 2D"); + TORCH_CHECK(topk_ids.dim() == 2 && topk_weights.dim() == 2, + "topk tensors must be [T, K]"); + TORCH_CHECK( + w13_packed.size(0) == w2_packed.size(0), + "w13_packed and w2_packed must have same number of experts in dim 0"); + TORCH_CHECK(I2 == 2 * I, "I2 must equal 2*I"); + + const int64_t T = x.size(0); + const int64_t K = topk_ids.size(1); + const int64_t E = w13_packed.size(0); + const int64_t N = T * K; + + auto x_c = x.contiguous(); + auto ids_c = topk_ids.contiguous(); + auto gates_c = topk_weights.to(at::kFloat).contiguous(); + + // bucketing tokens -> experts + c10::SmallVector counts( + E, 0); // Small vector uses stack allocation + { + const auto* ids_ptr = ids_c.data_ptr(); + for (int64_t i = 0; i < N; ++i) { + const int64_t e_id = ids_ptr[i]; + TORCH_CHECK(0 <= e_id && e_id < E, "expert id out of range"); + counts[e_id]++; + } + } + c10::SmallVector offsets(E + 1, 0); // ( E +1 ) + for (int64_t e = 0; e < E; ++e) offsets[e + 1] = offsets[e] + counts[e]; + + auto expert_tokens = at::empty({offsets[E]}, ids_c.options()); + auto expert_gates = at::empty({offsets[E]}, gates_c.options()); + { + c10::SmallVector cursor(E, 0); + const auto* ids_ptr = ids_c.data_ptr(); + const auto* gts_ptr = gates_c.data_ptr(); + auto* tok_ptr = expert_tokens.data_ptr(); + auto* gate_ptr = expert_gates.data_ptr(); + + for (int64_t t = 0; t < T; ++t) { + const int64_t base = t * K; + for (int64_t k = 0; k < K; ++k) { + const int64_t idx = base + k; + const int64_t e = ids_ptr[idx]; + const int64_t p = offsets[e] + (cursor[e]++); + tok_ptr[p] = t; + gate_ptr[p] = gts_ptr[idx]; + } + } + } + + const int64_t g_eff_13 = (group_size != -1) ? group_size : H; + const int64_t g_eff_2 = (group_size != -1) ? 
group_size : I; + + // Per-expert outputs filled in parallel + std::vector y_list(E); + y_list.resize(E); + + at::parallel_for(0, E, 1, [&](int64_t e_begin, int64_t e_end) { + for (int64_t e = e_begin; e < e_end; ++e) { + const int64_t te = counts[e]; + if (te == 0) { + y_list[e] = at::empty({0, H}, x_c.options()); + continue; + } + + const int64_t start = offsets[e]; + + auto sel_tokens = + expert_tokens.narrow(/*dim=*/0, /*start=*/start, /*length=*/te); + auto gates_e = + expert_gates.narrow(/*dim=*/0, /*start=*/start, /*length=*/te); + + auto x_e = x_c.index_select(/*dim=*/0, sel_tokens); + + if (apply_router_weight_on_input) { + x_e = x_e.mul(gates_e.unsqueeze(1)); + } + + auto w13_e = w13_packed.select(/*dim=*/0, e); + auto w2_e = w2_packed.select(/*dim=*/0, e); + + // W13 + auto y13 = + mm(x_e, w13_e, g_eff_13, /*in_features=*/H, /*out_features=*/I2); + + auto g_part = y13.narrow(/*dim=*/1, /*start=*/0, /*length=*/I); + auto u_part = y13.narrow(/*dim=*/1, /*start=*/I, /*length=*/I); + + torch::Tensor act; + if (activation_kind == ActivationKind::SwiGLUOAI) { // SwiGLUOAI + constexpr double kAlpha = 1.702; // GPT-OSS default + constexpr double kLimit = 7.0; // GPT-OSS default + auto gate_c = at::clamp_max(g_part, kLimit); + auto up_c = at::clamp(u_part, -kLimit, kLimit); + auto glu = gate_c.mul(at::sigmoid(gate_c.mul(kAlpha))); + act = up_c.add(1.0).mul(glu); + } else { // SiLU , SwiGLU_GU, vLLM maps silu to SiluAndMul() + act = at::silu(g_part).mul(u_part); + } + + // W2 + auto y = mm(act, w2_e, g_eff_2, /*in_features=*/I, /*out_features=*/H); + + if (!apply_router_weight_on_input) { + y = y.mul(gates_e.unsqueeze(1)); + } + + // Store per-expert result + y_list[e] = y; + } + }); + + // Concatenate all expert outputs to match expert_tokens order + auto Y_all = at::cat(y_list, /*dim=*/0); + auto out = at::zeros({T, H}, x.options()); + out = + at::index_add(out, /*dim=*/0, /*index=*/expert_tokens, /*source=*/Y_all); + + return out; +} diff --git a/csrc/ops.h b/csrc/ops.h index fd9c55b94895..2ada7905da4b 100644 --- a/csrc/ops.h +++ b/csrc/ops.h @@ -328,6 +328,12 @@ void selective_scan_fwd(const torch::Tensor& u, const torch::Tensor& delta, const std::optional& has_initial_state, const torch::Tensor& ssm_states, int64_t pad_slot_id); +torch::Tensor dynamic_4bit_int_moe_cpu( + torch::Tensor x, torch::Tensor topk_ids, torch::Tensor topk_weights, + torch::Tensor w13_packed, torch::Tensor w2_packed, int64_t H, int64_t I, + int64_t I2, int64_t group_size, bool apply_router_weight_on_input, + int64_t activation_kind); + using fptr_t = int64_t; fptr_t init_custom_ar(const std::vector& fake_ipc_ptrs, torch::Tensor& rank_data, int64_t rank, diff --git a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py index 0eec93601b3f..114f349538fb 100644 --- a/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/cpu_fused_moe.py @@ -98,13 +98,16 @@ def select_experts( e_score_correction_bias=e_score_correction_bias) elif custom_routing_function is None: assert scoring_func == "softmax" - topk_weights = torch.nn.functional.softmax(router_logits, - dim=1, - dtype=torch.float32) - topk_weights, topk_ids = torch.topk(topk_weights, top_k, dim=-1) + topk_logit_vals, topk_idx = torch.topk(router_logits, + k=top_k, + dim=-1, + sorted=False) if renormalize: - topk_weights /= topk_weights.sum(dim=-1, keepdim=True) - return topk_weights, topk_ids.to(torch.int32) + topk_vals = torch.softmax(topk_logit_vals, dim=-1) + 
else: + logZ = torch.logsumexp(router_logits, dim=-1, keepdim=True) + topk_vals = (topk_logit_vals - logZ).exp() + return topk_vals.to(torch.float32), topk_idx.to(torch.int32) else: return custom_routing_function(hidden_states=hidden_states, gating_output=router_logits, diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 2bf3bf96baf1..89e0cee08170 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -69,8 +69,6 @@ def eplb_map_to_physical_and_record( if is_rocm_aiter_moe_enabled(): from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501 rocm_aiter_grouped_topk as grouped_topk) -elif current_platform.is_cpu(): - pass else: from vllm.model_executor.layers.fused_moe.fused_moe import grouped_topk if current_platform.is_tpu(): diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 10f9085be4d1..a7d3e920414d 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -22,6 +22,7 @@ FusedMoEQuantConfig, fp8_w8a8_moe_quant_config, int4_w4a16_moe_quant_config, int8_w8a8_moe_quant_config, int8_w8a16_moe_quant_config, nvfp4_moe_quant_config) +from vllm.model_executor.layers.fused_moe.cpu_fused_moe import select_experts from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( is_valid_flashinfer_cutlass_fused_moe) from vllm.model_executor.layers.quantization.compressed_tensors.schemes.compressed_tensors_wNa16 import ( # noqa @@ -47,7 +48,7 @@ from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( all_close_1d, normalize_e4m3fn_to_e4m3fnuz, per_tensor_dequantize) from vllm.model_executor.utils import set_weight_attrs -from vllm.platforms import current_platform +from vllm.platforms import CpuArchEnum, current_platform from vllm.scalar_type import scalar_types from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used @@ -63,7 +64,7 @@ class GPTQMarlinState(Enum): "CompressedTensorsMoEMethod", "CompressedTensorsW8A8Fp8MoEMethod", "CompressedTensorsW8A8Int8MoEMethod", "CompressedTensorsWNA16MarlinMoEMethod", "CompressedTensorsWNA16MoEMethod", - "CompressedTensorsW4A4MoeMethod" + "CompressedTensorsW4A4MoeMethod", "CompressedTensorsW4A8Int8MoEMethod" ] @@ -139,6 +140,10 @@ def get_moe_method( elif quant_config._is_dynamic_token_w8a8(weight_quant, input_quant): return CompressedTensorsW8A8Int8MoEMethod(quant_config, layer.moe_config) + elif quant_config._is_dynamic_token_w4a8_int(weight_quant, + input_quant): + return CompressedTensorsW4A8Int8MoEMethod(quant_config, + layer.moe_config) else: raise RuntimeError( f"Unsupported FusedMoe scheme: {weight_quant}, {input_quant}") @@ -1769,3 +1774,301 @@ def apply( expert_map=expert_map, quant_config=self.moe_quant_config, ) + + +class CompressedTensorsW4A8Int8MoEMethod(CompressedTensorsMoEMethod): + """ + CPU-only MoE method using dynamic 4-bit matmul kernels on Arm Platform + - Weights: int4 (stored as int8 values in [-8,7], packed to uint8 nibbles) + - Scales: Fp32 for Channelwise , bf16 for groupwise quantization + - Bias: Same data type as original weights + - Activations: FP32/Bf16 dynamic per-token (A8 Int), + quantized inside the kernel + """ + + def __init__( + self, + quant_config: "CompressedTensorsConfig", # 
type: ignore # noqa E501 + moe: FusedMoEConfig): + super().__init__(moe) + self.has_bias = self.moe.has_bias + self.quant_config = quant_config + + # Validate scheme: weights=W4 (channel or group), + # activations=dynamic TOKEN (A8) + wq = self.quant_config.target_scheme_map["Linear"].get("weights") + aq = self.quant_config.target_scheme_map["Linear"].get( + "input_activations") + + # Must be dynamic per-token activations + if aq.strategy != QuantizationStrategy.TOKEN or not aq.dynamic: + raise ValueError( + "W4A8-int MoE needs dynamic per-token activation quantization." + ) + + # Weight can be channel-wise (group_size=None) or group-wise + self.group_size = wq.group_size if (wq.group_size is not None) else -1 + if wq.num_bits != 4: + raise ValueError( + "This method only supports 4-bit weights (num_bits=4).") + + # CPU only + if not current_platform.is_cpu(): + raise ValueError("CompressedTensorsW4A8Int8MoEMethod is CPU-only.") + + # Arm: check _dyn ops availability + if current_platform.get_cpu_architecture() == CpuArchEnum.ARM: + try: + _ = torch.ops.aten._dyn_quant_matmul_4bit + _ = torch.ops.aten._dyn_quant_pack_4bit_weight + except AttributeError as err: + raise RuntimeError( + f"""PyTorch {torch.__version__} lacks _dyn_quant_* 4bit ops; + install a newer build.""") from err + self.static_input_scales = False # always dynamic per token + + # ---- parameter creation ---- + def create_weights(self, layer: torch.nn.Module, num_experts: int, + hidden_size: int, intermediate_size_per_partition: int, + params_dtype: torch.dtype, **extra_weight_attrs): + + # Shapes per local rank (TP/EP): + # w13: [E, 2*I_local, H] int8 (int4 values in [-8,7]) + # w2 : [E, H, I_local] int8 + # Scales: + # channel-wise: group_size=-1 -> per-output-row, single scale per row + # group-wise : group_size=g -> + # per-output-row, (in_features/g) scales + + E = num_experts + H = hidden_size + IN = intermediate_size_per_partition + g = self.group_size + + # Per-row scale columns + def _n_scale_cols(in_features: int) -> int: + return 1 if g == -1 else (in_features // g) + + # Register unpacked int4-as-int8 weights the loader will fill. 
+ w13 = torch.nn.Parameter(torch.empty(E, 2 * IN, H, dtype=torch.int8), + requires_grad=False) + set_weight_attrs(w13, extra_weight_attrs) + layer.register_parameter("w13_weight", w13) + + w2 = torch.nn.Parameter(torch.empty(E, H, IN, dtype=torch.int8), + requires_grad=False) + set_weight_attrs(w2, extra_weight_attrs) + layer.register_parameter("w2_weight", w2) + + # Register scales + # KleidiAI groupwise kernels accepts float32 scales + # KleidiAI groupwise kernels accepts bfloat16 scales + scale_dtype = torch.float32 if g == -1 else torch.bfloat16 + + w13_s = torch.nn.Parameter(torch.ones(E, + 2 * IN, + _n_scale_cols(H), + dtype=scale_dtype), + requires_grad=False) + set_weight_attrs( + w13_s, { + "quant_method": "channel" if g == -1 else "group", + **extra_weight_attrs + }) + layer.register_parameter("w13_weight_scale", w13_s) + + w2_s = torch.nn.Parameter(torch.ones(E, + H, + _n_scale_cols(IN), + dtype=scale_dtype), + requires_grad=False) + set_weight_attrs( + w2_s, { + "quant_method": "channel" if g == -1 else "group", + **extra_weight_attrs + }) + layer.register_parameter("w2_weight_scale", w2_s) + + if self.has_bias: + w13_bias = torch.nn.Parameter(torch.zeros(E, + 2 * IN, + dtype=params_dtype), + requires_grad=False) + layer.register_parameter("w13_bias", w13_bias) + set_weight_attrs(w13_bias, extra_weight_attrs) + + w2_bias = torch.nn.Parameter(torch.zeros(num_experts, + hidden_size, + dtype=params_dtype), + requires_grad=False) + layer.register_parameter("w2_bias", w2_bias) + set_weight_attrs(w2_bias, extra_weight_attrs) + + # Placeholders for packed weights (will be replaced after packing) + layer.register_parameter( + "w13_weight_packed", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + set_weight_attrs(layer.w13_weight_packed, extra_weight_attrs) + + layer.register_parameter( + "w2_weight_packed", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + set_weight_attrs(layer.w2_weight_packed, extra_weight_attrs) + + # dims for 4 bit fused matmuls + layer.w13_in_features = H + layer.w13_out_features = 2 * IN + layer.w2_in_features = IN + layer.w2_out_features = H + layer.group_size = g + + # post-load packing to dyn-4bit KleidiAI kernel's format + def process_weights_after_loading(self, layer: torch.nn.Module) -> None: + E = layer.w13_weight.shape[0] + H = layer.w13_in_features + I2 = layer.w13_out_features + IN = layer.w2_in_features + g = layer.group_size + + def _pack_matrix(int4_as_int8_2d: torch.Tensor, + scales_2d: torch.Tensor, + bias_1d: Optional[torch.Tensor], in_features: int, + out_features: int) -> torch.Tensor: + # int4 values are stored as int8 in [-8,7]. + # Shift to unsigned nibble and pack pairs along input-dim. 
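To make the nibble layout concrete before the packing code below, a tiny round-trip of the same shift-and-or scheme; this is illustration only and is not part of the patch:

    import torch

    w = torch.tensor([[-8, 7, 0, -1]], dtype=torch.int8)   # int4 values in [-8, 7]
    tmp = w.add(8)                                          # -> [[0, 15, 8, 7]]
    packed = ((tmp[:, 1::2] << 4) | tmp[:, ::2]).to(torch.uint8)
    # packed == [[0xF0, 0x78]]: odd input columns land in the high nibble,
    # even input columns in the low nibble

    lo = (packed & 0x0F).to(torch.int8) - 8                 # even columns
    hi = (packed >> 4).to(torch.int8) - 8                   # odd columns
    restored = torch.stack([lo, hi], dim=-1).reshape(w.shape)
    assert torch.equal(restored, w)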
+ tmp = int4_as_int8_2d.add(8) # [out, in] + uint8_nibbles = ((tmp[:, 1::2] << 4) | tmp[:, ::2]).to( + torch.uint8) # [out, in//2] + + # KleidiAI groupwise kernels accepts float32 scales + # KleidiAI groupwise kernels accepts bfloat16 scales + scale_dtype = torch.float32 if g == -1 else torch.bfloat16 + scales = scales_2d.to(scale_dtype) + bias = None if bias_1d is None else bias_1d.to(torch.float32) + return torch.ops.aten._dyn_quant_pack_4bit_weight( + uint8_nibbles, scales, bias, g if g != -1 else in_features, + in_features, out_features) + + # Pack per expert + w13_packed_list = [] + w2_packed_list = [] + + has_w13_bias = hasattr(layer, + "w13_bias") and layer.w13_bias is not None + has_w2_bias = hasattr(layer, "w2_bias") and layer.w2_bias is not None + + for e in range(E): + w13_packed_list.append( + _pack_matrix( + layer.w13_weight[e], # [2I, H] + layer.w13_weight_scale[e], # [2I, H/g or 1] + layer.w13_bias[e] if has_w13_bias else None, # [2I] + H, + I2)) + w2_packed_list.append( + _pack_matrix( + # w2 shape is [H, IN]; we need [out, in] == [H, IN]. + layer.w2_weight[e], # [H, IN] + layer.w2_weight_scale[e], # [H, IN/g or 1] + layer.w2_bias[e] if has_w2_bias else None, # [H] + IN, + layer.w2_out_features # in_features=IN, out_features=H + )) + + # each packed tensor has identical shape per expert; stack on dim 0 + w13_packed = torch.stack(w13_packed_list, dim=0) + w2_packed = torch.stack(w2_packed_list, dim=0) + + replace_parameter(layer, "w13_weight_packed", + torch.nn.Parameter(w13_packed, requires_grad=False)) + replace_parameter(layer, "w2_weight_packed", + torch.nn.Parameter(w2_packed, requires_grad=False)) + + # free raw tensors/scales/bias now that they're packed into the payload. + replace_parameter( + layer, "w13_weight", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + replace_parameter( + layer, "w2_weight", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + replace_parameter( + layer, "w13_weight_scale", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + replace_parameter( + layer, "w2_weight_scale", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + if has_w13_bias: + replace_parameter( + layer, "w13_bias", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + if has_w2_bias: + replace_parameter( + layer, "w2_bias", + torch.nn.Parameter(torch.empty(0), requires_grad=False)) + + def get_fused_moe_quant_config( + self, layer: torch.nn.Module) -> Optional[FusedMoEQuantConfig]: + # CPU dynamic 4-bit MoE path does not use modular kernels or + # fused_experts; quant config is not needed. + return None + + def apply( + self, + layer: torch.nn.Module, + x: torch.Tensor, + router_logits: torch.Tensor, + top_k: int, + renormalize: bool, + use_grouped_topk: bool = False, + topk_group: Optional[int] = None, + num_expert_group: Optional[int] = None, + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + custom_routing_function: Optional[Callable] = None, + scoring_func: str = "softmax", + routed_scaling_factor: float = 1.0, + e_score_correction_bias: Optional[torch.Tensor] = None, + apply_router_weight_on_input: bool = False, + activation: str = "silu", + enable_eplb: bool = False, + expert_load_view: Optional[torch.Tensor] = None, + logical_to_physical_map: Optional[torch.Tensor] = None, + logical_replica_count: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + assert not enable_eplb, "EPLB not supported for W4A8-int MoE yet." 
+ assert activation in ( + "silu", "swigluoai", + "swiglu"), "Only SiLU/SwiGLUGU/SwiGLUUG are supported." + assert expert_map is None, """expert_map/EP not implemented + for CPU dyn-4bit MoE.""" + + def _act_kind(s: str) -> int: + # 0 = SwiGLU_Gu (SiLU(g)*u), 1 = SwiGLU_Ug (SiLU(u)*g), 2 = SiLU + if s == "swiglu": + return 0 + if s == "swigluoai": + return 1 + if s == "silu": + return 2 + raise ValueError(f"Unknown activation '{s}'") + + # Apply topk softmax on router output + topk_weights, topk_ids = select_experts( + hidden_states=x, + router_logits=router_logits, + use_grouped_topk=use_grouped_topk, + top_k=top_k, + renormalize=renormalize, + topk_group=topk_group, + num_expert_group=num_expert_group, + custom_routing_function=custom_routing_function, + scoring_func=scoring_func, + routed_scaling_factor=routed_scaling_factor, + e_score_correction_bias=e_score_correction_bias, + ) + + return torch.ops._C.dynamic_4bit_int_moe( + x, topk_ids.to(torch.long), topk_weights, layer.w13_weight_packed, + layer.w2_weight_packed, layer.w2_out_features, + layer.w2_in_features, layer.w13_out_features, layer.group_size, + apply_router_weight_on_input, int(_act_kind(activation))) \ No newline at end of file From 77d906995c464fd24fc5b3effad29b65bcf65798 Mon Sep 17 00:00:00 2001 From: Yong Hoon Shin <48474650+sarckk@users.noreply.github.com> Date: Tue, 23 Sep 2025 19:25:34 -0700 Subject: [PATCH 321/518] [KV sharing] Re-land Gemma3n model changes from #22628 (#24357) Signed-off-by: Yong Hoon Shin --- vllm/model_executor/models/gemma3n.py | 402 ++++++++++++++++++++++---- 1 file changed, 344 insertions(+), 58 deletions(-) diff --git a/vllm/model_executor/models/gemma3n.py b/vllm/model_executor/models/gemma3n.py index f4d288fd887e..0b6bccb33498 100644 --- a/vllm/model_executor/models/gemma3n.py +++ b/vllm/model_executor/models/gemma3n.py @@ -26,6 +26,7 @@ from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size +from vllm.forward_context import get_forward_context from vllm.logger import init_logger from vllm.model_executor.layers.activation import (_ACTIVATION_REGISTRY, GeluAndMul, @@ -44,6 +45,7 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) from vllm.sequence import IntermediateTensors +from vllm.v1.attention.backends.utils import KVSharingFastPrefillMetadata from .interfaces import SupportsQuant from .utils import (AutoWeightsLoader, extract_layer_index, @@ -51,6 +53,8 @@ logger = init_logger(__name__) +EPS = torch.tensor(torch.finfo().min) + class Gemma3nAltUp(nn.Module): """Alternating updates (Altup) @@ -532,16 +536,29 @@ def forward( return corrected_predictions -@support_torch_compile -class Gemma3nTextModel(nn.Module, SupportsQuant): +# This enables torch.compile if --kv-sharing-fast-prefill passed +@support_torch_compile(enable_if=lambda vllm_config: vllm_config.cache_config. 
+ kv_sharing_fast_prefill) +class Gemma3nSelfDecoder(nn.Module): + """ + Includes altup embedding and self decoder layers + """ - def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + def __init__( + self, + *, + vllm_config: VllmConfig, + prefix: str = "", + decoder_layers: list[Gemma3nDecoderLayer], + layer_idx_start: int, + ): super().__init__() + self.decoder_layers = decoder_layers + self.layer_idx_start = layer_idx_start + config = vllm_config.model_config.hf_config - cache_config = vllm_config.cache_config - quant_config = vllm_config.quant_config self.config = config - self.quant_config = quant_config + quant_config = vllm_config.quant_config self.embed_tokens = VocabParallelEmbedding( config.vocab_size, @@ -594,32 +611,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=f"{prefix}.altup_projections.{idx-1}", ) for idx in range(1, self.config.altup_num_inputs) ]) - self.altup_unembed_projections = nn.ModuleList([ - ColumnParallelLinear( - config.hidden_size, - config.hidden_size, - bias=False, - gather_output=True, - return_bias=False, - quant_config=quant_config, - prefix=f"{prefix}.altup_unembed_projections.{idx-1}", - ) for idx in range(1, self.config.altup_num_inputs) - ]) - - # Transformer blocks. - self.start_layer, self.end_layer, self.layers = make_layers( - config.num_hidden_layers, - lambda prefix: Gemma3nDecoderLayer( - config, cache_config, quant_config, prefix=prefix), - prefix=f"{prefix}.layers") - self.norm = RMSNorm( - config.hidden_size, - eps=config.rms_norm_eps, - ) - self.eps = torch.tensor(torch.finfo().min) - - def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: - return self.embed_tokens(input_ids) * self.embed_scale def get_per_layer_input_embeddings( self, input_ids: torch.Tensor) -> torch.Tensor: @@ -633,20 +624,11 @@ def get_per_layer_input_embeddings( return self.embed_tokens_per_layer( per_layer_inputs_tokens) * self.embed_scale_per_layer - def forward( + def get_per_layer_inputs( self, - input_ids: Optional[torch.Tensor], - positions: torch.Tensor, - per_layer_inputs: torch.Tensor, - intermediate_tensors: Optional[IntermediateTensors] = None, - inputs_embeds: Optional[torch.Tensor] = None, - **kwargs, - ) -> Union[torch.Tensor, IntermediateTensors]: - if inputs_embeds is not None: - hidden_states_0 = inputs_embeds - else: - hidden_states_0 = self.get_input_embeddings(input_ids) - + hidden_states_0: torch.Tensor, + per_layer_inputs: Optional[torch.Tensor], + ) -> torch.Tensor: per_layer_projection = self.per_layer_model_projection(hidden_states_0) per_layer_projection = per_layer_projection.reshape( *hidden_states_0.shape[:-1], @@ -655,14 +637,18 @@ def forward( ) per_layer_projection = self.per_layer_projection_norm( per_layer_projection) - if per_layer_inputs is not None: # Profiling run does not compute per_layer_inputs per_layer_inputs = per_layer_projection + per_layer_inputs per_layer_inputs *= self.per_layer_input_scale else: per_layer_inputs = per_layer_projection + return per_layer_inputs + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) * self.embed_scale + + def altup_embed(self, hidden_states_0: torch.Tensor) -> torch.Tensor: # Altup embed. 
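The loop below is the alternating-updates ("altup") embedding: every extra input stream is a projected copy of the base embedding, rescaled so its per-token RMS matches the base stream. A standalone sketch of that rescale, with toy shapes and a hypothetical helper name:

    import torch

    EPS = torch.tensor(torch.finfo().min)   # same guard constant as defined above

    def match_rms(base: torch.Tensor, projected: torch.Tensor) -> torch.Tensor:
        # Rescale `projected` so its per-token RMS equals that of `base`,
        # mirroring the target/new magnitude ratio in the loop below.
        target = base.pow(2).mean(dim=-1, keepdim=True).sqrt()
        actual = projected.pow(2).mean(dim=-1, keepdim=True).sqrt()
        return projected * target / torch.maximum(actual, EPS)

    base = torch.randn(4, 8)                # [num_tokens, hidden_size]
    proj = 3.0 * torch.randn(4, 8)          # stand-in for altup_projections[i-1](...)
    rescaled = match_rms(base, proj)        # per-row RMS now matches `base`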
hidden_states = [hidden_states_0] * self.config.altup_num_inputs target_magnitude = torch.mean(hidden_states_0**2, dim=-1, @@ -673,11 +659,77 @@ def forward( dim=-1, keepdim=True)**0.5 hidden_states[i] *= target_magnitude / torch.maximum( - new_magnitude, self.eps) - hidden_states = torch.stack(hidden_states, dim=0) + new_magnitude, EPS) + hidden_states = torch.stack(hidden_states, dim=-1) + return hidden_states + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, + per_layer_inputs: Optional[torch.Tensor] = None, + **kwargs, + ) -> tuple[torch.Tensor, torch.Tensor]: + if inputs_embeds is not None: + hidden_states_0 = inputs_embeds + else: + hidden_states_0 = self.get_input_embeddings(input_ids) + + adjusted_per_layer_inputs = self.get_per_layer_inputs( + hidden_states_0, per_layer_inputs) + hidden_states = self.altup_embed(hidden_states_0) + + # [altnum_inputs, num_tokens, hidden_size] + hidden_states = hidden_states.permute(2, 0, 1) + + for idx, layer in enumerate(self.decoder_layers): + layer_idx = idx + self.layer_idx_start + # [altup_num_inputs, num_tokens, hidden_size] + hidden_states = layer( + positions=positions, + hidden_states=hidden_states, + per_layer_input=adjusted_per_layer_inputs[:, layer_idx, :], + **kwargs, + ) + + # [num_tokens, hidden_size, altnum_inputs] + hidden_states = hidden_states.permute(1, 2, 0) + + return hidden_states, adjusted_per_layer_inputs + + +# This enables torch.compile if --kv-sharing-fast-prefill passed +@support_torch_compile(enable_if=lambda vllm_config: vllm_config.cache_config. + kv_sharing_fast_prefill) +class Gemma3nCrossDecoder(nn.Module): + """ + Cross-decoder layers + """ + + def __init__( + self, + *, + vllm_config: VllmConfig, + prefix: str = "", + decoder_layers: list[Gemma3nDecoderLayer], + layer_idx_start: int, + ): + super().__init__() + self.decoder_layers = decoder_layers + self.layer_idx_start = layer_idx_start - # Transformer blocks. - for layer_idx, layer in enumerate(self.layers): + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + per_layer_inputs: torch.Tensor, + **kwargs, + ) -> torch.Tensor: + # [altnum_inputs, num_tokens, hidden_size] + hidden_states = hidden_states.permute(2, 0, 1) + for idx, layer in enumerate(self.decoder_layers): + layer_idx = idx + self.layer_idx_start # [altup_num_inputs, num_tokens, hidden_size] hidden_states = layer( positions=positions, @@ -685,22 +737,249 @@ def forward( per_layer_input=per_layer_inputs[:, layer_idx, :], **kwargs, ) + # [num_tokens, hidden_size, altnum_inputs] + hidden_states = hidden_states.permute(1, 2, 0) + return hidden_states + + +# This disables torch.compile if --kv-sharing-fast-prefill passed +@support_torch_compile(enable_if=lambda vllm_config: not vllm_config. 
+ cache_config.kv_sharing_fast_prefill) +class Gemma3nTextModel(nn.Module, SupportsQuant): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + config = vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.config = config + self.quant_config = quant_config + + self.altup_unembed_projections = nn.ModuleList([ + ColumnParallelLinear( + config.hidden_size, + config.hidden_size, + bias=False, + gather_output=True, + return_bias=False, + quant_config=quant_config, + prefix=f"{prefix}.altup_unembed_projections.{idx-1}", + ) for idx in range(1, self.config.altup_num_inputs) + ]) + + # Allocate config.num_kv_shared_layers layers for self-decoder + self.start_layer, self.end_layer, self.layers = make_layers( + config.num_hidden_layers, + lambda prefix: Gemma3nDecoderLayer( + config, cache_config, quant_config, prefix=prefix), + prefix=f"{prefix}.layers") + + first_kv_shared_layer_idx = (config.num_hidden_layers - + config.num_kv_shared_layers) + + # NOTE(sarckk): importing this top level seems to cause issues + # during running of tests. + from vllm.compilation.backends import set_model_tag + + # Layer idx 0-19 are self-decoder layers in You Only Cache Once (YOCO) + with set_model_tag("self_decoder"): + self.self_decoder = Gemma3nSelfDecoder( + vllm_config=vllm_config, + prefix=f"{prefix}.self_decoder", + decoder_layers=self.layers[:first_kv_shared_layer_idx], + layer_idx_start=0, + ) + # Layer idx 20-30 are cross-decoder layers in YOCO + with set_model_tag("cross_decoder"): + self.cross_decoder = Gemma3nCrossDecoder( + vllm_config=vllm_config, + prefix=f"{prefix}.cross_decoder", + decoder_layers=self.layers[first_kv_shared_layer_idx:], + layer_idx_start=first_kv_shared_layer_idx, + ) + + self.norm = RMSNorm( + config.hidden_size, + eps=config.rms_norm_eps, + ) + + self.fast_prefill_enabled = cache_config.kv_sharing_fast_prefill + + if self.fast_prefill_enabled: + # Allocate static buffers for CUDAGraph + # TODO(sarckk): Extract this functionality to interface + max_num_tokens = vllm_config.scheduler_config.max_num_batched_tokens + device = next(self.parameters()).device + self.positions = torch.zeros(max_num_tokens, + dtype=torch.int64, + device=device) + self.hidden_states = torch.zeros( + (max_num_tokens, config.hidden_size, + self.config.altup_num_inputs), + dtype=self.embed_tokens.weight.dtype, + device=device, + ) + self.per_layer_inputs = torch.zeros( + (max_num_tokens, self.config.num_hidden_layers, + self.config.hidden_size_per_layer_input), + dtype=self.embed_tokens.weight.dtype, + device=device, + ) + + @property + def embed_tokens(self): + return self.self_decoder.embed_tokens + + def get_per_layer_input_embeddings( + self, input_ids: torch.Tensor) -> torch.Tensor: + return self.self_decoder.get_per_layer_input_embeddings(input_ids) + + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.self_decoder.get_input_embeddings(input_ids) + + def fast_prefill_forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, + per_layer_inputs: Optional[torch.Tensor] = None, + **kwargs, + ) -> torch.Tensor: + logits_indices_padded, num_logits_indices = None, None + attn_metadata = get_forward_context().attn_metadata + + # attn_metadata is None during dummy runs + if (self.fast_prefill_enabled and attn_metadata is not None): + assert isinstance(attn_metadata, dict) + # Last layer is a KV sharing 
layer + layer_attn_metadata = attn_metadata[ + self.layers[-1].self_attn.attn.layer_name] + if (isinstance(layer_attn_metadata, KVSharingFastPrefillMetadata)): + logits_indices_padded = ( + layer_attn_metadata.logits_indices_padded) + num_logits_indices = layer_attn_metadata.num_logits_indices + + # Copy inputs for cudagraph + batch_size = positions.size(0) + self.positions[:batch_size].copy_(positions) + self_decoder_hidden_states, per_layer_inputs_adjusted = \ + self.self_decoder( + input_ids=input_ids, + positions=self.positions[:batch_size], + inputs_embeds=inputs_embeds, + per_layer_inputs=per_layer_inputs, + **kwargs, + ) + + if logits_indices_padded is None: + logits_indices_padded = torch.arange( + positions.size(0), + dtype=positions.dtype, + device=positions.device, + ) + + # NOTE(sarckk): There is currently a bug caused by + # vLLM converting output of last piecewise CUDA graph + # to weakref, causing memory to be prematurely freed + # when there are multiple compilation units + # Keep .clone() until fix in + # https://github.com/vllm-project/vllm/pull/22282 + hidden_states = self_decoder_hidden_states.clone() + + # Copy inputs for cudagraph + num_padded_logits_indices = logits_indices_padded.size(0) + self.positions[:num_padded_logits_indices].copy_( + positions[logits_indices_padded]) + self.hidden_states[:num_padded_logits_indices].copy_( + self_decoder_hidden_states[logits_indices_padded]) + self.per_layer_inputs[:num_padded_logits_indices].copy_( + per_layer_inputs_adjusted[logits_indices_padded]) + cross_decoder_hidden_states = self.cross_decoder( + positions=self.positions[:num_padded_logits_indices], + hidden_states=self.hidden_states[:num_padded_logits_indices], + per_layer_inputs=self.per_layer_inputs[:num_padded_logits_indices], + **kwargs, + ) + + if num_logits_indices is not None: + assert num_logits_indices > 0 + # Merge cross-decoder and self-decoder hidden states + hidden_states[logits_indices_padded[:num_logits_indices]] = ( + cross_decoder_hidden_states[:num_logits_indices]) + else: + hidden_states = cross_decoder_hidden_states + + return hidden_states + + def normal_forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, + per_layer_inputs: Optional[torch.Tensor] = None, + **kwargs, + ) -> torch.Tensor: + hidden_states, per_layer_inputs = self.self_decoder( + input_ids=input_ids, + positions=positions, + inputs_embeds=inputs_embeds, + per_layer_inputs=per_layer_inputs, + **kwargs, + ) + hidden_states = self.cross_decoder( + positions=positions, + hidden_states=hidden_states, + per_layer_inputs=per_layer_inputs, + **kwargs, + ) + return hidden_states + def altup_unembed( + self, + hidden_states: torch.Tensor, + ) -> torch.Tensor: # Altup unembed. 
- target_magnitude = torch.mean(hidden_states[0]**2, + target_magnitude = torch.mean(hidden_states[..., 0]**2, dim=-1, keepdim=True)**0.5 for i in range(1, self.config.altup_num_inputs): - hidden_states[i] = self.altup_unembed_projections[i - 1]( - hidden_states[i]) - new_magnitude = torch.mean(hidden_states[i]**2, + hidden_states[..., i] = self.altup_unembed_projections[i - 1]( + hidden_states[..., i]) + new_magnitude = torch.mean(hidden_states[..., i]**2, dim=-1, keepdim=True)**0.5 - hidden_states[i] *= target_magnitude / torch.maximum( - new_magnitude, self.eps) - # [altup_num_inputs,num_tokens,hidden_size] -> [num_tokens,hidden_size] - hidden_states = torch.mean(hidden_states, dim=0) + hidden_states[..., i] *= target_magnitude / torch.maximum( + new_magnitude, EPS) + # [num_tokens,hidden_size, altup_num_inputs] -> [num_tokens,hidden_size] + hidden_states = torch.mean(hidden_states, dim=-1) + return hidden_states + def forward( + self, + input_ids: Optional[torch.Tensor], + positions: torch.Tensor, + per_layer_inputs: Optional[torch.Tensor] = None, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + **kwargs, + ) -> Union[torch.Tensor, IntermediateTensors]: + if self.fast_prefill_enabled: + hidden_states = self.fast_prefill_forward( + input_ids, + positions, + inputs_embeds, + per_layer_inputs, + **kwargs, + ) + else: + hidden_states = self.normal_forward( + input_ids, + positions, + inputs_embeds, + per_layer_inputs, + **kwargs, + ) + hidden_states = self.altup_unembed(hidden_states) return self.norm(hidden_states) def load_weights(self, weights: Iterable[tuple[str, @@ -716,6 +995,13 @@ def load_weights(self, weights: Iterable[tuple[str, params_dict = dict(self.named_parameters()) loaded_params: set[str] = set() for name, loaded_weight in weights: + # decoder layer weights, altup_unembed_projections and rmsnorm + # are initialized in text model, others are in self decoder + if (not name.startswith('layers') + and not name.startswith('altup_unembed_projections') + and not name.startswith('norm')): + name = f"self_decoder.{name}" + if (self.quant_config is not None and (scale_name := self.quant_config.get_cache_scale(name))): # Loading kv cache scales for compressed-tensors quantization From c30b405b8f022d41f57042cad7032256d4831355 Mon Sep 17 00:00:00 2001 From: Benjamin Chislett Date: Tue, 23 Sep 2025 22:29:58 -0400 Subject: [PATCH 322/518] [Spec Decode] Enable FlashInfer Spec Decoding (#25196) Signed-off-by: Benjamin Chislett Signed-off-by: Benjamin Chislett Co-authored-by: lhsjohn --- .../v1/attention/test_attention_splitting.py | 109 +++++++++++++++++- vllm/utils/flashinfer.py | 15 +++ vllm/v1/attention/backends/flashinfer.py | 55 +++++++-- vllm/v1/attention/backends/gdn_attn.py | 6 +- vllm/v1/attention/backends/linear_attn.py | 3 +- vllm/v1/attention/backends/mamba_attn.py | 2 +- vllm/v1/attention/backends/mla/common.py | 4 +- .../attention/backends/mla/flashattn_mla.py | 4 +- vllm/v1/attention/backends/short_conv_attn.py | 4 +- vllm/v1/attention/backends/utils.py | 72 +++++++++++- vllm/v1/attention/backends/xformers.py | 4 +- vllm/v1/spec_decode/eagle.py | 21 +--- 12 files changed, 250 insertions(+), 49 deletions(-) diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py index 7d7a46910be8..2fd11415d490 100644 --- a/tests/v1/attention/test_attention_splitting.py +++ b/tests/v1/attention/test_attention_splitting.py @@ -9,7 +9,8 @@ from vllm.v1.attention.backends.utils 
import (UBatchSlice, _make_metadata_with_slice, slice_query_start_locs, - split_attn_metadata) + split_attn_metadata, + split_decodes_and_prefills) from vllm.v1.worker.ubatch_utils import create_ubatch_slices @@ -158,6 +159,112 @@ def test_split_attn_metadata_decode_batch(large_decode_metadata): assert torch.equal(results[1].seq_lens, torch.tensor([2048] * mid_point)) +def apply_split_decodes_and_prefills(query_lens: list[int], + decode_threshold: int, + require_uniform: bool): + """Helper function to apply split_decodes_and_prefills and return + the results.""" + device = torch.device("cpu") + seq_lens = [10 * (i + 1) for i in range(len(query_lens))] + common_metadata = create_common_attn_metadata(BatchSpec( + seq_lens=seq_lens, query_lens=query_lens), + block_size=16, + device=device) + return split_decodes_and_prefills(common_metadata, + decode_threshold=decode_threshold, + require_uniform=require_uniform) + + +def test_split_decodes_and_prefills_nonuniform_all_ones(): + query_lens = [1, 1, 1] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 1, False)) + assert num_decodes == 3 + assert num_prefills == 0 + assert num_decode_tokens == 3 + assert num_prefill_tokens == 0 + + +def test_split_decodes_and_prefills_nonuniform_all_short_decodes(): + query_lens = [1, 2, 1, 3, 2, 1, 2] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 3, False)) + assert num_decodes == 7 + assert num_prefills == 0 + assert num_decode_tokens == sum(query_lens) + assert num_prefill_tokens == 0 + + +def test_split_decodes_and_prefills_nonuniform_all_prefills(): + query_lens = [4, 5, 6, 7] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 3, False)) + assert num_decodes == 0 + assert num_prefills == 4 + assert num_decode_tokens == 0 + assert num_prefill_tokens == sum(query_lens) + + +def test_split_decodes_and_prefills_nonuniform_mixed_batch(): + query_lens = [2, 1, 3, 4, 5, 6, 7, 8] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 4, False)) + assert num_decodes == 4 # 2, 1, 3, 4 are all <= 4 + assert num_prefills == 4 # 5, 6, 7, 8 are all > 4 + assert num_decode_tokens == 10 # 2 + 1 + 3 + 4 + assert num_prefill_tokens == 26 # 5 + 6 + 7 + 8 + + +def test_split_decodes_and_prefills_uniform_all_ones(): + query_lens = [1, 1, 1] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 1, True)) + assert num_decodes == 3 + assert num_prefills == 0 + assert num_decode_tokens == 3 + assert num_prefill_tokens == 0 + + +def test_split_decodes_and_prefills_uniform_all_short_decodes(): + query_lens = [2, 2, 1, 3, 2, 1, 2] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 3, True)) + assert num_decodes == 2 + assert num_prefills == 5 + assert num_decode_tokens == 4 + assert num_prefill_tokens == (1 + 3 + 2 + 1 + 2) + + +def test_split_decodes_and_prefills_uniform_all_prefills(): + query_lens = [4, 5, 6, 7] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 3, True)) + assert num_decodes == 0 + assert num_prefills == 4 + assert num_decode_tokens == 0 + assert num_prefill_tokens == sum(query_lens) + + +def 
test_split_decodes_and_prefills_uniform_mixed_batch_all_uniform_decodes(): + query_lens = [2, 2, 2, 4, 5, 6, 7, 8] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 4, True)) + assert num_decodes == 3 # 2, 2, 2 are all <= 4 and uniform + assert num_prefills == 5 # 4, 5, 6, 7, 8 are all > 4 + assert num_decode_tokens == 6 # 2 + 2 + 2 + assert num_prefill_tokens == 30 # 4 + 5 + 6 + 7 + 8 + + +def test_split_decodes_and_prefills_uniform_mixed_batch_non_uniform_decodes(): + query_lens = [2, 1, 2, 4, 5, 6, 7, 8] + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( + apply_split_decodes_and_prefills(query_lens, 4, True)) + assert num_decodes == 1 # only the first 2 is taken as decode + assert num_prefills == 7 # 1, 2, 4, 5, 6, 7, 8 are all > 4 or non-uniform + assert num_decode_tokens == 2 # only the first 2 + assert num_prefill_tokens == (sum(query_lens) - 2) # rest of the tokens + + @pytest.mark.parametrize( "seq_lens,query_lens,split_point,expected_first_reqs,expected_second_reqs", [ diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py index 2179bddae243..ebc7a56ff906 100644 --- a/vllm/utils/flashinfer.py +++ b/vllm/utils/flashinfer.py @@ -181,6 +181,12 @@ def force_use_trtllm_attention() -> Optional[bool]: return _force_use_trtllm_attention(envs.VLLM_USE_TRTLLM_ATTENTION) +def can_use_trtllm_attention(num_qo_heads: int, num_kv_heads: int) -> bool: + """Check if the current configuration supports TRTLLM attention.""" + has_trtllm = supports_trtllm_attention() + return has_trtllm and (num_qo_heads % num_kv_heads == 0) + + def use_trtllm_attention( num_qo_heads: int, num_kv_heads: int, @@ -188,7 +194,9 @@ def use_trtllm_attention( max_seq_len: int, kv_cache_dtype: str, q_dtype: torch.dtype, + is_prefill: bool, has_sinks: bool = False, + has_spec: bool = False, ) -> bool: """Return ``True`` if TRTLLM attention is used.""" force_use_trtllm = force_use_trtllm_attention() @@ -214,6 +222,12 @@ def use_trtllm_attention( ) return False + if has_spec and not is_prefill: + # Speculative decoding requires TRTLLM attention for decodes + logger.info_once( + "Using TRTLLM attention (enabled for speculative decoding).") + return True + # Must use TRTLLM attention if query is FP8 quantized if q_dtype == current_platform.fp8_dtype(): if has_sinks: @@ -391,6 +405,7 @@ def flashinfer_disable_q_quantization() -> bool: "has_flashinfer_cutlass_fused_moe", "has_nvidia_artifactory", "supports_trtllm_attention", + "can_use_trtllm_attention", "use_trtllm_attention", "flashinfer_disable_q_quantization", "flashinfer_scaled_fp4_mm", diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 1a5c171430bc..891108f961b5 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -25,7 +25,8 @@ from vllm.platforms import current_platform from vllm.triton_utils import tl, triton from vllm.utils import cdiv, is_pin_memory_available -from vllm.utils.flashinfer import (flashinfer_disable_q_quantization, +from vllm.utils.flashinfer import (can_use_trtllm_attention, + flashinfer_disable_q_quantization, supports_trtllm_attention, use_trtllm_attention) from vllm.v1.attention.backends.flash_attn import use_cascade_attention @@ -223,6 +224,7 @@ class FlashInferMetadata: # For flashinfer trtllm batch decode max_q_len: int + max_q_len_prefill: int max_seq_len: int seq_lens: torch.Tensor block_table_tensor: torch.Tensor @@ -250,7 +252,7 @@ class 
FlashInferMetadataBuilder(AttentionMetadataBuilder[FlashInferMetadata]): cudagraph_support: ClassVar[AttentionCGSupport] = \ AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): @@ -302,6 +304,10 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], else: self.q_data_type = self.model_config.dtype + supports_spec_as_decode = \ + can_use_trtllm_attention(self.num_qo_heads, self.num_kv_heads) + self._init_reorder_batch_threshold(1, supports_spec_as_decode) + self._cascade_wrapper = None # Wrapper for cascade attention # Global hyperparameters shared by all attention layers @@ -416,7 +422,8 @@ def build(self, num_actual_tokens = common_attn_metadata.num_actual_tokens num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens =\ split_decodes_and_prefills(common_attn_metadata, - decode_threshold=self.reorder_batch_threshold) + decode_threshold=self.reorder_batch_threshold, + require_uniform=True) page_size = self.page_size max_q_len = common_attn_metadata.max_query_len @@ -491,20 +498,25 @@ def build(self, paged_kv_last_page_len_np, ) + uses_spec_reorder = self.reorder_batch_threshold > 1 prefill_use_trtllm = use_trtllm_attention(self.num_qo_heads, self.num_kv_heads, num_prefill_tokens, max_seq_len, self.cache_dtype, self.q_data_type, - has_sinks=self.has_sinks) + is_prefill=True, + has_sinks=self.has_sinks, + has_spec=uses_spec_reorder) decode_use_trtllm = use_trtllm_attention(self.num_qo_heads, self.num_kv_heads, num_decode_tokens, max_seq_len, self.cache_dtype, self.q_data_type, - has_sinks=self.has_sinks) + is_prefill=False, + has_sinks=self.has_sinks, + has_spec=uses_spec_reorder) if self.has_sinks and not (prefill_use_trtllm and decode_use_trtllm): raise NotImplementedError( "FlashInfer backend currently does not support attention " @@ -521,6 +533,7 @@ def build(self, q_data_type=self.q_data_type, slot_mapping=common_attn_metadata.slot_mapping, max_q_len=max_q_len, + max_q_len_prefill=max_q_len, max_seq_len=max_seq_len, seq_lens=seq_lens, block_table_tensor=block_table_tensor, @@ -577,6 +590,15 @@ def build(self, qo_indptr_cpu = qo_indptr_cpu[prefill_start:] - qo_indptr_cpu[ prefill_start] paged_kv_indptr_cpu = paged_kv_indptr_cpu[prefill_start:] + + # Recompute max_q_len for the slice of requests we are using + # for prefills. This can be different from max_q_len when + # we have a non-uniform batch with some short decodes offloaded + # to the prefill pathway + query_lens_prefill = qo_indptr_cpu[1:] - qo_indptr_cpu[:-1] + attn_metadata.max_q_len_prefill = \ + int(query_lens_prefill.max().item()) + if not attn_metadata.prefill_use_trtllm: attn_metadata.prefill_wrapper.plan( qo_indptr_cpu, @@ -607,7 +629,7 @@ def build(self, num_decodes <= self._decode_cudagraph_max_bs) if use_cudagraph: num_input_tokens = ( - self.vllm_config.pad_for_cudagraph(num_decodes)) + self.vllm_config.pad_for_cudagraph(num_decode_tokens)) # Carefully fulfill the padding region with reasonable value # on cpu. 
# Make sure paged_kv_indptr_cpu is not decreasing @@ -621,7 +643,7 @@ def build(self, num_decodes:num_input_tokens].fill_(1) else: - num_input_tokens = num_decodes + num_input_tokens = num_decode_tokens attn_metadata.decode_wrapper = self._get_decode_wrapper( num_input_tokens, use_cudagraph) @@ -842,6 +864,9 @@ def forward( output.copy_(attn_metadata.cascade_wrapper.run(query, kv_cache)) return output + # When using spec decoding, num_decodes can be < num_decode_tokens + # because some decode requests may have more than one query token. + num_decodes = attn_metadata.num_decodes num_decode_tokens = attn_metadata.num_decode_tokens num_prefill_tokens = attn_metadata.num_prefill_tokens @@ -874,8 +899,8 @@ def forward( prefill_query = prefill_query.contiguous() workspace_buffer = _get_trtllm_gen_workspace_buffer() block_tables_prefill = attn_metadata.block_table_tensor[ - num_decode_tokens:] - seq_lens_prefill = attn_metadata.seq_lens[num_decode_tokens:] + num_decodes:] + seq_lens_prefill = attn_metadata.seq_lens[num_decodes:] # This path needs to be enabled with VLLM_KV_CACHE_LAYOUT = HND assert get_kv_cache_layout() == "HND" @@ -919,7 +944,7 @@ def forward( workspace_buffer=workspace_buffer, block_tables=mock_block_table, seq_lens=seq_lens_prefill, - max_q_len=attn_metadata.max_q_len, + max_q_len=attn_metadata.max_q_len_prefill, max_kv_len=attn_metadata.max_seq_len, bmm1_scale=self.bmm1_scale, bmm2_scale=self.bmm2_scale, @@ -976,6 +1001,14 @@ def forward( assert self.o_sf_scale is None out = output[:num_decode_tokens] + if num_decode_tokens % attn_metadata.num_decodes != 0: + # This gets triggered when the dummy_run forces + # attention to be initialized with q_len = 0 + q_len_per_req = 1 + else: + q_len_per_req = \ + num_decode_tokens // attn_metadata.num_decodes + trtllm_batch_decode_with_kv_cache( query=decode_query, kv_cache=kv_cache_permute, @@ -989,7 +1022,7 @@ def forward( sinks=self.sinks, o_sf_scale=self.o_sf_scale, out=out, - ) + q_len_per_req=q_len_per_req) return output_padded diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index 06a87a4a3c8b..843958bc79de 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Backend for GatedDeltaNet attention.""" from dataclasses import dataclass -from typing import ClassVar, Optional +from typing import Optional import torch @@ -62,7 +62,7 @@ class GDNAttentionMetadataBuilder( cudagraph_support = AttentionCGSupport.UNIFORM_BATCH - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): @@ -76,7 +76,7 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], else: self.num_spec = 0 self.use_spec_decode = self.num_spec > 0 - self.reorder_batch_threshold = self.num_spec + 1 # type: ignore[misc] + self._init_reorder_batch_threshold(1, self.use_spec_decode) self.use_full_cuda_graph = \ self.compilation_config.cudagraph_mode.has_full_cudagraphs() diff --git a/vllm/v1/attention/backends/linear_attn.py b/vllm/v1/attention/backends/linear_attn.py index 3ff201d83a79..0dc62d668020 100644 --- a/vllm/v1/attention/backends/linear_attn.py +++ b/vllm/v1/attention/backends/linear_attn.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses 
import dataclass -from typing import ClassVar import torch @@ -35,7 +34,7 @@ class LinearAttentionMetadata: class LinearAttentionMetadataBuilder( AttentionMetadataBuilder[LinearAttentionMetadata]): - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): diff --git a/vllm/v1/attention/backends/mamba_attn.py b/vllm/v1/attention/backends/mamba_attn.py index 9970331a6042..ef342ce421ae 100644 --- a/vllm/v1/attention/backends/mamba_attn.py +++ b/vllm/v1/attention/backends/mamba_attn.py @@ -16,7 +16,7 @@ class BaseMambaAttentionMetadataBuilder(AttentionMetadataBuilder[M], abc.ABC): - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 cudagraph_support: ClassVar[AttentionCGSupport] = \ AttentionCGSupport.UNIFORM_SINGLE_TOKEN_DECODE diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index e84f2d89943e..3e8dba14ee2e 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -190,7 +190,7 @@ import functools from abc import abstractmethod from dataclasses import dataclass, field -from typing import ClassVar, Generic, Optional, TypeVar, Union +from typing import Generic, Optional, TypeVar, Union import torch from tqdm import tqdm @@ -434,7 +434,7 @@ class MLACommonMetadataBuilder(AttentionMetadataBuilder[M]): NOTE: Please read the comment at the top of the file before trying to understand this class """ - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 @staticmethod def determine_chunked_prefill_workspace_size( diff --git a/vllm/v1/attention/backends/mla/flashattn_mla.py b/vllm/v1/attention/backends/mla/flashattn_mla.py index 4ad9a13b61d8..652b1cdb6b76 100644 --- a/vllm/v1/attention/backends/mla/flashattn_mla.py +++ b/vllm/v1/attention/backends/mla/flashattn_mla.py @@ -64,7 +64,7 @@ class FlashAttnMLAMetadataBuilder( cudagraph_support: ClassVar[AttentionCGSupport] = \ AttentionCGSupport.UNIFORM_BATCH - reorder_batch_threshold: ClassVar[int] = 512 + reorder_batch_threshold: int = 512 def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): @@ -99,7 +99,7 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], # TODO(lucas): Until we add support for the DCP custom masking we need # to restrict decodes to q_len == 1 when DCP is enabled. 
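The one-line change below is part of a broader shift in this patch from class-level to instance-level reorder_batch_threshold. A standalone illustration (not vLLM code) of why writing through `self.__class__` is problematic: the override leaks to every other builder instance:

    class Builder:
        reorder_batch_threshold: int = 512

    a, b = Builder(), Builder()

    a.__class__.reorder_batch_threshold = 1    # old pattern: rebinds the class attribute
    assert b.reorder_batch_threshold == 1      # ...so every instance sees the change

    Builder.reorder_batch_threshold = 512      # reset
    a.reorder_batch_threshold = 1              # new pattern: instance attribute only
    assert b.reorder_batch_threshold == 512    # other builders keep their own value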
- self.__class__.reorder_batch_threshold = 1 \ + self.reorder_batch_threshold = 1 \ if get_dcp_group().world_size > 1 else self.reorder_batch_threshold def _schedule_decode(self, num_reqs, cu_query_lens, max_query_len, seqlens, diff --git a/vllm/v1/attention/backends/short_conv_attn.py b/vllm/v1/attention/backends/short_conv_attn.py index 428e40965979..df7f0d2310ab 100644 --- a/vllm/v1/attention/backends/short_conv_attn.py +++ b/vllm/v1/attention/backends/short_conv_attn.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from dataclasses import dataclass -from typing import ClassVar, Optional +from typing import Optional import torch @@ -41,7 +41,7 @@ class ShortConvAttentionMetadata: class ShortConvAttentionMetadataBuilder( AttentionMetadataBuilder[ShortConvAttentionMetadata]): - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], vllm_config: VllmConfig, device: torch.device): diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index f837439f953e..0c6e0dfefd8a 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -236,7 +236,7 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]): # Does this backend/builder reorder the batch? # If not, set this to None. Otherwise set it to the query # length that will be pulled into the front of the batch. - reorder_batch_threshold: ClassVar[Optional[int]] = None + reorder_batch_threshold: Optional[int] = None @abstractmethod def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], @@ -246,6 +246,22 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], self.vllm_config = vllm_config self.device = device + def _init_reorder_batch_threshold( + self, + reorder_batch_threshold: int = 1, + supports_spec_as_decode: bool = False) -> None: + self.reorder_batch_threshold = reorder_batch_threshold + if self.reorder_batch_threshold is not None \ + and supports_spec_as_decode: + # If the backend supports spec-as-decode kernels, then we can set + # the reorder_batch_threshold based on the number of speculative + # tokens from the config. + speculative_config = self.vllm_config.speculative_config + if (speculative_config is not None + and speculative_config.num_speculative_tokens is not None): + self.reorder_batch_threshold = \ + 1 + speculative_config.num_speculative_tokens + @abstractmethod def build(self, common_prefix_len: int, @@ -703,9 +719,9 @@ def subclass_attention_backend( def split_decodes_and_prefills( - common_attn_metadata: CommonAttentionMetadata, - decode_threshold: int = 1, -) -> tuple[int, int, int, int]: + common_attn_metadata: CommonAttentionMetadata, + decode_threshold: int = 1, + require_uniform: bool = False) -> tuple[int, int, int, int]: """ Assuming a reordered batch, finds the boundary between prefill and decode requests. @@ -714,6 +730,9 @@ def split_decodes_and_prefills( common_attn_metadata: CommonAttentionMetadata object containing the batch metadata. decode_threshold: The maximum query length to be considered a decode. + require_uniform: If True, requires that all decode requests have the + same query length. When set, some queries may be considered prefills + even if they are <= decode_threshold, in order to ensure uniformity. Returns: num_decodes: The number of decode requests. 
@@ -726,11 +745,20 @@ def split_decodes_and_prefills( num_tokens = common_attn_metadata.num_actual_tokens query_start_loc = common_attn_metadata.query_start_loc_cpu - if max_query_len <= decode_threshold: + if max_query_len <= decode_threshold and \ + (not require_uniform or decode_threshold <= 1): return num_reqs, 0, num_tokens, 0 query_lens = query_start_loc[1:] - query_start_loc[:-1] - is_prefill = query_lens > decode_threshold + if query_lens[0].item() > decode_threshold: + # first request is not decode, so no decode requests + return 0, num_reqs, 0, num_tokens + + if require_uniform: + is_prefill = query_lens != query_lens[0] + else: + is_prefill = query_lens > decode_threshold + if not torch.any(is_prefill): return num_reqs, 0, num_tokens, 0 @@ -806,6 +834,38 @@ def reorder_batch_to_split_decodes_and_prefills( return modified_batch +def reshape_query_for_spec_decode(query: torch.Tensor, + batch_size: int) -> torch.Tensor: + """ + Reshapes the query tensor for the specified batch size, so that + it has shape (batch_size, seq_len, num_heads, head_dim). + """ + assert query.dim() == 3, f"query must be 3D, got {query.dim()}D" + total_tokens = query.shape[0] + num_heads = query.shape[1] + head_dim = query.shape[2] + assert total_tokens % batch_size == 0, ( + f"{total_tokens=} is not divisible by {batch_size=}") + seq_len = total_tokens // batch_size + return query.view(batch_size, seq_len, num_heads, head_dim) + + +def reshape_attn_output_for_spec_decode( + attn_output: torch.Tensor) -> torch.Tensor: + """ + Reshapes the attention output tensor, so that + the batch_size and seq_len dimensions are combined. + """ + if attn_output.dim() == 3: + # Already in the correct shape + return attn_output + assert attn_output.dim() == 4, \ + f"attn_output must be 4D, got {attn_output.dim()}D" + total_tokens = attn_output.shape[0] * attn_output.shape[1] + return attn_output.view(total_tokens, attn_output.shape[2], + attn_output.shape[3]) + + KV_SHARING_FAST_PREFILL_METADATA_FIELDS = [ ('logits_indices_padded', Optional[torch.Tensor], None), ('num_logits_indices', int, 0), diff --git a/vllm/v1/attention/backends/xformers.py b/vllm/v1/attention/backends/xformers.py index a6ca33491235..d5a6c4c1db52 100644 --- a/vllm/v1/attention/backends/xformers.py +++ b/vllm/v1/attention/backends/xformers.py @@ -3,7 +3,7 @@ """Attention layer with XFormersAttention.""" from dataclasses import dataclass -from typing import TYPE_CHECKING, ClassVar, Optional +from typing import TYPE_CHECKING, Optional import torch @@ -197,7 +197,7 @@ def decode_metadata(self) -> Optional["XFormersAttentionMetadata"]: class XFormersAttentionMetadataBuilder( AttentionMetadataBuilder[XFormersAttentionMetadata]): - reorder_batch_threshold: ClassVar[int] = 1 + reorder_batch_threshold: int = 1 def __init__( self, diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index a9e0a38fe341..5cae7df70470 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -3,7 +3,7 @@ import ast from dataclasses import replace from importlib.util import find_spec -from typing import Optional, Protocol +from typing import Optional import numpy as np import torch @@ -37,17 +37,6 @@ PADDING_SLOT_ID = -1 -class EagleAttentionMetadata(Protocol): - # Required attributes - num_actual_tokens: int - max_query_len: int - query_start_loc: torch.Tensor - max_seq_len: int - seq_lens: torch.Tensor - block_table: torch.Tensor - slot_mapping: torch.Tensor - - class EagleProposer: def __init__( @@ -120,7 +109,7 @@ def __init__( 
with_numpy=True) # Determine allowed attention backends once during initialization. - self.allowed_attn_types: tuple[type, ...] + self.allowed_attn_types: Optional[tuple] = None if current_platform.is_rocm(): rocm_types = [TritonAttentionMetadata, FlashAttentionMetadata] # vllm.v1.attention.backends.rocm_aiter_fa is an optional backend @@ -129,9 +118,6 @@ def __init__( AiterFlashAttentionMetadata) rocm_types.append(AiterFlashAttentionMetadata) self.allowed_attn_types = tuple(rocm_types) - else: - self.allowed_attn_types = (FlashAttentionMetadata, - TreeAttentionMetadata) # Parse the speculative token tree. spec_token_tree = self.speculative_config.speculative_token_tree @@ -266,7 +252,8 @@ def propose( draft_token_ids = logits.argmax(dim=-1) - if not isinstance(attn_metadata, self.allowed_attn_types): + if self.allowed_attn_types is not None and \ + not isinstance(attn_metadata, self.allowed_attn_types): raise ValueError( f"Unsupported attention metadata type for speculative " "decoding with num_speculative_tokens > 1: " From d747c2ef18f9d9174bf40c58ae313ab31cdba305 Mon Sep 17 00:00:00 2001 From: Corey Lowman Date: Tue, 23 Sep 2025 23:16:13 -0400 Subject: [PATCH 323/518] [Perf] Fix jit compiles at runtime of fla gated delta rule (#25432) Co-authored-by: Michael Goin --- vllm/model_executor/layers/fla/ops/fused_recurrent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fla/ops/fused_recurrent.py b/vllm/model_executor/layers/fla/ops/fused_recurrent.py index b278e3741574..98437340fd24 100644 --- a/vllm/model_executor/layers/fla/ops/fused_recurrent.py +++ b/vllm/model_executor/layers/fla/ops/fused_recurrent.py @@ -40,8 +40,8 @@ def fused_recurrent_gated_delta_rule_fwd_kernel( ssm_state_indices, num_accepted_tokens, scale, - N: tl.constexpr, # num of sequences - T: tl.constexpr, # num of tokens + N: tl.int64, # num of sequences + T: tl.int64, # num of tokens B: tl.constexpr, H: tl.constexpr, HV: tl.constexpr, From 5caaeb714ce3fd08de9c2e87848b4825bb4b676d Mon Sep 17 00:00:00 2001 From: Ben Browning Date: Tue, 23 Sep 2025 23:20:38 -0400 Subject: [PATCH 324/518] [Bugfix] [Frontend] Cleanup gpt-oss non-streaming chat tool calls (#25514) Signed-off-by: Ben Browning --- tests/entrypoints/openai/test_serving_chat.py | 1 + tests/tool_use/test_openai_tool_parser.py | 76 ++++++++++++++++++- vllm/entrypoints/openai/serving_chat.py | 13 ++-- .../openai/tool_parsers/openai_tool_parser.py | 24 +++++- 4 files changed, 102 insertions(+), 12 deletions(-) diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index 8e68699e5904..b773061b3092 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -194,6 +194,7 @@ async def test_gpt_oss_multi_turn_chat(gptoss_client: OpenAI, assert tc.function is not None and tc.function.name == "get_current_weather" args1 = tc.function.arguments assert args1 is not None and len(args1) > 0 + assert not first_msg.content messages.append({"role": "assistant", "content": args1}) messages.append({ diff --git a/tests/tool_use/test_openai_tool_parser.py b/tests/tool_use/test_openai_tool_parser.py index 0192c7d2765c..2551c41c6275 100644 --- a/tests/tool_use/test_openai_tool_parser.py +++ b/tests/tool_use/test_openai_tool_parser.py @@ -70,7 +70,12 @@ def test_extract_tool_calls_no_tools(openai_tool_parser, harmony_encoding): assert extracted_info.content == "This is a test" -def 
test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding): +@pytest.mark.parametrize("tool_args", [ + '{"location": "Tokyo"}', + '{\n"location": "Tokyo"\n}', +]) +def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding, + tool_args): convo = Conversation.from_messages([ Message.from_role_and_content(Role.USER, "What is the weather in Tokyo?"), @@ -80,7 +85,7 @@ def test_extract_tool_calls_single_tool(openai_tool_parser, harmony_encoding): ).with_channel("analysis"), Message.from_role_and_content( Role.ASSISTANT, - '{"location": "Tokyo"}').with_channel("commentary").with_recipient( + tool_args).with_channel("commentary").with_recipient( "functions.get_current_weather").with_content_type("json"), ]) token_ids = harmony_encoding.render_conversation_for_completion( @@ -121,6 +126,17 @@ def test_extract_tool_calls_multiple_tools( Role.ASSISTANT, '{"location": "Tokyo"}').with_channel("commentary").with_recipient( "functions.get_user_location").with_content_type("json"), + Message.from_role_and_content( + Role.ASSISTANT, '{"location": "Tokyo"}').with_channel( + "commentary").with_recipient("functions.no_content_type"), + Message.from_role_and_content(Role.ASSISTANT, "foo").with_channel( + "commentary").with_recipient("functions.not_json_no_content_type"), + Message.from_role_and_content( + Role.ASSISTANT, '{}').with_channel("commentary").with_recipient( + "functions.empty_args").with_content_type("json"), + Message.from_role_and_content( + Role.ASSISTANT, '').with_channel("commentary").with_recipient( + "functions.no_args").with_content_type("json"), ]) token_ids = harmony_encoding.render_conversation_for_completion( convo, @@ -141,7 +157,63 @@ def test_extract_tool_calls_multiple_tools( ToolCall(function=FunctionCall( name="get_user_location", arguments=json.dumps({"location": "Tokyo"}), + )), + ToolCall(function=FunctionCall( + name="no_content_type", + arguments=json.dumps({"location": "Tokyo"}), + )), + ToolCall(function=FunctionCall( + name="not_json_no_content_type", + arguments="foo", + )), + ToolCall(function=FunctionCall( + name="empty_args", + arguments=json.dumps({}), + )), + ToolCall(function=FunctionCall( + name="no_args", + arguments="", )) ] assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) assert extracted_info.content is None + + +def test_extract_tool_calls_with_content( + openai_tool_parser, + harmony_encoding, +): + final_content = "This tool call will get the weather." + convo = Conversation.from_messages([ + Message.from_role_and_content( + Role.USER, "What is the weather in Tokyo based on where I'm at?"), + Message.from_role_and_content( + Role.ASSISTANT, + 'User asks: "What is the weather in Tokyo?" based on their location. 
We need to use get_current_weather tool and get_user_location tool.', # noqa: E501 + ).with_channel("analysis"), + Message.from_role_and_content( + Role.ASSISTANT, + '{"location": "Tokyo"}').with_channel("commentary").with_recipient( + "functions.get_current_weather").with_content_type("json"), + Message.from_role_and_content(Role.ASSISTANT, + final_content).with_channel("final"), + ]) + token_ids = harmony_encoding.render_conversation_for_completion( + convo, + Role.ASSISTANT, + ) + + extracted_info = openai_tool_parser.extract_tool_calls( + "", + request=None, + token_ids=token_ids, + ) + assert extracted_info.tools_called + expected_tool_calls = [ + ToolCall(function=FunctionCall( + name="get_current_weather", + arguments=json.dumps({"location": "Tokyo"}), + )), + ] + assert_tool_calls(extracted_info.tool_calls, expected_tool_calls) + assert extracted_info.content == final_content diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 16564214e353..0780448ad733 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -1186,6 +1186,10 @@ async def chat_completion_full_generator( logprobs = None if self.use_harmony: + reasoning_content, content, _ = parse_chat_output(token_ids) + if not request.include_reasoning: + reasoning_content = None + if self.tool_parser is not None: tool_parser = self.tool_parser(tokenizer) # NOTE: We use token_ids for openai tool parser @@ -1194,10 +1198,7 @@ async def chat_completion_full_generator( request=request, token_ids=token_ids, # type: ignore ) - reasoning_content, content = None, tool_call_info.content - if request.include_reasoning: - reasoning_content, content, _ = parse_chat_output( - token_ids) + content = tool_call_info.content message = ChatMessage( role=role, reasoning_content=reasoning_content, @@ -1205,10 +1206,6 @@ async def chat_completion_full_generator( tool_calls=tool_call_info.tool_calls, ) else: - reasoning_content, content, _ = parse_chat_output( - token_ids) - if not request.include_reasoning: - reasoning_content = None message = ChatMessage( role=role, reasoning_content=reasoning_content, diff --git a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py index c5d59514b944..1729fdbc9971 100644 --- a/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/openai_tool_parser.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from __future__ import annotations +import json from collections.abc import Sequence from typing import TYPE_CHECKING @@ -12,10 +13,13 @@ FunctionCall, ToolCall) from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( ToolParser, ToolParserManager) +from vllm.logger import init_logger if TYPE_CHECKING: from vllm.transformers_utils.tokenizer import AnyTokenizer +logger = init_logger(__name__) + @ToolParserManager.register_module("openai") class OpenAIToolParser(ToolParser): @@ -40,17 +44,33 @@ def extract_tool_calls( if len(parser.messages) > 0: for msg in parser.messages: + if len(msg.content) < 1: + continue + msg_text = msg.content[0].text if msg.recipient and msg.recipient.startswith("functions."): + # If no content-type is given assume JSON, as that's the + # most common case with gpt-oss models. 
+ if not msg.content_type or "json" in msg.content_type: + # load and dump the JSON text to check validity and + # remove any extra newlines or other odd formatting + try: + tool_args = json.dumps(json.loads(msg_text)) + except json.JSONDecodeError: + logger.exception( + "Error decoding JSON tool call from response.") + tool_args = msg_text + else: + tool_args = msg_text tool_calls.append( ToolCall( type="function", function=FunctionCall( name=msg.recipient.split("functions.")[1], - arguments=msg.content[0].text, + arguments=tool_args, ), )) elif msg.channel == "final": - final_content = msg.content[0].text + final_content = msg_text return ExtractedToolCallInformation( tools_called=len(tool_calls) > 0, From 190c45a6af666ce2be25a73aa68301e2b1de309e Mon Sep 17 00:00:00 2001 From: Chengji Yao Date: Tue, 23 Sep 2025 22:18:08 -0700 Subject: [PATCH 325/518] [TPU][Bugfix] fix the missing apply_model in tpu worker (#25526) Signed-off-by: Chengji Yao --- tests/v1/tpu/test_tpu_int8.py | 6 +----- vllm/v1/worker/tpu_worker.py | 8 +++++++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/tests/v1/tpu/test_tpu_int8.py b/tests/v1/tpu/test_tpu_int8.py index 991070dc9239..f39a8021a29e 100644 --- a/tests/v1/tpu/test_tpu_int8.py +++ b/tests/v1/tpu/test_tpu_int8.py @@ -48,13 +48,9 @@ def test_model_tpu_int8(vllm_runner, model: str, dtype: str, max_tokens: int, prompts = [ "A robot may not injure a human being", - "It is only with the heart that one can see rightly;", - "The greatest glory in living lies not in never falling,", ] answers = [ - "or, being injured, not kill, except in", - "without the heart, one can only see wrongly.", - "but in rising every time we fall. - Nelson" + "or kill a human being", ] with vllm_runner(model, dtype=dtype, hf_overrides=hf_overrides) as vllm: diff --git a/vllm/v1/worker/tpu_worker.py b/vllm/v1/worker/tpu_worker.py index fc72b954df9c..d4f0a65f2a16 100644 --- a/vllm/v1/worker/tpu_worker.py +++ b/vllm/v1/worker/tpu_worker.py @@ -3,7 +3,7 @@ """A TPU worker class.""" import os -from typing import Any, Optional +from typing import Any, Callable, Optional, TypeVar import torch import torch.distributed @@ -31,6 +31,8 @@ logger = init_logger(__name__) +_R = TypeVar("_R") + if not USE_TPU_COMMONS: logger.info("tpu_commons not found, using vLLM's TPUWorker.") import torch_xla.core.xla_model as xm @@ -333,6 +335,10 @@ def _init_tpu_worker_distributed_environment( def shutdown(self) -> None: self.model_runner.ensure_kv_transfer_shutdown() + def apply_model(self, fn: Callable[[nn.Module], _R]) -> _R: + """Apply a function on the model inside this worker.""" + return fn(self.get_model()) + if USE_TPU_COMMONS: from tpu_commons.worker import TPUWorker as TPUCommonsWorker From fed8a9b107df3e27d57728c6911c7d308b871477 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 24 Sep 2025 13:32:11 +0800 Subject: [PATCH 326/518] [Misc] Retry HF processing if "Already borrowed" error occurs (#25535) Signed-off-by: DarkLight1337 --- vllm/inputs/registry.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py index b5316b6d0574..0aad78b04e34 100644 --- a/vllm/inputs/registry.py +++ b/vllm/inputs/registry.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import time from collections.abc import Mapping from dataclasses import dataclass from typing import TYPE_CHECKING, Any, Union @@ -139,6 +140,9 @@ def call_hf_processor( hf_processor: 
ProcessorMixin, data: Mapping[str, object], kwargs: Mapping[str, object] = {}, + *, + num_tries: int = 1, + max_tries: int = 5, ) -> Union[BatchFeature, JSONTree]: """ Call `hf_processor` on the prompt `data` @@ -180,6 +184,22 @@ def maybe_cast_dtype(x): return cast_output except Exception as exc: + # See https://github.com/huggingface/tokenizers/issues/537 + if (isinstance(exc, RuntimeError) and exc + and exc.args[0] == "Already borrowed" + and num_tries < max_tries): + logger.warning( + "Failed to acquire tokenizer in current thread. " + "Retrying (%d/%d)...", num_tries, max_tries) + time.sleep(0.5) + return self.call_hf_processor( + hf_processor, + data, + kwargs, + num_tries=num_tries + 1, + max_tries=max_tries, + ) + msg = (f"Failed to apply {type(hf_processor).__name__} " f"on data={data} with kwargs={allowed_kwargs}") From 1cbcfb94ded21da2b922b452c1e90baef87e419b Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Wed, 24 Sep 2025 14:21:51 +0800 Subject: [PATCH 327/518] [Bugfix][CPU] Skip unsupported custom op register on CPU (#25534) Signed-off-by: jiang1.li --- .../layers/quantization/utils/fp8_utils.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index 0bc69fe7f930..a4cfc7d6c15c 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -130,12 +130,14 @@ def _w8a8_triton_block_scaled_mm_fake( device=qx.device) -direct_register_custom_op( - "w8a8_triton_block_scaled_mm_func", - _w8a8_triton_block_scaled_mm_func, - fake_impl=_w8a8_triton_block_scaled_mm_fake, - dispatch_key="CUDA", -) +# Note: the check can be removed when CPU torch > 2.7 +if not current_platform.is_cpu(): + direct_register_custom_op( + "w8a8_triton_block_scaled_mm_func", + _w8a8_triton_block_scaled_mm_func, + fake_impl=_w8a8_triton_block_scaled_mm_fake, + dispatch_key="CUDA", + ) # TODO fix ROCm->Triton custom path: From 27ec3c78f387f2eaed8c51e749a49972f60225ca Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Wed, 24 Sep 2025 16:03:13 +0800 Subject: [PATCH 328/518] [CI/Build] Fix v1 OOT registration test (#25547) Signed-off-by: Isotr0py --- tests/conftest.py | 4 +-- tests/models/test_oot_registration.py | 2 -- vllm/model_executor/models/registry.py | 35 +++++++++++++------------- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index dc70c9835959..a50985a465e6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1079,7 +1079,7 @@ def dummy_llava_path(): local_dir=_dummy_llava_path, ignore_patterns=[ "*.bin", "*.bin.index.json", "*.pt", "*.h5", - "*.msgpack" + "*.msgpack", "*.safetensors" ]) assert os.path.exists(json_path) with open(json_path) as f: @@ -1098,7 +1098,7 @@ def dummy_gemma2_embedding_path(): local_dir=_dummy_gemma2_embedding_path, ignore_patterns=[ "*.bin", "*.bin.index.json", "*.pt", "*.h5", - "*.msgpack" + "*.msgpack", "*.safetensors" ]) assert os.path.exists(json_path) with open(json_path) as f: diff --git a/tests/models/test_oot_registration.py b/tests/models/test_oot_registration.py index 9b376f2a260a..4aa7bb729789 100644 --- a/tests/models/test_oot_registration.py +++ b/tests/models/test_oot_registration.py @@ -42,7 +42,6 @@ def test_oot_registration_text_generation( assert rest == "" -@pytest.mark.skip(reason="This test is skipped because it failed on V1.") @create_new_process_for_each_test() def 
test_oot_registration_embedding( monkeypatch: pytest.MonkeyPatch, @@ -63,7 +62,6 @@ def test_oot_registration_embedding( image = convert_image_mode(ImageAsset("cherry_blossom").pil_image, "RGB") -@pytest.mark.skip(reason="This test is skipped because it failed on V1.") @create_new_process_for_each_test() def test_oot_registration_multimodal( monkeypatch: pytest.MonkeyPatch, diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 6ab3fa902c38..ac0ec6ca146c 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -483,23 +483,23 @@ def _save_modelinfo_to_cache(self, mi: _ModelInfo, def inspect_model_cls(self) -> _ModelInfo: model_path = Path( __file__).parent / f"{self.module_name.split('.')[-1]}.py" + module_hash = None - assert model_path.exists(), \ - f"Model {self.module_name} expected to be on path {model_path}" - with open(model_path, "rb") as f: - module_hash = hashlib.md5(f.read()).hexdigest() - - mi = self._load_modelinfo_from_cache(module_hash) - if mi is not None: - logger.debug(("Loaded model info " - "for class %s.%s from cache"), self.module_name, - self.class_name) - return mi - else: - logger.debug(("Cache model info " - "for class %s.%s miss. " - "Loading model instead."), self.module_name, - self.class_name) + if model_path.exists(): + with open(model_path, "rb") as f: + module_hash = hashlib.md5(f.read()).hexdigest() + + mi = self._load_modelinfo_from_cache(module_hash) + if mi is not None: + logger.debug(("Loaded model info " + "for class %s.%s from cache"), self.module_name, + self.class_name) + return mi + else: + logger.debug(("Cache model info " + "for class %s.%s miss. " + "Loading model instead."), self.module_name, + self.class_name) # Performed in another process to avoid initializing CUDA mi = _run_in_subprocess( @@ -508,7 +508,8 @@ def inspect_model_cls(self) -> _ModelInfo: self.class_name) # save cache file - self._save_modelinfo_to_cache(mi, module_hash) + if module_hash is not None: + self._save_modelinfo_to_cache(mi, module_hash) return mi From 6488f3481be77bbce66a9ebfcab3f6bc5b961838 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 24 Sep 2025 16:15:00 +0800 Subject: [PATCH 329/518] [Misc]] Move processing context to multimodal directory (#25548) Signed-off-by: DarkLight1337 --- .../multimodal/processing/test_common.py | 4 +- .../processing/test_tensor_schema.py | 4 +- tests/models/utils.py | 13 +- tests/multimodal/test_processing.py | 4 +- vllm/inputs/__init__.py | 3 - vllm/inputs/registry.py | 206 ---------------- .../models/hyperclovax_vision.py | 6 +- vllm/model_executor/models/llava.py | 7 +- vllm/model_executor/models/mistral3.py | 7 +- vllm/model_executor/models/mllama4.py | 7 +- vllm/model_executor/models/tarsier.py | 6 +- vllm/multimodal/processing.py | 231 +++++++++++++++++- vllm/multimodal/registry.py | 6 +- 13 files changed, 262 insertions(+), 242 deletions(-) delete mode 100644 vllm/inputs/registry.py diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index 0941cc3f608e..4eb8e0cfaa5d 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -12,11 +12,11 @@ from PIL import Image from vllm.config import ModelConfig -from vllm.inputs import InputProcessingContext from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalDataDict from vllm.multimodal.cache import MultiModalProcessorOnlyCache from vllm.multimodal.inputs import 
MultiModalInputs -from vllm.multimodal.processing import BaseMultiModalProcessor +from vllm.multimodal.processing import (BaseMultiModalProcessor, + InputProcessingContext) from vllm.transformers_utils.tokenizer import (AnyTokenizer, MistralTokenizer, cached_tokenizer_from_config, encode_tokens) diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index b678313752d6..d5d5bfaa3b45 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -18,10 +18,10 @@ from vllm.distributed import (cleanup_dist_env_and_memory, init_distributed_environment, initialize_model_parallel) -from vllm.inputs import InputProcessingContext from vllm.model_executor.model_loader.utils import set_default_torch_dtype from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs -from vllm.multimodal.processing import BaseMultiModalProcessor +from vllm.multimodal.processing import (BaseMultiModalProcessor, + InputProcessingContext) from vllm.multimodal.utils import group_mm_kwargs_by_modality from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config from vllm.utils import is_list_of diff --git a/tests/models/utils.py b/tests/models/utils.py index 5da2382cef81..f80e92ebb3e2 100644 --- a/tests/models/utils.py +++ b/tests/models/utils.py @@ -11,8 +11,9 @@ from transformers import PretrainedConfig from vllm.config import ModelConfig, ModelDType, RunnerOption -from vllm.inputs import InputContext from vllm.logprobs import Logprob, PromptLogprobs, SampleLogprobs +from vllm.multimodal.processing import InputProcessingContext +from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config from .registry import HF_EXAMPLE_MODELS @@ -264,7 +265,7 @@ def build_model_context( limit_mm_per_prompt: Optional[dict[str, int]] = None, mm_processor_cache_gb: int = 0, ): - """Creates an InputContext for a given model. + """Creates an InputProcessingContext for a given model. Args: model_id: ID of the model being considered. @@ -273,7 +274,7 @@ def build_model_context( limit_mm_per_prompt: Multimodal limits. Returns: - InputContext for the model being considered. + InputProcessingContext for the model being considered. 
""" model_info = HF_EXAMPLE_MODELS.find_hf_info(model_id) model_info.check_available_online(on_fail="skip") @@ -298,7 +299,11 @@ def build_model_context( enforce_eager=model_info.enforce_eager, **model_config_kwargs, ) - return InputContext(model_config) + + return InputProcessingContext( + model_config, + tokenizer=cached_tokenizer_from_config(model_config), + ) def check_embeddings_close( diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py index 6ce5fcfe644b..352b5b5b4fd4 100644 --- a/tests/multimodal/test_processing.py +++ b/tests/multimodal/test_processing.py @@ -8,11 +8,11 @@ import pytest from vllm.config import ModelConfig -from vllm.inputs import InputProcessingContext from vllm.multimodal import MULTIMODAL_REGISTRY # yapf conflicts with isort for this block # yapf: disable -from vllm.multimodal.processing import (PlaceholderFeaturesInfo, +from vllm.multimodal.processing import (InputProcessingContext, + PlaceholderFeaturesInfo, PromptIndexTargets, PromptInsertion, PromptReplacement, apply_text_matches, apply_token_matches, diff --git a/vllm/inputs/__init__.py b/vllm/inputs/__init__.py index 46f49aaa013d..3f1cac531f45 100644 --- a/vllm/inputs/__init__.py +++ b/vllm/inputs/__init__.py @@ -7,7 +7,6 @@ SingletonPrompt, TextPrompt, TokenInputs, TokensPrompt, build_explicit_enc_dec_prompt, embeds_inputs, to_enc_dec_tuple_list, token_inputs, zip_enc_dec_prompts) -from .registry import InputContext, InputProcessingContext __all__ = [ "DataPrompt", @@ -28,6 +27,4 @@ "build_explicit_enc_dec_prompt", "to_enc_dec_tuple_list", "zip_enc_dec_prompts", - "InputContext", - "InputProcessingContext", ] diff --git a/vllm/inputs/registry.py b/vllm/inputs/registry.py deleted file mode 100644 index 0aad78b04e34..000000000000 --- a/vllm/inputs/registry.py +++ /dev/null @@ -1,206 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import time -from collections.abc import Mapping -from dataclasses import dataclass -from typing import TYPE_CHECKING, Any, Union - -import torch -from transformers import BatchFeature, PretrainedConfig, ProcessorMixin -from typing_extensions import TypeVar - -from vllm.logger import init_logger -from vllm.transformers_utils.processor import cached_processor_from_config -from vllm.utils import get_allowed_kwarg_only_overrides -from vllm.utils.jsontree import JSONTree, json_map_leaves - -if TYPE_CHECKING: - from vllm.config import ModelConfig - from vllm.transformers_utils.tokenizer import AnyTokenizer -else: - ModelConfig = Any - AnyTokenizer = Any - -_T = TypeVar("_T") -_C = TypeVar("_C", bound=PretrainedConfig, default=PretrainedConfig) -_P = TypeVar("_P", bound=ProcessorMixin, default=ProcessorMixin) - -logger = init_logger(__name__) - - -@dataclass(frozen=True) -class InputContext: - """ - Contains information about the model which may be used to - modify the inputs. - """ - - model_config: ModelConfig - """The configuration of the model.""" - - def get_hf_config( - self, - typ: Union[type[_C], tuple[type[_C], ...]] = PretrainedConfig, - /, - ) -> _C: - """ - Get the HuggingFace configuration - (`transformers.PretrainedConfig`) of the model, - additionally checking its type. - - Raises: - TypeError: If the configuration is not of the specified type. - """ - hf_config = self.model_config.hf_config - if not isinstance(hf_config, typ): - raise TypeError("Invalid type of HuggingFace config. 
" - f"Expected type: {typ}, but " - f"found type: {type(hf_config)}") - - return hf_config - - def get_hf_image_processor_config(self) -> dict[str, Any]: - """ - Get the HuggingFace image processor configuration of the model. - """ - return self.model_config.hf_image_processor_config - - def get_mm_config(self): - """ - Get the multimodal config of the model. - - Raises: - RuntimeError: If the model is not a multimodal model. - """ - mm_config = self.model_config.multimodal_config - if mm_config is None: - raise RuntimeError("Not a multimodal model") - - return mm_config - - def get_hf_processor( - self, - typ: Union[type[_P], tuple[type[_P], ...]] = ProcessorMixin, - /, - **kwargs: object, - ) -> _P: - """ - Get the HuggingFace processor - (`transformers.ProcessorMixin`) of the model, - additionally checking its type. - - Raises: - TypeError: If the processor is not of the specified type. - """ - return cached_processor_from_config( - self.model_config, - processor_cls=typ, - **kwargs, - ) - - def init_processor( - self, - typ: type[_T], - /, - **kwargs: object, - ) -> _T: - """ - Initialize a HuggingFace-like processor class, merging the - keyword arguments with those in the model's configuration. - """ - mm_config = self.model_config.get_multimodal_config() - base_kwargs = mm_config.mm_processor_kwargs - if base_kwargs is None: - base_kwargs = {} - - merged_kwargs = {**base_kwargs, **kwargs} - - return typ(**merged_kwargs) - - -@dataclass(frozen=True) -class InputProcessingContext(InputContext): - tokenizer: AnyTokenizer - """The tokenizer used to tokenize the inputs.""" - - def get_hf_processor( - self, - typ: Union[type[_P], tuple[type[_P], ...]] = ProcessorMixin, - /, - **kwargs: object, - ) -> _P: - return super().get_hf_processor( - typ, - tokenizer=self.tokenizer, - **kwargs, - ) - - def call_hf_processor( - self, - hf_processor: ProcessorMixin, - data: Mapping[str, object], - kwargs: Mapping[str, object] = {}, - *, - num_tries: int = 1, - max_tries: int = 5, - ) -> Union[BatchFeature, JSONTree]: - """ - Call `hf_processor` on the prompt `data` - (text, image, audio...) with configurable options `kwargs`. - """ - assert callable(hf_processor) - - mm_config = self.model_config.get_multimodal_config() - merged_kwargs = mm_config.merge_mm_processor_kwargs(kwargs) - - allowed_kwargs = get_allowed_kwarg_only_overrides( - hf_processor, - merged_kwargs, - requires_kw_only=False, - allow_var_kwargs=True, - ) - - def maybe_cast_dtype(x): - # This mimics the behavior of transformers.BatchFeature - if isinstance(x, torch.Tensor) and x.is_floating_point(): - return x.to(dtype=self.model_config.dtype) - return x - - try: - output = hf_processor(**data, - **allowed_kwargs, - return_tensors="pt") - # this emulates output.to(dtype=self.model_config.dtype) - if isinstance(output, BatchFeature): - cast_output = json_map_leaves(maybe_cast_dtype, output.data) - return BatchFeature(cast_output) - - cast_output = json_map_leaves(maybe_cast_dtype, output) - - logger.warning_once( - f"{type(hf_processor).__name__} did not return `BatchFeature`. " - "Make sure to match the behaviour of `ProcessorMixin` when " - "implementing custom processors.") - return cast_output - - except Exception as exc: - # See https://github.com/huggingface/tokenizers/issues/537 - if (isinstance(exc, RuntimeError) and exc - and exc.args[0] == "Already borrowed" - and num_tries < max_tries): - logger.warning( - "Failed to acquire tokenizer in current thread. 
" - "Retrying (%d/%d)...", num_tries, max_tries) - time.sleep(0.5) - return self.call_hf_processor( - hf_processor, - data, - kwargs, - num_tries=num_tries + 1, - max_tries=max_tries, - ) - - msg = (f"Failed to apply {type(hf_processor).__name__} " - f"on data={data} with kwargs={allowed_kwargs}") - - raise ValueError(msg) from exc diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index 54167f9f1099..4d39ff9ae79e 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -29,7 +29,6 @@ from transformers.modeling_utils import no_init_weights from vllm.config import VllmConfig -from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.cache import BaseMultiModalProcessorCache @@ -37,8 +36,9 @@ MultiModalKwargsItems) from vllm.multimodal.parse import ImageSize, MultiModalDataItems from vllm.multimodal.processing import (BaseMultiModalProcessor, - BaseProcessingInfo, PromptReplacement, - PromptUpdate) + BaseProcessingInfo, + InputProcessingContext, + PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index e2d7b9f23b28..8d7feb965e76 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -15,7 +15,6 @@ from transformers.models.pixtral import PixtralProcessor from vllm.config import VllmConfig -from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) @@ -28,8 +27,10 @@ from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems, ImageSize, MultiModalDataItems) from vllm.multimodal.processing import (BaseMultiModalProcessor, - BaseProcessingInfo, PromptReplacement, - PromptUpdate, PromptUpdateDetails) + BaseProcessingInfo, + InputProcessingContext, + PromptReplacement, PromptUpdate, + PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors from vllm.utils.jsontree import json_map_leaves diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index 94e3d7234b6f..ba6da4403ae1 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -13,7 +13,6 @@ from transformers.models.pixtral import PixtralProcessor from vllm.config import VllmConfig -from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, @@ -27,8 +26,10 @@ from vllm.multimodal.parse import (ImageProcessorItems, ImageSize, MultiModalDataItems) from vllm.multimodal.processing import (BaseMultiModalProcessor, - BaseProcessingInfo, PromptReplacement, - PromptUpdate, PromptUpdateDetails) + BaseProcessingInfo, + InputProcessingContext, + PromptReplacement, PromptUpdate, + PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors from vllm.utils.tensor_schema import TensorSchema, TensorShape diff --git a/vllm/model_executor/models/mllama4.py 
b/vllm/model_executor/models/mllama4.py index 50521b593786..79e315f79489 100644 --- a/vllm/model_executor/models/mllama4.py +++ b/vllm/model_executor/models/mllama4.py @@ -32,7 +32,6 @@ from vllm.attention.layer import MultiHeadAttention from vllm.config import VllmConfig from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.linear import (ColumnParallelLinear, QKVParallelLinear, ReplicatedLinear, @@ -47,8 +46,10 @@ from vllm.multimodal.parse import (ImageProcessorItems, ImageSize, MultiModalDataItems) from vllm.multimodal.processing import (BaseMultiModalProcessor, - BaseProcessingInfo, PromptReplacement, - PromptUpdate, PromptUpdateDetails) + BaseProcessingInfo, + InputProcessingContext, + PromptReplacement, PromptUpdate, + PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors from vllm.utils.tensor_schema import TensorSchema, TensorShape diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index 67cf3ccf315d..b75c858a6480 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -17,7 +17,6 @@ from transformers.tokenization_utils_base import PreTokenizedInput, TextInput from vllm.config import VllmConfig -from vllm.inputs import InputProcessingContext from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) @@ -29,8 +28,9 @@ from vllm.multimodal.parse import (ImageEmbeddingItems, ImageProcessorItems, ImageSize, MultiModalDataItems) from vllm.multimodal.processing import (BaseMultiModalProcessor, - BaseProcessingInfo, PromptReplacement, - PromptUpdate) + BaseProcessingInfo, + InputProcessingContext, + PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors from vllm.utils.jsontree import json_map_leaves diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 7471bfcb4d50..78e2cb7fa733 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import time from abc import ABC, abstractmethod from collections import defaultdict from collections.abc import (Callable, Generator, ItemsView, Iterable, Mapping, @@ -7,18 +8,20 @@ from dataclasses import dataclass, field, replace from enum import Enum from functools import lru_cache -from typing import (TYPE_CHECKING, Generic, NamedTuple, Optional, Protocol, - TypeVar, Union, cast) +from typing import (TYPE_CHECKING, Any, Generic, NamedTuple, Optional, + Protocol, Union, cast, overload) import regex as re import torch -from typing_extensions import assert_never +from typing_extensions import TypeVar, assert_never -from vllm.inputs import InputProcessingContext from vllm.logger import init_logger +from vllm.transformers_utils.processor import cached_processor_from_config from vllm.transformers_utils.tokenizer import (AnyTokenizer, decode_tokens, encode_tokens) -from vllm.utils import flatten_2d_lists, full_groupby +from vllm.utils import (flatten_2d_lists, full_groupby, + get_allowed_kwarg_only_overrides) +from vllm.utils.jsontree import JSONTree, json_map_leaves from .hasher import MultiModalHasher from .inputs import (MultiModalDataDict, MultiModalEncDecInputs, @@ 
-34,6 +37,8 @@ from transformers.feature_extraction_utils import BatchFeature from transformers.processing_utils import ProcessorMixin + from vllm.config import ModelConfig + from .cache import BaseMultiModalProcessorCache from .profiling import BaseDummyInputsBuilder @@ -875,6 +880,222 @@ def find_mm_placeholders( return dict(full_groupby_modality(it)) +_T = TypeVar("_T") +_C = TypeVar("_C", bound="PretrainedConfig", default="PretrainedConfig") +_P = TypeVar("_P", bound="ProcessorMixin", default="ProcessorMixin") + + +@dataclass(frozen=True) +class InputProcessingContext: + """ + Contains information about the model which may be used to + modify the inputs. + """ + + model_config: "ModelConfig" + """The configuration of the model.""" + + tokenizer: AnyTokenizer + """The tokenizer used to tokenize the inputs.""" + + @overload + def get_hf_config(self, /) -> "PretrainedConfig": + ... + + @overload + def get_hf_config( + self, + typ: Union[type[_C], tuple[type[_C], ...]], + /, + ) -> _C: + ... + + def get_hf_config( + self, + typ: Optional[Union[type[Any], tuple[type[Any], ...]]] = None, + /, + ) -> Any: + """ + Get the HuggingFace configuration + (`transformers.PretrainedConfig`) of the model, + additionally checking its type. + + Raises: + TypeError: If the configuration is not of the specified type. + """ + if typ is None: + from transformers.configuration_utils import PretrainedConfig + + typ = PretrainedConfig + + hf_config = self.model_config.hf_config + if not isinstance(hf_config, typ): + raise TypeError("Invalid type of HuggingFace config. " + f"Expected type: {typ}, but " + f"found type: {type(hf_config)}") + + return hf_config + + def get_hf_image_processor_config(self) -> dict[str, Any]: + """ + Get the HuggingFace image processor configuration of the model. + """ + return self.model_config.hf_image_processor_config + + def get_mm_config(self): + """ + Get the multimodal config of the model. + + Raises: + RuntimeError: If the model is not a multimodal model. + """ + mm_config = self.model_config.multimodal_config + if mm_config is None: + raise RuntimeError("Not a multimodal model") + + return mm_config + + @overload + def get_hf_processor(self, /, **kwargs: object) -> "ProcessorMixin": + ... + + @overload + def get_hf_processor( + self, + typ: Union[type[_P], tuple[type[_P], ...]], + /, + **kwargs: object, + ) -> _P: + ... + + def get_hf_processor( + self, + typ: Optional[Union[type[Any], tuple[type[Any], ...]]] = None, + /, + **kwargs: object, + ) -> Any: + """ + Get the HuggingFace processor + (`transformers.ProcessorMixin`) of the model, + additionally checking its type. + + Raises: + TypeError: If the processor is not of the specified type. + """ + if typ is None: + from transformers.processing_utils import ProcessorMixin + + typ = ProcessorMixin + + return cached_processor_from_config( + self.model_config, + processor_cls=typ, + tokenizer=self.tokenizer, + **kwargs, + ) + + def init_processor( + self, + typ: type[_T], + /, + **kwargs: object, + ) -> _T: + """ + Initialize a HuggingFace-like processor class, merging the + keyword arguments with those in the model's configuration. 
+ """ + mm_config = self.model_config.get_multimodal_config() + base_kwargs = mm_config.mm_processor_kwargs + if base_kwargs is None: + base_kwargs = {} + + merged_kwargs = {**base_kwargs, **kwargs} + + return typ(**merged_kwargs) + + def _postprocess_output( + self, + output: JSONTree, + ) -> JSONTree: + + def _postprocess_one(x: object): + if isinstance(x, torch.Tensor): # noqa: SIM102 + # This mimics the behavior of transformers.BatchFeature + if x.is_floating_point(): + x = x.to(dtype=self.model_config.dtype) + + return x + + return json_map_leaves(_postprocess_one, output) + + def call_hf_processor( + self, + hf_processor: "ProcessorMixin", + data: Mapping[str, object], + kwargs: Mapping[str, object] = {}, + *, + num_tries: int = 1, + max_tries: int = 5, + ) -> Union["BatchFeature", JSONTree]: + """ + Call `hf_processor` on the prompt `data` + (text, image, audio...) with configurable options `kwargs`. + """ + assert callable(hf_processor) + + mm_config = self.model_config.get_multimodal_config() + merged_kwargs = mm_config.merge_mm_processor_kwargs(kwargs) + + allowed_kwargs = get_allowed_kwarg_only_overrides( + hf_processor, + merged_kwargs, + requires_kw_only=False, + allow_var_kwargs=True, + ) + + try: + output = hf_processor(**data, + **allowed_kwargs, + return_tensors="pt") + except Exception as exc: + # See https://github.com/huggingface/tokenizers/issues/537 + if (isinstance(exc, RuntimeError) and exc + and exc.args[0] == "Already borrowed" + and num_tries < max_tries): + logger.warning( + "Failed to acquire tokenizer in current thread. " + "Retrying (%d/%d)...", num_tries, max_tries) + time.sleep(0.5) + return self.call_hf_processor( + hf_processor, + data, + kwargs, + num_tries=num_tries + 1, + max_tries=max_tries, + ) + + msg = (f"Failed to apply {type(hf_processor).__name__} " + f"on data={data} with kwargs={allowed_kwargs}") + + raise ValueError(msg) from exc + + # this emulates output.to(dtype=self.model_config.dtype) + from transformers.feature_extraction_utils import BatchFeature + + if isinstance(output, BatchFeature): + output_ = self._postprocess_output(output.data) + return BatchFeature(output_) + + logger.warning_once( + "%s did not return `BatchFeature`. " + "Make sure to match the behaviour of `ProcessorMixin` when " + "implementing custom processors.", + type(hf_processor).__name__, + ) + + return self._postprocess_output(output) + + class BaseProcessingInfo: """Base class to provide the information necessary for data processing.""" diff --git a/vllm/multimodal/registry.py b/vllm/multimodal/registry.py index 5d485bc361d1..2bbc0078ad13 100644 --- a/vllm/multimodal/registry.py +++ b/vllm/multimodal/registry.py @@ -6,14 +6,14 @@ import torch.nn as nn -from vllm.inputs import InputProcessingContext from vllm.logger import init_logger from vllm.transformers_utils.tokenizer import (AnyTokenizer, cached_tokenizer_from_config) from vllm.utils import ClassRegistry from .cache import BaseMultiModalProcessorCache -from .processing import BaseMultiModalProcessor, BaseProcessingInfo +from .processing import (BaseMultiModalProcessor, BaseProcessingInfo, + InputProcessingContext) from .profiling import (BaseDummyInputsBuilder, DummyDecoderData, DummyEncoderData, MultiModalProfiler) @@ -41,7 +41,7 @@ def __call__( ... 
-class DummyInputsBuilderFactory(Protocol[_I]): +class DummyInputsBuilderFactory(Protocol[_I]): # type: ignore[misc] """ Constructs a [`BaseDummyInputsBuilder`][vllm.multimodal.profiling.BaseDummyInputsBuilder] From 77a7fce1bb7c081811548c64ae24049efd118c9a Mon Sep 17 00:00:00 2001 From: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Date: Wed, 24 Sep 2025 01:44:22 -0700 Subject: [PATCH 330/518] [CI/Build] add nightly prime-rl integration tests (#25207) Signed-off-by: Jackmin801 Signed-off-by: Michael Goin Co-authored-by: Michael Goin --- .buildkite/scripts/run-prime-rl-test.sh | 59 +++++++++++++++++++++++++ .buildkite/test-pipeline.yaml | 12 +++++ 2 files changed, 71 insertions(+) create mode 100755 .buildkite/scripts/run-prime-rl-test.sh diff --git a/.buildkite/scripts/run-prime-rl-test.sh b/.buildkite/scripts/run-prime-rl-test.sh new file mode 100755 index 000000000000..5b25c358fc4a --- /dev/null +++ b/.buildkite/scripts/run-prime-rl-test.sh @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +# Setup script for Prime-RL integration tests +# This script prepares the environment for running Prime-RL tests with nightly vLLM + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +PRIME_RL_REPO="https://github.com/PrimeIntellect-ai/prime-rl.git" +PRIME_RL_DIR="${REPO_ROOT}/prime-rl" + +echo "Setting up Prime-RL integration test environment..." + +# Clean up any existing Prime-RL directory +if [ -d "${PRIME_RL_DIR}" ]; then + echo "Removing existing Prime-RL directory..." + rm -rf "${PRIME_RL_DIR}" +fi + +# Install UV if not available +if ! command -v uv &> /dev/null; then + echo "Installing UV package manager..." + curl -LsSf https://astral.sh/uv/install.sh | sh + source $HOME/.local/bin/env +fi + +# Clone Prime-RL repository at specific branch for reproducible tests +PRIME_RL_BRANCH="integ-vllm-main" +echo "Cloning Prime-RL repository at branch: ${PRIME_RL_BRANCH}..." +git clone --branch "${PRIME_RL_BRANCH}" --single-branch "${PRIME_RL_REPO}" "${PRIME_RL_DIR}" +cd "${PRIME_RL_DIR}" + +echo "Setting up UV project environment..." +export UV_PROJECT_ENVIRONMENT=/usr/local +ln -s /usr/bin/python3 /usr/local/bin/python + +# Remove vllm pin from pyproject.toml +echo "Removing vllm pin from pyproject.toml..." +sed -i '/vllm==/d' pyproject.toml + +# Sync Prime-RL dependencies +echo "Installing Prime-RL dependencies..." +uv sync --inexact && uv sync --inexact --all-extras + +# Verify installation +echo "Verifying installations..." +uv run python -c "import vllm; print(f'vLLM version: {vllm.__version__}')" +uv run python -c "import prime_rl; print('Prime-RL imported successfully')" + +echo "Prime-RL integration test environment setup complete!" + +echo "Running Prime-RL integration tests..." +export WANDB_MODE=offline # this makes this test not require a WANDB_API_KEY +uv run pytest -vs tests/integration/test_rl.py -m gpu + +echo "Prime-RL integration tests completed!" 
diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 245d21b778c0..200ed344c4e8 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -1045,3 +1045,15 @@ steps: commands: - pytest -v -s tests/distributed/test_context_parallel.py - pytest -v -s tests/distributed/test_nccl_symm_mem_allreduce.py + +##### RL Integration Tests ##### +- label: Prime-RL Integration Test # 15min + timeout_in_minutes: 30 + optional: true + num_gpus: 2 + working_dir: "/vllm-workspace" + source_file_dependencies: + - vllm/ + - .buildkite/scripts/run-prime-rl-test.sh + commands: + - bash .buildkite/scripts/run-prime-rl-test.sh From 2e19a848d42d95ebef0a5760f9cff673acaf5ca1 Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 24 Sep 2025 01:51:39 -0700 Subject: [PATCH 331/518] [V0 Deprecation] Remove max_seq_len_to_capture (#25543) Signed-off-by: Woosuk Kwon --- tests/tpu/lora/test_lora.py | 1 - vllm/attention/backends/utils.py | 4 ++-- vllm/config/model.py | 18 ------------------ vllm/config/speculative.py | 2 -- vllm/engine/arg_utils.py | 4 ---- vllm/entrypoints/llm.py | 7 ------- vllm/model_executor/models/config.py | 14 -------------- 7 files changed, 2 insertions(+), 48 deletions(-) diff --git a/tests/tpu/lora/test_lora.py b/tests/tpu/lora/test_lora.py index 636108e98581..5196a92cb727 100644 --- a/tests/tpu/lora/test_lora.py +++ b/tests/tpu/lora/test_lora.py @@ -31,7 +31,6 @@ def use_v1_only(monkeypatch: pytest.MonkeyPatch): def setup_vllm(num_loras: int, tp: int) -> vllm.LLM: return vllm.LLM(model="Qwen/Qwen2.5-3B-Instruct", max_model_len=256, - max_seq_len_to_capture=256, max_num_seqs=8, tensor_parallel_size=tp, enable_lora=True, diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index 63ee8f50825c..accb3ab6ae2b 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -304,7 +304,7 @@ def graph_capture_get_metadata_for_batch( max_query_len=1, max_decode_query_len=1, max_prefill_seq_len=0, - max_decode_seq_len=self.runner.max_seq_len_to_capture, + max_decode_seq_len=self.runner.max_model_len, query_start_loc=None, seq_start_loc=None, context_lens_tensor=None, @@ -390,7 +390,7 @@ def _update_captured_metadata_for_enc_dec_model(self, batch_size: int, dtype=torch.int).cuda() attn_metadata.encoder_seq_lens_tensor = torch.full( (batch_size, ), 1, dtype=torch.int).cuda() - attn_metadata.max_encoder_seq_len = self.runner.max_seq_len_to_capture + attn_metadata.max_encoder_seq_len = self.runner.max_model_len attn_metadata.num_encoder_tokens = 0 def _add_additional_input_buffers_for_enc_dec_model( diff --git a/vllm/config/model.py b/vllm/config/model.py index d75bd5fa47b3..f37489bdfff5 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -177,11 +177,6 @@ class ModelConfig: graph and always execute the model in eager mode. If False, we will use CUDA graph and eager execution in hybrid for maximal performance and flexibility.""" - max_seq_len_to_capture: int = 8192 - """Maximum sequence len covered by CUDA graphs. When a sequence has context - length larger than this, we fall back to eager mode. Additionally for - encoder-decoder models, if the sequence length of the encoder input is - larger than this, we fall back to the eager mode.""" max_logprobs: int = 20 """Maximum number of log probabilities to return when `logprobs` is specified in `SamplingParams`. 
The default value comes the default for the @@ -1024,21 +1019,8 @@ def _verify_quantization(self) -> None: current_platform.verify_quantization(self.quantization) def _verify_cuda_graph(self) -> None: - # The `max_seq_len_to_capture` was incorrectly - # based on the encoder's input length (448) - # but not the decoder's larger input length (1500). - # This change ensures the CUDA Graph captures the correct, - # larger sequence length, allowing it to work as intended. - effective_max_seq_len = self.max_model_len - if self.is_encoder_decoder: - effective_max_seq_len = max( - effective_max_seq_len, - getattr(self.hf_config, "max_source_positions", 0)) - self.max_seq_len_to_capture = min(self.max_seq_len_to_capture, - effective_max_seq_len) # CUDAGraph capture not supported for encoder-decoder models on ROCm unsupported_rocm = self.is_encoder_decoder - if (unsupported_rocm and not self.enforce_eager and current_platform.is_rocm()): logger.warning( diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index d533930e1c7a..34b17628def1 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -285,8 +285,6 @@ def __post_init__(self): max_model_len, quantization=self.quantization, enforce_eager=self.target_model_config.enforce_eager, - max_seq_len_to_capture=self.target_model_config. - max_seq_len_to_capture, max_logprobs=self.target_model_config.max_logprobs, hf_overrides=SpeculativeConfig.hf_config_override, ) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 556a490ffa10..3f0dfce1b4b5 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -373,7 +373,6 @@ class EngineArgs: tokenizer_revision: Optional[str] = ModelConfig.tokenizer_revision quantization: Optional[QuantizationMethods] = ModelConfig.quantization enforce_eager: bool = ModelConfig.enforce_eager - max_seq_len_to_capture: int = ModelConfig.max_seq_len_to_capture disable_custom_all_reduce: bool = ParallelConfig.disable_custom_all_reduce limit_mm_per_prompt: dict[str, int] = \ get_field(MultiModalConfig, "limit_per_prompt") @@ -545,8 +544,6 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **model_kwargs["quantization"]) model_group.add_argument("--enforce-eager", **model_kwargs["enforce_eager"]) - model_group.add_argument("--max-seq-len-to-capture", - **model_kwargs["max_seq_len_to_capture"]) model_group.add_argument("--max-logprobs", **model_kwargs["max_logprobs"]) model_group.add_argument("--logprobs-mode", @@ -1008,7 +1005,6 @@ def create_model_config(self) -> ModelConfig: max_model_len=self.max_model_len, quantization=self.quantization, enforce_eager=self.enforce_eager, - max_seq_len_to_capture=self.max_seq_len_to_capture, max_logprobs=self.max_logprobs, logprobs_mode=self.logprobs_mode, disable_sliding_window=self.disable_sliding_window, diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index c41f44aa4718..dfe535b95917 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -130,11 +130,6 @@ class LLM: enforce_eager: Whether to enforce eager execution. If True, we will disable CUDA graph and always execute the model in eager mode. If False, we will use CUDA graph and eager execution in hybrid. - max_seq_len_to_capture: Maximum sequence len covered by CUDA graphs. - When a sequence has context length larger than this, we fall back - to eager mode. Additionally for encoder-decoder models, if the - sequence length of the encoder input is larger than this, we fall - back to the eager mode. 
disable_custom_all_reduce: See [ParallelConfig][vllm.config.ParallelConfig]. hf_token: The token to use as HTTP bearer authorization for remote files @@ -184,7 +179,6 @@ def __init__( swap_space: float = 4, cpu_offload_gb: float = 0, enforce_eager: bool = False, - max_seq_len_to_capture: int = 8192, disable_custom_all_reduce: bool = False, hf_token: Optional[Union[bool, str]] = None, hf_overrides: Optional[HfOverrides] = None, @@ -281,7 +275,6 @@ def __init__( swap_space=swap_space, cpu_offload_gb=cpu_offload_gb, enforce_eager=enforce_eager, - max_seq_len_to_capture=max_seq_len_to_capture, disable_custom_all_reduce=disable_custom_all_reduce, hf_token=hf_token, hf_overrides=hf_overrides, diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index aa7bcf5b65ad..cab85ea347f4 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -245,19 +245,6 @@ def verify_and_update_config(vllm_config: "VllmConfig") -> None: } -class GraniteMoeHybridModelConfig(VerifyAndUpdateConfig): - - @staticmethod - def verify_and_update_config(vllm_config: "VllmConfig") -> None: - config = vllm_config.model_config - config.max_seq_len_to_capture = config.max_model_len - logger.info( - "Setting max_seq_len_to_capture to %d " - "to ensure that CUDA graph capture " - "covers sequences of length up to max_model_len.", - config.max_model_len) - - class GptOssForCausalLMConfig(VerifyAndUpdateConfig): @staticmethod @@ -426,7 +413,6 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: "XLMRobertaModel": JinaRobertaModelConfig, "JinaVLForRanking": JinaVLForSequenceClassificationConfig, "JambaForSequenceClassification": JambaForSequenceClassificationConfig, - "GraniteMoeHybridForCausalLM": GraniteMoeHybridModelConfig, "GptOssForCausalLM": GptOssForCausalLMConfig, "MambaForCausalLM": MambaModelConfig, "Mamba2ForCausalLM": MambaModelConfig, From 2338daffd3ec5935f6a4b53fdd62d0bde5ab1367 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Wed, 24 Sep 2025 05:04:04 -0400 Subject: [PATCH 332/518] [BugFix] Potential Fix for FA3 full-cudagraph IMA (#25490) Signed-off-by: Lucas Wilkinson --- vllm/v1/attention/backends/flash_attn.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index d564cf9988ea..a2e18f970bec 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -194,10 +194,9 @@ def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], self.use_full_cuda_graph = \ self.compilation_config.cudagraph_mode.has_full_cudagraphs() + self.max_cudagraph_size = self.compilation_config.max_capture_size if self.use_full_cuda_graph and self.aot_schedule: - self.max_cudagraph_size = self.compilation_config.max_capture_size - if self.max_cudagraph_size > 992: # This condition derives from FA3's internal heuristic. # TODO(woosuk): Support larger cudagraph sizes. @@ -259,6 +258,15 @@ def build(self, self.aot_schedule = False aot_schedule = False + max_num_splits = 0 # 0 means use FA3's heuristics, not CG compatible + if self.use_full_cuda_graph and \ + num_actual_tokens <= self.max_cudagraph_size: + # NOTE(woosuk): Setting num_splits > 1 may increase the memory + # usage, because the intermediate buffers of size [num_splits, + # num_heads, num_tokens, head_size] are allocated. Therefore, + # we only set num_splits when using cuda graphs. 
+ max_num_splits = self.max_num_splits + def schedule(batch_size, cu_query_lens, max_query_len, seqlens, max_seq_len, causal): cache_dtype = self.cache_config.cache_dtype @@ -281,7 +289,7 @@ def schedule(batch_size, cu_query_lens, max_query_len, seqlens, page_size=self.block_size, causal=causal, window_size=self.aot_sliding_window, - num_splits=self.max_num_splits, + num_splits=max_num_splits, ) return None @@ -322,7 +330,6 @@ def schedule(batch_size, cu_query_lens, max_query_len, seqlens, max_seq_len=max_seq_len, causal=causal) # For FA3 + full cudagraph - max_num_splits = 0 if self.use_full_cuda_graph and scheduler_metadata is not None: n = scheduler_metadata.shape[0] self.scheduler_metadata[:n] = scheduler_metadata @@ -333,13 +340,6 @@ def schedule(batch_size, cu_query_lens, max_query_len, seqlens, self.scheduler_metadata[n:] = 0 scheduler_metadata = self.scheduler_metadata[:n] - if num_actual_tokens <= self.max_cudagraph_size: - # NOTE(woosuk): Setting num_splits > 1 may increase the memory - # usage, because the intermediate buffers of size [num_splits, - # num_heads, num_tokens, head_size] are allocated. Therefore, - # we only set num_splits when using cuda graphs. - max_num_splits = self.max_num_splits - attn_metadata = FlashAttentionMetadata( num_actual_tokens=num_actual_tokens, max_query_len=max_query_len, From b67dece2d8a1fb8136948932a1203fdef6eb8981 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Wed, 24 Sep 2025 17:24:35 +0800 Subject: [PATCH 333/518] [misc] update the warning message (#25566) Signed-off-by: youkaichao --- vllm/distributed/device_communicators/shm_broadcast.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index c7810043b81e..deeed1f21b4e 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -392,7 +392,8 @@ def acquire_write(self, timeout: Optional[float] = None): > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): logger.debug( ("No available shared memory broadcast block found" - " in %s second."), + " in %s seconds. This typically happens when some" + " processes are hanging."), VLLM_RINGBUFFER_WARNING_INTERVAL, ) n_warning += 1 @@ -455,7 +456,8 @@ def acquire_read(self, > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): logger.debug( ("No available shared memory broadcast block found" - " in %s second."), + " in %s seconds. 
This typically happens when some" + " processes are hanging."), VLLM_RINGBUFFER_WARNING_INTERVAL, ) n_warning += 1 From 42488dae690b69cbf5ad022ac22070608e65dc0e Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Wed, 24 Sep 2025 02:47:30 -0700 Subject: [PATCH 334/518] [Bugfix] Fix dummy video number of frames calculation (#25553) Signed-off-by: Roger Wang --- vllm/v1/worker/gpu_model_runner.py | 2 +- vllm/v1/worker/tpu_model_runner.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index eebdbcc621c6..ee339e22cea9 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2807,7 +2807,7 @@ def _get_mm_dummy_batch( dummy_decoder_data = self.mm_registry.get_decoder_dummy_data( model_config=self.model_config, - seq_len=self.max_num_tokens, + seq_len=self.max_model_len, mm_counts={modality: 1}, cache=self.mm_budget.cache, ) diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 4cbf991a14c1..4a2adb1e6510 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -1795,7 +1795,7 @@ def _get_mm_dummy_batch( dummy_decoder_data = self.mm_registry.get_decoder_dummy_data( model_config=self.model_config, - seq_len=self.max_num_tokens, + seq_len=self.max_model_len, mm_counts={modality: 1}, cache=self.mm_budget.cache, ) From 58c360d9be8c3963bf5cdf82a275570e759adef1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20M=2E=20K=C3=BCbler?= <44084297+jmkuebler@users.noreply.github.com> Date: Wed, 24 Sep 2025 12:17:59 +0200 Subject: [PATCH 335/518] [Bug] fix import and unit test (#25558) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Jonas M. 
Kübler <44084297+jmkuebler@users.noreply.github.com> --- tests/v1/attention/test_attention_splitting.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v1/attention/test_attention_splitting.py b/tests/v1/attention/test_attention_splitting.py index 2fd11415d490..d81f3da7e9cd 100644 --- a/tests/v1/attention/test_attention_splitting.py +++ b/tests/v1/attention/test_attention_splitting.py @@ -11,7 +11,7 @@ slice_query_start_locs, split_attn_metadata, split_decodes_and_prefills) -from vllm.v1.worker.ubatch_utils import create_ubatch_slices +from vllm.v1.worker.ubatch_splitting import create_ubatch_slices @pytest.fixture From 164299500be5ae1e8c71e06d4b90ac82d9a0b31b Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Wed, 24 Sep 2025 06:40:42 -0400 Subject: [PATCH 336/518] [Benchmark] Fix regression in structured output benchmark (#25500) Signed-off-by: Russell Bryant --- benchmarks/benchmark_serving_structured_output.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/benchmarks/benchmark_serving_structured_output.py b/benchmarks/benchmark_serving_structured_output.py index 73b4aa5a87e0..a0350625491f 100644 --- a/benchmarks/benchmark_serving_structured_output.py +++ b/benchmarks/benchmark_serving_structured_output.py @@ -449,7 +449,8 @@ async def benchmark( def prepare_extra_body(request) -> dict: extra_body = {} # Add the schema to the extra_body - extra_body[request.structure_type] = request.schema + extra_body["structured_outputs"] = {} + extra_body["structured_outputs"][request.structure_type] = request.schema return extra_body print("Starting initial single prompt test run...") From b1068903fdca26cf6b4a1a51a32c3365ce3ac636 Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Wed, 24 Sep 2025 19:00:27 +0800 Subject: [PATCH 337/518] [docs] fix nixl kv_connector_extra_config.backends key (#25565) Signed-off-by: Peter Pan Signed-off-by: Peter Pan Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- docs/features/disagg_prefill.md | 2 +- docs/serving/expert_parallel_deployment.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/features/disagg_prefill.md b/docs/features/disagg_prefill.md index 2c69304db339..fe065b52268a 100644 --- a/docs/features/disagg_prefill.md +++ b/docs/features/disagg_prefill.md @@ -34,7 +34,7 @@ Now supports 5 types of connectors: For NixlConnector, you may also specify one or multiple NIXL_Backend. Such as: ```bash - --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_buffer_device":"cuda", "kv_connector_extra_config":{"backend":["UCX", "GDS"]}' + --kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_buffer_device":"cuda", "kv_connector_extra_config":{"backends":["UCX", "GDS"]}}' ``` - **OffloadingConnector**: enable offloading of KV data to CPU memory, customizing the CPU block size (in tokens) and number of blocks to allocate (per worker): diff --git a/docs/serving/expert_parallel_deployment.md b/docs/serving/expert_parallel_deployment.md index f823d33df80e..e44a914c726d 100644 --- a/docs/serving/expert_parallel_deployment.md +++ b/docs/serving/expert_parallel_deployment.md @@ -193,7 +193,7 @@ For production deployments requiring strict SLA guarantees for time-to-first-tok 1. **Install gdrcopy/ucx/nixl**: For maximum performance, run the [install_gdrcopy.sh](gh-file:tools/install_gdrcopy.sh) script to install `gdrcopy` (e.g., `install_gdrcopy.sh "${GDRCOPY_OS_VERSION}" "12.8" "x64"`). 
You can find available OS versions [here](https://developer.download.nvidia.com/compute/redist/gdrcopy/CUDA%2012.8/). If `gdrcopy` is not installed, things will still work with a plain `pip install nixl`, just with lower performance. `nixl` and `ucx` are installed as dependencies via pip. -2. **Configure Both Instances**: Add this flag to both prefill and decode instances `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}`. Noted, you may also specify one or multiple NIXL_Backend. Such as: `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_connector_extra_config":{"backend":["UCX", "GDS"]}'` +2. **Configure Both Instances**: Add this flag to both prefill and decode instances `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both"}`. Noted, you may also specify one or multiple NIXL_Backend. Such as: `--kv-transfer-config '{"kv_connector":"NixlConnector","kv_role":"kv_both", "kv_connector_extra_config":{"backends":["UCX", "GDS"]}}'` 3. **Client Orchestration**: Use the client-side script below to coordinate prefill/decode operations. We are actively working on routing solutions. From e18b714b2ed849f26cfbea3365855885856dbc58 Mon Sep 17 00:00:00 2001 From: Tao Hui Date: Wed, 24 Sep 2025 20:58:00 +0800 Subject: [PATCH 338/518] [Bugfix] Fix DeepSeekV31ToolParser to correctly parse multiple tools in non-streaming output (#25405) Signed-off-by: taohui --- .../tool_use/test_deepseekv31_tool_parser.py | 54 +++++++++++++++++++ .../tool_parsers/deepseekv31_tool_parser.py | 2 +- 2 files changed, 55 insertions(+), 1 deletion(-) create mode 100644 tests/tool_use/test_deepseekv31_tool_parser.py diff --git a/tests/tool_use/test_deepseekv31_tool_parser.py b/tests/tool_use/test_deepseekv31_tool_parser.py new file mode 100644 index 000000000000..5f6b266d3aa1 --- /dev/null +++ b/tests/tool_use/test_deepseekv31_tool_parser.py @@ -0,0 +1,54 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest + +from vllm.entrypoints.openai.tool_parsers import DeepSeekV31ToolParser +from vllm.transformers_utils.tokenizer import get_tokenizer + +MODEL = "deepseek-ai/DeepSeek-V3.1" + + +@pytest.fixture(scope="module") +def deepseekv31_tokenizer(): + return get_tokenizer(tokenizer_name=MODEL) + + +@pytest.fixture +def parser(deepseekv31_tokenizer): + return DeepSeekV31ToolParser(deepseekv31_tokenizer) + + +def test_extract_tool_calls_with_tool(parser): + model_output = ( + "normal text" + "<|tool▁calls▁begin|>" + + "<|tool▁call▁begin|>foo<|tool▁sep|>{\"x\":1}<|tool▁call▁end|>" + + "<|tool▁calls▁end|>") + result = parser.extract_tool_calls(model_output, None) + assert result.tools_called + assert len(result.tool_calls) == 1 + assert result.tool_calls[0].function.name == "foo" + assert result.tool_calls[0].function.arguments == "{\"x\":1}" + assert result.content == "normal text" + + +def test_extract_tool_calls_with_multiple_tools(parser): + model_output = ( + "some prefix text" + "<|tool▁calls▁begin|>" + + "<|tool▁call▁begin|>foo<|tool▁sep|>{\"x\":1}<|tool▁call▁end|>" + + "<|tool▁call▁begin|>bar<|tool▁sep|>{\"y\":2}<|tool▁call▁end|>" + + "<|tool▁calls▁end|>" + " some suffix text") + + result = parser.extract_tool_calls(model_output, None) + + assert result.tools_called + assert len(result.tool_calls) == 2 + + assert result.tool_calls[0].function.name == "foo" + assert result.tool_calls[0].function.arguments == "{\"x\":1}" + + assert result.tool_calls[1].function.name == "bar" + assert 
result.tool_calls[1].function.arguments == "{\"y\":2}" + + # prefix is content + assert result.content == "some prefix text" diff --git a/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py index ff9188190f3f..09095f899177 100644 --- a/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py +++ b/vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py @@ -39,7 +39,7 @@ def __init__(self, tokenizer: AnyTokenizer): self.tool_call_end_token: str = "<|tool▁call▁end|>" self.tool_call_regex = re.compile( - r"<|tool▁call▁begin|>(?P.*)<|tool▁sep|>(?P.*)<|tool▁call▁end|>" + r"<|tool▁call▁begin|>(?P.*?)<|tool▁sep|>(?P.*?)<|tool▁call▁end|>" ) self.stream_tool_call_portion_regex = re.compile( From 8938774c79f185035bc3de5f19cfc7abaa242a5a Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 24 Sep 2025 14:59:05 +0100 Subject: [PATCH 339/518] Move `DeviceConfig`, `ObservabilityConfig`, `SpeechToTextConfig` to their own files (#25564) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/config/__init__.py | 186 +--------------------------------- vllm/config/device.py | 74 ++++++++++++++ vllm/config/observability.py | 99 ++++++++++++++++++ vllm/config/speech_to_text.py | 39 +++++++ 4 files changed, 215 insertions(+), 183 deletions(-) create mode 100644 vllm/config/device.py create mode 100644 vllm/config/observability.py create mode 100644 vllm/config/speech_to_text.py diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index d786d3e289b3..df6564077e8a 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -27,6 +27,7 @@ PrefixCachingHashAlgo) from vllm.config.compilation import (CompilationConfig, CompilationLevel, CUDAGraphMode, PassConfig) +from vllm.config.device import Device, DeviceConfig from vllm.config.kv_events import KVEventsConfig from vllm.config.kv_transfer import KVTransferConfig from vllm.config.load import LoadConfig @@ -38,11 +39,13 @@ try_match_architecture_defaults) from vllm.config.multimodal import (MMCacheType, MMEncoderTPMode, MultiModalConfig) +from vllm.config.observability import DetailedTraceModules, ObservabilityConfig from vllm.config.parallel import (DistributedExecutorBackend, EPLBConfig, ParallelConfig) from vllm.config.pooler import PoolerConfig from vllm.config.scheduler import RunnerType, SchedulerConfig, SchedulerPolicy from vllm.config.speculative import SpeculativeConfig +from vllm.config.speech_to_text import SpeechToTextConfig from vllm.config.structured_outputs import StructuredOutputsConfig from vllm.config.utils import ConfigType, config, get_attr_docs, is_init_field from vllm.logger import init_logger @@ -81,158 +84,6 @@ def metrics_info(self) -> dict[str, str]: ... -Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] - - -@config -@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) -class DeviceConfig: - """Configuration for the device to use for vLLM execution.""" - - device: SkipValidation[Optional[Union[Device, torch.device]]] = "auto" - """Device type for vLLM execution. - This parameter is deprecated and will be - removed in a future release. - It will now be set automatically based - on the current platform.""" - device_type: str = field(init=False) - """Device type from the current platform. 
This is set in - `__post_init__`.""" - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. - """ - # no factors to consider. - # the device/platform information will be summarized - # by torch/vllm automatically. - factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), - usedforsecurity=False).hexdigest() - return hash_str - - def __post_init__(self): - if self.device == "auto": - # Automated device type detection - from vllm.platforms import current_platform - self.device_type = current_platform.device_type - if not self.device_type: - raise RuntimeError( - "Failed to infer device type, please set " - "the environment variable `VLLM_LOGGING_LEVEL=DEBUG` " - "to turn on verbose logging to help debug the issue.") - else: - # Device type is assigned explicitly - if isinstance(self.device, str): - self.device_type = self.device - elif isinstance(self.device, torch.device): - self.device_type = self.device.type - - # Some device types require processing inputs on CPU - if self.device_type in ["tpu"]: - self.device = None - else: - # Set device with device type - self.device = torch.device(self.device_type) - - -DetailedTraceModules = Literal["model", "worker", "all"] - - -@config -@dataclass -class ObservabilityConfig: - """Configuration for observability - metrics and tracing.""" - - show_hidden_metrics_for_version: Optional[str] = None - """Enable deprecated Prometheus metrics that have been hidden since the - specified version. For example, if a previously deprecated metric has been - hidden since the v0.7.0 release, you use - `--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while - you migrate to new metrics. The metric is likely to be removed completely - in an upcoming release.""" - - @cached_property - def show_hidden_metrics(self) -> bool: - """Check if the hidden metrics should be shown.""" - if self.show_hidden_metrics_for_version is None: - return False - return version._prev_minor_version_was( - self.show_hidden_metrics_for_version) - - otlp_traces_endpoint: Optional[str] = None - """Target URL to which OpenTelemetry traces will be sent.""" - - collect_detailed_traces: Optional[list[DetailedTraceModules]] = None - """It makes sense to set this only if `--otlp-traces-endpoint` is set. If - set, it will collect detailed traces for the specified modules. This - involves use of possibly costly and or blocking operations and hence might - have a performance impact. 
- - Note that collecting detailed timing information for each request can be - expensive.""" - - @cached_property - def collect_model_forward_time(self) -> bool: - """Whether to collect model forward time for the request.""" - return (self.collect_detailed_traces is not None - and ("model" in self.collect_detailed_traces - or "all" in self.collect_detailed_traces)) - - @cached_property - def collect_model_execute_time(self) -> bool: - """Whether to collect model execute time for the request.""" - return (self.collect_detailed_traces is not None - and ("worker" in self.collect_detailed_traces - or "all" in self.collect_detailed_traces)) - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. - """ - # no factors to consider. - # this config will not affect the computation graph. - factors: list[Any] = [] - hash_str = hashlib.md5(str(factors).encode(), - usedforsecurity=False).hexdigest() - return hash_str - - def __post_init__(self): - if (self.collect_detailed_traces is not None - and len(self.collect_detailed_traces) == 1 - and "," in self.collect_detailed_traces[0]): - self._parse_collect_detailed_traces() - - from vllm.tracing import is_otel_available, otel_import_error_traceback - if not is_otel_available() and self.otlp_traces_endpoint is not None: - raise ValueError( - "OpenTelemetry is not available. Unable to configure " - "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are " - f"installed. Original error:\n{otel_import_error_traceback}") - - def _parse_collect_detailed_traces(self): - assert isinstance(self.collect_detailed_traces, list) - self.collect_detailed_traces = cast( - list[DetailedTraceModules], - self.collect_detailed_traces[0].split(",")) - - @config @dataclass(config=ConfigDict(arbitrary_types_allowed=True)) class VllmConfig: @@ -1009,37 +860,6 @@ def get_layers_from_vllm_config( } -@config -@dataclass -class SpeechToTextConfig: - """Configuration for speech-to-text models.""" - - sample_rate: float = 16_000 - """Sample rate (Hz) to resample input audio to. Most speech models expect - 16kHz audio input. The input audio will be automatically resampled to this - rate before processing.""" - - max_audio_clip_s: int = 30 - """Maximum duration in seconds for a single audio clip without chunking. - Audio longer than this will be split into smaller chunks if - `allow_audio_chunking` evaluates to True, otherwise it will be rejected.""" - - overlap_chunk_second: int = 1 - """Overlap duration in seconds between consecutive audio chunks when - splitting long audio. This helps maintain context across chunk boundaries - and improves transcription quality at split points.""" - - min_energy_split_window_size: Optional[int] = 1600 - """Window size in samples for finding low-energy (quiet) regions to split - audio chunks. The algorithm looks for the quietest moment within this - window to minimize cutting through speech. Default 1600 samples ≈ 100ms - at 16kHz. 
If None, no chunking will be done.""" - - @property - def allow_audio_chunking(self) -> bool: - return self.min_energy_split_window_size is not None - - def update_config(config: DataclassInstanceT, overrides: dict[str, Any]) -> DataclassInstanceT: processed_overrides = {} diff --git a/vllm/config/device.py b/vllm/config/device.py new file mode 100644 index 000000000000..4654ac96e0b7 --- /dev/null +++ b/vllm/config/device.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import hashlib +from dataclasses import field +from typing import Any, Literal, Optional, Union + +import torch +from pydantic import ConfigDict, SkipValidation +from pydantic.dataclasses import dataclass + +from vllm.config.utils import config + +Device = Literal["auto", "cuda", "cpu", "tpu", "xpu"] + + +@config +@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) +class DeviceConfig: + """Configuration for the device to use for vLLM execution.""" + + device: SkipValidation[Optional[Union[Device, torch.device]]] = "auto" + """Device type for vLLM execution. + This parameter is deprecated and will be + removed in a future release. + It will now be set automatically based + on the current platform.""" + device_type: str = field(init=False) + """Device type from the current platform. This is set in + `__post_init__`.""" + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # the device/platform information will be summarized + # by torch/vllm automatically. 
+ factors: list[Any] = [] + hash_str = hashlib.md5(str(factors).encode(), + usedforsecurity=False).hexdigest() + return hash_str + + def __post_init__(self): + if self.device == "auto": + # Automated device type detection + from vllm.platforms import current_platform + self.device_type = current_platform.device_type + if not self.device_type: + raise RuntimeError( + "Failed to infer device type, please set " + "the environment variable `VLLM_LOGGING_LEVEL=DEBUG` " + "to turn on verbose logging to help debug the issue.") + else: + # Device type is assigned explicitly + if isinstance(self.device, str): + self.device_type = self.device + elif isinstance(self.device, torch.device): + self.device_type = self.device.type + + # Some device types require processing inputs on CPU + if self.device_type in ["tpu"]: + self.device = None + else: + # Set device with device type + self.device = torch.device(self.device_type) diff --git a/vllm/config/observability.py b/vllm/config/observability.py new file mode 100644 index 000000000000..766d03051e21 --- /dev/null +++ b/vllm/config/observability.py @@ -0,0 +1,99 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import hashlib +from functools import cached_property +from typing import Any, Literal, Optional, cast + +from pydantic.dataclasses import dataclass + +from vllm import version +from vllm.config.utils import config + +DetailedTraceModules = Literal["model", "worker", "all"] + + +@config +@dataclass +class ObservabilityConfig: + """Configuration for observability - metrics and tracing.""" + + show_hidden_metrics_for_version: Optional[str] = None + """Enable deprecated Prometheus metrics that have been hidden since the + specified version. For example, if a previously deprecated metric has been + hidden since the v0.7.0 release, you use + `--show-hidden-metrics-for-version=0.7` as a temporary escape hatch while + you migrate to new metrics. The metric is likely to be removed completely + in an upcoming release.""" + + @cached_property + def show_hidden_metrics(self) -> bool: + """Check if the hidden metrics should be shown.""" + if self.show_hidden_metrics_for_version is None: + return False + return version._prev_minor_version_was( + self.show_hidden_metrics_for_version) + + otlp_traces_endpoint: Optional[str] = None + """Target URL to which OpenTelemetry traces will be sent.""" + + collect_detailed_traces: Optional[list[DetailedTraceModules]] = None + """It makes sense to set this only if `--otlp-traces-endpoint` is set. If + set, it will collect detailed traces for the specified modules. This + involves use of possibly costly and or blocking operations and hence might + have a performance impact. 
+ + Note that collecting detailed timing information for each request can be + expensive.""" + + @cached_property + def collect_model_forward_time(self) -> bool: + """Whether to collect model forward time for the request.""" + return (self.collect_detailed_traces is not None + and ("model" in self.collect_detailed_traces + or "all" in self.collect_detailed_traces)) + + @cached_property + def collect_model_execute_time(self) -> bool: + """Whether to collect model execute time for the request.""" + return (self.collect_detailed_traces is not None + and ("worker" in self.collect_detailed_traces + or "all" in self.collect_detailed_traces)) + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. + """ + # no factors to consider. + # this config will not affect the computation graph. + factors: list[Any] = [] + hash_str = hashlib.md5(str(factors).encode(), + usedforsecurity=False).hexdigest() + return hash_str + + def __post_init__(self): + if (self.collect_detailed_traces is not None + and len(self.collect_detailed_traces) == 1 + and "," in self.collect_detailed_traces[0]): + self._parse_collect_detailed_traces() + + from vllm.tracing import is_otel_available, otel_import_error_traceback + if not is_otel_available() and self.otlp_traces_endpoint is not None: + raise ValueError( + "OpenTelemetry is not available. Unable to configure " + "'otlp_traces_endpoint'. Ensure OpenTelemetry packages are " + f"installed. Original error:\n{otel_import_error_traceback}") + + def _parse_collect_detailed_traces(self): + assert isinstance(self.collect_detailed_traces, list) + self.collect_detailed_traces = cast( + list[DetailedTraceModules], + self.collect_detailed_traces[0].split(",")) diff --git a/vllm/config/speech_to_text.py b/vllm/config/speech_to_text.py new file mode 100644 index 000000000000..de9f525efe18 --- /dev/null +++ b/vllm/config/speech_to_text.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from typing import Optional + +from pydantic.dataclasses import dataclass + +from vllm.config.utils import config + + +@config +@dataclass +class SpeechToTextConfig: + """Configuration for speech-to-text models.""" + + sample_rate: float = 16_000 + """Sample rate (Hz) to resample input audio to. Most speech models expect + 16kHz audio input. The input audio will be automatically resampled to this + rate before processing.""" + + max_audio_clip_s: int = 30 + """Maximum duration in seconds for a single audio clip without chunking. + Audio longer than this will be split into smaller chunks if + `allow_audio_chunking` evaluates to True, otherwise it will be rejected.""" + + overlap_chunk_second: int = 1 + """Overlap duration in seconds between consecutive audio chunks when + splitting long audio. This helps maintain context across chunk boundaries + and improves transcription quality at split points.""" + + min_energy_split_window_size: Optional[int] = 1600 + """Window size in samples for finding low-energy (quiet) regions to split + audio chunks. The algorithm looks for the quietest moment within this + window to minimize cutting through speech. 
Default 1600 samples ≈ 100ms + at 16kHz. If None, no chunking will be done.""" + + @property + def allow_audio_chunking(self) -> bool: + return self.min_energy_split_window_size is not None From 9313be501721c42078e06490ff0b8a7eee0a4fb6 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 24 Sep 2025 22:49:58 +0800 Subject: [PATCH 340/518] [Misc] Improve type annotations for jsontree (#25577) Signed-off-by: DarkLight1337 --- vllm/model_executor/models/aya_vision.py | 23 +++---- vllm/model_executor/models/llava.py | 10 ++- vllm/model_executor/models/minimax_vl_01.py | 10 ++- vllm/model_executor/models/tarsier.py | 15 ++--- vllm/utils/jsontree.py | 69 +++++++++++++++++++-- 5 files changed, 88 insertions(+), 39 deletions(-) diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index 0f05f9b4efcd..6fd8c2fb5c56 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project # Adapted from https://github.com/huggingface/transformers/tree/main/src/transformers/models/aya_vision from collections.abc import Iterable, Mapping, Sequence -from typing import Annotated, Literal, Optional, Union, cast +from typing import Annotated, Literal, Optional, Union import torch from torch import nn @@ -347,12 +347,16 @@ def load_weights(self, weights: Iterable[tuple[str, loader = AutoWeightsLoader(self) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) - def _image_pixels_to_features(self, vision_tower: SiglipVisionModel, - pixel_values: torch.Tensor, - **kwargs) -> torch.Tensor: - target_dtype = vision_tower.get_input_embeddings().weight.dtype - image_features = vision_tower(pixel_values.to(dtype=target_dtype), - **kwargs) + def _image_pixels_to_features( + self, + vision_tower: SiglipVisionModel, + pixel_values: torch.Tensor, + **kwargs, + ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: + target_dtype: torch.dtype = \ + vision_tower.get_input_embeddings().weight.dtype + image_features: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ + vision_tower(pixel_values.to(dtype=target_dtype), **kwargs) def select_features(leaf: torch.Tensor): return self._select_image_features( @@ -360,10 +364,7 @@ def select_features(leaf: torch.Tensor): strategy=self.config.vision_feature_select_strategy, ) - return cast( - Union[torch.Tensor, tuple[torch.Tensor, ...]], - json_map_leaves(select_features, image_features), - ) + return json_map_leaves(select_features, image_features) def _select_image_features(self, image_features: torch.Tensor, *, strategy: str) -> torch.Tensor: diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 8d7feb965e76..4d8ed95b6cc8 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -4,7 +4,7 @@ from abc import abstractmethod from collections.abc import Iterable, Mapping, Sequence from typing import (Annotated, Final, Literal, Optional, Protocol, TypeVar, - Union, cast) + Union) import torch import torch.nn as nn @@ -623,7 +623,8 @@ def _image_pixels_to_features( ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features = vision_tower(pixel_values) + image_features: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ + vision_tower(pixel_values) def select_features(leaf: torch.Tensor): return self._select_image_features( @@ 
-631,10 +632,7 @@ def select_features(leaf: torch.Tensor): strategy=self.config.vision_feature_select_strategy, ) - return cast( - Union[torch.Tensor, tuple[torch.Tensor, ...]], - json_map_leaves(select_features, image_features), - ) + return json_map_leaves(select_features, image_features) def _process_image_pixels( self, diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py index b2f020f3323e..d81ac8c704e7 100644 --- a/vllm/model_executor/models/minimax_vl_01.py +++ b/vllm/model_executor/models/minimax_vl_01.py @@ -1,7 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable, Mapping -from typing import Annotated, Literal, Optional, Union, cast +from typing import Annotated, Literal, Optional, Union import torch import torch.nn as nn @@ -254,7 +254,8 @@ def _image_pixels_to_features( ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features = tuple(vision_tower(p) for p in pixel_values) + image_features: tuple[torch.Tensor, ...] = \ + tuple(vision_tower(p) for p in pixel_values) def select_features(leaf: torch.Tensor): return self._select_image_features( @@ -262,10 +263,7 @@ def select_features(leaf: torch.Tensor): strategy=self.config.vision_feature_select_strategy, ) - return cast( - Union[torch.Tensor, tuple[torch.Tensor, ...]], - json_map_leaves(select_features, image_features), - ) + return json_map_leaves(select_features, image_features) # adapted from https://huggingface.co/MiniMaxAI/MiniMax-VL-01/blob/main/modeling_minimax_vl_01.py#L616-L631 def pack_image_features(self, image_features: list[torch.Tensor], diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index b75c858a6480..3660efdc079a 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -4,7 +4,7 @@ import math from collections.abc import Iterable, Mapping, Sequence from typing import (Annotated, Final, Literal, Optional, Protocol, TypeVar, - Union, cast) + Union) import torch import torch.nn as nn @@ -490,11 +490,8 @@ def _image_pixels_to_features( pixel_values: Union[torch.Tensor, list[torch.Tensor]], ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # From vLLM LLaVA, vision tower output handling - image_hidden_states = vision_tower(pixel_values) - if not isinstance(image_hidden_states, torch.Tensor): - raise TypeError( - f"image_hidden_states type: {type(image_hidden_states)}" - " is not supported") + image_hidden_states: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ + vision_tower(pixel_values) def select_features_fn(leaf: torch.Tensor): return self._select_image_features( @@ -502,11 +499,7 @@ def select_features_fn(leaf: torch.Tensor): strategy=self.config.vision_feature_select_strategy, ) - selected_features = cast( - Union[torch.Tensor, tuple[torch.Tensor, ...]], - json_map_leaves(select_features_fn, image_hidden_states), - ) - return selected_features + return json_map_leaves(select_features_fn, image_hidden_states) def _add_tarsier_split_tokens( self, projected_image_features: torch.Tensor) -> torch.Tensor: diff --git a/vllm/utils/jsontree.py b/vllm/utils/jsontree.py index 457afb7e2c6f..804c443eb184 100644 --- a/vllm/utils/jsontree.py +++ b/vllm/utils/jsontree.py @@ -4,7 +4,7 @@ from collections.abc import Iterable from functools import reduce -from typing import 
Callable, TypeVar, Union, overload +from typing import Callable, TypeVar, Union, cast, overload _T = TypeVar("_T") _U = TypeVar("_U") @@ -30,10 +30,42 @@ def json_iter_leaves(value: JSONTree[_T]) -> Iterable[_T]: yield value +@overload +def json_map_leaves( + func: Callable[[_T], _U], + value: Union[_T, dict[str, _T]], +) -> Union[_U, dict[str, _U]]: + ... + + +@overload +def json_map_leaves( + func: Callable[[_T], _U], + value: Union[_T, list[_T]], +) -> Union[_U, list[_U]]: + ... + + +@overload +def json_map_leaves( + func: Callable[[_T], _U], + value: Union[_T, tuple[_T, ...]], +) -> Union[_U, tuple[_U, ...]]: + ... + + +@overload def json_map_leaves( func: Callable[[_T], _U], value: JSONTree[_T], ) -> JSONTree[_U]: + ... + + +def json_map_leaves( + func: Callable[[_T], _U], + value: Union[dict[str, _T], list[_T], tuple[_T, ...], JSONTree[_T]], +) -> Union[dict[str, _U], list[_U], tuple[_U, ...], JSONTree[_U]]: """Apply a function to each leaf in a nested JSON structure.""" if isinstance(value, dict): return {k: json_map_leaves(func, v) for k, v in value.items()} @@ -45,6 +77,33 @@ def json_map_leaves( return func(value) +@overload +def json_reduce_leaves( + func: Callable[[_T, _T], _T], + value: Union[_T, dict[str, _T]], + /, +) -> _T: + ... + + +@overload +def json_reduce_leaves( + func: Callable[[_T, _T], _T], + value: Union[_T, list[_T]], + /, +) -> _T: + ... + + +@overload +def json_reduce_leaves( + func: Callable[[_T, _T], _T], + value: Union[_T, tuple[_T, ...]], + /, +) -> _T: + ... + + @overload def json_reduce_leaves( func: Callable[[_T, _T], _T], @@ -65,10 +124,10 @@ def json_reduce_leaves( def json_reduce_leaves( - func: Callable[..., Union[_T, _U]], - value: JSONTree[_T], - initial: _U = ..., # type: ignore[assignment] - /, + func: Callable[..., Union[_T, _U]], + value: Union[dict[str, _T], list[_T], tuple[_T, ...], JSONTree[_T]], + initial: _U = cast(_U, ...), # noqa: B008 + /, ) -> Union[_T, _U]: """ Apply a function of two arguments cumulatively to each leaf in a From 487745ff493c365fe328f8d8bc8b29f50e04dfc2 Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Wed, 24 Sep 2025 11:24:39 -0400 Subject: [PATCH 341/518] [ROCm][Bugfix] Only enable +rms_norm based on aiter if not explicitly disabled (#25275) Signed-off-by: Gregory Shtrasberg --- vllm/platforms/rocm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 942fd1973f4f..d44d6930c177 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -340,7 +340,8 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: else: parallel_config.worker_cls = "vllm.worker.worker.Worker" # Aiter rms norm perform best when CUDA Graph capture is enabled. 
- if use_v1 and use_aiter_rms_norm and not is_eager_execution: + if (use_v1 and use_aiter_rms_norm and not is_eager_execution + and "-rms_norm" not in compilation_config.custom_ops): compilation_config.custom_ops.append("+rms_norm") @classmethod From 302eb941f360aaac6936abc782e235b45da88699 Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Wed, 24 Sep 2025 11:25:10 -0400 Subject: [PATCH 342/518] [ROCm][Build][Bugfix] Fix ROCm base docker whls installation order (#25415) Signed-off-by: Gregory Shtrasberg --- docker/Dockerfile.rocm_base | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docker/Dockerfile.rocm_base b/docker/Dockerfile.rocm_base index 4973b57f7656..87fcb18b1c03 100644 --- a/docker/Dockerfile.rocm_base +++ b/docker/Dockerfile.rocm_base @@ -65,8 +65,6 @@ ARG PYTORCH_BRANCH ARG PYTORCH_VISION_BRANCH ARG PYTORCH_REPO ARG PYTORCH_VISION_REPO -ARG FA_BRANCH -ARG FA_REPO RUN git clone ${PYTORCH_REPO} pytorch RUN cd pytorch && git checkout ${PYTORCH_BRANCH} && \ pip install -r requirements.txt && git submodule update --init --recursive \ @@ -77,14 +75,20 @@ RUN git clone ${PYTORCH_VISION_REPO} vision RUN cd vision && git checkout ${PYTORCH_VISION_BRANCH} \ && python3 setup.py bdist_wheel --dist-dir=dist \ && pip install dist/*.whl +RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \ + && cp /app/vision/dist/*.whl /app/install + +FROM base AS build_fa +ARG FA_BRANCH +ARG FA_REPO +RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ + pip install /install/*.whl RUN git clone ${FA_REPO} RUN cd flash-attention \ && git checkout ${FA_BRANCH} \ && git submodule update --init \ && GPU_ARCHS=$(echo ${PYTORCH_ROCM_ARCH} | sed -e 's/;gfx1[0-9]\{3\}//g') python3 setup.py bdist_wheel --dist-dir=dist -RUN mkdir -p /app/install && cp /app/pytorch/dist/*.whl /app/install \ - && cp /app/vision/dist/*.whl /app/install \ - && cp /app/flash-attention/dist/*.whl /app/install +RUN mkdir -p /app/install && cp /app/flash-attention/dist/*.whl /app/install FROM base AS build_aiter ARG AITER_BRANCH @@ -103,6 +107,8 @@ FROM base AS debs RUN mkdir /app/debs RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ cp /install/*.whl /app/debs +RUN --mount=type=bind,from=build_fa,src=/app/install/,target=/install \ + cp /install/*.whl /app/debs RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ cp /install/*.whl /app/debs RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ @@ -111,13 +117,7 @@ RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \ cp /install/*.whl /app/debs FROM base AS final -RUN --mount=type=bind,from=build_triton,src=/app/install/,target=/install \ - pip install /install/*.whl -RUN --mount=type=bind,from=build_amdsmi,src=/app/install/,target=/install \ - pip install /install/*.whl -RUN --mount=type=bind,from=build_pytorch,src=/app/install/,target=/install \ - pip install /install/*.whl -RUN --mount=type=bind,from=build_aiter,src=/app/install/,target=/install \ +RUN --mount=type=bind,from=debs,src=/app/debs,target=/install \ pip install /install/*.whl ARG BASE_IMAGE From d83f3f7cb37a0f1861f16c84d529abcd54889885 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Wed, 24 Sep 2025 11:30:15 -0400 Subject: [PATCH 343/518] Fixes and updates to bench_per_token_quant_fp8 (#25591) Signed-off-by: Michael Goin --- .../kernels/bench_per_token_quant_fp8.py | 50 ++++++++++--------- 1 file 
changed, 26 insertions(+), 24 deletions(-) diff --git a/benchmarks/kernels/bench_per_token_quant_fp8.py b/benchmarks/kernels/bench_per_token_quant_fp8.py index 9170361e974b..e08e5680c191 100644 --- a/benchmarks/kernels/bench_per_token_quant_fp8.py +++ b/benchmarks/kernels/bench_per_token_quant_fp8.py @@ -51,7 +51,7 @@ def calculate_diff( ): """Calculate the difference between Inductor and CUDA implementations.""" device = torch.device("cuda") - x = torch.rand((batch_size * hidden_size, 4096), dtype=dtype, device=device) + x = torch.randn((batch_size, hidden_size), dtype=dtype, device=device) quant_fp8 = QuantFP8(False, group_shape, column_major_scales=False) @@ -59,23 +59,25 @@ def calculate_diff( torch_eager_out, torch_eager_scale = quant_fp8.forward_native(x) cuda_out, cuda_scale = quant_fp8.forward_cuda(x) - out_allclose = lambda o1, o2: torch.allclose( - o1.to(torch.float32), - o2.to(torch.float32), - rtol=1e-3, - atol=1e-5, - ) - scale_allclose = lambda s1, s2: torch.allclose(s1, s2, rtol=1e-3, atol=1e-5) - - if ( - out_allclose(cuda_out, torch_out) - and scale_allclose(cuda_scale, torch_scale) - and out_allclose(cuda_out, torch_eager_out) - and scale_allclose(cuda_scale, torch_eager_scale) - ): + try: + torch.testing.assert_close( + cuda_out.to(torch.float32), + torch_out.to(torch.float32), + rtol=1e-3, + atol=1e-5, + ) + torch.testing.assert_close(cuda_scale, torch_scale, rtol=1e-3, atol=1e-5) + torch.testing.assert_close( + cuda_out.to(torch.float32), + torch_eager_out.to(torch.float32), + rtol=1e-3, + atol=1e-5, + ) + torch.testing.assert_close(cuda_scale, torch_eager_scale, rtol=1e-3, atol=1e-5) print("✅ All implementations match") - else: + except AssertionError as e: print("❌ Implementations differ") + print(e) configs = [] @@ -91,7 +93,7 @@ def benchmark_quantization( ): device = torch.device("cuda") - x = torch.randn(batch_size * hidden_size, 4096, device=device, dtype=dtype) + x = torch.randn(batch_size, hidden_size, device=device, dtype=dtype) quantiles = [0.5, 0.2, 0.8] quant_fp8 = QuantFP8(False, group_shape, column_major_scales=col_major) @@ -157,21 +159,21 @@ def geo_speedup(group: pd.DataFrame) -> pd.Series: ) parser.add_argument("-c", "--check", action="store_true") parser.add_argument( - "--dtype", type=str, choices=["half", "bfloat16", "float"], default="half" + "--dtype", type=str, choices=["half", "bfloat16", "float"], default="bfloat16" ) parser.add_argument( "--hidden-sizes", type=int, nargs="+", - default=None, - help="Hidden sizes to benchmark (default: 1,16,64,128,256,512,1024,2048,4096)", + default=[896, 1024, 2048, 4096, 7168], + help="Hidden sizes to benchmark", ) parser.add_argument( "--batch-sizes", type=int, nargs="+", - default=None, - help="Batch sizes to benchmark (default: 1,16,32,64,128)", + default=[1, 16, 128, 512, 1024], + help="Batch sizes to benchmark", ) parser.add_argument( "--group-sizes", @@ -192,8 +194,8 @@ def geo_speedup(group: pd.DataFrame) -> pd.Series: dtype = STR_DTYPE_TO_TORCH_DTYPE[args.dtype] - hidden_sizes = args.hidden_sizes or [1, 16, 64, 128, 256, 512, 1024, 2048, 4096] - batch_sizes = args.batch_sizes or [1, 16, 32, 64, 128] + hidden_sizes = args.hidden_sizes + batch_sizes = args.batch_sizes if args.group_sizes is not None: group_shapes = [] From 2dda3e35d054235b0c2170df359b42ec25b4fe2c Mon Sep 17 00:00:00 2001 From: "rongfu.leng" Date: Thu, 25 Sep 2025 02:11:16 +0800 Subject: [PATCH 344/518] [Bugfix] add cache model when from object storage get model (#24764) Signed-off-by: rongfu.leng --- vllm/envs.py | 6 ++++++ 
vllm/transformers_utils/runai_utils.py | 16 ++++++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 689428ec5910..0833949b527f 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -64,6 +64,7 @@ VLLM_XLA_USE_SPMD: bool = False VLLM_WORKER_MULTIPROC_METHOD: Literal["fork", "spawn"] = "fork" VLLM_ASSETS_CACHE: str = os.path.join(VLLM_CACHE_ROOT, "assets") + VLLM_ASSETS_CACHE_MODEL_CLEAN: bool = False VLLM_IMAGE_FETCH_TIMEOUT: int = 5 VLLM_VIDEO_FETCH_TIMEOUT: int = 30 VLLM_AUDIO_FETCH_TIMEOUT: int = 10 @@ -699,6 +700,11 @@ def get_vllm_port() -> Optional[int]: os.path.join(get_default_cache_root(), "vllm", "assets"), )), + # If the env var is set, we will clean model file in + # this path $VLLM_ASSETS_CACHE/model_streamer/$model_name + "VLLM_ASSETS_CACHE_MODEL_CLEAN": + lambda: bool(int(os.getenv("VLLM_ASSETS_CACHE_MODEL_CLEAN", "0"))), + # Timeout for fetching images when serving multimodal models # Default is 5 seconds "VLLM_IMAGE_FETCH_TIMEOUT": diff --git a/vllm/transformers_utils/runai_utils.py b/vllm/transformers_utils/runai_utils.py index 08466ca19b8a..355fd60e8da1 100644 --- a/vllm/transformers_utils/runai_utils.py +++ b/vllm/transformers_utils/runai_utils.py @@ -5,9 +5,10 @@ import os import shutil import signal -import tempfile from typing import Optional +from vllm import envs +from vllm.assets.base import get_cache_dir from vllm.logger import init_logger from vllm.utils import PlaceholderModule @@ -58,20 +59,19 @@ class ObjectStorageModel: """ def __init__(self, url: str) -> None: - for sig in (signal.SIGINT, signal.SIGTERM): - existing_handler = signal.getsignal(sig) - signal.signal(sig, self._close_by_signal(existing_handler)) + if envs.VLLM_ASSETS_CACHE_MODEL_CLEAN: + for sig in (signal.SIGINT, signal.SIGTERM): + existing_handler = signal.getsignal(sig) + signal.signal(sig, self._close_by_signal(existing_handler)) dir_name = os.path.join( - tempfile.gettempdir(), + get_cache_dir(), "model_streamer", hashlib.sha256(str(url).encode()).hexdigest()[:8]) if os.path.exists(dir_name): shutil.rmtree(dir_name) os.makedirs(dir_name) self.dir = dir_name - - def __del__(self): - self._close() + logger.debug("Init object storage, model cache path is: %s", dir_name) def _close(self) -> None: if os.path.exists(self.dir): From 54e42b72dbf7ec259de1c28e66ea9575fe594685 Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Wed, 24 Sep 2025 13:38:16 -0500 Subject: [PATCH 345/518] Support mnnvl all2allv from Flashinfer (#21003) Signed-off-by: Shu Wang Signed-off-by: Shu Wang. 
Signed-off-by: Tyler Michael Smith Signed-off-by: Tyler Michael Smith Co-authored-by: Tyler Michael Smith Co-authored-by: Tyler Michael Smith --- .../moe/modular_kernel_tools/mk_objects.py | 5 +- .../device_communicators/all2all.py | 125 ++++++++-- .../device_communicators/cuda_communicator.py | 5 + .../device_communicators/mnnvl_compat.py | 28 +++ vllm/envs.py | 7 +- .../fused_moe/flashinfer_cutlass_moe.py | 7 +- .../flashinfer_cutlass_prepare_finalize.py | 233 +++++++++++++++++- .../quantization/utils/flashinfer_fp4_moe.py | 6 +- .../quantization/utils/flashinfer_utils.py | 4 +- vllm/utils/flashinfer.py | 30 +++ 10 files changed, 410 insertions(+), 40 deletions(-) create mode 100644 vllm/distributed/device_communicators/mnnvl_compat.py diff --git a/tests/kernels/moe/modular_kernel_tools/mk_objects.py b/tests/kernels/moe/modular_kernel_tools/mk_objects.py index 7947391d0348..57a1da7b4b1a 100644 --- a/tests/kernels/moe/modular_kernel_tools/mk_objects.py +++ b/tests/kernels/moe/modular_kernel_tools/mk_objects.py @@ -222,7 +222,8 @@ def expert_info(kind) -> ExpertInfo: from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( # noqa: E501 FlashInferExperts) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 - FlashInferCutlassMoEPrepareAndFinalize) + FlashInferCutlassMoEPrepareAndFinalize, + create_flashinfer_prepare_finalize) register_prepare_and_finalize( FlashInferCutlassMoEPrepareAndFinalize, @@ -373,7 +374,7 @@ def make_prepare_finalize( assert prepare_finalize is not None return prepare_finalize elif prepare_finalize_type == FlashInferCutlassMoEPrepareAndFinalize: - return FlashInferCutlassMoEPrepareAndFinalize( + return create_flashinfer_prepare_finalize( use_dp=moe.moe_parallel_config.dp_size > 1) else: return MoEPrepareAndFinalizeNoEP() diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index ae18429f6251..661ed939608a 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -10,9 +10,15 @@ from vllm.forward_context import get_forward_context from vllm.logger import init_logger from vllm.utils import has_deep_ep, has_pplx +from vllm.utils.flashinfer import has_flashinfer_all2all from .base_device_communicator import All2AllManagerBase, Cache +if has_flashinfer_all2all(): + from flashinfer.comm import Mapping + from flashinfer.comm.mnnvl import MnnvlConfig + from flashinfer.comm.trtllm_alltoall import MnnvlMoe + logger = init_logger(__name__) @@ -47,24 +53,22 @@ def naive_multicast(self, x: torch.Tensor, def dispatch(self, hidden_states: torch.Tensor, router_logits: torch.Tensor): - cu_tokens_across_dp_cpu = get_forward_context( - ).dp_metadata.cu_tokens_across_dp_cpu + sizes = get_forward_context( + ).dp_metadata.get_chunk_sizes_across_dp_rank() + hidden_states, router_logits = get_dp_group().all_gatherv( + [hidden_states, router_logits], + dim=0, + sizes=sizes, + ) - hidden_states = self.naive_multicast(hidden_states, - cu_tokens_across_dp_cpu) - router_logits = self.naive_multicast(router_logits, - cu_tokens_across_dp_cpu) return hidden_states, router_logits def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: - cu_tokens_across_dp_cpu = get_forward_context( - ).dp_metadata.cu_tokens_across_dp_cpu - start = 0 if self.dp_rank == 0 else cu_tokens_across_dp_cpu[ - self.dp_rank - 1] - end = cu_tokens_across_dp_cpu[self.dp_rank] - - all_hidden_states = 
self.dp_group.all_reduce(hidden_states) - hidden_states = all_hidden_states[start:end, :] + sizes = get_forward_context( + ).dp_metadata.get_chunk_sizes_across_dp_rank() + hidden_states = get_dp_group().reduce_scatterv(hidden_states, + dim=0, + sizes=sizes) return hidden_states def destroy(self): @@ -300,4 +304,95 @@ def get_handle(self, kwargs): # DeepEP LL uses RDMA so no SMs are used for communication def max_sms_used(self) -> Optional[int]: - return 0 \ No newline at end of file + return 0 + + +class FlashInferAllToAllManager(All2AllManagerBase): + """ + All2All communication based on flashinfer kernels. + """ + + def __init__(self, cpu_group): + assert has_flashinfer_all2all( + ), "flashinfer all2all module not found. Please install/check flashinfer" # noqa + super().__init__(cpu_group) + logger.debug( + "Initialize for flashinfer All2All " + "rank=%d, world size=%d", self.rank, self.world_size) + self.initialized = False + self.alltoall_info = None + + def initialize( + self, + world_size: int, + rank: int, + gpus_per_node: int, + ): + """Initialize workspace""" + if self.initialized: + return + + self.cleanup() + logger.debug("making map: " + "rank=%d, world size=%d", rank, world_size) + self.mapping = Mapping( + world_size, + rank, + gpus_per_node, + tp_size=world_size, + ) + + from vllm.distributed.device_communicators.mnnvl_compat import ( + CustomCommunicator) + dp_config = MnnvlConfig( + comm_backend=CustomCommunicator(get_dp_group().cpu_group), + fabric_page_size=1 << 29, # 512MB + allocation_granularity=0 # Auto-detect + ) + + self.workspace_tensor = MnnvlMoe.get_moe_workspaces( + self.mapping, dp_config) + self.prepare_workspace_tensor = MnnvlMoe.get_moe_prepare_workspace( + self.mapping, dp_config) + + self.world_size = world_size + self.rank = rank + self.gpus_per_node = gpus_per_node + self.initialized = True + + logger.info("FlashInfer All2All initialized for rank %s, size %s", + rank, world_size) + + def ensure_alltoall_workspace_initialized(self): + """Ensure workspace is initialized""" + if not has_flashinfer_all2all(): + return False + + if self.world_size <= 1: + return False + + if not self.initialized: + self.initialize( + world_size=self.world_size, + rank=self.rank, + gpus_per_node=torch.cuda.device_count, + ) + return self.initialized + + def get_handle(self, kwargs): + return self + + def cleanup(self): + """Clean up workspace""" + if self.initialized and self.workspace_tensor is not None \ + and self.prepare_workspace_tensor is not None: + try: + del self.workspace_tensor + del self.prepare_workspace_tensor + except Exception as e: + logger.warning("Failed to cleanup FlashInfer workspace: %s", e) + finally: + self.workspace_tensor = None + self.prepare_workspace_tensor = None + self.mapping = None + self.initialized = False \ No newline at end of file diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py index b20e79f577c3..bab372b722db 100644 --- a/vllm/distributed/device_communicators/cuda_communicator.py +++ b/vllm/distributed/device_communicators/cuda_communicator.py @@ -114,6 +114,11 @@ def __init__(self, from .all2all import DeepEPLLAll2AllManager self.all2all_manager = DeepEPLLAll2AllManager(self.cpu_group) logger.info("Using DeepEP Low-Latency all2all manager.") + elif all2all_backend == "flashinfer_all2allv": + from .all2all import FlashInferAllToAllManager + self.all2all_manager = FlashInferAllToAllManager( + self.cpu_group) + logger.info("Using Flashinfer all2allv 
manager.") else: raise ValueError(f"Unknown all2all backend: {all2all_backend}") diff --git a/vllm/distributed/device_communicators/mnnvl_compat.py b/vllm/distributed/device_communicators/mnnvl_compat.py new file mode 100644 index 000000000000..80072c4fa643 --- /dev/null +++ b/vllm/distributed/device_communicators/mnnvl_compat.py @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import torch.distributed as dist +from flashinfer.comm.mnnvl import CommBackend as CommBackend + +from vllm.utils.flashinfer import has_flashinfer_all2all + +assert has_flashinfer_all2all(), "Flashinfer alltoallv module cannot be found" + + +class CustomCommunicator(CommBackend): + + def __init__(self, group): + self._group = group + + def Get_rank(self) -> int: + return self._group.rank() + + def Get_size(self) -> int: + return self._group.size() + + def allgather(self, data: int): + gathered = [None] * self.Get_size() + dist.all_gather_object(gathered, data, group=self._group) + return gathered + + def Split(self, color: int, key: int) -> 'CustomCommunicator': + return self diff --git a/vllm/envs.py b/vllm/envs.py index 0833949b527f..4797d96bb899 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -156,7 +156,8 @@ VLLM_ALL2ALL_BACKEND: Literal["naive", "pplx", "deepep_high_throughput", "deepep_low_latency", - "allgather_reducescatter"] = \ + "allgather_reducescatter", + "flashinfer_all2allv"] = \ "allgather_reducescatter" VLLM_MAX_TOKENS_PER_EXPERT_FP4_MOE: int = 163840 VLLM_TOOL_PARSE_REGEX_TIMEOUT_SECONDS: int = 1 @@ -1209,12 +1210,14 @@ def get_vllm_port() -> Optional[int]: # - "pplx": use pplx kernels # - "deepep_high_throughput", use deepep high-throughput kernels # - "deepep_low_latency", use deepep low-latency kernels + # - "flashinfer_all2allv", use flashinfer alltoallv kernels for mnnvl "VLLM_ALL2ALL_BACKEND": env_with_choices("VLLM_ALL2ALL_BACKEND", "allgather_reducescatter", ["naive", "pplx", "deepep_high_throughput", "deepep_low_latency", - "allgather_reducescatter"]), + "allgather_reducescatter", + "flashinfer_all2allv"]), # Flashinfer MoE backend for vLLM's fused Mixture-of-Experts support. # Both require compute capability 10.0 or above. diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py index a074da883088..8700181d18fe 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py @@ -8,7 +8,7 @@ from vllm.logger import init_logger from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 - FlashInferCutlassMoEPrepareAndFinalize) + create_flashinfer_prepare_finalize) from vllm.model_executor.layers.fused_moe.topk_weight_and_reduce import ( TopKWeightAndReduceNoOP) from vllm.utils.flashinfer import (flashinfer_cutlass_fused_moe, @@ -108,7 +108,7 @@ def workspace_shapes( of each tuple must be the number of tokens. 
""" aq_m, aq_n = aq.shape - workspace2 = () + workspace2 = (0, ) output_shape = (aq_m, aq_n * 2) if self.quant_dtype != \ torch.float8_e4m3fn else (aq_m, aq_n) workspace_dtype = a.dtype @@ -192,9 +192,8 @@ def flashinfer_cutlass_moe_fp4( expert_map: Optional[torch.Tensor] = None, apply_router_weight_on_input: bool = False, ) -> torch.Tensor: - fused_experts = mk.FusedMoEModularKernel( - FlashInferCutlassMoEPrepareAndFinalize(use_dp=False), + create_flashinfer_prepare_finalize(use_dp=False), FlashInferExperts( out_dtype=hidden_states.dtype, quant_config=quant_config, diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py index 8c7eff59f3cd..6e127064d32d 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py @@ -5,7 +5,9 @@ import torch import vllm.model_executor.layers.fused_moe.modular_kernel as mk -from vllm.distributed import get_dp_group +from vllm.distributed import get_dp_group, get_ep_group +from vllm.distributed.device_communicators.base_device_communicator import ( + All2AllManagerBase) from vllm.forward_context import get_forward_context from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.fused_moe.utils import ( @@ -18,6 +20,7 @@ def get_local_sizes(): class FlashInferCutlassMoEPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize): + """Base class for FlashInfer MoE prepare and finalize operations.""" def __init__( self, @@ -42,6 +45,39 @@ def topk_indices_dtype(self) -> Optional[torch.dtype]: def num_dispatchers(self) -> int: return self.num_dispatchers_ + def _apply_router_weight_on_input( + self, + a1: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + ) -> None: + """Apply router weight on input if needed.""" + if apply_router_weight_on_input: + topk = topk_ids.size(1) + assert topk == 1, \ + "apply_router_weight_on_input is only implemented for topk=1" + a1.mul_(topk_weights.to(a1.dtype)) + + +class FlashInferAllToAllMoEPrepareAndFinalize( + FlashInferCutlassMoEPrepareAndFinalize): + """FlashInfer implementation using AllToAll communication.""" + + def __init__( + self, + use_dp: bool, + num_dispatchers: int = 1, + ): + super().__init__(use_dp, num_dispatchers) + self.alltoall_info = None + + # Initialize all2all_manager only for DP case + self.all2all_manager = None + if self.use_dp: + self.all2all_manager = get_ep_group( + ).device_communicator.all2all_manager + def prepare( self, a1: torch.Tensor, @@ -53,12 +89,84 @@ def prepare( quant_config: FusedMoEQuantConfig, ) -> mk.PrepareResultType: - if apply_router_weight_on_input: - topk = topk_ids.size(1) - # TODO: this only works for topK=1, will need to update for topK>1 - assert topk == 1, \ - "apply_router_weight_on_input is only implemented for topk=1" - a1.mul_(topk_weights.to(a1.dtype)) + self._apply_router_weight_on_input(a1, topk_weights, topk_ids, + apply_router_weight_on_input) + + if not self.use_dp: + # Non-DP case: standard quantization + a1q, a1q_scale = moe_kernel_quantize_input( + a1, + quant_config.a1_gscale, + quant_config.quant_dtype, + quant_config.per_act_token_quant, + quant_config.block_shape, + is_fp4_scale_swizzled=not self.use_dp, + ) + else: + # DP case: use FlashInfer AllToAll + global_num_tokens_cpu = get_local_sizes() + top_k = topk_ids.size(1) + + 
(self.alltoall_info, topk_ids, topk_weights, a1q, + a1q_scale) = flashinfer_alltoall_dispatch( + self.all2all_manager, + global_num_tokens_cpu, + a1, + quant_config.a1_gscale, + topk_ids, + topk_weights, + top_k, + num_experts, + quant_config, + ) + + return a1q, a1q_scale, None, topk_ids, topk_weights + + def finalize( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> None: + if self.use_dp: + top_k = topk_ids.size(1) + token_count = output.shape[0] + fused_expert_output = flashinfer_alltoall_combine( + self.all2all_manager, + fused_expert_output, + top_k=top_k, + token_count=token_count, + alltoall_info=self.alltoall_info, + ) + output.copy_(fused_expert_output) + + +class FlashInferAllGatherMoEPrepareAndFinalize( + FlashInferCutlassMoEPrepareAndFinalize): + + def __init__( + self, + use_dp: bool, + num_dispatchers: int = 1, + ): + super().__init__(use_dp, num_dispatchers) + + def prepare( + self, + a1: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + num_experts: int, + expert_map: Optional[torch.Tensor], + apply_router_weight_on_input: bool, + quant_config: FusedMoEQuantConfig, + ) -> mk.PrepareResultType: + + self._apply_router_weight_on_input(a1, topk_weights, topk_ids, + apply_router_weight_on_input) a1q, a1q_scale = moe_kernel_quantize_input( a1, @@ -66,7 +174,6 @@ def prepare( quant_config.quant_dtype, quant_config.per_act_token_quant, quant_config.block_shape, - # Swizzling after communication is_fp4_scale_swizzled=not self.use_dp, ) if self.use_dp: @@ -76,17 +183,117 @@ def prepare( dim=0, sizes=get_local_sizes(), ) - a1_m, a1_n = a1q.shape a1q_scale = nvfp4_block_scale_interleave(a1q_scale) return a1q, a1q_scale, None, topk_ids, topk_weights - def finalize(self, output: torch.Tensor, fused_expert_output: torch.Tensor, - topk_weights: torch.Tensor, topk_ids: torch.Tensor, - apply_router_weight_on_input: bool, - weight_and_reduce_impl: mk.TopKWeightAndReduce) -> None: + def finalize( + self, + output: torch.Tensor, + fused_expert_output: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + apply_router_weight_on_input: bool, + weight_and_reduce_impl: mk.TopKWeightAndReduce, + ) -> None: if self.use_dp: fused_expert_output = get_dp_group().reduce_scatterv( fused_expert_output, dim=0, sizes=get_local_sizes()) output.copy_(fused_expert_output) + + +def flashinfer_alltoall_dispatch( + all2all_manager: All2AllManagerBase, + global_num_tokens_cpu: list[int], + x: torch.Tensor, + gs: torch.Tensor, + topk_ids: torch.Tensor, + topk_weights: torch.Tensor, + top_k: int, + num_experts: int, + quant_config: FusedMoEQuantConfig, +): + from flashinfer.comm.trtllm_alltoall import MnnvlMoe + assert (all2all_manager.ensure_alltoall_workspace_initialized() + ), "FlashInfer AllToAll workspace not available" + + ep_rank = all2all_manager.rank + ep_size = all2all_manager.world_size + max_num_token = max(global_num_tokens_cpu + ) if global_num_tokens_cpu is not None else x.shape[0] + alltoall_info, topk_ids, topk_weights, _ = ( + MnnvlMoe.mnnvl_moe_alltoallv_prepare_without_allgather( + topk_ids, + topk_weights, + None, + all2all_manager.prepare_workspace, + max_num_token, + ep_rank, + ep_size, + num_experts, + num_experts, + top_k, + )) + + x, x_sf = moe_kernel_quantize_input( + x, + gs, + quant_config.quant_dtype, + quant_config.per_act_token_quant, + quant_config.block_shape, + 
is_fp4_scale_swizzled=False, # delay swizzle to after comm + ) + x = MnnvlMoe.mnnvl_moe_alltoallv( + x, + alltoall_info, + all2all_manager.workspace_tensor, + ep_rank, + ep_size, + ) + + x_sf = MnnvlMoe.mnnvl_moe_alltoallv( + x_sf, + alltoall_info, + all2all_manager.workspace_tensor, + ep_rank, + ep_size, + ) + x_sf = nvfp4_block_scale_interleave(x_sf) + return alltoall_info, topk_ids, topk_weights, x, x_sf + + +def flashinfer_alltoall_combine( + all2all_manager: All2AllManagerBase, + output: torch.Tensor, + top_k: int, + token_count: int, + alltoall_info, +): + from flashinfer.comm.trtllm_alltoall import MnnvlMoe + assert (all2all_manager.ensure_alltoall_workspace_initialized() + ), "FlashInfer AllToAll workspace not available" + return MnnvlMoe.mnnvl_moe_alltoallv_combine( + output, + alltoall_info, + all2all_manager.workspace_tensor, + ep_rank=all2all_manager.rank, + ep_size=all2all_manager.world_size, + top_k=top_k, + token_count=token_count, + ) + + +def create_flashinfer_prepare_finalize( + use_dp: bool, + use_nvfp4: bool = False, + enable_alltoallv: bool = False, +) -> FlashInferCutlassMoEPrepareAndFinalize: + """Factory function to create the appropriate FlashInfer implementation.""" + if use_nvfp4: + if enable_alltoallv: + return FlashInferAllToAllMoEPrepareAndFinalize(use_dp) + else: + return FlashInferAllGatherMoEPrepareAndFinalize(use_dp) + # Fp8 only supports AllGather + return FlashInferAllGatherMoEPrepareAndFinalize(use_dp) diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py index fabf855b36e6..a520302c62d9 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py @@ -12,7 +12,7 @@ from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 - FlashInferCutlassMoEPrepareAndFinalize) + create_flashinfer_prepare_finalize) from vllm.platforms import current_platform from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe @@ -51,7 +51,9 @@ def build_flashinfer_fp4_cutlass_moe_prepare_finalize( moe: FusedMoEConfig) -> mk.FusedMoEPrepareAndFinalize: """Create a FlashInfer CUTLASS fused-MoE prepare finalize kernel""" use_dp = moe.moe_parallel_config.dp_size > 1 - return FlashInferCutlassMoEPrepareAndFinalize(use_dp) + enable_alltoallv = envs.VLLM_ALL2ALL_BACKEND == "flashinfer_all2allv" + return create_flashinfer_prepare_finalize( + use_dp=use_dp, use_nvfp4=True, enable_alltoallv=enable_alltoallv) def select_nvfp4_gemm_impl( diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py index aa66a42c588a..b779a5355b67 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py @@ -13,7 +13,7 @@ from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_moe import ( FlashInferExperts) from vllm.model_executor.layers.fused_moe.flashinfer_cutlass_prepare_finalize import ( # noqa: E501 - FlashInferCutlassMoEPrepareAndFinalize) + create_flashinfer_prepare_finalize) logger = init_logger(__name__) @@ -173,7 +173,7 @@ def build_flashinfer_fp8_cutlass_moe_prepare_finalize( moe: Optional[FusedMoEConfig], ) -> mk.FusedMoEPrepareAndFinalize: """Create a FlashInfer CUTLASS fused-MoE 
prepare finalize kernel""" use_dp = moe.moe_parallel_config.dp_size > 1 if moe is not None else False - return FlashInferCutlassMoEPrepareAndFinalize(use_dp) + return create_flashinfer_prepare_finalize(use_dp) def select_cutlass_fp8_gemm_impl( diff --git a/vllm/utils/flashinfer.py b/vllm/utils/flashinfer.py index ebc7a56ff906..734cd938792a 100644 --- a/vllm/utils/flashinfer.py +++ b/vllm/utils/flashinfer.py @@ -97,6 +97,34 @@ def wrapper(*args, **kwargs): fallback_fn=lambda *args, **kwargs: contextlib.nullcontext()) +@functools.cache +def has_flashinfer_comm() -> bool: + """Return ``True`` if FlashInfer comm module is available.""" + return has_flashinfer() and importlib.util.find_spec( + "flashinfer.comm") is not None + + +@functools.cache +def has_flashinfer_all2all() -> bool: + """Return ``True`` if FlashInfer mnnvl all2all is available.""" + if not has_flashinfer_comm(): + return False + + # Check if all required functions are available + required_functions = [ + ("flashinfer.comm", "Mapping"), + ("flashinfer.comm.mnnvl", "MnnvlMemory"), + ("flashinfer.comm.trtllm_alltoall", "MnnvlMoe"), + ("flashinfer.comm.trtllm_alltoall", "MoEAlltoallInfo"), + ] + + for module_name, attr_name in required_functions: + mod = _get_submodule(module_name) + if not mod or not hasattr(mod, attr_name): + return False + return True + + @functools.cache def has_flashinfer_moe() -> bool: """Return ``True`` if FlashInfer MoE module is available.""" @@ -402,6 +430,8 @@ def flashinfer_disable_q_quantization() -> bool: "trtllm_fp4_block_scale_moe", "autotune", "has_flashinfer_moe", + "has_flashinfer_comm", + "has_flashinfer_all2all", "has_flashinfer_cutlass_fused_moe", "has_nvidia_artifactory", "supports_trtllm_attention", From f84a472a03cf86cc7362eedaf6c2f09cfa90982c Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Wed, 24 Sep 2025 12:02:08 -0700 Subject: [PATCH 346/518] Suppress benign cuBLAS warning when capturing cudagraphs with DBO (#25596) Signed-off-by: Sage Moore --- vllm/v1/worker/gpu_ubatch_wrapper.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index d636e7af72ea..8f6044e59418 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -104,6 +104,7 @@ def __init__(self, runnable: Callable, vllm_config: VllmConfig, self.graph_pool = current_platform.get_global_graph_pool() self.sm_control = self._create_sm_control_context(vllm_config) + self.device = device @staticmethod def _create_sm_control_context(vllm_config: VllmConfig): @@ -168,6 +169,7 @@ def _capture_ubatches(self, ubatch_metadata, model) -> torch.Tensor: @torch.inference_mode() def _capture_ubatch_thread(results, ubatch_metadata): + torch.cuda.set_device(self.device) ubatch_context = ubatch_metadata.context with torch.cuda.stream(ubatch_context.compute_stream): _ = torch.cuda.current_blas_handle() From 8c853050e7da6a868d7b583a51b94592c86f2d19 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Wed, 24 Sep 2025 20:30:33 +0100 Subject: [PATCH 347/518] [Docs] Enable `fail_on_warning` for the docs build in CI (#25580) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .readthedocs.yaml | 1 + docs/features/nixl_connector_usage.md | 8 ++-- docs/mkdocs/hooks/generate_argparse.py | 5 +- docs/models/generative_models.md | 2 +- docs/models/supported_models.md | 2 +- docs/usage/README.md | 2 +- .../dashboards/grafana/README.md | 4 +- .../dashboards/perses/README.md | 4 +- 
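
The `gpu_ubatch_wrapper.py` change above relies on the fact that every new Python thread has its own CUDA device context, so a capture worker thread must call `torch.cuda.set_device()` itself, and touching `torch.cuda.current_blas_handle()` forces cuBLAS to initialize at a known point rather than mid-capture. A rough sketch of that pattern, assuming a CUDA-capable PyTorch build; names here are illustrative, not the wrapper's actual code:

```python
# Each thread must bind to the device before using CUDA streams, and
# pre-creating the cuBLAS handle avoids lazy init during graph capture.
import threading

import torch


def capture_worker(device: torch.device, results: list) -> None:
    torch.cuda.set_device(device)            # bind this thread to the device
    stream = torch.cuda.Stream(device)
    with torch.cuda.stream(stream):
        _ = torch.cuda.current_blas_handle()  # force cuBLAS handle creation
        results.append(torch.cuda.current_device())


if __name__ == "__main__":
    if torch.cuda.is_available():
        out: list = []
        t = threading.Thread(target=capture_worker,
                             args=(torch.device("cuda:0"), out))
        t.start()
        t.join()
        print(out)
```
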
vllm/attention/ops/common.py | 36 +++++++------- vllm/inputs/data.py | 4 +- vllm/model_executor/layers/fused_moe/layer.py | 12 ++--- .../model_loader/weight_utils.py | 4 +- vllm/model_executor/models/qwen3_vl.py | 20 ++++---- vllm/model_executor/models/zamba2.py | 5 +- vllm/reasoning/granite_reasoning_parser.py | 10 ++-- vllm/transformers_utils/configs/radio.py | 47 +++++++------------ vllm/transformers_utils/dynamic_module.py | 2 +- vllm/v1/kv_offload/__init__.py | 0 vllm/v1/kv_offload/backends/__init__.py | 0 vllm/v1/kv_offload/worker/__init__.py | 0 20 files changed, 81 insertions(+), 87 deletions(-) create mode 100644 vllm/v1/kv_offload/__init__.py create mode 100644 vllm/v1/kv_offload/backends/__init__.py create mode 100644 vllm/v1/kv_offload/worker/__init__.py diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 432975009068..d83d6df35ed9 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -13,6 +13,7 @@ build: mkdocs: configuration: mkdocs.yaml + fail_on_warning: true # Optionally declare the Python requirements required to build your docs python: diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md index de50f091df42..afecbc82947b 100644 --- a/docs/features/nixl_connector_usage.md +++ b/docs/features/nixl_connector_usage.md @@ -9,7 +9,7 @@ NixlConnector is a high-performance KV cache transfer connector for vLLM's disag Install the NIXL library: `uv pip install nixl`, as a quick start. - Refer to [NIXL official repository](https://github.com/ai-dynamo/nixl) for more installation instructions -- The specified required NIXL version can be found in [requirements/kv_connectors.txt](../../requirements/kv_connectors.txt) and other relevant config files +- The specified required NIXL version can be found in [requirements/kv_connectors.txt](gh-file:requirements/kv_connectors.txt) and other relevant config files ### Transport Configuration @@ -154,6 +154,6 @@ python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \ Refer to these example scripts in the vLLM repository: -- [run_accuracy_test.sh](../../tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) -- [toy_proxy_server.py](../../tests/v1/kv_connector/nixl_integration/toy_proxy_server.py) -- [test_accuracy.py](../../tests/v1/kv_connector/nixl_integration/test_accuracy.py) +- [run_accuracy_test.sh](gh-file:tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh) +- [toy_proxy_server.py](gh-file:tests/v1/kv_connector/nixl_integration/toy_proxy_server.py) +- [test_accuracy.py](gh-file:tests/v1/kv_connector/nixl_integration/test_accuracy.py) diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py index 91454ec272b8..ac70980ac9d0 100644 --- a/docs/mkdocs/hooks/generate_argparse.py +++ b/docs/mkdocs/hooks/generate_argparse.py @@ -32,8 +32,9 @@ def auto_mock(module, attr, max_mocks=50): for _ in range(max_mocks): try: # First treat attr as an attr, then as a submodule - return getattr(importlib.import_module(module), attr, - importlib.import_module(f"{module}.{attr}")) + with patch("importlib.metadata.version", return_value="0.0.0"): + return getattr(importlib.import_module(module), attr, + importlib.import_module(f"{module}.{attr}")) except importlib.metadata.PackageNotFoundError as e: raise e except ModuleNotFoundError as e: diff --git a/docs/models/generative_models.md b/docs/models/generative_models.md index d02522a6657d..05f8d16cc4ca 100644 --- a/docs/models/generative_models.md +++ b/docs/models/generative_models.md @@ -4,7 +4,7 @@ 
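
The `generate_argparse.py` hook above wraps the import in `patch("importlib.metadata.version", return_value="0.0.0")` so that modules which read their own version at import time do not raise `PackageNotFoundError` in a docs build without package metadata installed. A standalone sketch of that workaround, using a placeholder module name:

```python
# Stub importlib.metadata.version only for the duration of the import.
import importlib
import importlib.metadata
from unittest.mock import patch


def import_with_stubbed_version(module_name: str, attr: str = ""):
    with patch("importlib.metadata.version", return_value="0.0.0"):
        module = importlib.import_module(module_name)
    return getattr(module, attr) if attr else module


if __name__ == "__main__":
    # json does not need the stub; this only shows the call shape.
    print(import_with_stubbed_version("json", "dumps")({"docs": "build"}))
```
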
vLLM provides first-class support for generative models, which covers most of LL In vLLM, generative models implement the[VllmModelForTextGeneration][vllm.model_executor.models.VllmModelForTextGeneration] interface. Based on the final hidden states of the input, these models output log probabilities of the tokens to generate, -which are then passed through [Sampler][vllm.model_executor.layers.sampler.Sampler] to obtain the final text. +which are then passed through [Sampler][vllm.v1.sample.sampler.Sampler] to obtain the final text. ## Configuration diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 9d288667a318..81bd12f9a29f 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -29,7 +29,7 @@ _*Vision-language models currently accept only image inputs. Support for video i If the Transformers model implementation follows all the steps in [writing a custom model](#writing-custom-models) then, when used with the Transformers backend, it will be compatible with the following features of vLLM: -- All the features listed in the [compatibility matrix](../features/compatibility_matrix.md#feature-x-feature) +- All the features listed in the [compatibility matrix](../features/README.md#feature-x-feature) - Any combination of the following vLLM parallelisation schemes: - Pipeline parallel - Tensor parallel diff --git a/docs/usage/README.md b/docs/usage/README.md index 83aea121819f..0c63d01f0f99 100644 --- a/docs/usage/README.md +++ b/docs/usage/README.md @@ -1,6 +1,6 @@ # Using vLLM -First, vLLM must be [installed](../getting_started/installation) for your chosen device in either a Python or Docker environment. +First, vLLM must be [installed](../getting_started/installation/) for your chosen device in either a Python or Docker environment. Then, vLLM supports the following usage patterns: diff --git a/examples/online_serving/dashboards/grafana/README.md b/examples/online_serving/dashboards/grafana/README.md index e42b0f814367..abe5f8cf2367 100644 --- a/examples/online_serving/dashboards/grafana/README.md +++ b/examples/online_serving/dashboards/grafana/README.md @@ -11,9 +11,9 @@ vLLM performance and metrics. ## Dashboard Descriptions -- **[performance_statistics.json](./performance_statistics.json)**: Tracks performance metrics including latency and +- **performance_statistics.json**: Tracks performance metrics including latency and throughput for your vLLM service. -- **[query_statistics.json](./query_statistics.json)**: Tracks query performance, request volume, and key +- **query_statistics.json**: Tracks query performance, request volume, and key performance indicators for your vLLM service. 
## Deployment Options diff --git a/examples/online_serving/dashboards/perses/README.md b/examples/online_serving/dashboards/perses/README.md index ae04fd17b1b9..780a6ef13a3e 100644 --- a/examples/online_serving/dashboards/perses/README.md +++ b/examples/online_serving/dashboards/perses/README.md @@ -21,9 +21,9 @@ deployment methods: ## Dashboard Descriptions -- **[performance_statistics.yaml](./performance_statistics.yaml)**: Performance metrics with aggregated latency +- **performance_statistics.yaml**: Performance metrics with aggregated latency statistics -- **[query_statistics.yaml](./query_statistics.yaml)**: Query performance and deployment metrics +- **query_statistics.yaml**: Query performance and deployment metrics ## Deployment Options diff --git a/vllm/attention/ops/common.py b/vllm/attention/ops/common.py index 6253e1e56b0f..c8efa6e63a2e 100644 --- a/vllm/attention/ops/common.py +++ b/vllm/attention/ops/common.py @@ -18,12 +18,14 @@ def _correct_attn_cp_out_kernel(outputs_ptr, new_output_ptr, lses_ptr, final attention output. Args: - output: [ B, H, D ] - lses : [ N, B, H ] - cp, batch, q_heads, v_head_dim - Return: - output: [ B, H, D ] - lse : [ B, H ] + outputs_ptr (triton.PointerType): + Pointer to input tensor of shape [ B, H, D ] + lses_ptr (triton.PointerType): + Pointer to input tensor of shape [ N, B, H ] + new_output_ptr (triton.PointerType): + Pointer to output tensor of shape [ B, H, D ] + vlse_ptr (triton.PointerType): + Pointer to output tensor of shape [ B, H ] """ batch_idx = tl.program_id(axis=0).to(tl.int64) head_idx = tl.program_id(axis=1).to(tl.int64) @@ -81,19 +83,19 @@ def call_kernel(self, kernel, grid, *regular_args, **const_args): self.inner_kernel[grid](*regular_args) -def correct_attn_out(out: torch.Tensor, lses: torch.Tensor, cp_rank: int, - ctx: CPTritonContext): - """ - Apply the all-gathered lses to correct each local rank's attention - output. we still need perform a cross-rank reduction to obtain the - final attention output. +def correct_attn_out( + out: torch.Tensor, lses: torch.Tensor, cp_rank: int, + ctx: CPTritonContext) -> tuple[torch.Tensor, torch.Tensor]: + """Correct the attention output using the all-gathered lses. Args: - output: [ B, H, D ] - lses : [ N, B, H ] - Return: - output: [ B, H, D ] - lse : [ B, H ] + out: Tensor of shape [ B, H, D ] + lses: Tensor of shape [ N, B, H ] + cp_rank: Current rank in the context-parallel group + ctx: Triton context to avoid recompilation + + Returns: + Tuple of (out, lse) with corrected attention and final log-sum-exp. """ if ctx is None: ctx = CPTritonContext() diff --git a/vllm/inputs/data.py b/vllm/inputs/data.py index 6a005aa634e8..1718c0767ab6 100644 --- a/vllm/inputs/data.py +++ b/vllm/inputs/data.py @@ -287,8 +287,8 @@ class EncoderDecoderInputs(TypedDict): SingletonInputs = Union[TokenInputs, EmbedsInputs, "MultiModalInputs"] """ -A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be -passed to [`vllm.sequence.Sequence`][]. +A processed [`SingletonPrompt`][vllm.inputs.data.SingletonPrompt] which can be +passed to [`Sequence`][collections.abc.Sequence]. 
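
The reworked `correct_attn_out` docstring above describes scaling each rank's partial attention output by its all-gathered log-sum-exp so that a later cross-rank sum recovers the exact softmax-weighted result. A plain-PyTorch reference of that math, assuming the standard log-sum-exp merge; the Triton kernel's memory layout and numerics may differ:

```python
import torch


def correct_attn_out_reference(out: torch.Tensor, lses: torch.Tensor,
                               cp_rank: int):
    """out: [B, H, D] local partial output; lses: [N, B, H] all-gathered."""
    vlse = torch.logsumexp(lses, dim=0)       # [B, H] combined log-sum-exp
    scale = torch.exp(lses[cp_rank] - vlse)   # [B, H] this rank's weight
    return out * scale.unsqueeze(-1), vlse


if __name__ == "__main__":
    N, B, H, D = 4, 2, 8, 64
    lses = torch.randn(N, B, H)
    outs = torch.randn(N, B, H, D)
    # Correct every rank's partial output, then reduce across ranks.
    corrected = [correct_attn_out_reference(outs[r], lses, r)[0]
                 for r in range(N)]
    merged = torch.stack(corrected).sum(dim=0)
    print(merged.shape)  # torch.Size([2, 8, 64])
```
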
""" ProcessorInputs = Union[DecoderOnlyInputs, EncoderDecoderInputs] diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 89e0cee08170..b68190e5d1c1 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -57,7 +57,7 @@ FusedMoEPermuteExpertsUnpermute = None # type: ignore FusedMoEPrepareAndFinalize = None # type: ignore - def eplb_map_to_physical_and_record( + def _eplb_map_to_physical_and_record( topk_ids: torch.Tensor, expert_load_view: torch.Tensor, logical_to_physical_map: torch.Tensor, logical_replica_count: torch.Tensor, @@ -65,6 +65,7 @@ def eplb_map_to_physical_and_record( # CPU fallback: no EPLB so just return as is return topk_ids + eplb_map_to_physical_and_record = _eplb_map_to_physical_and_record if is_rocm_aiter_moe_enabled(): from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501 @@ -807,12 +808,11 @@ def maybe_roundup_hidden_size( if necessary. Args: - hidden_size(int): Layer hidden-size + hidden_size: Layer hidden-size act_dtype: Data type of the layer activations. - quant_config(FusedMoEQuantConfig): Fused MoE quantization configuration. - moe_parallel_config(FusedMoEParallelConfig): Fused MoE parallelization - strategy configuration. - + quant_config: Fused MoE quantization configuration. + moe_parallel_config: Fused MoE parallelization strategy configuration. + Return: Rounded up hidden_size if rounding up is required based on the configs. Original hidden size otherwise. diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index a72086da18c4..cad32fee1d0f 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -13,7 +13,7 @@ from collections.abc import Generator from contextlib import contextmanager from pathlib import Path -from typing import Any, Callable, Optional, Union +from typing import IO, Any, Callable, Optional, Union import filelock import huggingface_hub.constants @@ -102,7 +102,7 @@ def get_lock(model_name_or_path: Union[str, Path], @contextmanager def atomic_writer(filepath: Union[str, Path], mode: str = 'w', - encoding: Optional[str] = None): + encoding: Optional[str] = None) -> Generator[IO]: """ Context manager that provides an atomic file writing routine. diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index ee6703f7229e..d4f1547fd8e5 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -1445,14 +1445,18 @@ def forward( **NOTE**: If mrope is enabled (default setting for Qwen3VL opensource models), the shape will be `(3, seq_len)`, otherwise it will be `(seq_len,). - pixel_values: Pixel values to be fed to a model. - `None` if no images are passed. - image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in LLM. - `None` if no images are passed. - pixel_values_videos: Pixel values of videos to be fed to a model. - `None` if no videos are passed. - video_grid_thw: Tensor `(n_videos, 3)` of video 3D grid in LLM. - `None` if no videos are passed. + intermediate_tensors: Intermediate tensors from previous pipeline + stages. + inputs_embeds: Pre-computed input embeddings. + **kwargs: Additional keyword arguments including: + - pixel_values: Pixel values to be fed to a model. + `None` if no images are passed. + - image_grid_thw: Tensor `(n_images, 3)` of image 3D grid in + LLM. 
`None` if no images are passed. + - pixel_values_videos: Pixel values of videos to be fed to a + model. `None` if no videos are passed. + - video_grid_thw: Tensor `(n_videos, 3)` of video 3D grid in + LLM. `None` if no videos are passed. """ if intermediate_tensors is not None: diff --git a/vllm/model_executor/models/zamba2.py b/vllm/model_executor/models/zamba2.py index a0d93045b74c..1d68320bd9b2 100644 --- a/vllm/model_executor/models/zamba2.py +++ b/vllm/model_executor/models/zamba2.py @@ -944,11 +944,10 @@ def compute_logits( hidden_states: torch.Tensor, ) -> Optional[torch.Tensor]: """Compute logits for next token prediction. - + Args: hidden_states: Hidden states from model forward pass - sampling_metadata: Metadata for sampling process - + Returns: Logits for next token prediction """ diff --git a/vllm/reasoning/granite_reasoning_parser.py b/vllm/reasoning/granite_reasoning_parser.py index 5820001b918f..212e14b09286 100644 --- a/vllm/reasoning/granite_reasoning_parser.py +++ b/vllm/reasoning/granite_reasoning_parser.py @@ -278,11 +278,11 @@ def _get_delta_message_with_both_bounds( content and normal (response) content. Args: - delta_text (str): Text to consider and parse content from. - reasoning_content (str): reasoning content from current_text. - response_content (str): response content from current_text. - current_text (str): The full previous + delta text. - response_seq_len(str): Len of the complete response sequence used. + delta_text: Text to consider and parse content from. + reasoning_content: reasoning content from current_text. + response_content: response content from current_text. + current_text: The full previous + delta text. + response_seq_len: Len of the complete response sequence used. Returns: DeltaMessage: Message containing the parsed content. diff --git a/vllm/transformers_utils/configs/radio.py b/vllm/transformers_utils/configs/radio.py index 58ad7b8187bc..e1d96294d6ad 100644 --- a/vllm/transformers_utils/configs/radio.py +++ b/vllm/transformers_utils/configs/radio.py @@ -27,36 +27,23 @@ class RadioConfig(PretrainedConfig): specified arguments, defining the model architecture. Args: - model_name (`str`, *optional*, defaults to "vit_base_patch16_224"): - Name of the vision transformer model (e.g., "vit_base_patch16_224"). - Used to determine architecture dimensions from - `VIT_TIMM_DIM_BY_NAME`. - image_size (`int`, *optional*, defaults to 224): - The size (resolution) of each image. - patch_size (`int`, *optional*, defaults to 16): - The size (resolution) of each patch. - qkv_bias (`bool`, *optional*, defaults to True): - Whether to add a bias to the queries, keys and values. - qk_normalization (`bool`, *optional*, defaults to False): - Whether to apply normalization to queries and keys. - norm_type (`str`, *optional*, defaults to "layer_norm"): - The normalization type to use. - layer_norm_eps (`float`, *optional*, defaults to 1e-6): - The epsilon used by the layer normalization layers. - initializer_factor (`float`, *optional*, defaults to 1.0): - A factor for initializing all weight matrices. - hidden_act (`str`, *optional*, defaults to "gelu"): - The non-linear activation function in the encoder. - max_img_size (`int`, *optional*, defaults to 2048): - Maximum image size for position embeddings. - norm_mean (`tuple` or `list`, *optional*, - defaults to (0.48145466, 0.4578275, 0.40821073)): - Mean values for image normalization (RGB channels). 
- norm_std (`tuple` or `list`, *optional*, - defaults to (0.26862954, 0.26130258, 0.27577711)): - Standard deviation values for image normalization (RGB channels). - reg_tokens (`int`, *optional*): - Number of register tokens to use. + model_name: Name of the vision transformer model + (e.g., "vit_base_patch16_224"). Used to determine architecture + dimensions from `VIT_TIMM_DIM_BY_NAME`. + image_size: The size (resolution) of each image. + patch_size: The size (resolution) of each patch. + qkv_bias: Whether to add a bias to the queries, keys and values. + qk_normalization: Whether to apply normalization to queries and keys. + norm_type: The normalization type to use. + layer_norm_eps: The epsilon used by the layer normalization layers. + initializer_factor: A factor for initializing all weight matrices. + hidden_act: The non-linear activation function in the encoder. + max_img_size: Maximum image size for position embeddings. + norm_mean: Mean values for image normalization (RGB channels). + Defaults to (0.48145466, 0.4578275, 0.40821073)). + norm_std: Standard deviation values for image normalization + (RGB channels). Defaults to (0.26862954, 0.26130258, 0.27577711)). + reg_tokens: Number of register tokens to use. """ model_type = "radio" diff --git a/vllm/transformers_utils/dynamic_module.py b/vllm/transformers_utils/dynamic_module.py index 05191f95216c..3c273ad41da0 100644 --- a/vllm/transformers_utils/dynamic_module.py +++ b/vllm/transformers_utils/dynamic_module.py @@ -27,7 +27,7 @@ def try_get_class_from_dynamic_module( **kwargs, ) -> Optional[type]: """ - As [transformers.dynamic_module_utils.get_class_from_dynamic_module][], + As `transformers.dynamic_module_utils.get_class_from_dynamic_module`, but ignoring any errors. """ try: diff --git a/vllm/v1/kv_offload/__init__.py b/vllm/v1/kv_offload/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/vllm/v1/kv_offload/backends/__init__.py b/vllm/v1/kv_offload/backends/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/vllm/v1/kv_offload/worker/__init__.py b/vllm/v1/kv_offload/worker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 From e6750d0b18e07631bb2ea7f256f7dd444d4936fa Mon Sep 17 00:00:00 2001 From: Woosuk Kwon Date: Wed, 24 Sep 2025 13:24:40 -0700 Subject: [PATCH 348/518] [V0 Deprecation] Remove unused classes in attention (#25541) Signed-off-by: Woosuk Kwon Signed-off-by: Woosuk Kwon --- vllm/attention/__init__.py | 6 +- vllm/attention/backends/abstract.py | 145 +------ vllm/attention/backends/utils.py | 545 +------------------------ vllm/v1/attention/backends/cpu_attn.py | 15 - vllm/v1/attention/backends/pallas.py | 5 - vllm/v1/spec_decode/eagle.py | 11 +- 6 files changed, 11 insertions(+), 716 deletions(-) diff --git a/vllm/attention/__init__.py b/vllm/attention/__init__.py index dcb2aa68fbee..1b37bd1f6100 100644 --- a/vllm/attention/__init__.py +++ b/vllm/attention/__init__.py @@ -2,9 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from vllm.attention.backends.abstract import (AttentionBackend, - AttentionMetadata, - AttentionMetadataBuilder, - AttentionState, AttentionType) + AttentionMetadata, AttentionType) from vllm.attention.layer import Attention from vllm.attention.selector import get_attn_backend @@ -13,7 +11,5 @@ "AttentionBackend", "AttentionMetadata", "AttentionType", - "AttentionMetadataBuilder", - "AttentionState", "get_attn_backend", ] diff --git a/vllm/attention/backends/abstract.py 
b/vllm/attention/backends/abstract.py index 1b392cd7c88d..0f51ef4b2e51 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -2,10 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from abc import ABC, abstractmethod -from contextlib import contextmanager -from dataclasses import dataclass, fields -from typing import (Any, Dict, Generic, List, Optional, Protocol, Set, Tuple, - Type, TypeVar) +from typing import Generic, List, Optional, Protocol, Tuple, Type, TypeVar import torch @@ -49,18 +46,13 @@ def get_impl_cls() -> Type["AttentionImpl"]: def get_metadata_cls() -> Type["AttentionMetadata"]: raise NotImplementedError - @staticmethod - @abstractmethod - def get_state_cls() -> Type["AttentionState"]: - raise NotImplementedError - @classmethod def make_metadata(cls, *args, **kwargs) -> "AttentionMetadata": return cls.get_metadata_cls()(*args, **kwargs) @staticmethod @abstractmethod - def get_builder_cls() -> Type["AttentionMetadataBuilder"]: + def get_builder_cls(): # -> Type["AttentionMetadataBuilder"]: raise NotImplementedError @staticmethod @@ -77,149 +69,18 @@ def get_kv_cache_shape( def get_kv_cache_stride_order() -> Tuple[int, ...]: raise NotImplementedError - @staticmethod - @abstractmethod - def swap_blocks( - src_kv_cache: torch.Tensor, - dst_kv_cache: torch.Tensor, - src_to_dst: torch.Tensor, - ) -> None: - raise NotImplementedError - - @staticmethod - @abstractmethod - def copy_blocks( - kv_caches: List[torch.Tensor], - src_to_dists: torch.Tensor, - ) -> None: - raise NotImplementedError - @classmethod def full_cls_name(cls) -> tuple[str, str]: return (cls.__module__, cls.__qualname__) -@dataclass class AttentionMetadata: - """Attention metadata for prefill and decode batched together.""" - # Total number of prefill requests. - num_prefills: int - # Number of prefill tokens. - num_prefill_tokens: int - # Number of decode tokens. Note that it is equivalent to the number of - # decode requests. - num_decode_tokens: int - # (num_tokens,). The indices of the token slots that input tokens will be - # stored into. E.g., if `slot_mapping` is [35, 2, 17] and the block size - # is 16, the three tokens are stored in the 3rd slot in block 2, 2nd slot - # in block 0, and 1st slot in block 1, respectively. - slot_mapping: torch.Tensor - - # Enable/disable KV scales calculation. This is so that we can disable the - # calculation until after prefill and cuda graph capture. - enable_kv_scales_calculation: bool - - @property - @abstractmethod - def prefill_metadata(self) -> Optional["AttentionMetadata"]: - """Return the attention metadata that's required to run prefill - attention.""" - pass - - @property - @abstractmethod - def decode_metadata(self) -> Optional["AttentionMetadata"]: - """Return the attention metadata that's required to run decode - attention.""" - pass - - def asdict_zerocopy(self, - skip_fields: Optional[Set[str]] = None - ) -> Dict[str, Any]: - """Similar to dataclasses.asdict, but avoids deepcopying.""" - if skip_fields is None: - skip_fields = set() - # Note that if we add dataclasses as fields, they will need - # similar handling. - return { - field.name: getattr(self, field.name) - for field in fields(self) if field.name not in skip_fields - } + pass T = TypeVar("T", bound=AttentionMetadata) -class AttentionState(ABC, Generic[T]): - """Holds attention backend-specific objects reused during the - lifetime of the model runner.""" - - @abstractmethod - def __init__(self, runner: Any): - ... 
- - @abstractmethod - @contextmanager - def graph_capture(self, max_batch_size: int): - """Context manager used when capturing CUDA graphs.""" - yield - - @abstractmethod - def graph_clone(self, batch_size: int) -> "AttentionState[T]": - """Clone attention state to save in CUDA graph metadata.""" - ... - - @abstractmethod - def graph_capture_get_metadata_for_batch( - self, - batch_size: int, - is_encoder_decoder_model: bool = False) -> T: - """Get attention metadata for CUDA graph capture of batch_size.""" - ... - - @abstractmethod - def get_graph_input_buffers( - self, - attn_metadata: T, - is_encoder_decoder_model: bool = False) -> Dict[str, Any]: - """Get attention-specific input buffers for CUDA graph capture.""" - ... - - @abstractmethod - def prepare_graph_input_buffers( - self, - input_buffers: Dict[str, Any], - attn_metadata: T, - is_encoder_decoder_model: bool = False) -> None: - """In-place modify input buffers dict for CUDA graph replay.""" - ... - - @abstractmethod - def begin_forward(self, model_input) -> None: - """Prepare state for forward pass.""" - ... - - -class AttentionMetadataBuilder(ABC, Generic[T]): - """Abstract class for attention metadata builders.""" - - @abstractmethod - def __init__(self, input_builder) -> None: - """Create the builder, remember some configuration and parameters.""" - raise NotImplementedError - - @abstractmethod - def prepare(self) -> None: - """Prepare for one batch.""" - raise NotImplementedError - - @abstractmethod - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int) -> T: - """Build attention metadata with on-device tensors.""" - raise NotImplementedError - - class AttentionLayer(Protocol): _q_scale: torch.Tensor diff --git a/vllm/attention/backends/utils.py b/vllm/attention/backends/utils.py index accb3ab6ae2b..6b8d97be7050 100644 --- a/vllm/attention/backends/utils.py +++ b/vllm/attention/backends/utils.py @@ -1,559 +1,16 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Attention backend utils""" -from contextlib import contextmanager from dataclasses import dataclass -from itertools import accumulate -from typing import Any, Dict, List, Optional, Tuple, Type, TypeVar, Union +from typing import Optional -import numpy as np -import torch - -from vllm.attention import (AttentionMetadata, AttentionMetadataBuilder, - AttentionState) -from vllm.attention.backends.abstract import AttentionType from vllm.config import ModelConfig from vllm.logger import init_logger -from vllm.utils import async_tensor_h2d, make_tensor_with_pad logger = init_logger(__name__) PAD_SLOT_ID = -1 -# Switch to numpy implementation of compute_slot_mapping -# if we have at least this many elements. Could be tuned further. -_COMPUTE_SLOT_MAPPING_NUMPY_NUMEL = 256 - - -def is_block_tables_empty(block_tables: Union[None, Dict]): - """ - Check if block_tables is None or a dictionary with all None values. - """ - if block_tables is None: - return True - return (isinstance(block_tables, dict) - and all(value is None for value in block_tables.values())) - - -def compute_slot_mapping_start_idx(is_prompt: bool, query_len: int, - context_len: int, sliding_window: int): - """ - Compute the start index of slot mapping. 
- """ - start_idx = 0 - if is_prompt and sliding_window is not None: - start_idx = max(0, query_len - sliding_window) - return start_idx - - -def _compute_slot_mapping_python(slot_mapping: List[int], - block_table: List[int], range_start: int, - range_end: int, block_size: int): - for i in range(range_start, range_end): - block_number = block_table[i // block_size] - block_offset = i % block_size - slot = block_number * block_size + block_offset - slot_mapping.append(slot) - - -def _compute_slot_mapping_numpy(slot_mapping: List[int], - block_table: List[int], range_start: int, - range_end: int, block_size: int): - block_table_array = np.array(block_table) - idx = np.arange(range_start, range_end) - block_offset = idx % block_size - idx //= block_size - seq_slot_mapping_array = block_table_array[idx] - seq_slot_mapping_array *= block_size - seq_slot_mapping_array += block_offset - slot_mapping.extend(seq_slot_mapping_array) - - -def compute_slot_mapping(is_profile_run: bool, slot_mapping: List[int], - seq_id: int, seq_len: int, context_len: int, - start_idx: int, block_size: int, - block_tables: Dict[int, List[int]]): - """ - Compute slot mapping. - """ - if is_profile_run: - # During memory profiling, the block tables are not - # initialized yet. In this case, we just use a dummy - # slot mapping. - # In embeddings, the block tables are {seq_id: None}. - slot_mapping.extend([PAD_SLOT_ID] * seq_len) - return - - # Mask the [0, start_idx) tokens of the prompt with - # PAD_SLOT_ID, where start_idx is max(0, seq_len - - # sliding_window). For example, if the prompt len is 10, - # sliding window is 8, and block size is 4, the first two - # tokens are masked and the slot mapping will be - # [-1, -1, 2, 3, 4, 5, 6, 7, 0, 1]. - padding_mask_len = max(0, start_idx - context_len) - slot_mapping.extend([PAD_SLOT_ID] * padding_mask_len) - - range_start = max(start_idx, context_len) - range_end = seq_len - numel = range_end - range_start - block_table = block_tables[seq_id] - - # numpy implementation will be faster than python if we have - # many elements, otherwise it will be slower. 
- if numel < _COMPUTE_SLOT_MAPPING_NUMPY_NUMEL: - _compute_slot_mapping_python(slot_mapping, block_table, range_start, - range_end, block_size) - else: - _compute_slot_mapping_numpy(slot_mapping, block_table, range_start, - range_end, block_size) - - -TAttentionMetadata = TypeVar("TAttentionMetadata", bound='AttentionMetadata') - - -class CommonMetadataBuilder(AttentionMetadataBuilder[TAttentionMetadata]): - - _metadata_cls: Type[TAttentionMetadata] - - def __init__(self, input_builder): - self.input_builder = input_builder - self.runner = input_builder.runner - - self.sliding_window = input_builder.sliding_window - self.block_size = input_builder.block_size - - def prepare(self): - self.slot_mapping: List[int] = [] - self.prefill_seq_lens: List[int] = [] - self.context_lens: List[int] = [] - self.block_tables: List[List[int]] = [] - self.curr_seq_lens: List[int] = [] - self.num_prefills = 0 - self.num_prefill_tokens = 0 - self.num_decode_tokens = 0 - - def _add_seq_group(self, inter_data, chunked_prefill_enabled: bool): - is_prompt = inter_data.is_prompt - block_tables = inter_data.block_tables - - for (seq_id, token_len, seq_len, curr_seq_len, query_len, context_len, - curr_sliding_window_block) in zip( - inter_data.seq_ids, [len(t) for t in inter_data.input_tokens], - inter_data.orig_seq_lens, inter_data.seq_lens, - inter_data.query_lens, inter_data.context_lens, - inter_data.curr_sliding_window_blocks): - self.context_lens.append(context_len) - if is_prompt: - self.num_prefills += 1 - self.num_prefill_tokens += token_len - self.prefill_seq_lens.append(seq_len) - else: - assert query_len == 1, ( - "seq_len: {}, context_len: {}, query_len: {}".format( - seq_len, context_len, query_len)) - self.num_decode_tokens += query_len - self.curr_seq_lens.append(curr_seq_len) - - # Compute block table. - # TODO(sang): Combine chunked prefill and prefix caching by - # only allowing multiple of block_size chunk size. - # NOTE: This only works for oooooooxxx style attention. - block_table = [] - if inter_data.prefix_cache_hit: - block_table = block_tables[seq_id] - elif ((chunked_prefill_enabled or not is_prompt) - and block_tables is not None): - if curr_sliding_window_block == 0: - block_table = block_tables[seq_id] - else: - block_table = block_tables[seq_id][ - -curr_sliding_window_block:] - self.block_tables.append(block_table) - - # Compute slot mapping. - is_profile_run = is_block_tables_empty(block_tables) - start_idx = compute_slot_mapping_start_idx(is_prompt, query_len, - context_len, - self.sliding_window) - compute_slot_mapping(is_profile_run, self.slot_mapping, seq_id, - seq_len, context_len, start_idx, - self.block_size, inter_data.block_tables) - - def build(self, seq_lens: List[int], query_lens: List[int], - cuda_graph_pad_size: int, batch_size: int): - """Build attention metadata with on-device tensors. - - Args: - seq_lens: The maybe padded sequence lengths of the input sequences. - query_lens: The query lengths of the input sequences. - cuda_graph_pad_size: The padding size for cuda graph. - -1 if cuda graph is not used. - batch_size: The maybe padded batch size. 
- """ - for inter_data in self.input_builder.inter_data_list: - self._add_seq_group(inter_data, - self.input_builder.chunked_prefill_enabled) - - device = self.runner.device - use_captured_graph = cuda_graph_pad_size != -1 - - max_query_len = max(query_lens) - max_prefill_seq_len = max(self.prefill_seq_lens, default=0) - max_decode_seq_len = max(self.curr_seq_lens, default=0) - num_decode_tokens = self.num_decode_tokens - query_start_loc = list(accumulate(query_lens, initial=0)) - seq_start_loc = list(accumulate(seq_lens, initial=0)) - - if use_captured_graph: - self.slot_mapping.extend([PAD_SLOT_ID] * cuda_graph_pad_size) - self.block_tables.extend([] * cuda_graph_pad_size) - num_decode_tokens = batch_size - - # The shape of graph_block_tables is - # [max batch size, max context len // block size]. - input_block_tables = self.runner.graph_block_tables[:batch_size] - for i, block_table in enumerate(self.block_tables): - if block_table: - input_block_tables[i, :len(block_table)] = block_table - block_tables = torch.from_numpy(input_block_tables).to( - device, non_blocking=True) - else: - block_tables = make_tensor_with_pad( - self.block_tables, - pad=0, - dtype=torch.int, - device=device, - ) - assert max_query_len > 0, "query_lens: {}".format(query_lens) - - assert device is not None - context_lens_tensor = async_tensor_h2d(self.context_lens, torch.int, - device, self.runner.pin_memory) - seq_lens_tensor = async_tensor_h2d(seq_lens, torch.int, device, - self.runner.pin_memory) - slot_mapping_tensor = async_tensor_h2d(self.slot_mapping, torch.long, - device, self.runner.pin_memory) - query_start_loc_tensor = async_tensor_h2d(query_start_loc, torch.int32, - device, - self.runner.pin_memory) - seq_start_loc_tensor = async_tensor_h2d(seq_start_loc, torch.int32, - device, self.runner.pin_memory) - - return self._metadata_cls( # type: ignore - num_prefills=self.num_prefills, - slot_mapping=slot_mapping_tensor, - enable_kv_scales_calculation=True, - num_prefill_tokens=self.num_prefill_tokens, - num_decode_tokens=num_decode_tokens, - seq_lens=seq_lens, - seq_lens_tensor=seq_lens_tensor, - max_query_len=max_query_len, - max_prefill_seq_len=max_prefill_seq_len, - max_decode_seq_len=max_decode_seq_len, - query_start_loc=query_start_loc_tensor, - seq_start_loc=seq_start_loc_tensor, - context_lens_tensor=context_lens_tensor, - block_tables=block_tables, - use_cuda_graph=use_captured_graph, - ) - - -class CommonAttentionState(AttentionState): - - def __init__(self, runner): - self.runner = runner - self._is_graph_capturing = False - - @contextmanager - def graph_capture(self, max_batch_size: int): - - self._is_graph_capturing = True - - self._graph_slot_mapping = torch.full((max_batch_size, ), - PAD_SLOT_ID, - dtype=torch.long, - device=self.runner.device) - self._graph_seq_lens = torch.ones(max_batch_size, - dtype=torch.int32, - device=self.runner.device) - self._graph_block_tables = torch.from_numpy( - self.runner.graph_block_tables).to(device=self.runner.device) - - yield - - self._is_graph_capturing = False - del self._graph_slot_mapping - del self._graph_seq_lens - del self._graph_block_tables - - def graph_clone(self, batch_size: int) -> "CommonAttentionState": - assert self._is_graph_capturing - return self.__class__(self.runner) - - def graph_capture_get_metadata_for_batch( - self, batch_size: int, is_encoder_decoder_model: bool = False): - assert self._is_graph_capturing - attn_metadata = self.runner.attn_backend.make_metadata( - num_prefills=0, - num_prefill_tokens=0, - 
num_decode_tokens=batch_size, - slot_mapping=self._graph_slot_mapping[:batch_size], - enable_kv_scales_calculation=True, - seq_lens=None, - seq_lens_tensor=self._graph_seq_lens[:batch_size], - max_query_len=1, - max_decode_query_len=1, - max_prefill_seq_len=0, - max_decode_seq_len=self.runner.max_model_len, - query_start_loc=None, - seq_start_loc=None, - context_lens_tensor=None, - block_tables=self._graph_block_tables[:batch_size], - use_cuda_graph=True, - ) - if is_encoder_decoder_model: - # The encoder decoder model works only with XFormers and - # Flash Attention backend. Assert the same. - assert self.runner.attn_backend.get_name() in \ - ["XFORMERS", "FLASH_ATTN"], \ - f"Expected attn_backend name to be either 'XFORMERS' or " \ - f"'FLASH_ATTN', but got '{self.runner.attn_backend.get_name()}'" - self._update_captured_metadata_for_enc_dec_model( - batch_size=batch_size, attn_metadata=attn_metadata) - - return attn_metadata - - def get_graph_input_buffers( - self, - attn_metadata, - is_encoder_decoder_model: bool = False) -> Dict[str, Any]: - input_buffers = { - "slot_mapping": attn_metadata.slot_mapping, - "seq_lens_tensor": attn_metadata.decode_metadata.seq_lens_tensor, - "block_tables": attn_metadata.decode_metadata.block_tables, - } - if is_encoder_decoder_model: - # The encoder decoder model works only with XFormers and - # Flash Attention backend. Assert the same. - assert self.runner.attn_backend.get_name() in \ - ["XFORMERS", "FLASH_ATTN"], \ - f"Expected attn_backend name to be either 'XFORMERS' or " \ - f"'FLASH_ATTN', but got '{self.runner.attn_backend.get_name()}'" - self._add_additional_input_buffers_for_enc_dec_model( - attn_metadata=attn_metadata, input_buffers=input_buffers) - return input_buffers - - def prepare_graph_input_buffers( - self, - input_buffers, - attn_metadata, - is_encoder_decoder_model: bool = False) -> None: - input_buffers["seq_lens_tensor"].copy_( - attn_metadata.decode_metadata.seq_lens_tensor, non_blocking=True) - input_buffers["block_tables"].copy_( - attn_metadata.decode_metadata.block_tables, non_blocking=True) - if is_encoder_decoder_model: - # The encoder decoder model works only with XFormers and - # Flash Attention backend. Assert the same. - assert self.runner.attn_backend.get_name() in\ - ["XFORMERS", "FLASH_ATTN"], \ - f"Expected attn_backend name to be either 'XFORMERS' or "\ - f"'FLASH_ATTN', but "\ - f"got '{self.runner.attn_backend.get_name()}'" - self._prepare_input_buffers_for_enc_dec_model( - attn_metadata, input_buffers) - - def begin_forward(self, model_input) -> None: - return - - def _update_captured_metadata_for_enc_dec_model(self, batch_size: int, - attn_metadata): - """ - Updates the attention metadata parameters for CUDA graph capture in an - encoder-decoder model. - - This method modifies attention-related tensors and metadata required - for CUDA graph capture in encoder-decoder models. Specifically, it - updates the cross-attention and encoder sequence tensors in the - AttentionMetadata object. - """ - # During decode phase the cross_slot_mapping will be empty. Hence set - # an empty tensor for CUDA Graph capture. 
- attn_metadata.cross_slot_mapping = torch.tensor( - [], dtype=torch.int).cuda() - attn_metadata.cross_block_tables = torch.full( - (batch_size, self.runner.get_max_block_per_batch()), - 1, - dtype=torch.int).cuda() - attn_metadata.encoder_seq_lens = torch.full((batch_size, ), - 1, - dtype=torch.int).cuda() - attn_metadata.encoder_seq_lens_tensor = torch.full( - (batch_size, ), 1, dtype=torch.int).cuda() - attn_metadata.max_encoder_seq_len = self.runner.max_model_len - attn_metadata.num_encoder_tokens = 0 - - def _add_additional_input_buffers_for_enc_dec_model( - self, attn_metadata, input_buffers: Dict[str, Any]): - """ - Saves additional input buffers specific to the encoder-decoder model - from the attention metadata. - - This method extracts and stores encoder-decoder related input buffers - from the `attn_metadata` into the `input_buffers` dictionary. The - buffers include encoder sequence lengths, cross-slot mappings, and - cross-block tables, which are essential for the encoder-decoder model - during CUDA graph replay. - """ - input_buffers["encoder_seq_lens_tensor"] = ( - attn_metadata.decode_metadata.encoder_seq_lens_tensor) - input_buffers["cross_slot_mapping"] = ( - attn_metadata.decode_metadata.cross_slot_mapping) - input_buffers["cross_block_tables"] = ( - attn_metadata.decode_metadata.cross_block_tables) - - def _prepare_input_buffers_for_enc_dec_model(self, attn_metadata, - input_buffers: Dict[str, - Any]): - """ - Populates input buffers with data from the encoder-decoder model's - attention metadata. - - This method fills the input buffers with encoder-decoder specific - tensors. It copies data from the `attn_metadata` and keyword arguments - (`kwargs`) into corresponding buffers in the `input_buffers` dictionary. - The copied data includes attention-related metadata as well as input - IDs and positional information for the encoder. - """ - input_buffers["encoder_seq_lens_tensor"].copy_( - attn_metadata.decode_metadata.encoder_seq_lens_tensor, - non_blocking=True) - input_buffers["cross_slot_mapping"].copy_( - attn_metadata.decode_metadata.cross_slot_mapping, - non_blocking=True) - input_buffers["cross_block_tables"].copy_( - attn_metadata.decode_metadata.cross_block_tables, - non_blocking=True) - - -def is_all_encoder_attn_metadata_set(attn_metadata): - ''' - All attention metadata required for encoder attention is set. - ''' - return ((attn_metadata.encoder_seq_lens is not None) - and (attn_metadata.encoder_seq_lens_tensor is not None) - and (attn_metadata.max_encoder_seq_len is not None)) - - -def is_all_cross_attn_metadata_set(attn_metadata): - ''' - All attention metadata required for enc/dec cross-attention is set. - - Superset of encoder attention required metadata. - ''' - return (attn_metadata.is_all_encoder_attn_metadata_set - and (attn_metadata.cross_slot_mapping is not None) - and (attn_metadata.cross_block_tables is not None)) - - -def get_seq_len_block_table_args( - attn_metadata, - is_prompt: bool, - attn_type: str, -) -> tuple: - ''' - The particular choice of sequence-length- and block-table-related - attributes which should be extracted from attn_metadata is dependent - on the type of attention operation. 
- - Decoder attn -> select entirely decoder self-attention-related fields - Encoder/decoder cross-attn -> select encoder sequence lengths & - cross-attn block-tables fields - Encoder attn -> select encoder sequence lengths fields & no block tables - - Arguments: - - * attn_metadata: Attention metadata structure associated with attention op - * is_prompt: True if prefill, False otherwise - * attn_type: encoder attention, decoder self-attention, - encoder/decoder cross-attention - - Returns: - - * Appropriate sequence-lengths tensor - * Appropriate max sequence-length scalar - * Appropriate block tables (or None) - ''' - - if attn_type == AttentionType.DECODER: - # Decoder self-attention - # Choose max_seq_len based on whether we are in prompt_run - if is_prompt: - max_seq_len = attn_metadata.max_prefill_seq_len - else: - max_seq_len = attn_metadata.max_decode_seq_len - return (attn_metadata.seq_lens_tensor, max_seq_len, - attn_metadata.block_tables) - elif attn_type == AttentionType.ENCODER_DECODER: - # Enc/dec cross-attention KVs match encoder sequence length; - # cross-attention utilizes special "cross" block tables - return (attn_metadata.encoder_seq_lens_tensor, - attn_metadata.max_encoder_seq_len, - attn_metadata.cross_block_tables) - elif attn_type == AttentionType.ENCODER: - # No block tables associated with encoder attention - return (attn_metadata.encoder_seq_lens_tensor, - attn_metadata.max_encoder_seq_len, None) - else: - raise AttributeError(f"Invalid attention type {str(attn_type)}") - - -def get_num_prefill_decode_query_kv_tokens( - attn_metadata, - attn_type: str, -) -> Tuple[int, int, int]: - """ - Calculate the number of prefill and decode tokens for query, key/value - based on the attention metadata and the specified attention type. - - Args: - attn_metadata (AttentionMetadata): Attention Metadata object. - attn_type (AttentionType): The type of attention being used. - Returns: - Tuple[int, int, int]: A tuple containing three integers: - - The number of prefill query tokens. - - The number of prefill key/value tokens. - - The number of decode query tokens. - - Raises: - AssertionError: If the number of encoder tokens in `attn_metadata` - is `None` when required for the calculations. - """ - num_prefill_query_tokens = 0 - num_decode_query_tokens = 0 - num_prefill_kv_tokens = 0 - if attn_type == AttentionType.ENCODER: - # Encoder attention is only invoked during prefill phase. - # The same input servers a both query and key. - assert attn_metadata.num_encoder_tokens is not None - num_prefill_query_tokens = attn_metadata.num_encoder_tokens - num_prefill_kv_tokens = attn_metadata.num_encoder_tokens - num_decode_query_tokens = 0 - elif attn_type == AttentionType.ENCODER_DECODER: - assert attn_metadata.num_encoder_tokens is not None - num_prefill_query_tokens = attn_metadata.num_prefill_tokens - # The key is the encoder/cross-attention. 
- num_prefill_kv_tokens = attn_metadata.num_encoder_tokens - num_decode_query_tokens = attn_metadata.num_decode_tokens - else: # attn_type == AttentionType.DECODER or - # attn_type == AttentionType.ENCODER_ONLY - num_prefill_query_tokens = attn_metadata.num_prefill_tokens - num_prefill_kv_tokens = attn_metadata.num_prefill_tokens - num_decode_query_tokens = attn_metadata.num_decode_tokens - - return (num_prefill_query_tokens, num_prefill_kv_tokens, - num_decode_query_tokens) - @dataclass class MLADims: diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index 7e485fea2689..72f26c23b60b 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -11,7 +11,6 @@ AttentionLayer, AttentionMetadata, AttentionType, is_quantized_kv_cache) -from vllm.attention.backends.utils import CommonAttentionState from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder, @@ -65,10 +64,6 @@ def get_impl_cls() -> type["TorchSDPABackendImpl"]: def get_metadata_cls() -> type["AttentionMetadata"]: return TorchSDPAMetadata - @staticmethod - def get_state_cls() -> type["CommonAttentionState"]: - return CommonAttentionState - @staticmethod def get_builder_cls() -> type["TorchSDPAMetadataBuilderV1"]: return TorchSDPAMetadataBuilderV1 @@ -835,16 +830,6 @@ def forward_decode( blocksparse_head_sliding_step, ) - @staticmethod - def copy_blocks( - kv_caches: list[torch.Tensor], - src_to_dists: torch.Tensor, - *args, - ) -> None: - key_caches = [kv_cache[0] for kv_cache in kv_caches] - value_caches = [kv_cache[1] for kv_cache in kv_caches] - ops.copy_blocks(key_caches, value_caches, src_to_dists) - class _IPEXPagedAttention(_PagedAttention): diff --git a/vllm/v1/attention/backends/pallas.py b/vllm/v1/attention/backends/pallas.py index 26f9abf13d0e..4ae0634e082a 100644 --- a/vllm/v1/attention/backends/pallas.py +++ b/vllm/v1/attention/backends/pallas.py @@ -8,7 +8,6 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionLayer, AttentionType) -from vllm.attention.backends.utils import CommonAttentionState from vllm.config import VllmConfig from vllm.logger import init_logger from vllm.utils import cdiv, next_power_of_2 @@ -97,10 +96,6 @@ def get_impl_cls() -> type["PallasAttentionBackendImpl"]: def get_metadata_cls() -> type["PallasMetadata"]: return PallasMetadata - @staticmethod - def get_state_cls() -> type["CommonAttentionState"]: - return CommonAttentionState - @staticmethod def get_kv_cache_shape( num_blocks: int, diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 5cae7df70470..b30e4dab956a 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -9,7 +9,6 @@ import torch import torch.nn as nn -from vllm.attention.backends.abstract import AttentionMetadataBuilder from vllm.attention.layer import Attention from vllm.config import (CompilationLevel, VllmConfig, get_layers_from_vllm_config) @@ -25,7 +24,8 @@ from vllm.v1.attention.backends.tree_attn import (TreeAttentionMetadata, TreeAttentionMetadataBuilder) from vllm.v1.attention.backends.triton_attn import TritonAttentionMetadata -from vllm.v1.attention.backends.utils import CommonAttentionMetadata +from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder, + CommonAttentionMetadata) from vllm.v1.kv_cache_interface import KVCacheConfig from vllm.v1.sample.metadata import SamplingMetadata from 
vllm.v1.spec_decode.metadata import SpecDecodeMetadata @@ -184,8 +184,9 @@ def propose( builder = (self._get_attention_metadata_builder() if self.attn_metadata_builder is None else self.attn_metadata_builder) - attn_metadata = builder.build_for_drafting( - common_attn_metadata=common_attn_metadata, draft_index=0) + attn_metadata = builder.build_for_drafting( # type: ignore + common_attn_metadata=common_attn_metadata, + draft_index=0) # At this moment, we assume all eagle layers belong to the same KV # cache group, thus using the same attention metadata. @@ -319,7 +320,7 @@ def propose( exceeds_max_model_len, PADDING_SLOT_ID) # Rebuild attention metadata - attn_metadata = builder.build_for_drafting( + attn_metadata = builder.build_for_drafting( # type: ignore common_attn_metadata=common_attn_metadata, draft_index=token_index + 1) for layer_name in self.attn_layer_names: From fea8006062866900c1a2bee9dd4f18381777e332 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Wed, 24 Sep 2025 18:43:06 -0400 Subject: [PATCH 349/518] [Logging] Improve log for when DeepEP HT disables CUDA Graphs (#25531) Signed-off-by: Tyler Michael Smith --- vllm/platforms/cuda.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index d5f3599acb1c..4aa4ca057f45 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -186,11 +186,12 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: # if torch compile cache key issue fixed # See https://github.com/vllm-project/vllm/pull/25093 logger.info( - "Data Parallel: disabling cudagraphs since DP " - "with DeepEP high-throughput kernels are not CUDA Graph " - "compatible. The DeepEP low-latency kernels are CUDA Graph " - "compatible. Set the all_to_all backend to deepep_low_latency " - "to use those kernels instead.") + "WideEP: Disabling CUDA Graphs since DeepEP high-throughput " + "kernels are optimized for prefill and are incompatible with " + "CUDA Graphs. " + "In order to use CUDA Graphs for decode-optimized workloads, " + "set VLLM_ALL2ALL_BACKEND to another option, such as " + "deepep_low_latency, pplx, or allgather_reducescatter.") compilation_config.cudagraph_mode = CUDAGraphMode.NONE @classmethod From 6160ba4151084c78164a0f472ce4da04067f9705 Mon Sep 17 00:00:00 2001 From: Duncan Moss Date: Wed, 24 Sep 2025 15:50:04 -0700 Subject: [PATCH 350/518] feat: BF16 FlashInfer Fused Cutlass MOE for Hopper and Blackwell Expert Parallel (#25503) Signed-off-by: Duncan Moss --- vllm/envs.py | 6 ++ .../fused_moe/flashinfer_cutlass_moe.py | 65 +++++++++++++++++-- .../flashinfer_cutlass_prepare_finalize.py | 3 +- vllm/model_executor/layers/fused_moe/layer.py | 51 +++++++++++++++ .../layers/fused_moe/modular_kernel.py | 2 + 5 files changed, 121 insertions(+), 6 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 4797d96bb899..5d622c067529 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -144,6 +144,7 @@ VLLM_USE_DEEP_GEMM_E8M0_HOPPER: bool = False VLLM_SKIP_DEEP_GEMM_WARMUP: bool = False VLLM_USE_FUSED_MOE_GROUPED_TOPK: bool = True + VLLM_USE_FLASHINFER_MOE_FP16: bool = False VLLM_USE_FLASHINFER_MOE_FP8: bool = False VLLM_USE_FLASHINFER_MOE_FP4: bool = False VLLM_FLASHINFER_MOE_BACKEND: Literal["throughput", @@ -1145,6 +1146,10 @@ def get_vllm_port() -> Optional[int]: "VLLM_USE_FUSED_MOE_GROUPED_TOPK": lambda: bool(int(os.getenv("VLLM_USE_FUSED_MOE_GROUPED_TOPK", "1"))), + # Allow use of FlashInfer MoE kernels for fused moe ops. 
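# A standalone sketch of how a "0"/"1" flag such as the
# VLLM_USE_FLASHINFER_MOE_FP16 variable added in this hunk is typically read;
# only the variable name comes from the diff, the helper below is illustrative.
import os

def flashinfer_moe_fp16_enabled() -> bool:
    # Unset or "0" -> False, "1" -> True, mirroring the lambda-per-key style
    # used in vllm/envs.py.
    return bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP16", "0")))

os.environ["VLLM_USE_FLASHINFER_MOE_FP16"] = "1"
assert flashinfer_moe_fp16_enabled()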
+ "VLLM_USE_FLASHINFER_MOE_FP16": + lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP16", "0"))), + # Allow use of FlashInfer MoE kernels for fused moe ops. "VLLM_USE_FLASHINFER_MOE_FP8": lambda: bool(int(os.getenv("VLLM_USE_FLASHINFER_MOE_FP8", "0"))), @@ -1516,6 +1521,7 @@ def compute_hash() -> str: "VLLM_USE_DEEP_GEMM_E8M0_HOPPER", "VLLM_USE_TRTLLM_FP4_GEMM", "VLLM_USE_FUSED_MOE_GROUPED_TOPK", + "VLLM_USE_FLASHINFER_MOE_FP16", "VLLM_USE_FLASHINFER_MOE_FP8", "VLLM_USE_FLASHINFER_MOE_FP4", "VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py index 8700181d18fe..3ea4ed39e956 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py @@ -52,8 +52,10 @@ def __init__( tp_size: int = 1, ): super().__init__(quant_config) - assert quant_config.quant_dtype in ("nvfp4", torch.float8_e4m3fn), ( - "Only nvfp4,fp8 quantization are currently supported.") + assert quant_config.quant_dtype in ( + "nvfp4", torch.float8_e4m3fn, + None), ("Only nvfp4, fp8, bfloat16 and" + " float16 quantization are currently supported.") self.ep_rank = ep_rank self.ep_size = ep_size self.tp_rank = tp_rank @@ -109,8 +111,9 @@ def workspace_shapes( """ aq_m, aq_n = aq.shape workspace2 = (0, ) - output_shape = (aq_m, aq_n * 2) if self.quant_dtype != \ - torch.float8_e4m3fn else (aq_m, aq_n) + output_shape = (aq_m, + aq_n * 2) if self.quant_dtype == "nvfp4" else (aq_m, + aq_n) workspace_dtype = a.dtype workspace1 = output_shape # The workspace is determined by `aq`, since it comes after any @@ -135,6 +138,10 @@ def apply( expert_tokens_meta: Optional[mk.ExpertTokensMetadata], apply_router_weight_on_input: Optional[bool], ): + + assert activation == "silu", ("Only activation silu is supported in " + "FlashInferExperts") + if self.quant_dtype == torch.float8_e4m3fn: quant_scales = [ self.g1_alphas, self.a2_gscale, self.g2_alphas, self.a1_gscale @@ -143,7 +150,7 @@ def apply( a1q_scale = None # not passing input_sf in fp8 fc1_expert_weights = w1 fc2_expert_weights = w2 - else: + elif self.quant_dtype == "nvfp4": # Ensure w1_scale and w2_scale are not None before calling view assert self.w1_scale is not None and self.w2_scale is not None, ( "w1_scale and w2_scale must not " @@ -161,6 +168,11 @@ def apply( # FlashInfer API requires weight to be long for nvfp4 fc1_expert_weights = w1.view(torch.long) fc2_expert_weights = w2.view(torch.long) + else: + quant_scales = None + a1q_scale = None + fc1_expert_weights = w1 + fc2_expert_weights = w2 _ = flashinfer_cutlass_fused_moe( input=hidden_states, @@ -211,3 +223,46 @@ def flashinfer_cutlass_moe_fp4( expert_map=expert_map, apply_router_weight_on_input=apply_router_weight_on_input, ) + + +def flashinfer_cutlass_moe( + hidden_states: torch.Tensor, + w1: torch.Tensor, + w2: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, + quant_config: FusedMoEQuantConfig, + inplace: bool = False, + activation: str = "silu", + global_num_experts: int = -1, + expert_map: Optional[torch.Tensor] = None, + apply_router_weight_on_input: bool = False, + tp_rank: int = 0, + tp_size: int = 1, + ep_rank: int = 0, + ep_size: int = 1, + use_dp: bool = False, +) -> torch.Tensor: + fused_experts = mk.FusedMoEModularKernel( + create_flashinfer_prepare_finalize(use_dp=use_dp), + FlashInferExperts( + out_dtype=hidden_states.dtype, + quant_config=quant_config, + tp_rank=tp_rank, + 
tp_size=tp_size, + ep_rank=ep_rank, + ep_size=ep_size, + )) + + return fused_experts( + hidden_states=hidden_states, + w1=w1, + w2=w2, + topk_weights=topk_weights, + topk_ids=topk_ids, + inplace=inplace, + activation=activation, + global_num_experts=global_num_experts, + expert_map=expert_map, + apply_router_weight_on_input=apply_router_weight_on_input, + ) diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py index 6e127064d32d..ed364ac77b28 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py @@ -183,7 +183,8 @@ def prepare( dim=0, sizes=get_local_sizes(), ) - a1q_scale = nvfp4_block_scale_interleave(a1q_scale) + if quant_config.quant_dtype == "nvfp4": + a1q_scale = nvfp4_block_scale_interleave(a1q_scale) return a1q, a1q_scale, None, topk_ids, topk_weights diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index b68190e5d1c1..ea88539db27b 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -39,6 +39,7 @@ from vllm.platforms.interface import CpuArchEnum from vllm.utils import (cdiv, direct_register_custom_op, has_deep_ep, has_pplx, round_up) +from vllm.utils.flashinfer import has_flashinfer_cutlass_fused_moe from vllm.v1.worker.ubatching import dbo_current_ubatch_id if current_platform.is_cuda_alike(): @@ -296,6 +297,40 @@ def __init__(self, moe: FusedMoEConfig): else: self.rocm_aiter_fused_experts = None # type: ignore + # FlashInfer CUTLASS MoE is only supported on Hopper and later GPUS + self.flashinfer_cutlass_moe_enabled = ( + has_flashinfer_cutlass_fused_moe() + and envs.VLLM_USE_FLASHINFER_MOE_FP16 + and self.moe.moe_parallel_config.use_ep + and self.moe.moe_parallel_config.dp_size == 1 + and current_platform.get_device_capability()[0] >= 9) + if self.flashinfer_cutlass_moe_enabled: + logger.info_once( + "Enabling FlashInfer CUTLASS MoE for UnquantizedFusedMoEMethod" + ) + from functools import partial + + from .flashinfer_cutlass_moe import flashinfer_cutlass_moe + self.flashinfer_cutlass_moe = partial( + flashinfer_cutlass_moe, + quant_config=FUSED_MOE_UNQUANTIZED_CONFIG, + tp_rank=self.moe.moe_parallel_config.tp_rank, + tp_size=self.moe.moe_parallel_config.tp_size, + ep_rank=self.moe.moe_parallel_config.ep_rank, + ep_size=self.moe.moe_parallel_config.ep_size) + else: + if (self.moe.moe_parallel_config.use_ep + and self.moe.moe_parallel_config.dp_size == 1): + logger.info_once( + "FlashInfer CUTLASS MoE is available for EP" + " but not enabled, consider setting" + " VLLM_USE_FLASHINFER_MOE_FP16=1 to enable it.") + elif self.moe.moe_parallel_config.dp_size > 1: + logger.info_once( + "FlashInfer CUTLASS MoE is currently not available for DP." 
+ ) + self.flashinfer_cutlass_moe = None # type: ignore + def maybe_make_prepare_finalize( self) -> Optional[FusedMoEPrepareAndFinalize]: if self.rocm_aiter_moe_enabled: @@ -367,6 +402,7 @@ def _maybe_pad_weight(self, weight: torch.Tensor) -> torch.Tensor: num_pad = 256 // weight.element_size() weight = F.pad(weight, (0, num_pad), "constant", 0)[..., :-num_pad] torch.cuda.empty_cache() + return weight def process_weights_after_loading(self, layer: torch.nn.Module) -> None: @@ -386,6 +422,12 @@ def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.w13_weight.data = shuffled_w13 layer.w2_weight.data = shuffled_w2 + if self.flashinfer_cutlass_moe_enabled: + # Swap halves to arrange as [w3; w1] (kernel expectation) + w1_w, w3_w = torch.chunk(layer.w13_weight.data, 2, dim=1) + w13_weight_swapped = torch.cat([w3_w, w1_w], dim=1) + layer.w13_weight.data = w13_weight_swapped.contiguous() + if current_platform.is_xpu(): import intel_extension_for_pytorch as ipex layer.ipex_fusion = ipex.llm.modules.GatedMLPMOE( @@ -536,6 +578,15 @@ def forward_cuda( expert_map=expert_map, activation=activation, apply_router_weight_on_input=apply_router_weight_on_input) + elif self.flashinfer_cutlass_moe_enabled: + return self.flashinfer_cutlass_moe( + hidden_states=x, + w1=layer.w13_weight, + w2=layer.w2_weight, + topk_weights=topk_weights, + topk_ids=topk_ids, + activation=activation, + apply_router_weight_on_input=apply_router_weight_on_input) elif self.fused_experts is not None: if self.moe.has_bias: raise ValueError( diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 4ba14196682a..b6afc8651e36 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -598,6 +598,8 @@ def __init__(self): def get(self, shape: tuple[int, ...], device: torch.device, dtype: torch.dtype): + if shape == () or shape is None: + return None shape_numel = prod(shape) if (self.buffer is None or self.buffer.numel() < shape_numel or self.buffer.device != device or self.buffer.dtype != dtype): From 1f291412586110bffdebe597bd9d5f49c1cd4f73 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Wed, 24 Sep 2025 18:52:36 -0400 Subject: [PATCH 351/518] [Refactor] Use DeepGEMM Col Major TMA Aligned Tensor (#25517) Signed-off-by: yewentao256 --- .../benchmark_fp8_block_dense_gemm.py | 8 ++- tests/kernels/quantization/test_block_fp8.py | 7 +- .../compressed_tensors_moe.py | 6 +- .../model_executor/layers/quantization/fp8.py | 10 +-- .../layers/quantization/utils/fp8_utils.py | 66 +------------------ vllm/utils/deep_gemm.py | 15 ++++- 6 files changed, 34 insertions(+), 78 deletions(-) diff --git a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py index b3c3742825de..2010b8038563 100644 --- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py +++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py @@ -8,12 +8,16 @@ from vllm import _custom_ops as ops from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - get_col_major_tma_aligned_tensor, per_token_group_quant_fp8, w8a8_triton_block_scaled_mm, ) from vllm.triton_utils import triton -from vllm.utils.deep_gemm import calc_diff, fp8_gemm_nt, per_block_cast_to_fp8 +from vllm.utils.deep_gemm import ( + calc_diff, + fp8_gemm_nt, + get_col_major_tma_aligned_tensor, + 
per_block_cast_to_fp8, +) def benchmark_shape(m: int, diff --git a/tests/kernels/quantization/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py index c0b934fc55ae..e02df540ce9d 100644 --- a/tests/kernels/quantization/test_block_fp8.py +++ b/tests/kernels/quantization/test_block_fp8.py @@ -11,11 +11,12 @@ native_w8a8_block_matmul) from vllm.config import VllmConfig from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - cutlass_scaled_mm, get_col_major_tma_aligned_tensor, - per_token_group_quant_fp8, w8a8_triton_block_scaled_mm) + cutlass_scaled_mm, per_token_group_quant_fp8, w8a8_triton_block_scaled_mm) from vllm.platforms import current_platform from vllm.utils import has_deep_gemm -from vllm.utils.deep_gemm import fp8_gemm_nt, per_block_cast_to_fp8 +from vllm.utils.deep_gemm import (fp8_gemm_nt, + get_col_major_tma_aligned_tensor, + per_block_cast_to_fp8) if current_platform.get_device_capability() < (9, 0): pytest.skip("FP8 Triton requires CUDA 9.0 or higher", diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index a7d3e920414d..3a81a0059df8 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -34,8 +34,7 @@ build_flashinfer_fp4_cutlass_moe_prepare_finalize, reorder_w1w3_to_w3w1, select_nvfp4_gemm_impl) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - expert_weight_is_col_major, get_col_major_tma_aligned_tensor, - requant_weight_ue8m0_inplace) + expert_weight_is_col_major, requant_weight_ue8m0_inplace) from vllm.model_executor.layers.quantization.utils.marlin_utils import ( check_moe_marlin_supports_layer, marlin_make_workspace_new, marlin_moe_permute_scales) @@ -50,7 +49,8 @@ from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import CpuArchEnum, current_platform from vllm.scalar_type import scalar_types -from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used +from vllm.utils.deep_gemm import (get_col_major_tma_aligned_tensor, + is_deep_gemm_e8m0_used) logger = init_logger(__name__) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index c4951712baa7..f77e5880209d 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -34,9 +34,9 @@ W8A8BlockFp8LinearOp, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, create_fp8_weight_parameter, expert_weight_is_col_major, - get_col_major_tma_aligned_tensor, maybe_post_process_fp8_weight_block, - process_fp8_weight_block_strategy, process_fp8_weight_tensor_strategy, - requant_weight_ue8m0_inplace, validate_fp8_block_shape) + maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, + process_fp8_weight_tensor_strategy, requant_weight_ue8m0_inplace, + validate_fp8_block_shape) from vllm.model_executor.layers.quantization.utils.marlin_utils_fp8 import ( apply_fp8_marlin_linear, prepare_fp8_layer_for_marlin, prepare_moe_fp8_layer_for_marlin) @@ -53,7 +53,9 @@ from vllm.platforms import current_platform from vllm.scalar_type import scalar_types from vllm.utils import has_deep_gemm -from vllm.utils.deep_gemm import is_deep_gemm_e8m0_used, is_deep_gemm_supported +from vllm.utils.deep_gemm import (get_col_major_tma_aligned_tensor, + 
is_deep_gemm_e8m0_used, + is_deep_gemm_supported) from vllm.utils.flashinfer import has_flashinfer_moe if TYPE_CHECKING: diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index a4cfc7d6c15c..441bba6baacc 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -23,7 +23,7 @@ PerTensorScaleParameter) from vllm.platforms import current_platform from vllm.triton_utils import tl, triton -from vllm.utils import cdiv, direct_register_custom_op +from vllm.utils import direct_register_custom_op from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used, is_deep_gemm_supported, should_use_deepgemm_for_fp8_linear) @@ -749,70 +749,6 @@ def grid(META): return C -# Taken from https://github.com/deepseek-ai/DeepGEMM/blob/0c88cd01392c1073c7049a97d6328c7bba9b3947 -# TODO(wentao): remove this function when DeepGEMM exposes this function -def get_tma_aligned_size(x: int, element_size: int) -> int: - """ - Global memory address of TMA must be 16-byte aligned. - Since we use column-major layout for the LHS scaling tensor, - the M-axis of the LHS scaling tensor needs to be padded to a multiple of - 16 bytes. - - Arguments: - x: original M-axis shape of the LHS scaling tensor. - element_size: element size of the LHS scaling tensor. - - Returns: - M-axis shape of the LHS scaling tensor after padding. - """ - tma_alignment_bytes = 16 - assert tma_alignment_bytes % element_size == 0 - alignment = tma_alignment_bytes // element_size - return cdiv(x, alignment) * alignment - - -# Taken from https://github.com/deepseek-ai/DeepGEMM/blob/0c88cd01392c1073c7049a97d6328c7bba9b3947 -# TODO(wentao): remove this function when DeepGEMM exposes this function -def get_col_major_tma_aligned_tensor(x: torch.Tensor) -> torch.Tensor: - """ - Returns TMA-aligned transposed format of the input tensor. `torch.transpose` - will be called if necessary. - If the input tensor is already column-major layout and 16-byte aligned along - the M axis (thus meets the requirement of LHS scaling tensor in - DeepGEMM), this function will do nothing. - - Arguments: - x: usually the LHS scaling tensor in GEMM. - - Returns: - The LHS scaling tensor of TMA-aligned transposed format. 
- """ - # NOTES: for the extreme performance, you may rewrite/fuse this function in - # CUDA - assert x.dim() in (2, 3) - remove_dim = False - m, n = x.shape[-2], x.shape[-1] - aligned_m = get_tma_aligned_size(m, x.element_size()) - if x.dim() == 2: - if x.stride(0) == 1 and x.stride(1) == aligned_m: - return x - x, remove_dim = x.unsqueeze(0), True - - b = x.shape[0] - - # The last kernel gives a column-major TMA aligned layout - if x.stride(0) == aligned_m * n and x.stride(1) == 1 and x.stride( - 2) == aligned_m: - return x.squeeze(0) if remove_dim else x - - # Normal layout requires transposing - aligned_x = torch.transpose( - torch.empty((b, n, aligned_m), device=x.device, dtype=x.dtype), 1, 2) - aligned_x[:, :m, :] = x - aligned_x = aligned_x[:, :m, :] - return aligned_x.squeeze(0) if remove_dim else aligned_x - - def requant_weight_ue8m0_inplace( weight: torch.Tensor, weight_scale: torch.Tensor, diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index 2f533ca0639f..979c10f2c3e9 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -70,11 +70,13 @@ def _missing(*_: Any, **__: Any) -> NoReturn: _fp8_gemm_nt_impl: Callable[..., Any] | None = None _grouped_impl: Callable[..., Any] | None = None _grouped_masked_impl: Callable[..., Any] | None = None +_get_mn_major_tma_aligned_tensor_impl: Callable[..., Any] | None = None def _lazy_init() -> None: """Import deep_gemm and resolve symbols on first use.""" - global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl + global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl,\ + _get_mn_major_tma_aligned_tensor_impl # fast path if (_fp8_gemm_nt_impl is not None or _grouped_impl is not None @@ -95,6 +97,16 @@ def _lazy_init() -> None: _fp8_gemm_nt_impl = getattr(_dg, "fp8_gemm_nt", None) _grouped_impl = getattr(_dg, "m_grouped_fp8_gemm_nt_contiguous", None) _grouped_masked_impl = getattr(_dg, "fp8_m_grouped_gemm_nt_masked", None) + _get_mn_major_tma_aligned_tensor_impl = getattr( + _dg, "get_mn_major_tma_aligned_tensor", None) + + +def get_col_major_tma_aligned_tensor(x: torch.Tensor) -> torch.Tensor: + """Wrapper for DeepGEMM's get_mn_major_tma_aligned_tensor""" + _lazy_init() + if _get_mn_major_tma_aligned_tensor_impl is None: + return _missing() + return _get_mn_major_tma_aligned_tensor_impl(x) def fp8_gemm_nt(*args, **kwargs): @@ -191,4 +203,5 @@ def should_use_deepgemm_for_fp8_linear( "is_deep_gemm_e8m0_used", "is_deep_gemm_supported", "should_use_deepgemm_for_fp8_linear", + "get_col_major_tma_aligned_tensor", ] \ No newline at end of file From e7f27ea648fd234ec44566925dfe9548a002d6c0 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 25 Sep 2025 00:08:18 +0100 Subject: [PATCH 352/518] Improve `--help` for enhanced user experience (#24903) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/mkdocs/hooks/generate_argparse.py | 2 +- vllm/engine/arg_utils.py | 4 +- vllm/entrypoints/cli/benchmark/main.py | 13 ++- vllm/entrypoints/cli/main.py | 2 +- vllm/entrypoints/cli/run_batch.py | 10 +-- vllm/entrypoints/cli/serve.py | 20 +++-- vllm/entrypoints/utils.py | 113 ++----------------------- vllm/utils/__init__.py | 87 +++++++++++++++++-- 8 files changed, 114 insertions(+), 137 deletions(-) diff --git a/docs/mkdocs/hooks/generate_argparse.py b/docs/mkdocs/hooks/generate_argparse.py index ac70980ac9d0..d026235dd9d5 100644 --- a/docs/mkdocs/hooks/generate_argparse.py +++ b/docs/mkdocs/hooks/generate_argparse.py @@ -168,5 +168,5 @@ def 
on_startup(command: Literal["build", "gh-deploy", "serve"], dirty: bool): doc_path = ARGPARSE_DOC_DIR / f"{stem}.md" # Specify encoding for building on Windows with open(doc_path, "w", encoding="utf-8") as f: - f.write(parser.format_help()) + f.write(super(type(parser), parser).format_help()) logger.info("Argparse generated: %s", doc_path.relative_to(ROOT_DIR)) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 3f0dfce1b4b5..d3e23890469e 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -156,8 +156,8 @@ def is_online_quantization(quantization: Any) -> bool: NEEDS_HELP = ( - "--help" in (argv := sys.argv) # vllm SUBCOMMAND --help - or (argv0 := argv[0]).endswith("mkdocs") # mkdocs SUBCOMMAND + any("--help" in arg for arg in sys.argv) # vllm SUBCOMMAND --help + or (argv0 := sys.argv[0]).endswith("mkdocs") # mkdocs SUBCOMMAND or argv0.endswith("mkdocs/__main__.py") # python -m mkdocs SUBCOMMAND ) diff --git a/vllm/entrypoints/cli/benchmark/main.py b/vllm/entrypoints/cli/benchmark/main.py index 87fb9f351464..0c65fd97fc04 100644 --- a/vllm/entrypoints/cli/benchmark/main.py +++ b/vllm/entrypoints/cli/benchmark/main.py @@ -8,8 +8,7 @@ from vllm.entrypoints.cli.benchmark.base import BenchmarkSubcommandBase from vllm.entrypoints.cli.types import CLISubcommand -from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG, - show_filtered_argument_or_group_from_help) +from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG if typing.TYPE_CHECKING: from vllm.utils import FlexibleArgumentParser @@ -33,9 +32,8 @@ def subparser_init( subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser: bench_parser = subparsers.add_parser( self.name, - help=self.help, description=self.help, - usage="vllm bench [options]") + usage=f"vllm {self.name} [options]") bench_subparsers = bench_parser.add_subparsers(required=True, dest="bench_type") @@ -44,13 +42,12 @@ def subparser_init( cmd_cls.name, help=cmd_cls.help, description=cmd_cls.help, - usage=f"vllm bench {cmd_cls.name} [options]", + usage=f"vllm {self.name} {cmd_cls.name} [options]", ) cmd_subparser.set_defaults(dispatch_function=cmd_cls.cmd) cmd_cls.add_cli_args(cmd_subparser) - show_filtered_argument_or_group_from_help(cmd_subparser, - ["bench", cmd_cls.name]) - cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG + cmd_subparser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=f"{self.name} {cmd_cls.name}") return bench_parser diff --git a/vllm/entrypoints/cli/main.py b/vllm/entrypoints/cli/main.py index fed3ea650405..f1bcbc8262bd 100644 --- a/vllm/entrypoints/cli/main.py +++ b/vllm/entrypoints/cli/main.py @@ -30,7 +30,7 @@ def main(): parser = FlexibleArgumentParser( description="vLLM CLI", - epilog=VLLM_SUBCMD_PARSER_EPILOG, + epilog=VLLM_SUBCMD_PARSER_EPILOG.format(subcmd="[subcommand]"), ) parser.add_argument( '-v', diff --git a/vllm/entrypoints/cli/run_batch.py b/vllm/entrypoints/cli/run_batch.py index 86491678d7d2..e669464bff83 100644 --- a/vllm/entrypoints/cli/run_batch.py +++ b/vllm/entrypoints/cli/run_batch.py @@ -9,8 +9,7 @@ import typing from vllm.entrypoints.cli.types import CLISubcommand -from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG, - show_filtered_argument_or_group_from_help) +from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG from vllm.logger import init_logger if typing.TYPE_CHECKING: @@ -50,7 +49,7 @@ def subparser_init( from vllm.entrypoints.openai.run_batch import make_arg_parser run_batch_parser = subparsers.add_parser( - "run-batch", + self.name, help="Run 
batch prompts and write results to file.", description=( "Run batch prompts using vLLM's OpenAI-compatible API.\n" @@ -59,9 +58,8 @@ def subparser_init( "vllm run-batch -i INPUT.jsonl -o OUTPUT.jsonl --model ", ) run_batch_parser = make_arg_parser(run_batch_parser) - show_filtered_argument_or_group_from_help(run_batch_parser, - ["run-batch"]) - run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG + run_batch_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=self.name) return run_batch_parser diff --git a/vllm/entrypoints/cli/serve.py b/vllm/entrypoints/cli/serve.py index de47bf00932e..0a5547144800 100644 --- a/vllm/entrypoints/cli/serve.py +++ b/vllm/entrypoints/cli/serve.py @@ -14,8 +14,7 @@ setup_server) from vllm.entrypoints.openai.cli_args import (make_arg_parser, validate_parsed_serve_args) -from vllm.entrypoints.utils import (VLLM_SUBCMD_PARSER_EPILOG, - show_filtered_argument_or_group_from_help) +from vllm.entrypoints.utils import VLLM_SUBCMD_PARSER_EPILOG from vllm.logger import init_logger from vllm.usage.usage_lib import UsageContext from vllm.utils import (FlexibleArgumentParser, decorate_logs, get_tcp_uri, @@ -29,6 +28,14 @@ logger = init_logger(__name__) +DESCRIPTION = """Launch a local OpenAI-compatible API server to serve LLM +completions via HTTP. Defaults to Qwen/Qwen3-0.6B if no model is specified. + +Search by using: `--help=` to explore options by section (e.g., +--help=ModelConfig, --help=Frontend) + Use `--help=all` to show all available flags at once. +""" + class ServeSubcommand(CLISubcommand): """The `serve` subcommand for the vLLM CLI. """ @@ -56,14 +63,13 @@ def subparser_init( self, subparsers: argparse._SubParsersAction) -> FlexibleArgumentParser: serve_parser = subparsers.add_parser( - "serve", - help="Start the vLLM OpenAI Compatible API server.", - description="Start the vLLM OpenAI Compatible API server.", + self.name, + description=DESCRIPTION, usage="vllm serve [model_tag] [options]") serve_parser = make_arg_parser(serve_parser) - show_filtered_argument_or_group_from_help(serve_parser, ["serve"]) - serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG + serve_parser.epilog = VLLM_SUBCMD_PARSER_EPILOG.format( + subcmd=self.name) return serve_parser diff --git a/vllm/entrypoints/utils.py b/vllm/entrypoints/utils.py index d2d7dba3ae46..4a90fe094ae2 100644 --- a/vllm/entrypoints/utils.py +++ b/vllm/entrypoints/utils.py @@ -1,13 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import argparse import asyncio import dataclasses import functools import os -import subprocess -import sys +from argparse import Namespace from typing import Any, Optional, Union from fastapi import Request @@ -25,13 +23,10 @@ logger = init_logger(__name__) VLLM_SUBCMD_PARSER_EPILOG = ( - "Tip: Use `vllm [serve|run-batch|bench ] " - "--help=` to explore arguments from help.\n" - " - To view a argument group: --help=ModelConfig\n" - " - To view a single argument: --help=max-num-seqs\n" - " - To search by keyword: --help=max\n" - " - To list all groups: --help=listgroup\n" - " - To view help with pager: --help=page") + "For full list: vllm {subcmd} --help=all\n" + "For a section: vllm {subcmd} --help=ModelConfig (case-insensitive)\n" # noqa: E501 + "For a flag: vllm {subcmd} --help=max-model-len (_ or - accepted)\n" # noqa: E501 + "Documentation: https://docs.vllm.ai\n") async def listen_for_disconnect(request: Request) -> None: @@ -196,96 +191,6 @@ def _validate_truncation_size( return truncate_prompt_tokens -def 
_output_with_pager(text: str): - """Output text using scrolling view if available and appropriate.""" - - pagers = ['less -R', 'more'] - for pager_cmd in pagers: - try: - proc = subprocess.Popen(pager_cmd.split(), - stdin=subprocess.PIPE, - text=True) - proc.communicate(input=text) - return - except (subprocess.SubprocessError, OSError, FileNotFoundError): - continue - - # No pager worked, fall back to normal print - print(text) - - -def show_filtered_argument_or_group_from_help(parser: argparse.ArgumentParser, - subcommand_name: list[str]): - - # Only handle --help= for the current subcommand. - # Since subparser_init() runs for all subcommands during CLI setup, - # we skip processing if the subcommand name is not in sys.argv. - # sys.argv[0] is the program name. The subcommand follows. - # e.g., for `vllm bench latency`, - # sys.argv is `['vllm', 'bench', 'latency', ...]` - # and subcommand_name is "bench latency". - if len(sys.argv) <= len(subcommand_name) or sys.argv[ - 1:1 + len(subcommand_name)] != subcommand_name: - return - - for arg in sys.argv: - if arg.startswith('--help='): - search_keyword = arg.split('=', 1)[1] - - # Enable paged view for full help - if search_keyword == 'page': - help_text = parser.format_help() - _output_with_pager(help_text) - sys.exit(0) - - # List available groups - if search_keyword == 'listgroup': - output_lines = ["\nAvailable argument groups:"] - for group in parser._action_groups: - if group.title and not group.title.startswith( - "positional arguments"): - output_lines.append(f" - {group.title}") - if group.description: - output_lines.append(" " + - group.description.strip()) - output_lines.append("") - _output_with_pager("\n".join(output_lines)) - sys.exit(0) - - # For group search - formatter = parser._get_formatter() - for group in parser._action_groups: - if group.title and group.title.lower() == search_keyword.lower( - ): - formatter.start_section(group.title) - formatter.add_text(group.description) - formatter.add_arguments(group._group_actions) - formatter.end_section() - _output_with_pager(formatter.format_help()) - sys.exit(0) - - # For single arg - matched_actions = [] - - for group in parser._action_groups: - for action in group._group_actions: - # search option name - if any(search_keyword.lower() in opt.lower() - for opt in action.option_strings): - matched_actions.append(action) - - if matched_actions: - header = f"\nParameters matching '{search_keyword}':\n" - formatter = parser._get_formatter() - formatter.add_arguments(matched_actions) - _output_with_pager(header + formatter.format_help()) - sys.exit(0) - - print(f"\nNo group or parameter matching '{search_keyword}'") - print("Tip: use `--help=listgroup` to view all groups.") - sys.exit(1) - - def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest, CompletionRequest], input_length: int, default_sampling_params: dict) -> int: @@ -301,11 +206,11 @@ def get_max_tokens(max_model_len: int, request: Union[ChatCompletionRequest, if val is not None) -def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]): +def log_non_default_args(args: Union[Namespace, EngineArgs]): non_default_args = {} - # Handle argparse.Namespace - if isinstance(args, argparse.Namespace): + # Handle Namespace + if isinstance(args, Namespace): parser = make_arg_parser(FlexibleArgumentParser()) for arg, default in vars(parser.parse_args([])).items(): if default != getattr(args, arg): @@ -323,6 +228,6 @@ def log_non_default_args(args: Union[argparse.Namespace, EngineArgs]): 
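# The hunk header above points at log_non_default_args, which recovers defaults
# by parsing an empty argument list and diffing them against the live
# namespace. A self-contained sketch of that pattern; the argument names and
# defaults below are invented for illustration.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--max-num-seqs", type=int, default=256)
parser.add_argument("--model", type=str, default="Qwen/Qwen3-0.6B")

defaults = vars(parser.parse_args([]))
live = vars(parser.parse_args(["--max-num-seqs", "128"]))
non_default = {k: v for k, v in live.items() if defaults[k] != v}
print(non_default)  # {'max_num_seqs': 128}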
non_default_args["model"] = default_args.model else: raise TypeError("Unsupported argument type. " \ - "Must be argparse.Namespace or EngineArgs instance.") + "Must be Namespace or EngineArgs instance.") logger.info("non-default args: %s", non_default_args) diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index 0a7af79f7a17..c502a69ea500 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -1720,6 +1720,7 @@ class FlexibleArgumentParser(ArgumentParser): "Additionally, list elements can be passed individually using +:\n" ' --json-arg \'{"key4": ["value3", "value4", "value5"]}\'\n' " --json-arg.key4+ value3 --json-arg.key4+=\'value4,value5\'\n\n") + _search_keyword: Optional[str] = None def __init__(self, *args, **kwargs): # Set the default "formatter_class" to SortedHelpFormatter @@ -1768,13 +1769,79 @@ def add_argument_group(self, *args, **kwargs): self._action_groups.append(group) return group - def format_help(self) -> str: - # Add tip about JSON arguments to the epilog - epilog = self.epilog or "" - if (self.add_json_tip - and not epilog.startswith(FlexibleArgumentParser._json_tip)): - self.epilog = FlexibleArgumentParser._json_tip + epilog - return super().format_help() + def format_help(self): + # Only use custom help formatting for bottom level parsers + if self._subparsers is not None: + return super().format_help() + + formatter = self._get_formatter() + + # Handle keyword search of the args + if (search_keyword := self._search_keyword) is not None: + # Normalise the search keyword + search_keyword = search_keyword.lower().replace("_", "-") + # Return full help if searching for 'all' + if search_keyword == 'all': + self.epilog = self._json_tip + return super().format_help() + + # Return group help if searching for a group title + for group in self._action_groups: + if group.title and group.title.lower() == search_keyword: + formatter.start_section(group.title) + formatter.add_text(group.description) + formatter.add_arguments(group._group_actions) + formatter.end_section() + formatter.add_text(self._json_tip) + return formatter.format_help() + + # Return matched args if searching for an arg name + matched_actions = [] + for group in self._action_groups: + for action in group._group_actions: + # search option name + if any(search_keyword in opt.lower() + for opt in action.option_strings): + matched_actions.append(action) + if matched_actions: + formatter.start_section( + f"Arguments matching '{search_keyword}'") + formatter.add_arguments(matched_actions) + formatter.end_section() + formatter.add_text(self._json_tip) + return formatter.format_help() + + # No match found + formatter.add_text( + f"No group or arguments matching '{search_keyword}'.\n" + "Use '--help' to see available groups or " + "'--help=all' to see all available parameters.") + return formatter.format_help() + + # usage + formatter.add_usage(self.usage, self._actions, + self._mutually_exclusive_groups) + + # description + formatter.add_text(self.description) + + # positionals, optionals and user-defined groups + formatter.start_section("Config Groups") + config_groups = "" + for group in self._action_groups: + if not group._group_actions: + continue + title = group.title + description = group.description or "" + config_groups += f"{title: <24}{description}\n" + formatter.add_text(config_groups) + formatter.end_section() + + # epilog + formatter.add_text(self.epilog) + + # determine help from format above + return formatter.format_help() def parse_args( # type: ignore[override] self, @@ 
-1807,7 +1874,11 @@ def repl(match: re.Match) -> str: # Convert underscores to dashes and vice versa in argument names processed_args = list[str]() for i, arg in enumerate(args): - if arg.startswith('--'): + if arg.startswith("--help="): + FlexibleArgumentParser._search_keyword = arg.split( + '=', 1)[-1].lower() + processed_args.append("--help") + elif arg.startswith('--'): if '=' in arg: key, value = arg.split('=', 1) key = pattern.sub(repl, key, count=1) From 5c1e496a75138f6b7663cf56d733b6f9e06d7d66 Mon Sep 17 00:00:00 2001 From: Shiyan Deng Date: Wed, 24 Sep 2025 16:56:21 -0700 Subject: [PATCH 353/518] [MISC] replace c10::optional with std::optional (#25602) Signed-off-by: Shiyan Deng --- csrc/rocm/ops.h | 4 ++-- csrc/rocm/skinny_gemms.cu | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/csrc/rocm/ops.h b/csrc/rocm/ops.h index edf7aff1abaa..8b80362583ee 100644 --- a/csrc/rocm/ops.h +++ b/csrc/rocm/ops.h @@ -6,11 +6,11 @@ torch::Tensor LLMM1(at::Tensor& in_a, at::Tensor& in_b, const int64_t rows_per_block); torch::Tensor wvSplitK(const at::Tensor& in_a, const at::Tensor& in_b, - const c10::optional& in_bias, + const std::optional& in_bias, const int64_t CuCount); void wvSplitKQ(const at::Tensor& in_a, const at::Tensor& in_b, - const c10::optional& in_bias, at::Tensor& out_c, + const std::optional& in_bias, at::Tensor& out_c, const at::Tensor& scale_a, const at::Tensor& scale_b, const int64_t CuCount); diff --git a/csrc/rocm/skinny_gemms.cu b/csrc/rocm/skinny_gemms.cu index 52119d52f6d1..e4600350d3ea 100644 --- a/csrc/rocm/skinny_gemms.cu +++ b/csrc/rocm/skinny_gemms.cu @@ -1271,7 +1271,7 @@ int mindiv(int N, int div1, int div2) { } torch::Tensor wvSplitK(const at::Tensor& in_a, const at::Tensor& in_b, - const c10::optional& in_bias, + const std::optional& in_bias, const int64_t CuCount) { auto M_in = in_a.size(0); auto K_in = in_a.size(1); @@ -1729,7 +1729,7 @@ __global__ void wvSplitKQ_hf_(const int K, const int Kp, const int M, #endif // defined(__HIP__MI3XX__) TODO: Add NAVI support void wvSplitKQ(const at::Tensor& in_a, const at::Tensor& in_b, - const c10::optional& in_bias, at::Tensor& out_c, + const std::optional& in_bias, at::Tensor& out_c, const at::Tensor& scale_a, const at::Tensor& scale_b, const int64_t CuCount) { static c10::ScalarType kFp8Type = is_fp8_ocp() From 52d0cb845866869d587fc013a7c59e60a86ebcf2 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Thu, 25 Sep 2025 07:58:08 +0800 Subject: [PATCH 354/518] [Model] Improve DotsOCRForCausalLM (#25466) Signed-off-by: Jee Jee Li --- vllm/model_executor/models/dots_ocr.py | 237 +++++++++++++++---------- 1 file changed, 143 insertions(+), 94 deletions(-) diff --git a/vllm/model_executor/models/dots_ocr.py b/vllm/model_executor/models/dots_ocr.py index 04fa5584199a..2db350c892ae 100644 --- a/vllm/model_executor/models/dots_ocr.py +++ b/vllm/model_executor/models/dots_ocr.py @@ -7,11 +7,13 @@ import torch.nn as nn import torch.nn.functional as F from torch.nn import LayerNorm -from transformers.modeling_utils import PreTrainedModel from transformers.models.qwen2_vl import Qwen2VLProcessor from vllm.attention.layer import check_upstream_fa_availability from vllm.config import VllmConfig +from vllm.distributed import utils as dist_utils +from vllm.distributed.parallel_state import ( + get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import 
(ColumnParallelLinear, @@ -19,10 +21,14 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.interfaces import (MultiModalEmbeddings, + SupportsLoRA, SupportsMultiModal, SupportsPP) +from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.model_executor.models.qwen2 import Qwen2ForCausalLM +from vllm.model_executor.models.qwen2_5_vl import Qwen2_5_VisionAttention from vllm.model_executor.models.qwen2_vl import (Qwen2VLDummyInputsBuilder, Qwen2VLMultiModalProcessor, Qwen2VLProcessingInfo) @@ -38,6 +44,8 @@ from vllm.transformers_utils.configs.dotsocr import (DotsOCRConfig, DotsVisionConfig) +from .vision import run_dp_sharded_mrope_vision_model + IMAGE_TOKEN = "<|imgpad|>" @@ -181,6 +189,8 @@ def __init__( context_dim: int, spatial_merge_size: int = 2, pre_norm="layernorm", + prefix: str = "", + use_data_parallel: bool = False, ) -> None: super().__init__() self.hidden_size = context_dim * (spatial_merge_size**2) @@ -189,21 +199,21 @@ def __init__( self.ln_q = LayerNorm(context_dim, eps=1e-6) elif self.pre_norm == "rmsnorm": self.ln_q = RMSNorm(context_dim, eps=1e-6) - else: - print("no norm in patch merger") self.mlp = nn.Sequential( ColumnParallelLinear(self.hidden_size, self.hidden_size, bias=True, return_bias=False, - disable_tp=True), + prefix=f"{prefix}.0", + disable_tp=use_data_parallel), nn.GELU(), RowParallelLinear(self.hidden_size, dim, bias=True, return_bias=False, - disable_tp=True), + prefix=f"{prefix}.2", + disable_tp=use_data_parallel), ) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -223,38 +233,36 @@ def __init__(self, bias: bool = True, *, quant_config: Optional[QuantizationConfig] = None, - prefix: str = "") -> None: + prefix: str = "", + use_data_parallel: bool = False) -> None: super().__init__() - from vllm.distributed import (parallel_state, - tensor_model_parallel_all_gather) - from vllm.distributed import utils as dist_utils self.embed_dim = dim - self.num_heads = num_heads - self.head_dim = dim // num_heads - self.tp_size = parallel_state.get_tensor_model_parallel_world_size() - self.tp_rank = parallel_state.get_tensor_model_parallel_rank() - self.num_heads_per_partition = dist_utils.divide( + self.tp_size = (1 if use_data_parallel else + get_tensor_model_parallel_world_size()) + self.tp_rank = (0 if use_data_parallel else + get_tensor_model_parallel_rank()) + self.hidden_size_per_attention_head = dist_utils.divide(dim, num_heads) + self.num_attention_heads_per_partition = dist_utils.divide( num_heads, self.tp_size) - # qkv/proj follow Qwen2-VL style; bias controlled by arg - self.qkv = QKVParallelLinear(hidden_size=dim, - head_size=dim // num_heads, - total_num_heads=num_heads, - bias=bias, - quant_config=quant_config, - prefix=f"{prefix}.qkv") + self.qkv = QKVParallelLinear( + hidden_size=dim, + head_size=self.hidden_size_per_attention_head, + total_num_heads=num_heads, + bias=bias, + quant_config=quant_config, + prefix=f"{prefix}.qkv", + disable_tp=use_data_parallel) self.proj = RowParallelLinear(input_size=dim, output_size=dim, bias=bias, quant_config=quant_config, - prefix=f"{prefix}.proj") - self._all_gather = tensor_model_parallel_all_gather - self._split_last = dist_utils.split_tensor_along_last_dim - + prefix=f"{prefix}.proj", + disable_tp=use_data_parallel) # Select attention backend - self.attn_backend = get_vit_attn_backend(self.head_dim, - 
torch.get_default_dtype()) + self.attn_backend = get_vit_attn_backend( + self.hidden_size_per_attention_head, torch.get_default_dtype()) self.use_upstream_fa = False if self.attn_backend != _Backend.FLASH_ATTN and \ check_upstream_fa_availability(torch.get_default_dtype()): @@ -270,19 +278,6 @@ def __init__(self, _Backend.FLASH_ATTN, _Backend.ROCM_AITER_FA } - def _split_qkv(self, qkv: torch.Tensor) -> tuple[torch.Tensor, ...]: - # qkv: [S, B, 3*dim] - seq_len, bs, _ = qkv.shape - if self.tp_size > 1: - qkv = self._all_gather(qkv) - q, k, v = qkv.chunk(3, dim=2) - if self.tp_size > 1: - q = self._split_last(q, num_partitions=self.tp_size)[self.tp_rank] - k = self._split_last(k, num_partitions=self.tp_size)[self.tp_rank] - v = self._split_last(v, num_partitions=self.tp_size)[self.tp_rank] - new_shape = (seq_len, bs, self.num_heads_per_partition, self.head_dim) - return (q.view(*new_shape), k.view(*new_shape), v.view(*new_shape)) - def forward( self, hidden_states: torch.Tensor, @@ -295,7 +290,7 @@ def forward( # [S, C] -> [S, B=1, C] x = hidden_states.unsqueeze(1) x, _ = self.qkv(x) - q, k, v = self._split_qkv(x) + q, k, v = Qwen2_5_VisionAttention.split_qkv(self, x) bs = q.shape[1] # [S,B,H,D] -> [B,S,H,D] q = q.permute(1, 0, 2, 3).contiguous() @@ -327,8 +322,9 @@ def forward( max_seqlen_k=max_seqlen, dropout_p=0.0, causal=False) - context_layer = output.view(bs, -1, self.num_heads_per_partition, - self.head_dim) + context_layer = output.view(bs, -1, + self.num_attention_heads_per_partition, + self.hidden_size_per_attention_head) elif self.attn_backend == _Backend.TORCH_SDPA: outputs = [] for i in range(1, len(cu_seqlens)): @@ -368,7 +364,8 @@ def __init__(self, config, *, quant_config: Optional[QuantizationConfig] = None, - prefix: str = ""): + prefix: str = "", + use_data_parallel: bool = False): super().__init__() hidden_features = config.intermediate_size in_features = config.embed_dim @@ -380,13 +377,13 @@ def __init__(self, bias=bias, quant_config=quant_config, prefix=f"{prefix}.fc13", - disable_tp=True) + disable_tp=use_data_parallel) self.fc2 = RowParallelLinear(hidden_features, in_features, bias=bias, quant_config=quant_config, prefix=f"{prefix}.fc2", - disable_tp=True) + disable_tp=use_data_parallel) self.act_fn = SiluAndMul() def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -397,28 +394,36 @@ def forward(self, x: torch.Tensor) -> torch.Tensor: def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: - params = dict(self.named_parameters()) - loaded: set[str] = set() - for name, w in weights: - # Map fc1 -> fc13 (shard 0) - if name.startswith("fc1."): - tgt = name.replace("fc1.", "fc13.") - if tgt in params: - params[tgt].weight_loader(params[tgt], w, 0) - loaded.add(tgt) - continue - # Map fc3 -> fc13 (shard 1) - if name.startswith("fc3."): - tgt = name.replace("fc3.", "fc13.") - if tgt in params: - params[tgt].weight_loader(params[tgt], w, 1) - loaded.add(tgt) - continue - # Pass-through for fc2 and others - if name in params: - params[name].weight_loader(params[name], w) - loaded.add(name) - return loaded + stacked_params_mapping = [ + ("fc13", "fc1", 0), + ("fc13", "fc3", 1), + ] + params_dict = dict(self.named_parameters()) + loaded_params: set[str] = set() + for name, loaded_weight in weights: + + for param_name, weight_name, shard_id in stacked_params_mapping: + if weight_name not in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra bias for GPTQ models. 
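# The stacked_params_mapping above folds the checkpoint's separate fc1 and fc3
# weights into the single fused fc13 parameter, with shard_id picking the
# destination half; SiluAndMul then gates the first half with the second. A toy
# version of that load path and forward; sizes and the helper are illustrative.
import torch
import torch.nn.functional as F

hidden, inter = 8, 16
fc1 = torch.randn(inter, hidden)       # checkpoint weight mapped to shard 0
fc3 = torch.randn(inter, hidden)       # checkpoint weight mapped to shard 1
fc13 = torch.empty(2 * inter, hidden)  # fused parameter held by the model

def load_shard(fused: torch.Tensor, w: torch.Tensor, shard_id: int) -> None:
    # shard 0 -> rows [0, inter), shard 1 -> rows [inter, 2*inter)
    fused[shard_id * inter:(shard_id + 1) * inter].copy_(w)

load_shard(fc13, fc1, 0)
load_shard(fc13, fc3, 1)

x = torch.randn(4, hidden)
first, second = (x @ fc13.t()).chunk(2, dim=-1)
out = F.silu(first) * second
assert torch.allclose(out, F.silu(x @ fc1.t()) * (x @ fc3.t()), atol=1e-5)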
+ if name.endswith(".bias") and name not in params_dict: + continue + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(name) + return loaded_params class DotsPatchEmbed(nn.Module): @@ -463,25 +468,28 @@ def forward(self, x: torch.Tensor, grid_thw=None) -> torch.Tensor: class DotsVisionBlock(nn.Module): - def __init__(self, - config, - *, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = ""): + def __init__( + self, + config, + *, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + use_data_parallel: bool = False, + ): super().__init__() - self.attn = DotsVisionAttention( - config, - config.embed_dim, - num_heads=config.num_attention_heads, - bias=config.use_bias, - quant_config=quant_config, - prefix=f"{prefix}.attn", - ) + self.attn = DotsVisionAttention(config, + config.embed_dim, + num_heads=config.num_attention_heads, + bias=config.use_bias, + quant_config=quant_config, + prefix=f"{prefix}.attn", + use_data_parallel=use_data_parallel) self.norm1 = RMSNorm(config.embed_dim, eps=config.rms_norm_eps) self.mlp = DotsSwiGLUFFN(config, quant_config=quant_config, - prefix=f"{prefix}.mlp") + prefix=f"{prefix}.mlp", + use_data_parallel=use_data_parallel) self.norm2 = RMSNorm(config.embed_dim, eps=config.rms_norm_eps) def forward(self, @@ -502,7 +510,7 @@ def forward(self, return hidden_states -class DotsVisionTransformer(PreTrainedModel): +class DotsVisionTransformer(nn.Module): def __init__( self, @@ -512,8 +520,9 @@ def __init__( num_hidden_layers_override: Optional[int] = None, require_post_norm: Optional[bool] = None, prefix: str = "", + use_data_parallel: bool = False, ) -> None: - super().__init__(config) + super().__init__() self.config = config self.spatial_merge_size = config.spatial_merge_size @@ -526,14 +535,15 @@ def __init__( if self.attn_backend != _Backend.FLASH_ATTN and \ check_upstream_fa_availability(torch.get_default_dtype()): self.attn_backend = _Backend.FLASH_ATTN - + self.out_hidden_size = config.hidden_size # Keep blocks for compatibility with other vision towers num_layers = (config.num_hidden_layers if num_hidden_layers_override is None else num_hidden_layers_override) self.blocks = nn.ModuleList([ DotsVisionBlock(config, quant_config=quant_config, - prefix=f"{prefix}.blocks.{i}") + prefix=f"{prefix}.blocks.{i}", + use_data_parallel=use_data_parallel) for i in range(num_layers) ]) if require_post_norm is None: @@ -548,6 +558,7 @@ def __init__( dim=config.hidden_size, context_dim=config.embed_dim, spatial_merge_size=config.spatial_merge_size, + use_data_parallel=use_data_parallel, ) @property @@ -604,7 +615,11 @@ def compute_attn_mask_seqlen( return max_seqlen, seqlens def forward(self, hidden_states: torch.Tensor, - grid_thw: torch.Tensor) -> torch.Tensor: + grid_thw: list[list[int]]) -> torch.Tensor: + # Convert grid_thw to tensor (always expecting list format now) + grid_thw = torch.tensor(grid_thw, + device=hidden_states.device, + dtype=torch.long) hidden_states = hidden_states.to(self.dtype) hidden_states = self.patch_embed(hidden_states, grid_thw) @@ -638,7 +653,8 @@ def forward(self, hidden_states: torch.Tensor, info=DotsOCRProcessingInfo, 
dummy_inputs=DotsOCRDummyInputsBuilder, ) -class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): +class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP, + SupportsLoRA): hf_to_vllm_mapper = WeightsMapper( orig_to_new_substr={ ".attn.qkv_proj.": ".attn.qkv.", @@ -650,6 +666,21 @@ class DotsOCRForCausalLM(nn.Module, SupportsMultiModal, SupportsPP): }, ) + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + ".attn.qkv": [".attn.qkv"], + "fc13": ["fc1", "fc3"], + } + supports_encoder_tp_data = True + @classmethod def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: if modality.startswith("image"): @@ -660,19 +691,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config: DotsOCRConfig = vllm_config.model_config.hf_config self.quant_config = vllm_config.quant_config - self.multimodal_config = vllm_config.model_config.multimodal_config - + multimodal_config = vllm_config.model_config.multimodal_config + self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" if isinstance(self.config.vision_config, dict): vision_config = DotsVisionConfig(**self.config.vision_config) self.config.vision_config = vision_config else: vision_config = self.config.vision_config - self.vision_tower = DotsVisionTransformer( vision_config, quant_config=self.quant_config, prefix=maybe_prefix(prefix, "vision_tower"), - ) + use_data_parallel=self.use_data_parallel) self.language_model: Qwen2ForCausalLM = init_vllm_registered_model( vllm_config=vllm_config, hf_config=self.config, @@ -744,8 +774,17 @@ def _process_image_input( else: pixel_values = image_input["pixel_values"].type( self.vision_tower.dtype) - image_embeds = self.vision_tower( - pixel_values, grid_thw)[:, :self.config.hidden_size] + + if self.use_data_parallel: + return run_dp_sharded_mrope_vision_model( + self.vision_tower, + pixel_values, + grid_thw_list, + rope_type="rope_3d", + ) + else: + image_embeds = self.vision_tower( + pixel_values, grid_thw)[:, :self.config.hidden_size] # Split concatenated embeddings for each image item. 
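# NOTE (illustrative aside, not from the diff): the `use_data_parallel` flag
# threaded through the vision modules above selects between two encoder modes.
# With mm_encoder_tp_mode == "data" the layers are built with
# disable_tp=use_data_parallel and requests go through
# run_dp_sharded_mrope_vision_model, so, roughly, each rank runs the full
# (non tensor-parallel) tower on a share of the images and the embeddings are
# gathered afterwards; otherwise the tower stays tensor-parallel and the
# output is truncated to config.hidden_size as before.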
merge_size = self.vision_tower.spatial_merge_size @@ -822,3 +861,13 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: loader = AutoWeightsLoader(self) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) + + def get_mm_mapping(self) -> MultiModelKeys: + """ + Get the module prefix in multimodal models + """ + return MultiModelKeys.from_string_field( + language_model="language_model", + connector="vision_tower.merger", + tower_model="vision_tower.", + ) From 05c19485a5294fef5bbab625822ff96613cf0c39 Mon Sep 17 00:00:00 2001 From: Wei Wei Date: Wed, 24 Sep 2025 18:09:34 -0700 Subject: [PATCH 355/518] [Kernel] Support DCP for Triton backend (#25132) Signed-off-by: Wei Wei --- .../attention/test_triton_decode_attention.py | 5 +++++ vllm/attention/ops/triton_decode_attention.py | 19 +++++++++++++++++-- vllm/model_executor/models/deepseek_v2.py | 2 +- vllm/v1/attention/backends/mla/triton_mla.py | 12 +++++++----- 4 files changed, 30 insertions(+), 8 deletions(-) diff --git a/tests/kernels/attention/test_triton_decode_attention.py b/tests/kernels/attention/test_triton_decode_attention.py index 2dca720fe330..48aacac8376b 100644 --- a/tests/kernels/attention/test_triton_decode_attention.py +++ b/tests/kernels/attention/test_triton_decode_attention.py @@ -46,6 +46,8 @@ def test_decode_attention(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE): # o will have the same shape as q o = torch.zeros(B, H_Q, D_V, dtype=dtype, device="cuda") + lse = torch.zeros(B, H_Q, dtype=dtype, device="cuda") + b_seq_len = torch.full((B, ), seq_len, device="cuda") attn_logits = torch.empty( @@ -60,6 +62,7 @@ def test_decode_attention(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE): k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, @@ -72,12 +75,14 @@ def test_decode_attention(B, L, H_Q, H_KV, D_QK, D_V, CACHE_SIZE, PAGE_SIZE): v_buffer = v_buffer.view(CACHE_SIZE // PAGE_SIZE, PAGE_SIZE, H_KV, D_V) o1 = torch.zeros_like(o) + lse1 = torch.zeros_like(lse) decode_attention_fwd( q, k_buffer, v_buffer, o1, + lse1, req_to_page, b_seq_len, attn_logits, diff --git a/vllm/attention/ops/triton_decode_attention.py b/vllm/attention/ops/triton_decode_attention.py index f82ce5b4d4b6..7f5a678615cf 100644 --- a/vllm/attention/ops/triton_decode_attention.py +++ b/vllm/attention/ops/triton_decode_attention.py @@ -474,12 +474,14 @@ def _decode_grouped_att_m_fwd( def _fwd_kernel_stage2( Mid_O, o, + lse, B_Seqlen, stride_mid_ob, stride_mid_oh, stride_mid_os, stride_obs, stride_oh, + stride_lse_bs, NUM_KV_SPLITS: tl.constexpr, BLOCK_DV: tl.constexpr, Lv: tl.constexpr, @@ -525,12 +527,18 @@ def _fwd_kernel_stage2( acc / e_sum, mask=mask_d, ) + lse_val = e_max + tl.log(e_sum) + tl.store( + lse + cur_batch * stride_lse_bs + cur_head, + lse_val, + ) def _decode_softmax_reducev_fwd( logits, q, o, + lse, v_buffer, b_seq_len, num_kv_splits, @@ -555,12 +563,14 @@ def _decode_softmax_reducev_fwd( _fwd_kernel_stage2[grid]( logits, o, + lse, b_seq_len, logits.stride(0), logits.stride(1), logits.stride(2), o.stride(0), o.stride(1), + lse.stride(0), NUM_KV_SPLITS=NUM_KV_SPLITS, BLOCK_DV=BLOCK_DV, Lv=Lv, @@ -575,6 +585,7 @@ def decode_attention_fwd_normal( k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, @@ -595,7 +606,7 @@ def decode_attention_fwd_normal( page_size, logit_cap, ) - _decode_softmax_reducev_fwd(attn_logits, q, o, v_buffer, b_seq_len, + _decode_softmax_reducev_fwd(attn_logits, q, o, lse, v_buffer, b_seq_len, num_kv_splits) @@ -604,6 +615,7 @@ def 
decode_attention_fwd_grouped( k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, @@ -624,7 +636,7 @@ def decode_attention_fwd_grouped( page_size, logit_cap, ) - _decode_softmax_reducev_fwd(attn_logits, q, o, v_buffer, b_seq_len, + _decode_softmax_reducev_fwd(attn_logits, q, o, lse, v_buffer, b_seq_len, num_kv_splits) @@ -633,6 +645,7 @@ def decode_attention_fwd( k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, @@ -651,6 +664,7 @@ def decode_attention_fwd( k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, @@ -666,6 +680,7 @@ def decode_attention_fwd( k_buffer, v_buffer, o, + lse, req_to_token, b_seq_len, attn_logits, diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 9895ebbcdefe..aab522390a7a 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -685,7 +685,7 @@ def forward( ) -> torch.Tensor: # Self Attention if residual is None: - residual = hidden_states + residual = hidden_states.clone() hidden_states = self.input_layernorm(hidden_states) else: hidden_states, residual = self.input_layernorm( diff --git a/vllm/v1/attention/backends/mla/triton_mla.py b/vllm/v1/attention/backends/mla/triton_mla.py index d692b00d78b4..dd272fa01925 100644 --- a/vllm/v1/attention/backends/mla/triton_mla.py +++ b/vllm/v1/attention/backends/mla/triton_mla.py @@ -32,6 +32,7 @@ def get_impl_cls() -> type["TritonMLAImpl"]: class TritonMLAImpl(MLACommonImpl[MLACommonMetadata]): + can_return_lse_for_decode: bool = True def __init__( self, @@ -139,19 +140,20 @@ def _forward_decode( assert isinstance(q, torch.Tensor) B = q.shape[0] + q_num_heads = q.shape[1] o = torch.zeros(B, - self.num_heads, + q_num_heads, self.kv_lora_rank, dtype=q.dtype, device=q.device) - + lse = torch.zeros(B, q_num_heads, dtype=q.dtype, device=q.device) num_kv_splits = 4 # TODO: heuristic # TODO(lucas) Allocate ahead of time attn_logits = torch.empty( ( B, - self.num_heads, + q_num_heads, num_kv_splits, # NOTE(lucas) idk why the +1 is here but sglang has it so we # just mirror that @@ -167,9 +169,9 @@ def _forward_decode( PAGE_SIZE = kv_c_and_k_pe_cache.size(1) # Run MQA - decode_attention_fwd(q, kv_c_and_k_pe_cache, kv_c_cache, o, + decode_attention_fwd(q, kv_c_and_k_pe_cache, kv_c_cache, o, lse, attn_metadata.decode.block_table, attn_metadata.decode.seq_lens, attn_logits, num_kv_splits, self.scale, PAGE_SIZE) - return o, None + return o, lse From 4492e3a55428e161ca8db381edc28263e5da4c8d Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Wed, 24 Sep 2025 21:52:52 -0400 Subject: [PATCH 356/518] [Bug] Dynamo Unsupported due to `BasevLLMParameter.torch_function` calling disabled super() (#25613) Signed-off-by: yewentao256 Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/model_executor/parameter.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/vllm/model_executor/parameter.py b/vllm/model_executor/parameter.py index 66add98dab44..9b9d89ebaed1 100644 --- a/vllm/model_executor/parameter.py +++ b/vllm/model_executor/parameter.py @@ -12,6 +12,7 @@ from vllm.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from vllm.logger import init_logger +from vllm.utils import is_torch_equal_or_newer __all__ = [ "BasevLLMParameter", "PackedvLLMParameter", 
"PerTensorScaleParameter", @@ -114,6 +115,15 @@ def _shard_id_as_int(self, shard_id: Union[str, int]) -> int: @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): + if not is_torch_equal_or_newer("2.8.0"): + logger.warning_once( + "Torch %s detected (<2.8.0): returning NotImplemented in " + "BasevLLMParameter.__torch_function__ to avoid potential " + "TorchDynamo issues.", + torch.__version__, + ) + return NotImplemented + if kwargs is None: kwargs = {} return super().__torch_function__(func, types, args, kwargs) From 90b139cfffd52ee2563260a89409fee60f7329f2 Mon Sep 17 00:00:00 2001 From: "Saman A. Pour" Date: Wed, 24 Sep 2025 21:12:53 -0700 Subject: [PATCH 357/518] Enable Fbgemm NVFP4 on Dense models (#25609) Signed-off-by: Saman Keon --- benchmarks/kernels/bench_nvfp4_gemm.py | 65 +++++++++++++++++-- vllm/envs.py | 5 +- .../schemes/compressed_tensors_w4a4_nvfp4.py | 24 +++++++ 3 files changed, 89 insertions(+), 5 deletions(-) diff --git a/benchmarks/kernels/bench_nvfp4_gemm.py b/benchmarks/kernels/bench_nvfp4_gemm.py index 9e832c9faa8e..6b19eb113f3e 100644 --- a/benchmarks/kernels/bench_nvfp4_gemm.py +++ b/benchmarks/kernels/bench_nvfp4_gemm.py @@ -3,6 +3,7 @@ import argparse import copy import itertools +import os import torch from weight_shapes import WEIGHT_SHAPES @@ -23,21 +24,45 @@ "torch-bf16": dict(enabled=True), "nvfp4": dict(no_a_quant=False, enabled=True), "nvfp4-noquant": dict(no_a_quant=True, enabled=True), + "fbgemm-nvfp4": dict(fbgemm=True, no_a_quant=False, enabled=True), + "fbgemm-nvfp4-noquant": dict(fbgemm=True, no_a_quant=True, enabled=True), } +_needs_fbgemm = any( + v.get("fbgemm", False) for v in PROVIDER_CFGS.values() if v.get("enabled", False) +) +if _needs_fbgemm: + try: + from fbgemm_gpu.experimental.gemm.triton_gemm.fp4_quantize import ( + triton_scale_nvfp4_quant, + ) + except ImportError: + print( + "WARNING: FBGEMM providers are enabled but fbgemm_gpu is not installed. " + "These providers will be skipped. Please install fbgemm_gpu with: " + "'pip install fbgemm-gpu-genai' to run them." + ) + # Disable FBGEMM providers so the benchmark can run. + for cfg in PROVIDER_CFGS.values(): + if cfg.get("fbgemm"): + cfg["enabled"] = False + _enabled = [k for k, v in PROVIDER_CFGS.items() if v["enabled"]] -def _quant_weight_nvfp4(b: torch.Tensor, device: str): +def _quant_weight_nvfp4(b: torch.Tensor, device: str, cfg): # Compute global scale for weight b_amax = torch.abs(b).max().to(torch.float32) b_global_scale = FLOAT8_E4M3_MAX * FLOAT4_E2M1_MAX / b_amax - b_fp4, scale_b_fp4 = ops.scaled_fp4_quant(b, b_global_scale) + if "fbgemm" in cfg and cfg["fbgemm"]: + b_fp4, scale_b_fp4 = triton_scale_nvfp4_quant(b, b_global_scale) + else: + b_fp4, scale_b_fp4 = ops.scaled_fp4_quant(b, b_global_scale) return b_fp4, scale_b_fp4, b_global_scale def build_nvfp4_runner(cfg, a, b, dtype, device): - b_fp4, scale_b_fp4, b_global_scale = _quant_weight_nvfp4(b, device) + b_fp4, scale_b_fp4, b_global_scale = _quant_weight_nvfp4(b, device, cfg) # Compute global scale for activation # NOTE: This is generally provided ahead-of-time by the model checkpoint. 
@@ -46,6 +71,35 @@ def build_nvfp4_runner(cfg, a, b, dtype, device): # Alpha for the GEMM operation alpha = 1.0 / (a_global_scale * b_global_scale) + if "fbgemm" in cfg and cfg["fbgemm"]: + if cfg["no_a_quant"]: + a_fp4, scale_a_fp4 = triton_scale_nvfp4_quant(a, a_global_scale) + + def run(): + return torch.ops.fbgemm.f4f4bf16( + a_fp4, + b_fp4, + scale_a_fp4, + scale_b_fp4, + global_scale=alpha, + use_mx=False, + ) + + return run + else: + + def run(): + a_fp4, scale_a_fp4 = triton_scale_nvfp4_quant(a, a_global_scale) + return torch.ops.fbgemm.f4f4bf16( + a_fp4, + b_fp4, + scale_a_fp4, + scale_b_fp4, + global_scale=alpha, + use_mx=False, + ) + + return run if cfg["no_a_quant"]: # Pre-quantize activation @@ -130,10 +184,13 @@ def prepare_shapes(args): for K, N, model in prepare_shapes(args): print(f"{model}, N={N} K={K}, BF16 vs NVFP4 GEMMs TFLOP/s:") + save_dir = f"bench_nvfp4_res_n{N}_k{K}" + os.makedirs(save_dir, exist_ok=True) + benchmark.run( print_data=True, show_plots=True, - save_path=f"bench_nvfp4_res_n{N}_k{K}", + save_path=save_dir, N=N, K=K, ) diff --git a/vllm/envs.py b/vllm/envs.py index 5d622c067529..b8af770d05f6 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -201,6 +201,7 @@ VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True VLLM_USE_NCCL_SYMM_MEM: bool = False VLLM_NCCL_INCLUDE_PATH: Optional[str] = None + VLLM_USE_FBGEMM: bool = False def get_default_cache_root(): @@ -1452,7 +1453,8 @@ def get_vllm_port() -> Optional[int]: # NCCL header path "VLLM_NCCL_INCLUDE_PATH": lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None), - + # Flag to enable FBGemm kernels on model execution + "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))), } # --8<-- [end:env-vars-definition] @@ -1548,6 +1550,7 @@ def compute_hash() -> str: "VLLM_ROCM_FP8_MFMA_PAGE_ATTN", "VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE", "VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING", + "VLLM_USE_FBGEMM", ] for key in environment_variables_to_hash: # if this goes out of sync with environment_variables, diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py index dedd681f15de..d472427756d4 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py @@ -30,8 +30,20 @@ def __init__(self): if envs.VLLM_USE_TRTLLM_FP4_GEMM: assert has_flashinfer(), "TRTLLM FP4 GEMM requires FlashInfer" self.backend = "flashinfer-trtllm" + logger.info_once("Using flashinfer-trtllm for FP4") + elif envs.VLLM_USE_FBGEMM: + self.backend = "fbgemm" + try: + import fbgemm_gpu # noqa: F401 + except ImportError as exc: + raise ImportError( + "Backend fbgemm requires fbgemm.f4f4bf16 operator, " + "Please install with: pip install fbgemm-gpu-genai" + ) from exc + logger.info_once("Using FGBEMM-GPU-GENAI for FP4") elif has_flashinfer(): self.backend = "flashinfer-cutlass" + logger.info_once("Using flashinfer-cutlass for FP4") else: self.backend = "cutlass" self.group_size = 16 @@ -116,6 +128,9 @@ def process_weights_after_loading(self, layer) -> None: layer.weight_packed = Parameter(weight, requires_grad=False) else: swizzled_weight_scale = swizzle_blockscale(layer.weight_scale) + if self.backend == "fbgemm": + swizzled_weight_scale = swizzled_weight_scale.view(-1).view( + torch.uint8) layer.weight_scale = 
Parameter(swizzled_weight_scale, requires_grad=False) layer.weight_packed = Parameter(layer.weight_packed.data, @@ -153,6 +168,15 @@ def apply_weights(self, out = flashinfer_scaled_fp4_mm(*mm_args, backend="trtllm") elif self.backend == "flashinfer-cutlass": out = flashinfer_scaled_fp4_mm(*mm_args, backend="cutlass") + elif self.backend == "fbgemm": + out = torch.ops.fbgemm.f4f4bf16( + x_fp4, + layer.weight_packed, + x_blockscale.view(-1).view(torch.uint8), + layer.weight_scale, + layer.alpha, + use_mx=False, + ).to(output_dtype) else: out = cutlass_scaled_fp4_mm(*mm_args) From 845adb3ec6d7123a4f4251c121d406524aa76548 Mon Sep 17 00:00:00 2001 From: XuruiYang <530534756@qq.com> Date: Thu, 25 Sep 2025 12:53:40 +0800 Subject: [PATCH 358/518] [Model] Add LongCat-Flash (#23991) Signed-off-by: yangxurui Co-authored-by: yangxurui --- csrc/moe/moe_align_sum_kernels.cu | 10 +- docs/models/supported_models.md | 1 + tests/kernels/moe/test_flashinfer.py | 4 +- tests/models/registry.py | 6 + tests/models/utils.py | 12 +- tests/test_routing_simulator.py | 2 +- vllm/config/model.py | 6 +- vllm/config/speculative.py | 21 +- .../layers/fused_moe/fused_moe.py | 89 +++ vllm/model_executor/layers/fused_moe/layer.py | 87 ++- vllm/model_executor/layers/mla.py | 1 - .../layers/quantization/awq_marlin.py | 2 +- .../layers/quantization/bitsandbytes.py | 2 +- .../compressed_tensors_moe.py | 10 +- .../layers/quantization/experts_int8.py | 2 +- .../model_executor/layers/quantization/fp8.py | 38 +- .../layers/quantization/gguf.py | 2 +- .../layers/quantization/gptq_marlin.py | 2 +- .../layers/quantization/modelopt.py | 4 +- .../layers/quantization/moe_wna16.py | 2 +- .../layers/quantization/mxfp4.py | 6 +- .../layers/quantization/quark/quark_moe.py | 4 +- .../model_executor/layers/quantization/rtn.py | 2 +- .../layers/quantization/utils/quant_utils.py | 5 + vllm/model_executor/models/longcat_flash.py | 712 ++++++++++++++++++ .../models/longcat_flash_mtp.py | 352 +++++++++ vllm/model_executor/models/registry.py | 2 + vllm/model_executor/models/utils.py | 18 +- vllm/v1/spec_decode/eagle.py | 11 +- vllm/v1/worker/gpu_model_runner.py | 4 +- vllm/v1/worker/utils.py | 4 +- 31 files changed, 1357 insertions(+), 66 deletions(-) create mode 100644 vllm/model_executor/models/longcat_flash.py create mode 100644 vllm/model_executor/models/longcat_flash_mtp.py diff --git a/csrc/moe/moe_align_sum_kernels.cu b/csrc/moe/moe_align_sum_kernels.cu index 8bbcf5a673fd..629348bf8876 100644 --- a/csrc/moe/moe_align_sum_kernels.cu +++ b/csrc/moe/moe_align_sum_kernels.cu @@ -44,6 +44,9 @@ __global__ void moe_align_block_size_kernel( for (size_t i = tid; i < numel; i += stride) { int expert_id = topk_ids[i]; + if (expert_id >= num_experts) { + continue; + } int warp_idx = expert_id / experts_per_warp; int expert_offset = expert_id % experts_per_warp; atomicAdd(&shared_counts[warp_idx * experts_per_warp + expert_offset], 1); @@ -95,12 +98,15 @@ template __global__ void count_and_sort_expert_tokens_kernel( const scalar_t* __restrict__ topk_ids, int32_t* __restrict__ sorted_token_ids, int32_t* __restrict__ cumsum_buffer, - size_t numel) { + size_t numel, int32_t num_experts) { const size_t tid = blockIdx.x * blockDim.x + threadIdx.x; const size_t stride = blockDim.x * gridDim.x; for (size_t i = tid; i < numel; i += stride) { int32_t expert_id = topk_ids[i]; + if (expert_id >= num_experts) { + continue; + } int32_t rank_post_pad = atomicAdd(&cumsum_buffer[expert_id], 1); sorted_token_ids[rank_post_pad] = i; } @@ -269,7 +275,7 @@ void 
moe_align_block_size(torch::Tensor topk_ids, int64_t num_experts, sort_kernel<<>>( topk_ids.data_ptr(), sorted_token_ids.data_ptr(), - cumsum_buffer.data_ptr(), topk_ids.numel()); + cumsum_buffer.data_ptr(), topk_ids.numel(), num_experts); } }); } diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 81bd12f9a29f..650f62492282 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -428,6 +428,7 @@ th { | `MiniMaxM1ForCausalLM` | MiniMax-Text | `MiniMaxAI/MiniMax-M1-40k`, `MiniMaxAI/MiniMax-M1-80k`, etc. | | | ✅︎ | | `MiniMaxText01ForCausalLM` | MiniMax-Text | `MiniMaxAI/MiniMax-Text-01`, etc. | | | ✅︎ | | `Zamba2ForCausalLM` | Zamba2 | `Zyphra/Zamba2-7B-instruct`, `Zyphra/Zamba2-2.7B-instruct`, `Zyphra/Zamba2-1.2B-instruct`, etc. | | | ✅︎ | +| `LongcatFlashForCausalLM` | LongCat-Flash | `meituan-longcat/LongCat-Flash-Chat`, `meituan-longcat/LongCat-Flash-Chat-FP8` | ✅︎ |✅︎ | ✅︎ | Some models are supported only via the [Transformers backend](#transformers). The purpose of the table below is to acknowledge models which we officially support in this way. The logs will say that the Transformers backend is being used, and you will see no warning that this is fallback behaviour. This means that, if you have issues with any of the models listed below, please [make an issue](https://github.com/vllm-project/vllm/issues/new/choose) and we'll do our best to fix it! diff --git a/tests/kernels/moe/test_flashinfer.py b/tests/kernels/moe/test_flashinfer.py index 5564db3cda0e..c3be7f28fb24 100644 --- a/tests/kernels/moe/test_flashinfer.py +++ b/tests/kernels/moe/test_flashinfer.py @@ -138,7 +138,7 @@ def test_flashinfer_per_tensor_moe_fp8_no_graph( td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=True) score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16) - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=td.hidden_states, router_logits=score, use_grouped_topk=False, @@ -206,7 +206,7 @@ def test_flashinfer_cutlass_moe_fp8_no_graph( td = TestData.make_moe_tensors_8bit(m, k, n, e, reorder=False) score = torch.randn((m, e), device="cuda", dtype=torch.bfloat16) - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=td.hidden_states, router_logits=score, use_grouped_topk=False, diff --git a/tests/models/registry.py b/tests/models/registry.py index 8b62952ad590..10d85707d668 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -273,6 +273,8 @@ def check_available_online( is_available_online=False), "Llama4ForCausalLM": _HfExamplesInfo("meta-llama/Llama-4-Scout-17B-16E-Instruct", # noqa: E501 is_available_online=False), + "LongcatFlashForCausalLM": _HfExamplesInfo + ("meituan-longcat/LongCat-Flash-Chat", trust_remote_code=True), "MambaForCausalLM": _HfExamplesInfo("state-spaces/mamba-130m-hf"), "Mamba2ForCausalLM": _HfExamplesInfo("mistralai/Mamba-Codestral-7B-v0.1", min_transformers_version="4.55.3", @@ -639,6 +641,10 @@ def check_available_online( speculative_model="zai-org/GLM-4.5", min_transformers_version="4.54", is_available_online=False), + "LongCatFlashMTPModel": _HfExamplesInfo( + "meituan-longcat/LongCat-Flash-Chat", + trust_remote_code=True, + speculative_model="meituan-longcat/LongCat-Flash-Chat"), "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL", trust_remote_code=True, speculative_model="XiaomiMiMo/MiMo-7B-RL"), diff --git a/tests/models/utils.py 
b/tests/models/utils.py index f80e92ebb3e2..7e731cffc047 100644 --- a/tests/models/utils.py +++ b/tests/models/utils.py @@ -428,9 +428,8 @@ def dummy_hf_overrides( num_hidden_layers = (3 if model_arch == "Gemma3nForConditionalGeneration" else 1) - text_config.update({ + update_dict = { "num_layers": num_layers, - "num_hidden_layers": num_hidden_layers, "num_experts": num_experts, "num_experts_per_tok": 2, "num_local_experts": num_experts, @@ -440,7 +439,14 @@ def dummy_hf_overrides( "n_routed_experts": num_experts, # For Gemma-3n "num_kv_shared_layers": 1, - }) + } + + # Update num_hidden_layers for non-Longcat architectures + if model_arch != "LongcatFlashForCausalLM" \ + and model_arch != "LongCatFlashMTPModel": + update_dict["num_hidden_layers"] = num_hidden_layers + + text_config.update(update_dict) if hasattr(hf_config, "vision_config"): hf_config.vision_config.update({ diff --git a/tests/test_routing_simulator.py b/tests/test_routing_simulator.py index 8324b225a8ce..77501f4bddc2 100644 --- a/tests/test_routing_simulator.py +++ b/tests/test_routing_simulator.py @@ -96,7 +96,7 @@ def test_routing_strategy_integration(monkeypatch, device): envs.environment_variables[env_name] = lambda s=strategy: s # Test the select_experts method - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=hidden_states, router_logits=router_logits, top_k=top_k, diff --git a/vllm/config/model.py b/vllm/config/model.py index f37489bdfff5..1e0e4d8b3551 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -1131,7 +1131,8 @@ def is_deepseek_mla(self) -> bool: if not hasattr(self.hf_text_config, "model_type"): return False elif self.hf_text_config.model_type in \ - ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', 'kimi_k2'): + ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', + 'kimi_k2', 'longcat_flash'): return self.hf_text_config.kv_lora_rank is not None elif self.hf_text_config.model_type == 'eagle': # if the model is an EAGLE module, check for the @@ -1257,6 +1258,9 @@ def get_layers_start_end_indices( or self.hf_config.model_type == "qwen3_next_mtp"): total_num_hidden_layers = getattr(self.hf_text_config, "num_nextn_predict_layers", 0) + elif (self.hf_config.model_type == "longcat_flash_mtp"): + total_num_hidden_layers = getattr(self.hf_text_config, + "num_nextn_predict_layers", 1) else: total_num_hidden_layers = getattr(self.hf_text_config, "num_hidden_layers", 0) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 34b17628def1..04e2e7d800a1 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -31,7 +31,8 @@ SpeculativeMethod = Literal["ngram", "eagle", "eagle3", "medusa", "mlp_speculator", "draft_model", "deepseek_mtp", - "ernie_mtp", "qwen3_next_mtp", "mimo_mtp"] + "ernie_mtp", "qwen3_next_mtp", "mimo_mtp", + "longcat_flash_mtp"] @config @@ -186,6 +187,13 @@ def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig: "n_predict": n_predict, "architectures": ["Qwen3NextMTP"] }) + if hf_config.model_type == "longcat_flash": + hf_config.model_type = "longcat_flash_mtp" + n_predict = getattr(hf_config, "num_nextn_predict_layers", 1) + hf_config.update({ + "n_predict": n_predict, + "architectures": ["LongCatFlashMTPModel"] + }) return hf_config @@ -332,6 +340,15 @@ def __post_init__(self): "one layer. Might need some code changes " \ "to support multiple layers." 
) + elif (self.draft_model_config.hf_config.model_type + in ("longcat_flash_mtp")): + self.method = "longcat_flash_mtp" + if self.num_speculative_tokens > 1: + logger.warning( + "LongCat MTP models only have " \ + "one layer. Might need some code changes " \ + "to support multiple layers." + ) else: self.method = "draft_model" raise NotImplementedError( @@ -548,7 +565,7 @@ def num_lookahead_slots(self) -> int: def use_eagle(self) -> bool: return self.method in ("eagle", "eagle3", "deepseek_mtp", "ernie_mtp", - "qwen3_next_mtp") + "qwen3_next_mtp", "longcat_flash_mtp") def __repr__(self) -> str: method = self.method diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index 611df357265b..0fd1b60f3bdf 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -664,6 +664,76 @@ def invoke_fused_moe_kernel(A: torch.Tensor, ) +@triton.jit +def compute_identity_kernel( + top_k: int, + hidden_states_ptr: tl.tensor, + expert_scales_ptr: tl.tensor, + num_tokens: int, + output_ptr: tl.tensor, + hidden_dim: int, + scales_stride: int, + BLOCK_SIZE: tl.constexpr, +) -> None: + pid = tl.program_id(0) + + batch_id = pid // (hidden_dim // BLOCK_SIZE) + dim_offset = pid % (hidden_dim // BLOCK_SIZE) * BLOCK_SIZE + + if batch_id >= num_tokens or dim_offset >= hidden_dim: + return + + h = tl.load(hidden_states_ptr + batch_id * hidden_dim + dim_offset + + tl.arange(0, BLOCK_SIZE), + mask=(dim_offset + tl.arange(0, BLOCK_SIZE)) < hidden_dim) + + result = tl.zeros([BLOCK_SIZE], dtype=tl.float32) + for i in range(top_k): + scale = tl.load(expert_scales_ptr + batch_id * scales_stride + i) + result += h * scale + + tl.store(output_ptr + batch_id * hidden_dim + dim_offset + + tl.arange(0, BLOCK_SIZE), + result, + mask=(dim_offset + tl.arange(0, BLOCK_SIZE)) < hidden_dim) + + +def zero_experts_compute_triton(expert_indices: torch.Tensor, + expert_scales: torch.Tensor, num_experts: int, + zero_expert_type: str, + hidden_states: torch.Tensor) -> torch.Tensor: + N = expert_indices.numel() + top_k = expert_indices.size(-1) + grid = lambda meta: (triton.cdiv(N, meta['BLOCK_SIZE']), ) + + if zero_expert_type == "identity": + zero_expert_mask = expert_indices < num_experts + zero_expert_scales = expert_scales.clone() + zero_expert_scales[zero_expert_mask] = 0.0 + + normal_expert_mask = expert_indices >= num_experts + expert_indices[normal_expert_mask] = 0 + expert_scales[normal_expert_mask] = 0.0 + + output = torch.zeros_like(hidden_states).to(hidden_states.device) + hidden_dim = hidden_states.size(-1) + num_tokens = hidden_states.size(0) + + grid = lambda meta: (num_tokens * (hidden_dim // meta['BLOCK_SIZE']), ) + compute_identity_kernel[grid]( + top_k, + hidden_states, + zero_expert_scales, + num_tokens, + output, + hidden_dim, + zero_expert_scales.stride(0), + BLOCK_SIZE=256, + ) + + return output + + # Adapted from: https://github.com/sgl-project/sglang/pull/2628 def get_config_file_name(E: int, N: int, @@ -940,6 +1010,25 @@ def fused_topk( return topk_weights, topk_ids, token_expert_indices +def fused_topk_bias( + hidden_states: torch.Tensor, + gating_output: torch.Tensor, + e_score_correction_bias: torch.Tensor, + topk: int, + renormalize: bool, +): + n_routed_experts = gating_output.shape[-1] + scores = gating_output.softmax(dim=-1) + scores_for_choice = scores.view( + -1, n_routed_experts) + e_score_correction_bias.unsqueeze(0) + topk_indices = torch.topk(scores_for_choice, k=topk, dim=-1, + 
sorted=False)[1] + topk_weights = scores.gather(1, topk_indices) + if renormalize: + topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True) + return topk_weights.to(torch.float32), topk_indices.to(torch.int32) + + # This is used by the Deepseek-V2 and Deepseek-V3 model @torch.compile(dynamic=True, backend=current_platform.simple_compile_backend) def grouped_topk( diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index ea88539db27b..92241f50db07 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -24,6 +24,8 @@ from vllm.model_executor.layers.fused_moe.config import ( FUSED_MOE_UNQUANTIZED_CONFIG, FusedMoEConfig, FusedMoEParallelConfig, FusedMoEQuantConfig, biased_moe_quant_config) +from vllm.model_executor.layers.fused_moe.fused_moe import ( + zero_experts_compute_triton) # yapf: enable from vllm.model_executor.layers.fused_moe.modular_kernel import ( FusedMoEActivationFormat, FusedMoEModularKernel, @@ -548,7 +550,10 @@ def forward_cuda( logical_replica_count: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: - topk_weights, topk_ids = FusedMoE.select_experts( + zero_expert_num = getattr(layer, 'zero_expert_num', 0) + zero_expert_type = getattr(layer, 'zero_expert_type', None) + + topk_weights, topk_ids, zero_expert_result = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -565,11 +570,14 @@ def forward_cuda( expert_map=expert_map, expert_load_view=expert_load_view, logical_to_physical_map=logical_to_physical_map, - logical_replica_count=logical_replica_count) + logical_replica_count=logical_replica_count, + global_num_experts=global_num_experts, + zero_expert_num=zero_expert_num, + zero_expert_type=zero_expert_type) if self.rocm_aiter_moe_enabled: assert self.fused_experts is None - return self.rocm_aiter_fused_experts( + result = self.rocm_aiter_fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -591,7 +599,7 @@ def forward_cuda( if self.moe.has_bias: raise ValueError( "FusedMoEModularKernel does not support bias.") - return self.fused_experts( + result = self.fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -605,7 +613,7 @@ def forward_cuda( ) else: assert fused_experts is not None - return fused_experts( + result = fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -619,6 +627,13 @@ def forward_cuda( expert_map=expert_map, ) + if zero_expert_num != 0 and zero_expert_type is not None: + assert not isinstance(result, tuple), \ + "Shared + zero experts are mutually exclusive not yet supported" + return result, zero_expert_result + else: + return result + def forward_cpu( self, layer: torch.nn.Module, @@ -942,6 +957,8 @@ def __init__( num_redundant_experts: int = 0, has_bias: bool = False, is_sequence_parallel=False, + zero_expert_num: Optional[int] = 0, + zero_expert_type: Optional[str] = None, ): super().__init__() if params_dtype is None: @@ -976,6 +993,8 @@ def __init__( vllm_parallel_config=vllm_config.parallel_config)) self.global_num_experts = num_experts + num_redundant_experts + self.zero_expert_num = zero_expert_num + self.zero_expert_type = zero_expert_type # Round up hidden size if needed. 
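# NOTE (illustrative sketch, not from the diff): plain-PyTorch references for
# the two routing helpers introduced in fused_moe.py above. They are only a
# mental model; the Triton/fused versions are what actually runs. Assumed to
# mirror zero_experts_compute_triton (zero_expert_type == "identity") and
# fused_topk_bias without the renormalize step.

import torch

def zero_experts_identity_ref(expert_indices: torch.Tensor,
                              expert_scales: torch.Tensor,
                              num_experts: int,
                              hidden_states: torch.Tensor) -> torch.Tensor:
    # Slots that picked a real expert (id < num_experts) contribute nothing;
    # every remaining "identity" slot adds routing_weight * hidden_state.
    zero_scales = expert_scales.masked_fill(expert_indices < num_experts, 0.0)
    return (hidden_states.float() *
            zero_scales.sum(dim=-1, keepdim=True)).to(hidden_states.dtype)

def biased_topk_ref(router_logits: torch.Tensor,
                    e_score_correction_bias: torch.Tensor,
                    topk: int) -> tuple[torch.Tensor, torch.Tensor]:
    # Expert selection uses softmax(logits) + bias, while the returned weights
    # are the unbiased softmax probabilities at the selected experts.
    scores = router_logits.softmax(dim=-1)
    topk_ids = torch.topk(scores + e_score_correction_bias,
                          k=topk, dim=-1, sorted=False).indices
    topk_weights = scores.gather(1, topk_ids)
    return topk_weights.to(torch.float32), topk_ids.to(torch.int32)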
hidden_size = maybe_roundup_hidden_size(hidden_size, moe_in_dtype, @@ -1656,25 +1675,30 @@ def select_experts( expert_load_view: Optional[torch.Tensor] = None, logical_to_physical_map: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None, - ) -> tuple[torch.Tensor, torch.Tensor]: + global_num_experts: Optional[int] = None, + zero_expert_num: Optional[int] = None, + zero_expert_type: Optional[str] = None, + ) -> tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """ Route the input hidden states to the top-k experts based on the router logits. Returns: - (topk_weights, topk_ids) (tuple[torch.Tensor, torch.Tensor]): - The weights and *global physical* expert ids of the top-k experts. + (topk_weights, topk_ids, zero_expert_result) + (tuple[torch.Tensor, torch.Tensor, torch.Tensor]): + The weights, expert ids, and zero expert computation result. **Compatibility**: When EPLB is not enabled, the returned ids are equivalent to global logical ids, so should be compatible with plain MoE implementations without redundant experts. """ - from vllm.model_executor.layers.fused_moe.fused_moe import fused_topk + from vllm.model_executor.layers.fused_moe.fused_moe import ( + fused_topk, fused_topk_bias) # Check if we should use a routing simulation strategy routing_strategy = envs.VLLM_MOE_ROUTING_SIMULATION_STRATEGY if routing_strategy != "": - return RoutingSimulator.simulate_routing( + topk_weights, topk_ids = RoutingSimulator.simulate_routing( hidden_states=hidden_states, router_logits=router_logits, strategy_name=routing_strategy, @@ -1697,6 +1721,16 @@ def select_experts( e_score_correction_bias=e_score_correction_bias) if indices_type is not None: topk_ids = topk_ids.to(dtype=indices_type) + elif e_score_correction_bias is not None: + topk_weights, topk_ids = fused_topk_bias( + hidden_states=hidden_states, + gating_output=router_logits, + e_score_correction_bias=e_score_correction_bias.data, + topk=top_k, + renormalize=renormalize, + ) + if routed_scaling_factor is not None: + topk_weights *= routed_scaling_factor elif custom_routing_function is None: topk_weights, topk_ids, token_expert_indices = fused_topk( hidden_states=hidden_states, @@ -1729,7 +1763,20 @@ def select_experts( assert topk_ids.dtype == indices_type or indices_type is None - return topk_weights, topk_ids + # Compute zero expert result if needed + if (zero_expert_num is not None and zero_expert_num > 0 + and zero_expert_type is not None + and global_num_experts is not None): + zero_expert_result = zero_experts_compute_triton( + expert_indices=topk_ids, + expert_scales=topk_weights, + num_experts=global_num_experts, + zero_expert_type=zero_expert_type, + hidden_states=hidden_states, + ) + else: + zero_expert_result = None + return topk_weights, topk_ids, zero_expert_result def must_reduce_shared_expert_outputs(self) -> bool: """ @@ -1878,6 +1925,11 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): assert self.shared_experts is None or isinstance( final_hidden_states, tuple) + if isinstance(final_hidden_states, tuple): + final_hidden_states, zero_expert_result = final_hidden_states + if zero_expert_result is not None: + final_hidden_states += zero_expert_result + if not skip_result_store: if self.shared_experts is None: full_fused_final_hidden_states[ @@ -1992,6 +2044,9 @@ def forward_impl( shared_output, final_hidden_states, ) + elif self.zero_expert_num is not None and self.zero_expert_num > 0: + assert isinstance(final_hidden_states, tuple) + final_hidden_states, zero_expert_result 
= final_hidden_states def reduce_output(states: torch.Tensor, do_combine: bool = True) -> torch.Tensor: @@ -2003,14 +2058,16 @@ def reduce_output(states: torch.Tensor, return states - if self.shared_experts is None: - assert not isinstance(final_hidden_states, tuple) - return reduce_output(final_hidden_states) - else: + if self.shared_experts is not None: return ( reduce_output(final_hidden_states[0], do_combine=False), reduce_output(final_hidden_states[1]), ) + elif self.zero_expert_num is not None and self.zero_expert_num > 0: + assert isinstance(final_hidden_states, torch.Tensor) + return reduce_output(final_hidden_states) + zero_expert_result + else: + return reduce_output(final_hidden_states) @classmethod def make_expert_params_mapping( diff --git a/vllm/model_executor/layers/mla.py b/vllm/model_executor/layers/mla.py index a05716190365..9782b37c321f 100644 --- a/vllm/model_executor/layers/mla.py +++ b/vllm/model_executor/layers/mla.py @@ -103,7 +103,6 @@ def __init__( ) self.prefix = prefix - self.debug_layer_idx = int(self.prefix.split(".")[-2]) def forward_native( self, diff --git a/vllm/model_executor/layers/quantization/awq_marlin.py b/vllm/model_executor/layers/quantization/awq_marlin.py index 060d6e84a944..6bf6ea914651 100644 --- a/vllm/model_executor/layers/quantization/awq_marlin.py +++ b/vllm/model_executor/layers/quantization/awq_marlin.py @@ -520,7 +520,7 @@ def apply( assert activation == "silu", "Only SiLU activation is supported." - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index 650dab8df87e..29584188630f 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -486,7 +486,7 @@ def apply( if enable_eplb: raise NotImplementedError( "EPLB not supported for `BitsAndBytesMoEMethod` yet.") - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py index 3a81a0059df8..8504ba73defb 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py @@ -385,7 +385,7 @@ def apply( "`CompressedTensorsW4A4MoeMethod` yet.") assert activation == "silu", "Only SiLU activation is supported." 
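# NOTE (illustrative aside, not from the diff): FusedMoE.select_experts now
# returns a third element, the precomputed zero-expert contribution. MoE
# backends that do not use zero experts just discard it, which is why the
# quantization methods touched throughout this commit switch to
# `topk_weights, topk_ids, _ = FusedMoE.select_experts(...)`.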
- topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -934,7 +934,7 @@ def apply( "EPLB not supported for " "`CompressedTensorsW8A8Fp8MoEMethod` yet.") - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -1195,7 +1195,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -1502,7 +1502,7 @@ def apply( assert activation == "silu", ( f"{activation} not supported for Marlin MoE.") - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -1747,7 +1747,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/experts_int8.py b/vllm/model_executor/layers/quantization/experts_int8.py index 8555e9ff2034..38d7e200b303 100644 --- a/vllm/model_executor/layers/quantization/experts_int8.py +++ b/vllm/model_executor/layers/quantization/experts_int8.py @@ -146,7 +146,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index f77e5880209d..65f85b6ea829 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -18,6 +18,8 @@ FusedMoeWeightScaleSupported) from vllm.model_executor.layers.fused_moe.config import ( FusedMoEQuantConfig, fp8_w8a8_moe_quant_config) +from vllm.model_executor.layers.fused_moe.layer import ( + UnquantizedFusedMoEMethod) from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod) from vllm.model_executor.layers.quantization import QuantizationMethods @@ -174,6 +176,10 @@ def get_quant_method(self, layer: torch.nn.Module, return UnquantizedLinearMethod() return Fp8LinearMethod(self) elif isinstance(layer, FusedMoE): + if is_layer_skipped(prefix=prefix, + ignored_layers=self.ignored_layers, + fused_mapping=self.packed_modules_mapping): + return UnquantizedFusedMoEMethod(layer.moe_config) return Fp8MoEMethod(self, layer) elif isinstance(layer, Attention): return Fp8KVCacheMethod(self) @@ -927,6 +933,7 @@ def apply( logical_to_physical_map: Optional[torch.Tensor] = None, logical_replica_count: Optional[torch.Tensor] = None, ) -> Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]]: + if enable_eplb: assert expert_load_view is not None assert logical_to_physical_map is not None @@ -943,8 +950,7 @@ def apply( import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe # noqa: E501, F401 assert (renormalize and use_grouped_topk and custom_routing_function is None) - - return 
torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8( + result = torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8( routing_logits=router_logits.to(torch.float32), routing_bias=e_score_correction_bias, x=x, @@ -965,7 +971,7 @@ def apply( else: assert (not renormalize and custom_routing_function is not None) - return apply_flashinfer_per_tensor_scale_fp8( + result = apply_flashinfer_per_tensor_scale_fp8( layer=layer, hidden_states=x, router_logits=router_logits, @@ -976,7 +982,10 @@ def apply( topk_group=topk_group, apply_router_weight_on_input=apply_router_weight_on_input) - topk_weights, topk_ids = FusedMoE.select_experts( + zero_expert_num = getattr(layer, 'zero_expert_num', 0) + zero_expert_type = getattr(layer, 'zero_expert_type', None) + + select_result = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -994,17 +1003,22 @@ def apply( expert_load_view=expert_load_view, logical_to_physical_map=logical_to_physical_map, logical_replica_count=logical_replica_count, + global_num_experts=global_num_experts, + zero_expert_num=zero_expert_num, + zero_expert_type=zero_expert_type, ) # # Note: the order of checks is important since self.fused_experts # can override fused_experts or cutlass but not rocm or marlin. # + topk_weights, topk_ids, zero_expert_result = select_result + if self.rocm_aiter_moe_enabled: from vllm.model_executor.layers.fused_moe.rocm_aiter_fused_moe import ( # noqa: E501 rocm_aiter_fused_experts) assert self.fused_experts is None - return rocm_aiter_fused_experts( + result = rocm_aiter_fused_experts( x, layer.w13_weight, layer.w2_weight, @@ -1018,7 +1032,7 @@ def apply( assert activation == "silu", ( f"{activation} not supported for Marlin MoE.") assert self.fused_experts is None - return torch.ops.vllm.fused_marlin_moe( + result = torch.ops.vllm.fused_marlin_moe( x, layer.w13_weight, layer.w2_weight, @@ -1035,7 +1049,7 @@ def apply( expert_map=expert_map, workspace=layer.workspace) elif self.fused_experts: - return self.fused_experts( + result = self.fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -1055,7 +1069,7 @@ def apply( assert scoring_func == 'sigmoid', ( f"Expected 'sigmoid' scoring func but got {scoring_func}") - return flashinfer_cutlass_moe_fp8( + result = flashinfer_cutlass_moe_fp8( x, layer, topk_weights, @@ -1068,7 +1082,7 @@ def apply( ) else: from vllm.model_executor.layers.fused_moe import fused_experts - return fused_experts( + result = fused_experts( hidden_states=x, w1=layer.w13_weight, w2=layer.w2_weight, @@ -1083,6 +1097,12 @@ def apply( allow_deep_gemm=self.allow_deep_gemm, allow_cutlass_block_scaled_grouped_gemm=( self.allow_cutlass_block_scaled_grouped_gemm)) + if zero_expert_num != 0 and zero_expert_type is not None: + assert not isinstance(result, tuple), \ + "Shared + zero experts are mutually exclusive not yet supported" + return result, zero_expert_result + else: + return result class Fp8KVCacheMethod(BaseKVCacheMethod): diff --git a/vllm/model_executor/layers/quantization/gguf.py b/vllm/model_executor/layers/quantization/gguf.py index de25ee84d081..da1688808bb5 100644 --- a/vllm/model_executor/layers/quantization/gguf.py +++ b/vllm/model_executor/layers/quantization/gguf.py @@ -555,7 +555,7 @@ def apply( "Apply router weight on input is not supported for" "fused GGUF MoE method.") - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, 
use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index e06b974255f0..02188c3c224f 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -669,7 +669,7 @@ def apply( assert activation == "silu", "Only SiLU activation is supported." - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 1083f398a3a2..4491fcf18106 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -543,7 +543,7 @@ def apply( apply_router_weight_on_input=apply_router_weight_on_input) # Expert selection - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -1491,7 +1491,7 @@ def apply( )[0] return out - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/moe_wna16.py b/vllm/model_executor/layers/quantization/moe_wna16.py index 145b614237fb..ee8d33e636f9 100644 --- a/vllm/model_executor/layers/quantization/moe_wna16.py +++ b/vllm/model_executor/layers/quantization/moe_wna16.py @@ -332,7 +332,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts assert activation == "silu", "Only SiLU activation is supported." 
- topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index b710f6ee249b..54194b2e7d5b 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -718,7 +718,7 @@ def _route_and_experts( assert isinstance(self.fused_experts, mk.FusedMoEModularKernel) - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -783,7 +783,7 @@ def apply( raise NotImplementedError("EPLB is not supported for mxfp4") if self.mxfp4_backend == Mxfp4Backend.MARLIN: - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -894,7 +894,7 @@ def apply( or self.mxfp4_backend == Mxfp4Backend.SM90_FI_MXFP4_BF16): from vllm.utils.flashinfer import flashinfer_cutlass_fused_moe - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/quark/quark_moe.py b/vllm/model_executor/layers/quantization/quark/quark_moe.py index d2d990e46bcf..24497cc756c1 100644 --- a/vllm/model_executor/layers/quantization/quark/quark_moe.py +++ b/vllm/model_executor/layers/quantization/quark/quark_moe.py @@ -329,7 +329,7 @@ def apply( raise NotImplementedError( "EPLB not supported for `QuarkW8A8Fp8MoEMethod` yet.") - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, @@ -531,7 +531,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/rtn.py b/vllm/model_executor/layers/quantization/rtn.py index ed90e2e26460..015dc136bb82 100644 --- a/vllm/model_executor/layers/quantization/rtn.py +++ b/vllm/model_executor/layers/quantization/rtn.py @@ -318,7 +318,7 @@ def apply( from vllm.model_executor.layers.fused_moe import fused_experts - topk_weights, topk_ids = FusedMoE.select_experts( + topk_weights, topk_ids, _ = FusedMoE.select_experts( hidden_states=x, router_logits=router_logits, use_grouped_topk=use_grouped_topk, diff --git a/vllm/model_executor/layers/quantization/utils/quant_utils.py b/vllm/model_executor/layers/quantization/utils/quant_utils.py index 5339c6043cc1..acd9058fe694 100644 --- a/vllm/model_executor/layers/quantization/utils/quant_utils.py +++ b/vllm/model_executor/layers/quantization/utils/quant_utils.py @@ -292,6 +292,11 @@ def is_layer_skipped( f"Detected some but not all shards of {prefix} " "are quantized. 
All shards of fused layers " "to have the same precision.") + elif "experts" in prefix: + return any([ + prefix in layer_name for layer_name in ignored_layers + if "experts" in layer_name + ]) else: is_skipped = prefix in ignored_layers diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py new file mode 100644 index 000000000000..1a7a64bfd1a4 --- /dev/null +++ b/vllm/model_executor/models/longcat_flash.py @@ -0,0 +1,712 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# Apache License, Version 2.0: +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# MIT License: +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+"""Inference-only Flash model compatible with HuggingFace weights.""" +import typing +from collections.abc import Callable, Iterable +from typing import Optional, Union + +import torch +from torch import nn +from transformers import PretrainedConfig + +from vllm.compilation.decorators import support_torch_compile +from vllm.config import CacheConfig, VllmConfig +from vllm.distributed import get_pp_group +from vllm.logger import init_logger +from vllm.model_executor.layers.activation import SiluAndMul +from vllm.model_executor.layers.fused_moe import FusedMoE +from vllm.model_executor.layers.layernorm import RMSNorm +from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, + ReplicatedLinear, + RowParallelLinear) +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.layers.quantization.utils.int8_utils import ( + block_dequant) +from vllm.model_executor.layers.vocab_parallel_embedding import ( + ParallelLMHead, VocabParallelEmbedding) +from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.deepseek_v2 import DeepseekV2MLAAttention +from vllm.sequence import IntermediateTensors + +from .interfaces import SupportsLoRA, SupportsPP +from .utils import (PPMissingLayer, is_pp_missing_parameter, + make_empty_intermediate_tensors_factory, make_layers, + maybe_prefix) + +logger = init_logger(__name__) + + +class FlashConfig(PretrainedConfig): + """Flash model configuration.""" + model_type = "longcat_flash" + keys_to_ignore_at_inference = ["past_key_values"] + + def __init__( + self, + vocab_size=131072, + hidden_size=4096, + intermediate_size=8192, + num_layers=28, + num_hidden_layers=None, + num_attention_heads=96, + num_key_value_heads=128, + ep_size=1, + kv_lora_rank=512, + q_lora_rank=1536, + qk_rope_head_dim=64, + v_head_dim=128, + qk_nope_head_dim=128, + num_experts_per_tok=None, + norm_topk_prob=False, + max_position_embeddings=8192, + initializer_range=0.02, + rms_norm_eps=1e-05, + use_cache=True, + pad_token_id=None, + bos_token_id=100000, + eos_token_id=100001, + pretraining_tp=1, + tie_word_embeddings=False, + rope_theta=1000000.0, + rope_scaling=None, + attention_bias=False, + attention_dropout=0.0, + mla_scale_q_lora=False, + mla_scale_kv_lora=False, + torch_dtype="bfloat16", + params_dtype="bfloat16", + router_dtype="float32", + router_bias=False, + topk_method=None, + routed_scaling_factor=None, + zero_expert_num=0, + zero_expert_type=None, + nextn_use_scmoe=False, + **kwargs, + ): + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + torch_dtype=torch_dtype, + params_dtype=params_dtype, + router_dtype=router_dtype, + topk_method=topk_method, + router_bias=router_bias, + nextn_use_scmoe=nextn_use_scmoe, + **kwargs, + ) + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.num_hidden_layers = (num_hidden_layers if num_hidden_layers + is not None else num_layers) + self.num_attention_heads = num_attention_heads + self.ep_size = ep_size + self.kv_lora_rank = kv_lora_rank + self.q_lora_rank = q_lora_rank + self.qk_rope_head_dim = qk_rope_head_dim + self.v_head_dim = v_head_dim + self.qk_nope_head_dim = qk_nope_head_dim + self.num_experts_per_tok = num_experts_per_tok + self.norm_topk_prob = norm_topk_prob + # for backward 
compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.pretraining_tp = pretraining_tp + self.use_cache = use_cache + self.rope_theta = rope_theta + self.rope_scaling = rope_scaling + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + self.mla_scale_q_lora = mla_scale_q_lora + self.mla_scale_kv_lora = mla_scale_kv_lora + self.zero_expert_num = zero_expert_num + self.zero_expert_type = zero_expert_type + self.routed_scaling_factor = routed_scaling_factor + self.hidden_act = "silu" + self.intermediate_size = self.ffn_hidden_size if hasattr( + self, "ffn_hidden_size") else self.intermediate_size + if hasattr(self, "moe_intermediate_size"): + self.moe_intermediate_size = self.moe_intermediate_size + elif hasattr(self, "expert_ffn_hidden_size"): + self.moe_intermediate_size = self.expert_ffn_hidden_size + else: + self.moe_intermediate_size = self.intermediate_size + + +class FlashMLP(nn.Module): + """Flash MLP layer.""" + + def __init__( + self, + hidden_size: int, + intermediate_size: int, + hidden_act: str, + quant_config: Optional[QuantizationConfig] = None, + reduce_results: bool = True, + prefix: str = "", + ) -> None: + super().__init__() + self.gate_up_proj = MergedColumnParallelLinear( + hidden_size, + [intermediate_size] * 2, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.gate_up_proj", + ) + self.down_proj = RowParallelLinear( + intermediate_size, + hidden_size, + bias=False, + quant_config=quant_config, + reduce_results=reduce_results, + prefix=f"{prefix}.down_proj", + ) + if hidden_act != "silu": + raise ValueError(f"Unsupported activation: {hidden_act}. " + "Only silu is supported for now.") + self.act_fn = SiluAndMul() + + def forward(self, x: torch.Tensor) -> torch.Tensor: + if x.numel() == 0: + return x + + gate_up, _ = self.gate_up_proj(x) + x = self.act_fn(gate_up) + x, _ = self.down_proj(x) + return x + + +class LongcatRouter(nn.Module): + + def __init__(self, + config, + zero_expert_num=0, + rounter_params_dtype=torch.bfloat16, + prefix: str = ""): + super().__init__() + self.n_routed_experts = config.n_routed_experts if hasattr( + config, "n_routed_experts") else config.num_experts[0] + self.n_routed_experts = self.n_routed_experts + zero_expert_num + self.classifier = ReplicatedLinear( + config.hidden_size, + self.n_routed_experts, + bias=config.router_bias, + params_dtype=rounter_params_dtype, + quant_config=None, + prefix=f"{prefix}.classifier", + ) + self.e_score_correction_bias = nn.Parameter( + torch.zeros((self.n_routed_experts), dtype=rounter_params_dtype)) + + def forward(self, hidden_states): + logits, _ = self.classifier(hidden_states) + return logits + + +class LongcatMoe(nn.Module): + + def __init__( + self, + config: FlashConfig, + num_experts: int, + top_k: int, + hidden_size: int, + intermediate_size: int, + params_dtype: Optional[torch.dtype] = None, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + enable_eplb: bool = False, + ): + super().__init__() + self.hidden_size = hidden_size + self.zero_expert_num = config.zero_expert_num + self.zero_expert_type = config.zero_expert_type + self.routed_scaling_factor = config.routed_scaling_factor + self.enable_eplb = enable_eplb + # Gate always runs at half / full precision for now. 
+ self.rounter_params_dtype = params_dtype + if config.router_dtype == "float32": + self.rounter_params_dtype = torch.float32 + + self.router = LongcatRouter( + config=config, + zero_expert_num=self.zero_expert_num, + rounter_params_dtype=self.rounter_params_dtype, + prefix=f"{prefix}.gate") + + self.experts = FusedMoE( + num_experts=num_experts, + top_k=top_k, + hidden_size=hidden_size, + intermediate_size=intermediate_size, + reduce_results=True, + params_dtype=params_dtype, + e_score_correction_bias=self.router.e_score_correction_bias, + renormalize=False, + quant_config=quant_config, + prefix=f"{prefix}.experts", + zero_expert_num=self.zero_expert_num, + zero_expert_type=self.zero_expert_type, + enable_eplb=self.enable_eplb, + routed_scaling_factor=config.routed_scaling_factor, + ) + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + + num_tokens, hidden_dim = hidden_states.shape + hidden_states = hidden_states.view(-1, hidden_dim) + + router_logits = self.router(hidden_states.to( + self.rounter_params_dtype)) + final_hidden_states = self.experts(hidden_states=hidden_states, + router_logits=router_logits) + + return final_hidden_states.view(num_tokens, hidden_dim) + + +class FlashDecoderLayer(nn.Module): + """Flash decoder layer with dual attention and MLP structure.""" + + def __init__( + self, + config: FlashConfig, + cache_config: Optional[CacheConfig] = None, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = "", + enable_eplb: bool = False, + ) -> None: + super().__init__() + self.layer_idx = int(prefix.split(sep='.')[-1]) + self.hidden_size = config.hidden_size + rope_theta = getattr(config, "rope_theta", 10000) + rope_scaling = getattr(config, "rope_scaling", None) + max_position_embeddings = getattr(config, "max_position_embeddings", + 8192) + if rope_scaling is not None and getattr( + config, "original_max_position_embeddings", None): + rope_scaling["original_max_position_embeddings"] = ( + config.original_max_position_embeddings) + + # Dual attention structure + self.self_attn = nn.ModuleList([ + DeepseekV2MLAAttention( + config=config, + hidden_size=self.hidden_size, + num_heads=config.num_attention_heads, + qk_nope_head_dim=config.qk_nope_head_dim, + qk_rope_head_dim=config.qk_rope_head_dim, + v_head_dim=config.v_head_dim, + q_lora_rank=(config.q_lora_rank if hasattr( + config, "q_lora_rank") else None), + kv_lora_rank=config.kv_lora_rank, + rope_theta=rope_theta, + rope_scaling=rope_scaling, + max_position_embeddings=max_position_embeddings, + cache_config=cache_config, + quant_config=None if "self_attn" in getattr( + config, "disable_quant_module", []) else quant_config, + prefix=f"{prefix}.self_attn.{i}", + ) for i in range(2) + ]) + self.input_layernorm = nn.ModuleList([ + RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + for i in range(2) + ]) + self.post_attention_layernorm = nn.ModuleList([ + RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + for i in range(2) + ]) + + # Dual MLP structure + self.mlps = nn.ModuleList([ + FlashMLP( + hidden_size=self.hidden_size, + intermediate_size=config.intermediate_size, + hidden_act=config.hidden_act, + quant_config=None if "mlps" in getattr( + config, "disable_quant_module", []) else quant_config, + prefix=f"{prefix}.mlps.{i}", + ) for i in range(2) + ]) + + self.mlp = LongcatMoe( + config=config, + num_experts=config.n_routed_experts if hasattr( + config, "n_routed_experts") else + config.num_experts[self.layer_idx], + top_k=config.moe_topk + if hasattr(config, "moe_topk") else 
config.num_experts_per_tok, + hidden_size=config.hidden_size, + intermediate_size=config.moe_intermediate_size, + quant_config=quant_config, + prefix=(f"{prefix}.mlp"), + ) + + def forward( + self, + positions: torch.Tensor, + hidden_states: torch.Tensor, + residual: Optional[torch.Tensor], + ) -> tuple[torch.Tensor, torch.Tensor]: + + if residual is None: + residual = hidden_states + hidden_states = self.input_layernorm[0](hidden_states) + else: + hidden_states, residual = self.input_layernorm[0](hidden_states, + residual) + + hidden_states = self.self_attn[0]( + positions=positions, + hidden_states=hidden_states, + ) + + hidden_states, residual = self.post_attention_layernorm[0]( + hidden_states, residual) + + # moe + hidden_states_copy = hidden_states.clone() + moe_hidden_states = self.mlp(hidden_states_copy) + + # first mlp + hidden_states = self.mlps[0](hidden_states) + + hidden_states, residual = self.input_layernorm[1](hidden_states, + residual) + + # second_attn + hidden_states = self.self_attn[1]( + positions=positions, + hidden_states=hidden_states, + ) + hidden_states, residual = self.post_attention_layernorm[1]( + hidden_states, residual) + + # second_mlp + hidden_states = self.mlps[1](hidden_states) + + hidden_states = hidden_states + moe_hidden_states + + return hidden_states, residual + + +@support_torch_compile +class FlashModel(nn.Module): + """Flash model.""" + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + config = FlashConfig(**vllm_config.model_config.hf_config.__dict__) + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.config = config + + self.padding_idx = getattr(config, "pad_token_id", None) + self.vocab_size = config.vocab_size + + if get_pp_group().is_first_rank: + self.embed_tokens = VocabParallelEmbedding( + config.vocab_size, + config.hidden_size, + prefix=maybe_prefix(prefix, "embed_tokens"), + ) + else: + self.embed_tokens = PPMissingLayer() + self.start_layer, self.end_layer, self.layers = make_layers( + config.num_hidden_layers, + lambda prefix: FlashDecoderLayer( + config, + cache_config=cache_config, + quant_config=quant_config, + prefix=prefix, + ), + prefix=f"{prefix}.layers") + if get_pp_group().is_last_rank: + self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + else: + self.norm = PPMissingLayer() + self.make_empty_intermediate_tensors = ( + make_empty_intermediate_tensors_factory( + ["hidden_states", "residual"], config.hidden_size)) + + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, IntermediateTensors]: + if get_pp_group().is_first_rank: + if inputs_embeds is not None: + hidden_states = inputs_embeds + else: + hidden_states = self.get_input_embeddings(input_ids) + residual = None + else: + assert intermediate_tensors is not None + hidden_states = intermediate_tensors["hidden_states"] + residual = intermediate_tensors["residual"] + + for i in range(self.start_layer, self.end_layer): + layer = self.layers[i] + hidden_states, residual = layer( + positions, + hidden_states, + residual, + ) + + if not get_pp_group().is_last_rank: + return IntermediateTensors({ + "hidden_states": hidden_states, + "residual": residual + }) + + hidden_states, _ = self.norm(hidden_states, residual) + 
return hidden_states + + +class LongcatFlashForCausalLM(nn.Module, SupportsLoRA, SupportsPP): + """Flash model for causal language modeling.""" + + packed_modules_mapping = { + "qkv_proj": [ + "q_proj", + "k_proj", + "v_proj", + ], + "gate_up_proj": [ + "gate_proj", + "up_proj", + ], + } + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + config = FlashConfig(**vllm_config.model_config.hf_config.__dict__) + quant_config = vllm_config.quant_config + lora_config = vllm_config.lora_config + + self.config = config + config.intermediate_size = config.ffn_hidden_size if hasattr( + config, "ffn_hidden_size") else config.intermediate_size + self.lora_config = lora_config + self.quant_config = quant_config + + self.model = FlashModel(vllm_config=vllm_config, + prefix=maybe_prefix(prefix, "model")) + + if get_pp_group().is_last_rank: + self.lm_head = ParallelLMHead(config.vocab_size, + config.hidden_size, + quant_config=quant_config, + prefix=maybe_prefix( + prefix, "lm_head")) + else: + self.lm_head = PPMissingLayer() + + self.logits_processor = LogitsProcessor(config.vocab_size) + self.make_empty_intermediate_tensors = ( + self.model.make_empty_intermediate_tensors) + + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, IntermediateTensors]: + hidden_states = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) + return hidden_states + + def compute_logits( + self, + hidden_states: torch.Tensor, + ) -> Optional[torch.Tensor]: + logits = self.logits_processor(self.lm_head, hidden_states) + return logits + + def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: + # Params for weights, fp8 weight scales, fp8 activation scales + # (param_name, weight_name, expert_id, shard_id) + return FusedMoE.make_expert_params_mapping( + ckpt_gate_proj_name="gate_proj", + ckpt_down_proj_name="down_proj", + ckpt_up_proj_name="up_proj", + num_experts=self.config.n_routed_experts if hasattr( + self.config, "n_routed_experts") else + self.config.num_experts[0], + ) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + + stacked_params_mapping = [ + ("fused_qkv_a_proj", "q_a_proj", 0), + ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1), + (".gate_up_proj", ".gate_proj", 0), + (".gate_up_proj", ".up_proj", 1), + ] + + expert_params_mapping = self.get_expert_mapping() + loaded_params: set[str] = set() + + params_dict = dict(self.named_parameters()) + for name, loaded_weight in weights: + if "rotary_emb.inv_freq" in name: + continue + for param_name, weight_name, shard_id in stacked_params_mapping: + if weight_name not in name: + continue + if "mlp" in name and "mlps" not in name: + continue + name = name.replace(weight_name, param_name) + # Skip loading extra bias for GPTQ models. + if (name.endswith(".bias") + or name.endswith("_bias")) and name not in params_dict: + continue + # Skip mtp + if ".mtp." 
in name: + continue + if is_pp_missing_parameter(name, self): + continue + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + is_expert_weight = False + for mapping in expert_params_mapping: + param_name, weight_name, expert_id, shard_id = mapping + if weight_name not in name: + continue + is_expert_weight = True + name_mapped = name.replace(weight_name, param_name) + # Skip mtp + if ".mtp." in name_mapped: + continue + if (name_mapped.endswith(".bias") + or name_mapped.endswith("_bias") + ) and name not in params_dict: + continue + if is_pp_missing_parameter(name, self): + continue + param = params_dict[name_mapped] + weight_loader = param.weight_loader + weight_loader = typing.cast(Callable[..., bool], + param.weight_loader) + success = weight_loader(param, + loaded_weight, + name_mapped, + shard_id=shard_id, + expert_id=expert_id, + return_success=True) + if success: + name = name_mapped + break + else: + if is_expert_weight: + # We've checked that this is an expert weight + # However it's not mapped locally to this rank + # So we simply skip it + continue + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + # Skip loading kv_scale from ckpts towards new design. + if name.endswith(".kv_scale") and name not in params_dict: + continue + # Skip mtp + if ".mtp." in name: + continue + if name is None: + continue + if is_pp_missing_parameter(name, self): + continue + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(name) + for layer_id in range(self.config.num_hidden_layers): + for i in range(2): + if isinstance(self.model.layers[layer_id], PPMissingLayer): + continue + self_attn = self.model.layers[layer_id].self_attn[i] + if hasattr(self.quant_config, "weight_block_size" + ) and self_attn.kv_b_proj.weight.dtype in ( + torch.float8_e4m3fn, + torch.float8_e4m3fnuz, + ): + weight_block_size = self.quant_config.weight_block_size + if weight_block_size is not None: + assert hasattr(self_attn.kv_b_proj, "weight_scale_inv") + dtype = torch.get_default_dtype() + w = block_dequant(self_attn.kv_b_proj.weight, + self_attn.kv_b_proj.weight_scale_inv, + weight_block_size).to(dtype) + else: + w = self_attn.kv_b_proj.weight + + w_kc, w_vc = w.unflatten( + 0, + (-1, + self_attn.qk_nope_head_dim + self_attn.v_head_dim)).split( + [self_attn.qk_nope_head_dim, self_attn.v_head_dim], + dim=1) + self_attn.w_kc = w_kc.transpose(1, 2).contiguous().transpose( + 1, 2) + self_attn.w_vc = w_vc.contiguous().transpose(1, 2) + if self.config.mla_scale_q_lora: + self_attn.q_a_layernorm.weight.data *= ( + self.config.hidden_size / self.config.q_lora_rank)**0.5 + if self.config.mla_scale_kv_lora: + self_attn.kv_a_layernorm.weight.data *= ( + self.config.hidden_size / + self.config.kv_lora_rank)**0.5 + return loaded_params diff --git a/vllm/model_executor/models/longcat_flash_mtp.py b/vllm/model_executor/models/longcat_flash_mtp.py new file mode 100644 index 000000000000..eebc2ee15597 --- /dev/null +++ b/vllm/model_executor/models/longcat_flash_mtp.py @@ -0,0 +1,352 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +# Adapted from +# https://github.com/vllm-project/vllm/blob/v0.7.3/vllm/model_executor/models/deepseek_mtp.py +from collections.abc import Iterable +from typing import Optional + +import torch +import 
torch.nn as nn +from transformers import PretrainedConfig + +from vllm.config import VllmConfig +from vllm.model_executor.layers.layernorm import RMSNorm +from vllm.model_executor.layers.linear import ReplicatedLinear +from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.layers.quantization.utils.int8_utils import ( + block_dequant) +from vllm.model_executor.layers.vocab_parallel_embedding import ( + ParallelLMHead, VocabParallelEmbedding) +from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.longcat_flash import FlashConfig +from vllm.sequence import IntermediateTensors + +from .deepseek_v2 import DeepseekV2DecoderLayer +from .interfaces import SupportsPP +from .utils import maybe_prefix + + +class LongCatMultiTokenPredictorLayer(nn.Module): + + def __init__( + self, + config: PretrainedConfig, + prefix: str, + vllm_config: VllmConfig, + quant_config: Optional[QuantizationConfig] = None, + ) -> None: + super().__init__() + self.enorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.hnorm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + self.eh_proj = ReplicatedLinear(2 * config.hidden_size, + config.hidden_size, + bias=False, + quant_config=quant_config, + prefix="eh_proj") + self.mtp_block = DeepseekV2DecoderLayer(vllm_config, prefix) + self.final_layernorm = RMSNorm(config.hidden_size, + eps=config.rms_norm_eps) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + previous_hidden_states: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, + spec_step_index: int = 0, + ) -> torch.Tensor: + assert inputs_embeds is not None + inputs_embeds = self.enorm(inputs_embeds) + previous_hidden_states = self.hnorm(previous_hidden_states) + + hidden_states, _ = self.eh_proj( + torch.cat([inputs_embeds, previous_hidden_states], dim=-1)) + + hidden_states, residual = self.mtp_block(positions=positions, + hidden_states=hidden_states, + residual=None) + hidden_states, _ = self.final_layernorm(hidden_states, residual) + return hidden_states + + +class LongCatMultiTokenPredictor(nn.Module): + + def __init__(self, + *, + vllm_config: VllmConfig, + quant_config: Optional[QuantizationConfig] = None, + prefix: str = ""): + super().__init__() + config = FlashConfig(**vllm_config.model_config.hf_config.__dict__) + vllm_config.model_config.hf_config.intermediate_size \ + = config.intermediate_size + self.mtp_start_layer_idx = config.num_hidden_layers * 2 + self.num_mtp_layers = 1 + self.layers = torch.nn.ModuleDict({ + str(idx): + LongCatMultiTokenPredictorLayer( + config, + prefix=f"{prefix}.layers.{idx}", + vllm_config=vllm_config, + quant_config=quant_config, + ) + for idx in range(self.mtp_start_layer_idx, + self.mtp_start_layer_idx + self.num_mtp_layers) + }) + self.embed_tokens = VocabParallelEmbedding( + config.vocab_size, + config.hidden_size, + ) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + previous_hidden_states: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, + spec_step_idx: int = 0, + ) -> torch.Tensor: + if inputs_embeds is None: + inputs_embeds = self.embed_tokens(input_ids) + current_step_idx = (spec_step_idx % self.num_mtp_layers) + return self.layers[str(self.mtp_start_layer_idx + current_step_idx)]( + input_ids, + positions, + previous_hidden_states, + inputs_embeds, + current_step_idx, + ) + + +class 
LongCatFlashMTP(nn.Module, SupportsPP): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__() + # LongCat MTP without MoE layers + vllm_config.model_config.hf_config.n_routed_experts = None + self.config = FlashConfig( + **vllm_config.model_config.hf_config.__dict__) + self.quant_config = None if "mtp" in getattr( + self.config, "disable_quant_module", + []) else vllm_config.quant_config + + self.model = LongCatMultiTokenPredictor(vllm_config=vllm_config, + quant_config=self.quant_config, + prefix=maybe_prefix( + prefix, "model")) + self.lm_head = ParallelLMHead( + self.config.vocab_size, + self.config.hidden_size, + quant_config=self.quant_config, + ) + self.logits_processor = LogitsProcessor(self.config.vocab_size) + + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + hidden_states: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + spec_step_idx: int = 0, + ) -> torch.Tensor: + hidden_states = self.model(input_ids, positions, hidden_states, + inputs_embeds, spec_step_idx) + return hidden_states + + def compute_logits( + self, + hidden_states: torch.Tensor, + spec_step_idx: int = 0, + ) -> Optional[torch.Tensor]: + + logits = self.logits_processor(self.lm_head, hidden_states) + return logits + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + stacked_params_mapping = [ + ("gate_up_proj", "gate_proj", 0), + ("gate_up_proj", "up_proj", 1), + ("fused_qkv_a_proj", "q_a_proj", 0), + ("fused_qkv_a_proj", "kv_a_proj_with_mqa", 1), + ] + + new_to_old_names_mapping = { + "model.mtp.embed_tokens.weight": + "model.layers.0.embed_tokens.weight", + "model.mtp.layers.0.eh_proj.weight": "eh_proj.weight", + "model.mtp.layers.0.eh_proj.weight_scale_inv": + "eh_proj.weight_scale_inv", + "model.mtp.layers.0.enorm.m.weight": "enorm.weight", + "model.mtp.layers.0.hnorm.m.weight": "hnorm.weight", + "model.mtp.layers.0.input_layernorm.weight": + "model.layers.0.input_layernorm.weight", + "model.mtp.layers.0.post_attention_layernorm.weight": + "model.layers.0.post_attention_layernorm.weight", + "model.mtp.layers.0.self_attn.kv_a_layernorm.weight": + "model.layers.0.self_attn.kv_a_layernorm.weight", + "model.mtp.layers.0.self_attn.kv_a_proj_with_mqa.weight": + "model.layers.0.self_attn.kv_a_proj_with_mqa.weight", + "model.mtp.layers.0.self_attn.kv_a_proj_with_mqa.weight_scale_inv": + "model.layers.0.self_attn.kv_a_proj_with_mqa.weight_scale_inv", + "model.mtp.layers.0.self_attn.kv_b_proj.weight": + "model.layers.0.self_attn.kv_b_proj.weight", + "model.mtp.layers.0.self_attn.kv_b_proj.weight_scale_inv": + "model.layers.0.self_attn.kv_b_proj.weight_scale_inv", + "model.mtp.layers.0.self_attn.o_proj.weight": + "model.layers.0.self_attn.o_proj.weight", + "model.mtp.layers.0.self_attn.o_proj.weight_scale_inv": + "model.layers.0.self_attn.o_proj.weight_scale_inv", + "model.mtp.layers.0.self_attn.q_a_layernorm.weight": + "model.layers.0.self_attn.q_a_layernorm.weight", + "model.mtp.layers.0.self_attn.q_a_proj.weight": + "model.layers.0.self_attn.q_a_proj.weight", + "model.mtp.layers.0.self_attn.q_a_proj.weight_scale_inv": + "model.layers.0.self_attn.q_a_proj.weight_scale_inv", + "model.mtp.layers.0.self_attn.q_b_proj.weight": + "model.layers.0.self_attn.q_b_proj.weight", + "model.mtp.layers.0.self_attn.q_b_proj.weight_scale_inv": + "model.layers.0.self_attn.q_b_proj.weight_scale_inv", + "model.mtp.layers.0.transformer_layer.mlp.down_proj.weight": 
+ "model.layers.0.mlp.down_proj.weight", + "model.mtp.layers.0.transformer_layer.mlp.down_proj.weight_scale_inv": + "model.layers.0.mlp.down_proj.weight_scale_inv", + "model.mtp.layers.0.transformer_layer.mlp.gate_proj.weight": + "model.layers.0.mlp.gate_proj.weight", + "model.mtp.layers.0.transformer_layer.mlp.gate_proj.weight_scale_inv": + "model.layers.0.mlp.gate_proj.weight_scale_inv", + "model.mtp.layers.0.transformer_layer.mlp.up_proj.weight": + "model.layers.0.mlp.up_proj.weight", + "model.mtp.layers.0.transformer_layer.mlp.up_proj.weight_scale_inv": + "model.layers.0.mlp.up_proj.weight_scale_inv", + "model.mtp.norm.weight": "final_layernorm.weight", + } + + params_dict = dict(self.named_parameters()) + loaded_params: set[str] = set() + for name, loaded_weight in weights: + if "rotary_emb.inv_freq" in name: + continue + spec_layer = self.get_spec_layer_idx_from_weight_name( + self.config, name) + if spec_layer is None: + continue + name = self._rewrite_spec_layer_name(spec_layer, name, + new_to_old_names_mapping) + for (param_name, weight_name, shard_id) in stacked_params_mapping: + # Skip non-stacked layers and experts (experts handled below). + if weight_name not in name: + continue + # We have mlp.experts[0].gate_proj in the checkpoint. + # Since we handle the experts below in expert_params_mapping, + # we need to skip here BEFORE we update the name, otherwise + # name will be updated to mlp.experts[0].gate_up_proj, which + # will then be updated below in expert_params_mapping + # for mlp.experts[0].gate_gate_up_proj, which breaks load. + if (("mlp.experts." in name) and name not in params_dict): + continue + name = name.replace(weight_name, param_name) + + # QKV fusion is optional, fall back to normal + # weight loading if it's not enabled + if ((param_name == "fused_qkv_a_proj") + and name not in params_dict): + continue + + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + + param = params_dict[name] + weight_loader = param.weight_loader + weight_loader(param, loaded_weight, shard_id) + break + else: + # Skip loading extra bias for GPTQ models. + if name.endswith(".bias") and name not in params_dict: + continue + + # According to DeepSeek-V3 Technical Report, MTP modules + # shares embedding layer. We only load the first weights. 
+ if (spec_layer != self.model.mtp_start_layer_idx + and ".layers" not in name): + continue + + param = params_dict[name] + weight_loader = getattr(param, "weight_loader", + default_weight_loader) + weight_loader(param, loaded_weight) + loaded_params.add(name) + spec_layer_id = self.config.num_hidden_layers * 2 + self_attn = self.model.layers[str(spec_layer_id)].mtp_block.self_attn + if hasattr( + self.quant_config, + "weight_block_size") and self_attn.kv_b_proj.weight.dtype in ( + torch.float8_e4m3fn, + torch.float8_e4m3fnuz, + ): + weight_block_size = self.quant_config.weight_block_size + if weight_block_size is not None: + dtype = torch.get_default_dtype() + w = block_dequant(self_attn.kv_b_proj.weight, + self_attn.kv_b_proj.weight_scale_inv, + weight_block_size).to(dtype) + else: + w = self_attn.kv_b_proj.weight + else: + w = self_attn.kv_b_proj.weight + w_kc, w_vc = w.unflatten( + 0, (-1, self_attn.qk_nope_head_dim + self_attn.v_head_dim)).split( + [self_attn.qk_nope_head_dim, self_attn.v_head_dim], dim=1) + self_attn.w_kc = w_kc.transpose(1, 2).contiguous().transpose(1, 2) + self_attn.w_vc = w_vc.contiguous().transpose(1, 2) + if self.config.mla_scale_q_lora: + self_attn.q_a_layernorm.weight.data *= ( + self.config.hidden_size / self.config.q_lora_rank)**0.5 + if self.config.mla_scale_kv_lora: + self_attn.kv_a_layernorm.weight.data *= ( + self.config.hidden_size / self.config.kv_lora_rank)**0.5 + return loaded_params + + def _rewrite_spec_layer_name(self, spec_layer: int, name: str, + new_to_old_names_mapping: dict) -> str: + """ + Rewrite the weight name to match the format of the original model. + Add .mtp_block for modules in transformer layer block for spec layer + and rename shared layer weights to be top level. + """ + if name in new_to_old_names_mapping: + name = new_to_old_names_mapping[name] + spec_layer_weight_names = [ + "embed_tokens", "enorm", "hnorm", "eh_proj", "shared_head" + ] + if name.startswith("enorm") or name.startswith( + "hnorm") or name.startswith("eh_proj") or name.startswith( + "final_layernorm"): + name = "model.layers." + str(spec_layer) + "." 
+ name + shared_weight_names = ["embed_tokens"] + spec_layer_weight = False + shared_weight = False + for weight_name in spec_layer_weight_names: + if weight_name in name: + spec_layer_weight = True + if weight_name in shared_weight_names: + shared_weight = True + break + if not spec_layer_weight: + # treat rest weights as weights for transformer layer block + name = name.replace("model.layers.0.", + f"model.layers.{spec_layer}.mtp_block.") + elif shared_weight: + # treat shared weights as top level weights + name = name.replace("model.layers.0.", "model.") + return name + + def get_spec_layer_idx_from_weight_name(self, config: PretrainedConfig, + weight_name: str) -> Optional[int]: + if "model.mtp" in weight_name: + return config.num_hidden_layers * 2 + return None diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index ac0ec6ca146c..c0afd83c28ce 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -109,6 +109,7 @@ "Llama4ForCausalLM": ("llama4", "Llama4ForCausalLM"), # noqa: E501 # For decapoda-research/llama-* "LLaMAForCausalLM": ("llama", "LlamaForCausalLM"), + "LongcatFlashForCausalLM": ("longcat_flash", "LongcatFlashForCausalLM"), "MambaForCausalLM": ("mamba", "MambaForCausalLM"), "FalconMambaForCausalLM": ("mamba", "MambaForCausalLM"), "FalconH1ForCausalLM":("falcon_h1", "FalconH1ForCausalLM"), @@ -287,6 +288,7 @@ "EagleDeepSeekMTPModel": ("deepseek_eagle", "EagleDeepseekV3ForCausalLM"), "DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"), "ErnieMTPModel": ("ernie_mtp", "ErnieMTP"), + "LongCatFlashMTPModel": ("longcat_flash_mtp", "LongCatFlashMTP"), "Glm4MoeMTPModel": ("glm4_moe_mtp", "Glm4MoeMTP"), "MedusaModel": ("medusa", "Medusa"), "Qwen3NextMTP": ("qwen3_next_mtp", "Qwen3NextMTP"), diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index 83e381b3b157..51cd41c864f0 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -691,14 +691,14 @@ def maybe_prefix(prefix: str, name: str) -> str: return name if not prefix else f"{prefix}.{name}" -def extract_layer_index(layer_name: str) -> int: +def extract_layer_index(layer_name: str, num_attn_module: int = 1) -> int: """ Extract the layer index from the module name. 
Examples: - "encoder.layers.0" -> 0 - "encoder.layers.1.self_attn" -> 1 - "2.self_attn" -> 2 - - "model.encoder.layers.0.sub.1" -> ValueError + - "model.encoder.layers.0.sub.1" -> ValueError if num_attn_module == 1 """ subnames = layer_name.split(".") int_vals: list[int] = [] @@ -707,9 +707,17 @@ def extract_layer_index(layer_name: str) -> int: int_vals.append(int(subname)) except ValueError: continue - assert len(int_vals) == 1, (f"layer name {layer_name} should" - " only contain one integer") - return int_vals[0] + if num_attn_module == 1 or "attn" not in layer_name: + assert len(int_vals) == 1, (f"layer name {layer_name} should" + " only contain one integer") + + return int_vals[0] + else: + assert len(int_vals) <= 2, (f"layer name {layer_name} should" + " contain most two integers") + layer_index = int_vals[0] * num_attn_module + int_vals[1] if len( + int_vals) == 2 else int_vals[0] + return layer_index def cast_overflow_tensors( diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index b30e4dab956a..119f41d8580e 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -169,7 +169,6 @@ def propose( target_hidden_states = self.model.combine_hidden_states( target_hidden_states) assert target_hidden_states.shape[-1] == self.hidden_size - # Shift the input ids by one token. # E.g., [a1, b1, b2, c1, c2, c3] -> [b1, b2, c1, c2, c3, c3] self.input_ids[:num_tokens - 1] = target_token_ids[1:] @@ -223,7 +222,8 @@ def propose( hidden_states=self.hidden_states[:num_input_tokens], inputs_embeds=inputs_embeds, ) - if self.method in ("deepseek_mtp", "ernie_mtp", "qwen3_next_mtp"): + if self.method in ("deepseek_mtp", "ernie_mtp", "qwen3_next_mtp", + "longcat_flash_mtp"): last_hidden_states = ret_hidden_states hidden_states = last_hidden_states else: @@ -237,7 +237,10 @@ def propose( return draft_token_ids.view(-1, 1) positions = target_positions[last_token_indices] - hidden_states = hidden_states[last_token_indices] + if self.method in ("deepseek_mtp", "ernie_mtp", "longcat_flash_mtp"): + hidden_states = self.hidden_states[last_token_indices] + else: + hidden_states = hidden_states[last_token_indices] if isinstance(attn_metadata, TreeAttentionMetadata): # Draft using tree attention. 
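# A quick worked example for the extract_layer_index(..., num_attn_module)
# change above (assumes vLLM with this patch applied; the layer names are
# illustrative). With two attention modules per decoder layer, the two
# integers in a dual-attention layer name are flattened into one index, which
# is what bind_kv_cache (updated further below) relies on to order KV caches:

from vllm.model_executor.models.utils import extract_layer_index

# unchanged single-integer behavior
assert extract_layer_index("model.layers.3.self_attn.attn") == 3
# dual attention (e.g. longcat_flash): 3 * 2 + 1 == 7
assert extract_layer_index("model.layers.3.self_attn.1.attn",
                           num_attn_module=2) == 7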
@@ -350,7 +353,7 @@ def propose( inputs_embeds=inputs_embeds, ) if self.method in ("deepseek_mtp", "ernie_mtp", - "qwen3_next_mtp"): + "qwen3_next_mtp", "longcat_flash_mtp"): last_hidden_states = ret_hidden_states hidden_states = ret_hidden_states else: diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index ee339e22cea9..ac7ceb824ce7 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3840,9 +3840,11 @@ def initialize_kv_cache_tensors( target_layer_name) kv_caches[layer_name] = kv_caches[target_layer_name] + num_attn_module = 2 \ + if self.model_config.hf_config.model_type == "longcat_flash" else 1 bind_kv_cache(kv_caches, self.compilation_config.static_forward_context, - self.kv_caches) + self.kv_caches, num_attn_module) return kv_caches def maybe_add_kv_sharing_layers_to_kv_cache_groups( diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 553d33e27203..3e0dbda59435 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -266,6 +266,7 @@ def bind_kv_cache( kv_caches: dict[str, torch.Tensor], forward_context: dict[str, "Attention"], runner_kv_caches: list[torch.Tensor], + num_attn_module: Optional[int] = 1, ) -> None: """ Bind the allocated KV cache to both ModelRunner and forward context so @@ -289,7 +290,8 @@ def bind_kv_cache( # Convert kv_caches dict to a list of tensors in the order of layer_index. index2name = defaultdict(list) for layer_name in kv_caches: - index2name[extract_layer_index(layer_name)].append(layer_name) + index2name[extract_layer_index(layer_name, + num_attn_module)].append(layer_name) for layer_index in sorted(index2name.keys()): layer_names = index2name[layer_index] From c85be1f6dd3e20d9b42cd68ff54b328ffeb6cb4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicole=20LiHui=20=F0=9F=A5=9C?= Date: Thu, 25 Sep 2025 13:03:25 +0800 Subject: [PATCH 359/518] optimize: eliminate duplicate split_enc_dec_inputs calls (#25573) Signed-off-by: nicole-lihui --- vllm/v1/engine/processor.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index 507e2cd3223f..f493cc13ece2 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -388,9 +388,9 @@ def process_inputs( eos_token_id = self.input_preprocessor.get_eos_token_id() - self._validate_model_inputs(processed_inputs) - encoder_inputs, decoder_inputs = split_enc_dec_inputs(processed_inputs) + self._validate_model_inputs(encoder_inputs, decoder_inputs) + # Mypy does not always properly infer the types of some elements of # discriminated unions of TypedDicts, because of how it handles # inheritance of TypedDict. 
If we explicitly extract the items we want @@ -458,9 +458,8 @@ def process_inputs( trace_headers=trace_headers, ) - def _validate_model_inputs(self, inputs: ProcessorInputs): - encoder_inputs, decoder_inputs = split_enc_dec_inputs(inputs) - + def _validate_model_inputs(self, encoder_inputs: Optional[SingletonInputs], + decoder_inputs: SingletonInputs): if encoder_inputs is not None: self._validate_model_input(encoder_inputs, prompt_type="encoder") From a676e668ee10585099816b60a440ec089fd391c4 Mon Sep 17 00:00:00 2001 From: courage17340 Date: Thu, 25 Sep 2025 13:32:21 +0800 Subject: [PATCH 360/518] [Bugfix] fix apply_temperature to avoid nan in probs (#24734) Signed-off-by: courage17340 --- vllm/v1/sample/sampler.py | 7 ++++++- vllm/v1/worker/gpu_input_batch.py | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/vllm/v1/sample/sampler.py b/vllm/v1/sample/sampler.py index fa2a6e590f22..83ea766b1b4a 100644 --- a/vllm/v1/sample/sampler.py +++ b/vllm/v1/sample/sampler.py @@ -128,8 +128,12 @@ def apply_temperature( self, logits: torch.Tensor, temp: torch.Tensor, + all_random: bool, ) -> torch.Tensor: # Use in-place division to avoid creating a new tensor. + # Avoid division by zero if there are greedy requests. + if not all_random: + temp = torch.where(temp < _SAMPLING_EPS, 1.0, temp) return logits.div_(temp.unsqueeze(dim=1)) def greedy_sample(self, logits: torch.Tensor) -> torch.Tensor: @@ -164,7 +168,8 @@ def sample( assert sampling_metadata.temperature is not None # Apply temperature. - logits = self.apply_temperature(logits, sampling_metadata.temperature) + logits = self.apply_temperature(logits, sampling_metadata.temperature, + sampling_metadata.all_random) # Apply logits processors that only apply to random sampling # (argmax invariant) diff --git a/vllm/v1/worker/gpu_input_batch.py b/vllm/v1/worker/gpu_input_batch.py index 79a392337574..67fb9864b19c 100644 --- a/vllm/v1/worker/gpu_input_batch.py +++ b/vllm/v1/worker/gpu_input_batch.py @@ -354,8 +354,8 @@ def add_request( and is_spec_decode_unsupported(sampling_params)): self.spec_decode_unsupported_reqs.add(req_id) if sampling_params.sampling_type == SamplingType.GREEDY: - # Avoid later division by zero. - self.temperature_cpu[req_index] = -1.0 + # Should avoid division by zero later when apply_temperature. 
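# Minimal sketch (assumed values, not vLLM code) of why the apply_temperature
# guard added above is needed: greedy requests now store a temperature of 0.0,
# and dividing logits by it directly would put inf/NaN into those rows before
# softmax, even though greedy sampling ignores the scaled values. The
# torch.where rewrite is skipped when every request is random-sampling
# (all_random is True), so the common case pays no extra cost.
import torch

_SAMPLING_EPS = 1e-5                      # assumed epsilon, mirroring the sampler
temp = torch.tensor([0.0, 0.7, 1.0])      # row 0 is a greedy request
logits = torch.randn(3, 8)

safe_temp = torch.where(temp < _SAMPLING_EPS, 1.0, temp)
scaled = logits / safe_temp.unsqueeze(dim=1)
assert torch.isfinite(scaled).all()       # no inf/NaN leaking downstream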
+ self.temperature_cpu[req_index] = 0.0 self.greedy_reqs.add(req_id) else: self.temperature_cpu[req_index] = sampling_params.temperature From 755ed7b05be4743237d3339c4ff8c22bcaae04f4 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Thu, 25 Sep 2025 14:47:03 +0800 Subject: [PATCH 361/518] [Misc] Simplify PoolerOutput and move to `v1/outputs` (#25629) Signed-off-by: DarkLight1337 --- vllm/executor/executor_base.py | 4 +-- vllm/model_executor/layers/pooler.py | 29 +++++------------ vllm/model_executor/models/gritlm.py | 6 ++-- vllm/sequence.py | 48 ---------------------------- vllm/v1/outputs.py | 7 +++- vllm/v1/worker/gpu_model_runner.py | 22 +++++++++---- 6 files changed, 34 insertions(+), 82 deletions(-) diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index fd4b992c3821..e3063ec2b8ab 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -15,10 +15,10 @@ from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator from vllm.logger import init_logger from vllm.lora.request import LoRARequest -from vllm.sequence import ExecuteModelRequest, PoolerOutput +from vllm.sequence import ExecuteModelRequest from vllm.tasks import SupportedTask from vllm.utils import make_async -from vllm.v1.outputs import SamplerOutput +from vllm.v1.outputs import PoolerOutput, SamplerOutput from vllm.worker.worker_base import WorkerBase logger = init_logger(__name__) diff --git a/vllm/model_executor/layers/pooler.py b/vllm/model_executor/layers/pooler.py index 4a97438b1bb2..139011ce10be 100644 --- a/vllm/model_executor/layers/pooler.py +++ b/vllm/model_executor/layers/pooler.py @@ -16,9 +16,9 @@ from vllm.logger import init_logger from vllm.model_executor.models.adapters import _load_st_projector from vllm.pooling_params import PoolingParams -from vllm.sequence import PoolerOutput, PoolingSequenceGroupOutput from vllm.tasks import PoolingTask -from vllm.utils import current_stream, resolve_obj_by_qualname +from vllm.utils import resolve_obj_by_qualname +from vllm.v1.outputs import PoolerOutput from vllm.v1.pool.metadata import PoolingCursor, PoolingMetadata logger = init_logger(__name__) @@ -190,19 +190,6 @@ def get_cross_encoder_activation_function(config: PretrainedConfig): return PoolerClassify() -def build_output( - all_data: Union[torch.Tensor, list[torch.Tensor]], ) -> PoolerOutput: - # Pooling models D2H & synchronize occurs here - if isinstance(all_data, list): - all_data = [d.to("cpu", non_blocking=True) for d in all_data] - else: - all_data = all_data.to("cpu", non_blocking=True) - current_stream().synchronize() - - all_outputs = [PoolingSequenceGroupOutput(data) for data in all_data] - return PoolerOutput(outputs=all_outputs) - - class PoolingMethod(nn.Module, ABC): @staticmethod @@ -556,7 +543,7 @@ def forward( ) -> PoolerOutput: pooled_data = self.pooling(hidden_states, pooling_metadata) pooled_data = self.head(pooled_data, pooling_metadata) - return build_output(pooled_data) + return pooled_data class StepPooler(Pooler): @@ -607,7 +594,7 @@ def forward( ) -> PoolerOutput: pooled_data = self.extract_states(hidden_states, pooling_metadata) pooled_data = self.head(pooled_data, pooling_metadata) - return build_output(pooled_data) + return pooled_data class ClassifierPooler(Pooler): @@ -678,7 +665,7 @@ def forward( ] # scores shape: [batchsize, num_labels] - return build_output(scores) + return scores class DispatchPooler(Pooler): @@ -708,7 +695,7 @@ def forward( ) -> PoolerOutput: poolers_by_task = self.poolers_by_task - outputs = 
list[PoolingSequenceGroupOutput]() + outputs = list[torch.Tensor]() offset = 0 for task, group in groupby(get_tasks(pooling_metadata)): if not (pooler := poolers_by_task.get(task)): @@ -722,10 +709,10 @@ def forward( pooling_metadata[offset:offset + num_items], ) - outputs.extend(group_output.outputs) + outputs.extend(group_output) offset += num_items - return PoolerOutput(outputs) + return outputs def extra_repr(self) -> str: s = f"supported_task={self.get_supported_tasks()}" diff --git a/vllm/model_executor/models/gritlm.py b/vllm/model_executor/models/gritlm.py index a7b324f0a5b4..639d8f620f94 100644 --- a/vllm/model_executor/models/gritlm.py +++ b/vllm/model_executor/models/gritlm.py @@ -12,12 +12,12 @@ from vllm.model_executor.layers.pooler import (DispatchPooler, Pooler, PoolerHead, PoolerNormalize, PoolingParamsUpdate, - build_output, get_prompt_lens, + get_prompt_lens, get_prompt_token_ids) from vllm.model_executor.models.llama import LlamaForCausalLM -from vllm.sequence import PoolerOutput from vllm.tasks import PoolingTask from vllm.transformers_utils.tokenizer import cached_tokenizer_from_config +from vllm.v1.outputs import PoolerOutput from vllm.v1.pool.metadata import PoolingMetadata from .interfaces_base import default_pooling_type @@ -212,7 +212,7 @@ def forward( ) -> PoolerOutput: pooled_data = self.pooling(hidden_states, pooling_metadata) pooled_data = self.head(pooled_data, pooling_metadata) - return build_output(pooled_data) + return pooled_data @default_pooling_type("MEAN") diff --git a/vllm/sequence.py b/vllm/sequence.py index a6c194fbac0b..e5f23d47a660 100644 --- a/vllm/sequence.py +++ b/vllm/sequence.py @@ -11,7 +11,6 @@ from vllm.v1.worker.kv_connector_model_runner_mixin import ( KVConnectorOutput) else: - LoRARequest = Any KVConnectorOutput = Any VLLM_TOKEN_ID_ARRAY_TYPE = "l" @@ -48,29 +47,6 @@ class RequestMetrics: model_execute_time: Optional[float] = None -class PoolingSequenceGroupOutput( - msgspec.Struct, - omit_defaults=True, # type: ignore[call-arg] - array_like=True, # type: ignore[call-arg] -): - """The model output associated with a pooling sequence group.""" - # Annotated as Any to be compatible with msgspec - # The actual type is in SequenceGroup.pooled_data - data: Any - - def get_data_nbytes(self) -> int: - data: torch.Tensor = self.data - return data.nbytes - - def __repr__(self) -> str: - return f"PoolingSequenceGroupOutput(data={self.data}" - - def __eq__(self, other: object) -> bool: - if not isinstance(other, PoolingSequenceGroupOutput): - raise NotImplementedError() - return self.data == other.data - - # cannot use msgspec.Struct here because Dynamo does not support it @dataclass class IntermediateTensors: @@ -119,30 +95,6 @@ def __repr__(self) -> str: return f"IntermediateTensors(tensors={self.tensors})" -class PoolerOutput( - msgspec.Struct, - omit_defaults=True, # type: ignore[call-arg] - array_like=True): # type: ignore[call-arg] - """The output from a pooling operation in the pooling model.""" - outputs: list[PoolingSequenceGroupOutput] - - def get_data_nbytes(self) -> int: - return sum(o.get_data_nbytes() for o in self.outputs) - - def __getitem__(self, idx: int) -> PoolingSequenceGroupOutput: - return self.outputs[idx] - - def __setitem__(self, idx: int, value: PoolingSequenceGroupOutput): - self.outputs[idx] = value - - def __len__(self): - return len(self.outputs) - - def __eq__(self, other: object): - return isinstance(other, - self.__class__) and self.outputs == other.outputs - - class ExecuteModelRequest( msgspec.Struct, 
array_like=True, # type: ignore[call-arg] diff --git a/vllm/v1/outputs.py b/vllm/v1/outputs.py index e6cc6019b172..01f3676abd92 100644 --- a/vllm/v1/outputs.py +++ b/vllm/v1/outputs.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import TYPE_CHECKING, NamedTuple, Optional +from typing import TYPE_CHECKING, NamedTuple, Optional, Union import torch @@ -65,6 +65,11 @@ def empty_cpu(num_positions: int, ) +# [num_reqs, ] +# The shape of each element depends on the pooler used +PoolerOutput = Union[torch.Tensor, list[torch.Tensor]] + + @dataclass class SamplerOutput: diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index ac7ceb824ce7..17f8be86af2f 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -52,13 +52,14 @@ from vllm.multimodal.utils import group_mm_kwargs_by_modality from vllm.pooling_params import PoolingParams from vllm.sampling_params import SamplingType -from vllm.sequence import IntermediateTensors, PoolerOutput +from vllm.sequence import IntermediateTensors from vllm.tasks import GenerationTask, PoolingTask, SupportedTask from vllm.utils import (STR_DTYPE_TO_TORCH_DTYPE, DeviceMemoryProfiler, GiB_bytes, cdiv, check_use_alibi, get_dtype_size, is_pin_memory_available, length_from_prompt_token_ids_or_embeds, round_up, supports_dynamo) +from vllm.utils.jsontree import json_map_leaves from vllm.v1.attention.backends.flash_attn import AttentionMetadata from vllm.v1.attention.backends.gdn_attn import GDNAttentionMetadataBuilder from vllm.v1.attention.backends.utils import ( @@ -79,7 +80,7 @@ # yapf: enable from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput, DraftTokenIds, LogprobsLists, LogprobsTensors, - ModelRunnerOutput, SamplerOutput) + ModelRunnerOutput, PoolerOutput, SamplerOutput) from vllm.v1.pool.metadata import PoolingMetadata from vllm.v1.sample.logits_processor import LogitsProcessors, build_logitsprocs from vllm.v1.sample.metadata import SamplingMetadata @@ -1823,15 +1824,22 @@ def _pool( device=hidden_states.device) seq_lens_cpu = self.seq_lens.cpu[:self.input_batch.num_reqs] - # Pooling models D2H & synchronize occurs in pooler.py:build_output - raw_pooler_output = self.model.pooler( - hidden_states=hidden_states, pooling_metadata=pooling_metadata) + model = cast(VllmModelForPooling, self.model) + raw_pooler_output: PoolerOutput = model.pooler( + hidden_states=hidden_states, + pooling_metadata=pooling_metadata, + ) + raw_pooler_output = json_map_leaves( + lambda x: x.to("cpu", non_blocking=True), + raw_pooler_output, + ) + self._sync_device() pooler_output: list[Optional[torch.Tensor]] = [] for raw_output, seq_len, prompt_len in zip( raw_pooler_output, seq_lens_cpu, pooling_metadata.prompt_lens): - output = raw_output.data if seq_len == prompt_len else None + output = raw_output if seq_len == prompt_len else None pooler_output.append(output) return ModelRunnerOutput( @@ -3233,7 +3241,7 @@ def _dummy_pooler_run( for task in self.get_supported_pooling_tasks(): # Run a full batch with each task to ensure none of them OOMs output = self._dummy_pooler_run_task(hidden_states, task) - output_size[task] = output.get_data_nbytes() + output_size[task] = sum(o.nbytes for o in output) del output # Allow GC max_task = max(output_size.items(), key=lambda x: x[1])[0] From bc092ea873422569d2300d48a40e6a3e3ac99737 Mon Sep 17 00:00:00 2001 From: Jacob Kahn Date: Thu, 25 Sep 2025 09:37:03 +0200 Subject: [PATCH 362/518] Map 
CwmForCausalLM to llama and LlamaForCausalLM (#25611) Signed-off-by: Jacob Kahn Co-authored-by: Roger Wang --- tests/models/registry.py | 3 +++ vllm/model_executor/models/registry.py | 1 + 2 files changed, 4 insertions(+) diff --git a/tests/models/registry.py b/tests/models/registry.py index 10d85707d668..8dbada0d03a0 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -196,6 +196,9 @@ def check_available_online( trust_remote_code=True), "Cohere2ForCausalLM": _HfExamplesInfo("CohereForAI/c4ai-command-r7b-12-2024", # noqa: E501 trust_remote_code=True), + "CwmForCausalLM": _HfExamplesInfo("facebook/cwm", # noqa: E501 + trust_remote_code=True, + is_available_online=False), "DbrxForCausalLM": _HfExamplesInfo("databricks/dbrx-instruct"), "DeciLMForCausalLM": _HfExamplesInfo("nvidia/Llama-3_3-Nemotron-Super-49B-v1", # noqa: E501 trust_remote_code=True), diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index c0afd83c28ce..432060acfee6 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -64,6 +64,7 @@ "ChatGLMForConditionalGeneration": ("chatglm", "ChatGLMForCausalLM"), "CohereForCausalLM": ("commandr", "CohereForCausalLM"), "Cohere2ForCausalLM": ("commandr", "CohereForCausalLM"), + "CwmForCausalLM": ("llama", "LlamaForCausalLM"), "DbrxForCausalLM": ("dbrx", "DbrxForCausalLM"), "DeciLMForCausalLM": ("nemotron_nas", "DeciLMForCausalLM"), "DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"), From af4ee63e0e9d051e3f3756a2d33686e774503f4a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicole=20LiHui=20=F0=9F=A5=9C?= Date: Thu, 25 Sep 2025 15:46:22 +0800 Subject: [PATCH 363/518] typo: remove duplicate `is` (#25641) Signed-off-by: nicole-lihui --- vllm/v1/engine/processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/engine/processor.py b/vllm/v1/engine/processor.py index f493cc13ece2..843ca9ad68e3 100644 --- a/vllm/v1/engine/processor.py +++ b/vllm/v1/engine/processor.py @@ -79,7 +79,7 @@ def _validate_logprobs( if num_logprobs > max_logprobs: raise ValueError( f"Requested sample logprobs of {num_logprobs}, " - f"which is is greater than max allowed: {max_logprobs}") + f"which is greater than max allowed: {max_logprobs}") # Validate prompt logprobs. 
if params.prompt_logprobs: @@ -89,7 +89,7 @@ def _validate_logprobs( if num_prompt_logprobs > max_logprobs: raise ValueError( f"Requested prompt logprobs of {num_prompt_logprobs}, " - f"which is is greater than max allowed: {max_logprobs}") + f"which is greater than max allowed: {max_logprobs}") def _validate_sampling_params( self, From 1260180c678b1ca0a5863b907d9afadcb72399f1 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Thu, 25 Sep 2025 04:05:21 -0400 Subject: [PATCH 364/518] =?UTF-8?q?Revert=20"[Performance]=20Move=20apply?= =?UTF-8?q?=5Fw8a8=5Fblock=5Ffp8=5Flinear=20to=20an=20op=20class=E2=80=A6?= =?UTF-8?q?=20(#25607)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tyler Michael Smith --- .../cutlass_benchmarks/w8a8_benchmarks.py | 4 +- .../benchmark_fp8_block_dense_gemm.py | 4 +- tests/kernels/quantization/test_block_fp8.py | 5 +- .../quantization/test_fp8_quant_group.py | 26 +- .../model_executor/test_enabled_custom_ops.py | 30 ++ tests/quantization/test_compressed_tensors.py | 35 -- vllm/config/__init__.py | 17 - .../compressed_tensors/compressed_tensors.py | 8 - .../schemes/compressed_tensors_w8a8_fp8.py | 37 +-- .../layers/quantization/deepgemm.py | 10 +- .../model_executor/layers/quantization/fp8.py | 40 +-- .../layers/quantization/input_quant_fp8.py | 24 +- .../layers/quantization/utils/fp8_utils.py | 299 +++++++----------- vllm/utils/deep_gemm.py | 12 +- 14 files changed, 205 insertions(+), 346 deletions(-) diff --git a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py index 02f8c593392c..a5a5b52f6039 100644 --- a/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py +++ b/benchmarks/cutlass_benchmarks/w8a8_benchmarks.py @@ -17,7 +17,7 @@ from vllm import _custom_ops as ops from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - w8a8_triton_block_scaled_mm, + w8a8_block_fp8_matmul, ) from vllm.utils import FlexibleArgumentParser, cdiv @@ -158,7 +158,7 @@ def bench_fp8( "cutlass_fp8_fp8_fp16_scaled_mm_bias": lambda: ops.cutlass_scaled_mm( a, b, scale_a, scale_b, torch.float16, bias.to(dtype=torch.float16) ), - "triton_fp8_fp8_fp16_scaled_mm_blockwise": lambda: w8a8_triton_block_scaled_mm( + "triton_fp8_fp8_fp16_scaled_mm_blockwise": lambda: w8a8_block_fp8_matmul( a_cont, b.t(), block_scale_a, block_scale_b.t(), (128, 128) ), "cutlass_fp8_fp8_fp16_scaled_mm_blockwise": lambda: ops.cutlass_scaled_mm( diff --git a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py index 2010b8038563..db2398fc40a4 100644 --- a/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py +++ b/benchmarks/kernels/deepgemm/benchmark_fp8_block_dense_gemm.py @@ -9,7 +9,7 @@ from vllm import _custom_ops as ops from vllm.model_executor.layers.quantization.utils.fp8_utils import ( per_token_group_quant_fp8, - w8a8_triton_block_scaled_mm, + w8a8_block_fp8_matmul, ) from vllm.triton_utils import triton from vllm.utils.deep_gemm import ( @@ -63,7 +63,7 @@ def deepgemm_gemm(): # === vLLM Triton Implementation === def vllm_triton_gemm(): - return w8a8_triton_block_scaled_mm(A_vllm, + return w8a8_block_fp8_matmul(A_vllm, B_vllm, A_scale_vllm, B_scale_vllm, diff --git a/tests/kernels/quantization/test_block_fp8.py b/tests/kernels/quantization/test_block_fp8.py index e02df540ce9d..211d1ecfe6e4 100644 --- a/tests/kernels/quantization/test_block_fp8.py +++ b/tests/kernels/quantization/test_block_fp8.py @@ -11,7 
+11,7 @@ native_w8a8_block_matmul) from vllm.config import VllmConfig from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - cutlass_scaled_mm, per_token_group_quant_fp8, w8a8_triton_block_scaled_mm) + cutlass_scaled_mm, per_token_group_quant_fp8, w8a8_block_fp8_matmul) from vllm.platforms import current_platform from vllm.utils import has_deep_gemm from vllm.utils.deep_gemm import (fp8_gemm_nt, @@ -91,8 +91,7 @@ def test_w8a8_block_fp8_matmul(M, N, K, block_size, out_dtype, seed): ref_out = native_w8a8_block_matmul(A_fp8, B_fp8, As, Bs, block_size, out_dtype) - out = w8a8_triton_block_scaled_mm(A_fp8, B_fp8, As, Bs, block_size, - out_dtype) + out = w8a8_block_fp8_matmul(A_fp8, B_fp8, As, Bs, block_size, out_dtype) rel_diff = (torch.mean( torch.abs(out.to(torch.float32) - ref_out.to(torch.float32))) / diff --git a/tests/kernels/quantization/test_fp8_quant_group.py b/tests/kernels/quantization/test_fp8_quant_group.py index 3d4c851a9b88..720eee62760d 100644 --- a/tests/kernels/quantization/test_fp8_quant_group.py +++ b/tests/kernels/quantization/test_fp8_quant_group.py @@ -20,11 +20,9 @@ (8, 513, 64), # Non-divisible (native only) ]) @pytest.mark.parametrize("seed", [42]) -@pytest.mark.parametrize("use_ue8m0", [True, False]) @torch.inference_mode() def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, - group_size: int, seed: int, - use_ue8m0: bool) -> None: + group_size: int, seed: int) -> None: """Test QuantFP8 group quantization with various configurations. Tests both CUDA and native implementations, column-major scales, @@ -40,8 +38,7 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, group_shape = GroupShape(1, group_size) quant_op = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=False, - use_ue8m0=use_ue8m0) + column_major_scales=False) # 1. Test native implementation (always available) x_quant_native, scales_native = quant_op.forward_native(x.clone()) @@ -51,15 +48,9 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, # 2. Test column-major scales configuration quant_op_col = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=True, - use_ue8m0=use_ue8m0) + column_major_scales=True) _, scales_col = quant_op_col.forward_native(x.clone()) - assert scales_col.shape == (batch_size, expected_num_groups) - assert scales_col.stride(0) == 1 - assert scales_col.stride(1) == batch_size - - # Test column-major scales consistency - assert torch.allclose(scales_col, scales_native, rtol=1e-9, atol=1e-8) + assert scales_col.shape == (expected_num_groups, batch_size) # 3. 
Test CUDA implementation (only for divisible dimensions) if is_divisible: @@ -77,9 +68,8 @@ def test_quantfp8_group_functionality(batch_size: int, hidden_dim: int, @pytest.mark.parametrize("seed", [42]) -@pytest.mark.parametrize("use_ue8m0", [True, False]) @torch.inference_mode() -def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None: +def test_quantfp8_group_multidimensional(seed: int) -> None: current_platform.seed_everything(seed) group_size = 64 @@ -92,8 +82,7 @@ def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None: group_shape = GroupShape(1, group_size) quant_op = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=False, - use_ue8m0=use_ue8m0) + column_major_scales=False) x_quant, scales = quant_op.forward_native(x_3d.clone()) assert x_quant.shape == x_3d.shape @@ -102,8 +91,7 @@ def test_quantfp8_group_multidimensional(seed: int, use_ue8m0: bool) -> None: # Test column_major_scales with multi-dim quant_op_col = QuantFP8(static=False, group_shape=group_shape, - column_major_scales=True, - use_ue8m0=use_ue8m0) + column_major_scales=True) _, scales_col = quant_op_col.forward_native(x_3d.clone()) assert scales_col.shape == (batch1, hidden_dim // group_size, batch2) diff --git a/tests/model_executor/test_enabled_custom_ops.py b/tests/model_executor/test_enabled_custom_ops.py index 200b6ecd5852..92ce10a9efc0 100644 --- a/tests/model_executor/test_enabled_custom_ops.py +++ b/tests/model_executor/test_enabled_custom_ops.py @@ -17,6 +17,8 @@ from vllm.model_executor.layers.layernorm import (RMSNorm, dispatch_rocm_rmsnorm_func, fused_add_rms_norm, rms_norm) +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + cutlass_scaled_mm, dispatch_w8a8_blockscale_func, w8a8_block_fp8_matmul) from vllm.platforms import current_platform RMS_NORM_SUPPORTED_DTYPES = [torch.float16, torch.bfloat16] @@ -109,6 +111,34 @@ def test_enabled_ops_invalid(env: str): RMSNorm(1024).enabled() +@pytest.mark.skipif( + not current_platform.is_rocm() or not current_platform.is_fp8_fnuz(), + reason="AITER is a feature exclusive for ROCm and FP8_FNUZ") +@pytest.mark.parametrize("use_cutlass", [True, False]) +@pytest.mark.parametrize("use_rocm_aiter", ["0", "1"]) +@pytest.mark.parametrize("use_rocm_aiter_gemm_w8a8_blockscale", ["0", "1"]) +def test_w8a8_blockscale_dispatch(use_cutlass: bool, use_rocm_aiter: str, + use_rocm_aiter_gemm_w8a8_blockscale: str, + monkeypatch): + + monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) + monkeypatch.setenv("VLLM_ROCM_USE_AITER_LINEAR", + use_rocm_aiter_gemm_w8a8_blockscale) + + use_aiter_and_is_supported = (bool(int(use_rocm_aiter)) and bool( + int(use_rocm_aiter_gemm_w8a8_blockscale))) + block_scale_func = dispatch_w8a8_blockscale_func( + use_cutlass, use_aiter_and_is_supported=use_aiter_and_is_supported) + if use_cutlass: + assert block_scale_func == cutlass_scaled_mm + elif current_platform.is_rocm() and int(use_rocm_aiter) and int( + use_rocm_aiter_gemm_w8a8_blockscale): + assert block_scale_func == ( + torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale) + else: + assert block_scale_func == w8a8_block_fp8_matmul + + @pytest.mark.parametrize("use_rocm_aiter", ["0", "1"]) def test_topk_dispatch(use_rocm_aiter: str, monkeypatch): monkeypatch.setenv("VLLM_ROCM_USE_AITER", use_rocm_aiter) diff --git a/tests/quantization/test_compressed_tensors.py b/tests/quantization/test_compressed_tensors.py index af8c7ec3b482..c0ab3fbb1062 100644 --- a/tests/quantization/test_compressed_tensors.py +++ 
b/tests/quantization/test_compressed_tensors.py @@ -18,9 +18,6 @@ CompressedTensorsW4A16Fp4, CompressedTensorsW4A16Sparse24, CompressedTensorsW8A8Fp8, CompressedTensorsW8A8Int8, CompressedTensorsW8A16Fp8, CompressedTensorsWNA16) -from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 -from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - W8A8BlockFp8LinearOp) from vllm.model_executor.layers.quantization.utils.quant_utils import ( cutlass_fp4_supported) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( @@ -745,35 +742,3 @@ def test_compressed_tensors_transforms_perplexity(vllm_runner, model, prompt, perplexity = llm.generate_prompt_perplexity([prompt])[0] print(perplexity) assert perplexity <= exp_perplexity - - -def test_compressed_tensors_fp8_block_enabled(vllm_runner): - model_path = "RedHatAI/Qwen3-0.6B-FP8-BLOCK" - with vllm_runner(model_path) as llm: - - fp8_dtype = current_platform.fp8_dtype() - - def check_model(model): - layer = model.model.layers[0] - - qkv_proj = layer.self_attn.qkv_proj - assert isinstance(qkv_proj.quant_method, - CompressedTensorsLinearMethod) - assert isinstance(qkv_proj.scheme, CompressedTensorsW8A8Fp8) - assert isinstance(qkv_proj.scheme.w8a8_block_fp8_linear, - W8A8BlockFp8LinearOp) - - assert qkv_proj.weight.dtype is fp8_dtype - assert qkv_proj.weight_scale.dtype is torch.float32 - assert len(qkv_proj.weight.shape) == 2 - assert len(qkv_proj.weight_scale.shape) == 2 - - input_quant_op = \ - qkv_proj.scheme.w8a8_block_fp8_linear.input_quant_op - assert isinstance(input_quant_op, QuantFP8) - assert input_quant_op._forward_method == input_quant_op.forward_cuda - - llm.apply_model(check_model) - - output = llm.generate_greedy("Hello my name is", max_tokens=20) - assert output diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index df6564077e8a..bf2cb325a23d 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -545,23 +545,6 @@ def __post_init__(self): # local attention. 
self.scheduler_config.disable_hybrid_kv_cache_manager = True - def has_blocked_weights(): - if self.quant_config is not None: - if hasattr(self.quant_config, "weight_block_size"): - return self.quant_config.weight_block_size is not None - elif hasattr(self.quant_config, "has_blocked_weights"): - return self.quant_config.has_blocked_weights() - return False - - # Enable quant_fp8 CUDA ops (TODO disable in follow up) - # On H100 the CUDA kernel is faster than - # native implementation - # https://github.com/vllm-project/vllm/issues/25094 - if has_blocked_weights(): - custom_ops = self.compilation_config.custom_ops - if "none" not in custom_ops and "-quant_fp8" not in custom_ops: - custom_ops.append("+quant_fp8") - def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list: # remove the sizes that not multiple of tp_size when diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py index 3f771ea2abd1..d6550dd16892 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py @@ -644,14 +644,6 @@ def get_cache_scale(self, name: str) -> Optional[str]: # If no matches, return None return None - def has_blocked_weights(self) -> bool: - for scheme in self.target_scheme_map.values(): - weight_quant = scheme.get("weights") - if (weight_quant is not None - and weight_quant.strategy == QuantizationStrategy.BLOCK): - return True - return False - @staticmethod def supports_cutlass_24( weight_quant: Optional[QuantizationArgs], diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py index fa0816959fcd..d42ae22c5139 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py @@ -11,7 +11,7 @@ from vllm.model_executor.layers.quantization.compressed_tensors.schemes import ( CompressedTensorsScheme) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - W8A8BlockFp8LinearOp, check_aiter_fp8_linear_support, + apply_fp8_block_linear, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, create_fp8_weight_parameter, maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, process_fp8_weight_channel_strategy, @@ -41,30 +41,16 @@ def __init__(self, weight_quant: QuantizationArgs, self.strategy = weight_quant.strategy self.out_dtype = torch.get_default_dtype() self.is_static_input_scheme = is_static_input_scheme + self.act_q_group_shape = GroupShape.PER_TENSOR \ + if is_static_input_scheme else GroupShape.PER_TOKEN + self.fp8_linear = Fp8LinearOp( + act_quant_static=self.is_static_input_scheme, + act_quant_group_shape=self.act_q_group_shape) self.weight_block_size = self.weight_quant.block_structure - if self.weight_block_size is not None: - self.act_q_group_shape = GroupShape(1, self.weight_block_size[0]) - else: - self.act_q_group_shape = GroupShape.PER_TENSOR \ - if is_static_input_scheme else GroupShape.PER_TOKEN - self.cutlass_block_fp8_supported = cutlass_block_fp8_supported() self.use_aiter_and_is_supported = check_aiter_fp8_linear_support() - if self.weight_block_size is not None: - assert 
not self.is_static_input_scheme - self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp( - weight_group_shape=GroupShape(*self.weight_block_size), - act_quant_group_shape=self.act_q_group_shape, - cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, - use_aiter_and_is_supported=self.use_aiter_and_is_supported, - ) - else: - self.fp8_linear = Fp8LinearOp( - act_quant_static=self.is_static_input_scheme, - act_quant_group_shape=self.act_q_group_shape) - @classmethod def get_min_capability(cls) -> int: # lovelace and up @@ -155,14 +141,13 @@ def apply_weights(self, x: torch.Tensor, bias: Optional[torch.Tensor] = None) -> torch.Tensor: - if self.weight_block_size is not None: - return self.w8a8_block_fp8_linear.apply( + if layer.weight_block_size is not None: + return apply_fp8_block_linear( + layer, input=x, - weight=layer.weight, - weight_scale=layer.weight_scale, - input_scale=layer.input_scale, bias=bias, - ) + cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, + use_aiter_and_is_supported=self.use_aiter_and_is_supported) return self.fp8_linear.apply(input=x, weight=layer.weight, diff --git a/vllm/model_executor/layers/quantization/deepgemm.py b/vllm/model_executor/layers/quantization/deepgemm.py index 8452f686b3ac..2236824ce910 100644 --- a/vllm/model_executor/layers/quantization/deepgemm.py +++ b/vllm/model_executor/layers/quantization/deepgemm.py @@ -42,7 +42,7 @@ def prepare_block_fp8_matmul_inputs( return M, N, K, C -def w8a8_deepgemm_block_scaled_mm( +def w8a8_block_fp8_matmul_deepgemm( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -58,7 +58,7 @@ def w8a8_deepgemm_block_scaled_mm( return C -def w8a8_deepgemm_block_scaled_mm_fake( +def w8a8_block_fp8_matmul_deepgemm_fake( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -72,7 +72,7 @@ def w8a8_deepgemm_block_scaled_mm_fake( direct_register_custom_op( - op_name="w8a8_deepgemm_block_scaled_mm", - op_func=w8a8_deepgemm_block_scaled_mm, - fake_impl=w8a8_deepgemm_block_scaled_mm_fake, + op_name="w8a8_block_fp8_matmul_deepgemm", + op_func=w8a8_block_fp8_matmul_deepgemm, + fake_impl=w8a8_block_fp8_matmul_deepgemm_fake, ) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index 65f85b6ea829..fb448de3c234 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -33,7 +33,7 @@ register_moe_scaling_factors, rotate_flashinfer_fp8_moe_weights, select_cutlass_fp8_gemm_impl, swap_w13_to_w31) from vllm.model_executor.layers.quantization.utils.fp8_utils import ( - W8A8BlockFp8LinearOp, check_aiter_fp8_linear_support, + apply_fp8_block_linear, check_aiter_fp8_linear_support, create_fp8_input_scale, create_fp8_scale_parameter, create_fp8_weight_parameter, expert_weight_is_col_major, maybe_post_process_fp8_weight_block, process_fp8_weight_block_strategy, @@ -242,28 +242,15 @@ def __init__(self, quant_config: Fp8Config): self.weight_block_size = self.quant_config.weight_block_size self.block_quant = self.weight_block_size is not None self.act_q_static = self.quant_config.activation_scheme == "static" - if self.weight_block_size: - self.act_q_group_shape = GroupShape(1, self.weight_block_size[0]) + # Use per-token quantization for better perf if dynamic and cutlass + if not self.act_q_static and cutlass_fp8_supported(): + self.act_q_group_shape = GroupShape.PER_TOKEN else: - # Use per-token quantization for better perf if dynamic and cutlass - if not self.act_q_static and cutlass_fp8_supported(): - 
self.act_q_group_shape = GroupShape.PER_TOKEN - else: - self.act_q_group_shape = GroupShape.PER_TENSOR + self.act_q_group_shape = GroupShape.PER_TENSOR - if self.block_quant: - assert not self.act_q_static - assert self.weight_block_size is not None - self.w8a8_block_fp8_linear = W8A8BlockFp8LinearOp( - weight_group_shape=GroupShape(*self.weight_block_size), - act_quant_group_shape=self.act_q_group_shape, - cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, - use_aiter_and_is_supported=self.use_aiter_and_is_supported, - ) - else: - self.fp8_linear = Fp8LinearOp( - act_quant_static=self.act_q_static, - act_quant_group_shape=self.act_q_group_shape) + self.fp8_linear = Fp8LinearOp( + act_quant_static=self.act_q_static, + act_quant_group_shape=self.act_q_group_shape) def create_weights( self, @@ -412,15 +399,12 @@ def apply(self, bias=bias) if self.block_quant: - assert self.weight_block_size is not None - - return self.w8a8_block_fp8_linear.apply( + return apply_fp8_block_linear( + layer, input=x, - weight=layer.weight, - weight_scale=layer.weight_scale, - input_scale=layer.input_scale, bias=bias, - ) + cutlass_block_fp8_supported=self.cutlass_block_fp8_supported, + use_aiter_and_is_supported=self.use_aiter_and_is_supported) return self.fp8_linear.apply(input=x, weight=layer.weight, diff --git a/vllm/model_executor/layers/quantization/input_quant_fp8.py b/vllm/model_executor/layers/quantization/input_quant_fp8.py index ece3e5817116..31182f40b48f 100644 --- a/vllm/model_executor/layers/quantization/input_quant_fp8.py +++ b/vllm/model_executor/layers/quantization/input_quant_fp8.py @@ -27,14 +27,11 @@ class QuantFP8(CustomOp): This CustomOp supports both static and dynamic quantization. """ - def __init__( - self, - static: bool, - group_shape: GroupShape, - num_token_padding: Optional[int] = None, - column_major_scales: bool = False, - use_ue8m0: Optional[bool] = None, # for Torch compile - ): + def __init__(self, + static: bool, + group_shape: GroupShape, + num_token_padding: Optional[int] = None, + column_major_scales: bool = False): """ :param static: static or dynamic quantization :param group_shape: quantization group shape (PER_TOKEN, PER_TENSOR, @@ -49,7 +46,6 @@ def __init__( self.group_shape = group_shape self.num_token_padding = num_token_padding self.column_major_scales = column_major_scales - self.use_ue8m0 = use_ue8m0 self.is_group_quant = group_shape.is_per_group() if self.is_group_quant: @@ -74,8 +70,7 @@ def forward_cuda( x, group_size=self.group_size, column_major_scales=self.column_major_scales, - dtype=_FP8_DTYPE, - use_ue8m0=self.use_ue8m0) + dtype=_FP8_DTYPE) assert (scale is not None) == self.static assert scale_ub is None or (not self.static and self.group_shape @@ -142,10 +137,7 @@ def _quantize_group_native( x_grouped = x.view(-1, num_groups, self.group_size) absmax = x_grouped.abs().max(dim=-1, keepdim=True)[0].float() - scales_raw = absmax / _FP8_MAX - if self.use_ue8m0: - scales_raw = torch.exp2(torch.ceil(torch.log2(scales_raw))) - scales = (scales_raw).clamp(min=_FP8_MIN_SCALING_FACTOR) + scales = (absmax / _FP8_MAX).clamp(min=_FP8_MIN_SCALING_FACTOR) x_scaled = x_grouped / scales x_quant = x_scaled.clamp(_FP8_MIN, _FP8_MAX).to(_FP8_DTYPE) @@ -159,6 +151,6 @@ def _quantize_group_native( scales = scales.reshape(orig_shape[:-1] + (num_groups, )) if self.column_major_scales: - scales = scales.transpose(-2, -1).contiguous().transpose(-1, -2) + scales = scales.transpose(-2, -1).contiguous() return x_quant, scales diff --git 
a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index 441bba6baacc..b32c67dec7ff 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -13,9 +13,8 @@ import vllm.envs as envs from vllm import _custom_ops as ops from vllm.logger import init_logger -from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.utils.quant_utils import ( - GroupShape, group_broadcast) + group_broadcast) from vllm.model_executor.layers.quantization.utils.w8a8_utils import ( CUTLASS_BLOCK_FP8_SUPPORTED) from vllm.model_executor.parameter import (BlockQuantScaleParameter, @@ -25,7 +24,6 @@ from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used, - is_deep_gemm_supported, should_use_deepgemm_for_fp8_linear) logger = init_logger(__name__) @@ -37,8 +35,6 @@ def is_fp8(x: Union[torch.dtype, torch.Tensor]) -> bool: return x == torch.float8_e4m3fn or x == torch.float8_e4m3fnuz -# We need to pass in the is_hopper flag as argument because the function -# current_platform.is_device_capability() is not supported by Torch compiler. def cutlass_scaled_mm( A: torch.Tensor, B: torch.Tensor, @@ -46,17 +42,15 @@ def cutlass_scaled_mm( Bs: torch.Tensor, block_size: list[int], output_dtype: torch.dtype = torch.float16, - is_hopper: Optional[bool] = None, ) -> torch.Tensor: - if is_hopper is None: - is_hopper = current_platform.is_device_capability(90) return ops.cutlass_scaled_mm( A, B.T, out_dtype=output_dtype, scale_a=As, # SM90 block FP8 requires row-major scale_b, which we do ahead of time - scale_b=Bs if block_size is not None and is_hopper else Bs.T) + scale_b=Bs if block_size is not None + and current_platform.is_device_capability(90) else Bs.T) def rocm_aiter_gemm_w8a8_blockscale_impl( @@ -102,190 +96,122 @@ def rocm_aiter_gemm_w8a8_blockscale_fake( aiter_per1x128_quant = get_hip_quant(rocm_aiter.QuantType.per_1x128) -# TODO we should be able to change the type of block_size to GroupShape -# after we resolve GroupShape compilation issue -# https://github.com/vllm-project/vllm/issues/25270 -def _w8a8_triton_block_scaled_mm_func( - qx: torch.Tensor, - weight: torch.Tensor, - x_scale: torch.Tensor, - weight_scale: torch.Tensor, - block_size: list[int], - output_dtype: torch.dtype, -) -> torch.Tensor: - return w8a8_triton_block_scaled_mm(qx, weight, x_scale, weight_scale, - block_size, output_dtype) +def dispatch_w8a8_blockscale_func( + use_cutlass: bool, use_aiter_and_is_supported: bool +) -> Callable[[ + torch.Tensor, + torch.Tensor, + torch.Tensor, + torch.Tensor, + list[int], + torch.dtype, +], torch.Tensor]: + if use_cutlass: + return cutlass_scaled_mm + if (use_aiter_and_is_supported): + return torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale + return w8a8_block_fp8_matmul -def _w8a8_triton_block_scaled_mm_fake( - qx: torch.Tensor, +# TODO fix ROCm->Triton custom path: +# https://github.com/vllm-project/vllm/issues/14397 +def apply_w8a8_block_fp8_linear( + input: torch.Tensor, weight: torch.Tensor, - x_scale: torch.Tensor, - weight_scale: torch.Tensor, block_size: list[int], - output_dtype: torch.dtype, + weight_scale: torch.Tensor, + input_scale: Optional[torch.Tensor] = None, + bias: Optional[torch.Tensor] = None, + cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, + use_aiter_and_is_supported: bool 
= False, ) -> torch.Tensor: - return torch.empty((qx.size(0), weight.size(0)), - dtype=output_dtype, - device=qx.device) + assert input_scale is None + # View input as 2D matrix for fp8 methods + input_2d = input.view(-1, input.shape[-1]) + output_shape = [*input.shape[:-1], weight.shape[0]] + output_dtype = input.dtype + if should_use_deepgemm_for_fp8_linear(output_dtype, weight): -# Note: the check can be removed when CPU torch > 2.7 -if not current_platform.is_cpu(): - direct_register_custom_op( - "w8a8_triton_block_scaled_mm_func", - _w8a8_triton_block_scaled_mm_func, - fake_impl=_w8a8_triton_block_scaled_mm_fake, - dispatch_key="CUDA", - ) - - -# TODO fix ROCm->Triton custom path: -# https://github.com/vllm-project/vllm/issues/14397 -class W8A8BlockFp8LinearOp: - """ - This class executes a Blocked FP8 linear layer using cutlass if supported - and torch.scaled_mm otherwise. - """ - - def __init__( - self, - weight_group_shape: GroupShape, - act_quant_group_shape: GroupShape, - cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, - use_aiter_and_is_supported: bool = False, - ): - self.weight_group_shape = weight_group_shape - self.act_quant_group_shape = act_quant_group_shape - self.is_deep_gemm_supported = is_deep_gemm_supported() - self.is_hopper = current_platform.is_device_capability(90) - - # Get the correct blockscale mul and input quant operations. - # We can't use _dispatch_w8a8_blockscale_op to figure out if we want - # to use deepgemm because we don't know the shape of weights (and - # whether deepgemm supports it) at the init time. - self.w8a8_blockscale_op, self.input_quant_op = \ - self._dispatch_w8a8_blockscale_op( - cutlass_block_fp8_supported, use_aiter_and_is_supported) - self.deepgemm_input_quant_op = (QuantFP8( - False, - self.act_quant_group_shape, - column_major_scales=True, - use_ue8m0=is_deep_gemm_e8m0_used()) if self.is_deep_gemm_supported - else None) - - def apply( - self, - input: torch.Tensor, - weight: torch.Tensor, - weight_scale: torch.Tensor, - input_scale: Optional[torch.Tensor] = None, - bias: Optional[torch.Tensor] = None, - ) -> torch.Tensor: - assert input_scale is None - # View input as 2D matrix for fp8 methods input_2d = input.view(-1, input.shape[-1]) output_shape = [*input.shape[:-1], weight.shape[0]] - output_dtype = input.dtype - if should_use_deepgemm_for_fp8_linear(output_dtype, weight, - self.is_deep_gemm_supported): - output = self._run_deepgemm(input, weight, weight_scale) - if bias is not None: - output = output + bias - return output.to(dtype=input.dtype).view(*output_shape) - - output = self.w8a8_blockscale_op(input_2d, weight, weight_scale) - if bias is not None: - output = output + bias - return output.to(dtype=input.dtype).view(*output_shape) + q_input, x_scale = per_token_group_quant_fp8( + input_2d, + block_size[1], + column_major_scales=True, + ) - def _run_deepgemm( - self, - input_2d: torch.Tensor, - weight: torch.Tensor, - weight_scale: torch.Tensor, - ) -> torch.Tensor: # ensure DeepGEMM-backed custom op is registered before use import vllm.model_executor.layers.quantization.deepgemm # noqa: F401 - assert self.deepgemm_input_quant_op is not None - q_input, x_scale = self.deepgemm_input_quant_op(input_2d) - return torch.ops.vllm.w8a8_deepgemm_block_scaled_mm( + output = torch.ops.vllm.w8a8_block_fp8_matmul_deepgemm( q_input, weight, x_scale, weight_scale, - self.weight_group_shape, - output_dtype=input_2d.dtype) - - def _run_cutlass( - self, - input_2d: torch.Tensor, - weight: torch.Tensor, - weight_scale: 
torch.Tensor, - ) -> torch.Tensor: - assert self.input_quant_op is not None - if self.is_hopper: - # We pad unconditionally (even if shape is already divisible by 4) - # to support dynamic shape for input_2d.shape[0] in torch.compile - x = torch.nn.functional.pad(input_2d, - (0, 0, 0, -input_2d.shape[0] % 4)) - else: - x = input_2d - - q_input, x_scale = self.input_quant_op(x) - output = cutlass_scaled_mm(q_input, weight, x_scale, weight_scale, - list(self.weight_group_shape), - input_2d.dtype, self.is_hopper) - output = output[0:input_2d.shape[0], ...] - return output - - def _run_aiter( - self, - input_2d: torch.Tensor, - weight: torch.Tensor, - weight_scale: torch.Tensor, - ) -> torch.Tensor: - assert self.act_quant_group_shape == GroupShape(1, 128) - q_input, x_scale = aiter_per1x128_quant( - input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8) - return torch.ops.vllm.rocm_aiter_gemm_w8a8_blockscale( - q_input, weight, x_scale, weight_scale, self.weight_group_shape, - input_2d.dtype) - - def _run_triton( - self, - input_2d: torch.Tensor, - weight: torch.Tensor, - weight_scale: torch.Tensor, - ) -> torch.Tensor: - assert self.input_quant_op is not None - q_input, x_scale = self.input_quant_op(input_2d) - return torch.ops.vllm.w8a8_triton_block_scaled_mm_func( - q_input, weight, x_scale, weight_scale, self.weight_group_shape, - input_2d.dtype) - - def _dispatch_w8a8_blockscale_op( - self, - use_cutlass: bool, - use_aiter_and_is_supported: bool, - ) -> tuple[Callable[[ - torch.Tensor, - torch.Tensor, - torch.Tensor, - ], torch.Tensor], Optional[QuantFP8]]: - if use_cutlass: - return self._run_cutlass, (QuantFP8(False, - self.act_quant_group_shape, - column_major_scales=True, - use_ue8m0=False)) + block_size, + output_dtype=output_dtype) + if bias is not None: + output += bias + return output.to(dtype=output_dtype).view(*output_shape) + + w8a8_blockscale_func = dispatch_w8a8_blockscale_func( + cutlass_block_fp8_supported, use_aiter_and_is_supported) + if cutlass_block_fp8_supported: + num_pad = 0 + if current_platform.is_device_capability(90): + # pad first dimension to be divisible by 4 due to + # cutlass blockwise gemm limitation for hopper + num_pad = 4 - (input_2d.shape[0] % 4) + if num_pad > 0: + input_2d = torch.nn.functional.pad(input_2d, + (0, 0, 0, num_pad), + "constant", 0) + q_input, x_scale = per_token_group_quant_fp8(input_2d, + block_size[1], + column_major_scales=True) + output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale, + block_size, input.dtype) + if num_pad > 0: + output = output[:-num_pad] + else: if use_aiter_and_is_supported: - return self._run_aiter, None - return self._run_triton, (QuantFP8(False, - self.act_quant_group_shape, - column_major_scales=False, - use_ue8m0=False)) + q_input, x_scale = aiter_per1x128_quant( + input_2d.contiguous(), quant_dtype=rocm_aiter.dtypes.fp8) + else: + q_input, x_scale = per_token_group_quant_fp8( + input_2d, block_size[1], column_major_scales=False) + + output = w8a8_blockscale_func(q_input, weight, x_scale, weight_scale, + block_size, input.dtype) + + if bias is not None: + output = output + bias + return output.to(dtype=input.dtype).view(*output_shape) + + +def apply_w8a8_block_fp8_linear_fake( + input: torch.Tensor, + weight: torch.Tensor, + block_size: list[int], + weight_scale: torch.Tensor, + input_scale: Optional[torch.Tensor] = None, + bias: Optional[torch.Tensor] = None, + cutlass_block_fp8_supported: bool = CUTLASS_BLOCK_FP8_SUPPORTED, + use_aiter_and_is_supported: bool = False, +) -> torch.Tensor: 
+ output_shape = [*input.shape[:-1], weight.shape[0]] + return torch.empty(output_shape, dtype=input.dtype, device=input.device) + + +if not current_platform.is_cpu(): + direct_register_custom_op( + op_name="apply_w8a8_block_fp8_linear", + op_func=apply_w8a8_block_fp8_linear, + mutates_args=[], + fake_impl=apply_w8a8_block_fp8_linear_fake, + ) def input_to_float8( @@ -537,7 +463,7 @@ def per_token_group_quant_fp8( @triton.jit -def _w8a8_triton_block_scaled_mm( +def _w8a8_block_fp8_matmul( # Pointers to inputs and output A, B, @@ -662,7 +588,7 @@ def get_w8a8_block_fp8_configs(N: int, K: int, block_n: int, return None -def w8a8_triton_block_scaled_mm( +def w8a8_block_fp8_matmul( A: torch.Tensor, B: torch.Tensor, As: torch.Tensor, @@ -722,7 +648,7 @@ def grid(META): return (triton.cdiv(M, META["BLOCK_SIZE_M"]) * triton.cdiv(N, META["BLOCK_SIZE_N"]), ) - _w8a8_triton_block_scaled_mm[grid]( + _w8a8_block_fp8_matmul[grid]( A, B, C, @@ -1005,6 +931,25 @@ def maybe_post_process_fp8_weight_block(layer: torch.nn.Module, layer.weight_scale.data.T.contiguous(), requires_grad=False) +def apply_fp8_block_linear(layer: torch.nn.Module, input: torch.Tensor, + bias: Optional[torch.Tensor], + cutlass_block_fp8_supported: bool, + use_aiter_and_is_supported: bool) -> torch.Tensor: + """Apply block-wise FP8 linear operation.""" + assert layer.weight_block_size is not None + + return torch.ops.vllm.apply_w8a8_block_fp8_linear( + input=input, + weight=layer.weight, + block_size=layer.weight_block_size, + weight_scale=layer.weight_scale, + input_scale=layer.input_scale, + bias=bias, + cutlass_block_fp8_supported=cutlass_block_fp8_supported, + use_aiter_and_is_supported=use_aiter_and_is_supported, + ) + + def expert_weight_is_col_major(x: torch.Tensor) -> bool: assert x.dim() == 3 b, m, n = x.shape diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index 979c10f2c3e9..f955beb92b36 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -9,7 +9,7 @@ import functools import importlib import os -from typing import Any, Callable, NoReturn, Optional +from typing import Any, Callable, NoReturn import torch @@ -184,13 +184,9 @@ def calc_diff(x: torch.Tensor, y: torch.Tensor): return 1 - sim -def should_use_deepgemm_for_fp8_linear( - output_dtype: torch.dtype, - weight: torch.Tensor, - supports_deep_gemm: Optional[bool] = None): - if supports_deep_gemm is None: - supports_deep_gemm = is_deep_gemm_supported() - return (supports_deep_gemm and output_dtype == torch.bfloat16 +def should_use_deepgemm_for_fp8_linear(output_dtype: torch.dtype, + weight: torch.Tensor): + return (is_deep_gemm_supported() and output_dtype == torch.bfloat16 and weight.shape[0] % 128 == 0 and weight.shape[1] % 128 == 0) From 393de22d2ebf0ba397df5ef0bb9b7b9111a3d190 Mon Sep 17 00:00:00 2001 From: Fadi Arafeh <115173828+fadara01@users.noreply.github.com> Date: Thu, 25 Sep 2025 10:39:18 +0100 Subject: [PATCH 365/518] [fix] Update torch version in cpu-build.txt for AArch64/ppc64le and Darwin (#25579) Signed-off-by: Fadi Arafeh --- requirements/cpu-build.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt index 37f072202bd7..118558942f76 100644 --- a/requirements/cpu-build.txt +++ b/requirements/cpu-build.txt @@ -6,7 +6,8 @@ packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.6.0+cpu +torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 
platform, see https://github.com/pytorch/pytorch/pull/151218 +torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" or platform_system == "Darwin" wheel jinja2>=3.1.6 regex From 7be9ffcd9f5c1a93705caac7d556794dd2608099 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Thu, 25 Sep 2025 03:16:45 -0700 Subject: [PATCH 366/518] [Misc] Fix Qwen3-VL `video_grid_thw` typing (#25646) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_vl.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index d4f1547fd8e5..4ea93d3fdf41 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -1249,7 +1249,7 @@ def _process_video_input( rope_type="rope_3d") else: video_embeds = self.visual(pixel_values_videos, - grid_thw=grid_thw) + grid_thw=grid_thw_list) # Split concatenated embeddings for each video item. # Using prod on grid_thw_list instead of grid_thw.prod avoids CUDA sync From 3c2b2ccece425d459fab1e34ed0056b8be7eae0c Mon Sep 17 00:00:00 2001 From: Agata Dobrzyniewicz <160237065+adobrzyn@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:31:14 +0200 Subject: [PATCH 367/518] [Bugfix] Add triton.language.tensor placeholder (#25649) Signed-off-by: Agata Dobrzyniewicz --- tests/test_triton_utils.py | 2 ++ vllm/triton_utils/importing.py | 1 + 2 files changed, 3 insertions(+) diff --git a/tests/test_triton_utils.py b/tests/test_triton_utils.py index 64f72668f29c..ebb69e627e95 100644 --- a/tests/test_triton_utils.py +++ b/tests/test_triton_utils.py @@ -69,6 +69,8 @@ def test_triton_placeholder_language(): assert lang.constexpr is None assert lang.dtype is None assert lang.int64 is None + assert lang.int32 is None + assert lang.tensor is None def test_triton_placeholder_language_from_parent(): diff --git a/vllm/triton_utils/importing.py b/vllm/triton_utils/importing.py index 2a06a9b7d11e..95076a9a7c8f 100644 --- a/vllm/triton_utils/importing.py +++ b/vllm/triton_utils/importing.py @@ -93,3 +93,4 @@ def __init__(self): self.dtype = None self.int64 = None self.int32 = None + self.tensor = None From 17b4c6685ce62d5652654784d6771a3d38e4273e Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Thu, 25 Sep 2025 18:36:01 +0800 Subject: [PATCH 368/518] [Bugfix] Fix Qwen3-VL max_num_video_tokens calculation for video profiling (#25648) Signed-off-by: Isotr0py --- vllm/model_executor/models/qwen2_vl.py | 2 +- vllm/model_executor/models/qwen3_vl.py | 12 ++++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 472e8b061a9e..14ea03444484 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -82,7 +82,7 @@ logger = init_logger(__name__) # For profile run -_MAX_FRAMES_PER_VIDEO = 600 +_MAX_FRAMES_PER_VIDEO = 32 # === Vision Inputs === # diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 4ea93d3fdf41..ede477cde1a2 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -715,6 +715,18 @@ def _get_dummy_videos( video_items.append(video_item) return video_items + def get_dummy_processor_inputs(self, seq_len, mm_counts): + processor_inputs = super().get_dummy_processor_inputs( + seq_len, mm_counts) + # HACK(Isotr0py): We set do_resize to False here to reuse Qwen2-VL's + # profiling logic, which will be problematic for configurable mm 
+ # profiling. + # TODO(Isotr0py): Switch to the implementation in + # https://github.com/vllm-project/vllm/pull/25557 + # after supporting configurable mm profiling. + processor_inputs.hf_processor_mm_kwargs = {"do_resize": False} + return processor_inputs + class Qwen3VLMultiModalProcessor(BaseMultiModalProcessor[Qwen3VLProcessingInfo] ): From 12c1287d6418d9a97155bb036b9e79cd3aff803b Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Thu, 25 Sep 2025 18:57:36 +0800 Subject: [PATCH 369/518] [mypy] Further improve MM type annotations (#25654) Signed-off-by: DarkLight1337 --- vllm/model_executor/models/transformers.py | 7 +++- vllm/multimodal/inputs.py | 23 +++++++---- vllm/multimodal/processing.py | 19 +++++----- vllm/multimodal/profiling.py | 6 +-- vllm/multimodal/utils.py | 44 ++++++++++++---------- vllm/utils/jsontree.py | 39 ++++++++++++++++--- 6 files changed, 90 insertions(+), 48 deletions(-) diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index 475a68bc642b..19dd242f16eb 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -415,9 +415,12 @@ def apply( self._get_mm_fields_config(processed_data, hf_processor_mm_kwargs, num_image_patches), ) + # Use overrides if provided; fallback to data-dependent hashing. - mm_hashes = (mm_uuids if mm_uuids is not None else self._hash_mm_items( - mm_items, hf_processor_mm_kwargs, tokenization_kwargs)) + mm_hashes = self._hash_mm_items(mm_items, + hf_processor_mm_kwargs, + tokenization_kwargs, + mm_uuids=mm_uuids) return MultiModalInputs( type="multimodal", diff --git a/vllm/multimodal/inputs.py b/vllm/multimodal/inputs.py index e00c10fb66ee..3539517ed45e 100644 --- a/vllm/multimodal/inputs.py +++ b/vllm/multimodal/inputs.py @@ -14,7 +14,7 @@ from typing_extensions import NotRequired, TypeAlias, TypeVar, deprecated from vllm.utils import LazyLoader, full_groupby, is_list_of -from vllm.utils.jsontree import JSONTree, json_map_leaves +from vllm.utils.jsontree import json_map_leaves if TYPE_CHECKING: import torch @@ -203,7 +203,7 @@ def nested_tensors_equal(a: NestedTensors, b: NestedTensors) -> bool: return a == b -BatchedTensorInputs: TypeAlias = Mapping[str, NestedTensors] +BatchedTensorInputs: TypeAlias = dict[str, NestedTensors] """ A dictionary containing nested tensors which have been batched via [`MultiModalKwargs.batch`][vllm.multimodal.inputs.MultiModalKwargs.batch]. 
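# Illustration (a sketch, not part of the patch hunks around it): with
# BatchedTensorInputs now aliased to a plain dict[str, NestedTensors], a batch
# can be handed straight to json_map_leaves, e.g. to move every leaf tensor to
# a target device. The key name and tensor shape below are placeholders.
import torch
from vllm.utils.jsontree import json_map_leaves

batched = {"pixel_values": torch.zeros(2, 3, 224, 224)}  # stands in for BatchedTensorInputs
moved = json_map_leaves(lambda x: x.to(device="cpu", non_blocking=True), batched)
assert moved["pixel_values"].device.type == "cpu"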
@@ -377,6 +377,7 @@ def _reduce_data( pin_memory: bool, ) -> NestedTensors: if len(batch) > 0 and is_list_of(batch, torch.Tensor, check="all"): + batch = cast(list[torch.Tensor], batch) if len(batch) == 1: # An optimization when `batch` contains only one tensor: # - produce exactly same result as `torch.stack(batch)` @@ -422,6 +423,7 @@ def _reduce_data( pin_memory: bool, ) -> NestedTensors: if len(batch) > 0 and is_list_of(batch, torch.Tensor, check="all"): + batch = cast(list[torch.Tensor], batch) if len(batch) == 1: # An optimization when `batch` contains only one tensor: # - produce exactly same result as `torch.concat(batch)` @@ -764,6 +766,15 @@ def __getitem__(self, modality: str) -> Sequence[_I]: return super().__getitem__(modality) # type: ignore[return-value] + def require_data(self) -> "MultiModalKwargsItems[MultiModalKwargsItem]": + for modality, items in self.items(): + for i, item in enumerate(items): + if item is None: + raise RuntimeError( + f"Found empty mm_items[{modality}][{i}]") + + return self # type: ignore[return-value] + def get_data(self, *, pin_memory: bool = False) -> "MultiModalKwargs": elems_by_key = defaultdict[str, list[MultiModalFieldElem]](list) for modality, items in self.items(): @@ -897,15 +908,11 @@ def as_kwargs( *, device: torch.types.Device, ) -> BatchedTensorInputs: - json_inputs = cast(JSONTree[torch.Tensor], batched_inputs) - - json_mapped = json_map_leaves( + return json_map_leaves( lambda x: x.to(device=device, non_blocking=True), - json_inputs, + batched_inputs, ) - return cast(BatchedTensorInputs, json_mapped) - def __getitem__(self, key: str): if key not in self: raise KeyError(f"Keyword argument {key!r} not found. " diff --git a/vllm/multimodal/processing.py b/vllm/multimodal/processing.py index 78e2cb7fa733..ce671479b1ae 100644 --- a/vllm/multimodal/processing.py +++ b/vllm/multimodal/processing.py @@ -1585,7 +1585,7 @@ def _hash_mm_items( *, mm_uuids: Optional[MultiModalUUIDDict] = None, ) -> MultiModalHashes: - """Create MM hashes to be returned (only used in V1). + """Create MM hashes to be returned. 
Note: When overrides are provided via callers of `apply`, @@ -2098,23 +2098,22 @@ def _get_enc_dec_inputs( encoder_inputs: MultiModalInputs, ): tokenizer = self.info.get_tokenizer() - decoder_prompt = self.create_decoder_prompt(prompt, mm_data) - if isinstance(decoder_prompt, str): + decoder_prompt_raw = self.create_decoder_prompt(prompt, mm_data) + if isinstance(decoder_prompt_raw, str): + decoder_prompt = decoder_prompt_raw decoder_prompt_ids = encode_tokens(tokenizer, - decoder_prompt, + decoder_prompt_raw, add_special_tokens=False) else: - decoder_prompt_ids = decoder_prompt - decoder_prompt = decode_tokens(tokenizer, decoder_prompt) + decoder_prompt = decode_tokens(tokenizer, decoder_prompt_raw) + decoder_prompt_ids = decoder_prompt_raw mm_inputs = MultiModalEncDecInputs( encoder_prompt=encoder_inputs["prompt"], encoder_prompt_token_ids=encoder_inputs["prompt_token_ids"], **encoder_inputs) - mm_inputs.update({ - "prompt": decoder_prompt, - "prompt_token_ids": decoder_prompt_ids - }) + mm_inputs["prompt"] = decoder_prompt + mm_inputs["prompt_token_ids"] = decoder_prompt_ids return mm_inputs def apply( diff --git a/vllm/multimodal/profiling.py b/vllm/multimodal/profiling.py index 9b463e212bb4..26c5d188964c 100644 --- a/vllm/multimodal/profiling.py +++ b/vllm/multimodal/profiling.py @@ -13,7 +13,7 @@ from vllm.logger import init_logger from .inputs import (MultiModalDataDict, MultiModalEncDecInputs, - MultiModalInputs, MultiModalKwargsOptionalItems, + MultiModalInputs, MultiModalKwargsItems, MultiModalPlaceholderDict) from .processing import (BaseMultiModalProcessor, BaseProcessingInfo, EncDecMultiModalProcessor) @@ -43,7 +43,7 @@ class DummyDecoderData(NamedTuple): """Dummy data used for profiling.""" prompt_token_ids: list[int] - multi_modal_data: MultiModalKwargsOptionalItems + multi_modal_data: MultiModalKwargsItems multi_modal_placeholders: MultiModalPlaceholderDict @@ -239,7 +239,7 @@ def get_decoder_dummy_data( return DummyDecoderData( prompt_token_ids=prompt_token_ids, - multi_modal_data=mm_inputs["mm_kwargs"], + multi_modal_data=mm_inputs["mm_kwargs"].require_data(), multi_modal_placeholders=mm_inputs["mm_placeholders"], ) diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 0f8aeceb3944..9b158267040a 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -19,6 +19,7 @@ import vllm.envs as envs from vllm.connections import HTTPConnection, global_http_connection +from vllm.utils.jsontree import json_map_leaves from .audio import AudioMediaIO from .base import MediaIO @@ -383,6 +384,7 @@ def group_mm_kwargs_by_modality( *, device: torch.types.Device = None, pin_memory: bool = False, + merge_by_field_config: bool = False, ) -> Iterable[tuple[str, int, BatchedTensorInputs]]: """Group consecutive `MultiModalKwargsItem`s from `mm_kwargs` with the same modality together into the same `MultiModalKwargs` instance. @@ -400,29 +402,31 @@ def group_mm_kwargs_by_modality( for modality, items in groupby(mm_kwargs, key=lambda item: item.modality): items_lst = list(items) - # mm_kwargs_group = MultiModalKwargsItems.from_items(items_lst) \ - # .get_data(pin_memory=pin_memory) - - # if device is not None: - # mm_kwargs_group = json_map_leaves( - # lambda x: x.to(device=device), - # mm_kwargs_group, - # ) - - # TODO: Once V0 is removed, we can use the merging logic above + # TODO: Enable `merge_by_field_config` for all models # to avoid creating an extra batch dimension (except for fields # that are meant to be stacked anyway). 
# We will also need to update each model to remove `flatten_bn`. - mm_kwargs_group = MultiModalKwargs.as_kwargs( - MultiModalKwargs.batch( - [ - MultiModalKwargsItems.from_seq([item]).get_data() - for item in items_lst - ], - pin_memory=pin_memory, - ), - device=device, - ) + if merge_by_field_config: + mm_kwargs_group: BatchedTensorInputs = dict( + MultiModalKwargsItems.from_seq(items_lst).get_data( + pin_memory=pin_memory)) + + if device is not None: + mm_kwargs_group = json_map_leaves( + lambda x: x.to(device=device), + mm_kwargs_group, + ) + else: + mm_kwargs_group = MultiModalKwargs.as_kwargs( + MultiModalKwargs.batch( + [ + MultiModalKwargsItems.from_seq([item]).get_data() + for item in items_lst + ], + pin_memory=pin_memory, + ), + device=device, + ) yield modality, len(items_lst), mm_kwargs_group diff --git a/vllm/utils/jsontree.py b/vllm/utils/jsontree.py index 804c443eb184..7eb58b5f5cf8 100644 --- a/vllm/utils/jsontree.py +++ b/vllm/utils/jsontree.py @@ -4,7 +4,12 @@ from collections.abc import Iterable from functools import reduce -from typing import Callable, TypeVar, Union, cast, overload +from typing import TYPE_CHECKING, Callable, TypeVar, Union, cast, overload + +if TYPE_CHECKING: + import torch + + from vllm.multimodal.inputs import BatchedTensorInputs _T = TypeVar("_T") _U = TypeVar("_U") @@ -17,6 +22,19 @@ ] """A nested JSON structure where the leaves need not be JSON-serializable.""" +_JSONTree = Union[ + dict[str, "JSONTree[_T]"], + list["JSONTree[_T]"], + tuple["JSONTree[_T]", ...], + dict[str, _T], + list[_T], + tuple[_T, ...], + _T, +] +""" +Same as `JSONTree` but with additional `Union` members to satisfy overloads. +""" + def json_iter_leaves(value: JSONTree[_T]) -> Iterable[_T]: """Iterate through each leaf in a nested JSON structure.""" @@ -30,6 +48,14 @@ def json_iter_leaves(value: JSONTree[_T]) -> Iterable[_T]: yield value +@overload +def json_map_leaves( + func: Callable[["torch.Tensor"], "torch.Tensor"], + value: "BatchedTensorInputs", +) -> "BatchedTensorInputs": + ... 
+ + @overload def json_map_leaves( func: Callable[[_T], _U], @@ -64,11 +90,14 @@ def json_map_leaves( def json_map_leaves( func: Callable[[_T], _U], - value: Union[dict[str, _T], list[_T], tuple[_T, ...], JSONTree[_T]], -) -> Union[dict[str, _U], list[_U], tuple[_U, ...], JSONTree[_U]]: + value: Union["BatchedTensorInputs", _JSONTree[_T]], +) -> Union["BatchedTensorInputs", _JSONTree[_U]]: """Apply a function to each leaf in a nested JSON structure.""" if isinstance(value, dict): - return {k: json_map_leaves(func, v) for k, v in value.items()} + return { + k: json_map_leaves(func, v) # type: ignore[arg-type] + for k, v in value.items() + } elif isinstance(value, list): return [json_map_leaves(func, v) for v in value] elif isinstance(value, tuple): @@ -125,7 +154,7 @@ def json_reduce_leaves( def json_reduce_leaves( func: Callable[..., Union[_T, _U]], - value: Union[dict[str, _T], list[_T], tuple[_T, ...], JSONTree[_T]], + value: _JSONTree[_T], initial: _U = cast(_U, ...), # noqa: B008 /, ) -> Union[_T, _U]: From eaeca3cd7ff7dc5e7d28361020706a2ab7c58233 Mon Sep 17 00:00:00 2001 From: yyzxw <34639446+yyzxw@users.noreply.github.com> Date: Thu, 25 Sep 2025 19:09:39 +0800 Subject: [PATCH 370/518] [Bugfix] Parse SpeculativeConfig Error (#25142) Signed-off-by: zxw <1020938856@qq.com> Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/engine/arg_utils.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index d3e23890469e..3d48d2a0b22d 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1465,12 +1465,18 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: return False # V1 supports N-gram, Medusa, and Eagle speculative decoding. - if (self.speculative_config is not None - and self.speculative_config.get("method") == "draft_model"): - raise NotImplementedError( - "Speculative decoding with draft model is not supported yet. " - "Please consider using other speculative decoding methods " - "such as ngram, medusa, eagle, or deepseek_mtp.") + if self.speculative_config is not None: + # speculative_config could still be a dict at this point + if isinstance(self.speculative_config, dict): + method = self.speculative_config.get("method", None) + else: + method = self.speculative_config.method + + if method == "draft_model": + raise NotImplementedError( + "Draft model speculative decoding is not supported yet. 
" + "Please consider using other speculative decoding methods " + "such as ngram, medusa, eagle, or deepseek_mtp.") V1_BACKENDS = [ "FLASH_ATTN_VLLM_V1", From 7f570f1caa472dcb4143488d9406eb339f87e359 Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Thu, 25 Sep 2025 19:26:31 +0800 Subject: [PATCH 371/518] [V0 deprecation] Remove unreachable model_config.supported_tasks (#25642) Signed-off-by: wang.yuqi --- tests/test_config.py | 3 -- vllm/config/model.py | 61 --------------------------- vllm/engine/protocol.py | 5 +++ vllm/entrypoints/openai/api_server.py | 6 +-- vllm/entrypoints/openai/run_batch.py | 8 +--- 5 files changed, 7 insertions(+), 76 deletions(-) diff --git a/tests/test_config.py b/tests/test_config.py index 9e2bfb9e1b0e..90d0c78c451f 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -97,7 +97,6 @@ def test_auto_task(model_id, expected_runner_type, expected_convert_type, assert config.runner_type == expected_runner_type assert config.convert_type == expected_convert_type - assert expected_task in config.supported_tasks # Can remove once --task option is fully deprecated @@ -120,7 +119,6 @@ def test_score_task(model_id, expected_runner_type, expected_convert_type, assert config.runner_type == expected_runner_type assert config.convert_type == expected_convert_type - assert expected_task in config.supported_tasks # Can remove once --task option is fully deprecated @@ -137,7 +135,6 @@ def test_transcription_task(model_id, expected_runner_type, assert config.runner_type == expected_runner_type assert config.convert_type == expected_convert_type - assert expected_task in config.supported_tasks @pytest.mark.parametrize( diff --git a/vllm/config/model.py b/vllm/config/model.py index 1e0e4d8b3551..0ded70388b8a 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -14,7 +14,6 @@ model_validator) from pydantic.dataclasses import dataclass from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE -from typing_extensions import assert_never import vllm.envs as envs from vllm.config.multimodal import (MMCacheType, MMEncoderTPMode, @@ -534,9 +533,6 @@ def _task_to_convert(task: TaskOption) -> ConvertType: f"You can pass `--convert {convert_option} to adapt " "it into a pooling model.") - self.supported_tasks = self._get_supported_tasks( - architectures, self.runner_type, self.convert_type) - # Note: Initialize these attributes early because transformers fallback # may fail to load dynamic modules in child processes model_info, arch = registry.inspect_model_cls(architectures, self) @@ -834,27 +830,6 @@ def _get_convert_type( return convert_type - def _get_supported_generation_tasks( - self, - architectures: list[str], - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - registry = self.registry - - if registry.is_transcription_only_model(architectures, self): - return ["transcription"] - - # TODO: Use get_supported_generation_tasks once V0 is removed - supported_tasks = list[_ResolvedTask]() - if (registry.is_text_generation_model(architectures, self) - or convert_type in _RUNNER_CONVERTS["generate"]): - supported_tasks.append("generate") - - if registry.is_transcription_model(architectures, self): - supported_tasks.append("transcription") - - return supported_tasks - def _get_default_pooling_task( self, architectures: list[str], @@ -872,42 +847,6 @@ def _get_default_pooling_task( return "embed" - def _get_supported_pooling_tasks( - self, - architectures: list[str], - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - registry = self.registry - - # 
TODO: Use get_supported_pooling_tasks once V0 is removed - supported_tasks = list[_ResolvedTask]() - if (registry.is_pooling_model(architectures, self) - or convert_type in _RUNNER_CONVERTS["pooling"]): - supported_tasks.append("encode") - - extra_task = (self._get_default_pooling_task(architectures) - if convert_type == "none" else convert_type) - supported_tasks.append(extra_task) - - return supported_tasks - - def _get_supported_tasks( - self, - architectures: list[str], - runner_type: RunnerType, - convert_type: ConvertType, - ) -> list[_ResolvedTask]: - if runner_type == "generate": - return self._get_supported_generation_tasks( - architectures, convert_type) - if runner_type == "pooling": - return self._get_supported_pooling_tasks(architectures, - convert_type) - if runner_type == "draft": - return ["draft"] - - assert_never(runner_type) - def _parse_quant_hf_config(self, hf_config: PretrainedConfig): quant_cfg = getattr(hf_config, "quantization_config", None) if quant_cfg is None: diff --git a/vllm/engine/protocol.py b/vllm/engine/protocol.py index e828ac04364f..9aea74d0c8f3 100644 --- a/vllm/engine/protocol.py +++ b/vllm/engine/protocol.py @@ -16,6 +16,7 @@ from vllm.plugins.io_processors.interface import IOProcessor from vllm.pooling_params import PoolingParams from vllm.sampling_params import BeamSearchParams, SamplingParams +from vllm.tasks import SupportedTask from vllm.transformers_utils.tokenizer import AnyTokenizer from vllm.utils import Device, collect_from_async_generator, random_uuid @@ -326,3 +327,7 @@ async def collective_rpc(self, kwargs: Optional[dict] = None): """Perform a collective RPC call to the given path.""" raise NotImplementedError + + async def get_supported_tasks(self) -> tuple[SupportedTask, ...]: + """Get supported tasks""" + raise NotImplementedError diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index b8ba7e81ef5f..97cbda63bf42 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1609,11 +1609,7 @@ async def init_app_state( state.vllm_config = vllm_config model_config = vllm_config.model_config - if envs.VLLM_USE_V1: - supported_tasks = await engine_client \ - .get_supported_tasks() # type: ignore - else: - supported_tasks = model_config.supported_tasks + supported_tasks = await engine_client.get_supported_tasks() logger.info("Supported_tasks: %s", supported_tasks) diff --git a/vllm/entrypoints/openai/run_batch.py b/vllm/entrypoints/openai/run_batch.py index fa813550e520..2568c21c4abe 100644 --- a/vllm/entrypoints/openai/run_batch.py +++ b/vllm/entrypoints/openai/run_batch.py @@ -14,7 +14,6 @@ from prometheus_client import start_http_server from tqdm import tqdm -import vllm.envs as envs from vllm.config import VllmConfig from vllm.engine.arg_utils import AsyncEngineArgs, optional_type from vllm.engine.protocol import EngineClient @@ -334,12 +333,7 @@ async def run_batch( model_config = vllm_config.model_config - if envs.VLLM_USE_V1: - supported_tasks = await engine_client \ - .get_supported_tasks() # type: ignore - else: - supported_tasks = model_config.supported_tasks - + supported_tasks = await engine_client.get_supported_tasks() logger.info("Supported_tasks: %s", supported_tasks) # Create the openai serving objects. 
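A minimal client-side sketch of the migration documented in the patch below (#25615), which keeps the deprecated guided_* request fields working by forwarding them to structured_outputs. This is an illustration, not part of the patch series; it assumes an OpenAI-compatible vLLM server on localhost, and the model name and JSON schema are placeholders.

    from openai import OpenAI

    client = OpenAI(base_url="http://localhost:8000/v1", api_key="EMPTY")
    schema = {"type": "object", "properties": {"name": {"type": "string"}}}

    # Deprecated form (still accepted, now forwarded to structured_outputs):
    #     extra_body={"guided_json": schema}
    # Preferred form:
    completion = client.chat.completions.create(
        model="placeholder-model",
        messages=[{"role": "user",
                   "content": "Return a JSON object with a name."}],
        extra_body={"structured_outputs": {"json": schema}},
    )
    print(completion.choices[0].message.content)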
From 70fbdb26e99d7d0b0299acc108f83bd63626e589 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Thu, 25 Sep 2025 12:45:25 +0100 Subject: [PATCH 372/518] Add backward compatibility for `guided_...` API (#25615) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Cyrus Leung Co-authored-by: Cyrus Leung --- docs/features/structured_outputs.md | 11 +++ vllm/entrypoints/openai/protocol.py | 124 +++++++++++++++++++++++++++- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/docs/features/structured_outputs.md b/docs/features/structured_outputs.md index 1f955c6e30d6..901d87e7ed3d 100644 --- a/docs/features/structured_outputs.md +++ b/docs/features/structured_outputs.md @@ -6,6 +6,17 @@ vLLM supports the generation of structured outputs using This document shows you some examples of the different options that are available to generate structured outputs. +!!! warning + If you are still using the following deprecated API fields, please update your code to use `structured_outputs` as demonstrated in the rest of this document: + + - `guided_json` -> `{"structured_outputs": {"json": ...}}` or `StructuredOutputsParams(json=...)` + - `guided_regex` -> `{"structured_outputs": {"regex": ...}}` or `StructuredOutputsParams(regex=...)` + - `guided_choice` -> `{"structured_outputs": {"choice": ...}}` or `StructuredOutputsParams(choice=...)` + - `guided_grammar` -> `{"structured_outputs": {"grammar": ...}}` or `StructuredOutputsParams(grammar=...)` + - `guided_whitespace_pattern` -> `{"structured_outputs": {"whitespace_pattern": ...}}` or `StructuredOutputsParams(whitespace_pattern=...)` + - `structural_tag` -> `{"structured_outputs": {"structural_tag": ...}}` or `StructuredOutputsParams(structural_tag=...)` + - `guided_decoding_backend` -> Remove this field from your request + ## Online Serving (OpenAI API) You can generate structured outputs using the OpenAI's [Completions](https://platform.openai.com/docs/api-reference/completions) and [Chat](https://platform.openai.com/docs/api-reference/chat) API. diff --git a/vllm/entrypoints/openai/protocol.py b/vllm/entrypoints/openai/protocol.py index c30681318f69..8829fa4886f6 100644 --- a/vllm/entrypoints/openai/protocol.py +++ b/vllm/entrypoints/openai/protocol.py @@ -541,6 +541,56 @@ class ChatCompletionRequest(OpenAIBaseModel): default=None, description="Additional kwargs for structured outputs", ) + guided_json: Optional[Union[str, dict, BaseModel]] = Field( + default=None, + description=( + "`guided_json` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `json` to `structured_outputs` instead."), + ) + guided_regex: Optional[str] = Field( + default=None, + description=( + "`guided_regex` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `regex` to `structured_outputs` instead."), + ) + guided_choice: Optional[list[str]] = Field( + default=None, + description=( + "`guided_choice` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `choice` to `structured_outputs` instead."), + ) + guided_grammar: Optional[str] = Field( + default=None, + description=( + "`guided_grammar` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. 
" + "Please pass `grammar` to `structured_outputs` instead."), + ) + structural_tag: Optional[str] = Field( + default=None, + description=( + "`structural_tag` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `structural_tag` to `structured_outputs` instead."), + ) + guided_decoding_backend: Optional[str] = Field( + default=None, + description=( + "`guided_decoding_backend` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please remove it from your request."), + ) + guided_whitespace_pattern: Optional[str] = Field( + default=None, + description=( + "`guided_whitespace_pattern` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `whitespace_pattern` to `structured_outputs` instead." + ), + ) priority: int = Field( default=0, description=( @@ -658,6 +708,20 @@ def to_sampling_params( if prompt_logprobs is None and self.echo: prompt_logprobs = self.top_logprobs + # Forward deprecated guided_* parameters to structured_outputs + if self.structured_outputs is None: + kwargs = dict[str, Any]( + json=self.guided_json, + regex=self.guided_regex, + choice=self.guided_choice, + grammar=self.guided_grammar, + whitespace_pattern=self.guided_whitespace_pattern, + structural_tag=self.structural_tag, + ) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + if len(kwargs) > 0: + self.structured_outputs = StructuredOutputsParams(**kwargs) + response_format = self.response_format json_schema_from_tool = self._get_json_schema_from_tool() if response_format is not None or json_schema_from_tool is not None: @@ -839,7 +903,7 @@ def check_structured_outputs_count(cls, data): if isinstance(data, ValueError): raise data - if "structured_outputs" not in data: + if data.get("structured_outputs", None) is None: return data structured_outputs_kwargs = data['structured_outputs'] @@ -1016,6 +1080,49 @@ class CompletionRequest(OpenAIBaseModel): default=None, description="Additional kwargs for structured outputs", ) + guided_json: Optional[Union[str, dict, BaseModel]] = Field( + default=None, + description=( + "`guided_json` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `json` to `structured_outputs` instead."), + ) + guided_regex: Optional[str] = Field( + default=None, + description=( + "`guided_regex` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `regex` to `structured_outputs` instead."), + ) + guided_choice: Optional[list[str]] = Field( + default=None, + description=( + "`guided_choice` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `choice` to `structured_outputs` instead."), + ) + guided_grammar: Optional[str] = Field( + default=None, + description=( + "`guided_grammar` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please pass `grammar` to `structured_outputs` instead."), + ) + guided_decoding_backend: Optional[str] = Field( + default=None, + description=( + "`guided_decoding_backend` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. " + "Please remove it from your request."), + ) + guided_whitespace_pattern: Optional[str] = Field( + default=None, + description=( + "`guided_whitespace_pattern` is deprecated. " + "This will be removed in v0.12.0 or v1.0.0, whichever is soonest. 
" + "Please pass `whitespace_pattern` to `structured_outputs` instead." + ), + ) priority: int = Field( default=0, description=( @@ -1145,6 +1252,19 @@ def to_sampling_params( echo_without_generation = self.echo and self.max_tokens == 0 + # Forward deprecated guided_* parameters to structured_outputs + if self.structured_outputs is None: + kwargs = dict[str, Any]( + json=self.guided_json, + regex=self.guided_regex, + choice=self.guided_choice, + grammar=self.guided_grammar, + whitespace_pattern=self.guided_whitespace_pattern, + ) + kwargs = {k: v for k, v in kwargs.items() if v is not None} + if len(kwargs) > 0: + self.structured_outputs = StructuredOutputsParams(**kwargs) + if (self.structured_outputs is not None and self.response_format is not None and self.response_format.type == "json_object"): @@ -1189,7 +1309,7 @@ def to_sampling_params( @model_validator(mode="before") @classmethod def check_structured_outputs_count(cls, data): - if "structured_outputs" not in data: + if data.get("structured_outputs", None) is None: return data structured_outputs_kwargs = data['structured_outputs'] From 0bcc3a160d95d778cd53c96984d2b2b50fa68342 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Thu, 25 Sep 2025 20:19:40 +0800 Subject: [PATCH 373/518] [CI/Build] Fix flaky entrypoints test (#25663) Signed-off-by: DarkLight1337 --- .../test_completion_with_prompt_embeds.py | 64 ++++++++++++------- 1 file changed, 41 insertions(+), 23 deletions(-) diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index 9c62595ad280..ae51025455b1 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -15,7 +15,7 @@ from ...utils import RemoteOpenAIServer # any model with a chat template should work here -MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta" +MODEL_NAME = "facebook/opt-125m" CONFIG = AutoConfig.from_pretrained(MODEL_NAME) @@ -27,7 +27,7 @@ def default_server_args() -> list[str]: "--dtype", "bfloat16", "--max-model-len", - "8192", + "2048", "--max-num-seqs", "128", "--enforce-eager", @@ -36,6 +36,27 @@ def default_server_args() -> list[str]: ] +EXAMPLE_PROMPTS = [ + "Hello, my name is", + "What is an LLM?", +] + + +def _encode_embeds(embeds: torch.Tensor): + buffer = io.BytesIO() + torch.save(embeds, buffer) + return base64.b64encode(buffer.getvalue()).decode('utf-8') + + +@pytest.fixture(scope="module") +def example_prompt_embeds(hf_runner): + """Create example embeddings and return them as base64 encoded string.""" + with hf_runner(MODEL_NAME) as hf_model: + example_embeddings = hf_model.get_prompt_embeddings(EXAMPLE_PROMPTS) + + return [_encode_embeds(item) for item in example_embeddings] + + @pytest.fixture(scope="module", params=["", "--disable-frontend-multiprocessing"]) def server_with_prompt_embeds(default_server_args, request): @@ -52,21 +73,16 @@ async def client_with_prompt_embeds(server_with_prompt_embeds): yield async_client -def create_dummy_embeds(num_tokens: int = 5) -> str: - """Create dummy embeddings and return them as base64 encoded string.""" - dummy_embeds = torch.randn(num_tokens, CONFIG.hidden_size) - buffer = io.BytesIO() - torch.save(dummy_embeds, buffer) - return base64.b64encode(buffer.getvalue()).decode('utf-8') - - -@pytest.mark.skip("This test is skipped because it is flaky.") @pytest.mark.asyncio @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_completions_with_prompt_embeds( - 
client_with_prompt_embeds: openai.AsyncOpenAI, model_name: str): + example_prompt_embeds, + client_with_prompt_embeds: openai.AsyncOpenAI, + model_name: str, +): + encoded_embeds, encoded_embeds2 = example_prompt_embeds + # Test case: Single prompt embeds input - encoded_embeds = create_dummy_embeds() completion = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -77,7 +93,6 @@ async def test_completions_with_prompt_embeds( assert completion.choices[0].prompt_logprobs is None # Test case: batch completion with prompt_embeds - encoded_embeds2 = create_dummy_embeds() completion = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -89,7 +104,6 @@ async def test_completions_with_prompt_embeds( assert len(completion.choices[1].text) >= 1 # Test case: streaming with prompt_embeds - encoded_embeds = create_dummy_embeds() single_completion = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -117,7 +131,6 @@ async def test_completions_with_prompt_embeds( assert "".join(chunks) == single_output # Test case: batch streaming with prompt_embeds - encoded_embeds2 = create_dummy_embeds() stream = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -139,7 +152,6 @@ async def test_completions_with_prompt_embeds( assert len(chunks_stream_embeds[1]) > 0 # Test case: mixed text and prompt_embeds - encoded_embeds = create_dummy_embeds() completion_mixed = await client_with_prompt_embeds.completions.create( model=model_name, prompt="This is a prompt", @@ -184,10 +196,14 @@ async def test_completions_errors_with_prompt_embeds( @pytest.mark.parametrize("logprobs_arg", [1, 0]) @pytest.mark.parametrize("model_name", [MODEL_NAME]) async def test_completions_with_logprobs_and_prompt_embeds( - client_with_prompt_embeds: openai.AsyncOpenAI, logprobs_arg: int, - model_name: str): + example_prompt_embeds, + client_with_prompt_embeds: openai.AsyncOpenAI, + logprobs_arg: int, + model_name: str, +): + encoded_embeds, encoded_embeds2 = example_prompt_embeds + # Test case: Logprobs using prompt_embeds - encoded_embeds = create_dummy_embeds() completion = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -207,7 +223,6 @@ async def test_completions_with_logprobs_and_prompt_embeds( assert len(logprobs.tokens) == 5 # Test case: Log probs with batch completion and prompt_embeds - encoded_embeds2 = create_dummy_embeds() completion = await client_with_prompt_embeds.completions.create( model=model_name, prompt="", # Add empty prompt as required parameter @@ -232,9 +247,12 @@ async def test_completions_with_logprobs_and_prompt_embeds( @pytest.mark.asyncio async def test_prompt_logprobs_raises_error( - client_with_prompt_embeds: openai.AsyncOpenAI): + example_prompt_embeds, + client_with_prompt_embeds: openai.AsyncOpenAI, +): + encoded_embeds, _ = example_prompt_embeds + with pytest.raises(BadRequestError, match="not compatible"): - encoded_embeds = create_dummy_embeds() await client_with_prompt_embeds.completions.create( model=MODEL_NAME, prompt="", From d2af67441ddf5965aaebf129802a0a9d38f0e225 Mon Sep 17 00:00:00 2001 From: Kunshang Ji Date: Thu, 25 Sep 2025 20:38:11 +0800 Subject: [PATCH 374/518] [XPU][Triton]add xpu config in triton_reshape_and_cache_flash (#25643) 
Signed-off-by: Kunshang Ji --- vllm/attention/ops/triton_reshape_and_cache_flash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/ops/triton_reshape_and_cache_flash.py b/vllm/attention/ops/triton_reshape_and_cache_flash.py index 883052cb46aa..cc9e1bb23b9b 100644 --- a/vllm/attention/ops/triton_reshape_and_cache_flash.py +++ b/vllm/attention/ops/triton_reshape_and_cache_flash.py @@ -137,7 +137,7 @@ def triton_reshape_and_cache_flash( # heuristics instead of autotuning TILE_SIZE = min(2048, triton.next_power_of_2(n)) - if torch.version.hip: + if torch.version.hip or torch.version.xpu: num_stages = 4 num_warps = 8 else: # cuda From 1e9a77e0371b160f3c49ee02e7e196eef30122c7 Mon Sep 17 00:00:00 2001 From: chenlang Date: Thu, 25 Sep 2025 20:46:11 +0800 Subject: [PATCH 375/518] [Hardware][RISC-V] Add riscv64 support for vLLM with scalar (#22112) Signed-off-by: chenlang Co-authored-by: chenlang <10346245@zte.com.cn> --- cmake/cpu_extension.cmake | 9 +- csrc/cpu/cpu_types.hpp | 3 +- csrc/cpu/cpu_types_scalar.hpp | 513 ++++++++++++++++++++++++++++++++++ csrc/cpu/float_convert.hpp | 106 +++++++ vllm/platforms/interface.py | 3 + 5 files changed, 632 insertions(+), 2 deletions(-) create mode 100644 csrc/cpu/cpu_types_scalar.hpp create mode 100644 csrc/cpu/float_convert.hpp diff --git a/cmake/cpu_extension.cmake b/cmake/cpu_extension.cmake index 2a2ec08f8695..e6d0012c1a4b 100644 --- a/cmake/cpu_extension.cmake +++ b/cmake/cpu_extension.cmake @@ -101,6 +101,7 @@ else() find_isa(${CPUINFO} "asimd" ASIMD_FOUND) # Check for ARM NEON support find_isa(${CPUINFO} "bf16" ARM_BF16_FOUND) # Check for ARM BF16 support find_isa(${CPUINFO} "S390" S390_FOUND) + find_isa(${CPUINFO} "v" RVV_FOUND) # Check for RISC-V RVV support endif() if (AVX512_FOUND AND NOT AVX512_DISABLED) @@ -177,8 +178,14 @@ elseif (S390_FOUND) "-mzvector" "-march=native" "-mtune=native") +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "riscv64") + if(RVV_FOUND) + message(FAIL_ERROR "Can't support rvv now.") + else() + list(APPEND CXX_COMPILE_FLAGS "-march=rv64gc") + endif() else() - message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA or ARMv8 support.") + message(FATAL_ERROR "vLLM CPU backend requires AVX512, AVX2, Power9+ ISA, S390X ISA, ARMv8 or RISC-V support.") endif() # diff --git a/csrc/cpu/cpu_types.hpp b/csrc/cpu/cpu_types.hpp index c3a21796881c..9cdcd2edacfd 100644 --- a/csrc/cpu/cpu_types.hpp +++ b/csrc/cpu/cpu_types.hpp @@ -14,7 +14,8 @@ // arm implementation #include "cpu_types_arm.hpp" #else - #warning "unsupported vLLM cpu implementation" + #warning "unsupported vLLM cpu implementation, vLLM will compile with scalar" + #include "cpu_types_scalar.hpp" #endif #ifdef _OPENMP diff --git a/csrc/cpu/cpu_types_scalar.hpp b/csrc/cpu/cpu_types_scalar.hpp new file mode 100644 index 000000000000..1a9278bc662e --- /dev/null +++ b/csrc/cpu/cpu_types_scalar.hpp @@ -0,0 +1,513 @@ +#include +#include +#include +#include +#include "float_convert.hpp" + +namespace vec_op { + +#define VLLM_DISPATCH_CASE_FLOATING_TYPES(...) \ + AT_DISPATCH_CASE(at::ScalarType::Float, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::BFloat16, __VA_ARGS__) \ + AT_DISPATCH_CASE(at::ScalarType::Half, __VA_ARGS__) + +#define VLLM_DISPATCH_FLOATING_TYPES(TYPE, NAME, ...) 
\ + AT_DISPATCH_SWITCH(TYPE, NAME, VLLM_DISPATCH_CASE_FLOATING_TYPES(__VA_ARGS__)) + +#ifndef CPU_OP_GUARD + #define CPU_KERNEL_GUARD_IN(NAME) + #define CPU_KERNEL_GUARD_OUT(NAME) +#else + #define CPU_KERNEL_GUARD_IN(NAME) \ + std::cout << #NAME << " invoked." << std::endl; + #define CPU_KERNEL_GUARD_OUT(NAME) \ + std::cout << #NAME << " exit." << std::endl; +#endif + +#define FORCE_INLINE __attribute__((always_inline)) inline + +#define __max(a, b) ((a) > (b) ? (a) : (b)) +#define __min(a, b) ((a) < (b) ? (a) : (b)) +#define __abs(a) ((a) < (0) ? (0 - a) : (a)) + +typedef struct f16x8_t { + uint16_t val[8]; +} f16x8_t; + +typedef struct f16x16_t { + uint16_t val[16]; +} f16x16_t; + +typedef struct f16x32_t { + uint16_t val[32]; +} f16x32_t; + +typedef struct f32x4_t { + float val[4]; +} f32x4_t; + +typedef struct f32x8_t { + float val[8]; +} f32x8_t; + +typedef struct f32x16_t { + float val[16]; +} f32x16_t; + +namespace { +template +constexpr void unroll_loop_item(std::integer_sequence, F&& f) { + (f(std::integral_constant{}), ...); +}; +}; // namespace + +template > > +constexpr void unroll_loop(F&& f) { + unroll_loop_item(std::make_integer_sequence{}, std::forward(f)); +} + +template +struct Vec { + constexpr static int get_elem_num() { return T::VEC_ELEM_NUM; } +}; + +struct FP32Vec8; +struct FP32Vec16; + +struct FP16Vec8 : public Vec { + constexpr static int VEC_ELEM_NUM = 8; + f16x8_t reg; + + explicit FP16Vec8(const void* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit FP16Vec8(const FP32Vec8&); + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } +}; + +struct FP16Vec16 : public Vec { + constexpr static int VEC_ELEM_NUM = 16; + f16x16_t reg; + + explicit FP16Vec16(const void* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit FP16Vec16(const FP32Vec16&); + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } + + void save(void* ptr, const int elem_num) const { + int num = __min(elem_num, VEC_ELEM_NUM); + std::memcpy(ptr, &(reg.val[0]), num * sizeof(uint16_t)); + } +}; + +struct BF16Vec8 : public Vec { + constexpr static int VEC_ELEM_NUM = 8; + f16x8_t reg; + + explicit BF16Vec8(const void* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit BF16Vec8(const FP32Vec8&); + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } +}; + +struct BF16Vec16 : public Vec { + constexpr static int VEC_ELEM_NUM = 16; + f16x16_t reg; + + explicit BF16Vec16(const void* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit BF16Vec16(const FP32Vec16&); + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } + + void save(void* ptr, const int elem_num) const { + int num = __min(elem_num, VEC_ELEM_NUM); + std::memcpy(ptr, &(reg.val[0]), num * sizeof(uint16_t)); + } +}; + +struct BF16Vec32 : public Vec { + constexpr static int VEC_ELEM_NUM = 32; + f16x32_t reg; + + explicit BF16Vec32(const void* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit BF16Vec32(f16x32_t data) : reg(data) {}; + + explicit BF16Vec32(BF16Vec8& vec8_data) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = vec8_data.reg.val[i % BF16Vec8::VEC_ELEM_NUM]; + } + } + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } +}; + +struct FP32Vec4 : public Vec { + constexpr static int VEC_ELEM_NUM = 4; + + f32x4_t reg; + + explicit FP32Vec4(float v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = v; + } + } + + explicit FP32Vec4() { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = 0.0f; + } + } + + explicit FP32Vec4(const float* ptr) + : 
reg(*reinterpret_cast(ptr)) {}; + + explicit FP32Vec4(f32x4_t data) : reg(data) {}; + + explicit FP32Vec4(const FP32Vec4& data) : reg(data.reg) {}; +}; + +struct FP32Vec8 : public Vec { + constexpr static int VEC_ELEM_NUM = 8; + + f32x8_t reg; + + explicit FP32Vec8(float v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = v; + } + } + + explicit FP32Vec8() { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = 0.0f; + } + } + + explicit FP32Vec8(const float* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit FP32Vec8(f32x8_t data) : reg(data) {}; + + explicit FP32Vec8(const FP32Vec8& data) : reg(data.reg) {}; + + explicit FP32Vec8(const FP16Vec8& v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = fp16_to_float(v.reg.val[i]); + } + } + + FP32Vec8(const BF16Vec8& v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = bf16_to_float(v.reg.val[i]); + } + } + + float reduce_sum() const { + float result = 0; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result += reg.val[i]; + } + return result; + } + + FP32Vec8 exp() const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = expf(reg.val[i]); + } + return FP32Vec8(ret); + } + + FP32Vec8 tanh() const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = tanhf(reg.val[i]); + } + return FP32Vec8(ret); + } + + FP32Vec8 er() const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = erf(reg.val[i]); + } + return FP32Vec8(ret); + } + + FP32Vec8 operator*(const FP32Vec8& b) const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = reg.val[i] * b.reg.val[i]; + } + return FP32Vec8(ret); + } + + FP32Vec8 operator+(const FP32Vec8& b) const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = reg.val[i] + b.reg.val[i]; + } + return FP32Vec8(ret); + } + + FP32Vec8 operator-(const FP32Vec8& b) const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = reg.val[i] - b.reg.val[i]; + } + return FP32Vec8(ret); + } + + FP32Vec8 operator/(const FP32Vec8& b) const { + f32x8_t ret; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + ret.val[i] = reg.val[i] / b.reg.val[i]; + } + return FP32Vec8(ret); + } + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } +}; + +struct FP32Vec16 : public Vec { + constexpr static int VEC_ELEM_NUM = 16; + f32x16_t reg; + + explicit FP32Vec16(float v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = v; + } + } + + explicit FP32Vec16() { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = 0.0f; + } + } + + explicit FP32Vec16(const float* ptr) + : reg(*reinterpret_cast(ptr)) {}; + + explicit FP32Vec16(f32x16_t data) : reg(data) {}; + + FP32Vec16(const FP32Vec4& data) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = data.reg.val[i % FP32Vec4::VEC_ELEM_NUM]; + } + } + + FP32Vec16(const FP32Vec8& data) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = data.reg.val[i % FP32Vec8::VEC_ELEM_NUM]; + } + } + + FP32Vec16(const FP32Vec16& data) : reg(data.reg) {}; + + explicit FP32Vec16(const FP16Vec16& v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = fp16_to_float(v.reg.val[i]); + } + } + + explicit FP32Vec16(const BF16Vec16& v) { + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + reg.val[i] = bf16_to_float(v.reg.val[i]); + } + } + + explicit FP32Vec16(const FP16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}; + + FP32Vec16(const BF16Vec8& v) : FP32Vec16(FP32Vec8(v)) {}; + + FP32Vec16 operator*(const FP32Vec16& b) const { + FP32Vec16 
result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = reg.val[i] * b.reg.val[i]; + } + return result; + } + + FP32Vec16 operator+(const FP32Vec16& b) const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = reg.val[i] + b.reg.val[i]; + } + return result; + } + + FP32Vec16 operator-(const FP32Vec16& b) const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = reg.val[i] - b.reg.val[i]; + } + return result; + } + + FP32Vec16 operator/(const FP32Vec16& b) const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = reg.val[i] / b.reg.val[i]; + } + return result; + } + + FP32Vec16 max(const FP32Vec16& b) const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = __max(reg.val[i], b.reg.val[i]); + } + return result; + } + + FP32Vec16 min(const FP32Vec16& b) const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = __min(reg.val[i], b.reg.val[i]); + } + return result; + } + + FP32Vec16 abs() const { + FP32Vec16 result(0.0f); + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result.reg.val[i] = __abs(reg.val[i]); + } + return result; + } + + float reduce_sum() const { + float result = 0.0f; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result += reg.val[i]; + } + return result; + } + + float reduce_max() const { + float result = reg.val[0]; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result = __max(reg.val[i], result); + } + return result; + } + + float reduce_min() const { + float result = reg.val[0]; + for (int i = 0; i < VEC_ELEM_NUM; ++i) { + result = __min(reg.val[i], result); + } + return result; + } + + template + float reduce_sub_sum(int idx) { + static_assert(VEC_ELEM_NUM % group_size == 0); + float sum = 0.0; + int start = idx * group_size; + int end = (idx + 1) * group_size; + + for (; (start < VEC_ELEM_NUM) && (start < end); ++start) { + sum += reg.val[start]; + } + + return sum; + } + + void save(void* ptr) const { *reinterpret_cast(ptr) = reg; } +}; + +template +struct VecType { + using vec_type = void; +}; + +template +using vec_t = typename VecType::vec_type; + +template <> +struct VecType { + using vec_type = FP32Vec8; +}; + +template <> +struct VecType { + using vec_type = FP16Vec8; +}; + +template <> +struct VecType { + using vec_type = BF16Vec8; +}; + +template +void storeFP32(float v, T* ptr) { + *ptr = v; +} + +/* +template <> inline void storeFP32(float v, c10::Half *ptr) { + c10::Half __attribute__((__may_alias__)) *v_ptr = + reinterpret_cast(&v); + *ptr = *(v_ptr + 1); +} +*/ + +template <> +inline void storeFP32(float v, c10::Half* ptr) { + uint16_t fp16 = float_to_fp16(v); + *reinterpret_cast(ptr) = fp16; +} + +template <> +inline void storeFP32(float v, c10::BFloat16* ptr) { + c10::BFloat16 __attribute__((__may_alias__))* v_ptr = + reinterpret_cast(&v); + *ptr = *(v_ptr + 1); +} + +inline FP16Vec16::FP16Vec16(const FP32Vec16& v) { + int i = 0; + for (i = 0; i < FP16Vec16::VEC_ELEM_NUM; ++i) { + reg.val[i] = float_to_fp16(v.reg.val[i]); + } +} + +inline FP16Vec8 ::FP16Vec8(const FP32Vec8& v) { + int i = 0; + for (i = 0; i < FP16Vec8::VEC_ELEM_NUM; ++i) { + reg.val[i] = float_to_fp16(v.reg.val[i]); + } +} + +inline void fma(FP32Vec16& acc, FP32Vec16& a, FP32Vec16& b) { + acc = acc + a * b; +} + +inline BF16Vec8::BF16Vec8(const FP32Vec8& v) { + int i = 0; + for (i = 0; i < BF16Vec8::VEC_ELEM_NUM; ++i) { + reg.val[i] = float_to_bf16(v.reg.val[i]); + } +} 
+ +inline BF16Vec16::BF16Vec16(const FP32Vec16& v) { + int i = 0; + for (i = 0; i < BF16Vec16::VEC_ELEM_NUM; ++i) { + reg.val[i] = float_to_bf16(v.reg.val[i]); + } +} + +inline void prefetch(const void* addr) { __builtin_prefetch(addr, 0, 3); } + +}; // namespace vec_op diff --git a/csrc/cpu/float_convert.hpp b/csrc/cpu/float_convert.hpp new file mode 100644 index 000000000000..c792bf131ccd --- /dev/null +++ b/csrc/cpu/float_convert.hpp @@ -0,0 +1,106 @@ + +static float bf16_to_float(uint16_t bf16) { + uint32_t bits = static_cast(bf16) << 16; + float fp32; + std::memcpy(&fp32, &bits, sizeof(fp32)); + return fp32; +} + +static uint16_t float_to_bf16(float fp32) { + uint32_t bits; + std::memcpy(&bits, &fp32, sizeof(fp32)); + return static_cast(bits >> 16); +} + +/************************************************ + * Copyright (c) 2015 Princeton Vision Group + * Licensed under the MIT license. + * Codes below copied from + * https://github.com/PrincetonVision/marvin/tree/master/tools/tensorIO_matlab + *************************************************/ +static uint16_t float_to_fp16(float fp32) { + uint16_t fp16; + + unsigned x; + unsigned u, remainder, shift, lsb, lsb_s1, lsb_m1; + unsigned sign, exponent, mantissa; + + std::memcpy(&x, &fp32, sizeof(fp32)); + u = (x & 0x7fffffff); + + // Get rid of +NaN/-NaN case first. + if (u > 0x7f800000) { + fp16 = 0x7fffU; + return fp16; + } + + sign = ((x >> 16) & 0x8000); + + // Get rid of +Inf/-Inf, +0/-0. + if (u > 0x477fefff) { + fp16 = sign | 0x7c00U; + return fp16; + } + if (u < 0x33000001) { + fp16 = (sign | 0x0000); + return fp16; + } + + exponent = ((u >> 23) & 0xff); + mantissa = (u & 0x7fffff); + + if (exponent > 0x70) { + shift = 13; + exponent -= 0x70; + } else { + shift = 0x7e - exponent; + exponent = 0; + mantissa |= 0x800000; + } + lsb = (1 << shift); + lsb_s1 = (lsb >> 1); + lsb_m1 = (lsb - 1); + + // Round to nearest even. + remainder = (mantissa & lsb_m1); + mantissa >>= shift; + if (remainder > lsb_s1 || (remainder == lsb_s1 && (mantissa & 0x1))) { + ++mantissa; + if (!(mantissa & 0x3ff)) { + ++exponent; + mantissa = 0; + } + } + + fp16 = (sign | (exponent << 10) | mantissa); + + return fp16; +} + +static float fp16_to_float(uint16_t fp16) { + unsigned sign = ((fp16 >> 15) & 1); + unsigned exponent = ((fp16 >> 10) & 0x1f); + unsigned mantissa = ((fp16 & 0x3ff) << 13); + int temp; + float fp32; + if (exponent == 0x1f) { /* NaN or Inf */ + mantissa = (mantissa ? 
(sign = 0, 0x7fffff) : 0); + exponent = 0xff; + } else if (!exponent) { /* Denorm or Zero */ + if (mantissa) { + unsigned int msb; + exponent = 0x71; + do { + msb = (mantissa & 0x400000); + mantissa <<= 1; /* normalize */ + --exponent; + } while (!msb); + mantissa &= 0x7fffff; /* 1.mantissa is implicit */ + } + } else { + exponent += 0x70; + } + temp = ((sign << 31) | (exponent << 23) | mantissa); + std::memcpy(&fp32, &temp, sizeof(temp)); + return fp32; +} diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 7dd935d2eb31..73b97dafcd6e 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -85,6 +85,7 @@ class CpuArchEnum(enum.Enum): ARM = enum.auto() POWERPC = enum.auto() S390X = enum.auto() + RISCV = enum.auto() OTHER = enum.auto() UNKNOWN = enum.auto() @@ -374,6 +375,8 @@ def get_cpu_architecture(cls) -> CpuArchEnum: return CpuArchEnum.POWERPC elif machine == "s390x": return CpuArchEnum.S390X + elif machine.startswith("riscv"): + return CpuArchEnum.RISCV return CpuArchEnum.OTHER if machine else CpuArchEnum.UNKNOWN From 2f17117606950a379b3b53babb97d19e2fabd93a Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Thu, 25 Sep 2025 21:00:45 +0800 Subject: [PATCH 376/518] [mypy] Fix wrong type annotations related to tuple (#25660) Signed-off-by: DarkLight1337 --- benchmarks/kernels/benchmark_lora.py | 8 ++++---- tests/engine/test_arg_utils.py | 3 +++ tests/kernels/core/test_pos_encoding.py | 2 +- tests/kernels/test_onednn.py | 4 ++-- .../models/multimodal/generation/vlm_utils/types.py | 12 ++++++------ tests/v1/sample/test_sampler.py | 8 +++++--- tests/v1/spec_decode/test_eagle.py | 2 +- .../device_communicators/ray_communicator.py | 2 +- vllm/logits_process.py | 4 ++-- 9 files changed, 25 insertions(+), 20 deletions(-) diff --git a/benchmarks/kernels/benchmark_lora.py b/benchmarks/kernels/benchmark_lora.py index debb29744bfa..799b16999873 100644 --- a/benchmarks/kernels/benchmark_lora.py +++ b/benchmarks/kernels/benchmark_lora.py @@ -79,9 +79,9 @@ def make_rand_lora_weight_tensor( def make_rand_tensors( - a_shape: tuple[int], - b_shape: tuple[int], - c_shape: tuple[int], + a_shape: tuple[int, ...], + b_shape: tuple[int, ...], + c_shape: tuple[int, ...], a_dtype: torch.dtype, b_dtype: torch.dtype, c_dtype: torch.dtype, @@ -243,7 +243,7 @@ def matmul_shapes( lora_rank: int, num_loras: int, num_slices: int, - ) -> tuple[tuple[int], tuple[int], tuple[int]]: + ) -> tuple[tuple[int, ...], tuple[int, ...], tuple[int, ...]]: """ Given num_slices, return the shapes of the A, B, and C matrices in A x B = C, for the op_type diff --git a/tests/engine/test_arg_utils.py b/tests/engine/test_arg_utils.py index b82e83963804..33888f008f04 100644 --- a/tests/engine/test_arg_utils.py +++ b/tests/engine/test_arg_utils.py @@ -50,8 +50,11 @@ def test_is_type(type_hint, type, expected): @pytest.mark.parametrize(("type_hints", "type", "expected"), [ ({float, int}, int, True), + ({int, tuple}, int, True), ({int, tuple[int]}, int, True), + ({int, tuple[int, ...]}, int, True), ({int, tuple[int]}, float, False), + ({int, tuple[int, ...]}, float, False), ({str, Literal["x", "y"]}, Literal, True), ]) def test_contains_type(type_hints, type, expected): diff --git a/tests/kernels/core/test_pos_encoding.py b/tests/kernels/core/test_pos_encoding.py index bf9b1d9b4401..1235e3222a78 100644 --- a/tests/kernels/core/test_pos_encoding.py +++ b/tests/kernels/core/test_pos_encoding.py @@ -60,7 +60,7 @@ def _get_batch_tensor_shape(batch_size: int, seq_len: int, num_heads: int, 
@torch.inference_mode() def test_rotary_embedding( is_neox_style: bool, - tensor_shape_fn: Callable[[int, int, int, int], tuple[int]], + tensor_shape_fn: Callable[[int, int, int, int], tuple[int, ...]], batch_size: int, seq_len: int, num_heads: int, diff --git a/tests/kernels/test_onednn.py b/tests/kernels/test_onednn.py index 37772464a209..198a8fdf0c33 100644 --- a/tests/kernels/test_onednn.py +++ b/tests/kernels/test_onednn.py @@ -165,7 +165,7 @@ def onednn_gemm_test_helper(primitive_cache_size: int, def test_onednn_int8_scaled_gemm( n: int, k: int, - m_list: tuple[int], + m_list: tuple[int, ...], per_tensor_a_scale: bool, per_tensor_b_scale: bool, use_bias: bool, @@ -196,7 +196,7 @@ def test_onednn_int8_scaled_gemm( def test_onednn_gemm( n: int, k: int, - m_list: tuple[int], + m_list: tuple[int, ...], use_bias: bool, use_stride: bool, dtype: torch.dtype, diff --git a/tests/models/multimodal/generation/vlm_utils/types.py b/tests/models/multimodal/generation/vlm_utils/types.py index e39ca40fbbf5..6a82bdfc4cf2 100644 --- a/tests/models/multimodal/generation/vlm_utils/types.py +++ b/tests/models/multimodal/generation/vlm_utils/types.py @@ -101,7 +101,7 @@ class VLMTestInfo(NamedTuple): # Function for converting ImageAssets to image embeddings; # We need to define this explicitly for embedding tests convert_assets_to_embeddings: Optional[Callable[[ImageTestAssets], - torch.Tensor]] = None + list[torch.Tensor]]] = None # Exposed options for vLLM runner; we change these in a several tests, # but the defaults are derived from VllmRunner & the engine defaults @@ -137,12 +137,12 @@ class VLMTestInfo(NamedTuple): # Default expandable params per test; these defaults can be overridden in # instances of this object; the complete set of test cases for the model # is all combinations of .models + all fields below - max_tokens: Union[int, tuple[int]] = 128 - num_logprobs: Union[int, tuple[int]] = 5 - dtype: Union[str, Union[list[str], tuple[str, ...]]] = "auto" - distributed_executor_backend: Optional[Union[str, Iterable[str]]] = None + max_tokens: int = 128 + num_logprobs: int = 5 + dtype: str = "auto" + distributed_executor_backend: Optional[str] = None # Only expanded in video tests - num_video_frames: Union[int, tuple[int]] = 16 + num_video_frames: int = 16 # Fixed image sizes / image size factors; most tests use image_size_factors # The values provided for these two fields will be stacked and expanded diff --git a/tests/v1/sample/test_sampler.py b/tests/v1/sample/test_sampler.py index 53215f88bb27..6ff000043265 100644 --- a/tests/v1/sample/test_sampler.py +++ b/tests/v1/sample/test_sampler.py @@ -72,8 +72,10 @@ def _create_allowed_token_ids( def _create_bad_words_token_ids( - batch_size: int, vocab_size: int, - bad_words_lengths: list[tuple[int]]) -> dict[int, list[list[int]]]: + batch_size: int, + vocab_size: int, + bad_words_lengths: tuple[int, ...], +) -> dict[int, list[list[int]]]: bad_words_token_ids = {} for batch_idx in range(batch_size): token_ids_single_batch = [] @@ -402,7 +404,7 @@ def test_sampler_allowed_token_ids(device: str, batch_size: int, @pytest.mark.parametrize("batch_size", [1, 2, 32]) @pytest.mark.parametrize("bad_words_lengths", [(1, ), (1, 3), (2, 2)]) def test_sampler_bad_words(device: str, batch_size: int, - bad_words_lengths: list[tuple[int]]): + bad_words_lengths: tuple[int, ...]): """ Test to verify that when the bad words restriction is present, tokens are penalized based on their match with the bad words. 
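A minimal, self-contained sketch (illustrative only, not part of this diff) of the typing rule these signature fixes rely on: `tuple[int]` annotates a tuple of exactly one int, while `tuple[int, ...]` annotates a homogeneous tuple of arbitrary length, which is what tensor shapes and `bad_words_lengths` actually are.

# Illustrative sketch (not from the vLLM codebase): why `tuple[int, ...]`
# is the right annotation for variable-length tuples such as tensor shapes.
from typing import get_args

OneInt = tuple[int]         # exactly one element, e.g. (4,)
ManyInts = tuple[int, ...]  # any number of elements, e.g. (2, 3, 4)

def numel(shape: tuple[int, ...]) -> int:
    """Product of all dimensions in a shape tuple of any length."""
    total = 1
    for dim in shape:
        total *= dim
    return total

assert numel((2, 3, 4)) == 24       # accepted: variable-length tuple
assert get_args(OneInt) == (int,)   # fixed-size form: exactly one int
assert get_args(ManyInts) == (int, Ellipsis)

Under mypy, passing a shape like `(2, 3, 4)` where `tuple[int]` is expected is a type error, which is what the corrected annotations in this patch avoid.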
diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index 5096f9fd647b..0b28365ed599 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -30,7 +30,7 @@ def _create_proposer( method: str, num_speculative_tokens: int, - speculative_token_tree: Optional[list[tuple[int]]] = None, + speculative_token_tree: Optional[list[tuple[int, ...]]] = None, ) -> EagleProposer: model_config = ModelConfig(model=model_dir, runner="generate", diff --git a/vllm/distributed/device_communicators/ray_communicator.py b/vllm/distributed/device_communicators/ray_communicator.py index 8cd8c459a9e5..69efc8b45270 100644 --- a/vllm/distributed/device_communicators/ray_communicator.py +++ b/vllm/distributed/device_communicators/ray_communicator.py @@ -178,7 +178,7 @@ def send(self, buf: "torch.Tensor", peer_rank: int) -> None: def recv( self, - shape: tuple[int], + shape: tuple[int, ...], dtype: "torch.dtype", peer_rank: int, allocator: TorchTensorAllocator, diff --git a/vllm/logits_process.py b/vllm/logits_process.py index 5967d0836bd4..48f7e7495b17 100644 --- a/vllm/logits_process.py +++ b/vllm/logits_process.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project - +from collections.abc import Sequence from typing import Callable, Union import torch @@ -55,7 +55,7 @@ def __init__(self, bad_words_ids: list[list[int]]): def __call__( self, - past_tokens_ids: Union[list[int], tuple[int]], + past_tokens_ids: Sequence[int], logits: torch.FloatTensor, ) -> torch.Tensor: if self.word_bias is None: From 6c340da4df81487e1fdbfd78eb87f55c9023b9b8 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Thu, 25 Sep 2025 21:14:57 +0800 Subject: [PATCH 377/518] [misc] log info messages by default for hanging / busy / idle (#25627) Signed-off-by: youkaichao --- vllm/distributed/device_communicators/shm_broadcast.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index deeed1f21b4e..499c6927f2f9 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -390,10 +390,11 @@ def acquire_write(self, timeout: Optional[float] = None): # if we wait for a long time, log a message if (time.monotonic() - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): - logger.debug( + logger.info( ("No available shared memory broadcast block found" " in %s seconds. This typically happens when some" - " processes are hanging."), + " processes are hanging, doing some time-consuming" + " work (e.g. compilation), or sitting idle."), VLLM_RINGBUFFER_WARNING_INTERVAL, ) n_warning += 1 @@ -454,10 +455,11 @@ def acquire_read(self, # if we wait for a long time, log a message if (time.monotonic() - start_time > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): - logger.debug( + logger.info( ("No available shared memory broadcast block found" " in %s seconds. This typically happens when some" - " processes are hanging."), + " processes are hanging, doing some time-consuming" + " work (e.g. 
compilation), or sitting idle."), VLLM_RINGBUFFER_WARNING_INTERVAL, ) n_warning += 1 From 69a8c8e99ab9e16028453efd5e7860063f002bf8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20M=2E=20K=C3=BCbler?= <44084297+jmkuebler@users.noreply.github.com> Date: Thu, 25 Sep 2025 15:25:12 +0200 Subject: [PATCH 378/518] [torch.compile] Make Query Quantization Fusable (#24914) Signed-off-by: Jonas Kuebler --- vllm/attention/backends/abstract.py | 8 ++++++++ vllm/attention/layer.py | 23 ++++++++++++++++++++++- vllm/v1/attention/backends/flash_attn.py | 9 ++------- 3 files changed, 32 insertions(+), 8 deletions(-) diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py index 0f51ef4b2e51..629e42a8b902 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -31,6 +31,14 @@ class AttentionBackend(ABC): # makes sure the output tensor is allocated inside the cudagraph. accept_output_buffer: bool = False + # Whether this backend supports receiving pre-quantized query input. + # If True, the attention layer will handle query quantization instead + # of the backend, allowing torch.compile to fuse quantization with + # previous operations. + # Needs to be worked through for all backends + # https://github.com/vllm-project/vllm/issues/25584 + supports_quant_query_input: bool = False + @staticmethod @abstractmethod def get_name() -> str: diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index baa83e29bdd0..17281c89516d 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -22,7 +22,10 @@ from vllm.model_executor.layers.linear import UnquantizedLinearMethod from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) +from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod +from vllm.model_executor.layers.quantization.utils.quant_utils import ( + GroupShape) from vllm.model_executor.models.vision import get_vit_attn_backend from vllm.platforms import _Backend, current_platform from vllm.utils import GiB_bytes, direct_register_custom_op @@ -247,6 +250,13 @@ def __init__( "This may be caused by insufficient memory to allocate " "kv cache.") from e + # for attn backends supporting query quantization + self.query_quant = None + if self.kv_cache_dtype.startswith( + "fp8") and self.attn_backend.supports_quant_query_input: + self.query_quant = QuantFP8(static=True, + group_shape=GroupShape.PER_TENSOR) + def forward( self, query: torch.Tensor, @@ -270,11 +280,22 @@ def forward( attn_metadata = get_forward_context().attn_metadata if attn_metadata.enable_kv_scales_calculation: self.calc_kv_scales(query, key, value) + + output_dtype = query.dtype + if self.query_quant is not None: + # quantizing with a simple torch operation enables + # torch.compile to fuse this into previous ops + # which reduces overheads during decoding. 
+ # Otherwise queries are quantized using custom ops + # which causes decoding overheads + assert self.kv_cache_dtype in {"fp8", "fp8_e4m3"} + query, _ = self.query_quant(query, self._q_scale) + if self.use_output: output_shape = (output_shape if output_shape is not None else query.shape) output = torch.zeros(output_shape, - dtype=query.dtype, + dtype=output_dtype, device=query.device) hidden_size = output_shape[-1] # We skip reshaping query, key and value tensors for the MLA diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index a2e18f970bec..7a50bb5d3134 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -7,7 +7,6 @@ import numpy as np import torch -from vllm import _custom_ops as ops from vllm import envs from vllm.attention.backends.abstract import (AttentionBackend, AttentionImpl, AttentionMetadata, AttentionType, @@ -38,6 +37,7 @@ class FlashAttentionBackend(AttentionBackend): accept_output_buffer: bool = True + supports_quant_query_input: bool = True @classmethod def get_supported_dtypes(cls) -> list[torch.dtype]: @@ -506,16 +506,11 @@ def forward( ) if self.kv_cache_dtype.startswith("fp8"): + # queries are quantized in the attention layer dtype = FlashAttentionBackend.get_fp8_dtype_for_flashattn( self.kv_cache_dtype) key_cache = key_cache.view(dtype) value_cache = value_cache.view(dtype) - num_tokens, num_heads, head_size = query.shape - query, _ = ops.scaled_fp8_quant( - query.reshape( - (num_tokens, num_heads * head_size)).contiguous(), - layer._q_scale) - query = query.reshape((num_tokens, num_heads, head_size)) if not attn_metadata.use_cascade: cu_seqlens_q = attn_metadata.query_start_loc From eb32335e355f194c1b9d27a570176d4cf7fb9abf Mon Sep 17 00:00:00 2001 From: "Li, Jiang" Date: Thu, 25 Sep 2025 21:29:11 +0800 Subject: [PATCH 379/518] [CPU] update torch 2.8 and fix missing fields in TorchSDPAMetadata (#25652) Signed-off-by: jiang1.li --- .../scripts/hardware_ci/run-cpu-test.sh | 7 +--- docker/Dockerfile.cpu | 3 -- requirements/cpu-build.txt | 4 +- requirements/cpu.txt | 4 +- vllm/v1/attention/backends/cpu_attn.py | 14 ++++++- vllm/v1/sample/ops/topk_topp_sampler.py | 41 +++++++++++++++++++ vllm/v1/worker/cpu_worker.py | 39 ------------------ 7 files changed, 59 insertions(+), 53 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-cpu-test.sh b/.buildkite/scripts/hardware_ci/run-cpu-test.sh index 64943d2a15a7..7512cb1bbed0 100644 --- a/.buildkite/scripts/hardware_ci/run-cpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-cpu-test.sh @@ -58,11 +58,8 @@ function cpu_tests() { # pytest -x -v -s tests/kernels/attention/test_cache.py -m cpu_model # pytest -x -v -s tests/kernels/attention/test_mla_decode_cpu.py -m cpu_model - # Note: disable Bart until supports V1 - pytest -x -v -s tests/models/language/generation -m cpu_model \ - --ignore=tests/models/language/generation/test_bart.py - VLLM_CPU_SGL_KERNEL=1 pytest -x -v -s tests/models/language/generation -m cpu_model \ - --ignore=tests/models/language/generation/test_bart.py + pytest -x -v -s tests/models/language/generation -m cpu_model + VLLM_CPU_SGL_KERNEL=1 pytest -x -v -s tests/models/language/generation -m cpu_model pytest -x -v -s tests/models/language/pooling -m cpu_model pytest -x -v -s tests/models/multimodal/generation \ diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index 1a0981f8ea6d..b80157d2fd75 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -114,9 +114,6 @@ WORKDIR 
/workspace/vllm RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \ cp requirements/test.in requirements/cpu-test.in && \ sed -i '/mamba_ssm/d' requirements/cpu-test.in && \ - sed -i 's/^torch==.*/torch==2.6.0/g' requirements/cpu-test.in && \ - sed -i 's/torchaudio.*/torchaudio/g' requirements/cpu-test.in && \ - sed -i 's/torchvision.*/torchvision/g' requirements/cpu-test.in && \ uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu RUN --mount=type=cache,target=/root/.cache/uv \ diff --git a/requirements/cpu-build.txt b/requirements/cpu-build.txt index 118558942f76..b511b0f5d31b 100644 --- a/requirements/cpu-build.txt +++ b/requirements/cpu-build.txt @@ -1,12 +1,10 @@ -# Temporarily used for x86 CPU backend to avoid performance regression of torch>2.6.0+cpu, -# see https://github.com/pytorch/pytorch/pull/151218 cmake>=3.26.1 ninja packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218 +torch==2.8.0+cpu; platform_machine == "x86_64" torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" or platform_system == "Darwin" wheel jinja2>=3.1.6 diff --git a/requirements/cpu.txt b/requirements/cpu.txt index a48cb9fde000..2db6d87ee67b 100644 --- a/requirements/cpu.txt +++ b/requirements/cpu.txt @@ -8,7 +8,7 @@ numba == 0.61.2; python_version > '3.9' and platform_machine != "s390x" packaging>=24.2 setuptools>=77.0.3,<80.0.0 --extra-index-url https://download.pytorch.org/whl/cpu -torch==2.6.0+cpu; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218 +torch==2.8.0+cpu; platform_machine == "x86_64" torch==2.8.0; platform_system == "Darwin" torch==2.8.0; platform_machine == "ppc64le" or platform_machine == "aarch64" @@ -23,7 +23,7 @@ datasets # for benchmark scripts # Intel Extension for PyTorch, only for x86_64 CPUs intel-openmp==2024.2.1; platform_machine == "x86_64" -intel_extension_for_pytorch==2.6.0; platform_machine == "x86_64" # torch>2.6.0+cpu has performance regression on x86 platform, see https://github.com/pytorch/pytorch/pull/151218 +intel_extension_for_pytorch==2.8.0; platform_machine == "x86_64" triton==3.2.0; platform_machine == "x86_64" # Triton is required for torch 2.6+cpu, as it is imported in torch.compile. # Use this to gather CPU info and optimize based on ARM Neoverse cores diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index 72f26c23b60b..4bae13b4f77c 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -85,6 +85,19 @@ def use_cascade_attention(*args, **kwargs) -> bool: @dataclass class TorchSDPAMetadata(AttentionMetadata): + """Attention metadata for prefill and decode batched together.""" + # Total number of prefill requests. + num_prefills: int + # Number of prefill tokens. + num_prefill_tokens: int + # Number of decode tokens. Note that it is equivalent to the number of + # decode requests. + num_decode_tokens: int + # (num_tokens,). The indices of the token slots that input tokens will be + # stored into. 
E.g., if `slot_mapping` is [35, 2, 17] and the block size + # is 16, the three tokens are stored in the 3rd slot in block 2, 2nd slot + # in block 0, and 1st slot in block 1, respectively. + slot_mapping: torch.Tensor """Metadata for PagedAttention.""" # (batch_size,). The length of sequences (entire tokens seen so far) per # sequence. @@ -420,7 +433,6 @@ def build(self, num_prompt_req], # prefill query_start_loc=query_start_loc_cpu[:num_reqs + 1], # for logits index - enable_kv_scales_calculation=False, ) return attn_metadata diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index 747e52f2e589..d3c5019f1228 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -68,6 +68,8 @@ def __init__(self, logprobs_mode: LogprobsMode = "raw_logprobs") -> None: "native implementation of top-p & top-k sampling. For the " "best performance, please install FlashInfer.") self.forward = self.forward_native + elif current_platform.is_cpu(): + self.forward = self.forward_cpu else: self.forward = self.forward_native @@ -119,6 +121,45 @@ def forward_cuda( # because of slicing operation in logits_processor. return flashinfer_sample(logits.contiguous(), k, p, generators), None + def forward_cpu( + self, + logits: torch.Tensor, + generators: dict[int, torch.Generator], + k: Optional[torch.Tensor], + p: Optional[torch.Tensor], + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: + """ + PyTorch-native implementation of top-k and top-p sampling for CPU. + + The logits tensor may be updated in-place. + """ + logits = self.apply_top_k_top_p(logits, k, p) + logits_to_return = None + if self.logprobs_mode == "processed_logits": + logits_to_return = logits + elif self.logprobs_mode == "processed_logprobs": + logits_to_return = logits.log_softmax(dim=-1, dtype=torch.float32) + + # Note: this is a workaround for + # https://github.com/pytorch/pytorch/pull/151218 + @torch.compile(dynamic=True) + def compiled_random_sample(logits: torch.Tensor) -> torch.Tensor: + probs = logits.softmax(dim=-1, dtype=torch.float32) + q = torch.empty_like(probs) + q.exponential_() + return probs.div(q).argmax(dim=-1).view(-1) + + if len(generators) != logits.shape[0]: + return compiled_random_sample(logits), logits_to_return + else: + probs = logits.softmax(dim=-1, dtype=torch.float32) + q = torch.empty_like(probs) + q.exponential_() + for i, generator in generators.items(): + q[i].exponential_(generator=generator) + + return probs.div_(q).argmax(dim=-1).view(-1), logits_to_return + def apply_top_k_top_p( logits: torch.Tensor, diff --git a/vllm/v1/worker/cpu_worker.py b/vllm/v1/worker/cpu_worker.py index daee91ec404f..c6a686d6b75e 100644 --- a/vllm/v1/worker/cpu_worker.py +++ b/vllm/v1/worker/cpu_worker.py @@ -8,18 +8,13 @@ from vllm import envs from vllm.config import VllmConfig -from vllm.distributed.parallel_state import get_pp_group, get_tp_group from vllm.logger import init_logger from vllm.model_executor.utils import set_random_seed from vllm.platforms import CpuArchEnum, current_platform from vllm.platforms.cpu import CpuPlatform, LogicalCPUInfo -from vllm.sequence import IntermediateTensors -from vllm.v1.core.sched.output import SchedulerOutput -from vllm.v1.outputs import ModelRunnerOutput from vllm.v1.worker.cpu_model_runner import CPUModelRunner from vllm.v1.worker.gpu_worker import (Worker, init_worker_distributed_environment) -from vllm.v1.worker.utils import is_residual_scattered_for_sp logger = init_logger(__name__) @@ -102,40 +97,6 @@ 
def compile_or_warm_up_model(self) -> None: set_random_seed(self.model_config.seed) self.model_runner.warming_up_model() - @torch.inference_mode() - def execute_model( - self, - scheduler_output: "SchedulerOutput", - ) -> Optional[ModelRunnerOutput]: - intermediate_tensors = None - num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens - num_input_tokens = self.model_runner._get_num_input_tokens( - num_scheduled_tokens) - all_gather_tensors = { - "residual": - not is_residual_scattered_for_sp(self.vllm_config, - num_input_tokens) - } - if not get_pp_group().is_first_rank: - intermediate_tensors = IntermediateTensors( - get_pp_group().recv_tensor_dict( - all_gather_group=get_tp_group(), - all_gather_tensors=all_gather_tensors)) - - output = self.model_runner.execute_model(scheduler_output, - intermediate_tensors) - - if not get_pp_group().is_last_rank: - assert isinstance(output, IntermediateTensors) - get_pp_group().send_tensor_dict( - output.tensors, - all_gather_group=get_tp_group(), - all_gather_tensors=all_gather_tensors) - return None - - assert isinstance(output, ModelRunnerOutput) - return output if self.is_driver_worker else None - def _get_autobind_cpu_ids( self, cpu_selector: Callable[[list[LogicalCPUInfo]], list[LogicalCPUInfo]] From 532a6cfccbaa1bc943512cca06b48d5d3500669d Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 25 Sep 2025 10:38:16 -0400 Subject: [PATCH 380/518] [ux] Switch a warning to debug about a pytorch fallback (#23750) Signed-off-by: Russell Bryant --- vllm/v1/sample/ops/topk_topp_sampler.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/vllm/v1/sample/ops/topk_topp_sampler.py b/vllm/v1/sample/ops/topk_topp_sampler.py index d3c5019f1228..5bcf1b585441 100644 --- a/vllm/v1/sample/ops/topk_topp_sampler.py +++ b/vllm/v1/sample/ops/topk_topp_sampler.py @@ -109,9 +109,9 @@ def forward_cuda( # CPU-GPU synchronization while `flashinfer_sample` does. if (k is None and p is None) or generators: if generators: - logger.warning_once("FlashInfer 0.2.3+ does not support " - "per-request generators. Falling back to " - "PyTorch-native implementation.") + logger.debug_once("FlashInfer 0.2.3+ does not support " + "per-request generators. 
Falling back to " + "PyTorch-native implementation.") return self.forward_native(logits, generators, k, p) assert self.logprobs_mode not in ( "processed_logits", "processed_logprobs" From 03858e6d1c852798ac7e2aa4e02f17c248e2bbed Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Thu, 25 Sep 2025 22:46:04 +0800 Subject: [PATCH 381/518] [Bugfix] Fix InternS1 video processing after Transformers v4.56 (#25644) Signed-off-by: Isotr0py --- .buildkite/test-pipeline.yaml | 3 +- .../multimodal/processing/test_common.py | 1 + vllm/model_executor/models/interns1.py | 11 +++- vllm/transformers_utils/processor.py | 56 ++++++++++++++++++- 4 files changed, 68 insertions(+), 3 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 200ed344c4e8..7a1f38606062 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -770,8 +770,9 @@ steps: - pytest -v -s tests/models/multimodal/processing/ - pytest -v -s tests/models/multimodal/test_mapping.py - python3 examples/offline_inference/basic/chat.py - - python3 examples/offline_inference/audio_language.py --model-type whisper - python3 examples/offline_inference/vision_language.py --model-type qwen2_5_vl + # Whisper needs spawn method to avoid deadlock + - VLLM_WORKER_MULTIPROC_METHOD=spawn python3 examples/offline_inference/audio_language.py --model-type whisper - label: Blackwell Test # 38 min timeout_in_minutes: 60 diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index 4eb8e0cfaa5d..ddc675b0849c 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -213,6 +213,7 @@ def _test_processing_correctness( MM_DATA_PATCHES = { # GLM4.1V and Qwen3-VL requires video metadata to be included in the input "glm4v": glm4_1v_patch_mm_data, + "glm4v_moe": glm4_1v_patch_mm_data, "qwen3_vl": qwen3_vl_patch_mm_data, "qwen3_vl_moe": qwen3_vl_patch_mm_data, } diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py index ba72c288b2b1..197d629b906f 100644 --- a/vllm/model_executor/models/interns1.py +++ b/vllm/model_executor/models/interns1.py @@ -16,6 +16,8 @@ from transformers.activations import ACT2FN from transformers.models.got_ocr2.image_processing_got_ocr2_fast import ( GotOcr2ImageProcessorFast) +from transformers.models.internvl.video_processing_internvl import ( + InternVLVideoProcessor) from vllm.config import VllmConfig from vllm.model_executor.layers.quantization import QuantizationConfig @@ -31,6 +33,8 @@ PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors +from vllm.transformers_utils.processor import ( + cached_video_processor_from_config) from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import (MultiModalEmbeddings, SupportsLoRA, @@ -152,7 +156,12 @@ class InternS1ProcessingInfo(BaseProcessingInfo): """ProcessingInfo for InternS1-style models.""" def get_hf_processor(self, **kwargs: object) -> InternVLProcessor: - return self.ctx.get_hf_processor(InternVLProcessor, **kwargs) + hf_processor = self.ctx.get_hf_processor(InternVLProcessor, **kwargs) + hf_processor.video_processor = cached_video_processor_from_config( + self.ctx.model_config, + processor_cls=InternVLVideoProcessor, + **kwargs) + return hf_processor def get_supported_mm_limits(self) -> Mapping[str, Optional[int]]: return {"image": None, "video": None} diff --git 
a/vllm/transformers_utils/processor.py b/vllm/transformers_utils/processor.py index a630d940b257..51bcce6c10e2 100644 --- a/vllm/transformers_utils/processor.py +++ b/vllm/transformers_utils/processor.py @@ -5,10 +5,11 @@ from typing import TYPE_CHECKING, Any, Optional, Union, cast from transformers import (AutoFeatureExtractor, AutoImageProcessor, - AutoProcessor) + AutoProcessor, AutoVideoProcessor) from transformers.feature_extraction_utils import FeatureExtractionMixin from transformers.image_processing_utils import BaseImageProcessor from transformers.processing_utils import ProcessorMixin +from transformers.video_processing_utils import BaseVideoProcessor from typing_extensions import TypeVar from vllm.utils import get_allowed_kwarg_only_overrides @@ -17,6 +18,7 @@ from vllm.config import ModelConfig _P = TypeVar("_P", bound=ProcessorMixin, default=ProcessorMixin) +_V = TypeVar("_V", bound=BaseVideoProcessor, default=BaseVideoProcessor) class HashableDict(dict): @@ -243,3 +245,55 @@ def cached_image_processor_from_config( trust_remote_code=model_config.trust_remote_code, **_merge_mm_kwargs(model_config, AutoImageProcessor, **kwargs), ) + + +def get_video_processor( + processor_name: str, + *args: Any, + revision: Optional[str] = None, + trust_remote_code: bool = False, + processor_cls_overrides: Optional[type[_V]] = None, + **kwargs: Any, +): + """Load a video processor for the given model name via HuggingFace.""" + try: + processor_cls = processor_cls_overrides or AutoVideoProcessor + processor = processor_cls.from_pretrained( + processor_name, + *args, + revision=revision, + trust_remote_code=trust_remote_code, + **kwargs) + except ValueError as e: + # If the error pertains to the processor class not existing or not + # currently being imported, suggest using the --trust-remote-code flag. + # Unlike AutoTokenizer, AutoVideoProcessor does not separate such errors + if not trust_remote_code: + err_msg = ( + "Failed to load the video processor. 
If the video processor is " + "a custom processor not yet available in the HuggingFace " + "transformers library, consider setting " + "`trust_remote_code=True` in LLM or using the " + "`--trust-remote-code` flag in the CLI.") + raise RuntimeError(err_msg) from e + else: + raise e + + return cast(BaseVideoProcessor, processor) + + +cached_get_video_processor = lru_cache(get_video_processor) + + +def cached_video_processor_from_config( + model_config: "ModelConfig", + processor_cls: Optional[type[_V]] = None, + **kwargs: Any, +): + return cached_get_video_processor( + model_config.model, + revision=model_config.revision, + trust_remote_code=model_config.trust_remote_code, + processor_cls_overrides=processor_cls, # type: ignore[arg-type] + **_merge_mm_kwargs(model_config, AutoVideoProcessor, **kwargs), + ) From 0754ac4c492776f33d6092391ad2bf4a9dc32925 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Thu, 25 Sep 2025 17:05:12 +0200 Subject: [PATCH 382/518] [Misc] Remove cruft file in repo (#25678) Signed-off-by: NickLucche --- tests/test_test.py | 61 ---------------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 tests/test_test.py diff --git a/tests/test_test.py b/tests/test_test.py deleted file mode 100644 index dc8c9814ede3..000000000000 --- a/tests/test_test.py +++ /dev/null @@ -1,61 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - -from vllm import LLM, envs -from vllm.sampling_params import SamplingParams - -if not envs.VLLM_USE_V1: - pytest.skip( - "Skipping V1 tests. Rerun with `VLLM_USE_V1=1` to test.", - allow_module_level=True, - ) - - -@pytest.mark.parametrize("model_name", ["Qwen/Qwen2.5-1.5B-Instruct"]) -# TODO TPU will appear busy if we fan-out test params here -@pytest.mark.parametrize("n_prompts", [1]) -def test_logprobs(model_name: str, n_prompts: int): - """ - Request top logprobs with different sampling settings and check - that results contains the requested number, ordered ascendingly. - """ - - def check_num_logprobs(logprobs, expected_num: int): - for step in logprobs: - prev_logp = 1.0 - # order by rank - sorted_step = dict( - sorted(step.items(), key=lambda item: item[1].rank)) - - if len(step) != expected_num: - print("watch out", sorted_step) - - # check results are ordered by prob value - # assert len(step) == expected_num - for rankno, (tid, logp) in enumerate(sorted_step.items()): - assert logp.logprob <= prev_logp - prev_logp = logp.logprob - assert logp.rank == rankno + 1 - - llm = LLM(model_name, - enforce_eager=False, - max_num_seqs=1, - max_model_len=128, - max_num_batched_tokens=128) - prompts = [ - "Write a short story about a robot that dreams for the first time." 
- ] * n_prompts - greedy_sampling_params = SamplingParams(temperature=0.0, max_tokens=64,\ - logprobs=4) - regular_sampling_params = SamplingParams(temperature=0.4, max_tokens=64,\ - logprobs=4) - topkp_sampling_params = SamplingParams(temperature=0.4, max_tokens=64,\ - logprobs=4, top_k=12, top_p=0.5) - - for sp in [greedy_sampling_params, regular_sampling_params, \ - topkp_sampling_params]: - output = llm.generate(prompts, sp) - for o in output: - check_num_logprobs(o.outputs[0].logprobs, 4) From 2e5df88c92fd066d05b90a37a6ff5f802f7be814 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Thu, 25 Sep 2025 11:16:06 -0400 Subject: [PATCH 383/518] [Logging] Remove TORCH_NCCL_AVOID_RECORD_STREAMS to squash a warning (#25532) Signed-off-by: Tyler Michael Smith --- vllm/v1/worker/gpu_worker.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 9082bbfd8f8e..8c75e8914857 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -155,14 +155,6 @@ def initialize_cache(self, num_gpu_blocks: int, def init_device(self): if self.device_config.device.type == "cuda": - # torch.distributed.all_reduce does not free the input tensor until - # the synchronization point. This causes the memory usage to grow - # as the number of all_reduce calls increases. This env var disables - # this behavior. - # Related issue: - # https://discuss.pytorch.org/t/cuda-allocation-lifetime-for-inputs-to-distributed-all-reduce/191573 - os.environ["TORCH_NCCL_AVOID_RECORD_STREAMS"] = "1" - # This env var set by Ray causes exceptions with graph building. os.environ.pop("NCCL_ASYNC_ERROR_HANDLING", None) self.device = torch.device(f"cuda:{self.local_rank}") From e04a1b6b21e1c410abbd6fa87880d5aa0c4277c2 Mon Sep 17 00:00:00 2001 From: AlonKejzman Date: Thu, 25 Sep 2025 18:40:14 +0300 Subject: [PATCH 384/518] =?UTF-8?q?[BUGFIX]=20Fix=20crash=20in=20Eagle=20S?= =?UTF-8?q?peculative=20Decoding=20models=20when=20exceedin=E2=80=A6=20(#2?= =?UTF-8?q?4662)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: AlonKejzman --- vllm/v1/worker/gpu_model_runner.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 17f8be86af2f..652da189b8dd 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2310,7 +2310,20 @@ def propose_draft_token_ids(sampled_token_ids): use_padded_batch_for_eagle = self.speculative_config and \ self.speculative_config.use_eagle() and \ not self.speculative_config.disable_padded_drafter_batch - if use_padded_batch_for_eagle: + effective_drafter_max_model_len = self.max_model_len + if effective_drafter_max_model_len is None: + effective_drafter_max_model_len = self.model_config.max_model_len + if (self.speculative_config + and self.speculative_config.draft_model_config is not None + and self.speculative_config.draft_model_config.max_model_len + is not None): + effective_drafter_max_model_len = ( + self.speculative_config.draft_model_config.max_model_len) + input_fits_in_drafter = spec_decode_common_attn_metadata and ( + spec_decode_common_attn_metadata.seq_lens.max() + + self.speculative_config.num_speculative_tokens + <= effective_drafter_max_model_len) + if use_padded_batch_for_eagle and input_fits_in_drafter: # EAGLE speculative decoding can use the GPU sampled tokens # as inputs, and does not need to wait for bookkeeping 
to finish. propose_draft_token_ids(sampler_output.sampled_token_ids) @@ -2328,7 +2341,8 @@ def propose_draft_token_ids(sampled_token_ids): logits, hidden_states, num_scheduled_tokens) - if self.speculative_config and not use_padded_batch_for_eagle: + if (self.speculative_config and not use_padded_batch_for_eagle + and input_fits_in_drafter): # ngram and other speculative decoding methods use the sampled # tokens on the CPU, so they are run after bookkeeping. propose_draft_token_ids(valid_sampled_token_ids) From 916bd9204d4f895d7a6d93199d87a3fbe3e1546d Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Thu, 25 Sep 2025 12:45:06 -0400 Subject: [PATCH 385/518] Revert "[Bug] Dynamo Unsupported due to `BasevLLMParameter.torch_function` calling disabled super()" (#25681) Signed-off-by: yewentao256 Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> --- vllm/model_executor/parameter.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/vllm/model_executor/parameter.py b/vllm/model_executor/parameter.py index 9b9d89ebaed1..66add98dab44 100644 --- a/vllm/model_executor/parameter.py +++ b/vllm/model_executor/parameter.py @@ -12,7 +12,6 @@ from vllm.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size) from vllm.logger import init_logger -from vllm.utils import is_torch_equal_or_newer __all__ = [ "BasevLLMParameter", "PackedvLLMParameter", "PerTensorScaleParameter", @@ -115,15 +114,6 @@ def _shard_id_as_int(self, shard_id: Union[str, int]) -> int: @classmethod def __torch_function__(cls, func, types, args=(), kwargs=None): - if not is_torch_equal_or_newer("2.8.0"): - logger.warning_once( - "Torch %s detected (<2.8.0): returning NotImplemented in " - "BasevLLMParameter.__torch_function__ to avoid potential " - "TorchDynamo issues.", - torch.__version__, - ) - return NotImplemented - if kwargs is None: kwargs = {} return super().__torch_function__(func, types, args, kwargs) From 13cc7f5370ed5d629028143311e95ac6646819cb Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Thu, 25 Sep 2025 13:04:48 -0400 Subject: [PATCH 386/518] [BugFix] Fix DBO hang (#25625) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_ubatch_wrapper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index 8f6044e59418..5e4c1d32ab6c 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -366,7 +366,8 @@ def __call__(self, *args, **kwargs): cudagraph_runtime_mode=CUDAGraphMode.NONE) with self.sm_control: return self._capture_ubatches(ubatch_metadata, self.model) - elif num_tokens in self.cudagraphs: + elif num_tokens in self.cudagraphs \ + and cudagraph_runtime_mode is CUDAGraphMode.FULL: cudagraph_metadata = self.cudagraphs[num_tokens] cudagraph_metadata.cudagraph.replay() return cudagraph_metadata.outputs From b8d9e4a326f5193d094b95628baaa9de9ba452a2 Mon Sep 17 00:00:00 2001 From: Tao Hui Date: Fri, 26 Sep 2025 01:12:50 +0800 Subject: [PATCH 387/518] [Model] Add optional parameter to reasoning parser constructor (#25554) Signed-off-by: taohui Signed-off-by: Tao Hui Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- vllm/reasoning/abs_reasoning_parsers.py | 2 +- vllm/reasoning/basic_parsers.py | 4 ++-- vllm/reasoning/glm4_moe_reasoning_parser.py | 4 ++-- vllm/reasoning/gptoss_reasoning_parser.py | 4 ++-- vllm/reasoning/granite_reasoning_parser.py | 4 ++-- 
vllm/reasoning/hunyuan_a13b_reasoning_parser.py | 4 ++-- vllm/reasoning/mistral_reasoning_parser.py | 4 ++-- vllm/reasoning/step3_reasoning_parser.py | 4 ++-- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py index 39b08ec11107..320009c2611e 100644 --- a/vllm/reasoning/abs_reasoning_parsers.py +++ b/vllm/reasoning/abs_reasoning_parsers.py @@ -34,7 +34,7 @@ class ReasoningParser: It is used to extract reasoning content from the model output. """ - def __init__(self, tokenizer: AnyTokenizer): + def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs): self.model_tokenizer = tokenizer @cached_property diff --git a/vllm/reasoning/basic_parsers.py b/vllm/reasoning/basic_parsers.py index 03cb882c2693..cea4b8601ae7 100644 --- a/vllm/reasoning/basic_parsers.py +++ b/vllm/reasoning/basic_parsers.py @@ -35,8 +35,8 @@ def end_token(self) -> str: """The token that ends reasoning content.""" raise NotImplementedError - def __init__(self, tokenizer: AnyTokenizer): - super().__init__(tokenizer) + def __init__(self, tokenizer: AnyTokenizer, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) if not self.model_tokenizer: raise ValueError( diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py index 460e38d2d396..11e828a7039f 100644 --- a/vllm/reasoning/glm4_moe_reasoning_parser.py +++ b/vllm/reasoning/glm4_moe_reasoning_parser.py @@ -26,8 +26,8 @@ class Glm4MoeModelReasoningParser(ReasoningParser): from the model's output. """ - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) self.think_start_token = "" self.think_end_token = "" diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py index 3bd4d872ce22..b0988d5d2618 100644 --- a/vllm/reasoning/gptoss_reasoning_parser.py +++ b/vllm/reasoning/gptoss_reasoning_parser.py @@ -24,8 +24,8 @@ class GptOssReasoningParser(ReasoningParser): is only used for detecting the end of the reasoning content. """ - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) self.reasoning_end_token_ids = self.model_tokenizer.encode( "<|start|>assistant<|channel|>final<|message|>") diff --git a/vllm/reasoning/granite_reasoning_parser.py b/vllm/reasoning/granite_reasoning_parser.py index 212e14b09286..b76170f39f10 100644 --- a/vllm/reasoning/granite_reasoning_parser.py +++ b/vllm/reasoning/granite_reasoning_parser.py @@ -24,8 +24,8 @@ class GraniteReasoningParser(ReasoningParser): and "Here is my response:" to separate its thinking / response outputs. 
""" - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) # NOTE: There have been some observed occurrences of quantized # instances of the current models using "Here's" instead of "Here is", diff --git a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py index 9deec8a1e8fb..6e3b056d6b62 100644 --- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py +++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py @@ -40,8 +40,8 @@ class HunyuanA13BReasoningParser(ReasoningParser): response ends: "\n": [524, 9399, 29] """ - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) self.think_start_expr = r"\n" self.think_end_expr = r"\n\n" diff --git a/vllm/reasoning/mistral_reasoning_parser.py b/vllm/reasoning/mistral_reasoning_parser.py index 5cb54e6acbb3..ceda96ca6a6d 100644 --- a/vllm/reasoning/mistral_reasoning_parser.py +++ b/vllm/reasoning/mistral_reasoning_parser.py @@ -21,12 +21,12 @@ class MistralReasoningParser(DeepSeekR1ReasoningParser): text. This parser extracts the reasoning content from the model output. """ - def __init__(self, tokenizer: MistralTokenizer): + def __init__(self, tokenizer: MistralTokenizer, *args, **kwargs): if not isinstance(tokenizer, MistralTokenizer): raise ValueError( "The tokenizer must be an instance of MistralTokenizer.") - ReasoningParser.__init__(self, tokenizer) + ReasoningParser.__init__(self, tokenizer, *args, **kwargs) if not self.model_tokenizer: raise ValueError( diff --git a/vllm/reasoning/step3_reasoning_parser.py b/vllm/reasoning/step3_reasoning_parser.py index f642ea977c58..6e5deb52d345 100644 --- a/vllm/reasoning/step3_reasoning_parser.py +++ b/vllm/reasoning/step3_reasoning_parser.py @@ -24,8 +24,8 @@ class Step3ReasoningParser(ReasoningParser): text. This parser extracts all content before as reasoning content. 
""" - def __init__(self, tokenizer: PreTrainedTokenizerBase): - super().__init__(tokenizer) + def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs): + super().__init__(tokenizer, *args, **kwargs) self.think_end_token = "" self.reasoning_regex = re.compile(rf"(.*?){self.think_end_token}", From 0ea80c87d9764a27f1cbdec446e38470a3f8c15b Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 01:13:07 +0800 Subject: [PATCH 388/518] [Model] Define `merge_by_field_config` MM interface (#25676) Signed-off-by: DarkLight1337 --- .../processing/test_tensor_schema.py | 23 +++++++++++++++---- vllm/config/model.py | 5 ++-- vllm/model_executor/models/interfaces.py | 6 +++++ vllm/v1/worker/gpu_model_runner.py | 13 +++++++++-- vllm/v1/worker/tpu_model_runner.py | 9 ++++++-- 5 files changed, 44 insertions(+), 12 deletions(-) diff --git a/tests/models/multimodal/processing/test_tensor_schema.py b/tests/models/multimodal/processing/test_tensor_schema.py index d5d5bfaa3b45..9d132ecc34b7 100644 --- a/tests/models/multimodal/processing/test_tensor_schema.py +++ b/tests/models/multimodal/processing/test_tensor_schema.py @@ -19,6 +19,8 @@ init_distributed_environment, initialize_model_parallel) from vllm.model_executor.model_loader.utils import set_default_torch_dtype +from vllm.model_executor.models.interfaces import (SupportsMultiModal, + supports_multimodal) from vllm.multimodal import MULTIMODAL_REGISTRY, BatchedTensorInputs from vllm.multimodal.processing import (BaseMultiModalProcessor, InputProcessingContext) @@ -88,6 +90,7 @@ def resize_mm_data( def create_batched_mm_kwargs( + model_cls: type[SupportsMultiModal], model_config: ModelConfig, processor: BaseMultiModalProcessor, size_factors: tuple[float, ...] = (1.0, 0.5, 0.25), @@ -127,16 +130,22 @@ def create_batched_mm_kwargs( mm_data=resized_mm_data, hf_processor_mm_kwargs=processor_inputs.hf_processor_mm_kwargs, tokenization_kwargs=processor_inputs.tokenization_kwargs, - )["mm_kwargs"] + )["mm_kwargs"].require_data() items = [ item for modality in supported_mm_limits for item in mm_kwargs[modality] ] - return group_mm_kwargs_by_modality(items) + return group_mm_kwargs_by_modality( + items, + merge_by_field_config=model_cls.merge_by_field_config, + ) @contextmanager -def initialize_dummy_model(model_cls: nn.Module, model_config: ModelConfig): +def initialize_dummy_model( + model_cls: type[nn.Module], + model_config: ModelConfig, +): temp_file = tempfile.mkstemp()[1] init_distributed_environment( world_size=1, @@ -198,8 +207,12 @@ def test_model_tensor_schema(model_arch: str, model_id: str): hf_overrides=hf_overrides_fn, skip_tokenizer_init=model_info.skip_tokenizer_init, enforce_eager=model_info.enforce_eager, - dtype=model_info.dtype) + dtype=model_info.dtype, + ) + model_cls = MULTIMODAL_REGISTRY._get_model_cls(model_config) + assert supports_multimodal(model_cls) + factories = MULTIMODAL_REGISTRY._processor_factories[model_cls] inputs_parse_methods = [] @@ -228,7 +241,7 @@ def test_model_tensor_schema(model_arch: str, model_id: str): with initialize_dummy_model(model_cls, model_config) as model: for modality, _, mm_kwargs in create_batched_mm_kwargs( - model_config, processor): + model_cls, model_config, processor): for method_name in inputs_parse_methods: print(f"Testing `{method_name}` with modality={modality} " f"and mm_kwargs{list(mm_kwargs.keys())}") diff --git a/vllm/config/model.py b/vllm/config/model.py index 0ded70388b8a..302260e7e993 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -63,13 +63,12 @@ 
ConvertOption = Literal["auto", ConvertType] TaskOption = Literal["auto", "generate", "embedding", "embed", "classify", "score", "reward", "transcription", "draft"] -_ResolvedTask = Literal["generate", "transcription", "encode", "embed", - "classify", "reward", "draft"] TokenizerMode = Literal["auto", "slow", "mistral", "custom"] ModelDType = Literal["auto", "half", "float16", "bfloat16", "float", "float32"] LogprobsMode = Literal["raw_logits", "raw_logprobs", "processed_logits", "processed_logprobs"] -HfOverrides = Union[dict[str, Any], Callable[[type], type]] +HfOverrides = Union[dict[str, Any], Callable[[PretrainedConfig], + PretrainedConfig]] ModelImpl = Literal["auto", "vllm", "transformers", "terratorch"] _RUNNER_TASKS: dict[RunnerType, list[TaskOption]] = { diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index 6be70c4b3b21..e5cb5eb0bacb 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -64,6 +64,12 @@ class SupportsMultiModal(Protocol): `multimodal_config.mm_encoder_tp_mode="data"`. """ + merge_by_field_config: ClassVar[bool] = False + """ + A flag that indicates which implementation of + `vllm.multimodal.utils.group_mm_kwargs_by_modality` to use. + """ + @classmethod def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: """ diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 652da189b8dd..f6f697d74a20 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -40,7 +40,8 @@ from vllm.model_executor.layers.mamba.abstract import MambaBase from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader -from vllm.model_executor.models.interfaces import (is_mixture_of_experts, +from vllm.model_executor.models.interfaces import (SupportsMultiModal, + is_mixture_of_experts, supports_eagle3, supports_mrope, supports_transcription) @@ -777,11 +778,13 @@ def _extract_mm_kwargs( mm_kwargs.append(feature.data) # Input all modalities at once + model = cast(SupportsMultiModal, self.model) mm_kwargs_combined: BatchedTensorInputs = {} for _, _, mm_kwargs_group in group_mm_kwargs_by_modality( mm_kwargs, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, ): mm_kwargs_combined.update(mm_kwargs_group) @@ -1525,11 +1528,13 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # in the same batch while still being able to benefit from batching # multimodal inputs. The proper solution should be reordering the # encoder outputs. + model = cast(SupportsMultiModal, self.model) encoder_outputs = [] for _, num_items, mm_kwargs_group in group_mm_kwargs_by_modality( mm_kwargs, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, ): # Run the encoder. # `curr_group_outputs` is either of the following: @@ -1538,7 +1543,7 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # 2. A list or tuple (length: num_items) of tensors, each of shape # (feature_size, hidden_size) in case the feature size is dynamic # depending on the input multimodal items. 
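# (Illustrative sketch, not part of this hunk: with the `merge_by_field_config`
#  ClassVar added to `SupportsMultiModal` above (default False), a model would
#  presumably opt in to the per-field merging path of
#  `group_mm_kwargs_by_modality` by overriding the flag at class level --
#  `MyMultiModalModel` is a hypothetical name used only for illustration:
#
#      class MyMultiModalModel(nn.Module, SupportsMultiModal):
#          merge_by_field_config = True
#
#  The runner then simply forwards `model.merge_by_field_config`, as the
#  surrounding diff shows.)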
- curr_group_outputs = self.model.get_multimodal_embeddings( + curr_group_outputs = model.get_multimodal_embeddings( **mm_kwargs_group) sanity_check_mm_encoder_outputs( @@ -1623,11 +1628,13 @@ def _extract_encoder_inputs( return {} # Group MM kwargs by modality and extract features + model = cast(SupportsMultiModal, self.model) encoder_features = {} for _, _, mm_kwargs_group in group_mm_kwargs_by_modality( mm_kwargs, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, ): # Add the grouped features to encoder_features dict # This allows the model to receive them as kwargs (e.g., @@ -2839,11 +2846,13 @@ def _get_mm_dummy_batch( dummy_mm_item = dummy_mm_data[modality][0] dummy_mm_items = [dummy_mm_item] * max_items_per_batch + model = cast(SupportsMultiModal, self.model) return next(mm_kwargs_group for _, _, mm_kwargs_group in group_mm_kwargs_by_modality( dummy_mm_items, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, )) @torch.inference_mode() diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 4a2adb1e6510..a330f50875a8 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -30,7 +30,8 @@ from vllm.lora.layers import BaseLayerWithLoRA from vllm.model_executor.model_loader import get_model_loader from vllm.model_executor.model_loader.tpu import TPUModelLoader -from vllm.model_executor.models.interfaces import supports_transcription +from vllm.model_executor.models.interfaces import (SupportsMultiModal, + supports_transcription) from vllm.model_executor.models.interfaces_base import ( is_pooling_model, is_text_generation_model) from vllm.multimodal import MULTIMODAL_REGISTRY @@ -834,11 +835,13 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # in the same batch while still being able to benefit from batching # multimodal inputs. The proper solution should be reordering the # encoder outputs. + model = cast(SupportsMultiModal, self.model) encoder_outputs = [] for _, num_items, mm_kwargs_group in group_mm_kwargs_by_modality( mm_kwargs, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, ): # Run the encoder. # `curr_group_outputs` is either of the following: @@ -848,7 +851,7 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # (feature_size, hidden_size) in case the feature size is dynamic # depending on the input multimodal items. 
torch_xla.sync(wait=False) - curr_group_outputs = self.model.get_multimodal_embeddings( + curr_group_outputs = model.get_multimodal_embeddings( **mm_kwargs_group) torch_xla.sync(wait=False) @@ -1805,11 +1808,13 @@ def _get_mm_dummy_batch( dummy_mm_item = dummy_mm_data[modality][0] dummy_mm_items = [dummy_mm_item] * max_items_per_batch + model = cast(SupportsMultiModal, self.model) return next(grouped_mm_kwargs for _, _, grouped_mm_kwargs in group_mm_kwargs_by_modality( dummy_mm_items, device=self.device, pin_memory=self.pin_memory, + merge_by_field_config=model.merge_by_field_config, )) From 71b25b0d482e8c5e49e4b586bd36fd52cc9951dc Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 26 Sep 2025 01:29:51 +0800 Subject: [PATCH 389/518] [V0 deprecation] Clean up V0 fallback in compilation config (#25675) Signed-off-by: Isotr0py --- vllm/config/__init__.py | 90 +++++++++----------------------------- vllm/config/compilation.py | 5 +-- 2 files changed, 22 insertions(+), 73 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index bf2cb325a23d..958df4c66955 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -384,19 +384,7 @@ def __post_init__(self): else: self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE - if self.cache_config.cpu_offload_gb > 0 and \ - self.compilation_config.level != CompilationLevel.NO_COMPILATION \ - and not envs.VLLM_USE_V1: - logger.warning( - "CPU offload is not supported with `torch.compile` in v0 yet." - " Disabling `torch.compile`.") - self.compilation_config.level = CompilationLevel.NO_COMPILATION - if self.cache_config.kv_sharing_fast_prefill: - if not envs.VLLM_USE_V1: - raise NotImplementedError( - "Fast prefill optimization for KV sharing is not supported " - "in V0 currently.") if self.speculative_config is not None and \ self.speculative_config.use_eagle(): @@ -410,14 +398,6 @@ def __post_init__(self): "--kv-sharing-fast-prefill requires changes on model side for " "correctness and to realize prefill savings. ") - if ((not envs.VLLM_USE_V1) and self.lora_config is not None - and self.compilation_config.level - != CompilationLevel.NO_COMPILATION): - logger.warning( - "LoRA for V0 is not supported with `torch.compile` yet. 
" - "Disabling `torch.compile`.") - self.compilation_config.level = CompilationLevel.NO_COMPILATION - disable_chunked_prefill_reasons: list[str] = [] if self.model_config: @@ -604,57 +584,27 @@ def _set_cudagraph_sizes(self): """ # calculate the default `batch_size_capture_list` - if not envs.VLLM_USE_V1: - batch_size_capture_list = [] - if self.scheduler_config is not None and \ - self.model_config is not None and \ - not self.model_config.enforce_eager: - - possible_sizes = [1, 2, 4] + [8 * i for i in range(1, 1025)] - if self.parallel_config.tensor_parallel_size > 1 and \ - self.compilation_config.pass_config.enable_sequence_parallelism: - possible_sizes = self.update_sizes_for_sequence_parallelism( - possible_sizes) - - # find the minimum size that is larger than max_num_seqs, - # which then becomes the max_batchsize_to_capture - larger_sizes = [ - x for x in possible_sizes - if x >= self.scheduler_config.max_num_seqs - ] - if larger_sizes: - max_batchsize_to_capture = larger_sizes[0] - else: - max_batchsize_to_capture = possible_sizes[-1] - - # filter out the sizes that are - # larger than max_batchsize_to_capture - batch_size_capture_list = [ - size for size in possible_sizes - if size <= max_batchsize_to_capture - ] - else: - batch_size_capture_list = [] - if self.model_config is not None and \ - not self.model_config.enforce_eager: - cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes - if len(cuda_graph_sizes) == 1: - batch_size_capture_list = [1, 2, 4] + [ - i for i in range(8, cuda_graph_sizes[0] + 1, 8) - ] - elif len(cuda_graph_sizes) > 1: - batch_size_capture_list = sorted(cuda_graph_sizes) - else: - raise TypeError(f"Invalid value for {cuda_graph_sizes=}.") - if self.parallel_config.tensor_parallel_size > 1 and \ - self.compilation_config.pass_config.enable_sequence_parallelism: - batch_size_capture_list = \ - self.update_sizes_for_sequence_parallelism(batch_size_capture_list) - max_num_tokens = self.scheduler_config.max_num_batched_tokens - batch_size_capture_list = [ - size for size in batch_size_capture_list - if size <= max_num_tokens + batch_size_capture_list = [] + if self.model_config is not None and \ + not self.model_config.enforce_eager: + cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes + if len(cuda_graph_sizes) == 1: + batch_size_capture_list = [1, 2, 4] + [ + i for i in range(8, cuda_graph_sizes[0] + 1, 8) ] + elif len(cuda_graph_sizes) > 1: + batch_size_capture_list = sorted(cuda_graph_sizes) + else: + raise TypeError(f"Invalid value for {cuda_graph_sizes=}.") + if self.parallel_config.tensor_parallel_size > 1 and \ + self.compilation_config.pass_config.enable_sequence_parallelism: + batch_size_capture_list = \ + self.update_sizes_for_sequence_parallelism(batch_size_capture_list) + max_num_tokens = self.scheduler_config.max_num_batched_tokens + batch_size_capture_list = [ + size for size in batch_size_capture_list + if size <= max_num_tokens + ] self.compilation_config.init_with_cudagraph_sizes( batch_size_capture_list) diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 0441745e8b36..50fde9461a13 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -10,7 +10,6 @@ from pydantic import TypeAdapter, field_validator from pydantic.dataclasses import dataclass -import vllm.envs as envs from vllm.compilation.inductor_pass import CallableInductorPass, InductorPass from vllm.config.utils import config from vllm.logger import init_logger @@ -75,11 +74,11 @@ class PassConfig: don't all have access to full 
configuration - that would create a cycle as the `PassManager` is set as a property of config.""" - enable_fusion: bool = field(default_factory=lambda: not envs.VLLM_USE_V1) + enable_fusion: bool = False """Whether to enable the custom fusion (RMSNorm/SiluMul+quant) pass.""" enable_attn_fusion: bool = False """Whether to enable the custom attention+quant fusion pass.""" - enable_noop: bool = field(default_factory=lambda: not envs.VLLM_USE_V1) + enable_noop: bool = False """Whether to enable the custom no-op elimination pass.""" enable_sequence_parallelism: bool = False """Whether to enable sequence parallelism.""" From 3468f17ebe4daf19ff120d95eebde48f59d7cac2 Mon Sep 17 00:00:00 2001 From: Matthew Bonanni Date: Thu, 25 Sep 2025 13:37:50 -0400 Subject: [PATCH 390/518] [V0 deprecation] Remove _VLLM_V1 suffixes from attention backend names (#25489) Signed-off-by: Matthew Bonanni Signed-off-by: Matthew Bonanni --- .../scripts/hardware_ci/run-xpu-test.sh | 2 +- .../compile/piecewise/test_full_cudagraph.py | 2 +- tests/compile/test_fusion_attn.py | 2 +- tests/entrypoints/openai/test_serving_chat.py | 2 +- .../attention/test_attention_selector.py | 34 ++++++++++--------- .../attention/test_rocm_attention_selector.py | 14 +++----- tests/kernels/utils.py | 8 ++--- tests/models/test_initialization.py | 2 +- tests/utils.py | 8 ++--- tests/v1/attention/test_attention_backends.py | 15 ++++---- tests/v1/attention/test_mla_backends.py | 4 +-- tests/v1/attention/utils.py | 12 +++---- tests/v1/cudagraph/test_cudagraph_mode.py | 2 +- tests/v1/e2e/test_cascade_attention.py | 7 ++-- tests/v1/e2e/test_spec_decode.py | 7 ++-- tests/v1/spec_decode/test_eagle.py | 22 ++++++------ tests/v1/spec_decode/test_max_len.py | 7 ++-- tests/v1/spec_decode/test_tree_attention.py | 2 +- tests/v1/test_oracle.py | 23 ------------- vllm/attention/layer.py | 21 ++++-------- vllm/attention/selector.py | 8 +++++ .../kv_connector/v1/nixl_connector.py | 8 ++--- vllm/engine/arg_utils.py | 12 +++---- vllm/model_executor/warmup/kernel_warmup.py | 2 +- vllm/platforms/cuda.py | 21 ++++++------ vllm/platforms/interface.py | 12 ++----- vllm/platforms/rocm.py | 5 ++- vllm/platforms/tpu.py | 3 +- vllm/platforms/xpu.py | 12 +++---- vllm/v1/attention/backends/cpu_attn.py | 2 +- vllm/v1/attention/backends/flash_attn.py | 2 +- vllm/v1/attention/backends/flashinfer.py | 2 +- vllm/v1/attention/backends/mla/common.py | 2 +- vllm/v1/attention/backends/mla/flashmla.py | 2 +- .../attention/backends/mla/rocm_aiter_mla.py | 2 +- vllm/v1/attention/backends/mla/triton_mla.py | 2 +- vllm/v1/attention/backends/pallas.py | 2 +- vllm/v1/attention/backends/rocm_aiter_fa.py | 2 +- vllm/v1/attention/backends/rocm_attn.py | 2 +- vllm/v1/attention/backends/tree_attn.py | 2 +- vllm/v1/attention/backends/triton_attn.py | 2 +- vllm/v1/attention/backends/xformers.py | 2 +- 42 files changed, 131 insertions(+), 174 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh index 8c9b00990e99..1fc3dbd8c21f 100644 --- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh @@ -35,7 +35,7 @@ docker run \ python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 -O3 -O.cudagraph_mode=NONE python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager -tp 2 --distributed-executor-backend ray python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 
--enforce-eager -tp 2 --distributed-executor-backend mp - VLLM_ATTENTION_BACKEND=TRITON_ATTN_VLLM_V1 python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager + VLLM_ATTENTION_BACKEND=TRITON_ATTN python3 examples/offline_inference/basic/generate.py --model facebook/opt-125m --block-size 64 --enforce-eager cd tests pytest -v -s v1/core pytest -v -s v1/engine diff --git a/tests/compile/piecewise/test_full_cudagraph.py b/tests/compile/piecewise/test_full_cudagraph.py index 780a0d6b5c0e..b02c1b565671 100644 --- a/tests/compile/piecewise/test_full_cudagraph.py +++ b/tests/compile/piecewise/test_full_cudagraph.py @@ -103,7 +103,7 @@ class BackendConfig: # Triton Attention "TritonAttn": BackendConfig(name="TritonAttn", - env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"}, + env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN"}, comp_config={ "cudagraph_mode": "FULL", }), diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index c4cac9553192..6c2679ccfc81 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -338,7 +338,7 @@ def forward(self, q: torch.Tensor, k: torch.Tensor, v: torch.Tensor): @pytest.mark.parametrize("model_name, model_class", MODELS) @pytest.mark.parametrize("backend", [_Backend.FLASHINFER] if current_platform.is_cuda() - else [_Backend.TRITON_ATTN_VLLM_V1]) + else [_Backend.TRITON_ATTN]) @pytest.mark.parametrize( "split_attention", [False, True] if current_platform.is_rocm() else [False]) diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index b773061b3092..bfed760822cd 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -68,7 +68,7 @@ def default_server_args(with_tool_parser: bool): def gptoss_server(monkeypatch_module: pytest.MonkeyPatch, default_server_args: list[str]): with monkeypatch_module.context() as m: - m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1") + m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN") with RemoteOpenAIServer(GPT_OSS_MODEL_NAME, default_server_args) as remote_server: yield remote_server diff --git a/tests/kernels/attention/test_attention_selector.py b/tests/kernels/attention/test_attention_selector.py index 730514eb5a56..0ff2517f7ba2 100644 --- a/tests/kernels/attention/test_attention_selector.py +++ b/tests/kernels/attention/test_attention_selector.py @@ -31,7 +31,7 @@ def clear_cache(): } DEVICE_REGULAR_ATTN_BACKENDS = { - "cuda": ["XFORMERS", "FLASHINFER"], + "cuda": ["XFORMERS", "FLASHINFER", "FLASH_ATTN"], "hip": ["ROCM_FLASH"], "cpu": ["TORCH_SDPA"], } @@ -86,7 +86,7 @@ def test_env( with patch("vllm.attention.selector.current_platform", CpuPlatform()): backend = get_attn_backend(16, torch.float16, None, block_size) - assert backend.get_name() == "TORCH_SDPA_VLLM_V1" + assert backend.get_name() == "TORCH_SDPA" elif device == "hip": with patch("vllm.attention.selector.current_platform", @@ -125,7 +125,7 @@ def test_env( None, block_size, use_mla=use_mla) - expected = f"{name}_VLLM_V1" + expected = name assert backend.get_name() == expected else: backend = get_attn_backend(16, @@ -133,7 +133,7 @@ def test_env( None, block_size, use_mla=use_mla) - expected = "TRITON_ATTN_VLLM_V1" + expected = "TRITON_ATTN" assert backend.get_name() == expected elif device == "cuda": @@ -160,7 +160,7 @@ def test_env( None, block_size, use_mla=use_mla) - expected = "CUTLASS_MLA_VLLM_V1" + expected = "CUTLASS_MLA" 
assert backend.get_name() == expected elif name == "FLASHINFER_MLA": if block_size not in [32, 64]: @@ -193,7 +193,7 @@ def test_env( None, block_size, use_mla=use_mla) - expected = f"{name}_VLLM_V1" + expected = name assert backend.get_name() == expected elif name == "FLASH_ATTN_MLA": backend = get_attn_backend(16, @@ -210,7 +210,7 @@ def test_env( None, block_size, use_mla=use_mla) - expected = "TRITON_MLA_VLLM_V1" + expected = "TRITON_MLA" assert backend.get_name() == expected elif name == "FLASHINFER": backend = get_attn_backend(16, @@ -218,25 +218,24 @@ def test_env( None, block_size, use_mla=use_mla) - expected = "FLASHINFER_VLLM_V1" + expected = "FLASHINFER" assert backend.get_name() == expected - else: + elif name == "XFORMERS": backend = get_attn_backend(32, torch.float16, None, block_size, use_mla=use_mla) - expected = "FLASH_ATTN_VLLM_V1" + expected = "XFORMERS" assert backend.get_name() == expected - - backend = get_attn_backend(16, + elif name == "FLASH_ATTN": + backend = get_attn_backend(32, torch.float16, None, block_size, use_mla=use_mla) - assert backend.get_name() == "FLEX_ATTENTION", ( - "Should fallback to FlexAttention if head size is " - "not supported by FlashAttention") + expected = "FLASH_ATTN" + assert backend.get_name() == expected @pytest.mark.parametrize("device", ["cpu", "cuda"]) @@ -252,7 +251,7 @@ def test_fp32_fallback( with patch("vllm.attention.selector.current_platform", CpuPlatform()): backend = get_attn_backend(16, torch.float32, None, 16) - assert backend.get_name() == "TORCH_SDPA_VLLM_V1" + assert backend.get_name() == "TORCH_SDPA" elif device == "cuda": with patch("vllm.attention.selector.current_platform", @@ -266,6 +265,9 @@ def test_flash_attn(monkeypatch: pytest.MonkeyPatch): # TODO: When testing for v1, pipe in `use_v1` as an argument to # get_attn_backend + pytest.skip("Skipping as current backend selector does not " \ + "handle fallbacks when a backend is set via env var.") + with monkeypatch.context() as m: m.setenv(STR_BACKEND_ENV_VAR, STR_FLASH_ATTN_VAL) diff --git a/tests/kernels/attention/test_rocm_attention_selector.py b/tests/kernels/attention/test_rocm_attention_selector.py index af301d9de435..a5b4bddaf475 100644 --- a/tests/kernels/attention/test_rocm_attention_selector.py +++ b/tests/kernels/attention/test_rocm_attention_selector.py @@ -28,7 +28,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch): # Test standard ROCm attention backend = get_attn_backend(16, torch.float16, torch.float16, 16, False) assert (backend.get_name() == "ROCM_FLASH" - or backend.get_name() == "TRITON_ATTN_VLLM_V1") + or backend.get_name() == "TRITON_ATTN") # MLA test for deepseek related @@ -40,8 +40,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch): 16, False, use_mla=True) - assert (backend.get_name() == "TRITON_MLA" - or backend.get_name() == "TRITON_MLA_VLLM_V1") + assert backend.get_name() == "TRITON_MLA" # If attention backend is None # If use_mla is true @@ -53,8 +52,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch): 16, False, use_mla=True) - assert (backend.get_name() == "TRITON_MLA" - or backend.get_name() == "TRITON_MLA_VLLM_V1") + assert backend.get_name() == "TRITON_MLA" # change the attention backend to AITER MLA m.setenv(STR_BACKEND_ENV_VAR, "ROCM_AITER_MLA") @@ -64,8 +62,7 @@ def test_selector(monkeypatch: pytest.MonkeyPatch): 1, False, use_mla=True) - assert (backend.get_name() == "ROCM_AITER_MLA" - or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1") + assert backend.get_name() == "ROCM_AITER_MLA" # If attention backend is 
None # If use_mla is true @@ -79,5 +76,4 @@ def test_selector(monkeypatch: pytest.MonkeyPatch): 1, False, use_mla=True) - assert (backend.get_name() == "ROCM_AITER_MLA" - or backend.get_name() == "ROCM_AITER_MLA_VLLM_V1") + assert backend.get_name() == "ROCM_AITER_MLA" diff --git a/tests/kernels/utils.py b/tests/kernels/utils.py index 39ea07309134..0fdaa600aefa 100644 --- a/tests/kernels/utils.py +++ b/tests/kernels/utils.py @@ -524,14 +524,14 @@ def make_backend(backend_name: str) -> AttentionBackend: * Backend instance ''' - if backend_name in (STR_XFORMERS_ATTN_VAL, "XFORMERS_VLLM_V1"): + if backend_name == STR_XFORMERS_ATTN_VAL: from vllm.v1.attention.backends.xformers import ( XFormersAttentionBackend) return XFormersAttentionBackend() - if backend_name in (STR_FLASH_ATTN_VAL, "FLASH_ATTN_VLLM_V1"): + if backend_name == STR_FLASH_ATTN_VAL: from vllm.v1.attention.backends.flash_attn import FlashAttentionBackend return FlashAttentionBackend() - if backend_name == "TRITON_ATTN_VLLM_V1": + if backend_name == "TRITON_ATTN": from vllm.v1.attention.backends.triton_attn import ( TritonAttentionBackend) return TritonAttentionBackend() @@ -539,7 +539,7 @@ def make_backend(backend_name: str) -> AttentionBackend: from vllm.v1.attention.backends.flex_attention import ( FlexAttentionBackend) return FlexAttentionBackend() - if backend_name in ("TORCH_SDPA", "TORCH_SDPA_VLLM_V1"): + if backend_name == "TORCH_SDPA": from vllm.v1.attention.backends.cpu_attn import TorchSDPABackend return TorchSDPABackend() if backend_name == "FLASHINFER": diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index bfde6e20a3b1..42d69367042d 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -84,7 +84,7 @@ def _initialize_kv_caches_v1(self, vllm_config): # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU # has cc==8.9 which hasn't supported FA3 yet. Remove this hack when # L4 supports FA3. 
- m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN_VLLM_V1") + m.setenv("VLLM_ATTENTION_BACKEND", "TRITON_ATTN") if model_arch == "WhisperForConditionalGeneration": m.setenv("VLLM_WORKER_MULTIPROC_METHOD", "spawn") LLM( diff --git a/tests/utils.py b/tests/utils.py index 9a27c3de4533..f630c57f46d8 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1131,14 +1131,14 @@ def has_module_attribute(module_name, attribute_name): def get_attn_backend_list_based_on_platform() -> list[str]: if current_platform.is_cuda(): - return ["FLASH_ATTN_VLLM_V1", "TRITON_ATTN_VLLM_V1", "TREE_ATTN"] + return ["FLASH_ATTN", "TRITON_ATTN", "TREE_ATTN"] elif current_platform.is_rocm(): - attn_backend_list = ["TRITON_ATTN_VLLM_V1"] + attn_backend_list = ["TRITON_ATTN"] try: import aiter # noqa: F401 - attn_backend_list.append("FLASH_ATTN_VLLM_V1") + attn_backend_list.append("FLASH_ATTN") except Exception: - print("Skip FLASH_ATTN_VLLM_V1 on ROCm as aiter is not installed") + print("Skip FLASH_ATTN on ROCm as aiter is not installed") return attn_backend_list else: diff --git a/tests/v1/attention/test_attention_backends.py b/tests/v1/attention/test_attention_backends.py index 8a4fc15791b0..6c17be759ab6 100644 --- a/tests/v1/attention/test_attention_backends.py +++ b/tests/v1/attention/test_attention_backends.py @@ -21,16 +21,15 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec BACKENDS_TO_TEST = [ - _Backend.FLASH_ATTN_VLLM_V1, _Backend.FLASHINFER_VLLM_V1, - _Backend.FLEX_ATTENTION, _Backend.TRITON_ATTN_VLLM_V1, _Backend.TREE_ATTN, - "FLEX_ATTENTION_SLOW" + _Backend.FLASH_ATTN, _Backend.FLASHINFER, _Backend.FLEX_ATTENTION, + _Backend.TRITON_ATTN, _Backend.TREE_ATTN, "FLEX_ATTENTION_SLOW" ] # Remove flashinfer from the list if it's not available try: import flashinfer # noqa: F401 except ImportError: - BACKENDS_TO_TEST.remove(_Backend.FLASHINFER_VLLM_V1) + BACKENDS_TO_TEST.remove(_Backend.FLASHINFER) def _convert_dtype_to_torch(dtype): @@ -214,7 +213,7 @@ def run_attention_backend( builder_cls, impl_cls = get_attention_backend(actual_backend) # Mock flashinfer's get_per_layer_parameters if needed - if actual_backend == _Backend.FLASHINFER_VLLM_V1: + if actual_backend == _Backend.FLASHINFER: import unittest.mock from vllm.v1.attention.backends.utils import PerLayerParameters @@ -434,7 +433,7 @@ def _test_backend_correctness( # [num_blocks, 2, block_size, num_kv_heads, head_size] # Select the appropriate KV cache format for each backend kv_cache_for_backend = kv_cache - if backend_name == _Backend.FLASHINFER_VLLM_V1: + if backend_name == _Backend.FLASHINFER: kv_cache_for_backend = kv_cache.transpose(0, 1) # For FlashInfer default to HND layout and @@ -518,8 +517,8 @@ def causal_mask_mod( SLIDING_WINDOW_BACKENDS_TO_TEST = [ - _Backend.FLASH_ATTN_VLLM_V1, _Backend.FLEX_ATTENTION, - _Backend.TRITON_ATTN_VLLM_V1, "FLEX_ATTENTION_SLOW" + _Backend.FLASH_ATTN, _Backend.FLEX_ATTENTION, _Backend.TRITON_ATTN, + "FLEX_ATTENTION_SLOW" ] diff --git a/tests/v1/attention/test_mla_backends.py b/tests/v1/attention/test_mla_backends.py index a62993950aff..d4829c64b5c6 100644 --- a/tests/v1/attention/test_mla_backends.py +++ b/tests/v1/attention/test_mla_backends.py @@ -15,8 +15,8 @@ from vllm.v1.kv_cache_interface import FullAttentionSpec BACKENDS_TO_TEST = [ - _Backend.CUTLASS_MLA, _Backend.FLASHMLA_VLLM_V1, _Backend.FLASH_ATTN_MLA, - _Backend.TRITON_MLA_VLLM_V1 + _Backend.CUTLASS_MLA, _Backend.FLASHMLA, _Backend.FLASH_ATTN_MLA, + _Backend.TRITON_MLA ] # Remove CUTLASS_MLA from the list if not using sm100 diff --git 
a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py index f07c6eb0ea4d..6f8c5ea50ef0 100644 --- a/tests/v1/attention/utils.py +++ b/tests/v1/attention/utils.py @@ -120,30 +120,30 @@ def get_attention_backend(backend_name: _Backend): Tuple of (backend_builder_class, backend_impl_class) """ backend_map = { - _Backend.FLASH_ATTN_VLLM_V1: + _Backend.FLASH_ATTN: ("vllm.v1.attention.backends.flash_attn.FlashAttentionBackend" if current_platform.is_cuda() else "vllm.v1.attention.backends.rocm_aiter_fa.AiterFlashAttentionBackend" ), - _Backend.FLASHINFER_VLLM_V1: + _Backend.FLASHINFER: "vllm.v1.attention.backends.flashinfer.FlashInferBackend", _Backend.FLEX_ATTENTION: "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend", - _Backend.TRITON_ATTN_VLLM_V1: + _Backend.TRITON_ATTN: "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend", _Backend.TREE_ATTN: "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend", - _Backend.XFORMERS_VLLM_V1: + _Backend.XFORMERS: "vllm.v1.attention.backends.xformers.XFormersAttentionBackend", _Backend.CUTLASS_MLA: "vllm.v1.attention.backends.mla.cutlass_mla.CutlassMLABackend", - _Backend.FLASHMLA_VLLM_V1: + _Backend.FLASHMLA: "vllm.v1.attention.backends.mla.flashmla.FlashMLABackend", _Backend.FLASH_ATTN_MLA: "vllm.v1.attention.backends.mla.flashattn_mla.FlashAttnMLABackend", _Backend.FLASHINFER_MLA: "vllm.v1.attention.backends.mla.flashinfer_mla.FlashInferMLABackend", - _Backend.TRITON_MLA_VLLM_V1: + _Backend.TRITON_MLA: "vllm.v1.attention.backends.mla.triton_mla.TritonMLABackend", } diff --git a/tests/v1/cudagraph/test_cudagraph_mode.py b/tests/v1/cudagraph/test_cudagraph_mode.py index 1ae9185fafbd..41a9493cbe58 100644 --- a/tests/v1/cudagraph/test_cudagraph_mode.py +++ b/tests/v1/cudagraph/test_cudagraph_mode.py @@ -89,7 +89,7 @@ class BackendConfig: # Triton Attention "TritonAttn": BackendConfig(name="TritonAttn", - env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN_VLLM_V1"}, + env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN"}, comp_config={ "cudagraph_mode": "FULL_AND_PIECEWISE", }), diff --git a/tests/v1/e2e/test_cascade_attention.py b/tests/v1/e2e/test_cascade_attention.py index f2f460513605..5022347a87a4 100644 --- a/tests/v1/e2e/test_cascade_attention.py +++ b/tests/v1/e2e/test_cascade_attention.py @@ -9,11 +9,14 @@ @create_new_process_for_each_test() -@pytest.mark.parametrize("attn_backend", - ["FLASH_ATTN_VLLM_V1", "FLASHINFER_VLLM_V1"]) +@pytest.mark.parametrize("attn_backend", ["FLASH_ATTN", "FLASHINFER"]) def test_cascade_attention(example_system_message, monkeypatch, attn_backend): prompt = "\n: Implement fibonacci sequence in Python.\n:" + if attn_backend == "FLASHINFER": + pytest.skip("This test is failing with FlashInfer backend and " + "needs investigation. 
See issue #25679.") + with monkeypatch.context() as m: m.setenv("VLLM_USE_V1", "1") m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index bf90f50b1082..66115f14c182 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -176,12 +176,11 @@ def test_eagle_correctness( m.setenv("VLLM_MLA_DISABLE", "1") m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) - if (attn_backend == "TRITON_ATTN_VLLM_V1" - and not current_platform.is_rocm()): - pytest.skip("TRITON_ATTN_VLLM_V1 does not support " + if (attn_backend == "TRITON_ATTN" and not current_platform.is_rocm()): + pytest.skip("TRITON_ATTN does not support " "multi-token eagle spec decode on current platform") - if attn_backend == "FLASH_ATTN_VLLM_V1" and current_platform.is_rocm(): + if attn_backend == "FLASH_ATTN" and current_platform.is_rocm(): m.setenv("VLLM_ROCM_USE_AITER", "1") method, model_name, spec_model_name, tp_size = model_setup diff --git a/tests/v1/spec_decode/test_eagle.py b/tests/v1/spec_decode/test_eagle.py index 0b28365ed599..690732eb1232 100644 --- a/tests/v1/spec_decode/test_eagle.py +++ b/tests/v1/spec_decode/test_eagle.py @@ -314,12 +314,11 @@ def test_load_model(mock_get_model, mock_get_layers, mock_get_pp_group, method, monkeypatch.setenv("VLLM_ATTENTION_BACKEND", attn_backend) - if (attn_backend == "TRITON_ATTN_VLLM_V1" - and not current_platform.is_rocm()): - pytest.skip("TRITON_ATTN_VLLM_V1 does not support " + if (attn_backend == "TRITON_ATTN" and not current_platform.is_rocm()): + pytest.skip("TRITON_ATTN does not support " "multi-token eagle spec decode on current platform") - if attn_backend == "FLASH_ATTN_VLLM_V1" and current_platform.is_rocm(): + if attn_backend == "FLASH_ATTN" and current_platform.is_rocm(): monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1") # Setup draft model mock @@ -400,16 +399,15 @@ def test_propose(method, attn_backend, num_speculative_tokens, monkeypatch): monkeypatch.setenv("VLLM_ATTENTION_BACKEND", attn_backend) - if (attn_backend == "TRITON_ATTN_VLLM_V1" - and not current_platform.is_rocm()): - pytest.skip("TRITON_ATTN_VLLM_V1 does not support " + if (attn_backend == "TRITON_ATTN" and not current_platform.is_rocm()): + pytest.skip("TRITON_ATTN does not support " "multi-token eagle spec decode on current platform") if (attn_backend == "TREE_ATTN"): pytest.skip("TREE_ATTN is tested separately in test_propose_tree" "because it requires special input mocking.") - if attn_backend == "FLASH_ATTN_VLLM_V1" and current_platform.is_rocm(): + if attn_backend == "FLASH_ATTN" and current_platform.is_rocm(): monkeypatch.setenv("VLLM_ROCM_USE_AITER", "1") # Use GPU device @@ -510,12 +508,12 @@ def create_deterministic_logits(token_ids): device=device) sampling_metadata = mock.MagicMock() - if attn_backend == "FLASH_ATTN_VLLM_V1": + if attn_backend == "FLASH_ATTN": attn_metadata_builder_cls, _ = get_attention_backend( - _Backend.FLASH_ATTN_VLLM_V1) - elif attn_backend == "TRITON_ATTN_VLLM_V1": + _Backend.FLASH_ATTN) + elif attn_backend == "TRITON_ATTN": attn_metadata_builder_cls, _ = get_attention_backend( - _Backend.TRITON_ATTN_VLLM_V1) + _Backend.TRITON_ATTN) elif attn_backend == "TREE_ATTN": attn_metadata_builder_cls, _ = get_attention_backend( _Backend.TREE_ATTN) diff --git a/tests/v1/spec_decode/test_max_len.py b/tests/v1/spec_decode/test_max_len.py index a5b10bb51866..f93593f2d482 100644 --- a/tests/v1/spec_decode/test_max_len.py +++ b/tests/v1/spec_decode/test_max_len.py @@ -41,12 +41,11 
@@ def test_eagle_max_len(monkeypatch: pytest.MonkeyPatch, m.setenv("VLLM_USE_V1", "1") m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) - if (attn_backend == "TRITON_ATTN_VLLM_V1" - and not current_platform.is_rocm()): - pytest.skip("TRITON_ATTN_VLLM_V1 does not support " + if (attn_backend == "TRITON_ATTN" and not current_platform.is_rocm()): + pytest.skip("TRITON_ATTN does not support " "multi-token eagle spec decode on current platform") - if attn_backend == "FLASH_ATTN_VLLM_V1" and current_platform.is_rocm(): + if attn_backend == "FLASH_ATTN" and current_platform.is_rocm(): m.setenv("VLLM_ROCM_USE_AITER", "1") llm = LLM( diff --git a/tests/v1/spec_decode/test_tree_attention.py b/tests/v1/spec_decode/test_tree_attention.py index eacb2ad584ba..51a737496dff 100644 --- a/tests/v1/spec_decode/test_tree_attention.py +++ b/tests/v1/spec_decode/test_tree_attention.py @@ -278,7 +278,7 @@ def test_tree_attn_correctness() -> None: block_table=block_table, slot_mapping=branch_slot_mapping, seqlen_k=sequence_position + q_len, - backend=_Backend.FLASH_ATTN_VLLM_V1, + backend=_Backend.FLASH_ATTN, ).view(batch_size, -1, num_heads, dim_per_head) # Compare the outputs. diff --git a/tests/v1/test_oracle.py b/tests/v1/test_oracle.py index f6b8a18dd7c2..74aa20a2f7f9 100644 --- a/tests/v1/test_oracle.py +++ b/tests/v1/test_oracle.py @@ -54,26 +54,3 @@ def test_v1_llm_by_default(monkeypatch): print(llm.generate("Hello my name is")) assert hasattr(llm.llm_engine, "engine_core") m.delenv("VLLM_USE_V1") - - -def test_v1_attn_backend(monkeypatch): - with monkeypatch.context() as m: - if os.getenv("VLLM_USE_V1", None): - m.delenv("VLLM_USE_V1") - m.setenv("VLLM_ATTENTION_BACKEND", "XFORMERS") - - # Fall back to V0. - _ = AsyncEngineArgs(model=MODEL).create_engine_config() - assert not envs.VLLM_USE_V1 - m.delenv("VLLM_USE_V1") - - # Reject if V1. 
- m.setenv("VLLM_USE_V1", "1") - with pytest.raises(NotImplementedError): - AsyncEngineArgs(model=MODEL).create_engine_config() - m.delenv("VLLM_USE_V1") - - m.setenv("VLLM_ATTENTION_BACKEND", "FLASHMLA") - _ = AsyncEngineArgs(model=MODEL).create_engine_config() - assert envs.VLLM_USE_V1 - m.delenv("VLLM_USE_V1") diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 17281c89516d..326fe6dd048a 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -364,7 +364,7 @@ def process_weights_after_loading(self, act_dtype: torch.dtype): self.impl.process_weights_after_loading(act_dtype) # FlashInfer requires attention sinks to be float32 - if (self.backend == _Backend.FLASHINFER_VLLM_V1 + if (self.backend == _Backend.FLASHINFER and hasattr(self.impl, 'sinks')): from vllm.v1.attention.backends.flashinfer import FlashInferImpl assert isinstance(self.impl, FlashInferImpl) @@ -420,21 +420,17 @@ def __init__( self.attn_backend = backend if backend in { _Backend.TORCH_SDPA, - _Backend.TORCH_SDPA_VLLM_V1, _Backend.XFORMERS, - _Backend.PALLAS_VLLM_V1, + _Backend.PALLAS, _Backend.ROCM_AITER_FA, _Backend.FLASH_ATTN, - _Backend.FLASH_ATTN_VLLM_V1, } else _Backend.TORCH_SDPA if (self.attn_backend == _Backend.XFORMERS and not check_xformers_availability()): self.attn_backend = _Backend.TORCH_SDPA - if self.attn_backend in { - _Backend.FLASH_ATTN, _Backend.FLASH_ATTN_VLLM_V1 - }: + if self.attn_backend == _Backend.FLASH_ATTN: if use_upstream_fa: from flash_attn import flash_attn_varlen_func self._flash_attn_varlen_func = flash_attn_varlen_func @@ -468,11 +464,7 @@ def forward( key = torch.repeat_interleave(key, num_repeat, dim=2) value = torch.repeat_interleave(value, num_repeat, dim=2) - if self.attn_backend in { - _Backend.FLASH_ATTN, - _Backend.FLASH_ATTN_VLLM_V1, - }: - + if self.attn_backend == _Backend.FLASH_ATTN: cu_seqlens_q = torch.arange(0, (bsz + 1) * q_len, step=q_len, dtype=torch.int32, @@ -499,8 +491,7 @@ def forward( key, value, scale=self.scale) - elif (self.attn_backend == _Backend.TORCH_SDPA - or self.attn_backend == _Backend.TORCH_SDPA_VLLM_V1): + elif self.attn_backend == _Backend.TORCH_SDPA: query, key, value = (x.transpose(1, 2) for x in (query, key, value)) out = F.scaled_dot_product_attention(query, @@ -508,7 +499,7 @@ def forward( value, scale=self.scale) out = out.transpose(1, 2) - elif self.attn_backend == _Backend.PALLAS_VLLM_V1: + elif self.attn_backend == _Backend.PALLAS: query, key, value = (x.transpose(1, 2) for x in (query, key, value)) from torch_xla.experimental.custom_kernel import flash_attention diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py index b651fc3eaee3..bd83473db6f3 100644 --- a/vllm/attention/selector.py +++ b/vllm/attention/selector.py @@ -186,6 +186,14 @@ def _cached_get_attn_backend( # Check the environment variable and override if specified backend_by_env_var: Optional[str] = envs.VLLM_ATTENTION_BACKEND if backend_by_env_var is not None: + if backend_by_env_var.endswith("_VLLM_V1"): + logger.warning( + "The suffix '_VLLM_V1' in the environment variable " + "%s is no longer necessary as V0 backends have been " + "deprecated. 
Please remove this suffix from your " + "environment variable setting.", STR_BACKEND_ENV_VAR) + backend_by_env_var = backend_by_env_var.removesuffix( + "_VLLM_V1") selected_backend = backend_name_to_enum(backend_by_env_var) if selected_backend is None: raise ValueError( diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 528d4022bd17..c205501e6c98 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -577,8 +577,8 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): use_mla=self.use_mla) self.backend_name = backend.get_name() attn_backend = backend_name_to_enum(self.backend_name) - self._use_flashinfer = attn_backend == _Backend.FLASHINFER_VLLM_V1 - self._use_pallas_v1 = attn_backend == _Backend.PALLAS_VLLM_V1 + self._use_flashinfer = attn_backend == _Backend.FLASHINFER + self._use_pallas = attn_backend == _Backend.PALLAS self.kv_cache_layout = get_kv_cache_layout() logger.debug("Detected attention backend %s", self.backend_name) logger.debug("Detected kv cache layout %s", self.kv_cache_layout) @@ -749,7 +749,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): # (roughly 8KB vs 5KB). # Conversely for FlashInfer, K and V are registered in the same region # to better exploit the memory layout (ie num_blocks is the first dim). - split_k_and_v = not (self.use_mla or self._use_pallas_v1 + split_k_and_v = not (self.use_mla or self._use_pallas or self._use_flashinfer) tensor_size_bytes = None for layer_name, cache_or_caches in xfer_buffers.items(): @@ -938,7 +938,7 @@ def add_remote_agent(self, tp_ratio = divide(self._tp_size[self.engine_id], self._tp_size[engine_id]) assert tp_ratio > 0, "Decode TP cannot be smaller than prefill TP" - assert not self._use_pallas_v1 or tp_ratio == 1, \ + assert not self._use_pallas or tp_ratio == 1, \ "TPU (pallas_v1) DOES NOT support heterogeneous TP yet." # Handle tp_size>num_kv_heads: replicate KV cache. 
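For reference, the only user-visible effect of dropping the "_VLLM_V1" suffix is in the names accepted by the VLLM_ATTENTION_BACKEND environment variable; a minimal sketch of the new spelling (the backend choice here is illustrative -- any name in the V1_BACKENDS list in the next hunk behaves the same way):

    import os

    # The selector change above still strips a trailing "_VLLM_V1" and logs a
    # deprecation warning, so the old spelling keeps working for now.
    os.environ["VLLM_ATTENTION_BACKEND"] = "FLASH_ATTN"  # previously "FLASH_ATTN_VLLM_V1"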
diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 3d48d2a0b22d..c894477d34b5 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1479,25 +1479,21 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: "such as ngram, medusa, eagle, or deepseek_mtp.") V1_BACKENDS = [ - "FLASH_ATTN_VLLM_V1", "FLASH_ATTN", "PALLAS", - "PALLAS_VLLM_V1", - "TRITON_ATTN_VLLM_V1", + "TRITON_ATTN", "TRITON_MLA", "CUTLASS_MLA", "FLASHMLA", - "FLASHMLA_VLLM_V1", "FLASH_ATTN_MLA", "FLASHINFER", - "FLASHINFER_VLLM_V1", "FLASHINFER_MLA", "ROCM_AITER_MLA", - "TORCH_SDPA_VLLM_V1", + "TORCH_SDPA", "FLEX_ATTENTION", "TREE_ATTN", - "XFORMERS_VLLM_V1", - "ROCM_ATTN_VLLM_V1", + "XFORMERS", + "ROCM_ATTN", ] if (envs.is_set("VLLM_ATTENTION_BACKEND") and envs.VLLM_ATTENTION_BACKEND not in V1_BACKENDS): diff --git a/vllm/model_executor/warmup/kernel_warmup.py b/vllm/model_executor/warmup/kernel_warmup.py index 89ce20308f44..3f99340c2906 100644 --- a/vllm/model_executor/warmup/kernel_warmup.py +++ b/vllm/model_executor/warmup/kernel_warmup.py @@ -42,7 +42,7 @@ def kernel_warmup(worker: "Worker"): # and is not a pooling model def _is_flashinfer_backend(backend): try: - return backend.get_name() == "FLASHINFER_VLLM_V1" + return backend.get_name() == "FLASHINFER" except NotImplementedError: return False diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 4aa4ca057f45..58ba08101bc9 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -241,9 +241,8 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, use_flashinfermla = selected_backend == _Backend.FLASHINFER_MLA or ( selected_backend is None and cls.is_device_capability(100) and block_size in [32, 64]) - use_flashmla = selected_backend in [ - _Backend.FLASHMLA, _Backend.FLASHMLA_VLLM_V1 - ] or (selected_backend is None and is_flashmla_supported()[0]) + use_flashmla = selected_backend == _Backend.FLASHMLA or ( + selected_backend is None and is_flashmla_supported()[0]) use_flashattn = selected_backend == _Backend.FLASH_ATTN_MLA or ( selected_backend is None and flash_attn_supports_mla()) use_triton = selected_backend == _Backend.TRITON_MLA or ( @@ -282,7 +281,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, if use_v1: FLASHINFER_V1 = "vllm.v1.attention.backends.flashinfer.FlashInferBackend" # noqa: E501 FLEX_ATTENTION_V1 = "vllm.v1.attention.backends.flex_attention.FlexAttentionBackend" # noqa: E501 - TRITON_ATTN_VLLM_V1 = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend" # noqa: E501 + TRITON_ATTN = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend" # noqa: E501 FLASH_ATTN_V1 = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend" # noqa: E501 TREE_ATTN_V1 = "vllm.v1.attention.backends.tree_attn.TreeAttentionBackend" # noqa: E501 XFORMERS_V1 = "vllm.v1.attention.backends.xformers.XFormersAttentionBackend" # noqa: E501 @@ -300,16 +299,16 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, elif selected_backend == _Backend.FLEX_ATTENTION: logger.info_once("Using FlexAttention backend on V1 engine.") return FLEX_ATTENTION_V1 - elif selected_backend == _Backend.TRITON_ATTN_VLLM_V1: + elif selected_backend == _Backend.TRITON_ATTN: logger.info_once("Using Triton backend on V1 engine.") - return TRITON_ATTN_VLLM_V1 + return TRITON_ATTN elif selected_backend == _Backend.FLASH_ATTN: logger.info_once("Using Flash Attention backend on V1 engine.") return FLASH_ATTN_V1 elif selected_backend == _Backend.TREE_ATTN: 
logger.info_once("Using Tree Attention backend on V1 engine.") return TREE_ATTN_V1 - elif selected_backend == _Backend.XFORMERS_VLLM_V1: + elif selected_backend == _Backend.XFORMERS: logger.info_once("Using XFormers backend on V1 engine.") return XFORMERS_V1 @@ -341,7 +340,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, if (has_sink or use_fp8_kv_cache) and not cls.is_device_capability(90): logger.info_once("Using Triton backend on V1 engine.") - return TRITON_ATTN_VLLM_V1 + return TRITON_ATTN elif is_default_backend_supported := is_attn_backend_supported( FLASH_ATTN_V1, head_size, dtype, allow_import_error=False): @@ -457,12 +456,12 @@ def is_kv_cache_dtype_supported(cls, kv_cache_dtype: str, else: # Default to FlashAttention if attention_backend is None: - attention_backend = "FLASH_ATTN_VLLM_V1" + attention_backend = "FLASH_ATTN" # All Blackwell backends support fp8 if cls.is_device_capability(100): supported = True - elif attention_backend == "FLASH_ATTN_VLLM_V1": + elif attention_backend == "FLASH_ATTN": if fp8_attention: from vllm.attention.utils.fa_utils import ( flash_attn_supports_fp8) @@ -471,7 +470,7 @@ def is_kv_cache_dtype_supported(cls, kv_cache_dtype: str, supported = True elif attention_backend == "FLASHINFER": supported = True - elif attention_backend == "TRITON_ATTN_VLLM_V1": + elif attention_backend == "TRITON_ATTN": supported = cls.supports_fp8() return supported diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index 73b97dafcd6e..de23a665d2ea 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -40,34 +40,26 @@ def in_wsl() -> bool: class _Backend(enum.Enum): FLASH_ATTN = enum.auto() - FLASH_ATTN_VLLM_V1 = enum.auto() - TRITON_ATTN_VLLM_V1 = enum.auto() + TRITON_ATTN = enum.auto() XFORMERS = enum.auto() ROCM_FLASH = enum.auto() ROCM_AITER_MLA = enum.auto() # Supported by V1 - ROCM_AITER_MLA_VLLM_V1 = enum.auto() ROCM_AITER_FA = enum.auto() # used for ViT attn backend TORCH_SDPA = enum.auto() - TORCH_SDPA_VLLM_V1 = enum.auto() FLASHINFER = enum.auto() - FLASHINFER_VLLM_V1 = enum.auto() FLASHINFER_MLA = enum.auto() TRITON_MLA = enum.auto() # Supported by V1 - TRITON_MLA_VLLM_V1 = enum.auto() CUTLASS_MLA = enum.auto() FLASHMLA = enum.auto() # Supported by V1 - FLASHMLA_VLLM_V1 = enum.auto() FLASH_ATTN_MLA = enum.auto() # Supported by V1 PALLAS = enum.auto() - PALLAS_VLLM_V1 = enum.auto() IPEX = enum.auto() DUAL_CHUNK_FLASH_ATTN = enum.auto() DIFFERENTIAL_FLASH_ATTN = enum.auto() NO_ATTENTION = enum.auto() FLEX_ATTENTION = enum.auto() TREE_ATTN = enum.auto() - XFORMERS_VLLM_V1 = enum.auto() - ROCM_ATTN_VLLM_V1 = enum.auto() + ROCM_ATTN = enum.auto() class PlatformEnum(enum.Enum): diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index d44d6930c177..1dacd026b667 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -218,8 +218,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, raise ValueError( f" The selected backend, {selected_backend.name}," f"does not support block size {block_size}.") - if selected_backend in (_Backend.ROCM_AITER_MLA, - _Backend.ROCM_AITER_MLA_VLLM_V1): + if selected_backend == _Backend.ROCM_AITER_MLA: if block_size == 1: logger.info("Using AITER MLA backend on V1 engine.") return "vllm.v1.attention.backends.mla.rocm_aiter_mla.AiterMLABackend" # noqa: E501 @@ -240,7 +239,7 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, elif (envs.VLLM_ROCM_USE_AITER and envs.VLLM_USE_AITER_UNIFIED_ATTENTION) or \ 
envs.VLLM_V1_USE_PREFILL_DECODE_ATTENTION or \ - selected_backend == _Backend.ROCM_ATTN_VLLM_V1: + selected_backend == _Backend.ROCM_ATTN: # rocm specific backend, with aiter and/or # triton prefix-prefill logger.info("Using Rocm/Aiter Attention backend on V1 engine.") diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index e4c73b1bae6f..c2ba37224d61 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -50,8 +50,7 @@ def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, has_sink) -> str: - if (selected_backend != _Backend.PALLAS - and selected_backend != _Backend.PALLAS_VLLM_V1): + if selected_backend != _Backend.PALLAS: logger.info("Cannot use %s backend on TPU.", selected_backend) if not use_v1: diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index af61db5e312a..cf408cc5df04 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -40,14 +40,14 @@ def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, use_v1 = envs.VLLM_USE_V1 if not use_v1: raise ValueError("XPU backend only supports V1.") - TRITON_ATTN_VLLM_V1 = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend" # noqa: E501 - FLASH_ATTN_V1 = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend" # noqa: E501 - if selected_backend == _Backend.TRITON_ATTN_VLLM_V1: + TRITON_ATTN = "vllm.v1.attention.backends.triton_attn.TritonAttentionBackend" # noqa: E501 + FLASH_ATTN = "vllm.v1.attention.backends.flash_attn.FlashAttentionBackend" # noqa: E501 + if selected_backend == _Backend.TRITON_ATTN: logger.info_once("Using Triton backend on V1 engine.") - return TRITON_ATTN_VLLM_V1 + return TRITON_ATTN elif selected_backend == _Backend.FLASH_ATTN: logger.info_once("Using Flash Attention backend on V1 engine.") - return FLASH_ATTN_V1 + return FLASH_ATTN elif selected_backend: raise ValueError( f"Invalid attention backend for {cls.device_name}, " @@ -64,7 +64,7 @@ def is_kv_cache_dtype_supported(cls, kv_cache_dtype: str, XPU only support fp8 kv cache with triton backend. 
""" if envs.is_set("VLLM_ATTENTION_BACKEND") and \ - envs.VLLM_ATTENTION_BACKEND == "TRITON_ATTN_VLLM_V1": + envs.VLLM_ATTENTION_BACKEND == "TRITON_ATTN": return kv_cache_dtype in ["fp8_e4m3", "fp8_e5m2", "fp8"] return False diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index 4bae13b4f77c..ab09ab9f8e0e 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -54,7 +54,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "TORCH_SDPA_VLLM_V1" + return "TORCH_SDPA" @staticmethod def get_impl_cls() -> type["TorchSDPABackendImpl"]: diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 7a50bb5d3134..f284847dd9e9 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -60,7 +60,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "FLASH_ATTN_VLLM_V1" + return "FLASH_ATTN" @staticmethod def get_impl_cls() -> type["FlashAttentionImpl"]: diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index 891108f961b5..a4bf3635bbca 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -167,7 +167,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "FLASHINFER_VLLM_V1" + return "FLASHINFER" @staticmethod def get_impl_cls() -> type[FlashInferImpl]: diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 3e8dba14ee2e..1053fde09910 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -270,7 +270,7 @@ class MLACommonBackend(AttentionBackend): @staticmethod def get_name() -> str: - return "TRITON_MLA_VLLM_V1" + return "TRITON_MLA" @staticmethod def get_metadata_cls() -> type["AttentionMetadata"]: diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py index 150e38553e4b..ac0524ba088b 100644 --- a/vllm/v1/attention/backends/mla/flashmla.py +++ b/vllm/v1/attention/backends/mla/flashmla.py @@ -27,7 +27,7 @@ class FlashMLABackend(MLACommonBackend): @staticmethod def get_name() -> str: - return "FLASHMLA_VLLM_V1" + return "FLASHMLA" @staticmethod def get_metadata_cls() -> type["FlashMLAMetadata"]: diff --git a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py index db27a34d8959..79247e569b1c 100644 --- a/vllm/v1/attention/backends/mla/rocm_aiter_mla.py +++ b/vllm/v1/attention/backends/mla/rocm_aiter_mla.py @@ -33,7 +33,7 @@ class AiterMLABackend(MLACommonBackend): @staticmethod def get_name() -> str: - return "ROCM_AITER_MLA_VLLM_V1" + return "ROCM_AITER_MLA" @staticmethod def get_impl_cls() -> type["AiterMLAImpl"]: diff --git a/vllm/v1/attention/backends/mla/triton_mla.py b/vllm/v1/attention/backends/mla/triton_mla.py index dd272fa01925..076152061d50 100644 --- a/vllm/v1/attention/backends/mla/triton_mla.py +++ b/vllm/v1/attention/backends/mla/triton_mla.py @@ -24,7 +24,7 @@ class TritonMLABackend(MLACommonBackend): @staticmethod def get_name() -> str: - return "TRITON_MLA_VLLM_V1" + return "TRITON_MLA" @staticmethod def get_impl_cls() -> type["TritonMLAImpl"]: diff --git a/vllm/v1/attention/backends/pallas.py b/vllm/v1/attention/backends/pallas.py index 4ae0634e082a..4cb4b85956bc 100644 --- 
a/vllm/v1/attention/backends/pallas.py +++ b/vllm/v1/attention/backends/pallas.py @@ -86,7 +86,7 @@ class PallasAttentionBackend(AttentionBackend): @staticmethod def get_name() -> str: - return "PALLAS_VLLM_V1" + return "PALLAS" @staticmethod def get_impl_cls() -> type["PallasAttentionBackendImpl"]: diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index afb2283c44d3..96f8e92a2039 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -340,7 +340,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "FLASH_ATTN_VLLM_V1" + return "FLASH_ATTN" @staticmethod def get_impl_cls() -> type["AiterFlashAttentionImpl"]: diff --git a/vllm/v1/attention/backends/rocm_attn.py b/vllm/v1/attention/backends/rocm_attn.py index 365df5f0d6ec..e973be16d779 100644 --- a/vllm/v1/attention/backends/rocm_attn.py +++ b/vllm/v1/attention/backends/rocm_attn.py @@ -159,7 +159,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "ROCM_ATTN_VLLM_V1" + return "ROCM_ATTN" @staticmethod def get_impl_cls() -> type["RocmAttentionImpl"]: diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py index 10238f36455d..1d4ab4c96728 100644 --- a/vllm/v1/attention/backends/tree_attn.py +++ b/vllm/v1/attention/backends/tree_attn.py @@ -52,7 +52,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "TREE_ATTN_VLLM_V1" + return "TREE_ATTN" @staticmethod def get_impl_cls() -> type["TreeAttentionImpl"]: diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index f9fbd05efc67..fc5ecf6ed3b6 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -155,7 +155,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "TRITON_ATTN_VLLM_V1" + return "TRITON_ATTN" @staticmethod def get_impl_cls() -> type["TritonAttentionImpl"]: diff --git a/vllm/v1/attention/backends/xformers.py b/vllm/v1/attention/backends/xformers.py index d5a6c4c1db52..f739e6832274 100644 --- a/vllm/v1/attention/backends/xformers.py +++ b/vllm/v1/attention/backends/xformers.py @@ -90,7 +90,7 @@ def validate_head_size(cls, head_size: int) -> None: @staticmethod def get_name() -> str: - return "XFORMERS_VLLM_V1" + return "XFORMERS" @staticmethod def get_impl_cls() -> type["XFormersAttentionImpl"]: From 0fa673af4c2aeaf6d21a0163dc55218f5ee1daa6 Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Fri, 26 Sep 2025 02:12:33 +0800 Subject: [PATCH 391/518] [V0 deprecation] Clean up LoRA (#25686) Signed-off-by: Jee Jee Li --- vllm/lora/punica_wrapper/punica_gpu.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/vllm/lora/punica_wrapper/punica_gpu.py b/vllm/lora/punica_wrapper/punica_gpu.py index 2db0e9fee142..467f50050eb2 100644 --- a/vllm/lora/punica_wrapper/punica_gpu.py +++ b/vllm/lora/punica_wrapper/punica_gpu.py @@ -11,7 +11,6 @@ import torch -import vllm.envs as envs from vllm.lora.layers import LoRAMapping from vllm.triton_utils import HAS_TRITON @@ -41,14 +40,8 @@ def __init__(self, max_num_batched_tokens: int, max_batches: int, max_num_batched_tokens, device=device) - # When cudagraph capture size is greater than max_num_seqs (max_batches, - # here), V0 captures the graph as if max_num_seqs is set to - # the capture size. 
- # V1 doesn't have this problem and always respects max_num_seqs. - max_num_prompts = (max_batches - if envs.VLLM_USE_V1 else max_num_batched_tokens) self.prompt_mapping_meta = LoRAKernelMeta.make(self.max_loras, - max_num_prompts, + max_batches, device=device) def update_metadata(self, mapping: LoRAMapping, From 6b0fcbbf4361cce7965c67ceede953e22062610c Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 02:23:01 +0800 Subject: [PATCH 392/518] [Misc] Simplify `test_argsort_mm_positions` (#25690) Signed-off-by: DarkLight1337 --- tests/multimodal/test_utils.py | 47 +++++++++++++++------------------- 1 file changed, 21 insertions(+), 26 deletions(-) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index f36d94ca0155..f6a93bae2afc 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -5,7 +5,6 @@ import mimetypes import os from tempfile import NamedTemporaryFile, TemporaryDirectory -from typing import TYPE_CHECKING, NamedTuple import numpy as np import pytest @@ -15,9 +14,6 @@ from vllm.multimodal.inputs import PlaceholderRange from vllm.multimodal.utils import MediaConnector, argsort_mm_positions -if TYPE_CHECKING: - from vllm.multimodal.inputs import MultiModalPlaceholderDict - # Test different image extensions (JPG/PNG) and formats (gray/RGB/RGBA) TEST_IMAGE_ASSETS = [ "2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg", # "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" @@ -218,18 +214,13 @@ async def test_fetch_video_http_with_dynamic_loader( assert metadata_sync["video_backend"] == "opencv_dynamic" -# Used for `test_argsort_mm_positions`. -class TestCase(NamedTuple): - mm_positions: "MultiModalPlaceholderDict" - expected_modality_idxs: list[tuple[str, int]] - - -def test_argsort_mm_positions(): - - test_cases = [ +# yapf: disable +@pytest.mark.parametrize( + "case", + [ # Single modality ## Internally sorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=0, length=2), @@ -242,7 +233,7 @@ def test_argsort_mm_positions(): ], ), ## Internally unsorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=3, length=2), @@ -257,7 +248,7 @@ def test_argsort_mm_positions(): # Two modalities ## Internally sorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=7, length=4), @@ -276,7 +267,7 @@ def test_argsort_mm_positions(): ], ), ## Interleaved, internally sorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=0, length=4), @@ -295,7 +286,7 @@ def test_argsort_mm_positions(): ], ), ## Interleaved, internally unsorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=8, length=2), @@ -316,7 +307,7 @@ def test_argsort_mm_positions(): # Three modalities ## Internally sorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=15, length=7), @@ -341,7 +332,7 @@ def test_argsort_mm_positions(): ], ), ## Interleaved, internally sorted - TestCase( + dict( mm_positions={ "image": [ PlaceholderRange(offset=0, length=2), @@ -363,8 +354,8 @@ def test_argsort_mm_positions(): ("image", 2), ], ), - ## Interleaved, internally sunorted - TestCase( + ## Interleaved, internally unsorted + dict( mm_positions={ "image": [ PlaceholderRange(offset=0, length=2), @@ -386,9 +377,13 @@ def test_argsort_mm_positions(): ("image", 1), ], ), - ] + ], +) +# yapf: enable +def test_argsort_mm_positions(case): + mm_positions 
= case["mm_positions"] + expected_modality_idxs = case["expected_modality_idxs"] - for mm_positions, expected_modality_idxs in test_cases: - modality_idxs = argsort_mm_positions(mm_positions) + modality_idxs = argsort_mm_positions(mm_positions) - assert modality_idxs == expected_modality_idxs + assert modality_idxs == expected_modality_idxs From 3d54bdcb73572c8797aa6bc69bc31a6a8122b073 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 05:06:49 +0800 Subject: [PATCH 393/518] [Optimization] Streamline `InputPreprocessor` (#25702) Signed-off-by: DarkLight1337 --- vllm/inputs/preprocess.py | 290 ++------------------------------------ 1 file changed, 12 insertions(+), 278 deletions(-) diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index cb3a5cdb840e..a24307b79d95 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import asyncio from collections.abc import Mapping from typing import Any, Optional, Union, cast @@ -13,6 +12,7 @@ from vllm.multimodal.cache import BaseMultiModalProcessorCache from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalEncDecInputs, MultiModalInputs, MultiModalUUIDDict) +from vllm.multimodal.processing import BaseMultiModalProcessor from vllm.transformers_utils.tokenizer import AnyTokenizer from .data import (DecoderOnlyInputs, EmbedsInputs, EmbedsPrompt, @@ -200,20 +200,6 @@ def _tokenize_prompt( return tokenizer.encode(prompt, **tokenization_kwargs) - async def _tokenize_prompt_async( - self, - prompt: str, - tokenization_kwargs: Optional[dict[str, Any]] = None, - ) -> list[int]: - """ - Async version of - [`_tokenize_prompt`][vllm.inputs.preprocess.InputPreprocessor._tokenize_prompt]. - """ - tokenizer = self.get_tokenizer() - tokenization_kwargs = self._get_tokenization_kw(tokenization_kwargs) - - return tokenizer.encode(prompt, **tokenization_kwargs) - def _get_mm_tokenizer(self) -> AnyTokenizer: # PrithviGeoSpatialMAE needs to be initialized without a tokenizer # while using also multi-modal input @@ -223,14 +209,17 @@ def _get_mm_tokenizer(self) -> AnyTokenizer: tokenizer = self.get_tokenizer() return tokenizer - async def _get_mm_tokenizer_async(self) -> AnyTokenizer: - # PrithviGeoSpatialMAE needs to be initialized without a tokenizer - # while using also multi-modal input - if not self.tokenizer: - return cast(AnyTokenizer, object()) # Dummy + def _get_mm_processor(self) -> BaseMultiModalProcessor: + if not hasattr(self, "_mm_processor"): + tokenizer = self._get_mm_tokenizer() - tokenizer = self.get_tokenizer() - return tokenizer + self._mm_processor = self.mm_registry.create_processor( + self.model_config, + tokenizer=tokenizer, + cache=self.mm_processor_cache, + ) + + return self._mm_processor def _process_multimodal( self, @@ -245,55 +234,7 @@ def _process_multimodal( Apply the model's multi-modal processor to a multi-modal prompt, returning the corresponding token IDs and metadata. 
""" - tokenizer = self._get_mm_tokenizer() - - mm_processor = self.mm_registry.create_processor( - self.model_config, - tokenizer=tokenizer, - cache=self.mm_processor_cache, - ) - - if mm_processor_kwargs is None: - mm_processor_kwargs = {} - - mm_input = mm_processor.apply( - prompt, - mm_data, - hf_processor_mm_kwargs=mm_processor_kwargs, - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - mm_hashes = mm_input["mm_hashes"] - - # Validate that all mm items have a string as their hash - if not contains_only_strings(mm_hashes): - raise ValueError( - f"mm_hashes must contain only strings, got: {mm_hashes}. " - "This is likely due to an incorrect custom implementation of " - "MultiModalProcessor.apply method.") - - return mm_input - - async def _process_multimodal_async( - self, - prompt: Union[str, list[int]], - mm_data: MultiModalDataDict, - mm_processor_kwargs: Optional[Mapping[str, object]], - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> MultiModalInputs: - """ - Async version of - [`_process_multimodal`][vllm.inputs.preprocess.InputPreprocessor._process_multimodal]. - """ - tokenizer = await self._get_mm_tokenizer_async() - - mm_processor = self.mm_registry.create_processor( - self.model_config, - tokenizer=tokenizer, - cache=self.mm_processor_cache, - ) + mm_processor = self._get_mm_processor() if mm_processor_kwargs is None: mm_processor_kwargs = {} @@ -340,12 +281,6 @@ def _process_embeds( return embeds_inputs(prompt_embeds=prompt_embeds, cache_salt=parsed_content.get("cache_salt")) - async def _process_embeds_async( - self, - parsed_content: EmbedsPrompt, - ) -> EmbedsInputs: - return self._process_embeds(parsed_content) - def _truncate_inputs( self, inputs: list[int], @@ -389,33 +324,6 @@ def _process_tokens( return inputs - async def _process_tokens_async( - self, - parsed_content: TokensPrompt, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> Union[TokenInputs, MultiModalInputs]: - prompt_token_ids = self._truncate_inputs( - parsed_content["prompt_token_ids"], tokenization_kwargs) - - inputs: Union[TokenInputs, MultiModalInputs] - if multi_modal_data := parsed_content.get("multi_modal_data"): - inputs = await self._process_multimodal_async( - prompt_token_ids, - multi_modal_data, - parsed_content.get("mm_processor_kwargs"), - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - else: - inputs = token_inputs(prompt_token_ids=prompt_token_ids, ) - - if cache_salt := parsed_content.get("cache_salt"): - inputs["cache_salt"] = cache_salt - - return inputs - def _process_text( self, parsed_content: TextPrompt, @@ -449,39 +357,6 @@ def _process_text( return inputs - async def _process_text_async( - self, - parsed_content: TextPrompt, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> Union[TokenInputs, MultiModalInputs]: - prompt_text = parsed_content["prompt"] - - inputs: Union[TokenInputs, MultiModalInputs] - if multi_modal_data := parsed_content.get("multi_modal_data"): - inputs = await self._process_multimodal_async( - prompt_text, - multi_modal_data, - parsed_content.get("mm_processor_kwargs"), - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - else: - prompt_token_ids = await self._tokenize_prompt_async( - prompt_text, - tokenization_kwargs=tokenization_kwargs, - ) - inputs = token_inputs( - prompt=prompt_text, - 
prompt_token_ids=prompt_token_ids, - ) - - if cache_salt := parsed_content.get("cache_salt"): - inputs["cache_salt"] = cache_salt - - return inputs - def _prompt_to_llm_inputs( self, prompt: SingletonPrompt, @@ -524,41 +399,6 @@ def _prompt_to_llm_inputs( assert_never(parsed) - async def _prompt_to_llm_inputs_async( - self, - prompt: SingletonPrompt, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> SingletonInputs: - """ - Async version of - [`_prompt_to_llm_inputs`][vllm.inputs.preprocess.InputPreprocessor._prompt_to_llm_inputs]. - """ - parsed = parse_singleton_prompt(prompt) - - if parsed["type"] == "embeds": - return await self._process_embeds_async(parsed["content"]) - if parsed["type"] == "tokens": - return await self._process_tokens_async( - parsed["content"], - mm_uuids=mm_uuids, - ) - if parsed["type"] == "text": - return await self._process_text_async( - parsed["content"], - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - if parsed["type"] == "str": - return await self._process_text_async( - TextPrompt(prompt=parsed["content"]), - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - - assert_never(parsed) - def _build_enc_dec_llm_inputs( self, encoder_inputs: SingletonInputs, @@ -735,62 +575,6 @@ def _process_encoder_decoder_prompt( return self._build_enc_dec_llm_inputs(encoder_inputs, decoder_inputs) - async def _process_encoder_decoder_prompt_async( - self, - prompt: PromptType, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> EncoderDecoderInputs: - """ - Async version of - [`_process_encoder_decoder_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_encoder_decoder_prompt]. - """ - encoder_inputs: SingletonInputs - decoder_inputs: Optional[SingletonInputs] - - if is_explicit_encoder_decoder_prompt(prompt): - encoder_task = self._prompt_to_llm_inputs_async( - prompt["encoder_prompt"], - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - - if (decoder_input := prompt["decoder_prompt"]) is None: - encoder_inputs = await encoder_task - decoder_inputs = None - else: - decoder_task = self._prompt_to_llm_inputs_async( - decoder_input, - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - - encoder_inputs, decoder_inputs = await asyncio.gather( - encoder_task, decoder_task) - - # For multimodal model, override decoder prompt from processor - # with explicit decoder prompt. 
- if self.model_config.is_multimodal_model: - encoder_inputs, decoder_inputs = ( - self._split_enc_dec_mm_inputs(encoder_inputs, - decoder_inputs)) - else: - inputs = await self._prompt_to_llm_inputs_async( - prompt, - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - if self.model_config.is_multimodal_model: - # Encoder-Decoder Multimodal model - encoder_inputs, decoder_inputs = ( - self._split_enc_dec_mm_inputs(inputs)) - else: - encoder_inputs = inputs - decoder_inputs = None - - return self._build_enc_dec_llm_inputs(encoder_inputs, decoder_inputs) - def _build_decoder_only_llm_inputs( self, prompt_inputs: DecoderOnlyInputs, @@ -830,25 +614,6 @@ def _process_decoder_only_prompt( return self._build_decoder_only_llm_inputs(prompt_comps) - async def _process_decoder_only_prompt_async( - self, - prompt: SingletonPrompt, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> DecoderOnlyInputs: - """ - Async version of - [`_process_decoder_only_prompt`][vllm.inputs.preprocess.InputPreprocessor._process_decoder_only_prompt]. - """ - prompt_comps = await self._prompt_to_llm_inputs_async( - prompt, - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - - return self._build_decoder_only_llm_inputs(prompt_comps) - def preprocess( self, prompt: PromptType, @@ -877,37 +642,6 @@ def preprocess( mm_uuids=mm_uuids, ) - async def preprocess_async( - self, - prompt: PromptType, - tokenization_kwargs: Optional[dict[str, Any]] = None, - *, - mm_uuids: Optional[MultiModalUUIDDict] = None, - ) -> ProcessorInputs: - """ - Async version of - [`preprocess`][vllm.inputs.preprocess.InputPreprocessor.preprocess]. - """ - if self.model_config.is_encoder_decoder: - # Encoder-decoder model requires special mapping of - # input prompts to encoder & decoder. 
- return await self._process_encoder_decoder_prompt_async( - prompt, - tokenization_kwargs, - mm_uuids=mm_uuids, - ) - - if is_explicit_encoder_decoder_prompt(prompt): - raise ValueError("Cannot pass encoder-decoder prompt " - "to decoder-only models") - - # Decoder-only operation - return await self._process_decoder_only_prompt_async( - prompt, - tokenization_kwargs=tokenization_kwargs, - mm_uuids=mm_uuids, - ) - def clear_cache(self) -> None: if self.mm_processor_cache is not None: self.mm_processor_cache.clear_cache() From 89fa54e6f7a0789161ab73f604fdec5be6b57e58 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 05:54:20 +0800 Subject: [PATCH 394/518] [Optimization] Use a cheaper cache key in `get_model_architecture` (#25682) Signed-off-by: DarkLight1337 --- vllm/model_executor/model_loader/utils.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/model_loader/utils.py b/vllm/model_executor/model_loader/utils.py index e007d431880e..03202e13c280 100644 --- a/vllm/model_executor/model_loader/utils.py +++ b/vllm/model_executor/model_loader/utils.py @@ -165,7 +165,7 @@ def device_loading_context(module: torch.nn.Module, # New parameters or parameters already on target device are untouched -_MODEL_ARCH_BY_HASH = dict[str, tuple[type[nn.Module], str]]() +_MODEL_ARCH_BY_HASH = dict[int, tuple[type[nn.Module], str]]() """Caches the outputs of `_get_model_architecture`.""" @@ -215,7 +215,14 @@ def _get_model_architecture( def get_model_architecture( model_config: ModelConfig) -> tuple[type[nn.Module], str]: - key = model_config.compute_hash() + key = hash(( + model_config.model, + model_config.convert_type, + model_config.runner_type, + model_config.trust_remote_code, + model_config.model_impl, + tuple(getattr(model_config.hf_config, "architectures", [])), + )) if key in _MODEL_ARCH_BY_HASH: return _MODEL_ARCH_BY_HASH[key] From e71b8e210db4b98ffa4398d25f8bdf280686ad78 Mon Sep 17 00:00:00 2001 From: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Date: Thu, 25 Sep 2025 18:22:03 -0400 Subject: [PATCH 395/518] [Spec Decode] Add Batch Parallel Ngram. Upto 8x lower overhead. 
(#24986) Signed-off-by: Ekagra Ranjan <3116519+ekagra-ranjan@users.noreply.github.com> Co-authored-by: Nick Hill --- benchmarks/benchmark_ngram_proposer.py | 107 +++++++++++++- tests/v1/spec_decode/test_ngram.py | 142 ++++++++++++++---- vllm/v1/sample/rejection_sampler.py | 2 +- vllm/v1/spec_decode/ngram_proposer.py | 195 ++++++++++++++++++++----- vllm/v1/worker/gpu_model_runner.py | 42 +----- 5 files changed, 381 insertions(+), 107 deletions(-) diff --git a/benchmarks/benchmark_ngram_proposer.py b/benchmarks/benchmark_ngram_proposer.py index 11833fa1b3c8..d4b83edbd940 100644 --- a/benchmarks/benchmark_ngram_proposer.py +++ b/benchmarks/benchmark_ngram_proposer.py @@ -1,17 +1,31 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import gc +import time +from unittest import mock import numpy as np from tabulate import tabulate from benchmark_utils import TimeCollector -from vllm.config import ModelConfig, SpeculativeConfig, VllmConfig +from vllm.config import ( + CacheConfig, + DeviceConfig, + LoadConfig, + ModelConfig, + ParallelConfig, + SchedulerConfig, + SpeculativeConfig, + VllmConfig, +) +from vllm.platforms import current_platform from vllm.utils import FlexibleArgumentParser from vllm.v1.spec_decode.ngram_proposer import NgramProposer +from vllm.v1.worker.gpu_input_batch import InputBatch +from vllm.v1.worker.gpu_model_runner import GPUModelRunner -def main(args): +def benchmark_propose(args): rows = [] for max_ngram in args.max_ngram: collector = TimeCollector(TimeCollector.US) @@ -69,10 +83,88 @@ def main(args): ) +def benchmark_batched_propose(args): + NUM_SPECULATIVE_TOKENS_NGRAM = 10 + PROMPT_LOOKUP_MIN = 5 + PROMPT_LOOKUP_MAX = 15 + MAX_MODEL_LEN = int(1e7) + DEVICE = current_platform.device_type + + model_config = ModelConfig(model="facebook/opt-125m", runner="generate") + + speculative_config = SpeculativeConfig( + target_model_config=model_config, + target_parallel_config=ParallelConfig(), + method="ngram", + num_speculative_tokens=NUM_SPECULATIVE_TOKENS_NGRAM, + prompt_lookup_max=PROMPT_LOOKUP_MAX, + prompt_lookup_min=PROMPT_LOOKUP_MIN, + ) + + vllm_config = VllmConfig( + model_config=model_config, + cache_config=CacheConfig(), + speculative_config=speculative_config, + device_config=DeviceConfig(device=current_platform.device_type), + parallel_config=ParallelConfig(), + load_config=LoadConfig(), + scheduler_config=SchedulerConfig(), + ) + + # monkey patch vllm.v1.worker.gpu_model_runner.get_pp_group + mock_pp_group = mock.MagicMock() + mock_pp_group.world_size = 1 + with mock.patch( + "vllm.v1.worker.gpu_model_runner.get_pp_group", return_value=mock_pp_group + ): + runner = GPUModelRunner(vllm_config, DEVICE) + + # hack max model len + runner.max_model_len = MAX_MODEL_LEN + runner.drafter.max_model_len = MAX_MODEL_LEN + + dummy_input_batch = InputBatch( + max_num_reqs=args.num_req, + max_model_len=MAX_MODEL_LEN, + max_num_batched_tokens=args.num_req * args.num_token, + device=DEVICE, + pin_memory=False, + vocab_size=256000, + block_sizes=[16], + ) + dummy_input_batch._req_ids = list(str(id) for id in range(args.num_req)) + dummy_input_batch.spec_decode_unsupported_reqs = () + dummy_input_batch.num_tokens_no_spec = [args.num_token] * args.num_req + dummy_input_batch.token_ids_cpu = np.random.randint( + 0, 20, (args.num_req, args.num_token) + ) + + runner.input_batch = dummy_input_batch + + sampled_token_ids = [[0]] * args.num_req + + print("Starting benchmark") + # first run is warmup so ignore it + for _ in 
range(args.num_iteration): + start = time.time() + runner.drafter.propose( + sampled_token_ids, + dummy_input_batch.req_ids, + dummy_input_batch.num_tokens_no_spec, + dummy_input_batch.token_ids_cpu, + dummy_input_batch.spec_decode_unsupported_reqs, + ) + end = time.time() + print(f"Iteration time (s): {end - start}") + + def invoke_main() -> None: parser = FlexibleArgumentParser( description="Benchmark the performance of N-gram speculative decode drafting" ) + parser.add_argument( + "--batched", action="store_true", help="consider time to prepare batch" + ) # noqa: E501 parser.add_argument( "--num-iteration", type=int, @@ -105,8 +197,17 @@ def invoke_main() -> None: help="Number of speculative tokens to generate", ) args = parser.parse_args() - main(args) + + if not args.batched: + benchmark_propose(args) + else: + benchmark_batched_propose(args) +""" +# Example command lines: +# time python3 benchmarks/benchmark_ngram_proposer.py +# time python3 benchmarks/benchmark_ngram_proposer.py --batched --num-iteration 4 --num-token 1000000 --num-req 128 +""" # noqa: E501 if __name__ == "__main__": invoke_main() # pragma: no cover diff --git a/tests/v1/spec_decode/test_ngram.py b/tests/v1/spec_decode/test_ngram.py index 4193f4041b32..344d19c60db7 100644 --- a/tests/v1/spec_decode/test_ngram.py +++ b/tests/v1/spec_decode/test_ngram.py @@ -9,11 +9,13 @@ def test_find_longest_matched_ngram_and_propose_tokens(): tokens = np.array([1, 2, 3, 4, 1, 2, 3, 5, 6]) - assert _find_longest_matched_ngram_and_propose_tokens(origin_tokens=tokens, - min_ngram=2, - max_ngram=2, - max_model_len=1024, - k=2) is None + result = _find_longest_matched_ngram_and_propose_tokens( + origin_tokens=tokens, + min_ngram=2, + max_ngram=2, + max_model_len=1024, + k=2) + assert len(result) == 0 tokens = np.array([1, 2, 3, 4, 1, 2, 3]) np.testing.assert_array_equal( @@ -62,7 +64,7 @@ def test_find_longest_matched_ngram_and_propose_tokens(): def test_ngram_proposer(): - def ngram_proposer(min_n: int, max_n: int, k: int) -> NgramProposer: + def get_ngram_proposer(min_n: int, max_n: int, k: int) -> NgramProposer: # Dummy model config. Just to set max_model_len. model_config = ModelConfig(model="facebook/opt-125m") return NgramProposer( @@ -75,36 +77,120 @@ def ngram_proposer(min_n: int, max_n: int, k: int) -> NgramProposer: ))) # No match. - result = ngram_proposer( - min_n=2, max_n=2, - k=2).propose(context_token_ids=np.array([1, 2, 3, 4, 5])) - assert result is None + token_ids_cpu = np.array([[1, 2, 3, 4, 5]]) + result = get_ngram_proposer(min_n=2, max_n=2, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert len(result[0]) == 0 # No match for 4-gram. - result = ngram_proposer( - min_n=4, max_n=4, - k=2).propose(context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3])) - assert result is None + token_ids_cpu = np.array([[1, 2, 3, 4, 1, 2, 3]]) + result = get_ngram_proposer(min_n=4, max_n=4, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert len(result[0]) == 0 # No match for 4-gram but match for 3-gram. 
- result = ngram_proposer( - min_n=3, max_n=4, - k=2).propose(context_token_ids=np.array([1, 2, 3, 4, 1, 2, 3])) - assert np.array_equal(result, np.array([4, 1])) + token_ids_cpu = np.array([[1, 2, 3, 4, 1, 2, 3]]) + result = get_ngram_proposer(min_n=3, max_n=4, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert np.array_equal(result, np.array([[4, 1]])) # Match for both 4-gram and 3-gram. # In this case, the proposer should return the 4-gram match. - result = ngram_proposer(min_n=3, max_n=4, k=2).propose( - context_token_ids=np.array([2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4])) - assert np.array_equal(result, np.array([1, 2])) # Not [5, 1] + token_ids_cpu = np.array([[2, 3, 4, 5, 1, 2, 3, 4, 1, 2, 3, 4]]) + result = get_ngram_proposer(min_n=3, max_n=4, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert np.array_equal(result, np.array([[1, 2]])) # Not [5, 1]] # Match for 2-gram and 3-gram, but not 4-gram. - result = ngram_proposer(min_n=2, max_n=4, k=2).propose( - context_token_ids=np.array([3, 4, 5, 2, 3, 4, 1, 2, 3, 4])) - assert np.array_equal(result, np.array([1, 2])) # Not [5, 2] + token_ids_cpu = np.array([[3, 4, 5, 2, 3, 4, 1, 2, 3, 4]]) + result = get_ngram_proposer(min_n=2, max_n=4, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert np.array_equal(result, np.array([[1, 2]])) # Not [5, 2]] # Multiple 3-gram matched, but always pick the first one. - result = ngram_proposer( - min_n=3, max_n=3, k=2).propose(context_token_ids=np.array( - [1, 2, 3, 100, 1, 2, 3, 200, 1, 2, 3, 300, 1, 2, 3])) - assert np.array_equal(result, np.array([100, 1])) + token_ids_cpu = np.array( + [[1, 2, 3, 100, 1, 2, 3, 200, 1, 2, 3, 300, 1, 2, 3]]) + result = get_ngram_proposer(min_n=3, max_n=3, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert np.array_equal(result, np.array([[100, 1]])) + + # check empty input + token_ids_cpu = np.array([[]]) + result = get_ngram_proposer(min_n=2, max_n=2, k=2).propose( + sampled_token_ids=[[0]], + req_ids=["0"], + num_tokens_no_spec=np.array([len(c) for c in token_ids_cpu]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert len(result[0]) == 0 + + # check multibatch input + # first request has 5 tokens and a match + # second request has 3 tokens and no match. 
Padded with -1 for max len 5 + token_ids_cpu = np.array([[1, 2, 3, 1, 2], [4, 5, 6, -1, -1]]) + result = get_ngram_proposer(min_n=2, max_n=2, k=2).propose( + sampled_token_ids=[[0], [1]], + req_ids=["0", "1"], + num_tokens_no_spec=np.array([5, 3]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert len(result[0]) == 2 + assert np.array_equal(result[0], np.array([3, 1])) + assert np.array_equal(result[1], np.array([])) + + # test if 0 threads available: can happen if TP size > CPU count + ngram_proposer = get_ngram_proposer(min_n=2, max_n=2, k=2) + ngram_proposer.num_numba_thread_available = 0 + # set max_model_len to 2 * threshold to ensure multithread is used + num_tokens_threshold = ngram_proposer.num_tokens_threshold + ngram_proposer.max_model_len = 2 * num_tokens_threshold + # using multibatch test + middle_integer = num_tokens_threshold // 2 + input_1 = [_ for _ in range(num_tokens_threshold)] + input_1 += [middle_integer, middle_integer + 1] + input_2 = [-1] * len(input_1) + input_2[:3] = [4, 5, 6] + token_ids_cpu = np.array([input_1, input_2]) + result = ngram_proposer.propose( + sampled_token_ids=[[0], [1]], + req_ids=["0", "1"], + num_tokens_no_spec=np.array([len(input_1), 3]), + token_ids_cpu=token_ids_cpu, + spec_decode_unsupported_reqs=(), + ) + assert len(result[0]) == 2 + assert np.array_equal(result[0], + np.array([middle_integer + 2, middle_integer + 3])) + assert np.array_equal(result[1], np.array([])) diff --git a/vllm/v1/sample/rejection_sampler.py b/vllm/v1/sample/rejection_sampler.py index ced5c7a97038..8f0b38ecb34d 100644 --- a/vllm/v1/sample/rejection_sampler.py +++ b/vllm/v1/sample/rejection_sampler.py @@ -17,7 +17,7 @@ GREEDY_TEMPERATURE: tl.constexpr = -1 # Maximum number of speculative draft tokens allowed per request in a single # step. This value is chosen to be large enough to handle typical use cases. -MAX_SPEC_LEN = 32 +MAX_SPEC_LEN = 128 class RejectionSampler(nn.Module): diff --git a/vllm/v1/spec_decode/ngram_proposer.py b/vllm/v1/spec_decode/ngram_proposer.py index b92e396d4536..fd8e0a6fd1d2 100644 --- a/vllm/v1/spec_decode/ngram_proposer.py +++ b/vllm/v1/spec_decode/ngram_proposer.py @@ -1,9 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional +import os import numpy as np -from numba import jit +from numba import get_num_threads, jit, njit, prange, set_num_threads from vllm.config import VllmConfig @@ -26,55 +26,174 @@ def __init__(self, vllm_config: VllmConfig): # Maximum length of the model. self.max_model_len = vllm_config.model_config.max_model_len + # Pre-allocate buffers for numba batch propose. + max_num_seqs = vllm_config.scheduler_config.max_num_seqs + self.valid_ngram_draft = np.zeros((max_num_seqs, self.k), + dtype=np.int32) + self.valid_ngram_num_drafts = np.zeros((max_num_seqs), dtype=np.int32) + + # Threshold of total number of tokens in the batch to enable + # multi-threading in numba batch propose. + self.num_tokens_threshold = 8192 + tp_size = vllm_config.parallel_config.tensor_parallel_size + cpu_count = os.cpu_count() + # Max number of threads for numba parallel processing. + if cpu_count: + # Divide by 2 to use physical cores + # and not logical cores (hyper-threading). + # Cap the number of threads to 8 to avoid using too many threads + # since other components like frontend (incl tokenization) + # and Structured Outputs also use multiple threads. 
+ # TODO(ekagra-ranjan): bump up the cap from 1 to 8 + # when TP parallelization for ngram is implemented. + self.num_numba_thread_available = min(1, (cpu_count // 2)) + # Divide by tp_size to ensure each tensor parallel rank + # has some threads since all ranks will run this. + self.num_numba_thread_available //= tp_size + else: + self.num_numba_thread_available = 1 + # Trigger Numba JIT compilation for N-gram proposer. # This usually takes less than 1 second. - self.propose(np.zeros(1024, dtype=np.int32)) + self.propose([[]] * 1024, [""] * 1024, np.zeros(1024, dtype=np.int32), + np.zeros((1024, self.max_model_len), dtype=np.int32), + set()) - def propose( + def batch_propose( self, - context_token_ids: np.ndarray, - ) -> Optional[np.ndarray]: - """Proposes the next sequence of tokens based on n-gram pattern - matching in the context. The function finds matches of the last n - tokens in the previous context, and returns k tokens that followed - that match. + num_requests: int, + valid_ngram_requests: list, + num_tokens_no_spec: np.ndarray, + token_ids_cpu: np.ndarray, + ) -> list[list[int]]: + """Batch version of ngram proposer using numba for acceleration. Args: - context_token_ids: Numpy array of token IDs representing the - context sequence. + valid_ngram_requests: + Set of indices of requests that need ngram proposals. + num_tokens_no_spec: + Numpy array of shape (batch_size,) representing the number + of tokens without speculative tokens for each request. + token_ids_cpu: + Numpy array of shape (batch_size, max_model_len) + representing the token IDs for each request. Returns: - np.ndarray: The sequence of tokens that followed - the matched n-gram in the context. - None: If no matching n-gram pattern is found. - - Example: - If context_token_ids = [1,2,3,4,2,3], min_n = 2, max_n = 3, and - k = 4: - - The last 3 (= max_n) tokens [4,2,3] cannot find a match. - - The last 2 tokens [2,3] will be matched against the previous - 4 tokens [1,2,3,4]. - - Finding a match of [2,3] would return the tokens that - followed that pattern. Here we will return [4,2,3] because - we only have three tokens after the match. + list[list[int]]: + A list where each element is a list of proposed + token IDs for the corresponding request. """ - # TODO(woosuk): Optimize this. - return _find_longest_matched_ngram_and_propose_tokens( - origin_tokens=context_token_ids, - min_ngram=self.min_n, - max_ngram=self.max_n, - max_model_len=self.max_model_len, - k=self.k) + draft_token_ids: list[list[int]] = [] + + # Only run batch propose if there are requests needing ngram proposals. + # avoid calling numba function with empty list which causes error + # ValueError: cannot compute fingerprint of empty list + if num_ngram_requests := len(valid_ngram_requests): + original_num_numba_threads = get_num_threads() + # Ensure we use at least one thread. + # If total tokens is small, using multiple threads + # may slow down due to overhead. + total_tokens = np.sum(num_tokens_no_spec) + if total_tokens >= self.num_tokens_threshold: + final_num_threads = max( + 1, min(self.num_numba_thread_available, + num_ngram_requests)) + set_num_threads(final_num_threads) + else: + set_num_threads(1) + + batch_propose_numba(valid_ngram_requests, num_tokens_no_spec, + token_ids_cpu, self.min_n, self.max_n, + self.max_model_len, self.k, + self.valid_ngram_draft, + self.valid_ngram_num_drafts) + + # Restore original number of threads. 
+ set_num_threads(original_num_numba_threads) + + for i in range(num_requests): + if i in valid_ngram_requests and \ + self.valid_ngram_num_drafts[i] > 0: + draft_token_ids.append(self.valid_ngram_draft[ + i, :self.valid_ngram_num_drafts[i]].tolist()) + else: + draft_token_ids.append([]) + + return draft_token_ids + + def propose( + self, + sampled_token_ids: list[list[int]], + req_ids: list[str], + num_tokens_no_spec: np.ndarray, + token_ids_cpu: np.ndarray, + spec_decode_unsupported_reqs: set, + ) -> list[list[int]]: + + # find which requests need ngram proposals + valid_ngram_requests = [] + for i, sampled_ids in enumerate(sampled_token_ids): + num_sampled_ids = len(sampled_ids) + if not num_sampled_ids: + # Skip speculative decoding. + continue + + # Skip requests that require sampling parameters that are not + # supported with speculative decoding. + req_id = req_ids[i] + if req_id in spec_decode_unsupported_reqs: + continue + + num_tokens = num_tokens_no_spec[i] + if num_tokens >= self.max_model_len: + # Skip requests that have already reached the max model length. + continue + + valid_ngram_requests.append(i) + + draft_token_ids = self.batch_propose( + len(sampled_token_ids), + valid_ngram_requests, + num_tokens_no_spec, + token_ids_cpu, + ) + + return draft_token_ids def load_model(self, *args, **kwargs): # No model to load. pass +@njit(parallel=True) +def batch_propose_numba(valid_ngram_requests: list, + num_tokens_no_spec: np.ndarray, + token_ids_cpu: np.ndarray, min_n: int, max_n: int, + max_model_len: int, k: int, + valid_ngram_draft: np.ndarray, + valid_ngram_num_drafts: np.ndarray): + for i in prange(len(valid_ngram_requests)): + idx = valid_ngram_requests[i] + num_tokens = num_tokens_no_spec[idx] + context_token_ids = token_ids_cpu[idx, :num_tokens] + drafter_output = _find_longest_matched_ngram_and_propose_tokens( + origin_tokens=context_token_ids, + min_ngram=min_n, + max_ngram=max_n, + max_model_len=max_model_len, + k=k) + + valid_ngram_num_drafts[i] = drafter_output.shape[0] + if len(drafter_output): + valid_ngram_draft[i, :drafter_output.shape[0]] = drafter_output + + @jit(nopython=True) -def _find_longest_matched_ngram_and_propose_tokens( - origin_tokens: np.ndarray, min_ngram: int, max_ngram: int, - max_model_len: int, k: int) -> Optional[np.ndarray]: +def _find_longest_matched_ngram_and_propose_tokens(origin_tokens: np.ndarray, + min_ngram: int, + max_ngram: int, + max_model_len: int, + k: int) -> np.ndarray: """ Find the longest n-gram which matches the suffix of the given tokens whose length is within [min_ngram, max_ngram] (inclusive). @@ -84,12 +203,12 @@ def _find_longest_matched_ngram_and_propose_tokens( # Do not generate draft tokens is context is shorter than minimum n-gram total_token = origin_tokens.shape[0] if total_token < min_ngram: - return None + return np.empty((0, ), dtype=origin_tokens.dtype) # Do not generate draft tokens beyond the max model length. 
k = min(k, max_model_len - total_token) if k <= 0: - return None + return np.empty((0, ), dtype=origin_tokens.dtype) # Flip tokens, and the goal become to find longest ngram # on the rightmost position which matches the prefix with @@ -146,7 +265,7 @@ def _find_longest_matched_ngram_and_propose_tokens( if longest_ngram < min_ngram: # No valid ngram is found - return None + return np.empty((0, ), dtype=origin_tokens.dtype) # Flip the position back, so in origin_tokens, # origin_tokens[total_token-1-position:total_token-1-position+longest_ngram] diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f6f697d74a20..b7a066654d70 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2404,8 +2404,11 @@ def propose_draft_token_ids( if self.speculative_config.method == "ngram": assert isinstance(sampled_token_ids, list) assert isinstance(self.drafter, NgramProposer) - draft_token_ids = self.propose_ngram_draft_token_ids( - sampled_token_ids) + draft_token_ids = self.drafter.propose( + sampled_token_ids, self.input_batch.req_ids, + self.input_batch.num_tokens_no_spec, + self.input_batch.token_ids_cpu, + self.input_batch.spec_decode_unsupported_reqs) elif self.speculative_config.method == "medusa": assert isinstance(sampled_token_ids, list) assert isinstance(self.drafter, MedusaProposer) @@ -2515,41 +2518,6 @@ def propose_draft_token_ids( ) return draft_token_ids - def propose_ngram_draft_token_ids( - self, - sampled_token_ids: list[list[int]], - ) -> list[list[int]]: - # TODO(woosuk): Optimize. - req_ids = self.input_batch.req_ids - draft_token_ids: list[list[int]] = [] - for i, sampled_ids in enumerate(sampled_token_ids): - num_sampled_ids = len(sampled_ids) - if not num_sampled_ids: - # Skip speculative decoding. - draft_token_ids.append([]) - continue - - # Skip requests that require sampling parameters that are not - # supported with speculative decoding. - req_id = req_ids[i] - if req_id in self.input_batch.spec_decode_unsupported_reqs: - draft_token_ids.append([]) - continue - - num_tokens = self.input_batch.num_tokens_no_spec[i] - if num_tokens >= self.max_model_len: - # Skip requests that have already reached the max model length. 
- draft_token_ids.append([]) - continue - - drafter_output = self.drafter.propose( - self.input_batch.token_ids_cpu[i, :num_tokens]) - if drafter_output is None or len(drafter_output) == 0: - draft_token_ids.append([]) - else: - draft_token_ids.append(drafter_output.tolist()) - return draft_token_ids - def update_config(self, overrides: dict[str, Any]) -> None: allowed_config_names = {"load_config", "model_config"} for config_name, config_overrides in overrides.items(): From 8c435c9bcea818fa5a748fec0f8289505447001e Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Thu, 25 Sep 2025 15:31:17 -0700 Subject: [PATCH 396/518] [Core] Enable command line logging for LLMEngine (#25610) Co-authored-by: Ye (Charlotte) Qi Signed-off-by: Zhuohan Li --- vllm/v1/engine/llm_engine.py | 39 ++++++++++++++++++++++++++++-------- vllm/v1/metrics/loggers.py | 1 - 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/vllm/v1/engine/llm_engine.py b/vllm/v1/engine/llm_engine.py index 92c861d9e91f..2738776e3d37 100644 --- a/vllm/v1/engine/llm_engine.py +++ b/vllm/v1/engine/llm_engine.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import time from collections.abc import Mapping from copy import copy from typing import Any, Callable, Optional, Union @@ -31,8 +32,7 @@ from vllm.v1.engine.parallel_sampling import ParentRequest from vllm.v1.engine.processor import Processor from vllm.v1.executor.abstract import Executor -from vllm.v1.metrics.loggers import (PrometheusStatLogger, StatLoggerBase, - StatLoggerFactory) +from vllm.v1.metrics.loggers import StatLoggerFactory, StatLoggerManager from vllm.v1.metrics.reader import Metric, get_metrics_snapshot from vllm.v1.metrics.stats import IterationStats from vllm.v1.worker.worker_base import WorkerBase @@ -74,9 +74,6 @@ def __init__( self.cache_config = vllm_config.cache_config self.log_stats = log_stats - self.stat_logger: Optional[StatLoggerBase] = None - if self.log_stats: - self.stat_logger = PrometheusStatLogger(vllm_config) executor_backend = ( self.vllm_config.parallel_config.distributed_executor_backend) @@ -122,6 +119,15 @@ def __init__( log_stats=self.log_stats, ) + self.logger_manager: Optional[StatLoggerManager] = None + if self.log_stats: + self.logger_manager = StatLoggerManager( + vllm_config=vllm_config, + custom_stat_loggers=stat_loggers, + enable_default_loggers=log_stats, + ) + self.logger_manager.log_engine_initialized() + if not multiprocess_mode: # for v0 compatibility self.model_executor = self.engine_core.engine_core.model_executor # type: ignore @@ -269,10 +275,13 @@ def step(self) -> Union[list[RequestOutput], list[PoolingRequestOutput]]: self.engine_core.abort_requests(processed_outputs.reqs_to_abort) # 4) Record stats - if self.stat_logger is not None: + if self.logger_manager is not None: assert outputs.scheduler_stats is not None - self.stat_logger.record(scheduler_stats=outputs.scheduler_stats, - iteration_stats=iteration_stats) + self.logger_manager.record( + scheduler_stats=outputs.scheduler_stats, + iteration_stats=iteration_stats, + ) + self.do_log_stats_with_interval() return processed_outputs.request_outputs @@ -315,6 +324,20 @@ def get_tokenizer(self) -> AnyTokenizer: return self.tokenizer + def do_log_stats(self) -> None: + """Log stats if logging is enabled.""" + if self.logger_manager: + self.logger_manager.log() + + def do_log_stats_with_interval(self) -> None: + """Log stats when the time interval has passed.""" + now = time.time() + if not 
hasattr(self, "_last_log_time"): + self._last_log_time = now + if now - self._last_log_time >= envs.VLLM_LOG_STATS_INTERVAL: + self.do_log_stats() + self._last_log_time = now + def add_lora(self, lora_request: LoRARequest) -> bool: """Load a new LoRA adapter into the engine for future requests.""" return self.engine_core.add_lora(lora_request) diff --git a/vllm/v1/metrics/loggers.py b/vllm/v1/metrics/loggers.py index 52264e41e7a1..d68d111c67ca 100644 --- a/vllm/v1/metrics/loggers.py +++ b/vllm/v1/metrics/loggers.py @@ -90,7 +90,6 @@ def record(self, iteration_stats: Optional[IterationStats], engine_idx: int = 0): """Log Stats to standard output.""" - if iteration_stats: self._track_iteration_stats(iteration_stats) From 57329a8c013ca9e5d575faad3f04436f2eabad15 Mon Sep 17 00:00:00 2001 From: tomeras91 <57313761+tomeras91@users.noreply.github.com> Date: Fri, 26 Sep 2025 02:10:29 +0300 Subject: [PATCH 397/518] [Model] rename NemotronH_Nano_VL -> NemotronH_Nano_VL_V2 (#25708) Signed-off-by: Tomer Asida <57313761+tomeras91@users.noreply.github.com> --- tests/models/registry.py | 2 +- vllm/model_executor/models/nano_nemotron_vl.py | 8 ++++---- vllm/model_executor/models/registry.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 8dbada0d03a0..6a6e2538559f 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -531,7 +531,7 @@ def check_available_online( trust_remote_code=True), "Llama_Nemotron_Nano_VL" : _HfExamplesInfo("nvidia/Llama-3.1-Nemotron-Nano-VL-8B-V1", # noqa: E501 trust_remote_code=True), - "NemotronH_Nano_VL": _HfExamplesInfo("nano_vl_dummy", + "NemotronH_Nano_VL_V2": _HfExamplesInfo("nano_vl_dummy", is_available_online=False, trust_remote_code=True), "Ovis": _HfExamplesInfo("AIDC-AI/Ovis2-1B", trust_remote_code=True, diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index ae50f1aefc6f..2b68d40cf2c6 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -869,8 +869,8 @@ def get_dummy_mm_data( info=NanoNemotronVLProcessingInfo, dummy_inputs=NanoNemotronVLDummyInputsBuilder, ) -class NemotronH_Nano_VL(nn.Module, HasInnerState, IsHybrid, - SupportsMultiModal): +class NemotronH_Nano_VL_V2(nn.Module, HasInnerState, IsHybrid, + SupportsMultiModal): @classmethod def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: @@ -1249,7 +1249,7 @@ def print_architecture(self, try: print("=" * 100) - print("NemotronH_Nano_VL Model Architecture") + print("NemotronH_Nano_VL_V2 Model Architecture") print("=" * 100) total_params = 0 @@ -1333,7 +1333,7 @@ def get_model_info(self): component_info[component]["size"] += param.numel() return { - "model_name": "NemotronH_Nano_VL", + "model_name": "NemotronH_Nano_VL_V2", "total_parameters": total_params, "memory_estimate_mb": total_params * 2 / (1024**2), # bfloat16 "components": component_info, diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 432060acfee6..10e9aa4db078 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -231,7 +231,7 @@ "GraniteSpeechForConditionalGeneration": ("granite_speech", "GraniteSpeechForConditionalGeneration"), # noqa: E501 "H2OVLChatModel": ("h2ovl", "H2OVLChatModel"), "InternVLChatModel": ("internvl", "InternVLChatModel"), - "NemotronH_Nano_VL": ("nano_nemotron_vl", "NemotronH_Nano_VL"), + 
"NemotronH_Nano_VL_V2": ("nano_nemotron_vl", "NemotronH_Nano_VL_V2"), "InternS1ForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"), # noqa: E501 "InternVLForConditionalGeneration": ("interns1", "InternS1ForConditionalGeneration"), # noqa: E501 "Idefics3ForConditionalGeneration":("idefics3","Idefics3ForConditionalGeneration"), From 081b5594a2b1a37ea793659bb6767c497beef45d Mon Sep 17 00:00:00 2001 From: Shu Wang Date: Thu, 25 Sep 2025 18:35:14 -0500 Subject: [PATCH 398/518] Fix routing_bias dtype (#25711) Signed-off-by: Shu Wang. --- vllm/model_executor/layers/quantization/modelopt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 4491fcf18106..0be43da00b53 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -1454,10 +1454,13 @@ def apply( routing_method_type = flashinfer.RoutingMethodType.DeepSeekV3 if use_llama4_routing: routing_method_type = flashinfer.RoutingMethodType.Llama4 + routing_bias = e_score_correction_bias + if routing_bias is not None: + routing_bias = routing_bias.to(torch.bfloat16) out = flashinfer.fused_moe.trtllm_fp4_block_scale_moe( routing_logits=router_logits if use_llama4_routing else router_logits.to(torch.float32), - routing_bias=e_score_correction_bias, + routing_bias=routing_bias, hidden_states=hidden_states_fp4, hidden_states_scale=hidden_states_scale_linear_fp4.view( torch.float8_e4m3fn).flatten(), From 9fe4c2bdb9859c14ad7f7479e1db7e01083bada3 Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Thu, 25 Sep 2025 20:13:41 -0400 Subject: [PATCH 399/518] [Refactor] Remove DeepGEMM OP Register (#25710) Signed-off-by: yewentao256 --- .../layers/quantization/deepgemm.py | 78 ------------------- .../layers/quantization/utils/fp8_utils.py | 17 ++-- 2 files changed, 5 insertions(+), 90 deletions(-) delete mode 100644 vllm/model_executor/layers/quantization/deepgemm.py diff --git a/vllm/model_executor/layers/quantization/deepgemm.py b/vllm/model_executor/layers/quantization/deepgemm.py deleted file mode 100644 index 2236824ce910..000000000000 --- a/vllm/model_executor/layers/quantization/deepgemm.py +++ /dev/null @@ -1,78 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import logging - -import torch - -from vllm.triton_utils import triton -from vllm.utils import direct_register_custom_op -from vllm.utils.deep_gemm import fp8_gemm_nt - -logger = logging.getLogger(__name__) - - -def prepare_block_fp8_matmul_inputs( - A: torch.Tensor, - B: torch.Tensor, - As: torch.Tensor, - Bs: torch.Tensor, - block_size: list[int], - output_dtype: torch.dtype = torch.float16, -) -> tuple[int, int, int, torch.Tensor]: - assert len(block_size) == 2 - block_n, block_k = block_size[0], block_size[1] - - assert A.shape[-1] == B.shape[-1] - assert A.shape[:-1] == As.shape[:-1] - assert A.is_contiguous() - assert triton.cdiv(A.shape[-1], block_k) == As.shape[-1] - - M = A.numel() // A.shape[-1] - - assert B.ndim == 2 - assert B.is_contiguous() - assert Bs.ndim == 2 - N, K = B.shape - assert triton.cdiv(N, block_n) == Bs.shape[0] - assert triton.cdiv(K, block_k) == Bs.shape[1] - - C_shape = A.shape[:-1] + (N, ) - C = A.new_empty(C_shape, dtype=output_dtype) - - return M, N, K, C - - -def w8a8_block_fp8_matmul_deepgemm( - A: torch.Tensor, - B: torch.Tensor, - 
As: torch.Tensor, - Bs: torch.Tensor, - block_size: list[int], - output_dtype: torch.dtype, -) -> torch.Tensor: - M, N, K, C = prepare_block_fp8_matmul_inputs(A, B, As, Bs, block_size, - output_dtype) - # Deepgemm only supports output tensor type as bfloat16 - assert C.dtype == torch.bfloat16 - fp8_gemm_nt((A, As), (B, Bs), C) - return C - - -def w8a8_block_fp8_matmul_deepgemm_fake( - A: torch.Tensor, - B: torch.Tensor, - As: torch.Tensor, - Bs: torch.Tensor, - block_size: list[int], - output_dtype: torch.dtype, -) -> torch.Tensor: - M, N, K, C = prepare_block_fp8_matmul_inputs(A, B, As, Bs, block_size, - output_dtype) - return C - - -direct_register_custom_op( - op_name="w8a8_block_fp8_matmul_deepgemm", - op_func=w8a8_block_fp8_matmul_deepgemm, - fake_impl=w8a8_block_fp8_matmul_deepgemm_fake, -) diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index b32c67dec7ff..b2548e66827d 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -23,7 +23,7 @@ from vllm.platforms import current_platform from vllm.triton_utils import tl, triton from vllm.utils import direct_register_custom_op -from vllm.utils.deep_gemm import (is_deep_gemm_e8m0_used, +from vllm.utils.deep_gemm import (fp8_gemm_nt, is_deep_gemm_e8m0_used, should_use_deepgemm_for_fp8_linear) logger = init_logger(__name__) @@ -141,17 +141,10 @@ def apply_w8a8_block_fp8_linear( block_size[1], column_major_scales=True, ) - - # ensure DeepGEMM-backed custom op is registered before use - import vllm.model_executor.layers.quantization.deepgemm # noqa: F401 - - output = torch.ops.vllm.w8a8_block_fp8_matmul_deepgemm( - q_input, - weight, - x_scale, - weight_scale, - block_size, - output_dtype=output_dtype) + output = torch.empty((q_input.shape[0], weight.shape[0]), + dtype=torch.bfloat16, + device=q_input.device) + fp8_gemm_nt((q_input, x_scale), (weight, weight_scale), output) if bias is not None: output += bias return output.to(dtype=output_dtype).view(*output_shape) From 8b77328ffe0451f4b809de573e71b1c100c8ed67 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 25 Sep 2025 18:08:30 -0700 Subject: [PATCH 400/518] [Misc] Don't log shm dequeue delay warning on worker side (#25720) Signed-off-by: Nick Hill --- .../device_communicators/shm_broadcast.py | 60 +++++++++---------- vllm/v1/executor/multiproc_executor.py | 2 +- 2 files changed, 31 insertions(+), 31 deletions(-) diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index 499c6927f2f9..0ee432fea15e 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -387,23 +387,21 @@ def acquire_write(self, timeout: Optional[float] = None): # Release the processor to other threads sched_yield() + # if we time out, raise an exception + elapsed = time.monotonic() - start_time + if timeout is not None and elapsed > timeout: + raise TimeoutError + # if we wait for a long time, log a message - if (time.monotonic() - start_time - > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): + if elapsed > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning: logger.info( - ("No available shared memory broadcast block found" - " in %s seconds. This typically happens when some" - " processes are hanging, doing some time-consuming" - " work (e.g. 
compilation), or sitting idle."), - VLLM_RINGBUFFER_WARNING_INTERVAL, - ) + "No available shared memory broadcast block found" + " in %s seconds. This typically happens when some" + " processes are hanging or doing some" + " time-consuming work (e.g. compilation)", + VLLM_RINGBUFFER_WARNING_INTERVAL) n_warning += 1 - # if we time out, raise an exception - if (timeout is not None - and time.monotonic() - start_time > timeout): - raise TimeoutError - continue # found a block that is either # (1) not written @@ -432,7 +430,8 @@ def acquire_write(self, timeout: Optional[float] = None): @contextmanager def acquire_read(self, timeout: Optional[float] = None, - cancel: Optional[Event] = None): + cancel: Optional[Event] = None, + indefinite: bool = False): assert self._is_local_reader, "Only readers can acquire read" start_time = time.monotonic() n_warning = 1 @@ -452,26 +451,26 @@ def acquire_read(self, # Release the processor to other threads self._read_spin_timer.spin() - # if we wait for a long time, log a message - if (time.monotonic() - start_time - > VLLM_RINGBUFFER_WARNING_INTERVAL * n_warning): - logger.info( - ("No available shared memory broadcast block found" - " in %s seconds. This typically happens when some" - " processes are hanging, doing some time-consuming" - " work (e.g. compilation), or sitting idle."), - VLLM_RINGBUFFER_WARNING_INTERVAL, - ) - n_warning += 1 - if cancel is not None and cancel.is_set(): raise RuntimeError("cancelled") # if we time out, raise an exception - if (timeout is not None - and time.monotonic() - start_time > timeout): + elapsed = time.monotonic() - start_time + if timeout is not None and elapsed > timeout: raise TimeoutError + # if we wait for a long time, log a message + if not indefinite and (elapsed + > VLLM_RINGBUFFER_WARNING_INTERVAL * + n_warning): + logger.info( + "No available shared memory broadcast block found" + " in %s seconds. This typically happens when some" + " processes are hanging or doing some" + " time-consuming work (e.g. 
compilation).", + VLLM_RINGBUFFER_WARNING_INTERVAL) + n_warning += 1 + continue # found a block that is not read by this reader # let caller read from the buffer @@ -505,10 +504,11 @@ def enqueue(self, obj, timeout: Optional[float] = None): def dequeue(self, timeout: Optional[float] = None, - cancel: Optional[Event] = None): + cancel: Optional[Event] = None, + indefinite: bool = False): """ Read from message queue with optional timeout (in seconds) """ if self._is_local_reader: - with self.acquire_read(timeout, cancel) as buf: + with self.acquire_read(timeout, cancel, indefinite) as buf: overflow = buf[0] == 1 if not overflow: # no need to know the size of serialized object diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index 2aa732f34bcc..ef90af263664 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -653,7 +653,7 @@ def worker_busy_loop(self, cancel: Optional[threading.Event] = None): """Main busy loop for Multiprocessing Workers""" while True: method, args, kwargs, output_rank = self.rpc_broadcast_mq.dequeue( - cancel=cancel) + cancel=cancel, indefinite=True) try: if isinstance(method, str): func = getattr(self.worker, method) From 53a30845bef29f62140dad2113a2dc901fd959cf Mon Sep 17 00:00:00 2001 From: Aleksandr Malyshev <164964928+maleksan85@users.noreply.github.com> Date: Thu, 25 Sep 2025 18:16:53 -0700 Subject: [PATCH 401/518] Llamas 3.1 405B fp4 changes upstreaming from 355_wip (#25135) Signed-off-by: Aleksandr Malyshev Co-authored-by: Aleksandr Malyshev Co-authored-by: Doug Lehr --- vllm/envs.py | 16 ++ vllm/model_executor/layers/linear.py | 28 ++- .../layers/quantization/quark/quark.py | 1 + .../quark/schemes/quark_w4a4_mxfp4.py | 185 +++++++++++++++--- .../layers/rotary_embedding/base.py | 23 ++- .../rotary_embedding/rocm_aiter_rope_ops.py | 86 ++++++++ 6 files changed, 301 insertions(+), 38 deletions(-) create mode 100644 vllm/model_executor/layers/rotary_embedding/rocm_aiter_rope_ops.py diff --git a/vllm/envs.py b/vllm/envs.py index b8af770d05f6..832d031f998e 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -106,6 +106,8 @@ VLLM_ROCM_USE_AITER_RMSNORM: bool = True VLLM_ROCM_USE_AITER_MLA: bool = True VLLM_ROCM_USE_AITER_MHA: bool = True + VLLM_ROCM_USE_AITER_FP4_ASM_GEMM: bool = False + VLLM_ROCM_USE_TRITON_ROPE: bool = False VLLM_ROCM_USE_AITER_FP8BMM: bool = True VLLM_ROCM_USE_SKINNY_GEMM: bool = True VLLM_ROCM_FP8_PADDING: bool = True @@ -934,6 +936,18 @@ def get_vllm_port() -> Optional[int]: lambda: (os.getenv("VLLM_ROCM_USE_AITER_MHA", "True").lower() in ("true", "1")), + # Whether to use aiter fp4 gemm asm. + # By default is disabled. + "VLLM_ROCM_USE_AITER_FP4_ASM_GEMM": + lambda: (os.getenv("VLLM_ROCM_USE_AITER_FP4_ASM_GEMM", "False").lower() in + ("true", "1")), + + # Whether to use aiter rope. + # By default is disabled. + "VLLM_ROCM_USE_TRITON_ROPE": + lambda: (os.getenv("VLLM_ROCM_USE_TRITON_ROPE", "False").lower() in + ("true", "1")), + # Whether to use aiter triton fp8 bmm kernel # By default is enabled. 
"VLLM_ROCM_USE_AITER_FP8BMM": @@ -1539,6 +1553,8 @@ def compute_hash() -> str: "VLLM_ROCM_USE_AITER_RMSNORM", "VLLM_ROCM_USE_AITER_MLA", "VLLM_ROCM_USE_AITER_MHA", + "VLLM_ROCM_USE_AITER_FP4_ASM_GEMM", + "VLLM_ROCM_USE_TRITON_ROPE", "VLLM_ROCM_USE_AITER_FP8BMM", "VLLM_ROCM_USE_SKINNY_GEMM", "VLLM_ROCM_FP8_PADDING", diff --git a/vllm/model_executor/layers/linear.py b/vllm/model_executor/layers/linear.py index df5bced6b228..04a5db07e95c 100644 --- a/vllm/model_executor/layers/linear.py +++ b/vllm/model_executor/layers/linear.py @@ -323,6 +323,12 @@ def __init__( return_bias: bool = True, disable_tp: bool = False, ): + # If MergedReplicatedLinear, use output size of each partition. + if hasattr(self, "output_sizes"): + self.output_partition_sizes = self.output_sizes + else: + self.output_partition_sizes = [output_size] + super().__init__(input_size, output_size, skip_bias_add, @@ -335,7 +341,8 @@ def __init__( # All the linear layer supports quant method. assert self.quant_method is not None self.quant_method.create_weights(self, - self.input_size, [self.output_size], + self.input_size, + self.output_partition_sizes, self.input_size, self.output_size, self.params_dtype, @@ -374,12 +381,15 @@ def weight_loader(self, param: Parameter, loaded_weight: torch.Tensor): param.data.copy_(loaded_weight) def forward( - self, x: torch.Tensor + self, + x: torch.Tensor, ) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[Parameter]]]: bias = self.bias if not self.skip_bias_add else None assert self.quant_method is not None + output = self.quant_method.apply(self, x, bias) output_bias = self.bias if self.skip_bias_add else None + if not self.return_bias: return output return output, output_bias @@ -413,7 +423,7 @@ class ColumnParallelLinear(LinearBase): output_sizes: list of output sizes packed into one output, like for QKV the list would be size 3. prefix: The name of the layer in the state dict, including all parents - (e.g. model.layers.0.qkv_proj) + (e.g. model.layers.0.qkv_proj) return_bias: If true, return bias together with outputs in forward pass. disable_tp: If true, weights matrix won't be sharded through tp rank. """ @@ -535,13 +545,15 @@ def weight_loader_v2(self, param: BasevLLMParameter, param.load_column_parallel_weight(loaded_weight=loaded_weight) def forward( - self, input_ + self, + input_, ) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[Parameter]]]: bias = self.bias if not self.skip_bias_add else None # Matrix multiply. assert self.quant_method is not None output_parallel = self.quant_method.apply(self, input_, bias) + if self.gather_output and self.tp_size > 1: # All-gather across the partitions. 
output = tensor_model_parallel_all_gather(output_parallel) @@ -1326,7 +1338,8 @@ def weight_loader_v2(self, param: BasevLLMParameter, param.load_row_parallel_weight(loaded_weight=loaded_weight) def forward( - self, input_ + self, + input_, ) -> Union[torch.Tensor, tuple[torch.Tensor, Optional[Parameter]]]: if self.input_is_parallel: input_parallel = input_ @@ -1340,9 +1353,8 @@ def forward( # Only fuse bias add into GEMM for rank 0 (this ensures that # bias will not get added more than once in TP>1 case) bias_ = None if (self.tp_rank > 0 or self.skip_bias_add) else self.bias - output_parallel = self.quant_method.apply(self, - input_parallel, - bias=bias_) + output_parallel = self.quant_method.apply(self, input_parallel, bias_) + if self.reduce_results and self.tp_size > 1: output = tensor_model_parallel_all_reduce(output_parallel) else: diff --git a/vllm/model_executor/layers/quantization/quark/quark.py b/vllm/model_executor/layers/quantization/quark/quark.py index b67ee5cf453d..c65212c01819 100644 --- a/vllm/model_executor/layers/quantization/quark/quark.py +++ b/vllm/model_executor/layers/quantization/quark/quark.py @@ -395,6 +395,7 @@ def apply(self, scheme = layer.scheme if scheme is None: raise ValueError("A scheme must be defined for each layer") + return scheme.apply_weights(layer, x, bias=bias) diff --git a/vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py b/vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py index 880438a22a69..f8628a82277b 100644 --- a/vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py +++ b/vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py @@ -1,12 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from functools import cache from typing import Any, Callable, Optional import torch import torch.nn.functional as F -from vllm.logger import init_logger +from vllm import envs from vllm.model_executor.layers.quantization.quark.schemes import QuarkScheme from vllm.model_executor.layers.quantization.utils.mxfp4_utils import ( OCP_MX_BLOCK_SIZE, dequant_mxfp4, quant_dequant_mxfp4) @@ -14,7 +15,90 @@ PackedvLLMParameter) from vllm.platforms import current_platform -logger = init_logger(__name__) + +@cache +def is_rocm_aiter_fp4_asm_gemm_enabled() -> bool: + return current_platform.is_rocm() \ + and envs.VLLM_ROCM_USE_AITER_FP4_ASM_GEMM \ + and envs.VLLM_ROCM_USE_AITER + + +try: + from aiter.ops.shuffle import shuffle_weight + from aiter.ops.triton.gemm_afp4wfp4 import gemm_afp4wfp4 + from aiter.ops.triton.quant import dynamic_mxfp4_quant + + from vllm.utils import direct_register_custom_op + if is_rocm_aiter_fp4_asm_gemm_enabled(): + from aiter import gemm_a4w4, per_1x32_f4_quant_hip + + def gemm_with_dynamic_quant( + x: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + rocm_use_aiter_fp4_asm_gemm: bool = False, + out_dtype: Optional[torch.dtype] = torch.bfloat16, + x_scales: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + M = x.shape[0] + if rocm_use_aiter_fp4_asm_gemm: + if x_scales is None: + # use hip quant kernel for performance + x_q, x_s = per_1x32_f4_quant_hip(x, shuffle=True) + else: + x_q = x + x_s = x_scales + + # 32 alignment is enough for dim0 padding of output for + # gemm_a4w4 kernel + y = torch.empty((M + 31) // 32 * 32, + weight.shape[0], + device=x_q.device, + dtype=out_dtype) + + gemm_a4w4(x_q, + weight, + x_s, + weight_scale.view(x_s.dtype), + y, + bpreshuffle=True) + 
return y[:M] + else: + if x_scales is None: + x_q, x_s = dynamic_mxfp4_quant(x) + else: + x_q = x + x_s = x_scales + y = torch.empty(x_q.shape[0], + weight.shape[0], + device=x_q.device, + dtype=out_dtype) + + gemm_afp4wfp4(x_q, weight, x_s, weight_scale.T, out_dtype, y) + return y + + def gemm_with_dynamic_quant_fake( + x: torch.Tensor, + weight: torch.Tensor, + weight_scale: torch.Tensor, + x_scales: torch.Tensor = None, + rocm_use_aiter_fp4_asm_gemm: bool = False, + out_dtype: Optional[torch.dtype] = torch.bfloat16, + ) -> torch.Tensor: + return torch.empty((*x.shape[:-1], weight.shape[0]), + dtype=out_dtype, + device=x.device) + + direct_register_custom_op( + op_name="gemm_with_dynamic_quant", + op_func=gemm_with_dynamic_quant, + mutates_args=[], + fake_impl=gemm_with_dynamic_quant_fake, + dispatch_key=current_platform.dispatch_key, + ) + +except ImportError: + dynamic_mxfp4_quant = gemm_afp4wfp4 = None __all__ = ["QuarkW4A4MXFP4"] @@ -27,29 +111,15 @@ def __init__(self, weight_quant_spec: dict[str, Any], self.qscheme = "per_group" self.weight_quant_spec = weight_quant_spec self.input_quant_spec = input_quant_spec - - self.static_input_scales = not input_quant_spec.get("is_dynamic") - - if self.static_input_scales: + self.emulate = not current_platform.supports_mx() + self.rocm_use_aiter_fp4_asm_gemm = is_rocm_aiter_fp4_asm_gemm_enabled() + if not self.emulate and (dynamic_mxfp4_quant is None + or gemm_afp4wfp4 is None): + # Currently need these kernels if not emulating raise NotImplementedError( - "QuarkW4A4MXFP4 with static input scales is currently not " - "implemented. Please open an issue.") - - if not current_platform.supports_mx(): - self.emulate = True - logger.warning_once( - "The current platform does not support native MXFP4 " - "computation. Simulated weight dequantization and activation " - "QDQ (quantize and dequantize) will be used, with the linear " - "layers computed in high precision.") - else: - self.emulate = True - logger.warning_once( - "The current platform supports native MXFP4 " - "computation, but kernels are not yet integrated in vLLM. " - "Simulated weight dequantization and activation " - "QDQ (quantize and dequantize) will be used, with the linear " - "layers computed in high precision.") + f"{self.__class__.__name__} requires AITER to be installed " + "for non-emulation mode! Please refer to " + "https://github.com/ROCm/aiter for installation details.") @classmethod def get_min_capability(cls) -> int: @@ -58,8 +128,65 @@ def get_min_capability(cls) -> int: def process_weights_after_loading(self, layer: torch.nn.Module) -> None: layer.weight = torch.nn.Parameter(layer.weight.data, requires_grad=False) - layer.weight_scale = torch.nn.Parameter(layer.weight_scale.data, - requires_grad=False) + + if self.emulate: + layer.weight_scale = torch.nn.Parameter(layer.weight_scale.data, + requires_grad=False) + try: + from quark.torch.export.nn.modules import realquantizer + from quark.torch.quantization.config.config import ( + QuantizationSpec) + except ImportError as err: + raise ImportError( + "The package `amd-quark` is required to use AMD Quark " + "MX-FP4 models. 
Please install it with `pip install " + "amd-quark`.") from err + + weight_quant_spec = QuantizationSpec.from_dict( + self.weight_quant_spec) + + weight_quantizer = realquantizer.get_real_quantizer( + qspec=weight_quant_spec, + quantizer=None, + real_quantized=True, + reorder=False, + float_dtype=self.out_dtype, + scale_shape=layer.weight_scale.shape, + zero_point_shape=None, + ) + weight_quantizer.scale.data = layer.weight_scale.data + + layer.weight = torch.nn.Parameter( + weight_quantizer(layer.weight.data).to(self.out_dtype), + requires_grad=False, + ) + layer.weight_scale = None + + # This call is necessary to release the scales memory. + torch.cuda.empty_cache() + else: + if self.rocm_use_aiter_fp4_asm_gemm: + # shuffle weight scale + weight_scale_shuffle = layer.weight_scale.data + sm, sn = weight_scale_shuffle.shape + weight_scale_shuffle = weight_scale_shuffle.view( + sm // 32, 2, 16, sn // 8, 2, 4, 1) + weight_scale_shuffle = weight_scale_shuffle.permute( + 0, 3, 5, 2, 4, 1, 6).contiguous() + weight_scale_shuffle = weight_scale_shuffle.view(sm, sn) + layer.weight_scale = torch.nn.Parameter(weight_scale_shuffle, + requires_grad=False) + + # shuffle weight + weight_shuffle = layer.weight.data + weight_shuffle = shuffle_weight(weight_shuffle, + layout=(16, 16)) + layer.weight = torch.nn.Parameter(weight_shuffle, + requires_grad=False) + else: + layer.weight_scale = torch.nn.Parameter( + layer.weight_scale.data.T.contiguous(), + requires_grad=False) def create_weights(self, layer: torch.nn.Module, output_partition_sizes: list[int], @@ -104,9 +231,9 @@ def apply_weights(self, if self.emulate: dq_w = dequant_mxfp4(layer.weight, layer.weight_scale, x.dtype) - x = quant_dequant_mxfp4(x) - return F.linear(x, dq_w, bias) else: - raise NotImplementedError() + return torch.ops.vllm.gemm_with_dynamic_quant( + x, layer.weight, layer.weight_scale, + self.rocm_use_aiter_fp4_asm_gemm, self.out_dtype) diff --git a/vllm/model_executor/layers/rotary_embedding/base.py b/vllm/model_executor/layers/rotary_embedding/base.py index 1c3576bee539..0cf634f82a8a 100644 --- a/vllm/model_executor/layers/rotary_embedding/base.py +++ b/vllm/model_executor/layers/rotary_embedding/base.py @@ -8,6 +8,8 @@ from vllm.model_executor.custom_op import CustomOp from .common import apply_rotary_emb_torch +from .rocm_aiter_rope_ops import (is_rocm_triton_rotary_embedding_enabled, + rocm_aiter_rotary_emb) @CustomOp.register("rotary_embedding") @@ -45,6 +47,8 @@ def __init__( cache = cache.to(dtype) self.cos_sin_cache: torch.Tensor self.register_buffer("cos_sin_cache", cache, persistent=False) + self.is_rocm_triton_rotary_embedding_enabled = \ + is_rocm_triton_rotary_embedding_enabled() def _compute_inv_freq(self, base: float) -> torch.Tensor: """Compute the inverse frequency.""" @@ -120,14 +124,31 @@ def forward_cuda( return query, key from vllm import _custom_ops as ops - self._match_cos_sin_cache_dtype(query) + # ops.rotary_embedding() is an in-place operation # that updates the query and key tensors. 
ops.rotary_embedding(positions, query, key, self.head_size, self.cos_sin_cache, self.is_neox_style) return query, key + def forward_hip( + self, + positions: torch.Tensor, + query: torch.Tensor, + key: Optional[torch.Tensor] = None, + ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: + if self.is_rocm_triton_rotary_embedding_enabled: + self._match_cos_sin_cache_dtype(query) + rocm_aiter_rotary_emb(positions, query, key, self.cos_sin_cache, + self.head_size, self.rotary_dim, + self.is_neox_style) + else: + # ops.rotary_embedding() is an in-place operation + # that updates the query and key tensors. + self.forward_cuda(positions, query, key) + return query, key + def forward_xpu( self, positions: torch.Tensor, diff --git a/vllm/model_executor/layers/rotary_embedding/rocm_aiter_rope_ops.py b/vllm/model_executor/layers/rotary_embedding/rocm_aiter_rope_ops.py new file mode 100644 index 000000000000..da7c84cb442d --- /dev/null +++ b/vllm/model_executor/layers/rotary_embedding/rocm_aiter_rope_ops.py @@ -0,0 +1,86 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import torch + +import vllm.envs as envs +from vllm.platforms import current_platform +from vllm.utils import direct_register_custom_op + + +def is_rocm_triton_rotary_embedding_enabled() -> bool: + return (current_platform.is_rocm() and envs.VLLM_ROCM_USE_AITER + and envs.VLLM_ROCM_USE_TRITON_ROPE) + + +def rocm_aiter_rotary_emb_with_key_forward_triton_impl( + positions: torch.Tensor, + sin: torch.Tensor, + cos: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + rotate_style: int = 0, + is_nope_first: bool = False, +) -> None: + import aiter.ops.triton.rope as ops + ops.rope_cached_thd_positions_2c_fwd_inplace( + query, + key, + cos, + sin, + positions, + rotate_style, + reuse_freqs_front_part=True, + nope_first=is_nope_first, + ) + + +def rocm_aiter_rotary_emb_with_key_forward_triton_fake( + positions: torch.Tensor, + sin: torch.Tensor, + cos: torch.Tensor, + query: torch.Tensor, + key: torch.Tensor, + rotate_style: int = 0, + is_nope_first: bool = False, +) -> None: + pass + + +if is_rocm_triton_rotary_embedding_enabled(): + + direct_register_custom_op( + op_name="rocm_aiter_rotary_emb_with_key_forward_triton", + op_func=rocm_aiter_rotary_emb_with_key_forward_triton_impl, + mutates_args=["key", "query"], + fake_impl=rocm_aiter_rotary_emb_with_key_forward_triton_fake, + dispatch_key=current_platform.dispatch_key, + ) + + +def rocm_aiter_rotary_emb(positions: torch.Tensor, query: torch.Tensor, + key: torch.Tensor, cos_sin_cache: torch.Tensor, + head_size: int, rotary_dim: int, + is_neox_style: bool): + num_tokens = positions.numel() + cos, sin = cos_sin_cache.chunk(2, dim=-1) + query_shape = query.shape + key_shape = key.shape + rotate_style = 0 if is_neox_style else 1 + + query = query.view(num_tokens, -1, head_size) + key = key.view(num_tokens, -1, head_size) + query_ = query[..., :rotary_dim] + key_ = key[..., :rotary_dim] + positions = positions.view(*query.shape[:1]) + torch.ops.vllm.rocm_aiter_rotary_emb_with_key_forward_triton( + positions, + sin, + cos, + query_, + key_, + rotate_style, + False, + ) + query = query.view(query_shape) + key = key.view(key_shape) From 13dd93c66766efff810feed3edb919f69d0d3836 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Thu, 25 Sep 2025 21:21:56 -0400 Subject: [PATCH 402/518] [Core] Force PIECEWISE CUDAGraph mode for encoder-decoder (#25701) Signed-off-by: Russell Bryant --- vllm/config/__init__.py | 6 ++++-- 1 file 
changed, 4 insertions(+), 2 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 958df4c66955..2da9d8f4f3ea 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -364,9 +364,11 @@ def __post_init__(self): self.compilation_config.cudagraph_mode = \ CUDAGraphMode.FULL_AND_PIECEWISE - # pooling model does not support full cudagraphs + # pooling models and encoder-decoder models + # do not support full cudagraphs if self.model_config is not None and \ - self.model_config.pooler_config is not None: + (self.model_config.pooler_config is not None + or self.model_config.is_encoder_decoder): self.compilation_config.cudagraph_mode = \ CUDAGraphMode.PIECEWISE else: From 983056e456a418662601f2f4414a27b2c576bf67 Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 25 Sep 2025 20:11:44 -0700 Subject: [PATCH 403/518] [Misc] Remove unnecessary memoryviews in shm_broadcast.py (#25721) Signed-off-by: Nick Hill --- vllm/distributed/device_communicators/shm_broadcast.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/distributed/device_communicators/shm_broadcast.py b/vllm/distributed/device_communicators/shm_broadcast.py index 0ee432fea15e..0fc9d1cf4f51 100644 --- a/vllm/distributed/device_communicators/shm_broadcast.py +++ b/vllm/distributed/device_communicators/shm_broadcast.py @@ -136,8 +136,8 @@ def __init__(self, self.shared_memory = shared_memory.SharedMemory( create=True, size=self.total_bytes_of_buffer) # initialize the metadata section to 0 - with memoryview(self.shared_memory.buf[self.metadata_offset:] - ) as metadata_buffer: + with self.shared_memory.buf[self. + metadata_offset:] as metadata_buffer: torch.frombuffer(metadata_buffer, dtype=torch.uint8).fill_(0) else: # we are opening an existing buffer @@ -182,14 +182,14 @@ def __del__(self): def get_data(self, current_idx: int): start = self.data_offset + current_idx * self.max_chunk_bytes end = start + self.max_chunk_bytes - with memoryview(self.shared_memory.buf[start:end]) as buf: + with self.shared_memory.buf[start:end] as buf: yield buf @contextmanager def get_metadata(self, current_idx: int): start = self.metadata_offset + current_idx * self.metadata_size end = start + self.metadata_size - with memoryview(self.shared_memory.buf[start:end]) as buf: + with self.shared_memory.buf[start:end] as buf: yield buf From 392edee34a008af2453d936cb3cdbd97842984a7 Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Fri, 26 Sep 2025 06:54:54 +0300 Subject: [PATCH 404/518] EVS Support (Video tokens pruning) (#22980) Signed-off-by: Eugene Khvedchenia Signed-off-by: Eugene Khvedchenya Co-authored-by: Roger Wang --- .../multimodal/generation/test_qwen2_5_vl.py | 132 +++++++++ vllm/config/model.py | 27 +- vllm/config/multimodal.py | 9 + vllm/engine/arg_utils.py | 5 + vllm/model_executor/models/interfaces.py | 55 ++++ vllm/model_executor/models/qwen2_5_vl.py | 238 ++++++++++++++- vllm/multimodal/evs.py | 273 ++++++++++++++++++ vllm/v1/worker/gpu_model_runner.py | 83 +++++- 8 files changed, 783 insertions(+), 39 deletions(-) create mode 100644 tests/models/multimodal/generation/test_qwen2_5_vl.py create mode 100644 vllm/multimodal/evs.py diff --git a/tests/models/multimodal/generation/test_qwen2_5_vl.py b/tests/models/multimodal/generation/test_qwen2_5_vl.py new file mode 100644 index 000000000000..1dc3188d60bd --- /dev/null +++ b/tests/models/multimodal/generation/test_qwen2_5_vl.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright 
contributors to the vLLM project + +import pytest + +from vllm.multimodal.video import sample_frames_from_video + +from ....conftest import VIDEO_ASSETS + +models = ["Qwen/Qwen2.5-VL-3B-Instruct"] +target_dtype = "bfloat16" + +VIDEO_PLACEHOLDER = "<|vision_start|><|video_pad|><|vision_end|>" + + +def qwen2_5_vl_chat_template(*query): + return f"<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n<|im_start|>user\n{''.join(query)}<|im_end|><|im_start|>assistant\n" # noqa: E501 + + +VIDEO_PROMPTS = VIDEO_ASSETS.prompts({ + "baby_reading": + qwen2_5_vl_chat_template( + VIDEO_PLACEHOLDER, + "Describe this video with a short sentence ", + "(no more than 20 words)", + ), +}) + + +@pytest.mark.core_model +@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("video_pruning_rate", [0.0, 0.75]) +@pytest.mark.parametrize("num_frames", [16]) +@pytest.mark.parametrize("dtype", [target_dtype]) +@pytest.mark.parametrize("max_tokens", [128]) +def test_qwen2_5_vl_evs_functionality(vllm_runner, video_assets, model, + video_pruning_rate: float, + num_frames: int, dtype: str, + max_tokens: int) -> None: + """Test EVS (Efficient Video Sampling) functionality with different + pruning rates. + """ + + # Sample frames from video assets + sampled_vids = [ + sample_frames_from_video(asset.np_ndarrays, num_frames) + for asset in video_assets + ] + + prompts = [VIDEO_PROMPTS[0]] + videos = [sampled_vids[0]] + + # Initialize model with EVS configuration + with vllm_runner(model, + runner="generate", + max_model_len=4000, + max_num_seqs=1, + dtype=dtype, + limit_mm_per_prompt={"video": 1}, + tensor_parallel_size=1, + video_pruning_rate=video_pruning_rate) as vllm_model: + + # Generate output - this should not crash + outputs = vllm_model.generate_greedy(prompts, + max_tokens, + videos=videos) + + # Basic validation that we got a response + assert len(outputs) == 1 + output_ids, output_text = outputs[0] + + # Ensure we got some output + assert len(output_ids) > 0 + assert len(output_text) > 0 + + # Ensure the output is a string + assert isinstance(output_text, str) + + +@pytest.mark.core_model +@pytest.mark.parametrize("model", models) +@pytest.mark.parametrize("video_pruning_rate", [0.0, 0.75]) +@pytest.mark.parametrize("num_frames", [16]) +@pytest.mark.parametrize("dtype", [target_dtype]) +@pytest.mark.parametrize("max_tokens", [128]) +def test_qwen2_5_vl_evs_batched_videos(vllm_runner, video_assets, model, + video_pruning_rate: float, + num_frames: int, dtype: str, + max_tokens: int) -> None: + """Test EVS functionality with batched videos. + + This test validates that: + 1. The model handles batched video inputs correctly with EVS + 2. Both pruning configurations work with multiple videos + 3. 
The model doesn't crash when processing multiple videos simultaneously + """ + # Sample frames from video assets + sampled_vids = [ + sample_frames_from_video(asset.np_ndarrays, num_frames) + for asset in video_assets + ] + + # Test batched videos + prompts = [VIDEO_PROMPTS[0], VIDEO_PROMPTS[0]] + videos = [sampled_vids[0], + sampled_vids[0]] # Use same video twice for testing + + # Initialize model with EVS configuration + with vllm_runner(model, + runner="generate", + max_model_len=4000, + max_num_seqs=2, + dtype=dtype, + limit_mm_per_prompt={"video": 2}, + tensor_parallel_size=1, + video_pruning_rate=video_pruning_rate) as vllm_model: + + # Generate output - this should not crash + outputs = vllm_model.generate_greedy(prompts, + max_tokens, + videos=videos) + + # Basic validation that we got responses for both videos + assert len(outputs) == 2 + + for output_ids, output_text in outputs: + # Ensure we got some output for each video + assert len(output_ids) > 0 + assert len(output_text) > 0 + + # Ensure the output is a string + assert isinstance(output_text, str) diff --git a/vllm/config/model.py b/vllm/config/model.py index 302260e7e993..da01d6d4480c 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -283,6 +283,7 @@ class ModelConfig: mm_encoder_tp_mode: InitVar[Optional[MMEncoderTPMode]] = None interleave_mm_strings: InitVar[Optional[bool]] = None skip_mm_profiling: InitVar[Optional[bool]] = None + video_pruning_rate: InitVar[Optional[float]] = None def compute_hash(self) -> str: """ @@ -311,6 +312,7 @@ def compute_hash(self) -> str: factors.append(self.override_generation_config) factors.append(self.rope_scaling) factors.append(self.rope_theta) + factors.append(self.video_pruning_rate) # hf_config can control how the model looks! try: @@ -338,17 +340,19 @@ def compute_hash(self) -> str: return hashlib.sha256(str(factors).encode()).hexdigest() def __post_init__( - self, - # Multimodal config init vars - limit_mm_per_prompt: Optional[dict[str, int]], - media_io_kwargs: Optional[dict[str, dict[str, Any]]], - mm_processor_kwargs: Optional[dict[str, Any]], - mm_processor_cache_gb: Optional[float], - mm_processor_cache_type: Optional[MMCacheType], - mm_shm_cache_max_object_size_mb: Optional[int], - mm_encoder_tp_mode: Optional[MMEncoderTPMode], - interleave_mm_strings: Optional[bool], - skip_mm_profiling: Optional[bool]) -> None: + self, + # Multimodal config init vars + limit_mm_per_prompt: Optional[dict[str, int]], + media_io_kwargs: Optional[dict[str, dict[str, Any]]], + mm_processor_kwargs: Optional[dict[str, Any]], + mm_processor_cache_gb: Optional[float], + mm_processor_cache_type: Optional[MMCacheType], + mm_shm_cache_max_object_size_mb: Optional[int], + mm_encoder_tp_mode: Optional[MMEncoderTPMode], + interleave_mm_strings: Optional[bool], + skip_mm_profiling: Optional[bool], + video_pruning_rate: Optional[float], + ) -> None: # Set the default seed to 0 in V1. 
# NOTE(woosuk): In V0, we set the default seed to None because the # driver worker shares the same process as the user process, and thus @@ -612,6 +616,7 @@ def _task_to_convert(task: TaskOption) -> ConvertType: mm_encoder_tp_mode=mm_encoder_tp_mode, interleave_mm_strings=interleave_mm_strings, skip_mm_profiling=skip_mm_profiling, + video_pruning_rate=video_pruning_rate, ) mm_config_kwargs = { diff --git a/vllm/config/multimodal.py b/vllm/config/multimodal.py index 1b93b520f33f..569de9579900 100644 --- a/vllm/config/multimodal.py +++ b/vllm/config/multimodal.py @@ -78,6 +78,11 @@ class MultiModalConfig: This reduces engine startup time but shifts the responsibility to users for estimating the peak memory usage of the activation of multimodal encoder and embedding cache.""" + video_pruning_rate: Optional[float] = None + """Sets pruning rate for video pruning via Efficient Video Sampling. + Value sits in range [0;1) and determines fraction of media tokens + from each video to be pruned. + """ def compute_hash(self) -> str: """ @@ -118,3 +123,7 @@ def merge_mm_processor_kwargs( """ kwargs = self.mm_processor_kwargs or {} return kwargs | dict(inference_kwargs) + + def is_multimodal_pruning_enabled(self): + return (self.video_pruning_rate is not None + and self.video_pruning_rate > 0) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index c894477d34b5..7b5ed67d0adb 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -391,6 +391,7 @@ class EngineArgs: mm_encoder_tp_mode: MMEncoderTPMode = MultiModalConfig.mm_encoder_tp_mode io_processor_plugin: Optional[str] = None skip_mm_profiling: bool = MultiModalConfig.skip_mm_profiling + video_pruning_rate: float = MultiModalConfig.video_pruning_rate # LoRA fields enable_lora: bool = False enable_lora_bias: bool = LoRAConfig.bias_enabled @@ -813,6 +814,9 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: multimodal_group.add_argument("--skip-mm-profiling", **multimodal_kwargs["skip_mm_profiling"]) + multimodal_group.add_argument( + "--video-pruning-rate", **multimodal_kwargs["video_pruning_rate"]) + # LoRA related configs lora_kwargs = get_kwargs(LoRAConfig) lora_group = parser.add_argument_group( @@ -1032,6 +1036,7 @@ def create_model_config(self) -> ModelConfig: model_impl=self.model_impl, override_attention_dtype=self.override_attention_dtype, logits_processors=self.logits_processors, + video_pruning_rate=self.video_pruning_rate, io_processor_plugin=self.io_processor_plugin, ) diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index e5cb5eb0bacb..f13e590cd243 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -115,6 +115,42 @@ def get_input_embeddings( ... +@runtime_checkable +class SupportsMultiModalPruning(Protocol): + """The interface required for models that support returning both input + embeddings and positions. Model may require custom positions for dynamic + pruning of multimodal embeddings. + """ + supports_multimodal_pruning: ClassVar[Literal[True]] = True + + def recompute_mrope_positions( + self, input_ids: list[int], + multimodal_embeddings: MultiModalEmbeddings, + mrope_positions: torch.LongTensor, num_computed_tokens: int + ) -> tuple[MultiModalEmbeddings, Tensor, int]: + """ + Update part of input mrope positions (starting with + num_computed_tokens index). 
Original mrope_positions are computed + for unpruned sequence and becomes incorrect once pruning occurs, + so once we prune media tokens we should reflect this in the + mrope_positions before we feed it to LLM. + + Args: + input_ids: (N,) All input tokens of the prompt containing + entire sequence. + multimodal_embeddings: Tuple of multimodal embeddings that + fits into the prefill chunk that is being processed. + mrope_positions: Existing mrope positions (3, N) for entire + sequence + num_computed_tokens: A number of computed tokens so far. + + Returns: + Tuple of (multimodal_embeddings, mrope_positions, + mrope_position_delta). + """ + ... + + @overload def supports_multimodal( model: type[object]) -> TypeIs[type[SupportsMultiModal]]: @@ -142,6 +178,25 @@ def supports_multimodal_encoder_tp_data( return getattr(model, "supports_encoder_tp_data", False) +@overload +def supports_multimodal_pruning( + model: type[object]) -> TypeIs[type[SupportsMultiModalPruning]]: + ... + + +@overload +def supports_multimodal_pruning( + model: object) -> TypeIs[SupportsMultiModalPruning]: + ... + + +def supports_multimodal_pruning( + model: Union[type[object], object], +) -> Union[TypeIs[type[SupportsMultiModalPruning]], + TypeIs[SupportsMultiModalPruning]]: + return getattr(model, "supports_multimodal_pruning", False) + + @runtime_checkable class SupportsScoreTemplate(Protocol): """The interface required for all models that support score template.""" diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index b740e6d87b74..bd6c0b162cb4 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -25,9 +25,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
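For reference while reading the EVS changes in this file: the number of video placeholder tokens is shrunk according to `compute_retained_tokens_count` from the new `vllm/multimodal/evs.py`, which always keeps at least one full frame's worth of tokens. A self-contained sketch of that arithmetic, using made-up grid values (a 16-frame video whose frames form a 28x28 patch grid, merge size 2, pruning rate 0.75):

def retained_tokens(T: int, H: int, W: int, merge: int, q: float) -> int:
    # Every token of the first frame is kept, no matter how aggressive q is.
    per_frame = (H // merge) * (W // merge)
    return max(per_frame, int(T * per_frame * (1 - q)))

# per_frame = 14 * 14 = 196, unpruned total = 16 * 196 = 3136,
# retained = max(196, int(3136 * 0.25)) = 784
assert retained_tokens(16, 28, 28, 2, 0.75) == 784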
"""Inference-only Qwen2.5-VL model compatible with HuggingFace weights.""" -from collections.abc import Iterable, Mapping +from collections.abc import Iterable, Mapping, Sequence from functools import lru_cache, partial -from typing import Annotated, Callable, Literal, Optional, Union +from typing import Annotated, Any, Callable, Literal, Optional, Union import torch import torch.nn as nn @@ -58,7 +58,13 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.multimodal import MULTIMODAL_REGISTRY -from vllm.multimodal.inputs import MultiModalFieldConfig +from vllm.multimodal.evs import (compute_mrope_for_media, + compute_retained_tokens_count, + compute_retention_mask, + recompute_mrope_positions) +from vllm.multimodal.inputs import MultiModalFieldConfig, MultiModalKwargs +from vllm.multimodal.parse import MultiModalDataItems +from vllm.multimodal.processing import PromptReplacement, PromptUpdate from vllm.platforms import _Backend from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope @@ -66,7 +72,8 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import (MultiModalEmbeddings, SupportsLoRA, - SupportsMultiModal, SupportsPP, SupportsQuant) + SupportsMultiModal, SupportsMultiModalPruning, + SupportsPP, SupportsQuant) from .qwen2_vl import Qwen2VLDummyInputsBuilder as Qwen2_5_VLDummyInputsBuilder from .qwen2_vl import (Qwen2VLMultiModalProcessor, Qwen2VLProcessingInfo, apply_rotary_pos_emb_vision) @@ -86,9 +93,9 @@ class Qwen2_5_VLImagePixelInputs(TensorSchema): - np: Number of patches - ni: Number of images - cps: Number of channels * patch_size * patch_size - + Historical context: - - pixel_values shape: (num_patches, num_channels * patch_size * + - pixel_values shape: (num_patches, num_channels * patch_size * patch_size) - image_grid_thw shape: (num_images, 3) in (grid_t, grid_h, grid_w) formatnum_channels * patch_size * patch_size @@ -112,7 +119,7 @@ class Qwen2_5_VLImageEmbeddingInputs(TensorSchema): - nf: Number of image features - hs: Hidden size - ni: Number of images - + Historical context: - image_embeds shape: (num_image_features, hidden_size) - num_image_features varies based on the number and resolution of the @@ -143,11 +150,11 @@ class Qwen2_5_VLVideoPixelInputs(TensorSchema): Dimensions: - np: Number of patches - nv: Number of videos - - ctps: Number of channels * temporal_patch_size * patch_size * + - ctps: Number of channels * temporal_patch_size * patch_size * patch_size - + Historical context: - - pixel_values_videos shape: (num_patches, num_channels * + - pixel_values_videos shape: (num_patches, num_channels * temporal_patch_size * patch_size * patch_size) - video_grid_thw shape: (num_videos, 3) in (grid_t, grid_h, grid_w) format @@ -179,7 +186,7 @@ class Qwen2_5_VLVideoEmbeddingInputs(TensorSchema): - nf: Number of video features - hs: Hidden size - nv: Number of videos - + Historical context: - video_embeds shape: (num_video_features, hidden_size) - num_video_features varies based on the number and resolution of the @@ -905,6 +912,55 @@ def _get_mm_fields_config( second_per_grid_ts=MultiModalFieldConfig.batched("video"), ) + def _get_prompt_updates( + self, + mm_items: MultiModalDataItems, + hf_processor_mm_kwargs: Mapping[str, Any], + out_mm_kwargs: MultiModalKwargs, + ) -> Sequence[PromptUpdate]: + hf_processor = self.info.get_hf_processor(**hf_processor_mm_kwargs) + image_processor = 
self.info.get_image_processor( + **hf_processor_mm_kwargs) + tokenizer = self.info.get_tokenizer() + vocab = tokenizer.get_vocab() + + placeholder = { + "image": vocab[hf_processor.image_token], + "video": vocab[hf_processor.video_token], + } + + merge_length = image_processor.merge_size**2 + + def get_replacement_qwen2vl(item_idx: int, modality: str): + out_item = out_mm_kwargs[modality][item_idx] + grid_thw = out_item[f"{modality}_grid_thw"].data + assert isinstance(grid_thw, torch.Tensor) + + num_tokens = int(grid_thw.prod()) // merge_length + + # EVS-specific code + video_pruning_rate = self.info.ctx.get_mm_config( + ).video_pruning_rate + if (modality == "video" and video_pruning_rate is not None + and video_pruning_rate > 0.0): + num_tokens = compute_retained_tokens_count( + grid_thw, + image_processor.merge_size, + video_pruning_rate, + ) + # End of EVS-specific code + + return [placeholder[modality]] * num_tokens + + return [ + PromptReplacement( + modality=modality, + target=[placeholder[modality]], + replacement=partial(get_replacement_qwen2vl, + modality=modality), + ) for modality in ("image", "video") + ] + @MULTIMODAL_REGISTRY.register_processor( Qwen2_5_VLMultiModalProcessor, @@ -912,7 +968,8 @@ def _get_mm_fields_config( dummy_inputs=Qwen2_5_VLDummyInputsBuilder) class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, - SupportsQuant): + SupportsQuant, + SupportsMultiModalPruning): packed_modules_mapping = { "qkv_proj": ["q_proj", "k_proj", "v_proj"], @@ -949,6 +1006,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.use_data_parallel = multimodal_config.mm_encoder_tp_mode == "data" self.config = config self.multimodal_config = multimodal_config + self.video_pruning_rate = multimodal_config.video_pruning_rate + self.is_multimodal_pruning_enabled = ( + multimodal_config.is_multimodal_pruning_enabled()) if multimodal_config.get_limit_per_prompt("image") or \ multimodal_config.get_limit_per_prompt("video"): @@ -1090,6 +1150,36 @@ def _process_image_input( return image_embeds.split(sizes) + def _postprocess_image_embeds_evs( + self, image_embeds_split: tuple[torch.Tensor, ...], + image_input: Qwen2_5_VLImageInputs) -> tuple[torch.Tensor, ...]: + """ + Append mrope positions for each for images. + This is necessary to recover correct mrope + positions after video pruning + + Args: + image_embeds_split: Tuple of image embeddings for + each image item. + image_input: Image input data. + + Returns: + Tuple of image embeddings for each image item. + Resulting embeddings will have extra 4 channels for + computed mrope positions. 
+ """ + merge_size = self.visual.spatial_merge_size + grid_thw = image_input["image_grid_thw"] + grid_thw_list = grid_thw.tolist() + image_embeds_out = [] + for emb, size in zip(image_embeds_split, grid_thw_list): + positions = compute_mrope_for_media(size, + merge_size).to(emb.device) + emb = torch.cat([emb, positions], dim=1) + image_embeds_out.append(emb) + image_embeds_split = image_embeds_out + return tuple(image_embeds_split) + def _process_video_input( self, video_input: Qwen2_5_VLVideoInputs) -> tuple[torch.Tensor, ...]: @@ -1119,6 +1209,114 @@ def _process_video_input( return video_embeds.split(sizes) + def _postprocess_video_embeds_evs( + self, video_embeds_split: tuple[torch.Tensor, ...], + video_input: Qwen2_5_VLVideoInputs) -> tuple[torch.Tensor, ...]: + """ + Prunes video embeddings via Efficient Video Sampling (EVS) + and then appends mrope positions for each retained embeddings + + Args: + video_embeds_split: Tuple of video embeddings for each video item. + video_input: Video input data. + + Returns: + Tuple of video embeddings for each video item. + Resulting embeddings will have extra 4 channels for + computed mrope positions. + """ + grid_thw = video_input["video_grid_thw"] + assert grid_thw.ndim == 2 + grid_thw_list = grid_thw.tolist() + merge_size = self.visual.spatial_merge_size + + # Cast to long to match the original code + # https://github.com/huggingface/transformers/blob/41980ce93e775f6c88500c51c8db7946fc6a2add/src/transformers/models/qwen2_5_vl/modular_qwen2_5_vl.py#L491 # noqa + second_per_grid_ts = video_input["second_per_grid_ts"].long() + tokens_per_second = self.config.vision_config.tokens_per_second + + video_embeds_out = [] + for emb, size, video_second_per_grid_t in zip(video_embeds_split, + grid_thw_list, + second_per_grid_ts): + # For each video, we compute retention mask using EVS + retention_mask = compute_retention_mask( + emb, + size, + spatial_merge_size=self.visual.spatial_merge_size, + q=self.video_pruning_rate, + ) + positions = compute_mrope_for_media( + size, + merge_size, + tokens_per_second=tokens_per_second, + video_second_per_grid=video_second_per_grid_t.item(), + ).to(emb.device) + + emb = emb[retention_mask] + positions = positions[retention_mask] + emb = torch.cat([emb, positions], dim=1) + video_embeds_out.append(emb) + return tuple(video_embeds_out) + + def recompute_mrope_positions( + self, + input_ids: list[int], + multimodal_embeddings: tuple[torch.Tensor, ...], + mrope_positions: torch.LongTensor, + num_computed_tokens: int, + ) -> tuple[tuple[torch.Tensor, ...], torch.Tensor, int]: + """ + Update part of input mrope positions (starting with + num_computed_tokens index). Original mrope_positions are computed + for unpruned sequence and becomes incorrect once pruning occurs, + so once we prune media tokens we should reflect this in the + mrope_positions before we feed it to LLM. + + Args: + input_ids: (N,) All input tokens of the prompt (Containing + entire sequence). + multimodal_embeddings: Tuple of multimodal embeddings. + mrope_positions: Existing mrope positions (3, N) for entire + sequence + num_computed_tokens: A number of computed tokens so far. + + Returns: + Tuple of (multimodal_embeddings, mrope_positions, + mrope_position_delta). 
+ """ + image_token_id = self.config.image_token_id + video_token_id = self.config.video_token_id + vision_start_token_id = self.config.vision_start_token_id + + # Device + device = (multimodal_embeddings[0].device + if len(multimodal_embeddings) else mrope_positions.device) + + # Tensors + input_ids_t = torch.as_tensor(input_ids, + device=device, + dtype=torch.long) + + # fmt: off + mm_embeddings_out = [mm[:, :-4] for mm in + multimodal_embeddings] + mm_embeddings_pos = [mm[:, -4:].permute(1, 0).long() for mm in + multimodal_embeddings] + # fmt: in + + positions, mrope_positions_delta = recompute_mrope_positions( + input_ids_t, + mm_embeddings_pos, + mrope_positions, + num_computed_tokens, + vision_start_token_id, + image_token_id, + video_token_id, + ) + + return tuple(mm_embeddings_out), positions, mrope_positions_delta + def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict: mm_input_by_modality = {} @@ -1156,9 +1354,17 @@ def get_multimodal_embeddings(self, multimodal_input = mm_input_by_modality[modality] if modality == "image": vision_embeddings = self._process_image_input(multimodal_input) + if self.is_multimodal_pruning_enabled: + vision_embeddings = self._postprocess_image_embeds_evs( + vision_embeddings, multimodal_input + ) multimodal_embeddings += vision_embeddings if modality == "video": video_embeddings = self._process_video_input(multimodal_input) + if self.is_multimodal_pruning_enabled: + video_embeddings = self._postprocess_video_embeds_evs( + video_embeddings, multimodal_input + ) multimodal_embeddings += video_embeddings return multimodal_embeddings @@ -1184,6 +1390,10 @@ def get_input_embeddings_v0( inputs_embeds = self.get_input_embeddings(input_ids) if image_input is not None: image_embeds = self._process_image_input(image_input) + if self.is_multimodal_pruning_enabled: + image_embeds = self._postprocess_image_embeds_evs( + image_embeds, image_input + ) inputs_embeds = merge_multimodal_embeddings( input_ids, inputs_embeds, @@ -1193,6 +1403,10 @@ def get_input_embeddings_v0( if video_input is not None: video_embeds = self._process_video_input(video_input) + if self.is_multimodal_pruning_enabled: + video_embeds = self._postprocess_video_embeds_evs( + video_embeds, video_input + ) inputs_embeds = merge_multimodal_embeddings( input_ids, inputs_embeds, diff --git a/vllm/multimodal/evs.py b/vllm/multimodal/evs.py new file mode 100644 index 000000000000..056f3d905968 --- /dev/null +++ b/vllm/multimodal/evs.py @@ -0,0 +1,273 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# NVIDIA CORPORATION and its licensors retain all intellectual property +# and proprietary rights in and to this software, related documentation +# and any modifications thereto. Any use, reproduction, disclosure or +# distribution of this software and related documentation without an express +# license agreement from NVIDIA CORPORATION is strictly prohibited. + +import typing + +import torch + + +def compute_retained_tokens_count(video_size_thw: torch.LongTensor, + spatial_merge_size: int, q: float) -> int: + """ + Compute the number of retained tokens for a given video. + Method ensures that we retain all the tokens from the first frame + regardless of the pruning rate. + + Args: + video_size_thw: The size of the video in the format of (T, H, W). + spatial_merge_size: The size of the spatial merge. + q: The pruning rate. 
+ + Returns: + The number of retained tokens. + """ + T, H, W = map(int, video_size_thw) + min_num_tokens = (H // spatial_merge_size) * (W // spatial_merge_size) + evs_num_tokens = int(T * min_num_tokens * (1 - q)) + return max(min_num_tokens, evs_num_tokens) + + +def compute_retention_mask( + video_embeds: torch.Tensor, + video_size_thw: torch.LongTensor, + spatial_merge_size: int, + q: float, +) -> torch.Tensor: + """ + Computes the retention mask for input video embeddings. + + Args: + video_embeds (`torch.Tensor`): The input video embeddings + of shape `(T * H * W // spatial_merge_size ^ 2, hidden_size)` + video_size_thw (`torch.LongTensor` of shape `(3)`): + The temporal, height and width of video. + spatial_merge_size: Size reduction for rows & cols dimensions. + q: (`float`): Pruning rate factor [0,1) + + Returns: + `torch.Tensor`: The retention mask for the video embeddings of + `(T * H * W // spatial_merge_size ^ 2)` shape. + """ + T, H, W = video_size_thw + + # Use reshape instead of einops to avoid graph breaks + video_embeds = video_embeds.reshape( + T, + H // spatial_merge_size, + W // spatial_merge_size, + video_embeds.size(-1), + ) + + # Core EVS + similarity = torch.nn.functional.cosine_similarity(video_embeds[1:, ...], + video_embeds[:-1, ...], + dim=-1) + dissimilarity = 1 - similarity + + # Always ensure we include all tokens from the first frame + dissimilarity = torch.cat( + [255 * torch.ones_like(video_embeds[:1, :, :, 0]), dissimilarity], + dim=0) + + dissimilarity_flat = dissimilarity.view(-1) + order = torch.argsort(dissimilarity_flat, + dim=-1, + descending=True, + stable=True) + retain_num_tokens = compute_retained_tokens_count(video_size_thw, + spatial_merge_size, q) + topk_indices = order[:retain_num_tokens] + + retention_mask = torch.zeros_like(dissimilarity_flat, dtype=torch.bool) + retention_mask[topk_indices] = True + retention_mask = retention_mask.reshape(dissimilarity.size()) + + mask = retention_mask.view(-1) # "T H W -> (T H W)" + return mask + + +def compute_mrope_for_media( + video_size_thw: torch.LongTensor, + spatial_merge_size: int, + tokens_per_second: float = 1.0, + video_second_per_grid: float = 1.0, +) -> torch.Tensor: + """ + Computes the mrope for video embeddings based on the grid dimensions. + Computed mrope positions match original qwen 2.5 implementation, + but positions are built for media being the first element in sequence. + + Args: + video_size_thw: Media size (num frames, rows, cols) + spatial_merge_size: Size reduction for rows & cols dimensions. + tokens_per_second: Number of tokens per second. + video_second_per_grid: Number of seconds per video. + + Returns: + Tensor of shape `(T * H * W, 4)` where last dimension + represents mrope positions [0:3), while the last channel + contains value of llm_grid_w repeated for all positions. 
+    """
+    llm_grid_t = video_size_thw[0]
+    llm_grid_h = video_size_thw[1] // spatial_merge_size
+    llm_grid_w = video_size_thw[2] // spatial_merge_size
+
+    t_index = ((torch.arange(llm_grid_t).view(-1, 1).expand(
+        -1, llm_grid_h * llm_grid_w).mul(
+            tokens_per_second * video_second_per_grid)).long().flatten())
+    h_index = (torch.arange(llm_grid_h).view(1, -1,
+                                              1).expand(llm_grid_t, -1,
+                                                        llm_grid_w).flatten())
+    w_index = (torch.arange(llm_grid_w).view(1, 1, -1).expand(
+        llm_grid_t, llm_grid_h, -1).flatten())
+    llm_grid_w = (torch.tensor([llm_grid_w
+                                ]).view(1, 1,
+                                        1).expand(llm_grid_t, llm_grid_h,
+                                                  llm_grid_w).flatten())
+
+    positions = torch.stack([t_index, h_index, w_index, llm_grid_w], dim=1)
+    return positions
+
+
+def recompute_mrope_positions(
+    input_ids: torch.LongTensor,
+    multimodal_positions: list[torch.Tensor],
+    mrope_positions: torch.LongTensor,
+    num_computed_tokens: int,
+    vision_start_token_id: int,
+    image_token_id: int,
+    video_token_id: int,
+) -> tuple[torch.LongTensor, int]:
+    """
+    Update part of the input mrope positions.
+    The original mrope_positions do not account for pruned media tokens, so
+    once we prune media tokens we should reflect this in the mrope positions
+    passed to the LLM.
+
+    This method supports the chunked prefill approach, where
+    multimodal_embeddings are passed to the LLM in chunks, so the input
+    multimodal_embeddings may contain none, some, or only a part of all
+    multimodal_embeddings for a given prompt.
+
+    Each multimodal_positions entry has 4 extra channels
+    (the first 3 channels correspond to the original 3 mrope positions; the
+    last channel is the maximum width of the media, repeated). The provided
+    multimodal_positions do not reflect the location of the media in the
+    sequence - they are computed as if the media were at position 0 of the
+    sequence.
+
+    The method works as follows: it recomputes mrope_positions starting from
+    `num_computed_tokens` for `total_len_of_multimodal_embeddings` and then
+    shifts all text tokens that go after total_len_of_multimodal_embeddings.
+
+    It also handles the case when multimodal_embeddings is partial
+    (e.g. one media is split into two prefill stages).
+
+    Args:
+        input_ids: (N,) All input tokens of the prompt (entire sequence).
+        multimodal_positions: List of mrope positions for each media.
+        mrope_positions: Existing mrope positions (4, N) for entire sequence.
+        num_computed_tokens: A number of computed tokens so far.
+        vision_start_token_id: Token indicating start of vision media.
+        image_token_id: Image token id.
+        video_token_id: Video token id.
+
+    Returns:
+        Tuple of (mrope_positions, mrope_position_delta).
+ """ + + # Tensors + positions: torch.LongTensor = typing.cast( + torch.LongTensor, mrope_positions.clone()) # (3, N) + N = input_ids.numel() + + image_mask = input_ids.eq(image_token_id) + video_mask = input_ids.eq(video_token_id) + media_mask = image_mask | video_mask + text_mask = ~media_mask + + # Early exit: no media in this chunk + if len(multimodal_positions) == 0: + delta = (int((positions.max().item() + 1) - + N) if positions.numel() else -N) + return positions, delta + + total_mm_tokens = torch.count_nonzero(media_mask) + seen_mm_tokens = torch.count_nonzero(media_mask[:num_computed_tokens]) + + # Early exit: we've updated positions for all media tokens + # (and consequently - for all remaining text tokens) + if seen_mm_tokens == total_mm_tokens: + delta = (int((positions.max().item() + 1) - + N) if positions.numel() else -N) + return positions, delta + + vision_start_indices = (input_ids == vision_start_token_id).nonzero( + as_tuple=True)[0] + + for mm_pos in multimodal_positions: + # Each mm_pos can be a complete embedding for single media + # or it can be a part of a single media (due to chunked prefill) + + # Cases to cover + # - Current prefill chunk has no vision start indexes at all + # - Vision start token appeared in previous prefill round + # - Regular case + seen_vision_start_indices = vision_start_indices[vision_start_indices < + num_computed_tokens] + + if len(seen_vision_start_indices): + # If we have encountered some vision start indexes, + # then we should check the condition: + # | --- prefill 1 ------| ---- prefill 2 ----- | + # | TTTTTTTTTSVVVVVVVVVV|VVVVVVTTTTTTTTTTTTTTTT| + last_vision_start_token = seen_vision_start_indices[-1] + seem_mm_tokens_before_last_vision_start = torch.count_nonzero( + media_mask[:last_vision_start_token]) + in_the_middle_of_media = ( + seen_mm_tokens > seem_mm_tokens_before_last_vision_start) + + if in_the_middle_of_media: + mm_embeddings_seen = (seen_mm_tokens - + seem_mm_tokens_before_last_vision_start) + global_mm_start = last_vision_start_token + else: + # We have completed previous mm_embedding part and + # ready to start a new one + next_vision_start_token = vision_start_indices[ + vision_start_indices >= num_computed_tokens][0] + mm_embeddings_seen = 0 + global_mm_start = next_vision_start_token + + else: + # If there were no vision start indexes so far, + # let's find first vision start index + next_vision_start_token = vision_start_indices[ + vision_start_indices >= num_computed_tokens][0] + + mm_embeddings_seen = 0 + global_mm_start = next_vision_start_token + + # Offset right after vision_start_token + base = positions[-1, global_mm_start] + 1 + local_start = global_mm_start + 1 + mm_embeddings_seen + local_end = local_start + mm_pos.shape[1] + positions[:, local_start:local_end] = mm_pos[0:3] + base + + # mm_pos[3, 0] is the max width of the media + offset = mm_pos[3, 0] + base + + text_pos_sum = torch.cumsum(text_mask[local_end:].long(), dim=0) + + positions[:, local_end:N] = text_pos_sum + offset - 1 + + # Include distance to the next vision start token + num_computed_tokens += mm_pos.shape[1] + + mrope_positions_delta = (positions.max() + 1 - N).item() + return positions, mrope_positions_delta diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index b7a066654d70..dca6feded12e 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -40,11 +40,15 @@ from vllm.model_executor.layers.mamba.abstract import MambaBase from 
vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding from vllm.model_executor.model_loader import TensorizerLoader, get_model_loader +# yapf conflicts with isort for this block +# yapf: disable from vllm.model_executor.models.interfaces import (SupportsMultiModal, is_mixture_of_experts, supports_eagle3, supports_mrope, + supports_multimodal_pruning, supports_transcription) +# yapf: enable from vllm.model_executor.models.interfaces_base import ( VllmModelForPooling, is_pooling_model, is_text_generation_model) from vllm.multimodal import MULTIMODAL_REGISTRY @@ -206,7 +210,8 @@ def __init__( self.enable_prompt_embeds = model_config.enable_prompt_embeds self.is_multimodal_raw_input_only_model = ( model_config.is_multimodal_raw_input_only_model) - + # This will be overridden in load_model() + self.is_multimodal_pruning_enabled = False self.max_model_len = model_config.max_model_len self.dcp_world_size = self.parallel_config.decode_context_parallel_size self.max_num_tokens = scheduler_config.max_num_batched_tokens @@ -1530,29 +1535,47 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): # encoder outputs. model = cast(SupportsMultiModal, self.model) encoder_outputs = [] - for _, num_items, mm_kwargs_group in group_mm_kwargs_by_modality( + for modality, num_items, mm_kwargs_group in group_mm_kwargs_by_modality( mm_kwargs, device=self.device, pin_memory=self.pin_memory, merge_by_field_config=model.merge_by_field_config, ): - # Run the encoder. - # `curr_group_outputs` is either of the following: - # 1. A tensor of shape (num_items, feature_size, hidden_size) - # in case feature_size is fixed across all multimodal items. - # 2. A list or tuple (length: num_items) of tensors, each of shape - # (feature_size, hidden_size) in case the feature size is dynamic - # depending on the input multimodal items. - curr_group_outputs = model.get_multimodal_embeddings( - **mm_kwargs_group) + # (ekhvedchenia): Temporary hack to limit peak memory usage when + # processing multimodal data.This solves the issue with scheduler + # putting too many video samples into a single batch. Scheduler + # uses pruned vision tokens count to compare it versus compute + # budget which is incorrect (Either input media size or non-pruned + # output vision tokens count should be considered) + curr_group_outputs = [] + + if self.is_multimodal_pruning_enabled and modality == "video": + micro_batch_size = 1 + for i in range(0, num_items, micro_batch_size): + micro_batch_mm_inputs = dict( + (k, v[i:i + micro_batch_size]) + for k, v in mm_kwargs_group.items()) + + micro_batch_outputs = model.get_multimodal_embeddings( + **micro_batch_mm_inputs) + + curr_group_outputs.extend(micro_batch_outputs) + else: + # Run the encoder. + # `curr_group_outputs` is either of the following: + # 1. A tensor of shape (num_items, feature_size, hidden_size) + # in case feature_size is fixed across all multimodal items. + # 2. A list or tuple (length: num_items) of tensors, + # each of shape (feature_size, hidden_size) in case the feature + # size is dynamic depending on the input multimodal items. 
+ curr_group_outputs = model.get_multimodal_embeddings( + **mm_kwargs_group) sanity_check_mm_encoder_outputs( curr_group_outputs, expected_num_items=num_items, ) - - for output in curr_group_outputs: - encoder_outputs.append(output) + encoder_outputs.extend(curr_group_outputs) # Cache the encoder outputs by mm_hash for (mm_hash, pos_info), output in zip(mm_hashes_pos, encoder_outputs): @@ -1566,8 +1589,11 @@ def _gather_mm_embeddings( scheduler_output: "SchedulerOutput", shift_computed_tokens: int = 0, ) -> list[torch.Tensor]: + should_sync_mrope_positions = False mm_embeds: list[torch.Tensor] = [] for req_id in self.input_batch.req_ids: + mm_embeds_req: list[torch.Tensor] = [] + num_scheduled_tokens = scheduler_output.num_scheduled_tokens[ req_id] req_state = self.requests[req_id] @@ -1609,7 +1635,28 @@ def _gather_mm_embeddings( encoder_output[start_idx:end_idx], is_embed=is_embed, ) - mm_embeds.append(mm_embeds_item) + mm_embeds_req.append(mm_embeds_item) + + if self.is_multimodal_pruning_enabled and self.uses_mrope: + should_sync_mrope_positions = True + mm_embeds_req, new_mrope_positions, new_delta = ( + self.model.recompute_mrope_positions( + input_ids=req_state.prompt_token_ids, + multimodal_embeddings=mm_embeds_req, + mrope_positions=req_state.mrope_positions, + num_computed_tokens=req_state.num_computed_tokens, + )) + assert req_state.mrope_positions is not None + req_state.mrope_positions.copy_(new_mrope_positions) + req_state.mrope_position_delta = new_delta + + mm_embeds.extend(mm_embeds_req) + + if should_sync_mrope_positions: + self._calc_mrope_positions(scheduler_output) + self.mrope_positions.copy_to_gpu( + scheduler_output.total_num_scheduled_tokens) + return mm_embeds def _extract_encoder_inputs( @@ -2589,6 +2636,10 @@ def load_model(self, eep_scale_up: bool = False) -> None: time_after_load - time_before_load) prepare_communication_buffer_for_model(self.model) + self.is_multimodal_pruning_enabled = (supports_multimodal_pruning( + self.model) and self.model_config.multimodal_config. + is_multimodal_pruning_enabled()) + if is_mixture_of_experts( self.model) and self.parallel_config.enable_eplb: logger.info("EPLB is enabled for model %s.", @@ -2843,7 +2894,7 @@ def _dummy_run( Args: num_tokens: Number of tokens to run the dummy forward pass. cudagraph_runtime_mode: used to control the behavior. - - if not set will determine the cudagraph mode based on using + - if not set will determine the cudagraph mode based on using the self.cudagraph_dispatcher. - CUDAGraphMode.NONE: No cudagraph, for warm up and profile run - CUDAGraphMode.PIECEWISE: Piecewise cudagraph. From 3edf87d25fd0afcd0798c6cac5917631fde34054 Mon Sep 17 00:00:00 2001 From: yitingdc <59356937+yitingdc@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:18:02 +0800 Subject: [PATCH 405/518] [CI/Build] fix doc build warning: Failed to get 'name: description' pair (#25733) Signed-off-by: yiting.jiang --- vllm/v1/spec_decode/ngram_proposer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/v1/spec_decode/ngram_proposer.py b/vllm/v1/spec_decode/ngram_proposer.py index fd8e0a6fd1d2..aed050a3540c 100644 --- a/vllm/v1/spec_decode/ngram_proposer.py +++ b/vllm/v1/spec_decode/ngram_proposer.py @@ -71,8 +71,8 @@ def batch_propose( Args: valid_ngram_requests: Set of indices of requests that need ngram proposals. 
- num_tokens_no_spec: - Numpy array of shape (batch_size,) representing the number + num_tokens_no_spec: + Numpy array of shape (batch_size,) representing the number of tokens without speculative tokens for each request. token_ids_cpu: Numpy array of shape (batch_size, max_model_len) From e84e0735c71e983440b2194e0ea7f9117694dc71 Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Fri, 26 Sep 2025 03:18:05 -0500 Subject: [PATCH 406/518] fix: revert cast to cpu in `MsgpackEncoder._encode_tensor` to avoid hidden performance regressions (#25738) Signed-off-by: Andrew Sansom --- vllm/inputs/preprocess.py | 5 +++++ vllm/v1/serial_utils.py | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/vllm/inputs/preprocess.py b/vllm/inputs/preprocess.py index a24307b79d95..7518cd8fc897 100644 --- a/vllm/inputs/preprocess.py +++ b/vllm/inputs/preprocess.py @@ -278,6 +278,11 @@ def _process_embeds( raise ValueError( "prompt_embeds must be of shape (seq_len, hidden_size).") + # Tensors must be on CPU for serialization between processes + # in the MsgpackEncoder. Casting to CPU here ensures that there is no + # hidden device transfer in the critical path of generation. + prompt_embeds = prompt_embeds.cpu() + return embeds_inputs(prompt_embeds=prompt_embeds, cache_salt=parsed_content.get("cache_salt")) diff --git a/vllm/v1/serial_utils.py b/vllm/v1/serial_utils.py index c812a2ec6427..876838084b9a 100644 --- a/vllm/v1/serial_utils.py +++ b/vllm/v1/serial_utils.py @@ -208,7 +208,7 @@ def _encode_tensor( ) -> tuple[str, tuple[int, ...], Union[int, memoryview]]: assert self.aux_buffers is not None # view the tensor as a contiguous 1D array of bytes - arr = obj.flatten().contiguous().cpu().view(torch.uint8).numpy() + arr = obj.flatten().contiguous().view(torch.uint8).numpy() if obj.nbytes < self.size_threshold: # Smaller tensors are encoded inline, just like ndarrays. data = msgpack.Ext(CUSTOM_TYPE_RAW_VIEW, arr.data) From d48f4d6daf7cf62568f84cc50b3ba1fd18ad8c82 Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Fri, 26 Sep 2025 03:18:09 -0500 Subject: [PATCH 407/518] perf: Avoid copying inputs_embeds tensors to GPU unless prompt_embeds is enabled (#25739) Signed-off-by: Andrew Sansom --- vllm/v1/worker/gpu_model_runner.py | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index dca6feded12e..a1969463cbfb 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -836,8 +836,9 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, if self.input_batch.prev_sampled_token_ids is None: # Normal scheduling case self.input_ids.copy_to_gpu(total_num_scheduled_tokens) - self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) - self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) + if self.enable_prompt_embeds: + self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) + self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) return # Async scheduling case, where some decode requests from the previous @@ -863,8 +864,9 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, # If not all requests are decodes from the last iteration, # We need to copy the input_ids_cpu to the GPU first. 
self.input_ids.copy_to_gpu(total_num_scheduled_tokens) - self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) - self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) + if self.enable_prompt_embeds: + self.inputs_embeds.copy_to_gpu(total_num_scheduled_tokens) + self.is_token_ids.copy_to_gpu(total_num_scheduled_tokens) if num_commmon_tokens == 0: # No requests in common with the previous iteration # So input_ids_cpu will have all the input ids. @@ -878,7 +880,8 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, self.input_batch.prev_sampled_token_ids[:num_commmon_tokens, 0], non_blocking=True) - self.is_token_ids.gpu[:num_commmon_tokens] = True + if self.enable_prompt_embeds: + self.is_token_ids.gpu[:num_commmon_tokens] = True return # Upload the index tensors asynchronously # so the scatter can be non-blocking. @@ -978,12 +981,13 @@ def _prepare_inputs( 0, token_indices_tensor, out=self.input_ids.cpu[:total_num_scheduled_tokens]) - is_token_ids = self.input_batch.is_token_ids.flatten() - torch.index_select( - is_token_ids, - 0, - token_indices_tensor, - out=self.is_token_ids.cpu[:total_num_scheduled_tokens]) + if self.enable_prompt_embeds: + is_token_ids = self.input_batch.is_token_ids.flatten() + torch.index_select( + is_token_ids, + 0, + token_indices_tensor, + out=self.is_token_ids.cpu[:total_num_scheduled_tokens]) # Because we did not pre-allocate a massive prompt_embeds CPU tensor on # the InputBatch, we need to fill in the prompt embeds into the expected From 52621c8f5c5a1052411218f8903b4c0b283bf00a Mon Sep 17 00:00:00 2001 From: xaguilar-amd Date: Fri, 26 Sep 2025 10:18:20 +0200 Subject: [PATCH 408/518] [Harware][AMD][Model] Triton MoE tuning configs for GLM-4.5 for MI300X (#25703) Signed-off-by: xaguilar --- ...me=AMD_Instinct_MI300X,dtype=fp8_w8a8.json | 164 ++++++++++++++++++ 1 file changed, 164 insertions(+) create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=160,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json diff --git a/vllm/model_executor/layers/fused_moe/configs/E=160,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json b/vllm/model_executor/layers/fused_moe/configs/E=160,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json new file mode 100644 index 000000000000..8239492d8f4f --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=160,N=384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8.json @@ -0,0 +1,164 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0 + }, + "2": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 16, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "4": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0 + }, + "8": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 1, + "num_stages": 2, + "waves_per_eu": 0 + }, 
+ "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "128": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "256": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "512": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 8, + "num_warps": 4, + "num_stages": 2, + "waves_per_eu": 0 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0 + }, + "3072": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 4, + "num_warps": 2, + "num_stages": 2, + "waves_per_eu": 0 + }, + "4096": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 2, + "waves_per_eu": 0 + } +} From 6e30010d2fc409471d10781b767157025c34827f Mon Sep 17 00:00:00 2001 From: Iceber Gu Date: Fri, 26 Sep 2025 16:18:24 +0800 Subject: [PATCH 409/518] fix: print outputt offline_inference/base/chat.py example (#25744) Signed-off-by: Iceber Gu --- examples/offline_inference/basic/chat.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/offline_inference/basic/chat.py b/examples/offline_inference/basic/chat.py index d078c517d00e..9e7036fea613 100644 --- a/examples/offline_inference/basic/chat.py +++ b/examples/offline_inference/basic/chat.py @@ -87,6 +87,7 @@ def print_outputs(outputs): use_tqdm=False, chat_template=chat_template, ) + print_outputs(outputs) if __name__ == "__main__": From 99b3a504c5e6b8beeaf03ae276f8220f79e02ba6 Mon Sep 17 00:00:00 2001 From: Tao He Date: Fri, 26 Sep 2025 16:18:58 +0800 Subject: [PATCH 410/518] [Qwen3-Next][GDN] fixes cuda graph capturing bug in GDN metadata and a stride bug in causal_conv_1d. 
(#25743) Signed-off-by: Tao He --- .../layers/mamba/ops/causal_conv1d.py | 11 +++- vllm/v1/attention/backends/gdn_attn.py | 61 ++++++++----------- vllm/v1/worker/gpu_model_runner.py | 23 ++++--- 3 files changed, 50 insertions(+), 45 deletions(-) diff --git a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 010fcdda156c..5e5011fa2ac5 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -41,6 +41,7 @@ def _causal_conv1d_fwd_kernel( # continuous batching stride_istate_seq: tl.constexpr, stride_istate_dim: tl.constexpr, stride_istate_token: tl.constexpr, + stride_cache_indices: tl.constexpr, stride_o_seq: tl.constexpr, stride_o_dim: tl.constexpr, stride_o_token: tl.constexpr, @@ -69,7 +70,7 @@ def _causal_conv1d_fwd_kernel( # continuous batching # rather than mixing sequences - to make updating initial_states across sequences efficiently # single-sequence id - idx_seq = tl.load(batch_ptr + tl.program_id(0)) + idx_seq = tl.load(batch_ptr + tl.program_id(0)).to(tl.int64) chunk_offset = tl.load(token_chunk_offset_ptr + tl.program_id(0)) # BLOCK_N elements along the feature-dimension (channel) @@ -91,8 +92,9 @@ def _causal_conv1d_fwd_kernel( # continuous batching if IS_CONTINUOUS_BATCHING: # cache_idx - conv_state_batch_coord = tl.load(conv_state_indices_ptr + idx_seq).to( - tl.int64) + conv_state_batch_coord = tl.load(conv_state_indices_ptr + + idx_seq * stride_cache_indices).to( + tl.int64) else: # cache_idx conv_state_batch_coord = idx_seq @@ -480,6 +482,8 @@ def causal_conv1d_fn( stride_o_seq = out.stride(0) stride_o_dim = out.stride(1) stride_o_token = out.stride(2) + stride_cache_indices = cache_indices.stride( + 0) if cache_indices is not None else 0 if validate_data: assert x.dim() == 2 @@ -595,6 +599,7 @@ def grid(META): stride_istate_seq, stride_istate_dim, stride_istate_token, + stride_cache_indices, stride_o_seq, stride_o_dim, stride_o_token, diff --git a/vllm/v1/attention/backends/gdn_attn.py b/vllm/v1/attention/backends/gdn_attn.py index 843958bc79de..11f165d6cfc6 100644 --- a/vllm/v1/attention/backends/gdn_attn.py +++ b/vllm/v1/attention/backends/gdn_attn.py @@ -125,7 +125,7 @@ def build( # type: ignore[override] common_prefix_len: int, common_attn_metadata: CommonAttentionMetadata, num_accepted_tokens: Optional[torch.Tensor] = None, - num_draft_tokens: Optional[torch.Tensor] = None, + num_decode_draft_tokens_cpu: Optional[torch.Tensor] = None, fast_build: bool = False, ) -> GDNAttentionMetadata: m = common_attn_metadata @@ -133,23 +133,25 @@ def build( # type: ignore[override] query_start_loc = m.query_start_loc context_lens = m.num_computed_tokens_cpu context_lens_tensor = context_lens.to(query_start_loc.device) - seq_lens_tensor = m.seq_lens nums_dict, batch_ptr, token_chunk_offset_ptr = None, None, None - if (not self.use_spec_decode or num_draft_tokens is None - or num_draft_tokens.sum().item() == 0): + if (not self.use_spec_decode or num_decode_draft_tokens_cpu is None + or num_decode_draft_tokens_cpu[num_decode_draft_tokens_cpu >= + 0].sum().item() == 0): spec_sequence_masks = None + num_spec_decodes = 0 else: - spec_sequence_masks = (num_draft_tokens > 0) & ( - context_lens_tensor + - (num_draft_tokens + 1) == seq_lens_tensor) - if spec_sequence_masks.sum().item() == 0: + spec_sequence_masks = num_decode_draft_tokens_cpu >= 0 + num_spec_decodes = spec_sequence_masks.sum().item() + if num_spec_decodes == 0: spec_sequence_masks = None 
+ else: + spec_sequence_masks = spec_sequence_masks.to( + query_start_loc.device, non_blocking=True) if spec_sequence_masks is None: num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = ( split_decodes_and_prefills(m, decode_threshold=1)) - num_spec_decodes = 0 num_spec_decode_tokens = 0 spec_token_masks = None spec_state_indices_tensor = None @@ -158,7 +160,6 @@ def build( # type: ignore[override] non_spec_query_start_loc = query_start_loc num_accepted_tokens = None else: - num_spec_decodes = spec_sequence_masks.sum().item() query_lens = query_start_loc[1:] - query_start_loc[:-1] non_spec_query_lens = query_lens[~spec_sequence_masks] @@ -314,28 +315,18 @@ def build_for_cudagraph_capture( """ m = common_attn_metadata - assert (m.num_reqs * (self.num_spec + 1) <= m.num_actual_tokens - and ((m.num_reqs + 1) * (self.num_spec + 1) - >= m.num_actual_tokens)), \ - "GDN only supports decode-only full CUDAGraph capture. " \ - "Make sure all cudagraph capture sizes <= max_num_seq." - - num_accepted_tokens = torch.full((m.num_reqs, ), - m.max_query_len, - dtype=torch.int32, - device=m.query_start_loc.device) - num_drafted_tokens = torch.full((m.num_reqs, ), - self.num_spec, - dtype=torch.int32, - device=m.query_start_loc.device) - - # Fixes query-start loc for spec-sequence-indices. - m.query_start_loc = torch.arange(0, - m.num_actual_tokens + 1, - step=m.max_query_len, - device=m.query_start_loc.device, - dtype=torch.int32) - m.num_computed_tokens_cpu = (m.seq_lens_cpu - torch.full( - (m.num_reqs, ), m.max_query_len, dtype=torch.int32, device='cpu')) - - return self.build(0, m, num_accepted_tokens, num_drafted_tokens) + assert ( + m.num_reqs <= self.decode_cudagraph_max_bs + and m.num_actual_tokens <= self.decode_cudagraph_max_bs), ( + f"GDN only supports decode-only full CUDAGraph capture. " + f"Make sure batch size ({m.num_reqs}) <= " + f"cudagraph capture sizes ({self.decode_cudagraph_max_bs}), " + f"and number of tokens ({m.num_actual_tokens}) <= " + f"cudagraph capture sizes ({self.decode_cudagraph_max_bs}).") + + num_accepted_tokens = torch.diff(m.query_start_loc) + num_decode_draft_tokens_cpu = (num_accepted_tokens - 1).cpu() + m.num_computed_tokens_cpu = m.seq_lens_cpu - num_accepted_tokens.cpu() + + return self.build(0, m, num_accepted_tokens, + num_decode_draft_tokens_cpu) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index a1969463cbfb..cbf439aa697b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -360,8 +360,8 @@ def __init__( dtype=torch.int64) self.num_discarded_requests = 0 - self.num_draft_tokens = self._make_buffer(self.max_num_reqs, - dtype=torch.int32) + self.num_decode_draft_tokens = self._make_buffer(self.max_num_reqs, + dtype=torch.int32) self.num_accepted_tokens = self._make_buffer(self.max_num_reqs, dtype=torch.int64) @@ -1103,17 +1103,25 @@ def _prepare_inputs( # Iterate over the dictionary rather than all requests since not all # requests have draft tokens. num_draft_tokens = np.zeros(num_reqs, dtype=np.int32) + # For chunked prefills, use -1 as mask rather than 0, as guided + # decoding may rollback speculative tokens. 
+ num_decode_draft_tokens = np.full(num_reqs, -1, dtype=np.int32) for req_id, draft_token_ids in ( scheduler_output.scheduled_spec_decode_tokens.items()): req_idx = self.input_batch.req_id_to_index[req_id] num_draft_tokens[req_idx] = len(draft_token_ids) - + num_decode_draft_tokens[req_idx] = (len(draft_token_ids) if ( + self.input_batch.num_computed_tokens_cpu[req_idx] + >= self.input_batch.num_prompt_tokens[req_idx]) else -1) spec_decode_metadata = self._calc_spec_decode_metadata( num_draft_tokens, cu_num_tokens) logits_indices = spec_decode_metadata.logits_indices - self.num_draft_tokens.np[:num_reqs] = num_draft_tokens - self.num_draft_tokens.np[num_reqs:].fill(0) - self.num_draft_tokens.copy_to_gpu() + + # For DECODE only cuda graph of some attention backends (e.g., GDN). + self.num_decode_draft_tokens.np[: + num_reqs] = num_decode_draft_tokens + self.num_decode_draft_tokens.np[num_reqs:].fill(-1) + self.num_decode_draft_tokens.copy_to_gpu() logits_indices_padded = None if self.cache_config.kv_sharing_fast_prefill: @@ -1217,7 +1225,8 @@ def _prepare_inputs( extra_attn_metadata_args = dict( num_accepted_tokens=self.num_accepted_tokens. gpu[:num_reqs], - num_draft_tokens=self.num_draft_tokens.gpu[:num_reqs], + num_decode_draft_tokens_cpu=self. + num_decode_draft_tokens.cpu[:num_reqs], ) if ubatch_slices is not None: From dd70437a4f36f08468bb572e3ab368c4d3b05c9a Mon Sep 17 00:00:00 2001 From: Icey <1790571317@qq.com> Date: Fri, 26 Sep 2025 16:19:20 +0800 Subject: [PATCH 411/518] Remove cuda hard-code in compute_causal_conv1d_metadata (#25555) Signed-off-by: Icey <1790571317@qq.com> --- vllm/v1/attention/backends/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/vllm/v1/attention/backends/utils.py b/vllm/v1/attention/backends/utils.py index 0c6e0dfefd8a..f37a829f401c 100644 --- a/vllm/v1/attention/backends/utils.py +++ b/vllm/v1/attention/backends/utils.py @@ -947,6 +947,7 @@ def compute_causal_conv1d_metadata(query_start_loc_p: torch.Tensor): nums_dict = {} # type: ignore batch_ptr = None token_chunk_offset_ptr = None + device = query_start_loc_p.device for BLOCK_M in [8]: # cover all BLOCK_M values nums = -(-seqlens // BLOCK_M) nums_dict[BLOCK_M] = {} @@ -968,11 +969,11 @@ def compute_causal_conv1d_metadata(query_start_loc_p: torch.Tensor): batch_ptr = torch.full((MAX_NUM_PROGRAMS, ), PAD_SLOT_ID, dtype=torch.int32, - device='cuda') + device=device) token_chunk_offset_ptr = torch.full((MAX_NUM_PROGRAMS, ), PAD_SLOT_ID, dtype=torch.int32, - device='cuda') + device=device) else: if batch_ptr.nelement() < MAX_NUM_PROGRAMS: batch_ptr.resize_(MAX_NUM_PROGRAMS).fill_(PAD_SLOT_ID) From 19f76ee68e60c71d7848ec73f5aa9e0d2ab9dbc4 Mon Sep 17 00:00:00 2001 From: yyzxw <34639446+yyzxw@users.noreply.github.com> Date: Fri, 26 Sep 2025 16:22:06 +0800 Subject: [PATCH 412/518] [misc] refactor speculative config (#25657) Signed-off-by: zxw <1020938856@qq.com> --- vllm/config/speculative.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 04e2e7d800a1..2e4b3d3a6b20 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -209,12 +209,9 @@ def __post_init__(self): if self.model is None and self.num_speculative_tokens is not None: # TODO(Shangming): Refactor mtp configuration logic when supporting - # mtp acceleration for more models besides deepseek_v3 - if self.target_model_config and \ - (self.target_model_config.hf_text_config.model_type \ - == "deepseek_v3" or - 
self.target_model_config.hf_text_config.model_type in - ("mimo","ernie4_5_moe", "qwen3_next")): + if (self.target_model_config + and self.target_model_config.hf_text_config.model_type + in ("deepseek_v3", "mimo", "ernie4_5_moe", "qwen3_next")): # use the draft model from the same model: self.model = self.target_model_config.model # Align the quantization of draft model for cases such as @@ -224,8 +221,9 @@ def __post_init__(self): elif self.method in ("ngram", "[ngram]"): self.model = "ngram" else: - raise ValueError("num_speculative_tokens was provided without " - "speculative model.") + raise ValueError( + "num_speculative_tokens was provided but without " + "speculative model.") # Automatically configure the method for ngram when "model" is used # instead of "method" From dfb9af20142a848b114ee2bbe3e148856f59c552 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Fri, 26 Sep 2025 01:25:28 -0700 Subject: [PATCH 413/518] [Bugfix] Fix Shared Expert/Zero expert code in FusedMoE.process_chunk (#25698) Signed-off-by: Sage Moore Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> --- vllm/model_executor/layers/fused_moe/layer.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index 92241f50db07..eccae8b2a7af 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -1925,7 +1925,9 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): assert self.shared_experts is None or isinstance( final_hidden_states, tuple) - if isinstance(final_hidden_states, tuple): + if self.zero_expert_num is not None and self.zero_expert_num > 0: + assert isinstance(final_hidden_states, tuple) + assert self.shared_experts is None final_hidden_states, zero_expert_result = final_hidden_states if zero_expert_result is not None: final_hidden_states += zero_expert_result From b03b1b97f6fc460c7b80f856361ae681d9574ce9 Mon Sep 17 00:00:00 2001 From: Xu Wenqing <121550081+Xu-Wenqing@users.noreply.github.com> Date: Fri, 26 Sep 2025 17:25:39 +0800 Subject: [PATCH 414/518] Support LongCat-Flash-Chat tool call (#24083) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: 许文卿 --- docs/features/tool_calling.md | 9 +++++ .../openai/tool_parsers/__init__.py | 2 + .../tool_parsers/longcat_tool_parser.py | 39 +++++++++++++++++++ 3 files changed, 50 insertions(+) create mode 100644 vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md index 291c313cd57a..16693db7255c 100644 --- a/docs/features/tool_calling.md +++ b/docs/features/tool_calling.md @@ -310,6 +310,15 @@ Flags: * For non-reasoning: `--tool-call-parser hunyuan_a13b` * For reasoning: `--tool-call-parser hunyuan_a13b --reasoning-parser hunyuan_a13b --enable_reasoning` +### LongCat-Flash-Chat Models (`longcat`) + +Supported models: + +* `meituan-longcat/LongCat-Flash-Chat` +* `meituan-longcat/LongCat-Flash-Chat-FP8` + +Flags: `--tool-call-parser longcat` + ### GLM-4.5 Models (`glm45`) Supported models: diff --git a/vllm/entrypoints/openai/tool_parsers/__init__.py b/vllm/entrypoints/openai/tool_parsers/__init__.py index 5e77c406b8d9..2c5a0a6af23f 100644 --- a/vllm/entrypoints/openai/tool_parsers/__init__.py +++ b/vllm/entrypoints/openai/tool_parsers/__init__.py @@ -14,6 +14,7 @@ from .kimi_k2_tool_parser import KimiK2ToolParser from 
.llama4_pythonic_tool_parser import Llama4PythonicToolParser from .llama_tool_parser import Llama3JsonToolParser +from .longcat_tool_parser import LongcatFlashToolParser from .minimax_tool_parser import MinimaxToolParser from .mistral_tool_parser import MistralToolParser from .openai_tool_parser import OpenAIToolParser @@ -36,6 +37,7 @@ "Llama3JsonToolParser", "JambaToolParser", "Llama4PythonicToolParser", + "LongcatFlashToolParser", "PythonicToolParser", "Phi4MiniJsonToolParser", "DeepSeekV3ToolParser", diff --git a/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py b/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py new file mode 100644 index 000000000000..87a3fdc44397 --- /dev/null +++ b/vllm/entrypoints/openai/tool_parsers/longcat_tool_parser.py @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import regex as re + +from vllm.entrypoints.openai.tool_parsers.abstract_tool_parser import ( + ToolParserManager) +from vllm.entrypoints.openai.tool_parsers.hermes_tool_parser import ( + Hermes2ProToolParser) +from vllm.transformers_utils.tokenizer import AnyTokenizer + + +@ToolParserManager.register_module("longcat") +class LongcatFlashToolParser(Hermes2ProToolParser): + + def __init__(self, tokenizer: AnyTokenizer): + super().__init__(tokenizer) + + self.tool_call_start_token: str = "" + self.tool_call_end_token: str = "" + + self.tool_call_regex = re.compile( + r"(.*?)|(.*)", + re.DOTALL) + + self.tool_call_start_token_ids = self.model_tokenizer.encode( + self.tool_call_start_token, add_special_tokens=False) + self.tool_call_end_token_ids = self.model_tokenizer.encode( + self.tool_call_end_token, add_special_tokens=False) + + self.tool_call_start_token_array = [ + self.model_tokenizer.decode([token_id]) + for token_id in self.tool_call_start_token_ids + ] + + self.tool_call_end_token_array = [ + self.model_tokenizer.decode([token_id]) + for token_id in self.tool_call_end_token_ids + ] From 633f943e30a4444d890d26b81850f7217736f840 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 17:37:40 +0800 Subject: [PATCH 415/518] [Doc] Update Batch-level DP docs (#25757) Signed-off-by: DarkLight1337 --- docs/configuration/optimization.md | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/configuration/optimization.md b/docs/configuration/optimization.md index 5564d8a81d93..5c74610ebd29 100644 --- a/docs/configuration/optimization.md +++ b/docs/configuration/optimization.md @@ -139,9 +139,9 @@ there is relatively little gain from TP. On the other hand, TP incurs significan overhead because of all-reduce being performed after every layer. Given this, it may be advantageous to instead shard the batched input data using TP, essentially -performing batch-level DP. This has been shown to improve the throughput by around 10% for +performing batch-level DP. This has been shown to improve the throughput and TTFT by around 10% for `tensor_parallel_size=8`. For vision encoders that use hardware-unoptimized Conv3D operations, -batch-level DP can provide another 40% increase to throughput compared to regular TP. +batch-level DP can provide another 40% improvement compared to regular TP. Nevertheless, since the weights of the multi-modal encoder are replicated across each TP rank, there will be a minor increase in memory consumption and may cause OOM if you can barely fit the model already. 
@@ -172,14 +172,15 @@ Batch-level DP needs to be implemented on a per-model basis, and enabled by setting `supports_encoder_tp_data = True` in the model class. Regardless, you need to set `mm_encoder_tp_mode="data"` in engine arguments to use this feature. -Known supported models: +Known supported models (with corresponding benchmarks): -- GLM-4.5V GLM-4.1V () +- dots_ocr () +- GLM-4.1V or above () - InternVL () - Kimi-VL () - Llama4 () - MiniCPM-V-2.5 or above (, ) -- Qwen2.5-VL () +- Qwen2-VL or above (, , ) - Step3 () ## Input Processing From 2b6b1d78092826c789514c5a901d1e872d4602dd Mon Sep 17 00:00:00 2001 From: Chih-Chieh Yang <7364402+cyang49@users.noreply.github.com> Date: Fri, 26 Sep 2025 07:31:14 -0400 Subject: [PATCH 416/518] [Model] Mamba2 varlen refactor (#21467) Signed-off-by: Chih-Chieh-Yang <7364402+cyang49@users.noreply.github.com> Co-authored-by: RishiAstra <40644327+RishiAstra@users.noreply.github.com> --- tests/kernels/mamba/test_mamba_ssm_ssd.py | 125 ++--- .../layers/mamba/mamba_mixer2.py | 23 +- .../layers/mamba/ops/causal_conv1d.py | 2 +- .../layers/mamba/ops/ssd_bmm.py | 158 +++---- .../layers/mamba/ops/ssd_chunk_scan.py | 395 +++++++--------- .../layers/mamba/ops/ssd_chunk_state.py | 430 ++++++++---------- .../layers/mamba/ops/ssd_combined.py | 156 +++---- .../layers/mamba/ops/ssd_state_passing.py | 268 +++++------ vllm/model_executor/models/plamo2.py | 22 +- vllm/v1/attention/backends/mamba2_attn.py | 9 +- 10 files changed, 723 insertions(+), 865 deletions(-) diff --git a/tests/kernels/mamba/test_mamba_ssm_ssd.py b/tests/kernels/mamba/test_mamba_ssm_ssd.py index fc60d5ac82b2..927af32588e6 100644 --- a/tests/kernels/mamba/test_mamba_ssm_ssd.py +++ b/tests/kernels/mamba/test_mamba_ssm_ssd.py @@ -7,7 +7,7 @@ from einops import rearrange, repeat from vllm.model_executor.layers.mamba.ops.ssd_combined import ( - mamba_chunk_scan_combined) + mamba_chunk_scan_combined_varlen) from vllm.platforms import current_platform from vllm.v1.attention.backends.mamba2_attn import ( _query_start_loc_to_chunk_indices_offsets) @@ -185,9 +185,14 @@ def end_boundary(n: int): IND_S = [x % full_length for x in IND_E] IND_E = [end_boundary(x + y) for x, y in zip(IND_S, spec)] + # varlen has implicit batch=1 + dt2 = dt2.squeeze(0) + X2 = X2.squeeze(0) + B2 = B2.squeeze(0) + C2 = C2.squeeze(0) yield ([Y_min[s, IND_S[s]:IND_E[s]] for s in range(num_examples)] if return_naive_ref else None, - cu_seqlens, seq_idx.unsqueeze(0), (A, dt2, X2, B2, C2)) + cu_seqlens, seq_idx, (A, dt2, X2, B2, C2)) @pytest.mark.parametrize("itype", @@ -198,7 +203,7 @@ def end_boundary(n: int): def test_mamba_chunk_scan_single_example(d_head, n_heads, seq_len_chunk_size, itype): - # this tests the kernels on a single example (no batching) + # this tests the kernels on a single example (bs=1) # TODO: the bfloat16 case requires higher thresholds. 
To be investigated @@ -219,23 +224,40 @@ def test_mamba_chunk_scan_single_example(d_head, n_heads, seq_len_chunk_size, Y_min, final_state_min = ssd_minimal_discrete(X * dt.unsqueeze(-1), A * dt, B, C, chunk_size) + + cu_seqlens = torch.tensor((0, seqlen), device='cuda').cumsum(dim=0) + seq_idx = torch.zeros(seqlen, dtype=torch.int32, device=cu_seqlens.device) + + chunk_indices, chunk_offsets = \ + _query_start_loc_to_chunk_indices_offsets( + cu_seqlens, chunk_size, cu_seqlens[-1]) + + # varlen has implicit batch=1 + X = X.squeeze(0) + dt = dt.squeeze(0) + A = A.squeeze(0) + B = B.squeeze(0) + C = C.squeeze(0) Y = torch.empty_like(X) - final_state = mamba_chunk_scan_combined(X, - dt, - A, - B, - C, - chunk_size, - D=None, - return_final_states=True, - out=Y) + final_state = mamba_chunk_scan_combined_varlen(X, + dt, + A, + B, + C, + chunk_size, + D=None, + cu_seqlens=cu_seqlens, + seq_idx=seq_idx, + chunk_indices=chunk_indices, + chunk_offsets=chunk_offsets, + out=Y) # just test the last in sequence - torch.testing.assert_close(Y[:, -1], Y_min[:, -1], atol=atol, rtol=rtol) + torch.testing.assert_close(Y[-1], Y_min[0, -1], atol=atol, rtol=rtol) # just test the last head # NOTE, in the kernel we always cast states to fp32 - torch.testing.assert_close(final_state[:, -1], + torch.testing.assert_close(final_state[:, -1].to(torch.float32), final_state_min[:, -1].to(torch.float32), atol=atol, rtol=rtol) @@ -300,7 +322,7 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases, cu_seqlens, chunk_size, cu_seqlens[-1]) Y = torch.empty_like(X) - new_states = mamba_chunk_scan_combined( + new_states = mamba_chunk_scan_combined_varlen( X, dt, A, @@ -312,7 +334,6 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases, seq_idx=seq_idx, chunk_indices=chunk_indices, chunk_offsets=chunk_offsets, - return_varlen_states=True, initial_states=states, out=Y, ) @@ -321,7 +342,7 @@ def test_mamba_chunk_scan_cont_batch(d_head, n_heads, seq_len_chunk_size_cases, for i in range(num_examples): # just test one dim and dstate - Y_eg = Y[0, cu_seqlens[i]:cu_seqlens[i + 1], 0, 0] + Y_eg = Y[cu_seqlens[i]:cu_seqlens[i + 1], 0, 0] Y_min_eg = Y_min[i][:, 0, 0] torch.testing.assert_close(Y_eg, Y_min_eg, atol=atol, rtol=rtol) @@ -386,7 +407,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): _query_start_loc_to_chunk_indices_offsets( cu_seqlens, chunk_size, cu_seqlens[-1]) Y_ref = torch.empty_like(X) - state_ref = mamba_chunk_scan_combined( + state_ref = mamba_chunk_scan_combined_varlen( X, dt, A, @@ -398,7 +419,6 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): seq_idx=seq_idx, chunk_indices=chunk_indices, chunk_offsets=chunk_offsets, - return_varlen_states=True, initial_states=None, out=Y_ref, ) @@ -414,27 +434,27 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): chunked_seq_idx = torch.repeat_interleave( torch.arange(len(chunked_seqlens), device=device), chunked_seqlens, - output_size=chunked_cu_seqlens[-1]).unsqueeze(0).to(torch.int32) + output_size=chunked_cu_seqlens[-1]).to(torch.int32) chunked_input_seq_len = chunked_cu_seqlens[-1] - X_chunked = torch.zeros_like(X)[:, :chunked_input_seq_len, ...] - dt_chunked = torch.zeros_like(dt)[:, :chunked_input_seq_len, ...] - B_chunked = torch.zeros_like(B)[:, :chunked_input_seq_len, ...] - C_chunked = torch.zeros_like(C)[:, :chunked_input_seq_len, ...] + X_chunked = torch.zeros_like(X)[:chunked_input_seq_len, ...] 
+ dt_chunked = torch.zeros_like(dt)[:chunked_input_seq_len, ...] + B_chunked = torch.zeros_like(B)[:chunked_input_seq_len, ...] + C_chunked = torch.zeros_like(C)[:chunked_input_seq_len, ...] for i in range(num_sequences): # fmt: off - chunk_f = lambda x, i: x[:, cu_seqlens[i]:cu_seqlens[i] + chunked_seqlens[i], ...] # noqa: E501 + chunk_f = lambda x, i: x[cu_seqlens[i]:cu_seqlens[i] + chunked_seqlens[i], ...] # noqa: E501 - X_chunked[:, chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(X, i) # noqa: E501 - dt_chunked[:, chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(dt, i) # noqa: E501 - B_chunked[:, chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(B, i) # noqa: E501 - C_chunked[:, chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(C, i) # noqa: E501 + X_chunked[chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(X, i) # noqa: E501 + dt_chunked[chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(dt, i) # noqa: E501 + B_chunked[chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(B, i) # noqa: E501 + C_chunked[chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1], ...] = chunk_f(C, i) # noqa: E501 # fmt: on chunk_indices, chunk_offsets = \ _query_start_loc_to_chunk_indices_offsets( chunked_cu_seqlens, chunk_size, chunked_cu_seqlens[-1]) Y_partial = torch.empty_like(X_chunked) - partial_state = mamba_chunk_scan_combined( + partial_state = mamba_chunk_scan_combined_varlen( X_chunked, dt_chunked, A, @@ -446,7 +466,6 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): seq_idx=chunked_seq_idx, chunk_indices=chunk_indices, chunk_offsets=chunk_offsets, - return_varlen_states=True, initial_states=None, out=Y_partial, ) @@ -461,29 +480,28 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): remaining_chunked_seq_idx = torch.repeat_interleave( torch.arange(len(remaining_chunked_seqlens), device=device), remaining_chunked_seqlens, - output_size=remaining_chunked_cu_seqlens[-1]).unsqueeze(0).to( - torch.int32) + output_size=remaining_chunked_cu_seqlens[-1]).to(torch.int32) remaining_chunked_input_seq_len = remaining_chunked_cu_seqlens[-1] # fmt: off - remaining_X_chunked = torch.zeros_like(X)[:, :remaining_chunked_input_seq_len, ...] # noqa: E501 - remaining_dt_chunked = torch.zeros_like(dt)[:, :remaining_chunked_input_seq_len, ...] # noqa: E501 - remaining_B_chunked = torch.zeros_like(B)[:, :remaining_chunked_input_seq_len, ...] # noqa: E501 - remaining_C_chunked = torch.zeros_like(C)[:, :remaining_chunked_input_seq_len, ...] # noqa: E501 + remaining_X_chunked = torch.zeros_like(X)[:remaining_chunked_input_seq_len, ...] # noqa: E501 + remaining_dt_chunked = torch.zeros_like(dt)[:remaining_chunked_input_seq_len, ...] # noqa: E501 + remaining_B_chunked = torch.zeros_like(B)[:remaining_chunked_input_seq_len, ...] # noqa: E501 + remaining_C_chunked = torch.zeros_like(C)[:remaining_chunked_input_seq_len, ...] # noqa: E501 for i in range(num_sequences): - remaining_chunk_f = lambda x, i: x[:, cu_seqlens[i] + chunked_seqlens[i]:cu_seqlens[i+1], ...] # noqa: E501 + remaining_chunk_f = lambda x, i: x[cu_seqlens[i] + chunked_seqlens[i]:cu_seqlens[i+1], ...] # noqa: E501 - remaining_X_chunked[:, remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(X, i) # noqa: E501 - remaining_dt_chunked[:, remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] 
= remaining_chunk_f(dt, i) # noqa: E501 - remaining_B_chunked[:, remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(B, i) # noqa: E501 - remaining_C_chunked[:, remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(C, i) # noqa: E501 + remaining_X_chunked[remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(X, i) # noqa: E501 + remaining_dt_chunked[remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(dt, i) # noqa: E501 + remaining_B_chunked[remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(B, i) # noqa: E501 + remaining_C_chunked[remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1], ...] = remaining_chunk_f(C, i) # noqa: E501 # assert input chunking is correct concat_chunk_f = lambda pt1, pt2, i: torch.cat([ - pt1[:,chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1],...], - pt2[:,remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1],...], + pt1[chunked_cu_seqlens[i]:chunked_cu_seqlens[i+1],...], + pt2[remaining_chunked_cu_seqlens[i]:remaining_chunked_cu_seqlens[i+1],...], ], - dim=1) - concat_batch_f = lambda pt1, pt2: torch.cat([concat_chunk_f(pt1, pt2, i) for i in range(num_sequences)], dim=1) # noqa: E501 + dim=0) + concat_batch_f = lambda pt1, pt2: torch.cat([concat_chunk_f(pt1, pt2, i) for i in range(num_sequences)], dim=0) # noqa: E501 # fmt: on assert concat_batch_f(X_chunked, remaining_X_chunked).equal(X) @@ -498,7 +516,7 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): remaining_chunked_cu_seqlens[-1]) Y_chunked = torch.empty_like(remaining_X_chunked) - state_chunked = mamba_chunk_scan_combined( + state_chunked = mamba_chunk_scan_combined_varlen( remaining_X_chunked, remaining_dt_chunked, A, @@ -510,7 +528,6 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): seq_idx=remaining_chunked_seq_idx, chunk_indices=chunk_indices, chunk_offsets=chunk_offsets, - return_varlen_states=True, initial_states=partial_state, out=Y_chunked, ) @@ -518,17 +535,17 @@ def test_mamba_chunk_scan_cont_batch_prefill_chunking(chunk_size, seqlens): # kernel chunked is same as kernel overall for i in range(num_sequences): - Y_seq = Y[:, cu_seqlens[i]:cu_seqlens[i + 1], ...] - Y_ref_seq = Y_ref[:, cu_seqlens[i]:cu_seqlens[i + 1], ...] + Y_seq = Y[cu_seqlens[i]:cu_seqlens[i + 1], ...] + Y_ref_seq = Y_ref[cu_seqlens[i]:cu_seqlens[i + 1], ...] 
torch.testing.assert_close( - Y_seq[:, :chunked_seqlens[i], ...], - Y_ref_seq[:, :chunked_seqlens[i], ...], + Y_seq[:chunked_seqlens[i], ...], + Y_ref_seq[:chunked_seqlens[i], ...], atol=atol, rtol=rtol, msg=lambda x: f"seq{i} output part1 " + x) # noqa: B023 torch.testing.assert_close( - Y_seq[:, chunked_seqlens[i]:, ...], - Y_ref_seq[:, chunked_seqlens[i]:, ...], + Y_seq[chunked_seqlens[i]:, ...], + Y_ref_seq[chunked_seqlens[i]:, ...], atol=atol, rtol=rtol, msg=lambda x: f"seq{i} output part2 " + x) # noqa: B023 diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 908ea6e0025f..6dd09fad7a90 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -29,7 +29,7 @@ from vllm.model_executor.layers.mamba.ops.mamba_ssm import ( selective_state_update) from vllm.model_executor.layers.mamba.ops.ssd_combined import ( - mamba_chunk_scan_combined) + mamba_chunk_scan_combined_varlen) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import ( LoaderFunction, composed_weight_loader, sharded_weight_loader) @@ -504,6 +504,7 @@ def forward_cuda( seq_idx_p = attn_metadata.seq_idx_p chunk_indices_p = attn_metadata.chunk_indices_p chunk_offsets_p = attn_metadata.chunk_offsets_p + query_start_loc_p = attn_metadata.query_start_loc_p # 1. Gated MLP's linear projection projected_states, _ = self.in_proj(hidden_states) @@ -545,6 +546,7 @@ def forward_cuda( out, _ = self.out_proj(hidden_states) return out + # NOTE: V0 put prefill before decode, v1 puts decode before prefill num_prefills = attn_metadata.num_prefills # request count num_decodes = attn_metadata.num_decode_tokens # token count (=request) num_prefill_tokens = attn_metadata.num_prefill_tokens # token count @@ -570,9 +572,6 @@ def forward_cuda( [num_decodes, num_prefills], dim=0, ) - query_start_loc_p = ( - attn_metadata.query_start_loc[-num_prefills - 1:] - - num_decodes if has_prefill else None) # Preallocate output tensor to avoid memcpy cost for merging prefill # and decode outputs @@ -620,15 +619,15 @@ def forward_cuda( ssm_state[state_indices_tensor_p], 0) # NOTE: final output is an in-place update of out tensor - varlen_state = mamba_chunk_scan_combined( - hidden_states_p.view(1, num_prefill_tokens, + varlen_states = mamba_chunk_scan_combined_varlen( + hidden_states_p.view(num_prefill_tokens, self.num_heads // self.tp_size, self.head_dim), - dt_p.unsqueeze(0), + dt_p, self.A, - B_p.view(1, num_prefill_tokens, self.n_groups // self.tp_size, + B_p.view(num_prefill_tokens, self.n_groups // self.tp_size, -1), - C_p.view(1, num_prefill_tokens, self.n_groups // self.tp_size, + C_p.view(num_prefill_tokens, self.n_groups // self.tp_size, -1), chunk_size=chunk_size, D=self.D, @@ -639,17 +638,15 @@ def forward_cuda( chunk_offsets=chunk_offsets_p, cu_seqlens=query_start_loc_p, initial_states=initial_states, - return_varlen_states=True, - return_final_states=False, dt_softplus=True, dt_limit=(0.0, float("inf")), - out=preallocated_ssm_out_p.view(1, num_prefill_tokens, -1, + out=preallocated_ssm_out_p.view(num_prefill_tokens, -1, self.head_dim), state_dtype=ssm_state.dtype) # update ssm states # - varlen state is a (num_prefills, nheads, headdim, dstate) tensor - ssm_state[state_indices_tensor_p] = varlen_state + ssm_state[state_indices_tensor_p] = varlen_states # Process decode requests if has_decode: diff --git 
a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py index 5e5011fa2ac5..c4102c4753c7 100644 --- a/vllm/model_executor/layers/mamba/ops/causal_conv1d.py +++ b/vllm/model_executor/layers/mamba/ops/causal_conv1d.py @@ -427,7 +427,7 @@ def causal_conv1d_fn( batch_ptr = metadata.batch_ptr token_chunk_offset_ptr = metadata.token_chunk_offset_ptr else: - seqlens = np.diff(query_start_loc.to('cpu')) + seqlens = query_start_loc.diff().to('cpu') args = seqlens MAX_NUM_PROGRAMS = 1024 diff --git a/vllm/model_executor/layers/mamba/ops/ssd_bmm.py b/vllm/model_executor/layers/mamba/ops/ssd_bmm.py index 11ca1255ebfb..601b71ab2a51 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_bmm.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_bmm.py @@ -99,34 +99,28 @@ def _bmm_chunk_fwd_kernel( seq_idx_ptr, # Matrix dimensions seqlen, - chunk_size, - K, - ngroups, - stride_a_batch, - stride_a_seqlen, - stride_a_head, - stride_ak, - stride_b_batch, - stride_b_seqlen, - stride_b_head, - stride_bk, - stride_out_batch, - stride_out_chunk, - stride_out_head, - stride_outm, - stride_outn, - stride_seq_idx_batch, - stride_seq_idx_seqlen, + chunk_size: tl.constexpr, + K: tl.constexpr, + ngroups: tl.constexpr, + stride_a_seqlen: tl.int64, + stride_a_head: tl.int64, + stride_ak: tl.constexpr, + stride_b_seqlen: tl.int64, + stride_b_head: tl.int64, + stride_bk: tl.constexpr, + stride_out_chunk: tl.int64, + stride_out_head: tl.int64, + stride_outm: tl.int64, + stride_outn: tl.constexpr, + stride_seq_idx_seqlen: tl.constexpr, # Meta-parameters IS_CAUSAL: tl.constexpr, dot_dtype: tl.constexpr, - HAS_SEQ_IDX: tl.constexpr, BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr, ): - pid_b = tl.program_id(axis=1) - pid_ch = tl.program_id(axis=2).to(tl.int64) + pid_ch = tl.program_id(axis=1).to(tl.int64) pid_c = pid_ch // ngroups pid_h = pid_ch - pid_c * ngroups num_pid_n = tl.cdiv(chunk_size, BLOCK_SIZE_N) @@ -135,10 +129,10 @@ def _bmm_chunk_fwd_kernel( if IS_CAUSAL: if pid_n * BLOCK_SIZE_N >= (pid_m + 1) * BLOCK_SIZE_M: return - a_ptr += pid_b * stride_a_batch + pid_c * chunk_size * stride_a_seqlen + pid_h * stride_a_head - b_ptr += pid_b * stride_b_batch + pid_c * chunk_size * stride_b_seqlen + pid_h * stride_b_head - if HAS_SEQ_IDX: - seq_idx_ptr += pid_b * stride_seq_idx_batch + pid_c * chunk_size * stride_seq_idx_seqlen + a_ptr += pid_c * chunk_size * stride_a_seqlen + pid_h * stride_a_head + b_ptr += pid_c * chunk_size * stride_b_seqlen + pid_h * stride_b_head + + seq_idx_ptr += pid_c * chunk_size * stride_seq_idx_seqlen offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) @@ -150,6 +144,8 @@ def _bmm_chunk_fwd_kernel( chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) + + # compute a * b.T for k in range(0, tl.cdiv(K, BLOCK_SIZE_K)): a = tl.load(a_ptrs, mask=(offs_m[:, None] < chunk_size_limit) & @@ -165,18 +161,19 @@ def _bmm_chunk_fwd_kernel( offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) - if HAS_SEQ_IDX: - chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) - seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, - mask=offs_m < chunk_size_limit, - other=-1) - seq_idx_n = tl.load(seq_idx_ptr + offs_n * stride_seq_idx_seqlen, - mask=offs_n < chunk_size_limit, - other=-2) - acc = 
tl.where(seq_idx_m[:, None] == seq_idx_n[None, :], acc, 0.0) - out = acc.to(out_ptr.dtype.element_ty) - out_ptr += pid_b * stride_out_batch + pid_c * stride_out_chunk + pid_h * stride_out_head + # Zero out the results that are not from the same request + # in the varlen batch + seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, + mask=offs_m < chunk_size_limit, + other=-1) + seq_idx_n = tl.load(seq_idx_ptr + offs_n * stride_seq_idx_seqlen, + mask=offs_n < chunk_size_limit, + other=-2) + acc = tl.where(seq_idx_m[:, None] == seq_idx_n[None, :], acc, 0.0) + + out = acc.to(out_ptr.dtype.element_ty) + out_ptr += pid_c * stride_out_chunk + pid_h * stride_out_head out_ptrs = out_ptr + (stride_outm * offs_m[:, None] + offs_n[None, :] * stride_outn) tl.store(out_ptrs, @@ -185,78 +182,61 @@ def _bmm_chunk_fwd_kernel( (offs_n[None, :] < chunk_size)) -def _bmm_chunk_fwd(a, - b, - chunk_size, - seq_idx=None, - causal=False, - output_dtype=None): +def _bmm_chunk_fwd(a, b, chunk_size, seq_idx, causal=False, output_dtype=None): """ Argument: - a: (batch, seqlen, k) or (batch, seqlen, ngroups, k) - b: (batch, seqlen, k) or (batch, seqlen, ngroups, k) - seq_idx: (batch, seqlen) or None. out[i, j] for seq_idx[i] != seq_idx[j] will be zeroed out. + a: (seqlen, ngroups, k) + b: (seqlen, ngroups, k) + seq_idx: (seqlen,). out[i, j] for seq_idx[i] != seq_idx[j] will be zeroed out. causal: if True, then out[i, j] for i > j will be arbitrary, only out[i, j] for i <= j are guaranteed to be correct. Return: - out: (batch, nchunks, chunk_size, chunk_size) or (batch, nchunks, ngroups, chunk_size, chunk_size) + out: (nchunks, ngroups, chunk_size, chunk_size) """ - # Check constraints. - has_groups = a.dim() == 4 - if not has_groups: - batch, seqlen, k = a.shape - else: - batch, seqlen, ngroups, k = a.shape + seqlen, ngroups, k = a.shape assert b.shape == a.shape - if seq_idx is not None: - assert seq_idx.shape == (batch, seqlen) - if a.stride(-1) != 1 and a.stride(1) != 1: + assert seq_idx is not None + assert seq_idx.shape == (seqlen, ) + if a.stride(-1) != 1 and a.stride(0) != 1: a = a.contiguous() - if b.stride(-1) != 1 and b.stride(1) != 1: + if b.stride(-1) != 1 and b.stride(0) != 1: b = b.contiguous() + nchunks = math.ceil(seqlen / chunk_size) # Allocates output. 
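# For intuition, a naive PyTorch reference of what this varlen kernel computes
# (per chunk and per group: the chunk-local a @ b^T, with entries whose row and
# column tokens belong to different requests zeroed out via seq_idx). This is
# an illustrative sketch only -- the name is made up and the causal fast path
# is omitted.
import torch
def _bmm_chunk_ref(a, b, chunk_size, seq_idx):
    seqlen, ngroups, _ = a.shape
    nchunks = -(-seqlen // chunk_size)  # ceil division
    out = torch.zeros(nchunks, ngroups, chunk_size, chunk_size,
                      dtype=torch.float32, device=a.device)
    for c in range(nchunks):
        s, e = c * chunk_size, min((c + 1) * chunk_size, seqlen)
        cb = torch.einsum("mgk,ngk->gmn", a[s:e].float(), b[s:e].float())
        same_req = (seq_idx[s:e, None] == seq_idx[None, s:e]).to(cb.dtype)
        out[c, :, :e - s, :e - s] = cb * same_req
    return out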
out_dtype = a.dtype if output_dtype is None else output_dtype - out = torch.empty( - (batch, nchunks, chunk_size, chunk_size) if not has_groups else - (batch, nchunks, ngroups, chunk_size, chunk_size), - device=a.device, - dtype=out_dtype) + out = torch.empty((nchunks, ngroups, chunk_size, chunk_size), + device=a.device, + dtype=out_dtype) dot_dtype = (tl.bfloat16 if a.dtype == torch.bfloat16 or b.dtype == torch.bfloat16 else (tl.float16 if a.dtype == torch.float16 or b.dtype == torch.float16 else tl.float32)) grid = lambda META: (triton.cdiv( chunk_size, META['BLOCK_SIZE_M']) * triton.cdiv( - chunk_size, META['BLOCK_SIZE_N']), batch, nchunks - if not has_groups else nchunks * ngroups) + chunk_size, META['BLOCK_SIZE_N']), nchunks * ngroups) with torch.cuda.device(a.device.index): _bmm_chunk_fwd_kernel[grid]( - a, - b, - out, - seq_idx, - seqlen, - chunk_size, - k, - ngroups if has_groups else 1, - a.stride(0), - a.stride(1), - 0 if not has_groups else a.stride(2), - a.stride(-1), - b.stride(0), - b.stride(1), - 0 if not has_groups else b.stride(2), - b.stride(-1), - out.stride(0), - out.stride(1), - 0 if not has_groups else out.stride(2), - out.stride(-2), - out.stride(-1), - *((seq_idx.stride(0), - seq_idx.stride(1)) if seq_idx is not None else (0, 0)), - causal, - dot_dtype, - HAS_SEQ_IDX=seq_idx is not None, + a_ptr=a, + b_ptr=b, + out_ptr=out, + seq_idx_ptr=seq_idx, + seqlen=seqlen, + chunk_size=chunk_size, + K=k, + ngroups=ngroups, + stride_a_seqlen=a.stride(0), + stride_a_head=a.stride(1), + stride_ak=a.stride(2), + stride_b_seqlen=b.stride(0), + stride_b_head=b.stride(1), + stride_bk=b.stride(2), + stride_out_chunk=out.stride(0), + stride_out_head=out.stride(1), + stride_outm=out.stride(-2), + stride_outn=out.stride(-1), + stride_seq_idx_seqlen=seq_idx.stride(0), + IS_CAUSAL=causal, + dot_dtype=dot_dtype, ) return out diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py index fb8350e191c9..add72617fcea 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py @@ -6,7 +6,6 @@ # ruff: noqa: E501,SIM102 -import torch from packaging import version from vllm.triton_utils import tl, triton @@ -114,7 +113,6 @@ def _chunk_scan_fwd_kernel( x_ptr, z_ptr, out_ptr, - out_x_ptr, dt_ptr, dA_cumsum_ptr, seq_idx_ptr, @@ -126,60 +124,49 @@ def _chunk_scan_fwd_kernel( chunk_offsets_ptr, chunk_meta_num, # Matrix dimensions - chunk_size, - hdim, - dstate, - batch, + chunk_size: tl.constexpr, + hdim: tl.constexpr, + dstate: tl.constexpr, seqlen, - nheads_ngroups_ratio, + nheads_ngroups_ratio: tl.constexpr, # Strides - stride_cb_batch, - stride_cb_chunk, - stride_cb_head, - stride_cb_csize_m, - stride_cb_csize_k, - stride_x_batch, - stride_x_seqlen, - stride_x_head, - stride_x_hdim, - stride_z_batch, - stride_z_seqlen, - stride_z_head, - stride_z_hdim, - stride_out_batch, - stride_out_seqlen, - stride_out_head, - stride_out_hdim, - stride_dt_batch, - stride_dt_chunk, - stride_dt_head, - stride_dt_csize, - stride_dA_cs_batch, - stride_dA_cs_chunk, - stride_dA_cs_head, - stride_dA_cs_csize, - stride_seq_idx_batch, - stride_seq_idx_seqlen, - stride_C_batch, - stride_C_seqlen, - stride_C_head, - stride_C_dstate, - stride_states_batch, - stride_states_chunk, - stride_states_head, - stride_states_hdim, - stride_states_dstate, - stride_init_states_batch, - stride_init_states_head, - stride_init_states_hdim, - stride_init_states_dstate, - stride_D_head, + stride_cb_chunk: 
tl.int64, + stride_cb_head: tl.int64, + stride_cb_csize_m: tl.int64, + stride_cb_csize_k: tl.constexpr, + stride_x_seqlen: tl.int64, + stride_x_head: tl.int64, + stride_x_hdim: tl.constexpr, + stride_z_seqlen: tl.int64, + stride_z_head: tl.int64, + stride_z_hdim: tl.constexpr, + stride_out_seqlen: tl.int64, + stride_out_head: tl.int64, + stride_out_hdim: tl.constexpr, + stride_dt_chunk: tl.int64, + stride_dt_head: tl.int64, + stride_dt_csize: tl.constexpr, + stride_dA_cs_chunk: tl.int64, + stride_dA_cs_head: tl.int64, + stride_dA_cs_csize: tl.constexpr, + stride_seq_idx_seqlen: tl.constexpr, + stride_C_seqlen: tl.int64, + stride_C_head: tl.int64, + stride_C_dstate: tl.constexpr, + stride_states_chunk: tl.int64, + stride_states_head: tl.int64, + stride_states_hdim: tl.int64, + stride_states_dstate: tl.constexpr, + stride_init_states_batch: tl.int64, + stride_init_states_head: tl.int64, + stride_init_states_hdim: tl.int64, + stride_init_states_dstate: tl.constexpr, + stride_D_head: tl.constexpr, # Meta-parameters IS_CAUSAL: tl.constexpr, HAS_D: tl.constexpr, D_HAS_HDIM: tl.constexpr, HAS_Z: tl.constexpr, - HAS_SEQ_IDX: tl.constexpr, BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr, @@ -187,9 +174,7 @@ def _chunk_scan_fwd_kernel( IS_TRITON_22: tl.constexpr, HAS_INITSTATES: tl.constexpr, ): - pid_bc = tl.program_id(axis=1).to(tl.int64) - pid_c = pid_bc // batch - pid_b = pid_bc - pid_c * batch + pid_c = tl.program_id(axis=1).to(tl.int64) if not HAS_INITSTATES: c_idx = pid_c c_off = 0 @@ -201,53 +186,51 @@ def _chunk_scan_fwd_kernel( num_pid_n = tl.cdiv(hdim, BLOCK_SIZE_N) pid_m = tl.program_id(axis=0) // num_pid_n pid_n = tl.program_id(axis=0) % num_pid_n - cb_ptr += pid_b * stride_cb_batch + c_idx * stride_cb_chunk + ( - pid_h // nheads_ngroups_ratio) * stride_cb_head - x_ptr += pid_b * stride_x_batch + c_idx * chunk_size * stride_x_seqlen + pid_h * stride_x_head - dt_ptr += pid_b * stride_dt_batch + c_idx * stride_dt_chunk + pid_h * stride_dt_head - dA_cumsum_ptr += pid_b * stride_dA_cs_batch + c_idx * stride_dA_cs_chunk + pid_h * stride_dA_cs_head - C_ptr += pid_b * stride_C_batch + c_idx * chunk_size * stride_C_seqlen + ( + cb_ptr += c_idx * stride_cb_chunk + (pid_h // + nheads_ngroups_ratio) * stride_cb_head + x_ptr += c_idx * chunk_size * stride_x_seqlen + pid_h * stride_x_head + dt_ptr += c_idx * stride_dt_chunk + pid_h * stride_dt_head + dA_cumsum_ptr += c_idx * stride_dA_cs_chunk + pid_h * stride_dA_cs_head + C_ptr += c_idx * chunk_size * stride_C_seqlen + ( pid_h // nheads_ngroups_ratio) * stride_C_head # M-block offsets and prev states # - logic in next block may override these if there is an active offset offs_m = pid_m * BLOCK_SIZE_M + c_off + tl.arange(0, BLOCK_SIZE_M) - prev_states_ptr = states_ptr + pid_b * stride_states_batch + c_idx * stride_states_chunk + pid_h * stride_states_head + prev_states_ptr = states_ptr + c_idx * stride_states_chunk + pid_h * stride_states_head prev_states_hdim = stride_states_hdim prev_states_dstate = stride_states_dstate chunk_size_limit = min(chunk_size, seqlen - c_idx * chunk_size) - if HAS_SEQ_IDX: - seq_idx_ptr += pid_b * stride_seq_idx_batch + c_idx * chunk_size * stride_seq_idx_seqlen - - # - we only need seq_idx_prev to be aligned to chunk boundary - seq_idx_prev = tl.load(seq_idx_ptr - stride_seq_idx_seqlen, - mask=c_idx >= 1, - other=0) - - if HAS_INITSTATES: - # if there are init states, we only need seq_idx_m to point - # what is the current seq_idx - - # get current seq idx - if (pid_m * BLOCK_SIZE_M + 
c_off) < chunk_size_limit: - seq_idx_m = tl.load( - seq_idx_ptr + - (pid_m * BLOCK_SIZE_M + c_off) * stride_seq_idx_seqlen, ) - - # - recall that in ssd_state_passing, for the case c_off == 0 - # i.e., the very first sequence, we made states_ptr hold its initial state - # so this edge case is taken care of - if ((c_off == 0) and - (seq_idx_prev != seq_idx_m - ) # if a seq is changed exactly on boundary - or (c_off > 0) # implies a new example (pseudo chunk) - ): - - # - replace prev_states_ptr with init_states - prev_states_ptr = initstates_ptr + seq_idx_m * stride_init_states_batch + pid_h * stride_init_states_head - prev_states_hdim = stride_init_states_hdim # override strides - prev_states_dstate = stride_init_states_dstate + + seq_idx_ptr += c_idx * chunk_size * stride_seq_idx_seqlen + # - we only need seq_idx_prev to be aligned to chunk boundary + seq_idx_prev = tl.load(seq_idx_ptr - stride_seq_idx_seqlen, + mask=c_idx >= 1, + other=0) + + if HAS_INITSTATES: + # if there are init states, we only need seq_idx_m to point + # what is the current seq_idx + + # get current seq idx + if (pid_m * BLOCK_SIZE_M + c_off) < chunk_size_limit: + seq_idx_m = tl.load( + seq_idx_ptr + + (pid_m * BLOCK_SIZE_M + c_off) * stride_seq_idx_seqlen, ) + + # - recall that in ssd_state_passing, for the case c_off == 0 + # i.e., the very first sequence, we made states_ptr hold its initial state + # so this edge case is taken care of + if ((c_off == 0) and (seq_idx_prev != seq_idx_m + ) # if a seq is changed exactly on boundary + or (c_off > 0) # implies a new example (pseudo chunk) + ): + + # - replace prev_states_ptr with init_states + prev_states_ptr = initstates_ptr + seq_idx_m * stride_init_states_batch + pid_h * stride_init_states_head + prev_states_hdim = stride_init_states_hdim # override strides + prev_states_dstate = stride_init_states_dstate offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) dA_cs_m = tl.load(dA_cumsum_ptr + offs_m * stride_dA_cs_csize, @@ -256,7 +239,6 @@ def _chunk_scan_fwd_kernel( # - handle chunk state limit if HAS_INITSTATES: - # have to split this if otherwise compilation will have problems dA_cs_m_boundary = 0.0 @@ -296,13 +278,11 @@ def _chunk_scan_fwd_kernel( dA_cumsum_ptr + (c_off - 1) * stride_dA_cs_csize, mask=(((c_off - 1) > -1) and ((c_off) < chunk_size)), other=0.0).to(tl.float32) - - if HAS_SEQ_IDX: + else: # - handle seq idx when HAS_INITSTATES==False - if not HAS_INITSTATES: - seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, - mask=offs_m < chunk_size_limit, - other=-1) + seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, + mask=offs_m < chunk_size_limit, + other=-1) acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) @@ -319,18 +299,15 @@ def _chunk_scan_fwd_kernel( prev_states_ptrs = prev_states_ptr + ( offs_n[None, :] * prev_states_hdim + offs_k_dstate[:, None] * prev_states_dstate) - if HAS_SEQ_IDX: - - if not HAS_INITSTATES: - # - this is for continuous batching where there is no init states - scale_m = tl.where(seq_idx_m == seq_idx_prev, tl.exp(dA_cs_m), - 0.0) - else: - # - if there is initstates, we will rely on prev_states, no zeroing - # required. - scale_m = tl.exp(dA_cs_m - dA_cs_m_boundary) + + if not HAS_INITSTATES: + # - this is for continuous batching where there is no init states + scale_m = tl.where(seq_idx_m == seq_idx_prev, tl.exp(dA_cs_m), 0.0) else: - scale_m = tl.exp(dA_cs_m) + # - if there is initstates, we will rely on prev_states, no zeroing + # required. 
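# (Roughly: dA_cs_m_boundary was loaded above at offset c_off - 1, so the
# subtraction below restarts the cumulative decay at the first token of the
# sequence that owns this pseudo-chunk rather than at the physical chunk
# start; prev_states -- possibly replaced by that sequence's initial state
# above -- already accounts for everything before that boundary.)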
+ scale_m = tl.exp(dA_cs_m - dA_cs_m_boundary) + if BLOCK_SIZE_DSTATE <= 128: C = tl.load(C_ptrs, mask=(offs_m[:, None] < chunk_size_limit) & @@ -416,15 +393,7 @@ def _chunk_scan_fwd_kernel( acc += x_residual * D if HAS_Z: - out_x_ptr += pid_b * stride_out_batch + c_idx * chunk_size * stride_out_seqlen + pid_h * stride_out_head - out_x_ptrs = out_x_ptr + (stride_out_seqlen * offs_out_m[:, None] + - offs_out_n[None, :]) - tl.store(out_x_ptrs, - acc, - mask=(offs_out_m[:, None] < chunk_size_limit) & - (offs_out_n[None, :] < hdim)) - - z_ptr += pid_b * stride_z_batch + c_idx * chunk_size * stride_z_seqlen + pid_h * stride_z_head + z_ptr += c_idx * chunk_size * stride_z_seqlen + pid_h * stride_z_head z_ptrs = z_ptr + (stride_z_seqlen * offs_out_m[:, None] + stride_z_hdim * offs_out_n[None, :]) z = tl.load(z_ptrs, @@ -433,7 +402,7 @@ def _chunk_scan_fwd_kernel( other=0.0).to(tl.float32) acc *= z * tl.sigmoid(z) - out_ptr += pid_b * stride_out_batch + c_idx * chunk_size * stride_out_seqlen + pid_h * stride_out_head + out_ptr += c_idx * chunk_size * stride_out_seqlen + pid_h * stride_out_head out_ptrs = out_ptr + (stride_out_seqlen * offs_out_m[:, None] + offs_out_n[None, :] * stride_out_hdim) tl.store(out_ptrs, @@ -449,126 +418,110 @@ def _chunk_scan_fwd( dA_cumsum, C, states, + out, + seq_idx, D=None, z=None, - seq_idx=None, chunk_indices=None, chunk_offsets=None, initial_states=None, - out=None, ): - batch, seqlen, nheads, headdim = x.shape - _, _, nchunks, chunk_size = dt.shape - _, _, ngroups, dstate = C.shape + assert seq_idx is not None, "this implementation requires seq_idx" + + seqlen, nheads, headdim = x.shape + _, nchunks, chunk_size = dt.shape + _, ngroups, dstate = C.shape assert nheads % ngroups == 0 - assert C.shape == (batch, seqlen, ngroups, dstate) - assert cb.shape == (batch, nchunks, ngroups, chunk_size, chunk_size) - if z is not None: - assert z.shape == x.shape + assert C.shape == (seqlen, ngroups, dstate) + assert cb.shape == (nchunks, ngroups, chunk_size, chunk_size) if D is not None: assert D.shape == (nheads, headdim) or D.shape == (nheads, ) - assert dt.shape == (batch, nheads, nchunks, chunk_size) - assert dA_cumsum.shape == (batch, nheads, nchunks, chunk_size) - assert states.shape == (batch, nchunks, nheads, headdim, dstate) - - if seq_idx is not None: - assert seq_idx.shape == (batch, seqlen) - - if initial_states is not None: - # with initial states, we need to take care of how - # seq_idx crosses the boundaries - assert batch == 1, "chunk scan only supports initial states with batch 1" - assert chunk_indices is not None and chunk_offsets is not None, \ - "chunk_indices and chunk_offsets should have been set" - else: - chunk_indices, chunk_offsets = None, None - else: - chunk_indices, chunk_offsets = None, None - - assert out.shape == x.shape - if z is not None: - out_x = torch.empty_like(x) - assert out_x.stride() == out.stride() + assert z.shape == x.shape + assert dt.shape == (nheads, nchunks, chunk_size) + assert dA_cumsum.shape == (nheads, nchunks, chunk_size) + assert states.shape == (nchunks, nheads, headdim, dstate) + assert seq_idx.shape == (seqlen, ) + + if initial_states is not None: + # with initial states, we need to take care of how + # seq_idx crosses the boundaries + assert chunk_indices is not None and chunk_offsets is not None, \ + "chunk_indices and chunk_offsets should have been set" else: - out_x = None + chunk_indices, chunk_offsets = None, None grid = lambda META: ( triton.cdiv(chunk_size, META['BLOCK_SIZE_M']) * triton.cdiv( - headdim, 
META['BLOCK_SIZE_N']), batch * nchunks + headdim, META['BLOCK_SIZE_N']), nchunks if chunk_offsets is None else len(chunk_offsets), nheads) - z_strides = ((z.stride(0), z.stride(1), z.stride(2), - z.stride(3)) if z is not None else (0, 0, 0, 0)) + + z_strides = ((z.stride(0), z.stride(1), z.stride(2)) if z is not None else + (0, 0, 0)) + initial_states_strides = ((initial_states.stride(0), + initial_states.stride(1), + initial_states.stride(2), + initial_states.stride(3)) + if initial_states is not None else (0, 0, 0, 0)) + _chunk_scan_fwd_kernel[grid]( - cb, - x, - z, - out, - out_x, - dt, - dA_cumsum, - seq_idx, - C, - states, - D, - initial_states, - chunk_indices, - chunk_offsets, - len(chunk_indices) if chunk_indices is not None else 0, - chunk_size, - headdim, - dstate, - batch, - seqlen, - nheads // ngroups, - cb.stride(0), - cb.stride(1), - cb.stride(2), - cb.stride(3), - cb.stride(4), - x.stride(0), - x.stride(1), - x.stride(2), - x.stride(3), - z_strides[0], - z_strides[1], - z_strides[2], - z_strides[3], - out.stride(0), - out.stride(1), - out.stride(2), - out.stride(3), - dt.stride(0), - dt.stride(2), - dt.stride(1), - dt.stride(3), - dA_cumsum.stride(0), - dA_cumsum.stride(2), - dA_cumsum.stride(1), - dA_cumsum.stride(3), - *((seq_idx.stride(0), seq_idx.stride(1)) if seq_idx is not None else - (0, 0)), - C.stride(0), - C.stride(1), - C.stride(2), - C.stride(3), - states.stride(0), - states.stride(1), - states.stride(2), - states.stride(3), - states.stride(4), - *((initial_states.stride(0), initial_states.stride(1), - initial_states.stride(2), - initial_states.stride(3)) if initial_states is not None else - (0, 0, 0, 0)), - D.stride(0) if D is not None else 0, - True, - D is not None, - D.dim() == 2 if D is not None else True, - BLOCK_SIZE_DSTATE=max(triton.next_power_of_2(dstate), 16), + cb_ptr=cb, + x_ptr=x, + z_ptr=z, + out_ptr=out, + dt_ptr=dt, + dA_cumsum_ptr=dA_cumsum, + seq_idx_ptr=seq_idx, + C_ptr=C, + states_ptr=states, + D_ptr=D, + initstates_ptr=initial_states, + chunk_indices_ptr=chunk_indices, + chunk_offsets_ptr=chunk_offsets, + chunk_meta_num=len(chunk_indices) if chunk_indices is not None else 0, + chunk_size=chunk_size, + hdim=headdim, + dstate=dstate, + seqlen=seqlen, + nheads_ngroups_ratio=nheads // ngroups, + stride_cb_chunk=cb.stride(0), + stride_cb_head=cb.stride(1), + stride_cb_csize_m=cb.stride(2), + stride_cb_csize_k=cb.stride(3), + stride_x_seqlen=x.stride(0), + stride_x_head=x.stride(1), + stride_x_hdim=x.stride(2), + stride_z_seqlen=z_strides[0], + stride_z_head=z_strides[1], + stride_z_hdim=z_strides[2], + stride_out_seqlen=out.stride(0), + stride_out_head=out.stride(1), + stride_out_hdim=out.stride(2), + stride_dt_chunk=dt.stride(1), + stride_dt_head=dt.stride(0), + stride_dt_csize=dt.stride(2), + stride_dA_cs_chunk=dA_cumsum.stride(1), + stride_dA_cs_head=dA_cumsum.stride(0), + stride_dA_cs_csize=dA_cumsum.stride(2), + stride_seq_idx_seqlen=seq_idx.stride(0), + stride_C_seqlen=C.stride(0), + stride_C_head=C.stride(1), + stride_C_dstate=C.stride(2), + stride_states_chunk=states.stride(0), + stride_states_head=states.stride(1), + stride_states_hdim=states.stride(2), + stride_states_dstate=states.stride(3), + stride_init_states_batch=initial_states_strides[0], + stride_init_states_head=initial_states_strides[1], + stride_init_states_hdim=initial_states_strides[2], + stride_init_states_dstate=initial_states_strides[3], + stride_D_head=D.stride(0) if D is not None else 0, + IS_CAUSAL=True, + HAS_D=D is not None, + D_HAS_HDIM=D.dim() == 2 if D is not 
None else True, HAS_Z=z is not None, - HAS_SEQ_IDX=seq_idx is not None, + BLOCK_SIZE_DSTATE=max(triton.next_power_of_2(dstate), 16), IS_TRITON_22=TRITON_22, HAS_INITSTATES=initial_states is not None, ) - return out_x + return diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py index 2e657426143b..8ee41f2cbc1b 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py @@ -35,41 +35,35 @@ def _chunk_cumsum_fwd_kernel( dt_out_ptr, dA_cumsum_ptr, # Matrix dimension - batch, seqlen, - nheads, - chunk_size, - dt_min, - dt_max, + nheads: tl.constexpr, + chunk_size: tl.constexpr, + dt_min: tl.constexpr, + dt_max: tl.constexpr, # Strides - stride_dt_batch, - stride_dt_seqlen, - stride_dt_head, - stride_A_head, - stride_dt_bias_head, - stride_dt_out_batch, - stride_dt_out_chunk, - stride_dt_out_head, - stride_dt_out_csize, - stride_dA_cs_batch, - stride_dA_cs_chunk, - stride_dA_cs_head, - stride_dA_cs_csize, + stride_dt_seqlen: tl.int64, + stride_dt_head: tl.constexpr, + stride_A_head: tl.constexpr, + stride_dt_bias_head: tl.constexpr, + stride_dt_out_head: tl.int64, + stride_dt_out_chunk: tl.int64, + stride_dt_out_csize: tl.constexpr, + stride_dA_cs_head: tl.int64, + stride_dA_cs_chunk: tl.int64, + stride_dA_cs_csize: tl.constexpr, # Meta-parameters DT_SOFTPLUS: tl.constexpr, HAS_DT_BIAS: tl.constexpr, BLOCK_SIZE_H: tl.constexpr, BLOCK_SIZE_CHUNK: tl.constexpr, ): - pid_b = tl.program_id(axis=0) - # if dt is long, may cause problems, so use 64 bit # https://github.com/triton-lang/triton/issues/1058 - pid_c = tl.program_id(axis=1).to(tl.int64) - pid_h = tl.program_id(axis=2) - dt_ptr += pid_b * stride_dt_batch + pid_c * chunk_size * stride_dt_seqlen - dt_out_ptr += pid_b * stride_dt_out_batch + pid_c * stride_dt_out_chunk - dA_cumsum_ptr += pid_b * stride_dA_cs_batch + pid_c * stride_dA_cs_chunk + pid_c = tl.program_id(axis=0).to(tl.int64) + pid_h = tl.program_id(axis=1) + dt_ptr += pid_c * chunk_size * stride_dt_seqlen + dt_out_ptr += pid_c * stride_dt_out_chunk + dA_cumsum_ptr += pid_c * stride_dA_cs_chunk offs_h = pid_h * BLOCK_SIZE_H + tl.arange(0, BLOCK_SIZE_H) offs_c = tl.arange(0, BLOCK_SIZE_CHUNK) @@ -93,9 +87,8 @@ def _chunk_cumsum_fwd_kernel( dt += dt_bias[:, None] if DT_SOFTPLUS: dt = tl.where(dt <= 20.0, softplus(dt), dt) - # As of Triton 2.2.0, tl.clamp is not available yet - # dt = tl.clamp(dt, dt_min, dt_max) - dt = tl.minimum(tl.maximum(dt, dt_min), dt_max) + + dt = tl.clamp(dt, dt_min, dt_max) dt = tl.where( (offs_h[:, None] < nheads) & (offs_c[None, :] < chunk_size_limit), dt, 0.0) @@ -197,56 +190,46 @@ def _chunk_state_fwd_kernel( dA_cumsum_ptr, seq_idx_ptr, # Matrix dimensions - hdim, - dstate, - chunk_size, - batch, + hdim: tl.constexpr, + dstate: tl.constexpr, + chunk_size: tl.constexpr, seqlen, - nheads_ngroups_ratio, + nheads_ngroups_ratio: tl.constexpr, # Strides - stride_x_batch, - stride_x_seqlen, - stride_x_head, - stride_x_hdim, - stride_b_batch, - stride_b_seqlen, - stride_b_head, - stride_b_dstate, - stride_states_batch, - stride_states_chunk, - stride_states_head, - stride_states_hdim, - stride_states_dstate, - stride_dt_batch, - stride_dt_chunk, - stride_dt_head, - stride_dt_csize, - stride_dA_cs_batch, - stride_dA_cs_chunk, - stride_dA_cs_head, - stride_dA_cs_csize, - stride_seq_idx_batch, - stride_seq_idx_seqlen, + stride_x_seqlen: tl.int64, + stride_x_head: tl.int64, + stride_x_hdim: tl.constexpr, + stride_b_seqlen: 
tl.int64, + stride_b_head: tl.int64, + stride_b_dstate: tl.constexpr, + stride_states_chunk: tl.int64, + stride_states_head: tl.int64, + stride_states_hdim: tl.int64, + stride_states_dstate: tl.constexpr, + stride_dt_head: tl.int64, + stride_dt_chunk: tl.int64, + stride_dt_csize: tl.constexpr, + stride_dA_cs_head: tl.int64, + stride_dA_cs_chunk: tl.int64, + stride_dA_cs_csize: tl.constexpr, + stride_seq_idx_seqlen: tl.constexpr, # Meta-parameters - HAS_SEQ_IDX: tl.constexpr, BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, BLOCK_SIZE_K: tl.constexpr, ): - pid_bc = tl.program_id(axis=1).to(tl.int64) - pid_c = pid_bc // batch - pid_b = pid_bc - pid_c * batch + pid_c = tl.program_id(axis=1).to(tl.int64) pid_h = tl.program_id(axis=2) num_pid_n = tl.cdiv(dstate, BLOCK_SIZE_N) pid_m = tl.program_id(axis=0) // num_pid_n pid_n = tl.program_id(axis=0) % num_pid_n - b_ptr += pid_b * stride_b_batch + pid_c * chunk_size * stride_b_seqlen + ( + b_ptr += pid_c * chunk_size * stride_b_seqlen + ( pid_h // nheads_ngroups_ratio) * stride_b_head - x_ptr += pid_b * stride_x_batch + pid_c * chunk_size * stride_x_seqlen + pid_h * stride_x_head - dt_ptr += pid_b * stride_dt_batch + pid_c * stride_dt_chunk + pid_h * stride_dt_head - dA_cumsum_ptr += pid_b * stride_dA_cs_batch + pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head - if HAS_SEQ_IDX: - seq_idx_ptr += pid_b * stride_seq_idx_batch + pid_c * chunk_size * stride_seq_idx_seqlen + x_ptr += pid_c * chunk_size * stride_x_seqlen + pid_h * stride_x_head + dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head + dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head + + seq_idx_ptr += pid_c * chunk_size * stride_seq_idx_seqlen offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) @@ -259,13 +242,11 @@ def _chunk_state_fwd_kernel( dA_cs_last = tl.load(dA_cumsum_ptr + (chunk_size - 1) * stride_dA_cs_csize).to(tl.float32) dA_cumsum_ptrs = dA_cumsum_ptr + offs_k * stride_dA_cs_csize - if HAS_SEQ_IDX: - seq_idx_ptrs = seq_idx_ptr + offs_k * stride_seq_idx_seqlen + seq_idx_ptrs = seq_idx_ptr + offs_k * stride_seq_idx_seqlen chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) - if HAS_SEQ_IDX: - seq_idx_last = tl.load(seq_idx_ptr + - (chunk_size_limit - 1) * stride_seq_idx_seqlen) + seq_idx_last = tl.load(seq_idx_ptr + + (chunk_size_limit - 1) * stride_seq_idx_seqlen) acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) for k in range(0, chunk_size_limit, BLOCK_SIZE_K): @@ -280,29 +261,28 @@ def _chunk_state_fwd_kernel( dA_cs_k = tl.load(dA_cumsum_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32) - if HAS_SEQ_IDX: - seq_idx_k = tl.load(seq_idx_ptrs, - mask=offs_k < chunk_size_limit - k, - other=-1) + + seq_idx_k = tl.load(seq_idx_ptrs, + mask=offs_k < chunk_size_limit - k, + other=-1) dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32) - if not HAS_SEQ_IDX: - scale = tl.exp(dA_cs_last - dA_cs_k) * dt_k - else: - scale = tl.where(seq_idx_k == seq_idx_last, - tl.exp(dA_cs_last - dA_cs_k) * dt_k, 0.0) + + scale = tl.where(seq_idx_k == seq_idx_last, + tl.exp(dA_cs_last - dA_cs_k) * dt_k, 0.0) b *= scale[:, None] b = b.to(x_ptr.dtype.element_ty) acc += tl.dot(x, b) + x_ptrs += BLOCK_SIZE_K * stride_x_seqlen b_ptrs += BLOCK_SIZE_K * stride_b_seqlen dt_ptrs += BLOCK_SIZE_K * stride_dt_csize dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize - if HAS_SEQ_IDX: - seq_idx_ptrs += BLOCK_SIZE_K * 
stride_seq_idx_seqlen + seq_idx_ptrs += BLOCK_SIZE_K * stride_seq_idx_seqlen + states = acc.to(states_ptr.dtype.element_ty) - states_ptr += pid_b * stride_states_batch + pid_c * stride_states_chunk + pid_h * stride_states_head + states_ptr += pid_c * stride_states_chunk + pid_h * stride_states_head offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) states_ptrs = states_ptr + (offs_m[:, None] * stride_states_hdim + @@ -400,36 +380,35 @@ def _chunk_state_varlen_kernel( states_ptr, initstates_ptr, # Matrix dimensions - hdim, - dstate, - chunk_size, - seqlen, - nheads_ngroups_ratio, + hdim: tl.constexpr, + dstate: tl.constexpr, + chunk_size: tl.constexpr, + nheads_ngroups_ratio: tl.constexpr, # Strides - stride_x_seqlen, - stride_x_head, - stride_x_hdim, - stride_b_seqlen, - stride_b_head, - stride_b_dstate, - stride_dt_chunk, - stride_dt_head, - stride_dt_csize, - stride_dA_cs_chunk, - stride_dA_cs_head, - stride_dA_cs_csize, - stride_chunk_states_chunk, - stride_chunk_states_head, - stride_chunk_states_hdim, - stride_chunk_states_dstate, - stride_states_batch, - stride_states_head, - stride_states_hdim, - stride_states_dstate, - stride_init_states_batch, - stride_init_states_head, - stride_init_states_hdim, - stride_init_states_dstate, + stride_x_seqlen: tl.int64, + stride_x_head: tl.int64, + stride_x_hdim: tl.constexpr, + stride_b_seqlen: tl.int64, + stride_b_head: tl.int64, + stride_b_dstate: tl.constexpr, + stride_dt_head: tl.int64, + stride_dt_chunk: tl.int64, + stride_dt_csize: tl.constexpr, + stride_dA_cs_head: tl.int64, + stride_dA_cs_chunk: tl.int64, + stride_dA_cs_csize: tl.constexpr, + stride_chunk_states_chunk: tl.int64, + stride_chunk_states_head: tl.int64, + stride_chunk_states_hdim: tl.int64, + stride_chunk_states_dstate: tl.constexpr, + stride_states_batch: tl.int64, + stride_states_head: tl.int64, + stride_states_hdim: tl.int64, + stride_states_dstate: tl.constexpr, + stride_init_states_batch: tl.int64, + stride_init_states_head: tl.int64, + stride_init_states_hdim: tl.int64, + stride_init_states_dstate: tl.constexpr, # Meta-parameters BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, @@ -558,52 +537,47 @@ def _chunk_cumsum_fwd(dt, dt_bias=None, dt_softplus=False, dt_limit=(0.0, float("inf"))): - batch, seqlen, nheads = dt.shape + seqlen, nheads = dt.shape assert A.shape == (nheads, ) if dt_bias is not None: assert dt_bias.shape == (nheads, ) nchunks = math.ceil(seqlen / chunk_size) - dt_out = torch.empty(batch, - nheads, + dt_out = torch.empty(nheads, nchunks, chunk_size, device=dt.device, dtype=torch.float32) - dA_cumsum = torch.empty(batch, - nheads, + dA_cumsum = torch.empty(nheads, nchunks, chunk_size, device=dt.device, dtype=torch.float32) - grid_chunk_cs = lambda META: (batch, nchunks, + grid_chunk_cs = lambda META: (nchunks, triton.cdiv(nheads, META['BLOCK_SIZE_H'])) with torch.cuda.device(dt.device.index): _chunk_cumsum_fwd_kernel[grid_chunk_cs]( - dt, - A, - dt_bias, - dt_out, - dA_cumsum, - batch, - seqlen, - nheads, - chunk_size, - dt_limit[0], - dt_limit[1], - dt.stride(0), - dt.stride(1), - dt.stride(2), - A.stride(0), - dt_bias.stride(0) if dt_bias is not None else 0, - dt_out.stride(0), - dt_out.stride(2), - dt_out.stride(1), - dt_out.stride(3), - dA_cumsum.stride(0), - dA_cumsum.stride(2), - dA_cumsum.stride(1), - dA_cumsum.stride(3), - dt_softplus, + dt_ptr=dt, + A_ptr=A, + dt_bias_ptr=dt_bias, + dt_out_ptr=dt_out, + dA_cumsum_ptr=dA_cumsum, + seqlen=seqlen, + nheads=nheads, + 
chunk_size=chunk_size, + dt_min=dt_limit[0], + dt_max=dt_limit[1], + stride_dt_seqlen=dt.stride(0), + stride_dt_head=dt.stride(1), + stride_A_head=A.stride(0), + stride_dt_bias_head=dt_bias.stride(0) + if dt_bias is not None else 0, + stride_dt_out_head=dt_out.stride(0), + stride_dt_out_chunk=dt_out.stride(1), + stride_dt_out_csize=dt_out.stride(2), + stride_dA_cs_head=dA_cumsum.stride(0), + stride_dA_cs_chunk=dA_cumsum.stride(1), + stride_dA_cs_csize=dA_cumsum.stride(2), + DT_SOFTPLUS=dt_softplus, HAS_DT_BIAS=dt_bias is not None, BLOCK_SIZE_CHUNK=triton.next_power_of_2(chunk_size), ) @@ -617,63 +591,57 @@ def _chunk_state_fwd(B, seq_idx=None, states=None, states_in_fp32=True): - batch, seqlen, nheads, headdim = x.shape - _, _, nchunks, chunk_size = dt.shape - _, _, ngroups, dstate = B.shape + seqlen, nheads, headdim = x.shape + _, nchunks, chunk_size = dt.shape + _, ngroups, dstate = B.shape assert nheads % ngroups == 0 - assert B.shape == (batch, seqlen, ngroups, dstate) - assert dt.shape == (batch, nheads, nchunks, chunk_size) + assert B.shape == (seqlen, ngroups, dstate) + assert dt.shape == (nheads, nchunks, chunk_size) assert dA_cumsum.shape == dt.shape - if seq_idx is not None: - assert seq_idx.shape == (batch, seqlen) + + assert seq_idx is not None + assert seq_idx.shape == (seqlen, ) + if states is not None: - assert states.shape == (batch, nchunks, nheads, headdim, dstate) + assert states.shape == (nchunks, nheads, headdim, dstate) else: states_dtype = torch.float32 if states_in_fp32 else B.dtype - states = torch.empty((batch, nchunks, nheads, headdim, dstate), + states = torch.empty((nchunks, nheads, headdim, dstate), device=x.device, dtype=states_dtype) - grid = lambda META: ( - triton.cdiv(headdim, META['BLOCK_SIZE_M']) * triton.cdiv( - dstate, META['BLOCK_SIZE_N']), batch * nchunks, nheads) + + grid = lambda META: (triton.cdiv(headdim, META['BLOCK_SIZE_M']) * triton. 
+ cdiv(dstate, META['BLOCK_SIZE_N']), nchunks, nheads) with torch.cuda.device(x.device.index): _chunk_state_fwd_kernel[grid]( - x, - B, - states, - dt, - dA_cumsum, - seq_idx, - headdim, - dstate, - chunk_size, - batch, - seqlen, - nheads // ngroups, - x.stride(0), - x.stride(1), - x.stride(2), - x.stride(3), - B.stride(0), - B.stride(1), - B.stride(2), - B.stride(-1), - states.stride(0), - states.stride(1), - states.stride(2), - states.stride(3), - states.stride(4), - dt.stride(0), - dt.stride(2), - dt.stride(1), - dt.stride(3), - dA_cumsum.stride(0), - dA_cumsum.stride(2), - dA_cumsum.stride(1), - dA_cumsum.stride(3), - *((seq_idx.stride(0), - seq_idx.stride(1)) if seq_idx is not None else (0, 0)), - HAS_SEQ_IDX=seq_idx is not None, + x_ptr=x, + b_ptr=B, + states_ptr=states, + dt_ptr=dt, + dA_cumsum_ptr=dA_cumsum, + seq_idx_ptr=seq_idx, + hdim=headdim, + dstate=dstate, + chunk_size=chunk_size, + seqlen=seqlen, + nheads_ngroups_ratio=nheads // ngroups, + stride_x_seqlen=x.stride(0), + stride_x_head=x.stride(1), + stride_x_hdim=x.stride(2), + stride_b_seqlen=B.stride(0), + stride_b_head=B.stride(1), + stride_b_dstate=B.stride(2), + stride_states_chunk=states.stride(0), + stride_states_head=states.stride(1), + stride_states_hdim=states.stride(2), + stride_states_dstate=states.stride(3), + stride_dt_head=dt.stride(0), + stride_dt_chunk=dt.stride(1), + stride_dt_csize=dt.stride(2), + stride_dA_cs_head=dA_cumsum.stride(0), + stride_dA_cs_chunk=dA_cumsum.stride(1), + stride_dA_cs_csize=dA_cumsum.stride(2), + stride_seq_idx_seqlen=seq_idx.stride(0), ) return states @@ -705,46 +673,52 @@ def chunk_state_varlen(B, dstate, dtype=chunk_states.dtype, device=chunk_states.device) + + initial_states_strides = ((initial_states.stride(0), + initial_states.stride(1), + initial_states.stride(2), + initial_states.stride(3)) + if initial_states is not None else (0, 0, 0, 0)) + grid = lambda META: (triton.cdiv(headdim, META['BLOCK_SIZE_M']) * triton. 
cdiv(dstate, META['BLOCK_SIZE_N']), batch, nheads) with torch.cuda.device(x.device.index): _chunk_state_varlen_kernel[grid]( - x, - B, - dt, - dA_cumsum, - chunk_states, - cu_seqlens, - states, - initial_states, - headdim, - dstate, - chunk_size, - total_seqlen, - nheads // ngroups, - x.stride(0), - x.stride(1), - x.stride(2), - B.stride(0), - B.stride(1), - B.stride(2), - dt.stride(1), - dt.stride(0), - dt.stride(2), - dA_cumsum.stride(1), - dA_cumsum.stride(0), - dA_cumsum.stride(2), - chunk_states.stride(0), - chunk_states.stride(1), - chunk_states.stride(2), - chunk_states.stride(3), - states.stride(0), - states.stride(1), - states.stride(2), - states.stride(3), - *((initial_states.stride(0), initial_states.stride(1), - initial_states.stride(2), - initial_states.stride(3)) if initial_states is not None else - (0, 0, 0, 0)), + x_ptr=x, + b_ptr=B, + dt_ptr=dt, + dA_cumsum_ptr=dA_cumsum, + chunk_states_ptr=chunk_states, + cu_seqlens_ptr=cu_seqlens, + states_ptr=states, + initstates_ptr=initial_states, + hdim=headdim, + dstate=dstate, + chunk_size=chunk_size, + nheads_ngroups_ratio=nheads // ngroups, + stride_x_seqlen=x.stride(0), + stride_x_head=x.stride(1), + stride_x_hdim=x.stride(2), + stride_b_seqlen=B.stride(0), + stride_b_head=B.stride(1), + stride_b_dstate=B.stride(2), + stride_dt_head=dt.stride(0), + stride_dt_chunk=dt.stride(1), + stride_dt_csize=dt.stride(2), + stride_dA_cs_head=dA_cumsum.stride(0), + stride_dA_cs_chunk=dA_cumsum.stride(1), + stride_dA_cs_csize=dA_cumsum.stride(2), + stride_chunk_states_chunk=chunk_states.stride(0), + stride_chunk_states_head=chunk_states.stride(1), + stride_chunk_states_hdim=chunk_states.stride(2), + stride_chunk_states_dstate=chunk_states.stride(3), + stride_states_batch=states.stride(0), + stride_states_head=states.stride(1), + stride_states_hdim=states.stride(2), + stride_states_dstate=states.stride(3), + stride_init_states_batch=initial_states_strides[0], + stride_init_states_head=initial_states_strides[1], + stride_init_states_hdim=initial_states_strides[2], + stride_init_states_dstate=initial_states_strides[3], HAS_INITSTATES=initial_states is not None) return states diff --git a/vllm/model_executor/layers/mamba/ops/ssd_combined.py b/vllm/model_executor/layers/mamba/ops/ssd_combined.py index fcc5c905bf77..37d6c2870812 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_combined.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_combined.py @@ -31,6 +31,7 @@ def _mamba_chunk_scan_combined_fwd(x, B, C, chunk_size, + out, D=None, z=None, dt_bias=None, @@ -41,14 +42,13 @@ def _mamba_chunk_scan_combined_fwd(x, cu_seqlens=None, dt_softplus=False, dt_limit=(0.0, float("inf")), - state_dtype=None, - out=None): + state_dtype=None): assert is_int_pow_2(chunk_size), "chunk_size must be integer power of 2" - batch, seqlen, nheads, headdim = x.shape - _, _, ngroups, dstate = B.shape + seqlen, nheads, headdim = x.shape + _, ngroups, dstate = B.shape assert nheads % ngroups == 0 - assert B.shape == (batch, seqlen, ngroups, dstate) - assert dt.shape == (batch, seqlen, nheads) + assert B.shape == (seqlen, ngroups, dstate) + assert dt.shape == (seqlen, nheads) assert A.shape == (nheads, ) assert C.shape == B.shape if z is not None: @@ -56,25 +56,24 @@ def _mamba_chunk_scan_combined_fwd(x, if D is not None: assert D.shape == (nheads, headdim) or D.shape == (nheads, ) if seq_idx is not None: - assert seq_idx.shape == (batch, seqlen) + assert seq_idx.shape == (seqlen, ) if B.stride(-1) != 1: B = B.contiguous() if C.stride(-1) != 1: C = C.contiguous() if 
x.stride(-1) != 1 and x.stride( - 1) != 1: # Either M or K dimension should be contiguous + 0) != 1: # Either M or K dimension should be contiguous x = x.contiguous() if z is not None and z.stride(-1) != 1 and z.stride( - 1) != 1: # Either M or K dimension should be contiguous + 0) != 1: # Either M or K dimension should be contiguous z = z.contiguous() if D is not None and D.stride(-1) != 1: D = D.contiguous() + assert cu_seqlens is not None, "Assuming varlen input - must supply cu_seqlens" + if initial_states is not None: - if cu_seqlens is None: - assert initial_states.shape == (batch, nheads, headdim, dstate) - else: - assert initial_states.shape == (len(cu_seqlens) - 1, nheads, - headdim, dstate) + assert initial_states.shape == (len(cu_seqlens) - 1, nheads, headdim, + dstate) # This function executes 5 sub-functions for computing mamba # - a good resource is the blog https://goombalab.github.io/blog/2024/mamba2-part3-algorithm/ @@ -114,18 +113,16 @@ def _mamba_chunk_scan_combined_fwd(x, # - this will ensure that states will be updated with the rightmost flushed seq_idx # of the previous chunk. This implies that the first chunk of states is either 0 # or equal to init_states of the first example. - states, final_states = _state_passing_fwd( + states = _state_passing_fwd( rearrange(states, "... p n -> ... (p n)"), - dA_cumsum, + dA_cumsum, # (nheads, nchunks, chunk_size) initial_states=rearrange(initial_states, "... p n -> ... (p n)") - if initial_states is not None else None, + if initial_states is not None else + None, # (batch, nheads, headdim*dstate) seq_idx=seq_idx, - chunk_size=chunk_size, out_dtype=state_dtype if state_dtype is not None else C.dtype, - is_cont_batched=cu_seqlens is not None, chunk_offsets=chunk_offsets) - states, final_states = (rearrange(t, "... (p n) -> ... p n", n=dstate) - for t in [states, final_states]) + states = rearrange(states, "... (p n) -> ... p n", n=dstate) # 4. Compute batched matrix multiply for C_j^T B_i terms CB = _bmm_chunk_fwd(C, @@ -144,87 +141,88 @@ def _mamba_chunk_scan_combined_fwd(x, # - in each (pseudo) chunk, we detect if the previous (pseudo) chunk had # a seq_idx change, in which case we take states information from # init_states. 
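# A small, self-contained illustration (the numbers are made up) of the varlen
# layout these kernels assume: prefill tokens of all requests are packed along
# a single seqlen dimension, cu_seqlens marks the request boundaries, and
# seq_idx tags every token with its request index -- so a physical chunk can
# straddle two requests, which is exactly the pseudo-chunk / seq_idx-change
# case described in the comments above.
import torch
seqlens = torch.tensor([5, 3, 4])                                       # assumed request lengths
cu_seqlens = torch.cat([torch.tensor([0]), seqlens.cumsum(0)])          # tensor([0, 5, 8, 12])
seq_idx = torch.repeat_interleave(torch.arange(len(seqlens)), seqlens)  # request id per token
# With chunk_size=4 the 12 tokens form 3 chunks; chunk 1 mixes requests 0 and 1:
print(seq_idx.reshape(3, 4))  # tensor([[0, 0, 0, 0], [0, 1, 1, 1], [2, 2, 2, 2]])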
- out_x = _chunk_scan_fwd( + _chunk_scan_fwd( CB, x, dt, dA_cumsum, C, states, + out, # in-place update + seq_idx, D=D, z=z, - seq_idx=seq_idx, chunk_indices=chunk_indices, chunk_offsets=chunk_offsets, initial_states=initial_states, - out=out, ) - if cu_seqlens is None: - return out_x, dt, dA_cumsum, states, final_states - else: - assert batch == 1, "passing cu_seqlens to get the varlen states is only supported if batch dimension is 1" - varlen_states = chunk_state_varlen( - B.squeeze(0), - x.squeeze(0), - dt.squeeze(0), - dA_cumsum.squeeze(0), - cu_seqlens, - states.squeeze(0), - initial_states=initial_states, - ) - return out_x, dt, dA_cumsum, states, final_states, varlen_states - - -def mamba_chunk_scan_combined(x, - dt, - A, - B, - C, - chunk_size, - D=None, - z=None, - dt_bias=None, - initial_states=None, - seq_idx=None, - chunk_indices=None, - chunk_offsets=None, - cu_seqlens=None, - dt_softplus=False, - dt_limit=(0.0, float("inf")), - out=None, - return_final_states=False, - return_varlen_states=False, - state_dtype=None): + + varlen_states = chunk_state_varlen( + B, + x, + dt, + dA_cumsum, + cu_seqlens, + states, + initial_states=initial_states, + ) + + return varlen_states + + +def mamba_chunk_scan_combined_varlen( + x, + dt, + A, + B, + C, + chunk_size, + cu_seqlens, + seq_idx, + out, + D=None, + z=None, + dt_bias=None, + initial_states=None, + chunk_indices=None, + chunk_offsets=None, + dt_softplus=False, + dt_limit=(0.0, float("inf")), + state_dtype=None, +): """ Argument: - x: (batch, seqlen, nheads, headdim) - dt: (batch, seqlen, nheads) + x: (seqlen, nheads, headdim) + dt: (seqlen, nheads) A: (nheads) - B: (batch, seqlen, ngroups, dstate) - C: (batch, seqlen, ngroups, dstate) + B: (seqlen, ngroups, dstate) + C: (seqlen, ngroups, dstate) chunk_size: int + seq_idx: (seqlen) + cu_seqlens: (batch + 1) + out: (seqlen, nheads, headdim) preallocated output tensor D: (nheads, headdim) or (nheads,) - z: (batch, seqlen, nheads, headdim) + z: (seqlen, nheads, headdim) dt_bias: (nheads,) initial_states: (batch, nheads, headdim, dstate) - seq_idx: (batch, seqlen) - cu_seqlens: (num_sequences + 1) or None, only used if return_varlen_states is True dt_softplus: Whether to apply softplus to dt - out: Preallocated output tensor + out: (seqlen, nheads, headdim) preallocated output tensor state_dtype: The data type of the ssm state + Return: + varlen_states: (batch, nheads, headdim, dstate) """ - if not return_varlen_states: - cu_seqlens = None - else: - assert cu_seqlens is not None, "cu_seqlens must be provided if return_varlen_states is True" - out_x, dt_out, dA_cumsum, states, final_states, *rest = _mamba_chunk_scan_combined_fwd( + assert cu_seqlens is not None, "cu_seqlens must be provided assuming varlen input" + assert seq_idx is not None + + varlen_states = _mamba_chunk_scan_combined_fwd( x, dt, A, B, C, chunk_size, + out, D=D, z=z, dt_bias=dt_bias, @@ -235,14 +233,6 @@ def mamba_chunk_scan_combined(x, cu_seqlens=cu_seqlens, dt_softplus=dt_softplus, dt_limit=dt_limit, - out=out, state_dtype=state_dtype) - if not return_varlen_states: - if not return_final_states: - return - else: - return final_states - else: - varlen_states = rest[0] - return (varlen_states) if not return_final_states else (final_states, - varlen_states) + + return varlen_states diff --git a/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py b/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py index d61c3a8cdbe9..71a8a4b0a1c8 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py +++ 
b/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py @@ -27,64 +27,46 @@ def _state_passing_fwd_kernel( # Pointers to matrices states_ptr, out_ptr, - final_states_ptr, dA_cs_ptr, initstates_ptr, seq_idx_ptr, chunk_offsets_ptr, chunk_meta_num, # Matrix dimensions - dim, + dim: tl.constexpr, nchunks, seqlen, - chunk_size, + chunk_size: tl.constexpr, # Strides - stride_states_batch, - stride_states_chunk, - stride_states_head, - stride_states_dim, - stride_out_batch, - stride_out_chunk, - stride_out_head, - stride_out_dim, - stride_final_states_batch, - stride_final_states_head, - stride_final_states_dim, - stride_dA_cs_batch, - stride_dA_cs_chunk, - stride_dA_cs_head, - stride_dA_cs_csize, - stride_initstates_batch, - stride_initstates_head, - stride_initstates_dim, - stride_seq_idx_batch, - stride_seq_idx_seqlen, + stride_states_chunk: tl.int64, + stride_states_head: tl.int64, + stride_states_dim: tl.constexpr, + stride_out_chunk: tl.int64, + stride_out_head: tl.int64, + stride_out_dim: tl.constexpr, + stride_dA_cs_head: tl.int64, + stride_dA_cs_chunk: tl.int64, + stride_dA_cs_csize: tl.constexpr, + stride_initstates_batch: tl.int64, + stride_initstates_head: tl.int64, + stride_initstates_dim: tl.constexpr, + stride_seq_idx_seqlen: tl.constexpr, # Meta-parameters HAS_INITSTATES: tl.constexpr, - HAS_SEQ_IDX: tl.constexpr, - IS_CONT_BATCHED: tl.constexpr, BLOCK_SIZE: tl.constexpr, ): - pid_b = tl.program_id(axis=1) - pid_h = tl.program_id(axis=2) + pid_h = tl.program_id(axis=1) pid_m = tl.program_id(axis=0) - states_ptr += pid_b * stride_states_batch + pid_h * stride_states_head - dA_cs_ptr += pid_b * stride_dA_cs_batch + pid_h * stride_dA_cs_head + ( - chunk_size - 1) * stride_dA_cs_csize - out_ptr += pid_b * stride_out_batch + pid_h * stride_out_head - final_states_ptr += pid_b * stride_final_states_batch + pid_h * stride_final_states_head + states_ptr += pid_h * stride_states_head + dA_cs_ptr += pid_h * stride_dA_cs_head + (chunk_size - + 1) * stride_dA_cs_csize + out_ptr += pid_h * stride_out_head if HAS_INITSTATES: initstates_ptr += pid_h * stride_initstates_head - if not IS_CONT_BATCHED: - initstates_ptr += pid_b * stride_initstates_batch - - if HAS_SEQ_IDX: - seq_idx_ptr += pid_b * stride_seq_idx_batch offs_m = pid_m * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) states_ptrs = states_ptr + offs_m * stride_states_dim out_ptrs = out_ptr + offs_m * stride_out_dim - final_states_ptrs = final_states_ptr + offs_m * stride_final_states_dim # - states will be the past state of the sequence that continues on the current check if not HAS_INITSTATES: @@ -101,65 +83,63 @@ def _state_passing_fwd_kernel( out_ptrs += stride_out_chunk prev_seq_idx_chunk_end = 0 logical_chunk_idx = 0 - for c in range(nchunks): + for c in range(nchunks - 1): new_states = tl.load(states_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32) dA_cs = tl.load(dA_cs_ptr).to(tl.float32) scale_mask = True - if HAS_SEQ_IDX: - # - the seq to pass forward is the one that is flushed to the right - # boundary. - # - that is given by seq_idx_chunk_end below: the sequence index at the end of the chunk. - seq_idx_chunk_end = tl.load(seq_idx_ptr + (min( - (c + 1) * chunk_size, seqlen) - 1) * stride_seq_idx_seqlen) - if HAS_INITSTATES: - if IS_CONT_BATCHED and prev_seq_idx_chunk_end != seq_idx_chunk_end: - # this means in the current chunk the rightmost flushed seq - # has changed. 
- # - so we do not propagate the state from previous chunk - # - but rather we load that sequence's init state - initstates_ptrs = initstates_ptr + seq_idx_chunk_end * stride_initstates_batch - - # - update state with seq_idx_new's init state - states = tl.load(initstates_ptrs, - mask=offs_m < dim, - other=0.0).to(tl.float32) - - # - we need to consider the cumsum only of the last sequence in the chunk - # - find its starting position (given by c_off of the logical chunk index) - # - and subtract the cumsum just before that position from the total cumsum - # - first, update the logical chunk index (add the number of sequences in the current physical chunk): - # sequence index at the start of the current chunk - seq_idx_chunk_start = tl.load(seq_idx_ptr + - min(c * chunk_size, seqlen) * - stride_seq_idx_seqlen) - logical_chunk_idx += seq_idx_chunk_end - seq_idx_chunk_start - # - load the chunk offset: - c_off = tl.load(chunk_offsets_ptr + logical_chunk_idx, - mask=logical_chunk_idx < chunk_meta_num, - other=0) - # - if offset is 0, then the sequence starts at the beginning of the chunk, and we don't need to subtract anything - if c_off > 0: - # - dA_cs_ptr currently points to the cumsum at the end of the chunk - subtract the chunk size and add the offset - dA_cs_boundary = tl.load( - dA_cs_ptr - (chunk_size - 1) * stride_dA_cs_csize + - (c_off - 1) * stride_dA_cs_csize, - mask=(c_off - 1) > -1 and c_off < chunk_size, - other=0.0) - dA_cs -= dA_cs_boundary - - # - increment logical chunk index for every physical chunk - logical_chunk_idx += 1 - else: - scale_mask = seq_idx_chunk_end == prev_seq_idx_chunk_end - prev_seq_idx_chunk_end = seq_idx_chunk_end + # - the seq to pass forward is the one that is flushed to the right + # boundary. + # - that is given by seq_idx_chunk_end below: the sequence index at the end of the chunk. + seq_idx_chunk_end = tl.load(seq_idx_ptr + + (min((c + 1) * chunk_size, seqlen) - 1) * + stride_seq_idx_seqlen) + + if HAS_INITSTATES: + if prev_seq_idx_chunk_end != seq_idx_chunk_end: + # this means in the current chunk the rightmost flushed seq + # has changed. 
+ # - so we do not propagate the state from previous chunk + # - but rather we load that sequence's init state + initstates_ptrs = initstates_ptr + seq_idx_chunk_end * stride_initstates_batch + + # - update state with seq_idx_new's init state + states = tl.load(initstates_ptrs, mask=offs_m < dim, + other=0.0).to(tl.float32) + + # - we need to consider the cumsum only of the last sequence in the chunk + # - find its starting position (given by c_off of the logical chunk index) + # - and subtract the cumsum just before that position from the total cumsum + # - first, update the logical chunk index (add the number of sequences in the current physical chunk): + # sequence index at the start of the current chunk + seq_idx_chunk_start = tl.load(seq_idx_ptr + + min(c * chunk_size, seqlen) * + stride_seq_idx_seqlen) + logical_chunk_idx += seq_idx_chunk_end - seq_idx_chunk_start + # - load the chunk offset: + c_off = tl.load(chunk_offsets_ptr + logical_chunk_idx, + mask=logical_chunk_idx < chunk_meta_num, + other=0) + # - if offset is 0, then the sequence starts at the beginning of the chunk, and we don't need to subtract anything + if c_off > 0: + # - dA_cs_ptr currently points to the cumsum at the end of the chunk - subtract the chunk size and add the offset + dA_cs_boundary = tl.load( + dA_cs_ptr - (chunk_size - 1) * stride_dA_cs_csize + + (c_off - 1) * stride_dA_cs_csize, + mask=(c_off - 1) > -1 and c_off < chunk_size, + other=0.0) + dA_cs -= dA_cs_boundary + + # - increment logical chunk index for every physical chunk + logical_chunk_idx += 1 + else: + scale_mask = seq_idx_chunk_end == prev_seq_idx_chunk_end + prev_seq_idx_chunk_end = seq_idx_chunk_end scale = tl.where(scale_mask, tl.exp(dA_cs), 0.0) states = scale * states + new_states - if c < nchunks - 1: - tl.store(out_ptrs, states, mask=offs_m < dim) - else: - tl.store(final_states_ptrs, states, mask=offs_m < dim) + tl.store(out_ptrs, states, mask=offs_m < dim) + states_ptrs += stride_states_chunk dA_cs_ptr += stride_dA_cs_chunk out_ptrs += stride_out_chunk @@ -168,81 +148,53 @@ def _state_passing_fwd_kernel( def _state_passing_fwd( states, dA_cumsum, + seq_idx, + chunk_offsets, initial_states=None, - seq_idx=None, - chunk_size=None, out_dtype=None, - is_cont_batched=False, - chunk_offsets=None, ): - batch, nchunks, nheads, dim = states.shape - if chunk_size is None: - chunk_size = dA_cumsum.shape[-1] - else: - assert chunk_size == dA_cumsum.shape[-1] - assert dA_cumsum.shape == (batch, nheads, nchunks, chunk_size) - if initial_states is not None: - if is_cont_batched: - # - if cu_seqlens is provided, then the initial states - # are used for continuous batching. In which case we - # require seq_idx to be provided - assert seq_idx is not None, "seq_idx must be provided for continuous batching" - # - we also need chunk_offsets to be provided, to account - # for computation of dA_cumsum from the start of the - # sequence - assert chunk_offsets is not None, "chunk_offsets must be provided for continuous batching" - else: - # - this is the regular batching case, where initial - # states are used are for each example of the batch. 
- assert initial_states.shape == (batch, nheads, dim) - - if seq_idx is not None: - seqlen = seq_idx.shape[-1] - assert seq_idx.shape == (batch, seqlen) + nchunks, nheads, dim = states.shape + chunk_size = dA_cumsum.shape[-1] + assert dA_cumsum.shape == (nheads, nchunks, chunk_size) + seqlen = seq_idx.shape[-1] out_dtype = states.dtype if out_dtype is None else out_dtype - out = torch.empty((batch, nchunks, nheads, dim), + out = torch.empty((nchunks, nheads, dim), device=states.device, dtype=out_dtype) - final_states = torch.empty((batch, nheads, dim), - device=states.device, - dtype=torch.float32) - grid = lambda META: (triton.cdiv(dim, META['BLOCK_SIZE']), batch, nheads) + + initial_states_strides = ((initial_states.stride(0), + initial_states.stride(1), + initial_states.stride(2)) + if initial_states is not None else (0, 0, 0)) + + grid = lambda META: (triton.cdiv(dim, META['BLOCK_SIZE']), nheads) with torch.cuda.device(states.device.index): _state_passing_fwd_kernel[grid]( - states, - out, - final_states, - dA_cumsum, - initial_states, - seq_idx, - chunk_offsets, - len(chunk_offsets) if chunk_offsets is not None else 0, - dim, - nchunks, - seqlen if seq_idx is not None else 0, - chunk_size, - states.stride(0), - states.stride(1), - states.stride(2), - states.stride(3), - out.stride(0), - out.stride(1), - out.stride(2), - out.stride(3), - final_states.stride(0), - final_states.stride(1), - final_states.stride(2), - dA_cumsum.stride(0), - dA_cumsum.stride(2), - dA_cumsum.stride(1), - dA_cumsum.stride(3), - *((initial_states.stride(0), initial_states.stride(1), - initial_states.stride(2)) if initial_states is not None else - (0, 0, 0)), - *((seq_idx.stride(0), - seq_idx.stride(1)) if seq_idx is not None else (0, 0)), + states_ptr=states, + out_ptr=out, + dA_cs_ptr=dA_cumsum, + initstates_ptr=initial_states, + seq_idx_ptr=seq_idx, + chunk_offsets_ptr=chunk_offsets, + chunk_meta_num=len(chunk_offsets) + if chunk_offsets is not None else 0, + dim=dim, + nchunks=nchunks, + seqlen=seqlen if seq_idx is not None else 0, + chunk_size=chunk_size if seq_idx is not None else 0, + stride_states_chunk=states.stride(0), + stride_states_head=states.stride(1), + stride_states_dim=states.stride(2), + stride_out_chunk=out.stride(0), + stride_out_head=out.stride(1), + stride_out_dim=out.stride(2), + stride_dA_cs_head=dA_cumsum.stride(0), + stride_dA_cs_chunk=dA_cumsum.stride(1), + stride_dA_cs_csize=dA_cumsum.stride(2), + stride_initstates_batch=initial_states_strides[0], + stride_initstates_head=initial_states_strides[1], + stride_initstates_dim=initial_states_strides[2], + stride_seq_idx_seqlen=seq_idx.stride(0), HAS_INITSTATES=initial_states is not None, - HAS_SEQ_IDX=seq_idx is not None, - IS_CONT_BATCHED=is_cont_batched, ) - return out, final_states + return out diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index a7acf64f302b..03265b13de50 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -35,7 +35,7 @@ from vllm.model_executor.layers.mamba.ops.mamba_ssm import ( selective_state_update) from vllm.model_executor.layers.mamba.ops.ssd_combined import ( - mamba_chunk_scan_combined) + mamba_chunk_scan_combined_varlen) from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( @@ -262,6 +262,7 @@ def forward_cuda( seq_idx_p = attn_metadata.seq_idx_p chunk_indices_p = 
attn_metadata.chunk_indices_p chunk_offsets_p = attn_metadata.chunk_offsets_p + query_start_loc_p = attn_metadata.query_start_loc_p # 1. Gated MLP's linear projection projected_states = self.in_proj(hidden_states) @@ -302,9 +303,6 @@ def forward_cuda( [num_decodes, num_prefills], dim=0, ) - query_start_loc_p = ( - attn_metadata.query_start_loc[-num_prefills - 1:] - - num_decodes if has_prefill else None) # Preallocate output tensor to avoid memcpy cost for merging prefill # and decode outputs @@ -356,17 +354,17 @@ def forward_cuda( has_initial_states_p[:, None, None, None], ssm_state[state_indices_tensor_p], 0) - varlen_state = mamba_chunk_scan_combined( - hidden_states_p.view(1, num_prefill_tokens, + varlen_state = mamba_chunk_scan_combined_varlen( + hidden_states_p.view(num_prefill_tokens, self.num_heads // self.tp_size, self.head_dim), - dt.unsqueeze(0), + dt, self.A, - B.view(1, num_prefill_tokens, 1, -1), - C.view(1, num_prefill_tokens, 1, -1), + B.view(num_prefill_tokens, 1, -1), + C.view(num_prefill_tokens, 1, -1), chunk_size=chunk_size, D=self.D, - z=gate_p.view(1, num_prefill_tokens, + z=gate_p.view(num_prefill_tokens, self.num_heads // self.tp_size, self.head_dim), dt_bias=self.dt_bias, seq_idx=seq_idx_p, @@ -374,11 +372,9 @@ def forward_cuda( chunk_offsets=chunk_offsets_p, cu_seqlens=query_start_loc_p, initial_states=initial_states, - return_varlen_states=True, - return_final_states=False, dt_softplus=True, dt_limit=(0.0, float("inf")), - out=preallocated_ssm_out_p.view(1, num_prefill_tokens, -1, + out=preallocated_ssm_out_p.view(num_prefill_tokens, -1, self.head_dim), state_dtype=ssm_state.dtype, ) diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py index f45fc75334a2..6f16fda962ae 100644 --- a/vllm/v1/attention/backends/mamba2_attn.py +++ b/vllm/v1/attention/backends/mamba2_attn.py @@ -115,7 +115,7 @@ class Mamba2AttentionMetadata: num_prefill_tokens: int num_decodes: int num_decode_tokens: int - query_start_loc: torch.Tensor + query_start_loc_p: torch.Tensor seq_lens: torch.Tensor prep_initial_states: bool @@ -151,7 +151,7 @@ def build(self, common_attn_metadata: CommonAttentionMetadata, fast_build: bool = False) -> Mamba2AttentionMetadata: num_reqs = common_attn_metadata.num_reqs - query_start_loc = common_attn_metadata.query_start_loc + query_start_loc_p = None seq_lens = common_attn_metadata.seq_lens seq_idx_p = None @@ -179,7 +179,7 @@ def build(self, num_computed_tokens_cpu[num_reqs - num_prefills:num_reqs] > 0) prep_initial_states = torch.any(has_initial_states_cpu).item() has_initial_states_p = has_initial_states_cpu.to( - query_start_loc.device) + common_attn_metadata.query_start_loc.device) query_start_loc_p = common_attn_metadata.query_start_loc[ -num_prefills - 1:] - num_decode_tokens @@ -190,7 +190,6 @@ def build(self, device=query_start_loc_p.device), query_start_loc_p.diff(), output_size=num_prefill_tokens) - seq_idx_p.unsqueeze_(0) # We compute metadata for chunked prefill once at the top level # model forward and reuse them in mamba layers. 
If not needed, @@ -217,7 +216,7 @@ def build(self, num_prefill_tokens=num_prefill_tokens, num_decodes=num_decodes, num_decode_tokens=num_decode_tokens, - query_start_loc=query_start_loc, + query_start_loc_p=query_start_loc_p, seq_lens=seq_lens, prep_initial_states=prep_initial_states, chunk_size=self.chunk_size, From 2827b3f4a37bcd0715d948ee8d3c9e1e1d08da8a Mon Sep 17 00:00:00 2001 From: Chauncey Date: Fri, 26 Sep 2025 20:46:17 +0800 Subject: [PATCH 417/518] [CI] Fix test_shared_storage_connector_hashes (#25748) Signed-off-by: chaunceyjiang --- .../kv_connector/unit/test_nixl_connector.py | 22 +++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 24cc83c28614..6b4bd29f18a5 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -26,6 +26,8 @@ from vllm.distributed.kv_transfer.kv_connector.v1.nixl_connector import ( KVConnectorRole, NixlAgentMetadata, NixlConnector, NixlConnectorMetadata, NixlConnectorWorker, NixlKVConnectorStats) +from vllm.distributed.kv_transfer.kv_transfer_state import ( + ensure_kv_transfer_shutdown, has_kv_transfer_group) from vllm.forward_context import ForwardContext from vllm.platforms.interface import Platform from vllm.sampling_params import SamplingParams @@ -35,6 +37,26 @@ from .utils import create_request, create_scheduler, create_vllm_config +@pytest.fixture(scope="module", autouse=True) +def clear_kv_transfer(): + """ + The test cases in this file use `VLLM_ENABLE_V1_MULTIPROCESSING=0`, + causing the global variable `_KV_CONNECTOR_AGENT` + to be assigned but never deleted. + + Since the current pytest process does not terminate and instead + continues running tests from other files, + this global variable remains in memory and interferes + with test cases in other modules. + + So we use this fixture to ensure that the global variable + `_KV_CONNECTOR_AGENT` is properly cleaned up after each test. + """ + yield + if has_kv_transfer_group(): + ensure_kv_transfer_shutdown() + + class FakeNixlWrapper: """Mock implementation of NixlWrapper for testing. From fe6b19c31470668b03fda6dbedc3939aac05ed3b Mon Sep 17 00:00:00 2001 From: "wang.yuqi" Date: Fri, 26 Sep 2025 20:47:34 +0800 Subject: [PATCH 418/518] [Bugfix] Properly abort pooling request. 
(#25734) Signed-off-by: wang.yuqi Co-authored-by: Cyrus Leung --- tests/v1/engine/test_output_processor.py | 33 ++++++++++++++++++++++++ vllm/v1/engine/output_processor.py | 9 ++++++- 2 files changed, 41 insertions(+), 1 deletion(-) diff --git a/tests/v1/engine/test_output_processor.py b/tests/v1/engine/test_output_processor.py index bdb40be99aa3..72c0a9a13e23 100644 --- a/tests/v1/engine/test_output_processor.py +++ b/tests/v1/engine/test_output_processor.py @@ -12,6 +12,7 @@ STOP_STRINGS, DummyOutputProcessorTestVectors, MockEngineCore) +from vllm import PoolingParams from vllm.logprobs import PromptLogprobs, SampleLogprobs from vllm.outputs import CompletionOutput, RequestOutput from vllm.sampling_params import RequestOutputKind, SamplingParams @@ -998,3 +999,35 @@ async def test_cumulative_output_collector_n(): third = [k for k in result.outputs if k.index == 2] assert len(third) == 1 assert third[0].text == "c" + + +@pytest.mark.parametrize("runner", ["generate", "pooling"]) +def test_abort_requests(runner: str, dummy_test_vectors): + output_processor = OutputProcessor(dummy_test_vectors.tokenizer, + log_stats=True) + requests = [ + EngineCoreRequest( + request_id=f"request-{idx}", + prompt_token_ids=prompt_tokens, + mm_features=None, + eos_token_id=None, + arrival_time=0, + lora_request=None, + cache_salt=None, + data_parallel_rank=None, + sampling_params=SamplingParams() if runner == "generate" else None, + pooling_params=PoolingParams( + task="embed") if runner == "pooling" else None, + ) for idx, prompt_tokens in enumerate(dummy_test_vectors.prompt_tokens) + ] + + for request in requests: + if runner == "generate": + output_kind = request.sampling_params.output_kind + else: + output_kind = request.pooling_params.output_kind + queue = RequestOutputCollector(output_kind=output_kind) + output_processor.add_request(request, None, queue=queue) + + for request in requests: + output_processor.abort_requests([request.request_id]) diff --git a/vllm/v1/engine/output_processor.py b/vllm/v1/engine/output_processor.py index c17dc3e204ec..38b2d6824b47 100644 --- a/vllm/v1/engine/output_processor.py +++ b/vllm/v1/engine/output_processor.py @@ -335,7 +335,14 @@ def abort_requests( # Produce final abort output. if req_state.queue is not None and ( request_output := req_state.make_request_output( - [], None, FinishReason.ABORT, None, None)): + new_token_ids=[], + # Set pooling_output is not None to + # correctly enter the abort pooling branch + pooling_output=torch.randn(0, device="cpu") + if req_state.detokenizer is None else None, + finish_reason=FinishReason.ABORT, + stop_reason=None, + kv_transfer_params=None)): req_state.queue.put(request_output) elif parent := self.parent_requests.get(request_id): # Abort children prior to removing the parent. 
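Editor's aside on the abort-pooling fix above, before the next patch in the series: the sketch below is hypothetical, simplified logic (not the actual vLLM implementation) showing why an empty tensor, rather than None, is passed as pooling_output when aborting; the non-None value routes the aborted request down the pooling-output branch instead of the text-completion branch.

import torch

def make_abort_output(is_pooling_request: bool) -> dict:
    # an empty tensor (not None) marks the request as a pooling request
    pooling_output = torch.randn(0) if is_pooling_request else None
    if pooling_output is not None:
        return {"kind": "pooling", "data": pooling_output, "finished": True}
    return {"kind": "completion", "token_ids": [], "finished": True}

assert make_abort_output(True)["kind"] == "pooling"
assert make_abort_output(False)["kind"] == "completion"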
From bc9d7b5595887d4a1358926579b638b42368efd7 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 20:49:33 +0800 Subject: [PATCH 419/518] [CI/Build] Split up Distributed Tests (#25572) Signed-off-by: DarkLight1337 --- .buildkite/test-pipeline.yaml | 43 ++++++++++++------- .../test_sharded_state_loader.py | 3 +- 2 files changed, 28 insertions(+), 18 deletions(-) rename tests/{ => model_executor/model_loader}/test_sharded_state_loader.py (98%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 7a1f38606062..2fc65c7fb658 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -870,25 +870,27 @@ steps: - NUM_NODES=2 torchrun --nnodes 2 --nproc-per-node=2 --rdzv_backend=c10d --rdzv_endpoint=192.168.10.10 distributed/test_node_count.py | grep 'Node count test passed' - python3 ../examples/offline_inference/data_parallel.py --dp-size=2 --tp-size=1 --node-size=2 --node-rank=1 --master-addr=192.168.10.10 --master-port=12345 --enforce-eager --trust-remote-code -- label: Distributed Tests (2 GPUs) # 110min - timeout_in_minutes: 150 +- label: Distributed Tests (2 GPUs) # 68min + timeout_in_minutes: 90 mirror_hardwares: [amdexperimental] working_dir: "/vllm-workspace/tests" num_gpus: 2 source_file_dependencies: + - vllm/compilation/ - vllm/distributed/ - vllm/engine/ - vllm/executor/ - - vllm/model_executor/models/ - - tests/distributed/ - - vllm/compilation - vllm/worker/worker_base.py - - entrypoints/llm/test_collective_rpc.py + - vllm/v1/engine/ + - vllm/v1/worker/ + - tests/compile/test_basic_correctness.py + - tests/compile/test_wrapper.py + - tests/distributed/ + - tests/entrypoints/llm/test_collective_rpc.py - tests/v1/test_async_llm_dp.py - tests/v1/test_external_lb_dp.py - tests/v1/entrypoints/openai/test_multi_api_servers.py - - vllm/v1/engine/ - - vllm/v1/worker/ + - tests/v1/shutdown - tests/v1/worker/test_worker_memory_snapshot.py commands: - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py @@ -898,20 +900,29 @@ steps: - pytest -v -s ./compile/test_basic_correctness.py - pytest -v -s ./compile/test_wrapper.py - VLLM_TEST_SAME_HOST=1 torchrun --nproc-per-node=4 distributed/test_same_node.py | grep 'Same node test passed' + - pytest -v -s distributed/test_sequence_parallel.py + - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown + - pytest -v -s v1/worker/test_worker_memory_snapshot.py + +- label: Distributed Model Tests (2 GPUs) # 37min + timeout_in_minutes: 50 + mirror_hardwares: [amdexperimental] + working_dir: "/vllm-workspace/tests" + num_gpus: 2 + source_file_dependencies: + - vllm/model_executor/model_loader/sharded_state_loader.py + - vllm/model_executor/models/ + - tests/basic_correctness/ + - tests/model_executor/model_loader/test_sharded_state_loader.py + - tests/models/ + commands: - TARGET_TEST_SUITE=L4 pytest basic_correctness/ -v -s -m 'distributed(num_gpus=2)' + - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s model_executor/model_loader/test_sharded_state_loader.py # Avoid importing model tests that cause CUDA reinitialization error - pytest models/test_transformers.py -v -s -m 'distributed(num_gpus=2)' - pytest models/language -v -s -m 'distributed(num_gpus=2)' - pytest models/multimodal -v -s -m 'distributed(num_gpus=2)' --ignore models/multimodal/generation/test_whisper.py - VLLM_WORKER_MULTIPROC_METHOD=spawn pytest models/multimodal/generation/test_whisper.py -v -s -m 'distributed(num_gpus=2)' - # test sequence parallel - - pytest -v -s distributed/test_sequence_parallel.py - # this test fails 
consistently. - # TODO: investigate and fix - - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s test_sharded_state_loader.py - - CUDA_VISIBLE_DEVICES=0,1 pytest -v -s v1/shutdown - - pytest -v -s models/multimodal/generation/test_maverick.py - - pytest -v -s v1/worker/test_worker_memory_snapshot.py - label: Plugin Tests (2 GPUs) # 40min timeout_in_minutes: 60 diff --git a/tests/test_sharded_state_loader.py b/tests/model_executor/model_loader/test_sharded_state_loader.py similarity index 98% rename from tests/test_sharded_state_loader.py rename to tests/model_executor/model_loader/test_sharded_state_loader.py index fd5b5fad0999..785169f5d22e 100644 --- a/tests/test_sharded_state_loader.py +++ b/tests/model_executor/model_loader/test_sharded_state_loader.py @@ -91,8 +91,7 @@ def _run_generate(input_dir, queue: mp.Queue, **kwargs): @pytest.mark.parametrize("enable_lora", [False, True]) @pytest.mark.parametrize("tp_size", [1, 2]) def test_sharded_state_loader(enable_lora, tp_size, num_gpus_available, - llama_3p2_1b_files, - monkeypatch: pytest.MonkeyPatch): + llama_3p2_1b_files): if num_gpus_available < tp_size: pytest.skip(f"Not enough GPUs for tensor parallelism {tp_size}") From db1e42f627f75ab455a157a03c6ebde33c8cbb1c Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Fri, 26 Sep 2025 20:52:36 +0800 Subject: [PATCH 420/518] [CI/Build] Fix some V1 tests not being run (#25569) Signed-off-by: DarkLight1337 --- .buildkite/test-pipeline.yaml | 6 ++- tests/v1/test_kv_sharing.py | 97 ++--------------------------------- 2 files changed, 8 insertions(+), 95 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 2fc65c7fb658..0914c899aa5b 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -300,10 +300,12 @@ steps: - pytest -v -s v1/spec_decode - pytest -v -s v1/kv_connector/unit - pytest -v -s v1/metrics + - pytest -v -s v1/test_kv_sharing.py + - pytest -v -s v1/test_metrics_reader.py + - pytest -v -s v1/test_oracle.py + - pytest -v -s v1/test_request.py - pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_utils.py - - pytest -v -s v1/test_oracle.py - - pytest -v -s v1/test_metrics_reader.py # Integration test for streaming correctness (requires special branch). 
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine diff --git a/tests/v1/test_kv_sharing.py b/tests/v1/test_kv_sharing.py index 96848047145b..31a74101faf9 100644 --- a/tests/v1/test_kv_sharing.py +++ b/tests/v1/test_kv_sharing.py @@ -1,17 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from unittest.mock import Mock - import torch -from vllm.v1.attention.backends.flash_attn import ( - FlashAttentionBackend, FlashAttentionMetadataBuilder) -from vllm.v1.attention.backends.flex_attention import ( - FlexAttentionBackend, FlexAttentionMetadataBuilder) from vllm.v1.kv_cache_interface import FullAttentionSpec, KVCacheGroupSpec -from vllm.v1.worker.utils import (AttentionGroup, - initialize_kv_cache_for_kv_sharing) +from vllm.v1.worker.utils import add_kv_sharing_layers_to_kv_cache_groups def new_kv_cache_spec(): @@ -37,56 +30,17 @@ def test_initialize_kv_cache_for_kv_sharing_different_attn_groups(): new_kv_cache_spec()), ] - attn_groups = [ - # KV cache group 0 has two attention groups - [ - AttentionGroup( - backend=FlashAttentionBackend, - metadata_builder=Mock(spec=FlashAttentionMetadataBuilder), - layer_names=["model.layers.0"], - ), - AttentionGroup( - backend=FlexAttentionBackend, - metadata_builder=Mock(spec=FlexAttentionMetadataBuilder), - layer_names=["model.layers.1"], - ), - ], - ] - - # Only layers 0 and 1 will have KV caches allocated - kv_caches = { - "model.layers.0": torch.zeros(1, 2, 3), - "model.layers.1": torch.ones(1, 2, 3), - } - - initialize_kv_cache_for_kv_sharing( + add_kv_sharing_layers_to_kv_cache_groups( shared_kv_cache_layers=shared_kv_cache_layers, kv_cache_groups=kv_cache_groups, - kv_caches=kv_caches, - attn_groups=attn_groups, ) - # Check that the KV caches were shared correctly - assert kv_caches["model.layers.2"].data_ptr( - ) == kv_caches["model.layers.0"].data_ptr() - assert kv_caches["model.layers.3"].data_ptr( - ) == kv_caches["model.layers.1"].data_ptr() - # Check that the layers were added to the correct KV cache group assert len(kv_cache_groups) == 1 assert kv_cache_groups[0].layer_names == [ "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3" ] - # Check that the layers were added to the attention groups - assert len(attn_groups) == 1 and len(attn_groups[0]) == 2 - assert attn_groups[0][0].layer_names == [ - "model.layers.0", "model.layers.2" - ] - assert attn_groups[0][1].layer_names == [ - "model.layers.1", "model.layers.3" - ] - def test_initialize_kv_cache_for_kv_sharing_same_attn_groups(): """ @@ -103,48 +57,17 @@ def test_initialize_kv_cache_for_kv_sharing_same_attn_groups(): new_kv_cache_spec()), ] - attn_groups = [ - # KV cache group 0 has a single attention group - # as all layers have the same flash attention backend - [ - AttentionGroup( - backend=FlashAttentionBackend, - metadata_builder=Mock(spec=FlashAttentionMetadataBuilder), - layer_names=["model.layers.0", "model.layers.1"], - ), - ], - ] - - kv_caches = { - "model.layers.0": torch.zeros(1, 2, 3), - "model.layers.1": torch.ones(1, 2, 3), - } - - initialize_kv_cache_for_kv_sharing( + add_kv_sharing_layers_to_kv_cache_groups( shared_kv_cache_layers=shared_kv_cache_layers, kv_cache_groups=kv_cache_groups, - kv_caches=kv_caches, - attn_groups=attn_groups, ) - # Check that the KV caches were shared correctly - assert kv_caches["model.layers.2"].data_ptr( - ) == 
kv_caches["model.layers.0"].data_ptr() - assert kv_caches["model.layers.3"].data_ptr( - ) == kv_caches["model.layers.1"].data_ptr() - # Check that the layers were added to the correct KV cache group assert len(kv_cache_groups) == 1 assert kv_cache_groups[0].layer_names == [ "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3" ] - # Check that the layers were added to the attention groups - assert len(attn_groups) == 1 and len(attn_groups[0]) == 1 - assert attn_groups[0][0].layer_names == [ - "model.layers.0", "model.layers.1", "model.layers.2", "model.layers.3" - ] - def test_initialize_kv_cache_for_kv_sharing_no_attn_groups(): """ @@ -162,23 +85,11 @@ def test_initialize_kv_cache_for_kv_sharing_no_attn_groups(): KVCacheGroupSpec(["model.layers.1"], new_kv_cache_spec()), ] - kv_caches = { - "model.layers.0": torch.zeros(1, 2, 3), - "model.layers.1": torch.ones(1, 2, 3), - } - - initialize_kv_cache_for_kv_sharing( + add_kv_sharing_layers_to_kv_cache_groups( shared_kv_cache_layers=shared_kv_cache_layers, kv_cache_groups=kv_cache_groups, - kv_caches=kv_caches, ) - # Check that the KV caches were shared correctly - assert kv_caches["model.layers.2"].data_ptr( - ) == kv_caches["model.layers.0"].data_ptr() - assert kv_caches["model.layers.3"].data_ptr( - ) == kv_caches["model.layers.1"].data_ptr() - # Check that the layers were added to the correct KV cache group assert len(kv_cache_groups) == 2 assert kv_cache_groups[0].layer_names == [ From d4d9899860e75b2ef4aa6dda205c23e641876d24 Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Fri, 26 Sep 2025 23:47:41 +0800 Subject: [PATCH 421/518] [Quantization] Add field to skip unquantized modules for GPTQ config (#25455) Signed-off-by: Isotr0py --- vllm/config/__init__.py | 1 + .../layers/quantization/base_config.py | 6 ++ .../layers/quantization/gptq.py | 52 +++++++++++++-- .../layers/quantization/gptq_marlin.py | 65 ++++++++++++++++--- .../layers/quantization/utils/gptq_utils.py | 52 ++++++++++++++- vllm/model_executor/models/keye.py | 12 +--- vllm/model_executor/models/minicpmo.py | 36 +--------- vllm/model_executor/models/ovis.py | 13 +--- vllm/model_executor/models/qwen2_5_vl.py | 13 +--- vllm/model_executor/models/qwen2_vl.py | 13 +--- vllm/model_executor/models/qwen3_moe.py | 28 ++------ vllm/model_executor/models/qwen3_next.py | 24 ++----- vllm/model_executor/models/qwen3_vl.py | 12 +--- vllm/model_executor/models/qwen3_vl_moe.py | 2 +- vllm/transformers_utils/config.py | 32 ++++++++- vllm/transformers_utils/utils.py | 11 +++- 16 files changed, 219 insertions(+), 153 deletions(-) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 2da9d8f4f3ea..59b65900b1e1 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -270,6 +270,7 @@ def _get_quantization_config( f"{model_config.dtype} is not supported for quantization " f"method {model_config.quantization}. 
Supported dtypes: " f"{supported_dtypes}") + quant_config.maybe_update_config(model_config.model) return quant_config return None diff --git a/vllm/model_executor/layers/quantization/base_config.py b/vllm/model_executor/layers/quantization/base_config.py index 6fd94afbe556..807a9866a18b 100644 --- a/vllm/model_executor/layers/quantization/base_config.py +++ b/vllm/model_executor/layers/quantization/base_config.py @@ -162,3 +162,9 @@ def apply_vllm_mapper( # noqa: B027 """ # TODO (@kylesayrs): add implementations for all subclasses pass + + def maybe_update_config(self, model_name: str): # noqa: B027 + """ + Interface to update values after config initialization. + """ + pass diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py index 2272709f9309..0335b9c46b4d 100644 --- a/vllm/model_executor/layers/quantization/gptq.py +++ b/vllm/model_executor/layers/quantization/gptq.py @@ -7,6 +7,7 @@ from typing import Any, Optional, Union import torch +from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE from torch.nn.parameter import Parameter from vllm import _custom_ops as ops @@ -22,6 +23,8 @@ PackedColumnParameter, PackedvLLMParameter, RowvLLMParameter) +from vllm.transformers_utils.config import get_safetensors_params_metadata +from vllm.utils import is_list_of class GPTQConfig(QuantizationConfig): @@ -38,6 +41,7 @@ def __init__( lm_head_quantized: bool, dynamic: dict[str, dict[str, Union[int, bool]]], autoround_version: str = "", + modules_in_block_to_quantize: Optional[list[str]] = None, ) -> None: # GPTQModel use `dynamic` config property to allow per module # quantization config so each module can be individually optimized. @@ -75,15 +79,20 @@ def __init__( "Currently, only 2/3/4/8-bit weight quantization is " f"supported for GPTQ, but got {self.weight_bits} bits.") + self.modules_in_block_to_quantize = modules_in_block_to_quantize or [] + # used to identify GPTQ model quantized by autoround self.autoround_version = autoround_version def __repr__(self) -> str: - return (f"GPTQConfig(weight_bits={self.weight_bits}, " - f"group_size={self.group_size}, " - f"desc_act={self.desc_act}), " - f"lm_head_quantized={self.lm_head_quantized}), " - f"dynamic={self.dynamic}") + return ( + f"GPTQConfig(weight_bits={self.weight_bits}, " + f"group_size={self.group_size}, " + f"desc_act={self.desc_act}), " + f"lm_head_quantized={self.lm_head_quantized}, " + f"dynamic={self.dynamic}, " + f"modules_in_block_to_quantize={self.modules_in_block_to_quantize})" + ) @classmethod def get_name(cls) -> QuantizationMethods: @@ -114,8 +123,10 @@ def from_config(cls, config: dict[str, Any]) -> "GPTQConfig": default=False) autoround_version = cls.get_from_keys_or(config, ["autoround_version"], default="") + modules_in_block_to_quantize = cls.get_from_keys_or( + config, ["modules_in_block_to_quantize"], default=None) return cls(weight_bits, group_size, desc_act, lm_head_quantized, - dynamic, autoround_version) + dynamic, autoround_version, modules_in_block_to_quantize) def get_quant_method( self, layer: torch.nn.Module, prefix: str @@ -136,6 +147,35 @@ def get_quant_method( return get_linear_quant_method(self, layer, prefix, GPTQLinearMethod) + def apply_vllm_mapper(self, hf_to_vllm_mapper): + if self.modules_in_block_to_quantize is not None: + self.modules_in_block_to_quantize = hf_to_vllm_mapper.apply_list( + self.modules_in_block_to_quantize) + + def maybe_update_config(self, + model_name: str, + revision: Optional[str] = None): + if 
self.modules_in_block_to_quantize: + if is_list_of(self.modules_in_block_to_quantize, list): + # original modules_in_block_to_quantize: list[list[str]] + # flatten original modules_in_block_to_quantize + self.modules_in_block_to_quantize = [ + item for sublist in self.modules_in_block_to_quantize + for item in sublist + ] + return + + unquant_dtypes = [torch.float16, torch.bfloat16, torch.float32] + metadata = get_safetensors_params_metadata(model_name, + revision=revision) + quant_layers: set[str] = { + param_name.rsplit(".", 1)[0] + for param_name, info in metadata.items() + if (dtype := info.get('dtype', None)) + and _SAFETENSORS_TO_TORCH_DTYPE[dtype] not in unquant_dtypes + } + self.modules_in_block_to_quantize = list(quant_layers) + class ExllamaState(Enum): diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 02188c3c224f..967e46c24378 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -5,6 +5,7 @@ from typing import Any, Callable, Optional, Union import torch +from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE import vllm.model_executor.layers.fused_moe # noqa from vllm import _custom_ops as ops @@ -35,6 +36,8 @@ RowvLLMParameter) from vllm.platforms import current_platform from vllm.scalar_type import scalar_types +from vllm.transformers_utils.config import get_safetensors_params_metadata +from vllm.utils import is_list_of logger = init_logger(__name__) @@ -71,10 +74,16 @@ class GPTQMarlinConfig(QuantizationConfig): (8, True): scalar_types.uint8b128, } - def __init__(self, weight_bits: int, group_size: int, desc_act: bool, - is_sym: bool, lm_head_quantized: bool, - dynamic: dict[str, dict[str, Union[int, bool]]], - full_config: dict[str, Any]) -> None: + def __init__( + self, + weight_bits: int, + group_size: int, + desc_act: bool, + is_sym: bool, + lm_head_quantized: bool, + dynamic: dict[str, dict[str, Union[int, bool]]], + full_config: dict[str, Any], + modules_in_block_to_quantize: Optional[list[str]] = None) -> None: super().__init__() if desc_act and group_size == -1: # In this case, act_order == True is the same as act_order == False @@ -121,15 +130,19 @@ def __init__(self, weight_bits: int, group_size: int, desc_act: bool, self.quant_type = self.TYPE_MAP[(weight_bits, is_sym)] + self.modules_in_block_to_quantize = modules_in_block_to_quantize or [] # used to identify GPTQ model quantized by autoround self.autoround_version = full_config.get("autoround_version", "") def __repr__(self) -> str: - return (f"GPTQMarlinConfig(quant_type={self.quant_type}, " - f"group_size={self.group_size}, " - f"desc_act={self.desc_act}, " - f"lm_head_quantized={self.lm_head_quantized}), " - f"dynamic={self.dynamic}") + return ( + f"GPTQMarlinConfig(quant_type={self.quant_type}, " + f"group_size={self.group_size}, " + f"desc_act={self.desc_act}, " + f"lm_head_quantized={self.lm_head_quantized}, " + f"dynamic={self.dynamic}, " + f"modules_in_block_to_quantize={self.modules_in_block_to_quantize})" + ) @classmethod def get_name(cls) -> QuantizationMethods: @@ -158,8 +171,11 @@ def from_config(cls, config: dict[str, Any]) -> "GPTQMarlinConfig": is_sym = cls.get_from_keys(config, ["sym"]) lm_head_quantized = cls.get_from_keys_or(config, ["lm_head"], default=False) + modules_in_block_to_quantize = cls.get_from_keys_or( + config, ["modules_in_block_to_quantize"], default=None) return cls(weight_bits, group_size, desc_act, is_sym, 
- lm_head_quantized, dynamic, config) + lm_head_quantized, dynamic, config, + modules_in_block_to_quantize) @classmethod def override_quantization_method( @@ -223,6 +239,35 @@ def is_gptq_marlin_compatible(cls, quant_config: dict[str, Any]): return check_marlin_supported(quant_type=cls.TYPE_MAP[(num_bits, sym)], group_size=group_size) + def apply_vllm_mapper(self, hf_to_vllm_mapper): + if self.modules_in_block_to_quantize is not None: + self.modules_in_block_to_quantize = hf_to_vllm_mapper.apply_list( + self.modules_in_block_to_quantize) + + def maybe_update_config(self, + model_name: str, + revision: Optional[str] = None): + if self.modules_in_block_to_quantize: + if is_list_of(self.modules_in_block_to_quantize, list): + # original modules_in_block_to_quantize: list[list[str]] + # flatten original modules_in_block_to_quantize + self.modules_in_block_to_quantize = [ + item for sublist in self.modules_in_block_to_quantize + for item in sublist + ] + return + + unquant_dtypes = [torch.float16, torch.bfloat16, torch.float32] + metadata = get_safetensors_params_metadata(model_name, + revision=revision) + quant_layers: set[str] = { + param_name.rsplit(".", 1)[0] + for param_name, info in metadata.items() + if (dtype := info.get('dtype', None)) + and _SAFETENSORS_TO_TORCH_DTYPE[dtype] not in unquant_dtypes + } + self.modules_in_block_to_quantize = list(quant_layers) + class GPTQMarlinLinearMethod(LinearMethodBase): """Linear method for GPTQ Marlin. diff --git a/vllm/model_executor/layers/quantization/utils/gptq_utils.py b/vllm/model_executor/layers/quantization/utils/gptq_utils.py index 4fbd0f5c4eff..41b833725b30 100644 --- a/vllm/model_executor/layers/quantization/utils/gptq_utils.py +++ b/vllm/model_executor/layers/quantization/utils/gptq_utils.py @@ -1,7 +1,9 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from collections.abc import Mapping from copy import deepcopy from fractions import Fraction +from types import MappingProxyType from typing import Optional, Union import regex as re @@ -70,6 +72,49 @@ def get_dynamic_override( return default_value +def is_layer_gptq_quantized( + prefix: str, + quantized_layers: list[str], + fused_mapping: Mapping[str, list[str]] = MappingProxyType({}) +) -> bool: + # prefix: model.layers.0.self_attn.q_proj + # proj_name: q_proj + + # GPTQ's `modules_in_block_to_quantize`: + # Substr: ["self_attn.k_proj", "self_attn.v_proj", "self_attn.q_proj"] + # Full prefix ["model.layers.0.self_attn.q_proj"] + + proj_name = prefix.split(".")[-1] + + # Fused layers like gate_up_proj or qkv_proj will not be fused + # in the safetensors checkpoint. So, we convert the name + # from the fused version to unfused + check to make sure that + # each shard of the fused layer has the same scheme. + if proj_name in fused_mapping: + shard_prefixes = [ + prefix.replace(proj_name, shard_proj_name) + for shard_proj_name in fused_mapping[proj_name] + ] + + is_quantized = None + for shard_prefix in shard_prefixes: + is_shard_quantized = any(layer in shard_prefix + for layer in quantized_layers) + + if is_quantized is None: + is_quantized = is_shard_quantized + elif is_shard_quantized != is_quantized: + raise ValueError( + f"Detected some but not all shards of {prefix} " + "are quantized. 
All shards of fused layers " + "to have the same precision.") + else: + is_quantized = any(layer in prefix for layer in quantized_layers) + + assert is_quantized is not None + return is_quantized + + def get_linear_quant_method( config: QuantizationConfig, layer: torch.nn.Module, @@ -80,10 +125,15 @@ def get_linear_quant_method( parallel_lm_head_quantized = isinstance( layer, ParallelLMHead) and cloned_config.lm_head_quantized if isinstance(layer, LinearBase) or parallel_lm_head_quantized: + is_layer_quantized = is_layer_gptq_quantized( + prefix=prefix, + quantized_layers=cloned_config.modules_in_block_to_quantize, + fused_mapping=cloned_config.packed_modules_mapping) # False = skip module, None = no override, else = Positive match if get_dynamic_override( # noqa: E712 cloned_config, # noqa: E712 - layer_name=prefix) == False: # noqa: E712 + layer_name=prefix) == False or ( + not is_layer_quantized): # noqa: E712 if parallel_lm_head_quantized: return UnquantizedEmbeddingMethod() return UnquantizedLinearMethod() diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py index 2e5e276cc1c7..3b6fdba22512 100644 --- a/vllm/model_executor/models/keye.py +++ b/vllm/model_executor/models/keye.py @@ -25,9 +25,6 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.module_mapping import MultiModelKeys @@ -1281,11 +1278,6 @@ def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]: raise ValueError("Only image or video modality is supported") - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): - return None - return quant_config - def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config: PretrainedConfig = vllm_config.model_config.hf_config @@ -1297,14 +1289,14 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.visual = KeyeSiglipVisionModel( config.vision_config, - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=maybe_prefix(prefix, "visual"), ) self.mlp_AR = self._build_projector( config, config.vision_config, - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=maybe_prefix(prefix, "mlp_AR"), ) diff --git a/vllm/model_executor/models/minicpmo.py b/vllm/model_executor/models/minicpmo.py index 225668d87fac..e5333fb652b1 100644 --- a/vllm/model_executor/models/minicpmo.py +++ b/vllm/model_executor/models/minicpmo.py @@ -28,7 +28,7 @@ import torch from torch import nn -from transformers import BatchFeature, PretrainedConfig +from transformers import BatchFeature from transformers.modeling_outputs import BaseModelOutputWithPast from transformers.models.whisper.modeling_whisper import (ACT2FN, WhisperAttention, @@ -36,10 +36,6 @@ WhisperEncoder) from vllm.config import VllmConfig -from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.multimodal import MULTIMODAL_REGISTRY, MultiModalKwargsItems from 
vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, NestedTensors) @@ -548,36 +544,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.audio_token_id = None - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid vision encoder sections for some models. - # See: https://huggingface.co/openbmb/MiniCPM-o-2_6-int4 - if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): - return None - return quant_config - - def init_vision_module( - self, - config: PretrainedConfig, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> nn.Module: - # MiniCPMO GPTQ model leave vpm unquantized. - quant_config = self._maybe_ignore_quant_config(quant_config) - return super().init_vision_module(config, quant_config, prefix) - - def init_resampler( - self, - embed_dim: int, - vision_dim: int, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> nn.Module: - # MiniCPMO GPTQ model leave resampler unquantized. - quant_config = self._maybe_ignore_quant_config(quant_config) - return super().init_resampler(embed_dim, vision_dim, quant_config, - prefix) - def init_audio_module(self, *, vllm_config: VllmConfig, prefix: str = ""): # Do not use parameters temporarily audio_config = self.config.audio_config diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index 052e143b27f6..bd525b6780e0 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -31,9 +31,6 @@ from vllm.model_executor.layers.linear import ReplicatedLinear from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig) -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.models.aimv2 import AIMv2Model from vllm.model_executor.models.siglip import SiglipVisionModel from vllm.model_executor.models.utils import (AutoWeightsLoader, flatten_bn, @@ -416,7 +413,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.visual_tokenizer = VisualTokenizer( config=config.visual_tokenizer_config, - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=f"{prefix}.visual_tokenizer", ) @@ -430,14 +427,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.get_language_model().make_empty_intermediate_tensors) - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid vision encoder sections for some models. 
- # See: https://huggingface.co/AIDC-AI/Ovis2-2B-GPTQ-Int4 - if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): - return None - return quant_config - def _parse_and_validate_image_input( self, **kwargs: object) -> Optional[OvisImagePatchInputs]: pixel_values = kwargs.pop("pixel_values", None) diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index bd6c0b162cb4..6af6faa2b296 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -52,9 +52,6 @@ RowParallelLinear) # yapf: enable from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.multimodal import MULTIMODAL_REGISTRY @@ -1015,8 +1012,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.visual = Qwen2_5_VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), - quant_config=self._maybe_ignore_quant_config( - self.quant_config), + quant_config=self.quant_config, prefix=maybe_prefix(prefix, "visual"), use_data_parallel=self.use_data_parallel, ) @@ -1032,13 +1028,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors) - def _maybe_ignore_quant_config(self, config: Optional[QuantizationConfig]): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid vision encoder sections for some models. - if isinstance(config, (GPTQConfig, GPTQMarlinConfig)): - return None - return config - def _validate_and_reshape_mm_tensor(self, mm_input: object, name: str) -> torch.Tensor: if not isinstance(mm_input, (torch.Tensor, list)): diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 14ea03444484..d4e195246bf1 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -50,9 +50,6 @@ from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys from vllm.multimodal import MULTIMODAL_REGISTRY @@ -1270,7 +1267,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.visual = Qwen2VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=maybe_prefix(prefix, "visual"), use_data_parallel=self.use_data_parallel, ) @@ -1286,14 +1283,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors) - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid vision encoder sections for some models. 
- # See: https://huggingface.co/Qwen/Qwen2-VL-2B-Instruct-GPTQ-Int4 - if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): - return None - return quant_config - def _validate_and_reshape_mm_tensor(self, mm_input: object, name: str) -> torch.Tensor: if not isinstance(mm_input, (torch.Tensor, list)): diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 0661b3707ff4..cb2ff97a5df2 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -46,9 +46,6 @@ RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) @@ -149,24 +146,11 @@ def __init__( enable_eplb=self.enable_eplb, num_redundant_experts=self.n_redundant_experts) - self.gate = ReplicatedLinear( - config.hidden_size, - config.num_experts, - bias=False, - quant_config=self._maybe_ignore_quant_config(quant_config), - prefix=f"{prefix}.gate") - - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid gate quantization while AutoRound does. - # See: https://huggingface.co/Qwen/Qwen3-30B-A3B-GPTQ-Int4, - # and https://huggingface.co/jart25/Qwen3-Coder-30B-A3B-Instruct-Int4-gptq - if isinstance( - quant_config, - (GPTQConfig, - GPTQMarlinConfig)) and not quant_config.autoround_version: - return None - return quant_config + self.gate = ReplicatedLinear(config.hidden_size, + config.num_experts, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.gate") def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: assert hidden_states.dim( @@ -699,4 +683,4 @@ def load_weights(self, weights: Iterable[tuple[str, return loader.load_weights(weights) def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: - return self.model.get_expert_mapping() \ No newline at end of file + return self.model.get_expert_mapping() diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index 356b5001a7dc..dc3153fcc826 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -41,9 +41,6 @@ from vllm.model_executor.layers.mamba.ops.causal_conv1d import ( causal_conv1d_fn, causal_conv1d_update) from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) @@ -119,12 +116,11 @@ def __init__( enable_eplb=self.enable_eplb, num_redundant_experts=self.n_redundant_experts) - self.gate = ReplicatedLinear( - config.hidden_size, - config.num_experts, - bias=False, - quant_config=self._maybe_ignore_quant_config(quant_config), - prefix=f"{prefix}.gate") + self.gate = ReplicatedLinear(config.hidden_size, + config.num_experts, + bias=False, + quant_config=quant_config, + 
prefix=f"{prefix}.gate") if config.shared_expert_intermediate_size > 0: self.shared_expert = Qwen3NextMLP( @@ -142,16 +138,6 @@ def __init__( 1, bias=False) - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid gate quantization while AutoRound does. - if isinstance( - quant_config, - (GPTQConfig, - GPTQMarlinConfig)) and not quant_config.autoround_version: - return None - return quant_config - def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # NOTE: hidden_states can have either 1D or 2D shape. orig_shape = hidden_states.shape diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index ede477cde1a2..f3f11438eeee 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -50,9 +50,6 @@ RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.quantization.gptq import GPTQConfig -from vllm.model_executor.layers.quantization.gptq_marlin import ( - GPTQMarlinConfig) from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.module_mapping import MultiModelKeys @@ -1058,7 +1055,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "model"): self.visual = Qwen3_VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=maybe_prefix(prefix, "visual"), use_data_parallel=self.use_data_parallel, ) @@ -1116,13 +1113,6 @@ def _clear_deepstack_input_embeds(self, num_tokens: int) -> None: for idx in range(self.deepstack_num_level): self.deepstack_input_embeds[idx][:num_tokens].zero_() - def _maybe_ignore_quant_config(self, quant_config: QuantizationConfig): - # GPTQ configs do not have a list of ignored modules, however AutoGPTQ - # seems to avoid vision encoder sections for some models. 
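# ---------------------------------------------------------------------------
# Editor's aside (not part of the diff): the per-model _maybe_ignore_quant_config
# workarounds being deleted in this patch are superseded by the config-level
# detection added to GPTQConfig/GPTQMarlinConfig earlier in the same patch.
# The sketch below only illustrates that idea under an assumed input (a plain
# mapping from safetensors parameter names to dtypes); infer_quantized_layers
# is a hypothetical helper, not vLLM API.
import torch

UNQUANT_DTYPES = {torch.float16, torch.bfloat16, torch.float32}

def infer_quantized_layers(param_dtypes: dict[str, torch.dtype]) -> set[str]:
    # a layer counts as GPTQ-quantized if its params are stored in a packed
    # integer dtype rather than a floating-point one
    return {
        name.rsplit(".", 1)[0]
        for name, dtype in param_dtypes.items()
        if dtype not in UNQUANT_DTYPES
    }

# e.g. an fp16 vision tower is skipped while packed language layers are kept
assert infer_quantized_layers({
    "visual.blocks.0.attn.qkv.weight": torch.float16,
    "model.layers.0.self_attn.q_proj.qweight": torch.int32,
}) == {"model.layers.0.self_attn.q_proj"}
# ---------------------------------------------------------------------------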
- if isinstance(quant_config, (GPTQConfig, GPTQMarlinConfig)): - return None - return quant_config - def _validate_and_reshape_mm_tensor(self, mm_input: object, name: str) -> torch.Tensor: if not isinstance(mm_input, (torch.Tensor, list)): diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 7912cf3ea52b..52ea652b3765 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -322,7 +322,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.visual = Qwen3_VisionTransformer( config.vision_config, norm_eps=getattr(config, "rms_norm_eps", 1e-6), - quant_config=self._maybe_ignore_quant_config(quant_config), + quant_config=quant_config, prefix=maybe_prefix(prefix, "visual"), use_data_parallel=self.use_data_parallel, ) diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 9eed46678866..b0816cfb0702 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -4,6 +4,7 @@ import json import os import time +from dataclasses import asdict from functools import cache, partial from pathlib import Path from typing import Any, Callable, Literal, Optional, TypeVar, Union @@ -27,7 +28,8 @@ from vllm import envs from vllm.logger import init_logger from vllm.transformers_utils.config_parser_base import ConfigParserBase -from vllm.transformers_utils.utils import check_gguf_file +from vllm.transformers_utils.utils import (check_gguf_file, + parse_safetensors_file_metadata) if envs.VLLM_USE_MODELSCOPE: from modelscope import AutoConfig @@ -999,6 +1001,34 @@ def try_get_tokenizer_config( return None +def get_safetensors_params_metadata( + model: str, + *, + revision: Optional[str] = None, +) -> dict[str, Any]: + """ + Get the safetensors metadata for remote model repository. 
+ """ + full_metadata = {} + if (model_path := Path(model)).exists(): + safetensors_to_check = model_path.glob("*.safetensors") + full_metadata = { + param_name: info + for file_path in safetensors_to_check if file_path.is_file() + for param_name, info in parse_safetensors_file_metadata( + file_path).items() + } + else: + repo_mt = try_get_safetensors_metadata(model, revision=revision) + if repo_mt and (files_mt := repo_mt.files_metadata): + full_metadata = { + param_name: asdict(info) + for file_mt in files_mt.values() + for param_name, info in file_mt.tensors.items() + } + return full_metadata + + def _download_mistral_config_file(model, revision) -> dict: config_file_name = "params.json" config_dict = get_hf_file_to_dict(config_file_name, model, revision) diff --git a/vllm/transformers_utils/utils.py b/vllm/transformers_utils/utils.py index 66c8fb797adc..2aaad8f949d0 100644 --- a/vllm/transformers_utils/utils.py +++ b/vllm/transformers_utils/utils.py @@ -2,10 +2,11 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import json +import struct from functools import cache from os import PathLike from pathlib import Path -from typing import Optional, Union +from typing import Any, Optional, Union from vllm.envs import VLLM_MODEL_REDIRECT_PATH from vllm.logger import init_logger @@ -97,3 +98,11 @@ def maybe_model_redirect(model: str) -> str: return redirect_model return model + + +def parse_safetensors_file_metadata( + path: Union[str, PathLike]) -> dict[str, Any]: + with open(path, "rb") as f: + length_of_metadata = struct.unpack(' Date: Fri, 26 Sep 2025 12:22:49 -0400 Subject: [PATCH 422/518] [BugFix] Fix using `dbo_decode_token_threshold` always (and ignoring `dbo_prefill_token_threshold`) (#25622) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_model_runner.py | 9 +++++++-- vllm/v1/worker/ubatch_splitting.py | 3 ++- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index cbf439aa697b..f199dbd991f4 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -1045,11 +1045,15 @@ def _prepare_inputs( num_tokens_unpadded = scheduler_output.total_num_scheduled_tokens num_tokens_padded = num_tokens_unpadded + self.get_local_padding( num_tokens_unpadded) + uniform_decode = \ + (max_num_scheduled_tokens == self.uniform_decode_query_len) and \ + (total_num_scheduled_tokens == num_reqs * max_num_scheduled_tokens) ubatch_slices, num_tokens_after_padding = \ ubatch_split(num_scheduled_tokens, num_tokens_unpadded, num_tokens_padded, - self.vllm_config) + uniform_decode=uniform_decode, + vllm_config=self.vllm_config) self.seq_lens.np[:num_reqs] = ( self.input_batch.num_computed_tokens_cpu[:num_reqs] + @@ -2989,7 +2993,8 @@ def _dummy_run( num_scheduled_tokens, total_num_scheduled_tokens, total_num_scheduled_tokens, - self.vllm_config, + uniform_decode=uniform_decode, + vllm_config=self.vllm_config, ) # If we failed to microbatch, currently need to resynchronize diff --git a/vllm/v1/worker/ubatch_splitting.py b/vllm/v1/worker/ubatch_splitting.py index 30acb14ff58a..7767750aa604 100644 --- a/vllm/v1/worker/ubatch_splitting.py +++ b/vllm/v1/worker/ubatch_splitting.py @@ -139,6 +139,7 @@ def ubatch_split( num_scheduled_tokens_per_request: np.ndarray, num_tokens_unpadded: int, num_tokens_padded: int, + uniform_decode: bool, vllm_config: VllmConfig, ) -> tuple[Optional[UBatchSlices], Optional[torch.Tensor]]: """ @@ -164,7 +165,7 @@ def ubatch_split( 
should_attempt_ubatching = check_ubatch_thresholds( parallel_config, num_tokens_unpadded, - vllm_config, + uniform_decode=uniform_decode, ) # Don't microbatch unless every other DP worker is also microbatching From 8d52f2b3a7b75c9efe9eba906ab37780f6e4e5f3 Mon Sep 17 00:00:00 2001 From: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Date: Fri, 26 Sep 2025 09:43:30 -0700 Subject: [PATCH 423/518] [ray][metrics] Replace ':' with '_' for OpenTelemetry compatibility in Ray (#25439) Signed-off-by: Seiji Eicher Signed-off-by: Seiji Eicher <58963096+eicherseiji@users.noreply.github.com> Co-authored-by: Rui Qiao <161574667+ruisearch42@users.noreply.github.com> --- tests/v1/metrics/test_ray_metrics.py | 39 +++++++++++++++++++++++++++- vllm/v1/metrics/ray_wrappers.py | 19 ++++++++++++++ 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/tests/v1/metrics/test_ray_metrics.py b/tests/v1/metrics/test_ray_metrics.py index 92f6c6f0e89c..0c9f83f049e4 100644 --- a/tests/v1/metrics/test_ray_metrics.py +++ b/tests/v1/metrics/test_ray_metrics.py @@ -8,7 +8,8 @@ from vllm.config import ModelDType from vllm.sampling_params import SamplingParams from vllm.v1.engine.async_llm import AsyncEngineArgs, AsyncLLM -from vllm.v1.metrics.ray_wrappers import RayPrometheusStatLogger +from vllm.v1.metrics.ray_wrappers import (RayPrometheusMetric, + RayPrometheusStatLogger) @pytest.fixture(scope="function", autouse=True) @@ -65,3 +66,39 @@ async def run(self): # Create the actor and call the async method actor = EngineTestActor.remote() # type: ignore[attr-defined] ray.get(actor.run.remote()) + + +def test_sanitized_opentelemetry_name(): + """Test the metric name sanitization logic for Ray.""" + + # Only a-z, A-Z, 0-9, _, test valid characters are preserved + valid_name = "valid_metric_123_abcDEF" + assert RayPrometheusMetric._get_sanitized_opentelemetry_name( + valid_name) == valid_name + + # Test dash, dot, are replaced + name_with_dash_dot = "metric-name.test" + expected = "metric_name_test" + assert RayPrometheusMetric._get_sanitized_opentelemetry_name( + name_with_dash_dot) == expected + + # Test colon is replaced with underscore + name_with_colon = "metric:name" + expected = "metric_name" + assert RayPrometheusMetric._get_sanitized_opentelemetry_name( + name_with_colon) == expected + + # Test multiple invalid characters are replaced + name_with_invalid = "metric:name@with#special%chars" + expected = "metric_name_with_special_chars" + assert RayPrometheusMetric._get_sanitized_opentelemetry_name( + name_with_invalid) == expected + + # Test mixed valid and invalid characters + complex_name = "vllm:engine_stats/time.latency_ms-99p" + expected = "vllm_engine_stats_time_latency_ms_99p" + assert RayPrometheusMetric._get_sanitized_opentelemetry_name( + complex_name) == expected + + # Test empty string + assert RayPrometheusMetric._get_sanitized_opentelemetry_name("") == "" diff --git a/vllm/v1/metrics/ray_wrappers.py b/vllm/v1/metrics/ray_wrappers.py index ae8f9447e9c8..609185753860 100644 --- a/vllm/v1/metrics/ray_wrappers.py +++ b/vllm/v1/metrics/ray_wrappers.py @@ -11,6 +11,7 @@ from ray.util.metrics import Metric except ImportError: ray_metrics = None +import regex as re class RayPrometheusMetric: @@ -42,6 +43,21 @@ def labels(self, *labels, **labelskwargs): return self + @staticmethod + def _get_sanitized_opentelemetry_name(name: str) -> str: + """ + For compatibility with Ray + OpenTelemetry, the metric name must be + sanitized. 
In particular, this replaces disallowed character (e.g., ':') + with '_' in the metric name. + Allowed characters: a-z, A-Z, 0-9, _ + + # ruff: noqa: E501 + Ref: https://github.com/open-telemetry/opentelemetry-cpp/blob/main/sdk/src/metrics/instrument_metadata_validator.cc#L22-L23 + Ref: https://github.com/ray-project/ray/blob/master/src/ray/stats/metric.cc#L107 + """ + + return re.sub(r"[^a-zA-Z0-9_]", "_", name) + class RayGaugeWrapper(RayPrometheusMetric): """Wraps around ray.util.metrics.Gauge to provide same API as @@ -58,6 +74,7 @@ def __init__(self, # implemented at the observability layer (Prometheus/Grafana). del multiprocess_mode labelnames_tuple = tuple(labelnames) if labelnames else None + name = self._get_sanitized_opentelemetry_name(name) self.metric = ray_metrics.Gauge(name=name, description=documentation, tag_keys=labelnames_tuple) @@ -79,6 +96,7 @@ def __init__(self, documentation: Optional[str] = "", labelnames: Optional[list[str]] = None): labelnames_tuple = tuple(labelnames) if labelnames else None + name = self._get_sanitized_opentelemetry_name(name) self.metric = ray_metrics.Counter(name=name, description=documentation, tag_keys=labelnames_tuple) @@ -99,6 +117,7 @@ def __init__(self, labelnames: Optional[list[str]] = None, buckets: Optional[list[float]] = None): labelnames_tuple = tuple(labelnames) if labelnames else None + name = self._get_sanitized_opentelemetry_name(name) boundaries = buckets if buckets else [] self.metric = ray_metrics.Histogram(name=name, description=documentation, From 56aafa8c0bb9cce3594d834b996a5352504b985e Mon Sep 17 00:00:00 2001 From: Jiangyun Zhu Date: Sat, 27 Sep 2025 00:56:15 +0800 Subject: [PATCH 424/518] [Misc] fix unique_filepath (#25732) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zjy0516 Co-authored-by: Luka Govedič --- tests/utils_/test_utils.py | 29 +++++++++++++++++++++-------- vllm/utils/__init__.py | 21 +++++++++++++++++++++ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/tests/utils_/test_utils.py b/tests/utils_/test_utils.py index 608f517f6914..658ae7e7451a 100644 --- a/tests/utils_/test_utils.py +++ b/tests/utils_/test_utils.py @@ -23,15 +23,16 @@ from vllm.config import ParallelConfig, VllmConfig, set_current_vllm_config from vllm.transformers_utils.detokenizer_utils import ( convert_ids_list_to_tokens) -from vllm.utils import (CacheInfo, FlexibleArgumentParser, LRUCache, - MemorySnapshot, PlaceholderModule, StoreBoolean, - bind_kv_cache, common_broadcastable_dtype, - current_stream, deprecate_kwargs, get_open_port, - get_tcp_uri, is_lossless_cast, join_host_port, - make_zmq_path, make_zmq_socket, memory_profiling, - merge_async_iterators, sha256, split_host_port, - split_zmq_path, supports_kw, swap_dict_values) +# isort: off +from vllm.utils import ( + CacheInfo, FlexibleArgumentParser, LRUCache, MemorySnapshot, + PlaceholderModule, bind_kv_cache, common_broadcastable_dtype, + current_stream, deprecate_kwargs, get_open_port, get_tcp_uri, + is_lossless_cast, join_host_port, make_zmq_path, make_zmq_socket, + memory_profiling, merge_async_iterators, sha256, split_host_port, + split_zmq_path, supports_kw, swap_dict_values, unique_filepath) +# isort: on from ..utils import create_new_process_for_each_test, error_on_warning @@ -1032,3 +1033,15 @@ def test_load_config_file(tmp_path): # Assert that the processed arguments match the expected output assert processed_args == expected_args os.remove(str(config_file_path)) + + +def test_unique_filepath(): + 
temp_dir = tempfile.mkdtemp() + path_fn = lambda i: Path(temp_dir) / f"file_{i}.txt" + paths = set() + for i in range(10): + path = unique_filepath(path_fn) + path.write_text("test") + paths.add(path) + assert len(paths) == 10 + assert len(list(Path(temp_dir).glob("*.txt"))) == 10 diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index c502a69ea500..ba280d6dbe4a 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -45,6 +45,7 @@ from concurrent.futures.process import ProcessPoolExecutor from dataclasses import dataclass, field from functools import cache, lru_cache, partial, wraps +from pathlib import Path from types import MappingProxyType from typing import (TYPE_CHECKING, Any, Callable, Generic, Literal, NamedTuple, Optional, TextIO, TypeVar, Union, cast, overload) @@ -3536,3 +3537,23 @@ def set_env_var(key, value): del os.environ[key] else: os.environ[key] = old + + +def unique_filepath(fn: Callable[[int], Path]) -> Path: + """ + unique_filepath returns a unique path by trying + to include an integer in increasing order. + + fn should be a callable that returns a path that + includes the passed int at a fixed location. + + Note: This function has a TOCTOU race condition. + Caller should use atomic operations (e.g., open with 'x' mode) + when creating the file to ensure thread safety. + """ + i = 0 + while True: + p = fn(i) + if not p.exists(): + return p + i += 1 From 33f6aaf9725ab3c0b07ed88d8a6113621f9eb4d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E9=98=BF=E4=B8=B9=28adan=29?= <47373076+LDLINGLINGLING@users.noreply.github.com> Date: Sat, 27 Sep 2025 01:04:57 +0800 Subject: [PATCH 425/518] Eagle3 that supports the Minicpm3 model (#24243) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: liudan Co-authored-by: liudan Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> --- vllm/config/speculative.py | 2 +- vllm/model_executor/models/minicpm.py | 51 ++++++++++++++++++++++----- 2 files changed, 44 insertions(+), 9 deletions(-) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 2e4b3d3a6b20..5f462442148f 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -540,7 +540,7 @@ def _verify_args(self) -> Self: "speculative decoding is > 1, but got " f"{self.disable_by_batch_size=}") - eagle3_target_supported = ["llama", "qwen", "gpt_oss"] + eagle3_target_supported = ["llama", "qwen", "minicpm", "gpt_oss"] if self.method == "eagle3" and self.target_model_config and not any( supported_model in self.target_model_config.hf_text_config.model_type diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index 0986ea07406a..55fe3e2ae3ae 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -55,7 +55,7 @@ from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors -from .interfaces import SupportsLoRA, SupportsPP +from .interfaces import SupportsEagle3, SupportsLoRA, SupportsPP from .utils import (AutoWeightsLoader, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -381,6 +381,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.num_experts = getattr(self.config, "num_experts", 0) self._init_layers(prefix, config, cache_config, quant_config) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) + + 
self.aux_hidden_state_layers = tuple[int, ...]() + self.make_empty_intermediate_tensors = ( make_empty_intermediate_tensors_factory( ["hidden_states", "residual"], self.config.hidden_size)) @@ -408,7 +411,8 @@ def forward( positions: torch.Tensor, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, IntermediateTensors]: + ) -> Union[torch.Tensor, IntermediateTensors, tuple[torch.Tensor, + list[torch.Tensor]]]: if get_pp_group().is_first_rank: if inputs_embeds is not None: hidden_states = inputs_embeds @@ -419,18 +423,29 @@ def forward( hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in islice(self.layers, self.start_layer, self.end_layer): + aux_hidden_states = [] + for idx, layer in enumerate( + islice(self.layers, self.start_layer, self.end_layer)): + if idx in self.aux_hidden_state_layers: + aux_hidden_states.append( + hidden_states + + residual if residual is not None else hidden_states) hidden_states, residual = layer( positions, hidden_states, residual, ) + if not get_pp_group().is_last_rank: return IntermediateTensors({ "hidden_states": hidden_states, "residual": residual }) + hidden_states = self.norm(hidden_states) + + if len(aux_hidden_states) > 0: + return hidden_states, aux_hidden_states return hidden_states def load_weights(self, weights: Iterable[tuple[str, @@ -502,7 +517,7 @@ def load_weights(self, weights: Iterable[tuple[str, return loaded_params -class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP): +class MiniCPMForCausalLM(nn.Module, SupportsLoRA, SupportsPP, SupportsEagle3): packed_modules_mapping = { "qkv_proj": [ "q_proj", @@ -568,16 +583,36 @@ def _init_model(self, *, vllm_config: VllmConfig, prefix: str = ""): def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.model.get_input_embeddings(input_ids) + def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None: + self.model.aux_hidden_state_layers = layers + + def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]: + num_layers = len(self.model.layers) + return (2, num_layers // 2, num_layers - 3) + def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, IntermediateTensors]: - hidden_states = self.model(input_ids, positions, intermediate_tensors, - inputs_embeds) / self.scale_width - return hidden_states + ) -> Union[torch.Tensor, IntermediateTensors, tuple[torch.Tensor, + list[torch.Tensor]]]: + model_output = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) + + if isinstance(model_output, tuple) and len(model_output) == 2: + # Aux hidden states are present. 
+ hidden_states, aux_hidden_states = model_output + hidden_states = hidden_states / self.scale_width + return hidden_states, aux_hidden_states + else: + # Only hidden states or IntermediateTensors + if isinstance(model_output, IntermediateTensors): + return model_output + else: + hidden_states = model_output / self.scale_width + return hidden_states def compute_logits( self, From b761df963c2032144468a99bcb39a11e73e16ca4 Mon Sep 17 00:00:00 2001 From: Clouddude Date: Fri, 26 Sep 2025 13:26:33 -0400 Subject: [PATCH 426/518] [Doc]: improve CPU(x86) build-wheel-from-source section (#25617) Signed-off-by: Kosseila (CloudThrill) --- .../installation/cpu/x86.inc.md | 77 ++++++++++++++++++- 1 file changed, 75 insertions(+), 2 deletions(-) diff --git a/docs/getting_started/installation/cpu/x86.inc.md b/docs/getting_started/installation/cpu/x86.inc.md index 836da33f6531..00f3b726b1a0 100644 --- a/docs/getting_started/installation/cpu/x86.inc.md +++ b/docs/getting_started/installation/cpu/x86.inc.md @@ -20,7 +20,80 @@ vLLM supports basic model inferencing and serving on x86 CPU platform, with data # --8<-- [end:pre-built-wheels] # --8<-- [start:build-wheel-from-source] ---8<-- "docs/getting_started/installation/cpu/build.inc.md" +Install recommended compiler. We recommend to use `gcc/g++ >= 12.3.0` as the default compiler to avoid potential problems. For example, on Ubuntu 22.4, you can run: + +```bash +sudo apt-get update -y +sudo apt-get install -y gcc-12 g++-12 libnuma-dev python3-dev +sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-12 10 --slave /usr/bin/g++ g++ /usr/bin/g++-12 +``` + +Clone the vLLM project: + +```bash +git clone https://github.com/vllm-project/vllm.git vllm_source +cd vllm_source +``` + +Install the required dependencies: + +```bash +uv pip install -r requirements/cpu-build.txt --torch-backend cpu +uv pip install -r requirements/cpu.txt --torch-backend cpu +``` + +??? console "pip" + ```bash + pip install --upgrade pip + pip install -v -r requirements/cpu-build.txt --extra-index-url https://download.pytorch.org/whl/cpu + pip install -v -r requirements/cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu + ``` + +Build and install vLLM: + +```bash +VLLM_TARGET_DEVICE=cpu uv pip install . --no-build-isolation +``` + +If you want to develop vLLM, install it in editable mode instead. + +```bash +VLLM_TARGET_DEVICE=cpu uv pip install -e . --no-build-isolation +``` + +Optionally, build a portable wheel which you can then install elsewhere: + +```bash +VLLM_TARGET_DEVICE=cpu uv build --wheel +``` + +```bash +uv pip install dist/*.whl +``` + +??? console "pip" + ```bash + VLLM_TARGET_DEVICE=cpu python -m build --wheel --no-isolation + ``` + + ```bash + pip install dist/*.whl + ``` + +!!! example "Troubleshooting" + - **NumPy ≥2.0 error**: Downgrade using `pip install "numpy<2.0"`. + - **CMake picks up CUDA**: Add `CMAKE_DISABLE_FIND_PACKAGE_CUDA=ON` to prevent CUDA detection during CPU builds, even if CUDA is installed. + - `AMD` requies at least 4th gen processors (Zen 4/Genoa) or higher to support [AVX512](https://www.phoronix.com/review/amd-zen4-avx512) to run vLLM on CPU. + - If you receive an error such as: `Could not find a version that satisfies the requirement torch==X.Y.Z+cpu+cpu`, consider updating [pyproject.toml](https://github.com/vllm-project/vllm/blob/main/pyproject.toml) to help pip resolve the dependency. + ```toml title="pyproject.toml" + [build-system] + requires = [ + "cmake>=3.26.1", + ... 
+ "torch==X.Y.Z+cpu" # <------- + ] + ``` + - If you are building vLLM from source and not using the pre-built images, remember to set `LD_PRELOAD="/usr/lib/x86_64-linux-gnu/libtcmalloc_minimal.so.4:$LD_PRELOAD"` on x86 machines before running vLLM. # --8<-- [end:build-wheel-from-source] # --8<-- [start:pre-built-images] @@ -57,4 +130,4 @@ docker run --rm \ # --8<-- [end:build-image-from-source] # --8<-- [start:extra-information] -# --8<-- [end:extra-information] +# --8<-- [end:extra-information] \ No newline at end of file From 11aafd988661235cf8672d2065e5594b33ddec8d Mon Sep 17 00:00:00 2001 From: Frank Wang <41319051+frankwang28@users.noreply.github.com> Date: Fri, 26 Sep 2025 11:54:00 -0700 Subject: [PATCH 427/518] [Bugfix] Improve GLM4 MoE Reasoning Parser's is_reasoning_end Condition (#25355) Signed-off-by: frankwang28 Signed-off-by: Frank Wang <41319051+frankwang28@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Chauncey --- .../test_glm4_moe_reasoning_parser.py | 203 ++++++++++++++++++ vllm/reasoning/glm4_moe_reasoning_parser.py | 19 +- 2 files changed, 219 insertions(+), 3 deletions(-) create mode 100644 tests/reasoning/test_glm4_moe_reasoning_parser.py diff --git a/tests/reasoning/test_glm4_moe_reasoning_parser.py b/tests/reasoning/test_glm4_moe_reasoning_parser.py new file mode 100644 index 000000000000..4c5ec2c9b408 --- /dev/null +++ b/tests/reasoning/test_glm4_moe_reasoning_parser.py @@ -0,0 +1,203 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import pytest +from transformers import AutoTokenizer + +from tests.reasoning.utils import run_reasoning_extraction +from vllm.reasoning import ReasoningParser, ReasoningParserManager + +parser_name = "glm45" +start_token = "" +end_token = "" + +REASONING_MODEL_NAME = "zai-org/GLM-4.5" + + +@pytest.fixture(scope="module") +def glm45_tokenizer(): + return AutoTokenizer.from_pretrained(REASONING_MODEL_NAME) + + +WITH_THINK = { + "output": "This is a reasoning sectionThis is the rest", + "reasoning_content": "This is a reasoning section", + "content": "This is the rest", + "is_reasoning_end": True, +} + +WITH_THINK_STREAM = { + "output": "This is a reasoning sectionThis is the rest", + "reasoning_content": "This is a reasoning section", + "content": "This is the rest", + "is_reasoning_end": True, +} + +WITHOUT_THINK = { + "output": "This is the rest", + "reasoning_content": None, + "content": "This is the rest", + "is_reasoning_end": False, +} + +WITHOUT_THINK_STREAM = { + "output": "This is the rest", + "reasoning_content": None, + "content": "This is the rest", + "is_reasoning_end": False, +} + +COMPLETE_REASONING = { + "output": "This is a reasoning section", + "reasoning_content": "This is a reasoning section", + "content": None, + "is_reasoning_end": True, +} +MULTILINE_REASONING = { + "output": + "This is a reasoning\nsectionThis is the rest\nThat", + "reasoning_content": "This is a reasoning\nsection", + "content": "This is the rest\nThat", + "is_reasoning_end": True, +} +ONLY_OPEN_TAG = { + "output": "This is a reasoning section", + "reasoning_content": None, + "content": "This is a reasoning section", + "is_reasoning_end": False, +} + +ONLY_OPEN_TAG_STREAM = { + "output": "This is a reasoning section", + "reasoning_content": "This is a reasoning section", + "content": None, + "is_reasoning_end": False, +} + +TEST_CASES = [ + pytest.param( + False, + WITH_THINK, + 
id="with_think", + ), + pytest.param( + True, + WITH_THINK_STREAM, + id="with_think_stream", + ), + pytest.param( + False, + WITHOUT_THINK, + id="without_think", + ), + pytest.param( + True, + WITHOUT_THINK_STREAM, + id="without_think_stream", + ), + pytest.param( + False, + COMPLETE_REASONING, + id="complete_reasoning", + ), + pytest.param( + True, + COMPLETE_REASONING, + id="complete_reasoning_stream", + ), + pytest.param( + False, + MULTILINE_REASONING, + id="multiline_reasoning", + ), + pytest.param( + True, + MULTILINE_REASONING, + id="multiline_reasoning_stream", + ), + pytest.param( + False, + ONLY_OPEN_TAG, + id="only_open_tag", + ), + pytest.param( + True, + ONLY_OPEN_TAG_STREAM, + id="only_open_tag_stream", + ), +] + +STILL_REASONING_PROMPT = """[gMASK]<|system|> +You are a helpful assistant.<|user|> +What is the capital of France?<|assistant|> +The user is asking for the capital of""" + +DONE_REASONING_PROMPT = """[gMASK]<|system|> +You are a helpful assistant.<|user|> +What is the capital of France?<|assistant|> +The user is asking for the capital of France. +The capital of France is Paris.""" + +MULTI_TURN_STILL_REASONING_PROMPT = """[gMASK]<|system|> +You are a helpful assistant.<|user|> +What is the capital of France?<|assistant|> + +The capital of France is Paris.<|user|> +What about Chile?<|assistant|> +The user is asking for the capital of""" + +MULTI_TURN_DONE_REASONING_PROMPT = """[gMASK]<|system|> +You are a helpful assistant.<|user|> +What is the capital of France?<|assistant|> + +The capital of France is Paris.<|user|> +What about Chile?<|assistant|> +The user is asking for the capital of Chile. +The capital of Chile is Santiago.""" + +REASONING_END_TEST_CASES = [ + pytest.param(STILL_REASONING_PROMPT, False, id="still_reasoning"), + pytest.param(DONE_REASONING_PROMPT, True, id="done_reasoning"), + pytest.param(MULTI_TURN_STILL_REASONING_PROMPT, + False, + id="multi_turn_still_reasoning"), + pytest.param(MULTI_TURN_DONE_REASONING_PROMPT, + True, + id="multi_turn_done_reasoning") +] + + +@pytest.mark.parametrize("streaming, param_dict", TEST_CASES) +def test_reasoning( + streaming: bool, + param_dict: dict, + glm45_tokenizer, +): + output = glm45_tokenizer.tokenize(param_dict["output"]) + output_tokens: list[str] = [ + glm45_tokenizer.convert_tokens_to_string([token]) for token in output + ] + parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser( + parser_name)(glm45_tokenizer) + + reasoning, content = run_reasoning_extraction(parser, + output_tokens, + streaming=streaming) + + assert reasoning == param_dict["reasoning_content"] + assert content == param_dict["content"] + + output_ids = glm45_tokenizer.convert_tokens_to_ids(output) + is_reasoning_end = parser.is_reasoning_end(output_ids) + assert is_reasoning_end == param_dict["is_reasoning_end"] + + +@pytest.mark.parametrize("prompt, is_reasoning_end", REASONING_END_TEST_CASES) +def test_is_reasoning_end_full_prompt(prompt: str, is_reasoning_end: bool, + glm45_tokenizer): + parser: ReasoningParser = ReasoningParserManager.get_reasoning_parser( + parser_name)(glm45_tokenizer) + tokens = glm45_tokenizer.tokenize(prompt) + token_ids = glm45_tokenizer.convert_tokens_to_ids(tokens) + check_is_reasoning_end = parser.is_reasoning_end(token_ids) + assert check_is_reasoning_end == is_reasoning_end diff --git a/vllm/reasoning/glm4_moe_reasoning_parser.py b/vllm/reasoning/glm4_moe_reasoning_parser.py index 11e828a7039f..8d7488afce68 100644 --- a/vllm/reasoning/glm4_moe_reasoning_parser.py +++ 
b/vllm/reasoning/glm4_moe_reasoning_parser.py
@@ -30,6 +30,7 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
         super().__init__(tokenizer, *args, **kwargs)
         self.think_start_token = "<think>"
         self.think_end_token = "</think>"
+        self.assistant_token = "<|assistant|>"
 
         if not self.model_tokenizer:
             raise ValueError(
@@ -38,14 +39,26 @@ def __init__(self, tokenizer: PreTrainedTokenizerBase, *args, **kwargs):
 
         self.think_start_token_id = self.vocab.get(self.think_start_token)
         self.think_end_token_id = self.vocab.get(self.think_end_token)
+        self.assistant_token_id = self.vocab.get(self.assistant_token)
         if (self.think_start_token_id is None
-                or self.think_end_token_id is None):
+                or self.think_end_token_id is None
+                or self.assistant_token_id is None):
             raise RuntimeError(
                 "Glm4MoeModel reasoning parser could not locate "
-                "think start/end tokens in the tokenizer!")
+                "think start/end or assistant tokens in the tokenizer!")
 
     def is_reasoning_end(self, input_ids: list[int]) -> bool:
-        return self.think_end_token_id in input_ids
+        """
+        GLM's chat template has <think> tokens after every
+        <|assistant|> token. Thus, we need to check if </think> is
+        after the most recent <|assistant|> token (if present).
+        """
+        for token_id in input_ids[::-1]:
+            if token_id == self.think_end_token_id:
+                return True
+            elif token_id == self.assistant_token_id:
+                return False
+        return False
 
     def extract_content_ids(self, input_ids: list[int]) -> list[int]:
         """
From 0002b7f0d1308b767a8a5b20e361ed975a0c7576 Mon Sep 17 00:00:00 2001
From: Michael Goin
Date: Fri, 26 Sep 2025 15:00:46 -0400
Subject: [PATCH 428/518] [Docs] Add Toronto Meetup (#25773)

Signed-off-by: mgoin
---
 README.md                 | 1 +
 docs/community/meetups.md | 1 +
 2 files changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 0c6e5aa6b31d..6772a9eae073 100644
--- a/README.md
+++ b/README.md
@@ -21,6 +21,7 @@ Join us at the [PyTorch Conference, October 22-23](https://events.linuxfoundatio
 *Latest News* 🔥
+- [2025/09] We hosted [vLLM Toronto Meetup](https://luma.com/e80e0ymm) focused on tackling inference at scale and speculative decoding with speakers from NVIDIA and Red Hat! Please find the meetup slides [here](https://docs.google.com/presentation/d/1IYJYmJcu9fLpID5N5RbW_vO0XLo0CGOR14IXOjB61V8/edit?usp=sharing).
 - [2025/08] We hosted [vLLM Shenzhen Meetup](https://mp.weixin.qq.com/s/k8ZBO1u2_2odgiKWH_GVTQ) focusing on the ecosystem around vLLM! Please find the meetup slides [here](https://drive.google.com/drive/folders/1Ua2SVKVSu-wp5vou_6ElraDt2bnKhiEA).
 - [2025/08] We hosted [vLLM Singapore Meetup](https://www.sginnovate.com/event/vllm-sg-meet). We shared V1 updates, disaggregated serving and MLLM speedups with speakers from Embedded LLM, AMD, WekaIO, and A*STAR. Please find the meetup slides [here](https://drive.google.com/drive/folders/1ncf3GyqLdqFaB6IeB834E5TZJPLAOiXZ?usp=sharing).
 - [2025/08] We hosted [vLLM Shanghai Meetup](https://mp.weixin.qq.com/s/pDmAXHcN7Iqc8sUKgJgGtg) focusing on building, developing, and integrating with vLLM! Please find the meetup slides [here](https://drive.google.com/drive/folders/1OvLx39wnCGy_WKq8SiVKf7YcxxYI3WCH).
 
diff --git a/docs/community/meetups.md b/docs/community/meetups.md
index a3004249b758..e821e2ac8114 100644
--- a/docs/community/meetups.md
+++ b/docs/community/meetups.md
@@ -2,6 +2,7 @@
 We host regular meetups in San Francisco Bay Area every 2 months. We will share the project updates from the vLLM team and have guest speakers from the industry to share their experience and insights.
Please find the materials of our previous meetups below: +- [vLLM Toronto Meetup](https://luma.com/e80e0ymm), September 25th 2025. [[Slides]](https://docs.google.com/presentation/d/1IYJYmJcu9fLpID5N5RbW_vO0XLo0CGOR14IXOjB61V8/edit?usp=sharing) - [vLLM Shenzhen Meetup](https://mp.weixin.qq.com/s/k8ZBO1u2_2odgiKWH_GVTQ), August 30th 2025. [[Slides]](https://drive.google.com/drive/folders/1Ua2SVKVSu-wp5vou_6ElraDt2bnKhiEA) - [vLLM Singapore Meetup](https://www.sginnovate.com/event/vllm-sg-meet), August 27th 2025. [[Slides]](https://drive.google.com/drive/folders/1ncf3GyqLdqFaB6IeB834E5TZJPLAOiXZ?usp=sharing) - [vLLM Shanghai Meetup](https://mp.weixin.qq.com/s/pDmAXHcN7Iqc8sUKgJgGtg), August 23rd 2025. [[Slides]](https://drive.google.com/drive/folders/1OvLx39wnCGy_WKq8SiVKf7YcxxYI3WCH) From f708bd4904ee15bdf9e86503439f2408aa754cda Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 26 Sep 2025 15:23:00 -0400 Subject: [PATCH 429/518] [CI] Add E2E Blackwell Quantized MoE Test (#25723) Signed-off-by: mgoin --- .buildkite/test-pipeline.yaml | 19 ++- tests/quantization/test_blackwell_moe.py | 132 ++++++++++++++++++ tests/utils.py | 4 +- .../layers/fused_moe/flashinfer_trtllm_moe.py | 2 + 4 files changed, 155 insertions(+), 2 deletions(-) create mode 100644 tests/quantization/test_blackwell_moe.py diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 0914c899aa5b..c178fd372bcb 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -522,7 +522,7 @@ steps: # https://github.com/pytorch/ao/issues/2919, we'll have to skip new torchao tests for now # we can only upgrade after this is resolved - pip install --pre torchao==0.13.0.dev20250814 --index-url https://download.pytorch.org/whl/nightly/cu128 - - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization + - VLLM_TEST_FORCE_LOAD_FORMAT=auto pytest -v -s quantization/ - label: LM Eval Small Models # 53min timeout_in_minutes: 75 @@ -830,6 +830,23 @@ steps: - uv pip install --system 'gpt-oss[eval]==0.0.5' - pytest -s -v tests/evals/gpt_oss/test_gpqa_correctness.py --model openai/gpt-oss-20b --metric 0.58 --server-args '--tensor-parallel-size 2' +- label: Blackwell Quantized MoE Test + timeout_in_minutes: 60 + working_dir: "/vllm-workspace/" + gpu: b200 + source_file_dependencies: + - tests/quantization/test_blackwell_moe.py + - vllm/model_executor/models/deepseek_v2.py + - vllm/model_executor/models/gpt_oss.py + - vllm/model_executor/models/llama4.py + - vllm/model_executor/layers/fused_moe + - vllm/model_executor/layers/quantization/compressed_tensors + - vllm/model_executor/layers/quantization/modelopt.py + - vllm/model_executor/layers/quantization/mxfp4.py + - vllm/v1/attention/backends/flashinfer.py + commands: + - pytest -s -v tests/quantization/test_blackwell_moe.py + ##### 1 GPU test ##### ##### multi gpus test ##### diff --git a/tests/quantization/test_blackwell_moe.py b/tests/quantization/test_blackwell_moe.py new file mode 100644 index 000000000000..c021126720af --- /dev/null +++ b/tests/quantization/test_blackwell_moe.py @@ -0,0 +1,132 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import json +import os + +import pytest + +from tests.utils import RemoteOpenAIServer +from vllm.platforms import current_platform + +if not current_platform.is_device_capability(100): + pytest.skip("This test only runs on Blackwell GPUs (SM100).", + allow_module_level=True) + +os.environ["FLASHINFER_NVCC_THREADS"] = "16" + +# 
dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4, +# "text_config": {"num_layers": 4, "num_hidden_layers": 4}} +dummy_hf_overrides = {"num_layers": 4, "num_hidden_layers": 4} + + +def can_initialize(model: str, extra_args: list[str]): + + # Server arguments + server_args = [ + "--max-model-len", + "2048", + "--max-num-batched-tokens", + "256", + "--load-format", + "dummy", + "--trust-remote-code", + "--limit-mm-per-prompt", + json.dumps({"image": 0}), + *extra_args, + ] + + # Launch server and make a simple request + with RemoteOpenAIServer( + model, + server_args, + max_wait_seconds=1000, # Due to FlashInfer compile + override_hf_configs=dummy_hf_overrides) as server: + client = server.get_client() + # Make a simple request to verify the server works + completion = client.completions.create( + model=model, + prompt=["Hello, World!"], + temperature=0, + max_tokens=2, + ) + print(completion) + assert completion.choices[0].text is not None + + +## Llama4 ## + + +@pytest.mark.skip(reason=( + "RuntimeError: run_moe() Expected a value of type " + "'Optional[List[Tensor]]' for argument '_9' but instead found type " + "'list'.")) +def test_llama4_fp8_tensor_moe_flashinfer_cutlass( + monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput") + can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8", []) + + +@pytest.mark.skip(reason="Works, but takes too long to run") +def test_llama4_fp8_tensor_moe_flashinfer_trtllm( + monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP8", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "latency") + can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP8", []) + + +@pytest.mark.skip(reason="Works, but takes too long to run") +def test_llama4_nvfp4_moe_flashinfer_cutlass(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP4", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput") + can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP4", []) + + +@pytest.mark.skip(reason="RuntimeError: No kernel found for the given options") +def test_llama4_nvfp4_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP4", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "latency") + can_initialize("nvidia/Llama-4-Scout-17B-16E-Instruct-FP4", []) + + +## DeepSeekV3 ## + + +def test_deepseek_fp8_block_moe_deep_gemm(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_DEEP_GEMM", "1") + can_initialize("deepseek-ai/DeepSeek-V3.1", []) + + +def test_deepseek_nvfp4_moe_flashinfer_cutlass( + monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP4", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "throughput") + can_initialize("nvidia/DeepSeek-R1-0528-FP4-v2", []) + + +@pytest.mark.skip(reason="RuntimeError: No kernel found for the given options") +def test_deepseek_nvfp4_moe_flashinfer_trtllm(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_FP4", "1") + monkeypatch.setenv("VLLM_FLASHINFER_MOE_BACKEND", "latency") + can_initialize("nvidia/DeepSeek-R1-0528-FP4-v2", []) + + +## GPT-OSS ## + + +def test_gptoss_mxfp4bf16_moe_flashinfer(monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_BF16", "1") + can_initialize("openai/gpt-oss-20b", []) + + +def test_gptoss_mxfp4mxfp8_moe_flashinfer_cutlass( + monkeypatch: 
pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8_CUTLASS", "1") + can_initialize("openai/gpt-oss-20b", []) + + +def test_gptoss_mxfp4mxfp8_moe_flashinfer_trtllm( + monkeypatch: pytest.MonkeyPatch): + monkeypatch.setenv("VLLM_USE_FLASHINFER_MOE_MXFP4_MXFP8", "1") + can_initialize("openai/gpt-oss-20b", []) diff --git a/tests/utils.py b/tests/utils.py index f630c57f46d8..ab6ccc7ad9f9 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -91,8 +91,10 @@ def _start_server(self, model: str, vllm_serve_args: list[str], env['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn' if env_dict is not None: env.update(env_dict) + serve_cmd = ["vllm", "serve", model, *vllm_serve_args] + print(f"Launching RemoteOpenAIServer with: {' '.join(serve_cmd)}") self.proc: subprocess.Popen = subprocess.Popen( - ["vllm", "serve", model, *vllm_serve_args], + serve_cmd, env=env, stdout=sys.stdout, stderr=sys.stderr, diff --git a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py index fe586a22e250..74bcffd8ca03 100644 --- a/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py +++ b/vllm/model_executor/layers/fused_moe/flashinfer_trtllm_moe.py @@ -40,6 +40,8 @@ def flashinfer_fused_moe_blockscale_fp8( assert global_num_experts % 4 == 0 assert top_k < (topk_group * global_num_experts / num_expert_group) assert block_shape == [128, 128] + # Routing kernel expects #experts <= #threads 256 + assert global_num_experts <= 256 a_q, a_sf = per_token_group_quant_fp8(x, block_shape[1]) # NOTE: scales of hidden states have to be transposed! From f075693da767a8246274219c284ef687c4fcaf51 Mon Sep 17 00:00:00 2001 From: fhl2000 <63384265+fhl2000@users.noreply.github.com> Date: Sat, 27 Sep 2025 03:58:19 +0800 Subject: [PATCH 430/518] [V1] address post issues related to #20059 (part 1) (#23046) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: fhl2000 <63384265+fhl2000@users.noreply.github.com> Co-authored-by: Luka Govedič --- .../compile/piecewise/test_full_cudagraph.py | 86 +------------ tests/compile/test_config.py | 67 ++++++++++- tests/v1/attention/utils.py | 87 +++++++++++++- tests/v1/cudagraph/test_cudagraph_dispatch.py | 61 ++++------ tests/v1/cudagraph/test_cudagraph_mode.py | 84 ++----------- vllm/compilation/backends.py | 6 +- vllm/compilation/decorators.py | 4 +- ...cewise_backend.py => piecewise_backend.py} | 0 vllm/config/__init__.py | 37 ++++-- vllm/config/compilation.py | 113 ++++++++++++------ vllm/forward_context.py | 3 +- vllm/v1/cudagraph_dispatcher.py | 41 +++---- vllm/v1/worker/gpu_model_runner.py | 47 +++++--- 13 files changed, 346 insertions(+), 290 deletions(-) rename vllm/compilation/{cuda_piecewise_backend.py => piecewise_backend.py} (100%) diff --git a/tests/compile/piecewise/test_full_cudagraph.py b/tests/compile/piecewise/test_full_cudagraph.py index b02c1b565671..9906e49bb110 100644 --- a/tests/compile/piecewise/test_full_cudagraph.py +++ b/tests/compile/piecewise/test_full_cudagraph.py @@ -3,12 +3,11 @@ import contextlib import os import weakref -from dataclasses import dataclass -from typing import Optional import pytest from tests.utils import wait_for_gpu_memory_to_clear +from tests.v1.attention.utils import full_cg_backend_configs as backend_configs from vllm import LLM, SamplingParams from vllm.config import CompilationConfig from vllm.platforms import current_platform @@ -33,89 +32,6 @@ def temporary_environ(env_vars): os.environ[k] = v 
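# A minimal illustrative sketch of how the shared configs imported above
# (full_cg_backend_configs from tests/v1/attention/utils.py) are consumed
# together with temporary_environ; the model name below is a placeholder,
# not taken from the patch:
#
#     cfg = backend_configs["FA2"]            # a BackendConfig dataclass entry
#     with temporary_environ(cfg.env_vars):   # apply the backend's env vars
#         llm = LLM(model="Qwen/Qwen2-1.5B-Instruct",
#                   compilation_config=CompilationConfig(**cfg.comp_config))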
-@dataclass -class BackendConfig: - name: str - env_vars: dict - comp_config: dict - specific_gpu_arch: Optional[tuple] = None - - -# Define all backend configurations of full cudagraph to be tested -backend_configs = { - # FA3 on Hopper - "FA3": - BackendConfig(name="FA3", - env_vars={ - "VLLM_FLASH_ATTN_VERSION": "3", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", - }, - comp_config={ - "cudagraph_mode": "FULL", - }, - specific_gpu_arch=(9, 0)), - # FlashMLA on Hopper - "FlashMLA": - BackendConfig(name="FlashMLA", - env_vars={ - "VLLM_ATTENTION_BACKEND": "FLASHMLA", - }, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }, - specific_gpu_arch=(9, 0)), - # FlashAttention MLA on Hopper - "FlashAttentionMLA": - BackendConfig(name="FlashAttentionMLA", - env_vars={ - "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", - }, - comp_config={ - "cudagraph_mode": "FULL_DECODE_ONLY", - }, - specific_gpu_arch=(9, 0)), - # Cutlass MLA on Blackwell - "CutlassMLA": - BackendConfig( - name="CutlassMLA", - env_vars={ - "VLLM_USE_V1": "1", - "VLLM_ATTENTION_BACKEND": "CUTLASS_MLA", - "FORCE_NUM_KV_SPLITS": - "1", # TODO: remove this when hang issue is fixed - }, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - "cudagraph_capture_sizes": [16, 32, 64, 128, 256, 512], - }, - specific_gpu_arch=(10, 0)), - # FA2 - "FA2": - BackendConfig(name="FA2", - env_vars={ - "VLLM_FLASH_ATTN_VERSION": "2", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", - }, - comp_config={ - "cudagraph_mode": "FULL", - }), - # Triton Attention - "TritonAttn": - BackendConfig(name="TritonAttn", - env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN"}, - comp_config={ - "cudagraph_mode": "FULL", - }), - # FlashInfer - "FlashInfer": - BackendConfig(name="FlashInfer", - env_vars={"VLLM_ATTENTION_BACKEND": "FLASHINFER"}, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }), -} - test_params_full_cudagraph = [] # deepseek-ai/DeepSeek-V2-Lite with MLA diff --git a/tests/compile/test_config.py b/tests/compile/test_config.py index 7afd6251bbbd..17d3f0b37768 100644 --- a/tests/compile/test_config.py +++ b/tests/compile/test_config.py @@ -4,7 +4,7 @@ import vllm from vllm.compilation.counter import compilation_counter -from vllm.config import CompilationConfig, VllmConfig +from vllm.config import CompilationConfig, CUDAGraphMode, VllmConfig from vllm.utils import _is_torch_equal_or_newer @@ -106,7 +106,6 @@ def test_dynamo_as_is(vllm_runner, monkeypatch): def test_no_compilation(vllm_runner, monkeypatch): # Disable multiprocessing so that the counter is in the same process monkeypatch.setenv('VLLM_ENABLE_V1_MULTIPROCESSING', '0') - with ( compilation_counter.expect(num_graphs_seen=0, dynamo_as_is_count=0), @@ -131,3 +130,67 @@ def test_enforce_eager(vllm_runner, monkeypatch): enforce_eager=True, gpu_memory_utilization=0.4) as _): pass + + +def test_splitting_ops_dynamic(): + # Default config + config = VllmConfig() + assert config.compilation_config.cudagraph_mode == \ + CUDAGraphMode.FULL_AND_PIECEWISE + assert config.compilation_config.splitting_ops_contain_attention() + + # When use_inductor_graph_partition=True + if _is_torch_equal_or_newer('2.9.0.dev'): + # inductor graph partition is only available in PyTorch 2.9+. + # this is a fast config check so we are not using pytest.skip. 
+ config = VllmConfig(compilation_config=CompilationConfig( + use_inductor_graph_partition=True, + splitting_ops=["silly_attention"])) + # should ignore splitting_ops + assert config.compilation_config.splitting_ops == [] + + # When attn_fusion pass enabled. + config = VllmConfig(compilation_config=CompilationConfig( + pass_config={ + "enable_attn_fusion": True, + "enable_noop": True + }, + custom_ops=["+quant_fp8"], + cudagraph_mode=CUDAGraphMode.PIECEWISE, + )) + assert config.compilation_config.splitting_ops == [] + # cudagraph mode also fall back to FULL + assert config.compilation_config.cudagraph_mode == \ + CUDAGraphMode.FULL + + # splitting_ops can not contain attention ops when attn_fusion + # pass enabled. + with pytest.raises(AssertionError): + config = VllmConfig(compilation_config=CompilationConfig( + pass_config={ + "enable_attn_fusion": True, + "enable_noop": True + }, + custom_ops=["+quant_fp8"], + cudagraph_mode=CUDAGraphMode.PIECEWISE, + # work around for accessing all attntion ops + splitting_ops=CompilationConfig()._attention_ops, + )) + + # When both use_inductor_graph_partition and attn_fusion pass enabled. + if _is_torch_equal_or_newer('2.9.0.dev'): + config = VllmConfig(compilation_config=CompilationConfig( + use_inductor_graph_partition=True, + pass_config={ + "enable_attn_fusion": True, + "enable_noop": True + }, + custom_ops=["+quant_fp8"], + cudagraph_mode=CUDAGraphMode.PIECEWISE, + )) + assert config.compilation_config.splitting_ops == [] + # enable_attn_fusion is directly support under + # use_inductor_graph_partition=True, and cudagraph_mode + # is unchanged. + assert config.compilation_config.cudagraph_mode == \ + CUDAGraphMode.PIECEWISE diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py index 6f8c5ea50ef0..01b5de83a59a 100644 --- a/tests/v1/attention/utils.py +++ b/tests/v1/attention/utils.py @@ -3,7 +3,7 @@ """Utility functions for attention-related v1 tests.""" from dataclasses import dataclass -from typing import Union +from typing import Optional, Union import pytest import torch @@ -260,3 +260,88 @@ def create_dummy_kv_cache(block_size: int, dtype=dtype, device=device) return kv_cache + + +@dataclass +class BackendConfig: + name: str + env_vars: dict + comp_config: dict # compilation config + specific_gpu_arch: Optional[tuple] = None + + +# Define all backend configurations of full cudagraph to be tested +full_cg_backend_configs = { + # FA3 on Hopper + "FA3": + BackendConfig(name="FA3", + env_vars={ + "VLLM_ATTENTION_BACKEND": "FLASH_ATTN", + "VLLM_FLASH_ATTN_VERSION": "3", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, + comp_config={ + "cudagraph_mode": "FULL", + }, + specific_gpu_arch=(9, 0)), + # FlashMLA on Hopper + "FlashMLA": + BackendConfig(name="FlashMLA", + env_vars={ + "VLLM_ATTENTION_BACKEND": "FLASHMLA", + }, + comp_config={ + "cudagraph_mode": "FULL_AND_PIECEWISE", + }, + specific_gpu_arch=(9, 0)), + # Cutlass MLA on Blackwell + "CutlassMLA": + BackendConfig( + name="CutlassMLA", + env_vars={ + "VLLM_USE_V1": "1", + "VLLM_ATTENTION_BACKEND": "CUTLASS_MLA", + "FORCE_NUM_KV_SPLITS": + "1", # TODO: remove this when hang issue is fixed + }, + comp_config={ + "cudagraph_mode": "FULL_AND_PIECEWISE", + }, + specific_gpu_arch=(10, 0)), + # FlashAttention MLA on Hopper + "FlashAttentionMLA": + BackendConfig(name="FlashAttentionMLA", + env_vars={ + "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, + comp_config={ + "cudagraph_mode": "FULL_DECODE_ONLY", + }, + 
specific_gpu_arch=(9, 0)), + # FA2 + "FA2": + BackendConfig(name="FA2", + env_vars={ + "VLLM_ATTENTION_BACKEND": "FLASH_ATTN", + "VLLM_FLASH_ATTN_VERSION": "2", + "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", + }, + comp_config={ + "cudagraph_mode": "FULL_AND_PIECEWISE", + }), + # Triton Attention + "TritonAttn": + BackendConfig(name="TritonAttn", + env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN"}, + comp_config={ + "cudagraph_mode": "FULL_AND_PIECEWISE", + }), + # FlashInfer + "FlashInfer": + BackendConfig(name="FlashInfer", + env_vars={"VLLM_ATTENTION_BACKEND": "FLASHINFER"}, + comp_config={ + "cudagraph_mode": "FULL_AND_PIECEWISE", + }), +} diff --git a/tests/v1/cudagraph/test_cudagraph_dispatch.py b/tests/v1/cudagraph/test_cudagraph_dispatch.py index 64f2fa462802..b6b85e4440d0 100644 --- a/tests/v1/cudagraph/test_cudagraph_dispatch.py +++ b/tests/v1/cudagraph/test_cudagraph_dispatch.py @@ -45,39 +45,22 @@ def _create_vllm_config(compilation_config: CompilationConfig, class TestCudagraphDispatcher: @pytest.mark.parametrize( - "params", + "case_id,cudagraph_mode_str,compilation_level", [ # Test case 0: Full CG for mixed batches, no separate routine - { - "case_id": 0, - "cudagraph_mode": "FULL", - "compilation_level": CompilationLevel.NO_COMPILATION, - }, + (0, "FULL", CompilationLevel.NO_COMPILATION), # Test case 1: Full CG for uniform batches, piecewise for mixed - { - "case_id": 1, - "cudagraph_mode": "FULL_AND_PIECEWISE", - "compilation_level": CompilationLevel.PIECEWISE, - }, + (1, "FULL_AND_PIECEWISE", CompilationLevel.NO_COMPILATION), # Test case 2: Full CG for uniform batches, no CG for mixed - { - "case_id": 2, - "cudagraph_mode": "FULL_DECODE_ONLY", - "compilation_level": CompilationLevel.NO_COMPILATION, - }, + (2, "FULL_DECODE_ONLY", CompilationLevel.NO_COMPILATION), # Test case 3: Piecewise for all - { - "case_id": 3, - "cudagraph_mode": "PIECEWISE", - "compilation_level": CompilationLevel.PIECEWISE, - }, + (3, "PIECEWISE", CompilationLevel.PIECEWISE), ]) - def test_dispatcher(self, params): + def test_dispatcher(self, cudagraph_mode_str, compilation_level): # Setup dispatcher - comp_config = CompilationConfig( - cudagraph_mode=params["cudagraph_mode"], - level=params["compilation_level"], - cudagraph_capture_sizes=[1, 8]) + comp_config = CompilationConfig(cudagraph_mode=cudagraph_mode_str, + level=compilation_level, + cudagraph_capture_sizes=[1, 8]) config = _create_vllm_config(comp_config, max_num_seqs=8) dispatcher = CudagraphDispatcher(config) @@ -86,11 +69,11 @@ def test_dispatcher(self, params): uniform_decode_query_len=1) # Verify the key is initialized correctly - if params["cudagraph_mode"] in ["FULL_AND_PIECEWISE", "PIECEWISE"]: + if cudagraph_mode_str in ["FULL_AND_PIECEWISE", "PIECEWISE"]: assert len(dispatcher.cudagraph_keys[CUDAGraphMode.PIECEWISE]) == 2 else: assert len(dispatcher.cudagraph_keys[CUDAGraphMode.PIECEWISE]) == 0 - if params["cudagraph_mode"] not in ["NONE", "PIECEWISE"]: + if cudagraph_mode_str not in ["NONE", "PIECEWISE"]: assert len(dispatcher.cudagraph_keys[CUDAGraphMode.FULL]) == 2 else: assert len(dispatcher.cudagraph_keys[CUDAGraphMode.FULL]) == 0 @@ -99,10 +82,10 @@ def test_dispatcher(self, params): # 1. 
non-uniform batch, size in cudagraph size list desc_full_exact = BatchDescriptor(num_tokens=8, uniform_decode=False) rt_mode, key = dispatcher.dispatch(desc_full_exact) - if params["cudagraph_mode"] == "FULL": + if cudagraph_mode_str == "FULL": assert rt_mode == CUDAGraphMode.FULL assert key == desc_full_exact - elif params["cudagraph_mode"] in ["FULL_AND_PIECEWISE", "PIECEWISE"]: + elif cudagraph_mode_str in ["FULL_AND_PIECEWISE", "PIECEWISE"]: assert rt_mode == CUDAGraphMode.PIECEWISE assert key == desc_full_exact else: @@ -111,15 +94,13 @@ def test_dispatcher(self, params): # 2. uniform decode batch, size in cudagraph size list desc_uniform_exact = BatchDescriptor(num_tokens=8, uniform_decode=True) rt_mode, key = dispatcher.dispatch(desc_uniform_exact) - if params["cudagraph_mode"] == "FULL": + if cudagraph_mode_str == "FULL": assert rt_mode == CUDAGraphMode.FULL assert key == desc_uniform_exact.non_uniform - elif params["cudagraph_mode"] in [ - "FULL_DECODE_ONLY", "FULL_AND_PIECEWISE" - ]: + elif cudagraph_mode_str in ["FULL_DECODE_ONLY", "FULL_AND_PIECEWISE"]: assert rt_mode == CUDAGraphMode.FULL assert key == desc_uniform_exact - elif params["cudagraph_mode"] == "PIECEWISE": + elif cudagraph_mode_str == "PIECEWISE": assert rt_mode == CUDAGraphMode.PIECEWISE assert key == desc_uniform_exact.non_uniform else: @@ -131,6 +112,16 @@ def test_dispatcher(self, params): assert rt_mode == CUDAGraphMode.NONE assert key is None + # 4. Cascade attention should have a fall back mode + desc_full_exact = BatchDescriptor(num_tokens=8, uniform_decode=False) + rt_mode, key = dispatcher.dispatch(desc_full_exact, + use_cascade_attn=True) + if "PIECEWISE" in cudagraph_mode_str: # string contains check + assert rt_mode == CUDAGraphMode.PIECEWISE + assert key == desc_full_exact.non_uniform + else: + assert rt_mode == CUDAGraphMode.NONE + @pytest.mark.skipif(not current_platform.is_cuda(), reason="Skip if not cuda") class TestCUDAGraphWrapper: diff --git a/tests/v1/cudagraph/test_cudagraph_mode.py b/tests/v1/cudagraph/test_cudagraph_mode.py index 41a9493cbe58..c4116247bb7c 100644 --- a/tests/v1/cudagraph/test_cudagraph_mode.py +++ b/tests/v1/cudagraph/test_cudagraph_mode.py @@ -4,12 +4,11 @@ import os import weakref from contextlib import ExitStack -from dataclasses import dataclass -from typing import Optional import pytest from tests.utils import wait_for_gpu_memory_to_clear +from tests.v1.attention.utils import full_cg_backend_configs as backend_configs from vllm import LLM from vllm.config import CompilationConfig from vllm.platforms import current_platform @@ -34,74 +33,6 @@ def temporary_environ(env_vars): os.environ[k] = v -@dataclass -class BackendConfig: - name: str - env_vars: dict - comp_config: dict - specific_gpu_arch: Optional[tuple] = None - - -# Define all backend configurations of full cudagraph to be tested -backend_configs = { - # FA3 on Hopper - "FA3": - BackendConfig(name="FA3", - env_vars={ - "VLLM_FLASH_ATTN_VERSION": "3", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", - }, - comp_config={ - "cudagraph_mode": "FULL", - }, - specific_gpu_arch=(9, 0)), - # FlashMLA on Hopper - "FlashMLA": - BackendConfig(name="FlashMLA", - env_vars={ - "VLLM_ATTENTION_BACKEND": "FLASHMLA", - }, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }, - specific_gpu_arch=(9, 0)), - # FlashAttention MLA on Hopper - "FlashAttentionMLA": - BackendConfig(name="FlashAttentionMLA", - env_vars={ - "VLLM_ATTENTION_BACKEND": "FLASH_ATTN_MLA", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": 
"16", - }, - comp_config={ - "cudagraph_mode": "FULL_DECODE_ONLY", - }, - specific_gpu_arch=(9, 0)), - # FA2 - "FA2": - BackendConfig(name="FA2", - env_vars={ - "VLLM_FLASH_ATTN_VERSION": "2", - "VLLM_FLASH_ATTN_MAX_NUM_SPLITS_FOR_CUDA_GRAPH": "16", - }, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }), - # Triton Attention - "TritonAttn": - BackendConfig(name="TritonAttn", - env_vars={"VLLM_ATTENTION_BACKEND": "TRITON_ATTN"}, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }), - # FlashInfer - "FlashInfer": - BackendConfig(name="FlashInfer", - env_vars={"VLLM_ATTENTION_BACKEND": "FLASHINFER"}, - comp_config={ - "cudagraph_mode": "FULL_AND_PIECEWISE", - }), -} - # test attention backend and cudagraph_mode combo # (backend_name, cudagraph_mode, supported) combo_cases_1 = [ @@ -114,9 +45,10 @@ class BackendConfig: ] -@pytest.mark.parametrize("combo_case", combo_cases_1) -def test_backend_and_cudagraph_mode_combo(combo_case): - backend_name, cudagraph_mode, supported = combo_case +@pytest.mark.parametrize("backend_name, cudagraph_mode, supported", + combo_cases_1) +def test_backend_and_cudagraph_mode_combo(backend_name, cudagraph_mode, + supported): if backend_name == "FlashInfer": try: import flashinfer # noqa: F401 @@ -142,7 +74,7 @@ def test_backend_and_cudagraph_mode_combo(combo_case): compilation_config=CompilationConfig( level=3, cudagraph_mode=cudagraph_mode)) llm.generate(["Hello, my name is"] * 10) - + # when above code raises, `llm` may be undefined, so we need to catch that try: llm = weakref.proxy(llm) del llm @@ -173,7 +105,8 @@ def test_backend_and_cudagraph_mode_combo(combo_case): ] -@pytest.mark.parametrize("combo_case", combo_cases_2) +@pytest.mark.parametrize("backend_name,cudagraph_mode,compilation_level,"\ + "supported", combo_cases_2) def test_cudagraph_compilation_combo(combo_case): backend_name, cudagraph_mode, compilation_level, supported\ = combo_case @@ -192,6 +125,7 @@ def test_cudagraph_compilation_combo(combo_case): compilation_config=CompilationConfig( level=compilation_level, cudagraph_mode=cudagraph_mode)) llm.generate(["Hello, my name is"] * 10) + # when above code raises, `llm` may be undefined, so we need to catch that try: llm = weakref.proxy(llm) del llm diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index 17fc727b8fc7..335bbda5e4eb 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -340,15 +340,15 @@ def call_module(self, target: torch.fx.node.Target, num_graphs=len(self.compile_submod_names), runtime_shape=None) # Lazy import here to avoid circular import - from .cuda_piecewise_backend import PiecewiseBackend + from .piecewise_backend import PiecewiseBackend piecewise_backend = PiecewiseBackend( submod, self.vllm_config, index, len(self.compile_submod_names), sym_shape_indices, compiled_graph_for_dynamic_shape, self.vllm_backend) - if (self.compilation_config.cudagraph_mode != CUDAGraphMode.NONE - and + if (self.compilation_config.cudagraph_mode.\ + has_piecewise_cudagraphs() and not self.compilation_config.use_inductor_graph_partition): # We're using Dynamo-based piecewise splitting, so we wrap # the whole subgraph with a static graph wrapper. 
diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 6e9a36a2b0b9..fa38cfe49a91 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -336,7 +336,7 @@ def maybe_use_cudagraph_partition_wrapper(vllm_config: VllmConfig): from vllm.config import CUDAGraphMode compilation_config = vllm_config.compilation_config - if (compilation_config.cudagraph_mode != CUDAGraphMode.NONE + if (compilation_config.cudagraph_mode.has_piecewise_cudagraphs() and compilation_config.use_inductor_graph_partition): from torch._inductor.utils import CUDAGraphWrapperMetadata @@ -365,7 +365,7 @@ def customized_cudagraph_wrapper(f, yield - if (compilation_config.cudagraph_mode != CUDAGraphMode.NONE + if (compilation_config.cudagraph_mode.has_piecewise_cudagraphs() and compilation_config.use_inductor_graph_partition): torch._inductor.utils.set_customized_partition_wrappers(None) diff --git a/vllm/compilation/cuda_piecewise_backend.py b/vllm/compilation/piecewise_backend.py similarity index 100% rename from vllm/compilation/cuda_piecewise_backend.py rename to vllm/compilation/piecewise_backend.py diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index 59b65900b1e1..ecea90988ebc 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -459,15 +459,22 @@ def __post_init__(self): "to True to enable.") current_platform.check_and_update_config(self) - # final check of cudagraph mode after platform-specific update + # Do this after all the updates to compilation_config.level + if envs.VLLM_USE_V1 and \ + self.compilation_config.level == CompilationLevel.PIECEWISE: + self.compilation_config.set_splitting_ops_for_v1() + + # final check of cudagraph mode after all possible updates if envs.VLLM_USE_V1 and current_platform.is_cuda_alike(): - if self.compilation_config.cudagraph_mode == CUDAGraphMode.FULL \ + if self.compilation_config.cudagraph_mode.has_full_cudagraphs()\ and self.model_config is not None and \ - not self.model_config.disable_cascade_attn: - logger.info("CUDAGraphMode.FULL is not supported with " - "cascade attention currently. Disabling cascade" - "attention.") - self.model_config.disable_cascade_attn = True + not self.model_config.disable_cascade_attn and\ + not self.compilation_config.cudagraph_mode.\ + has_piecewise_cudagraphs(): + logger.warning_once( + "No piecewise cudagraph for executing cascade attention." + " Will fall back to eager execution if a batch runs " + "into cascade attentions") if self.compilation_config.cudagraph_mode\ .requires_piecewise_compilation(): @@ -477,6 +484,12 @@ def __post_init__(self): "when cudagraph_mode piecewise cudagraphs is used, "\ f"cudagraph_mode={self.compilation_config.cudagraph_mode}" + # final migrate the deprecated flags + self.compilation_config.use_cudagraph = self.compilation_config.\ + cudagraph_mode!= CUDAGraphMode.NONE + self.compilation_config.full_cuda_graph = self.compilation_config.\ + cudagraph_mode.has_full_cudagraphs() + if self.parallel_config.enable_dbo: a2a_backend = envs.VLLM_ALL2ALL_BACKEND assert a2a_backend in \ @@ -487,14 +500,14 @@ def __post_init__(self): "variable to deepep_low_latency or deepep_high_throughput and "\ "install the DeepEP kernels." 
+ if not self.model_config.disable_cascade_attn: + self.model_config.disable_cascade_attn = True + logger.warning_once( + "Disabling cascade attention when DBO is enabled.") + if not self.instance_id: self.instance_id = random_uuid()[:5] - # Do this after all the updates to compilation_config.level - if envs.VLLM_USE_V1 and \ - self.compilation_config.level == CompilationLevel.PIECEWISE: - self.compilation_config.set_splitting_ops_for_v1() - if (envs.VLLM_USE_V1 and not self.scheduler_config.disable_hybrid_kv_cache_manager): # logger should only print warning message for hybrid models. As we diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 50fde9461a13..9735db98567d 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -61,9 +61,17 @@ def max_cudagraph_mode(self) -> 'CUDAGraphMode': def has_full_cudagraphs(self) -> bool: return self.max_cudagraph_mode() == CUDAGraphMode.FULL + def has_piecewise_cudagraphs(self) -> bool: + return self.requires_piecewise_compilation() + def separate_routine(self) -> bool: return isinstance(self.value, tuple) + def valid_runtime_modes(self) -> bool: + return self in [ + CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL + ] + @config @dataclass @@ -269,7 +277,8 @@ class CompilationConfig: Note that this is orthogonal to the cudagraph capture logic outside of compilation. Warning: This flag is deprecated and will be removed in the next major or - minor release, i.e. v0.11.0 or v1.0.0. Please use cudagraph_mode instead. + minor release, i.e. v0.11.0 or v1.0.0. Please use cudagraph_mode=PIECEWISE + instead. """ cudagraph_num_of_warmups: int = 0 """Number of warmup runs for cudagraph. @@ -294,7 +303,8 @@ class CompilationConfig: flag cannot be used together with splitting_ops. This may provide performance benefits for smaller models. Warning: This flag is deprecated and will be removed in the next major or - minor release, i.e. v0.11.0 or v1.0.0. Please use cudagraph_mode instead. + minor release, i.e. v0.11.0 or v1.0.0. Please use cudagraph_mode= + FULL_AND_PIECEWISE instead. """ use_inductor_graph_partition: bool = False @@ -464,7 +474,8 @@ def __post_init__(self) -> None: if not self.use_cudagraph: logger.warning("use_cudagraph is deprecated, use " "cudagraph_mode=NONE instead.") - if self.cudagraph_mode is not None: + if self.cudagraph_mode is not None and \ + self.cudagraph_mode != CUDAGraphMode.NONE: raise ValueError( "use_cudagraph and cudagraph_mode are mutually" " exclusive, prefer cudagraph_mode since " @@ -473,7 +484,8 @@ def __post_init__(self) -> None: if self.full_cuda_graph: logger.warning("full_cuda_graph is deprecated, use " "cudagraph_mode=FULL instead.") - if self.cudagraph_mode is not None: + if self.cudagraph_mode is not None and \ + not self.cudagraph_mode.has_full_cudagraphs(): raise ValueError("full_cuda_graph and cudagraph_mode are " "mutually exclusive, prefer cudagraph_mode " "since full_cuda_graph is deprecated.") @@ -570,48 +582,75 @@ def set_splitting_ops_for_v1(self): "set_splitting_ops_for_v1 should only be called when " "level is CompilationLevel.PIECEWISE") - use_inductor_graph_partition_msg = ( - "When use_inductor_graph_partition=True, splitting_ops " - "are ignored and set to an empty list. 
Instead, " - "\"tags=(torch._C.Tag.cudagraph_unsafe, ),\" is " - "used to annotate custom ops for graph partition.") + if self.use_inductor_graph_partition: + self.set_splitting_ops_for_inductor_graph_partition() + return + + if self.pass_config.enable_attn_fusion: + # here use_inductor_graph_partition is False + self.set_splitting_ops_for_attn_fusion() + return if self.splitting_ops is None: - if self.use_inductor_graph_partition: - # When using inductor graph partition, we set splitting_ops - # to be empty and rely on torch._C.Tag.cudagraph_unsafe to - # annotate custom ops as splitting ops. - logger.warning_once(use_inductor_graph_partition_msg) - self.splitting_ops = [] - else: - # NOTE: When using full cudagraph, instead of setting an empty - # list and capture the full cudagraph inside the flattened fx - # graph, we keep the piecewise fx graph structure but capture - # the full cudagraph outside the fx graph. This reduces some - # cpu overhead when the runtime batch_size is not cudagraph - # captured. see https://github.com/vllm-project/vllm/pull/20059 - # for details. make a copy to avoid mutating the class-level - # list via reference. - self.splitting_ops = list(self._attention_ops) + # NOTE: When using full cudagraph, instead of setting an empty + # list and capture the full cudagraph inside the flattened fx + # graph, we keep the piecewise fx graph structure but capture + # the full cudagraph outside the fx graph. This reduces some + # cpu overhead when the runtime batch_size is not cudagraph + # captured. see https://github.com/vllm-project/vllm/pull/20059 + # for details. Make a copy to avoid mutating the class-level + # list via reference. + self.splitting_ops = list(self._attention_ops) elif len(self.splitting_ops) == 0: logger.warning_once( - "Using piecewise compilation with empty " - "splitting_ops and use_inductor_graph_partition" - f"={self.use_inductor_graph_partition}.") - if (self.cudagraph_mode == CUDAGraphMode.PIECEWISE - and not self.use_inductor_graph_partition): + "Using piecewise compilation with empty splitting_ops") + if self.cudagraph_mode == CUDAGraphMode.PIECEWISE: + logger.warning_once( + "Piecewise compilation with empty splitting_ops do not" \ + "contains piecewise cudagraph. Setting cudagraph_" + "mode to NONE. Hint: If you are using attention backends " + "that support cudagraph, consider manually setting " + "cudagraph_mode to FULL or FULL_DECODE_ONLY to enable " + "full cudagraphs.") + self.cudagraph_mode = CUDAGraphMode.NONE + elif self.cudagraph_mode == CUDAGraphMode.FULL_AND_PIECEWISE: logger.warning_once( - "When compilation level is piecewise with empty " - "splitting_ops, PIECEWISE cudagraph_mode will be " - "treated as FULL cudagraph_mode. Please ensure you are " - "using attention backends that support cudagraph or set " - "cudagraph_mode to NONE explicitly if encountering " - "any problems.") + "Piecewise compilation with empty splitting_ops do not " + "contains piecewise cudagraph. Setting cudagraph_mode " + "to FULL.") self.cudagraph_mode = CUDAGraphMode.FULL self.splitting_ops = [] - elif self.use_inductor_graph_partition: + + def set_splitting_ops_for_inductor_graph_partition(self): + assert self.use_inductor_graph_partition + use_inductor_graph_partition_msg = ( + "When use_inductor_graph_partition=True, splitting_ops " + "are ignored and set to an empty list. 
Instead, " + "\"tags=(torch._C.Tag.cudagraph_unsafe, ),\" is " + "used to annotate custom ops for graph partition.") + if self.splitting_ops is not None and \ + len(self.splitting_ops) > 0: logger.warning_once(use_inductor_graph_partition_msg) + self.splitting_ops = [] + + def set_splitting_ops_for_attn_fusion(self): + assert self.pass_config.enable_attn_fusion + if self.splitting_ops is None: self.splitting_ops = [] + if self.cudagraph_mode.has_piecewise_cudagraphs(): + logger.warning_once( + "enable_attn_fusion is incompatible with piecewise " + "cudagraph when use_inductor_graph_partition is off." + "In this case, splitting_ops will be set to empty " + "list, and cudagraph_mode will be set to FULL. " + "Please ensure you are using attention backends that " + "support cudagraph or set cudagraph_mode to NONE " + "explicitly if encountering any problems.") + self.cudagraph_mode = CUDAGraphMode.FULL + + assert not self.splitting_ops_contain_attention(), ( + "attention ops should not be in splitting_ops " + "when enable_attn_fusion is True") def splitting_ops_contain_attention(self) -> bool: return self.splitting_ops is not None and all( diff --git a/vllm/forward_context.py b/vllm/forward_context.py index 3b535423f7bc..2bf4e1804521 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -246,8 +246,7 @@ class ForwardContext: ubatch_slices: Optional[UBatchSlices] = None def __post_init__(self): - assert self.cudagraph_runtime_mode in [ - CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL], \ + assert self.cudagraph_runtime_mode.valid_runtime_modes(), \ f"Invalid cudagraph runtime mode: {self.cudagraph_runtime_mode}" diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py index ea4fba8eeea6..2dbe2bfb8082 100644 --- a/vllm/v1/cudagraph_dispatcher.py +++ b/vllm/v1/cudagraph_dispatcher.py @@ -22,10 +22,10 @@ class CudagraphDispatcher: At runtime, the dispatch method generates the runtime cudagraph mode (FULL, PIECEWISE, or NONE for no cudagraph) and the valid key (batch descriptor) - based on the input key. After dispatching (communicate via forward context), - the cudagraph wrappers will trust the dispatch key to do either capturing - or replaying (if mode matched), or pass through to the underlying runnable - without cudagraph (if mode no match or mode is NONE). + based on the input key. After dispatching (communicated via forward + context), the cudagraph wrappers will trust the dispatch key to either + capture or replay (if the mode matches), or pass through to the underlying + runnable without cudagraph (if the mode does not match or mode is NONE). """ def __init__(self, vllm_config: VllmConfig): @@ -57,19 +57,15 @@ def __init__(self, vllm_config: VllmConfig): def add_cudagraph_key(self, runtime_mode: CUDAGraphMode, batch_descriptor: BatchDescriptor): assert runtime_mode in [CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL], \ - f"Invalid cudagraph runtime mode: {runtime_mode}" + f"Invalid cudagraph runtime mode for keys: {runtime_mode}" self.cudagraph_keys[runtime_mode].add(batch_descriptor) def initialize_cudagraph_keys(self, cudagraph_mode: CUDAGraphMode, uniform_decode_query_len: int): # This should be called only after attention backend is initialized. - # Note: we create all valid keys possible for cudagraph but do not - # guarantee all keys would be used. 
For example, we create keys for - # piecewise cudagraphs when it is piecewise compilation, which is always - # valid, but for attention backend support unified routine, we may not - # trigger capturing/replaying the piecewise cudagraphs depending on - # CompilationConfig.cudagraph_mode. In addition, if we allow lazy + # Note: we create all valid keys for cudagraph here but do not + # guarantee all keys would be used. For example, if we allow lazy # capturing in future PR, some keys may never be triggered. if cudagraph_mode.mixed_mode() != CUDAGraphMode.NONE: for bs in self.compilation_config.cudagraph_capture_sizes: @@ -94,10 +90,13 @@ def initialize_cudagraph_keys(self, cudagraph_mode: CUDAGraphMode, self.keys_initialized = True def dispatch( - self, batch_descriptor: BatchDescriptor + self, + batch_descriptor: BatchDescriptor, + use_cascade_attn: bool = False ) -> tuple[CUDAGraphMode, Optional[BatchDescriptor]]: """ - Given a batch descriptor, dispatch to a cudagraph mode. + Given conditions(e.g.,batch descriptor and if using cascade attention), + dispatch to a cudagraph runtime mode and the valid batch descriptor. A new batch descriptor is returned as we might dispatch a uniform batch to a graph that supports a more general batch (uniform to non-uniform). """ @@ -107,14 +106,16 @@ def dispatch( "initialized. No cudagraph will be used.") return CUDAGraphMode.NONE, None - # check if key exists for full cudagraph - if batch_descriptor in self.cudagraph_keys[CUDAGraphMode.FULL]: - return CUDAGraphMode.FULL, batch_descriptor - - # otherwise, check if non-uniform key exists non_uniform_key = batch_descriptor.non_uniform - if non_uniform_key in self.cudagraph_keys[CUDAGraphMode.FULL]: - return CUDAGraphMode.FULL, non_uniform_key + # if a batch use cascade attention, bypass checking full cudagraphs + if not use_cascade_attn: + # check if key exists for full cudagraph + if batch_descriptor in self.cudagraph_keys[CUDAGraphMode.FULL]: + return CUDAGraphMode.FULL, batch_descriptor + + # otherwise, check if non-uniform key exists + if non_uniform_key in self.cudagraph_keys[CUDAGraphMode.FULL]: + return CUDAGraphMode.FULL, non_uniform_key # also check if non-uniform key exists for more "general" # piecewise cudagraph diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f199dbd991f4..2fac708905d0 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -923,11 +923,13 @@ def _prepare_inputs( ) -> tuple[PerLayerAttnMetadata, torch.Tensor, Optional[SpecDecodeMetadata], np.ndarray, Optional[CommonAttentionMetadata], int, Optional[UBatchSlices], - Optional[torch.Tensor]]: + Optional[torch.Tensor], bool]: """ :return: tuple[ attn_metadata: layer-to-attention_metadata mapping, - logits_indices, spec_decode_metadata + logits_indices, spec_decode_metadata, + num_scheduled_tokens, spec_decode_common_attn_metadata, + max_num_scheduled_tokens, use_cascade_attn ] """ total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens @@ -1135,6 +1137,7 @@ def _prepare_inputs( attn_metadata: PerLayerAttnMetadata = {} if ubatch_slices is not None: attn_metadata = [dict() for _ in range(len(ubatch_slices))] + use_cascade_attn = False # Used in the below loop. 
query_start_loc_cpu = self.query_start_loc.cpu[:num_reqs + 1] @@ -1251,9 +1254,15 @@ def _prepare_inputs( common_prefix_len=common_prefix_len, common_attn_metadata=common_attn_metadata, **extra_attn_metadata_args) + use_cascade_attn |= getattr(attn_metadata_i, "use_cascade", + False) for layer_name in attn_group.layer_names: attn_metadata[layer_name] = attn_metadata_i + # disable cascade attention when DBO + if ubatch_slices is not None: + use_cascade_attn = False + # Hot-Swap lora model if self.lora_config: self.set_active_loras(self.input_batch, num_scheduled_tokens) @@ -1261,7 +1270,7 @@ def _prepare_inputs( return (attn_metadata, logits_indices, spec_decode_metadata, num_scheduled_tokens, spec_decode_common_attn_metadata, max_num_scheduled_tokens, ubatch_slices, - num_tokens_after_padding) + num_tokens_after_padding, use_cascade_attn) def _compute_cascade_attn_prefix_len( self, @@ -2251,8 +2260,8 @@ def execute_model( # Prepare the decoder inputs. (attn_metadata, logits_indices, spec_decode_metadata, num_scheduled_tokens_np, spec_decode_common_attn_metadata, - max_query_len, ubatch_slices, num_tokens_after_padding - ) = self._prepare_inputs(scheduler_output) + max_query_len, ubatch_slices, num_tokens_after_padding, + use_cascade_attn) = self._prepare_inputs(scheduler_output) ( num_scheduled_tokens, @@ -2273,7 +2282,8 @@ def execute_model( batch_descriptor = BatchDescriptor(num_tokens=num_input_tokens, uniform_decode=uniform_decode) cudagraph_runtime_mode, batch_descriptor = \ - self.cudagraph_dispatcher.dispatch(batch_descriptor) + self.cudagraph_dispatcher.dispatch(batch_descriptor, + use_cascade_attn) # This is currently to get around the assert in the DPMetadata # where it wants `num_tokens_across_dp` to align with `num_tokens` @@ -2701,16 +2711,15 @@ def reload_weights(self) -> None: "Cannot reload weights before model is loaded." model_loader = get_model_loader(self.load_config) logger.info("Reloading weights inplace...") - model = self.get_model() - model_loader.load_weights(model, model_config=self.model_config) + model_loader.load_weights(self.get_model(), + model_config=self.model_config) def save_tensorized_model( self, tensorizer_config: "TensorizerConfig", ) -> None: - model = self.get_model() TensorizerLoader.save_model( - model, + self.get_model(), tensorizer_config=tensorizer_config, model_config=self.model_config, ) @@ -2926,9 +2935,8 @@ def _dummy_run( (1 token) and prefill (multiple tokens) requests. remove_lora: If False, dummy LoRAs are not destroyed after the run """ - assert cudagraph_runtime_mode is None or cudagraph_runtime_mode in { - CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL - } + assert cudagraph_runtime_mode is None or \ + cudagraph_runtime_mode.valid_runtime_modes() # If cudagraph_mode.decode_mode() == FULL and # cudagraph_mode.separate_routine(). 
This means that we are using @@ -3113,7 +3121,8 @@ def _dummy_run( # filter out the valid batch descriptor _cg_mode, batch_descriptor = self.cudagraph_dispatcher.dispatch( BatchDescriptor(num_tokens=num_tokens, - uniform_decode=uniform_decode)) + uniform_decode=uniform_decode)) \ + if not is_profile else (CUDAGraphMode.NONE, None) if cudagraph_runtime_mode is not None: # we allow forcing NONE when the dispatcher disagrees to support # warm ups for cudagraph capture @@ -3453,8 +3462,8 @@ def _capture_cudagraphs(self, compilation_cases: list[int], cudagraph_runtime_mode: CUDAGraphMode, uniform_decode: bool): assert cudagraph_runtime_mode != CUDAGraphMode.NONE and \ - cudagraph_runtime_mode in [CUDAGraphMode.FULL, - CUDAGraphMode.PIECEWISE] + cudagraph_runtime_mode.valid_runtime_modes(), \ + f"Invalid cudagraph runtime mode: {cudagraph_runtime_mode}" # Only rank 0 should print progress bar during capture if is_global_first_rank(): @@ -3585,6 +3594,12 @@ def create_attn_groups( self.calculate_reorder_batch_threshold() def initialize_cudagraph_capture(self) -> None: + """ + Resolve the cudagraph_mode when there are multiple attention + backends with potential conflicting CUDA graph support. + Then initialize the cudagraph_dispatcher based on the resolved + cudagraph_mode. + """ min_cg_support = AttentionCGSupport.ALWAYS min_cg_builder_name = None From cf89202855a4c219051f722258a815f70dcde842 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 26 Sep 2025 17:11:40 -0400 Subject: [PATCH 431/518] [CI] Fix FlashInfer AOT in release docker image (#25730) Signed-off-by: mgoin --- .buildkite/release-pipeline.yaml | 2 +- docker/Dockerfile | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.buildkite/release-pipeline.yaml b/.buildkite/release-pipeline.yaml index 8c6ef7817aaf..7677d783fabc 100644 --- a/.buildkite/release-pipeline.yaml +++ b/.buildkite/release-pipeline.yaml @@ -76,7 +76,7 @@ steps: queue: arm64_cpu_queue_postmerge commands: - "aws ecr-public get-login-password --region us-east-1 | docker login --username AWS --password-stdin public.ecr.aws/q9t5s3a7" - - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." + - "DOCKER_BUILDKIT=1 docker build --build-arg max_jobs=16 --build-arg USE_SCCACHE=1 --build-arg GIT_REPO_CHECK=1 --build-arg CUDA_VERSION=12.9.1 --build-arg FLASHINFER_AOT_COMPILE=true --build-arg torch_cuda_arch_list='8.7 9.0 10.0+PTX 12.0' --build-arg INSTALL_KV_CONNECTORS=true --tag public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m) --target vllm-openai --progress plain -f docker/Dockerfile ." 
- "docker push public.ecr.aws/q9t5s3a7/vllm-release-repo:$BUILDKITE_COMMIT-$(uname -m)" # Add job to create multi-arch manifest diff --git a/docker/Dockerfile b/docker/Dockerfile index 034f73736ca7..c0f55a7eeba0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -404,6 +404,9 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0" fi echo "🏗️ Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}" + export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" + # HACK: We need these to run flashinfer.aot before installing flashinfer, get from the package in the future + uv pip install --system cuda-python==$(echo $CUDA_VERSION | cut -d. -f1,2) pynvml==$(echo $CUDA_VERSION | cut -d. -f1) nvidia-nvshmem-cu$(echo $CUDA_VERSION | cut -d. -f1) # Build AOT kernels TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ python3 -m flashinfer.aot From c70ac4b8fff7674deb0fa58fc232cf06f477f3ec Mon Sep 17 00:00:00 2001 From: qizixi <22851944+zixi-qi@users.noreply.github.com> Date: Fri, 26 Sep 2025 15:27:05 -0700 Subject: [PATCH 432/518] [spec decode] Consolidate speculative decode method name for MTP (#25232) Signed-off-by: zixi-qi --- examples/offline_inference/spec_decode.py | 5 +- tests/v1/e2e/test_spec_decode.py | 65 ++++++++ tests/v1/spec_decode/test_mtp.py | 195 ++++++++++++++++++++++ vllm/config/speculative.py | 50 +++--- vllm/engine/arg_utils.py | 2 +- vllm/v1/spec_decode/eagle.py | 10 +- 6 files changed, 287 insertions(+), 40 deletions(-) create mode 100644 tests/v1/spec_decode/test_mtp.py diff --git a/examples/offline_inference/spec_decode.py b/examples/offline_inference/spec_decode.py index ce078bce0b75..af65b6d38e02 100644 --- a/examples/offline_inference/spec_decode.py +++ b/examples/offline_inference/spec_decode.py @@ -54,6 +54,7 @@ def parse_args(): "--method", type=str, default="eagle", + choices=["ngram", "eagle", "eagle3", "mtp"], ) parser.add_argument("--num-spec-tokens", type=int, default=2) parser.add_argument("--prompt-lookup-max", type=int, default=5) @@ -118,9 +119,9 @@ def main(args): "prompt_lookup_max": args.prompt_lookup_max, "prompt_lookup_min": args.prompt_lookup_min, } - elif args.method.endswith("mtp"): + elif args.method == "mtp": speculative_config = { - "method": args.method, + "method": "mtp", "num_speculative_tokens": args.num_spec_tokens, } else: diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index 66115f14c182..c4efd7548b81 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -15,6 +15,8 @@ from vllm.distributed import cleanup_dist_env_and_memory from vllm.platforms import current_platform +MTP_SIMILARITY_RATE = 0.8 + def get_test_prompts(mm_enabled: bool): prompt_types = ["repeat", "sentence"] @@ -222,3 +224,66 @@ def test_eagle_correctness( del spec_llm torch.cuda.empty_cache() cleanup_dist_env_and_memory() + + +@pytest.mark.parametrize(["model_setup", "mm_enabled"], [ + (("mtp", "XiaomiMiMo/MiMo-7B-Base", 1), False), + (("mtp", "ZixiQi/DeepSeek-V3-4layers-MTP-FP8", 1), False), +], + ids=["mimo", "deepseek"]) +def test_mtp_correctness( + monkeypatch: pytest.MonkeyPatch, + sampling_config: SamplingParams, + model_setup: tuple[str, str, int], + mm_enabled: bool, +): + # Generate test prompts inside the function instead of using fixture + test_prompts = get_test_prompts(mm_enabled) + ''' + Compare the outputs of a original LLM and a speculative LLM + should be the same when using MTP speculative 
decoding. + model_setup: (method, model_name, tp_size) + ''' + with monkeypatch.context() as m: + m.setenv("VLLM_USE_V1", "1") + m.setenv("VLLM_MLA_DISABLE", "1") + + method, model_name, tp_size = model_setup + + ref_llm = LLM(model=model_name, + max_model_len=2048, + tensor_parallel_size=tp_size, + trust_remote_code=True) + ref_outputs = ref_llm.chat(test_prompts, sampling_config) + del ref_llm + torch.cuda.empty_cache() + cleanup_dist_env_and_memory() + + spec_llm = LLM( + model=model_name, + trust_remote_code=True, + tensor_parallel_size=tp_size, + speculative_config={ + "method": method, + "num_speculative_tokens": 1, + "max_model_len": 2048, + }, + max_model_len=2048, + ) + spec_outputs = spec_llm.chat(test_prompts, sampling_config) + matches = 0 + misses = 0 + for ref_output, spec_output in zip(ref_outputs, spec_outputs): + if ref_output.outputs[0].text == spec_output.outputs[0].text: + matches += 1 + else: + misses += 1 + print(f"ref_output: {ref_output.outputs[0].text}") + print(f"spec_output: {spec_output.outputs[0].text}") + + # Heuristic: expect at least 80% of the prompts to match exactly + # Upon failure, inspect the outputs to check for inaccuracy. + assert matches > int(MTP_SIMILARITY_RATE * len(ref_outputs)) + del spec_llm + torch.cuda.empty_cache() + cleanup_dist_env_and_memory() diff --git a/tests/v1/spec_decode/test_mtp.py b/tests/v1/spec_decode/test_mtp.py new file mode 100644 index 000000000000..e4881859ece1 --- /dev/null +++ b/tests/v1/spec_decode/test_mtp.py @@ -0,0 +1,195 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +from unittest import mock + +import pytest +import torch + +from tests.v1.attention.utils import (BatchSpec, _Backend, + create_common_attn_metadata, + create_standard_kv_cache_spec, + get_attention_backend) +from vllm.config import (CacheConfig, DeviceConfig, ModelConfig, + ParallelConfig, SchedulerConfig, SpeculativeConfig, + VllmConfig) +from vllm.config.load import LoadConfig +from vllm.model_executor.models.llama import LlamaForCausalLM +from vllm.platforms import current_platform +from vllm.v1.spec_decode.eagle import EagleProposer + +mimo_7b_dir = "XiaomiMiMo/MiMo-7B-Base" + + +def _create_mtp_proposer(num_speculative_tokens: int) -> EagleProposer: + """Create an MTP proposer with unified model configuration.""" + model_config = ModelConfig(model=mimo_7b_dir, + runner="generate", + max_model_len=100, + trust_remote_code=True) + + speculative_config = SpeculativeConfig( + target_model_config=model_config, + target_parallel_config=ParallelConfig(), + model=mimo_7b_dir, + method="mtp", + num_speculative_tokens=num_speculative_tokens, + ) + + vllm_config = VllmConfig( + model_config=model_config, + cache_config=CacheConfig(), + speculative_config=speculative_config, + device_config=DeviceConfig(device=current_platform.device_type), + parallel_config=ParallelConfig(), + load_config=LoadConfig(), + scheduler_config=SchedulerConfig()) + + return EagleProposer(vllm_config=vllm_config, + device=current_platform.device_type) + + +@mock.patch('vllm.v1.spec_decode.eagle.get_pp_group') +@mock.patch('vllm.v1.spec_decode.eagle.get_layers_from_vllm_config') +@mock.patch('vllm.v1.spec_decode.eagle.get_model') +def test_mtp_load_model_unified(mock_get_model, mock_get_layers, + mock_get_pp_group): + """Test MTP-specific model loading with unified model approach.""" + + # Setup mocks + mock_model = mock.MagicMock() + mock_model.model.embed_tokens.weight.shape = (131072, 4096) + 
mock_get_model.return_value = mock_model + + target_attn_layers = {"target_attn_1": mock.MagicMock()} + all_attn_layers = {**target_attn_layers, "draft_attn_1": mock.MagicMock()} + mock_get_layers.side_effect = [target_attn_layers, all_attn_layers] + + mock_pp_group = mock.MagicMock() + mock_pp_group.world_size = 1 + mock_get_pp_group.return_value = mock_pp_group + + # Create target model + class _TargetModelStub(LlamaForCausalLM): + model: mock.MagicMock + lm_head: mock.MagicMock + + target_model = mock.create_autospec(_TargetModelStub, instance=True) + target_model.model = mock.MagicMock() + target_model.model.embed_tokens.weight.shape = (131072, 4096) + target_model.lm_head = mock.MagicMock() + + # Create MTP proposer + proposer = _create_mtp_proposer(num_speculative_tokens=4) + proposer.load_model(target_model) + + # Verify MTP-specific behavior: + # Model is loaded + mock_get_model.assert_called_once() + # MTP shares lm_head with target model + assert proposer.model.lm_head == target_model.lm_head + # MTP shares embed_tokens with target model + assert proposer.model.model.embed_tokens == target_model.model.embed_tokens + + +@pytest.mark.parametrize("num_speculative_tokens", [1]) +def test_mtp_propose(num_speculative_tokens, monkeypatch): + """Test that MTP's forward method returns hidden states directly""" + + device = torch.device(current_platform.device_type) + batch_size = 2 + seq_lens = [5, 3] + total_tokens = sum(seq_lens) + vocab_size = 100 + + proposer = _create_mtp_proposer(num_speculative_tokens) + hidden_size = proposer.hidden_size + + # Mock the MTP model to verify it returns hidden states directly + model_mock = mock.MagicMock() + + # MTP returns hidden states directly + if num_speculative_tokens == 1: + model_mock.return_value = torch.zeros(total_tokens, + hidden_size, + device=device) + else: + # Multiple forward passes for multi-token speculation + forward_returns = [] + for i in range(num_speculative_tokens): + if i == 0: + h_states = torch.zeros(total_tokens, + hidden_size, + device=device) + else: + h_states = torch.zeros(batch_size, hidden_size, device=device) + forward_returns.append(h_states) + model_mock.side_effect = forward_returns + + # Mock compute_logits + def create_deterministic_logits(batch_size, vocab_size, token_offset): + logits = torch.full((batch_size, vocab_size), -100.0, device=device) + logits[:, token_offset] = 100.0 + return logits + + if num_speculative_tokens == 1: + model_mock.compute_logits.return_value = create_deterministic_logits( + batch_size, vocab_size, 42) + else: + logits_returns = [ + create_deterministic_logits(batch_size, vocab_size, 42 + i) + for i in range(num_speculative_tokens) + ] + model_mock.compute_logits.side_effect = logits_returns + + proposer.model = model_mock + proposer.attn_layer_names = ["layer.0"] + + # Prepare inputs + batch_spec = BatchSpec(seq_lens=seq_lens, query_lens=seq_lens) + common_attn_metadata = create_common_attn_metadata(batch_spec, + block_size=16, + device=device) + + target_token_ids = torch.randint(0, + vocab_size, (total_tokens, ), + device=device) + target_positions = torch.cat([ + torch.arange(seq_lens[0], device=device), + torch.arange(seq_lens[1], device=device) + ]) + target_hidden_states = torch.randn(total_tokens, + hidden_size, + device=device) + next_token_ids = torch.randint(0, + vocab_size, (batch_size, ), + dtype=torch.int32, + device=device) + sampling_metadata = mock.MagicMock() + + # Setup attention metadata + attn_metadata_builder_cls, _ = 
get_attention_backend(_Backend.FLASH_ATTN) + + attn_metadata_builder = attn_metadata_builder_cls( + kv_cache_spec=create_standard_kv_cache_spec(proposer.vllm_config), + layer_names=proposer.attn_layer_names, + vllm_config=proposer.vllm_config, + device=device, + ) + + proposer.runner = mock.MagicMock() + proposer.attn_metadata_builder = attn_metadata_builder + + # Run propose + result = proposer.propose(target_token_ids=target_token_ids, + target_positions=target_positions, + target_hidden_states=target_hidden_states, + next_token_ids=next_token_ids, + last_token_indices=None, + common_attn_metadata=common_attn_metadata, + sampling_metadata=sampling_metadata) + + # Verify the model was called correctly + assert model_mock.called + # Verify output shape + assert result.shape == (batch_size, num_speculative_tokens) diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 5f462442148f..8b80ce13f96e 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -32,7 +32,9 @@ SpeculativeMethod = Literal["ngram", "eagle", "eagle3", "medusa", "mlp_speculator", "draft_model", "deepseek_mtp", "ernie_mtp", "qwen3_next_mtp", "mimo_mtp", - "longcat_flash_mtp"] + "longcat_flash_mtp", "mtp"] +MTP_MODEL_TYPES = ("deepseek_mtp", "mimo_mtp", "glm4_moe_mtp", "ernie_mtp", + "qwen3_next_mtp", "longcat_flash_mtp") @config @@ -207,11 +209,16 @@ def __post_init__(self): # can not be detected, it will be considered as the "draft_model" by # default. + if self.method in MTP_MODEL_TYPES: + logger.warning("method `%s` is deprecated and replaced with mtp.", + self.method) + self.method = "mtp" + if self.model is None and self.num_speculative_tokens is not None: - # TODO(Shangming): Refactor mtp configuration logic when supporting - if (self.target_model_config - and self.target_model_config.hf_text_config.model_type - in ("deepseek_v3", "mimo", "ernie4_5_moe", "qwen3_next")): + if self.method == "mtp": + assert ( + self.target_model_config + is not None), "target_model_config must be present for mtp" # use the draft model from the same model: self.model = self.target_model_config.model # Align the quantization of draft model for cases such as @@ -312,31 +319,13 @@ def __post_init__(self): "mlp_speculator"): self.method = "mlp_speculator" elif (self.draft_model_config.hf_config.model_type - in ("deepseek_mtp", "mimo_mtp", "glm4_moe_mtp")): - self.method = "deepseek_mtp" - if self.num_speculative_tokens > 1: - logger.warning( - "All Deepseek MTP models only have " \ - "one layer. Might need some code changes " \ - "to support multiple layers." - ) - elif (self.draft_model_config.hf_config.model_type == - "ernie_mtp"): - self.method = "ernie_mtp" + in MTP_MODEL_TYPES): + self.method = "mtp" if self.num_speculative_tokens > 1: logger.warning( - "All Ernie MTP models only have " \ - "one layer. Might need some code changes " \ - "to support multiple layers." - ) - elif (self.draft_model_config.hf_config.model_type == - "qwen3_next_mtp"): - self.method = "qwen3_next_mtp" - if self.num_speculative_tokens > 1: - logger.warning( - "All Qwen3Next MTP models only have " \ - "one layer. Might need some code changes " \ - "to support multiple layers." + "Enabling num_speculative_tokens > 1 will run" \ + "multiple times of forward on same MTP layer" \ + ",which may result in lower acceptance rate" \ ) elif (self.draft_model_config.hf_config.model_type in ("longcat_flash_mtp")): @@ -353,7 +342,7 @@ def __post_init__(self): "Speculative decoding with draft model is not " "supported yet. 
Please consider using other " "speculative decoding methods such as ngram, medusa, " - "eagle, or deepseek_mtp.") + "eagle, or mtp.") # Replace hf_config for EAGLE draft_model if self.method in ("eagle", "eagle3"): @@ -562,8 +551,7 @@ def num_lookahead_slots(self) -> int: return self.num_speculative_tokens def use_eagle(self) -> bool: - return self.method in ("eagle", "eagle3", "deepseek_mtp", "ernie_mtp", - "qwen3_next_mtp", "longcat_flash_mtp") + return self.method in ("eagle", "eagle3", "mtp") def __repr__(self) -> str: method = self.method diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7b5ed67d0adb..8757f4b8b7ba 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1481,7 +1481,7 @@ def _is_v1_supported_oracle(self, model_config: ModelConfig) -> bool: raise NotImplementedError( "Draft model speculative decoding is not supported yet. " "Please consider using other speculative decoding methods " - "such as ngram, medusa, eagle, or deepseek_mtp.") + "such as ngram, medusa, eagle, or mtp.") V1_BACKENDS = [ "FLASH_ATTN", diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 119f41d8580e..57da8346f497 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -222,8 +222,7 @@ def propose( hidden_states=self.hidden_states[:num_input_tokens], inputs_embeds=inputs_embeds, ) - if self.method in ("deepseek_mtp", "ernie_mtp", "qwen3_next_mtp", - "longcat_flash_mtp"): + if self.method == "mtp": last_hidden_states = ret_hidden_states hidden_states = last_hidden_states else: @@ -352,8 +351,7 @@ def propose( hidden_states=self.hidden_states[:input_batch_size], inputs_embeds=inputs_embeds, ) - if self.method in ("deepseek_mtp", "ernie_mtp", - "qwen3_next_mtp", "longcat_flash_mtp"): + if self.method == "mtp": last_hidden_states = ret_hidden_states hidden_states = ret_hidden_states else: @@ -888,10 +886,10 @@ def dummy_run( def _get_attention_metadata_builder( self) -> list[AttentionMetadataBuilder]: """Find and return the attention metadata builders for EAGLE layers. - + Returns: The metadata builders for EAGLE layers. - + Raises: AssertionError: If no metadata builders are found for EAGLE layers. """ From 4778b426605ec48b3eed90a94b29197835059090 Mon Sep 17 00:00:00 2001 From: Sage Moore Date: Fri, 26 Sep 2025 15:29:56 -0700 Subject: [PATCH 433/518] Reduce the Cuda Graph memory footprint when running with DBO (#25779) Signed-off-by: Sage Moore --- vllm/v1/worker/gpu_model_runner.py | 48 ++++++++++++---------------- vllm/v1/worker/gpu_ubatch_wrapper.py | 12 +++++++ 2 files changed, 32 insertions(+), 28 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 2fac708905d0..4fd4f9128c6e 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3477,8 +3477,10 @@ def _capture_cudagraphs(self, compilation_cases: list[int], # We skip EPLB here since we don't want to record dummy metrics for num_tokens in compilation_cases: # We currently only capture ubatched graphs when its a FULL - # cudagraph and for uniform decode batches. - capture_ubatched_graph = self.parallel_config.enable_dbo \ + # cudagraph, a uniform decode batch, and the number of tokens + # is above the threshold. 
Otherwise we just capture a non-ubatched + # version of the graph + allow_microbatching = self.parallel_config.enable_dbo \ and cudagraph_runtime_mode == CUDAGraphMode.FULL \ and uniform_decode \ and check_ubatch_thresholds( @@ -3487,37 +3489,27 @@ def _capture_cudagraphs(self, compilation_cases: list[int], uniform_decode=uniform_decode, ) - # Currently we capture both microbatched and non-microbatched - # graphs when capture_ubatched_graph is True, this is because - # occasionally we will be forced out of microbatching due to other - # DP ranks not microbatching (usually caused by an empty second - # microbatch; once we resolve this, we can remove the - # non-microbatched graph capture). - allow_microbatching_options = [True, False] if \ - capture_ubatched_graph else [False] - for allow_microbatching in allow_microbatching_options: - for _ in range( - self.compilation_config.cudagraph_num_of_warmups): - # Use CUDAGraphRuntimeStyle.NONE (default) for warmup. - # But be careful, warm up with `NONE`is orthogonal to - # if we want to warm up attention or not. This is - # different from the case where `FULL` implies capture - # attention while `PIECEWISE` implies no attention. - force_attention = ( - cudagraph_runtime_mode == CUDAGraphMode.FULL) - self._dummy_run(num_tokens, - cudagraph_runtime_mode=CUDAGraphMode.NONE, - force_attention=force_attention, - uniform_decode=uniform_decode, - allow_microbatching=allow_microbatching, - skip_eplb=True, - remove_lora=False) + for _ in range(self.compilation_config.cudagraph_num_of_warmups): + # Use CUDAGraphRuntimeStyle.NONE (default) for warmup. + # But be careful, warm up with `NONE`is orthogonal to + # if we want to warm up attention or not. This is + # different from the case where `FULL` implies capture + # attention while `PIECEWISE` implies no attention. + force_attention = ( + cudagraph_runtime_mode == CUDAGraphMode.FULL) self._dummy_run(num_tokens, - cudagraph_runtime_mode=cudagraph_runtime_mode, + cudagraph_runtime_mode=CUDAGraphMode.NONE, + force_attention=force_attention, uniform_decode=uniform_decode, allow_microbatching=allow_microbatching, skip_eplb=True, remove_lora=False) + self._dummy_run(num_tokens, + cudagraph_runtime_mode=cudagraph_runtime_mode, + uniform_decode=uniform_decode, + allow_microbatching=allow_microbatching, + skip_eplb=True, + remove_lora=False) self.maybe_remove_all_loras(self.lora_config) def initialize_attn_backend(self, kv_cache_config: KVCacheConfig) -> None: diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index 5e4c1d32ab6c..39be8c74102e 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -330,6 +330,18 @@ def __call__(self, *args, **kwargs): # If there's no ubatching, just run the runnable object if ubatch_slices is None: + + # This is to account for the case where ubatching was aborted. + # When we capture full graphs we only capture one graph per shape, + # meaning that if we have a ubatched cudagraph for the current + # num_tokens, we don't have a non-ubatched one. Without this + # check, the cudagraph wrapper will try to capture a cudagraph + # for this shape during a normal run. 
+ if cudagraph_runtime_mode is CUDAGraphMode.FULL: + assert batch_descriptor is not None + if batch_descriptor.num_tokens in self.cudagraphs: + cudagraph_runtime_mode = CUDAGraphMode.NONE + if cudagraph_runtime_mode in (CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE): return self.runnable(*args, **kwargs) From dc48ba0c750e176f6314504cc0e8370a46ed01a8 Mon Sep 17 00:00:00 2001 From: Bram Wasti Date: Fri, 26 Sep 2025 19:59:09 -0400 Subject: [PATCH 434/518] Kernel-override Determinism [1/n] (#25603) Signed-off-by: Bram Wasti --- csrc/core/batch_invariant.hpp | 16 + csrc/layernorm_kernels.cu | 8 +- csrc/layernorm_quant_kernels.cu | 5 +- csrc/moe/topk_softmax_kernels.cu | 4 +- tests/v1/generation/test_batch_invariance.py | 290 +++++++++ vllm/model_executor/layers/batch_invariant.py | 561 ++++++++++++++++++ vllm/v1/attention/backends/flex_attention.py | 7 + vllm/v1/worker/gpu_model_runner.py | 3 + 8 files changed, 890 insertions(+), 4 deletions(-) create mode 100644 csrc/core/batch_invariant.hpp create mode 100644 tests/v1/generation/test_batch_invariance.py create mode 100644 vllm/model_executor/layers/batch_invariant.py diff --git a/csrc/core/batch_invariant.hpp b/csrc/core/batch_invariant.hpp new file mode 100644 index 000000000000..19e422e4b80c --- /dev/null +++ b/csrc/core/batch_invariant.hpp @@ -0,0 +1,16 @@ +#pragma once +#include +#include +#include + +namespace vllm { + +// vllm_kernel_override_batch_invariant(); returns true +// if env VLLM_KERNEL_OVERRIDE_BATCH_INVARIANT=1 +inline bool vllm_kernel_override_batch_invariant() { + std::string env_key = "VLLM_KERNEL_OVERRIDE_BATCH_INVARIANT"; + const char* val = std::getenv(env_key.c_str()); + return (val && std::atoi(val) != 0) ? 1 : 0; +} + +} // namespace vllm diff --git a/csrc/layernorm_kernels.cu b/csrc/layernorm_kernels.cu index 93c73d58390e..6c3685f6f7cd 100644 --- a/csrc/layernorm_kernels.cu +++ b/csrc/layernorm_kernels.cu @@ -1,6 +1,7 @@ #include "type_convert.cuh" #include "dispatch_utils.h" #include "cub_helpers.h" +#include "core/batch_invariant.hpp" #include #include @@ -413,7 +414,9 @@ void fused_add_rms_norm(torch::Tensor& input, // [..., hidden_size] wt_ptr % req_alignment_bytes == 0; bool offsets_are_multiple_of_vector_width = hidden_size % vector_width == 0 && input_stride % vector_width == 0; - if (ptrs_are_aligned && offsets_are_multiple_of_vector_width) { + bool batch_invariant_launch = vllm::vllm_kernel_override_batch_invariant(); + if (ptrs_are_aligned && offsets_are_multiple_of_vector_width && + !batch_invariant_launch) { LAUNCH_FUSED_ADD_RMS_NORM(8); } else { LAUNCH_FUSED_ADD_RMS_NORM(0); @@ -459,7 +462,8 @@ void poly_norm(torch::Tensor& out, // [..., hidden_size] auto inp_ptr = reinterpret_cast(input.data_ptr()); auto out_ptr = reinterpret_cast(out.data_ptr()); bool ptrs_are_aligned = inp_ptr % 16 == 0 && out_ptr % 16 == 0; - if (ptrs_are_aligned && hidden_size % 8 == 0) { + bool batch_invariant_launch = vllm::vllm_kernel_override_batch_invariant(); + if (ptrs_are_aligned && hidden_size % 8 == 0 && !batch_invariant_launch) { LAUNCH_FUSED_POLY_NORM(8); } else { LAUNCH_FUSED_POLY_NORM(0); diff --git a/csrc/layernorm_quant_kernels.cu b/csrc/layernorm_quant_kernels.cu index be134089bd6d..58c3d9c0981a 100644 --- a/csrc/layernorm_quant_kernels.cu +++ b/csrc/layernorm_quant_kernels.cu @@ -9,6 +9,7 @@ #include "quantization/fp8/common.cuh" #include "dispatch_utils.h" #include "cub_helpers.h" +#include "core/batch_invariant.hpp" #include #include @@ -240,7 +241,9 @@ void fused_add_rms_norm_static_fp8_quant( auto wt_ptr = 
reinterpret_cast(weight.data_ptr()); bool ptrs_are_aligned = inp_ptr % 16 == 0 && res_ptr % 16 == 0 && wt_ptr % 16 == 0; - if (ptrs_are_aligned && hidden_size % 8 == 0 && input_stride % 8 == 0) { + bool batch_invariant_launch = vllm::vllm_kernel_override_batch_invariant(); + if (ptrs_are_aligned && hidden_size % 8 == 0 && input_stride % 8 == 0 && + !batch_invariant_launch) { LAUNCH_FUSED_ADD_RMS_NORM(8); } else { LAUNCH_FUSED_ADD_RMS_NORM(0); diff --git a/csrc/moe/topk_softmax_kernels.cu b/csrc/moe/topk_softmax_kernels.cu index 53573ada86ba..eca021f1c186 100644 --- a/csrc/moe/topk_softmax_kernels.cu +++ b/csrc/moe/topk_softmax_kernels.cu @@ -21,6 +21,7 @@ #include #include "../cuda_compat.h" #include "../cub_helpers.h" +#include "../core/batch_invariant.hpp" #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) @@ -405,7 +406,8 @@ void topkGatingSoftmaxLauncherHelper(const float* input, const bool* finished, f using Constants = detail::TopkConstants; static constexpr int VPT = Constants::VPT; static constexpr int ROWS_PER_WARP = Constants::ROWS_PER_WARP; - const int num_warps = (num_rows + ROWS_PER_WARP - 1) / ROWS_PER_WARP; + const bool batch_invariant_launch = vllm::vllm_kernel_override_batch_invariant(); + const int num_warps = batch_invariant_launch ? 32 : (num_rows + ROWS_PER_WARP - 1) / ROWS_PER_WARP; const int num_blocks = (num_warps + WARPS_PER_TB - 1) / WARPS_PER_TB; dim3 block_dim(WARP_SIZE_PARAM, WARPS_PER_TB); diff --git a/tests/v1/generation/test_batch_invariance.py b/tests/v1/generation/test_batch_invariance.py new file mode 100644 index 000000000000..b864f9a31836 --- /dev/null +++ b/tests/v1/generation/test_batch_invariance.py @@ -0,0 +1,290 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import contextlib +import os +import random +import string + +import pytest +import torch + +from vllm import LLM, SamplingParams + + +def _random_prompt(min_words: int = 1024, max_words: int = 1024 * 2) -> str: + # Lightweight random prompt generator to vary prompt lengths and content. + vocab = [ + "alpha", + "bravo", + "charlie", + "delta", + "echo", + "foxtrot", + "golf", + "hotel", + "india", + "juliet", + "kilo", + "lima", + "mike", + "november", + "oscar", + "papa", + "quebec", + "romeo", + "sierra", + "tango", + "uniform", + "victor", + "whiskey", + "xray", + "yankee", + "zulu", + ] + n = random.randint(min_words, max_words) + words = random.choices(vocab, k=n) + + # Add some noise and punctuation variability + if random.random() < 0.5: + words[0] = words[0].capitalize() + if random.random() < 0.2: + words.append("".join(random.choices(string.ascii_lowercase, k=5))) + punct = random.choice([".", "?", "!", "...", ""]) + return " ".join(words) + punct + + +@pytest.mark.timeout(1000) +def test_v1_generation_is_deterministic_across_batch_sizes_with_needle(): + """ + Ensures that the same request (the 'needle' prompt) yields identical output + whether run alone (bs=1) or mixed into a larger batch (e.g., bs=64), + using the high-level v1 LLM() API only (no manual batching). + + Strategy: + - Create two LLM engines with identical config except max_num_seqs: 1 vs N. + - Compute a baseline output for the needle prompt with the bs=1 engine. + - For many trials, generate a batch (size N) where the needle appears at a + random position among random filler prompts using the bs=N engine. + - Track how many trials match vs mismatch, and report totals at the end. 
+ The test fails if any mismatches occur, but we still dump pass/fail + counts. + + Notes: + - Use seeded stochastic sampling with a fixed seed to test determinism. + - Outputs are intentionally longer and sampled at higher temperature/top_p + to produce a more random-sounding phrase, yet remain deterministic by + seed. + - Keep max_tokens and max_model_len bounded for speed and memory use. + """ + random.seed(12345) + + # Allow overrides from environment (useful for CI tuning) + # "facebook/opt-125m" is too small, doesn't reliably test determinism + model = os.getenv("VLLM_TEST_MODEL", "Qwen/Qwen3-1.7B") + num_trials = int(os.getenv("VLLM_NEEDLE_TRIALS", "5")) + batch_size = int(os.getenv("VLLM_NEEDLE_BATCH_SIZE", "64")) + assert batch_size >= 2, "Batch size should be >= 2 to mix needle." + + # Keep GPU memory usage low to avoid startup allocation failures. + gpu_mem_util = float(os.getenv("VLLM_GPU_MEMORY_UTILIZATION", "0.3")) + max_model_len = int(os.getenv("VLLM_MAX_MODEL_LEN", "4096")) + swap_space_gb = int(os.getenv("VLLM_SWAP_SPACE_GB", "4")) + + # Sampling parameters: longer outputs with a more random-sounding + # continuation,but still deterministic due to fixed seed. + temperature = float(os.getenv("VLLM_NEEDLE_TEMPERATURE", "0.0")) + top_p = float(os.getenv("VLLM_NEEDLE_TOP_P", "0.95")) + max_tokens = int(os.getenv("VLLM_NEEDLE_MAX_TOKENS", "128")) + + sampling = SamplingParams( + temperature=temperature, + top_p=top_p, + max_tokens=max_tokens, + seed=20240919, + ) + + needle_prompt = ("There once was a ") + + llm_bs1 = None + llm_bsN = None + try: + # Engine with bs=1 behavior + llm_bs1 = LLM_with_max_seqs( + model=model, + max_num_seqs=1, + gpu_memory_utilization=gpu_mem_util, + max_model_len=max_model_len, + swap_space=swap_space_gb, + ) + + # Baseline generation for the needle prompt alone. 
+ baseline_out = llm_bs1.generate([needle_prompt], sampling) + assert len(baseline_out) == 1 + assert len(baseline_out[0].outputs) >= 1 + baseline_text = baseline_out[0].outputs[0].text + + # Engine with larger batch limit (e.g., 64) + llm_bsN = LLM_with_max_seqs( + model=model, + max_num_seqs=batch_size, + gpu_memory_utilization=gpu_mem_util, + max_model_len=max_model_len, + swap_space=swap_space_gb, + ) + + mismatches = 0 + + for trial in range(num_trials): + # Create a batch of size `batch_size` and insert the needle at + # a random index + prompts: list[str] = [] + needle_pos = random.randint(0, batch_size - 1) + for i in range(batch_size): + if i == needle_pos: + prompts.append(needle_prompt) + else: + prompts.append(_random_prompt()) + + # Generate with the larger-batch engine + outputs = llm_bsN.generate(prompts, sampling) + # Find the needle output by position + needle_output = outputs[needle_pos] + assert needle_output.prompt == needle_prompt + assert len(needle_output.outputs) >= 1 + text = needle_output.outputs[0].text + + if text != baseline_text: + mismatches += 1 + + passes = num_trials - mismatches + # Dump how many passed vs failed + print(f"[determinism] total={num_trials}, passed={passes}, " + f"failed={mismatches}, batch_size={batch_size}") + + if mismatches > 0: + pytest.fail( + f"Nondeterministic outputs detected: {mismatches} failed out " + f"of {num_trials} trials (batch_size={batch_size}).") + + finally: + # Ensure engines are shutdown to free GPU/VRAM across test sessions + if llm_bs1 is not None: + with contextlib.suppress(Exception): + llm_bs1.shutdown() + if llm_bsN is not None: + with contextlib.suppress(Exception): + llm_bsN.shutdown() + + +def _extract_step_logprobs(request_output): + if getattr(request_output, "outputs", None): + inner = request_output.outputs[0] + if hasattr(inner, "logprobs") and inner.logprobs is not None: + t = torch.tensor( + [ + inner.logprobs[i][tid].logprob + for i, tid in enumerate(inner.token_ids) + ], + dtype=torch.float32, + ) + return t + + return None + + +@pytest.mark.skipif( + not torch.cuda.is_available(), + reason="Requires CUDA to match production inference path.", +) +def test_logprobs_bitwise_batch_invariance_bs1_vs_bs2(): + + #model_name = os.getenv("VLLM_TEST_MODEL", "facebook/opt-125m") + model_name = os.getenv("VLLM_TEST_MODEL", "Qwen/Qwen3-1.7B") + tp_size = int(os.getenv("VLLM_TEST_TP_SIZE", "1")) + + # Force float32 to avoid precision-induced differences. + llm = LLM( + model=model_name, + tensor_parallel_size=tp_size, + enforce_eager=True, # helps reduce nondeterminism from some backends + ) + + prompts = [ + "The capital of France is", + "The capital of Germany is", + ] + + sp = SamplingParams( + temperature=0.0, + top_p=1.0, + max_tokens=8, + # Seed shouldn't matter at temperature=0, but keeping it stable anyway. + seed=1234, + logprobs=5, + ) + + # BS=1: run prompts individually and collect logprobs per step. + bs1_logprobs_per_prompt = [] + for p in prompts: + outs = llm.generate([p], sp, use_tqdm=False) + assert len(outs) == 1 + step_logprobs = _extract_step_logprobs(outs[0]) + if step_logprobs is None: + pytest.skip("Logits are not available on RequestOutput; " + "enable logprobs return to run this test.") + bs1_logprobs_per_prompt.append(step_logprobs) + + # BS=2: run prompts in a batch and collect logprobs per step for each + # prompt. 
+ outs_batched = llm.generate(prompts, sp, use_tqdm=False) + assert len(outs_batched) == len(prompts) + bs2_logprobs_per_prompt = [] + for o in outs_batched: + step_logprobs = _extract_step_logprobs(o) + if step_logprobs is None: + pytest.skip("Logits are not available on RequestOutput; " + "enable logprobs return to run this test.") + bs2_logprobs_per_prompt.append(step_logprobs) + + # Compare step-by-step logprobs for each prompt between BS=1 and BS=2 runs. + for i, (logprobs_bs1, logprobs_bs2) in enumerate( + zip(bs1_logprobs_per_prompt, bs2_logprobs_per_prompt)): + assert len(logprobs_bs1) == len(logprobs_bs2), ( + f"Different number of generation steps for prompt index {i}: " + f"{len(logprobs_bs1)} (BS=1) vs {len(logprobs_bs2)} (BS=2)") + for t, (a, b) in enumerate(zip(logprobs_bs1, logprobs_bs2)): + assert a.shape == b.shape, ( + f"Logits shape mismatch at prompt {i}, step {t}: " + f"{a.shape} vs {b.shape}") + # Bitwise exact equality. + assert torch.equal( + a, b), (f"Bitwise logprobs mismatch at prompt {i}, step {t} " + f"(dtype={a.dtype}, shape={a.shape}).") + + +def LLM_with_max_seqs( + model: str, + max_num_seqs: int, + gpu_memory_utilization: float, + max_model_len: int, + swap_space: int, +) -> LLM: + """ + Helper to construct an LLM with a specific max_num_seqs (batch-size limit) + using the high-level v1 LLM API, while constraining memory usage. + """ + return LLM( + model=model, + max_num_seqs=max_num_seqs, + # Constrain GPU memory pool so test can run even on busy GPUs. + gpu_memory_utilization=gpu_memory_utilization, + # Keep KV cache footprint small while allowing longer outputs. + max_model_len=max_model_len, + # Allow some CPU offload if needed. + swap_space=swap_space, + # Keep things lean and CI-friendly. + dtype="float16", + # Single-GPU by default; override externally if desired. 
+ tensor_parallel_size=int(os.getenv("VLLM_TP_SIZE", "1")), + trust_remote_code=os.getenv("VLLM_TRUST_REMOTE_CODE", "0") == "1", + ) diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py new file mode 100644 index 000000000000..ae2c842af698 --- /dev/null +++ b/vllm/model_executor/layers/batch_invariant.py @@ -0,0 +1,561 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import contextlib +import os +from collections import namedtuple +from collections.abc import Callable +from typing import Any, Union + +import torch +import triton +import triton.language as tl + + +def _matmul_launch_metadata(grid: Callable[..., Any], kernel: Any, + args: dict[str, Any]) -> dict[str, Any]: + ret = {} + m, n, k = args["M"], args["N"], args["K"] + ret["name"] = f"{kernel.name} [M={m}, N={n}, K={k}]" + if "tiles_per_update" in args: + ret["name"] = (f"{kernel.name} [M={m}, N={n}, K={k}, " + f"tiles_per_update={args['tiles_per_update']:02}]") + if "c_ptr" in args: + bytes_per_elem = args["c_ptr"].element_size() + else: + bytes_per_elem = 1 if args["FP8_OUTPUT"] else 2 + ret[f"flops{bytes_per_elem * 8}"] = 2.0 * m * n * k + ret["bytes"] = bytes_per_elem * (m * k + n * k + m * n) + return ret + + +@triton.jit +def _compute_pid(tile_id, num_pid_in_group, num_pid_m, GROUP_SIZE_M, NUM_SMS): + group_id = tile_id // num_pid_in_group + first_pid_m = group_id * GROUP_SIZE_M + group_size_m = min(num_pid_m - first_pid_m, GROUP_SIZE_M) + pid_m = first_pid_m + (tile_id % group_size_m) + pid_n = (tile_id % num_pid_in_group) // group_size_m + return pid_m, pid_n + + +@triton.jit(launch_metadata=_matmul_launch_metadata) +def matmul_kernel_persistent( + a_ptr, + b_ptr, + c_ptr, # + bias_ptr, + M, + N, + K, # + stride_am, + stride_ak, + stride_bk, + stride_bn, + stride_cm, + stride_cn, + BLOCK_SIZE_M: tl.constexpr, # + BLOCK_SIZE_N: tl.constexpr, # + BLOCK_SIZE_K: tl.constexpr, # + GROUP_SIZE_M: tl.constexpr, # + NUM_SMS: tl.constexpr, # + A_LARGE: tl.constexpr, + B_LARGE: tl.constexpr, + C_LARGE: tl.constexpr, + HAS_BIAS: tl.constexpr, +): + start_pid = tl.program_id(axis=0) + num_pid_m = tl.cdiv(M, BLOCK_SIZE_M) + num_pid_n = tl.cdiv(N, BLOCK_SIZE_N) + k_tiles = tl.cdiv(K, BLOCK_SIZE_K) + num_tiles = num_pid_m * num_pid_n + + tile_id_c = start_pid - NUM_SMS + + offs_k_for_mask = tl.arange(0, BLOCK_SIZE_K) + num_pid_in_group = GROUP_SIZE_M * num_pid_n + + for tile_id in tl.range(start_pid, num_tiles, NUM_SMS, flatten=True): + pid_m, pid_n = _compute_pid(tile_id, num_pid_in_group, num_pid_m, + GROUP_SIZE_M, NUM_SMS) + start_m = pid_m * BLOCK_SIZE_M + start_n = pid_n * BLOCK_SIZE_N + offs_am = start_m + tl.arange(0, BLOCK_SIZE_M) + offs_bn = start_n + tl.arange(0, BLOCK_SIZE_N) + if A_LARGE: + offs_am = offs_am.to(tl.int64) + if B_LARGE: + offs_bn = offs_bn.to(tl.int64) + offs_am = tl.where(offs_am < M, offs_am, 0) + offs_bn = tl.where(offs_bn < N, offs_bn, 0) + offs_am = tl.max_contiguous(tl.multiple_of(offs_am, BLOCK_SIZE_M), + BLOCK_SIZE_M) + offs_bn = tl.max_contiguous(tl.multiple_of(offs_bn, BLOCK_SIZE_N), + BLOCK_SIZE_N) + + accumulator = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) + for ki in range(k_tiles): + if A_LARGE or B_LARGE: + offs_k = ki * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K).to( + tl.int64) + else: + offs_k = ki * BLOCK_SIZE_K + tl.arange(0, BLOCK_SIZE_K) + a_ptrs = a_ptr + (offs_am[:, None] * stride_am + + offs_k[None, :] * stride_ak) + b_ptrs = b_ptr + (offs_k[:, None] * 
stride_bk + + offs_bn[None, :] * stride_bn) + + a = tl.load(a_ptrs, + mask=offs_k_for_mask[None, :] < K - ki * BLOCK_SIZE_K, + other=0.0) + b = tl.load(b_ptrs, + mask=offs_k_for_mask[:, None] < K - ki * BLOCK_SIZE_K, + other=0.0) + accumulator = tl.dot(a, b, accumulator) + + tile_id_c += NUM_SMS + pid_m, pid_n = _compute_pid(tile_id_c, num_pid_in_group, num_pid_m, + GROUP_SIZE_M, NUM_SMS) + offs_cm = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + offs_cn = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) + if C_LARGE: + offs_cm = offs_cm.to(tl.int64) + offs_cn = offs_cn.to(tl.int64) + c_ptrs = c_ptr + stride_cm * offs_cm[:, None] + stride_cn * offs_cn[ + None, :] + c_mask = (offs_cm[:, None] < M) & (offs_cn[None, :] < N) + if HAS_BIAS: + bias_ptrs = bias_ptr + offs_cn + bias = tl.load(bias_ptrs, mask=offs_cn < N, + other=0.0).to(tl.float32) + accumulator += bias + if c_ptr.dtype.element_ty == tl.float8e4nv: + c = accumulator.to(tl.float8e4nv) + else: + c = accumulator.to(tl.float16) + tl.store(c_ptrs, c, mask=c_mask) + + +def matmul_persistent(a: torch.Tensor, + b: torch.Tensor, + bias: Union[torch.Tensor, None] = None): + # Check constraints. + assert a.shape[1] == b.shape[0], "Incompatible dimensions" + assert a.dtype == b.dtype, "Incompatible dtypes" + assert bias is None or bias.dim() == 1, ( + "Currently assuming bias is 1D, let Horace know if you run into this") + NUM_SMS = torch.cuda.get_device_properties("cuda").multi_processor_count + M, K = a.shape + K, N = b.shape + dtype = a.dtype + # Allocates output. + c = torch.empty((M, N), device=a.device, dtype=dtype) + + # 1D launch kernel where each block gets its own program. + def grid(META): + return (min( + NUM_SMS, + triton.cdiv(M, META["BLOCK_SIZE_M"]) * + triton.cdiv(N, META["BLOCK_SIZE_N"])), ) + + configs = { + torch.bfloat16: { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_stages": 3, + "num_warps": 8, + }, + torch.float16: { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 8, + "num_stages": 3, + "num_warps": 8, + }, + torch.float32: { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 32, + "GROUP_SIZE_M": 8, + "num_stages": 3, + "num_warps": 8, + }, + } + # print(a.device, b.device, c.device) + matmul_kernel_persistent[grid]( + a, + b, + c, # + bias, + M, + N, + K, # + a.stride(0), + a.stride(1), # + b.stride(0), + b.stride(1), # + c.stride(0), + c.stride(1), # + NUM_SMS=NUM_SMS, # + A_LARGE=a.numel() > 2**31, + B_LARGE=b.numel() > 2**31, + C_LARGE=c.numel() > 2**31, + HAS_BIAS=bias is not None, + **configs[dtype], + ) + return c + + +@triton.jit +def _log_softmax_kernel( + input_ptr, + output_ptr, + input_row_stride, + output_row_stride, + n_cols, + BLOCK_SIZE: tl.constexpr, +): + """ + Compute log_softmax along the last dimension of a 2D tensor. + Each block handles one row of the input tensor. 
+ """ + # Get the row index for this block + row_idx = tl.program_id(0).to(tl.int64) + + # Compute base pointers for input and output rows + row_start_ptr = input_ptr + row_idx * input_row_stride + output_row_start_ptr = output_ptr + row_idx * output_row_stride + + # Step 1: Find maximum value in the row for numerical stability + max_val = -float("inf") + for col_offset in range(0, n_cols, BLOCK_SIZE): + col_idx = col_offset + tl.arange(0, BLOCK_SIZE) + mask = col_idx < n_cols + + # Load values + vals = tl.load(row_start_ptr + col_idx, mask=mask, other=-float("inf")) + + # Update maximum + max_val = tl.max(tl.maximum(vals, max_val)) + + # Step 2: Compute sum of exp(x - max_val) + sum_exp = 0.0 + for col_offset in range(0, n_cols, BLOCK_SIZE): + col_idx = col_offset + tl.arange(0, BLOCK_SIZE) + mask = col_idx < n_cols + + # Load values + vals = tl.load(row_start_ptr + col_idx, mask=mask, other=0.0) + + # Compute exp(x - max_val) and accumulate + exp_vals = tl.exp(vals - max_val) + sum_exp += tl.sum(tl.where(mask, exp_vals, 0.0)) + + # Compute log(sum_exp) + log_sum_exp = tl.log(sum_exp) + + # Step 3: Compute final log_softmax values: x - max_val - log_sum_exp + for col_offset in range(0, n_cols, BLOCK_SIZE): + col_idx = col_offset + tl.arange(0, BLOCK_SIZE) + mask = col_idx < n_cols + + # Load values + vals = tl.load(row_start_ptr + col_idx, mask=mask) + + # Compute log_softmax + output = vals - max_val - log_sum_exp + + # Store results + tl.store(output_row_start_ptr + col_idx, output, mask=mask) + + +def log_softmax(input: torch.Tensor, dim: int = -1) -> torch.Tensor: + """ + Compute log_softmax using Triton kernel. + + Args: + input: Input tensor + dim: Dimension along which to compute log_softmax + (only -1 or last dim supported) + >> Stashed changes + Returns: + Tensor with log_softmax applied along the specified dimension + """ + if dim != -1 and dim != input.ndim - 1: + raise ValueError("This implementation only supports log_softmax along " + "the last dimension") + + # Flatten all dimensions except the last one + original_shape = input.shape + input_2d = input.reshape(-1, input.shape[-1]) + input_2d = input_2d.contiguous() + + n_rows, n_cols = input_2d.shape + + # Allocate output tensor + output = torch.empty_like(input_2d) + + # Choose block size based on the number of columns + BLOCK_SIZE = 1024 + + # Launch kernel with one block per row + grid = (n_rows, ) + _log_softmax_kernel[grid]( + input_2d, + output, + input_2d.stride(0), + output.stride(0), + n_cols, + BLOCK_SIZE=BLOCK_SIZE, + ) + # Reshape output back to original shape + return output.reshape(original_shape) + + +@triton.jit +def mean_kernel( + input_ptr, + output_ptr, + input_stride0, + input_stride1, + input_stride2, + output_stride0, + output_stride1, + M, # size before reduction dim + N, # size of reduction dim + K, # size after reduction dim + BLOCK_SIZE: tl.constexpr, +): + """ + Kernel for computing mean along a single dimension. + Input is viewed as (M, N, K) where N is the dimension being reduced. 
+ """ + # Program ID gives us which output element we're computing + pid = tl.program_id(0) + + # Compute output indices + m_idx = pid // K + k_idx = pid % K + + # Bounds check + if m_idx >= M or k_idx >= K: + return + + # Accumulate sum across reduction dimension + acc = 0.0 + for n_start in range(0, N, BLOCK_SIZE): + n_offsets = n_start + tl.arange(0, BLOCK_SIZE) + mask = n_offsets < N + + # Calculate input indices + input_idx = m_idx * input_stride0 + n_offsets * input_stride1 \ + + k_idx * input_stride2 + + # Load and accumulate + vals = tl.load(input_ptr + input_idx, mask=mask, other=0.0) + acc += tl.sum(vals) + + # Compute mean and store + mean_val = acc / N + output_idx = m_idx * output_stride0 + k_idx * output_stride1 + tl.store(output_ptr + output_idx, mean_val) + + +def mean_dim(input: torch.Tensor, + dim: int, + keepdim: bool = False, + dtype: Union[torch.dtype, None] = None) -> torch.Tensor: + """ + Triton implementation of torch.mean with single dimension reduction. + + Args: + input: Input tensor + dim: Single dimension along which to compute mean + keepdim: Whether to keep the reduced dimension + dtype: Output dtype. If None, uses input dtype + (or float32 for integer inputs) + + Returns: + Tensor with mean values along specified dimension + """ + # Validate inputs + assert input.is_cuda, "Input must be a CUDA tensor" + assert -input.ndim <= dim < input.ndim, ( + f"Invalid dimension {dim} for tensor with {input.ndim} dimensions") + + # Handle negative dim + if dim < 0: + dim = dim + input.ndim + + # Handle dtype + if dtype is None: + if input.dtype in [torch.int8, torch.int16, torch.int32, torch.int64]: + dtype = torch.float32 + else: + dtype = input.dtype + + # Convert input to appropriate dtype if needed + if input.dtype != dtype: + input = input.to(dtype) + + # Get input shape and strides + shape = list(input.shape) + + # Calculate dimensions for kernel + M = 1 + for i in range(dim): + M *= shape[i] + + N = shape[dim] + + K = 1 + for i in range(dim + 1, len(shape)): + K *= shape[i] + + # Reshape input to 3D view (M, N, K) + input_3d = input.reshape(M, N, K) + + # Create output shape + if keepdim: + output_shape = shape.copy() + output_shape[dim] = 1 + else: + output_shape = shape[:dim] + shape[dim + 1:] + + # Create output tensor + output = torch.empty(output_shape, dtype=dtype, device=input.device) + + # Reshape output for kernel + if keepdim: + output_2d = output.reshape(M, 1, K).squeeze(1) + else: + output_2d = output.reshape(M, K) + + # Launch kernel + grid = (M * K, ) + BLOCK_SIZE = 1024 + + mean_kernel[grid]( + input_3d, + output_2d, + input_3d.stride(0), + input_3d.stride(1), + input_3d.stride(2), + output_2d.stride(0), + output_2d.stride(1) if output_2d.ndim > 1 else 0, + M, + N, + K, + BLOCK_SIZE, + ) + + return output + + +def mm_batch_invariant(a, b): + return matmul_persistent(a, b) + + +def addmm_batch_invariant(bias, a, b): + return matmul_persistent(a, b, bias=bias) + + +def _log_softmax_batch_invariant(input, dim, _half_to_float): + assert not _half_to_float, "not implemented" + return log_softmax(input, dim=dim) + + +def mean_batch_invariant(input, + dim, + keepdim=False, + dtype: Union[torch.dtype, None] = None): + assert dtype is None or dtype == torch.float32, \ + f"unsupported dtype: {dtype}" + + result = input.to(torch.float32) + + # Sort dimensions to reduce from largest to smallest to handle shifting dims + # during iterative reduction. + sorted_dims = sorted([d % input.ndim for d in dim], reverse=True) + + # Iteratively apply a deterministic mean. 
+ for d in sorted_dims: + result = mean_dim(result, dim=d, keepdim=True) + + if not keepdim: + # Squeeze the reduced dimensions. + for d in sorted_dims: + result = result.squeeze(d) + + return result + + +_batch_invariant_MODE = False +_batch_invariant_LIB = None + + +def is_batch_invariant_mode_enabled(): + return _batch_invariant_MODE + + +def enable_batch_invariant_mode(): + global _batch_invariant_MODE, _batch_invariant_LIB + if _batch_invariant_MODE: + return + + _batch_invariant_MODE = True + _batch_invariant_LIB = torch.library.Library("aten", "IMPL") + _batch_invariant_LIB.impl("aten::mm", mm_batch_invariant, "CUDA") + _batch_invariant_LIB.impl("aten::addmm", addmm_batch_invariant, "CUDA") + _batch_invariant_LIB.impl("aten::_log_softmax", + _log_softmax_batch_invariant, "CUDA") + _batch_invariant_LIB.impl("aten::mean.dim", mean_batch_invariant, "CUDA") + + +def disable_batch_invariant_mode(): + global _batch_invariant_MODE, _batch_invariant_LIB + if _batch_invariant_LIB is not None: + _batch_invariant_LIB._destroy() + _batch_invariant_MODE = False + _batch_invariant_LIB = None + + +@contextlib.contextmanager +def set_batch_invariant_mode(enabled: bool = True): + global _batch_invariant_MODE, _batch_invariant_LIB + old_data = (_batch_invariant_MODE, _batch_invariant_LIB) + if enabled: + enable_batch_invariant_mode() + else: + disable_batch_invariant_mode() + yield + if _batch_invariant_LIB is not None: + _batch_invariant_LIB._destroy() + _batch_invariant_MODE, _batch_invariant_LIB = old_data + + +AttentionBlockSize = namedtuple("AttentionBlockSize", ["block_m", "block_n"]) + + +def get_batch_invariant_attention_block_size() -> AttentionBlockSize: + return AttentionBlockSize(block_m=16, block_n=16) + + +def vllm_kernel_override_batch_invariant(): + env_key = "VLLM_KERNEL_OVERRIDE_BATCH_INVARIANT" + is_overridden = False + val = os.getenv(env_key, "0") + try: + is_overridden = int(val) != 0 + except ValueError: + is_overridden = False + return is_overridden + + +def init_batch_invariance(): + # this will hit all the csrc overrides as well + if vllm_kernel_override_batch_invariant(): + os.environ["VLLM_ATTENTION_BACKEND"] = "FLEX_ATTENTION" + enable_batch_invariant_mode() diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py index c3358bfa74e9..807b8d987a2d 100644 --- a/vllm/v1/attention/backends/flex_attention.py +++ b/vllm/v1/attention/backends/flex_attention.py @@ -18,6 +18,8 @@ is_quantized_kv_cache) from vllm.config import VllmConfig from vllm.logger import init_logger +from vllm.model_executor.layers.batch_invariant import ( + vllm_kernel_override_batch_invariant) from vllm.utils import cdiv, is_torch_equal_or_newer from vllm.v1.attention.backends.utils import (AttentionMetadataBuilder, CommonAttentionMetadata) @@ -839,6 +841,11 @@ def get_kernel_options(query, block_m, block_n, kernel_options: dict[str, Union[int, bool]] = { "FORCE_USE_FLEX_ATTENTION": True, } + if vllm_kernel_override_batch_invariant(): + kernel_options["BLOCK_M"] = 16 + kernel_options["BLOCK_N"] = 16 + kernel_options["IS_DIVISIBLE"] = False + return kernel_options if use_direct_build: kernel_options["BLOCK_M"] = block_m kernel_options["BLOCK_N"] = block_n diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 4fd4f9128c6e..f87a327d02a5 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -192,6 +192,9 @@ def __init__( from vllm.model_executor.models.utils import 
set_cpu_offload_max_bytes set_cpu_offload_max_bytes( int(self.cache_config.cpu_offload_gb * 1024**3)) + from vllm.model_executor.layers.batch_invariant import ( + init_batch_invariance) + init_batch_invariance() model_config = self.model_config cache_config = self.cache_config From 4e33a7ea85cb702090c07fb7a8ebdbf44c472f5c Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Fri, 26 Sep 2025 17:07:36 -0700 Subject: [PATCH 435/518] [Bugfix] Optimize CpuGpuBuffer initialization (#25447) Signed-off-by: Naman Lalit --- vllm/v1/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/utils.py b/vllm/v1/utils.py index ec4417290f61..ee0c1168f3cd 100644 --- a/vllm/v1/utils.py +++ b/vllm/v1/utils.py @@ -117,7 +117,7 @@ def __init__( dtype=dtype, device="cpu", pin_memory=pin_memory) - self.gpu = self.cpu.to(device) + self.gpu = torch.zeros_like(self.cpu, device=device) self.np: np.ndarray # To keep type hints simple (avoiding generics and subclasses), we # only conditionally create the numpy array attribute. This can cause From 6f5c0931c1f618b2ca8668d114fec2d26cecfd8c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jonas=20M=2E=20K=C3=BCbler?= <44084297+jmkuebler@users.noreply.github.com> Date: Sat, 27 Sep 2025 02:10:21 +0200 Subject: [PATCH 436/518] [Spec decode] automatically disable mm for text-only draft models (#25667) Signed-off-by: Jonas Kuebler --- tests/v1/e2e/test_spec_decode.py | 136 ++++++++++++++++--------------- vllm/v1/spec_decode/eagle.py | 14 ++++ 2 files changed, 83 insertions(+), 67 deletions(-) diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index c4efd7548b81..ea8d94722859 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -8,7 +8,7 @@ import pytest import torch -from tests.utils import get_attn_backend_list_based_on_platform +from tests.utils import get_attn_backend_list_based_on_platform, large_gpu_mark from vllm import LLM, SamplingParams from vllm.assets.base import VLLM_S3_BUCKET_URL from vllm.assets.image import VLM_IMAGES_DIR @@ -88,69 +88,66 @@ def test_ngram_correctness( Compare the outputs of an original LLM and a speculative LLM should be the same when using ngram speculative decoding. ''' - with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1") - test_prompts = get_test_prompts(mm_enabled=False) - - ref_llm = LLM(model=model_name, max_model_len=1024) - ref_outputs = ref_llm.chat(test_prompts, sampling_config) - del ref_llm - torch.cuda.empty_cache() - cleanup_dist_env_and_memory() - - spec_llm = LLM( - model=model_name, - speculative_config={ - "method": "ngram", - "prompt_lookup_max": 5, - "prompt_lookup_min": 3, - "num_speculative_tokens": 3, - }, - max_model_len=1024, - ) - spec_outputs = spec_llm.chat(test_prompts, sampling_config) - matches = 0 - misses = 0 - for ref_output, spec_output in zip(ref_outputs, spec_outputs): - if ref_output.outputs[0].text == spec_output.outputs[0].text: - matches += 1 - else: - misses += 1 - print(f"ref_output: {ref_output.outputs[0].text}") - print(f"spec_output: {spec_output.outputs[0].text}") - - # Heuristic: expect at least 66% of the prompts to match exactly - # Upon failure, inspect the outputs to check for inaccuracy. 
- assert matches >= int(0.66 * len(ref_outputs)) - del spec_llm - torch.cuda.empty_cache() - cleanup_dist_env_and_memory() - - -@pytest.mark.parametrize(["model_setup", "mm_enabled"], [ - (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False), - (("eagle", "meta-llama/Llama-3.1-8B-Instruct", - "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False), - (("eagle3", "meta-llama/Llama-3.1-8B-Instruct", - "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False), - pytest.param( - ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4), - False, - marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")), - pytest.param( - ("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4), - True, - marks=pytest.mark.skip(reason="Skipping due to CI OOM issues")), - (("eagle", "eagle618/deepseek-v3-random", - "eagle618/eagle-deepseek-v3-random", 1), False), -], - ids=[ - "qwen3_eagle3", "llama3_eagle", "llama3_eagle3", - "llama4_eagle", "llama4_eagle_mm", - "deepseek_eagle" - ]) + test_prompts = get_test_prompts(mm_enabled=False) + + ref_llm = LLM(model=model_name, max_model_len=1024) + ref_outputs = ref_llm.chat(test_prompts, sampling_config) + del ref_llm + torch.cuda.empty_cache() + cleanup_dist_env_and_memory() + + spec_llm = LLM( + model=model_name, + speculative_config={ + "method": "ngram", + "prompt_lookup_max": 5, + "prompt_lookup_min": 3, + "num_speculative_tokens": 3, + }, + max_model_len=1024, + ) + spec_outputs = spec_llm.chat(test_prompts, sampling_config) + matches = 0 + misses = 0 + for ref_output, spec_output in zip(ref_outputs, spec_outputs): + if ref_output.outputs[0].text == spec_output.outputs[0].text: + matches += 1 + else: + misses += 1 + print(f"ref_output: {ref_output.outputs[0].text}") + print(f"spec_output: {spec_output.outputs[0].text}") + + # Heuristic: expect at least 66% of the prompts to match exactly + # Upon failure, inspect the outputs to check for inaccuracy. 
+ assert matches >= int(0.66 * len(ref_outputs)) + del spec_llm + torch.cuda.empty_cache() + cleanup_dist_env_and_memory() + + +@pytest.mark.parametrize( + ["model_setup", "mm_enabled"], + [ + (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False), + (("eagle", "meta-llama/Llama-3.1-8B-Instruct", + "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False), + (("eagle3", "meta-llama/Llama-3.1-8B-Instruct", + "yuhuili/EAGLE3-LLaMA3.1-Instruct-8B", 1), False), + pytest.param(("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4), + False, + marks=large_gpu_mark(min_gb=80)), # works on 4x H100 + pytest.param(("eagle", "meta-llama/Llama-4-Scout-17B-16E-Instruct", + "morgendave/EAGLE-Llama-4-Scout-17B-16E-Instruct", 4), + True, + marks=large_gpu_mark(min_gb=80)), # works on 4x H100 + (("eagle", "eagle618/deepseek-v3-random", + "eagle618/eagle-deepseek-v3-random", 1), False), + ], + ids=[ + "qwen3_eagle3", "llama3_eagle", "llama3_eagle3", "llama4_eagle", + "llama4_eagle_mm", "deepseek_eagle" + ]) @pytest.mark.parametrize("attn_backend", get_attn_backend_list_based_on_platform()) def test_eagle_correctness( @@ -174,9 +171,14 @@ def test_eagle_correctness( model_setup: (method, model_name, eagle_model_name, tp_size) ''' with monkeypatch.context() as m: - m.setenv("VLLM_USE_V1", "1") - m.setenv("VLLM_MLA_DISABLE", "1") - m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) + if "Llama-4-Scout" in model_setup[1] and attn_backend == "FLASH_ATTN": + # Scout requires default backend selection + # because vision encoder has head_dim 88 being incompatible + # with FLASH_ATTN and needs to fall back to Flex Attn + pass + else: + m.setenv("VLLM_MLA_DISABLE", "1") + m.setenv("VLLM_ATTENTION_BACKEND", attn_backend) if (attn_backend == "TRITON_ATTN" and not current_platform.is_rocm()): pytest.skip("TRITON_ATTN does not support " diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 57da8346f497..394df48b4153 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -804,6 +804,20 @@ def load_model(self, target_model: nn.Module) -> None: self.attn_layer_names = list(draft_attn_layer_names) + if self.is_multimodal_model: + # Even if the target model is multimodal, we can also use + # text-only draft models + try: + dummy_input_ids = torch.tensor([[1]], + device=self.input_ids.device) + self.model.get_input_embeddings(dummy_input_ids, + multimodal_embeddings=None) + except (NotImplementedError, AttributeError, TypeError): + logger.warning( + "Draft model does not support multimodal inputs, " + "falling back to text-only mode") + self.is_multimodal_model = False + if supports_multimodal(target_model): # handle multimodality self.model.config.image_token_index = ( From 8bf8f4582208ac7af230512ff5f3ac1dc36d5222 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Fri, 26 Sep 2025 17:16:40 -0700 Subject: [PATCH 437/518] [Core] Don't count preempted tokens in prefix cache hit rate (#25787) Signed-off-by: Zhuohan Li --- vllm/v1/core/kv_cache_manager.py | 24 ++++++++---- vllm/v1/core/sched/scheduler.py | 66 ++++++++++++++++---------------- vllm/v1/metrics/stats.py | 8 +++- vllm/v1/request.py | 3 ++ 4 files changed, 60 insertions(+), 41 deletions(-) diff --git a/vllm/v1/core/kv_cache_manager.py b/vllm/v1/core/kv_cache_manager.py index 401327f727a4..0af98e7ba2d8 100644 --- a/vllm/v1/core/kv_cache_manager.py +++ b/vllm/v1/core/kv_cache_manager.py @@ -27,8 +27,8 @@ class KVCacheBlocks: `blocks[i][j]` refers to the i-th 
kv_cache_group and the j-th block of tokens.We don't use block of tokens as the outer dimension because it assumes all - kv_cache_groups have the same number of blocks, which is true for now but - will be broken if we want to give different block_size to different + kv_cache_groups have the same number of blocks, which is true for now but + will be broken if we want to give different block_size to different kv_cache_groups in the future. """ @@ -184,9 +184,17 @@ def get_computed_blocks(self, if self.log_stats: assert self.prefix_cache_stats is not None - self.prefix_cache_stats.requests += 1 - self.prefix_cache_stats.queries += request.num_tokens - self.prefix_cache_stats.hits += num_new_computed_tokens + if request.num_preemptions > 0: + # Previously preempted request + self.prefix_cache_stats.preempted_requests += 1 + self.prefix_cache_stats.preempted_queries += request.num_tokens + self.prefix_cache_stats.preempted_hits += ( + num_new_computed_tokens) + else: + # New request + self.prefix_cache_stats.requests += 1 + self.prefix_cache_stats.queries += request.num_tokens + self.prefix_cache_stats.hits += num_new_computed_tokens return KVCacheBlocks(computed_blocks), num_new_computed_tokens @@ -209,10 +217,10 @@ def allocate_slots( already been computed locally (i.e. new_computed_blocks). num_new_computed_tokens: The number of new computed tokens just hitting the prefix caching, excluding external tokens. - new_computed_blocks: The cached blocks for the above new computed + new_computed_blocks: The cached blocks for the above new computed tokens. num_lookahead_tokens: The number of speculative tokens to allocate. - This is used by spec decode proposers with kv-cache such + This is used by spec decode proposers with kv-cache such as eagle. delay_cache_blocks: Whether to skip caching the blocks. This is used by P/D when allocating blocks used in a KV transfer @@ -365,7 +373,7 @@ def get_num_common_prefix_blocks( requests in the current step. Returns: - list[int]: The number of common prefix blocks for each kv cache + list[int]: The number of common prefix blocks for each kv cache group. """ assert request.status == RequestStatus.RUNNING diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 7fc4776b0261..10d8f6bbda5c 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -251,46 +251,48 @@ def schedule(self) -> SchedulerOutput: req_index += 1 continue + # Schedule newly needed KV blocks for the request. while True: new_blocks = self.kv_cache_manager.allocate_slots( request, num_new_tokens, num_lookahead_tokens=self.num_lookahead_tokens) - if new_blocks is None: - # The request cannot be scheduled. - # Preempt the lowest-priority request. - if self.policy == SchedulingPolicy.PRIORITY: - preempted_req = max( - self.running, - key=lambda r: (r.priority, r.arrival_time), - ) - self.running.remove(preempted_req) - if preempted_req in scheduled_running_reqs: - scheduled_running_reqs.remove(preempted_req) - else: - preempted_req = self.running.pop() - - self.kv_cache_manager.free(preempted_req) - self.encoder_cache_manager.free(preempted_req) - preempted_req.status = RequestStatus.PREEMPTED - preempted_req.num_computed_tokens = 0 - if self.log_stats: - preempted_req.record_event( - EngineCoreEventType.PREEMPTED, scheduled_timestamp) - - self.waiting.prepend_request(preempted_req) - preempted_reqs.append(preempted_req) - if preempted_req == request: - # No more request to preempt. 
- can_schedule = False - break - else: + + if new_blocks is not None: # The request can be scheduled. - can_schedule = True break - if not can_schedule: + + # The request cannot be scheduled. + # Preempt the lowest-priority request. + if self.policy == SchedulingPolicy.PRIORITY: + preempted_req = max( + self.running, + key=lambda r: (r.priority, r.arrival_time), + ) + self.running.remove(preempted_req) + if preempted_req in scheduled_running_reqs: + scheduled_running_reqs.remove(preempted_req) + else: + preempted_req = self.running.pop() + + self.kv_cache_manager.free(preempted_req) + self.encoder_cache_manager.free(preempted_req) + preempted_req.status = RequestStatus.PREEMPTED + preempted_req.num_computed_tokens = 0 + preempted_req.num_preemptions += 1 + if self.log_stats: + preempted_req.record_event(EngineCoreEventType.PREEMPTED, + scheduled_timestamp) + + self.waiting.prepend_request(preempted_req) + preempted_reqs.append(preempted_req) + if preempted_req == request: + # No more request to preempt. Cannot schedule this request. + break + + if new_blocks is None: + # Cannot schedule this request. break - assert new_blocks is not None # Schedule the request. scheduled_running_reqs.append(request) diff --git a/vllm/v1/metrics/stats.py b/vllm/v1/metrics/stats.py index 296c39e8cdb5..a0d571318ba0 100644 --- a/vllm/v1/metrics/stats.py +++ b/vllm/v1/metrics/stats.py @@ -17,13 +17,19 @@ class PrefixCacheStats: """Stores prefix cache hit statistics.""" # Whether reset_prefix_cache was invoked. reset: bool = False - # The number of requests in this update. + # The number of new requests in this update. requests: int = 0 # The number of queries in these requests. Note that "queries" here # means the number of tokens that were queried from the cache. queries: int = 0 # The number of hits in these requests. hits: int = 0 + # The number of previously preempted requests in this update. + preempted_requests: int = 0 + # The `queries` number for preempted requests. + preempted_queries: int = 0 + # The `hits` number for preempted requests. 
+ preempted_hits: int = 0 @dataclass diff --git a/vllm/v1/request.py b/vllm/v1/request.py index ff10fa00c1cf..dd0aea645d74 100644 --- a/vllm/v1/request.py +++ b/vllm/v1/request.py @@ -115,6 +115,9 @@ def __init__( # indicates that the output is corrupted self.num_nans_in_logits = 0 + # The number of requests being preempted by the scheduler + self.num_preemptions = 0 + self.block_hashes: list[BlockHash] = [] self.get_hash_new_full_blocks: Optional[Callable[ [], list[BlockHash]]] = None From 3958b96bf5f771560053b752424b1e7caba04a61 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Fri, 26 Sep 2025 21:23:52 -0400 Subject: [PATCH 438/518] Add option to restrict media domains (#25783) Signed-off-by: Chenheli Hua Signed-off-by: Russell Bryant Co-authored-by: Chenheli Hua --- docs/features/multimodal_inputs.md | 4 +++ docs/usage/security.md | 6 ++++ .../entrypoints/openai/test_lora_resolvers.py | 1 + tests/entrypoints/openai/test_serving_chat.py | 1 + tests/multimodal/test_utils.py | 33 ++++++++++++++++++- vllm/config/model.py | 3 ++ vllm/config/speculative.py | 2 ++ vllm/engine/arg_utils.py | 5 +++ vllm/entrypoints/chat_utils.py | 6 ++++ vllm/entrypoints/llm.py | 4 +++ vllm/multimodal/utils.py | 16 +++++++++ 11 files changed, 80 insertions(+), 1 deletion(-) diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md index 7fb033723500..bcc48e756046 100644 --- a/docs/features/multimodal_inputs.md +++ b/docs/features/multimodal_inputs.md @@ -6,6 +6,10 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup We are actively iterating on multi-modal support. See [this RFC](gh-issue:4194) for upcoming changes, and [open an issue on GitHub](https://github.com/vllm-project/vllm/issues/new/choose) if you have any feedback or feature requests. +!!! tip + When serving multi-modal models, consider setting `--allowed-media-domains` to restrict domain that vLLM can access to prevent it from accessing arbitrary endpoints that can potentially be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this arg. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com` + This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks. + ## Offline Inference To input multi-modal data, follow this schema in [vllm.inputs.PromptType][]: diff --git a/docs/usage/security.md b/docs/usage/security.md index d54e2bb37ec0..5d85e889c80c 100644 --- a/docs/usage/security.md +++ b/docs/usage/security.md @@ -60,6 +60,12 @@ Key points from the PyTorch security guide: - Implement proper authentication and authorization for management interfaces - Follow the principle of least privilege for all system components +### 4. **Restrict Domains Access for Media URLs:** + +Restrict domains that vLLM can access for media URLs by setting +`--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks. +(e.g. 
`--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`) + ## Security and Firewalls: Protecting Exposed vLLM Systems While vLLM is designed to allow unsafe network services to be isolated to diff --git a/tests/entrypoints/openai/test_lora_resolvers.py b/tests/entrypoints/openai/test_lora_resolvers.py index 9d5ee84a1956..0561158dcf65 100644 --- a/tests/entrypoints/openai/test_lora_resolvers.py +++ b/tests/entrypoints/openai/test_lora_resolvers.py @@ -45,6 +45,7 @@ class MockModelConfig: logits_processor_pattern: Optional[str] = None diff_sampling_param: Optional[dict] = None allowed_local_media_path: str = "" + allowed_media_domains: Optional[list[str]] = None encoder_config = None generation_config: str = "auto" skip_tokenizer_init: bool = False diff --git a/tests/entrypoints/openai/test_serving_chat.py b/tests/entrypoints/openai/test_serving_chat.py index bfed760822cd..07f39fe2b9bd 100644 --- a/tests/entrypoints/openai/test_serving_chat.py +++ b/tests/entrypoints/openai/test_serving_chat.py @@ -240,6 +240,7 @@ class MockModelConfig: logits_processor_pattern = None diff_sampling_param: Optional[dict] = None allowed_local_media_path: str = "" + allowed_media_domains: Optional[list[str]] = None encoder_config = None generation_config: str = "auto" media_io_kwargs: dict[str, dict[str, Any]] = field(default_factory=dict) diff --git a/tests/multimodal/test_utils.py b/tests/multimodal/test_utils.py index f6a93bae2afc..d1a7882a4c37 100644 --- a/tests/multimodal/test_utils.py +++ b/tests/multimodal/test_utils.py @@ -66,7 +66,12 @@ async def test_fetch_image_http(image_url: str): @pytest.mark.parametrize("suffix", get_supported_suffixes()) async def test_fetch_image_base64(url_images: dict[str, Image.Image], raw_image_url: str, suffix: str): - connector = MediaConnector() + connector = MediaConnector( + # Domain restriction should not apply to data URLs. + allowed_media_domains=[ + "www.bogotobogo.com", + "github.com", + ]) url_image = url_images[raw_image_url] try: @@ -387,3 +392,29 @@ def test_argsort_mm_positions(case): modality_idxs = argsort_mm_positions(mm_positions) assert modality_idxs == expected_modality_idxs + + +@pytest.mark.asyncio +@pytest.mark.parametrize("video_url", TEST_VIDEO_URLS) +@pytest.mark.parametrize("num_frames", [-1, 32, 1800]) +async def test_allowed_media_domains(video_url: str, num_frames: int): + connector = MediaConnector( + media_io_kwargs={"video": { + "num_frames": num_frames, + }}, + allowed_media_domains=[ + "www.bogotobogo.com", + "github.com", + ]) + + video_sync, metadata_sync = connector.fetch_video(video_url) + video_async, metadata_async = await connector.fetch_video_async(video_url) + assert np.array_equal(video_sync, video_async) + assert metadata_sync == metadata_async + + disallowed_url = "https://upload.wikimedia.org/wikipedia/commons/4/47/PNG_transparency_demonstration_1.png" + with pytest.raises(ValueError): + _, _ = connector.fetch_video(disallowed_url) + + with pytest.raises(ValueError): + _, _ = await connector.fetch_video_async(disallowed_url) diff --git a/vllm/config/model.py b/vllm/config/model.py index da01d6d4480c..b2b68abd2c1d 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -137,6 +137,9 @@ class ModelConfig: """Allowing API requests to read local images or videos from directories specified by the server file system. This is a security risk. 
Should only be enabled in trusted environments.""" + allowed_media_domains: Optional[list[str]] = None + """If set, only media URLs that belong to this domain can be used for + multi-modal inputs. """ revision: Optional[str] = None """The specific model version to use. It can be a branch name, a tag name, or a commit id. If unspecified, will use the default version.""" diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index 8b80ce13f96e..cb4f0ae2cee0 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -288,6 +288,8 @@ def __post_init__(self): trust_remote_code, allowed_local_media_path=self.target_model_config. allowed_local_media_path, + allowed_media_domains=self.target_model_config. + allowed_media_domains, dtype=self.target_model_config.dtype, seed=self.target_model_config.seed, revision=self.revision, diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 8757f4b8b7ba..6bb794177db8 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -297,6 +297,8 @@ class EngineArgs: tokenizer_mode: TokenizerMode = ModelConfig.tokenizer_mode trust_remote_code: bool = ModelConfig.trust_remote_code allowed_local_media_path: str = ModelConfig.allowed_local_media_path + allowed_media_domains: Optional[ + list[str]] = ModelConfig.allowed_media_domains download_dir: Optional[str] = LoadConfig.download_dir safetensors_load_strategy: str = LoadConfig.safetensors_load_strategy load_format: Union[str, LoadFormats] = LoadConfig.load_format @@ -531,6 +533,8 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: **model_kwargs["hf_config_path"]) model_group.add_argument("--allowed-local-media-path", **model_kwargs["allowed_local_media_path"]) + model_group.add_argument("--allowed-media-domains", + **model_kwargs["allowed_media_domains"]) model_group.add_argument("--revision", **model_kwargs["revision"]) model_group.add_argument("--code-revision", **model_kwargs["code_revision"]) @@ -997,6 +1001,7 @@ def create_model_config(self) -> ModelConfig: tokenizer_mode=self.tokenizer_mode, trust_remote_code=self.trust_remote_code, allowed_local_media_path=self.allowed_local_media_path, + allowed_media_domains=self.allowed_media_domains, dtype=self.dtype, seed=self.seed, revision=self.revision, diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index df49119d8642..4e1ecb9ed4c5 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -632,6 +632,10 @@ def model_cls(self) -> type[SupportsMultiModal]: def allowed_local_media_path(self): return self._model_config.allowed_local_media_path + @property + def allowed_media_domains(self): + return self._model_config.allowed_media_domains + @property def mm_registry(self): return MULTIMODAL_REGISTRY @@ -832,6 +836,7 @@ def __init__(self, tracker: MultiModalItemTracker) -> None: self._connector = MediaConnector( media_io_kwargs=media_io_kwargs, allowed_local_media_path=tracker.allowed_local_media_path, + allowed_media_domains=tracker.allowed_media_domains, ) def parse_image( @@ -916,6 +921,7 @@ def __init__(self, tracker: AsyncMultiModalItemTracker) -> None: self._connector = MediaConnector( media_io_kwargs=media_io_kwargs, allowed_local_media_path=tracker.allowed_local_media_path, + allowed_media_domains=tracker.allowed_media_domains, ) def parse_image( diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py index dfe535b95917..862f383e4ecb 100644 --- a/vllm/entrypoints/llm.py +++ b/vllm/entrypoints/llm.py @@ -86,6 +86,8 @@ 
class LLM: or videos from directories specified by the server file system. This is a security risk. Should only be enabled in trusted environments. + allowed_media_domains: If set, only media URLs that belong to this + domain can be used for multi-modal inputs. tensor_parallel_size: The number of GPUs to use for distributed execution with tensor parallelism. dtype: The data type for the model weights and activations. Currently, @@ -169,6 +171,7 @@ def __init__( skip_tokenizer_init: bool = False, trust_remote_code: bool = False, allowed_local_media_path: str = "", + allowed_media_domains: Optional[list[str]] = None, tensor_parallel_size: int = 1, dtype: ModelDType = "auto", quantization: Optional[QuantizationMethods] = None, @@ -264,6 +267,7 @@ def __init__( skip_tokenizer_init=skip_tokenizer_init, trust_remote_code=trust_remote_code, allowed_local_media_path=allowed_local_media_path, + allowed_media_domains=allowed_media_domains, tensor_parallel_size=tensor_parallel_size, dtype=dtype, quantization=quantization, diff --git a/vllm/multimodal/utils.py b/vllm/multimodal/utils.py index 9b158267040a..1f1eea6bfee7 100644 --- a/vllm/multimodal/utils.py +++ b/vllm/multimodal/utils.py @@ -50,6 +50,7 @@ def __init__( connection: HTTPConnection = global_http_connection, *, allowed_local_media_path: str = "", + allowed_media_domains: Optional[list[str]] = None, ) -> None: """ Args: @@ -82,6 +83,9 @@ def __init__( allowed_local_media_path_ = None self.allowed_local_media_path = allowed_local_media_path_ + if allowed_media_domains is None: + allowed_media_domains = [] + self.allowed_media_domains = allowed_media_domains def _load_data_url( self, @@ -115,6 +119,14 @@ def _load_file_url( return media_io.load_file(filepath) + def _assert_url_in_allowed_media_domains(self, url_spec) -> None: + if self.allowed_media_domains and url_spec.hostname not in \ + self.allowed_media_domains: + raise ValueError( + f"The URL must be from one of the allowed domains: " + f"{self.allowed_media_domains}. 
Input URL domain: " + f"{url_spec.hostname}") + def load_from_url( self, url: str, @@ -125,6 +137,8 @@ def load_from_url( url_spec = urlparse(url) if url_spec.scheme.startswith("http"): + self._assert_url_in_allowed_media_domains(url_spec) + connection = self.connection data = connection.get_bytes(url, timeout=fetch_timeout) @@ -150,6 +164,8 @@ async def load_from_url_async( loop = asyncio.get_running_loop() if url_spec.scheme.startswith("http"): + self._assert_url_in_allowed_media_domains(url_spec) + connection = self.connection data = await connection.async_get_bytes(url, timeout=fetch_timeout) future = loop.run_in_executor(global_thread_pool, From 92da847cf5f4eedf0bc9fed45d7c076be78b8c1f Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Fri, 26 Sep 2025 21:54:09 -0400 Subject: [PATCH 439/518] Add flashinfer-build.sh and register precompiled cu128 wheel in Dockerfile (#25782) Signed-off-by: mgoin --- docker/Dockerfile | 30 ++++++++++++------- tools/flashinfer-build.sh | 63 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 83 insertions(+), 10 deletions(-) create mode 100644 tools/flashinfer-build.sh diff --git a/docker/Dockerfile b/docker/Dockerfile index c0f55a7eeba0..fad62be798a1 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -391,18 +391,28 @@ RUN --mount=type=cache,target=/root/.cache/uv bash - <<'BASH' git clone --depth 1 --recursive --shallow-submodules \ --branch ${FLASHINFER_GIT_REF} \ ${FLASHINFER_GIT_REPO} flashinfer + # Exclude CUDA arches for older versions (11.x and 12.0-12.7) + # TODO: Update this to allow setting TORCH_CUDA_ARCH_LIST as a build arg. + if [[ "${CUDA_VERSION}" == 11.* ]]; then + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9" + elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" + else + # CUDA 12.8+ supports 10.0a and 12.0 + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0" + fi pushd flashinfer - if [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then - # Exclude CUDA arches for older versions (11.x and 12.0-12.7) - # TODO: Update this to allow setting TORCH_CUDA_ARCH_LIST as a build arg. - if [[ "${CUDA_VERSION}" == 11.* ]]; then - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9" - elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" - else - # CUDA 12.8+ supports 10.0a and 12.0 - FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0" + if [[ "${CUDA_VERSION}" == 12.8.* ]] && [ "$TARGETPLATFORM" = "linux/amd64" ]; then + # NOTE: To make new precompiled wheels, see tools/flashinfer-build.sh + echo "🏗️ Installing FlashInfer from pre-compiled wheel" + uv pip install --system https://wheels.vllm.ai/flashinfer-python/flashinfer_python-0.3.1-cp39-abi3-manylinux1_x86_64.whl \ + --extra-index-url ${PYTORCH_CUDA_INDEX_BASE_URL}/cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + if [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then + # Download pre-compiled cubins + TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" \ + python3 -m flashinfer --download-cubin || echo "WARNING: Failed to download flashinfer cubins." 
fi + elif [ "${FLASHINFER_AOT_COMPILE}" = "true" ]; then echo "🏗️ Installing FlashInfer with AOT compilation for arches: ${FI_TORCH_CUDA_ARCH_LIST}" export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" # HACK: We need these to run flashinfer.aot before installing flashinfer, get from the package in the future diff --git a/tools/flashinfer-build.sh b/tools/flashinfer-build.sh new file mode 100644 index 000000000000..6c14d87348c3 --- /dev/null +++ b/tools/flashinfer-build.sh @@ -0,0 +1,63 @@ +#!/usr/bin/env bash +# This script is used to build FlashInfer wheels with AOT kernels + +set -ex + +# FlashInfer configuration +FLASHINFER_GIT_REPO="https://github.com/flashinfer-ai/flashinfer.git" +FLASHINFER_GIT_REF="${FLASHINFER_GIT_REF}" +CUDA_VERSION="${CUDA_VERSION}" +BUILD_WHEEL="${BUILD_WHEEL:-true}" + +if [[ -z "${FLASHINFER_GIT_REF}" ]]; then + echo "❌ FLASHINFER_GIT_REF must be specified" >&2 + exit 1 +fi + +if [[ -z "${CUDA_VERSION}" ]]; then + echo "❌ CUDA_VERSION must be specified" >&2 + exit 1 +fi + +echo "🏗️ Building FlashInfer ${FLASHINFER_GIT_REF} for CUDA ${CUDA_VERSION}" + +# Clone FlashInfer +git clone --depth 1 --recursive --shallow-submodules \ + --branch ${FLASHINFER_GIT_REF} \ + ${FLASHINFER_GIT_REPO} flashinfer + +# Set CUDA arch list based on CUDA version +# Exclude CUDA arches for older versions (11.x and 12.0-12.7) +if [[ "${CUDA_VERSION}" == 11.* ]]; then + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9" +elif [[ "${CUDA_VERSION}" == 12.[0-7]* ]]; then + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a" +else + # CUDA 12.8+ supports 10.0a and 12.0 + FI_TORCH_CUDA_ARCH_LIST="7.5 8.0 8.9 9.0a 10.0a 12.0" +fi + +echo "🏗️ Building FlashInfer AOT for arches: ${FI_TORCH_CUDA_ARCH_LIST}" + +pushd flashinfer + # Make sure the wheel is built for the correct CUDA version + export UV_TORCH_BACKEND=cu$(echo $CUDA_VERSION | cut -d. -f1,2 | tr -d '.') + + # Build AOT kernels + export TORCH_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" + export FLASHINFER_CUDA_ARCH_LIST="${FI_TORCH_CUDA_ARCH_LIST}" + python3 -m flashinfer.aot + + if [[ "${BUILD_WHEEL}" == "true" ]]; then + # Build wheel for distribution + uv build --no-build-isolation --wheel --out-dir ../flashinfer-dist . + echo "✅ FlashInfer wheel built successfully in flashinfer-dist/" + else + # Install directly (for Dockerfile) + uv pip install --system --no-build-isolation --force-reinstall . 
+ echo "✅ FlashInfer installed successfully" + fi +popd + +# Cleanup +rm -rf flashinfer \ No newline at end of file From f1d53d150c5cd9c7d94db296793fc25f955ea8a9 Mon Sep 17 00:00:00 2001 From: WeiQing Chen <40507679+david6666666@users.noreply.github.com> Date: Sat, 27 Sep 2025 11:35:47 +0800 Subject: [PATCH 440/518] [Multimodal][Speculative Decoding]Eagle Eagle3 mm support, enablement on qwen2.5vl (#22872) Signed-off-by: Junhong Signed-off-by: Junhong Liu <98734602+LJH-LBJ@users.noreply.github.com> Co-authored-by: Junhong Co-authored-by: LJH-LBJ <98734602+LJH-LBJ@users.noreply.github.com> --- tests/models/registry.py | 3 + tests/v1/e2e/test_spec_decode.py | 9 +- vllm/benchmarks/datasets.py | 80 +++++++++++++++ vllm/model_executor/models/llama_eagle3.py | 27 ++++-- vllm/model_executor/models/qwen2_5_vl.py | 11 ++- vllm/model_executor/models/registry.py | 1 + vllm/v1/spec_decode/eagle.py | 108 +++++++++++++++------ vllm/v1/worker/gpu_model_runner.py | 16 ++- 8 files changed, 210 insertions(+), 45 deletions(-) diff --git a/tests/models/registry.py b/tests/models/registry.py index 6a6e2538559f..e321acc873c6 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -651,6 +651,9 @@ def check_available_online( "MiMoMTPModel": _HfExamplesInfo("XiaomiMiMo/MiMo-7B-RL", trust_remote_code=True, speculative_model="XiaomiMiMo/MiMo-7B-RL"), + "Eagle3Qwen2_5vlForCausalLM": _HfExamplesInfo( + "Qwen/Qwen2.5-VL-7B-Instruct", + speculative_model="Rayzl/qwen2.5-vl-7b-eagle3-sgl"), "Qwen3NextMTP": _HfExamplesInfo("Qwen/Qwen3-Next-80B-A3B-Instruct", min_transformers_version="4.56.3"), } diff --git a/tests/v1/e2e/test_spec_decode.py b/tests/v1/e2e/test_spec_decode.py index ea8d94722859..8f048775352e 100644 --- a/tests/v1/e2e/test_spec_decode.py +++ b/tests/v1/e2e/test_spec_decode.py @@ -129,6 +129,11 @@ def test_ngram_correctness( ["model_setup", "mm_enabled"], [ (("eagle3", "Qwen/Qwen3-8B", "AngelSlim/Qwen3-8B_eagle3", 1), False), + pytest.param(("eagle3", "Qwen/Qwen2.5-VL-7B-Instruct", + "Rayzl/qwen2.5-vl-7b-eagle3-sgl", 1), + False, + marks=pytest.mark.skip(reason="Skipping due to its " \ + "head_dim not being a a multiple of 32")), (("eagle", "meta-llama/Llama-3.1-8B-Instruct", "yuhuili/EAGLE-LLaMA3.1-Instruct-8B", 1), False), (("eagle3", "meta-llama/Llama-3.1-8B-Instruct", @@ -145,8 +150,8 @@ def test_ngram_correctness( "eagle618/eagle-deepseek-v3-random", 1), False), ], ids=[ - "qwen3_eagle3", "llama3_eagle", "llama3_eagle3", "llama4_eagle", - "llama4_eagle_mm", "deepseek_eagle" + "qwen3_eagle3", "qwen2_5_vl_eagle3", "llama3_eagle", "llama3_eagle3", + "llama4_eagle", "llama4_eagle_mm", "deepseek_eagle" ]) @pytest.mark.parametrize("attn_backend", get_attn_backend_list_based_on_platform()) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index 68a937d5750e..f0c0d829a393 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -1450,6 +1450,13 @@ def get_samples(args, tokenizer) -> list[SampleRequest]: ): dataset_class = MLPerfDataset args.hf_split = "train" + elif ( + args.dataset_path in MMStarDataset.SUPPORTED_DATASET_PATHS + or args.hf_name in MMStarDataset.SUPPORTED_DATASET_PATHS + ): + dataset_class = MMStarDataset + args.hf_split = "val" + args.hf_subset = None else: supported_datasets = set([ dataset_name for cls in HuggingFaceDataset.__subclasses__() @@ -2721,3 +2728,76 @@ def _generate_exact_length_tokens(target_length: int) -> list[int]: random.shuffle(requests) return requests + + +# 
----------------------------------------------------------------------------- +# MMStar Dataset Implementation +# ----------------------------------------------------------------------------- + + +class MMStarDataset(HuggingFaceDataset): + """ + Lin-Chen/MMStar: https://huggingface.co/datasets/Lin-Chen/MMStar + refer to: https://github.com/sgl-project/SpecForge/pull/106 + """ + DEFAULT_OUTPUT_LEN = 128 + SUPPORTED_DATASET_PATHS = {"Lin-Chen/MMStar"} + IS_MULTIMODAL = True + + def sample( + self, + tokenizer: PreTrainedTokenizerBase, + num_requests: int, + output_len: Optional[int] = None, + enable_multimodal_chat: bool = False, + request_id_prefix: str = "", + no_oversample: bool = False, + **kwargs, + ) -> list[SampleRequest]: + # If --hf-output-len is not set, use the default output length. + output_len = (output_len + if output_len is not None else self.DEFAULT_OUTPUT_LEN) + sampled_requests: list[SampleRequest] = [] + + for ind, item in enumerate(self.data): + if len(sampled_requests) >= num_requests: + break + # Split the question text from options + # (keep only the part before "Options:"). + full_q: str = item.get("question", "") + question_text = full_q.split("Options:", 1)[0].strip() + + # Multimodal image content. + mm_content = process_image(item["image"]) + + # Compute prompt token length (note: this is plain text length + # if enable_multimodal_chat is False). + prompt_len = len(tokenizer(question_text).input_ids) + + if enable_multimodal_chat: + # If multimodal content should be embedded in the chat message, + # convert to [{"role":"user","content":[...]}] + prompt = self.apply_multimodal_chat_transformation( + question_text, mm_content + ) + mm_for_request = None # Already embedded in chat content. + else: + # Default: prompt is plain text, + # image is in mm_content for the bench to assemble. 
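+                # The image is attached separately via
+                # SampleRequest.multi_modal_data below.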
+ prompt = question_text + mm_for_request = mm_content + + sampled_requests.append( + SampleRequest( + prompt=prompt, + prompt_len=prompt_len, + expected_output_len=output_len, + multi_modal_data=mm_for_request, + request_id=request_id_prefix + str(ind), + ) + ) + + self.maybe_oversample_requests( + sampled_requests, num_requests, request_id_prefix, no_oversample + ) + return sampled_requests diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index b99a1547918e..55b6ae6ee0e9 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -8,7 +8,6 @@ import torch.nn as nn from transformers import LlamaConfig -from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config from vllm.logger import init_logger from vllm.model_executor.layers.layernorm import RMSNorm @@ -19,6 +18,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.interfaces import MultiModalEmbeddings from vllm.model_executor.models.llama import (LlamaDecoderLayer, LlamaForCausalLM) @@ -102,7 +102,6 @@ def forward( return hidden_states, residual -@support_torch_compile class LlamaModel(nn.Module): def __init__( @@ -145,13 +144,21 @@ def __init__( eps=self.config.rms_norm_eps, ) + def get_input_embeddings( + self, + input_ids: torch.Tensor, + ) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, positions: torch.Tensor, hidden_states: torch.Tensor, + input_embeds: Optional[torch.Tensor] = None, ) -> tuple[torch.Tensor, torch.Tensor]: - input_embeds = self.embed_tokens(input_ids) + if input_embeds is None: + input_embeds = self.get_input_embeddings(input_ids) assert hidden_states.shape[-1] == input_embeds.shape[-1] residual = None @@ -239,11 +246,7 @@ def forward( hidden_states: torch.Tensor, inputs_embeds: Optional[torch.Tensor] = None, ) -> tuple[torch.Tensor, torch.Tensor]: - if inputs_embeds is not None: - raise NotImplementedError( - f"{type(self).__name__} does not support multimodal inputs yet." 
- ) - return self.model(input_ids, positions, hidden_states) + return self.model(input_ids, positions, hidden_states, inputs_embeds) def compute_logits( self, @@ -299,3 +302,11 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): skip_substrs=skip_substrs, ) loader.load_weights(model_weights.items()) + + def get_input_embeddings( + self, + input_ids: torch.Tensor, + multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + ) -> torch.Tensor: + inputs_embeds = self.model.get_input_embeddings(input_ids) + return inputs_embeds diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 6af6faa2b296..3199f53a0539 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -68,7 +68,7 @@ from vllm.utils import is_pin_memory_available from vllm.utils.tensor_schema import TensorSchema, TensorShape -from .interfaces import (MultiModalEmbeddings, SupportsLoRA, +from .interfaces import (MultiModalEmbeddings, SupportsEagle3, SupportsLoRA, SupportsMultiModal, SupportsMultiModalPruning, SupportsPP, SupportsQuant) from .qwen2_vl import Qwen2VLDummyInputsBuilder as Qwen2_5_VLDummyInputsBuilder @@ -965,7 +965,7 @@ def get_replacement_qwen2vl(item_idx: int, modality: str): dummy_inputs=Qwen2_5_VLDummyInputsBuilder) class Qwen2_5_VLForConditionalGeneration(nn.Module, SupportsMultiModal, SupportsLoRA, SupportsPP, - SupportsQuant, + SupportsQuant, SupportsEagle3, SupportsMultiModalPruning): packed_modules_mapping = { @@ -1028,6 +1028,13 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors) + def set_aux_hidden_state_layers(self, layers: tuple[int, ...]) -> None: + self.language_model.model.aux_hidden_state_layers = layers + + def get_eagle3_aux_hidden_state_layers(self) -> tuple[int, ...]: + num_layers = len(self.language_model.model.layers) + return (2, num_layers // 2, num_layers - 3) + def _validate_and_reshape_mm_tensor(self, mm_input: object, name: str) -> torch.Tensor: if not isinstance(mm_input, (torch.Tensor, list)): diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 10e9aa4db078..0471164ab8a6 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -286,6 +286,7 @@ "EagleMiniCPMForCausalLM": ("minicpm_eagle", "EagleMiniCPMForCausalLM"), "Eagle3LlamaForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"), "LlamaForCausalLMEagle3": ("llama_eagle3", "Eagle3LlamaForCausalLM"), + "Eagle3Qwen2_5vlForCausalLM": ("llama_eagle3", "Eagle3LlamaForCausalLM"), "EagleDeepSeekMTPModel": ("deepseek_eagle", "EagleDeepseekV3ForCausalLM"), "DeepSeekMTPModel": ("deepseek_mtp", "DeepSeekMTP"), "ErnieMTPModel": ("ernie_mtp", "ErnieMTP"), diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 394df48b4153..51e54e0dc337 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -80,9 +80,17 @@ def __init__( self.input_ids = torch.zeros(self.max_num_tokens, dtype=torch.int32, device=device) - self.positions = torch.zeros(self.max_num_tokens, - dtype=torch.int64, - device=device) + self.uses_mrope = self.vllm_config.model_config.uses_mrope + if self.uses_mrope: + # M-RoPE need (3, max_num_tokens) + self.mrope_positions = torch.zeros((3, self.max_num_tokens), + dtype=torch.int64, + device=device) + else: + # RoPE need (max_num_tokens,) + self.positions = 
torch.zeros(self.max_num_tokens, + dtype=torch.int64, + device=device) self.hidden_states = torch.zeros( (self.max_num_tokens, self.hidden_size), dtype=self.dtype, @@ -143,11 +151,22 @@ def __init__( dtype=torch.int32, ).repeat(max_batch_size, 1) + def _get_positions(self, num_tokens: int): + if self.uses_mrope: + return self.mrope_positions[:, :num_tokens] + return self.positions[:num_tokens] + + def _set_positions(self, num_tokens: int, positions: torch.Tensor): + if self.uses_mrope: + self.mrope_positions[:, :num_tokens] = positions + else: + self.positions[:num_tokens] = positions + def propose( self, # [num_tokens] target_token_ids: torch.Tensor, - # [num_tokens] + # [num_tokens] or [3, num_tokens] when M-RoPE is enabled target_positions: torch.Tensor, # [num_tokens, hidden_size] target_hidden_states: torch.Tensor, @@ -198,7 +217,7 @@ def propose( else: num_input_tokens = num_tokens # copy inputs to buffer for cudagraph - self.positions[:num_tokens] = target_positions + self._set_positions(num_tokens, target_positions) self.hidden_states[:num_tokens] = target_hidden_states if self.is_multimodal_model: input_ids = self.input_ids[:num_tokens] @@ -218,7 +237,7 @@ def propose( num_tokens=num_input_tokens): ret_hidden_states = self.model( input_ids=input_ids, - positions=self.positions[:num_input_tokens], + positions=self._get_positions(num_input_tokens), hidden_states=self.hidden_states[:num_input_tokens], inputs_embeds=inputs_embeds, ) @@ -235,7 +254,10 @@ def propose( draft_token_ids = logits.argmax(dim=-1) return draft_token_ids.view(-1, 1) - positions = target_positions[last_token_indices] + if self.uses_mrope: + positions = target_positions[:, last_token_indices] + else: + positions = target_positions[last_token_indices] if self.method in ("deepseek_mtp", "ernie_mtp", "longcat_flash_mtp"): hidden_states = self.hidden_states[last_token_indices] else: @@ -282,25 +304,34 @@ def propose( # cast to int32 is crucial when eagle model is compiled. # tensor.argmax() returns int64 by default. input_ids = draft_token_ids_list[-1].int() - positions += 1 - - # NOTE(woosuk): We should handle the case where the draft model - # generates tokens beyond the max model length. Since it is complex - # to remove such requests from the batch, we keep them in the batch - # but adjust the position ids and slot mappings to avoid the - # out-of-range access during the model execution. The draft tokens - # generated with this adjustment should be ignored. - exceeds_max_model_len = positions >= self.max_model_len - # Mask out the position ids that exceed the max model length. - # Otherwise, we may get out-of-range error in RoPE. - clamped_positions = torch.where(exceeds_max_model_len, 0, - positions) + if self.uses_mrope: + positions += 1 + # NOTE(woosuk): We should handle the case where the draft model + # generates tokens beyond the max model length. + # Since it is complex to remove such requests from the batch, + # we keep them in the batch but adjust the position ids + # and slot mappings to avoid the + # out-of-range access during the model execution. + # The draft tokens generated with this adjustment + # should be ignored. + exceeds_max_model_len = positions[0] >= self.max_model_len + # Mask out the position ids that exceed the max model length. + # Otherwise, we may get out-of-range error in RoPE. 
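+                # unsqueeze(0) lets the per-request mask broadcast across all
+                # three M-RoPE position rows when clamping.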
+ clamped_positions = torch.where\ + (exceeds_max_model_len.unsqueeze(0), \ + torch.zeros_like(positions), positions) + else: + positions += 1 + exceeds_max_model_len = positions >= self.max_model_len + clamped_positions = torch.where(exceeds_max_model_len, 0, + positions) # Increment the sequence lengths. common_attn_metadata.seq_lens += 1 common_attn_metadata.seq_lens_cpu += 1 # For the requests that exceed the max model length, we set the # sequence length to 1 to minimize their overheads in attention. + common_attn_metadata.seq_lens.masked_fill_(exceeds_max_model_len, 1) @@ -308,13 +339,22 @@ def propose( common_attn_metadata.seq_lens_cpu - 1 # Compute the slot mapping. - block_numbers = clamped_positions // self.block_size + if self.uses_mrope: + # all dimensions of positions are the same + block_numbers = clamped_positions[0] // self.block_size + else: + block_numbers = clamped_positions // self.block_size block_ids = common_attn_metadata.block_table_tensor.gather( dim=1, index=block_numbers.view(-1, 1)) block_ids = block_ids.view(-1) - common_attn_metadata.slot_mapping = ( - block_ids * self.block_size + - clamped_positions % self.block_size) + if self.uses_mrope: + common_attn_metadata.slot_mapping = ( + block_ids * self.block_size + + clamped_positions[0] % self.block_size) + else: + common_attn_metadata.slot_mapping = ( + block_ids * self.block_size + + clamped_positions % self.block_size) # Mask out the slot mappings that exceed the max model length. # Otherwise, the KV cache will be inadvertently updated with the # padding tokens. @@ -330,7 +370,7 @@ def propose( # copy inputs to buffer for cudagraph self.input_ids[:batch_size] = input_ids - self.positions[:batch_size] = clamped_positions + self._set_positions(batch_size, clamped_positions) self.hidden_states[:batch_size] = hidden_states if self.is_multimodal_model: inputs_embeds = self.model.get_input_embeddings(input_ids) @@ -347,7 +387,7 @@ def propose( num_tokens=input_batch_size): ret_hidden_states = self.model( input_ids=input_ids, - positions=self.positions[:input_batch_size], + positions=self._get_positions(input_batch_size), hidden_states=self.hidden_states[:input_batch_size], inputs_embeds=inputs_embeds, ) @@ -787,6 +827,11 @@ def prepare_inputs( return spec_common_attn_metadata, token_indices + def get_model_name(self, model: nn.Module) -> str: + if hasattr(model, 'module'): # multi-GPU + model = model.module + return model.__class__.__name__ + def load_model(self, target_model: nn.Module) -> None: draft_model_config = \ self.vllm_config.speculative_config.draft_model_config @@ -820,8 +865,13 @@ def load_model(self, target_model: nn.Module) -> None: if supports_multimodal(target_model): # handle multimodality - self.model.config.image_token_index = ( - target_model.config.image_token_index) + if (self.get_model_name(target_model) == + "Qwen2_5_VLForConditionalGeneration"): + self.model.config.image_token_index = ( + target_model.config.image_token_id) + else: + self.model.config.image_token_index = ( + target_model.config.image_token_index) target_language_model = target_model.get_language_model() else: target_language_model = target_model @@ -892,7 +942,7 @@ def dummy_run( self.model( input_ids=input_ids, - positions=self.positions[:num_tokens], + positions=self._get_positions(num_tokens), hidden_states=self.hidden_states[:num_tokens], inputs_embeds=inputs_embeds, ) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f87a327d02a5..22a177dd7cc7 100644 --- 
a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -442,6 +442,16 @@ def __init__( device="cpu", pin_memory=self.pin_memory) + def _get_positions(self, num_tokens: Any): + if isinstance(num_tokens, int): + if self.uses_mrope: + return self.mrope_positions.gpu[:, :num_tokens] + return self.positions.gpu[:num_tokens] + else: + if self.uses_mrope: + return self.mrope_positions.gpu[:, num_tokens] + return self.positions.gpu[num_tokens] + def _make_buffer(self, *size: Union[int, torch.SymInt], dtype: torch.dtype, @@ -2544,8 +2554,7 @@ def propose_draft_token_ids( token_indices_to_sample = None # input_ids can be None for multimodal models. target_token_ids = self.input_ids.gpu[:num_scheduled_tokens] - # TODO(woosuk): Support M-RoPE. - target_positions = self.positions.gpu[:num_scheduled_tokens] + target_positions = self._get_positions(num_scheduled_tokens) if self.use_aux_hidden_state_outputs: assert aux_hidden_states is not None target_hidden_states = torch.cat( @@ -2570,8 +2579,7 @@ def propose_draft_token_ids( valid_sampled_tokens_count) target_token_ids = self.input_ids.gpu[token_indices] - # TODO(woosuk): Support M-RoPE. - target_positions = self.positions.gpu[token_indices] + target_positions = self._get_positions(token_indices) if self.use_aux_hidden_state_outputs: assert aux_hidden_states is not None target_hidden_states = torch.cat( From c242c98031b87d00999e07dbb4aa9b2a70798c6c Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Fri, 26 Sep 2025 23:44:52 -0400 Subject: [PATCH 441/518] [Bugfix] Allow Only SDPA Backend for ViT on B200 for Qwen3-VL (#25788) --- vllm/model_executor/models/qwen2_5_vl.py | 73 ++++++++++++------------ vllm/model_executor/models/qwen3_vl.py | 53 ++++++++++++----- 2 files changed, 75 insertions(+), 51 deletions(-) diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index 3199f53a0539..adb21373056c 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -274,6 +274,8 @@ def __init__( quant_config: Optional[QuantizationConfig] = None, prefix: str = "", use_data_parallel: bool = False, + attn_backend: _Backend = _Backend.TORCH_SDPA, + use_upstream_fa: bool = False, ) -> None: super().__init__() # Per attention head and per partition values. @@ -300,25 +302,8 @@ def __init__( quant_config=quant_config, prefix=f"{prefix}.proj", disable_tp=use_data_parallel) - - # Detect attention implementation. - self.attn_backend = get_vit_attn_backend( - head_size=self.hidden_size_per_attention_head, - dtype=torch.get_default_dtype()) - self.use_upstream_fa = False - if self.attn_backend != _Backend.FLASH_ATTN and \ - check_upstream_fa_availability( - torch.get_default_dtype()): - self.attn_backend = _Backend.FLASH_ATTN - self.use_upstream_fa = True - - if self.attn_backend not in { - _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS, - _Backend.ROCM_AITER_FA - }: - raise RuntimeError( - f"Qwen2.5-VL does not support {self.attn_backend} backend now." 
- ) + self.attn_backend = attn_backend + self.use_upstream_fa = use_upstream_fa self.is_flash_attn_backend = self.attn_backend in { _Backend.FLASH_ATTN, _Backend.ROCM_AITER_FA } @@ -443,6 +428,8 @@ def __init__( quant_config: Optional[QuantizationConfig] = None, prefix: str = "", use_data_parallel: bool = False, + attn_backend: _Backend = _Backend.TORCH_SDPA, + use_upstream_fa: bool = False, ) -> None: super().__init__() if norm_layer is None: @@ -455,7 +442,9 @@ def __init__( projection_size=dim, quant_config=quant_config, prefix=f"{prefix}.attn", - use_data_parallel=use_data_parallel) + use_data_parallel=use_data_parallel, + attn_backend=attn_backend, + use_upstream_fa=use_upstream_fa) self.mlp = Qwen2_5_VisionMLP(dim, mlp_hidden_dim, act_fn=act_fn, @@ -627,17 +616,35 @@ def __init__( head_dim = self.hidden_size // self.num_heads self.rotary_pos_emb = Qwen2_5_VisionRotaryEmbedding(head_dim // 2) + use_upstream_fa = False + self.attn_backend = get_vit_attn_backend( + head_size=head_dim, dtype=torch.get_default_dtype()) + if self.attn_backend != _Backend.FLASH_ATTN and \ + check_upstream_fa_availability( + torch.get_default_dtype()): + self.attn_backend = _Backend.FLASH_ATTN + use_upstream_fa = True + + if self.attn_backend not in { + _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS, + _Backend.ROCM_AITER_FA + }: + raise RuntimeError( + f"Qwen2.5-VL does not support {self.attn_backend} backend now." + ) + self.blocks = nn.ModuleList([ - Qwen2_5_VisionBlock(dim=self.hidden_size, - num_heads=self.num_heads, - mlp_hidden_dim=vision_config.intermediate_size, - act_fn=get_act_and_mul_fn( - vision_config.hidden_act), - norm_layer=norm_layer, - quant_config=quant_config, - prefix=f"{prefix}.blocks.{layer_idx}", - use_data_parallel=use_data_parallel) - for layer_idx in range(depth) + Qwen2_5_VisionBlock( + dim=self.hidden_size, + num_heads=self.num_heads, + mlp_hidden_dim=vision_config.intermediate_size, + act_fn=get_act_and_mul_fn(vision_config.hidden_act), + norm_layer=norm_layer, + quant_config=quant_config, + prefix=f"{prefix}.blocks.{layer_idx}", + use_data_parallel=use_data_parallel, + attn_backend=self.attn_backend, + use_upstream_fa=use_upstream_fa) for layer_idx in range(depth) ]) self.merger = Qwen2_5_VisionPatchMerger( d_model=vision_config.out_hidden_size, @@ -648,12 +655,6 @@ def __init__( prefix=f"{prefix}.merger", use_data_parallel=use_data_parallel, ) - self.attn_backend = get_vit_attn_backend( - head_size=head_dim, dtype=torch.get_default_dtype()) - if self.attn_backend != _Backend.FLASH_ATTN and \ - check_upstream_fa_availability( - torch.get_default_dtype()): - self.attn_backend = _Backend.FLASH_ATTN @property def dtype(self) -> torch.dtype: diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index f3f11438eeee..f1aeb99a4d37 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -63,7 +63,7 @@ PromptReplacement, PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.platforms import _Backend +from vllm.platforms import _Backend, current_platform from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope from vllm.utils import is_list_of @@ -158,6 +158,8 @@ def __init__( quant_config: Optional[QuantizationConfig] = None, prefix: str = "", use_data_parallel: bool = False, + attn_backend: _Backend = _Backend.TORCH_SDPA, + use_upstream_fa: bool = False, ) -> None: super().__init__() if norm_layer 
is None: @@ -170,7 +172,9 @@ def __init__( projection_size=dim, quant_config=quant_config, prefix=f"{prefix}.attn", - use_data_parallel=use_data_parallel) + use_data_parallel=use_data_parallel, + attn_backend=attn_backend, + use_upstream_fa=use_upstream_fa) self.mlp = Qwen3_VisionMLP(dim, mlp_hidden_dim, act_fn=act_fn, @@ -287,19 +291,6 @@ def __init__( head_dim = self.hidden_size // self.num_heads self.rotary_pos_emb = Qwen2_5_VisionRotaryEmbedding(head_dim // 2) - self.blocks = nn.ModuleList([ - Qwen3_VisionBlock( - dim=self.hidden_size, - num_heads=self.num_heads, - mlp_hidden_dim=vision_config.intermediate_size, - act_fn=_ACTIVATION_REGISTRY[vision_config.hidden_act], - norm_layer=norm_layer, - quant_config=quant_config, - prefix=f"{prefix}.blocks.{layer_idx}", - use_data_parallel=use_data_parallel) - for layer_idx in range(vision_config.depth) - ]) - self.merger = Qwen3_VisionPatchMerger( d_model=vision_config.out_hidden_size, context_dim=self.hidden_size, @@ -325,10 +316,42 @@ def __init__( self.attn_backend = get_vit_attn_backend( head_size=head_dim, dtype=torch.get_default_dtype()) + use_upstream_fa = False if self.attn_backend != _Backend.FLASH_ATTN and \ check_upstream_fa_availability( torch.get_default_dtype()): self.attn_backend = _Backend.FLASH_ATTN + use_upstream_fa = True + + if self.attn_backend not in { + _Backend.FLASH_ATTN, _Backend.TORCH_SDPA, _Backend.XFORMERS, + _Backend.ROCM_AITER_FA + }: + raise RuntimeError( + f"Qwen3-VL does not support {self.attn_backend} backend now.") + if current_platform.is_device_capability( + 100) and self.attn_backend != _Backend.TORCH_SDPA: + # TODO(Roger/Wentao): remove this after FA + # or XFORMERS's issue fixed on Blackwell + logger.info_once("Qwen3-VL vision attention does not support " + f"{self.attn_backend} backend on Blackwell now. 
" + "Vision attention backend is set to TORCH_SDPA.") + self.attn_backend = _Backend.TORCH_SDPA + + self.blocks = nn.ModuleList([ + Qwen3_VisionBlock( + dim=self.hidden_size, + num_heads=self.num_heads, + mlp_hidden_dim=vision_config.intermediate_size, + act_fn=_ACTIVATION_REGISTRY[vision_config.hidden_act], + norm_layer=norm_layer, + quant_config=quant_config, + prefix=f"{prefix}.blocks.{layer_idx}", + use_data_parallel=use_data_parallel, + attn_backend=self.attn_backend, + use_upstream_fa=use_upstream_fa) + for layer_idx in range(vision_config.depth) + ]) @property def dtype(self) -> torch.dtype: From d346ec695ef5dc74cde338a6bc3857e91c311ab2 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 27 Sep 2025 12:45:20 +0800 Subject: [PATCH 442/518] [CI/Build] Consolidate model loader tests and requirements (#25765) Signed-off-by: DarkLight1337 --- .buildkite/test-pipeline.yaml | 19 ++----- .github/mergify.yml | 2 +- docker/Dockerfile | 2 +- requirements/nightly_torch_test.txt | 3 +- requirements/rocm.txt | 5 +- requirements/test.in | 3 +- requirements/test.txt | 10 ++-- setup.py | 5 +- tests/model_executor/conftest.py | 52 ------------------- .../fastsafetensors_loader/__init__.py | 0 .../test_fastsafetensors_loader.py | 0 .../test_weight_utils.py | 0 .../runai_model_streamer}/__init__.py | 0 .../test_runai_model_streamer_loader.py | 0 .../runai_model_streamer}/test_runai_utils.py | 0 .../test_weight_utils.py | 0 .../tensorizer_loader/__init__.py | 0 .../tensorizer_loader/conftest.py | 0 .../tensorizer_loader/test_tensorizer.py | 2 +- .../model_loader/weight_utils.py | 35 +++++++++++-- 20 files changed, 48 insertions(+), 90 deletions(-) delete mode 100644 tests/model_executor/conftest.py rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/__init__.py (100%) rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/test_fastsafetensors_loader.py (100%) rename tests/{ => model_executor/model_loader}/fastsafetensors_loader/test_weight_utils.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/__init__.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_runai_model_streamer_loader.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_runai_utils.py (100%) rename tests/{runai_model_streamer_test => model_executor/model_loader/runai_model_streamer}/test_weight_utils.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/__init__.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/conftest.py (100%) rename tests/{ => model_executor/model_loader}/tensorizer_loader/test_tensorizer.py (99%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c178fd372bcb..82a3b2fc199e 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -465,29 +465,18 @@ steps: commands: - pytest -v -s kernels/mamba -- label: Tensorizer Test # 14min - timeout_in_minutes: 25 - mirror_hardwares: [amdexperimental] - source_file_dependencies: - - vllm/model_executor/model_loader - - tests/tensorizer_loader - - tests/entrypoints/openai/test_tensorizer_entrypoint.py - commands: - - apt-get update && apt-get install -y curl libsodium23 - - export VLLM_WORKER_MULTIPROC_METHOD=spawn - - pytest -v -s tensorizer_loader - - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - -- label: Model Executor Test # 7min - timeout_in_minutes: 20 +- label: 
Model Executor Test # ??? + timeout_in_minutes: 60 mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/model_executor - tests/model_executor + - tests/entrypoints/openai/test_tensorizer_entrypoint.py commands: - apt-get update && apt-get install -y curl libsodium23 - export VLLM_WORKER_MULTIPROC_METHOD=spawn - pytest -v -s model_executor + - pytest -v -s entrypoints/openai/test_tensorizer_entrypoint.py - label: Benchmarks # 11min timeout_in_minutes: 20 diff --git a/.github/mergify.yml b/.github/mergify.yml index 75ee3e3c55b4..923f708ea10c 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -274,7 +274,7 @@ pull_request_rules: - files~=^vllm/model_executor/model_loader/tensorizer.py - files~=^vllm/model_executor/model_loader/tensorizer_loader.py - files~=^tests/entrypoints/openai/test_tensorizer_entrypoint.py - - files~=^tests/tensorizer_loader/ + - files~=^tests/model_executor/model_loader/tensorizer_loader/ actions: assign: users: diff --git a/docker/Dockerfile b/docker/Dockerfile index fad62be798a1..c2b855be4403 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -546,7 +546,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \ else \ BITSANDBYTES_VERSION="0.46.1"; \ fi; \ - uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' boto3 runai-model-streamer runai-model-streamer[s3] + uv pip install --system accelerate hf_transfer modelscope "bitsandbytes>=${BITSANDBYTES_VERSION}" 'timm>=1.0.17' 'runai-model-streamer[s3]>=0.14.0' ENV VLLM_USAGE_SOURCE production-docker-image diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt index a529bf4504e4..790a18f28b7f 100644 --- a/requirements/nightly_torch_test.txt +++ b/requirements/nightly_torch_test.txt @@ -43,7 +43,6 @@ tritonclient==2.51.0 numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Required for N-gram speculative decoding numba == 0.61.2; python_version > '3.9' numpy -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 fastsafetensors>=0.1.10 pydantic>=2.10 # 2.9 leads to error on python 3.10 diff --git a/requirements/rocm.txt b/requirements/rocm.txt index c129dd345c81..c4aabe2a7314 100644 --- a/requirements/rocm.txt +++ b/requirements/rocm.txt @@ -5,8 +5,6 @@ numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. Req numba == 0.61.2; python_version > '3.9' # Dependencies for AMD GPUs -boto3 -botocore datasets ray[cgraph]>=2.48.0 # Ray Compiled Graph, required for pipeline parallelism in V1. peft @@ -15,7 +13,6 @@ tensorizer==2.10.1 packaging>=24.2 setuptools>=77.0.3,<80.0.0 setuptools-scm>=8 -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 conch-triton-kernels==1.2.1 timm>=1.0.17 \ No newline at end of file diff --git a/requirements/test.in b/requirements/test.in index 451bd7387910..c9496c61a7e4 100644 --- a/requirements/test.in +++ b/requirements/test.in @@ -51,8 +51,7 @@ tritonclient==2.51.0 numba == 0.60.0; python_version == '3.9' # v0.61 doesn't support Python 3.9. 
Required for N-gram speculative decoding numba == 0.61.2; python_version > '3.9' numpy -runai-model-streamer==0.11.0 -runai-model-streamer-s3==0.11.0 +runai-model-streamer[s3]==0.14.0 fastsafetensors>=0.1.10 pydantic>=2.10 # 2.9 leads to error on python 3.10 decord==0.6.0 diff --git a/requirements/test.txt b/requirements/test.txt index 3519aa524f41..912e04b2606c 100644 --- a/requirements/test.txt +++ b/requirements/test.txt @@ -72,7 +72,9 @@ blobfile==3.0.0 bm25s==0.2.13 # via mteb boto3==1.35.57 - # via tensorizer + # via + # runai-model-streamer-s3 + # tensorizer botocore==1.35.57 # via # boto3 @@ -925,10 +927,10 @@ rsa==4.9.1 # via google-auth rtree==1.4.0 # via torchgeo -runai-model-streamer==0.11.0 - # via -r requirements/test.in -runai-model-streamer-s3==0.11.0 +runai-model-streamer==0.14.0 # via -r requirements/test.in +runai-model-streamer-s3==0.14.0 + # via runai-model-streamer s3transfer==0.10.3 # via boto3 sacrebleu==2.4.3 diff --git a/setup.py b/setup.py index e4c40d22b928..a8fec8a028d0 100644 --- a/setup.py +++ b/setup.py @@ -654,10 +654,7 @@ def _read_requirements(filename: str) -> list[str]: "bench": ["pandas", "datasets"], "tensorizer": ["tensorizer==2.10.1"], "fastsafetensors": ["fastsafetensors >= 0.1.10"], - "runai": [ - "runai-model-streamer >= 0.14.0", "runai-model-streamer-gcs", - "google-cloud-storage", "runai-model-streamer-s3", "boto3" - ], + "runai": ["runai-model-streamer[s3,gcs] >= 0.14.0"], "audio": ["librosa", "soundfile", "mistral_common[audio]"], # Required for audio processing "video": [], # Kept for backwards compatibility diff --git a/tests/model_executor/conftest.py b/tests/model_executor/conftest.py deleted file mode 100644 index c6d89d849e9f..000000000000 --- a/tests/model_executor/conftest.py +++ /dev/null @@ -1,52 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import pytest - - -@pytest.fixture -def sample_regex(): - return (r"((25[0-5]|(2[0-4]|1\d|[1-9]|)\d)\.){3}" - r"(25[0-5]|(2[0-4]|1\d|[1-9]|)\d)") - - -@pytest.fixture -def sample_json_schema(): - return { - "type": "object", - "properties": { - "name": { - "type": "string" - }, - "age": { - "type": "integer" - }, - "skills": { - "type": "array", - "items": { - "type": "string", - "maxLength": 10 - }, - "minItems": 3 - }, - "work_history": { - "type": "array", - "items": { - "type": "object", - "properties": { - "company": { - "type": "string" - }, - "duration": { - "type": "number" - }, - "position": { - "type": "string" - } - }, - "required": ["company", "position"] - } - } - }, - "required": ["name", "age", "skills", "work_history"] - } diff --git a/tests/fastsafetensors_loader/__init__.py b/tests/model_executor/model_loader/fastsafetensors_loader/__init__.py similarity index 100% rename from tests/fastsafetensors_loader/__init__.py rename to tests/model_executor/model_loader/fastsafetensors_loader/__init__.py diff --git a/tests/fastsafetensors_loader/test_fastsafetensors_loader.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py similarity index 100% rename from tests/fastsafetensors_loader/test_fastsafetensors_loader.py rename to tests/model_executor/model_loader/fastsafetensors_loader/test_fastsafetensors_loader.py diff --git a/tests/fastsafetensors_loader/test_weight_utils.py b/tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py similarity index 100% rename from tests/fastsafetensors_loader/test_weight_utils.py rename to 
tests/model_executor/model_loader/fastsafetensors_loader/test_weight_utils.py diff --git a/tests/runai_model_streamer_test/__init__.py b/tests/model_executor/model_loader/runai_model_streamer/__init__.py similarity index 100% rename from tests/runai_model_streamer_test/__init__.py rename to tests/model_executor/model_loader/runai_model_streamer/__init__.py diff --git a/tests/runai_model_streamer_test/test_runai_model_streamer_loader.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py similarity index 100% rename from tests/runai_model_streamer_test/test_runai_model_streamer_loader.py rename to tests/model_executor/model_loader/runai_model_streamer/test_runai_model_streamer_loader.py diff --git a/tests/runai_model_streamer_test/test_runai_utils.py b/tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py similarity index 100% rename from tests/runai_model_streamer_test/test_runai_utils.py rename to tests/model_executor/model_loader/runai_model_streamer/test_runai_utils.py diff --git a/tests/runai_model_streamer_test/test_weight_utils.py b/tests/model_executor/model_loader/runai_model_streamer/test_weight_utils.py similarity index 100% rename from tests/runai_model_streamer_test/test_weight_utils.py rename to tests/model_executor/model_loader/runai_model_streamer/test_weight_utils.py diff --git a/tests/tensorizer_loader/__init__.py b/tests/model_executor/model_loader/tensorizer_loader/__init__.py similarity index 100% rename from tests/tensorizer_loader/__init__.py rename to tests/model_executor/model_loader/tensorizer_loader/__init__.py diff --git a/tests/tensorizer_loader/conftest.py b/tests/model_executor/model_loader/tensorizer_loader/conftest.py similarity index 100% rename from tests/tensorizer_loader/conftest.py rename to tests/model_executor/model_loader/tensorizer_loader/conftest.py diff --git a/tests/tensorizer_loader/test_tensorizer.py b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py similarity index 99% rename from tests/tensorizer_loader/test_tensorizer.py rename to tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py index e00d7c2f80c6..f50f04696738 100644 --- a/tests/tensorizer_loader/test_tensorizer.py +++ b/tests/model_executor/model_loader/tensorizer_loader/test_tensorizer.py @@ -14,6 +14,7 @@ import torch import vllm.model_executor.model_loader.tensorizer +from tests.utils import VLLM_PATH, RemoteOpenAIServer from vllm import LLM, SamplingParams from vllm.engine.arg_utils import EngineArgs # yapf: disable @@ -27,7 +28,6 @@ # yapf: enable from vllm.utils import PlaceholderModule -from ..utils import VLLM_PATH, RemoteOpenAIServer from .conftest import DummyExecutor, assert_from_collective_rpc try: diff --git a/vllm/model_executor/model_loader/weight_utils.py b/vllm/model_executor/model_loader/weight_utils.py index cad32fee1d0f..f52d9dd2f534 100644 --- a/vllm/model_executor/model_loader/weight_utils.py +++ b/vllm/model_executor/model_loader/weight_utils.py @@ -639,6 +639,19 @@ def runai_safetensors_weights_iterator( yield from tensor_iter +def _init_loader( + pg: torch.distributed.ProcessGroup, + device: torch.device, + f_list: list[str], + *, + nogds: bool = False, +): + loader = SafeTensorsFileLoader(pg, device, nogds=nogds) + rank_file_map = {i: [f] for i, f in enumerate(f_list)} + loader.add_filenames(rank_file_map) + return loader + + def fastsafetensors_weights_iterator( hf_weights_files: list[str], use_tqdm_on_load: bool, @@ -656,17 +669,31 @@ def 
fastsafetensors_weights_iterator( for i in range(0, len(hf_weights_files), pg.size()) ] + nogds = False + for f_list in tqdm( weight_files_sub_lists, desc="Loading safetensors using Fastsafetensor loader", disable=not enable_tqdm(use_tqdm_on_load), bar_format=_BAR_FORMAT, ): - loader = SafeTensorsFileLoader(pg, device) - rank_file_map = {i: [f] for i, f in enumerate(f_list)} - loader.add_filenames(rank_file_map) + loader = _init_loader(pg, device, f_list, nogds=nogds) try: - fb = loader.copy_files_to_device() + try: + fb = loader.copy_files_to_device() + except RuntimeError as e: + if "gds" not in str(e): + raise + + loader.close() + nogds = True + logger.warning_once( + "GDS not enabled, setting `nogds=True`.\n" + "For more information, see: https://github.com/foundation-model-stack/fastsafetensors?tab=readme-ov-file#basic-api-usages" + ) + loader = _init_loader(pg, device, f_list, nogds=nogds) + fb = loader.copy_files_to_device() + try: keys = list(fb.key_to_rank_lidx.keys()) for k in keys: From b3613e3acece6502c553901fe4433e3f783363b7 Mon Sep 17 00:00:00 2001 From: 22quinn <33176974+22quinn@users.noreply.github.com> Date: Fri, 26 Sep 2025 21:57:27 -0700 Subject: [PATCH 443/518] [CI/Build] Add timing to Model Executor Test (#25799) Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 82a3b2fc199e..c6c4e2a2309f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -465,8 +465,8 @@ steps: commands: - pytest -v -s kernels/mamba -- label: Model Executor Test # ??? - timeout_in_minutes: 60 +- label: Model Executor Test # 23min + timeout_in_minutes: 35 mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/model_executor From cd87bfbf37f2300b7076b496366cd69048819777 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 27 Sep 2025 13:51:15 +0800 Subject: [PATCH 444/518] [CI/Build] Reorganize root-level V1 tests (#25767) Signed-off-by: DarkLight1337 --- .../scripts/hardware_ci/run-xpu-test.sh | 3 +- .buildkite/test-pipeline.yaml | 23 +++---- tests/v1/{ => core}/test_kv_sharing.py | 0 tests/v1/distributed/__init__.py | 0 .../v1/{ => distributed}/test_async_llm_dp.py | 0 .../{ => distributed}/test_external_lb_dp.py | 0 .../v1/{ => distributed}/test_hybrid_lb_dp.py | 2 +- .../{ => distributed}/test_internal_lb_dp.py | 2 +- .../openai/test_multi_api_servers.py | 2 +- tests/v1/{ => metrics}/test_metrics_reader.py | 0 tests/v1/{test_utils.py => utils.py} | 61 ------------------ tests/v1/worker/test_utils.py | 63 +++++++++++++++++++ 12 files changed, 75 insertions(+), 81 deletions(-) rename tests/v1/{ => core}/test_kv_sharing.py (100%) create mode 100644 tests/v1/distributed/__init__.py rename tests/v1/{ => distributed}/test_async_llm_dp.py (100%) rename tests/v1/{ => distributed}/test_external_lb_dp.py (100%) rename tests/v1/{ => distributed}/test_hybrid_lb_dp.py (99%) rename tests/v1/{ => distributed}/test_internal_lb_dp.py (99%) rename tests/v1/{ => metrics}/test_metrics_reader.py (100%) rename tests/v1/{test_utils.py => utils.py} (67%) create mode 100644 tests/v1/worker/test_utils.py diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh index 1fc3dbd8c21f..6b9c0121c4aa 100644 --- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh @@ -44,7 +44,6 @@ docker run \ pytest -v 
-s v1/structured_output pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_eagle.py --ignore=v1/spec_decode/test_tree_attention.py pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py + pytest -v -s v1/test_metrics pytest -v -s v1/test_serial_utils.py - pytest -v -s v1/test_utils.py - pytest -v -s v1/test_metrics_reader.py ' diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index c6c4e2a2309f..e603c1582e1f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -159,10 +159,7 @@ steps: - examples/offline_inference/rlhf.py - examples/offline_inference/rlhf_colocate.py - tests/examples/offline_inference/data_parallel.py - - tests/v1/test_async_llm_dp.py - - tests/v1/test_external_lb_dp.py - - tests/v1/test_internal_lb_dp.py - - tests/v1/test_hybrid_lb_dp.py + - tests/v1/distributed - tests/v1/engine/test_engine_core_client.py - tests/distributed/test_symm_mem_allreduce.py commands: @@ -180,10 +177,10 @@ steps: - TP_SIZE=2 DP_SIZE=2 ENABLE_EP=1 torchrun --nproc-per-node=4 distributed/test_torchrun_example_moe.py # test with internal dp - python3 ../examples/offline_inference/data_parallel.py --enforce-eager - - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py - - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py - - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/test_internal_lb_dp.py - - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/test_hybrid_lb_dp.py + - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py + - TP_SIZE=2 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py + - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_internal_lb_dp.py + - TP_SIZE=1 DP_SIZE=4 pytest -v -s v1/distributed/test_hybrid_lb_dp.py - pytest -v -s v1/engine/test_engine_core_client.py::test_kv_cache_events_dp - pytest -v -s distributed/test_utils.py - pytest -v -s compile/test_basic_correctness.py @@ -300,12 +297,9 @@ steps: - pytest -v -s v1/spec_decode - pytest -v -s v1/kv_connector/unit - pytest -v -s v1/metrics - - pytest -v -s v1/test_kv_sharing.py - - pytest -v -s v1/test_metrics_reader.py - pytest -v -s v1/test_oracle.py - pytest -v -s v1/test_request.py - pytest -v -s v1/test_serial_utils.py - - pytest -v -s v1/test_utils.py # Integration test for streaming correctness (requires special branch). 
- pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine @@ -895,14 +889,13 @@ steps: - tests/compile/test_wrapper.py - tests/distributed/ - tests/entrypoints/llm/test_collective_rpc.py - - tests/v1/test_async_llm_dp.py - - tests/v1/test_external_lb_dp.py + - tests/v1/distributed - tests/v1/entrypoints/openai/test_multi_api_servers.py - tests/v1/shutdown - tests/v1/worker/test_worker_memory_snapshot.py commands: - - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_async_llm_dp.py - - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/test_external_lb_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_async_llm_dp.py + - TP_SIZE=1 DP_SIZE=2 pytest -v -s v1/distributed/test_external_lb_dp.py - DP_SIZE=2 pytest -v -s v1/entrypoints/openai/test_multi_api_servers.py - pytest -v -s entrypoints/llm/test_collective_rpc.py - pytest -v -s ./compile/test_basic_correctness.py diff --git a/tests/v1/test_kv_sharing.py b/tests/v1/core/test_kv_sharing.py similarity index 100% rename from tests/v1/test_kv_sharing.py rename to tests/v1/core/test_kv_sharing.py diff --git a/tests/v1/distributed/__init__.py b/tests/v1/distributed/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/tests/v1/test_async_llm_dp.py b/tests/v1/distributed/test_async_llm_dp.py similarity index 100% rename from tests/v1/test_async_llm_dp.py rename to tests/v1/distributed/test_async_llm_dp.py diff --git a/tests/v1/test_external_lb_dp.py b/tests/v1/distributed/test_external_lb_dp.py similarity index 100% rename from tests/v1/test_external_lb_dp.py rename to tests/v1/distributed/test_external_lb_dp.py diff --git a/tests/v1/test_hybrid_lb_dp.py b/tests/v1/distributed/test_hybrid_lb_dp.py similarity index 99% rename from tests/v1/test_hybrid_lb_dp.py rename to tests/v1/distributed/test_hybrid_lb_dp.py index 552436f818d7..21d8009a6dbb 100644 --- a/tests/v1/test_hybrid_lb_dp.py +++ b/tests/v1/distributed/test_hybrid_lb_dp.py @@ -12,7 +12,7 @@ import requests from tests.utils import RemoteOpenAIServer -from tests.v1.test_utils import check_request_balancing +from tests.v1.utils import check_request_balancing from vllm.platforms import current_platform MODEL_NAME = "ibm-research/PowerMoE-3b" diff --git a/tests/v1/test_internal_lb_dp.py b/tests/v1/distributed/test_internal_lb_dp.py similarity index 99% rename from tests/v1/test_internal_lb_dp.py rename to tests/v1/distributed/test_internal_lb_dp.py index e965645711ee..3f9defd13dea 100644 --- a/tests/v1/test_internal_lb_dp.py +++ b/tests/v1/distributed/test_internal_lb_dp.py @@ -13,7 +13,7 @@ import requests from tests.utils import RemoteOpenAIServer -from tests.v1.test_utils import check_request_balancing +from tests.v1.utils import check_request_balancing from vllm.platforms import current_platform MODEL_NAME = "ibm-research/PowerMoE-3b" diff --git a/tests/v1/entrypoints/openai/test_multi_api_servers.py b/tests/v1/entrypoints/openai/test_multi_api_servers.py index f7c31b0c4377..35f75191d9c8 100644 --- a/tests/v1/entrypoints/openai/test_multi_api_servers.py +++ b/tests/v1/entrypoints/openai/test_multi_api_servers.py @@ -8,7 +8,7 @@ import pytest_asyncio from tests.utils import RemoteOpenAIServer -from tests.v1.test_utils import check_request_balancing +from tests.v1.utils import check_request_balancing MODEL_NAME = "ibm-research/PowerMoE-3b" diff --git a/tests/v1/test_metrics_reader.py b/tests/v1/metrics/test_metrics_reader.py similarity index 
100% rename from tests/v1/test_metrics_reader.py rename to tests/v1/metrics/test_metrics_reader.py diff --git a/tests/v1/test_utils.py b/tests/v1/utils.py similarity index 67% rename from tests/v1/test_utils.py rename to tests/v1/utils.py index 00d98a873a31..b3f560c11e8f 100644 --- a/tests/v1/test_utils.py +++ b/tests/v1/utils.py @@ -1,71 +1,10 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project - import pytest import regex as re import requests -import torch from tests.utils import RemoteOpenAIServer -from vllm.v1.worker.utils import bind_kv_cache - - -def test_bind_kv_cache(): - from vllm.attention import Attention - - ctx = { - 'layers.0.self_attn': Attention(32, 128, 0.1), - 'layers.1.self_attn': Attention(32, 128, 0.1), - 'layers.2.self_attn': Attention(32, 128, 0.1), - 'layers.3.self_attn': Attention(32, 128, 0.1), - } - kv_cache = { - 'layers.0.self_attn': torch.zeros((1, )), - 'layers.1.self_attn': torch.zeros((1, )), - 'layers.2.self_attn': torch.zeros((1, )), - 'layers.3.self_attn': torch.zeros((1, )), - } - runner_kv_caches: list[torch.Tensor] = [] - bind_kv_cache(kv_cache, ctx, runner_kv_caches) - assert ctx['layers.0.self_attn'].kv_cache[0] is kv_cache[ - 'layers.0.self_attn'] - assert ctx['layers.1.self_attn'].kv_cache[0] is kv_cache[ - 'layers.1.self_attn'] - assert ctx['layers.2.self_attn'].kv_cache[0] is kv_cache[ - 'layers.2.self_attn'] - assert ctx['layers.3.self_attn'].kv_cache[0] is kv_cache[ - 'layers.3.self_attn'] - - assert runner_kv_caches[0] is kv_cache['layers.0.self_attn'] - assert runner_kv_caches[1] is kv_cache['layers.1.self_attn'] - assert runner_kv_caches[2] is kv_cache['layers.2.self_attn'] - assert runner_kv_caches[3] is kv_cache['layers.3.self_attn'] - - -def test_bind_kv_cache_non_attention(): - from vllm.attention import Attention - - # example from Jamba PP=2 - ctx = { - 'model.layers.20.attn': Attention(32, 128, 0.1), - 'model.layers.28.attn': Attention(32, 128, 0.1), - } - kv_cache = { - 'model.layers.20.attn': torch.zeros((1, )), - 'model.layers.28.attn': torch.zeros((1, )), - } - - runner_kv_caches: list[torch.Tensor] = [] - bind_kv_cache(kv_cache, ctx, runner_kv_caches) - - assert ctx['model.layers.20.attn'].kv_cache[0] is kv_cache[ - 'model.layers.20.attn'] - assert ctx['model.layers.28.attn'].kv_cache[0] is kv_cache[ - 'model.layers.28.attn'] - - assert runner_kv_caches[0] is kv_cache['model.layers.20.attn'] - assert runner_kv_caches[1] is kv_cache['model.layers.28.attn'] - # Prometheus metrics utilities for testing diff --git a/tests/v1/worker/test_utils.py b/tests/v1/worker/test_utils.py new file mode 100644 index 000000000000..fd0e630ce178 --- /dev/null +++ b/tests/v1/worker/test_utils.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import torch + +from vllm.v1.worker.utils import bind_kv_cache + + +def test_bind_kv_cache(): + from vllm.attention import Attention + + ctx = { + 'layers.0.self_attn': Attention(32, 128, 0.1), + 'layers.1.self_attn': Attention(32, 128, 0.1), + 'layers.2.self_attn': Attention(32, 128, 0.1), + 'layers.3.self_attn': Attention(32, 128, 0.1), + } + kv_cache = { + 'layers.0.self_attn': torch.zeros((1, )), + 'layers.1.self_attn': torch.zeros((1, )), + 'layers.2.self_attn': torch.zeros((1, )), + 'layers.3.self_attn': torch.zeros((1, )), + } + runner_kv_caches: list[torch.Tensor] = [] + bind_kv_cache(kv_cache, ctx, runner_kv_caches) + assert ctx['layers.0.self_attn'].kv_cache[0] is 
kv_cache[ + 'layers.0.self_attn'] + assert ctx['layers.1.self_attn'].kv_cache[0] is kv_cache[ + 'layers.1.self_attn'] + assert ctx['layers.2.self_attn'].kv_cache[0] is kv_cache[ + 'layers.2.self_attn'] + assert ctx['layers.3.self_attn'].kv_cache[0] is kv_cache[ + 'layers.3.self_attn'] + + assert runner_kv_caches[0] is kv_cache['layers.0.self_attn'] + assert runner_kv_caches[1] is kv_cache['layers.1.self_attn'] + assert runner_kv_caches[2] is kv_cache['layers.2.self_attn'] + assert runner_kv_caches[3] is kv_cache['layers.3.self_attn'] + + +def test_bind_kv_cache_non_attention(): + from vllm.attention import Attention + + # example from Jamba PP=2 + ctx = { + 'model.layers.20.attn': Attention(32, 128, 0.1), + 'model.layers.28.attn': Attention(32, 128, 0.1), + } + kv_cache = { + 'model.layers.20.attn': torch.zeros((1, )), + 'model.layers.28.attn': torch.zeros((1, )), + } + + runner_kv_caches: list[torch.Tensor] = [] + bind_kv_cache(kv_cache, ctx, runner_kv_caches) + + assert ctx['model.layers.20.attn'].kv_cache[0] is kv_cache[ + 'model.layers.20.attn'] + assert ctx['model.layers.28.attn'].kv_cache[0] is kv_cache[ + 'model.layers.28.attn'] + + assert runner_kv_caches[0] is kv_cache['model.layers.20.attn'] + assert runner_kv_caches[1] is kv_cache['model.layers.28.attn'] From 39391520698e8b1d699ea2ccec571a6c6416ba9d Mon Sep 17 00:00:00 2001 From: 22quinn <33176974+22quinn@users.noreply.github.com> Date: Sat, 27 Sep 2025 00:47:29 -0700 Subject: [PATCH 445/518] [Misc] Fix codeowners override for v1 sample and attention (#25037) Signed-off-by: 22quinn <33176974+22quinn@users.noreply.github.com> --- .github/CODEOWNERS | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 9d749fe8d323..0b9c054b968a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -12,8 +12,6 @@ /vllm/model_executor/layers/mamba @tdoublep /vllm/model_executor/model_loader @22quinn /vllm/multimodal @DarkLight1337 @ywang96 @NickLucche -/vllm/v1/attention @LucasWilkinson -/vllm/v1/sample @22quinn @houseroad /vllm/vllm_flash_attn @LucasWilkinson /vllm/lora @jeejeelee /vllm/reasoning @aarnphm @chaunceyjiang @@ -28,11 +26,13 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson # vLLM V1 /vllm/v1 @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat -/vllm/v1/structured_output @mgoin @russellb @aarnphm @benchislett -/vllm/v1/spec_decode @benchislett @luccafong +/vllm/v1/attention @LucasWilkinson /vllm/v1/attention/backends/flashinfer.py @mgoin /vllm/v1/attention/backends/triton_attn.py @tdoublep /vllm/v1/core @WoosukKwon @robertgshaw2-redhat @njhill @ywang96 @comaniac @alexm-redhat @heheda12345 @ApostaC +/vllm/v1/sample @22quinn @houseroad @njhill +/vllm/v1/spec_decode @benchislett @luccafong +/vllm/v1/structured_output @mgoin @russellb @aarnphm @benchislett /vllm/v1/kv_cache_interface.py @heheda12345 /vllm/v1/offloading @ApostaC @@ -54,7 +54,7 @@ CMakeLists.txt @tlrmchlsmth @LucasWilkinson /tests/weight_loading @mgoin @youkaichao @yewentao256 /tests/lora @jeejeelee /tests/models/language/generation/test_hybrid.py @tdoublep -/tests/v1/kv_connector/nixl_integration @NickLucche +/tests/v1/kv_connector/nixl_integration @NickLucche /tests/v1/kv_connector @ApostaC /tests/v1/offloading @ApostaC From 23b8ee672d7ce4c383ed1527a7f268c0ca33c16c Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 27 Sep 2025 00:57:07 -0700 Subject: [PATCH 446/518] [Misc] Update openai client example file for multimodal (#25795) Signed-off-by: Roger Wang Co-authored-by: 
gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- ...i_chat_completion_client_for_multimodal.py | 63 +++++++++++-------- 1 file changed, 36 insertions(+), 27 deletions(-) diff --git a/examples/online_serving/openai_chat_completion_client_for_multimodal.py b/examples/online_serving/openai_chat_completion_client_for_multimodal.py index 37216a5cfe57..5d515fbfb671 100644 --- a/examples/online_serving/openai_chat_completion_client_for_multimodal.py +++ b/examples/online_serving/openai_chat_completion_client_for_multimodal.py @@ -38,11 +38,13 @@ base_url=openai_api_base, ) +headers = {"User-Agent": "vLLM Example Client"} + def encode_base64_content_from_url(content_url: str) -> str: """Encode a content retrieved from a remote url to base64 format.""" - with requests.get(content_url) as response: + with requests.get(content_url, headers=headers) as response: response.raise_for_status() result = base64.b64encode(response.content).decode("utf-8") @@ -50,19 +52,19 @@ def encode_base64_content_from_url(content_url: str) -> str: # Text-only inference -def run_text_only(model: str) -> None: +def run_text_only(model: str, max_completion_tokens: int) -> None: chat_completion = client.chat.completions.create( messages=[{"role": "user", "content": "What's the capital of France?"}], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion.choices[0].message.content - print("Chat completion output:", result) + print("Chat completion output:\n", result) # Single-image input inference -def run_single_image(model: str) -> None: +def run_single_image(model: str, max_completion_tokens: int) -> None: ## Use image url in the payload image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg" chat_completion_from_url = client.chat.completions.create( @@ -79,11 +81,11 @@ def run_single_image(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_url.choices[0].message.content - print("Chat completion output from image url:", result) + print("Chat completion output from image url:\n", result) ## Use base64 encoded image in the payload image_base64 = encode_base64_content_from_url(image_url) @@ -101,7 +103,7 @@ def run_single_image(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_base64.choices[0].message.content @@ -109,7 +111,7 @@ def run_single_image(model: str) -> None: # Multi-image input inference -def run_multi_image(model: str) -> None: +def run_multi_image(model: str, max_completion_tokens: int) -> None: image_url_duck = "https://upload.wikimedia.org/wikipedia/commons/d/da/2015_Kaczka_krzy%C5%BCowka_w_wodzie_%28samiec%29.jpg" image_url_lion = "https://upload.wikimedia.org/wikipedia/commons/7/77/002_The_lion_king_Snyggve_in_the_Serengeti_National_Park_Photo_by_Giles_Laurent.jpg" chat_completion_from_url = client.chat.completions.create( @@ -130,15 +132,15 @@ def run_multi_image(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_url.choices[0].message.content - print("Chat completion output:", result) + print("Chat completion output:\n", result) # Video input inference -def run_video(model: str) -> None: +def run_video(model: 
str, max_completion_tokens: int) -> None: video_url = "http://commondatastorage.googleapis.com/gtv-videos-bucket/sample/ForBiggerFun.mp4" video_base64 = encode_base64_content_from_url(video_url) @@ -157,11 +159,11 @@ def run_video(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_url.choices[0].message.content - print("Chat completion output from image url:", result) + print("Chat completion output from video url:\n", result) ## Use base64 encoded video in the payload chat_completion_from_base64 = client.chat.completions.create( @@ -178,15 +180,15 @@ def run_video(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_base64.choices[0].message.content - print("Chat completion output from base64 encoded image:", result) + print("Chat completion output from base64 encoded video:\n", result) # Audio input inference -def run_audio(model: str) -> None: +def run_audio(model: str, max_completion_tokens: int) -> None: from vllm.assets.audio import AudioAsset audio_url = AudioAsset("winning_call").url @@ -211,11 +213,11 @@ def run_audio(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_base64.choices[0].message.content - print("Chat completion output from input audio:", result) + print("Chat completion output from input audio:\n", result) # HTTP URL chat_completion_from_url = client.chat.completions.create( @@ -235,11 +237,11 @@ def run_audio(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_url.choices[0].message.content - print("Chat completion output from audio url:", result) + print("Chat completion output from audio url:\n", result) # base64 URL chat_completion_from_base64 = client.chat.completions.create( @@ -259,14 +261,14 @@ def run_audio(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_base64.choices[0].message.content - print("Chat completion output from base64 encoded audio:", result) + print("Chat completion output from base64 encoded audio:\n", result) -def run_multi_audio(model: str) -> None: +def run_multi_audio(model: str, max_completion_tokens: int) -> None: from vllm.assets.audio import AudioAsset # Two different audios to showcase batched inference. 
@@ -300,11 +302,11 @@ def run_multi_audio(model: str) -> None: } ], model=model, - max_completion_tokens=64, + max_completion_tokens=max_completion_tokens, ) result = chat_completion_from_base64.choices[0].message.content - print("Chat completion output from input audio:", result) + print("Chat completion output from input audio:\n", result) example_function_map = { @@ -330,13 +332,20 @@ def parse_args(): choices=list(example_function_map.keys()), help="Conversation type with multimodal data.", ) + parser.add_argument( + "--max-completion-tokens", + "-n", + type=int, + default=128, + help="Maximum number of tokens to generate for each completion.", + ) return parser.parse_args() def main(args) -> None: chat_type = args.chat_type model = get_first_model(client) - example_function_map[chat_type](model) + example_function_map[chat_type](model, args.max_completion_tokens) if __name__ == "__main__": From 176173989a4c5d9c3a4dca8c788d3492ac27a2e0 Mon Sep 17 00:00:00 2001 From: Xiaohan Zou Date: Sat, 27 Sep 2025 03:59:22 -0400 Subject: [PATCH 447/518] [Bugfix] Add missing `image_size` for phi4_multimodal (#25796) --- vllm/model_executor/models/phi4_multimodal.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py index d2a3a8cc0496..bdc831354c11 100644 --- a/vllm/model_executor/models/phi4_multimodal.py +++ b/vllm/model_executor/models/phi4_multimodal.py @@ -786,6 +786,7 @@ def _find_target_aspect_ratio( target_ratios, orig_width, orig_height, + image_size, ) # calculate the target width and height From 27d7638b9476062931a6770ed90714792e77cc83 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Sat, 27 Sep 2025 16:15:12 +0800 Subject: [PATCH 448/518] [Bugfix] Merge MM embeddings by index instead of token IDs (#16229) Signed-off-by: DarkLight1337 Signed-off-by: NickLucche Signed-off-by: Roger Wang Co-authored-by: NickLucche Co-authored-by: Roger Wang --- docs/contributing/model/multimodal.md | 33 +------ vllm/config/model.py | 7 +- vllm/model_executor/models/aria.py | 25 ++--- vllm/model_executor/models/aya_vision.py | 27 ++---- vllm/model_executor/models/bert.py | 12 +++ vllm/model_executor/models/bert_with_rope.py | 6 ++ vllm/model_executor/models/blip2.py | 22 ++--- vllm/model_executor/models/chameleon.py | 24 ++--- vllm/model_executor/models/chatglm.py | 3 + vllm/model_executor/models/cohere2_vision.py | 27 ++---- vllm/model_executor/models/deepseek_eagle.py | 6 ++ vllm/model_executor/models/deepseek_mtp.py | 6 ++ vllm/model_executor/models/deepseek_vl2.py | 25 ++--- vllm/model_executor/models/dots_ocr.py | 44 +++------ vllm/model_executor/models/ernie45_vl.py | 31 +++--- vllm/model_executor/models/ernie_mtp.py | 6 ++ vllm/model_executor/models/fuyu.py | 26 ++--- vllm/model_executor/models/gemma3_mm.py | 26 ++--- vllm/model_executor/models/gemma3n_mm.py | 23 +++-- vllm/model_executor/models/glm4_1v.py | 17 ---- vllm/model_executor/models/glm4_moe_mtp.py | 6 ++ vllm/model_executor/models/glm4v.py | 35 ++----- vllm/model_executor/models/granite_speech.py | 34 ++++--- vllm/model_executor/models/hunyuan_v1.py | 3 + .../models/hyperclovax_vision.py | 35 ++----- vllm/model_executor/models/idefics3.py | 31 ++---- vllm/model_executor/models/interfaces.py | 83 ++++++++++++++-- vllm/model_executor/models/interfaces_base.py | 24 ++++- vllm/model_executor/models/interns1.py | 47 ++++++---- vllm/model_executor/models/internvl.py | 46 +++++---- vllm/model_executor/models/keye.py | 18 ---- 
vllm/model_executor/models/kimi_vl.py | 29 +----- vllm/model_executor/models/lfm2.py | 3 + vllm/model_executor/models/llama4_eagle.py | 31 ++---- vllm/model_executor/models/llama_eagle.py | 6 ++ vllm/model_executor/models/llama_eagle3.py | 17 +--- vllm/model_executor/models/llava.py | 26 ++--- vllm/model_executor/models/llava_next.py | 31 +++--- .../model_executor/models/llava_next_video.py | 23 ++--- vllm/model_executor/models/llava_onevision.py | 13 --- vllm/model_executor/models/midashenglm.py | 25 ++--- vllm/model_executor/models/mimo_mtp.py | 6 ++ vllm/model_executor/models/minicpmv.py | 27 ++---- vllm/model_executor/models/minimax_text_01.py | 10 +- vllm/model_executor/models/minimax_vl_01.py | 25 ++--- vllm/model_executor/models/mistral3.py | 26 ++--- vllm/model_executor/models/mllama4.py | 30 ++---- vllm/model_executor/models/modernbert.py | 9 ++ vllm/model_executor/models/molmo.py | 32 ++----- .../model_executor/models/nano_nemotron_vl.py | 43 +++------ vllm/model_executor/models/nemotron_vl.py | 37 +++++--- vllm/model_executor/models/olmo2.py | 6 ++ vllm/model_executor/models/ovis.py | 21 +---- vllm/model_executor/models/ovis2_5.py | 18 +--- vllm/model_executor/models/paligemma.py | 23 ++--- vllm/model_executor/models/phi3v.py | 44 ++++++--- vllm/model_executor/models/phi4_multimodal.py | 18 +--- vllm/model_executor/models/phi4mm.py | 14 --- vllm/model_executor/models/pixtral.py | 26 ++--- .../models/qwen2_5_omni_thinker.py | 28 +++--- vllm/model_executor/models/qwen2_5_vl.py | 13 --- vllm/model_executor/models/qwen2_audio.py | 23 ++--- vllm/model_executor/models/qwen2_vl.py | 13 --- vllm/model_executor/models/qwen3_vl.py | 89 ++++++++++++------ vllm/model_executor/models/qwen_vl.py | 25 ++--- vllm/model_executor/models/roberta.py | 3 + vllm/model_executor/models/skyworkr1v.py | 36 ++++--- vllm/model_executor/models/solar.py | 3 + vllm/model_executor/models/step3_text.py | 3 + vllm/model_executor/models/step3_vl.py | 52 +++++----- vllm/model_executor/models/tarsier.py | 25 ++--- vllm/model_executor/models/terratorch.py | 3 + vllm/model_executor/models/transformers.py | 64 ++++++++++--- vllm/model_executor/models/ultravox.py | 36 +++---- vllm/model_executor/models/utils.py | 94 ++++++++----------- vllm/model_executor/models/voxtral.py | 29 ++---- vllm/model_executor/models/whisper.py | 10 +- vllm/v1/spec_decode/eagle.py | 46 +++++---- vllm/v1/worker/gpu_model_runner.py | 52 +++++++--- vllm/v1/worker/tpu_model_runner.py | 79 +++++++++++++--- 80 files changed, 965 insertions(+), 1138 deletions(-) diff --git a/docs/contributing/model/multimodal.md b/docs/contributing/model/multimodal.md index 87d34d207cde..1d72fe97b966 100644 --- a/docs/contributing/model/multimodal.md +++ b/docs/contributing/model/multimodal.md @@ -66,35 +66,12 @@ Further update the model as follows: !!! important The returned `multimodal_embeddings` must be either a **3D [torch.Tensor][]** of shape `(num_items, feature_size, hidden_size)`, or a **list / tuple of 2D [torch.Tensor][]'s** of shape `(feature_size, hidden_size)`, so that `multimodal_embeddings[i]` retrieves the embeddings generated from the `i`-th multimodal data item (e.g, image) of the request. -- Implement [get_input_embeddings][vllm.model_executor.models.interfaces.SupportsMultiModal.get_input_embeddings] to merge `multimodal_embeddings` with text embeddings from the `input_ids`. If input processing for the model is implemented correctly (see sections below), then you can leverage the utility function we provide to easily merge the embeddings. 
- - ??? code - - ```python - from .utils import merge_multimodal_embeddings - - class YourModelForImage2Seq(nn.Module): - ... - - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - - # `get_input_embeddings` should already be implemented for the language - # model as one of the requirements of basic vLLM model implementation. - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - multimodal_embeddings=multimodal_embeddings, - placeholder_token_id=self.config.image_token_index) +!!! note + By default, vLLM merges the multimodal embeddings into text embeddings depending on the information of their locations defined in + [PlaceholderRange][vllm.multimodal.inputs.PlaceholderRange] from input processing. + This logic can be found at [get_input_embeddings][vllm.model_executor.models.interfaces.SupportsMultiModal.get_input_embeddings]. - return inputs_embeds - ``` + You may override this method if additional logic is required for your model when merging embeddings. - Implement [get_language_model][vllm.model_executor.models.interfaces.SupportsMultiModal.get_language_model] getter to provide stable access to the underlying language model. diff --git a/vllm/config/model.py b/vllm/config/model.py index b2b68abd2c1d..3fb448ebbf36 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -509,9 +509,14 @@ def _task_to_convert(task: TaskOption) -> ConvertType: else: # task == "auto" pass else: + debug_info = { + "architectures": architectures, + "is_generative_model": is_generative_model, + "is_pooling_model": is_pooling_model, + } raise AssertionError("The model should be a generative or " "pooling model when task is set to " - f"{self.task!r}.") + f"{self.task!r}. 
Found: {debug_info}") self.runner = runner self.convert = convert diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index 35c1adbdd00b..6cef5e134a4b 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -38,8 +38,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsQuant from .llama import LlamaDecoderLayer, LlamaMLP, LlamaModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - is_pp_missing_parameter, maybe_prefix, - merge_multimodal_embeddings) + is_pp_missing_parameter, maybe_prefix) class AriaImagePixelInputs(TensorSchema): @@ -605,19 +604,6 @@ def get_multimodal_embeddings(self, multimodal_embeddings = self._process_image_input(image_input) return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.image_token_index) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -628,10 +614,11 @@ def forward( ) -> Union[torch.Tensor, IntermediateTensors]: if inputs_embeds is None: multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - # always pass the input via `inputs_embeds` - # to make sure the computation graph is consistent - inputs_embeds = self.get_input_embeddings(input_ids, - multimodal_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + multimodal_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model( diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index 6fd8c2fb5c56..eab996e9ba22 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -33,8 +33,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) class AyaVisionImagePixelInputs(TensorSchema): @@ -417,23 +416,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input, **kwargs) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - multimodal_embeddings=multimodal_embeddings, - placeholder_token_id=self.config.image_token_index, - ) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -449,8 +431,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model( diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py index ee32587f6b1b..c984845204c4 100644 --- a/vllm/model_executor/models/bert.py +++ b/vllm/model_executor/models/bert.py @@ -348,6 +348,9 @@ def __init__( self.encoder = BertEncoder(vllm_config=vllm_config, prefix=f"{prefix}.encoder") + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -457,6 +460,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix(prefix, "model")) self.pooler = self._build_pooler(pooler_config) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -588,6 +594,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): ), }) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.bert.get_input_embeddings(input_ids) + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loader = AutoWeightsLoader(self) loaded_params = loader.load_weights(weights) @@ -637,6 +646,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): Pooler.for_encode(pooler_config), }) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.bert.get_input_embeddings(input_ids) + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loader = AutoWeightsLoader(self) loaded_params = loader.load_weights(weights) diff --git a/vllm/model_executor/models/bert_with_rope.py b/vllm/model_executor/models/bert_with_rope.py index bfc1408ddf88..4e1eba32d259 100644 --- a/vllm/model_executor/models/bert_with_rope.py +++ b/vllm/model_executor/models/bert_with_rope.py @@ -426,6 +426,9 @@ def __init__(self, prefix=f"{prefix}.encoder") self.pooler = BertPooler(self.config) if add_pooling_layer else None + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -673,6 +676,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loaded_params = loader.load_weights(weights) return loaded_params + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.new.get_input_embeddings(input_ids) + def forward( self, input_ids: Optional[torch.Tensor], diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py index b7455fba62c0..4d1850d07b28 100644 --- a/vllm/model_executor/models/blip2.py +++ b/vllm/model_executor/models/blip2.py @@ -27,7 +27,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsMultiModal, SupportsPP, SupportsQuant) from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) # We use this internally as placeholders since there is no image token # defined on the HuggingFace repo @@ -631,19 +631,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_image_input(image_input) return vision_embeddings - def 
get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - _IMAGE_TOKEN_ID) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -689,8 +676,11 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == _IMAGE_TOKEN_ID, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index 79d648d749c6..f9740adb151b 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -44,7 +44,7 @@ SupportsQuant) from .utils import (flatten_bn, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) logger = init_logger(__name__) @@ -1002,20 +1002,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self.model.get_input_embeddings(image_tokens) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - - inputs_embeds = self.model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.model.vocabulary_mapping.image_token_id) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -1032,8 +1018,12 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + image_token_id = self.model.vocabulary_mapping.image_token_id + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == image_token_id, + ) input_ids = None hidden_states = self.model(input_ids, diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 879508400222..c182201fe256 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -433,6 +433,9 @@ def __init__( self.make_empty_intermediate_tensors = ( self.transformer.make_empty_intermediate_tensors) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.transformer.get_input_embeddings(input_ids) + def compute_logits( self, hidden_states: torch.Tensor, diff --git a/vllm/model_executor/models/cohere2_vision.py b/vllm/model_executor/models/cohere2_vision.py index 6d67eb68d51a..99edcba4d874 100644 --- a/vllm/model_executor/models/cohere2_vision.py +++ b/vllm/model_executor/models/cohere2_vision.py @@ -37,8 +37,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) class Cohere2VisionImagePixelInputs(TensorSchema): @@ -430,23 +429,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input, **kwargs) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - multimodal_embeddings=multimodal_embeddings, - placeholder_token_id=self.config.image_token_id, - ) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -462,8 +444,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_id, + ) input_ids = None hidden_states = self.language_model.model( diff --git a/vllm/model_executor/models/deepseek_eagle.py b/vllm/model_executor/models/deepseek_eagle.py index ed7e7614800f..c42a66d86912 100644 --- a/vllm/model_executor/models/deepseek_eagle.py +++ b/vllm/model_executor/models/deepseek_eagle.py @@ -66,6 +66,9 @@ def __init__( self.norm = RMSNorm(self.config.hidden_size, eps=self.config.rms_norm_eps) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -205,6 +208,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.logits_processor = LogitsProcessor(self.config.vocab_size, scale=logit_scale) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index 92f311ab465b..a4623ff13cec 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -101,6 +101,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): ) self.logits_processor = LogitsProcessor(config.vocab_size) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -142,6 +145,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix( prefix, "model")) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index c8ed759d2e97..b98008c83bdc 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -41,8 +41,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) # The image token id may be various _IMAGE_TOKEN = "" @@ -346,7 +345,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): model_config = vllm_config.model_config tokenizer = cached_tokenizer_from_config(model_config) - self.image_token_id = tokenizer.vocab[_IMAGE_TOKEN] + self.image_token_id: int = tokenizer.vocab[_IMAGE_TOKEN] self.vision = self._init_vision_module(self.vision_config, quant_config, @@ -605,19 +604,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_image_input(image_input) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - 
self.image_token_id) - return inputs_embeds - def forward(self, input_ids: torch.Tensor, positions: torch.Tensor, @@ -632,8 +618,11 @@ def forward(self, # condition is for v0 compatibility elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.image_token_id, + ) input_ids = None hidden_states = self.language_model(input_ids, diff --git a/vllm/model_executor/models/dots_ocr.py b/vllm/model_executor/models/dots_ocr.py index 2db350c892ae..4845f19bcbc4 100644 --- a/vllm/model_executor/models/dots_ocr.py +++ b/vllm/model_executor/models/dots_ocr.py @@ -34,8 +34,7 @@ Qwen2VLProcessingInfo) from vllm.model_executor.models.utils import (AutoWeightsLoader, WeightsMapper, init_vllm_registered_model, - maybe_prefix, - merge_multimodal_embeddings) + maybe_prefix) from vllm.model_executor.models.vision import get_vit_attn_backend from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalDataDict @@ -796,33 +795,17 @@ def _process_image_input( def get_language_model(self) -> torch.nn.Module: return self.language_model - def get_multimodal_embeddings( - self, **kwargs: object) -> Optional[MultiModalEmbeddings]: + def get_multimodal_embeddings(self, + **kwargs: object) -> MultiModalEmbeddings: image_input = self._parse_and_validate_image_input(**kwargs) if image_input is None: return [] vision_embeddings = self._process_image_input(image_input) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_id, - ) - - return inputs_embeds - def forward( self, - input_ids: Optional[torch.Tensor], + input_ids: torch.Tensor, positions: torch.Tensor, intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, @@ -830,17 +813,14 @@ def forward( ) -> Union[torch.Tensor, IntermediateTensors]: if intermediate_tensors is not None: inputs_embeds = None - elif inputs_embeds is None and kwargs.get("pixel_values") is not None: - image_input = self._parse_and_validate_image_input(**kwargs) - if image_input is None: - inputs_embeds = None - else: - assert input_ids is not None - inputs_embeds = self.get_multimodal_embeddings( - input_ids, - image_input=image_input, - ) - input_ids = None + elif inputs_embeds is None: + vision_embeddings = self.get_multimodal_embeddings(**kwargs) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_id, + ) + input_ids = None hidden_states = self.language_model( input_ids=input_ids, diff --git a/vllm/model_executor/models/ernie45_vl.py b/vllm/model_executor/models/ernie45_vl.py index 74b358034ef3..a73ec4f88ffe 100644 --- a/vllm/model_executor/models/ernie45_vl.py +++ b/vllm/model_executor/models/ernie45_vl.py @@ -60,8 +60,7 @@ from .ernie45_vl_moe import Ernie4_5_VLMoeForCausalLM from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) -from .utils import (AutoWeightsLoader, WeightsMapper, maybe_prefix, - merge_multimodal_embeddings) 
+from .utils import AutoWeightsLoader, WeightsMapper, maybe_prefix from .vision import get_vit_attn_backend logger = init_logger(__name__) @@ -1467,18 +1466,24 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - - if multimodal_embeddings is None: - return inputs_embeds - - self._set_visual_token_mask(input_ids) - inputs_embeds = merge_multimodal_embeddings(input_ids, inputs_embeds, - multimodal_embeddings, - [self.config.im_patch_id]) - return inputs_embeds + if multimodal_embeddings is not None and len( + multimodal_embeddings) > 0: + self._set_visual_token_mask(input_ids) + + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index 288fbe736c32..3b24bf2f1ef8 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -116,6 +116,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): ) self.logits_processor = LogitsProcessor(config.vocab_size) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -160,6 +163,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): if self.config.tie_word_embeddings: self.lm_head.weight = self.model.embed_tokens.weight + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py index 53e9e6fe6e46..b99fe33a1dcc 100644 --- a/vllm/model_executor/models/fuyu.py +++ b/vllm/model_executor/models/fuyu.py @@ -42,8 +42,7 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP -from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, maybe_prefix, - merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, WeightsMapper, flatten_bn, maybe_prefix # Cannot find the following 2 numbers from hf config. _IMAGE_TOKEN_ID = 71011 @@ -342,22 +341,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - _IMAGE_TOKEN_ID, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -373,8 +356,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == _IMAGE_TOKEN_ID, + ) input_ids = None hidden_states = self.language_model( diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index 0630ee07c347..be75e36fe23b 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -37,8 +37,7 @@ SupportsMultiModal, SupportsPP) from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) logger = init_logger(__name__) @@ -588,22 +587,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - return inputs_embeds - def forward(self, input_ids: torch.Tensor, positions: torch.Tensor, @@ -618,8 +601,11 @@ def forward(self, elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) if (vision_embeddings is not None) and len(vision_embeddings) != 0: kwargs = self.prepare_attn_masks( input_ids, diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py index 2acdba54a257..b23437a08e5a 100644 --- a/vllm/model_executor/models/gemma3n_mm.py +++ b/vllm/model_executor/models/gemma3n_mm.py @@ -632,8 +632,10 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) # NOTE (NickLucche) Each pass needs tokens to compute PLE so we cache # them here, as the model forward has only access to the input_embeds. 
if input_ids is not None: @@ -645,15 +647,16 @@ def get_input_embeddings( self.per_layer_embeddings[:per_layer_inputs.shape[0]].copy_( per_layer_inputs) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - # NOTE: this order of processing mm items is important - [self.config.image_token_id, self.config.audio_token_id]) - return inputs_embeds + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward(self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index b088e0c0dd24..dbb5431ae491 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -1552,23 +1552,6 @@ def get_multimodal_embeddings( multimodal_embeddings += video_embeddings return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if (multimodal_embeddings is not None - and len(multimodal_embeddings) != 0 - and all(embed.numel() > 0 for embed in multimodal_embeddings)): - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - [self.config.image_token_id, self.config.video_token_id], - ) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/glm4_moe_mtp.py b/vllm/model_executor/models/glm4_moe_mtp.py index c572978e6220..826d541e571b 100644 --- a/vllm/model_executor/models/glm4_moe_mtp.py +++ b/vllm/model_executor/models/glm4_moe_mtp.py @@ -132,6 +132,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): ) self.logits_processor = LogitsProcessor(config.vocab_size) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -173,6 +176,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=maybe_prefix( prefix, "model")) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py index bf33575859ae..ace9c05daf15 100644 --- a/vllm/model_executor/models/glm4v.py +++ b/vllm/model_executor/models/glm4v.py @@ -43,7 +43,7 @@ from .chatglm import ChatGLMBaseModel, ChatGLMModel from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) -from .utils import flatten_bn, merge_multimodal_embeddings +from .utils import flatten_bn, isin_list class GLMVImagePixelInputs(TensorSchema): @@ -607,28 +607,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_image_input(image_input) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.transformer.get_input_embeddings(input_ids) - - if 
multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - multimodal_embeddings=multimodal_embeddings, - placeholder_token_id=[ - self.config.boi_token_id, - self.config.pad_token_id, - self.config.eoi_token_id, - ], - ) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -644,8 +622,15 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=isin_list(input_ids, [ + self.config.boi_token_id, + self.config.pad_token_id, + self.config.eoi_token_id, + ]), + ) input_ids = None hidden_states = self.transformer(input_ids, positions, diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py index a5849184339b..8a02da58ea0b 100644 --- a/vllm/model_executor/models/granite_speech.py +++ b/vllm/model_executor/models/granite_speech.py @@ -52,8 +52,7 @@ from .blip2 import Blip2QFormerModel from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) -from .utils import (AutoWeightsLoader, embed_multimodal, - init_vllm_registered_model, maybe_prefix) +from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix ### Audio Input @@ -720,6 +719,9 @@ def _process_audio_input( # Split variable length features into a tuple return torch.split(masked_embeds, audio_input["audio_embed_sizes"]) + def get_language_model(self) -> torch.nn.Module: + return self.language_model + def get_multimodal_embeddings( self, **kwargs: object, @@ -728,7 +730,7 @@ def get_multimodal_embeddings( audio_input = self._parse_and_validate_audio_input(**kwargs) if audio_input is None: return [] - return None + audio_features = self._process_audio_input(audio_input) return audio_features @@ -736,19 +738,21 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + # Multi-modal token ID may exceed vocab size + handle_oov_mm_token: bool = True, ) -> torch.Tensor: - """Compute the merged LLM / audio embeddings.""" - if multimodal_embeddings is None \ - or len(multimodal_embeddings) == 0: - return self.language_model.get_input_embeddings(input_ids) + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) - inputs_embeds = embed_multimodal( + return super().get_input_embeddings( input_ids, - self.config.audio_token_index, - self.language_model.model.get_input_embeddings, - multimodal_embeddings, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, ) - return inputs_embeds def forward( self, @@ -765,7 +769,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: audio_embeds = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, audio_embeds) + inputs_embeds = self.get_input_embeddings( + input_ids, + audio_embeds, + is_multimodal=input_ids == self.config.audio_token_index, + ) input_ids = None model_output = self.language_model(input_ids, positions, diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index 8a23a6b45bc7..d28c97116790 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -989,6 +989,9 @@ def update_physical_experts_metadata( moe.n_redundant_experts = self.num_redundant_experts moe.experts.update_expert_map() + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index 4d39ff9ae79e..f851688bf7ba 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -45,8 +45,8 @@ from .clip import CLIPVisionModel from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel -from .utils import (AutoWeightsLoader, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) +from .utils import (AutoWeightsLoader, init_vllm_registered_model, isin_list, + maybe_prefix) from .vision import get_vision_encoder_info EOT = "<|endofturn|>" @@ -691,7 +691,7 @@ def get_language_model(self) -> torch.nn.Module: def get_multimodal_embeddings( self, **kwargs: Unpack[HCXVisionMultimodalInputs], - ) -> Optional[MultiModalEmbeddings]: + ) -> MultiModalEmbeddings: multimodal_embeddings = list() if kwargs.get("pixel_values_images") is not None: @@ -736,26 +736,6 @@ def get_multimodal_embeddings( multimodal_embeddings.append(_multimodal_embeddings_videos) return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - placeholder_token_id=[ - self.config.image_token_id, - self.config.video_token_id, - ], - ) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -771,8 +751,13 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - multimodal_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + multimodal_embeddings, + is_multimodal=isin_list( + input_ids, + [self.config.image_token_id, self.config.video_token_id]), + ) input_ids = None hidden_states = self.language_model.model(input_ids, positions, diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index 79e130119ae8..3334ee224253 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -52,8 +52,7 @@ # yapf: enable from .interfaces import MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal from .llama import LlamaModel -from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix, - merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, flatten_bn, maybe_prefix class Idefics3ImagePixelInputs(TensorSchema): @@ -539,10 +538,7 @@ def image_pixels_to_features( return image_hidden_states - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.text_model.get_input_embeddings(input_ids) def forward( @@ -695,22 +691,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_id, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -726,8 +706,11 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_id, + ) input_ids = None hidden_states = self.model.text_model(input_ids, diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index f13e590cd243..d40df9b43dd4 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -2,8 +2,8 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable, Mapping, MutableSequence -from typing import (TYPE_CHECKING, ClassVar, Literal, Optional, Protocol, - Union, overload, runtime_checkable) +from typing import (TYPE_CHECKING, Callable, ClassVar, Literal, Optional, + Protocol, Union, overload, runtime_checkable) import numpy as np import torch @@ -20,7 +20,7 @@ QuantizationConfig) from vllm.utils import supports_kw -from .interfaces_base import is_pooling_model +from .interfaces_base import VllmModel, is_pooling_model if TYPE_CHECKING: from vllm.config import VllmConfig @@ -90,7 +90,7 @@ def get_multimodal_embeddings(self, """ ... - def get_language_model(self) -> torch.nn.Module: + def get_language_model(self) -> VllmModel: """ Returns the underlying language model used for text generation. @@ -102,17 +102,84 @@ def get_language_model(self) -> torch.nn.Module: """ ... 
+    @overload
+    def get_input_embeddings(self, input_ids: Tensor) -> Tensor:
+        ...
+
+    @overload
+    def get_input_embeddings(
+        self,
+        input_ids: Tensor,
+        multimodal_embeddings: MultiModalEmbeddings,
+        *,
+        is_multimodal: torch.Tensor,
+        handle_oov_mm_token: bool = False,
+    ) -> Tensor:
+        ...
+
+    def _get_text_embeddings(
+        self,
+        input_ids: Tensor,
+        get_input_embeddings: Callable[[Tensor], Tensor],
+        *,
+        is_multimodal: Optional[Tensor],
+        handle_oov_mm_token: bool,
+    ) -> Tensor:
+        if handle_oov_mm_token and is_multimodal is not None:
+            is_text = ~is_multimodal
+            text_embeds = get_input_embeddings(input_ids[is_text])
+
+            return torch.empty(
+                (input_ids.shape[0], text_embeds.shape[1]),
+                dtype=text_embeds.dtype,
+                device=text_embeds.device,
+            ).masked_scatter_(is_text.unsqueeze_(-1), text_embeds)
+
+        return get_input_embeddings(input_ids)
+
     def get_input_embeddings(
         self,
         input_ids: Tensor,
         multimodal_embeddings: Optional[MultiModalEmbeddings] = None,
+        *,
+        is_multimodal: Optional[Tensor] = None,
+        handle_oov_mm_token: bool = False,
     ) -> Tensor:
         """
-        Returns the input embeddings merged from the text embeddings from
-        input_ids and the multimodal embeddings generated from multimodal
-        kwargs.
+        Apply token embeddings to `input_ids`.
+
+        If `multimodal_embeddings` is passed, scatter them into
+        `input_ids` according to the mask `is_multimodal`.
+
+        In case the multi-modal token IDs exceed the vocabulary size of
+        the language model, you can set `handle_oov_mm_token=True`
+        to avoid calling the language model's `get_input_embeddings` method
+        on those tokens. Note however that doing so increases memory usage
+        as an additional buffer is needed to hold the input embeddings.
         """
-        ...
+        from .utils import _merge_multimodal_embeddings
+
+        inputs_embeds = self._get_text_embeddings(
+            input_ids,
+            self.get_language_model().get_input_embeddings,
+            is_multimodal=is_multimodal,
+            handle_oov_mm_token=handle_oov_mm_token,
+        )
+
+        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
+            return inputs_embeds
+
+        if is_multimodal is None:
+            raise ValueError(
+                "`get_input_embeddings` now requires `is_multimodal` arg, "
+                "please update your model runner according to "
+                "https://github.com/vllm-project/vllm/pull/16229.")
+
+        return _merge_multimodal_embeddings(
+            inputs_embeds=inputs_embeds,
+            multimodal_embeddings=multimodal_embeddings,
+            is_multimodal=is_multimodal,
+        )


 @runtime_checkable
diff --git a/vllm/model_executor/models/interfaces_base.py b/vllm/model_executor/models/interfaces_base.py
index 8fdf70e35a2b..84146db0943c 100644
--- a/vllm/model_executor/models/interfaces_base.py
+++ b/vllm/model_executor/models/interfaces_base.py
@@ -41,6 +41,13 @@ def __init__(
     ) -> None:
         ...

+    def get_input_embeddings(
+        self,
+        input_ids: torch.Tensor,
+    ) -> torch.Tensor:
+        """Apply token embeddings to `input_ids`."""
+        ...
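The default `SupportsMultiModal.get_input_embeddings` implementation above delegates the final scatter to `_merge_multimodal_embeddings` from `.utils`, whose body is not part of this patch. A minimal sketch of the semantics it is expected to provide, assuming it simply validates the count and overwrites the masked positions (the standalone function name, dtype cast, and error message below are illustrative assumptions, not the actual vLLM helper):

    import torch

    def merge_multimodal_embeddings_sketch(
        inputs_embeds: torch.Tensor,   # [num_tokens, hidden_size]
        multimodal_embeddings,         # sequence of [n_i, hidden_size] tensors
        is_multimodal: torch.Tensor,   # [num_tokens] bool mask of placeholder slots
    ) -> torch.Tensor:
        # Concatenate per-item embeddings in the order the placeholders appear.
        mm_embeds = torch.cat(list(multimodal_embeddings)).to(inputs_embeds.dtype)
        num_slots = int(is_multimodal.sum())
        if mm_embeds.shape[0] != num_slots:
            raise ValueError(
                f"Got {mm_embeds.shape[0]} multimodal embeddings for "
                f"{num_slots} placeholder positions")
        # Overwrite only the placeholder positions; text embeddings stay untouched.
        inputs_embeds[is_multimodal] = mm_embeds
        return inputs_embeds

With `handle_oov_mm_token=True`, `_get_text_embeddings` runs the embedding lookup only on the `~is_multimodal` positions, so placeholder IDs outside the text vocabulary never reach the embedding table; the trade-off is the extra preallocated buffer mentioned in the docstring.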
+ def forward( self, input_ids: torch.Tensor, @@ -54,6 +61,19 @@ def _check_vllm_model_init(model: Union[type[object], object]) -> bool: return supports_kw(model_init, "vllm_config") +def _check_vllm_model_get_input_embeddings( + model: Union[type[object], object]) -> bool: + model_get_input_embeddings = getattr(model, "get_input_embeddings", None) + if not callable(model_get_input_embeddings): + logger.warning( + "The model (%s) is missing the `get_input_embeddings` method.", + model, + ) + return False + + return True + + def _check_vllm_model_forward(model: Union[type[object], object]) -> bool: model_forward = getattr(model, "forward", None) if not callable(model_forward): @@ -88,7 +108,9 @@ def is_vllm_model(model: object) -> TypeIs[VllmModel]: def is_vllm_model( model: Union[type[object], object], ) -> Union[TypeIs[type[VllmModel]], TypeIs[VllmModel]]: - return _check_vllm_model_init(model) and _check_vllm_model_forward(model) + return (_check_vllm_model_init(model) + and _check_vllm_model_get_input_embeddings(model) + and _check_vllm_model_forward(model)) @runtime_checkable diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py index 197d629b906f..545dad1a96f5 100644 --- a/vllm/model_executor/models/interns1.py +++ b/vllm/model_executor/models/interns1.py @@ -40,8 +40,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, isin_list, maybe_prefix) class InternS1MultiModalProjector(nn.Module): @@ -767,24 +766,24 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - context_token_ids = [ - token_id for token_id in (self.img_context_token_id, - self.video_context_token_id) - if token_id is not None - ] - assert len(context_token_ids) >= 1 + if multimodal_embeddings is not None and len( + multimodal_embeddings) > 0: self._set_visual_token_mask(input_ids) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - context_token_ids, - ) - return inputs_embeds + + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, @@ -802,9 +801,17 @@ def forward( # NOTE: In v1, inputs_embeds is always generated at model runner, this # condition is for v0 compatibility. 
elif inputs_embeds is None: + context_token_ids = [ + token_id for token_id in (self.img_context_token_id, + self.video_context_token_id) + if token_id is not None + ] vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=isin_list(input_ids, context_token_ids), + ) input_ids = None forward_kwargs = { diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index f4004e518e3b..78aac8541434 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -43,7 +43,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + isin_list, maybe_prefix) IMG_START = '' IMG_END = '' @@ -1339,24 +1339,24 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - context_token_ids = [ - token_id for token_id in (self.img_context_token_id, - self.video_context_token_id) - if token_id is not None - ] - assert len(context_token_ids) >= 1 + if multimodal_embeddings is not None and len( + multimodal_embeddings) > 0: self._set_visual_token_mask(input_ids) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - context_token_ids, - ) - return inputs_embeds + + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, @@ -1374,9 +1374,17 @@ def forward( # NOTE: In v1, inputs_embeds is always generated at model runner, this # condition is for v0 compatibility. 
elif inputs_embeds is None: + context_token_ids = [ + token_id for token_id in (self.img_context_token_id, + self.video_context_token_id) + if token_id is not None + ] vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=isin_list(input_ids, context_token_ids), + ) input_ids = None forward_kwargs = { diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py index 3b6fdba22512..62a71b7b1fa8 100644 --- a/vllm/model_executor/models/keye.py +++ b/vllm/model_executor/models/keye.py @@ -1450,24 +1450,6 @@ def get_multimodal_embeddings( multimodal_embeddings += video_embeddings return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - [ - self.config.image_token_id, - self.config.video_token_id, - ], - ) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index 503627865c4a..db032736f914 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -66,7 +66,6 @@ from vllm.model_executor.models.interfaces import (SupportsMultiModal, SupportsPP) from vllm.model_executor.models.moonvit import MoonVitPretrainedModel -from vllm.model_executor.models.utils import merge_multimodal_embeddings from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargsItems, NestedTensors) @@ -424,26 +423,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_image_input(image_input) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[NestedTensors] = None, - ) -> torch.Tensor: - - # `get_input_embeddings` should already be implemented for the language - # model as one of the requirements of basic vLLM model implementation. - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - - if multimodal_embeddings is not None and len( - multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids=input_ids, - inputs_embeds=inputs_embeds, - multimodal_embeddings=multimodal_embeddings, - placeholder_token_id=self.config.media_placeholder_token_id) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -462,14 +441,12 @@ def forward( if image_input is None: inputs_embeds = None else: - inputs_embeds = self.get_input_embeddings(input_ids) image_embeds = self._process_image_input(image_input) - inputs_embeds = merge_multimodal_embeddings( + inputs_embeds = self.get_input_embeddings( input_ids, - inputs_embeds, image_embeds, - placeholder_token_id=self.config. 
- media_placeholder_token_id, + is_multimodal=input_ids == + self.config.media_placeholder_token_id, ) input_ids = None diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py index 53c36e4e52d8..f9def222a1ec 100644 --- a/vllm/model_executor/models/lfm2.py +++ b/vllm/model_executor/models/lfm2.py @@ -522,6 +522,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py index a203af53205c..235275c0940a 100644 --- a/vllm/model_executor/models/llama4_eagle.py +++ b/vllm/model_executor/models/llama4_eagle.py @@ -37,9 +37,9 @@ from vllm.model_executor.models.llama4 import (Llama4DecoderLayer, Llama4ForCausalLM) from vllm.model_executor.models.utils import extract_layer_index -from vllm.multimodal.inputs import NestedTensors -from .utils import AutoWeightsLoader, maybe_prefix, merge_multimodal_embeddings +from .interfaces import SupportsMultiModal +from .utils import AutoWeightsLoader, maybe_prefix logger = init_logger(__name__) @@ -79,10 +79,7 @@ def __init__( self.norm = RMSNorm(self.config.hidden_size, eps=self.config.rms_norm_eps) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embed_tokens(input_ids) def forward( @@ -194,6 +191,11 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.logits_processor = LogitsProcessor(self.config.vocab_size, scale=logit_scale) + def get_language_model(self) -> torch.nn.Module: + return self.model + + get_input_embeddings = SupportsMultiModal.get_input_embeddings # type: ignore + def forward( self, input_ids: torch.Tensor, @@ -220,20 +222,3 @@ def transform(inputs): skip_prefixes=(["lm_head."]), ) loader.load_weights(map(transform, weights)) - - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[NestedTensors] = None, - ) -> torch.Tensor: - inputs_embeds = self.model.get_input_embeddings(input_ids) - - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - - return inputs_embeds diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py index 2ff2d54a83aa..d6e6fd3fcfe9 100644 --- a/vllm/model_executor/models/llama_eagle.py +++ b/vllm/model_executor/models/llama_eagle.py @@ -73,6 +73,9 @@ def __init__( self.config.hidden_size, bias=False) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -149,6 +152,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.logits_processor = LogitsProcessor(self.config.vocab_size, scale=logit_scale) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index 55b6ae6ee0e9..34b8ea0ca536 100644 --- 
a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -18,7 +18,6 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader -from vllm.model_executor.models.interfaces import MultiModalEmbeddings from vllm.model_executor.models.llama import (LlamaDecoderLayer, LlamaForCausalLM) @@ -144,10 +143,7 @@ def __init__( eps=self.config.rms_norm_eps, ) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embed_tokens(input_ids) def forward( @@ -239,6 +235,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): requires_grad=False, ) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -302,11 +301,3 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): skip_substrs=skip_substrs, ) loader.load_weights(model_weights.items()) - - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.model.get_input_embeddings(input_ids) - return inputs_embeds diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 4d8ed95b6cc8..6f3cfd88aee2 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -41,8 +41,7 @@ from .pixtral import PixtralHFEncoderInfo, PixtralHFVisionModel from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import get_vision_encoder_info @@ -676,22 +675,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -744,8 +727,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index c9133fde1455..e132389c4f06 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -25,8 +25,8 @@ LlavaDummyInputsBuilder, LlavaLikeConfig, LlavaMultiModalProjector, init_vision_tower_for_llava) from .siglip import SiglipVisionModel -from .utils import (AutoWeightsLoader, WeightsMapper, embed_multimodal, - flatten_bn, init_vllm_registered_model, maybe_prefix) +from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, + init_vllm_registered_model, maybe_prefix) class LlavaNextImagePixelInputs(TensorSchema): @@ -474,19 +474,21 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + # Multi-modal token ID may exceed vocab size + handle_oov_mm_token: bool = True, ) -> torch.Tensor: + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) - if multimodal_embeddings is None \ - or len(multimodal_embeddings) == 0: - return self.language_model.get_input_embeddings(input_ids) - - inputs_embeds = embed_multimodal( + return super().get_input_embeddings( input_ids, - self.config.image_token_index, - self.language_model.model.get_input_embeddings, - multimodal_embeddings, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, ) - return inputs_embeds def forward( self, @@ -549,8 +551,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index 610fb188d57d..2642d8c77cf3 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -30,8 +30,7 @@ from .llava import init_vision_tower_for_llava from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import get_vision_encoder_info @@ -415,19 +414,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_video_pixels(video_input) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.video_token_index) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -449,8 +435,11 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.video_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py index cee9ddaf94cc..906858f4e2f4 100644 --- a/vllm/model_executor/models/llava_onevision.py +++ b/vllm/model_executor/models/llava_onevision.py @@ -850,19 +850,6 @@ def get_multimodal_embeddings(self, return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [self.config.image_token_index, self.config.video_token_index]) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py index 82648ba668ca..0bf04e0e7e2f 100644 --- a/vllm/model_executor/models/midashenglm.py +++ b/vllm/model_executor/models/midashenglm.py @@ -54,8 +54,7 @@ from vllm.transformers_utils.configs.midashenglm import DashengConfig from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP -from .utils import (AutoWeightsLoader, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix _Tuple2 = Union[int, tuple[int, 
int], Sequence[int]] @@ -744,21 +743,6 @@ def get_multimodal_embeddings(self, return [] return self._process_audio_input(audio_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.decoder.get_input_embeddings(input_ids) - if multimodal_embeddings and len(multimodal_embeddings) > 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.audio_token_id, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -771,8 +755,11 @@ def forward( inputs_embeds = None elif inputs_embeds is None: multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - multimodal_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + multimodal_embeddings, + is_multimodal=input_ids == self.config.audio_token_id, + ) input_ids = None return self.decoder.model(input_ids, diff --git a/vllm/model_executor/models/mimo_mtp.py b/vllm/model_executor/models/mimo_mtp.py index b4abe458e477..9c1e36094c4a 100644 --- a/vllm/model_executor/models/mimo_mtp.py +++ b/vllm/model_executor/models/mimo_mtp.py @@ -117,6 +117,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.logits_processor = LogitsProcessor(config.vocab_size) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -158,6 +161,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config.hidden_size, prefix=maybe_prefix(prefix, "lm_head")) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py index a17c4f004d75..bffc9a0c125e 100644 --- a/vllm/model_executor/models/minicpmv.py +++ b/vllm/model_executor/models/minicpmv.py @@ -71,8 +71,7 @@ from .idefics2_vision_model import Idefics2VisionTransformer from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) -from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix, - merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, flatten_bn, isin_list, maybe_prefix # For profile run _MAX_FRAMES_PER_VIDEO = 16 @@ -1144,23 +1143,6 @@ def get_multimodal_embeddings(self, return self._process_multimodal_inputs(modalities) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.llm.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - assert len(self.mm_token_ids) > 0 - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - list(self.mm_token_ids), - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -1178,8 +1160,11 @@ def forward( elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=isin_list(input_ids, list(self.mm_token_ids)), + ) input_ids = None hidden_states = self.llm.model( diff --git 
a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index cc9a959f6331..a92890c9f7b5 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -592,10 +592,7 @@ def _clear_prefill_cache(self, attn_metadata, dtype=torch.long) minimax_cache_tensors[:, slots_tensor, ...] = 0 - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embed_tokens(input_ids) def forward(self, @@ -687,10 +684,7 @@ def get_seqlen_agnostic_capture_inputs(self, batch_size: int): return self.model.minimax_cache.get_seqlen_agnostic_capture_inputs( batch_size) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.model.get_input_embeddings(input_ids) def forward(self, diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py index d81ac8c704e7..d41b9d3f14fe 100644 --- a/vllm/model_executor/models/minimax_vl_01.py +++ b/vllm/model_executor/models/minimax_vl_01.py @@ -28,7 +28,7 @@ from .pixtral import PixtralHFVisionModel from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) class MiniMaxVL01ImagePixelInputs(TensorSchema): @@ -218,22 +218,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: self.make_empty_intermediate_tensors = ( self.language_model.make_empty_intermediate_tensors) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - return inputs_embeds - def get_language_model(self) -> torch.nn.Module: return self.language_model @@ -403,8 +387,11 @@ def forward( inputs_embeds = None elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py index ba6da4403ae1..31571ce962d1 100644 --- a/vllm/model_executor/models/mistral3.py +++ b/vllm/model_executor/models/mistral3.py @@ -38,8 +38,7 @@ SupportsMultiModal, SupportsPP) from .pixtral import PixtralHFEncoderInfo, PixtralHFVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import get_vision_encoder_info @@ -524,22 +523,6 @@ def get_multimodal_embeddings(self, return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if 
multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -592,8 +575,11 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py index 79e315f79489..3af5267928cd 100644 --- a/vllm/model_executor/models/mllama4.py +++ b/vllm/model_executor/models/mllama4.py @@ -42,7 +42,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, - MultiModalKwargsItems, NestedTensors) + MultiModalKwargsItems) from vllm.multimodal.parse import (ImageProcessorItems, ImageSize, MultiModalDataItems) from vllm.multimodal.processing import (BaseMultiModalProcessor, @@ -56,8 +56,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .llama4 import Llama4ForCausalLM -from .utils import (AutoWeightsLoader, flatten_bn, maybe_prefix, - merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, flatten_bn, maybe_prefix from .vision import run_dp_sharded_vision_model @@ -813,24 +812,6 @@ def get_multimodal_embeddings(self, **kwargs) -> MultiModalEmbeddings: return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[NestedTensors] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - - if multimodal_embeddings is not None and len( - multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -846,8 +827,11 @@ def forward( # this condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None return self.language_model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/modernbert.py b/vllm/model_executor/models/modernbert.py index 1d5da3139de9..e4a51b369737 100644 --- a/vllm/model_executor/models/modernbert.py +++ b/vllm/model_executor/models/modernbert.py @@ -43,6 +43,9 @@ def __init__(self, config: ModernBertConfig): eps=config.layer_norm_eps, bias=config.norm_bias) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.tok_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -220,6 +223,9 @@ def __init__( eps=config.norm_eps, bias=config.norm_bias) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embeddings.get_input_embeddings(input_ids) + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: weights = self.hf_to_vllm_mapper.apply(weights) @@ -333,6 +339,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): ), }) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): self_weights = [] diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index 201bf83cac58..054caee9e8a4 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -58,7 +58,7 @@ from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) # TODO: hard-coded for now. Consider making it configurable. VIT_LAYERS = [-2, -9] @@ -819,10 +819,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): make_empty_intermediate_tensors_factory( ["hidden_states", "residual"], config.hidden_size)) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embed_tokens(input_ids) def forward( @@ -1481,24 +1478,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - assert self.img_patch_id is not None - - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.img_patch_id, - ) - return inputs_embeds - def forward( self, input_ids: torch.LongTensor, @@ -1515,8 +1494,11 @@ def forward( # condition is for v0 compatibility. 
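The ModernBert additions above (and the matching ones for Olmo2, RoBERTa, Solar and Step3 text later in the patch) only forward get_input_embeddings to the inner module's token embedding, so every registered model exposes the same entry point whether or not it is multimodal. A toy sketch of that delegation, with hypothetical class names:

    # Hypothetical minimal example of the one-line delegation being added to
    # the text-only models in this patch.
    import torch

    class InnerModel(torch.nn.Module):
        def __init__(self, vocab_size: int = 64, hidden: int = 8):
            super().__init__()
            self.embed_tokens = torch.nn.Embedding(vocab_size, hidden)

        def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
            return self.embed_tokens(input_ids)

    class OuterForCausalLM(torch.nn.Module):
        def __init__(self):
            super().__init__()
            self.model = InnerModel()

        def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor:
            # Uniform entry point: just forward to the inner embedding table.
            return self.model.get_input_embeddings(input_ids)

    assert OuterForCausalLM().get_input_embeddings(
        torch.tensor([1, 2, 3])).shape == (3, 8)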
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.img_patch_id, + ) input_ids = None hidden_states = self.model(input_ids, diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py index 2b68d40cf2c6..505806a15c89 100644 --- a/vllm/model_executor/models/nano_nemotron_vl.py +++ b/vllm/model_executor/models/nano_nemotron_vl.py @@ -35,8 +35,7 @@ from vllm.model_executor.models.radio import RadioModel from vllm.model_executor.models.utils import (flatten_bn, init_vllm_registered_model, - maybe_prefix, - merge_multimodal_embeddings) + isin_list, maybe_prefix) from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, MultiModalKwargs, MultiModalKwargsItems, @@ -1096,8 +1095,8 @@ def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict: return modalities - def get_multimodal_embeddings( - self, **kwargs: object) -> Optional[MultiModalEmbeddings]: + def get_multimodal_embeddings(self, + **kwargs: object) -> MultiModalEmbeddings: # Validate the multimodal input keyword arguments modalities = self._parse_and_validate_multimodal_inputs(**kwargs) if modalities is None: @@ -1121,30 +1120,6 @@ def get_multimodal_embeddings( return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - - if (multimodal_embeddings is not None - and len(multimodal_embeddings) != 0): - context_token_ids = [ - token_id for token_id in (self.img_context_token_id, - self.video_context_token_id) - if token_id is not None - ] - assert len(context_token_ids) >= 1 - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - context_token_ids, - ) - - return inputs_embeds - def get_language_model(self) -> torch.nn.Module: return self.language_model @@ -1163,9 +1138,17 @@ def forward( # NOTE: In v1, inputs_embeds is always generated at model runner, this # condition is for v0 compatibility. 
elif inputs_embeds is None: + context_token_ids = [ + token_id for token_id in (self.img_context_token_id, + self.video_context_token_id) + if token_id is not None + ] vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=isin_list(input_ids, context_token_ids), + ) input_ids = None hidden_states = self.language_model( diff --git a/vllm/model_executor/models/nemotron_vl.py b/vllm/model_executor/models/nemotron_vl.py index 3abbff8c717d..2627a262e958 100644 --- a/vllm/model_executor/models/nemotron_vl.py +++ b/vllm/model_executor/models/nemotron_vl.py @@ -38,7 +38,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) IMG_START = '' IMG_END = '' @@ -576,20 +576,24 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - context_token_ids = [self.img_context_token_id] - assert len(context_token_ids) >= 1 + if multimodal_embeddings is not None and len( + multimodal_embeddings) > 0: self._set_visual_token_mask(input_ids) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - context_token_ids, - ) - return inputs_embeds + + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, @@ -608,8 +612,11 @@ def forward( # condition is for v0 compatibility. 
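Nemotron-VL above (and SkyworkR1V, Ultravox, Qwen2.5-Omni and Step3-VL later in the patch) keeps only its model-specific side effect, setting the visual token mask, and defers the text embedding and merge to the shared base implementation; the early `multimodal_embeddings is None or is_multimodal is None` branch mirrors the diff's note about satisfying the type checker for the text-only overload. A runnable toy version of the pattern, where BaseVLM is an assumed stand-in for the real shared interface, not the actual vLLM base class:

    from typing import Optional
    import torch

    class BaseVLM(torch.nn.Module):
        # Stand-in for the shared base implementation: embed text, then
        # overwrite the placeholder rows with the multimodal embeddings.
        def __init__(self, vocab: int = 64, hidden: int = 8):
            super().__init__()
            self.embed = torch.nn.Embedding(vocab, hidden)

        def get_input_embeddings(self, input_ids, multimodal_embeddings=None, *,
                                 is_multimodal: Optional[torch.Tensor] = None,
                                 handle_oov_mm_token: bool = False):
            embeds = self.embed(input_ids)
            if multimodal_embeddings is None or is_multimodal is None:
                return embeds
            flat = torch.cat(list(multimodal_embeddings), dim=0)
            embeds[is_multimodal] = flat.to(embeds.dtype)
            return embeds

    class MyVLM(BaseVLM):
        def __init__(self, img_context_token_id: int = 60):
            super().__init__()
            self.img_context_token_id = img_context_token_id
            self.visual_token_mask = None

        def get_input_embeddings(self, input_ids, multimodal_embeddings=None, *,
                                 is_multimodal: Optional[torch.Tensor] = None,
                                 handle_oov_mm_token: bool = False):
            # Keep only the model-specific side effect here ...
            if multimodal_embeddings is not None and len(multimodal_embeddings) > 0:
                self.visual_token_mask = input_ids == self.img_context_token_id
            # ... and defer the embedding/merge to the base class. The split
            # keeps the text-only and multimodal call shapes distinct.
            if multimodal_embeddings is None or is_multimodal is None:
                return super().get_input_embeddings(input_ids)
            return super().get_input_embeddings(
                input_ids,
                multimodal_embeddings=multimodal_embeddings,
                is_multimodal=is_multimodal,
                handle_oov_mm_token=handle_oov_mm_token,
            )

    m = MyVLM()
    ids = torch.tensor([1, 60, 2])
    out = m.get_input_embeddings(ids, [torch.randn(1, 8)],
                                 is_multimodal=ids == 60)
    assert out.shape == (3, 8) and m.visual_token_mask is not None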
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.img_context_token_id, + ) input_ids = None forward_kwargs = { diff --git a/vllm/model_executor/models/olmo2.py b/vllm/model_executor/models/olmo2.py index 2e0b1fb2a13f..e7e30ee8df0f 100644 --- a/vllm/model_executor/models/olmo2.py +++ b/vllm/model_executor/models/olmo2.py @@ -295,6 +295,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): make_empty_intermediate_tensors_factory(["hidden_states"], self.config.hidden_size)) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.embed_tokens(input_ids) + def forward( self, input_ids: torch.Tensor, @@ -408,6 +411,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index bd525b6780e0..8503d3f71d1c 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -48,7 +48,6 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP -from .utils import merge_multimodal_embeddings # Cannot find the following number from hf config. IMAGE_TOKEN = "" @@ -501,19 +500,6 @@ def get_multimodal_embeddings(self, return image_features - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.llm.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.image_pad_token_id) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -529,8 +515,11 @@ def forward( # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.image_pad_token_id, + ) input_ids = None # up until here we have an inputs_embeds 100% numerical identity diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index f18e38ce154d..2ecc7bff07e0 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -585,17 +585,6 @@ def get_multimodal_embeddings(self, return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.llm.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - tmp = torch.concat(multimodal_embeddings, dim=0) - inputs_embeds[input_ids == self.image_pad_token_id] = tmp - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -612,8 +601,11 @@ def forward( elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.image_pad_token_id, + ) input_ids = None # up until here we have a inputs_embeds 100% numerical identity diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py index aef510230461..f07f444819f4 100644 --- a/vllm/model_executor/models/paligemma.py +++ b/vllm/model_executor/models/paligemma.py @@ -26,8 +26,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import get_vision_encoder_info logger = init_logger(__name__) @@ -362,19 +361,6 @@ def get_multimodal_embeddings(self, vision_embeddings = vision_embeddings * (self.config.hidden_size**-0.5) return vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.image_token_index) - return inputs_embeds - def forward(self, input_ids: torch.Tensor, positions: torch.Tensor, @@ -388,8 +374,11 @@ def forward(self, # condition is for v0 compatibility. 
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py index a2b201fe4228..ea34c8d92f13 100644 --- a/vllm/model_executor/models/phi3v.py +++ b/vllm/model_executor/models/phi3v.py @@ -51,9 +51,9 @@ from .clip import CLIPVisionModel from .interfaces import (MultiModalEmbeddings, SupportsMultiModal, SupportsPP, SupportsQuant) -from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) +from .utils import (AutoWeightsLoader, WeightsMapper, + _merge_multimodal_embeddings, flatten_bn, + init_vllm_registered_model, maybe_prefix) logger = init_logger(__name__) @@ -643,14 +643,31 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.embed_tokens(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.image_token_id) - return inputs_embeds + inputs_embeds = self._get_text_embeddings( + input_ids, + self.embed_tokens, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) + + if multimodal_embeddings is None or len(multimodal_embeddings) == 0: + return inputs_embeds + + if is_multimodal is None: + raise ValueError( + "`get_input_embeddings` now requires `is_multimodal` arg, " + "please update your model runner according to " + "https://github.com/vllm-project/vllm/pull/16229.") + + return _merge_multimodal_embeddings( + inputs_embeds=inputs_embeds, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + ) def forward(self, input_ids: torch.Tensor, @@ -666,8 +683,11 @@ def forward(self, # condition is for v0 compatibility elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.image_token_id, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py index bdc831354c11..e8b79717d75d 100644 --- a/vllm/model_executor/models/phi4_multimodal.py +++ b/vllm/model_executor/models/phi4_multimodal.py @@ -1342,12 +1342,12 @@ def _process_image_input( image_attention_mask) return image_embeds - def get_multimodal_embeddings( - self, **kwargs: object) -> Optional[MultiModalEmbeddings]: + def get_multimodal_embeddings(self, + **kwargs: object) -> MultiModalEmbeddings: modalities = self._parse_and_validate_multimodal_inputs(**kwargs) if not modalities: - return None + return [] # The result multimodal_embeddings is tuple of tensors, with each # tensor corresponding to a multimodal data item (image or video).
@@ -1371,18 +1371,6 @@ def get_multimodal_embeddings( return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [_IMAGE_PLACEHOLDER_TOKEN_ID, _AUDIO_PLACEHOLDER_TOKEN_ID]) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py index 47b5ad55ab2d..15b09c7ae2bc 100644 --- a/vllm/model_executor/models/phi4mm.py +++ b/vllm/model_executor/models/phi4mm.py @@ -1151,7 +1151,6 @@ def get_multimodal_embeddings(self, modalities = self._parse_and_validate_multimodal_inputs(**kwargs) if not modalities: return [] - return None # The result multimodal_embeddings is tuple of tensors, with each # tensor corresponding to a multimodal data item (image or video). @@ -1175,19 +1174,6 @@ def get_multimodal_embeddings(self, return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.model.embed_tokens(input_ids) - if multimodal_embeddings is not None and len( - multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [_IMAGE_PLACEHOLDER_TOKEN_ID, _AUDIO_PLACEHOLDER_TOKEN_ID]) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py index 7b197844c8b6..2c04b6f0f4f9 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -50,8 +50,7 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP -from .utils import (flatten_bn, init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) +from .utils import flatten_bn, init_vllm_registered_model, maybe_prefix from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs try: @@ -433,22 +432,6 @@ def get_multimodal_embeddings(self, return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.vision_args.image_token_id, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -465,8 +448,11 @@ def forward( # condition is for v0 compatibility. 
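Phi-4 multimodal above (and phi4mm, which also drops an unreachable `return None` left after `return []`) changes get_multimodal_embeddings to return an empty sequence instead of None, so callers need a single `len(...) == 0` check rather than Optional juggling. A small illustration under hypothetical names:

    # Toy illustration of the return-type convention: an empty sequence can be
    # passed straight through and checked uniformly; None cannot.
    import torch

    def get_multimodal_embeddings(image_inputs) -> list[torch.Tensor]:
        if image_inputs is None:
            return []          # previously: return None
        return [torch.randn(4, 8) for _ in image_inputs]

    mm = get_multimodal_embeddings(None)
    if len(mm) == 0:
        print("text-only batch")      # no Optional handling needed
    assert len(get_multimodal_embeddings([object()])) == 1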
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.vision_args.image_token_id, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py index 5f27230c913b..bfa398ee43b5 100644 --- a/vllm/model_executor/models/qwen2_5_omni_thinker.py +++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py @@ -865,24 +865,26 @@ def get_multimodal_embeddings(self, multimodal_embeddings += audio_embeddings return multimodal_embeddings + # TODO (ywang96): support overlapping modality embeddings so that + # `use_audio_in_video` will work on V1. def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - - # TODO (ywang96): support overlapping modality embeddings so that - # `use_audio_in_video` will work on V1. - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, [ - self.config.image_token_index, - self.config.video_token_index, - self.config.audio_token_index - ]) - return inputs_embeds + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def get_multimodal_embeddings_v0( self, **kwargs: object) -> Optional[NestedTensors]: diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py index adb21373056c..5b092b42205f 100644 --- a/vllm/model_executor/models/qwen2_5_vl.py +++ b/vllm/model_executor/models/qwen2_5_vl.py @@ -1365,19 +1365,6 @@ def get_multimodal_embeddings(self, multimodal_embeddings += video_embeddings return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [self.config.image_token_id, self.config.video_token_id]) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py index 762ab42e5929..9dfa29eef5ce 100644 --- a/vllm/model_executor/models/qwen2_audio.py +++ b/vllm/model_executor/models/qwen2_audio.py @@ -49,8 +49,7 @@ from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP -from .utils import (AutoWeightsLoader, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) +from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix # # === Audio Inputs === # 
@@ -438,19 +437,6 @@ def get_multimodal_embeddings(self, masked_audio_features = self._process_audio_input(audio_input) return masked_audio_features - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.audio_token_index) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -467,8 +453,11 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - multimodal_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + multimodal_embeddings, + is_multimodal=input_ids == self.config.audio_token_index, + ) input_ids = None hidden_states = self.language_model.model(input_ids, diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index d4e195246bf1..8192c3ce05dd 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -1459,19 +1459,6 @@ def get_multimodal_embeddings(self, return multimodal_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [self.config.image_token_id, self.config.video_token_id]) - return inputs_embeds - def get_input_embeddings_v0( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index f1aeb99a4d37..5d0b66f91ace 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -79,7 +79,8 @@ from .qwen2_vl import Qwen2VLProcessingInfo from .qwen3 import Qwen3ForCausalLM, Qwen3Model from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper, - maybe_prefix, merge_multimodal_embeddings) + _merge_multimodal_embeddings, maybe_prefix, + merge_multimodal_embeddings) from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) @@ -1324,17 +1325,22 @@ def get_multimodal_embeddings( return multimodal_embeddings def _compute_deepstack_embeds( - self, input_ids: torch.Tensor, inputs_embeds: torch.Tensor, - multimodal_embeddings: MultiModalEmbeddings) -> torch.Tensor: - visual_lens = [ - x.shape[0] if isinstance(x, torch.Tensor) else len(x) - for x in multimodal_embeddings - ] + self, + inputs_embeds: torch.Tensor, + multimodal_embeddings: MultiModalEmbeddings, + is_multimodal: torch.Tensor, + ) -> tuple[torch.Tensor, MultiModalEmbeddings]: + visual_lens = [len(x) for x in multimodal_embeddings] multimodal_embeddings_cat = torch.cat(multimodal_embeddings, dim=0) - multimodal_embeddings_main, multimodal_embeddings_multiscale = torch.split( # noqa:E501 - multimodal_embeddings_cat, [self.visual_dim, self.multiscale_dim], - dim=-1) + ( + multimodal_embeddings_main, + multimodal_embeddings_multiscale, + ) = torch.split( + multimodal_embeddings_cat, + [self.visual_dim, 
self.multiscale_dim], + dim=-1, + ) multimodal_embeddings = torch.split(multimodal_embeddings_main, visual_lens, @@ -1346,39 +1352,62 @@ def _compute_deepstack_embeds( inputs_embeds.size(0), self.deepstack_num_level * inputs_embeds.size(1)) - deepstack_input_embeds = merge_multimodal_embeddings( - input_ids, - deepstack_input_embeds, - multimodal_embeddings_multiscale, - placeholder_token_id=[ - self.config.image_token_id, self.config.video_token_id - ], + deepstack_input_embeds = _merge_multimodal_embeddings( + inputs_embeds=deepstack_input_embeds, + multimodal_embeddings=multimodal_embeddings_multiscale, + is_multimodal=is_multimodal, ) deepstack_input_embeds = deepstack_input_embeds.view( inputs_embeds.shape[0], self.deepstack_num_level, self.visual_dim) deepstack_input_embeds = deepstack_input_embeds.permute(1, 0, 2) + return deepstack_input_embeds, multimodal_embeddings def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - deepstack_input_embeds = None - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - if self.use_deepstack: - deepstack_input_embeds, multimodal_embeddings = self._compute_deepstack_embeds( # noqa:E501 - input_ids, inputs_embeds, multimodal_embeddings) - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - [self.config.image_token_id, self.config.video_token_id]) + inputs_embeds = self._get_text_embeddings( + input_ids, + self.language_model.get_input_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) + + if multimodal_embeddings is None or len(multimodal_embeddings) == 0: + return inputs_embeds + + if is_multimodal is None: + raise ValueError( + "`get_input_embeddings` now requires `is_multimodal` arg, " + "please update your model runner according to " + "https://github.com/vllm-project/vllm/pull/16229.") if self.use_deepstack: - if deepstack_input_embeds is None: - deepstack_input_embeds = torch.zeros_like( - inputs_embeds).unsqueeze(0).repeat( - self.deepstack_num_level, 1, 1).contiguous() + ( + deepstack_input_embeds, + multimodal_embeddings, + ) = self._compute_deepstack_embeds( + inputs_embeds=inputs_embeds, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + ) + else: + deepstack_input_embeds = None + + inputs_embeds = _merge_multimodal_embeddings( + inputs_embeds=inputs_embeds, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + ) + + if deepstack_input_embeds is not None: + deepstack_input_embeds = torch.zeros_like(inputs_embeds).unsqueeze( + 0).repeat(self.deepstack_num_level, 1, 1).contiguous() self._set_deepstack_input_embeds(deepstack_input_embeds) return inputs_embeds diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py index 90200f319464..dc11b60604a9 100644 --- a/vllm/model_executor/models/qwen_vl.py +++ b/vllm/model_executor/models/qwen_vl.py @@ -45,7 +45,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .qwen import QWenBaseModel, QWenModel -from .utils import flatten_bn, merge_multimodal_embeddings +from .utils import flatten_bn class QwenImagePixelInputs(TensorSchema): @@ -756,21 +756,6 @@ def get_multimodal_embeddings(self, vision_embeddings = self._process_image_input(image_input) return 
vision_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.transformer.get_input_embeddings(input_ids) - - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.transformer.visual.image_pad_id) - - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -786,8 +771,12 @@ def forward( # condition is for v0 compatibility. elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == + self.transformer.visual.image_pad_id, + ) input_ids = None hidden_states = self.transformer(input_ids, positions, diff --git a/vllm/model_executor/models/roberta.py b/vllm/model_executor/models/roberta.py index ba405be41687..53e698c4fa80 100644 --- a/vllm/model_executor/models/roberta.py +++ b/vllm/model_executor/models/roberta.py @@ -218,6 +218,9 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): loader = AutoWeightsLoader(self) return loader.load_weights(weights, mapper=self.jina_to_vllm_mapper) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.roberta.get_input_embeddings(input_ids) + def forward( self, input_ids: Optional[torch.Tensor], diff --git a/vllm/model_executor/models/skyworkr1v.py b/vllm/model_executor/models/skyworkr1v.py index 893ce4497c31..f9a107c06085 100644 --- a/vllm/model_executor/models/skyworkr1v.py +++ b/vllm/model_executor/models/skyworkr1v.py @@ -38,7 +38,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) IMG_START = '' IMG_END = '' @@ -842,19 +842,24 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - assert self.img_context_token_id is not None + if multimodal_embeddings is not None and len( + multimodal_embeddings) > 0: self._set_visual_token_mask(input_ids) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.img_context_token_id, - ) - return inputs_embeds + + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, @@ -873,8 +878,11 @@ def forward( # condition is for v0 compatibility. 
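The Qwen3-VL `_compute_deepstack_embeds` rewrite above splits each visual embedding row into its main channels plus `deepstack_num_level` extra multiscale channels, and scatters both parts with the same `is_multimodal` mask so the deepstack tensors stay aligned with the merged input embeddings. A standalone sketch of the split-and-scatter idea with made-up sizes; this is not the actual Qwen3-VL code:

    # visual_dim and deepstack_num_level are made-up here.
    import torch

    visual_dim, num_level = 4, 2
    multiscale_dim = visual_dim * num_level

    # Two visual items of 3 and 2 placeholder tokens; each row carries the
    # main channels followed by the flattened multiscale channels.
    mm = [torch.randn(3, visual_dim + multiscale_dim),
          torch.randn(2, visual_dim + multiscale_dim)]
    seq_len = 8
    is_multimodal = torch.tensor([0, 1, 1, 1, 0, 1, 1, 0], dtype=torch.bool)

    mm_cat = torch.cat(mm, dim=0)
    mm_main, mm_multiscale = torch.split(
        mm_cat, [visual_dim, multiscale_dim], dim=-1)

    inputs_embeds = torch.zeros(seq_len, visual_dim)
    deepstack = torch.zeros(seq_len, multiscale_dim)

    # The same boolean mask drives both scatters.
    inputs_embeds.masked_scatter_(is_multimodal.unsqueeze(-1), mm_main)
    deepstack.masked_scatter_(is_multimodal.unsqueeze(-1), mm_multiscale)

    # One extra (seq_len, visual_dim) tensor per deepstack level.
    deepstack_per_level = deepstack.view(
        seq_len, num_level, visual_dim).permute(1, 0, 2)
    assert deepstack_per_level.shape == (num_level, seq_len, visual_dim)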
elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.img_context_token_id, + ) input_ids = None forward_kwargs = { diff --git a/vllm/model_executor/models/solar.py b/vllm/model_executor/models/solar.py index c774171b9dcd..c5b82b0ca4a0 100644 --- a/vllm/model_executor/models/solar.py +++ b/vllm/model_executor/models/solar.py @@ -483,6 +483,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward( self, input_ids: torch.Tensor, diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 0cce0c78f8dc..0fe723d59483 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -395,6 +395,9 @@ def __init__( self.make_empty_intermediate_tensors = ( self.model.make_empty_intermediate_tensors) + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + def forward(self, input_ids: torch.Tensor, positions: torch.Tensor, diff --git a/vllm/model_executor/models/step3_vl.py b/vllm/model_executor/models/step3_vl.py index 5f6ad5885043..ad295ef44732 100644 --- a/vllm/model_executor/models/step3_vl.py +++ b/vllm/model_executor/models/step3_vl.py @@ -25,7 +25,7 @@ from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig, - MultiModalKwargsItems, NestedTensors) + MultiModalKwargsItems) from vllm.multimodal.parse import ImageSize, MultiModalDataItems from vllm.multimodal.processing import (BaseMultiModalProcessor, BaseProcessingInfo, PromptReplacement, @@ -37,8 +37,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import run_dp_sharded_vision_model @@ -996,10 +995,13 @@ def _process_image_input( 1 else cur_feature[0]) return merged_image_features - def get_multimodal_embeddings(self, **kwargs) -> Optional[NestedTensors]: + def get_language_model(self) -> torch.nn.Module: + return self.language_model + + def get_multimodal_embeddings(self, **kwargs) -> MultiModalEmbeddings: image_input = self._parse_and_validate_image_input(**kwargs) if image_input is None: - return None + return [] vision_embeddings = self._process_image_input(image_input) return vision_embeddings @@ -1007,24 +1009,21 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + # Multi-modal token ID may exceed vocab size + handle_oov_mm_token: bool = True, ) -> torch.Tensor: - if multimodal_embeddings is None: - inputs_embeds = self.language_model.model.get_input_embeddings( - input_ids) - else: - is_text = input_ids != self.config.image_token_id - text_ids = input_ids[is_text] - text_embeds = self.language_model.model.get_input_embeddings( - text_ids) - 
inputs_embeds = torch.empty(input_ids.shape[0], - text_embeds.shape[-1], - dtype=text_embeds.dtype, - device=text_embeds.device) - inputs_embeds[is_text] = text_embeds - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.image_token_id) - return inputs_embeds + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward( self, @@ -1038,10 +1037,11 @@ def forward( inputs_embeds = None elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - # always pass the input via `inputs_embeds` - # to make sure the computation graph is consistent - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_id, + ) input_ids = None hidden_states = self.language_model(input_ids, diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index 3660efdc079a..1145bea41480 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -40,7 +40,7 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - maybe_prefix, merge_multimodal_embeddings) + maybe_prefix) from .vision import VisionEncoderInfo, get_vision_encoder_info @@ -589,22 +589,6 @@ def get_multimodal_embeddings(self, return [] return self._process_image_input(image_input) - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None \ - and len(multimodal_embeddings) != 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - multimodal_embeddings, - self.config.image_token_index, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -617,8 +601,11 @@ def forward( inputs_embeds = None elif inputs_embeds is None: vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - vision_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + vision_embeddings, + is_multimodal=input_ids == self.config.image_token_index, + ) input_ids = None hidden_states = self.language_model.model( input_ids=input_ids, diff --git a/vllm/model_executor/models/terratorch.py b/vllm/model_executor/models/terratorch.py index b9dfa8e9b6f5..938b02e3e04b 100644 --- a/vllm/model_executor/models/terratorch.py +++ b/vllm/model_executor/models/terratorch.py @@ -233,6 +233,9 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: # We do not really use any input tokens and therefore no embeddings # to be calculated. 
However, due to the mandatory token ids in diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index 19dd242f16eb..3d7b06633f34 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -52,8 +52,8 @@ from vllm.sequence import IntermediateTensors from vllm.utils import is_list_of -from .interfaces import (SupportsLoRA, SupportsMultiModal, SupportsPP, - SupportsQuant) +from .interfaces import (MultiModalEmbeddings, SupportsLoRA, + SupportsMultiModal, SupportsPP, SupportsQuant) from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper, flatten_bn, make_empty_intermediate_tensors_factory, maybe_prefix) @@ -797,6 +797,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): else: self.lm_head = PPMissingLayer() + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings()(input_ids) + def compute_logits( self, hidden_states: torch.Tensor, @@ -873,13 +876,19 @@ def forward( multimodal_embeds = self.get_multimodal_embeddings(**kwargs) if multimodal_embeds is not None: inputs_embeds = self.get_input_embeddings( - input_ids, multimodal_embeds) + input_ids, + multimodal_embeds, + is_multimodal=input_ids == self.config.image_token_id, + ) input_ids = None model_output = super().forward(input_ids, positions, intermediate_tensors, inputs_embeds) return model_output + def get_language_model(self) -> torch.nn.Module: + return self.model + def get_multimodal_embeddings(self, **kwargs): pixel_values = kwargs.pop("pixel_values", None) pixel_values = pixel_values if pixel_values is not None else kwargs.pop( @@ -934,15 +943,42 @@ def get_multimodal_embeddings(self, **kwargs): def get_input_embeddings( self, input_ids: torch.Tensor, - multimodal_embeddings=None, + multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: - inputs_embeds = self.model.get_input_embeddings()(input_ids) - if (multimodal_embeddings is not None - and len(multimodal_embeddings) != 0): - mask = (input_ids == self.config.image_token_id) - mask = mask.unsqueeze(-1).expand_as(inputs_embeds) - multimodal_embeddings = torch.cat(multimodal_embeddings) - - inputs_embeds = inputs_embeds.masked_scatter( - mask, multimodal_embeddings) - return inputs_embeds + """ + Apply token embeddings to `input_ids`. + + If `multimodal_embeddings` is passed, scatter them into + `input_ids` according to the mask `is_multimodal`. + + In case the multi-modal token IDs exceed the vocabulary size of + the language model, you can set `handle_oov_mm_token=False` + to avoid calling the language model's `get_input_embeddings` method + on those tokens. 
+ """ + from .utils import _merge_multimodal_embeddings + + inputs_embeds = self._get_text_embeddings( + input_ids, + self.model.get_input_embeddings(), + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) + + if multimodal_embeddings is None or len(multimodal_embeddings) == 0: + return inputs_embeds + + if is_multimodal is None: + raise ValueError( + "`get_input_embeddings` now requires `is_multimodal` arg, " + "please update your model runner according to " + "https://github.com/vllm-project/vllm/pull/16229.") + + return _merge_multimodal_embeddings( + inputs_embeds=inputs_embeds, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + ) diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py index 12ae9487ad9d..77e886c22e63 100644 --- a/vllm/model_executor/models/ultravox.py +++ b/vllm/model_executor/models/ultravox.py @@ -33,8 +33,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) _AUDIO_PLACEHOLDER_OVERRIDE = "<|audio|>" _MAX_ENCODER_BATCH_SIZE = 16 @@ -555,19 +554,21 @@ def get_input_embeddings( self, input_ids: torch.Tensor, multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + # Multi-modal token ID may exceed vocab size + handle_oov_mm_token: bool = True, ) -> torch.Tensor: - # The audio token index is not included in the embedding table - # We need to remove it before embedding lookup - safe_input_ids = input_ids.clone() - safe_input_ids[safe_input_ids == self.config.audio_token_index] = 0 - inputs_embeds = self.language_model.get_input_embeddings( - safe_input_ids) - if multimodal_embeddings is not None and len( - multimodal_embeddings) > 0: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, - self.config.audio_token_index) - return inputs_embeds + # This is to satisfy the type checker for each overload + if multimodal_embeddings is None or is_multimodal is None: + return super().get_input_embeddings(input_ids) + + return super().get_input_embeddings( + input_ids, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, + handle_oov_mm_token=handle_oov_mm_token, + ) def forward(self, input_ids: torch.Tensor, @@ -601,8 +602,11 @@ def forward(self, elif inputs_embeds is None: multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - multimodal_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + multimodal_embeddings, + is_multimodal=input_ids == self.config.audio_token_index, + ) input_ids = None language_model = self.language_model diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index 51cd41c864f0..7b3f20c6b28a 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -4,7 +4,7 @@ import itertools from collections.abc import Iterable, Mapping from dataclasses import dataclass, field -from typing import Any, Callable, Literal, Optional, Protocol, Union, overload +from typing import Any, Literal, Optional, Protocol, Union, overload import torch import torch.nn as nn @@ -391,8 +391,8 @@ def _embedding_count_expression(embeddings: NestedTensors) -> str: def _merge_multimodal_embeddings( 
inputs_embeds: torch.Tensor, - is_multimodal: torch.Tensor, multimodal_embeddings: NestedTensors, + is_multimodal: torch.Tensor, ) -> torch.Tensor: """ Merge ``multimodal_embeddings`` into ``inputs_embeds`` by overwriting the @@ -402,61 +402,35 @@ def _merge_multimodal_embeddings( Note: This updates ``inputs_embeds`` in place. """ - flattened = _flatten_embeddings(multimodal_embeddings) + if len(multimodal_embeddings) == 0: + return inputs_embeds + + mm_embeds_flat = _flatten_embeddings(multimodal_embeddings) + input_dtype = inputs_embeds.dtype + try: - # This is equivalent to: inputs_embeds[is_multimodal] = flattened. + # For debugging + # inputs_embeds[is_multimodal] = mm_embeds_flat.to(dtype=input_dtype) + + # NOTE: This can avoid D2H sync (#22105), but fails to + # raise an error if is_multimodal.sum() < len(mm_embeds_flat) inputs_embeds.masked_scatter_(is_multimodal.unsqueeze(-1), - flattened.to(dtype=inputs_embeds.dtype)) + mm_embeds_flat.to(dtype=input_dtype)) except RuntimeError as e: + num_actual_tokens = len(mm_embeds_flat) num_expected_tokens = is_multimodal.sum().item() - assert isinstance(num_expected_tokens, int) - if flattened.shape[0] != num_expected_tokens: + if num_actual_tokens != num_expected_tokens: expr = _embedding_count_expression(multimodal_embeddings) + raise ValueError( - f"Attempted to assign {expr} = {flattened.shape[0]} " + f"Attempted to assign {expr} = {num_actual_tokens} " f"multimodal tokens to {num_expected_tokens} placeholders" ) from e - else: - raise ValueError("Error during masked scatter operation") from e - - return inputs_embeds - - -def embed_multimodal( - input_ids: torch.Tensor, - multimodal_token_id: int, - get_text_embeds: Callable[[torch.Tensor], torch.Tensor], - multimodal_embeds: NestedTensors, -) -> torch.Tensor: - """ - Embed token IDs and multimodal inputs and combine their embeddings. - - ``multimodal_token_id`` is used to determine whether a token ID should - be embedded using ``get_text_embeds`` or ``get_multimodal_embeds``. - Compared to ``merge_multimodal_embeddings`, this avoids running - ``get_text_embeds`` on ``input_ids[input_ids == multimodal_token_id]`` - which causes issues when the placeholder token ID exceeds the - vocabulary size of the language model. - """ - is_multimodal = input_ids == multimodal_token_id - is_text = ~is_multimodal - - text_embeds = get_text_embeds(input_ids[is_text]) - merged_embeds = torch.empty( - (input_ids.shape[0], text_embeds.shape[1]), - dtype=text_embeds.dtype, - device=text_embeds.device, - ) + raise ValueError("Error during masked scatter operation") from e - merged_embeds[is_text] = text_embeds - - return _merge_multimodal_embeddings( - merged_embeds, - is_multimodal, - multimodal_embeds, - ) + return inputs_embeds def merge_multimodal_embeddings( @@ -491,23 +465,29 @@ def merge_multimodal_embeddings( This updates ``inputs_embeds`` in place. 
""" if isinstance(placeholder_token_id, list): - placeholder_token_id = torch.tensor( - placeholder_token_id, - pin_memory=is_pin_memory_available()).to(device=input_ids.device, - non_blocking=True) - return _merge_multimodal_embeddings( - inputs_embeds, - torch.isin(input_ids, placeholder_token_id), - multimodal_embeddings, - ) + is_multimodal = isin_list(input_ids, placeholder_token_id) + else: + is_multimodal = (input_ids == placeholder_token_id) return _merge_multimodal_embeddings( inputs_embeds, - (input_ids == placeholder_token_id), - multimodal_embeddings, + multimodal_embeddings=multimodal_embeddings, + is_multimodal=is_multimodal, ) +def isin_list( + elements: torch.Tensor, + test_elements_list: list[int], +) -> torch.Tensor: + test_elements = torch.tensor( + test_elements_list, + pin_memory=is_pin_memory_available(), + ).to(device=elements.device, non_blocking=True) + + return torch.isin(elements, test_elements) + + class LayerFn(Protocol): def __call__(self, prefix: str) -> torch.nn.Module: diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py index b33e8d09c4be..f93e7ccfd06f 100644 --- a/vllm/model_executor/models/voxtral.py +++ b/vllm/model_executor/models/voxtral.py @@ -45,10 +45,8 @@ from vllm.transformers_utils.tokenizer import (MistralTokenizer, cached_tokenizer_from_config) -from .interfaces import (MultiModalEmbeddings, SupportsLoRA, - SupportsMultiModal, SupportsTranscription) -from .utils import (flatten_bn, init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) +from .interfaces import SupportsLoRA, SupportsMultiModal, SupportsTranscription +from .utils import flatten_bn, init_vllm_registered_model, maybe_prefix logger = init_logger(__name__) @@ -376,9 +374,14 @@ def forward( # NOTE: In v1, inputs_embeds is always generated at model runner, this # condition is for v0 compatibility. 
elif inputs_embeds is None: + audio_encoder = self.tokenizer.instruct.audio_encoder + audio_tok_id = audio_encoder.audio_token audio_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings(input_ids, - audio_embeddings) + inputs_embeds = self.get_input_embeddings( + input_ids, + audio_embeddings, + is_multimodal=input_ids == audio_tok_id, + ) input_ids = None hidden_states = self.language_model.model(input_ids, @@ -421,20 +424,6 @@ def get_multimodal_embeddings( return audio_embeddings - def get_input_embeddings( - self, - input_ids: torch.Tensor, - multimodal_embeddings: Optional[MultiModalEmbeddings] = None, - ) -> torch.Tensor: - audio_encoder = self.tokenizer.instruct.audio_encoder - audio_tok_id = audio_encoder.audio_token - - inputs_embeds = self.language_model.get_input_embeddings(input_ids) - if multimodal_embeddings is not None: - inputs_embeds = merge_multimodal_embeddings( - input_ids, inputs_embeds, multimodal_embeddings, audio_tok_id) - return inputs_embeds - def _parse_and_validate_audio_arrays( self, **kwargs: object) -> Union[list[torch.Tensor], None]: audio_arrays = kwargs.pop("audio_arrays", None) diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py index de3e4f0592a6..7beeeddf988f 100644 --- a/vllm/model_executor/models/whisper.py +++ b/vllm/model_executor/models/whisper.py @@ -579,10 +579,7 @@ def forward( hidden_states = self.layer_norm(hidden_states) return hidden_states - def get_input_embeddings( - self, - input_ids: torch.Tensor, - ) -> torch.Tensor: + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: return self.embed_tokens(input_ids) @@ -916,7 +913,10 @@ def get_multimodal_embeddings(self, def get_input_embeddings( self, input_ids: torch.Tensor, - multimodal_embeddings: Optional[NestedTensors] = None, + multimodal_embeddings: Optional[MultiModalEmbeddings] = None, + *, + is_multimodal: Optional[torch.Tensor] = None, + handle_oov_mm_token: bool = False, ) -> torch.Tensor: # This method just returns the decoder sequence embeddings since # Whisper does not have encoder text tokens. diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 51e54e0dc337..1b5bafb9ca1b 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -18,6 +18,7 @@ from vllm.model_executor.model_loader import get_model from vllm.model_executor.models import supports_multimodal from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM +from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.platforms import current_platform from vllm.utils import is_pin_memory_available from vllm.v1.attention.backends.flash_attn import FlashAttentionMetadata @@ -64,8 +65,10 @@ def __init__( # hidden size (e.g., Llama 3.3 70B). 
self.hidden_size = self.draft_model_config.get_hidden_size() - self.is_multimodal_model = vllm_config.model_config \ - .is_multimodal_model + # Multi-modal data support + self.mm_registry = MULTIMODAL_REGISTRY + self.supports_mm_inputs = self.mm_registry.supports_multimodal_inputs( + vllm_config.model_config) self.attn_metadata_builder: Optional[AttentionMetadataBuilder] = None @@ -175,7 +178,8 @@ def propose( last_token_indices: Optional[torch.Tensor], common_attn_metadata: CommonAttentionMetadata, sampling_metadata: SamplingMetadata, - mm_embeds: Optional[list[torch.Tensor]] = None, + mm_embed_inputs: Optional[tuple[list[torch.Tensor], + torch.Tensor]] = None, ) -> torch.Tensor: num_tokens = target_token_ids.shape[0] batch_size = next_token_ids.shape[0] @@ -219,18 +223,21 @@ def propose( # copy inputs to buffer for cudagraph self._set_positions(num_tokens, target_positions) self.hidden_states[:num_tokens] = target_hidden_states - if self.is_multimodal_model: - input_ids = self.input_ids[:num_tokens] - inputs_embeds = self.model.get_input_embeddings( - input_ids, - multimodal_embeddings=mm_embeds or None, + + if self.supports_mm_inputs: + mm_embeds, is_mm_embed = mm_embed_inputs or (None, None) + + self.inputs_embeds[:num_tokens] = self.model.get_input_embeddings( + self.input_ids[:num_tokens], + multimodal_embeddings=mm_embeds, + is_multimodal=is_mm_embed, ) - self.inputs_embeds[:num_tokens] = inputs_embeds - inputs_embeds = self.inputs_embeds[:num_input_tokens] + input_ids = None + inputs_embeds = self.inputs_embeds[:num_input_tokens] else: - inputs_embeds = None input_ids = self.input_ids[:num_input_tokens] + inputs_embeds = None with set_forward_context(per_layer_attn_metadata, self.vllm_config, @@ -372,14 +379,15 @@ def propose( self.input_ids[:batch_size] = input_ids self._set_positions(batch_size, clamped_positions) self.hidden_states[:batch_size] = hidden_states - if self.is_multimodal_model: - inputs_embeds = self.model.get_input_embeddings(input_ids) - self.inputs_embeds[:batch_size] = inputs_embeds - inputs_embeds = self.inputs_embeds[:input_batch_size] + if self.supports_mm_inputs: + self.inputs_embeds[:batch_size] = \ + self.model.get_input_embeddings(input_ids) + input_ids = None + inputs_embeds = self.inputs_embeds[:input_batch_size] else: - inputs_embeds = None input_ids = self.input_ids[:input_batch_size] + inputs_embeds = None # Run the model. 
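In the EAGLE proposer above, multimodal support becomes a registry-derived `supports_mm_inputs` flag and the gathered embeddings travel as one optional `(mm_embeds, is_mm_embed)` tuple; when present, the drafter fills its persistent `inputs_embeds` buffer and runs with `input_ids=None`. A self-contained toy version of that input-preparation step; `ToyDrafter` and `_TextOnlyStub` are stand-ins, not the real EagleProposer:

    from typing import Optional
    import torch

    class ToyDrafter:
        def __init__(self, model, supports_mm_inputs: bool,
                     max_tokens: int = 16, hidden: int = 8):
            self.model = model
            self.supports_mm_inputs = supports_mm_inputs
            # Persistent buffers, mirroring the pre-allocated ones in the diff.
            self.input_ids = torch.zeros(max_tokens, dtype=torch.long)
            self.inputs_embeds = torch.zeros(max_tokens, hidden)

        def prepare_inputs(
            self,
            num_tokens: int,
            mm_embed_inputs: Optional[tuple[list[torch.Tensor],
                                            torch.Tensor]] = None,
        ):
            if self.supports_mm_inputs:
                mm_embeds, is_mm_embed = mm_embed_inputs or (None, None)
                # Multimodal draft models are always driven via embeddings.
                self.inputs_embeds[:num_tokens] = self.model.get_input_embeddings(
                    self.input_ids[:num_tokens],
                    multimodal_embeddings=mm_embeds,
                    is_multimodal=is_mm_embed,
                )
                return None, self.inputs_embeds[:num_tokens]
            # Text-only draft models keep consuming token ids directly.
            return self.input_ids[:num_tokens], None

    class _TextOnlyStub:
        def get_input_embeddings(self, ids, multimodal_embeddings=None, *,
                                 is_multimodal=None):
            return torch.nn.functional.one_hot(ids, 8).float()

    drafter = ToyDrafter(_TextOnlyStub(), supports_mm_inputs=True)
    ids_in, embeds_in = drafter.prepare_inputs(num_tokens=4)
    assert ids_in is None and embeds_in.shape == (4, 8)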
with set_forward_context(per_layer_attn_metadata, @@ -849,7 +857,7 @@ def load_model(self, target_model: nn.Module) -> None: self.attn_layer_names = list(draft_attn_layer_names) - if self.is_multimodal_model: + if self.supports_mm_inputs: # Even if the target model is multimodal, we can also use # text-only draft models try: @@ -861,7 +869,7 @@ def load_model(self, target_model: nn.Module) -> None: logger.warning( "Draft model does not support multimodal inputs, " "falling back to text-only mode") - self.is_multimodal_model = False + self.supports_mm_inputs = False if supports_multimodal(target_model): # handle multimodality @@ -933,7 +941,7 @@ def dummy_run( ) -> None: with set_forward_context(None, self.vllm_config, num_tokens=num_tokens): - if self.is_multimodal_model: + if self.supports_mm_inputs: input_ids = None inputs_embeds = self.inputs_embeds[:num_tokens] else: diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 22a177dd7cc7..1bae0d4ce4d1 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -368,6 +368,11 @@ def __init__( self.num_accepted_tokens = self._make_buffer(self.max_num_reqs, dtype=torch.int64) + # Only relevant for multimodal models + if self.supports_mm_inputs: + self.is_mm_embed = self._make_buffer(self.max_num_tokens, + dtype=torch.bool) + # Only relevant for models using M-RoPE (e.g, Qwen2-VL) if self.uses_mrope: # NOTE: `mrope_positions` is implemented with one additional dummy @@ -1627,9 +1632,16 @@ def _gather_mm_embeddings( self, scheduler_output: "SchedulerOutput", shift_computed_tokens: int = 0, - ) -> list[torch.Tensor]: + ) -> tuple[list[torch.Tensor], torch.Tensor]: + total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens + + mm_embeds = list[torch.Tensor]() + is_mm_embed = self.is_mm_embed.cpu + is_mm_embed[:total_num_scheduled_tokens] = False + + req_start_idx = 0 should_sync_mrope_positions = False - mm_embeds: list[torch.Tensor] = [] + for req_id in self.input_batch.req_ids: mm_embeds_req: list[torch.Tensor] = [] @@ -1638,6 +1650,7 @@ def _gather_mm_embeddings( req_state = self.requests[req_id] num_computed_tokens = \ req_state.num_computed_tokens + shift_computed_tokens + for mm_feature in req_state.mm_features: pos_info = mm_feature.mm_position start_pos = pos_info.offset @@ -1670,6 +1683,10 @@ def _gather_mm_embeddings( if (is_embed := pos_info.is_embed) is not None: is_embed = is_embed[start_idx:end_idx] + req_start_pos = req_start_idx + start_pos - num_computed_tokens + is_mm_embed[req_start_pos+start_idx:req_start_pos + end_idx] \ + = True if is_embed is None else is_embed + mm_embeds_item = gather_mm_placeholders( encoder_output[start_idx:end_idx], is_embed=is_embed, @@ -1677,6 +1694,7 @@ def _gather_mm_embeddings( mm_embeds_req.append(mm_embeds_item) if self.is_multimodal_pruning_enabled and self.uses_mrope: + assert req_state.mrope_positions is not None should_sync_mrope_positions = True mm_embeds_req, new_mrope_positions, new_delta = ( self.model.recompute_mrope_positions( @@ -1685,18 +1703,19 @@ def _gather_mm_embeddings( mrope_positions=req_state.mrope_positions, num_computed_tokens=req_state.num_computed_tokens, )) - assert req_state.mrope_positions is not None req_state.mrope_positions.copy_(new_mrope_positions) req_state.mrope_position_delta = new_delta mm_embeds.extend(mm_embeds_req) + req_start_idx += num_scheduled_tokens + + is_mm_embed = self.is_mm_embed.copy_to_gpu(total_num_scheduled_tokens) if should_sync_mrope_positions: 
self._calc_mrope_positions(scheduler_output) - self.mrope_positions.copy_to_gpu( - scheduler_output.total_num_scheduled_tokens) + self.mrope_positions.copy_to_gpu(total_num_scheduled_tokens) - return mm_embeds + return mm_embeds, is_mm_embed def _extract_encoder_inputs( self, @@ -1990,14 +2009,16 @@ def _preprocess( and not self.model_config.is_encoder_decoder): # Run the multimodal encoder if any. self._execute_mm_encoder(scheduler_output) - mm_embeds = self._gather_mm_embeddings(scheduler_output) + mm_embeds, is_mm_embed = self._gather_mm_embeddings( + scheduler_output) # NOTE(woosuk): To unify token ids and soft tokens (vision # embeddings), we always use embeddings (rather than token ids) # as input to the multimodal model, even when the input is text. inputs_embeds_scheduled = self.model.get_input_embeddings( - input_ids=self.input_ids.gpu[:num_scheduled_tokens], - multimodal_embeddings=mm_embeds or None, + self.input_ids.gpu[:num_scheduled_tokens], + multimodal_embeddings=mm_embeds, + is_multimodal=is_mm_embed, ) # TODO(woosuk): Avoid the copy. Optimize. @@ -2586,10 +2607,14 @@ def propose_draft_token_ids( [h[token_indices] for h in aux_hidden_states], dim=-1) else: target_hidden_states = hidden_states[token_indices] - mm_embeds = None + if self.supports_mm_inputs: - mm_embeds = self._gather_mm_embeddings(scheduler_output, - shift_computed_tokens=1) + mm_embed_inputs = self._gather_mm_embeddings( + scheduler_output, + shift_computed_tokens=1, + ) + else: + mm_embed_inputs = None draft_token_ids = self.drafter.propose( target_token_ids=target_token_ids, @@ -2599,8 +2624,9 @@ def propose_draft_token_ids( last_token_indices=token_indices_to_sample, sampling_metadata=sampling_metadata, common_attn_metadata=common_attn_metadata, - mm_embeds=mm_embeds, + mm_embed_inputs=mm_embed_inputs, ) + return draft_token_ids def update_config(self, overrides: dict[str, Any]) -> None: diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index a330f50875a8..2405f978ca73 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -263,6 +263,13 @@ def __init__( pin_memory=self.pin_memory) self.seq_lens_np = self.seq_lens_cpu.numpy() + # Only relevant for multimodal models + if self.supports_mm_inputs: + self.is_mm_embed_cpu = torch.zeros(self.max_num_tokens, + dtype=torch.bool, + device="cpu", + pin_memory=self.pin_memory) + # Range tensor with values [0 .. self.max_num_tokens - 1]. 
# Used to initialize positions / context_lens / seq_lens # Keep in int64 to avoid overflow with long context @@ -879,13 +886,22 @@ def _execute_mm_encoder(self, scheduler_output: "SchedulerOutput"): def _gather_mm_embeddings( self, scheduler_output: "SchedulerOutput", - ) -> list[torch.Tensor]: - mm_embeds: list[torch.Tensor] = [] + ) -> tuple[list[torch.Tensor], torch.Tensor]: + total_num_scheduled_tokens = scheduler_output.total_num_scheduled_tokens + padded_total_num_scheduled_tokens = _get_padded_token_len( + self.num_tokens_paddings, total_num_scheduled_tokens) + + is_mm_embed = self.is_mm_embed_cpu + is_mm_embed[:padded_total_num_scheduled_tokens] = False + mm_embeds = list[torch.Tensor]() + req_start_idx = 0 + for req_id in self.input_batch.req_ids: num_scheduled_tokens = scheduler_output.num_scheduled_tokens[ req_id] req_state = self.requests[req_id] num_computed_tokens = req_state.num_computed_tokens + # TODO unroll loop and assume/enforce --disable_chunked_mm_input # NOTE (NickLucche) here we diverge from logic in other runners, as # we assume to only have whole mm items to process. Hence we avoid @@ -906,26 +922,53 @@ def _gather_mm_embeddings( # The encoder output is already processed and stored # in the decoder's KV cache. continue + + start_idx = max(num_computed_tokens - start_pos, 0) + end_idx = min( + num_computed_tokens - start_pos + num_scheduled_tokens, + num_encoder_tokens, + ) + assert start_idx < end_idx + mm_hash = mm_feature.identifier encoder_output = self.encoder_cache.get(mm_hash, None) assert encoder_output is not None,\ f"Encoder cache miss for {mm_hash}." + assert pos_info.is_embed is None, "Expected all positions to"\ " be contiguous and embeddings." - encoder_output = self.encoder_cache[mm_hash] + + req_start_pos = req_start_idx + start_pos - num_computed_tokens + is_mm_embed[req_start_pos+start_idx:req_start_pos + end_idx] \ + = True + + # Only whole mm items are processed mm_embeds.append(encoder_output) - return mm_embeds - def _get_model_inputs(self, input_ids: torch.Tensor, - mm_embeds: list[torch.Tensor]): + req_start_idx += num_scheduled_tokens + + is_mm_embed = is_mm_embed[:padded_total_num_scheduled_tokens] \ + .to(self.device) + + return mm_embeds, is_mm_embed + + def _get_model_inputs( + self, + input_ids: torch.Tensor, + mm_embed_inputs: Optional[tuple[list[torch.Tensor], torch.Tensor]], + ): if self.supports_mm_inputs: + mm_embeds, is_mm_embed = mm_embed_inputs or (None, None) + # NOTE(woosuk): To unify token ids and soft tokens (vision # embeddings), we always use embeddings (rather than token ids) # as input to the multimodal model, even when the input is text. inputs_embeds = self.model.get_input_embeddings( - input_ids=input_ids, + input_ids, multimodal_embeddings=mm_embeds, + is_multimodal=is_mm_embed, ) + return None, inputs_embeds else: # For text-only models, we use token ids as input. @@ -953,9 +996,10 @@ def execute_model( if self.supports_mm_inputs: # Run the multimodal encoder if any. self._execute_mm_encoder(scheduler_output) - mm_embeds = self._gather_mm_embeddings(scheduler_output) + mm_embed_inputs = self._gather_mm_embeddings(scheduler_output) else: - mm_embeds = [] + mm_embed_inputs = None + torch_xla.sync(wait=False) # Prepare inputs, the requests might be split into multiple # executions, combine the result of each execution. 
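To make the placeholder slicing concrete, here is the same index arithmetic as `_gather_mm_embeddings` on made-up numbers; all values below are hypothetical, only the formulas come from the code.

```python
# One request, partway through a chunked prefill.
num_computed_tokens = 10     # tokens already processed for this request
num_scheduled_tokens = 5     # tokens scheduled this step
start_pos = 8                # placeholder offset inside the prompt
num_encoder_tokens = 6       # length of the multimodal item

start_idx = max(num_computed_tokens - start_pos, 0)                    # 2
end_idx = min(num_computed_tokens - start_pos + num_scheduled_tokens,
              num_encoder_tokens)                                      # 6

req_start_idx = 0            # offset of this request in the flattened batch
req_start_pos = req_start_idx + start_pos - num_computed_tokens        # -2

# Positions [req_start_pos + start_idx, req_start_pos + end_idx) of the
# flattened batch are marked True in is_mm_embed, i.e. [0, 4): the first
# four tokens scheduled this step belong to the image placeholder.
assert (req_start_pos + start_idx, req_start_pos + end_idx) == (0, 4)
```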
@@ -972,7 +1016,7 @@ def execute_model( attn_metadata, logits_indices, padded_num_reqs, num_reqs,\ end_index = self._prepare_inputs(scheduler_output, start_index) input_ids, inputs_embeds = self._get_model_inputs( - self.input_ids, mm_embeds) + self.input_ids, mm_embed_inputs) torch_xla.sync(wait=False) # Run the decoder with set_forward_context( @@ -1325,9 +1369,15 @@ def _precompile_mm_encoder(self) -> None: hf_config.image_token_index placeholders_ids = placeholders_ids.to(self.device) + + mm_mask = torch.tensor([False] * num_tokens) + mm_mask[:items_size] = True + mm_mask = mm_mask.to(self.device) # Assign outputs or the graph will be cut short. - a, b = self._get_model_inputs(placeholders_ids, - [mm_embeds]) + a, b = self._get_model_inputs( + placeholders_ids, + mm_embed_inputs=([mm_embeds], mm_mask), + ) assert a is None torch_xla.sync(wait=False) @@ -1338,7 +1388,10 @@ def _precompile_mm_encoder(self) -> None: dtype=torch.int32, device="cpu") placeholders_ids = placeholders_ids.to(self.device) - a, b = self._get_model_inputs(placeholders_ids, []) + a, b = self._get_model_inputs( + placeholders_ids, + mm_embed_inputs=None, + ) assert a is None torch_xla.sync(wait=False) From 3f5d902d2a6fa752925d49b8c219b9515b37c0a6 Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Sat, 27 Sep 2025 06:09:26 -0400 Subject: [PATCH 449/518] Validate API tokens in constant time (#25781) Signed-off-by: rentianyue-jk Signed-off-by: Russell Bryant Co-authored-by: rentianyue-jk --- vllm/entrypoints/openai/api_server.py | 28 +++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index 97cbda63bf42..d054e2826744 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -3,12 +3,14 @@ import asyncio import gc +import hashlib import importlib import inspect import json import multiprocessing import multiprocessing.forkserver as forkserver import os +import secrets import signal import socket import tempfile @@ -1252,7 +1254,7 @@ def load_log_config(log_config_file: Optional[str]) -> Optional[dict]: class AuthenticationMiddleware: """ Pure ASGI middleware that authenticates each request by checking - if the Authorization header exists and equals "Bearer {api_key}". + if the Authorization Bearer token exists and equals anyof "{api_key}". Notes ----- @@ -1263,7 +1265,26 @@ class AuthenticationMiddleware: def __init__(self, app: ASGIApp, tokens: list[str]) -> None: self.app = app - self.api_tokens = {f"Bearer {token}" for token in tokens} + self.api_tokens = [ + hashlib.sha256(t.encode("utf-8")).digest() for t in tokens + ] + + def verify_token(self, headers: Headers) -> bool: + authorization_header_value = headers.get("Authorization") + if not authorization_header_value: + return False + + scheme, _, param = authorization_header_value.partition(" ") + if scheme.lower() != "bearer": + return False + + param_hash = hashlib.sha256(param.encode("utf-8")).digest() + + token_match = False + for token_hash in self.api_tokens: + token_match |= secrets.compare_digest(param_hash, token_hash) + + return token_match def __call__(self, scope: Scope, receive: Receive, send: Send) -> Awaitable[None]: @@ -1276,8 +1297,7 @@ def __call__(self, scope: Scope, receive: Receive, url_path = URL(scope=scope).path.removeprefix(root_path) headers = Headers(scope=scope) # Type narrow to satisfy mypy. 
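The `verify_token` method above is a standard constant-time membership check. A self-contained sketch of the same pattern (names are illustrative and header parsing is omitted): hashing both sides first gives `secrets.compare_digest` equal-length inputs, and OR-ing the results instead of returning early avoids leaking which stored token matched.

```python
import hashlib
import secrets

def verify(presented: str, allowed_hashes: list[bytes]) -> bool:
    presented_hash = hashlib.sha256(presented.encode("utf-8")).digest()
    match = False
    for allowed in allowed_hashes:
        # compare_digest takes time independent of where the bytes differ.
        match |= secrets.compare_digest(presented_hash, allowed)
    return match

allowed = [hashlib.sha256(t.encode("utf-8")).digest()
           for t in ("key-a", "key-b")]
assert verify("key-b", allowed)
assert not verify("key-c", allowed)
```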
- if url_path.startswith("/v1") and headers.get( - "Authorization") not in self.api_tokens: + if url_path.startswith("/v1") and not self.verify_token(headers): response = JSONResponse(content={"error": "Unauthorized"}, status_code=401) return response(scope, receive, send) From 7977e5027c2250a4abc1f474c5619c40b4e5682f Mon Sep 17 00:00:00 2001 From: Russell Bryant Date: Sat, 27 Sep 2025 06:46:49 -0400 Subject: [PATCH 450/518] Add filtering for chat template kwargs (#25794) Signed-off-by: Isotr0py Co-authored-by: Isotr0py --- tests/entrypoints/test_chat_utils.py | 85 +++++++++++++++++++++++++ vllm/entrypoints/chat_utils.py | 54 +++++++++++++++- vllm/entrypoints/openai/api_server.py | 1 + vllm/entrypoints/openai/cli_args.py | 10 ++- vllm/entrypoints/openai/serving_chat.py | 14 +++- 5 files changed, 158 insertions(+), 6 deletions(-) diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py index 78370d199b56..a268f573ef90 100644 --- a/tests/entrypoints/test_chat_utils.py +++ b/tests/entrypoints/test_chat_utils.py @@ -19,6 +19,7 @@ parse_chat_messages, parse_chat_messages_futures, resolve_chat_template_content_format, + resolve_chat_template_kwargs, resolve_hf_chat_template) from vllm.multimodal import MultiModalDataDict, MultiModalUUIDDict from vllm.multimodal.utils import (encode_audio_base64, encode_image_base64, @@ -37,6 +38,7 @@ QWEN2VL_MODEL_ID = "Qwen/Qwen2-VL-2B-Instruct" QWEN25VL_MODEL_ID = "Qwen/Qwen2.5-VL-3B-Instruct" QWEN25OMNI_MODEL_ID = "Qwen/Qwen2.5-Omni-7B" +QWEN3_MODEL_ID = "Qwen/Qwen3-8B" LLAMA_GUARD_MODEL_ID = "meta-llama/Llama-Guard-3-1B" HERMES_MODEL_ID = "NousResearch/Hermes-3-Llama-3.1-8B" MISTRAL_MODEL_ID = "mistralai/Mistral-Small-3.1-24B-Instruct-2503" @@ -2255,6 +2257,89 @@ def test_resolve_hf_chat_template(sample_json_schema, model, use_tools): assert isinstance(chat_template, str) +@pytest.mark.parametrize( + "model, expected_kwargs", + [ + ( + QWEN2VL_MODEL_ID, + { + "add_vision_id", "add_generation_prompt", + "continue_final_message", "tools" + }, + ), + ( + QWEN3_MODEL_ID, + { + "enable_thinking", "add_generation_prompt", + "continue_final_message", "tools" + }, + ), + ], +) +def test_resolve_hf_chat_template_kwargs(sample_json_schema, model, + expected_kwargs): + """checks that chat_template is a dict type for HF models.""" + model_info = HF_EXAMPLE_MODELS.find_hf_info(model) + model_info.check_available_online(on_fail="skip") + + tools = ([{ + "type": "function", + "function": { + "name": "dummy_function_name", + "description": "This is a dummy function", + "parameters": sample_json_schema, + }, + }]) + + chat_template_kwargs = { + # both unused + "unsed_kwargs_1": 123, + "unsed_kwargs_2": "abc", + # should not appear + "chat_template": "{% Hello world! 
%}", + # used by tokenizer + "continue_final_message": True, + "tools": tools, + # both used by Qwen2-VL and Qwen3 + "add_generation_prompt": True, + # only used by Qwen2-VL + "add_vision_id": True, + # only used by Qwen3 + "enable_thinking": True, + } + + model_config = ModelConfig( + model, + tokenizer=model_info.tokenizer or model, + tokenizer_mode=model_info.tokenizer_mode, + revision=model_info.revision, + trust_remote_code=model_info.trust_remote_code, + hf_overrides=model_info.hf_overrides, + skip_tokenizer_init=model_info.skip_tokenizer_init, + enforce_eager=model_info.enforce_eager, + dtype=model_info.dtype) + + # Build the tokenizer + tokenizer = get_tokenizer( + model, + trust_remote_code=model_config.trust_remote_code, + ) + + # Test detecting the tokenizer's chat_template + chat_template = resolve_hf_chat_template( + tokenizer, + chat_template=None, + tools=tools, + model_config=model_config, + ) + resolved_chat_template_kwargs = resolve_chat_template_kwargs( + tokenizer, + chat_template=chat_template, + chat_template_kwargs=chat_template_kwargs, + ) + assert set(resolved_chat_template_kwargs.keys()) == expected_kwargs + + # NOTE: Qwen2-Audio default chat template is specially defined inside # processor class instead of using `tokenizer_config.json` # yapf: disable diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index 4e1ecb9ed4c5..6b0ed23277d3 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -11,7 +11,12 @@ from typing import (Any, Callable, Generic, Literal, Optional, TypeVar, Union, cast) +import jinja2 +import jinja2.ext +import jinja2.meta import jinja2.nodes +import jinja2.parser +import jinja2.sandbox import transformers.utils.chat_template_utils as hf_chat_utils # yapf conflicts with isort for this block # yapf: disable @@ -50,7 +55,7 @@ # yapf: enable from vllm.transformers_utils.processor import cached_get_processor from vllm.transformers_utils.tokenizer import AnyTokenizer, MistralTokenizer -from vllm.utils import random_uuid +from vllm.utils import random_uuid, supports_kw logger = init_logger(__name__) @@ -1554,6 +1559,46 @@ def parse_chat_messages_futures( return conversation, mm_tracker.all_mm_data(), mm_tracker.all_mm_uuids() +# adapted from https://github.com/huggingface/transformers/blob/v4.56.2/src/transformers/utils/chat_template_utils.py#L398-L412 +# only preserve the parse function used to resolve chat template kwargs +class AssistantTracker(jinja2.ext.Extension): + tags = {"generation"} + + def parse(self, parser: jinja2.parser.Parser) -> jinja2.nodes.CallBlock: + lineno = next(parser.stream).lineno + body = parser.parse_statements(["name:endgeneration"], drop_needle=True) + call = self.call_method("_generation_support") + call_block = jinja2.nodes.CallBlock(call, [], [], body) + return call_block.set_lineno(lineno) + + +def resolve_chat_template_kwargs( + tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], + chat_template: str, + chat_template_kwargs: dict[str, Any], +) -> dict[str, Any]: + fn_kw = { + k for k in chat_template_kwargs + if supports_kw(tokenizer.apply_chat_template, k, allow_var_kwargs=False) + } + + env = jinja2.sandbox.ImmutableSandboxedEnvironment( + trim_blocks=True, + lstrip_blocks=True, + extensions=[AssistantTracker, jinja2.ext.loopcontrols], + ) + parsed_content = env.parse(chat_template) + template_vars = jinja2.meta.find_undeclared_variables(parsed_content) + + # We exclude chat_template from kwargs here, because + # chat template has been already 
resolved at this stage + unexpected_vars = {"chat_template"} + accept_vars = (fn_kw | template_vars) - unexpected_vars + return { + k: v for k, v in chat_template_kwargs.items() if k in accept_vars + } + + def apply_hf_chat_template( tokenizer: Union[PreTrainedTokenizer, PreTrainedTokenizerFast], conversation: list[ConversationMessage], @@ -1579,12 +1624,17 @@ def apply_hf_chat_template( ) try: + resolved_kwargs = resolve_chat_template_kwargs( + tokenizer=tokenizer, + chat_template=hf_chat_template, + chat_template_kwargs=kwargs, + ) return tokenizer.apply_chat_template( conversation=conversation, # type: ignore[arg-type] tools=tools, # type: ignore[arg-type] chat_template=hf_chat_template, tokenize=tokenize, - **kwargs, + **resolved_kwargs, ) # External library exceptions can sometimes occur despite the framework's diff --git a/vllm/entrypoints/openai/api_server.py b/vllm/entrypoints/openai/api_server.py index d054e2826744..15844d3162fe 100644 --- a/vllm/entrypoints/openai/api_server.py +++ b/vllm/entrypoints/openai/api_server.py @@ -1716,6 +1716,7 @@ async def init_app_state( request_logger=request_logger, chat_template=resolved_chat_template, chat_template_content_format=args.chat_template_content_format, + trust_request_chat_template=args.trust_request_chat_template, return_tokens_as_token_ids=args.return_tokens_as_token_ids, enable_auto_tools=args.enable_auto_tool_choice, exclude_tools_when_tool_choice_none=args. diff --git a/vllm/entrypoints/openai/cli_args.py b/vllm/entrypoints/openai/cli_args.py index 1c2a6f58197d..a306c2bb7cb5 100644 --- a/vllm/entrypoints/openai/cli_args.py +++ b/vllm/entrypoints/openai/cli_args.py @@ -103,9 +103,13 @@ class FrontendArgs: chat_template_content_format: ChatTemplateContentFormatOption = "auto" """The format to render message content within a chat template. -* "string" will render the content as a string. Example: `"Hello World"` -* "openai" will render the content as a list of dictionaries, similar to OpenAI -schema. Example: `[{"type": "text", "text": "Hello world!"}]`""" + * "string" will render the content as a string. Example: `"Hello World"` + * "openai" will render the content as a list of dictionaries, similar to + OpenAI schema. Example: `[{"type": "text", "text": "Hello world!"}]`""" + trust_request_chat_template: bool = False + """Whether to trust the chat template provided in the request. 
If False, + the server will always use the chat template specified by `--chat-template` + or the ones from tokenizer.""" response_role: str = "assistant" """The role name to return if `request.add_generation_prompt=true`.""" ssl_keyfile: Optional[str] = None diff --git a/vllm/entrypoints/openai/serving_chat.py b/vllm/entrypoints/openai/serving_chat.py index 0780448ad733..ab4bf75102f4 100644 --- a/vllm/entrypoints/openai/serving_chat.py +++ b/vllm/entrypoints/openai/serving_chat.py @@ -68,6 +68,7 @@ def __init__( request_logger: Optional[RequestLogger], chat_template: Optional[str], chat_template_content_format: ChatTemplateContentFormatOption, + trust_request_chat_template: bool = False, return_tokens_as_token_ids: bool = False, reasoning_parser: str = "", enable_auto_tools: bool = False, @@ -89,6 +90,7 @@ def __init__( self.response_role = response_role self.chat_template = chat_template self.chat_template_content_format: Final = chat_template_content_format + self.trust_request_chat_template = trust_request_chat_template self.enable_log_outputs = enable_log_outputs # set up tool use @@ -220,6 +222,16 @@ async def create_chat_completion( if not self.use_harmony: # Common case. + request_chat_template = request.chat_template + chat_template_kwargs = request.chat_template_kwargs + if not self.trust_request_chat_template and ( + request_chat_template is not None or + (chat_template_kwargs and + chat_template_kwargs.get("chat_template") is not None)): + return self.create_error_response( + "Chat template is passed with request, but " + "--trust-request-chat-template is not set. " + "Refused request with untrusted chat template.") ( conversation, request_prompts, @@ -228,7 +240,7 @@ async def create_chat_completion( request, tokenizer, request.messages, - chat_template=request.chat_template or self.chat_template, + chat_template=request_chat_template or self.chat_template, chat_template_content_format=self. 
chat_template_content_format, add_generation_prompt=request.add_generation_prompt, From ec152c8748d0b37da157fa6a99a75920822dc30d Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Sat, 27 Sep 2025 13:18:20 +0100 Subject: [PATCH 451/518] Fix GPTQ model loading in Transformers backend (#25770) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Isotr0py --- tests/models/test_transformers.py | 10 +++++++--- vllm/model_executor/models/transformers.py | 22 +++++++++++++++++----- vllm/model_executor/models/utils.py | 7 +++++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/tests/models/test_transformers.py b/tests/models/test_transformers.py index 1817d4aeee9f..e4b5e7c24453 100644 --- a/tests/models/test_transformers.py +++ b/tests/models/test_transformers.py @@ -100,10 +100,9 @@ def test_distributed( kwargs_test=kwargs) -@pytest.mark.skipif( - current_platform.is_rocm(), - reason="bitsandbytes quantization is currently not supported in rocm.") @pytest.mark.parametrize("model, quantization_kwargs", [ + ("TheBloke/TinyLlama-1.1B-Chat-v0.3-AWQ", {}), + ("TheBloke/TinyLlama-1.1B-Chat-v0.3-GPTQ", {}), ( "meta-llama/Llama-3.2-1B-Instruct", { @@ -121,6 +120,11 @@ def test_quantization( max_tokens: int, num_logprobs: int, ) -> None: + if (current_platform.is_rocm() + and quantization_kwargs.get("quantization", "") == "bitsandbytes"): + pytest.skip( + "bitsandbytes quantization is currently not supported in rocm.") + with vllm_runner( model, model_impl="auto", enforce_eager=True, **quantization_kwargs) as vllm_model: # type: ignore[arg-type] diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py index 3d7b06633f34..7cfb639f675d 100644 --- a/vllm/model_executor/models/transformers.py +++ b/vllm/model_executor/models/transformers.py @@ -447,7 +447,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.device_config: DeviceConfig = vllm_config.device_config self.model_config: ModelConfig = vllm_config.model_config self.parallel_config: ParallelConfig = vllm_config.parallel_config - self.quant_config: QuantizationConfig = vllm_config.quant_config + self.quant_config: Optional[ + QuantizationConfig] = vllm_config.quant_config self.pp_group = get_pp_group() self.pp_size = self.pp_group.world_size @@ -456,7 +457,18 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): # Weights to skip in `self.load_weights` self.skip_prefixes: list[str] = [] + """Skip loading weights whose qualname starts with these prefixes.""" self.skip_substrs: list[str] = [] + """Skip loading weights whose qualname contains these substrings.""" + self.ignore_unexpected_prefixes: list[str] = [] + """Ignore unexpected weights whose qualname starts with these prefixes. + """ + self.ignore_unexpected_suffixes: list[str] = [] + """Ignore unexpected weights whose qualname ends with these suffixes.""" + + # Skip loading extra bias for GPTQ models. + if self.quant_config and "gptq" in self.quant_config.get_name(): + self.ignore_unexpected_suffixes.append(".bias") # Set correct attn and init on "meta" to delay allocating GPU tensors # TODO: @raushan, use the public `model.set_attn_implementation()` @@ -563,9 +575,7 @@ def tensor_parallel(self): raise ValueError( f"{type(self.model)} does not support tensor parallel. 
{tip}") - def _tensor_parallel(module: nn.Module, - prefix: str = "", - tp_plan=None): + def _tensor_parallel(module: nn.Module, prefix: str, tp_plan=None): tp_plan = tp_plan or {} # If the current module is a PreTrainedModel, set the tp_plan for @@ -597,7 +607,7 @@ def _tensor_parallel(module: nn.Module, prefix=qual_name, tp_plan=tp_plan) - _tensor_parallel(self.model) + _tensor_parallel(self.model, prefix="model") def create_attention_instances( self, @@ -696,6 +706,8 @@ def load_weights(self, weights: Iterable[tuple[str, self, skip_prefixes=self.skip_prefixes, skip_substrs=self.skip_substrs, + ignore_unexpected_prefixes=self.ignore_unexpected_prefixes, + ignore_unexpected_suffixes=self.ignore_unexpected_suffixes, ) return loader.load_weights(weights, mapper=self.hf_to_vllm_mapper) diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index 7b3f20c6b28a..bb6a0bd02202 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -109,6 +109,7 @@ def __init__( skip_prefixes: Optional[list[str]] = None, skip_substrs: Optional[list[str]] = None, ignore_unexpected_prefixes: Optional[list[str]] = None, + ignore_unexpected_suffixes: Optional[list[str]] = None, ) -> None: super().__init__() @@ -116,6 +117,7 @@ def __init__( self.skip_prefixes = skip_prefixes or [] self.skip_substrs = skip_substrs or [] self.ignore_unexpected_prefixes = ignore_unexpected_prefixes or [] + self.ignore_unexpected_suffixes = ignore_unexpected_suffixes or [] # update default skip_substrs self.skip_substrs += self.ROTARY_EMBEDS_UNUSED_WEIGHTS @@ -149,8 +151,9 @@ def _can_skip(self, qualname: str) -> bool: or any(substr in qualname for substr in self.skip_substrs)) def _can_ignore_unexpected(self, qualname: str) -> bool: - return any( - qualname.startswith(p) for p in self.ignore_unexpected_prefixes) + iup = (qualname.startswith(p) for p in self.ignore_unexpected_prefixes) + ius = (qualname.endswith(s) for s in self.ignore_unexpected_suffixes) + return any(iup) or any(ius) def _load_param( self, From f9df8b4ad77a933659c93cb1b923c8e09d76ea3a Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Sat, 27 Sep 2025 10:13:11 -0400 Subject: [PATCH 452/518] [Bugfix] Fix triton import precommit failure (#25803) Signed-off-by: Tyler Michael Smith --- vllm/model_executor/layers/batch_invariant.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py index ae2c842af698..c025d509d862 100644 --- a/vllm/model_executor/layers/batch_invariant.py +++ b/vllm/model_executor/layers/batch_invariant.py @@ -7,8 +7,8 @@ from typing import Any, Union import torch -import triton -import triton.language as tl + +from vllm.triton_utils import tl, triton def _matmul_launch_metadata(grid: Callable[..., Any], kernel: Any, From a5354b3ed24723ac6e351896cc11e16dcee0b701 Mon Sep 17 00:00:00 2001 From: Tyler Michael Smith Date: Sat, 27 Sep 2025 10:22:28 -0400 Subject: [PATCH 453/518] [Bugfix][WideEP] Apply TP Attn + EP MoE fix to other models (#24982) Signed-off-by: Tyler Michael Smith --- vllm/config/parallel.py | 18 +++ .../device_communicators/all2all.py | 122 ++++++++++++------ .../base_device_communicator.py | 28 ++-- .../device_communicators/cuda_communicator.py | 22 ++-- .../device_communicators/xpu_communicator.py | 16 ++- vllm/distributed/parallel_state.py | 17 ++- vllm/forward_context.py | 97 +++++++++----- vllm/model_executor/layers/fused_moe/layer.py | 119 
+++++++++-------- vllm/model_executor/models/aria.py | 16 +-- vllm/model_executor/models/deepseek_v2.py | 58 +-------- vllm/model_executor/models/ernie_mtp.py | 16 +-- vllm/model_executor/models/glm4.py | 16 ++- vllm/model_executor/models/gpt_oss.py | 42 ++++-- vllm/model_executor/models/granitemoe.py | 38 ++++-- vllm/model_executor/models/llama.py | 25 ++-- vllm/model_executor/models/llama4.py | 44 ++++--- vllm/model_executor/models/llama4_eagle.py | 4 +- vllm/model_executor/models/llama_eagle.py | 8 +- vllm/model_executor/models/llama_eagle3.py | 27 ++-- vllm/model_executor/models/qwen3_moe.py | 60 +++++---- vllm/model_executor/models/qwen3_next.py | 70 +++++----- vllm/model_executor/models/qwen3_next_mtp.py | 6 +- vllm/model_executor/models/utils.py | 48 ++++++- 23 files changed, 541 insertions(+), 376 deletions(-) diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index f80eb1adc7fd..8b980458ddaf 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -279,6 +279,24 @@ def stateless_init_dp_group(self) -> ProcessGroup: assert last_exc is not None raise last_exc + # The all_reduce at the end of attention (during o_proj) means that + # inputs are replicated across each rank of the tensor parallel group. + # If using expert-parallelism with DeepEP All2All ops, replicated + # tokens results in useless duplicate computation and communication. + # + # In this case, ensure the input to the experts is sequence parallel + # to avoid the excess work. + # + # Not needed for pplx-kernels as it can handle duplicate input tokens. + @property + def use_sequence_parallel_moe(self) -> bool: + return (envs.VLLM_ALL2ALL_BACKEND + in ("allgather_reducescatter", "naive", + "deepep_high_throughput", "deepep_low_latency") + and self.enable_expert_parallel + and self.tensor_parallel_size > 1 + and self.data_parallel_size > 1) + @staticmethod def has_unfinished_dp(dp_group: ProcessGroup, has_unfinished: bool) -> bool: diff --git a/vllm/distributed/device_communicators/all2all.py b/vllm/distributed/device_communicators/all2all.py index 661ed939608a..bb3fd657facd 100644 --- a/vllm/distributed/device_communicators/all2all.py +++ b/vllm/distributed/device_communicators/all2all.py @@ -6,7 +6,7 @@ import torch.distributed as dist import vllm.envs as envs -from vllm.distributed import get_dp_group +from vllm.distributed import get_dp_group, get_ep_group from vllm.forward_context import get_forward_context from vllm.logger import init_logger from vllm.utils import has_deep_ep, has_pplx @@ -34,41 +34,60 @@ def __init__(self, cpu_group): super().__init__(cpu_group) def naive_multicast(self, x: torch.Tensor, - cu_tokens_across_dp_cpu: torch.Tensor): + cu_tokens_across_sp_cpu: torch.Tensor, + is_sequence_parallel: bool) -> torch.Tensor: assert (len(x.shape) == 2) - buffer = torch.empty((cu_tokens_across_dp_cpu[-1], x.size(1)), + buffer = torch.empty((cu_tokens_across_sp_cpu[-1], x.size(1)), device=x.device, dtype=x.dtype) - start = 0 if self.dp_rank == 0 else cu_tokens_across_dp_cpu[ - self.dp_rank - 1] - end = cu_tokens_across_dp_cpu[self.dp_rank] + rank = self.rank if is_sequence_parallel else self.dp_rank + world_size = (self.world_size + if is_sequence_parallel else self.dp_world_size) + + start = 0 if rank == 0 else cu_tokens_across_sp_cpu[rank - 1] + end = cu_tokens_across_sp_cpu[rank] buffer[start:end, :].copy_(x) - for idx in range(self.dp_world_size): - start = 0 if idx == 0 else cu_tokens_across_dp_cpu[idx - 1] - end = cu_tokens_across_dp_cpu[idx] - 
self.dp_group.broadcast(buffer[start:end, :], idx) + for idx in range(world_size): + start = 0 if idx == 0 else cu_tokens_across_sp_cpu[idx - 1] + end = cu_tokens_across_sp_cpu[idx] + get_ep_group().broadcast(buffer[start:end, :], idx) return buffer - def dispatch(self, hidden_states: torch.Tensor, - router_logits: torch.Tensor): - sizes = get_forward_context( - ).dp_metadata.get_chunk_sizes_across_dp_rank() - hidden_states, router_logits = get_dp_group().all_gatherv( - [hidden_states, router_logits], - dim=0, - sizes=sizes, - ) - + def dispatch( + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: + sp_size = self.tp_group.world_size if is_sequence_parallel else 1 + dp_metadata = get_forward_context().dp_metadata + cu_tokens_across_sp_cpu = dp_metadata.cu_tokens_across_sp(sp_size) + + hidden_states = self.naive_multicast(hidden_states, + cu_tokens_across_sp_cpu, + is_sequence_parallel) + router_logits = self.naive_multicast(router_logits, + cu_tokens_across_sp_cpu, + is_sequence_parallel) return hidden_states, router_logits - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: - sizes = get_forward_context( - ).dp_metadata.get_chunk_sizes_across_dp_rank() - hidden_states = get_dp_group().reduce_scatterv(hidden_states, - dim=0, - sizes=sizes) + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: + + ep_rank = self.rank if is_sequence_parallel else self.dp_rank + + dp_metadata = get_forward_context().dp_metadata + sp_size = self.tp_group.world_size if is_sequence_parallel else 1 + cu_tokens_across_sp_cpu = dp_metadata.cu_tokens_across_sp(sp_size) + + start = 0 if ep_rank == 0 else cu_tokens_across_sp_cpu[ep_rank - 1] + end = cu_tokens_across_sp_cpu[ep_rank] + + all_hidden_states = get_ep_group().all_reduce(hidden_states) + hidden_states = all_hidden_states[start:end, :] return hidden_states def destroy(self): @@ -84,29 +103,40 @@ class AgRsAll2AllManager(All2AllManagerBase): def __init__(self, cpu_group): super().__init__(cpu_group) - def dispatch(self, hidden_states: torch.Tensor, - router_logits: torch.Tensor): + def dispatch( + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: """ Gather hidden_states and router_logits from all dp ranks. """ sizes = get_forward_context( ).dp_metadata.get_chunk_sizes_across_dp_rank() - hidden_states, router_logits = get_dp_group().all_gatherv( + + dist_group = get_ep_group() if is_sequence_parallel else get_dp_group() + assert sizes[dist_group.rank_in_group] == hidden_states.shape[0] + hidden_states, router_logits = dist_group.all_gatherv( [hidden_states, router_logits], dim=0, sizes=sizes, ) return hidden_states, router_logits - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: """ Reduce-scatter hidden_states across all dp ranks. 
""" sizes = get_forward_context( ).dp_metadata.get_chunk_sizes_across_dp_rank() - hidden_states = get_dp_group().reduce_scatterv(hidden_states, - dim=0, - sizes=sizes) + + dist_group = get_ep_group() if is_sequence_parallel else get_dp_group() + hidden_states = dist_group.reduce_scatterv(hidden_states, + dim=0, + sizes=sizes) return hidden_states def destroy(self): @@ -148,11 +178,17 @@ def get_handle(self, kwargs): kwargs, pplx.AllToAll.internode if self.internode else pplx.AllToAll.intranode) - def dispatch(self, hidden_states: torch.Tensor, - router_logits: torch.Tensor): + def dispatch( + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: raise NotImplementedError def destroy(self): @@ -184,11 +220,17 @@ def __init__(self, cpu_group): def get_handle(self, kwargs): raise NotImplementedError - def dispatch(self, hidden_states: torch.Tensor, - router_logits: torch.Tensor): + def dispatch( + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: raise NotImplementedError - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: raise NotImplementedError def destroy(self): @@ -395,4 +437,4 @@ def cleanup(self): self.workspace_tensor = None self.prepare_workspace_tensor = None self.mapping = None - self.initialized = False \ No newline at end of file + self.initialized = False diff --git a/vllm/distributed/device_communicators/base_device_communicator.py b/vllm/distributed/device_communicators/base_device_communicator.py index 586441c91783..a42081fb0c15 100644 --- a/vllm/distributed/device_communicators/base_device_communicator.py +++ b/vllm/distributed/device_communicators/base_device_communicator.py @@ -28,6 +28,8 @@ def get_or_create(self, kwargs, func): class All2AllManagerBase: + rank: int + world_size: int def __init__(self, cpu_group): self.cpu_group = cpu_group @@ -40,6 +42,7 @@ def __init__(self, cpu_group): # all2all lives in ep group, which is merged from dp and tp group self.dp_group = get_dp_group() self.tp_group = get_tp_group() + # no self.ep_group since self.ep_group is still in construction # when we create this object self.dp_rank = self.dp_group.rank_in_group @@ -60,17 +63,21 @@ def get_handle(self, kwargs): # and reuse it for the same config. 
raise NotImplementedError + def dispatch(self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False): + raise NotImplementedError + def set_num_sms(self, num_sms: int): pass def max_sms_used(self) -> Optional[int]: return None # None means it could use the whole GPU - def dispatch(self, hidden_states: torch.Tensor, - router_logits: torch.Tensor): - raise NotImplementedError - - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False): raise NotImplementedError def destroy(self): @@ -267,15 +274,20 @@ def prepare_communication_buffer_for_model(self, module.quant_method.init_prepare_finalize(module) def dispatch( - self, hidden_states: torch.Tensor, - router_logits: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: """ Dispatch the hidden states and router logits to the appropriate device. This is a no-op in the base class. """ return hidden_states, router_logits - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: """ Combine the hidden states and router logits from the appropriate device. This is a no-op in the base class. diff --git a/vllm/distributed/device_communicators/cuda_communicator.py b/vllm/distributed/device_communicators/cuda_communicator.py index bab372b722db..30d1bf10138b 100644 --- a/vllm/distributed/device_communicators/cuda_communicator.py +++ b/vllm/distributed/device_communicators/cuda_communicator.py @@ -39,10 +39,6 @@ def __init__(self, use_custom_allreduce = _ENABLE_CUSTOM_ALL_REDUCE use_torch_symm_mem = envs.VLLM_ALLREDUCE_USE_SYMM_MEM - # ep does not use pynccl - use_pynccl = "ep" not in unique_name - - self.use_pynccl = use_pynccl self.use_custom_allreduce = use_custom_allreduce self.use_torch_symm_mem = use_torch_symm_mem @@ -57,7 +53,7 @@ def __init__(self, SymmMemCommunicator) self.pynccl_comm: Optional[PyNcclCommunicator] = None - if use_pynccl and self.world_size > 1: + if self.world_size > 1: self.pynccl_comm = PyNcclCommunicator( group=self.cpu_group, device=self.device, @@ -308,14 +304,20 @@ def _all_gather_single(input_: torch.Tensor, return output_list def dispatch( - self, hidden_states: torch.Tensor, - router_logits: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: assert self.all2all_manager is not None hidden_states, router_logits = self.all2all_manager.dispatch( - hidden_states, router_logits) + hidden_states, router_logits, is_sequence_parallel) return hidden_states, router_logits - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: assert self.all2all_manager is not None - hidden_states = self.all2all_manager.combine(hidden_states) + hidden_states = self.all2all_manager.combine(hidden_states, + is_sequence_parallel) return hidden_states diff --git a/vllm/distributed/device_communicators/xpu_communicator.py b/vllm/distributed/device_communicators/xpu_communicator.py index b236bae261e0..27bd176554af 100644 --- a/vllm/distributed/device_communicators/xpu_communicator.py +++ 
b/vllm/distributed/device_communicators/xpu_communicator.py @@ -75,14 +75,20 @@ def broadcast(self, input_: torch.Tensor, src: int = 0) -> None: dist.broadcast(input_, src=src, group=self.device_group) def dispatch( - self, hidden_states: torch.Tensor, - router_logits: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: assert self.all2all_manager is not None hidden_states, router_logits = self.all2all_manager.dispatch( - hidden_states, router_logits) + hidden_states, router_logits, is_sequence_parallel) return hidden_states, router_logits - def combine(self, hidden_states: torch.Tensor) -> torch.Tensor: + def combine(self, + hidden_states: torch.Tensor, + is_sequence_parallel: bool = False) -> torch.Tensor: assert self.all2all_manager is not None - hidden_states = self.all2all_manager.combine(hidden_states) + hidden_states = self.all2all_manager.combine(hidden_states, + is_sequence_parallel) return hidden_states diff --git a/vllm/distributed/parallel_state.py b/vllm/distributed/parallel_state.py index 69f98eb54f36..638170963e2b 100644 --- a/vllm/distributed/parallel_state.py +++ b/vllm/distributed/parallel_state.py @@ -871,17 +871,24 @@ def prepare_communication_buffer_for_model(self, model: torch.nn.Module): model) def dispatch( - self, hidden_states: torch.Tensor, - router_logits: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + self, + hidden_states: torch.Tensor, + router_logits: torch.Tensor, + is_sequence_parallel: bool = False + ) -> tuple[torch.Tensor, torch.Tensor]: if self.device_communicator is not None: return self.device_communicator.dispatch(hidden_states, - router_logits) + router_logits, + is_sequence_parallel) else: return hidden_states, router_logits - def combine(self, hidden_states) -> torch.Tensor: + def combine(self, + hidden_states, + is_sequence_parallel: bool = False) -> torch.Tensor: if self.device_communicator is not None: - return self.device_communicator.combine(hidden_states) + return self.device_communicator.combine(hidden_states, + is_sequence_parallel) else: return hidden_states diff --git a/vllm/forward_context.py b/vllm/forward_context.py index 2bf4e1804521..09defade00dc 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -49,16 +49,29 @@ def non_uniform(self) -> "BatchDescriptor": return BatchDescriptor(self.num_tokens, uniform_decode=False) -def _compute_chunked_local_num_tokens(num_tokens_across_dp_cpu: list[int], +def _compute_sp_num_tokens(num_tokens_across_dp_cpu: torch.Tensor, + sequence_parallel_size: int) -> list[int]: + sp_tokens = ((num_tokens_across_dp_cpu + sequence_parallel_size - 1) // + sequence_parallel_size) + + sp_tokens = sp_tokens.repeat_interleave(sequence_parallel_size) + return sp_tokens.tolist() + + +def _compute_chunked_local_num_tokens(num_tokens_across_dp_cpu: torch.Tensor, + sequence_parallel_size: int, max_num_tokens: int, chunk_idx: int) -> list[int]: - dp_size = len(num_tokens_across_dp_cpu) - local_size = [-1] * dp_size - for i in range(dp_size): - dp_tokens = num_tokens_across_dp_cpu[i] + sp_tokens = _compute_sp_num_tokens(num_tokens_across_dp_cpu, + sequence_parallel_size) + sp_size = len(sp_tokens) + + local_size = [-1] * sp_size + for i in range(sp_size): + # Take into account sharding if MoE activation is sequence parallel. 
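A worked example (made-up token counts) of the sequence-parallel split this commit introduces: each DP rank's token count is ceil-divided across its TP ranks, repeated per TP rank, and then cumulated, matching `_compute_sp_num_tokens` and `cu_tokens_across_sp`.

```python
import torch

num_tokens_across_dp_cpu = torch.tensor([7, 5])   # tokens per DP rank
sp_size = 2                                       # TP ranks behind each DP rank

sp_tokens = (num_tokens_across_dp_cpu + sp_size - 1) // sp_size  # ceil-div: [4, 3]
sp_tokens = sp_tokens.repeat_interleave(sp_size)                 # [4, 4, 3, 3]
cu_tokens_across_sp = torch.cumsum(sp_tokens, dim=0)             # [4, 8, 11, 14]

assert sp_tokens.tolist() == [4, 4, 3, 3]
assert cu_tokens_across_sp.tolist() == [4, 8, 11, 14]
```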
local_size[i] = min(max_num_tokens, - dp_tokens - (max_num_tokens * chunk_idx)) + sp_tokens[i] - (max_num_tokens * chunk_idx)) if local_size[i] <= 0: local_size[i] = 1 # ensure lockstep even if done return local_size @@ -67,7 +80,9 @@ def _compute_chunked_local_num_tokens(num_tokens_across_dp_cpu: list[int], @dataclass class DPMetadata: max_tokens_across_dp_cpu: torch.Tensor - cu_tokens_across_dp_cpu: torch.Tensor + num_tokens_across_dp_cpu: torch.Tensor + + # NOTE: local_sizes should only be set by the chunked_sizes context manager local_sizes: Optional[list[int]] = None @staticmethod @@ -98,6 +113,17 @@ def num_tokens_across_dp(num_tokens: int, dp_size: int, dist.all_reduce(num_tokens_tensor, group=group) return num_tokens_tensor.cpu() + # Get the cumulative tokens across sequence parallel ranks. + # In this case the input to the MoEs will be distributed w.r.t both + # DP and TP rank. + # When sp_size==1, this is just the cummulative num tokens across DP. + def cu_tokens_across_sp(self, sp_size: int) -> torch.Tensor: + num_tokens_across_sp_cpu = ( + (self.num_tokens_across_dp_cpu - 1 + sp_size) // sp_size) + num_tokens_across_sp_cpu = ( + num_tokens_across_sp_cpu.repeat_interleave(sp_size)) + return torch.cumsum(num_tokens_across_sp_cpu, dim=0) + @staticmethod def should_ubatch_across_dp( should_ubatch: bool, orig_num_tokens_per_ubatch: int, @@ -147,10 +173,10 @@ def should_ubatch_across_dp( @staticmethod def make( - parallel_config: ParallelConfig, - attn_metadata: Any, - num_tokens: int, - num_tokens_across_dp: Optional[torch.Tensor] = None + parallel_config: ParallelConfig, + attn_metadata: Any, + num_tokens: int, + num_tokens_across_dp_cpu: Optional[torch.Tensor] = None ) -> "DPMetadata": assert parallel_config.data_parallel_size > 1 @@ -167,18 +193,18 @@ def make( # If num_tokens_across_dp is None, it will be computed by all_reduce # Otherwise, num_tokens_across_dp[dp_rank] should be equal to batchsize - assert (num_tokens_across_dp is None or num_tokens_across_dp[dp_rank] - == batchsize), f"{num_tokens_across_dp[dp_rank]} {batchsize}" - if num_tokens_across_dp is None: - num_tokens_across_dp = DPMetadata.num_tokens_across_dp( + assert (num_tokens_across_dp_cpu is None + or num_tokens_across_dp_cpu[dp_rank] == batchsize + ), f"{num_tokens_across_dp_cpu[dp_rank]} {batchsize}" + if num_tokens_across_dp_cpu is None: + num_tokens_across_dp_cpu = DPMetadata.num_tokens_across_dp( batchsize, dp_size, dp_rank) - max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp) - cu_tokens_across_dp_cpu = torch.cumsum(num_tokens_across_dp, dim=0) - return DPMetadata(max_tokens_across_dp_cpu, cu_tokens_across_dp_cpu, - num_tokens_across_dp) + max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp_cpu) + return DPMetadata(max_tokens_across_dp_cpu, num_tokens_across_dp_cpu) @contextmanager - def chunked_sizes(self, max_chunk_size_per_rank: int, chunk_idx: int): + def chunked_sizes(self, sequence_parallel_size: int, + max_chunk_size_per_rank: int, chunk_idx: int): """ Context manager to compute and temporarily set the per-rank local token sizes for a specific chunk during chunked forward execution. @@ -192,31 +218,40 @@ def chunked_sizes(self, max_chunk_size_per_rank: int, chunk_idx: int): `chunk_idx`, this context manager sets `self.local_sizes` to the number of tokens to process in that chunk on each rank. - It uses cumulative sizes (`cu_tokens_across_dp_cpu`) to derive the - number of tokens per rank, and calls `_compute_chunked_local_num_tokens` - to determine the chunk-wise split. 
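The chunked variant on made-up sizes, simplified to `sp_size=1`, mirroring `_compute_chunked_local_num_tokens` including the lockstep clamp:

```python
def chunked_local_num_tokens(tokens_per_rank, max_chunk, chunk_idx):
    out = []
    for n in tokens_per_rank:
        size = min(max_chunk, n - max_chunk * chunk_idx)
        out.append(size if size > 0 else 1)   # keep finished ranks in lockstep
    return out

tokens_per_rank = [9, 2]
assert chunked_local_num_tokens(tokens_per_rank, 4, 0) == [4, 2]
# Rank 1 has no tokens left in chunk 1, but still reports a dummy token so
# the collective ops stay aligned across ranks.
assert chunked_local_num_tokens(tokens_per_rank, 4, 1) == [4, 1]
assert chunked_local_num_tokens(tokens_per_rank, 4, 2) == [1, 1]
```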
- `self.local_sizes` is only valid inside the context. Args: + sequence_parallel_size: When Attn is TP and MoE layers are EP, + we use SP between the layers to avoid + redundant ops. We need this value to + compute the chunked sizes. max_chunk_size_per_rank: The max number of tokens each rank is allowed to process in this chunk. chunk_idx: The index of the chunk to compute sizes for. """ - cu_sizes = self.cu_tokens_across_dp_cpu - num_tokens_across_dp_cpu = [ - (cu_sizes[i] - - cu_sizes[i - 1]).item() if i > 0 else cu_sizes[0].item() - for i in range(len(cu_sizes)) - ] self.local_sizes = _compute_chunked_local_num_tokens( - num_tokens_across_dp_cpu, max_chunk_size_per_rank, chunk_idx) + self.num_tokens_across_dp_cpu, sequence_parallel_size, + max_chunk_size_per_rank, chunk_idx) + try: + yield self.local_sizes + finally: + self.local_sizes = None + + @contextmanager + def sp_local_sizes(self, sequence_parallel_size: int): + """ + Context mamager for setting self.local_sizes. Same as self.chunked_sizes + but without any chunking. + """ + self.local_sizes = _compute_sp_num_tokens( + self.num_tokens_across_dp_cpu, sequence_parallel_size) try: yield self.local_sizes finally: self.local_sizes = None def get_chunk_sizes_across_dp_rank(self) -> Optional[list[int]]: + assert self.local_sizes is not None return self.local_sizes diff --git a/vllm/model_executor/layers/fused_moe/layer.py b/vllm/model_executor/layers/fused_moe/layer.py index eccae8b2a7af..8de1d14d46b3 100644 --- a/vllm/model_executor/layers/fused_moe/layer.py +++ b/vllm/model_executor/layers/fused_moe/layer.py @@ -3,6 +3,7 @@ from abc import abstractmethod from collections.abc import Iterable +from contextlib import nullcontext from enum import Enum from typing import Callable, Literal, Optional, Union, get_args, overload @@ -983,8 +984,7 @@ def __init__( if dp_size is not None else get_dp_group().world_size) self.is_sequence_parallel = is_sequence_parallel - if self.is_sequence_parallel: - self.sp_size = tp_size_ + self.sp_size = tp_size_ if is_sequence_parallel else 1 self.moe_parallel_config: FusedMoEParallelConfig = ( FusedMoEParallelConfig.make( @@ -1966,7 +1966,8 @@ def process_chunk(chunk_start, chunk_end, skip_result_store=False): # clamp start and end chunk_start = min(chunk_start, num_tokens - 1) chunk_end = min(chunk_end, num_tokens) - with ctx.dp_metadata.chunked_sizes(moe_dp_chunk_size_per_rank, + with ctx.dp_metadata.chunked_sizes(self.sp_size, + moe_dp_chunk_size_per_rank, chunk_idx): process_chunk(chunk_start, chunk_end, @@ -2011,65 +2012,73 @@ def forward_impl( else: shared_output = None - if do_naive_dispatch_combine: - hidden_states, router_logits = get_ep_group().dispatch( - hidden_states, router_logits) + ctx = get_forward_context() + sp_ctx = ctx.dp_metadata.sp_local_sizes( + self.sp_size) if ctx.dp_metadata else nullcontext() - # Matrix multiply. 
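The `sp_local_sizes` helper added here uses the usual set-then-clear context-manager pattern. A generic sketch (the class and sizes are made up) of why the `finally` matters:

```python
from contextlib import contextmanager

class Meta:
    """Made-up stand-in for DPMetadata, only to show the pattern."""

    def __init__(self):
        self.local_sizes = None

    @contextmanager
    def sp_local_sizes(self, sizes):
        self.local_sizes = sizes      # visible only inside the `with` block
        try:
            yield self.local_sizes
        finally:
            self.local_sizes = None   # cleared even if the body raises

m = Meta()
with m.sp_local_sizes([4, 4, 3, 3]) as sizes:
    assert m.local_sizes == sizes
assert m.local_sizes is None
```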
- final_hidden_states = self.quant_method.apply( - layer=self, - x=hidden_states, - router_logits=router_logits, - top_k=self.top_k, - renormalize=self.renormalize, - use_grouped_topk=self.use_grouped_topk, - global_num_experts=self.global_num_experts, - expert_map=self.expert_map, - topk_group=self.topk_group, - num_expert_group=self.num_expert_group, - custom_routing_function=self.custom_routing_function, - scoring_func=self.scoring_func, - routed_scaling_factor=self.routed_scaling_factor, - e_score_correction_bias=self.e_score_correction_bias, - activation=self.activation, - apply_router_weight_on_input=self.apply_router_weight_on_input, - enable_eplb=self.enable_eplb, - expert_load_view=self.expert_load_view, - logical_to_physical_map=self.logical_to_physical_map, - logical_replica_count=self.logical_replica_count, - ) + with sp_ctx: + if do_naive_dispatch_combine: + hidden_states, router_logits = get_ep_group().dispatch( + hidden_states, router_logits, self.is_sequence_parallel) - if shared_output is not None: - assert not isinstance(final_hidden_states, tuple) - assert self.shared_experts is not None - final_hidden_states = ( - shared_output, - final_hidden_states, + # Matrix multiply. + final_hidden_states = self.quant_method.apply( + layer=self, + x=hidden_states, + router_logits=router_logits, + top_k=self.top_k, + renormalize=self.renormalize, + use_grouped_topk=self.use_grouped_topk, + global_num_experts=self.global_num_experts, + expert_map=self.expert_map, + topk_group=self.topk_group, + num_expert_group=self.num_expert_group, + custom_routing_function=self.custom_routing_function, + scoring_func=self.scoring_func, + routed_scaling_factor=self.routed_scaling_factor, + e_score_correction_bias=self.e_score_correction_bias, + activation=self.activation, + apply_router_weight_on_input=self.apply_router_weight_on_input, + enable_eplb=self.enable_eplb, + expert_load_view=self.expert_load_view, + logical_to_physical_map=self.logical_to_physical_map, + logical_replica_count=self.logical_replica_count, ) - elif self.zero_expert_num is not None and self.zero_expert_num > 0: - assert isinstance(final_hidden_states, tuple) - final_hidden_states, zero_expert_result = final_hidden_states - def reduce_output(states: torch.Tensor, - do_combine: bool = True) -> torch.Tensor: - if do_naive_dispatch_combine and do_combine: - states = get_ep_group().combine(states) + if shared_output is not None: + assert not isinstance(final_hidden_states, tuple) + assert self.shared_experts is not None + final_hidden_states = ( + shared_output, + final_hidden_states, + ) + elif self.zero_expert_num is not None and self.zero_expert_num > 0: + assert isinstance(final_hidden_states, tuple) + final_hidden_states, zero_expert_result = final_hidden_states + + def reduce_output(states: torch.Tensor, + do_combine: bool = True) -> torch.Tensor: + if do_naive_dispatch_combine and do_combine: + states = get_ep_group().combine(states, + self.is_sequence_parallel) - if self.reduce_results and (self.tp_size > 1 or self.ep_size > 1): - states = self.maybe_all_reduce_tensor_model_parallel(states) + if (not self.is_sequence_parallel and self.reduce_results + and (self.tp_size > 1 or self.ep_size > 1)): + states = self.maybe_all_reduce_tensor_model_parallel( + states) - return states + return states - if self.shared_experts is not None: - return ( - reduce_output(final_hidden_states[0], do_combine=False), - reduce_output(final_hidden_states[1]), - ) - elif self.zero_expert_num is not None and self.zero_expert_num > 0: - assert 
isinstance(final_hidden_states, torch.Tensor) - return reduce_output(final_hidden_states) + zero_expert_result - else: - return reduce_output(final_hidden_states) + if self.shared_experts is not None: + return ( + reduce_output(final_hidden_states[0], do_combine=False), + reduce_output(final_hidden_states[1]), + ) + elif self.zero_expert_num is not None and self.zero_expert_num > 0: + assert isinstance(final_hidden_states, torch.Tensor) + return reduce_output(final_hidden_states) + zero_expert_result + else: + return reduce_output(final_hidden_states) @classmethod def make_expert_params_mapping( diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index 6cef5e134a4b..e0d7af0b1c3e 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -9,7 +9,7 @@ from transformers.models.aria.modeling_aria import AriaCrossAttention from transformers.models.aria.processing_aria import AriaProcessor -from vllm.config import CacheConfig, QuantizationConfig, VllmConfig +from vllm.config import QuantizationConfig, VllmConfig from vllm.distributed import get_tensor_model_parallel_rank from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.fused_moe import FusedMoE @@ -297,14 +297,12 @@ class AriaTextDecoderLayer(LlamaDecoderLayer): Experts (MoE) Layer. """ - def __init__( - self, - config: AriaTextConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: - super().__init__(config, cache_config, quant_config, prefix) + def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: + super().__init__(vllm_config, prefix) + + config = vllm_config.model_config.hf_config + quant_config = vllm_config.quant_config + self.mlp = AriaTextMoELayer(config, quant_config=quant_config, prefix=f"{prefix}.mlp") diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index aab522390a7a..2e0bcbe5d2e5 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -32,7 +32,6 @@ from torch import nn from transformers import DeepseekV2Config, DeepseekV3Config -import vllm.envs as envs from vllm.attention import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, ParallelConfig, VllmConfig @@ -56,8 +55,8 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) +from vllm.model_executor.models.utils import sequence_parallel_chunk from vllm.sequence import IntermediateTensors -from vllm.utils import cdiv, direct_register_custom_op from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP from .utils import (PPMissingLayer, is_pp_missing_parameter, @@ -108,43 +107,6 @@ def forward(self, x): return x -# Chunk x along the num_tokens axis for sequence parallelism -# NOTE: This is wrapped in a torch custom op to work around the following issue: -# The output tensor can have a sequence length 0 at small input sequence lengths -# even though we explicitly pad to avoid this. 
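The helper removed from deepseek_v2.py below is now imported from `vllm.model_executor.models.utils`. A single-process sketch of its behaviour, with the TP rank and world size passed explicitly instead of being read from the TP group as the real helper does:

```python
import torch

def sequence_parallel_chunk(x: torch.Tensor, tp_rank: int,
                            tp_size: int) -> torch.Tensor:
    # Pad the token dimension up to a multiple of tp_size, then keep only
    # this rank's slice.
    remainder = x.size(0) % tp_size
    if remainder != 0:
        x = torch.nn.functional.pad(x, (0, 0, 0, tp_size - remainder))
    chunk = x.size(0) // tp_size
    return x.narrow(0, tp_rank * chunk, chunk)

x = torch.randn(7, 16)                               # 7 tokens, hidden 16
shards = [sequence_parallel_chunk(x, r, 4) for r in range(4)]
assert all(s.shape == (2, 16) for s in shards)       # padded 7 -> 8 -> 4 x 2
```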
-def sequence_parallel_chunk(x: torch.Tensor) -> torch.Tensor: - tp_size = get_tensor_model_parallel_world_size() - tp_rank = get_tensor_model_parallel_rank() - - # all_gather needs the sequence length to be divisible by tp_size - seq_len = x.size(0) - remainder = seq_len % tp_size - if remainder != 0: - pad_len = tp_size - remainder - x = nn.functional.pad(x, (0, 0, 0, pad_len)) - - chunk = x.shape[0] // tp_size - start = tp_rank * chunk - return torch.narrow(x, 0, start, chunk) - - -def sequence_parallel_chunk_fake(x: torch.Tensor) -> torch.Tensor: - tp_size = get_tensor_model_parallel_world_size() - seq_len = cdiv(x.size(0), tp_size) - shape = list(x.shape) - shape[0] = seq_len - out = torch.empty(shape, dtype=x.dtype, device=x.device) - return out - - -direct_register_custom_op( - op_name="sequence_parallel_chunk", - op_func=sequence_parallel_chunk, - fake_impl=sequence_parallel_chunk_fake, - tags=(torch.Tag.needs_fixed_stride_order, ), -) - - class DeepseekV2MoE(nn.Module): def __init__( @@ -166,20 +128,7 @@ def __init__( self.n_routed_experts: int = config.n_routed_experts self.n_shared_experts: int = config.n_shared_experts - # The all_reduce at the end of attention (during o_proj) means that - # inputs are replicated across each rank of the tensor parallel group. - # If using expert-parallelism with DeepEP All2All ops, replicated - # tokens results in useless duplicate computation and communication. - # - # In this case, ensure the input to the experts is sequence parallel - # to avoid the excess work. - # - # Not needed for pplx-kernels as it can handle duplicate input tokens. - self.is_sequence_parallel = (envs.VLLM_ALL2ALL_BACKEND - in ("deepep_high_throughput", - "deepep_low_latency") - and parallel_config.enable_expert_parallel - and self.tp_size > 1) + self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe if config.hidden_act != "silu": raise ValueError(f"Unsupported activation: {config.hidden_act}. " @@ -278,8 +227,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # TODO: We can replace the all_reduce at the end of attn with a # reduce_scatter instead of chunking here. 
if self.is_sequence_parallel: - hidden_states = torch.ops.vllm.sequence_parallel_chunk( - hidden_states) + hidden_states = sequence_parallel_chunk(hidden_states) # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) diff --git a/vllm/model_executor/models/ernie_mtp.py b/vllm/model_executor/models/ernie_mtp.py index 3b24bf2f1ef8..2e6ef2d476a6 100644 --- a/vllm/model_executor/models/ernie_mtp.py +++ b/vllm/model_executor/models/ernie_mtp.py @@ -29,10 +29,9 @@ import torch.nn as nn from transformers import PretrainedConfig -from vllm.config import CacheConfig, ModelConfig, VllmConfig +from vllm.config import VllmConfig from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -47,13 +46,11 @@ class ErnieMultiTokenPredictorLayer(nn.Module): def __init__( self, - config: PretrainedConfig, + vllm_config: VllmConfig, prefix: str, - model_config: ModelConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, ) -> None: super().__init__() + config = vllm_config.model_config.hf_config self.mtp_emb_norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) @@ -62,8 +59,7 @@ def __init__( self.mtp_linear_proj = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False) - self.mtp_block = LlamaDecoderLayer(config, cache_config, quant_config, - prefix) + self.mtp_block = LlamaDecoderLayer(vllm_config, prefix) def forward( self, @@ -102,10 +98,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.layers = torch.nn.ModuleDict({ str(idx): ErnieMultiTokenPredictorLayer( - config, + vllm_config, f"{prefix}.layers.{idx}", - model_config=vllm_config.model_config, - cache_config=vllm_config.cache_config, ) for idx in range(self.mtp_start_layer_idx, self.mtp_start_layer_idx + self.num_mtp_layers) diff --git a/vllm/model_executor/models/glm4.py b/vllm/model_executor/models/glm4.py index b9d5e24e9f6f..f49f21a40f82 100644 --- a/vllm/model_executor/models/glm4.py +++ b/vllm/model_executor/models/glm4.py @@ -136,14 +136,16 @@ def forward( class Glm4DecoderLayer(nn.Module): - def __init__( - self, - config: Glm4Config, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: + def __init__(self, + vllm_config: VllmConfig, + prefix: str = "", + config: Optional[Glm4Config] = None) -> None: super().__init__() + + config = config or vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.hidden_size = config.hidden_size rope_theta = getattr(config, "rope_theta", 1000000) rope_scaling = getattr(config, "rope_scaling", None) diff --git a/vllm/model_executor/models/gpt_oss.py b/vllm/model_executor/models/gpt_oss.py index 7c755a00e1c9..47ba5084d608 100644 --- a/vllm/model_executor/models/gpt_oss.py +++ b/vllm/model_executor/models/gpt_oss.py @@ -13,7 +13,8 @@ from vllm.config import CacheConfig, VllmConfig from vllm.distributed import (get_ep_group, get_pp_group, get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size) + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.model_executor.layers.fused_moe 
import FusedMoE from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (QKVParallelLinear, @@ -24,6 +25,7 @@ from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader +from vllm.model_executor.models.utils import sequence_parallel_chunk from vllm.sequence import IntermediateTensors from vllm.utils import cdiv @@ -132,12 +134,18 @@ class MLPBlock(torch.nn.Module): def __init__( self, - config: GptOssConfig, + vllm_config: VllmConfig, layer_idx: int, - quant_config: QuantizationConfig, prefix: str = "", ): super().__init__() + + config = vllm_config.model_config.hf_config + quant_config = vllm_config.quant_config + parallel_config = vllm_config.parallel_config + + self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe + self.layer_idx = layer_idx self.num_experts = config.num_local_experts self.experts_per_token = config.num_experts_per_tok @@ -155,11 +163,20 @@ def __init__( prefix=f"{prefix}.experts", apply_router_weight_on_input=False, has_bias=True, - activation="swigluoai") + activation="swigluoai", + is_sequence_parallel=self.is_sequence_parallel) def forward(self, x: torch.Tensor) -> torch.Tensor: + num_tokens = x.shape[0] + if self.is_sequence_parallel: + x = sequence_parallel_chunk(x) + g = self.router(x) x = self.experts(hidden_states=x, router_logits=g) + + if self.is_sequence_parallel: + x = tensor_model_parallel_all_gather(x.contiguous(), 0) + x = x[:num_tokens] return x @@ -167,19 +184,20 @@ class TransformerBlock(torch.nn.Module): def __init__( self, - config: GptOssConfig, - cache_config: CacheConfig, - quant_config: QuantizationConfig, + vllm_config: VllmConfig, prefix: str = "", ): super().__init__() + + config = vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + self.layer_idx = extract_layer_index(prefix) self.attn = OAIAttention(config, prefix=f"{prefix}.attn", cache_config=cache_config) - self.mlp = MLPBlock(config, + self.mlp = MLPBlock(vllm_config, self.layer_idx, - quant_config=quant_config, prefix=f"{prefix}.mlp") self.input_layernorm = RMSNorm(config.hidden_size, eps=1e-5) self.post_attention_layernorm = RMSNorm(config.hidden_size, eps=1e-5) @@ -216,8 +234,6 @@ def __init__( ): super().__init__() self.config = vllm_config.model_config.hf_config - self.cache_config = vllm_config.cache_config - self.quant_config = vllm_config.quant_config self.parallel_config = vllm_config.parallel_config self.config.hidden_size = self.config.hidden_size self.embedding = VocabParallelEmbedding( @@ -227,9 +243,7 @@ def __init__( self.start_layer, self.end_layer, self.layers = make_layers( self.config.num_hidden_layers, lambda prefix: TransformerBlock( - self.config, - cache_config=self.cache_config, - quant_config=self.quant_config, + vllm_config, prefix=prefix, ), prefix=f"{prefix}.layers", diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 47ac22c4aeaa..76a5745a4f51 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -29,12 +29,13 @@ import torch from torch import nn -from transformers.models.granitemoe import GraniteMoeConfig from vllm.attention import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import get_pp_group, get_tensor_model_parallel_world_size +from 
vllm.distributed import (get_pp_group, + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (QKVParallelLinear, @@ -48,6 +49,7 @@ DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) +from vllm.model_executor.models.utils import sequence_parallel_chunk from vllm.sequence import IntermediateTensors from .interfaces import SupportsLoRA, SupportsPP @@ -71,9 +73,11 @@ def __init__(self, params_dtype: Optional[torch.dtype] = None, quant_config: Optional[QuantizationConfig] = None, tp_size: Optional[int] = None, + is_sequence_parallel=False, prefix: str = ""): super().__init__() self.hidden_size = hidden_size + self.is_sequence_parallel = is_sequence_parallel # Gate always runs at half / full precision for now. self.gate = ReplicatedLinear(hidden_size, @@ -92,15 +96,27 @@ def __init__(self, renormalize=True, quant_config=quant_config, tp_size=tp_size, - prefix=f"{prefix}.experts") + prefix=f"{prefix}.experts", + is_sequence_parallel=self.is_sequence_parallel) def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # NOTE: hidden_states can have either 1D or 2D shape. orig_shape = hidden_states.shape hidden_states = hidden_states.view(-1, self.hidden_size) + + if self.is_sequence_parallel: + hidden_states = sequence_parallel_chunk(hidden_states) + # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) final_hidden_states = self.experts(hidden_states, router_logits) + + if self.is_sequence_parallel: + final_hidden_states = tensor_model_parallel_all_gather( + final_hidden_states, 0) + num_tokens = orig_shape[0] + final_hidden_states = final_hidden_states[:num_tokens] + return final_hidden_states.view(orig_shape) @@ -191,12 +207,16 @@ class GraniteMoeDecoderLayer(nn.Module): def __init__( self, - config: GraniteMoeConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, + vllm_config: VllmConfig, prefix: str = "", ) -> None: super().__init__() + + config = vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + parallel_config = vllm_config.parallel_config + self.hidden_size = config.hidden_size # Requires transformers > 4.32.0 rope_theta = getattr(config, "rope_theta", 10000) @@ -218,6 +238,7 @@ def __init__( hidden_size=config.hidden_size, intermediate_size=config.intermediate_size, quant_config=quant_config, + is_sequence_parallel=parallel_config.use_sequence_parallel_moe, prefix=f"{prefix}.block_sparse_moe") self.input_layernorm = RMSNorm(config.hidden_size, @@ -255,7 +276,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config = vllm_config.model_config.hf_config - cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config lora_config = vllm_config.lora_config @@ -275,9 +295,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, - lambda prefix: GraniteMoeDecoderLayer( - config, cache_config, quant_config=quant_config, prefix=prefix - ), + lambda prefix: GraniteMoeDecoderLayer(vllm_config, prefix=prefix), prefix=f"{prefix}.layers") self.norm = RMSNorm(config.hidden_size, 
eps=config.rms_norm_eps) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index 1b03cbef501b..c7dd134ea47e 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -68,6 +68,7 @@ def __init__( bias: bool = False, prefix: str = "", reduce_results: bool = True, + disable_tp: bool = False, ) -> None: super().__init__() self.gate_up_proj = MergedColumnParallelLinear( @@ -75,6 +76,7 @@ def __init__( output_sizes=[intermediate_size] * 2, bias=bias, quant_config=quant_config, + disable_tp=disable_tp, prefix=f"{prefix}.gate_up_proj", ) self.down_proj = RowParallelLinear( @@ -83,6 +85,7 @@ def __init__( bias=bias, quant_config=quant_config, reduce_results=reduce_results, + disable_tp=disable_tp, prefix=f"{prefix}.down_proj", ) if hidden_act != "silu": @@ -237,14 +240,16 @@ def _init_rotary_emb(self, config: LlamaConfig, class LlamaDecoderLayer(nn.Module): - def __init__( - self, - config: LlamaConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: + def __init__(self, + vllm_config: VllmConfig, + prefix: str = "", + config: Optional[LlamaConfig] = None) -> None: super().__init__() + + config = config or vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.hidden_size = config.hidden_size rope_theta = getattr(config, "rope_theta", 10000) rope_scaling = getattr(config, "rope_scaling", None) @@ -335,7 +340,6 @@ def __init__(self, super().__init__() config = vllm_config.model_config.hf_config - cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config lora_config = vllm_config.lora_config @@ -357,10 +361,7 @@ def __init__(self, self.embed_tokens = PPMissingLayer() self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, - lambda prefix: layer_type(config=config, - cache_config=cache_config, - quant_config=quant_config, - prefix=prefix), + lambda prefix: layer_type(vllm_config=vllm_config, prefix=prefix), prefix=f"{prefix}.layers", ) if get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/llama4.py b/vllm/model_executor/models/llama4.py index ddd7e6a5936e..32d4f69c6bf1 100644 --- a/vllm/model_executor/models/llama4.py +++ b/vllm/model_executor/models/llama4.py @@ -28,7 +28,8 @@ from vllm.attention.layers.chunked_local_attention import ChunkedLocalAttention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import get_tensor_model_parallel_world_size +from vllm.distributed import (get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import (QKVParallelLinear, @@ -39,6 +40,7 @@ from vllm.model_executor.layers.shared_fused_moe import SharedFusedMoE from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) +from vllm.model_executor.models.utils import sequence_parallel_chunk from .llama import LlamaForCausalLM, LlamaMLP, LlamaModel from .utils import (AutoWeightsLoader, extract_layer_index, fast_topk, @@ -59,13 +61,16 @@ def custom_routing_function( router_scores = torch.sigmoid(router_scores.float()) return (router_scores, router_indices.to(torch.int32)) - def __init__(self, - config: Llama4TextConfig, - 
quant_config: Optional[QuantizationConfig] = None, - prefix: str = ""): + def __init__(self, vllm_config: VllmConfig, prefix: str = ""): super().__init__() + + config = vllm_config.model_config.hf_config + parallel_config = vllm_config.parallel_config + quant_config = vllm_config.quant_config + self.tp_size = get_tensor_model_parallel_world_size() self.top_k = config.num_experts_per_tok + self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe intermediate_size_moe = config.intermediate_size self.router = ReplicatedLinear(config.hidden_size, @@ -82,6 +87,7 @@ def __init__(self, bias=False, prefix=f"{prefix}.shared_expert", reduce_results=False, + disable_tp=self.is_sequence_parallel, ) self.experts = SharedFusedMoE( @@ -96,9 +102,14 @@ def __init__(self, renormalize=False, quant_config=quant_config, prefix=f"{prefix}.experts", + is_sequence_parallel=self.is_sequence_parallel, ) def forward(self, hidden_states): + num_tokens = hidden_states.shape[0] + if self.is_sequence_parallel: + hidden_states = sequence_parallel_chunk(hidden_states) + router_logits, _ = self.router(hidden_states) shared_out, routed_out = self.experts( @@ -107,7 +118,10 @@ def forward(self, hidden_states): ) experts_out = routed_out + shared_out - if self.tp_size > 1: + if self.is_sequence_parallel: + experts_out = tensor_model_parallel_all_gather(experts_out, 0) + experts_out = experts_out[:num_tokens] + elif self.tp_size > 1: experts_out = self.experts.maybe_all_reduce_tensor_model_parallel( experts_out) @@ -257,15 +271,16 @@ def forward( class Llama4DecoderLayer(nn.Module): - def __init__( - self, - config: Llama4TextConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: + def __init__(self, + vllm_config: VllmConfig, + prefix: str = "", + config: Optional[Llama4TextConfig] = None) -> None: super().__init__() + config = config or vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.layer_idx = extract_layer_index(prefix) self.global_layer = config.no_rope_layers[self.layer_idx] == 0 self.hidden_size = config.hidden_size @@ -291,8 +306,7 @@ def __init__( self.layer_idx + 1) % config.interleave_moe_layer_step == 0 if is_moe_layer: self.feed_forward = Llama4MoE( - config=config, - quant_config=quant_config, + vllm_config=vllm_config, prefix=f"{prefix}.feed_forward", ) else: diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py index 235275c0940a..0768edd08315 100644 --- a/vllm/model_executor/models/llama4_eagle.py +++ b/vllm/model_executor/models/llama4_eagle.py @@ -68,9 +68,9 @@ def __init__( self.layers = nn.ModuleList([ Llama4DecoderLayer( - self.config, - quant_config=quant_config, + vllm_config=vllm_config, prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"), + config=self.config, ) for i in range(self.config.num_hidden_layers) ]) self.fc = torch.nn.Linear(self.config.hidden_size * 2, diff --git a/vllm/model_executor/models/llama_eagle.py b/vllm/model_executor/models/llama_eagle.py index d6e6fd3fcfe9..d7d6b1745fc8 100644 --- a/vllm/model_executor/models/llama_eagle.py +++ b/vllm/model_executor/models/llama_eagle.py @@ -28,11 +28,12 @@ class LlamaDecoderLayer(LlamaDecoderLayer): def __init__( self, - config: LlamaConfig, + vllm_config: VllmConfig, disable_input_layernorm: bool, prefix: str = "", + config: Optional[LlamaConfig] = None, ) -> None: - super().__init__(config, prefix=prefix) + 
super().__init__(vllm_config, prefix=prefix, config=config) # Skip the input_layernorm # https://github.com/SafeAILab/EAGLE/blob/35c78f6cdc19a73e05cf5c330b4c358dad970c6a/eagle/model/cnets.py#L427 @@ -64,9 +65,10 @@ def __init__( self.layers = nn.ModuleList([ LlamaDecoderLayer( - self.config, + vllm_config, i == 0, prefix=maybe_prefix(prefix, f"layers.{i + start_layer_id}"), + config=self.config, ) for i in range(self.config.num_hidden_layers) ]) self.fc = torch.nn.Linear(self.config.hidden_size * 2, diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index 34b8ea0ca536..7192a76c8749 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -8,13 +8,11 @@ import torch.nn as nn from transformers import LlamaConfig -from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config +from vllm.config import VllmConfig, get_current_vllm_config from vllm.logger import init_logger from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import QKVParallelLinear from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -28,17 +26,14 @@ class LlamaDecoderLayer(LlamaDecoderLayer): - def __init__( - self, - config: LlamaConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: - super().__init__(config, - cache_config=cache_config, - quant_config=quant_config, - prefix=prefix) + def __init__(self, + vllm_config: VllmConfig, + prefix: str = "", + config: Optional[LlamaConfig] = None) -> None: + super().__init__(vllm_config, prefix=prefix, config=config) + + config = config or vllm_config.model_config.hf_config + quant_config = vllm_config.quant_config # override qkv self.self_attn.qkv_proj = QKVParallelLinear( @@ -125,9 +120,9 @@ def __init__( self.layers = nn.ModuleList([ LlamaDecoderLayer( - config=self.config, - cache_config=current_vllm_config.cache_config, + current_vllm_config, prefix=maybe_prefix(prefix, f"layers.{start_layer_id}"), + config=self.config, ) ]) if hasattr(self.config, "target_hidden_size"): diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index cb2ff97a5df2..45b9c656a4bb 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -29,13 +29,13 @@ import torch from torch import nn -from transformers import Qwen3MoeConfig from vllm.attention import Attention from vllm.compilation.decorators import support_torch_compile from vllm.config import CacheConfig, VllmConfig, get_current_vllm_config from vllm.distributed import (get_ep_group, get_pp_group, - get_tensor_model_parallel_world_size) + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.logger import init_logger from vllm.model_executor.layers.activation import SiluAndMul from vllm.model_executor.layers.fused_moe import FusedMoE @@ -51,6 +51,7 @@ ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) +from vllm.model_executor.models.utils import sequence_parallel_chunk from 
vllm.sequence import IntermediateTensors from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP @@ -101,12 +102,15 @@ class Qwen3MoeSparseMoeBlock(nn.Module): def __init__( self, - config: Qwen3MoeConfig, - quant_config: Optional[QuantizationConfig] = None, + vllm_config: VllmConfig, prefix: str = "", - enable_eplb: bool = False, ): super().__init__() + + config = vllm_config.model_config.hf_config + parallel_config = vllm_config.parallel_config + quant_config = vllm_config.quant_config + self.tp_size = get_tensor_model_parallel_world_size() self.ep_group = get_ep_group().device_group @@ -114,6 +118,8 @@ def __init__( self.ep_size = self.ep_group.size() self.n_routed_experts = config.num_experts + self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe + if self.tp_size > config.num_experts: raise ValueError( f"Tensor parallel size {self.tp_size} is greater than " @@ -122,7 +128,7 @@ def __init__( # Load balancing settings. vllm_config = get_current_vllm_config() eplb_config = vllm_config.parallel_config.eplb_config - self.enable_eplb = enable_eplb + self.enable_eplb = parallel_config.enable_eplb self.n_logical_experts = self.n_routed_experts self.n_redundant_experts = eplb_config.num_redundant_experts @@ -144,7 +150,8 @@ def __init__( quant_config=quant_config, prefix=f"{prefix}.experts", enable_eplb=self.enable_eplb, - num_redundant_experts=self.n_redundant_experts) + num_redundant_experts=self.n_redundant_experts, + is_sequence_parallel=self.is_sequence_parallel) self.gate = ReplicatedLinear(config.hidden_size, config.num_experts, @@ -156,14 +163,22 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: assert hidden_states.dim( ) <= 2, "Qwen3MoeSparseMoeBlock only supports 1D or 2D inputs" is_input_1d = hidden_states.dim() == 1 - hidden_dim = hidden_states.shape[-1] + num_tokens, hidden_dim = hidden_states.shape hidden_states = hidden_states.view(-1, hidden_dim) + if self.is_sequence_parallel: + hidden_states = sequence_parallel_chunk(hidden_states) + # router_logits: (num_tokens, n_experts) router_logits, _ = self.gate(hidden_states) final_hidden_states = self.experts(hidden_states=hidden_states, router_logits=router_logits) + if self.is_sequence_parallel: + final_hidden_states = tensor_model_parallel_all_gather( + final_hidden_states, 0) + final_hidden_states = final_hidden_states[:num_tokens] + # return to 1d if input is 1d return final_hidden_states.squeeze(0) if is_input_1d else \ final_hidden_states @@ -275,15 +290,13 @@ def forward( class Qwen3MoeDecoderLayer(nn.Module): - def __init__( - self, - config: Qwen3MoeConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - enable_eplb: bool = False, - ) -> None: + def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: super().__init__() + + config = vllm_config.model_config.hf_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + self.hidden_size = config.hidden_size rope_theta = getattr(config, "rope_theta", 10000) rope_scaling = getattr(config, "rope_scaling", None) @@ -315,10 +328,8 @@ def __init__( if (layer_idx not in mlp_only_layers) and ( config.num_experts > 0 and (layer_idx + 1) % config.decoder_sparse_step == 0): - self.mlp = Qwen3MoeSparseMoeBlock(config=config, - quant_config=quant_config, - prefix=f"{prefix}.mlp", - enable_eplb=enable_eplb) + self.mlp = Qwen3MoeSparseMoeBlock(vllm_config=vllm_config, + prefix=f"{prefix}.mlp") else: self.mlp = 
Qwen3MoeMLP(hidden_size=config.hidden_size, intermediate_size=config.intermediate_size, @@ -362,10 +373,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config = vllm_config.model_config.hf_config.get_text_config() - cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config parallel_config = vllm_config.parallel_config - enable_eplb = parallel_config.enable_eplb eplb_config = parallel_config.eplb_config self.num_redundant_experts = eplb_config.num_redundant_experts @@ -379,11 +388,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): prefix=f"{prefix}.embed_tokens") self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, - lambda prefix: Qwen3MoeDecoderLayer(config=config, - cache_config=cache_config, - quant_config=quant_config, - prefix=prefix, - enable_eplb=enable_eplb), + lambda prefix: Qwen3MoeDecoderLayer(vllm_config=vllm_config, + prefix=prefix), prefix=f"{prefix}.layers", ) self.norm = RMSNorm(config.hidden_size, eps=config.rms_norm_eps) diff --git a/vllm/model_executor/models/qwen3_next.py b/vllm/model_executor/models/qwen3_next.py index dc3153fcc826..14d19874a51e 100644 --- a/vllm/model_executor/models/qwen3_next.py +++ b/vllm/model_executor/models/qwen3_next.py @@ -17,7 +17,8 @@ VllmConfig, get_current_vllm_config) from vllm.distributed import (divide, get_ep_group, get_pp_group, get_tensor_model_parallel_rank, - get_tensor_model_parallel_world_size) + get_tensor_model_parallel_world_size, + tensor_model_parallel_all_gather) from vllm.forward_context import ForwardContext, get_forward_context from vllm.logger import init_logger from vllm.model_executor.layers.fla.ops import ( @@ -47,6 +48,7 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, sharded_weight_loader) from vllm.model_executor.models.qwen2_moe import Qwen2MoeMLP as Qwen3NextMLP +from vllm.model_executor.models.utils import sequence_parallel_chunk from vllm.model_executor.utils import set_weight_attrs from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors @@ -69,14 +71,13 @@ class Qwen3NextSparseMoeBlock(nn.Module): - def __init__( - self, - config: Qwen3NextConfig, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - enable_eplb: bool = False, - ): + def __init__(self, vllm_config: VllmConfig, prefix: str = ""): super().__init__() + + config = vllm_config.model_config.hf_config + parallel_config = vllm_config.parallel_config + quant_config = vllm_config.quant_config + self.tp_size = get_tensor_model_parallel_world_size() self.ep_group = get_ep_group().device_group @@ -84,6 +85,8 @@ def __init__( self.ep_size = self.ep_group.size() self.n_routed_experts = config.num_experts + self.is_sequence_parallel = parallel_config.use_sequence_parallel_moe + if self.tp_size > config.num_experts: raise ValueError( f"Tensor parallel size {self.tp_size} is greater than " @@ -92,7 +95,7 @@ def __init__( # Load balancing settings. 
vllm_config = get_current_vllm_config() eplb_config = vllm_config.parallel_config.eplb_config - self.enable_eplb = enable_eplb + self.enable_eplb = parallel_config.enable_eplb self.n_logical_experts = self.n_routed_experts self.n_redundant_experts = eplb_config.num_redundant_experts @@ -114,7 +117,8 @@ def __init__( quant_config=quant_config, prefix=f"{prefix}.experts", enable_eplb=self.enable_eplb, - num_redundant_experts=self.n_redundant_experts) + num_redundant_experts=self.n_redundant_experts, + is_sequence_parallel=self.is_sequence_parallel) self.gate = ReplicatedLinear(config.hidden_size, config.num_experts, @@ -141,9 +145,12 @@ def __init__( def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: # NOTE: hidden_states can have either 1D or 2D shape. orig_shape = hidden_states.shape - hidden_dim = hidden_states.shape[-1] + num_tokens, hidden_dim = hidden_states.shape hidden_states = hidden_states.view(-1, hidden_dim) + if self.is_sequence_parallel: + hidden_states = sequence_parallel_chunk(hidden_states) + shared_output = None if self.shared_expert is not None: shared_output = self.shared_expert(hidden_states) @@ -158,7 +165,12 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: if shared_output is not None: final_hidden_states = final_hidden_states + shared_output - if self.tp_size > 1: + + if self.is_sequence_parallel: + final_hidden_states = tensor_model_parallel_all_gather( + final_hidden_states, 0) + final_hidden_states = final_hidden_states[:num_tokens] + elif self.tp_size > 1: final_hidden_states = self.experts.maybe_all_reduce_tensor_model_parallel( # noqa E501 final_hidden_states) @@ -719,17 +731,17 @@ class Qwen3NextDecoderLayer(nn.Module): def __init__( self, - config: Qwen3NextConfig, + vllm_config: VllmConfig, layer_type: str, - model_config: Optional[ModelConfig] = None, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - speculative_config: Optional[SpeculativeConfig] = None, prefix: str = "", - enable_eplb: bool = False, ) -> None: super().__init__() - self.config = config + + config = vllm_config.model_config.hf_config + model_config = vllm_config.model_config + cache_config = vllm_config.cache_config + quant_config = vllm_config.quant_config + speculative_config = vllm_config.speculative_config self.layer_type = layer_type self.layer_idx = extract_layer_index(prefix) @@ -759,10 +771,8 @@ def __init__( config.num_experts > 0 and (self.layer_idx + 1) % config.decoder_sparse_step == 0): self.mlp = Qwen3NextSparseMoeBlock( - config=config, - quant_config=quant_config, + vllm_config=vllm_config, prefix=f"{prefix}.mlp", - enable_eplb=enable_eplb, ) else: self.mlp = Qwen3NextMLP( @@ -783,14 +793,14 @@ def __init__( torch.zeros( 1, 1, - self.config.hidden_size, + config.hidden_size, dtype=config.torch_dtype, ), ) self.ffn_layer_scale = torch.nn.Parameter( torch.zeros( 1, 1, - self.config.hidden_size, + config.hidden_size, dtype=config.torch_dtype, ), ) @@ -858,13 +868,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() config: Qwen3NextConfig = vllm_config.model_config.hf_config - model_config = vllm_config.model_config - cache_config = vllm_config.cache_config - quant_config = vllm_config.quant_config parallel_config = vllm_config.parallel_config lora_config = vllm_config.lora_config - speculative_config = vllm_config.speculative_config - enable_eplb = parallel_config.enable_eplb eplb_config = parallel_config.eplb_config self.num_redundant_experts = 
eplb_config.num_redundant_experts @@ -881,14 +886,9 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): def get_layer(prefix: str): return Qwen3NextDecoderLayer( - config, + vllm_config, layer_type=config.layer_types[extract_layer_index(prefix)], - model_config=model_config, - cache_config=cache_config, - quant_config=quant_config, - speculative_config=speculative_config, prefix=prefix, - enable_eplb=enable_eplb, ) self.start_layer, self.end_layer, self.layers = make_layers( diff --git a/vllm/model_executor/models/qwen3_next_mtp.py b/vllm/model_executor/models/qwen3_next_mtp.py index c054339842e6..e950699a0c49 100644 --- a/vllm/model_executor/models/qwen3_next_mtp.py +++ b/vllm/model_executor/models/qwen3_next_mtp.py @@ -38,7 +38,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() model_config = vllm_config.model_config - cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config lora_config = vllm_config.lora_config config: Qwen3NextConfig = model_config.hf_config @@ -68,11 +67,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.layers = torch.nn.ModuleList( Qwen3NextDecoderLayer( - config, + vllm_config, layer_type="full_attention", - model_config=model_config, - cache_config=cache_config, - quant_config=quant_config, prefix=f'{prefix}.layers.{idx}', ) for idx in range(self.num_mtp_layers)) diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py index bb6a0bd02202..4bf151fbf62d 100644 --- a/vllm/model_executor/models/utils.py +++ b/vllm/model_executor/models/utils.py @@ -13,11 +13,14 @@ import vllm.envs as envs from vllm.config import VllmConfig +from vllm.distributed import (get_tensor_model_parallel_rank, + get_tensor_model_parallel_world_size) from vllm.logger import init_logger from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.multimodal import NestedTensors from vllm.sequence import IntermediateTensors -from vllm.utils import (get_cuda_view_from_cpu_tensor, is_pin_memory_available, +from vllm.utils import (cdiv, direct_register_custom_op, + get_cuda_view_from_cpu_tensor, is_pin_memory_available, is_uva_available) logger = init_logger(__name__) @@ -743,3 +746,46 @@ def get_model_hidden_size(hf_config: PretrainedConfig) -> int: return hf_config.hidden_size text_config = hf_config.get_text_config() return text_config.hidden_size + + +# Chunk x along the num_tokens axis for sequence parallelism +# NOTE: This is wrapped in a torch custom op to work around the following issue: +# The output tensor can have a sequence length 0 at small input sequence lengths +# even though we explicitly pad to avoid this. 
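Aside (editorial, not part of utils.py): before the helper definition that follows below, here is a runnable single-process illustration of the pad → narrow → gather round trip it performs, with `tp_size` and `seq_len` chosen so the padding path is actually exercised.

```python
# Standalone illustration of the chunk/pad/gather round trip performed by
# sequence_parallel_chunk (defined just below); not part of the diff.
import torch

tp_size, seq_len, hidden = 4, 5, 8
x = torch.arange(seq_len * hidden, dtype=torch.float32).view(seq_len, hidden)

# Pad the token axis so it divides evenly across tp_size ranks (5 -> 8 rows).
remainder = seq_len % tp_size
y = torch.nn.functional.pad(x, (0, 0, 0, tp_size - remainder)) if remainder else x

# Rank r keeps torch.narrow(y, 0, r * chunk, chunk); emulate every rank here.
chunk = y.shape[0] // tp_size
shards = [torch.narrow(y, 0, r * chunk, chunk) for r in range(tp_size)]

# all_gather along dim 0 restores the padded axis; slicing back to seq_len
# drops the padding rows, recovering the original tokens.
gathered = torch.cat(shards, dim=0)
assert torch.equal(gathered[:seq_len], x)
```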
+def sequence_parallel_chunk(x: torch.Tensor) -> torch.Tensor: + return torch.ops.vllm.sequence_parallel_chunk_impl(x) + + +def sequence_parallel_chunk_impl(x: torch.Tensor) -> torch.Tensor: + tp_size = get_tensor_model_parallel_world_size() + tp_rank = get_tensor_model_parallel_rank() + + # all_gather needs the sequence length to be divisible by tp_size + seq_len = x.size(0) + remainder = seq_len % tp_size + if remainder != 0: + pad_len = tp_size - remainder + y = nn.functional.pad(x, (0, 0, 0, pad_len)) + else: + y = x + + chunk = y.shape[0] // tp_size + start = tp_rank * chunk + return torch.narrow(y, 0, start, chunk) + + +def sequence_parallel_chunk_impl_fake(x: torch.Tensor) -> torch.Tensor: + tp_size = get_tensor_model_parallel_world_size() + seq_len = cdiv(x.size(0), tp_size) + shape = list(x.shape) + shape[0] = seq_len + out = torch.empty(shape, dtype=x.dtype, device=x.device) + return out + + +direct_register_custom_op( + op_name="sequence_parallel_chunk_impl", + op_func=sequence_parallel_chunk_impl, + fake_impl=sequence_parallel_chunk_impl_fake, + tags=(torch.Tag.needs_fixed_stride_order, ), +) From ecb37e276a8c261312a2b0dc011182216d895917 Mon Sep 17 00:00:00 2001 From: yyzxw <34639446+yyzxw@users.noreply.github.com> Date: Sat, 27 Sep 2025 23:00:35 +0800 Subject: [PATCH 454/518] [docs] transcriptions API audio upload (#25446) Signed-off-by: zxw <1020938856@qq.com> --- docs/serving/openai_compatible_server.md | 81 +++++++++++++++++++++++- 1 file changed, 80 insertions(+), 1 deletion(-) diff --git a/docs/serving/openai_compatible_server.md b/docs/serving/openai_compatible_server.md index bac3f6c1fe90..1ffe9c9ade20 100644 --- a/docs/serving/openai_compatible_server.md +++ b/docs/serving/openai_compatible_server.md @@ -351,13 +351,92 @@ you can use the [official OpenAI Python client](https://github.com/openai/openai To use the Transcriptions API, please install with extra audio dependencies using `pip install vllm[audio]`. Code example: - #### API Enforced Limits Set the maximum audio file size (in MB) that VLLM will accept, via the `VLLM_MAX_AUDIO_CLIP_FILESIZE_MB` environment variable. Default is 25 MB. +#### Uploading Audio Files + +The Transcriptions API supports uploading audio files in various formats including FLAC, MP3, MP4, MPEG, MPGA, M4A, OGG, WAV, and WEBM. + +**Using OpenAI Python Client:** + +??? code + + ```python + from openai import OpenAI + + client = OpenAI( + base_url="http://localhost:8000/v1", + api_key="token-abc123", + ) + + # Upload audio file from disk + with open("audio.mp3", "rb") as audio_file: + transcription = client.audio.transcriptions.create( + model="openai/whisper-large-v3-turbo", + file=audio_file, + language="en", + response_format="verbose_json" + ) + + print(transcription.text) + ``` + +**Using curl with multipart/form-data:** + +??? 
code + + ```bash + curl -X POST "http://localhost:8000/v1/audio/transcriptions" \ + -H "Authorization: Bearer token-abc123" \ + -F "file=@audio.mp3" \ + -F "model=openai/whisper-large-v3-turbo" \ + -F "language=en" \ + -F "response_format=verbose_json" + ``` + +**Supported Parameters:** + +- `file`: The audio file to transcribe (required) +- `model`: The model to use for transcription (required) +- `language`: The language code (e.g., "en", "zh") (optional) +- `prompt`: Optional text to guide the transcription style (optional) +- `response_format`: Format of the response ("json", "text") (optional) +- `temperature`: Sampling temperature between 0 and 1 (optional) + +For the complete list of supported parameters including sampling parameters and vLLM extensions, see the [protocol definitions](https://github.com/vllm-project/vllm/blob/main/vllm/entrypoints/openai/protocol.py#L2182). + +**Response Format:** + +For `verbose_json` response format: + +??? code + + ```json + { + "text": "Hello, this is a transcription of the audio file.", + "language": "en", + "duration": 5.42, + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 2.5, + "text": "Hello, this is a transcription", + "tokens": [50364, 938, 428, 307, 275, 28347], + "temperature": 0.0, + "avg_logprob": -0.245, + "compression_ratio": 1.235, + "no_speech_prob": 0.012 + } + ] + } + ``` + #### Extra Parameters The following [sampling parameters][sampling-params] are supported. From 49996cd59789c36d0dc18bcb9dc9454b4d94dde7 Mon Sep 17 00:00:00 2001 From: Peter Pan Date: Sat, 27 Sep 2025 23:02:40 +0800 Subject: [PATCH 455/518] [env] default nixl side port conflicts with kv-event zmq port (#25056) Signed-off-by: Peter Pan --- vllm/envs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/envs.py b/vllm/envs.py index 832d031f998e..94b0dece9655 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -155,7 +155,7 @@ VLLM_MSGPACK_ZERO_COPY_THRESHOLD: int = 256 VLLM_ALLOW_INSECURE_SERIALIZATION: bool = False VLLM_NIXL_SIDE_CHANNEL_HOST: str = "localhost" - VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5557 + VLLM_NIXL_SIDE_CHANNEL_PORT: int = 5600 VLLM_ALL2ALL_BACKEND: Literal["naive", "pplx", "deepep_high_throughput", "deepep_low_latency", @@ -1220,7 +1220,7 @@ def get_vllm_port() -> Optional[int]: # Port used for NIXL handshake between remote agents. "VLLM_NIXL_SIDE_CHANNEL_PORT": - lambda: int(os.getenv("VLLM_NIXL_SIDE_CHANNEL_PORT", "5557")), + lambda: int(os.getenv("VLLM_NIXL_SIDE_CHANNEL_PORT", "5600")), # all2all backend for vllm's expert parallel communication # Available options: From b65e56babec4d23abf2c80f65dbf9f5b16c3a9d9 Mon Sep 17 00:00:00 2001 From: "Patrick C. Toulme" <135739773+patrick-toulme@users.noreply.github.com> Date: Sat, 27 Sep 2025 11:40:59 -0400 Subject: [PATCH 456/518] [Core] Refactor self.model() to call a helper for subclassing. 
(#25084) Signed-off-by: Patrick Toulme Signed-off-by: Patrick Toulme --- vllm/v1/worker/gpu_model_runner.py | 34 +++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 1bae0d4ce4d1..2354e8222e7a 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2268,6 +2268,38 @@ def synchronize_input_prep(self): finally: self.prepare_inputs_event.record() + def _model_forward( + self, + input_ids: Optional[torch.Tensor] = None, + positions: Optional[torch.Tensor] = None, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + **model_kwargs: dict[str, Any], + ) -> Any: + """Helper method to call the model forward pass. + + This method can be overridden by subclasses for model execution. + Motivation: We can inspect only this method versus + the whole execute_model, which has additional logic. + + Args: + input_ids: Input token IDs + positions: Token positions + intermediate_tensors: Tensors from previous pipeline stages + inputs_embeds: Input embeddings (alternative to input_ids) + **model_kwargs: Additional model arguments + + Returns: + Model output tensor + """ + return self.model( + input_ids=input_ids, + positions=positions, + intermediate_tensors=intermediate_tensors, + inputs_embeds=inputs_embeds, + **model_kwargs, + ) + @torch.inference_mode() def execute_model( self, @@ -2337,7 +2369,7 @@ def execute_model( ), record_function_or_nullcontext("Forward"), self.maybe_get_kv_connector_output(scheduler_output) as kv_connector_output): - model_output = self.model( + model_output = self._model_forward( input_ids=input_ids, positions=positions, intermediate_tensors=intermediate_tensors, From c0ec81836fd47492a900a2538dea461619122555 Mon Sep 17 00:00:00 2001 From: Jiangyun Zhu Date: Sun, 28 Sep 2025 00:09:00 +0800 Subject: [PATCH 457/518] [torch.compile]: Add VLLM_DEBUG_DUMP_PATH environment variable (#25651) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: zjy0516 Signed-off-by: Jiangyun Zhu Co-authored-by: Luka Govedič --- vllm/compilation/monitor.py | 10 ++++------ vllm/compilation/vllm_inductor_pass.py | 5 +---- vllm/compilation/wrapper.py | 11 +++++------ vllm/config/__init__.py | 26 ++++++++++++++++++++++++++ vllm/config/compilation.py | 3 ++- vllm/envs.py | 6 ++++++ 6 files changed, 44 insertions(+), 17 deletions(-) diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py index c46721ab2d74..35658466d66d 100644 --- a/vllm/compilation/monitor.py +++ b/vllm/compilation/monitor.py @@ -1,7 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import os import time from vllm.config import CompilationConfig, CompilationLevel, VllmConfig @@ -18,13 +17,12 @@ def start_monitoring_torch_compile(vllm_config: VllmConfig): torch_compile_start_time = time.time() compilation_config: CompilationConfig = vllm_config.compilation_config - if compilation_config.level == CompilationLevel.PIECEWISE and \ - compilation_config.debug_dump_path: + path = vllm_config.compile_debug_dump_path() + if compilation_config.level == CompilationLevel.PIECEWISE and path: import depyf - path = os.path.join(compilation_config.debug_dump_path, - f"rank_{vllm_config.parallel_config.rank}") + path.mkdir(parents=True, exist_ok=True) global context_manager - context_manager = depyf.prepare_debug(path) + 
context_manager = depyf.prepare_debug(path.as_posix()) context_manager.__enter__() diff --git a/vllm/compilation/vllm_inductor_pass.py b/vllm/compilation/vllm_inductor_pass.py index 837770d18199..59019d74cb80 100644 --- a/vllm/compilation/vllm_inductor_pass.py +++ b/vllm/compilation/vllm_inductor_pass.py @@ -3,7 +3,6 @@ import functools import operator import time -from pathlib import Path from typing import ClassVar, Optional import regex as re @@ -96,12 +95,10 @@ def dump_patterns(self, config: VllmConfig, pm_pass: PatternMatcherPass): TODO(luka): use pattern object to manually produce pattern graph """ - debug_dump_path = config.compilation_config.debug_dump_path + debug_dump_path = config.compile_debug_dump_path() if not debug_dump_path: return - rank = config.parallel_config.rank - debug_dump_path = Path(debug_dump_path) / f"rank_{rank}" debug_dump_path.mkdir(parents=True, exist_ok=True) from vllm.utils import unique_filepath diff --git a/vllm/compilation/wrapper.py b/vllm/compilation/wrapper.py index 930e4d27b410..062c9dc27017 100644 --- a/vllm/compilation/wrapper.py +++ b/vllm/compilation/wrapper.py @@ -92,12 +92,11 @@ def bytecode_hook(self, old_code: CodeType, new_code: CodeType): return self.compiled_codes.append(new_code) - debug_dump_dir = self.vllm_config.compilation_config.debug_dump_path - if isinstance(debug_dump_dir, str) and debug_dump_dir != "": - rank = self.vllm_config.parallel_config.rank - decompiled_file = os.path.join(debug_dump_dir, f"rank_{rank}", - "transformed_code.py") - if not os.path.exists(decompiled_file): + + path = self.vllm_config.compile_debug_dump_path() + if path: + decompiled_file = path / "transformed_code.py" + if not decompiled_file.exists(): try: # usually the decompilation will succeed for most models, # as we guarantee a full-graph compilation in Dynamo. diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index ecea90988ebc..ccb91999d370 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -12,6 +12,7 @@ from contextlib import contextmanager from dataclasses import field, fields, is_dataclass, replace from functools import cached_property, lru_cache +from pathlib import Path from typing import (TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeVar, Union, cast) @@ -541,6 +542,17 @@ def __post_init__(self): # local attention. self.scheduler_config.disable_hybrid_kv_cache_manager = True + if self.compilation_config.debug_dump_path: + self.compilation_config.debug_dump_path = \ + self.compilation_config.debug_dump_path.absolute().expanduser() + if envs.VLLM_DEBUG_DUMP_PATH is not None: + env_path = Path(envs.VLLM_DEBUG_DUMP_PATH).absolute().expanduser() + if self.compilation_config.debug_dump_path: + logger.warning( + "Config-specified debug dump path is overridden" + " by VLLM_DEBUG_DUMP_PATH to %s", env_path) + self.compilation_config.debug_dump_path = env_path + def update_sizes_for_sequence_parallelism(self, possible_sizes: list) -> list: # remove the sizes that not multiple of tp_size when @@ -672,6 +684,20 @@ def try_verify_and_update_config(self): f"but got '{self.load_config.load_format}'. " f"Model: {self.model_config.model}") + def compile_debug_dump_path(self) -> Optional[Path]: + """Returns a rank-aware path for dumping + torch.compile debug information. 
+ """ + if self.compilation_config.debug_dump_path is None: + return None + tp_rank = self.parallel_config.rank + dp_rank = self.parallel_config.data_parallel_rank + data_parallel_size = self.parallel_config.data_parallel_size + append_path = f"rank_{tp_rank}" if data_parallel_size == 1 \ + else f"rank_{tp_rank}_dp_{dp_rank}" + path = self.compilation_config.debug_dump_path / append_path + return path + def __str__(self): return ( f"model={self.model_config.model!r}, " diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index 9735db98567d..825de7d26191 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -5,6 +5,7 @@ import hashlib from collections import Counter from dataclasses import asdict, field +from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, ClassVar, Optional, Union from pydantic import TypeAdapter, field_validator @@ -169,7 +170,7 @@ class CompilationConfig: - 1: dynamo as is. - 2: dynamo once. - 3: piecewise compilation.""" - debug_dump_path: str = "" + debug_dump_path: Optional[Path] = None """The path to dump the debug information.""" cache_dir: str = "" """The directory to store the compiled graph, to accelerate Inductor diff --git a/vllm/envs.py b/vllm/envs.py index 94b0dece9655..854328044304 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -199,6 +199,7 @@ VLLM_DBO_COMM_SMS: int = 20 GPT_OSS_SYSTEM_TOOL_MCP_LABELS: list[str] = [] VLLM_PATTERN_MATCH_DEBUG: Optional[str] = None + VLLM_DEBUG_DUMP_PATH: Optional[str] = None VLLM_ENABLE_INDUCTOR_MAX_AUTOTUNE: bool = True VLLM_ENABLE_INDUCTOR_COORDINATE_DESCENT_TUNING: bool = True VLLM_USE_NCCL_SYMM_MEM: bool = False @@ -513,6 +514,11 @@ def get_vllm_port() -> Optional[int]: "VLLM_PATTERN_MATCH_DEBUG": lambda: os.environ.get("VLLM_PATTERN_MATCH_DEBUG", None), + # Dump fx graphs to the given directory. + # It will override CompilationConfig.debug_dump_path if set. + "VLLM_DEBUG_DUMP_PATH": + lambda: os.environ.get("VLLM_DEBUG_DUMP_PATH", None), + # local rank of the process in the distributed setting, used to determine # the GPU device id "LOCAL_RANK": From 5546acb463243ce3c166dc620c764a93351b7c69 Mon Sep 17 00:00:00 2001 From: Clayton Coleman Date: Sat, 27 Sep 2025 13:36:28 -0400 Subject: [PATCH 458/518] [Bug]: Set LD_LIBRARY_PATH to include the 'standard' CUDA location (#25766) Signed-off-by: Clayton Coleman --- docker/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docker/Dockerfile b/docker/Dockerfile index c2b855be4403..62be0a12e988 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -474,6 +474,12 @@ ENV CUDA_HOME=/usr/local/cuda RUN export TORCH_CUDA_ARCH_LIST="${TORCH_CUDA_ARCH_LIST:-9.0a+PTX}" \ && bash install_python_libraries.sh +# CUDA image changed from /usr/local/nvidia to /usr/local/cuda in 12.8 but will +# return to /usr/local/nvidia in 13.0 to allow container providers to mount drivers +# consistently from the host (see https://github.com/vllm-project/vllm/issues/18859). +# Until then, add /usr/local/nvidia/lib64 before the image cuda path to allow override. 
+ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:${LD_LIBRARY_PATH} + #################### vLLM installation IMAGE #################### #################### TEST IMAGE #################### From c216119d649a9ed96e38c2082a2647dbc864118f Mon Sep 17 00:00:00 2001 From: Jialin Ouyang Date: Sat, 27 Sep 2025 10:53:31 -0700 Subject: [PATCH 459/518] [Core] GC Debug callback (#24829) Signed-off-by: Jialin Ouyang Signed-off-by: Jialin Ouyang Co-authored-by: Jialin Ouyang --- docs/contributing/profiling.md | 8 +++ tests/utils_/test_gc_utils.py | 69 ++++++++++++++++++ vllm/envs.py | 8 +++ vllm/utils/gc_utils.py | 128 +++++++++++++++++++++++++++++++++ vllm/v1/engine/core.py | 4 ++ 5 files changed, 217 insertions(+) create mode 100644 tests/utils_/test_gc_utils.py create mode 100644 vllm/utils/gc_utils.py diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md index 5b83d93274f0..a1b7927a95d1 100644 --- a/docs/contributing/profiling.md +++ b/docs/contributing/profiling.md @@ -208,3 +208,11 @@ One example is [snakeviz](https://jiffyclub.github.io/snakeviz/). pip install snakeviz snakeviz expensive_function.prof ``` + +### Analyzing Garbage Collection Costs + +Leverage VLLM_GC_DEBUG environment variable to debug GC costs. + +- VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times +- VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger to log top 5 + collected objects for each gc.collect diff --git a/tests/utils_/test_gc_utils.py b/tests/utils_/test_gc_utils.py new file mode 100644 index 000000000000..265761b069ca --- /dev/null +++ b/tests/utils_/test_gc_utils.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass +from typing import Any + +from vllm.utils.gc_utils import (GCDebugConfig, _compute_detailed_type, + _compute_top_gc_collected_objects) + + +@dataclass +class Normal: + v: int + + +@dataclass +class ListWrapper: + vs: list[int] + + def __len__(self) -> int: + return len(self.vs) + + +def test_compute_detailed_type(): + assert _compute_detailed_type( + Normal(v=8)) == "" + + assert _compute_detailed_type([1, 2, 3]) == "(size:3)" + assert _compute_detailed_type({4, 5}) == "(size:2)" + assert _compute_detailed_type({6: 7}) == "(size:1)" + assert _compute_detailed_type(ListWrapper( + vs=[])) == "(size:0)" + + +def test_compute_top_gc_collected_objects(): + objects: list[Any] = [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12], + {13, 14}, { + 15: 16, + 17: 18 + }, + Normal(v=19), + Normal(v=20), + Normal(v=21)] + assert _compute_top_gc_collected_objects(objects, top=-1) == "" + assert _compute_top_gc_collected_objects(objects, top=0) == "" + assert _compute_top_gc_collected_objects( + objects, top=1) == " 4:(size:3)" + assert _compute_top_gc_collected_objects(objects, top=2) == "\n".join([ + " 4:(size:3)", + " 3:" + ]) + assert _compute_top_gc_collected_objects(objects, top=3) == "\n".join([ + " 4:(size:3)", + " 3:", + " 1:(size:2)" + ]) + + +def test_gc_debug_config(): + assert not GCDebugConfig(None).enabled + assert not GCDebugConfig("").enabled + assert not GCDebugConfig("0").enabled + + config = GCDebugConfig("1") + assert config.enabled + assert config.top_objects == -1 + + config = GCDebugConfig("{\"top_objects\":5}") + assert config.enabled + assert config.top_objects == 5 diff --git a/vllm/envs.py b/vllm/envs.py index 854328044304..f06c860b8297 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -205,6 +205,7 @@ VLLM_USE_NCCL_SYMM_MEM: bool = False 
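Aside (editorial, not part of the diff): the GC debugger introduced in this patch attaches through CPython's `gc.callbacks` hook. A minimal standalone sketch of that hook, independent of the `GCDebugConfig`/`GCDebugger` classes added in `vllm/utils/gc_utils.py` below; the timing and printing here are illustrative only.

```python
# Standalone sketch of the gc.callbacks mechanism that the new GCDebugger
# builds on; not part of the diff.
import gc
import time

_start_ns = 0

def _gc_callback(phase: str, info: dict) -> None:
    global _start_ns
    if phase == "start":
        _start_ns = time.monotonic_ns()
    elif phase == "stop":
        elapsed_ms = (time.monotonic_ns() - _start_ns) / 1e6
        print(f"GC gen {info['generation']}: collected {info['collected']} "
              f"objects in {elapsed_ms:.3f}ms")

gc.callbacks.append(_gc_callback)
gc.collect()                      # invokes the callback with "start"/"stop"
gc.callbacks.remove(_gc_callback)
```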
VLLM_NCCL_INCLUDE_PATH: Optional[str] = None VLLM_USE_FBGEMM: bool = False + VLLM_GC_DEBUG: str = "" def get_default_cache_root(): @@ -1475,6 +1476,13 @@ def get_vllm_port() -> Optional[int]: lambda: os.environ.get("VLLM_NCCL_INCLUDE_PATH", None), # Flag to enable FBGemm kernels on model execution "VLLM_USE_FBGEMM": lambda: bool(int(os.getenv("VLLM_USE_FBGEMM", "0"))), + + # GC debug config + # - VLLM_GC_DEBUG=0: disable GC debugger + # - VLLM_GC_DEBUG=1: enable GC debugger with gc.collect elpased times + # - VLLM_GC_DEBUG='{"top_objects":5}': enable GC debugger with + # top 5 collected objects + "VLLM_GC_DEBUG": lambda: os.getenv("VLLM_GC_DEBUG", ""), } # --8<-- [end:env-vars-definition] diff --git a/vllm/utils/gc_utils.py b/vllm/utils/gc_utils.py new file mode 100644 index 000000000000..8ce2c200e299 --- /dev/null +++ b/vllm/utils/gc_utils.py @@ -0,0 +1,128 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import gc +import json +import time +from collections import Counter +from contextlib import suppress +from typing import Any, Optional + +from vllm.envs import VLLM_GC_DEBUG +from vllm.logger import init_logger + +logger = init_logger(__name__) + + +class GCDebugConfig: + """ + Config for GC Debugger. + - 0: disable GC debugger + - 1: enable GC debugger with gc.collect elpased times + - '{"top_objects":5}': enable GC debugger with top 5 collected objects + """ + + def __init__(self, gc_debug_conf: Optional[str] = None) -> None: + self.enabled: bool = False + self.top_objects: int = -1 + + if not gc_debug_conf or gc_debug_conf == "0": + pass + elif gc_debug_conf == "1": + self.enabled = True + else: + try: + json_conf = json.loads(gc_debug_conf) + self.enabled = True + self.top_objects = json_conf.get("top_objects", -1) + except Exception: + self.enabled = False + logger.error("Failed to parse VLLM_GC_DEBUG(%s)", + VLLM_GC_DEBUG) + logger.info("GC Debug Config. %s", str(self)) + + def __repr__(self) -> str: + return f"enabled:{self.enabled},top_objects:{self.top_objects}" + + +class GCDebugger: + """ + Debugger for GC which logs helpful information for GC understanding. + To enable, you should call maybe_attach_gc_debug_callback in the process. + """ + + def __init__(self, config: GCDebugConfig) -> None: + self.config = config + # Start time in micro second of this GC cycle + self.start_time_ns: int = time.monotonic_ns() + # If config.top_objects is positive, + # compute top collected objects by object types + self.gc_top_collected_objects: str = "" + + def handle(self, phase: str, info: dict[str, int]) -> None: + """ + Handles a GC event (e.g. GC start or GC finish) + """ + generation = info.get("generation") + if generation is None: + return + if phase == "start": + # Before GC started, record GC start time + # and top collected objects + self.start_time_ns = time.monotonic_ns() + self.gc_top_collected_objects = _compute_top_gc_collected_objects( + gc.get_objects(generation), self.config.top_objects) + elif phase == "stop": + # After GC finished, Record GC elapsed time and + # optionally top collected objects + elpased_ms = (time.monotonic_ns() - self.start_time_ns) / 1e6 + logger.info( + "GC took %.3fms to complete. 
" + "Collected %s objects in GC generation %d.%s", + elpased_ms, + str(info.get("collected", "?")), + generation, + (f" Top collected objects: \n{self.gc_top_collected_objects}" + if self.gc_top_collected_objects else ""), + ) + + +def maybe_attach_gc_debug_callback() -> None: + """ + Attached a callback for GC debug when VLLM_GC_DEBUG is enabled. + """ + config = GCDebugConfig(VLLM_GC_DEBUG) + if config.enabled: + debugger: GCDebugger = GCDebugger(config) + + def gc_callback(phase: str, info: dict[str, int]) -> None: + debugger.handle(phase, info) + + gc.callbacks.append(gc_callback) + + +def _compute_detailed_type(o: Any) -> str: + """ + Detailed object type. + + TODO(Jialin): Further enhance the detailed type with element types for + easier debugging. We tried but occasionally it would run into signals + which kills the engine. + """ + size_str: str = "" + # Object doesn't support len() - this can happen with type objects + # or other objects that don't implement __len__ properly + with suppress(Exception): + size_str = f"(size:{len(o)})" + return f"{str(type(o))}{size_str}" + + +def _compute_top_gc_collected_objects(objects: list[Any], top: int) -> str: + """ + Group collected objects by types. + """ + if top <= 0: + return "" + object_types = [_compute_detailed_type(o) for o in objects] + return "\n".join( + f"{count:>5}:{object_type}" + for object_type, count in Counter(object_types).most_common(top)) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index a43042a5510a..3ee804f10c17 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -29,6 +29,7 @@ maybe_register_config_serialize_by_value) from vllm.utils import (decorate_logs, get_hash_fn_by_name, make_zmq_socket, resolve_obj_by_qualname, set_process_title) +from vllm.utils.gc_utils import maybe_attach_gc_debug_callback from vllm.v1.core.kv_cache_utils import (BlockHash, generate_scheduler_kv_cache_config, get_kv_cache_configs, @@ -532,6 +533,9 @@ def __init__( gc.collect() gc.freeze() + # If enable, attach GC debugger after static variable freeze. 
+ maybe_attach_gc_debug_callback() + @contextmanager def _perform_handshakes( self, From da63274d9f3d06ba5815b5c8786a7194923a0234 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Sat, 27 Sep 2025 21:17:35 +0200 Subject: [PATCH 460/518] [Bugfix][NIXL] Fix Async Scheduler timeout issue (#25808) Signed-off-by: NickLucche --- .../kv_connector/v1/nixl_connector.py | 31 +++++++++++++++++-- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index c205501e6c98..5af2b33f029c 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -105,6 +105,7 @@ def __init__(self): self.reqs_to_recv: dict[ReqId, ReqMeta] = {} self.reqs_to_save: dict[ReqId, ReqMeta] = {} self.reqs_to_send: dict[ReqId, float] = {} + self.reqs_in_batch: set[ReqId] = set() def add_new_req( self, @@ -278,6 +279,7 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): self._reqs_need_save: dict[ReqId, tuple[Request, list[int]]] = {} # Reqs to send and their expiration time self._reqs_need_send: dict[ReqId, float] = {} + self._reqs_in_batch: set[ReqId] = set() def get_num_new_matched_tokens( self, request: "Request", @@ -324,6 +326,9 @@ def update_state_after_alloc(self, request: "Request", if not params: return + + if params.get("do_remote_decode"): + self._reqs_in_batch.add(request.request_id) if self.use_host_buffer and params.get("do_remote_decode"): # NOTE: when accelerator is not directly supported by Nixl, # prefilled blocks need to be saved to host memory before transfer. @@ -373,6 +378,8 @@ def build_connector_meta( request_id=req_id, local_block_ids=block_ids, kv_transfer_params=req.kv_transfer_params, + load_remote_cache=True, + save_to_host=False, ) for req_id, (req, block_ids) in self._reqs_need_save.items(): @@ -386,10 +393,12 @@ def build_connector_meta( ) meta.reqs_to_send = self._reqs_need_send + meta.reqs_in_batch = self._reqs_in_batch # Clear the list once workers start the transfers self._reqs_need_recv.clear() self._reqs_need_save.clear() + self._reqs_in_batch = set() self._reqs_need_send = {} return meta @@ -546,6 +555,8 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): self._recving_transfers = defaultdict[ReqId, list[Transfer]](list) # Track the expiration time of requests that are waiting to be sent. self._reqs_to_send: dict[ReqId, float] = {} + # Set of requests that have been part of a batch, regardless of status. + self._reqs_to_process: set[ReqId] = set() # Background thread for handling new handshake requests. 
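Editor's note: the _reqs_to_process set declared just above pairs with the reqs_in_batch field added on the scheduler side earlier in this patch. A simplified, paraphrased sketch of the resulting worker-side bookkeeping (illustrative module-level state, not the actual connector class):

reqs_to_process: set[str] = set()    # requests seen in a prefill batch
reqs_to_send: dict[str, float] = {}  # req_id -> expiration timestamp

def apply_connector_metadata(reqs_in_batch: set[str],
                             reqs_to_send_meta: dict[str, float]) -> None:
    reqs_to_process.update(reqs_in_batch)
    for req_id, expires_at in reqs_to_send_meta.items():
        # Only arm a timeout for requests the decode side has not
        # already finished reading.
        if req_id in reqs_to_process:
            reqs_to_send[req_id] = expires_at

def on_decode_read_complete(req_id: str) -> None:
    # Decode workers finished pulling the blocks: drop both entries.
    # The expiration may never have been armed, hence pop(..., None).
    reqs_to_process.discard(req_id)
    reqs_to_send.pop(req_id, None)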
self._nixl_handshake_listener_t: Optional[threading.Thread] = None @@ -1082,6 +1093,7 @@ def get_finished(self) -> tuple[set[str], set[str]]: "Releasing expired KV blocks for request %s which were " "retrieved by %d decode worker(s) within %d seconds.", req_id, count, envs.VLLM_NIXL_ABORT_REQUEST_TIMEOUT) + self._reqs_to_process.remove(req_id) del self._reqs_to_send[req_id] done_sending.add(req_id) @@ -1097,7 +1109,8 @@ def _get_new_notifs(self) -> set[str]: for notifs in self.nixl_wrapper.get_new_notifs().values(): for notif in notifs: req_id, tp_ratio = notif.decode("utf-8").rsplit(":", 1) - if req_id not in self._reqs_to_send: + if (req_id not in self._reqs_to_send + and req_id not in self._reqs_to_process): logger.error( "Potentially invalid KV blocks for " "unrecognized request %s were retrieved by " @@ -1110,7 +1123,8 @@ def _get_new_notifs(self) -> set[str]: tp_ratio): notified_req_ids.add(req_id) del self.consumer_notification_counts_by_req[req_id] - del self._reqs_to_send[req_id] + self._reqs_to_process.remove(req_id) + self._reqs_to_send.pop(req_id, None) return notified_req_ids def _pop_done_transfers( @@ -1171,8 +1185,19 @@ def start_load_kv(self, metadata: NixlConnectorMetadata): while not self._ready_requests.empty(): self._read_blocks_for_req(*self._ready_requests.get_nowait()) + # Keep around the requests that have been part of a batch. This is + # needed because async scheduling pushes the misalignment between the + # moment in which requests expiration is set (P side) and the moment in + # which blocks are read from D. As P can now more easily lag behind D + # while processing the next batch, we make sure to only set an + # expiration for requests that have not been read from D yet. + for req_id in metadata.reqs_in_batch: + self._reqs_to_process.add(req_id) + # Add to requests that are waiting to be read and track expiration. - self._reqs_to_send.update(metadata.reqs_to_send) + for req_id, expiration_time in metadata.reqs_to_send.items(): + if req_id in self._reqs_to_process: + self._reqs_to_send[req_id] = expiration_time def _read_blocks_for_req(self, req_id: str, meta: ReqMeta): logger.debug( From 69311446baa7c447708b591156c57d6996dda0a7 Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 27 Sep 2025 19:17:58 -0700 Subject: [PATCH 461/518] [MM] Optimize memory profiling for scattered multimodal embeddings (#25810) Signed-off-by: Roger Wang --- vllm/v1/worker/gpu_model_runner.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 2354e8222e7a..0960fe3a25fb 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3429,6 +3429,23 @@ def profile_run(self) -> None: expected_num_items=max_mm_items_per_batch, ) + # NOTE: This happens when encoder cache needs to store + # the embeddings that encoder outputs are scattered onto. + # In this case we create dummy embeddings of size + # (encode_budget, hidden_size) and scatter encoder + # output into it. + encoder_output_shape = dummy_encoder_outputs[0].shape + if encoder_output_shape[0] < encoder_budget: + expanded_outputs = [] + for output in dummy_encoder_outputs: + expanded = output.new_zeros( + (encoder_budget, encoder_output_shape[-1])) + num_tokens = output.shape[0] + expanded[:num_tokens].copy_(output) + expanded_outputs.append(expanded) + + dummy_encoder_outputs = expanded_outputs + # Cache the dummy encoder outputs. 
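Editor's note: the padding loop added above can be pictured in isolation. A small sketch with made-up sizes (encoder_budget and hidden_size here are illustrative, not the real profiling values):

import torch

encoder_budget, hidden_size = 256, 1024
output = torch.randn(77, hidden_size)  # dummy encoder output, 77 tokens

# Zero-filled buffer covering the full budget, real tokens placed at the front.
expanded = output.new_zeros((encoder_budget, hidden_size))
expanded[:output.shape[0]].copy_(output)
assert expanded.shape == (encoder_budget, hidden_size)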
self.encoder_cache["tmp"] = dict( enumerate(dummy_encoder_outputs)) From 61447540146602992116ae1068e45419e6bb1a5b Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sat, 27 Sep 2025 20:21:09 -0700 Subject: [PATCH 462/518] [Bugfix] Fix Qwen3-VL regression from #24982 (#25814) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_moe.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 45b9c656a4bb..61f1abad72b6 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -107,7 +107,7 @@ def __init__( ): super().__init__() - config = vllm_config.model_config.hf_config + config = vllm_config.model_config.hf_text_config parallel_config = vllm_config.parallel_config quant_config = vllm_config.quant_config @@ -293,7 +293,7 @@ class Qwen3MoeDecoderLayer(nn.Module): def __init__(self, vllm_config: VllmConfig, prefix: str = "") -> None: super().__init__() - config = vllm_config.model_config.hf_config + config = vllm_config.model_config.hf_text_config cache_config = vllm_config.cache_config quant_config = vllm_config.quant_config @@ -372,7 +372,7 @@ class Qwen3MoeModel(nn.Module): def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() - config = vllm_config.model_config.hf_config.get_text_config() + config = vllm_config.model_config.hf_text_config quant_config = vllm_config.quant_config parallel_config = vllm_config.parallel_config eplb_config = parallel_config.eplb_config @@ -586,7 +586,7 @@ class Qwen3MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA, def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): super().__init__() - config = vllm_config.model_config.hf_config + config = vllm_config.model_config.hf_text_config quant_config = vllm_config.quant_config self.config = config self.quant_config = quant_config From 0efd540dbc5405ada2f57f09d2a376aecad576dc Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Sun, 28 Sep 2025 12:21:01 +0800 Subject: [PATCH 463/518] [VLM] Update Qwen3-VL max_num_video_tokens calculation for configurable video profiling (#25557) Signed-off-by: Isotr0py Signed-off-by: Roger Wang Co-authored-by: Roger Wang --- vllm/model_executor/models/qwen2_vl.py | 13 +++-- vllm/model_executor/models/qwen3_vl.py | 70 ++++++++++++++++++++++++-- 2 files changed, 74 insertions(+), 9 deletions(-) diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py index 8192c3ce05dd..6ef01f333554 100644 --- a/vllm/model_executor/models/qwen2_vl.py +++ b/vllm/model_executor/models/qwen2_vl.py @@ -79,7 +79,7 @@ logger = init_logger(__name__) # For profile run -_MAX_FRAMES_PER_VIDEO = 32 +_MAX_FRAMES_PER_VIDEO = 14 # === Vision Inputs === # @@ -932,6 +932,7 @@ def get_num_image_tokens( _, num_image_tokens = self._get_vision_info( image_width=image_width, image_height=image_height, + num_frames=1, image_processor=image_processor, ) return num_image_tokens @@ -956,6 +957,7 @@ def get_image_size_with_most_features(self) -> ImageSize: max_image_size, _ = self._get_vision_info( image_width=9999999, image_height=9999999, + num_frames=1, image_processor=None, ) return max_image_size @@ -969,10 +971,12 @@ def get_max_image_tokens(self) -> int: image_processor=None, ) - def _get_max_video_frames(self, max_tokens: int) -> int: + def _get_max_video_frames(self, + max_tokens: int, + start_num_frames: int = 1) -> int: target_width, target_height = self.get_image_size_with_most_features() 
- num_frames = 0 + num_frames = start_num_frames while True: next_num_frames = num_frames + 1 @@ -994,12 +998,13 @@ def get_num_frames_with_most_features( self, seq_len: int, mm_counts: Mapping[str, int], + max_frames_per_video: int = _MAX_FRAMES_PER_VIDEO, ) -> int: max_videos = mm_counts.get("video", 0) max_total_frames = self._get_max_video_frames(seq_len) max_frames_per_video = min(max_total_frames // max(max_videos, 1), - _MAX_FRAMES_PER_VIDEO) + max_frames_per_video) return max(max_frames_per_video, 1) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index 5d0b66f91ace..c8f91dd48969 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -33,11 +33,14 @@ import torch.nn.functional as F from transformers import BatchFeature from transformers.models.qwen2_vl import Qwen2VLImageProcessorFast -from transformers.models.qwen2_vl.image_processing_qwen2_vl import smart_resize +from transformers.models.qwen2_vl.image_processing_qwen2_vl import ( + smart_resize as image_smart_resize) from transformers.models.qwen3_vl import (Qwen3VLProcessor, Qwen3VLVideoProcessor) from transformers.models.qwen3_vl.configuration_qwen3_vl import ( Qwen3VLConfig, Qwen3VLVisionConfig) +from transformers.models.qwen3_vl.video_processing_qwen3_vl import ( + smart_resize as video_smart_resize) from transformers.video_utils import VideoMetadata from vllm.attention.layer import check_upstream_fa_availability @@ -85,6 +88,9 @@ logger = init_logger(__name__) +# Official recommended max pixels is 24576 * 32 * 32 +_MAX_FRAMES_PER_VIDEO = 24576 + class Qwen3_VisionPatchEmbed(nn.Module): @@ -593,11 +599,16 @@ def _get_vision_info( image_height: int, num_frames: int = 2, do_resize: bool = True, - image_processor: Optional[Qwen2VLImageProcessorFast], + image_processor: Optional[Union[Qwen2VLImageProcessorFast, + Qwen3VLVideoProcessor]], ) -> tuple[ImageSize, int]: - if image_processor is None: + if image_processor is None and num_frames > 1: + image_processor = self.get_video_processor() + elif image_processor is None: image_processor = self.get_image_processor() + is_video = isinstance(image_processor, Qwen3VLVideoProcessor) + hf_config = self.get_hf_config() vision_config = hf_config.vision_config patch_size = vision_config.patch_size @@ -605,12 +616,22 @@ def _get_vision_info( temporal_patch_size = vision_config.temporal_patch_size if do_resize: + if is_video: + smart_resize = video_smart_resize + extra_kwargs = { + "num_frames": num_frames, + "temporal_factor": temporal_patch_size + } + else: + smart_resize = image_smart_resize + extra_kwargs = {} resized_height, resized_width = smart_resize( height=image_height, width=image_width, factor=patch_size * merge_size, min_pixels=image_processor.size["shortest_edge"], max_pixels=image_processor.size["longest_edge"], + **extra_kwargs, ) preprocessed_size = ImageSize(width=resized_width, height=resized_height) @@ -629,6 +650,39 @@ def _get_vision_info( return preprocessed_size, num_vision_tokens + def _get_max_video_frames(self, + max_tokens: int, + start_num_frames: int = 2) -> int: + return super()._get_max_video_frames(max_tokens, + start_num_frames=start_num_frames) + + def get_num_frames_with_most_features( + self, + seq_len: int, + mm_counts: Mapping[str, int], + ) -> int: + return super().get_num_frames_with_most_features( + seq_len, mm_counts, max_frames_per_video=_MAX_FRAMES_PER_VIDEO) + + def get_max_video_tokens( + self, + seq_len: int, + mm_counts: Mapping[str, int], + ) -> int: + 
target_width, target_height = self.get_image_size_with_most_features() + video_soft_tokens = self.get_num_video_tokens( + image_width=target_width, + image_height=target_height, + num_frames=self.get_num_frames_with_most_features( + seq_len, mm_counts), + image_processor=None, + ) + + # NOTE: By default in Qwen3-VL, one video token is converted to + # "<{timestamp} seconds>" (on average 9.5 tokens) + vision_start_token + video_token + vision_end_token # noqa: E501 + formatted_video_soft_tokens = video_soft_tokens * 12.5 + return int(formatted_video_soft_tokens) + def _calculate_timestamps(self, indices: list[int] | torch.Tensor, video_fps: float, merge_size: int): if not isinstance(indices, list): @@ -698,6 +752,12 @@ def get_dummy_mm_data( self.info.get_image_size_with_most_features()) target_num_frames = self.info.get_num_frames_with_most_features( seq_len, mm_counts) + target_video_size, _ = self.info._get_vision_info( + image_width=target_width, + image_height=target_height, + num_frames=target_num_frames, + image_processor=self.info.get_video_processor(), + ) return { "image": self._get_dummy_images(width=target_width, @@ -705,8 +765,8 @@ def get_dummy_mm_data( num_images=num_images), "video": self._get_dummy_videos( - width=target_width, - height=target_height, + width=target_video_size.width, + height=target_video_size.height, num_frames=target_num_frames, num_videos=num_videos, ), From f4e4088c99020c9711e824096f013f89da54bb36 Mon Sep 17 00:00:00 2001 From: weiliang Date: Sun, 28 Sep 2025 16:23:44 +0800 Subject: [PATCH 464/518] Fix random dataset mismatched token length with config. (#24937) Signed-off-by: Weiliang Liu Signed-off-by: Roger Wang Co-authored-by: Roger Wang --- vllm/benchmarks/datasets.py | 145 +++++++++++++++++++++++++++++------- 1 file changed, 118 insertions(+), 27 deletions(-) diff --git a/vllm/benchmarks/datasets.py b/vllm/benchmarks/datasets.py index f0c0d829a393..807e543dc0cd 100644 --- a/vllm/benchmarks/datasets.py +++ b/vllm/benchmarks/datasets.py @@ -366,11 +366,67 @@ def process_video(video: Any) -> Mapping[str, Any]: f"Invalid video input {video}. Must be a string of local path/remote url, or a dictionary with raw video bytes in the form of `{{'bytes': raw_video_bytes}}`." # noqa: E501 ) + +def gen_prompt_decode_to_target_len( + tokenizer: PreTrainedTokenizerBase, + token_sequence: list[int], + target_token_len: int, + max_retry: int = 10, + add_special_tokens: bool = False, + rng: Optional[np.random.Generator] = None, +) -> tuple[str, list[int]]: + """ + Ensure decoded-then-encoded prompt length matches the target token length. + + This function decodes an initial token sequence to text and re-encodes it + , iteratively adjusting the token sequence length to match a target. + This is necessary because some tokenizers do not guarantee a 1:1 mapping + between consecutive tokens and the decoded-then-encoded sequence length. + For example, for GPT2Tokenizer: + [6880, 6881] -> ['Ġcalls', 'here'] -> + [1650, 939, 486] -> ['Ġcall', 'sh', 'ere'] + + Returns a tuple of the final prompt string and the adjusted token sequence. 
+ """ + remain_num_try = max_retry + token_mismatch = 0 + while True: + prompt = tokenizer.decode(token_sequence) + token_sequence = tokenizer.encode( + prompt, add_special_tokens=add_special_tokens + ) + if remain_num_try <= 0: + if len(token_sequence) != target_token_len: + token_mismatch = len(token_sequence) - target_token_len + break + + if len(token_sequence) == target_token_len: + break + elif len(token_sequence) < target_token_len: + if rng is not None: + extra_tokens = rng.integers( + 0, + tokenizer.vocab_size, + size=target_token_len - len(token_sequence), + ).tolist() + else: + extra_tokens = np.random.randint( + 0, + tokenizer.vocab_size, + size=target_token_len - len(token_sequence), + ).tolist() + token_sequence.extend(extra_tokens) + elif len(token_sequence) > target_token_len: + token_sequence = token_sequence[:target_token_len] + + remain_num_try -= 1 + + return prompt, token_sequence, token_mismatch + # ----------------------------------------------------------------------------- # Random Dataset Implementation (Synthetic Data) # ----------------------------------------------------------------------------- - class RandomDataset(BenchmarkDataset): """ Synthetic text-only dataset for serving/throughput benchmarks. @@ -420,8 +476,9 @@ def sample( vocab_size = tokenizer.vocab_size requests = [] + token_mismatch_total = 0 for i in range(num_requests): - prompt, total_input_len = self.generate_token_sequence( + prompt, total_input_len, token_mismatch = self.generate_token_sequence( # noqa: E501 tokenizer=tokenizer, prefix_token_ids=prefix_token_ids, prefix_len=prefix_len, @@ -430,6 +487,7 @@ def sample( offset=int(offsets[i]), index=i, ) + token_mismatch_total += token_mismatch requests.append( SampleRequest( prompt=prompt, @@ -453,6 +511,18 @@ def sample( ) ) requests = batch_requests + + if token_mismatch_total != 0: + sign = "more" if token_mismatch_total > 0 else "fewer" + logger.warning( + "Across all generated prompts, there were %d %s tokens " + "than expected after decoding and re-encoding. This is " + "expected due to the imperfect nature of the sampling " + "procedure.", + abs(token_mismatch_total), + sign, + ) + return requests def get_prefix( @@ -530,7 +600,7 @@ def generate_token_sequence( input_len: int, offset: int, index: int, - ) -> tuple[str, int]: + ) -> tuple[str, int, int]: """ Returns (prompt, total_input_len). 
@@ -549,15 +619,16 @@ def generate_token_sequence( token_sequence = prefix_token_ids + inner_seq # Decode, then re-encode and truncate to preserve token count invariants - prompt = tokenizer.decode(token_sequence) total_input_len = prefix_len + int(input_len) - - re_encoded_sequence = tokenizer.encode( - prompt, add_special_tokens=False)[:total_input_len] - prompt = tokenizer.decode(re_encoded_sequence) - total_input_len = len(re_encoded_sequence) - - return prompt, total_input_len + prompt, adjusted_token_sequence, token_mismatch = gen_prompt_decode_to_target_len( # noqa: E501 + tokenizer=tokenizer, + token_sequence=token_sequence, + target_token_len=total_input_len, + add_special_tokens=False, + rng=self._rng, + ) + total_input_len = len(adjusted_token_sequence) + return prompt, total_input_len, token_mismatch # ----------------------------------------------------------------------------- @@ -873,8 +944,9 @@ def sample( vocab_size = tokenizer.vocab_size # Add synthetic multimodal items to each request mm_requests = [] + token_mismatch_total = 0 for i in range(num_requests): - prompt, total_input_len = self.generate_token_sequence( + prompt, total_input_len, token_mismatch = self.generate_token_sequence( # noqa: E501 tokenizer=tokenizer, prefix_token_ids=prefix_token_ids, prefix_len=prefix_len, @@ -883,6 +955,7 @@ def sample( offset=int(offsets[i]), index=i, ) + token_mismatch_total += token_mismatch # Get multimodal item iterator for a given request mm_item_iterator = self.get_mm_item_iterator( min_num_mm_items, @@ -918,6 +991,18 @@ def sample( request_id=request_id_prefix + str(i), ) mm_requests.append(sample_request) + + if token_mismatch_total != 0: + sign = "more" if token_mismatch_total > 0 else "fewer" + logger.warning( + "Across all generated prompts, there were %d %s tokens " + "than expected after decoding and re-encoding. 
This is " + "expected due to the imperfect nature of the sampling " + "procedure.", + abs(token_mismatch_total), + sign, + ) + return mm_requests # ----------------------------------------------------------------------------- @@ -2694,27 +2779,23 @@ def _generate_exact_length_tokens(target_length: int) -> list[int]: # Generate random tokens tokens = np.random.randint( 0, vocab_size, size=target_length).tolist() - text = tokenizer.decode(tokens) - re_encoded = tokenizer.encode(text, add_special_tokens=False) - - if len(re_encoded) == target_length: - return re_encoded - elif len(re_encoded) < target_length: - # Recursively generate additional consistent tokens - needed = target_length - len(re_encoded) - extra_tokens = _generate_exact_length_tokens(needed) - return re_encoded + extra_tokens - else: - # Truncate to target length - return re_encoded[:target_length] + + _, adjusted_tokens, token_mismatch = gen_prompt_decode_to_target_len( # noqa: E501 + tokenizer=tokenizer, + token_sequence=tokens, + target_token_len=target_length, + add_special_tokens=False, + ) + return adjusted_tokens, token_mismatch requests = [] + token_mismatch_total = 0 for _ in range(num_prefixes): prefix_tokens = _generate_exact_length_tokens(prefix_len) for _ in range(prompts_per_prefix): - suffix_tokens = _generate_exact_length_tokens(suffix_len) - + suffix_tokens, token_mistmatch = _generate_exact_length_tokens(suffix_len) # noqa: E501 + token_mismatch_total += token_mistmatch combined_tokens = prefix_tokens + suffix_tokens prompt = tokenizer.decode(combined_tokens) prompt_len = len(combined_tokens) @@ -2726,6 +2807,16 @@ def _generate_exact_length_tokens(target_length: int) -> list[int]: ) ) + if token_mismatch_total != 0: + sign = "more" if token_mismatch_total > 0 else "fewer" + logger.warning( + "Across all generated prompts, there were %d %s tokens " + "than expected after decoding and re-encoding. This is " + "expected due to the imperfect nature of the sampling " + "procedure.", + abs(token_mismatch_total), + sign, + ) random.shuffle(requests) return requests From b1ded114b976b0fa4445565d235b23e64a4dc737 Mon Sep 17 00:00:00 2001 From: Yuxuan Zhang <2448370773@qq.com> Date: Sun, 28 Sep 2025 20:05:51 +0800 Subject: [PATCH 465/518] Update GLM-4.5 Doc transformers version (#25830) Signed-off-by: zRzRzRzRzRzRzR <2448370773@qq.com> --- docs/features/tool_calling.md | 6 ++++-- docs/models/supported_models.md | 2 +- tests/models/registry.py | 2 +- vllm/model_executor/models/glm4_moe.py | 2 +- 4 files changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/features/tool_calling.md b/docs/features/tool_calling.md index 16693db7255c..241438ae5578 100644 --- a/docs/features/tool_calling.md +++ b/docs/features/tool_calling.md @@ -323,8 +323,10 @@ Flags: `--tool-call-parser longcat` Supported models: -* `ZhipuAI/GLM-4.5` -* `ZhipuAI/GLM-4.5-Air` +* `zai-org/GLM-4.5` +* `zai-org/GLM-4.5-Air` +* `zai-org/GLM-4.6` +* `zai-org/GLM-4.6-Air` Flags: `--tool-call-parser glm45` diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 650f62492282..3ee5a7d0ffc5 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -367,7 +367,7 @@ th { | `Gemma3nForCausalLM` | Gemma 3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | | ✅︎ | | `GlmForCausalLM` | GLM-4 | `zai-org/glm-4-9b-chat-hf`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Glm4ForCausalLM` | GLM-4-0414 | `zai-org/GLM-4-32B-0414`, etc. | ✅︎ | ✅︎ | ✅︎ | -| `Glm4MoeForCausalLM` | GLM-4.5 | `zai-org/GLM-4.5`, etc. 
| ✅︎ | ✅︎ | ✅︎ | +| `Glm4MoeForCausalLM` | GLM-4.5, GLM-4.6 | `zai-org/GLM-4.5`, etc. | ✅︎ | ✅︎ | ✅︎ | | `GPT2LMHeadModel` | GPT-2 | `gpt2`, `gpt2-xl`, etc. | | ✅︎ | ✅︎ | | `GPTBigCodeForCausalLM` | StarCoder, SantaCoder, WizardCoder | `bigcode/starcoder`, `bigcode/gpt_bigcode-santacoder`, `WizardLM/WizardCoder-15B-V1.0`, etc. | ✅︎ | ✅︎ | ✅︎ | | `GPTJForCausalLM` | GPT-J | `EleutherAI/gpt-j-6b`, `nomic-ai/gpt4all-j`, etc. | | ✅︎ | ✅︎ | diff --git a/tests/models/registry.py b/tests/models/registry.py index e321acc873c6..124a97ed0c89 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -642,7 +642,7 @@ def check_available_online( speculative_model="baidu/ERNIE-4.5-21B-A3B-PT"), "Glm4MoeMTPModel": _HfExamplesInfo("zai-org/GLM-4.5", speculative_model="zai-org/GLM-4.5", - min_transformers_version="4.54", + min_transformers_version="4.56", is_available_online=False), "LongCatFlashMTPModel": _HfExamplesInfo( "meituan-longcat/LongCat-Flash-Chat", diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index 947c6ce62f55..2557748b7faa 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -21,7 +21,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Inference-only GLM-4.5 model compatible with HuggingFace weights.""" +"""Inference-only GLM-4.5, GLM-4.6 model compatible with HuggingFace weights.""" import typing from collections.abc import Callable, Iterable from itertools import islice From 471997adf696f4f97b7b6bc7b52610e1eceac54c Mon Sep 17 00:00:00 2001 From: JJJYmmm <92386084+JJJYmmm@users.noreply.github.com> Date: Mon, 29 Sep 2025 01:56:12 +0800 Subject: [PATCH 466/518] [Bugfix] fix Qwen3VLMoe load when pp > 1 (#25838) Signed-off-by: liuye.hj Co-authored-by: liuye.hj --- vllm/model_executor/models/qwen3_vl_moe.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl_moe.py b/vllm/model_executor/models/qwen3_vl_moe.py index 52ea652b3765..02cc5d6d66d1 100644 --- a/vllm/model_executor/models/qwen3_vl_moe.py +++ b/vllm/model_executor/models/qwen3_vl_moe.py @@ -212,6 +212,8 @@ def load_weights(self, weights: Iterable[tuple[str, # attempted to load as other weights later is_expert_weight = True name_mapped = name.replace(weight_name, param_name) + if is_pp_missing_parameter(name_mapped, self): + continue if is_fused_expert: loaded_weight = loaded_weight.transpose(-1, -2) # no bias @@ -230,8 +232,6 @@ def load_weights(self, weights: Iterable[tuple[str, name_mapped, params_dict, loaded_weight, shard_id, num_experts) else: - if is_pp_missing_parameter(name_mapped, self): - continue # Skip loading extra parameters for GPTQ/modelopt models if name_mapped.endswith( ignore_suffixes From 0307428d65acf5cf1a73a70a7722e076bbb83f22 Mon Sep 17 00:00:00 2001 From: Michael Goin Date: Sun, 28 Sep 2025 17:12:42 -0400 Subject: [PATCH 467/518] Remove redundant cudagraph dispatcher warning (#25841) --- vllm/v1/cudagraph_dispatcher.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/vllm/v1/cudagraph_dispatcher.py b/vllm/v1/cudagraph_dispatcher.py index 2dbe2bfb8082..29bb220760c0 100644 --- a/vllm/v1/cudagraph_dispatcher.py +++ b/vllm/v1/cudagraph_dispatcher.py @@ -4,9 +4,6 @@ from vllm.config import CUDAGraphMode, VllmConfig from vllm.forward_context import BatchDescriptor -from vllm.logger import init_logger - -logger = 
init_logger(__name__) class CudagraphDispatcher: @@ -102,8 +99,6 @@ def dispatch( """ # if not initialized, just skip dispatching. if not self.keys_initialized: - logger.warning_once("cudagraph dispatching keys are not " - "initialized. No cudagraph will be used.") return CUDAGraphMode.NONE, None non_uniform_key = batch_descriptor.non_uniform From a3ae45a38cb0ee920427e3f2db2a237afd557fd8 Mon Sep 17 00:00:00 2001 From: Juechen Liu Date: Sun, 28 Sep 2025 21:18:57 -0700 Subject: [PATCH 468/518] [Misc] fix tests failure by using current_platform (#25825) Signed-off-by: Juechen Liu --- vllm/attention/ops/triton_reshape_and_cache_flash.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/ops/triton_reshape_and_cache_flash.py b/vllm/attention/ops/triton_reshape_and_cache_flash.py index cc9e1bb23b9b..0d82935bb418 100644 --- a/vllm/attention/ops/triton_reshape_and_cache_flash.py +++ b/vllm/attention/ops/triton_reshape_and_cache_flash.py @@ -137,7 +137,7 @@ def triton_reshape_and_cache_flash( # heuristics instead of autotuning TILE_SIZE = min(2048, triton.next_power_of_2(n)) - if torch.version.hip or torch.version.xpu: + if current_platform.is_rocm() or current_platform.is_xpu(): num_stages = 4 num_warps = 8 else: # cuda From 9b44a7d9265b78c7b72c20e0698ede17d62de5e8 Mon Sep 17 00:00:00 2001 From: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Date: Mon, 29 Sep 2025 00:46:30 -0400 Subject: [PATCH 469/518] [P/D] NIXL Updates (#25844) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Sage Moore Signed-off-by: simon-mo Signed-off-by: rentianyue-jk Signed-off-by: Russell Bryant Signed-off-by: Isotr0py Signed-off-by: Chenheli Hua Signed-off-by: mgoin Signed-off-by: Tyler Michael Smith Signed-off-by: NickLucche Signed-off-by: Roger Wang Signed-off-by: Robert Shaw Co-authored-by: Sage Moore Co-authored-by: Russell Bryant Co-authored-by: rentianyue-jk Co-authored-by: Isotr0py Co-authored-by: Chenheli Hua Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: Michael Goin Co-authored-by: Tyler Michael Smith Co-authored-by: Nicolò Lucchesi Co-authored-by: Roger Wang Co-authored-by: Robert Shaw --- .../kv_transfer/kv_connector/v1/nixl_connector.py | 7 +++++-- vllm/v1/core/sched/scheduler.py | 7 ++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 5af2b33f029c..c11189d7ec10 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -474,8 +474,11 @@ def __init__(self, vllm_config: VllmConfig, engine_id: str): "backends", ["UCX"]) # Agent. non_ucx_backends = [b for b in self.nixl_backends if b != "UCX"] - config = nixl_agent_config(backends=self.nixl_backends) if len( - non_ucx_backends) > 0 and nixl_agent_config is not None else None + if nixl_agent_config is None: + config = None + else: + config = nixl_agent_config(backends=self.nixl_backends) if len( + non_ucx_backends) > 0 else nixl_agent_config(num_threads=8) self.nixl_wrapper = NixlWrapper(str(uuid.uuid4()), config) # Map of engine_id -> {rank0: agent_name0, rank1: agent_name1..}. 
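Editor's note: the new agent-config branch above is easier to read when unrolled. A readability sketch of the equivalent selection logic, using only the keyword arguments that appear in the diff (the motivation for num_threads=8 is not stated in the commit, and nixl_agent_config is the same possibly-None import guarded by the connector):

from collections.abc import Sequence

def select_agent_config(nixl_backends: Sequence[str]):
    # nixl_agent_config may be None when the installed NIXL predates
    # agent-config support, mirroring the import guard in the connector.
    if nixl_agent_config is None:
        return None
    if any(backend != "UCX" for backend in nixl_backends):
        return nixl_agent_config(backends=nixl_backends)
    # UCX-only path: only the thread count is overridden.
    return nixl_agent_config(num_threads=8)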
diff --git a/vllm/v1/core/sched/scheduler.py b/vllm/v1/core/sched/scheduler.py index 10d8f6bbda5c..8d6ea887142d 100644 --- a/vllm/v1/core/sched/scheduler.py +++ b/vllm/v1/core/sched/scheduler.py @@ -1290,4 +1290,9 @@ def _update_from_kv_xfer_finished(self, self.finished_recving_kv_req_ids.add(req_id) for req_id in (kv_connector_output.finished_sending or ()): logger.debug("Finished sending KV transfer for request %s", req_id) - self._free_blocks(self.requests[req_id]) + if req_id not in self.requests: + logger.warning( + "Got finished sending KV transfer for request %s," + "but the request is already freed.", req_id) + else: + self._free_blocks(self.requests[req_id]) From 219cfbe7f6992120465c01c9c85a138be93204b2 Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Mon, 29 Sep 2025 07:08:17 +0200 Subject: [PATCH 470/518] Add Phi4FlashForCausalLM to _PREVIOUSLY_SUPPORTED_MODELS (#25832) Signed-off-by: Thomas Parnell --- vllm/model_executor/models/registry.py | 1 + 1 file changed, 1 insertion(+) diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 0471164ab8a6..a68012d8a8c9 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -333,6 +333,7 @@ _PREVIOUSLY_SUPPORTED_MODELS = { "Phi3SmallForCausalLM": "0.9.2", + "Phi4FlashForCausalLM": "0.10.2", # encoder-decoder models except whisper # have been removed for V0 deprecation. "BartModel": "0.10.2", From 143844fa43d4851831e89200c9a6069c929f8882 Mon Sep 17 00:00:00 2001 From: Kunshang Ji Date: Mon, 29 Sep 2025 13:15:10 +0800 Subject: [PATCH 471/518] [XPU]Fix xpu spec decoding UTs, avoid using cuda graph (#25847) Signed-off-by: Kunshang Ji --- .buildkite/scripts/hardware_ci/run-xpu-test.sh | 2 +- tests/utils.py | 2 ++ vllm/v1/spec_decode/eagle.py | 7 ++++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/.buildkite/scripts/hardware_ci/run-xpu-test.sh b/.buildkite/scripts/hardware_ci/run-xpu-test.sh index 6b9c0121c4aa..2fd7265fa536 100644 --- a/.buildkite/scripts/hardware_ci/run-xpu-test.sh +++ b/.buildkite/scripts/hardware_ci/run-xpu-test.sh @@ -42,7 +42,7 @@ docker run \ pytest -v -s v1/sample --ignore=v1/sample/test_logprobs.py --ignore=v1/sample/test_logprobs_e2e.py pytest -v -s v1/worker --ignore=v1/worker/test_gpu_model_runner.py pytest -v -s v1/structured_output - pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_eagle.py --ignore=v1/spec_decode/test_tree_attention.py + pytest -v -s v1/spec_decode --ignore=v1/spec_decode/test_max_len.py --ignore=v1/spec_decode/test_tree_attention.py pytest -v -s v1/kv_connector/unit --ignore=v1/kv_connector/unit/test_multi_connector.py --ignore=v1/kv_connector/unit/test_nixl_connector.py --ignore=v1/kv_connector/unit/test_shared_storage_connector.py pytest -v -s v1/test_metrics pytest -v -s v1/test_serial_utils.py diff --git a/tests/utils.py b/tests/utils.py index ab6ccc7ad9f9..ffdc0f732543 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -1143,6 +1143,8 @@ def get_attn_backend_list_based_on_platform() -> list[str]: print("Skip FLASH_ATTN on ROCm as aiter is not installed") return attn_backend_list + elif current_platform.is_xpu(): + return ["FLASH_ATTN", "TRITON_ATTN"] else: raise ValueError("Unsupported platform") diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index 1b5bafb9ca1b..a2f7dbe5703f 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -72,12 +72,13 @@ def __init__( self.attn_metadata_builder: 
Optional[AttentionMetadataBuilder] = None - self.use_cuda_graph = (self.vllm_config.compilation_config.level + self.use_cuda_graph = (not current_platform.is_xpu() + and self.vllm_config.compilation_config.level == CompilationLevel.PIECEWISE and not self.vllm_config.model_config.enforce_eager) self.cudagraph_batch_sizes = list( - reversed( - self.vllm_config.compilation_config.cudagraph_capture_sizes)) + reversed(self.vllm_config.compilation_config. + cudagraph_capture_sizes)) if self.use_cuda_graph else [] # persistent buffers for cuda graph self.input_ids = torch.zeros(self.max_num_tokens, From 65ecb4f134a2a9b651c0ad070ad833cf6fe41b5b Mon Sep 17 00:00:00 2001 From: Roger Wang Date: Sun, 28 Sep 2025 23:03:51 -0700 Subject: [PATCH 472/518] [Bugfix] Fallback ViT attn backend to SDPA for blackwell (#25851) Signed-off-by: Roger Wang --- vllm/model_executor/models/qwen3_vl.py | 10 +--------- vllm/platforms/cuda.py | 6 ++++++ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py index c8f91dd48969..6d2a6019ef6f 100644 --- a/vllm/model_executor/models/qwen3_vl.py +++ b/vllm/model_executor/models/qwen3_vl.py @@ -66,7 +66,7 @@ PromptReplacement, PromptUpdate, PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder -from vllm.platforms import _Backend, current_platform +from vllm.platforms import _Backend from vllm.sequence import IntermediateTensors from vllm.transformers_utils.config import uses_mrope from vllm.utils import is_list_of @@ -336,14 +336,6 @@ def __init__( }: raise RuntimeError( f"Qwen3-VL does not support {self.attn_backend} backend now.") - if current_platform.is_device_capability( - 100) and self.attn_backend != _Backend.TORCH_SDPA: - # TODO(Roger/Wentao): remove this after FA - # or XFORMERS's issue fixed on Blackwell - logger.info_once("Qwen3-VL vision attention does not support " - f"{self.attn_backend} backend on Blackwell now. " - "Vision attention backend is set to TORCH_SDPA.") - self.attn_backend = _Backend.TORCH_SDPA self.blocks = nn.ModuleList([ Qwen3_VisionBlock( diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 58ba08101bc9..8b9f9f569206 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -205,6 +205,12 @@ def get_current_memory_usage(cls, @classmethod def get_vit_attn_backend(cls, head_size: int, dtype: torch.dtype) -> _Backend: + + # For Blackwell GPUs, force TORCH_SDPA for now. 
+ # See https://github.com/facebookresearch/xformers/issues/1317#issuecomment-3199392579 # noqa: E501 + if cls.has_device_capability(100): + return _Backend.TORCH_SDPA + if dtype not in (torch.float16, torch.bfloat16): return _Backend.XFORMERS From bd51f78e39e0ea80b1d3bfbb17d24f40b1f62f6c Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Mon, 29 Sep 2025 14:09:18 +0800 Subject: [PATCH 473/518] [V0 Deprecation][Models] Remove all V0 condition for mm embeddings merge (#25331) Signed-off-by: Isotr0py Signed-off-by: isotr0py <2037008807@qq.com> --- vllm/model_executor/models/aya_vision.py | 11 --- vllm/model_executor/models/blip2.py | 11 --- vllm/model_executor/models/chameleon.py | 12 --- vllm/model_executor/models/cohere2_vision.py | 11 --- vllm/model_executor/models/deepseek_vl2.py | 11 --- vllm/model_executor/models/fuyu.py | 11 --- vllm/model_executor/models/gemma3_mm.py | 19 ---- vllm/model_executor/models/glm4_1v.py | 50 +--------- vllm/model_executor/models/glm4v.py | 17 +--- vllm/model_executor/models/granite_speech.py | 11 --- .../models/hyperclovax_vision.py | 15 +-- vllm/model_executor/models/idefics3.py | 11 --- vllm/model_executor/models/interns1.py | 18 +--- vllm/model_executor/models/internvl.py | 18 +--- vllm/model_executor/models/kimi_vl.py | 16 ---- vllm/model_executor/models/llava.py | 11 --- vllm/model_executor/models/llava_next.py | 11 --- .../model_executor/models/llava_next_video.py | 11 --- vllm/model_executor/models/llava_onevision.py | 46 +-------- vllm/model_executor/models/minicpmv.py | 15 +-- vllm/model_executor/models/mistral3.py | 11 --- vllm/model_executor/models/mllama4.py | 11 --- vllm/model_executor/models/molmo.py | 11 --- .../model_executor/models/nano_nemotron_vl.py | 18 +--- vllm/model_executor/models/nemotron_vl.py | 11 --- vllm/model_executor/models/ovis.py | 11 --- vllm/model_executor/models/ovis2_5.py | 12 --- vllm/model_executor/models/paligemma.py | 11 --- vllm/model_executor/models/phi3v.py | 11 --- vllm/model_executor/models/phi4_multimodal.py | 16 ---- vllm/model_executor/models/phi4mm.py | 48 +--------- vllm/model_executor/models/pixtral.py | 11 --- .../models/qwen2_5_omni_thinker.py | 31 +------ vllm/model_executor/models/qwen2_5_vl.py | 58 +----------- vllm/model_executor/models/qwen2_audio.py | 11 --- vllm/model_executor/models/qwen2_vl.py | 50 +--------- vllm/model_executor/models/qwen3_vl.py | 93 +------------------ vllm/model_executor/models/qwen_vl.py | 12 --- vllm/model_executor/models/skyworkr1v.py | 11 --- vllm/model_executor/models/transformers.py | 13 --- vllm/model_executor/models/ultravox.py | 12 --- vllm/model_executor/models/voxtral.py | 13 --- 42 files changed, 13 insertions(+), 809 deletions(-) diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index eab996e9ba22..f6dfa435ddd4 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -427,17 +427,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
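Editor's note: every model file touched by this patch deletes the same V0-only fallback branch. As the removed comments state, V1 builds inputs_embeds in the model runner before the model's forward runs; a rough paraphrase of that flow, built from the interface methods referenced in the deleted code (the runner's real implementation differs in detail, and placeholder token ids vary per model):

import torch

def v1_prepare_inputs_embeds(model, input_ids: torch.Tensor,
                             image_token_index: int, **mm_kwargs):
    # 1) Encode multimodal inputs once, outside the language-model forward.
    mm_embeds = model.get_multimodal_embeddings(**mm_kwargs)
    # 2) Merge them into the text embeddings at the placeholder positions.
    return model.get_input_embeddings(
        input_ids,
        mm_embeds,
        is_multimodal=(input_ids == image_token_index),
    )

# The language model is then called with input_ids=None and the merged
# inputs_embeds, which is why the per-model fallback branches become dead code.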
- elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_index, - ) - input_ids = None - hidden_states = self.language_model.model( input_ids=input_ids, positions=positions, diff --git a/vllm/model_executor/models/blip2.py b/vllm/model_executor/models/blip2.py index 4d1850d07b28..334743a7358c 100644 --- a/vllm/model_executor/models/blip2.py +++ b/vllm/model_executor/models/blip2.py @@ -672,17 +672,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == _IMAGE_TOKEN_ID, - ) - input_ids = None - hidden_states = self.language_model.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index f9740adb151b..86dbf63fa5df 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -1014,18 +1014,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - image_token_id = self.model.vocabulary_mapping.image_token_id - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == image_token_id, - ) - input_ids = None - hidden_states = self.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/cohere2_vision.py b/vllm/model_executor/models/cohere2_vision.py index 99edcba4d874..7162571c08d9 100644 --- a/vllm/model_executor/models/cohere2_vision.py +++ b/vllm/model_executor/models/cohere2_vision.py @@ -440,17 +440,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
- elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_id, - ) - input_ids = None - hidden_states = self.language_model.model( input_ids=input_ids, positions=positions, diff --git a/vllm/model_executor/models/deepseek_vl2.py b/vllm/model_executor/models/deepseek_vl2.py index b98008c83bdc..0f87fb34bf32 100644 --- a/vllm/model_executor/models/deepseek_vl2.py +++ b/vllm/model_executor/models/deepseek_vl2.py @@ -614,17 +614,6 @@ def forward(self, if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.image_token_id, - ) - input_ids = None - hidden_states = self.language_model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/fuyu.py b/vllm/model_executor/models/fuyu.py index b99fe33a1dcc..9e491c0b50d2 100644 --- a/vllm/model_executor/models/fuyu.py +++ b/vllm/model_executor/models/fuyu.py @@ -352,17 +352,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == _IMAGE_TOKEN_ID, - ) - input_ids = None - hidden_states = self.language_model( input_ids=input_ids, positions=positions, diff --git a/vllm/model_executor/models/gemma3_mm.py b/vllm/model_executor/models/gemma3_mm.py index be75e36fe23b..36f8651371ba 100644 --- a/vllm/model_executor/models/gemma3_mm.py +++ b/vllm/model_executor/models/gemma3_mm.py @@ -596,25 +596,6 @@ def forward(self, if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
- elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_index, - ) - if (vision_embeddings is not None) and len(vision_embeddings) != 0: - kwargs = self.prepare_attn_masks( - input_ids, - positions, - mask_dtype=self.dtype, - **kwargs, - ) - input_ids = None - hidden_states = self.language_model.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/glm4_1v.py b/vllm/model_executor/models/glm4_1v.py index dbb5431ae491..722f1e428be7 100644 --- a/vllm/model_executor/models/glm4_1v.py +++ b/vllm/model_executor/models/glm4_1v.py @@ -71,7 +71,6 @@ from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.platforms import _Backend from vllm.sequence import IntermediateTensors -from vllm.transformers_utils.config import uses_mrope from vllm.utils.tensor_schema import TensorSchema, TensorShape from ..layers.activation import SiluAndMul @@ -80,8 +79,7 @@ from .qwen2_vl import (_create_qwen2vl_field_factory, apply_rotary_pos_emb_vision) from .utils import (AutoWeightsLoader, WeightsMapper, - init_vllm_registered_model, maybe_prefix, - merge_multimodal_embeddings) + init_vllm_registered_model, maybe_prefix) from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model logger = init_logger(__name__) @@ -1552,32 +1550,6 @@ def get_multimodal_embeddings( multimodal_embeddings += video_embeddings return multimodal_embeddings - def get_input_embeddings_v0( - self, - input_ids: torch.Tensor, - image_input: Optional[Glm4vImageInputs] = None, - video_input: Optional[Glm4vVideoInputs] = None, - ) -> torch.Tensor: - inputs_embeds = self.get_input_embeddings(input_ids) - if image_input is not None: - image_embeds = self._process_image_input(image_input) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - image_embeds, - placeholder_token_id=self.config.image_token_id, - ) - - if video_input is not None: - video_embeds = self._process_video_input(video_input) - inputs_embeds = merge_multimodal_embeddings( - input_ids, - inputs_embeds, - video_embeds, - placeholder_token_id=self.config.video_token_id, - ) - return inputs_embeds - def forward( self, input_ids: torch.Tensor, @@ -1604,26 +1576,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner from - # `get_multimodal_embeddings` and `get_input_embeddings`, this - # condition is only for v0 compatibility. 
- elif inputs_embeds is None: - image_input = self._parse_and_validate_image_input(**kwargs) - video_input = self._parse_and_validate_video_input(**kwargs) - - if image_input is None and video_input is None: - inputs_embeds = None - else: - if uses_mrope(self.config): - assert positions.ndim == 2 and positions.size(0) == 3, ( - "multimodal section rotary embedding requires " - f"(3, seq_len) positions, but got {positions.size()}") - inputs_embeds = self.get_input_embeddings_v0( - input_ids, - image_input=image_input, - video_input=video_input) - input_ids = None - hidden_states = self.language_model.model( input_ids=input_ids, positions=positions, diff --git a/vllm/model_executor/models/glm4v.py b/vllm/model_executor/models/glm4v.py index ace9c05daf15..22ddb1d75160 100644 --- a/vllm/model_executor/models/glm4v.py +++ b/vllm/model_executor/models/glm4v.py @@ -43,7 +43,7 @@ from .chatglm import ChatGLMBaseModel, ChatGLMModel from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) -from .utils import flatten_bn, isin_list +from .utils import flatten_bn class GLMVImagePixelInputs(TensorSchema): @@ -618,21 +618,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=isin_list(input_ids, [ - self.config.boi_token_id, - self.config.pad_token_id, - self.config.eoi_token_id, - ]), - ) - input_ids = None - hidden_states = self.transformer(input_ids, positions, intermediate_tensors, inputs_embeds) diff --git a/vllm/model_executor/models/granite_speech.py b/vllm/model_executor/models/granite_speech.py index 8a02da58ea0b..0ec451356f5e 100644 --- a/vllm/model_executor/models/granite_speech.py +++ b/vllm/model_executor/models/granite_speech.py @@ -765,17 +765,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - audio_embeds = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - audio_embeds, - is_multimodal=input_ids == self.config.audio_token_index, - ) - input_ids = None - model_output = self.language_model(input_ids, positions, intermediate_tensors, inputs_embeds) return model_output diff --git a/vllm/model_executor/models/hyperclovax_vision.py b/vllm/model_executor/models/hyperclovax_vision.py index f851688bf7ba..b0f9d5e2657e 100644 --- a/vllm/model_executor/models/hyperclovax_vision.py +++ b/vllm/model_executor/models/hyperclovax_vision.py @@ -45,8 +45,7 @@ from .clip import CLIPVisionModel from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .siglip import SiglipVisionModel -from .utils import (AutoWeightsLoader, init_vllm_registered_model, isin_list, - maybe_prefix) +from .utils import AutoWeightsLoader, init_vllm_registered_model, maybe_prefix from .vision import get_vision_encoder_info EOT = "<|endofturn|>" @@ -747,18 +746,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
- elif inputs_embeds is None: - multimodal_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - multimodal_embeddings, - is_multimodal=isin_list( - input_ids, - [self.config.image_token_id, self.config.video_token_id]), - ) - input_ids = None hidden_states = self.language_model.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/idefics3.py b/vllm/model_executor/models/idefics3.py index 3334ee224253..dddf1c6fb626 100644 --- a/vllm/model_executor/models/idefics3.py +++ b/vllm/model_executor/models/idefics3.py @@ -702,17 +702,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_id, - ) - input_ids = None - hidden_states = self.model.text_model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/interns1.py b/vllm/model_executor/models/interns1.py index 545dad1a96f5..0292845f819c 100644 --- a/vllm/model_executor/models/interns1.py +++ b/vllm/model_executor/models/interns1.py @@ -40,7 +40,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, - init_vllm_registered_model, isin_list, maybe_prefix) + init_vllm_registered_model, maybe_prefix) class InternS1MultiModalProjector(nn.Module): @@ -798,22 +798,6 @@ def forward( input_ids = None inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - context_token_ids = [ - token_id for token_id in (self.img_context_token_id, - self.video_context_token_id) - if token_id is not None - ] - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=isin_list(input_ids, context_token_ids), - ) - input_ids = None - forward_kwargs = { "input_ids": input_ids, "positions": positions, diff --git a/vllm/model_executor/models/internvl.py b/vllm/model_executor/models/internvl.py index 78aac8541434..0c95c49f90b1 100644 --- a/vllm/model_executor/models/internvl.py +++ b/vllm/model_executor/models/internvl.py @@ -43,7 +43,7 @@ from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal, SupportsPP) from .utils import (AutoWeightsLoader, flatten_bn, init_vllm_registered_model, - isin_list, maybe_prefix) + maybe_prefix) IMG_START = '' IMG_END = '' @@ -1371,22 +1371,6 @@ def forward( input_ids = None inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
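Editor's note: several of the deleted branches above (GLM-4V, HyperCLOVAX, InternVL) mask more than one placeholder id at once. Independent of the removed helper, such a mask can be built with torch.isin; a tiny illustration with made-up token ids:

import torch

input_ids = torch.tensor([1, 42, 42, 7, 99, 5])
placeholder_ids = torch.tensor([42, 99])  # e.g. image and video token ids
is_multimodal = torch.isin(input_ids, placeholder_ids)
# tensor([False,  True,  True, False,  True, False])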
- elif inputs_embeds is None: - context_token_ids = [ - token_id for token_id in (self.img_context_token_id, - self.video_context_token_id) - if token_id is not None - ] - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=isin_list(input_ids, context_token_ids), - ) - input_ids = None - forward_kwargs = { "input_ids": input_ids, "positions": positions, diff --git a/vllm/model_executor/models/kimi_vl.py b/vllm/model_executor/models/kimi_vl.py index db032736f914..30ec9029f74f 100644 --- a/vllm/model_executor/models/kimi_vl.py +++ b/vllm/model_executor/models/kimi_vl.py @@ -433,22 +433,6 @@ def forward( ) -> IntermediateTensors: if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner from - # `get_multimodal_embeddings` and `get_input_embeddings`, this - # condition is only for v0 compatibility. - elif inputs_embeds is None: - image_input = self._parse_and_validate_image_input(**kwargs) - if image_input is None: - inputs_embeds = None - else: - image_embeds = self._process_image_input(image_input) - inputs_embeds = self.get_input_embeddings( - input_ids, - image_embeds, - is_multimodal=input_ids == - self.config.media_placeholder_token_id, - ) - input_ids = None hidden_states = self.language_model( input_ids=input_ids, diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 6f3cfd88aee2..46cf93be191e 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -723,17 +723,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_index, - ) - input_ids = None - hidden_states = self.language_model.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index e132389c4f06..c4f1daaab9bf 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -547,17 +547,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. - elif inputs_embeds is None: - vision_embeddings = self.get_multimodal_embeddings(**kwargs) - inputs_embeds = self.get_input_embeddings( - input_ids, - vision_embeddings, - is_multimodal=input_ids == self.config.image_token_index, - ) - input_ids = None - hidden_states = self.language_model.model(input_ids, positions, intermediate_tensors, diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index 2642d8c77cf3..aebc661d53f8 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -431,17 +431,6 @@ def forward( if intermediate_tensors is not None: inputs_embeds = None - # NOTE: In v1, inputs_embeds is always generated at model runner, this - # condition is for v0 compatibility. 
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.config.video_token_index,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py
index 906858f4e2f4..6088195c91d5 100644
--- a/vllm/model_executor/models/llava_onevision.py
+++ b/vllm/model_executor/models/llava_onevision.py
@@ -30,8 +30,7 @@
                          LlavaNextProcessingInfo)
 from .siglip import SiglipVisionModel
 from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)

 # For profile run
 _MAX_FRAMES_PER_VIDEO = 16
@@ -850,33 +849,6 @@ def get_multimodal_embeddings(self,

         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[LlavaOnevisionImagePixelInputs] = None,
-        video_input: Optional[LlavaOnevisionVideoPixelInputs] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=self.config.image_token_index,
-            )
-
-        if video_input is not None:
-            video_embeds = self._process_video_pixels(video_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                video_embeds,
-                placeholder_token_id=self.config.video_token_index,
-            )
-
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -894,22 +866,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            video_input = self._parse_and_validate_video_input(**kwargs)
-
-            if image_input is None and video_input is None:
-                inputs_embeds = None
-            else:
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    video_input=video_input)
-                input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/minicpmv.py b/vllm/model_executor/models/minicpmv.py
index bffc9a0c125e..eaa3839af37b 100644
--- a/vllm/model_executor/models/minicpmv.py
+++ b/vllm/model_executor/models/minicpmv.py
@@ -71,7 +71,7 @@
 from .idefics2_vision_model import Idefics2VisionTransformer
 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
                          SupportsMultiModal, SupportsPP)
-from .utils import AutoWeightsLoader, flatten_bn, isin_list, maybe_prefix
+from .utils import AutoWeightsLoader, flatten_bn, maybe_prefix

 # For profile run
 _MAX_FRAMES_PER_VIDEO = 16
@@ -1154,19 +1154,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=isin_list(input_ids, list(self.mm_token_ids)),
-            )
-            input_ids = None
-
         hidden_states = self.llm.model(
             input_ids=input_ids,
             positions=positions,
diff --git a/vllm/model_executor/models/mistral3.py b/vllm/model_executor/models/mistral3.py
index 31571ce962d1..e932f7f007f5 100644
--- a/vllm/model_executor/models/mistral3.py
+++ b/vllm/model_executor/models/mistral3.py
@@ -571,17 +571,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.config.image_token_index,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/mllama4.py b/vllm/model_executor/models/mllama4.py
index 3af5267928cd..db5a9fbc6a33 100644
--- a/vllm/model_executor/models/mllama4.py
+++ b/vllm/model_executor/models/mllama4.py
@@ -823,17 +823,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner,
-        # this condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.config.image_token_index,
-            )
-            input_ids = None
-
         return self.language_model(input_ids, positions,
                                    intermediate_tensors, inputs_embeds)

diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py
index 054caee9e8a4..0227a83a1f55 100644
--- a/vllm/model_executor/models/molmo.py
+++ b/vllm/model_executor/models/molmo.py
@@ -1490,17 +1490,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.img_patch_id,
-            )
-            input_ids = None
-
         hidden_states = self.model(input_ids,
                                    positions,
                                    intermediate_tensors,
diff --git a/vllm/model_executor/models/nano_nemotron_vl.py b/vllm/model_executor/models/nano_nemotron_vl.py
index 505806a15c89..2d0ebdc90277 100644
--- a/vllm/model_executor/models/nano_nemotron_vl.py
+++ b/vllm/model_executor/models/nano_nemotron_vl.py
@@ -35,7 +35,7 @@
 from vllm.model_executor.models.radio import RadioModel
 from vllm.model_executor.models.utils import (flatten_bn,
                                               init_vllm_registered_model,
-                                              isin_list, maybe_prefix)
+                                              maybe_prefix)
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
                                     MultiModalKwargs, MultiModalKwargsItems,
@@ -1135,22 +1135,6 @@ def forward(
             input_ids = None
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            context_token_ids = [
-                token_id for token_id in (self.img_context_token_id,
-                                          self.video_context_token_id)
-                if token_id is not None
-            ]
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=isin_list(input_ids, context_token_ids),
-            )
-            input_ids = None
-
         hidden_states = self.language_model(
             input_ids=input_ids,
             positions=positions,
diff --git a/vllm/model_executor/models/nemotron_vl.py b/vllm/model_executor/models/nemotron_vl.py
index 2627a262e958..0e7ec8e458cf 100644
--- a/vllm/model_executor/models/nemotron_vl.py
+++ b/vllm/model_executor/models/nemotron_vl.py
@@ -608,17 +608,6 @@ def forward(
             input_ids = None
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.img_context_token_id,
-            )
-            input_ids = None
-
         forward_kwargs = {
             "input_ids": input_ids,
             "positions": positions,
diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py
index 8503d3f71d1c..2f9c6ddfc661 100644
--- a/vllm/model_executor/models/ovis.py
+++ b/vllm/model_executor/models/ovis.py
@@ -511,17 +511,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.image_pad_token_id,
-            )
-            input_ids = None
-
         # up until here we have an inputs_embeds 100% numerical identity
         # between the OG HF Transformers implementation and ours
         hidden_states = self.llm(
diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py
index 2ecc7bff07e0..86ce7e9eab27 100644
--- a/vllm/model_executor/models/ovis2_5.py
+++ b/vllm/model_executor/models/ovis2_5.py
@@ -596,18 +596,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.image_pad_token_id,
-            )
-            input_ids = None
-
         # up until here we have a inputs_embeds 100% numerical identity
         # between the OG HF Transformers implementation and ours
         hidden_states = self.llm(
diff --git a/vllm/model_executor/models/paligemma.py b/vllm/model_executor/models/paligemma.py
index f07f444819f4..d118e6c89ab5 100644
--- a/vllm/model_executor/models/paligemma.py
+++ b/vllm/model_executor/models/paligemma.py
@@ -370,17 +370,6 @@ def forward(self,
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.config.image_token_index,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/phi3v.py b/vllm/model_executor/models/phi3v.py
index ea34c8d92f13..59977796e2af 100644
--- a/vllm/model_executor/models/phi3v.py
+++ b/vllm/model_executor/models/phi3v.py
@@ -679,17 +679,6 @@ def forward(self,
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=self.image_token_id,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py
index e8b79717d75d..3dbb67d28065 100644
--- a/vllm/model_executor/models/phi4_multimodal.py
+++ b/vllm/model_executor/models/phi4_multimodal.py
@@ -1411,22 +1411,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            audio_input = self._parse_and_validate_audio_input(**kwargs)
-
-            if image_input is None and audio_input is None:
-                inputs_embeds = None
-            else:
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    audio_input=audio_input)
-                input_ids = None
-
         hidden_states = self.language_model(
             input_ids,
             positions,
diff --git a/vllm/model_executor/models/phi4mm.py b/vllm/model_executor/models/phi4mm.py
index 15b09c7ae2bc..8ccc7129ddb2 100644
--- a/vllm/model_executor/models/phi4mm.py
+++ b/vllm/model_executor/models/phi4mm.py
@@ -35,8 +35,7 @@
 from .idefics2_vision_model import Idefics2VisionTransformer
 from .interfaces import MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal
 from .phi4mm_audio import AudioEmbedding
-from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn, maybe_prefix,
-                    merge_multimodal_embeddings)
+from .utils import AutoWeightsLoader, WeightsMapper, flatten_bn, maybe_prefix

 # <|endoftext10|> (see vocab.json in hf model)
 _IMAGE_PLACEHOLDER_TOKEN_ID = 200010
@@ -1174,35 +1173,6 @@ def get_multimodal_embeddings(self,

         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Phi4MMImagePixelInputs] = None,
-        audio_input: Optional[Phi4MMAudioFeatureInputs] = None,
-    ) -> torch.Tensor:
-        audio_projection_mode = 'speech'
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=_IMAGE_PLACEHOLDER_TOKEN_ID,
-            )
-            audio_projection_mode = 'vision'
-
-        if audio_input is not None:
-            audio_embeds = self._process_audio_input(
-                audio_input, audio_projection_mode=audio_projection_mode)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                audio_embeds,
-                placeholder_token_id=_AUDIO_PLACEHOLDER_TOKEN_ID,
-            )
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -1214,22 +1184,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            audio_input = self._parse_and_validate_audio_input(**kwargs)
-
-            if image_input is None and audio_input is None:
-                inputs_embeds = None
-            else:
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    audio_input=audio_input)
-                input_ids = None
-
         hidden_states = self.model(
             input_ids,
             positions,
diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py
index 2c04b6f0f4f9..6344fc394833 100644
--- a/vllm/model_executor/models/pixtral.py
+++ b/vllm/model_executor/models/pixtral.py
@@ -444,17 +444,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.vision_args.image_token_id,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/qwen2_5_omni_thinker.py b/vllm/model_executor/models/qwen2_5_omni_thinker.py
index bfa398ee43b5..8f069710b0f9 100644
--- a/vllm/model_executor/models/qwen2_5_omni_thinker.py
+++ b/vllm/model_executor/models/qwen2_5_omni_thinker.py
@@ -69,8 +69,7 @@
 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
                          SupportsMultiModal, SupportsPP)
 from .utils import (AutoWeightsLoader, WeightsMapper,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)

 try:
     import flash_attn
@@ -908,26 +907,6 @@ def get_multimodal_embeddings_v0(
             multimodal_embeddings.append((video_embeds, "video"))
         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        multimodal_embeddings: Optional[NestedTensors] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.language_model.get_input_embeddings(input_ids)
-        if multimodal_embeddings is None or len(multimodal_embeddings) == 0:
-            return inputs_embeds
-
-        for embeddings, modality in multimodal_embeddings:
-            if modality == "audio":
-                placeholder_token_id = self.config.audio_token_index
-            if modality == "image":
-                placeholder_token_id = self.config.image_token_index
-            if modality == "video":
-                placeholder_token_id = self.config.video_token_index
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids, inputs_embeds, embeddings, placeholder_token_id)
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -939,14 +918,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            multimodal_embeddings = self.get_multimodal_embeddings_v0(**kwargs)
-            inputs_embeds = self.get_input_embeddings_v0(
-                input_ids, multimodal_embeddings)
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/qwen2_5_vl.py b/vllm/model_executor/models/qwen2_5_vl.py
index 5b092b42205f..da3889d31a7d 100644
--- a/vllm/model_executor/models/qwen2_5_vl.py
+++ b/vllm/model_executor/models/qwen2_5_vl.py
@@ -64,7 +64,6 @@
 from vllm.multimodal.processing import PromptReplacement, PromptUpdate
 from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.config import uses_mrope
 from vllm.utils import is_pin_memory_available
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

@@ -75,8 +74,7 @@
 from .qwen2_vl import (Qwen2VLMultiModalProcessor, Qwen2VLProcessingInfo,
                        apply_rotary_pos_emb_vision)
 from .utils import (AutoWeightsLoader, WeightsMapper, cast_overflow_tensors,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)
 from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

 logger = init_logger(__name__)
@@ -1365,40 +1363,6 @@ def get_multimodal_embeddings(self,
                 multimodal_embeddings += video_embeddings
         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Qwen2_5_VLImageInputs] = None,
-        video_input: Optional[Qwen2_5_VLVideoInputs] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            if self.is_multimodal_pruning_enabled:
-                image_embeds = self._postprocess_image_embeds_evs(
-                    image_embeds, image_input
-                )
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=self.config.image_token_id,
-            )
-
-        if video_input is not None:
-            video_embeds = self._process_video_input(video_input)
-            if self.is_multimodal_pruning_enabled:
-                video_embeds = self._postprocess_video_embeds_evs(
-                    video_embeds, video_input
-                )
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                video_embeds,
-                placeholder_token_id=self.config.video_token_id,
-            )
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -1421,26 +1385,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            video_input = self._parse_and_validate_video_input(**kwargs)
-
-            if image_input is None and video_input is None:
-                inputs_embeds = None
-            else:
-                if uses_mrope(self.config):
-                    assert positions.ndim == 2 and positions.size(0) == 3, (
-                        "multimodal section rotary embedding requires "
-                        f"(3, seq_len) positions, but got {positions.size()}")
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    video_input=video_input)
-                input_ids = None
-
         hidden_states = self.language_model.model(
             input_ids=input_ids,
             positions=positions,
diff --git a/vllm/model_executor/models/qwen2_audio.py b/vllm/model_executor/models/qwen2_audio.py
index 9dfa29eef5ce..f9136863b8d6 100644
--- a/vllm/model_executor/models/qwen2_audio.py
+++ b/vllm/model_executor/models/qwen2_audio.py
@@ -449,17 +449,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            multimodal_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                multimodal_embeddings,
-                is_multimodal=input_ids == self.config.audio_token_index,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,
diff --git a/vllm/model_executor/models/qwen2_vl.py b/vllm/model_executor/models/qwen2_vl.py
index 6ef01f333554..f83a411459cc 100644
--- a/vllm/model_executor/models/qwen2_vl.py
+++ b/vllm/model_executor/models/qwen2_vl.py
@@ -65,15 +65,13 @@
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.platforms import _Backend, current_platform
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.config import uses_mrope
 from vllm.transformers_utils.tokenizer import AnyTokenizer
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

 from .interfaces import (MultiModalEmbeddings, SupportsLoRA, SupportsMRoPE,
                          SupportsMultiModal, SupportsPP)
 from .utils import (AutoWeightsLoader, WeightsMapper,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)
 from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

 logger = init_logger(__name__)
@@ -1464,32 +1462,6 @@ def get_multimodal_embeddings(self,

         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Qwen2VLImagePixelInputs] = None,
-        video_input: Optional[Qwen2VLVideoPixelInputs] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=self.config.image_token_id,
-            )
-
-        if video_input is not None:
-            video_embeds = self._process_video_input(video_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                video_embeds,
-                placeholder_token_id=self.config.video_token_id,
-            )
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -1515,26 +1487,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            video_input = self._parse_and_validate_video_input(**kwargs)
-
-            if image_input is None and video_input is None:
-                inputs_embeds = None
-            else:
-                if uses_mrope(self.config):
-                    assert positions.ndim == 2 and positions.size(0) == 3, (
-                        "multimodal section rotary embedding requires "
-                        f"(3, seq_len) positions, but got {positions.size()}")
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    video_input=video_input)
-                input_ids = None
-
         hidden_states = self.language_model.model(
             input_ids=input_ids,
             positions=positions,
diff --git a/vllm/model_executor/models/qwen3_vl.py b/vllm/model_executor/models/qwen3_vl.py
index 6d2a6019ef6f..ce92557d6424 100644
--- a/vllm/model_executor/models/qwen3_vl.py
+++ b/vllm/model_executor/models/qwen3_vl.py
@@ -68,7 +68,6 @@
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.config import uses_mrope
 from vllm.utils import is_list_of

 from .interfaces import (MultiModalEmbeddings, SupportsLoRA,
@@ -82,8 +81,7 @@
 from .qwen2_vl import Qwen2VLProcessingInfo
 from .qwen3 import Qwen3ForCausalLM, Qwen3Model
 from .utils import (AutoWeightsLoader, PPMissingLayer, WeightsMapper,
-                    _merge_multimodal_embeddings, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    _merge_multimodal_embeddings, maybe_prefix)
 from .vision import get_vit_attn_backend, run_dp_sharded_mrope_vision_model

 logger = init_logger(__name__)
@@ -1464,75 +1462,6 @@ def get_input_embeddings(

         return inputs_embeds

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Qwen2_5_VLImageInputs] = None,
-        video_input: Optional[Qwen2_5_VLVideoInputs] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.get_input_embeddings(input_ids)
-
-        if self.use_deepstack:
-            visual_dim = inputs_embeds.shape[-1]
-            deepstack_input_embeds = None
-        if image_input is not None or video_input is not None:
-            deepstack_input_embeds = torch.zeros_like(
-                inputs_embeds).unsqueeze(1).repeat(
-                    1, self.deepstack_num_level, 1).flatten(1)
-
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            if self.use_deepstack:
-                image_embeds = torch.cat(image_embeds)
-
-                image_embeds, image_embeds_multiscale = image_embeds.split(
-                    [visual_dim, visual_dim * self.deepstack_num_level],
-                    dim=-1)
-
-                deepstack_input_embeds = merge_multimodal_embeddings(
-                    input_ids,
-                    deepstack_input_embeds,
-                    image_embeds_multiscale,
-                    placeholder_token_id=self.config.image_token_id,
-                )
-
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=self.config.image_token_id,
-            )
-
-        if video_input is not None:
-            video_embeds = self._process_video_input(video_input)
-            if self.use_deepstack:
-                video_embeds = torch.cat(video_embeds)
-
-                video_embeds, video_embeds_multiscale = video_embeds.split(
-                    [visual_dim, visual_dim * self.deepstack_num_level],
-                    dim=-1)
-
-                deepstack_input_embeds = merge_multimodal_embeddings(
-                    input_ids,
-                    deepstack_input_embeds,
-                    video_embeds_multiscale,
-                    placeholder_token_id=self.config.video_token_id,
-                )
-
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                video_embeds,
-                placeholder_token_id=self.config.video_token_id,
-            )
-
-        if self.use_deepstack and deepstack_input_embeds is not None:
-            deepstack_input_embeds = deepstack_input_embeds.view(
-                inputs_embeds.shape[0], self.deepstack_num_level,
-                visual_dim).permute(1, 0, 2).contiguous()
-            self._set_deepstack_input_embeds(deepstack_input_embeds)
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -1568,26 +1497,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            video_input = self._parse_and_validate_video_input(**kwargs)
-
-            if image_input is None and video_input is None:
-                inputs_embeds = None
-            else:
-                if uses_mrope(self.config):
-                    assert positions.ndim == 2 and positions.size(0) == 3, (
-                        "multimodal section rotary embedding requires "
-                        f"(3, seq_len) positions, but got {positions.size()}")
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    video_input=video_input)
-                input_ids = None
-
         if self.use_deepstack and inputs_embeds is not None and get_pp_group(
         ).is_first_rank:
             deepstack_input_embeds = self._get_deepstack_input_embeds(
diff --git a/vllm/model_executor/models/qwen_vl.py b/vllm/model_executor/models/qwen_vl.py
index dc11b60604a9..924119ed63ab 100644
--- a/vllm/model_executor/models/qwen_vl.py
+++ b/vllm/model_executor/models/qwen_vl.py
@@ -767,18 +767,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids ==
-                self.transformer.visual.image_pad_id,
-            )
-            input_ids = None
-
         hidden_states = self.transformer(input_ids, positions,
                                          intermediate_tensors, inputs_embeds)
         return hidden_states
diff --git a/vllm/model_executor/models/skyworkr1v.py b/vllm/model_executor/models/skyworkr1v.py
index f9a107c06085..f03022aa719c 100644
--- a/vllm/model_executor/models/skyworkr1v.py
+++ b/vllm/model_executor/models/skyworkr1v.py
@@ -874,17 +874,6 @@ def forward(
             input_ids = None
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            vision_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                vision_embeddings,
-                is_multimodal=input_ids == self.img_context_token_id,
-            )
-            input_ids = None
-
         forward_kwargs = {
             "input_ids": input_ids,
             "positions": positions,
diff --git a/vllm/model_executor/models/transformers.py b/vllm/model_executor/models/transformers.py
index 7cfb639f675d..00d87f560e70 100644
--- a/vllm/model_executor/models/transformers.py
+++ b/vllm/model_executor/models/transformers.py
@@ -881,19 +881,6 @@ def forward(
         inputs_embeds: Optional[torch.Tensor] = None,
         **kwargs: object,
     ) -> Union[torch.Tensor, IntermediateTensors]:
-        # NOTE: In v1, inputs_embeds is always generated at model runner from
-        # `get_multimodal_embeddings` and `get_input_embeddings`, this
-        # condition is only for v0 compatibility.
-        if inputs_embeds is None:
-            multimodal_embeds = self.get_multimodal_embeddings(**kwargs)
-            if multimodal_embeds is not None:
-                inputs_embeds = self.get_input_embeddings(
-                    input_ids,
-                    multimodal_embeds,
-                    is_multimodal=input_ids == self.config.image_token_id,
-                )
-                input_ids = None
-
         model_output = super().forward(input_ids, positions,
                                        intermediate_tensors, inputs_embeds)
         return model_output
diff --git a/vllm/model_executor/models/ultravox.py b/vllm/model_executor/models/ultravox.py
index 77e886c22e63..70aabf6dfe78 100644
--- a/vllm/model_executor/models/ultravox.py
+++ b/vllm/model_executor/models/ultravox.py
@@ -597,18 +597,6 @@ def forward(self,
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            multimodal_embeddings = self.get_multimodal_embeddings(**kwargs)
-
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                multimodal_embeddings,
-                is_multimodal=input_ids == self.config.audio_token_index,
-            )
-            input_ids = None
-
         language_model = self.language_model
         if hasattr(language_model, "language_model"):
             language_model = language_model.language_model
diff --git a/vllm/model_executor/models/voxtral.py b/vllm/model_executor/models/voxtral.py
index f93e7ccfd06f..1edeaeb0f319 100644
--- a/vllm/model_executor/models/voxtral.py
+++ b/vllm/model_executor/models/voxtral.py
@@ -371,19 +371,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        # NOTE: In v1, inputs_embeds is always generated at model runner, this
-        # condition is for v0 compatibility.
-        elif inputs_embeds is None:
-            audio_encoder = self.tokenizer.instruct.audio_encoder
-            audio_tok_id = audio_encoder.audio_token
-            audio_embeddings = self.get_multimodal_embeddings(**kwargs)
-            inputs_embeds = self.get_input_embeddings(
-                input_ids,
-                audio_embeddings,
-                is_multimodal=input_ids == audio_tok_id,
-            )
-            input_ids = None
-
         hidden_states = self.language_model.model(input_ids,
                                                   positions,
                                                   intermediate_tensors,

From 1b67b0465647490c6abf2afe330a0cbb2eb2949b Mon Sep 17 00:00:00 2001
From: Cyrus Leung
Date: Mon, 29 Sep 2025 16:03:37 +0800
Subject: [PATCH 474/518] [Misc] Remove more `get_input_embeddings_v0` (#25857)

Signed-off-by: DarkLight1337
---
 vllm/model_executor/models/gemma3n_mm.py      |  3 +-
 vllm/model_executor/models/keye.py            | 46 +------------------
 vllm/model_executor/models/phi4_multimodal.py | 37 +--------------
 vllm/model_executor/models/utils.py           |  4 ++
 4 files changed, 7 insertions(+), 83 deletions(-)

diff --git a/vllm/model_executor/models/gemma3n_mm.py b/vllm/model_executor/models/gemma3n_mm.py
index b23437a08e5a..101e083ac123 100644
--- a/vllm/model_executor/models/gemma3n_mm.py
+++ b/vllm/model_executor/models/gemma3n_mm.py
@@ -45,8 +45,7 @@
 from .interfaces import (MultiModalEmbeddings, SupportsMultiModal,
                          SupportsTranscription)
 from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)

 logger = init_logger(__name__)

diff --git a/vllm/model_executor/models/keye.py b/vllm/model_executor/models/keye.py
index 62a71b7b1fa8..10b5c45169f4 100644
--- a/vllm/model_executor/models/keye.py
+++ b/vllm/model_executor/models/keye.py
@@ -41,7 +41,6 @@
 from vllm.multimodal.profiling import BaseDummyInputsBuilder
 from vllm.platforms import _Backend
 from vllm.sequence import IntermediateTensors
-from vllm.transformers_utils.config import uses_mrope
 from vllm.utils import is_list_of
 from vllm.utils.tensor_schema import TensorSchema, TensorShape

@@ -50,7 +49,7 @@
 from .siglip import SiglipMLP
 from .utils import (AutoWeightsLoader, WeightsMapper,
                     init_vllm_registered_model, is_pp_missing_parameter,
-                    maybe_prefix, merge_multimodal_embeddings)
+                    maybe_prefix)
 from .vision import get_vit_attn_backend

 logger = init_logger(__name__)
@@ -1450,32 +1449,6 @@ def get_multimodal_embeddings(
                 multimodal_embeddings += video_embeddings
         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Any] = None,
-        video_input: Optional[Any] = None,
-    ) -> torch.Tensor:
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=self.config.image_token_id,
-            )
-
-        if video_input is not None:
-            video_embeds = self._process_video_input(video_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                video_embeds,
-                placeholder_token_id=self.config.video_token_id,
-            )
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
@@ -1500,23 +1473,6 @@ def forward(
         if intermediate_tensors is not None:
             inputs_embeds = None

-        elif inputs_embeds is None:
-            image_input = self._parse_and_validate_image_input(**kwargs)
-            video_input = self._parse_and_validate_video_input(**kwargs)
-            if image_input is None and video_input is None:
-                inputs_embeds = None
-            else:
-                if uses_mrope(self.config):
-                    assert positions.ndim == 2 and positions.size(0) == 3, (
-                        "multimodal section rotary embedding requires "
-                        f"(3, seq_len) positions, but got {positions.size()}")
-                inputs_embeds = self.get_input_embeddings_v0(
-                    input_ids,
-                    image_input=image_input,
-                    video_input=video_input,
-                )
-                input_ids = None
-
         hidden_states = self.language_model.model(
             input_ids=input_ids,
             positions=positions,
diff --git a/vllm/model_executor/models/phi4_multimodal.py b/vllm/model_executor/models/phi4_multimodal.py
index 3dbb67d28065..a4f9f96cb951 100644
--- a/vllm/model_executor/models/phi4_multimodal.py
+++ b/vllm/model_executor/models/phi4_multimodal.py
@@ -44,13 +44,7 @@
 from .idefics2_vision_model import Idefics2VisionTransformer
 from .interfaces import MultiModalEmbeddings, SupportsLoRA, SupportsMultiModal
 from .utils import (AutoWeightsLoader, WeightsMapper, flatten_bn,
-                    init_vllm_registered_model, maybe_prefix,
-                    merge_multimodal_embeddings)
+                    init_vllm_registered_model, maybe_prefix)
-
-# <|endoftext10|> (see vocab.json in hf model)
-_IMAGE_PLACEHOLDER_TOKEN_ID = 200010
-# <|endoftext11|>
-_AUDIO_PLACEHOLDER_TOKEN_ID = 200011

 _AUDIO_MAX_SOUNDFILE_SIZE = 241_000

@@ -1371,35 +1365,6 @@ def get_multimodal_embeddings(self,

         return multimodal_embeddings

-    def get_input_embeddings_v0(
-        self,
-        input_ids: torch.Tensor,
-        image_input: Optional[Phi4MMImagePixelInputs] = None,
-        audio_input: Optional[Phi4MMAudioFeatureInputs] = None,
-    ) -> torch.Tensor:
-        audio_projection_mode = 'speech'
-        inputs_embeds = self.get_input_embeddings(input_ids)
-        if image_input is not None:
-            image_embeds = self._process_image_input(image_input)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                image_embeds,
-                placeholder_token_id=_IMAGE_PLACEHOLDER_TOKEN_ID,
-            )
-            audio_projection_mode = 'vision'
-
-        if audio_input is not None:
-            audio_embeds = self._process_audio_input(
-                audio_input, audio_projection_mode=audio_projection_mode)
-            inputs_embeds = merge_multimodal_embeddings(
-                input_ids,
-                inputs_embeds,
-                audio_embeds,
-                placeholder_token_id=_AUDIO_PLACEHOLDER_TOKEN_ID,
-            )
-        return inputs_embeds
-
     def forward(
         self,
         input_ids: torch.Tensor,
diff --git a/vllm/model_executor/models/utils.py b/vllm/model_executor/models/utils.py
index 4bf151fbf62d..d6fa88f06e56 100644
--- a/vllm/model_executor/models/utils.py
+++ b/vllm/model_executor/models/utils.py
@@ -10,6 +10,7 @@
 import torch.nn as nn
 from torch.func import functional_call
 from transformers import PretrainedConfig
+from typing_extensions import deprecated

 import vllm.envs as envs
 from vllm.config import VllmConfig
@@ -439,6 +440,9 @@ def _merge_multimodal_embeddings(
     return inputs_embeds


+@deprecated("`merge_multimodal_embeddings` has been replaced with "
+            "`SupportsMultiModal.get_input_embeddings` and will be "
+            "removed in v0.12.")
 def merge_multimodal_embeddings(
     input_ids: torch.Tensor,
     inputs_embeds: torch.Tensor,

From 9360d34fa18fcba15a0e4dccb2ef3d781bcfba7c Mon Sep 17 00:00:00 2001
From: youkaichao
Date: Mon, 29 Sep 2025 17:51:43 +0800
Subject: [PATCH 475/518] update to latest deepgemm for dsv3.2 (#25871)

Signed-off-by: youkaichao
---
 docker/Dockerfile         | 2 +-
 tools/install_deepgemm.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/docker/Dockerfile b/docker/Dockerfile
index 62be0a12e988..a6b24f69f64e 100644
--- a/docker/Dockerfile
+++ b/docker/Dockerfile
@@ -456,7 +456,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
 ARG DEEPGEMM_GIT_REF
 COPY tools/install_deepgemm.sh /tmp/install_deepgemm.sh
 RUN --mount=type=cache,target=/root/.cache/uv \
-    VLLM_DOCKER_BUILD_CONTEXT=1 /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"}
+    VLLM_DOCKER_BUILD_CONTEXT=1 TORCH_CUDA_ARCH_LIST="9.0a 10.0a" /tmp/install_deepgemm.sh --cuda-version "${CUDA_VERSION}" ${DEEPGEMM_GIT_REF:+--ref "$DEEPGEMM_GIT_REF"}

 COPY tools/install_gdrcopy.sh install_gdrcopy.sh
 RUN set -eux; \
diff --git a/tools/install_deepgemm.sh b/tools/install_deepgemm.sh
index 98427f1835ec..4f2cd302c3ef 100755
--- a/tools/install_deepgemm.sh
+++ b/tools/install_deepgemm.sh
@@ -6,7 +6,7 @@ set -e

 # Default values
 DEEPGEMM_GIT_REPO="https://github.com/deepseek-ai/DeepGEMM.git"
-DEEPGEMM_GIT_REF="ea9c5d9270226c5dd7a577c212e9ea385f6ef048"
+DEEPGEMM_GIT_REF="594953acce41793ae00a1233eb516044d604bcb6"

 # Parse command line arguments
 while [[ $# -gt 0 ]]; do

From edbaadd91f06647f6d3e727cc327de856a96ab53 Mon Sep 17 00:00:00 2001
From: Yingjun Mou
Date: Mon, 29 Sep 2025 03:49:35 -0700
Subject: [PATCH 476/518] [Bugfix] Fix requirements paths in install
 instructions (#25827)

Signed-off-by: yingjun-mou
---
 docs/getting_started/installation/cpu/s390x.inc.md | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/docs/getting_started/installation/cpu/s390x.inc.md b/docs/getting_started/installation/cpu/s390x.inc.md
index f9c4ccb942fa..442c2b4ec64e 100644
--- a/docs/getting_started/installation/cpu/s390x.inc.md
+++ b/docs/getting_started/installation/cpu/s390x.inc.md
@@ -46,22 +46,22 @@ Execute the following commands to build and install vLLM from source.
     Please build the following dependencies, `torchvision`, `pyarrow` from source before building vLLM.
    ```bash
-    sed -i '/^torch/d' requirements-build.txt # remove torch from requirements-build.txt since we use nightly builds
+    sed -i '/^torch/d' requirements/build.txt # remove torch from requirements/build.txt since we use nightly builds
     uv pip install -v \
         --torch-backend auto \
-        -r requirements-build.txt \
-        -r requirements-cpu.txt \
+        -r requirements/build.txt \
+        -r requirements/cpu.txt \
         VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
     uv pip install dist/*.whl
     ```

 ??? console "pip"
     ```bash
-    sed -i '/^torch/d' requirements-build.txt # remove torch from requirements-build.txt since we use nightly builds
+    sed -i '/^torch/d' requirements/build.txt # remove torch from requirements/build.txt since we use nightly builds
     pip install -v \
         --extra-index-url https://download.pytorch.org/whl/nightly/cpu \
-        -r requirements-build.txt \
-        -r requirements-cpu.txt \
+        -r requirements/build.txt \
+        -r requirements/cpu.txt \
         VLLM_TARGET_DEVICE=cpu python setup.py bdist_wheel && \
     pip install dist/*.whl
     ```

From 8616300ae2a00b48aee2f02220e9e007f76ce77c Mon Sep 17 00:00:00 2001
From: Zhou Jiahao
Date: Mon, 29 Sep 2025 18:59:04 +0800
Subject: [PATCH 477/518] [Model][Bugfix] Fix issues in MiDashengLM
 implementation for quantized models (#25854)

Signed-off-by: zhoukz
---
 vllm/model_executor/models/midashenglm.py | 193 ++++++++++++++--------
 1 file changed, 122 insertions(+), 71 deletions(-)

diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py
index 0bf04e0e7e2f..0b62fbd40b07 100644
--- a/vllm/model_executor/models/midashenglm.py
+++ b/vllm/model_executor/models/midashenglm.py
@@ -22,6 +22,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Inference-only MiDashengLM model compatible with HuggingFace weights."""
+
 import collections
 import collections.abc
 from collections.abc import Iterable, Mapping, Sequence
@@ -30,10 +31,10 @@
 import numpy as np
 import torch
 import torch.nn as nn
-import torchaudio.transforms as audio_transforms
+import torchaudio.functional as F
+from torch.nn.functional import scaled_dot_product_attention
 from transformers import BatchFeature

-from vllm.attention.layer import MultiHeadAttention
 from vllm.config import VllmConfig
 from vllm.distributed import get_tensor_model_parallel_world_size
 from vllm.model_executor.layers.activation import get_act_fn
@@ -41,7 +42,6 @@
                                                QKVParallelLinear,
                                                RowParallelLinear)
 from vllm.model_executor.layers.quantization import QuantizationConfig
-from vllm.model_executor.model_loader.utils import set_default_torch_dtype
 from vllm.multimodal import MULTIMODAL_REGISTRY
 from vllm.multimodal.inputs import (MultiModalDataDict, MultiModalFieldConfig,
                                     MultiModalKwargsItems)
@@ -147,15 +147,19 @@ def __init__(
         super().__init__()
         out_features = out_features or in_features
         hidden_features = hidden_features or in_features
-        self.fc1 = ColumnParallelLinear(input_size=in_features,
-                                        output_size=hidden_features,
-                                        quant_config=quant_config,
-                                        prefix=f"{prefix}.fc1")
+        self.fc1 = ColumnParallelLinear(
+            input_size=in_features,
+            output_size=hidden_features,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc1",
+        )
         self.act = get_act_fn("gelu")
-        self.fc2 = RowParallelLinear(input_size=hidden_features,
-                                     output_size=out_features,
-                                     quant_config=quant_config,
-                                     prefix=f"{prefix}.fc2")
+        self.fc2 = RowParallelLinear(
+            input_size=hidden_features,
+            output_size=out_features,
+            quant_config=quant_config,
+            prefix=f"{prefix}.fc2",
+        )

     def forward(self, x: torch.Tensor) -> torch.Tensor:
         x, _ = self.fc1(x)
@@ -171,7 +175,6 @@ def __init__(
         dim: int,
         num_heads: int = 8,
         qkv_bias: bool = False,
-        causal: bool = False,
         quant_config: Optional[QuantizationConfig] = None,
         prefix: str = "",
     ):
@@ -205,33 +208,30 @@ def __init__(
             quant_config=quant_config,
             prefix=f"{prefix}.qkv",
         )
-        self.attn = MultiHeadAttention(
-            self.num_heads,
-            self.head_dim,
-            self.scale,
-            num_kv_heads=self.num_kv_heads,
-        )
         self.proj = RowParallelLinear(
             input_size=dim,
             output_size=dim,
             quant_config=quant_config,
             prefix=f"{prefix}.proj",
         )
-        self.causal = causal

     def forward(self, x: torch.Tensor, mask: Optional[torch.Tensor] = None):
         B, N, C = x.shape
-        qkv_out, _ = self.qkv(x)
-        q, k, v = qkv_out.split([self.q_size, self.kv_size, self.kv_size],
-                                dim=-1)
+        qkv, _ = self.qkv(x)
+        qkv = qkv.reshape(B, N, 3, self.num_heads, C // self.num_heads)
+        qkv = qkv.permute(2, 0, 3, 1, 4)
+        q, k, v = qkv.unbind(0)

-        attn_out = self.attn(q, k, v)
-        C_local = attn_out.numel() // (B * N)  # C_local for parallel
-        attn_out = attn_out.view(B, N, C_local)
-
-        x, _ = self.proj(attn_out)
+        x = scaled_dot_product_attention(
+            q,
+            k,
+            v,
+            attn_mask=mask[:, None, None, :] if mask is not None else None,
+        )
+        x = x.transpose(1, 2).reshape(B, N, C)
+        x, _ = self.proj(x)
         return x

@@ -280,6 +280,63 @@ def forward(
         return x


+class DashengFrontend(nn.Module):
+
+    def __init__(self, config: DashengConfig):
+        super().__init__()
+        self.config = config
+
+        spectrogram_window = torch.hann_window(self.config.win_length)
+        self.register_buffer(
+            "spectrogram_window",
+            spectrogram_window,
+            persistent=False,
+        )
+        self.spectrogram_window: torch.Tensor
+
+        melscale_fbanks = F.melscale_fbanks(
+            n_freqs=self.config.n_fft // 2 + 1,
+            f_min=self.config.f_min,
+            f_max=self.config.f_max,
+            n_mels=self.config.n_mels,
+            sample_rate=self.config.sample_rate,
+        )
+        self.register_buffer("melscale_fbanks",
+                             melscale_fbanks,
+                             persistent=False)
+        self.melscale_fbanks: torch.Tensor
+
+    def forward(self, waveform: torch.Tensor) -> torch.Tensor:
+        spectrogram = F.spectrogram(
+            waveform=waveform.to(torch.float32),
+            pad=0,
+            window=self.spectrogram_window,
+            n_fft=self.config.n_fft,
+            hop_length=self.config.hop_length,
+            win_length=self.config.win_length,
+            power=2,
+            normalized=False,
+            center=self.config.center,
+        )
+        mel_spectrogram = (
+            spectrogram.mT @ self.melscale_fbanks.to(torch.float32)).mT
+        # x has shape [batch, freq, time].
+        # F.amplitude_to_DB accepts inputs shaped as:
+        # - [freq, time]
+        # - [channel, freq, time]
+        # - [..., channel, freq, time]
+        # Here we insert a channel dimension of size 1 before calling it,
+        # then remove that extra dimension afterward.
+        log_mel_spectrogram = F.amplitude_to_DB(
+            mel_spectrogram.unsqueeze(1),
+            multiplier=10,
+            amin=1e-10,
+            db_multiplier=0,
+            top_db=120,
+        ).squeeze(1)
+        return log_mel_spectrogram.to(waveform.dtype)
+
+
 class DashengAudioTransformer(nn.Module):

     def __init__(
@@ -293,7 +350,7 @@ def __init__(
         self.target_length = config.target_length
         self.hop_length = config.hop_length

-        self._init_front_end(config)
+        self.front_end = DashengFrontend(config)

         self.init_bn = nn.BatchNorm2d(config.n_mels, momentum=0.01)

@@ -318,34 +375,10 @@ def __init__(
                 qkv_bias=config.qkv_bias,
                 init_values=config.init_values,
                 quant_config=quant_config,
-                prefix=f"{prefix}.block{i}",
+                prefix=f"{prefix}.blocks.{i}",
             ) for i in range(config.depth))
         self.norm = nn.LayerNorm(config.embed_dim, eps=1e-6)

-    def _init_front_end(self, config):
-        with set_default_torch_dtype(torch.float32):
-            self.front_end = nn.Sequential(
-                audio_transforms.MelSpectrogram(
-                    f_min=config.f_min,
-                    f_max=config.f_max,
-                    center=config.center,
-                    win_length=config.win_length,
-                    hop_length=config.hop_length,
-                    sample_rate=config.sample_rate,
-                    n_fft=config.n_fft,
-                    n_mels=config.n_mels,
-                ),
-                audio_transforms.AmplitudeToDB(top_db=120),
-            )
-
-            mel_spectrogram = self.front_end[0]
-            fb = mel_spectrogram.mel_scale.fb
-            win = mel_spectrogram.spectrogram.window
-            mel_spectrogram.mel_scale.fb = fb.to(torch.bfloat16).to(
-                torch.float32)
-            mel_spectrogram.spectrogram.window = win.to(torch.bfloat16).to(
-                torch.float32)
-
     def forward_features(
         self,
         x: torch.Tensor,
@@ -430,14 +463,16 @@ def __init__(
                 quant_config=quant_config,
                 prefix=f"{prefix}.net.0",
                 return_bias=False,
-            ), get_act_fn("gelu"),
+            ),
+            get_act_fn("gelu"),
             RowParallelLinear(
                 input_size=out_dim,
                 output_size=out_dim,
                 quant_config=quant_config,
                 prefix=f"{prefix}.net.2",
                 return_bias=False,
-            ))
+            ),
+        )

     def forward(self, x, mask=None):
         batch_size, seq_len, dim = x.shape
@@ -534,9 +569,12 @@ def _call_hf_processor(
         # + Padding
         min_audio_len = self.info.get_min_audio_len()
         processed_audios = [
-            np.pad(audio, (0, min_audio_len - audio.shape[-1]),
-                   mode='constant',
-                   constant_values=0) if isinstance(audio, np.ndarray)
+            np.pad(
+                audio,
+                (0, min_audio_len - audio.shape[-1]),
+                mode="constant",
+                constant_values=0,
+            ) if isinstance(audio, np.ndarray)
             and audio.shape[-1] < min_audio_len else audio
             for audio in audios
         ]
@@ -585,8 +623,8 @@ def _get_prompt_updates(
         if audio_length is None:
             audio_output_lengths = []
         else:
-            audio_length_np = audio_length.cpu().numpy() if isinstance(
-                audio_length, torch.Tensor) else audio_length
+            audio_length_np = (audio_length.cpu().numpy() if isinstance(
+                audio_length, torch.Tensor) else audio_length)
             audio_output_lengths = [
                 max(1, calculate_mel_frames_dasheng(
                     int(length)))  # at least one frame
@@ -617,6 +655,17 @@ def get_replacement_midashenglm(item_idx: int):
     dummy_inputs=MiDashengLMDummyInputsBuilder,
 )
 class MiDashengLMModel(nn.Module, SupportsMultiModal, SupportsPP):
+    packed_modules_mapping = {
+        "qkv_proj": [
+            "q_proj",
+            "k_proj",
+            "v_proj",
+        ],
+        "gate_up_proj": [
+            "gate_proj",
+            "up_proj",
+        ],
+    }

     @classmethod
     def get_placeholder_str(cls, modality: str, i: int) -> Optional[str]:
@@ -660,8 +709,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
     def _validate_and_reshape_mm_tensor(self, mm_input: object,
                                         name: str) -> torch.Tensor:
         if not isinstance(mm_input, (torch.Tensor, list)):
-            raise ValueError(f"Incorrect type of {name}. "
-                             f"Got type: {type(mm_input)}")
+            raise ValueError(
+                f"Incorrect type of {name}. Got type: {type(mm_input)}")
         if isinstance(mm_input, torch.Tensor):
             return mm_input.reshape(-1, *mm_input.shape[2:])

@@ -710,8 +759,8 @@ def _process_audio_input(
             audio_input["input_values"].dtype)
         batch_size, max_audio_tokens, embed_dim = audio_embeddings.shape

-        audio_length_np = audio_length.cpu().numpy() if isinstance(
-            audio_length, torch.Tensor) else audio_length
+        audio_length_np = (audio_length.cpu().numpy() if isinstance(
+            audio_length, torch.Tensor) else audio_length)
         audio_output_lengths = [
             max(1, calculate_mel_frames_dasheng(
                 int(length)))  # at least one frame
@@ -720,11 +769,11 @@ def _process_audio_input(
         audio_output_lengths = torch.tensor(audio_output_lengths).to(
             audio_embeddings.device)

-        audio_feature_mask = (torch.arange(
+        audio_feature_mask = torch.arange(
             max_audio_tokens,
             device=audio_embeddings.device).unsqueeze(0).expand(
-                batch_size, max_audio_tokens)
-                              < audio_output_lengths.unsqueeze(1))
+                batch_size,
+                max_audio_tokens) < audio_output_lengths.unsqueeze(1)

         masked_audio_features = audio_embeddings[audio_feature_mask].view(
             -1, embed_dim)
@@ -762,10 +811,12 @@ def forward(
             )
             input_ids = None

-        return self.decoder.model(input_ids,
-                                  positions,
-                                  intermediate_tensors,
-                                  inputs_embeds=inputs_embeds)
+        return self.decoder.model(
+            input_ids,
+            positions,
+            intermediate_tensors,
+            inputs_embeds=inputs_embeds,
+        )

     def compute_logits(
         self,

From 43227236ecaa0ea741504a3b403a57846d307129 Mon Sep 17 00:00:00 2001
From: Jiangyun Zhu
Date: Mon, 29 Sep 2025 21:54:52 +0800
Subject: [PATCH 478/518] [torch.compile] serialize cudagraph_mode as its enum
 name instead of value (#25868)

Signed-off-by: zjy0516
---
 vllm/config/compilation.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py
index 825de7d26191..df08fa20e0cf 100644
--- a/vllm/config/compilation.py
+++ b/vllm/config/compilation.py
@@ -73,6 +73,9 @@ def valid_runtime_modes(self) -> bool:
             CUDAGraphMode.NONE, CUDAGraphMode.PIECEWISE, CUDAGraphMode.FULL
         ]

+    def __str__(self) -> str:
+        return self.name
+

 @config
 @dataclass
@@ -417,10 +420,11 @@ def __repr__(self) -> str:
         if pass_config_exclude:
             exclude["pass_config"] = pass_config_exclude

-        return TypeAdapter(CompilationConfig).dump_json(
-            self,
-            exclude=exclude,  # type: ignore[arg-type]
-            exclude_unset=True).decode()
+        config = TypeAdapter(CompilationConfig).dump_python(self,
+                                                            exclude=exclude,
+                                                            exclude_unset=True)
+
+        return str(config)

     __str__ = __repr__

From d0d138bc55f707e86c66b1d4edfb3f4046e9aa5f Mon Sep 17 00:00:00 2001
From: Chenxi Yang
Date: Mon, 29 Sep 2025 07:31:51 -0700
Subject: [PATCH 479/518] [Nixl][P/D] Add cuda2cpu support (HD->DH transfer)
 (#24690)

Signed-off-by: Chenxi Yang
Co-authored-by: Chenxi Yang
---
 .../nixl_integration/run_accuracy_test.sh     | 31 ++++++++++++++--
 .../nixl_integration/run_edge_case_test.sh    | 37 ++++++++++++++++---
 vllm/config/kv_transfer.py                    |  4 +-
 .../kv_connector/v1/nixl_connector.py         |  8 +++-
 vllm/platforms/cuda.py                        | 24 ++++++++++++
 vllm/v1/worker/gpu_model_runner.py            |  7 ++--
 6 files changed, 96 insertions(+), 15 deletions(-)
 mode change 100644 => 100755 tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh

diff --git a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
index bc8837079109..3b0f2d102c1f 100755
--- a/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_accuracy_test.sh
@@ -1,6 +1,31 @@
 #!/bin/bash
 set -xe

+# Parse command line arguments
+KV_BUFFER_DEVICE="cuda"  # Default to cuda
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --kv_buffer_device)
+      KV_BUFFER_DEVICE="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option $1"
+      echo "Usage: $0 [--kv_buffer_device ]"
+      exit 1
+      ;;
+  esac
+done
+
+echo "Running accuracy tests with kv_buffer_device=$KV_BUFFER_DEVICE"
+
+# Build the kv-transfer-config once
+if [[ "$KV_BUFFER_DEVICE" == "cuda" ]]; then
+  KV_CONFIG='{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
+else
+  KV_CONFIG="{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"$KV_BUFFER_DEVICE\"}"
+fi
+
 # Models to run
 MODELS=(
     "Qwen/Qwen3-0.6B"
@@ -79,7 +104,7 @@ run_tests_for_model() {
       # Calculate port number (base port + instance number)
       PORT=$((8100 + i))
-      # Calculate side channel port. Avoid clash with with TP workers. 
+      # Calculate side channel port. Avoid clash with with TP workers.
       SIDE_CHANNEL_PORT=$((5559 + i))

       echo "Starting prefill instance $i on GPU $GPU_ID, port $PORT"
@@ -93,7 +118,7 @@ run_tests_for_model() {
         --enforce-eager \
         --gpu-memory-utilization 0.2 \
         --tensor-parallel-size $PREFILLER_TP_SIZE \
-        --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
+        --kv-transfer-config '$KV_CONFIG'"

       if [ -n "$model_args" ]; then
         FULL_CMD="$BASE_CMD $model_args"
@@ -128,7 +153,7 @@ run_tests_for_model() {
         --enforce-eager \
         --gpu-memory-utilization 0.2 \
         --tensor-parallel-size $DECODER_TP_SIZE \
-        --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
+        --kv-transfer-config '$KV_CONFIG'"

       if [ -n "$model_args" ]; then
         FULL_CMD="$BASE_CMD $model_args"
diff --git a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
old mode 100644
new mode 100755
index b64461292910..c48b452e24cd
--- a/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
+++ b/tests/v1/kv_connector/nixl_integration/run_edge_case_test.sh
@@ -1,6 +1,33 @@
 #!/bin/bash
 set -xe

+# Parse command line arguments
+KV_BUFFER_DEVICE="cuda"  # Default to cuda
+PREFILL_GPU_ID=4  # Default GPU IDs
+DECODE_GPU_ID=5
+while [[ $# -gt 0 ]]; do
+  case $1 in
+    --kv_buffer_device)
+      KV_BUFFER_DEVICE="$2"
+      shift 2
+      ;;
+    *)
+      echo "Unknown option $1"
+      echo "Usage: $0 [--kv_buffer_device ]"
+      exit 1
+      ;;
+  esac
+done
+
+echo "Running edge case tests with kv_buffer_device=$KV_BUFFER_DEVICE (GPUs: $PREFILL_GPU_ID, $DECODE_GPU_ID)"
+
+# Build the kv-transfer-config once
+if [[ "$KV_BUFFER_DEVICE" == "cuda" ]]; then
+  KV_CONFIG='{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
+else
+  KV_CONFIG="{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\",\"kv_buffer_device\":\"$KV_BUFFER_DEVICE\"}"
+fi
+
 # Models to run
 MODELS=(
     "Qwen/Qwen3-0.6B"
@@ -50,15 +77,15 @@ run_tests_for_model() {

     # Get model-specific arguments
     local model_args=$(get_model_args "$model_name")
-    
+
     # Start prefill instance
     PREFILL_PORT=8001

-    BASE_CMD="CUDA_VISIBLE_DEVICES=0 VLLM_NIXL_SIDE_CHANNEL_PORT=5559 vllm serve $model_name \
+    BASE_CMD="CUDA_VISIBLE_DEVICES=$PREFILL_GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=5559 vllm serve $model_name \
         --port $PREFILL_PORT \
         --enforce-eager \
         --gpu-memory-utilization 0.2 \
-        --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'"
+        --kv-transfer-config '$KV_CONFIG'"

     if [ -n "$model_args" ]; then
         FULL_CMD="$BASE_CMD $model_args"
@@ -72,11 +99,11 @@ run_tests_for_model() {
     DECODE_PORT=8002

     # Build the command with or without model-specific args
- BASE_CMD="CUDA_VISIBLE_DEVICES=1 VLLM_NIXL_SIDE_CHANNEL_PORT=6000 vllm serve $model_name \ + BASE_CMD="CUDA_VISIBLE_DEVICES=$DECODE_GPU_ID VLLM_NIXL_SIDE_CHANNEL_PORT=6000 vllm serve $model_name \ --port $DECODE_PORT \ --enforce-eager \ --gpu-memory-utilization 0.2 \ - --kv-transfer-config '{\"kv_connector\":\"NixlConnector\",\"kv_role\":\"kv_both\"}'" + --kv-transfer-config '$KV_CONFIG'" if [ -n "$model_args" ]; then FULL_CMD="$BASE_CMD $model_args" diff --git a/vllm/config/kv_transfer.py b/vllm/config/kv_transfer.py index 9abf4acacfe8..c3d9a3309eb3 100644 --- a/vllm/config/kv_transfer.py +++ b/vllm/config/kv_transfer.py @@ -28,8 +28,8 @@ class KVTransferConfig: """The engine id for KV transfers.""" kv_buffer_device: Optional[str] = "cuda" - """The device used by kv connector to buffer the KV cache. - Currently only support 'cuda'.""" + """The device used by kv connector to buffer the KV cache. Choices are + 'cuda' and 'cpu'.""" kv_buffer_size: float = 1e9 """The buffer size for TorchDistributedConnector. Measured in number of diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index c11189d7ec10..1c7569515dec 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -67,7 +67,10 @@ # Supported platforms and types of kv transfer buffer. # {device: tuple of supported kv buffer types} _NIXL_SUPPORTED_DEVICE = { - "cuda": ("cuda", ), + "cuda": ( + "cuda", + "cpu", + ), "tpu": ("cpu", ), "xpu": ("cpu", ), } @@ -701,6 +704,9 @@ def initialize_host_xfer_buffer( def set_host_xfer_buffer_ops(self, copy_operation: CopyBlocksOp): """Assign copy (d2h, h2d) operations when host buffer is used.""" + # Set a no-op if the host buffer is not cpu. 
+ if self.kv_buffer_device != "cpu": + return assert self.use_host_buffer self.copy_blocks = copy_operation diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 8b9f9f569206..6738d3dec286 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -500,6 +500,30 @@ def check_if_supports_dtype(cls, torch_dtype: torch.dtype): "You can use float16 instead by explicitly setting the " "`dtype` flag in CLI, for example: --dtype=half.") + @classmethod + def insert_blocks_to_device( + cls, + src_cache: torch.Tensor, + dst_cache: torch.Tensor, + src_block_indices: torch.Tensor, + dst_block_indices: torch.Tensor, + ) -> None: + """Copy blocks from src_cache to dst_cache on GPU.""" + _src_cache = src_cache[:, src_block_indices] + dst_cache[:, dst_block_indices] = _src_cache.to(dst_cache.device) + + @classmethod + def swap_out_blocks_to_host( + cls, + src_cache: torch.Tensor, + dst_cache: torch.Tensor, + src_block_indices: torch.Tensor, + dst_block_indices: torch.Tensor, + ) -> None: + """Copy blocks from GPU to host (CPU).""" + _src_cache = src_cache[:, src_block_indices] + dst_cache[:, dst_block_indices] = _src_cache.cpu() + @classmethod def support_hybrid_kv_cache(cls) -> bool: return True diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 0960fe3a25fb..f8b0b9cba1bc 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -4059,10 +4059,9 @@ def initialize_kv_cache(self, kv_cache_config: KVCacheConfig) -> None: self.drafter.validate_same_kv_cache_group(kv_cache_config) if has_kv_transfer_group(): - get_kv_transfer_group().register_kv_caches(kv_caches) - if self.device.type == 'xpu': - get_kv_transfer_group().set_host_xfer_buffer_ops( - copy_kv_blocks) + kv_transfer_group = get_kv_transfer_group() + kv_transfer_group.register_kv_caches(kv_caches) + kv_transfer_group.set_host_xfer_buffer_ops(copy_kv_blocks) if self.dcp_world_size > 1: layer_names = self.attn_groups[0][0].layer_names From 145ac73317e0a255cd22ab5f4ac346124800be41 Mon Sep 17 00:00:00 2001 From: Rahul Tuli Date: Mon, 29 Sep 2025 21:07:20 +0530 Subject: [PATCH 480/518] [Bugfix][Speculative Decoding] Fix Eagle3 quantization config issue (#25883) Signed-off-by: Rahul Tuli --- .../speculators/test_eagle3.py | 3 +++ vllm/model_executor/models/llama.py | 7 ++++++- vllm/model_executor/models/llama_eagle3.py | 14 +++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/speculative_decoding/speculators/test_eagle3.py b/tests/speculative_decoding/speculators/test_eagle3.py index 368238b3a720..87d799a5fed7 100644 --- a/tests/speculative_decoding/speculators/test_eagle3.py +++ b/tests/speculative_decoding/speculators/test_eagle3.py @@ -14,6 +14,9 @@ pytest.param( "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized", id="qwen3-eagle3-speculator"), + pytest.param( + "nm-testing/Speculator-Qwen3-8B-Eagle3-converted-071-quantized-w4a16", + id="qwen3-eagle3-speculator-w4a16-verifier"), ]) def test_eagle3_speculators_model(vllm_runner, example_prompts, model_path, monkeypatch): diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index c7dd134ea47e..a6081d331511 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -248,7 +248,7 @@ def __init__(self, config = config or vllm_config.model_config.hf_config cache_config = vllm_config.cache_config - quant_config = vllm_config.quant_config + quant_config = self.get_quant_config(vllm_config) 
self.hidden_size = config.hidden_size rope_theta = getattr(config, "rope_theta", 10000) @@ -328,6 +328,11 @@ def forward( hidden_states = self.mlp(hidden_states) return hidden_states, residual + def get_quant_config( + self, vllm_config: VllmConfig) -> Optional[QuantizationConfig]: + """Get quantization config for this layer. Override in subclasses.""" + return vllm_config.quant_config + @support_torch_compile class LlamaModel(nn.Module): diff --git a/vllm/model_executor/models/llama_eagle3.py b/vllm/model_executor/models/llama_eagle3.py index 7192a76c8749..3fb6f2f8d5ec 100644 --- a/vllm/model_executor/models/llama_eagle3.py +++ b/vllm/model_executor/models/llama_eagle3.py @@ -13,6 +13,8 @@ from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.linear import QKVParallelLinear from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader @@ -33,7 +35,7 @@ def __init__(self, super().__init__(vllm_config, prefix=prefix, config=config) config = config or vllm_config.model_config.hf_config - quant_config = vllm_config.quant_config + quant_config = self.get_quant_config(vllm_config) # override qkv self.self_attn.qkv_proj = QKVParallelLinear( @@ -53,6 +55,16 @@ def __init__(self, else: self._residual_norm = self._norm_after_residual + def get_quant_config( + self, vllm_config: VllmConfig) -> Optional[QuantizationConfig]: + """Use drafter's quantization config instead of verifier's.""" + draft_model_config = vllm_config.speculative_config.draft_model_config + draft_load_config = vllm_config.load_config + + return VllmConfig.get_quantization_config( + draft_model_config, + draft_load_config) if draft_model_config else None + def _norm_before_residual( self, hidden_states: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: From 0899ba5b42827466372cae94df2f8461f74ccd7c Mon Sep 17 00:00:00 2001 From: Isotr0py Date: Tue, 30 Sep 2025 00:33:39 +0800 Subject: [PATCH 481/518] [CI/Build] Include Transformers backend test in nightly transformers test (#25885) Signed-off-by: Isotr0py --- .buildkite/test-pipeline.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index e603c1582e1f..460f0afb6f67 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -752,6 +752,7 @@ steps: commands: - pip install --upgrade git+https://github.com/huggingface/transformers - pytest -v -s tests/models/test_initialization.py + - pytest -v -s tests/models/test_transformers.py - pytest -v -s tests/models/multimodal/processing/ - pytest -v -s tests/models/multimodal/test_mapping.py - python3 examples/offline_inference/basic/chat.py From e61eb5e09d644a57814038f48f658d4f8e3c1fee Mon Sep 17 00:00:00 2001 From: Jee Jee Li Date: Tue, 30 Sep 2025 00:36:30 +0800 Subject: [PATCH 482/518] [Model] Remove MotifForCausalLM (#25866) Signed-off-by: Jee Jee Li --- docs/models/supported_models.md | 2 - tests/models/registry.py | 3 - tests/models/test_initialization.py | 4 - vllm/model_executor/models/motif.py | 345 ------------------------- vllm/model_executor/models/registry.py | 2 +- 5 files changed, 1 insertion(+), 355 deletions(-) delete mode 100644 vllm/model_executor/models/motif.py diff --git 
a/docs/models/supported_models.md b/docs/models/supported_models.md index 3ee5a7d0ffc5..8e87a98e3d51 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -396,7 +396,6 @@ th { | `MiniCPM3ForCausalLM` | MiniCPM3 | `openbmb/MiniCPM3-4B`, etc. | ✅︎ | ✅︎ | ✅︎ | | `MistralForCausalLM` | Mistral, Mistral-Instruct | `mistralai/Mistral-7B-v0.1`, `mistralai/Mistral-7B-Instruct-v0.1`, etc. | ✅︎ | ✅︎ | ✅︎ | | `MixtralForCausalLM` | Mixtral-8x7B, Mixtral-8x7B-Instruct | `mistralai/Mixtral-8x7B-v0.1`, `mistralai/Mixtral-8x7B-Instruct-v0.1`, `mistral-community/Mixtral-8x22B-v0.1`, etc. | ✅︎ | ✅︎ | ✅︎ | -| `MotifForCausalLM` | Motif-1-Tiny | `Motif-Technologies/Motif-2.6B`, `Motif-Technologies/Motif-2.6b-v1.1-LC`, etc. | ✅︎ | ✅︎ | | | `MPTForCausalLM` | MPT, MPT-Instruct, MPT-Chat, MPT-StoryWriter | `mosaicml/mpt-7b`, `mosaicml/mpt-7b-storywriter`, `mosaicml/mpt-30b`, etc. | | ✅︎ | ✅︎ | | `NemotronForCausalLM` | Nemotron-3, Nemotron-4, Minitron | `nvidia/Minitron-8B-Base`, `mgoin/Nemotron-4-340B-Base-hf-FP8`, etc. | ✅︎ | ✅︎ | ✅︎ | | `NemotronHForCausalLM` | Nemotron-H | `nvidia/Nemotron-H-8B-Base-8K`, `nvidia/Nemotron-H-47B-Base-8K`, `nvidia/Nemotron-H-56B-Base-8K`, etc. | ✅︎ | ✅︎ | ✅︎ | @@ -409,7 +408,6 @@ th { | `PhiForCausalLM` | Phi | `microsoft/phi-1_5`, `microsoft/phi-2`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Phi3ForCausalLM` | Phi-4, Phi-3 | `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | | `PhiMoEForCausalLM` | Phi-3.5-MoE | `microsoft/Phi-3.5-MoE-instruct`, etc. | ✅︎ | ✅︎ | ✅︎ | -| `Phi4FlashForCausalLM` | Phi-4-mini-flash-reasoning | `microsoft/microsoft/Phi-4-mini-instruct`, etc. | | | | | `PersimmonForCausalLM` | Persimmon | `adept/persimmon-8b-base`, `adept/persimmon-8b-chat`, etc. | | ✅︎ | ✅︎ | | `Plamo2ForCausalLM` | PLaMo2 | `pfnet/plamo-2-1b`, `pfnet/plamo-2-8b`, etc. | | ✅︎ | ✅︎ | | `QWenLMHeadModel` | Qwen | `Qwen/Qwen-7B`, `Qwen/Qwen-7B-Chat`, etc. | ✅︎ | ✅︎ | ✅︎ | diff --git a/tests/models/registry.py b/tests/models/registry.py index 124a97ed0c89..37ee474d3ecb 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -298,9 +298,6 @@ def check_available_online( "MistralForCausalLM": _HfExamplesInfo("mistralai/Mistral-7B-Instruct-v0.1"), "MixtralForCausalLM": _HfExamplesInfo("mistralai/Mixtral-8x7B-Instruct-v0.1", # noqa: E501 {"tiny": "TitanML/tiny-mixtral"}), # noqa: E501 - "MotifForCausalLM": _HfExamplesInfo("Motif-Technologies/Motif-2.6B", - trust_remote_code=True, - v0_only=True), "MptForCausalLM": _HfExamplesInfo("mpt", is_available_online=False), "MPTForCausalLM": _HfExamplesInfo("mosaicml/mpt-7b"), "NemotronForCausalLM": _HfExamplesInfo("nvidia/Minitron-8B-Base"), diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index 42d69367042d..e818b908e8a8 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -76,10 +76,6 @@ def _initialize_kv_caches_v1(self, vllm_config): if model_info.v0_only: # NOTE(woosuk): skip the test for V0-only models return - - if model_arch in ("Phi4FlashForCausalLM", "MotifForCausalLM"): - pytest.skip( - "Differential Flash Attention backend has been removed.") if model_arch == "GptOssForCausalLM": # FIXME: A hack to bypass FA3 assertion because our CI's L4 GPU # has cc==8.9 which hasn't supported FA3 yet. 
Remove this hack when diff --git a/vllm/model_executor/models/motif.py b/vllm/model_executor/models/motif.py deleted file mode 100644 index 153f36dcf1f5..000000000000 --- a/vllm/model_executor/models/motif.py +++ /dev/null @@ -1,345 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -# Adapted from -# https://huggingface.co/Motif-Technologies/Motif-2.6B/blob/main/modeling_motif.py -# Copyright (c) Alibaba Cloud. -# LICENSE: https://huggingface.co/Motif-Technologies/Motif-2.6B/blob/main/LICENSE -"""Inference-only Motif model compatible with HuggingFace weights.""" -import math -from typing import Any, Optional - -import torch -from torch import nn -from transformers import PretrainedConfig - -from vllm.attention import Attention, AttentionType -from vllm.attention.selector import _Backend -from vllm.config import CacheConfig, VllmConfig -from vllm.distributed import get_tensor_model_parallel_world_size -from vllm.model_executor.layers.layernorm import PolyNorm, RMSNorm -from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, - QKVParallelLinear, - RowParallelLinear) -from vllm.model_executor.layers.quantization import QuantizationConfig -from vllm.model_executor.layers.rotary_embedding import get_rope -from vllm.model_executor.models.llama import LlamaForCausalLM - -from .adapters import as_seq_cls_model -from .interfaces import SupportsV0Only -from .utils import extract_layer_index - - -class MotifMLP(nn.Module): - """MLP for the language component of the Motif model, which contains a - MergedColumnParallelLinear merging 2 outputs via PolyNorm activation.""" - - def __init__( - self, - hidden_size: int, - intermediate_size: int, - hidden_act: str = "poly_norm", - quant_config: Optional[QuantizationConfig] = None, - bias: bool = False, - prefix: str = "", - reduce_results: bool = True, - ): - super().__init__() - self.gate_up_proj = MergedColumnParallelLinear( - input_size=hidden_size, - output_sizes=[intermediate_size] * 2, - bias=bias, - quant_config=quant_config, - prefix=f"{prefix}.gate_up_proj", - ) - self.down_proj = RowParallelLinear( - input_size=intermediate_size, - output_size=hidden_size, - bias=bias, - quant_config=quant_config, - reduce_results=reduce_results, - prefix=f"{prefix}.down_proj", - ) - if hidden_act != "poly_norm": - raise NotImplementedError(f"Unsupported activation: {hidden_act}. " - "Only poly_norm is supported for now.") - self.act_fn = PolyNorm() - self.intermediate_size = intermediate_size - tp_size = get_tensor_model_parallel_world_size() - if hidden_act == "poly_norm" and tp_size > 1: - raise NotImplementedError( - "Tensor parallelism for poly_norm is not supported yet. 
" - "Support will be added in the future.") - - def forward(self, x): - x, _ = self.gate_up_proj(x) - x = self.act_fn( - x[..., :self.intermediate_size]) * x[..., self.intermediate_size:] - x, _ = self.down_proj(x) - return x - - -class MotifAttention(nn.Module): - - def __init__( - self, - config: PretrainedConfig, - hidden_size: int, - num_heads: int, - num_kv_heads: int, - rope_theta: float = 10000, - rope_scaling: Optional[dict[str, Any]] = None, - max_position_embeddings: int = 8192, - quant_config: Optional[QuantizationConfig] = None, - bias: bool = False, - bias_o_proj: bool = False, - cache_config: Optional[CacheConfig] = None, - prefix: str = "", - attn_type: str = AttentionType.DECODER, - ) -> None: - super().__init__() - layer_idx = extract_layer_index(prefix) - self.hidden_size = hidden_size - tp_size = get_tensor_model_parallel_world_size() - self.total_num_heads = num_heads - assert self.total_num_heads % tp_size == 0 - self.num_heads = self.total_num_heads // tp_size - self.total_num_kv_heads = num_kv_heads - if self.total_num_kv_heads >= tp_size: - # Number of KV heads is greater than TP size, so we partition - # the KV heads across multiple tensor parallel GPUs. - assert self.total_num_kv_heads % tp_size == 0 - else: - # Number of KV heads is less than TP size, so we replicate - # the KV heads across multiple tensor parallel GPUs. - assert tp_size % self.total_num_kv_heads == 0 - self.num_kv_heads = max(1, self.total_num_kv_heads // tp_size) - # MistralConfig has an optional head_dim introduced by Mistral-Nemo - head_dim = getattr(config, "head_dim", None) - if head_dim is None: - head_dim = self.hidden_size // self.total_num_heads - self.head_dim = head_dim - # Phi models introduced a partial_rotary_factor parameter in the config - self.partial_rotary_factor = getattr(config, "partial_rotary_factor", - 1) - self.q_size = self.num_heads * self.head_dim - self.kv_size = self.num_kv_heads * self.head_dim - self.scaling = self.head_dim**-0.5 - self.rope_theta = rope_theta - self.max_position_embeddings = max_position_embeddings - - assert self.num_heads % 2 == 0, 'num_heads should be even' - assert self.num_kv_heads % 2 == 0, 'num_heads should be even' - - self.qkv_proj = QKVParallelLinear( - hidden_size=hidden_size, - head_size=self.head_dim, - total_num_heads=self.total_num_heads, - total_num_kv_heads=self.total_num_kv_heads, - bias=bias, - quant_config=quant_config, - prefix=f"{prefix}.qkv_proj", - ) - - self.o_proj = RowParallelLinear( - input_size=self.total_num_heads * self.head_dim, - output_size=hidden_size, - bias=bias_o_proj, - quant_config=quant_config, - prefix=f"{prefix}.o_proj", - ) - - self._init_rotary_emb(config, - rope_scaling=rope_scaling, - quant_config=quant_config) - sliding_window = None - - self.lambda_init = self.lambda_init_fn(layer_idx) - self.lambda_q1 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_k1 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_q2 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.lambda_k2 = nn.Parameter( - torch.zeros(self.head_dim, dtype=torch.float32).normal_(mean=0, - std=0.1)) - self.subln = RMSNorm(2 * self.head_dim, eps=config.attn_rms_norm_eps) - - params = { - 'differential_flash_attention_config': { - 'lambda_init': self.lambda_init, - 'lambda_q1': self.lambda_q1, - 'lambda_k1': self.lambda_k1, - 'lambda_q2': self.lambda_q2, - 'lambda_k2': 
self.lambda_k2, - "subln": self.subln, - } - } - - diff_attn_err_msg = ( - 'Set VLLM_ATTENTION_BACKEND="DIFFERENTIAL_FLASH_ATTN" ' - 'to enable Differential Flash Attention.') - try: - self.attn = Attention( - self.num_heads, - self.head_dim, - self.scaling, - num_kv_heads=self.num_kv_heads, - cache_config=cache_config, - quant_config=quant_config, - per_layer_sliding_window=sliding_window, - attn_type=attn_type, - prefix=f"{prefix}.attn", - **params, - ) - except TypeError as e: - raise ValueError(diff_attn_err_msg) from e - assert (self.attn.backend == _Backend.DIFFERENTIAL_FLASH_ATTN - ), diff_attn_err_msg - - def lambda_init_fn(self, depth): - return 0.8 - 0.6 * math.exp(-0.3 * (depth - 1)) - - def forward( - self, - positions: torch.Tensor, - hidden_states: torch.Tensor, - ) -> torch.Tensor: - qkv, _ = self.qkv_proj(hidden_states) - q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1) - q, k = self.rotary_emb(positions, q, k) - attn_output = self.attn(q, k, v) - output, _ = self.o_proj(attn_output) - return output - - def _init_rotary_emb(self, config: PretrainedConfig, - rope_scaling: Optional[dict[str, Any]], - quant_config: Optional[QuantizationConfig]) -> None: - is_neox_style = True - is_gguf = quant_config and quant_config.get_name() == "gguf" - if is_gguf and config.model_type == "llama": - is_neox_style = False - - self.rotary_emb = get_rope( - self.head_dim, - rotary_dim=self.head_dim, - max_position=self.max_position_embeddings, - base=self.rope_theta, - rope_scaling=rope_scaling, - is_neox_style=is_neox_style, - partial_rotary_factor=self.partial_rotary_factor, - ) - - -class MotifDecoderLayer(nn.Module): - - def __init__( - self, - config: PretrainedConfig, - cache_config: Optional[CacheConfig] = None, - quant_config: Optional[QuantizationConfig] = None, - prefix: str = "", - ) -> None: - super().__init__() - self.hidden_size = config.hidden_size - rope_theta = getattr(config, "rope_theta", 10000) - rope_scaling = getattr(config, "rope_scaling", None) - if rope_scaling is not None and getattr( - config, "original_max_position_embeddings", None): - rope_scaling["original_max_position_embeddings"] = ( - config.original_max_position_embeddings) - max_position_embeddings = getattr(config, "max_position_embeddings", - 8192) - attention_bias = getattr(config, "attention_bias", False) or getattr( - config, "use_bias", False) - bias_o_proj = attention_bias - if hasattr(config, 'qkv_bias'): - attention_bias = config.qkv_bias - - # By default, Motif uses causal attention as it is a decoder-only model. - # You can override the HF config with `is_causal=False` to enable - # bidirectional attention, which is used in some embedding models - # (e.g. 
parasail-ai/GritLM-7B-vllm) - if getattr(config, "is_causal", True): - attn_type = AttentionType.DECODER - else: - attn_type = AttentionType.ENCODER_ONLY - - self.self_attn = MotifAttention( - config=config, - hidden_size=self.hidden_size, - num_heads=config.num_attention_heads, - num_kv_heads=getattr(config, "num_key_value_heads", - config.num_attention_heads), - rope_theta=rope_theta, - rope_scaling=rope_scaling, - max_position_embeddings=max_position_embeddings, - quant_config=quant_config, - bias=attention_bias, - bias_o_proj=bias_o_proj, - cache_config=cache_config, - prefix=f"{prefix}.self_attn", - attn_type=attn_type, - ) - self.mlp = MotifMLP( - hidden_size=self.hidden_size, - intermediate_size=config.intermediate_size, - hidden_act=config.hidden_act, - quant_config=quant_config, - bias=getattr(config, "use_bias", False), - prefix=f"{prefix}.mlp", - ) - self.input_layernorm = RMSNorm(config.hidden_size, - eps=config.rms_norm_eps) - self.post_attention_layernorm = RMSNorm(config.hidden_size, - eps=config.rms_norm_eps) - - def forward( - self, - positions: torch.Tensor, - hidden_states: torch.Tensor, - residual: Optional[torch.Tensor], - ) -> tuple[torch.Tensor, torch.Tensor]: - # Self Attention - if residual is None: - residual = hidden_states - hidden_states = self.input_layernorm(hidden_states) - else: - hidden_states, residual = self.input_layernorm( - hidden_states, residual) - hidden_states = self.self_attn(positions=positions, - hidden_states=hidden_states) - - # Fully Connected - hidden_states, residual = self.post_attention_layernorm( - hidden_states, residual) - hidden_states = self.mlp(hidden_states) - return hidden_states, residual - - -# Motif model uses differential attention -# Only supported in v0 (no chunked prefill support) -class MotifForCausalLM(LlamaForCausalLM, SupportsV0Only): - - def __init__(self, - *, - vllm_config: VllmConfig, - prefix: str = "", - layer_type: type[nn.Module] = MotifDecoderLayer): - - # Prefix caching and chunked prefill is not supported for this model. 
- assert not vllm_config.cache_config.enable_prefix_caching, \ - "Motif currently does not support prefix caching" - assert not vllm_config.scheduler_config.chunked_prefill_enabled, \ - "Motif currently does not support chunked prefill" - - super().__init__(vllm_config=vllm_config, - prefix=prefix, - layer_type=layer_type) - - -MotifForSequenceClassification = as_seq_cls_model(MotifForCausalLM) diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index a68012d8a8c9..62aa5af9fb14 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -119,7 +119,6 @@ "MiniCPM3ForCausalLM": ("minicpm3", "MiniCPM3ForCausalLM"), "MistralForCausalLM": ("llama", "LlamaForCausalLM"), "MixtralForCausalLM": ("mixtral", "MixtralForCausalLM"), - "MotifForCausalLM": ("motif", "MotifForCausalLM"), # transformers's mpt class has lower case "MptForCausalLM": ("mpt", "MPTForCausalLM"), "MPTForCausalLM": ("mpt", "MPTForCausalLM"), @@ -332,6 +331,7 @@ ] _PREVIOUSLY_SUPPORTED_MODELS = { + "MotifForCausalLM": "0.10.2", "Phi3SmallForCausalLM": "0.9.2", "Phi4FlashForCausalLM": "0.10.2", # encoder-decoder models except whisper From d5ab28511c5fca0294d1b445b670e199f202193b Mon Sep 17 00:00:00 2001 From: Lee Nau Date: Mon, 29 Sep 2025 12:07:29 -0700 Subject: [PATCH 483/518] [Bugfix] Use correct key "ignore" for config.json non-quantized layers (#25706) Signed-off-by: Lee Nau --- vllm/model_executor/layers/quantization/modelopt.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/quantization/modelopt.py b/vllm/model_executor/layers/quantization/modelopt.py index 0be43da00b53..20704439eaa9 100644 --- a/vllm/model_executor/layers/quantization/modelopt.py +++ b/vllm/model_executor/layers/quantization/modelopt.py @@ -138,13 +138,15 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptFp8Config": if not quant_method: raise ValueError("Missing 'quant_algo' in quantization config") kv_cache_quant_method = quant_config.get("kv_cache_quant_algo") + # "exclude_modules" is the key in the legacy hf_quant_config.json exclude_modules = quant_config.get("exclude_modules") else: # Compressed-tensors style format: # {"quant_algo": "...", "quant_method": "modelopt"} quant_method = config.get("quant_algo", "") kv_cache_quant_method = config.get("kv_cache_quant_algo") - exclude_modules = config.get("exclude_modules") + # "ignore" is the key in config.json + exclude_modules = config.get("ignore") if quant_method not in QUANT_ALGOS: raise ValueError( @@ -723,6 +725,7 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptNvFp4Config": raise ValueError(f"group_size must be an integer, got " f"{type(group_size_raw)}") from None + # "exclude_modules" is the key in the legacy hf_quant_config.json exclude_modules = quant_config.get("exclude_modules", []) if not isinstance(exclude_modules, list): raise ValueError(f"exclude_modules must be a list, got " @@ -756,7 +759,8 @@ def from_config(cls, config: dict[str, Any]) -> "ModelOptNvFp4Config": raise ValueError(f"group_size must be an integer, got " f"{type(group_size_raw)}") from None - exclude_modules = config.get("exclude_modules", []) + # "ignore" is the key in config.json + exclude_modules = config.get("ignore", []) if not isinstance(exclude_modules, list): raise ValueError(f"exclude_modules must be a list, got " f"{type(exclude_modules)}") From c42ff4f4fdc4a4d48ccef18b8067995f6c19e6ec Mon Sep 17 00:00:00 2001 From: Adrian Abeyta Date: Mon, 29 Sep 2025 14:52:04 
-0500 Subject: [PATCH 484/518] [BugFix][torch.compile] KV scale calculation issues with FP8 quantization (#25513) Signed-off-by: adabeyta --- tests/compile/test_full_graph.py | 15 +++++++++++ vllm/attention/layer.py | 43 +++++++++++++++++++++++++++--- vllm/v1/worker/gpu_model_runner.py | 9 +++++++ 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/compile/test_full_graph.py b/tests/compile/test_full_graph.py index 870aa553ca62..f9f146810924 100644 --- a/tests/compile/test_full_graph.py +++ b/tests/compile/test_full_graph.py @@ -139,6 +139,21 @@ def test_custom_compile_config( run_model(compilation_config, model, model_kwargs) +@pytest.mark.parametrize( + "optimization_level", + [CompilationLevel.NO_COMPILATION, CompilationLevel.PIECEWISE], +) +def test_fp8_kv_scale_compile(optimization_level: int): + model = "Qwen/Qwen2-0.5B" + model_kwargs = { + "quantization": "fp8", + "kv_cache_dtype": "fp8_e4m3", + "calculate_kv_scales": True, + "max_model_len": 512, + } + run_model(optimization_level, model, model_kwargs) + + def test_inductor_graph_partition_attn_fusion(caplog_vllm): if not is_torch_equal_or_newer("2.9.0.dev"): pytest.skip("inductor graph partition is only available " diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 326fe6dd048a..d97c87d96e99 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -277,9 +277,8 @@ def forward( `vllm.forward_context.get_forward_context().attn_metadata`. """ if self.calculate_kv_scales: - attn_metadata = get_forward_context().attn_metadata - if attn_metadata.enable_kv_scales_calculation: - self.calc_kv_scales(query, key, value) + torch.ops.vllm.maybe_calc_kv_scales(query, key, value, + self.layer_name) output_dtype = query.dtype if self.query_quant is not None: @@ -554,6 +553,44 @@ def maybe_save_kv_layer_to_connector( attn_metadata[layer_name]) +def maybe_calc_kv_scales( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + layer_name: str, +) -> None: + + forward_context: ForwardContext = get_forward_context() + attn_metadata = forward_context.attn_metadata + + if isinstance(attn_metadata, dict): + attn_metadata = attn_metadata[layer_name] + + if attn_metadata is None or not getattr( + attn_metadata, 'enable_kv_scales_calculation', False): + return + + self = forward_context.no_compile_layers[layer_name] + self.calc_kv_scales(query, key, value) + + +def maybe_calc_kv_scales_fake( + query: torch.Tensor, + key: torch.Tensor, + value: torch.Tensor, + layer_name: str, +) -> None: + return + + +direct_register_custom_op( + op_name="maybe_calc_kv_scales", + op_func=maybe_calc_kv_scales, + mutates_args=["query", "key", "value"], + fake_impl=maybe_calc_kv_scales_fake, +) + + def unified_attention( query: torch.Tensor, key: torch.Tensor, diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f8b0b9cba1bc..9e7d6eb0387b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2351,6 +2351,15 @@ def execute_model( self.cudagraph_dispatcher.dispatch(batch_descriptor, use_cascade_attn) + # Set cudagraph mode to none if calc_kv_scales is true. 
+ if attn_metadata is not None: + metadata_list = (attn_metadata.values() if isinstance( + attn_metadata, dict) else [attn_metadata]) + if any( + getattr(m, 'enable_kv_scales_calculation', False) + for m in metadata_list): + cudagraph_runtime_mode = CUDAGraphMode.NONE + # This is currently to get around the assert in the DPMetadata # where it wants `num_tokens_across_dp` to align with `num_tokens` if ubatch_slices is not None: From 9bedac962384c02524edc29161ce60ed735d4c1d Mon Sep 17 00:00:00 2001 From: Naman Lalit Date: Mon, 29 Sep 2025 13:49:49 -0700 Subject: [PATCH 485/518] [Doc] Add documentation for vLLM continuous benchmarking and profiling (#25819) Signed-off-by: Naman Lalit --- docs/contributing/benchmarks.md | 24 ++++++++++++++++++++++++ docs/contributing/profiling.md | 16 ++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/docs/contributing/benchmarks.md b/docs/contributing/benchmarks.md index a97d1fa6a3a5..cf14770c01a6 100644 --- a/docs/contributing/benchmarks.md +++ b/docs/contributing/benchmarks.md @@ -823,6 +823,30 @@ The latest performance results are hosted on the public [vLLM Performance Dashbo More information on the performance benchmarks and their parameters can be found in [Benchmark README](https://github.com/intel-ai-tce/vllm/blob/more_cpu_models/.buildkite/nightly-benchmarks/README.md) and [performance benchmark description](gh-file:.buildkite/nightly-benchmarks/performance-benchmarks-descriptions.md). +### Continuous Benchmarking + +The continuous benchmarking provides automated performance monitoring for vLLM across different models and GPU devices. This helps track vLLM's performance characteristics over time and identify any performance regressions or improvements. + +#### How It Works + +The continuous benchmarking is triggered via a [GitHub workflow CI](https://github.com/pytorch/pytorch-integration-testing/actions/workflows/vllm-benchmark.yml) in the PyTorch infrastructure repository, which runs automatically every 4 hours. The workflow executes three types of performance tests: + +- **Serving tests**: Measure request handling and API performance +- **Throughput tests**: Evaluate token generation rates +- **Latency tests**: Assess response time characteristics + +#### Benchmark Configuration + +The benchmarking currently runs on a predefined set of models configured in the [vllm-benchmarks directory](https://github.com/pytorch/pytorch-integration-testing/tree/main/vllm-benchmarks/benchmarks). To add new models for benchmarking: + +1. Navigate to the appropriate GPU directory in the benchmarks configuration +2. Add your model specifications to the corresponding configuration files +3. The new models will be included in the next scheduled benchmark run + +#### Viewing Results + +All continuous benchmarking results are automatically published to the public [vLLM Performance Dashboard](https://hud.pytorch.org/benchmark/llms?repoName=vllm-project%2Fvllm). + [](){ #nightly-benchmarks } ## Nightly Benchmarks diff --git a/docs/contributing/profiling.md b/docs/contributing/profiling.md index a1b7927a95d1..b62560a58748 100644 --- a/docs/contributing/profiling.md +++ b/docs/contributing/profiling.md @@ -160,6 +160,22 @@ GUI example: Screenshot 2025-03-05 at 11 48 42 AM +## Continuous Profiling + +There is a [GitHub CI workflow](https://github.com/pytorch/pytorch-integration-testing/actions/workflows/vllm-profiling.yml) in the PyTorch infrastructure repository that provides continuous profiling for different models on vLLM. 
This automated profiling helps track performance characteristics over time and across different model configurations. + +### How It Works + +The workflow currently runs weekly profiling sessions for selected models, generating detailed performance traces that can be analyzed using different tools to identify performance regressions or optimization opportunities. But, it can be triggered manually as well, using the Github Action tool. + +### Adding New Models + +To extend the continuous profiling to additional models, you can modify the [profiling-tests.json](https://github.com/pytorch/pytorch-integration-testing/blob/main/vllm-profiling/cuda/profiling-tests.json) configuration file in the PyTorch integration testing repository. Simply add your model specifications to this file to include them in the automated profiling runs. + +### Viewing Profiling Results + +The profiling traces generated by the continuous profiling workflow are publicly available on the [vLLM Performance Dashboard](https://hud.pytorch.org/benchmark/llms?repoName=vllm-project%2Fvllm). Look for the **Profiling traces** table to access and download the traces for different models and runs. + ## Profiling vLLM Python Code The Python standard library includes From 61a34316136833f44ff0c8c3c44fb0f2254a4a2b Mon Sep 17 00:00:00 2001 From: Gregory Shtrasberg <156009573+gshtras@users.noreply.github.com> Date: Mon, 29 Sep 2025 17:01:50 -0400 Subject: [PATCH 486/518] [Bugfix][ROCm] Fixing trying to import non-existent symbols from libnccl.so (#25605) Signed-off-by: Gregory Shtrasberg --- .../device_communicators/pynccl_wrapper.py | 27 ++++++++++++++++--- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/vllm/distributed/device_communicators/pynccl_wrapper.py b/vllm/distributed/device_communicators/pynccl_wrapper.py index c3e99e177e2d..2e9a4e024de4 100644 --- a/vllm/distributed/device_communicators/pynccl_wrapper.py +++ b/vllm/distributed/device_communicators/pynccl_wrapper.py @@ -30,7 +30,9 @@ import torch from torch.distributed import ReduceOp +from vllm import envs from vllm.logger import init_logger +from vllm.platforms import current_platform from vllm.utils import find_nccl_library logger = init_logger(__name__) @@ -275,10 +277,27 @@ def __init__(self, so_file: Optional[str] = None): if so_file not in NCCLLibrary.path_to_dict_mapping: _funcs: dict[str, Any] = {} for func in NCCLLibrary.exported_functions: - f = getattr(self.lib, func.name) - f.restype = func.restype - f.argtypes = func.argtypes - _funcs[func.name] = f + try: + f = getattr(self.lib, func.name) + f.restype = func.restype + f.argtypes = func.argtypes + _funcs[func.name] = f + except AttributeError: + if func.name in [ + "ncclCommWindowRegister", + "ncclCommWindowDeregister" + ]: + if envs.VLLM_USE_NCCL_SYMM_MEM: + logger.warning_once( + "The symbol %s is not found in the NCCL " + "library %s. 
To enable VLLM_USE_NCCL_SYMM_MEM " + " please update your NCCL version to >= " + "2.27.03.", func.name, so_file) + if current_platform.is_rocm(): + # Having an exception here on ROCm platform is + # not allowed during graph capturing + continue + raise NCCLLibrary.path_to_dict_mapping[so_file] = _funcs self._funcs = NCCLLibrary.path_to_dict_mapping[so_file] From fea3e476aa3eec13c39c4cb61eab88e8eb70ad7b Mon Sep 17 00:00:00 2001 From: Thomas Parnell Date: Mon, 29 Sep 2025 23:18:25 +0200 Subject: [PATCH 487/518] [Kernel] Chunk-aligned mamba2 (#24683) --- .../layers/mamba/mamba_mixer2.py | 8 +- .../layers/mamba/ops/ssd_bmm.py | 42 ++-- .../layers/mamba/ops/ssd_chunk_scan.py | 236 ++++++------------ .../layers/mamba/ops/ssd_chunk_state.py | 48 ++-- .../layers/mamba/ops/ssd_combined.py | 56 ++--- .../layers/mamba/ops/ssd_state_passing.py | 97 ++----- vllm/model_executor/models/plamo2.py | 8 +- vllm/v1/attention/backends/mamba2_attn.py | 189 ++++++-------- 8 files changed, 250 insertions(+), 434 deletions(-) diff --git a/vllm/model_executor/layers/mamba/mamba_mixer2.py b/vllm/model_executor/layers/mamba/mamba_mixer2.py index 6dd09fad7a90..bfb0666d361f 100644 --- a/vllm/model_executor/layers/mamba/mamba_mixer2.py +++ b/vllm/model_executor/layers/mamba/mamba_mixer2.py @@ -502,9 +502,9 @@ def forward_cuda( prep_initial_states = attn_metadata.prep_initial_states chunk_size = attn_metadata.chunk_size seq_idx_p = attn_metadata.seq_idx_p - chunk_indices_p = attn_metadata.chunk_indices_p - chunk_offsets_p = attn_metadata.chunk_offsets_p query_start_loc_p = attn_metadata.query_start_loc_p + cu_chunk_seqlen_p = attn_metadata.cu_chunk_seqlen_p + last_chunk_indices_p = attn_metadata.last_chunk_indices_p # 1. Gated MLP's linear projection projected_states, _ = self.in_proj(hidden_states) @@ -634,9 +634,9 @@ def forward_cuda( z=None, dt_bias=self.dt_bias, seq_idx=seq_idx_p, - chunk_indices=chunk_indices_p, - chunk_offsets=chunk_offsets_p, cu_seqlens=query_start_loc_p, + cu_chunk_seqlens=cu_chunk_seqlen_p, + last_chunk_indices=last_chunk_indices_p, initial_states=initial_states, dt_softplus=True, dt_limit=(0.0, float("inf")), diff --git a/vllm/model_executor/layers/mamba/ops/ssd_bmm.py b/vllm/model_executor/layers/mamba/ops/ssd_bmm.py index 601b71ab2a51..15a72fc61261 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_bmm.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_bmm.py @@ -6,8 +6,6 @@ # ruff: noqa: E501,SIM102 -import math - import torch from vllm.triton_utils import tl, triton @@ -96,7 +94,7 @@ def _bmm_chunk_fwd_kernel( a_ptr, b_ptr, out_ptr, - seq_idx_ptr, + cu_chunk_seqlens_ptr, # Matrix dimensions seqlen, chunk_size: tl.constexpr, @@ -112,7 +110,6 @@ def _bmm_chunk_fwd_kernel( stride_out_head: tl.int64, stride_outm: tl.int64, stride_outn: tl.constexpr, - stride_seq_idx_seqlen: tl.constexpr, # Meta-parameters IS_CAUSAL: tl.constexpr, dot_dtype: tl.constexpr, @@ -129,10 +126,12 @@ def _bmm_chunk_fwd_kernel( if IS_CAUSAL: if pid_n * BLOCK_SIZE_N >= (pid_m + 1) * BLOCK_SIZE_M: return - a_ptr += pid_c * chunk_size * stride_a_seqlen + pid_h * stride_a_head - b_ptr += pid_c * chunk_size * stride_b_seqlen + pid_h * stride_b_head - seq_idx_ptr += pid_c * chunk_size * stride_seq_idx_seqlen + chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c) + chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1) + + a_ptr += chunk_seqlen_start * stride_a_seqlen + pid_h * stride_a_head + b_ptr += chunk_seqlen_start * stride_b_seqlen + pid_h * stride_b_head offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, 
BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) @@ -141,7 +140,7 @@ def _bmm_chunk_fwd_kernel( offs_k[None, :] * stride_ak) b_ptrs = b_ptr + (offs_k[:, None] * stride_bk + offs_n[None, :] * stride_b_seqlen) - chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) + chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) @@ -162,16 +161,6 @@ def _bmm_chunk_fwd_kernel( offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) - # Zero out the results that are not from the same request - # in the varlen batch - seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, - mask=offs_m < chunk_size_limit, - other=-1) - seq_idx_n = tl.load(seq_idx_ptr + offs_n * stride_seq_idx_seqlen, - mask=offs_n < chunk_size_limit, - other=-2) - acc = tl.where(seq_idx_m[:, None] == seq_idx_n[None, :], acc, 0.0) - out = acc.to(out_ptr.dtype.element_ty) out_ptr += pid_c * stride_out_chunk + pid_h * stride_out_head out_ptrs = out_ptr + (stride_outm * offs_m[:, None] + @@ -182,12 +171,18 @@ def _bmm_chunk_fwd_kernel( (offs_n[None, :] < chunk_size)) -def _bmm_chunk_fwd(a, b, chunk_size, seq_idx, causal=False, output_dtype=None): +def _bmm_chunk_fwd(a, + b, + chunk_size, + cu_chunk_seqlens, + causal=False, + output_dtype=None): """ Argument: a: (seqlen, ngroups, k) b: (seqlen, ngroups, k) - seq_idx: (seqlen,). out[i, j] for seq_idx[i] != seq_idx[j] will be zeroed out. + chunk_size: int + cu_chunk_seq_lens: (nchunks+1,) causal: if True, then out[i, j] for i > j will be arbitrary, only out[i, j] for i <= j are guaranteed to be correct. Return: @@ -195,14 +190,12 @@ def _bmm_chunk_fwd(a, b, chunk_size, seq_idx, causal=False, output_dtype=None): """ seqlen, ngroups, k = a.shape assert b.shape == a.shape - assert seq_idx is not None - assert seq_idx.shape == (seqlen, ) if a.stride(-1) != 1 and a.stride(0) != 1: a = a.contiguous() if b.stride(-1) != 1 and b.stride(0) != 1: b = b.contiguous() - nchunks = math.ceil(seqlen / chunk_size) + nchunks = len(cu_chunk_seqlens) - 1 # Allocates output. 
out_dtype = a.dtype if output_dtype is None else output_dtype out = torch.empty((nchunks, ngroups, chunk_size, chunk_size), @@ -220,7 +213,7 @@ def _bmm_chunk_fwd(a, b, chunk_size, seq_idx, causal=False, output_dtype=None): a_ptr=a, b_ptr=b, out_ptr=out, - seq_idx_ptr=seq_idx, + cu_chunk_seqlens_ptr=cu_chunk_seqlens, seqlen=seqlen, chunk_size=chunk_size, K=k, @@ -235,7 +228,6 @@ def _bmm_chunk_fwd(a, b, chunk_size, seq_idx, causal=False, output_dtype=None): stride_out_head=out.stride(1), stride_outm=out.stride(-2), stride_outn=out.stride(-1), - stride_seq_idx_seqlen=seq_idx.stride(0), IS_CAUSAL=causal, dot_dtype=dot_dtype, ) diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py index add72617fcea..e1e77e14f69d 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py @@ -120,9 +120,7 @@ def _chunk_scan_fwd_kernel( states_ptr, D_ptr, initstates_ptr, - chunk_indices_ptr, - chunk_offsets_ptr, - chunk_meta_num, + cu_chunk_seqlens_ptr, # Matrix dimensions chunk_size: tl.constexpr, hdim: tl.constexpr, @@ -149,7 +147,7 @@ def _chunk_scan_fwd_kernel( stride_dA_cs_chunk: tl.int64, stride_dA_cs_head: tl.int64, stride_dA_cs_csize: tl.constexpr, - stride_seq_idx_seqlen: tl.constexpr, + stride_seq_idx_chunk: tl.constexpr, stride_C_seqlen: tl.int64, stride_C_head: tl.int64, stride_C_dstate: tl.constexpr, @@ -175,170 +173,107 @@ def _chunk_scan_fwd_kernel( HAS_INITSTATES: tl.constexpr, ): pid_c = tl.program_id(axis=1).to(tl.int64) - if not HAS_INITSTATES: - c_idx = pid_c - c_off = 0 - else: - c_idx = tl.load(chunk_indices_ptr + pid_c, mask=pid_c > -1, other=0) - c_off = tl.load(chunk_offsets_ptr + pid_c, mask=pid_c > -1, other=0) - pid_h = tl.program_id(axis=2) num_pid_n = tl.cdiv(hdim, BLOCK_SIZE_N) pid_m = tl.program_id(axis=0) // num_pid_n pid_n = tl.program_id(axis=0) % num_pid_n - cb_ptr += c_idx * stride_cb_chunk + (pid_h // + cb_ptr += pid_c * stride_cb_chunk + (pid_h // nheads_ngroups_ratio) * stride_cb_head - x_ptr += c_idx * chunk_size * stride_x_seqlen + pid_h * stride_x_head - dt_ptr += c_idx * stride_dt_chunk + pid_h * stride_dt_head - dA_cumsum_ptr += c_idx * stride_dA_cs_chunk + pid_h * stride_dA_cs_head - C_ptr += c_idx * chunk_size * stride_C_seqlen + ( + chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c) + chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1) + x_ptr += chunk_seqlen_start * stride_x_seqlen + pid_h * stride_x_head + dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head + dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head + C_ptr += chunk_seqlen_start * stride_C_seqlen + ( pid_h // nheads_ngroups_ratio) * stride_C_head # M-block offsets and prev states # - logic in next block may override these if there is an active offset - offs_m = pid_m * BLOCK_SIZE_M + c_off + tl.arange(0, BLOCK_SIZE_M) - prev_states_ptr = states_ptr + c_idx * stride_states_chunk + pid_h * stride_states_head - prev_states_hdim = stride_states_hdim - prev_states_dstate = stride_states_dstate - - chunk_size_limit = min(chunk_size, seqlen - c_idx * chunk_size) - - seq_idx_ptr += c_idx * chunk_size * stride_seq_idx_seqlen - # - we only need seq_idx_prev to be aligned to chunk boundary - seq_idx_prev = tl.load(seq_idx_ptr - stride_seq_idx_seqlen, - mask=c_idx >= 1, - other=0) - - if HAS_INITSTATES: - # if there are init states, we only need seq_idx_m to point - # what is the current seq_idx - - # get current seq idx - if 
(pid_m * BLOCK_SIZE_M + c_off) < chunk_size_limit: - seq_idx_m = tl.load( - seq_idx_ptr + - (pid_m * BLOCK_SIZE_M + c_off) * stride_seq_idx_seqlen, ) - - # - recall that in ssd_state_passing, for the case c_off == 0 - # i.e., the very first sequence, we made states_ptr hold its initial state - # so this edge case is taken care of - if ((c_off == 0) and (seq_idx_prev != seq_idx_m - ) # if a seq is changed exactly on boundary - or (c_off > 0) # implies a new example (pseudo chunk) - ): + offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + + seq_idx_ptr += pid_c * stride_seq_idx_chunk + seq_idx = tl.load(seq_idx_ptr) + seq_idx_prev = tl.load(seq_idx_ptr - stride_seq_idx_chunk, + mask=pid_c >= 1, + other=-1) + + if HAS_INITSTATES and (seq_idx != seq_idx_prev): + prev_states_ptr = initstates_ptr + seq_idx * stride_init_states_batch + pid_h * stride_init_states_head + prev_states_hdim = stride_init_states_hdim + prev_states_dstate = stride_init_states_dstate + else: + prev_states_ptr = states_ptr + ( + pid_c - 1) * stride_states_chunk + pid_h * stride_states_head + prev_states_hdim = stride_states_hdim + prev_states_dstate = stride_states_dstate - # - replace prev_states_ptr with init_states - prev_states_ptr = initstates_ptr + seq_idx_m * stride_init_states_batch + pid_h * stride_init_states_head - prev_states_hdim = stride_init_states_hdim # override strides - prev_states_dstate = stride_init_states_dstate + chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) dA_cs_m = tl.load(dA_cumsum_ptr + offs_m * stride_dA_cs_csize, mask=offs_m < chunk_size, other=0.0).to(tl.float32) - # - handle chunk state limit - if HAS_INITSTATES: - # have to split this if otherwise compilation will have problems - dA_cs_m_boundary = 0.0 - - # get the c_idx for the next (logica) chunk - c_idx_n = tl.load( - chunk_indices_ptr + (pid_c + 1), - mask=pid_c > -1 and (pid_c + 1) < chunk_meta_num, - other=-1 # to trigger different chunk - ) - - # - there are things to consider - # A. if c_off > 0 then we need to move the dA_cs boundary to ensure correct - # contribution of past states - # B. if c_off_n < chunk_size_limit, then we need to adjust this so as not to - # encroach into the next sequence, where c_off_n is the offset of the next - # (logical) chunk. - # An equivalent check for B is c_idx == c_idx_n, where there is repetition in - # (logical) chunk indices. - - if (c_idx == c_idx_n) or c_off > 0: - - # get the next offset - c_off_n = tl.load(chunk_offsets_ptr + (pid_c + 1), - mask=pid_c > -1 and (pid_c + 1) < chunk_meta_num, - other=chunk_size) - - # in this case, adjust down the chunk_size_limit - if c_idx == c_idx_n: - chunk_size_limit = min(c_off_n, chunk_size_limit) - - # get the cs at the offset boundary - # - c_off == 0 is a passthrough - # - We need dA_cs at the boundary, defined by c_off - no need - # to increase pointer by pid_m (it is a constant offset, - # i.e. 
the same for all blocks) - dA_cs_m_boundary = tl.load( - dA_cumsum_ptr + (c_off - 1) * stride_dA_cs_csize, - mask=(((c_off - 1) > -1) and ((c_off) < chunk_size)), - other=0.0).to(tl.float32) - else: - # - handle seq idx when HAS_INITSTATES==False - seq_idx_m = tl.load(seq_idx_ptr + offs_m * stride_seq_idx_seqlen, - mask=offs_m < chunk_size_limit, - other=-1) - acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) - # Without the if (pid_c > -1), with Triton 2.1.0, I get - # Assertion `!(srcMmaLayout && dstMmaLayout) && "Unexpected mma -> mm a layout conversion"' failed. - # With Triton 2.2.0, this works - if IS_TRITON_22 or c_idx > -1: - # Faster to just do 1 iteration with larger BLOCK_SIZE_K, up to block size 128 - offs_k_dstate = tl.arange( - 0, BLOCK_SIZE_DSTATE if BLOCK_SIZE_DSTATE <= 128 else BLOCK_SIZE_K) - C_ptrs = C_ptr + (offs_m[:, None] * stride_C_seqlen + - offs_k_dstate[None, :] * stride_C_dstate) + offs_out_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) + offs_out_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) - prev_states_ptrs = prev_states_ptr + ( - offs_n[None, :] * prev_states_hdim + - offs_k_dstate[:, None] * prev_states_dstate) + # Faster to just do 1 iteration with larger BLOCK_SIZE_K, up to block size 128 + offs_k_dstate = tl.arange( + 0, BLOCK_SIZE_DSTATE if BLOCK_SIZE_DSTATE <= 128 else BLOCK_SIZE_K) + C_ptrs = C_ptr + (offs_m[:, None] * stride_C_seqlen + + offs_k_dstate[None, :] * stride_C_dstate) + + scale_m = tl.exp(dA_cs_m) + if BLOCK_SIZE_DSTATE <= 128: + C = tl.load(C_ptrs, + mask=(offs_m[:, None] < chunk_size_limit) & + (offs_k_dstate[None, :] < dstate), + other=0.0) - if not HAS_INITSTATES: - # - this is for continuous batching where there is no init states - scale_m = tl.where(seq_idx_m == seq_idx_prev, tl.exp(dA_cs_m), 0.0) + if not HAS_INITSTATES and (seq_idx != seq_idx_prev): + # if no init states AND starting a new sequence, we need zeros + prev_states = tl.zeros((BLOCK_SIZE_DSTATE, BLOCK_SIZE_N), + dtype=C_ptr.dtype.element_ty) else: - # - if there is initstates, we will rely on prev_states, no zeroing - # required. 
- scale_m = tl.exp(dA_cs_m - dA_cs_m_boundary) - - if BLOCK_SIZE_DSTATE <= 128: - C = tl.load(C_ptrs, - mask=(offs_m[:, None] < chunk_size_limit) & - (offs_k_dstate[None, :] < dstate), - other=0.0) - + # otherwise read the previous state + prev_states_ptrs = prev_states_ptr \ + + offs_n[None, :] * prev_states_hdim \ + + offs_k_dstate[:, None] * prev_states_dstate prev_states = tl.load(prev_states_ptrs, mask=(offs_k_dstate[:, None] < dstate) & (offs_n[None, :] < hdim), other=0.0) prev_states = prev_states.to(C_ptr.dtype.element_ty) - acc = tl.dot(C, prev_states) * scale_m[:, None] - else: - for k in range(0, dstate, BLOCK_SIZE_K): - C = tl.load(C_ptrs, - mask=(offs_m[:, None] < chunk_size_limit) & - (offs_k_dstate[None, :] < dstate - k), - other=0.0) - # C = (C * scale_m[:, None]).to(C_ptr.dtype.element_ty) + + acc = tl.dot(C, prev_states) * scale_m[:, None] + + else: + prev_states_ptrs = prev_states_ptr \ + + offs_n[None, :] * prev_states_hdim \ + + offs_k_dstate[:, None] * prev_states_dstate + for k in range(0, dstate, BLOCK_SIZE_K): + C = tl.load(C_ptrs, + mask=(offs_m[:, None] < chunk_size_limit) & + (offs_k_dstate[None, :] < dstate - k), + other=0.0) + if not HAS_INITSTATES and (seq_idx != seq_idx_prev): + prev_states = tl.zeros((BLOCK_SIZE_DSTATE, BLOCK_SIZE_K), + dtype=C_ptr.dtype.element_ty) + else: prev_states = tl.load( prev_states_ptrs, mask=(offs_k_dstate[:, None] < dstate - k) & (offs_n[None, :] < hdim), other=0.0) prev_states = prev_states.to(C_ptr.dtype.element_ty) - acc += tl.dot(C, prev_states) - C_ptrs += BLOCK_SIZE_K - prev_states_ptrs += BLOCK_SIZE_K - acc *= scale_m[:, None] + acc += tl.dot(C, prev_states) + C_ptrs += BLOCK_SIZE_K + prev_states_ptrs += BLOCK_SIZE_K + acc *= scale_m[:, None] - offs_k = tl.arange(0, BLOCK_SIZE_K) + c_off + offs_k = tl.arange(0, BLOCK_SIZE_K) cb_ptrs = cb_ptr + (offs_m[:, None] * stride_cb_csize_m + offs_k[None, :] * stride_cb_csize_k) x_ptrs = x_ptr + (offs_k[:, None] * stride_x_seqlen + @@ -375,7 +310,7 @@ def _chunk_scan_fwd_kernel( dt_ptrs += BLOCK_SIZE_K * stride_dt_csize dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize - offs_out_m = pid_m * BLOCK_SIZE_M + c_off + tl.arange(0, BLOCK_SIZE_M) + offs_out_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_out_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) if HAS_D: @@ -393,7 +328,7 @@ def _chunk_scan_fwd_kernel( acc += x_residual * D if HAS_Z: - z_ptr += c_idx * chunk_size * stride_z_seqlen + pid_h * stride_z_head + z_ptr += chunk_seqlen_start * stride_z_seqlen + pid_h * stride_z_head z_ptrs = z_ptr + (stride_z_seqlen * offs_out_m[:, None] + stride_z_hdim * offs_out_n[None, :]) z = tl.load(z_ptrs, @@ -402,7 +337,7 @@ def _chunk_scan_fwd_kernel( other=0.0).to(tl.float32) acc *= z * tl.sigmoid(z) - out_ptr += c_idx * chunk_size * stride_out_seqlen + pid_h * stride_out_head + out_ptr += chunk_seqlen_start * stride_out_seqlen + pid_h * stride_out_head out_ptrs = out_ptr + (stride_out_seqlen * offs_out_m[:, None] + offs_out_n[None, :] * stride_out_hdim) tl.store(out_ptrs, @@ -418,12 +353,11 @@ def _chunk_scan_fwd( dA_cumsum, C, states, + cu_chunk_seqlens, out, seq_idx, D=None, z=None, - chunk_indices=None, - chunk_offsets=None, initial_states=None, ): assert seq_idx is not None, "this implementation requires seq_idx" @@ -441,20 +375,10 @@ def _chunk_scan_fwd( assert dt.shape == (nheads, nchunks, chunk_size) assert dA_cumsum.shape == (nheads, nchunks, chunk_size) assert states.shape == (nchunks, nheads, headdim, dstate) - assert seq_idx.shape == (seqlen, ) - - if 
initial_states is not None: - # with initial states, we need to take care of how - # seq_idx crosses the boundaries - assert chunk_indices is not None and chunk_offsets is not None, \ - "chunk_indices and chunk_offsets should have been set" - else: - chunk_indices, chunk_offsets = None, None + assert seq_idx.shape == (nchunks, ) - grid = lambda META: ( - triton.cdiv(chunk_size, META['BLOCK_SIZE_M']) * triton.cdiv( - headdim, META['BLOCK_SIZE_N']), nchunks - if chunk_offsets is None else len(chunk_offsets), nheads) + grid = lambda META: (triton.cdiv(chunk_size, META['BLOCK_SIZE_M']) * triton + .cdiv(headdim, META['BLOCK_SIZE_N']), nchunks, nheads) z_strides = ((z.stride(0), z.stride(1), z.stride(2)) if z is not None else (0, 0, 0)) @@ -476,9 +400,7 @@ def _chunk_scan_fwd( states_ptr=states, D_ptr=D, initstates_ptr=initial_states, - chunk_indices_ptr=chunk_indices, - chunk_offsets_ptr=chunk_offsets, - chunk_meta_num=len(chunk_indices) if chunk_indices is not None else 0, + cu_chunk_seqlens_ptr=cu_chunk_seqlens, chunk_size=chunk_size, hdim=headdim, dstate=dstate, @@ -503,7 +425,7 @@ def _chunk_scan_fwd( stride_dA_cs_chunk=dA_cumsum.stride(1), stride_dA_cs_head=dA_cumsum.stride(0), stride_dA_cs_csize=dA_cumsum.stride(2), - stride_seq_idx_seqlen=seq_idx.stride(0), + stride_seq_idx_chunk=seq_idx.stride(0), stride_C_seqlen=C.stride(0), stride_C_head=C.stride(1), stride_C_dstate=C.stride(2), diff --git a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py index 8ee41f2cbc1b..3a3e0f293459 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py @@ -6,8 +6,6 @@ # ruff: noqa: E501 -import math - import torch from vllm.triton_utils import tl, triton @@ -34,6 +32,7 @@ def _chunk_cumsum_fwd_kernel( dt_bias_ptr, dt_out_ptr, dA_cumsum_ptr, + cu_chunk_seqlens_ptr, # Matrix dimension seqlen, nheads: tl.constexpr, @@ -61,7 +60,11 @@ def _chunk_cumsum_fwd_kernel( # https://github.com/triton-lang/triton/issues/1058 pid_c = tl.program_id(axis=0).to(tl.int64) pid_h = tl.program_id(axis=1) - dt_ptr += pid_c * chunk_size * stride_dt_seqlen + + chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + pid_c) + chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1) + + dt_ptr += chunk_seqlen_start * stride_dt_seqlen dt_out_ptr += pid_c * stride_dt_out_chunk dA_cumsum_ptr += pid_c * stride_dA_cs_chunk @@ -74,7 +77,7 @@ def _chunk_cumsum_fwd_kernel( offs_c[None, :] * stride_dt_out_csize) dA_cs_ptrs = dA_cumsum_ptr + (offs_h[:, None] * stride_dA_cs_head + offs_c[None, :] * stride_dA_cs_csize) - chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) + chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start dt = tl.load(dt_ptrs, mask=(offs_h[:, None] < nheads) & @@ -188,7 +191,7 @@ def _chunk_state_fwd_kernel( states_ptr, dt_ptr, dA_cumsum_ptr, - seq_idx_ptr, + cu_chunk_seqlens_ptr, # Matrix dimensions hdim: tl.constexpr, dstate: tl.constexpr, @@ -212,7 +215,6 @@ def _chunk_state_fwd_kernel( stride_dA_cs_head: tl.int64, stride_dA_cs_chunk: tl.int64, stride_dA_cs_csize: tl.constexpr, - stride_seq_idx_seqlen: tl.constexpr, # Meta-parameters BLOCK_SIZE_M: tl.constexpr, BLOCK_SIZE_N: tl.constexpr, @@ -223,14 +225,14 @@ def _chunk_state_fwd_kernel( num_pid_n = tl.cdiv(dstate, BLOCK_SIZE_N) pid_m = tl.program_id(axis=0) // num_pid_n pid_n = tl.program_id(axis=0) % num_pid_n - b_ptr += pid_c * chunk_size * stride_b_seqlen + ( + chunk_seqlen_start = tl.load(cu_chunk_seqlens_ptr + 
pid_c) + chunk_seqlen_end = tl.load(cu_chunk_seqlens_ptr + pid_c + 1) + b_ptr += chunk_seqlen_start * stride_b_seqlen + ( pid_h // nheads_ngroups_ratio) * stride_b_head - x_ptr += pid_c * chunk_size * stride_x_seqlen + pid_h * stride_x_head + x_ptr += chunk_seqlen_start * stride_x_seqlen + pid_h * stride_x_head dt_ptr += pid_c * stride_dt_chunk + pid_h * stride_dt_head dA_cumsum_ptr += pid_c * stride_dA_cs_chunk + pid_h * stride_dA_cs_head - seq_idx_ptr += pid_c * chunk_size * stride_seq_idx_seqlen - offs_m = pid_m * BLOCK_SIZE_M + tl.arange(0, BLOCK_SIZE_M) offs_n = pid_n * BLOCK_SIZE_N + tl.arange(0, BLOCK_SIZE_N) offs_k = tl.arange(0, BLOCK_SIZE_K) @@ -243,10 +245,7 @@ def _chunk_state_fwd_kernel( (chunk_size - 1) * stride_dA_cs_csize).to(tl.float32) dA_cumsum_ptrs = dA_cumsum_ptr + offs_k * stride_dA_cs_csize - seq_idx_ptrs = seq_idx_ptr + offs_k * stride_seq_idx_seqlen - chunk_size_limit = min(chunk_size, seqlen - pid_c * chunk_size) - seq_idx_last = tl.load(seq_idx_ptr + - (chunk_size_limit - 1) * stride_seq_idx_seqlen) + chunk_size_limit = chunk_seqlen_end - chunk_seqlen_start acc = tl.zeros((BLOCK_SIZE_M, BLOCK_SIZE_N), dtype=tl.float32) for k in range(0, chunk_size_limit, BLOCK_SIZE_K): @@ -261,15 +260,9 @@ def _chunk_state_fwd_kernel( dA_cs_k = tl.load(dA_cumsum_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32) - - seq_idx_k = tl.load(seq_idx_ptrs, - mask=offs_k < chunk_size_limit - k, - other=-1) dt_k = tl.load(dt_ptrs, mask=offs_k < chunk_size_limit - k, other=0.0).to(tl.float32) - - scale = tl.where(seq_idx_k == seq_idx_last, - tl.exp(dA_cs_last - dA_cs_k) * dt_k, 0.0) + scale = tl.exp(dA_cs_last - dA_cs_k) * dt_k b *= scale[:, None] b = b.to(x_ptr.dtype.element_ty) acc += tl.dot(x, b) @@ -278,7 +271,6 @@ def _chunk_state_fwd_kernel( b_ptrs += BLOCK_SIZE_K * stride_b_seqlen dt_ptrs += BLOCK_SIZE_K * stride_dt_csize dA_cumsum_ptrs += BLOCK_SIZE_K * stride_dA_cs_csize - seq_idx_ptrs += BLOCK_SIZE_K * stride_seq_idx_seqlen states = acc.to(states_ptr.dtype.element_ty) @@ -534,6 +526,7 @@ def _chunk_state_varlen_kernel( def _chunk_cumsum_fwd(dt, A, chunk_size, + cu_chunk_seqlens, dt_bias=None, dt_softplus=False, dt_limit=(0.0, float("inf"))): @@ -541,7 +534,7 @@ def _chunk_cumsum_fwd(dt, assert A.shape == (nheads, ) if dt_bias is not None: assert dt_bias.shape == (nheads, ) - nchunks = math.ceil(seqlen / chunk_size) + nchunks = cu_chunk_seqlens.shape[0] - 1 dt_out = torch.empty(nheads, nchunks, chunk_size, @@ -561,6 +554,7 @@ def _chunk_cumsum_fwd(dt, dt_bias_ptr=dt_bias, dt_out_ptr=dt_out, dA_cumsum_ptr=dA_cumsum, + cu_chunk_seqlens_ptr=cu_chunk_seqlens, seqlen=seqlen, nheads=nheads, chunk_size=chunk_size, @@ -588,7 +582,7 @@ def _chunk_state_fwd(B, x, dt, dA_cumsum, - seq_idx=None, + cu_chunk_seqlens, states=None, states_in_fp32=True): seqlen, nheads, headdim = x.shape @@ -599,9 +593,6 @@ def _chunk_state_fwd(B, assert dt.shape == (nheads, nchunks, chunk_size) assert dA_cumsum.shape == dt.shape - assert seq_idx is not None - assert seq_idx.shape == (seqlen, ) - if states is not None: assert states.shape == (nchunks, nheads, headdim, dstate) else: @@ -619,7 +610,7 @@ def _chunk_state_fwd(B, states_ptr=states, dt_ptr=dt, dA_cumsum_ptr=dA_cumsum, - seq_idx_ptr=seq_idx, + cu_chunk_seqlens_ptr=cu_chunk_seqlens, hdim=headdim, dstate=dstate, chunk_size=chunk_size, @@ -641,7 +632,6 @@ def _chunk_state_fwd(B, stride_dA_cs_head=dA_cumsum.stride(0), stride_dA_cs_chunk=dA_cumsum.stride(1), stride_dA_cs_csize=dA_cumsum.stride(2), - stride_seq_idx_seqlen=seq_idx.stride(0), ) 
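    # states: (nchunks, nheads, headdim, dstate), i.e. one per-chunk state for
    # each chunk delimited by cu_chunk_seqlens; the inter-chunk recurrence is
    # applied afterwards by _state_passing_fwd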
return states diff --git a/vllm/model_executor/layers/mamba/ops/ssd_combined.py b/vllm/model_executor/layers/mamba/ops/ssd_combined.py index 37d6c2870812..f3eb61d5840e 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_combined.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_combined.py @@ -14,8 +14,7 @@ from .ssd_bmm import _bmm_chunk_fwd from .ssd_chunk_scan import _chunk_scan_fwd -from .ssd_chunk_state import (_chunk_cumsum_fwd, _chunk_state_fwd, - chunk_state_varlen) +from .ssd_chunk_state import _chunk_cumsum_fwd, _chunk_state_fwd from .ssd_state_passing import _state_passing_fwd TRITON_22 = version.parse(triton.__version__) >= version.parse('2.2.0') @@ -37,9 +36,9 @@ def _mamba_chunk_scan_combined_fwd(x, dt_bias=None, initial_states=None, seq_idx=None, - chunk_indices=None, - chunk_offsets=None, cu_seqlens=None, + cu_chunk_seqlens=None, + last_chunk_indices=None, dt_softplus=False, dt_limit=(0.0, float("inf")), state_dtype=None): @@ -56,7 +55,7 @@ def _mamba_chunk_scan_combined_fwd(x, if D is not None: assert D.shape == (nheads, headdim) or D.shape == (nheads, ) if seq_idx is not None: - assert seq_idx.shape == (seqlen, ) + assert seq_idx.shape == (cu_chunk_seqlens.shape[0] - 1, ) if B.stride(-1) != 1: B = B.contiguous() if C.stride(-1) != 1: @@ -89,6 +88,7 @@ def _mamba_chunk_scan_combined_fwd(x, dA_cumsum, dt = _chunk_cumsum_fwd(dt, A, chunk_size, + cu_chunk_seqlens, dt_bias=dt_bias, dt_softplus=dt_softplus, dt_limit=dt_limit) @@ -99,36 +99,31 @@ def _mamba_chunk_scan_combined_fwd(x, x, dt, dA_cumsum, - seq_idx=seq_idx, + cu_chunk_seqlens, states_in_fp32=True) # 3. Compute the inter-chunk SSM recurrence; produces correct SSM states at chunk boundaries # (middle term of factorization of off-diag blocks; A terms) - # - for handling chunked prefill, this requires i) initial_states - # ii) seq_idx iii) is_cont_batched and (iv) chunk_offsets to be all specified. + # - for handling chunked prefill, this requires i) initial_states and + # ii) seq_idx to be all specified. # - When a new seq_idx is detected, we will stop passing the prev_state # and switch accordingly to the init_state corresponding to the new seq_idx. - # - We will also make sure that the dA_cumsum is taken only from the start of the - # sequence (hence we need the full dA_cumsum tensor and not just the values at chunk boundaries) - # - this will ensure that states will be updated with the rightmost flushed seq_idx - # of the previous chunk. This implies that the first chunk of states is either 0 - # or equal to init_states of the first example. states = _state_passing_fwd( rearrange(states, "... p n -> ... (p n)"), dA_cumsum, # (nheads, nchunks, chunk_size) + cu_chunk_seqlens, initial_states=rearrange(initial_states, "... p n -> ... (p n)") if initial_states is not None else None, # (batch, nheads, headdim*dstate) seq_idx=seq_idx, - out_dtype=state_dtype if state_dtype is not None else C.dtype, - chunk_offsets=chunk_offsets) + out_dtype=state_dtype if state_dtype is not None else C.dtype) states = rearrange(states, "... (p n) -> ... p n", n=dstate) # 4. Compute batched matrix multiply for C_j^T B_i terms CB = _bmm_chunk_fwd(C, B, chunk_size, - seq_idx=seq_idx, + cu_chunk_seqlens, output_dtype=torch.float32) # 5. 
Scan and compute the diagonal blocks, taking into @@ -148,26 +143,15 @@ def _mamba_chunk_scan_combined_fwd(x, dA_cumsum, C, states, + cu_chunk_seqlens, out, # in-place update seq_idx, D=D, z=z, - chunk_indices=chunk_indices, - chunk_offsets=chunk_offsets, initial_states=initial_states, ) - varlen_states = chunk_state_varlen( - B, - x, - dt, - dA_cumsum, - cu_seqlens, - states, - initial_states=initial_states, - ) - - return varlen_states + return states[last_chunk_indices] def mamba_chunk_scan_combined_varlen( @@ -178,14 +162,14 @@ def mamba_chunk_scan_combined_varlen( C, chunk_size, cu_seqlens, + cu_chunk_seqlens, + last_chunk_indices, seq_idx, out, D=None, z=None, dt_bias=None, initial_states=None, - chunk_indices=None, - chunk_offsets=None, dt_softplus=False, dt_limit=(0.0, float("inf")), state_dtype=None, @@ -198,8 +182,10 @@ def mamba_chunk_scan_combined_varlen( B: (seqlen, ngroups, dstate) C: (seqlen, ngroups, dstate) chunk_size: int - seq_idx: (seqlen) - cu_seqlens: (batch + 1) + cu_seqlens: (batch + 1,) + cu_chunk_seqlens: (nchunks + 1,) + last_chunk_indices: (batch,) + seq_idx: (nchunks,) out: (seqlen, nheads, headdim) preallocated output tensor D: (nheads, headdim) or (nheads,) z: (seqlen, nheads, headdim) @@ -228,9 +214,9 @@ def mamba_chunk_scan_combined_varlen( dt_bias=dt_bias, initial_states=initial_states, seq_idx=seq_idx, - chunk_indices=chunk_indices, - chunk_offsets=chunk_offsets, cu_seqlens=cu_seqlens, + cu_chunk_seqlens=cu_chunk_seqlens, + last_chunk_indices=last_chunk_indices, dt_softplus=dt_softplus, dt_limit=dt_limit, state_dtype=state_dtype) diff --git a/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py b/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py index 71a8a4b0a1c8..f09af262cfc2 100644 --- a/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py +++ b/vllm/model_executor/layers/mamba/ops/ssd_state_passing.py @@ -30,8 +30,7 @@ def _state_passing_fwd_kernel( dA_cs_ptr, initstates_ptr, seq_idx_ptr, - chunk_offsets_ptr, - chunk_meta_num, + cu_chunk_seqlens_ptr, # Matrix dimensions dim: tl.constexpr, nchunks, @@ -50,94 +49,52 @@ def _state_passing_fwd_kernel( stride_initstates_batch: tl.int64, stride_initstates_head: tl.int64, stride_initstates_dim: tl.constexpr, - stride_seq_idx_seqlen: tl.constexpr, + stride_seq_idx_chunk: tl.constexpr, # Meta-parameters HAS_INITSTATES: tl.constexpr, BLOCK_SIZE: tl.constexpr, ): pid_h = tl.program_id(axis=1) pid_m = tl.program_id(axis=0) + states_ptr += pid_h * stride_states_head dA_cs_ptr += pid_h * stride_dA_cs_head + (chunk_size - 1) * stride_dA_cs_csize out_ptr += pid_h * stride_out_head - if HAS_INITSTATES: - initstates_ptr += pid_h * stride_initstates_head offs_m = pid_m * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE) states_ptrs = states_ptr + offs_m * stride_states_dim out_ptrs = out_ptr + offs_m * stride_out_dim - # - states will be the past state of the sequence that continues on the current check - if not HAS_INITSTATES: - states = tl.zeros((BLOCK_SIZE, ), dtype=tl.float32) - else: - initstates_ptr += offs_m * stride_initstates_dim - initstates_ptrs = initstates_ptr - # - for cont batches, for the first chunk mean it will be the first batch's - # init state + if HAS_INITSTATES: + initstates_ptrs = initstates_ptr \ + + pid_h * stride_initstates_head \ + + offs_m * stride_initstates_dim + states = tl.load(initstates_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32) + else: + states = tl.zeros((BLOCK_SIZE, ), dtype=tl.float32) - tl.store(out_ptrs, states, mask=offs_m < dim) - out_ptrs += stride_out_chunk - 
prev_seq_idx_chunk_end = 0 - logical_chunk_idx = 0 - for c in range(nchunks - 1): + prev_seq_idx = 0 + for c in range(nchunks): new_states = tl.load(states_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32) dA_cs = tl.load(dA_cs_ptr).to(tl.float32) - scale_mask = True - # - the seq to pass forward is the one that is flushed to the right - # boundary. - # - that is given by seq_idx_chunk_end below: the sequence index at the end of the chunk. - seq_idx_chunk_end = tl.load(seq_idx_ptr + - (min((c + 1) * chunk_size, seqlen) - 1) * - stride_seq_idx_seqlen) - - if HAS_INITSTATES: - if prev_seq_idx_chunk_end != seq_idx_chunk_end: - # this means in the current chunk the rightmost flushed seq - # has changed. - # - so we do not propagate the state from previous chunk - # - but rather we load that sequence's init state - initstates_ptrs = initstates_ptr + seq_idx_chunk_end * stride_initstates_batch - - # - update state with seq_idx_new's init state + seq_idx = tl.load(seq_idx_ptr + c * stride_seq_idx_chunk) + # we have started a new sequence + if prev_seq_idx != seq_idx: + if HAS_INITSTATES: + initstates_ptrs = initstates_ptr + seq_idx * stride_initstates_batch \ + + pid_h * stride_initstates_head \ + + offs_m * stride_initstates_dim states = tl.load(initstates_ptrs, mask=offs_m < dim, other=0.0).to(tl.float32) + else: + states = tl.zeros((BLOCK_SIZE, ), dtype=tl.float32) - # - we need to consider the cumsum only of the last sequence in the chunk - # - find its starting position (given by c_off of the logical chunk index) - # - and subtract the cumsum just before that position from the total cumsum - # - first, update the logical chunk index (add the number of sequences in the current physical chunk): - # sequence index at the start of the current chunk - seq_idx_chunk_start = tl.load(seq_idx_ptr + - min(c * chunk_size, seqlen) * - stride_seq_idx_seqlen) - logical_chunk_idx += seq_idx_chunk_end - seq_idx_chunk_start - # - load the chunk offset: - c_off = tl.load(chunk_offsets_ptr + logical_chunk_idx, - mask=logical_chunk_idx < chunk_meta_num, - other=0) - # - if offset is 0, then the sequence starts at the beginning of the chunk, and we don't need to subtract anything - if c_off > 0: - # - dA_cs_ptr currently points to the cumsum at the end of the chunk - subtract the chunk size and add the offset - dA_cs_boundary = tl.load( - dA_cs_ptr - (chunk_size - 1) * stride_dA_cs_csize + - (c_off - 1) * stride_dA_cs_csize, - mask=(c_off - 1) > -1 and c_off < chunk_size, - other=0.0) - dA_cs -= dA_cs_boundary - - # - increment logical chunk index for every physical chunk - logical_chunk_idx += 1 - else: - scale_mask = seq_idx_chunk_end == prev_seq_idx_chunk_end - prev_seq_idx_chunk_end = seq_idx_chunk_end - - scale = tl.where(scale_mask, tl.exp(dA_cs), 0.0) - states = scale * states + new_states + prev_seq_idx = seq_idx + states = tl.exp(dA_cs) * states + new_states tl.store(out_ptrs, states, mask=offs_m < dim) states_ptrs += stride_states_chunk @@ -148,8 +105,8 @@ def _state_passing_fwd_kernel( def _state_passing_fwd( states, dA_cumsum, + cu_chunk_seqlens, seq_idx, - chunk_offsets, initial_states=None, out_dtype=None, ): @@ -175,9 +132,7 @@ def _state_passing_fwd( dA_cs_ptr=dA_cumsum, initstates_ptr=initial_states, seq_idx_ptr=seq_idx, - chunk_offsets_ptr=chunk_offsets, - chunk_meta_num=len(chunk_offsets) - if chunk_offsets is not None else 0, + cu_chunk_seqlens_ptr=cu_chunk_seqlens, dim=dim, nchunks=nchunks, seqlen=seqlen if seq_idx is not None else 0, @@ -194,7 +149,7 @@ def _state_passing_fwd( 
stride_initstates_batch=initial_states_strides[0], stride_initstates_head=initial_states_strides[1], stride_initstates_dim=initial_states_strides[2], - stride_seq_idx_seqlen=seq_idx.stride(0), + stride_seq_idx_chunk=seq_idx.stride(0), HAS_INITSTATES=initial_states is not None, ) return out diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index 03265b13de50..8234d40e94ab 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -260,9 +260,9 @@ def forward_cuda( prep_initial_states = attn_metadata.prep_initial_states chunk_size = attn_metadata.chunk_size seq_idx_p = attn_metadata.seq_idx_p - chunk_indices_p = attn_metadata.chunk_indices_p - chunk_offsets_p = attn_metadata.chunk_offsets_p query_start_loc_p = attn_metadata.query_start_loc_p + cu_chunk_seqlen_p = attn_metadata.cu_chunk_seqlen_p + last_chunk_indices_p = attn_metadata.last_chunk_indices_p # 1. Gated MLP's linear projection projected_states = self.in_proj(hidden_states) @@ -368,9 +368,9 @@ def forward_cuda( self.num_heads // self.tp_size, self.head_dim), dt_bias=self.dt_bias, seq_idx=seq_idx_p, - chunk_indices=chunk_indices_p, - chunk_offsets=chunk_offsets_p, cu_seqlens=query_start_loc_p, + cu_chunk_seqlens=cu_chunk_seqlen_p, + last_chunk_indices=last_chunk_indices_p, initial_states=initial_states, dt_softplus=True, dt_limit=(0.0, float("inf")), diff --git a/vllm/v1/attention/backends/mamba2_attn.py b/vllm/v1/attention/backends/mamba2_attn.py index 6f16fda962ae..e4f16f37a430 100644 --- a/vllm/v1/attention/backends/mamba2_attn.py +++ b/vllm/v1/attention/backends/mamba2_attn.py @@ -1,6 +1,5 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import math from dataclasses import dataclass from typing import Optional @@ -8,6 +7,7 @@ from vllm.attention.backends.abstract import AttentionBackend from vllm.config import VllmConfig +from vllm.utils import cdiv from vllm.v1.attention.backends.mamba_attn import ( BaseMambaAttentionMetadataBuilder) from vllm.v1.attention.backends.utils import (PAD_SLOT_ID, @@ -17,91 +17,6 @@ from vllm.v1.kv_cache_interface import AttentionSpec -def _query_start_loc_to_chunk_indices_offsets( - query_start_loc: torch.Tensor, chunk_size: int, - total_seqlens: int) -> tuple[torch.Tensor, torch.Tensor]: - """ - Args: - query_start_loc (torch.Tensor): 1D tensor of cumulative sequence - lengths, shape (num_seqs + 1,). - The first element should be 0. Each entry represents the starting - index of a sequence in the flattened token array. - chunk_size (int): The size of each physical mamba chunk - (number of tokens per chunk). - total_seqlens (int): The total number of tokens in the batch. - - Returns: - Tuple[torch.Tensor, torch.Tensor]: A tuple containing: - - chunk_indices (torch.Tensor): 1D tensor of indices - indicating the physical chunk for each logical chunk. - - chunk_offsets (torch.Tensor): 1D tensor of offsets - indicating the starting index of each logical chunk within - its physical chunk. - - This function computes the chunk indices and offsets for the given - query_start_loc and chunk_size. Both are tensors of integers with length N, - where N is the number of logical (pseudo) chunks. - A logical chunk is a sequence of tokens that are all part of the same - sequence and are all in the same physical mamba chunk. - In other words, a logical chunk changes every time we cross a sequence - boundary or a physical mamba chunk boundary. 
- Logical chunks are needed to handle batched requests with initial states - (see _state_passing_fwd and _chunk_scan_fwd). - The chunk_indices tensor contains the index of the physical chunk for each - logical chunk. - The chunk_offsets tensor contains the offset (AKA starting index) of the - logical chunk in the physical chunk. - - Example: - query_start_loc = [0, 5, 10] - chunk_size = 8 - total_seqlens = 10 - -> chunk_indices = [0, 0, 1] - -> chunk_offsets = [0, 5, 0] - - In this example, we have 2 sequences, each with 5 tokens. The physical - chunk size is 8 tokens. - We have three logical chunks: - - the first logical chunk starts at token 0 in the first physical chunk - and contains all 5 tokens from the first sequence - - the second logical chunk starts at token 5 in the first physical chunk - and contains first 3 tokens from the second sequence - - the third logical chunk starts at token 0 in the second physical chunk - and contains the remaining 2 tokens from the second sequence - """ - - cu_seqlens = query_start_loc[1:] # remove prepended 0 - - # outputs will have length expansion of chunks that do not divide - # chunk_size - N = math.ceil(total_seqlens / chunk_size) + (cu_seqlens[:-1] % chunk_size - > 0).sum() - chunk_indices = torch.arange(N, - dtype=torch.int, - device=query_start_loc.device) - chunk_offsets = torch.zeros((N, ), - dtype=torch.int, - device=query_start_loc.device) - - p = 0 # num of insertions - for s, e in zip(cu_seqlens[:-1], cu_seqlens[1:]): - - # if does not divide chunk_size, then there is one chunk insertion - p += (s % chunk_size > 0) - - # get the dimensions - # - the + 1 for _e is to shift the boundary by one chunk - # - this shifting is not needed if chunk_size divides e - _s, _e = s // chunk_size + p, e // chunk_size + p + (e % chunk_size - > 0) - - # adjust indices and offsets - chunk_indices[_s:_e] -= p - chunk_offsets[_s] = s % chunk_size - - return chunk_indices, chunk_offsets - - class Mamba2AttentionBackend(AttentionBackend): @staticmethod @@ -125,8 +40,16 @@ class Mamba2AttentionMetadata: # the batch has no prefill request. has_initial_states_p: Optional[torch.Tensor] seq_idx_p: Optional[torch.Tensor] - chunk_indices_p: Optional[torch.Tensor] - chunk_offsets_p: Optional[torch.Tensor] + + # cu_chunk_seqlen_p is a tensor of shape (nchunks+1,) that contains, for + # each chunk, its offests into the varlen sequence dimension. It is defined + # such that the i-th chunk contains tokens from cu_chunk_seqlen_p[i] to + # cu_chunk_seqlen_p[i+1]. + cu_chunk_seqlen_p: Optional[torch.Tensor] + + # last_chunk_indices_p is a tensor of shape (batch,) that contains the + # index of the last chunk for every sequence in the (prefill) batch. 
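+    # Example (illustrative values, chunk_size=4): two prefill requests with
+    # 6 and 3 new tokens and no computed tokens give
+    # cu_chunk_seqlen_p = [0, 4, 6, 9], seq_idx_p = [0, 0, 1] and
+    # last_chunk_indices_p = [1, 2].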
+ last_chunk_indices_p: Optional[torch.Tensor] state_indices_tensor: torch.Tensor # shape: [batch,] @@ -151,13 +74,14 @@ def build(self, common_attn_metadata: CommonAttentionMetadata, fast_build: bool = False) -> Mamba2AttentionMetadata: num_reqs = common_attn_metadata.num_reqs - query_start_loc_p = None seq_lens = common_attn_metadata.seq_lens + query_start_loc_p = None seq_idx_p = None - chunk_indices_p, chunk_offsets_p = None, None + cu_chunk_seqlen_p = None + last_chunk_indices_p = None + # Need flags to indicate if there are initial states - # currently we really only support the FlashAttention backend has_initial_states_p = None prep_initial_states = False @@ -171,7 +95,7 @@ def build(self, common_attn_metadata, decode_threshold=self.reorder_batch_threshold)) - # Compute seq_idx, chunk_indices and chunk_offsets for prefill only + # Compute seq_idx for prefill only if num_prefills > 0: #[batch,] has_initial_states_cpu = ( @@ -184,21 +108,68 @@ def build(self, query_start_loc_p = common_attn_metadata.query_start_loc[ -num_prefills - 1:] - num_decode_tokens - seq_idx_p = torch.repeat_interleave(torch.arange( - num_prefills, - dtype=torch.int32, - device=query_start_loc_p.device), - query_start_loc_p.diff(), - output_size=num_prefill_tokens) - - # We compute metadata for chunked prefill once at the top level - # model forward and reuse them in mamba layers. If not needed, - # they will be ignored inside mamba kernels. - if prep_initial_states: - chunk_indices_p, chunk_offsets_p = ( - _query_start_loc_to_chunk_indices_offsets( - query_start_loc_p, self.chunk_size, - num_prefill_tokens)) + num_computed_tokens_p = \ + common_attn_metadata.num_computed_tokens_cpu[ + num_reqs - num_prefills:num_reqs] + query_start_loc_p_cpu = common_attn_metadata.query_start_loc_cpu[ + -num_prefills - 1:] - num_decode_tokens + + # The code below carefully constructs the chunks such that: + # 1. Chunks contain tokens from a *single* sequence only. + # 2. For every sequence, we are guaranteed that we can + # retrieve the mamba state *every* chunk_size tokens. + # Constraint (1) dramatically simplifies the mamba2 kernels. + # Constraint (2) dramatically simplifies the implementation + # of prefix caching for mamba2 (wip). We need to take care + # of the interaction with chunked prefill in order to + # satisfy constraint (2). + # TODO (tdoublep): This code could probably be optimized. + cu_chunk_seqlen = [] + seq_idx = [] + last_chunk_indices = [] + seqlen_pos = 0 + for req_idx in range(num_prefills): + this_num_computed = num_computed_tokens_p[req_idx].item() + this_new_tokens = query_start_loc_p_cpu[req_idx + 1].item( + ) - query_start_loc_p_cpu[req_idx].item() + + # if computed tokens are not chunk-aligned, use the first + # chunk to finish it off + if this_num_computed % self.chunk_size != 0: + seq_idx.append(req_idx) + cu_chunk_seqlen.append(seqlen_pos) + # how many tokens to finish the chunk? 
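+                    # (e.g. with chunk_size=8 and 5 computed tokens, at most
+                    # 3 new tokens go into this alignment chunk)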
+ chunk_len = cdiv(this_num_computed, self.chunk_size + ) * self.chunk_size - this_num_computed + # we can only use at most this_new_tokens + chunk_len = min(chunk_len, this_new_tokens) + seqlen_pos += chunk_len + this_new_tokens -= chunk_len + + n_chunks = cdiv(this_new_tokens, self.chunk_size) + for chunk in range(n_chunks): + seq_idx.append(req_idx) + cu_chunk_seqlen.append(seqlen_pos) + chunk_len = min(self.chunk_size, this_new_tokens) + seqlen_pos += chunk_len + this_new_tokens -= chunk_len + + assert this_new_tokens == 0 + last_chunk_indices.append(len(cu_chunk_seqlen) - 1) + + cu_chunk_seqlen.append(seqlen_pos) + + seq_idx_p = torch.as_tensor(seq_idx, + device=query_start_loc_p.device, + dtype=torch.int32) + cu_chunk_seqlen_p = torch.as_tensor( + cu_chunk_seqlen, + device=query_start_loc_p.device, + dtype=torch.int32) + last_chunk_indices_p = torch.as_tensor( + last_chunk_indices, + device=query_start_loc_p.device, + dtype=torch.int32) nums_dict, batch_ptr, token_chunk_offset_ptr = \ compute_causal_conv1d_metadata(query_start_loc_p) @@ -222,9 +193,9 @@ def build(self, chunk_size=self.chunk_size, has_initial_states_p=has_initial_states_p, seq_idx_p=seq_idx_p, - chunk_indices_p=chunk_indices_p, - chunk_offsets_p=chunk_offsets_p, state_indices_tensor=state_indices_tensor, + cu_chunk_seqlen_p=cu_chunk_seqlen_p, + last_chunk_indices_p=last_chunk_indices_p, nums_dict=nums_dict, batch_ptr=batch_ptr, token_chunk_offset_ptr=token_chunk_offset_ptr, From 8eb0a1d90621927538697f75b4e17c6f79153b4d Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Mon, 29 Sep 2025 14:31:34 -0700 Subject: [PATCH 488/518] [Doc] Polish example for torchrun dp (#25899) --- .../offline_inference/torchrun_dp_example.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/examples/offline_inference/torchrun_dp_example.py b/examples/offline_inference/torchrun_dp_example.py index 8e888a100254..295d1637528c 100644 --- a/examples/offline_inference/torchrun_dp_example.py +++ b/examples/offline_inference/torchrun_dp_example.py @@ -4,6 +4,11 @@ experimental support for data-parallel inference with torchrun Note the data load balancing and distribution is done out of the vllm engine, no internal lb supported in external_launcher mode. 
+ +To run this example: +```bash +$ torchrun --nproc-per-node=2 examples/offline_inference/torchrun_dp_example.py +``` """ from vllm import LLM, SamplingParams @@ -14,7 +19,7 @@ "The president of the United States is", "The capital of France is", "The future of AI is", -] * 50 +] # Create sampling parameters, the same across all ranks sampling_params = SamplingParams(temperature=0.8, top_p=0.95) @@ -45,14 +50,13 @@ outputs = llm.generate(prompts, sampling_params) - -# all ranks will have the same outputs -print("-" * 50) for output in outputs: prompt = output.prompt generated_text = output.outputs[0].text - print(f"Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n") - print("-" * 50) + print( + f"DP Rank: {dp_rank} Prompt: {prompt!r}\nGenerated text: {generated_text!r}\n" + ) + """ Further tips: From 2e4fe48c370e833350eae092eddd1490b65ff529 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Mon, 29 Sep 2025 23:35:14 +0200 Subject: [PATCH 489/518] [NIXL] Increase default KV block eviction timeout on P (#25897) Signed-off-by: NickLucche --- docs/features/nixl_connector_usage.md | 2 +- vllm/envs.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/features/nixl_connector_usage.md b/docs/features/nixl_connector_usage.md index afecbc82947b..5e273af05dc5 100644 --- a/docs/features/nixl_connector_usage.md +++ b/docs/features/nixl_connector_usage.md @@ -84,7 +84,7 @@ python tests/v1/kv_connector/nixl_integration/toy_proxy_server.py \ - Connection info is passed via KVTransferParams from prefiller to decoder for handshake - `VLLM_NIXL_ABORT_REQUEST_TIMEOUT`: Timeout (in seconds) for automatically releasing the prefiller’s KV cache for a particular request. (Optional) - - Default: 120 + - Default: 480 - If a request is aborted and the decoder has not yet read the KV-cache blocks through the nixl channel, the prefill instance will release its KV-cache blocks after this timeout to avoid holding them indefinitely. ## Multi-Instance Setup diff --git a/vllm/envs.py b/vllm/envs.py index f06c860b8297..ffa7ed5c3aa5 100755 --- a/vllm/envs.py +++ b/vllm/envs.py @@ -174,7 +174,7 @@ "NONE"] = "NONE" VLLM_ROCM_QUICK_REDUCE_CAST_BF16_TO_FP16: bool = True VLLM_ROCM_QUICK_REDUCE_MAX_SIZE_BYTES_MB: Optional[int] = None - VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 120 + VLLM_NIXL_ABORT_REQUEST_TIMEOUT: int = 480 VLLM_USE_CUDNN_PREFILL: bool = False VLLM_ENABLE_CUDAGRAPH_GC: bool = False VLLM_LOOPBACK_IP: str = "" @@ -1330,7 +1330,7 @@ def get_vllm_port() -> Optional[int]: # consumer. This is only applicable when using NixlConnector in a # disaggregated decode-prefill setup. 
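    # For example, launching the prefill instance with
    # VLLM_NIXL_ABORT_REQUEST_TIMEOUT=600 keeps the KV blocks of an aborted
    # request for up to 10 minutes before they are freed.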
"VLLM_NIXL_ABORT_REQUEST_TIMEOUT": - lambda: int(os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "120")), + lambda: int(os.getenv("VLLM_NIXL_ABORT_REQUEST_TIMEOUT", "480")), # Controls whether or not to use cudnn prefill "VLLM_USE_CUDNN_PREFILL": From 6a113d9aed8221a9c234535958e70e34ab6cac5b Mon Sep 17 00:00:00 2001 From: Aaron Pham Date: Mon, 29 Sep 2025 19:26:11 -0400 Subject: [PATCH 490/518] [V0 Deprecation] Remove `vllm.worker` and update according imports (#25901) --- .../tensorizer_loader/conftest.py | 2 +- tools/pre_commit/check_pickle_imports.py | 1 - vllm/executor/executor_base.py | 10 +- vllm/executor/ray_utils.py | 2 +- vllm/executor/uniproc_executor.py | 10 +- vllm/platforms/cuda.py | 12 +- vllm/platforms/rocm.py | 12 +- vllm/v1/executor/multiproc_executor.py | 4 +- vllm/v1/worker/worker_base.py | 271 ++++++++++++++++- vllm/worker/__init__.py | 0 vllm/worker/worker_base.py | 279 ------------------ 11 files changed, 276 insertions(+), 327 deletions(-) delete mode 100644 vllm/worker/__init__.py delete mode 100644 vllm/worker/worker_base.py diff --git a/tests/model_executor/model_loader/tensorizer_loader/conftest.py b/tests/model_executor/model_loader/tensorizer_loader/conftest.py index 571dc2e0eb50..cc02d7ecf20b 100644 --- a/tests/model_executor/model_loader/tensorizer_loader/conftest.py +++ b/tests/model_executor/model_loader/tensorizer_loader/conftest.py @@ -10,7 +10,7 @@ from vllm.model_executor.model_loader.tensorizer import TensorizerConfig from vllm.utils import get_distributed_init_method, get_ip, get_open_port from vllm.v1.executor.abstract import UniProcExecutor -from vllm.worker.worker_base import WorkerWrapperBase +from vllm.v1.worker.worker_base import WorkerWrapperBase MODEL_REF = "facebook/opt-125m" diff --git a/tools/pre_commit/check_pickle_imports.py b/tools/pre_commit/check_pickle_imports.py index acbbc1f181d6..c97a5b0b6c71 100644 --- a/tools/pre_commit/check_pickle_imports.py +++ b/tools/pre_commit/check_pickle_imports.py @@ -36,7 +36,6 @@ 'benchmarks/cutlass_benchmarks/w8a8_benchmarks.py', 'benchmarks/cutlass_benchmarks/sparse_benchmarks.py', # cloudpickle - 'vllm/worker/worker_base.py', 'vllm/executor/mp_distributed_executor.py', 'vllm/executor/ray_distributed_executor.py', 'vllm/entrypoints/llm.py', diff --git a/vllm/executor/executor_base.py b/vllm/executor/executor_base.py index e3063ec2b8ab..fe80be61410c 100644 --- a/vllm/executor/executor_base.py +++ b/vllm/executor/executor_base.py @@ -19,7 +19,7 @@ from vllm.tasks import SupportedTask from vllm.utils import make_async from vllm.v1.outputs import PoolerOutput, SamplerOutput -from vllm.worker.worker_base import WorkerBase +from vllm.v1.worker.worker_base import WorkerBase logger = init_logger(__name__) @@ -30,7 +30,7 @@ class ExecutorBase(ABC): """Base class for all executors. An executor is responsible for executing the model on one device, - or it can be a distributed executor + or it can be a distributed executor that can execute the model on multiple devices. """ @@ -83,7 +83,7 @@ def collective_rpc(self, Returns: A list containing the results from each worker. - + Note: It is recommended to use this API to only pass control messages, and set up data-plane communication to pass data. @@ -100,7 +100,7 @@ def determine_num_available_blocks(self) -> tuple[int, int]: Returns a tuple `(num_gpu_blocks, num_cpu_blocks)`, where `num_gpu_blocks` are blocks that are "active" on the device and can be - appended to. + appended to. `num_cpu_blocks` refers to "swapped" blocks in CPU memory and cannot be appended to. 
""" @@ -327,7 +327,7 @@ def _run_workers( run only in the remote TP workers, not the driver worker. It will also be run asynchronously and return a list of futures rather than blocking on the results. - + # TODO: simplify and merge with collective_rpc """ raise NotImplementedError diff --git a/vllm/executor/ray_utils.py b/vllm/executor/ray_utils.py index 0bdeb2856989..d8eb7977dbde 100644 --- a/vllm/executor/ray_utils.py +++ b/vllm/executor/ray_utils.py @@ -16,7 +16,7 @@ from vllm.platforms import current_platform from vllm.sequence import ExecuteModelRequest, IntermediateTensors from vllm.utils import get_ip -from vllm.worker.worker_base import WorkerWrapperBase +from vllm.v1.worker.worker_base import WorkerWrapperBase if TYPE_CHECKING: from vllm.v1.core.sched.output import SchedulerOutput diff --git a/vllm/executor/uniproc_executor.py b/vllm/executor/uniproc_executor.py index 7a753d608a43..d669592e75f1 100644 --- a/vllm/executor/uniproc_executor.py +++ b/vllm/executor/uniproc_executor.py @@ -19,7 +19,7 @@ from vllm.v1.engine import ReconfigureDistributedRequest, ReconfigureRankType from vllm.v1.executor.utils import get_and_update_mm_cache from vllm.v1.outputs import AsyncModelRunnerOutput -from vllm.worker.worker_base import WorkerWrapperBase +from vllm.v1.worker.worker_base import WorkerWrapperBase logger = init_logger(__name__) @@ -160,10 +160,10 @@ def determine_num_available_blocks(self) -> Tuple[int, int]: """ Determine the number of available KV blocks. Add an additional all_reduce to get the min across all ranks. - Note that even if we have the same `gpu_memory_utilization` and - `swap_space`, the available memory in every rank might still - differ because NCCL can take different amounts of memory in - different ranks. Therefore, it is necessary to test if all ranks + Note that even if we have the same `gpu_memory_utilization` and + `swap_space`, the available memory in every rank might still + differ because NCCL can take different amounts of memory in + different ranks. Therefore, it is necessary to test if all ranks agree on the same KV cache configuration. 
""" a, b = super().determine_num_available_blocks() diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 6738d3dec286..1463fe34fc75 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -110,17 +110,7 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: model_config = vllm_config.model_config if parallel_config.worker_cls == "auto": - if vllm_config.speculative_config: - if not envs.VLLM_USE_V1: - raise NotImplementedError( - "Speculative decoding is not supported on vLLM V0.") - parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker" - else: - if envs.VLLM_USE_V1: - parallel_config.worker_cls = \ - "vllm.v1.worker.gpu_worker.Worker" - else: - parallel_config.worker_cls = "vllm.worker.worker.Worker" + parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker" cache_config = vllm_config.cache_config if cache_config and cache_config.block_size is None: diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index 1dacd026b667..f67568bf07c1 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -327,17 +327,7 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: cache_config.block_size = 16 if parallel_config.worker_cls == "auto": - if vllm_config.speculative_config: - if not use_v1: - raise NotImplementedError( - "Speculative decoding is not supported on vLLM V0.") - parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker" - else: - if use_v1: - parallel_config.worker_cls = \ - "vllm.v1.worker.gpu_worker.Worker" - else: - parallel_config.worker_cls = "vllm.worker.worker.Worker" + parallel_config.worker_cls = "vllm.v1.worker.gpu_worker.Worker" # Aiter rms norm perform best when CUDA Graph capture is enabled. if (use_v1 and use_aiter_rms_norm and not is_eager_execution and "-rms_norm" not in compilation_config.custom_ops): diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index ef90af263664..eecdf8def6de 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -41,7 +41,7 @@ from vllm.v1.executor.utils import get_and_update_mm_cache from vllm.v1.outputs import (AsyncModelRunnerOutput, DraftTokenIds, ModelRunnerOutput) -from vllm.worker.worker_base import WorkerWrapperBase +from vllm.v1.worker.worker_base import WorkerWrapperBase logger = init_logger(__name__) @@ -702,7 +702,7 @@ def setup_proc_title_and_log_prefix(enable_ep: bool) -> None: def set_multiprocessing_worker_envs(): """ Set up environment variables that should be used when there are workers - in a multiprocessing environment. This should be called by the parent + in a multiprocessing environment. 
This should be called by the parent process before worker processes are created""" _maybe_force_spawn() diff --git a/vllm/v1/worker/worker_base.py b/vllm/v1/worker/worker_base.py index 038ce4b54f96..5b393ee6bf3e 100644 --- a/vllm/v1/worker/worker_base.py +++ b/vllm/v1/worker/worker_base.py @@ -1,23 +1,35 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Optional +from __future__ import annotations + +import os +from typing import Any, Callable, Optional, TypeVar, Union import torch import torch.nn as nn -from vllm.config import VllmConfig +from vllm.config import VllmConfig, set_current_vllm_config from vllm.logger import init_logger +from vllm.lora.request import LoRARequest +from vllm.sequence import ExecuteModelRequest +from vllm.utils import (enable_trace_function_call_for_thread, + resolve_obj_by_qualname, run_method, + update_environment_variables, + warn_for_unimplemented_methods) from vllm.v1.kv_cache_interface import KVCacheSpec -from vllm.worker.worker_base import WorkerBase as WorkerBaseV0 +from vllm.v1.outputs import SamplerOutput logger = init_logger(__name__) +_R = TypeVar("_R") -class WorkerBase(WorkerBaseV0): - """ - Abstract class for v1 worker, mainly define some methods for v1. - For methods shared by v0 and v1, define them in v0 WorkerBase + +@warn_for_unimplemented_methods +class WorkerBase: + """Worker interface that allows vLLM to cleanly separate implementations for + different hardware. Also abstracts control plane communication, e.g., to + communicate request metadata to other workers. """ def __init__( @@ -27,10 +39,10 @@ def __init__( rank: int, distributed_init_method: str, is_driver_worker: bool = False, - ): + ) -> None: """ Initialize common worker components. - + Args: vllm_config: Complete vLLM configuration local_rank: Local device index @@ -39,8 +51,21 @@ def __init__( is_driver_worker: Whether this worker handles driver responsibilities """ - # Configuration storage - super().__init__(vllm_config=vllm_config) + self.vllm_config = vllm_config + self.model_config = vllm_config.model_config + self.cache_config = vllm_config.cache_config + self.lora_config = vllm_config.lora_config + self.load_config = vllm_config.load_config + self.parallel_config = vllm_config.parallel_config + self.scheduler_config = vllm_config.scheduler_config + self.device_config = vllm_config.device_config + self.speculative_config = vllm_config.speculative_config + self.observability_config = vllm_config.observability_config + self.kv_transfer_config = vllm_config.kv_transfer_config + self.compilation_config = vllm_config.compilation_config + + from vllm.platforms import current_platform + self.current_platform = current_platform self.parallel_config.rank = rank self.local_rank = local_rank @@ -63,3 +88,227 @@ def compile_or_warm_up_model(self) -> None: def check_health(self) -> None: """Basic health check (override for device-specific checks).""" return + + def init_device(self) -> None: + """Initialize device state, such as loading the model or other on-device + memory allocations. + """ + raise NotImplementedError + + def initialize_cache(self, num_gpu_blocks: int, + num_cpu_blocks: int) -> None: + """Initialize the KV cache with the given size in blocks. 
+ """ + raise NotImplementedError + + def get_model(self) -> nn.Module: + raise NotImplementedError + + def apply_model(self, fn: Callable[[nn.Module], _R]) -> _R: + """Apply a function on the model inside this worker.""" + return fn(self.get_model()) + + def load_model(self) -> None: + """Load model onto target device.""" + raise NotImplementedError + + def execute_model( + self, + execute_model_req: Optional[ExecuteModelRequest] = None + ) -> Optional[list[SamplerOutput]]: + raise NotImplementedError + + def start_worker_execution_loop(self) -> None: + """Execute model loop in parallel worker. + + You can stop the loop by executing a driver worker with an empty output. + See `stop_remote_worker_execution_loop` for more details. + """ + with self.current_platform.inference_mode(): + while True: + output = self.execute_model(execute_model_req=None) + if output is None: + return None + + def determine_num_available_blocks(self) -> tuple[int, int]: + """Determine the number of available blocks for the GPU KV cache and + swappable CPU KV cache. + + The implementation may run profiling or other heuristics to determine + the size of caches. + + Returns a tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks + are blocks that are "active" on the device and can be appended to. + num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be + appended to. + """ + raise NotImplementedError + + def get_cache_block_size_bytes(self) -> int: + """Return the size of a single cache block, in bytes. Used in + speculative decoding. + """ + raise NotImplementedError + + def add_lora(self, lora_request: LoRARequest) -> bool: + raise NotImplementedError + + def remove_lora(self, lora_id: int) -> bool: + raise NotImplementedError + + def pin_lora(self, lora_id: int) -> bool: + raise NotImplementedError + + def list_loras(self) -> set[int]: + raise NotImplementedError + + @property + def vocab_size(self) -> int: + """Get vocabulary size from model configuration.""" + return self.model_config.get_vocab_size() + + def shutdown(self) -> None: + """Clean up resources held by the worker.""" + return + + +class WorkerWrapperBase: + """ + This class represents one process in an executor/engine. It is responsible + for lazily initializing the worker and handling the worker's lifecycle. + We first instantiate the WorkerWrapper, which remembers the worker module + and class name. Then, when we call `update_environment_variables`, and the + real initialization happens in `init_worker`. + """ + + def __init__( + self, + vllm_config: VllmConfig, + rpc_rank: int = 0, + ) -> None: + """ + Initialize the worker wrapper with the given vllm_config and rpc_rank. + Note: rpc_rank is the rank of the worker in the executor. In most cases, + it is also the rank of the worker in the distributed group. However, + when multiple executors work together, they can be different. + e.g. in the case of SPMD-style offline inference with TP=2, + users can launch 2 engines/executors, each with only 1 worker. + All workers have rpc_rank=0, but they have different ranks in the TP + group. + """ + self.rpc_rank = rpc_rank + self.worker: Optional[WorkerBase] = None + self.vllm_config: Optional[VllmConfig] = None + # do not store this `vllm_config`, `init_worker` will set the final + # one. TODO: investigate if we can remove this field in + # `WorkerWrapperBase`, `init_cached_hf_modules` should be + # unnecessary now. 
+ if vllm_config.model_config is not None: + # it can be None in tests + trust_remote_code = vllm_config.model_config.trust_remote_code + if trust_remote_code: + # note: lazy import to avoid importing torch before initializing + from vllm.utils import init_cached_hf_modules + init_cached_hf_modules() + + def shutdown(self) -> None: + if self.worker is not None: + self.worker.shutdown() + + def adjust_rank(self, rank_mapping: dict[int, int]) -> None: + """ + Adjust the rpc_rank based on the given mapping. + It is only used during the initialization of the executor, + to adjust the rpc_rank of workers after we create all workers. + """ + if self.rpc_rank in rank_mapping: + self.rpc_rank = rank_mapping[self.rpc_rank] + + def update_environment_variables( + self, + envs_list: list[dict[str, str]], + ) -> None: + envs = envs_list[self.rpc_rank] + key = 'CUDA_VISIBLE_DEVICES' + if key in envs and key in os.environ: + # overwriting CUDA_VISIBLE_DEVICES is desired behavior + # suppress the warning in `update_environment_variables` + del os.environ[key] + update_environment_variables(envs) + + def init_worker(self, all_kwargs: list[dict[str, Any]]) -> None: + """ + Here we inject some common logic before initializing the worker. + Arguments are passed to the worker class constructor. + """ + kwargs = all_kwargs[self.rpc_rank] + self.vllm_config = kwargs.get("vllm_config") + assert self.vllm_config is not None, ( + "vllm_config is required to initialize the worker") + enable_trace_function_call_for_thread(self.vllm_config) + + from vllm.plugins import load_general_plugins + load_general_plugins() + + if isinstance(self.vllm_config.parallel_config.worker_cls, str): + worker_class = resolve_obj_by_qualname( + self.vllm_config.parallel_config.worker_cls) + else: + raise ValueError( + "passing worker_cls is no longer supported. Please pass keep the class in a separate module and pass the qualified name of the class as a string." 
# noqa: E501 + ) + if self.vllm_config.parallel_config.worker_extension_cls: + worker_extension_cls = resolve_obj_by_qualname( + self.vllm_config.parallel_config.worker_extension_cls) + extended_calls = [] + if worker_extension_cls not in worker_class.__bases__: + # check any conflicts between worker and worker_extension_cls + for attr in dir(worker_extension_cls): + if attr.startswith("__"): + continue + assert not hasattr(worker_class, attr), ( + f"Worker class {worker_class} already has an attribute" + f" {attr}, which conflicts with the worker" + f" extension class {worker_extension_cls}.") + if callable(getattr(worker_extension_cls, attr)): + extended_calls.append(attr) + # dynamically inherit the worker extension class + worker_class.__bases__ = worker_class.__bases__ + ( + worker_extension_cls, ) + logger.info( + "Injected %s into %s for extended collective_rpc calls %s", + worker_extension_cls, worker_class, extended_calls) + with set_current_vllm_config(self.vllm_config): + # To make vLLM config available during worker initialization + self.worker = worker_class(**kwargs) + assert self.worker is not None + + def initialize_from_config(self, kv_cache_configs: list[Any]) -> None: + kv_cache_config = kv_cache_configs[self.rpc_rank] + with set_current_vllm_config(self.vllm_config): + self.worker.initialize_from_config(kv_cache_config) # type: ignore + + def init_device(self): + with set_current_vllm_config(self.vllm_config): + # To make vLLM config available during device initialization + self.worker.init_device() # type: ignore + + def execute_method(self, method: Union[str, bytes], *args, **kwargs): + try: + # method resolution order: + # if a method is defined in this class, it will be called directly. + # otherwise, since we define `__getattr__` and redirect attribute + # query to `self.worker`, the method will be called on the worker. + return run_method(self, method, args, kwargs) + except Exception as e: + # if the driver worker also execute methods, + # exceptions in the rest worker may cause deadlock in rpc like ray + # see https://github.com/vllm-project/vllm/issues/3455 + # print the error and inform the user to solve the error + msg = (f"Error executing method {method!r}. 
" + "This might cause deadlock in distributed execution.") + logger.exception(msg) + raise e + + def __getattr__(self, attr): + return getattr(self.worker, attr) diff --git a/vllm/worker/__init__.py b/vllm/worker/__init__.py deleted file mode 100644 index e69de29bb2d1..000000000000 diff --git a/vllm/worker/worker_base.py b/vllm/worker/worker_base.py deleted file mode 100644 index 20fabef4f19b..000000000000 --- a/vllm/worker/worker_base.py +++ /dev/null @@ -1,279 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# SPDX-FileCopyrightText: Copyright contributors to the vLLM project - -import os -from typing import (Any, Callable, Dict, List, Optional, Set, Tuple, TypeVar, - Union) - -import cloudpickle -import torch.nn as nn - -from vllm.config import VllmConfig, set_current_vllm_config -from vllm.logger import init_logger -from vllm.lora.request import LoRARequest -from vllm.sequence import ExecuteModelRequest -from vllm.utils import (enable_trace_function_call_for_thread, - resolve_obj_by_qualname, run_method, - update_environment_variables, - warn_for_unimplemented_methods) -from vllm.v1.outputs import SamplerOutput - -logger = init_logger(__name__) - -_R = TypeVar("_R") - - -@warn_for_unimplemented_methods -class WorkerBase: - """Worker interface that allows vLLM to cleanly separate implementations for - different hardware. Also abstracts control plane communication, e.g., to - communicate request metadata to other workers. - """ - - def __init__( - self, - vllm_config: VllmConfig, - ) -> None: - self.vllm_config = vllm_config - self.model_config = vllm_config.model_config - self.cache_config = vllm_config.cache_config - self.lora_config = vllm_config.lora_config - self.load_config = vllm_config.load_config - self.parallel_config = vllm_config.parallel_config - self.scheduler_config = vllm_config.scheduler_config - self.device_config = vllm_config.device_config - self.speculative_config = vllm_config.speculative_config - self.observability_config = vllm_config.observability_config - self.kv_transfer_config = vllm_config.kv_transfer_config - self.compilation_config = vllm_config.compilation_config - from vllm.platforms import current_platform - self.current_platform = current_platform - - def init_device(self) -> None: - """Initialize device state, such as loading the model or other on-device - memory allocations. - """ - raise NotImplementedError - - def initialize_cache(self, num_gpu_blocks: int, - num_cpu_blocks: int) -> None: - """Initialize the KV cache with the given size in blocks. - """ - raise NotImplementedError - - def get_model(self) -> nn.Module: - raise NotImplementedError - - def apply_model(self, fn: Callable[[nn.Module], _R]) -> _R: - """Apply a function on the model inside this worker.""" - return fn(self.get_model()) - - def load_model(self) -> None: - """Load model onto target device.""" - raise NotImplementedError - - def execute_model( - self, - execute_model_req: Optional[ExecuteModelRequest] = None - ) -> Optional[List[SamplerOutput]]: - raise NotImplementedError - - def start_worker_execution_loop(self) -> None: - """Execute model loop in parallel worker. - - You can stop the loop by executing a driver worker with an empty output. - See `stop_remote_worker_execution_loop` for more details. 
- """ - with self.current_platform.inference_mode(): - while True: - output = self.execute_model(execute_model_req=None) - if output is None: - return None - - def determine_num_available_blocks(self) -> Tuple[int, int]: - """Determine the number of available blocks for the GPU KV cache and - swappable CPU KV cache. - - The implementation may run profiling or other heuristics to determine - the size of caches. - - Returns a Tuple[num_gpu_blocks, num_cpu_blocks], where num_gpu_blocks - are blocks that are "active" on the device and can be appended to. - num_cpu_blocks refers to "swapped" blocks in CPU memory and cannot be - appended to. - """ - raise NotImplementedError - - def get_cache_block_size_bytes(self) -> int: - """Return the size of a single cache block, in bytes. Used in - speculative decoding. - """ - raise NotImplementedError - - def add_lora(self, lora_request: LoRARequest) -> bool: - raise NotImplementedError - - def remove_lora(self, lora_id: int) -> bool: - raise NotImplementedError - - def pin_lora(self, lora_id: int) -> bool: - raise NotImplementedError - - def list_loras(self) -> Set[int]: - raise NotImplementedError - - @property - def vocab_size(self) -> int: - """Get vocabulary size from model configuration.""" - return self.model_config.get_vocab_size() - - def shutdown(self) -> None: - """Clean up resources held by the worker.""" - return - - -class WorkerWrapperBase: - """ - This class represents one process in an executor/engine. It is responsible - for lazily initializing the worker and handling the worker's lifecycle. - We first instantiate the WorkerWrapper, which remembers the worker module - and class name. Then, when we call `update_environment_variables`, and the - real initialization happens in `init_worker`. - """ - - def __init__( - self, - vllm_config: VllmConfig, - rpc_rank: int = 0, - ) -> None: - """ - Initialize the worker wrapper with the given vllm_config and rpc_rank. - Note: rpc_rank is the rank of the worker in the executor. In most cases, - it is also the rank of the worker in the distributed group. However, - when multiple executors work together, they can be different. - e.g. in the case of SPMD-style offline inference with TP=2, - users can launch 2 engines/executors, each with only 1 worker. - All workers have rpc_rank=0, but they have different ranks in the TP - group. - """ - self.rpc_rank = rpc_rank - self.worker: Optional[WorkerBase] = None - self.vllm_config: Optional[VllmConfig] = None - # do not store this `vllm_config`, `init_worker` will set the final - # one. TODO: investigate if we can remove this field in - # `WorkerWrapperBase`, `init_cached_hf_modules` should be - # unnecessary now. - if vllm_config.model_config is not None: - # it can be None in tests - trust_remote_code = vllm_config.model_config.trust_remote_code - if trust_remote_code: - # note: lazy import to avoid importing torch before initializing - from vllm.utils import init_cached_hf_modules - init_cached_hf_modules() - - def shutdown(self) -> None: - if self.worker is not None: - self.worker.shutdown() - - def adjust_rank(self, rank_mapping: Dict[int, int]) -> None: - """ - Adjust the rpc_rank based on the given mapping. - It is only used during the initialization of the executor, - to adjust the rpc_rank of workers after we create all workers. 
- """ - if self.rpc_rank in rank_mapping: - self.rpc_rank = rank_mapping[self.rpc_rank] - - def update_environment_variables(self, envs_list: List[Dict[str, - str]]) -> None: - envs = envs_list[self.rpc_rank] - key = 'CUDA_VISIBLE_DEVICES' - if key in envs and key in os.environ: - # overwriting CUDA_VISIBLE_DEVICES is desired behavior - # suppress the warning in `update_environment_variables` - del os.environ[key] - update_environment_variables(envs) - - def init_worker(self, all_kwargs: List[Dict[str, Any]]) -> None: - """ - Here we inject some common logic before initializing the worker. - Arguments are passed to the worker class constructor. - """ - kwargs = all_kwargs[self.rpc_rank] - self.vllm_config = kwargs.get("vllm_config") - assert self.vllm_config is not None, ( - "vllm_config is required to initialize the worker") - enable_trace_function_call_for_thread(self.vllm_config) - - from vllm.plugins import load_general_plugins - load_general_plugins() - - if isinstance(self.vllm_config.parallel_config.worker_cls, str): - worker_class = resolve_obj_by_qualname( - self.vllm_config.parallel_config.worker_cls) - else: - logger.warning( - "passing worker_cls as a class object is strongly deprecated," - " as the serialization of class objects can be tricky and" - " error-prone. To be safe, please keep the class in a separate" - " module and pass the qualified name of the class as a string." - ) - assert isinstance(self.vllm_config.parallel_config.worker_cls, - bytes) - worker_class = cloudpickle.loads( - self.vllm_config.parallel_config.worker_cls) - if self.vllm_config.parallel_config.worker_extension_cls: - worker_extension_cls = resolve_obj_by_qualname( - self.vllm_config.parallel_config.worker_extension_cls) - extended_calls = [] - if worker_extension_cls not in worker_class.__bases__: - # check any conflicts between worker and worker_extension_cls - for attr in dir(worker_extension_cls): - if attr.startswith("__"): - continue - assert not hasattr(worker_class, attr), ( - f"Worker class {worker_class} already has an attribute" - f" {attr}, which conflicts with the worker" - f" extension class {worker_extension_cls}.") - if callable(getattr(worker_extension_cls, attr)): - extended_calls.append(attr) - # dynamically inherit the worker extension class - worker_class.__bases__ = worker_class.__bases__ + ( - worker_extension_cls, ) - logger.info( - "Injected %s into %s for extended collective_rpc calls %s", - worker_extension_cls, worker_class, extended_calls) - with set_current_vllm_config(self.vllm_config): - # To make vLLM config available during worker initialization - self.worker = worker_class(**kwargs) - assert self.worker is not None - - def initialize_from_config(self, kv_cache_configs: List[Any]) -> None: - kv_cache_config = kv_cache_configs[self.rpc_rank] - with set_current_vllm_config(self.vllm_config): - self.worker.initialize_from_config(kv_cache_config) # type: ignore - - def init_device(self): - with set_current_vllm_config(self.vllm_config): - # To make vLLM config available during device initialization - self.worker.init_device() # type: ignore - - def execute_method(self, method: Union[str, bytes], *args, **kwargs): - try: - # method resolution order: - # if a method is defined in this class, it will be called directly. - # otherwise, since we define `__getattr__` and redirect attribute - # query to `self.worker`, the method will be called on the worker. 
- return run_method(self, method, args, kwargs) - except Exception as e: - # if the driver worker also execute methods, - # exceptions in the rest worker may cause deadlock in rpc like ray - # see https://github.com/vllm-project/vllm/issues/3455 - # print the error and inform the user to solve the error - msg = (f"Error executing method {method!r}. " - "This might cause deadlock in distributed execution.") - logger.exception(msg) - raise e - - def __getattr__(self, attr): - return getattr(self.worker, attr) From 78a47f87ce259a48f0391fa9ae15add05ea7432b Mon Sep 17 00:00:00 2001 From: Andrew Sansom Date: Mon, 29 Sep 2025 19:10:58 -0500 Subject: [PATCH 491/518] Test Prompt Embeds/LoRA compatibility and Enable LoRA Support for OPT Models (#25717) Signed-off-by: Andrew Sansom --- docs/features/README.md | 2 +- docs/models/supported_models.md | 2 +- tests/entrypoints/conftest.py | 8 +++++ .../test_completion_with_prompt_embeds.py | 34 +++++++++++++++---- vllm/model_executor/models/opt.py | 5 ++- 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/docs/features/README.md b/docs/features/README.md index 10cc448cc2ee..05ce0b57a9fc 100644 --- a/docs/features/README.md +++ b/docs/features/README.md @@ -52,7 +52,7 @@ th:not(:first-child) { | [mm](multimodal_inputs.md) | ✅ | ✅ | [🟠](gh-pr:4194)^ | ❔ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ❔ | ✅ | | | | | best-of | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ✅ | ✅ | | | | beam-search | ✅ | ✅ | ✅ | [❌](gh-issue:6137) | ✅ | ❌ | ✅ | ✅ | ✅ | ❔ | [❌](gh-issue:7968) | ❔ | ✅ | ✅ | | -| [prompt-embeds](prompt_embeds.md) | ✅ | [❌](gh-issue:25096) | ? | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ? | ? | ❌ | ? | ? | ✅ | +| [prompt-embeds](prompt_embeds.md) | ✅ | [❌](gh-issue:25096) | ✅ | ❌ | ✅ | ❌ | ❌ | ✅ | ❌ | ❔ | ❔ | ❌ | ❔ | ❔ | ✅ | \* Chunked prefill and prefix caching are only applicable to last-token pooling. ^ LoRA is only applicable to the language backbone of multimodal models. diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md index 8e87a98e3d51..d720fa2458e1 100644 --- a/docs/models/supported_models.md +++ b/docs/models/supported_models.md @@ -403,7 +403,7 @@ th { | `OLMo2ForCausalLM` | OLMo2 | `allenai/OLMo-2-0425-1B`, etc. | ✅︎ | ✅︎ | ✅︎ | | `OLMo3ForCausalLM` | OLMo3 | TBA | ✅︎ | ✅︎ | ✅︎ | | `OLMoEForCausalLM` | OLMoE | `allenai/OLMoE-1B-7B-0924`, `allenai/OLMoE-1B-7B-0924-Instruct`, etc. | | ✅︎ | ✅︎ | -| `OPTForCausalLM` | OPT, OPT-IML | `facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc. | | ✅︎ | ✅︎ | +| `OPTForCausalLM` | OPT, OPT-IML | `facebook/opt-66b`, `facebook/opt-iml-max-30b`, etc. | ✅︎ | ✅︎ | ✅︎ | | `OrionForCausalLM` | Orion | `OrionStarAI/Orion-14B-Base`, `OrionStarAI/Orion-14B-Chat`, etc. | | ✅︎ | ✅︎ | | `PhiForCausalLM` | Phi | `microsoft/phi-1_5`, `microsoft/phi-2`, etc. | ✅︎ | ✅︎ | ✅︎ | | `Phi3ForCausalLM` | Phi-4, Phi-3 | `microsoft/Phi-4-mini-instruct`, `microsoft/Phi-4`, `microsoft/Phi-3-mini-4k-instruct`, `microsoft/Phi-3-mini-128k-instruct`, `microsoft/Phi-3-medium-128k-instruct`, etc. 
| ✅︎ | ✅︎ | ✅︎ | diff --git a/tests/entrypoints/conftest.py b/tests/entrypoints/conftest.py index da75806ccf4d..7daf62595b1b 100644 --- a/tests/entrypoints/conftest.py +++ b/tests/entrypoints/conftest.py @@ -208,3 +208,11 @@ def zephyr_lora_files(): """Download zephyr LoRA files once per test session.""" from huggingface_hub import snapshot_download return snapshot_download(repo_id="typeof/zephyr-7b-beta-lora") + + +@pytest.fixture(scope="session") +def opt125_lora_files() -> str: + """Download opt-125m LoRA files once per test session.""" + from huggingface_hub import snapshot_download + return snapshot_download( + repo_id="peft-internal-testing/opt-125m-dummy-lora") diff --git a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py index ae51025455b1..cad914282306 100644 --- a/tests/entrypoints/openai/test_completion_with_prompt_embeds.py +++ b/tests/entrypoints/openai/test_completion_with_prompt_embeds.py @@ -3,6 +3,7 @@ import base64 import io +import json import openai # use the official client for correctness check import pytest @@ -16,13 +17,15 @@ # any model with a chat template should work here MODEL_NAME = "facebook/opt-125m" +LORA_SERVING_MODEL_NAME = "opt125m-lora" CONFIG = AutoConfig.from_pretrained(MODEL_NAME) -@pytest.fixture(scope="module") -def default_server_args() -> list[str]: - return [ +@pytest.fixture(scope="module", params=["use-lora"]) +def default_server_args(request: pytest.FixtureRequest, + opt125_lora_files: str) -> list[str]: + args = [ # use half precision for speed and memory savings in CI environment "--dtype", "bfloat16", @@ -35,6 +38,25 @@ def default_server_args() -> list[str]: "--enable-prompt-embeds", ] + if request.param == "use-lora": + lora_module_1 = { + "name": LORA_SERVING_MODEL_NAME, + "path": opt125_lora_files, + "base_model_name": MODEL_NAME + } + + args.extend([ + "--enable-lora", + "--lora-module", + json.dumps(lora_module_1), + "--max-lora-rank", + "64", + "--max-cpu-loras", + "2", + ]) + + return args + EXAMPLE_PROMPTS = [ "Hello, my name is", @@ -74,7 +96,7 @@ async def client_with_prompt_embeds(server_with_prompt_embeds): @pytest.mark.asyncio -@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("model_name", [MODEL_NAME, LORA_SERVING_MODEL_NAME]) async def test_completions_with_prompt_embeds( example_prompt_embeds, client_with_prompt_embeds: openai.AsyncOpenAI, @@ -179,7 +201,7 @@ async def test_completions_with_prompt_embeds( @pytest.mark.asyncio -@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("model_name", [MODEL_NAME, LORA_SERVING_MODEL_NAME]) async def test_completions_errors_with_prompt_embeds( client_with_prompt_embeds: openai.AsyncOpenAI, model_name: str): # Test error case: invalid prompt_embeds @@ -194,7 +216,7 @@ async def test_completions_errors_with_prompt_embeds( @pytest.mark.asyncio @pytest.mark.parametrize("logprobs_arg", [1, 0]) -@pytest.mark.parametrize("model_name", [MODEL_NAME]) +@pytest.mark.parametrize("model_name", [MODEL_NAME, LORA_SERVING_MODEL_NAME]) async def test_completions_with_logprobs_and_prompt_embeds( example_prompt_embeds, client_with_prompt_embeds: openai.AsyncOpenAI, diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 4c3ce9f61efb..c4746166471c 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -43,7 +43,7 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from 
vllm.sequence import IntermediateTensors -from .interfaces import SupportsPP +from .interfaces import SupportsLoRA, SupportsPP from .utils import (AutoWeightsLoader, WeightsMapper, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) @@ -352,10 +352,9 @@ def load_weights(self, weights: Iterable[tuple[str, return loaded_params -class OPTForCausalLM(nn.Module, SupportsPP): +class OPTForCausalLM(nn.Module, SupportsPP, SupportsLoRA): packed_modules_mapping = { "qkv_proj": ["q_proj", "k_proj", "v_proj"], - "gate_up_proj": ["gate_proj", "up_proj"] } hf_to_vllm_mapper = WeightsMapper(orig_to_new_prefix={ From 89e4050af4299f461077ed6c4dfa1318693d40ee Mon Sep 17 00:00:00 2001 From: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Date: Mon, 29 Sep 2025 21:15:19 -0400 Subject: [PATCH 492/518] [Bug] Fix Weight Loading for Block FP8 Cutlass SM90 (#25909) Signed-off-by: yewentao256 Signed-off-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../model_executor/layers/quantization/utils/fp8_utils.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py index b2548e66827d..828111dc299e 100644 --- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py +++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py @@ -911,15 +911,15 @@ def maybe_post_process_fp8_weight_block(layer: torch.nn.Module, # On Blackwell or Hopper, if E8M0 for DeepGemm is used, we need to # requantize the weight and input to the specific scale # at the same time. - if is_deep_gemm_e8m0_used(): + should_use_deepgemm = should_use_deepgemm_for_fp8_linear( + layer.orig_dtype, layer.weight) + if is_deep_gemm_e8m0_used() and should_use_deepgemm: block_sz = tuple(layer.weight_block_size) requant_weight_ue8m0_inplace(layer.weight.data, layer.weight_scale.data, block_sz) # SM90 Block FP8 CUTLASS requires row-major weight scales elif (current_platform.is_device_capability(90) - and cutlass_block_fp8_supported - and not should_use_deepgemm_for_fp8_linear(torch.bfloat16, - layer.weight)): + and cutlass_block_fp8_supported and not should_use_deepgemm): layer.weight_scale = torch.nn.Parameter( layer.weight_scale.data.T.contiguous(), requires_grad=False) From d3bd171123ed4814afbef34584dd286817ccfe03 Mon Sep 17 00:00:00 2001 From: Zhuohan Li Date: Mon, 29 Sep 2025 18:43:57 -0700 Subject: [PATCH 493/518] [Benchmark] Support benchmark throughput for external launcher DP (#25913) Signed-off-by: Zhuohan Li --- vllm/benchmarks/throughput.py | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/vllm/benchmarks/throughput.py b/vllm/benchmarks/throughput.py index 96e39fd92eba..9e38e63a0883 100644 --- a/vllm/benchmarks/throughput.py +++ b/vllm/benchmarks/throughput.py @@ -358,7 +358,23 @@ def get_requests(args, tokenizer): raise ValueError(f"Unknown dataset name: {args.dataset_name}") # Remove None values sample_kwargs = {k: v for k, v in sample_kwargs.items() if v is not None} - return dataset_cls(**common_kwargs).sample(**sample_kwargs) + requests = dataset_cls(**common_kwargs).sample(**sample_kwargs) + requests = filter_requests_for_dp(requests, args.data_parallel_size) + return requests + + +def filter_requests_for_dp(requests, data_parallel_size): + # Note(zhuohan): The way we get 
data_parallel_rank is hacky and only + # works for external launcher mode. Should be cleaned up and deprecated + # in the future with a better vLLM distributed process design. + if data_parallel_size == 1: + return requests + + global_rank = int(os.environ["RANK"]) + world_size = int(os.environ["WORLD_SIZE"]) + data_parallel_rank = global_rank // (world_size // data_parallel_size) + return [r for i, r in enumerate(requests) + if i % data_parallel_size == data_parallel_rank] def validate_args(args): @@ -453,12 +469,17 @@ def validate_args(args): if args.backend == "mii" and args.tokenizer != args.model: raise ValueError( "Tokenizer must be the same as the model for MII backend.") - - # --data-parallel is not supported currently. - # https://github.com/vllm-project/vllm/issues/16222 - if args.data_parallel_size > 1: + + if args.data_parallel_size > 1 and ( + args.distributed_executor_backend != "external_launcher" + or args.async_engine): + # --data-parallel is not supported fully. + # Old issue: https://github.com/vllm-project/vllm/issues/16222 + # Currently we only support data parallel with external launcher + # mode (i.e., launch with toruchrun). raise ValueError( - "Data parallel is not supported in offline benchmark, " + "Data parallel is only supported with external launcher mode " + "with synchronous engine in offline benchmark, " "please use benchmark serving instead" ) From 61aedb5ffe056f83b1edab15610a644d32f40071 Mon Sep 17 00:00:00 2001 From: Harry Mellor <19981378+hmellor@users.noreply.github.com> Date: Tue, 30 Sep 2025 03:49:49 +0100 Subject: [PATCH 494/518] Move`VllmConfig` from `config/__init__.py` to `config/vllm.py` (#25271) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- vllm/attention/layer.py | 3 +- .../layers/chunked_local_attention.py | 3 +- vllm/config/__init__.py | 905 ++---------------- vllm/config/utils.py | 42 +- vllm/config/vllm.py | 789 +++++++++++++++ .../layers/mamba/linear_attn.py | 3 +- .../layers/quantization/auto_round.py | 5 +- .../layers/quantization/bitblas.py | 5 +- .../layers/quantization/bitsandbytes.py | 5 +- .../layers/quantization/deepspeedfp.py | 5 +- .../layers/quantization/gptq.py | 8 +- .../layers/quantization/gptq_bitblas.py | 5 +- .../layers/quantization/gptq_marlin.py | 2 +- .../layers/quantization/gptq_marlin_24.py | 5 +- .../layers/quantization/ipex_quant.py | 5 +- .../kernels/mixed_precision/bitblas.py | 3 +- .../layers/quantization/tpu_int8.py | 5 +- .../layers/quantization/utils/gptq_utils.py | 18 +- vllm/model_executor/models/aimv2.py | 3 +- vllm/model_executor/models/aria.py | 3 +- vllm/model_executor/models/bailing_moe.py | 3 +- vllm/model_executor/models/granite.py | 3 +- vllm/model_executor/models/granitemoe.py | 3 +- .../model_executor/models/granitemoeshared.py | 3 +- vllm/model_executor/models/hunyuan_v1.py | 3 +- vllm/model_executor/models/interfaces.py | 3 +- vllm/model_executor/models/llama4_eagle.py | 3 +- vllm/model_executor/models/mamba.py | 3 +- vllm/model_executor/models/mamba2.py | 3 +- vllm/model_executor/models/minimax_text_01.py | 3 +- vllm/model_executor/models/ovis.py | 3 +- vllm/model_executor/models/ovis2_5.py | 3 +- vllm/model_executor/models/phimoe.py | 3 +- vllm/model_executor/models/siglip2navit.py | 2 +- vllm/model_executor/models/step3_text.py | 3 +- vllm/model_executor/models/whisper.py | 3 +- 36 files changed, 964 insertions(+), 905 deletions(-) create mode 100644 vllm/config/vllm.py diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 
d97c87d96e99..8cab8330a6cd 100644 --- a/vllm/attention/layer.py +++ b/vllm/attention/layer.py @@ -20,8 +20,7 @@ from vllm.logger import init_logger from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase from vllm.model_executor.layers.linear import UnquantizedLinearMethod -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization.input_quant_fp8 import QuantFP8 from vllm.model_executor.layers.quantization.kv_cache import BaseKVCacheMethod from vllm.model_executor.layers.quantization.utils.quant_utils import ( diff --git a/vllm/attention/layers/chunked_local_attention.py b/vllm/attention/layers/chunked_local_attention.py index 087c5004bde0..3964eca7d36b 100644 --- a/vllm/attention/layers/chunked_local_attention.py +++ b/vllm/attention/layers/chunked_local_attention.py @@ -9,7 +9,8 @@ from vllm.attention.backends.abstract import (AttentionBackend, AttentionMetadata) from vllm.attention.selector import get_attn_backend -from vllm.config import CacheConfig, QuantizationConfig +from vllm.config import CacheConfig +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.v1.attention.backends.utils import ( CommonAttentionMetadata, make_local_attention_virtual_batches, subclass_attention_backend) diff --git a/vllm/config/__init__.py b/vllm/config/__init__.py index ccb91999d370..c909265c071d 100644 --- a/vllm/config/__init__.py +++ b/vllm/config/__init__.py @@ -1,29 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -# ruff: noqa: F401 -import ast -import copy -import hashlib -import inspect -import json -import os -import textwrap -from contextlib import contextmanager -from dataclasses import field, fields, is_dataclass, replace -from functools import cached_property, lru_cache -from pathlib import Path -from typing import (TYPE_CHECKING, Any, Literal, Optional, Protocol, TypeVar, - Union, cast) - -import regex as re -import torch -from pydantic import ConfigDict, SkipValidation -from pydantic.dataclasses import dataclass -from typing_extensions import runtime_checkable - -import vllm.envs as envs -from vllm import version from vllm.config.cache import (BlockSize, CacheConfig, CacheDType, MambaDType, PrefixCachingHashAlgo) from vllm.config.compilation import (CompilationConfig, CompilationLevel, @@ -48,806 +25,82 @@ from vllm.config.speculative import SpeculativeConfig from vllm.config.speech_to_text import SpeechToTextConfig from vllm.config.structured_outputs import StructuredOutputsConfig -from vllm.config.utils import ConfigType, config, get_attr_docs, is_init_field -from vllm.logger import init_logger -from vllm.multimodal import MULTIMODAL_REGISTRY -from vllm.transformers_utils.runai_utils import is_runai_obj_uri -from vllm.utils import random_uuid - -if TYPE_CHECKING: - from _typeshed import DataclassInstance - from transformers.configuration_utils import PretrainedConfig - - from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) -else: - DataclassInstance = Any - PretrainedConfig = Any - QuantizationConfig = Any - QuantizationMethods = Any - BaseModelLoader = Any - LogitsProcessor = Any - -logger = init_logger(__name__) -DataclassInstanceT = TypeVar("DataclassInstanceT", bound=DataclassInstance) - - -@runtime_checkable -class SupportsHash(Protocol): - - def compute_hash(self) -> str: - ... 
- - -class SupportsMetricsInfo(Protocol): - - def metrics_info(self) -> dict[str, str]: - ... - - -@config -@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) -class VllmConfig: - """Dataclass which contains all vllm-related configuration. This - simplifies passing around the distinct configurations in the codebase. - """ - - # TODO: use default_factory once default constructing ModelConfig doesn't - # try to download a model - model_config: ModelConfig = None # type: ignore - """Model configuration.""" - cache_config: CacheConfig = field(default_factory=CacheConfig) - """Cache configuration.""" - parallel_config: ParallelConfig = field(default_factory=ParallelConfig) - """Parallel configuration.""" - scheduler_config: SchedulerConfig = field(default_factory=SchedulerConfig) - """Scheduler configuration.""" - device_config: DeviceConfig = field(default_factory=DeviceConfig) - """Device configuration.""" - load_config: LoadConfig = field(default_factory=LoadConfig) - """Load configuration.""" - lora_config: Optional[LoRAConfig] = None - """LoRA configuration.""" - speculative_config: Optional[SpeculativeConfig] = None - """Speculative decoding configuration.""" - structured_outputs_config: StructuredOutputsConfig = field( - default_factory=StructuredOutputsConfig) - """Structured outputs configuration.""" - observability_config: Optional[ObservabilityConfig] = None - """Observability configuration.""" - quant_config: Optional[QuantizationConfig] = None - """Quantization configuration.""" - compilation_config: CompilationConfig = field( - default_factory=CompilationConfig) - """`torch.compile` and cudagraph capture configuration for the model. - - As a shorthand, `-O` can be used to directly specify the compilation - level `n`: `-O3` is equivalent to `-O.level=3` (same as `-O='{"level":3}'`). - Currently, -O and -O= are supported as well but this will likely be - removed in favor of clearer -O syntax in the future. - - NOTE: level 0 is the default level without any optimization. level 1 and 2 - are for internal testing only. level 3 is the recommended level for - production, also default in V1. - - You can specify the full compilation config like so: - `{"level": 3, "cudagraph_capture_sizes": [1, 2, 4, 8]}` - """ - kv_transfer_config: Optional[KVTransferConfig] = None - """The configurations for distributed KV cache transfer.""" - kv_events_config: Optional[KVEventsConfig] = None - """The configurations for event publishing.""" - # some opaque config, only used to provide additional information - # for the hash computation, mainly used for testing, debugging or out of - # tree config registration. - additional_config: Union[dict, SupportsHash] = field(default_factory=dict) - """Additional config for specified platform. Different platforms may - support different configs. Make sure the configs are valid for the platform - you are using. Contents must be hashable.""" - instance_id: str = "" - """The ID of the vLLM instance.""" - - def compute_hash(self) -> str: - """ - WARNING: Whenever a new field is added to this config, - ensure that it is included in the factors list if - it affects the computation graph. - - Provide a hash that uniquely identifies all the configs - that affect the structure of the computation - graph from input ids/embeddings to the final hidden states, - excluding anything before input ids/embeddings and after - the final hidden states. 
- """ - factors: list[Any] = [] - - # summarize vllm config - vllm_factors: list[Any] = [] - from vllm import __version__ - vllm_factors.append(__version__) - vllm_factors.append(envs.VLLM_USE_V1) - if self.model_config: - vllm_factors.append(self.model_config.compute_hash()) - else: - vllm_factors.append("None") - if self.cache_config: - vllm_factors.append(self.cache_config.compute_hash()) - else: - vllm_factors.append("None") - if self.parallel_config: - vllm_factors.append(self.parallel_config.compute_hash()) - else: - vllm_factors.append("None") - if self.scheduler_config: - vllm_factors.append(self.scheduler_config.compute_hash()) - else: - vllm_factors.append("None") - if self.device_config: - vllm_factors.append(self.device_config.compute_hash()) - else: - vllm_factors.append("None") - if self.load_config: - vllm_factors.append(self.load_config.compute_hash()) - else: - vllm_factors.append("None") - if self.lora_config: - vllm_factors.append(self.lora_config.compute_hash()) - # LoRA creates static buffers based on max_num_batched_tokens. - # The tensor sizes and strides get captured in the torch.compile - # graph explicitly. - vllm_factors.append( - str(self.scheduler_config.max_num_batched_tokens)) - else: - vllm_factors.append("None") - if self.speculative_config: - vllm_factors.append(self.speculative_config.compute_hash()) - else: - vllm_factors.append("None") - if self.structured_outputs_config: - vllm_factors.append(self.structured_outputs_config.compute_hash()) - else: - vllm_factors.append("None") - if self.observability_config: - vllm_factors.append(self.observability_config.compute_hash()) - else: - vllm_factors.append("None") - if self.quant_config: - pass # should be captured by model_config.quantization - if self.compilation_config: - vllm_factors.append(self.compilation_config.compute_hash()) - else: - vllm_factors.append("None") - if self.kv_transfer_config: - vllm_factors.append(self.kv_transfer_config.compute_hash()) - else: - vllm_factors.append("None") - if self.additional_config: - if isinstance(additional_config := self.additional_config, dict): - additional_config_hash = hashlib.md5( - json.dumps(additional_config, sort_keys=True).encode(), - usedforsecurity=False, - ).hexdigest() - else: - additional_config_hash = additional_config.compute_hash() - vllm_factors.append(additional_config_hash) - else: - vllm_factors.append("None") - factors.append(vllm_factors) - - hash_str = hashlib.md5(str(factors).encode(), - usedforsecurity=False).hexdigest()[:10] - return hash_str - - def pad_for_cudagraph(self, batch_size: int) -> int: - # if batch_size > self.compilation_config.max_capture_size, - # it should raise an IndexError. 
- # the caller should make sure the batch_size is within the range, - # i.e., batch_size <= self.compilation_config.max_capture_size - return self.compilation_config.bs_to_padded_graph_size[batch_size] - - @staticmethod - def _get_quantization_config( - model_config: ModelConfig, - load_config: LoadConfig) -> Optional[QuantizationConfig]: - """Get the quantization config.""" - from vllm.platforms import current_platform - if model_config.quantization is not None: - from vllm.model_executor.model_loader.weight_utils import ( - get_quant_config) - quant_config = get_quant_config(model_config, load_config) - capability_tuple = current_platform.get_device_capability() - - if capability_tuple is not None: - capability = capability_tuple.to_int() - if capability < quant_config.get_min_capability(): - raise ValueError( - f"The quantization method {model_config.quantization} " - "is not supported for the current GPU. Minimum " - f"capability: {quant_config.get_min_capability()}. " - f"Current capability: {capability}.") - supported_dtypes = quant_config.get_supported_act_dtypes() - if model_config.dtype not in supported_dtypes: - raise ValueError( - f"{model_config.dtype} is not supported for quantization " - f"method {model_config.quantization}. Supported dtypes: " - f"{supported_dtypes}") - quant_config.maybe_update_config(model_config.model) - return quant_config - return None - - @staticmethod - def get_quantization_config( - model_config: ModelConfig, - load_config: LoadConfig) -> Optional[QuantizationConfig]: - import copy - - # For some reason, the _ version of this modifies the model_config - # object, so using deepcopy to avoid this problem. - return VllmConfig._get_quantization_config(copy.deepcopy(model_config), - load_config) - - def with_hf_config( - self, - hf_config: PretrainedConfig, - architectures: Optional[list[str]] = None, - ) -> "VllmConfig": - if architectures is not None: - hf_config = copy.deepcopy(hf_config) - hf_config.architectures = architectures - - model_config = copy.deepcopy(self.model_config) - model_config.hf_config = hf_config - - return replace(self, model_config=model_config) - - def __post_init__(self): - """Verify configs are valid & consistent with each other. - """ - - self.try_verify_and_update_config() - - if self.model_config is not None: - self.model_config.verify_with_parallel_config(self.parallel_config) - self.model_config.verify_dual_chunk_attention_config( - self.load_config) - - self.cache_config.verify_with_parallel_config(self.parallel_config) - - if self.lora_config is not None: - self.lora_config.verify_with_cache_config(self.cache_config) - self.lora_config.verify_with_model_config(self.model_config) - - if self.quant_config is None and self.model_config is not None: - self.quant_config = VllmConfig._get_quantization_config( - self.model_config, self.load_config) - - from vllm.platforms import current_platform - if self.model_config is not None and \ - self.scheduler_config.chunked_prefill_enabled and \ - self.model_config.dtype == torch.float32 and \ - current_platform.get_device_capability() == (7, 5): - logger.warning_once( - "Turing devices tensor cores do not support float32 matmul. " - "To workaround this limitation, vLLM will set 'ieee' input " - "precision for chunked prefill triton kernels.") - - # If the user does not explicitly set a compilation level, then - # we use the default level. The default level depends on other - # settings (see the below code). 
- if self.compilation_config.level is None: - if envs.VLLM_USE_V1: - if (self.model_config is not None - and not self.model_config.enforce_eager): - self.compilation_config.level = CompilationLevel.PIECEWISE - else: - self.compilation_config.level = \ - CompilationLevel.NO_COMPILATION - - else: - # NB: Passing both --enforce-eager and a compilation level - # in V0 means the compilation level wins out. - self.compilation_config.level = CompilationLevel.NO_COMPILATION - - # async tp is built on top of sequence parallelism - # and requires it to be enabled. - if self.compilation_config.pass_config.enable_async_tp: - self.compilation_config.pass_config.enable_sequence_parallelism = \ - True - if self.compilation_config.pass_config.enable_sequence_parallelism: - self.compilation_config.custom_ops.append("+rms_norm") - - if current_platform.support_static_graph_mode(): - # if cudagraph_mode is not explicitly set by users, set default - # value - if self.compilation_config.cudagraph_mode is None: - if envs.VLLM_USE_V1 and self.compilation_config.level \ - == CompilationLevel.PIECEWISE: - # default to full and piecewise for most models - self.compilation_config.cudagraph_mode = \ - CUDAGraphMode.FULL_AND_PIECEWISE - - # pooling models and encoder-decoder models - # do not support full cudagraphs - if self.model_config is not None and \ - (self.model_config.pooler_config is not None - or self.model_config.is_encoder_decoder): - self.compilation_config.cudagraph_mode = \ - CUDAGraphMode.PIECEWISE - else: - self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE - - # disable cudagraph when enforce eager execution - if self.model_config is not None and \ - self.model_config.enforce_eager: - logger.info("Cudagraph is disabled under eager mode") - self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE - elif envs.VLLM_USE_V1: - self.compilation_config.cudagraph_num_of_warmups = 1 - - self._set_cudagraph_sizes() - else: - self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE - - if self.cache_config.kv_sharing_fast_prefill: - - if self.speculative_config is not None and \ - self.speculative_config.use_eagle(): - raise NotImplementedError( - "Fast prefill optimization for KV sharing is not " - "compatible with EAGLE as EAGLE requires correct logits " - "for all tokens while fast prefill gives incorrect logits " - "for prompt tokens.") - - logger.warning_once( - "--kv-sharing-fast-prefill requires changes on model side for " - "correctness and to realize prefill savings. 
") - - disable_chunked_prefill_reasons: list[str] = [] - - if self.model_config: - if self.model_config.pooler_config: - pooling_type = self.model_config.pooler_config.pooling_type - if pooling_type is None or pooling_type.lower() != "last": - disable_chunked_prefill_reasons.append( - "Only \"last\" pooling supports chunked " - "prefill and prefix caching; disabling both.") - if not getattr(self.model_config.hf_config, "is_causal", True): - disable_chunked_prefill_reasons.append( - "Only models using causal attention supports chunked " - "prefill and prefix caching; disabling both.") - elif self.model_config.is_encoder_decoder: - self.scheduler_config.max_num_encoder_input_tokens = \ - MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config) - logger.debug( - "Encoder-decoder model detected: setting " - "`max_num_encoder_input_tokens` to encoder length (%s)", - self.scheduler_config.max_num_encoder_input_tokens) - self.scheduler_config.disable_chunked_mm_input = True - disable_chunked_prefill_reasons.append( - "Encoder-decoder models do not support chunked prefill nor" - " prefix caching; disabling both.") - if (self.model_config.architecture - == "WhisperForConditionalGeneration" - and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") - != "spawn"): - logger.warning( - "Whisper is known to have issues with " - "forked workers. If startup is hanging, " - "try setting 'VLLM_WORKER_MULTIPROC_METHOD' " - "to 'spawn'.") - - if disable_chunked_prefill_reasons: - for reason in disable_chunked_prefill_reasons: - logger.info(reason) - self.scheduler_config.chunked_prefill_enabled = False - self.scheduler_config.long_prefill_token_threshold = 0 - - if self.cache_config is not None: - self.cache_config.enable_prefix_caching = False - - if (self.kv_events_config is not None - and self.kv_events_config.enable_kv_cache_events - and not self.cache_config.enable_prefix_caching): - logger.warning( - "KV cache events are on, but prefix caching is not enabled." - "Use --enable-prefix-caching to enable.") - if (self.kv_events_config is not None - and self.kv_events_config.publisher != "null" - and not self.kv_events_config.enable_kv_cache_events): - logger.warning("KV cache events are disabled," - "but the scheduler is configured to publish them." - "Modify KVEventsConfig.enable_kv_cache_events" - "to True to enable.") - current_platform.check_and_update_config(self) - - # Do this after all the updates to compilation_config.level - if envs.VLLM_USE_V1 and \ - self.compilation_config.level == CompilationLevel.PIECEWISE: - self.compilation_config.set_splitting_ops_for_v1() - - # final check of cudagraph mode after all possible updates - if envs.VLLM_USE_V1 and current_platform.is_cuda_alike(): - if self.compilation_config.cudagraph_mode.has_full_cudagraphs()\ - and self.model_config is not None and \ - not self.model_config.disable_cascade_attn and\ - not self.compilation_config.cudagraph_mode.\ - has_piecewise_cudagraphs(): - logger.warning_once( - "No piecewise cudagraph for executing cascade attention." 
- " Will fall back to eager execution if a batch runs " - "into cascade attentions") - - if self.compilation_config.cudagraph_mode\ - .requires_piecewise_compilation(): - assert self.compilation_config.level == \ - CompilationLevel.PIECEWISE, \ - "Compilation level should be CompilationLevel.PIECEWISE "\ - "when cudagraph_mode piecewise cudagraphs is used, "\ - f"cudagraph_mode={self.compilation_config.cudagraph_mode}" - - # final migrate the deprecated flags - self.compilation_config.use_cudagraph = self.compilation_config.\ - cudagraph_mode!= CUDAGraphMode.NONE - self.compilation_config.full_cuda_graph = self.compilation_config.\ - cudagraph_mode.has_full_cudagraphs() - - if self.parallel_config.enable_dbo: - a2a_backend = envs.VLLM_ALL2ALL_BACKEND - assert a2a_backend in \ - ["deepep_low_latency", "deepep_high_throughput"], \ - "Microbatching currently only supports the deepep_low_latency and "\ - f"deepep_high_throughput all2all backend. {a2a_backend} is not "\ - "supported. To fix set the VLLM_ALL2ALL_BACKEND environment "\ - "variable to deepep_low_latency or deepep_high_throughput and "\ - "install the DeepEP kernels." - - if not self.model_config.disable_cascade_attn: - self.model_config.disable_cascade_attn = True - logger.warning_once( - "Disabling cascade attention when DBO is enabled.") - - if not self.instance_id: - self.instance_id = random_uuid()[:5] - - if (envs.VLLM_USE_V1 - and not self.scheduler_config.disable_hybrid_kv_cache_manager): - # logger should only print warning message for hybrid models. As we - # can't know whether the model is hybrid or not now, so we don't log - # warning message here and will log it later. - if not current_platform.support_hybrid_kv_cache(): - # Hybrid KV cache manager is not supported on non-GPU platforms. - self.scheduler_config.disable_hybrid_kv_cache_manager = True - if self.kv_transfer_config is not None: - # Hybrid KV cache manager is not compatible with KV transfer. - self.scheduler_config.disable_hybrid_kv_cache_manager = True - if self.kv_events_config is not None: - # Hybrid KV cache manager is not compatible with KV events. - self.scheduler_config.disable_hybrid_kv_cache_manager = True - if self.model_config is not None and \ - self.model_config.attention_chunk_size is not None: - if self.speculative_config is not None and \ - self.speculative_config.use_eagle(): - # Hybrid KV cache manager is not yet supported with chunked - # local attention + eagle. - self.scheduler_config.disable_hybrid_kv_cache_manager = True - elif \ - not envs.VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE: - logger.warning( - "There is a latency regression when using chunked local" - " attention with the hybrid KV cache manager. Disabling" - " it, by default. To enable it, set the environment " - "VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE=1." - ) - # Hybrid KV cache manager is not yet supported with chunked - # local attention. 
- self.scheduler_config.disable_hybrid_kv_cache_manager = True - - if self.compilation_config.debug_dump_path: - self.compilation_config.debug_dump_path = \ - self.compilation_config.debug_dump_path.absolute().expanduser() - if envs.VLLM_DEBUG_DUMP_PATH is not None: - env_path = Path(envs.VLLM_DEBUG_DUMP_PATH).absolute().expanduser() - if self.compilation_config.debug_dump_path: - logger.warning( - "Config-specified debug dump path is overridden" - " by VLLM_DEBUG_DUMP_PATH to %s", env_path) - self.compilation_config.debug_dump_path = env_path - - def update_sizes_for_sequence_parallelism(self, - possible_sizes: list) -> list: - # remove the sizes that not multiple of tp_size when - # enable sequence parallelism - removed_sizes = [ - size for size in possible_sizes - if size % self.parallel_config.tensor_parallel_size != 0 - ] - if removed_sizes: - logger.warning( - "Batch sizes %s are removed because they are not " - "multiple of tp_size %d when " - "sequence parallelism is enabled", removed_sizes, - self.parallel_config.tensor_parallel_size) - - return [ - size for size in possible_sizes - if size % self.parallel_config.tensor_parallel_size == 0 - ] - - def _set_cudagraph_sizes(self): - """ - vLLM defines the default candidate list of batch sizes for CUDA graph - capture as: - - ```python - max_graph_size = min(max_num_seqs * 2, 512) - # 1, 2, 4, then multiples of 8 up to max_graph_size - cuda_graph_sizes = [1, 2, 4, 8, 16, 24, 32, 40, ..., max_graph_size] - - In the end, `vllm_config.compilation_config.cudagraph_capture_sizes` - will be the final sizes to capture cudagraph (in descending order). - - These sizes are used to capture and reuse CUDA graphs for - performance-critical paths (e.g., decoding). Capturing enables - significantly faster kernel dispatch by avoiding Python overhead. The - list is then filtered based on `max_num_batched_tokens` (e.g., 8192 on - most GPUs), which controls the total allowed number of tokens in a - batch. Since each sequence may have a variable number of tokens, the - maximum usable batch size will depend on actual sequence lengths. - - Example: - With `max_num_batched_tokens = 8192`, and typical sequences - averaging ~32 tokens, most practical batch sizes fall below 256. - However, the system will still allow capture sizes up to 512 if - shape and memory permit. - - Note: - If users explicitly specify cudagraph capture sizes in the - compilation config, those will override this default logic. - At runtime: - - - If batch size <= one of the `cudagraph_capture_sizes`, the closest - padded CUDA graph will be used. - - If batch size > largest `cudagraph_capture_sizes`, cudagraph will - not be used. 
- """ - - # calculate the default `batch_size_capture_list` - batch_size_capture_list = [] - if self.model_config is not None and \ - not self.model_config.enforce_eager: - cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes - if len(cuda_graph_sizes) == 1: - batch_size_capture_list = [1, 2, 4] + [ - i for i in range(8, cuda_graph_sizes[0] + 1, 8) - ] - elif len(cuda_graph_sizes) > 1: - batch_size_capture_list = sorted(cuda_graph_sizes) - else: - raise TypeError(f"Invalid value for {cuda_graph_sizes=}.") - if self.parallel_config.tensor_parallel_size > 1 and \ - self.compilation_config.pass_config.enable_sequence_parallelism: - batch_size_capture_list = \ - self.update_sizes_for_sequence_parallelism(batch_size_capture_list) - max_num_tokens = self.scheduler_config.max_num_batched_tokens - batch_size_capture_list = [ - size for size in batch_size_capture_list - if size <= max_num_tokens - ] - - self.compilation_config.init_with_cudagraph_sizes( - batch_size_capture_list) - - def recalculate_max_model_len(self, max_model_len: int): - # Can only be called in try_verify_and_update_config - model_config = self.model_config - max_model_len = model_config.get_and_verify_max_len(max_model_len) - self.model_config.max_model_len = max_model_len - self.scheduler_config.max_model_len = max_model_len - - def try_verify_and_update_config(self): - if self.model_config is None: - return - - # Avoid running try_verify_and_update_config multiple times - if getattr(self.model_config, "config_updated", False): - return - self.model_config.config_updated = True - - architecture = self.model_config.architecture - if architecture is None: - return - - from vllm.model_executor.models.config import ( - MODELS_CONFIG_MAP, HybridAttentionMambaModelConfig) - cls = MODELS_CONFIG_MAP.get(architecture, None) - if cls is not None: - cls.verify_and_update_config(self) - - if self.model_config.is_hybrid: - HybridAttentionMambaModelConfig.verify_and_update_config(self) - - if self.model_config.convert_type == "classify": - # Maybe convert ForCausalLM into ForSequenceClassification model. - from vllm.model_executor.models.adapters import ( - SequenceClassificationConfig) - SequenceClassificationConfig.verify_and_update_config(self) - - if hasattr(self.model_config, "model_weights") and is_runai_obj_uri( - self.model_config.model_weights): - if self.load_config.load_format == "auto": - logger.info("Detected Run:ai model config. " - "Overriding `load_format` to 'runai_streamer'") - self.load_config.load_format = "runai_streamer" - elif self.load_config.load_format != "runai_streamer": - raise ValueError(f"To load a model from S3, 'load_format' " - f"must be 'runai_streamer', " - f"but got '{self.load_config.load_format}'. " - f"Model: {self.model_config.model}") - - def compile_debug_dump_path(self) -> Optional[Path]: - """Returns a rank-aware path for dumping - torch.compile debug information. 
- """ - if self.compilation_config.debug_dump_path is None: - return None - tp_rank = self.parallel_config.rank - dp_rank = self.parallel_config.data_parallel_rank - data_parallel_size = self.parallel_config.data_parallel_size - append_path = f"rank_{tp_rank}" if data_parallel_size == 1 \ - else f"rank_{tp_rank}_dp_{dp_rank}" - path = self.compilation_config.debug_dump_path / append_path - return path - - def __str__(self): - return ( - f"model={self.model_config.model!r}, " - f"speculative_config={self.speculative_config!r}, " - f"tokenizer={self.model_config.tokenizer!r}, " - f"skip_tokenizer_init={self.model_config.skip_tokenizer_init}, " - f"tokenizer_mode={self.model_config.tokenizer_mode}, " - f"revision={self.model_config.revision}, " - f"tokenizer_revision={self.model_config.tokenizer_revision}, " - f"trust_remote_code={self.model_config.trust_remote_code}, " - f"dtype={self.model_config.dtype}, " - f"max_seq_len={self.model_config.max_model_len}, " - f"download_dir={self.load_config.download_dir!r}, " - f"load_format={self.load_config.load_format}, " - f"tensor_parallel_size={self.parallel_config.tensor_parallel_size}, " # noqa - f"pipeline_parallel_size={self.parallel_config.pipeline_parallel_size}, " # noqa - f"data_parallel_size={self.parallel_config.data_parallel_size}, " # noqa - f"disable_custom_all_reduce={self.parallel_config.disable_custom_all_reduce}, " # noqa - f"quantization={self.model_config.quantization}, " - f"enforce_eager={self.model_config.enforce_eager}, " - f"kv_cache_dtype={self.cache_config.cache_dtype}, " - f"device_config={self.device_config.device}, " - f"structured_outputs_config={self.structured_outputs_config!r}, " - f"observability_config={self.observability_config!r}, " - f"seed={self.model_config.seed}, " - f"served_model_name={self.model_config.served_model_name}, " - f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, " - f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, " # noqa - f"pooler_config={self.model_config.pooler_config!r}, " - f"compilation_config={self.compilation_config!r}") - - -_current_vllm_config: Optional[VllmConfig] = None -_current_prefix: Optional[str] = None - - -@contextmanager -def set_current_vllm_config(vllm_config: VllmConfig, - check_compile=False, - prefix: Optional[str] = None): - """ - Temporarily set the current vLLM config. - Used during model initialization. - We save the current vLLM config in a global variable, - so that all modules can access it, e.g. custom ops - can access the vLLM config to determine how to dispatch. - """ - global _current_vllm_config, _current_prefix - old_vllm_config = _current_vllm_config - old_prefix = _current_prefix - from vllm.compilation.counter import compilation_counter - num_models_seen = compilation_counter.num_models_seen - try: - _current_vllm_config = vllm_config - _current_prefix = prefix - yield - except Exception: - raise - else: - if check_compile: - vllm_config.compilation_config.custom_op_log_check() - - if check_compile and \ - vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \ - and compilation_counter.num_models_seen == num_models_seen: - # If the model supports compilation, - # compilation_counter.num_models_seen should be increased - # by at least 1. - # If it is not increased, it means the model does not support - # compilation (does not have @support_torch_compile decorator). - logger.warning( - "`torch.compile` is turned on, but the model %s" - " does not support it. 
Please open an issue on GitHub" - " if you want it to be supported.", - vllm_config.model_config.model) - finally: - _current_vllm_config = old_vllm_config - _current_prefix = old_prefix - # Clear the compilation config cache when context changes - get_cached_compilation_config.cache_clear() - - -@lru_cache(maxsize=1) -def get_cached_compilation_config(): - """Cache config to avoid repeated calls to get_current_vllm_config()""" - return get_current_vllm_config().compilation_config - - -def get_current_vllm_config() -> VllmConfig: - if _current_vllm_config is None: - # in ci, usually when we test custom ops/modules directly, - # we don't set the vllm config. In that case, we set a default - # config. - logger.warning("Current vLLM config is not set.") - from vllm.config import VllmConfig - return VllmConfig() - return _current_vllm_config - - -def get_current_model_prefix() -> str: - """ - Get the prefix of the model that's currently being initialized. - """ - assert _current_prefix is not None, \ - "Current model prefix is not set. " - return _current_prefix - - -T = TypeVar("T") - - -def get_layers_from_vllm_config( - vllm_config: VllmConfig, - layer_type: type[T], - layer_names: Optional[list[str]] = None) -> dict[str, T]: - """ - Get layers from the vLLM config. - - Args: - vllm_config: The vLLM config. - layer_type: The type of the layer to get. - layer_names: The names of the layers to get. If None, return all layers. - """ - - if layer_names is None: - layer_names = list( - vllm_config.compilation_config.static_forward_context.keys()) - - forward_context = vllm_config.compilation_config.static_forward_context - - return { - layer_name: forward_context[layer_name] - for layer_name in layer_names - if isinstance(forward_context[layer_name], layer_type) - } - - -def update_config(config: DataclassInstanceT, - overrides: dict[str, Any]) -> DataclassInstanceT: - processed_overrides = {} - for field_name, value in overrides.items(): - assert hasattr( - config, field_name), f"{type(config)} has no field `{field_name}`" - current_value = getattr(config, field_name) - if is_dataclass(current_value) and not is_dataclass(value): - assert isinstance(value, dict), ( - f"Overrides to {type(config)}.{field_name} must be a dict" - f" or {type(current_value)}, but got {type(value)}") - value = update_config( - current_value, # type: ignore[type-var] - value) - processed_overrides[field_name] = value - return replace(config, **processed_overrides) +from vllm.config.utils import (ConfigType, SupportsMetricsInfo, config, + get_attr_docs, is_init_field, update_config) +from vllm.config.vllm import (VllmConfig, get_cached_compilation_config, + get_current_vllm_config, + get_layers_from_vllm_config, + set_current_vllm_config) + +__all__ = [ + # From vllm.config.cache + "BlockSize", + "CacheConfig", + "CacheDType", + "MambaDType", + "PrefixCachingHashAlgo", + # From vllm.config.compilation + "CompilationConfig", + "CompilationLevel", + "CUDAGraphMode", + "PassConfig", + # From vllm.config.device + "Device", + "DeviceConfig", + # From vllm.config.kv_events + "KVEventsConfig", + # From vllm.config.kv_transfer + "KVTransferConfig", + # From vllm.config.load + "LoadConfig", + # From vllm.config.lora + "LoRAConfig", + # From vllm.config.model + "ConvertOption", + "HfOverrides", + "LogprobsMode", + "ModelConfig", + "ModelDType", + "ModelImpl", + "RunnerOption", + "TaskOption", + "TokenizerMode", + "iter_architecture_defaults", + "try_match_architecture_defaults", + # From vllm.config.multimodal + "MMCacheType", + 
"MMEncoderTPMode", + "MultiModalConfig", + # From vllm.config.observability + "DetailedTraceModules", + "ObservabilityConfig", + # From vllm.config.parallel + "DistributedExecutorBackend", + "EPLBConfig", + "ParallelConfig", + # From vllm.config.pooler + "PoolerConfig", + # From vllm.config.scheduler + "RunnerType", + "SchedulerConfig", + "SchedulerPolicy", + # From vllm.config.speculative + "SpeculativeConfig", + # From vllm.config.speech_to_text + "SpeechToTextConfig", + # From vllm.config.structured_outputs + "StructuredOutputsConfig", + # From vllm.config.utils + "ConfigType", + "SupportsMetricsInfo", + "config", + "get_attr_docs", + "is_init_field", + "update_config", + # From vllm.config.vllm + "VllmConfig", + "get_cached_compilation_config", + "get_current_vllm_config", + "set_current_vllm_config", + "get_layers_from_vllm_config", +] diff --git a/vllm/config/utils.py b/vllm/config/utils.py index 91e61b330273..2da30cbf149c 100644 --- a/vllm/config/utils.py +++ b/vllm/config/utils.py @@ -1,21 +1,21 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project - +"""Utility functions for vLLM config dataclasses.""" import ast import inspect import textwrap -from dataclasses import MISSING, Field, field, fields, is_dataclass -from typing import TYPE_CHECKING, Any, TypeVar +from dataclasses import MISSING, Field, field, fields, is_dataclass, replace +from typing import TYPE_CHECKING, Any, Protocol, TypeVar import regex as re +from typing_extensions import runtime_checkable if TYPE_CHECKING: from _typeshed import DataclassInstance - - ConfigType = type[DataclassInstance] else: - ConfigType = type + DataclassInstance = Any +ConfigType = type[DataclassInstance] ConfigT = TypeVar("ConfigT", bound=ConfigType) @@ -143,3 +143,33 @@ def pairwise(iterable): def is_init_field(cls: ConfigType, name: str) -> bool: return next(f for f in fields(cls) if f.name == name).init + + +@runtime_checkable +class SupportsHash(Protocol): + + def compute_hash(self) -> str: + ... + + +class SupportsMetricsInfo(Protocol): + + def metrics_info(self) -> dict[str, str]: + ... 
+ + +def update_config(config: ConfigT, overrides: dict[str, Any]) -> ConfigT: + processed_overrides = {} + for field_name, value in overrides.items(): + assert hasattr( + config, field_name), f"{type(config)} has no field `{field_name}`" + current_value = getattr(config, field_name) + if is_dataclass(current_value) and not is_dataclass(value): + assert isinstance(value, dict), ( + f"Overrides to {type(config)}.{field_name} must be a dict" + f" or {type(current_value)}, but got {type(value)}") + value = update_config( + current_value, # type: ignore[type-var] + value) + processed_overrides[field_name] = value + return replace(config, **processed_overrides) diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py new file mode 100644 index 000000000000..7336f5756527 --- /dev/null +++ b/vllm/config/vllm.py @@ -0,0 +1,789 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import copy +import hashlib +import json +import os +from contextlib import contextmanager +from dataclasses import field, replace +from functools import lru_cache +from pathlib import Path +from typing import TYPE_CHECKING, Any, Optional, TypeVar, Union + +import torch +from pydantic import ConfigDict +from pydantic.dataclasses import dataclass + +import vllm.envs as envs +from vllm.logger import init_logger +from vllm.transformers_utils.runai_utils import is_runai_obj_uri +from vllm.utils import random_uuid + +from .cache import CacheConfig +from .compilation import CompilationConfig, CompilationLevel, CUDAGraphMode +from .device import DeviceConfig +from .kv_events import KVEventsConfig +from .kv_transfer import KVTransferConfig +from .load import LoadConfig +from .lora import LoRAConfig +from .model import ModelConfig +from .observability import ObservabilityConfig +from .parallel import ParallelConfig +from .scheduler import SchedulerConfig +from .speculative import SpeculativeConfig +from .structured_outputs import StructuredOutputsConfig +from .utils import SupportsHash, config + +if TYPE_CHECKING: + from transformers import PretrainedConfig + + from vllm.model_executor.layers.quantization.base_config import ( + QuantizationConfig) +else: + PretrainedConfig = Any + + QuantizationConfig = Any + +logger = init_logger(__name__) + + +@config +@dataclass(config=ConfigDict(arbitrary_types_allowed=True)) +class VllmConfig: + """Dataclass which contains all vllm-related configuration. This + simplifies passing around the distinct configurations in the codebase. 
+ """ + + # TODO: use default_factory once default constructing ModelConfig doesn't + # try to download a model + model_config: ModelConfig = None # type: ignore + """Model configuration.""" + cache_config: CacheConfig = field(default_factory=CacheConfig) + """Cache configuration.""" + parallel_config: ParallelConfig = field(default_factory=ParallelConfig) + """Parallel configuration.""" + scheduler_config: SchedulerConfig = field(default_factory=SchedulerConfig) + """Scheduler configuration.""" + device_config: DeviceConfig = field(default_factory=DeviceConfig) + """Device configuration.""" + load_config: LoadConfig = field(default_factory=LoadConfig) + """Load configuration.""" + lora_config: Optional[LoRAConfig] = None + """LoRA configuration.""" + speculative_config: Optional[SpeculativeConfig] = None + """Speculative decoding configuration.""" + structured_outputs_config: StructuredOutputsConfig = field( + default_factory=StructuredOutputsConfig) + """Structured outputs configuration.""" + observability_config: Optional[ObservabilityConfig] = None + """Observability configuration.""" + quant_config: Optional[QuantizationConfig] = None + """Quantization configuration.""" + compilation_config: CompilationConfig = field( + default_factory=CompilationConfig) + """`torch.compile` and cudagraph capture configuration for the model. + + As a shorthand, `-O` can be used to directly specify the compilation + level `n`: `-O3` is equivalent to `-O.level=3` (same as `-O='{"level":3}'`). + Currently, -O and -O= are supported as well but this will likely be + removed in favor of clearer -O syntax in the future. + + NOTE: level 0 is the default level without any optimization. level 1 and 2 + are for internal testing only. level 3 is the recommended level for + production, also default in V1. + + You can specify the full compilation config like so: + `{"level": 3, "cudagraph_capture_sizes": [1, 2, 4, 8]}` + """ + kv_transfer_config: Optional[KVTransferConfig] = None + """The configurations for distributed KV cache transfer.""" + kv_events_config: Optional[KVEventsConfig] = None + """The configurations for event publishing.""" + # some opaque config, only used to provide additional information + # for the hash computation, mainly used for testing, debugging or out of + # tree config registration. + additional_config: Union[dict, SupportsHash] = field(default_factory=dict) + """Additional config for specified platform. Different platforms may + support different configs. Make sure the configs are valid for the platform + you are using. Contents must be hashable.""" + instance_id: str = "" + """The ID of the vLLM instance.""" + + def compute_hash(self) -> str: + """ + WARNING: Whenever a new field is added to this config, + ensure that it is included in the factors list if + it affects the computation graph. + + Provide a hash that uniquely identifies all the configs + that affect the structure of the computation + graph from input ids/embeddings to the final hidden states, + excluding anything before input ids/embeddings and after + the final hidden states. 
+ """ + factors: list[Any] = [] + + # summarize vllm config + vllm_factors: list[Any] = [] + from vllm import __version__ + vllm_factors.append(__version__) + vllm_factors.append(envs.VLLM_USE_V1) + if self.model_config: + vllm_factors.append(self.model_config.compute_hash()) + else: + vllm_factors.append("None") + if self.cache_config: + vllm_factors.append(self.cache_config.compute_hash()) + else: + vllm_factors.append("None") + if self.parallel_config: + vllm_factors.append(self.parallel_config.compute_hash()) + else: + vllm_factors.append("None") + if self.scheduler_config: + vllm_factors.append(self.scheduler_config.compute_hash()) + else: + vllm_factors.append("None") + if self.device_config: + vllm_factors.append(self.device_config.compute_hash()) + else: + vllm_factors.append("None") + if self.load_config: + vllm_factors.append(self.load_config.compute_hash()) + else: + vllm_factors.append("None") + if self.lora_config: + vllm_factors.append(self.lora_config.compute_hash()) + # LoRA creates static buffers based on max_num_batched_tokens. + # The tensor sizes and strides get captured in the torch.compile + # graph explicitly. + vllm_factors.append( + str(self.scheduler_config.max_num_batched_tokens)) + else: + vllm_factors.append("None") + if self.speculative_config: + vllm_factors.append(self.speculative_config.compute_hash()) + else: + vllm_factors.append("None") + if self.structured_outputs_config: + vllm_factors.append(self.structured_outputs_config.compute_hash()) + else: + vllm_factors.append("None") + if self.observability_config: + vllm_factors.append(self.observability_config.compute_hash()) + else: + vllm_factors.append("None") + if self.quant_config: + pass # should be captured by model_config.quantization + if self.compilation_config: + vllm_factors.append(self.compilation_config.compute_hash()) + else: + vllm_factors.append("None") + if self.kv_transfer_config: + vllm_factors.append(self.kv_transfer_config.compute_hash()) + else: + vllm_factors.append("None") + if self.additional_config: + if isinstance(additional_config := self.additional_config, dict): + additional_config_hash = hashlib.md5( + json.dumps(additional_config, sort_keys=True).encode(), + usedforsecurity=False, + ).hexdigest() + else: + additional_config_hash = additional_config.compute_hash() + vllm_factors.append(additional_config_hash) + else: + vllm_factors.append("None") + factors.append(vllm_factors) + + hash_str = hashlib.md5(str(factors).encode(), + usedforsecurity=False).hexdigest()[:10] + return hash_str + + def pad_for_cudagraph(self, batch_size: int) -> int: + # if batch_size > self.compilation_config.max_capture_size, + # it should raise an IndexError. 
+ # the caller should make sure the batch_size is within the range, + # i.e., batch_size <= self.compilation_config.max_capture_size + return self.compilation_config.bs_to_padded_graph_size[batch_size] + + @staticmethod + def _get_quantization_config( + model_config: ModelConfig, + load_config: LoadConfig) -> Optional[QuantizationConfig]: + """Get the quantization config.""" + from vllm.platforms import current_platform + if model_config.quantization is not None: + from vllm.model_executor.model_loader.weight_utils import ( + get_quant_config) + quant_config = get_quant_config(model_config, load_config) + capability_tuple = current_platform.get_device_capability() + + if capability_tuple is not None: + capability = capability_tuple.to_int() + if capability < quant_config.get_min_capability(): + raise ValueError( + f"The quantization method {model_config.quantization} " + "is not supported for the current GPU. Minimum " + f"capability: {quant_config.get_min_capability()}. " + f"Current capability: {capability}.") + supported_dtypes = quant_config.get_supported_act_dtypes() + if model_config.dtype not in supported_dtypes: + raise ValueError( + f"{model_config.dtype} is not supported for quantization " + f"method {model_config.quantization}. Supported dtypes: " + f"{supported_dtypes}") + quant_config.maybe_update_config(model_config.model) + return quant_config + return None + + @staticmethod + def get_quantization_config( + model_config: ModelConfig, + load_config: LoadConfig) -> Optional[QuantizationConfig]: + import copy + + # For some reason, the _ version of this modifies the model_config + # object, so using deepcopy to avoid this problem. + return VllmConfig._get_quantization_config(copy.deepcopy(model_config), + load_config) + + def with_hf_config( + self, + hf_config: PretrainedConfig, + architectures: Optional[list[str]] = None, + ) -> "VllmConfig": + if architectures is not None: + hf_config = copy.deepcopy(hf_config) + hf_config.architectures = architectures + + model_config = copy.deepcopy(self.model_config) + model_config.hf_config = hf_config + + return replace(self, model_config=model_config) + + def __post_init__(self): + """Verify configs are valid & consistent with each other. + """ + + self.try_verify_and_update_config() + + if self.model_config is not None: + self.model_config.verify_with_parallel_config(self.parallel_config) + self.model_config.verify_dual_chunk_attention_config( + self.load_config) + + self.cache_config.verify_with_parallel_config(self.parallel_config) + + if self.lora_config is not None: + self.lora_config.verify_with_cache_config(self.cache_config) + self.lora_config.verify_with_model_config(self.model_config) + + if self.quant_config is None and self.model_config is not None: + self.quant_config = VllmConfig._get_quantization_config( + self.model_config, self.load_config) + + from vllm.platforms import current_platform + if self.model_config is not None and \ + self.scheduler_config.chunked_prefill_enabled and \ + self.model_config.dtype == torch.float32 and \ + current_platform.get_device_capability() == (7, 5): + logger.warning_once( + "Turing devices tensor cores do not support float32 matmul. " + "To workaround this limitation, vLLM will set 'ieee' input " + "precision for chunked prefill triton kernels.") + + # If the user does not explicitly set a compilation level, then + # we use the default level. The default level depends on other + # settings (see the below code). 
+ if self.compilation_config.level is None: + if envs.VLLM_USE_V1: + if (self.model_config is not None + and not self.model_config.enforce_eager): + self.compilation_config.level = CompilationLevel.PIECEWISE + else: + self.compilation_config.level = \ + CompilationLevel.NO_COMPILATION + + else: + # NB: Passing both --enforce-eager and a compilation level + # in V0 means the compilation level wins out. + self.compilation_config.level = CompilationLevel.NO_COMPILATION + + # async tp is built on top of sequence parallelism + # and requires it to be enabled. + if self.compilation_config.pass_config.enable_async_tp: + self.compilation_config.pass_config.enable_sequence_parallelism = \ + True + if self.compilation_config.pass_config.enable_sequence_parallelism: + self.compilation_config.custom_ops.append("+rms_norm") + + if current_platform.support_static_graph_mode(): + # if cudagraph_mode is not explicitly set by users, set default + # value + if self.compilation_config.cudagraph_mode is None: + if envs.VLLM_USE_V1 and self.compilation_config.level \ + == CompilationLevel.PIECEWISE: + # default to full and piecewise for most models + self.compilation_config.cudagraph_mode = \ + CUDAGraphMode.FULL_AND_PIECEWISE + + # pooling models and encoder-decoder models + # do not support full cudagraphs + if self.model_config is not None and \ + (self.model_config.pooler_config is not None + or self.model_config.is_encoder_decoder): + self.compilation_config.cudagraph_mode = \ + CUDAGraphMode.PIECEWISE + else: + self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE + + # disable cudagraph when enforce eager execution + if self.model_config is not None and \ + self.model_config.enforce_eager: + logger.info("Cudagraph is disabled under eager mode") + self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE + elif envs.VLLM_USE_V1: + self.compilation_config.cudagraph_num_of_warmups = 1 + + self._set_cudagraph_sizes() + else: + self.compilation_config.cudagraph_mode = CUDAGraphMode.NONE + + if self.cache_config.kv_sharing_fast_prefill: + + if self.speculative_config is not None and \ + self.speculative_config.use_eagle(): + raise NotImplementedError( + "Fast prefill optimization for KV sharing is not " + "compatible with EAGLE as EAGLE requires correct logits " + "for all tokens while fast prefill gives incorrect logits " + "for prompt tokens.") + + logger.warning_once( + "--kv-sharing-fast-prefill requires changes on model side for " + "correctness and to realize prefill savings. 
") + + disable_chunked_prefill_reasons: list[str] = [] + + if self.model_config: + if self.model_config.pooler_config: + pooling_type = self.model_config.pooler_config.pooling_type + if pooling_type is None or pooling_type.lower() != "last": + disable_chunked_prefill_reasons.append( + "Only \"last\" pooling supports chunked " + "prefill and prefix caching; disabling both.") + if not getattr(self.model_config.hf_config, "is_causal", True): + disable_chunked_prefill_reasons.append( + "Only models using causal attention supports chunked " + "prefill and prefix caching; disabling both.") + elif self.model_config.is_encoder_decoder: + from vllm.multimodal import MULTIMODAL_REGISTRY + self.scheduler_config.max_num_encoder_input_tokens = \ + MULTIMODAL_REGISTRY.get_encdec_max_encoder_len(self.model_config) + logger.debug( + "Encoder-decoder model detected: setting " + "`max_num_encoder_input_tokens` to encoder length (%s)", + self.scheduler_config.max_num_encoder_input_tokens) + self.scheduler_config.disable_chunked_mm_input = True + disable_chunked_prefill_reasons.append( + "Encoder-decoder models do not support chunked prefill nor" + " prefix caching; disabling both.") + if (self.model_config.architecture + == "WhisperForConditionalGeneration" + and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") + != "spawn"): + logger.warning( + "Whisper is known to have issues with " + "forked workers. If startup is hanging, " + "try setting 'VLLM_WORKER_MULTIPROC_METHOD' " + "to 'spawn'.") + + if disable_chunked_prefill_reasons: + for reason in disable_chunked_prefill_reasons: + logger.info(reason) + self.scheduler_config.chunked_prefill_enabled = False + self.scheduler_config.long_prefill_token_threshold = 0 + + if self.cache_config is not None: + self.cache_config.enable_prefix_caching = False + + if (self.kv_events_config is not None + and self.kv_events_config.enable_kv_cache_events + and not self.cache_config.enable_prefix_caching): + logger.warning( + "KV cache events are on, but prefix caching is not enabled." + "Use --enable-prefix-caching to enable.") + if (self.kv_events_config is not None + and self.kv_events_config.publisher != "null" + and not self.kv_events_config.enable_kv_cache_events): + logger.warning("KV cache events are disabled," + "but the scheduler is configured to publish them." + "Modify KVEventsConfig.enable_kv_cache_events" + "to True to enable.") + current_platform.check_and_update_config(self) + + # Do this after all the updates to compilation_config.level + if envs.VLLM_USE_V1 and \ + self.compilation_config.level == CompilationLevel.PIECEWISE: + self.compilation_config.set_splitting_ops_for_v1() + + # final check of cudagraph mode after all possible updates + if envs.VLLM_USE_V1 and current_platform.is_cuda_alike(): + if self.compilation_config.cudagraph_mode.has_full_cudagraphs()\ + and self.model_config is not None and \ + not self.model_config.disable_cascade_attn and\ + not self.compilation_config.cudagraph_mode.\ + has_piecewise_cudagraphs(): + logger.warning_once( + "No piecewise cudagraph for executing cascade attention." 
+ " Will fall back to eager execution if a batch runs " + "into cascade attentions") + + if self.compilation_config.cudagraph_mode\ + .requires_piecewise_compilation(): + assert self.compilation_config.level == \ + CompilationLevel.PIECEWISE, \ + "Compilation level should be CompilationLevel.PIECEWISE "\ + "when cudagraph_mode piecewise cudagraphs is used, "\ + f"cudagraph_mode={self.compilation_config.cudagraph_mode}" + + # final migrate the deprecated flags + self.compilation_config.use_cudagraph = self.compilation_config.\ + cudagraph_mode!= CUDAGraphMode.NONE + self.compilation_config.full_cuda_graph = self.compilation_config.\ + cudagraph_mode.has_full_cudagraphs() + + if self.parallel_config.enable_dbo: + a2a_backend = envs.VLLM_ALL2ALL_BACKEND + assert a2a_backend in \ + ["deepep_low_latency", "deepep_high_throughput"], \ + "Microbatching currently only supports the deepep_low_latency and "\ + f"deepep_high_throughput all2all backend. {a2a_backend} is not "\ + "supported. To fix set the VLLM_ALL2ALL_BACKEND environment "\ + "variable to deepep_low_latency or deepep_high_throughput and "\ + "install the DeepEP kernels." + + if not self.model_config.disable_cascade_attn: + self.model_config.disable_cascade_attn = True + logger.warning_once( + "Disabling cascade attention when DBO is enabled.") + + if not self.instance_id: + self.instance_id = random_uuid()[:5] + + if (envs.VLLM_USE_V1 + and not self.scheduler_config.disable_hybrid_kv_cache_manager): + # logger should only print warning message for hybrid models. As we + # can't know whether the model is hybrid or not now, so we don't log + # warning message here and will log it later. + if not current_platform.support_hybrid_kv_cache(): + # Hybrid KV cache manager is not supported on non-GPU platforms. + self.scheduler_config.disable_hybrid_kv_cache_manager = True + if self.kv_transfer_config is not None: + # Hybrid KV cache manager is not compatible with KV transfer. + self.scheduler_config.disable_hybrid_kv_cache_manager = True + if self.kv_events_config is not None: + # Hybrid KV cache manager is not compatible with KV events. + self.scheduler_config.disable_hybrid_kv_cache_manager = True + if self.model_config is not None and \ + self.model_config.attention_chunk_size is not None: + if self.speculative_config is not None and \ + self.speculative_config.use_eagle(): + # Hybrid KV cache manager is not yet supported with chunked + # local attention + eagle. + self.scheduler_config.disable_hybrid_kv_cache_manager = True + elif \ + not envs.VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE: + logger.warning( + "There is a latency regression when using chunked local" + " attention with the hybrid KV cache manager. Disabling" + " it, by default. To enable it, set the environment " + "VLLM_ALLOW_CHUNKED_LOCAL_ATTN_WITH_HYBRID_KV_CACHE=1." + ) + # Hybrid KV cache manager is not yet supported with chunked + # local attention. 
+ self.scheduler_config.disable_hybrid_kv_cache_manager = True + + if self.compilation_config.debug_dump_path: + self.compilation_config.debug_dump_path = \ + self.compilation_config.debug_dump_path.absolute().expanduser() + if envs.VLLM_DEBUG_DUMP_PATH is not None: + env_path = Path(envs.VLLM_DEBUG_DUMP_PATH).absolute().expanduser() + if self.compilation_config.debug_dump_path: + logger.warning( + "Config-specified debug dump path is overridden" + " by VLLM_DEBUG_DUMP_PATH to %s", env_path) + self.compilation_config.debug_dump_path = env_path + + def update_sizes_for_sequence_parallelism(self, + possible_sizes: list) -> list: + # remove the sizes that not multiple of tp_size when + # enable sequence parallelism + removed_sizes = [ + size for size in possible_sizes + if size % self.parallel_config.tensor_parallel_size != 0 + ] + if removed_sizes: + logger.warning( + "Batch sizes %s are removed because they are not " + "multiple of tp_size %d when " + "sequence parallelism is enabled", removed_sizes, + self.parallel_config.tensor_parallel_size) + + return [ + size for size in possible_sizes + if size % self.parallel_config.tensor_parallel_size == 0 + ] + + def _set_cudagraph_sizes(self): + """ + vLLM defines the default candidate list of batch sizes for CUDA graph + capture as: + + ```python + max_graph_size = min(max_num_seqs * 2, 512) + # 1, 2, 4, then multiples of 8 up to max_graph_size + cuda_graph_sizes = [1, 2, 4, 8, 16, 24, 32, 40, ..., max_graph_size] + + In the end, `vllm_config.compilation_config.cudagraph_capture_sizes` + will be the final sizes to capture cudagraph (in descending order). + + These sizes are used to capture and reuse CUDA graphs for + performance-critical paths (e.g., decoding). Capturing enables + significantly faster kernel dispatch by avoiding Python overhead. The + list is then filtered based on `max_num_batched_tokens` (e.g., 8192 on + most GPUs), which controls the total allowed number of tokens in a + batch. Since each sequence may have a variable number of tokens, the + maximum usable batch size will depend on actual sequence lengths. + + Example: + With `max_num_batched_tokens = 8192`, and typical sequences + averaging ~32 tokens, most practical batch sizes fall below 256. + However, the system will still allow capture sizes up to 512 if + shape and memory permit. + + Note: + If users explicitly specify cudagraph capture sizes in the + compilation config, those will override this default logic. + At runtime: + + - If batch size <= one of the `cudagraph_capture_sizes`, the closest + padded CUDA graph will be used. + - If batch size > largest `cudagraph_capture_sizes`, cudagraph will + not be used. 
+ """ + + # calculate the default `batch_size_capture_list` + batch_size_capture_list = [] + if self.model_config is not None and \ + not self.model_config.enforce_eager: + cuda_graph_sizes = self.scheduler_config.cuda_graph_sizes + if len(cuda_graph_sizes) == 1: + batch_size_capture_list = [1, 2, 4] + [ + i for i in range(8, cuda_graph_sizes[0] + 1, 8) + ] + elif len(cuda_graph_sizes) > 1: + batch_size_capture_list = sorted(cuda_graph_sizes) + else: + raise TypeError(f"Invalid value for {cuda_graph_sizes=}.") + if self.parallel_config.tensor_parallel_size > 1 and \ + self.compilation_config.pass_config.enable_sequence_parallelism: + batch_size_capture_list = \ + self.update_sizes_for_sequence_parallelism(batch_size_capture_list) + max_num_tokens = self.scheduler_config.max_num_batched_tokens + batch_size_capture_list = [ + size for size in batch_size_capture_list + if size <= max_num_tokens + ] + + self.compilation_config.init_with_cudagraph_sizes( + batch_size_capture_list) + + def recalculate_max_model_len(self, max_model_len: int): + # Can only be called in try_verify_and_update_config + model_config = self.model_config + max_model_len = model_config.get_and_verify_max_len(max_model_len) + self.model_config.max_model_len = max_model_len + self.scheduler_config.max_model_len = max_model_len + + def try_verify_and_update_config(self): + if self.model_config is None: + return + + # Avoid running try_verify_and_update_config multiple times + if getattr(self.model_config, "config_updated", False): + return + self.model_config.config_updated = True + + architecture = self.model_config.architecture + if architecture is None: + return + + from vllm.model_executor.models.config import ( + MODELS_CONFIG_MAP, HybridAttentionMambaModelConfig) + cls = MODELS_CONFIG_MAP.get(architecture, None) + if cls is not None: + cls.verify_and_update_config(self) + + if self.model_config.is_hybrid: + HybridAttentionMambaModelConfig.verify_and_update_config(self) + + if self.model_config.convert_type == "classify": + # Maybe convert ForCausalLM into ForSequenceClassification model. + from vllm.model_executor.models.adapters import ( + SequenceClassificationConfig) + SequenceClassificationConfig.verify_and_update_config(self) + + if hasattr(self.model_config, "model_weights") and is_runai_obj_uri( + self.model_config.model_weights): + if self.load_config.load_format == "auto": + logger.info("Detected Run:ai model config. " + "Overriding `load_format` to 'runai_streamer'") + self.load_config.load_format = "runai_streamer" + elif self.load_config.load_format != "runai_streamer": + raise ValueError(f"To load a model from S3, 'load_format' " + f"must be 'runai_streamer', " + f"but got '{self.load_config.load_format}'. " + f"Model: {self.model_config.model}") + + def compile_debug_dump_path(self) -> Optional[Path]: + """Returns a rank-aware path for dumping + torch.compile debug information. 
+ """ + if self.compilation_config.debug_dump_path is None: + return None + tp_rank = self.parallel_config.rank + dp_rank = self.parallel_config.data_parallel_rank + data_parallel_size = self.parallel_config.data_parallel_size + append_path = f"rank_{tp_rank}" if data_parallel_size == 1 \ + else f"rank_{tp_rank}_dp_{dp_rank}" + path = self.compilation_config.debug_dump_path / append_path + return path + + def __str__(self): + return ( + f"model={self.model_config.model!r}, " + f"speculative_config={self.speculative_config!r}, " + f"tokenizer={self.model_config.tokenizer!r}, " + f"skip_tokenizer_init={self.model_config.skip_tokenizer_init}, " + f"tokenizer_mode={self.model_config.tokenizer_mode}, " + f"revision={self.model_config.revision}, " + f"tokenizer_revision={self.model_config.tokenizer_revision}, " + f"trust_remote_code={self.model_config.trust_remote_code}, " + f"dtype={self.model_config.dtype}, " + f"max_seq_len={self.model_config.max_model_len}, " + f"download_dir={self.load_config.download_dir!r}, " + f"load_format={self.load_config.load_format}, " + f"tensor_parallel_size={self.parallel_config.tensor_parallel_size}, " # noqa + f"pipeline_parallel_size={self.parallel_config.pipeline_parallel_size}, " # noqa + f"data_parallel_size={self.parallel_config.data_parallel_size}, " # noqa + f"disable_custom_all_reduce={self.parallel_config.disable_custom_all_reduce}, " # noqa + f"quantization={self.model_config.quantization}, " + f"enforce_eager={self.model_config.enforce_eager}, " + f"kv_cache_dtype={self.cache_config.cache_dtype}, " + f"device_config={self.device_config.device}, " + f"structured_outputs_config={self.structured_outputs_config!r}, " + f"observability_config={self.observability_config!r}, " + f"seed={self.model_config.seed}, " + f"served_model_name={self.model_config.served_model_name}, " + f"enable_prefix_caching={self.cache_config.enable_prefix_caching}, " + f"chunked_prefill_enabled={self.scheduler_config.chunked_prefill_enabled}, " # noqa + f"pooler_config={self.model_config.pooler_config!r}, " + f"compilation_config={self.compilation_config!r}") + + +_current_vllm_config: Optional[VllmConfig] = None +_current_prefix: Optional[str] = None + + +@contextmanager +def set_current_vllm_config(vllm_config: VllmConfig, + check_compile=False, + prefix: Optional[str] = None): + """ + Temporarily set the current vLLM config. + Used during model initialization. + We save the current vLLM config in a global variable, + so that all modules can access it, e.g. custom ops + can access the vLLM config to determine how to dispatch. + """ + global _current_vllm_config, _current_prefix + old_vllm_config = _current_vllm_config + old_prefix = _current_prefix + from vllm.compilation.counter import compilation_counter + num_models_seen = compilation_counter.num_models_seen + try: + _current_vllm_config = vllm_config + _current_prefix = prefix + yield + except Exception: + raise + else: + if check_compile: + vllm_config.compilation_config.custom_op_log_check() + + if check_compile and \ + vllm_config.compilation_config.level == CompilationLevel.PIECEWISE \ + and compilation_counter.num_models_seen == num_models_seen: + # If the model supports compilation, + # compilation_counter.num_models_seen should be increased + # by at least 1. + # If it is not increased, it means the model does not support + # compilation (does not have @support_torch_compile decorator). + logger.warning( + "`torch.compile` is turned on, but the model %s" + " does not support it. 
Please open an issue on GitHub" + " if you want it to be supported.", + vllm_config.model_config.model) + finally: + _current_vllm_config = old_vllm_config + _current_prefix = old_prefix + # Clear the compilation config cache when context changes + get_cached_compilation_config.cache_clear() + + +@lru_cache(maxsize=1) +def get_cached_compilation_config(): + """Cache config to avoid repeated calls to get_current_vllm_config()""" + return get_current_vllm_config().compilation_config + + +def get_current_vllm_config() -> VllmConfig: + if _current_vllm_config is None: + # in ci, usually when we test custom ops/modules directly, + # we don't set the vllm config. In that case, we set a default + # config. + logger.warning("Current vLLM config is not set.") + return VllmConfig() + return _current_vllm_config + + +T = TypeVar("T") + + +def get_layers_from_vllm_config( + vllm_config: VllmConfig, + layer_type: type[T], + layer_names: Optional[list[str]] = None) -> dict[str, T]: + """ + Get layers from the vLLM config. + + Args: + vllm_config: The vLLM config. + layer_type: The type of the layer to get. + layer_names: The names of the layers to get. If None, return all layers. + """ + + if layer_names is None: + layer_names = list( + vllm_config.compilation_config.static_forward_context.keys()) + + forward_context = vllm_config.compilation_config.static_forward_context + + return { + layer_name: forward_context[layer_name] + for layer_name in layer_names + if isinstance(forward_context[layer_name], layer_type) + } diff --git a/vllm/model_executor/layers/mamba/linear_attn.py b/vllm/model_executor/layers/mamba/linear_attn.py index 410cbef4f6bc..319133777992 100644 --- a/vllm/model_executor/layers/mamba/linear_attn.py +++ b/vllm/model_executor/layers/mamba/linear_attn.py @@ -29,8 +29,7 @@ from vllm.model_executor.layers.mamba.abstract import MambaBase from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.utils import direct_register_custom_op from vllm.v1.attention.backends.linear_attn import LinearAttentionMetadata diff --git a/vllm/model_executor/layers/quantization/auto_round.py b/vllm/model_executor/layers/quantization/auto_round.py index bf5141fa4894..eb7600af3371 100644 --- a/vllm/model_executor/layers/quantization/auto_round.py +++ b/vllm/model_executor/layers/quantization/auto_round.py @@ -9,9 +9,8 @@ from vllm.logger import init_logger from vllm.model_executor.layers.linear import (LinearBase, UnquantizedLinearMethod) -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.platforms import current_platform from vllm.scalar_type import scalar_types diff --git a/vllm/model_executor/layers/quantization/bitblas.py b/vllm/model_executor/layers/quantization/bitblas.py index d05c0c0d5473..81e51f4a4358 100644 --- a/vllm/model_executor/layers/quantization/bitblas.py +++ b/vllm/model_executor/layers/quantization/bitblas.py @@ -7,9 +7,8 @@ from vllm.logger import init_logger from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase -from vllm.model_executor.layers.quantization 
import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( BITBLAS_OPTIMIZE_FEATURES, BITBLAS_SUPPORTED_NUM_BITS, BITBLAS_SUPPORTED_SYM, MINIMUM_BITBLAS_VERSION) diff --git a/vllm/model_executor/layers/quantization/bitsandbytes.py b/vllm/model_executor/layers/quantization/bitsandbytes.py index 29584188630f..7b7011cb06d3 100644 --- a/vllm/model_executor/layers/quantization/bitsandbytes.py +++ b/vllm/model_executor/layers/quantization/bitsandbytes.py @@ -13,9 +13,8 @@ from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod, set_weight_attrs) -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.platforms import current_platform from vllm.utils import direct_register_custom_op diff --git a/vllm/model_executor/layers/quantization/deepspeedfp.py b/vllm/model_executor/layers/quantization/deepspeedfp.py index 2922aef32939..4a189ab4a171 100644 --- a/vllm/model_executor/layers/quantization/deepspeedfp.py +++ b/vllm/model_executor/layers/quantization/deepspeedfp.py @@ -9,9 +9,8 @@ from packaging import version from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.utils import set_weight_attrs diff --git a/vllm/model_executor/layers/quantization/gptq.py b/vllm/model_executor/layers/quantization/gptq.py index 0335b9c46b4d..842ce92333c9 100644 --- a/vllm/model_executor/layers/quantization/gptq.py +++ b/vllm/model_executor/layers/quantization/gptq.py @@ -4,7 +4,7 @@ import enum from enum import Enum from fractions import Fraction -from typing import Any, Optional, Union +from typing import TYPE_CHECKING, Any, Optional, Union import torch from safetensors.torch import _TYPES as _SAFETENSORS_TO_TORCH_DTYPE @@ -13,7 +13,6 @@ from vllm import _custom_ops as ops from vllm.model_executor.layers.fused_moe.layer import FusedMoE from vllm.model_executor.layers.linear import LinearMethodBase -from vllm.model_executor.layers.quantization import QuantizationMethods from vllm.model_executor.layers.quantization.base_config import ( QuantizationConfig, QuantizeMethodBase) from vllm.model_executor.layers.quantization.utils.gptq_utils import ( @@ -26,6 +25,11 @@ from vllm.transformers_utils.config import get_safetensors_params_metadata from vllm.utils import is_list_of +if TYPE_CHECKING: + from vllm.model_executor.layers.quantization import QuantizationMethods +else: + QuantizationMethods = str + class GPTQConfig(QuantizationConfig): """Config class for GPTQ. 
diff --git a/vllm/model_executor/layers/quantization/gptq_bitblas.py b/vllm/model_executor/layers/quantization/gptq_bitblas.py index 646229258648..c193dd85e32f 100644 --- a/vllm/model_executor/layers/quantization/gptq_bitblas.py +++ b/vllm/model_executor/layers/quantization/gptq_bitblas.py @@ -9,9 +9,8 @@ from vllm.logger import init_logger from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, set_weight_attrs) -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.layers.quantization.kernels.mixed_precision import ( BitBLASLinearKernel, MPLinearLayerConfig) from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( diff --git a/vllm/model_executor/layers/quantization/gptq_marlin.py b/vllm/model_executor/layers/quantization/gptq_marlin.py index 967e46c24378..253675e25f34 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin.py @@ -43,7 +43,7 @@ def get_moe_quant_method( - config: QuantizationConfig, + config: "GPTQMarlinConfig", layer: torch.nn.Module, prefix: str, moe_method_cls: type, diff --git a/vllm/model_executor/layers/quantization/gptq_marlin_24.py b/vllm/model_executor/layers/quantization/gptq_marlin_24.py index eba917d85411..6b9e3effc29d 100644 --- a/vllm/model_executor/layers/quantization/gptq_marlin_24.py +++ b/vllm/model_executor/layers/quantization/gptq_marlin_24.py @@ -9,9 +9,8 @@ from vllm import _custom_ops as ops from vllm.logger import init_logger from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.parameter import (BasevLLMParameter, ChannelQuantScaleParameter, GroupQuantScaleParameter, diff --git a/vllm/model_executor/layers/quantization/ipex_quant.py b/vllm/model_executor/layers/quantization/ipex_quant.py index c83b0b47a4b7..353942cdd591 100644 --- a/vllm/model_executor/layers/quantization/ipex_quant.py +++ b/vllm/model_executor/layers/quantization/ipex_quant.py @@ -14,11 +14,10 @@ from vllm.model_executor.layers.fused_moe.config import FusedMoEQuantConfig from vllm.model_executor.layers.linear import (LinearBase, LinearMethodBase, UnquantizedLinearMethod) -from vllm.model_executor.layers.quantization import QuantizationMethods +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.layers.quantization.awq import (AWQLinearMethod, is_layer_skipped_awq) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) from vllm.model_executor.layers.quantization.fp8 import (Fp8Config, Fp8LinearMethod) from vllm.model_executor.layers.quantization.gptq import GPTQLinearMethod diff --git a/vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py b/vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py index 0eca3b4c024e..fe72910659e2 100644 --- a/vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py +++ b/vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py @@ -7,8 +7,7 
@@ from packaging import version from vllm.logger import init_logger -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization.utils import replace_parameter from vllm.model_executor.layers.quantization.utils.bitblas_utils import ( BITBLAS_OPTIMIZE_FEATURES, BITBLAS_SUPPORTED_GROUP_SIZES, diff --git a/vllm/model_executor/layers/quantization/tpu_int8.py b/vllm/model_executor/layers/quantization/tpu_int8.py index 38de4b54fb19..7f738d170db4 100644 --- a/vllm/model_executor/layers/quantization/tpu_int8.py +++ b/vllm/model_executor/layers/quantization/tpu_int8.py @@ -8,9 +8,8 @@ from torch.nn.parameter import Parameter from vllm.model_executor.layers.linear import LinearBase, LinearMethodBase -from vllm.model_executor.layers.quantization import QuantizationMethods -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import (QuantizationConfig, + QuantizationMethods) from vllm.model_executor.parameter import ModelWeightParameter ACTIVATION_SCHEMES = ["none", "dynamic"] diff --git a/vllm/model_executor/layers/quantization/utils/gptq_utils.py b/vllm/model_executor/layers/quantization/utils/gptq_utils.py index 41b833725b30..fd76af230620 100644 --- a/vllm/model_executor/layers/quantization/utils/gptq_utils.py +++ b/vllm/model_executor/layers/quantization/utils/gptq_utils.py @@ -4,21 +4,27 @@ from copy import deepcopy from fractions import Fraction from types import MappingProxyType -from typing import Optional, Union +from typing import TYPE_CHECKING, Optional, Union import regex as re import torch -from vllm.config import QuantizationConfig from vllm.model_executor.layers.linear import (LinearBase, UnquantizedLinearMethod) from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, UnquantizedEmbeddingMethod) +if TYPE_CHECKING: + from ..gptq import GPTQConfig + from ..gptq_marlin import GPTQMarlinConfig +else: + GPTQConfig = object + GPTQMarlinConfig = object + # Match dynamic rules with module name (prefix) and override quantize # config if module (prefix) matches a rule -def override_config(config: QuantizationConfig, prefix: str): +def override_config(config: Union[GPTQConfig, GPTQMarlinConfig], prefix: str): weight_bits = get_dynamic_override(config, prefix, "bits", config.weight_bits) if isinstance(weight_bits, int): @@ -34,6 +40,7 @@ def override_config(config: QuantizationConfig, prefix: str): config.pack_factor = Fraction(32, config.weight_bits) # packed into int32 if config.get_name() == "gptq_marlin": + assert isinstance(config, GPTQMarlinConfig) is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym) if isinstance(is_sym, bool): config.is_sym = is_sym @@ -45,6 +52,7 @@ def override_config(config: QuantizationConfig, prefix: str): config.quant_type = config.TYPE_MAP[(config.weight_bits, config.is_sym)] elif config.get_name() == "gptq": + assert isinstance(config, GPTQConfig) if config.weight_bits not in [2, 3, 4, 8]: raise ValueError( "Currently, only 2/3/4/8-bit weight quantization is " @@ -52,7 +60,7 @@ def override_config(config: QuantizationConfig, prefix: str): def get_dynamic_override( - config: QuantizationConfig, + config: Union[GPTQConfig, GPTQMarlinConfig], layer_name: str, key: Optional[str] = None, default_value: Union[int, bool, @@ -116,7 +124,7 @@ def is_layer_gptq_quantized( def get_linear_quant_method( - config: 
QuantizationConfig, + config: Union[GPTQConfig, GPTQMarlinConfig], layer: torch.nn.Module, prefix: str, linear_method_cls: type, diff --git a/vllm/model_executor/models/aimv2.py b/vllm/model_executor/models/aimv2.py index b13d863ebb74..419f8a5ae2c7 100644 --- a/vllm/model_executor/models/aimv2.py +++ b/vllm/model_executor/models/aimv2.py @@ -17,8 +17,7 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, QKVParallelLinear, RowParallelLinear) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.transformers_utils.configs.ovis import AIMv2Config diff --git a/vllm/model_executor/models/aria.py b/vllm/model_executor/models/aria.py index e0d7af0b1c3e..82f35d889605 100644 --- a/vllm/model_executor/models/aria.py +++ b/vllm/model_executor/models/aria.py @@ -9,13 +9,14 @@ from transformers.models.aria.modeling_aria import AriaCrossAttention from transformers.models.aria.processing_aria import AriaProcessor -from vllm.config import QuantizationConfig, VllmConfig +from vllm.config import VllmConfig from vllm.distributed import get_tensor_model_parallel_rank from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.fused_moe import FusedMoE from vllm.model_executor.layers.linear import (ColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py index 82cd4a26a1ba..6e470378cb60 100644 --- a/vllm/model_executor/models/bailing_moe.py +++ b/vllm/model_executor/models/bailing_moe.py @@ -45,8 +45,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index 2c619396e6c0..893cc8a41455 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -41,8 +41,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 76a5745a4f51..489c0bb3d3af 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -42,8 +42,7 @@ ReplicatedLinear, RowParallelLinear) from 
vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index b434822bff0a..c864856db654 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -21,8 +21,7 @@ from vllm.model_executor.layers.linear import (MergedColumnParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.sequence import IntermediateTensors diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index d28c97116790..085e740ce226 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -47,8 +47,7 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/interfaces.py b/vllm/model_executor/models/interfaces.py index d40df9b43dd4..c95c63cd8534 100644 --- a/vllm/model_executor/models/interfaces.py +++ b/vllm/model_executor/models/interfaces.py @@ -16,8 +16,7 @@ from vllm.inputs import TokensPrompt from vllm.inputs.data import PromptType from vllm.logger import init_logger -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.utils import supports_kw from .interfaces_base import VllmModel, is_pooling_model diff --git a/vllm/model_executor/models/llama4_eagle.py b/vllm/model_executor/models/llama4_eagle.py index 0768edd08315..572eca344e0a 100644 --- a/vllm/model_executor/models/llama4_eagle.py +++ b/vllm/model_executor/models/llama4_eagle.py @@ -28,8 +28,7 @@ from vllm.logger import init_logger from vllm.model_executor.layers.layernorm import RMSNorm from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.quantization.torchao import TorchAOConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( VocabParallelEmbedding) diff --git a/vllm/model_executor/models/mamba.py b/vllm/model_executor/models/mamba.py index 5bd268291c7d..d810701c50b4 100644 --- a/vllm/model_executor/models/mamba.py +++ b/vllm/model_executor/models/mamba.py @@ -16,8 +16,7 @@ from 
vllm.model_executor.layers.mamba.mamba_mixer import MambaMixer from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index 97e9c5785e72..f8a5a8f6081b 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -16,8 +16,7 @@ from vllm.model_executor.layers.mamba.mamba_mixer2 import MambaMixer2 from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) from vllm.model_executor.model_loader.weight_utils import default_weight_loader diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index a92890c9f7b5..45228aa0bb93 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -33,8 +33,7 @@ MiniMaxText01LinearAttention) from vllm.model_executor.layers.mamba.mamba_utils import ( MambaStateDtypeCalculator, MambaStateShapeCalculator) -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/ovis.py b/vllm/model_executor/models/ovis.py index 2f9c6ddfc661..2e8e4a44102f 100644 --- a/vllm/model_executor/models/ovis.py +++ b/vllm/model_executor/models/ovis.py @@ -29,8 +29,7 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.linear import ReplicatedLinear -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.aimv2 import AIMv2Model from vllm.model_executor.models.siglip import SiglipVisionModel from vllm.model_executor.models.utils import (AutoWeightsLoader, flatten_bn, diff --git a/vllm/model_executor/models/ovis2_5.py b/vllm/model_executor/models/ovis2_5.py index 86ce7e9eab27..9c8adb617310 100644 --- a/vllm/model_executor/models/ovis2_5.py +++ b/vllm/model_executor/models/ovis2_5.py @@ -11,8 +11,7 @@ from vllm.config import VllmConfig from vllm.model_executor.layers.linear import ReplicatedLinear -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.models.ovis import (OvisImagePatchInputs, VisualEmbedding) from vllm.model_executor.models.siglip2navit import Siglip2NavitModel diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index 
3ce67ce37a7a..7308fef092b5 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -40,8 +40,7 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/siglip2navit.py b/vllm/model_executor/models/siglip2navit.py index 7d90d3a7ef12..18de4b576c49 100644 --- a/vllm/model_executor/models/siglip2navit.py +++ b/vllm/model_executor/models/siglip2navit.py @@ -14,13 +14,13 @@ from transformers.configuration_utils import PretrainedConfig from vllm.attention.layer import check_upstream_fa_availability -from vllm.config import QuantizationConfig from vllm.distributed import divide, get_tensor_model_parallel_world_size from vllm.model_executor.layers.activation import get_act_fn from vllm.model_executor.layers.linear import (ColumnParallelLinear, LinearBase, QKVParallelLinear, ReplicatedLinear, RowParallelLinear) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.platforms import _Backend diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 0fe723d59483..960813822139 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -23,8 +23,7 @@ ReplicatedLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.rotary_embedding import get_rope from vllm.model_executor.layers.vocab_parallel_embedding import ( DEFAULT_VOCAB_PADDING_SIZE, ParallelLMHead, VocabParallelEmbedding) diff --git a/vllm/model_executor/models/whisper.py b/vllm/model_executor/models/whisper.py index 7beeeddf988f..1eecac7ed76b 100644 --- a/vllm/model_executor/models/whisper.py +++ b/vllm/model_executor/models/whisper.py @@ -26,8 +26,7 @@ QKVParallelLinear, RowParallelLinear) from vllm.model_executor.layers.logits_processor import LogitsProcessor -from vllm.model_executor.layers.quantization.base_config import ( - QuantizationConfig) +from vllm.model_executor.layers.quantization import QuantizationConfig from vllm.model_executor.layers.vocab_parallel_embedding import ParallelLMHead from vllm.model_executor.model_loader.utils import set_default_torch_dtype from vllm.model_executor.model_loader.weight_utils import default_weight_loader From 23194d83e8f2a6783b0d8c275f5f8a22faab9aec Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 00:18:59 -0400 Subject: [PATCH 495/518] [BugFix] Fix DP/EP hang (#25906) Signed-off-by: Lucas Wilkinson --- vllm/v1/worker/gpu_model_runner.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 9e7d6eb0387b..98e00f6d98a9 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -3075,13 +3075,19 @@ def _dummy_run( # We currently only 
microbatch if the number of tokens is # over a certain threshold. if self.parallel_config.enable_dbo and allow_microbatching: - ubatch_slices, num_tokens_after_padding = ubatch_split( + ubatch_slices, ubatch_num_tokens_after_padding = ubatch_split( num_scheduled_tokens, total_num_scheduled_tokens, total_num_scheduled_tokens, uniform_decode=uniform_decode, vllm_config=self.vllm_config, ) + # Currently when DBO is enabled `ubatch_split` returns + # the num_tokens_after_padding for a single ubatch, but we have 2 + # TODO(sage,lucas): this is cruft that should be addressed in the + # padding refactor. + if ubatch_num_tokens_after_padding is not None: + num_tokens_after_padding = ubatch_num_tokens_after_padding * 2 # If we failed to microbatch, currently need to resynchronize # TODO(lucas,sage): we should be able to avoid this second sync by @@ -3198,7 +3204,7 @@ def _dummy_run( # filter out the valid batch descriptor _cg_mode, batch_descriptor = self.cudagraph_dispatcher.dispatch( - BatchDescriptor(num_tokens=num_tokens, + BatchDescriptor(num_tokens=num_tokens_after_padding, uniform_decode=uniform_decode)) \ if not is_profile else (CUDAGraphMode.NONE, None) if cudagraph_runtime_mode is not None: @@ -3212,7 +3218,13 @@ def _dummy_run( cudagraph_runtime_mode = _cg_mode if ubatch_slices is not None: - num_tokens = num_tokens // 2 + # Adjust values to reflect a single ubatch. + # TODO(sage,lucas): this is cruft that should be addressed in + # the padding refactor. + num_tokens_after_padding = ubatch_slices[0].num_tokens + if num_tokens_across_dp is not None: + num_tokens_across_dp[:] = num_tokens_after_padding + with self.maybe_randomize_inputs(input_ids), set_forward_context( attn_metadata, self.vllm_config, From e47433b3c175ef6c29254637e12a52c26c4c25b2 Mon Sep 17 00:00:00 2001 From: acisseJZhong <40467976+acisseJZhong@users.noreply.github.com> Date: Mon, 29 Sep 2025 22:09:50 -0700 Subject: [PATCH 496/518] [BugFix] Pass config_format via try_get_generation_config (#25912) --- vllm/config/model.py | 2 ++ vllm/transformers_utils/config.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/vllm/config/model.py b/vllm/config/model.py index 3fb448ebbf36..28af82de7722 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -1334,11 +1334,13 @@ def try_get_generation_config(self) -> dict[str, Any]: self.hf_config_path or self.model, trust_remote_code=self.trust_remote_code, revision=self.revision, + config_format=self.config_format, ) else: config = try_get_generation_config( self.generation_config, trust_remote_code=self.trust_remote_code, + config_format=self.config_format, ) if config is None: diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index b0816cfb0702..4f5e5c01e5cb 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -949,6 +949,7 @@ def try_get_generation_config( model: str, trust_remote_code: bool, revision: Optional[str] = None, + config_format: Union[str, ConfigFormat] = "auto", ) -> Optional[GenerationConfig]: try: return GenerationConfig.from_pretrained( @@ -961,6 +962,7 @@ def try_get_generation_config( model, trust_remote_code=trust_remote_code, revision=revision, + config_format=config_format, ) return GenerationConfig.from_model_config(config) except OSError: # Not found From 2e1b8bc2b6d1796d65cea6ed514759d12a72cf17 Mon Sep 17 00:00:00 2001 From: Zhou Jiahao Date: Tue, 30 Sep 2025 16:15:23 +0800 Subject: [PATCH 497/518] [Model][Bugfix] Fix MiDashengLM audio encoder mask by removing incorrect 
`logical_not` (#25925) Signed-off-by: zhoukz --- vllm/model_executor/models/midashenglm.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/vllm/model_executor/models/midashenglm.py b/vllm/model_executor/models/midashenglm.py index 0b62fbd40b07..33bd64df5b53 100644 --- a/vllm/model_executor/models/midashenglm.py +++ b/vllm/model_executor/models/midashenglm.py @@ -426,8 +426,7 @@ def forward( assert x_length.ndim == 1, "Lengths are of size (B,)" scaled_lengths = (x_length / (self.hop_length * 4)).long() mask = self._to_mask(max_length=t, lengths=scaled_lengths) - split_masks = mask.logical_not().split(target_length_in_patches, - dim=-1) + split_masks = mask.split(target_length_in_patches, dim=-1) else: mask = None split_masks = [None] * len(input_splits) From e23cacda35132f14d05ee236e8e5ebd8703e6940 Mon Sep 17 00:00:00 2001 From: Simon Danielsson <70206058+simondanielsson@users.noreply.github.com> Date: Tue, 30 Sep 2025 10:17:49 +0200 Subject: [PATCH 498/518] [Bugfix]: Clean up chunked prefill logging when using whisper (#25075) Signed-off-by: simondanielsson --- tests/v1/core/test_scheduler.py | 52 ++++++++++++++++++++++++++++++++- vllm/config/scheduler.py | 21 +++++++++++-- vllm/config/vllm.py | 9 +++--- vllm/engine/arg_utils.py | 1 + 4 files changed, 75 insertions(+), 8 deletions(-) diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py index f6fc1e6d37d1..5e2bdaa75d3f 100644 --- a/tests/v1/core/test_scheduler.py +++ b/tests/v1/core/test_scheduler.py @@ -1,5 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import dataclasses from typing import Optional from unittest.mock import Mock @@ -1899,4 +1900,53 @@ def test_priority_scheduling_preemption_when_out_of_kv(): assert output.scheduled_cached_reqs.num_reqs == 1 assert output.scheduled_cached_reqs.req_ids[0] == request_high.request_id assert len(scheduler.waiting) == 1 - assert len(scheduler.running) == 1 \ No newline at end of file + assert len(scheduler.running) == 1 + + +@pytest.mark.parametrize( + ("enable_chunked_prefill", "is_encoder_decoder", "expect_enabled"), + [ + (True, False, True), + (False, False, False), + # Encoder-decoder models should always have it disabled + (False, True, False), + (True, True, False), + ]) +def test_chunked_prefill_disabled_for_encoder_decoder( + enable_chunked_prefill: bool, is_encoder_decoder: bool, + expect_enabled: bool) -> None: + """Validate that chunked prefill is appropriately disabled for + encoder-decoder models.""" + scheduler_config = SchedulerConfig( + enable_chunked_prefill=enable_chunked_prefill, + is_encoder_decoder=is_encoder_decoder, + ) + + # `is_encoder_decoder` should only be used during construction + # of the config, and otherwise stored in the model config. + assert "is_encoder_decoder" not in vars(scheduler_config) + assert "is_encoder_decoder" not in [ + f.name for f in dataclasses.fields(scheduler_config) + ] + _validate_chunked_prefill_settings_for_encoder_decoder( + scheduler_config, is_encoder_decoder, expect_enabled) + + # Ensure it is retained in VllmConfig, even after its post-init. 
+ vllm_config = VllmConfig(scheduler_config=scheduler_config) + _validate_chunked_prefill_settings_for_encoder_decoder( + vllm_config.scheduler_config, is_encoder_decoder, expect_enabled) + + +def _validate_chunked_prefill_settings_for_encoder_decoder( + scheduler_config: SchedulerConfig, is_encoder_decoder: bool, + expect_enabled: bool) -> None: + """Validate chunked prefill settings in the scheduler config for + encoder-decoder models.""" + assert scheduler_config.chunked_prefill_enabled is expect_enabled + assert scheduler_config.enable_chunked_prefill is expect_enabled + if is_encoder_decoder: + # Encoder-decoder models should automatically disable chunked multimodal + # inputs as well + assert scheduler_config.disable_chunked_mm_input is not expect_enabled + if is_encoder_decoder and not expect_enabled: + assert scheduler_config.long_prefill_token_threshold == 0 diff --git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index daf094d2df5c..1b0a10d3a069 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -2,7 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import hashlib -from dataclasses import field +from dataclasses import InitVar, field from typing import Any, Literal, Union from pydantic import SkipValidation, model_validator @@ -84,6 +84,13 @@ class SchedulerConfig: is_multimodal_model: bool = False """True if the model is multimodal.""" + is_encoder_decoder: InitVar[bool] = False + """True if the model is an encoder-decoder model. + + Note: This is stored in the ModelConfig, and is used only here to + disable chunked prefill and prefix caching for encoder-decoder models. + """ + # TODO (ywang96): Make this configurable. max_num_encoder_input_tokens: int = field(init=False) """Multimodal encoder compute budget, only used in V1. @@ -161,13 +168,23 @@ def compute_hash(self) -> str: usedforsecurity=False).hexdigest() return hash_str - def __post_init__(self) -> None: + def __post_init__(self, is_encoder_decoder: bool) -> None: if self.max_model_len is None: self.max_model_len = 8192 if self.max_num_seqs is None: self.max_num_seqs = 128 + if is_encoder_decoder: + # Chunked prefill should be disabled for encoder-decoder models. 
+ self.disable_chunked_mm_input = True + self.chunked_prefill_enabled = False + self.enable_chunked_prefill = False + self.long_prefill_token_threshold = 0 + logger.info( + "Encoder-decoder models do not support chunked prefill nor" + " prefix caching; disabling both.") + if self.max_num_batched_tokens is None: if self.enable_chunked_prefill: self.max_num_batched_tokens = DEFAULT_MAX_NUM_BATCHED_TOKENS diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 7336f5756527..585d3997cc3a 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -386,10 +386,6 @@ def __post_init__(self): "Encoder-decoder model detected: setting " "`max_num_encoder_input_tokens` to encoder length (%s)", self.scheduler_config.max_num_encoder_input_tokens) - self.scheduler_config.disable_chunked_mm_input = True - disable_chunked_prefill_reasons.append( - "Encoder-decoder models do not support chunked prefill nor" - " prefix caching; disabling both.") if (self.model_config.architecture == "WhisperForConditionalGeneration" and os.environ.get("VLLM_WORKER_MULTIPROC_METHOD") @@ -400,7 +396,10 @@ def __post_init__(self): "try setting 'VLLM_WORKER_MULTIPROC_METHOD' " "to 'spawn'.") - if disable_chunked_prefill_reasons: + # Disable prefix caching only if chunked prefill is explicitly disabled + # (and not merely unset) + if (self.scheduler_config.chunked_prefill_enabled is False + or disable_chunked_prefill_reasons): for reason in disable_chunked_prefill_reasons: logger.info(reason) self.scheduler_config.chunked_prefill_enabled = False diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 6bb794177db8..ce0f1708235f 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1367,6 +1367,7 @@ def create_engine_config( enable_chunked_prefill=self.enable_chunked_prefill, disable_chunked_mm_input=self.disable_chunked_mm_input, is_multimodal_model=model_config.is_multimodal_model, + is_encoder_decoder=model_config.is_encoder_decoder, send_delta_data=(envs.VLLM_USE_RAY_SPMD_WORKER and parallel_config.use_ray), policy=self.scheduling_policy, From fa7e254a7f3ea7b28f1c5908c83ac57275d3c090 Mon Sep 17 00:00:00 2001 From: Yongye Zhu Date: Tue, 30 Sep 2025 05:14:41 -0400 Subject: [PATCH 499/518] [New Model] DeepSeek-V3.2 (Rebased to Main) (#25896) Signed-off-by: Chen Zhang Signed-off-by: youkaichao Signed-off-by: Lucas Wilkinson Signed-off-by: mgoin Signed-off-by: NickLucche Signed-off-by: Yongye Zhu Signed-off-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com> Signed-off-by: Lucia Fang Co-authored-by: Chen Zhang Co-authored-by: youkaichao Co-authored-by: Lucas Wilkinson Co-authored-by: Robert Shaw <114415538+robertgshaw2-redhat@users.noreply.github.com> Co-authored-by: Lucas Wilkinson Co-authored-by: yewentao256 Co-authored-by: Wentao Ye <44945378+yewentao256@users.noreply.github.com> Co-authored-by: mgoin Co-authored-by: Lucia Fang <116399278+luccafong@users.noreply.github.com> Co-authored-by: Lucia Fang Co-authored-by: NickLucche Co-authored-by: Siyuan Fu Co-authored-by: Matthew Bonanni Co-authored-by: Xiaozhu Meng Co-authored-by: Barry Kang <43644113+Barry-Delaney@users.noreply.github.com> --- cmake/external_projects/flashmla.cmake | 87 ++- csrc/cache.h | 8 + csrc/cache_kernels.cu | 258 ++++++++- csrc/quantization/fp8/nvidia/quant_utils.cuh | 11 + csrc/torch_bindings.cpp | 7 + setup.py | 4 + tests/compile/test_fusion_attn.py | 1 - tests/kernels/attention/test_cache.py | 113 ++++ .../attention/test_deepgemm_attention.py | 279 +++++++++ tests/kernels/attention/test_flashmla.py | 
22 +- .../kernels/attention/test_flashmla_sparse.py | 119 ++++ .../attention/test_pack_unpack_triton.py | 245 ++++++++ tests/models/registry.py | 1 + tests/models/test_initialization.py | 9 +- .../vllm_add_dummy_platform/dummy_platform.py | 2 +- tests/v1/attention/test_mla_backends.py | 69 ++- .../v1/attention/test_sparse_mla_backends.py | 426 ++++++++++++++ tests/v1/attention/utils.py | 1 - tests/v1/core/test_kv_cache_utils.py | 56 +- tests/v1/core/test_prefix_caching.py | 7 +- .../core/test_single_type_kv_cache_manager.py | 6 - tests/v1/engine/test_engine_core_client.py | 3 +- tests/v1/worker/test_gpu_model_runner.py | 1 - vllm/_custom_ops.py | 9 + vllm/attention/backends/abstract.py | 1 + vllm/attention/layer.py | 5 +- vllm/attention/ops/common.py | 205 +++++++ vllm/attention/ops/flashmla.py | 165 ++++-- vllm/attention/ops/paged_attn.py | 1 + vllm/attention/selector.py | 5 +- vllm/config/cache.py | 20 +- vllm/config/compilation.py | 1 + vllm/config/model.py | 4 +- vllm/config/speculative.py | 2 +- vllm/model_executor/layers/layernorm.py | 18 + vllm/model_executor/layers/mla.py | 16 + vllm/model_executor/models/config.py | 29 +- vllm/model_executor/models/deepseek_mtp.py | 14 +- vllm/model_executor/models/deepseek_v2.py | 449 ++++++++++++++- vllm/model_executor/models/longcat_flash.py | 3 + vllm/model_executor/models/registry.py | 1 + vllm/platforms/cpu.py | 5 +- vllm/platforms/cuda.py | 15 +- vllm/platforms/interface.py | 2 +- vllm/platforms/rocm.py | 5 +- vllm/platforms/tpu.py | 5 +- vllm/platforms/xpu.py | 5 +- vllm/transformers_utils/config.py | 2 + vllm/transformers_utils/configs/__init__.py | 2 + .../transformers_utils/configs/deepseek_v3.py | 101 ++++ vllm/utils/__init__.py | 7 + vllm/utils/deep_gemm.py | 124 +++- vllm/v1/attention/backends/cpu_attn.py | 1 + vllm/v1/attention/backends/flash_attn.py | 1 + vllm/v1/attention/backends/flashinfer.py | 1 + vllm/v1/attention/backends/flex_attention.py | 1 + vllm/v1/attention/backends/mla/common.py | 170 +++++- vllm/v1/attention/backends/mla/flashmla.py | 1 + .../attention/backends/mla/flashmla_sparse.py | 544 ++++++++++++++++++ vllm/v1/attention/backends/mla/indexer.py | 293 ++++++++++ vllm/v1/attention/backends/pallas.py | 1 + vllm/v1/attention/backends/rocm_aiter_fa.py | 1 + vllm/v1/attention/backends/tree_attn.py | 1 + vllm/v1/attention/backends/triton_attn.py | 1 + vllm/v1/attention/backends/xformers.py | 1 + vllm/v1/core/kv_cache_utils.py | 9 +- vllm/v1/core/single_type_kv_cache_manager.py | 3 +- vllm/v1/kv_cache_interface.py | 51 +- vllm/v1/spec_decode/eagle.py | 56 +- vllm/v1/worker/gpu_model_runner.py | 45 +- vllm/v1/worker/tpu_model_runner.py | 2 - 71 files changed, 3918 insertions(+), 221 deletions(-) create mode 100644 tests/kernels/attention/test_deepgemm_attention.py create mode 100644 tests/kernels/attention/test_flashmla_sparse.py create mode 100644 tests/kernels/attention/test_pack_unpack_triton.py create mode 100644 tests/v1/attention/test_sparse_mla_backends.py create mode 100644 vllm/transformers_utils/configs/deepseek_v3.py create mode 100644 vllm/v1/attention/backends/mla/flashmla_sparse.py create mode 100644 vllm/v1/attention/backends/mla/indexer.py diff --git a/cmake/external_projects/flashmla.cmake b/cmake/external_projects/flashmla.cmake index 02224cfe3ee8..c9e7aec880b9 100644 --- a/cmake/external_projects/flashmla.cmake +++ b/cmake/external_projects/flashmla.cmake @@ -18,8 +18,8 @@ if(FLASH_MLA_SRC_DIR) else() FetchContent_Declare( flashmla - GIT_REPOSITORY https://github.com/vllm-project/FlashMLA.git - GIT_TAG 
a757314c04eedd166e329e846c820eb1bdd702de + GIT_REPOSITORY https://github.com/vllm-project/FlashMLA + GIT_TAG 5f65b85703c7ed75fda01e06495077caad207c3f GIT_PROGRESS TRUE CONFIGURE_COMMAND "" BUILD_COMMAND "" @@ -33,23 +33,64 @@ message(STATUS "FlashMLA is available at ${flashmla_SOURCE_DIR}") # The FlashMLA kernels only work on hopper and require CUDA 12.3 or later. # Only build FlashMLA kernels if we are building for something compatible with # sm90a -cuda_archs_loose_intersection(FLASH_MLA_ARCHS "9.0a" "${CUDA_ARCHS}") -if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3 AND FLASH_MLA_ARCHS) + +set(SUPPORT_ARCHS) +if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3) + list(APPEND SUPPORT_ARCHS 9.0a) +endif() +if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.8) + list(APPEND SUPPORT_ARCHS 10.0a) +endif() + + +cuda_archs_loose_intersection(FLASH_MLA_ARCHS "${SUPPORT_ARCHS}" "${CUDA_ARCHS}") +if(FLASH_MLA_ARCHS) + set(VLLM_FLASHMLA_GPU_FLAGS ${VLLM_GPU_FLAGS}) + list(APPEND VLLM_FLASHMLA_GPU_FLAGS "--expt-relaxed-constexpr" "--expt-extended-lambda" "--use_fast_math") + set(FlashMLA_SOURCES - ${flashmla_SOURCE_DIR}/csrc/flash_api.cpp - ${flashmla_SOURCE_DIR}/csrc/kernels/get_mla_metadata.cu - ${flashmla_SOURCE_DIR}/csrc/kernels/mla_combine.cu - ${flashmla_SOURCE_DIR}/csrc/kernels/splitkv_mla.cu - ${flashmla_SOURCE_DIR}/csrc/kernels_fp8/flash_fwd_mla_fp8_sm90.cu) + ${flashmla_SOURCE_DIR}/csrc/torch_api.cpp + ${flashmla_SOURCE_DIR}/csrc/pybind.cpp + ${flashmla_SOURCE_DIR}/csrc/smxx/get_mla_metadata.cu + ${flashmla_SOURCE_DIR}/csrc/smxx/mla_combine.cu + ${flashmla_SOURCE_DIR}/csrc/sm90/decode/dense/splitkv_mla.cu + ${flashmla_SOURCE_DIR}/csrc/sm90/decode/sparse_fp8/splitkv_mla.cu + ${flashmla_SOURCE_DIR}/csrc/sm90/prefill/sparse/fwd.cu + ${flashmla_SOURCE_DIR}/csrc/sm100/decode/sparse_fp8/splitkv_mla.cu + ${flashmla_SOURCE_DIR}/csrc/sm100/prefill/dense/fmha_cutlass_fwd_sm100.cu + ${flashmla_SOURCE_DIR}/csrc/sm100/prefill/dense/fmha_cutlass_bwd_sm100.cu + ${flashmla_SOURCE_DIR}/csrc/sm100/prefill/sparse/fwd.cu + ) + + set(FlashMLA_Extension_SOURCES + ${flashmla_SOURCE_DIR}/csrc/extension/torch_api.cpp + ${flashmla_SOURCE_DIR}/csrc/extension/sm90/dense_fp8/pybind.cpp + ${flashmla_SOURCE_DIR}/csrc/extension/sm90/dense_fp8/flash_fwd_mla_fp8_sm90.cu + ) set(FlashMLA_INCLUDES + ${flashmla_SOURCE_DIR}/csrc + ${flashmla_SOURCE_DIR}/csrc/sm90 + ${flashmla_SOURCE_DIR}/csrc/cutlass/include + ${flashmla_SOURCE_DIR}/csrc/cutlass/tools/util/include + ) + + set(FlashMLA_Extension_INCLUDES + ${flashmla_SOURCE_DIR}/csrc + ${flashmla_SOURCE_DIR}/csrc/sm90 + ${flashmla_SOURCE_DIR}/csrc/extension/sm90/dense_fp8/ ${flashmla_SOURCE_DIR}/csrc/cutlass/include - ${flashmla_SOURCE_DIR}/csrc) + ${flashmla_SOURCE_DIR}/csrc/cutlass/tools/util/include + ) set_gencode_flags_for_srcs( SRCS "${FlashMLA_SOURCES}" CUDA_ARCHS "${FLASH_MLA_ARCHS}") + set_gencode_flags_for_srcs( + SRCS "${FlashMLA_Extension_SOURCES}" + CUDA_ARCHS "${FLASH_MLA_ARCHS}") + define_gpu_extension_target( _flashmla_C DESTINATION vllm @@ -60,8 +101,32 @@ if(${CMAKE_CUDA_COMPILER_VERSION} VERSION_GREATER 12.3 AND FLASH_MLA_ARCHS) INCLUDE_DIRECTORIES ${FlashMLA_INCLUDES} USE_SABI 3 WITH_SOABI) + + # Keep Stable ABI for the module, but *not* for CUDA/C++ files. + # This prevents Py_LIMITED_API from affecting nvcc and C++ compiles. 
+ target_compile_options(_flashmla_C PRIVATE + $<$:-UPy_LIMITED_API> + $<$:-UPy_LIMITED_API>) + + define_gpu_extension_target( + _flashmla_extension_C + DESTINATION vllm + LANGUAGE ${VLLM_GPU_LANG} + SOURCES ${FlashMLA_Extension_SOURCES} + COMPILE_FLAGS ${VLLM_FLASHMLA_GPU_FLAGS} + ARCHITECTURES ${VLLM_GPU_ARCHES} + INCLUDE_DIRECTORIES ${FlashMLA_Extension_INCLUDES} + USE_SABI 3 + WITH_SOABI) + + # Keep Stable ABI for the module, but *not* for CUDA/C++ files. + # This prevents Py_LIMITED_API from affecting nvcc and C++ compiles. + target_compile_options(_flashmla_extension_C PRIVATE + $<$:-UPy_LIMITED_API> + $<$:-UPy_LIMITED_API>) else() - # Create an empty target for setup.py when not targeting sm90a systems + # Create empty targets for setup.py when not targeting sm90a systems add_custom_target(_flashmla_C) + add_custom_target(_flashmla_extension_C) endif() diff --git a/csrc/cache.h b/csrc/cache.h index fd230bec27fc..427bd0d54fac 100644 --- a/csrc/cache.h +++ b/csrc/cache.h @@ -56,3 +56,11 @@ void cp_gather_cache( torch::Tensor const& block_table, // [BATCH, BLOCK_INDICES] torch::Tensor const& cu_seq_lens, // [BATCH+1] int64_t batch_size, std::optional seq_starts = std::nullopt); + +// Indexer K quantization and cache function +void indexer_k_quant_and_cache( + torch::Tensor& k, // [num_tokens, head_dim] + torch::Tensor& kv_cache, // [num_blocks, block_size, cache_stride] + torch::Tensor& slot_mapping, // [num_tokens] + int64_t quant_block_size, // quantization block size + const std::string& scale_fmt); diff --git a/csrc/cache_kernels.cu b/csrc/cache_kernels.cu index 80b4c47c5547..b1c43163c6a5 100644 --- a/csrc/cache_kernels.cu +++ b/csrc/cache_kernels.cu @@ -16,6 +16,7 @@ #include #include +#include // FLT_MIN #include #include @@ -396,6 +397,176 @@ __global__ void concat_and_cache_mla_kernel( copy(k_pe, kv_cache, k_pe_stride, block_stride, pe_dim, kv_lora_rank); } +template +__global__ void concat_and_cache_ds_mla_kernel( + const scalar_t* __restrict__ kv_c, // [num_tokens, kv_lora_rank] + const scalar_t* __restrict__ k_pe, // [num_tokens, pe_dim] + cache_t* __restrict__ kv_cache, // [num_blocks, block_size, (kv_lora_rank + // + pe_dim)] + const int64_t* __restrict__ slot_mapping, // [num_tokens] + const int block_stride, // + const int entry_stride, // + const int kv_c_stride, // + const int k_pe_stride, // + const int kv_lora_rank, // + const int pe_dim, // + const int block_size, // + const float* scale // +) { + const int64_t token_idx = blockIdx.x; + const int64_t slot_idx = slot_mapping[token_idx]; + // NOTE: slot_idx can be -1 if the token is padded + if (slot_idx < 0) { + return; + } + const int64_t block_idx = slot_idx / block_size; + const int64_t block_offset = slot_idx % block_size; + const int64_t dst_idx_start = + block_idx * block_stride + block_offset * entry_stride; + + // Create 4 tile scales in shared memory + __shared__ float smem[20]; + float* shard_abs_max = smem; + float* tile_scales = smem + 16; + + // For the NoPE part, each tile of 128 elements is handled by 4 warps + // (128 threads). There are 4 total tiles, so 16 warps (512 threads). + // The first thread of the first warp in each tile writes the scale + // value for the tile. The RoPE part (last 64 elements) is handled + // by another 2 warps (64 threads). + // So in total, we use 18 warps (576 threads) per block. 
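+  // Resulting per-token cache entry layout (656 bytes, matching the
+  // TORCH_CHECKs in concat_and_cache_mla below): bytes [0, 512) hold the
+  // 512 fp8 NoPE values, bytes [512, 528) hold the four float32 per-tile
+  // scales, and bytes [528, 656) hold the 64 RoPE values in the original
+  // 16-bit dtype.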
+ + // Cast kv_cache to 16_bit for RoPE values + scalar_t* kv_cache_16bit = + reinterpret_cast(&kv_cache[dst_idx_start]); + + // The last 64 threads handle the RoPE part + if (threadIdx.x >= kv_lora_rank) { + const int8_t pe_idx = threadIdx.x - kv_lora_rank; + const int64_t src_idx = token_idx * k_pe_stride + pe_idx; + // RoPE values start after the packed 8-bit NoPE values and the + // 32-bit scales + const int64_t dst_idx = kv_lora_rank / 2 + 8 + pe_idx; + kv_cache_16bit[dst_idx] = k_pe[src_idx]; + return; + } + + // Determine the scale for each chunk of NoPE + const int16_t tile_idx = threadIdx.x >> 7; + const int16_t warp_idx = (threadIdx.x & 127) >> 5; + const int16_t lane_idx = threadIdx.x & 31; + + // Load the NoPE element for this thread into registers + const int64_t src_idx = token_idx * kv_c_stride + threadIdx.x; + const scalar_t src_val = kv_c[src_idx]; + + // Warp-level reduction to find the max absolute value in the warp + float max_abs = fabsf(src_val); +#pragma unroll + for (int offset = 16; offset > 0; offset /= 2) { +#ifdef USE_ROCM + max_abs = fmaxf(max_abs, __shfl_down_sync(UINT64_MAX, max_abs, offset)); +#else + max_abs = fmaxf(max_abs, __shfl_down_sync(0xFFFFFFFF, max_abs, offset)); +#endif + } + + // The first lane of each warp in each tile writes the max_abs of this part + // of the tile to shared memory + if (lane_idx == 0) { + shard_abs_max[tile_idx * 4 + warp_idx] = max_abs; + } + __syncthreads(); + + // The first lane of the first warp in each tile computes the scale for the + // tile and writes it to shared memory and to kv_cache + if (warp_idx == 0 && lane_idx == 0) { + float4 shard_abs_max_vec = + reinterpret_cast(shard_abs_max)[tile_idx]; + float tile_scale = fmaxf(fmaxf(shard_abs_max_vec.x, shard_abs_max_vec.y), + fmaxf(shard_abs_max_vec.z, shard_abs_max_vec.w)) / + 448.f; + + // Avoid division by zero in `scaled_convert` + tile_scales[tile_idx] = fmaxf(tile_scale, FLT_MIN); + float* kv_cache_32bit = reinterpret_cast(&kv_cache[dst_idx_start]); + const uint64_t dst_idx = kv_lora_rank / 4 + tile_idx; + kv_cache_32bit[dst_idx] = tile_scales[tile_idx]; + } + + __syncthreads(); + + // Now all threads in the block scale and write their element + const float scale_val = tile_scales[tile_idx]; + const int64_t dst_idx = dst_idx_start + threadIdx.x; + kv_cache[dst_idx] = + fp8::scaled_convert( + src_val, scale_val); +} + +template +__global__ void indexer_k_quant_and_cache_kernel( + const scalar_t* __restrict__ k, // [num_tokens, head_dim] + cache_t* __restrict__ kv_cache, // [num_blocks, block_size, cache_stride] + const int64_t* __restrict__ slot_mapping, // [num_tokens] + const int head_dim, // dimension of each head + const int quant_block_size, // quantization block size + const int cache_block_size, // cache block size + const int cache_stride, // stride for each token in kv_cache + const bool use_ue8m0 // use ue8m0 scale format +) { + constexpr int VEC_SIZE = 4; + const int64_t token_idx = blockIdx.x; + const int64_t head_dim_idx = (blockIdx.y * blockDim.y * blockDim.x + + threadIdx.y * blockDim.x + threadIdx.x) * + VEC_SIZE; + const int64_t slot_idx = slot_mapping[token_idx]; + const int64_t block_idx = slot_idx / cache_block_size; + const int64_t block_offset = slot_idx % cache_block_size; + + // NOTE: slot_idx can be -1 if the token is padded + if (slot_idx < 0 || (head_dim_idx >= head_dim)) { + return; + } + + float2 k_val = (reinterpret_cast( + k))[(token_idx * head_dim + head_dim_idx) / VEC_SIZE]; + scalar_t* k_val_ptr = reinterpret_cast(&k_val); + float 
amax = 0.0f; + for (int i = 0; i < VEC_SIZE; i++) { + amax = fmaxf(amax, fabsf(float(k_val_ptr[i]))); + } + __syncwarp(); + + // Reduced amax + for (int mask = 16; mask > 0; mask /= 2) { +#ifdef USE_ROCM + amax = fmaxf(amax, __shfl_xor_sync(uint64_t(-1), amax, mask)); +#else + amax = fmaxf(amax, __shfl_xor_sync(unsigned(-1), amax, mask)); +#endif + } + __syncwarp(); + float scale = fmaxf(amax, 1e-4) / 448.0f; + if (use_ue8m0) { + scale = exp2f(ceilf(log2f(scale))); + } + + const int64_t dst_offset = block_idx * cache_block_size * cache_stride + + block_offset * head_dim + head_dim_idx; + for (int i = 0; i < VEC_SIZE; i++) { + kv_cache[dst_offset + i] = + fp8::scaled_convert(k_val_ptr[i], scale); + } + if (threadIdx.x == 0) { + const int64_t dst_scale_idx = + block_idx * cache_block_size * cache_stride + + cache_block_size * head_dim + + (block_offset * head_dim + head_dim_idx) * 4 / quant_block_size; + reinterpret_cast(kv_cache)[dst_scale_idx / 4] = scale; + } +} + } // namespace vllm // KV_T is the data type of key and value tensors. @@ -438,7 +609,7 @@ void reshape_and_cache( const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); DISPATCH_BY_KV_CACHE_DTYPE(key.dtype(), kv_cache_dtype, - CALL_RESHAPE_AND_CACHE) + CALL_RESHAPE_AND_CACHE); } // KV_T is the data type of key and value tensors. @@ -509,6 +680,18 @@ void reshape_and_cache_flash( kv_c_stride, k_pe_stride, kv_lora_rank, pe_dim, block_size, \ reinterpret_cast(scale.data_ptr())); +// KV_T is the data type of key and value tensors. +// CACHE_T is the stored data type of kv-cache. +#define CALL_CONCAT_AND_CACHE_DS_MLA(KV_T, CACHE_T, KV_DTYPE) \ + vllm::concat_and_cache_ds_mla_kernel \ + <<>>( \ + reinterpret_cast(kv_c.data_ptr()), \ + reinterpret_cast(k_pe.data_ptr()), \ + reinterpret_cast(kv_cache.data_ptr()), \ + slot_mapping.data_ptr(), block_stride, entry_stride, \ + kv_c_stride, k_pe_stride, kv_lora_rank, pe_dim, block_size, \ + reinterpret_cast(scale.data_ptr())); + void concat_and_cache_mla( torch::Tensor& kv_c, // [num_tokens, kv_lora_rank] torch::Tensor& k_pe, // [num_tokens, pe_dim] @@ -531,20 +714,44 @@ void concat_and_cache_mla( int pe_dim = k_pe.size(1); int block_size = kv_cache.size(1); - TORCH_CHECK(kv_cache.size(2) == kv_lora_rank + pe_dim); + if (kv_cache_dtype == "fp8_ds_mla") { + TORCH_CHECK(kv_lora_rank == 512, "kv_lora_rank must be 512 for fp8_ds_mla"); + TORCH_CHECK(pe_dim == 64, "pe_dim must be 64 for fp8_ds_mla"); + TORCH_CHECK(kv_cache.size(2) == 656 / kv_cache.itemsize(), + "kv_cache.size(2) must be 656 bytes for fp8_ds_mla"); + TORCH_CHECK(kv_c.itemsize() == 2, + "kv_c.itemsize() must be 2 for fp8_ds_mla"); + TORCH_CHECK(k_pe.itemsize() == 2, + "k_pe.itemsize() must be 2 for fp8_ds_mla"); + } else { + TORCH_CHECK(kv_cache.size(2) == kv_lora_rank + pe_dim); + } int kv_c_stride = kv_c.stride(0); int k_pe_stride = k_pe.stride(0); int block_stride = kv_cache.stride(0); int entry_stride = kv_cache.stride(1); - dim3 grid(num_tokens); - dim3 block(std::min(kv_lora_rank, 512)); const at::cuda::OptionalCUDAGuard device_guard(device_of(kv_c)); const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); - DISPATCH_BY_KV_CACHE_DTYPE(kv_c.dtype(), kv_cache_dtype, - CALL_CONCAT_AND_CACHE_MLA); + if (kv_cache_dtype == "fp8_ds_mla") { + dim3 grid(num_tokens); + // For the NoPE part, each tile of 128 elements is handled by 4 warps + // (128 threads). There are 4 total tiles, so 16 warps (512 threads). + // The first thread of the first warp in each tile writes the scale + // value for the tile. 
The RoPE part (last 64 elements) is handled + // by another 2 warps (64 threads). + // So in total, we use 18 warps (576 threads) per block. + dim3 block(576); + DISPATCH_BY_KV_CACHE_DTYPE(kv_c.dtype(), kv_cache_dtype, + CALL_CONCAT_AND_CACHE_DS_MLA); + } else { + dim3 grid(num_tokens); + dim3 block(std::min(kv_lora_rank, 512)); + DISPATCH_BY_KV_CACHE_DTYPE(kv_c.dtype(), kv_cache_dtype, + CALL_CONCAT_AND_CACHE_MLA); + } } namespace vllm { @@ -922,3 +1129,42 @@ void cp_gather_cache( TORCH_CHECK(false, "Unsupported data type width: ", dtype_bits); } } + +// Macro to dispatch the kernel based on the data type. +#define CALL_INDEXER_K_QUANT_AND_CACHE(KV_T, CACHE_T, KV_DTYPE) \ + vllm::indexer_k_quant_and_cache_kernel \ + <<>>( \ + reinterpret_cast(k.data_ptr()), \ + reinterpret_cast(kv_cache.data_ptr()), \ + slot_mapping.data_ptr(), head_dim, quant_block_size, \ + cache_block_size, cache_stride, use_ue8m0); + +void indexer_k_quant_and_cache( + torch::Tensor& k, // [num_tokens, head_dim] + torch::Tensor& kv_cache, // [num_blocks, block_size, cache_stride] + torch::Tensor& slot_mapping, // [num_tokens] + int64_t quant_block_size, // quantization block size + const std::string& scale_fmt) { + int num_tokens = k.size(0); + int head_dim = k.size(1); + int cache_block_size = kv_cache.size(1); + int cache_stride = kv_cache.size(2); + bool use_ue8m0 = scale_fmt == "ue8m0"; + + TORCH_CHECK(k.device() == kv_cache.device(), + "k and kv_cache must be on the same device"); + TORCH_CHECK(k.device() == slot_mapping.device(), + "k and slot_mapping must be on the same device"); + TORCH_CHECK(head_dim % quant_block_size == 0, + "head_dim must be divisible by quant_block_size"); + + constexpr int vec_size = 4; + dim3 grid(num_tokens, (head_dim + quant_block_size * vec_size - 1) / + (quant_block_size * vec_size)); + dim3 block(32, vec_size); + const at::cuda::OptionalCUDAGuard device_guard(device_of(k)); + const cudaStream_t stream = at::cuda::getCurrentCUDAStream(); + + DISPATCH_BY_KV_CACHE_DTYPE(k.dtype(), "fp8_e4m3", + CALL_INDEXER_K_QUANT_AND_CACHE); +} \ No newline at end of file diff --git a/csrc/quantization/fp8/nvidia/quant_utils.cuh b/csrc/quantization/fp8/nvidia/quant_utils.cuh index 5b9c2df8468c..5361a8b1a598 100644 --- a/csrc/quantization/fp8/nvidia/quant_utils.cuh +++ b/csrc/quantization/fp8/nvidia/quant_utils.cuh @@ -576,6 +576,17 @@ __inline__ __device__ Tout scaled_convert(const Tin& x, const float scale) { TORCH_CHECK(false, \ "Unsupported input type of kv cache: ", SRC_DTYPE); \ } \ + } else if (KV_DTYPE == "fp8_ds_mla") { \ + if (SRC_DTYPE == at::ScalarType::Float) { \ + FN(float, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3); \ + } else if (SRC_DTYPE == at::ScalarType::Half) { \ + FN(uint16_t, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3); \ + } else if (SRC_DTYPE == at::ScalarType::BFloat16) { \ + FN(__nv_bfloat16, uint8_t, vllm::Fp8KVCacheDataType::kFp8E4M3); \ + } else { \ + TORCH_CHECK(false, \ + "Unsupported input type of kv cache: ", SRC_DTYPE); \ + } \ } else { \ TORCH_CHECK(false, "Unsupported data type of kv cache: ", KV_DTYPE); \ } \ diff --git a/csrc/torch_bindings.cpp b/csrc/torch_bindings.cpp index bc096406c51a..ebd28e735088 100644 --- a/csrc/torch_bindings.cpp +++ b/csrc/torch_bindings.cpp @@ -713,6 +713,13 @@ TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cache_ops), cache_ops) { "cp_gather_cache(Tensor src_cache, Tensor! dst, Tensor block_table, " "Tensor cu_seq_lens, int batch_size, Tensor? 
seq_starts) -> ()"); cache_ops.impl("cp_gather_cache", torch::kCUDA, &cp_gather_cache); + + cache_ops.def( + "indexer_k_quant_and_cache(Tensor k, Tensor! kv_cache, Tensor " + "slot_mapping, " + "int quant_block_size, str kv_cache_dtype) -> ()"); + cache_ops.impl("indexer_k_quant_and_cache", torch::kCUDA, + &indexer_k_quant_and_cache); } TORCH_LIBRARY_EXPAND(CONCAT(TORCH_EXTENSION_NAME, _cuda_utils), cuda_utils) { diff --git a/setup.py b/setup.py index a8fec8a028d0..5491046991ca 100644 --- a/setup.py +++ b/setup.py @@ -322,6 +322,8 @@ def extract_precompiled_and_patch_package(wheel_url_or_path: str) -> dict: "vllm/_C.abi3.so", "vllm/_moe_C.abi3.so", "vllm/_flashmla_C.abi3.so", + "vllm/_flashmla_extension_C.abi3.so", + "vllm/_sparse_flashmla_C.abi3.so", "vllm/vllm_flash_attn/_vllm_fa2_C.abi3.so", "vllm/vllm_flash_attn/_vllm_fa3_C.abi3.so", "vllm/cumem_allocator.abi3.so", @@ -589,6 +591,8 @@ def _read_requirements(filename: str) -> list[str]: # not targeting a hopper system ext_modules.append( CMakeExtension(name="vllm._flashmla_C", optional=True)) + ext_modules.append( + CMakeExtension(name="vllm._flashmla_extension_C", optional=True)) ext_modules.append(CMakeExtension(name="vllm.cumem_allocator")) if _build_custom_ops(): diff --git a/tests/compile/test_fusion_attn.py b/tests/compile/test_fusion_attn.py index 6c2679ccfc81..eb8c49135428 100644 --- a/tests/compile/test_fusion_attn.py +++ b/tests/compile/test_fusion_attn.py @@ -191,7 +191,6 @@ def __init__(self, num_qo_heads: int, num_kv_heads: int, head_size: int, num_kv_heads=self.num_kv_heads, head_size=self.head_size, dtype=self.kv_cache_dtype, - use_mla=False, ), layer_names=[self.attn.layer_name], vllm_config=self.vllm_config, diff --git a/tests/kernels/attention/test_cache.py b/tests/kernels/attention/test_cache.py index 1325e6883132..6e096a4c3999 100644 --- a/tests/kernels/attention/test_cache.py +++ b/tests/kernels/attention/test_cache.py @@ -593,6 +593,119 @@ def test_concat_and_cache_mla( torch.testing.assert_close(kv_cache, ref_kv_cache) +@pytest.mark.parametrize("kv_lora_rank", KV_LORA_RANKS) +@pytest.mark.parametrize("qk_rope_head_dim", QK_ROPE_HEAD_DIMS) +@pytest.mark.parametrize("num_tokens", NUM_TOKENS_MLA) +@pytest.mark.parametrize("block_size", BLOCK_SIZES_MLA) +@pytest.mark.parametrize("num_blocks", NUM_BLOCKS_MLA) +@pytest.mark.parametrize("dtype", DTYPES) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("device", CUDA_DEVICES) +@torch.inference_mode() +def test_concat_and_cache_ds_mla( + kv_lora_rank: int, + qk_rope_head_dim: int, + num_tokens: int, + block_size: int, + num_blocks: int, + dtype: torch.dtype, + seed: int, + device: str, +) -> None: + if dtype.itemsize != 2: + pytest.skip("ds_mla only supports 16-bit input") + kv_cache_dtype = "fp8_ds_mla" + current_platform.seed_everything(seed) + torch.set_default_device(device) + + total_slots = num_blocks * block_size + slot_mapping_lst = random.sample(range(total_slots), num_tokens) + slot_mapping = torch.tensor(slot_mapping_lst, + dtype=torch.long, + device=device) + + kv_c = torch.randn(num_tokens, kv_lora_rank, dtype=dtype, device=device) + k_pe = torch.randn(num_tokens, + qk_rope_head_dim, + dtype=dtype, + device=device) + entry_size = kv_lora_rank + (4 * 4) + (2 * qk_rope_head_dim) + + scale = torch.tensor(1.0, dtype=torch.float32, device=device) + kv_cache = _create_mla_cache(num_blocks, + block_size, + entry_size, + dtype=torch.uint8, + kv_cache_dtype=kv_cache_dtype, + device=device) + + ref_cache = torch.zeros_like(kv_cache, 
dtype=kv_cache.dtype) + tile_data = torch.zeros(128, dtype=dtype, device=device) + + for i in range(num_tokens): + slot = slot_mapping[i].item() + block_idx = slot // block_size + block_offset = slot % block_size + + ref_cache_slice = ref_cache[block_idx, block_offset] + ref_cache_16bit = ref_cache_slice.view(dtype) + ref_cache_32bit = ref_cache_slice.view(torch.float32) + + kv_c_data = kv_c[i] + for tile_idx in range(4): + tile_start = tile_idx * 128 + tile_end = (tile_idx + 1) * 128 + tile_data[:] = kv_c_data[tile_start:tile_end] + + # tile_scale = tile_data.amax().to(torch.float32) / 448. + # NOTE: Using torch's amax() gives different results, + # so this must be manually computed. + tile_data_float = tile_data.to(torch.float32) + manual_max = abs(tile_data_float[0]) + for j in range(1, 128): + manual_max = max(manual_max, abs(tile_data_float[j])) + tile_scale = manual_max / 448. + + ref_cache_32bit[kv_lora_rank // 4 + tile_idx] = tile_scale + + ops.convert_fp8(ref_cache_slice[tile_start:tile_end], + tile_data, + tile_scale.item(), + kv_dtype="fp8") + + for j in range(qk_rope_head_dim): + ref_cache_16bit[kv_lora_rank // 2 + 8 + j] = k_pe[i, j] + + opcheck( + torch.ops._C_cache_ops.concat_and_cache_mla, + (kv_c, k_pe, kv_cache, slot_mapping, kv_cache_dtype, scale), + test_utils=DEFAULT_OPCHECK_TEST_UTILS, + ) + + ops.concat_and_cache_mla(kv_c, k_pe, kv_cache, slot_mapping, + kv_cache_dtype, scale) + + for i in range(num_tokens): + slot = slot_mapping[i].item() + block_idx = slot // block_size + block_offset = slot % block_size + kv_cache_slice = kv_cache[block_idx, block_offset] + ref_cache_slice = ref_cache[block_idx, block_offset] + + kv_nope = kv_cache_slice[:kv_lora_rank] + ref_nope = ref_cache_slice[:kv_lora_rank] + kv_scales = kv_cache_slice.view(torch.float32)[kv_lora_rank // + 4:kv_lora_rank // 4 + 4] + ref_scales = ref_cache_slice.view( + torch.float32)[kv_lora_rank // 4:kv_lora_rank // 4 + 4] + kv_rope = kv_cache_slice.view(dtype)[kv_lora_rank // 2 + 8:] + ref_rope = ref_cache_slice.view(dtype)[kv_lora_rank // 2 + 8:] + + torch.testing.assert_close(kv_nope, ref_nope, atol=0.001, rtol=0.1) + torch.testing.assert_close(kv_scales, ref_scales, atol=0.001, rtol=0.1) + torch.testing.assert_close(kv_rope, ref_rope, atol=0.001, rtol=0.1) + + @pytest.mark.parametrize("kv_lora_rank", KV_LORA_RANKS) @pytest.mark.parametrize("qk_rope_head_dim", QK_ROPE_HEAD_DIMS) @pytest.mark.parametrize("block_size", BLOCK_SIZES_MLA) diff --git a/tests/kernels/attention/test_deepgemm_attention.py b/tests/kernels/attention/test_deepgemm_attention.py new file mode 100644 index 000000000000..2d901e408b27 --- /dev/null +++ b/tests/kernels/attention/test_deepgemm_attention.py @@ -0,0 +1,279 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import random + +import pytest +import torch + +from vllm.platforms import current_platform +from vllm.utils import cdiv, has_deep_gemm +from vllm.utils.deep_gemm import (_ceil_to_ue8m0, calc_diff, fp8_mqa_logits, + fp8_paged_mqa_logits, get_num_sms, + get_paged_mqa_logits_metadata) + + +def kv_cache_cast_to_fp8(x: torch.Tensor) -> torch.Tensor: + # x: (num_blocks, block_size, 1, head_dim) + num_blocks, block_size, num_heads, head_dim = x.shape + assert num_heads == 1 + x_amax = x.abs().float().amax(dim=3, keepdim=True).clamp(1e-4) + sf = x_amax / 448.0 + x_scaled = (x * (1.0 / sf)).to(torch.float8_e4m3fn) + x_fp8 = torch.empty( + (num_blocks, block_size * (head_dim + 4)), + device=x.device, + dtype=torch.uint8, 
+ ) + x_fp8[:, :block_size * head_dim] = x_scaled.view( + num_blocks, block_size * head_dim).view(dtype=torch.uint8) + x_fp8[:, + block_size * head_dim:] = sf.view(num_blocks, + block_size).view(dtype=torch.uint8) + return x_fp8.view(num_blocks, block_size, num_heads, head_dim + 4) + + +def per_custom_dims_cast_to_fp8( + x: torch.Tensor, dims: tuple, + use_ue8m0: bool) -> tuple[torch.Tensor, torch.Tensor]: + excluded_dims = tuple([i for i in range(x.dim()) if i not in set(dims)]) + x_amax = x.abs().float().amax(dim=excluded_dims, keepdim=True).clamp(1e-4) + sf = x_amax / 448.0 + sf = _ceil_to_ue8m0(sf) if use_ue8m0 else sf + x_scaled = (x * (1.0 / sf)).to(torch.float8_e4m3fn) + return x_scaled, sf.squeeze() + + +def _generate_cp_test_data(seq_len: int, seq_len_kv: int): + assert seq_len_kv % seq_len == 0 and seq_len % 2 == 0 + chunk_size = seq_len // 2 + cp_size = seq_len_kv // seq_len + cp_id = cp_size // 3 + ks = torch.zeros(seq_len, dtype=torch.int, device="cuda") + ke = torch.zeros(seq_len, dtype=torch.int, device="cuda") + for i in range(chunk_size): + ke[i] = cp_id * chunk_size + i + ke[i + chunk_size] = (cp_size * 2 - 1 - cp_id) * chunk_size + i + return ks, ke + + +def _ref_fp8_mqa_logits( + q: torch.Tensor, + kv: torch.Tensor, + weights: torch.Tensor, + cu_seqlen_ks: torch.Tensor, + cu_seqlen_ke: torch.Tensor, +): + seq_len_kv = kv.shape[0] + + k = kv + q = q.float() + k = k.float() + + mask_lo = (torch.arange(0, seq_len_kv, device="cuda")[None, :] + >= cu_seqlen_ks[:, None]) + mask_hi = (torch.arange(0, seq_len_kv, device="cuda")[None, :] + < cu_seqlen_ke[:, None]) + mask = mask_lo & mask_hi + + score = torch.einsum("mhd,and->hmn", q, k) + logits = (score.relu() * weights.unsqueeze(-1).transpose(0, 1)).sum(dim=0) + logits = logits.masked_fill(~mask, float("-inf")) + + return logits + + +@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA only") +@pytest.mark.skipif(not has_deep_gemm(), reason="DeepGEMM not available") +@pytest.mark.skipif(not current_platform.has_device_capability(90), + reason="SM90 and SM100 only") +def test_deepgemm_fp8_mqa_logits(): + torch.manual_seed(0) + random.seed(0) + num_heads, head_dim = 32, 128 + for seq_len in (512, ): + for seq_len_kv in (1024, ): + for disable_cp in (False, True): + q = torch.randn( + seq_len, + num_heads, + head_dim, + device="cuda", + dtype=torch.bfloat16, + ) + kv = torch.randn(seq_len_kv, + head_dim, + device="cuda", + dtype=torch.bfloat16) + weights = torch.randn(seq_len, + num_heads, + device="cuda", + dtype=torch.float32) + + if disable_cp: + ks = torch.zeros(seq_len, dtype=torch.int, device="cuda") + ke = torch.arange(seq_len, dtype=torch.int, + device="cuda") + (seq_len_kv - seq_len) + else: + ks, ke = _generate_cp_test_data(seq_len, seq_len_kv) + + q_fp8 = q.to(torch.float8_e4m3fn) + kv_fp8 = per_custom_dims_cast_to_fp8(kv, (0, ), False) + logits = fp8_mqa_logits(q_fp8, kv_fp8, weights, ks, ke) + + ref_logits = _ref_fp8_mqa_logits( + q=q, + kv=kv, + weights=weights, + cu_seqlen_ks=ks, + cu_seqlen_ke=ke, + ) + + ref_neginf_mask = ref_logits == float("-inf") + neginf_mask = logits == float("-inf") + assert torch.equal(neginf_mask, ref_neginf_mask) + + ref_logits = ref_logits.masked_fill(ref_neginf_mask, 0) + logits = logits.masked_fill(neginf_mask, 0) + diff = calc_diff(logits, ref_logits) + assert diff < 1e-3, f"{diff=}" + + +def _ref_fp8_paged_mqa_logits( + q: torch.Tensor, + kv_cache: torch.Tensor, + weights: torch.Tensor, + context_lens: torch.Tensor, + block_tables: torch.Tensor, + max_model_len: int, +): 
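+    # Unquantized reference: for each request, walk its block table and
+    # compute ReLU(q @ k^T), weight it per head, sum over heads, and apply a
+    # causal mask so each of the `next_n` query tokens only sees keys up to
+    # its own position. The fused fp8_paged_mqa_logits output is compared
+    # against these logits in the test below.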
+ batch_size, next_n, _, _ = q.size() + _, block_size, _, _ = kv_cache.size() + logits = torch.full( + [batch_size * next_n, max_model_len], + float("-inf"), + device=q.device, + dtype=torch.float32, + ) + context_lens_list = context_lens.tolist() + for i in range(batch_size): + context_len = context_lens_list[i] + q_offsets = torch.arange(context_len - next_n, + context_len, + device="cuda") + weight_slice = (weights[i * next_n:(i + 1) * next_n, :].transpose( + 0, 1).contiguous()) + for block_rk in range(cdiv(context_len, block_size)): + block_idx = block_tables[i][block_rk] + qx, kx = q[i], kv_cache[block_idx] + k_offsets = torch.arange( + block_rk * block_size, + (block_rk + 1) * block_size, + device="cuda", + ) + mask = (k_offsets[None, :] < context_len) & (k_offsets[None, :] + <= q_offsets[:, None]) + s = torch.where( + mask[None, :, :], + (qx.transpose(0, 1) @ kx.transpose(0, 1).transpose(1, 2)).to( + logits.dtype), + float("-inf"), + ) + s = torch.relu(s) * weight_slice[..., None] + s = s.sum(dim=0) + logits[ + i * next_n:(i + 1) * next_n, + block_rk * block_size:(block_rk + 1) * block_size, + ] = torch.where(k_offsets[None, :] <= q_offsets[:, None], s, + float("-inf")) + return logits + + +@pytest.mark.skipif(not current_platform.is_cuda(), reason="CUDA only") +@pytest.mark.skipif(not has_deep_gemm(), reason="DeepGEMM not available") +@pytest.mark.skipif(not current_platform.has_device_capability(90), + reason="SM90 and SM100 only") +def test_deepgemm_fp8_paged_mqa_logits(): + torch.manual_seed(0) + random.seed(0) + + max_model_len = 4096 + for batch_size, next_n in [(4, 1), (2, 2)]: + for heads, index_dim in [(32, 128)]: + for avg_kv in (2048, ): + num_blocks, blocksize = max_model_len * 2, 64 + + q = torch.randn( + (batch_size, next_n, heads, index_dim), + device="cuda", + dtype=torch.bfloat16, + ) + kv_cache = torch.randn( + (num_blocks, blocksize, 1, index_dim), + device="cuda", + dtype=torch.bfloat16, + ) + weights = torch.randn( + (batch_size * next_n, heads), + device="cuda", + dtype=torch.float32, + ) + + context_lens = (torch.randint(int(0.8 * avg_kv), + int(1.2 * avg_kv), + (batch_size, )).cuda().to( + torch.int32)) + max_block_len = ((context_lens.max().item() + blocksize - 1) // + blocksize * blocksize) + block_tables = torch.zeros( + (batch_size, max_block_len), + device="cuda", + dtype=torch.int32, + ) + + counter = 0 + block_idx_pool = list(range(num_blocks)) + random.shuffle(block_idx_pool) + for i in range(batch_size): + ctx_len = int(context_lens[i].item()) + for j in range((ctx_len + blocksize - 1) // blocksize): + block_tables[i][j] = block_idx_pool[counter] + counter += 1 + + q_fp8 = q.to(torch.float8_e4m3fn) + kv_cache_fp8 = kv_cache_cast_to_fp8(kv_cache) + + schedule_metadata = get_paged_mqa_logits_metadata( + context_lens, blocksize, get_num_sms()) + logits = fp8_paged_mqa_logits( + q_fp8, + kv_cache_fp8, + weights, + context_lens, + block_tables, + schedule_metadata, + max_model_len, + ) + + ref_logits = _ref_fp8_paged_mqa_logits( + q, + kv_cache, + weights, + context_lens, + block_tables, + max_model_len, + ) + + positions = (torch.arange(max_model_len, + device="cuda").unsqueeze(0).expand( + batch_size * next_n, -1)) + row_indices = ( + torch.arange(batch_size * next_n, device="cuda") // next_n) + next_n_offset = ( + torch.arange(batch_size * next_n, device="cuda") % next_n) + mask = positions <= (context_lens[row_indices] - next_n + + next_n_offset).unsqueeze(1) + + logits = logits.masked_fill(~mask, 0) + ref_logits = ref_logits.masked_fill(~mask, 0) + 
diff = calc_diff(logits, ref_logits) + assert diff < 1e-3, f"{diff=}" diff --git a/tests/kernels/attention/test_flashmla.py b/tests/kernels/attention/test_flashmla.py index abcfe828d5ac..bddd7e5c50ed 100644 --- a/tests/kernels/attention/test_flashmla.py +++ b/tests/kernels/attention/test_flashmla.py @@ -97,18 +97,16 @@ def test_flash_mla(b, s_q, mean_sk, h_q, h_kv, d, dv, block_size, causal, descale_k = None def flash_mla(): - return flash_mla_with_kvcache( - q, - blocked_k, - block_table, - cache_seqlens, - dv, - tile_scheduler_metadata, - num_splits, - causal=causal, - descale_q=descale_q, - descale_k=descale_k, - ) + return flash_mla_with_kvcache(q, + blocked_k, + block_table, + cache_seqlens, + dv, + tile_scheduler_metadata, + num_splits, + causal=causal, + descale_q=descale_q, + descale_k=descale_k) def scaled_dot_product_attention(query, key, value, is_causal=False): query = query.float() diff --git a/tests/kernels/attention/test_flashmla_sparse.py b/tests/kernels/attention/test_flashmla_sparse.py new file mode 100644 index 000000000000..9036e4e7800b --- /dev/null +++ b/tests/kernels/attention/test_flashmla_sparse.py @@ -0,0 +1,119 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest +import torch + + +def _cuda_sm90_available() -> bool: + if not torch.cuda.is_available(): + return False + major, _ = torch.cuda.get_device_capability() + return major == 9 + + +def test_sparse_flashmla_metadata_smoke(): + import vllm.attention.ops.flashmla as fm + ok, reason = fm.is_flashmla_supported() + if not ok or not _cuda_sm90_available(): + pytest.skip(reason or "SM90 not available") + + device = torch.device("cuda") + batch_size = 1 + seqlen_q = 1 + num_heads_q = 128 + num_heads_k = 1 + q_seq_per_hk = seqlen_q * num_heads_q // num_heads_k + topk = 128 + + cache_seqlens = torch.zeros(batch_size, dtype=torch.int32, device=device) + + tile_md, num_splits = fm.get_mla_metadata(cache_seqlens, + q_seq_per_hk, + num_heads_k, + num_heads_q=num_heads_q, + topk=topk, + is_fp8_kvcache=True) + assert tile_md.dtype == torch.int32 + assert num_splits.dtype == torch.int32 + + +def test_sparse_flashmla_decode_smoke(): + import vllm.attention.ops.flashmla as fm + ok, reason = fm.is_flashmla_supported() + if not ok or not _cuda_sm90_available(): + pytest.skip(reason or "SM90 not available") + + device = torch.device("cuda") + batch_size = 1 + seqlen_q = 1 + num_heads_q = 1 + head_dim_k = 576 + head_dim_v = 512 + num_heads_k = 1 + page_block_size = 64 + bytes_per_token = 656 + topk = 128 + + # Metadata + q_seq_per_hk = seqlen_q * num_heads_q // num_heads_k + # q_heads_per_hk = num_heads_q // num_heads_k + cache_seqlens = torch.zeros(batch_size, dtype=torch.int32, device=device) + tile_md, num_splits = fm.get_mla_metadata(cache_seqlens, + q_seq_per_hk, + num_heads_k, + num_heads_q=num_heads_q, + topk=topk, + is_fp8_kvcache=True) + + # Inputs + q = torch.zeros((batch_size, seqlen_q, num_heads_q, head_dim_k), + dtype=torch.bfloat16, + device=device) + k_cache = torch.zeros((1, page_block_size, num_heads_k, bytes_per_token), + dtype=torch.uint8, + device=device) + indices = torch.zeros((batch_size, seqlen_q, topk), + dtype=torch.int32, + device=device) + + block_table = torch.zeros((batch_size, 128), + dtype=torch.int32, + device=device) + out, lse = fm.flash_mla_with_kvcache(q, + k_cache, + block_table, + cache_seqlens, + head_dim_v, + tile_md, + num_splits, + indices=indices, + is_fp8_kvcache=True) + assert out.shape[0] == batch_size + assert 
out.shape[-1] == head_dim_v + assert lse.shape[0] == batch_size + + +def test_sparse_flashmla_prefill_smoke(): + import vllm.attention.ops.flashmla as fm + ok, reason = fm.is_flashmla_supported() + if not ok or not _cuda_sm90_available(): + pytest.skip(reason or "SM90 not available") + + device = torch.device("cuda") + s_q = 1 + s_kv = 1 + h_q = 64 # kernel expects multiple of 64 + h_kv = 1 + d_qk = 576 + d_v = 512 + topk = 128 + + q = torch.zeros((s_q, h_q, d_qk), dtype=torch.bfloat16, device=device) + kv = torch.zeros((s_kv, h_kv, d_qk), dtype=torch.bfloat16, device=device) + indices = torch.zeros((s_q, h_kv, topk), dtype=torch.int32, device=device) + + out, max_logits, lse = fm.flash_mla_sparse_prefill(q, kv, indices, 1.0, + d_v) + assert out.shape == (s_q, h_q, d_v) + assert max_logits.shape == (s_q, h_q) + assert lse.shape == (s_q, h_q) diff --git a/tests/kernels/attention/test_pack_unpack_triton.py b/tests/kernels/attention/test_pack_unpack_triton.py new file mode 100644 index 000000000000..20c0b262b479 --- /dev/null +++ b/tests/kernels/attention/test_pack_unpack_triton.py @@ -0,0 +1,245 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project + +import torch +from torch.testing import assert_close + +from vllm.attention.ops.common import pack_seq_triton, unpack_seq_triton + + +def test_pack_seq_basic_fp8(): + """Test basic functionality of pack_seq_triton with fp8 and 3D tensors.""" + device = "cuda" + dtype = torch.float8_e4m3fn + + # Test cases with 3D tensors (N, H, D) + test_cases = [ + (6, 8, 4, 2, [3, 3]), # (6, 8, 4) -> (2, 3, 8, 4) + (10, 4, 8, 3, [2, 4, 4]), # (10, 4, 8) -> (3, 4, 4, 8) + (20, 16, 32, 4, [5, 5, 5, 5]), # (20, 16, 32) -> (4, 5, 16, 32) + ] + + for N, H, D, B, lengths_list in test_cases: + # Create input tensor with small values for fp8 + x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor(lengths_list, device=device) + + # Pack the data + packed = pack_seq_triton(x, lengths) + + # Check output shape and properties + expected_shape = (B, max(lengths_list), H, D) + assert packed.shape == expected_shape + assert packed.dtype == dtype + assert packed.device == x.device + + # Check that valid data is preserved (within fp8 precision) + for b in range(B): + start_idx = sum(lengths_list[:b]) + seq_len = lengths_list[b] + + expected_data = x[start_idx:start_idx + seq_len].to(torch.float32) + actual_data = packed[b, :seq_len].to(torch.float32) + + assert_close(actual_data, expected_data, rtol=1e-1, atol=1e-2) + + +def test_pack_seq_custom_padding_fp8(): + """Test pack_seq_triton with custom padding values for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + N, H, D, B = 20, 8, 16, 2 + lengths = torch.tensor([10, 10], device=device) + + x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + + # Test with different padding values + for pad_value in [-100.0, -10.0, 0.0, 10.0, 100.0]: + result = pack_seq_triton(x, lengths, pad_value=pad_value) + + # Check valid data + for b in range(B): + start_idx = b * 10 + expected_data = x[start_idx:start_idx + 10].to(torch.float32) + actual_data = result[b, :10].to(torch.float32) + assert_close(actual_data, expected_data, rtol=1e-1, atol=1e-2) + + # Check padding (fp8 has limited range, so check for large values) + padded_data = result[:, 10:].to(torch.float32) + if pad_value < 0: + assert torch.all(padded_data < -50) # Large negative values + elif pad_value > 0: + 
assert torch.all(padded_data > 50) # Large positive values + else: + assert torch.allclose(padded_data, + torch.zeros_like(padded_data), + atol=1e-2) + + +def test_pack_seq_default_negative_inf_padding_fp8(): + """Test that pack_seq_triton uses -inf padding by default for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + # B = 2 + N, H, D = 20, 8, 16 + lengths = torch.tensor([10, 10], device=device) + + x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + result = pack_seq_triton(x, lengths) + + # Check that padding is large negative values (fp8 representation of -inf) + padded_data = result[:, 10:].to(torch.float32) + assert torch.all( + padded_data < -100) # fp8 -inf is represented as large negative number + + +def test_pack_seq_edge_cases_fp8(): + """Test pack_seq_triton with edge cases for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + + # Test with single batch element + x = torch.randn(10, 8, 16, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([10], device=device) + result = pack_seq_triton(x, lengths) + assert result.shape == (1, 10, 8, 16) + + # Test with very short sequences + x = torch.randn(20, 4, 8, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([1, 1, 1], device=device) + result = pack_seq_triton(x, lengths) + assert result.shape == (3, 1, 4, 8) + + # Test with different sequence lengths + x = torch.randn(15, 8, 16, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([5, 7, 3], device=device) + result = pack_seq_triton(x, lengths) + assert result.shape == (3, 7, 8, 16) + + +def test_pack_seq_different_block_sizes_fp8(): + """Test pack_seq_triton with different block sizes for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + N, H, D, B = 100, 16, 32, 4 + lengths = torch.tensor([25, 25, 25, 25], device=device) + + x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + + # Test different block sizes + for block_t, block_d in [(32, 32), (64, 64), (128, 128)]: + result = pack_seq_triton(x, lengths, block_t=block_t, block_d=block_d) + + assert result.shape == (B, 25, H, D) + + # Check that valid data is preserved (within fp8 precision) + for b in range(B): + start_idx = b * 25 + expected_data = x[start_idx:start_idx + 25].to(torch.float32) + actual_data = result[b, :25].to(torch.float32) + assert_close(actual_data, expected_data, rtol=1e-1, atol=1e-2) + + +def test_pack_seq_shape_consistency(): + """Test that pack_seq_triton maintains shape consistency.""" + device = "cuda" + dtype = torch.float8_e4m3fn + N, H, D, B = 20, 8, 16, 2 + lengths = torch.tensor([10, 10], device=device) + + x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + + result = pack_seq_triton(x, lengths) + + # Check shape consistency + assert result.shape[0] == B # Batch dimension + assert result.shape[1] == lengths.max().item() # Max sequence length + assert result.shape[2:] == x.shape[1:] # Feature dimensions preserved + + +def test_pack_unpack_roundtrip_fp8(): + """Test that pack -> unpack gives us back the original data for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + + # Test cases with 3D tensors + test_cases = [ + (6, 8, 4, 2, [3, 3]), + (10, 4, 8, 3, [2, 4, 4]), + (20, 16, 32, 4, [5, 5, 5, 5]), + (15, 8, 16, 3, [7, 5, 3]), + ] + + for N, H, D, B, lengths_list in test_cases: + # Create input tensor with small values for fp8 + 
x = torch.randn(N, H, D, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor(lengths_list, device=device) + + # Pack the data + packed = pack_seq_triton(x, lengths) + + # Unpack the data + unpacked = unpack_seq_triton(packed, lengths) + + # Check that we get back the original data (within fp8 precision) + assert unpacked.shape == x.shape + x_f32 = x.to(torch.float32) + unpacked_f32 = unpacked.to(torch.float32) + assert_close(x_f32, unpacked_f32, rtol=1e-3, atol=1e-3) + + # Unpack without explicit start locations (computed in kernel) + unpacked_with_loc = unpack_seq_triton(packed, lengths) + assert_close(x_f32, + unpacked_with_loc.to(torch.float32), + rtol=1e-3, + atol=1e-2) + + +def test_unpack_seq_triton_edge_cases_fp8(): + """Test unpack function with edge cases for fp8.""" + device = "cuda" + dtype = torch.float8_e4m3fn + + # Test with single batch element + x = torch.randn(10, 8, 16, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([10], device=device) + packed = pack_seq_triton(x, lengths) + unpacked = unpack_seq_triton(packed, lengths) + assert unpacked.shape == x.shape + assert_close(x.to(torch.float32), + unpacked.to(torch.float32), + rtol=1e-1, + atol=1e-2) + + # Test with very short sequences + x = torch.randn(20, 4, 8, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([1, 1, 1], device=device) + packed = pack_seq_triton(x, lengths) + unpacked = unpack_seq_triton(packed, lengths) + # Only compare the first 3 elements that were actually packed + assert_close(x[:3].to(torch.float32), + unpacked.to(torch.float32), + rtol=1e-1, + atol=1e-2) + + x = torch.randn(15, 8, 16, dtype=torch.float32, device=device) * 0.1 + x = x.to(dtype=dtype) + lengths = torch.tensor([5, 7, 3], device=device) + packed = pack_seq_triton(x, lengths) + unpacked = unpack_seq_triton(packed, lengths) + assert unpacked.shape == x.shape + assert_close(x.to(torch.float32), + unpacked.to(torch.float32), + rtol=1e-1, + atol=1e-2) diff --git a/tests/models/registry.py b/tests/models/registry.py index 37ee474d3ecb..b7a2514d8bc0 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -207,6 +207,7 @@ def check_available_online( trust_remote_code=True), "DeepseekV3ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3", # noqa: E501 trust_remote_code=True), + "DeepseekV32ForCausalLM": _HfExamplesInfo("deepseek-ai/DeepSeek-V3.2-Exp"), "Ernie4_5ForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-0.3B-PT", min_transformers_version="4.54"), "Ernie4_5_MoeForCausalLM": _HfExamplesInfo("baidu/ERNIE-4.5-21B-A3B-PT", diff --git a/tests/models/test_initialization.py b/tests/models/test_initialization.py index e818b908e8a8..1db0dc3da922 100644 --- a/tests/models/test_initialization.py +++ b/tests/models/test_initialization.py @@ -8,7 +8,8 @@ from vllm import LLM from vllm.utils import GiB_bytes -from vllm.v1.core.kv_cache_utils import get_kv_cache_configs +from vllm.v1.core.kv_cache_utils import (generate_scheduler_kv_cache_config, + get_kv_cache_configs) from vllm.v1.engine.core import EngineCore as V1EngineCore from ..utils import create_new_process_for_each_test @@ -62,11 +63,13 @@ def can_initialize(model_arch: str, monkeypatch: pytest.MonkeyPatch, # Avoid calling model.forward() def _initialize_kv_caches_v1(self, vllm_config): kv_cache_specs = self.model_executor.get_kv_cache_specs() - scheduler_kv_cache_config = get_kv_cache_configs( + kv_cache_configs = get_kv_cache_configs( vllm_config, 
kv_cache_specs, [10 * GiB_bytes], - )[0] + ) + scheduler_kv_cache_config = generate_scheduler_kv_cache_config( + kv_cache_configs) # gpu_blocks (> 0), cpu_blocks, scheduler_kv_cache_config return 1, 0, scheduler_kv_cache_config diff --git a/tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py b/tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py index 8d0687b49bb4..30d721304b5c 100644 --- a/tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py +++ b/tests/plugins/vllm_add_dummy_platform/vllm_add_dummy_platform/dummy_platform.py @@ -26,5 +26,5 @@ def check_and_update_config(cls, vllm_config: VllmConfig) -> None: def get_attn_backend_cls(self, backend_name, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, - has_sink): + has_sink, use_sparse): return "vllm_add_dummy_platform.dummy_attention_backend.DummyAttentionBackend" # noqa E501 diff --git a/tests/v1/attention/test_mla_backends.py b/tests/v1/attention/test_mla_backends.py index d4829c64b5c6..228551573ba8 100644 --- a/tests/v1/attention/test_mla_backends.py +++ b/tests/v1/attention/test_mla_backends.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Tests for v1 MLA backends without GPUModelRunner dependency.""" +from typing import Optional, Union import pytest import torch @@ -10,6 +11,7 @@ create_standard_kv_cache_spec, create_vllm_config, get_attention_backend) +from vllm import _custom_ops as ops from vllm.utils import STR_DTYPE_TO_TORCH_DTYPE, cdiv from vllm.v1.attention.backends.utils import CommonAttentionMetadata from vllm.v1.kv_cache_interface import FullAttentionSpec @@ -78,7 +80,9 @@ def create_and_prepopulate_kv_cache( device: torch.device, num_blocks: int, common_attn_metadata: CommonAttentionMetadata, - randomize_blocks: bool = True) -> torch.Tensor: + randomize_blocks: bool = True, + kv_cache_dtype: Optional[str] = None, + scale: Union[float, torch.Tensor] = 1.0) -> torch.Tensor: """Create and prepopulate an MLA KV cache with context data. Args: @@ -93,6 +97,11 @@ def create_and_prepopulate_kv_cache( common_attn_metadata: Common attention metadata randomize_blocks: Whether to randomly permute blocks or use sequential order + kv_cache_dtype: Optional kv cache dtype string. When set to + "fp8_ds_mla" the cache is populated using the + fp8 DeepSeek MLA layout via concat_and_cache_mla. + scale: Scaling factor forwarded to concat_and_cache_mla when the + fp8 cache layout is requested. 
Returns: MLA KV cache tensor @@ -105,23 +114,61 @@ def create_and_prepopulate_kv_cache( block_table = common_attn_metadata.block_table_tensor slot_mapping = common_attn_metadata.slot_mapping - # Create MLA KV cache: (num_blocks, block_size, head_size) - kv_cache = torch.empty(num_blocks, - block_size, - head_size, - dtype=dtype, - device=device) - kv_cache_flat = kv_cache.view(-1, head_size) + use_fp8_ds_mla = kv_cache_dtype == "fp8_ds_mla" + + if use_fp8_ds_mla: + if not kv_c_contexts: + raise ValueError("kv_c_contexts cannot be empty when using" + " fp8_ds_mla cache dtype") + kv_lora_rank = kv_c_contexts[0].shape[-1] + rope_dim = k_pe_contexts[0].shape[-1] + entry_size = kv_lora_rank + 4 * 4 + 2 * rope_dim + kv_cache = torch.zeros(num_blocks, + block_size, + entry_size, + dtype=torch.uint8, + device=device) + scale_tensor = (scale + if isinstance(scale, torch.Tensor) else torch.tensor( + scale, dtype=torch.float32, device=device)) + scale_tensor = scale_tensor.to(device=device, dtype=torch.float32) + else: + # Create MLA KV cache: (num_blocks, block_size, head_size) + kv_cache = torch.empty(num_blocks, + block_size, + head_size, + dtype=dtype, + device=device) + kv_cache_flat = kv_cache.view(-1, head_size) # Populate the cache with the context tokens # Start from block_id=1 since block_id=0 is considered the null block start_block_idx = 1 for i in range(batch_size): kv_c_context, k_pe_context = kv_c_contexts[i], k_pe_contexts[i] - kv_context = torch.cat([kv_c_context, k_pe_context.squeeze(1)], dim=-1) + context_len = kv_c_context.shape[0] + if context_len == 0: + start_block_idx += cdiv(int(seq_lens[i]), block_size) + continue + start = start_block_idx * block_size - end = start + kv_context.shape[0] - kv_cache_flat[start:end, ...] = kv_context + + if use_fp8_ds_mla: + slots = torch.arange(context_len, device=device, + dtype=torch.long) + start + ops.concat_and_cache_mla( + kv_c_context, + k_pe_context.squeeze(1), + kv_cache, + slots, + kv_cache_dtype="fp8_ds_mla", + scale=scale_tensor, + ) + else: + kv_context = torch.cat( + [kv_c_context, k_pe_context.squeeze(1)], dim=-1) + end = start + kv_context.shape[0] + kv_cache_flat[start:end, ...] 
= kv_context # Stay block aligned and allocate enough blocks for the new tokens start_block_idx += cdiv(int(seq_lens[i]), block_size) diff --git a/tests/v1/attention/test_sparse_mla_backends.py b/tests/v1/attention/test_sparse_mla_backends.py new file mode 100644 index 000000000000..74eea6f716fe --- /dev/null +++ b/tests/v1/attention/test_sparse_mla_backends.py @@ -0,0 +1,426 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +"""Unit tests for the FlashMLA sparse backend utilities.""" + +import math +from types import MethodType, SimpleNamespace + +import numpy as np +import pytest +import torch + +from tests.v1.attention.test_mla_backends import ( + BATCH_SPECS, BatchSpec, MockAttentionLayer, + create_and_prepopulate_kv_cache) +from tests.v1.attention.utils import (create_common_attn_metadata, + create_standard_kv_cache_spec, + create_vllm_config) +from vllm import _custom_ops as ops +from vllm.attention.ops import flashmla +from vllm.model_executor.layers.linear import ColumnParallelLinear +from vllm.utils import cdiv +from vllm.v1.attention.backends.mla.flashmla_sparse import ( + FlashMLASparseBackend, FlashMLASparseDecodeAndContextMetadata, + FlashMLASparseImpl, FlashMLASparseMetadata) + +SPARSE_BACKEND_BATCH_SPECS = { + name: BATCH_SPECS[name] + for name in [ + "mixed_small", + "mixed_medium", + "small_prefill", + "medium_prefill", + "single_prefill", + ] +} + +SPARSE_BACKEND_BATCH_SPECS["large_q_prefill"] = BatchSpec(seq_lens=[1024] * 2, + query_lens=[256] * 2) +SPARSE_BACKEND_BATCH_SPECS["large_q_pure_prefill"] = BatchSpec( + seq_lens=[256] * 2, query_lens=[256] * 2) + + +def _dequantize_fp8_ds_mla_entry( + cache_slice: torch.Tensor, kv_lora_rank: int, rope_dim: int, + dtype: torch.dtype) -> tuple[torch.Tensor, torch.Tensor]: + """Dequantize a single fp8_ds_mla cache entry back to latent + rope.""" + + # The first kv_lora_rank bytes store FP8 latent values with one scale per + # 128 element tile written as float32 right after the latent payload. 
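+    # Byte layout of one entry, as implied by the entry_size computed in
+    # this file (entry_size = kv_lora_rank + 4 * 4 + 2 * rope_dim):
+    #   [0, kv_lora_rank)                 -> fp8 latent values
+    #   [kv_lora_rank, kv_lora_rank + 16) -> four float32 tile scales
+    #   [kv_lora_rank + 16, entry_size)   -> rope values in the 16-bit dtype
+    # The float32 view below indexes in 4-byte units and the dtype view in
+    # 2-byte units, hence the // 4 and // 2 offsets.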
+ scales = cache_slice.view(torch.float32)[kv_lora_rank // + 4:kv_lora_rank // 4 + 4] + latent = torch.empty(kv_lora_rank, + dtype=torch.float16, + device=cache_slice.device) + for tile_idx in range(4): + tile_start = tile_idx * 128 + tile_end = tile_start + 128 + ops.convert_fp8(latent[tile_start:tile_end], + cache_slice[tile_start:tile_end], + float(scales[tile_idx].item()), + kv_dtype="fp8") + latent = latent.to(dtype) + + rope_offset = kv_lora_rank // 2 + 8 + rope_vals = cache_slice.view(dtype)[rope_offset:rope_offset + rope_dim] + return latent, rope_vals.clone() + + +def _quantize_dequantize_fp8_ds_mla( + kv_c: torch.Tensor, k_pe: torch.Tensor, block_size: int, + scale: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """Round-trip kv_c/k_pe though the fp8_ds_mla cache layout.""" + + if kv_c.numel() == 0: + return kv_c.clone(), k_pe.clone() + + kv_lora_rank = kv_c.shape[-1] + rope_dim = k_pe.shape[-1] + num_tokens = kv_c.shape[0] + num_blocks = max(1, math.ceil(num_tokens / block_size)) + entry_size = kv_lora_rank + 4 * 4 + 2 * rope_dim + + tmp_cache = torch.zeros(num_blocks, + block_size, + entry_size, + dtype=torch.uint8, + device=kv_c.device) + slot_mapping = torch.arange(num_tokens, + dtype=torch.long, + device=kv_c.device) + + ops.concat_and_cache_mla(kv_c, + k_pe, + tmp_cache, + slot_mapping, + kv_cache_dtype="fp8_ds_mla", + scale=scale) + + dequant_kv_c = torch.empty_like(kv_c) + dequant_k_pe = torch.empty_like(k_pe) + + for token_idx in range(num_tokens): + slot = slot_mapping[token_idx].item() + block_idx = slot // block_size + block_offset = slot % block_size + cache_slice = tmp_cache[block_idx, block_offset] + latent, rope_vals = _dequantize_fp8_ds_mla_entry( + cache_slice, kv_lora_rank, rope_dim, kv_c.dtype) + dequant_kv_c[token_idx] = latent + dequant_k_pe[token_idx] = rope_vals + + return dequant_kv_c, dequant_k_pe + + +def test_sparse_backend_metadata_registration(): + backend = FlashMLASparseBackend + + assert backend.get_name() == "FLASHMLA_SPARSE_VLLM_V1" + assert backend.get_metadata_cls() is FlashMLASparseMetadata + assert backend.get_impl_cls() is FlashMLASparseImpl + + dtype_list = backend.get_supported_dtypes() + assert torch.bfloat16 in dtype_list + + shape = backend.get_kv_cache_shape(num_blocks=2, + block_size=64, + num_kv_heads=1, + head_size=576) + assert shape == (2, 64, 576) + + +def test_sparse_decode_metadata_filters_prefill_indices(): + prefill_context_lengths = torch.tensor([4, 2], dtype=torch.int32) + metadata = FlashMLASparseDecodeAndContextMetadata( + scheduler_metadata=torch.tensor([[0]], dtype=torch.int32), + num_splits=torch.tensor([1, 1], dtype=torch.int32), + cache_lens=torch.tensor([10, 12], dtype=torch.int32), + prefill_context_lengths=prefill_context_lengths, + ) + + indices = torch.tensor([[0, 3, 5], [1, 2, 4]], dtype=torch.int32) + + context_indices, new_token_indices = metadata.filter_prefill_indices( + indices) + + expected_context = torch.tensor([[-1, -1, 5], [-1, -1, 4]], + dtype=torch.int32) + expected_new_tokens = torch.tensor([[-1, -1, 1], [-1, 0, 2]], + dtype=torch.int32) + + assert torch.equal(context_indices, expected_context) + assert torch.equal(new_token_indices, expected_new_tokens) + + +def test_sparse_impl_zero_fills_when_metadata_missing(): + impl = FlashMLASparseImpl.__new__(FlashMLASparseImpl) + dummy_layer = object() + q = torch.zeros((2, 1, 3)) + k_c = torch.zeros((2, 3)) + k_pe = torch.zeros((2, 1, 1)) + kv_cache = torch.zeros((1, 1, 1)) + output = torch.ones((2, 4)) + + result = 
FlashMLASparseImpl.forward(impl, + dummy_layer, + q, + k_c, + k_pe, + kv_cache, + attn_metadata=None, + output=output) + + assert result is output + assert torch.all(result == 0) + + +@pytest.mark.parametrize("batch_name", list(SPARSE_BACKEND_BATCH_SPECS.keys())) +@pytest.mark.parametrize("kv_cache_dtype", ["fp8_ds_mla", "auto"]) +def test_sparse_backend_decode_correctness(dist_init, batch_name, + kv_cache_dtype): + if not torch.cuda.is_available(): + pytest.skip("CUDA is required for sparse MLA decode test") + + device = torch.device("cuda") + dtype = torch.bfloat16 + + batch_spec = SPARSE_BACKEND_BATCH_SPECS[batch_name] + + # Model hyper-parameters (kept intentionally small for the unit test) + num_heads = 128 + kv_lora_rank = 512 + qk_nope_head_dim = 128 + qk_rope_head_dim = 64 + v_head_dim = 128 + head_size = kv_lora_rank + qk_rope_head_dim + topk_tokens = 2048 + + max_seqlen = max(batch_spec.seq_lens) + total_cache_tokens = sum(batch_spec.seq_lens) + block_size = 64 + + vllm_config = create_vllm_config( + model_name="deepseek-ai/DeepSeek-V2-Lite-Chat", + max_model_len=max_seqlen, + num_gpu_blocks=max(2048, + cdiv(total_cache_tokens, block_size) + 1), + block_size=block_size) + model_config = vllm_config.model_config + model_config.hf_config = SimpleNamespace( + attn_module_list_cfg=[{ + "topk_tokens": topk_tokens + }]) + model_config.hf_text_config = SimpleNamespace( + q_lora_rank=None, + kv_lora_rank=kv_lora_rank, + qk_nope_head_dim=qk_nope_head_dim, + qk_rope_head_dim=qk_rope_head_dim, + v_head_dim=v_head_dim, + model_type="deepseek_v2", + ) + model_config.dtype = dtype + model_config.get_num_attention_heads = MethodType( + lambda self, parallel_config: num_heads, model_config) + model_config.get_num_kv_heads = MethodType(lambda self, parallel_config: 1, + model_config) + model_config.get_head_size = MethodType(lambda self: head_size, + model_config) + model_config.get_sliding_window = MethodType(lambda self: None, + model_config) + + kv_cache_spec = create_standard_kv_cache_spec(vllm_config) + + torch.manual_seed(0) + + scale = 1.0 / math.sqrt(head_size) + + # Shared MLA projection weights to keep reference and backend in sync + W_UK = torch.randn(kv_lora_rank, + num_heads, + qk_nope_head_dim, + dtype=dtype, + device=device) + W_UV = torch.randn(kv_lora_rank, + num_heads, + v_head_dim, + dtype=dtype, + device=device) + + # Build synthetic decode-only workload + seq_lens = batch_spec.seq_lens + query_lens = batch_spec.query_lens + + all_q_vllm, all_kv_c_vllm, all_k_pe_vllm = [], [], [] + kv_c_contexts, k_pe_contexts = [], [] + reference_outputs = [] + + kv_cache_scale = torch.tensor(1.0, dtype=torch.float32, device=device) + + for i in range(batch_spec.batch_size): + s_len = seq_lens[i] + q_len = query_lens[i] + ctx_len = s_len - q_len + + q_c = torch.rand(q_len, + num_heads, + qk_nope_head_dim + qk_rope_head_dim, + dtype=dtype, + device=device) + kv_c_full = torch.rand(s_len, kv_lora_rank, dtype=dtype, device=device) + k_pe_full = torch.rand(s_len, + 1, + qk_rope_head_dim, + dtype=dtype, + device=device) + + kv_c_full, k_pe_full = _quantize_dequantize_fp8_ds_mla( + kv_c_full, + k_pe_full.squeeze(1), + block_size=vllm_config.cache_config.block_size, + scale=kv_cache_scale, + ) + + q_nope, q_pe = q_c.split([qk_nope_head_dim, qk_rope_head_dim], dim=-1) + ql_nope = torch.einsum("qnh,lnh->qnl", q_nope, W_UK) + q_mqa = torch.cat([ql_nope, q_pe], dim=-1) + + k_mqa = torch.cat([kv_c_full, k_pe_full], dim=-1) + k_mqa = k_mqa.unsqueeze(1).expand(-1, num_heads, -1) + v_mqa = 
kv_c_full.unsqueeze(1).expand(-1, num_heads, -1) + + attn_mask = torch.ones(q_len, s_len, dtype=torch.bool, device=device) + causal_mask = torch.tril(torch.ones(q_len, q_len, device=device)) + attn_mask[:, ctx_len:] = causal_mask + + q_sdpa_in = q_mqa.unsqueeze(0).transpose(1, 2) + k_sdpa_in = k_mqa.unsqueeze(0).transpose(1, 2) + v_sdpa_in = v_mqa.unsqueeze(0).transpose(1, 2) + + sdpa_out = torch.nn.functional.scaled_dot_product_attention( + q_sdpa_in, k_sdpa_in, v_sdpa_in, attn_mask=attn_mask, scale=scale) + sdpa_out = sdpa_out.transpose(1, 2).squeeze(0) + + sdpa_out = torch.einsum("qnl,lnv->qnv", sdpa_out, W_UV) + reference_outputs.append(sdpa_out.flatten(start_dim=-2)) + + all_q_vllm.append(q_c) + all_kv_c_vllm.append(kv_c_full[ctx_len:]) + all_k_pe_vllm.append(k_pe_full[ctx_len:]) + kv_c_contexts.append(kv_c_full[:ctx_len + 1]) + k_pe_contexts.append(k_pe_full[:ctx_len + 1]) + + query_vllm = torch.cat(all_q_vllm, dim=0) + kv_c_vllm = torch.cat(all_kv_c_vllm, dim=0) + k_pe_vllm = torch.cat(all_k_pe_vllm, dim=0) + sdpa_reference = torch.cat(reference_outputs, dim=0) + + vllm_config.cache_config.cache_dtype = kv_cache_dtype + + common_attn_metadata = create_common_attn_metadata( + batch_spec, + vllm_config.cache_config.block_size, + device, + arange_block_indices=True) + + kv_cache = create_and_prepopulate_kv_cache( + kv_c_contexts=kv_c_contexts, + k_pe_contexts=k_pe_contexts, + block_size=vllm_config.cache_config.block_size, + head_size=head_size, + dtype=dtype, + device=device, + num_blocks=vllm_config.cache_config.num_gpu_blocks, + common_attn_metadata=common_attn_metadata, + randomize_blocks=False, + kv_cache_dtype=vllm_config.cache_config.cache_dtype, + scale=kv_cache_scale, + ) + + builder_cls = FlashMLASparseBackend.get_builder_cls() + builder = builder_cls(kv_cache_spec, ["placeholder"], vllm_config, device) + metadata = builder.build(common_prefix_len=0, + common_attn_metadata=common_attn_metadata) + + starts = np.asarray(common_attn_metadata.query_start_loc_cpu, + dtype=np.int32) + seg_lengths = np.diff(starts) + positions = np.arange(starts[-1], dtype=np.int32) - np.repeat( + starts[:-1], seg_lengths) + seq_lengths = np.asarray(common_attn_metadata.seq_lens_cpu, dtype=np.int32) + prefix_lengths = seq_lengths - seg_lengths + positions += np.repeat(prefix_lengths, seg_lengths) + + pos_gpu = torch.as_tensor(positions, device=device, dtype=torch.int32) + topk = metadata.topk_tokens + debug_indices = torch.arange(topk, device=device, + dtype=torch.int32).unsqueeze(0) + token_positions = pos_gpu.unsqueeze(1) + causal_mask = (debug_indices <= token_positions) + debug_indices = torch.where(causal_mask, debug_indices, + torch.full_like(debug_indices, -1)) + + # FlashMLASparseImpl now reads top-k indices from the indexer-provided + # buffer, so emulate that contract with a simple namespace mock. 
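+    # The assumed contract: the buffer holds, for every token in the
+    # flattened batch, the kv positions that token may attend to, padded
+    # with -1 for unused slots; the causal arange built above stands in
+    # for real indexer output in this test.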
+ debug_indices = debug_indices.expand(metadata.num_actual_tokens, + -1).clone() + mock_indexer = SimpleNamespace(topk_indices_buffer=debug_indices) + + ok, reason = flashmla.is_flashmla_supported() + if not ok: + pytest.skip(reason) + + kv_b_proj_weight = torch.cat([W_UK, W_UV], dim=-1) + kv_b_proj_weight = kv_b_proj_weight.view( + kv_lora_rank, num_heads * (qk_nope_head_dim + v_head_dim)) + + mock_kv_b_proj = ColumnParallelLinear(input_size=kv_lora_rank, + output_size=num_heads * + (qk_nope_head_dim + v_head_dim), + bias=False).to(device=device, + dtype=dtype) + mock_kv_b_proj.weight = torch.nn.Parameter(kv_b_proj_weight.T.contiguous()) + + impl_cls = FlashMLASparseBackend.get_impl_cls() + impl = impl_cls(num_heads=num_heads, + head_size=head_size, + scale=scale, + num_kv_heads=1, + alibi_slopes=None, + sliding_window=None, + kv_cache_dtype=vllm_config.cache_config.cache_dtype, + logits_soft_cap=None, + attn_type="decoder", + kv_sharing_target_layer_name=None, + q_lora_rank=None, + kv_lora_rank=kv_lora_rank, + qk_nope_head_dim=qk_nope_head_dim, + qk_rope_head_dim=qk_rope_head_dim, + qk_head_dim=qk_nope_head_dim + qk_rope_head_dim, + v_head_dim=v_head_dim, + kv_b_proj=mock_kv_b_proj, + indexer=mock_indexer) + + impl.process_weights_after_loading(dtype) + + layer = MockAttentionLayer(device) + out_buffer = torch.empty(metadata.num_actual_tokens, + num_heads * v_head_dim, + dtype=dtype, + device=device) + + backend_output = impl.forward(layer, + query_vllm, + kv_c_vllm, + k_pe_vllm, + kv_cache, + metadata, + output=out_buffer) + + assert backend_output.shape == sdpa_reference.shape + assert backend_output.dtype == sdpa_reference.dtype + assert torch.isfinite(backend_output).all() + + torch.testing.assert_close(backend_output, + sdpa_reference, + rtol=0.5, + atol=0.5) diff --git a/tests/v1/attention/utils.py b/tests/v1/attention/utils.py index 01b5de83a59a..adfe2b2db040 100644 --- a/tests/v1/attention/utils.py +++ b/tests/v1/attention/utils.py @@ -168,7 +168,6 @@ def create_standard_kv_cache_spec( vllm_config.parallel_config), head_size=vllm_config.model_config.get_head_size(), dtype=vllm_config.model_config.dtype, - use_mla=vllm_config.model_config.use_mla, sliding_window=vllm_config.model_config.get_sliding_window(), ) diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index 4cb7ed6ce382..452b16ef4a91 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -24,7 +24,8 @@ make_block_hash_with_group_id) from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, KVCacheSpec, - KVCacheTensor, SlidingWindowSpec, + KVCacheTensor, MLAAttentionSpec, + SlidingWindowSpec, UniformTypeKVCacheSpecs) from vllm.v1.metrics.stats import PrefixCacheStats from vllm.v1.request import Request @@ -77,13 +78,11 @@ def new_kv_cache_spec(block_size=16, num_kv_heads=2, head_size=64, dtype=torch.float32, - use_mla=False, sliding_window=None): return FullAttentionSpec(block_size=block_size, num_kv_heads=num_kv_heads, head_size=head_size, dtype=dtype, - use_mla=use_mla, sliding_window=sliding_window) @@ -91,13 +90,11 @@ def new_sliding_window_spec(block_size=16, num_kv_heads=2, head_size=64, dtype=torch.float32, - use_mla=False, sliding_window=1): return SlidingWindowSpec(block_size=block_size, num_kv_heads=num_kv_heads, head_size=head_size, dtype=dtype, - use_mla=use_mla, sliding_window=sliding_window) @@ -894,7 +891,6 @@ def test_merge_kv_cache_spec(): num_kv_heads=full_spec.num_kv_heads, 
head_size=full_spec.head_size, dtype=full_spec.dtype, - use_mla=full_spec.use_mla, sliding_window=1, ), ] @@ -991,7 +987,6 @@ def test_estimate_max_model_len(model_id, max_model_len, num_kv_heads=32, head_size=128, dtype=torch.float16, - use_mla=False, ) # Estimate the maximum model length, 16384 model_len need 8GB estimated_max_len = estimate_max_model_len(vllm_config, kv_cache_spec, @@ -1022,7 +1017,6 @@ def test_get_max_concurrency_for_kv_cache_config(): num_kv_heads=32, head_size=128, dtype=torch.float16, - use_mla=False, ) sliding_window_spec = SlidingWindowSpec( @@ -1030,7 +1024,6 @@ def test_get_max_concurrency_for_kv_cache_config(): num_kv_heads=32, head_size=128, dtype=torch.float16, - use_mla=False, sliding_window=1024, ) @@ -1412,3 +1405,48 @@ def test_generate_scheduler_kv_cache_config(): KVCacheGroupSpec(['layer_1', 'layer_2'], new_kv_cache_spec()) ], ) + + +def new_mla_spec(cache_dtype_str=None): + return MLAAttentionSpec(block_size=16, + num_kv_heads=16, + head_size=64, + dtype=torch.float32, + cache_dtype_str=cache_dtype_str) + + +def test_merge_mla_spec(): + kv_cache_specs = [ + new_mla_spec(), + new_mla_spec(), + ] + mla_spec = kv_cache_specs[0].merge(kv_cache_specs) + assert mla_spec == new_mla_spec() + + kv_cache_specs = [ + new_mla_spec(cache_dtype_str="fp8_ds_mla"), + new_mla_spec(cache_dtype_str="fp8_ds_mla"), + ] + mla_spec = kv_cache_specs[0].merge(kv_cache_specs) + assert mla_spec == new_mla_spec(cache_dtype_str="fp8_ds_mla") + + kv_cache_specs = [ + new_mla_spec(cache_dtype_str="fp8_ds_mla"), + new_mla_spec(cache_dtype_str=None), + ] + with pytest.raises(AssertionError): + kv_cache_specs[0].merge(kv_cache_specs) + + kv_cache_specs = [ + new_kv_cache_spec(), + new_mla_spec(), + ] + with pytest.raises(AssertionError): + kv_cache_specs[0].merge(kv_cache_specs) + + kv_cache_specs = [ + new_mla_spec(cache_dtype_str="fp8_ds_mla"), + new_kv_cache_spec(), + ] + with pytest.raises(AssertionError): + kv_cache_specs[0].merge(kv_cache_specs) diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index 37b4f9a08e40..5769099e0af1 100644 --- a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -76,7 +76,7 @@ def make_kv_cache_config(block_size: int, num_blocks: int) -> KVCacheConfig: kv_cache_groups=[ KVCacheGroupSpec( ["layer"], - FullAttentionSpec(block_size, 1, 1, torch.float32, False), + FullAttentionSpec(block_size, 1, 1, torch.float32), ) ], ) @@ -90,7 +90,7 @@ def make_kv_cache_config_hybrid_model(block_size: int, kv_cache_groups=[ KVCacheGroupSpec( ["layer1"], - FullAttentionSpec(block_size, 1, 1, torch.float32, False), + FullAttentionSpec(block_size, 1, 1, torch.float32), ), KVCacheGroupSpec( ["layer2"], @@ -98,7 +98,6 @@ def make_kv_cache_config_hybrid_model(block_size: int, 1, 1, torch.float32, - False, sliding_window=2 * block_size), ), KVCacheGroupSpec( @@ -107,7 +106,6 @@ def make_kv_cache_config_hybrid_model(block_size: int, 1, 1, torch.float32, - False, sliding_window=2 * block_size), ), ], @@ -1338,7 +1336,6 @@ def test_eagle_with_sliding_window(): head_size=1, dtype=torch.float32, sliding_window=block_size, - use_mla=False, ) manager = KVCacheManager( KVCacheConfig( diff --git a/tests/v1/core/test_single_type_kv_cache_manager.py b/tests/v1/core/test_single_type_kv_cache_manager.py index 01b54ae56e90..1f6825b6d24e 100644 --- a/tests/v1/core/test_single_type_kv_cache_manager.py +++ b/tests/v1/core/test_single_type_kv_cache_manager.py @@ -35,7 +35,6 @@ def 
test_chunked_local_attention_possible_cached_prefix(): head_size=1, dtype=torch.float32, attention_chunk_size=4, - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True) @@ -100,7 +99,6 @@ def test_sliding_window_possible_cached_prefix(): head_size=1, dtype=torch.float32, sliding_window=4, - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True) @@ -165,7 +163,6 @@ def test_chunked_local_attention_remove_skipped_blocks(): head_size=1, dtype=torch.float32, attention_chunk_size=4, - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=2000, enable_caching=True) @@ -217,7 +214,6 @@ def test_sliding_window_remove_skipped_blocks(): head_size=1, dtype=torch.float32, sliding_window=4, - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=2000, enable_caching=True) @@ -285,7 +281,6 @@ def test_get_num_blocks_to_allocate(): head_size=1, dtype=torch.float32, sliding_window=4, # Placeholder value, not related to test result - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True) @@ -308,7 +303,6 @@ def test_chunked_local_attention_get_num_blocks_to_allocate(): head_size=1, dtype=torch.float32, attention_chunk_size=4, # Placeholder value, not related to test result - use_mla=False, ) block_pool = BlockPool(num_gpu_blocks=100, enable_caching=True) diff --git a/tests/v1/engine/test_engine_core_client.py b/tests/v1/engine/test_engine_core_client.py index 992c4e01386e..10adac9bab5f 100644 --- a/tests/v1/engine/test_engine_core_client.py +++ b/tests/v1/engine/test_engine_core_client.py @@ -836,8 +836,7 @@ def create_mock_executor(vllm_config): mock_spec = FullAttentionSpec(block_size=16, num_kv_heads=1, head_size=64, - dtype=torch.float16, - use_mla=False) + dtype=torch.float16) mock_executor.get_kv_cache_specs.return_value = [{ "default": mock_spec diff --git a/tests/v1/worker/test_gpu_model_runner.py b/tests/v1/worker/test_gpu_model_runner.py index 8b571f95c5ec..49a7a61e1889 100644 --- a/tests/v1/worker/test_gpu_model_runner.py +++ b/tests/v1/worker/test_gpu_model_runner.py @@ -39,7 +39,6 @@ def initialize_kv_cache(runner: GPUModelRunner): runner.parallel_config), head_size=runner.model_config.get_head_size(), dtype=runner.kv_cache_dtype, - use_mla=False, ) tensor_size = attn_spec.page_size_bytes * NUM_BLOCKS kv_cache_config = KVCacheConfig( diff --git a/vllm/_custom_ops.py b/vllm/_custom_ops.py index a108542e1436..f07fa1e4e7be 100644 --- a/vllm/_custom_ops.py +++ b/vllm/_custom_ops.py @@ -1678,6 +1678,15 @@ def cp_gather_cache(src_cache: torch.Tensor, cu_seq_lens, batch_size, seq_starts) +def indexer_k_quant_and_cache(k: torch.Tensor, kv_cache: torch.Tensor, + slot_mapping: torch.Tensor, + quant_block_size: int, + kv_cache_dtype: str) -> None: + torch.ops._C_cache_ops.indexer_k_quant_and_cache(k, kv_cache, slot_mapping, + quant_block_size, + kv_cache_dtype) + + def get_device_attribute(attribute: int, device: int) -> int: return torch.ops._C_cuda_utils.get_device_attribute(attribute, device) diff --git a/vllm/attention/backends/abstract.py b/vllm/attention/backends/abstract.py index 629e42a8b902..b49e1c007c57 100644 --- a/vllm/attention/backends/abstract.py +++ b/vllm/attention/backends/abstract.py @@ -70,6 +70,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> Tuple[int, ...]: raise NotImplementedError diff --git a/vllm/attention/layer.py b/vllm/attention/layer.py index 8cab8330a6cd..4ce6a864d7ad 100644 --- a/vllm/attention/layer.py +++ 
b/vllm/attention/layer.py @@ -94,6 +94,7 @@ def __init__( logits_soft_cap: Optional[float] = None, per_layer_sliding_window: Optional[int] = None, use_mla: bool = False, + use_sparse: bool = False, prefix: str = "", attn_type: str = AttentionType.DECODER, kv_sharing_target_layer_name: Optional[str] = None, @@ -154,6 +155,7 @@ def __init__( self._o_scale_float: Optional[float] = None self.use_mla = use_mla + self.use_sparse = use_sparse self.num_heads = num_heads self.head_size = head_size self.num_kv_heads = num_kv_heads @@ -186,7 +188,8 @@ def __init__( kv_cache_dtype, block_size, use_mla=use_mla, - has_sink=self.has_sink) + has_sink=self.has_sink, + use_sparse=use_sparse) else: self.attn_backend = attn_backend diff --git a/vllm/attention/ops/common.py b/vllm/attention/ops/common.py index c8efa6e63a2e..e659f1f3eae9 100644 --- a/vllm/attention/ops/common.py +++ b/vllm/attention/ops/common.py @@ -138,3 +138,208 @@ def cp_lse_ag_out_rs(cp_attn_out: torch.Tensor, out, _ = correct_attn_out(cp_attn_out, lses, cp_group.rank_in_group, ctx) out = cp_group.reduce_scatter(out, dim=1) return out + + +@triton.jit +def _pack_seq_kernel( + x_ptr, # [N, D] + out_ptr, # [B, Lmax, D] + lengths_ptr, # *i32, [B] + N: tl.constexpr, + D: tl.constexpr, + Lmax: tl.constexpr, + PAD_VALUE: tl.constexpr, + BLOCK_T: tl.constexpr, # timesteps per program + BLOCK_D: tl.constexpr # features per program +): + pid_b = tl.program_id(0) # batch id + pid_t = tl.program_id(1) # block over time dimension + pid_d = tl.program_id(2) # block over feature dimension + off_t = pid_t * BLOCK_T + tl.arange(0, BLOCK_T) # [BLOCK_T] + off_d = pid_d * BLOCK_D + tl.arange(0, BLOCK_D) # [BLOCK_D] + + # Compute start index and sequence length from cumulative lengths + in_start = 0 + for i in range(pid_b): + in_start += tl.load(lengths_ptr + i) + seq_len = tl.load(lengths_ptr + pid_b) + + # valid time positions for this block + t_mask = off_t < Lmax + + # compute input row indices for valid (b, t) + in_row = in_start + off_t + valid_row = (off_t < seq_len) & t_mask + + # Pointers + # x_ptr: row-major [N, D] + x_row_ptr = x_ptr + in_row[:, None] * D + off_d[None, :] + + # out_ptr: row-major [B, Lmax, D] + out_row_ptr = out_ptr + (pid_b * Lmax + off_t)[:, + None] * D + off_d[None, :] + + # Initialize with PAD (cast will occur as needed based on out_ptr dtype) + d_mask = off_d[None, :] < D + pad_vals = tl.full([BLOCK_T, BLOCK_D], PAD_VALUE, tl.float32) + tl.store(out_row_ptr, pad_vals, mask=t_mask[:, None] & d_mask) + + # Load & write only where within seq_len + x_vals = tl.load(x_row_ptr, mask=valid_row[:, None] & d_mask) + tl.store(out_row_ptr, x_vals, mask=valid_row[:, None] & d_mask) + + +def pack_seq_triton(x: torch.Tensor, + lengths: torch.Tensor, + pad_value: float = -float('inf'), + block_t: int = 64, + block_d: int = 64) -> torch.Tensor: + """ + Pack sequences of different lengths into a batched tensor. + + Args: + x: [N, ...] - input tensor where N is total number of tokens + lengths: [B] - sequence lengths for each batch + pad_value: value to use for padding + block_t: block size for time dimension + block_d: block size for feature dimension + + Returns: + packed: [B, Lmax, ...] 
- packed tensor + """ + + # Handle multi-dimensional input by reshaping to (N, -1) + original_shape = x.shape + if len(original_shape) > 2: + N = original_shape[0] + x_reshaped = x.reshape(N, -1) + D = x_reshaped.shape[1] + else: + N, D = x.shape + x_reshaped = x + + B = lengths.numel() + Lmax = int(lengths.max().item()) + + # Starts are computed inside the kernel from lengths + + out = torch.empty((B, Lmax, D), device=x.device, dtype=x.dtype) + + grid = (B, triton.cdiv(Lmax, block_t), triton.cdiv(D, block_d)) + _pack_seq_kernel[grid](x_reshaped, + out, + lengths.int(), + N, + D, + Lmax, + PAD_VALUE=float(pad_value), + BLOCK_T=block_t, + BLOCK_D=block_d, + num_warps=4, + num_stages=2) + + # Reshape output back to original dimensions (except first dimension) + if len(original_shape) > 2: + output_shape = (B, Lmax) + original_shape[1:] + out = out.reshape(output_shape) + + return out + + +@triton.jit +def _unpack_seq_triton_kernel( + packed_ptr, # [B, Lmax, D] + out_ptr, # [N, D] + lengths_ptr, # *i32, [B] + B: tl.constexpr, + Lmax: tl.constexpr, + D: tl.constexpr, + BLOCK_T: tl.constexpr, # timesteps per program + BLOCK_D: tl.constexpr # features per program +): + pid_b = tl.program_id(0) # batch id + pid_t = tl.program_id(1) # block over time dimension + pid_d = tl.program_id(2) # block over feature dimension + off_t = pid_t * BLOCK_T + tl.arange(0, BLOCK_T) # [BLOCK_T] + off_d = pid_d * BLOCK_D + tl.arange(0, BLOCK_D) # [BLOCK_D] + + # bounds: compute start from cumulative lengths + in_start = 0 + for i in range(pid_b): + in_start += tl.load(lengths_ptr + i) + seq_len = tl.load(lengths_ptr + pid_b) + + # valid time positions for this block + t_mask = off_t < Lmax + valid_row = (off_t < seq_len) & t_mask + + # compute output row indices for valid (b, t) + out_row = in_start + off_t + + # Pointers + # packed_ptr: row-major [B, Lmax, D] + packed_row_ptr = packed_ptr + (pid_b * Lmax + + off_t)[:, None] * D + off_d[None, :] + + # out_ptr: row-major [N, D] + out_row_ptr = out_ptr + out_row[:, None] * D + off_d[None, :] + + # Load from packed tensor and store to output + d_mask = off_d[None, :] < D + packed_vals = tl.load(packed_row_ptr, mask=valid_row[:, None] & d_mask) + tl.store(out_row_ptr, packed_vals, mask=valid_row[:, None] & d_mask) + + +def unpack_seq_triton(packed_tensor: torch.Tensor, + lengths: torch.Tensor, + block_t: int = 64, + block_d: int = 64) -> torch.Tensor: + """ + Unpack a packed decode query tensor back to the original format. + Efficient Triton implementation. + + Args: + packed_tensor: [B, Lmax, ...] - packed tensor from pack_seq_triton + lengths: [B] - sequence lengths for each batch + block_t: block size for time dimension + block_d: block size for feature dimension + + Returns: + unpacked_tensor: [N, ...] 
where N = sum(lengths) + """ + + # Handle multi-dimensional input by reshaping to (B, Lmax, -1) + original_shape = packed_tensor.shape + if len(original_shape) > 3: + B, Lmax = original_shape[:2] + packed_reshaped = packed_tensor.reshape(B, Lmax, -1) + D = packed_reshaped.shape[2] + else: + B, Lmax, D = packed_tensor.shape + packed_reshaped = packed_tensor + + # Calculate total number of elements + N = int(lengths.sum().item()) + + out = torch.empty((N, D), + device=packed_tensor.device, + dtype=packed_tensor.dtype) + + grid = (B, triton.cdiv(Lmax, block_t), triton.cdiv(D, block_d)) + _unpack_seq_triton_kernel[grid](packed_reshaped, + out, + lengths.int(), + B, + Lmax, + D, + BLOCK_T=block_t, + BLOCK_D=block_d, + num_warps=4, + num_stages=2) + + # Reshape output back to original dimensions (except first dimension) + if len(original_shape) > 3: + output_shape = (N, ) + original_shape[2:] + out = out.reshape(output_shape) + + return out diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py index 2c3e8c42400c..3cc0e4adfa0a 100644 --- a/vllm/attention/ops/flashmla.py +++ b/vllm/attention/ops/flashmla.py @@ -19,6 +19,15 @@ else: _flashmla_C_AVAILABLE = False +if current_platform.is_cuda(): + try: + import vllm._flashmla_extension_C # noqa: F401 + _flashmla_extension_C_AVAILABLE = True + except ImportError: + _flashmla_extension_C_AVAILABLE = False +else: + _flashmla_extension_C_AVAILABLE = False + def is_flashmla_supported() -> Tuple[bool, Optional[str]]: """ @@ -37,24 +46,34 @@ def is_flashmla_supported() -> Tuple[bool, Optional[str]]: def get_mla_metadata( - cache_seqlens: torch.Tensor, - num_heads_per_head_k: int, - num_heads_k: int, -) -> Tuple[torch.Tensor, torch.Tensor]: + cache_seqlens: torch.Tensor, + num_q_tokens_per_head_k: int, + num_heads_k: int, + num_heads_q: Optional[int] = None, + is_fp8_kvcache: bool = False, + topk: Optional[int] = None) -> Tuple[torch.Tensor, torch.Tensor]: """ Arguments: - cache_seqlens: (batch_size), dtype torch.int32. - num_heads_per_head_k: Equals to seq_len_q * num_heads_q // num_heads_k. - num_heads_k: num_heads_k. - - Return: - tile_scheduler_metadata: (num_sm_parts, TileSchedulerMetaDataSize), - dtype torch.int32. - num_splits: (batch_size + 1), dtype torch.int32. + - cache_seqlens: (batch_size), dtype torch.int32. + - num_q_tokens_per_head_k: + Equals to num_q_tokens_per_q_seq * num_heads_q // num_heads_k. + - num_heads_k: The number of k heads. + - num_heads_q: + The number of q heads. + This argument is optional when sparse attention is not enabled + - is_fp8_kvcache: Whether the k_cache and v_cache are in fp8 format. + - topk: If not None, sparse attention will be enabled, + and only tokens in the `indices` array + passed to `flash_mla_with_kvcache_sm90` will be attended to. + + Returns: + - tile_scheduler_metadata: + (num_sm_parts, TileSchedulerMetaDataSize), dtype torch.int32. + - num_splits: (batch_size + 1), dtype torch.int32. 
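+
+    Example (illustrative sketch for MLA decode with one KV head,
+    128 query heads and one query token per sequence):
+        tile_scheduler_metadata, num_splits = get_mla_metadata(
+            cache_seqlens, num_q_tokens_per_head_k=1 * 128 // 1,
+            num_heads_k=1)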
""" - return torch.ops._flashmla_C.get_mla_metadata(cache_seqlens, - num_heads_per_head_k, - num_heads_k) + return torch.ops._flashmla_C.get_mla_decoding_metadata( + cache_seqlens, num_q_tokens_per_head_k, num_heads_k, num_heads_q, + is_fp8_kvcache, topk) def flash_mla_with_kvcache( @@ -69,45 +88,95 @@ def flash_mla_with_kvcache( causal: bool = False, descale_q: Optional[torch.Tensor] = None, descale_k: Optional[torch.Tensor] = None, + is_fp8_kvcache: bool = False, + indices: Optional[torch.Tensor] = None, ) -> Tuple[torch.Tensor, torch.Tensor]: """ Arguments: - q: (batch_size, seq_len_q, num_heads_q, head_dim). - k_cache: (num_blocks, page_block_size, num_heads_k, head_dim). - block_table: (batch_size, max_num_blocks_per_seq), torch.int32. - cache_seqlens: (batch_size), torch.int32. - head_dim_v: Head_dim of v. - tile_scheduler_metadata: (num_sm_parts, TileSchedulerMetaDataSize), - torch.int32, return by get_mla_metadata. - num_splits: (batch_size + 1), torch.int32, return by get_mla_metadata. - softmax_scale: float. The scaling of QK^T before applying softmax. - Default to 1 / sqrt(head_dim). - causal: bool. Whether to apply causal attention mask. - descale_q: (batch_size), torch.float32. Descaling factors for Q. - descale_k: (batch_size), torch.float32. Descaling factors for K. - - Return: - out: (batch_size, seq_len_q, num_heads_q, head_dim_v). - softmax_lse: (batch_size, num_heads_q, seq_len_q), torch.float32. + - q: (batch_size, seq_len_q, num_heads_q, head_dim). + - k_cache: (num_blocks, page_block_size, num_heads_k, head_dim). + - block_table: (batch_size, max_num_blocks_per_seq), torch.int32. + - cache_seqlens: (batch_size), torch.int32. + - head_dim_v: Head dimension of v. + - tile_scheduler_metadata: + (num_sm_parts, TileSchedulerMetaDataSize), torch.int32, + returned by get_mla_metadata. + - num_splits: + (batch_size + 1), torch.int32, returned by get_mla_metadata. + - softmax_scale: float. + The scale of QK^T before applying softmax. + Default to 1 / sqrt(head_dim). + - causal: bool. Whether to apply causal attention mask. + - descale_q: (batch_size), + torch.float32. Descaling factors for Q, used for fp8 quantization. + - descale_k: (batch_size), + torch.float32. Descaling factors for K, used for fp8 quantization. + - is_fp8_kvcache: bool. + Whether the k_cache and v_cache are in fp8 format. + For the format of FP8 KV cache, please refer to README.md + - indices: (batch_size, seq_len_q, topk), torch.int32. + If not None, sparse attention will be enabled, + and only tokens in the `indices` array will be attended to. + Invalid indices should be set to -1 or numbers >= total_seq_len_kv. + For details about how to set up `indices`, please refer to README.md. + + Returns: + - out: (batch_size, seq_len_q, num_heads_q, head_dim_v). + - softmax_lse: (batch_size, num_heads_q, seq_len_q), torch.float32. """ if softmax_scale is None: softmax_scale = q.shape[-1]**(-0.5) - out, softmax_lse = torch.ops._flashmla_C.fwd_kvcache_mla( - q, - k_cache, - head_dim_v, - cache_seqlens, - block_table, - softmax_scale, - causal, - tile_scheduler_metadata, - num_splits, - descale_q, - descale_k, - ) - - # Note(hc): need revisit when we support DCP with decode query_len > 1. - return out.squeeze(1), softmax_lse.squeeze(-1) + if indices is not None: + # NOTE (zyongye): sparse attention is also causal + # since it only attend to the tokens before + # but here `causal` should not be specified + assert not causal, \ + "causal must be `false` if sparse attention is enabled." 
+ assert (descale_q is None) == ( + descale_k is None + ), "descale_q and descale_k should be both None or both not None" + + if (descale_q is not None) and (descale_k is not None): + out, softmax_lse = torch.ops._flashmla_extension_C.fwd_kvcache_mla_fp8( + q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale, + causal, tile_scheduler_metadata, num_splits, descale_q, descale_k) + else: + out, softmax_lse = torch.ops._flashmla_C.fwd_kvcache_mla( + q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale, + causal, tile_scheduler_metadata, num_splits, is_fp8_kvcache, + indices) + return out, softmax_lse + + +def flash_mla_sparse_prefill( + q: torch.Tensor, + kv: torch.Tensor, + indices: torch.Tensor, + sm_scale: float, + d_v: int = 512, +) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: + """ + Sparse attention prefill kernel + + Args: + - q: [s_q, h_q, d_qk], bfloat16 + - kv: [s_kv, h_kv, d_qk], bfloat16 + - indices: [s_q, h_kv, topk], int32. + Invalid indices should be set to -1 or numbers >= s_kv + - sm_scale: float + - d_v: The dimension of value vectors. Can only be 512 + + Returns: + - (output, max_logits, lse) + About the definition of output, + max_logits and lse, please refer to README.md + - output: [s_q, h_q, d_v], bfloat16 + - max_logits: [s_q, h_q], float + - lse: [s_q, h_q], float, 2-based log-sum-exp + """ + results = torch.ops._flashmla_C.sparse_prefill_fwd(q, kv, indices, + sm_scale, d_v) + return results # diff --git a/vllm/attention/ops/paged_attn.py b/vllm/attention/ops/paged_attn.py index 4d870a45e580..539b57e41de7 100644 --- a/vllm/attention/ops/paged_attn.py +++ b/vllm/attention/ops/paged_attn.py @@ -50,6 +50,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> Tuple[int, ...]: return (2, num_blocks, block_size * num_kv_heads * head_size) diff --git a/vllm/attention/selector.py b/vllm/attention/selector.py index bd83473db6f3..6f048e589f7f 100644 --- a/vllm/attention/selector.py +++ b/vllm/attention/selector.py @@ -144,6 +144,7 @@ def get_attn_backend( block_size: int, use_mla: bool = False, has_sink: bool = False, + use_sparse: bool = False, ) -> type[AttentionBackend]: """Selects which attention backend to use and lazily imports it.""" # Accessing envs.* behind an @lru_cache decorator can cause the wrong @@ -158,6 +159,7 @@ def get_attn_backend( use_v1=envs.VLLM_USE_V1, use_mla=use_mla, has_sink=has_sink, + use_sparse=use_sparse, ) @@ -170,6 +172,7 @@ def _cached_get_attn_backend( use_v1: bool = False, use_mla: bool = False, has_sink: bool = False, + use_sparse: bool = False, ) -> type[AttentionBackend]: # Check whether a particular choice of backend was @@ -203,7 +206,7 @@ def _cached_get_attn_backend( # get device-specific attn_backend attention_cls = current_platform.get_attn_backend_cls( selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, - use_mla, has_sink) + use_mla, has_sink, use_sparse) if not attention_cls: raise ValueError( f"Invalid attention backend for {current_platform.device_name}") diff --git a/vllm/config/cache.py b/vllm/config/cache.py index 4c4e39c37ee5..58770649a8af 100644 --- a/vllm/config/cache.py +++ b/vllm/config/cache.py @@ -22,7 +22,8 @@ logger = init_logger(__name__) BlockSize = Literal[1, 8, 16, 32, 64, 128] -CacheDType = Literal["auto", "fp8", "fp8_e4m3", "fp8_e5m2", "fp8_inc"] +CacheDType = Literal["auto", "bfloat16", "fp8", "fp8_e4m3", "fp8_e5m2", + "fp8_inc"] MambaDType = Literal["auto", "float32"] PrefixCachingHashAlgo = 
Literal["sha256", "sha256_cbor"] @@ -52,7 +53,11 @@ class CacheConfig: cache_dtype: CacheDType = "auto" """Data type for kv cache storage. If "auto", will use model data type. CUDA 11.8+ supports fp8 (=fp8_e4m3) and fp8_e5m2. ROCm (AMD GPU) supports - fp8 (=fp8_e4m3). Intel Gaudi (HPU) supports fp8 (using fp8_inc).""" + fp8 (=fp8_e4m3). Intel Gaudi (HPU) supports fp8 (using fp8_inc). + Some models (namely DeepSeekV3.2) default to fp8, set to bfloat16 to use + bfloat16 instead, this is an invalid option for models that do not default + to fp8. + """ is_attention_free: bool = False """Whether the model is attention-free. This is primarily set in `ModelConfig` and that value should be manually duplicated here.""" @@ -171,11 +176,12 @@ def _verify_cache_dtype(self) -> None: if self.cache_dtype == "auto": pass elif self.cache_dtype in get_args(CacheDType): - logger.info( - "Using fp8 data type to store kv cache. It reduces the GPU " - "memory footprint and boosts the performance. " - "Meanwhile, it may cause accuracy drop without a proper " - "scaling factor.") + if self.cache_dtype.startswith("fp8"): + logger.info( + "Using fp8 data type to store kv cache. It reduces the GPU " + "memory footprint and boosts the performance. " + "Meanwhile, it may cause accuracy drop without a proper " + "scaling factor.") else: raise ValueError(f"Unknown kv cache dtype: {self.cache_dtype}") diff --git a/vllm/config/compilation.py b/vllm/config/compilation.py index df08fa20e0cf..ce173edb4b94 100644 --- a/vllm/config/compilation.py +++ b/vllm/config/compilation.py @@ -374,6 +374,7 @@ class CompilationConfig: "vllm.linear_attention", "vllm.plamo2_mamba_mixer", "vllm.gdn_attention", + "vllm.sparse_attn_indexer", ] def compute_hash(self) -> str: diff --git a/vllm/config/model.py b/vllm/config/model.py index 28af82de7722..c1392318dd8e 100644 --- a/vllm/config/model.py +++ b/vllm/config/model.py @@ -1082,14 +1082,14 @@ def is_deepseek_mla(self) -> bool: if not hasattr(self.hf_text_config, "model_type"): return False elif self.hf_text_config.model_type in \ - ('deepseek_v2', 'deepseek_v3', 'deepseek_mtp', + ('deepseek_v2', 'deepseek_v3', 'deepseek_v32', 'deepseek_mtp', 'kimi_k2', 'longcat_flash'): return self.hf_text_config.kv_lora_rank is not None elif self.hf_text_config.model_type == 'eagle': # if the model is an EAGLE module, check for the # underlying architecture return self.hf_text_config.model.model_type in \ - ('deepseek_v2', 'deepseek_v3') \ + ('deepseek_v2', 'deepseek_v3', 'deepseek_v32') \ and self.hf_text_config.kv_lora_rank is not None return False diff --git a/vllm/config/speculative.py b/vllm/config/speculative.py index cb4f0ae2cee0..f684e4e4ccd4 100644 --- a/vllm/config/speculative.py +++ b/vllm/config/speculative.py @@ -145,7 +145,7 @@ def compute_hash(self) -> str: @staticmethod def hf_config_override(hf_config: PretrainedConfig) -> PretrainedConfig: - if hf_config.model_type == "deepseek_v3": + if hf_config.model_type in ("deepseek_v3", "deepseek_v32"): hf_config.model_type = "deepseek_mtp" if hf_config.model_type == "deepseek_mtp": n_predict = getattr(hf_config, "num_nextn_predict_layers", None) diff --git a/vllm/model_executor/layers/layernorm.py b/vllm/model_executor/layers/layernorm.py index 8123259d037b..363245daa89d 100644 --- a/vllm/model_executor/layers/layernorm.py +++ b/vllm/model_executor/layers/layernorm.py @@ -5,6 +5,7 @@ import torch import torch.nn as nn +import torch.nn.functional as F import vllm.envs as envs from vllm.model_executor.custom_op import CustomOp @@ -375,3 +376,20 @@ 
def forward_cuda( x: torch.Tensor, ) -> torch.Tensor: return poly_norm(x, self.weight, self.bias, self.variance_epsilon) + + +class LayerNorm(nn.Module): + """ + Layer Normalization. + """ + + def __init__(self, dim: int, eps: float = 1e-6): + super().__init__() + self.dim = dim + self.eps = eps + self.weight = nn.Parameter(torch.ones(dim, dtype=torch.float32)) + self.bias = nn.Parameter(torch.zeros(dim, dtype=torch.float32)) + + def forward(self, x: torch.Tensor): + return F.layer_norm(x.float(), (self.dim, ), self.weight, self.bias, + self.eps).type_as(x) diff --git a/vllm/model_executor/layers/mla.py b/vllm/model_executor/layers/mla.py index 9782b37c321f..66bf3823e191 100644 --- a/vllm/model_executor/layers/mla.py +++ b/vllm/model_executor/layers/mla.py @@ -24,6 +24,9 @@ class MLAModules: q_a_layernorm: Optional[torch.nn.Module] q_b_proj: Optional[torch.nn.Module] q_proj: Optional[torch.nn.Module] + indexer: Optional[torch.nn.Module] + is_sparse: bool + topk_indices_buffer: Optional[torch.Tensor] @CustomOp.register("multi_head_latent_attention") @@ -76,6 +79,13 @@ def __init__( self.kv_b_proj = mla_modules.kv_b_proj self.rotary_emb = mla_modules.rotary_emb self.o_proj = mla_modules.o_proj + self.indexer = mla_modules.indexer + self.is_sparse = mla_modules.is_sparse + + if self.indexer is not None: + assert hasattr(self.indexer, "topk_tokens") + self.topk_tokens = self.indexer.topk_tokens + self.topk_indices_buffer = mla_modules.topk_indices_buffer # In the MLA backend, kv_cache includes both k_c and # pe (i.e. decoupled position embeddings). In particular, @@ -92,6 +102,7 @@ def __init__( quant_config=quant_config, prefix=f"{prefix}.attn", use_mla=True, + use_sparse=mla_modules.is_sparse, # MLA Args q_lora_rank=self.q_lora_rank, kv_lora_rank=self.kv_lora_rank, @@ -100,6 +111,7 @@ def __init__( qk_head_dim=self.qk_head_dim, v_head_dim=self.v_head_dim, kv_b_proj=self.kv_b_proj, + indexer=self.indexer, ) self.prefix = prefix @@ -145,6 +157,10 @@ def forward_native( q[..., self.qk_nope_head_dim:], k_pe = self.rotary_emb( positions, q[..., self.qk_nope_head_dim:], k_pe) + if self.indexer and self.is_sparse: + _topk_indices = self.indexer(hidden_states, q_c, positions, + self.rotary_emb) + attn_out = self.mla_attn( q, kv_c_normed, diff --git a/vllm/model_executor/models/config.py b/vllm/model_executor/models/config.py index cab85ea347f4..589ca0069034 100644 --- a/vllm/model_executor/models/config.py +++ b/vllm/model_executor/models/config.py @@ -346,8 +346,7 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: block_size=1, num_kv_heads=model_config.get_num_kv_heads(parallel_config), head_size=model_config.get_head_size(), - dtype=kv_cache_dtype, - use_mla=model_config.use_mla).page_size_bytes + dtype=kv_cache_dtype).page_size_bytes model_cls, _ = ModelRegistry.resolve_model_cls( model_config.architecture, @@ -401,6 +400,31 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: "exactly equal.", mamba_padding_pct) +class DeepseekV3ForCausalLM(VerifyAndUpdateConfig): + + @classmethod + def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: + """ + Updated fp8 cache to custom "fp8_ds_mla" format for DeepSeekV32 + """ + hf_config = vllm_config.model_config.hf_config + + is_v32 = hasattr(hf_config, "index_topk") + + if is_v32: + # For DeepSeekV3.2, we use a custom fp8 format as default (i.e. 
+ # "auto") + cache_config = vllm_config.cache_config + if cache_config.cache_dtype == "auto" or \ + cache_config.cache_dtype.startswith("fp8"): + cache_config.cache_dtype = "fp8_ds_mla" + logger.info( + "Using custom fp8 kv-cache format for DeepSeekV3.2") + if cache_config.cache_dtype == "bfloat16": + cache_config.cache_dtype = "auto" + logger.info("Using bfloat16 kv-cache for DeepSeekV3.2") + + MODELS_CONFIG_MAP: dict[str, type[VerifyAndUpdateConfig]] = { "GteModel": SnowflakeGteNewModelConfig, "GteNewModel": GteNewModelConfig, @@ -417,4 +441,5 @@ def verify_and_update_config(cls, vllm_config: "VllmConfig") -> None: "MambaForCausalLM": MambaModelConfig, "Mamba2ForCausalLM": MambaModelConfig, "FalconMambaForCausalLM": MambaModelConfig, + "DeepseekV3ForCausalLM": DeepseekV3ForCausalLM, } diff --git a/vllm/model_executor/models/deepseek_mtp.py b/vllm/model_executor/models/deepseek_mtp.py index a4623ff13cec..788e561ac394 100644 --- a/vllm/model_executor/models/deepseek_mtp.py +++ b/vllm/model_executor/models/deepseek_mtp.py @@ -53,8 +53,20 @@ def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: self.eh_proj = nn.Linear(config.hidden_size * 2, config.hidden_size, bias=False) + + self.is_v32 = hasattr(config, "index_topk") + if self.is_v32: + topk_tokens = config.index_topk + topk_indices_buffer = torch.empty( + vllm_config.scheduler_config.max_num_batched_tokens, + topk_tokens, + dtype=torch.int32, + device="cuda") + else: + topk_indices_buffer = None self.shared_head = SharedHead(config=config, quant_config=quant_config) - self.mtp_block = DeepseekV2DecoderLayer(vllm_config, prefix) + self.mtp_block = DeepseekV2DecoderLayer(vllm_config, prefix, + topk_indices_buffer) def forward( self, diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 2e0bcbe5d2e5..03c43654d68f 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -33,15 +33,21 @@ from transformers import DeepseekV2Config, DeepseekV3Config from vllm.attention import Attention +from vllm.attention.backends.abstract import AttentionBackend +from vllm.attention.ops.common import pack_seq_triton, unpack_seq_triton from vllm.compilation.decorators import support_torch_compile -from vllm.config import CacheConfig, ParallelConfig, VllmConfig +from vllm.config import (CacheConfig, ParallelConfig, VllmConfig, + get_current_vllm_config) from vllm.distributed import (get_ep_group, get_pp_group, get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, tensor_model_parallel_all_gather) +from vllm.forward_context import get_forward_context +from vllm.logger import init_logger from vllm.model_executor.layers.activation import SiluAndMul +from vllm.model_executor.layers.attention_layer_base import AttentionLayerBase from vllm.model_executor.layers.fused_moe import FusedMoE -from vllm.model_executor.layers.layernorm import RMSNorm +from vllm.model_executor.layers.layernorm import LayerNorm, RMSNorm from vllm.model_executor.layers.linear import (ColumnParallelLinear, MergedColumnParallelLinear, ReplicatedLinear, @@ -49,6 +55,8 @@ from vllm.model_executor.layers.logits_processor import LogitsProcessor from vllm.model_executor.layers.mla import MLAModules, MultiHeadLatentAttention from vllm.model_executor.layers.quantization import QuantizationConfig +from vllm.model_executor.layers.quantization.utils.fp8_utils import ( + per_token_group_quant_fp8) from vllm.model_executor.layers.rotary_embedding import get_rope from 
vllm.model_executor.layers.shared_fused_moe import SharedFusedMoE from vllm.model_executor.layers.vocab_parallel_embedding import ( @@ -56,13 +64,26 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) from vllm.model_executor.models.utils import sequence_parallel_chunk +from vllm.platforms import current_platform from vllm.sequence import IntermediateTensors +from vllm.utils import cdiv, direct_register_custom_op +from vllm.utils.deep_gemm import fp8_mqa_logits, fp8_paged_mqa_logits +from vllm.v1.attention.backends.mla.indexer import (DeepseekV32IndexerBackend, + DeepseekV32IndexerMetadata) +from vllm.v1.kv_cache_interface import KVCacheSpec, MLAAttentionSpec from .interfaces import MixtureOfExperts, SupportsLoRA, SupportsPP from .utils import (PPMissingLayer, is_pp_missing_parameter, make_empty_intermediate_tensors_factory, make_layers, maybe_prefix) +if current_platform.is_cuda_alike(): + from vllm import _custom_ops as ops +elif current_platform.is_xpu(): + from vllm._ipex_ops import ipex_ops as ops + +logger = init_logger(__name__) + class DeepseekV2MLP(nn.Module): @@ -276,6 +297,7 @@ class DeepseekV2Attention(nn.Module): def __init__( self, + vllm_config: VllmConfig, config: Union[DeepseekV2Config, DeepseekV3Config], hidden_size: int, num_heads: int, @@ -289,6 +311,7 @@ def __init__( max_position_embeddings: int = 8192, cache_config: Optional[CacheConfig] = None, quant_config: Optional[QuantizationConfig] = None, + topk_indices_buffer: Optional[torch.Tensor] = None, prefix: str = "", ) -> None: super().__init__() @@ -306,6 +329,8 @@ def __init__( self.scaling = self.qk_head_dim**-0.5 self.rope_theta = rope_theta self.max_position_embeddings = max_position_embeddings + assert topk_indices_buffer is None, "topk_indices_buffer is not \ + supported for DeepseekV2Attention" if self.q_lora_rank is not None: self.q_a_proj = ReplicatedLinear(self.hidden_size, @@ -418,6 +443,391 @@ def forward( return output +class DeepseekV32IndexerCache(torch.nn.Module, AttentionLayerBase): + + def __init__(self, head_dim: int, dtype: torch.dtype, prefix: str, + cache_config: CacheConfig): + super().__init__() + self.kv_cache = [torch.tensor([])] + self.head_dim = head_dim + self.prefix = prefix + self.cache_config = cache_config + self.dtype = dtype + compilation_config = get_current_vllm_config().compilation_config + if prefix in compilation_config.static_forward_context: + raise ValueError(f"Duplicate layer name: {prefix}") + compilation_config.static_forward_context[prefix] = self + + def get_kv_cache_spec(self) -> KVCacheSpec: + return MLAAttentionSpec( # Only has one vector instead of K + V + block_size=self.cache_config.block_size, + num_kv_heads=1, + head_size=self.head_dim, + dtype=self.dtype, + ) + + def forward(self): + ... 
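+        # Intentionally a no-op: this module mainly registers the indexer's
+        # KV-cache spec and backend; the cache itself is written inside the
+        # sparse_attn_indexer custom op via ops.indexer_k_quant_and_cache.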
+ + def get_attn_backend(self) -> AttentionBackend: + return DeepseekV32IndexerBackend + + +@torch.inference_mode() +def cp_gather_indexer_k_quant_cache( + kv_cache, # [num_blocks, block_size, head_dim + 1] + dst_value, # [cu_seq_lens[-1], head_dim] + dst_scale, # [cu_seq_lens[-1], 4] + block_table, # [batch_size, num_blocks] + cu_seq_lens, # [batch_size + 1, ] + batch_size, +): + num_blocks, block_size, _ = kv_cache.shape + head_dim = dst_value.shape[-1] + kv_cache = kv_cache.view(num_blocks, -1) + + expected_value = [] + expected_scale = [] + for b in range(batch_size): + s = cu_seq_lens[b + 1] - cu_seq_lens[b] + if s == 0: + continue + tot = cdiv(s, block_size) + blocks = block_table[b, :tot] + + value = [] + scale = [] + full_block = torch.arange(tot - 1, + device=kv_cache.device, + dtype=torch.int32) + non_remaining_value = kv_cache[blocks[full_block], :block_size * + head_dim].view(-1, head_dim) + non_remaining_scale = kv_cache[blocks[full_block], + block_size * head_dim:].view(-1, 4) + + remaining = s - (tot - 1) * block_size + + value = torch.cat([ + non_remaining_value, + kv_cache[blocks[-1], :remaining * head_dim].view(-1, head_dim) + ], + dim=0) + scale = torch.cat([ + non_remaining_scale, + kv_cache[blocks[-1], block_size * head_dim:block_size * head_dim + + remaining * 4].view(-1, 4) + ], + dim=0) + + expected_value.append(value) + expected_scale.append(scale) + + gather_value = torch.cat(expected_value, dim=0).view(-1, head_dim) + gather_scale = torch.cat(expected_scale, dim=0).view(-1, 4) + gather_value = gather_value.view(torch.float8_e4m3fn) + gather_scale = gather_scale.view(torch.float32) + dst_value.copy_(gather_value) + dst_scale.copy_(gather_scale) + + +def sparse_attn_indexer( + hidden_states: torch.Tensor, + k_cache_prefix: str, + kv_cache: torch.Tensor, + q_fp8: torch.Tensor, + k: torch.Tensor, + weights: torch.Tensor, + quant_block_size: int, + scale_fmt: Optional[str], + topk_tokens: int, + head_dim: int, + max_model_len: int, + total_seq_lens: int, + topk_indices_buffer: Optional[torch.Tensor], +) -> torch.Tensor: + + # careful! 
this will be None in dummy run + attn_metadata = get_forward_context().attn_metadata + # assert isinstance(attn_metadata, dict) + if not isinstance(attn_metadata, dict): + return sparse_attn_indexer_fake( + hidden_states, + k_cache_prefix, + kv_cache, + q_fp8, + k, + weights, + quant_block_size, + scale_fmt, + topk_tokens, + head_dim, + max_model_len, + total_seq_lens, + topk_indices_buffer, + ) + attn_metadata = attn_metadata[k_cache_prefix] + assert isinstance(attn_metadata, DeepseekV32IndexerMetadata) + slot_mapping = attn_metadata.slot_mapping + has_decode = attn_metadata.num_decodes > 0 + has_prefill = attn_metadata.num_prefills > 0 + num_decode_tokens = attn_metadata.num_decode_tokens + + ops.indexer_k_quant_and_cache( + k, + kv_cache, + slot_mapping, + quant_block_size, + scale_fmt, + ) + + topk_indices_buffer[:hidden_states.shape[0]] = -1 + if has_prefill: + prefill_metadata = attn_metadata.prefill + num_prefills = attn_metadata.num_prefills + k_fp8 = torch.empty([prefill_metadata.total_seq_lens, head_dim], + device=k.device, + dtype=torch.float8_e4m3fn) + k_scale = torch.empty([prefill_metadata.total_seq_lens, 1], + device=k.device, + dtype=torch.float32) + cp_gather_indexer_k_quant_cache( + kv_cache, + k_fp8, + k_scale, + prefill_metadata.block_table, + prefill_metadata.cu_seq_lens, + num_prefills, + ) + cu_seqlen_ks = prefill_metadata.cu_seqlen_ks + cu_seqlen_ke = prefill_metadata.cu_seqlen_ke + num_tokens = attn_metadata.num_actual_tokens + logits = fp8_mqa_logits( + q_fp8[num_decode_tokens:num_tokens], + (k_fp8, k_scale), + weights[num_decode_tokens:num_tokens], + cu_seqlen_ks, + cu_seqlen_ke, + ) + topk_indices = logits.topk(min(topk_tokens, logits.shape[-1]), + dim=-1)[1] + topk_indices -= cu_seqlen_ks[:, None] + mask_lo = topk_indices >= 0 + mask_hi = topk_indices - (cu_seqlen_ke - cu_seqlen_ks)[:, None] < 0 + mask = torch.full_like(topk_indices, + False, + dtype=torch.bool, + device=topk_indices.device) + mask = mask_lo & mask_hi + topk_indices = topk_indices.masked_fill(~mask, -1) + topk_indices_buffer[num_decode_tokens:num_tokens, :topk_indices. 
+ shape[-1]] = topk_indices.to(dtype=torch.int32) + + if has_decode: + decode_metadata = attn_metadata.decode + # kv_cache size requirement [num_block, block_size, n_head, head_dim], + # we only have [num_block, block_size, head_dim], + kv_cache = kv_cache.unsqueeze(-2) + decode_lens = decode_metadata.decode_lens + if decode_metadata.requires_padding: + # pad in edge case where we have short chunked prefill length < + # decode_threshold since we unstrictly split + # prefill and decode by decode_threshold + # (currently set to 1 + speculative tokens) + padded_q_fp8_decode_tokens = pack_seq_triton( + q_fp8[:num_decode_tokens], decode_lens) + else: + padded_q_fp8_decode_tokens = q_fp8[:num_decode_tokens].reshape( + decode_lens.shape[0], -1, *q_fp8.shape[1:]) + # TODO: move and optimize below logic with triton kernels + batch_size = padded_q_fp8_decode_tokens.shape[0] + next_n = padded_q_fp8_decode_tokens.shape[1] + assert batch_size == decode_metadata.seq_lens.shape[0] + num_padded_tokens = batch_size * next_n + logits = fp8_paged_mqa_logits( + padded_q_fp8_decode_tokens, + kv_cache, + weights[:num_padded_tokens], + decode_metadata.seq_lens, + decode_metadata.block_table, + decode_metadata.schedule_metadata, + max_model_len=max_model_len, + ) + # padded query len + current_device = padded_q_fp8_decode_tokens.device + padded_num_tokens = batch_size * next_n + positions = torch.arange(max_model_len, + device=current_device).unsqueeze(0).expand( + batch_size * next_n, -1) + row_indices = torch.arange(padded_num_tokens, + device=current_device) // next_n + next_n_offset = torch.arange( + padded_num_tokens, + device=padded_q_fp8_decode_tokens.device) % next_n + index_end_pos = (decode_metadata.seq_lens[row_indices] - next_n + + next_n_offset).unsqueeze(1) + # index_end_pos: [B * N, 1] + mask = positions <= index_end_pos + # mask: [B * N, L] + logits = logits.masked_fill(~mask, float('-inf')) + topk_indices = logits.topk(topk_tokens, + dim=-1)[1].to(torch.int32) # [B * N, K] + # ensure we don't set indices for the top k + # that is out of range(masked already) + # this will happen if context length is shorter than K + topk_indices[topk_indices > index_end_pos] = -1 + if decode_metadata.requires_padding: + # if padded, we need to unpack + # the topk indices removing padded tokens + topk_indices = unpack_seq_triton( + topk_indices.reshape(batch_size, -1, topk_indices.shape[-1]), + decode_lens) + topk_indices_buffer[:num_decode_tokens, :topk_indices. + shape[-1]] = topk_indices.to(dtype=torch.int32) + + return topk_indices_buffer + + +def sparse_attn_indexer_fake( + hidden_states: torch.Tensor, + k_cache_prefix: str, + kv_cache: torch.Tensor, + q_fp8: torch.Tensor, + k: torch.Tensor, + weights: torch.Tensor, + quant_block_size: int, + scale_fmt: Optional[str], + topk_tokens: int, + head_dim: int, + max_model_len: int, + total_seq_lens: int, + topk_indices_buffer: Optional[torch.Tensor], +) -> torch.Tensor: + # profile run + # NOTE(Chen): create the max possible flattened_kv. So that + # profile_run can get correct memory usage. 
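+    # head_dim + 4 bytes per token mirrors what the real op gathers for
+    # prefill: head_dim bytes of fp8 k values plus one float32 scale.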
+ _flattened_kv = torch.empty([total_seq_lens, head_dim + 4], + device=k.device, + dtype=torch.uint8) + _k_fp8 = _flattened_kv[..., :head_dim].view( + torch.float8_e4m3fn).contiguous() + _k_scale = _flattened_kv[..., head_dim:].view(torch.float32).contiguous() + return topk_indices_buffer + + +direct_register_custom_op( + op_name="sparse_attn_indexer", + op_func=sparse_attn_indexer, + mutates_args=["topk_indices_buffer"], + fake_impl=sparse_attn_indexer_fake, + dispatch_key=current_platform.dispatch_key, +) + + +class Indexer(nn.Module): + + def __init__(self, + vllm_config: VllmConfig, + config: Union[DeepseekV2Config, DeepseekV3Config], + hidden_size: int, + q_lora_rank: int, + quant_config: Optional[QuantizationConfig], + cache_config: Optional[CacheConfig], + topk_indices_buffer: Optional[torch.Tensor], + prefix: str = ""): + super().__init__() + self.vllm_config = vllm_config + self.config = config + # self.indexer_cfg = config.attn_module_list_cfg[0]["attn_index"] + self.topk_tokens = config.index_topk + self.n_head = config.index_n_heads # 64 + self.head_dim = config.index_head_dim # 128 + self.rope_dim = config.qk_rope_head_dim # 64 + self.q_lora_rank = q_lora_rank # 1536 + # no tensor parallel, just replicated + self.wq_b = ReplicatedLinear(self.q_lora_rank, + self.head_dim * self.n_head, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.wq_b") + self.wk = ReplicatedLinear(hidden_size, + self.head_dim, + bias=False, + quant_config=quant_config, + prefix=f"{prefix}.wk") + self.k_norm = LayerNorm(self.head_dim, eps=1e-6) + self.weights_proj = ReplicatedLinear(hidden_size, + self.n_head, + quant_config=None, + prefix=f"{prefix}.weights_proj") + self.softmax_scale = self.head_dim**-0.5 + + self.scale_fmt = "ue8m0" + self.quant_block_size = 128 # TODO: get from config + self.topk_indices_buffer = topk_indices_buffer + + # NOTE: (zyongye) we use fp8 naive cache, + # where we store value in fp8 and scale in fp32 + # per self.quant_block_size element + self.k_cache = DeepseekV32IndexerCache( + head_dim=self.head_dim + + self.head_dim // self.quant_block_size * 4, + dtype=torch.uint8, + prefix=f"{prefix}.k_cache", + cache_config=cache_config) + self.max_model_len = vllm_config.model_config.max_model_len + self.prefix = prefix + from vllm.v1.attention.backends.mla.indexer import ( + get_max_prefill_buffer_size) + self.max_total_seq_len = get_max_prefill_buffer_size(vllm_config) + + def forward(self, hidden_states: torch.Tensor, qr: torch.Tensor, positions, + rotary_emb) -> torch.Tensor: + q, _ = self.wq_b(qr) + q = q.view(-1, self.n_head, self.head_dim) + q_pe, q_nope = torch.split( + q, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1) + + k, _ = self.wk(hidden_states) + k = self.k_norm(k) + k_pe, k_nope = torch.split( + k, [self.rope_dim, self.head_dim - self.rope_dim], dim=-1) + + q_pe, k_pe = rotary_emb(positions, q_pe, k_pe.unsqueeze(1)) + q = torch.cat([q_pe, q_nope], dim=-1) + k = torch.cat([k_pe.squeeze(1), k_nope], dim=-1) + + # we only quant q here since k quant is fused with cache insertion + q = q.view(-1, self.head_dim) + q_fp8, q_scale = per_token_group_quant_fp8(q, + self.quant_block_size, + column_major_scales=False, + use_ue8m0=self.scale_fmt + is not None) + q_fp8 = q_fp8.view(-1, self.n_head, self.head_dim) + q_scale = q_scale.view(-1, self.n_head, 1) + + weights, _ = self.weights_proj(hidden_states) + weights = weights.unsqueeze( + -1) * q_scale * self.softmax_scale * self.n_head**-0.5 + weights = weights.squeeze(-1) + + return 
torch.ops.vllm.sparse_attn_indexer( + hidden_states, + self.k_cache.prefix, + self.k_cache.kv_cache[0], + q_fp8, + k, + weights, + self.quant_block_size, + self.scale_fmt, + self.topk_tokens, + self.head_dim, + self.max_model_len, + self.max_total_seq_len, + self.topk_indices_buffer, + ) + + class DeepseekV2MLAAttention(nn.Module): """ Main reference: DeepseekV2 paper, and FlashInfer Implementation @@ -429,6 +839,7 @@ class DeepseekV2MLAAttention(nn.Module): def __init__( self, + vllm_config: VllmConfig, config: Union[DeepseekV2Config, DeepseekV3Config], hidden_size: int, num_heads: int, @@ -443,6 +854,7 @@ def __init__( cache_config: Optional[CacheConfig] = None, quant_config: Optional[QuantizationConfig] = None, prefix: str = "", + topk_indices_buffer: Optional[torch.Tensor] = None, ) -> None: super().__init__() self.hidden_size = hidden_size @@ -523,6 +935,15 @@ def __init__( mscale = yarn_get_mscale(scaling_factor, float(mscale_all_dim)) self.scaling = self.scaling * mscale * mscale + self.is_v32 = hasattr(config, "index_topk") + + if self.is_v32: + self.indexer = Indexer(vllm_config, config, hidden_size, + q_lora_rank, quant_config, cache_config, + topk_indices_buffer, f"{prefix}.indexer") + else: + self.indexer = None + mla_modules = MLAModules( kv_a_layernorm=self.kv_a_layernorm, kv_b_proj=self.kv_b_proj, @@ -536,7 +957,11 @@ def __init__( if self.q_lora_rank is not None else None, q_b_proj=self.q_b_proj if self.q_lora_rank is not None else None, q_proj=self.q_proj if self.q_lora_rank is None else None, + indexer=self.indexer, + is_sparse=self.is_v32, + topk_indices_buffer=topk_indices_buffer, ) + self.mla_attn = MultiHeadLatentAttention( self.hidden_size, self.num_local_heads, @@ -562,7 +987,10 @@ def forward( class DeepseekV2DecoderLayer(nn.Module): - def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: + def __init__(self, + vllm_config: VllmConfig, + prefix: str, + topk_indices_buffer: Optional[torch.Tensor] = None) -> None: super().__init__() config = vllm_config.model_config.hf_config @@ -585,6 +1013,7 @@ def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: else: attn_cls = DeepseekV2Attention self.self_attn = attn_cls( + vllm_config=vllm_config, config=config, hidden_size=self.hidden_size, num_heads=config.num_attention_heads, @@ -600,6 +1029,7 @@ def __init__(self, vllm_config: VllmConfig, prefix: str) -> None: cache_config=cache_config, quant_config=quant_config, prefix=f"{prefix}.self_attn", + topk_indices_buffer=topk_indices_buffer, ) if (config.n_routed_experts is not None @@ -683,6 +1113,16 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.config = config self.vocab_size = config.vocab_size + self.is_v32 = hasattr(config, "index_topk") + if self.is_v32: + topk_tokens = config.index_topk + topk_indices_buffer = torch.empty( + vllm_config.scheduler_config.max_num_batched_tokens, + topk_tokens, + dtype=torch.int32, + device="cuda") + else: + topk_indices_buffer = None if get_pp_group().is_first_rank: self.embed_tokens = VocabParallelEmbedding( @@ -695,7 +1135,8 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, - lambda prefix: DeepseekV2DecoderLayer(vllm_config, prefix), + lambda prefix: DeepseekV2DecoderLayer(vllm_config, prefix, + topk_indices_buffer), prefix=f"{prefix}.layers") if get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/longcat_flash.py b/vllm/model_executor/models/longcat_flash.py index 
1a7a64bfd1a4..78e6e3d4b535 100644 --- a/vllm/model_executor/models/longcat_flash.py +++ b/vllm/model_executor/models/longcat_flash.py @@ -308,6 +308,7 @@ class FlashDecoderLayer(nn.Module): def __init__( self, + vllm_config: VllmConfig, config: FlashConfig, cache_config: Optional[CacheConfig] = None, quant_config: Optional[QuantizationConfig] = None, @@ -329,6 +330,7 @@ def __init__( # Dual attention structure self.self_attn = nn.ModuleList([ DeepseekV2MLAAttention( + vllm_config=vllm_config, config=config, hidden_size=self.hidden_size, num_heads=config.num_attention_heads, @@ -454,6 +456,7 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): self.start_layer, self.end_layer, self.layers = make_layers( config.num_hidden_layers, lambda prefix: FlashDecoderLayer( + vllm_config, config, cache_config=cache_config, quant_config=quant_config, diff --git a/vllm/model_executor/models/registry.py b/vllm/model_executor/models/registry.py index 62aa5af9fb14..732181265a97 100644 --- a/vllm/model_executor/models/registry.py +++ b/vllm/model_executor/models/registry.py @@ -70,6 +70,7 @@ "DeepseekForCausalLM": ("deepseek", "DeepseekForCausalLM"), "DeepseekV2ForCausalLM": ("deepseek_v2", "DeepseekV2ForCausalLM"), "DeepseekV3ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"), + "DeepseekV32ForCausalLM": ("deepseek_v2", "DeepseekV3ForCausalLM"), "Dots1ForCausalLM": ("dots1", "Dots1ForCausalLM"), "Ernie4_5ForCausalLM": ("ernie45", "Ernie4_5ForCausalLM"), "Ernie4_5_MoeForCausalLM": ("ernie45_moe", "Ernie4_5_MoeForCausalLM"), diff --git a/vllm/platforms/cpu.py b/vllm/platforms/cpu.py index 1e15dc6a91aa..0b26446a87d8 100644 --- a/vllm/platforms/cpu.py +++ b/vllm/platforms/cpu.py @@ -93,11 +93,14 @@ def get_device_name(cls, device_id: int = 0) -> str: def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, - has_sink: bool) -> str: + has_sink: bool, use_sparse: bool) -> str: if selected_backend and selected_backend != _Backend.TORCH_SDPA: logger.info("Cannot use %s backend on CPU.", selected_backend) if use_mla: raise NotImplementedError("MLA is not supported on CPU.") + if use_sparse: + raise NotImplementedError( + "Sparse Attention is not supported on CPU.") logger.info("Using Torch SDPA backend.") if not use_v1: raise ValueError("CPU backend only supports V1.") diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index 1463fe34fc75..a9a8d9ea2625 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -119,6 +119,8 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: # TODO(lucas): handle this more gracefully # Note: model_config may be None during testing if model_config is not None and model_config.use_mla: + use_sparse = hasattr(vllm_config.model_config.hf_config, + "index_topk") # If `VLLM_ATTENTION_BACKEND` is not set and we are using MLA, # then we default to FlashMLA backend for non-blackwell GPUs, # else we default to CutlassMLA. 
For each case, we force the @@ -165,6 +167,12 @@ def check_and_update_config(cls, vllm_config: "VllmConfig") -> None: "Forcing kv cache block size to 64 for FlashInferMLA " "backend.") + # TODO(Chen): remove this hacky code + if use_sparse and cache_config.block_size != 64: + cache_config.block_size = 64 + logger.info( + "Forcing kv cache block size to 64 for FlashMLASparse " + "backend.") # lazy import to avoid circular import from vllm.config import CUDAGraphMode @@ -221,7 +229,7 @@ def get_vit_attn_backend(cls, head_size: int, @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, - has_sink) -> str: + has_sink, use_sparse) -> str: if use_mla: if not use_v1: raise RuntimeError( @@ -231,6 +239,11 @@ def get_attn_backend_cls(cls, selected_backend, head_size, dtype, from vllm.attention.ops.flashmla import is_flashmla_supported from vllm.attention.utils.fa_utils import flash_attn_supports_mla + if use_sparse: + logger.info_once("Using Sparse MLA backend on V1 engine.") + return ("vllm.v1.attention.backends.mla.flashmla_sparse." + "FlashMLASparseBackend") + use_cutlassmla = selected_backend == _Backend.CUTLASS_MLA or ( selected_backend is None and cls.is_device_capability(100) and block_size == 128) diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py index de23a665d2ea..1691ad62650b 100644 --- a/vllm/platforms/interface.py +++ b/vllm/platforms/interface.py @@ -194,7 +194,7 @@ def get_vit_attn_backend(cls, head_size: int, def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, - has_sink: bool) -> str: + has_sink: bool, use_sparse: bool) -> str: """Get the attention backend class of a device.""" return "" diff --git a/vllm/platforms/rocm.py b/vllm/platforms/rocm.py index f67568bf07c1..14762f1b7094 100644 --- a/vllm/platforms/rocm.py +++ b/vllm/platforms/rocm.py @@ -195,7 +195,10 @@ def get_vit_attn_backend(cls, head_size: int, @classmethod def get_attn_backend_cls(cls, selected_backend, head_size, dtype, kv_cache_dtype, block_size, use_v1, use_mla, - has_sink) -> str: + has_sink, use_sparse) -> str: + if use_sparse: + raise NotImplementedError( + "Sparse Attention is not supported on ROCm.") if use_mla: if not use_v1: raise RuntimeError( diff --git a/vllm/platforms/tpu.py b/vllm/platforms/tpu.py index c2ba37224d61..4a4931f7f009 100644 --- a/vllm/platforms/tpu.py +++ b/vllm/platforms/tpu.py @@ -49,7 +49,10 @@ class TpuPlatform(Platform): def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, - has_sink) -> str: + has_sink, use_sparse) -> str: + if use_sparse: + raise NotImplementedError( + "Sparse Attention is not supported on TPU.") if selected_backend != _Backend.PALLAS: logger.info("Cannot use %s backend on TPU.", selected_backend) diff --git a/vllm/platforms/xpu.py b/vllm/platforms/xpu.py index cf408cc5df04..12d6a2a2d1ba 100644 --- a/vllm/platforms/xpu.py +++ b/vllm/platforms/xpu.py @@ -36,7 +36,10 @@ class XPUPlatform(Platform): def get_attn_backend_cls(cls, selected_backend: _Backend, head_size: int, dtype: torch.dtype, kv_cache_dtype: Optional[str], block_size: int, use_v1: bool, use_mla: bool, - has_sink: bool) -> str: + has_sink: bool, use_sparse) -> str: + if use_sparse: + raise NotImplementedError( + "Sparse Attention is not supported on XPU.") use_v1 = envs.VLLM_USE_V1 
if not use_v1: raise ValueError("XPU backend only supports V1.") diff --git a/vllm/transformers_utils/config.py b/vllm/transformers_utils/config.py index 4f5e5c01e5cb..86345287f988 100644 --- a/vllm/transformers_utils/config.py +++ b/vllm/transformers_utils/config.py @@ -66,6 +66,8 @@ def __getitem__(self, key): _CONFIG_REGISTRY: dict[str, type[PretrainedConfig]] = LazyConfigDict( chatglm="ChatGLMConfig", deepseek_vl_v2="DeepseekVLV2Config", + deepseek_v3="DeepseekV3Config", + deepseek_v32="DeepseekV3Config", kimi_vl="KimiVLConfig", Llama_Nemotron_Nano_VL="Nemotron_Nano_VL_Config", RefinedWeb="RWConfig", # For tiiuae/falcon-40b(-instruct) diff --git a/vllm/transformers_utils/configs/__init__.py b/vllm/transformers_utils/configs/__init__.py index 52fa49ad302b..1b33b5e70e0b 100644 --- a/vllm/transformers_utils/configs/__init__.py +++ b/vllm/transformers_utils/configs/__init__.py @@ -8,6 +8,7 @@ """ from vllm.transformers_utils.configs.chatglm import ChatGLMConfig +from vllm.transformers_utils.configs.deepseek_v3 import DeepseekV3Config from vllm.transformers_utils.configs.deepseek_vl2 import DeepseekVLV2Config from vllm.transformers_utils.configs.dotsocr import DotsOCRConfig from vllm.transformers_utils.configs.eagle import EAGLEConfig @@ -37,6 +38,7 @@ __all__ = [ "ChatGLMConfig", "DeepseekVLV2Config", + "DeepseekV3Config", "DotsOCRConfig", "EAGLEConfig", "RWConfig", diff --git a/vllm/transformers_utils/configs/deepseek_v3.py b/vllm/transformers_utils/configs/deepseek_v3.py new file mode 100644 index 000000000000..4b26cdfd94b5 --- /dev/null +++ b/vllm/transformers_utils/configs/deepseek_v3.py @@ -0,0 +1,101 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from transformers.configuration_utils import PretrainedConfig +from transformers.utils import logging + +logger = logging.get_logger(__name__) + + +class DeepseekV3Config(PretrainedConfig): + + model_type = "deepseek_v3" + keys_to_ignore_at_inference = ["past_key_values"] + + def __init__( + self, + vocab_size=129280, + hidden_size=7168, + intermediate_size=18432, + moe_intermediate_size=2048, + num_hidden_layers=61, + num_nextn_predict_layers=1, + num_attention_heads=128, + num_key_value_heads=128, + n_shared_experts=1, + n_routed_experts=256, + ep_size=1, + routed_scaling_factor=2.5, + kv_lora_rank=512, + q_lora_rank=1536, + qk_rope_head_dim=64, + v_head_dim=128, + qk_nope_head_dim=128, + topk_method='noaux_tc', + n_group=8, + topk_group=4, + num_experts_per_tok=8, + moe_layer_freq=1, + first_k_dense_replace=3, + norm_topk_prob=True, + scoring_func='sigmoid', + hidden_act="silu", + max_position_embeddings=4096, + initializer_range=0.02, + rms_norm_eps=1e-6, + use_cache=True, + pad_token_id=None, + bos_token_id=0, + eos_token_id=1, + tie_word_embeddings=False, + rope_theta=10000.0, + rope_scaling=None, + attention_bias=False, + attention_dropout=0.0, + **kwargs, + ): + self.vocab_size = vocab_size + self.max_position_embeddings = max_position_embeddings + self.hidden_size = hidden_size + self.intermediate_size = intermediate_size + self.moe_intermediate_size = moe_intermediate_size + self.num_hidden_layers = num_hidden_layers + self.num_nextn_predict_layers = num_nextn_predict_layers + self.num_attention_heads = num_attention_heads + self.n_shared_experts = n_shared_experts + self.n_routed_experts = n_routed_experts + self.ep_size = ep_size + self.routed_scaling_factor = routed_scaling_factor + self.kv_lora_rank = kv_lora_rank + self.q_lora_rank = q_lora_rank + 
self.qk_rope_head_dim = qk_rope_head_dim + self.v_head_dim = v_head_dim + self.qk_nope_head_dim = qk_nope_head_dim + self.topk_method = topk_method + self.n_group = n_group + self.topk_group = topk_group + self.num_experts_per_tok = num_experts_per_tok + self.moe_layer_freq = moe_layer_freq + self.first_k_dense_replace = first_k_dense_replace + self.norm_topk_prob = norm_topk_prob + self.scoring_func = scoring_func + # for backward compatibility + if num_key_value_heads is None: + num_key_value_heads = num_attention_heads + + self.num_key_value_heads = num_key_value_heads + self.hidden_act = hidden_act + self.initializer_range = initializer_range + self.rms_norm_eps = rms_norm_eps + self.use_cache = use_cache + self.rope_theta = rope_theta + self.rope_scaling = rope_scaling + self.attention_bias = attention_bias + self.attention_dropout = attention_dropout + + super().__init__( + pad_token_id=pad_token_id, + bos_token_id=bos_token_id, + eos_token_id=eos_token_id, + tie_word_embeddings=tie_word_embeddings, + **kwargs, + ) diff --git a/vllm/utils/__init__.py b/vllm/utils/__init__.py index ba280d6dbe4a..11d6686009b2 100644 --- a/vllm/utils/__init__.py +++ b/vllm/utils/__init__.py @@ -130,6 +130,7 @@ "fp8_e5m2": torch.uint8, "int8": torch.int8, "fp8_inc": torch.float8_e4m3fn, + "fp8_ds_mla": torch.uint8, } TORCH_DTYPE_TO_NUMPY_DTYPE = { @@ -3433,6 +3434,12 @@ def has_triton_kernels() -> bool: return _has_module("triton_kernels") +def has_tilelang() -> bool: + """Whether the optional `tilelang` package is available.""" + + return _has_module("tilelang") + + def set_process_title(name: str, suffix: str = "", prefix: str = envs.VLLM_PROCESS_NAME_PREFIX) -> None: diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index f955beb92b36..0e3bdaec829e 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -70,17 +70,25 @@ def _missing(*_: Any, **__: Any) -> NoReturn: _fp8_gemm_nt_impl: Callable[..., Any] | None = None _grouped_impl: Callable[..., Any] | None = None _grouped_masked_impl: Callable[..., Any] | None = None +_fp8_mqa_logits_impl: Callable[..., Any] | None = None +_fp8_paged_mqa_logits_impl: Callable[..., Any] | None = None +_get_paged_mqa_logits_metadata_impl: Callable[..., Any] | None = None _get_mn_major_tma_aligned_tensor_impl: Callable[..., Any] | None = None def _lazy_init() -> None: """Import deep_gemm and resolve symbols on first use.""" - global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl,\ - _get_mn_major_tma_aligned_tensor_impl + global _fp8_gemm_nt_impl, _grouped_impl, _grouped_masked_impl + global _fp8_mqa_logits_impl, _fp8_paged_mqa_logits_impl + global _get_paged_mqa_logits_metadata_impl + global _get_mn_major_tma_aligned_tensor_impl # fast path if (_fp8_gemm_nt_impl is not None or _grouped_impl is not None - or _grouped_masked_impl is not None): + or _grouped_masked_impl is not None + or _fp8_mqa_logits_impl is not None + or _fp8_paged_mqa_logits_impl is not None + or _get_paged_mqa_logits_metadata_impl is not None): return if not has_deep_gemm(): @@ -97,10 +105,20 @@ def _lazy_init() -> None: _fp8_gemm_nt_impl = getattr(_dg, "fp8_gemm_nt", None) _grouped_impl = getattr(_dg, "m_grouped_fp8_gemm_nt_contiguous", None) _grouped_masked_impl = getattr(_dg, "fp8_m_grouped_gemm_nt_masked", None) + _fp8_mqa_logits_impl = getattr(_dg, "fp8_mqa_logits", None) + _fp8_paged_mqa_logits_impl = getattr(_dg, "fp8_paged_mqa_logits", None) + _get_paged_mqa_logits_metadata_impl = getattr( + _dg, "get_paged_mqa_logits_metadata", None) 
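+    # Symbols absent from the installed deep_gemm resolve to None here, so the
+    # wrappers below fall back to _missing() instead of raising at import time.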
_get_mn_major_tma_aligned_tensor_impl = getattr( _dg, "get_mn_major_tma_aligned_tensor", None) +def get_num_sms() -> int: + _lazy_init() + _dg = importlib.import_module("deep_gemm") + return int(_dg.get_num_sms()) + + def get_col_major_tma_aligned_tensor(x: torch.Tensor) -> torch.Tensor: """Wrapper for DeepGEMM's get_mn_major_tma_aligned_tensor""" _lazy_init() @@ -135,6 +153,100 @@ def fp8_m_grouped_gemm_nt_masked(*args, **kwargs): *args, disable_ue8m0_cast=not is_deep_gemm_e8m0_used(), **kwargs) +def fp8_mqa_logits( + q: torch.Tensor, + kv: tuple[torch.Tensor, torch.Tensor], + weights: torch.Tensor, + cu_seqlen_ks: torch.Tensor, + cu_seqlen_ke: torch.Tensor, +) -> torch.Tensor: + """Compute FP8 MQA logits for a single sequence without KV paging. + + Args: + q: Query tensor of shape [M, H, D]. Casted to + `torch.float8_e4m3fn` by caller. + kv: Tuple `(k_fp8, k_scales)` where `k_fp8` has shape [N, D] with + dtype `torch.float8_e4m3fn` and `k_scales` has shape [N] (or + [N, 1]) with dtype `torch.float32`. + weights: weights of shape [M, H], dtype `torch.float32`. + cu_seqlen_ks: Start indices (inclusive) for valid K per query position, + shape [M], dtype int32. + cu_seqlen_ke: End indices (exclusive) for valid K per query position, + shape [M], dtype int32. + + Returns: + Logits tensor of shape [M, N], dtype `torch.float32`. + """ + _lazy_init() + if _fp8_mqa_logits_impl is None: + return _missing() + return _fp8_mqa_logits_impl(q, kv, weights, cu_seqlen_ks, cu_seqlen_ke) + + +def get_paged_mqa_logits_metadata(context_lens: torch.Tensor, block_size: int, + num_sms: int) -> torch.Tensor: + """Build scheduling metadata for paged MQA logits. + + Args: + context_lens: Tensor of shape [B], dtype int32; effective context length + per batch element. + block_size: KV-cache block size in tokens (e.g., 64). + num_sms: Number of SMs available. 132 for Hopper + + Returns: + Backend-specific tensor consumed by `fp8_paged_mqa_logits` to + schedule work across SMs. + """ + _lazy_init() + if _get_paged_mqa_logits_metadata_impl is None: + return _missing() + return _get_paged_mqa_logits_metadata_impl(context_lens, block_size, + num_sms) + + +def fp8_paged_mqa_logits( + q_fp8: torch.Tensor, + kv_cache_fp8: torch.Tensor, + weights: torch.Tensor, + context_lens: torch.Tensor, + block_tables: torch.Tensor, + schedule_metadata: torch.Tensor, + max_model_len: int, +) -> torch.Tensor: + """Compute FP8 MQA logits using paged KV-cache. + + Args: + q_fp8: Query tensor of shape [B, next_n, H, D]. Casted to + `torch.float8_e4m3fn` by caller. + kv_cache_fp8: Paged KV-cache in packed FP8+scale layout with shape + [num_blocks, block_size, 1, D+4], dtype `torch.uint8`. The last + 4 bytes per (block,pos) store the `float` dequant scale. + weights: Tensor of shape [B * next_n, H], dtype `torch.float32`. + context_lens: Tensor of shape [B], dtype int32; effective context length + for each batch element. + block_tables: Tensor of shape [B, max_blocks], dtype int32; maps logical + block indices to physical blocks in the paged cache. + schedule_metadata: Returned by `get_paged_mqa_logits_metadata`; + used to distribute work across SMs. + max_model_len: Maximum sequence length used to size the logits output. + + Returns: + Logits tensor of shape [B * next_n, max_model_len], dtype + `torch.float32`. 
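+    Note:
+        Rows are laid out as `b * next_n + i` for query token `i` of request
+        `b`; in the sparse indexer, columns at or beyond a request's context
+        length are masked to -inf by the caller before top-k selection.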
+ """ + _lazy_init() + if _fp8_paged_mqa_logits_impl is None: + return _missing() + return _fp8_paged_mqa_logits_impl(q_fp8, + kv_cache_fp8, + weights, + context_lens, + block_tables, + schedule_metadata, + max_model_len, + clean_logits=True) + + def _ceil_to_ue8m0(x: torch.Tensor): return torch.pow(2.0, torch.ceil(torch.log2(x.abs()))) @@ -195,9 +307,13 @@ def should_use_deepgemm_for_fp8_linear(output_dtype: torch.dtype, "fp8_gemm_nt", "m_grouped_fp8_gemm_nt_contiguous", "fp8_m_grouped_gemm_nt_masked", + "fp8_mqa_logits", + "fp8_paged_mqa_logits", + "get_paged_mqa_logits_metadata", "per_block_cast_to_fp8", "is_deep_gemm_e8m0_used", "is_deep_gemm_supported", + "get_num_sms", "should_use_deepgemm_for_fp8_linear", "get_col_major_tma_aligned_tensor", -] \ No newline at end of file +] diff --git a/vllm/v1/attention/backends/cpu_attn.py b/vllm/v1/attention/backends/cpu_attn.py index ab09ab9f8e0e..6ca0c63f6b59 100644 --- a/vllm/v1/attention/backends/cpu_attn.py +++ b/vllm/v1/attention/backends/cpu_attn.py @@ -74,6 +74,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: return _get_paged_attn_impl().get_kv_cache_shape( num_blocks, block_size, num_kv_heads, head_size) diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index f284847dd9e9..f0770f744146 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -80,6 +80,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") diff --git a/vllm/v1/attention/backends/flashinfer.py b/vllm/v1/attention/backends/flashinfer.py index a4bf3635bbca..688e681f0591 100755 --- a/vllm/v1/attention/backends/flashinfer.py +++ b/vllm/v1/attention/backends/flashinfer.py @@ -187,6 +187,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: return (num_blocks, 2, block_size, num_kv_heads, head_size) diff --git a/vllm/v1/attention/backends/flex_attention.py b/vllm/v1/attention/backends/flex_attention.py index 807b8d987a2d..e548b51060d8 100644 --- a/vllm/v1/attention/backends/flex_attention.py +++ b/vllm/v1/attention/backends/flex_attention.py @@ -90,6 +90,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: return (2, num_blocks, block_size, num_kv_heads, head_size) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 1053fde09910..561793b6a377 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -286,6 +286,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, # assumed to be 1 for MLA head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: return (num_blocks, block_size, head_size) @@ -407,6 +408,7 @@ def __post_init__(self): M = TypeVar("M", bound=MLACommonMetadata) +A = TypeVar("A") def use_flashinfer_prefill() -> bool: @@ -930,7 +932,9 @@ def reorg_kvcache( return reorganized_kv_c_normed, reorganized_k_pe -class MLACommonImpl(MLAAttentionImpl[M], Generic[M]): +# TODO(Lucas): rename MLACommonBaseImpl -> MLACommonImpl, +# and MLACommonImpl -> MLACommonDenseImpl or somthing like that +class MLACommonBaseImpl(MLAAttentionImpl[A], Generic[A]): """ NOTE: 
Please read the comment at the top of the file before trying to understand this class @@ -956,6 +960,7 @@ def __init__( qk_head_dim: int, v_head_dim: int, kv_b_proj: ColumnParallelLinear, + indexer=None, q_pad_num_heads: Optional[int] = None, ) -> None: if kv_sharing_target_layer_name is not None: @@ -974,8 +979,140 @@ def __init__( self.qk_head_dim = qk_head_dim self.v_head_dim = v_head_dim self.kv_b_proj = kv_b_proj + self.indexer = indexer self.q_pad_num_heads = q_pad_num_heads + def process_weights_after_loading(self, act_dtype: torch.dtype): + + def get_layer_weight(layer): + WEIGHT_NAMES = ("weight", "qweight", "weight_packed") + for attr in WEIGHT_NAMES: + if hasattr(layer, attr): + return getattr(layer, attr) + raise AttributeError( + f"Layer '{layer}' has no recognized weight attribute:" + f" {WEIGHT_NAMES}.") + + def get_and_maybe_dequant_weights(layer: LinearBase): + if not isinstance(layer.quant_method, UnquantizedLinearMethod): + # NOTE: This should only be used offline, since it's O(N^3) + eye = torch.eye(layer.input_size_per_partition, + dtype=act_dtype, + device=get_layer_weight(layer).device) + dequant_weights = layer.quant_method.apply(layer, + eye, + bias=None) + del eye + # standardize to (output, input) + return dequant_weights.T + return layer.weight + + # we currently do not have quantized bmm's which are needed for + # `W_UV` and `W_UK_T`, we just store fp16/bf16 copies and perform + # the bmm's in 16-bit, the extra memory overhead of this is fairly low + kv_b_proj_weight = get_and_maybe_dequant_weights(self.kv_b_proj).T + assert kv_b_proj_weight.shape == ( + self.kv_lora_rank, + self.num_heads * (self.qk_nope_head_dim + self.v_head_dim)), ( + f"{kv_b_proj_weight.shape=}, " + f"{self.kv_lora_rank=}, " + f"{self.num_heads=}, " + f"{self.qk_nope_head_dim=}, " + f"{self.v_head_dim=}") + kv_b_proj_weight = kv_b_proj_weight.view( + self.kv_lora_rank, + self.num_heads, + self.qk_nope_head_dim + self.v_head_dim, + ) + + W_UK, W_UV = kv_b_proj_weight.split( + [self.qk_nope_head_dim, self.v_head_dim], dim=-1) + + if is_rocm_aiter_fp8bmm_enabled(): + W_K = W_UK.transpose(0, 1) # 16 512 128 + W_V = W_UV.permute(1, 2, 0) # 16 128 512 + self.W_K, self.W_K_scale = dynamic_per_batched_tensor_quant( + W_K, dtype=current_platform.fp8_dtype()) + self.W_V, self.W_V_scale = dynamic_per_batched_tensor_quant( + W_V, dtype=current_platform.fp8_dtype()) + + # The kernel operates on non-padded inputs. Hence, pre-compiling + # triton kernel to avoid runtime compilation for unseen batch sizes + # Pre-compile for batch sizes 1 to 1024 to cover most use-cases. + # On DS-R1, this step adds roughly 50s to the model loading time. 
+ max_batch_size = 1024 # [ToDo] Find the optimal upper limit + pre_compilation_list = list(range(1, max_batch_size + 1)) + if is_global_first_rank(): + pre_compilation_list = tqdm( + pre_compilation_list, + desc="[Aiter Triton] Pre-compiling fp8 BMM kernel", + total=max_batch_size, + ) + + for m in pre_compilation_list: + x = torch.empty((self.W_K.shape[0], m, self.W_K.shape[2]), + dtype=torch.bfloat16, + device=self.W_K.device) + aiter_triton_fp8_bmm(x, + self.W_K, + self.W_K_scale, + group_size=128, + transpose_bm=True) + + x = torch.empty((self.W_V.shape[0], m, self.W_V.shape[2]), + dtype=torch.bfloat16, + device=self.W_V.device) + aiter_triton_fp8_bmm(x, + self.W_V, + self.W_V_scale, + group_size=128, + transpose_bm=True) + else: + # Convert from (L, N, V) to (N, L, V) + self.W_UV = W_UV.transpose(0, 1) + # Convert from (L, N, P) to (N, P, L) + self.W_UK_T = W_UK.permute(1, 2, 0) + + def _v_up_proj(self, x: torch.Tensor, out: torch.Tensor): + # Convert from (B, N, L) to (N, B, L) + x = x.view(-1, self.num_heads, self.kv_lora_rank).transpose(0, 1) + if is_rocm_aiter_fp8bmm_enabled(): + # Multiply + Transpose (N, B, L) x (N, L, V)->(N, B, V)->(B, N, V) + x = aiter_triton_fp8_bmm(x, + self.W_V, + self.W_V_scale, + group_size=128, + transpose_bm=True) + # Convert from (B, N, V) to (B, N * V) + x = x.reshape(-1, self.num_heads * self.v_head_dim) + # Copy result + out.copy_(x) + else: + # Convert from (B, N * V) to (N, B, V) + out = out.view(-1, self.num_heads, self.v_head_dim).transpose(0, 1) + + # Multiply (N, B, L) x (N, L, V) -> (N, B, V) + torch.bmm(x, self.W_UV, out=out) # Reuse "out" to make it "hot" + + # Convert from (N, B, V) to (B, N * V) + out_new = out.transpose(0, 1).reshape( + -1, self.num_heads * self.v_head_dim) + + # Adjust output buffer shape back to the original (B, N * V) + N, B, V = out.shape + out.resize_((B, N * V)) + out.copy_(out_new) # Copy result + + +class MLACommonImpl(MLACommonBaseImpl[M], Generic[M]): + """ + NOTE: Please read the comment at the top of the file before trying to + understand this class + """ + + def __init__(self, *args, **kwargs) -> None: + super().__init__(*args, **kwargs) + if use_flashinfer_prefill(): logger.debug_once("Using FlashInfer prefill for MLA") self._run_prefill_context_chunk = self._run_prefill_context_chunk_fi @@ -1154,36 +1291,6 @@ def _run_prefill_context_chunk_cudnn(self, True, #Indicates actual_seq_lens are on GPU or CPU. 
) - def _v_up_proj(self, x: torch.Tensor, out: torch.Tensor): - # Convert from (B, N, L) to (N, B, L) - x = x.view(-1, self.num_heads, self.kv_lora_rank).transpose(0, 1) - if is_rocm_aiter_fp8bmm_enabled(): - # Multiply + Transpose (N, B, L) x (N, L, V)->(N, B, V)->(B, N, V) - x = aiter_triton_fp8_bmm(x, - self.W_V, - self.W_V_scale, - group_size=128, - transpose_bm=True) - # Convert from (B, N, V) to (B, N * V) - x = x.reshape(-1, self.num_heads * self.v_head_dim) - # Copy result - out.copy_(x) - else: - # Convert from (B, N * V) to (N, B, V) - out = out.view(-1, self.num_heads, self.v_head_dim).transpose(0, 1) - - # Multiply (N, B, L) x (N, L, V) -> (N, B, V) - torch.bmm(x, self.W_UV, out=out) # Reuse "out" to make it "hot" - - # Convert from (N, B, V) to (B, N * V) - out_new = out.transpose(0, 1).reshape( - -1, self.num_heads * self.v_head_dim) - - # Adjust output buffer shape back to the original (B, N * V) - N, B, V = out.shape - out.resize_((B, N * V)) - out.copy_(out_new) # Copy result - def process_weights_after_loading(self, act_dtype: torch.dtype): def get_layer_weight(layer): @@ -1455,6 +1562,7 @@ def _forward_prefill( attn_metadata: MLACommonMetadata, k_scale: torch.Tensor, ) -> torch.Tensor: + # TODO (zyongye): Prefill function here assert attn_metadata.prefill is not None assert self.dcp_world_size is not None diff --git a/vllm/v1/attention/backends/mla/flashmla.py b/vllm/v1/attention/backends/mla/flashmla.py index ac0524ba088b..67c21f83cf5d 100644 --- a/vllm/v1/attention/backends/mla/flashmla.py +++ b/vllm/v1/attention/backends/mla/flashmla.py @@ -177,6 +177,7 @@ def _forward_decode( attn_metadata: FlashMLAMetadata, layer: AttentionLayer, ) -> tuple[torch.Tensor, Optional[torch.Tensor]]: + # TODO: (zyongye) decode function for mla here assert kv_c_and_k_pe_cache.numel() > 0 assert attn_metadata.decode is not None diff --git a/vllm/v1/attention/backends/mla/flashmla_sparse.py b/vllm/v1/attention/backends/mla/flashmla_sparse.py new file mode 100644 index 000000000000..36c3c188042c --- /dev/null +++ b/vllm/v1/attention/backends/mla/flashmla_sparse.py @@ -0,0 +1,544 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import math +from dataclasses import dataclass +from typing import TYPE_CHECKING, ClassVar, Optional + +import numpy as np +import torch + +from vllm import _custom_ops as ops +from vllm.attention.backends.abstract import (AttentionBackend, AttentionLayer, + AttentionMetadata) +from vllm.attention.backends.utils import get_mla_dims +from vllm.attention.ops.flashmla import (flash_mla_sparse_prefill, + flash_mla_with_kvcache, + get_mla_metadata) +from vllm.config import VllmConfig +from vllm.logger import init_logger +from vllm.platforms import current_platform +from vllm.triton_utils import tl, triton +from vllm.utils import cdiv +from vllm.v1.attention.backends.mla.common import MLACommonBaseImpl +from vllm.v1.attention.backends.utils import (AttentionCGSupport, + AttentionMetadataBuilder, + CommonAttentionMetadata) +from vllm.v1.kv_cache_interface import AttentionSpec + +if TYPE_CHECKING: + from vllm.model_executor.models.deepseek_v2 import Indexer + +logger = init_logger(__name__) +""" +NOTE: FlashMLA Sparse uses an fp8 cache with the following format + +In the "FP8 with scale" format, each token's KV cache is 656 Bytes, +structured as: +- **First 512 bytes:** The "quantized NoPE" part, containing 512 + `float8_e4m3` values. +- **Next 16 bytes:** Scale factors, containing 4 `float32` values. 
+ The first `float32` is the scale for the first 128 `float8_e4m3` values, + the second for the next 128, and so on. + - **Last 128 bytes:** The "RoPE" part, containing 64 `bfloat16` values. This + part is not quantized for accuracy. +""" + + +def _lse2_to_lse(lse_base2: torch.Tensor) -> torch.Tensor: + # Convert base-2 LSE to natural-log LSE + # Keep FP32 for numerical stability during the merge. + return (lse_base2.to(torch.float32) * math.log(2.0)) + + +class FlashMLASparseBackend(AttentionBackend): + + accept_output_buffer: bool = True + + @staticmethod + def get_name() -> str: + return "FLASHMLA_SPARSE_VLLM_V1" + + @staticmethod + def get_metadata_cls() -> type[AttentionMetadata]: + return FlashMLASparseMetadata + + @staticmethod + def get_builder_cls() -> type["FlashMLASparseMetadataBuilder"]: + return FlashMLASparseMetadataBuilder + + @staticmethod + def get_impl_cls() -> type["FlashMLASparseImpl"]: + return FlashMLASparseImpl + + @staticmethod + def get_kv_cache_shape( + num_blocks: int, + block_size: int, + num_kv_heads: int, # assumed to be 1 for MLA + head_size: int, + cache_dtype_str: str = "auto", + ) -> tuple[int, ...]: + if cache_dtype_str == "fp8_ds_mla": + # custom storage format is 656 bytes + # see FlashMLA readme.md for details + return (num_blocks, block_size, 656) + else: + return (num_blocks, block_size, head_size) + + @classmethod + def get_supported_dtypes(cls) -> list[torch.dtype]: + return [torch.bfloat16] + + @classmethod + def get_supported_head_sizes(cls) -> list[int]: + return [576] + + +@dataclass +class MLASparsePrefillMetadata: + # NOTE(Chen): we do not call it "FlashMLASparsePrefillMetadata" because + # the kernel is not from flashmla + block_table: torch.Tensor + has_context: bool = False + context_lens: Optional[torch.Tensor] = None + + +@dataclass +class FlashMLASparseDecodeAndContextMetadata: + scheduler_metadata: torch.Tensor = None + num_splits: torch.Tensor = None + cache_lens: torch.Tensor = None + prefill_context_lengths: Optional[torch.Tensor] = None + prefill_new_k_start_locs: Optional[torch.Tensor] = None + dummy_block_table: torch.Tensor = None + + def filter_prefill_indices( + self, indices: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + assert self.prefill_context_lengths is not None + prefill_context_lengths = self.prefill_context_lengths.unsqueeze(-1) + context_indices = torch.where(indices < prefill_context_lengths, + indices, -1) + new_token_indices = torch.where(indices >= prefill_context_lengths, + indices - prefill_context_lengths, -1) + return context_indices, new_token_indices + + +@dataclass +class FlashMLASparseMetadata: + num_reqs: int + max_query_len: int + max_seq_len: int + + num_actual_tokens: int # Number of tokens excluding padding.
+ query_start_loc: torch.Tensor + slot_mapping: torch.Tensor + + block_table: torch.Tensor + req_id_per_token: torch.Tensor + block_size: int = 64 + topk_tokens: int = 2048 + + @dataclass + class FP8KernelMetadata: + scheduler_metadata: Optional[torch.Tensor] + num_splits: torch.Tensor + dummy_block_table: torch.Tensor + cache_lens: torch.Tensor + + fp8_extra_metadata: Optional[FP8KernelMetadata] = None + + +@triton.jit +def _convert_req_index_to_global_index_kernel( + req_id_ptr, # int32 [num_tokens] + block_table_ptr, # int32 [num_requests, max_num_blocks_per_req] + token_indices_ptr, # int32 [num_tokens, NUM_TOPK_TOKENS] + out_ptr, # int32 [num_tokens, NUM_TOPK_TOKENS] + # shapes (compile-time where possible) + max_num_blocks_per_req: tl.constexpr, + BLOCK_SIZE: tl.constexpr, + BLOCK_N: tl.constexpr, # tile width along columns + # strides (in elements) + bt_stride0, + bt_stride1, + ti_stride0, + ti_stride1, + out_stride0, + out_stride1, +): + # program_id(0) -> token_id (row) + # program_id(1) -> tile index along columns + token_id = tl.program_id(0) + tile_id = tl.program_id(1) + + # Each program covers BLOCK_N consecutive columns + indice_id = tile_id * BLOCK_N + tl.arange(0, BLOCK_N) + + # Load request id for this token (no mask: grid is exact) + req = tl.load(req_id_ptr + token_id) + + # Load token indices for this tile + ti_ptr = token_indices_ptr + token_id * ti_stride0 + indice_id * ti_stride1 + tok = tl.load(ti_ptr) # int32 + + # Only token == -1 should propagate as -1 + is_invalid_tok = tok < 0 + + # Compute block id and in-block offset + block_id = tok // BLOCK_SIZE + inblock_off = tok % BLOCK_SIZE + + # Guard block_table access + valid_block = block_id < max_num_blocks_per_req + bt_ptr = block_table_ptr + req * bt_stride0 + block_id * bt_stride1 + base = tl.load(bt_ptr, mask=valid_block, other=0) + + # If token == -1 OR block_id OOB, output -1; else base * BLOCK_SIZE + offset + out_val = tl.where(is_invalid_tok | (~valid_block), -1, + base * BLOCK_SIZE + inblock_off) + + # Store results + out_ptr_ij = out_ptr + token_id * out_stride0 + indice_id * out_stride1 + tl.store(out_ptr_ij, out_val) + + +def triton_convert_req_index_to_global_index( + req_id: torch.Tensor, # int32 [num_tokens] + block_table: torch. + Tensor, # int32 [num_requests, max_num_blocks_per_req] + token_indices: torch.Tensor, # int32 [num_tokens, NUM_TOPK_TOKENS] + BLOCK_SIZE: int = 64, + NUM_TOPK_TOKENS: int = 2048, + BLOCK_N: int = 128, # tile width along columns +): + """ + out[token_id, indice_id] = + block_table[req_id[token_id], + token_indices[token_id, indice_id] // BLOCK_SIZE] * BLOCK_SIZE + + token_indices[token_id, indice_id] % BLOCK_SIZE + + Only when token_indices[token_id, indice_id] == -1 do we output -1. + For safety, we also output -1 if the derived block_id would be + out-of-bounds. 
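+    Illustrative example (BLOCK_SIZE=4): with req_id=[0],
+    block_table=[[7, 9]] and token_indices=[[5, -1]], token 5 falls in
+    block_id=1 at offset 1, so out=[[9 * 4 + 1, -1]] = [[37, -1]].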
+ """ + assert req_id.dtype == torch.int32 + assert block_table.dtype == torch.int32 + assert token_indices.dtype == torch.int32 + assert token_indices.shape[1] == NUM_TOPK_TOKENS + assert NUM_TOPK_TOKENS % BLOCK_N == 0, \ + f"NUM_TOPK_TOKENS ({NUM_TOPK_TOKENS}) must be divisible by" \ + f"BLOCK_N ({BLOCK_N})" + + num_tokens = req_id.shape[0] + num_requests, max_num_blocks_per_req = block_table.shape + tiles_per_row = NUM_TOPK_TOKENS // BLOCK_N + + # Ensure contiguous tensors on the same device + req_id_c = req_id.contiguous() + block_table_c = block_table.contiguous() + token_indices_c = token_indices.contiguous() + out = torch.empty_like(token_indices_c) + + # Strides in elements + bt_stride0, bt_stride1 = block_table_c.stride() + ti_stride0, ti_stride1 = token_indices_c.stride() + out_stride0, out_stride1 = out.stride() + + # Exact 2D grid: tokens × column tiles + grid = (num_tokens, tiles_per_row) + + _convert_req_index_to_global_index_kernel[grid]( + req_id_c, + block_table_c, + token_indices_c, + out, + # shapes / constexprs + max_num_blocks_per_req, + BLOCK_SIZE, + BLOCK_N, + # strides + bt_stride0, + bt_stride1, + ti_stride0, + ti_stride1, + out_stride0, + out_stride1, + ) + return out + + +@dataclass +class FlashMLASparseMetadataBuilder( + AttentionMetadataBuilder[FlashMLASparseMetadata]): + cudagraph_support: ClassVar[AttentionCGSupport] = \ + AttentionCGSupport.UNIFORM_BATCH + + def __init__(self, kv_cache_spec: AttentionSpec, layer_names: list[str], + vllm_config: VllmConfig, device: torch.device): + + cache_config = vllm_config.cache_config + self.kv_cache_spec = kv_cache_spec + self.model_config = vllm_config.model_config + parallel_config = vllm_config.parallel_config + self.device = device + + props = torch.cuda.get_device_properties(device) + sm_count = props.multi_processor_count + + self.num_heads = self.model_config.get_num_attention_heads( + parallel_config) + self.mla_dims = get_mla_dims(self.model_config) + self.topk_tokens = vllm_config.model_config.hf_config.index_topk + self.use_fp8_kv_cache = cache_config.cache_dtype == "fp8_ds_mla" + self.topk_tokens_tensor = torch.tensor([self.topk_tokens], + device=device, + dtype=torch.int32) + self.max_model_len_tensor = torch.tensor( + [self.model_config.max_model_len], + device=device, + dtype=torch.int32) + # this is ignored by `flash_mla_with_kvcache` if indices not None + self.dummy_block_table = torch.empty((1, 1), + dtype=torch.int32, + device=self.device) + + # Equation taken from FlashMLA/csrc/pybind.cpp + h_q, h_k = self.num_heads, 1 + s_q = 1 # inversely proportional to s_q, so s_q = 1 is the largest + max_num_sm_parts = int( + max((sm_count // 2) / h_k // (cdiv(h_q // h_k, 2 * 64) * s_q), 1)) + if current_platform.is_device_capability(100): + max_num_sm_parts *= 2 + self.tile_scheduler_metadata_buffer = torch.empty( + # TileSchedulerMetaDataSize = 8 + # see: FlashMLA/csrc/params.h + (max_num_sm_parts, 8), + dtype=torch.int32, + device=device) + self.num_splits_buffer = torch.empty( + # We pack all the tokens into one batch for sparse attention. + # Otherwise, we can exceed the sm of `get_mla_metadata`. 
+ ( + 2, ), + dtype=torch.int32, + device=device) + self.req_id_per_token_buffer = torch.empty( + (vllm_config.scheduler_config.max_num_batched_tokens, ), + dtype=torch.int32, + device=device) + + def build(self, + common_prefix_len: int, + common_attn_metadata: CommonAttentionMetadata, + fast_build: bool = False) -> FlashMLASparseMetadata: + + num_tokens = common_attn_metadata.num_actual_tokens + starts = np.asarray(common_attn_metadata.query_start_loc_cpu, + dtype=np.int32) + seg_lengths = np.diff(starts) + req_id_per_token = np.repeat( + np.arange(seg_lengths.shape[0], dtype=np.int32), seg_lengths) + # Zero-fill for cudagraphs + self.req_id_per_token_buffer.fill_(0) + self.req_id_per_token_buffer[:req_id_per_token.shape[0]]\ + .copy_(torch.from_numpy(req_id_per_token), non_blocking=True) + req_id_per_token = self.req_id_per_token_buffer[:num_tokens] + + fp8_extra_metadata = None + if self.use_fp8_kv_cache: + tile_scheduler_metadata, num_splits = get_mla_metadata( + cache_seqlens=self.topk_tokens_tensor, + num_q_tokens_per_head_k=num_tokens * self.num_heads, + topk=self.topk_tokens, + num_heads_q=self.num_heads, + num_heads_k=1, + is_fp8_kvcache=True, + ) + + num_sm_parts = tile_scheduler_metadata.size(0) + # Copy to persistent buffer for full-CG support + tile_scheduler_metadata_buffer = \ + self.tile_scheduler_metadata_buffer[:num_sm_parts] + tile_scheduler_metadata_buffer.copy_(tile_scheduler_metadata) + self.num_splits_buffer.copy_(num_splits) + + fp8_extra_metadata = FlashMLASparseMetadata.FP8KernelMetadata( + scheduler_metadata=tile_scheduler_metadata_buffer, + num_splits=self.num_splits_buffer, + # cache_lens and block_table are basically unused in sparse case + # but the decode kernel will treat -1 and indices >= cache_lens + # as invalid so we make sure cache_lens is large enough to not + # accidentally mark indices invalid, we will use -1 exclusively + # to mark invalid indices + cache_lens=self.max_model_len_tensor, + dummy_block_table=self.dummy_block_table) + + metadata = FlashMLASparseMetadata( + num_reqs=common_attn_metadata.num_reqs, + max_query_len=common_attn_metadata.max_query_len, + max_seq_len=common_attn_metadata.max_seq_len, + num_actual_tokens=common_attn_metadata.num_actual_tokens, + query_start_loc=common_attn_metadata.query_start_loc, + slot_mapping=common_attn_metadata.slot_mapping, + block_table=common_attn_metadata.block_table_tensor, + req_id_per_token=req_id_per_token, + block_size=self.kv_cache_spec.block_size, + topk_tokens=self.topk_tokens, + fp8_extra_metadata=fp8_extra_metadata, + ) + return metadata + + +class FlashMLASparseImpl(MLACommonBaseImpl[FlashMLASparseMetadata]): + + def __init__( + self, + num_heads: int, + head_size: int, + scale: float, + num_kv_heads: int, + alibi_slopes: Optional[list[float]], + sliding_window: Optional[int], + kv_cache_dtype: str, + logits_soft_cap: Optional[float], + attn_type: str, + kv_sharing_target_layer_name: Optional[str], + # MLA Specific Arguments + topk_indice_buffer: Optional[torch.Tensor] = None, + indexer: Optional["Indexer"] = None, + **mla_args) -> None: + super().__init__(num_heads, head_size, scale, num_kv_heads, + alibi_slopes, sliding_window, kv_cache_dtype, + logits_soft_cap, attn_type, + kv_sharing_target_layer_name, **mla_args) + self.softmax_scale = scale + assert indexer is not None + self.topk_indices_buffer = indexer.topk_indices_buffer + self.padding = 128 if current_platform.is_device_capability( + 100) else 64 + + def _forward_bf16_kv( + self, q: torch.Tensor, kv_c_and_k_pe_cache: 
torch.Tensor, + topk_indices: torch.Tensor, + attn_metadata: FlashMLASparseMetadata) -> torch.Tensor: + num_tokens = q.shape[0] + kv_c_and_k_pe_cache = kv_c_and_k_pe_cache.view( + -1, 1, kv_c_and_k_pe_cache.shape[-1]) + + # NOTE(Chen): kernel requires num_local_head to be a multiple of + # 64 on hopper and 128 on blackwell + if self.num_heads % self.padding != 0: + assert self.padding % self.num_heads == 0 + logger.warning_once(f"padding num_heads to {self.padding} \ + due to sparse attn kernel requirement") + q_padded = q.new_empty((q.shape[0], self.padding, q.shape[2])) + q_padded[:, :self.num_heads, :] = q + q = q_padded + + topk_indices = topk_indices.view(num_tokens, 1, -1) + output = flash_mla_sparse_prefill(q, kv_c_and_k_pe_cache, topk_indices, + self.softmax_scale)[0] + output = output[:, :self.num_heads, :] + return output + + def _forward_fp8_kv(self, q: torch.Tensor, + kv_c_and_k_pe_cache: torch.Tensor, + topk_indices: torch.Tensor, + attn_metadata: FlashMLASparseMetadata) -> torch.Tensor: + + assert attn_metadata.fp8_extra_metadata is not None + extra_metadata = attn_metadata.fp8_extra_metadata + + _attn_out, _ = flash_mla_with_kvcache( + q=q.unsqueeze(0), # unsqueeze to add batch_dim + k_cache=kv_c_and_k_pe_cache.view(torch.uint8).unsqueeze(-2), + block_table=extra_metadata.dummy_block_table, + head_dim_v=512, + cache_seqlens=extra_metadata.cache_lens, + tile_scheduler_metadata=extra_metadata.scheduler_metadata, + num_splits=extra_metadata.num_splits, + is_fp8_kvcache=True, + indices=topk_indices.unsqueeze(0), # unsqueeze to add batch_dim + softmax_scale=self.softmax_scale, + ) + + return _attn_out + + def forward( + self, + layer: AttentionLayer, + q: torch.Tensor, + k_c_normed: torch.Tensor, # key in unified attn + k_pe: torch.Tensor, # value in unified attn + kv_cache: torch.Tensor, + attn_metadata: FlashMLASparseMetadata, + output: Optional[torch.Tensor] = None, + output_scale: Optional[torch.Tensor] = None, + output_block_scale: Optional[torch.Tensor] = None, + ) -> torch.Tensor: + # NOTE(lucas): for the sparse FlashMLA kernels the kernels want to use + # MQA 576/512 approach for both prefill and decode + + assert output is not None, "Output tensor must be provided." + + if output_scale is not None or output_block_scale is not None: + raise NotImplementedError( + "fused output quantization is not yet supported" + " for MLACommonImpl") + + if attn_metadata is None: + # The zero fill is required when used with DP + EP + # to ensure all ranks within a DP group compute the + # same expert outputs. + return output.fill_(0) + + num_actual_toks = attn_metadata.num_actual_tokens + + # Inputs and outputs may be padded for CUDA graphs + + q = q[:num_actual_toks, ...] + k_c_normed = k_c_normed[:num_actual_toks, ...] + k_pe = k_pe[:num_actual_toks, ...] 
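+        # The projection below maps q_nope into the 512-dim KV latent space
+        # with the absorbed W_UK_T weights and re-attaches the 64-dim RoPE
+        # part, forming the 576-dim MQA queries used by the sparse kernels.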
+ + q_nope, q_pe = q.split([self.qk_nope_head_dim, self.qk_rope_head_dim], + dim=-1) + # Convert from (B, N, P) to (N, B, P) + q_nope = q_nope.transpose(0, 1) + # Multiply (N, B, P) x (N, P, L) -> (N, B, L) + ql_nope = torch.bmm(q_nope, self.W_UK_T) + # Convert from (N, B, L) to (B, N, L) + ql_nope = ql_nope.transpose(0, 1) + + topk_indices = self.topk_indices_buffer[:num_actual_toks] + + # TODO: handle index / kv_cache correctly + topk_indices_global = triton_convert_req_index_to_global_index( + attn_metadata.req_id_per_token, + attn_metadata.block_table, + topk_indices, + BLOCK_SIZE=attn_metadata.block_size, + NUM_TOPK_TOKENS=attn_metadata.topk_tokens, + ) + + q = torch.cat([ql_nope, q_pe], dim=-1) + + # write the latent and rope to kv cache + if kv_cache.numel() > 0: + ops.concat_and_cache_mla( + k_c_normed, + k_pe.squeeze(1), + kv_cache, + attn_metadata.slot_mapping.flatten(), + kv_cache_dtype=self.kv_cache_dtype, + scale=layer._k_scale, + ) + + if self.kv_cache_dtype != "fp8_ds_mla": + attn_out = self._forward_bf16_kv(q, kv_cache, topk_indices_global, + attn_metadata) + else: + attn_out = self._forward_fp8_kv(q, kv_cache, topk_indices_global, + attn_metadata) + + self._v_up_proj(attn_out, out=output[:num_actual_toks]) + return output diff --git a/vllm/v1/attention/backends/mla/indexer.py b/vllm/v1/attention/backends/mla/indexer.py new file mode 100644 index 000000000000..4e6b974ad74d --- /dev/null +++ b/vllm/v1/attention/backends/mla/indexer.py @@ -0,0 +1,293 @@ +# SPDX-License-Identifier: Apache-2.0 +# SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from dataclasses import dataclass +from typing import ClassVar, Optional + +import torch + +from vllm.attention.backends.abstract import (AttentionBackend, + AttentionMetadata) +from vllm.config import VllmConfig +from vllm.logger import init_logger +from vllm.utils.deep_gemm import get_paged_mqa_logits_metadata +from vllm.v1.attention.backends.utils import (AttentionCGSupport, + AttentionMetadataBuilder, + CommonAttentionMetadata, + split_decodes_and_prefills) + +logger = init_logger(__name__) + + +class DeepseekV32IndexerBackend(AttentionBackend): + + @staticmethod + def get_metadata_cls() -> type["AttentionMetadata"]: + return DeepseekV32IndexerMetadata + + @classmethod + def get_supported_head_sizes(cls) -> list[int]: + return [32, 64, 128] + + @staticmethod + def get_builder_cls() -> type["DeepseekV32IndexerMetadataBuilder"]: + return DeepseekV32IndexerMetadataBuilder + + @staticmethod + def get_kv_cache_shape( + num_blocks: int, + block_size: int, + num_kv_heads: int, + head_size: int, + cache_dtype_str: str = "auto", + ) -> tuple[int, ...]: + assert num_kv_heads == 1 + return (num_blocks, block_size, head_size) + + @staticmethod + def get_kv_cache_stride_order() -> tuple[int, ...]: + return (0, 1, 2) + + +@dataclass +class DeepseekV32IndexerPrefillMetadata: + block_table: torch.Tensor + query_start_loc: torch.Tensor + max_query_len: int + cu_seqlen_ks: torch.Tensor + cu_seqlen_ke: torch.Tensor + cu_seq_lens: torch.Tensor + total_seq_lens: int + + +@dataclass +class DeepSeekV32IndexerDecodeMetadata: + block_table: torch.Tensor + seq_lens: torch.Tensor + decode_lens: torch.Tensor + requires_padding: bool + schedule_metadata: torch.Tensor + + +@dataclass +class DeepseekV32IndexerMetadata: + + # FIXME (zyongye) + # hacky way to access the data now, need to be in chunked meta + seq_lens: torch.Tensor + + num_reqs: int + max_query_len: int + max_seq_len: int + + num_actual_tokens: int # Number of tokens excluding 
padding. + query_start_loc: torch.Tensor + slot_mapping: torch.Tensor + # The dimension of the attention heads + head_dim: int + + # New for MLA (compared to FlashAttention) + # For handling prefill decode split + num_decodes: int + num_decode_tokens: int + num_prefills: int + num_prefill_tokens: int + + decode: Optional[DeepSeekV32IndexerDecodeMetadata] = None + prefill: Optional[DeepseekV32IndexerPrefillMetadata] = None + + +# TODO (zyongye) optimize this, this is now vibe coded +def kv_spans_from_batches( + start_seq_loc: torch.Tensor, + seq_len_per_batch: torch.Tensor) -> tuple[torch.Tensor, torch.Tensor]: + """ + Args: + start_seq_loc: 1D long tensor [B+1], cumulative counts of + selected tokens per batch. + Example: [0, 2, 4, 7] -> + batch sizes (selected) [2, 2, 3], N=7 tokens total. + seq_len_per_batch: 1D long tensor [B], + full sequence length (KV length) of each batch. + Example: [5, 9, 4]. + + Returns: + start_tensor: 1D long tensor [N], start offset in the + concatenated KV cache for each token's batch. + end_location: 1D long tensor [N], + **exclusive** end = start + token's local position. + (So the attended KV slice is kv[start:end].) + + Assumes each batch contributes its full `seq_len_per_batch[i]` + keys to the KV cache, andthe selected tokens within a batch + are the **last** `counts[i]` positions of that sequence. + """ + q = start_seq_loc.to(dtype=torch.long) + L = seq_len_per_batch.to(dtype=torch.long, device=q.device) + assert q.dim() == 1 and L.dim() == 1 + assert q.numel() == L.numel() + 1, "start_seq_loc must have length B+1" + + # Selected tokens per batch and totals + counts = q[1:] - q[:-1] # [B] + N = int(q[-1].item()) # total selected tokens + B = L.numel() + device = L.device + + if N == 0: + return (torch.empty(0, dtype=torch.long, device=device), + torch.empty(0, dtype=torch.long, device=device)) + + # KV start offsets per batch in the concatenated KV cache + kv_starts_per_batch = torch.cumsum(L, dim=0) - L # [B] + + # For each selected token, which batch does it belong to? + batch_id = torch.repeat_interleave(torch.arange(B, device=device), + counts) # [N] + + # Map batch KV start to each token + start_tensor = kv_starts_per_batch[batch_id] # [N] + + # End-align local positions inside each batch: + # local_pos = L[b] - counts[b] + (1..counts[b]) for each batch b + L_expand = torch.repeat_interleave(L, counts) # [N] + m_expand = torch.repeat_interleave(counts, counts) # [N] + # position within the selected block: 1..counts[b] + pos_within = (torch.arange(N, device=device, dtype=torch.long) - + torch.repeat_interleave(q[:-1], counts) + 1) + + local_pos = L_expand - m_expand + pos_within # [N], 1-based + end_location = start_tensor + local_pos # exclusive end + + return start_tensor.int(), end_location.int() + + +def get_max_prefill_buffer_size(vllm_config: VllmConfig): + max_model_len = vllm_config.model_config.max_model_len + # max_num_batched_tokens = \ + # vllm_config.scheduler_config.max_num_batched_tokens + max_num_seq = vllm_config.scheduler_config.max_num_seqs + # NOTE(Chen): an estimated max size of flattened_kv. Need to double check. + return max_model_len * max_num_seq + + +class DeepseekV32IndexerMetadataBuilder(AttentionMetadataBuilder): + cudagraph_support: ClassVar[AttentionCGSupport] = \ + AttentionCGSupport.UNIFORM_BATCH + + reorder_batch_threshold: int = 1 + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + scheduler_config = self.vllm_config.scheduler_config + #NOTE(Chen):an estimated max size of flattened_kv. 
Need to double check. + self.max_prefill_buffer_size = get_max_prefill_buffer_size( + self.vllm_config) + self.num_speculative_tokens = ( + self.vllm_config.speculative_config.num_speculative_tokens + if self.vllm_config.speculative_config else 0) + # Now deepgemm fp8_paged_mqa_logits does not support next_n > 2 + self.reorder_batch_threshold += min(self.num_speculative_tokens, 1) + + props = torch.cuda.get_device_properties(self.device) + sm_count = props.multi_processor_count + self.num_sms = sm_count + + self.decode_lens_buffer = torch.empty( + (scheduler_config.max_num_seqs, ), + dtype=torch.int32, + device=self.device) + + # See: DeepGMM/csrc/apis/attention.hpp + self.scheduler_metadata_buffer = torch.empty((self.num_sms + 1, 2), + dtype=torch.int32, + device=self.device) + + def build(self, + common_prefix_len: int, + common_attn_metadata: CommonAttentionMetadata, + fast_build: bool = False) -> DeepseekV32IndexerMetadata: + + num_reqs = common_attn_metadata.num_reqs + num_tokens = common_attn_metadata.num_actual_tokens + + device = self.device + block_table_tensor = common_attn_metadata.block_table_tensor + + query_start_loc = common_attn_metadata.query_start_loc + + num_decodes, num_prefills, num_decode_tokens, num_prefill_tokens = \ + split_decodes_and_prefills( + common_attn_metadata, + decode_threshold=self.reorder_batch_threshold) + + assert num_decodes + num_prefills == num_reqs + assert num_decode_tokens + num_prefill_tokens == num_tokens + + prefill_metadata = None + if num_prefills > 0: + reqs_start = num_decodes + prefill_query_start_loc = query_start_loc[ + reqs_start:] - query_start_loc[reqs_start] + cu_seqlen_ks, cu_seqlen_ke = kv_spans_from_batches( + prefill_query_start_loc, + common_attn_metadata.seq_lens[reqs_start:]) + total_seq_lens = common_attn_metadata.seq_lens[reqs_start:].sum() + assert total_seq_lens < self.max_prefill_buffer_size + cu_seq_lens = torch.cat([ + torch.zeros(1, dtype=torch.int32, device=device), + common_attn_metadata.seq_lens[reqs_start:].cumsum(dim=0) + ]).to(torch.int32).cuda() + prefill_metadata = DeepseekV32IndexerPrefillMetadata( + block_table=block_table_tensor[reqs_start:, ...], + query_start_loc=prefill_query_start_loc, + max_query_len=common_attn_metadata.max_query_len, + cu_seqlen_ks=cu_seqlen_ks, + cu_seqlen_ke=cu_seqlen_ke, + cu_seq_lens=cu_seq_lens, + total_seq_lens=total_seq_lens, + ) + + decode_metadata = None + if num_decodes > 0: + torch.diff(common_attn_metadata.query_start_loc[:num_decodes + 1], + out=self.decode_lens_buffer[:num_decodes]) + decode_lens = self.decode_lens_buffer[:num_decodes] + decode_lens_cpu = torch.diff( + common_attn_metadata.query_start_loc_cpu[:num_decodes + 1]) + + # Use CPU to avoid GPU sync; breaking async scheduling + requires_padding = (decode_lens_cpu.max() + > decode_lens_cpu.min()).item() + + seq_lens = common_attn_metadata.seq_lens[:num_decodes] + + self.scheduler_metadata_buffer[:] = get_paged_mqa_logits_metadata( + seq_lens, self.kv_cache_spec.block_size, self.num_sms) + decode_metadata = DeepSeekV32IndexerDecodeMetadata( + block_table=common_attn_metadata. 
+ block_table_tensor[:num_decodes, ...], + seq_lens=common_attn_metadata.seq_lens[:num_decodes], + decode_lens=decode_lens, + requires_padding=requires_padding, + schedule_metadata=self.scheduler_metadata_buffer, + ) + + attn_metadata = DeepseekV32IndexerMetadata( + seq_lens=common_attn_metadata.seq_lens, + num_reqs=common_attn_metadata.num_reqs, + max_query_len=common_attn_metadata.max_query_len, + max_seq_len=common_attn_metadata.max_seq_len, + num_actual_tokens=common_attn_metadata.num_actual_tokens, + query_start_loc=common_attn_metadata.query_start_loc, + slot_mapping=common_attn_metadata.slot_mapping, + head_dim=128, + num_decodes=num_decodes, + num_decode_tokens=num_decode_tokens, + num_prefills=num_prefills, + num_prefill_tokens=num_prefill_tokens, + prefill=prefill_metadata, + decode=decode_metadata, + ) + + # if get_tensor_model_parallel_rank() == 0: + # logger.info(f"attn_metadata: {attn_metadata}") + return attn_metadata diff --git a/vllm/v1/attention/backends/pallas.py b/vllm/v1/attention/backends/pallas.py index 4cb4b85956bc..7ac1a063f565 100644 --- a/vllm/v1/attention/backends/pallas.py +++ b/vllm/v1/attention/backends/pallas.py @@ -102,6 +102,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: padded_head_size = cdiv( head_size, TPU_HEAD_SIZE_ALIGNMENT) * TPU_HEAD_SIZE_ALIGNMENT diff --git a/vllm/v1/attention/backends/rocm_aiter_fa.py b/vllm/v1/attention/backends/rocm_aiter_fa.py index 96f8e92a2039..ed63c7b1bda6 100644 --- a/vllm/v1/attention/backends/rocm_aiter_fa.py +++ b/vllm/v1/attention/backends/rocm_aiter_fa.py @@ -360,6 +360,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") diff --git a/vllm/v1/attention/backends/tree_attn.py b/vllm/v1/attention/backends/tree_attn.py index 1d4ab4c96728..583756129a29 100644 --- a/vllm/v1/attention/backends/tree_attn.py +++ b/vllm/v1/attention/backends/tree_attn.py @@ -68,6 +68,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") diff --git a/vllm/v1/attention/backends/triton_attn.py b/vllm/v1/attention/backends/triton_attn.py index fc5ecf6ed3b6..3983c5edc76f 100644 --- a/vllm/v1/attention/backends/triton_attn.py +++ b/vllm/v1/attention/backends/triton_attn.py @@ -171,6 +171,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") diff --git a/vllm/v1/attention/backends/xformers.py b/vllm/v1/attention/backends/xformers.py index f739e6832274..9d667ee04f75 100644 --- a/vllm/v1/attention/backends/xformers.py +++ b/vllm/v1/attention/backends/xformers.py @@ -106,6 +106,7 @@ def get_kv_cache_shape( block_size: int, num_kv_heads: int, head_size: int, + cache_dtype_str: str = "auto", ) -> tuple[int, ...]: if block_size % 16 != 0: raise ValueError("Block size must be a multiple of 16.") diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 47a41322c423..2ff1bb681d80 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -1103,7 +1103,9 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]): 
kv_cache_spec: The kv cache spec of each attention layer in the model """ - if is_kv_cache_spec_uniform(kv_cache_spec): + if is_kv_cache_spec_uniform( + kv_cache_spec) or UniformTypeKVCacheSpecs.is_uniform_type( + kv_cache_spec): return logger.warning( @@ -1128,7 +1130,6 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]): num_kv_heads=spec.num_kv_heads, head_size=spec.head_size, dtype=spec.dtype, - use_mla=spec.use_mla, sliding_window=spec.sliding_window, ) elif isinstance(spec, ChunkedLocalAttentionSpec): @@ -1137,11 +1138,11 @@ def unify_hybrid_kv_cache_specs(kv_cache_spec: dict[str, KVCacheSpec]): num_kv_heads=spec.num_kv_heads, head_size=spec.head_size, dtype=spec.dtype, - use_mla=spec.use_mla, attention_chunk_size=spec.attention_chunk_size, ) - if not is_kv_cache_spec_uniform(kv_cache_spec): + if not (is_kv_cache_spec_uniform(kv_cache_spec) + or UniformTypeKVCacheSpecs.is_uniform_type(kv_cache_spec)): raise ValueError("Hybrid KV cache manager is disabled but failed to " "convert the KV cache specs to one unified type.") diff --git a/vllm/v1/core/single_type_kv_cache_manager.py b/vllm/v1/core/single_type_kv_cache_manager.py index d27239164b0d..e889f7804e84 100644 --- a/vllm/v1/core/single_type_kv_cache_manager.py +++ b/vllm/v1/core/single_type_kv_cache_manager.py @@ -10,7 +10,7 @@ from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec, CrossAttentionSpec, FullAttentionSpec, KVCacheSpec, MambaSpec, - SlidingWindowSpec) + MLAAttentionSpec, SlidingWindowSpec) from vllm.v1.request import Request @@ -656,6 +656,7 @@ def remove_skipped_blocks(self, request_id: str, spec_manager_map: dict[type[KVCacheSpec], type[SingleTypeKVCacheManager]] = { FullAttentionSpec: FullAttentionManager, + MLAAttentionSpec: FullAttentionManager, SlidingWindowSpec: SlidingWindowManager, ChunkedLocalAttentionSpec: ChunkedLocalAttentionManager, MambaSpec: MambaManager, diff --git a/vllm/v1/kv_cache_interface.py b/vllm/v1/kv_cache_interface.py index f72cc8f93a6c..281816653540 100644 --- a/vllm/v1/kv_cache_interface.py +++ b/vllm/v1/kv_cache_interface.py @@ -59,13 +59,10 @@ class AttentionSpec(KVCacheSpec): num_kv_heads: int head_size: int dtype: torch.dtype - use_mla: bool @property def page_size_bytes(self) -> int: - # For MLA we only store a single latent vector - coef = 1 if self.use_mla else 2 - return coef * self.block_size * self.num_kv_heads * self.head_size \ + return 2 * self.block_size * self.num_kv_heads * self.head_size \ * get_dtype_size(self.dtype) @@ -118,12 +115,13 @@ def merge(cls, specs: list[Self]) -> Self: if spec.sliding_window is not None) attention_chunk_size = set(spec.attention_chunk_size for spec in specs if spec.attention_chunk_size is not None) + assert not any(isinstance(spec, MLAAttentionSpec) for spec in specs), ( + "MLAAttentionSpec should be merged in MLAAttentionSpec.merge") merged_spec = cls( block_size=specs[0].block_size, num_kv_heads=specs[0].num_kv_heads, head_size=specs[0].head_size, dtype=specs[0].dtype, - use_mla=specs[0].use_mla, sliding_window=cls.merge_window_sizes(sliding_window), attention_chunk_size=cls.merge_window_sizes(attention_chunk_size), ) @@ -140,6 +138,38 @@ def merge(cls, specs: list[Self]) -> Self: return merged_spec +@dataclass(frozen=True) +class MLAAttentionSpec(FullAttentionSpec): + # TODO(Lucas/Chen): less hacky way to do this + cache_dtype_str: Optional[str] = None + + @property + def page_size_bytes(self) -> int: + if self.cache_dtype_str == "fp8_ds_mla": + # See `vllm/v1/attention/backends/mla/flashmla_sparse.py` + # 
for details. + return self.block_size * 656 + return self.block_size * self.num_kv_heads * self.head_size \ + * get_dtype_size(self.dtype) + + @classmethod + def merge(cls, specs: list[Self]) -> Self: + assert all(isinstance(spec, MLAAttentionSpec) for spec in specs), ( + "All attention layers in the same KV cache group must be " + "MLAAttentionSpec.") + cache_dtype_str_set = set(spec.cache_dtype_str for spec in specs) + assert len(cache_dtype_str_set) == 1, ( + "All attention layers in the same KV cache group must use the same " + "quantization method.") + return cls( + block_size=specs[0].block_size, + num_kv_heads=specs[0].num_kv_heads, + head_size=specs[0].head_size, + dtype=specs[0].dtype, + cache_dtype_str=cache_dtype_str_set.pop(), + ) + + @dataclass(frozen=True) class ChunkedLocalAttentionSpec(AttentionSpec): attention_chunk_size: int @@ -163,9 +193,6 @@ def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int: class SlidingWindowSpec(AttentionSpec): sliding_window: int - def __post_init__(self): - assert not self.use_mla, "MLA is not supported for sliding window" - def max_memory_usage_bytes(self, vllm_config: VllmConfig) -> int: assert vllm_config.parallel_config.decode_context_parallel_size == 1, \ "DCP not support sliding window." @@ -266,9 +293,13 @@ def is_uniform_type(cls, kv_cache_specs: dict[str, KVCacheSpec]) -> bool: # Different block sizes, not uniform. return False one_spec = next(iter(kv_cache_specs.values())) - if isinstance(one_spec, (FullAttentionSpec, CrossAttentionSpec)): + if isinstance(one_spec, FullAttentionSpec): + return all( + isinstance(spec, FullAttentionSpec) + for spec in kv_cache_specs.values()) + elif isinstance(one_spec, CrossAttentionSpec): return all( - isinstance(spec, type(one_spec)) + isinstance(spec, CrossAttentionSpec) for spec in kv_cache_specs.values()) elif isinstance(one_spec, SlidingWindowSpec): return all( diff --git a/vllm/v1/spec_decode/eagle.py b/vllm/v1/spec_decode/eagle.py index a2f7dbe5703f..bb11a543fd8b 100644 --- a/vllm/v1/spec_decode/eagle.py +++ b/vllm/v1/spec_decode/eagle.py @@ -17,6 +17,7 @@ from vllm.logger import init_logger from vllm.model_executor.model_loader import get_model from vllm.model_executor.models import supports_multimodal +from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache from vllm.model_executor.models.llama_eagle3 import Eagle3LlamaForCausalLM from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.platforms import current_platform @@ -32,6 +33,7 @@ from vllm.v1.spec_decode.metadata import SpecDecodeMetadata from vllm.v1.utils import CpuGpuBuffer from vllm.v1.worker.gpu_input_batch import CachedRequestState, InputBatch +from vllm.v1.worker.ubatching import dbo_current_ubatch_id logger = init_logger(__name__) @@ -52,6 +54,7 @@ def __init__( self.method = self.speculative_config.method self.runner = runner + self.device = device self.dtype = vllm_config.model_config.dtype self.max_model_len = vllm_config.model_config.max_model_len self.block_size = vllm_config.cache_config.block_size @@ -202,20 +205,30 @@ def propose( assert self.runner is not None - # Select the correct attention metadata builders for EAGLE layers. - # Get the attention metadata builders once and reuse for later. 
- builder = (self._get_attention_metadata_builder() - if self.attn_metadata_builder is None else - self.attn_metadata_builder) - attn_metadata = builder.build_for_drafting( # type: ignore - common_attn_metadata=common_attn_metadata, - draft_index=0) - + # FIXME: need to consider multiple kv_cache_groups + ubatch_id = dbo_current_ubatch_id() + attn_metadata_builder = \ + self.runner.attn_groups[0][0].metadata_builders[ubatch_id] + attn_metadata = attn_metadata_builder.build_for_drafting( + common_attn_metadata=common_attn_metadata, draft_index=0) + # FIXME: support hybrid kv for draft model (remove separate indexer) + if self.draft_indexer_metadata_builder: + draft_indexer_metadata = ( + self.draft_indexer_metadata_builder.build_for_drafting( + common_attn_metadata=common_attn_metadata, + draft_index=0, + )) + else: + draft_indexer_metadata = None # At this moment, we assume all eagle layers belong to the same KV # cache group, thus using the same attention metadata. per_layer_attn_metadata = {} for layer_name in self.attn_layer_names: per_layer_attn_metadata[layer_name] = attn_metadata + for layer_name in self.indexer_layer_names: + assert draft_indexer_metadata is not None + per_layer_attn_metadata[layer_name] = draft_indexer_metadata + if self.use_cuda_graph and \ num_tokens <= self.cudagraph_batch_sizes[-1]: num_input_tokens = self.vllm_config.pad_for_cudagraph(num_tokens) @@ -370,7 +383,7 @@ def propose( exceeds_max_model_len, PADDING_SLOT_ID) # Rebuild attention metadata - attn_metadata = builder.build_for_drafting( # type: ignore + attn_metadata = attn_metadata_builder.build_for_drafting( # type: ignore common_attn_metadata=common_attn_metadata, draft_index=token_index + 1) for layer_name in self.attn_layer_names: @@ -846,6 +859,10 @@ def load_model(self, target_model: nn.Module) -> None: self.vllm_config.speculative_config.draft_model_config target_attn_layer_names = set( get_layers_from_vllm_config(self.vllm_config, Attention).keys()) + # FIXME: support hybrid kv for draft model + target_indexer_layer_names = set( + get_layers_from_vllm_config(self.vllm_config, + DeepseekV32IndexerCache).keys()) from vllm.compilation.backends import set_model_tag with set_model_tag("eagle_head"): @@ -855,8 +872,25 @@ def load_model(self, target_model: nn.Module) -> None: draft_attn_layer_names = ( get_layers_from_vllm_config(self.vllm_config, Attention).keys() - target_attn_layer_names) - + indexer_layers = get_layers_from_vllm_config(self.vllm_config, + DeepseekV32IndexerCache) + draft_indexer_layer_names = (indexer_layers.keys() - + target_indexer_layer_names) self.attn_layer_names = list(draft_attn_layer_names) + self.indexer_layer_names = list(draft_indexer_layer_names) + + if self.indexer_layer_names: + first_layer = self.indexer_layer_names[0] + self.draft_indexer_metadata_builder = ( + indexer_layers[first_layer].get_attn_backend().get_builder_cls( + )( + indexer_layers[first_layer].get_kv_cache_spec(), + self.indexer_layer_names, + self.vllm_config, + self.device, + )) + else: + self.draft_indexer_metadata_builder = None if self.supports_mm_inputs: # Even if the target model is multimodal, we can also use diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index 98e00f6d98a9..bb5c3ea74293 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -40,6 +40,7 @@ from vllm.model_executor.layers.mamba.abstract import MambaBase from vllm.model_executor.layers.rotary_embedding import MRotaryEmbedding from 
vllm.model_executor.model_loader import TensorizerLoader, get_model_loader +from vllm.model_executor.models.deepseek_v2 import DeepseekV32IndexerCache # yapf conflicts with isort for this block # yapf: disable from vllm.model_executor.models.interfaces import (SupportsMultiModal, @@ -80,7 +81,8 @@ EncoderOnlyAttentionSpec, FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, KVCacheSpec, - MambaSpec, SlidingWindowSpec, + MambaSpec, MLAAttentionSpec, + SlidingWindowSpec, UniformTypeKVCacheSpecs) # yapf: enable from vllm.v1.outputs import (EMPTY_MODEL_RUNNER_OUTPUT, AsyncModelRunnerOutput, @@ -3154,7 +3156,7 @@ def _dummy_run( attn_metadata_i = (attn_group\ .get_metadata_builder(ubatch_id=ubid)\ .build_for_cudagraph_capture(common_attn_metadata)) - for layer_name in kv_cache_group_spec.layer_names: + for layer_name in attn_group.layer_names: assert type(attn_metadata) is list attn_metadata[ubid][ layer_name] = attn_metadata_i @@ -3162,7 +3164,7 @@ def _dummy_run( assert type(attn_metadata) is dict attn_metadata_i = attn_group.get_metadata_builder()\ .build_for_cudagraph_capture(common_attn_metadata) - for layer_name in kv_cache_group_spec.layer_names: + for layer_name in attn_group.layer_names: attn_metadata[layer_name] = attn_metadata_i with self.maybe_dummy_run_with_lora(self.lora_config, @@ -3915,8 +3917,11 @@ def _reshape_kv_cache_tensors( if isinstance(kv_cache_spec, AttentionSpec): has_attn = True kv_cache_shape = attn_backend.get_kv_cache_shape( - num_blocks, kv_cache_spec.block_size, - kv_cache_spec.num_kv_heads, kv_cache_spec.head_size) + num_blocks, + kv_cache_spec.block_size, + kv_cache_spec.num_kv_heads, + kv_cache_spec.head_size, + cache_dtype_str=self.cache_config.cache_dtype) dtype = kv_cache_spec.dtype try: kv_cache_stride_order = \ @@ -4101,7 +4106,6 @@ def may_add_encoder_only_layers_to_kv_cache_config(self) -> None: Add encoder-only layers to the KV cache config. 
""" block_size = self.vllm_config.cache_config.block_size - use_mla = self.vllm_config.model_config.use_mla encoder_only_attn_specs: dict[AttentionSpec, list[str]] = defaultdict(list) attn_layers = get_layers_from_vllm_config(self.vllm_config, Attention) @@ -4111,8 +4115,7 @@ def may_add_encoder_only_layers_to_kv_cache_config(self) -> None: block_size=block_size, num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, - dtype=self.kv_cache_dtype, - use_mla=use_mla) + dtype=self.kv_cache_dtype) encoder_only_attn_specs[attn_spec].append(layer_name) self.runner_only_attn_layers.add(layer_name) if len(encoder_only_attn_specs) > 0: @@ -4134,6 +4137,7 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: block_size = self.vllm_config.cache_config.block_size use_mla = self.vllm_config.model_config.use_mla + cache_dtype_str = self.vllm_config.cache_config.cache_dtype kv_cache_spec: dict[str, KVCacheSpec] = {} attn_layers = get_layers_from_vllm_config(self.vllm_config, Attention) for layer_name, attn_module in attn_layers.items(): @@ -4153,13 +4157,21 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: # the attention backends if attn_module.attn_type == AttentionType.DECODER: if attn_module.sliding_window is not None: + assert not use_mla, "MLA is not supported for sliding" \ + "window" kv_cache_spec[layer_name] = SlidingWindowSpec( block_size=block_size, num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, dtype=self.kv_cache_dtype, - sliding_window=attn_module.sliding_window, - use_mla=use_mla) + sliding_window=attn_module.sliding_window) + elif use_mla: + kv_cache_spec[layer_name] = MLAAttentionSpec( + block_size=block_size, + num_kv_heads=attn_module.num_kv_heads, + head_size=attn_module.head_size, + dtype=self.kv_cache_dtype, + cache_dtype_str=cache_dtype_str) elif self.attention_chunk_size is not None \ and isinstance(attn_module, ChunkedLocalAttention): kv_cache_spec[layer_name] = ChunkedLocalAttentionSpec( @@ -4167,22 +4179,19 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, dtype=self.kv_cache_dtype, - attention_chunk_size=self.attention_chunk_size, - use_mla=use_mla) + attention_chunk_size=self.attention_chunk_size) else: kv_cache_spec[layer_name] = FullAttentionSpec( block_size=block_size, num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, - dtype=self.kv_cache_dtype, - use_mla=use_mla) + dtype=self.kv_cache_dtype) elif attn_module.attn_type == AttentionType.ENCODER_DECODER: kv_cache_spec[layer_name] = CrossAttentionSpec( block_size=block_size, num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, - dtype=self.kv_cache_dtype, - use_mla=use_mla) + dtype=self.kv_cache_dtype) elif attn_module.attn_type in (AttentionType.ENCODER, AttentionType.ENCODER_ONLY): # encoder-only attention does not need KV cache. 
@@ -4219,6 +4228,10 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: self.speculative_config.num_speculative_tokens if self.speculative_config else 0), ) + ds_indexer_layers = get_layers_from_vllm_config( + self.vllm_config, DeepseekV32IndexerCache) + for layer_name, ds_indexer_module in ds_indexer_layers.items(): + kv_cache_spec[layer_name] = ds_indexer_module.get_kv_cache_spec() return kv_cache_spec diff --git a/vllm/v1/worker/tpu_model_runner.py b/vllm/v1/worker/tpu_model_runner.py index 2405f978ca73..0b1c3d7c0e88 100644 --- a/vllm/v1/worker/tpu_model_runner.py +++ b/vllm/v1/worker/tpu_model_runner.py @@ -537,7 +537,6 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: head_size=attn_module.head_size, dtype=self.kv_cache_dtype, sliding_window=attn_module.sliding_window, - use_mla=False, ) else: kv_cache_spec[layer_name] = FullAttentionSpec( @@ -545,7 +544,6 @@ def get_kv_cache_spec(self) -> dict[str, KVCacheSpec]: num_kv_heads=attn_module.num_kv_heads, head_size=attn_module.head_size, dtype=self.kv_cache_dtype, - use_mla=False, ) elif attn_module.attn_type in (AttentionType.ENCODER, AttentionType.ENCODER_ONLY): From 8d0afa9b422d77c780ccc642c4fe8177aed7330c Mon Sep 17 00:00:00 2001 From: a120092009 <33205509+a120092009@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:59:47 +0800 Subject: [PATCH 500/518] [Doc] Add Cambricon MLU support (#25942) Signed-off-by: a120092009 --- docs/getting_started/installation/README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/getting_started/installation/README.md b/docs/getting_started/installation/README.md index 5e57d23f4a1d..a4e63e426b9b 100644 --- a/docs/getting_started/installation/README.md +++ b/docs/getting_started/installation/README.md @@ -25,3 +25,4 @@ The backends below live **outside** the main `vllm` repository and follow the | MetaX MACA GPU | N/A, install from source | | | Rebellions ATOM / REBEL NPU | `vllm-rbln` | | | IBM Spyre AIU | `vllm-spyre` | | +| Cambricon MLU | `vllm-mlu` | | From 1ad3aca6828ec3985a1de1dc3f206522fc27a518 Mon Sep 17 00:00:00 2001 From: Sergio Paniego Blanco Date: Tue, 30 Sep 2025 12:10:55 +0200 Subject: [PATCH 501/518] Updated TRL integration docs (#25684) Signed-off-by: sergiopaniego Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Signed-off-by: Sergio Paniego Blanco Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- docs/training/trl.md | 52 +++++++++++++++++++++++++++++++++++++++----- mkdocs.yaml | 1 - 2 files changed, 47 insertions(+), 6 deletions(-) diff --git a/docs/training/trl.md b/docs/training/trl.md index c7c1a5a3bbd1..acf48cc4ecb3 100644 --- a/docs/training/trl.md +++ b/docs/training/trl.md @@ -1,12 +1,54 @@ # Transformers Reinforcement Learning -Transformers Reinforcement Learning (TRL) is a full stack library that provides a set of tools to train transformer language models with methods like Supervised Fine-Tuning (SFT), Group Relative Policy Optimization (GRPO), Direct Preference Optimization (DPO), Reward Modeling, and more. The library is integrated with 🤗 transformers. +[Transformers Reinforcement Learning](https://huggingface.co/docs/trl) (TRL) is a full stack library that provides a set of tools to train transformer language models with methods like Supervised Fine-Tuning (SFT), Group Relative Policy Optimization (GRPO), Direct Preference Optimization (DPO), Reward Modeling, and more. The library is integrated with 🤗 transformers. Online methods such as GRPO or Online DPO require the model to generate completions. 
vLLM can be used to generate these completions! -See the guide [vLLM for fast generation in online methods](https://huggingface.co/docs/trl/main/en/speeding_up_training#vllm-for-fast-generation-in-online-methods) in the TRL documentation for more information. +See the [vLLM integration guide](https://huggingface.co/docs/trl/main/en/vllm_integration) in the TRL documentation for more information. + +TRL currently supports the following online trainers with vLLM: + +- [GRPO](https://huggingface.co/docs/trl/main/en/grpo_trainer) +- [Online DPO](https://huggingface.co/docs/trl/main/en/online_dpo_trainer) +- [RLOO](https://huggingface.co/docs/trl/main/en/rloo_trainer) +- [Nash-MD](https://huggingface.co/docs/trl/main/en/nash_md_trainer) +- [XPO](https://huggingface.co/docs/trl/main/en/xpo_trainer) + +To enable vLLM in TRL, set the `use_vllm` flag in the trainer configuration to `True`. + +## Modes of Using vLLM During Training + +TRL supports **two modes** for integrating vLLM during training: **server mode** and **colocate mode**. You can control how vLLM operates during training with the `vllm_mode` parameter. + +### Server mode + +In **server mode**, vLLM runs as an independent process on dedicated GPUs and communicates with the trainer through HTTP requests. This configuration is ideal when you have separate GPUs for inference, as it isolates generation workloads from training, ensuring stable performance and easier scaling. + +```python +from trl import GRPOConfig + +training_args = GRPOConfig( + ..., + use_vllm=True, + vllm_mode="server", # default value, can be omitted +) +``` + +### Colocate mode + +In **colocate mode**, vLLM runs inside the trainer process and shares GPU memory with the training model. This avoids launching a separate server and can improve GPU utilization, but may lead to memory contention on the training GPUs. + +```python +from trl import GRPOConfig + +training_args = GRPOConfig( + ..., + use_vllm=True, + vllm_mode="colocate", +) +``` + +Some trainers also support **vLLM sleep mode**, which offloads parameters and caches to GPU RAM during training, helping reduce memory usage. Learn more in the [memory optimization docs](https://huggingface.co/docs/trl/main/en/reducing_memory_usage#vllm-sleep-mode). !!! info - For more information on the `use_vllm` flag you can provide to the configs of these online methods, see: - - [`trl.GRPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/grpo_trainer#trl.GRPOConfig.use_vllm) - - [`trl.OnlineDPOConfig.use_vllm`](https://huggingface.co/docs/trl/main/en/online_dpo_trainer#trl.OnlineDPOConfig.use_vllm) + For detailed configuration options and flags, refer to the documentation of the specific trainer you are using. 
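As a concrete illustration of the colocate setup described above, here is a minimal GRPO training sketch; the model name, dataset, and toy reward function are placeholders chosen for the example, not recommendations.

```python
from datasets import load_dataset
from trl import GRPOConfig, GRPOTrainer

dataset = load_dataset("trl-lib/tldr", split="train")

def reward_len(completions, **kwargs):
    # Toy reward: prefer completions close to 200 characters.
    return [-abs(200 - len(c)) for c in completions]

training_args = GRPOConfig(
    output_dir="Qwen2.5-0.5B-GRPO",
    use_vllm=True,
    vllm_mode="colocate",  # vLLM shares the training GPUs
)
trainer = GRPOTrainer(
    model="Qwen/Qwen2.5-0.5B-Instruct",
    reward_funcs=reward_len,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
```

For server mode, the same script works after starting a standalone vLLM server (typically via TRL's `trl vllm-serve` command on dedicated GPUs) and switching `vllm_mode` to `"server"`.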
diff --git a/mkdocs.yaml b/mkdocs.yaml index 1535fcc622cd..6f2be65a18af 100644 --- a/mkdocs.yaml +++ b/mkdocs.yaml @@ -102,7 +102,6 @@ plugins: - https://numpy.org/doc/stable/objects.inv - https://pytorch.org/docs/stable/objects.inv - https://psutil.readthedocs.io/en/stable/objects.inv - - https://huggingface.co/docs/transformers/main/en/objects.inv markdown_extensions: - attr_list From ef6e0e7132ec7bd3dadd10df7486a8cd10cf9c0e Mon Sep 17 00:00:00 2001 From: CSWYF3634076 Date: Tue, 30 Sep 2025 19:11:21 +0800 Subject: [PATCH 502/518] [Bugfix][Model]fix ernie45 moe gate&bias dtype to float32 (#25936) Signed-off-by: wangyafeng --- vllm/model_executor/models/ernie45_moe.py | 5 +++-- vllm/model_executor/models/ernie45_vl_moe.py | 15 ++++++++++----- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index d262e9e9da50..38c5249380c3 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -120,11 +120,12 @@ def __init__( self.gate = ReplicatedLinear(config.hidden_size, config.moe_num_experts, bias=False, + params_dtype=torch.float32, quant_config=None, prefix=f"{prefix}.gate") self.gate.e_score_correction_bias = nn.Parameter( - torch.empty(config.moe_num_experts)) + torch.empty(config.moe_num_experts, dtype=torch.float32)) self.experts = FusedMoE( num_experts=config.moe_num_experts, @@ -157,7 +158,7 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: if self.has_shared_experts: shared_output = self.shared_experts(hidden_states) - router_logits, _ = self.gate(hidden_states) + router_logits, _ = self.gate(hidden_states.to(dtype=torch.float32)) final_hidden_states = self.experts(hidden_states=hidden_states, router_logits=router_logits) diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index f55016f7ccb3..21772f766b40 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -199,7 +199,7 @@ def __init__( assert config.moe_num_experts[0] == config.moe_num_experts[1] self.e_score_correction_bias = nn.Parameter( - torch.empty(2, config.moe_num_experts[0])) + torch.empty(2, config.moe_num_experts[0], dtype=torch.float32)) assert text_moe_layer_start_index <= text_moe_layer_end_index @@ -209,6 +209,7 @@ def __init__( config.hidden_size, config.moe_num_experts[0], bias=False, + params_dtype=torch.float32, quant_config=quant_config, prefix=f"{prefix}.text_experts_gate") @@ -238,6 +239,7 @@ def __init__( config.hidden_size, config.moe_num_experts[1], bias=False, + params_dtype=torch.float32, quant_config=quant_config, prefix=f"{prefix}.vision_experts_gate") @@ -288,7 +290,8 @@ def forward( if visual_token_mask is not None and visual_token_mask.all(): # only vision modal input - router_logits, _ = self.vision_experts_gate(hidden_states) + router_logits, _ = self.vision_experts_gate( + hidden_states.to(dtype=torch.float32)) final_hidden_states = self.vision_experts( hidden_states=hidden_states, router_logits=router_logits) elif visual_token_mask is not None and visual_token_mask.any(): @@ -303,19 +306,21 @@ def forward( vision_hidden_states = hidden_states[visual_token_mask].reshape( -1, self.hidden_size) - text_router_logits, _ = self.text_experts_gate(text_hidden_states) + text_router_logits, _ = self.text_experts_gate( + text_hidden_states.to(dtype=torch.float32)) final_hidden_states[text_token_mask] = self.text_experts( 
hidden_states=text_hidden_states, router_logits=text_router_logits).flatten() vision_router_logits, _ = self.vision_experts_gate( - vision_hidden_states) + vision_hidden_states.to(dtype=torch.float32)) final_hidden_states[visual_token_mask] = self.vision_experts( hidden_states=vision_hidden_states, router_logits=vision_router_logits).flatten() else: # only text modal input - text_router_logits, _ = self.text_experts_gate(hidden_states) + text_router_logits, _ = self.text_experts_gate( + hidden_states.to(dtype=torch.float32)) final_hidden_states = self.text_experts( hidden_states=hidden_states, router_logits=text_router_logits) From d7e34b4210fd7ce7d8f49441c50f2b99c93913ae Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Tue, 30 Sep 2025 19:24:57 +0800 Subject: [PATCH 503/518] [Model] Move `vision_feature_select_strategy` into `resolve_visual_encoder_outputs` (#25938) Signed-off-by: DarkLight1337 --- tests/models/test_vision.py | 11 +-- vllm/model_executor/models/aya_vision.py | 27 ++----- vllm/model_executor/models/clip.py | 31 +++++--- vllm/model_executor/models/llava.py | 25 ++----- vllm/model_executor/models/llava_next.py | 25 ++----- .../model_executor/models/llava_next_video.py | 17 +---- vllm/model_executor/models/llava_onevision.py | 25 ++----- vllm/model_executor/models/minimax_vl_01.py | 24 ++----- vllm/model_executor/models/pixtral.py | 22 ++++-- vllm/model_executor/models/siglip.py | 34 +++++---- vllm/model_executor/models/tarsier.py | 23 ++---- vllm/model_executor/models/vision.py | 70 ++++++++++++++++--- 12 files changed, 155 insertions(+), 179 deletions(-) diff --git a/tests/models/test_vision.py b/tests/models/test_vision.py index 8744bcbd3a2a..a30a856a81cf 100644 --- a/tests/models/test_vision.py +++ b/tests/models/test_vision.py @@ -18,7 +18,7 @@ @pytest.mark.parametrize( - ("feature_sample_layers", "num_layers_loaded", "max_possible_layers", + ("select_layers", "num_layers_loaded", "max_possible_layers", "expected_features"), [ # All layers loaded @@ -28,8 +28,8 @@ ([1, 10], 10, 20, [1, 10]), ([-20, -11], 10, 20, [1, 10]), ]) -def test_resolve_visual_encoder_outputs(feature_sample_layers, - num_layers_loaded, max_possible_layers, +def test_resolve_visual_encoder_outputs(select_layers, num_layers_loaded, + max_possible_layers, expected_features): """ Test that offsets are correctly handled for vision feature layers. 
@@ -39,9 +39,10 @@ def test_resolve_visual_encoder_outputs(feature_sample_layers, ] output_tensor = resolve_visual_encoder_outputs( encoder_outputs=encoder_outputs, - feature_sample_layers=feature_sample_layers, post_layer_norm=None, - max_possible_layers=max_possible_layers) + select_layers=select_layers, + max_possible_layers=max_possible_layers, + ) assert torch.equal(torch.tensor(expected_features), output_tensor) diff --git a/vllm/model_executor/models/aya_vision.py b/vllm/model_executor/models/aya_vision.py index f6dfa435ddd4..81bab5b34bc6 100644 --- a/vllm/model_executor/models/aya_vision.py +++ b/vllm/model_executor/models/aya_vision.py @@ -27,7 +27,6 @@ PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors -from vllm.utils.jsontree import json_map_leaves from vllm.utils.tensor_schema import TensorSchema, TensorShape from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP @@ -350,29 +349,11 @@ def _image_pixels_to_features( self, vision_tower: SiglipVisionModel, pixel_values: torch.Tensor, - **kwargs, ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: - target_dtype: torch.dtype = \ - vision_tower.get_input_embeddings().weight.dtype - image_features: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ - vision_tower(pixel_values.to(dtype=target_dtype), **kwargs) - - def select_features(leaf: torch.Tensor): - return self._select_image_features( - leaf, - strategy=self.config.vision_feature_select_strategy, - ) - - return json_map_leaves(select_features, image_features) - - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") + return vision_tower( + pixel_values.to(dtype=vision_tower.dtype), + feature_select_strategy=self.config.vision_feature_select_strategy, + ) def _process_image_input(self, image_input: AyaVisionImagePixelInputs, **kwargs) -> list[torch.Tensor]: diff --git a/vllm/model_executor/models/clip.py b/vllm/model_executor/models/clip.py index dcab00822870..451da2120048 100644 --- a/vllm/model_executor/models/clip.py +++ b/vllm/model_executor/models/clip.py @@ -19,7 +19,8 @@ from vllm.model_executor.model_loader.weight_utils import default_weight_loader from vllm.model_executor.models.interfaces import SupportsQuant -from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs +from .vision import (VisionEncoderInfo, VisionFeatureSelectStrategy, + resolve_visual_encoder_outputs) class CLIPEncoderInfo(VisionEncoderInfo[CLIPVisionConfig]): @@ -308,24 +309,29 @@ def __init__( def forward( self, pixel_values: torch.Tensor, - feature_sample_layers: Optional[list[int]] = None, + *, + select_layers: Optional[list[int]] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> torch.Tensor: hidden_states = self.embeddings(pixel_values) hidden_states = self.pre_layrnorm(hidden_states) - return_all_hidden_states = feature_sample_layers is not None - # Produces either the last layer output or all of the hidden states, - # depending on if we have feature_sample_layers or not + # depending on if we have select_layers or not encoder_outputs = self.encoder( inputs_embeds=hidden_states, - return_all_hidden_states=return_all_hidden_states) + return_all_hidden_states=select_layers is not None, + ) # Handle post-norm (if applicable) and 
stacks feature layers if needed encoder_outputs = resolve_visual_encoder_outputs( - encoder_outputs, feature_sample_layers, self.post_layernorm, - self.config.num_hidden_layers) + encoder_outputs, + self.post_layernorm, + select_layers=select_layers, + max_possible_layers=self.config.num_hidden_layers, + feature_select_strategy=feature_select_strategy, + ) return encoder_outputs @@ -355,9 +361,14 @@ def __init__( def forward( self, pixel_values: torch.Tensor, - feature_sample_layers: Optional[list[int]] = None, + select_layers: Optional[list[int]] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> torch.Tensor: - return self.vision_model(pixel_values, feature_sample_layers) + return self.vision_model( + pixel_values, + select_layers=select_layers, + feature_select_strategy=feature_select_strategy, + ) @property def device(self): diff --git a/vllm/model_executor/models/llava.py b/vllm/model_executor/models/llava.py index 46cf93be191e..d823e5cb58d2 100644 --- a/vllm/model_executor/models/llava.py +++ b/vllm/model_executor/models/llava.py @@ -33,7 +33,6 @@ PromptUpdateDetails) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors -from vllm.utils.jsontree import json_map_leaves from vllm.utils.tensor_schema import TensorSchema, TensorShape from .clip import CLIPVisionModel @@ -604,16 +603,6 @@ def _parse_and_validate_image_input( raise AssertionError("This line should be unreachable.") - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - # Copied from https://github.com/huggingface/transformers/blob/39c3c0a72af6fbda5614dde02ff236069bb79827/src/transformers/models/llava/modeling_llava.py#L421 # noqa - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _image_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel, @@ -622,16 +611,10 @@ def _image_pixels_to_features( ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ - vision_tower(pixel_values) - - def select_features(leaf: torch.Tensor): - return self._select_image_features( - leaf, - strategy=self.config.vision_feature_select_strategy, - ) - - return json_map_leaves(select_features, image_features) + return vision_tower( + pixel_values, + feature_select_strategy=self.config.vision_feature_select_strategy, + ) def _process_image_pixels( self, diff --git a/vllm/model_executor/models/llava_next.py b/vllm/model_executor/models/llava_next.py index c4f1daaab9bf..3f7e39c02061 100644 --- a/vllm/model_executor/models/llava_next.py +++ b/vllm/model_executor/models/llava_next.py @@ -235,12 +235,12 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: # Determine the layer up to which we will initialize the vision tower if isinstance(vision_feature_layer, int): vision_hidden_size = config.vision_config.hidden_size - self.feature_sample_layers = None + self.select_layers = None # Used for multimodal granite models to control encoder outputs elif isinstance(vision_feature_layer, (list, tuple)): vision_hidden_size = config.vision_config.hidden_size * len( vision_feature_layer) - self.feature_sample_layers = vision_feature_layer + self.select_layers = 
vision_feature_layer else: raise TypeError( f"vision_layer_feature type: {type(vision_feature_layer)}" @@ -312,30 +312,17 @@ def _parse_and_validate_image_input( raise AssertionError("This line should be unreachable.") - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - # Copied from https://github.com/huggingface/transformers/blob/39c3c0a72af6fbda5614dde02ff236069bb79827/src/transformers/models/llava/modeling_llava.py#L421 # noqa - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _image_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel], pixel_values: torch.Tensor, ) -> torch.Tensor: - # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features = vision_tower( - pixel_values, feature_sample_layers=self.feature_sample_layers) - - return self._select_image_features( - image_features, - strategy=self.config.vision_feature_select_strategy, + return vision_tower( + pixel_values, + select_layers=self.select_layers, + feature_select_strategy=self.config.vision_feature_select_strategy, ) # Based on: https://github.com/haotian-liu/LLaVA/blob/main/llava/model/llava_arch.py diff --git a/vllm/model_executor/models/llava_next_video.py b/vllm/model_executor/models/llava_next_video.py index aebc661d53f8..697b8e819707 100644 --- a/vllm/model_executor/models/llava_next_video.py +++ b/vllm/model_executor/models/llava_next_video.py @@ -349,27 +349,16 @@ def _parse_and_validate_video_input( "w": expected_w, }) - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _video_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel], pixel_values: torch.Tensor, ) -> torch.Tensor: - # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features = vision_tower(pixel_values) - image_features = self._select_image_features( - image_features, - strategy=self.config.vision_feature_select_strategy, + image_features = vision_tower( + pixel_values, + feature_select_strategy=self.config.vision_feature_select_strategy, ) image_features = self.vision_resampler(image_features) image_features = self.multi_modal_projector(image_features) diff --git a/vllm/model_executor/models/llava_onevision.py b/vllm/model_executor/models/llava_onevision.py index 6088195c91d5..924f8ba3585f 100644 --- a/vllm/model_executor/models/llava_onevision.py +++ b/vllm/model_executor/models/llava_onevision.py @@ -577,27 +577,16 @@ def _parse_and_validate_multimodal_inputs(self, **kwargs: object) -> dict: return mm_input_by_modality - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _image_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel], pixel_values: torch.Tensor, ) -> torch.Tensor: - # NOTE: we skip the step to select the vision feature layer since # this is already done inside the 
vision tower - image_features = vision_tower(pixel_values) - return self._select_image_features( - image_features, - strategy=self.config.vision_feature_select_strategy, + return vision_tower( + pixel_values, + feature_select_strategy=self.config.vision_feature_select_strategy, ) # Based on: https://github.com/haotian-liu/LLaVA/blob/main/llava/model/llava_arch.py @@ -750,13 +739,11 @@ def _video_pixels_to_features( vision_tower: Union[CLIPVisionModel, SiglipVisionModel], pixel_values: torch.Tensor, ) -> torch.Tensor: - # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - video_features = vision_tower(pixel_values) - video_features = self._select_image_features( - video_features, - strategy=self.config.vision_feature_select_strategy, + video_features = vision_tower( + pixel_values, + feature_select_strategy=self.config.vision_feature_select_strategy, ) video_features = self.multi_modal_projector(video_features) video_features = self.apply_pooling(video_features) diff --git a/vllm/model_executor/models/minimax_vl_01.py b/vllm/model_executor/models/minimax_vl_01.py index d41b9d3f14fe..938c9a689fcf 100644 --- a/vllm/model_executor/models/minimax_vl_01.py +++ b/vllm/model_executor/models/minimax_vl_01.py @@ -17,7 +17,6 @@ from vllm.multimodal import MULTIMODAL_REGISTRY from vllm.multimodal.inputs import MultiModalFieldConfig from vllm.sequence import IntermediateTensors -from vllm.utils.jsontree import json_map_leaves from vllm.utils.tensor_schema import TensorSchema, TensorShape from .clip import CLIPVisionModel @@ -221,15 +220,6 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = "") -> None: def get_language_model(self) -> torch.nn.Module: return self.language_model - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _image_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel, @@ -238,16 +228,10 @@ def _image_pixels_to_features( ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # NOTE: we skip the step to select the vision feature layer since # this is already done inside the vision tower - image_features: tuple[torch.Tensor, ...] 
= \ - tuple(vision_tower(p) for p in pixel_values) - - def select_features(leaf: torch.Tensor): - return self._select_image_features( - leaf, - strategy=self.config.vision_feature_select_strategy, - ) - - return json_map_leaves(select_features, image_features) + feature_select_strategy = self.config.vision_feature_select_strategy + return tuple( + vision_tower(p, feature_select_strategy=feature_select_strategy) + for p in pixel_values) # adapted from https://huggingface.co/MiniMaxAI/MiniMax-VL-01/blob/main/modeling_minimax_vl_01.py#L616-L631 def pack_image_features(self, image_features: list[torch.Tensor], diff --git a/vllm/model_executor/models/pixtral.py b/vllm/model_executor/models/pixtral.py index 6344fc394833..bf451c5005b7 100644 --- a/vllm/model_executor/models/pixtral.py +++ b/vllm/model_executor/models/pixtral.py @@ -51,7 +51,8 @@ from .interfaces import MultiModalEmbeddings, SupportsMultiModal, SupportsPP from .utils import flatten_bn, init_vllm_registered_model, maybe_prefix -from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs +from .vision import (VisionEncoderInfo, VisionFeatureSelectStrategy, + resolve_visual_encoder_outputs) try: from xformers import ops as xops @@ -1218,7 +1219,9 @@ def __init__( def forward( self, pixel_values: list[torch.Tensor], - feature_sample_layers: Optional[list[int]] = None, + *, + select_layers: Optional[list[int]] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> tuple[torch.Tensor, ...]: """ Args: @@ -1226,7 +1229,7 @@ def forward( in pixel_values. This means it will be a list of tensors because multiple requests batched can have multiple images, each with their own shape potentially - feature_sample_layers: Layer indices whose features should be + select_layers: Layer indices whose features should be concatenated and used as the visual encoder output. If none are provided, the last layer is used. 
@@ -1267,15 +1270,20 @@ def forward( [p.shape[-2] * p.shape[-1] for p in patch_embeds_list], patch_embeds) - return_all_hidden_states = feature_sample_layers is not None out = self.transformer( patch_embeds, attention_mask, position_embedding, - return_all_hidden_states=return_all_hidden_states) + return_all_hidden_states=select_layers is not None, + ) - out = resolve_visual_encoder_outputs(out, feature_sample_layers, None, - self.config.num_hidden_layers) + out = resolve_visual_encoder_outputs( + out, + None, + select_layers=select_layers, + max_possible_layers=self.config.num_hidden_layers, + feature_select_strategy=feature_select_strategy, + ) # squeeze dim 0 and split into separate tensors for each image return torch.split(out.squeeze(0), embed_sizes) diff --git a/vllm/model_executor/models/siglip.py b/vllm/model_executor/models/siglip.py index eb49d6d2c335..4c60d96c77d7 100644 --- a/vllm/model_executor/models/siglip.py +++ b/vllm/model_executor/models/siglip.py @@ -23,7 +23,8 @@ from vllm.model_executor.model_loader.weight_utils import ( default_weight_loader, maybe_remap_kv_scale_name) -from .vision import VisionEncoderInfo, resolve_visual_encoder_outputs +from .vision import (VisionEncoderInfo, VisionFeatureSelectStrategy, + resolve_visual_encoder_outputs) class SiglipEncoderInfo(VisionEncoderInfo[SiglipVisionConfig]): @@ -415,28 +416,31 @@ def __init__( def forward( self, pixel_values: torch.Tensor, - interpolate_pos_encoding: bool = True, - feature_sample_layers: Optional[list[int]] = None, + *, + interpolate_pos_encoding: bool = False, + select_layers: Optional[list[int]] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> torch.Tensor: - hidden_states = self.embeddings( pixel_values, interpolate_pos_encoding=interpolate_pos_encoding, ) - return_all_hidden_states = feature_sample_layers is not None - # Produces either the last layer output or all of the hidden states, - # depending on if we have feature_sample_layers or not + # depending on if we have select_layers or not encoder_outputs = self.encoder( inputs_embeds=hidden_states, - return_all_hidden_states=return_all_hidden_states, + return_all_hidden_states=select_layers is not None, ) # Handle post-norm (if applicable) and stacks feature layers if needed encoder_outputs = resolve_visual_encoder_outputs( - encoder_outputs, feature_sample_layers, self.post_layernorm, - self.config.num_hidden_layers) + encoder_outputs, + self.post_layernorm, + select_layers=select_layers, + max_possible_layers=self.config.num_hidden_layers, + feature_select_strategy=feature_select_strategy, + ) # TODO: add this back when pooled_output is used in inference. 
# if self.use_head: @@ -471,16 +475,22 @@ def __init__( def get_input_embeddings(self) -> nn.Module: return self.vision_model.embeddings.patch_embedding + @property + def dtype(self): + return self.get_input_embeddings().weight.dtype + def forward( self, pixel_values: torch.Tensor, interpolate_pos_encoding: bool = False, - feature_sample_layers: Optional[list[int]] = None, + select_layers: Optional[list[int]] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> torch.Tensor: return self.vision_model( pixel_values=pixel_values, interpolate_pos_encoding=interpolate_pos_encoding, - feature_sample_layers=feature_sample_layers, + select_layers=select_layers, + feature_select_strategy=feature_select_strategy, ) def load_weights(self, weights: Iterable[tuple[str, diff --git a/vllm/model_executor/models/tarsier.py b/vllm/model_executor/models/tarsier.py index 1145bea41480..ed02fe2c389f 100644 --- a/vllm/model_executor/models/tarsier.py +++ b/vllm/model_executor/models/tarsier.py @@ -33,7 +33,6 @@ PromptReplacement, PromptUpdate) from vllm.multimodal.profiling import BaseDummyInputsBuilder from vllm.sequence import IntermediateTensors -from vllm.utils.jsontree import json_map_leaves from vllm.utils.tensor_schema import TensorSchema, TensorShape from .clip import CLIPVisionModel @@ -476,30 +475,16 @@ def _parse_and_validate_image_input( raise AssertionError("This line should be unreachable.") - def _select_image_features(self, image_features: torch.Tensor, *, - strategy: str) -> torch.Tensor: - if strategy == "default": - return image_features[:, 1:] - elif strategy == "full": - return image_features - raise ValueError(f"Unexpected select feature strategy: {strategy}") - def _image_pixels_to_features( self, vision_tower: Union[CLIPVisionModel, SiglipVisionModel], pixel_values: Union[torch.Tensor, list[torch.Tensor]], ) -> Union[torch.Tensor, tuple[torch.Tensor, ...]]: # From vLLM LLaVA, vision tower output handling - image_hidden_states: Union[torch.Tensor, tuple[torch.Tensor, ...]] = \ - vision_tower(pixel_values) - - def select_features_fn(leaf: torch.Tensor): - return self._select_image_features( - leaf, - strategy=self.config.vision_feature_select_strategy, - ) - - return json_map_leaves(select_features_fn, image_hidden_states) + return vision_tower( + pixel_values, + feature_select_strategy=self.config.vision_feature_select_strategy, + ) def _add_tarsier_split_tokens( self, projected_image_features: torch.Tensor) -> torch.Tensor: diff --git a/vllm/model_executor/models/vision.py b/vllm/model_executor/models/vision.py index 08ad8fbeb424..e077691fcec2 100644 --- a/vllm/model_executor/models/vision.py +++ b/vllm/model_executor/models/vision.py @@ -4,10 +4,12 @@ import itertools import math from abc import ABC, abstractmethod -from typing import Final, Generic, Literal, Optional, Protocol, TypeVar, Union +from typing import (Callable, Final, Generic, Literal, Optional, Protocol, + TypeVar, Union) import torch from transformers import PretrainedConfig +from typing_extensions import assert_never from vllm.distributed import (get_tensor_model_parallel_rank, get_tensor_model_parallel_world_size, @@ -86,11 +88,39 @@ def get_vit_attn_backend(head_size: int, dtype: torch.dtype) -> _Backend: return current_platform.get_vit_attn_backend(head_size, dtype) +VisionFeatureSelectStrategy = Union[ + Literal["class", "default", "full"], + Callable[[torch.Tensor], torch.Tensor], +] + + +def _get_vision_feature_selector( + strategy: VisionFeatureSelectStrategy, +) -> 
Callable[[torch.Tensor], torch.Tensor]: + if callable(strategy): + return strategy + + # https://github.com/huggingface/transformers/blob/cd74917ffc3e8f84e4a886052c5ab32b7ac623cc/src/transformers/models/clip/modeling_clip.py#L762 + if strategy == "class": + return lambda feats: feats[:, 0, :] + + # https://github.com/huggingface/transformers/blob/4a02bc7004285bdb12cc033e87ad2578ce2fa900/src/transformers/models/llava/modeling_llava.py#L196 + if strategy == "default": + return lambda feats: feats[:, 1:, :] + + if strategy == "full": + return lambda feats: feats + + assert_never(strategy) + + def resolve_visual_encoder_outputs( encoder_outputs: Union[torch.Tensor, list[torch.Tensor]], - feature_sample_layers: Optional[list[int]], post_layer_norm: Optional[torch.nn.LayerNorm], - max_possible_layers: int, + *, + select_layers: Optional[list[int]] = None, + max_possible_layers: Optional[int] = None, + feature_select_strategy: Optional[VisionFeatureSelectStrategy] = None, ) -> torch.Tensor: """Given the outputs a visual encoder module that may correspond to the output of the last layer, or a list of hidden states to be stacked, @@ -98,17 +128,32 @@ def resolve_visual_encoder_outputs( Args: encoder_outputs: Output of encoder's last layer or all hidden states. - feature_sample_layers: Optional layer indices to grab from the encoder - outputs; if provided, encoder outputs must be a list. post_layer_norm: Post norm to apply to the output of the encoder. + select_layers: Optional layer indices to grab from the encoder + outputs; if provided, encoder outputs must be a list. max_possible_layers: Total layers in the fully loaded visual encoder. - + feature_select_strategy: Defines how to select the hidden states + from each layer. """ - if feature_sample_layers is None: + if select_layers is None: + if not isinstance(encoder_outputs, torch.Tensor): + raise ValueError("Expected only a single encoder output when " + "`select_layers` is not provided") + + if feature_select_strategy is not None: + select_features = _get_vision_feature_selector( + feature_select_strategy) + encoder_outputs = select_features(encoder_outputs) + if post_layer_norm is not None: return post_layer_norm(encoder_outputs) + return encoder_outputs + if max_possible_layers is None: + raise ValueError("`max_possible_layers` must be provided " + "alongside `select_layers`") + # Get the hidden states corresponding to the layer indices. # Negative values are relative to the full visual encoder, # so offset them depending on how many layers were loaded. 
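To make the offset handling above concrete, here is a small usage sketch mirroring the `[-20, -11]` case from the unit test updated earlier in this patch (10 of 20 encoder layers loaded); it is illustrative only.

```python
import torch

from vllm.model_executor.models.vision import resolve_visual_encoder_outputs

# 10 loaded layers -> 11 hidden states (embedding output + one per layer),
# as in the parametrized test case above.
encoder_outputs = [torch.tensor([idx]) for idx in range(10 + 1)]

feats = resolve_visual_encoder_outputs(
    encoder_outputs,
    None,                      # no post layer norm applied here
    select_layers=[-20, -11],  # indices relative to the full 20-layer model
    max_possible_layers=20,
)
print(feats)  # tensor([ 1, 10]) -- same result as select_layers=[1, 10]
```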
@@ -120,13 +165,18 @@ def resolve_visual_encoder_outputs( hs_pool = [ encoder_outputs[layer_idx] if layer_idx >= 0 else encoder_outputs[layer_idx + offset] - for layer_idx in feature_sample_layers + for layer_idx in select_layers ] + if feature_select_strategy is not None: + select_features = _get_vision_feature_selector(feature_select_strategy) + hs_pool = [select_features(hs) for hs in hs_pool] + # Apply post-norm on the final hidden state if we are using it - uses_last_layer = feature_sample_layers[-1] in (len(hs_pool) - 1, -1) + uses_last_layer = select_layers[-1] in (max_possible_layers - 1, -1) if post_layer_norm is not None and uses_last_layer: - hs_pool[-1] = post_layer_norm(encoder_outputs) + hs_pool[-1] = post_layer_norm(hs_pool[-1]) + return torch.cat(hs_pool, dim=-1) From e184c9c510bca80c72cac06c9b041dcd90b4996e Mon Sep 17 00:00:00 2001 From: Lehua Ding Date: Tue, 30 Sep 2025 19:51:16 +0800 Subject: [PATCH 504/518] [perf] Use CPU tensor to reduce GPU->CPU sync (#25884) Signed-off-by: Lehua Ding --- vllm/v1/worker/gpu_model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index bb5c3ea74293..f4c28dc24d70 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2478,7 +2478,7 @@ def propose_draft_token_ids(sampled_token_ids): effective_drafter_max_model_len = ( self.speculative_config.draft_model_config.max_model_len) input_fits_in_drafter = spec_decode_common_attn_metadata and ( - spec_decode_common_attn_metadata.seq_lens.max() + + spec_decode_common_attn_metadata.max_seq_len + self.speculative_config.num_speculative_tokens <= effective_drafter_max_model_len) if use_padded_batch_for_eagle and input_fits_in_drafter: From 80608ba5afe6559b3b06f56905bb5cb8c691e7c2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicol=C3=B2=20Lucchesi?= Date: Tue, 30 Sep 2025 14:18:29 +0200 Subject: [PATCH 505/518] [NIXL] Add support for MLA caches with different latent dim (#25902) Signed-off-by: NickLucche Signed-off-by: Chen Zhang Co-authored-by: Chen Zhang --- .../kv_connector/unit/test_nixl_connector.py | 13 +-- .../kv_connector/v1/nixl_connector.py | 95 ++++++++++++------- 2 files changed, 66 insertions(+), 42 deletions(-) diff --git a/tests/v1/kv_connector/unit/test_nixl_connector.py b/tests/v1/kv_connector/unit/test_nixl_connector.py index 6b4bd29f18a5..578bf02eb519 100644 --- a/tests/v1/kv_connector/unit/test_nixl_connector.py +++ b/tests/v1/kv_connector/unit/test_nixl_connector.py @@ -255,8 +255,9 @@ def _nixl_handshake(self, host: str, port: int, remote_tp_size: int, time.sleep(self._hand_shake_latency) # These should've been done in register_kv_caches(), called by # gpu_model_runner. Here we just hardcode some dummy values. - self.slot_size_bytes = 4096 - self.block_len = self.slot_size_bytes * self.block_size + slot_size_bytes = 4096 + self.slot_size_per_layer = [slot_size_bytes] + self.block_len_per_layer = [slot_size_bytes * self.block_size] self.num_blocks = 1 self.dst_num_blocks[self.engine_id] = self.num_blocks @@ -268,7 +269,7 @@ def _nixl_handshake(self, host: str, port: int, remote_tp_size: int, agent_metadata=FakeNixlWrapper.AGENT_METADATA, kv_caches_base_addr=[0], num_blocks=1, - block_len=self.block_len, + block_lens=self.block_len_per_layer, attn_backend_name=self.backend_name, # `self.kv_cache_layout` is only forced to HND when vllm engine # is started. We mock HND here. 
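The fake handshake above advertises per-layer block lengths instead of a single `block_len`. The quantities relate roughly as in this sketch (byte counts are made up, not taken from a real model):

    block_size = 16                      # tokens per KV cache block
    slot_size_per_layer = [4096, 2048]   # hypothetical bytes per token slot, per layer
    block_len_per_layer = [s * block_size for s in slot_size_per_layer]
    assert block_len_per_layer == [65536, 32768]
    # Non-MLA models must keep all entries equal; MLA layers are allowed to differ.
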
@@ -485,8 +486,8 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(self, dist_init): worker = connector.connector_worker # Minimal local registration params used by add_remote_agent - worker.slot_size_bytes = 4096 - worker.block_len = worker.slot_size_bytes * worker.block_size + worker.slot_size_per_layer = [4096] + worker.block_len_per_layer = [4096 * worker.block_size] worker.num_blocks = 1 worker.dst_num_blocks[worker.engine_id] = worker.num_blocks @@ -498,7 +499,7 @@ def test_handshake_fails_on_kv_cache_layout_mismatch(self, dist_init): agent_metadata=FakeNixlWrapper.AGENT_METADATA, kv_caches_base_addr=[0], num_blocks=1, - block_len=worker.block_len, + block_lens=worker.block_len_per_layer, attn_backend_name=worker.backend_name, kv_cache_layout=mismatched_layout, ) diff --git a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py index 1c7569515dec..55d87ea994b5 100644 --- a/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py +++ b/vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py @@ -87,7 +87,7 @@ class NixlAgentMetadata( agent_metadata: bytes kv_caches_base_addr: list[int] num_blocks: int - block_len: int + block_lens: list[int] attn_backend_name: str kv_cache_layout: str @@ -772,6 +772,9 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): split_k_and_v = not (self.use_mla or self._use_pallas or self._use_flashinfer) tensor_size_bytes = None + # Enable different block lengths for different layers when MLA is used. + self.block_len_per_layer = list[int]() + self.slot_size_per_layer = list[int]() # HD bytes in kv terms for layer_name, cache_or_caches in xfer_buffers.items(): cache_list = cache_or_caches if split_k_and_v else [ cache_or_caches @@ -789,10 +792,25 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): tensor_size_bytes = curr_tensor_size_bytes self.num_blocks = cache.shape[0] - assert tensor_size_bytes == curr_tensor_size_bytes, \ - "All kv cache tensors must have the same size" + assert cache.shape[0] == self.num_blocks, \ + "All kv cache tensors must have the same number of blocks" + + self.block_len_per_layer.append(curr_tensor_size_bytes // + self.num_blocks) + self.slot_size_per_layer.append(self.block_len_per_layer[-1] // + self.block_size) + + if not self.use_mla: + # Different kv cache shape is not supported by HeteroTP + assert tensor_size_bytes == curr_tensor_size_bytes, \ + "All kv cache tensors must have the same size" caches_data.append( - (base_addr, tensor_size_bytes, self.tp_rank, "")) + (base_addr, curr_tensor_size_bytes, self.tp_rank, "")) + + logger.debug("Different block lengths collected: %s", + set(self.block_len_per_layer)) + assert len(self.block_len_per_layer) == len(seen_base_addresses) + assert self.num_blocks != 0 self.kv_caches_base_addr[self.engine_id] = seen_base_addresses self.num_regions = len(caches_data) @@ -805,16 +823,12 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): logger.debug("Done registering descs") self._registered_descs.append(descs) - assert tensor_size_bytes is not None - assert self.num_blocks != 0 - assert tensor_size_bytes % self.num_blocks == 0 - self.block_len = tensor_size_bytes // self.num_blocks - self.slot_size_bytes = self.block_len // self.block_size self.device_kv_caches = kv_caches self.dst_num_blocks[self.engine_id] = self.num_blocks if self._use_flashinfer: - assert self.slot_size_bytes % 2 == 0 - self.slot_size_bytes /= 2 + for i in 
range(len(self.slot_size_per_layer)): + assert self.slot_size_per_layer[i] % 2 == 0 + self.slot_size_per_layer[i] //= 2 # NOTE (NickLucche) When FlashInfer is used, memory is registered # with joint KV for each block. This minimizes the overhead in @@ -824,17 +838,17 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): # of 'virtual' regions here and halve `block_len` below. self.num_regions *= 2 - kv_block_len = self.get_backend_aware_kv_block_len() # Register local/src descr for NIXL xfer. blocks_data = [] - for base_addr in seen_base_addresses: + for i, base_addr in enumerate(seen_base_addresses): + kv_block_len = self.get_backend_aware_kv_block_len(layer_idx=i) # NOTE With heter-TP, more blocks are prepared than what are # needed as self.num_blocks >= nixl_agent_meta.num_blocks. We # could create fewer, but then _get_block_descs_ids needs to # select agent_meta.num_blocks instead of self.num_blocks for # local descr, and that makes handling regular flow less clean. for block_id in range(self.num_blocks): - block_offset = block_id * self.block_len + block_offset = block_id * self.block_len_per_layer[i] addr = base_addr + block_offset # (addr, len, device id) blocks_data.append((addr, kv_block_len, self.tp_rank)) @@ -844,7 +858,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): # descs ordering. This is needed for selecting contiguous heads # when split across TP ranks. for block_id in range(self.num_blocks): - block_offset = block_id * self.block_len + block_offset = block_id * self.block_len_per_layer[i] addr = base_addr + block_offset # Register addresses for V cache (K registered first). v_addr = addr + kv_block_len @@ -884,7 +898,7 @@ def register_kv_caches(self, kv_caches: dict[str, torch.Tensor]): agent_metadata=self.nixl_wrapper.get_agent_metadata(), kv_caches_base_addr=self.kv_caches_base_addr[self.engine_id], num_blocks=self.num_blocks, - block_len=self.block_len, + block_lens=self.block_len_per_layer, attn_backend_name=self.backend_name, kv_cache_layout=self.kv_cache_layout) ready_event = threading.Event() @@ -909,7 +923,7 @@ def add_remote_agent(self, The latter, assuming D.world_size > P.world_size, requires that two or more local TP worker share the xfer from a single TP worker. - Here's an example: + Here's an example (non-MLA case): rank_offset p_remote_tp_rank (kv split no) @@ -965,14 +979,20 @@ def add_remote_agent(self, total_num_kv_heads = self.model_config.get_total_num_kv_heads() is_kv_replicated = self._tp_size[engine_id] // total_num_kv_heads >= 1 + remote_block_len = nixl_agent_meta.block_lens[0] if self.use_mla or is_kv_replicated: - # With MLA the only difference is in the number of blocks. - remote_block_size = nixl_agent_meta.block_len // ( - self.slot_size_bytes) - assert self.block_len == nixl_agent_meta.block_len + # With replicated KV cache, only the number of blocks can differ. 
+ assert self.block_len_per_layer == nixl_agent_meta.block_lens, \ + "KV cache sizes must match between P and D when replicated" + remote_block_size = remote_block_len // ( + self.slot_size_per_layer[0]) else: - remote_block_size = nixl_agent_meta.block_len // ( - self.slot_size_bytes * tp_ratio) + # When MLA is not used, this is a list of the same block length + for block_len in nixl_agent_meta.block_lens: + assert block_len == remote_block_len, \ + "All remote layers must have the same block size" + remote_block_size = remote_block_len // ( + self.slot_size_per_layer[0] * tp_ratio) if self._use_flashinfer: # With flashinfer, KV are sent in the same message. remote_block_size //= 2 @@ -983,14 +1003,14 @@ def add_remote_agent(self, raise ValueError( "Heterogeneous TP is not supported on XPU") - assert nixl_agent_meta.block_len == self.block_len * tp_ratio, ( + assert remote_block_len == self.block_len_per_layer[0] * tp_ratio, ( "Remote P worker KV layer cache must be of shape [2, N, " "local_kv_heads*tp_ratio, block_size, head_dim] and same dtype." ) assert self.block_size == remote_block_size, ( - "Remote P worker with different block size is not supported " - f"{self.block_size=} {remote_block_size=}") + "Remote P worker with different page/block size is not supported " + f"{self.block_size=}, {remote_block_size=}") # Create dst descs and xfer side handles. TP workers have same #blocks. if engine_id in self.dst_num_blocks: @@ -1005,13 +1025,16 @@ def add_remote_agent(self, # Eg. PTP1 DTP2 => P0 KV:[block0-KV_0 | block0-KV_1..]. self.kv_caches_base_addr[ engine_id] = nixl_agent_meta.kv_caches_base_addr - kv_block_len = self.get_backend_aware_kv_block_len() - rank_offset = self.tp_rank % tp_ratio * kv_block_len \ - if not (self.use_mla or is_kv_replicated) else 0 + + assert len(nixl_agent_meta.kv_caches_base_addr) == len( + self.block_len_per_layer) # Register all remote blocks, but only the corresponding kv heads. - for base_addr in nixl_agent_meta.kv_caches_base_addr: + for i, base_addr in enumerate(nixl_agent_meta.kv_caches_base_addr): + kv_block_len = self.get_backend_aware_kv_block_len(layer_idx=i) + rank_offset = self.tp_rank % tp_ratio * kv_block_len \ + if not (self.use_mla or is_kv_replicated) else 0 for block_id in range(nixl_agent_meta.num_blocks): - block_offset = block_id * nixl_agent_meta.block_len + block_offset = block_id * nixl_agent_meta.block_lens[i] # For each block, grab the heads chunk belonging to rank_i # of size remote_nheads // tp_ratio, which correspond to # self.block_len == remote_block_len//tp_ratio bytes. @@ -1022,9 +1045,9 @@ def add_remote_agent(self, if self._use_flashinfer: # With FlashInfer index V separately to allow head splitting. for block_id in range(nixl_agent_meta.num_blocks): - block_offset = block_id * nixl_agent_meta.block_len + block_offset = block_id * nixl_agent_meta.block_lens[i] addr = base_addr + block_offset + rank_offset - v_addr = addr + nixl_agent_meta.block_len // 2 + v_addr = addr + nixl_agent_meta.block_lens[i] // 2 blocks_data.append((v_addr, kv_block_len, remote_tp_rank)) logger.debug( @@ -1351,7 +1374,7 @@ def _get_block_descs_ids(self, descs_ids = region_ids * num_blocks + block_ids return descs_ids.flatten() - def get_backend_aware_kv_block_len(self): + def get_backend_aware_kv_block_len(self, layer_idx: int): """ Get the block length for one K/V element (K and V have the same size). 
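As a worked example of the heterogeneous-TP check in `add_remote_agent` above (the numbers are illustrative only): with a prefill worker at TP=1 and a decode worker at TP=2, `tp_ratio` is 2 and each decode rank reads half of the remote heads per block.

    remote_block_len = 131072    # bytes per KV block on the prefill worker
    local_slot_size = 4096       # bytes per token slot on this decode rank
    tp_ratio = 2                 # decode TP / prefill TP
    remote_block_size = remote_block_len // (local_slot_size * tp_ratio)
    assert remote_block_size == 16   # must match the local block_size
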
@@ -1362,9 +1385,9 @@ def get_backend_aware_kv_block_len(self): """ if self._use_flashinfer: # For indexing only half (either just the K or V part). - block_len = self.block_len // 2 + block_len = self.block_len_per_layer[layer_idx] // 2 else: - block_len = self.block_len + block_len = self.block_len_per_layer[layer_idx] return block_len def get_kv_connector_stats(self) -> Optional[KVConnectorStats]: From bc546f76a145087ceae59e842443193aaf8a91a0 Mon Sep 17 00:00:00 2001 From: Reza Barazesh <3146276+rzabarazesh@users.noreply.github.com> Date: Tue, 30 Sep 2025 09:45:20 -0400 Subject: [PATCH 506/518] [CI] Move applicable tests to CPU (#24080) Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .buildkite/test-pipeline.yaml | 65 ++++++++++++++----- .github/mergify.yml | 1 - docker/Dockerfile.cpu | 17 ++--- pyproject.toml | 1 + tests/models/test_utils.py | 3 + tests/models/test_vision.py | 2 + tests/multimodal/test_cache.py | 2 + tests/multimodal/test_hasher.py | 2 + tests/multimodal/test_image.py | 2 + tests/multimodal/test_inputs.py | 3 + tests/multimodal/test_processing.py | 2 + tests/multimodal/test_registry.py | 2 + tests/multimodal/test_video.py | 2 + tests/test_inputs.py | 2 + tests/test_outputs.py | 4 ++ .../mistral}/__init__.py | 0 .../mistral}/conftest.py | 4 +- .../mistral}/test_mistral_tool_calls.py | 0 .../mistral}/utils.py | 0 tests/tool_use/test_glm4_moe_tool_parser.py | 2 + tests/tool_use/test_jamba_tool_parser.py | 2 + tests/tool_use/test_kimi_k2_tool_parser.py | 2 + tests/tool_use/test_minimax_tool_parser.py | 2 + tests/tool_use/test_qwen3coder_tool_parser.py | 2 + tests/tool_use/test_seed_oss_tool_parser.py | 2 + tests/tool_use/test_tool_choice_required.py | 2 + tests/tool_use/test_xlam_tool_parser.py | 2 + tests/v1/core/test_async_scheduler.py | 2 + tests/v1/core/test_encoder_cache_manager.py | 3 + tests/v1/core/test_kv_cache_utils.py | 2 + tests/v1/core/test_prefix_caching.py | 4 +- tests/v1/core/test_scheduler.py | 2 + .../core/test_single_type_kv_cache_manager.py | 3 + .../unit/test_output_aggreagator.py | 4 ++ .../unit/test_remote_decode_lifecycle.py | 4 ++ .../unit/test_remote_prefill_lifecycle.py | 4 ++ tests/v1/metrics/test_metrics_reader.py | 2 + tests/v1/structured_output/test_utils.py | 2 + tests/v1/test_serial_utils.py | 2 + 39 files changed, 136 insertions(+), 28 deletions(-) rename tests/{mistral_tool_use => tool_use/mistral}/__init__.py (100%) rename tests/{mistral_tool_use => tool_use/mistral}/conftest.py (93%) rename tests/{mistral_tool_use => tool_use/mistral}/test_mistral_tool_calls.py (100%) rename tests/{mistral_tool_use => tool_use/mistral}/utils.py (100%) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index 460f0afb6f67..b3d10f75ab50 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -50,19 +50,28 @@ steps: mirror_hardwares: [amdexperimental] source_file_dependencies: - vllm/ + - tests/multimodal + - tests/utils_ + commands: + - pytest -v -s -m 'not cpu_test' multimodal + - pytest -v -s utils_ + +- label: Async Engine, Inputs, Utils, Worker Test (CPU) # 4 mins + timeout_in_minutes: 10 + source_file_dependencies: + - vllm/ - tests/test_inputs.py - tests/test_outputs.py - tests/multimodal - - tests/utils_ - tests/standalone_tests/lazy_imports.py - tests/transformers_utils + no_gpu: true commands: - python3 standalone_tests/lazy_imports.py - pytest -v -s test_inputs.py - pytest -v -s test_outputs.py - - 
pytest -v -s multimodal - - pytest -v -s utils_ # Utils - - pytest -v -s transformers_utils # transformers_utils + - pytest -v -s -m 'cpu_test' multimodal + - pytest -v -s transformers_utils - label: Python-only Installation Test # 10min timeout_in_minutes: 20 @@ -287,23 +296,34 @@ steps: - tests/v1 commands: # split the test to avoid interference - - pytest -v -s v1/core - pytest -v -s v1/executor - pytest -v -s v1/kv_offload - pytest -v -s v1/sample - pytest -v -s v1/logits_processors - pytest -v -s v1/worker - - pytest -v -s v1/structured_output - pytest -v -s v1/spec_decode - - pytest -v -s v1/kv_connector/unit - - pytest -v -s v1/metrics + - pytest -v -s -m 'not cpu_test' v1/kv_connector/unit + - pytest -v -s -m 'not cpu_test' v1/metrics - pytest -v -s v1/test_oracle.py - pytest -v -s v1/test_request.py - - pytest -v -s v1/test_serial_utils.py # Integration test for streaming correctness (requires special branch). - pip install -U git+https://github.com/robertgshaw2-redhat/lm-evaluation-harness.git@streaming-api - pytest -v -s entrypoints/openai/correctness/test_lmeval.py::test_lm_eval_accuracy_v1_engine +- label: V1 Test others (CPU) # 5 mins + source_file_dependencies: + - vllm/ + - tests/v1 + no_gpu: true + commands: + # split the test to avoid interference + - pytest -v -s v1/core + - pytest -v -s v1/structured_output + - pytest -v -s v1/test_serial_utils.py + - pytest -v -s -m 'cpu_test' v1/kv_connector/unit + - pytest -v -s -m 'cpu_test' v1/metrics + + - label: Examples Test # 30min timeout_in_minutes: 45 mirror_hardwares: [amdexperimental] @@ -533,10 +553,17 @@ steps: source_file_dependencies: - vllm/ - tests/tool_use - - tests/mistral_tool_use commands: - - pytest -v -s tool_use - - pytest -v -s mistral_tool_use + - pytest -v -s -m 'not cpu_test' tool_use + +- label: OpenAI-Compatible Tool Use (CPU) # 5 mins + timeout_in_minutes: 10 + source_file_dependencies: + - vllm/ + - tests/tool_use + no_gpu: true + commands: + - pytest -v -s -m 'cpu_test' tool_use ##### models test ##### @@ -576,13 +603,19 @@ steps: - vllm/ - tests/models/test_transformers.py - tests/models/test_registry.py + commands: + - pytest -v -s models/test_transformers.py models/test_registry.py + +- label: Basic Models Test (Other CPU) # 5min + timeout_in_minutes: 10 + torch_nightly: true + source_file_dependencies: + - vllm/ - tests/models/test_utils.py - tests/models/test_vision.py + no_gpu: true commands: - - pytest -v -s models/test_transformers.py \ - models/test_registry.py \ - models/test_utils.py \ - models/test_vision.py + - pytest -v -s models/test_utils.py models/test_vision.py - label: Language Models Tests (Standard) timeout_in_minutes: 25 diff --git a/.github/mergify.yml b/.github/mergify.yml index 923f708ea10c..04ad6a5be8df 100644 --- a/.github/mergify.yml +++ b/.github/mergify.yml @@ -239,7 +239,6 @@ pull_request_rules: conditions: - or: - files~=^tests/tool_use/ - - files~=^tests/mistral_tool_use/ - files~=^tests/entrypoints/openai/tool_parsers/ - files=tests/entrypoints/openai/test_chat_with_tool_reasoning.py - files~=^vllm/entrypoints/openai/tool_parsers/ diff --git a/docker/Dockerfile.cpu b/docker/Dockerfile.cpu index b80157d2fd75..388596efd21c 100644 --- a/docker/Dockerfile.cpu +++ b/docker/Dockerfile.cpu @@ -47,7 +47,7 @@ ENV PATH="$VIRTUAL_ENV/bin:$PATH" ENV UV_HTTP_TIMEOUT=500 -# Install Python dependencies +# Install Python dependencies ENV PIP_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV UV_EXTRA_INDEX_URL=${PIP_EXTRA_INDEX_URL} ENV UV_INDEX_STRATEGY="unsafe-best-match" @@ -104,7 +104,7 
@@ RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/ccache \ --mount=type=cache,target=/workspace/vllm/.deps,sharing=locked \ --mount=type=bind,source=.git,target=.git \ - VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel + VLLM_TARGET_DEVICE=cpu python3 setup.py bdist_wheel ######################### TEST DEPS ######################### FROM base AS vllm-test-deps @@ -117,7 +117,7 @@ RUN --mount=type=bind,src=requirements/test.in,target=requirements/test.in \ uv pip compile requirements/cpu-test.in -o requirements/cpu-test.txt --index-strategy unsafe-best-match --torch-backend cpu RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -r requirements/cpu-test.txt + uv pip install -r requirements/cpu-test.txt ######################### DEV IMAGE ######################### FROM vllm-build AS vllm-dev @@ -130,12 +130,12 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked \ # install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -e tests/vllm_test_utils + uv pip install -e tests/vllm_test_utils RUN --mount=type=cache,target=/root/.cache/uv \ --mount=type=cache,target=/root/.cache/ccache \ --mount=type=bind,source=.git,target=.git \ - VLLM_TARGET_DEVICE=cpu python3 setup.py develop + VLLM_TARGET_DEVICE=cpu python3 setup.py develop COPY --from=vllm-test-deps /workspace/vllm/requirements/cpu-test.txt requirements/test.txt @@ -160,11 +160,12 @@ ADD ./benchmarks/ ./benchmarks/ ADD ./vllm/collect_env.py . ADD ./.buildkite/ ./.buildkite/ +# Create symlink for vllm-workspace to maintain CI compatibility +RUN ln -sf /workspace /vllm-workspace + # install development dependencies (for testing) RUN --mount=type=cache,target=/root/.cache/uv \ - uv pip install -e tests/vllm_test_utils - -ENTRYPOINT ["bash"] + uv pip install -e tests/vllm_test_utils ######################### RELEASE IMAGE ######################### FROM base AS vllm-openai diff --git a/pyproject.toml b/pyproject.toml index 88c5c4067f5a..034a21f1c12b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -126,6 +126,7 @@ markers = [ "core_model: enable this model test in each PR instead of only nightly", "hybrid_model: models that contain mamba layers (including pure SSM and hybrid architectures)", "cpu_model: enable this model test in CPU tests", + "cpu_test: mark test as CPU-only test", "split: run this test as part of a split", "distributed: run this test only in distributed GPU tests", "skip_v1: do not run this test with v1", diff --git a/tests/models/test_utils.py b/tests/models/test_utils.py index b52327a1844f..9b87b1a9d46c 100644 --- a/tests/models/test_utils.py +++ b/tests/models/test_utils.py @@ -1,10 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest import torch from vllm.model_executor.models.utils import AutoWeightsLoader +pytestmark = pytest.mark.cpu_test + class ModuleWithBatchNorm(torch.nn.Module): diff --git a/tests/models/test_vision.py b/tests/models/test_vision.py index a30a856a81cf..60ea2447e984 100644 --- a/tests/models/test_vision.py +++ b/tests/models/test_vision.py @@ -16,6 +16,8 @@ from vllm.platforms import current_platform from vllm.utils import get_open_port, update_environment_variables +pytestmark = pytest.mark.cpu_test + @pytest.mark.parametrize( ("select_layers", "num_layers_loaded", "max_possible_layers", diff --git a/tests/multimodal/test_cache.py b/tests/multimodal/test_cache.py index 3c737acfbfe2..48e88e7c0175 100644 
--- a/tests/multimodal/test_cache.py +++ b/tests/multimodal/test_cache.py @@ -19,6 +19,8 @@ MultiModalSharedField) from vllm.multimodal.processing import PromptInsertion +pytestmark = pytest.mark.cpu_test + def _dummy_elem( modality: str, diff --git a/tests/multimodal/test_hasher.py b/tests/multimodal/test_hasher.py index 2751e38760e1..46aba1b75f77 100644 --- a/tests/multimodal/test_hasher.py +++ b/tests/multimodal/test_hasher.py @@ -10,6 +10,8 @@ from vllm.multimodal.hasher import MultiModalHasher +pytestmark = pytest.mark.cpu_test + ASSETS_DIR = Path(__file__).parent / "assets" assert ASSETS_DIR.exists() diff --git a/tests/multimodal/test_image.py b/tests/multimodal/test_image.py index 271a85f1195e..2f21ad969e74 100644 --- a/tests/multimodal/test_image.py +++ b/tests/multimodal/test_image.py @@ -8,6 +8,8 @@ from vllm.multimodal.image import ImageMediaIO, convert_image_mode +pytestmark = pytest.mark.cpu_test + ASSETS_DIR = Path(__file__).parent / "assets" assert ASSETS_DIR.exists() diff --git a/tests/multimodal/test_inputs.py b/tests/multimodal/test_inputs.py index ffb3a6fe86b4..f35935d14ff2 100644 --- a/tests/multimodal/test_inputs.py +++ b/tests/multimodal/test_inputs.py @@ -1,10 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest import torch from vllm.multimodal.inputs import MultiModalKwargs, NestedTensors +pytestmark = pytest.mark.cpu_test + def assert_nested_tensors_equal(expected: NestedTensors, actual: NestedTensors): diff --git a/tests/multimodal/test_processing.py b/tests/multimodal/test_processing.py index 352b5b5b4fd4..7aa51acff350 100644 --- a/tests/multimodal/test_processing.py +++ b/tests/multimodal/test_processing.py @@ -25,6 +25,8 @@ from .utils import random_image +pytestmark = pytest.mark.cpu_test + # yapf: disable @pytest.mark.parametrize( diff --git a/tests/multimodal/test_registry.py b/tests/multimodal/test_registry.py index d31e75bc279f..01fbe9a52b77 100644 --- a/tests/multimodal/test_registry.py +++ b/tests/multimodal/test_registry.py @@ -11,6 +11,8 @@ from ..models.utils import build_model_context +pytestmark = pytest.mark.cpu_test + @pytest.mark.parametrize( "model_id,limit_mm_per_prompt,expected", diff --git a/tests/multimodal/test_video.py b/tests/multimodal/test_video.py index 05b7b84be7f3..1bdbb5a10a6d 100644 --- a/tests/multimodal/test_video.py +++ b/tests/multimodal/test_video.py @@ -17,6 +17,8 @@ from .utils import cosine_similarity, create_video_from_image, normalize_image +pytestmark = pytest.mark.cpu_test + NUM_FRAMES = 10 FAKE_OUTPUT_1 = np.random.rand(NUM_FRAMES, 1280, 720, 3) FAKE_OUTPUT_2 = np.random.rand(NUM_FRAMES, 1280, 720, 3) diff --git a/tests/test_inputs.py b/tests/test_inputs.py index e549834faf6f..b61b95bc4333 100644 --- a/tests/test_inputs.py +++ b/tests/test_inputs.py @@ -6,6 +6,8 @@ from vllm.inputs import zip_enc_dec_prompts from vllm.inputs.parse import parse_and_batch_prompt +pytestmark = pytest.mark.cpu_test + STRING_INPUTS = [ '', 'foo', diff --git a/tests/test_outputs.py b/tests/test_outputs.py index 4bb1c20f77f1..46da83a428e5 100644 --- a/tests/test_outputs.py +++ b/tests/test_outputs.py @@ -1,8 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest + from vllm.outputs import RequestOutput +pytestmark = pytest.mark.cpu_test + def test_request_output_forward_compatible(): output = RequestOutput(request_id="test_request_id", diff --git a/tests/mistral_tool_use/__init__.py 
b/tests/tool_use/mistral/__init__.py similarity index 100% rename from tests/mistral_tool_use/__init__.py rename to tests/tool_use/mistral/__init__.py diff --git a/tests/mistral_tool_use/conftest.py b/tests/tool_use/mistral/conftest.py similarity index 93% rename from tests/mistral_tool_use/conftest.py rename to tests/tool_use/mistral/conftest.py index e89e60c5a02e..e9dddccdc8c0 100644 --- a/tests/mistral_tool_use/conftest.py +++ b/tests/tool_use/mistral/conftest.py @@ -12,7 +12,7 @@ # for each server config, download the model and return the config -@pytest.fixture(scope="session", params=CONFIGS.keys()) +@pytest.fixture(scope="package", params=CONFIGS.keys()) def server_config(request): config = CONFIGS[request.param] @@ -26,7 +26,7 @@ def server_config(request): # run this for each server config -@pytest.fixture(scope="session") +@pytest.fixture(scope="package") def server(request, server_config: ServerConfig): model = server_config["model"] args_for_model = server_config["arguments"] diff --git a/tests/mistral_tool_use/test_mistral_tool_calls.py b/tests/tool_use/mistral/test_mistral_tool_calls.py similarity index 100% rename from tests/mistral_tool_use/test_mistral_tool_calls.py rename to tests/tool_use/mistral/test_mistral_tool_calls.py diff --git a/tests/mistral_tool_use/utils.py b/tests/tool_use/mistral/utils.py similarity index 100% rename from tests/mistral_tool_use/utils.py rename to tests/tool_use/mistral/utils.py diff --git a/tests/tool_use/test_glm4_moe_tool_parser.py b/tests/tool_use/test_glm4_moe_tool_parser.py index 91913c933184..bb8c36fb13ad 100644 --- a/tests/tool_use/test_glm4_moe_tool_parser.py +++ b/tests/tool_use/test_glm4_moe_tool_parser.py @@ -10,6 +10,8 @@ from vllm.entrypoints.openai.tool_parsers import Glm4MoeModelToolParser from vllm.transformers_utils.tokenizer import get_tokenizer +pytestmark = pytest.mark.cpu_test + pytest.skip("skip glm4_moe parser test", allow_module_level=True) # Use a common model that is likely to be available MODEL = "zai-org/GLM-4.5" diff --git a/tests/tool_use/test_jamba_tool_parser.py b/tests/tool_use/test_jamba_tool_parser.py index 57ace1fa22ac..8f819301e264 100644 --- a/tests/tool_use/test_jamba_tool_parser.py +++ b/tests/tool_use/test_jamba_tool_parser.py @@ -15,6 +15,8 @@ from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer +pytestmark = pytest.mark.cpu_test + MODEL = "ai21labs/Jamba-tiny-dev" diff --git a/tests/tool_use/test_kimi_k2_tool_parser.py b/tests/tool_use/test_kimi_k2_tool_parser.py index bd030632f167..ad9af6361802 100644 --- a/tests/tool_use/test_kimi_k2_tool_parser.py +++ b/tests/tool_use/test_kimi_k2_tool_parser.py @@ -10,6 +10,8 @@ from vllm.entrypoints.openai.tool_parsers import KimiK2ToolParser from vllm.transformers_utils.tokenizer import get_tokenizer +pytestmark = pytest.mark.cpu_test + # Use a common model that is likely to be available MODEL = "moonshotai/Kimi-K2-Instruct" diff --git a/tests/tool_use/test_minimax_tool_parser.py b/tests/tool_use/test_minimax_tool_parser.py index ddf26007121e..7aa19c9a51c9 100644 --- a/tests/tool_use/test_minimax_tool_parser.py +++ b/tests/tool_use/test_minimax_tool_parser.py @@ -12,6 +12,8 @@ from vllm.entrypoints.openai.tool_parsers import MinimaxToolParser from vllm.transformers_utils.tokenizer import get_tokenizer +pytestmark = pytest.mark.cpu_test + # Use a common model that is likely to be available MODEL = "MiniMaxAi/MiniMax-M1-40k" diff --git 
a/tests/tool_use/test_qwen3coder_tool_parser.py b/tests/tool_use/test_qwen3coder_tool_parser.py index 57eaf84d36f2..ade089e8246e 100644 --- a/tests/tool_use/test_qwen3coder_tool_parser.py +++ b/tests/tool_use/test_qwen3coder_tool_parser.py @@ -18,6 +18,8 @@ from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer +pytestmark = pytest.mark.cpu_test + MODEL = "Qwen/Qwen3-Coder-30B-A3B-Instruct-FP8" diff --git a/tests/tool_use/test_seed_oss_tool_parser.py b/tests/tool_use/test_seed_oss_tool_parser.py index 118c7534622e..5100b5ac120b 100644 --- a/tests/tool_use/test_seed_oss_tool_parser.py +++ b/tests/tool_use/test_seed_oss_tool_parser.py @@ -16,6 +16,8 @@ from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer +pytestmark = pytest.mark.cpu_test + # Use a common model that is likely to be available MODEL = "ByteDance-Seed/Seed-OSS-36B-Instruct" diff --git a/tests/tool_use/test_tool_choice_required.py b/tests/tool_use/test_tool_choice_required.py index 130e9547bdcc..7c63816cd6f5 100644 --- a/tests/tool_use/test_tool_choice_required.py +++ b/tests/tool_use/test_tool_choice_required.py @@ -12,6 +12,8 @@ ChatCompletionToolsParam) from vllm.entrypoints.openai.serving_chat import OpenAIServingChat +pytestmark = pytest.mark.cpu_test + EXAMPLE_TOOLS = [ { "type": "function", diff --git a/tests/tool_use/test_xlam_tool_parser.py b/tests/tool_use/test_xlam_tool_parser.py index c07ca0f56d6b..94e2a37cbf63 100644 --- a/tests/tool_use/test_xlam_tool_parser.py +++ b/tests/tool_use/test_xlam_tool_parser.py @@ -14,6 +14,8 @@ from vllm.transformers_utils.detokenizer_utils import detokenize_incrementally from vllm.transformers_utils.tokenizer import AnyTokenizer, get_tokenizer +pytestmark = pytest.mark.cpu_test + # Use a common model that is likely to be available MODEL = "Salesforce/Llama-xLAM-2-8B-fc-r" diff --git a/tests/v1/core/test_async_scheduler.py b/tests/v1/core/test_async_scheduler.py index c153e38fe3df..8ffe2e57b532 100644 --- a/tests/v1/core/test_async_scheduler.py +++ b/tests/v1/core/test_async_scheduler.py @@ -11,6 +11,8 @@ from .utils import create_requests, create_scheduler +pytestmark = pytest.mark.cpu_test + def _make_model_runner_output( scheduler_output: SchedulerOutput, ) -> ModelRunnerOutput: diff --git a/tests/v1/core/test_encoder_cache_manager.py b/tests/v1/core/test_encoder_cache_manager.py index 4e3cace86be6..6ef15b337ef0 100644 --- a/tests/v1/core/test_encoder_cache_manager.py +++ b/tests/v1/core/test_encoder_cache_manager.py @@ -1,9 +1,12 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import pytest from vllm.multimodal.inputs import MultiModalFeatureSpec, PlaceholderRange from vllm.v1.core.encoder_cache_manager import EncoderCacheManager +pytestmark = pytest.mark.cpu_test + # ------------------ Mock Classes ------------------ # class MockRequest: diff --git a/tests/v1/core/test_kv_cache_utils.py b/tests/v1/core/test_kv_cache_utils.py index 452b16ef4a91..09f43a793db2 100644 --- a/tests/v1/core/test_kv_cache_utils.py +++ b/tests/v1/core/test_kv_cache_utils.py @@ -32,6 +32,8 @@ # yapf: enable +pytestmark = pytest.mark.cpu_test + @pytest.fixture(autouse=True) def _auto_init_hash_fn(request): diff --git a/tests/v1/core/test_prefix_caching.py b/tests/v1/core/test_prefix_caching.py index 5769099e0af1..93ad4d8080e6 100644 --- 
a/tests/v1/core/test_prefix_caching.py +++ b/tests/v1/core/test_prefix_caching.py @@ -25,6 +25,8 @@ from vllm.v1.kv_cache_interface import (FullAttentionSpec, KVCacheConfig, KVCacheGroupSpec, SlidingWindowSpec) +pytestmark = pytest.mark.cpu_test + @pytest.fixture(autouse=True) def _auto_init_hash_fn(request): @@ -1267,7 +1269,7 @@ def test_kv_cache_events(blocks_to_cache: int): def test_eagle_enabled_removes_last_block(): - """Verify Eagle does NOT remove blocks when request + """Verify Eagle does NOT remove blocks when request length is divisible by block size.""" block_size = 16 manager = KVCacheManager( diff --git a/tests/v1/core/test_scheduler.py b/tests/v1/core/test_scheduler.py index 5e2bdaa75d3f..3de6dffc3395 100644 --- a/tests/v1/core/test_scheduler.py +++ b/tests/v1/core/test_scheduler.py @@ -23,6 +23,8 @@ from .utils import EOS_TOKEN_ID, create_requests, create_scheduler +pytestmark = pytest.mark.cpu_test + def test_add_requests(): scheduler = create_scheduler() diff --git a/tests/v1/core/test_single_type_kv_cache_manager.py b/tests/v1/core/test_single_type_kv_cache_manager.py index 1f6825b6d24e..166be8bda05e 100644 --- a/tests/v1/core/test_single_type_kv_cache_manager.py +++ b/tests/v1/core/test_single_type_kv_cache_manager.py @@ -3,6 +3,7 @@ import random +import pytest import torch from vllm.v1.core.block_pool import BlockPool @@ -13,6 +14,8 @@ from vllm.v1.kv_cache_interface import (ChunkedLocalAttentionSpec, SlidingWindowSpec) +pytestmark = pytest.mark.cpu_test + def get_sliding_window_manager(sliding_window_spec, block_pool): return SlidingWindowManager(sliding_window_spec, diff --git a/tests/v1/kv_connector/unit/test_output_aggreagator.py b/tests/v1/kv_connector/unit/test_output_aggreagator.py index 5d2b27a9eb4d..607da6425a31 100644 --- a/tests/v1/kv_connector/unit/test_output_aggreagator.py +++ b/tests/v1/kv_connector/unit/test_output_aggreagator.py @@ -3,9 +3,13 @@ from concurrent.futures import Future from typing import Optional +import pytest + from vllm.distributed.kv_transfer.kv_connector.utils import KVOutputAggregator from vllm.v1.outputs import KVConnectorOutput, ModelRunnerOutput +pytestmark = pytest.mark.cpu_test + class DummyModelRunnerOutput(ModelRunnerOutput): diff --git a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py index 380e72a15633..79b26cf57acd 100644 --- a/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py +++ b/tests/v1/kv_connector/unit/test_remote_decode_lifecycle.py @@ -2,12 +2,16 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import copy +import pytest + from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput from vllm.v1.request import FinishReason, RequestStatus from .utils import (assert_scheduler_empty, create_model_runner_output, create_request, create_scheduler, create_vllm_config) +pytestmark = pytest.mark.cpu_test + def test_basic_lifecycle(): """Test lifecycle of a Remote Decode request.""" diff --git a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py index 21fec5344255..207cf64f4e3e 100644 --- a/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py +++ b/tests/v1/kv_connector/unit/test_remote_prefill_lifecycle.py @@ -2,12 +2,16 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import copy +import pytest + from vllm.v1.outputs import EMPTY_MODEL_RUNNER_OUTPUT, KVConnectorOutput from vllm.v1.request import 
FinishReason, RequestStatus from .utils import (assert_scheduler_empty, create_model_runner_output, create_request, create_scheduler, create_vllm_config) +pytestmark = pytest.mark.cpu_test + def test_basic_lifecycle(): """Test lifecycle of a remote prefill.""" diff --git a/tests/v1/metrics/test_metrics_reader.py b/tests/v1/metrics/test_metrics_reader.py index c05de5e4cb64..16bca359fc2f 100644 --- a/tests/v1/metrics/test_metrics_reader.py +++ b/tests/v1/metrics/test_metrics_reader.py @@ -7,6 +7,8 @@ from vllm.v1.metrics.reader import (Counter, Gauge, Histogram, Vector, get_metrics_snapshot) +pytestmark = pytest.mark.cpu_test + @pytest.fixture(autouse=True) def test_registry(monkeypatch): diff --git a/tests/v1/structured_output/test_utils.py b/tests/v1/structured_output/test_utils.py index 4e7c4b33e8c4..0e2658304d12 100644 --- a/tests/v1/structured_output/test_utils.py +++ b/tests/v1/structured_output/test_utils.py @@ -6,6 +6,8 @@ from vllm.v1.structured_output.backend_xgrammar import ( has_xgrammar_unsupported_json_features) +pytestmark = pytest.mark.cpu_test + @pytest.fixture def unsupported_string_schemas(): diff --git a/tests/v1/test_serial_utils.py b/tests/v1/test_serial_utils.py index 118b40d0ef41..5d467687c308 100644 --- a/tests/v1/test_serial_utils.py +++ b/tests/v1/test_serial_utils.py @@ -16,6 +16,8 @@ MultiModalSharedField, NestedTensors) from vllm.v1.serial_utils import MsgpackDecoder, MsgpackEncoder +pytestmark = pytest.mark.cpu_test + class UnrecognizedType(UserDict): From bb6d43047e24f29ae5ef9d3abd4c93b447f2f0c7 Mon Sep 17 00:00:00 2001 From: ihb2032 <40718643+ihb2032@users.noreply.github.com> Date: Tue, 30 Sep 2025 21:48:07 +0800 Subject: [PATCH 507/518] [Fix] Improve CPU backend compatibility for RISC-V (#25816) Signed-off-by: lyd1992 Signed-off-by: ihb2032 <1355790728@qq.com> --- vllm/engine/arg_utils.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ce0f1708235f..ec61fc4b9b06 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -1160,11 +1160,12 @@ def create_engine_config( # Set default arguments for V1 Engine. 
self._set_default_args(usage_context, model_config) - # Disable chunked prefill for POWER (ppc64le)/ARM/s390x CPUs in V1 + # Disable chunked prefill for POWER (ppc64le)/ARM/s390x/RISCV CPUs in V1 if current_platform.is_cpu() and current_platform.get_cpu_architecture( - ) in (CpuArchEnum.POWERPC, CpuArchEnum.S390X, CpuArchEnum.ARM): - logger.info("Chunked prefill is not supported for ARM and POWER " - "and S390X CPUs; " + ) in (CpuArchEnum.POWERPC, CpuArchEnum.S390X, CpuArchEnum.ARM, + CpuArchEnum.RISCV): + logger.info("Chunked prefill is not supported for ARM and POWER, " + "S390X and RISC-V CPUs; " "disabling it for V1 backend.") self.enable_chunked_prefill = False assert self.enable_chunked_prefill is not None From 35fe398c7cae74519c6d495770192d6834640c61 Mon Sep 17 00:00:00 2001 From: Asaf Joseph Gardin <39553475+Josephasafg@users.noreply.github.com> Date: Tue, 30 Sep 2025 17:30:44 +0300 Subject: [PATCH 508/518] [Kernel][Moe Configs] Add more tuned triton configs for ExpertsInt8 and FP8 (#25858) Signed-off-by: asafg <39553475+Josephasafg@users.noreply.github.com> --- ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 198 ++++++------------ ...072,device_name=NVIDIA_H100_80GB_HBM3.json | 92 ++++---- ...ice_name=NVIDIA_H200,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...792,device_name=NVIDIA_H100_80GB_HBM3.json | 146 +++++++++++++ ...me=NVIDIA_H100_80GB_HBM3,dtype=float8.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 104 ++++----- ...ice_name=NVIDIA_H200,dtype=int8_w8a16.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 146 +++++++++++++ ...me=NVIDIA_H100_80GB_HBM3,dtype=float8.json | 146 +++++++++++++ ...VIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json | 82 ++++---- 15 files changed, 1808 insertions(+), 274 deletions(-) create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json diff --git 
a/vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..99501df6f176 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "32": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "96": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + } + } \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json index 2c78bfaba789..2e0dd7a4b950 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -1,218 +1,146 @@ { "1": { "BLOCK_SIZE_M": 16, - 
"BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, + "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "2": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 8, + "num_warps": 4, "num_stages": 5 }, "4": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 8, + "num_warps": 4, "num_stages": 5 }, "8": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 4 + "num_warps": 4, + "num_stages": 5 }, "16": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 5 + "num_warps": 4, + "num_stages": 3 }, "24": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "GROUP_SIZE_M": 16, + "num_warps": 4, "num_stages": 3 }, "32": { - "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 5 + "num_warps": 8, + "num_stages": 3 }, "48": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 4 }, "64": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 4 }, "96": { "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 2 + "num_stages": 5 }, "128": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, "num_warps": 4, - "num_stages": 2 + "num_stages": 4 }, "256": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 64, - "num_warps": 8, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, "num_stages": 3 }, "512": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 + "num_warps": 4, + "num_stages": 4 }, "1024": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, "num_stages": 3 }, "1536": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, "num_stages": 3 }, "2048": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, "num_stages": 3 }, "3072": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, "num_stages": 3 }, "4096": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - 
"num_stages": 3 - }, - "5120": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "9216": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "13312": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "17408": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "25600": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "33792": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "41984": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "50176": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 - }, - "58368": { - "BLOCK_SIZE_M": 256, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, "num_stages": 3 } } \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json index 4da841e74a79..4ea86340c324 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H100_80GB_HBM3.json @@ -5,7 +5,7 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 4 + "num_stages": 5 }, "2": { "BLOCK_SIZE_M": 16, @@ -13,7 +13,7 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "4": { "BLOCK_SIZE_M": 16, @@ -21,7 +21,7 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "8": { "BLOCK_SIZE_M": 16, @@ -29,7 +29,7 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "16": { "BLOCK_SIZE_M": 16, @@ -37,52 +37,52 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 5 + "num_stages": 3 }, "24": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 4 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 }, "32": { "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 5 + "num_warps": 8, + "num_stages": 3 }, "48": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 5 + "num_stages": 4 }, "64": { - "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 5 + "num_stages": 4 }, "96": { "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, "num_warps": 4, - "num_stages": 4 + "num_stages": 5 }, "128": { - "BLOCK_SIZE_M": 64, - "BLOCK_SIZE_N": 128, - 
"BLOCK_SIZE_K": 128, + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 16, "num_warps": 4, "num_stages": 4 @@ -91,57 +91,57 @@ "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 5 + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 }, "512": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 16, - "num_warps": 8, + "GROUP_SIZE_M": 1, + "num_warps": 4, "num_stages": 4 }, "1024": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 32, - "num_warps": 8, - "num_stages": 4 + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 }, "1536": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 32, - "num_warps": 8, - "num_stages": 4 + "num_warps": 4, + "num_stages": 3 }, "2048": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 16, - "num_warps": 8, + "num_warps": 4, "num_stages": 3 }, "3072": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 4 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 }, "4096": { "BLOCK_SIZE_M": 128, - "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 32, - "num_warps": 8, - "num_stages": 4 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 }, "5120": { "BLOCK_SIZE_M": 128, diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json new file mode 100644 index 000000000000..f3f1a562710b --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "24": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "128": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 
128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..19046fcf1d6a --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "24": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + 
"BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..5f9422fe6f7c --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=1,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "32": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "96": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + } +} \ No newline at end of file diff --git 
a/vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..6d0cdfd27429 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=14336,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 5 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 5 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + } +} diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..de8eec366eca --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 
5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + } + } \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json new file mode 100644 index 000000000000..80fce79fb64c --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=1792,device_name=NVIDIA_H100_80GB_HBM3.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 8, + "num_stages": 3 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 
16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 8, + "num_stages": 2 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "96": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "1536": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 8, + "num_stages": 4 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 4 + } + } \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json new file mode 100644 index 000000000000..54d3bf190ebe --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 5 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "96": { + 
"BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 5 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 + }, + "3072": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json index 26f9abd6b789..6a4018195603 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -2,73 +2,73 @@ "1": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 5 }, "2": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, + "GROUP_SIZE_M": 16, "num_warps": 4, - "num_stages": 4 + "num_stages": 3 }, "4": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, - "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, "num_warps": 8, - "num_stages": 4 + "num_stages": 3 }, "8": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, "num_warps": 4, "num_stages": 5 }, "16": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 32, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 64, + "GROUP_SIZE_M": 32, "num_warps": 4, - "num_stages": 4 + "num_stages": 3 }, "24": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, - "num_warps": 4, + "GROUP_SIZE_M": 1, + "num_warps": 8, "num_stages": 3 }, "32": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, + "GROUP_SIZE_M": 16, "num_warps": 4, - "num_stages": 4 + "num_stages": 3 }, "48": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 64, - "num_warps": 4, - "num_stages": 4 + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 }, "64": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, - "num_warps": 4, + "GROUP_SIZE_M": 1, + "num_warps": 8, "num_stages": 3 }, "96": { @@ -77,22 
+77,22 @@ "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 32, "num_warps": 4, - "num_stages": 4 + "num_stages": 3 }, "128": { "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 4 + "num_warps": 8, + "num_stages": 3 }, "256": { - "BLOCK_SIZE_M": 32, - "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 16, - "num_warps": 4, + "GROUP_SIZE_M": 32, + "num_warps": 8, "num_stages": 4 }, "512": { @@ -100,47 +100,47 @@ "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, - "num_warps": 8, + "num_warps": 4, "num_stages": 4 }, "1024": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 5 + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 }, "1536": { "BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, "num_warps": 8, - "num_stages": 3 + "num_stages": 4 }, "2048": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 5 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 }, "3072": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 5 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 }, "4096": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, "num_stages": 3 } } \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json new file mode 100644 index 000000000000..4f500d487c56 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3072,device_name=NVIDIA_H200,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 5 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 32, + 
"BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json new file mode 100644 index 000000000000..ed8afa6b6db8 --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=3584,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 4 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 2 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 5 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 8, + "num_stages": 3 + }, + "256": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 5 + }, + "1024": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "1536": { + 
"BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "2048": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 4 + }, + "3072": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "4096": { + "BLOCK_SIZE_M": 128, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json new file mode 100644 index 000000000000..5fea55a8000f --- /dev/null +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=float8.json @@ -0,0 +1,146 @@ +{ + "1": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "2": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 5 + }, + "4": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 4 + }, + "8": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "16": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 3 + }, + "24": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "32": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 3 + }, + "48": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "64": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 + }, + "96": { + "BLOCK_SIZE_M": 16, + "BLOCK_SIZE_N": 128, + "BLOCK_SIZE_K": 256, + "GROUP_SIZE_M": 1, + "num_warps": 4, + "num_stages": 2 + }, + "128": { + "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 128, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "256": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "512": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 + }, + "1024": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "1536": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "2048": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 + }, + "3072": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 2 + }, + "4096": { + "BLOCK_SIZE_M": 64, + "BLOCK_SIZE_N": 256, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 32, + "num_warps": 4, + "num_stages": 2 
+ } +} \ No newline at end of file diff --git a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json b/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json index bbb2386046b1..1e3f46e0ba84 100644 --- a/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json +++ b/vllm/model_executor/layers/fused_moe/configs/E=16,N=7168,device_name=NVIDIA_H100_80GB_HBM3,dtype=int8_w8a16.json @@ -2,7 +2,7 @@ "1": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 32, - "BLOCK_SIZE_K": 256, + "BLOCK_SIZE_K": 128, "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 4 @@ -20,78 +20,78 @@ "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, - "num_stages": 4 + "num_warps": 8, + "num_stages": 3 }, "8": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 1, + "GROUP_SIZE_M": 16, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "16": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, + "GROUP_SIZE_M": 64, "num_warps": 4, - "num_stages": 3 + "num_stages": 5 }, "24": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 32, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 64, - "num_warps": 4, + "GROUP_SIZE_M": 1, + "num_warps": 8, "num_stages": 3 }, "32": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 256, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 64, - "num_warps": 4, + "GROUP_SIZE_M": 1, + "num_warps": 8, "num_stages": 3 }, "48": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, + "num_warps": 8, "num_stages": 3 }, "64": { "BLOCK_SIZE_M": 16, - "BLOCK_SIZE_N": 64, + "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 16, - "num_warps": 4, + "GROUP_SIZE_M": 1, + "num_warps": 8, "num_stages": 3 }, "96": { "BLOCK_SIZE_M": 16, "BLOCK_SIZE_N": 64, "BLOCK_SIZE_K": 256, - "GROUP_SIZE_M": 32, + "GROUP_SIZE_M": 64, "num_warps": 4, "num_stages": 4 }, "128": { "BLOCK_SIZE_M": 32, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 256, "GROUP_SIZE_M": 1, - "num_warps": 4, + "num_warps": 8, "num_stages": 3 }, "256": { - "BLOCK_SIZE_M": 32, + "BLOCK_SIZE_M": 64, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 64, + "GROUP_SIZE_M": 1, "num_warps": 4, "num_stages": 3 }, @@ -100,47 +100,47 @@ "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 3 + "num_warps": 4, + "num_stages": 4 }, "1024": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 1, - "num_warps": 8, - "num_stages": 5 + "GROUP_SIZE_M": 64, + "num_warps": 4, + "num_stages": 3 }, "1536": { "BLOCK_SIZE_M": 256, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, + "BLOCK_SIZE_K": 64, "GROUP_SIZE_M": 1, "num_warps": 8, - "num_stages": 3 + "num_stages": 4 }, "2048": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 64, - "num_warps": 8, - "num_stages": 3 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 4 }, "3072": { "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, "BLOCK_SIZE_K": 64, - "GROUP_SIZE_M": 32, - "num_warps": 8, - "num_stages": 4 + "GROUP_SIZE_M": 16, + "num_warps": 4, + "num_stages": 3 }, "4096": { - "BLOCK_SIZE_M": 256, + "BLOCK_SIZE_M": 128, "BLOCK_SIZE_N": 128, - "BLOCK_SIZE_K": 128, - "GROUP_SIZE_M": 1, - "num_warps": 8, + "BLOCK_SIZE_K": 64, + "GROUP_SIZE_M": 16, + 
"num_warps": 4, "num_stages": 3 } } \ No newline at end of file From 099aaee53673500ac6a1a4282a9f34ee015cd23b Mon Sep 17 00:00:00 2001 From: Sergio Paniego Blanco Date: Tue, 30 Sep 2025 16:35:06 +0200 Subject: [PATCH 509/518] Add Hugging Face Inference Endpoints guide to Deployment docs (#25886) Signed-off-by: sergiopaniego Signed-off-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> Co-authored-by: Harry Mellor <19981378+hmellor@users.noreply.github.com> --- .../hf-inference-endpoints-catalog.png | Bin 0 -> 641771 bytes .../hf-inference-endpoints-choose-infra.png | Bin 0 -> 358264 bytes ...nference-endpoints-click-deploy-button.png | Bin 0 -> 833513 bytes ...nference-endpoints-configure-container.png | Bin 0 -> 273739 bytes ...hf-inference-endpoints-create-endpoint.png | Bin 0 -> 362703 bytes ...ference-endpoints-locate-deploy-button.png | Bin 0 -> 799424 bytes .../hf-inference-endpoints-new-endpoint.png | Bin 0 -> 52361 bytes ...hf-inference-endpoints-select-hardware.png | Bin 0 -> 367803 bytes .../hf-inference-endpoints-select-model.png | Bin 0 -> 83638 bytes .../frameworks/hf_inference_endpoints.md | 170 ++++++++++++++++++ 10 files changed, 170 insertions(+) create mode 100644 docs/assets/deployment/hf-inference-endpoints-catalog.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-choose-infra.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-click-deploy-button.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-configure-container.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-create-endpoint.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-locate-deploy-button.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-new-endpoint.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-select-hardware.png create mode 100644 docs/assets/deployment/hf-inference-endpoints-select-model.png create mode 100644 docs/deployment/frameworks/hf_inference_endpoints.md diff --git a/docs/assets/deployment/hf-inference-endpoints-catalog.png b/docs/assets/deployment/hf-inference-endpoints-catalog.png new file mode 100644 index 0000000000000000000000000000000000000000..a26681eec7b33bcdfddb335d31e92910062b82c1 GIT binary patch literal 641771 zcmbrlcR&+O*Eb9zqH+nMfFex+rAig)AOg~RuL0>b(rZ8j1*It+Y0`V|H3HH@FQJ1# zXrTuPAtd>t-uF|!_xgDy@;cp`= zlOFOD^ME-q$MEIb3oLI-2FK%FC5J{8Vy9%cM}3Q?5fIf`}@Q+R>GB z`8|QrMbxr4YH?8t=f58`f@noX6TMyPPEvB$R3$iOK~>LuCYWAdr=%_-`5Y|y@Ovf^ z;9-(mb`CQER~Ke^5bj8zd6q;UZ-k@6Rmdr`#gY=d$bO`DZ-ChMHtssX64U2_W5S0@ zkARY#fZ`wUhtprANgkDJuBuSEXs&utwLi1F%Q*P{oPxm0QxcGP%|YH$V5Fuq=HZKZh`eoxwo$a&?fN1mh@hBF-kw$G2TJ4U528&D0JKO*@A8#lqG z;RNV$E$c9j575dFx)RFN4@x3wHuf}!Bi}sM3p+1kk>J1i+L=JZgp00L|Fumb(=%!7 zn7QlgCdmTdLw&Nvg`$Oi^xi}T|GZ86GVM`+ROJ0?_JP|ZoX+Oh(VMQ4JPfZT!X0Ig zzcoHey<1IF)de>j{Ol?sbE)0#G)j_(wUtx5BA*vyZ~M_l<|5hBLNDOX51nu#(jOnc z+;)6H$3F6rl(AazDP;8(*?b>m)qnxl!^Dp)j|e$HrQ$NwbYfOuF?C5gF)t=%NCaJH zI?C`UtJuUM=IW(1&VtJzQ=wDw{xn5DtNdTROq9geieC5jGbz4I`_y}yiz2KlZfZ<_ zCgj)0KzE8$n`UBBJFB>Qr`Pkw{=ujo%N2-l^e zUMHmaglgB3J+!n_DT*axkVzxyl;*yyxUUl%>GSzIgY>IQ@|g3=o92HtRdV#=B%~mL z7Q&xD-hWNm#aX@8mH6Y~>xX>t_f=}UZ@mF5(8jzilPVn+rbW_4uSH1wCEezTqs~{y?%KsX@=Nrm{dFyi)##o@}2)-z$X6yANos zXOL5iqwdhxx9f{um6c|u$ExDh=rx03wP6c}-uaT`4&=>FCv@ji z)GdO{H6kJ+DbUZ(XredYi6rkt2mExJIh@MA_PvSu%KOq6-9HCD5M-~7ReB?3dKj?gMLUH%Uohtb>is!2lS07O*aK%REK1vC{ zs`(J{`LO~`^|d0|`bTwrgzMMc?uf*AJQVtZRv|5?>lMmgI)mp#>d+1clXX*jlTABCJ2y|Mc3LLY3zv0*HPw;SD>^pP$YeY#C|@yzd0TUuav;FqNm#j)D)b%$fY 
zdv<#E3}db3vlZQ=U%yf1B^1<|I5tfCmKDsgICJhjbk6dsI`i7oo+GEZ-<8&3ef_@j zeX;wt_ZLNF=^g1a)aunzhRxEw(j`ruP4yRL7iXJwo8>)QlJ8G^>bKnB*XJkaui#sb z;d{_U!~CGeNd=QU%m+N_*+IcSo+zE3ZJSQ{SB#94kB^K2%9aa*paepd0^xkg)|K5W zk5j>wldgm)`wU`P1Wh{z118nBkz1-n5>go^siPvJ4}*A)=JD3|@LL zGwO8dWHYBTZ&LMmuG-n%Nod}zT4rxyw|cjDmwPVfZ2#;vdgzRP?fRN{uVRmGOw0ba z{qL@9ZwlQT5BfYJ-*CA(c?9q+y$%>SpIRG<#_g_e#KUX5w$HBa-LSa(>B%$o^NHDE z!xa-)K==ENJG<4RdHt}wlD3jQ&j|P}p%3#c4S6nQE}07w3)cIm`w|S)4E7AE;^j4- zGh@53UH6QuBLpMwGoEG~v@WzAw~_f_e(tTLueHsiw`JiUvp$k{F>HX4ef80HE@C8{gSRlI8!b-w=-|K&n z3a^a#EcZV_7NhE;GiWf{fAKvL&F9!~-3^`bt+HLa z?>e#O)!P5h#OGpiw8|~NTOl(h}Cdl(B#$(653P9`fMovgV4L1H@eYJI;E=?s%tRf-y3C zY9Z;?Vp4mtyF0}n%S24!n#L{jZ;%6`wQ8Es8dlGdytZKf)E)YrhCR!D&Ux}P&NC^o z&t5BY9fu)hVKZSnLA95z6Ba!`$FJpDsL22<^(^(Z-L!kZR5F(hEe(YQxn0`pN7yS* zX)fk)#%RV+G13Z~`LyCfPh=1JA#vo!1r1A*f~d)a3BH2)kA2X{LV^^-ab zoDXRvX<)p`AQ{9GbW+uF&bQ}^x*Nw@r=Ff!e{vOR;`bb8h;-jZkheVsJ7U_m-w#DF z;tp{+Wk5FoQh9bTSSkrT9#jYCfr5?DIp~A7VSmg|)oo);gW$o;!En2}A8x&?G$uEO zD$_|yFo+BnVEYhg{$e2Mh(wB*_nY{f6~W;qH-TM00q;uF7g0G+Njk3Z9Tl<&%O>dlQ?aF)Ty?g zndrHYoaYnFz`>Y=Msf4IF$|c1#w3JhHNGE#S1&$U8z|bSsuFPE!`BI}5<0o8eC!VX^7(e<-?>R4 z--!MlzD9{pBaqgXQB=g=wJqJOtzF#hfF2OB;#NH2#v27gcLD|n_YuVhovl4A9{M;txwwn^NHG1*A&L+GBJ(gk z{GG+aQG&@pRpX%y(9Qbcb8cR4UM9(#4<9}hceAn))slVnuj2Ta1e2YI#~V=|9&c}N zZf^l@pqniZpNNPE4=+CtKmSvFj;HRvE*=&>PhH%Z|5WntdStELE#2(jc-RA79{#G= z;x*9ILxPFvS402x`_oTrAN&7q$;JI&kA;6go?j_EeB8V||5Z1>sQ53csD{0dwUeQ& zy)%By@NG!)2?{Dv;gu|J&fCIu$h*del}>)T62=mX5(0yjklo?cBBZps zx4!;4db()U7w%R5EFm=X3ftRXr-)84y%^HyX@yCV3E3Xu&?`d97ysxCPrTCfCOIat zHa_Ba)t{+uuJC43acGm={6_Fk&UF>yH_0O@2T^)Czsu)xzU>mv|J~_7UH^Okx0ML# zHe}elJbyo5FkKb_8A4=La7^JJK5rj2v0Gk#em-m09!WTF=2J9fAtkW}0kOtIt(*75Py-1A za>WV*(*Ni+;a{$>9Oc7KaWOU%F*Y|NEBiTR*#M^1yA;d9kh>NP4Bv@-zBm8V{tBgwglf{UnU4n|LNPnKdqxl{o+@;4*_}PmESo99vCqGj;{o3J`7-(q1dQHZS|_G(n(a3 z2s{XwAocqerb!^#L(SMmvEeIKbsFfy*h;>kF5q98@w>`@+Su=In7<-||LP2X7m<3X zuW2aX<7mmamn0tMCf|vLZx7O4XRyd{9c;8-=s9le#y{z9obSzlz+4YYgkOZBPE>`i ziP?~oZA$)?6{b$OOXxp35vw>O<1|Y|v0(`Z@k;tI9*#q(v9Q*r&U5nsUP*|ZdBDF1 z=YREgg;1RL=I<1I77zhBKK_>E6(RmfR)T46mHvqzerNlg!khEx@>JR{)E5_W`SK4N z2dCwJA2vPGbLZ6MN{#!fYOIOp_kVO&zaWJ1W+o;oI}0i(Yx}E}GH#RnVw=kP3|`3< zqJYRfREKPGg-%r(@PQ@v_3wj!=H0*8;`>E74jh65hn*6@38#rlNesv<8O$ZV`@Q{7 z2{_a#eh$=_>tE6R!|Sr@giH{5bz`M}vbF|mYX<+weTLeOv8;BP;G3<)BsdASOlqvs3n#%lvDnzAh(v751YLoSiG@k4Z>s?G3wr+AUbf#*)Hgw%n5 z!U7NF*(+V#V*?rwzd;QBgyrkR3WVyNCpyQotoe$xB>?r_j~6W@CE}xIvpY50?#qX> zp1dI6m7XO?drMl?-<{raW)x?SU%#ugd)N_F2&%uiX8(JgrzUysqMlfUz<%M;qto2X z-PKwd=y{)q5bVyiNw_S&F}oI~5Q^IMLr?zMFmF>3hx0O0(6C?qCycK~L;Pe|+N+!Bu|h2NzXmR zy|sv=?(=s|YSax4x4tPtB=V6~F^WmRFzrr1d-~B^>k-n4g z+=GbxNxskE6Z4yNz5;tE680y#i|Bwt@_QKWKBFXhqw|L+WyBt0RsA0IG_^O7N`61v0@_gZb*m14pf znAhTv9q3A?Ny`|BdMTr!RL{)C7t}}mp@?Y-My34gm__Ej^tn#{^O1OH*r7&Ha60Pk z|2jVZ_focge)@fV_69#H?yBHdMG@8Dawt`j|2$R9g&)fkW(bd2s{!>>BbJN%Y7R@S zO@A?n3#XN?|B*a|yD9PglTDWDHsf0SC&EbZ8rusi)Y|oUA{Se^T$Bkrdy#$n)7MHA zoX>z~^u~2Ak0TOt0&KJq)K7EYyJaxurf*I~XYTHkpO$!k_y0~-lfoIr zrKRGGw{GQIO7>3n{z>o;rtJ5iP@>F0cY_bAOF&f2k^ZorxjA&)?KJOwP7hG5v-PJX zTT`|#Kt7g{-(yQrWKe9r?aj}ZW15@k>|ghwryA|RKv#$T%P2fYZ!T-xK!2B1>yeDU zXTgB(Y**`PuVmaMLEjMcufe|m&4pnaH@}md6EMDvBAfohmgjg06;D`^DIDv#_{GM* zQrYnYqJZmyq6SjG2`e&e#CU=*73vVZ!Lm))AQnICE#0F3QsO76Kgir&k&G7_ zS=C+iH`UH$<-Y=WgMwq;ykd{u%K8{XZ%#7P!7fA=&Ra3_!8Fs&YbfBosX)15LkHo% z*RB6_5qS8Fr1{n|v*U9HZ5oAwPggu0H8~8WQsb}un(Sd3IU%Qi2=(W@t~T#f zQqpfXYcBLx-SlWHZ(Nu!Z(PJG9O=s?t7}?sXE}X>G_S38E9dH#BzhGvVVEC37Bs#{ zF!j>{p3U$z=lFXa-fcn5FP1MJO`m91y`=Fx0c^HeEe1sI&(N<$9bNm|Vtw$;>i5;| z2(BQGWzAe)#KtuK8gi@6oa)(Abn9JNEGEy@)(vh5)BF5Y_qcdpF1n;iOG|sAbQK~j 
z*lpY4YhvP&J?Kbex77Nc%b@WE?xa6~7*oYDKzN#$mp{&{kMv1!My~kE(5}K!ehM^!CoL)%iv3xXsxY%x-=u=Xn65 zmkcmyH*7Z<7CQ)F2S|gcucG^7k+w__{ZfPH$~z%3+aIDpmcW5)_wfRS9i%`>x98p( zY5U;;A66nCmsK!cbbKJOLAu0x2J!h|dx`!SB$cne3ppqS#^2&)xKCahlW;3Hm2mBm z>Gc!yK#k|$eayvqrPyc{got`%ns0A)Tz+$UdxWRr(>}XgiwyI4)H!O|zuL`j)u+Ly zc5KvTZhyGaF>MWk*k6!kG^)27xUv{DGAUHH| zmXyCUax^{xzezHb?cK>`P^qyfka3OfH0bMmjcvn%ySh+{5YR(uKPV-nZ1C&0HGv=#R>-FMZgJhF! z1JOKl5^&mGh8Ekv_kVXw{fmT}$BaoPj7&57I3kMFct(EjnUvkAd4km0sY!!~VKW(X zbHEIb5Q%LXe`V`rQHIoOuM}sBjWDuX^rn&~wy3-#^}#!*tOlQ8*AX?gLt4>r|6LpX z5Tx?%WAk(eniMgasnEbG0E6|%%}=QeTC@X~3!_L%%ylhUdQvYm!Y0f7>q`OPrKvCp zf27hZzxC^v%L->g0tZ50Pw!etE`DaV8ev5TZjNN~j6~kIhTleWdns{yoP6sG_f@|`oGlwIo{1P%a;k$p1kA;ANrM?6XVI z3!2fU+QC5e9!UubJbYlQORHTV0)WNz9TSj127xNZ`<2Z$%8peWK!2YV$kA zuOxZ{S#>U>7yJ`}-qNhoIQ$;kH=d`w$23>x$jTUao&{n!U;8mYetOyJi&$@lmy@=p z2j;cArIGEcFs59{=%w92yC%t@jcV=otJxbQvqmyNXNKF*1HJ@l+`V)ScE=NmK)mqi z3@Drvl+x1H&W&`7R98@VV{P2#SGPJWvSV~|ve@Vv$7NVOQ{G_F^?_m9dI()Zd;5_2 z;`J}RLtnYb2A*t;Kn{nVtHMdnK7w9n?(_W=x5Vp; z_Zf#Neqx7y>x~PRoxVN4Lut&NnAD>Ve7LbU6cPD5BrG3_7Fr9zhu_59*rfRrioT8n z_K@JiKN;>03*bY{UcK@qdS$vgD7nOr=la7(b4R9(81`C>gOpi%Bu_=4OUFB6tb7?D zD-&rAX-1Qf7Yyi<@1NB2h!)6^ZYHko#Ha-^Dt%#=7pM;2s(lojm&pA^{^rKSfsz#z zYXw8x?;p?6pWdv{Nqqetv`1#9e129yx1noi6(>a+=DB^+Ny^h_>b?9_ ztp4Eh56h3^E7{jHf%0B6soFU0s=zionr=UY_^hXH*Hxh z!|Io6Jnub4=fp_HkLAb@LIFX}U>QV?*WO&>n(s(P8;ib+;0xwwYH@L~Q&;xG*Zb4u zZ{E{H*o6)V!Xg=khSzwP`Fh)uN-K?X?#Wvp&5GkRz=2m;p!nM&XSAO#sLA`r@XCpWp6{`K?7)Fbwbes?Dy+7En#)i8!a1~9V2eoUsAHQ%f)ITwc2fUIo+bVL8#*}?TmvPK`28Ic5BPW z@W?US-bW`!7$UE`R7na9aFO;s`pF-CnbH7u`pCOnTsJpWZTBUI2%Gs|d+*0rNytPf zenp|d91pg;0#63BbD&KkC3vZ#SV!}0ch*wb6-Pwg$H@bJ?bFK_u>{^5q3O72rPvru z;^FBEBX3ytEMz?0nM%62H#L&zx-#{sKt36H##%Dmg52Vcd!kkg;G@_`p!5ULbsSM` zACFIlna(ZFsBJ(poWTunV54;NqI=9_p`o_d!QA8J07TRl&s!E276adQvv!jQfCXVD zjGyZy+cvZ!Q+Gc&2>a!=T!dV%NzPq795#=I_^D9&kmV))M=)YAmYHDbg$)R;SkxwL zOcJzCfp2Mff$2zym%GOrN6JOPsVDIBQ^58%=ko8usd-1GbqU z8-%PF1gZ07uxk(Kp!8`sDyCtnpuYD_W<*{heeOK%Y=m|X3hp8OdywNVt)-wBL5!@mi7$Y3{~>G6x?0^340KbeGg9S z4i+C6Q7(`k*alS|6CbyOCrm?j4RE9pJ_we)&Nok%oA|i zjS}%C^sHyQgKD<;_6ACrZB^R-|V*XE+gGkduWw z>>}4HiUp@lPEw{w1+_=gi>W1Y89Iqwz@H1*yA_@6W8Rv0>e9k&D~?Lyz>CU>T)Li7 zy3hcm-`TE3hood+CI>LGH9t*TH5X*+o%8D6U^vBy!eaAg;{e|X9Pc$1Mvmjm3^@L% zk}j&NS8ih8pbQ`TCe7T0+K6^ri`v_}oX(jB*vA%UD~)90bi*YDH9a+WX%| z#Fk6!W7wtE^+{E(g-Pzasa(qpG;+jbsGubGjEv55mb)D*Fg)R2%<*2fCARNW-(!!# z=zV`0kz-%fY6Oqd;tsTer_30@tuBcQoh;JsV{iSgxCIxLsUGw_#ZY!eaCLf9-=-{de-$sAcLF zo93NQcxNF^$gP%&re;sL9twk8&QnRLcHl~X{GRk4A9+a*T129}u5HI-*(413%4GR#|ib zs;vj9`%9j`o+{MTF4nI|%m5^Pi?ffxx7*?fRWroP)|6}-FNQ@nO{VI{cRuXLC@yV6 zM3A7H_a$28Pox>k-a@${B4@!MbH+&WKR6=^9>8cd;oba&Opu(L`>aTG{vxe}B#~~= z{Xu#DB5#%d19Kd@b)|IAM--W$+=`hpAX37*(+gkXUvM8SQS1*73a>5R5N9DsBOS}(4mGH{sqZp~P z1AIKXClZ27=vI(G(X9#_0|BrJ1+JzG1g$ z^9h!eT$*6i`u-4*`{hnaMeh6Y*?DsJb|sO%W!GN%`W&$Du)ouMeOaVF?StaNsH?9= zFvaSoox5>fTO$P*Ry(&g?-yN19I)+q$U4TY#(Vko&K%XEwd|ov_O=&r@ty0JT~pUS zKDxNe{YY!7SeIx?wemV*vBf({G49E1o?W_a(XGFRufxxVP8gE_flXV13GAk#cIa^} zZR;O3BiHJbfrb-HfyOQ^r_f!P?YAMm1HqUl zJgtZC>Uh0en8rr7qNomv+wF$^m)(UaHU#fIDXtRNa*pInEl@FjP*{CB3@xc>>ZzYM zZ;qrFQZr+a5MA-rjuvrfchbY0)yKIbMjl3Y?2@9tb61ign;9fViMTuE{sZ9)zq=9& z-=?LzpSaJPsd@E0l4+Dj6I(Essa0SFo1ockdmsfc4f86Fd4DA3F&>C|-A%D}hw^l- zUlan+DR}$WmeDJYgj3M1@3SjX^(_5smTT7X0d}~JR309VHqrR_cf=>O1Hux|94Z3P z^J9MrSg-RgXG=fwt=Bh%qaMYb^{d10!1Ck=l%ocGbqVZ(s(NF`bCIO}uuUoc&DsN= z?H_@rV~wBx+M`~;2_$1aR6`$9Y^cgWn7>Gj#Pv4Fbj0t7acuhsV{u$NdnLS>qz)K&h$KWL9n1h1oXt3QFlV_|l7R4$pcMZq_hgOjH-P`R1 zAx|~N&M*{Xo*CN{%dR4ju#!|SF6>@|Ajdm5^H8L^WD5J6d58WZW&Y!n0{y7pHk)4AA#yVE1)6dQOg`)-%t 
zQi`IyDU@P==;im>c5c(=!XY-GaEW8K>JO$u%f$fk^v=j8=n5S^EGnL3!e}49t&y0uGt=Yelg9fL7DCIRi67Xya%bZ zmQ8K+qP0mW0=7o{J3K7nC!<}gr3caU^RWC>msl!Jy>m19aPa9pqLXO#a6;c1oX?+LE7mFE1MAtNIW6&wG{&sgxPk=z-{SvzzxB$+MUtXa*_2lknFk>BmRC zYOk6;)vI0m>wa=O?X)xSf|LbwA@L+f9&9D$AwG=0u&!TQZqQ9>K2A@g~!-9`GPUVxJz^P?M`p->xFnqe?3=Ihl0!S4ykJ1+S zt*OT^UT&S8nBi@G=bB?>(ef)5ajwbW9937}m5n=OF~#4nJuV18p`l+G!(`1X^*ifR z)-fJ~CD5_(UJckfcKbLIQz77%3|y@IEWS9g>EGbsoIP1$`r`Q712kY#% zUg)Abo!cyk=+DJc(Ixm&-8c}}cXLzvda9>_Wn)Ba+$eutQ;o?hx^Qo;WeIHLiVAvS zSAXp>XMbx?!?iZ=y-!H1sbG5P169!4t!)o^)@7e)H=JOSQLU4O9M$tEduVw&Bsq(2 z<1`@643{{>(HT(o^@;MRB^!zK-ZybPzFZr$CPzTs3cI9l}b{iowKbt$7$ z+y&gB>97;;jy(+a(AlXOE@sA~XP<@QZH6cFRcTWMyvr8@7h@g>E|4LHFNc?7#E-)x z=}_;cmWDYvj%)2F(i-H%s^OA$wwq1voBcae;KqF(<9Zd5S-@+!5XRIy-b3HUGdo?} zQ%OOgGI|&+iMu4V2l+bcO9cXOEos`&&C7jANf5L|e|awL8@+y1dxdK9L!=$++6i*D z3E_J2O#O5_=^?0D_LN=4-)YQbcCtFp3vq%@7PnUF+a#rCV|%QWc)d{yL!Y;p?!XnQ zK;wXXKduHU?>;eROtL+EPlTBY4Iv?Cmp{{$cJx=1%h*TQG3xs+vF#6&c+Ni%bYKCq zJsoO!v-C@s9N+)oRet$;vy%w`%iwfiF_ONOWhl3cgxIrrS_ zyZXC%+RwF&>hxL!)istaKlZLKxQ|pZl|Xf!cE)lFUY`+b)!JzqW;0DoU5?6}aZYP) zRWx4SR8I0P0GD{Zf1+^RIa z(`TrC^uapKY4`P-qV0ax%y>M`IzIw;s|DeE2rRjE(1vUGuv_l%1OqcA2WCOFi?4=o zcmZty4?(WqJ?#$1-{+9;hDMqh(?u68U?S?yAI3Iqr$m(RU`5zK7vfBK!KT?$9e6Q5 zcIjS3PqcL8a51Q<==ttUJ}@t(+TG&?$?jX%zvowGI27C+L9Oh#2h1H0phC6z)@(>) z?yr96QAAJ=g)GI;Qrn`)xB267Z$>w}%2+wk%jL`5v;HsYgd((T=r=~Ba^AL{>G+b8 zHy8Z^Sj95wW9w4yz_Oc(6PN?|=4m9@MO5d`W*_0`CZ*Lt+i6B9>tljU_eL+c?_O7s z)y~4zQx$HS((F=ztE&a)+L)z1ze^7x9GQI()3*c`n0{gs<_qAI^+0jzG1f6bg~VIF z)oQ7oK4}B5d~Oxq1G+yrwQP#@wb~%%D1<9sh~NdP0SmLXug^}wCN#Ij?;z_YY)bT= zhJ3nP-gQ~~RDW=zDJ{gazu$P)g8mRC&}bc*!Bsphc%oQTjIMhMnmH}*1- zqu^B>SC}sh;HNjcI14KC@>+A?|&~o9PBTp{BN#m_fEnDFgu>ZRLPmiz(#Ia2z62= zAH%@!Fd3os&74p<^T&@IH>jz1$*pUQGqa-J`%H09)T{U5XR4W!i&Pd%R+FbYeG8ju z?&Z08dByGITEkt;5NG(-gw^MZJw`*Q*}R@4dRJRtlD^6W=q8LgG1~FtR@A}xn8`6L z*E^@Sp3PlFqwhp>UHai$E!2o%^5YERKA+v#^wnCU*CWo$(-EMvqV!Yc9aW-?6TEqDu&iMxN=V7unpJe5 z4yslgcZ^?ei^T&mepjRLk`Ry4#g4+F+$T!cQ*%f%-!{PQO-#^G5?)R1BbPd-TRoMb z7xRioF8MFl^DHhS6GCdd;seop-nq38lGHG&L2dFAQU{*y40~woFBX)L z>)P>*GBs{&51%g6YI3784$4VF-Y*SkyX!HS_;ldWC0T!2`@(9PR2s$7fktY_k_JHI z&60QYAQ1`nOLq6C=6(1@gY!H$ugwAAHcTlHfJ#`B(KEEwmt=sjJr zR|Evil$HZHf#TG# z_ZNpKFk_79L&QKNsk_y0!11N)G9?*eEPRS+a%od>8MbD%7qU$@BY}zcfg=Spjt55Q`3Ed?V5D>0t}JIE<)Vj7R4ppf=YI2vFadEQ)g* z^=FV9x>9z>LKbyg7CtUp1BJA}1Y?t~phQ z=ecWoEH#5@Ws$Z_Pd(=rF3eg^iU91Qet>=Xi#ilHDnmZd=(@JsYB>emd**a3!pxqk zBDCa_qY{KPt(%tsz?#r^V`mj>%5#lHDbeFg=jlI;n1qP-m;D#xi;mdy6QH$_thsPljAjri&i>RwzX2E{LrD0~?(tvj zShkVEo7zW5D*#e7S^f(y2AAInd6+wY(;baIF<-MXUFh!_Gc^dL)3$TnC(=aU+U2(J291poJb}J5)Rcj z^V@!0f)^ja0JA_C6TeQz1eqNj9rds8tT6Um*`BwVsmVUL|K6@X2@2@7tfx3-sh$^;S;3Vsa1p=`40I@PnoZbk z`GQ5#JAYC>NCl0krMqk3fhn@IfJ@5quGVOsp!83E2dcw;NF1F~LJ0$84taFS0^<#e z4Hy`|j_6ulwc65HHb4DDgWuX4!8-%NxvEROFe|!^5V@684%&UPs|W5I3#is;;f$1D z?hhl0j^$0SP8ZwwKayrUYH7u;PY+LTQ)k1;SvxPc#&|w@tq3ZLEyJ&`44h@oU`3@I{e zBQ3k$xe;7eyfCm0yB5I<))&c4;?^~?_J+bEYfA^mzf5ipPoL-Mg4`-)n0V4lUW-iHkloDT=*_lnp;lKh3lY^4XN0#^;nm4!3$byD4>7hL*Eg!zaXOL@7WahL^T!e{?L6ZBl>m5I!}xL>yXJwHc4n6SQfDy^?}yOPKDq+tOuXxo z!f#!MRs-pmU+Qr;t>3ZL^F8h_(4q9-i--H;QlfY>4MCo5tPd=(A6XgjXs>+p#nE9` zOdp1Pfv3)4XXw^G^7U}YHhhw`rH~P5m`U+7WqJBUX44anW0fuWvN@Ky{W@aIDKE#VO-xQ6M3&g;nVeal7v&_`%3?rq>=Htor#t&V`rv8gGD|}M z(sk4yuPE8Qy=y-jNzq(LzmZhLw)UhB52DLeU`Nx8dhxahg)$>SQ>e`lKN+HYkRJ~! 
z`5tyGl}E_OJW*O|b+O(1OtC?)ezuJ$bn^?Eb(LG+NCkSOr^ey&&=xN`&#h7ECI17E zyD{E8?0>TL$g(%O=0GfHtJ>p-o2|q6x>k{qZqTboyr8pL+w#7{ImHva5?I6U5QY*v z*j@KikpIS`J3w+{Gb+_(hmA34w;=*=UmLtck?wMV&}U!9a!#KR1kDv>%ONLY1a&)VUzB5+qziMMEr-*Z2pgD-7DuzNY$N$hdwq^_Awu`)XT;aKjOdVGO@#m0tWvqRN9;TdJ^l$A^E%hlG(sN=zk zUH1d>gY%}n`uWm6=o5gjb3|@?^HPp{h-`9@oLurj9utH_5pTQFi^yF0%LR`%UR=ETtKiw``@5S@zJjrH5B)4PZQ{HdGldSc@iRLX?-ul< zTUI-tzJJL?L%wacJDL{^kW1zu7As>#}};px$R8pPu2>m3uP{ zY3{KZcTX0~W8LZK^#H|vnE;S8lfuPOFJo@j#Xz{4zL{^{lzQAyzjIqwi4qk~JWFtz z4vHx;ZK-z~1~@XTh5u~_Y7ryC0&*1o_0BwZ~uc;X_Gvlz5(N{I4DbY z`zooR<@WVi8xNdrgLHlHF=-& zm|wWC%gWugcS|diZZ;fM`6NgjX;pm%OxJX=`|SC~vI$p0VMw zE*YYSeNMcD8A2Q8J0S@=@PeIOV$p%DLASi<-!DXxR4~k;@X8#n{s7LO1``SA4yM$aZfFl*bnGBsB?s;MW_Go*t0zs!$dy9Bz*UR&!R-Nfr>l^J1`yh-$gJ#Z zzXSc+hg)%2bqQ-2a=IYLKwS$!{${#>k$>}a@*aU6?l{79$yzmaAX_eC7u_+JpN<+r zTy!YxlnmC;ZA4IxN$owef!yETev^#XnFOL}#q=2bwjPOrk@Abpp4O2Amee9ko0dlb z(0z|E)JKX`k$k$<{Lniw#nwrQvM+nM8%a^aa{eZ5wy_;aTj^J=59bv)|F5sw}A)(-FbkC3p*}TLwI3!^BjTZl3K79L~^6ziaR{r&E*X>Uv zYK$g(B?X|?Xj>j-oo|kJt#qX(UP|084sZ@>Zj}=i_G+%EI4f}4c#|w;1{!N#E1gj&4yCeURH?DH4QpR$46Z6r zsMJ`a=j5p^Mom#Su)*?d;5akJX!DPVSrmGTmd$D1vH zpJ!cbS!S58y|=xueYMZ$Vn;eBXcB7l67hELxh5-`5_)YSRR%1i%KtjpdVCx*dJ1zX zqS37K+hnkISRQJjMj`#8FSZb9n41;UZAyFE^6yObU!%#-j9 zZNK6X#ov9e(PiOLsuRM)HMtSa{{^lU{L7|t?o`hN1C-4 z4aV?|4K5frwmptUl$wPV`}HRbg2<>+$0eov*QcZr!fHK$uOO{D{39$po z4F25Z>`g?``t3$52;X21G{rO8EcF9>7=3o(CzU)hM% zwTh(Z7c4nZvhnKORJCKllhwN=2xv9uI*=|6@7&nc4x9(MUX@(}DXnyW*G zGjVe(e*fh*$XrXRz1tetbwW0OeRBqj<}@8V2xM<=u@q zPF9i5;rtjS7BLO6cq@}=Ryn55mB>XsR+lKb!h!5Dq-xEpeuIjq!}|N5a_D46(Nzng z>@fw&_DkZS)~4@wYK7`@Ku$o=am~iDlg?VI%WonWRAo1$zBzLLpDo!tDph8=lc+ku?|87w zhA)(s85Z&@B=R-+%jaWv#!Sb{EsYmglis-{MR00_in{J6Nf+I%VrwzQc-YFmbMDut z|DY?%)JJokMYDTwf6;m#`%uzw{u)2^C zDR=vZv{zdL-77pbTcOxiaco3LgM?3Xi>0f;w&jh=xN}wevy#zCqNYKm)iWnoF3>j| zGzN=>K3w`ZqkxZpc@9JjxC_@tO~(~c7Q^{rW2TAJ1GQTjZ*;A;-ETLK1*SK{6srhn zb4E+ATj===o6oJp?i7pP|z zDQGSa)Um1_oYwR#+~6|;No3om3u=}F!7(<`XnQr8BfyvDcsW|(BY80T)M&TyC8aR9 zn5Xj+Zh`@~vGYg-z0P1fzxi0K#TEa2Z>iTN_-`(NU?xAa&5>v)oiY|f#0#{Y(C9OQ zfAf1I>54whs4rzy{hp>CQtzBCpYKJ=^a@!|3YRvzeO6h>w6eNB9-JpYCYn8)qk)hC zrRnO_y-NjkHiXT$QpbmbJUtF7UXF5=nGbG4U>*Q8q6@2Bh~2UvY=Zh%EyYRa1I5WI z5Z#W7eE04|b?5c=HrskHZ`dY?k+J}cLJ^2!O9jzIxZz4AGV9!D7BxOSCL0COW4%ZK zn5sJvZUlA5T*${FC3_qD7$%L&t5W?ul^9cX&vmvX*K~Dugx3b;A}7!5%hm{Yq#}EN zxp7CN3@GawG;4NQTx^pJ*mJ}!A`jfp)In-p_`4CQl6Ji)KKAuo5^9L`);X9SybqSf zF*b<+At62a6XtpacZhnRAye-JoDZJ=JdJ{CclUV5#?$uY9aPTen()zPnC(e2ROUmi|rbAeu@09S#KjCD-ceqH=}~ zYWakr)eaxZQkwAgelJEr>Ml%_B9IsD;mNA&aWe_>dNHIpz5nHvU{J3%s<;z^UO{*s zMgWw6e(YVWHQ~ijUN#rBc7iV-_$6|F7n_bJ3hrYiqlalb_qV6xeGm6BJiWb(2|L&| zGg5Def3JW-%n4em23PbvD0|?3`-X-G$ifMsN;zzS z>dN$NYo64%#tTU?c6jC05u+#o=B<$8)3Mp(;6U-yyvf%Bkj~?H-ZQ*VU}gYg0z-2D z2SENR39u-Bp!3E4Ibi;+^t!l6vBe={!oJD^lVEN9JOzz__DObMoc8o^ToQq^zF0kRQT6PZP<%88G+&sD^kN_XUa{|iqoPAJdaCww&4q^Hnaue4ZEGn%R+X&n$IIy}PPAEujo!&gv*t2kbaw5wm4@ zc_aK290%>vB5%?X4hY=&XvP&k)zRpta-P?LcaYB?orNg94W4$Bd z;98Sh7QJAQhttYNqLQa$)kJn`zexX7P5Y8A7NOgXYG2~3U(!WY@!@LszsptbE#K`< zo!ji-(fU{kp^yufi^~#oUs>v>$WfBiNP4&!60>qEmNU-wo@d-5GJ|o3A+zpNX?6dn zABU~4yMHPqj;s%J3johx@b71$9Do%ac2q%|Utp${T0DG1@=W#SlM4%ssM#FBYyQ;^ zvWJR7>8`6xLhL%-_hw&59V~rGaNvFUd5n(F=&#&v3bE+xg5`C$li{CJJV>8o>Rr*C zQq$Y?tJ@^^FvD7wy4hn3-)@EOE!(OMzKns?*%=1@Mft&X^RiZrzWc)jS5#{;wkIOr zb?ZYJD3#jLDM){aSyf)S`j&o?pdB77nLr{VBL8<-NKRY;|6SRT@al& zne@abII7>=(S5o@N`Cbk)8oLxH3)vfuCGD~I(*PWd1zIQ(sUKWEoCOnJ;lDi&`$vA z?(sN{DR;E!(n!jugZ*F#A^C2XKqE!x?p4FlD` zdO56L&`IALbe&^BC}15AyVK{ZOeFI#)jn&X*tp~LYp71v^~O;cw@tM%D#LKQKcDtW zXr^mqS$EmnI)^QaNKJX6uB!O(_>B1d3X+bC5f(h6Pc&%;Z-t~x`tT!!00lsL!h^H+ 
zk1Mbyi|Q!5Z2iyKJGS+>{*{UDG9v0AyA-Vg{mphx!=Nxu;fl9X(TCqOMk8-Mpj*(p zuy8O6-q0e)B>pvdoqpj!QpVP{nDW4>hvD$pKt&Xn&JBfdOobrxO}bJ#K`iS%ID1+n zT+nc@GU&$Y2WbxsNX#0|@9SnMPa~N@ik|%lHp!up^I7&BebaaaQ%j6cTEC-9hO)~! zW%Bgnq~xIhS2)sSIR)22Wzf9PgUkl<=dFBP4%3tQi*aT|{VyrKC=eGS9N^R4aJH~b zY+GiM2aDD{x+O={81tSe50p^GSpaTHbmTC*p2%^fXkHaz2X*qwC3a?J$-Qk9lVS7c zmWrC&%%X_q547{0{O;(s`?_W38ajmrDJv`FAOLiwzy@@R7InPOUJ6s|iokXvkYJ1pW zC=a9iR#61P%mUn#r4BYjF!xBe!-E}jeOH~@y=Bhor{mA2YE$c`*5kL(hvD{;Qbvq< zVhICTDom7Z1kx2w7BoAnV~NeMf zBd(1*u*%u`_blmGO0|pGyPLWdYa}_ZIkm*E^41x+8MBA*d}G{na^IfniHHw{r4Fe^ zAw^?Z;lo|dENeWP#9XUTf8S4CF0XVT9;Q%B@Z69==ZR2!EUDFfgEqrHyFZoNEiq z#~(!(JL7h;jiJ8e#@{kC)swmv(fuxinPx#^+0tm%=Sfqo(^c0&nAxFBaKx{h9}}kZ z^56zNk^4wtJ=0-O+fieM`B;mB zk7lUyr8rK}N$S4PM60U4HqXaznmQ1B}rdd`g=sHceF9@ z+7Kxcq`goh-W`fVS8b~r?3JcAXHY2j$)TJP_vP#nCg^}URE~#iqJI8Xnzz4^CrBxQ zC{ENIPjV?{EZe!>bu!)#k?Gaj==z9ptM`l(>Px&K;|S??!!Garb%tNO9UUZg>HV+|v#Mswr%M*oLNYV->{VijKS-4K5dbm_SK7!;N^H&^;JCk@meT~p$KQ&JB#eAx@HK4y^9`0{6&LxLm87wmnh~#itSFLUn z-o%pfn|$6vz>Q1dVRTIiW|989p4dBD)tgH@cV#4}QKKck{JC^OF6yXLH42^XKt_+nAQyB_yrB!5?0Pw+x$ad{;QS}dURSWh z_kOGhMoyS5_er6?dSVm=b+HG@sbWyKH|~3nvDwew`VgV)%R91l&thZ3f&V5HcYX+6 zvNEKhSeA;ZaR#Ic79;X;}+{tlmV8*C}?rWg6=)#wW~ z`^Ev6l?JCR`K>!lBsdqIXx`s`Q)(Pgg4JYy&DKo=?wH|ro={({dv!<`NS$urw`HW} zOIHJ5Fl-XFe6U{`YNIxdUs}#AfhfARcCWB}al>t<>$$YZ;`z*5GR^I}=u`@-+A1fzulH%hw{TOMf?#?Wt@vz-EN}nZH>+rj| zZdO?J8n92ZcmTue&T>QLxEKifUggo*LEmVm1w0kaPQOn?cddl(o5$y|>-wyKe&E%& z`eef07EY*C^6{c}DZB6=#z$N93)@=Iw( z`Ww4hF8xTo2%DH%{3sJv6H3qBLJr+r5^w^r7TjzcdnP;nVz2$(njSRmYUiaTggcsx z9TiL29j@n!;?n{FRK_JuhRpj&ajXgIKNyjvw=9h?Gt5N8*!MR8%#dmRly0^vbEUXs zt!27H>kHB$ZRuN2N>9CtPP*NQim#QFoPufOX5?0m0r1<3OBkrBo8|S~dXYNUwZg#r z0EF0-dq2HR1EACn2?|j*ki&5asrHUK;>+j_igG6u4<}2GN9A4}_7R55Jnhe>(q6o%c-|z~o>J}ZgI2T15G-}u zjyYVp2#`&|e!NLj;?|cP`_}@45`^tTPhVn@CG2y2c^#5QA7ri7eb^D@Nk0d}_gPj< zvJ}+rLD-*^X5oE=G35%F<{;jrL=l?3j4bOs0jmjZkNpi_ue&j4>j-;s{sKwKTdTC1 z5Ukyp%^s+J=ac-p)qAf~iw(+>F&?8bD}cUhJ!RxxBO&Dp*_puHgx0!Ii{6kLY|a8# zb8F-+-FzO|c>5upP!Y$qb4fN+1M!Em!PsK6ev@9zTL+h6!`?LYviS`PtYk3wdvfjJ z?3P_ADa{|9o9VDog0*<7h}DToIYp8D906rouQ$vI)?KlPNBmEQ0ch~fU{J{$<1*ON zJ_zWfmwg+CGUDdKbZl3(Q`6sfC!-HHT9Q&@@F1ByHlI}%bP&h*XIxHzfbvxi+w@k` zpdv;Xly$P@t(lp+0%Uvq8j5Y}nP>tw<|27!`yd6XbI)HXwHZ*jw7oAoE8TX(*=Wbx z2=}Z@WwZ=Z%F~~#6dC1WdZo~u4nu$xn;~zrt8^k46Lp<)lWd-NWa~23#e4c77;X0h zJ?FZdPqX&6kGOC3M{&w*e3`+2H6929FzW)xN4K89rMW@K@5Oiyl@&`u4vyaRt%)iX zzdxV#NtmOrI9EJ0Gqp&B{%x5AdPOa>%0YZ}tVS+JGyD7U9tZ~)=rq!Ah^oJHMh2Ay zn|g2=wgeV=FPeu4oAssn-QV~c*0->HH&jH+O6vsTY&Qfd^bl6Wc$)PK+!-R`uoiN= z0DhyOX8Vcw2_wDaptOk^S4Zb$N9%0WO8?v`op$fyxBl|0@~oGhO)aKyd^EFG3bQe6 z(FscK-t0oxa`+R}l(wUuHw9EpWv&f#&)?hIPR`Nj604GB{Ws!?cZ7Vys4txgC7!OCV9hA%#rWHLKl9b*036&c;nnQny9){^AYsw zbC_P9IxfySy(JfG!-#I-RCr0<)F}*bSHcMgq|!b5q8cYG9K&~yax`j6qUotd2vhf< zMx7wDC?OUO-|?d&^$wn32Y1Q_fpYyCm(8OJ7nI|~nt8CZJ|iBB;`}5Pd^cFTy9qqC zw{PcLo$nk$zh(|qfhb>bBt-8lW!lbDYRQ^E$#L77k=pbxmav|C6;`JZcXu<-|C+~| z5OU06X4Ukuu!)w~EN{{!rp}hs#8E6mW47aHUwUdwjbcX~?lH}smzRMR9QA;^Dy!E2 zA$t6L8vQ~ed(uurkmc&TRxt@D@1GXI+%YWSi`X6I@q2o^j!C zP5{*`Hca!gDHkzWRK@rIsT0LUo6|j<5*??`42o$bUUkv0YlM30SxSC|sb{C!zOcMTawca|`PpU*ZPUT` z0NRdWVLrHvM}LNbIa8x!Xk?^`d%_Hex2{Vrig^=OUZB9WojWKtITXq)C;K7xPTtOJ z-+;3%@w^CQiKo;$Zgai$20V*lEJ*`Hr zwG2?P1H5SlVc?bI)r!r>h3*+v!?%g&(Ni@&9DFa{;ZB7!!rZI&j1{vWs~eU#?dV1K zKvY}ahn)VoaZxQ@>Wx`n7f3gc^*az(W=CjLh|PFNyWh(HM(X}0zV3MQ6~N6@Pq9k` zd6W7Pr}2|i>_0MSbte6+ZTfF0$mY(AePt(gLf!LBmBEjhn>xFni+!GVxN?`ZO|)_6 zgr&J+ol^&;Ia?Q(C0Yo)h-=65BKWIED;i0J#tfWau` zF3F?6dNcu<&2mW)sH{!7*ygayfYzkRA-gqM@mU4Jfmd%o+*&wqDl03i8gO{eS#z>~shQR~ zdS;7`Trl3B)w33|5t()#7 
z-W&^?&woQfuNV=BM~5C9rkzk9C^3|&#K%4#vZsV7=E}TBE{vp4!-4r=IAl zaK5H`ur3)LtZv4t9pnI5@2dt+i&1)Phg>jj2BOYn;uNZ%M!l4EXf`4nfe zSML~|e^uvd5G$cSgc790LI?Q}9$B*#vE2wHR0vBkp<@ntd zk1&>~_N}GNwk#KMsIAmm#|JZH!Lgva?-adU>L>?h(bMqJ%?IE0Yil`pt$$ z`l|=fV=DC8?vojYAQpd9C^Vc!qhmheY=l~kJJi@|yX#=H?YpO){NWc%K*2)^s4(F( zCKQ~|a*&z4HBhY+R1wCUm*~VtrYZv6TBEvnCb>TQ6W~)-N^qoZO9WF6HE;INu^7(# zNKP-(&bTscI-*@uzPK}D7%FIOM5Rcr*{<;CjLzO!oV-h6Ct`$=$^$EH%Vz+^GL!YW zY_{}(`R>AHJ-q510lCw?WBs`yej zm6;bmJ?{3B>s>Q5AN7!p-+g4BPJENEs{mU)fWZIa+jR6|u(RQ4Ix7}$qQ4dVVT1m( ztJ_kO%I1)6|79jO{^d$!qG3V{aovqx$Ry7y_A6@eQ8W*SCmgn%z&Cgtp(}^N@>q40iW80ggbg^4^otIK8=*^u`mV8O#zBxXufyf5-^#+D zy35RD;XGR&y>p&NWTM8_6@177N~Vr@0UJFo9oKk-TeuZl-M$goQK@SR_-T=!8yO0H z#7GI5-bW5>$X8SMte^GxWAaAUFWzQJ455ea;mMDf=G94DUbAs0z&L0$ zAfKcCd97XA$sahJDwgnDBacT;>FjkeWOsYLEw%vtKC_XOppLMqqC0)N#D%6f~sx+e{= zLtz3ge@r3%gFk+slrnmo`s0G#m<765Uoz7A(~S<(2gNr!;}V_`_S{6$_lLVG+n#mT zNUh;7uP;CE?j?0EC2nSbOMxV2y%-Vdl`C6v!5xbD7)SZ zmY&wd{9a>ri%MU9b1Yp5g2O+G7NoI#^n*{k_2U_!E2V1h(tP;if=h7WT+lK&h`dQH zg!0Op48Ocl%)UNHd9V7h*wS!z+5jvPJ$F{b`{Mlz3m;g;q!2+Q0Ebv(M+kx|BHT+Rx>B&3Z2)V5JGw{+8Inn z-o!ce*6}%xKGUFP&%Jy^NmyB=b0J%e30jWFoCnZH`~CLfN%9#{ht_-Qck2lsJ2#?F zVPg8pTqOjDAg`#7h!6gr+pnD9^s#?EkmAP5)7yGH>BA6$ENwpYGR2mT#a|hvBi?zcG2fOE@eX;& zu6zy34G6?5!f!?B=0x^zmgUTamE<79&USb5kZf`w{jP2$72fE?3tw(;Y?A6F&edjiXEPRJZ$? zWM7^)T!1U?1wpvE&`OXDCsZpje0zv#ty8K{E;rJ6tK6Uy|F}~mIIWM{;29`#ofR51y0>V_mjTgHVv3UJs%4`JpP|He~4sDzR}5=ozK{g zfpj^plLR@$>vp}0*QJ%DLr!R&xwj?G$m}a|RA&Mj6AZKU@Ho7~6{k;A>|=uk7Z{J^ z(8<8PeZPKxnvtUOhY&XE!OQ=}H*Vj7u=(QV1m>q+;@Wi4&BOP*KU`f*$+QCv6$&Zq z0X2h*o`ovc$Xmc|4p^g{()H!@Y8b~19|!F1oJW)5x8ja({~*9+cu5k?s9jg6U+jAv z!vy4$=e;Ag+#_a({9`MULjieIsG0ixRv?Rv0|J>=j5Z4`|tCD4RUhRjRynb1w>UrZ7tr+cy%4NwDO(!{? zqf>7zOOViJ84z{&-tZscG>}kv;GQ*7gx{^^(wFPFKvW_9&n(<8f5>a%M z+K~^GW4zB{W+>QD8YmHn#W?c)-nz35aVZrL(zX72tQd;uhj?E~7Cn+zNhe_xFMdh< zv7bM~3%qg{cl{~TCw~5PBGY^2_}DLDJ^EP)2kODwH*daRQ%)&6F7)km8_>P_ro@LN z0`efAKnGwK6Z)P2^TXGPah}JtBO`i1Cq*7 zjtcWKn;eaVy&GP~h)-Sn?Jwe?B(1zXmu}vF@ZI&zqkD`;qPo`hkgqe2lcak*T*Mkz zI|HA$v*;#*Rllgi5x(1ri~P?;nGAJ|dLeB85pfgK(HL)Pd7ZKQeTo&nM4Z{0Mf$Ec4)jI{Ll|Ax#s z)EhjgPNnTXA#40P>g%m&K<;X2L1F7?V)JoiUv%pQh(lMFx}(lwmkYey%&uj-52{8 zr$u@b3l_4bgRDiKk)me*`~7Jk(kVSe)C*l$+cWe9eT7%Y0AXIYu2{wY#>5Bv$U~k@ zxequ!^kWToddR*%*n#x!kvQYd(8{aiqsdpg#}Jczka3`xvK@DBycBUHDN+$&zu*JV zJRZRq1QLFDS&n1j_z>?RT{z0XD`wz)r%U^Zs%$FYqzxX+r4(cY{lby>wqGmo`;+}!9}L_gB3@V-+Ay;IqqFvZep!};CQp!_=5OsH zu5p%Y*VOM;v3s?qhMvIu_1{)HMB^U6o#`Z2iQ_>w$zv?ORH zqGLZlBOL@r)@GBg{s+%E=KYoARJ78BKl9H!Mm(9A_(Z4Hu>i?muYY44;8#L#fJIQ% zX21Mv{HLOkbiX~1qd$>lcC-L{rwZMF4t_Zc%*B4IzF7C~J@^}|D}LtmdETUlqyafU zuf`BrFqh1}-c;D1JY@1R*d~Yvj*|%$))w zqtlX={Jr)3^$&dt4$9Yj?Tf!wmE#&%Ws*YezgPLUw#qejun4U0BOd4+B>ulf0%;F9>8XW)&&W=2K=*${^$3UcTSuM@HbLe z6K>Ou)7o(e68-UzEEVt7=Fz^JnIDZzjBeI20DCVXn(U`9qYMG|eL+%7Le1m<-t^3( zCC=4Ew#VMFkPrzQs{`z%;#IXNC8tJXy1DOUb1{sNN6=Q zd_Fh2Lgu^Bb@%kV!ei%tiVv>TX^pd;Exs70eGn8%p&n{vIP&yowyz`H(0NbFn#Ixv zujPBc>`SdQGB(!4Mez_}G!$jMfB7QGz$~|8_zj2iT8J=ltd2)pALU)`T0l8bY!i@? 
zt>$e0!`}?)BkqVShy*pyekZ#s#*993^av=`yU8Xos5AV$amP4;(|{7aJ87D!|q;{^3h70lh6N^cC7c(^WjBq$@tW6vZ4Fy2lnQb~tJGd$XW%)Jr=t*{JTybFzZT+Jj<_CK^x-uA zq4W0xxN|uxB_X}xTeQ1t5idIW8RH#@sE(oaXD(ZdYJKXny&)wr z-oL3d1(72k8QS+H7yc)=&OdArZx_x&w|N?%d04?=1dVONTvqCXeuU!&$h{+Shor5n z5{(4ABd>JjF1Y%6;l^xUW|REX<@Son+$TE$?1_iRJ=8il0`g6?$jeEOgKn3Br(R!B zxpGs&X4|A*a~KQ10L4x2w1+Y|cAZf${S8f#OgI%D{mAMp6~V$$n;&PAmN}f1V5;lnbTP$Y5{oc9%KJJc{zQ)4?jV#b+*3r=^Ze!`X zL-BM8wbUn=TAk=BA&l85npuFeJ^Gsq;N05R_2=h@>^#CBuH%ZR>D`fUo3mbdyo2!U zM)yd+LALazbEYRJi$8feAYWWUtGO2!|nN1IH&5a{zx9WjdZZpaNCDlH9vdUh7NhclyQIF*6vk$eX4qwfEhJ1pbEu@fZ*5@hiEbhSZcldVPDtYRp<{*uv z5KAd4wN#A69V7en-)n{h%3agmx}!U%)T+iWE%QA>he447i4vaWesYY0a@q~#A1bcI zQ@AmNVw~A-BNPIz@I;(ksLs}li`1Xh^{WGlfjC&>Z^ck9u6H^*XxZupGJHKA!BYrN2|dH{YsP^3d+;@7$E* z#A{O?)#WsSi<40q!4mgQ2-79iJ2xB&GNo$V+w%&+qVC;zg@wiv&#W*#FVsMvQq71! zc_qd}#qTl|m7kc6a&hff{Dfv~F!2wo$voey^09(N2#eXm>5{Fjb8W5CVV&pt1q)uZ z@f|LP@=SZEYc-L`ZCcQ}o^`AZ=R_Rp7$s`H?ub5S=dL;SxTyM=gJ8jHTPK0bFA5-W zFSUe~?~03U`f9u-V?*diYGt{h`Gzzs@!LzbHp4FkWf}iL$NiI+$w6m7yW%Ys>fPq_ zR+h;Fj9hmw1u5LS3sQXIMk0kFUbscA>rG8f#oTq`L;0NH`)}OdeE-CMoAT&c^-pyK zI?dTW&=|{p^3h>Q7NZZldFnIcU^WA=0;Vq_nMXB+&Q^;{nTG47xGD5G%L@oPf?se(=^s4L%!(^7Y2W~=m~lkbuL&cGHzm1Ze_*;@jjPz zCu#8JQz4(%;e`hlCu@Zd+U}0fS#xO;aVIe=9L(BK2>pWYjZ&3$aqrFSsD)WC(S+@ zzdJrzvqacvooVG9ny#FWw0EzFaGIUEU+}a5zV4YlGQBA$C|?g3A=`au)YXGRRHH$T0=;uX<$&qA58xW6 zjhcTQ^8ey?G%xzGJeG1%q}@Y9lkXigOeX?{n;I`2>5y+(l*2Y+uf%SyxMl5tY?rbJ zHukBG^&9%Kj?PYA@68yNI5(r4y1WLR?)UWS;5aLxy&t+On^{?Z+U=Exii+Rqcpd?w zElCHdJ8zx(S%mJs5-Zz=I}8pTWGQ03aQu&5Kt=6|!YGs2VY#W3djs$F>WvdA7z6lL zBJ{5}$F2=+!PF*{&S=3_s?%i!X&K0j3wiPt7yaR@+i)+}3a_|BknVJkE%RIl?2Zvq zjP3&FlDZXEw*ZZczcA0}r2^1Gpc0^U4Xs#4$XlLo&Q>cWw`_8B?fY+MH`666bP89Ft?F55(Dze5-9PSF5Wusx_Tr2r%aNPP226) zJB-4tM|c}e!W^S660SKNzAD98(fr6&cvo?rzs1%M>4N0xATgMLH1U$|IBq5;J>W&% z_&{SasF!?mEe5ZWut$Ue?bzwEV8fo@T_5+5#EP_Q&(17OQ5qSE!z*FsB(CXSX@)8Z z&R`R5K6RhuiEPF08}Is--6vkqxNct+rYJ0<50Effl(050TuG0ZZgVgrW0}5iFJim- zY1SE&%U7pnh46D!=Ur%%Et2 zf4xFL6<1fF`8unsJ7H*!wdS|(3$DaRc~S~7j_Gb+gCxvbAWQ`0Cw-G>s2TCRoq~xo zijS|Kxmu+Wn4%C~v7tg3GAVQRXC4zPaqVjK5@Yb)Ja3Hi%4T}jH&<~^R#x5ExO(R* z{~v0Dkr!#(dd^Vxnl*>kw)yhuDP5ysTs zpY0dPsMGVR(&`}gTr{JY+vh-9L2h`M_jR&?1U_R~1M~gf(GpWRfVr6E7682-MxO`V z`p&JUYkH9vy4|`cp>#y6d)u(Nm6Xv<*}=Py9S|mkiMj7dBWm5#?U3EKqYNKwgCtBL ziNwFU!Nv71yxQ3wFCYE78tTXxFGgM@q5bwcA`s&|gZYAFc1G2Ik(5IXTWWkwy2I=J z=1KpPamp){ZT^(G>4k-@L$@<+nA!x0Fk_X+b`1~^o_l$JFPU2XVFZ=sh=Se0!%MtL z)eZ?w?VX(=AMToS#87{1=)|h=71cOEa&o`FVJ^DGVw%nW*xbh%jr`!5?9I~wPbe5Z ztnDUA9&by5aEpt&r7Xe>%f|)u6h|^sPDk&+?4GKGnZ`^bcs7}czuPBrLhi2~9P^_* zk8ZbHsq}c|qHwa4g6^L3;~=`FoXceeMC;Y7#c~7Ay)62k2Uyz{ddEj`l(E~O6M4to z@t2oql+r;CuTd(ub>HeUey!RV&_WbDZG2x3JmKR4*j$eQ_Ah~AJk6B=m$dpkb23K6 z(JF=v)W}P6*KGCi_2;@06JZ8#b^nZ?{1^9t_uP-`c)WCL@~YsteMOlbWsc2Lv`4k; zk`-pm(L3la>=Vt3;Y$6sc^9r>gYAHKMEf#eYE04!B5h%cK~Nnz3Q9#ee?f8wi7+ej z4hUmT`H)y#ac|z{hZ0)ZSidz}sLwoBqwSVq)AM~9qnN9?eAGvGdGeLhpT&2aG&Xr= zeP*$8op#0+7+HX}I@|R@(46H%r>^2+JJqWn|BUqgd}eFEe)I-kXUiF0M_qE#Z*dls zHBLpGpN~0Ls4vZ}XX)ihW%~3PB6g7xRQC!fUM=!40FCH-Hc&&!I)8n;gByA4_TFm7 z%$FQzwvS#oT5Grppy-NJ?(z=^NC&J@^Cy+SjyP}n{)#YuwZs_ob-q1pgR<%*;kaiw z^X0wm-cn|VE4oIj;SLiO;CV$#GCs`8h5?wGN{?38o5Q&?ksMV(DFUz=I{Eg>PHg_qNxhv#RzHs<$XRMZQ@v4ORmny_-{E1Kw$tY?qe zb|*K9&k1uqXN`*6?xCwj=Hti~J?RV2J_5WBAIhc{OX5DK=GG)h*Smps55}Ma zb!Z|(j34BQai&rdw=bQobg9mT{!`Jx(^;V(m(N8f3E5@Dzxr}o<#?B%ktu`o+yx;t z9CO!bP&U8BYErnZyF2K~Ng|AjPPBFxu}e@bpu5=wEb($lV(w{x;&|}s@w){77M;ccr!X8THKi|)RAb@Oo2cBLH{`!i%@oEi!&0{`tWN#ZtBnH@}Sc z4dAqwBAo`OTt6~>1`C$j`R0%rBt zd&brPe4`|`hOH$WYrOKU)3i` zC0h{?6utLUTN{M#ytz@ag4WNIG|QOw{WySipXpY=KgrSvN8b?DOsE 
ztNh*$8Y-F2tS=8=MS)Q2T5D+ArK>`=X^U?a#w^Q6P58h%ykpRp)&IcuLoxgpW?0U{ z`pI$Yev?bC788^2VX)ff9m;M79vn3I5?QE{b(H1t`B{|i2E_T|x%lB@uh6gx#{dX5 z7HW3!7->hBc{KY6g>P1`TrBAiZMu7GpB8vTRv^r3=Ie$j?0eHI#nxzY1=aDnQ|=dLN7^rLSRVahSwA8IVl zTsm(}Fy&c%UA(lfvw(71dM%=c#5ddTzEdPiZ}Eg*+np)Mix4^sKeju4CqTErx9zUy zQ?qiMkSA4#s2ZF#!!QC(xa5^P%-yvd zyLnj}xOSK8WfgORyl`*c5DP6Pzcm`l(}~Elbix9n2=%qPL&Ewyb2`ulHjjkx zpcvGzuspu-&2Bfh06rZ0IELQ7uNJ??NplXW588QVcJwPj=%1_tLoA$}Uie+J{gHus z#Zs-T-p@H#V1l+PTfN5IW#S87a(~lp0U&2_?u(^aPu`G0x%$X)e>XqX{ zL+5-RT9tHosN@cpD_D#^tF|wHRkv zSeuVKhDz93dyZVG6q=vUe9j#w%RiLY79qMVa$kL(lSdTF4VW{&Ia$&>ZDu2l785>Q zgiYC9YHpu$U5j1cw0CZJA3!Lp)}q`nE#olon5=TsDWB*d&u-Y%VJU$->@_VCa4nOC zbp?%M&JE9|cs+kQH>b{gHgFQ0{D8+dJQLJUj#-diD1FW3L>e@IB6SzP>{AAAfH(FR3q9^m zWjf6J=Q12om$)I?`mf-n!cRoUD^ckWeVp@OSy@?4h<5@&TnC^)N28t!d8!E+5Ws~Q zNu}>}eVL)$YVygITTd?A>ld?XO4aWtKX_~O^{)%_YyQHE<0l=)%vjmExWbhn2kM~x zrXFs_m+{7sD4@EU2kj+70s{m8S{l5=XEkx}2q>6p1vXt4mpo`)2^g+$>k?>4s3h1; zsz+@aD;e!GtiQ)u*Rr?J*##!pt`@Zb)Tf%8T}=Z>1GH&q=^0C@R$AUFF_izWU7cj| zBYSBu;h8?#rn+-8X8aDPV6I!9#Ppj5TE`ltzVS8YwoZXM6gR#l=keDq!yw@aBda>u zCzi!UXjrYItC(9k~MRSPxyK9_B=5hNbvBuM6_{xe716{eP z$I@#-x?Dh4{v+x*z(_*hGWf(ENsIS)LJ zart&nz^+m0&=m?u{}sz8Y~_UJLL@*7EPb2fdvERkx=R54FZ@obSQu281P)oQRvVV~ta;M%v#YwN%BNf`;<7zJ zpA+tqr=_P|y4|*VrH*KJae(>__4f2xD(-OLF}4G)Ll$7_2fPGO_`Wg=Ek6yDkGO+R zqo1R=^*B1Bc%neZ>6;6U6u?>H#AuR`tq%J>{7$l~B`JD<2h`DZt>xoZD@m7c-*?%1 z;i~vpK9Ii{bdq)kbIP9wijcwOuLr>`z-3@^T`lsB2PdWvrdNG0{cBR03)UR(_3or& zu5auixVN`=S3xI`^17xgy3$q733_>oC0Q%TTa;CR{%F+qVSDZh=xi+!F!8!y!4}Z{ z`8Lmzae>y0nt9sST>xYMlc8@Z5~w`?^8DU*Lz;h)RlW%YS9L-6IgHEUIAC6uHo+zP zLtcpGcO|k+F@1hC7+8G{6Fa|8sJ1G5Pbhg&w7)lgA2*X(xw(|bBSG6|+zhd;Tdi#2 zPBANcc8!w1GwIGWA8s+MhYL|%0O8TKDsr|=&f6ANzuD-Pcsd$kpCs{;enr*Cc{0Tw zhua(+E-~(RurvlUD8XxwAMWyZ?ueFEtd{IB+|G?Rp=oU-U*aO8b=i1un<|f$f7!^# znJkiri>p}2rG(aaOd{J`ufbE8>2gR0BEHr}G3z!YZfG(EZNPVKd+gmmx-(PKJ8s!e|X=PGoa_x&q%~Wg5*S8scjA zthwS|3%;P0TU^r#cc8`b^UACPP=Q}0(O@bh_}fnV6CvsckxasX<(E;G*#1<|O5LS9R|7DTms#l@~B~Evtv{fK$ubFg8SE6Dm z=Dt0AF!ZXSb1r1&$3YbhVt3qibzNiX5usG#Kh8qQaNsTcMNIUP8EydH|8BX(zZ>gV z!Aq$YmK2mNl>&uhKEWtyYsU^(4^{lVIi)U3L!5lLsZ#p>wwrs%;rde{?Qy;`GAW{t62Mzc0PRe6$BD0TKLS4_o}92lgFC+2U3amx}t;95t)nW zXdB6yrPCu|TFQqHM^JUO1q}BwaN`*hX)P}mS|jtmL@AK}BF|fjwoX=7e-mj|Yu2vh z+KID(b)X7*dyX|l%(6A++;vK_D33XTHw)nVZMM9kYQ-G)}>F=||e|-fPtSEKPW? 
zW1<<}jQ9WGjTc}1g`so1lrmfx&@tFP}2$?JZu;cP~!xE%#} zhcYZZA*2^F#fCQAYU;X6EnIl;!Cmj>U7Y9zOym0Ebe6;8haM|E9C=)_N}`s0xt%e+ z>{dv6)9hF*;tkaBsF)b)$!07}opqqnS8=*B$;^B~1w`6;bE1@In!{%Jf6K4CTcZil zvlzhsULu~TbTH`(uhwLJ6KVOH#xC%HX9h3oNCf4j2(FRP(H-Rz@%gK3cRTsG7eA82 zxvih_lw7FAHSd>hrU$5((*v0mV(W9{gD#r|gA^qp>94JBsN~QvPCB<1xJn~jG`-J_ zp=!Q)CVqNVYh~FEnfmTE#^Ep5)FFtnDKYFD!$}!j)5@un_;nsi7O->yq3VPq7mJsi*(^rw2m#l9 z__kZ~?!fm4v-w}*-_}^`nN_m9?s^qDj-)EKr9KDB7k&e^kIs0rf4k9?cJ98W7U((} zT5kqy4u-q(QWSSoV-aJ%kIB)`2xi*#eQI#{d&*>_NJ+mVY~mKc_7Y=+%|Yw{YHs{u z8W1fQO1lm8j4b~d4XV@LyUU!*+G;@ox&NIIEc@66b5JbD_s`EauA~}O*UqC7x&iqf zlTBE{ivT4oP&2~m2nvp4BGY$z-v#}xp0xAt()_%f7OqeyuqS{7^oyMP8?FB-_|F^N z3z8p8MF@Mzq~|~t&aigi&&~>tA2}hV(e+-zv}{-O!GYrD--VJLGV;$zWN;v~DU9v^{AXBgiTS(V zY7X8j^c0dvtzzBSLeS6Mkr!)!*O28#U8d5%m#&G9PTVX(0&?eC;^p66(9N{XS)$tR zO5b|d5)EAJ?ZEqq)-)|oPhRr*K`^1B+Y!Rr5hGr%tC}gD``ZJ7_(k|#QW(oXuS;os zXm1TUgm6-)4!6hmz6V0IOO*>$&vn3`$Ru?@OodYJ=>||F6mVz71p$EmK)ZMtYXaF% z{m2P=6Dcz4AOrVsNn;1HKs!0lbC=44My#@ARrF{sBn|1=U^^Ds3E0E&ciT9QHw-^* zrO!7RvX^jvgkosgtJ`t20JzhUSK0KjuoQiEqxXlreBE~90_4@=m+&VtGAb4z?)_J& zWW>xAc&5{&C&fq4SK5Z;t38RBu1>)%$Y0~azsM$u+h-oV*Y0a9E#Dd~SYtE-gTd6+ zImRyQ)CY3O3j}H~Y66=RtNylv+YK-+7u4}0GJYt`_WDuV4MEW`vU#Eyzo81!BKW4S z?9|zU(503H3Mqrp{JIdST)Jocny6p`GPFS0?q%s^>}L4LnC#l@X-vGND*1 zbNT@(q+N!#Kx>lkLZ$z7{}#^ipDMHQw1Nduo79imOG3>h+A4-I6g_LZ(EcKqO?|x<#HuWM9Ovv&f70>;p15%;5 zdVk|mXvk9H*>+l?Ma)~lV2^6lEI6{Q$jalA-Xu_LY_T)`HRDhTEpqM^qW5M=+HUz< zw^SNXIU5Ev-yIes;9DerK2A`!Z{C$d>YtW0wH`6&HP$>Cj#H|AXYDW_|F7f7tT znSnheI?abiFeacC)DpDst@6WEQIB$Yk-PWj=>-OrD zZ&g2A#BlqHu)#*}os=(gFZe~#MFW=7s#!%LJ5Y=opkge?mMH&a&VUWNJMQL2;h#Mz zuX#+27q2$&0tB_om``mr#C;ezBd?F4qUA-ee4YT78&~p+99))P=XtGL;FxNm!Szn+ zhM7=fD5EA;Ok5oIH+>w4-G7Z9?w#&68#rUPki8oTiI*sFuz?UH|n3 zNtMH*vi+AynHxpkK;&|bp)WcMv041dSuL-f+GZGBdt!9>aFW zjfW5?eo#nC?UuO#^VRE398R@#leLYgocW_z8fzOA(>02FB7*!-TsuCV4Cs!p$1lo| z=0tLvPNRU=o|3c)TAH`l@~Y-v3g`6M9+nu%KyLM1W;sjk%6M@NHCp{}Y z6RAtqL*`ttPbaFz{(%BW!Pci5_LKP?XRx#VK;9$d9oTIt5}M{;UYYjvkI5Y>i{691 zP4-uQ6MX0j(zD2aeaA~Vv9d>humm(W=kU#eZ)i~9dbbWe0I2Iz#+sATv?hkA2%3df zxlQra&5-qlIoJ7RijxpwB7_j=H?0I6o$^4$gDB@DbpE)hEB0g6ShI{ZS?qOnJd5an z*>>oI3mM)O9ZzZ88n?`ZuOGgx-g29>8OZ8sT77%zwJhlQtvOzorE84g>CaRIa}e5} zN2_n?y8aGLKef%==Zn2SA*Kn^z5D>l25{dv@ywzqF2+J1p)IQ|O^ayH!^STU{|ENH zaDI|!ruw+{5{>$7mHd>R@yLZ|4k%7PZH9+RW*FXR_^I|aqt9|kTt(!-6USS@<?6 zeIHP28|#)<;tky#w8DX*6Rxq}-~Qv*bu4`GdtP)w(z}r81KPY&lB!rB3MjOjS&3xf ziszpd3EZW0q1-vwx*t7{B?vg`rdyrpPQ7$Z-Xik^)S8QLIHU^l=*_hs2} zLRQN=73Dg1L1+V-0dgdn5jdw}PB_bceO&H*VSCZQ$8T`FgUkU%fz}ipHxj~xZVSxQU=6;a`rLayh z(aF%9c`aHf#XS=-sC+315B9s2vTA_W@kb;Wvb#n2j*$qFv~?;exSjs z4N8}Q?+jR(9r=1uuLNNyJ^so^*Y`ZzT+cd)9Oy3E-1s;%WXCm)vu3Jo>lVJ+P&IzW z+XsNCR@~H0oxIGw#kC{WGzJ z>oHUwi8BrIA{gkb!6bs_&gq49iHQTVJ9w;I^bQx#p4HZ=nOg{2sxmY%3tCz=kmLI4 z?oPF5{qCPtoC!~iXw$#uk_z5ADiZ4-NuK^D+wZ!!EctTT7%Yme%x4pwZA5u8;JxTCPfMZ zbf+XSA{(3h99ZEbxL}tdaR*$!fXti6_QqApd5i_u3O}d@gf~@QG*d{yuRj~1B$v?5 zk56&ipS2d>aezGJ5{2xWEw)ua2kH)V55_3UV>M1WcN7YU2=U>^1ax^=-w%$xU?zOq zLyz!(JSRKU6}oB^qnXXd6IjE;5f+B5t0i?5Y|fRP=aiPW7wOSaU0Oa6q_HcpasI85 zykmIuF*#Xm-X6NravA4Mkn)Le4|ko#*VnpuPx#Chf2VaM_Pm-FA|a?}ZA8WT&un|FuBq9=a~1`{ijp1G`+b@H!8< z7LuhL8KNazthZ3f!(7NZorcaHwi1EVUz1+cbsD>UY2w{D`J=k{5SY(6pNq7Tg_RVgh|&PW7ul z4zgo!3VJazy?R|W zWZ?YkopBjZe^@6m-1~wF2%kQ799dDZirG9R%+_B(oa@sT_vj74nDZ7H$E|aoplnMp z+E?f6!?vel(7$}{ZoWTt?dmjVk3rLp_4A2)#?qFvb4iM5-5tpR(%0Lf=<^05T;`mt z=lJDa_=j84&V0b<>YzaCxckXRV+A6*%fO45Gq~|sfcz&La(Eu-S_9wY9ak{_blk?! 
zrg>MrNB>DB2_3cZm^w!wO9SM<`ch?2^ofnl6hPvFgwGqNpaBf_C&CZ|tg3!L_NPC; z;gXjTbc7woOg%~Vd+gCC4kuylYM`9Wxhru^g>UlFzH^XJp`lOMcXcZ}_OcfW}X zy~io*8oQ4G(}S_jGs4Pvq~*w|h|o^W_NCc_YrhC5hOS_APM;rtE2j9Zv}2N!xPOPZ z@>KLVg7Z%)W>&rqPVSyq+9Kl#7`K@w(h^|ukzt^;iQyUYhjWCAx}6bC%)&twzd zaB-z>V=z#drrc*k-9G-?DQ1y;th=%JTe1o@w@mQY|cPUh^79-9rDO&uCS@ zbq(Rk7l{ssFxXpZ;y=@CNRpK9jrZWwvi^D%3Vc#xHvuN-TTL zy3K1k#9WZ&^c)N38q~hgRpG`3>UaSj z3ViN|nYQ3Zb$sM0PB$&MRtFmz0J@-ot_VCRcJw&FK(Dr***V#g{&c z(qj-*{)?aZ-D7N~)Yej1#pWUK%dJYSy3z~OLe?qX$O2+Cpi3CU(ch?0AE&AWQTp3mmsj0pwFSJF#=xxs&Fz?sJ_m~=jN1x`6uqD)FDpZ-_6 zAe0(M^bB%BG*|L}eHu*z&clVIe z;Bth2j+22KfNNWMU@iz~#BKm3xKDZAA(*wBC{Nwvsih*vJyyetc0A!4tU!Py!aX$0 z`=y(lo@QI=q|kr1GPB(9Ns0YVbWE&D({Nn$PI|Q-33yW+dc&{AA~#L;M9WJ8%3~-p+3nz88C{cLPuO= zX$8(Rd*ekc=BJH#2HBwmGc&G6^}5)3uYI#TZ5#`%N=oHU=8+2^(hE>)7-se*{X>Eir(WElQbc^dMy3H# zcjCE6%ImHj_3^C0o3iyzy<$1cBx|dKw>4UK57E=8UWTD9zMWFE-${|t!yS<&4UhRh z!EbsbbPg#!yzAJU}WB*AoY=g8eT=?9#?XnFz%9KflL6C_}b7-f0!|aJc<891-@38 zQ{{ z{gQ-MGAlBak&@JG8cc#R{0cyHp z%ojb99nz&My#z$mKUM!G5ZN%{*T+KvN{M9-XRjm&vLCl3;*ymT&9vTR!KN5w1adfK z{Ut7xv7)4cS$#PcaGeX#rXK)x{w3#hsW-<|OIudca||9a0O%){f54b3tUG^_xjl`< z;rsEE4qPH>t&%#`^D;7E6A&$R7O9m=h3g*Xns|Zai zU#Q0}K0b-^ZTK#g1PF79hKG&_Hi#%S{~zMp^A{#pmWj!N^k3LyU^yJJ?lPz`OPQ#( zNQ9VXV}cyGb{F;XwBHNJ;A^H^KmfBr2$#L{X1^}Em-V5=Z=w#T?8{3bzm-+xcDjfj zH5fklcId^p5pUzq2em?2t2T?^?qd>4k(FaZNme2W=OK-9k^$^~|LJj;=Yg-2TEOB1 zY?41#?u<6H7z|li;zs>Zo_0+|xQ_~=8f~o>!#5Jt7JW-OHHXNuO59c7#@WB;*#LtS zW~3W1j29krWUDti!dcnPo1{n{T{;rP{U8^=rnW3{nHnGwuz5BpW1Ry?B6bf|HhLKK zZ#lhCCbj;f^_Uwu+_G~mS{OKD8Kj$_Kk=vf>2=Tkfx|d-F2`8dVDUQIL;z}dMFu{f zsfK2(aSanIyV2w3pjw1O{Z>J2=S5MUmmu|>7J+4zxC$KUNg!1$Ei{8bqJMaBR<0ET zLNmE>^(Mjw+b3&1$9ez)54>eKEKEfmiXML!=n)QV99NHYse(cVmi-12VxnuP{Dv8u z-#$IBVZfFU6i>D7bd!mq*G0=>6Qy@g-Q(YtpnSm#n^o+g6PyL`;;2-r10q8Kw5wr7 zLG_T~PcW|>L(CCsa-T43TZb7P(-wwaxW@V2%2@!bKjjJ-7F^aLj|D&edcjm$*ch78ewd^u^Q4=PA@M(f~ z)x8IvpC-=rnGJNz0*eK}h11mScqw2v82j)FUuU{W%Iog9v5qE~s6e#(ZSJ;Jf>1Ko zGd#iCO58?8s2>yF!__$8nua_#{BC^h!~R!XW8#`_t&+4}&Vy^Bmuc*M=}#&K$(rYh zO0GM(VdyeyO1GK;LPB>ijtHD1HXxx?EO5xoc4?Cb`d+U9-Si2ku0`J}nzZTiHgNvA z50zlx{Hs5~qveZ1=+;Loht2SRjBVWkHQQfN+fCG2fFer)$Q1oW{fzuQD?ls4fi8x- zk+9i@kjFoD{4b5EfB5zp15-(GGR0n!Q$DBipt6z+09Tybhg&7Ri}rkJQMPG*TERnM zM^vF_p1;w?QI)vRoJL|EdZ8Bk(Wt-t~~y#)cEkxAPWOFSVZ#tp@kOI7JlQ z%mQ$Dc#}tHHbgbwl(Gvj^Tf%mIIYoh*Cl9O`+=Tj`}~P;pgnW;{WaloDswEtV;QNq z3kaGHy1qjKNK}?BvFT}YNY^78ZKL?q-?}M}4^4yDd-T|*zL>SPoCU%EPL{pz{7aTm zhon&GVR>E*$vu7^jUH=R@WnXOT|0Gh;C&)fjT4dfEB@ry zzdwkH<0qYjn8SO_$+^?X@*~fp(@LNWu#P>q6e~Xbg7N87+xn&g;cELY*URLmv0W~U zAH~C{_0vgj;K5jV!RumjNChR2IOZy8cmD=}@W- z-9xgn^UJjJ=J9FWe0!1m5?2DyLkJffY^#Ht2!OM{{)2cMt{g>4J5V_b?`ps1bZnKB zOlJn$tYAW`z;nOcO}CifptsiH+m~N|`h!#DvQr3W71eRcbGum~JA4v0=}w%8gj!sm z{H;-J(+)?20q9^-YRTM%sua`X+M`3-qa3r;?U#aR|BuCUgI0Fu=CTMX5A9(bT$fZk zV|j^h*sFPWSb(;l+0ZknP2N4Jka4Lw86umjYTezs85gicr2WA0OZcno3*XU`XCcOF z+zgm{{8;LIh`f+|`_C^c)NKk*xjss!Dfw4JFwZ?E>fR>45=UsOD-xaSGd;^UND z`>>k#YH&*A)y<92r~FK+09dA+W2_EJs_a4kDA98pkoKE7ED%HK$zT=EGrwox3~6L8 zx9gKwNVa2@H6ve6c)yQT3VQo3^}6gz@5&d3yb0=&(lxq|YWPCE>gcC~;I8^fc+(F< zGFR0&WJ_nPM0PxUgxzr3nO(sOy!A7T8(v3$P|Kb9IfvJo{bf6E%O|X}{o+3I_jF2T zrZ35(*RTo{5@up*yTy5joMomcbhuYG9WdDU1e(2JQI@enoNYPztn<#72yb*IS_O0vYVzm~pAI@wMYCHQMFY+(szpnKK~w)(PRD%7p7)_iIU^xfAxz*lYKhO>Iji&91< zRk?tEv;)x3olifjGYlvV%s)pY`E(&$9OroQr}>(JVF{E^H!g3zoUMN@bR{ln!LHKM zYiRND#DCbeBjNW+dbxcm@#2C7UJo@K!+W!hwx%NHZO2x>8zEiz@LocS)74X<$L%YB z+c{XIYjhai%`w7AzP3;B#iw5K>$~BqzV~5ZbC}v<{i!{4Kmt`f9!LaVdXo!SWPoK7auk~1bb3z>3 ztnAiRT;VlX+bRT!v(XURWt&$nVrIlwex4M! 
zO#o~M`q)a(6no}=O#KHclnHHF(bW&NyjK5oMz_BB`F7h5^w(*SFX_rn`-!`6xgCjr z_+Ue$NmDxuK8%lTU|Q{^6X~u^7Z~k)CS!thNYhawbyO<4L$MUb$ZO-0;eL}G*66Io z0yH9Q&GpaiX77_rd4#L1d-AQYchj>yotzb^9^5f$-Qi{KM&kYx0HB$-namZIIX;T) z6G0wyEoxe(P-kaI?NK#S0T|dYSZ%F7W%*LD%f>#PrOECbzg;7o$OOnP{xW`CL}PO) z@D75x#ghGH_-r*AHhD!z1v!kg#C^HRmvW z6T_JysC=dAkl;Hq?J`PGGVDQqQw!Z*)}!~uljR+Ty2S6&MvcD%Q8Bsnqj?6hhB_)p zmv=OL%psD)RTMH-fY0k%ku#IeT4ssS6>UAibC#aO5;Oefy6JuDgsy}DVMLIwl2&_9 zN}x8QuvF?P4~1MsCui;gBrag?^viO~D= zO1zxu-F64Wh$D=_{}KR0=yA4H`;r{IWeP5@r#3glr2GFIpdmr{hB=4B7Z`*EH!!^{ zW~^jOrNQ~`IY-VDQB`t7b$y2aPTqM7%gLe-FGt3A*$ku7m$yNh`lflj}#DHe~+FoxmcBLJd~zm$GFysQma z7RJ(u4r%Acd@2JaVkWmNbZ}K>4iC$G0Q$VeuOKAs-UR_X)oWQ?$=)r&CUptD!cex=e;Q`5z8ZRxGpaZ#@_}g6;0QxFAiR~dA@}+YrfN|Zyx{AlO+hQhfdvBitgt!1 z(0yG*B$0>>dW=YI@6qRy6a8J?%rn=Hj4sazW0};e zL5<37e{SK5tZYh+4E$VJUHvyI@yK8$qMh-LqDLWM%aACD`CZyh=VmL;(9fQ*dP`G8 z=wHj}M)*&DZFYx3)@%MYuYX`wfD4CRDk!B@gumG33kRg+Cd3l(YzCBcUfn_A`oUDB zIL6NYWHIQ;^D3bo^{1uQm2LWW#m4N^Wxb-AkHLTqX4-W!zB=xDuFB)G=H25EaT|@a zJ#=ZkAVAs1QmQJRY(7B$*(8H4SXp0RcLAhPe+{lbC&FQ4(txkn;}EPY7TRqID7e+A ze<$8-hJT+SA{-Tr0PPXTk4HCjebRYwGfwjC-=i7j z!rFmnLxlPqL;nw{<$cS)`t^80x`cf64?bL>6(xwoCjY1}1k#zz30HzM>?Am}<3&Yqq|jwIoRzzfB)K*=#kJ9v*gC@r^-V+^Q~IE6{K+u| zyYYFC-$E4IBn4?7z8!3777gQu%hr*b*0+a&gaAG>|+kexU>D+0kINee|vPd1JsV4HXc|H?rN|Wsyrr(tJTc!yJRe_?EY!gWNXAr zgq71bt1H`g?U=I7#TEeW4s!c5EpjIhzIATGerc0-H8+Vu1yZ4<{06F@?qu|IYL)^9C~M{e!oZ^cco(M{->kMT$-6e0_b*o)WOzq4=P9gtwGIh~0wZg>Hkh?d#{Y_(-> z={;cElj%Oz2yY~an5NG?^UkqzxEdG)YwxJek-8rNh`Gm`?wuTJAS1Yq@Ve7OyFzIt zB6~fe*CALTZW;N#UqFTnKVAWyDFtK;kaTLjN7OuXM)o=fNSo%yKDOSCggEvV?d-tb zR%3jfn_ukF@?tNIRL%m}7qZ+kVG?!HSvi$QeT$4h^u@$i5jY4AJxiz{&~h)HMV8EN zhcMS?x3K#ke!Z%BSe3TONa={qX<_Y@MPUg7mVe-o?G)GlL-8~!4veIqPinMC50v#J?wK?WJcE$ca(H6zAv!q)qp$mETsuUQUUTZ$dk{-i|heQNIBBt{b4!TDovs*8ODa)wouoegIGK60IHN`mKvAJZO5sC0!Ib>*2E&0 zLkt?A>ejB6WROEI8U&YU{=(*700_bu0aD_yZ`i`PeLriTvW z+Vu=A6P&_3K78|Q&awAX>gr`<8F{zT-?YE0nQ(%R%UPty2viYbB+d0(_Ht7)L9@)f zJHV`1gUvGAT~nmHY(MkPIRQgyaSb@;Iy&4xd1bsrY+$f`F1`#btl5EM<&aXjav-Kp+NTKMz_lLa(Xe$jwA2agrm(}<(cmx5+)AkIs^uV5wjJ?)FVCr0LKQ~WwLYs| zzj61_p}k_?{}kBR7Jso}4UCT36j#^4b&D;t8fpViU$~Hc?((xKN4ZeQty@>MdbkWM zGwT!j18T-?U8m`{HMf#9H;$5`F?P-8g9`mI=2od>v`TCAK2%=rt^4<13blGUUfjqE z-D7#?B6s*f>F}>7Ld`72KB}5%ds@5R!)vvp53@M=bvpq?D3t#5swyACouH*rIY?t= zv#lv@6{QhgrCq{XzL%oyJ^pm%vm&6mN8J&CYur#;&R2CXvA`d0u!S+16|U5v&B{gI8em?OA5m|F*Aq&AE}u_$j)-ECvF}~yjGxXqltEG^572Ec*`xcD31(x>n9?5@ z@Hr11AJ>(m)ruDu1Pl+S6k%)lDS!PwX{t^yF6C20a(rU@2C{l=uTl3!hI7dVpcP z^lz4<$eOsvxiQf<3e{8*%-Iy2sD=LFz*^3F3ih>O@k#p$pcrylNc>w=8LqOp-~isL zJv=$k2d$vHZP3|388=hnxbC)ww8oO&AN~$_*(yC=jF3G~+VTELTG0627)X8OIdpN| z9n)EamQUW4a!cdt6d+iZUlk#a;hT$te)vo6dwuSJ6stY@uBxf2Ed?|_cLGL>kx!@O z_l{=Fm^NH#o_3nivPq@HZeW_#2%2BBOj$A=HkRp zE7N~Z_x#S{>_GG&yLE&a2L!s^tJZH&GR=?U8noLlF^7YM2UcoBQaa9L`n4T}?6yMP zZ=?66{Q!8Siz)W7Mjv7hTn~omR&=`Yp=o>FtA2jn%LfG_1Ii9{NK{ie3 zkA3c$(-~M;rNh2)!Mo$x7rDFwrh)7SX3eC4zHSm+8JN9O!j-_S@=8?qhM_`dQjYqR(Qj<`l^df9qoy0OR<$_iI{Rv?VW*~{x6~8dQ0Iy zkEBAi#jBo9+|SJvv)#rQV|7`3DTNPEK1pZGU{v8W2C2Pfqn!|qjc*!AR) zOMUFc4MyN&u4fdiQ4hx5nb#$f==9|60I&DJ<-q&$hyr}u%Nr_QvsP$G{6p=zr0`J zlVjmGFOZFuWdtA~?0LJFf@`2uJ>>$vWjVj3Kzir*yvz#+k6qkn`$)0fB2!<-JETaI z(w(6Q@YHeDuwvhoCmeh4{xhqfWVyyixJo)O#&H|6)q$o<@Oa94NHF~3G)R=Ho6ouY2J{PsDdAu}3`AY`2Gpx_3 zGE?~4*M&639V4fEW^t?c zL@`&kWW%8-QxMee`rNgVn6I}9@U$T)hXa zyLnwwWG{txjuKg~ZYKX7J}_J1;~4&K;-;)LsJ47O$}NpY8U!dEvxLmQbRl>H-- zF&Vzddx-Ji36a^52j582dDI>h_6q+dP0?qpIw_1!(!euF0Z14m-Hn9WZJ$cUsnN{N zQyM$stXvVqlE$see-c22L(O6K;Ss<)-QkF+=$T^JTcI{D4`5f)Gno~_qsuVLh+p64 z>IXj-(yr1)+cKW6G<({XxZ=i!Wm6%aK3$ZHk+q^+(3KwS9zT^dUp?07o>lBSQTJR9 
zk}h>lQ8f$F-A?G~sp#5|*VEkD_fcteVl-}js@-8U8hB8*3Xn>6ckPi~e6!bB3li4% zfec_km!%;)U_z>;elf0k;)i(HKFw=V<@2z8V}Y?$i&_)t-fYcgU!M;Nx_V9Bjnlhz zmW=Q>db>eQKc}cA_8WXkjKZc?7FJSwbFSoZE1OBK9$GG6HiPz9Nsy{9?CZXNUu=W^JtzzO54kzd+g z4yD=CnL6vNTfh>vNa4velDZ(%_qKPIEm$(RjSi$9t7<3^#FyIEn#UXN%x5o0#p|y& z^ZIpZ5?`4sGwL28hfoTi14F_y!*@K>G(0~*rz7p9Y?%5sR${=!8~u0Ug)}w+jO11u z@|L~gqaxKofz#yF{AHE;nIkYjS^neLV8JF3_ETi_OTWnITs1_}4>YZ!4W&(u^_eyIxBEd&qs(R*4F7|MWZ>1DI`s)`3`=GlX2TgFZhl|Myga zz^l#hT8BX~im2(S@@xK(kFC}wXUm}l#mchhkJ5S%JVITK$|cl{)-SHx+%v2qtYW%} zy%_#A@Yn$ESUT`=5zq zLIP-u@-R3e>UqXx_~?jVAK%<`0vZt-LR~zehKvW_l2;TB~nhROq`F+_~8EnNx*CeQvO@)(@rbBCO`D@F-yJE+q0!GXMHT-r5o zd)jq}`QYn=_Af5Wqk-$|_>s>^^X-|J?yP8Q4|!OnG2|iFp3CG{icu;n{b#Q17~MAa z^a z+M|{&T;)HCr4rj?Fzcfy(EEAlNeONva%PSl6@%-){(Rdi+mDy^bO$}d03Yy`75--x z6s&B%eoF@len8$j$1o0@5W(l6kZU*OpWB~!Elwa7!JdoDTo=Lj%1frwY)_iBHn66E4#uTg4_$Yx!wVcUj3__aYG|`DD z5#H^I{-E@Te77iqS#cHJ&@S1u%Oaa_R+S-+93UDsMIkfz^kR!wGd&N)_Wa3I$O!mw z(WdI$AN`DN;F$TH=AOCG1p*Rut`|-jQ{B4K&$?&9gMN+xa|i^SYkq^4RyXWY%m>)Q zLU}iR>^k28^MvZw{!T(p(k*Vt^F5ri%k%?0@y1@xd9<-wixMDIbfn%tPpGNk&baR> z#Fb$)I)e=4_virr729^u|Y8_zh6*d6*i8p#;gPwlZJ_HjgtbtU~Z_5wO}ubyc@MsabcX|Z8(AD%W^jaw<*szw8H=^MQ3 zlApMM6murY%KmU8vm-F%$%Dd2Q(S*iS03`eijc1O9D7DywhD}b!PkR0AKHIhz{dEp0#4d_n7JW)oLy$8d$JL6O)O{t* zwVuPTUZv$EeGf#4r&yD!d2TRT{9UaF2!3dQ|Yw_uDy&Z@3N!U0~l8vgf{Z6Uxo(hSN*^!(@KM78!%G;kFxiGYHIDahL0eiAfO_m zAfSSvbg5D#V4(`ANbl0S(px~JDN2!Eq&KOdmjDTh^w3M_L4i;L1PCOAK=?Q3oconK z{&T+f4#!{^He`oqKV_{o=b96Vaoi29+HJ*V@P{1k@5mi%iN0}uOoDD0*i3ywR3Gy3VCOQ;`v`%1nzq~TJQQMhgSP6`bAII+B>`c@o`T^7t(yP7n99Phnq^4{`S%$y0(-lk;kSvbd>{1sC!BgSv!l_uLV>(_F< z&Rn5X-E|83_%cG9S*F%;aZZuBuVT&89=|o`8fBO-A$8{S)sf08c_S0W6Nl57+2RoJ zkH`7-&Ck*=xXZunXEQU>zGMW-@0_+yS9aRCk9-24`Ar;#>SDOQ5*eruYocus3mEo9 zx9n&@=(3|Z>}j94wD<1yd7Em+*-q+P@HgM07h9Ot+vNQCp72sV1Km04=gK5wWH4v21ayB*KWpHl=w^QBNW%kw zJjdH4imNdeE6S&6A=X;4>t4~<+nnINFAPc>FhK7)yV3)QcObCClCMCHbO~9`1USnB$`QgzwY9a`lB~Zc zM7(P?Df11Fr#TT53&$YxPK@2B$=FkoqxKR+5J1ArP23g}6}>G~I$kYtZVTH>>rTKR z2X!YPPTSg#S5Jj(f<_hFrO`XCQEW7(Y>Hy9Zw#0GPB!r}NG+EbD`AFkc>-uxc}5X) zW8G+5t?nm65YPA@2lP_27wzB53W7kOjh0odd_!_>n7b%#Y|8oh1gt;c>?{uxB@z9 zI$9PEI`3HV-MQSMLI>lSacJm-g0+GT6%Dk21k`{%EgQL4kZkHe(08pIjwCa0zCMpg z_JSQti7qcMPZcR7;N^eewNYAoHk?QuJ#$l6@=))NwWr?&zj$W$e~F%SfanQwy|Uz) zyrej)cm+XMA%dzjc=@xk#SqH3Lv9ObALEAn4*1p(zFg-H1X(lvu}UEoBECG@7V_Ks zJ_V%5nyeTdSAr`%D9f^dP^H{=U)ej^0?4f+xAz2#U0Kq;?nebx9XJv(t&F|NpWyqD z0fwwMM>m_uY5W@cf%-#%&9p1f{w(rRgxd-y`RY}2rq>oXk*I-8Ycc5iK-&2^dXm_D za-`*&PPfv^vpG{oKRqyA$F>AH@q3PLZEka1p|%OY-XQkWfy7e_x-kU*lZ_cEhq!7XCZHOY{0{xkNoImm~nDT`#Z`R zxeq6Mr3PT*IHx6%sqO)J#apJ+Wax6BU*F~~dIq=Q#)@0hWw(wli5RAgY9A7`V3MIbC)Z8--a|tEVe9~?uj{B$r9bo&rzI7KOT^4 zT3cyo?&#f^i9Bw7))DAVwuM_KNpvwe5<2?|r-NK;%64G*q6R?+TtjtEEUi$ia{9x-(7eiMU;<&KOUJ`Qu6}oXYFchB^WKlR9h|JoQJRz^3dbr)W z-pN7=_V>|=W+1_y_rc7&QuA->fT$H!Wy zij&3Jj0tQ{OGQq|?+2|$Co*2@+sShSx+kjd2dGno(gKr$Jaf+sJ&*g$US4pN0!I3v4LU{8uv?cEHJ z2n5+_&5~t_Up8_tn&G)8&@F7xix}NL4y`IW8#Y?E8L8vw+&sbKBfngB|4v`%1?$vp z!Y4?Jvsp6$vc-LZv-q6VP!V$#M}|Jk1b=UtN~zErtZ-ZJX0jSjquE0tz(vM8cZ6+)44Eg?rgkN1fFW9H2RoM_t1_wQQFJrS-(k27pu&(ZHrjQVI zMS8>sn65l|gRMt}kzsvJdfig~Z1gpmyB=TEV)Qy!JM>mmY<8rt%c8m0Uc1~pw@KS? 
znUWdcl9Vyq7VsQ?^t(+MoRQsUn~r;#ALFnKrw#zBOc}D>lf%KZDSqq$N2&{70nHz^ zfy_5)O)kmZQ2O*cgRchmyuqYBzEQ^+yh(MpG3OcyNDihgzycH97G25#^bR3DEe!3F zexX|1aK`mFtMH>E3io#PBrwQ%j_;^-M`gkNuXJ9+PXn%O=I+bSTbzx z;Ynj)xW}f8aD^x&UR)f4GbKJjEmCHQ&)H_;o;^1`dAe?DVD)HQ@`0JCUPF;0(LF*+ zQjv7lwKMT#qH82E;I=2mn|?MB8o0U#eQse&XsRt4XFW8ncS@BNpAmL7H*Nn4SmQ*o zDj^dy19p3MfyjGis2oOPcV}hf$1e19_+p(*D}3}13>8a)+{gfEkX|c8W}I5-RF9u^ zWTF=heO3$9Ls;&KKt_w6rQQZetWe;z+iL{q8i7Wt(zV|KJ;5bFFDCZnXa$51rz*d} zUQ8N?1l|F{TrD6zPSb7Tc|CwIL{mvpuQ zvgOleK&629ZS3B&x8TQ{uC1QyO(VRVPcSwEU+?+37#E#6wjV{DEZi&jbTtKj@cYwd z;8P>PUCh^__W>k-xGL0V)-bpCj_pq| zp&b99BAxYE=ZI9nJsWL6(a%?6aVh!H4jK({$z1gKyqt9?+fp_UF} zaA4qz1ZJWy_?RZYYG(`J{}IWkPzYC}UI*T>&&z9LR?u;xEZz$DBvg^d7?Rpk9gRG> z^4{oV#qYZR;azESw;V}5+ard2Y_bljJl;0be~QlE3_MO)wnf{7Qqi%b@?M)QOAbAo z^sv8nP^52N!hiAt=ZESI39mpMs1CZZ!~yQ+jiXg~CCWk;o_f27_tGup7Mt_`)#y6fKE7E$QKwo6TqY z(+FhV!rN#sS?ArK!@Co+G4CI5*$j;JSQhAnR<;^_0rvh;)^w>Am0&ldd{2wL<6RhF zCznwh6cmmKu*^&sWO_`B6X4_&&Zf2A}jvd;ujz|5HlrSJXcG z@uOOy@v{?ySu#RS_#!ib#AU;}*|S?}m|Q_DqphPpIPZRp7wq!oc&pC6kA-T7GQ{1| zf)`TAl}S^#kxIo&E5Ziki~x%tGX=UhPm?7o6g*- ztqCUDn&EgzC~c0Q!|}@Wr!CyNqm}$7Up0jn7GH`VWL}4UkP0l5cIAqo$9Eo6 zeuA6WSM)8DRe!akB_6CVF(HI&D=cuocVsRY-D7IO+!7anEK;F6BPnS_HQM-ZZG;zc zg$*3PV2@!Gkg)e6bK+^8cTz{-59T zRQ$WdDq%Jyl9MAYj_O|mlTxm19H`OugEqzGurl<0jt@*9V?UzJ?2<|Qc@~>>QASE3 zyXV=Xr4)(nFFhLXW;XRJT+Kdo%e~Z~ryGWIKvXHsf==*y;0C3EOs~|3VTN@_XSq^M!C9H1?6BGL@|rLSUF zkS&J-F1zK6O}7qu*|MQuvw^~Xgl5>y2xIqpZzRvS9-{AXF8h7!-UDg{fn5Ww_-H|T zB@8&qcw#9T5u2Tanuz8KS$>DN)e7hz;+EdIL22O>P8#Ajq7Z+LUwnSc6-xfIz` z*gs=M$fS&OeScxvUI7-;sxDGlJ1^)l-NQ?*gcxKo1uIvJekv_UclSaJ&*i&!;d95w zlgo|y*JwnYX2Q>ZbnBste^0?n&aM~`9_hswj*9r<#$HL=QT(*c@J-9Ox)}jV*m8|e zOX4$;p?;)mgvf9stF0Ig`}h7FHgz{LX?ajK3vqSTeh7XiH3-`N?dX0-Y%x1Sym$7G zi}wOHTsfUlm*ZJgSMsLQ-UM+O)C7LIlIBoZk?w=OyJn76a@R)W$)N>eGh_*q=UMHr zL$JhJ*BK=@K#Qln&{=VOz3Z#YGpIM#xVrNCZbZ$o*odKcay0vNPsNK*jx=RD zGP9iD+cVMH;0o8gHj8vLN)7I1)sYlA8P4q}d<3oslHD0Ghu1E#cJ?k5ZYI3AZ|P@_ zDHO}?R-W*`_FA7Dm{+iJt+-nsiEi~e<}uj*#p1$*-=EHNRCbrLqw_bFO2g%xaYn91 z-E;2W$6|r)YY!6h>)ktdKnzhot2-yiXH!fHvxY#MQXvD%7RKV(XAwHwGIT27z+ z#b$?+3;R(j=c>IMJnA$r6Cxue?`&25HOOr-+n4n-cIM7-df}pQP#iCR!_v&4@=El% zT7q6C(!2$p+agq^=&Khz?>x9a*GrjIGt2w3h6FpV4e3~9G>KooT8t0$rNBYmS$n~! 
zfC^r_G}|$I7^TB40+Euo@4)y_P1UU*0&NPLC_&c)pa-i#C!zKH+iM@LrXUIT$~8|_ zg>?P+If13|)#K$t4<9+vzLNaaePBl`@`3S`8t#MyKj8-op6Wk6^=xvR!zUnSt|2g> zVRy9}*n14WL(6!tgjV#;sl3bp@IY@nyT)2g*YF{>b4PQ_$z~i?C2+V#SvxeaTqWz; z?idff5k~tg-}IWJ66FaXG*-FtdSkLm@%v$9Me_1Lqtt-S?6d$7WUDU|VufMCc8LO^n%2d-_t|)2u0X9IA*j?pBBdM3028kvHYlSu6AHROF4WU)iTg zHWPp!OzpoI#PDVX-a<*8@!hOw+_+6#(+adpVz=y~>@&3um#DlPXcDlymJG8NPB0@M zb$-~)6*QY)OO&oKBOBA-=A;?rUwLWEk8qu-nlVL|8x>5owNN+Ag52vusa8A<%S>7* zw{}d;@)yt1BBUI7Mhl~(#sozA_1A<0f>;DEVy6=%f_g-?Zw0)~e&ka+A?SSPO+y*{ zq(iUIx@RQO>t#sz`FfMSSch$F3n|BGroOL!4Fzeq0GxA8R5C%&^@>)dL5@fE%O#CB zSk;in*TCz~an}+SXn%1A95j`Q1-l|MEcuYlV3R6`jHc2xN7>RBi=A#e1fC`vQxE`1 zWcu%D$X1Yir=Du}UFZqjN*9uQ(^lp7JC;1}hkK?^Zc3hyyRDu2ygBn>Gxc0-_Nb^> z&GNX^e7xdu%ojz*|D(r#!yJPE%c#8*4&m$E>% zV_G11*XN$3W7Ci%i+ElQ5vdo?>UHXmo%uwK4f1-a(R1>o5E3QL-u)G5Ryy-ojE81z zH5fT?=ay#cmtXx2pzS0!F^5JE>_iXSpcE1zHVEIM`W>Gx{-rp+6VQt2F=g8ioGWXe z`y)roywT3@jl?XFPPWpEE}TCI*_CZc9RitH5}+mA)Q&r32G}h> zST8V?pTm~d-k{7=j!9~AUkRrQnB&_clu~;xG7lj4mFN%{x$=whR^1m38eB4>bsJ@g zOYzo#PZ3&15Y`rntPMH|w72HO=^d7E`YfDjLbHOM3_E066;(qfzqow;Hp%)F(Bk1C zKGdsw zL3Q6Cixdu;0i(j}l1}=m7XBxqU&1}-4xt-k#Y?z2c!mp?>54jkE@oz1se{q0Ry)?+hihv%@N06mt2tFqe}4WCA#yyqk8g{#*N6ctk)e`z*JAehz6Z!TF4iidnUH3Kl+aeC)eEm28E^)NHC-pMr@W zBTkri0(4i%<13h4D7$i$tTK@CSkuDl6IL<45H5=G= zGuE*aG7ZKPsQ9 zHsRMKv;{Lr%hQFPX~$2C;2TFLvzm=&?_K1Q$VTog-O=p+RzDiGLnwY!39uDMQKMx& z?6{b^q9Vm*g+`A608e+a%(vtjkOq7+FbF=VMt8BM1dW7xnt1NnF(Ydjm%D?^Xpi2H zM$WKkrY~$d+C3+H9_Cd=3f&2|fLPq4%*olK8vya&71=P``)F(HtFz(nfqfmaFJl(+ z1W?9GW0A`g2MDNi#XmNs*6jG+#ef#CoRn}eL~I$^wap6FZpRB-_oqd9)wUK(n&|3) z8@^&(#kHt>2G7pdjjnGe_dl5FuLcJ_Fybd>iTQrcPsMbi3T&+a>5u*$o_&n}=i*w> z5Surpa4H{(=2Phgr?)Zax@cug`kg+#@QRnX2a)=z5KJip-6J2AyQ2byxX*|1@x^Qtk5je=50i$y@ z0FZ4_qe%f27#v@qhPJ}lTw($IQrN?7=-^b<^W+c-e_yC$we+2aGl3JRkT4sF+ohTT z-*!WqcvEQ@0mL%!h{DiMwa*DxQ-VmF4c0*W&3%Z;%s?jl!39$j5`aOVKR~L6GUlP4 zr5+pC0^Bjvb&l3R|2Q)Yy_Yt@_BxzVxM@nvquH;2t&RnvNqOj}-3=JC{H{}*2>C~F z6Sy$OcZM(*D9e-*<3I8O9uw`IyMNBi3>zHW7Lfhph_0^J8;x-VWL1xaRu3nR1JEC7 z3ZkwWFZzAcSt%7=S)3SR>m~z{@se2J_eD#!c=QQnl+rk054@TwPrKr&=|<)ZaiKUr2Lz-CPabT zDtI{(V{1+>b=B(yxTVR9)alvV*>Tx#dM190y<(xj$$D=}xEga#Qbu1H!K>R*?SFk7 z;q-K;PdIbEf0xEHJzc8KUR$j=A1lZ#ckzmv)JGV=OLu+u0?H_V_~VuOoYhLbWFoHU zt3;>c8spmtU0tLSq(xq9gwbjeUqwx_oRbO)BiW!oQj88h9G!twQ`6GYS_5Uy!beBE zrlb5q4+q&M&IFmuzoha*PCk67*fl67Wf2;_?g>5l%I;k9D@KO+B*Up$59OoBu)!Gm zE`pI=;P~*yrVZ6T&>wUg#}a5$6c)Y}!rE!ief4w|C3|V2TL@pA=Aff$d-8?NU*&%L zXmtc>c!}ur@}{M~&-El1U9?N|Ykl$Elk9FdjLb1&Ilg>`0(a-?m+)Bt>3b*w(%?s= zYl6IIDty|g5Uo8xvRPk|Az9hZv*SX64{mEU>P74d+NML7bDw#uOWJ*-r#RtlQ^IUU zJ{yv^3cHmzkI&UOT@@Km1&({qqCJnNCwR~aiBF(6KUm=R~0QPPS)KOJarQK~N5Bq2l7>dI^R(clD;cA{mU$TS+;DE8) zXGVUx7yeEqBRo$z`o_IxugysD8O_mN5^iNMMg}e?20Qp6*SkqVL7rc021Mp0XE_DS z$jR`dj?6xN{K~W=Br)q~iFc$(YYEP!2Ar&ifI?GX(-su-dMaU6B76MEggFWzK>>O# z8su#t>w?DkneHEjdoj170bM57($)r7l)Oix#bVWL*`@mD_n2h7w*>t1{`eO(4Y0=L zq?+|68CuMH-a#ofyB1J_(KKfrMpht^{({(aINIcE_LOs#gCV6xpL_PX?;o6Hv2ZW@(Iv*i6ma7-)1g zR`8hxrBU%?-!>f=M<0(mH*SXEF&I3?C5*`4yy-eHQ9knQ8&@N=h0ePPChJ>;Z%Frq zKlHXRF8P6cVh&SmKdXHm?m5=(Je3=ELDoWH=%|{nJ6Qr{rqP;sK1pkzs&6h_7u;Yf zZ@QPrK2AGDNpoY-CTu?$qR&tC_4PU|5Gz!B{rWvSp2jmzN1V~fpgUCm@fuX$j%x4v zO7g3DPdW5iu zly8|JtS8SlM#ydY-s6XyaKaE1P0u@6Ym09VB{}`+BeT&RANFvUn4Nd;==w?geV8{d zhAnU^x7^)75$?{3c~hhD=sEYnhr6X_ z$9%t)=Kt!#c=Xe^>Y0F`2;6-|bNG#~feDOQmi>J%g2PV*AR!w$;2bNOlq#U>CA2y` zs4B|{Pai-eB-iEf<9$O_qmY*2+1lH9DO3QHCp-DNAJ;o->}kk&vh< zM!4xa|LezMFG@dW`ENDt0=cKM(`EH!f>J&ur6K$|JjM@l*zP(|06YUJWxKPjYHz z2OsltpC!ZYmV?W@3d*@gS_F;iRf;q-Zuh?iJc?q)4K@Bt3*evSKb{wzuEtj>QbqR< z91jhpVQ&|0(z#W{Q5l|Fv%{SH-*%24Cb+A>#8PY8(Qq=@Up?1m##kUYFe)pnq!C8I 
z%H9OhHCP#{vL^rpQ@*UxS?f;_8fn(kS{pBo@YtMHy)3TG`s)Y_GEd5IOq#PxnzK)m zK&lg5Y^E#`)erltP5P^Ox(t{}?Dj!NGsTFvM5OkFSBe>)^3mT@<)$@?@`qMcCe}6RV$r!*Bq1$5-J7s7BEUrh znYBDi5;D@soU3+*OF8G}i_?cj|GqR9C6Q3UhHCUpZy^NUOyRh9K>l6o&MhW3y0v0o z#E_VYrZ;R}=12AxWzjsco`ZTNjB2r26eaJUF2`EmX1!bThUS5T^T*YzpIog76<}*; zT|OXG{XJ#y@|C_hl2cIRWXBfOT>>DV=|gQtmRfL!z~D94%E{(v_L-goG0l(Q;FpHF zo!@|#B9_420-*D7n8Q;`d-z1$>}e33FgdEMmaNI1yB<*EktjfRNX^J{dK3h#JfLjz z8TyMWK*=E3xBPG>2fW9YyDpYR%7YHjpo0K@iYsU+jmgG+8Lzys(UjdJo9r@6y>hI0 zNS)>;EP6s=?~N1+kQTcWzTXT#XIwzEJ999!k7*2=AzNe+y6gJul=Z^j>3%hE_4RZryl5CX#81S~vcro*>kIep-NOg0dC7XLnh(ZvX!%0r+}&4u6Zx&D zs;#TRBrpH17@76E{B+zf8f#ENg|-QiCW!T@Y!#yVOHg{TGP{z?EYEl!J?^cUE_o zg9(?mM-SXFZMV6iWiB6R3>r3Nzw>d&PSEFg`|#I3)7tn8pvvMn0?*EVZdm_J!aI(8 zGW&D5HNe)IRG8!+u}FT{A2#=Q=EUb8d`{4KmE8FK*s42Q{*vKO*Ow;e5J86>mknUQ zWwF_8JV~#_2|P;I8to1Z;$F>4awyK7orTHC$vi zV}E!zr-w;t=9fXuN}nl{KC(hi%H${1)tU-WC2uYRBnvkhgg<8j1=xogo?fjXHQ8cr>m@QN~jY$ zi%w4GTg*yxSe!0M<2zVMuAzkqvBYG%l;wEL&9j&ZCf`xlTh9*Fotm&eObFr40{1VC z^v^V^GMT!4pNfkedY&mVDqfZuo5Bqwu14vdV;xY}0xGn-n;RgmV?%@Qani|pATs}o z$u%?WaxLRlI75cMb}*4QE69WvhuaKHWL{vHCaOjMZ13hw;k3-WIW#Aydb4(vuh9?O zC^Qn!V>4)QQ?}pIyq4`jcFflY`B{z}V`kQww|Cy|XmgJk7WVkY51vcIDyfxBVez09I5ui0l$7b5|FW0+xTIp_h z{n^8Llo+IyOlc5?4xo`gY)U72AiO##g|fFa@T^y)+&4YO7+7dFEz9LNt-{837@;g66;+a$(#I{I*l3o5?4sSgw?y4Lf8@c@CSwG6NSgv;mm8F9iW?UQvK!JEHqt?q5qcnzTUaMnXjEFTZ64=gZ#!XmRiJvz}fVU`N!r)ATlv z4=8ATbo=hj9)0Hs2JUhp`t{JV)grC=F4rTMduPT=+5$?iih7Q$SwvJilZ1S2=OcpG zczg)Qp9 zRBaDuh)dwnup+mzDy9@3ugkYt36FtGt(# zOyI~L^?Q1`4e6rVb)V|VZL@L2Uz@BdsDbmlAni10;b{kIungDsh`MkOP+#EM!qyDR z^NZOaK0FxIXaNY-OF;SHO`gT^=1fC6oJlrR! zV7pPjHpn6zU(l!PJIbY#l7*XBuJ*)e&J?PzFo=2~R3NQEH#vfR;SCieip9BF)#@NI zFQ15FpOd8G0IbX$@0=7eam7=AqN#Wzph0WD0Em%-M8uTc+}se_Ixh?CN4MQ|&7$G8 zi+{CkAARCPch}%*S_x&p<@^_fZ&8|ewSKgjl`4~WnN2sA8@f5syX$r5kZY)E|0%oc zH?}v}s;xYSp1>!D16y WVbqQsxO@SKOEL}cj>GKGmqm^Qf>+w z{bCcgmV@;hJqX0!^tyV^-;m0beHHoKv{X$$zmT-l?_A(i0g>E_1&mdo?Y{=t`&3cUH@&dH&64)YO&DSOB=eWj%dPz?*AGUPU_xeMp135 z<+mMmWr4X$b<68E)tYU7UrJ%z*Od5$DQ4wy6J8p3TKc-;-wci&9_7+BVcX<$Oc zEK=j?>3lFP>*xCQJk0|qjIe$44@nz2UYO|@nsR`cf|j#Q*S>N^@wbDr;+Fuq!GZDm zm>w-H&U&Tdm?xIw8_Yys|L*Bw!qPd8XMc78D>~m_lrLVzoY0WsFoThDsVhyp^zyA- zQg+2mgxzuWbHBG-^l}7wXnHI0b=LU=h-veS$YBM`Qh`zU#A(5noOUsQ~Ti zcpjiE#bE2bb{S!I_%hju?Y#%7jV4`nH@sH=V3*(-ztDBoL?zuv&dVW|RzDka3 z^Se2{)%hu8Yr65lXHy6BIn)3f!`ThWVE>G3tGVU8(;lu(ztwQi$C8GwEO)2OASOHH z=+z8vy{xI98il8=I%Cqo_p}SUfy{>kd3)+1xK7W73~x|^^NbgjhCA2jVEWDemub_(Kp5sbcXk`6 z(G4qe*1)M`hsujMWMOwdX0y?Gyd%0!6~PY_|mXkPo=ZRb)m)O@u) z#tUow1gPw~X~~~Jub@{V%5UGguPT)OwmyRc($-ReFdLV5%7dM6k6=g&1V`gF&R^)< zl?lLiOet#JzxFrUYouqr;?G~AOerLS1uURkTwtpt8%x|+6YGfsW3J|TH~fK6Gaqd zR`SNlM-e8q_uKWUE+vib7=FhiMiz-YXv^bVVD@;vGzUbw-KOn}dZ3;dzSa{UflmTt zPM#hTux4wXRTTevY)aDIkV$Z)l}w|BMO`j4Q=5lJ`c3S6dZIK$nu_Urn%@Q3!Q;BFCU=yIio;#nZGbM!6)(`= zO^b<+S;yZ|HG`~NfMm=CL-|GXq!v-fOco(mtM}9r{%dp8cp&IH+%Xwkg<=v!fro(o)K}>V7BGN+Eo!&U$yOq6F=DUj+X@E(e zTy4{@)a+v~0ONe#;e9k?FXqx(Bf0NEiZi56+}+kFvZ;MjWpiEI+;s5QY=^JgN%~=65%Y|* zR#1`^>J#NA+1D#@$74o+#(xtaB?zft=_Zq5Un_o=ms^2dB8`HwznmXo(Mw}&8HBzA zZ@VRDWeVhxq})og|J?fwg7@}3ngG|;Rih=T9ZOd+CPP#-t^oQKwJbJ|+9vNxJr&!ySwe?hc)%(suwX_Vi|vPjL&EWS_htLYS_OPs;tKshO0!? 
ziISo-{GZ(vSKkX*rzwzhHdK50`gJ~9M@EBk%&2}V_Wk=u{wyEE!}{Eh5g8{{PWKqH z8E*`Ag>RO6L|Ez*$$KHywu7cUgTe^|8Pn-(>U<}6KVFkGD(0c-ud4(S0_l`QL5*}C z^%=?XzOpea-&k_dkCvGppKI1P5aQUU{snEu?Q>9%3$**qn+fAc^F0pt*=BqSL40(r zY2D6L_Wb$t)`Et`MAivZa}a!$9o7#B-bU?cO2l+TDi?LtoG3tBckYb5edQG>hp&<> zZ|6+$wjk!-b~jnoP(jDPAbFLS9Ql3I;K(7pL{Z@U0)zuoxCZ+{S021#2_S?aJ;O-> z6K${JKu&yp&1yIqmuAtV8q9X!S$T76nx&AFv+i(0%iJ3bK_|7L6vsN*x{b#jU5c{a zeU8(Acq)yU&2b*!b3HZh}3aB38Js(eC8eL*A$ z=pqcB;9_B*`bT;_aNz@YCF)AsLo3=}JiNT>CHLO5tr~NW34Zt4kvcfgKT|aWKDV`cG& znT# z&Wu%vP%vS7W8pjJ3iR9uYPw`vSy`#(DMcaTK_lza79E~J7)x{0V6@PHL?XHd5gRxp z#yso%Xdb-ny$Fl>-jPLP5^IXgF{yn!GfO`AD%(5Q{~p7vvPp%HY;c{jK0NtkjFxB< z)E`;#Af$J9;1giMJ{EMQbxKOsD2pPV%ii7|@9lXEa)`e8Z{v2?=q5Z{!3#U@%2sdp z#euGf`m#>2^*+X~Q~k!yg5Sb^8|M4@?ZAsC1j^$KkA*fx_1Svoqex$*in%FZS@j)& zR?|uriY4vP6uUWK?-%3YuyAw?+oWE!D0BYWI;?F&n$D`II00gkJiu zl2v;13sit@j+KqgbZZ7c3?1gB9J=)!dDhr`9)cGGZpYNL)#s+%B*(cn$L-eOVmV=eI<>X}s-D+&Ud8 zf*-*5efjE@6(x@KD>62jv=ZGLwlense3p+ZTR1!XJkK>dit;RQ(<|LF7ATeHzxjcz@Xj@5a!yYhAi^|XG4#MVPKKPNNW zi2Nz2cjKFIO?68}x6$NyXG_b$<8v0IFRYN^jV8!#OG9>zxggMyii$90WvyhGwI@hHmIv;`$^2 z=@ynGLd4Q>`!Yy8N#UkGpuq}C_QXihXo*{f&=3+AY4+*U2SN|%u3Wv^t3B9XO~}Gy z{Xd_A#z#^=STB8}J}qwUeHQ4EfLChwXE+z585+1+WiM0Uln}m~CE_j#RzX$k!%cvrOV|DfgQ-jW+edw_eK{K&*^5}) zM>FkOH?E0CeJNp{1Te2!(HQ5EE^A;J=uw?6gTem3WXeKl3;D;^P(>Y~xvYZ2QVyTq z79e-Ufac!{ZU1$qj`POa?lSrlie`+>0Fb=@;HPY@K3KUu>CY+e(EM(`EYUu)lI6;k z=?xRdcwuKN(dC^j;K4v>;!-_T0GYZw_wT(hOa-J$mgYd-*`er+v&X%(0r=&YQ%#OB z;!bx*>fDvr=F7#Hv-v;u?w-35Od-S#iKuAbLdi2F33m&}0?wbRH3>Y*+0PDcF{Lm{ zS)q5Ts{gW)!6>1M6Ere+e42CMq(N00AQtvGjT;VxS+7xv4c7xPlJDi}>U zTR@?&vR97pZx9|UDt-ZiUVdq5X(eUu9;+UsGyz|oxqO|xVBd#rCe4JYW{Q{E>1qI7 z6Q5kHNuXIiZkLJq%iA+J?cxw4uEM>2dQd)ri-yrA-E89gNQE;$a_Iw>DFyUf_{<;w z!B(+esnGddz-S-I{yzLVn}iFuDi7zE*w~8+JMPY_e`%awR8?^I3JVL1bLHz_S*9nOYPG-!8(B6cR(h4GtCDH@$@pG){OMzw4La#8M&{OdD(d-o2+ z4f%xefi;!-pvhXVxTlURR9G(F6d6qXuPN}aIp*BeA>$`Dy3sd!0hPM6* zzI^kf&`6W~Vso!YQr2`_R&Vii=>D#|e?&9XP6(#YSWd*YfbL>BD;%y&{3S@C@a-L4 zAI5{-Ytf%zs}k&c)<8{-N!4*)z@v)G?`zx9CGerN&%dQBe=W{nKknyN)t8&3edFSj zUEMCDvx+YVG_JmRDD&vA<3F2^idEd{=124z_eWLLxY@g^th`cj;XM4NCMM+og#CSM zz6O6YtId} zjA`m%qKqeW(%}F7G5^;_l5I*m25@ubp-&&{dW*+d#&`gXd_% zPC0GQOV1iBn0oipJA(SoE{kcHn<6w^Sd+BWo|Wm4OxV1_+X9>)&}~ruCyV3nNBYK! 
z?)27~;a|TRPc?wrGrx?DU9q3euVRV)b6zXs*A`d*-1JIbwoxs4vbpG!rXt*y0Y*ym znyz5Fx|`c#$!wHNvkL{z6F*k9_-|9?^l$h-rj$bKrEfaHzB>Jq%>In|oV9JCXiUFn zXYlFIcxs;Bt!wU$oj&ZTVTrhe){~r10t)8x)pXkb!5;j-msp8b_5>Arf2gLKQPt0{ zP{$59Qu9+pa!dKJhccreCpH}ApU)xq!MVnFEfs%`?d(`$YN~TXY|;ecDA3_@MEN*M z=VA&3zLLasn)d$hH~;tltw$=9UqJ9@!bbv_fEEpx_qfxE#31c4tMRu0i-PZ2;|KGC zi=A~@7Vmc>0BuBBzCPKU5kUD+onRsKe-GDx8@>PbQem8KY#Pv@@YB?+*pQ92=p{vR z^W0H0bcp0mg?Z)NA`ZQs`M~+t`2W1S@MEWU@2MC6W+wP-Jb*85crRv+2}oQTp`LD^;}%vyIDU;lH{m&Z4=RMVSBdkp_~1 z!?O-x^Z6I~wpMDo6Srhb#?ji^X=;m|s$O!23i8t)4Vr&N9?FI9Kv%FVZ${nSam(hq zUfCrr_OWpzPe#(Da8lL}r~!82{hPyQ|3~*$qgn?Z9W&5+#NB>&g^re15V&#FvpS-c z{n)p?g%-2ei>+oM7Nm@t zIyjB?NYaJ50bcl9K>k>%ak>{4_rJ_fEf&zydkc&JbAzSoe>}~BMG$;&ZVZs0<$zM( zka&<~O=f0hrV?lUaK2wEX=zDmq&svKU{$W(yOd41d+jttidH$p5{nE$qS$rQ}K+a$0@J+odg2A~w2 zR?^R(XH^{%xK#_YQew?1_vkgo&FK{{hhE5iefpfgc~h{)B7>fy?YUWFh3^U^98ute zh0SQ6p->VIW8HGa`Bmc1aT{5PH+QJcA@nZCcKnYvAU?t8!W1~ooT1QL?rs`&3tt0z z`uU5mZJxKfj4;*Ej%EJWntMJ9>_Lu) z7b?MjEra!9U{Cb(do3;SKUfyo4CjH7e9ZgFcZu<%R_Za<0BePxv-1^Yh zRGROvTRRxPYYT|VqVW1_od2WuxcC6DQ$uj>B>(aMQTEnxQMOI{xT1i9f{Iv37)VMZ z(kKE-DjibNAs{I&Dk5MI(j`)oi!>~-7<6}oO0$Hpgv9RtW(B{W-+RCBb3gi@E3nsf zo!6W*bIu%d9D8=wMD-oz91aNI{iE9yu+9@z?^ymSPK6zS zML4Fe9r)$e8e#}#DC;ayz79ozGJ z;~a%A<qzp=CV28Lh1_uhiA0G=d$ zP~jt7X;l@rk*Mg0vnT#y``-40d+$vQt+;}!bLK4f#kyf`M*>7c5R;_X%$^?tY_M-jl4q zv*Qth_g+tn+_#ffWWsxwzB@?UcK5WnvZ~_iTLUaDYI`{6+gMcC?XFd0H z*RoDSUe^npM%lLiHv%(gBuJ=&!Z}Qzb8Mu8+d$j@c&ZUoE z0w3)=V`3-EZh zLxQY-V~ka|;QpB|NA9{UpR;gh2~Q#&|0XkI2qL6nTK9~YcFnEsCfu3y^{gG=_=lUf z?mm1;>1BER?!w-Ik5Vh290vYoVa1NY?Z4(Ak-YZns&Bc#PB`T?LLGKK|JkjOrGf`~ z>^gjke`h5(2n$__x^d`V8*2Xu?aV7+DCAhi$^AQtRUHjHkLy^=oxeJYDuQnpxA=H` z_mz&ng*!X@C7JC{_Vw=#=EF|dEXQbscYE#<(r{SDJwo?re(TlJ38P8~%Bh z#)L%(@9R6b84qBlKzo2{9hk^9)r8DG|&@}ez~)-m9WqWi=0PyBh;T63Rwjh%5`sl z!D~B)zD&pzM@k%2|H|$|;r^AhxqiDcL1yreq}(%P|JQy0r~hM+gD=VdQlq&0+JXos z{NkzR&PjGR8vXN?{xdHM8iX%#(22zDyi?|P@G)@OcdFno-m#AdAq3Lv9(wk8XJHM& zPV^&lE=}!x{78@?hAZzrwvZ)@yU-7!3_ubpSP6q{fRoJDsZA5?~4^ zgUGA7{^ky^v)_Kd4?`nh^9GncR0*tR{{H?gCRw)=AI}XG%E^YYPOuxjn?5KXd-V<= z*%o6iQ;fnY z=($dWy+vzAdiub8tjYoq!m~R$ZvTA1a_8NR@U`YSMexmRHERT)Gzl-ktR*n}FW$l4 z{q}lq+ejvL3e3NybLdyNIk|4mq$!-*BGAD&vxe)xvE4B(k5<{#2JN6p#-iwp}X=nW( zo`BhdXoH6f1tsuXH*azcz$*)fK61)+qksl`HBkqU(1p1`5)rGN!#6 zBRF9~vg~2F)pI0hBVD?=ILgGxC^gv>WyIP6VoMscUFjFh;+)j$SXThb9s<7;g@b>v z0%t#OC&-Qj)rT@QE{)Yr1hC}5K>qw?oH7WJ%1GvT=U zS3anG2?mBh5H}5D(`e`cvLoI(Vz4Oty=`uI)Q8RW(L$>TL7DYsPSmzC4wynq zPo55pj67wg6~$wc46HmYpOWk=?o{lw!=bD6F&4yWk5l4MFE5V<+z1xzpm9>kmWJ67VLbvb2E*WCh); zbj7;wD|H3)X;4OWf+(jnB^xOFbDpW8hLecdBG7O5D40$OF@StyPKs(9c$HoY5!j znwZw<%nI8QCD9fFM_K5C-7t=aok#YU#3OJ|D2HSPk#W(H+d!ruR5? z#X3@CwM!*_V(o?ARmFWQI^9@2Gv*hRE4@>gDXfKcu! 
zA(#8BjiwkUQF*AT*u4Fds(bNizP{|RcF970Z*2F$Yi5_YP%sUv;xER{&8^y-O0-e( zVC9y15XCg{ngGjWd=)n7 zwHPZ*%-=+Pu&oE0-X?x>5J6j11qurh_t|~N-E`&NFn`l(IrJ;*u>;HbsH^#RNzd$l zvKt0s7bp&Ep)f7NrujGbVPG2_$-YX*!EQ^x-fxx=#+2aE(X_b_vvviiblAkoY1?MF z%wPRO>wAk+?lmt?x0vFp%Ja{Ec{(T|umQuNDM|@q=ZX|{vc_}@EyL2gawxQDt@@s9 zm-2K}ZN~~aBs)yC1Us@2v^Wl(;j6RGG{m#2Wh4(OwYGb{EF`KRK^OXxH7$-*=(K=f zWUJ&Eg=oHDV)7F=CmO;!X`c}P+9e+$IU{v3{9TRvBd1rDPQ}ax; z#fC0)72Vfq5BNGziO0FC6mh@!%>_UZCoz9CG)xCv2Pz7c@XQ^l3N)q54QwW#o*rzu zq&=Z4xehaPC$mp@1FMc8`pgw;<_8ZRn2Oys(AzhAMrgFs6G;#kfxkcycH{I)5|y?n zb~Y0czJZx)9L22(^yeBr$sft_$RU4BJb8itrmXBSQKiFP$U%b4_C8XIHw6E-G_wSr z$3Fj-bIq)n^!pijLuIH<^GNK7`(hX{H`K%ilFZm5=dUQOgYH07=}4K~=E_WJ2+Dn5 z;r+I^K3{5U8&#A<839mg>b`Yk`scBO~P$BSq|EV1I_yNR8Qu@&MuV=iX*^${=X!I(7F8|M?6 zRKP6FYv`<7WK>I$u9g|@Ug!Zq4H#9u&9o4D6(Hfq(y|R4h3BZ*)Zg6paXsvazB=4Y z@3yYt2)S<)gfS%{7 znu`DrC>CR`5Vc<4@xwfXbM(2RG=F4fzjL?oJPDrzO`u+kjX#o45?y$&9BJ`>z~(gZ zB>&Jzse_rj1ExPwl9WZSyd+=#N#6q5JZy|Ga?Wxta|2wWA6*Cx*tPd-Fcm1f`X6Hy zY0S_-a45uFaS8`HLb;wCU33$Hlx?}s=yjB<_ajkW5^%wxK!`Imqro(1iH`U6ZZUSr z`nb8m2|{mr>k9Jne3abg5?&BOO-Dx4TGCQeWmQ$*9PIAR(&}_*o&$pKRrL+DKAsh# zS<%U;Rs$c`R+63<>La~JL=O`JKLZ9b3|`wBy8o6U|BswQknJ|ZJ`8y2BJ)){7lP7_`Ih5llt!S`~;1$>(9oAJ+1msnjh|&23Puig^^b$ z#G3pl@O&@V(AZublz|bI0KE1_!cJo73Jx<$z=-m%Qq)*#<`AC^;9b|=T$_(!xpmlR zeYTtJd)4+UTH#~>Kt~DH5WKcFt)7lE5eDK?!og=q$_0vWWCYsLxpsDuv(F>hXCp%2 zORL}>3RP%3I>Jr}#Aadm@)I0)%~?O8&2m$_C&6$$^I1cx%58qmq$S9LoZAzpS{}l! z`f$N(`202ZuZ})n-yAqzU$y<1n0y>0)7JM`-QF54peS`UeJLK(Le! z^23sNnF=Wsl3dOoCTyFE!JMLvor zUJs~VIlzsCfVO7jTOfiMUC>&jY#L-UYmPR_n0rkt+@m+{K_~1iFB{Iz0uhf*^*sZ+ z7ZG9EogwJSwR2|pKsb)Q>?8V%EK`EPg*d?2fFO6LTxxIf+8T&mjDZ?C-b5%gHg@h{ zp+LDUXf)|8Qw zinKwj7tH7i0S`&nq! z8xFFA!q}M`eiXFhg!t9frAkOvj3G!Lqo=o}qpNGt6eT^#O?At_t2vs#BTI`{iE=iL z^1?JfA%ioEYv}_=UYy{$)<1b}&nhiw*YMd4X#)9>RZgM1jPO%>kgRq4Qd84lg{Zi+ zKI)Bf7l7nCU}d`fbN-9dzt*M=j9zju_ct=(kMiTMT?dZ$S8cb%2~UK`aU9LIjK4!8 z?3{HMIK55t!Ex}H=?gG+uB%zs_xN})1wG_r*qkyC!5tNhKNd8u`ho6B zvy0^AwtWGwPOz9_Sx!?iresIxhjAQbq?S#?va~VPeEp;RmSL0(R-jxj7&dN{?BZ$P z$-*<+Fu2ns)dWGorR_+G`mJ@1e1W(PTnFpSL}P@2Ts}=?uOb|SOj=r9PPJP5l2IGO z(dp9u`0fvGLWcro`u`2dO%2zv}RqwVa)SOhD@l2NcB# zY)lEv==nMsU?_WE6sIKx!dR{A81J#SxLG-(8 zn+@uAmG&Fv>FQ$h^Lnnr=U=`09a;Z14EsvGl^_8DXS(~>rxyYZG)Q_)2t^EVOC=hO zATgQ(Q5Kv-(%y%M&AoqoB%SHux9)4pqJdh&8&G=-8WZ_Gv;zL0zuctGHq_^-jfg}#}GiI_n=Z)^&YbqCG_yK%Cji5M1I7O{544Zs; zezXH}Qukm&D#cN;qHLzeG&Fi44h4y1C-IU7LJ#JOu5@?v{Ua7E%QLWO8E?_2=dp3C zg{UY}B&jYjvskH#^tK+Wz9Flbz^(p#koBoz3AAdT(|41FL zk1E^i3c+(~Mf1fS1KveEjPJ(zGBaE^c+_u-&7YQcr%p)IWTBgvIrWBx{NC@OIh{^=i|o@kW_7Hw%~VBDNn^oTB=Z+U|xf zT*M1?{RQQ(2<#5to3=>B_{*&vi@D@%_b+5uzwd}4%)+36~^koY{lWH)z1qG#b| z*c0z(*;Z;V!dX?BIn5*5UkH38Yqk{+T5FSZMJM`QN|TXG|E?Ot8FVuA+Djr{I!OLUz#7XY7&TbzkR4#opn*`4Xi4zy_Y%B^ZUu4me&n2)#SCaSd*Lx&!l+Zf(g4z?i3(F{lrxYtTo`Fi& zr-SC2UK4A_Zzzs)K;tLh(qDj?YJ0vi`vndrO@l`>;{Sxxo58`@px`ScAq==S|d zpO%4=&{jAn=C{*odCLWz7|te*2nR_9Z!+`Mr3sx**2V;NA=z1NLs#u@RDJh+C-GOK z)h#O3;!a^s__y0G;*JMR^`E;kuaL>7UupcNAjd2$Chd=Q!5*S>@P_;z8FKI5lCcsP&DwS!bYXdz zjuf-0IbTWzW~RfLGIH{&lWS(^$BaS8j`ZgWtF2@c@w45KOFARMa3?p1A?Z&R?`sz{ zzr>w-&hE5Ve9jY0@}{nh;t3xS>OKc91}m4;NN#hxGWiD`%dT$M6x%<()--hWynS6K zG@4sETcj6zyxrl{?t=X5k(P|GdPGj8r`UI%j93O9>|?3=i{n0z*@GhUMWJ05!5OD0D=APy_8}Rw&DI*4Dp0k_&re@R-ML|6Jk~v?p8dwA@8!LGa@- zX*iyShfGBdrt0HdD?Yk@^&uSxE$MIyTA@1NzGNCMb8_fOXz+&vS;Hq^@;J$w+FChl z>nz@c7%IYX^AN3&YLU$_tH8*^q;gP1QO+~IH5KGA*%S8>cV^Dg`6 zW|z~#UWJsYJq$YM@`hX*p>s@m;&qa06z3_8z&rn~!7p*u=l%Q8>hBbEBFs>|A}@B0 z&vGZE44_y&*+)1neSAVXRe$v=Gf^-+<7?G``CVgSP$BXaR{ z@g^IdsZxO_@+`Yq%_8L@IKrT=EQ}#gYC?~uE6TPb**{CKLVHNStfk>@{Ov(P+Ah=1 zltII1{p9(a4)ewN-o1DE%|JxR2(@{qNz}$9O(`hY 
z#%-W;kcu_0Xwgl)!NNp=S1!uglXXux$P^o_T53dbnpPYb16?4pp0A%@sE;muFlnU7 zmx9()vT_QUVG&1p=Gxpa6Oa&dGo7N*sdaewt;h9>@qf$T6CUBjY~Tcz_i*W6``J3E zmL@~#?NVSHa4TB$=ET{)R^dgr7|#tBw{D6Wrl>f4l|P%pMKT#08fvmO*Z=CV9x0Zt z1mTz85q;Hu{P{K{S1s?zs9EbE4I?}UiqO_Tpg)@LEHpFU&N|OP>ObI6LEsX0UF$a! zZCu`xmxeJwqMuN!UX6W5QU}(C)^cVw&7+D$O}`5?UKlEkIy3oBDzNEgQ_V}7PGU@g z>*Q%dgC-7l{%})kg7_#I%7%MO4WTzYq$wE370luzO1dx4*(~4<%TdOc87jJBDi3`f z$4Z9N?Ju%0C|DAb0VXz%@u^OQLcOF@^BEN1=&ingl;l}Jrkg(a{A@KDL+py7w8uS& zo87pjGF6wO>)vcgdU$$F(4)jD{&3WPfkjj?zO{|^{_f9oBXHH%5>6{25qfSjZ~Wd4 z@4ZiiNz74f;q1-zfc}dJtvA#y2$IgO;r+au!!Sh=EXqbW4K+pcH^RQ~CSVZsd!MxL z_?+A3`^g*(aKI6HP2Z&(5@$!XDa;meGc!=CojwJLH5@oDDJilcU<^IaO>*2TKr?d= zI5E2(Jm=#Y1wrMgEBB-Om29)7nxe*5KHiwz-(#&5T;aD*IrCnDYJH;J%5ZBkRw6TO*A5G$?nj>sm?u?EvFvBI&}O?dlxj zIGPaLuE$57d^-4hbzmC$O_hgGh(`q>hpQAhv^yR7nC*7tJ`AD!urf22ZD*yQ4sAJE zbWiqtzXv|*j)9(%0i54?jA{-!P}$WpM(nRRc$JzdI>KiIbS4^-Z-w4S80u+r2Ub$i zw6hildXj5ay*d0_$0FB{n7Wx>yPLO4dHTG9((B-3f^S}m;W-;W28s*U7U-w zaksA6rgQmX%{w!E!d~0=)amuldakZ3&iQkNwKTF@JrkeS3=!K~N~dbHTRcpeMre>K zVn5xA`{$d}&Pbh&?#Z zEsh+`l82?+m2SFy7rN3_R5)gaJ_?bdFJ`H0?!zq66xYp_2GRyftFLYceo}3vJQ+Y) z+#R8gps%78{&lZ?Q2Lq4nrhyl}AD z%!J;o>ZoECoJ(pI_a{UCntog8F%#waIRRvhLPCWEzy-cJs$@7?q^lsy-Z1MauU(o8 z18Ogw>dAek*r?{#Ak07^Pp2V|-TqSE`TK|5-f^q*Ka1JYHEa>=)BkpT>01vm zI`CgvnD!7!X-JThT(RnD+6#N+2=mQTGjP`sw2m`pLBEM_f#MFK4UoEAqRU@?EA&$8d3WZbFJEp5KKTArD_i{b zAi{b8da3N%1vh~vdJwug+hOv-!;;w<$ zxS8;H=aR*_ky^j5V1mUwl*%? zY5H*+YxCN2jgv3qT;r~~&6UW<^2mgQs*G=1xn0Fqu)lyjpn5_kK?^^hk3(vfE?e}( z2;;a;M6AxTcW0_CmGtrcvcvu@%zw#&ACaxh4{B6`Y{!#C6smVmMO^WLOUWC)gfuQw z&l~Ghx<29_w_!)M>5hXMPn1BPRo{qLV}W}Dh{oRX@F=gRpMEg+VS4c=KX2k?=na^% z6wo(2nzy{WTUs~rY=N#3@<|0fi3kh&#AWg|v4yH_VCuc#GW5VtE5(&KyTco>DWTr= z%trUtBk08>i6@<`H*5BK3i}{U7`YIX29P}tc&ropclM_@kNA1c4`T;A6Z52_X`-Q{ zU%Cds0>0j6Y_9JDLNExLbnW|bnHmV2%GPM6Wm_})Mr^tkG&VM*;DMODx0n0~fTWIc zSBF+friFP7w!rE7ud3Cr`e!Y1(fv`{HgVr{uaml_=(L5- zM~7BtkaNFNRd(xu=0}TGdn7(B!AmG8B;;5DCtA&*MGR;u$uz}pliHL{oG^;N(JzK&ablisRb`1z;pipttvTL&MU)vqCZeiLa5<~E_Wj!Ca4YBi`B&pr z<%LW{OGp(W(pSloeF^&N^al${z6)T?est1YUvnSDEVVdW3DCO~ZGDXyZdslor;3|6 z{qoLh6Z-YnpM&FCmrIgb#Ig~3mZ>u3sLRvbPwsDRBXyqG#I$Q-(Po#$re8?5UYp9# zd+D8!)q-}pj-uJ5t*5-q$j{HU8M;VB|KhY6t|>a7c`i50TVy#)Jm_Itzsp*Ab-WYq zGkTNpEARzwrqc45f>U1B*(`dC~iSr#{B2-NJt;0G^)sG-M#2-Dx$>B zAHrf$SfoSddCxM>c=rWC4Dun?vs_Jr+d%ESQiNx2MxXmyK2g(5QM-}3&DY{lFLF&o zuc=6NgOPlMyD9YL_j<=)+iD+OLUTXnXFIj{VjYz3d1A_;_1m&EhkY`S)-xrpi}RFz zO-Gwcra%IX@-g=OW=D45!?jyHLr~9&<2FydHvEadIpjUU^h}oDp;(2J9CuGWoQyIf z633t2ba~lQb5tG@VYcC)^O&~QpFbZeaYElee%>ke;{fzhh#)E(*U3MNkBJ-ovAEcJ zX2v;s?-|H~r}~7%Rzce2MW?R~y48Q$*)49x{YZ`$W%&M;fs)qI_#RA?yeZSr1H}m; z)+QZ|px*^7LC)KnY)zxkv$=NV9Q^ar8<9Gs1^<}fb zq9j#GKu5gFEh7rOw$SjShq5@8d~4>r0aifQxzOu zum&P0dthiE%s(fN#SYSF3($fRv4;DVcS?oAYSfeYW5%LC8K6;O{cOs3_-6y4IAZ7X zj5Y}nI2?A-TiA0(Wl@`DWE8p`Ch@pJo39hV`?QQu6d*t&{8wL-%!DV*5;tB+tNS>% z*Wxz6c0*V|L;L6TGS9G!bn@Ymk<3xpTXO#XYiqWm5|?#tw9nnDPghQpzO^yCn%c62 z$@t{qm*3F%X_P`XC7{~)yRezi*axP$hK)20wsZb|%Hu!UkJtN?VVv>bGHU&F%oYvNg5p{}k6M z_DWJK(8X7r&z6#^y}4C%IoTHxU6CXD@JBBDwn_S2UlapNy@jplhJyh5<*~37jbid_ zfw!?9fzNVu3WXF;VDsefWfiLr4K0#szAr4XOkTr&@QyTG~c3#$MUl2dMiY4 zCe00m8LX<@wW<6D+)tkz1%JrA<6Ru+iaMkrB{n3Kw(ruvmy80+AE_Ns%KCwP&=y+_ zRgO>Ut3@9RJ{3{R7Mm$_{{%@|K@5ljYx{3yWab>h>Xsw5u37oNe90e35Cl2RWBlCR zsc*)BdNTW-_Pg4`0Lo{>>n{aHfz!Q4`UF}hz4hwYAtqj4i;jyk!^mRqle`G$XheHzf_ABXW|EGfYkS75 zGkKWIhhCZG83Y$XY+=rqTKObf(1K%1X&4kX1TpGCclq$&ZKBV)L9RR;cwG?%E!t)1 z9l7^xT+0R^L+XDZQ+ho< zJ|=E)VbPV@u-gj=^@n#(9ownl`CfZVs1$^RBx@I1wpbxnp4GX@c9YS?mq)c(r6KA& z=)Yi_@se5Ip6@pRa>Jv0YMA~jMIpkybi@ua@{Wo+s`6fH9L;k`x#Ysxg8_nB+lgqI 
zrmWo@;B*Pn*SAO&2>sPL9R=_Z9~^M=ZO&MgagCzoVQd+GWm-St4;1>N?P5a+kWj50GCU_jU1Sjm*!)h|mDhj(_SadnOOi>Hv ziRae8@-S>pJc#`XURiPk?^dkoTj#(B^g!e%Ui40^i_oOerxIfddMCQflmqIbX<3v% zHMkYqem(3l)y2)z**b3FU^3MrY8v5uU*}fl2{U6;7xcH6J7tN4XNS?ZrpO0U2E8X= zlbdu-&d-Dol5cNa*4jj}>#iXygr?1A046zFSRsNAN8DvXv1{3cJ6&C5*D9G7Xx)rg zudD2t=@xC==G~-pF%m1rwO4P-g!j1>rpQfU#=RRg8f{T2*e8Yvy14O`Gu_+BTP3%l z+A$+^Z|}cW!6kyJ+kX5I$8RV(;M9HZx9{;xR@gV-jPXMbe|+Hb?F-~Zo%_qP12Br} zlH|06oM55h=}Sb#nl>Qs0ZPoH!Ne+%iEeKUVAWa~2W9B(x^7Z=IrvGYpnu7iy$+pCFB3F01f8uS!) zX4ES_kD*)?)2#10LG23t$7V->E=&MEpb)v_Q3++YcT;H!h{O|@yC%ge^#*T}5l6>K zZck7)5%77-)hG2&skom!oK$QTvF1JqhNx}OPM1F{Q_>2wqHtnSi&e;r=C>C*!27{| zLN;{%f$gz59}3T^>liWIV(PO66n0GqQ{k|F)Y8p!NIFWy#QA;qwTSfv*AIL)$7vjO zO-~++Y|T0~xZACfxIyI0%_Z)+={Pe|fpoxaN@lM%T;%;-w8>XYFn1D{%3!;WJ(8(p zI@?*c`FR3*nJM}xulOG&BtNn>Y+N&rk-`R`e5c*7XDgx(?Mp&FYW`zjn6GLfhi7q- zDIXH-yaJQP@Ob)w)k&GmAoOSAP1PT5iISq?+vroP2epwiS4Z6!McsQmW~o}d0Xdvv zKhZE*-$&@oI~*Ej7WQ$iSn7~?n5$m?@WIq%ja~vRlP9ZZYrN<(tgOGfd2!4L^X$tf z!z<^Au%T3`fLQc=K{&tI0B+9VbCN*HX51b33KHkew4zmd64?lj`!&m-Da9k1TAXOG zIg@@x3>^KNHd>S>U9XO29`SfpG+~u3u^npR^Aqi+7EDUgy|#j{VH%vk9CUBaYt^%0 z{ZJ?;gvPLP;cG$upN}B$=IhDefnd3sK zC+DARNMhFPu_DxluA;A2*&k&Mgr9G3KTk_D;c0-wAeb6IB*{3LcOYw}l=G$4cxh+! z;Ye1Pw>{vrl#1+RO}L~h9LGk@_9hg&MP)LZMQlI*!!Ungwh?(NMb^UhLVtk?vMILW zb^(zmCiP{qUfdbwQe~K9SMT(+-K>e;g$S+^wH{cNFx zRXB9{C)a)5p-WNgoe0xF8{L+OGAGN1Zx>`HG9QFC^N7aN^^Sh>{r)){v;JA+-?wHv zb=zn!7~&uall=)s9XySGRu~l=$?JlRRviag?Q<1y9929SbLIZA%XIHIDDE#@e~mlz5-&2Z$Wi{|Isv;J zwk_0L-iY0*^2jwM^F**Puu={-Y#_@WKwjw>1yr*5)p9wnCQdwdIX>+#8vVb-y<#n9Bla*d>) zTY|_pQ?M5lZT3%gGijRRRDCB_e-4D(7B)2L%s=K`7URMPt0{=`3(jx3Ip8ySRw?D( zD83P4+}9RD7nxdnnv<_rliAOQjo>*2ADq_`!`eS~{UP;aUX-)6%{tRNTNicb%$FW0 z#+FO()(@MV&zSb!Mh=65D|xH9OpCeds2*+m)VhjkWBl+|leZtq$iqU+=!LD1kIk(_ z_Su&3=YC?%!-6=2414pLo;#CQWixqRPMQ|#&!1qn_SrCI`|%zjl9w>smBzK2`o*hEU0*mk(Gm&m zGCeQch8aYlP4DO3Viaeue(jewk^U zPTGm7XSEgAGZ9GrZZ#-C_w%dqdK>7C>b|wwrZ-3lpff};C@W8PEFEFpb>#82pAIoVZ3@@H&VlA z<0Sp7Fw?`0b<{37JVx%{5;bSKDj%m8dDSbhU1Skdqocd4lQXR?C3G}WKss#}(Vn1b z8=M>e=>YZqR`av47xE|BLte%dMR9E2?f$G9zP>Kt*`t(af>vZPso?(p4DGh1krty= zShE@Ys^5}2I|Jj%f%?*4LmP?EF3h{zzID$`_aYT&m+{oNJ(3W$?8#!SKo=42M9_>{cbPiJc8=`S?k?mPS6$0VQgm)oGB)c)?-Ktabg9y^Mc znoATsAk6lS(hvD$3@7iqwP#^l8nF2jnTSezKnEi~qrnzQlIG_EOqqwinYb`%i*EZf z#d8II9~+DNS|HA~$eA_0aABxmt9*A7jH0X*74#kInaK~vYD{%pBy{QN+O{7fEl}Ey zcVg6Ky{8lXhB6&I21M!Rx1nMykm%8jFQI>IPUz2Se)weUHH3re@kf#}P~mPbCiYma zXNqqtQy=g3+cS5jRZ4**Hkp8_0H2otT-@fL-e z`Al@CrZ>FZr5}Y5rRU&Fpn<%B!^c%8;TN0~7OC!z&nHl~!Qak8^xBI%r9X5+_pCl0 z60jLEf4q3bI$8Y7?Ak(FsFR*P@nnuc!ksqV_4X?Oq;FMe-!#sxbZfLZy&8JQU<50f zyG8)3w;Lc)Go5)0xybuTN+6RJU2q)>ozvOT;ii*eRUXa?)Q$9!y{yZdWQ7LJc&_K} zG@c<^`Uhb&ig*3HNP^yx2y0Qzd!k2mg{5)q$?-`>nA&)r)PqfVVTp+ zse(n2Jk+#py2;heU{g91?KpUmF)rxxpmSZ%eWbC=C;A>8@mE6BOLE$XJ#(Y$H`c&6E6@yFk|EMOGL>P>m~+4V?S2NALYMV*8A@!6@Zxh0w)@na`8p+4 z9#yRDYkTl+Ok~r zM)gv?dbeY6YFh10|5NKX9ZZE7`57V(XVq`qv$27`p zNY9VhhY6jW3r)OGtZiHQJS3gIAShyFkTrA)_ig_)zkIw7^(5BN6*6ArRR7f&8AT~` zvHFHoM1Syk#;h!lEfQDyNc>8se~9CD33titvW;?S$U0u&Z0r##aNYFYOBELrZjya4 z+pJ!w!hMvK8Bddv`?Bp_xJpU?Q5)W4L|{v3-{uV4!Tso3y|vN$d`81XFCTu=?9Z{+698PdSJXxL z>(ShYx@4AlykyJ?lLGQ9B$;BiR~o*dwxRJ>g9E1-lVomK2k76cGF@*U-n|?9hFz;e z>ja1`Q64m3sP!_h>OWUUm|j#st!2Py)j~1DbR}*voukw0ZSNJl8GcF zNXsxuC+OEsLsY??8zZhdPiZ$e2#|Ak`&wF;zWh`o72*y*UkkHV>VQ1Mf9R$U&eWHn{CHix5NyubpCJ~1FVaBv!!yr8}-KW zAWLe4ha$!1m%sH0((Vk9{710o?Q!Cfhhv9J zLGJE_q$z8r)n1|ILw)nmO$vEMt5msHh|Ooim;7V{ObVH~L@n^D8jOgaz>0cZoQJ`^ zBZuRx8Uw4!k!85P4@L@CzR(p5pF$Wu$mT&z;Q{*J0J`^0ri8K8A^7_>j?%T$fP5$Q zTxpjffVo?ioL*3hZ|8P_9? 
z;Z96+y;5c-;~XIp$&*Q*RW0)?!2dW z$HaCNqWc5V&H0JVD5ykW8~{_2Y8e(*RlbmR`P{jW<K)uthqQy`1-chuJSj;EzOK4IHmMG$=w{lR#%&?W zDVorlb8o%<@3miIMFFNZAXmC2Vd}`L~orkX;bLE(KJahkl^4fXEEHd_1c`?6E}L6 zvI$@5{Dl4PO3Z1a`2qf9%dm!-8uy<+j%v{t*RvyZ)*WY@m+HNZ4-w!T9K+W%|Dyr@ z`oV2u;%!5R>{EIuFYF=Fg1*EPqFt%i+_7xo#8r{B86()Wu{4GlD0cN*h!%+-0oUrApa* zoLL^qZ)aAZvgnxeN!MkVvBGWDesY*f(vKnx>g~dTP@pI9A6&tJGebfgbQCZNy`&H_ zQsFL)H9hXwdssz9g%BSJEEHxa89LV+&nW2^n#{Tg0oI#q3uYOYH3dn@xVddd${hu+ zZ?4!CT6TBLf2;KA2m@l;ALR?I!xTBhCv_czo8p9fwL)NhvRmN$wSCQK>!Qr!SzwUB zvBp@*xL0*+X`LGqR6-NO^Z9eJ)k1qfzP#CnQt#s33M;A-1vZ)tst ziO9absvuxll;b_KN#mEg;?6aH;YSPN>Rtz$@Oj%oRCB#cx}6ny+t@WB)Axe9hGM+e zel}U7U7DM=*Rbg!c2^lzGjGVnHI!n8>zW?Eq7&xOe<8YY`%W5p%ZH3oKb%Z46;nud z@#)y@+CoSfXY##5F6+2Ne{-xTdD4Z@Vf&m&E9fR)x$PFx&!PHAT!08O#?xYcpn`pwMf-ZvRbN zRyJUIv3_1+h1O-rknCXDx<4P6_u&0ve|5Ls)N0Dl)R_d^yp6F)i<{h^8^Nj10k9KC zN~X^BY+LNIQTjh(t_Np9oyiQ?Hzk@o(^An!z z>@R^as5B&YR|jE3{%!95$yL)Ny$ZK?Vc+LZEIj*+CF2~{+t&e;^ZZCCMU>r6z-8a< z*Rjypw+7BUr(Xu&k@nb0-;=f521l=)28ufX|IOEkQSx4O)_vbSSQr5H7VI|6r4z=t z-3whaHU_K^PLl}af>3*}QNM!Z^ciWFV!#D!t@v%<99Wwlj3e7#TXR!+FW|S3`pk`L z+-5_N%NvS!jZlg})8!D#lt!6st7TBi9)@W#g+jt8*{X2Pr~qxgH@jcUlA1532 z2A6(o1|Q<`FFVfX_5Lq&1wYariO*%B5}^OrX6xTWxRG)q8}B=i>SWn!20>fEdwzY7V(qzPk(a( z)E`H5eb^+p-6?MKHU_hN&_Qniy!1p9`l;BBL`s#u%bfclnB&VumcCG}-A-1FgF@wZ z98Qpp{Of+0BY4<;if89AM|_Q7?(U5-`U8CVke;70ZRXTgq{6v z$!+Nurwm!AP^&#UWl^0gHIs0BbDng%?!9NR7m;8d<{)~~`;lI8y^_xc;1?d!X|}~Y zW0VVz)JW_frLOSLR!#|^7`AX3aZM{N&C&eu&fLV)Gg_2Mo-l$>qf&9L8=We!1(G?I zIGD@h7sw`z0`iV*D;w&O7C>XgE%340-mfdCSdNNan0q0fPo*+BAK_j3p*nvsmvpwi zdMa*G%e1v*AEtLLi`^_PO6>T3TX*$Nd*u{mZr-i+lUEwH8U0N+`?t_qc$&guS-k5~ zDK+CIA=xWCxKX1k50}1amVS*TYpKEG%ws(GS*~TMv2cvxJBPI5Lwz>ZYZl&6Rw|NT zL2amBuo!%oQLFVpR(~R;<8wzS&&Q-w*jtQ^k00M? z%9xF}|Nd0)F-{=SHNU2#iY(S@;?caY^R2tvC(?-A9p$-C_u*yp=O3U^2??GX6;U~Q z6#;j9^7GB!beeGnUDk1qG^v(*6BH<17%`BhA%Kq)e>nY|U8_tMbVk{F*JoJTdmboV z)cLmB+W85+DTVGyN$N^hlXM(RVN-A1T#bpvIk0k}ES+N6wPV*z13(qx7Baup@zuA4 z=YgZ_4wKk>Cwc+VTIM1+D&HDear=sGzfJ0vCgoYxEW2g;1B8xE>woLmoVZ!!rSClA zIu$iKHF%8BjQ}6rpFgQm>^@)EI}RbaK?Z{@$Duy<)H|nF3kw+%*_J4v14~Yb&9>xt@TkK*alriTr@fCDJ3{1XEk%NQ6iZ+^tqJS$6Ibi~NS9oY7U1sP@zD3m@`}#y@*Qjmg z7R(%TLc+t}A5S1A5Z(3=RWK0K*QX=d6^Z>w&CE`aeD~jCKPUO@x!8lVIWw6_ zL)0_;d%PPjgI?=*ek)Usvl)A_5H3Y^sW)K^*v_cKX;V85vO2dbgb`D7MmHDaOlnN* z9wgq+!`s(&nDhVR?mgq0+P3{~3ksqLqNsF41*JW4oOAV_z4zz;?s@0lK(f{>V~#QA7~h}Hlv9NmZO!rb zqe}Iwxaee1%OhK0);he1z=o7lgDo0n2-;P%$9auT8Hg+h1y&@I#&}-)y zP_C>#dY=>RuMp2P5WTz8_3xGQ*Xfz=#l3z{Cm6*tYNI>8LU#7-RVebiif&nZA0^C1 z|ELN+c#`ykeA=4DC0AEtY?V@;g@_kr))1n!w$dNoxp!7A$LZDC7EHARNQk^&znXPg zewe3Kh9<0}lUaCs2tlx|c=KZ5zv|(`;}VHnJ`;t}iaKa~REfC?!o8CfMOwZ!g)+<; zYGsK9qiHaukSp`!mjI_oE6^%;VLH3_gsY~D`Eya{x4G0C1+l|9MTHNspV|=n^^X8= zD87niH0}7Bx_XdfcHK_NSZAQv8a-EI87|65t=eD~ncO3w^Qv3ChRFY3C@Z``e}b}nu>sg6+G6AafQa&FfVnG zoEUkx-ADNMkQhP!2k&9@yfF zeWJrr?zHvW_!?Vc>!AsIH57^1d@_72A;{OEqJpk=RaaL!-T2x2J(J zEr2r;DnljfQ_!3F03e*iwu80Q>WCM7PPX|_B}}Jr&W3BfH0~08Sc0BWgtY$tHH`uf(>ag3g!4!fERsPFD)a({E@v5Q zrc$j&Dx&7-XZ9JXvsqSgft5L3-zttzb=r*fWhmP3xCK$<$8(-Iu`RM2lB;7>?jocT zmaAvAyN{iA0s&#l2B;}nFV{M@@k%dqHD!A?*tXql&v*6h8m*W`*uu8sBasTjYem_< zn?lpJOl^TC1JRs?MK+@b9Tve_v5X_-s*T2i|1D$uSiJju*B95V3b^Zy231C?RLp`@ zUS1_`;`Dn^%*FC(?H;H^VbZ^0JyfXbbC%>1ur7nm^aLl{n00M42Ly(!4mZ9ULYH7I zZ+ES^l$`;2{z)9+sRfh4_;N2m?E!t+R-D9JbDUc@T^4k@>-^sjLFG5@pl~uq6O_+~ zgUGNUva>}h8JKAJc-`|kUoFU^k446mw~hAQ8GC4~-Pbt}43G7vyn9O^$yeb+>7Ymd zq!g5bN#=BFvu4|o{@~C+-K8&G_y7l&DHBz|V@CJ!RhrG+E}jhNT1 zJjWU3PKWvC*2m70WO-hnR@pF%=s@RM4`QClciE~H=JX<|&cV~Vsk=ItmLF5}oF421 z36diNN*d+6+0m>80Tdf+*#$OIx7R4>>FEnOhr2wib!t8MbeeY280BL{55Xs^sYn=mxr5Etra!*6C`|SuV2u0|S&g9Gtlh``VR?C>GdeZ* 
z)Qv$^)+LqBEt!zoHQI`Hm+#TV$FeimE|v6CAZcVzJsPQG&$DP}xh}yXUOnLoEqpXKq=rD#iDL9%ST$Rjbk&*Gb)O0*uA)xUm$O~;s}%1U!@ zXT&Kx>O&r}Gb95g&DuIMz0R7(#yg)pmJ6~G3MulTldpI>xXKcraJkeRM!DAz{i;z9 zybRi~dwIN1wxx{;?T{mlv6*fhRHdnxR6_`Yf~3zk>LYd0D2gBsZ5At1_j6YZ>fCam zI%BMd+im^r;~$cx2KGoYxO(h<5oZ7IgrYc#EKfc}0m2mlfQVV87M~NC9R<`y;iqgD zp=R)p&i6wO(!MtjgElZuN+68M#3sQ|al|Eb)8T4TvIHNCpE;(00Y_iMta+L?t=vj$c+-0Cg)y`}J)i?2YwqZI_Mr%oLa1TYCv8NsR4a^(gWBY;gp*K= z9Dz|$ETHQg>b{N?ffqv{N+4istx*lNA5TYF`IeHU#X~wkj(Q<`2iJeKfMKArFAZpH z*k@OXCZE;PvGP`dj@r4*yRRQ!SC8p)QZAid7xj-Jk{!v>;AvyxGfAc*RhV2?__*Cf)&%6nLedyFzr_b}1r*q=Vsa`-xoTV4+{L9al(Tr@ob1`V}uju-u$bZuG8f1lw}AOX4TY}NYG1?e*< z&*xoAh;!g4qrRU@q(yd4I1W%6RXZIXR~<6fRwW1!K0H^2VL+|aA-z;$?zt2++XRtR zM0-xJ2E{J5wejF}adpA?o&VYpp6+%8B9eLn959fzJ3QB$77*j{HKPZ#S@^oXHMsi; zg1)!?wjbx zO-{ee;2rBDZkN$Sr7;ps(vlC}e?C=|?^&p+QLQj>EyJQp>G3Z<_+Ko_Ys#R1*Iww+ zf$Skl4Wfnc(F!v@7Nd%x7PG%uDM{rB$h?lRfJw{*mks4!62%Qq< zV`QT*a}AFBJMa~|2LQW3uWQ@!v+qPa1rkECu;IJlE3QNQFwdiZ%~LSYH1ujo>c^Lw zSIf@7-OQ`h^%5#tue~%_EH+k{safS)Xse;QB0KVEfB{b{PCva?`xNeDX-(&Xez1x}=y$P0G zo%M(R-W=WsLzw$%Jv{5@m@+&7OGffe=gTwVwF=^r80xi2ITxGv-b3#+la+71cjrva z)9p*xsV1FgngLix6@p1rf z=*hQmCUrs%i%@ANKEFJkZw|{zrn@G4`6?!Z>19!(Je%j0E!{85&*x6tWTnrt^d!EU z8GFvP$Uh2TVe$-MjL1hGaGXIAk$1&M@-McBqqm7O+pRDY_cLF3&scrXd=nA0-$W&$ zO}}T@-f>ni(1*GFaM!vwT78@ZX%H1dStSs3XJz!iDqZ5D`vG@VnN{d5UnKA*Rf_PS&?KTh0C3RXM zhoYxA4O-uxuU>drN-tlW=gG``v8qCn%0N7+tGjV)KJxiOjAZH;Wa?^U7<*#;+Bs{H z$}WbtnY=lM{UaygBJqF2(t=+Rw;-rv>VG`ZzAlBIreKU*YFd8b#zPqqmDp@DP z$hE(?qcP~p(72LuclT>gnoRrs?F*T&DI+&~hCKqsm7J+~5`K-UQVpG{mU~|1IzfGi$-xQqYmNQ+KgChv zIt0^5od^~IWgYxTut`OKhyb0%1lx9$z0b2&VJ;+>h>8cyK(*ayJMr!0^WF;IIDwH= z3N7c&leUIaPV+0+mrDy(?6Iau8|8Y?$zo}2MDJT_`eht{Pws>b?T~6tRr+NAih>6n zehmkgH^%Dwm=cf?rGVK~{px5#Kz1!dJd+{{v(KHsD)xqL{&1fGtCUJ$b#l;$5{$8D z;VR5|eoXD1mh}Fo2$mq@V^gbQe|K5lf+h?(GeP~JqjHxHBe1`lqsGJ0ViF5~PfN~$ zuiYVpEr!19w6%{nViN0;-&&LZrgMSgGm zVW~?cUd=d7y5+qmV_@UZv~|{XRbZNs+R5&PLd(yj+&!EW0-VS5?V1t|Dz2+j^`V)H zzKGBZ5v3~J#ZYH}Yhc2q2Pv9w$1qOqyH>IN0V4x#`9&@FJd{R<8|4g0vJ9rr#Eqz*JRI5v9Xz>kH(Z zwjx8tvPWEHOK-8Z;Y)7!F&h;3tt8=ZoLH&}oy*@B&zTk6=P8Jm(vN?`yXkoHV-Z*^ z)umQ=p;=e+WlXMXnD=U?VC&mB`!RKp)U2kC0m3~a;2}q;+WyYr^gm<#>^8$BYEz%t zCeU^oFMtu9=~w1{)J8Q}CeX;Xzhk2Kc`?-Vg2X%h>?G}xreHvaR zEbYjZRndZ-kt+0LA@+nzc*~-?pqx8e`0Jb1t9#US9Z;U1JxE!1b&+;`a=6ORg$#b1$Ws z%u}g}U3YuC)^mJX!P1<1R{w~LE|Qf0QD@LdjAT#ulDzV(Vga~kIl2YV6Y)dd*1Gxl zP6Lb4N^f3Ujnbg%#%5jUi1dYy;8Ulg|H4f;a03tgS^pQ}AWb-^MOKOlyqYNSwOp$d z88#mYC&B zJXZK_TvbVm%)Zek=Cu3wT|=|cW*O$D<#o7?s{po`RllRkD`j$uK8R`c{$%TQ+SvA! 
z`?A5>Y50&217)PCQ-br(chF4wjf*|}9VS&(-%9kgo1>H>&FrUn7E=scsn(dj&zK2} zMBP~J#p$iri`dNzXyrd#i) z?MCfRh(>;E;a_Gt>MAy2sS$dzQ$8q~)B4K0fl84wv@da6Zb3Ea1^uxpwt0Mva_1|X zx$k+6-1G3=&&$!K&sUAT;XQD~m%Bxi)o+xN?Gbg$Hq_I=ZKyc}%Kq*Y{^un8#f)8p zp#qOj*X+61jI4b+?)W_Xlx)~`jup$kZWxocj^?!U*8x#wEIczda(%BJ*t6VPPhZWu zyOXW^TxmAgyz&!;6_R@%*fBGwV9RdJ7#w;i@cM<3{2pB+uBzyVU4xjFudia47KVk} zQui8}TX@zBLzIRKrf|#xEo!U;I9rd!asHkPc>C@4Em)C3F}|69-h3yi%E^*;zVQ1@ zDn&QFr&qA&_xkt_QR(&(JNu($<2~Nb`9V7km9NS>mYNF!qoUbw4G?e5(?o-&7QoXER`d&WlGaSaIgImI*;!9=u{))>5VAR}^BPjO@ z6~2k2?8^Ry-D($9DT&mM_qUSm9t+c*fY_hUUEY&05Fy zF6x$ZE5#vQ)7;@G{)@ld6l*#2BPZIz`<3b0Zn{5wf0SydCT?KNMS1vN%d#vJPM)g>tob%?l!P9CY)8Fck;_wPdXQSerb551q+F|SWj?OA5>h7@@-_8t`K*#UHv>piYth?KwOosp5F0*_7e z6f8}Z`h`b{7kGOjqX(5T_d>^MS0|50uJ(Ezb-+8np6WUL{AmX+(9ZqXw9iPjUKpA^ zePyrPkyfgHLI|;B?K`p_&~8=@jx6rY_x1hd&Ql@C#lRWtphdBj^D>6HkbfB`R=n5} z#*?FKqZK(H->sRscZ}c-kplIqNQIN@}s==HlyBRvL|8e#?(FxENJGLujm z#nxW+7Rmuh!r*Kl(uHQ2SW(aZ%W(G4m9{5@tlp<{SY2WLQkgUzsA$XGqqaKts!jAl zchitAt3z0OpjQ?Ck5muxI~i{95iZJV0#@is6DQ;)Wh@EM}iQJC#wPFBHBBoai#s zFCGQin)PX7y&Z$#T3@nm>P*B~0jIWdSz^MF>wWaY$aUol+kHVETq;`z<$Y}PWfyj` zqGib7GjTM6yP5TuCXe-2P-M34{xWelIBxOCeH{JNCRkk)dD2$jG}FsU7)G(sN)@s0 zx@V3ZV#@6tv5q2{)z_!1DR|z<0gm#kGfb_B%TXSWN-ps3h#^E z>5MORBcHbxaqr4>y4ul|+MRybev8}GuX26UsKB!289(sP&&H7=(D}$M(i*e9-Pt;bkI>+JurD$OLuXOa?3?K9%-rJC^(-igu}gr zSLCX0|1@clX7R3ZkC;V-8r&CPeRa4H1Ipz#rp?v$BNg-eMlhSo>^CyYz=z1%784s~ z4<2JpPZ&cZVk*}|z~JC&D|3lgT+EAvcHT*beezpsJ_$A>UTCwTMq4be$l%a*HIx71ZV_)@!4!6d`+^Z8$oLq1D^D?sluBU&EeIP>gEooK!aqe8#r8lr}nh0mJ;tIE-p$Sn**jiH*0hiG{QKise5w@w99O;4dwU*lw)KLSUmg~~Mg>0VPZ zuI6>Y5waHwgX88wx@O9LkmLMIFKtgH8&;Awq)1V)CZE@HnnF68u8hvRMGsmeeQJ={ zMz=Ne;?mbP!w$>cZQgHvLk?5!tFeR^V_yFNeqV+3V~_kp{n!vcanR!)kBnzDVX}7N zkd>^)s=IWI8@u>uy<2);uqYA*?8kcmLnG5aI=GhH7?Al@cWQW4%(}$RL*OOaenY5- zdui%smVC7cdtPzlZqaT(rSQ~XiED(Cwvnm|jt2Y5#!Pe>t+*o`j}HUBYf#AUvCtY3 zs?zf!(DJoA&i9CEu;GqyuYs%QXY7nSvY~a!Pyc zg{<)`OX9U4o5)^{b8p{L-kZq9C&TqI#r6)E{?}6x9N%&MKZ2yg;P51U@$$~!IyHYq zOjFm$fKEd{X?8FS$(c?^KU;(lMi=y{2+^E#@p<-?HX?p&oWtEiPhxdLe0zO^w*5iv zbve3r8PP(c={@HD>}WTA9!e*@_iHGD?2uJ`?)I%t*uph%Pw%?cTEx->bMMsK#QXA^ z#~xqooQKKyFRCdB6~7FnbX;qi=Q){~3T=?Gl9JXC$~cO~?N#N#VDwjftbSuN9cu$} z@u-#3K8S(D%o~UY3op0QSnaSYO+E2y1`;K^&V#)nuXe(&(T^fLrTX`YZD%Q zLFF&+cDRr7WM%Vb37jjO-$fQp7JWyLMo%3?-)4sKNtVqaK6!ZO!D!gEEAUNt#e~j$_Gp zXOamf+ptJSwZe=dTgkGmy2PggBKyTs3zoaNudEgc-tUf%sk7CKgrz9sZ*?nsIKARo zo?4Y*K)ILrttIigV0XtrjE3ED3Wv_W0HvDtSth>B3zglq)}{iPl=N5eVuQ*YrOu%f z+v%&SjpDgJLJMm+6%j1qK9-fUxuFFJWGrIj>;3T@1M_N6ypAW2&Ru8cTR95yc zI1G1rJG4v3-CN?x)#cE4HH}^j&^%dSwbv^l`C` z!Ycubq_;}sM7KQRb=RR33tGih7_*P2Fv^M7uoD?#*bkIle_tcK&AF9O(V)RfGl2Z^ z8oe~o2v@8y){dZe*tTS55ZOd&^^|Bo>7|F8VH1x!jFvi^y#uv(zcN0J78(L~sy&i| z5yanJ_p$LsGIU-UpWh!Pd4k+GGnY%J)FB%YlsjPtf*Zp6oKg;votE?N`+iU4mvP&; z;-1}K0UF>J+Uj3r9WafU;!%+eNs}pYD&CB?uetgjQ?*tiHhe}r!2!A|7+H_c`X|!I zbrPRn9G|XwT7^60dH3ROt{%tm`Z)~)SM3L`^sEsS(}V^!xA|?_xktZKJYL7mNCu;Q z7aWOM^67hvQWc{=eN1Y`v-V8oS7=8&GttX=H%TLgI&%Tj@WGb_U zHm4Ym-%B-}RESpy7t(56O~2ZyKl3u{ZogovNWlZm@>yzPPwRr=jqOt)ZK_p4e?GNE zUA?4LYW}^S&Ef>HvqhffYZz<${N$cZIeKLrB~9=g?WZdsZ6X?E|iX-jXzu=8K-Z{gRD#8G|X;wK6W6%ma_{Vy&K!(bMtttk-q=ei=2dL zLUJgG6y33mUskPCPl{^QW$wRzofh1%CXhq>sX-uK;UDcMDQG`&QWxFvf<>BfASMI1 z9D1i-?{Krh!Y^IWeQN}dTDD}i=wbNChW*y%vH3mMcd`8bf|Al?BF|}`u3hVr01IUG z&faPF@zGebcUHqU-H~d-aP>Fwo0~q9*H}w!wVxwrr$K^#ubFOfRKgPeG}pMwM!$3= z!N46qeyDmByE~Vo0gD0`u^iVk%6jlEQ_6P32qSnkaD69d_Rnj)T?eX+UHf}S<48|_ zYV1}S-NOZ2r$|qH#r2z2mwFAoPE#Da-nr}OQ>8PRrWFq38ZB7zc=HJ$d3 z$;~cQUV1Nx3W)55Gq?06>B|;kBuPZ0@*!C;J5Z+}eD0^t&fh&_pMj&``1I)82^FJR z2J0vc(QG~(-1wc?!a5q7lsys`DHvNnsWz4BgAr~lZOZf(UE4D2$gf#cuCFm0=^vGW 
z7@8}s@vpOqc3;$4To9i`j}8RV{lTC^5$5C`_{x%yqP@#)zb&>2jIQ|U#)_6FAPBv1 zxC$K1=H?6wzT)C`DVcVLiI0hC_g@WG&6YIU_eUbSwytPKMqe9bK+fl!F7>p+z%O;O zyp9(pNE-aTW)F`V^ivR)(hx?M0f7tt?jveW#PeL%LHxesg6q@wbq%ZdTf_1&RmjDl zb^cHAGx(6#8Yf1sVXsN^tD8%1L{U~AiSM8+xp-RTw(rS!j=dW5bq<13K@;XdN<`k# z9z-xn@hZ^5liFc+^2qgKYPABa`}nBOX?&g7j@=^g@YrFsRkZirzPE*-seE* z58N%w9cjC-nP|I0+jMV%amyNpO0Uj^;l`LDVJ=1Ik>-hdye?VSxf#-ECI{p z6ij97P3{2ZdCElVB!p-C+q(n+@w?w-boy(r22klq zB6k9n9GqN;+cd4xHtfLlPGKyBq*K9<4=2wEesiu#z{9ETPv>i4`}t*UJr)(0@z?Zi z88%4=bd3u%RJD&1X`pWTQ`Pj3Ti*y943Rg4C9h9BAy8cY zT;}oJ75xy@aH3Pj`Y=-H@aXom^)FBT>fINPTa>NMJs6j>YLT_jVCH7&wrcX?M_3;R z{0mT2gt^!2C$PeK>%^(8nnAjcSH7`T1Wd4xSeLTvWxo7ERH9dD$$+~~xlr@07Rjdk z_H|{82Z#~r@O8yrv)|GZAl8|^s==D%!Ohav*V&7FQ5IQn%qDd7_e?`_jU%kcmVeD?Wi)#BL;V+C_Q=_Sh z$}AYioVIsKXRd(^iEiaNp^k=sc^}SEYJMqpJ14)?_(MrE<}>k2O`l}xR-E@K5UWQf zb@xSP;Ul9~ij)slQ4dEV7zjUdV?<2yyHL&m7aKhnl2W37spE1{@jx{F@2wdKJp2Oel z(215X=uFrqkoT>kaf0;pToH1mK2CR>Oh7=Pk!&{h z3ylXYBo*Z#%weQiC!=%U2IOj)CghUnF9}z*3HI&wV*=eeQWjfNjXLerD2<|CqB~={GO8NT zfjNEdn>tnvGS|J&AX|&F&19k$GN4AGmZ{wBE@`M>wb=<>&!yc(%*54_fmDs)PkRjz zth08ChY3L%BTkt!TKK(3u7zXkby@Xx@2w*}K;b2Q>WRPeevf~`g^|JM z%grmn<4IM#>A*!)S~1Rmp(gwaZ~Z zK_&yE=du2Ws&59r>Wj~?Y1U&O91#6@(=#ngIF%Tf%Xn*RqiM93qstoJQecu-boRck)8`A%_ODgcyXVH#b^y$!+jk zLjF(A{?g?OBh@qV(u`95x>V@n_*@N@d?2&;V_EvIs{d;5vB%$r-<{fTEM&eaXlU_J zQ&bcdX#YL-b#n?m1fge+5F?A%QuVEt{ykhM7WgKx>j`8s^ zN(&p%8x_%)%NN*lCfTT$ou1)5AvtsY8MexA<>yW6uhwpXixg3h6Ux+kP+==O%k_w~ zNoB2p`?>AHMxu&!0lQ?TWUh3@?+~{SeDA*lcsdEsHtZwp1oB>^iR)iE^&HhPP)={lDfcd7 z=*hjkM5ZJ<2pY5YSc^)J4!ZT>xE*LKGr&n3uYQ4TpusWCJvuB~4g6KTzt(*;+jtb% zQQm<_8Gdc%q02lf&APReka{;ivfi|DI!eQqd&a89TGw-fr2dnBJ6_y@a|zF&z1dMU zm+OL?S!{a^H23P#4qeVNh?{NucY>_&+UX_i0J`l-wI!1%E-T!B0!$h?E5;GHS>1=_ z<7|DNrQ)hz%{)vbqvE^-?Gteol2^TMhA z1JtmqRW@T^-cs7@CS3A7=Bq`5EeK`}GIpDQA+r&Cz5lJeeiTT@^=x^$Ee?p;h(i4n zj*|&ECZ24-HrZuhO)Lw>c44@Z1VRr-768pwDx@Rc99YRkMF={k7MWsxA8dsT9!B4& z^iC~eVBdM;M7?svpcWPgWLqc%k}@GLwaVWd`kyFr)x6P&U1xrL;BThioT@3X0OSk& zAYB{<^c;!k8JCrC#7*~>3&O6b5(=d5;Fo5WcK!imq5@zE_kcC6E4EFzbFw+rOm{lfbAki^y38AmQ*wI zQ}C;b<8IIUUy#1Df}m!)vMsv9#ne>}+Fa3sDuSq3Ll(ORy>c&!$8~QTXO3Q*%Z*t> zp`o?A5$>jIM@HYR^)#_fnc7ORr)RU*-;YmVDxj1!ox zF8N*3d`8nRha@n#pZbSzF)R#Jy4S#VacGqH6euuT=3Mv)$MiG5Sw)mi>o8Vhdqvy! 
zpDh5uBIIWpBqZH^yn8GHLlAWav#C|OvtTp|>|DWDon-0kzu8uqj{>d{67EP)H7@f( zEX`G6Efz!IpQfldmwtH|tBUY3rb22h{uSNRz@oo%({nc3UsU{^)MY$LkQswwSLGa8 zl(%5w&Iuy>MdbGCe=m;hbr&u$kq9XEJRl6_{AsZFY+7#@zA(R)2G+}Fayu~ibA39f zvG27$bGPxya`qrb)U$chDqrFx?rb>Z{DPd$@}Yh+wk;O)xLr&NYsf4-@Hzje#RX6$ zvpufT9}nVzJ>biQ3#z>tJ@6k*o<@l@J3r>#+UV0_*Rp;@ym0u~-U<7}dX`y%?9;GE z_EBTeqw#C808mc}0QE1N$xT*g6~s5>%2qGL>3N<=T)NVEWimLVZrC$>txm}W2T0&k zypYb55;r0B+-gQr5-1K1%{1m4YF)Od0rm2xaf06BgRsF{%N)`n;I@4IBSI0`n}~XT zAfoJ;GNqaVQWxQ z9j&%*%1gCatTJlWYR_qTiadSSlG6qFHStdgitgOQyft{v{Zwj-f1;S!0XhV%7>&guEHTAm)Y{PleBh2^xc@@zE{V zQ|q2CKLVD|EAqnbasDiy-$g8+-(W;6p9lJhAAgq5H{i%lL_H^sx+iqD&z1erM4g!c zUb;z|HpQ={Mj9T94K7(5YVgM2SZttOI*V;xVSGmoXslik{g(Dyg3TJ z^c7oqz4n#G?0w0D=5)OjsSIE#eXU8BgzC@r)8jD=T+yy8MM(OPdTs1i+@?xd3OffW zuSPub-M#oD6bArw0R&iSM-gm;j^dJgU_SH+i<9-EnrgmyOq3ICg`0J>9IW)V1Nq3fu-P?UTD)~m3qCoqI~@=pd5wGD@ybnm0#u#i+NRS|8wZ{8F0u#G zYQjWMnk5f^^kW~3yD*qZIbYra%Co7SZMK+@h{~ooAhT_yS;cFM6E!-(8TgHXokAG1 z041&nB(}qo+s6u$yf8m4eeI_j9m5)YEdSQ@;qTgApzqmIlDc|dp$6Ov5%fOFdLOzb znSJk$c+XR68x{QJTcqTdj1MUoeH}ZlOo3hlSg12A@E>*@?tzI?vbAqFix_3hN9}~o zg=-XJaDEqGU;O){XGL*>#@YVzLQtxWey;KZK(y{1$1zgK)c^L|8jcT1ED5mrzTlPU zOMHizt@P>-#W*y`WozDX=6#r(e4{qA|A9dJwF^d8&WHBB`)#=z`-Q+6d)4DE`wpE| zfyUr_GX^i^$1j?6PPjftZEr7ka+J@z?VF7s9EgAGs1`ieNUnvKv%K0q8it=dyT_sn zX~gF?V3b+`3LkQ#BOp?s0gG!Q3y*KRS5_`eKEqaOEMkVP%AhC-vtjPGvagBTTR#7QH?@n} zfE0&pY0neF`h%;IeQ#{dLquz$l0Vl2W}&F7??(ZWCaxi9r3pm%VmOc{&f+*y zhrcq9^iD&kUbE-msS(gX_>ZqnBNFuIA{TwoMdiFU)v(xSW%}n~5-?rl*+c#z9F(Wi zy8!$C&-+XXsTXqKiWqtwSfN$}drnLM3nbhp_cqSFKkV(m_8~W@fBCWSYU(-amc{X{ zQ7tQ;h-4Yaw};l_xlfgu=XyJ5F^_6W5l%m?UKL8x*UA#{LQec zu8+l3v7?ob@9os*p05{fw%mE^tNuO{xs|lhAM}BtT_6n!N=;VG(GFU2rgLJ7X~Pb$ zOF%)w7mK;qUWef-5h=pm9_~}olz`}J+6xbNY2+O2Qkw;iy;GySfi!)(X?D|XxXrO$ zV{G$-5n!eGTH!uFj^Lbspwpl4rM`KX-NEPwZTe&ky>MCmum$-Vb$|9CBc5I4IsyDL zl-5dMss+@beggQ;@$P+5%sFM*N&+R_v{2okEE9c!nOcB0(Ixefv(PhT@}wf&i>zyC zi67SkMh9RhgMN6>V(|>e`M}Um(u0tBf`A#=E{X+44q}ReUW|qDuzSe#6Prf@W$dd^*pGnUdBw=nGWnJ$wi6R4ak)A`j^6{DljoOr>&kFP>W{l#AQ8U2{!ZrK8bP3JKJ z_MB8XTZF%zhXW5B>I%(Djrj8Y57&85FOE>wK$~0WG~8KRK8<*AYw2SdrhQ@c_*C`Y zP;U*6a?b&d#N(AMz>Pin(Jt#*pLlzF`+yJEO4OWfmUlr3>8|B6b{CMTteOGbys=wA zJQO~Svy2!djL$BtnYJeChpmqbW`Jq8X8~+EX#i88zSJ9a>9wBm#&jS#x?dXOeSTWk z7;BU-X_4jDCh7%$K6A=V0%7hkL}Ke1ZetOWCrl<`QxroHEnwS+6aN(%JE1I zwPZObw;Zmxjx}CUo!x);S0AQkM_o`cqw(EGnkiz5Eh~|%5Ns5fdHRWDdPR{5Li^R_ zUaZ;bAq5mllg{w}QYE~ykCs!XnHuIOl1@+fcgo|2bp4Nqphah*2ahbk5qP4H2S1#7 z5n!tfOVY~FD05+pp_|tP7%f;O+-{Y6iE41@{p`I@e0efk{^Hf8{#x_cTZfe5>cN2e zss{u^fk)Bq6E4aN(@UQd2r-pipR3{9pGKju5ud0hC+()HPE5KKF?wA1F2p!sK^2qp7?AF5W>(*RKoxDhoEO%q9SQ@o!IA z`pwDHZ<1(y*ya7O^lT%>Cn{L**x#+O5ad0Ve!~;o8iFtXXDV@Y7FI=!3BcU_k!QwD zpjGP-I*q@6cpOWvo%%sZRIZ2`PniHRvS*+M`M$_W+a^yFG9|*)E{KQ6yTFlXe$l6_ z8b^wlZ-!i@-vz=@-wM9OX=RIaVL4GAl7t1tY(Ij4nX}jC9&0wvj<3l6WKi1N6UMDA19Or@pQlLN4E8RfccB#ZSXi z`-wazZ}uavb_6)`X9e?{%L4cm?uJr_y{0uby$q$|QirQ`li)TP```S-yxpl8%Y6n& z3>o}Rv;_k0d<|Ex8(B51<0N9i4rB7f=(Jakz2;Ofd{4 zwhaf_1%xCe%MkEfL-YV^YkA*wh6jBglQI~el9kBe-LRnCS+=1Ph%?^CU6rL}Dow~cxXRmK}01Qi5dEjLOa3<#OA5|WY zq&7W^m-}Sf0J$5+)q~&9Og0r(hR_YIn(p3Jool#8>?46btk z=_N4}$G^tnkTylIs)%DSS4-JV@uO$d7`*1vDrz5a2=%8U716VQ0CrVB5e`};@CIqV z$#9ckVr)hJs@5>8(cUuqp8S7W4R&TuHq77t4)K*AWV^c0c;YObn>nY^$WM%X*7&93AK9d6V*Eat?NoZi4))Yz# zBI$K2R}{F$zdRB>v#{$-ap*-zfnjVmdp7-w06AYxxY_4DkW7#*|7oiKGCqlElN*v2 zn!*5i8m6-DQU~_hJdLtM3#R_0C{j@(xyo|yozU#r)eR?tpS(U*u6i;N(JT(l+G%Ar z+#U(N?5HUl1;;Dv!P6yPHBke`NI%q@8`z+>@7>ZNZ4KXyl2Q#BukCHrmJ8CC_0Ozk zNNtwfgm8b8gj5?B@a3Xf$6=Le&9gBo#02IcDxYVgTC?$iT~hE$ejt$}pp|CYAT8~+^nWXTdgh4w$9_2AzuwP4+F~~}&*%;@ 
zP>5qCf8z&ir6z}-s3vj)mC|*efT{jpV7KTp$RmBNWgZC6r4{Bj6<=&o;zf473=&BW z0jedk66UGr!FaFo(dLj@Hq7xXLXW5d>GpJFrah|RvI3~f-NqFtZ65SsjUkN03N8$j z$L>ODBZOIb3XFVtbasqiDKOQk;kHy_3Mn^%XtfcU{F4>NM+vNbqbrw(hrHCIPuHe= zFM31x3qHIGEnqz;yj@!Zl1d^ulIMQkqxzIqvg*C+5;}W3^X1%yObY-Em}F7n+CSFb z8NIYr&JHO=f`At_3l9Om1{mDmiQ@rQwMnMWR#DiTxQsu5vKwsjZ;^skHgjsT(Q+WP zbDzY2JXy1UaMPWrIH{?QzI@r`Pc*rUCWqLMlSocN^6Tw?hIN=tJQ}6hMx6MT7_xi^ zV^w5;Xg+Rl?i}cUAd;<@`o+xWoM4C&17Gz(TG8`mh$0@pIVIGsrA+{eY?s}Xq4c|V zcQe0=;2c=qr?4+g)q_P-l*;otSD|L^6jUrcAB^%0Ve~=EtBn>dbVRK_duT`TOo&E%#l!wa2-CJN2A7(?G=?Ak3l-5!EAi|o3nq^<7fy%Fn=&m2`VUbh-WxnB#!5OHTDBj)hrK?@Y|JD~FyZE|h^pgA0R zDFDwsNpz7n2~f?xI@0`fk9^-a?Pq^a0mj|EIjzJ#jd`KCG-4E?v?EY-Hb-A<)1PSj zD2wxVm)YjBWV|PmCzVAKH_{JxCS^RhfBT7o;-1+QVH0kvZm3IR2^(LfI6|W;#an^f zgj7k-04~WT9MfdvQhKmyMX&}hjV2TIxr_o9!Yl98W%IoJ%@sPvvtYosBiGIj@R27x zfkz)R!qL6dcBWAKsEyuh{`jU9$rqC@((o>_ne8!K1n|tAc{*~=gnm83QtgAjh$D(` zVcS4S7H)Y#tfVb81Q>H4AIyF<7v4BzmZuv-lGyrHY$RyVmBDA~?kGtSkn(Tqb;-2B(u+P;7by1)gFM>qTU%nNuu0aV~zRHVZx_{&i# z)9@@b`By;$ypYCN*ux)VgJ}$(VB7-UDs6`HxwKeP(r-eNx(|d9zyG%&Ql7=;Vx}A zqj_$fJXRPJop=Dy@gZXBKs4!%ehJLbn|bi=j#?8Rs*Y9J^oB;bv<9Px-BdCop7}Xd zOH4Iz(KOaulCNrJDIGwd!)K`9Wx&2i=JG-(BsEnuzlStqSBv|&4Xc#&kf9Tg+~`FjHVWkPZZT*J{vAmKTpOx%pmibAOIY+3*qWyGkAv>= z6WRk77^4@BbXo{ILQtEbsDWw@kyqHsY4f4Y1qlv?DU)@26uo=T|9ufKT6gT+pq|qL z1})50J|23wI`w60$9hc6^6)1|qxKLMXB2nj8EPRWyI zRzst^?B?I)PW9QPo&5tXXP-2S;HG?!I^163_fFPhU|_5ozc1y{1jjqbBJD0I2=ho) zPCZuT2O%ZC0Q&JtzEK3(RbSYY{0QkC9bt{!%?NjWz%N|qD|Rbd?&=?wd5H7NCoRa1 zj=Ogtg4EpSk*D!voP7$Pv=rHKy1OM;MPg#m21t>|t76FBOM$F<$w(rD2a9|N9rC-D z20+7iKoQQw+4<3e4sHbF6}VTtDx!@N(hZ(JMD=WcQR6AbL1zFNd0k1x-`9`cEsD9F z58--tpQDmE-JyCyv4vi>bY$jHCgL#~0Cd;%ls|M=dIa6|8YV*3NQl;1bFm%80mQO{ z+|_WYp~Ia9*|goBrqcSonoocvWL)K**GIP-JcLV-wX1hI)Vt;l4AF#9r3P`7H|o%N zPFXBZYAey@0|-PNqstn%e3m9kdca1!l@r57ynt{KY+*`Ww8qAOMM?P%+K-YJyzjiY}s)aNkaVpaNJs13{S zyMI!rd{4%#WOfKw=NPZBbxw7=E@RWtQ(IRUNqA%}4zpza>w`D+RXhz#l}nzXp+0zuM(<`SqE^{CAk0ueMGDZl@#L%au7NJ*Ls;O?r1?EGu>?Oa+ z`vk4C4mtJZi@&{FbLIHnK02na{Uz$#_t_-|GU5FQz)PwC!v|6Xz)>Y&*1<^i9Yi6% zCQ^yHiLD!jmcQX(#t)uhaF4bK_kr15{vO^h@reSsm0vxhL+JVm8i+zswJ-pk-a{CT z_m7neUk;*CFY{6vz>#VFyx4~3qf9VTD#EWOqlD?;H`y%q8Ih<+638_KnM(Z2g%O!9 zt1hJT42)%qhfgjNNXR$Hd7=nXV|;HoV?G^$YLBk*j3W}S_5$(RUFOge_WtRbC4d&`Ake3iFYEnfE6EbT&Nui5&})j9 zmMn~@PF+rcq^U+G;N{s$ZNAArK&;T(0(Sd77u%_jW2N}h))atI(;!V{0MtdludTEO z^B626j!fO1c!Q$#&Q$9-?)GBq1B|a|Z7{od;)Srd1U^M6;vFm*rE2+-f2kCC4Ds;3 zt2sd`V@zdcF7Eca@kE-kh;7ynB0AcR>F)NM!Vj_kQ=rv7=PA`-v|8Zy5hdJ_72 z5!D1)^_Im%U@Y~m&T?Q4hsdJ*2-mWmUw!48$kVyO@K+xP>z``}h9k)WLH7*6C7{l^ zcl9LDmx3|g7FERz`A0IM?8a1rQX(!KEJ0Er)9f|OAR`nk+5TPT6UdoSUSbRdb!?4O zLg?Km*WGbm?n7DR>QtnnLWw23;R!e7AhCu`|Ulz_v{W^&?awlJ#U| zUT%%TtO;7FTM0r*0bXv6SH=*i^bSA~?2aj0*8&*g1@9rhc=k0m;Y2mfFC9LlLo1_9 z{X41OmpVpjsuG+4#lYI;Wl=&_Zbg;2l^}2684q$!VSc{xD}s=Mqi?`s_|1P7!*faR zU0!)jlg1^*0e#E2-HNEYPP`HiDmEH!xD4K{WOXr;2YOv!1T$3$^TLM!fv~5yEEv~E zsdE?am#W0aPU1RLZ%5}KEyiD>i7|1wVg?R}|1ny0mydp8$Jx1Cd#{iQ5`O>c)KDZ@6Sa*lO>jVL%uE*2J)kTf_$V9}(vAlB zSq$%J!2ZI8pKr91MD;2u0Sw?h#z1IF&(SlO&qZ%k`|@FJ6a5r+QI}3@@^Bf~uI+|LJtN`8-)>x&%AnHqo6&cNu)~SXE1yzsCdTwYfK`ZOI5lKOEepTy$f)X>k+W z5FB2Yys4;8aFxkM!6IiO{S0s&B~Q(dBSZ3SH|!@%By_tRM1KUf7t>=59p7ByF_tms zT_|L1JxqY=IxtD_XcNRNtM(4xVI(SK8%RXhKfR`PtEmMfiG;@(1lphmqLPhf{?IOG zAz)$oP;OHrJ1ofcaTu)@0S4mM&?w32{>Z}?c;>^uhY8J376ci)@`Mg7%rQaFcoHzr z7046sFP|D2$Z$mAuJ&Mmd=iQ~$KPp{{it$;Z3Z~ShTV>^+r3qCnjinctI4m^J^9I_ zW9a=&7jNc8E0D5^U*Z{}RZ2_URFR@>FX1%Px)3X)(iX)(4nl z^SXMGoM-N8FXWTJtELi8sdpMF``enVnxd~u`a(a=H$Ny|qlKWfO znbHp+Q;VP_2+ncUEV*_&v0>@f0}omAz3>=+G}E2FGuGB33xpw<;-xeu6KMz)aFoo3 
z$rOg+{PUERZz2M9X~ZDQr4p-{3bp7GJ;TiHMN*oDmMn$^Bwv(O6RM>-aJR>n`cLA+=nmn~c-hpD@ zOs5R;{5(Lzbx|u8+T+ZJDxxIpB+y*7anIdQt$2&3+k9+e^g7pQk_}jY!-&jc5-XIp-drWtW zb_*Y$zfd?`nPR(r!4gjX$8VUKOBMIy?;K#>#(aEBz&^bs zhmr3>l|$O34puo!FEi-^r#X0`c}K zE7^dG}@+r-PtAOd%NgRUT?koiI-W9f1#N+c2>$3D$P_>vI^@HgmHM4M{` z#GyzH5)DJ6QI?o2ClFVpD(x9g{#%dtCnX_qd4u zUOKt%*D_Oq17LTa9Fir3C!M9~Nr^{JIb#41afH`Y(G3I%!j}Agp&Hx6@@(o4!D%hi zWx*vKaDR}j(d?0Gn;_2c*L*x0#oSUi2kkXEgPh-5F=&QVQeICllLHe!9ui>POeM!l z*8gAu*z(E*cwVX_B}pYdULrUDQozI z*@?NaK~|je!>bBr1+15MWSt4*K!vU@wR7bvdPvOSApPl6`1iiS5~Mq=AhsI2P!D=b!+F?tc>)Huqu5}oo6^Un3FIV)@^{#We?3?oSAjZfX2j{Ngg z{p^+opqq0BNO8DGd$DSqKlUfL`*EX-x~_c*7nN8OXrL$IWas5fY5k!D@eGe&W-qmG zKEORY{`@KYn|K1-@D5XHA#wWFL+)phSA-=4pyE;sCCX=v{Yv!8djCc@Drr}EyVv{n zw^|5rXD9*FS|#ArcumJOyyvR?isD3nSaubv3XAnp6))5`=^%owRvMVlJ%H9Q z$HgZ`B{jN8x(~VLJV1`ZJagzH4b~=vD{j1p`bDBrOsWoP$csO(7>W7~3|_C7WyLYt zUZ0o`NtWzN&HUPfuDTO57*)@BQSFJDOW@|#F&5js>Pq!AW^f37?RloPz@GEAjctRJ zD0_H5Qvd5Cq#{g{K&^QkqdQyw(Jv&IQduwL0>&Sd@=FyY*qNx}%G`uq+qzB!mcK7c zO_H_Io}5K<^nvZ-0{)I{5eJhc#!Kgatm{&Zk$QcS`(LkIx~jk+inS@EVr_RJ?TQW+QbM1>9bdRE3Gix78yd zo7Uc@hFM@Ao+HLW@$UtY?D4Ntzr1!a&4-wjpN7WKz8`(Kfzs#jy-xS95HiP3#>n*m zppEhWLam^2L$+@>Tts>x{RLuzG0&YbauyLSIP6NH?*bOR{bvm9HN#-vg7dF3$Ok|M zX|(JBuNply9xuDgJGNUsee;>}Kd~yL;ISNfcfxSW(3~=pgso!Sczz{hVFmT0Ugt(yD9vzFR#>kMi`Rv;Qgcp=@86>D!-WeY9Y? z5wjs4?Ea*;qhha$F#iFofTM}9>>a3ATdJ;-xsS6qYTBLgf7d;+6t}eW4Q>if)l9$d zRKML)YF-b_mi%;QxVf_W`<+@mL*9o~??mzP8ZQhvX~-yvxIb%QUt=%Cw+%ymhm>jAx=L85p5;fHGYrrH`~I5=B4phWy{4@JdE_8 z(ATs;|6C#Z*iTA602_&7p&`7XM89m2JnK}^Z6^EUamVo@S=?H2u-Z}oSVjI>^aj(UsrSAz-`hQI`J_+ymAM$PM0w6-FNZ- zlr?{g8vv99IJ1CU+*!2YjN_-nz_!)rLjF99pMGLrbzL8cxL*nEJkU(n)KAaW1TY`D zQO7QbX+|U!R)33buU_E?4t-oP*4|_L$e!p({@50h_07C-B@)RaOlwG2w<0(*Dc+oh}>-OjuVLjC}iBYc9qSqkg| zoNX!b>q!fhi9fR8=a}4R!>s||HSmV6l%{x72IfNL-dTs@U!s@<^6*JE!MVY8X|NlJ9=$4H9b*b4wQUatjr@d-#f!-Y@pHcwD_dpo`rY z4RPb*eo%kzLWL4*pYNqUe`LKFL{|2t|$=WSV>VXfkMWhJKRIe-~K`YnTLnIg4ZJ8CfsdM{lh&Sflu zb4dY(p)Mo}M>hnqF5EZ;{T@JTmr0;t+Ja(iXzQDfBS<3|hdJ#mIzu0sX5OP_Q_3T= z)7ESZZE*}o(RRkkC*J`XHG1L8Y2B=tSQ4_EbLjcgXfK}-Iv4Fbn~YdyB>-LVQE)NY zxTRqDex)yRLYAPH*V>m8Uf`K@raG>22M6A8CT zzOUBZe0m(4baN_iWM^uIwY=xBw%*u9qzzg5CqHM}+NBxD?bgTVhx}KD1+4l_M@yauiBH z@4K~rOmdqvN^!`G;Yf(Y2WStmeph)%?8{oCKzE^l?%Oc%OU4#`g>1Q+2$W0fj*rJX z3#}J-zA#=!dc)U4!P0{zwyO+eJOLg6r;F5UTVZr$KBTgUN1)}fSWuTinw zw9pa3pzC1r6YLojK}R7siWGVtU#j5-2X=`ed*@3m43aMNJ@X4ObDEX%7JFOnf>D6&3ZP>aVk( zZEZ{K>(|Z=nwGwuMF<dsr7uHKbU`OnHq1zE3$*f0xDiA8Q(gWCAdl?DiI0l%TL z)}P6~(-WTDu3qN_R2?9wADFPm%HUxmdh;2jKI6S5HmW-&3#GWhq)1}9<#%MadKoqj zmzJj2LXh5oLI)DnIiVwt|2fRxa#@75dNE1eDa`PWE|s!R4)6b`C_gPT$W*V_%HT=a z!|oXA2W}3mUru;SB^}jp$)OzxrDH?I^yu)(0B*m?#uCtXj31HUf8zlmZ7l(o!43c* zd)?XmuWtap?SCZ3EB-4no=xRw}5Ow6;2`^YqaExczv&2-(Mu zrWVnVry*Mh23S{Hu6}iR?rlmG1MVdN4S$W~r+&cEOY-Zf;R9Hj1jL>oqZ2EZ7q%=p zx`Zr<*3U#!D+!koZt=%#7C5pgdnPk?8zOEye{_pd&}$7!XYH1PhlTI*rP9DIK&^LC;t1?}{zo z(ei*ObRH}N3&wLSzNoi7O`b2)NWU5FSmYW(Z6F&1_5so-dvV@N>=1*Okr+0z%$3D? 
zcAT-6&5=#enYI-nu9ItEtgjS+yAFg#V6t?R5S94vBGIOsYFTf$PFW<{WS1-V384DF z{QPOaY&E(9_kDcGqSnI9k{c=ztv#~I_fB&Gfdd+K8?=V#IE(sh_>$1a2}jO&_8^@v zwX$_*VgAcB$U%13?Na3p3v>HGZh?haW%nCHc*`fzGJq|YXBK*3MkBI};x zg<;d@XZ-b+dU9MM1Yg3lD1_}wfJp*5YXpW?gYquW|96f)%+*A?jQ zw1bZQ>q6ce1Dmd|EB5R2Du@P|-B#d=Dkf+>Ke2zYfvh?bAvCqHJPU zna{Zo?aBb)bBXGPp1j zOM}Lm5KfB$Ae}>wbH!=j`zr_j$vHC}_X*{G|IeKBVQT;-dMVR+k8i%Wo7$^B`q3+a zIJ?r&Dvi*Yf%LqUO`T>h$+*;?&hvDLRzJqai!UsWO-68ept~jHO z5F1|Iie>hSU%Dv3IycTJrj9YNzM>_F2p!2S|IXx$ZG6iZuM%ln6 zIi@5rKbnB`M?i@@Ej^+S(*f4Z8w-(BXF_w5v>XDBz>{06OlCzZCjW@9==E9D48s*R- z41jm@U1n6Im-8~RBclkHH#9Jx);)TZnu3~*Q0%q~_b6vugp}o1xZQ(>T;y)knbxW0 zWVJ-ici?`_VRJO((jkE|MQ=6cdHI5#XD(vI zvG^BeTMX0EVnXI+x(ZRK3%x9Mqa9)&&YR?G6U50rBcu(74vnyv7G$Rd)_FL5=CvUT zaL);8otJ=a?(q4i^f{UzfN-D@P--ALFC686Q$HIgN3W}e`R9$0F;L(orpgnJEpj<# z!f7{tg;*p-wIMsk?tIJ3#c$-N2U;BsbjB=*I|iMm5>i+35fhn5GaWASAOl)Z%6(@P z1IWHxDpIpkv+w$oahyp4^})J3@f+Fg;yyPJ*xNVe?$DA@eDbBc8pG(;7vQR)9vz|O zaJ#Y7X+B(jM9uvv=kQ5-d9u@$7gfS0-gjQpHDSWPQ?B8LKc?8f-L@qLb$?5PkcEH9 z;_RYN)C$GO4e>jo`$V%g9Chv@Ft{_4$#6c-u|G@VMSrH4YX3(GRnwh`YKDA7%RUi~ zcMx)y{$Bf=&uJkw)n{(4yjdiPyd&$&R+;p+Er#S8*hnI+o zp{}74mm%XHSTSP*IXLT^;C5GWrQSxo3&iv>XVCA$q88&9x*rasL8w-!&k*(1SV%t$ zC?)H2t)FgJ)md1sw3p2&+iRq^SyI`^u;-zq*^#~DxHnwUU6Q}UqmK3Wg_YJO_Y1tfd9%Ymtb+?W z$1g4*SAQ9cA6$`6a&!*t)Vadg zA5^;VJZv(76D-`Af1}` z6GdyIAY$!pjAOFs2&O4mPy4~0TuV_L&hC}HyJS`#J%x2T(3QPX_xSl0{AN)-eZr@esa09GCHyib2%27d}cAjQ3{z z@|v!OG&*djboLU)&d+O`2L3JD&{g6){~zZfPj|k{;>RF-{1LP(>ncvoG&<;D)cJ9*DHV}T z{2y?Bi8|HdamxdVG0maM0S>FfW}wK>g1^)jlB*s6zU`4^*xCN?+xtQ};o&oh-=S*dLQ$>`Iu~ zDQ-vR-rH||-ETuy4hA%GR6@i|3;`*S%(i^-Ssj=?;qekwOSc<4qKAL){4>>WzB@RK zrXeyem$t>?&cv3n5i6|!R2SjJZZ4LEVI(8{=!ahb`iwLJAB_6=JiLFE@Cu(WB`O8T zNFr8vNXYaDG#aeL3yQRslWj_6y;Ccv1XEsLP}vyA#jQ)t*<|neB;QGleA#GKFN@!LsRx;j87;{=Y^yEz2I2=MzL{7wVi@@ z7TciN((?L4Q|+wm{Kz$wr-4Jh!4*FAwjuV_z|KDycEoBLK~22z@k-UVS7Dddjd+K` z?Ny`tYM7D7tt4H&Y}&vq$1B|yT6*|ocSkSl@TN-3H!DPzhHl1hs|6$g{cHO-`F zRz{Ke6c8W{HSG2AX;*KZ%h6*R?p)Nc0!q@@)I;wO_Uf#ZZYyKFRcJBP+cv{Jqu`JC zr12-*^sk8i_h$%|!8vQ+>vttJdhb}~-bI)PMaM7ipfftH@Tsx(_x4NttjNj$1%^9vPYkVTgy~Kll=wiNZ4@^>`l<4AB#x-pql{tp=#@U6o9ha~zD)M0DBL zi(cQPa;P35;j@)1+tNj>WYW=JVYviiAv?D5yJ&Qm!-QWGP4sW&2mssiZ?M+1G_NISO>oj%!cBb)0l3*Fynuu(VOrRbAgz7?Sfmo2x{+in!G(kGa|L_N8!aEnt zr!T|3TD!4X*Dso*dU(^7e;<`xfD+nqE~bl`Uq^zTJ9%8Gu9Iic{svT+&@@KwQy9w0 zcT`cWG+Yg?8iwqxi6-phdBEO970KXEl@qY6;58v-XRPKw*Xk!2pZX!6ccKfgq?&4@ z2iSkW8Hh}Q(G-3#&4xBAMru!zLG(6o;?})em@3=sBlpjx{>m{;cW#(=rUq9z5DH72 zc5v@hIWU!rGGTwK0BlRw%7N4XOswn z`Z3{-EUee3!0}i_5tQ2;9YAfu{1P@hI`vgJ7~??r*}v*9!>x2RClCH>CxVNFd;;38 z+=;QVl8hM9;j`8@7F7HRiL3uEzu>rpBceO%bNdkf%gk({en4~zXfYyJbdk{;orve& z^)}x0jvnc{nc8v4X?zZ|z|Mv0%KIw5VKFVzfFr0cQ+WS``!Req%$cUmODk{+PYQw-`QJCXC(8#USg|M1^@t6>SD;seNa4q z;zaN6V21A zk~65{;%AhWQyBe6Z3ZmLL*A$)LRYKBMEu%WddcfgOa-zA6?M2t#34CKzXm@aHN{tH z23#a-yOJmQtd%#)@S@2?FarL2k?yk8OKcKHOFPOPp=*LK00*BaNVfBTq~P1>4-gC| zg6V)aY{aJ!LZT}~BDJWA{J+*}hPhpgm%UQ#pVr7E*{d2udM#h8q@tFAJ|ElBZ+6&N z7EZ#pfjbR?0<=;XP)+>hN+Nr0kSG=qVbT6SiLh`Q>;G>fENiw0D+6bbNAdf7)dcO-Ej-C)kP$2oA)P8RDOnc?-4D3v=+{}%3~K7m))Y$H2n?4KTPO~ZcxSO27Rcc^rj789cKCX5?@knhI<_~ zlO+dil~EgiGn$ayvqGZEko#5g+a_zx*d*S9eL|`S%q*Frly>)>`%^oDtE9M8b)EZ_ zIacK#E{zTt=g8^uH;i{rsx8;2k-RrTEp`otc{jQHR?v?t80TIjf zBe=;5ZUXqgKbRS*{gu=aFgi|+OYNykO;&bz-r4I5w^DhVLw9^8TR-jO?KeYc^o>7u zNId~D;CmJDCY}gkFaG=K>&7bqknaHV!^P;4qt0WaD)~6|0~5cho?D6C210T59JN4j z*aaEqeDfFXyA99ey}e7RuPe>IO56oWn2*@u*S>3Aw*_OY{HdB`H1m?WAZ>SmsY8~Th7G5VqTf!vt2Sm|XX|dPhk0hf6?aV(OFu5G8 zc>O|=c<73+_YSlFgK<_SQX16@*I>q{y;g0Ru269jXr_GDJ+sszdk$0PtPJ)8nc0mZ zP1BkF4N^LO=}`c04-x#-WJe;cov86JR`!LUO^1mlh{cpkj| 
z*4K{Yi%u5|Xk06SK-Y)d28)$@zyomDHEf{({=}UZ7FrYJgzllV7>M~0)L$9BOk9i5 zN!mdIC1v6ZDfkn-sqQ2YHmt|n7tgp+V6SNpzWY`KUN>i05UBG%f*s4RFo={ad{+Ye z&9WBCb?;W2Bs%gWYU+$mqz-BPzHGpfCc*wm$#pTpH~e{{ptK~z47T7~X6jV|KoA+8 zcMc*<@4JL8#_dY{dK)d-MGFd|lLRvDA34Dh-yL>>l&8PA`j;iT;QTUf!yt{Si4EIi zxWQAl_f-*LE!$~+$8b$5}SnSw|OC2$i%IY_;(%=XI=>jg1eYA zxKtEycw2|kVYC!F+{PW2w?2;c<7jZU{nYmd{M+&K*Nw3bkyKT^>wYz2gz8zRsH81} z8ls2P-cQGEz4`2v*S+OcD*c_)3J<}cm2s%9dsvjSW8a6GTHE^V-Nzc`P^1sgA2J(5 z717k{CEw-&g)-*^$92j{f9QWJjgju|D&Vvc^KIx^g|hJAG$1|XI!J+OkKywJ$|UZM zCo3h$rx8mVQ$RYQ;pi*kyI%)ZQ!u;_9>5!pr@eqN;%F^F6(aP^qBK^V86M`GVAc@0 ztTUcVeFR9$NI&&;V52lUkuf)^*RIjDhb!lsJzlz9w^RIA*%?&BMjI9)7;&j8Vs1jJ zaMkf8e{$fTwZq*sX{VLoMi zWchh9VVMeA1pg}fW_=}-Xv402x4#j`Gmljz1)0z)F`v>Jys}fhvS^3fL~T{Xqb%S| zoglocW=xmw1^x(J2$2i;h@%34^~)~$fTwZs@n02UD2fk@ke^C>f*%Qu($(AsIu3mo zp3iahGjf=#(4m4@yp!@h&eHNF8GKP?`J#&3#@B6-OnO1{X{2Ff*`51b_A!cM4}lYj zU*iGwHi3*6lK+rEMZ45Uhqv;J;_x*TZAy6oAWh{Y4WgQFEqJ@hedB;b1U?A@(s9p# zJ?`Jg##%%6h(P(&#`{#bzopvj1KS-~H(^!aMDaT2j!=bboot?mjVBaqzqHVP-5DIl zB-li26Uw!c$821UYpvC@;%`u;^$o z?`Wt0AI{zaD$1|j8y<4#lFmV-q>+vRL_j(WkRA}Eo1q36x}=nnZd9aG8ir5_34x&- zL>i=|z8n9~dA@Vbch0-s^{q8)7B!2Rd*6Fs*RS>tE&nwB{5XfJLU-jN`Nv~Na@(vj z$z_(NCv`*d44mZYRc#f`^t!6v|YFnsdzN*BpONky{Tv<2R24AUJ0=ya-Oa81_z7YEmOUOiaZ(D&9dUv|Vv75NGiOj|jnXyA~_w6B*HFCH1zAP>svyXzh7TDFm zZMY)HWswSApr*9x{=O>D`x6c_kweAx+7&jr`sAy#+e1x;arV1|VGaEPDXAjI&9Ge6 zcl>f>*krm0Q^qtYx7*f1kqmZ2PX<>UjAV z#?wi2qw%P>SdDuZR@ms9Z!etc#>Fj_Q|rs+;2b-$7E z5yrNZQpR;{ip!b>1#~K-`H9L8IQ@ctWV`uNsi%FSE&S+J2` z$E4f-S)`d?ZZ6YyvI#({me;2qldqnSuGZWWT}s_6hP#4=hz1-=`%8k;OS20 zsN&U-AbZS*ULrwrN+S)XOCQ?mB@`aF#%fQ`V6j@vgzwY*5tuoX;Wdoda7PcOgTs;E z44W6(6AgS)Y^Jm~@j4%Q>aWfyCFWdZ#`Ks-DZsKj{A8kAT$H~$7&Mwwd+Pm=ucF9nm9Bg8jm(h{8$v# z*lS7LN*%|(rRy4>A*mXRo;mBUZmA$2m?V3tVerk6A#Imj+NIn!cUL)_oQlSZ;FD+4_Ah>@IP$to8ZTF|MCzGy7JnW3 z<|=DB_c-;g>1x;+fwb3a4CRmX+GAJ3G!bGWH(Of)FIF%+)dYTQ6un|vP)2Y?$m;d@ ze&G~(z|QZpvK#a5&liVGmIp>0!hx*qx>Nh5N5vu?#PC+?*1LP*FN#H-nj1?up@;cu z2t9Ms8+u2R3AV_3ik#O$<5YG6C@(r4OgxfQge*zbwXf=37YgLZEHjs$7SxdFzJ%$6 z$aYtZ)t&u=Fvydp0={AvxIi>$kqE9aQqKYLP`raSq4Xc4%s!eB%=(sbd30AAWanTM zG%_qS$Un3v@m3yrAWio4r#i5*t7a$kpxSuigaK3wlxYyW&vg%GWq+zY2h zc`UvZt}`$-ux2kCBs>@SX)va6NlOX5xrrD#p8MI53eqBvQlu4%=PkHT?rMS_@~%nQ6#ly3uBxf8oRrGQ}k>+v(FLt{#?-Fi^Uy)TPrpeu}U z@VR=}O-O5j_YiH0`3DmxJ8tp(a9(d0Mk{&XI;k4Hnz zmb<^+EaP6M!8^xReB7)2yg>TNfSL~RxHsRz$Cl>QRJ<*g9JGhq<(3tZyol!H3h(#1A0$9T;&vBZodwA1()g^dwLHo-IbU%!C4!aw?n5A2JX zNg&)_G78VqY{t1r@u^m47u2B!(0|;v%CsHMR08cUdyM z%~PLZHZb0U8Ied&styv~NqRqib|5R4qpez8Yl_j!9u=e`SV%lf3l^J|G9sw#AcNVc zf4<^;w3BfWQWkwdWB9DvJ+Pel8a&eEP5!FfeY+h*eZKhB0KziORq;fQw0oi0GoYjN zFihd_owRe@i#qGD7Y$BXQzy*ogZWB5iVT$m#cwzk1W+yJ*)Y!gVg_i>&9Vxq>$r+? znP#X0Q9ZS_is@#aYmU5$+S`Xjuus!82$XU4|&BX&NrVdG0o9a3I&UzD#?XLM19D(nlcSNu(kab3$TXGVSUR{~qywpZf z9ynZW_sWOn*ks$H*<)g1>Kxl)iZ>(J1sVE_gh-%Dra)nrCG1o@VN*W65*hy4Ec|jm zinKXK`RHR*AmwR)BP>AgJ3V72_G9Gj!2qj6kyy{+w$8?w z_Qt-_3)ymR)mq@anWizVn4DU{uar?Eb6T~3^x3Rnyay%$o8V@A$<}e7vOThIEdZA4 z2YL9LF_tMnw@Z$e=7{IuD-GU7yu7H{JvG7tPn?L3(H*tS6GDMDv^7D#qHSVhW8zs8 z`b~T6gAJ;HAcp(pKfh_MMINcYV_2@`8Q>TH@I3%C+Y1D?O`LpXjLD!*IcwhR8HCww zR97<|cSh!1thDus8D0^MDpRoR$8dc*s?tMS%jzKQNDD2DcPkYF|3nwMg!-bhduIz? 
z6iTPR)+jj20I#pp1U)2wW>e4tW|C}2gT9oivdhhw{NhtVf4=MQ5h1%qi6O?d-d_0T zk)pd1^n+wCX6ZZU41IMdOnf!M4c%(fcQ%70T_*8Si1^0Cj%39}`T^(q0QLQOv&2Un zT<|_LGj@x{+qO;}T#lV)aq4#U?2$oRbyOoN)A#a8*EwJ<(c%=!+1H>8 zXh$+V`NW72K$^HHsWyTx>V-%sw^X;oZJE|Dqji-(61$48-N|;2*km5kGRbb@&#!=C zMoJCce=@gK6^?kQNn335Mk;rq{HMuj)lMuKPtRiQ&gc{pN?23O+Iej@N(0|u8r^IKVa;x^Z>8|g?GVA%X zpyhM!v$cb7p&}-7g=aia?IZ7f9J)Ki(z=~rO(Pz^jBL;XN0Yh|zmLT7K@CwBy_gki zliADLP5pe13PRrT86UFFdtYJ39!Ywo^8OeDl+--?$%vWl_5CSN-phNOLg2Jw4ozCf zIyCR%OU92u`!mmPe0vF~F9q)Xf}0M4Iucd*1V_YD(G0tA2^g+lv3)S2TR#=ig#VcN{hP}SRsvyuPZI1>uGV}Z@Pr&QE}YUB!ZCH zE}U)t=(f7)p2S!dCc9Qun8Csze>vFoy3^CZW@Aml#cGvThOT0#dtq}R{On=`=$WbB zuZC>ta2>jeEvsZ#mLQkgXxq#>`0WEs-<&yq?%r|de*WCM<8@~*{v8GEvE39S)427C zq)e zF~XoTlvsILXdy2f=sTc0CJ2Y#_?(kw7_mcL(FGTF-c=Yvl_hl>avx5=M~o}<>W{KB zF+58pVFou3CsKcdsnqtnkc)r%^$?QDf+jX>QsBMwyxMBE$;qUPuF25^=Gjco;ipS$w*H}Id*TX2Ll%UgmXepipFQs%e$-Uzx43=pddo04s z&&lC}Iyv0ec?@)&aoobFaBcMP=OeMSnZ3q-4UGZu(^;4ikxWtYY5$>BaldA2)+K2U z=SL^H5a#y;M z?!{PQ&nwc(C6rba>6Wlha$8JN;FY1;rS;-Dw7om;iEh>PkhCQ*f86ErLA90(|i zJ)KM;rGqu>nqZ=WkCbVP{bHrRyD!hcFdx%ekez-Zq-Vm#$|b*@KUiGh3i`K=YZ-hq z4g3A7zfFOtjIB-+v-6Ii$KzHh`9_!Mt@yO%5SpC6pVI7<_-oNJF3X`NdiXLwpI7&# z(_nVyR^Az3AAbYL$6VnAj!C3NVovAU7MJxVFFW|1x)8z)%z}?&V#6$GT7L({I(dps zPqPjSGLo(he_}rE0TIDz_{`|G%wBHa{GBY&5C3_xs4}eUml;~_8di<+8aEE}PFXY5 zI9XCr@f$p;o%X|K$GhhhD}dnsmCtRq(6p@EqR_~lE&^rD6WXR!ViCAWax!)Irpa1 z;*@;sf38XzwzF(Mz~Vkl>1?pz^s1&}OFc(0Xa;mF*78}x1jDyW0jO9?OU_?*T^^8> zG0N%*wtf#()Wafc44TW8F#e>*GNZBH{&Hi@MbfS3Prs-PBP{i4aP=U2(CFpH8e6=i zn0!LKSMCZ@z}DoFcq}z%%_8JfXHqC2!|Wv*?zMDY;gaHt1Sfgb&UGbT*AX&bKXOH) z!e@0PNWJ}@6Qvj2LCZ@Fvg*>KxuzNq)G zuX*`3e**d>ue0*BKLN`H#r(mdb)%L59cE@ac0g>#cc zs3q5>FPz5fCseDfn-gcLrK=pIl&yo!Jk7--&vOt(+DKYAFDvt7l6o@Pr`2em7Ek_{ zqwzwc(;-K5POwDGR)GU{_Z>TEfCo+lUBgV)o(d~@H-;aXW9EmuyF&-8<+|L516MRh zF+PqH--5={<<=Qxnr5c%cb(q9=UnOks$gBR;&}g~FxRC6ad%9IpnJMG>Xac5R z+grTtPaWkVk3u3(5|mP5Ku4NpCSi{ckuYxA^Y6J&s4s3DuMvWQ`YwQGPc|Pe(^8?f zk6vj)w)k}V)|D4B!qpq}+PGmvv&HI=FFe8qUS2;jN|+pZDJM<-qz*-h*30Uufip;Z z3CATLDV8=mhf?877fZMt_z*NdPt>N_@mMB3dOcIslAx_hF8*9_JB@@%ZvDP-?hZVK z#k!`EdEot`?_D`3E0fJ*wyvIIu(HbMno=C4@wEA#%9SJ5r=irNisuGY{dYpThN#Yy zQ!;An#@`^tO`OCMXei73@)a&4n^no>EVj^dO-c)IR;0F$v@H~w_P)K}~A?_>|Xcmy6=?nJxHaQ)8v?^P^Pfxj! zG)w}I{#Mj@mCvjH`y_^K+xUQr(yS47T9UR<%wzh}EHGsHgo5|%j^#u~&plDGz(xDY zxqTnm?da*m`$dMsf;@F=btK_@k3joa(vv@hf;)q3}zJNj+ zkMilxjl|o9-)Jgg_7X?x)Zp6u45R+KU@tm6lX!#zMFMZ*b{-xIWbx}TbA{8E)EBc7 ztBJoflz#ks2@wI0zy{Nmhg_T#eBKuo=)a3V5%5 z+XMmQm>+ybO?YR!$=zOiA^e$Ai{DOD3S`BfAZ==XhaSVqkFIFDfkbgxS;R% z&!u+L3RYP2TF8egbd&3UJ}((LP&V1bOby!h?d*ODKf9uVC#tMo)HDw7oxZaqzp$&; zB_WT@H9eGuWxxA+adMwG8G&3-w5RxdRWrEAZ(tP{B>nXP{=C zf#g-DZrMPKg3rhG(hebhJGHIvq`mm*wQZSx1<8E6=DDNY~+EZK0^70;C!S_}=FkY#VD_vNyPTf9HXF31ZJh=eswHXg}v zwlQ%aLs`RJ<#Ym2L}>}yXlzs-Bb-LbtkeY9%<;X5VQG-dFd4kMSS0j_J2%;-2y!C% zVWZ?5c%1SJDvYdq1G;eJ1}p(X0pe%$C^wW&_T4uvc*ReAEIA+lazt7+w_jK}bs!+2 zv?&c}_F)Q_Xpm#O*>HO~_c8qP`gnhVichcyTN#?!VLmdc*( zuNl@|TXapz=)^Q3~4#9n4YqWasXAf*7*L$owRO#|8+j>!28&soGUoX zo-*Qh)nIpPea35jGAEDZAH87k`_QcXpKp;3G184p5l!dGfKWiemE!(BnbZ!O_A84f z?l`5BB5^zh8DyI>8Cw4i+g!5CNry;ijS}SE(_+~Ml-4)<&8(c}A^$=9`gjL0YK@WBa0`2e`q+n+)w%g0Y@88RF*$U8m6N8A-&{nX^fE|!yA6a#!Ht#g7!!AR z+XXjtyb{Gv0R4S*m}6T{ho5U^h-IvaztVmO9OGtXUGTzO%(0g$N3dFdz#_t|QXM`& zBnrs(PCDG#zxCG$70$x)N97;lX6qype}j?cRV_sTWtj>qA7>kv&%Y`OTK@ui)d33Y zcePVQ*O?(5jejB`w{zU%u|$s}*elZ9MM?g1#OS3OL}-RssuxM_6n)!wE#f(v1Apax zUgB|-jzXbpDRL+m!=qyf;YE3&ksW!FQnk8JWHHgqipx3v0AVdVa`XT)=irY2B?UD! 
zek4`jz_*k-6e7muAs<@#gwkJ1<8ky1$u1d&s4_^NAxZpo%$WSxpVvz}#*>1i$w#FN zsrBE^!({z$Pss%wl`z*rG<|w^tx8l&KD0`0iYj8k1Z3x#U+~mXs%Be z&%mzT&pjhzK@u$=;m_~*KCWe;N%-Ah&Yu4bV^q22GxIrI_vLOW^l`w@?D2a2%wBcW zZtcp6^?b|bfrrn4#>!RoD308jCG_Fo))FNlmjg9gbWm4Btn3p7cZXZ|!5x5b<;p)f zR2j(y05@aUuA<*?l|%cja5^>$<0f(N`Y`F%GRz1pz9}TjzzVvB4F3>~ahINfWKNKA zJMli|H!$~L1vW*^To`TY3+0 zZdciu$m3zt{6 z9x6tSVRJEeK@uTZpKS2JR(!i`ONUjyfxH(mCEc9XkeuPbm>p_hG&&0Xz2yyNN}Xjf zkcek7mkIjKcgL2&?Cz@q0&1Ihd#SD{nTb2e|Kdd-=jvlVa749qz4+`__rmq`{)514 znHpdxq<6_%zcEGpo(X_s1UPISX!UsVm%oBd+HNu=aR`Ugc*D4E>65J5A@VkXTH8T+ zAurLvKGUbb1>V0Z5pKs?U65sclyn^Wj4L}WY^AWWUsh1qmt)!~Pu(x0;x{#W53|0X1xF$wykSe9$_cV4rcg}F_wksdKaMQmy~}I2s^&(;Q&&57 zD0j#2uwTXG=V8$ux+H#h03K>vg#KU+=Wc=nf)*DO|8fspMRMCXMxpnTA4D}|U&hD! zXO2v0zk@mE?+{EQg3s6n9kah+?Ep0Rlf%vqpwvjGckJf>G^*HtYG#xPDV*=++}Q~t zt8!F4BfLu8Sw7;oCuJxjVAweSxE{osTRp+=t$OeI{HBKKBl=i2xh}m!{E9n^lPP`6 z{8_p8_|a?wC5UydazfqgHA2G4n2ww&Jkm*)%9yjNlrF5oQewH`O1W=iqP9}qS?y}0 zt?_w9EVM4(h8boF@skLPvzm^sA8 zO2OPp+>;nbdj37~M@CWFy*7r&twak}^7!{8jPq4q2^r+g#5*J9fTjN7 z;33MVxsln!NBt77)I-)bHL7qoQk7$1A}4C>>kRbSbx}%d>~Wdr@PmZeLCm;~cEVcWmNtF!?*| zMSqO`U}N3!&!7|3%+xX8a`)ufK>d7C&64P&pYd~tralXO4h9}OtC&_ka*ldC27y;q$tMIR# zH>wR?xON@y2~sy09S-omC2vh;{pQZZltC>Flv`DW%N2Q_`QnKu{g^O=ECCQ^q${FG z4ghYx3ASxh5iu7Da~n?Vgk@%@Jbb4Z4mOrt+K;<)f~Xn95@_i5!h$iwhivHyu~4Z$ zwvgX*Mx$m+z1D2=UKdhz1-X08Ct?ne9@^;r&WeskxGV<&)ZVOHLZ~CKUT}&EXz1iW zUm(fYPCfjmuU4}OkSS@gz?3DsjDj}$PJx;7U6<;(*dxjJy#@XEbhb2eACNF z>1UQctT{RbKB}{X_34(+%S>&b14JGN$9jg}ZY3W_Akg_;Rl8huXgzhPtlWC?-1N&- zL+KHIl0^1H09(u~9S};#s6P*q%}L%%T@vJ_?8|wbBl%uGR6H~e?Jp`!6iJUJrZ=K? z5qdXG2H&Zjk{H2I01Dy13fk*W_r=5$Q17atI>nKIgXh5bot^5HLd~1lDnYYKmHaAsGWqwWkPwe}02Y#-!4r^o6+4;!nPtHJ zi}mu&O09A$Y0k1BU#+PjE29L^s+i(H1JzhWOj~&(j#PC*L-N!&m|h5Qi~Z43RSNzL z*iBH_QXhP94pOx)@rIu?2R@>H?kpLbje7?iGm)}MsZG|}-w8WnBO61EG+f7^e ziAph+t=gr;eqJJ?OB;TxaG8jtr*_N5-P(iXKD;6((fXG^=s{|S#++b9#@aCp#&Mm; z=7e=;my-1$ZmUb5RbCnWRp2GYS`*!54Y$;v#gA;*ODW!AsaNCkqUej7<}V;N!q!VSYF4Rr<|=4hNd*k)6t&9JTDAn*Lt_PPe>6-QpD{e=FgfG z>5wT|k=RXRva}RTKHG7q1NOAYupN;q zjhHL7#z+DyH+GEXMi#t*-tWz=+82@;4Kf)`!}0(q@Q1Pv+LpT`yrWE-%EeVEmVWf0 z0A|?TkHgA7q*87HRTv5SZ`A_on}4KgvG_=is(7KFXpx(TSX`^$R}~RySbLjp$y=Fz zUhh-4dIzo;z!fnU1ApC4dtYHF&0RoysHq-LYG^%b zE24v+sgc4wTK0!0%^`yZ(2uC(QUtorWt&vI;#|yUtRGVy;FNh1z93-UN67?YEPz|!)+La?Y? 
z2fMo@t>^xTTK7aJ*FP2S-zq-eyiBjXuv)_I8WKIZV!cdzM|7>wIHDaieYOZnt_16z z?Tg6d=+7;;kxJ$Dyx(zPL<}{JyzI(=mKav>Se>2jnw*87f1JDr$kK)7$T)HZz>FV;K zHRLpu2YEMmbx;uYGQYTbnw!l3t_!ZYJ4IebXhM1gi0pH#nuv5Bn5XVy^@zu5ec@pt zJc=Kq-+rTJM- zgAG-`*kwFOn?4sj?C<*%jf0t)=ZiryHFqSkXoQX2y)Ci$1mZTgoE@P<^I#iSY2e6X|I#W+nib=K#VAu8&x)mAfUQ8-5L>KJ-7K7)P`%K0q) z5m2gOUb%9%Hy(vtcKu+#fz>D=_OWh#dHQ7xUovj?FQgqxDIynICmSiUU2K;C;Y z+7xbjU4)HtlXxiUpuNPbRJldz*!v+(dlkP~i>D6=_N_(&a6-BDQ!}`G#+0!gGsaXxMQJ)5gePJ{RTDCz##5 z3-F!3UAKHE*K98L?dZv$_b^6~C^BG?=5eDe4Qy;1^O%xaj-6`5If*a7v&fpdvIc+W zL2Hp)@sT}^Q)zXn*-y|0Giet_6+rS@e7yZ}nhQs%=YEN}u8xS565?&)Mkn^%PUsLP z9S@eM;_i6)+(h}NW<^pj16;sf^yzM8mCi7sSo-+X$>TR9Vk{yaB!|(%L(@wvO;vY+ zqWO-`00Q9cPoiFPo(-ODJF+S1N-@+Hxwd6BhXLG4!d@?7&X?WEfLC$$iv8 zXDwyV4wk(HGUN8L>TD5|Id5tz#e~rqiukZOJrTsk&4d(i;UHrIi+Hl*tdw$?#3rUq$~XWq|6D=03~oaUHxJoN$Ti_Y$Q2UR_P^N2}>VeX`_99 zZ<~^7<4V{mtq-C1Ux-Wb0UoZaw^Lqg+L&XUg95X5i7#4B16gDH+pTw~Jl;OQelxj3 zXcjG%+BoEiQ{Wfk@jv-=& zK6LpxB4S{`f;Ra1lZ0UqlR=xevS+jp`aunp;($>!S3|=He26^9V;h>u!#lMiaffg5 z5Hng_hc)DcFMxl5v(zSUX`MdK(!N9J?Aa-LG1!JA)rprmx{Ns=%vpsTuW`y!*2h!` z1rFFgF^2g!XAMHvsa{OZ?5{6-;6$nrd_n<*DyC~8VQ$Hq(NNO2(A+oYCcF?A;)2zz z;M!<#%UTWuejVyxa$Ai)PPO-zxV_p|8S0ot=imEPnB5(a5SC2qj9UKd@J-YUsMw!< zPo|XyA0=f+FPiKdf>(54Z+Rb;89v5Mv*yJmZAx&YBW>J9o%@54%k$Zj^PiI&L989s zO=qoVWUt=(9#G}593M1{vuV`=P#o1Ae`$EsX)MYl3G=VXS_^X!YR~37hwvIZQ!afy z6Eiz-cHIyQ&%lh{>^R4RYmkfm?8>H~=e1we{icn1T!Q5zvS|R=pHyabk1-jmn>TDb z8c^6s{752Mi_`JM2)fE&gULU0l1-b-y{j*UKj+lG{CZbk?#{di;CMm=F`PW6+TxpB zmdVbkFJjz?*D*1+B$B{uI7>@3C&T^GL*qBr!^)_*G+Fa+ER4571;^n!486O5EA!sk zemnf%AMY;ZwbeJ44{Xf^_sN6woLBzzdcGB(x}xlUVa#I12FcbrIw;_g7-*c^MaU6E zK}gUcHY*xbu3mAZuCZ!~uXfwh%m(aho|9xwTncS0j?4``*FC$coDcWh%;{m4HW?i{DvSDY+T*pa<=4^x`wU$*s^BY(XD>okCE9gl=&&(EytTh}=8B-@yI z$Nmu^T$P;`D0zuJ>C27$5NFd$(=s0Jh`G_SBxf~RtcFbryu)asi8&_U4qFul!XD4m z>!KrAtfO|)XyUHh3@{~NRV6V-WOtQ6TOCu9&N@TMtiX8%R-fo~$5IhV z7c2I~eJlS(1`Btww;2mXt9^j_89_?DlhD} z4)l4?VRvq&yHY^W8i%7}-MvN4)C}TPoUpAb8-QhV((8^#OTinQ08&~BWCX!B{igbapf0JA%B2j*Z1ZAl~2cnfZ`(?*%bj`Wbe7& zlZnBK*Eae^*ss?ZKN?LKwB^M?e*M*Nnc(?S0i)1v^67tut(PyzPWr-b2U6D6ZN9*klS91;RDb||!022Q^R8x=<0Q|bY>!yoV2r znxaGwrXjvQfkn<+-++$@9nByaV>|`K?vboOgo)nfwpU1n5U)7V8u#6=G3HXq;_%?Y zd0d0~i)OQR+yU^zNSIHI2__##2|5`k{9Om><0fuuhqyVOn4f5A5^`?mU{je<<5r*_6nnuZ^^H>}m3v~+(*5s|sS9X3htz%q=WpYWuzk900C;my~Ukn z5~sdArvNL#jBxDeHAcHNk8lv%dBNA*^e)HuOF|$_KqVaQ6MGBd1tJsCti_p_=H+Q# z1L6SEpa>9O#dfId%MXCyLGr~&1uEm_9Oq3>uNO`jb-wRTA2A&vQADYHcf4&L5Aq>C zG%ywVZz4%APVHBGaa0($UwR&}?PA9Uyt&&RKzTemTc-#Rh!(@5+ zV%9)flQTP?d4aRKGBLutYH4!jXO=)mmxoV2-ir7X7t2GVVy3a6|4Y4(6*tX5u+L$> zdj2}OE0#fg>S=vzpI(j4NLPJrvFBCcfjNGh|FPlLBsMaM z5Ww3`P1Mq=K*ONfLd3Y*Wvh5w2c9IyYK-Wui!nyQAG<~gD(M`nE{ z=bn$7)_U{QygayDGc_Q-b-eHpMX1PKO*3)Ui_xr53JIJRyfcbxLsJ*!^8WB-tdn-B+Q!&tNt104> z2Pu+!+7uu_nsoJjOCL{HfM6aI>6;u!evBy~U`3K}J#1pwC6EQ)G5?87{HrpiB~Ugn z&9Y+Jy#^u0-d|j%_%^ilm?<7jAENgaCt1lPf`Lnxj1zIh5N$D+KEe6tB>(RfiwVP^ zz12xcqn8#5fZzr75#fy8CrdPIt$N%aAu^JdPN%0IT>8qoOSW188LDOpLGWJs2C|)w zxPQ*ND>MKOSDQfoP3lRUHp4!ts*(ml@PHNJ$X?NtTVVCh{c!rIV(joBa#{!7%Mgj^ zkGqxRpa_%kA-jj9QScSaH`ijOQSaS=aodeXSV{@yF_?*`KeL}m6D^&be)N~!Co|fcyw96ToAI%(`j54RB(Fk7LflMq6%lrRghKD@_@^P4>PK6< zSgS`2Xaks2D<#gM)&`RR<73p!aVfqy+2I9^@M}3K<+5?#2rCslxu+73e4zXTVoJBt zp9y!zA+c!n>waXS4QbTV+p|%=QS)Ebmj_3(v<0v%aTjM*j zEulN@d&oDb4ml@0kNXyGX1um?L9y-2=c_bNT%%z>3wPBY^Iug;KHElEn)XjOwWyFe zvP0>xN_l>mhVKCxRx(EZk%F4kGh9~+WY@+E)&GzBH`UV82_zLI|Bp#U20sW8V{PcI zr=lfh8goEE^+j2mFivojq>q#xG&Z!yK9E!F1Ng{_0y z6VW{xIk}Y{#y}j2 zfMM3SqFS{2IHsuD-Nx-|?8fQErT!bDUQp=MbopRcP z*ur&X6RT=vgut3 zg{O;H%_{%SDQN>ar7bN|SOas_y=FU2ZF-R*=QMd6Hi9J7F0jE|R0Zp7gUTp{{Pvl= 
zah%zOO>2V`!rE!vSK5#~)@}kM+d`){e2~|FdlPB)@8lEoR2|JAD$$W6Qws$Hc}OkUeCo zy0CjmOD0;$Kv|L@74|oHXkRke8%2Uge{1<9-&q!0wto4{^XOMf;J#33JWmN*%tsd) zw*V8|_mU=_o2D;C+q}vSRIvscGA0GTb2LYY7_Ph(5=4-qo|*ik9A+%E{zCj_zdl?1 zQCK(P;Yub|?rFfkz1%kY_BmUS3o|A|pSx<$i{Vc%EqMoMwi9i5fsFz_(tLr;vF*xy z$7%!4d-dJbCv)IDry7(Ht z@X?@q&V_N!9DFAE@^VG)y5RHKa*X2?aOjxz$-Lj5-jzgbo&98b&3EXGF+i-Z`+WDy zo&pXH?~|12yNf}Q#*S^h74D<4w?3FLm@c|C@JrM__8Q-=?ce6{|MrD7G$<AdPL_86w4cKtb@?rsXV5wD!l*g%?2S5q zl{l%V<$>HKN)CPFSTiX0;MK%CRW^((oDtDUW6Rs)X^*zdZl;Yr$DdhOw4MlG>@?&| zc`f+wggae?PIrVj$xj{!r=HAM#><+=JtLLZozrx=7n(E+Y?WD3zBc7GyQRj4*@C9pVDl;;v_}lv143T3!$KUg7w&22 zEu_hsmZ2Jg}indFD9`$Io99d+_`f|gUe}sHMaVM^UZ0$ zo@y1-|NL4FQp_h;0aJxeX*ZkNfp*!)pI*JA;(0DIwEm$a+s^bLS|8naEH0Mb9X(#E zpK)`Zb5lhwvmZn%BE8pgbWiz#Y{c{Mrq0VxHA8k9i@6~r!X-HYO`jL8cJ=L!!UPbV zEr-b!9#*e1UBe}Tv$wKjhs66+_}uN!F-X0UR~SgS>!;I#f!*XPv{qr0#NxB&x9fV^ za{=I#x&rONoP> zZZ0#J|I#RR-N35~}xoSMGMoOP1cGmHag7OS}TQ0b>RHq1?o#~%$| z%%4J!u20(+NX|#%1AZ{N4+s@4-QEJgL3@)j#~-xo4mJ;tTlf4cC`;+RE!x~n+7LcP z**Jgi17#$``V3xG_J7!2{>KmWzx~ks2R4Tls-^v_@g2mUu^gxQ^C7Qv|C6acC&cyK zUcjq>GZahuTP>-t>lF+|ffq9i^!|3I{|{$h9TwHrzO4uXDxnCdw16nBD1(52A|eta zAf2L=bTg!cpiRnfCkj=Y_#&zY#9e}~a zfFqN)UM#p)eXR8bwYE|5L)hLRyLbo3tw1;ET9=Ir@wCBCItaB+k-_bf< zwv_9)Xj?--9d(RUpnAYD&!F`}BHW^CDYpeRV2Cukbm`K=8yU9gW>FSiMf)pdLkt(e zS81;Fh8`iletO(z@6t6+wx44Y{+^%kfBA5Z+Cyr9_t2Y7xF*(N6v5xf*Dl;8e7M)n zAUFnDok?a%!>|jw^&Si}y6U!*2glIYuY-eZSSGKET>`Iglow5jwSJA*`lw^(I}awm z6b_l^tuGj?RcsHohfax{uzDeNVFXMwF5qDbSNAdID@b-WE;2%-_!8FQ!uq)o6@({Z^ zgc3QJAd7LRM8_R7M6_@fMFqJ`G0%Jmm1EyiEwxQUto!Mh$_Y&1=Q*x_Q;563fcff+ zdc_uR)Y8*9SYXn+KX&MbUbb)_GLMiX9N|1b0_>Z~33BCp{fXXb?)?nh9TU0-e#sn0 znW)hCSsMAleb}n8CLaU-ZPaD_7(=!Q_5__IG9j>a$1SeXeIpcU>IQb`JT}Lp!Ldh4 zMNJf-q=WS2BE{gABvENc)qE07O%-k{E5`kB#~l`_?PQ!5ZJr8&l4+peLTYovvysd0 z8%=B)_Lw^S!>tc&x0+&|7Sd)acPg@UXk8B>2PKT@2(mW=)mBPEDw(%iW56q?23wcZ z?XsLlTH0QXEg6dLN8en^t}_kJERnpmAa5_%CNO!*W;$G@G#NZmS1Wkk`tAM1qg>@{ zwI^F?M%R2!Aluz>=$Y^=m!!qns`b9^6j7{k8SVfRMuKh@bN7DlZZ`qFEqZLEQgplj zNvrcHnjX~I9z2K>t;8{kyPsO`jI8X&3P=de4R&a{azoI$RQ{~un!=%AmHS?cPX3q5 z1G@}@NFBG5xH8~+`kkf=+C}#JckB*33Id*N0>ggWG3m>5hS4J$4uH`RN6RPvp8+NR z7X$n2ccK=)14X0A_Emt!jjH^R#qaI4yPG3NM<^pi)Ua3JhzG_`EaoO3lQ~E2)sVT3 z^D8B4!{!(7IlE%#Bt@@5FA|ofN$ntYTK`Nv#1>pPYMc_cl&sfpmo6d-K)obAuT8K; zc&DV@&VGbur2GI{wp^wQF&TE^`)s0nQZ~TVh4DAokuSNux zcZkOrtiLeLIFIeeb5w#d!2If6wE2SU1r+q~a-fJ?$_E}X6cNuEV5x00!XV(+aqA}VjkY%**Zzx+jmuo+@j@i9o!F=q$lo5H`nalb`!$hKI(F?T~u`gaSplZ(QTe=n@S6&xwZs=_&;!7B7d z+%SvkE9vUDsMu28S0OTeveynMK|qo6R@T)y9o4UPX*~J~@cb<2jSBbjMV)6~`}ZK0 zPX{pG%^t;<$MOfR(&LJ(7{5Hcd9j`XK73L4!C|Ouk%kChqo%6%(kL?bGc|Wu76Gize4rd*la^s>FnW=u!;HqP%zk-L0n5y=cfU_(GF|pYl+#ynz3#dVh$MJKgw!|8Qr`zs}Ucjoo2!Q zMz&YuDPPf|-|iG4fyPwRp$5O%V0U*B7AB;722Hp;_Tx4o|M+B}X2lRu_b_?ey`7R?&_Rc3Y(_*WUpE zd;a!#mU|h(4;KXB_764)Y%nKYD3};MfHKiR9!#-uz%%c~iBM|u555I{5qH@-9gcx8 z^HT;~s0wh(xaA8?Gy~nHq=Ivd8n(cJo*#I)l#WfYT2yRp7e=F+yO#A6%UrDsMI^Lz za$of?m809*d@I+HC%9=^0vhNlL6k*nGxy$iP%l7|pDL!rX4a?GU!*{T|Jl&SN$%q6 zIf{k#4J0qaj3+vc>6))6`u3ua}qq)bg&C5 z(p%4#N@nA?&-LsqNMhF|@K|_-h}MYFc~9b_S$`ep{r@?Yj6Pmy>0|AhxrX7%Akk5m z89rp%^oN+?=`^$f?qDx-bnJukW`|@96gKqiS@4?=8v#0O+QFf{eu5b;;?RSe`BvK6 zYBXm-RRyB{R^0w-5c@|k?IcLx8<9=JdBgtgG2C`hfce)fXY58~)pf{`nJN`C3F+q* zR2gfDTKmk}3;w_=ns zR&VqkVzLget-7u{w@yduXdCmuufYUYH^R0Rv;Y`xu-NU_y$tahj^Z;6*RxToP>DS2-(H_W~5F5b{&abGK>N4T-wh> z#}!~5Lnx<4Au7PCPs`yv_Vh{Z(lmFZ>vm2u?*1h3*!1bppqISoM_{OSPbq`fP#pKa z{>S^7SO(*RSSy*1&_KTmLCdkd|_?{0r|pf+r$k?QH;EaC_1|Rdb?u- zpQhIC%x50A9InokgvG9o^$i$xBR@)XuL;gC3do=-Y~-ukxCdeFiylDOAlag1vs2n^ zp8_tNvu(Vqo(9kEE8a|k$Mmr+O@`+VsKXxa!!yV7jYIR=Z|!KJfn%Ut@yA8ITY0sd 
zc^lWTngTa!q*$x7e+Jkhk{^?Z=bE8a)(Z{js9fD(Xq^BC%7^JA+$7$`NyGHOiXIjr zzKGL*d%Zu%TeX;3rjA=E1`#2ScT(2EDU?p*=G=_iVNYSQ!VEdi9P%Y$IOvGa0Ppi+ zzvT#c9dd<}-F()?{D9S+Ws|ycj0+Uz$laViyNGMi!Q~>Fq_w~KU_fwVIttvohS0(3 zmnQWvu=sot&}Q_f>FfQ^I{|i%*NjLengq?5Op{^Pm0|w6FGXc?D~ZRoseChur(niU zd-EcCE6bfuNh%$F`LOD6U$l?k&St$Ue)~EayXro+T{iA{5_Oc2cHCty1}r@W-&`~@ z#OwIv7;brNDTwf4Td#p364*RAGH|7V(YrVaq#%{A@zk7uuq*`b0!LUpeDKc6f=mx`AqLbll~EU zh~p`m*9o7)0WQsdxWRL%ms!5?Kw~p@R=CjxgfIE4WTFR_=l~0sW3sJY^qY)3mGiBY z`zt1G!e|+lTWT~jc6B@7KKp6W%35fn-}XhpD%S%to%+F|dE-EJWf6WFg?STvIE0d zni*`0sg89M>pj@2gDXkdV_ex7Hgm)=V{)ZpXLKgFg1VsOR>77!|{^sOTSd96U z=+V(blVf~9@G%eRYn_ITx#UTt#7v2T5F~nkcvgfpY}fCox$ z3I0-HoWTk}ccHTRR-k(bCDP!aeia29ph!jUp=V(?7}WMoOVU1uV%Wt)K+Rqv zz?aO&+JU_R!6K+NG=7sMrVHY1HWM$X-@bbF>O^}QaCEQ(9)RTyehg3NbY4O+mR?J3 zD>l4d6*7EgV#~S%|5)Wqz^{tol^Z=i{&N<<|B54BufIzY*hL)+U~h8hG`MTCY1wjN zKy{noA}1PbPhYR-_)#+6K24hdS9k1IUkt>OGRHns>u3(1T4BFvff?k@f_I9n*HbLt zFMXu}v+?B_naiN5+)9EsS1qJ}T$({=YoX}v>kg7-W0ty_*YVMbK_Y{(ww+QrOlBhy zTpB2P=cUJ)72E12i?Qq0wTghh@WtuYkKJW3zXR9mTG)kxOhmI{yk4Ut4vn+` z_KfOuECOBX18)1vTG4|!Pn2jXF+ExQkUq6+^Icpc6aHG+#}GUhB()0#1g=m(u-$r- z@6sReO^>vDx=X-(0&2d;@(r-c$53kGMC8Y;%E5>yUOO728&pMCwP-|_jp2M?X?M&- zT@wl_$AC=xY58!L$9>Q5h8^c2$P4x}NWzZ#=C}dsV@30Y0fY)MkRQVFZe!DxJ}w#9 zxt=Jh)L!$Xj^c5dIBBr{?Q`$;VIthNk7VKT2y|*sjl9GHDW;tyCFX-&UjAXHY(K-n zIAQB!UpnwgKi55*E&JT4;qst)x64_r;WY6M@;L$O6pL)L6L2}SY=Mbxw zZzP{qVjckrD$=5Ni$TbOs#vk(88R<5kLp9X&51KCSl?RC??QoC(s-G=VoqY_qYgIx zV;2l)HOS>4?#(8&V}NyCdEkU-D-IBb;10HiM(wJ-RttX-IvpL%G3tXa7n3YdJd2R9 z&I6fzrA_0PKQj4b@^kw>dw{b|-h)-`^NeWj?>35#&iUAkFlNC0caZlpVn%pM=Pu@< z+l0!@VZ)YQBbNTMbL#7mFfP0xA9PJRg7Ye*L@z1kL%=n_W_&nKC~E1tC96X-18GDN z{6UKpGay3)$?=JB{Jwx$OeYDpbdR2=RSo933dk_TQ}4Mmt`%gtw#GWIl+1Wj$nj#P zoDO>$eGntM*q4&RwVH_O@U|_$8m!S%Y@!1qvYIeqXu*nT;(!DP6t^_BV$1gf*^j+i z7Bzm8@C_433)lTuye#mV0+vgY{47F#{^h-x_ z-zXY&GMR7MO-&3rLX2|31ATtz*{Jj2hyOp@|Hc2Ue#FMRO@cl#1?9rvYCAA1?b`pz9sy{(Hz>w=Yb-QI^bB{c_8Y9t~bJVFy^TiaVM6ZXE|rRe>ffPRD*gAJbbY zC$jexc=7V%28jz~__kdohYL>tZ!3e)ssYQtP3GKRw?sw?A^Q;TCgG9NJso${*X_=f z#6_Ql!oE_wMSt_>eTPZqVPhasqx4KL4pgm1MwU;pa=VS3%@Upa+6Q2bb8OXRRwz@% z1!(kr3}JI*=QnRGkf5E0=YuXRXhZ&OIOz5P24Yq-lv$21IGi9D76BsN%@_;F&rw5% zUJKo7`bUSJ{Y(^aGAHi9_7^Cc>?qcO!lTi04<>06lTkEUTBv=}j=9LZSY$d(@!8Ov zV#U~cphMKH?f%3^haO}u)pLpvfM0b78nkA*?|b-*zLdH}Y1kZtW%bci5F<;w0~ng; zzm|9hnd#ak3j?nfNt8i$x`6|k#guICu`BoSuH1?Yv3zNQpR z+JRtXXs=OmPCnB;K+N4}9iVpyP6>}rLubNguW)?=ek0t^e6DZnT;;+>AAe~)*z}pV zi#oajAb}Lu3NidVKZxOH?im#RubvtDeO^jpnL!@}19)kQChYW>bqg1mgAO>D$STND zLsFt!-kh^*=PtwHuH>TR{xSfW8MmD>Bt&TPb#%f@Z)(ACz@=0baf5ue%Kp?I;1`2T zw#?Wb6&*8Jw@D?L(^s!&$ON6Cl9W3mmLvwHH*xRDTvJOx$U3y(I@u5~v(ds+1h45! 
zsW(}|czI_O4yG4aeDW|ato-(xV=38TgeXfBm_2O2O0{bKQHrSY*Obl0ncWeGv5_U< zX2aL)>TpQhCR-Vk_995V%sz;*q5@}xjz6Bpytu}+3cEc6RBX#9GDRcyF$2I8nK}x( zm$uEe#R;`p-lPJ||LjG;{1uI^ojdWrg9ZQ{1w2K)x&u!A_IxSbhSLJTUoGn#`&vJ` zQ@K-8^^JG(jQ$<4)Kb95ix5-L1pfgt9U=CA19mxxoLeij+VrC3Pm;tvJ>3#HRCQ+j z#I6qzlzX8JoEmIeChAWFzVQ= z)d}|P5eFSK2ot&?zL@F%9kJM;n~X!Zefl%Rf>tQ)I*tDe{N&RfkTxc~ofE_*QNAoeW5yK0W1vkUyd|W`}$3a0ogj}^aTZHkS7)14T$Yi^W za?i2EPl2)V~s$ zz#mAQc#!ocmlMOAhtMOAQseK3vtL|MiKxUIMIxCpPK*92?CW2ceAZTdsz`8w7qX>;L82e4&tsL?QH* zf8HOonr@vb&yW7e16O}y152OhMoj(_@%()taQ5M-_6Y%29rE&b{N zu1B2>aw4X^S!24r7X7+lR`yfyvk||=s9vGzfWq}}LccL<4`XrNw%D5nE5kRzT&_I> zJ-v7}-m8zPIHSm&SMr-4UpqF9&29S7p7U#%dFKw*LV;ez9nsm%`#Lo^A zC8)^tU`f>i74ide8su~fL9CbZBtx@c@RNE$11J`I$gP@P&8aEy_|i-}A9~Y$gDhFS z?YA{yo6PUrZ(4>5^FHoskTvi+uow~CTl8G)i^L_2hp_sgdZnq_F)3|mw}DNKG5_DV z&C)Yb3()kP%0>U&TR-n!?2N|%I7HLUJqDjgKGGszoTPrs>PP)HqfmXW8S=)zEKSm# zIip%;AE#E}6leNQXEsFhzsj@M+y+eFn$7&?h%4bKPD9(GGV8^C-YeM+PzVAI7Q(Gb(w<65=OjfAZ-3FF4 z(5T<6f*$^*?>(%I2EKNGU-%eB6c^1*-zPQ0m13J0)7PQdVy0l2%0w-}F`FlnzdNGQ z;<_WHTQkthnHmCOOcjPkpQM3!-{A1Zol%~cn~Rarzc?e{;E0XkUt0WrIB$h+O8$I4 zU1F3E3HqaQM$X}Q8ZnPr?icfj+09=cH0UDt#bB+A6)djv$EY{8J7rwJbIZgk&D{NY zy??G6mg3wF)RMpIPC6g_b6drTP@5!ZoG6Yf+#jenhRF;RTwW}iLE zTq|uIJ}Xdg6}gosH}~S_rT%$N3{HAjjY1=X2XfP+*nV8mKe}3Gly~(|ZC<~n@TxSG z#SMc}+Q0qsKQC}!SMZE#fm22aNHdtt{aDxuTbaZ&e5;X4Z?gaO+kgMa=<#DhNA1&W z;-60<<^WCTIPf8%X@v&%d#p8WjPwyW|9W$(P1(jpZ!X3QnVNt;Ah-!jxek9uN>I(O zH=_Cj#kodkU6Gr{20wi)FCQ_gn*?2|rhRzY61eGJDAE?L^l+wqwbSC{Ev53eb^PN4Zws;i z_hRDqjnAJq4@I44kE%-KIIO#0Fy8#?92!VaGfMfX9rLQ6oRadSy?pmXh2l`v6m@EZKV+i-b@Pm|>lifG;`|O^7ti*q9 z<5dY=lTp$ErH7ZJqvsW}%$K8{X{0t-mUEtEO8)h^IYNB*!MogsmdjB=n^L@@f1gxU z{AVG$mF4A{{HDXp++)S^$E2jBNd1oT{gz&e@#3+vCgmEVmE)4sx;Uj|{K+B_fsq8x z=`zeeIrP8s|EZTfnlJDq>Q*Mey?6e;Fh1+UP<-^>UKxI%T3T9KNKdx;S2w=DvEpwN z`}yG_)0;B=va^(w#=Z!F)<*GhM>Sz#*23R>0j#IKf22Gw19BBlL-xn!U4_59L_9OG z@uJ6%gr++@KfgQ5u>JV&=tn)7M{VuJfn2+?(H`cIp5d2^d`&SVBMn!myPzYYgDG&elO@*BZkXPwR>5ITJlAIcygK&6ANVzNy1 zMX%!|uT#}--n_Z7J!BzmZJm4S9Iw~~e$#UuSd8X0dzSw6 z#*|*Fs&jik{jX1SDlz^ivt*j?EROm4oQgW8#+GNbG>szvevrQqUd)u@<+h_*!O0ni zmEkKuXNW;X^GUj4w^XELWWu~i1@CWz(k!fEidR^Sz`ttlSTZ_`Z zq;wz5h6;t6Gjv_K9cx$L^*nr`%UV+BsTAT!9MJzf%bpRI8j-Hr=E zR!J|@Drh`2OqN7>zkM+(ApsKWJIgC)|m^i0eCq`F-hjQ9l7^pg_dO6#4AlHaLNFVz1 zV&N32S3LJxOArd0nxBTJ7q3L_sVlVwQ~vG@ce~(8NPG?{V}(EKNoNhnUHEM4dsCaL zE66wC*VgKB;Wp#oq<+t(D0fi2t3UVk{o`9+C!7yPG4tFI345*p-U@=x% z`ar<5?tCnh3*N+unT#`pq_meo<&FP!)BdloUq?^IpO}#hq`d+}f~=Kx<{;sVKuXd; zZ3d8h-mVD{9X{cv&%u|)MwGpDT0IpTR)p8H2Ub^ z%DUXIjfv4imGSvHo4@$uMty(r&(;9vI7R6S1AfhXo*nlv3W`+n)5Q&%WQ7|dU6n;o z7dN_tI?d**bz?bOvq<-dkC~+pKql|%%?WP`y$xchKGYg0e)EL@9`~595ujD?TY)+c zC#q|Rji*qJ$8vtvAx@*zVbxrxu~zCjK04@`(4IgY?}OK zq*bb;vEj5ZYs7DuIm<&O{DhmNN?e|Mpz}r9pe;5HQL)}YbIo$R`j&yp!-tHZ+e2|k zY36=oAYBqLcuIQ@t%7IaoH~`7kzGLPe>W`*stv8TdQ=v-B`-#`x3=~yj}(|h+Aj8j z(`2l`bbNYxdIR*y7#q5Ug@#iO@W)wR*8%z*qMD0xv{Zvotm@$c{Y97*+(qTPkJ918yjP8>OXyo z&zW3`k9OW59an2Zit0e1Z&}co=j}}Ly|?hLm)LdD?7NA#D7g}oHxm=Ev#!58-%8?) 
z+70i&CB-SByH!vA^w9ri#rmgT&ImjzAew;-EnkloaV?!ek9(1w@NS?aiQzX30crmG z;6yK8#>1Px?RC6qt|RfY^wdH1F|sdFSD$R&x}2Du{ScG}Xzzh<<*SaepYmKwQ%q@w zl7SNQlb~{l+mDJXgoK9QD5o^s=tSPc2hnHUmpP;uW?N&}K$#$tyGusw7PDkv{^uQo z3vF=0d6qr!Eqp(cuMMoy5p&iem^o%iEL7~Kdm7Ih=Ydyv#S!e$G^me%14`@!bM5B7 z-Zn3praCWZo62(Ol3*ueCcUUz`Exp|OHmV`Ix~z~+wt~bzVVZLzQQnxwHn$P`LC2J znoK+un<3n2P{|bXxX$NhGxC;mp+5~w4y2q38~N@+2`?3G=j9vo19>Tv^qNbP$Kf#|LcZMr%>8i>ZJRIFZFAViy5%&Llz zixZUaKhw6rhIGmOq?UJSyn_4zRMcO{IM{6TI!1JViI1yN8;H;0OAfK1aBXwGt1-}G zFh31xG%R;8MvOl~uB9;n`YeKv9y_iY8Q%zmTrr3cG&MB^+DfaE-rfi06j#9VzZbu3 zVc@gt_W3ns{7HgBi!pAf`kQJbW~aCverxN|9!SqD&v9uK&cz+6-2HY^sLd5GXDi>h zYbrG{(3*{)yb$x^)O&oiVLT$)1wc{hCs3lV#$CCq=Nd?NwEBXb?+LJ`iL{^@BG6RG zekq%7j_GBio)>7a5)BldAsAc-RU9$$m|>TcwNoRW4*BPoGS&c+JyUUC94u!@z;I-h zcRuKp^y`VQJf`Ei6;A7ZupXG}r8Qnsn}hrgR2NyvTgf}L1@7Rfd>`E05^HJP|^fve1O=>OM;14M&zMrqbU@8z=`t=GqtZ-Tq zHlD_NCeSDT#-H~uJ`$3ssTga@`i{=BO7z=HX8dmmfozSVckBhiG>!&t4-5=!fTFA+ zv$?)35HSTqKfdju)Bq0bB~V6{T}ePX3rY~gTW zF@04_T1TsiJ*@>kB)01MeLg}&Zo{Kl{7^wbVbd5(%xqL(KLv2SDX;wLF7)0-^|J9LOE9>A3AB#Zj{*>yFCIjg%GXvUvcQ zeP-yq=z1p}aw%2r25#s$>ytARZe-fZ)q42l#|(H^$u32~R$pz>LA^CgAc1Co8a84Z zstJl(oU90_n4>OYK~i3_3JEaR^9BAX2tdiNco@0qzRF)Qn#sC?eR+u@;&-|YxhvlK zHBw5RWq%xn-mBk&uM}hYn1(*YmM%MoM-4?H zk*mdoeGjtDXXWt>B{45gGcR4wp9KAY>lvV#E!gl|{i{G>R#w(PTg}VzC2^9(jo8l9 z@{b-(Db^YYjbV4jRylZh9Hx{KERbgs7KOiPsH)E0SHZK~cguGGWUKPqH$6%E&{ttO(od>+D(Ii`R zh^b)vJtOhkw>_9a*T(3g;z?$g@j22l*PS?59?Ys6b=cfkaGmbnz}5Qt}C!i=9^99Ky7jva^;P$A)WxvrFfhuqTB%E)F((O#(O1WqDTe zA)cYU=Tp}}cNy3!Uz>pVsN*DwsDm@BEj6ocp;jwU+?WAs<+oHfWk0Ta3QEib<%nAf zVk(%uN~8GA-rBQE3J;<|1=W2$y#%3{%N<4jv^NzRV@MZX^2YU#xOTu&-N%zNy$>;G zG{^p#ZP%lLnEYU z56nv~a!_u_?>YuLikTm{m`8e0pyP6BXVyvS0Pt~x_T>(nUi`88G z9ZIqX|JXNExx4h*12Ni9E1UIM<5{kLP>HG4$o`D0e#NvHL4#_1>&|Vm3 zK2kc+cani~ORTi+vl zvp`n?S`aIE>zt0mh1b1@E$c|^4HS8U2+o0c-u^9OW4)2VZ?IKNa$*B1n`0d+;;`UOaExEU!*f&Vu#e<0=C#l>RpGzm+3)n2#g9p*y+Pp(hAHDh} z+IRLln2|zYMx0XF+^ndGkymq3<^CQ0vn8r;5qMOd&nF0fk@Sx^^uLJWBWe$$Kyjrq z-BC}X^_BWp6w}~_sRh2o9R0Qnyn9VxDTn~30p;uH0|7%}^$nqi=dK&Qk@MsbjIGNc z2OBUgH2)!V3P5pJBzZoARsCt@5D_2dC{a*8p}#U*B6tYMr0~dEFcA~q;EN+g2!SOH zCfc0q^{vS^u4-!0z;D?Vf6_SWM_g+a30w6tshECl5&nc8>4q_`Nx<{2t-V9catgM3 zbzi@stUbpFZ}m24nH`o2G~wL^g>KV;UG-$HjJco#D#V|MpO)J;5UmCA^=22`8k988 z>y>M6wmO_3MT3x<)avjxC5I`Y>qoAcj&jkkIv2(omu!b4b|mL`tL>K{mbke*)Mx4$ zl=3?Okkkd zanLxW32?QoY0#3!0}K9!J)1_a%Y){Nvw>D9y4|s?{*raf`ka2GM z7oq3PjP(qFuV+#V(djlGjy-U4PU2 z`X=QDqmwfl1el!TGoc(UwnE2O86JOmyW9|G#xp$|%L?#7Vgyluug}rP6 zCN3_Ll-phdS%BI|j5k=kIL&e@E3`cn4*dXCq#f$kmc#n!Qt)1CaX+Ii2{s539~uR+!D#tC%X%k|5peQlpQSx z<@ip9$3B{Yfirp3ysB2hkf*GXs?z^*Nn2T;H&sn@^evFu8}%t% zew+2yknXQlO-DTY^vkq~t_8HJ3y5ZDRn=;QzusXaW4eAP^pfmTP+x?hCGEGYz2}IT zPniaN;$5@B2NVDq!YN5=4uI2R11>Fv`&yLlV0zbLaf;qNh*s`1hDknuPV2d7SqsGi zz+C&(fF2}+0L|WYC0t9w235nJd*$WA#$Cw)%WVu_)(=3FS}2&|+LLLs(4FR4c!`yD zzzT|chC%5Prj(rk4Z@8u5G4+-UasG;Ooz+K6%HB1{ane!On@j-pa7lsqk7d7NW%e9 zUAQ>et!|!x)8CPm_&ZJfFlscL0cavWb;K_f(}y<1sFuC}od(j(mhd{j+RypAZWFaHAPydLEw-M?(`_c&(DZ2Tys(AgsYw=K z=Ij^G)Eq!i+$qgdU|tXel}9eSiMSm&LUC{$)k+gMgiPJOdBv2#U$B9kaS``1dZta^ zafYLJ7KVp4;W-Y~cz0N$uz)>VEqW7EJpJ;9$^J?|;>blOKCyl*x?OZJ6|nrTRJ$>V zba7r$(|%qLV9{K$=nM%8@?C(Ytp?~#M5;sQQziKt7kT+$C~Epn9waWi?r(?Bccm<1 zCK}u!zv@tI(wFJB`~)95Y&wKL_>z(}RB$-uuf4JMY+LbDhu3YgJXM#d&k zl_q)o)VX(ELbh{|7}PcUGXU9Xjpl1)I$lKK8S8z9O{2)?42q0FM6<#Fd}7`hh(k8M zij%1o4K@N+sKs5rcE^<@K|GZNX#`!O=60V@``1 zg}ijpvP{l9D((50(&@+HKLBCd6nOaFt+x1E8sog}-w$KB1a!>VKL$*M(PUv&a|hWhT$C>6%|uGs z2lB*b`k=a!r95eeZ%MU@a!YI*$B|zX<@+TCtEWC{V`7{Y;Q)~M8eeBc$T@0)A3^A; zo#3-)!)+P@(Xi$*hOSn{>cex zbiMI?x+d>o|C1o%`c;GH&IeZ;X_?7m>*g;6u2Znd<>!el;Q$;+1DjbcURpAp@A(+X 
z6?U%{lHds$T(X}U7&HR`6;i~$E5_^>rI z?4~9uwm23wH8s2%!TM=S(Z-A3K>mByw;mF5W<*l>7zY&JV=6(~rWpX<)}5m*#_nKm z9}CUJU{A0_Nyd#6HQIYasveXY8mhp8;HYExO>c>fVabFK&1T>+FfgX?irarJXYBwM-QoV zjGA~I2i0fHFo>1X6zdqIxf;L65YNQTb;oRTn$u}`l;o)BvBNT zAk6CuXI5&NK(j{f(^ph0zItTxf>gBEEk-qA$4(w$RBzPtc{l;`JQ038l>(3W>m}xa(?4n{@r4Kf*q|$X@#^CE+gN>Z$~_77J8gTp_?DrhE^gkk8p5BwYXW z?)AmV42B|qo``nCNM+(U93gAV6P3R7kWy7COFS^gFLE~CS&oOQezq7&Z-8km-s7;V z1@}%@*FJS2ExH4;mb#Mf(@(e%etdm>@$c$RC^PK=IDIo$OL~dQj$H4RAo)u_82-Z` zLcP6$kA73jD%MZe!e#Zh;X~724kznVy4PPdD~XgShineR)VM2zr4;lrA0J zovAg%q1Jf&3l_-qMjmfeE)^RGox6HR$T?;nQlP{H0(kYC&z=V%TC^$BmpMuYq(LCb zZ!mbIQa(?mrZ_qx_7VRtijFh44@@XSw@qc6y!|yWE7IA)D_>Z%dXZic6B!5D&tyUDuW9YTv>P7JIggZ!K zr$Z;T(2PnrM~#bK$o<#WZ>w}I>29u74~}{wZjks!7uVmllacv4`+h-Rkhq@0v@pXE<5+L;bp+Ym(D^dXlO1Ga2(A6Fy&@=Oc>P6-makGjF3c@D7 zZ|F1U1rn;_gdDbQJ9f%%J_uIL*S+c`P^pro-3c)HO;Th;L^FsG3r@NLrfBXAvhgfj zTwGHX;n7Du-SSWBS#M4Puz#aoFy~})tr0H-M7r~-AeD4NbBZasZr|@YTJ-Qh(Et_2 z1Zj_fb)rD!+5Uzv6v75Q9l0lz7xPX{R|uUYeHGQ9L)+3D4SrWS|3ONij5E4N)k-g-Vz z7QIq%=4n-Y!PHCfYf6HRUp%dX8uP(sEUaJf@6m;cq9pZ9Pdt(Mo<%EaY$obRr#4XM z)!zrQ75+Wj7??$`&0G}oQb(dM)MR-(243UxP*~<)b5T-j@CCG~Xvko1Cm&vAiV7n6 zWJ*BXZHfn2bhnqo+%(VxI6&wOuO<~|97|W?7|LPrQ_sHa`B(mg>3ja9n`XIuy)$S( z`D}|2pLF5~HoIA9C+ca*>=|x_lP8r~^##!=zG)B-+lB>xj*cp~AzXTgzg|MgNZM#F zoI?dA=W6t-KKoGWoC7^vCWot$At5Z)4cND{0n=HFz-q5HTV7vF@3>kIYN<`l}&-56~7PGL`z0|1O5N)K@Ab)0WZD_RX%qIJ|! zay9B=xt$zD>TdWh-Qsa;RtFb%)mE>DK1g-M(>2Q7pJ6%g{q3b+_;ab6veilrMn*E2 zrcLXO4HRFycg#pI8TU4zm0=)jD)7xqYjb5pW3?ePIQTTDhN|)Iuw5MqZ0=Q)rFk{? zZ78bM=>;|=xj~A#QTg^FXh?bQK$e~Et4m*|v`jJ)uA&_b^3nrZpY?**CmVe(Q7?r68ADKzgnBPoHuvdl8{Tavx7+WTE5yqpnvLV193{OhU;HCBetbh_gAYTrMXM)uDYC)lU(S{_&73~qc|S#ZPdH} z&5x%U-2QocoAGx+S9u)X>5j^J_x%mke|>O11_Ij8ZHgeEJxhSjL2C}2Ig|Pb58zQD zG@q}ORN%fG;71m83z=uVYRS!RVK=)3=b@}OILF;_0pG7j9`YR=KYcpGS1&E1}?K+_Ed;^~OdW5<+G zHh3>Z^u#odjXJgvYSMSVSSwsNn+AaTMYND5|Iz`U_0)aI<1)GQuA3d6#8N~ZttF7NV#qG&We+D z(Nxn6mqt>4z6@JZa`JuUy5pViv|Y~C{v~u%@OU`AcskUp&@`l+5VrD?{0bL$q=WY8 zMQpnfqgJWm{ZjRPc}-RBpdeJl^hsS-GOP0DmroSF9pa-OaAs-F78q&kd#H-Y-$a=- zBJAQmWh!=2OqCzC``}!3suZ<2R=s;bXh3?-!6W(+%X^#Iwhgd+QW7Lo2}(RT^IA2?*D~ zgiS?}gEjFubF2;M18f2rdtTDT^)TyB4Ss@0^?ejU5~$E9?FQ0;)FBQ_hsc)Q2J!iF>J4Rlbl|R<0vo%XvQ6PGU_@WC`Qws7TM^ybT6I`IJpQQuf(pIl`OcN?s;jChCrQcv1fwV~9OeX?biCb@FYKZrMi!2eZ=+6%^)~whl3w z^qS{qwNjk2W3+H``dmP2fCzV`Z?(F7VuoYiILGD8OC~%%1JqZq#}n7*0l0?&!Cc#~ zySAzQJAEF%1o`}x+>aZA+L)Z86EhR#-UK?pD3pfkSG)DrKyBt1Dw>$p6R|xs*Nt=~ zALkMVE-D)h7w6cqLvnC6!Ua%MKp$ziRLw!09Tyl1vJ`3{TCv}c7j>`vkd!3Vm#MWC zRC2%7s_T&3v!?=qHrRtm&7JMAu(MFNH zzI?tzSS706W^U91WXNeCbY)A|toD~9&`Tz<;}x~5{CBdqCwIt@1z@as!Vr+!bSk8e ztkmtB9zUSlSWrw;?+J3~Kb0yPD{+4OxP$w_h*&>xIC!`bF^#KMActyO#?DE54rq3+ z0TksNsMIbfeoE^w8s zG}lpAe{CI>#j)F#cZHf;U83`y8=Y}aLfQyU_13GuOf4QwjWiEWvoMXslF39csc;b9 zC|JYeM$`0B0&+Yn)jn#Mq_n86Af#CG+4q9NLyBgi1$&{n%f-Es;VosWQ}=h`?Rs`P zWP~aN<<>?ZsOz4jJ;zq#aMwk$kZLw&QsI74gBGs{o((Hs)YwR?K%QYsf`s2wASRWX0kG=5W9QOGc6NCA>X25)RbNm&r2wk>=qu{R!D!xP zl`R^Jawcj`OAluHGMIQ&B^%R)t=bPq&Ag20lgS0%P9_*JH1s%a&P^CCykQcb0(m^! 
z`iVWY)tBMATNI{|+*;|4wz~h1wYLt7a_!px1wl$lDG6zimXa0~q`SKWq&tQV5fB9F z2Bnd1kZx(|ZWttnP6>b4sCz&A{XYBs{q-IPa*V-v&wXF9uC>ndv*rTNtUzY;Y?gTV zOT>TqcY+BK%Xsxn%V$5jgw4I$Au>mY%C3gS#JsAyG!n|!7ErE+TC1N~jdp^3v}Da7 zY=Fx6R1nBfwnCYKh_FRZR3yaBvp04;_l-T#Xq8V4QMW5xU|`d5@NyBvYxRUUhUy@j z#TeQ$~)!`8w?|n>~vd8J;uKT6Ereu|VXwq_DZn24Ru_^vDwf7jLq*cI4aJREA z_H6@0W-k9qZoKo531fP=>xBCF5Crgfg=w#nwNKE&P2!Z1;vyh4wga$PxE@>FOS&vo8{Vdvkj{0nbx4-+03TEC-r*A%fe`UZ{gj`JY?)=wkKN^ zNeLM4@};7*wWrJU3UGPG!*&ot(JP#mu8gl$=_gtk1}rv8Y3zLsa$O{w6WlKxTfsbS5QHvZv^O?t4*q1yu~$BXd6R|3}UEBk-|R8&&NM zHOEgq6*$7U;;%PcH5DL(*vp%|EiZRCAuH1O$*db$w0+~T%rV3mIb`v(cc;o5a(Lu= z(b;Js`dd0PS;BTY@6xWdm`(WVvbf=OtnI>{!0{PZddZx+k`B~k?_|7*?dkk-pPuH< zY*j(Odhvolme{FsPM;3DPte}9*~A5U=vDKi`uOGB1Cpvxy_P-ibMjC@=s70zUExQ9 z_RIN#xvrvD-S$`K2i}m!#ZdgY#YB49q_tRCpJpsBVc)$1so>rA^Hg;^AG3(Agxgz+ zbVz#Ro4jGyiHn)`!d6qIz8BPz!7m#vaQx(n6~CL2=h>X`=5gHG?|Wx2?fNt}Zvh3m zTlbPFonvypNO$|^*;I+$%YQCQ@Yf$!P z7fj5ozRo6}L8=JT;EMq9HW_sMiR&HQDQ&ctPF&11yBR;upam@_+rU6;V+? z7i)B0CM06JW+#J|VH_Kx>_nN~D^uq3EcT32SD-7Hq?HnsO-}?94p|p*0a#8mzSic9=5i8v^8(|CBMlFoG zyS|mWIRjHOdQ?tez_xGI?z3*0QDCLzq?}n`JweJ^(Jj|40L_)DAJ?{@z%jRbtsXiE zms?b11duP!V0K%v)j(Y5rRX6PqDcqYmc7X!qyJCC1~{1_*EElvUi zo5ocv*UdY;$1l*noDe`=vbxc{MBflj17Ffc%OT-i1eC7wc zf@+>7q_L=aE<1DCr1^N+9`Tsos)f4g)Y{cH>Wf}t?oKeGVON9Zw$FT2zvb4p&2?f; z36X5sox+kTFZ6`mX?vX8lk>^0UG{Qfn=?fhe3a|j_9jKNnBp(KoZ!+vtLxuh<~pnQ zrR3qv+eY$)sdhV5sr_J49ek9_9raf8(Wlg{0f!7HOhUVzv?oBu^pSSOV+C5L99QOF_Jqj^=%DwwUh8U$hvC;XM;mQa!4a9g84cG_uo$lO1cKj@%6!Le)HB!9*I|4}RvkZa30QUVT=zMT48z*y1_ot9Cm>wg zjX;(GuTHv*vd6vJ#SBJb3xZs}rJ%R?^k(5b)==25EYCkvUMufKm?Pp@w91vVIG38daQUmf22%z`$NKjeUpoI5|fa^$!-*3c`M@!P>5nkzzX6$`g`Vz%KR!7 z7Yn|70*EYFYc8M?Ye0@@ zvd(KgX_;zph=~ww*ZQ2j)f3=JPC(bYGo}rDmcb@qWoV4L-K7gEP5#y~^Q|liuDK{* zUOpvc)7z=Bc4jDZ5JNudb(XP(ShF&tj`1*&8+iopGI4^<=%KrZSw%QJlJx$uNzch> zv5~;=#(->Rz}LFB2-ZJvZa4Ip5(o&5gYvFHTZcH1eo|(CW{$kRz*aNo5bB&y&k@T6 z54cKPTOujH20FOl1vj73xQ#!QVt**<1()FHcph|yYf~V?(I;f~t*mfa{^nwHv#2!t z9szNJvoXTmj;pCb;TeAjvwNVz(-_n?3oN=kxDX%sxIO`OR~z*|a!0-az8;9*q~0)) zQ#OD6_O3jlj3%Lm4vlk6>Ss7?cFH=eJNkEdD*WnCO=DlGLZJ_QG@|?$>d7#oJFF#gA!O zsIZ_V;vVyADJn+k_&7S!cBvmQeqp@b$bY5?x7k^&9aF3whW1f&<(6XOW zL63>NALZTy8n}cCKRZI-^B^$At_>}`_O+g9yskBeWr$};>RbD?J# zt7Xummx8}+7=MpX@S=m?PlJq2xn863taG^#r zL)|Q1)(gw@cW093A-M4cwO^cjvmk2zg|lnX&Lnu*CY+Jb_r0`=ny=QaBtOXeNJPHy zh?fW=5N0V~{E*DRG(ppwp;{G#eVjhs{28~uJ-ZjSA8r|JfW!V6^$fz zjC1fqfB!twtALORN_HVosX=a23GM=AKWndrOmQr2u6qYSypvuWI18HROstICuXZJ> zZXbb+>$=j!49>$s8`S1yD6&0I%Eo-FN%0z^Ec*7bEkAc~UWk;{hA$!S7Q9FD#+j^lvv%I_< z9D3XuZ>!xw9l!#@p2iC7ltWp1zO~l+hS$RxLP-tBpP%Bk3NDabc^e@hq?KJ>xU&Q+ z%%+51x?nDoZ@Fx; zs+U5e|J0B4eR|?UhODEnJZY2E{VzmMMk^t8bDpjk&V|_>yTXsAG#QO;w5j```O@4I zs+hCquTI(+9t+Mv$0-tVy9#)n)AClds--`S4h=<(jlJSlgDU@2_J^F@3?$d+@;gZ> z3Ji~{o8JE!mZ;iUaMsII^7^27ND9E ze!#=r`?hq3jNSAYNQU_{<`?IS$fgTrh+Lng5bs};LJ@#YXxs<}8ue(E-cBYi-20-Y zQ@XckuR@G}p>3B17 zngsnumjJ82+5Eom(=5U+7Gb|9>P>R@=^s(KND90f7`G*g4Z=O-qb1~4k25(j2di|N z3uDQA&$Cc1{Ld>7CfK6<=s&Y0qT{`q>E|#X&5#M7s)=LP-&0;WN1D!cPE4`0!-XPd zD}-!4y{;3zaTa}-MBQ=Upw9D1JMpTDS63b>X(*c!>Bw$XPF9w?AQlXF4I5At6(`ln zqWoTsh*Sz9w^CM^F~u;y&a$G7yZe{aNq#?ypP=M7VKn7;8|4RD*z%9}9s~Om+O}xb z`8)yY^9dTyFXDI!FXp|T&4G?C)Ih3uPY#A1u0fo*>=MCJ40fTK$9!9upoSV_IxvQg zPHlkG3Ub#ab0bPSQ}99(EO(%X+p6bYP04E{nUn?4;KBWHkR_wKxBJK$BekefA_80e z1uMD41I{W&CxOP7G2G}w!iVEizDT2q_bTy zI*on*P84`x*M1M{y7i6V0vnUTa!a{MU!iof@y8z5t6i#lco)0a)~$jB(guot0!y~! 
zNV_4uuy5!Fw-1CD*j(-87mU?Wr49B5`z-&HBc$+4lBM{dt;X9}%s6}nPD}$Y zg+^(V<{VAD1ZR#>6Y(3?W+yB8`D|tF?hTV>na8*|zjcu-D-MvYGSbAK>SZwm1qU3o z3GzyaXBC4(AEJkBC?tDA+|9a7d|)x+w(|_gkX~VOf5tD7eOK`NHs^HuHi2Xj zXW#&{#$dtn>fWj@P=uFzZ~1{uk8bLsLT?e^|W6lA)c)U$#lZi_pdp&BKw|5AI4vg#qTw+D5Au@ zuF9uc2QlEBKjgT|GDz1!BSQ4TBU|2dK>4{Nt-q~8iNv5F<1-SI*o!fq5wvxG-TbE) z3-rk~6H#*3y=z~u`%Kg$I?HMjVKrAvbZe$~6=CZ8`UY^Zj~wY7CzrX+&DRgyQ4Wv5 zl(HmNBe)DE!&68nE=4uinJP0?5#7)`0Wyvt>MHH(>6+)}8HF z9zN>*Oz6e8U*`XpAzkFrC zs;GbR?eOi#xxuQvYrsOi)B3@pFxjkx*4FOc6&NCfteTXxv?g1Z1tWN{(ZRB>!g1-+ zzm7u@>+}PEp#nrm@qTSV9bgOEC+H{aH57`DHE2>Xy+$)TMWRh$i{T8Hi_+>PE2w0C zCcA%;r6IfPb*P)TW+NLLZY{n3{aTcugej6oR+;97SJxmDL%b|xeWX~jy#GhN3^)mq zw_bsMQ`afqfYwsJu&yQ?bL|0)plDSHW38Hehind@_&<^X-(5Yk?iUq0xs>l;fnTbY zDX~@e327b4x)1Oqn0I^31^L2a!TEfCp0Lw7m zjv|saFO&~H;7)r#cV}~TDcG?KUmn}zh5(7DRkIjf-q5zLE|8!(ZH@KVV3H2Ys`Jwr zw2>luGJE&*_Ld*Hr;5H{e`xXPStS{CJ_Gp}gVi6DyNxj``=0T~n~ zarmI!`PCBge{SgSouT6v{LI9}$Qs!xt#1DOWXn{lg6_SQ7L$5OY(|EXqF|!f|0E_u1uZ@8@P|RzX@ZbadRc>k_e_wYVG3gS5 zJigSa^3d@t``f;?>*)QuNBbPJQlh2NglD(ABS+m%z{+`VK^I=~0nVOVWQlWfCZ=Sg zx$={8tH3y>MYL0l0U2fuzK{p4;GS@l7m&mDt zRUBG){SzlZ6_7>N1}_UTG@`aRIaJ_{p%JheJg)#9g-o5MLg@;V=bELVoE|_AcdWLd zrdL9()6tK1V7JgJL06&=S;vRV1~Vi>t0lRKg%&58wi^k`gD4P^j z{Uq*dvo>s4|8%7o&*z74$aMv#p6im~1pNFxsYcy9NG1sT0$kU>hFkYOB{$ZhU5|={ zMw7hci5nE+Jn=`b?f2p17T5FO(ATABh=&BPkKfWRm&o`*%uXZwBYj>pN*QuWf-B9L6&Qbh$Y|~stOBtgksWFf2tcEf?A2y( zMU3UYu9D+%==Nm_K@FT1X(0DP99sw-PA_;=sP;0M1v z1X`Jl3JML!?f3g%FfUeFhg90CGRi*q@oF|4Nwbdi8ILXD=9pHNHD`KE?8U3~8PWVy zP1b>O+*e-3@)PS>65BZ@BbA2vS5?f41SOJD(O4|eB^&Q5tx_n21kRBJ1dBA3r3nh9 zT!NtmQtVhy%u&aSd?hP^4f#2|4-?tCOT0J@9Z2nOkZ%)Q5`_S;3L2iVU06E5^Jjeg zL!uJ@3j<8GPyF*h#tqPQk%QH_{*Ve*tL?B(bhwvU5Ok|CjUpJJE>fh<E?$YX`vAb>va2RVv= zS!El$$pP|v%?1bPk62iuXNb*5nsk}Oo2&P}#;X=gZr2v4&Pqy^D+zbo{Z=sLp5*n7 z^G-!9TE~a1R!c%ca0d!>3B@|A_EIQnA}k)66PHHHe`venAa)*L)U3+WA(jVPG~9kL zOD-g`Mx4h+p{Y^p|A^1?IBELyqJ z=>c0BvU!!%Zw7(-m1d96gez?H`j4I+1?wxUUa@;$uHtxoRH}WK_r__3W3P-cB zyBHx?jykn>Cuq_bTdDC);H;bgNceVNFOg&Emi@z{qa$Q0*RUr>2!6!7%|E?^>fL$0 z-VCGMefm65V(XsDL>MtAT;pI*V@zrb+@nCdx*1%E_`P4=^)S#YR<7EGDX69cJ z*tjf(T6f<>(NLuItgHLVBc{u47>6VabVx!v*5xgQx1&v3;bN`iqP8@wcM6j3VsS;) z^!#@OR$?|Z@kgN}GWSanx3%N+Yf8}5{>XBy7woZ6sP@3vbV%P(Nl&fWDWolWKv+=p zZk+TZE_r-GQ3f8ar;<_<14&Jm)JyjeuDr}$#q@A>+dNv!CpnVcJoT>*-l>?6mut-M zu#SXVYgg0vWyGFzp3kwU(k29Da=qELS>_<^#gRc^FQ!C%YgZ5IEVWK=B#@7QVk%|3zgr}et4M2U26Wn1GRc5z6=&}bmftl9+Fvrl^ zfl47E{Za$Jbmh8P;zbtI)6b*#`_n7w4b;~{1XYyQX`H_#NEqNr;lDInDUPcs?jUS|&Keenk?>O0+c>LL`IIQir zLe^Y}n)@ytZKp6~ZNa(R$~*sDDt4!Fj9$`0$Eqcpcw}#<%cAd4)SCUEJ#2tuvz5$! zu0Nh3N`>heYh7bCDdhPJNu}LG_D|;)KTIKxMoqoZc{<X71o_C+_kwJaD8r(Si)D zrtmphjpA?$#}JET`CkB{l{~zATm^cfAf#LewBHHm{qHk0Gfr^L0Y~Wk$oh17L8(S! zceYfwOYgH$R8WeFuadHg_>hu9qsw2VT+epHX=D28{pRTJAZaur=`T8$-}x_&$+#q? 
z-VZ}`=)uM5x7+C1$k5HP4syD!YS{iGoCbM!7Zp56@QWNe^bYB~2tFoHDEL5{7x;rt(6~j~^TZ8wsJrCRGxPh+D?_w=(uQ#|Ie%Hgz4;gRDV(zYqiOH69 z_D%N4tc^A+%#{3ybT$So^Ez{bx_X#fwXT4s?|(LZ*QUE1%$kWyi&(U0d~^~xrC;Ip zxymTP2n|vPfC88`Umnw+j-IrOYo`&K+if7-fa87;o-o0kC_bEUwgq>)@C_*EAQqdp z&{SO1FW)qBx!z&l!*Jg9zG z!#f$Y1x^ErKrq_=QwJ(7CG9Ne!XypC-c-;;bi} z#f0O>Aa{l*z9>6tt~YKhL`*43J|cf}n?SDPK`YtK@HhUT)yNhnae*X|6a2q-qd85^Wxt?U^-KX3kj=7%mt z>(xX9S-C0fW$Hbu3)WsuVdC8+dm;b_6KnM(ZMtQ}@#i?waT^}tTJl z;r1HbrI`eEeRKW;Xm7{xYyj};Sh|La3a7svUyz!*{fp{D%4#nVR9n$AF+E;DIs#_x zc)-5V9;k`PO?638=|I{!I13d@`wu85s9nWAMj)~dx@aa~9qki(_fld!KnvFV?ReXirFHB#Ae{LI_5xrN5A4Jz{wjAF(jS zFuK4H$+0e%{&D1l9c^OR__5c>E@83asA!#usImD%M%?%tDS^K*b$q=(fEse;vR=XQ zhUu603Fs&WzsNoWi46}vfK%G31=(%&LE%fQN9s(t4}dZ;vs>iBYjZLn6jd%e~)e2r_2O@ov%EYevNIpw+ACwxdu%f z!TD%UrA^p+O(B)!!)z6Y z>leAz5=DLw<#7sa7h#nSbqAliMj(qbN=nS-_QoL%7i%xg6&C3OY;E%c zkL^-44kzUxS81fvsvin@Y*bc_vHJ^C+}!J~t^v0Om;) zP#AP5q7bMW+gAyQOnHMIA$&S>0W9RM5%z{?ylw>xm1f>xSHW` zOj&+R*_%gM+IEE zJ8WX`(T0nke9-U)tSJuftMk$%q7Vb}mjuv3#JrXV_0A;dZ&dmI3;rvDkMnCh-|tg< zCEZ$V^w?E*uB<`?BzcZ{cL_Jh^^f|nZ{z&duiqhONd7B^=Z=IM8C2-g-8RM7F`xlY z13^8zo_(bgbUF47jMT4^H3%h&E zRm6yOy0&w*!vqA$7tL3P(tMz+RzHJRT68qEhG;2$hUI;v1Ae4=RO}XG47-|4V$qQV z5$8RwCGP1Bm)@WNqTb3je`E}#!Bted;UEKIZIxsMyxvUlh>6|Trk(&O`wF;cO;x5g z^y&#jTCC*+wF4oIKG*^6MFZ%1{>}}Vph-{s3xyj12tp^IseQp-)+A6eS*3ohQE2e? zwY;9TjRls_@rQ-3`9?bgIrD^Co`7lj5=rX@AGewYux3Bm>%Ke&`L#qvcSDo~+c&`e z-Z;?sh`E%b9{C9S$^-V=cOGj$JPAW)DJKF3JHR9bjtsTPAE;(^553P@ae`P@=9?|U z?>79=JpEeI^AwT%(MB5QR$j;-l5~-ll!QJq z0>;H-p11bIoWQju^aQ99}>J!7LcrN$JX%n{0C?=deHlMbWQ=QLb|F%4ddH#wtTsar1;!HvFbxP_3X%{- zBrF4UHFnti|4=~xDtHy*<>jxMlnn~0T>f618D?buPOao&|5}_CZf7_tHyj(=;`G;( zHC=*aIJ_gBO9e+~Id9OV8-S+3LY|#5EvJ3JfR5bt11Y+yd9kmc#=|J&e*7)`(C#~r zZdOr0(B*U^2Ec;}*@}oIH@LV0O{g{v3s9JI@BPMD7;jX_mA>bhJ6TS7-@{&;s99{7 z@bOKwhgjQ4wlo}k0Q8UkE~wY_PMaCC=ipXf$eu3e07OmavdmlShR1#zmefZYu|)QQeaxc}z$w9s@=1DtK;{j>0rN~lfo>SR*ibq@Zc`R~ur@v`AA zfef>=jv9|p2ewvz^q2bkf}9*2Yrs|RSo>8If0fUe!(HBA{W|s$sKmQh9q&w!dSmUU z9O#ideS6l+#~~}PwVKLBA6Q0v1?UYHu6R2Kcjpig5t-3jp9caxz0PESAXwm&O)yD# zWcH>Ex|T_ONcO{W<!g8q^18t`G| zyQ;AFwRpixnV@taDX_G9#!O4w+27{)S0NH^^?o4w$CI=*NzGQ)5c}xE>n7k$8mSCX zZm^hdH-3fm`^1X&L!(dVEI##c4sSNL-80`ILH>i6$K~4R z4Y_1*<}1pirzd?--!(8UBdtFLR?9C~L;-GVk>aGCkXTMoKoP25{hG)691Ti&b>^Vo zJ+%xPb>*a4XXv5sop_P2^4kQUxL*4eXKJ!_J!P*p3=h>fF-p0I!Q!$zmmi_l;M}nr zs=;jVp4tr!TGT*vyRxgaG@XggxU&pt5(ghd}^Hj zOia}3Wm0_eVA@u}8KASv?E~B(M3;6Q-doT3QZk_ZiFoYI0tnGIcBKyWHg!i3U1KKO z;QIN&Nj=ECy-l}ImV6?ISbc8Yk@xAe85{@&(r);{iIjK4xeP{dNGU~2Ui%6NRewL# zdW};k!<+d+#r6(c>z73G@1(332~G>q-XE<>elRJsKim>J-C|onu{=Y}dQli&wAx4) zQFIA(GWIMA-Oe=8dlT2Zcku2dTr>^0v2&_G^v*4-rO*&b z18xhXzBaCZ4nbb}C6|Hnj>A6n;$g(|{;>JQ6UvzGH=k(V={GqPWp0$Mu0 zIy^G8uJv`=8Jzt1?Do*!6ddv@17!ULL}Wp_qW~jJ!amQ`+9<6>vpD?%aPw2b$%v`% z8C)#>yiq3+7IkMk6t7qS$twjC1eGkZS~CD@u7*ee6=vRHSu~Z4%Bz2FSHA{{8ztP^ z>Y?;IR5(pz)BdY{2td~V3Kc#8->R4LyYUfO~OZFSu=eM0eh(_M_HcCl}xmBr|8 zfmP0=4sd2*YA+cRu-3e72=nCFAK%$HA=LLA4iD#U6->#ctiE@D>FP}gyXCB93cR;u zwYKxe%R)C$+vh{2bPz)n-GO@#=D@+haB(*+tIFoht!9(HxQT49HvwwEz~l8E_VqE& z0~uaxGc|hbjZ80m&HDy%0Y?D;-P7WCyIn8r0H~_I!i7t$UGk;jfmp2sFF)SmjewIL z-M3wu5vjxSEWkOTM}eO7yd6zl->XUxh_1agyWf-K63U*_Ua=v{xxUPGiAgr1zI|-9-RV+deM3VbY5`ROaM_?$0SM`c2~K%VgL{g?jlhZ{8;wMoLf3eEBBtCxEfcFW=ic zCS(^ce<0bd7vgYqyC};=H&?n*)lBsvA&XX)EG_|?9;xDZE7B1IYG_eo*nRewK^_CX zkJ&v-oKb?;DU-17vlASH!!YVy#5T0^G>kKk1Z)k;sJ$IfR${;l9UD%!gBS^78%OvN z5NW{fI)|w>+Ia;@dy2haZxuW@K0{;+@~RNtLG&#n`KJ{8*Gd#jkEpk*H!(#7xbGX1 zFP%xH>TdqAj){z+73#*Xw#zpJfswYdFkXtl3*Ks(Bpe436s(5S4F$ya*PVUTS#ruzGIQL>U(De z>fBS$^L^hlMsfF{OQOF!bb)h$!V&6DKU=9l6)?Umru(oRuxOXfgtzmpd@#LpB+Qw% 
zflvQiBMEHUF>!I1IkE#M?;kcUJXxQ9-_*0pb58fPYT?Pn!AX;3MDbZ8mqqH{roiNn z8?T<96|O#?WOeOtb^Ayb4QMgrX=fK{WeRY4YdIP_ZaOPX zR#)?MuNcm-)A#B!yp7lUDPEL}aP-KyNdAXCU7c^;8D3l>a0>Azb@9pg2aH`5Quy9N zME(1l-hGJ(4;(hf8kE$q7mVU)B;haVH1zi2T-1vzO(APAtock9}xDs{&7-hnS?de zakqG2CcO`Hz@3|m>(%57&M<&O93LEm9n}e3yRce+Gaf+E#tjjDWoP$oP_2l#v>(NH zWNNC`0P2)_sO8hu)wPq~S7SA8B!8)^X9K0ue=Ej0XGvD2pPQ4D_4%{Zg9i^BdJRf6 zhuq!Wcc#(E<1h4bAMfq$)yQ%ss{u5rb-DfO%Y#d^iXoO4$}r)_`n}i#Vd)LI1qB5# z4=WI{OS39nN>#LOau0m;oCP+talmw?Ov?kpK*UeYmtXXhm5t2?CyDPwRnRi4*uq?| z8+if_Va<_5g|>1jh(vr zY;U@=a{g{!>1>ha(eYlwg5}A{3Ey?Nkoh&=Lj)vjkz4R@AMKE$G%H6qO#^KsB$$Ow zI0j3i-yNEv=a!3U;FWZIYET+F))uYkPe$0MbBSr1EX=jG%(+;2*pj=5-WR?-7E{m*YJ=plRbISam(FI_WP}io}SvUa4{I{t#wreN?A2CqF%@g zNOoGCOB7~Kx;HyDnYHU$?302HsbJ5;R^5>ImmP=ND{Kdn>q)p5`=bvun;#i8Ii*;l z?LM?MWsL!;;9N$f;M-8;SYzbBAJ*^x)(;VfLJz7Z&PyC*)Y9N_Ez$G)9^9|;jkeX8 zdvjo5PTf&#&FOLJcW^lblXz;5 z@eFK0v*u1~+QU=JY^56EpN}fRVV54%9HgrZ01#kgdis%{2t^fJ9hm1?`T3G}?%av0 zry;s4#Y-9oPX1%UYX9{yDYPj(Jw%NT9$f~$JM^`3U%uE0m|Cs%RbNri%imBEU^w^| zwO2eBzv!J7lSth7Zt<%BtK+ts)X}m}8 zqU{;(^w*CD#k~8qD2fbY@Ls&_lr1ze4@!P37Yo1#=H}Mc1GOob{#b#}mGt%5yqoon z$`YJhUCm@zQUyO{X0{Jz#Lhu|{%ah%Rr;C^zU%yHSbCKiLh94`>%oj3U^4n+a2Pb9 z@<-DBy$JsvEVXnZ4a`4}<+?r*v8VsM$rSzlmTBk+^oymnXll$byU}m?<(b2ITs%Cz zU%yJo#TOP9#+;tI078DxrXc3;XY`*z0$v?HT8ib3QQt}&YC4kAyqj<)C9f#|Uze(Y zK)6wYHW8%AqfTTD2dSvWAT!FdDs5*0_v1XB2DitmSK(v=Sv56Jss_;i=V*h!gMtJi_~19U zZ{7O&Gd7Ks`Y&k4PAW>ueX-N-hlV}R958W_kz#4yQG7(DqR2pXcgp~|>Rj;4-Dk)x zhck^>NKEk=J6?xx6b_Oihr}Q;4NKznB$J!0v;2_Lklfzs@nOMGchNU|J+FcsVoUF9 ze*HM~H@6Te{`x||(`Ysf-7T|-MMpSGsh?kug{-kwr98A=?~UcMUlG+RGeQRRuCO>Z zeK|lAR%!BteAcPsNCn-WoWKD9QGIXy z+m}c%lD}Y3FG{OtFjL5(iY-seA=?FENvg^*j{v-nS zJ{@+fV_x3WrYtEp{IdS-Ya~(R!-fKOf9{b~JI|jB2xj-AXfAydLRr#JE%TO$Nd8xCxO8tDI3n1IjjaCd zW5Yj@Eutb{(7Si33muBL|CdXkNJT?}U?R7A@|SQ~-S#7;vOE5+2p)Riu8_~ol@n-5 z4gDtZod8K>=@qz*gmLVA&{)Wu9&F=)e|?y@P~Gr;Kk{R`@CX0TX!!fLo8ay{GN3|7 zA=%Uhvp3h5o1o@8xw75lb*Tg-uha(IGINa{PhuHWr2>}RfT7AaN*s+@t1=j9+T}pY zqfco<9!f7?zN|Lmx0&NjVmCx|-knRF0X1Hj_qY8u%3l|n4~Fvp$%W=sV)s83_S6V1 zt|%|(0^t#V=oUZ(4qD!t4ab8gH8=Nse%24FmguE|biX7r`u(rm!XVY20Vtp&!2b9g z2qSaB98v(ajeKzQb^^p?CGgoC2I3g*#fsyji3hhQFH)zpZ6cHu%}rU)d| zS{$&1glhk+Q;k!HMrt>(WB%Ln1BS84DeCW4#MBo58~Xn?QFKwkM2RKvznHbDf3FAh zrX>0V$8ijbZEA6Ni9oQY1iBYFO@fw#8KCLVS_gaN_9$CCFqSI-74z{GYuy^|18xhI z;{I6VCc$F2K{>A1;NMo}HG>9fA)ixKCPyUM*_PmWxGz9-lW)-Kuk?oZ79uFjlOJpZ zwgU`y9*B@0H~l)K2R%uDDi5f8lhm}h{f>bFC|Dz48s!5M ziCZb9ZxtW$mtXrx!~fMjqVnAzqMQIXLXQB(V#%~VU@sp7)y>8^e<^!2Xoc|SR+zEo zSnq7QFEl_@K0Ob_>v|{;04?G0ZeSME<+MVeB>Z?%9_l|w&|mR-mc{M@R3&U7+-$0! 
z@583b0uDEZ_SDEs_(Xi4(vKIYPYQXUK^+a;oD#dykS+pgWmLfYHst7-`X5j8AA9fu zQicO2KE6O>NamgY?YZ)U9Txsv1u>G~yy^iDa3mZ9u{S7D#|`ey)kSNFBuD#oo09O- zP$w)l`#deusf*IE()!|L&JQfORk>F?GhduGm4KZxn!pt#S9o(s2nZZg!S%wUf~eDd z&!rXwEg*d*pavbN6+B)?5O*+qWhKlEt(7#bHXV3il$^w4Ed^G@_u7-NHXy(>MO%Wv z;H}MSET&nFv1a^HH>*=jNC%LOGD2nz5$zhA@uSu2*!~4}>$MvGet^$DYriZ6^AftA zHFDL0=7Y(H#FDTGIKNHuhiOKN-r)XKSZ@Pz%1QfUKolRjj9QfO|8VnbtA<(~{htIf z-}z#2%ij{j1anBEtxTUdse#IRB^jJlH|RJGtXnHHr(ia1SI^ym!cmC!9$Xr+^~f(@ zwM0CET=U)ouF|fJgY&1m@;Ay0yG1{u6f#Z=0lh;YhpeV?ElRo^z>S$zjaNXnc45n7 zBui2g1&h3Y<)WO7L%7@O2n550r*mdtAG3C@T>#nPB7Pe*TJ%BCHKCqMyTV8Ah%^@E zXM$;u!{ZGnU@h)Grbm|AN#X{*!6qSN`O5U$K!GyA@30oEB2NS_@(_fAcA)fyBzqa= zXW-yl5fjX}UE*_X8F#2*doqy13n!j;Z+9`OBVH5&Y}@FB8n0tBP`p%aeW6vdE)l)a ztXXik>_4}GQgg3Bv|CL#jXDSv)nD!z+8E!s0%YT>N0d!Pjp{E$maM(p#?_51 z3T^;cmsD9uRamMeJ)68TSW`dE42-050Pze;cw9#jJ5=uk_ivf%0&u~^)tjO@v;JD$ zdY2LAzPU`#=B1%-P}eJj#R}7?VZGOYrGx+wt;QMI$+s})3jikUu>iP){>J{wTu}N~ z^U_nU>pCkGH3VLA%{jH5=!CNeB`-3D0;c*dXsoljFYW!4iv=j|tE z2R@2eV2Ug1qfBA_``QfwkQaF9Xr{NwmhkZ3=lGg4Jakk{DAoAL^jwM{+!6G$d8zxyty?pQaMAY@r>gSlJ1y zk~5Hpk|#%ljt0Z^$9wcp=NN-sMS>0wueTa9dj#ixw}?G%t}fJKEsN{A7~{7Gr|x$p zdvsNfZYj{7bFOSGw5Ow-yv3}@k0tQ9c=n*Aap0^#bj71UndUh)Je>SS_vy~y|J7mP z!YxCJhFI!+H2p?&ui+@1{1W)gsc2(p)iL&8%cyZ1NI_eWXm+CA?0S81q^$ zp6L_QJWv`ucCP(s+#R0vrtL=(Zoe|&o8zSdN^RcrH$ccyRm?Oj$O0go8?h}av!LU_GpqW2MSn%D3e$V|j4EF(T+jCntqUsG zAqP5cHqvp-p8yOnI0nT-H+`cO#s~cX9}R-~X*I-d9*>^!dmVHVsy!YG>;tioDA2cH zz-bQPgIrnUsCDw9(e`G8-8&k0UyB>g2vH(m$ah%pRnyFPV_q|6diIA3PRlOqzu~g^@&_KTa_VJltH3(Sv ztIS8cCg@9EfDDbK5bvkSq_bw+&S>{`#8?(>wMkWf7t4Wxu7-4v)1Cb7YY^2}SZ(l2 zSru(qOhm-1kWP|I7p+MIN>kiDMYM({WDJ6_X_bq^>wu>>Tbc=258?S2jbyOa4?9n0R*A#8KBUV0JriZmDsxv zhu=&3=1&>)709RlKla`-tjccf8b$;WK@m_80}utILAntErIGFq>5fGUDk&fmix#9) zx)xp1-3`*sf#=z#%h{t$Ilv|YUEp#hk)I2%-WbY>8H_!Vw;bz%*miFl3G-gp8~oA;N-MF| zS*+Go`zkO7VKrSu+PbO^AM({2@E9>e5n`NMTIELR7&&GKUqEI31?jPt;m^@|9qm{w z_VyAenQ_Skv2XtS=tDZ=ec8F<{7gsv51$j#jE2hVqg&^C?_E_IO>YXqszcLjw7m$8 zoO*D<rCRs6lQYg(q>s*L#iOZaI2}z7DU0YDKpsJU`RDvso=9zV$e=d zC>F886us1Z_H4Eyr>o|CpCJn;x8Y((eb5_Ck(a!&c#sI#{@F|70THK$BJl7o02DH6 zVKDe#Q;d1F56lNtho8ty-G;W1z$XY)ZFVcMHSn#@KsyoO^oplNJz z)jZTR$Yrk?ttFChDd#_zfNQdQmop4py}d}D+#e_qB7OLQv-rOSCkX3E+JVL_qHGtL zOrq*^Epat9fp(z8G_+N+?i3Vqv@RpLV84_vQ3({iIrZKiFw7{_U`PLM545qe@ssAm zoZona#+z;M3vH`F0#VtY^0;uL7o&dGJ)r2qo^Js-QVYUk_kD(CAvMx%T|(h1%N=C8 zqcnycI9T7)`7{}az1FDn7IPKCBc0SP071WCm5O0+h*i^ZA$+33kRpuGHY=WrOS+hM z>!od%-=gVQgR$Ichg%u1UEO{>WRb`;5IFRQ2e_iAX~&bqhFpM#FPwH?1m!Hm%a}jy zXciMQV|R9+@#Nq{MnopT5v~&eoabAM%34Npd+heB!tYS|Z^10-J!tS$ zA8mpkF**Q|W}D@75pw)Eai^mOY@m+d=VH{SW0!}JmbH3L2xe$>5u{s)@tTDm+tI#oG@rzyJl{s z`8bA#Ivr z_~-;@Vj2W1(RFz!LwClS@jD6`6`OSk{OUu=#S1h+DU=P$6N>hAwXst0Q>H?0dmRBE zsJ4R7EjRiqVg1@uFV~H0z(Ffvf~eE$ea4hkYqz|Ns$-j-KE51A@oaecau>{>gF|7(bx3>qA{fLC(; zN4?FGN9w&$URUmy7CB2~N+>&TV+(#qFbO~38pwks z$IqjpS7L#c5i-d)JK&>XMUv_f^cBd|00rC72+uIUa7I4vR1!@1@{VBTAxRLcEfa(` zlQf2vPm{-{_WF1M^0+djw64U+Z$8Fix|wl(Q3>F5p90$Kb#X$}~ldfh|iyzTr^#7EJ zgD&xae(O&<`2W6H@bvURb_*ZKpvVhJm!C}KED<&Zdizf@;K2e4sL^P<;J%O!=yE}e z)eRDA_9}S3FRm{FlBP<`BidZUyd7}eLOdA==R4&42mza2(m)B*j>YSa~*4K~967>j{9E_TH| z&oKVzKMo^^%RO+2t5-btac{jOIFFCvT^@X>YIk_!h zcB;5dfJHB|>-R6f!f{0n0+yIxA!5|9r2$4|W9Z)DUmjDJ{RKE*!6i|_;FQg$JG=-! 
z`vL5FQ=UK8z_O^-EHxmBj<4->n;agDBHdH1AheoIf(D!}wZj1j zEty2p+I;x43M^cMcOGp^!G@X#K|hRs4Il#liw_Qhu%g%P1uF@jJ~kqd6`KT8x6xI;^TYX}Vt^X9 zICORZ9LfXWp&hA;#{k{PZ`NfpO41%O2P+nHGPSOlWqKU}qw3lM_V^igqB>U|dY*D2 z+c}Io`ez(KBv$q5m4&_p!}_{{mLOSW%B>dAbQVA`NuKNb+K;NQ0Xn1df;&C{mGaK& z*{8T#3dzmtc*a3%?$Pc}aKUT}Vx&)18y3mtbRMCN?F2Z1CkI>Ib3SiIK7zHZhC63c z>$5Hmz$-VV*lJeLwymUSn)iOP_BkMb=Z|U|;%yKa(%8;J&4UR6MjC10PG19*t7;0@ z7tJ}y5-^d8i180bNos;NWH}PcYKjn?z7RHd|4#T1Oq!JUa`2V9MBPAr+n3@>Y^1R5 zq&Dl{mm8SD0ucY`-bXx+Mb)z1HkuV6Ss9wI0()`oK*QJa$OQ~sFKnp)d{I^;B41qrTv^&K#V;a(w__WM={xvU) zw#JwDc)@Z2X0ucDbM}K9ZZ~{bqm1tV{Nz=-$zUb{&cZ^Brqe9CEz~R}>cIj0{A5e6 z;p5GD#OwrrmQr3SXg`;a{npvIf9B^_ADd*o1#5)lOUzW_95(~NUTPVzP%fuJ(@%c9 z29z##llH?Z4UOY8is(;(-pWEY$v~ri2Ow9UvM57AvrI|8IfM?Z$Wj5AgJ^6*%K(Ni zGC7q~pO0FE&Ojod3P7HvSrN@9K*}YuQPG7jYgV$wUcmgp} z)TprQZFt78YX?-fJpgC-<0k|IkRW!L!lq*|*Z^0UOnkp@Bi?BavjPaJbND!VE9_0@ zs#2Gqh_} z7b>!H)tk{%qJyst{>|9Y>dEE6wE3O^0H;Tvc)|gK3B5B!R3ETN-unDNDM)0@UjrBvUQqL0eIs=%3TZp^kkSp3sj;)>Fv#&)@B?4Ri zkYe4Y8}I@|klR{LsjAzKlh}9jtodO*JJ3>w*Nzu7baJ19u}M*jY02jcJ%*94naF_Q z*f8jH!G{)ERJ$b^6@Wr@%Hq&0vj&teKhtK|(PeL0O~g{_o`nq`=Ag-zA}g(UiOSKF z1Arx)4RbPsl}isO7gw}sXU4E~ zvvj7|sH3X>47tXPX>wZ`)>YqB&QSgi(yfGg0p&{;IG!2V8o!!MH#Y&`EPVHJBm4E& z0Ab8$eu;S<3=Xd#XfL{Eq$lY|+meY}uK%04!tFn^BGMr=H!xS!UnymrgJE-D`Dbjx zMivqanLbCzIT66I!mB?&`;4KhF?*N*aIWYPI5!3&tc$px6SVjymTWIFUlZLC0$*U>FO$lPrMykcD=%lQ>~*9iky-)i%ssbnhtgBD&{qo zj?t5+l9Lw=PIW=lG1rAv{H(~c^lFH|$?Y>S&=xK`69Lej8 zVFCfx7N!6X7>;#{>*89YJqEapKF68jJFq@ow%lgHcUXU>_TAVwK9}lvR#Qd6t#SU- zZEsC{701o6r61r`19Ksf;C+AusF*g{0|t54G+V9lt<=dDF<<(sJ=sb2@g|4u2dMfF ziS_Ed(7`~LH6vFK5eh0Qxh!JB=NG^OJI0@oLF!ZyjW}RqwiL^Ce^BXNi4ntz$LTyt z4&RY&ttEzb3|MUal^+XC3j2nSn5~hEn~Ne#tyUN*c{||uR&_9CaeN!yuwG=k0zR_P zr~9m-E6dR7AGK=@@l$CMQG!SpSpjD0VksoNSqyh|71Ul*@8M_w%ih2z! zorDMjq=61kc?7;`tyEdpe)PjNFsCz1OA066ePio(!)@9&N36CrUaH`q04N`)S1Xag`q1evYkmPFt+xYetLka;U`Zc()VqTZ zqt+IRwBuW>{LfXC-9QxxCx0ZnzzlH1s*B_rCu;zP`jV`E4p{|}C1W>2%C&pgdMERU*f1%A9qKlT@&kPSWConm4JHhG66B~i)4svm&Z}P5U zDz<5T{pxI$dq*KxIX;>-LmK3xTZ7H%IOkI#xORI#ba+46b{k9GTpJ8J4s#u4$csQ` z9_uw9x`>O_am=EN+Q*sli4Tj-Z57qIJF#34`hBJIsjEq;pVr=-RTOu&Tle=HcBB9~ z)5;oarb41(r}U{ul-jqO4@!H2nGMw=1%;TVEm{ftYY2)?&5+ToG9qQOPR)u@tRl4B_W3GMdC2NF z(FVCKC;AjQjZw4U&9?-n0Y=#>9s#@t-o@QKh%YLaUJWLx8{x-|gL)aDtMpKXwv~e_ ztZnxh!~V#FV@dYOWZt*pL@lYG$Rs{dLJu06wqk)MN;H`NybM9XpgG<|zt}N>BxQ)x z(hkox(FUFXkF=sC1enj4E9&8%U@^c1K(!KFZsGqKLRUubg5u~OGfKk)H(>Hes;`F>1hZM>CZPSpQ$rM^<%x3UGw?aZ5LKhC*@8S3?9QaOr6C%1 zr-1mA0c-;lBVn35s2)M%MaD0YYCMJ5dV6|_@NTgAcmoD!+oH+<$^0^{j`^ZK03%^+&8MZ(<=;RzS()OHCR)?~mH zT55Qw(5Q^RntykktAkzKiWB2JHb3nMpufa4R;T-V4D3u7yzhJ<7>z7#*M5N&!Un{J zOLVC!HL*@T-3*0`Mr?`9(QJEayeZB!in)mihIC&81<^Ac53mXbhbNB2-!5jCV~!U% zRrep|WQY{EVXnnx6P{%^U^IyICVFGACc(#xlevmb7a{8I@3&>sY!-Iyte5aDGuk9M zz*Y)!gGaoA!h;vz_9|gnd5NXw)1psAJo46tGUUE=DOuj!#uo`9k_JuLu)@*wl<7rJ zJHqmNVX&nKG^x5Y?ocyox$dfw+RwiaCJLJGZz_Pbro)`v2Y3NK?Sw1`*r_zPWgf`E z&;W@#*?d9uch`XS5)ib~)$ev-FrSTp4Fe;IyuF2GRbW&v4@&M4F=BGsGWBX#&U{DH zTdXDnJ;XymGO4asKzIO(Rg=eCRB5qEyc8#HD7D2651;+y%-0Slk|5e`QIqE1xOv2d>ELh4#o`(;zCSQTNoG zxd)h=AnlrEcjlV>J`A*Ok|XLnPrQ~|I!1#)k^SZ!D*D`*uIB4vUfUT?v(bVNQoQ@} z8yS;#a%1#_nc}Du00jdtU}-o{OB&2udT<0A@hxIp3*e9?pQqPu-cPPXN|N>(xR8nx z_Z}ZC&Q8mcd(4K|Q8-)=x$J%%ct1(~^ddP5YrGMs7$kHBxF{n2vBUODyi=7KpIeB{ z$*ir6(~=N8aP55qqny; z?EQS;;3gXP75=sF!hXqww3<=|(o~X?;VtJd;t}TK;c3Yv=5F5$6XOj0ROR^L$Fy}k z!EWxAimS7J_hSPwPcLqI`#fW++4=eu4bB9W`TuZ7e&sot2?UhwDKBtQSo%A{|9MMBIv*B;Mattm<{y9MCUD==m5Jo(a$ZTks@ zzpNIA)(WZZTn05?AU7Mx$5JU&yp)v6!=KKmT}ht*<<%`}=OF^7pqyk_ntXV3AEjhc|b%%+U4Duz)8mXWykSK1-Ft5L; zkpeQv!oy3OFJ0jU|7S9kI@ja~8k^=@4>>|Cs*eoK*=v2MSnbWp 
z=MT^iM=&Dk9tZbb%bw-&5x$~4*C;X!IY zHo407n+z%)wh|xJZ1qi@wntD&g7h7v#A?B5JMrBZD^9lJX*hj&<;ATw({K2Y_E3`R zl@1oK6sVPaxq~OlZu@J;AK%u`df;7Jq3Mv{A2tVrQ48SIq(ln1ZE#-H{W{Vy)>yX{Mc>){$EmLMs3 z`qz7;lOFp3Dk;+Aotc}gLf9;i;(~}dDV&B0l|5Z}2C_Ay=$Ea6^!0T6D_uV)31?Vt(dQlv zLO)vH7&#|zbFan`{}EY(QfxByKAI(yQdse{?)#fYOEhD5Lp`pXx{R@YUY95A<4QTI zpXudopUHgn9PE-fG@vop?;rr_iYrYqZL9h0b}d(9IS{l+Rgu@wpbMMVf=Yuw1b&eZyaPa!h(DXK_O0H`2ptmV^6wjsie zR?io}PJ$Ac1av#Y%XzpGtp9V7W45sUKjfJo)uIhImG1yt^hzMq&4Lm*FpQ9|Kjz2ITZ z=>o$5Xx$d}OVI$=;FDo@yd+R}({@6cUJhp|=aYRR;g1KPl78_^RJ0_iU-z}Z_n7DO z9HmSdGcxU*gNA-pw<&-|XG^%P&W>-a^aADP$a#hD0d-H5|<=YMKjf+h7og-;l z{DG2Ibg9(bxbl3e2Y0N}O@$n7s@#;}5vS`92aV^&ri$N!mJ;YxXrw;ou!J(L#j1J_ znU4r!AhsgKk6WNL&JL-)(-L37Rt1#>3rt(S2ELQ#11{J74I#kq%i8Rd+=DHb-rbwu z)0+4=U~m*pdR{*@WAYhWmSqC%vJbwsN?IDxOI0j+wE?iROZ5*#*}%` zM5(&tgUfmcVG+sG&PTFewU5%cP*u@rHFwLfBU&1);~R3bHye&N8P3h^PrSYuK71kg z;rZJb=PW_d@anm~t}%imW{+9fW8=k#8yeN0on`c9lTwald2;)1G5viCJUuVjQE73T zehW84w zIsHlEk;-a1xaOE!;l)Y@&enAK*0~`JI3}IHcd8R3?6)$vKgq7vZjT0 zE7a8L$=6xAtKQMm$Z~i`&}+RcbdU4sxMt8l=tiGd{{f4<_?DkLih7AIt0d3)wJZB* zA5`c2Gc4lnheeeK8+L0*WDP%bAx#vh!>%~(!^&{VPiirH;~`lGqnF=!Fpwo_A9nBb zNAEWl>$dKphATH$hf_<;qt(p4rG5mvXvWW%#}lzxj7gk+yprc&_`t|Hf6Pa2Fy2`ZV4F(UYCg)#A^Jp&fVNQAHfVCYV0oZGMZ<9sDgp+bi23%?8T9^HKbbd2kZtk+Jj{FOnjOka-A=HInIMlqjrFu*-5{6Hyva&5_A z91|H{uFC%77=8J3jFfJ;fma?*qtpj*n2Zfm!2jLfoa8n)|De-L{vA+*fRWejo5VcO zp2lekJa!ixi(G~+s6#%#@K_&isCc@dZq>e|lbU10KXWCI!z?MFgP=olZSQ0#- z36CS*>fDi5sNcLL_SW*_=S(dGb>r+D{YH3t6-+9ve&}K0+kD-DG^HgdgoMi#s+g^w zg}&r0FyDPB%D-Z&ZkLM}$1Xo-i|Ib}cnZh!Rd~AFb_hdom4<-*G6JQck&4-Pba&9X z3=~1n7}huR-^OV3dDM9gBJu$D1NyT#o0f}Y|&l=x`AP`Jy%B4O$=h*O2SJ06w=#nNbWx77m^ z_baH7Y85&gs213zbX8(zoSkm#2hps&AaTw8&_w&a8j%?DHh;0dI?xv> zt3#t_^J5lJq96H>H&0IfA zL)hay1&N0melTI|0aGkKrPbfGFcquuD-k4p9Cs~Gp_H4* zP$s+7>-d|oNA0N5+?Kct3}5*Y+VIccw=%yy@@E}$_`&;&YAh2L z(5oGCOa}7LMiueD2=ho^?4ZHFc*Gj;-swfeF6K^cK)zlU2P;gSym~hlM=44=i9WiX z-hDJulW^mXyd_InrVreP6oQDj%TNZ1izLDJ#%qU@#5Sb)8xWT)*I1cj2`ZcUXXqo=TiD5xfK5e-R5~OKRLs`f`zR8;-C#xxs9|kc_hm}Gq8WZbI(xuo(Tjk3xW`q7uvtuqNxgBEIz6PE_h)y5*ZKY~zpIB7RtIy0 zH@eIp586LW)sDY;kH_vQs2zPrN(#H)>W@;RIy*s2=?I_yBhB6U6B4(rH5Jvq)j^25 z+B>?hceNKRX?qdP!8#^HU;nKY6!?Cb8%K&?SpMrPUTRy?-z4}9<9Mc`LUzcsIBQ$g8<4`^Mb@7y4u6;M(i8_k>}*9@kFN`J^1hMG-87e6pwFUyFC zYw{;dJKi^Wvv9*8!)mrZ;?)k|2Wd^D@B(yt>i)EZ#PN<15Wj@eulD5LjZ>}sD3unV zJyoR8HwT8h%7Tr%7Me>b8q_-=)Ajm?o7xjb9;^zch!Lbk)pP*gCW5sB@k*`N2JK-5 zH*rg}HK(4-kUsIg&z_xSrXjR@!-VaQ7i8S#@jp(dDDH1PWgrI0l0h-+{n$u!27oiD zz@+0YY+@sij6OvF>j_@|!Rq<#-zWNuPK$QXj!MZH0E=ITelIs& za*$84K{wrW$D6wu`vIZSHqPTyU?80+&aiT)pQqC6XjF`cN8HbsGmJqi!dZRdMcO1F zv3B5`tUqX*s8qr_{<34yt>@$iPVl0f&KhefLh$^EJ^AaJ&qMl(uQ?kpyfi8g`qH%0 z*Qn9J+gKXNHOIL{$&v6V>S& z(EFV^71_Pyt*}qtU1(E=Fyl%ycE^v5KH>I}dNd}Jb6=s;5VK!%Zr|y594Ox|U0^Xk z&Kuh-REvA@Njl3k=A~dAEZCkAeJizPAJhfuXS)H!Sz_Tg2KMV6N~}3;fBwLoQ*t(X z#G912Uz15`aECU%e}x+A=p4DEB;#>5`LsA~78pm)(wKcqb|AL|p=idS5fn`HBC2CF=zXpR1Z<`D}^lln-Or`Xm`G z`8sCy7`Kcb&0#La+1j``=^S^Ix0R5Om0V(r%Pbe7V1<=aDB;#FS%i^Grw=+u0)E}k z!S%iVY#*82^~rEB4JKgL=epkddGFRAFMvg(ZUsxX!~GF860t}s$*M{%?NQ7+*auY1 zf0QE8f1pY9T8N7IuSy0D4u)4As#cnYOFE1Cc^v5A@61&Hydrf#DT~a2GehXSm!}=& z`aR@v*R_jTYfbNNSi=ywR`DAE(9>*yQ*CYKsIh*#W^XD7hTF@vb>7EZSaA&H>Ac~b z^r<9?LM!889B~aznJ}A_T`pdic)Y%I8;RFzdtJObZ4#>0uEn`Z`MR87>U;Mtm8>sf zee#uoSmeOM&iQv47m&D?LkeEiXB212fnc5yx*E`QyvAmh2+Z#ad17LKhL73dG`PsH z^V%fOH<`!Cc&s3|0qLqd(C^D0#d_P&oMV@Qcd?{ofCp<_1GUTqAGcX~+yOMpxIQ^b zUBQKU)#im7va&FrD)^-A*{K=y6g-<>Wo-P3`gbQH$qVC>&G^w`-ZaiQ)hw<@@}|sy zjD@VRtDAAy_Ex9-#3HE^;ir_3(BV>+Dxq6r<#B5-iH7u1#nvZdL>u@|#)j1VWXUtS zQ{v1GLUHR{t54CFV~31hTT67{WDVp()+KL@muw~DeybU_7t5@UVap|v)4I_XE9+0K 
z_$;e%#~tP8NqvaUH#^|8z!xB8YL&@Cd3}-Y&g;J=-2#1ono~LyWIyuOO49)wy{kQ& zE)m~`kD}}8(_m1|#zu}F-1sNd(SMnT)0D8zE|YYv=&$lshb$1FSEndHT$kvS1)1IB zO<#8EE@x-cW@TRnnIgL*>S&e)BMrdD5MH&ZgD5_dg61V`jGUWgYGg%MSL?OVAL|Y% zt7&xIp-~Jx)|_HZolyC%>_=zFWUn~% zhL<5td#ao3DP9L$V9^`5BP}La!4-Tced<~ zZOx<8#Uw~c#;}C6mnfsbO9UeWR~FjtbX3Y0-yM`CJCRx{?KMEYi~w%OjZ`qi$YPfb zOFcb;tGNj1U9LQ6wlN_&z~ig)CkygD@qH( z?sfGkY?ko8RDfHV^pz2_S~^7EX=)tP8}Mgk`^=-H8@x;##-F9~k{SswOD(D;31P0< zjtVQ5KUK`pl;)R!rWRUwL8@mWbM{tPDTc?hVLv<$8iMV%Yp)oKv}y1u$l);AapbYO z@$I^_b#%y^Dt+RL_n5oeuRdz}sDnhT>C1rMi=dRnOdndqvx*RakDFCC;t1H8O^ZRO z7>sC^ux+7y^H9fQEbe%5)=Z_K4CeJh%V)M*nqd9UMqHNEdK}dM${k-v+IT%>T}b># zc%nc5-o}d$Hw?Cn*hCJuhJ*B1 z(K{oiQNDvQ4LM_Xa?+~KN3pwl!mB&$Sh|m#P9@b!dNo=3T*4WY5Mi|k;yfpEbnI0I z<`YFKE)o3jz~$N z{=X&C2btMU(|F|CW#fwb+lYnf=YPrH}m`F3P6gNf&ZwdFCY4h z7;w*iXH#CG{z|Nau5?7vG% zDa*O^D}Q(FmtQKR>oR@CDcvvmQ>DNE?T~=|zYqU^AO8Q$g6Mxw{@<5R;FmYZ|Aw8v z^ULpc_y5nZ;~KF{dY@o0I6H|H(=;jkaCI2QxEF5VO zliTkO^aIPl@V6irY#Mr>dubm9E`^ZaZxWK@e=%3JhR1W-QUHS-vC?02zk7_Nt;9)6 zytH+$-SND}z$D~O7z)4H?$Pc-KLyhFdT+CGt0KBkr{NmcMp6G;U|rxmwLV%S5%oOR z{>^n{YFzovH|S#hQ{nv8UR?h5f9^K-fgWl(UP8wUMv(EaZXUPO^@V@uL2n4&+v@0dQpALr0&u0>KRa(2ng6*bM#z(H2h#<}!v}fWf(DO!=2DlRR(% z<4>2tKvu_np}!xn_J8XE-_T%0?ojIcWg!MLF9BO704B7N)h;DccN)M@{o3PXeA-OH zhFY%FLWAyMHkY5&B#=uyy$a&vn!w;s@@UfA&>+IuIj9WcP!U1oA zNphokmV8F&_Z1*GeNj4L6OfA^p?%(y__7FS%Eo{r2ux2cQuPw_2zZ@zuZy6Ju+n3z}0jKxx1v92YyY^m5?#O`Vnd54d8ub zgGu=DA9MA~=ke4oB>v8?Hw&8^aHD+ z6={#-F9;a@UC24$l7`R%>4iVB+uoaKq{U9NANT^Mh8yFhrQt3H1vJnar0@<*|9dBq zem8qyt4kwx)9HAzXMFQ*fl}Zaeo{u`*RDnB z6eG;!KP9;89&-h&9w@WroVThDd?AI56WB0f*g2r)vIgk&O(4pb;*s7WeGZ>CewIJK z09u1pr;+bEjkZiJ&zBkmk6XV`uDOHh|4Tq^>R$na_BCFk2QyA02>z{kz_uOF+$OF7 z%<~nbZZSNDXjJJDx5I#01g%Pv`6DLX_nlo}Lq{Jtk#cymG4o`gJFv1r`s|5AfBum# z{yURQa9#e#R-N)s0gG|45{Ltln}nJ)C<&ehKjaLRu;y_2QUkd2Map~@4fBMW&MV{< zfd8FsLfhqV1D+Gmg0G%1ZvBf_3e@LA#suBdz%Peuil@Ph9Xn9GC~6gMrOM?=CQN# zKt|~WOGwQZIP2=oaR-r-Y*pGWFWbED?q|+x<|sT%lnJh~>^igTSo6R{Onc3XccYNd z!JXR^9-Wia#@G+cjVt{Zfsdr`hAW`;$99@QHob?x44(nXWT}R&;UV3XQ^spPK<)U& zcZGAR+%~O{%N#J74|UK+*_rMJ5$f)7QO9-QWxZnz|TD{PPC32NY1tK+hT4~ zD`uxp%gbadGZo%5M&7RNr%33$yUymjAPBLy$OMP~eNBS`kc{7RrvJB9tADI^JbF7= z^z#)d$RyW(!xRvy;@e3K!?<~02pAaVS2yd-k+~t)~-9E4`qn+g`T`IvVW7v@(K}uJ&aoJp`jjk}eL@{W> zIP!guy12InD85JTgN@fp9f{nVW!@EzTT%d-{|Z(|y}+tkAai*O(owZOe)H(v4%@T~ z@B)ikoTS(-r(#p36QqDw&re#nla10w%d0cBrmP<4uBw1<4)ppVz^F%z#C7{!b+d;P zy`gq>0UMZsp5O+r0cV`T&Bzm^Hfz6HJCPH^aHP`F(j4FR6evnpz|Q&nkJ@?rJeEe< z*5$@n2Mulok*)*_@R*xUuA3*oRwEGzu*7OwwcEm|B3e8MemH{lajVfMWk9=WKPArC zjU%iMbZySODOcTVJxO33cFQ5|m9<6*ZWNJwS5fn-5Yge0@4;%rZl*1Ul1fmm96!E> zXk&P{WIz9jU5wxt^X-rf`Hr=&F3O|;IOSozx5eK`)}I@RN_IIU?;;Y< z+8QL>4%rBiF0WAOC4|6Y8W?nzaIFg?z3F-lXuC)lwLLc~^lbzg zyWJd@d)|T7XfHq=8J1|*fphJ9j|wV~3kaECvuK$b=2UmvAKU}iIy!PbRT6j>PQ*=g zd<0pZDgU0DAQ(*{{29m)>{CwO=ch~jrk01aUyT^{9v+%sA=GqAWcdb?yzv!w!tY;Q zK}|TsYE*xxL;TDSpHdcxg3T2cbgay$j%DNjc?#7aOSZ zwEHAOCQ(pU*u~ca7+9?UZA%(hPo=id;DeI}Olwz9fq#uSEO^o@Qu`b@Ii>-n{GdtO z8DgcsrOkE@8Ipm?-Z-`11BS90R3yg(5Yp&pN}v2_AXoP}K!6k7B0A^u^6&}<8uGQg z{)jA#LW$UyD?kDL!uEy7f4a4Gc$c@JxKvpjG@6OT4Vft^^2x*;2M07X?ss1!bYeTo!bHuF9YKZVT`F9E?^LFfJO{UAz+b zJ=`Y-4c@U~6|0QEKyg&I-+=`tKD;J{&*Hu1vbzny>S8TmU|F#cp*9i-ttC4K6uuQ3 zM2J?R*NM(5(B4bo@N9eLmICQ=CiswqR+pDNI(&kp=B@xNv(RTB7{)5cpCTRPk<{ax zF&a2=G#DZo{&513T=RSgEns8WN#5rMR$RC(k> z1r4S&1Idsw&ZdnjA`Y0mmh^R@@e7gG?>iA6$C-oE%8c|gic`qu8P~Z{xKIzQkuxkN zD)nJNS%m*@>*+TFo`Q!n-neLwM(lHQP#?B{^kYB zA&0icrnjvpekqc8D0rjflM9Asl5#gQ%ZW~22&neMQ6G5D`W^c%%}fkop+HqADaLI^41>8KxPwN7d$J(c5zDpyEA=mEa? 
z13eV}&cM~amv8g4)9#TTtvB|)3Y*toq&twNiSWZHt1Yii&=-gb_&ML`=OjvKLt?*} zG9gWC-cJ^>zfZJwMAhJ?IJ|FT7q9D!O?D)dKVa{d5~UvM`aHj=Q*qshbVV7F|FaOc z-KtN1*H1TNjmtIZV_z+ixKH_Qm*_NVj;*V^IN!s>r~GZ}ENWiy9qmT=c3<<*K?3F(O} zR?U4n^o{Y088>W5LX{CU%nS?V+9g|$-KVP*h9wW+EU}S~)MU#wjx=W;W-p&AQ?!{f zXX7+le|x$`83B2ErM91Vg*j5!G}?s6P3z4y)qYO*Z`OTS4Ar!KR)64gps`l!>c8#k3UF5rSFJqL@GzOlDAUgi;tT%xnBzn zG9VG>dytP!DM_C2ETST}fzssJ&DeZ9C{}(NB-0DdM|y!GUj8$OgK!po9z6ClxZsA%#$y?t5R?6Y7xbXREi@YJ_J%`$dTu>W)9eN0(#V(vOXb z5(TBE|dypR_Suxad}A2<=+r@3)7Y;v`5+ zL`CkR=q_E9LLn=f=NK<{MVtB(SwdZ7IcE9ZxIsIU+hYR_c!*wt|1ltXuafmnt!BO* z?Y=zU8%J4Q_IV-1+d>6&PODxA2%Ai?es$MKOJnHD)GNn#3i<1J#Prq^5G;)RX7EkS z8Y46^bL?oW*(%@`WLL!`TSME ztE0Z|dXZPa7fgY$_&;?03IEKf$ZX3}xL|8P=ZEcA>iO|q2214Tr_E4l|0{j6$Xgoo ziQ`S_a}p!khS%jkHT3wezJ96oNX)<^ADf|=e2W{Q@#a&=RYbs~yB?F$s#<~?leEJ2 z@xhC+)CM38diofjhgc0R&cZM_)Y}menQKcfrSH2=Vq|tNLd)yhsfI`?2m)^>g7qJotm=&0(p$3uLR(|tcY8<`_U9d2tCnf8$<|%4^ zg}rf(_(U=$V&_h=%hMXhWEYy^d}8?>(q<8^4!SE(;&fYV#x|959VkplAM{$TKNomD zS;-PC^4UAsl@G7k#4yOP#5Grn=kwzof*wZ3@WsBWUX5K*CIMA3zBjb?g}9Wfii`7s z2KT^1L(_F56yxQrml^8}5wj^JKiyiYX)|qt z^OZkF%C?>axhcs%`RRG4TRN@W5q>;Wf(w`bK)NOUB0f@Bj1g%o)(5~3qgjb{gK=r? zHx9rw+iq|v-X#cWU6Q4eA)B%s>~Z?U9@OSi{AYH$(#ybpDZZNcZrv%kl)k(>;@j;D zpb2^$|4r#SsN%`WfoW4OKu2{Yu@8^E~*9wV{w6!y$ORVUP8CMAhs94;4gp=^c84N@L0`8dZM%7=0KK3DdZCX z?(7xZ5RxuZD9-h_UKX(Gw-gohlWl~`t!Z+!(+Bg-CSIqO%v!^bd*2%*;fX&0aaCik z%ptvEm;V}GB@0gVYizD5+oYA2d(S?KJ%rg0UF7*i8PYbObHO-0?aUhe4wVPxz19{) zZrd8rf}8 zg*+in`PkPcwga}{9@WT?L-nn7=j`cK-;~f`d@P1?+}D~dnK*?}c2Q<#JG|$dUtH^z zZ<3LS2fe(h>wE9<5l4P}kWb=W@RjSg-*N4vc(aUjKG2ZNA>KW89by^uc`RIEWeDC(N z*GftcWOuDI#}`*BJPanj&b^;$(tZ`Iy{^AV4!3Q_tO%|Oi@T!d(&`{e-Ovs*_dZ$a z86tIL{*z-jUx8b*w219>dWDg4Bf_>u{-MOrWVeaZY$FgZB`x?vrPGx860G%Dtz#qO zl=Fp7YWGRl@Ynq;P{pGrxwq7w`-X}0rEv6m(jN#vU|{`j;SSRjb2u~u2jBibJ?J&Q z-F|`lTzXMZP&@d3S($npVJbR0{ru~%@b@O@OH}CugDrfz<>9pSb6~Xw%9na^3$)PT z{M80C3Jr<}YOcW^T~8Aj6}^A7gwD%&-y;(NIX8QW-2SdWYfFYjOxkwDA}1cDo-dAf zL4O>Gxw7z%E>F>$A!Rny4V7KP(msBYZM|8@r%-b8u~(qYoE@(^UgX8*%iR}Y;O&iC zN-$51`aahE0KT^kf8XHX$1vB})e=a>rWO%%!trV(7qd)!fw*3(#PuT4D$1Ebmz z%>s?kw(aMZ<1_ADMXWz21CdF4!sltk$!BCG!|6@h-A+ah1z>CP|6%VfqvC3}E>Ya2 zumC{|CkY;qDqpaE(BM2ZFmpa0u>@!mSD`w0DvBea|`HIodlgFD$07o ze?2&#=GXr=(WTMx`0{YWZu}M|okBJyb*VC_3{ycybF9XY@+BEI9c3}ciV#x_TCDiy zX|~ilj=R>Ol#{FpIr%G!j)H;G$sxk9?EaB+z>2$2W8eO)>4ND4(?V2OZ{Jrjc2f0g zc(hm2^|{i^Cr_Vv>B`$A`~EC2e3f|+Wzjc$TE=kB2VbUaaxeCdB7|%Bz!K8>_g%}c ztc}iJRB6obkUni8cGXzhi$IR` z9#1$gsoSj`F9Cgwy1O+a)f4nV$|O_uNg+i(g-Q{MQMv&h4HOdiJmAZb{wv8$FHGfC z@^*7L8FYM0eN?Iym%-TdIXRIs6*If3^=ue%e-y|p_m=bpBgL=ymFyqj9Ye{mPc_OG zs_84tg!hs~=)0XGoZd;)l`_Vs^J4CnKYFpE2OC?h@c>1iF%pNpXu!giM@a{}85c;I zu8f(xv~`Gq5zW*!7zbslD#ER}q9N++uKovep%}8ny0Tof=G42qx;!i`L{Y`;Y9Vx+ z#TjUtMypyNCK2TUSsP5}KBXReQ?$U)Hn5=LIB1R}uC!W7y^~xqJ(i!*Av0u1QQYGi zPopVAc)WhsxQgcZ5|p56aSgtZ%z{w#n=Zq_lzXF)3Y2 zW^N}u)6qWpM{Vxxv+8{JA|jZh;>pSc?b<`lmBRS6A_pZHCOE)(m9|}nx63B+f{l&WU4-_r_DjNHI#k?9>(!SJwb~IJ=xA$JI0h9T zPD=IA0U>x|{00dXR736_VvU z$Gi8eAaeq53@_>_YNJRUxlmv1)3(P={WQHP=&Pjne(`>$+uPF~I^cMa?zYWB+FODD zc^^j_pTf8%ed-20H_IlMq{ z`ONp|8T4-B@?EISJYv8tRatUVt61_l2*Gfc!@w_n4VT_4p2_Q+$?NKhdDaMf2Td9D z`dl}5bl>Y-0nsIOT-L92GQ!?B!#D^TaPIqIbmc_3$Eg8{fXr_(7Ai|*P1QYE1>`b{QW!iY%icB)r=Bfn_UhOHzCrRHeQo# zp8DdLoX;KmZl8WFIlBX2^SxTzeR~}QnK3>eJuZ;kCY!%LV7M;(9zJidemY3AaH?e1 zl_gnR1HBE;JgH~@`o%VxxU1uph0ubSAP}OihRu2%ix4VT8jGEFpnGv7);d zFG;<4Nzew;iSKm_&;}XBCeWT~Jrh}Ea>8Af=k1u)&f0#m@=L`MueaI(Hv#jn82o@w zsH(=@!7-Q-ZV)#7j!HVm^p;*o^#@GRkadEHGMP7F#@al?X$6B(I6Hx`)NX1736Usf znQq60Col`%RVn^uoEnG$@3kF8`BClaoOI<-|7r4f<|<8og1#59$N1SKALJzi@<)S! 
zG)5F4>d%ZgbmVZpr-L5bM}Pd%y;FUMZZWOgAuooJ8O7<(~eGJ&fz?aCi^DySL)~N zH1~?crhJCOzYsj>xjB#mMnN}Jm)t`6Ws-q*UKvWC$3$xTP?4J&uym$nZ4b}pM9@Se47X)Je6(eXlQ7ldsn-AeMw$DNj8GV>0IATu8Qov3(7Qx zt3vy)Lul-H0r!(2FfW}Sc%Qusyo>l4_u}}$#nqCt?`Ycm%Wd=H$#~#*0vRr{ACjd`_wqWqOX72lo$!_3{FRb#-PB-A z5g@Er6CQJ1w@F_FLMGejL)Opce3!*3fo!0|PJ3++`JwkBy>I~ekEZ5HUv3QSz3Un4V0#}uy-?Y-gB z5+mng_7MHAx2v8J(D6N}E>5y_i66gq=6RFMmjl54G9Q}zw5W?wv;Hr0zoQ)=yjN}XWqyp?Sq(b4XGU|? zMGh7!a09Q?knnoi{myM=ksy92rpiqgsrv$QFU{hpRvszhF82k+{ao>5fIW`$A|3w4 zsX+EKgB}w7mPg_DgSJPh0l?Lk^KdF3MTK?^1u^6#_ifJ0Gq=c76O^@tZL6{2EB_R ztz?Z3(AKA&{A?Z42oc+c4!WsiOI_GMAd&+O0l@t0e}Z z^h+mJr>d%o-ol3axq(N(}SldY|4>+|jjC&%?NsLmR z?fY;nCxS@#cG@GK1*i6m1Gl?M?m2m(jG7Q(D6gEXJ)5p#tJ*( zp~Z-5_?|IJM5Fj78qs3o0w#ue>a>?iMstnFk@AmmKSMlo@mT6Zj9jUEeoW%zz61JV zL%mz>am2GXp~7pZRBm##K@VTeVOJ2OXg3Kui1&X{O(0B}^jdSm+^c^&UXOzU2Y><} zF-OAQxKt~HoKt~4NqN$(sKOW(FQ`z3B3QLb#$;lt9Am1ALXbe4s~^JoCVPo@COc~4 zxlYmf6H?XqrPeJFOx1XZVYMwNa8?dXpZtCG0emE^0xPpr?ij~Zo$Oo4y3(v8CmrX# zTR?%Q=l81dBcbo-)#Nm5Bw^ zdGoxfdNAC0UI2#kv7&@LAkyM)BNrQTNVe7Nv<61Gdf&nQ)a|h3f&*dYT_=W_w94A$ zfQXTw3dBm`1-iy_uqOsz9b@70=I^`!_v>l(M}`t~jc_JVSt ztH>B4+;ln9BsoEEy)WF?RopebO}4kg0KQn&xcXRUllA!;D%=E7|2e)$QQ{tKU6nSt zLpEAvzp(rA{N*7Mgk+SL5ne&h6A9tt&`&(xU(T6vJ?X`FU90M9q?z0=w~Y3}QBNt#xUuH;VQ!ysLSaR|oTS+2&`{;?|EWi3=W-a~quHtS4>6?V!b0$v_=G@7k|d-fn+L|VTH26 z3V>%h!=fn}JNmS{+D788f=~biYL>yrzE=Zz+m5}x*waOZ+Jjkx{@r6K0CGRkN<-G> z8QR8z!W#tcfr3k(@as$v7zcUSKmvcu$WdmiXe_j!fsIH*SVC5VFLdlGIB(;p`(WtL;9-d=VB*+sW+6z~+F(ZIeQf~S18IUDsS?ak2r!ej9!l)@$ z$%DL=P(2Wf&P+BViSBlWBms}a%EQ?b#TO=tn{^8`MD_?lcJqod*B|#FWA1AI^K*^n~ET3{r-tNj8clAgiDGQ|W#LeRa6qh7` z(K^fygKve@iO5thh|}MpPeh4Zrqv_@06D|^sbDPdvIE*S0B4pYn(=vex(U^r#RW0o z#AiUp-cdES55-lpmnfRQxKE==F7SzYD4Oxt;$QcA*u+T5xWtR(87mN-Qk%$oc z(~ug%AFc_9?9zw_C**Ioyv)qnCLh6VZnH05?z&0C=@t&?pol@=Mns_WDLR~M0mila z)_bIS?#&)Utk2p>Y zsl3p6og6FNu<@uQKGK%{bv9t=`|GZeRp!-U-uc&l1YiP*zWjxHuHbrIdUvfO z;pa^)l8as9_~$HuIT?H5kuIWhd@Skna%pQHM3Qd}f&%?;yM%hO?5JVkyrENSN80sc z%-!=9XxoowV)fac|>BiuLGbg)Wjf#>?uEn^z;_Evhl*`$|12ewp{T*@&j z@5Su$kLopaET>z`Am&w~Ctbm(`~420Y^1SN-Fynl9m!zKU3uCM?BAe7N1R$KE0&RH z_}nqiO8hP3?Za@xe^qNZ2q&wyMS3Wd3fOO=q-lO`K%p@-jnpe`HfX{{9ahN95n?dr zUp`a`8;-lc@zW1Dea=cyTP|p%2ziY~(EcO}quY^z&nWhDw$v=c9aVFne9gS?7_IUQ z+XI7=ud88=`ITNt=QD58juq(6<#n5&nqENAkDu@!|#tJ#{v)xqsT9?VO55j_J<=;GsqUJ|E0>=!K={kp=CFUPERw%ag zm&9!XKnF%h!hi>@el|GY7x4*vx>`(8%?$Hja10L9Owc8@sGM#}e@+@G=NQO+cUII)xMe2K4j4i|JxRE9 z(y-^<57^Yi=VqdtCX9R=YJhiN^Z~ZYSu9AMmDpOT)VBEs@`nbt%YmIgm$2rxGcC9V z1zr&2OuJ)##&fmIBXj~(8PGSKmZ$rn(l*=BD?PiKj@k416Cq&o_+|gAB%n(=U(0*? z%zIs)fuG^}v}=y-O7`9q=8na= zGNm`|W3Oi!NCNJKzv5khf^%uR$bGk6wdeWai$RyTy>a|ATu9SG_}fLy2iXw82~sw# zI0Yc!FY-tsYCNEm;YN)sS+!zM*FmmW1;4w5MXopS%OQ}HC23I@tFjZ|na~Ft? 
zf=>ar2BQNhT>&wTj82&oiz-CLw1U|)d1YQ8Z9DPckqi>VifTw zxiKD@GRe4dNO=k9Lrt|8ro2N6b1{}GE2_uPnc`%2Ws)dsAIq!~9qD9_(ZIEMLsign zNZqP2%VGC$lPP4L8M1iYUmy<^`=nI@B-h~a`X%b2`BatdBMPzP&}x$Sr{q_LsXzL$ zm`Z_rL7DTo(MfCM8{S#Gxgd4pc!E#7gz|1u6cTR~7IBVHV?|wSAg{Vdnpv;cqD@;# z@RDs!<#HY7);`w*9zg-&?i;?AOCT$Y-AT{2G+=7Chgc4&U zJhAaYnBQOLgy!q%QxqrrQp=-x*VE0H{S206uq< z&4g5a$jGQ9XoSYmE%n%OaB@my_Q@fOthZRnnb0C6*)2^k7QK%3oj@tZ!dEfYK#o6 z2mfw$5Re)^w~Nw`EJBr47za5c&>*g0DSlq|haoejKgUnsO9(=W=@mXnb))C%l*qP% zP%OIXCcnl`ihS3db0||+k#bN%o@UaKXa~8NoY8OtW3Lmi?2x>UCVr@Hi1@>C{0F7z zANDL*+lX8y)KT+!U>rCk>C6EA6(DNxGXtDhj2gXxuM|I4i`X`p8uOF&GW(`{Ns1e3ug`NLCR2& z?7tMJ^=O1<2d6LYuX^at@oJRZi1PV>Fps>bDgbEW%4w6#P~m_sVm+@p-~~=vM0tbd z)^dJM{-#BZbd64YTRz3Lg`9o~8*R|*aFoqEeAOLiiH$DI1-xAyQ;+ic=tZ+_6({Pj zOs0u8L;M$DXt(J@g7q&e1!2nSC>w5@D`6@!7V6?Kg2>Z*mLER_ZjJAE{O06|ooC(6 z)xbnLeAeWJjG*t|@?vf82RMAYrCC}(42y7FGK#1Kv@ zwy;@$ZXWQoqv*7{VZv&f+`T}}2yqyiqfrnb_m%4-iaTM5_sX~_^DVPASi$^kU*m@w zwYOjhV;pwht|9w{)`p?~>qeOWGECK7n^kGz6IC5~YIEib#i+rcT;hArRb_-HM#LHp zZ&e}9d01zRN4%>yX!8xZYU_Wl+P#{8%c?cRcjgN=Thd~&GiXSnnz`g$bD~c^MLqn& z{w?G*fFAak_aUeT-M~<;V#TQ4N~qSc--tNm4hKU zja2-C;6Z0fonkL;G6}Hw>l#CJL7PfqScI!Y`j?0YVWH@E5_{gcuHv{R`tQb=wmF-vmKNAP91(#i=g_ zvcbg6dK1cLH59t=gk+b!q{5dNm&{_srBg5%9tQadtfAw>2(XtXR#-xP#wdVt4uG+s zhkPev?BYM&MM+Zw___64U;5L33wR;Y$3JWGII7>4n_gcZ-0eka`i8YrBR{$0J$Tnr zHU=yfXnGGs$22*nb_m@RV3s@2u4i*n@(Gexm$lL1yUqms>ls=s)PFGT9X1Wzkds)h zHv&@TJKbD~1nA}O>&MV6+t)Q?88`1EiS0mR^+gpaZ(7w^GOQF4451DUKOXri(iei`1 zVdw?lYUOlNT1>C!coz1$`Yp=WwhnId(H5E3+WL6GAf{kBL$rb6#1JsR{5GM`mP-_# z|FTKmD@bt7(62U#pP=6INI9IJC1kaoT|=$x#C|>N+buv&b&0d?GVHpe#s4p)MemiM zdw&aK5oOd1%Pk!g0ex=3*c{=GdzA!Vh`E4z#6mL)GglrX%_DqX-`iZlk|A+IVW2zK z0bnNj??4wui}c{E5iEhgr=zLqT##He!Mb-<7HrqqRY+G8{MN*H{B6<;lxQltO%jC+ zwi_9+50|zlFIGO>v(@i))Zag;7x1K}kNq|2vDYkBbzZ;9S=?#pb|y!rknu_s$uyWe?*hq7IVJ zQ5cVP>>~nw*l#%z=-tJVk|k~K@yq{d9mk8&Obtjkn{p?y&#@-^O|TzHF;qGXh>XXr z#0AM7to13ny2$M|KR+#-sW{ZPA!iKuy=ajs&kR%9=OU~L7iwqdC2js z3~$Rvlm;3Z$a?$u$a>(YJx=ZE%5g*vGaK&2(YGS*ku%@jU?KrF@>Nv6ar?j!W|Z`J zfR7o>u3sDtq`SX1FZH6~BXbuRH;}YBC31T+Xx9IM7naV*h;xo6BlmAHmdo4_VwFnp z^FUqNjgbXH6H%|Oa^;`|oa|IcxfRu(0*!{^3Qnk|l6t#N#*K5zhJe(p{tJg}dJ5>9 z2kpMRA>LaJrdo+$%iP*g<`OMfFwSooCfN}eIGS9oi83Pe+1WvDtN<3;;_Y%kxhk5( zmAe7p%p?#y(+6SVDR{U8XkX!P$C_NE2VYim=Xl~Y1OswgkypD~1a*s2PZAO14GkCP zRgIL^Z<(kv(h_TGwl0wZf5Ii%Oa$dxXBz{Fzzbo2!^9@<;ovLTV1Cm#F&Mk z-bXUuq{_#<9nq${klSl-VzTZJWj(md+8Bt+akiU}FXDIZLkUW@dJPA=psj8GWZ=?6 z_ux$4kuV=33p*=CiP+Y>5Ov;=xm&U#E9Ib!T;BuPHaruJ6FaVZEj1G|>TA=hafyFR`7So$1E*rC3L#Q*niYSk8kU{|P~Jf4Y}-)?_uq+vEUxWLN* zeaEbgz8MNEX)V=F?Qh0(LdOSq>eO#_(tes>Ani_xY@|`{7U)~%i*r&J<7UBXN3KFX z<+o5yJ96j2QIS07i;p=qP2l$t=$e3kQ3_Z3k2N*#j|E|#e|Tjr!-H9#(M8~aro}KX z%v1$M{}TFc;((E%ni=pJ^j|96tmpXAH;T=I5Ipz~g0usQ?h0}86(C~>H|B{9hrJQ$xt!>E0U&$Kzm)L&7@M6x}>b|#mN_zM5eaj3GnK>=4|QiP+aVq zU^xlorvm*%Vnrxn61+{)p3a7(aI04yi#>jR4^VunP`=l9{{!>$4>zqaX}cg}(ziMP zo$B8eJqIZBKQCU(KdE!I)c-@B{~zl7zl=I}Y4dWLD)@U8Dx(f;%|g56EZ>zLngQ~@ zFNm+3Fcz*;a91h9Jc8JZDwux<-T1VoBgt_Pn_%@`J6NfkEwo=V6+c)AtAHWC_lpDJ;)f)@z#t7IL0ae%+D%`jgDbq&! 
zN3!14(oYim05g?@)$;B5VxmJFZqpdgL4zw!jM5t>^`8RhZBlw$zUKwt z@)Ft@oXK(6AZ1*Khq$jV0&bxeLRdkD33yvI)Q?<73X3pBVtIgiCSogEciDP4T1yJ(E<~3@_IpNKm|H}QcppZFv+;;k++ zQ9RNIGLJufi(;SSUd<2bV3glX>VFhq)#Y110N(7sr=e%7fTwJnz_xh?&I-Y2XAx;* zljh``F>$jX176D9nmdkIl(AAQZn}!NhL%Emwic zU%FnKq;_4n4a*~!clEFG{3+5i|L1j6o&(AO@r&{N@ z+-2weXSQ}HRi8`ooz#uay|rdwI$=mhfZBs<;o0E^wn zCjP3P420a11IY=(@K>Eb+FCzRdrOd@eD0=GX-LM2ql+M3AV{j29CLHc0vB+|R&#N2 zANk?1#EB363N9uGAqh10qfRgKKJZ(a{B}CKmO=?NG;_?Mca*pc{lXnN>Q-X~fR80d z(B2)ZqKb{w)BDEetMHN{xG4$hRPK*+FHSrT(f#Fz-who~P1n>;hxfBpUPOk~Ro!rv zGI!bM>hXhS2u}ys#*(n~Gd*u2n38Mqa_KAHPmEjWq(t*tPW`qiM<~&UeZCOEvW_slNsNcSl}j2eejT6&3xk zU~*LW3MjHgQfaSUFI}|l!SvysLyC0&%mmlg{EwO7nkWS0D}9P1^cX&+U75`e1;1@g zuT%5tzK+k0<>v&tiny(|R*qk=6&U@IjK`Z#e%*ANMG{|xp&yKi;xXh{cOCG}f9aC) z-t{Z4S2?w8AEaP6Be`G^G{^Ox zYCw-B7^Wt(^+mw9Xo;;H;LI#>YvhXl5_KR3>SQvuHruk-)?R9vsS?kKb00ITqG}aP zFq+`_39J&?{8Y9{pDwgRhERq-Lgoo&|2#}lrc&Oc@WM3(&5%ZTdoC5OKx6Va!`2$1U@$mmV3p39HmII_JIH=JrDe5 zIA*@7b<}<>TChYMa1p%K#6E*GXW6aG!Is4Ug4F(R1Sus~wZh1>lZ?WBXW^`ER#!{a}s)*HdqcnEL5W9n0>iO*2$D^Y9TjVi~h zdQsoq^r^|xYoXO2`_3?k;(Wxk?-4T4K2O7&lopz*^sbv~^bVaWGYg$+bPkpvbLE-p-FG2KG{`eZ1jCvh3*fvAo`v$kr^JMyPR=Vk&d1QLCYD=Az%iC0{* zD&@`oK)E^ZpcJz-+spEAF!QGhbMF(9tljnd!_J>p0r)uI5VXW2dm!?<{ZeU+%U@ct zpZo__uCn#V6!dy-$KA)wxqq>6v2%L?b4qbNMXwd0Ik z_&d7mO2*^)XrI!ui6r+M>Bp)R*^R^?(70PyoV0QD<(V~R@7=``-5P|;1W~!#c%f@| zDn+QbRc8OU`&?1=3#u)-#+)%0B@D;kzeCQKoaXu}Aqyfu=#QAB@NZjfEY@}q|MU6@ z(8o>S0zj;rFWr+lTFcsRScE`bD$@fEAW^Qhu}TZj*0nWW5*}bmu6{C3W^}@?tq-#7 z`ei8&dGyvEuZ%#BbGIA6-J3+6zO>(IX+Khg>Z9L|mmID~1+!bOKF(Trt)bEpn~Ye9 zW;b2+8C?6NOg~|%?zf5rPk{UdWrSkaGyvTX>K=FN(`~Psij#Vzg47(5@y~vN=IPhJ zk~6@b=l{VEt-+OXAN$;Djq^Ti>XUNYEfjS-E)?}WuCR%#ij6E#24ZG&#|!@*SdPm? z@tpbK?wtABuEC?!;~%}nftXQLrfq1bu;j_UNYeHW3R8e<`+Apuj?-c|kDChof5(Py zH}DNNTZJEKbH@~{ikR{+fbBBqEtq@tA+3AVfO-u1f;kj6oI@`=W`#8NVz;C>$9tWL zBv!K%n+nkpuoV8F)RpI8qrmy)=n(VyK7bE-OLB#Y%H>LA<@)3wiU7=~ zCSVNUe#sw|qI8enN0VIyAV9=%D)G32v{$2=MIlej=Po>7f_3>-4ULvvuE*r?#*LXrlqzyG^ePKgV93|oD@eE6GfK~cwUsImm zobB%EhkshZ_5YL=+?T>@a7?D~AS5-VH$}Skg7ErYIDr#P`tF;?9|VfE0Ks!cw)Ckb zIGxD?W1uI}DL6l53s-aEM{R-Im!hPfgz!1D9K$bvxAUH7qb0#Y5SYE6Q(+xhmolEv zF{>#K-(H(^>4{03BxUs(9#+@Ac`Mi61aH@AcI~>nCo{36zr$S;o=rCv<@sHbwbf_L z13e{5>7r)WQ-s{d30KfE`Q3vC&y!S1&*Kr6NK>bh(7V{8-`t%xNjtfa4V7f>tz=wu z%IKIPU?Av`kIkBPo)N$jyU{FhTN93WksM{njf1+dLWpoB!*b<|CG8TZ7s&iy6Oc(O zK3di^N|=H*On^*ICl=kOTTK@BKp`m)^a$`)tqQ6({tIK;zqX1OL0FguAKk-sWY-3_ z=*qYlzCEkjT}vUDV0}ZJQw8|4%ihJhKj34YipOxuQ-WNZ)USdhp|v*GxevPZuvA#v2O4Hc}h|f`IA{2164=zwCkb_+5!FI zx|6DII?JOxsft5dN^-`KKR99PUrL#z=Tdo=6H9*>)&p;r+e^ms|H|$^dp@PBdHTwk zYQzs|T7MuFjxz_)n!^8UXiW?9L_u`x7ZX1=JyPhf0YhbxoaAeuf8x&ztE9Q3-lTCa zu+TL_4latR+qVDcVoIC+mlbR+Bm9!zZK1Kjd|_Vlo0iYkhGc4!M-hKu^ zA74F(u0BARrA$E~=NKj4=O;fp5&anquHMbXm1XLSD=fNX4!0hr5vO3c)V%vdf+Np^ ziH*AeFqlIo-9*8D0!f%m|BAu%YC|%Zb+<77&JPU))&Fuobmim=0TIv6bO~>{t7;$T z{${(k*E7WS0@vqRRViEc72`?Aj!gyJ$)R4mm~e*Oy6=#ADMjmr-OJsH`h4{Pvb8c) zKuiayx=N&b8b(RzRCvZC^ljH|KkP!VbsGoX8-E~s87F8H?TuiLuQmMqw7{@{f~pEF z!&>M3IBo8md54$r#5X4k(+#{wZunDlPxfan4!DGvNswH(M{%$uxc4Ox%B<(BHLKL>F2} zwD{3dSEw?jQq9th`5f?~rKP`Bzg(vSXCKUr-x4;2;06Bq&cH*S>@? z2{ih&Li{M@rsHYS{oG#TKp75^$?9G6JQ3OZdG{9$2rHp%4s%uIY#Q0#mcQN0M|MD= z&fK_dO8Tp`<7SYkxSl}ufveforuR(0cu6_xCP_halH_6l4VnRQGFL6=-v=Q|;6N}$Zq~z#c zT?zMd0E+G+!|KHqb&k=@0H7opX$oJ5ku8UBs@soA{EKEN>aK1~6_JNPbt8P+P@tVr zfmYoQ;4HxD*zVwubTuuw?s*}`SF#L%fMbBWy~As=5uNulEz2A|!u0bSj9QY9w0~=? z+Mry02mqWEO!yd2@X@zglliTEj~9Ot=78H=A9K>u5HODV@27SA~(xx6HTz z?e{rL%HIPq)%e)WzJE932MfiXjN-W(sK3j_P6bK;K4P)5-? 
zU$3@+G2>@b7MYmyFoef_%%i0%O)Y+WWaB}&wL-1f5OZ}H;Bj+98vIN zCJk|2<@k)z%-aL*wNi1f_z$+k^IIOdLnmi`J9kz`9hQPHEGAd;ez8^p(6aN0C7;CP zB50jNubs9kv+hD0FNPKH7u{N1M+~6p#~EdOyB7BZAhyr4de@kg)|JLp(+<>@q4ZsW zvMOeA!&v|#m1mw8TgbQK2koS%3jO2K0>nc_-{k^op^?STJPra;X#-Un==tWj%UplV z2#HHWl$LIpj<$tyaJ0;iuslG5*& z2)ArKtaFgdd8!gix?`xlVb!LF`)pJEwXzI1(_rb=QA?$>rZf@@0m&I~=D*`zAAqi; zO?cbZrlPJ$P|RdzB>vYEx#cJ>4TagxOl*{Jq%nwLV}JU|-|f9pcg)QY1nf2}v5*kK zYwD|hHk*<#FeNp`;+tag-c8y-Z@2NqI1mB#Us6oQMf1CLe?}yHgUr)=9`7a?Wlio0 zxldy9R@r90OQ0n;O0C5gI`F+Ob+4j=;}exJ6;q5QpgMLnSO`sV;(z*^GZOFaR)F@Z zx_jRjCn&z<*Y8hG{#K`j00_T?oAYqB~%cYdlck^0WI&Ud`!K~3@H`Q6epim)Qx z9kYJbb+`VK`rWr+fXLAzkxz_M(?q)dl1T-n>Itn59`H5uD$owcY?}aFKj0~X!n^MQ zt>NlQ9_=!s5V)*g)f)52YyDWHMN(mv=%e97jBcWiQ2US{`Zk;5b%SwF6B%dU-%9e3 z!5~FW5RiL@7dD$S-<#lJCWQwDJz#vYOjKp8lgWx&j<((#859dmTh`3Gzxfkr0~tyc zmO!7@BMWb6l*5LDgqglYv@WPQ*G$P^-0G(5_F5f#CBx*FUj>VbFy%_bFR z=YipCIZFF}VOgwC`W!46SZg?*_;M*B64-in$wuy$Pq6>yhy0FKhNll4OazKvfXU8< zFf31lR(NfocV%f+#AymVg>rXcas0y#!IS*~?U=p9zR3*U^S16}n-80)mtnHI{pAFW zjPFuzTYq-HD13QVmGt3rURiq3r3VO$mW2S-A!OFS$Rj*jn@q0Kn~oUfFMe}6Vv%HaStmZG_pEYDh%#;eqp zaBcQJ>M36P*k?^)laMS^#O`Sz+F zYdVg4b9R)SoXyu?CO2G&InOt$kS`dx4ILB%oodrDlv-A9{QqJ+j57vI3=C z|H${6koR_IOHY@!M)Pxnb>ChT*QY4nn}F8Xm>pVAPC%vrIJuKRJJqzd>?IFikHGI` zIXdVd{?$CKJ?NFX+_XL<*n#Aw85$h2v97D?iTFzr!yX?%0__MqMj%MBeG3`N%ofM_ z89he%KX5Fnpy~?bZ@t#mp`Xz$&a3p|vvnM)GR{0i{A#+Z+1!wP zjT_*SUAPJb6~I)>C>U*+;;U3d$X2#{aY+`yln(r@Ac5(4fd3>O_?wr`Nb}#uO$9T1 ztnl7Fyv@=s(T;4(#ian+a^=|ClC9dAR#yg?1l*jHN5?ER^!TN@Qq?m5;N)x-YYBAy1A5_^mPa5?-V}J& zt^V7%y;U!Iy3p^-f+}HkQYAd zgAa~O+j<;7i{;~gb9TQ;q*0 z;X;`aog_p#$Zi9%aOCK^DJqsucWtcz5drYaBauZQjsf!=-azn%f!14P!w;+Y|J*<- zAtQ?$ss`9#$4z3EP%iY!$bxP#mogJlfcGpUfTy1Gy8B2YwGb zi5?;>iCiE6m^?uz^LuUw!r?D_VkSjJagtWt#=hxuBy*7m41Ik3mw4OE`qV!X(&n{* zt-B)@=r_oz1C2(Gjp$1ugN2ERZkr>3cVN`O&$52IVc+6SX4O`vL3d1ip&r0kcxNM- z_oq*711K15rC!owtm(WTlX*p%lN9iZ1g!ws7LTb3bTsv*{6LHUJ1g61wn;F+`c&O& zT$%;!9R~OTr`Z_rex0aV(HBdT(=;)>zuRyy+_30f@vF?SHeg(1=<?fwj4$nVogSO3%NfRcU> zLnb6G1}TEwI{%e*IqAB40dYQ?q#GEH|3Q>;d4BR)~72&y>`2s zq#T%+DSDC_v0ieIrBI>N7&-)*e&L=PoKEhvt{nac4dy~7`p;eP<9G!-?lGw1?yv&| z87J8rhR1mBKcNpEH1a>+2k%!%ZDV7TEFNQf#kXCsP&QW&7qfmI%Xw%26>+t3=2?FK zBY5aBgJToU?(-te;^M1KH3P#R7(qZif|P(07UJGIscHM7>iduAwQmY@8|C_+@n zZ~=0?rI~vQ-H;6(JzTk4K{sgmgw}xs*e&4FZcg(|G5Yr*hwK>0kiQir3K`_`i#zqB zlR&sLG1f4;W$Wn;phsgj(0Ir_t;SBCzU!kf&!3{-rbt$!3hfoEDwc(ZmF61CM z3Qtf4v#<#2-WaG)dL4-k?)o>-vE6GpsPKS~+Q?}2CC11I2P6jmH2{b2GJuxR8q1~O z9!Fo$vRDaH%_1X8y#4ieAqRY2eEb!Krhh90fgtYe$2Qf4mStIQe;evCQ||9XIGbnL zBsS>_0sgIv@GU{B@+CpFX#6RdR7Vs<7AfVvsAl{1 z!@XbME^H^;N*ICB&Iuzhb1{)5NC2Eaba5Ap5kPE5c0Z3AxkZouHrIv!mz(RJG^QC{ z^@xBnk9%OpxbVG3pESr%y(-GmuWXS|7B~JIb#EONW!t`us+35HD5x}us5D5&C@D&c z(kUr5(mg03AyQI9mxO|J*U&XXNp}yOLk$cw`{sGx_j!KtuJwI;@87rAyVt$=lZ(Z5 z-`91X=W!h8aT=AFFgUst^+jlZn_83W?S{v5qA9jm9wY3zB$Iy*{`SfJX8OWB*rH@H#8oO z5(wcv9N4=xS4}`H{OH!r+%5r9BVq4*RG0eazZ%{MXC}P8%eDt){!KtZH8h}%_FIfF zD=ob2FT%XQ%dGwN7gJh2%%KK2?L=HTx0uDg8Lkn??VZr*oS`XntJ{nvKmV04|7S$h zB@hzghW1Z8hnBd{&p$8os=^>Dy!Y2Cd&@F!$v80DSUhKy>dvP6rxyU%S+VZ@v`0mk z61f1eza?@RF&{78Qe8Td>vc@b4L*FSR{)xPneP3Yp)u+1B{qb|bC>acU!xPTKLpr8 zP`GxXy@MzD?Fq=cBCcSoz+zzXEk27P0C3=;5F{X}ERhJd^A z@^U&Sr+eyk+?5!M__agvt=_qH%*5z;xK@?#8Q$N$y{%J`|4mV7Cl!HUVTqqN`>sG5 z6D`b$@9rhV@~h)$Z7Jl6EKvIWTgl;W9btA{PWRYzEcex zKh#wrqV4#rnz(vQ!PyT}Hzq@sLKwf(1S(;p@T&aBCi zrfbgcXztGWytoOpAq}^f#4V6VA4%WZYFhIw{eNM|i}s5eqX#@=6!Y*dB<59xotZrx z=mDLV@ZUPGP-=aa_uCA(>7f{JJKKEAu^Zv3#jqD{v-N-suy;zzm_seX{LTDiJ`WTO zSq_v~HVzW4-YuTrQp7n%pEYEX(e7Bgj?%XfaG_JW!&?~{w=(i6? 
z`vAOZpaSLGvsna6qX|$H>Q10p4W{}W5J~j6X&&J6QsQ9!4Q6N5>F+%s{>Q!kopu3e z@Xe*FYdYPL@k!3 zw^EQNVIa-RVYS#MX&XK(shJ6g9s#H;lG~ofLoR=GmJu=Fj~M)weFJhJ3j*$JG8CIn z-%x2^s+4(OiWDKa^ zj}O(S{Q?SPt6i7xJ&d}m9YXN!?yWBb)a+SZa+f`eL8A5&W!WjeVQkmmkHY^_ROvai z|Ic@c;u|%&u=HtNNeHUu1G@E&@n@dr*?%`3c(&6tl9tATXKfF-ooA;^{|8rIJpASN zI=L(B2kgL)@T@&T6}J%XOrDTvbdr6J5H_kq6JcOSInhWSY=%^alx7PJCT+F%t-|tS zRNV^j)8WyBX$OFG&DKVsyc^ZZ@NCeWUo5y{qv^GsIKrS)_HE1!$4v2u1now)$g$yE zQWfK<3b|$96wUIz$xRJLU9k_$^z5ch|82LV6fS)fg#mvSkrVm*{|79LQ#Q@0E>-ya z6b`092l#I6i=gmNxBqBFzI{*42@e?exV_BK9L_&5qyDv85qzJ?7*K)ooWvww^E#rkHmr2J(>db-#!z&y9L+SfqkxmYarda7}w~e zymeNoD2Yh!It(L19eaO;c74$qYteOP3%nx~ibq%7dgoGkdfW6Y$$pc6b2~TEp8E;XyaIJiUHjYS+GB_9@%H|Eo{=!DXNF zzd*M(N4G=&5AADl!dl#{2;v4{r_V>hx4=}TCQV?$RTJ6GYjMA%>-2fb{H384)A*C} zn<`NP*H)M1YPY9H0+Fh+dFs{e{hc`0?fvMx7h$l;GL~rWhj&ywP?r z?S{^o-QSi%T}R^Kv+wQeE>xUWOO}uWSRZ7n)=cl##0<<~5~9v1o|wiIVL&>mE44gy z?s`t#xHMnnUz1Ell=QaM^tDqsFqQ1XWXRg?L|y0BWKEd{&uU9i(uYH z;|BHe?Xz5giqGu&Vtu2-XlbT%Hsi6=B)Z2kr(jW~o51iCE>&gF>Qf53aUK$xnWZPq zGs{mb`;rA2abvy$xU&$|M0k;B7D*jvL)2rqN8kcjNRK;1}^IucnFU#7FC|;6(T^&q(9Z6qqwIvWU=XYKOEoU>KA7sKcf*<}A zmpr)7W)yrA|I+d=_}<^*wyQT@is-Wd#TmI1Dbf7gmlUSMOOq3cTT!bI;{vLqe?-o6 zri<|?Ug({mlma(z)dmgdy^snS?@_4#4yCk>@m|03r^i`$(4TeOF@7ymTGf{2qEo3l zc042t35exVaXtH~++e3XTI7(|0ZppIEZw*%qEYf#U_|BQo^w}!1`R3@-S`Bcqkcrz zy8}S4&-dXY{st+1gU%!V(E@ThF;3WGNq!BZr=gIdj>2quOuD+j3Om62{N|%UM zMY+E0?duW$gyAl}NU$CIs0v~&Q|rPtka#Q&=Iwj3E1EFoO- zpe;q|Grr~35f8Ke*O3X$$(ku1I0E?sYSN-uf|!+uZaG9884*%RCyG$s~PZc z{M;~;-z@**;uhkxPX7-VcmB0M_K2c%(&c=x8zL|225C)}bZJQjc}>!Z6!+`9p3Yn0 zEpJUYA$=QmWstR-peOr%eeDy>M$hzJ03$UUMvpMSa1~;DOgncdgi7e2=C@qE|6l^p zZ$evxj^P>y6iGG=XP*bz<8hl|X+pl(I5$D59hV|6Xs6`aD_T(rj|*Dd?`h9`3=*^D zK04I+Y3TDFQqNv+*Yjj9X(+cpv7eee;`Cb(_cBlCX$@#EI_#`0?MN9J%5eBmo?KPx zG$R5)|B<`3$XxI3C4?z#F)HI_rS~FW?a7KevZSrEDe|dCN2q#8v*xA(V-Fa6e4kJp zLKd_jaBqweb|eI}ZD9gxgwT>gs;5AJS<C=Vj?Yb9Bnrl7!mB0P~GfBs6keYFm zsYFrf@qQZk-~V(>y%wHZZ=vn+Cue-)U8~4*L$)n?h0OU=nc9Cdh2TN5qepG!;V;bK zS@%#bVqJi}*T}~>3Ek?}9qp^|%6b7EJ2B zlm{H$oXH?s+$yoTe~vI9w-aXXGvwvLHi*+4Uwl59HBZItH6Rx}Jd zeOl|WVAUkpX^;lVd4%X`hk`w{Q{U&gkkgKUm)q_PMrq|`l@90+v(XoHQ;?~_-u_^% z8C2oZq6e3%!>&<0rj~j~Gx4m6$&sVdm4bM_F2_;XVx8MSB+b0@b&AKSfwM0UHeysZ z2?ojoAd;r&&iI~f3zBW-Y#wQq(Qe(xw;(S=c1E5Wf3Ku_&O=aDKrOOKEbHSw7rdp` zBIfiX^lu!xe(o9Y`qmA{z~P|pUSzU-^%RkRDLkJXC7TE|UQ2L2 zBlt>P2w0N;I-~LS3HcU;BQQ=2b4|%L*OBnSke{%f8Wc*rbKQ;0MSlG-?YK#g1e_e{ zk<$|b4YO`tLph8mYe7j~vso9}Te$PUD(DorE^BP|iEVru8|*lWLg1vv?Yw&h$C(s4 zcNPw_@jkSJ;(QHpt4OE>S(<4S8A^D@JJbVr4lc8S4(-Au zjze$+^Ns#qy{`;Ujg~E^msE+p=gvV^hFFl{(j&48lUcFzZkmfP80HqczFjwLZf;^e z;~siPVijaSi~JSAj9L9qQVUo>SqV46=H*~!DhyN8+9jr|v!QFJm-$aZKocp=#dsxFFK-P3`5|0;=>BuVl`s zuKg46(c>tnToNVx&-SY>Luxsa1b`OsyD!z?3VR#i6h52h@h@VJKzI~M@V@Z(7xSVNfsO?RKU0>&~n% zME{OsLp%Z#g?ILLRe%Jyqr(IBTi8~3Y|iRVkffAqW29HN07ZgNU7ZhX`@#~zb1_)5 z*rdfUYm(~R%YZ8GpEKWaI{DQPs|pigG>|f^2 zVj=p~b8l%7sMLDqv>&ceJTD5k{v+6g;5m(YzkThSG+v-}P$;M;X48CO9-%`;Y)2CX z9A}o1%}Rq&PaWYbEC)7mgt^Wl8&L$vIg3HB{p1lP2WNffMgRdKtQss-{e{seW7}Qc z|4+wTvQPg)^Xg@PHQ>Km-bRQ6t<^VxZF+%wmNcb3k#edavmFPIlUBa)lv?A))v@rzsfbljS7u>6UvO;+AXF(-Zg~AyG7Kny zpZ%%stODHyk1MoyAB6jCoZmwqc%2{HAsO7}JnxTPV@ngb01yDEaO3*+t-cN=@#CE- z%f=O)`@2`7maP(ju33A66b<(WIh|TDVC->zFOk#-nbyzI=JwDky41rqg``fKzb{K##9hcR zSQSF%SL@hezDlWaxMLV30&}fhCl@u|De+rkr8~mrFi2#9y^zAdHDRJc1M*f{bGEQ}C{& zk_7I_UG>|Dp4Kabyi8BNJRrUmS5K0(Gm8Pe6_{(Bk(?16k(6{=-T5`!KWmisGrBIv z0_m(Q*h%4%i|)rcKI_jRK<#^f_SCV9QCju$ zm`i-|G?havga<18M5du2u}wvxq2NaUl3>0^Enn!wd^09gYK||WcA%9q-u=vHTE@Vf zGs?*G+X)}r{wLmBYBV3{z`vhvv+=@}K5x)UwlGN!xO|2Aen$md-yRSk4xvcPfHR5J z5^KZ@>>uoJ8UP#lN+`S?*E$e2IB4`qR#p%gl#RHaL4h%?zIj&TRFNTK9_b? 
zhbxL!G%b4946H-g!=M@TVpKk)Ol#RUEkJD&(*v_>y?NA=^B~AM%T5bpgS~_5y~G9( z(-hghY~u&%k1k&qo!YZYAD7^VZG@-8ftP;YG`xZ(d=gr_nEQb4&z1E;+?;;I+m+QT zNTK4`epBgj;06K}b+T36r{8t!PrSrAO;@=}8{)c&m>YP9;q|{Cm^9ZLrC!tOdiM9z zOd3Fd(jm^5H1yP`nF=SPfRA^{-@CgOzXGSWm7IRu&8WdAGRQX7ld^ApX ziuC5=d8YS<-huB8b^9aO(b&Cms*&qFul$ER=dY_gTTKrmF;r%9Cih}dIS6ytD-LF5 zAdgiK(k)g?HYcU#pSQ4Y_qM2)x-r9DQBn27t0xjF{O(#g`@>s9b8>(RrbMYz$H=I6e)r4KjgD?{7lyx%+LfwvBX@&xf>Q}3ol zQu@4{9@ry9!+$v~C*zW8bw=~*_2E;gphrOxjXcct8=r`4)>HW=)M-*c`Zy==V+Gok zT&>a67P%yz#=|CpgUnoV*{zGCiY?@~%2H!2VvHxsfKd~*@9PV6P8|k4#v%I9Y2K#4 z`Z9mSu&UVGv7dLL$GekK1(_qJ9A2NDQ@@<6mpaGi5vUrGVs5|tD!C6h>Kg0BNBoX_ z1|-PPVcETkCrUg}ilb6%esTJv(kBuTV+CjD6&8YSg%?Wise4x&QamSC!ssm4`18c{ z-$Bgr~^QDQr+@z>PbT?Xxmi98l8BOCGc{U*254 zq${BVqAwbu*VDOvhF9Nx68bJ??rf8EGQ>I{KD+t7G})2`z^FJQ|yXWKb?2Ub{il)aYib)gN{Qn(r@u=C9_(Q7)PnH5Wm=vZtL+ z8)om~ju>w~P+s*zS7*7+_!K4o6!%Xut))ir3^Bea7$krr$Itg8zG=Y@UU)I!hR8^e zV?}dJ4~;BLj#?Qie_@MFLtILJ5}=lrmP$7Yy;iUzHf9|3s#CTN(w?3<)aGrd1+k(< z!t{xw3z^(xHknP~iIl7H;1d0vG4`jxWdPdLFpAOJEGnXC+~qpMi@8Hg13IcfEdzs{ zO6)otPM~e}SfjH8f`e64~+s^23;lPueh(tOA3&=Vyl~me6w?EiY+1pRBJc*47x&wVxiBOd(xeFQz=?@2fJMOSmFvcgjGc?E)(fJ;xW0oUY9cTMq6EF&+2Gr@W*M z_u3TKH|YoDP^fLHx>LiI`$B`>Zy38M>4zFbxuEQg7FHIY)OhmA{MNO3jmjV1qAiR~ zE;W`KhFh;EcLOJzP`2Kp;+e3gagc}qAW`70z*Ee;Kie=3^?MQQ4P<_QyW8v-tf=mHm>j40c5w>bVYv@WZC6OYpGVg20SNQ95Qfcpaby@VE0cP9 zs-WxnkP8Y^F+D|y8kIo$8R;UUBYdlNV$$m6Vdt%BE1Rpn{RdkWV><_dV{~`gQ`}m} zni@_Z<=nSnsymIF3~$J?jD z+bc6*pI?%IgZpYvnDDGM&F3`7-(Fkq*2}MzsbOvAFopU~ylW6ydAjUo(f>FFFvMVVPWqu&rB;na47^-TS-i0o?v;!6uY~d?oM2SI**|C+BbzN=8Rk z@81(6p?wm4osIBr*objwrv{b3j>p1TG$Ah!-d?n-6-_3|NqR29&{b(52Y5x_yWVHZ zUKrlCxuktH$a|QQCiv|2xP!kUkDk!n7r&RcpkLXQ7Z|n&YVzpUJy=j=Vue>$3;}2{oN(i;3xi`Plf7~11AfzgF zu*&)x7SUzNlggomgMTfxjzC4|Jw%=795x<&6)bhla;R@mxI=>MWfS&9@IB2WL8{te z?9m;TJTa>~8eTWXgJ89WSnr`1*`FjWf)qVa8B}LkyHYv>U`T+iD=_0UBFu@4Aqj{+ zb8iAXCm6HdD0lvmxaY-x_Dsk(q$XRWR$Ua#|Lm_@Rw|C@F99hX_;_)x_$4&@kUdIE z*y&w;eC|<2evFEJAC$T=TGq!<_+bwp#1(cWD z59mO}=c({WmH)^0dKlhiNEAup|9FjV3yXei`b?0t*t58t%8L;Bo!N<@aCznJ?|Iw2 z_;#NDdqsMjb#_`)_${_(`!$Td{*x4eBtUuN zUSvJSecdESSu|sE$ZA_p+X8GWq}}8MrZV5UPkT;1L9dv@l78of)A=krWy6+Ryypl| zS_VETn4=A=BLVg~N8m zk3q{O991D_C>3p9*lMacTp-QiBc?yMucCnC?eWD=RtOMLg+qIJFgxvWM~bSR1@#Ac zG#%}kC?iAH8vVltT-l)$x%dn6A-Of=Y1@QRr0e`_T;$7eTfPCM0UAUn_$K4Q*NYW&zvTC3_A(Bf7wE%JyE#MO0>>?JL&p6! zU)w?P0ghb>q~O8c_WiW2o5bY{Z4z%6XU{dKlEOpdIR|HIk@Dg!mCgFn=1+O7^M8e| z#h=u-z{*%mp9tKcTfau(Os#2Ls4f?mq22tbC;lKC6~I54VR`Eoqm;}!6aUOz-)MNx z#o=U_i&KNBH@@i-tGc#A5T3yX{X2ntj@UFS@8PKosyxb^wp)kg51Ur*(mX{nZ9RPs z#AuFYmOz2U_ehz0N6@P&LOpjsJhA;%RxEx!PluMLoWx;}<2i2?5$*JFpv5i)B6P2! zv+SF$#??y(^clOFa(3G-XGPVYlun{?d=TSK5ob!-%3Degbm@;XNbYjqqFLwf3fEcE z<{$M(28a;Q_IiTochjo!+QW}dE}>wdSfXbyDg|-hA=EhSi%fuW?Oa?cN}P) zs+4fdf&EaS?{)!V9gO9u`X=LI0-k7fYsR#LaA5_yE?_eYBZGm9E=kwQLnS?47&4>6 z@ppIHFu@IEeo~jiuB3eBj0J}mcIXv%EF*6DwU?opgD?rwZPtUxqf^h26?d`D)2%Fg z0pCEMXw|<+tv)e4p2l(mLT(3@4BHNScc3H>}Ypqa4Uh^+Uq>Co}4}aseqD z7Z7HFBAQ)tVX$=Rc&fXY`?qjt7tVQP&NdB=8jKE)Z4V`6opkx#I662WzOmj*Npn@)^hdu};n62E;zJE?4JT&z#y+njqZZjy_I83hx3IX3Xer7t zt|5me?0HW{zeJNb@`X+}l|bQc<0?dtUa!=7eV8027H4}^I;erG_y^?Gs<9%9D@5>! 
zh_K*U{BviKfoy8N8_xz(+AKc5)&fCZQ~0MUzVJRJ-S+R2jUCO)@L@09e`8ITaWg(c zk}ryVWZ_n;tcemqhr3he{gQVZkqb*rptbkU7x!gpbHz5g?a{&cT>FdH<=`Fl8jj4 z!8 z4KZ{l%o@~ZSW>Rg=P`9_P%zFBS4-?MqeD@-;p&f(o~g$ow8Ex#mq^YxNyJ=!!_OeI zOw->@oD6x>Xgu@cmHWw2=GH73hSVG8Cvzvykc3noMZ1~Xz-6$=_R58OtXqF|tmX`~ zx;@m7auu3P+WYE$Rtesn~} zB|^uTmf>?VVU6IPVEJyFVCu^-*4vBx03vem{SpFr^z}#6U9id(es8){B1x_n z5~)^8ZxkdaxELKO*bM2Kua}c7^ge4_ySx|+pLqxZEjlqx=K5`k>F)t$U4}3MMy97UiA7>fvS(XeFIu(3Ip!< z&5^iB&&|={U)Q8Y_^!~>bOS@2wDM2TK_D4&kF%@C(-p>2SpN{=#+^qVhgq)uhnuI- zeUTLuc3t%$?yVqjl&TN1p0EZ$G$=%u=P%3^W?~y>h@8rSf6-nl(EypRJxzJ%VB_j? zZ;;M5S!}k@gHL`j6q@fELzoYq%>qCWo~AyDY+z~#(f+(R5OHJ-C~A|6yH`@4crQ6? zTs22LzMh$Ry%uq2uuNw^=YTe>$Wz?;n&4Fcy`34vRMU0W&GQja<`M(7eI`Tg?RVEr z{v|MsbX;2^x7X(6RO>5pRb(^Fy&p*t##2x6+>IcokZ7OxG6$eny@Z-ORvaJj?G1CR zLH&b=@Gn2J;lY)N2h_xWtczOwxeXO$HnnK3M@Ka9~)o{emqwFqrOPD7%$nFRG(Na;>*KQi_*+=X!$0$x6 zEBQbv%AL^7<=UwkRnh!Vnrmec)KF2;ObMO7G4*QFu13Y~n?k_bCtCh%(y^e;QD?7b zd6r>7%9F@lpedv3>u~ncL3Xb#a1T^Gb#)9v^9xcgnP1ibI-1J7iPxWqm_SN^DuWz# zXSm?BKJxRO>S=v~z|G`FRMx^P?;L%xsc@zPLuMud2^NlEcDQP?C~tb2;}GpVPI zds6r_cc8TlZLI-#DbHLa*y7T~@5jTV+L|P_ici<(+$L-;*7&gy_v)*3bO+Hq0UGH%O2d2e+%G3IBd> zSbS`8t(u#+p{90d178t}?%os@w@6UPPohUEYzV`qqSAIRPUB0@J_$zAs!r_gm!21j z%ax0we`|3o4LB?g$Q+N+mQ~CQD|-RR{7&8WgHS>ub0tO!nEkeA%_>%~6tZ1geM6_* zJ*xhcp#STn#iE~wQ-hS}S?)@smyK|~5l{!LoNa>RI3OqNv|}*XVtXwICM)Z1Qc5vU z^ao$i&YeB~zB^zkshsLHKJ@GUm>-(Mh5eYv_eeZ$SFtt}#}VOa(&CKiSfS*RP$CyI zhULvwq+hf}_kR<#8P%OVUEF_B4LRsDRV@Vp=FOtW-)W=KIY8F1dw;CT4yBQbk@c8} z3d#j;fKn1<5p2N%eYo0a>r$v+6IQU%7wRzt8kaM zpS3W{k2E&s_G{jB|4c;lBzKtG;0Z#unL3;F4`jUSo_ za`_ut#zQ1Ih8-OmC%vjiSLOJ9xbf@ny%765h9PSo*||*=j*%kBQAl?bZge+B=jS9S zV*0r+KK7?vLTK-(Ei^`eN1MmDXm`>ucI78Eg4P#lz0-(oH0OuSJt)~1*RNq(`Nb79{IqXxpOrY}DWl z!6#GDzTsO=leXP%j1s`Mvj04I=1Gr?Z6 zW=1Cy{|v!=AA zefHgR_X!!An$8Fyv~?btrSNE1tEIY)51BRlq18{6^m)O_FsG3j$FqdAG6yKZ;*x0v z?c&rX`Fv;N(viBi6uS-yQka}7An88azI^1Ik*4c|pjR)c;gE$1E&=<3iLD{e_q@W_Qvt258@&0;8 zBtX@96IJ58S82_Hwi)Brr`FR%6n5&%j=u+$NkQrn`5;NAUd$NGfCE%>TDq3lex7!O z$n9R}FT4n5nKf(|bxHz0vaR8_QW zv@_8SxFC8+j(BAO{#M{>2!+vKOs%0?uj@aUT9SVYrWW2|V}C_u1BkC5s%OLzRHI+A zB^{@u{7X?iTc6RZhWrm`3=C&7QNS7G8IU{3ZS*YJs#dO?d)T;Z#K&oL3L^{9eVjNa zQ`TCq!#gKXRM}dOh9n3Uuq-j)c?xksEe(as4D#Yfj{)J}pfa>Jd2md4DAl7H|88(2)FeIljC4O?Fs6s1h=SeP?2o*s$sl~$ zpb6^#D>zFkr||SKITJB(CSRq$;@baCDNrtBnHg~vzu|VvRuD9Zc9OQ?-iS0UrW0(` zW^kG{#*oak#p7Vf7m!AlRzUP&$jYoGO|kN;KQlX*IMkF#)W9 ztZY2tF1+fgGwSCNJvp)oU==(?(c(L_95L%^Jg8S%+Pu5*xlMQeBE)X;tMLK{wbc*( zqb-2~H5jf#>%tDsuA1CI6`G6~v=v+CjmtxqPs4m7%T+qUxz6?NCRJ*e$+z zDZ2Lld?Sn$C1|%%ePra;0|dMCJd>54U&{lQTa1J2(O?7T5?gJMi|M&wkpxTc>o1M3 z+%Qsa-LOKPsa`uQU`Q)R;s(0!ggXw-UB}YwhH(H#|I)OLva*pk128xKc_BtQX-CyHyESIriSJ zJ#wt)ZW8X~uG@M91|dg|lR-efz9cMmzIxJqFW37@6rt@AVLCdh3CxYl(%(Oab>W>9 z*$;s4yZ!?Zh@ObI=L9MUxKW*2A!cYqb0jpvJzT;L=X1*%U}JsvmeSyCp!13q5K_4) zM$&aol$oSC{KBHS%{Rq^rMJfYoIZcKlLq&?um;(IuNvn|9C)j(t?m1?Pya>bmrD2U zTV6|iPC*_c%FS#fub}oi58x5bpV&;=6qtTpxi9xV(Y5`5hfE*H_;=!I?j(jkJ+9xEGm5^l zkSh+hwae;w>^25}m0I~MwaN&F*cH&qcg5_Ssf!+AmZ|pFYzSzMLzB@z9a2`2U4D)2 z6$B|JLFnI)$&O0A@A|TuuQjH+eY(<9;y6Gba#6g zejmKdD--|x55$6faD~F-)#(!JN8|hIvf|F9p?MJ@E?ZB}q+VBDHy{*8I@6Nf2}z=1 zVeFq;)BIk$y+n+s9nkmupk4Ngow`d~yPD4JEX=5pw7Ix~p{aV?dvYYFmORq(twI7*#~;W9M-I`P|PQ%xElu z56vU>c?62i*+@K>DW{IRun}9)xuC-SKFWtWx(qRnDKJ>-F!I8DrBYAsP!5Etk*Ll&7I7aMpBl3SYeG?< zK1ofyL%*i&6ym6iF^5wpAp@6Ey%-1Do8slR^=xbOQ*m4VEMh(f72t~(@_{zO%d5wT zXf65+%+ptoYY~2EkkJJN`ghITm#u#?Yj(2dsQ^x&HZX$+@&=mPnuDx`_j?OhOii;6 z=F-H*e)j3|`nHA%!*iw1&8KkR*(Eg%J%&lP(b=`dIBA1Xo$Mkt{=bypa;+O=G2$Q&N)oW@p!1k=Dfa@I?Ao^*+wQu{d zhc!?d6zk_hhog}(Ipsr;Ymq{CbZT)t2B=Zu!A7T4Q@EaZSXZSSq9ppnd(YIB=)75u 
zWZ9IxL3ngY%D`nz^592*3j5}mUHR7ryoQ622%iD2H^pvBX)zymee!|!)4_KJ)9Ns^ z6}8tJpQlMQOBZCZ50W#CuNg2W2CJuayhIpZF&_}U+pn-3ky^g+dtO~H%ANPx9}%m| zyp4U0vs^-le(3Swdd!^!;L;V z+;I&0Aknf<^={qvtb@mtWv=zLkUl?IfP6E8>|HaU0~vGW(zGR!1<#%QmMfjxX^674 z#2IH{wtgy88nk!!tke#Tg|Z*ky@}u+V2oiX!*ld$uD2QA^K|W1O&TIcwE>?r=S`ph zkDA!=)X5i&I^AR+hA&Q7*)?O3YCNwjvLxhsdnCvl1~bnnzWo+Q#H;nM5;1aSG#0=3 zLkF?)8rY?S;x^K~8(KEHCg_o>4xg%KTaSG=&4rPTQvD1w1oB z%VX|&B}e2(M_vww|Mgx2R9sZPUEE36k0KpTI8pDNJC9#SlN|Avj?)3bA1Qi+jv4Ro zhmD^%i5hX9!`^5*nm3C{9Zs5q$MZye!b%z4G4vLmJ>;2bK3AnI(!uvSd_@u8IQxUP zLp?Wj+8J(Fd;PR0@5clnjd961zhSUvxU3C>Wa+ET$<1_;A<}ph8cru_o-`hggjyR_ z#D*lmGf#g;Kz5_bR6w&kwr*$aeNp1O^{>Fg z{ZSFVTX?_6;_Hz>8qmwKJ=AwvP*L?RpKB%H1U8atdGWZ;SjtD^xcgl4Ik|)}Z8xiAS3?ZeH z&+L90`@}K8P(+k36b6RT1}$H7rZJQxyJN%w9T=RYb|hN4m9^CKNTxQ=G?i2T zDHPUPulX?3PiNc9<5#$Ro!Gm?$HwIwTKCY^KA07P)T~R!*Y%DE1~DRXFtpm<#hx%<#@%tuCuEnZVMx*=}QyKW9g>Zz+KUW?xU{&+fzUW}%KRZ{hV z)FU(E%zBbYK(+M{VCd@b;_)=lFCYsa`PmQ2gx0Wh^ioJdx0RI$?5S)HRj_@qo(pG_D)d{I`wXrM$%1Y11;wiWmH!N{3vP zHx>L(`wYJVBsZ!dtEDmaGR2V4cUK1aNPzMBGF<2T_<7Ad8p*fqpEl zUyaQPiFI8gMJVFMi&F^xU7_TZ6^#&{UAX34iW6e>x&6C-clJ3nX+#% zp!qRu2ttNjLyoT!M+}Y)P_2+(Ja6ed3L-|1yNuDV*nuGxz#pnvN9tv<<3s5;jBp$A z?R5=({in;TE5}p2<@I3qZMa#H!tMgwYHK;9RbHpg)gOB|+X&Few$lUnhSatyK&%IK ze~`6V;WRLY7qqFF2xvEd5_|L9l3615T^w`L-HOaLgz60Cf#we*`q%Q_Q2*oSX0?3S zhw&q$t-o-*w@97u$pW0Z^&g!2@flx8c!xp*z1XMwCIE`7YI99L+~K(?d5a3wC4qK} z^PLE5s)baHdc?hf_gP=B=C+KAk<|HrOsv=k^jkCyxA2|3ikp?~td6lwWD2K~OFD|b zetd)mK|iEZ<>kD7M_l`zeI&Ss4t^I@YlgX?00AjYNT~C@rvTo{KA4bF z8Ke+sZszMC$f=N6Y<5~N~4eZMVs>v~p9(cdSi;U9ftO}zh%poes zKUj4YaYU|0HzrwLVCu8y#PDW*%2BDN4U7JeyYdr2ET@yOeZ=1(pMSSVr+17@3qd7; z{O$%E^TBs9u2w`HbH0>#l&^-$tMIZs)MNGP!Xwy2C<1goz4;fU`UM|3=$}PCz-~I5 zVwAb_FRINhf})Jm0SP?a+Uw1mNje54V-bqZPo*}t2Z)ESYrG@&)TNqt{7Q1)>85L9 zI>2AX8dJ93+kcA6`09!Hs6uN_DsrcCRA-zXf4CrwX7Y3OS}YTE%Ad^M8Yu9CSmqR_ zmMUayo-kdGq6R?v+8Av=S8W^LczDUzeLV5-KUEX5nIk5QwI3H)9HO}W0U9k%g|&ib z2s>HE@QrA6-JV*R+ziO(6*J4l|0Nt(=F#kJSi4nZbQJnoM?y+Wf|DvshCiL8Bc4Yu znf-O`#VvymkRWb}2u38~s3~KESABP?+q>p^KIg#*2s1??gtruCwa?-)h@u$eIY8#C zY>xw)@J5fp_A^sIzC^R#&L4BNSL(lHb@`#iwPv_-)6k+euA6q1-zXYS?uxFG7)9F@ z)(lokq9XB@J^DQYtMqz-G}{e_=XlWBS9wOwqjQ&wyI+r&@nrYo#R*Ub&Z#t>9g+OP zB+Y&(m$2@ix$g}JPX67wquFTlXC@kH2ab%f4aWG5k` z=ssx3I$MJTNlR+^6fj3FTfv?)6xW?tUbxTWu|xDni>9pD`X#pg$;8n{9~L5xf~b9q zFV@?4d~uy*j&nXI@Ma>R9VJFL0!F2&x;d=aghOXgXPusyQG>-V=$J09$$O^3zG;US zq(I{e9n0i;X4T$8JwE)ho+*mktbAEkaUloW^brCG;K_+UJld!s2!CFN?Y;bBw=xWcE{;l1D>4|OsxqYO8SZSDYb1~ymHyB# zccd=TIiet4qxgO(sRnve>k2(7?Ieof&m$~=Hu_HWR}F}+t*Evu6&4pfzFSNJq~#0G z;&t0jMet`7ZI30}M~$-wg_Uigtam@TGS}Y=zH0!8nI~SanO<+d^We5q zUI0L*>vFPaf5H(@G01A1#4z(km<^Cog$B)pm)ta-Np~{*jbS(Jce!tryKrFac>`FN zYCn&fk?$)&%5z`&QzCJT;?HHFrdCe*@scZ17mn=}tsy6y?v03tlotEqol&_3QBi!+ zbSD5}uEyiE+I<)Z(B)3IMgC^cqzPY|83THRxvfxJLl}Cp%`~U;FnVJ==g9S>#IAC- zR5oyi#e#1OJ%|2buM%Is-_TACH(O0%;O5X=vv=@maO`iO=!*5l29bg%Z$Fm!FXg?( z#{NnW=$uKXWMw$$Fu0_<8&p`Q2*Gv29#&Vrk@VVWjONs@IX6^zY&|R2S9hoG1hN@A(R@KEzRhIApv%FYg6Xz8**}4;_Wobgy>(QT zZQC~}EnU*!MnOQjq)S3jX_Sx#>6Y&97LYD!lhNUw1M7X2m4~*+{q?u+Jzp5Aqt2VIWk#2lhzf{7pm=EW zvBhQo#MEr{cd|U^s{tps0~Fb+Plnx1V{*K}XT!6!pe*i}>Yr{;YgE%x9T#6N9(|b- zuhyhBx;wv*nY*(OG`Ji_Sz=bVRB$uTJ>_oTbvqhjjsXFTr1EYnLOLUhOIv1i$Tdin zDQ|O{O;~7?VpuZQ{iJsD!_7`L|2qEtsdK-XL$PPaH*NY*WTIE)DL2b~-?Bi$32yE; z`rTV!Zwk!dwAC7~)2{c3A8S-fIf|wj-ByII53EgWM;BiFN!lyB_eOOKg1|T|i{qU* z&KVETEEI~=Y>K>zU1%P6`W_4m1!33=Zd$g}#5)!qhL6c0#g2?OmR}4jJg6AYX!c4P z2#rG=XCu{qp5zdk5qW@qPjdAW?`LACZ2~*+ z%T5y?*wNYEJeI@@Mf^3cv(1D9xNxshkd2LIha324Q^(Wpw2k|Z93V$W%--~^y&MZ0Gf3IP>Eq%-GR>~6XAJJgLbXF zvgHgF(XB&_+r6Wby$?&HYk_%P{+auEa?32W_PxPgH#oY(<@jJom6B!Fvz1HJy+0x4 
zaJ}ki|I5&j+e9tnF44ep26*SSN8Tq)GDte-N|cFqMYVFEYTX`MQUhMWH70Xvp~+ta z|Cl;UC^ox_4E1=wXcM@qP27zBWt>H(+iyDUXZ!hZYJHr7PPR19H!>?3GcEbO-?Ebs36m$E_RF4=C#v?8*+G zo%%#}slX~j`eeMf!}&bh`qS(4|{x`}kW>dBe0uT5pt$576_knL0A6$Cwta(@U zRteBpbc33t(kjjN$|5_mz9x%ETM*)I8+uQ|;r7Tx+BJ1LX^3 za<=;=`4O3yK~ZAz8wkQ}wve8?#=)CNtfDC^bNu>NN0Sb$2?LAHb;~A;xSQkhr60+S zgL73ni^TU3f1>^_OtQusR_TVl`YZwamZql5u`omLoACR_XQm#;0PVkK)KYyySP#g4 z^7hD1EwUPV4glidOY@0}N{|0i({!5fwn9T+vex-=(7{IhopST}?OVns7+!F3-Xsy) z2r(@Sm??HqBia_Z0taT64 zz5j4HXph6Qk2%4!FD9MN7kxw0Wt0{`X5#X8&;9Id5%%hN%@d}ImMB3t!z9u36=|28 zGgj%{TZ2d6*2$J`(gZGs`Q{vZIL_iXgwLab4nAw&eDKH+7FlKuU3Q#cYyK9|uvg=n zFyT6peoq6SxUWYftxyqx0)wJC&B}F#>Mk&a)N?ebqhou0M`k5>~ z3lbq!n1(aoPGisBfD!2O*)Y=ZpYqUNRZmHw?~ z)b)>hvd;&aY*ji7Q=vKNu4@J;lY~W7&|TUsdgzrd;kNDfCC=yCw?D?)&sVjorOxJ> z9BhXV$?a!JXL!u<7(Q*{Bd;ZN5KY@cH)^IL`iz=ew8A2^ZY|o#u+-@Sg>3o;yIY!G zdbke67D<(^^;$B}u4@L$ZDL2dW_vD*-r^o>q+$=;mv*@I#!hy-1NxqFEl9m`agtPi zd@$g9yTd2ZT0_qbXjvhl)`*iSWm>SY14 z7VM^s;#|pHcbhAAuREJ|2Ods;o7G+1h^buMNTNJ>D#1|_zC(u`=OpTsk8{VT#C^+qiiLHd16K4#*K60k zl{3JH;+ZMTd0}}Qh^%%sfZ2@#avO#;?fP5-k-*g!CngU-!+G$W{2JVW(M|hT$!kEy z98x*l`MfRBZRU>=%++(4(_kpw)nf$(9<%*4U_02!-<>v4_>nPvtB=eECYzR)vKyu}w)>ubWjZ z9nXk_?3)`uIDR|l1*I2T9`8JL335xI=qK$JbDNnhUTekAcVsjcLU$qYU@^!WhkOsG{ zV?yN0iGc2JVS{}&aMZ<}Uqs8Z0R2y6*)pk|#8tl9z|LL_(pBZi1v0iRSYYQGc}F*P zYZ^sx{CyYRrGEQ_QzGr^G$*!`SaXJYYY)qW_lHuXs>`p$ehBpuM{fVl<9~H!ILm<@ z{{Ufxhy&x1Li!USjwHp)<L!zuNxjMs#A^QeZ~Aff@Y)NB!@`gSR+PYcba> z?yL{SzV{bpO#45yym&?=%(-(IMeK z!?_pe@2`K9a5HW$dIauq3QP73dDzrx6o+2@A!wk~?ax{2JoN?LYj~VNm#9C6rTM0x zedLuU3P?W-wq4tA*pC)GbMhmHc6rJjuHzbco@vw_b*V$^@%IKkVOJbric}ciecRBc zGXo`k@Y3z-SOuLMJFkzhn5!~IdO(wI79%}qUZsJwhK<&0W(>`E*S6&1X!o0l2I8JX zR;RUCXPyl1qyo3(J7wvrFtpH|n2+^Q3W-WckA5t?y+%lt-}s-{^;ps8$S}o{AvCmrjeXYMxG50Chm?Q#-8BEHu={p@me;0exSI*K6+hd>y`$E*qFw zf50YhLxqt{ZQoFT$rN5U<65Bus@;xE_HB7|!jOuvHc{kxa8bfg*py`ETu${Fm%SVb z9`u#87l63vT{Ud#JYMmCxF>!pOb9u7vvQFPbRSutbPK@KNYO;c%i8xol>K zLv(ErOE}hPxaJ}e(I%Kq(d~)< zm%piyDDUFlR`U<6Lqw{elwk_Uz*IY16Pyq)?~^RCoxPRIoVSF%K1jU_BeOPa1G0kH zf|at9R?~Fo5bPked|_7B)BIzHY48|truNEygH_dc^%kvwKuLqls|r=ePugi^#=>%v z##Y+Mk4IC_4r-y6w_hiz`1fj4Pqre~-jkEr?Jm@9TdFl!`c5?X?=uRIVOR9ewpcbO zA)^rWV9i;@H^>Vy3?3cX)ZIojtO>d6we?xljNYr8m4Rc43Wd2W=c%!`cRY(N8+fMI zMke2@ohK3%Y;eB&Z0+NNYUgjv9)88CtcQ$1=R4RY-u$ksyg=WA6whgbi~2DJtiZFw z|3q?x%7@M*r!496bA5z36Ey41Y#T%%AJ_vHg$KWcq$8U8!B}duKKKD zDNm?d6x-D7xvWRe4FQOYhZNZ7sllE{=j1%&s*?IYCK=X+eQz)0EafFbGpg5lfsFp1Qolz|L5GB z+~m6d24MiH^gmJ(+Ab^uh`q7@pPhK9chG-j;<=|>X0-Uv`p(o(d&=ADKJ^Q(9_Nus zb}}Y7Qep`Q_Ht91;Y)+RnAPupf=*Ja;+esot1fYNIy439JK1W#dkn}~8jcdqjsNzPd-zuF z4cl|xrV7zLFnH1|uT#buW+w{l*Ur+Qq=f0p)KBjHl(I@hFdN$Z1xBhrX54E(JcO_= z)6&(|g5b9Ajn=oX5fb9zE_!Jz(MSAUhsS)j9%KTEB{>lgJELlR70H!To9|v?Lfb*; z;_lCPYumEpnUbv}UOq#W?uyB^j&#`67>_s+Xv3`4_*Xs_9pSg5JeDnk6j@bJbv9TK z!FIHDeXtYb#hnZjF_4FKmz>nrGo6636SJi(NC${p*sD{I&-;2Ww~d&ih=g*^sP58l zY*dBLR}1otd6A6NHc(ke;^W>*I0|V|Hq{$@6yLSZGsbRs5^PPU5Q2$%ff9$HSNSI< z$@C374uE$E`y z;Q`mE%_@f-A(va!D>}ATRb_TY0 z(-_Lu8CdU!)gH>_BpA>svxXe!xNQk6qx>0u1z77W!&-U`s!pwUgu@UK{$LUUr@`AH zO&-!;vyK$30%bE^B>?E#Ski-deU0G+-_kKeK+;o$7)m@Cl$VHYFs_emH%q7AT2NXw z*uGDhVQA4f%-ktag4j0y*xXu)3`oAR?!aw6b5d6Pn+hxdAXw(D*Dv&#yV<+2cZmak zS$2cPzjX-N!F2%HIGRev9v@i%v1RbAr#s(56o90MZVcV;Y1HkKcBtLr0udTRTi%gL zvGG8x?cHJR(k`>GRq+XUgaUHfa{1u}uL!(i;QvE6YpgH;r2#db2*N(2VwjncI#f`Q0B%Z?sr;j&B0L4gyF zt+l7ZtbdT)nW~L2uk!y-Xw~B9fQkgZV4rSB`a7(nprCl#rux@q*SZUo3NZeJzQT-< zyR}==KK^n<#>Lk@VxPUm|A!%;sOPW0>t%%u&e}U@I)Kys$no4A0h-ey=NP}yrGGOr zSvXAixKd9S-5g-1%v#E!)5*D?S#j%qb*a*90$m$@s5nY9QO)2FHDV> zg3SctNG0hy4UFZa*IwJ#cCsoED|xdmrsTi+p1}9`3nWeYrGU?3w#61XZi8ehu|W;bnhkSuNq7Wd4aXeX?l+s9c~hn 
zZ{yj3=gO%@-jO{iGj?tsFfJQzr0EfE2cYCzhu1*_tbB%)N5j8|mBEnN-Z-0$P^&jy zyc_nA26ZFmL93_h~GD-fb!&#S6lJIOA&d(hd@bkcSC)X{KPi@1G9?iBpybBKl3j`Jd>V=%BHMd z#liDpkDE1pI+${*!wB%p(YWZo!rbGx3C~kpedu3{u@@XHP=FOrOoOIR&)SuYKM_Qr z6o>&rxqB&xju%Ng%QK-M=g=>yVG&Fu`P<=&!&VRD{d0!9aY7iR=pgc_z+6|UikHIQ z<5vp?{nGV5?#&>hA;fN=(0|fe|8k=-`%}FUFL>T`6z$LMj zPzFW7Z?db13_yM)^;vN?|58X`WWzWg{52pN(5IVR{C}J2(C&x9N}_xNayM{NU*7_J zBp5pqa|eI>)mjJ{q$i%GNbp&q-xj4JJHoxku%Cc)&FjReG1?}<+Y!CX?|<4j=h^qn zCKs)GKLNymwt{iQQeQ)5M+jbmS+i7B#{+GHk3gIaF`c>$m~N!q1BN3R)q*C+hP8Y?#fk}Z~~)NK@elHpx9SnRy#Ic0bSH!Nb-@ctl`g> z0<~BGVt(k1@XJ~YNZBpxRWCj#8GSu^4|`z#MoEs5KZ4nQQ)l^s%>Klp#K2+v0Wfs^&` z+Fxq^9KmNwsNsK-L>4(fk5lG>W=Qy+7KzAr%%*d%dtUlN!Zp|Pe_=UXIIMqih{j^w z-HneNjKqhcc`1ryKgKE3fcXm8~f*o$b3r)!SM%3 z$V7;y&%=+5g*lpR! zZoKhAH7oR7)TxA)se#o__m8`J%olYRJ3-q9H0a9@NfIbXf~LMBmzyd>#0&=8=8Om- zL66J^6?dIHH=rdFRgZ{R3~NdEYuy9qCWoX$|AXCwNc9nbTIo#>=Vy*OSY0Q#+KQU{ zH&Vy7fl)dv0JP>edgjeI$=w5_98g9l23k&PJ#TGEeJi!LXYo#iIsfsqnK^7LJ| zmx|PQMQax#ROz6bLM7N9qS#SX9E%EKM03hqON*>@sKBTx%l7g0uqn-o1v^w$9Cx-3 zQQT9K6^^XuaUd;_?064MB$wsg0C?HG*ih7QCEcYpkCezo@c!-Do55zo=C0&+;VAUy zf)&F-4Tq8?>hnslSeqUPF@b276dWCjtP2gBg1=nZen&A~3HpFrj0@%#LzVnBax4AH z3+@L4ZAx3aUoU%YW;h#1sy&YbZPx?d)9^1F8zDQW^FBY|Yiw3+w(gkaMPU6;Sd8tk_rti~y43dX`{lY;7Ud~8(hkOh za!?unmqZ}94|2+>@~4PKk%V|GPdVYov7xt&l~v2h(81V`qx#wo#$DEXJEO~7?#IBT z70)Q0xce}1U9!IQv-SUIYQ9Wdl=cD8;!xs^Tv74Q;u@F0<7N%s&JzR?m-LH=wBu#4vvhyU-ws zNfTa^fB?eOBj{ba_RLT3ps|Av8`8(LPaojhk-Vc|W|OwV)4V%mqh5}TeBfguj=MJu zgqsP`OqPvpUP~=JwIkV07_;DfM{U{s@bIWH+)|=&PAHAlOJb9pT3Z@%nDD)%21c9& zTc6dfJ@TITi}wD}pOue!eDGn4Y#$Z;hp8QDK{0Mri-e3TL4jfh=2v&(b@{YFcbl8olw2K9o;>#Cg(<%ll;?Ynfk4pganHIYSqVt14GC0iVuZ9W>_PG z!@E(Gh4bZ#5{5ww;ISXaP<;OWWYd)AItj^9dqN@LP?#qu3+4&RMd!7t$K#+AC$Yn# zInH7KBC~qXn$zC#CfZ5;e@c|h;dLz7Y4^+Wt696LH+_+w~l$WE};zM?Tzn_vJL+fWZFkp*A%DS?~EVH*s^dz46&#hnzu zP5#0R-n3)6Pw5$i)r)H&mwdSv|_(rG}W!BjV^e@7&E8qPi5A7WU|cQ=p&+hr=; zG(Nfi1iMvX@wb<%)BbV;1Hw>yBe(WlawEclop5Ua^cE_d`ymrHM^@G!&-A6ad9l6T z7nYFSy4&yNoPE0e!feln)OFQH#7bbcTP>LaXJ7?OiG2img(-!m?=02Ud#2v2N`Y7( zfkq(=gY5?Yzk}_f&o7^vO8$$F;(v|X5s+pZ>X;%`8?tJtiwJPBJ%=6xp6C= zFYOEKw0;*22UXpg63nWuxM_5DAEh$fSgpIF`Un-IKTGWkYvLjY(j70AM?0p090uib z;*Vm;^$Z9=!jnctoA2az7od_Ofik4mxRt&e7YfloI2h@Uyi)fj*jNqa!%IFhBdAKLxv{A74c86kNE{!LRo~<1pN!bTGtZYj)3PliD_efIMc;~bDQ*bAolp8>c=<=z(d!0rf z?T=ukhR#N((c*y~l_M%FBK?8f)h>AVdl$G-w^)m8=$sG9Z9BP$%}_Mte^_PMa|o zJkki@9-8*lC8b}~f-~C*u``eY6#~&V8U6AU!R(z#3Ss{zKwhfLZJQM9ADD}M(7^ZN z>78zBK2i`YB*)Yb@a_v1p<45HA%*ik_XoY)i`;B1Kw*x=B~<|xfWv@e*Ry^@l3*!Ik@32t;~=il4rABve1BYvz9c5 zmbyt}R7nPsY0sh8&X;ib%0I>jk556Mo&Dpp=bC&Zi3jgT>?dRBH}YU|=?a0I>j6e% zKp_htS<0^*qA*Dk?l3sVzat~t?SB`n>JX~yxS{L4fzJQ*9&#r{jSME4_1-#P;0cG_ z__1BacS@L(#eA{nM<2H_r4m;oR5n>nQ6KW5FBd$jz`#w{i)C4VyMg*I9*Ldtev@4a8! 
zuCPW7z9S~Ww&egD0%Qt`U8K$;RYHu%1=4VrG5Aob z>Q*J|S@H6UI5>Zj{9)q3&3XXH&a)qUKt76b`NOmagPsa2|FfYd7^1a4|aVAd<|kQSo#W;FW{?0-DX1 zz5xx%oohbhKshgvi5U|MUM;O2EMIhahNiUnMK4yB?HRKye_>ve6h!?&%B!R^eHB*ZJ zM}v94XB6|49dGpUyv7b7RdaLgdOJ+|*T!Vs;V1>>$?8mEa$>0Urz=|JA=kSvvp=c!FLjP5Q+~0Lh=7Na8E+Ogi~d%R{w4qGzGQeQ{ouT8^??^uS_!^9&N)ePGN}G z)aUf;phZ9tGF41F??hFb)?VE|(9Gx<7s%;njFMyd!Y!~rUmt)8l_NbGt1MA-ZF#0* z+28{y7T@-Og|*$h?SAj~3iZ<0a5oT;SiFKwvUIOVz2U?hcZNMfk=m93pJe*1F5y5j z#rmEwf7Ld~cZ<6Z4SrBDG!bo_Tg4l}eoz!j72z{bL;!)oe%}S&X24)SC$H-KtM9dt zj$w`?%ExYjfb)-ZiDfI%!~B!8Sd;k`i$WF(n8iMb-C*bwrZi@L*Rn?3X?Y+-$8f9Nl zf=IQUKtB6u(wf*Sy6xVw8GLB~T}2Vzzb>Wsvp{9|&vZV+{H%V^Yeu}^$Ps3Zv%B&?C;TELeF!otJnhBwYm zrhgm@+MWhv-Es4Sd?54W8P6_aiWxaG0i7`?->EPX#ybbR)w3fiY>rz|xKx<00XgpM zU*pIqFmFzfWX71NvsYJ9eF3i6x7aPyr{3g$j?Suavk1rkH&{MN_wX|u+aQ<}Q5wqp z5^*%B9ZLu1;}OI@d?m$i=SOikH-!krzJb*U#jUwS(oJFEdHc!f+Xx{auQVP4*_U>I z5F`Na0=D;dpnQSgt1uwq$-{5BjX2B2#YV5cT17iv0N+-vXt?`nbfmO{U5bKb@|@qJasXDqr-vuldtGnF`}-Yg3}3dEq$u6V<||uM__F^p)@f!89O1R~@yp6U-%KKOZ9q^c`9s(>uS$ z*kEYFvjx%tK8^v-13YocaJtwPOWz>n;qnY*-&_fI=* zH(wSAWk}@?`Fn9Ayhi!kM5RGwjH1Z#@RVKhGn(B%0F3(3))7IR{6|v*cOSUMT>fVk zz+X@zm}rHOvq3!h%%(xTvJSNvef@O)C-AUy3kl`sbGHrkYCD&_dOAr8c9sAmyTtz15;89X#@b({3eaG=AhHU zi#eI!+DgX%2yLagQR5j+2I(-p_g(4Y%|ue0DrR?;AuF$u=&SBmC)c8TfmMdxm%=x; z5jX2!ct3T}#|+H4?^b_OeB3aWz1z_Lu`=sszD|%`j;MbqkyBV`QX<7c<#Kla*ulF| zR*1%y@k?YtxXIW(Zqd{;tF(~veYOlEpiydcGpGu5QR570f88UZVn|tHIKfe(T#=Lq z{bosPWu}5GliDYpWcZ;npVmR>8Q+)!1apF3{Iw9FLV8KgRay^=V6UsENfOcgQ_o>0 zowfLF5vFOMSvLiYb~w%M@8_t;{!&SR;D+*OnE|N4B)N6BKfTgkl6HkG>Q{ z-R9^tdZk1!+$fZi9}SBYmqc*n{p<**k5&CN;tKYSDIPECUxuWA8lCQhU9BExzkR=S zpYCe3pW2|k-;NdXl-XFM#D}aW7alEyl!`R|LXxqHUO7=X>~&vZY{8pnp4oT|aV2bV zo#{3&eHq}9HALhU#;TZas*O0ysP1)btcTP(@&x&{SluaKx z+xBpQZ1Ily>c11A>3+VI7*0_%f^QK(xShAJnxg@sJy7qfz(;m>Z*?TTdTpxfi|m?o zLbTm~1(|2(%H$uBe*&al$q;Z6IkxAvzh6S+Nt=FRV2No_p}oNJ%|=9#dEa9Oeebq> zjHvgPohd<%!p$PY>tWEZA5KXTbjVJ{T8)nLC&Cs?*B&90v@(+h>&5B3?wRx`+TeL? 
zhjprqOy{mUa-O%~E7yBtzAC$dOqbe`&jqwyaNUh~ zK{mUw%Bbm|cFbeQa;Q6tvpR_liblB|RqI%$C6 zwy(9Pt)yG6Ul;9D08YQCI?HTw;vo&G`MW1BHWVx2K@{)QJqrge_X0%o;Ty#39$#*q z`E@cUrfpRm-aNBXB1M#Idl+9um+W4 zp>m+u`@Ttt|9&eO8{W-zQBUpl=Hw79UKDPF`0@|`2Xo)TqaCO|zL&W;TQr(}S@qgU zoiBBY$X_1OA^55}!3<6>UV7*R)oNaSgx+8L(K}jl-H|u$ldn~uzjQj*7$rI*!)Mh{ z{N{@C#fbJ}G1+sJE)F8R8qXjtjX)Z8hD@BoR%V4_NsoHLCuMJQjjk94 zJbUM2k=vB4WfAJWok(~(WBMH|y{FanjQ!~6id?V#g|gRt(_@Aa3rLPK$YjFBf&2C2 zjf{HeTcLXox0LZ^OMfq5;7&|_#7(Gma^$O1*rnC>9ty>|TH8JA@Es!mWoCxwcBA?` zvvg8!i*&w&ad*1Ww}|38ZFtt9FXqRv7qBV1yY^T*r$e!XKiX}3zU(S&z{^e_fA7$e zi7x-XIMm(oGhGyNDcSfP9XpAl74hl%-60Qu+})(D;xp zozj(_umi)J1;p;+uNNPRRwmzg`nOu-4m%az6(-A^_G3Sw5V;R?b{%SsoS_|0(JY~+lyo1JS(zSGk&3IO%=iL}FG)OH9YNq{2cAF#}7w(R6&KwIk zx1Bld&E=k}jsy~X7Fbq$-b(1$;2)mn%y+1F8Sblv*P*A~cHb7Tc|7UWJDyT;jSSr} zShqqn&46475*Oh%m!oTA>s3Z#&m3!e5nG9frI0>cYFLg7dGyBk8f;x|>EU7}Kw$8Z zYb(&FY=@35;5Bc`xSwXWhwKQ};K9o`co1C_&RDD(v}J1&rkpms^{W(xIo)16VXOxi zdNp4kiCXW{hm=gvTNa&6*}#S8QBg{liS^AB##Z`uMNB3HBU-KdQ97k4GFNaMsH)K>iQQ%d;XKe&jh#zNcZ5Rk_j>OI27DH%7}b!qjWw#P{0KoVnY z4M^mIX6gRY{@lCML*sabXk2bin97o9_`J)=!A`3h54g#}{$5Q}KMAcaK#a56=UOYm z*|SI#XMv&n0fA(W!cHIIB(;okFA{Ix*Dj+ul3%f#bb2on#p z!-E599i)OKoTs5*FOkdMj-MN#GtFr7$ZDsakSVG9`{=cg5WTthFzeiI9%m~n9RDEv z0r)JYhAz@(SFwu!cnTHxnrQ(-bf8!r)=^g{*7G<=YaXfh#ny(%4#8OWr@e$}!kNch zP3#ex8PAA$=KVk5(0Jr`_|0=j?c-1T?(Bo6?-QQqouywdf$ueSEnC!8g(n(@SSxyj zn(Iaqc%dXFhDQzR|EozE?EVz4ZT4+hUx4XMn%Um-GflNyXH~C^@s<4L1|D z0NP%(VU5m5ZhTKASz{s>k%(RD0GfNA^O`#_e(sI`0690L6x1MNpL1EmYp+XWY_g0= zhyilxVpfE37D@zf4w<(14>B|eurC^GQXpjskLq^Q33W{0dv$Ct1vY!ojAA%YJbNV~ zri);iRmE_6XOj^F&Ek$o1+I)iCjB7>FL_<^;dkEBMh_c8R_20~Uuz5xWsq+B(PEN< z6=+|w{+xGEW=26to;(kwP<#^x-?`>cSt8BPy19d}w8>hM+GqM|;65IUB_D~rMu z^{!&I*Bq9a?P!Cb+!*p-V;YLg`RMPW1%Eq1DsDs`TgR)_&fT~8j*>)7Au8TfwH}l6 z*&oMuo(^Eo!6f@*F5Kf;l{5@w2)s99t>D{s;F~gneqAtPU%N{B&Hi6i}YXe(h z#{I^zTlW6w^V0|Gx2pu)i=NYhRijsCB6W$?Xrar^#Qd-u_j}iPT#}aa*pKtqhIcU< zEJj4VL()I%lkzDXrsTL;JSprSEBJ3vl(|2+PH!A=@($o(YZB$rFg5$+G51iYgsF6Q z=glZB5~Rw3pd#W>7qLMqaV($k*_+eE!*b|xO1`hnu7cuKmfxhko*`*Vrxr&hIn6JnVst&SkIrmg46rNKdX{UL40!Ry+aS3E%4QXgs&C&R{nwj&qfe{`=1xUA|t}Z69uoT*<`KRUB3OM*C-<8LK9<_kR58*rzG) z#3^vVbt6PemI#yUuj1R7CQahQpkQDdE7$Ez=~IgV!T6fsYWi>6;6S^!X%`PtYJ9 zT6^e`fQ_-0?psgj=6<$MunzSV&*Qi@BD7trDZ`h9f2Tm!DOW>jxVus@P%DAgr=ixm zdJFUWPUr5v@VTL8kN(;H^CTmoNb>E1bVa8|qqCDcG9_XAti%U<#LRJkWt+Q!a!fN! 
zBVE=AqBB^7F~;?R!-Mc zN=r$T*tV^Os-5@a%{LB(Rvxqm5>4>~J6jgRw2OJ?9BPzs`(29A7#`YAgkHBJ8mKK%uSRgisFxxP}_#x@D?-m zAm!ZPP3ms<<7YXZrd>d;^yeKz_Tj}>n{I{nk8z_umr=dw=u|WnDXpSv ztS#={%oVQZS{0THA2^!q+HFZsUUF4}hTUY_ioXJNduqu&TZiLYoZhQ3#;f zYDMSIVbg`*FOb?ZKbtT@`Fzgp|HHIc+@ohRU}n(FC4H&-{`uRDvzj}2DHerImxr{K zC5MC|IgP{?)VE&Zhs(%x)hg;sLkJvI z82vg%zO;6;9j!uAo3ruOMXOLGfTSJkxh-#csoC>j>n`B8X27|-U5n!k z(8k&Ef88vpT;zi7PMmvVHfDVybrEF1HYtf0iK$VJ-|srlqY2;1h)D^N+>4E<0> zBXJRVV~?)L(~CoQs%jOop@unV*Wa_vV^tdX8iicTbKdRtk>GHfEMtQ#PL@x===+Bi zrMZv`nhg~za{jC@BEYlmh(`BLY6eb@Ac$WK=%JXG%l_LF{_C~)uTA*7^M03ib}iDt z*Pjguo6x{ezzfYSDPbZ)IjHU*Dq0sH=|39>f?e4$kvRagT~D zZ&~{aU2sJe3yT+f`N@T#3mhjQW*vUaxWrV6Z{YvTJax+nHz)jXMBNrF(<(J^cuX+3 z=#-@YtZJUBpP5$TLAgFqIu$^lbTPe1eF)7ZmqP;%+ArM3amoI37nwk7DuCN$6Yb(* z9z8{z0$LiUuoeDCeMjN%meQF7KGmmBnWq1n>xFfL%>nZnMKP?K)xCgrT4o| zSE$DxAuT#}iO~BGruX^F`%U}E z&4&+CFZw>SOgHhB>Bmk2D{!Th?YE;C!)2l9safX#Z104b)4AbqbY%geyv@i$ATT+aAdU!mpfe>u zx*P)g^IUA+l_(W>hYZ;JdprdvYy8(z1pXJ7V)B;VG4)f+p;hk*4@>p6flIr<0b0Qy zsAP~x79wGv&*k$5(YulE>MJV%cUd$)qp(}zRdzjstBvR~>SIcabToH|fVt-@=1YOM zbdYl52o9&=?j z-(pUz_E+QAF0bs6Zt57FUfC-d?N{>Jcl}iHKlJr|!9!KhZ+Un&$lxANf0{jr60mKm z7>_BUT4@Q+0HG%OEsPDHJ|L>qjr=?cdZ7<=dKfVfEF*}^dsU+zZ+tdobi8+{+CPOz z#dw7pg?&u<`+*`-mJFLf(cys-(%_HO!vgCG?&lBWV8uy5K>Mhi*6R))hs-YORyKOd znAO-5Fn~a826Ks_pDH#P`?vsWLaQIzAgjm{%F{w`*O&p zcuix{TeUoWCUbY&<`j#U&^!I?!`a>O>MxMig8s6{TQeJsY8p-Dh4S1x(P&SeCzrdx zx40zJ#cjsHM>zGlX@+mAREsmvbA9u}_A_x)Q6v&TtE{x*k$=Bv*cd=#ucqDp^+h|T z!;>c*Wv-<4kAIG?tVcg8dVU-I0H}KO6F41tIHta}^NC@l1eJY=%6M}De?CIoceH$q z`)*DRcDv6ycFca+G^`zv+Q zvS_w9M}d~<7Vv`>Hrxr@#$uDMQ2`Qn(>i1QaZulS#e&YMPd<_!sZdz|gK z(>P1FhKa=oGzsgBtw*YK0Vf|TlV{DM3y37xx_Aj)tKKu5DYvn`fM9B7r`Tcvo1LSb z7pW&@ic2xwLe3cYZwc^qgA|Guk6{OA^@L-i`Nv{y>fX7qsZsfMpB=q`7@5s|)uZ)V z<+~8s*ibTuXm;9s9(L4*h*{z%+=?*D18pel7rw3^rk_{&<|06Z^DD9@Ur%C zXNpQy1cJ9s7qfzs&6CzMQ~m6PhidQfo14x_9ONzV7Qe`DoJu@-NJm;aAzxicmOzAxDeD!rH-owWt zeH8fNtC4vK-cr2YE<>1SJ+jw1m#K8!s*pD0RD32Aei*08zEQKs@$vs)?>(TR+?I7w zKoAfC1rcklSv->%KEJ_uI!{Qh^5{@}NK`2DLO8#B@9hXchxCRV zfbB>;G|In6RFhat8}Vpo2FrsD)EVgiuFgP9(d8hBQ-2O*`(}5f9vZ?7gQ6d-J(DGP z)yzD{S+RyTS$-6Z2;$MQU!y&ho=hgQa|cc1%bCu$ty`MXEAuzVUWs5-|5Q-sQr?## zhZ@|pGk>u*T}BNk0e4s9Ww+=H5NpM>-`W~Pru={inwgo|el^&9%Y}C&(`6-=#w6@% zPu+Wq>&~8^mkoQ0dCmsI;kv!^***LdpA}w?>yH{gqD7M8GVk>Bg>IWMYWcD78~N3@ zhAj;S3*!^7$A}MFDUorWsK_LwGigYFMEE@e+Deq5RlJn-zV?#+?(SXFyYLQvJ2{g+ zF3HTtLf##5)%^M7m?R$f*|brpS?G+KBy-FT(e`y_R|PJ+knOSPuguo3I0ggl+(d{U zD1Y^tAw5|Xh=rK)r+xBV{?c}(Vf7st*_P|j-G_D*Z+n!O&`sx_B_~C_-1H%0}VQidBf7Wogx=sZd;byKj4CHz<&Z`yN}acqPYYacLwi4at-6oA|zzgA9WE z159cT2i~nCo2~qIo)OxUFDZ=gMC~Sbkn2B;wcaboU6>@k<7g8E;AZ?xLP67k%=I}1 zsDI9g%R@=p@|If~U&6M1Rc&ib%ScEqYA$YtBYd+`i#jNzVPQ?ZbtvAs{vrMDlJo~{T+ycG?wAj~Vs_0{ zezIGRv7D5#1Pk7Q4}f*vClel*pSH4{z7Ns`5T_~!S<1=4O}P{!*{*QYztr6IU3~R7 z<25$f;4MAtqN#RyprZ1^xW5#iV#OO3$$E}gAeJ*y3?Q=<`@~oYs}vEbx_i;0DEIBF z?{te8Z>|G#uhofn72*of&j+|L4%I2@m()2LA9jLI#K+N9S;7K`NkF9PtcmS?YQ`hu z)C)mz3?3&@Bt8Ag?PT9$rrSnc+HVC3Ut!Oe6j}-J4;EExfy%}6bC(%QQ(g6jJ@07I z=~KnW56J5Aq(Z5t1;66q3#IZt1wr1xr!>ma$2xfPq&-q^kMLfuB(pjWJD;yl_i&Un zy%v|Q5uA)0>1dss)b81<2D+waB$2~0aTC0+I!%afp|)R4qB8!7a!-3G%Qy@y`z*mBkN3nRn!5hleT=aXL)GLn}SVljH`c1iY3 z`neyWt!JZ{b}~T0V`6xNMR*ZmNEzyZSgAtDV6vlQxU0B|fr}u#4m|IDad8_kz$wka1&+rW zi4kEf^9_O90u;oGgby0Bg679^Rs=u}D8Ej|H9#etP%;x8P?9OQK%>mF09pzlPhbwgK>IIugLwmD!6GI`Arkvn> ztaTel5}(z`J1Nd{D^Js^shm@m-G*S6gjH}|q#q2z-LCNT1P77LNEAHVo5T89 zVA#|7kI?Lk5z7iA7)=(L_NMG(4i>KNqv7W_>d(5U%GIl-%$+tT^Aq~ z9d;|jx^}Qpv!>=|NQMk`d%_7neUXJB3q$clq3#LgyRVUhscE;d17a{Wv*F(Td1Ba= zoTrlVNuG)Pri}9g4qzmi_Wsmon@(D}7Cf)om<6 z=W|+{;wvChB%LSPW?%GZ5OUV_Pr(-)^sm%GNC%#xl2)RJ)W^ik#KdV~{In{LKxyC@ 
zbJAmaJ^b5GFRz}~0SU8rb8h;zdP)$pKPZ-y#$v%l5I;vjq(c|ul-Ezv$J7` z0v$0t*~2%o`^vs(@p?s~p4T>kNWf6RaU$dd2y_*s0&p6uYs{{|&F4`&M0RpF`@VP0 z-dNcU<9|mdB(d&E@h1VhLR=cU`M&pyPZ^$I@a^V4i_MD%E*V z?Uj9Q|M~2VG?@3JKA-;=u;yPthq|0@Yx3#5aN4E5z6hm-qNzQv64x5jIyMk!$dw_X zvF$tq>bkN*E8N!Oj`PuTZIPw6rhYe8=c{ef1so}JV0~hjM(okJ-tbB>f{zZhD6|ldjTA~J+Lq%XUc`Y!j_h! zS=IeXwW{QoG1O~gJk=x_Jf-t<&1=t=?(jl>5WM^@6> zi{y40$Q&ocgG8OE8^yuHuK+l0bul;NSx*7hL7p(1G7N0xvnc+Se>U8Mb^&-l++6YN z^X>pB^D00+xux~_D{Q>54K`jia7&HXoMfUB%u~2d3|htu zHF5_Av1yg%YDl9JQX^P6I{ty zcb%Kk(Bi6+xs`zZOi<e0hAC6s z&?WbTh?;w_C#~kORzDx0a}bXC`<|NGZ!!6pUtUJedp>uMv3C8(rI*o7*OFExu*gg$ zq&lzv3BHZv4KrX;tP){~RerurH)vu98=PzS(zs#%eg8TjdwtsbK2v=$VemevI-eLf zxNs$sC{v^Ine4Gq7DokJ#aCSzSgN@MMMS9kT4*i*;Y zb*h+NJy*hjegeKhA8*LlRilMvR#um)CQywRHO6dC7a7}WJ0M#h^E=+1V0!-P!nG{2 z>RMNBUn>^}cicX^2u!t>_Y&A8urf}?1qe-Hp!_FTKX~25KIx#g>T3zR8zklN-%J^= znux;f9U~BX)qZ`Evhh!qhaO)LYkeHZ%w3JNt=1kFUPoW0LlS)A{9?2C#JZl4DJoP} zuqDo6+8=5p=Nv;xP2C3hU>xVP^Z{BjV%}D^GF;pya^B8X-!z$Wq-2F%$z|DzOPI%~ zsVOp=NE0D#{S_KiJTh8pQR6t7ED7mnvX8n-I$`)$82U}tVsf<@xwEn&953AEg9=U8 zcf}Oc zc0EPiN{0e~HE*tl<}YUjowd4pKa%RB-%R8`a@`T}-h;Mb7gIky-Ge%*Otm${eJ^qo z3GtBr{P3KnIzfKb@I@oOyU+T(E#R)cw2HCbrRhD6uJQ!;wIj3O0p%Ai+qG;yN?dEI zI-m38!M!`S^=B{S^cuu_hm(0Tbuzf>)No)oFcg2W*LJkK3;hyerG49)-V)`v&}jRb z?+SpD(I0r{OQ}yKpMsIiIn8E{kdsxsaLVYcM?_Z27@fI|H_!d9LRxDxazJeNK*N@7 zTk?Z2XXNH)ywoLf_L;66OxEGKe5Q5lx&_zx3;b0-cYKmnu8~ssrVbxXNyjI3f$uNg zI0k?udMsrgt@3f(yB}RJq+1U#bfTL4^tujTBh#C-BJN{X2xkVZe`DydF=qr+Q8JBd zj*}LIaO8?K(cT)8W|++3OYdW0pLzMf6K@^UIet^rWjZX#Fxg08!L!BKWrm3a`|tM4 z^W=nvC;v}nrw?DXmXe!@p5?&iA{nE?6jVVeLOpnccSgppYYtuD2ns}A#Y82!oXZ!-zMGn(eB({ErFp0j z$4O7glfG_^Z6#6jaW;4LHDiTcrZ%$M4_r}@-GVu&Mx&&x||vp1dU01-i7MCVKJg_W9JECm2hlOMs; zCw!Htug*%m;pGVvJ-qWV|LqOzRvgZR={vcP9iTQJT(<_X&7*b&G3ay^K&N@AKD^J6 z*llHvMvQDz7N8v75sM5G8^2b%YV<&i9Y)r^_F2SDHJF-SLTb_PD8T|W@W6jY183Q< zZvQ_9=WqY|hCv^GFvQfBSGp7WEKk#a`PE}kVG#}-rO?F%X^K>?I3}L0jDJ(~>>18f zAnk{i0a2mE9hKI%97z&ND0DkWyOTeN84BAu#z9`+JNMn+SOP?rY?hM zveZ}4q!j2)S3dlf;`#rw6y5s&Yf^mw-&ZY>830L_Q{C-a5oC*8%udZAIx2j#LkP}I zcd+Ky{!~dr50lX#)`Q(g)nC@(g+C8$ojrk18q>yl7~%fWB?~$)GOq%i%SOVcj0Xb0CFK=s`GpAUz(O$GHB6G#7We z)aq5iAOx*Y(7ahwPE&8lq1Zbs3s)|q3H=S*!B?Sn*FQb_@D@F2-E}5Uz?;RMfu1Yh zIcDuMuX=0P88hSD%1PwY?6)@vVU~WuY+o5SR|9o-ik=Bu+ng#EUw|wIoH%0XK2c?O z)M_@)D^P74mf~2W4M+=oJQ#L=yi^<~(?zd#!}<9h5AXZx{0G`%Z3OIeAmCuQFsIWQ z0HyxQDnAj)QuM-pir>|?+v1VV`BQHQCAF?knHT{)`)LOT&*sKqACVhRx!v^nVc+UE zEv8o!-^#?qHvvdXO)iucHc>Zq`DwY8b`+>uHn_VM_{_^?Rm$W6CYvGQNfP_y3*P>8 zOSg4OiUcwfA9|3Cp9zy37)&>|e;Jdcp_LdhESgq1Dx%_+iC%7A)qL;F^0je;hJ1|8 z>t1Q=2%ChfUM2m~^_+DdGC2Gx}8I7~Ig5Lde-vGTrSpl;;H zv5*>B+?o(ePv4`hn$hk7bB$NSosU*?wd^M7^yq$xsV!@m7I4d>NR0e-x7eQuM3_ZiZJA(&b57P#W7Q^}UvBJ9S?#OpYOIUyb!h zEK#3HlScX^6M*zyQ2KN>$Lfd0cr)Lnjr-n=xE(|j>>BK(LP^OsWlstG9-7IFLyzG3 zS*OT>dLfQQUt=YX`kZ|{h)BYeP`Zxl$y}}r9)~x5LPHg*r-}V? 
zxNa)RLd3Vm(tFVbb)Rf&Ae>9i;zJzc^8)Raol#9F(u&Mys_t{GH*byvQ(O=~rii4| z<#?CkZ>97nYN3b`i8V(vPrZqR(=HRexzGF0iUewdxR|OhS%8|H%9BOH7d*z{cY!0471$qHhjzfnA9GS4Yo7CoIb)#;m z6{tT`0( eVpSIk(Qz6{xn5qgIx*Qr!9{cwZe9CH&VD~+;Oo{KYDIjvtkFvGqJZ# z3i(o;4Ke9-O{b&YOzc#*yI;WWR#oMRf(B@Rf-#0^rnN>}JrfKiCl8n|r&69h*4t^{Z83k&>6Q7HmY+`jf`fo6jJl-|YEz z-~}~FQ%j-;gQ@LJ#LF)uj~bwv@|6KWoe!@rBnr+D7LHo926URhBr%mgE2k)GIA&Js?H zB|P^E@HWqOJysrl;y}6d$ZKAxx%?jUdmuq-fIK!5bq83-#|rb zENQ{J&w;vEJ+m}TI(W27pnH@k3O>=^(NMm!VaMxp(wWd}fT2wk;kG)|D@f|+*IOtp z8WOYEEa7i?Rplny3hO6l6rE25crt1ab;-__VJD?sy@qK{H96mWa;bCnE&EB()@x(w zY9+eM^^mcBwaU4DT$oF=)KXNO-xNLUDf;lWq4l+l1vixnKmSstkUhxuJ+NU7>7H?# z*l|s62qHRV=cQ^hN+lzSgXxtxlPIyAl7OxHM1P_**j%$WTUv}t!VCv)Zq`XyaW8er z$l(c)=o`Y>#vgvj@j$@e;XM(M+IXztr>ai5P~WoyTq_TpDw8D|DpY?Fw6AwHZW&!% zJ#syAu1g9~SGcp5)xs!ooOP%0fVw++t7XuKLens}de7M6a53@V1JciZ*YCxIg&Tr; z=0L-$g`p*CgkPJ2@Ng&*t)jo$!juT3_-eqYI^!q0pX|6(F=4h^jauDjtvNH!vQ)D> z9T4hTI%*YAzS;edXue{&=3px?w%BBV5m7O?yE$JS;j&ZFBBF9YoF|lNH(UX)ns7O6 zN@P5kuh~0o4f!s4Z9C5m?GA+|jzH-~4vfON<=w464 z;ngWoHdID*%x%MGIjVZ&+Yv-3`$IL^YivwM)otojpd$lgFTk;O`&Y2A`#-7ZvR1Es zdZ8mYBRf{VZK1Vu6_M}S>VCkZbNH&h+#ufkt>ANOw@73P+ULYZnIqg`d70DN%@Gw3 zjYvdap{!Ud&f}D%6(Bc2z14tbPn7k;b(0TC&G*X75xJ=Mh?Rv@Qa*~xJ;+xrY7A{? zlHgVb9)f0FW1Auxl>35=7-4KYIVAz}k#E^b-Ph;dxuz%=m_SdYpd=h{wLrbE`!DK! zp|jRE?c~M|a7W`uVZ!7pwYp%*kvC*nM zhtJUElX46jY{MrK(^N|Bid72rpFU^HAO=aYMp*~W_MPEIWF6`);0UzFNvoc_zKu2` zceEFpa35=0HA)wxicwFeO(etLz|`p0>Sc`{MR_>icp?<*x;2Dd{Yk%CXED}rIr(Lg zCz-er6@e%}F5ag82HNodqcuO@f&4Kx>Q+1i$v!Z39YpZqZg307OSzDmt$wwt_=z1Q z_bsI+^uroV!hBe?w?CSKhZxXw&Jj4=NA#?^j^sJEn*_$rYlwDkZOJe|VWzy8+_SD4!ZnWTS zCC7i*9ckd^Sr-(){)PpbMlPpqKeHFjHkdgk*M1w@wJjjQLe{m;`Xtn_&~Nj~MreZV zD}k9gwIi1RXTq-@88l7gc3<}~FnBI2Cl?C_(+Y#cWdqT{qUbV)G>E=RG7+_IW?WZE zh<`5%VGXu@m>_)oWm2WUu0}lZYcNoB=+jQ z%N6=6ZNTE7pf7t1HEYs62PQtewMg4LFr5nNL03LQr5zIq2n2sVYLUTEE_(cEEWh2v z7V5(6T*zfCEf30O*&?qmx(YARIwD|Ox4&*PiCy?Kqa$;wLS#$!x^@!{0q7dx|69){ z#Ii0oIj!M5rkC>(oz++RJY1LpRg& zy4l$a;t16$r*xGFpC2xr7fy^D9 zwOVbWcJv|ZGLYt~G{rXBky!e0@mjp-Q~eCqCL(cik9bUAaO2K#4K;VeMUxzzNyG$2 z6CXTi2er59`vkeEpy+cEh$U=>`a`v-nga+B^7=%Wy{}<9m^`POe(&Sj9l9_oX+d?& ze2Mx^dP$lM=~@ySOw4s4SWBV(yPf7=?QHsoWmETi+;IqMdUFM4hphdOnyD%+-XbEX z7HW7P#=;#nLbx*kHg9O_&Ad%Jx3eys%*E@Tq%O5(nf3GYPzs3qiis8%h!SEI640^B z{W)Bv6K)j(+k?98Z0^X1h*qU)&NAf$0=mQ9=JsZhZ4OBPR>i>K;OJq6-}~D$Z4t3b ziHIlGn<0Kox}T|6nW3b&*Ip-9cZ=LXcfVrFIs}7+wnuk`8(E{=ECYx#dgMAqr(Xwc zLpP$TkS<*`c7uY}pIpBn*PAb`P|#lcEyQ+5-t zkmH}yXJ+)Bte}zD-EZC3phOU3=c{8?87mZ37V56?j!P|>@0nL4uke~H8&K%2@}PFw z_ob9ddstd4zp<7~m>#xbe+;4WE^!#9flx;nr*9&hr9*KuAPu@q)SJl(le$_vm zjW(%9HAF{Mi5Vn1#yB<+Hajo#)l@Z)ZiXI8Jt@hS233sv%gTKDgpR zt773EGTBa1BN#og`3kAG&D=6yy=24kUXH*7xb>vhwa>NK6ZNMPQb2FZ9qY--I~5LY zts*{d-H%3^+YkW;i{-;VNL)V$1O4j70sp+{hO;`w^DvV2GwCiRUt2^Fput2Sowozf zMjM|cWp~=1OBp5nYFYa=QR~NCt-zC>IyG9n=fJYoZ=?o`48umtdq(X>%zPxbF?e*R zQ-9%pHP})uTxO`-=OVStEu=ecU1ogXX1+&=nAmONgD^y@*DL1H?N8#jiyk6G4Vu_5 zx$Wv$LpYkXozVO9q88)pZ|$$?QmDW5EQiLJkfAo z@--fV?RKN*wJfar`xFN5QpwyIRRXSVOH%V@+qFGgZEn#?Sxx(|kfk+STl$S}{9i1g zYn%sNcJ^Y)oPTZBmFV!B)w*`36Cp6?D&b3#gEQ~l(u4h|-2rQeJNsGArUI1)yM+KM zb7JRtrb!p5CV3E~5gOLH#Q2un|3}mMPkaLPs(pMbimrDTC?nw#ZoEk<895JNu|c1V*(M^?e~qVMn7 zk6KDX{kQnW9a05m0=YOv%&1r8R1an|Yq)EWIj&#O$>PyA*#n<=EgT!NSIFg>IAK^IbaqggT&OnC>HvMRJl%BC_BqvJ>H8P~gsc4AjOuaS!G zl_B9K7MoCJHiult^#ECYrN>-E(3KR#PBlU%8i}c}Yt?KBwG6g&3bj;jGStkkbh=Ev znPtN-%dDtDm~pwUUye5Ab8~4kgAUXnYhEmj7HaAz927-str=R??Q_ZUcf`3u7o{$l zVN_a-5*y5dxtePEYly|)*~vdz&YwT=%q2Lz^$I!jR;Z&av({p$fT7`P5rGKTcdF#_ zRill>{aGe1R`bv0s6wp!d2DX$0nil-cjxc9-Jjc7DXq#3ddKq^S4}tM?gu1P70me42>$W*_CU|cmV?c8@nQRLu?b4q`$FA3lONO#12`UT${mJT3OKAKQPRY(|+D;t$m+u_l 
zqq{cz2@!toYkQmKHSwBX(bE)FwQ(D-40hUeE*EJZj25nhb_FLok_jTR7l6T-u9kyw zRdb}{E}kk?PT@vh@X6E0kF+bdjZ`t43pJD)LWV(Nq*$4?@>t;>ppg}y? z#Gr)DOm=R%I|)_#R#rDeVJpQZO0nq-a$9UR--yR|Qxl^PE0(KYgk@Iqjd?wsOfWo5 z)Y|p!1h8&wyHB3u)A#mWP3jjQnTb%*YSVJZfpm(k^om6&=+%b9rJFj1P;*eDX1bLX zkZ1+$yZ?eT)~kL%8s$SHBla*(Ep$_Hi-@p}$emlRg^!OrT7-7&aB>Y)jq+2+eU*Y{{!5Oc0>b@#~fFp2Jby#`MTD!&zVIQ(>iOHLT6 z_RgYzb$VFp}?u`8(3$~hKjBb(dja(Fcg zQgeu^iOo?fjiEy?Nb0xHfax<*oVENm3-2t6ns-)*bY*X^FGA<+IQZn=O?>VVrr#fF zYPZZw*PVe1!yOttUN^Zc-aIth9&}hOhOqj&kHoUAaoDen>sId zW12fwVHX<|%Nu3gaeM4jp5rlSg%LWvul|}>&+zC?_FXh%u>05WYlJ4$X zpwI)+Pj4#iu0$O|z*MpLl4iO=rbUFR2#3zN&AOx1!Ls=-a6r}gLuQ?Q4G?NAck}1- z6kkQ*<(-_FW{%au&7{%P?wbnM6KGNf>oH$&4A#qUKzXyIz)(|@SD~@T$7Vkww7>kn zm@e*xtyhkc>!^vtY5^3i5FZEKJ%*axO9!<1y&? z^ZW^qHtxupFIUNppEsVW$wJp2prhA+VHKRZ_nTJiNA=vS5IKbvu<1h{yE}WpBkDK9 z`j^@GM`$b^wp&oqgx2ils5y)F&V%{LXo80;q3cR{sz!G3ag&TpMlMu`W8cGeLOD@h zN|P^sZ0c^sLp2tOmPGxBT_R6wT1{Aqpc)B){!@2*Z-Km!d1YO>+$w#@ebOVkq-MXa z4Sn}314X<^f1X;yJ7nXEeGJ&XDEwFWM%k=4-Tbdqh=b#tZodfdAn3iCgooHFYx%1@ z=br~-Ndhuu2lAC$8DGUqd_Rv^+~0$iB;9Dx09~OaJ+FJBsFU1 za@Rj&N{vvu0A^LCzFaIFohLI|s1u%jy+CA#Xo=P(TKRgmLVlXWzA%KE<2 z(!y*#79`36Qp*SYxf=Vf9h8~Tnc%1*h(5GSH!NaTttRSfeoZ`-F7~Tn7UsN`bb75x>M8c{)Q4Zr%;aNY)C9u!1|Uq z@;3btuufY75S!8(Z=C(3JN)O)^7n5I--8`($8a1IX0jbjvq4DRcN;{i-8Y!rpNnjN z4vuah6}>&QvpW+U8|=233Kcz=5q;0Wn}CEE?77071}Y$@P+>c{n&y^BnbQ3{U=@Dt zvvntiitO$n37hG3I|LEjg*J=buxjl7V3-0Fr1%wF0Xbs|5V?^J7wZjcIhx5a$SMYs z-`_70g5=UxaN1rRiF95km8J~7|;^VRahy6Vh^JdM)+IBV#leoxaDC0$^1f?W%2nUBe zmHE!<7&NzHl~c$L#;&WY3l->ed^=QTsh@25K>=UL4&&ym8twO|7YHj154%g%A3G+y zR703b%(=jRx=Hk5AV(u~Vl28!cb*jt4WQM3j^sRiv6*i7-34aCDy)teql@yps>|r8VH#7eo(Z3dp0XNMwx|P53%RPZ-c&T1G z#5C-VZiVkY=ycrXg51>HOF5Zx!y9vpTc#i+uE`J1`||I+rlf|ZAyyv;-+B=+hWLe1UGylQ3OQWIXDKo5;;?-b6}qj8q(9^gSqvC_u=!=U@v zFXWH|-b(-3n{{_Dzl5VnK{ejx>n)Xnuaz5mPG9!#@@FjF0q}z>c(n-;UA_Oza{c3X z2r&0xr(5|wAmqQivlVGqbA+9`_G_N!J4PToBdWhdK>qQO{{E9FQt)=7biLsUKe4b2 z*dR&N!o^o%`8{C#U$EZVcbM0u7Ztl9_w#xZ;b4h^mqHZ&tu@Mj37p7&y+Fj$y`R_f z#TW!>-_&D{W0&+l_%Q!Eu*UtA+c0C*pVu2^0^(cYdUo>PON!Znv`7)(HGloJXAH2s zYDWHY|G#!^|3seL5+LlM7XKjj`#p@n`Xpu~oBg`MzqE6CA~A-AxT7KPGYJD+MzB8j zU%u;0`FA|SX#w!MbQ>qqHhu!9+KL!?W?d5K13>G)6ek~jnaB?;CR?2D2iafO%fhVp zV_K5)KMed|ykr1ohh}SS$j$iqzW-yq|3umT{Hy;l-v2S)f5D3X$9OS2Zy|R^Mn=2m z3H)|XFZ}j8F9m&L?T^9UAdFm09v zGjXiKp?aBO6fc9rw2fI`4mCO02LuD$(Rz51_R`sk|1JlS&dcfG?CN?`hX0M&T%zo== zE`(*A#nO|XJE$Pw;DP>;Ox=~?s%Fr&%BQpH=4FN4nA3m$JX(cUM89cN6m>#@FE{cp zww&69voGBRUbc#`>lj9Tm~`W( zM4kp`L4QB~y(30|{g0mh__gS$DE4e6nW*zO8Q*gpVdBT_(zn!cP}?hffJ|9Rw6Mb@ zldD!S3{u`3pDB8LS0h<6Qgbx_;u#Wa^GDghu{j62sb_y}*q$eks74r=EVBg8Ejwm{ z;V8C$3~xe9(N`T)fILdfFVJDzCmId_$Pu34TJ30`xRAePLeY-CGRCInGG8fpJnrXt z#uHhMQH$i%RM+2{Q%ra(0uO;HeU2myPYDDC2S;x$gEJBQ=Zec)pC3ef>daux zp~q0QZe(;^+=h%9ziB*B6P-hzyWkP{=ko-QPoh0D2q6QQaoHAo4{rRevc{Bu@d5-6 zTABeNoTn1ZzN%5y^wau_dyC<2;Etv^+OxG(jh>(wtxEIR?M4^)I4Jgo9FB&#^3hx`>L29 zYAeYXrZJCy``Gn$4(qQ| z)D)u}3gZ9xia$OXJL>=gX*!rK@r!3>?@tX$L+KIMD_;NOV*K*V%Coq`m0G;L;2@82 zyKnp3Q$~ocesTSQ>gYo4_EWlVhl;-5MlGa67lNx{_z4a*OKytWV*TalYh;| zDd`g*F0!nOj#3`K#eV;<@?m8~+-R05q7#nU zI2-jBjQFR+MdLXqs%%?FjVRjuG?|9(Bj)1;^;w--qAqm$i5VqgEM@-?7LQ+fuCK4Q zbLRUw+z`fytU>wI?}M+Z&Z^xD^rteq@RhN7aftp5fy6s^3h%hXA*(_=5K4k;*J^iU?{c$<$QzO% zI!xSo2lK0J7Axfgke9J=&Uj$qUdF;E5c|WAH=pH16q|fZ7xyDhv(9`Qg7d2C){_RG zg5$n@nGBaxkFAZ`5BZTQq=K=xWd`2IXCfvmR8}0gFd!jVWb(}R#vRP%Yn>!K;PDVr z2YLYrA1`lf{r%;XHhXyg{ox{`ct?+-e|U_e+dW>xU0PbwYw133C(5@n^i;L@jYOCu zOb<_#XYuke&wl0`q35HawvBq7gDN9p&h2tRm7;Frv!e;r6fF!)#UWm(bEOYR!s#)0 zOUfr)s|t-@XSoKR{$p{<*+Nj+W76PT45RfZ!uhjQjvgmp^Neyy-|B9XFSBX318G}Mp*kHnMAT8skJ^4+1_Janvjqce)lae`R@IT;PE|i 
z=)v-ApDcJs1LbLr6sancij?iH7S1bd8NZdn@TZ0OA(36&kJ%B(PYu<<@_DZgKP2cA%t7RohMv~Ze57C}N-a&(o+Gn3BE^da$P$@~yLZtm8U#M+FrQP*qF2N1wPoe;pFg`=Uet1}wT%)?spL2qjkKO3}abgxr zN+Rs-?QJcX%T6;JDe2BJnXg>Exm{3s=~9^2anFr+=d!7@N;#2-?S$1}z;Ke-k zeoZG^=2qia?vqEtK;8kiIvOMnw1Z8x@(~pkRk~!bOq#8w<*G+F=W%xDAy!A0WnuWP zF(C4xrRB}Vp;Qyi9dc(CO;Xa9!pph; ztzX3~wCTt#T^1z(7FuC>{hS{Xk0|nuo=Ue#kNFeDuO}SJW95rZA7`VdB0Yu^dr8u_ znTr2Y3-YC5pP8@!TOeML3T@DebMbp7Hsi=XUZc}T!rs6rXgReXP*Aoow^?(>uD^Ku z8@MUY{ocGu_`sfd`gr*qkP-Bdgg*sua`TqJ>e5o&JJ-2_?e1(Mc^zwK-r0kSk}-3n?mY2KG*--a3A9X*}7%*lkGj6vK%ly*cHBMS>`&ZDDuMpDkeW z=?+%sOfx&qonwKB{9Yr;`SZfDjIxBsf5V0GOa|?ZY!ZZ@R*CAf+;$mnWDs9g*I>dewC^Iv=;_@~Zj>Lgg{20@g&qMqiC>=-l1Y4);an{{LM zXkeE3*wyw{d?Xm7)L^^!-(r=(|H56fHCLTdS+iXv_e+9Kix${E8%g8%VItrFNcgNd zP5pk|`wGoT_QZ$V$JvFL4S`~gipk9pod)8RRGS1Ns`JjOikpzasr$6Ys8T-{&S0CW z$Rx%>SJ`W{eHfZGG{&q>89e|bX5fh)%2V-NKXoKZpSwpJz>6qRWvc-q8mM(kLdh{f z&g+xd8v8eyV-0SOI4`&3Wg?+LYKkx;`R|2}sbVb0Ji_HJE6g)=JZA7V@@xIlP-!=r z5xG3OzO!V@H?vserPem#Ho!Qu()B|pXW1~4=ioZJf~YYP*fZqiI8lYZ`RZEUwf(;qIUWtAzbMjH?Xs8O^4HpgXr+Om*y z6chsYhmH-_(r`!fB1mow0!>P9@TJC$zryD6bHBl|)V{D?Aa?7;F@M-~8fa7uHj7j^ z=Amt)21NAh9nTsduNC?!U|dVTHL#@@7C7Eu*_>6IOBJu9yAo6)zL^|qu-=RuXpN*B zcp&3y!pzx8QDQzKEod<+?HfF(TryZ0e{PD4PW0?IPb^$AGKZZp`#C`84w&BuEJ)T0 zSU~`jg{$BE)uNQ;)I?#g-VmH?qzsjhqn;Qn+`NRoNHRp!npYQK?O{PZ|qv|?TrVYG&>W7 z=i2!u0%!&@pVry^aRbFYPbFaj&pHFmWb%s}!{*-ZMqMPc$ZnLPx$lnv8JRV~>@aUHO+1$dlkuYnHZ4|- z3cD?B`FTXhRY|9fC&E#}&LyVBea{l6Qze5Vz{7NhQf${XQfS=*9A(`{rooqlUwluw z|9Qs7?f~$^+i2}ar#=)|%}FngmKQPwpZIN2`6A@Cnimch{D6}q@^iee`1GpPfVZII ze$}4-S)rB5e$Esv>cy^HmA((SNB(x15;!_e(MgP>t1R(XyB_Ao&8@t#uN&|JULCL3 zW;QG(Q_R5kR^_s_0kxdd448y>+hVZkUW};OH;IA)ODaT9A${t_kzE58!zJLA<Qecb>86rdlW*<$vG*Syf_9p?Z{ zemZ*AnPj^F68N}xMQ`y5;fRhVZF}F;XuQ+TDhrjFNGS6oKS$fZysWIXEEJ$MwSWmV z_9H#p=#bYMGWaHYHb=;83 zW5WiBs1!js(nJxY_ofsT2+})92}rL2=>iHO0@4XpdMDC5sFct#6d?piXm7`Q z?zrE5W8CrHU*I4@^4n{#z1CcF&K;8$BJ?;@G5Ln34FHQGH4Hq}_$HkOK4PkzNk~<5 z;qYizTI2&#QO3?2?ATvk0E9m>d(C^|W>ld51wgdV7Zxs0wqz!#l_vvY@0)@ov>rCo z7*JEPWg|W?tS|)|$>_C5(z4*~{3Es{V?kt12%22@U8-$`NaXevt8V^0vBO+ETV-GU z=%U(MDBZQVgc4N1I)6?kf*;k;xohniXI{S0te@%m2zhk5ys`F74i1a(F0g3oH_u9B z26kJTs{-W0hC-ca{6{&Cx;Z7sg-*D0FRdc;pG@&o^!u8hB&_)c&BA2sV4!`qeOz)w zXL$Px%uJ}9Vl>|}Z1rivynVRKfDzsIwbp(NilOMs+bB{DQILj%bLjaLI<+Wbhw)0+ zEu`e+9L2cRQ`rbao@FF*U5}BjR*xgv-otjNMd~eo$Y%L$Z6G}Yvb7>H-7e5Z5pQje zR_`B+WVJEAR?UA|@HoG)KjneF9V32~gsho}j`iMpn zVR=>4;`F|dDL3@pz5?<2F~JFhqGKrblTS&Hm=3di?7QOEQe=@1T_$beOwJ}*F4LaF z>v9cVhpVrFPm;5Im2K6n*fumX#o?o@F2LnhUg%D|o|lSPMu}Yh-37I3oLFZ2%Yx1s zU7}a-%!Jr}^Svo$zOK6pZCkPJLxh#Db_vcO<2D8M?C_wZ#BO}1+iB@w5go&2hL2ozjNy)K@b#N+$Nxk-5N6# zOdJIFv*}QfnNF>0Q@JnS%*o|a z`WgfpC0O)usQqi(44qjX0~3gOI?ibJmaQ#p+Cw}37Djlz_b~+b0N6KmUtyGvej(Go zGL)y$|8Pt)aojL~cFy~FKiLYrJ;|&#?o(d-!lXm)xvZ{&ye)ZV@oGJX^|RHx9v-?uhzre6AyJ+hr4Ik z)wE09fe@(`jc7y0{|Hy%AV^fHPaf|qc)yyDW5_#D-Z0)GCm1^#b? 
zGxSM6E3Kq&)ikTnz}9yNP?17yZkTLscmc5vRmJ_{m<&trwwIu7?tgRj&gvJY(en?< zGaGBUJL->*+{X(OHa-c`fV(k%b$>oqmp zIdsuX#SYSS`Py1$B9p|tj=Bcpw|Yd1bwh+wf>uY&9*lm>R~T03Z{O=H$(VCjX>|_a zKW!a=Qx_NPyl&=74x4h0sXn zMu$t$$HRZlR8&p1BVQUco z%7%Z^Nd>oB(qpB6M0A;8B!NSUAI+E-KZ$)V6!pqaUXNevNu9I;o8|}l^^`L-3py_& z+kcflCb1s@bZfHNf3-;6%?WKJ+=1lkgghG64IS`!Lgj*6SRalLAnj)sDE!A`kd zFduqM%#4fwk!FSZ0eWc^{uItVbB}J~ za@DUtD)ID?`JLhpml-1goBx!`RP1aWJUGNpS)&j8?0docbnU-2* z`P$6hrOMRQ3TIsvsyWfk($vaR9)n6=de3pNMYagNe3I>jgrgVHsWP)pwlo{p-^}gJ z@@vJIy;-}Z_jQPQ@ao;fn3Qzj7#RC^Q9}9ef2|1 zbvhL`kfO>01Gm1 zSr|MofVXVe*3E(>8ZA04f^-`VZdXKZIq3^vIHTd8y8$Jd4gbV1f@iEj$Zmge6tpgL zppAjO37j@GK=`axOzS^>{MZP(4XfUr_V!-YbUE725+8*n%EN9AyP_eI-7}!Hj0ReX z0q5G|g?QUlp|MQ;H22k_0ISkI85^>cbxUD<^%9-e-Y3(6dQbOJa8$4MfEOl}7AT70 zq=5c;x^<~9&6?%8_uxi;^; zfVy+!LYnnhDOueaY{~`C0g2m>kP;iVZimaFs%W$}!r zF_)bdQVhuP8^L%yUXz07GU^PTM@yo=dOdw!IWQq(_pdq%bZRksi$yYQ3N5l7>fo7t}YI1^WT4LKV1{p&j2$q8TNtpm~Vb@_rj6lKD=wF;Z~%CWjZ zNL=pWm@Vn>odN4{m%_o^+}ryK6y2LG*KOG>;s;b!RE)>bgPApMSd0CF7z_?^V+ zY6YaB{;>RD`AT}Z_4wn(?!?3gm=+_1joW8y#X;i+z5Yq$!c?eQp>R}GR9lRJm-+aO zs8F%DX*36X4h6Fdi_xFNexK)O<0ZsKJ4$T{(}!0weDPyx`^^0V9;xr{mpy>f zWib9ZiiLb!F%&xZK({x_;!_@;RWQ_hE_8^_f_7OMR^9kP+K}e-v%9f%D7USdSEve= zVa+=lC><<}*`SAMpC=BpFz^MxRD@C*RTSZy#xKOTClDjFMvB-Q4jjm-4Q7{+-ttY> zavhXe^CPaa&nX9nDV>dRkV8;oBD-r`=4~!6)5T6Woyk96Fx( z?afHoc5i{bRK@MvwHfSDM$Q@*F;JL zzL=}PM1c7Qd9>pEOU`D3v_nY8I*vZOx$ zmQH*sY1YP&x!6m$re%3TuW5?EZ}{$P_Bv}Gm)}awIBX+c`mI1!jtRjmD9JfW?Fm;1 zHp!vo?+F#F&o9p(-82OmF9cY@tSqjAvJbF3MkV?_m4( ze6Do+qrfel7FSWj__6Z95Zh%BNOZW~1#QK}+yTe1#Fxf4ccl?>bzU+d zG|S6rH4O{IXO7n0LW0w3p*L0@Y8q^G2=RBGWO1$o4rzjhS}rx!8x1xGc(kX=Mvvrh zYT9m??2Tpfo?64kw1)kU(?R;V30SVzN}m%CArY77=#s*ubiDmuBd+{q$gw|a z<6pOGb`8%@vBSkdx(bbcRmZkDtR1-q?J8^7TN`$pzV}Int;_)A-A$jJWRo2uf1w$L1H3dXuF= zk1HR0!FtjEy3NDZk6oR02xrK@-uW3HkK!0}?}mNDXzQBz9pk{V=WPy_U2ztc>j%=P zL38o(?TH3#@u%!6YXDuoIX*qETaf(+U)Mbau-dPehbTlB6&+4O)5C|PGGiR{A}+23jId6qi|4q+_w-$CtDA<{{e}cUg_4yOX=yWW{ z)#yUG6w|Lak4~S^tGw5jufp{_zP0b%>7IE09xk%E*=?9WWyQ)MTA(mjf*j6Ln8Z%M zNB6Fm=*EPD)3nsPios1(a`KWupNHH?Q8Hxd0i_MRU4b^?@DYX1#5^dD_!Pqk3S&oo z3h~QK$7N2l!TfG-@Qdj+lP3Fa#O(SLTSR$-ISmF4UP(ucmKytM7j(K5j->gl$CDkp z7_$#mX%-ATHl;girQbh_t`?w-ly}OCEHtF0t2?v(=}ojQzgGYb(Qq`@?{3z+hZa-- z|2*6o=g7X}oMZsu3UGz4Ctp9pcNcM?=4i5thc-z5wdNEjj!u7U&*inxy{lWwv+x7~ zLBkzlLoG+5icFTdg_h}`KN`+VCAluZy{-!W0lTUGd1t%BRrabY zspjL(SqGQrN1vHE-ACKi0{w1Rqvzbj0epPGF`~O0vhxdMzIFBl0a9Xmm%Tlk!xF7! zMiOUDESNDNG)p>k zTUW8(nxLDdG_VY@>?(ItsKb6ci78+TvJA#}?8wTbO|ZNXTj%pVv$jw17w?Ixh6M5R zHDXHFP5wG2Xe2AU31Z6|uG&3oZwsyhUkzPFD)0Rx>+NJ38(E$&bjryM`1ZFmp=Lo1^yI z{ay1sCP!uLGxIHRsV_I;rS^f(gJek@&@#D;vZ>Z@?f%H8R|9ZTwFfvi;=$qJ{hGx7 zM)?pny@YsiZ?B-j>&Mu2Z;anSv`r#EwaI5g{f&V2^0={voiR&mXJC4n?7}h9;=}S| zBuIcuP2exQ|L)zmh#=X9n7EjsIU>xXu(qQ)1Eync*%&WXQ&?{5G&>AXiOG;O}ask6f$rf8MwOcHU9LSF3rLrLlft(Lmz(g(rI`2j_2Cg zO6;T!uKkVt%vm*?Sm*4UL!Ae9jK9bCK2Oed8hmJquFyiPr?hvKe*md&dmaOYxcc#+ z;K|O|o6^D239d&n#T!1?2`m6kbki1Y|IYlVyT-#XR-@wd)&q^fcA6`2?}AS6X8(1? 
zt&8T7N|fIxaMAiQ|_T1i!pEX@smcep}@*VUKLy%7i4|Zdsxh zc6OFOWy~4U(VbE!3J&;Ysm+ji?8dZ#{_~kvBbxv$VvJjawB+cwgwk;^kA0wQEIl9; z<^Y7A(vt$sny4V?>5W_wwOel}&6Y-r+-2vf7EPC@>S_QCd`w1OH}0t|&j`f9CMe8FQkulYoEyGci|H<^W!I~2y{e&0CJZrK{MJZH zK^{DPf5HCb;R?b?`PS}W~m z6CQBd>b(OfypVH(6(p|tqLL@J2SQ#qu|S@;HxS|~>=!Fd z$Z^~5ZO;i3S?r1sylaIN{3cu%6Cuu4mM7d>2@Y{1MeBR>IbHH=@Ho|wv8W;SdC2aB zW9UUXv57?J#y-?Mb9(YtEBo?Ui|#{#XSB#u)ZxHKpr<^MRf9aTxjayKZ3%TspqLb< z_w@L1*CqKU_W3<)y_C4J3LV?rs%kM+SAuenl=V2Tut)B)stQL6bw>Q00|ESPRX3?351w?C~B;`+L=*X9BxLbCHR>gu?u)Y(I#Lc73=rptq6bsAi z;kBqK$dOS2w)bRE{_eDa%}wob-+@n~UyDKY-xnY%noPzZ(nb zlExdfym8?TZyoc=3@6$U@%BkB$ss`=g<-na*fhVerjdQ)$*mpmw+Uvb&yr6)dsn*< zg?-~{yd4r0VtcXwhL$7|bwM-bl&DZ8wr_ZuT6Xq*uKE0F>A^nd?V;)QhsV@alXPU- z9iCt_C+(Dm=qOGOSCCc0pS=RWA+jsjEXlQnK`yOmA4wu66wI0M&%T5gq7^nH^y!(_RaXFnj31aE^L!1>>nA+iqCI ztv8YNl23uX{*A+glgEcm0-eHHU(1igt`qH5a!ESMArOS2kBWMOW9NgrN&E3C5|~44 zI8klFfYq;R7-LBDZNY!goqDp6&Ai zG)C_l3q$MCJkM%O#&CrA=~1RKUB_XL!s?aVp&=G|yq}yzy4baf8N2;0eM_IW+kr#* zCmV%J;bW0>MoCw6oqSq5hgNxioRad55_nWsQj9rcN*n0_$<^Zvd_26|EUj==xMhkR zQzLtA(JSb4HnqzljkD)E7D`^~VMb365hg>vD-c1ju#s1D6^ zjy)}M)C`87&MKQtQ>om}FA0q^B%T-+=-%M(5a_v%#Q6o692jPxzA%8VO7p1qX&w2F z5caZ6FD=81p~ULa(#ESL7HWQFPi@Mup-NDcFRq13Jg7~|eV0-uv6&cl|KekirmM4R z*s5xc*{$R(~@96#%Q zHm_nMjuNXZh2!bUkq)*C?r>MpR9yVj;V1oT-lT!W-$Bw5LQ9$>)EQr~(n9Lrwn-$- zXW^gvhBfTn~FK=0;oXbcA%!bW|rw^1kOYSH5bXeVa#Y*cu6MToQ(Ksjk z3)KxpO}=EN)wmw^Y*Rqk@trff!}0hqR!`qr&knV!`4abuB~pjtB3s!)^brlT0wsVF zzvrC9`^;dL%~mYSG97m^7;}))qIL9*3yeWbiACMYv0n-<lsHRKx+F!iqYu(ADb`!IksKdI~ zMAluR`4EqTdHD@Lp9-ZZ$igP3pD}b1cB`+ipAuXf&6OO}Q{SD0f&i2Z`EZ-(85w=p z$+EPUH9cOUgRzBNlb((%88r$pABp)hg8gE9U;L7ohhjHS8i^a^8Kh!+A63+Ymyb%p z{a8JFHBjMwaA=n`#Cg)B+s2YDl#%H%_HEiZJt`&ErtICs#PB1siC2S6S6~b52*;+x zet3lT#)n1{OsmRh8%@Hbbw)|U^GzeUv?K%fwX$fT%9{Q6`(UbRqio-H?XR+WnHGaG z3$^!eb-5pvc{Vh?n)+}jNYH^SK^_6XMaNSf>ns<-63nfgoUggwvx5345%Y_ zH{ZLN!9v~Nx?Lyn^X7!< zMAK*~PxhmmI#~=o9CC|q#WO5o2Hhl$zZx|33NDv7m?!&z0E~gqhtj*w{VDTR&&}1; zS+(^?eGfwZHRNSW81(BGyPO?Soq2%jHy`_UsdH%ehH%^6y;L-%{z~up1JaIfnz@Y6 z{gLF+U1sNM7D_JUQUO*cJ#Mr;NEuw3aX88nqy0dUwcm7@wq)^^g^Fw!AhC9 zpcy}(X2!IJ76VD5VPbu#ecUvnNvIBiJGcV%>LtrSz5cr-zkgW`op3!^+ye&l!#4Sq zfbP>m!Gv%2l+Ddu_3LK%fOVaxU7Qg7=wq({7;m_R)T|V1)aj4!v#|hsPPwbh&{RWQ zRQ+r&WF$2t4pP0Q4APQHgqk?% zqUNGoaeBzk%0vSQ)s|&+Yp?TTIl0-UxwuEUYx}g_5rUm)5l4K%I8Z^hV1<_ou0d!M z`SB}Dz*W3uXyC0=;dR*;2Hu`_nK^R?B9?%l9>h_9skacaVEvvqb7n&U#(niEM+w_1^b_Bs#SPHu zRvZg(pK~7-x;)a6YQ`Zq3?}%8LTau#0vW=?Y2ptbg3Rs~OjK9J1NR&;=eRnlTqc*> zsk)9-6vaj&k1{>;^Yfili!1JE6u!MMm-4sB!{5Uw`T;<_=1?J_PPfOkpGS{xn6+4$ zUCm3I;Q>Sb^8q+{gR8F0)0|fp;o4!UMA0)O*Z=crT|jqXVOVpKIMJ9`Oo?`(YcZA! 
zo$z-Cid5zHP|qZU8hV69UZV`a!NsNF{A3HHx601kP_`iEBf%iVsy>D=BVJt(_B$k< zR?}J}WiLB&Y-p=UOGS`xAMQq>_Uucn1MskL?)~G}WDFwyO1!Jgj<@8YfA`W!>qCVQ zL|EA&{W`a@dGEul*||KDg>Iv8n~iETJL%iZn`$-aGiyD`;^NF4w0@4gC)8C)Fh%~I zA=F7j{|s(K<_d*SakuCVKF$aQ9sk>*M<7|=$$n6r95l_!dzHnq;o_oZVv=9DkoF#J zN1{-V__4>6tHK<#$Z6GWXJJaq23lIDJV+-vvWuL_gB$lYXGQTVXxB*xn>ws|kL#|_ zv0)U!U(<68M52ZC@k!>~0I~O9)1BbYM1z@=vp3egs(0fZVqLcT3S<#RxJI zj%TPLM})w*|Ne%v89?SVM>@RfT~4h@cj-qL9x_?&!*aKj?H{|e%mQl6wr&qNw1tPO z-1D4G8yKnwLP`{*D^%CQZ*xcOZ%$M>ih8OhVkQ4(c^`Tve$%_coC=o8@+U{i1NMDg zXP4w~Sr$ZRk>_~!t@3V8?IfrYUp0D6kvN)OalF_M53Bcj^a@%zOWgDA`R8IBo8hpB zvm`IzE?;I8*Kt)s%RcZ-!+uJSLiI+p=YMM8Ci#YpTeFob=%a`PwBD}!MYzp8b8BcF z4EJ6O_1@Dg-!bku*gj4?d$8K$d5&i{yUEu!gr16--l2B}s`m(rYBek=5QJLFuM1HT zux7a)_Tu`zd?UThi^m(@*i33sot1lt+m?|g6^5FLHoR4dwnMBc0|c)#);!2S^}R2J zi2Z5L-9)`9T+dzLVi;v?-T27xdID`#TRCW0eaLEfryu2Pa~jYtV0{AKr-vW{TNc3> zog~+2F=KWwKE(0)lk|l56>K7#P2~1uu+l_uuQAf;#=WO6vTUnI!fxRht`dWmRD~x| zFz2e@ROS1ynZ!sE@`AJ=W=W+mr&Xtv#nJ&t3C{{JY!xEGgxFX}e4FMjrQMxC6<3*t zTkJ0jBxGGSoSI2Th@c)i1s2kE4e#d&XE;s@VEg-~a{wdZ0MPQthGma+?ZyN@`ki)*Zn-Z@?i0&@xf?i)MB z&ynt{(C*dNI_tCUqt&=&5~*+yO=?6gwAlVDC+gF0b6m}GGRu6md=Ifx4RYrQ8Exf{ zP#Ti}X-(T^Fk8u5fkThe zE3LyW)~Bb@Ja1xFo-oyeyXfoPRup>zZ7&T^{i@sxh)V#C0v};ti1z z!O{+JpN$S5;8b#6FmSH7{}x5ymTiG?b32$AT)X2w^NON<)GYa~@LH(~TaUo}8+$9h z*B8^0!=t&+xGwfi#hvX)nw27IZf-mv82+#v^`kN|(k(X^fE;IJk-#xC!pE!{;3_3Pd8Hv^j6&aQq+%dY# znejH^fDu~lfLe7CMh_lB9H9-Wzqh1Ojkz&;j5dlaVuZu(BWG zu@Qih%HX^@H3|TZc|bh~hAxX#g2!NtT<&~S-~=oT-?%zT=GU*s@(Vl!J=I#RQ|z-> z;)lzMMl_{s)!p7LtlI2#+sd4z51RItYF$FJ>|iT)u|pasgO=<1tNi<` zY-kLZmo^3kSF+QZJdcOmv!*(H!}rAs7ZW#5WUXG{sHI<+At$Wmutt!6$#Obagy!hA z!nA1(e4VuKZ&VE@@!q0Ko0T=5BKJ1nOzR3+RZ3=ascD+yL~`=AEsy+f13>ND5iOOQ zG1Vo??~t04|5ReGf9% zZ+s&D^;l{``i>DsT^GEoGi{FNm$dy$&U~GNzrO|;w(f!fN!IV!8fgu50+vz z@8b-`W+;A~90B$(E3VzmFJq?^8mD%+U&Xr=%VpWCl!1EB@Hk)YOpNxOn88uW1%-De zd0<|{yK|t0`r_G@Wg7@PG>}tBa;)#uE-UTE?`u(KOnHRxhphzHrV!>sN!k{~;$0pP zj*{*hx~8I=$<10+Az4Xo?UhUJ3=~wxcAv`+5i^6Y86?{TjFD}J08kcn1su`@y&+0d zQKejfIVz&T*2P8+JwqKiH4^hbZ+|9k+8H~;5#4Xke6uvY{~N&DuM88-S|vOl`pA_d zGuYRjBeg`Zz8A%tn@|OxnVN7q#16a9;TDr_Bm)|i&sV>j(Cf&rAB&ewrU2@BBQae1 zA?a7kgeBuH9K&t%ZJ0|FQ`UPKIm|AhXm^H;k2@`-m!}xdM(0@b^Dp0VnJji{@IF4; ziKjc@!m52;FK*kzp{qmvM(~UG+2PrRIpxWam(Fzo)j0FgPpSihIVuGwNkw{944`l# z7NeVSj+5zW4FkOdOW;0+J{P}emtBor?AqD&bld0OV)`oh)8h|@)7sfsSs!U!oc{xN zwVZwCbO&j34do14daAx%Bs`S_u#D= zNg5UoCN~BZ%A57CDH6GBiI8_a0yVSANghe=6R)z28X#)=x0NPnnY1q(dM)*)pbl^t zQ8cX^F*+__Kez&-Q~k4;;TSrLVZJ1vK-_H`p;DLZx#jdi2p1gA>VG&&NFVO;N*+Oz>m|#}?WSOG#5rWb? 
z>yzUuC@J<`n!K5jNBHv=6`VW17udhi5D9L^x3(p5X&cbu<0C%v33vl#2MS!}W=245 z9}RITHP7PjnfIrs!@>d2$wva3Ui{_ThWD{Bq~U$h!^{Xr_H0qub2=`Qhq~86j`mTx zTRs%<&xE7YF8SSSEw?j1ID(ArHn(z9B2`sSZ;driNCYqji@xHuO&dbG{r)MsZ_4tl z@3Yit7m7!tf;xBL{3n~r@^s_lou=^nn`4%U({C^ai(ro3-u}2lD!pQRCc)&oj`YR%`AqCY(0@#WL(urZiqd{*RPQ2_)xQ>YAw7vT1j8urtS!jj&{we$F&H!|qkAiaF2RO29v-o~wp)@vR$VQB^Spv{tn)3Jg$LK8pPhAJ z_TVY)vb}3awZ8X}TO{d&m6+TJZ?fK#E}&C7)z+g9LUXe@D+&eAq6%<^S-8+F>_=&z z4CR4rfVl%hND}n$8|>8@rGd)ivo3Z-QA==w_2hofRxCpu)Ym~Vxp<7aZ(1rzyj@^2 z9`6^zskAU;KGjMyG41j8z%VOP$M(R*wcP(RpBtV*CC_h$FnM#_)_~^b=CZ+FVuk6L z9b?78405(z7|VOROn4G7eciWErf%KJJebC0IF zd&J%Cxqt&)bOh#HqCS1b%#Z}iJF|9uYpbJOE)owxoK~H>+nN(mBut<+J&_#}y#?18 z(5`-XIg&8*4rPOtqOye~Aqzs{2VtXrsRB7~qory!M5kRBX+!me&Ky14fOsGDm6hgC zR~bzEf4QMJq?8z!XrfQPs`a+FSTf{T(s2uzr&jP#CkmVIOLjL0(o9Uk$k&KO-#RR3 z#JIi5@J|tTJ4)OV5@;<%-WGPqxOd|u&RWHE8x*l~^*?_X%oMmZ-4wGxTSn8>W zl9pUcJDzok9ey0Q$dQ%rWK$i%em~?oe|LXC;lZx&d-r_y=fA{BU?^38sde$RggAp4 z=Lf4Dk>i|)G_pUceMuyNj0e9^ZQU+O?mp^gqg&*;3PnG83Rp?bOY$MvBOU!^4W~wn z&;RqecFb&&^m+E=c%96bkt)-!xcQ6rR`EdE~R+}0A9TcLxrGduLzRZ$`hkwW~PrP+n@I$Ug@axpD#vv*^ zZZWw6Oj&X@>A*<#tAM?vgD;I?nUTtc7mEpwkf#QN<~x9DR6>gfLXDA+T-6Uz;a7Hi z{Z>+IBXf?^LT1{(f}*qRhWjdK$D#TQ4Sm<<0b2^>L<#qfaipu73D zN+KFN)Bs?FJkf6ZsJPZA0dw?KW43x9?Go@X65Re5fy0{k=V0zP+rIqu^^OMZrkX)R zLXJvSwe-loq+@?h7G!AUW1+38-PVLo;&Gt<=Jnwx68I!?mP z1uGhBWf&Y*sw-9w3b>pmCP=XBTyB$SqiW&#@W2(aL9brVo3sIr#}gRgPC!O!Z+y~& z_PYzXYDJ>gjw-xvJvhcL+CJ!DtRVz3RWhNxPRrts4xn|8J$x$N`+yb#rw_mZsfUEC zwk#FF!F@**dHDX-ROkfjQVM+mqVvY{FMrD=9s~+(Ft_|>X;#sH8R_yZm+bo*ug%jb zO_$nh`kgqUf~0XDy?PCR#6zKVR*7pD-QgpWWCzZyt56msd+hxZ0)_wFxL0Cst!hGl zqND}nCGI`%+xPqy7FXQ7djo#tWzT0TUina8rvd&4UVuFlJ(k;WxTCYrZ9T>#{4sPI zlE?Kg@J;2E@2UwzA5fnxEYbPE{5GCM-yY8Dw=BQ5v; z1QEYQ)L`cq^MJP9wY>4*%XC+{4Yi()n))DtG?&yf#v81zWnO@7>FCH;WMMB`K~tmt z;pNIy(1#|vZp)*${p9j}0&)gCag7C5Me%y59vIXC*##`b1R9#RHE6Fk?s{pBXdY8!ZxgEquY)LVmZ01BQZiTVAJD zm|ELgX+;sSPCQB@7F@h+J?StBi&!HP+-3>i=uW<8c>ld-u=5&wjFkb!h@WzAf;=q&1s0P4Bu9m=J$d4rFScdN}q z8f(2md0g`SXcl0rSmKikUF5s2ha`RO?}eBB9xQ$p292=R{sFieWm`ISJs}Mf-|GfUAx4WzyDM1x%zbH0uqcMKPx_ zZL;ogFP3!UKjBoy5%<|4RzpkDc4?c)^Wk?aZOjw^Aoy!fc6npk>(Bh%Pk#k3bM{{Z zJVu~4hNthBtbzfTK<9BMtZu~d;wW0$jCj~+Lfopa1;J$Cp;!4< zDk`+hk~)8LxVyyv;;oWyv5d~Rt$B-*d)fMHrm)^4&GOlU{){pdMREbB(&D0K2gUHn zB9C@!ZU!$ZTg6n{kK|tW7ExRe2$K)Sas`UaYMNN=ZHC=P6d!zV8t_AJDU|<}T{i~Q zrzeRKKtmRYcDl)8?83NJ`Iz|uMH~;ct4g)d;!ut7OQ55& ziHa*rUOC}&UUA4!zav-o_)aQqlB5nSHd)-eUcGEu`3nc8UyfsRapJfR@D%NhvoT1m zhl9o7x}GCzcO#YId+Z&c(d@=fPL}?4V2e%B_6Tpi@l8 z398p}I$1sQ?o*n#yroiDK@2yrKNULcD*!r3tY+**fUuOmBqgt^TCSfO2hyE=0jFhs{x192VVR9u45UZ*8#)V#&-5p7@>JNv8f)9P zV~E3-=ayYwPvuQ41E>L8Iv)uuZ-4s-#&Q6&^F%=%q?VWZGEL}g?SinYBm`5nE-O|+ z_oF3mp?25$@F4G~&IeH0u7kYVefVEBi|fmfSKUB2biaRA7|CuIDH0> zP1M~v!TRxInR^mBITu959YHA`KLWf)U>pyG+e$m1=?e`VQG@AYJMfWRqXx@7^ z6O|@IcR|D<&4Ujd!BGCxj$Yy`@~+WzVkW_y3!~M4eO+%@N2i7e32LaSj*ya6_K?yF z@jz0$X+7P2x=CHMcI(A1+JSXq4t^48$$YeO9MTaHp3H2r(q-H-^hrkw*Wvf`etQVcDYZlJpofdVZnR|92FQ9Y;RViZG1 zgBsB>VXmNp^n6cRajNdq{rNBVHkh0L9GN%2ID5HVJ5G404S3XyEIomYbgDh99VodU zNpHEgS)X}lx(X0Zy3YQL%;X)eMd1|I{SGhj>e&AOvAg5IQOtA%MU3}mLY zHuG;s(mGE*(JpJ}JIoyC?kj4RV!bxYc4x907U8)wDqHQmW}(3Y2{O7_k9kNVJPi&Ufq6$OjG)v=wO+!$$2=QQ;Ey6`&`S%PBJ>%Ux9aq)5p_G^U3}wgdSppkxzk|W&t#g? zmNYCB1Fp0(c;5aJ_VmWW2L#J3B|Es8rF^kK>#t&DouGAOOw71(=5P*komg;V2|=6G zMMjYqp}<@ur-MJB7_pj*}UcB&d0LESOM#ax_9TWeK>OJBw? 
zNY(+sp`VmuDQ$4vu1}j57uu}>w?d=~VEzhPniz23OqC8=-QX)gI4Umk{z*u4Cw+2x zEeXCBBe`tAQtKLro?*zi+(tpg2h8L4g~vm27R!AFLEEcE>$Xm!Glgapa}5GNObitH zpArIc0`pHI=ueh=AJiA#yB95Bv%opqg^m=|CfyFSCjvm{%^gDYyjt7 zx;G6hK#W7MWUJZ`)sXL6k!m6`P>oQ7r%>YS`JFh?YIXwuG2qNYE?d&FAs$=*(vt>O zA3jwR@gG*lR|K?gc&_YeyTo}d$2M28uAr-1Ffo-5CH^L4R(|rpV;9*`U6>6CZPj>bfD6W zAJ6-p;0cvX`7qE5iJ~Nc6nhS6S6|Z-ttCAF8n^{G!z~b6=Mh6L`>#zSoWg%b;5r2R z7+E2C;Va;v3%>9>mDrI1scn#)70Ly;#7KT|BC~@$DwBV1m6EUj&lYx{IMr*DnXj(Y zMuQP~aq=NVs6>`AalWiUppz(A$o2QH|EcI5ly9*9@{Qc?4MMj&Z+5chW6-xqtp68K2VsfgI{`YR8@BvUxWs|9A1+afQtNWmi$yU5hzWy+^ zpYlqX>K%-EPj*0L4iMY0Lfuq23b&Ox$#pEQk@DWwO1aV;&@Y zAltvJI11KClIO8!jq{oaj^-C_;x8X7G7x$s%RfSooB1K|mH8efA7u%;Sg}4WQMO1g zldIZEJpPRyNf%F9{Qj>$N8umcBqbQk)C#nMN<1#e2>)VPZg*$p#a)ctYN?UYqPzOhy@{2mSFlFH`5Wj@$i=k2R- zhg)0DwhyAfV1a^pSEs| zH|YLQtN!%@AO?x~@i|L`olAe%E?&~K{gG{Z|9^Olfn0zS>0kQ!t6$&y{s=EW_#a)X zz?*w-{?j)nGf?Hk?_&C2YXg?}=Oam$rT%WI2YyJ#e2HM-X9P@Z{@?xI1UeTn57Vyy z*ctSn39`Cyxr_hnb^jgcggxf--9Zm3u(s<4cA=4k`cp z%d+fL{Xcxk{=Gbe%_u=2L=n6w709zr{c3ssOEQ-bBL`lTSE&MUfCJVJ+(7OC>URC% zs6}1@NX;Z+;EpdzFzAl)xPf@P#AiDt;=0*PF%J%=%MuV?rS+Qi>bwJROR{ZIr^mH#pqcFeo7~v@O;C_^H0UVPY3!TGx|D&OWpoSZ zHhHvMKs6r?3YPLVhgbSfJ>zW+Ls>9)1^a*sLle;0BCY}bZa5egKM{hhT+s;GNETpZ zH6yru%*${;`2g1LZ=Tox*^2UP*iTD~rWDC5|M@YHoFzy(Up5}Z@miXEeDODz;pdkU z6M#h(3EWfIzUlr0R8@il&Pz$cE;hQm1Ye;DW`%HrgRd0!i*fD1M~4E615&j%KoO+6 zBWBWi3EWjkWQ=@O?nu!3$8s8l=4yDN6ns{KS_+n)lkoCOHX zU2@|t4><@7pp`L-;)gu0|2R5t&9xPSj?S%p^KkZ4*p4KK(|#L~rt@GETneK<|DX5e z$)Af(MR`-|w+F}XSRzE2Nf?Ny>{^#t1Oib(8_FAA%?vxM!7>YuELQ;FuZkQhGW5_r z|Kd|Ldw;U;VLx@0$L7~&XA0o9e+S=pButW`V^#EuU=r0|V%Af7@8WenwtKhT{t@mp zfDNf}xotm-kC2IWzPXGZzX(=s^#EK=*#?^5glioD`223))v|I*x&Rdc?s82aT&P{- zfF9_^3rutI)^}FSnGo&+{w^H4)mN6yBEI+KBn~hhsK{G(6)#GcxPVO1Z^d*!Tl3c8 z*}qTU{Lh<)>*0@n4*n_SKF@z%^=pJxZ+8#CfhJo8k42jq54AR|P)?;hAllyp;^#0R z6&&BE?$YVF=7tAW9RE;bbqBz~cL&ft-Sazah~^fgWHc5xK`TERt)ZuPcOX-tWcR|5 zq1R|Jw7)=yzb14=Y9GwN34M?Woi`9v5ZszZ_wfc$DAa1InJZ$IFvKA^u#GcqOZo%U zo&V7?7Bh(Fo8}U12am!@*k#>H{4dAVA|$v!zBat}1Db7GoSm|3f-qm@Ry&)a4bu1( z%Co;ph<_cz;M-QppRRH(XU>26FFXHd#qrk(t$zrfq8qdeibBhbu1B~<>krgwUh&rD zn-WI@$w!R9I-@d?-*G{H6e!k27cVToFE3XIOcxtrFN^d;(7!2ZSJ=>^QsWhNOw}&dXY0}Ze5@c*}F6f(JV{qjLu=k)KrQSV2gDIB>zz32rZ6pNZ6cxWs zf}RO>WvCwv|4f>P_Z`uV9vyS^+|!o_^oopL5v={3eF)?d2fz|$WM37Utv|vY+50?l z!1ZXDpZQ4*fPV4n)@Jg?`$CbAenh)9@1;#{b}*R#T45M>($ zI9TF^^0%p8t$)-tFyH`I9YsnsfC{}u7;YO8M4EGeQCi%i6@68c`Ih6}w;2Zl#;(Wl z5QTrIv4puhVBG2nfPY^CCF;HtNifs`n$Nq*-bb!a;rClr z7k9z-?OJaK9E%JMgueYGI4+_I@=NCmQPx*oT{cYZ!Mq*T%z z@G`b~ACQh_foy0*YIZ6LwwMU|xShu9b?&>-=g65yFbZ-B^YhW6aEZ zxufUzbU)AW{`3C#{*I%A?@?dmy3XZue$MmyT&o$ev2ImSDQC~tFSkHgI41b=zk~(e zwWWFSVq4aK2@6M7!vb1p1h@c90r0_gX$KXU;EcqMD<$B#V12l_?njL<;cV;H#Imz~ zug1?;Ju>acFqW9cW5Omoz$D2>Ac+HG0k-;z(3P0CU`e{CuPA}kPzXlTHl1ODOUK{N z4c6dC!9>t5ulICDsP;toQi!&?fdS zEqtOVc=l?KU^P*QxpG>Dw4--1&W#)GWX**kH$R|*^XF2X#_n<6H-sF{y>ZuitNxy3 zi!Oc8IwImZcKCqu6DO@I^&&(3o%GSYx7J1goK14o< zjp?ASzKT-y9s*;q6qHodpjpL>?_hxLKFtbT*!}|)83owYiQoL5M-obGG?JORGHJ>V zlz6MDo}hF(^4E;5dD4Es1QA@@w0-V2DU1rPs?|=p1o2D1BNK}`-)YIWsbCfpUV*HX zERp!3j(-_mzj*%)=}lPKJ0k~iFld{%L(&v`ugG=$h=V1ZeQY#x$_^msH+gTzT|jWS z?sDmOGvMEL`a=C0X<;OHu>D_R8~^j2+H-5cfemDg>j6Zxp{zE}hJ$6AUYe7-y2QXB zG7Pa!*GcG#9az1vJs2FzAU6T7^^g*|!2Dh(c6mZV$+IxN0QC5V-)Kn%to{CIgB*25 z*RlIa`_DUlP@bT`FQHX|)vy70z3V^!HN@6q17QpARJy{xXEZ}LzXgwO9*GsUE{ z8TPn{4~a#^fa+y)aSPyjEdb)=$0FazXf3X;z%6?jzF}Yh(f56?|7Qc3U0Z<)fdl_i zd-%87Vd&Z4wF4|lKV=qpZ?`49PCTWS530PYh5;bKE6_ctX%ac};Q8vQbZ3k0#|ME9 z!CNptf+wzCS3jJV(-b(_vr6(gA{jApY%oYv4}^;k>Ys=R27;7@VEbuX@lRi`A9v1q+zEs_W4GSgN3JBM=fWGCU^m{{l8ur4I<B+5_@f|9#^*7M>w^4(svWZ 
zkKaUWwFTHX3S=(cfn#N)iV3jn0gWl38aON0tn9auTQA-4xSE<;{!v7kjgpqgqZWSw zS)k#nefIItRWGDB)6qN1G5Vmw#2&jQgRp^p}v2u zuyyC2Roan*>&W4onVW^-3-sJ&IoH0Yxplhx|50l4UipQm4^&1e1G>lqkcj8P1oHkT z09&P2KUn-7FG~MDi*a~Wuh#y7d3Err63{7iM^IGH1OGSNCv9?SwWJJ>h^SKA7PeRF zRUAOw2JzlhgJSvhgwT<*TBfzXHfd`~xIFB`Df}{yfuuH_8+()3KoKW4G7+2`1g_lV zy}7yl;^xg)Zk9+W+MbswRyre)WlC+UuB;?iBch_ZLb_gtFmpv<@|JpW&ijRox1e7h z;os|{w?SvcVAxJjH= zN(Rt(5mr42lqX7&DCS%HTTOC!Hc$_7@8%m=J(GpB14H^`FdEmvBN97w1HxP= z;c|2+8b3h`buZb}1}Rkadd6&8={YchKUJ+yk-V`8W~y8OSCGM=v00>I9BUE>6Ogm} zEc;|QLZ1K*=8&=T5v|F!NV+up69d8?We>S6(BCFSEj7N9KgGv+=0Zi@n;H(Wp-Y=# z+c!drr#s1`xPmq}`)4V=MmLnck8h(WRkh!I^XuBKe|Da6`RQC>zJbut!ws-=Rf}863$1 zx|Nh`^A>?jHAS$b5iQqsmMh%0dQ86VSdo- zI1mH~(N^)%x`jZ^L?WgT&`BnddHsuB=3&Fi00h|4_>pt`f{%d^GJM)*ieA3gOBZp3 zjbe?XmSc0YYuUrN6@HqZG`!r(1!$cVI08k&hz1q|$)2I>_UAa)B5ce7(sv;(cVJdt zc${R%ASsRnh0)l-)gqP#S~5kelWEMKrp&$mL>$AN-DH7<<%3Q;>4mUWBj$W7rgYk# zZgquPFCoHSsxgMC;0v6S7>$mILDnF0C5$Wzp$E9XiK|&Xnn48m%~47}N1>q)2IMdI zW?mPpD)D+pp z2U(fy*^DxD4A8w}{jUvMzp7X3>is(%33|tyjtdSH`h8r??$@y`J;iF-njXmo86EoP zeS>9%hdUGAAu8Z_9ZmC;+l+DDkY44Fm%5-)Zdk(UqZaZ}|NA$Zt^c5SA zV;V)Os6^}Mx3ZwDA?Rr$av917*V)WMJuG6%fDNvo#P396n~sJl51_bF_VMfaHPp&w57mEXSzG5U?y5=rNx){MHE*)jb%9O z2yysRMp zJ$!mdv0c+Hs`Zx8cr<9B)d zLbSbSGs5Q(+yZ(5GL_kCSrBYQeLge)mcg*QZ(<8oYnE%YYWSv|w^}dDsKCkfJ6v+x&#G*jhE|JWE@}}jU=ur^P z?ora?CaBYRru5Q7qi{5~KNR1Eh`T3oMHi8&5eG7N zm+|A)V*WSFBqpZXW~Q7T}QY zRlF4$TDw7$^z-UjK7iSTMnNm``B*ZpB}Nr_=?IYu(hsq;F{AqH&|FJJSfLWq8_M-O zjMU^a%%zatdoIxt5bOB%_^S{c#Cxw@~+OIeAc7&*=GkNdRr%iSPFKgnK*7=0rQ z!dGeJa|HoVHq$^d?*MISB!w6R3uQzu&P79wN91zh>Z3Pc-{i#yXP?`@!BR391RuuC z=#4B$_BDf!wrfoJ~d6#!OJ@QU>xX9_(LSW+} z!cP6^()-`9rbh{k8NxbhUUdD2=hFfkLHutx@l6(;8ecQYn$yI1UkN(bNk$9ym@Zqnz4LHs#6FjqPE9F(fB1#nub-}JA6qYrIS_^<51Wb#|*_3@r9DE zX60VF|K3@w&_LG_=eFoZ&Z?2aILwZ4J~#Lg)Vmp5%SG`Q_Wa<*t{OcT%2c5-JB*Qo z=@I5ZmUp(PzxdPE(l~tX82B4Tfb-V(CN1nA!g?R4NB*oanY3^_VQ{U$3_LW(-afhh z3#})h%e#PyY6^__AKXj-eSzy|-u?40J`Fr+s{T59<~t7u%F{APCCfVq?~W=Z`f%=~ zMe1a*%EJg?s^jhq7p0poHAfi+#MnfbKo6|4bmp6jq187KJCxYlb*QpaqU_mRFT`XJ z`IRpfgWJr8Vn{M`6?S#TgHo;V-tH8F0*Y+C{4Ed&GHBJYm9Wb=8| zWriq$#h4zX;@z>xZ2=GgAuq#ykV9~i+GxR1Zpg=TVIG2a!9`hiQWntAX4d11VQQqn z!4bwC61GESu2KXp1XO{nD^`>(pD**SNA6z_ceOU_SqMG4Ghi1ZU?^>SB55(3HTkab zSe{8owT0fWp>)h%1Nz>7{~BJOD!yL}%@2tWT!&&E$9xVu-rcv&`X^qsdFuQo9}+N%9U;AqzjzgG|qJ{GDPHD&=-c;H7#|!`v+KsIh z(u^=tj{1nVb?@XB==qq~sZb@VnEN6RVk#anaR7N9zm=VWqQ6A4Ym>r?W8{fu0v(tF zeRsQ*!cO@f-6fKN=XPZNxRRS7mcbMtR)i^&nJe7|=s*{l>{x{7q>w{Qh$B4@W&a++ zo{}MM;KJFJn{wR6IFz!3IHSXikSqz>zz*plsqCzRz7kAvlBw4d6S;bMB!iCbQda$5 zy4XIUoD7w#ubH?huf~*q3U5VhsH~P?sD(fgLj%>8%__K`-s7nxz?D$LoD{o7ilm~CqM*5Vy^QZUH{3J3KVkT%1CnPPGc~+D9NEj`O0id({88(C@}fXP+DRnq@$kI2a-kuXX&4J+xi%+W(VMURtmS~CD;5?>CE32 z5#aoja+SHE&p6+it^w(75ANyBkVt8DZKzyRCbzOQ#Gm9wq9@i`f%- zSKj>@oEx64CY7Gkkf2>}puaPY{18|D5$6m~VQ~n@DM-0D^Mx^yKBL+uSW{ zR)jga@i3cMf?1_aPw?X8@VC8vq)~x~jfzv5zQj*6LiU2#=}=OE5fbK)cBzYjx>}Vi zKBSGAL!;3A#4vJ|0W;roP8Xl=M z|MGIZ|63A6bdcnYL!(r-FjVrldEmThFrfI7#>h}9kJARGQsx(tl9b~S7n;pg(ao37 zqp)l^7%wtC9dAz~62CaAmPbxw@qf8Dd-D`(L)gP&e+7+)LFNz`P{>mc z`gieg%#^Y(x)YbAw{|(+?Anj(E@c%Y5j)bQ3rq=!NUzz1R_SADM!0hCsmuIDr670g z31S-co}Sr}&fHex=vvclIXkTcE0}aBImaG;%GaCO3NmpUCI2$6-`~p^WPynY(=aB> zIg;t;lK~r+>t__(3A1WhKQ2-g$F&`u93g|@nSsUMMwogrv~E5cdngPthBP>yn)?#u zKA-_Dzoz=+pDlVSlKbajV*Wk5t`5!>@ z&9<~U#daW`_ZLXjTMnd0enJ}o&=MtYD9!L~^E5akvT);1aB6w&eyy9BeH3c=>UYeV z(M?rueoCeZSro$`T?`^HO~3`hoYPtvnK(<4MovG%40lU95MOYS zb5KR6AXhx}W_x!u!TftH$vE7AK(9WKp`=8pypHdT#4a0yqvXx>l$-2sE7eQRDevUN zkkl~95YS5K>^_k;`&?yl^sA%sWDt$$XiCfCF}eFC+a52mj}gK8Lht9Bsb`AV3KTcz zgiwb&BSz%CzTTu34Gp;Clc~N;hE3Ny2En{pg(#{D*O8pDN0$tck$vunio%)9h333Y 
zcDqP@Agfj`4rLwF?C~l1GMF?q&B?kg;=1wiR$yJI|wkg05@p0Kzw8F?$!-YWs$immT%$ zV!0;opaC87X$rhRLZzCxuD*DxqB>tJQ~ZCb6;^rQ~Z<7v{2DWJog@(Sm-J+ zs~0ff+sv3U_)L$(pLx`s&$W-jF?vZoy48-02%db=DO1ZVKlHq`$V2D{c3Q{*+|W(L zH#k-QuTbI>*DeCnl(8jL2yZ$EZDJ2X?-?qoUfCc(%Hg6oDhzAyjnwEm!A|D4Pxi=$ zch9(I6qc*Z*A8OG%tf$W@eNpg#L{{N+6Pam%^ufz!A>oRzX zS(&=w&Je=Eiqmo^ZSl|96#@=08j<}f%vfMV+X51Q`N32c5KN z%A$T+%+8mn`5endNectjNRyVvpULIE;l|4umu)Ima+4CMzgeP?RY$g&x_`s(*Va1b zhRou!xaPVgymr$;hnh@^WP3cI=#u#{%sPH!*f^cs=gVEo!yX#J6VM26sDqW`H_=>| z)5Dz5c1a0==Q$}q**3Eem-S7$$*SxsbVTatOg&Ujn&#b4uWBoz)KN!Z_zys988VIv zG$s2tPIkZTg)C;#B4UH1-XKfyfxax@-4A-qCN zBEQx^&d9O*4gQ8`P#@A#QD+f_#H5y7i&n$GW#Q!-NV9|5bZNC%W>cF0$^qrJunrio z&GWJ1j#14I>K^PcWzT+cEQ&ONx2798B3t{~BK`b1vvN`_@=D*wD>n1&^zS)^T4eTJ z4lA^v{hGh(i50%i3dpe;Jy8{`2JL}$o=rL_L4C$R&` zu{3)W=T0geQ8%6Ooxzm}x%MH*B9a~ar#c2hReItW88UfhSalsR)b#J3`|8FTTPHVf zD2*R$4exxJYtq4tJdWzK$pJ_t)KAVP zuUsz359wLm+Rr)J&F~4|)!aK=k>ARcCx436U^;eZA4k4{dsM89vgc0H>hdEcrh)eH z_bxjVNBjpkX%kPOhzT;sUBCA>lH)y=4FVq(&F#mZMv_kpjMPKb za0>%2L^FTk+mq4~1>|ZYLMcsUEbLzpE1ZjBx-t()S+;>g6cR9JaOGhn0>MBN%){(q z{mDxPE3;t;kM{^V|Iv6aL@MKlK6W})kiFbl=)xMazfCIw^{xR2iZKx3z>vV-+lM!T zdg%&F2#hU0r`I`s|Dy=w^)Xp@IA7p!A;JJ@gOyj+$TgAc-gM&Z3xK|xkCpQOS%#Ty z+gZr{iHlMZ8d4Gfko(_%g17DTsK~zhc`bqpe8|x*?$#xkql4@!;MChTCIE+gWK?62 zFUK2v7?m1QFt3?BD;+t6{P+tx+QcoyYx#>+N#r_`vc-xoRp>Ssp^X<+9N)<|M9LGb z1W4xS#mUaXFyvTbiDu`>Vm=q8EpvK8W(PTeq%wk~`HBoXlM6dd$XqqJCLu(vj|1Sj z>rrB=sa%xEdn*p^O9`B#%t+Eb_Mj0_g|^bcALrf_P3{WOV9@Hb&9H(JGaeIgdgSr0 zQ>2A*n`&0ZVtuU0_z)& zZBk&^la~ffMj2b#;_QG48vcpUqrrEj@7yYs1qMM0onogKGb|$vwPX1fPZ)JtIuc`` zs;zMz%ilvOA4|3o_4l)L#<;*FQ*o8|Ft|R$`uq&kgAzav#>7w6=bB-a4Lzm*TvF!x zlB)k+(wtvQ+VgOgO$d%L{B5agh@i~yIN*@OgG4H8S%LrnSK!0+M7UuHa0#MnSf zLfOr~3$5EYM6TltzpT6gWp3dt#NAK5Tkiypn7WJmMI2DqyO6Qf0KXqLe&#w7xNMrdSS* z0XX!9y*>)J+~`dfm1h!am{b|4i)iWGVuWC#zzAP;LP>w=O66iP8A}?eZ-$#{2s*#? zBKeKpUTzEqBTK~qOW}&liy_o#D6+-=F0L16+QCKXZyU~mTx3qCqTO={r)GS*AV@Zp z@v1mYOLyfmV=KhIGkwj;yjy3CAAfP-_j*9qaBZZHhC~r`4beD#5Ut z)DLs#&$bj#{0sWEZc7#SMUOT~_W~v4qJYT5wionyX_@EHDChxy5T49#XyZzp+9WYX zcJ7QWZk2@#G*(bEWhMvt&bdMv`eqcUA-`)LkYs0HuxuW9WKC2wYakcRaGR(uBFwTa9PNDF)cLz=|mOlJf;A@$L{r@FScuy#~gx@9rabhGDJ8mFCdvQ3}Tq$ zL6vhW=co&`oF?U;akI-I4$NgYkTFfEH8~+oGsu}@;)cP-1yhXnIJ9SEVahEa0^CAS z&SFZFGedGA0`?!dP)*DV0Y_&Vk@f-YK9;~)=#KJE#(UF@i7w5!KJG3@5z}5OG+&1i zWXgJnx15_*gbnJDYmaif`A4Jea#$5%s{IbQL3#Sy#(IH~963@kwT28va2Ffpr;1XQ zW@2TCw&~vQDDOt|i?l%tSY^4(l6DQhf(we6o9HR&%{NoEg>1v88M~u+ia-+A()$T% z92)@T8@SdR<+TzZYMj48$*XP`+NlEmH^6R&_j+`el9N%aZ^g}BaR7c1v(QR)rFP~y z>_phgq_SnDjNGfQpmaTFxi_wevn)^gCQ>qI4%x@6BD0{m!7!~yJgD{Lm*zPViH*n zOU`30f!kjvS)@Cp)b_?;Am7<$%*vwutgCp&Z?C)3ISuVq1)3!N3GYigd%X&^NM50M zr6-qY>!r!hQ#y12usSIe7Gg)f2OIZVIl-Erv%sGgQI(uD(l;Fy&M|!- z)yc!YPdUz-q80Q7w~?l9D9|@^>jU5x@$IjqmGIR07xWijBN9P9&!ELJxJ^5#o64s8 zWSSkE@%g0>kgp#=R;MF}RCCQ#H4@~1gV?o)mYA}yX_wPAW9>J602^VN+vXYV0>Wq$ zaNfPAo}?_Ey?XT@bq{L|8qmW~o_l_|txp^v%j8c5ZO)(YfS1Q_j8lnF@k(5_l44)j zGaQG*b`s}vlqO>`?I|wQj}sWY`aD?h3U6|de(1zq`mjiG<#b^Uc`q(z`a4Cxhi@#W zvxrN8Us@neAQh{PNvt!g8A4;-CotoNYcIYpdF?0#rTH&_>? 
zS9SYP#@?2T!kCm=QzlcSIMUQJz_Lj=a>m3KSLxJS-1OMAbp9a5ZBkF57?eiMlB`Ns zy+T{TI*8ctwOeYUN$wcDw1M3+9Zd+GM&IVPz;QUUikov&!bNV2HwOn-Xv>Gi-$3@$ zkwc-X6SIbK64-em%p!f1ShOhTu4>#6XIQmi`x8aIcY}#?^;R~|akuBBrEm??MUyY8 zck0SF%m7}4+$MsnWQ^sQsWR%hM&n%GIr8IAllI5%?OiR-Ob=McDB0WJk^0kmdf`M__dTzLcGNmSb?g^Gm7VuSL*xat zOMPk-g`bEKbm~Jm0LF2=tt5wB59V}$lH)gvB7ggQ$UvsAgh0EJP_Tqu_k4-hM6?yL zp1Na$7oKmJM`7&gLkJ$@=ngPCCuY%)KdsRdPw>8PKZtxJ=sm&JD96~r0kuQa$wfK3 zd38z42*Rwr-+CIsF9%=lV9%hQ{7`XZT0tWqV`WTEl(hXE)W`&3*kO)C5+DXF$&|Q< zxYMny`h9KGFaN5{|7H8UM>9`O-b~$X73@Ybzld~f=b!ZdhFwwoK46w3*9>8jl#E!%aH2;6V5mRRnjWFN@B2;fpt7^O`vT@GlN;w> zy$W2fSAL_HQ-Nzf7j#f#QDS4N=7mX%RK>@EMt>589a3vQYr2Wg;>6m%?f@VX{472L z36E265|;P*SR8w$awe#=pZbsmdSsh*|IsZ3zxR&AzkcD-OMmIa1(t2+1F3pDPyD6d zkY}w>^Es#ai<>k}dV9rY4ZxNq{#|L+-s{j54tn?*`)52@Z;TbKbD`Sz&iz?QufOvy z`WL5;YdD(TwZ43Qzbd(%w#UQN&fX^dYoFE)^4H%G*J{V96B!5p+dK8w*Z%(_ul4NK zUdsIT%XD9WDXk+1zB>M<^8fwe_f!asPoL}G|7jhX@cLM+xrQ|5s)XEnRb=)nEmatP zH-EkJ=Q^ePd%xEnF37L>VR1m}KN`Vn@06WgYdA)FeB2we=6Y$Tf3=s)(ucCHzL@xj znXE12hxJfSth{siPg7c7>gcsfx?cH5p!V8JdQpFe?#fKP`Mbw1FRpjB{#@dgb)e@k z@bw=R=K4}MtfdAEVVkOTik;RM?$$Er9j?`CnI9CnEmW6+nLa|HT#l*ZlvCNdIg8|7-q#X5e)= z^#3~$@=@M*z;}7yIw<%9EOdEn74)b)tQD8mQUA{K$)4EtjIy5Of4N$k!p6&Kk)V6` zK4zch)io&e&X)snUSI%Z`vb3Qj+o#4uUJ%x$H&4H3a0g&WcVZgF+%$5C!E^ka~SkD z%n%3EZ=G55@3Mo(%6OVy6o6L0@O+mSd+NIhx`&NE;P>`>Di_suileG~E>K8LL_~yt zv~eT8!4C&7+NYLb@ZMnQ$~%$FXZcseQnamgReh?9P-VvMe2O1Wz=D=-Ud?3>eiVup zei9dx|J*njqjrg)Dt3_fFf8y#^1!QamfFtv3S$>hZ_RDigygf>eODWrilfcTp2sA5 zz_aAfCvva87>?A;x#gzZplv&6u%Qzo!VgPN^t;+8rsFR+%xA5M*#y&v^YYqia70kb zdzL2l-5mBjFI6+FoOg7k&aI_JZPT;L8#+A;`(N$Bs1SDf$f|REkbI9s;OF3o7~H#w zkl`$HimLjd^WQ?n-R!?Pdnbu?q}d~Wzj_*LU9k=z5j7A)M^wC!{-Z7TaZF z&QK<=; zO4`N5LEUu6wXh3YHx{cEV)F$vhSXFOe*C;Hd;GNGiPLs%`qQ}1CB8?u@4Cw?nuK>w zw{8p2eRQHHov*fs@ubmodh_AeEKRJptY`UQ#Ng6GPK@`DdTe}0p~;Ac|4P$2lXxxdvf@db;apj1 zY@twdorjK)SNuUO|MMs3st(>fb7%ZeViwt2-MvfZ&c=i8w+@)@IA%G@=MuVs&mr)H zbH%QREKwawYsvxRCS9;CKDb^3Uc~+^+r3|STQ}?;Z=2>I?%nhvCzRvC=%$nnf%P7b zG?pUhziZ2-e9W#V<@YW#i{@GF+<}p?CmY9_&TN} zEvHNSJ;}<2t>Ay5|B8N>wq~43&(O=2qTGa}sNxqBF4A9DcWs6T-h8#~Vxnl`Ynil5 zIntip{Kv3m{II+&DZ;ZAi9y@(5bm%VrOaqEI$V6a`aR40GR3KN8!s*9HWr7K zeo_3Yc(s(J;~0C+GP3&H`=42cMS5imT<6Y)XvA)Pb0sUK#SlKSgO+fkRl7A8>c8*8 zu?+bI+=Ii`g`A{Y;$Gu~B~?hNjb;@0P)ZPh%6bJ+wsw zcU@`>31S>ejv+$3kOrx(sSaU}_SGcGam-I}WIo&eszvzcAfmcB>!e!AkQ&@i`vj%s zu`&UZuwa_iel1S4_tK*IWP)LIuX-XR_^+>d`HbdBId$}77*6(q} zXGUwTn3Z^Eh$wqkfIxNbY=MucoizR&*XH$CYsWy2YEM)0#iE(Ege|*crCM`b zUM-z)-p#S2<|p3CVkN6a@fyZAZ`*)l9LJ#N&}FuG$R>EzWR@}H0QJ$&7BCPaX81hp^DRzMiEh zO-^nO_sZwM)M7q*3-auAs$D#59|b*Vf9H2cXH4s9$cU{)1$=QEWt;o6D*21p4auqA zpB^#3#YXU>^Ur@SpX;A)WlB2FpF=!k^rmgxF`Wi&|2aU3P1_>hFubtUYolbb@5aF5 zmSi|@$d(H|lA3IpkgywuvinrE6X&V~TQarz6kirXt#3p`+e|HtD&OC*QAt1X84vZT zq6;F6yEau@5=tV3${P!Rv)uIH8ti9u{2gl{(rKTT6Wi;uysWL@BevUV#MiEyd6gh9 zB}O}m_`Y+B-)2|^6N87KT|a7j<`M;qKY9})-;dh|bCei`)9>m~I&TppYl;L`_WShh zS4wU__vxS-SRE7PBzj9A_e*Nd63O~-`#C-*$_qQ<~ zPi0BTO8+IMGdbr^Le&gDx)w4b8T$S4eSwy|4Tm@;rDN~L?hiMQnfG;4y_RXNnVjZ+ zANp~qO+#yr&;4ul{1Md6B#&M0_R3QM1`_#ZSeMdMv5>BSy1#ZnAR+p-g?61aGRmm) z=$6?hNF`-tmWFfpsoBn)3C{x+CSs!^qvk=od-o^?qCH?Y`twW0g>G#sRx@wg$RE`H zE>i$4rF!EauPpUgVAxBlvw*Wq&gHDR;p~_DM*aQX9+&g)Y@jcWKds;Mh)%On<-C_C zr8t%zjyY1;6}wo)#WPXtIT&_1E4a$l+m;ZF7Q zCg>a0Jvz=CdilPKs(yRymY*l(yw8|)$+X@*w3+vj_^WI0H3$*P&?VTLr$>zYLr->j zhv#4GsX24#tzgQXgC&eT{x%6*w}PCUUJ$>2+5~6aR<+iwp*)V2zOW4pA9f1GNQ=wa zN8Y;TlqODhdjDZZqr^+x<9^YyH_ObUa#=kwbEPPT$fJ&Fo!+Xsi&NnnYN5fm9t;Mb zFo2vy+diEB?s`N!0L3Q~GQ@|-ek(HE%X+}*gVX%< z_&xP=1z@~dPHl9zY0EWUhlPA4>mEC*^L_++R@nLU8VFMz9kLYP0a@F 
zhMbnF#t-C3^BQ=J)?Jvt&Ry4MRhDtbRs6sSMMH{^>An(AmJekzhSwGPcHJ6Me${rMW&6}%!=BYefH znzRMJ+kMFIMufg5hpy{WuY31tIz4iaL^MLmoSxFg?Z~FHKWy}G+}m-wO4zvj)cxJ> zQTymlrh7hDnG!HzWH6xfNl>YC%TO!KZzs<5eoc6E1#RQh1Gt!3tfj>uzlCbF@%7px zY2=EK##vQ<`+(vznY@l(J=Yoq2#ZjUx}{>p*h&?T1Y_^pq0k>@RTqoT4DoQ_y`ouL zClOwF(}&lq&UmA_guiX56tQjB+sqEs^wLK!oxzM9-m>@%G z0kt>C_)zm|haXLhz@g|^#laxPMpdW2?vLprH?s<&`WghMuK2yh@nj+14-6oz07|pH zbUfY?5gs}~ox3xif2}_yAS6r1II}mG41}r z!DzGF-!shnMHK=R?nm{=Ciq4%o{z|&f5>L!H5ZD{qvU+{<4&gqEKjsuyzKA&9=eC5 zeDHN=fpj%0Y}Ztvq4E23w=8Uy-$5u=MMnL&(uMb01-9XjZWW9TYKT+itgh)GSK^ z=Ny&j{Tf=&{4`wQEvBhjsLdVS!&aNXX;1yO_=Xwp>?78tecH^rg%)Gvi8zD4(m`*8=Mex_#6m6Kij2r z%{=t7e*&?!{MDpI&uP*IBmdv+i4#XbsnyZJH==c_z$fNPqc+O&lgTJGUq&^R7hzV{ z$ywy^oU``Xl9M0tWBY;IF^VULg%HRZ>LZ`<7@mXri89M6iU3fiBx>)~qU-P8S?_#B z0d~z9y6YFpeDX#V_O_#G$HtH$XvDR>H*#wnZcQxwIH57r_Q~~%N{%`CGxq9gaiD9; z)}%vYj3iZE&VOKdKFQ)9FZdWi`Bp+7fe|9ZraeU|660?}!@fd&cXOi7+vtP}wW<=; z2xB)zHuf*w$5>nVbp4g~zzC<^xcpqkjhM_)bVBb-o6Y++?eE5H%;pPPV5j6SI$yp6 zKD#HbJXl>3?HXNcO`2t>{LziX3M`T*Vm$q#V+5Cef zBst%9cGg5tL};tsiS6l`UpgK#WWFCAw6JHjGTNtxJV}s+#z1;SSb;02)o^K`V}3*gK;u5Z7<8 zgDFkF?IFE^Hm`(8>%}eITd8DGc_U4uOTJH7A@YuCA?t$pn z=G3DiH;#s9tfFVxGvwQSZ3P0u{+!tpnbBe%ZmA;ivkBACS0z5B-#tgvkO`roIa1ZZ zmE@#BiPs7buV^R^c}B}M$i)Ab@+nf~j0E*-Z-yVn@s84nRi^o7Sy(MUc2A>upQ^lc z6j9)tcI!tmrz;jydp>E4v%@VvO+1N;0l@ZI9lz$3q1ukuWjcx6_I08y;SQ<9*2MIj z*(dj9T5UqL^U6j7wqUz@mbiBQmH9y6a~gfjFR3M2a9Z(3X+d_v5jE!1HzKCTvUJ0@ zy+-<2KI}PnTnf9jQ}kXjX(A#=LuFoY)HHZz=^8hzWcj22x28ecM${SmAW`LB`!IEs zsQoLqoDht1Q_0gOX=O?~?dz2GAa&(x8kKWl)Vt-6OxejF|KcUw$B3l_>Dvybb}#8q zIV(t6Wi;TjUZfm_sY!~N5_f+NW*5Lm&eSMwo$o)3Pu8Bp>=em{&M|_gU<5{QTilWck3^G$aZXF9kJn zL`k2sf;;75*S852b!jD(=bi}DMC`NnIA5EjJ^Egs7vijux2G?>Q7irg92ncq3m+#B z6r*X%+Wu~iamRm0wXB~(wlbD&6{sWJDN*o3=Y=kN4PMzo(r=L-N4zuPef+l zPO}fItdF&lgk+@m7Cmi}AZ>fQkeAylr=0B1IrXrV~ zsP>OillPGEk}-rHZW>@&w{Mzzu1JO``c?!NPpi`Kst-I6Qz^g{% zUe2v~L56g7q_cfzPe5BV#GE^fMYZ1NN8*XGFOiWEuuneDS!zG$A*EM4w`bZk&SW$> zPd2U0bmr7X$jPp}z}qj!I1E5AcuEeOt1{5wC?CJI+^gS^?F1Y2v^?m#AAvB_Z-16x z@V4nx$DSJP4H4CT^a*1F1(-&TCRW_+`n?qBp`pFwX*earzTA|?E8k;`7c-<2pAEFc_Eu$5=)5Nr{1(Jesb^Kj_jsiZ z=|?;0b&BL~)~tvK`^zJtAU^0XfW%{VL$Vg_$V5PX9p{KOs|hdKcF?8bi65vGN;Nh8 zbCb=dwuO_klRdw!Z07iRWxbnwAG^8B8mlMQFjIiwRQx5@Q4=0fc0-3UiW9>N zm9hr_=o;wTAy0LxyVt)@_BOUtPWmRJ2ezpJwV%?v!j&a7M>o0#=s%IpnhbI0pWGKz zu1%(MifzMj+z&ga$d$%88^0lOeEP^%`MRuD1@7}N;}UaiM0%~2WiLC+LmO*46&IcF z_MW=%T=DCLs$1bQ*AH3jPo9Y9zRIbjmL7t(+;CfpKb%vOpVHnWn|8_KVseg;n#e|; zY2j@*ei-x6hs%eS|2Kvyf$XVnDP{{API ziBjX^>Y4V z)#zl2c_>xm(xB!&)1<$%Un|Sf`!o+hah~GmbD+DQW;%5wtd?fw^4c3Gq98YimWz2T zn+Emx;IX3D^nCo*iMzhp!mS!LeW&i=A628K%&0* zjDwOxg$zKKnAN8(4X)hhP#xDvH?4RV8|bYYvdl#vH8oH8pT86%HS#6S=ApCE z2Ll!Mgx&qTZs0ckbxf&kxK) zC14;4)WzX{*TtK_^{>s6{sK_aU3XuRNxuDjpZ8SC0+=14EQ*ag@$|V=vNJ>n(dHWU z3VPR%ELH>XFYx8-gN$DxlIV=>abfhw~q$1;K{!`tlp3UaR zmbSb>)IGO1g`Ql|x4Lwh9zN$z=jk-QHDyAlFWH}BW}Xo~>IEnZ6|;DX+V9bs(_$MC zPWiym{Gknx3YZPJ&pWqgHy+sFBWZn0I{y#hFh zYt{~TW-mfe*{lRgG@2sXcrz+3ffmfbOkaIpNjD9D&W4mf^cu57HDyG8OGUJvBMx*Uy_J*?e zywd6YJr4XD>NDS1S~by~d(6Vl1y=MdJsMF>#k|z#b#Q$rTkQ`FAw|r7_bk=fAMatU zitUJoR&2CT^fxR{+jajK^6T}l;)GS--O9V>yH7&9BsKZ9QaA)92AiIM)qW!;e^fNn ztXb`EI@eqHQ<0%P<-I{2)?HfvGm05uJ zLMhlkQ*s~S_-u9?-uBj>Gn_*|lU>W4r`}W(BEK#^3DS6f-HIPoG1r`Gx1zxh_p0E0 z%CkWUeL7=*HNP-4N2$}x?qebPCGtq#LZ@sNy=?N?Qp`$`Q(>Ng^UB9rD?O!usf1MR)#k>c%^CEjcbeo@pCbFJRF{`0g*V>*D+_&lRLgMFtBwf!(g#kVJ7Tsn7FZ!uHj!VZneaBLOHd>-HM zT3k^$Xc-Hz&FW-*{b!1*tsH^(O5@(M7jbCD4-t<5FqayNalRTCVIPaHd~W)O;z?5c zy)<>9>6TbGVBR;rjG{NeM%A#>qn#;;X#8S+YiIENCqKF>pgoN?K*1yv%S*yspV77{ zyJlUe7c7*q12oloOkv&OkKQg^R3boG0OnO2!gZsKNv|cyuORBnqFB z*h&z_{)cK=0IkIsywb`fNi-Af!B*r*?^) 
zIldRT7{_H=C@G3m@&R{hNVg%-SNKd%T0<{GH(ej_*kG}k1E$(vbd3ExL5dnnBMpFKwTytO zxRLxYv$H>g-oE|3K|w(pH_G!D@|QE}GC3Vlsiw#V8s?u6<&8My8`PU^Yr*S}X@25y zuiK_hzYJhY+>j?|!eD+IXSjqcWh67$KUg!frmUiflfx%)-`H$kCNlqZV+>+Od_(tT ze1ZYI1qk7ik0}%5Ef*V}V(go8uXnxeo`B&c-K6%CJvxOUk-bd?^;+%(^R{f0UL*Q~apW0P@hqBKt0a z6q$|t0hNVw^AmVQccVBV1{FC9s96m8ehcz{B(`cF3HI+dY}24$#h=I*pXSWCj}--? z11dLZoJ!E9+vr~ADUKYjx7|$W;Bm~l<@3SegGC_|VIg)7G-(9<_2XcdXPMfh z4KpDuh`=(?{}dTgO7cr_0WviWlW)`0qN4fMrLoo4^_}@$-FbVNzn8REvq#G@62fH@ z$z`H$Lz3+wCL?28xARlqKaRUK1??Na0+QzGVGeI!MJ-K97Kh-ReTKL$pYwcI{z*wv&Bs%>dCI0sG>C)7&hYA zhd;jXaC9*sZ%{hpuP@44CGhF%DGmF$m+fYDRh z1-L~a5on@LN2jF+k#Xid3q+a1U8c^)S0^)ZW7Y1?kBWpFiuRWY5@jB$>AnhwM2+a~ zOO%*2DDcOuYEdG1zedoLLQ%9y*m0FuUDQjVH$I|o?PPlIZ6lm>(3*V5dM+TwG&UD% zYSm!>`-08dtJ!3~*vFpH*{M>W2D9$%2Ug_i+1aqou`>AeGqL$UWNf0g!;$6<_OP|F zPRz)o#-*@1;cwP>jvSC*1&&j=<9<*aj(5u*Q90Qm-GWi?5%h#AmcrRr6U3*Kn!sgh zcmcnV$)*`-t~6}bjlyYs{*g?;Q;hxd6m6q7T^%1fK?!^Sb2fCGp>4&5+X_HOUa!9$ zIc?L7=VN0=AjA+Y8he`M%QW%USdQB99Bpkq0S4bdlDh%+YpmDFG}C)eUv?i_dO|dNRXmm}?JtQgEmc_9HPK;GNR75H;EIACD5sH&^=jpOF4&8W1>n2^@Lw zSz}1Kk?=_?#+=~e&p%=Hl{$_l=`~02A0%k}LXZ~mrZz_P*4_k%pIOiQ1ak@G)ymu5 zt4S8P(TRXbc+N3~Kk+_| z%idpk3-+v#MWNY6e7$jF4t}SbWsn0z6P%mGLWfbilXgp=ihpjml%SdBJ`l)a5sF`w z5nX+1qTTpA{v$)+%%zl}aNGV8YULQd|2gbA4Vrgns{B+u>G}-g-fHWlsDEiBeE$Lq!kYNN522N*KjacHcy-*@Lc`T15*M819j^V ze+=Tp;AG{=BG76`G6uh?a{1D|agZt81_ARKR$TExK@|q1*@yaJQ-|2c*n0-fx0oWU zPt!c+fBHzr#3STyMttQG)Yx)7U|YXinJkLSK6k|vaq`gP*j;U%d-j4o$b||i>-cN_ z#*>m4)!Q+LojFmxNOXakDnY8_V(^MJ@6Lwz$VA(Z-sp4y5=?Cz5|5a%CR9GYyjU!x zis|^yAT;FZ?h6hu049&5v_*2#R(%nbg*=SD?$7utRogZvD+gPs^+!w~8`&63-K1bj zAf`_Krm2qlu@w9JcHv&L#wp>5GalEx7)#RMJeh#fny>##%iD;-UZaT6R{7?`h0SB= z!TW50|Lhlxwa_2p{^|2DzuA?z|C6e3MxI&%K3DtRS!0v8wBuIryoOaI?IKt`*rlLh zQEI}fCZX%VC+s{jB+;i#dKRI54={sX;!$&0TSS;7Uj(`GERjI{g8BwTby1V7-OwxM zD|)uu4vumE< zOgE2%k$OJ?3`X=2|F>eyT&y4?Xon@x4i<`;O2Qaza7C3K0xUG2@s0|cAw=wXA_XcK zr|ppV5Bk5B+nwsFuP+7>V<~{H5weBb^d4kJD%9BtCa$nsodg-KPE*IIk|V#5HXz{m z;dd$R9*Z)?z4QQMI<3|egwJT_p5AdTzi(vV@+fV2hLoNUT3AH_wiIAWQOFKC9Mqx# z#9V^Y)SKPl27#<+TLV^z??-QH`y#IMB_E`gpGVtwzbET6UpZ+L>e*ji+#-n<4N)`8s+EtQd1nbmrBXDo$uKRocvo%@S>r#& z0v40{D4mmQ&_mj$#Ekq^ z8LTN72N^&XBJ$VX&v$F&%&6p+#T?!uHJ7gbO6vJN$g$6xW^NV6JVfQdnU36X0=9=N z4hYmNbryJ!K9OMma`d&;JSgz#e(uX&_B`AX;JCce zP#Zx}fE?HX*Y~Xlu^(n6H9* z$pkQ)cgD+<2z_$Y>WTu>1zE(i_KtV@BCJ@3t@BwA3DGGqJ_1(rTCwHwHz9}!-i#`d zfthxB|Cx0X|G}ju&l_$C-dw@ko7^n&D4_aSbTNCDXh<}hYotpozZ(m!I0&`Z09$JUO&)VbNguAt}1cYM07KrRuZGL*lC?&k? z6|7~g+{Qwx?6LL>ZISjV<2hg~nuv()>jCs{b zXr!tDu3na^i58!3K@U~0Cj0@U2XKq=m34vEO(&D=4cme3LflaoNdNVWYH~}{4_u9l z(KBYnrS_#);>Y=InhG#|QVV(W^RdH}J}TaP2JoLaA-TrCg1f*OO;dB>m*0R?OttQ* zCr69l=Q$pU5HLe-wM^?lp$#?Y{Hu4z?h+oLTIGTm*en2BP5H8zkNr8UYoV3|O03hW zX+F(n5Q!ZBR{*rgWwOkmw+ z+I6QV1)G?42QB&Lf<7ucUp4XulaFBn@a7NY8X+rlw=XI`i0>qB3f-Ig!Zh5fR(~*y z6M0gb*x$w4z7$(Ibn@Z55;F@a1BlAH>jp}<1gAA&i8E`GS_d)I*Nh zR9rLHHmy+W+iTz9Hkf$*wi8D3ifJRzPS1??7(c=;;`i1$s*xP*lA?ALnQjRNEXC47 zgklHdBfgb+TO?BidZUpUzi*qiB4s|mp0K_-A4>fJoZC12mfW%rkr#05Tc}o4m_ZxC|uagFa8kQUX8Hd&Ej9Htf9Ku_UF(dBP z((%PM_d6#dg5`x~51t18g;vL3oIrKi8DOw-Rs9h&077%31>|Q5#GIfdVuk#1-C-AfO+g(kbiaw z_M9)cAsdGKyU2!JM;&_GAG0+FJT}~y*vdXys30YZ*f`+~&mM`;#Nha-aA}ybtyDZ? 
z!Pqw7QPXr+#i&V9;mg;D*>h^h2Ut||$rn-O*6zPk>;JMrfC@I6PT9*oiQKUF{F%r{ zez1i`u16aG55f&*gc7EIO+*|sR!#jE2Kz4z_Fow6|2YN&g4?t5$3OfM6)w8V!skbB zVg?MqFd~R_0a>^hdbLKA-kb*_*!ltw&BUaNJsR`~h;s-e;+Q)TWWFR8(-fUN@RG>- z*F?FlhH`dShB#t##-H*F& z@XO6u#KEfsS~I_n>~O$|Ln*Tv;|^B6WV5E9_Y15puugd7IXm~nutAixG1pDqEK;PJq4ABF?q4!Z$4Q!S_hYgNv$ z-%{a5R~uL;H)!$orsyV4@wOA6+QDd&0pxJ6(b@Ero!NKBU$6P%hUT(IeM-z9u0Hxe zP_PTuI*3t3bz|CqF8608`TdyRrJ};B)_{HdVFKK0zt+erdes+7hK=cc@O9o{;;k4W z=F^e)^jFR`tWasu@a*kgw9tm~X1%BR7zM??MHHB2bhM>}4{7>6+PUJ@aNj2LhhdI4 zz%U1~uZwRV4FYNf8oxf*{QMogMR)|MSXQ#g(;vAOtr9NeolfIooV}u4=|8h*eLK%G ziR6Y5z3W)PE2h<-MtMJm@^k3`aapGQJ9{j9_+c1qV)y$g zHKpx7I~=OWam%mf06A!yd>db$hEFP@<(oo2EUPlD?w=ZP^)@5!6U^pT(AA<(fsnBw zx`mbAf8Rgcn2Y^p!s0nVF-dd)(5#*52;_;60WWjQ;jd5tV3%l(L!6X{H_ilTOn!}x zJi=^z?VBOsC=ya){XGFE5-h6a?*F0<8*|;G+Cw?e!jbY``70wn-{%?k;Y7(TqGmZi z>dC#n0!NBu09@8H?#s6$lSR?@fSpDooA+hh_gymgDUL1k-?MryB?9^S*5?N}LZhZW zG=;0*cD0(}cm0C(w(Yml&2H8mxB9EM$~mN$$$VEz%8;5d1Amg@HVtOy-K43bm!>xb z?5j3qWD#GO%G31SHKsT8UqDHLVU#cvWeRIPG_~gVw9@)-dt_%%76vxWt+FcPtx{GU zjc}i-(+YI~<`@S)l&0RcO#dbDUQM+9H5Y{tDytkZx%Ww?0+?3w)wn4ACqPX9?vIR1 z!9M=MY()+Vh!_H!^y9Rd0wJewElqVJ@fHmwl~v0Db|&a$xkq97FFKG=HA>c`h0lm6 ze)&IY*i+knM=`@;aA|Jq=v#Mq5Q5kPND9W?UaocsCk5W)hnX;iPF5$MMuy=2l)zqU zwpII}?%kG&zYq+J)F*^t$MipU!Y;SRu4eyzD=cdIW))M9bu=D0e_? zU-e%?%)D{Wdo~f`fc{_~LHSMNgRR(4Id5NPHbYy>9v{wwj^_M8W!Ru9<;26QRB+~f zo#9G}(CK#?913@qehlMP!BnJLp42ek~_rCw@f@Eujod4R7@>d2o(n|eYhx1r8|J#|dv9nO` zxiqJFHWS4!M{+HVBkTSo%}S*B+%2Xdf4dbIt)qq5;GPA39jK2M2ZWu)FJRe6J`;yP z96#=Z8SwyN%>|Ol%nuS|nL$}m=+=v$2@Q2=-c zb~Rn444t^du;i>o7icgb&6P%hITG`)8EMw?bFd>8n7(X^2QlQ93aERjJbtnCOW9bK zBF@4WIJFHJ8e5)DgTX5dbDwbTl5c2#b0azrOHMfIf&1g%Fk~Gx-nVnYtvSP(6-_85 z{yj97!??Y)saZosCZs%{qa0w9S)wYFs=`cN$MrdSUdPvqj z-dh!-yZZ=%rG)#MzkvE*PdUEi>cZ7CS6<9uLmb68)4TMt9iJ2Hfl=`TVo#5?r`>~t znAq7>660Y$1qgX)a!}BuSbe-~%nYE3KpEOZT&0{#p7zKmqnli4Y}9}sr(EOiOz&w8 z!@VX^C5^!dFEkMe0Q+WP#7po+VPslMWMx(VD{6yy<`%BPfPrt=2}-pBbm_BubE2&S z8zzFe(t{3?kb0L-z^dPvQx`1Jg#h{NMAjEm)rS`7np1de}ZrMEMy0 z9ZRe0hdA+b*LKNVAt0a(yLD?+Q5ilFkk17@-gK86HYw9DYwk$EM~?^G4kPVStx&*E z=3T5t8`FcCMdW4UNk;oDl*0(5cQ`wNS1=2&Hjp%;+q5AwlDt>S`F|B^m74WPy2it9*3DuKTF9$B>L)xcY{Gg)N_|L~C+5{fc} z(B=u(5e9!Isq1uaS5A7JWeSvgq=kcZ74glktYt{g7ADfBRt+x=CiY12GeZbOQIoJ*PKMayZ4F6|NW>b3crJrYY zTIaB^d#V`p-DmRV+%|{c`{?V38L}0a$*!y@bzCL{lZLaz9V2H`Z2f1dvn=@>vLwQ4WI?b%>DhF-8s~JBn}U zqw@m;X=w?N<&@ad@j$4-^ctG@jEV#(vQoS%;fx%V)}G8a#iiH3DoQIVDq0sCtgSwU z&sJ*u=zngsXw=}e2oRHsj5FK7wNA~Kv_}Ogmj%y`m3lBXWcy_)`)0qSG6eWXUbw)} zj8MmtUU;mveZTxtW$G`h5N9g(iuUizv}d?Iy1~?cJs&pmu{H5U&+(gB_@$>!qzSFV zNZXC?=Tt#B)U4Y@@R%ZRiZb8x8S#OSKg`{y4EGT|fll;=yjb@$gS0@OcMiCYtt=~` z)mozh=e3xl&Wxas#i?qBJORBLPPWG=806)l5Z8@wn$-&98Wm~Y$Y8)U`XJsc6fhP; zXN=23ms{XqF}tV-9Rkc)UcM%lBT(jQ#BB)+H2y6R0BbAq(1Dgz9zS-I6vTRym`W8& znQ@>shS0PS;T;MxkV!*A-=HJPqpl6+JT-NX3@LQnYPuPVdw&*IAn9c}&QggupCgfm z*qstV1tr>i>v$14h!5$ON)QPR5so~YeizbbmbT77Q837(;xpLz2+q@kM>8Nb9n5&k zrR>GNgGX8gyzDqS+z6+dh)r11Q_+@0`h=(`qsDRSP>}1q0F(yHpxI?sil!-bldD@F zXW{}cB4{_@YJpn3QUT;*fFsg@9}A;mo#2vmH@@gaoD16$_ToZ_qaLNoHi-T{3QZrd z-C$e&Y+KIZ5&zTsFnzT=p|Np82hwc@=G5GdDK%$#B5krbwE5ZiWL>O=ap;m#2Yk$@ zxrr8-!}QX;tHtE^34RWjObGp>QJ(0{CE~ZOSce{^ zwOR4I%aANR_l3$wQ1%uDg40g<7EjeODvv${U!NfI3+IUOdH z^=mXYcK9o3a1fYInW4=R-C#ixb5kLebkq#{KFP!6YI`kbu0=G3VD2a_ibHCohbrL(X;)aFOC0t}_sCpfl0oT-KPj&6?qALkW z$TdPY#yvGv!;jWNnhRTY?4X^LM%m&;*Jn@FdVCuZ_?(h7^K^ad#2as#CZ|hPgcBAv zn96WIV?5Rl^|34nnv1aYjU1GWM;pnL?0!FbK;s%rjS}h2gH@aRIu^!wW|CU-)6RH2 zC(Kh)3B-r0nl5`D$FuFkHr$f*>YUaol2JE|#recVIM^ezL}-hFGx)_N(cw<9CYIWt z-z=1(Dp)|vFhr5iOO0Y(osW3lhK0p8_s?Raj>F^r*HWZ?l{P1((6a`5@m&}m{q^CG`KB@|hf96F}rhc`I 
zj=7!-*6aNsqgKN99%ivU`*NUC{lw9pXC&H%I%Q8Cz) z&PyE6uKm%&Gh&6LnQP}Mb-AFN@Zi=+4jWpcWY*6`NuQ@neY@||{UoHhP|1tSbTFQY zry~%Oae=3S;n1tC!H&*iB?zsQwC`)dtH~=~KdYLhl5Run2_kS*6v>N_Vj~4QzJ- zOX(i86R?&f5BNE)JYR{D!umxnr8q$?rbU+n))T}E(@o2UqR1$o(> zWZr4Glz8IUkGC;+bVM{DgW9Wu>gD^1*TkxKhoYk0ehFLCNiI#AWk4oII!&ZX2E4_@ zvr0p{{1qG5fhq0?tG?qfHo&24C{0P%t;-TGoT&RcLRO$Gx(l}|#EIZ}z%m-1zfG+Q zLaaA>7~`lVj-4rvM|(6mH{l}@G=;%yb*D*Te`Zivxv`ejXU|iWjs%cz?#oU#94|5l zIdM3W$Z$JCx@4MEUwt4qNNdy*?+d{o`ddcD3F8bFj|B4CYppy<%QU;^2ygO;Mrb&@ z<4Dgf`r?5v_wmr<#A@X7zk*@wVC3LzEbifi>g$HV1hpjFM_8+h6ANJGoIN`O|b#nFM4b)KliwN%ibwOS2BKxu>RmlK;NS$@RuahBP+7HHK)^?S7+$ zu*!HE)}9(V8W|ZGzA9hxfD#Up0j`)wU2SwAIuN@9D)0s?5;CcOj3Q*#WacU&}Am8?XnnUc|%vE$vuHI7o>Nv65=K0#7^Jp)r%?m?7|eT>vXeau#MSt2X~qx zozP-8i@>(2nHWvI$RKK`;e7W9Sc)jP zn}u37=IpsmJet#rFvIJFy+JSiHdTAot_2oJKD1uOQO?l$KF%$zAo+7SDg)o$k~ykK z23*Rx!2=q9Q7zZ14;S2Ap$VZqXn})X>Sn^g5@|u*AnJXi{_$}2(l7ew=kC|+@#y)- zEbKSVUqj_@qFgQse=K~)giQrsvK?FY5y84N1;#4jz~y zo-}GDd(+tL^}atxey<&mHhmF9JW>fsS1t2`B^bYoKe+QC3cK{TDK_I6W>6;bQN%2L zJbcQ_Wf=LE=E(@o=QuMaEY5esFzUXt!0+6_9YOrETD=a~8{=nTUy#sZ2j#qftN3J$ zH|N%UvmxW-QW&N9qJURp<^$#&b#hm)l(G@g{~ZJ9cDIte}eqrI}I-TB0a?+?v&VL?G#m2FVc>L)a#H_(|M zYKx0TZkyg(bd_!i_Tb;ekP`9}#$2+STtDu38Fd?O3FFXXJf_UDqx4L<<%`FL^ferl2S|l1Lu0-{)xu6$KI{TzQcqR-D#wug%ySn$YBRg3qM-(r*#h zV>LtQM+J~IBpEA_TN>3yZ7`D$7(lS!T zH9jnj*kjvWNW|}#=}LMigeQo)V3Kt###zuve!zUk1I|>+lC9f%()O=Y&nI|ubTt!T#cf?&T7<1oU8}Fw2VN}hC zu-R+r6;hh~F_#uUQO~oqnQ7U`gb{wmm)o-*=RVTg#)PhWj=ZUJP{;UgrG*1WJw-n9g;_fhc6 zFCGxo8FIcjn7#j#FrYWChJSIJgMxZE=-+3>3cd@Mrr zC-Wy`@ymt#6$R?f z?h=_Uvs%$P%Gz+MP`2>&cLJkvxjX1oF1fKXu4NyAuTJAv-q%`1*uBuDa_&O?oF3DJ z!M7Hg>fu|FhBV~tFG%8+>-Xfn$m&(Rj^(Nvc^;XM!`kc3Tu4}!pJB#8nvDi78 zsLxXO3~gg)-XIMF8V*HE)@@Fbk|tY47?_*!y^t32wKv}YyQx%7LGubKovpwoL5N_{ zIg2xU6IG26vgZZg<11!DbM?FgQbMlHo+F`sKR2q|+cDRZ-!Rx1z2B{v-V5~Ib-9zQ zil?g1Mgi#p`YNwjM3`cK!^4gjCujTF_b#iPgaSiTI~4u41Ag5gRMj35AfnJprBS9AUJ6D{ipXPYAbU-GZ?6r+h?3pJ$npvc+M*&YKJ{d}O2ruUW zNq6EjHqqgghWkxD6B&+3%Yhaezhp!cyfhs91&?(cDVQcPSRLyys^uUY2ymwC~JHX8WjK#(hA7+LJ3Yp&C;8*p-eNCHu{@?g{FlZkYdoH*Yt#L5Hd@!&U$v zue@WJr{Z{k@gB?# z1kQN-_m4viYFWU9P0VV*lNEP^E4>0?iXYrDH+8Z>K2edxVH7&MUqsUc^WOKxQDeXEUM=|5+M zHdh8wqvc((z-!^B9g-rpAq^(o!!*cO871PjRnrnjUI-Ex{n|%7=f|3s$S<%gadX!5 zY9Yuc)rXHSJ@TN%@hOVw+b~SM}UM|S$f#0Rbp(_%ZJ)~x(SnySvAirCcOD?~F zi{o#rTfNsJTbH^6HikT2y4g~-x4km8$4vPh`33gy$2%Kg$E&?jap>J>SNKIB>A7ba zCSt~6Rs>mgJXYY>sC`J4%4b3mm- zZEcA*)eEY(APn)YW!{n>-N2(l6rnoAkD$nvF}f@j5Y?3XrrYuyD|zIFd_M7UBxVV# zqg-B5t})WSB>R@(+M_*4@?alDIYwY%qExIz)cnIOjf?&4|Kh!5diHU4_hxjA{&OBO zio}g);2;oA_bn3SIx>XRPAaW~+r(;QO7VM|nK1|0rZLPF(FM2Cz4ghH^oE82mWOTo zpQ|Jt(bq*_PGXt;5uxka9yhMH*t>Z=F-v^h9doosOBK?wgXLTu9&b0?Q}vwgW@y;r z+TaG?K?GKr4LKPVRlZzI+9ln2UA~BT+UG{zAJc>PO7L+ z@iwqXY~Xh6v#?(FoXSm|VEZSLaKLGb8;Mjfqsd;i&j}HBjjWdXX!&lFDD{LRmBj#F zbPXHhf8#mI9)o;32eCRK*sJfsb3B(%WKC)wdhfpvrGpwT`W-J_K%YF0-4yr(ekgNyIcloY;$S zE5=o2zd(KId!K_3FTg3q;^?@BET6K%jlC&192gRE_tKSxzb_11>N zdlHp(jMFf_i3xHZIlz)Sp_4cil%8H&>Nc=aUtLl z+A|5=xlWWfd1B;m8WHwFgpTmO6yW3IqEZ9GbCnmDs_F?Ecr~&&4E3nH?Eb=3?6*Ho zPvjR0jt#<4+}pt`TU!&p;sc`Jj{`2t!5;V8`3hk&!|jyTxD$|htTBfrZ(}NPqZ1a+Q3dt89+1BoRv(_g& zB`ZJQ1-%RE#G2$c32)}D8m)?xf{eP#vFkk>vT9wl3^U^i@xJbItirQ*X&sfQ*ZSla z^Q=zQs6(Y_**mb9>tGo<`PA7!`Cb)?&Tz$jUdZh zcdbwDw+hW;ET>As$q5LZ;Pk0p@IDuW@09{0eLI;Syu%WSYOE3KzAlIchmEDJq#w*~ zf_2R4b%$u38V^_h<+sIc@}i<4{n&C*%w+0Ew{OO}bcuWGE`PPDv7rMl-ogMY||B`idgdX%9oRp*W@75XdU0=f=oE=-eamX9%7 zFIHPGAL)GPUGR99``SNak}x(8ztPB)6<-wRt6(tb>|p-%*qHA)q`0)r&pZ{a-c0YnvLNp$*;2iBZ{!GpZ+Lr0aF#(Wgo;|lw;em~_+$hB~w73bE%K8D8(j@?mwkxSF#k6=@7rmCrJi2Y1Ut&K&UPvya( zHy$vHhtpMFB4a2SaRq0iL5r?ibCVP&d)Q_Ss$=QxJ981{K96}(2c}B@#Uey5*KiQ| 
zBW%UIICx9RdyPL-I9435EPs z8QI2QX{|!}v84<09&!`HuMb==^GJtAvPIM4g*V8Wu691#b_Am+85FB$E00Lri@kpH zhS_rE@qdH|OaFog3V*?aoOG{2vbDB#KNXu?34c;U?* z;U4`g|ECD1bJe8x)?jnvYpFr`7JvWmy+={k7z*(LM&)|?YtXQ zcGAx?u+sNHB4~9O=1$ykR?Py{y!$#Eb;6pH)kFn<+YCfFl7Q#l@heCZ^y1HUeE5|p z3)tnu0a4(cj$*gL#y(p6L!GDzhP896U|8wJ-Ff@SBlvGn5Z#>U6IqNbu==T(7@4Rh zYeW|?v5g~VMM->Qk!OQ>JQhuVA8_9F$gr= zyKRa5vn<#kUUj#LT3vXKj*~N!+k~lE{(>2JLb*LywY(iH8kF+krS&{(ySw_eeKzl0{L<;G-2O@1dg{xhi+TU+JxkF#nA<4D-)#za z(t}PdxCOQ5Q_;ouw^*e9Y6msqZfxU^C~bT{nkd%DQxacM@%I+STz{wrXIM}4=M;`N zxsp@H)Q=ZiI(!vQ@-VFe9U@tYhH3*nX-Rx9XpP^^M@O>f9CpSNLDDZ$C?$@v1#2t- zcrfM%5_K@8Xjhkc3AcWR5YQ;e#%G&MN_+C0C?b=I+x_P{(yXt8VK|~l7dk4+O4UO@ zFGo|upDBZ;yvX5#kQO7GnzXz7P9C=YX-N(Pob(tzp)p;VlC- z%!Q4zeIr2XY15u=ImaJxQ1ib92XfILJ9!D)fJDj+k5GvPybVrsGbGc~cA;#R?Rg(F zfKPKF>0?2B5484TR5@Zt6;w44XgplHb_PQ^PnfzfY%_LJyq;^-@|Jgz2iv^>VT@`E z2SAn$iAJLZNO4YaLbVsbInly+3e*A=X)6*pp84=xJW@aPA=Z=g^TYk4G4o5BpQBO@ z5L$XHg$yi*niXz8j|h)3-)ZDS2&25ycS}l<*twfZPO6uf8=r^L^3?DoYzVu2Gcpn- zBY+*KHZv&o4>Rys$}OOjTRLtn7xR~1sp6)uM?74n_}O=GvlBAr&H-BoOBkTLCKnex z$^0GRNijEZlP6o9a4q=%og7g9LkS8+^OzBv_S%N*xOtcHRH@&lGEI8qcB>cx>pFD_?=YB4~724F{c!I zI(itkX`ateuDArzBNlZA&cc%IiP+{ySWu(X%H3S;h`J9zoI5XeWH-Xnv9r(Sj5372 zhrbGt>XNO!Sdj5u=D%U?N*g&P_WwhlD>kpAEkEAxJ{lO~j!%D1#+h-JPGLe^ zq`QWWnSJ7QUHAPwd+%rMXYciXco!d8i?ujr&j0cM<>|+)^!cTYTc6JA{`#p?Q0C7B zzs1{qm&zVmkEtRalWed3+7&!)kG-nrPFeplI9Zya0C8wmob>^k?{Pi+dl=GjiDI4M zey$ffU_tRdQqafcHw9;}4_4DFiqL%i@S(m5sHYqG1q!J$D?QO0ZegxZqm#b+=v+hoxi#@+Vk*CXFun1+XJ$E6_y}ccUVsW*U zCy-+*s|L5_e)}Ofl>6K zY&>ouL_o}Kly{qYQmb*yvkUJv@Woa$0)6Uggo2Qdb2ifj37Q|at%Msk_Y1u71Kq7i zmY6(nlQbhw4ew-b)etJK2gh5mxnhsz{z@!}?<+A({kMKm+{wy@ltU73_em511?%_( z##VEM0Z5J37+I2SJnxCiyOm37`h;Z90i+3RSkR&5O$w=*|3QV^Z6dMU@w|Q|uN8#(cd!q(q9bVqq`xfoFxnUTKqzEs8qev8z^P1J8*14-B|`0(;ia zSQd*dv9-82n^W(iCwkFFnsGz;HN8$J0LTCvfyU(Bp3O#kU-m(J!wW(L#_z7)yPt^2 zKY+oct+DwXX^o#JU0ZC;i&W4QB@LWucbQuZe6jcLXZ9tpsNjzwZZY0Svi@MkKX@ZE z3b(P|fRIDKF;2M0WpvzpES6^7N@T!e`uWB?@oReAqz;zEM8dn}XoZaI2R0)=X9r4} z;k^KVTg^~9>5}Zf=zWaf{~f)b_+Qcc8oAgjxdDuHbH9O?fUvSWoViy!t<0cGU6e_%%{MmXE@{ZUkXv`%~i-LVchB1Z7>RvQ})_5wqF#+|fIVl2S zJM>7I8O%x-bd-cH_b7vnGfLK~n(!fhR4ewdP{-m+&zgMB7TjI;P7&NkHfgM&xov(MvuVqn3SjcMygsI{S6twz=CW3?!? 
z!En4OpvHEkn^yb@d{4Qc+dhZ6h%G{CvC|9JooeMq&t1Rp0lPd4pbW7(gM6xXF|{=F zDp&TAd(5$+d|5nRX}alphBM;apUM3C1YCc4LOqiB%$rhOem<|fz;-#>e^!}LrcHOcc=~nzqBR3Z z+>6Y73>{;vDrV>%y3DB(!c}4#QDX}WG!kicxjxetOJ~iNMq9DHCJ{DDpz{W*zB$`w zh$g4`5hnlIS(Z^G;U04yF#Vlbvq{E}blnyKw|3a4qU5l2jtC~Ni|i&0g9`fuHXUs# zh#^!HbQuA`^uEiZX6O_sL2JLRo704o3qqxfayEBr4qW5`jh?LhkwDEiSfO;+sO=B= ze`m2YuD2g1T>}zdUt9&87m-Xwq;$Z$5NX0_Hn9$F5=P-`HW?)ypPcjn`*s+{CNrLp z$?1vIK!BV`^X%k_=JyW;%)qEcdSF0siVcSt;5VHo_g&kTm%lchoTIyWwY@S|!v8gA z)f^aRZ5^^re~4IGLX{)VOly;2REwsB{5w&XMxA{V9PGokT`wlsh?;m%vTl2Dk~gSI zbYZ<_#FH7dwRhoEe{-utgo*5R+-l|zw+dowzBZfG1-KPeb9Z{2IvNm2)Z1Pb9}qcJ zAx9W8tQSi`-`xF?r)3wPt23obs7PC`uv8RUg@353k_wh>YbA^$gJ!Y$#J;?lmW zWA7uh!-_raYnobZ3N7VI+5_!VI-KOmRhV4Xvw5lNx^Ld++`uDArK6f&yPSl_HQXbt znPwjff=dYnC-GjBuZSXlSH+;FL!YiYu!$?_V#3Q)Qzz~!M8r`o-(s{K`I+Nzu9?A^ zR8t$tX`5(kNk*0k_a_v2vUIt^25y;cAgzaY*-Fn%Pqx;_3I8oe=VbJQGp?~MYLP#} z>2p)s>q7s?OW;Szg78)KJ);BitY<>y_thpt@-mo!HDnx`wbj+Vcc}yf;w)-9m=^ow@$}^^ef|@6Drt;mTHUHt^yYnVO*M0Pv-*uj$i048v{~&0; znR%Gpm93<8r>UyrUfD{oZ}9=g13;D^MEz9H29Usr4v=;9&Jp}adjm_V4mNc1t$%XO z=pn;(xofCh6x;7y)3ctF^tyKkyLufkDnm7mziKU=*tP9aTJ;vy^QdLLRbOlrbANo< zv5|=V=!Y4nb(t|*pI>bkQp$!|ras)He?u|^K9GDJi>iG58_7xY;kMr-I<}+8}W?m)m`{Lskj65R1{C3}H$AaUzWx3sK>Vj=0f94smbyJ&mgk zLnw1+IjhY|dNb53@4eWX@{RoqB{GJWB?iEfJ8hxxbYbU4R=j4KctpS*4VGr+#0 zbbzRo$;P!>aJeY{#j`!k_wD~~BkX-R2Ojx~F0GoZ*ZjPqHkcgNiBqb-y0p30bnPwk z1TWk(QXQu?X6)6xY~FC(!BwI0WmrG%!$&oW*DTb@v0i+r8jIB`)TCsdpIYxzr5GI> zqXDjM#pLFS%X|Yci+Z^4>`1l}8#HlB%s&Q>v zh-|fIfz*(9t$}P^K%mihU{=oD#yt9P*E%bJx}Vn93J2)zfA>P5!N|(8iVL_t%v05C zVbrA7k0Ct%oP+pL+%+Je|NU;Fu=SA{QxfbtaE=nq#ROz$3Bx@d}^w*g{NwOU2orvtLzxtGoMD!Jxu%>S7m>P zt8#v7o&~F`u(=Mnj_9H+I}oj{AGJ{+ougBd^RsyPkq|?oCf1$Sxi#0hO1VLy;$El= zoE9H?KC9iFE8Wo82#<%~;zP<*c;YVa9sz1Ahl^FEWtC@9XvQPb0j5hds;!5Oi`i#) z`iuh4y`DWHCx%I@9^R+Z7uxxvq8gO%K>ImaP?0Y>h~iBJ{mmO%x^@Y9FN_Pn2BmBZ z-jK{|QqotJobT$U5)QQMWc9@5^VHUE?I?@jq}{r0RJ$TQ<+zwfH1XhubHjLtG~Kdp zx|Hzqv^q%R=<#acq3D#|$F%Y+#>qPL*P83e1cWrXeCd)mT!;cAr6&2DA;?X}Tk?lD zHoS!&Q(O2_Wb)|R`CjBNb$uZ2oUK$KU;1RRr5qxWYO`+qj)CaqooSxV`5T=aJAM=r z6+BngzS4S76R5GQ&If)eotsC0wvTa!7G^&ku^#0*ey0wlFTBI| zx!U*}9bsbMsIzd~(q)0&D>Rc>4BvXRTjbMN0uk@6pM(?PMA&QVU-QUiVa!hWe(m#g zvDq(T#Tv~&z$M4~V^2uHA^-05qUf|y_k4lE_96W?R_OU^Q@hr>{CwV<6AwK@xg3t$ zxGLBH{4TK63eFpRu9pMpcz-^{pR^}ix3>mLasswVwzgDEB`gCIXkn^RqH>s%E>3-Y zah5V%moD&zhId1AYgxJ}{5dJo54n=n&#zntD1Qh>$!6A*7l--(g6g4~+OS=s9yp&E z-X*2i&&x+{AZ*AC4n!$km;=R_{U|agp|9a~Z2tYs_Yd!2xJ12=Q1yR+Zud0M5UgA? 
zK#{vqP~#~xYk^K0E2B$ClCPORRd)AQC`8z=aXgpL%~#@M&?G)VPnWHspzjN}f@(WF z+2zt>BFrB$(^%CDgypmyb2P7va`Oq%^cfV-7#Uy`azmHT*yZQYQ$HzkQ;U+HqV+Qw z_H<)U1*@tWw-{3jOto8w1W<^o`*mm%oU*ze@_xCY)?V{;on9eDYaF5;(8jkT8cuWr zkB0xps@dm*M0|lt@$~#BMo}V15rvW@(6sFbn+3Qz-opCU{Ns>SKYU^hg*;&`9g7m% zr0$???eTb=K`KxK5!s+aopXIC2zOZncP=+EMU=CQSL()U+AUE&GNpiS#+@Egx6IE` zFOfY0g->+Au5x7AdGy(CKEOp9N#*NO6G>_v^dw&Jwv)3zak2v}r5JUr5{WpA6v`#y z8OZYx-7IOqtxH)Ong+cTZW0sAtnK%YM$PW*ZN^O$1t%ZQa1lzW5HZTOE-)`mPCg08 zhjldYNJ|ulJu>40O7th)MdSXObKXY5*e&J#Zx1~MRaAY?O_LXGg=_p28Hp_sJY5)E zT!h>tgrnck`p2kVy}}x*ISM6uojH75<#d*}hTV=58I@`B>@f7`j9TZ1 zA4PVep(^ta+Ud6{@NyQhPl6Nigb^7G8H{-4!)0~>_S3B4ki}}|TG`Zn92|j=b_qJm z)CPM{*dfGV+KfXaD9kq68Z8e3r{Q8x)KY2&tM1Q2n z%w+|u-Y^K3IUUDc)JPBMdQ1E2)!X?t7B024x9DBB^fLrg4}4+b_Ga==$C~NJlgg-`VILAe<5DGjGiT2#4MuE*AGL7%(4EY*Gz z?ZVYRB9uwCphcZtho!V=q0O|EkK{g#@8VK+elva9BHnvEjyWIvwT=pI9k!u~Mz#*k z9?-bXf3reF0Yz*t7p{lOlGsC7mKj`L#kzb#p56d^?yL|YOFG{k%=i-0s7A`&XbDr- z6z=XFMK%e~uXUQjdt}8U?E5V|jSU{aT>`m#dG$GOyTYyGX5Zsae94*pc|k^q4*dk720!l75xmpCPU(0wp6NM$gEUNrvOJW& zusNSo<{mK-gADp#N$B9>7Z1NQcZ>pTeGB(66y%!g4V}y9~y~{GJ=*TG& z;-dlCAef8~%=~)CdCC`I|FE6d*+67RhS;rx{|dOJAk6*QEuzcMqOIrSeC7wnw*$p! z(XV{h_X)FhM?|qAeY+by17@m(a(3uU&DhPUs$JxdAh)Q0yO{KwqXYN&94r&3%o|ex zXI-a4%4a}x-J11y2lvZ0o2?qWSyDE*?|6%Iqb<;)`sOZ=#^-hO6m>C1Y~zn=Ls3?g zxA*kqM9S1Pa*VOeH;iJA86UQ(oic&`fr}fQF!$V%vh@Lxf7Bq(KAPR3j0*5;4D9XYh;91ym_ssa0GN#!hX{4`D7o2ccN7>7i#SKesVqJMR zTI!8N_mC192uNUy6H^v1Q9cJc)pVs?1VS{;ioKxAJbnuQsKG$j=O<4;hWvE(Oj+qf zX{qq$9;JJc(E3#XXu`bek1i;XvzVyPFOxtJ_1l%n+Q#;M7kBIsq->>EB)8@=X)QIH z5Xm|oN@09AUXd!c@*$lnyC8tjf&83y>6BWJPCp)kCbHv~8?A@SU_VOegoQng3*c+; zC}Y3QnB&~{HRI@V+45A9TzwT^w{jK>ykO(!^9FCzG-PfCNY zlMmmbYfuo_n(jE2iw1JT0M}Lu!^DZH8p-6E5G}|c6bmd(Nsu+cZ9c!%?uElkK253m z&9h(7rCnxJ(?pti8PVp18GJ?I?!!K+;Yw5(VgiV52cIBieS4#*V`Voijo?9kd9KYg z&zsLTuskroGs<#DtzU-VLuu?39$l|Y-{}F0q+{dZN_(jJ0ce9g@-P8=6%NO!)Ia=K zyfdsPa3qGFl_2WT|8U04A>4{POWb9#smnZ8BBn|0)TiU|S_>b z%L@_JYDWK+Y-M}P0t~AAS}m>ReDO0X5~~1)F-!OACDpuM*kT9y^HKSb1rI zDDs01u{jPrWr1hU;^W}Hr?3jWxu$sBs$DX)P4LknierZw<}_V*?=_lTbmr`b9HN_O z%eP%V-L!988M=c}$ut_pHL|}plG*tZ!=5}-KmW)yo@vew;A$^dswk`COy3)#6ZxjS zUg^l>snu9-5B)Yj?%o373ucV|0Yd{k7Y2B)BC%!aMJ{L5LeQ?KloMs!W2GPpMWNJK z!a0tBPbDqwb}gE!SkL=aUb#~W_Yj_ZV%!SJ&y|Za`>|<4xz(4gOY28nZvD9)?X&U! z!Hbn|e73X01+1ZDgbz=-XX&1RpCX44cbg*>Zh5E;^WC58JXlx{(BJJ#e+F&1vrhLo zV-gU)DpMZ6rziwtWK)cPtU(X^d4n3qD&@V+$4tuqgT+6+@rJK10I-Njv(&(+tNS4o zZH!6j$qIv~3_M^7zMj-q*W^^9=(sqmv+QM1DDQf^_&wGg%xXJaTonp5uEnkP;i%B( zwYiFXr9z;iSs8y;FBLL0ZJ&nH;<5oL6ZHXnXbxrZ>Y>oU4*o0mC> zZyJ!PBEb&MH>!0)sK)*04I~tc?FR{O$9{?DYuYBQh;r=3vUtL(0cw}HT^$&@lG~7b zUiV@{-GIDC;y&6EM`Rikwbl~@?D;tActe~T+2X$d}>{Kc&9f}Iot+bxxOU>7U4L9B; zu-qLA$*c6+mAcda91{q$WXlANnM|$kv98`8di6x$|L4PJ3lq>zVciB?{|jrgU^2LenlMv10ldv5kHeVs(-1U zbqP+Z5ItnUoS)Jjb`H559%-j59<%^1#uU|ScV7~|hfL?=5_%d$X1h~n7rnNs>Ai@2r$d$jv}-n4263ql}-qOK&K~ z*XMS0e0mzeK=cuC(X420xrH~UZ;8`?q77bpz7hp!+-vKmW%N@;Q1~I^RW{1ka(7}p zk0_FF0o?yx?qB`Rp0Y#kD^76El}GxnSmRZ+mf0ox$IHme9xP!WU_6ABQw%)b&wp%( zYV(}7C${W=B~8g5m$U8p40NHq5`Ye7#71|-Am%H_b0P1XeN9Stha6JZdd(wSy>LE5 zmw<#W_&ZJT@>ON?=XKXp(TG01(SJ?4?i^%R=Un6=pmsl123=L^hHR}Fc*3}$yOMFv zhr(h`B>s)|IjyhMljRnIg}H{C({U>DYW0-{-V~Zwsp@u<%3gd6>p|6PbuIQ3$U6@;6 zO?}5PkhVm=)skh;`@Gh2s=<9=*XM#C```+V>)kV2D2Uga)F50#sQYIZ1m(A@ucJb-Io-YOB>{GUw; zf9uoP()vGum-lF|tDt<>sXqrGLColTU32BT%+GE$QZ7I=f8@v$yBKI0+UXTyOBX6of}|%3A+&bMuU1ecyKVMxQ-+Pqaf@ z=s6w#_;tEPKX&&}jC?GX#xHd~fSV+A+3&xLpQ9gi#SHk{yXy41&JNkFxRNsDaX!iz z1T%hE5x+5)f3%`|Gr@jhMXPR5UrQCM&A7n%wTL9<+g1WaWs>!0AOFf3F>RTLUPEY! 
z9)F|VTYf-VG)RkPh*d66DRE5TZWP%@x2iE@X;Zy$!nB(xCBi?HcSC^8U`Qm6Xv z@Eo*?NZxxBNW*+Z1vbKu477soXcn@xfDWnIBl!s0$=}a+VpLe|J!g<->65_!+Sap* z)@YVF8~UPy>*Ztnc3L|skB|+7n)B?=6Ut9Fh0B#y$jVgOsc)pV5hxN;Rd>lS+4+Y` zDUJK58vLd9bQ-*tEB7A<NacYWCUb`JCfI zA;QlJL1>AXZv&)4V&cq~ux0jur(5mb(@Bg+L5^2Y7}up4|JKX5Rh4$GjtNzlQZR)V zEQtw3N?wdN8g)SwnprXdP#HNKzQ|ljk}B)Ga7U46tGm-0#j*HM)`#=7*OWl1eTi{n zWjj1IWcSvK%K6C|d@Y_RJRPWQ;wjoJHe^x6QR%CGloQ{y9+n#fZ%Pyohxjbt5(p6w zPudU^B#g|YFQW~F;PT#r&OKBor6WXPI87aadDD(mX=8$=rk`Sm1U{|o@Sg5aAS*Md z!`J)#F%hltD*=_UzjR{zoA@p1A$M=OJ{q|j&hu^Ew3+D1YY&5UhF`;PJANr4uRBR? z(Ev2p7)H-Jt_ZTJ%DZzv4rMbXNvH4tk~1EjYJhF7$UybLO^*F`ZdQGUJRLefJrA*O znmt4VW6MwT1hQpgd%Pakqu#m0?OgC$%mq6lt5ka{JX#|xAhS+PN1;wWyB_=4gn#Ft z!#?VksI8DU-lhS$OMW2*@*)7{(_1`3r*A)Ri zVvf=&oZv$@wX?+j|ZJ=N8S~m$D9Hd!M8y^KxaF{8l# zTVQ`{MJ+z&&=mq{q9R8<6-pWHIBHf`&`lTCdOWGr&W1p{;Jec=Kl2w@C{UB%$su6l zF8d_^_-ArKNfE1XewxYpe`yPjQ1o*x)q{Y>D=4(o;7;8hnKRJ#E zS(+yFS>Q*`2;yTezxUGfzlk8FKu zEO{MLDgIS?{mEYS_jmMUw3^u@4kf!3U{~0mn5J2hA`b-xCBuDG?aVs73c9dqKLM#N zX{^KuhGVtvP(Q>U60QNj#+U&_?haP#+s_t|IH`x2A33KVlm5U877@GMQz|Jo`WloV z5&8fd|A+l2zi)jRnx$49xIhvWIOGmiOAiUhmBa*;E5gv(`S(e9Y(Wmn+wceod8Q=M z6!w)DM5g}5wf9m6MP0~hpif8KFoXXe>fSP{%C&75mIi5%?gmMvySt^OMN&GYQ$Qp| zy1Tm@B&8dqk?!u8`<^bBYmMi9*7LsKH^$y$?;j2(I-Nh}eO=di9&sL*6rQu#h3zW= z%K~oZK1`VcOCV^@NI`f_Ad{6 z?rqY*<1<&G$iiQ|TLqD<04l(T#ml~c%79v8-! zFk+U9)le_un<;os8xp2->;Uhqm4Ea;o%rVlp*qywtZ*n9hz;KAQqk&grV8G`^|MkL zM79mm9m6uHfCN_#-??3p|M=mSdB}%?UgAt{9yf(lJ;?%AF9QpHA5Z`F1sxio&Ob%l z@%#OB;)U2b2XQpCmna^AJ!7dF2oU~~ulP~N)yO&K*FXA2v4PRXizinXdyt`wjT@Yr!1zEaJc zgG^V5b9-L^y)dBijV2e<#X}=8wV8!@8y}_oC%Dz{p&ihfST*c=){BV%hQ-R?@beBc z?+%5$D^uwop6JM|;+}o6 zatPXSEo3B#u1)lTrHa-7z&-Zv#k+v7Kk6e^m}=-0^j6T*Go1XPDo^Hd71I^;EEpF= zFze}CpdnshItyD}K{0$gz6?f7pxYIF0xnVg(w;#R4sA~dCu*P2yirAlN9*-$Kkdgo zJ?XeGR8va;?~)o@FhejJvtk?D*)pJeFHh+c0|t@^lbhHV*I0%uD7;Ep(CuHPP+o8+ zlJl7WM0ylWAw1rCM03{j@otZ;MseVf0XRKPZIlbH1_LMi;o>q+ma+4@O&hwYHBO#Zf{Dil13W+)Pkfx z(#&Z3gUd={&})CV9i>l6=gt5Qk^F^2+_bn;BlRNRyeeGvha~fPMUa+Ze(M3e`My#$ z`x9=t|1*c;kPq?DydR(c)K>E5PCE=Q*D;MDuy3)#fg>o#ZlNi5qxRz&5d`?q%zqoE z!g&}+VJ8@DuSdm$VjJ2Gs8h|h6h2m}Kjqc>-D5@_)AEu+zm>{Ko=g%-KT2S&6d*ca z3H^i(ew?wUnRXZ-Bb6{*Y!SoMOh5ZnK+e*j(s&rOC=<@9^gG&sWvda-&YCA{sRSRF z^d(3Zf^5mY))w|Jt8MnF`-?@w?4xx*^vP+^Q0_maIgTD>7EBO@6q& zx3JT1rk8Gi!~9V)Gl=BYy(TpRbvMw8Ektz0j}Q6ciW>{X6|OMR=ML=|;{ObkfK66A zW>xCHo?l;7yO~9NFVqxe;u0C2MWEt{zN6xM6+ZQLpzVn^4YFT>a)=2gRy_9={qI4D z>{Rc7XYI3%EZ9+lx2Hb3Y|01XDTWq=U3U};@$XZkhc1(s3l)-dyP(Jbjr(t1!tQF; zbi6r?wXL#rh+8DTFh0xXiUiAEV}XT z{q{YLjUD1%PpnDb&r{{}78vPPxEBS%?JF&_@561wj1-{k;@AcGe$C z8F3Y{Cw~J-0FJX$IUA7#tZcD{`x_Zmyy6&aGO&&QvuymA!G~9rzXo3?iV})UwDuSb1S;$+Yb^=Zux&?M@{Q!a|iqK zg@wWVA!W#})!Q)=RLB!9Q-)-Zp+Wy2kd}M{N0mODy;K4=h#??Nj1RBclB(AKXg7e} zM`Hxtsk9@gIr?FRbaPptj-nxPE%@TEP~R#6N>xD#Vp(nAO#=8?}WR=erMnf zy68yzf35U?{EeCdux-@5e6)yvJ(tjDzGDk45nC71&w3HV@X8l=z2X;VX6QXzTE>^> zOYqk(_I`k$O5bOpFpKj1GMmW2K-hw!G8d7ZK%|G0E3Nv2&zK9y_=g{GIZ-A_e)2Et zJ#7m%aP`tweGSadBdAMd(5`sfc$~VxL#XfLhec!ZV{|$H-i5dv8Cv1BcWGFriAOlH9 z*4sN{pdD(=MdBCd1!^-|qAJmB>>P$t>@rtB%m)3bMWN>DC^nb zJh3fwLULokA>V;q4rf{2&n_T)itngroJx-_{+wAcX#KYF!X5X z{J{&=Qk|El-8kt2`%rp>H0}JU`Fe3|ZXy4dmNFV1L{#^1$Hxr=$!6Him80K0SU8;!ac z|GAxr2DcO0()z#mCdyABG<9t_k1F)O6m>rLF=(kpH)YrA^ugq#SN#0K0_Bi=SfQ5J z;(V#qV|@g)6e+iFYO3CjYFqjE_X01&oKe2AWuE1Q1#;xDC5cb7DV*OX?A0^VsHIY* zwX8{P$pR0&g6~KH2n6d&-a?xwbNv(kaF>S_LJ!(cm1iBu+@}+-qx^hyUIyu*)m^Jj z_+`nR8uq@=nev4P?9j^IYuCR|Ei|i*D0A?_>M?YmwAVbZ)DdjG7ahU5;?wZM?Lh-j z57RKs?8j%m<+y)Yggrvu+8d zYi{T!hDjGbCDc*}U`v0z1GcozUM1BJ?h{q@a$STs+<^uF1qZcYsNo2sS?mK8LHYFB 
zMx_XSblA1>8n;%~?SrV{LzIhw3*gAyroQRj3Xd|~42$AMyx1LkNMQE_5`4de!B*Q$HNyI|KmM51o(J-uC+FKkbM0JwObbiL3wqC01?Hy zk5+TyhsYKRC=S4p%81x~5|*DU5c3`oS@m+A$kCyF!O&8Pp_2(rRbrZd5wZ$`&63*#&|1w0ztMp<{OE&Lru57Tnn4}BnhZHkE2 zbf{xYu@!%9l>Vi8l)m$ez{}|=4k-ttPq%Tm<9ByDN(--yJUTt-L>3hPQGxviekORF zrN75s3r zcY!2EXk1m$kC?}{rB*$y`HH!>h0CL|L{ftwB|qZ)sDbmh=6YD`EPHs5||TO&x2fgE|{MB z_0ri%Mqe?{Zn`*eeO|4q**bA62wEz=86GL$BqpZNyj6vLWKYpZrHZYtI-VNQDyYS3 zY556Y39Cu_Ov2D5_BZEXL|at%I@V95^Z^j6f&R~hali^QjQ20=;27SAzl;d>8} zC(odG#X1l~>L9WlHbiYd{J}+&TNV}G z>l%k!vP(g00V2>s#Gs>-$XAqv!vfahd+~+p$*05!cas>r3cC3tW!P8UO`o9|E`Nls z@IsZ`;_S~jBR}{(7&-Lxm4URfEKQ$tl4F>uf!;5pHvx2H)p^G&@3ePjQ9ZZRcv)@U z%A$teX`i+Qrr9_Kd%t#n`(17R^#xi2P>=quu=;XAOQ>}6;R}4uPb;|VaB=BK;&L4$PH8zv@Ei%~D$xVks zVGnpgO%z5^l9zLy3A3Qqid(IgGX(N76flVUlHt#K7ySQ~Yj}@u)m$5UPuiM}9pC_k zT3IQ$aZl)x3&}8~v7E%2I;43OOh&)Xv5Pf+rB7;$^^N1OU+{3#cGt<sx>G4_$lj8L(wCWWIu)e6i~`&0fW*#KDl zj;`Ybe{=xD9LwQX@!y{uUs2 z<2fEKZlv2Ho1k;?nZ(yuHq>HJO3&m~C7$(eUn}jJZ!19z*@t#AnPqweHuR~Z=wuNZ z89_LI1xU2jaV~3}0SQaYUe1r?=4{X%rIg}}fXxyxX`dQKZ9#V$@Fd>dsFE9?;ShQo z_ynT2dmIN?sa%V=l}m~}zyLm5l{gu;w=OGOyZt#!v>&^rXhV1>VLsf6CAtgQ05h+G zgm48<&ZpPvB8htpg9_(X+Z5Bu*i=`Wk)BBIxvPqhOVAv{(o7fU<@uh;pu#7Q5NzrO|Ow>3e>GI{t9Jil0!Wl=ksHTi9Uw zT==F-0P1C_7LazV%sx~n@pz|$e*z3HS90#zPbU?xJh5pGY~OIefbd1FD?egjp?)7yz|YyEHljYmT~0f zxp|6y&^{G{LILck3r1i&>SxWAyUbIAABE7J_+yZ~aSJe1!*3Qv>O0|oAs#f8;8y_T z+1d)=V?hDCrO1A|rTQx#F@mPf!xHabpm&4GPDAA+?Sabvxs66IHnXjJ+R)C+{K8Q{ z^^c%^qWy*|y%hQdktn#x=4RSK!cD3?528FohqVZcqV$LgL9)knaRRMaQFslLq1So*tP6_OLH>BQSsG`EQeKg?l0~hwB5I3Qs zfUMaUkbsC>fB%}$dNjMBjdgol)h71n;B(G9u~v{!7=uo(##uDA?+zyHMllrZ+ss?I z&IOx9&MO?JsuMzHr*^w#1QmvMFYQvx_`0!Gfj__X-_PUE7gAtM`t7?N)t?)}=L1-^ zsWU)W%pq~x;~cp9qd>S;-DiKaj75d%A)-`*fOed?|C~cVn6ycdl$g_y)7_jf`Vi}? znDwFXn~CJ}@0;nWTPQ!WR-v4OCHiSLqyZE|D<`D$3r>DAZb#u&6c<&%5KrAE0axrs zpl1SzZ3WZJ|96~gFzL6vP!IYxHW9@KuZ@-%KnHiT45)Hi`PPY`{SHwZ;EHo_UC7OB z8!<+YLq5||0!$VEv_F(a7$@V3Z}+sk{u;F-RfC*bV9_r;>8vG*JkimgKZIN6C)&lb z+s#VaAi&=p;s603m#+gpnR3nbG)4=TUM`Q$=DT%Hf~K|=p0fvslb6tWEF8YH)#nlY zLz4L+06*jxBxsn8BmVFX^E=p#HVic@p(a$NLnLLT@dNIH)a9p#@C*vdIGYOPs8K?B zUB})xal6gdu=BSW&CgUg@rUnLf6rvqs_lKMh1?}B7WY8nR-6rRQNRH4F%3Up`R}HE-!ID+)vx+ z{~V8KUog)kncN6WG3Z|puQV>t^d*0A!xq%ShzB6ZFz?&;05fZ#N*BGO{my2?Yh?>M zp|==a8_-~Uv3YJD{X*^^dnbT}_65qcijNe|@A};hUD#mQT0gf=A==i&MeF(SFy3O4 zV^^)`o>X1UM&;5U1H9E(JUuH*R=rrq${xRN$?cW4c2TX`MDt-F?TH;{|24ro8HTXh zdF*x)rSGXdC|DgWe6QObO&iwWvbA{xxrAPWB{9{QG@#}7dGst+lX#tg05)SE3y8kCyPOeoAa-ySwZCK zt?8Nfo=nnS0Jgxe&=LJdY>@*VXlA1r{=H+QguvJ$J;Si8tkF|O@O*l%!6XLh{7SmY z;tNR`{wA1s$!CHbQ0w%_vq&y0vWcv)vU-(e1HX%ToqI!z%z=JqH0q5henxq?p2hL? 
zHbcfap4{-*j~~hD^p(I0HV+ao(uR)4YJf8da_F=&LbwK3=--17%d3u8bg+59OkraG zH<`j-U!brL?15VN%$69Ii)&-5)Y-gZ-UlG%>yK~?S*3m2#WL)hkoMTwie z@q5akmSJc7)z*!!a9a^@$ZPk1$-n>Wi;n?sf^KQk-Ab6 znrS}Ul$WlVW5;R>z%y0)2n`DhYYdElXVx@06|-@0BywUT!Xj!2Z>Pb(nyEgPXXrxu z8+1U$kLJ4()AIS?^?@R^)7hNxs?@l_>XyLKfM#sr^Dv82a^qN3Fl;{3guRX_FfsUq z{QusuqZpir6yL|Px;n{e=aQ(JMe?{st)kjbqPCRUkR`h$5Y0d0z09qi0dWRr$c1)# zz~u&?>VQ;Xfq5h8qu<}~zq|nM6d>4?JgP9=`1KEwbiQMgED?bbT!l#{7x$085Ie7{ zUctuH7I6;L6><)qfgeH&^fd6YOw}f_te_;y?qIK=Zm@_`wD8WyqUzW=H8X=60@j-% zVn|^{;PNxqRd?nvy%+&3YCZhK;0@^$3}A4JSkEFXA$NCXE#_L7zmMqu)JPEO11Zrh zcVX|quD_qiq;Aleg9;Yf%ap6URpJGql~&AtZnO_4hF#S}l3VsF+&iwfO@3-bxBA(8 zalDUu6cK!RrpK%p(3cBtO5uxELM`j*>OCA0DzD#9wDE8*NK*ChyloP!F!Z7fXEORt z_5RDib0{o$XdhWJf&JGz+Tqb~<&n>%kAEDTwk+z7&n#v?imol&;XxfO2W`U z3JtZV=fn8qT;Y=3tm?VC2wR`nn3%Do-Tv;1us#}((4c+Ia%d6s!Xd^6>!3K`Bd(4r zJqfonGJyE;-(UpVH~*V3!u!V{kx>yYW~lC8&aHz#oLkp#{!mS#Wfr_eZmcFl>}DbG z{UCyPm7o^HNMy)Jmzb7IfIkIGOl<(Gq}Em#mMS`!5agMz6~5@OjdSgN+N0}gj_r7y}w>_@fe%nl>4ohUqUgb`?1rZI$Iiw> z93Gbcy{Lj@KmPywyiJ3X&{mcJ6!5jZ<58FO_D);4t;2UchvBDB8lfYxj*9R!$Txg^ zq-op;^43g}Pk(xK+rbF8vDSntWo4C8%~m5_;x^DMpu@g<6p!#Lr3)`K!_CWb7bnl#-aDH`w=TI ztIK8!Yn8*%A+_n(L)NVnl{C767Kcru#QN!7yTP!-5e4ov0?u2x&MURJl!FSz&bD4d z=!~qaXj99h7m3Ko7YkBYUOM8o8W=GRXV|zX1c`~R>R+EvJWy?kX7nUVeQV&gddhun zv-PAWx%Yzv$fS*sF6~Q<J#ZvrCFgz*U%PcJIrVj0VgB z^(muq41ZifdS=t@%tyr&zJf+48XioXIvX^EMD`}DjD^rfeVw!?6cdA=p%3*mopkqO zaIS&^TV~^!sIGVc&PP)Cw*(@bPYeU)BiBeRF%1EVIvFwp9xe|2D{{|^KP+w{9p{=I z4nzo%XH38pP^=sb%qHaPGTFhlEsx=EeNJf3s3KPsz(=RL&B{Jwg{GHjnT3Fd{2n-p zEHvr=>yv$jJnvm)1u(B{vpi(yUs@bc@)%p3n8d~5R~ri4rZ)fR)3eAYD=aeSNSoSV zHA_qrtpWbNRY~4h2u}(23&g+wL5c!3$3hasIHmUcExvt7#eK+UAMgb&=X0in4tK83 zP4b(v!rYp83o|3)ts69s;Z>vbwVJJW&q;mlaa@dZ(H2Ef?T&v z)i=4(MBm#^m+n}18VRh~JK^?0>~o&|;a)Mb_S){eDPEg7;ftkB){4tK2F#M-jTqA` zX|bSuSD~Yj<@+dsn`;${REN!vY|evbmNEfVCPTS#`tAd>;lxv7?jBBCVqTUKvT-#^ z@80pM%}l=H=e&uFi%ANs+-FWQUbQk)diwS2G@Ti#?QqP2+xC%7|6Z328BrTzHV(`~ z?ZVo7-D*(m&SbL;|t8Y#5=2n;c;2wjrW9I~GE zQT;UXwXYSEHem#C2a?XCH9{Pg)A9IeY6{hoIbx=lFu8`t-fi5-JK|X!?WYfyI=flC z##_7*arKk>Aa7CFzunX{IhLwDrDT*+EXi*yE(PBhe zDr$u~_d-;EF*}09t2AqF_q7)(|CQ@7;lfy{WIYN5y4-sA05-1OSBo{4;(R1psD7W9 z>HqYDjwK!Wj4m>t>92i2n&ykuU?|Www@neRt5;s-RM3DR;5;Z*Sh}f|`>C8kr}_)A z+dlX1&f)Xzmdwf!Ao@6Os>j`?f9a|nv-<7HxZh=_(r_8ovQuw~*|^xz;0B2u=IO*p z%a3c?Nfz&!hX@tKk-!LJQsgsz22ClOwg-WhheOB27e?4RWt|gSunVK_WEJ$MbH>^^bF^oofmLF z(5DEt``&NjjuV#gN4vY3Rm{zw=d>%sfcp)x*3-lVt-XYAb#)i34OTHGG{@N9P;yS0 zm{KAo#-Je!3FEHwUTQ`f$-<%H^B~!?)Vc1@(&5&xX^#)rltM!XKw0_`!WjAk0{aVx zg3iu#vP)oED;av#h|4Is5kS_JYZck`{<2+_JGktBT_0oG5xA|;Xb3eQr1{VBM;SceX_W%&TCU)aDpyszyMwwjO(@x=7k zWl-0m!FGKpv83Q}tp{I$buRdMGG7=MT+geY23(Lx1B<`e)C&GWjPlu(;AuEDP?(84Wh%lgu15fe)rd~&#pgb zC7GYTzCqpkG%;i9JaOZF2-+v185NkH9UF{(a#DUt$hIcp$7l2UCG zC^({~Bx+T4ai-(r+Ci3L*h(s1_>U{@=W3*UBSf%?IhHH&*Vxqmea2;e<)RU(Hd`}!t-;mfM68g;Bsz3+8 zt`jzbyslmS5PHZ^X~aDH@a!=HA~FJ67k9W@pw6oCR)j)BARvC!qq2|?iwbzzdTB<{ zpC!1if5x_!rL=@=#2^gjUT9kA))$8?5BP`E4WFE^noP2%FN81y{)XZxKiDzjD7j85 z0_*96r_X^>e(yIJdRmqs1<%^X*=gYLTy3N@lZEb$AkqDZ-1XCfS>|V}DFQy!nzJi->?(oV*Bi(a?1g&`}qi;8aB=Si{LwRY-EJe3Kp zYz1vvk5w>uMyZdhuR5&AMDVnGBN*2HrFsdd4rg?GZ(5qmB&ER5lM4qdIps^D)?U zG=OB~<-;DDH>N|TSkiE4JA6tE1$>(zlCq~*1lRl{ws-e(-A?IDI`4rSlk$OX54ex3 zAUAhvVpqwVe zSYjaq$|*O(*IwHmATLQwj6|1im+AH-;0Z0PJ-i>Y4m>HrTM}#5`p{1(Z4GBgl~~;H z3bGx-y5P(eLcd(5?U%lhqLchiem3zqFu#Z0aDNTE(W6rSvY0;Ko%Y+Q@Y`x!ec4{W zB|<(QH0rm9rXbd*z77bdDAr)%R0u^_CnA)GB=YAc!V&|sZrHCs0+B1#e?+cib$dSM z_Ceh6Le&lfUDc;$jG3kHGN2SS2~NK0Wu4R`zVCgCmp0k_wfUF0BUiY zTI?(^Vhlp13(R_=W)+W0DjxJ##r!vYr0+A_QNTherHb>{2^Tri-v5(x5urW6^yKUG z2&||o346((GP$rI6o$N7hBji#G1mtaEE!h8`>w3!?$bmo6)AH#nPwez5 
ze?HnY%aJt-=-Y|p`0ZW^5w7+3Q*55S1^U5S>AhlcnzUojDzA$R&WTXGDmj&F@vSr& zWC+xEQd##_-VX%wNgSPPO}V9sC=Z96+s;eV-zkVv4Ah-T< z#q562K!5OXL6|j5fKr^ms?Htjx&~taX+^lH)J#D%2cy4cEyJ|!LwfIHp z5z7OL^|*eu8C`FFc3@BLLTG?-?ihKoZ;v4H<~&b0N>=bTn{BWFGXR)htVKG|&davU z_6sqs4E3?JOMsEM4sLJQfaR!qg?BXRX9-cX_0@oU0&Y>1ugwYqxUML`z@MGa!;DI9gj)~Vsa`|y{$wb%d!gk^cZ%fZ4u}A#zh7J^=p2ni5UV@0fNu#J2*IIiw zd_Moqn&TF8_R{fkRqreQX3qEfr8a&G)w=H{4m|gfHjP8c7kR0!QHM>S7ZjioJry%% zYmvB(8I5Hr5^uKo;xzBWZ)_G~wcH&@$7Q9Z){W~kZLC#AMhr;)Ci-#0D~drDl!o|i9h@i}8rv9KNZjxl(a&LFL_9-L^ozH)5L^uiPi;jegOb769;y2labJCc)8aIUJj55kO`U!m zb=Xy_HQ$^b;m3wgqQJnubQI`K;bH)*#B4?>bVfK?pB-9lfdP1c_1Vt>R)}8%tmR=9 z6TT2Z>=(e=eHAK(_T`hmtNE!=MjPsPk-LkKXf2VlRzF*!uP|_xaNJ);1}SI;DT!DC zYj`i}Bs%zfw|c%!mWUZn-V0Bd$23Y6jJN52)8rRhyKx^Q%&cqc!abx#sXd`<&1FX! zMlvPXc|S=rx3jc0uY&kWS4+Yrg}eVcTEfG;5B2pz*d1JQ8?6+T?cic4Xo4VithnI; z6E?U*tv5W~NqxX;#(q@%Q$dmBrHtD^z?i4wWgY9iYpLIEQPOo)mhCrGp|h=q^iI0r z3u(Y*Jg>Mb{U2AgEh_-ptvMWZ|9aOeBLia~$Aki%b)8jH^%_Hm z+g6@iEA=g@3IaD_nb({Y#lI$Yy*GE;UdmamO4=zNt8I z=(=hZ*S2XW(2COsZbdICv2yJB9*4(ouGw}vUNy+z9Byd6@tNMM_wG$;)G+TJK5#VT+N4NOy=r;*~fzr=~5ACO0U6eOB!B=&&Ba;Tgv;L@2in zC$f2@D9bAWI{NSZtQqM(^^wt}9-5<@*e2-&_H-3WMj2MTr(&I`;KLfY5;NSd+)hEt zH2pn&?Hb*f9e6I-UhMq1OTf|u9-YAR!|MAga<~^J$&uTJ!d}3!4UF!xIoW5TPlXZh zK9^|$wDzAw6Fe)xGQb%o!Dm7Htv!PW#~T5^zGDeDe0t;lMiT95Ztv67Jt?1L7sHQe zb#I^3vfzxm>Ard7-*QlpDD^LQM{6VjKKgvJQ!g`~OjCKc8$}51C)bck{OglNAuql` zUEVWKBxP(^$==lOW~t<=E4?$#q*c$tyt2t&tUGR%K3KxxG20cKFLqYuRKVAlZo5&w zggDT0aNo(}Ja!?47`UJb%|C2g@nHE&)^c)S?$zQRexAt+_GOa?^YZ zfd`DC)Q5&Oms5$ovCH|lrfx2)OJE1cjvs2x z&1f1yrZ}HgP$KV{rn)znXO|V0WCC=Q#d^Zs7z^{zkGxbzBu@bs0o_5?gn?Ho8uD#C zAT}>u&r8n>VEJd>&F@*~q6)-Q6*+zdS{B&7607NxcfWd;P%f_;wH=&1;vo6yz#DIY z8^VVNUmC*2XoY)$5`J?=$ylresz)$!Ya~3hUK{A~!tKWnj@j31HR*6^ko^X`ccvDJ z*PFn$3ee1Uk&5$q5p+M4d3@Ui^VJxAS+n6i5Z-=R-K*ikv6o{OA173=kQnHiUO zaB~K}usNh`-B@=xkXt_jbW#0#&Uh+9BI(CJ`4t2(qrvl~8=_STE40Tuh{p7F+JTi@ z`)2{jw=1NmR1D?rLtd;m6gz&f#kk`_a5#3O*i7MRSTZxcWey%2J!I~Y2*+UN;?xLW zE`omZr?#lv=;UajWB`?l0PLWPYjj|Mlp8Gz+6Ty&Qc?&6&sf765l{{kX{x85eL)N% z2OK>_eteXpb%IK$Ok9!teIM@z`Rt{CP8b|>(f5@?8v?}v;Lv<{EknXdv9Tkq;1{8B z)is<0Z;&pe0JQyfWFP{hNyeNf>#1d=L-lY8Vo%DouRDvq6lgryadz9@*ca_Sw6dFS ztF%LYxKD>QI=w6==5cu5X3T8Ra2k(ZeHgGWcWf%Jt6(!!*%pPwl#TbeyCyNdYvf8* z*L|#8bIc=jdZObwU zxm3_gObJiFIh3S$GY5AWhb%nv&qe%i3A6H-FM*?;eob#CG3J+0;fulIBQd(_<&Wyb z7EZ`(F#Xn<%eAI)kYzch=|&gH>(G0f?sauJ$Z1ReyBFe|?L!U!&9dWO3-Ncx{f1rz z4TP|?1OFntQE_rTkfGt9DJWZt3e(Um~)HU-Yc9}nRL81N0xQ$)oJb>G1c$wYCm=? 
z@*U!Cse3v1-)#E&0x8vA`qnp7Gs~Neha95%-wogKT&H_3CW+2Oa}2f#K8QA6y42ye zVN!?YrGbtjl1(mNaSn2xAf0V27?24*2D-96fia||v*Uo~M%i0gwNo^L>$OYjMM#P* z zpLD3dF~oNtjBk*hFXQw@46-l}*tLytL=%v0=o2We2en67zTB!@zW6aM(DQ6p9j|<&I#_k1o^PFQtJ_ne{vVCNS~0JvqT_pEzCC ze9oX4eLu~Gf*x5Wc!F#Hqz?;%iro@YHrsv=G zN6dQQNXD9uhS@SW=FEtA9%m^Q41p!dHt6E)Jf_vw?8h_TR4?5UIxMgzh9B)t;0OKO zsk3>IMSoa@GIk&|VEh6-r-QnlhBAUiS$0e}6_gF7p!bT2CU`HK0FgR;g%<7f$>_IF zp6!gbPpmuT(b4cD9kw?+M=<4BP+Ymlc14H5O$ZXz`GJB<+8f(M4k*H4wUOdQ54U}yw{1aqL|K8&aihXM1XF(oaiZdd>@czM}hHuocY^9zLz?2&{iK; zt^u;G!Z*IN1V{%^Lqm}TuX9Mb*JZ$kwRTm_d}jLlc$>=8^-)-kJ;Gtb3`e+dOZV@2 z7q8yj=brhyl;nx#PU3TpG%t#4EKNYh5%cYhQqP$@JZHU>OeK0G0fkJn=#R*s;*dz;lu{DC{5$RdBTIv(%%vP`qgTH{xwR(!>A=)FZ}t4_r{Vo#PFP z!)DU_vZW6@t?NSmaNa8kXMKiB4YU&*weCApGfQlxt)45OQ6^riJv;4?z#F;Th|S@( z1gIMwa|_~C;3(Qs(DV;u7#V2_lfrTMg*uxA=Odo(2K~V;>z*`OVo`ptB1q6Jq_t!Qk=81+5#cD6*i~N-ZUXZVi z__J3I8~~yZ5Ix$7bH^Aqvj-;-OvHlqRt zdPe^k=!w9-x=A93-CrAl$Mb3z=tP~Xt7c|hCQE)<H%P@!so#t+at} zc-bZwd1i2!#d}r%^>)mTu2+oEO&!Pv zT3&~Jo`D(pMgvsxp|whu|Detjc}i_yshVXe1Bt42X9J) z)0D&q<-L(6#xfv#1hCgAgMA8$7+IWuW&!*gR{D!HeLRyF3#fo8BZA+GO|RZN>}EY{}X`ehHtzP z`o4|o{|Ug5aNME_e#bAqpf#a$3!{i+VODp1A1gMT4<{WBvh4V>s6~#vD!)PxZRh10 zIe?(dxt5Q8FT(T`oH+p?!~xviV%m!suAialTwo0u8v@9j03<=$5L>-3P(bDc+4zUr zt>%q;zqdLXCoXV+D6Omn4?E?*h!&WSI8TNH6UCVf?nI13jnu!~LLzKQ)!J!1+G%v| z4%zYn9p>E9R~xLo9K=JWJnm1~yWgZ<-O`han^)tCrAHtIQW-#uFU*t; zw|-g3x^+JO%^SY9ZSxwvdb57tg}hg{DRh%zn{N9Nh%0E|7T|SnX(Q9qo4q(Y;uyOm zFflE`G1UNNg+1Q5RVVBr!j90%;Vw#=dh>QJhpF27dr_$M5p>!kpZ&#k!%PRd;@~wF z{>OGCyARL;^Q3YcU;X9h@q$$cQuZDT58eUvR@2R<3(4|5F~1x1)U@h6vXOPli!-Ct zjyv{V852I<^QOC3PHG@zBU6g9!TW7Yx62!ilnfE%vpf5&J1*<@5mnXI)nmF1T9e(S zwWZ-GHj_e-I82}!msj+cqAJJR>fN?2lk0#~X^R@f<>xx%T z-^Jt594Ue#OCNM=A@uO4QExYw&i6of{wE}pEX=**bf%9Xjl7LFQ%6{i8#$&42?tCM z7s%(|P1DZ8ZeKR;YqX{BS{wayWo ziP=m_-9`|rI=kWK(48Mszw#z;(cK&MZm;15pz%S--vDD{}Hi`i7Al^1q8$! ze2d6~gz$gg@d$_9&H|f`Dgc1Q)=1)YVPS7TfE=Cd{upS9LMW+ARlq6p<^@K`4-_Ye zRRQ!^zI4t3h#-<+e^q#3YAp_!S|bZ0w>j3QdHUcD>wyZ>awY~UQec+d}B7(4D2WDRaFbmQHQ*~DG5RsI+P@rFpS zo9x|fGUJ>e-T7LicI6!EcVVmz^i0!wYghHr z({Ujc?~zYKz9zz=kOp#h;q}G2K(F66KtE5F^yumrO$e+B|>Mxu9K( z8~)8! z!%dRsT#O9ozD27#e8ie&VMc{bw!amZP{ThiYuvZ2OEBd(@4VOA(pMOWt;N^>)6vH7 zB|F6enw~!zTT{Fu(NCqc|4LJ3;t(xUt0KBYOFB@iZPl|iI?WyCD=PbrfKhw8A2nc7 ziAYe0oE0PX#x(wlbB>guhfEs{Sw|SY3ir_9Gj7TrxjTy;cSX5Gb^9=t3)#o2ZO#DR zP$3kN?d%X?-*Qo0qE^k35zNzeuD^&rypRi=*^*rE$#qomXHbC19t2}$&1lm3E_&}C zJu_vEf)nMtzfK0gI-@>^sO>5j)kP2?5(Rr7RXVtQp?2xMh+s7r#@BsCyh%pWQCohzjU6p2BL!rd4&QxYlnub321GIT4#sM> zpFlZ5gU|k?r*6TU0a<-NhyEN9`jO5~sd-#kbxk#BoSa-nAIf6KD9TKeI*qbf45oO! 
zUnE-SjOdfte|uxG_oY9R`6_K5l>Pwd-an2+n`>*PjL&CSE9EaLR&$(40a?=a>kf4bP{=^1rbcr zQ8GcQY=WOPfyCQETCc?Y<#7Ot?-hjN3xst1qWB{3{J%i)mCg|KGF5+gU+rH1wJwae zc6Pg|ymvdOYt%Yn$w3((#G4z*)bRC7+)p>HovmB>%K6gf}&8!xEHC?<5JlpkN%p8n-Af0kQEX9@6u*9d`^~B^CktG1g z@qbW6E{5>7Ea*y32i@6rmVVT)jF9ws(O6e=Mv(A)+8n&~TU<9ji@b1(8g#~(4LyLt!WRPPqm!Tq)TbT=~liXz%2mQ2OA`soGF zrM9zu`x)Yc720q5I+ed5y#&%9dnD#&qe=63m?zMC@2ovE5B=i>b2oPmjPs@Mus zSJkA1Cqu}tC-LYCMJ#BL!Kj7CMIZO#sz1}-~4{+Cs{j3jBv$jkl^qjtk`?AHq;g{9j4euRHiHUr6 zfI04uieZFlxB3gwskQj%g26wCQbsS5V2D(Po(V)NBxg~F1j0ke_Rv8SUsCl2Z&O8R z#>5yc8OLX39D2vMtvSO!MfMMR&6#BvM3Sz(H@*zk-C2LT&u4aVM?ULffDdYr0$G+w z=B|Bkb*Jz=D!W48no}swWNRGLy90+OAZrz+_bsJS1dLwmVQu5T8jeuulhPT*Lw|7%9`fp z>nWTX-Z%%<_i-`j_o1Vd5N2M=N^ak~?GtZ7lXcelZ%uPdb3AjMV1`rDZ&j5s>0TGl zYA>y$lFsEg-==$=`L~fZ(589$9zNc#Ns?FfYVhuD32}0DfC8hx(4m80;@<9<=l2POmw+H2X6zSQf z7bfXrpB0*njJ1PBf!My%>m^++@deNfLI`7HJ*%f0?OX&Nx721OG2Gru` zKryB6G@V33TE>RQJE%pUEftmX!=mYn`BRsQX>{OX7_{`MvWJZYjZBYvoeqf<&X^}OQ5HmwfL09o&&vzG!R5}oU5i6|0^@DW zs7fXHu&#Yog|CsloDIz8d)aNwmz?YAwl=tyP5O_QPlL6}5@vY=HL?V%igMR3zdN_q zA6s#0UtSV?*x6p2S?mA^FEt#_AfF>t(9jZ#!-kCQWMP2)m1 zl%^v^okV(zuB?>|kp0~53cXM07p}E8u;U^)wc;!1+*F44*Ubi4m{)T3?~gDA2H5s5 zxuk+~pOvp1)lC(rX|+1EbskmULc5)GsJC&tcXG{HP~WU5A&!;3@7qby)!FHP;{8xV zJ63=_5LwL5A}w~_3O}RXG)FQEqSpC1%EElZTz~fB{=68GOeNVn@#JA1{KLWFJ$(6! zJA%C*kjV5VZDr=*Xyxr;8$lwdu_&*Xy{gMH!TS>h`<3RQh(G1yXhxG!4^Er=XwWrf z(_!h!9jA}(u_iPMV~15jJUFAGBK5`BqQbPLG)1NX{QmFPT|_nIWr|ew+V51d&i0Hn zHJ)(4mg(q`?cIR0O?5B?*xRUo$=<#`2E*Zoi;>HR0OnAT)h&kXSvO`ch_GqK*6d4! z4-3<7OQ9#oALMbjd{^>BeG{T0VVsYXp2l!^#*JlP?F>XlhQjF~1KK4yAI_?Rt|%mb z+iqW?XHaO=JOOs>;x49Qk>N4;Jfhf;kPqw~(Ip=17;<{Nn_H;4OvR|rsA`IO7^ycF zj-}s$uA4kq+CBKd#`OjQ)_+gfb%PZ4TLYJ-o&5?fo2L)zDb&)g z63-$?X2ki9O7J5{CYFdI56n3G!-lLHsUPmbevEXLZ?RB}zFO9PQB-958IYc+B`aK5 zNmF*1B`n$XF^?!M-Ezv^g9XkY1QSztFHiMZlOGGGPrO@ebZsY&2CC<=n$EJ=3i3Jy z#g+iM-V(b%;fL7^u62s5fdI|0v|9Wv@kj>*y7CVo)zX$ep@5%mLj zqSs^ooB&&6-_<+ZDLpTY9DoP#q@i2x{!f@bT5iw)%GVU=It#vVC-6IvSvNL-*o0g1~pD&f;^gqh$uAi6WF&H9kv-$DYz zZ%o-nkq~eyrVP#~M6G>IxsK(ikh)B|Dbd3HQCyntBVRRqXAzrBvj+5jFDx>JhBaRB zMQmtog`V5D+o>b>Qh$H|p6o0JBa^hr%$xO*WUt;fuJU4Jhl@F%2lbTVSjZ-ym)Guc zf39t|?q+*AtbuJd*SfGy6rPW!K-nii=Hiu&-SE|g&v^mH{Bn#(uT zm3f?ZOqPgf^y@ijkiP*Ea19mG-_ZMCNDwnvAPkoT>WSa~<_q_Wmuq?z;zyjOuQN;M zc7@=gs~c21r}-Lu*NrXrM)xQc#0Y%=FK1-Dmn$2u0>B>=Qxl)qcc&&Vnu_W({PFOO zChM6SHN^Wg-$7J^B;4u7X+53}!ed^c`V*?+2yv&cZ_~Xv<4s_`WmM397nzv2K!kZx zyOB6Cw_VO-Z#HwPw&sjB#L)DT(b5w1Ghj%>tjN^Vw3k!8+Bo5oJ@Of)S2ieJi;*{eS?Lf5g*jypIdBlhA?R;nT1l@N7;qUb)55z2!UAwP^j zwiJjZl7TqmS82lnWM$B_p(^Pit=_DO6tjpgg!@AsqXO(~@#Wv_Z1q(*WlY52Wjvr5 z)`SxWl3B=zv*}u0l`w#0mLO@u*)(t*81>W1qv};1EFhSoql5F`ZLvZU8xK5)4)g0K z9fTAOFFRT3UwQfl*#8ms^0m_KSsrj`1@CX@-(C!IdWmUcE?j8w2s{?QNqV|M4iMBVzp#kB zEg<>w>)qy_+Sm>5i=FHmfwQH=h4Bf)%L7|1v1v0q3Q(ne&I=ZnjkuP^%r>dP1O#8wd zzhm5mI(Y&Eq$+QUQJlJ;vY}0>jSM2v0-@7$z~QY z6=18DJXoBqL!lW$-7Mplp##C7BII~ygh)Jaa@r}T-J8v}qp?8m9B%UgUxKk=0hNf7PHyb zO^3lKsE1`xXXsRNhYA|@SpbDVTV-LQQ)YOW_)uIAW_-;L=Di+pu#dyiwlTRW@6RsC zZd6lPx_fm&^#HU?IZFcLp>m;22!jvu%yL9BcX=glMXPz&c3#c%*9^2>1}I^`gh4o~ z_V=#+D{V0hL>L04>>&Lc-bEPv6S!@ma~|yJXBkrIv|VF(Jf@#~?+G{ba!Jwe?IvQQ z^MpSDFg(@I_+kpnj*f^{Flc+GI?DORHwVpODf)Swd47WB(Yx8TS;*J31IQOK=^H z2c^}RZcMcikbq446<=Hu#@1>b>~W5?LW#M$m8 zAmwlE9*w8NaU?N#*gm44N%yTh{$@Fuci!6D!`qv6s_o@JdA6uWsQ+-l{?)7c1QEyn z^Jz`j4S7|%A!Pk8qezz<762f~8F5LEaqaD97ZJm{SQR~NZ}YJMWt%oLxJjcsg2~g^ z>3WwWQ9F&4A*bB0?nUo)zM`RAPUtP4XEhKonE#_QsTXi!snlgf#Nx8#i6q5d84SulyP3$e&;^8l@hLnsDDZ6AOt*myD+#x zoK>tOYdj3(LkZ!SK#2?d*}@SZjd!IJ_g^B6vRjk?Jj%qdO0otN`jo2Anjq*~s0 zNSXl2(ZUy-_8TWgTj=j@^zZDjf2{wbVIay)n2)=nzmLRIVvQIeU;S%@&?l}w4+ZA; 
zweKRbV}PAE6alUUez-ZbMVOSUmw6?~_3Z~zlA4~Z)_x{HAQ@;(9a|Yz5=63QlhI@W z4Fls&!pB`X=aEtPwA8r^Q@YB=%f@FpI-Al0D`A7qn90$Fh`s$m9Of6P$|PLHdGX`N zR+ae{Kx~7BRRO3LNp)>}gIIyYdOC>Sdb$aFWW4L6)Eh20iSM|7WpWX|Mq~#g&Yb|! z(E{dZX!^@i+zikJz(%^8g1)0_=Qcu=jdmW#s(uir05k{((anB*;pGk>1+k->SiTHl z<5&>%0u&tR9}tjf=4kr_&*4Vq+h_sEap&Q#9ya4PXTNcFS7%Tl`<&>{+MzQ5Knxrf z@zla+HlBqHvgn336tVu^0RM5Xs7XR*i0|k3-u)fAR)hgQYD_m`53goz`=LQt6Kw4P zjh(S0w-@U5k-Z`JtXTwrRywP0eB1eS^-GCYf!{}JQamJAsO|L*z&wo`s%j+<2?@QM z$Ki3|FMI8Ue4>=R0VqU+H)s$eW}oY0Ag#t9+!m^p2=v48KOt!~e8XRsh5LUcYQDhn zdG-qwodxX7g#bHqDnx@=C>Fpzuy+J&B@?0tR9i=qfFxf^PHO6TAI4SxY(xI=ir;Kixgzw>~fK$_&ho(={uf@@{-ZXI zdO7fyFZh9=!XThvK$AgmyBfeAWnh+L`l8D5Mn!XA%gM>fwVy$EFcp7BEx`gA@KkG+ zmT1N51$M!0Ng|c_^O3ke9MxETGAUM&)}pnEx*Gbv8GSXK7ZLs&L6#Ks(bVIG#DWB{ zRiHY6t#TZQ3T@=S-)0HqXO66JIj4zKK0$0Td(K^eiX(ypndHN3(_q!2HG)D(3!5V&NT&fZDMZmlguirIxhOE!I0g$@39u zCNu?z&usY>pV>_nvKOPY1&afD;$ZU{q3xI`{fYE+aNM-*uBfNi&+4J2*r$~4Eeznp zBeYsJf$KKM# zS9C+Gx96Fv6MW1qDKQ*M3hkfWfqas@^v4?l=)Hp?`_qB zBc=C$s-Ue{MC*!ogJ&J9_h{ViaZqt~re(CCsy)QEOp5CB#`83b6Y~0j7Gd@ja0u7gv6%a@jh*>5`i*qqP8MsTiJVU`7x2LEq6;jc=!SYHQXpw4!9JS98tMtOI1nJh&7H<99f?45Xqy;KbCkFvmvz5jAE)E9MVGs+%C0Z#P= z)GmezW;zbm?!_r--s<(+FR>7ulIEp?4^mi}ZcO5~hOBF<0uzJNt_|J{3JrQ~eqsLs z>?5IQfr854TjVbaG+}O_Jfj$lu=-tb`$GSENwk@AR$hJPljyI;_?Mu#erO*ZW(lV8 z$j9+oB~XlvCnOIr$lyVUslEoRlx%`b+KZwG^DNA=5~>`kQ^&6Zj3uwN%K4p@r2PZk zja|JB!{s7l>jr$sMq1MR&RD{OFuSR`+n(wAjY__yA*Q2bgB)wWEXx$Uh#0U5KFpU| zN`>C#_pn1j-~|XU50u#XxV^-5i=hWDuN=r)sFZ`qum$)_F_Vc!+a-1Nxfj26i-Z%? zK6TTGrT_?LT;@YeWE-h3uP~p$#ps{K0YsgMBwp7{^U0#%usA1)8%XA#tP3Mr)gm) z3?)@n#xIwu%5NozhggQvw_fz74%rqQfrZqQtB z%mXzAxa^JUlwG+8ewC_?JwKP4^jLjAo%MrETbM&|=I8E+GBcmBxzr|16QAPjL~7rZ z$^*BMJxZ1zxb*MZHQq@xqjcJlTg3CDM>Pd~9tlPCgs)(ljZ^r1`3?{kAK8 zqcM1Fw^n(gN=WzZ>*iQSNZK*kqGqjH$Ni4L74ZsXlMwqmh*H= zqqCBL890rHs|3921h7LlHs-ZrQhgOC7s`9PQX__!=+@#?3TF;jM7JXIOZ{C2#-?xzlo65n#B4Yc1@$}yeXxw>m-gghL^Li2gL=p^IO@oc-ms@~=mV3e z1t)&9y3S6xW$jMh*&TR@udMhR8Cr^MC$YnV;}lPa zRn>yi*|cF4PuU++vyw7apys@HCqt)JQZz0gXwYF(shEe`l62J}++Ryvb8~kmx<;sG z$2xw(HaImgp{#9*uBxtnjyS;qsc29j70vkKh|IwOXfXWrr8}Ua;ZRjmReJUV>~Lz# zDJo)C7X5klXCcyQ=+2{vQyJAOF}Y}R4-J-X28NUlx!vTHdY6u4Y#(@65j8rbzfdcY zF*Dr$mDSa+C>>)TSk@N^5c}(94;QrStCnkhiQ;~Um*2Z;h?n2?D;uE4I)e0A$jyKB zSP%nv3LrBo8kY4H6p^GI(uQfs@h|}NQ`6f8Jb}cGJ{b*cv|oH8phkCg30*0cNlr`n zEu)!?_k3p8KodX|gZ4gUFk+Jlc{x1e1ZJ_0z$_M}RsWA!Y{$1L_5pjShqz|;J}~vg zog{C}%R5hK!?!`T?zLCtuP!om5^WzgFnhO~D87uG@w9ICHdwG^Mse!aMOAA47)n%X ztgrWb0XW`#ixO%&+2U~(({Pml8muwOAwmj$jDgcZeQLwIweEV|)?yWmJ!}k9%LM1? 
zQT%^90-{t!c(usFth=4&G;5aj1ViY~X4ay7fZy=Ry!YdMS|?cMGtEep_uU$QLkl_g z?SA=AD=M?YsD@z#hv~)sOyoEE7PRhCn?ar}MJdFgxV?85^Y#j}hEW1q)NO|6IiKaT z9&ei-?LRHJtVNj3{xk!A;8zjw%>q)9oeEf4`IPe*ClSuv9lr(>_1UcPpX}Ru1kWQs z<#PJ^_3L&1CzFgVinN2-{!kk9dI%yT%z&^i@&#>Ze0prIkFEPsQ9b@ndhO%SCZqV5 zi876!lGkgLbdqTM(E^HzH{PSk0r{mc_xCu_{#cx90aUS+v?3S!Y+@nGS&S6yG_JrH z5=u;{6}ywFjwRAP{%w!iNQuAC-%uJZUYrpq)qfsC!u;PhhLmacU>-zjWV30c*Yh&c zUX%|$Ld)^hF;0mOJ}i|6--QqLZLh(L+Y`i%_-%;GYW5YRHofa^=*K8(Be|v~o;dM< zp(eR8wZI(-VD)6TWf-~ndd}4&>x;kf<*qGso{kw)q_3$Lg)@%BEFs?e)Dz{Xsbt8$ z5VvC`j$sYd-WM>@nR}=MdhwfX`1^kNdsR-)2I~*-Fd{S$H-bp5GJpj#DPS~tkKVoa ztsG+@g@;YSx&#rpUS`OFg~GMnr~6=+R?6NlblOerx+dn?>|Kvh73AvLZMgTn%E?sl zY*=ik=xec^k@l}*k&+3P{#~2LSV3u?8D$6}yT{WON^c@Iu9l5XF3g1FMGwTenrlnr z)c7EsG!CY=@X6$aDla!f zHvQI!R6+6MK&lqi{2Xp|l;so0><7_=F3c<{+wSLC$(RAqc{m8r%V>5;zq;d2*gex9tyA*8PYpEkH)*_ej-<|An&Hr!yQp4Jar(OTq+|g_zEXb8gv5_Yl z(QANch<(U>hIqTLY>D*j^b#S#QP8T9{!?Q=4RgflT_nrHinoVf;*g8q{8Q%gzAy&V zO8g>67uM$Ahkop6b}Nz#9Whn!P|&!qM%K zY=2JlnbH*?5MbVZXoRV5fA!dXTuEvKVD7JX*Z%6+Amw9h(Dl$36AM?0R5-aT|69!- z!ZG$CrBq4RMsR5Is%K4}tG&6_;c!yW4O)|A3hDL+#`hX?rh+hFIkp=&pGjxPW&x7O=Kr#R(lfM>BeawwWS6bpOT@b{H~4F*hxd=4+l}hjCoeY}O}aT} zq?oN-OpS%o-zP8S^5~GP6f)ml558&cin#8=u=Zj(RJhhoq1d$!0 zgMri$dXQ~9Hbw&=98%1LTn^FEzWBNNUI6F#4h4)BLB_VdE%KZOUW#75$y*`!#->(VCC9KA#cltcxE6r%93B#LjVto9o>=PK!&C>z8igT`_J5DPFAf0j7?_-7E$sBg6o> zm##`O2EKP#xLDey3bg_z7wZs{3;Hv_+8aD&q7h(t)%O}?%O zpv@^PpB?%wKEjg;DN5P=@;=H4HkIcNC4&Um3dd7?U-5um*LNd(%oAAj;A%EEP8q3> zW}1>7io9(y#FCmh9eCBM+vvdGRoKKt-R86AY7~x0W#=!}O+`25yZ=c$X2`M($yjKt zw0YoklOk%xK2@oAM}Hy8b?3yVx(XyYEi??i4bW1WkwPV8e(GvJQ}i{wrR3k+RAiv# zG&iG+u{;8Dw2^RPw(a}NCBPeV6YwJZE=6kj6Wg+8BhHr{(2|814Ow6on_ds;VlF8(<2|De!m)KI*fO}YCW4d;BkLoq?n`cicBFUC3y!G= zWo0fW1}c|lJ7YW9QG!_$>bj|%^EP6$#>tiim6em(^rEA)nwpwBrCsD~{LaqKWADtr zzc+rG&TVU;6D|t-X^VI7+gbrKEqH6ldEFm_H{`G$EH8zjdA;n9xt$xY^U`B#d_0Cd_7VmNCM#%Ma4BjZc8UXA0)qqe)d8@`z4 zekda+C+B)&3lKBi-FJXLVXpI{YcK(WhOnKM^rEI^g=(5EF_u7&Izgdab-YAmS1^TF z0xut*S=0Rm+j%nj9sMrP$mFDPb<=Lq*X^pNd&YjVbkB=S-G|foNHPbe9@pb`Tm>Sp z^)F4WYq8H&`fJvRYSliGyE75~5>2*+{My(pt-0e96B5)?5m{-1X}jiFg!AJ}Yow(O zc5BJjVtwdu{Wm46U%zCI*a^Qwaboi&;d&AOr7z<2fpi2=!P_C0rgUfWEDVz#b|NpW z$4UCUlyo%}wsMn_G=k;>kmn8Ry0(UeRhkO9>2w<;0P+k8z{Uv%q$mM^6orbYJ?z#a zTF|Q;x4bD_-Pt~HreA*EP@{{}5bs1?Iw=i^5jy<1h85|XQqF2Ja}37=M;(_?l=Ph+ z7YQW<`T*fX)YlgKqew3w%rzv!9(=>$N9wrc;M5V`;^`DFfz9wwA|Wf(L4 z+{{`%+};-)<_vxI1E<%)TM0v%rf^3``*HI}bl23@8C@c_aLCVOIU61j6E+ND?^9?G zPRdHOs4y?=?@#Zc`>J(5S`>&b3LC25>2BaR026M5qT*s;eM;g9Wk~*eo{HKNgdo7V zf_?jy{jCI{wm5VBvFNhZXPVVLKsm@BQ-?1vcy06vL7(BM;gY;(h^z4g9gR2^fd{bD z+u6xH@_xL1T5fYoXjU5+WXx~~*zEZ{o{cG#ew61sV6QzJl?A=^SacYZi}ctn%pZLp zuf5sbbk)JUy&A^sw7wR}7g=0)$V=U~zpt)kE1@T!Rnme$Iy_#eBB#={=A*tk$y5os zxbDyLg>v8b#nosho~y|71Tr<=r{< zx?Q|{&E`Jr)FXA;(S|X)%U72Q@`Q_y@J4dwd~Jo z#PXr-4lq?Run@XC7K>^?(!JkR-?8?-P0pv%CE1OM-owClu@mKdZP+3J2S1^TiuHYWzI;DJ@0E)5jAftC>Z zLrU5fywT^|t=`_Gc31=s1UXpi(oUkWxF3a~pg7B9#6{l3+N!akqkOZzxue>O&Rmy` z{i&2DgMrYE2lzTgbWtIc6Lmy%%ssy5n$_H1YINFv_wuLt<>lWS_21>?|7T7B<=GH7Gh)zn zo^VOHw@f}i^QOHgUhv!V_9nxrxuu@;+vfhv^3?kkqFB%zH{+>tKF`%vl@^?MZ>INx z1z9umoWB1a08}ukO!i#r#i@vifg((;+W)^?oh1NSon}ku;Uo&)EIR0dYtf}`tbWhh zO7N&2EbTQP)<67k+Q*zsW!8xCA~@ZFf{f=})NZ5#uzik7)8aU7hDJPcC`WBL2&iu6JtA zMFt{f1|6m)&`${P*F_{=cGkadzE^{B{pw3*=)B~~^%|}BHQ0dgG9_dVkObp)H@;|( zBFjH*4yvz#4olA#-2(070dqe+mzO13?EO_$&ZDc(`lJ_)l;?)q+3GNJ@2S{2YfklB zl69Qw9wjZvh!<_!2v7A4GH=%p4`>wd!gC--l=%aLwA!N1o%l+WN`!)QI2P zv7(lotZjX#mUxeVzFU}B$)4M$uh0aGl@=23<7qdnsWqC=tYe z08}qf|9Ij)dk+`^nHqD}y{**vLMXwwbjf$Cy7WN=>_%7g1^`&2_o5lD_=8&)*t$va z?OS?xF)ohmybATrZ|%UGGfLFSt}iq9bI|=$<2%*=?yvAyb6GV!gdp*@J4n$pnsTUX 
z)pYNqpmGKjS7K}?gAU_qo)>Rt9jCNGGnG#U8V>WUz1OUw8IA2Phl0p+_Q>tI`uoDq zq{wUo+}V08wnoMI-7hSFO09x_PF$nR2swg?)h~y2X2F7CE(h7;pkgXdB8-6eCx=d0 zBg3eMcOPSw5&Q!v-)ROh803^-Xs+oBTt23~kJd-8eGce~xE5s=-DhxH30jmzCSM@M zlIDh-#t#T-KD~E(wDEm-$XHkk6#Yp2PuSyo;QNm~K%lP_;&@sY`$V}s5;UHmm z_sO2>v^FVhd4b(-F3t`T`Rtb(jyzSKEv2L#+RHz>9r7Cu9^86A67CulH(bn`_cB4n zuR0}yueZ{F3<_M%8{M&Xkr{e{D9wgx6@>18-mKFK6~-~VHQ<^`?PJ<=Kd6Xb#I9C9 zs5@#bT)nFutX=t5KKSxvBk9{MP}Cmm#eMk9(@n^_cwX$B=oJ4|ljRXW3B^3`)+jIu z`ly>-w^H*51#c}3YwL%*RBO`LAG&W7yER#k9$xq*2V3?L;;)mpS_v>w*D(*C)c8Yk z!MC+w+H&>0B8sDR>x~Ls<19;7f6Eey-C9FbTeM#QrZ!e3p6}FU zg7+0!pzFjXT6`3+CCh#^SWBtYx3Wq$4-PMTD)(gpq+mt!As>H=UE> zoNp62%;RqEkG#{_-{T*?LVIol^gdai78Y}x-=btMv^vZvX@}mu=en{VkRi1a} zX~mM)ZDDpK?<*PK3l#-{+(GU&v}_qE&nF$1)1~}QhkQr%U)p6*t+e9Ofv-+f+3L1f?`nK`eRybf4$M~y zQ=Mj?Pv{c$Jq9#lu@cCV^Xiu^Lo6$R5-9>zU%=?OY=kO8AhF}m6D@# z#g;n}L{QGfv?1pC(&Gcy^Q)oT#Au$rd3l}$1y@%ERqW$k^1-w+WgU<7B$+I_ z=xknrhuM>Qk7(h9^Gew8>W?XXEZ^Lg1h>4`k-M4{G%4{f@?SQ5&YOb>@W4vg#`{q{ zIfSQ)7gWG-)@VP(!B25w6tSIcPzN}MoKs}HMiYtk3=CpEi!SfIYu@+1F`Vp6FcF5xp0n-%$M!{A#L%N(6(=s(&g#CDWy}I ztG-t;!u*u(3!IWg*_F7f<3o%yfA1re7DKW;grB3skKXdZ_EYrl_b^;AL|B_EMixVf z!w+}Wtr^@^9G#sDBREqgL!-k*{$av6DSba z1E4r(-RJBU^=kKKXvBhKt6G^y#GV$>YWKq$DVdmR&u*q|<&226xuu87BsLR_*YANx zW#gSR0sT?WIZ(81?S@cCD$=I8WIdmkXBw9`GO?Y}*9t9#v`RGb3#qH>-{0jDFuu#F z1h!Sq&}go#9CjUE#auAUjL~Pg*DCw-)-@Xhj>j#Cb5{KrkIM55!R-`WR$6x2je3;2 zl%@{dgrrst=aV^ez@?jSGsBBcwp@< z`+cUCe(#|MSgjMDuFJr+rJihAnYh5n<2=)?wGoCoz2n{D2mh*3+Gp+On7j(1wnGf- zLa9>=q=CxfTyRi9W4rmPd;g-M5xDVRtzK6b~yGe{ry`}ItZfRfG zpDCXh^13K*kHcLOpf_hSXYOJf;?J&}{XlOY>D|rD*Uik|z4G!C=hBw^tSb<|=^*I4 z@`hz;ZZ3b(!@P!s&*@tdw`=G-0m>h`h?wN z*?~GMQ6&Ol(SksQg}0qOU7jORSgIe4W`mNsqWCe&XhNLA5Dfz7vlZwom?#!}V5mPM z?$<`%yx2_4g{S#M`x^SPCOGHNT3{$zy9;)t8PVw@$*SrrA~+wmY2u{S?5+eW4lA1x znb2!WlRxgG((rENN(DKJCHVQJ2t1KtOebNn6&8kIKC~T%4mh@PP47aJgpq}xuuxa6 z#o{qYDQ$zN!)61Zw9eA1D$G&Ivj~b8w|cEgKv}kZ7&kj39BRVK2)6!xxc&d zSOEewFrMG2WgwwWm1J||QKzWei8Z^XJkdVGSm3!CGRG;!ZbWnLyldb?T1nW5lYPPX?`_W~)3e?TqYR@TcXSvRv zkNe|RTeN}KvT+PDH%N2ZPBRkMD4Plb=WiTOItXb!7d}5p|Cr$CNZRRJ! zl8SXb!ja09ba@o!9MZy`rtbEWAW|qT(t~#=UV@U}^%?o(@_ta0=_AN51}F_epLsr674pQ)Lr2f~lfhy)aizlE!?B*?E`pdL&+O@Kp@=XA z;Wr@`;c}!5A(vK+=N?o7(_&eNaQY?%iShF6Yh_g+@P{4`L!7=C#c;lURFuoWpBvC) zZ#hAY#E7|2uZOxSqRD6VO^O^M@Cxv ztwGhT6npt1VXldmm;0?cRW?&hR+T|?Gjcjk?p0Cv^lAksYlzNQrf*oh@6V@_K}RcM zC?Q9+drTDoq?I9~ZP)YcRcMB-wEp$Cx8<*~`DfDr*tk5KcOL-I3jwg2w_KY1UYx>Z z8TuLgh+X^OJy5Hts%txbA;!Ip0$S~n{gU*FM__=XetXp9y<6td0u|hV+w^nB+^Y?p ztAN>K1$Fzw8zG@Ojh1fu1-JC^?%cqq`yVDy4$y~HDvW! 
zm*@(pRE`)EAm{y84Bl4{Pc;5)FMGZ!s`e_Gb)-uoI z6gX);7c3%qcyQU1GQdBEUC67In?rxqsG>+$r{e~*-iI;kIHVPAI`tbX-$36FxI|`7 zgVkYz*JMXv&v&m}(%1UAt_QF2_fP23A)SD({QyVa@my+}rj|J&t*q{I zugb7~z|?v?O5lPhDV*SWLesJz>iAoe)Gw#hbtVdF{5)pnZ&)4yhDpHy*Ao8@GwtP~ z`;SixJsZ(}tG9=B3O!6VJl269LqQz|Et}Sv%yl`jK`VSjRuC8n2BVfnq+Vw!0Wy zM%{GevhhWI*Nb%-_#eDkx|yuB_=@gjVHu&0#)S)FC^0 z@Up4pM&-)`2sbx5Xx;Se3Fpu`Rif}m-12?_-?dQArlDs$4iyp~(gUP`7kPCDjvYEFWX zT@Wan=rcdp=b&AA(My%gTZrUuUs}#~R8X`H>q!+aZreS<8&Esi7_y8ISvTfIG~Fg{ zp>Z1TNcUkL#inYH2cY?b-0yC9Z8peGzZB)CYCvQQAsAEY1{;fIy&7Q*LiHBCr6 zjCPB3*{?;E6`zlDY=JX}_8)pSAaw3pJk#xn6o;6Jh!6faKJM`|K0zNzt;eGj3JC@% zNA{HZ5##l?l)@QvSbdkP;WB`*-x4rgC&>U?Nx+>-DRp=d&?O(7dXRkgZnhcU_Nq9G z;2P-9v#$3gVKa7UXP626*Tn=Svu`A0!qMKI3NFoS=>c_=;oK`PV0^6(+_$N`y9L?G zQ(<`hk2h0IYPvV0@ycH0-gmYffL~V5ccqqkhuGA?gM~2#?v;QXB2UQJtYg`*D^kwm zmD1&9)8l=6+(P?%bz3qkr82w)!1;kO&*g3>KV5Cks*bUigi)Erdqje80zZDqwhL6q z4CHnjsZW1fb==`%VbOD%=!%EhFUaP;crSftnN)>jPC1}icFJRXgLZi9=Z{x)1k}#Y zy(>$BNeId0kA{n+Yvn-CfvPr-@^s`X;6AE~*FHQj1g3mShw-fIQ&`7uV>u2F4=aP| z+zD^6vd1Y&fJU;-ui1t^AztEbqer0I-hs~j<@@l5@U3R?gvJfcHR@S`uT5gZGv+5<6uw#yk=R=j|e!4sU&o(gSDlxk^J?8@jOvYyBRmvUbJR_ zD%A~P^z>rI9GLQ0cPP=bYY_4Jz7{DJ4i%;;hDShhf1NZ7%E=Qj$+=5Zig2AF&mE+X zmEP?fd)rcp({97w0@*L~E6VjS43dz&LohsC&qAS&jc`!XK!OVen@yHw&l&;A6dn-B z(9lthKw02)c@XZBBg)m$y$SMiE|Zi?N%dHGPH#h568is6?jPAn{4Sy-dAlDpA;`5XH6ZP z)#5>GO599!)9J38iMuN4H?g&k54QtY&wX?OTr}t_#r?L_tI&Ph*QvZ5#LSi#O!ti` zz=r87zMO`qux$dT9u)wPJYk;F%>QWb%EPJZ+P5h~WXN<%2+2?!!kd}MEM$m`2ZfZ( z^LUPo@gfx|Q$&X{WtPJsBr=Z~PZ=@~$I!|2Tf4Wux9^><@Av)vx%TDS*LKdbpS9NW ztmnD!d+*ilx%8pTZ;y zJ~n2vo2V-ae(?v+Kw72Ee z0wkq>LIDwU+P~qAMxHL~`-&YTYq1KGdn2{gkTM$0IwzeF@3j8rO@cU-5u6}8K0}%= z)N?IGIt?w;MRmFt&CFftmDkgprbi_;vh0Y4Y`AEW1aJnPjr$=urD)IJw z5S-VMpJ?XBjp!tudJaoXd8yQ)wU^=szL{eS$Du?f#wLdOq+N19J4d*7ahf_2$f@IW z24wLC{w~%Zd4*lt0E+U)9AWPe$!b0+HUGgO?sAM1b<Omgn{H05g#)X;IEaOOTFZ=2V z)W>~qr_zd(OV#vppH$J~%xy-_>WKa2ke3^IE>;2W z)8x$Fr$}fyl@Hr#dW|Hd_DpM$>VM&s9!0s~_^jMq;;P(d1qu47z}=mRgDVjJ`2O}m z$wFPAy7e|hargK3+F~kCh7suGHsXhu_JLOvOMm^GwX{5A?zKAMkQJ-FUlRH{0AM^o z?ednXbA)hMBj>LR9u~ z+AelcD`Ol_hVLlF==~spR%%(<6e(vH#VZA|TO=t{&srLUN<(UzmFel_tOvz6Wb5f= zIia#L=I^Tm=RyLq}~Qni#jp4%g__&qriTvt;HenSpZLmU-@J z@OqUBz!4Rk& z-+7`gx?XytSG#x{v$MRg@bpTdWopWb{LW$)q)c+Qva(G;`L4Y}U`fjG9Wp!S17(@d zYnM^C<5MS|P3{53lBTfM+6K^FJRFiFELa^l9)4?mR})77>{FIy5aIf((5hO}+A+FW znZ=^e;Tx7-7zH5iHIT{8ipwuiUGbmT=>tzKpaI+>x$TxJbm#yGJ|+@VXPjTJsWP@L zeE~)4BlZ=mz)hnv;KOHzsW9DKO<&UY7VR%p450{i3XJP#sIG3OpF(wZ0^;f=W`DEr zd6sFgSAIgoPj&b6TkWQTOs5f|xU1)xKG0t55=N)BqT54g_vwdU2>n>Ris2(s38 z-yQDqoC;&b6pd)C1<4w0l1)!YOjS!MF*aY!gnjzpuH4=**lO z zEPNgN_1SvVu@GcI1H%Vvvy_%vi5J&%D<>G}f5h-+{JEC&h@5W2AO$X>b785OlVKFZ zx3WG&$k7Pn9UCgPq(a#Z*~vJyy1C?_O_ozL?JPg%6E}2xPD|nQvGw6i;Y*0T6Ad=A za47I)p-MiZSm`c?tHsBuso=z{^>(W-G(iZtafCmU$aFlZiGj}|5@j`)p6g=l^(bA+J;x8A&sjpD#K`#CpG~!etCD)TP zs+*hYm2x9S=y1~5A}^~Hcvj~9ue-v0!$y`~&y#G|J~qt^k{xG$-v+0B3#WU_U?u_& zd`!@On?xD*vb2^cmS%I`|8bhNs&_9f(Ihp1jk$k@&2UmSho(>%R6IS`!A$oL^=qUya`B1hF`~aFOYtJ1(ei$ahvaG<(t_8W&je zA^0O>x}JTSZ;mxrm@DQ}j~3i0H_gv+z6YXjtON3j?M}G8<(i*)?5tyP4=xgaiWgv!slFH6N0{4qQKgm;y#gx+lrALIX9%ePlOSD|w z7rd{QM&vh!Z8`OnGzI4nVbQBZ5SQW?XRp*5!@xrB7Tx6!FiB~NmALg*rx>Hcr11$) zQj^GpSM7lG39S(q&}=lZ(++Jbx#Yxw=0*GUMDMvn_9p==zZ$lIpo^+zyaNSi>F$`e zaj*7#UVN;^Sv4imWf3_4C~MC{3T^vgk1uv7?vsefaSkf^IYgwQzoK@YQ10K2k#x)= z?c@p&0mbNC1wx8m(5zVGGvFvk4hY$d-p>4k&F0;-BU#5_MWdFRl~K-)zV#lCmm-{- zFHpq@;2Wc-AA%Y8TR;OUM*mW>NM;|*_et$qGkCWad4u)GH&L3=6MVX9 zPv&f<-#u{beyWm_vB$n==aB!he0_P|S;hw!#@crmj(HB=S?u)nHb}Me^7Gr4?@1?> z#rt0yF1eml9}MW8jq$`1o;|RG_D#10xZ8bS1qr~Muen*ZO1Bj(?T6IBq79GN3OG;z2bR@ 
zqLLE8zYzn;xGUQaQ^VKKY21+}Tc#GXZYSA8P>3_pHo8?d#y6zM9+Xlr8uLZQ)#Q)A zFt07Y8C;bKxENLQ%1{bEB16#ZU?wTA&XSwEaZoWK~K6F z5|I`cK}O6~fwWHbZ2h`R`FU#pd0C?lIxSP|>~%VU5Iq|cy)zd))EJ>S`OG1$K8+y$ zE;G{q4wBrKiHhE1XtZf`4iQ3obB0*{Aj87?cS-Z2k z=Y|qXzj$!QZBu^S`A&c^5T~|eMym`=4-MbFZc==My|JMTpEAM zVZW!hh^eD6mP2?RZ-S)z_qpm|MZ)t8VRZeexHkBdUGYTsldcPv zjq(8Cbw;jE4l~-Gw-&yQlb2xFymO9vnT`0uyU!Fd85r4l%jAW%o-;CwbRj)yNq%pP_~n%cJ=IuDxGb%x_46(J@S7tpsMJi%s!>H)j=fD5 z>Cqpf0kgxY7_^D|)Y+zrF=>3n6GI_Gt>YV8JZlFke2vdRf*zdETLswB0y^JT>8UIB zHkA{oywGH8X|A~lQUaFaXj1@pf4c$S@!_$J#@B+6;72deLp5}q4NMpdm|Qb;TN11* zl_GepfHicS^8T(PGz6Ze-ZV zC#0mrZ5o8eN3VFo>(vF5Tb>GA zVU%h&z2MpsUW+Cck_tMp%k!7_ZjN`$MDr%yJF-Y_COu0d)68QFk=}WR+k3LN(pla$ z&OXPn`LSy>_n$)JLo5uc#^-rDJ+yDbLM~dj@HdCB-qNxiB?~u8%aeN9rh6VCmnC@j z76os%Lk%gb1JyEHjUq`|TI}J=iwmBwkE^wfk3rg)3BQ<(W0lTMjk<&^N7B8>UrkU}#<^7Ud!`z9>={_#U;23_yxv8f`Bfm1Z{F%D zd$rYq3WwppF9km>5im3uJ_UW_H2mq`t|FtUV&0&9FI3yTH!XYZ9X7wTq(!O4<*Tb3 zC!Dd;4cFNew#iakK+zI3mOaDRHCm!|^yu|3CLEn7L(o0TvCLk$s8rs{$f(u>`Gp&i zLo!`Jxq9M_d7qqRMZRS1+$)ijAJCd?{z|-4w`y)0Qe0N5^kdZ_q2jzt?NgZ|sr*qx z*^^JF8og6{|9K`+a_%z$OKD<%9|6LYhfPaGkM?w!SK&>`dX*v3XwfSn$<|lAePHLB z{J4psc_e)m-(IsU6&00KSua$VPEYTSOwHu7bV1HOznLYcDq1eYw7`b83zA*~yoc2@ z{6+5X+yqS{FT~kV-_SlO*&Iky6T*EW9IE2|8Y9SULh@aPNSpxiF)2^6N&#CcVsyGN zUd*++dm$M5(ufk9aB@dab^3=Df?-W2k(eD<*I-sXQVXItSbCBbU1B_u7QmmCAq2V> zr%WnZZzOpO`HQx$H;^ku?@W|+r^z_U1+SGY2d{iSh^xJFL@)=Oi;mgXQ|DN zeX23^-iGz}j=c=FE_z(kTCrUsD)Tq<7|Ea+0u~YvoRrh|PdrZ{qMpe=t$pzQu710> z|M+(-Wf20yz-7tz!)pGE;2(q)xzhjjegCB~zh6|00$ygttzFIiqdnNEdKBRTi|pTZ z>(5>M=X2O`(CM7Vmt%g6Vq+RGH)8LoJLVwU!bZW|HizZt+E-STHs!4+606r+?_(|Uizs66QQ z92+)t|7H^Z^Eh`!g3e-N_~`lLW{^tI@KCZ*B>dT!|KQ4fbuc%08?w**tGx@*@La=+ z{kZUt=KaxTWF{BrzD!&PB=&?3kwsu`&=6ddPyN5K*GU)Lo>SdL?fCDjxO5`0U&ehb zS>nH$%rtzEaXR^}Ro~};A{zq&8^9(+Zou{bvh%kO|Ic;*=eqywu0Q{^E3*E8l;emH Uy%R~PCK342P|>+qpoBpEA8<449RL6T literal 0 HcmV?d00001 diff --git a/docs/assets/deployment/hf-inference-endpoints-choose-infra.png b/docs/assets/deployment/hf-inference-endpoints-choose-infra.png new file mode 100644 index 0000000000000000000000000000000000000000..09e92ad3fc7a097034dd144f6f0c21ea74396307 GIT binary patch literal 358264 zcmeFZcUV(d*FH=Ukq%Oot|HQ#^s3mYO7Da!kkBFYh%}XIqt{VEdQa#eRiy-}flvgb z_m&Vs`3}xAGwS<$=bicfesW#8ayV!2eb!m~UTfWJ?R|Kvt$CY@{5&}U0Rh$BJ2xH> z5Rjt?2u^)Ia~e4FWnycIfPhlnK~+`zuBs}Rw!4d+1K5^;;Lg+62Bd~MpP1539&n#J zb3^6cf(GsLvnuz_2tC@l@Z{bdqR_L}*R@OEM3NVo-6;7jpX(VrZ7Ilrscp@eiGp8X z+3G`49T}nLh(Uw&q%`yhk4%E~$U^5Pm41?vwj&=lVY2F{dhxp_Du0_WG>-d)Jp|b8*SYigw!buI3G&Mh#Gn%pC{f?TGlN+dgiz z%hybKUXZq9(5Z16G3OGob~RC*i}M-Vwt-4lb{qN1W@PyT)#Y_smxnvpD8y&Za%W#> zixbg&^2m^Q`i!qP*sxLK-nGlpR!z6EpWJV^A}m$EM20a~w0^f(mYEoYPpjSvMwQYh zS()EZZb(iz$Nbx8M$C<-^=;1f$6k?_Quheip9h%ksR&-buae2{ohtaEwIk2RR4aL& z^Ue84^j(kN_E+6t_ccpiNQcO1rgO@RN-5I_Yra-8ztkQ1Ob7Qg!G2TqjqcG`|7$W| z9}_eYksJ0m*~F2w7MksvZD9!H#C zc09RFyaEj;PTB~%B_?_?Qpxfz<)zcKs!y)nFc~AjeyHDY_1O&>JZC?{LF^WQ;8ozM zpyQx(rX3}NUVchpt6IlKF`5;m4GZcE>Y{Nbku-CIjj`sb+NM5bB_JIm>1p{8IMr1Y zH;cg+c9Arvs`K+l zae>_6mT+UM$P!|d?8+r?LZ{{>LJIj9NlPKreI5=|H?4jBu_mxs;E|1LT9(rs{-vJX zgEifmfyJGhg9*IJ@i}8rW1spK#ocb%aYPa~vRq)&y5P$8!QI>4n?wb7c)xyP;s!R* zfYz=P>U^|L;KRATx~jO9MxjXEqy*i&Cv%Wi4rMo zCMsCABzRMXSo$l;feP0dlIL91mx7=6;?=8qY%Tc{$>xGDXcNVUNM^EuiA=6BqNp$- ze!ZtOAD4eUTSesjsMdAY^AU3lDBm)r>*RM)s@o@;)S+#+k!_R=Mlyk6M*{0i%A zCg>rX9AURgPA16$ktd&Vxc+aoy>dI3{K)Eq;crBGIVde1S?DF&*hHn;+b)#a2(rHf zm6uu-Fyyy7mfqS+Ee5R`GGpj?rzumz^={V{A?_7GJ^eO$D#CC)08t@$ucNE&C z+~wGfPEJlvN{%qDNH&BN87GaJS8hAf3@YV0<)d>w26YA)m(&KK?=!R;-yDVCdSmqE z;NhnVh0zDblB*1>gsUN^Ra@?dOi;T4o z4?}WKvr_f2cv20YTaz>ut@GJCnrb}vTex?)Shxfw;yj!M!*Y(5g!OLp(7816C2_m+ 
z_;dE>ilvn7#vGbOH^t~u3Z!J6w@B3z+CZ}z^mp}Fh8USeHJnTAAz%ii&0tLMczr?jNS>W_x+{)OAk-u;dG&N92YNL zlYU09qTSS9H4c1MDm36)x`GIKzEO(B|exp>#T}ru;dth?^JCo&Lo8 zViMLpZQ@qEZEI11%=4TG!8PYPjk*gBvcc(bFSOG(l*G8)Y_0HB&u8qSYhT&KUD9;C zbD4+tl8e^bIZm0N?!6d>;a9)CN}(^{RZnpR{q~_X_33TiFT9()?00zHUd~=qKWvWu zX0YYo=i{*4UqfR0!T1A3R)%Y3iOx46V?Mw5wX_EDL*H;{dtOwJ`eavMm#~ThEMV=ZSPcj4c5Y-T?7Uq;+reRs1D!W$Rx*DBp4vC>=rab^oGQPS>cFp zFDLmz^aZ0Yg<<9t(n8J5Pnj!)IqH4Jr(N2brz3eZdEW6@#Gi>bJ@6es+A3p(H7rJJ zx*#L%v#ql)W~B}GiryRNR+d@L&gfYi3>MlI3Y#B#cfXZnzGrq%y6|0*UI9deLd40? zzSJ34BQl#bE6sl(fGLnRZS*L>MAt@>BKp65?P|X)?_}*0p%AVeYVxG@OPX zhByq=sx`rXi6Epg;b>E;!5ePuwbA2Vim+3p_oc@xN%-m?j^TWD?6v1#E4Lqi*}T3# z?P2gG!x&ZsbJ*VcY7qph^o(6z^WpIy#QS|>J(QN8sHuajJzetM5XO$rW7{})lyw7b zU`(LJFRbp6EGggKu@`#PSn>Cx81m()$m>;%0p2rQdCE_&51 z1)vOQ2b175!N;L`_8%LjPpb$WHx$fT-Crmh#EDjG(}5*8vaDq0i^q!-U@4e#1%Xb|uN z$7cvm5uPU?0gecPuM#2apU1ZduMiM_KTkwJ5aB>@>OXBXf!~u)9PmBq^ZPIHi*N!` z;IFg5*C&(ckJjX~Zn;LH-!$hOLLS zyMwEzgNrlQ$+#dZ7cWo63l~mq^ylYyIcNyj6e#?beMN)8cWST1;FpU zPJ!!*|6PxJL_}~NO0$0l0V4RsKZoXb$#@6}(PzVj|6MOall6aL0>FKOUp#sQAyZBG zSG^uRDun(4q5tUq|Cm~pyDCla&@8oo)k~#mNdBJ<|A+MdHJda4HJksM&4112ztQI3 z#SNnWMw|aeoBxuu|B|%-lC=L{xs7-B+KTLUbD3>txyghzb}Gvs;Vj?33f=QJ>#Mrj zl_srR8+(nIH2fMd-fPd_{hwzXne_mpgD3EoMiWs4J=a5#kW7J&(|*`H>>rgQaw#` zrZyTS8O2fUzc5(dfeBzp@pL9cYgl>E{^N-B@FkFwq$DoU>@V&j!sooYccJlc%oD<% zeoa{^N>;h53NO4jLchs-!k?n~n}Io!sI7z3;0pZLgv?V zdeWM6c{!Oeqt-r=)>5_|t@D>7+TRH@uU!leufM%s$){7{7xf5|ui*WU6&BnLq|M`$ z4em{F?$dX5UXSXEd71Usu+%~Fl5i!e)Mp|%{ZzU!tEC_(60k+;fw7NwU*w2Tnq&}| zrD3^u)y;pZEn&t>-4w=-Bk^1sqxIWa;m|KOHlwdm#4M<`1mj^`oHZkz%g$Ab@*R%| z51Yg0Ga5#=^|XMZfRI(%AKA|TnM{FYgV`@w%Hzke&D$cHWaX1-%suB}?v!5CBktD= zyL72hK}%b)x>4%a(9#nDyb6q}=>qXW7L6jOT}5yx*)03mHAZVh$LAq(l;#txyll9M z4%(a$ZB#xVj$eZnSg!UJn#geL_a8RBSB!*oPD}W%PpnxAd8ti@a%?WcJud!Pm zJXQ~~HC~uRxS#?NW_eJ^)_#rGa&zVI@nv~)610Y$C)Ym)D$1FNUCz>ky-0DTm$-Q< zf~}-m)Fj{1q~S?z5cXlBguOX@@%_d2P%+X~oe8rYW+CIV6KV3wc*6>xy9?v_w^_t} zEK&A5=2==9=51B+a)1?4i!_QWcBM3O30gFW??iX3)GYUky2sLGK`E2tO-zX35)+Ax z`t7nhtmlg_nyINlyz3fY3mAkwMQV^mtDP?g;nxw+g0G`FGB(9N?Dc+_D?>WGdi!Y{ z#MLehyUq_yYXq-)E&{f_$cmYS+OSA@y0)R<1?uVILh3%4uw7N_wy%*&0d^@X#>xKO z$(0~YjQRw{_CyNjP=NzEF)4eZ{wQud zb3b-*P++ALU)1L3J@)MN)VQq2Ovk~-#N)OsHP!hpKcr%KgjvL3al67Ij&BOboNcqc zQjUG^+P4{D&}C$P7rU{Q-#-&rmr&04X+`cL?p#j4q#y z#=-@6Rz?(8YC3z9d)=`tRve1%u?M1R{}^DsAQCxnled3_Q$M|nQxgUb_*aTKkuLKW z9?0{|A#XzX%@d8p>h@Yg=bOt|L<@)Iyw(^r+5N#NCnVjjw>_EzwnPJ3a*|kDkGlEUDZm`Oem4EWPA|dpiBLB zlSM((YJl^VFsUmBwVV!<(3I!%T8hj#$^vPj5$PZk`)<(#h$jj8&G+ohV9% zP?^<+Yni(Tb$he^<1(DeZTf{cLNtc9NQ0(Eh!9=9%@iuX9wJ4WxKSV_!-{%IHxP9d` z05$_Odo@-SqP*NGhl9An!P4&zR}7$`mHER4>)o+h!+jOrkcMQT+tFi+QLO&zLz9V* zATxclQ*$QkxrqXWfJkSDB=yo~UX|V+)a`_*HDSK6M5Qqim(A+0b3C{?L6ZSB_B9-W z9lbMXd8@20496}GOYc0@dp%!|c<+3r#-tz$|1=UQ(7ybCl*j(^=)p1ky;DT+RUXq6 z4j(l_w4PXEwaur#jU^|SP={iq;Nv3vDmK`)Su8@oPQb~^a(mxmJc&e2S=0~dxTXmdAV#cXT~pxY{;>E0*QF60d%q2BT9yTQM>~Kw)dI z^}@pxxP{NrqENd0jP z+4=L&p4By0&tO4Q&28Lz&5F~#ATHDgI3^{_Mf5lBqX&xbPF`KdlNm-E90 z`WX2OD+uJrnwAG#_L5!}K^TO7l}0j@a6+BpA9abZ{@FP^lyB&W|Y%W9X{Nnr(}2Hc>`$_ z(joU41B&-r!)C`>2ep`78YF9-XD1kl8m8uIoAX}hm&xV}03?;yykmXX;sCmFp3`AduRnjpo~ zVam8o8pioMQ){)>hH{lzVg-Q{MmYtX*bbOMu@Qa96x3$jyq>_d%~IeWSMW1K$UNUa zTIDoPJe%$x^x2L+L-Obw%K=;2$TnCVARDx?iu=7K!yGNrOLazZn+ak!SWR+zsKl=N zU2{925YeEbKi+r%pP(hKNH7QdL*MKnt({njW{!CiW@1vAX~NguSxQdN{BR*clP1l2 z!`<2#h}pwO)0mPX2Z+?_m)kL9q?}b+<2tX3#q{PBXF}Eu9v4DYoIv0FW~A-W#IXvD z6FxI(;Acy{-X_FwRyi2?W|3*BI4dU0LyIDWbXSDhLCCV!a_WUEntk11tA0aursdI5 zV5If7COMr2w8bUTv8{vM-%!SK^2^8?pv|Z$^l;fxVhEPvHhYkEkYTCd^}0=V+<WyEZTyN@JE%w#m9bw(uN+<=_4=hN5jNOFJ@ICbhWT1zpMm@(zg 
zOt9qV&#h|B99A$)r(yP4r@nOX+Nt{Qtk4OJr@!RXhvN3T5X~*IN-tN{f@gG}d#P3K zDA>1cxRG;-nATkqbO1+v7}!O6?JU$c^YL;Rr8?>m!?&3Qsn=WQk9ul7T3+yUw)DMI ziv6S7~w#?lQ zYp-wawEN>MY>oJd+hvEfYjRgp6@$P-SepOC2mck7QFN!|Oq-;#5D9S*z+iD|?wzaH zh_=!#2glp&ZmujIy$!gc4XXmC{T0squsK%z&LVv{B?oOdH9JGPFD5L&<7N}4-DK?( z|7>=ewKd6Ly*1Au91diN6Jmfom%=Zj&FQ2wtW^ntOe>k4!)M_W*avB@XdQ=LIp|HE z-C`7IDRypfI>fN3AL$l&qmA=fI7!p#Lf+N3JzC7J&?^;RE_SIczF?TbkEcF2CK5zR zz20p>JJ+t_;+S&2L?dfSUgu7w?v#w)61&2C7>k^ZwvX*nREhZcw{S5+G{ZtVNB(iP zl;gKG&=_i0&!L=3?^xY;cXwkTbiS;R`1B41b2`jVY#Mlm==I?;a3HGW!&DSadLl`@ zP8<$!ZFI>)MV~P2u$3v$ao}ynj>W%}$5rA%el_6;Z8y*0I{Y_Cu2#uO#4Rrrs-6bZ z`}v<4aaM1_jQL(B zV23KdiXPY28~yBvFte&e(?B+ii)yDZEC;5r!w7b7OPK(5w8qw_;no=#JMk9*E3c7( z2ef-tZ>V$MbKxWAb3*5>+NK7icz*Z2c(cY>J*Sj%*$0ny7cO>$g2W6{5a>7k2Me!9 z+fRXcm9XwUQ*8!iGoOD~^{f1GeS_c3-~l(Q7Ki+wIdxHMSnS+WzRJm-k4=A&H(?@K zulGaV;;l^x8fD-4P$Tri$)8zxlRm@3!z?s4Yl#NZ)jtB<`C(Yg?C80^MSg^}-k0HH zLp@K=f~)@b3S?47kwIR!4O#QwWqbFL0sKWtR2=X^d>J`xCH2bsj{^ntLp30Xaqmz3 zQSq@(g%<23?A7yYQv*6NWW^0q7Zo-KILlO7Ydonks`gTQfzPNeBmTty*86Ex9T_wV z#o+dC&WCd-)-W&6YF3xItWVUpv~kLo*VQ^7XLVc-Jttvji?8YM6^Y(<0jG#d>U#zH z1?mkm6df**%%~0voAGClj6GUi=kQUeJH(`ddla!Kp;W0g)H7WF=j_wq&a74E@!B?f zAM@@pT=<{4^?oqT3P5c^hyF=Y>KU7MKHp01%LCKopm}FTFQbVutnAdv-yVS*VLxaN zC40)4e5l9-AzWZyFZI^KP`xwTOM@HI3Cnez?{!P~U*Q{^ro%PcDFLaT+7LIxi-F1EAPS;#DHge=j-m>3iqvtI@(4gkByU;7fl z5qP;P5vT9nl_jQPJdvO^VBF-5S@E4p$8&av!x^@OSW{s=xuB0x3a`D zi&do|bhKoJqsBaLG#Cr&gpIP!lz&9;^=pyW(s0HXhc7$tK?+XSU8aoD^HTQ2RP~~& z1O?5@uCAQTU-e!!J|`ng-S-0zv5cU9$ft{DAg&OYfz1~gmhu-El^=;bU2&CO5C35< zf8DhrWyCex<%d~GXPg2rJOZJqIZHIOgu}oYFt>XcTI6|U@0Xl&?Hq>G4R8Rw9I8=R z#XTFqnS9w_l=mV}2v|w&mCz^9_<&vxDpj&f@iupzSOoPp1-03}C0JYM3=*%~?isT( zk+n?8BQ#?suJOH@d31;m+qR}HlRJ=^i8d;I+?!l7sr2TV?`J}Exh5xTI1Q)qib;v< z-Ky0XeB|y)Ss|Q?UAF)+RE@>PG7hI0v=wq3A4C?1-$iPeH_G~@dX3&?oY#`StR5hy z9)zo$?u`_VnwnCAP|zsvZG?($ckk+f`%gv0j3~3Yp@Q2?eeKukNW{+I4Jn@&QN6jn zdQMYz@jZCBrPOhGxzx|eu_c^}d_G-PczOP~?xZr>bdkQfCVN*|rHGna#00J|#zH*k zi*Mzbk8n1toy#BDN^m=VCL>&V(TZTNm*tc&;x1B)4B1$6l%|Q+QpBM|-HXScanBz5 zuPcg2DIPtsvRE2(TUP~5jFF7MfvR8gwkAcrmE=U-JLo(Ds;pa=U{>Qrk#PJ8|JwPa z{^fkQx)03+zvOhe`&aq0DMqCCL5WnDLxQl$^DrlhFimdH^`ZK`=x@Si=gTJVtg*q8 zbT9tzL={y^GF0f)b5sNRtS|@LbRAqWjH|#M;x%E54O;8zlDPJO8~z23wW_DU9$z8# zf}~&?HAZb~!>g5*ixJUk2_23Z(I=TAV}e)Hk$=QYjM7___qd4l>t0ExvR$RX{m>Sg zagm+nVd=WVp45%LSBoG>!7XFw*+-Dkqt659rat8yYg66@eO9F&6`?#Q&L#MC@K#R! 
zJyMAT#<55OV~uPiZ}L*j2L49*cI8A(@S|Bj6}SXJi4DT zl-yf$*T>s+*T2siZJt_ev$q@WeZ(-FtCV6X=)n~|SlbHJHh*84GmH)c(qq&+LvDh$AirXZB*^v@u| zjxT1j=Cl=lk&VB(D+8qejJOmIkE!9Ve z%uoaEA7!^|B}@24RmM2!ZQ}GicMGaZV?4&Pa5#4z%bF1mo1PRC=B}Ma`K;=}y@5b& z)xdH-@7n4f!8dg`+Cs%_KBXM_AYf|X(vhpBSMG?}_e?EEP0Hfp?G5IlrFO<*^1e(o zjy!GuuY^p}8(e0|Tr<#aQAgf4s?9QM!4RH>6AX2@148vg%={2X4jmZLsErk>MdLGj z%lrQ62-?ma`yY3!9b?qQjv{{QD~_9-_E`$-J9Qx&n@*yF!l>lND)Oz}a`FnADLZ^! zchmh*kK`ggPu=PqVCKXvf@c~C?|%}o%h$7PqN5DKlUxr-p+x}9I-cJWebW4e{ z@?4K~_sYRK6*;&(O^qXWZR6r(&?u^AiuXJ5#YSCqcV+_)GV^b<*s$67ag}aEWJ~@5 zWk3Nt;DETVkbq8NvQePRD<;3QF}A{rw@rFps~0z+3XlSG2C3!l2dPf>yXvLymU!G? z@F>{b0ci9$9jahBY~gA?+(U%;>&^D8aZqvWTwB%6pUTb$xxp(ikX+q~SF2onL26F| zrvj$oRIfGh7!iQ#1>7NokDa8fqp~oGnKBzKU5!4u9nX;}7RqAjQBvSq!4rr0h;mEP zAVe5)Tz$MbNHo|p(g~gH|Gd3A?NvahkqU$G8&9Up31;cP?)vuZxYh~SpBfCa84swS znue+D!D9B{P(GwbUwwyiOkMTUj$sh8AbJHZF#Hk~%|0ch;b>!Yt*l%yz3?5)qDwW3mjSt2w-cA9Fo?pEjO55M!wI_Y;$Y¨L!ojGB@bJRh5a;60JNv`}6_{azVK@$P^D9v-7b_5DyE=(CCX=zS++k0HH>hu*d_r^?Sg;e!LJn1C&O-py zx#XoQR8PTPgkqV5~Dy=#zh%)__yJK;WWd=A(6Qz|jdl3dmwtdEz~it}OL zdul-KcC=;X7SIOh^+mraAIirY1lNOSm=c}-3o8b%n%?z&IR3MDJCKyCbT(2GrYCh1 zKK@q`m>Wa8(_yFoU39GHNtmDrDst6weceQ+& zZNTGT6>MyIM#!>p(bd=c$TCqwjG{Xc=;uim#^KNPo{Z{XYh%MSyHK6pV%vQx^7Fj? z=FQb@m_u~^uVh9TrmrfqZiYEb-Z;?8x?cTbPK7|UpX82Y(3Y_9YXc!Zqq%A*xItKa+ z;r`h)hyR;R11MYqU;nZn<^K+ zm~lN0It|%*zOP@dbtr~C&eH%oO7>|=TK zg4&E>4|@v1MV~2DBCVWUpu;O!O8yTijID%Nk=1QuUAI$I^%z8a?rgT)LZxEYu+9ujU@k{CH^%$|AVy zT=m5pxWY|Jq+YT#Z-eO_D>67U(Ir7SAN)NW;E+gVKE)soFYau(B>)?Og#+-YBheeK z7L+h(+AqIvQ=907?LV*YWqn7j;Xy8%_T;5=Ewl*J@lPGq6_Y zglVOa&HNSu9R!j^rBv=)3%DAx0zL1&V~{1Modrvy&ckt@&hOPbX=rFikz4*65ImZ#SkzI0#3yMgIj@-DQS4AUV~H!ig$c{@K6AnyWLXogyswS9wPjm84Mjt+E#jT9@`&%sg;NrR^%ock z{0_48E#cer0jFN?2GS2Cs$6!UYd^Eu6zlC}&UVBLUC5$tAPO@6%x33YTf?KW7nGL3 z`5){Hl`Hyl$(IDoLx3kgB}8ywK3xYJi>?o8Y2^>>YXce22s%mdBH9=Ywyr(mfiCf7 z2u^*9aeeQwE`#xu-L)UTH>$;HiJWc=ea@=eDSftDKePV4{(P4qg6U1?mz`}ur4!JP zx2R_YWH-|sZUW_AoAbdR6rzNl1JfWfx)scuFWIBfcDAIvZyo>m#O^+S=WNmcWoZ8J z`yhlUnO-jlJ-ZsF#TzZX`JBHSo$p|W3u|MsPPqZS^es@93D8%s?(M~kT}^^tE`@$M z2fEG}wtSR=dI-==NAtwA-OnPG6oH#uDXJjJhJg#I?q?rgO)E?SoZ2=&bafjoaYVP5 z{|NVJb7Fd_!U+u^b+73^KwUc1z@w0JTVAq20PR#dJyUFP1x z!GqIIQc5_lZ^yYYsg{7qB^2}h4#qWz?#U#ucIT#Jf=))$SE!mFw|XSH1OP?H1brMI z1A#$d3v}3v^+WQeo*;;>{JX!)6MU z!fE;om(wPWB9(`L-i&N-m!+%F0danxq;k&NNFVouP^tZE!gHg@y|%ia0B2^yX3`DN z2M6l0a({Rsmop-W4t)hky9cxIPI7vevgvuR;05c^>cFpkWsRWV%n@?v{kAD^y$%>S z3b4KbR47nc7UxA>`Gt0t`HSW*^j_yT2EBHDJ2}g2wb#vH8}Ksb_bJvvPr=D+F8njt zP0ZaA->PtBFlDMDhE#4QdT(&Gq>z9rrAc5-{4?-rEAmQ*iV(0x3ImGxJ+t7n_1^)OXFOr9Os}>d$s)TBXwp z>&Od&iGg6SHTY!YnFSTvh~%1MOi9Yliwk7KGb^y1<12a1a6EL4t6SS zJTm-6227x$i+Tavp&6^?==249AtIsm&ShtM;kzJLEtwaVN$eO2&Av4-=lY-Er+kci9p&k*or%uU-J1=JJ;=iQnP0;KfR!b2Lxx;Z&5lX0{9qav z^YKh`G{V zZkc~v&WwS!zsx6RS1g0F_fXoI9i|OS&Fw7mWq0b1sDVK? z!%NEa8r2!C{Q_xlurW7C50iGXSz6#LuK9D;zfjTbhs+~2pLEc>E$ z{I_HuJ3QGJrF!KUWG-(uMn<5ORYRN_?#V|T09Egzz-+gHkI8~umg$5~MmrwVPJ)4m z_!yVqK3*g2Bz~)GVVp6_-#%&n^O z(_Wi5_7s{4b!l_@?^ys3wV{Pa!s&&76(o0|GYCo@S1+AL-N>%?bn0n~EU#X(G-LlD0uW-CM9#Ax6$nw2> z8ag?)BDst+4NiKj_(?{w!TeP4vf|6{N$=%Zn=i_7VPP9&cegXoYV;c?Cpl&!>y#^j4usVx*ah zj4H7{ZVX+Nf+GRm5x!jC-UivJ?J7P71d<8fjWWdzAS? 
zjpSs(;0$eXx&1f?B8uAc^97_MoNm7uCTL8v+v!yPV#?I6C}vxe*Y#wu=aXQgF1?CU zQsCe%Y@MC{P|43mfBAxYDzCH}@>E0C3T&+%_!Xzb4#JrL-H5Jr{*Ypr=-99YjH?Ik zzlovxN%6-it;7gl-(T~ePALNJls;cOr6oa;kk8AP;i&$RgYl>Snvrg7^lU<^EAU|U z8~2-swpQ})c#X`Om8~uB2t4vECW@H(A&1QrUHu)Xz>M>>sPga&6)o2=o>Fs$Y9C&Q zMtl-3>s#g%HB;zpE+#FJnZIc^@mShGMbq8C2+()4)a+`~I zQ2Jrq#I1^}i*otH92yWj6XX9T*25-9mg}uLZl8G^Z{Ms7+=nfk^ooFL^nl)nPZ2N1 zcMUVlUX7;-Juqu}xQT^Xr^3~I_BIK`;AX?$ub5;U&8jF`B9XE zzF)cd_@E4x+pXZ6T_2RrOqt773cpt3vDqqndYb>za+c+jo&}Ot5p9;$AH%kcS&dM?w#@O=t7WNHfbSlrkzhq-Z%jZvf$BgLJ&rmw%x1C*{ zMFlYBCwSegWXSt=76UG4uQz&X>$|`mYpWOU?zO#5;k1wsS)9ZyyuEqeE^-`k0htnc zRWn^mE`3RQ4)aY~gv_jTJ<}8G%JF#1kgOfDq21(>Mln~l|Fx@YY^n99d2m7GNwstQ zR9Gur3Z<-yvKI3bM!|;$eI1(;_1fp=Gc@{h^S2&^yjQ$usL;@}vtoh$UVlH;*x8AlNAAx989V|$2a+)VG(B#yzVjrEu?(Qa<< z%^5c?l_1`uNuhQLG}B)01(f@$Z}{8A?Wg%Kd`hsSip3G1VWEDkF*j+RO$x=b8h4OWDG9*)o8Qbz{a`FwtgjDfML zowZ}v(C=l(!Utg=w*WH{OfZ90&bFbHOS7r6Sn;hxgFz8f^qV1DnJ=Lw8V%?3lkq-r zb*yvCLXms#HZ73ONbt@`9A;802Y9&Zld3BzyCa}9D!sPBZb9g-YZk#6yq(_$gE}7F zOnUvSv$t`Oc6J$J%T>-KbG~VSjxsr9d8cvh7IyN@8s2RIhTYHKwqP}t8jjQVMeK}% zyoIEHzYym#Q-d@LuFwsnzrRSlZ1`kyRAmtBng1mYS=CQNe59jBsK6(2y9$CAgPjbrERKa|qGCr$Y5$^ptQ{0y zBg9;j6lY7T6L)^mNc1mtN2e-Mgs)VJO{O8zlj%|@jQDMZ(GN0*SG!S*nPeVmW%b%O=8}FAn({7sN<}o__s(Fan>-i9e3+ce$)>aF}ED&X3j!H;dM4OFldtuHB z8^z4GS!t1MfXS*Ud!^(2Gp2sq7AP_mo0n>;TF%77RZQ}$e}v}s2@og6Lf`ZC!EA;* zjo860{gJ@(X&MOs$y=Qn+r$c$$4@%ZmY#3k^?HeFGj}l=PlE)0xpR9&gi4sS4d%wLu_@hLINnoWC*~*JY?N#NL_+BVd^(u zs)`rXFTW_~2tHo?{zt&xT+cHD{-xVORVdO~$~(Q%1VwZ@aRpHSi#wKHN$xNuio^@W zSy~9#g-ccURNohZVQa7YK*!zMf<&reM1oa}iWha&wD_P33!+R=M82&Oeb8lqbQWk< z=UmFK(a@7*Iynil63M``3Kz00lq;x(ff{yZxMn_xjJpm_I_^ir6(E`W5w(AY*xCl6 zn57m9+WuQfg{Xw#63dM9Ae=h((JcUYszdotkMU3~q-u?zM#%f+&?96ihL9y(RV1QrAb~$f7E>agr$^%QD6qB#$MeMm!`F<<^xG|n zX?raHr%!;t=`dEfk*ahPG z=NJ{Iwt;cr<;4zH_-A?<{W&wv&e+iYVt>SRQPWV&;aznp*jfd4Piz?2Cap?7eqWNi);bY0EQuOJ3XUB zsDm06@pNwwH-1rI9jsdzE?UGcTz~OGK?U@^&>{n448KTb_gI$$_<4gJGH||S=)%tl zJ%lViVBi+XMA1SK&s*VNnORRq_$lHu`;jYm-7S8FdYT^|8Dhgl`yYBm`$*?SnX?_@ z%rgI2j+6TU_;~>^x#T$)7LAT6?LCAn(xd>eQbjwC1ApE#+%ryaw4%~nHnCeAzUD9g z&rD-2{?6Dh?X(+=>|ChDdCE^e!xz$*1el1EPA%WjXFvV*Gw}QN$jhxJy7`?0-yq1C z6GvDQrE>=gLL-RKPlmxKGt4IFW$c11+7!GVFIVj{=-pRrWinWh+TcqWpoXjaA%}70 z&rK9yW$hK7anRTgbJ6fKoB8#XP5dPQA#Auw3D~hE_#=T&F*jOvN?yoy1yE$akV{Ln z*-b)$pFygOvntt{r4LC`w!0M>ZtQT82z~ePsyuMD>Fmzv`>MBbd&y_vu}C2sf8$E5 zv^G)@+=B0DG;AfAuA1I*4;;Sb`pppVa+T|`E-{zw~*WIU8Px>^6RWxX@qiC zXwM$?RxhoO=O_DE6ukr?I&?&J@Pd5^!zgYa-_!gmQU8fxoEbKI%`tKNd&+ zpuOH)i`v&h1^+*OynS5PccBtZL=FE$Vk@XiB^Vra`a}W~z6lV_#ZDJ)Bz4G)eQR(Z}-$7e9DtYT=-Zof#1}iDl)Q8T@{~ zDZG>GTrsgvQ8~YRdr;%*Xji3TT?!&PJ&Q_|m+DmypC6cRCKj3}0~b&c z=YoumwZC>yg@K>>bV8^H7v4XyGr)l__#tX> za-n1t&R@>tFTvTo1$+^Q7a+9TptJK0(%7`zTy?oyJ~^E>3{MvNr&_(?o?_O(3k1S8 zHnG6s_)$?IAvU7;S*u$A-8y;x{xFNOw(U=B{bF!Ni-j@87UG@0(CjXPHEo~==E)q& z+I14lf;D`H6HI`1S$9QzCx{LTUJA9W;UljiPcF5xcqiHvTJk5FTj-$ahx zXtqsqp0sSF7wZIFvdtah2{kMsCnRVN^o^OBEi(gflKw?wwYGp&pi{s0cbEp7{vLZA>oEJ4hoNmBAOTx~I z;hHgf%gb!tbYM=jfs_2}=FNW@hZh+!;H-OeGHf|?an*ATcqvzJ#!5U6!F5nF{2@<#?zLVS# zQ(o52Re$s;=w#)n#-@{EJNKIfN&5=P6%=cy@G^tKMaT~!ViE-`FhB@&#E~(T9fUEK z@wmp1Bx#k~w2gSmCl5q`yBZ`13;#o%U#Qznbg1C!Q}vzf!FWs%uS;3NQdx8+7};rky@>dXll$J-1}s!mTjABF-gu zG*+tRw?M7Rg*ZLnD~8mHZomzLCZxwLklR@ zA#;Tq6@X#-Mk~%nQmo-NFF^$Wg+9qNR^bLtRCdn)fyI!j;c&0gfM@h3XdFc#T>OO`uJ7IB2KLl-D!FIJa z`!kc;giqB2jOhWTVa4^@pfHV!68r(-XxmhH$ZM&=7czyt268R9uV8@<*JN5IRx$x# zrL=D2<8T4qQ}CMW8%?_{*1)O64f>yvTL^re_W=NJ9wLrrc0`HMQ4ju|3dpaZ|*|q2D z9-8e?+Fo8i4kdX!p7A*<0sbc$Fn;D}RIl4mRG#nNW@wi)j=DedrI+J=JqkNqKt&jL z%6mD^7FSKpqN3sNBGiiNqo*Ct_!tTNzU3_fc*B41k2vNOw7NJ!OEUVy^r6_iw&_0#E&u^bUVJ4D$Fbd22}TpBI_wXmxIRb{;3`~3MB 
zRrO@O4OJj!@BiZKyW^T%nzj!pqJV$}QIW22=!gQ+5k!=#KnOi3O-ewzbWjliY0{)y zs7dG$Y67TqDbfOl-g_rBA>Rhi^SqvO-tYa#A9rrq?9R^4&RlcN@YGPJ2`wzANjrqq zIo!({=a=S7B!w{M5w$fVb8o9mp2VBfV2Zzw{jlDTYgYMGOl+kE9$D&sR2+wY?dtia zK$1`UWn*{>1(x7-+zHtKX&d0v-VJpEX>9XU#v42dh_cJ zyZG_K>7_?KbFFf$4$)A#r4WzN*m#~8I8Gq0t&eaIPT`}jcGZ&PZ62KDw_j^DOxtQ| zO>=1;-2Dz4?@V$i)+kIaQBI)2u`cWyb$6(LehjqQ)StZKu;T8+9W} zQt4H=ihO;zM+B-u;lmpyZMIh2`KBg>H_!HT&Y@*F;;9Kq+)m_(q^Q1j*&K)UR-x0f zyN^-J2q2@5iH*JIp>I7( zO?_-wKW@?VVKI!wvsByU=bvsdo`O zeg(@5lJ4wFT;f7mz+C8aMuz%?b=R|-E?oU`1Y7)Wb?EPr+}o`aq%-i@J3u8AzQp{I z&q?^`Xs5&8m-O&~(;E;nIyd6wwzyU`;yIPGT{RUOF=tye5%OIHTytUpa``i{r3*%)kC*=ooRLmC zk!=9+!(TnqJHK~EeTg|`er;MNKslMah7x^bErrSJ>y@;O7Nfe(=1Qr1?Pr+=4$oI*$5Ce9+lDbT0eb z2fo|8KE7*PyT{m#=0c0vLrGg{boPNh7jvpWQ)~Rs=Rf5G4I;80goH}Jv^iH!ciCWn zGBnL|vdTkt*E&G+g4nJ_a3XGafZMls=9loZeeZ5v&lr}@5$uSrM0o2vx&)v^GUfY> zzH^G&uqb+y$>P3lKjH!UMNdEK+~#k8d@qpX=q729K*kU*5?b>wS^f93t5*TQ z#m=Qmx?TVFWdYAf-XiWgxbr503u*rG_V16iZ2+?z@^*c~^v`e7=KzMIvVx?+aYoWd zr{ka7_TQgzvGZd;gqFx*}UY$V0+ zzrQhkNAhfC1;8G;y)!n~_%}TH+y8E2C(XsBPy+I|F~E=~R7rC=Bx!tImu1aAr}}qE zzYvhGbt?!Qko)HzNwAURqC*>@d1JnQ+7C^LBa(7>xKREr;8`R zRtME7O`iYf8-SMqg>fU@y$M*}EC08~ut9))b6N_S|F!U!NA4)e?NgNg1w8#-q`$4v zJOSX{{8gqm8UHzBang$HlK{}X(7>mZ^v|jOeRXHg0J*$AyYR2?kvAfF6MP9^MUKsS z|27=IO@(drF<6iw_SQ{Og5me^1MJR_OoItYx`ufyOaD~Mzia7hut!WhpKbp>1{m_5 z0ZHv%kq&j&8@u0j?|(5gqi0ETsi434ubAHM0+@>rEol@n*ux*;|I07`cj!qpknSM% zFj?lGdt^;o5#|PIMFJ&9fbQ=L{`>mbjsbnS;or{wPiOO@JxO;vT0z=`*&dp(@$b;? zHyJo71E4zuw-@9gD=~qd&d%NLALOHSFgz=o_AkptV@2AvnJ1(}F~iqh@y|o^-vyHP z`-|bxz}rJH4@k>ZhV5-#DM8H311cSs4 zdlut4^Nr7*%|`M^;x%%qFdCNQt!Z9UD5)ien8th){+UI?xNr7)e^>W=Nu+bfoCvhk zdcuO_Vf@c*{`He0KQQ-7Ps!0dpQlzWHL*k2%H&2aob9(bmwyH0Y(48La;}gZjTBmm zYNgA+XDN;fJsLv3@1(a#8B}(+PQ~^Om#t%M z?bR41>~1Od)u+wuu|<6+A`uh{>!u2Q>4L&w4}F(0ZG68o$kE>sz;)^Iq%eXc1Uk zu0UjN-+I%!E5$>2a}NzOD4@f$==aY?zlxK!)>_KtZHs5Pn?pnr=FxeQ=m*l)Hbt_- zNif&e=O=3TZFGAVJ@?ZqrD1o&n&L&F{4wg#?RyU&B?{gR$=~3xL#5TgFgL6hl z)g+{i!<6Zx`4K#6`i|V$F(1D$+@w^^a+ER~A{p*ucPvPu$=_|#?~eg*NLj7thZC%xN9HCfHTzMjhOF`)n%>q!;?E09Gg9t5iiE>oTkXK}dQBon5<-@$QNG zN8>_WA9afKe2)^*k|*7wAyX)y7KQxi6kxso6M&nC*5-$LJ56*it?z2;_HNr*O96u$ zuy|I_Rw?32nQhJ+*YEKR#j3)UrQnptla~Bl7g{CoLzAApa24(6W3i5=t67knz2f^V>`?#jtcjrF$Ss+nI`lQIFiBB$)rNt!4`XnO=f( zp%=t*^KFpfRLzLNSd*_{D8X-~A$Wf^3C+vae11o=vFuj!<%1CYw?v znkP41$jRRbMTof1f16H!Q9GuL;^m7>;D(0a&i&+c-zjRETOO88@Ki?r6c#E)xK$=D zO$9-k5N1KkbhDL}E5kP~*Uyyk?#p^>xGKyrc^poQ{x?@|T7Gi&>C&cGQv53$^>VlJ zWab$V)Y+dGPLti-d450krCs;aD1aQTw1AsM{TP4M@9O)vjXiqN^cvi9zWr91VV>=p zw+2%`nvA_db;>tZRrIZ0Zcl#fT*&ubj?Hlw3o^-)Ed27yM8a3W8j-|ZVw{Dz>NP2s zn32J`+fo`u+3&RUSi&s7sA=F~en(XnL^eUHprDv`VECzb?3drOTl0 zHI0>-wg~0#CZ11}sJx2A*iCKqg@2(z2jn#fp+&WdPZAF*#8zEnuvqy{-JoGqe%38q z)=hHDgQn}PFUX?=r$uFVo{%}q-3brcKZuHFL|GvD224K4BExc>4^fu<7o|w4Et4-m z2Cjt1r~WUK>+fF${L6+4S&$!fn(bA!i&dULrFL>K7a05O_V;SL26S`g!{6@31yJg` znhDCOG1~_AH>zY89-ua7lTw_vL+4JhE-0p8V1YlaMI$C2!uCrH^E^s=9X&*-bz#Gw zKDQsEuHf>lW=ge*pCRHe863=}w3^4OhbZQ2CxF8nav6VgXO%-nldB9SH`1Q>&K>X? 
zF}YvA13g_pZWNTd;I~*NRe!#Z1P03u;`^8{B^l3cJYUtBC^yIRidbTb# z={#3>$vypdO6|8hQ65cAUl8U@;ox0qoVs62b+)A@_JPHABP+h_!?Uvy0rSJZKFUS=%VRxT0$I2YGIY4?$J*qnUp2CW z6{m#(7gtI;Pr&L32mNi8f4li7KLRDc1@&2%OESN0_S2>ak22&p%+;CvUx+*@+#lAN5ewok+N zpI7A+W#sbI<4LPY>}kKL344zjq`iLsWPH84EDujhyrUVFd8`Gs#S%j=5z0%G+i{d$ z2Y#EtYArHlebsg;z%+Mc1sa445KAetcynx4R_n zx6FwI5L*=k#Y<9$Mxsd&(eEDI-ydIsvvwMHGRXv`P^A1drxOA7Q472sFMI!{UF&Fi zUacDu$-KV*xF-2};WtN_;xP!(CYtCyZ4~B~6;|!En3rL>)%nci1uVQ`xZRjElb{!|)T73ff*g-nJ2|j@dXDApljpa!2H5E49 z|K?@v)JVM)-flS}Wj`DH$(<*bT&hC<+Yw>xkAT|@x5j^P=?^ZRt4W(LeL>aMeny^+ zos2oJmmUA3g}maR2k{d9=-OZDDF9awy@vk(#YEEV0pN4?jT!iCIp?ckZuo9>H)bJi zkhFMisr~nSiRl@Nu~%F#=A{@hWJ_PRU0V~s*_t1kOK&wPd4o1b%`2lz96KAw$LdJi zuOXMMAhz>@yND+Mw+|y3tiogH!skUozWo9?B7RqI0i|=5k@Yde{fgX=}RnmGQ!O(uHLMqP7OKic4PJ>TvH6r;H3HfVd#rjhlhO zG#<0k!ew>lti9u=U=uCYjXFwZza+>G{?h-pJ}2YIRt58htCt z$-gEKyYU|HkF}KdwgdC4;2=Hosl#rt$%@?tpd<;M-c!sbWTPYF>{ zam_HI&K38RI~UyCwwtl>Vv>3D;4ht9X(HW>d}pYc*LMt=!&@WzKd8CkT}GE^*PXr6guT``d-r{Op z{b|eIY)3!zL#C_OCvx1YCi^7d9|V#S^g+7!9&_l{>IMZS860Shj9=+-ll*Er$f?oz z_=uSJ2f4v07AiRzg_HW4|2Bq~`}}1WdOl6=g*e*kh8Oz*esqevz7O6jNbCtbS;W)4mOcZfeHQy*g5QOCEYDXSuj;WfR?J+Jl`l zf!fxxojlLTc;B&5Sw*$_CH#x`ob6}fcO)p|#Ge4YW{Sjep{bpLv;%aG&)Pmle#d=v zdn5?#LL>=#^%f;Z)5@mE@NRnEUu1i-!*#A9zKI?0k#R53RGWDZ8HeP1b2u@p(l}J} z)3MBGyWBYtm)pk^*S-CQwn}nYIDq0H@cEV?mwnFRpBQ*BxHxUo+>lhrLVRTd6-&ip z!K_V@)$lOK$j=`W{fI+^>K2rFr@dSGp1Q{#QeO@8NeR1GZ?j#Gc)Kdemyt!Ki}Hu_ zvl)xJ!(G3HbV=`ee9*)m#Xgo z3CL73;E<@3Z;GCMwtfQeI9#Az7eI7t4?Y1Yy`|J?Z}8p+JblN#^oN(+f6{(r&|VF9 z+GYhKipV8ABD4mcGDLLqI?l*#-;N*LDS^!0bee+q3B+j=r>{gF5O5|E6NKreHidlX z72`jN(hU;tnGCKs2a{qG@tWC?s$Qy3R8u`NE#`yo$iIIW{F0~7v1I&+J+UBHNTx)q z&*-|Oe zok{VrFU@{v@am8pT`971sw%d*&z5mOp*S?@z-f6X_xj30&c05QrER(!UDi~3|B?49 zuW{;%t8p~H(YQ7{pK-5nmPSf6;D<0$&(4=thAd^Dp>SGgh6vi{NF`YU^xO4F=orNbFp$nCt(cAYs0Lc3?81Iq*E|u4dNY zwv*T7vO+#LqA^+{WaC-a-i4YgdC!p=Uz$S5D<(=diHeR(GAlLD=UCKBq-Kk!(nHPC_XDtvQ{Zo1_qE3Cvv*3sW z0O(ZbgzRHjAQ*I3qh{R?VOc7IR)P4>eZ4`tgbG$FnceY+5W6kL$%}8#k>I}A1hG1U z8(g4Zzqi*WDG?tNKZ0djjBV7;W^ph%pIxq}_kjN7%c&VDzvi2E+FT@CkbLGOLCAM* zYnMEBqq4H{m>IW!b+va6g8!Uxlvz0)(BT6rXI~J{>Hr;X@A_N9(6R4hz9ae(Y40Q7 zfGrU*=&J-wfY59nWA!ESgPn2K%1?3iN!Ke4ASIAqMVtXoAtKworiVQOJ7$zK{T{Ar zu_BP-H-PUO;HEShp6yTX8khE1Mb+(2ta^=XHoc)Vyb_VpM9wTLV;E@bTAJN-JBW%< z{+`{*ZMHOzW`EXWtz+p`){m)OqNLp8jh#~#MPFXkfAGmP8-E*wpB&0ZJ#eU0tkxjI zZ#6PBS5U^NKDzz0xP0%sn8$%O(#La2U6(J;dOt(x_>~$8eiD<3zR5LUD4wu_;Wcd~ zIZ5UBA<-6Ao5W7(&FM;AcFlw`g8ZdIyNq{tJM5KWl5`7u_WfX66Y}(T76pAQFD#>BFJD{3L3{Z)MJ^ zZMMFWccJi{N(SQQU8kFNTZXCD@m9kT2g;(D9}lHsn_sTAlRX$}zzDMFofJh$+}FIT zYpZTa`Qyp_aN(CyPq~yKpQ4qF_OVBr9}xVa0=~6`J_+Xao~m}TX~i<3*c$ls6;q9A z@6AfzSgJnAuD9poxz*Ky-f@8tKh}4y=Por;^@r#1#3X5CzqzCnK4^t9-7LS(QsK6~ zdhkLaoh<*zc#80ulA~pY<|!|o0M31_5_#&!(X!r?{ak(z0S@3`ovB{X_vTc8$c~O{ zeZw_@t|8_FuM@_(=#SMm=~3#3R0Y3ilF|OOp!X8G5oKejGZ{61hGCxi^nth8koAmU zvL$`b ztV>VyUD$PXTvVZ4bNj%DN-g%Quqz2^bS%~Dz4HaCk^UKRD(9sGqpVE7-xc5WCBce= zUG%CV*UceVdFh8DWWZMU^d~CI{pqwB4Wgr)p{>44k+5l$s6j7O(c~qPAZgP(Y=)X6oy9>V(~wnwKAI-hZ}jWu{xE zJd~l?s-e_8!(b<{F_}pcmc@~<0QU>ujpqwHEiiv(`b-V%0kH?k+nRwYxhmn(cXTV> zQ+c0@QA|uUgeY5#pbFkUK-ltr)qdB1u6fb0YJtHgtSXN(_x2wFgXi9p=v~W0Tgf01 zOH=xX&uO2w2P4i`fZ{r7vN7%@n&$xm(D722>s{Q#%Q0TC`2q-+y16rR5oR1T1mJ8Xqb!nq0%zHf74qMxN@!i7N-%hK7XnOfWc zt%6s5XU9iz@)cLU74QjLdb>^nahu)&Th}lvAxyG+H-K4`aLyr-jpW(P$iGzN-@g#+ z4}bf(^5gtzUi$sIG6JlYP{zb;Qax7Ad&!tNg#+WN5hT5p=lsX!>Qbxuc3bi>bczf( zgV);Gd+KO3uv-=pMxs@sbDfUqI1;;+BAPcHakE+>K3h&YWYN`8adQ3xE;t!4(1C8f zv7#S<>ugZv?qc?rnGNBxo-6pQ;GQg|a98&ZY`e+r>W1fgGQ%{Oc}M7CUa%fl_ualD zJ15VU>Mvu8-1pPY>AgHNH6?Hwlv}YbU{C6nES+1XJM=kzEP2k)?@!*$=kV(7m1nI$ 
z@$$_wxAdU7q7z1Wrd8#>!Boy${U=pbFj~g3!`!L;w6)Ynal3LrL*|9KCuPL;g6t;F zQcmSt_7en(O;P(Ag%XlRsrzHOaCx_8NLdRS%pX2&ok`AjNYcX{CvnXdMM1p*s=sTi zhB(AsCNm-k`)N*`(5oH_%M%3fBt$9KiHJ-QStuoB?^-q-;$vET4W*!ZOZhFfzv7_1 za!X=P_yputFT_h+uby+oFZvn1e%S_Eo44N3oNgfTk8o|NePlZx7C0Kc{bxKJ$LI9J zM{C2+U^Y71-i8W(BQx`Xy=cJbNtP_H|$ zk-x~J=cX)VNHPn!P5;EkJ{B$JHTFVlYbxlB{>~KTUgGWY+x^!_N_GO6{+nz?NAe9Q z2O8(-@hjCCc_Lt}2bcaCFfSQO3*gwj3TU1-1Kclv<#f`5w8!qF*rGkEKr{o37gBrK zvrfXas?qYAQ$OJv8vYr^|75du(ogVInnjWu72$`j%6h#N(3uRn(p&bf%VZ2iYX}x= zo0aIsk>wb_nre^G;hW&cmc&$XG7!;6J?pI!^(nWs0aWiOb;xF}<}ZnqwkGN^J+i zXEs6yH?)GJ592UDZuj$kXg)Q8aq7{?_<-{qmrHYRg%lgYH$KDcN^GK%-Tj>*NfCat zu^TIJ+L5nfnCTFfip|bV87Hl@l`}Z8`)0O_APB^@chIfuC#AquDNyK1&~Ij@?;dGJHG1q?y`V`>x*^Tnn<2!V905V z$RhIGuq6KCI)e&KqmipaLwzd(8|&*9I>x2v&rfpK6tfRM$6CIwlej|KhmXJzhCH<+ zac$Wm3&6u?9n?_@KJufy*tRH7s*~vrkg)n5+E19M;GOl1aPEZUv+(inw{oN$-eJ9RngWxm!fx{jm+xO5cKiRo#!^1?XLZu-rn3>mS zen_ywHQd?IwOdHKT&vG;fU4SLM;`fe+i-z#@JtrB_I6HWTi??>c`3)cW`pw;#qBw9 zk0Tgn!v_66<}(=_IQ94HUK<0+oC=4K4Odl{n!-LnoKg++_@$Lsfumxa(hmFBoVoD< zAlky-q-r`}0f6ML)Tt$z=3bgHs1=(pyg&bGCi}-_9{x862@tU zZBIN_taW4aypF~k@yb87UsQcw+z__bR2se1Yre&4x?TR@fLy$Nb=E_4Tt|2;ZT_85 zy~K30S4=BYW z_`@|d^Z5bBV&LG4{cPzgTZ zD6OH&{go^)qqK897hZwYpBBAEsn-6ZOhL?8rcwpz`+A)$UR1>^J~c$f$XzWKbyL#K z^$H~UcB)T}D)wgvl*Dwah}tb0z7_FPpTa4{rgB&^%Q)3j-0gLFjZIZ$VASoNCSA>o zD-Z_vT+{i#Viyi5HHO95nC*sU2=m*NPwh*YR;W(fC!aFTEky9Wt$MMo@(-r#z}*Efp1=awnW?ArN{Kc?zG zhThAfJt(FzyHd0*+YjFmk?<`msy!I~;43yV@v}a!CVzu^&pbX%wReU-?dLYr!PnP| zTbeWbL1j0027>+Jqi!1WawH~%$S+HKzQ7p7FiT5)R#wcNUAF~Dt~#jBqM8-DJKKr4 zq@s!u`N+HHUSa8x*|;g1fF(a75ZyBYyW(-<7tQgW26&JV`pW?|VZ-kwPIh!+DaZ_~ zlzWh&tah>6y{t*D!i29wDi(I|#rKGT)i^6W#J^|nSR%-P;~hq0&mGzXD!2XIH<^o6 zHdzrC`qZFtyHnF|G1hs-KHOmBTlSwkk^7ln0orf7+Io|LD%T;{_vQwo)noGc3mpq) z2bXVzzkw3 zjMuo=N6Kr@d_)!1a;Ng`3THQh8A+>*<{ZEsw~J;l1F0Y3M-8RWki(+8ev$V(BlJ;<(+GM>{A$`-IBLJD-p9Gzf*W3z4?%xDk8jz`(WB zVT6%V!5g0GfOJ~*(Pu2|ef}w&>1C1|%R(wRuh1<4{9F1aUGQLsaVjAZ%SZ)2py=p> zO08JdxIqd-Ud|J=L-e?E=xU>tmf(*o9NWDxonpej^GZFd?Q=D+r z?PLSLDhC;T)G%~DgWY?lFdD--YHrL9;)!bylB%~|q0esEb(`5zPwh?c!s~_^ zh;Tnt5}b2ue5@F0X42ZUwdZv(%_!qkZd(j*N|J zkx3YvWV$?sEXX=_wHPkh*M|0AVvXaAVbaVymEsvs^=3s3{&3b+P!k;9gZZSR{v9MZ zrX7R>zlz&5wRv=UZIvHl#I}+`1xV-%`+2==L_gsbq%ar7AH7~Q`}~w*fBL(ikNLsP zpGo~3*!TceTQn~6{T}H72|S#lz4h0uad|? z?dD5Up(Bsc+L+i);E&J6@rb;!W(9lzDITdiAcmh*sU-xLRZikEV()B^H1LT|5oGz- z732==<9}v9k5WS5C0zEk*6z3@2>mQm#U?QBhw?LIcoh9?BG)&8l(8Dduj~YdHHg(n zuc{QR@7o~O=iHcROo;RzzV^Owax`;pWmom#_}jyTR-cJ2-L<9+WnI-rUomG|b&04O z6Tqf%uHN_{)xmvchTHPZ+p}91BF2HPp9iQ#oQzqd<#zp-B+5jaDPaqKwh0vxT=vF- z(cF3rcHGIieEpT%vQ5x5fmE!E&g{$VPr0oT|#1U8SH!-3;Gu$$2*h&-Mz;2~%7RcFPO{ogXigJaJhuB;Z(p#*+?ff3 zVf5Mu#4E{jhYvsCB&X`2Dag)5Gqs^C1u?@fh(zSf!n`h}N#Pw&9g3l6q_ot}`*0-H z)^Xm}!8O}nN(^4+^PDJk*Ao9Mos5wak3jjiSwatKI*1pCU7EYA)>q`+>pX!ht<#9sp zX*FlR7K$}`>WB2+Nnr>Nb`Mh9Q_Wc{4HQSD5kUq)qnbx=%(wn~;ChD{@5B<9Rh&K< zesgPSt^x04oS&S9y}}?nOAihVp~->wov{=L%!Q$Im95MKyP?C3`ZhdP!FLxhwytMF z5Bkxq`Uw*8tNw69t2BPQH^dw1eb~(aONJlc1`pEt`tMTO4f(Lzl^YcPgtwlxhBlPR z^m0(gXEQyEd{xzcnWq&YbZvI5-UH?w*tqa9mDF2#Et2_th_7DbOE|Qg(}A z5!IqfZh0>iU365r2*{eUTZ#wI1GmteLPvJJkw~5ee{tLQHta@I?IH`wk2D9?{PqdU z?IV{~0ro}x$V(FLCm#UyZ>@k%Nz)8U{9=n&RA8OGS2%R_diifm)PM>s)`LjB$VR(! 
zwkYJ#u%}Aw#3$E$DA_pUxs&v)g^CG_XlutF_FvKD2co}y&#QxPx7=_d1J<8rC+6Cw z5eS2%+F0OrXKT%6@y37;jb&{g5_@Y*|MHu;hkj3cdzU$bAn_Z=5uczjdaxqEzs))C z4bJeAfG!1II(DAsMA37GdpC9~br>!s%lPPwhAK?cm>d?U=J}@IuRNBpD9fO58twk6 z7vnVc0TSSvpW4geJlx&8mphgD5urb_ln*PnaT$$qt#zBp0YiSUBPw?1QlXuP7h;fB zqJh;z^JUv&-x&abUao4)H+gGYGRMRK^7EbT{1oL@F~-nGg)iIGc6j?OGiAQ@Q@$Iy zjgrUB8!G4vmSz{}q$byKF#YHEI7g=ekdFvWr^ z@jfj2lxsMtSSB3`JSj}-;t^Mw%>X?7r}kuyfmE@dSA5NsNpXFvqjGBAPd|bzv=AI# zU6afQhmIu}asyOKPub4YOHlcZ;gc*mXX}&Y!M&s_=%+ujd#0z@WF=-FHYdmCMet=< z&+1)du-m%sIy(LhZ`i}y@B%_i{t@-6aY9MiS(MeQw}X)FocbMSc>j=44K&=M2hAS7 zHSNBH7Wil!B)j&arr(rsUaAQO(Cl|V0lI!yB!9!pVn)efq2@)LI#KXM*Rmf>i+g%z z#VpVIK^mv0@q!`+z80R`jRgBnD+xG!D*8> z?Id+TSSAoo?@^5ZaTLkW|7vj{PGe7|K_UlTbRjFNiLb)=yzmXUMGBXNX^uNhVUqoG zzqIXzdM>VrMwDh&g{XJ+SjwPx(AvuPj4k#EtjLt(c4j%GbjA27Kv$V>Decyw zST2RGzdn5XMLsB~hmma)Ynd&iYHo9J-al8OV+PVh^0di?T>TfvigS5gL1UG-K6d12 zc$-WfwMHC*>Z_)SG;hx+?84q;K`Ud{V7oIN3;Uj}jO2r7MC#mef%a(aZA45MR7k6P zx2tn8x|PN5ZO06upFF6_-hkH+|54{IWF}NFwYt1nXawmay{?kvy{qkARtmZGtkqS; zJZy^EVli8RM|a!8CtB0jXf!&3Xn(;+W2~-Cpl_|kfX~a^qjdRMui%Yp!hPubrM)aI zA6j?mZjKs(zF6g@fZ3GB_?A?|SEa7OHVZ#Z5;A`5ZFLYfsO0P?eZMUPwdY4isrsdM zG1%pM>Ol+C1^o|ZPOF%QmO^5$mk#9;?V~8dWOE?d) zyu^b{srhO(uWvhatruDftDr4ZHh3ooEWfq(%x+aBS3bI>h*QRCPquDbj=5lH3q^rG z$d%4~K>Ndl;wi+-PN5`l)s2r!TQ$As~f)uAo)dgChj z{#k&G+&_wgkKT~1-g>Z--Mv~d@~i7pp{#DLZ5`;0D{ezd2kah2h{svP&g5_J711|X zY@$6*2M0l^*Ec3-Q&cS)!_Imc4d}b0o8oy_M{h0`fr9PfdPFUwEv7wH7ILagJJlPH z7zB^cHV6-Uu>?rK=;z~~L2_)5ZjG)cll)k|0Ykv}(g^?lgkA*jchqe7x)^Lo6mg!?td z7%-G95`zlo+(nxWc`w?8BmD>!YcZFGSE)_cpuV$h7jt_)vrr(Ke5 z=T2w@c7b+K`&^mRq5gg}tLNmKq|4-eg~={Y_U6qH7F)Mqk)4{NslL!p)!P{|w`>X9 zt0N2PcOe3OUpVKj77TUhC4S1%!LS=W6e1QiXVz!7%>4RJN{@}lo!C_?#V-=mW%#D@ zlVWeZ)-Q(bW~&NT_}zaZRqT@qx+>(_MlZ&YqUc|6sNius&^9@OVWm{I71yg9+Gv*l zJPjrAt6-caS3Y9zwu;<%J5B0mg&brt_MY{$m5mxKn#UuYl*CBa1edc<+~O(HUBHG0 zFz8=Zncz+Bl8<_2dyGGv{B3TE&w3>lp_&Mf@J6tF8iHBqGrJ7gwQ&r1e0i*Io6$pj zo%w|>;%An2Dqvq4NTR563KOf>SVPGpKC$HTCSQ}Bjclx^L+8jXD+k#%B9*VCVXbn` zD<6+fCLDNSU@X2E#o>pLd>0G+;VUgj1zZ*K&@R6X!yA+MEa}U(lsi__-Y-wl_#9H? 
z!H&}nnWB&KfTkL2EEddsLUG%PDz1XfiMDmU3EMn3sM@R$3+=sK| zHAUjZuP^#+0ltnRnF=|&FfaAqvsy92<%A@!DypQsY~Gmp55|z?!KLKDV>ZBjm`{uH zg~Hk|Wg$FY(MjGZKe$9qUL1!Mpl5ayIN4gc*Ao%2H=L!DbS92`A%hWeOJuBw!(2a-J2f)isQ)JPPc^AKauIr z;Rqb5n2d15k-5c4j^1GG3xyG17)kOUIsMAjXOaT{YD>cNejm9@jY|8n-b>d5dTh-l zqPuYA(^{SMB!#p9u|UjR|50Nl&}wfl@%V?M4tu0Gpf?pyn%-AX%EKwY=aoYoJe-hD zt6udpUN7-9f=?DS+W?yDO3{Pf%BHCt?BRHp$?iPP1TZS0M7k6F^F5sQ@yrQA6VQ#< zzE@m%U&yAF)htE{`OPt;c_!NhQ(^P6J|A1iv!nC9TIqnPap=KKuJvlP3_aN;jw2H%!r3=TK=D63ZY!hoVTp6#r4d51<<;78!fu^WekhuV|$c zACXw%-j547(o^Ge{Nd5_p|n^Ly`g)4->1KCpoB5Dd0qOh0y$#wB69bMeKU0<7KNsF07358fM>{Z3j zS>UD`3IELUo8DcJpuPj875y^77(sR1(XhAX1Eq-#=v6Qu6F6^`ZUgYZGg6Qyjfq^F z-j19!i+t}}7mb(=q4e1bRlSua3E2pPh1--~nwam1Z>X8Jlo2MX_p~Lso_-pNJJ^|Y zP>&G)=>@vj1RiZQd|G!U#kc9C{tBJ$Q#XR`lG8>sA#%<>DYoUl7v!DsaU;;fK@s8& zkM-Ot-t|PDZ;tUnn(Oq(H=I4)@0Pt?Y?x*f%P6}TDai2gushL2!a*zcC1a)ZSla3J zUQL3~S7XTv12GfzgGh|ayOGgo>oS3;v2IYWBXX}-MYgiq z#f@Arlv7+FD5C{RoKA3ooJ+Vp=&eBa7c;M@KFhu(u5WZ^&Z4m=!&g4ues|xS;bxhy zQPkM8aKVf$ec*S?8&H6bj>n`RHJ-+9m$ooNcRy(KCl1D3?@{o|XeMRL|*?F9mNmX2Fg1suEI#+@~C!T7%I=bf{V z`);dz?<|+UNfe!VVx#>Cfkf4Y(fY_di(|+ZS-4JuUk2GC;bizDtp78)MUXv=*Dz1} zYAM5RfXW<+57pt*c@4*Em_cr$t5mK=VJB2x1CiB<@Wy=s%sxmJs#Ayz-%li0t3{XI zubN}MxfKz(e6geAal(VzwfhX|)fyC!D7&!ndHQ$Z4yQY?pZJRAZBhsuDdp-FSE>v> zp44mGKZ(?=i6D5aRzLh8JafQ%$E&fsG6de~Fd-TONar%^qpF^%KE%DP6sul@*UcS^ z_gq^R*)k9Y{OXq5$RJhp{OEGHRePC$cw<7jdX5c}LGm;j5+uKbTdUic|NP@e-VcPp zCp<&N5UR$I!LIj&V-Rbv_UZ6)1*m%I%J%0kNDJq!ug7o9n$9{Bvl*FvPsZjNgZs)pt#}BD0P!N-N| zAWb80X=}>4WYjpbrQ+#edXMp@q6(WM~yVd zIDd8jZQD6g8~Rz;f0|q_5q%4CL1f$z6)iNKZHiOZq~vR{_FwATZV9fU;KNQ`J;@yg zx=&tc%Jg5dk|VT~RJr=|-2(NZAk`Pl@b<<=mFHf1|EFU{eZo3qhQ|6Xj?pkIv<@3& zvT#x->}Kw*6@RLMClDkDz>_)6SPjVxy)4>SOVy->yiImCO$GesF7utdNJBkeV6z!H zyY^)p*Et@K$)bD#jlz$3Y2@-7GSSpv*Yx9m9)w>DcxE{n<=$U@Cv%yVM#DFgcP!N# zL#uw5E@~fVyO@PYWGS^3{>nttDOdo%$Xrk=B+jRdcCECroqQirr3I3|$Nmcm5d~fr?Yd(dfQNfv!Zquspt<5uaaX6J_Ls z+M?j)dkU|=-S%+%&^h7C2JSul{qv<53iQHu{ApDMGDCPxcLHI`QqI$RC|I-z?TL8F z;<7cs+*4_JYu}%Dz@8I%c5N&khaZ0N&9m~rT@4a2|GmzOcinSgDu4=&_Roc2#GQ6k ziAyo6%Vk*8?D&P90u~lbwg#f*`E6rkM?L6WT38-Z2pmwIrxsfypYrqKSNs-E!&wdJ z#m-=T`WHJVO$7lJ>9L0A=c9sg68RYLV7pMiipC?^kNO!u`@N&Ks2o^1Wn{o+ym?rB*G zVUAmuD|`zP*vdHr6F@MILfvTRH3Us-bTgN)n?MZo0rFQuU71NETWP=DK;>&Jc1PT_S!~Ni%7miZhNeoK#K4e z>bM`2fUNKGjNYR@o_%O@c+&CsIak;Ekxbp#f{5$K$5>^r??9V#UHfb9%?Ud`&=EuigC1dEWbC8+R~`))!i zpTpnB6bu9+8>Tp)2P|-B;>r=m1DzK^%@bVoDD?{Jp2k$V?gDr14>Ez#X*pWZB6d*b z2)zxaF5f*EY=f&By%Rl(iU*I{Z9C2!)#F_i{7=JRE??O3K0uwi+AnNq93)MsNWbo) zOR?**-c(}4bJCJoVIzSA{tt3Qs*~Xr^X=a@=pM{IjXK8_(@bEx3(a?RY6Cth$}@f6 z7yrznweL##~ZJc8tzD(j7S?-=b zLH%Oeef4DBLD&Px9irlU?^N=5kjI-Ua2QYZK~(% zfspiPgeenEe_xO{f1qqaiFV+XN;7>^U13vO4-*fgQoHxh4?R0Iq*8K@H5TGoY5ENq z?A$>o@La-)W9Yh;FLnO8=z_Qfvv)YY2%Z>x<{O%JEuzePi|6j`z?WO=SwgG=CJc#v zUlu^SC<^yiP&)KVzM930wCb-^XcGi)CwAwym8s!DnsSc}^qff@`0KQ#?fErRnHce8 z5iMQV^__@mfRjvpV`}SYt|J^jhJFO#BLwVnL1`IoQK(0rGOf z82XDJb{%vS7u6@Rh2`10k2)1I>-^qu8FXp=Z)<9K*u-Crkpf&!l>^e@Ht2+`u2qtQ z(QL2I)&DuRd$Kg*K0daoZ{bdm?b|^aWR=lqF6ceEuHVP$BCb;xx)GgSFuUIU3D91I zX((j(ufKb5=yeV0l#wdFv}`%xu%I;n3sj_({7SW!S=0f_?arscg7SLIx*~Gk#>UAG zOGo*nNER#kzGFnS??g01iq_wae)^t*mf0X&*qz3W?v$>iH?`@_jb=)YSGSb}QqUkc} zmmKe1tzf{a8En?CQrs4$O#{l>@!6bf(9LBBrZud6XPjnG1r21hH4?kccI42119l1b}*unQ}uy%Ex1@An- z>)P<&wg;BdAx({{;?m{2GQqVUa^S95=wjFsq1s#ePqP{iREar4GD9H#5I2qxzp}tg zbcIgv%}&g(Dx1B++5!0!fqDy;KYS|`GbtDdkF(n9XXH5hKBKT%h{ z2ShWgP;@z4Gh)=R`V_RCz9D`1!k5&CpZl@Xd;?a>$Txyr^mi_q7ih=8#Y12kH1Ydj z-4(YUnpf}0y|4xq+$c&Ks;?)?gN`oRUTzNUiG^DYA~P5yubO&k=ic^C-0SFGvbQnu z3W^qKfFcKQDOKZW?k<;KpZF1CiEgJ(GbUZTMLUrJC%c!q1Y|~Ca+J|Hr;upY;2-gN 
z6^kR!)BVRGyx-MLiQzlh8(v!D>m_C!LITU~Gy=9Yx$&*8QU4S-kf4)#|K?5r-ZxClL|arV7?_ zZ66$E1&(waBMKpRSCv#d8TQg5k_gJCWH!D^qG!;XKX;wiHn=HugW1QDC_V8FR|L|w zt-l$jiA^+px96m*z2?KLVVs2%@!u?5Ns7W7_M7(Nn}`T&+Ll%hDM!E0pwl)_Hg136 z=OkR9N2d9_$;Sdgnhj@Oi^4a$C)ta)=qil}4CaJ~2&W&h;2X#WbtV-r?aq!S9yp=T z3Fj4T9yH`lOkJ_8hF#f>#~<|+bLKRzudGgt5CRp+`$x5`=|1N)E7X@F(!$S<6VMU# z8|UMQA0;9_e&;Gffp&Y&-!2CmXAP|hMc`9$3e!}5Dj&hleVHiVBYPe4cOfs8po9Ea zm*Awi3o<0?iZ1^&>{AbslS(_^4M{jU$~}Eq=2!Qp2W!qCM(^lQi+cJ+!rHu+?C*uqS29ef-TT z5|tEKO>#SvVn05g;Do-+c@Es=`H7o6z^+wj`F(u#N0H+5Cj<5>kz#e?Pw(m1+jFNl zclQ8RaK>1?-DQ-CQF9mrohXK+i9I+{I3JGeJc0@=C-4wF+ns}jI@O5m9nQLS@;G^~ zoe8rBY*!{ik!!8&`N;+^9ST;X?>evl!}#9 z?ILFkY&MX@0gDe~)WnI`eZJPq>>7CP$|6jI4!Vf?;wAZ}+B*{*R;Fcw4PNRBCH?EW zQEw#32XZ{!%g9LnEqfD#e|;Bw)KjcmSAR^6*2lZ>>oLvk6X5v1skTk_Kx8!}mJ&oo z2ULt^z_?^9^I6GaIF`F02{S2Vwm$09Jd0IgCol550==YH~Cha3_~z7em=h zot6hR@=KXk?a~5!lB#w;)z*bv+4&Gi?i-CX5nS4!Fp|JVY1c>l4VT|f5h~lWW_OK4 zV=r-B@Jvt;q5kmK1{XZ+o=F_DU19B!&}zA5N=7QWb!qJ*$KbY1? zfh$9}yTqB(a_ua<0MHCVVV#`|UhO>|U-;A2>ZgzL>@b!X(#1J#uzS;q4kf-EeMCLn;}Y`$xvi9O z`e~s!-`ddsu#Sswc=R)Hp5*)T8(Eiaw<-GP$bA=nGmRm`a+ww}nHFt{QbkohTkj`n zbc@o8=&>rbfr69*pu%L-+uj<-=>0&0nI>Fo>!e7}h59l@>d|4Ym(n@U0N zQFbxUjqXc3cm+-xY#?3%f`h+*Db85XzWx+;KyfLT)I+RW>$!tIO9})6*M$Q)$|D7p zD$lOg$q%i>Xv2vr#)=ac;;|#N+oRUOTGmlu>*@iB0}UB#X#_RAaLaC$H~XtGIlIrs zv{S={3)9DGlFujB*$273B{VXmaHlSoS@z7q=(PyRAD^vxRua9LcB);A#fjkebk{&i z0d^91tZ3rgfbFMGwpZD>Y8>WfLSI((E(Yu7eAw;XEJzn{^!Wk_cInRmMLWK<1MZ`} z%)vi8D@9Z8k$WfIP_}0xZ(Jw>mm8Xb6&TzT^{`XvtJm_7gKT^suX^JFs%ekC?B0o! z?1gC#l{)LDQ_r6ytf09q!laH)-!SpPdc<$?KaVLNcJ-Lw!>DmxA1jffgnV$u6<)Ar znl)Rpwcn)FtI(^q3lp6XWV9ro>utgs55_6`sb-Yw7?jC_0#{b1!zSz^We46qxl2I@ zox@>vpP&=$ng1dgfE+*h8)4Rb-Q9N!vpOh68)n;eQ++*|)nKlc)Md?M1RW@oY5P-gB9KmJ;g#S7>2>2TS&g!5Zcakg99H zz%WANTBq)2C^L_Y^P$yOj!C-9P%kFxymoZAa?9jH04ED=t;1e?iYx+NYbqT=p}*}| zJozAC`ySG`k;#$&Ue0H^)wn-bl8t7=+c&l*lk|KQw2liE_n96RKSAm>8WetzgJDdA zZ+uGa)0Ly|sBhh<9U3FjFl*%R!qTYYDm-o)oGC}&i+=p*RI=MWnRC?r@gRz5vxA$<>51gxO~3LD9@B7?A{oI_*0S?A+K^Q zM9&?UlC|xeIg^|m!*9AwnuwLJbK<2TG<(MV!8;qJBJdqzBFJ@dSGey=A|7nd3Cj=XH951JU$TKj=dt9NZRlEWbuBvp z>WIBk&F-eSQ?II3+g3;AtR}>ltAY>C`$ddYQ`2%p224}aeqCMW?isURJ#gU2)Du&# zk-#_!{ z_EuBa{R!)l!R4gP8J3LW%k*Z_`hZ)G4L)nCZQ;tG5XW_|Qc$;5i(~r<-vX9;q4@RG}tjY7%V>!99u5FlB-MCTW zizs9D$#rj{>WLB>VdBC@)KM>;o*k?fri8!tJwS;*|IZKi7?K2jO>~9*nqG7e`S1pE z+S`|OgG?IxI_O}+dsTjO`|`wK;>{%G(KOp}U8lB4)n^ONXJG|tb84xYQpTHFv7o6j zLGuq+>Kt2m`pOyTi1C^kPp=EM06QZ^W3R<<4h&brNdA;KWtnY?2V#xmE#Pnx1AbiW z4L7CSS z>Z%1hux}w!Mcun`Qmb^|rSM-)FPqN|KFf>CdoVh}E0<#N>Fbf_ha)lke1}r%9&DjY z9;eQg)>vf|uIMXi`*fEexq|#QoSDkCMN_9DJ8Uo~9=#txn0yt#Fs=Q-n;!Ly3@v+> z(4EAikj1WS{U&(#$2MD)dD4yAtYoQ|4W}K)EVao^Tx50i=m5l-iBt2V49>!o+s3Q< zv6Iu!(P>m0F0q3tc}=4c?slU61|&p9avt5l#=cjUe3@U zsV1WI9Ed;CidnI0+7Xbf)?^p3(P?BZLfi<*&MH{Od>OT5U1A>}R0UFYnQe+J@AFPv z`$$#}mvHoSjS1%T02hZO9dr65stnqrMGiYh=Z*D7Kb$PSt|aIMM&Da0XBj!hGEKB+ z+>7IRLk$6#6iitB52bOyfsEYH%~0JjkB1rb!l9h9G6E7I3cvTML0@aoUhqRNIbjV4 z6mk-C{IjWxe%4{qc0bxkZY@=NLN#aVT}C0s=yge>nVw;!%V1{|zp?iB3fsUZK~;X! 
zG@J!72OWJ2ossv~F_$w0bf{FTmPVStYx8E62xFGSSv&Vy?UACDHah9!t$Be}`Z%nq zD~m--b;(WAJenu36NOUe@Myuu*Iz6_08Eeg52n+HHlczB5G0<=uO{8~t>H}Y`P^_T zdW`?$Vg(a}Ww(|WB?}!~s!y|%w@AydM@65vyy!0PMB)GmT)7=3 zYxuL*Bsd{Z*Y?DbCwjz2+^o2>ZI82@VKHYJ4F z5bgHeV>`9QnMa56>!qE`KX>-3&Dqe-d{d}Yf@>aWWHxR1iWSFe6%&Sfwx_L&j5JtN z(p1Ut@%EQ02T27qShCT-uTue}j+o`uqiwzHQ8X_LV(07qK@yV%M~+t%@X++=!KDL& z?L@`;$>V6phOfS-l{`C}UwYRX+l;n?xzRUgSz5v|6ayg#(&T$CQTF8tNMNIt$A#{HKKlgOiplGgtnQF@K> z=FeKp?^WtVNeCgX``4qN3eoDqIKQfEeXBhkAWU)ObI~EKf#bW?yFzQ@oN@N`&*#o1 zDxt_Bo95p{wha6Jke-s_P3Sml_1(T5kh_~7sAW)uBOIMD;Ib8Zt(=D&pV zVB0<(A7x9j^#2l0PNfR#ZNho_KM7|z?19C931@=H{i>JbTpc1bQ#f=n1fKH2TADdB zB#UseAqz-{@~yRMnZ=B#Y+;$@&J#cWwgCeqenEl&J7uP_aT?-0&kLI$S48lhd{RGD zZdCs-H#&Dd`e)Phk8xMkghk-@aXS8f#}E*bmdIqI|As-Lh&wBZj%MEoZiC`H8JhSY zuVF1QI;b=xllt~;|5W_Nj#oA#tQq(u2EmpCrF3_wmq53 zs+nAmqdQ9Eek7XEiP#Ju582M?2V)@^W3WK_yVsXZ{Px38N-z4daKHN`00w6Lhk;M) z7xDhC?H{6VLCysYuhsS&9J2 zmi8OfTZZ-*D*H=lpfi*i)T-ftXY{q@a1yTvz7uxe#elMym!uh9Sd11Y0;7t?LXhc}vVUnGNY0h- z>pfsB0Z;zTUeuUDJ~?Xk)S~{AiE{ri-Ou{#VW`P^%!>m@2lWQp+0>wE1xkouAg$75 z3H2xYgv^gW%L)0N_P8s_%wmwB9JNeJqnGJd4Yn=QXW+^EJu_j&uh^#WaTf>q0XE;X6Z49@D!-Xaux$0Zf-y(Xy7xlqVKa7nw z_h%^b;s>MSK)+qh+t95i4<(0}aL_9nvR9~e{9JAAjD57^2|fNcG$=`m4K5Tt0fGYV zw|`4iy}$V8ui;>jq6~-p|ApZIR8bVpfSPFkNNgUv7(3m&gf;tap&>SB3X|mTiaUtl zU4vwne=Y4v@LxZw`Ex{WgUUYvV?Z85f!!PnhjyD=VTrB|JgClcq{lDO|cc{PQ0>JL$(5L@A3N`C! zL?Ln#7&W%B?SXb0I=_H){7nVDUk`Y42|yvhxPQ~_vc4$tce>w@?s=8G#QX0ofWHJ_ z&G)qr>Vv-Z{6_$GoZ+{j?F;k2c-)Ytax0={zx*$hC@!;;)dWhQ<8bKc$=crM2l2Yv z$0^}gZZMU(YGB^Jr3wfT*kM9!rA3+5?4dE(g*8?8w^3f`f_^-vjmRyoU}Y?(4R6Vj z{mIX=fCNU&9{X>^ZH<^@xkLezm6oID$D0LSCp#J~zE{N+m)&%J*`cBL8^1m?XizaU zOoeb!^~Q1KC2(7gpkYxSS&-)oVwV6I3(J z_C!9GBO?fxV#=JyemdTD&d1Hmh_9(gz2^-c`fp}{uuqEmpI<3Kd&}t#!{@xHkmC=4 z1>%(=UuF)JFgIt~Pt-nK=`S$tjsTgX555@9Q?BjJ&$B0>cApPGHiO-D^P;~z#IT{d z%?#zS|4I%&G|<^6>~i^=pJgaAX^vfd7_+hef1oxlOnGMCrf9Q4iFAf$mrt$u1MJPH#$v_(U5Ek1XNVK z6gnD`ET+QvYJ#w<%_(g@4|Sb>-)-|f;f3kF?P{1|g!9?TU0k2@|i) zScswbzLe1MnEEjX{9b^NB7j3M(J=rav%I+-04o@K?DuaTLxf)Wj{s!^ihUxqGo-!k^0^Q5o*>k$UB*n zp~-^IMaPvf?Am%Rbl!WdN1YT#DtdayJg>5(!!yzD-V>Ojx1EYwb4gKq2CTmwV9PM~ zPCb;O2P7FKH5@#@;%~kQ6hwaZ2$2+NgUP42^UjC^z5yA?YDY{N^IuFH9^@mG9wEi} z{<*dU_Vl$J{Z&Ky*ZUJOs=C(Z64)#sB|7F?+jRA@>IkA7Y0Or#RSlOEHm2x**o(Jh z;OT~#K1t`2_&2?L((%7mSOadZFM!M1PfcYKw#uuWR|g_^oL2)0#nR1tV<&$H)3`p| zt{PQBHfvKkG@mi2_#h`E7VR6>nHyG9y{N6rcNCKN!W&#RSAN;w2j5(s7}>x|QWyuD zFLvbB^ORh&nQT{q>19N&6$abYOn#0KMe2yME+DqZ+-I9S#)@<+i;?g;t6_%Ccb|~k z2$m+A{O5eA_)`y5R7gT>6QRIw%R7z6Z`ur{qZv51ViFfFpM|b+;?&pI6A%|=JJ|Kj zt-H>+XeSE0m1O0Q7ixn{vlw4=rKl?j1~DrpnymsAC0^A$16^t=duMg(JcRDW$*zNg z>0Lkvo2PusjMm3LfF*SKa_$%JJ0#(aukc2G)}QAlL?H_vbIH9giSQHv zCL-T@G}klAGzlha=yR;e#>G`TdiGA_T6EFnp@8FK(ftn62ov%1HAbP8cvIBuh@7uw z@mLK#mT^tRJ(_n(@toYZPrCY%nyTb;fehEdEA4DSPT6XxshO-6791-!qQ~KvPC&Ja z^`tKS;tX8r?ZrR(0gMGeD^XQ{AzX4cN!@-BdLl&NxyR~lO{aY`8b(7RYb?Cma-Z_0 zMV7%bl$1(s5W>Cf19U0fW_N!g_vJ7X&&^E}wr54@id5)?-8%Mz$8#l@C9YT99F0=l z?_a&e5UUg0I~x)soc&5zf0htE^E_oh$z>Y1>GvJ96s9;LzM_YH+_C&p22EXIXWj+|KF8y+aE{mtX601y zWLtU2>go7~zEYZ)Pt>!=5jprStdSha@=5<07JV@F>pAbk@T|m>S+7VXfdvei#Aee0 zrY%gL){mH;B6{!yyDMXxC`tk`&K5;)zgw`-`;91IN9|aN!h3J?yRtFfp zpjyAciW?+!*8eapMXy=D>TyV)_)X#MTUMantM;?eCZ#;yAsHRi(#P+Vk#N<1xQuXU z;S8>2v&b0NwM>z+g_XEOw>v8XS%lgeF6)E(trkMk@sGkgb%FLRSFSQHlArN1uxQ<( z-*b2Cin;bBXD(ftOYcU493aQBSvrQL^?#z|QT7H*>v6+?j-7Mw>V$a3z1`TfQTRI4 z6=An3(6?ziUGg@-uW~Ptwl5AgE>H!yU5Ur9Lf+@sc05jh-X^{@4_$ht?2Da7H}Hpd z+^VINp_;6_37|doT}k95vj0-NEL%WA7@O4}L{h$jVB4mf0v}C-mp8vbl#O?&2Cb}d zucEuvBJ=q>*GGWn8)(0iB9A|2>k2eP-Qp4Glf8?!`wcEz(m#vxGhNunfBfJ`CPU*i 
z*@GVY4@yRL1uz#^JmrMY;_r@zAiHX!xQ_aokEC}jDMYb;{{QVjzFMRXLb%Ihb0~ee}knXOU;;qTICmGj{9sNz2V} z{0q|~b^Y2&Qe3Vv@snxil5WB8K`mSK^)ivP?+*6@R@3~%Hu`PIe;IhL=MeX{ws!mj z?j$hD>RM)Xm3B^ZRq-&Lc8_)0XlSfm{laG|tBw%jChMT$V}Dv;)G_V`me7OKYi2p) z-nqC)k%PDAz#Vatcz-;?MV08sX#X*T?RxYm1W6yiQUK@4XSx-tdbYmneKq4v=8;8D zT^)Wf{P_#C_niND>Uh*nWZ|QQu6u@g>q8lVwz^*)zz-w`S0sa*u1A|ZoV5{|OhADU zFc&L6DWM&dxzO!2xntCz@W`mLxUQ{rlf^lZ2Zck;hk3&COmPg01InE-46(*&9>*V7 zQp{PV`NY0_xo2VbLy_TR^Dp-1`>svCfgbRHnMlz%sZBBPA4clSYAPuQe)rN5NOF&yN>`??4nLB_5%^a+R53b{2I zs}l?nGcz~W%#wCc_E^gZ4iU|+!z6DMJNxs)KA75i7~Tm~AKq_2vP@oZx2w-0F%1^A zj!%tZlF#OorTwy7ze;OT&i(oTMUQkA^RSqK%KqMKeINAPbZNQJCvD2v(AZVB=Hbg@ zn9-yolR_3I26YxoClN_51=QF6=R{$EZ!Q&?b1vy!9| zP>fp2kHAs_t53MK1>{b(aK_KjiGZw$t-qZE)hp;BS@ibkN@7=i+=JQtbKaxX*>CQH zJq!})aqP9Y7;X0!2EG8zFW1kwd%tQL zZ;n&KwT*np4%>s8a?b{YcR%?&9tYzhAuRuO|I8YRKPASnVM4#p^21|lWn-k=%3o2w63$FRoOq_+XZg=pgnz?eY&i?)|lS4c!8Trvn`>`-ny zQS_z3cqa&o1dwqasj|rMyL(=gVmFtE#oBI%n^VU__0E=yRPq)x9A@@24b`dRQ!B92 z+?*GnMSOqO{O{-PcxI8WsFUcwqk}4Uif8y8{v7nH z(gt3UpOO`@YnR}?2!mMQ(+G`yAFx|=jn4qm{f8;*yoBG#>yaG!0szvy6XkhjY@79y zmnz=YMo~N0O!E2MsdMx9u-{vBTo^T%(ixJ3qS3$_3Kw({OB!!NRc&3L!^RRziuOO( z;Rxi60J&}UxL*|T#!ybfPN38P`Pm!hG<_~JG2b4OW))BzXKTEYR9hF`P3u*3{Oy-K z7v#YK;%f@_oAY9_2;vdQ{~nNeL#|J?1!H2ghim#rth)KEHGsXPm))877h#XM^tEIW zwMtnLh}>Z@VRIE%iw}_u?EK)|L0`@+KP?S4DRVLxt0bjH@DJ%3*MYJosYm0Z71c@B zvm^J}&!7Vd(OPL%vwmbp`$sSJ>V3JZZ4;y|X_BSR>K8Us4;?VyL+~p%hi7SCazt7g z-RJ^h;j_HO3N~{;An99?YuYoPzgMacE!=9#6>Vy<@?hdLG_a3rI@NFZ{5-sLk{-H6?*T?NedoZS&s9IHd?4@Mf(~wSa4s}=Ocb0=J&jZbk@!;k7q1hvNYwMFyX09)RTmZqBPUGv6HVLKAgyMiG$NqJ4c%Zeh{-pYW zu-ALctMn2`jxF0byXo%+X5qr-86T{ZYt6W)S6L;J*Wf<&w05!$=L&<~^RWy?A)2C+ zLc9|Td6$1pB;L}-lB#6KUhbkXg}c(R6o^IW30_(Pjx&%wH|_-VZSnMI&)Ra2Yz)WS zQmHqe8~786qxro0p8^^n5`vT;M?qtyb(44-#D?8;ZovV!`fjl5xx<+YPp)Y@`PSzlj|PD*Md z%fV1`*-W!Sb}^Pg4tm%bn|axQDhQAv>x=96DGdfwML8eZJERDm&IcCP7J+%K9;k4l znt*8>b8bn1 zeuNIwkqrln#|bD;SJVQXiXUuuKgX7-5AIzn=kS~_DQbiP2%O4C=+?ZVdR~1|g1^>7>dYaH8+&gm zJ8qnVD||=uYU<|~;kg%St2Y&4_^q?|tYm_|Bp*v!iqFl|}Mo=1Q8r|BQdk zmG94|9o@0rHR_)ZYv_Lxf4wc0i8P2+W*l>YIC@C@D&~KznBu-PiEk~=l+70y-MW5) zw0pymKN0wRr3#R&H-fk9nhke+O@EM(2T$}TxXv;MmWXr1$j>THk%x0VYdNf)RquW@ zo`#mr2%qz0YrXr_>bea7t77eu1?eXh#J?GoB@3lPy#56r+e>yzd z5w@g}F}FXs5q%ll1oy8}R5$6}$0Sf*DQH0)1Sor+7*w4dQYg2?rR1?93a1u)nXRCn zeRjTC3Mx%o`npB-Zo2M$gtK%ct)h1XXx_HPsKvL+THVoZ70@ZFKE-&G#!{3YKR4+J zAu5lcwk(z0nv686&&5@a6j;QT?{hv~2nNmWJ`wM>M@IzQ#Uk;F{RYh(HH{FIcBR<| z$axX6!cCuiZ-dqXfOCGf0T;PR_7#T1s{$_uXbxAp>}qE$zGUdAFw^V%{eE8?Z}e^; zQws;^hf9td9B$v5th9z1xQ+i<10X32d9|#u@*{7%w@x=m-7$?6m2Qa3 z09fGnx)cBiRWd#FJzI|39-Rgn2*!T%gXVlM>`b^oEmue?6xc^%_s({m1(1>xCE(fJ zU7X$nKo+f&vKVlblB@4&uv9A`c~p-+E~{1@OT_Qvt=b%F^SE31!RPy`SSeQ?ok4{99nTIzku=`gNlt1I&F zyJMF$jR3EREsLEaXxo(Z{Kji18&*5u&~gF^#vdtCVN&xwd1G-kNM>vcF zSS@84v8^YcZz~?yZvleG8?wRh>WT=LaB{SeR$x1!@US50;JN^7z@;|~^JrE474efW zy~AjkNpoBFZ6|^4AlpMuu4<>|XLTa-kjx{+<8w37KTjQUO6TBjl`4xLvMwWir*eTBpjSCCe{fS@~BR_qkp-1wM!NoN18k6{l}=zKaZm+@r0d-SgNxKOB1NQPSTdn>90* z^mu1IiGqU`^qzEx;>aw?6dMfc=qF)umxn%pSHSQOW8wj=AtQoDScokG=+wn@Mt)5X z2JHL?{4g10{CxyKmYV_{{Pa2<>&$)c(25a-IcFA1H+_%PD)0cNXgHG47 zBG>AcBAkUE@bxPKN5Q6mPi#ve((S>7c`Y~B!r(J+|L%2wxeH+w6Dx-}4%yL0!U8!5 zX&SdG!hG(g7E*CreC6c|d*}~1y!riVcH@!)gR2Ca;#O}`wzA_4n8`FO1XFns;^$yP$^AiS`rlBB6&(L=1g{@wQYgy50#m*%d^9E;3Xo?8zs2HYRwfvBc{ba z=p>ppOB$(L5ZQR#E9J3tPx&EeNoRdNm42sn)}veRZ*9X*Kb(^gKD~6Z6*3+9@f=0g(m))IMFv|ci9FS$vxS*DjV6n!sLEh zoo>F0EL5b-9S#djF*KN-@>Yuk3;ncZwu3Q*lNaY_gACcKf^uZGVx+Xww#-W60CQmH ztH_%Qi|oxPp&2QLwMJ;ts?D8=ttCtxkuD`SRCdH%SHSL)Xa zZ|mYtK+VE2&#wo3<9f^FgTbM8S8A=X_2u7N-$=3J5lc0NEJHw)p4Kqbwfl#s8 zpqRu*K#w<3KJxG0~o#u6$-Ca4j5EC@g`-LP$1dZTg-1 
zeaLm+U!Nb}wPjk_e|rH`p30jb8y-6DJ*6YuqV*NfgrJe~&%k_aDF#83-(s)w#u>S! z#;tk_NS=J?ou$;pW_h*iCi??!w!N~hb-Lo0OfwSTe2vswPEp!J+hvJ z0S7D?DOj@Kejg}|xj_ng`JbLi=g^+E-b+0Z=wJ|A^d2XM9e9kB!7KQuy>{w@!$Ji2 zrblb+5b=kN<{y%}mw=yWdl<}jwB5Z<9cuRR>TZ8d1aCN&u*rK@BD;Mp@3Q$~PD9x^ zDgUf%i>`LAsN67+LrEA={%9sCICPA^d`tDa_CWRaQ%U{r{%laL@*T!|gw=d1TzE~M zYXu~6&Cg-=H=?R?=rS;{Z29w;|JB_-PPv(hYC&A);gsS@5|4X2W$RHkD9E5xo)0jeJ8~&zS;)mjGD#?+Ktysk-{si&XI>y zdTmOL=+(m~8Td64$A7na1)B8Z%*71Bf1?c_<#ex%yuT2H><#E^yplA!FEZ)35fJ)n z5DZx>Lp01Jv+DcZ6!f%s$e(dU(D=A8+I-9KdcmkyRuxr3L$iiE8dmJ9_yHmKqePf7 z4o+G`J7G7a;6sOp9R0ihq1AV&H$>Z57*Oc@aq%c+sw^64Ar|Yw(uiGn=Q;R{3M-)g8$V$XyK}UXUv#sgRP43 zOUz}!nJ)aZ?pZDo*TGV1C;@vPy~=k~a!}nMfD`xAfY2rf_RncFKsw%9uKSOS1e zMqz35)b}o74k1xMD5hDzoOe-Dg035Km}nb@t;~_yZ)#o=QHev5K)!FdR$2%Ja{1Sg z93|h!sj%M`{fSI@y0)J#jRk72E_Pc8_p^I#e}H8~2=kM!+GF;q>KQP7^P}h{VC{A9 zmtIetA1zh*0JUo!AEpZtSh>5P{+^snV=4~}>Yu6VXozNT#bPdX&vIt6w%j{6gMPs;fX%*Aed#rHWvU7-r1dJD$*5M}*3C$CJvXo| zZgbTxjfV(#Y4>fo%53ZsJJ@9c8wvE5=dwi@cIRFk<HVOr7~=KHQP4$`zLUZ;nzz^*`Bk8d zPsOi7OT4L@YU?1tJ-Dx8U?S5zrJ2-xhZR9BX^b0SS5f*w-IyJ>t?5oLZaIn0Ui)he z-RTIxi{E`?Rd6D0=FqG9r3a1k6j;IB10cShtUbS#VgTnHp1D__h#?yx`%&W9+@qRFky&=Jw#V;T)nI=-Y%sr@%pyciO8L7-8 zn(9dR%&-YQewYc8+P@F3clc2LG7Nz#Y}RHL0F-jVg%Jw?@tP))kEj(QKY2TE1YpsA zD~dRs06A2jtC4UupPHBqzxe-wcZFB@k?bOKTMe5f5kJK*_as3)REowREiDU=%i&I5 z@4#3iMVotvt!RhzfjS)}_WXVxTNbL%aUBHr`klxRL;wMdfb=JO+45_5!=~Nli?xd0 zXN$JXJM5atBw7@O#)zfIk}>w{qtmQW9S{v%l+RizL&YbX4CP&lD0!iv2jC_DKoBOuK|k;H|iui)B-X z!H)r&p9gf3jBreO4qXF@yK?00LR`G*7a{pZOXO*Kk+nQp`Pgrdb?gCK5;K1aK(by1xOZv$?| za%mh?dH?6ah%`1BKlA&WkDgivdjj*H^f#)NC7uf$Y%3lu@1g}Tc$v&JN)F-v^u5Ls zkDH=Zb!p87NOyq@-?1>82KZ;(QcW=~y@N1UBy#e%!PoU_;Cx)Wmus!pJLm{o8H%Tb zg=k!LxhHCne)~lod&W6A@#P4A{}{+U<)-Q{S?j|+Avs1y*O~EMel&X(o~_?|y!Kz| zy3#Oqu?KASNwmzmsWNV4i~QWkD60z{pdu!^bFwz(Z!f~GEl%wUsufoA+njajCa}(Y z1KW|BJ?vsmEH6{=z;=B(Qu+lTM+T4WYZ&z&Ej)BkSzHqXbqs@#E9H!5I>=kkUCCi>q zMpb@P=>6V;A|26A5+!X`{MPqwGfbx#*z@Q4hcFonEy`t{zVv&70l|M<)d@Qc-g^_+ zA==J{lD*A{x|JnvyBX-qf}niYg})Kp3c$WD1ib9s-Toy0NVr!Jt)btQi_mJab5SSpR&Bx$gzAH3av3>(!^AJd1gtc>s4a z1MuBvfex8}mX`%TReMfyoST%Fe;P;;29;LFUg?-1-wZdn*ve>K#RP28XBjlQMl1o< zC4B2V#=siVQs~43J;j8VK@ot!SKKkrJXg0sB|xGh(feADU_~+eEq~Eu;R~kL_#`kS>z=Qm?lRs*9I%B&*&BO7jsrEUWsWoTJ0$zNuK#%s( z*oZe}vGPeVgaImMT;qScdgf z#Bn$ed|Tq5c9aak`heW%aQ1ZxM2OH9LMNJJ2lzit9+C|&$wlB>d@Bmc!yi%L2 zoOp+yTxE(K0JTeU;1gZSu2xCUVV_>U3Kd2>Y-AM|=_9-d zrxBi5=!_k-;Hp8X{hL#ir3-r}dN}_zAXV6Qt~lT!(R*hNU0Msz$|@045B*LYMxNw9 zdg;gSXnslc6VMs7Am?#ruNm^fpFn0EUTxWB;Craxjk9-CgeauQ!{tS{8` zU*p7a>3J&RfFXK!y%xW)6k$!|&|H3d;#KW}yJh9SDlZD&z8meS3y(+vgtA09h*&W1 zmTIdDd{6%#o_bRi&wNEhWSZ*h$agHr@c0wttP}m*IqZ3d2+Q0dZOYwHSp&yM zN?^(oYS~EUPto2$`ENG6u5YR>M+n%Oe^#}yneXJR8R8xtDYZl{-_m@hhyywIAf}MX zg}U~XnjgU%(FXr1UCF6m=B_fLySy8I3!QAJQ;rZDe4KSC?s)E(ImwwPi!c|Db-(0P zWHk-5csyZia{ke&VJXI6MrV}qebvjh7lyYm8q)eQ0df@4B6upr66;!4=Q-8nl(=W` z#RtDW1y*7%Bd#G;7f;YyOwM<7{ah)25zq#+IN_ASWv8&zmvK3^4x$U$r zG>hsq`FB3Hfwlr~yL`v|TjK&ehl-ld(`M7f%q@Q22(HI=>(MH|1S(g8!RErW*-IZj z6|Y_0ltjZe4ANq9TqBpl?Bv`VVs%1kgu9L?h)K52GFX zUUUz!qwGKW%3|6lAKA^{_wg$4Vq~xlTN1o4(ipgEcuZ*;7|yh4qip*gR!z;SZg2_sC0=ZC@V`QvPL8>iNAQJOVw9%1%S32&yDQA$}@QR zjdvFSvUV(7y@a+eX@X;9p}#N!!{-v_40wK<(ojqwWMQ(+j{COF?}f-;vKANPf8NVigquj;rX{j4HoZ z>1QH3*r%-y>{gh7_PJJQ8zVI~$=zrA^molx4xpI`!MBBv{>okDw?1ff>w|`;Me(y| zVvoy2t^Jm*k!w)&%XCY=k+x+r3uMTJo8izeUta5?kG*d(rAc4+)B_9jMS>b=RWj7M z2-jC$-l!g@ezuh$nMWIMzW%T|L06iQ#lE7W#e1uhKDkJrOCQOiE7h{G^a}M z9@0)x)*82nXp^_Kbgg-NIebD8!Ka+3s(lZ$aJUEiWb(}C9p;P>pNd6N?)uRY)Horr z{vAJEa@m*WGwylOcHqU1D$nXCe4PNQDeYP5 
zJk~7K(ltbWAbeB-wez%a0Zs}4Y7g|KTNb;1fB`p-zeQ%Z(h3|w+ce0n3l(*p{|n^<{fx&VGOU=BmaxNw+@Q(ecQ%W5JW^uK}nI47Nt|V1QZ0M5v03w zK@>?@I;6Y1I|M|!bAhFiSddsce%Gp>XP(dRedd{Y=l%CP)w0c*L7a!d7Q^_ zoL^;T3F0ZlX#}+_0^giqpdEdpc>AOGqY+lv18Ir?P_FIq0y*dMh7&XPnkXvq@~+9k ziv38^)an8lyXs?mo02;X`wB3hXckQk?l0;B?ts5LwQglV1Akx*t14{c=XY0c-YV{WXdZndBrY{DL zqlH$kmWHBBB%s%;B>!p7_!44~O#N))<{rW=M@thX(oD)~k0#r8$qdb6ds1G7nJ^jZ z1gDyvh8|=~ctT|1i>4ezt%ez8$8R~PG7@k>Eq|*$4D|Ju1|;q~?8{wGeX>(N7=h+K z7G*O6U|_G#!EC+dnund!@KeW=_WLIrZUM}9a589IA=eKhitvm6AAE7$wMO7CKr?w_gtx zeEvik%bfQ-Ux!|3zU##l-lT@~uiuUtX5`mBUo9O;tZ;bA(6Xp2%WU4}g3Vc)Ya0wb z0-e@Ex)vR?_~o%|%ec%Tg$-|J&!^@CheKWwmW~~c4cG9;o?%S+wnYC>{8&)u7Ep&} zF(qgE2?K|j23O$MwcWHTbRwowDdEG>RCm-i*yGK@i?P< z-I6Q(2m{o-9tJ*w55IDA6=3+D07!tuY?`Bn4v#KAYKu;E6)o#Xj7aKhnog5QZ!u2@ z5x`QMwkE??`xDKH&6}bJg;KcO4t_4+N$+kFPDNm2?mKR6rA8NXSL@yOKXLqZuHoXd zj1Pn*r7emm-65flf4(9(Dt^u_yO1sOicsQGx^JaI}0H$+xJC+AYup4{Va`YWtH z$s<&re9+51Mm$NU-^6}KPXQ38x?|@&inliIzgjUjWujw)*G#a^`6UHp_dG_jULcsnKFt9kC@3TqW&Qpiah zSQoX~2%bwm5flgCOM~L-?r__zGhCi|&JkM&8{jf{!Vijy>1)TBOvA?gX+kIfh|>8U zM`s}0dS8R&%5xO8TR`%*1XnUnbd0uqR(!U5nyjfeP<0-!`Vki2vHV~$j`laZ&EOfJ z!@N7Po7`s)RrDi_1gkT3h6vALV@mT~sK+06dves% z!aG7RimPj{zUwL$Otj}5eVo`ah~Sbzs*)9cvH5}>$PJr`ojCp02eoTMSWMm%2pFhgf znin~lYSuLzHGUOW&A7`c4Wm(LxS2CVlhQD;OIQlMlGQUeR;FOh8@*AmR7248Cedly z6v_cKc>_b-k^Y@w)`0O}jhnBg&ktq5kA%GBB4W{4{?S&&2uypb^l48WmtS+UlXW7= zX-gcJSN0>F(kDIV3@rla_@ zYFfEX2j6hi+lkkHdjvmuthS$AFQ>i&Y}{Vv!=|p+Sak@44M}J^Srf&#n^iK_<^`76 zY81oONguu}5m-Y4=As^P$!uwNJ?X+l@3kEx!%YH&f=-v#r2|~y^Lc4y?r81!w1$_w zd%HE|@5J;3RRpCMc!;cuUgKT^WjCO-L+g6&YM{EFCmmr@@CCPnT}8q9G|H-~^s%n5 zF59mu637hKSIQEN6Ko}a*wU8Z-h;kHho!+(6D^V5GyabAdS-Z`wp#ka z>MY=T=wW(6+~|Na=nnRj6qbd-w#%iZT4Jq)u<>Gh0Q-dBmrJ@wOEvADWvUk8lZsVi z5}L=(enNMD@nm2kzteTv_UHSAP?-YTqGTh>eLgK9Fd=gl27Ii|Sb_|`VP48&+ISJc z!L1*i$b7#lgoP2W$2q_=OGDyG8zi%iY$c@CmAJ0BlyRT>kf{Me zWS#$+(N)cqIZ+FQ%1OEay~>Dd;zHyT25p! 
zT&0hG-*9(bu|2uSs@)5grh$&5dY6L5GhGkLm)ikdg3q9hw^P$2)M(vbDS&iM=b+-m z@NVmTR`{a?-PM~RiS|3wd5dN9?2TV+V5PTN+Y$%XhkQcqtRxJh(#g9C&86Qezf$i& z;8B@s-=8d+esGW4o{~LJLe@fHr#X;u_3bR4xtH*f%&QrG3k#8%u?{==5J+TMsou54Mu;`CL%<)C{JfrtPER( z#7j4wxgsc_D&HD27Z>YRD&Ec4$7k^B=Z)Q}X#)IekMzk3?r}&JTQpvA3#uc2H@>KL zm1M%~_U+qbCzIPiSO#;|1>&Vn+ZD$)AtS3;h=ALH$#d*O!k3`Vr&X{);^z~_(?DY% zVk$#Hiw%GeD=RBhOZ$Mxv--gL8Dz-(v98r4V;R*Z5(7oY&vA&Qq$IkIo~O(UCD01%sv#PXaC?~ElqyvFHdmR z#WlHz1Eexzgp+h;VBuok(6lZ(t9u&=w%S>M_aseevPFJSRqPD~?$R_~j53>1Eq*(~ z$!7y7$;Ym?7JAlsX#M4x_ofZ@PdrUel&j$DatAKF;&3sp7 zz#WIDG*D1|6#upVut+s+7@Oi&Rg^@Pql(_y(16ImH|CN<#`bCWRncv?+pC_(%P}dP z6{Uj>Eh)q8T6Ftd{WupPs69 z9T%isBf(@HR>q9Luew=je-%*W5IR~l^gVvFx_+1(^7X{HFD{cC?#Qp>jX(jH$e%g^ z$lEWANi1Ne$ku8x`6Pj<*HBljZo7J{3?08Ge)#kOsEA76b!Wn?gDyb(ITgDhD!EtI z>%m!a6{W;I!6<5?l+nxAVcGb>4?x?Vr9j>tV97xEp2HgflyPJ$<=P(hps69Mr}_5n z10{N4yl+YG55Oi=K!bTc`W@{`WGwLFX`&t4EB&sm#vk9T3P6>q-Yr#!9)|?81P?oD z6&x=>x;O1E^;}VM+zB?}Oum&3Rr@br~-~5hnyxSmf)i(!p2+p4L;Mx_k)n$(2Wom{;vq^qPkC|Mh7mx;glZW}=O z#!@sttcq4lPcuX_595`t@AGXPsg?U8`%V@SBJ@tyFIe_AM;7l#9dr{*ym>>ylcd^A zA;|eCS%Wio$E4@r=88in(@iH0Pysr$hD}u%Y;q{+=$oc88V}=Ua#+jo1~#emsF2a{saU(MM|gpI<(eE#qAX z6en!Eu?%MS?F|=PT8z~Q8KS>_rTWQj+%Y^#eL#q9T9Tt4xv!g`_0rl)&4M0LFGojS zt&3&`SRPzCj_tfxs)IaZzQcQc98S^(W})~MV$F0lAAlzm-{w5?w1{?uIX1WE#1g#c z21w~JljlP3P6bX0PA7Hy;l-83x`GRR&WqO6YS(nHW zM*GPwG7-=-Qn0{Cg5<*V!>%+Y#UH6S8zSjKxJBjrl%;A!Z;nRTu-{x-2NrIM zJgYGOT2wS3Sr+uil=B~%^ovI#=m*KiWqqXtb9+k%s=YATAJR8J(?+XU!YZUg9Ru=> z*2`3&-DU=)CAU8dQ5=;*Bt|g??Pr(KR zkH{EW!x}+9$7}kXAGx%vQprf9)Lo@HzAebqD;>+ihO&WD0)?|;5G=_r`9__ZkV0{D$1RF zQo4-x^f$#xry@l+SM{e;(W^!)a?k35oERCXo}rYRZUnic>=`=zYtj+qRWXS1Pi{BK zf!N^p==OTf^YoS}+zNUFaT}X*T=jN>7ZklhgU8w-eRmq}d-0 z6HN7!)b;TP{RXw@NobM3be#~fYs^XYGau_*<*?o)O=wFptKpvfS@~bq_*XdD&2(01!?P5Vt=5*{r2=bq^KhZk^Q>~k@4nW5tE48x zAPL8piNxb-KXmYBAehtuJjdf-3mJ1M@4zEi7C9JtfGPh`!o~h`=ucsy9oMB zM9ws=?O8|f_wx_*_mBbsVJ4mY$GB9KCJ`k)kxBycao>V^TMQk=;Rytf{CI_HyjTUF zYau_2JRx?j{kOua-5Qj*OLJw9a-}tz!e#!tyYRH{lyCyPVuHENlH3wdk{!m?jlZgk z9ge{gqys_`4wth(Af&7=wHR3x(HXl-oiLSLZuueb-1T<$eF@k#CnAz^67Ehm#~e6V zObGl@HWupK6G=G<$;yYVtGyE%$0UJ`^p&scIi zaeg_wZ5)A8D&!2mi{-|I&vWCSbAR2QMDLzIYfkWOCbOh)t748Snd}!aHIYN z8IKWvH0+_bG@Q=cTD%=~t!>-|c;P4`pN`wYf=?HWVLrr*8-!0glYy-;_v_qI5^(9R zy`_6Q)iRXkiW9WjiUx(4L#`CDi4dQeiN3Ec=!(# z>jR|H!_GtcM0vX&3{*3mk2>+Q>HfN6aJsv17I00lfmAq(+gi@27~2pT*9ORE1CA5g zQX2>&C#QYqA1&T!fZ1eZ4pclWaHEay{h`djA^AIj~>!D3xu zx66|?!OPOnFJ|=k2mg}q=D@6^TTK-knHKjhbv4m zewmxHCd^bNFt*o3VHa6F@*KB6i!jo8ruQHPGgL7_8fJTM(kbuWP|26{{@xN1k?na6j0Vu5j8uFj7<(Nd^*<_53s z--B+fUZ`#j3QnT<+hg2zUzR%IENNZM7!54zpqpmR%N=q!%Ukw?D{j`x(2J|_i_TS9 z?D##OKnRQr+eJ<&C8AtMThF~s-SuFdE%RU#ex0pT2~#(M7}=egerW5q7q*o3BX-lx z?8Tsg1@o7wv*+&LF7zdHm)w^4Ciktc((Q9&=s~F~n<~ScQ@(5R0>x7W zG|Vs}*5Z`Amd}4K1kDx;o1*=7{Q2!Lf^sF<0aadT8ICuYi-fK|cjRUk`IAn;abK^e z(3VhMmI=rbcyZmE&p;JIQ%`M#TeorF8OkvgAORM5A8A3t_s8-&HO2#?%zR_k&^=qM zG5kq|1fDPj;R^?bZ-KSsTVwu$gm&%@JB4oF-+~4j#oZ~i+ic&ZZf|w$f)mr>{kvj+ zendq?)b~yYwwDPefmyEJx&qf^MLfzSni;+=J{w8Lbl+&4|A@_u%dpiY*8KHeKYx0B zHC_`{3wQBUeXF+ftRYrP$6aWftJ=`MUj6EC+n(iYlsgO-Tl*Nj9`q=zot60k1c)NU z7=!3aAVBlV93n$SZzVGyznVp?(M1{rwxaKoWY8nV<(a>>=o;JTygApAd5&Vj%js^8 z7iQjKGQ#+TiA#qJR6V^s+c6!@c@4GIeD+((LI3Hmll+&aykJNDrU1Bs+v7FB!Z;>b z=4H{)8+sc;A(&=Z4h60|`__49i(WllN9~2(?&S4Wm^nd!a=F_T>z2{dQgc(mMwo zv=zr6T$a;#?h9ibl@`i1+>rG{{rc5gza4gC?|8G+=O6wzc#uu9Xzr~#a=)Q0FZq%& zU!$Q+G~58Hmb>qz`AUa#rFuB;p;wGrA-sZj)t)e_^d+Y`P)w@8dzlLXT`{fOi-<1J z$6b(3o}sKbD8!RBSEHsNZ6oCKimI1gvgfY5>6m@;J(Waz>tCm}7g0Bkz6r>#n1>b1 zG!?C0YU-$Hku*(A64e$NUnpCtzslHCjMr5~?$12iJP+*t>1#`#;I(%!oW~o4wjwUf 
z#35xnwVF4zan5$XnsPN#W!IQlX?|evjmnOiSETAL*B|4lf1tq+AG~foQLI$i(`8Xr z-St9l6f+_tJylFiO;v1dZKK@oTSe`CQ>lEvF1y+6RdCc#U>e^(7xg7PPi`J?W7Kv& zQgwRKZeeN8e7KHA=DMN<@Gtkfl&`RBi`I5eHdLnH4XEh66YE!|un^m0DpiseDrM0C zh!WKbLs)Kl17OX~g-Njl@tSIIcTV!`O|Ucw0^$*7Q*c^fTC+PTC)TAlT#I80?fpJ{4)VN70@-Yd2D_Pn>s; zuPvTwH+c3QH6IJb&JaepzbA9QpXN2)n3qek zQiElY|N4$Xt^|5_)bTOEvcFAX(%+M`n40j4o)#SkzeRWN7A~!&3cB2VhU1D>iRf0dT+82^`b1i_QfloU* z2c`mC7UX5W`hvPw<)I)V)iwdKs(=}!+dw9Y;aZ>saF1@~>t20!j3Hmq;@W#!(?`%- zg&kv33YfNg07A=iLvE~;j1(q-1XT`M#2yj?+H%1fci}$2>~_b-9?*RMjiNzM+R@+N zi>Fa|5tZ}U8^}@RIZA_I#{>ZM>*M{^f~MU`+Z6VYq5ZMb%KnK+u#MwSgItE~+@h94 zVE5o;8KOfBGuQ)po|5&_TX{m%!iy_nOWFef$r3NItMaE0ycB?>nipsYl^X@*mk^<3 ziD_UX&MD*-P@#mv7u20A>e}f7(pNfn<7oA6^SF`)-a{$8CU)ScJTpl>qOCQ$~J(M%i_c_7& zU`R?TW0meF4t>eW$5Hd5{Ly?7v5j{_3_%6QU9Kbh?SNTCq5p|NoKtXe59_>^;b4+MD&61h=aO9!u<|h=(WGC7_kOo+Gufj4YCFMaWBAQCMVt0q#qz%gzK79nDeSq2%U2io&`* zG@gVQ?5_=EtCt(S1}t>Pow>xly}dj`M44fa31AqE*8=igCXkq@=94A7DgSFT7l*hX znUsfqdcBF)SJ;TreU74(JGs##`dOorS#+ptK^@0VwJNu zt)hZsv%GKVwyqz+bJmNx(P{(n^*A6~Q)8Ascf!3LgQR)ZqsxPUQcc>!*mr_iwiupUk~fe~8S1^@*PZTx-5~h%qZElAruF8^ z>$XpICp;k3{|c($+CC7Yi@)hH?E33+hdulR?e}(gg#ULo1Z|r2D@@eQ9VPl(0$Z+o zRs8-V0+GLV#p3XkyQt@q!}5>qWmT0{rOEHN{r^6Y#`}Z6S*_V$pGmq@gIBbFo?InL zrkQZ(|Jn2N9}IQl5z5VT;J{{PK$_`iSBD@@eNk7xaB zM#SgxE~V?YCl2rk{yobh)5Q5IxpDtPOx(}uar||8zQ7d!z?;~Idx4K` zGfIfuER9ifMQFddmr*TeedC7gPGgK*BS$lDZ32~M8N$f9yk7CV>t%S?`+i}2W3I2p z{V9{EkHNctL&0hwQL8FPfO+N~>VE1+eu#kC)5ow#QgR3WHYa{f6?zL%I|**qo4f~B z|I^Ld6#<^T7QyYnYM%-y<#!9--9#+{nj6pBz0%imtM z4`}C?Td)87lINEXXSxi7y4$|V|F$rd#Nw$Hbhew)Kd?>z=bHx%CItbv49U#Eg{(gp z^siSuO4L>wm(~I%rlNYJlmjGxmcV1YP~0_rCX`9C&!0B{2Qpm)VwvSXCdqmd*C4_7 zACmp)y_E!gh&UsuM_I?Rrv3Oc!RNn!scYy2pr(H@+LAd8 ztA!gUWSIsGox%FcJrOaoQBP!pi|XU@R0@BVZ!wMT1d(qz`&P3UeE;$$<%JNVPQ}yv zEJ{DttSPN0mSX9z(^s!4u*TENkg*fmXy`^r@35|};hCJz)b8i?35j8iRF`R{IXa z#Av>lnyyNa$JB?W?}1h>Q2yWP`L}%z0_q8yE4sd-ruBOaTCsum7)&a@UN-F}@Hxh1 z4a5~MR9iS{zi2iHp-==c2{Gx!N>^fpMx{x?zKz}Td}^D+uR(mqH;kVFhBL|KSlcd4e$!U`wM_ucHcB*pT(PDP=4Or~uNnUj zy)lj}o^Utr8ZL!mtU!>{whSR=rPZAObd?p6gODn5foAhp&vU~Y^vRw($b{-ohTV~AtlV0PDvVJ_PI0TfnlFW~__}yv0N&(w7A&GOQcHOb zCl(&eQ`!8|rp6>PD(F17mDG-ZHlunj^p3Ca4O+Dnl+2MqIeJSy$9%17G~+cSW@*mJ z<<-JkX@p7J_dv6xQ%Ljh#cfU4QP-T=wdz-OcS2P87m9^Xjz_H5lf8!NbZp}HmeU(= zl$FC<79!nN6T+(25ZY)&s~C~Bp9siRA%n}N_NJu-_u{#?-2p@a^N^vIJxk%DerUonm{iH_sov(+fO z#Lc7Jqgkf5&vdTlx;8wobVBCl87^7cyW+-T`Sr7dPuDAp_lYiLgo97oYM`*;1+kkGN(VUh+DDui_?!Pa{05nTA{a?^L#{Eq*%KK}IqYC*6T`q>UttV}+5S$-|T<^Y}> z+cYh_9YQX=5U$b_9h|d?W2p`OT=(4ZJK3qhx84-iVx2~US?8ld3JRr9_b0pMi>+sU zthO8)w>F+8VdkEQok^uPXZFQco4YqkbVoDELy~F+h6y`8S4y4Q`I84-SLD0Q%%?NF z2{0T>_{ax$O(qD5Tpba^Npakkc&}W;p4i;jThJ;36Z53oonr;usV+{>?h#XC9(a8T zBxYK)V~$NUsV3A?&A&vyp}9$4>oiMudh7*v^G}*7HW~d%vN2bQ26j)^MP#Q?5Qz@S zAzdP`t7>w-dipcL3~}jHE9M)XN1q1Pe-1@{D4%a#Idey*XmIAaXc2z-DZ{!dbkbB` zhsj~WHu2~s^@k=pc5~4*BOa4>R=^ESk zfd;$feua=3FM8TsF8;~jo!zMz;SCvHZyk6l1bbt!>a&#H0Vjp@H@9mL`M0}#LiRdc z#$9Dm6-EfU=`zFmPHUL6Bu*KyYW9I=OxeB9TssvI{e1hrjVtReYl-&75g*CL6fP@Q z{m!rI^tmIr0%yMa3*gItdQHq#W!d|#LSAxH{{&^aECwT;b@_9Vt15EQv9>` zDfhh!GZW1M_@<_-%@x3L8mUcA!RO)Q+j!|6Gyw&&Q$|J76Vtv^c~**;IEHr{EV2*o zcN1;9?fv-VtQ1{fid_D75+wU7Nfh`{XLooN?frU*Y&U4kvFpfq)!V{JgudKXo8XID zVAel$il@rM>M;ioZdtU=Dr8g(d$)|cLk_v56NPmt7**s-oR!BI z?`uI_VJD|&rQJPSvN6i*Ky+AEDdyL>Gz_<0MHc6ZAxHOSZjP0e02}vA7MRoUDNelU zY#qz#oNCfmUm88Druhn1@8qafopScTpsnLTpQh7bw~D#erK)TcqsT?FtMpokv>t%F z(7!VzDbecd5*F{UrjGYu%NCrDyC>H-OuQ+03q6JyUR_>PylYd9IYeH%yT5glTeD`S zo;z<(*OkqC6>XP8J6esHS=lS<*6GDOaHq*h+vlI&Ocy$=!HlW3Z9CnVjftTgc(ElI zcILOjq`JAc))y1x+jRV;Gei1-Q$+Bi1HGhfgq06_V(UkVo}Yfn-!-T8{93qXnq{W) zoqLRe?~t-yxXWBn2#U}vB)n<1bY=*Ek; 
[git binary patch data omitted: base85-encoded contents of a preceding binary asset in this patch series]
literal 0
HcmV?d00001

diff --git a/docs/assets/deployment/hf-inference-endpoints-click-deploy-button.png b/docs/assets/deployment/hf-inference-endpoints-click-deploy-button.png
new file mode 100644
index 0000000000000000000000000000000000000000..687db6e03212fbe1e7a0125221f9345a88eaa28a
GIT binary patch
literal 833513
[git binary patch data omitted: base85-encoded contents of hf-inference-endpoints-click-deploy-button.png]
zrzav2|Iu#9!}PG;B@lmAB*<^|^{^j9TMrR)Hg|HMRzbhDy|Ade*IUmt-DNdY@cLQ3tu-Mx4DJ##t-oLF>>&$D`a58DDvw zRepBy`X5eQ0Q2mof6ojO7(H4l6u+OtH377iU|fr-EdP!@4gplt*mycH^Bz8(*Y@!9 zmXIGVo=JoufU)*;3_a^|sO=?*QE|YV>W`W;dDRWBmSagg)T~dmTaitd(5(U5@6qH$ zs}GuXK`H-0JHLSu?tB4H15NQL-|}lXb&;mOe@ow0>Is+!^HV4CQ}?quOjvBN-}47c zVS9b-H*Vj6+Wr05Y{j^Q+9~ro^dRm)IHO+0P`ol_brQgYQg((~R*ZmN=l_=gdH6R0 zQhbBPIgsv>j|pn_ggYeTH&2REs5I~WEsWXDBm%SaOSM+-0v!^GnzJPi6!(8_)7PE( zqeQ?|k2$^j*|qm%wt*0+a#4`F^4hxU_pXBF%7u~Mh(KDOhpzUQK=+DiyRG>E6~I!g zZ$=2KSdiO+y7^9s`fXxu-`NxILw7Sg8JGV2b3jbT1Rb6r6e`!*oL;I3)d{3r3A-$A zTLw5ID~4>M&-Zrv-jJa=K)gGdUuxfbQD@_rR-rq_3b-!z)}72233mQg26V{(X$_XE zCx7E}IJ7Fxn%pG53*oiObt`hX*c0V1K#9`GM-xzxx|n2g-4p|!KCrhV}4 zK600BTmMe@Wsb^%`S9U~2?YO{re6&_t1T2mWZM5b5dOZ2E^m7S|l~}b`7HpHa zbqi55yx*CMTiq8ZD@;+Q&s0Z+2d$xcO=()3lD$hutaqy4@`lmCP?(mDS?!3d|t zC1A%v7nrq8t*Tj4J*H<6wD4@%RH*nrq z!UBn5@zSou=`H1LC5)Sz0D`9RBi;<8Q9L@2mQYP`Zg4_a>C|kjO}}S_ZT!g_5QN&* zzOhF@xKWoA!i|{Eof@Fq53uR z=cxRF()~CvA9}3nHv)eEP~bkOfK~D z_uKuJV9t8&Ds9ByorhWwG3{I(WXPcwesG*DbjEF zigRg~Fx>+>G+efsD$)^zp8H8!9%FoLv=gV!z19#M>dAdenNPwnpHrGEFreb?LHtg0 zYoIO6_u_hTv+I>`0EAMno{)-n9dk)W0wXjJ6cB!4a_9$diA*|LUk9or9CDw9UePsJ z@%BWJ^;P>@jAZ;FKkSc|eF$KS%q%}jU6?r7Z7a8J_A$7KFG$s%gALDGB!)iz4k7xD zs-D`h8QeIxzjI~C^_sZb5Vjn>xXT?(-aX$c7TaP8`Tgj0Sd?CoMX#{V*yPF89$hTQ z{F(PiIRukQv;7&TwC{a}R=FYO>LPlh&lOHcH^~orN6VaN)aAcF{jp0;!Yuw!9O0RC zQSW+L9fWsSDGbm5*n0ecC_MIAJ-WUb9~lUwn(mS!TSyaorhV z6~LMy)PWMyJ_lG?H1F+Y?(em?+ioK01BPWWU)G4&9!~@Z8HrG*h?t#P9ZPw08iW#E zB3E{*9UmAQ1Hbtg)+r@Fm+prr@0cwqJ@bBP1 z(l3zmZio>c3D0Y8sNUl9Xu72^V4?yqelBRgg6TTA_4bKXZj7JCn@yUIq{vX5cs2Ra zwkg~@pYdvgPvS$O$VIT$mfL-I*A*`IPUC}<&aVy_i298xjLUkVufC(O39O7HA$jc< zTL4xXl-+5*2`uUhGm!2VPXja=_w(@vrb+-80g(_1DV}AoCj52}a8hY_1gwTg`d#v) z!dG@IF1|s(z4-L&8tyUv_Y)x6Jn0_$6|vD#A}cp9{2I{C#Y34Q`7gOi;U57n=Z?j?EcWMfUFa z?pnRVm>F$KPd6#1uV|spf$QD&&?_JtpjDA3^Ss8gEl`fL@S+aYeP$*b-QKpdw2+>9 z-<5680%EBOV9FB;9|BLEml`kZ3P{nPs!w>h{juxbPS+FbXg^3ne`Y`U@ZTGeBgN!j;wFx$l>N)1S_A?t5 zy++y~eJ2>fM`vS<;Nd<+fS!K$ZMwCR$Jin8^InnqzY!DH^LPM5(D||0b)`rv5v&CbiFEOr_Y8QnD3@R62QZ# zO9{3|hT~`hws?}qi0~J*Ft7+p$M*yMpqj2RlEUVTijgC&$H(BC^+&<)eXq9TkRRdF zGI#XiemWOl;*@+JP|rTJyiy)^oy!r#8YInxOMi)D1jeubL=@bcCWjWBb5_3uJ=vj` zW}8TRFl%2lCO>!WtIfQ&_L7j=lRq^lf5%RPml)7mQ!+1UXZfS`dC|o8ryUC_R0IN` zB9vzS&anF$L+mO6tI$+;j5X?#rCMUl(4QwanB#L*I~=Be`RSD#7u3H~VCI^?Bn_1G zIR2S8{E@cT24GSDKkU7AR8(KvH$K1sgVdmOhk_{GNDPfdiIfNfD3a1Kd_GEXJ-6xo+ z`iSy*UykWQ+owbxDbQ&jr~|)bz0H}Mc{V}JLGR`r)$kr(Jx;Csp)S{wucJl%fH|>( zr8;v0cLc&l(~f6~GszQ1X^|IO`j6k=4%zc2TLY&E5L&i9{e0K4vVi={-BQc+hRdmk zgjByYF*7r@k%y?20xS>xqN`!V)YY>zq$Bc%;-vA?QTdpsCl<$LErRSj@x3xH!P?lE znXZ$#GL|Kv9%Gl%`}i@wE4ZumS0|d8bn9Lf0#e2q9g*|dpV``i@mzWiSy%igpV~F? 
zr1zITbEqYMy0g1}inM2(Q-J|wm&qVeIpBYOEEN~lkoCH4ELlM7UML+0&vdo(ty%rZ zo3_u6+xR_%Yy}=i5|8ZOVo&wZb*=YYEfattK3qDtobrxLu9x^H<@}?v)IZ|)|5<9P zB@s7~sL2H8J7#?6;_NHK)|hc>RoKqe_85oW7SG4q=kffb0xGw!96bf5#U&>?MVU8v zijp^Ne>K+V8Q+{s!cjW3YJqZnZGXxzT4b1Wmn)9_@y*AbCH-bgZQ(Q_tqtYbK!PwF zDuB)!e2`7lbe?od6aObJpBfO}S%ZU;Qp>h)U}-6kzLa905ATRWZnxJQ+4{F0U;29- zT$~DWf)|_TcQTZtRtNlpPEMRCWHOtb@QGw1JtNlPa9f^0BQiMB`6JLhl7`=P=A$1jp`(V-7Lb7AvzB zQqGRFC<|_~DG})mGD#vny+H21OT!dSL_I0$zPuAk)vOT$%P+%mcHFvfyRl6rt6%x{ zl_SvQ@oaPaN9#v^(FYIsK%wIf5ofmM=?WF6+r&2pb6{>e8f$r)SqBiJ=RkCxndMr0 zel3y784gj?mW#t-l!h2y$s6KP`<}AP8!LpSB2R2xe*NHTp7p>3Q}Jqz6naMh@ouqP zdf3V{mnd78$JK`*Fml(_Bf?9u=B7@O*=H8%7c{K=f}{0U-dgUVY|;iUF2hkBIvx?H zI~(J(*+3M$hxLayi!kpCu(WyITk7kO_3eh2y-!b8e&887Vg|+nf7T*8xF(riSs%64 z|8Q7L?|A2n)55L$ow^_!wKM|HYWOshyRRZr-g?wPKhcTU{mP@qK?|0;tQ%iekyJ87)!dj9Ps zQ~Ypk&Lm||l-H|z(sc_3LEk%AS-sVBG6y6Sbn_&y&4v$u#%dbQ9~EGLRX8YmZygP^ z36i8#8I{(bVD0y%NW@bby-L)>xMie>qFtF6fJl34li=RNPqZ#U58+3ynS+~qt%g96y)OTLIlk>SeAchuTz{tfn(}) z(9g$8JC^a=V|!5aR{Ke8>+;)4ji95=yJ}lgajSrPgcJF74=CdE%(Jcz@NtSfvTd_8 z*Msol0BLRPr(e?(=k}L-(t)Bac~M?<8Z=tmdry)jX@G{~Qj1+=QX(cJo9tuu`C4C9 zu8dv~2L+Ml`X zeK+?%N9vykIXnd%PX&3s%&_rGihFTc1ZbuE$_`&GF1`c(Xt@D>r`18y>kjZli(U+W zB}u-Le)c6I1T)M|f!g_0pp#ZU?D^^Hs??QYN1GgeQp;0K4%H_oY-uDRA_6P^+yaV+ zM)`&^q-F-Q-o1f$_n{v1y~_05yYDM~o?%Cg9>UDFB|MI|Ha(8(UD5j9wc|e|Bqa8G z5^Z~Xlf?&jL0|TMx8yD=Cx45mdqqr*AoSX8)j2b7*mFA6(&;a(B66~3@#Pv>@ov+T zK0ReNM2Q>@cWw#j_0~#)gZk?)!@m=gHd#Zh7jtwYNWIRIpUe-8uA-NL1h3JY7cQL; ztf=bTVm&o|x2Jut4uS8*k#abP`p|x9lrIr1NkHUtz!Pc}yUZ9JX#G#A%8y85sZNJk z)%y@GYg3UBbKv(kRiU66ZiI{<1av{DlhyoI>o!XQh+`{FU2oOHA~6k<>Mbc zq|VE8D81Ee#frCww~x2%jN^Ex&X}b{8Bf}${cjaCd`1U1qdyQl^qY&hyJuaQu(BEr zE3dip`1~_&pl9lpoVSX-&xHW_3h4@ID5wp;1Ls60i)G^!FPwu~jW4;nwVl8%_Yk^K z8RDYX!B%9&;S*Fqy)(!R}t&WAx55@+DVyhM>&9?i)v0L@qGpX47-`|0OxskUQ2ai@)N9A{- zUW$&=8X!-mytZ)|8CkP_RZU4Uo4a8asW#kH5`?lm0wqzwG0hU&^@{s5UXx!jfl)gL zD@#Yvjq&=>lsZiDDpJ1V?aL;{h@ASf8(!y|K%=TXPSQsR%Y%n!i=$eGiyxMWtI;;i zD+GLO%lXHMZN5Mkw-RlcQo?V*46YjkEHL=3xQ}uD zK3JlI94Vcw)K#?sp~^hnb7DtM?h9l=7IDV!UbRa-$!HHAkU?G6c&;8thHuv2pelfk z23?GBz#k#MuJ7K}J#U|{hS`?apFPZMz@;NKmGm&}BCKfNs!=|G3@B&U3dAzb?UsJ& zi;eP|8xC}ZBMwS~+=#?(r!RM@l=Wqccv;Bh^D%>e zuAeLK{wUB#v5wcEJIv+qlei~SZ%}$B2Q8XGq9Nt?8mrG_=i{|LKb#QBi!}(M?^pf%l(=#Yv z(o4h&#x`c{w~KeY+u*FzWiir8hBo5I=9#>Zq9{K1xpDQ0@r}g=$Ly;<5ThSv>dmo` z$9j+GGcBz<&JS`0lRTPmy``qR+OL@6xzh9ma|YRuzEF3OJvtSdy;Ysu!l=|Yl1nCi z+M<58-UpNP-lr(_fbc9A&frhGmltP7UW}|KN?Z54*Rr`iV(*JU2F6_;WGH(dx$N&j zH)1AS7gkPNYC|o7AOjIoYFsq4UG9*GSBv=;5YEGL7r}!l#U=OnuwBm zYBF4`UL)mbj{XnL${tgfhdL?hAKD_Myd7D~cF=08`Cp3(zYN!(yQFHc1r6{>Y}4}< zqA-o}QT;#PrF>Zh+Fi>ukI5YG@B}?GG_0q3-^aRIw-D0S9jM68Yp(Gkuk?ebQIaxaeS_=Rcf~9x>8iLwWx?2>omzN`g>TXrq*uUc6SeIt!5PV#6$X|QEI5b zLUWR0J6H1jgBmP@&bBt9i+bBUYETUpyH(@LN4J5$Ozw8%hs)IeDhl?AtfwT6R5OU=JmBD^n-4WMOmtV# z@cG`;{^iXIP5m=>-Tk-4!SSq>r{Vr_22Af_>zpZf_6OBz4RJ#|$al(*{bO>B zqG_vsZhkc@o|;VDZulSJYj;riF(z42{8S~kI^XYcSfxTr+MS?_K+0x}CH0nmYLf*B z3~ZzVsq{teXE~Rz?#A8wWXU$jV@LSDp>-*bIY?UANoZ)`pr{tUwSoB{&EVg`YIKu{324Ct=`y%^m%W-X}P83rSWb z5{i=iy~&^v4ugBVy%8H9tNH4P{-vSz&C8C*+i#K1BGo>y zOX;lVa8$SuQnXKP&`(%xd*w3F$ydIe9=RJwjVRVTr{cam($9ar-I&M9`{RdO#iUJ* zW{rVMnutWDanP&P3g>RhGgr)mv)!OO72oWv9%AbB%9kp1&X0!jha-V9n@_ra^=2YR zG4dwr7aUi!7qd&zd!=aab_Q4VD8at|B6;4Y?jgw@y+6G9H+Ge!_D1nT2726QLM8fH z+Dhhsd^h&T^L;;{6IP!V#`8lm{cLjA?!ip*jz@>X99dk_8PX+g)xb?%bt8dFIQqn_hX^!$^b6=c%IOUa5amRh_ zOF1U*6Bex<>|FKsCz+1gSC9a+{3zwa9*OY+I+q7Kpb3wZqYYsM=?K~6t}2~MZm`=P z5IJmtIg6;+LBBibx;8}fti@s z1VBK+#O^ZQTGZh*BclH5cqDNH?_*4oGKHz)a}5o8b^O-W1$hu&Q|~tmZEb1OMRIjh zHv+AXwyqyX9rq=PkDIETbJXsfo!Bo9V=NlHj*nMXCJ`hK>q!0F_YtFX!j^I>UqrIQ 
zg^o%1_L6rG1~beF`+nSZ$7pz8p<#&I-w+Dz=Qn4_Kn^j~4ly=*=Fhdua5vl7Xb|_m zAqY1kj@l5oGdQF^_>DLytQ`;nB&jYciCSgRO>EL*05g+F_A^a=SdR|MU7U-SukL?G zWw_Dyu`fC0gr56!&STy{Oup-?mXJ4-Cx~G#CTI>5U&rm&z*Q0uoa3f^Gc!fPBhL7H zuJZ$Qcyh&OV#jc{uCv3<(FDfoE5n`%qEXB~Z_@8ViCC=?peDn~v+$j^UHtF8_|5|R zCCLxr((@f?`8fRjnY-&$`4@9;-yopv=rf5k!m}`65D?(*)`V!OyBDxad4Y3@bpv z;{7w^_mkh7zuu5I30z^0R|?0=4IcF~y)T1(+Kf6<$vZmSgN7ePogY>42;EzL zr-Wz_9w*$Ni-vdLEY(Z}fZTi}Wf++O#ry>136fT~@Tn(~h9r<$N`Y(M?pPsIEWenl zu+KN*V6gVmte&SY`qylG`AFpo3*r|p-Up(K)nKjmqqNcL`%EI9w@tInzMMWUiJy_J zn*Q}e#!yqsFhTgODscKFbl#G!l91;Sl^hviwm*~fq^2a^{Cu|~BWf`01AHlE@aG!x zzPF)CUstg0cv);Uz1!2V(|6~dnWV*aK2$qDPyBX;4cm0Wl~HUvWG8p&rMvEsX0_nAF`*Le@?M5R{(8bgJl*l(TdU1s${ z?9Zm5VnC4iB;$Ee^tSTP1$k8d*P_gB-iDTfu|#p(LAEGK%eD-uQ;SZ~ zk4Z_Ju{}F6Y6F6O;m2oE{iNdJ;s2 z@hNlI8~zysVw6Vh1*YZal?VS^$vw%R1j^yAFVR?`zBMI}OH;mq2Ufb4Ba# zaNK4uI>}({>tFGa0f7ON75`z_lfx((JUZ-Uav|f%TZZP(zvo>N2HXVw`O?1?O+GTe zS=HBBbVp&ixZm40S&C}7S~kC%)1QEt^fYoXO^n$tR~<`H=JF}jbF$gn^~WN0$G!UB zE(ztD&1Ol?2Zl#qF2_W<(<)s(nZ6V~{H9~R?6UdG+rINo{9xDcSFSzk7gu{64Ns7O z6S}PI*TrFGdf9vT;4=Mo#<8jR zIJxhQT>KDQuyU0*c{Omkaut0w6mx|_5Ac3-*Fx>G9qdB(a8DmIobHX3-)y)FOV2a( zyZ@LlFTHF?{#fC*8<~%+-N9Qx`Fx`HgV;@`@Xf2~p69J>1qmF9=%8QLB2#@XG+XiI zbM@V@FvruHJ4}tYaP#0xU*MWs@`xoQMn92us^uN8$3o+iVs@oraovOw zzfb8NnY!`E##8AXgr01`XB?n#4AbJEcLvJ@K3cTR*4EI+6NM4DJoHD5!quh2i>cxK z^(LNLUOySzU@(C6RF|ukEW0zsuQA=k4gJF&2ilWUYo{b{4bHzeabIXUw>UrARA50( z+T<9lWb4}=?vQYOj_^3+d{OG4H?)Xz^;zZCGmjQRLzCPZyE*K^b0_;poJok^osmaD zr~6C40maoCt7G5kkhh`#cUJexwy>3XcO3UptJBfJP_3TegmF7Vu6&S}d9lZtx%W?LJ3oU1@~ zQku`=A3w}?fQkw}NkwZn!h$_VfQEN`f~%QXosJd zhfq!sqRHCW!j=EYBO6a3(jX=u(nkD0Q_wq1Lja^Foocu)$aE%cP%QTu`QkfcW7O6u=fIi8aC1lmscyO4W0(cn#qsX|dCsbA%0%c| zCtSQUtNg>!WU1=G%_>rX^Wk{7_oJ$*2OYM^P;iPeqq|SU_!xiu+v@y>INUcyhWBxj>PHz~=vZA5$`<)11pDsnpT5*b-kzU)^^ zO_6~$9s8esj_S4Q|4l9NxYBk3x^J-ifYCJ5Y+o*;O-Vntl*iCOEAmJVxhM_7V4dL{ z;IEJ#X25)qjSVRH4(|!oMc8rs5CVC00JW}tKTmqCvoec7HfmDb!_0& zMw9b7whnV7u>$x;1ktLA71rbLrO!|XH*H%=#Q-xVS7&?Qd@+Bxw1d&@Z{GEu*=P6e zBift8AnZdQ85bKIksB_XBhNHmJfw)-ct@`Mn-%1b>i8mZzkI}lX_lw0aWTt;=%uK# ziz=4s(h%)d7*O$;dTW$;4{_tqi`bLx>_HR1Ydr!A?Z56Q!uduR(G{9kTX5H6Jv0>S zf58ebn6FvkW&>IEMx!r|UG$;-3V!Jdeh`DZSQ4^HQKK9+@y*I&sDmT7wzj}UN_^rz zT#5NQ{^RHQO+z{ONK6C&_y38m4}pc(sp5(ERL zDPGb@Qm$cVn75H?g(~eJtiW1Cj9jhB^SM@BdDAQYT3kBw+p|p6;Gog$c?#XvY;1y) z-!)!m)Yj(q9{#@8`-}lnc-`!^BXJ@{?yU@y1+GUu$L0*%PbzI25lw7}+D>1uw3v+t zp!&!Lm(VJ2|GiB1l#J3^f7p@gm_;D2O8`0Wd6fC;ynSnri*EPE0ddnfD8 zL0d)M!REYTWNf?n>k8f-qL^(}80LBPMp@XZ59T<_X+}xpeMW9>aQ(SA0kMY(m|MRx z5rQGb@U~py*PDQud84U1!enu&qZ;v1@>@749DmYq7)xZ-AKo$QW2S51ZY}J=LV-3S z`11cvOsqrSQvWjqQthJf^X_-5k zYEYAps)KLJ%+^m!c!PWF-WQgx8a>BMeD4)lwwMgk#;i7 zSNmeh&HWE8j6ZS7f7*4{|C)DfR!`b|{17W|Vc{nXD-2vLI(j+(PVrrZ=Xl>iPr71- zd+Uvn<^Z?BPle1W(S2th`VJ0+9@KUlTT^?2HtWxehEip5y83ui{rn`mdVABmy6zp_ z(^9>2C#&M^q&DErgj=+R!dDV(>(j0(t!1y?UC7P))pN2jwh4wWYeeBwN2QX=F%`hP z_%vpfU-Jn1*QLFAv-l;j>Ead%b96PNgvCx#ky9%^zNOVu{*4lk=_2K)y;YH^;)Rft z$2=J&q;0TpB=yyf^t!yl+(Ff#_!GAog(^8 z|I1D<&}?~XgB&vGk_5wWcH=-eJg6T(l~jhwM_S$W%qJ(v=+zkr`NtYQ^p{apT{);9o z@V_(5{~Dy)kO405f^~Z*MZZ4^kT%zt{eajGmULOVJWv!{;YtR$d_2e8WYH-69h?)a zi&Re|(k9YBgQi|WT6~xfQzI~3#PNW5KV;?)y3{9i7ZQ-sOEd_$5y%?^yUR2o z|L3mIC*C^;P_A<&ZT%TLGJM7rQXAjDEQV-&#cPWJ#FQ(vQyJ`eL((UR)#noAc0J+4 z)P#@1*}*!IA^S^C-Xrb@q~*UnCHveHV2WdJ9XHBklwXdrgGfs|l#wz>yTUntz+AaT z^A5{Nv@UNW!e zA6ncs-@w{SgzTx&P<{*V828-)Fh(Qnug&K)OgLux<$?Pq6XSeq~DP7H<71k(n~8UFRP(EJ}k z1^R<97sGzBe}ZD{FHnHIOZNTR{syD$X(OZGA1LnlCs7HJ0O+|!W*7bu^beGLm^Nxt z?f-yDJ>>#$PlzRP(|qNA^D7vK^Z>s~=GyuO*uFpOHI{rxpVJ2aEe2%&i5=?y8#rm~ z%N7P1RjHJ9%Q4s+l!GK&OqeiOn6yBAMuR5*OCR962CWRt#~TEQq5mRu{u8*MYJXv= 
zQ2f6YOB4W>?*G9O!~bCEzrfu72TOlcs{g?fz(M~Zm;MJ!fTHj}a_RrQTzcWpE-@0_ znv>TpH{B2+P!xtMu`?jda0JERU~u-Z{tu-r$Xi{yBnb7F)@-zl8{z-Abl&ro0{^Ww zOYoZ0|1F(Al5QA#?fU~pL)Cwu%=&#}Oul5;#DM$bX4(sZN#v+lQF(^x5q$w-Odf^f z*$-o8`tMXM{{LTjh#2-=5}Z@cZ6$oA459BQWyb&eyyU;EeEu6V{;&VN8H)d#R>*=o zyIt)6Mn?l-T|1Hi79|5lu~1WP5STWRsEzy*&~@{V6-9CXwT7ufO|;pChKub0=9kB>z(1!&(f9_&1b@3gQ8-IjwJ;QhU^@zdY`gIN z(R}kjekG`%G>u6P7T^uRd~ySwvHUe<<;si;XqtiEL>THb{)1O5?fp?{f9&>u>XC=p zq>b`+*e~okOkxBD%Q*$_CkP0X-<4wX|2-&pmz$fL?5m#u&h>V^zv27k3vGT;QTkf` zz5M)w)UHs6`~ohuPY+ByAAQP}ll7JPMFvV~r8kkhHsJ`dQS^k=Vz}6kB0&pMopjX0 zVfVqzw)VsHcb`JR{cwL!K(`Dt0h!MfhjB*wV;SaND5$SR&p%a+^PNn3H~g_M70^K}LHZQ~M4)*J;lTNE&U~(wt_JKlf^I$h2vg=P2$#DDb-h_N0^F_oroW&s} znU4I^CP>fb@efkQj#6Iu8(FZvqfH}pKtVZ4RxPX{iEtxooCLX^86PXe?Dw^NOA!)P z(K;NCTV3;7nnWwF7ojBA@R3Ppj*f}3AQxsEqqeJ&PZI1(d5zyrc!ZiEEaZJ(Pn; zOqFTunV~e;O$*Zmr^G@ZmhK~0sAIm}*$PDhyOvOgSHIBwr7dKg8eSMYKNTuda=E5( z)qG;4NWUzTdPxRhKYsDE`Xh4XUFZWj=4g5d82!0=fl85FGUFlhQy1{O#$~}LAixcM zY6JE`8lB6cG#gHICBarX!|bI&-?!u%uT5Yz9uNAio^DYesJzi=3;9*-t#_P?KfCq> zW|Aj13!i!5%l+9_Ru&^_=3NT5&ERLYxA8~>;Uur8M6I&ml+A>qN`o;Du>P&*U$j3f?*_4EaeaHtM=Uh#5j zUnTV8+N~q#r~%|}Z=0Xb5x&PKdQ5Y-e2F$83lxIydx59&l+=27c!T1T6n4$8R!gy` z<-rPRD%3)d;=6fi65eS0_AXggP}gP%3wMhY-}w>4^66LLur1Tm}QW*9kvugMjRwM~^+$UjO~fPP|OPZnY(aSb|bR|Tc z%A{@qTyj)u3Bd||2;}h!=8eqf+DfvhGwo4@Fi>q=8C?Z=e*oIH??CC**p#v3TE9iq zSP}kSTf0}}dxCr9xI(h;%`;j}e&jk_Ctueoswf_!^W?9`Q&47RWBcH+Z;%Lo z!K$C!sr|iFyZ}u|q-WyK?|_S)7Y;h%iq&Uw#>aM!0*U2p@weadmGJ@|xR`fgcj~zU zXfH0bF!{T)5>jQVQohqyCKF%`Y>Fhu!0`+89yaQ;dW_ryW|SnhGbge)o>3e)35vb} zp=R{^mR>CfvZumsZ2o5Q6OQcY?ajIfjELwK78a%-i3VF(f4a}#EKOyP6SepIS^jI= zroVKakVv0H+?-sy`@D#|!cw?a)9DPbLUb5c?p@yd!v$o24w~rYL7q$0A3Lte0tzj0 zRHax~FO!2unICfefcei35L(U-q{(xZL40MV*?>3k__fS z_)<|70$jm(tZ&;3fRO8l1(z^Gy{`)9D&&7Y$WBSdmwb-&BaRa!fzXTJpyi zNsUmX&nk`D`oc}-mU{$0+1_NAfvFaFZ*cOgfRwtpfp+;{pJ>*N&Mk^HyM1V zF0kLnMww>ad9Gd2vtzg1a*~G*^THM!*p)j4w5(t_xY2BPQsgcT^q~^?qhps|(7iVd zT?Cj%(j3eC67C`m8S+7o+kpB!ckW?FA1k~S|0qF5=2LseC#k9P5G^j#Naw5DnWDMh*1_=|lklq;hXeJ|8q>%JIm+g`zy@EiMJFfA zMvO_6&<_u@-W1;0)z)6!q50_}_A!oJGs5>X7Skmlh(O?*lvwK`cPCq(*nKMyi`*)S z;zb_e|6xd1tN!bU87|;oH zaZxlivDWX!b|Vc@cr~`tiUYw06aH>4my*q2(CPH?DME1F=S+V;zbUQLYau0119SHD z@O!wZWC3Rtt%z&_fv75;3UR)l8+$;Xi zGUOfx2VfvsWP#x_8QI*NQc}}9c$|#B_5qPHD!`#(X}r;2%f=0yhk+p$bd$uDax zl*#pyiSyNjzC&#sZ1=-%&K@uh*{PIQA2W0*HpjTFf*TgjE_}!*3#l;h5Xsm zkO&#s<~AkV&`ON576pZ85srW03s}UY{HXK#$+PF*wQS{p#R5Td1;MqaPnq5>b4Q%l zlSpg6{6i@aEVW+F<;)Q`>N?HSHpc2=v)85x*J6g8DnN$09=h|<)y?dEGtVfktHGf& z!(P!CGZ_1lZ0DnBc=2V-≻^V|jy#XK;{+ERBy$0q}tbL(V~7KIw$~_If`d#jR`g zV~lKPfo6ru#S@B;U8>N?dFqcz=JhW}ooQt5uO}a`I#%SEPcZ^-jwQiEi!Z(FLkeg@ zFa_$H*gxEGKXy;XXlsAer1cBd_UIId%+j!Un5rAcpmO%PWjSsLa2u6%J4PPFNl5c}@ zy&nVMQR{r8oaHN%#0pBz)BL{6$Bgpz(cD{(;baAV5#$RYgP>S+9S_W!BG!4p&j?c_ zkHDW}ljOBxYty+@i2S}?LcEIUciR+eapL1+^FaVTUO%J|@Q|U@dvOTFk=Etl8gAtJ z3AVpcE1=APu$?17Cj~`h?fX75mTag5N9^?{TgSGoMS**?BK$Mg0*4(CI2YHlzClDb z1%9CyvGAOZGHhowF?creOQ$l21nr!cx9(9oI*m zvP+Z&;{e=1><%(!4ZrB7d$-_T*wb0;WIxcECTn-p9E`X3{jOzk3-S5!+Y_IZV=X3JSXByU!$ z;%&Fn2aQM+Y=`Njk4m(|R&__--5qZ`K*PumdP3xhQ;#*<^4>NRnMb14uAA|q>^{Qp8zS!A`>0#Vwx{;gGwFT9!{rcrrVWhW z`1zc><#;QQ>{EGZCN+h=)9d5oE7Sajn|QG;rIga>1{PM__uuc+p*L9xfc=qSz9j39 zr#lUP{@qQ!1$SikPBj0Mmd|LVKYBOhnnQyK9s%I8Bf_$ae5KCH#tc0MI5fIGM7{C0 z9KfL!B0ghl$s$#Jm6RxaeTcC->WHBV086){$Jz-;N3+T^Lj|x^JK-Iq3V$i{f6gXz zN|hUEj~@C%#lTzm5nrkVsx?hbaf9Yx7;JQs1%}Df5cx%T2f(h$3m+Son2wHOywbw! 
z&ju*ItA3|}F?KwNF$fQ0a$A^lI*19P0kYCAzv-o>sf7#l*<%CJ#epT-+B5EDq5lNP zcR^l8+^jnr$(9^t@!Jz0=M$no?S1h(bv9-cJ$AI~a*2Yf3^O%$bd?cg&Rvql=aN>f z(6W+hssROVp^E;Px&KPD6OP$Z3fO)?Y#%9X7Kma1kGY&gsc~U zV5RkaJy#xJ114vLWJ0k~iR6~n&xIGs9*Vg2Squg{ObTTuGSInIyd8&4h9g!d3ZGYw zIB&&($e7jfA-5B1hp}EG6r_Oj@)uD+V4+NEG89m#{}=!dSpeC}IW(4QImLnsxM(%V zQfI!F z01&Dqap2KE&L9GRpW^rgyLL4O_*yfwE|ITU!)d57_+{nmn?bX9}@b&|-|F<%3-Zgy=x&0tT zQOj+Lp4_*ar|#1O*DZy&`r8kOg@u7iS?un}CVU~UvtOyltXR;UcRopuKZjFpdwrNN z@LZD*+SB5=Sc;J8W1P-=^ntzYT}s@g!mRgsoh$3b`vb`Buv$^+%p_xQ;^4?2bPSGT zbADtyan=LB?88vJo6vQ1%dM!K!Rn-^n=UrWdY|=xco&fuwI>p^ zS5t(~UuaXVw{xgm=8eHWWbdkzF+*9f?;Y!p{ZPuBMXx*ZNd2@#ivYW_2E|&Wai0IM zvah12r{{F(hItBJ?t{x%jTQ=`#jvn<0|08J%JXIxQ()ajB-h|$!PL%WP%&c&AW2id z(w-6<`|Ld@UX#T|ap2XkmY5H*AAX7K&L>miRjZ?oCl{~hmz8N%rCmN~pu?nXWZ`c+ z^2^;JhC5JenW#VLB)~q3Pk5N0g??f{SNrZsx&f-~FA>MOt;WQGFGYA|!WOD9pGp?9ZF7+^a z_YPSnudA;Sx5MgBMWr=)rJFp+I3G7`K;tt5 z!6u!J;^UGwOF20js_qX=ccEWecb}4&T1iN~CMl4iXZayIDG@07e`O;vhhZH-6XWIlawSr2z7~(nU<0B%8)X3zVsIXBft{3R6 z#q>7Guk80V2%#N**SZPP+Q}m=8%7xqW@<@STH~`3FQY^Rwx@ykP&UVJ3i~rxK8)Tc zyXNKLYnKW!RfDQh@6HaTf<$a*p1w+9RBO06({DLs6m%D3Ks0){Vf4rOs+85@V-PJg zct7k{(4-pi%_N7P^V{{jI0XgSvb}nj{cjz#E36csZ;tvF{aCxv#oLk7scD$;ju57* zUZG*l3}u4c_M2+Qe|8ra8cgr|oT=u0gxMD%i>Ku?iEmtj=bGF53&b6J#8iD`B>$jX z(@UN58s$`24M3>EZ+-c~^gm%i&=Pd|?&~IH;>vsD=m6^ZeAyD(r8WWS#giT<-7p8* z9=s+sZtmc-)B7G*uY|f42@~k>nw<4g%6^X_Aicf4+4qaTE7Z{Sc&!_p?W-Y7__6Xb zK8$P#Z*7$KhOb!&UzB=I3Gxkv+-TvmQA&7+6&IB`5>Ll)EHt!0TNUJP&>mkc1fhI| zYNqAMDBDiq;0GU>feTD4f=uWO0-H{A!qa)ZIdd56cuF|JEG{Ojic+0K8bg+V)I*!D z;%u9Nblf`rx1nmUjGB%Pw(5v%n+8zf9=C9??x$FUY}RJkBS+W$`zpAO)QNpb#ZQ>e z@32X4jT4af1vW7pjVDA(uGy^(m9lNt?yEl6)_(j|8zVRCxzFHrHojb8EI*A%A$+~F zGdvn*-oNR3-a-Y#_BDdsUCuSjHM*EzclSa-C^cub^=O8K*!*eVC=>;eYikW6wX`bg z)4O!n=!l9H>PY-ifL&(RUASj++@I-Cm?o`-9x0JtFz6c0Kldj8#b!7Dl<3FqIS*YTGPYBfEoYIv&VLLC$Hk&Z$X0#%G!uzv$7S-8TVq zX#1m138gcG7~Q%2UMAKkE^;Tg>$u@3U7mHn$r>$&t9hLnyl!ij%6sCE+A>R2x~kb^ zPf?sIbH0E0+8?ERES^FdHQ!!aD>>m9HN84I>s`F>irg&Tm6ZOKB;`*l@^PDg!#;eu zKuw~MqgFmrwTJOH(nba^YufCm-}%fNlLFx}WZ(^q9J=#24A4sTZyG-Yl2YVN6%{ih z-Pr1FyLJ<}@S2D(mIkx5#2F^a5-Cfm=88wkU2mMe=rj!8nCqG;D!k2e_KDaJC$oHZ zS)sPzwg_I6da(Rj8^79Z{iSJ-<0K8(^7mWSib={9G#$$2s!1e#WvA%lh86gw4A~}G zm}Ca`tqHCN;7wl9n1yVB#nFKEBYXuX7HHZUV?OF{<-OExUr5??I8`OXlTkdjnUk@rLaDO4j3LS!l+JU-)ykx1bLd zq|5!Oe?i8Q4d+d$xs+?E@qwDtQ8-u(_Yv93>VmeR(4IqSVAR)g;i9o6$KlnlLJWj3qiZagc>KfmY!MbLWjQ zmm})h0tHHj(H?&93th|c^dF}{9gc=TARy7QElK*C0B}gBQXCu|$0ipjU(}sZ<$S&A zdb(LTSyb>Dp%1l984MBYk?0O|7cjs0qFv!>!zf zFAo>>bocuu*I6t2;@_S9TD-44AUrw)wcK-mL=`H#Q}B5(mtY%_8Z@X>vug=0z=OWn zw)?5ur!f{`*~;IzYoJD)6U=C_{+k)mMpsO0n3Dy1wzb^lE+7b8p zIXzGgA@t>5=95hRPJ+`X$#L>3*XE`gkf-u8WrYJ(?krH@Cj~w)Enl1+b}Uhomk#8?vCy34i)79C_Xp8t|v6T{$J6qF9d zuZAawtjC~>PnseQVQAyzN$?uF@%)O~X)?m;NmXE=8Ol7u_Zw+m6-p-A zFX>)`w5A*fvEM5VX)8Un_>~Ua%@>evugGeTcT!qtgriOROTM>N) z48r8{DBF5Wh%{`J38{T=kr9mxDa#UtSr)}M>K3^?D>jw%B4~3Q)GGI+)E~eJo3SEVtGN8Cd|A2 zba@sj^if&n&~=Rp7E)Q5bI|ps*JJ!h{e&;?3|X;}R>Ejbbw?sj)Ro zsU`2Z=VxhNU2{=Lq}FK#ear$AK+*=T#OXF1DY;Q;>o(s*;&=?+v>o1lzuaAyMhjN{ zg8aVt1nl;KbYir4l&V%S*ej$%M{2-(n%S(2)UXjN;KvL+!mztMb zkgi$noMUO&Zz`4ev-wNTA^oXGea4t`YM(A;Mv|t>p7(i8;jLNnM6Q%?ww*urzDUYD zn_JQ5oWof=>V_ILIW9$neq!EsNGs0?@o4{Al$^TQ+@#*Z3`jE_d$2+YFQmS2Z=FX> zH%q3hMdH%cm}p2J4>oT%jCd?cPVC*e(HHPS?fg8E798Z>zyOaxd#hpO><9RgcS_eO z1=w3PPss%dlA>W&-eQ1<32Mwfw&yr zYGpf`BM+0^(X@sZ^feea03~74qIq6)Bzud#j*&`w39B`wF*u1qfE?l-vn-$+)oS}+ z!}y|QOR($$kQbaSWN(5dSrv?;rd>1c7uOCgPfKi8+}fP7;n<%5mOAzK@0)@}9CRYq z>2qvKRuwZpZ>c$XNX%4N2p+C^84j6-C?*e3-gryN;I|c@KpYqT(e^N>(VhX0O#`-4 z9SA7p!yDPl%`xJ?!6)AUJEkAh&O(Jo0~M0_Iw6fg(1(?^diN 
zY~Ox7dff2-4DOg^zqPIAg&?M6tE`$$R&?8#eijAf47zXExgD$*H)u{Ky#Nx4w{5ML z6XN5)KKlQ-de5My{-|5{uhNvND7}h;h!A=YRVmU$M0z41z4sDG6cnUOkzS;$lz?;y zM0)Sin{+ThfItF-gd3mx-1oyflTT+RGiT2Eon6*iduGvVUD9;X_~he9wQuWbEWz+QIJcs9W}2=$ooW zbELIr3F4_S+)UN96`pPE4a?G0Ar9XI&${rD^hzlP5HiTHDk1t0V+r|{Ef@J z<7h=p_nLKYh9in`kH|VXr(;)RukjOq084+H78wY% zyB|E2pe*cH+eiA!6fVa8%Xk=#sB>G07?EqCbXzjlGAeDoJyprvMiX{J8W@E#IU&cC}*^VG>DV@1pY zk@#<)Ck51CUpOrJ+t@uwh}BSW zA*WHc<670lswbFK^tk7ad-z9*t!%&-=2o_U^UQ*SFLOhv9!#Wfe7Jn}gS*jODI};D zbAPh;T?5ZMBGc&c4f=Kw!R{4?+hYlF_l|E{1{!X*&~*q3xR!_r4>+BOV=wVWH!$ZH zEw5I~;^s8iD&k2%w1;sXEcPw4h^q#;VpN*+F$90Xi~ctyuF0sz8&A>N+=R2tRosW| zPi=K}QQJLfD3#*AyV~EhjkOQwO^du!N)u+A=9ugvUt{=>8>PTXJd)3E`o-mw2{r-C6T!RAT}w|5qG zGEg@8cV{;WmrfeWd+Gl=)$lL~4O5|4z6@M5zW&gk?ETZ#nin+h_ceaC{dcGHjhvbo z9hZW882kwbVc0#S#H zr}?Y{6IV`3OWtd8Nty+_TKZE8UohHtN2(jfNybFwn({~$4|@YIcA+xCm~E=r`n2^a zP+pzT+`?m&P$G_g09Jcb9qW8UThsJnS) z1vvIWji)Qo&2?l#|9^et5%lHfJOj7^q}!6=vNNgp?i-5HCzk8|uj z+_>}IHbDj91Kzt$MY#E|hM9hpv9hh!%`-Lm@PE!*aWJHiND9ER;Upp#{h%aU_MM6r z)y|K}TUds2?5|+k@^wHKSJ)g+pwo?n?HH3cVocb8Phy8gAqjOg@E@lPt}J98eb_#O z<0!jO(yT7LIOz)aR6>N)lTi1&1vjVBNvcOUViaxXe8%kC$Ni9tJI?EWaI;1`XLm?N z<_p9P7_uN!em}<@kgKWqBJ+pe!&i0nA85lg$C;5SMhPJ1z(os_+oVyj$l58MvsN6_ z7+(S{E#O9xJ{#Fu&S=*=+?YCDi8`H9A7Cq=0YFBpAC^J;L%u=D%+dps+v+a~9`;FCSZ&=Mu@Dt59JXfN<&(*%syVnr(N zBYYIta3_wz=bI)3k!&t+wAJs^59>qEEd$5zoy@Gjvb+QgURERJqrgA5?xE#h+;3f& zj-<t^)+cO-1=!H8@0?fLi96nF19>3XXXlS(&VF#bc77~h-I zw>w{as?xVw&g3tR3U7hBmOlclLs9U>`Qn^brOgjlBs(D(Q~00j3ET9u9tyOJ0WBnW zy^_ScUc=vGMQS^VSk9)vk}D#Y_}j}aE``$0GD)2<{GB;y-9g6^HY0k0+O#Xqj4HH# z#4X#!*m3H*A=ta3nE0KGFG23FR`bRLwNo#re~~Wn>ub}-;n~;0Lq_Z0ii*0PIFP1z zcT%(5MeA9INJC>47Q}VoY*?Y8ZO^#!RimgBBVIN!YTY&g7)>YpMtPnzlNnbrd#k~% zSo!KKW;qmgZ8)#tSFcsBa+FCUqrPlvyxhg3`GA$yIK=^$NoC^M!nb4xXA&1^!zO`c z#gA%SmgfS$IV4Pe4~p5^;}p-5c3speGr3ap*+M*vLE_uDf#a7fab8ZSX@!Z?87sa0 z1+;b?n{>$?tR!J34>p-ZGD~9yhh#hsgeaiAb{{eumc15^%c~db8C67K|5)qIR9WAk zhj-UFjOG)XLP-dXnpJ{w>U_PM%Y!ufZ8Sn+G`Ngp2K-1vHYg~FG+(+@FyIsUx?<-q z3F#onH_lt$`sud7bP?4`sf6GAkRC*#J0*>A+~K)Bn#R6rwmCSa&Iz`0Vklq!*R~pJ zpcLWeOUF-HLywh(Mj|c*mcTIu~Mo z_XG<$38t+g285b=S}8( zYC=r!#+KhLUmp%B#-xU~VK3Nswo;xE$CASK*2cl<

&_R1m-yk@n z@7n7nU3Z7Mwq+(=xsaeN`K9$lN{%XJtC=h3`KSiX;h~|oZHp>8(94VkZ(S8z9T)z@ z_%stxkf79h{Wlx>bh3bkP5ff_5aUkkUe1KKU?2be zy{qCpjbPn;yFJ#UC$4lgVDTQNM$O#gu5cK*C2@4?^16o~8ujTn)vO7q9YHZzBhp%c zgj*C6_nVFkB+~Y*H`Y@GM8??@taIfXeFXmgx||5^FSdga&2OZsfMFavrS}{jp;=IRz~_4XOXcSZvJq3k2ACbcbe4T3sBGENUD{1Nh^I zw+q7$T*pZP&bR*|^UEx0g_r9ZadU54Vu_b1kBjr|yxpl85PdMr-(PwKc8zl|xNWDP zgL0w4V_T6}zzj;~@QuNEo*P#)sE-@z%SAOc7LVH>Lyr}Fl53A7mmjw^H=8HRvo*_U zDzZdW%9w2n|8-6ycj;yv<_R=NUA?*9`_jDP_TiA3$G91ARJ81Fg{r3yR~f}&f9Bt{Qx~Ekmr5B&TXkDP-gns zy(IV-_ygh+y>eiyhi2iRcCPn_LRizm;ib?*0yuPP^LOzeZ#hy*WyS2Z`rXR<_ZC);OaBF{-`RHxkZ0K{`YcO9I5k`!d6Um#qjX>YA@=hlghu$8T#!rn-weze&zOR~^+pVD})bj0Ay z{>5h#-hxYB!wo%r(yM^^TAA1YWtcH@&d%|mh)pZ@5ftg|RgBOSG5h&}U+L-&@OX?t z{$xx&c0pZ?ip~VK=$DqWyHglnVCBD%p)dEn%xuU5X{CF0+0y>$3XcvBJ}MUM36c%f zcmMoR9tg?RF3p2J#^sdsSpJ%>u!xnLH&76%I>^r<||X0(?pL4$Gv@fNystOF?U-}f(|pk+_mBWt!BWGd7H z2k+i{oR*`?r+MZL!X@*9*D=F$1Uj88cF8SI- z?YpowvxD*YHg;(?tKm%B#&!6z3#W;nyJ_V!30Pr}=78Cc-VYMn?EM^8MT^X?dlMHf zEVRlO?39J)Bl(-rlCciCM@tZrf?l61ku7c6XEdj$ewH}!)6IgHdt(tKzE8I?cehxN3*E?m`Vb{K!?V4o;O-~ZLyj@Ov^^^49J z<$NxOJOdI#{w2f1DyQ$kCWo}rcxRgBuGGy6TXk#e+kWWr7>#(A0d7@!^W^cD(!Y=g z7wkd28hm#W1~qHiQ`u$$AC$*kcJ^@0oN(_*Me(LpjHVpzoE$iij4^2n+?VuEzI-`4 zvRjl`cluk)ssslQa{n3KpAmQy)OKz!wsiHUu&4Lh`IvbpP$kje9M7W7KDVNtxV^S`;FcLuqkTY~cr=+@Xgjkusp5}c+n8q4m+_RwV>x^3n9 zq`qlyB4+oc^ylRDNN#8mFHTvPajr_6spd>x>HI{Z8J|lTtMguT>8G7g!|1zr)WHRu zfQp`LOZxk&t-+@!vryw(W+VW;G}r%QFPU%y3oFJ zG2=UDzAHx(R#O`Qz_z5bwPc7WbR&zqc zRdf|?+Vq!M`W1b6%!OCd5QgV+Hq1?a<%pC$?xVrv8-KY9%!Q6{_quGHrH!Mq*-HTK8ymEmTK|>+Y|P(^tU1gm9b}xk}x9WmK)#@vUzX`W!+<#1Mbr_ zw0qXt(P9yAW*Gk``Gp3d-KIj3@3>&0t5wjEm+RX<>lTIvGZR>D!t+qg?x5KAsAP)v z*95FCwj=lnyW?^&Dh`T2BEhHH0n1Wd9u-fCc~IxKDufwm4NF*b?@PIIwDBEo#WN)+ zB|%MpxKn;B#&F@tYUv`CKiXcm*5-I?NbZChu+g$P{rlF=)TEG!S;G5(fNb7@J#iLI4y7z+5D@YzlTU`GvD>!YrWPZBz1%<2^5f~(^hH+NMkELJks9-5so>v}yuxpf zzG&8>>l1fA+Ry~ytQxYq&Hgb*=dS}S0on8W51KLYS57G<2C(Fjjb~J=f5+@8408kf zTDR_P;op=m-x!ad=Q$8Hx6<>8etS6usaXFpM63BPA!!Vny$15nW_w8b_$GRTtEzRp zJj1y6*Wc8x$kn=zf8bJNE-6oNB?A>k1nK%nv>_$7b(cxuCuA@!usH~(qz#vY{D_FvJrx4m(rOC~% zfj;hdCAN_xy6Tklal0*^8+I|5UP!k=#+y`ambdxA@~J$3_c2fv#r()ujW~44mjYe{ z$NYB~-dZ@d@q`?ILy@d(^O2hUvoHT$yQk4!Hy}6L5`mN`-kAq+9AzXg6!4os`Vs#rf#~Cp zMpx0nqzq*IYlH6>@`~u9&myFc^~7d3II%-$+72&$_R|#CUWap`$dBeFJ_Gqb1`lQ6 zMSw>eRPc?TFWv78!S&c=q~TG#KLh_uo$@5cPCOXa7HAQuNKicX3_i=pBb0QhxUh6l_{P`WZt~yX1(pUb5jw4ZDr% zhC8keuW#~q9KGsIGi4=qNZJqpXi>Jw3 zeovl^HhDEkFH}YYXm^%;!u5W$&w1t31e-k@MMshcQLxZ)cEiAFhfxInO`|~mf{)QG zk&TSnWGF*wrgJ@$$pTS3oPnY9s5lQ}j&x>&G};fd$zOF0?kht=8kvX$6k4j*M`0zI zF}S7(<@Fj2@Zc`Z3BI-Dg|EmQqWZ{a=UR`sK;V?WlJdaf;pLr5)v6GVqzd)@pkNZ8 za^J2sGS0g@3r?ugU+YT`n9vqTx2$j~<|NtA((pOwoZJiF-`gwyN^v8Aj!TXCu(4Gc z!c6D;EwKSEsUMp*-EVD{?%nA(6TI5+Hx0ZyAcSWRwK?91O36sHX43S$dphL)XamJp z6l8Q);Aq5Qt+hcJVovS*)VYd%h`}^>w@APS?8gA-=8K@00z`2vj%c(e?#+w3Y)R7R zj1T?pjS4wa+&`&NMDTueVR!;MCf9m$V>p~5EYvLL2 zMn^{{2bDU9uLznBxMQe572s)d7P(28qDw?LC4~L)l0RzLmmi`?zxhZ|u$Wq;d8kyT z((}wXa5HWOd?ZeRHFjq;(%k%o8?s`!>iAn;)i8uPYx-b zP}9Dy&YvG*g=ffUI5Thz6g?6S+~*KWwpc)k*I>L=z2P43V!TSQ7z9ckDLkkfI&F!$`~7Hp$x1f#t; zBkRJ6c_vfMPNu2AZ|(;*Q@QKtm6u{j2TO0P)WFHy-q3S)>>kcj^~6c&7cR{HS*$dm zQ0x2QGC%q3hjPyV&snsPNPY}JaRsEC=Viv?@8yO}s2{xudAHeH+8b}u{Qn+ZsM~(Y32+@iWXh#W{|w|LYfWg;5_W`Qt&6c$onWR$2 zY`ZDju;FtJ2~B1}9Ep-6qbxleSEAFvFsaIc_6~ee%B!e3%c|)7vkDBTGh=@X?vlE*%RwdHPHGO5-81kIG7OrYBw4 zeNfU9Nv8GSsR~p#Paqkbr2R#@)$5lUS5@0#Q)Mu*V`jdl-96rc<_mm9!}hoj zW3CLpzg->r296ecNF_i~V&nL?>vpBrcft*9YK6t_swP*d2b1uY@L$X~W)Bkp;Ng$U zD}*cgDboF`*()&+BRyqs$m!IKKfBnl zTx@L+$ex0K^ioT?*gNYZ8H0c~a?zFQlnL%D-_}JEQ@rV&dS!INXjkvysDyO3it@@G 
zm#?~(5)_`m*smTNQ*j=h=&4^ixQptb`(2neSW}i@VT(1Aq-dX3yU9Ij=dt^92|@xo z$A&U|f^_ozBvE?OwJvMs*}UAFO7;o0;LxVNL$W6bFKM6TDZbJrmuSUKdEPgnC4tH3 zdoh5WaB}XKFXzRoLNRGXk7NA z(ZrK+&Af>MIP>_=dqO)!B-to5G&l>v8&y)XJwI*`H)-?2M88I-CC98@0n#^J#JrST zM8e5S0cH(Jwzt@8EPx_?^*0u<#eY@x+3u+r>k~#%G8Y z*k!+i#*+L`nbVkAE%mck30F%axmm&sqy5q$B*$F0<7m(QU+){({L)-Bk8BXvZd|a- z7YCj+Zq_SE3Gt-_(2pZ4dOH%7i_L1b57iowyN&A!`GX=&vzPCLcDr;IgSNq>f<%@+ z61*S$b7#Dvn)2=VN4wb1!lxqk=sodV?Um0;YbS}1vTfl31%Xt{F>xok5UgO02ekH( zkwj?~*)}so^+4q86C89cvk3mf&7isyuD)sLH=v*!Yk5dRMnI^vNiFhsE zy@g^!C&Vx>^WO!sf z2U%C2H!KXdqCUA~-Tgsn4U1^Il|1!lCarY!MQnl;K%j$^=v?NC=wVk&0Vhhx&Xkl? zOg-t9R;F^^Ag*?w5k~wx$c1qIvyZu>rvhIy6`2BUVrJ?E@&dD&f3<9>*y(EoJTo5w z< kj*81fkko8%2RkYq(A>DhGdtq68Xwhm(x4@3s8~qLsJdJf|9X-1AO_DwPwUc# z0*sp+IwRtG+^Lm?2^oIBLw92*TgLMvx$dV8b&~R=piwD0q7R^e^`#BSz4*mheKBY`Tl<0*<#Vz}b)_jpGVCHWnErf$Q z#~K46{-w5ol%go!m?7n>OC6K}Z;s}%J9`}H(MRX`T0y``$lS*BpeuC$2-!*F<#Ehz zQp}=HO*5n61Nqu5RyP(ji5T;(t1@afmt2QqrBIDOklHK>46kE}LJTp*7|B+S)YBB8tL9!QCV5 zU<14t$J;cMrdpMZi5cJd{<4kq`gNAQsq}4J#L|$o#h8X;RAp;)=274diXjq+uF~rT zAPE|wBXSGsqxW+D#2z?6mYDWupRme5WL}Net`0Uhye{~QUcV3gZqMr`X$tM%T!knV z9qZ!1nO~zBwVP z>z)6a&%wQQoio0jcO;Gjj*23E)sVbaTHEP5mzmvBd&iZDkgmS!tzHG~SHFagD;Aog z1uW)%&Ib2A?`ZO|_cTviowe{lYN|)lHt^>sc1E%>4qOy2eN{zyoZfQCIh#TSBY`yj zghd}njRUf5XyKo9t@FiDFzg>8=K@DKmfQP@N8DACVa9lt8qn9Y)QEAg4F5}qImZYEyD!Tg`pt+>z^ z1|~Vcm$#h^<4~DM7MPwrnmqWE_0oUK(Qa~SOToT>pi-F)iZyIOI0dv#F3tH&=8$Z| z8p2>&DPW|E@z0*tl`Oh`@;r=U`GFh%?O0fEtg z<7VBxIUpJGVjczEoS7MH3M>Gk!(wRRkFdv4%-e)RX;sBOP4^=-r#OBs9!!<7$t~VHC}a+qx&ANA2yID zv+Pw`VG^vz%AKUL0U*`cKRWTaWeb%AMd??7jC?DR?wz-XOSr6=Izl;PwznZ7=~KX+ zRuQC`z=MY(z5lDLX}MpIY~v5z-+g$MmM*amO6~4zgk8O-D2cu@HnG}TJ%ugSj2_v< z>~ih%fy_5M%ecZ1SaiXmr*Rs?V=IB*Z|)^`9`oXQvfK+F>OdzRXgu1?b3gj+gj4%* z7Uh8FG_R9XMmX^YTdiY7f1{#)IlPpqNMOMXkl8hY!s!)iYs3r#``Rhz^2OKRBd(z^ z!(H{sj+HU)=R=KRTs^qVR@Vfe*ILX2jnMhL(@d$9Z5hWGDx(H(_(oVIfIZK(1!mY~ z+`G7G!BTmATui35xBvT3@$A<>tr?f z`?PD_sc*@>^|C#ZyAZc>^#YKU?>c!w3`_VHO`BW3?~wB#t&6mCwOGbsNE)4-92Ryx z*l8lL^Rp2kHt2%Yy;DZY`9s^;lyiu1R4t(#6j!YH_dN)%V?GB$e=AJ*N^a>6ET1`* zabX!beY(z3QQDv?I!en@wk`D$NCOSsLt5;~WE^>lN_ng$vmzL3gKF%13<_pWWXl?O zeQe&?Nl`KJ0mY^<0cXL)7LQT!IO9zvF&kt+UpUCj@)xy#W%IFk;Y$BA(os!927(fC zn~|o_=?Z)=A9oXUd;hAC1R_hq8rlFZ=j9;}Zzd?@XC;E*Su&U?<21&$hJm6YO!1)E z3Q~OvNvG7vNT_jd?d1sDxWX=GY*7Nm{yUh&T~EN-X!Ch1yKJHO4F>O*-(=+Pf4Khn z8Ou)=^@M-&Gves7-!E;$Cxo!|gFcqqC%cR_qlYw9I}TmhqCiG_@_Wx-yrTJZQ|!%u z?b!@B?4P~3>odaIDWP0rLq;ZoF&+jDW3eN9BSRwx`^!W;v;ioQbG2k;MfwScIn^hI z7mX@PO%hx&(a{MTr={F5?e%+8OL4z~ss4g)5rDi5i2G@uJxURUTy5XE`l|p)tL`-O7GcvMfGvy7Xp1A0SsRwh+a=IEx_sQ_adM<8iUes|!h~W#V?ybn5Q=~_&` zSH!VX&@075*VLn)U+obnn5E^rqpS$IJP`rZ4;vSix@lmHeQ9+-5z(jPK-OQ`(M2QeJ`@1+ssdi8m)tAW4r zU9LWl_$O?QA2na`od%pG_R2@{oMz2losZ@YCD3^(M=@ks2EluD!N`pIefLo}iG4w` zRceKR1Kmlk=9x5r;>n-z#_aw&wcCqrM?L)`A?`hV^v!sRiPQ))hzV5Dx8yj({n#~4 zISV3q0uk(tL!cWd;NzdFab6o)X72FdyB{g-Tk6jt6#`K@u}h%RWj`4!SU3!rxeJ*p z+|AYEJ(bDASGMducnfdqw$mh!Dt>92UR|vs=IvFt%YW-I}fFbi?;%4)fORB*%ZeL1%(kjpo66gNk zq^&S=Lm&N47w>e0wU51XrFhgP$ji1`i1TNHN+vu^mx}nGuurLzbyjMXAqUtbMuf@L3*t ztGUf_LQ?zpmXQ?S!pTZBn={Jj?3I@U#s;qHXj&~(g$)^ApzN|55I}9XR5UdsBFc?wb zI&l;8!2TjO(L~Qmi(-a+BX{h)svgreo(fR1x zXF{cpzAyL$v;eFDO>J9m3rBu-ZtJ@f&!z${`}{k@;sF_CVKv)bE0I)WSNK7r?>6Uj zDSGUOkUZ&e3YYrRxF=aHWUgnfN_D%nBCV57Z?v7MQne4S-Jr+ZEyt1fc#HN@$bD2i z{KIDyGE03O8YX$C&+^ygGnqJD$umX5M&ZSCP^7A*W?rQ0_4c1ChiA6*Ew(8+lnUlX zT3XUcdoEkU$6BVh)1^PU?w##DTcI{Jx=c1uvW3|06BQ=EmuBxkVcR*louXs|pOsTp zN&n1wMYrL8|I%A)KZ4!eU+VMRf$GZWy2agk`)fDECQKTvt-%ZFS*E8rXu-X+8-#MzUUm(T48Lbw8{ov%rfqf7LfV0Qfwx!t=)E;w*fS_u&z_q07$%o 
zD_c((!TG`?>oJ4kWtY)zD^B^vb}aU_g{%H{ULK;g!zKJRUln~!6n{2>Uf10{1}Sy@;dP!I0*nD1Vm5EWnytpm!O_k2L+bRN>y7%MVp^Ml^SG>Xq?N6BxSay4daQZ!4x%^ z%*ViIH>%RUgYwJpbE65tFOX28E>12Xq^SMaX5o6f>YXE$`d0d*up5ya@}HcYT_y`^ zd*_yLvuPhAlN&QBKfWDySlCC*>5XTyfR8LmZHI)3l_B=0VH-lxc`qH|{0YK-b!cK_(cP&M_2Qo^ zJRbwG8NExb*8yxo(2cRNd*#(uv3l_RHuFyeiWcsRBb6ku9GL(D&NjDlz8YEdlbIk= zA4=7ZVu;>Vp7a@Oa?+2M6CoN}z^PFwRacWof>N9YAmU>PEZdehqwDy}ko8oQb*}@1k3h za(ia(Hr-iWJ@y7|yWzJ$*2wYUo{*ch_F>o&H&b;{7eedty1wLYr+(!Oe(kE!n8H-hgWcXc>+g-=y+_%?+rkXRg zP5XamKFUn>(sP231sOP=s#j6r;~NVi=oW;5pmi8~xwq5*MkfUGw``wc7kw@b5E`MI|PokZvcSMQI< zq8&yQeTrHP`JitZkFHPK7>;J%ptgJdKG&{8O_IXEzM|1^xNc_UsR=mi_XxLCZes_h z|4zFv4@Q)8-UOl{*wIj$7bFe7eN|ivDM`4$1IP+*+0VRt9lHh?9#(}D1y0;AY9EBQ zdQV8jAHD<6H%K7MYR5}xRfCEe1G~P~J{#~}0X7)n4wiCkLPs+*>E$2vn?zSF8I?O( z(PR7bqsF(hQI0Rg6bPrzzzmz(O(6b$udc94X ztnHpvOHaM|m?31Yv08AZCQo+lRojnoFqIPB`#-K2yY{{!lj8${p-1j>x(pzZ$5&Fs zW1U5z$E|@+B|K2&HPTj{Y~tOsJ_u3v4nvId*29_YmpkYZ70=2|T1F6AgpM)KEYq6g zV5^)<6`KOq5;u9X(?GxJq3s^|_SjZPO?0|0CEc);4EzR6b<23J=hCwGY4Y|cIU{p9 z2lw-Qr1ouz)q0Yq=*Z1opiyfmaKo#4@A&ATh45LANA~~4w&7}(0q?~zM$#!)s*c&j z=elG~eHuP}Mc}VLBybNA)8U1=Dp&X|i%CK3_Wy)9&Zj)GlNb78`y?TP($(IR2K`od zp4*yP0gFy)K+*#5XZnq@GX{&XZ-c7v`FSdbNg;k`wUlUoTl{DI?9t-Pr7j#KG+EJL ziI#`#i#5iZafOYz>6w3~20xnHopQ0u9CERd2N2Mud6R4|O{Z%>2Nr=xj{m{=eG6~p#Lft<89SI6=9J3Wh#{%^dpMJiRy-2A&y ztloP(@^7v-@_+4{9=)Gw8}LT7K>%>fkBncLDcqUTR^a|8FcDjyMZ*1^^t&^P?c@(S z)Hw(MwVzZZlM#|#tom0obGN$*S zdt1iW-rQrTUn2E0cd#QBSH4QEM3;MBz!OovnRutR2s(uf`%&W9&6cUx7RPl7I`X3+xgQ zV;ZBxI$qQWT8rWjGZZ(&ikC!OZ?fP2$PC6FZ-p!b4+`(BT!BV|Qc<<^(jO!xKYusY zKt|>4);cp#K<4by@`dZqlqCjIwtFr#f&+W_4jOp8qPMqA)x+2Fv@NnVLiVK?cfl_3 zUBvu6+Y5*2mr&OL?{#OY`g4lV?UT?mYEE$UGagySm#9~@`PwtSeH$O2TaganR-BdR z_>(G5YLt7k6WAP5A?281m*&U3Fa^)HT!!L{QkBlR>{b8F+Qt{$px2^G3ck5P%N0V# zsd)IG-bnT4k6x#c%>%avy;I9GQJ@1Om6F>=f9>E%)RB5ekX7N^9)X$mC7&6$0|C5q zi7s`LJI^}rP67N$?nv;WDPq+nvJ{vdB2!Ql8EE7v$d%9?a>na%E9dj#-Dnp?O5|R zGhzFl8OzT^O?9iOC}UK*cq2KqI(%j|Duor&r9NMEWd0CmZPz-xHl96gd!Bp1D=9HV zVincFFoH71XkhCrGoc#phdAF7Ji&F43woskNPC(Xgxl4+Y)XKak6d14?8sC8PaPRP z!PNg}7*XBK#x2iai#1p2`c683qRE<#PdpW%HOsET1Z%n@e}I zu^}wfQQSdKU-D&qKQ&g2EQa$F(hgtt)+k@5JB`rhHY9$^la15y;h=3F0L)33?sO{O z%o)JtKZYMGRZrilRkM3ZN_v!K=lN}_+`Ey@6q>U$gkSV`DjjP4t>e@?dLJ@8p(N)H z{38+&izg%~bLhRX=W@-DuVM=uUN9>fK5?kX)zj0nzauzmO&(S_l=iUwTL+C{P}8@| zG&ulaeN=fDt8htgcGIJf(qdl@luo<)ElBTs9|stLSJ2GNhY3g}KiE-g!?*m)zhxf$ zTCec%vzvEJ^2x*OXrTLbGa)fNu76zM$kko06y*Mtka5-DSi|ixA^3yIMF<+`D9f#X zC)w%7B@u%54dMYn<@*qi`_l6U|CN?>EMk{|xP~=$u*9mpvlg?~B^mKSAj%4XmA@c##f`Uc$P= zvvqIB(a#0$@%U=J7b+o8^e1O<;~6rx%;x*anX}gs^ZQbjr^%u&Gycjah4kW-ZN7^i z&1nXF@wvg`(h_tXdxCwig(W=}0J}1>Nf4)EbEYG5!8MlQEBF0;pFo{-Mg% zs0{d#-OXkV!^fwO8Y*f2oEFz3>d_9lL0fVGND(@aaOIH<6`<5^pcZP2O|-;nJ*?0T ziQVcRjsK~^oPu>Cl^&I{yM;x}_8dc`NP9jyg)NcbaQW||;8^XvM8bds|3k0#1Lm{7 zl$>S!dmxp7Ovzy`9O2jjCeu5qc9SeiQs;PGfZ>Vz*86Xy#mip~(lag7#0K*QXE_3p zRPy}V=WFQ#T8f5x90UWr^~BcVhaF_U=jvI^i6>RwtN87qE%3dk72EA356lW_ZeJsM+P4-gb;%Yoyvq|0(gcVFd$YVP?OcY9K+X@%e)lC zTr|cF=Id3e8q=wLK?j`jzF&7I(E;R&%fT2o`Jjyq_aYNZsF)jKJmLV7PT87kC9U)9 zhB<%ou6gp;>+LU7dmZ!#Q)1y+GbIgypz}PT@e~UW-J2#B7QJjaxT7g3$A|WMskg-s zegUu_1hukrbSiNgMGIcB@ZSBmjd9FMl6<#4R))Y6ig4)bAkZ=!PYFN76Tjn zHV@?6HgtFbHv~JKW|7FRJ$Wm|^;VC+J3)cQPgdSP4(2C=FpqkCmA1MQCd?;NR*X~f z@O^&21IaBV!e6&p=t$2r+RL;ZC9wI4w30?TS!VUstBlr;-KPq<{9O1Q*F!`Mljx&CwN@F~>z#Mp`^3Vt{~QyB8X|K!#8Wpni_^gQh7LK~7q>e2y8HPkiiML@jU?b{DphZQB+T91_SAUkcqe>_-hLz` zTjs0BbA_+g&V%Q>rFlCf#L-36U|}(qnSNb_(;&fn(Ya&7WHkYvDcKGr`+oFEnC;uM z7e}kR89^`!-P!Gz1#!5CPj2H7WWBF3ZnBCV-OWAlRy-0xoQ>Ni#O zok4h;ew2dA`#+J~&1r8Xp2u>i%!eji6?J(a$?j2&__cAoPUkfN=?}1eyb|9&m3}?9 
ztXI)%E%TvpL&^f$(U?Z+&B?=%Ez1y*sx`4J`RY#~#{K<<VYxiycybw56!P zjk-#@oy_!hC_S>UpZP~kc??i~UG?v*m>)4uyT#U&%Y1U6TjV1Yuu1(>gO~Wcn9FS+ zpA@i=eraBbFwb81U;yb4CXs4Xy3FF;w`;b)6P&i2|Nn85=I3a>U0=6|zq~El_HB|X z1IN$dZBjcf>16&&sqP_Jm>V1OE1t6mc8`38(&cq&nVQRk-gop{|1Uni#)uhEQN3Rm zjwdqn1JB6+c_^Imj+LO2grbLRC;BY-<$#T8Qc6FLbjib#X$7tfKqTCy$k~}9HSlTS z;UVl{ZBT|9>B=f|JqLN1X0Ci#9p)hYdeoC|#0wPsWM5`w+_mke^GYbrUsC*&0N_0s zasz#j={0-k2zmTisjEFm>gbkESynz@-B+e4=BXZLcfb(Ve}|{Pa?nfdc<;mS(wC<~Zhll1h~o6dot0 zpgLa8rzYq8D%>HTENoxVMJa6dV`U9b(5>~}oW;vTRfq!tr${)kUG<+@l}%(Fy%AbZGBUK8c~|#FJ3V?^8v!i z6a44BlcZQoDojtgd|!L?=NP=JfA;P3^CE|&fg{o`!YPXKuwNc+r|#mmthom)@RF{P@(Pn@Av)<2r`ODhcrlt(%mT_l8VwP-Q6%qmw?v|pDbqJoO54u-`Dlpd;8zme@rgaf+p&WxHb>y zyV5EjV*MLNf9`|Iv&9QzTqxiD95!hfUDm?vJ&fNMI9yVoh7AX5Q(W4SX`hZpn0Rp9e+yBjw{VeRRYd9c8I> z&uyV;&J47rn%ov#^+d4*roUf(Z=pb*g(CUl2M?M1`vlS;7&t>Qw!hlz%N)2Ybfu7b zE)PUn0~|rM9pMVxALi_8s|A*KQ#W?3H(1Rw=#epg$<^l+S^lIxe!bnM*x9mC25G?d z>ImrDRZhB~6z^J@8E_SfryS=rIc2VLl~b`f#+-UOxi`M=xhN;N;8t=&({rsjU1JM) zk%0EOeuiF^>3v6FYh2DWw=+Em-)}{ny=F6?x+|Hpxgrgg6oGw1Et7A4WE73NWxgL& z>`Q%=DmsrwSkYa*6n8Huv?A^^9b45aiOphUZWG^&A2jr!iERH}V*0`&{$SQiZINe$#jVxlq2%yaYDe7yOM`)oNoz3D$d22@>yhO}g8Ia4>f>3_#Zm z=7b4qfHWG-2(mG|e7}V=vK+u^4Nf-BdPOfCUfuqY{;=R1YkrSTUL&D zUxDs8nZO}HbS&nJ;?A@0#hJyPRd{@5Xa!Q;<-Yv>1pVK#wq61UW+JOTm4n6&O3q&g zd6-D+{sj)Bc8&TN5?J7Xgi!XhSwga=Hb2E*-MLS4i5>l7(wB6W;G3B0dXY}4B@m`9 z7G~16s+&dWwG2Ms&y2?=$WOW2VoUMcCBC?(zH-Qrx-gw)S@NSUt$fdUd_Zo(W$UH! zTlU80Zhe~J6(fdhe#2G=(yB`3MmUoZZ?l5SSse*f+X_X?yI}2`uwDM>i&&bek~}>n z{3Kh>K5;gWGHo<9>-)@LE|nYxaZS6l z42kSA3Y?5FY~tbiCDySe6+a;?Fu32mYP_r(ev$jZYqs{BSUfU9M0A7*DkRyMK-q90 zk~4bDMBBkOPTQPt?RFJnv((fDH~81BB&_SdE2(7X5=~bQx{9){lh4VVfZP1OhNcuA z4=@*N*Ra;ARVBe4*CC07YsacY~T1uymMv~p1&C>A;V(*8lJgG;jijHivwUxq$ zAa`9_Mx(=$s8tcUl^-rE1P^C)<9uAEI<)Ri0X@gQpt0}~;30=voAG+I(cFv$VMPWf z=jZ=Zo~pN@YT9dlncm&eWHTh;$tI5SJIPYezAgrKjQfbJM;8U+7qjU}U)n#8-|}pd zwW?mLBQ@BDyHs6opISe~yt#UEBMw-WhAo=FIQ0ezyD`@Lec3$LSUBg!qAMl)l)vz% zN}1Zc?ct;5=7yz8U<<}?B(R;xc+dm=+cnUDWd{cJ2mHD-`_F$lPa%II5s#hFs9?noF0z{?jY{ z6V86cA#(RJ(yod%ZOJE19`+ZqP}AnsI2lk2g2sUHkFtl=2W-)EBB1 z?RtybIbf~}iww4475H^Ye)h8V+o%gKwnJZFcOJM}d3Ve{#5t6t zMhNb6W!rE318MdWmSfp~LTapDui?%5WYd2ZTJRvzKG5;jW>|KN@kh1 zo<4Lwl@wKNeuNJ6_{gDa!)X6Quf5Lr$%V>8CWuPNYc;E&{Vkm~sjQiX*a8Ca_cAX) z3pF}laGp7U(e570ycAg_Djy45ayfI`oGeN`WiS(qM5$?8qT_A`aOZGOi>6y$sjW(X zGSUJzo%xTMbd*J-YB=tWI^&)XA=H5Nt3%avm2azo83aE%VVg}KvQ)7(q@c6Mt(+;F zt6Ga)6EBo1|7~sq@k7GvwtoJ|Ww2cNO%y(w;k&mJ0)gj+;*d6Wv{zPCvlvyC2EDLg z`0&Dx_xX*mRhq@s9p+zny$bGQ!;bL$C9dJZsy(ZQ1WI9hD(BmYVO2+9APRnVF+p6yZ;?Q;&aHyU(w@pSA5Z$NEQ@ zrWUx29C{(ASCs`JUwJe{>&q>UJarvvx*CtiXite#r4}8V2EKB`C~x&7NB8g~E$lg2 z8V*kY`+b^5vN_~29AuB1q;c37UIwFQ{hnbhJ@!o@GnnIg76%Tbj{7GF9>d25RFjq0 zGb~gAY<=zy9cqy})rDamv*vh`hd*FsyyA1eFT(hKUD0vA>}U(8<^MatIwq*Ndr7aL zaTp`0j6>_GYyAZC)_A;OYRM}>KEor4$OmTs#@g(YZx z5cDMDI3iU(IsBE_M-H(s>az^yz17)2i@v$Y&5=M1l$AKEs(0bdQ5rqJ0s@P& zD;IT>uzD}yy04S60`~)Z7dgWE&!4>I22`eHtT#yu%Jl)3_&%Tx3%q|)+;k^C-UNBP z41iPE|B1B%1}D)JbCoK@5FZ8{A7zVOMo%|Wf>ik>|J1b0|C5!8dh)t* z`|kp_Ynx{htG`XEd%phFEw10>;XIErf+foM!vEW!=`Pk|2Cc1TG8)OoQoqGMVrTg^ zc>02O`{Yf$U}B+>FE}3Rk+U5Mh>E1Nk#8Br`s3DujI`~OK42e5f=fYcpH9CuFTl~N z?o{EZKuph!d|aWSDjvM(w@ZDT@+IhGgf-B`j}4xub3RUBa{N5`Av^SP|c&&|B_Ngp|xLCZ+2`_ z&Mw`~-q$I&rsvOlgx@$wm=S?!1l%HCK4UbIxVcpV3dDVNs^(B1@LDPEVW_)rhCbU& zgx!rl_=~(=2={@v>(@z)J(XKh9h$i#Lqm>VXTH!XJHVzoqMd9MoQ`pcYdKdMjkoJ| zw8{BiLhrfY?`X3Ow;^al@>+PEy(HS0b8PA($sug723r3xZ8UCpF8XJxQgHK>-Xd1p zo4t|NZ5cOVszylq+Vw-=Wdy%sb3SNQ`1mOJ7s~gbHO7YxGSbXszG%4=c-li7 z;FAy2zc^sLXKE&gmH5IhspBT&Aim^*#!wMn7Jb9xR2}sTE_aJ?yeQ}k)l7bW|2lig zi=TZeZ)0_OAj%l+;B8xpmU$p$%C=R@4pn=mMuedmxEMB!5XER?73A*8 
zyqR(ze4*lw)g*C!33Ho2?o78UY(n&rf))eG=#;M$lMp(aqe`5xoOX>pIvjJ4YYWUN zL*5_vHRmZqEKsa$z%M?(T*=e zolfK=;&;-0uGbIsnvd8|Xc(_5<31aubBUM#~Jc1yVd_%$ianNrEE$dCjXEH|IY!CCsO?ssOGn| z3!uGypHW%*sODz5us{RsJa`sD-+BiQ_AUyQTh53rMB4VB2b(%l-D&rk;TcfpkZ>XmxN2YW28Rt48qZWd(iU3%?r_B~ZblQ!jy7J5@@ zn@=pSKU^eg>W=DQYSsB3v}BewoOl1c))E|_JVFQ}XWZ1rs2a@aXB(Bj>yZxf!JD^S zwJ+QPjp0q*R&z58(0it{k+K)E)3?7&HfEl4RwDv$4%}My1KP3~r(|zlh~91&q<2tE z)q1u>lTwaatXTq)WC0xF*btLHpD~#Fw`ltY4*9pR4q=>EZQAR@wu%pL*hnQ$g@}T; zA~XDl5F5W?-Y-;^JdkU!J+S0@{mSVv3#ul#=@5K8*O8?4ZS&EK@o_w}%g`yzzjrAm z(9;c);E;=)ZHwDg!`F8$dWVUHyWMYuW(2Xk<%pbQb{z#hkVXmb#(3oDtd;zB7TwnZ zgq~YaoX~SLr}xHWKI|#SGtZ{oF3z7OpvpVv>_3%dfq^1Bjq5r4y2UM<@qnZD@a^!- zfE*RUnULaN!`>Gy7h4UMMp>KyD9;{ACSi?yov(-x$;3RTL@q=C*LanT1;+iM6MN$d zbsR^cjV9u){*lXjBXJ!UD4(_!kzP>BgPC`fs?i#DHZwMaUH#P?ySU#=w}(Tr738HEKEHaaT8KmE6;qanNufePKW|cv~RE2QzaSp0L=UE45k~ zA~uXC=ilL*Z69H@#`$c9L;C={+6vH!f~Y@=HJ?*rI=VwFxzE9!Lrn+UV``lD+qI(k zjR%qBgfQ@J!nKa$g8NpnqLvWU1#N9LnlpDX)$XnBF~wNipOJN->$HW)naID139s(M z8t|7x!FTH(SA708o%M5T(MM9_mSQPMu>St{J^(=IS#~`&-%ySH2J4t7{+{NMM?r$B z*rI#J`B5$(xZ~}nxbi*YSK)xc>H=eAXwUN(h$37`y=#(rXo0vJR8))D1hJOf<7amL z=pgy-A}F-_=Bt0&{UK>Q?X8#9xLWOLXCnm2_fO`|te_vP8t!&JEU2&nNSDFn=RrM= zs(-mJ7E5UEu1gW^&!9`ndh-R!t@MdnH`@xcKc`Y}z4%3M7c?C6{!1<0ABfhV-%#W; zPQxM2W*x`#YZdhrrWfbYMgz%_aonA8P03SR*ZI)Vnd3QIHw7y9Kdy6-e2D5z+3MBO zGk4W0;44ykEB6;n{DS`N;o34_r30Gqi+8Sg0{Vy7=LPTGtN&0capMo? zOyM?WxSf1a!ApLs3-%(6yU}q&e)DzImbymIItpxP0PWSfS+TwtBU~^$P#(&5B@jC2 z(X(&BLW(`XB3pI9!Bo1PG7ycN)OB@dsb467pSEBI-Cf=RA}x>vHmx%2RbJ}LI$rz7 zx#wLRcCkyg!!epttzwR&?D^9AVst=!T&x{;`gWOQ@pNsIMge8&Cd{hi8;DCa@_Ym@ zXFO2A0AhyQ&Lu@hhf&->Q@wY+2s8$R}CUGePUi0bjG-5)HAZ2ukP&sN406 zT%cvwq7%SQGP~BDtn1b|d0U6@3vyu7_=P|{h-1bF1O!dZgDMWe*aiCN>mEKL-XK?yxJt6KR2~Jz&ue0@W zx0TSfvDMPz;?*}z&|DJDcOW9KCo5r!YwuWEo(TRWyS<_ot-A>0a{ZEfgx=v! 
z!8Z?lY2(pnerGj;n6bR3K{T;4f(L7FszNjG08n}>WQC=GDy1?wxsEns?so|^@8&QQ z;g#x#bU%n8gbiMW@;A}gV0$jwXG?JIF9D}I53_uv+(7?jsV%>F`B4rr09=bgP2OsK-yQg9vqhHl;sndw8c2;`h%v z4*k#e>hItA(RBc)Dxc;OCL-!Ve*HQ{T1&^BBt`tD^QX`tnTB5EQnKYW-i`iKzMc~0 zZH#uq)(-zBAo*IsSYMud09Eh2OV|ZA^{nNagpWkOy`rg|wNVl|Oyf;gA@r`R;prAO zTJ)bX*dj*w1C4b?g(S+4s#_a9^qciR-47gGNS7hqcpb%hnGZDl=wwy$-^k}8KCi`g za^&4R*c<}Ja4tZXlv#64+;hd*1Y{`iOXi<1g)BQYqN+icvh>xDlp3Wc2bxbd&)8oD z*y6BHcR)LC#M-g9%!#-oYMr6Yy>)cRVmniCnduk(49GImmQ($qV8^KEt7oB9dcKiPxtvL zljqB*PieM<5bQYAC^0d}8L)u5;P?(%pf@F~rR#zXDG z`-Uk~s%BmNHAex%Av>R$NypV39l+S&m2@s04bAqC2iHEh8sz_B1SVe~1FoziFOc|^ zKhLtQd96AUwY*7h{;tScL&ZHk7xpM;_l_AMXI=VNpZeDK>lT#XlqOF@$Ru18G?QNX zvgQA%DFRNK9(ZLDsSg}$WsUjq%WfsQ>A1Qfz!EZj;SOj1AF?W<_AZNj*byMNGScWC z2A%X%qg#Qx`<@;=dflJ0potq>6HO}89^&~!J$5DBuT^cq!=XybNg+ijm26Gs_omh3 zgUtCX@=Ikw1<~?#pvDQu`cg!nMu13S=UoD$12_FD-YzUVd+`b3h1z%ASgvQI1<7JVr_QEC31Dkdd`|>i*7H_$uJ~m*prn?eC`;y$sWB zkMOb#OUjf@yY-1hs9|V;Y?=p&2CG$gS{d!jU+lSZj$EbD`nKybzY1D`*C}T|l);~I zPL)JknDZlYj0#lK(?TUk_K>=MlFpaMVto`@?Z5_}qo9W-pu<1z1fNe8dg5#SwnEMJ zy|4#Cwq>T=UAebBfUXXv&-c1d5%hefHd+P3vHRup>d~O0iA!WtY#S7GAaGy*N=~ob z>ViK8bRMN%&ML-sz#wH8mgu;uRfDc1Mxlur_1V@bRdshvH2rZcMc^_vtuee(iJhs7 zML4o)N&;JGb|4+^7T{NHkwM3D6Ga+?T2Ocu*V>2t(8{wZBM7r<{U8Bj%a_~4)5|{v zd~eG9rzZNEKeRR@IkXa+`u5fWic%)qSs#gI5&web{di}(O0oX!5Iwjtuk<6)CJHXqQq*p zlhS{{&fF+w!gLz3hPV(5Se}#Rt*LS`mctP(a<5$qT2vXxw zA38)3x8*pgUvvmL!LsCSL@u@cvmxwzf|V0REklHj_vj(u2!$RPxc9&SwRV7v3 z=!{p`3oO}b%%Z;fTVooLSaS9wI$nlu5HJ}5vU>@y&UPCdRp41_mb?SzEiiVdKJa{) z&M|`@4ovq?`duYNkWVN)w>Qd~yf;$)Zh`jj)+3{`8r4C)%=>*Tc&?$Z^9-?({5~GK zCR@8If(A+@RZ>7R%;EeGxx*?1JU{&GW}$cnejiS_GL}fHRt1X{$I@;JB<{Y#L+mdm<13rs^XG1lWB-`3cjV5yGt714>lQ z`enC}g+tgCLXQGG*2|Yo^sDN*$3; z-Qt_dq~!oF($9R?#i=07Vej=^zJLW!z3^hKcOc@(Ht1g#`A%yzJ<)RTa5%MD3!7D= z^wr$MxiSz9c^;kRO%UKhcf9E9;2EsXbMRDQuJtC6wle0b0kD@%z957FhG5%q#!2)d z?E~Y~Z1E4c*JzR;q{gq90Bk;(>{?W55DI4C1diMMwXbeX^)cGxbu|cX9+YjAUt>@Ztw34x! 
zdU=qWDDa^T>~a$UZE5F{?`&2&J)*o zTKOb)@~sN=k!>FAs=H)C_U`0PcG2j&9%D{QmaAT*q*m)ltM#zv>o10P9NRM;nga4* zvFeZ*p!Na=sZ9F+Q?+=mKz^{W(SFwhz!!snhCiH$ zj)M1(?N&ZF;3E1J6*sJ1tgdo!IN!}E#5VTjY56woFIA%=rkhn z+qHoJom^o_WhFJ|GfqXG8LhxFAwg6p2=abWV7rCDz++o3MWu(79k7&6c}f4826jc& zR=f8z#NH`vK(@aatx|b&f2Da*f*9;nLeeYnY7`;_E4tZzIqq zf%XXRaqbP@?m|(V3OG>zyv*(m%kaqY){b|p#OQxf&i;|xH7EUln0_Z9|43FQqX776 z%XUa$hfOr(U_Bqij~OlHl*y4PL=mKdZuIZD9nK?L>(7^3eCQ=bzBh4v^d{~^dL0o` z<$sH0IvAw%TiSn|cHh?M#Yi2QuXR7<#~#rfGQK(K7r-oJ48vmT*DF{_17QjszPRhlA~c1P5tF~Ktq0cm4)h}FE9?dkS` zJ;AksU4!N^8RDjRPHXg|5_m&*ZyaElOng{Qt=Md`}if>8mn*ueU0Euj~m> zV9NI`KBdZ)kq%=LE2ENkr>@rkW?|($*-?A<{La9{IMNnmv~bOh+_5oxv>bS&W1PJC z*ygRqD#XYSUTsWJ+Iwam$L_i0N^9V2(3s_`er;nD3*n=rpxnmJ-+X{>!S~yix{fUY z$0`)5^&5oQNFc|Ot0GG^sN}P3aHufJB1nu6Gw!PbUBprQmRT%SY#6Vi?l0JFJ&W8B zmjC>Ey98>(jGg0bTiN{D*jh<-Ll2o8tjA?gnnwQo*ZWh#Y}ElAUF`!K{Q;5rJ=j-t z-)9cz`60fwx=BC0IeFn}ZN~Zet$3m_iS~mj{R*><> zzgc1UVCYtGs@Z0~VK#4aNhRBmwTZRawlVVY$}H_>URy3j7@TXhotW>Oh*>rARX3R) zowipnU3dg#!~TA`#z&$=GuBrDsVa2Zt306;KY=dO7fXs`pR1f!WUfOuDIcb;!F^VX zs@*xZUk*zk>wP>!s0tQc9sApVE1QC;OuwWh*JJgqWPiHW6PPzMIQ;Pf@Apxu44(PE z+T>$7(2yY!(KN9A+VG{iH3a|)hCvDUozjUi2G+qpu6vHKyUB)q{Qva=@E{MwOE^3hQO}#UX#Ammn|$2s3!rFv_3NCZ^{AEbOskb3 z81;rA;l*l~3g6)&*6E$6Bi9eou;aPpuSRX2(P#rD zzW*?3dcM+$!tuI|wCVVR+UzGwkBH{MDIxY8cdmIP2BmEb5e{WVq<}J&nN|^t0sC?@Wt*uoo-r}{yqqsyk)~;J^pp`8 z51mb<{?L2L(O8o|(U39>tbpx`&J%%vv)w|}@rS@=ib;GsGSQ_&&leOyP&4*c=`rNQ zGqHj(>jT4-K)3@w<9*@%fTC=T+KJEfIaoz2z5xQn;`hQ%j*f|K>jX3 zS0{8(`N~h;ejq=Llf36l_Zll=rI>z;v%)_Z5v{pEhv+NF+;HQ3zRF*cS>BOHxB?Vd z|J8zJBv+y@L%#p?u3I?wAl>UW0za{@9(`OkC9Oc4nJ=e8wXbpFB3D<$edS=7b~sa2 zKpA&+bVX(~mesz(&k_=;Q5G(7{5M-?fYTf{IOb%G;NRf|gxlp{y&G_A?(aZ_@*68< zYfAGJK?)}|eJ?%sRza9rWs|8pFs}iU9s7Y4-&E4yHrP{a#raG9{EZ?VE!PfDK_x~t zU2#t_b_Er3o#Jry62pu~|9Hbd4#yTLnLk3GHHd)6!T!{oR{){|Z^5Q1H|h^dH)@qg zz9Yc3q$t4Q-QffRSi4@sfQ7e~Yo9x>XBf&~%QzZRzLF+ognI#rC4k6L^?1E-bJh-# zR><(E^kpj10pI8dY{gQz=|@d!s#LNfZk6!mRchtQl399!0JMrT+P{@SC{4ZiCr z5m*si^h>RI3ftXehc4yC8HgNo5UaG$5T;3(G8=&9b}&jpxcP%`y6Ca7Cv_~@A>efe zd^YPe@GNUJQg!Zz33B599Orzm2}RGkw9fP_jyO?a;w?Z>93R$YGrE1@!*O}jGF5-K z$6B|fSU2yYmx4^OEtA%Yh5Zz9v&uwR1ROOYKHp*HL3!}Vc@b0>*Alpg=F#8#URZu( z-llY>gdjk8Bwt&9kK|dPt{TnhHFUD+g#u%z5ZN)QAg0@P(|(r)q7H?JXB`e<+W%3I zF^KG;-m?=rw0KLU!z^4j{O^zH$=5s~-;ajxq?z||Vs)fvzd(5zV ztFw+L)Al5jvoHfZ4Dd06O_|o~=NA~UP-!P1ys=DoZybQy)Y{)5Ibkji65kKO811V` zZ&2g~QswIkA-_4+DPQoEaz;pUqc$E{%EnS6jvO0~oGmjX;(?;1xwpS{z#DB9Ad3if zaO*cq-+zDoW(!2#ppp-d=>fk%ez}u)9S)TBaNH|8_|*r8%6;aJh_(+4H$vRXyi~~8 zc>M<->ij3u>>UrB6@>TuDL-SmKfAAP2%Prqk>UB{)@6Q_zDja~{U=~QOYMA8>N!%X z;h;9q=>P9l{9;jUO5ZnkmlK4y?-RSyUbSQ!Fm-vhP@B4dTsY*Xe5jQ@wrPlk{qhnn zeN%Hc={RAqCuF6GFCjnv4Twjd?GFfeSTC+-#r+RJOWjq`wMs490tPsNL^M8smeVre zUrfXuVVkbq!~Uf8xoI~Hxb1|@WoL4cZ=Pn_(1_F|4!ldt{~=xsaNtV03h4j{!0a%` zMjB|`XE{-!Gmsny{$a07s1vJ?c#?9VZ$Ip8`10Sq#hjBGo$*@EVH@@(2xFGUj&i@V z&Y#Z~q)IoLL}16`k$m)AZ0~%;yC^Tc`x8K3v{DZpeC2LY}bZ}^qTXJdlb?X3_HnmpiS@3D_#ZtkjWn4CU-FzArtExwb4F4F!jfe}in4Q{2QKuwQGFTyS})lIM&&riMa zCntEONx%AK1mCagQt|ZYRDtl<=_M|&z&odgkG6x}mg8eeEc~wqJZHF288sy7U29!+ zf2`MM3TP9CiGigt(qvz!^l$7HOVY5!?z7pic+y_>jPI}2B+u9k{QAsQ!F#kCK+gI< zH*3c5xXr`x>QgxzzClb#!{wT0xO=GN9syU&0FxItv!_yvB{Sbk$@@@-~`>F-=;IjkYgMv97u1SeBI&kO7 zI}`7>ypn+V5W+3^v-RfQSLE`0?qFe>+-jAMwz=G-%SxWV4gy4UWz+mC4*vd$Wov}g zWrJm=VvFF+Gyn;%yX~I|UXSrdzG3sX zIK1&$`4c|l;k&d{i`C@~mkk(WfQ#H&A+K5<`0l!af)4Jdlp%S)uF*Q{)9fgDJKX9! 
zIzr!5kJRD4HHR1|^3tUDQSiC0{io)TFF+MVK+XK?b=#+x#Yx7TCdN>q30SudXwHxm zCndkS9iT(Pfoe=g*gNs>uh&L|#p0gW?mZGw9*7*Vx>OLmS}M(HNruc79F9pkm$OLD`jOEbfwS1Y*o4x%u~W|BhNHNS zOi8Dubcdg^YUJ&+TaqqIXY}t6lmo=!V zC+K%Cajn_Zo>5l85i(UlC*1Ld`nGO_TI#b9`@Q)3iS_rio!^3i((3Q-@^Dk_x!=4d z+tJsY#OXgOsRhlr_L~kRtak&0MtA>sA^wbRj=N8(xL}13i#ys;I2Mw3RkRj5%WY^d zHP6S31t!w|8Sn)m+tpLd`7v zEZP2v8Mjt4vQ8YtxF!Qn(Zz8!tKOB(o5=JNSt7_U8-C%hAK#VPZ+=)Z-?a{U+4hDd zud;wM2*z@P+J$M31*#!y^3z}@yPP>hQ;PbYd@Hh~j}a)D^`${aQ&})(Py?Nu{rYbs zn1sJoy8Dz%nXB*l3>gNFQ`FbppM8Rjl%*Go7icXvi`YZmS1#g2vq`Vt%vH4QQB=Ri zi7ETr0qP&@f26#dP&_GbKf2GWI6WwZ_Ij1G5fZ-6=7U$l`x< zo}Z|7U%<QQH(=eSqVp(0d2th-I!x~0YYn3nq2ZO97TRds zYW4F+zHZS(SdorHjPBwL1J+8Lts?o5zXn`wOL#fi=KEr7K<6hvH&SCcopJsv{YbeV z`2>`GaM|2m8310qSzeCi>fchMS_+9&HGpC}4>V;qfgU@K`I_#u9EY(QMt#5+VhX-xXx4$Xrm7Eb z_?DxAeTz5XmzluuC`0fD09WbmADwVeasZ+kao;c|>8*i!`5<27WoZ{rA-jU38MUKf zlp|vU92@I*oinuKO@{%?pjE6$QMR~=_}ef8n)c?UeE&4K=BVbyb%|rh?5CuA*ml0Z zbWq;{BlU%{CIGfe*{g^;d`V7{f=M!!|}Z7n%7=0 zwR~GMcDPc}TfjCrzUGbfCfT=^X&nRmLi;Rph!R!Q&U_Ist%GiY&zY{UoYV2e61bq{ zf;t%^^>W4{mEF3JDe1SP*IqNK^>@ct;y4{cT^r@W_)_(uy!j;_l20L&*XL`(W5p#x zt_uYj;3QJE@BG(R`N@|GtbrgZ&>^h9xaIQuJJIVCTSKJpKK#$ED~G*i_rM~h+6F#TCZmTiT7q$pirLDnikThF#RhQjq$WsBn`lr6=1wv#!#9wtD#YRDKrr^ z6FsTz%5NB#RI9T>y1(bzGX}WJ%dM$+p}LY!Iv-izzdi1Tp;|a>=L{ne@Jt1xllEPR zkvF?z$Dzq)ZNo{Z?$9d_bsCZP^A^b;_bxWH<&8hD7881M$zmQP?+|uzu-LrXdM=NeIn6w^5)d| zJ|}5PXsO`BE2jRZvQjMEPzZ}U!Q~f=S03f59$?#A=^kM}bVcY5k%|g4Rxtj6C2GR! z5SI1k$mwWn7U+*9&xc7iJ^v3tk3)Qo(mPYY;@@(&r!FPz#7RLGD@M3g))zhsh1$)G zn9*|N^+)+Y?TxFn&*>JgRnxtm=m)bgj}q%%)={EZQB5NqWOiNm)2T-^zz0Tr`zqRC zJ&8|c{(rqM_c*!XtA^V~)}W{rK{y84Yj+7k4{^R=!!$zPr#qwp(|pOWs8thKso zL!DL?umVe_B-1+WCu~9k^Zsy2W!2ED@13%dH4ARH(BtSh?tw~SSDHb!8-%nS80d80 zMKKgpUSezIlu8nRG>tZj)GD-5IDzI{ntxQ6fkobF_~*X4oudf(3a?yw0Wy=#nz(5y zx)apXX_^0ARSwZ>05_o)MHMmQ{sL)4)HmJJuHnl!-Nz=m0Ry?Ny^F2A#-Ox}3Fiq3q?!^qpc8&>OHEGyR_rdi5G$`W!Uu3J7*eJ0!b^uc3p;L0LB@jy=oZBsLa^kU}W*?&n~V}`ANF(r;PNoYK_8rDQH{1t-jQ%i+#a6d*;&C z)=9oJYDNee$_pq~FL(1Mct$$j*3tTUdDqVh z$jFRH5d5$K*R|op^4gV^bIVPG&@lczfGOeG_%%g+0f74ibYfdXTN+;1>u#<1 zcLl^-FSgLP=(~d_d1>nT6dZI$ye-~|_Ojzx@s<|Ax9Tls3zxDEH$E3hg_NchJq)=FTvd?}$fC1$4pa@= z3Cdm=oUouCI!0D>nf%}J*7LUd?ft^w@uM+LT(J7?vpb2vx7>All7=d~>UN?i0ch>o z(4+FH=oW{nzTaYV`z&3fx&EQ<*y)ONNseg*0oE}vlba6{*D<*$IRh4Xp|Xt#m#3hf z>Fs!$J_ee%il|>-8>$Wf^uvoe1ze_hm+i7;YK{ul|>aDuIXW_ zOJXA}tKUmdUus;-Y~%3=kCn(+u5twl*FoChgO6iso!K+gK+^__C+D}9gW*>@^?z%s z|7T<`sI;PQ>_ioL&dUIml5V#X|E9(tDDPd`=RnEode~Q>RUAM7&vX+=dQuw!c;RN! 
zz*{TehQFi;R(ToJ`sHmdlw2=)>Z(^V-?$=-KoFN@7;Mso`xt~4xGJCZb4$nhW#l|M zIvqYHdY&*cIXtS7CSh2QH0UQf7!U(cn3V1A;lBaTyz>E1*QZ7c!BeZxUdpubOAh7D z((=;o2nPLJSmPOBlf|3&EYLdud^@#=*!0Fh@^o(|*df)0=<{c0hq|lVXin>qo!WWvBS->;u8k(#zk~!gXJ%48ZiN(xvGHN9g?ug0 zhh~a7LYN>Ps^Z0A%UydvhUF<2Nz`LV4-7B)_^)k2v59L|az70G|6%HSrq?VGE^wbY zG$yL8OjVUbtsi2j7_l?PoyC@>o2#V0y78ZL2Tn`C-||(;5A^9%b?!N}sBpo;ChE#} zRv2XZbtJHTT%ZTijWN~>7u;1!wVxcuT2It6bj|o&0^KzhFEp<3o8&r4V^6ybM8}ZI z5+Zoz?xUGj$e;z>%1iq1GOpTm)4R>~_!7l9w6i2JBs>b7jCdPb9DcxzcRFhH(+fA{ zqtITlOT!+)NCUUyy#L%V>8fD~-!Y)T{$u$0zp}XxY|n17aIiXmzx;Zi6#Jx=m=nVa z%C09EPUS|2OW{_Z8dDi7=hCS5hee(EN(h&gMOdj%>Mb zt5h=6qKJ$gaPcr@(N5XORgbWMTO$Ta=G$~cWapGAxd|Y)ma|ujDF0$$uF9j`i95X2 zQ}9!u@qR_^(JRdw`h&9U{&EdZx89f@C;|*^>Tw}X=P0+rA(_<(D6e9=S;Nlg5 zvS4|ufpx$;?AMs@766mf-KHj3CmJ+*etx?>-po7|JQS?9o(pR{1_zIrjyFGa8uXrh zRa{icFS+&tgeik|CxNdlxALS}_rs@0JOt>#qYKKS2jNFN39SF$ql?3F@fhd@n!px* z-+|Horqa%jlGz#m2ln%sIq9AugF=JPi7)Zlf=V5#Mx{Td5-bvdyjJKj+EsY|wuQ(U zuf+&vRhSOBh6GdrxRN7Hx`+$0|Mj-zOmhb{u+FLQ3yU4p5# z;B?2RTXy53!#m#AkKY}a`4%Xp(bj|=p<&+QOTjK=^Mzo+T5pDYq|10-qKPEDpsmLU z^kcx+LPa;Kesda~oC>6LLe6?x9c36;ti2}J=W6CWdyT14GFJa%G4{SXuosqRF1 zuN39KB2GyYol0KQg%5f@vFs;iUv-vLJ(ckY7&~iT$m{sE&VFvPfyQ$sC-fAKYRy)l z^^G_5TahEg?cjP4R5JAR`3Vb|-nfq*He#>%fNK;gPqmF6cp{I+ zSJ1pmo5#tCIkGfR(%P9+TUi-@zH}j3-Iat=P-35n5pmz?*!>e-6j+1BseL5q&vj#( zB5_A>eVKp?i)m!L%Y`MH*FfyDM-8J3b|&>r-9^&ytZq%aNQ!JB9G`S_Hbz$EaLC3u zUBP*l8UC>rLuqo+F4{M28Dk7A?2%~K{z^Ht?Dj`CqfWAgNwR2e`5Wa5{Ze#g_t=bq zXux7|rUSYe5CZ$e82V#&3^y-JyHq*-K!x|dh0C@(BFFrSUr3L0_>{dcQ{Ix32Nkab z%7TEKA4M3bYCve(5#;S!^aIKnBLYrY8wk2`jjsIDOANegDJA+y)uaLJ6*c}V0nB(l zOB$rQdz!c*B(nToy}jdU`emaj_i{EB`OzcHsONxU<&+Sb6_Nm2G#T}#j)W9rGwJzYa#Ok^xY3_>~rxzwHMCf41# zMMd0^1R21cL&ISD_BF#ze}3XE&iys-W_yP0z-oDT!8uz>{}eM{33?Y6-X@$H9;t6t zZB=$a({Ute$CcR}h zeWEOdnMEx`r+)MpxTd2ovxq4Bwt^a5t+{cI5&e?MYtewfD>;n{J~G1xRX@e#8LNsw z(vDPVj`FF4=)RL-ezYDk8kFs;jqVWvhE0y==dcftLYz);XkK&v3wswR#fa&5;~Q~r zKaPER@$Pe=l=;`MjsaB{!ZN3lr*%A5LhPU3E}c{4_e2IN2sE; zN3&Yi)tpu!fnIUO^(QLKwZr|QScDvQdx=K79aXc)jV z^Ain2q@k#!G`kTC$7Rwgi{-n!-|@JeI>rcH^uv$I0r6qdH0kBQf8~t%6EVSyR}y|RPe8YpEeFSQM;Dm?n3FQ?vQMb9t5kCDh$YT*oYcnn!@Lqi928wi^Iq6!Q^fgQ@k=s7bBAAzMoSmIX#s< z5?3D=4OI_!uP@do4jpe-9#Dyy@ZP6S!j9lzN%OZvAGa2|NySLmk~2s74)CFyKgVJR zZbh<5bx=v40q@}twR&R5m)$w7n?Kv+)4e;`%Z?`~j2-tsTGf*Lr2-QIceUs?5c)+9 zvGfJc@)X8-G6`^#kvu*>)_msTdZbobj-4%)ABb?{y*)|ftak(Gl>``Nq6Ye?(ZIKw zH(E5YP=tFbHbg*c_&gauhfjE7kR#bF$UVow`TH)CLoeYYP=?dNVb`Awz~>Mnkwi!h zdyNB|5hv3wac4F>?-xCFx4_o^MtdK9GmXeil`?k{1@44iT8 zk4l4MD6fvx@)wL_F z2!a~mFL9`lBblt&dk*8>U!h66#LfYp^M%k`>+1^?>~;gRZq`c5T9Ak6>h9>bS8uhn z$tBvAqiHHIKoO%o0@WrwA|MYzH((M|u^XAN+a%R3EYeMLZJ0HwY(dP&5ww2hNK zG>VtOpTOB6k4P3kdrTeC<;QCiNtkWI#T!AartzL$>{~zmrVD9HM6j8a$rF}9fRD?xHJwO( zg4^iQ`(&#OeqF-^=_xc)(q2Ndo(*pbt`)JhvLdKhIeNJ)_4&?#7qTH2WHQ;t2Sp-*x z*IGyD0MyNTaci7Coe+mpOZE$o*p?c3pUrPS3MD`rKfUx?5m6+lk`33VM2D%a*w>q# z6e{y)Z$I_wg;;pa?P?#qD}@P{>sdQ7Io1e(AK zQ;0|xv4*yWkE^i7Y|B9cS&xUngAgLG~0P~Q`C5q@D!7i;4!aW$UYSy-MQ$N@42w~Xcm zZM@pj<7q$@d|yMi*OyH#y(>Ex-m$99dS5=eJ0Pdpb8-@UDJh_DEW*eOVp*TKh`a=E{CnffU z4Cx|IhWNmmm^G{CrorrutylV{ySGo@^U2SH&!3UTH5_&O4-6*~7UcHhrw@OG-4acqI%iF=Bq==FWSJ2QPnEJ^l zTPgke^~5(dY-IAM6e0xBNsPae*<|vZ|GB;|o3TT4DDNCE?+K#|iiU5Iq5tv7d!M+; zw-x!7+3kHw^fpByPu}ev1gtWWf?He7_{r~%vkZep1z%1V6}E6 zZfOl-mWg>7ve@4{v)X(ACZe zgibvw>A{27!=}QH3Y70XB02KMqGJ_l)Ud$*8AOt7G`P?#ABbG80N05aPPjl18db5+ z9GyanAD|ykW4P22kirpeQt1#I2_aB#wjezTDmb(WoOkXWCE9zfZ?JoujAk9whKiIQ z0VPX+iU0pRJ3UhO;^XQn$wC7Q_f5cw(@^rwoSIl0y_EOSC}hv=BjB9RLM-RYDBl7- z7Nw<8SRiGNpik%h?7`r_q^e?#C@VU^3Qh(F!4tyK%yi$&eQ+*8bT4I#9$L*S|i(E zXtnJ^M^JA^Z>}5UE#$4-c}nm-;y)w~u+|feF19hMbNO}Lp`S9=N@j_f__SRuyP-wm 
z(eH#ZXf~uH!Uf#%SaShB{j!yG&P~VIOw~rF26CXKnXF~Mek z9XBmnOILlUp?9}@KIb#0NdyO54+nqnc~lCPLpj`~q;0Mr`ZO6?Sd{xB@&1cS$&L?t zN0ucAkKAx6rcmhwbZw;~X0yB%U^;gWG<7Yj3+jH`gO)i&ddM%yebz~Sr(Fec@CxlS zFg7~K3S~@QDP)V{PAbJ2AHXj``4);Ghff+zG~+T-*2xu0`}Nv0Zlv5sXh&t<_+u5X z=bLPK4O&83ET$!yIfr@Ota%;svV$=2dIFm|MB#$jDH1)($UmMI9oHu0`3N5*xH*iW)tsZH50j z!-L8v=sQ}}R*_DMRI56s8QxTe_k_NqC_W_{!l%2jlFT|hBUy_6s5=@Ro*worjz!LE z(Cw*{jl%b4#!K2QJ@yQuk+m?^VoQQS8F)S zGm!07ONv8Xt{cQ6hjB^udi>1I%(bIUH=FIOm?+Rt3kps7gs7#1`H3UHZa z1XH|I(#;4IIIN^e%QkC~7_P_m5FmZN|652vbYSySnKyrEaI@9zQT*X#W1R22%wFI_ zOv{sJ9k6n5q1HFVf-x1)bqtN=yoZ(gH8DyrIKGcA4pB5UcrChh;2SQsaVPfWUFn%n zQ*STz9s76qz+3Xs0U#elmn&xO9J^G@scd&Y4 z+r?Zl-7(IjpUwM}(Ymv6IMJbG6`nMlWHhXN*Hrf`i_-q?y6~Z~mf+oC2LGjm&QIfC zi+8J(x8m)8I>2rtYs8F=BhHV@S-ji7F6x|oJNO%01S2k3;+2`hO7Gs(zr9=el5LIM zMNJ_09y07zcwBL3?r8yRNCrAM;Qti92KGEiM3^o}IO#-Z`gt`aZ^Tlc9MjAob9_Kt zh|_$Qcak5BiXW3o6T_I4vBs>PRcW5dD^RCV3695~``^?1hv~i6+8*ftrEh`&Nncug z0{jX~N!R*PMuXQ1rFXM=|7O>mI`e&vHIbi_W>^A1K?Z+)fK6BUs`KxJ=}I) z14y{E(1sJM01-9p4u>{e1qG~M-%L+v@SXY(8CBP+PHu9G?9fAYFx=11-+X<&vmA9~ zL2CIDjg7wjMTtnfnLz3^dzU8YH^I|fF`juy{&R-^al2l)5(3*D#<$->DS3TV$5-xCGJ^dUx3m0PzAQ0r zC0j6x7OlzE(U{Yz)PW$MI1fWK6!ZnpoCgj+5_8C<;enf@7__3M)w-S z#la?#L+m#rZj?TD^fCAlzneSGIbw_Ke;oQ*d`~5ya&0Lv{zKh#+YLx*XR#$L7gNyE z2Q{I{_jCBjo<9pcsWNBc?&acpYASl!lw}G{YA=QeJ}i}Bd4`NgG*F#jLSTfbm0K-N z?Xo$=fpspyh-dY7IK}WUIn;& z_>X^et<}5kkc8m142Bq<@2Pe_jb0cI)i%X`PUr_6>IoSRw~P5Hc2i3+pzV+3dmm`u z^z`$ecY5(E;vKt{BZuc)GQ z35=R|lxP_}u(>OQwuA1%a1ibaktQjKuU+C?!-rV%kpnmm20LyQj}ztbpP!4+6i?q(*K z>>M0&f*M~|qDtSge`sbQ;m{!X$g5w*l?M$@;l%FxQzBS2E_+=Wv^@JfT}w$ov1T8E zjL05!&`co?aJ`jRX#hgQ*l$m#%^BgM>dwQ!8vizi3R7sdd*WiB~1Ty zUXw#ncDC5>@B&pAnHhU~_`jM6^+n)JJ(!1ftZPVyu=ElgjfTiZistDq%?zOk89P4td0xmGVjqUA zAmUfj>T;%`k)Xp}hEO9pI-YK^5WakC>TCfGNH4?E{gEIP9+%(W`F+)jICq;Tgd1)9 zjptB_)9*vj!br$n%D}gQZ`<5s$nDQVHFs|jytd8~Dww>@nRnr=dbcjmMB#E%~wK3QckAS7b~1oG)KjJI@)72ta9d zbD?58H}CWtqano^=gas6^!7gX>-%n-vu9FqN->BJkL39}26f2DE1C=H9&^A{+Z1n7 zCQxu-xk*#zRKD&DX?XXG0D5|6KHq9iO|*KbMz`&KduE>N&=t6LY!8i5*(@5D&PH?I zezf{v79F)%Yf@;z=mDpqh$IVKFy}`37vX|hYjxSIy>n*jZTJy!5Ru>ky@tLjrnP$k z`W8kj+EhR|wXhwitd}bH;$zw4`Z+jvD<4vWwa&67zvZz0ghS<*>V7D2{X zlNe)@Tuc4QOjJZ3`}SCN*kWnR>}pRaPH>m_x~0ctZQ4;__A(x?t1sS{Rc-9{ObPsY zo5if%+8cRmw7ZTK>Xm82Ky1b#sgU-aL-XxXPZ<0MJbXPcFlqbThLIvHC_CuvoP~UR zdIIFYToB49&2s|Vj%`x&lbmmDr)b#MuIwRwIKO&$$>cC9?k=}0(AU>@w4Vm-jxc@p zYNJ~?>f`Ltau5EB^&ZIUvV*E!wOn2P{Pa8%*r0U5pP7r!YuJo99jW^Z@aek3THFAY zT;Cgbjz`40!?^`b#?v*;!NMb(&0_6htuJ`9^tJ~#r@ze|SLKS0h!@d6+Cmx6;CmdE0I7AzfhG1t{o z?R<}XLk+}LwW+>1IhV6Gb6@#DdQ_rITl%343%wfz+l-h#+0G6#UOnnqTkqqvbIcnj z(k|BdJE7Dq#mKLcK^_U0Iyc}f_VP9dU||Jo7L9erhgeS#nfx?wdhf_}+*eZoPMg*_ z>Z^G`9XR7kzWx5SMvDvWT)a;bXvg~S>5m{s92?bkPsT1#CtBO7GkXh_Yr55%Z7><0&q6q5bO!qRJ)!MfY_BCr-X!th^}QjHYfD69 z%Ln%qKJ!>M(&%Q@f&W1Cwqer3-*qVR!YYJMf`~rIso&T*+^k7s5YFV_3D6XKLwJjw ztKvvnomb@>3f6vSB)~#FZ-YPeMS?tZQeR!3C6zhQUK%Q9;yQM0RCiy2wFC!#)rnXFoF3{#GApUht#I$vrP}KP0)wu9pulRHgfDr)T$=bW2V$j9P}MlligN^tt=$$ zdH@liQJChWC2a4*@yfBw@ja1V8#l*R!6!zKR``uBEG^D)cRK3`Z5hGkOvVxZYwWHwulHWrT&J~9*N?4dl}6iF0$E%Q zJwqt0MH`Sxl>bW9RdT*!#rB=+qBei}~?UI&IAdP*&%$XC;{DfZSd zqiW2ZeAf$fk_>bM?039q8aPWA{tTdxrl}tgba9YYu)F8Y}|mBV|D4NEeDXULOhy>CObt z_Lbk;VneU^FA3>q^`qXZvn2e!eR5u=L?Z4V3r!lC>;7u^cYS+MPhzxfb=HQcad#yD z%4Kh943k&4S|E8+G#P*B73{6kb;VkVJw{6Y?~yTe+E{|9>?PSEg1~K9oTXEm;VqSr z4?_92diD1mkb2fGwVW$$$q_>1Hir>4~gjdo$E7s@~5(cWS-yB}9v2{B{L9DSriG8Cl>!fzN7=hMSf1XVk%rz(!< z`K51b`eM&j#J{LxQt5xa9erZ`^6vXkry1#d)jMQ{vQ96{u{rZlM9d@K(UEtRQicBF zsI1WlQ>*OJXIWUM#fvz!O&?t0bq(qXJ5nn9fj-lGf#K42^;@wBcBucxhs2B=DXq5NLV0^c05s&3mwKVjC zZdneAfUxn{zX8_S)uE8;Yvk3A>T*1*i`zl5Kpjn6{@j;C5l(!(k5|6 
zeB-lE|0R^LXC-*+J#|;LyHZ#4x1Z6cOND@ZLA-DbGXwS{(W`pl*2&rr7m>(Y{t`MS=l5bEZ^%nk=aqupiv zJn)p>Jgl}_Do?STSLnU&KuAGc632pn)NQuxI_P%>_yT0kh402)(1Gn5j%c95*}_%Jb&pc%x?6#QE?8_wU}QeR12SX`^h$Qu<# zv-*BNxr{6@f?%m-9BJfiNO{!dr?gSqU)nsWi#pRex}BAM*O7->Xxzfr!%-VgeUVBH z%u&t>(24$WD{thmji*?JUsJABNy)I#0xhHH)^Fr+u@?NWJ}*O>1SfSI-KEiy)S#@1 z#;2%8-szO*vjhe-;2j!apvZkItr?@oyuLja=)|<*uUAET#0CPorjVomkO$y#YW!t3 zh@oApp&z_qaWl8OAYlRDal!CP&Xygr_$UZKB@rqI7**DBy*DSX*Nv-*1vGmHG_(q1_r?-Neci;3}uXC%u&4$(9e(Z%}yW56Y8Y{;4w z!sg*@7&K!{b?c44)ZeW%XnK(;u|8Tov@p585Lh^UO009ir0cllTo_`UW_P6zoIzpo zmbNb*&rqU|mUhHifj-KaNa@hqOVhnBXM6qS6H8Ruc_S8aL}K}{UX#PmW{In>uyC4?8n_13Axw5FXJOTKenjMkl?k;t#Z0B$lfg7U#aupw@lSdIpW^DXV*kQ1@KC%CviWNZ>GeH39ZAJ0gaO^FjGLB=>hd2S98uy zrl9m<$#0vtM?}!iKi!Idepo8H`Inp29De5IHjM-oE_VB4pGN5AtpWXX#Y=Hno;+rp z#Rs*jds)X?f~c|dU}KK)NRn2wnYaIt@i$~+kvPz;2l{F=bL}+Nfx8jkTgr$f#ylUT zXu6=m!K}0|Wh`h3A;uvDQM`}6rgR41H+~uE=9~*GV=eLKPvsZ|hki>tNq1w$jK{-v zqiuYfUpu;6EA2xmaFo^XQz$8_g|y?7yyp(hf-HAncuIaKcrf;Z`lT5;hWyDvaC7uv zR#qc!W13Y@mA;{c(Eiv^_IN+9q@j6o|t@4poZ-E8thN=!VBXBrku zwFx0fv7QFEUwsYm?%l1?w0)$Vn=tSxsyuzdT$Dz(hlOPQvdT6}Hnuxy-9HBWz!%Y| z%MkY@vv~h#gxey%vx31*kfK31tDY4Rj;iZ)^;^s(pTO_#bP2?l^!)B#cmlS|##rj^ z_p`iw2jJ6IR|NK7EY%`YpSBnaSXdp^PB`vPiJNBgdD-m(o_V^p`$v`g(k#jFwilr_ z2<+5kdwr{%EHItWe&QUUOX;Mf{I?%pVK!&Iol`M#~c@hvlvw~9^kOO`uN%Nb znBEK~_7aG-)m~C8*x;GC5Ob*89rb9iG2)4=lxp)<1UWuvkWfiOm3HY8) zUZxls^@;E9YyGJdTYW+rh{@&~)F1Ue3M|U9VV#=WQ0#Tx_aonp0p0;zd~=x-70*bd zup1pTgrw_wadT&+;r^Q5-;ube+wW$n3+g)(mM-^*u@`>h@vuHOkR9#3Kquylde;$3 zy(1?t?X}`~bs$#cGkx?%yrv@ySm8aXziWb36TAyH-epO)SWVd-@zX74MzgP_ z5wxELnoS_lGTwYv&lVZZetjf}4XUn9cF}&S>A~OvboC3p%3N(UL8TD)K%#n6ipCmA zPD~TRY&B@Uf0fC1b&lz9I>nkI0NN%8;044Tx8cV8w0gOhZ+#!JV>RF8ECP*$R@8xMpgcz({*`@xssc6Dnlp-u0>X9PK}0>;R%IIfALCVL!B3Bud!UrDOnIUT zM#UJ%3T7{P*P!j}LnPAS6gg%ybV5!82^w)6>p46BQ{tkW86Pjp!b>m&e;2U(N?MYO zM*3+-ituuLGXG>@8xp?p5iI$_47o|8FH&ZFkk_ys1+keM?*$&!3w{w(1v>E#t}Dl- z)R<3^7ouah;%Nq=X<+GfYI*Xw8MSwE!Eb|HXWM;jIiw+7n!2C<{m1wEHu zD~>ePBYQid<&9NcD;l`vtA2B9Dba0Rbww~geZJ!NEQRZ5vx=M#e5CmGsukbd<66eu(^E506+`%L8 z)x(OEn%KIUqf8gzW;Xq6pZ%u+<-&&8B@~QbY0&^>lU1}rp^GLz#Z9O^ohXRXkOJVwXRHIZ2UUzj@v_d);{^RXeZZ0%t6O)-pT)j4d zj&iloQRyEY0grZnTLXUY#(_DLiKk*SzwZ$9O!^YQ97cPYO2HY)guxd1Q*eDAeReOFvT=x2@ih z%Hfb{J&`)%Lf7eVa^Y^gmr_%R*yh?#FWob&gYO6b_X$kEh)g%I+lQQTS+slE7K2KfJ}Jh)Wr9CM&}n$`iY8HW-%!^cx3i>-Bt&_1>WB){sQfG#~0e zy!2T5V3T{AN}Jv0iMzN30l-JHKyQ~P&G#u77E{$k*}ji?nzaOMtM}W_%@&k)_3Hf+ zKA`ITJ66UU6r#XshieB8UDW>5k2sL{Dv3GCN&&8WPQ+>9|G0)yx9_{;u zofcZJtM)~r!Y7D*M_C{~-}6ObY22uqoB#2>aPh4SSjLz{{y8i#Tig44B^*fBjR+NH z(sZmE`-sXmG_8CqWm^pD=jXR`ZXuGAJXn~eHR6a6IX#h**T`RK6>&D4*^=VU&X}6o z>m^rZl}YIExnu6#zi4zUOXZopgav~wInvc!E{UHk9y4==*XGEnq_pT4)s|fQnn@;K z7%Wot(=Iwo#JeEOzuQ$V;jIL?@e>l1Qes{nki~-8)%=up+UCpLKgHC>rFT5AfjSCa zNjDSo`wb^z)TvR6Vl{rO>X>1c3tnSa2K@L2vd0@pu>fhS^M5`W8dQn(=gmhms&#CB zLWc!eiz|umtv1G}RbM!?vl+!Wn1X)%bP`U!y%B>9RMkE; zhLWIp_?pQ)S3gugh-L}aJ{dpC0ygCyDJ?sA`d94U)!HgcXu!_3$E7c7*1SXQ!qDq#*nFE!PJPy{74vV{ngSewQ0%xpt#vNNBMg zZv>P3@p8OiU-%oC9^X8QgGnl#N*M45*sM zLx>iifZ#KuDB@-p1`o3eO}`m4YyW+-JJisyHL!Kb8ATQd@$TWP8ohMceQ1@P4F@bA zCP^osm&&yi8BcW18Bz(q>F0H7zh8N!)BGuE`j#}dy5oi|q(G_m;Q%|1&#~K5jVz+M zaw%9vGtTTG)BJuq0pbHx^10!WoN6s8`ywx?OGy_fF5_nuQ_Rz-n_uR&V*QBG_2a!h z_oCePvZtlcjT@QNAFU<-uAa4qU(|+M1LD0&wrD;J9GR7MH>d-3|JZgQPAXL^VtE+Yd)y{Z-K zeCu(CknZ|VBlXjK?MmsZqDGVBD1Uy_m&Q#ZZ@ptqYUSxK+}5-~MPf`OL%X7N|lDR3a=O6lg9e zF!Q*&<*Bym!|J)&-?psD>sy5%y)VJDJRj!jtkh|!8E-&_BsgRAOOVPrL|AxW*a zKxinDZ@JyHh{0%Ny=QVy$mhejWXCHSco)t7)37z4I~~mC4{whnSJ+O!0Lg%cy7^xZ zOUs++#j-9KbJ)`Q?#LA>Ky&3Zcwy3g$3BP`<5j}7<=Mr9$1|@(A`+i(lb3Hzh_P7Q z!ZoSyaqaCk$14>s7NrBsCl2!%2wtpLG6=B#OZRvcM 
zQpGgtvfu8AArIPpIe*&WSA;qv9WGy!gbaEyQC;dTWFvskS*w}hX=b*MPt{*f}ow2Y}t z5eH=rPn?3ybp!^)<^{2)MNPl634wPvpR;x3$9KsBvkxYSj+%~nNI9Aj%@`KMHY?Sr zx9KhtsNHv4=e!L;IWq4Dcx2WWAonrF+mo`_?N*dEX6iymQl4knS8|K|0iGr|XXhFLkgvUCB-4LP$(mlY7IJ4rR7etQZ0jruHY zK^2Yaq!|b&cK62ulS01zJ2?=jH`@Gw!s4x*p(ic302Q|YMm}jocO$k=Q^*UGn$y2R zELW5nxc?-i_f{eJRkwwX7#9knH`|bEg-5j3nkOcmm2n`W$uvwg4<0UpVNFdvxL#00 z+kX!C+x$ecSczwK5{OOl=6+eW97AJ6!eD`T$+3Q=qz-iVMK}io}At7)!!s zD4_R3RROze^CB1`NI4J9l3b0%;hJt9tL}+56>!=2-7dDw`2=_RBFpKkiy={e@Uf@C zYocA#y4ZKlHuRI~`Y+SVT33gw-+nT|u<%mST--g;bBo|xtHUb-y*43yQ0z-Kn@|p z<^dNRO;?$E5&LMg)1r^zANNL&Zh(_A_@)wWo><&G^2I|Pl}iBqlfOH|GZheQI@`zC z?+^FQw$-ZFa7>{tcz~S9l-w(rfCb~cc};Y1d+4RnHIGo2f&e%fv#UPnjK# z*WE(p3B7OD-*nYFf~dksYvCl-IfDOo+5e~6-VBIRda1`!R_m-N6Cbmr%i=0MLXn`! zm)zTvsoee*be8E-#gHZ&AINUI9c@^R`beit^73o<)`N@DviL9K?>`Bm%wQR7zk}B* z5d?1gB6tlv;EQbET4@`ZxJ|3b89d%Tfji4YSV*Av^SqZ(wq{Z;W<3R)IJ0(tpG9Cz zU-`wKmMn452`&d&-s@<(?g%{w>NmJ}DF`4aM^C}6VeRqbM6@696)$1iRK6DY}n_=5p z2DBpWl&1`5EB5Qt*~3g#%84p`Jx7~u5_{6qAxmQ8BYYrL{O~@YD^X_ru>$ZpG=Y!r zIt=|}$Z}(JGB)dB3UhtG@_Z}j*So{V3QgJOaKC7}Fw0mRHbaJXzxk--aCnN@XXhXZ z!>h?b*u+`f*7`+n0ZZ{}aR?WOAy8!BKm0G^cwmx|)q#@Js9BsYp;7yVWeC9o z1tQa`gz)PeDTKSpFA;LwU;)BgVy~JJ*~LbL#~6qLPvf+`9>t!?TxxwV2D@_W`>9hFPK8l-W0(zhI zmb^bdO?s3%EfFCd5ooI7Ljhl#Y5e#Xb{qI$phqExdnAjG_ey^)PVkxo3)=q4OS=9^ zx%gN>&OCvT3*1>&ujo6;bop(8i&#sJj2=8=~t zuk=9EX?IUr#cFg{M}OmajA{efH7yzVJ*v(9j>?U2>+Ou#+J7>fd#{I)v!f5Guz&fu z&<<9zG>*pY6cbiRGd+Wr)fBujYc8LlhAGioWS9!FI`_!U7luDhVG6o=B36xf0g}&OM-wD}8f@b69^spZ?n z$Rv;x-(9K8Y{-q~xgV`F*8w3zGr>;*CEE&#*W}-z{RN4lqW&iwtM<*Htb)O4CjHRE zEvSlOF(q&{AL+Tp_wjxE@;RzdIK1c@dz1BG!>dbTHMH+A@jNEn{*xL(Avp&Lp5ZIE za?I9v=H&&vNurmR1wXML(L>C%+N!|R?c{4a?=k>vjJ@c@7Cp;Fk;#{hn9d7bLkpv| zj0Wmks_n1o>c1MGd`)31Y_u-Qtno#V6-4hMZ^Btkzub=8jS4s|J;KxpBVpO@WcF_~ ze+Mhmsf(vcJIivH{bNY~f(`W+wcZI4SK9Za5;#`;l$KSz&)$-KEhvu-#XsZ-chqg7 z6)0bOU)RO~5!aFl(RYRSt_RTeXPY2gmrNznZ4w;6mi|CA)w{~YWaso0=hCz)rL8?5 zNkeI5cc)`dZn!#wuJ_&WgWu7;e!lPIIU!9!Vyz%9+uedzC+e<*@GGp28TTfQFRq|8 zOXaG`3bzM|Z_i0Ih}sZ*x;L&xi`iwS8<7YlP|^e{{=g;OZ)+mpsMUT!6q)E7jb-^o z?z&E6Mr_SzejP@GbUPNLQDv@Gm*&y7yw$q(v(SV1jX@Qz1@#>+P&@GSlbc$v8@KBo z$@7g*2vyTwlRLM|KLwj~hcksvL7K10-yNvt0iqGHvB6MB++e>4$&}UWNwHu}P-~>A zhs4HsHimk!-yvu8uOe1JF%zRuY&mLDiqUhSSJ4>e{^O~C`)nqDi(m(*a)VA{Nc^g+baBzKGaXBkI7dBaqJ2QOQ*`XU!7i!ceS(JG_o5oSj0m!^= z>+zV5XE&0lv>kse0QnHouiIQ_eV&sz{5;1iK?ts>^x`$xSZ8E@Dp}-do$!BXyaSJB zb%}zQ+m0#|wJJg3T;dsQ(XKXJv1qZ8Zjkt2=zd1b?~BxPH@1 zHIyjP#=M@Ombh+oY_C>z=3wwy_%X*FzF8UIkM4KY_Z6-8%n#NTw-?VleynIswslKs zWWQF`ulr)?qq6b+DeLk#d)%H-_Wv#7XCVB4O}~=SJN>`<*B%lAKqhY5F^-Oo&Uh&OJe9UD0D~T|)`n0#ujBQK7dI&QW|CqK3 zzoAqg*ed>Y9-Q5FCy5|kD)D!K{~0nN>9v&P_$8J!V?~OaBVGGr|w;mBVUR%t9=$ThjENm4p&|3V^K~{TEX^k*)_IdsTCr*T9B6_ zrmRuO{b`!h;OU6G2h*g45mT)|w@R-H&19h(Ffz&9#YCqvOnPWCBV7p?BK< z7_qx?>3m5dHs8_`g3~yjLFgKgf&1#^5u5H5AVy%%v9o)_`D zykdha(BaXO8j){2Mo%K(QA<_2VZ$Ks-#R6a6j^$>-W$Mct(|avKvB{!F#yEUu+vH2 z58o5PZM1qBBEDGM2>-qakSb%P3szBxC)%$vXriODy{-4=q!qn#;?r)wP~eaA#Fq){ zL^YiF^+pISUgG85SdK7hYz!%ZS46H}xf|MNIaK|_mQFwKXk6*F)}N{?0mNNqu*U~3 zoZ^XB=R7MeIXrp+iZAVsR5NT&OrNO8xOC9jH7mi0VZ+_If*ai0d=A%cKH3d1!Co&| zt<^XY5ojzyS#!QC2VO{}u;8HAV2!a@X&AoUA1!0HIKd6rN1>EpV!G)Y{#)mK%Mu+N z5iw#lWRj*n(vOfRrfkjIE=aZ&=YTo!Y^V2PKBA9mRfY%~PKHK|q-+Hzo>N4$psv$k z87lp<6UW7)U&SVMr{-{2ZANC6j{}EBlR>lR&%1H$*AS9YR-FBY(+g1!z0Mv2g&@=L zS6OjcO8%SycyPRYZ~G(m6zszOVlK&k(>!amzDDL;8apMBWc8%bV?EvUFt)I>Q%~3* zry@Zj$js<)Aod9mMto(B7@gum?5980};jR{+_annceXmJ^JuT@E%jJ{8cKT~P zbdUJ>;h{!U-iKDuN#n>c-ZvESt6+sKrmgpk+P1DR1v4HY0!J*{{96r`a+G651d$Np zBI{-7PnH3M?oS#9e%a{^Bc!KtOjp36#t?bQ4R;VYxL75gYOxh*$S|{SJl$C{9)52h 
zr0_q)+)vp0d%l_T!xi=ETV1lNNeR|G!zvGMtK{&%w`X!!mD2B1y@od;eYSHC;02bn>i9i=(Wc4&7m&U z?pVu%fQ{~?B(P|~ds~3X+7nLo$e?$;F?Ij+2Os4?-e(M>zD!NAxRzb(gCj!&+X7MA z(I1T~56>F@A7vno*jM}c^J*_R9uDE&3)aw|gn`TQT2PGhoaPO}8`bmp$es z=TMJqyc++pE58^wGs^f(=0CjLi}GWpsvf3jW)x8WuG~YGsmRS*Lpv+_ap;IU#aMZ^ z9ALXRJGOm4uu?_Aajh)t=Zq$3Yu+eYVQ78fyV&?0tqX^d*8XkgO@=sV!W{Nm3q&Zn)5sFCsY4p0pD=@?|V_=UGCHa z#u)>bofW#IEC2~Mn*vA|KAoImt0IlDDg1{&l0WCqskW*=kJh;2l!JQCy8i6GT{+_P z%C9~kF|Cbtk@FVXzhX}zzkVM5(@Ez~0hv**=uv)<>bj^Ea2T-7(Uc(lX!g$*UHU=+ zOkl^MV(7oYs7gRb_NUQ!JH5(@&QQI#wnH|`|C)CES87;LjemwAyCg)Jil~?54GkeU zwpFUgY_2%Xbd-Et!?3yGoCMQylC3%I9SFxiM^KT3((j5y_SPRY4DSe|;k+!!51-gR zN+JKk!ngEsUBR@Eb(>c=;E3NlK`CHKxK%kC&&-9^sxYw;ZGumai) zmZ|jKMW=OvkJKM2m4gx3%Up#kJ;8L0e#H}gpP^}hVdN3hXDeB%w+ek*Y@gr$C{LwY zDPyJ4B@8pzV#6Yn7`SRrlrRl3qPBbTYX~Ax>T7%VTz*}-cJn;C)Coqz?rA^ol%5&& zeSlFTJZ85P46}6`+=3o-@!jLgp9HM$nh#2^3X;?4@}{B)p2t0|x9TPjm%B6#N6E&G z=fclRcDeLwu)>Zy!6pt}8bZ9a)UrR~zK?SM=iBg-Iov&{LK9BOXH{3SKv#C~=fSH8Bi z+%j){^dUdNHNIM>bt7EvfzrStm*333DNZLDZ~|r^bm34?jfBbR#NC%OB(w1n>eu+d zp7udn+R4=O-}FK?vY^ysD=ihHCo=-dXemyEiRowq{lDOZFaU_9=27*tfzZ#}$@}iU zMYLltUo%Y&TIHelX2`2Q9Ez1Ed-1RLv1Ke_FZ!adT>cG$O7)R3Q5qGRsGOi~%hA?r z?9lMu(jHf89krL zlai~{j|TE&o{DkJK;HDXF8|oWT*8@4pFXhM{6~z)F!RH1yf(&zSXb)RW=A#JaS<7< z$$?hT6v`C(m-&s!*I|OHL4^(fC|3C0V$fQ?{-ccSoq{vxGz{U}F1Sa`x+5aH-SHam zZ4;r@;x`=kvB^#3jZLbtz9y4`rS2yDk8=_p zpA5etP9-!d?T=Bn`BdPdabH{SylDOK?*YHt&AvSa)9aGka^wE#l?zC}-v{^s`ZDKq0iyb8bX zo6!86c~C}e}Gy1{w5ns-7nfiJFWbA1NP^2id9;=8J(yC z*GSpFik6H^_#Va|^u=ZTytqc=y{&Vrh3quWY?lTVl>A~&LE-wmPi|Z_ zl8gIkaDU3dE9>YQx9~|x8pXO8gn-SMvS;>>SgpZhe}JkbvNqnulg!{Bp`+Pxgtq>@ zKf*Gk*6{FA76@vxh&BeziaMDyYHtM8g?W4i%QY$)A>)kKE)aV@k>kRHyt^?FOkTXNv z{5ghWEWv7%Kik1|;Lq$tIDE&AL-c+}mGXxlJMTrtbwKxjDF*YRlwt?ZJ=aqeD~dU4*fa+vUQB@|UPd0c?F&{qtNh6D3WAL-qK2zFZGgR|3v0^?tRAU9cw= zm$Ku9jSOsG!klt>krTQ(O^*{EcAJ7L#(I(TGrRTfzi**b9vb1L#23)Ftx3fbxf)#U znjs|LXKyjgq?+N^Yk>ny%*u|8+9Q?q1m%OO68OS75YtQFi;3HGXEPcdNUgZJSVAY= z_lm`$H(Q@#2-LIco~Ow1eAC1o#s)8pcF=3Rx2k0FrMg~ZWGhBs!ydP7@9FJ=@;}T- zSMzZmj%KhJ$mr((mDqRjjjlS`7%L?XNil+<%$Tt}*_;k}!ue>-Rg3dMhuABL-S4tB zEBjb?cByv-;n#xp+If|`_h%wVT<*t*jR7kHa2p0dJe8Ox~(Qw2sFos`i^tmr96q_8gtgDtrSfZqz zZq1NPE+K|E&yW&hBW^M^w(JKswCC%oEu8kYH7B6MTV5)8LbA7x%PqJIpO(ztac(o< zqQ80iJ&7Zp7lvlV<2PJeI64Kw&#>OH%dY0dh;yYfg}Wy-1WPEb-L;JRADQT5w zrz>1docV)WxAPn_uUL;X#+W26!|d(D5W3B6E8VLS`!nZ(qv|ECIWvf7eIh9ie34a9jkdi29P&`+sPGIgt(`iYF_CY$kBC0DLx; z=_G#ZF2T{NTu&niPMl~V*4VBV(*wl)Vyjn}_4(~O=xea}MmywavI(tg(N}45?Tc9^ z{&I;2lV?i;($WK^83}${v0Rm(KJD(K-X(?104i`9Rx9m|<5YaJc`&i<1Oq#QaOqV{2K6L}gJ zJR~5n>&H3Rz~29p!&I+>ZKz%Enc-CSp(;lrM0*IVWbyhEjP+UbO|EiuCttsdN(mO4 zc8y&g5w{sfb_gPQ=%Dre_zsSgsG@-X#N&eF!V*fHV z+D7}8H>+A)*+KQz;|v?;nht*opeUYnX&uG3(ah&TTu(KCWdwCXT zugY6$>l}Ou#DR9HtraFxJ-q#xfP@Scc6r_WZV+`4SlswF-yGP;pL*(I+cdEdmKU94>|MGl}a?(!%#ul)a2R#)9SoVwp!a=<4+dqg&Q{qAJUcUJFfG5HO69S#wIsi zyi~`K`ea6^o@uE9k=gNHn*&ja1R;I$X!0$w{ZWi98c=yMM9b64YBrgnLwvs0hX1{+ zmQtt~Q&vIH><=G$;vIQ@>8v+2Iioi;lcPR;p)C5!q#To5I4X@JWH6Xy7Jz$lO`av{ z4|<^F157G}ZHh?`kd6L{<)Z+~e(*t$_dQ$eZP(7jt6R)@SHt?O#)AKEB9qv6rV!kG zx$3-FU8|>>P1bpDqMDeEkdkCk6YPXzA zY=*SFvQMvTEBHMx1`2W)^y9YDx{x0|-%_W)0y8T0g-o>?(bW1w`}t>dV$}oYzX|!C$G&aXkTEHo`bBKX+Iu;xUZ3-`RkKnX z=gWUAWHpVaOQA*^=G=}GZd>;_@hthwfz27hrdXYGMf=M7pF6__SKLTXps&=z0Icmq z*VUV2L5e%t?_@~`?}M;G!P`A4r_+INR_m);&8Ic#$j~8WTNFShCHRpQ?=k%>+GtbY zjr7Mqv5AALd%uuGXdsoHMCIM`l14en2qghGpiZ@4j;8aDVh$Zbn2bM|Jk&Ssx;Z}@ zG1)NOP9zik6IZrH-1iwN01qpm+^bLtYy5BeU{q?q{+Ef=7Im|jRNj;*nHLn{(8UE@ zpG^v|b47+`Hq}Z)=+fZjq}It$`6bN94@`rAc4W*BvW zKmoVUOoJIPXDCUq{YWQegLwyf08NEgIG6oRcpsbg*>r5G^a2n0;*N55HDVGL6-g7e 
z$rOs+J;aEW3FuImvt6Y+W4cJo+?K=ctw!!>PBPfoQZ48M`Vz)7DTU)u?2aUpoZA&o zf&v?}8?Js0q2uw%aHH4y2w=)h=$j^F-~30F@PDjoMO2$RT@CaWkF;|XmMTl!4&zqsyI#z$r!xdtGqj(;MrCh~Az3tF@B zGX0K9nO#8u0QMLZA0LlKp^b#B0f*{7K6plrIq3})WH6@h+FkY{c7$il*j4Gu;52L> z%JX&Sh9d6U?Za5aaW*j_P8QDMS2(?R*iYIvM$Z+MQb?(oGX2WnY9Ed$DKhjfh8_e` z7&w;_V`!Gq;(_SSI`&E&N$%g-CtvMUIkr_gJq)S-e9Z`T{Q;Uo&Jc7&*;*vpmGg#O z?G7d4?lF4-GLXz-Su7m>MkwA+qk=`;k8u$^I`S=Vc*#DWuEbCQb--4vp~=@h>KM#i zy(z#He{0uwO8j%ax@7g`Xp?0xxbBwd)L+0^QESS3LsBc+tZ;SsPsfdS%4aagC(eUq zpTM{Jh7z{$IHFwajw=L2QCv3r>0Vi zN2m8K)_>lGHM%({60bNuKj8{G4C`CMo`w16UyX|qWissyDwxk$f9Hj-I)L8n_Vmkg zbqkvS*t6pB6_(8&stvWOvZg0muR=7g*pNRIeqgGTrG!@V9 zKsV~zlyv?@OzPC+44;EF6n2yk>w&=vQ1=sNN?vnm?&=wZ(rOECr@7QYT_3f_odK;* ze$QqWb6Apu8mbyft^dW@^Mro_9?e^gE-j>1Wt1lh zqJqbF0@i1o6M#BZb9)>p+E2VlWje}hD*rKBIV_-z(n`X8;F!fKpn>1g#z$F%3GY)V z0g{Ij-VcCZg1^HtcHAmZ!B56Cg^(9D7W$hLOAtw}8!eCe09W8RwTJPKH*2J}#Zo6r zJp5k>GcN-&56yd;+vQ)58=0a}-jwnxl2-=3!CuUZKRvu~nVugXM?2y#56j)#-i)*R z-Iy-u$?-WFOW^Ik{_2fZ*_h&+Yro=q`waV1MO;RRN`_?(2{5Q6l>P45ke{TR59Ylwv*Rh{{ zp4CJpI7`UM`tz74@y{9nFf8PxUD&)H0XUQKX7?a^^W8)xISC0M02dO(``5on!et!? zC@^X+U!-Kx^e+t59P_jE%E|v5!3IvNc)%WSxlRpQaI)JD>qX$+=`l02GzTYV<&Om! z9~B1eGaNd>7ee^>PRd}@>(^4FPbU)2fSt13B=#KnGn~koeq*l{Y+%u6McbnAXSOvR z-`jZpuuYQTYw1GX>-l^F)i9&49Hs*}+_qtl>+<$)@FIFMC5ZArxxvLOao5xqG12WF zSOdETeCGqgPE*~r-KX~V7vuh`P^OozW)WY}TK)t!9bPOXs9-R|N5FCrCJ7F-yf_yF zR5N{1DBuDMYB3D3KYUC@!DU!_BS+(OsRrY0h-8+>MaN?o6QfAK8d)Di3t(1&ocaiu zifnt)?lDrIcZlFKXZfev9VX_@iF%0n!BC1~3ogljwisMK=BGvVrw+_s`0wyvwmZ*i z2N%_UFQS;Jbf_>S{h&eM{{a*j9gG4Lk90WNNZqyG*27mT`ug@CYivgJeRdj^m)@SE zJ4{ZIbSlMIaH6gl_qy2goA2?_NVnAR-j&5r)&vrP(&W#9TNhVXJ|w_oLqd~QS2Dz# zI0^+=^!QZ|4-sMk0Ad2k%$#+DHdT$G^kIx$k~*u2_syZK(N4nTooIY2K}Z=ohb(t^I<9k=dp zmR(C`OGw-Oo|GTVhqDMRR2~(Z*?*K-&hnwg6I?~P+5Iw1=!Z0WCCiz$hE)Vma^9ML zgpBwzK82o;ySyY{2WDX~xzbHlFK+vQYnQC4EFsed1-j-3@kXQ#TXu2zosb$)yDU}z zi4iK7tna$&9(FH;ftv~UhDu%e;?3z5=pAfIP}k&rHH_Ob<#V}$R`S;&8HzDB&~vDl>Kqg9q@f40YKG8X-X41L>iI8cEEP)NEgaR{)5 z^Q#xdP;omwO{{If8mP%mona6D3l?@Xbc4D3^Goh zhxpP1O8kMH+QU&7ybOV{;jzt~nY?l?nrY-*hCO3C*y$4N;gw>heJ;VurzB1Xtk0gCiwq>MWe-H}|w0l|GMl{8z zn!TuFY(@RQNJNv3^{Geu8!fY$bG`j%x}D_$r0mbup+zr(t7#QF%HsgK!?oEp}0o{PUx;(yEhXfYj`D3B_G3{J@@QC7Dvz7T1u z{DTC|%=I2`l@95Nr(2G{UpncD=K%bp5qBYBj-6hN$0i~!Z)y^B759IpkdLDp5AI7z zi${9y4ac>3fm{HDO~7w@doy}B!+Y@Q*NaVn{y=MAUhFS^&p~h>nGNv{ZK1Ul)BZ0; z8n3Kzv>_)F7N)fx&UGpkUdLD4!oXnHQ(TNzV!*5`3 z!Zz8-C$Cr%r&~D76)aqtuL01e%bh_>DG`seFc;D?tG;G`%_?1VQubs^QyxmS?VzSbF+}6F zGOYEM8oOzIro)zXn(H0MPkj|!#HR9#S&ZMNy5*sBsQBO|)Gx?OhGYAK|r@3FVtFe0i=w}G(8uicT28>=Z3-Urymk;)RR5sg#Y0ETcl- zg`~G|F5T@K(d+$v08Kiu6p|ZEMx8C44P9PFJ0(N~s7J}?Zr<#Sb3L;Z2zUk9pnAE~ zYxu}vz4M&v?)<)5U!a#H^l4(V$0l5PEySZAYd2Tnexm2w*3N(mk%L3VCvy}|HFo+N zPEadKr~Rku-2dJbU=h%Orwjq8!~{#2BDO&3^`!^-HmK0z{l4R}p93K1g;YkVvofQ8 zl1OxHfgR&9tRIU824hap2ZH7;MwBhnTT!WH1tCI54pEznBts*k<1h`qt+7%?LS=-1 z&FO0Y()fN=4VwW!Me@k)e!}p@me>(=4*x^Cg>&4K2j+ycCae^Vf*hh4Pb+G!v5pcW zLWT^TbY_lv?}rGM4ZhR=c}sGqZgHnVzlsQ0S6~24DM2QbKdvZi8DuW#H9Th#_}{&;9foFnUeg;24& z?c!|2x-vJQnt#<|H3PEkW`2D4|a4xc4BL;awD ziZ_nIN2~A`?iMO9P>A&_6cc4b@VsezXh~ zE_d|pj+#oLmVU^e;VFm)!-Xy)#!E6#L*|e&LLxmkC1diqQniBUIf}Y*=%lwGPpbkH zF7WH3<*|L&80Mt5fWM7|!s&VU6BN8zs1`JJ?+Nd@<=LIgkS#SrY!}YIp8Z2L&ZYLWZy=`kRb!((_*?m!x&1@a z_mbQgd(k6$BMQH}O0Fe-n*uEYje@UAuQLU0v7VFFtyxWW#z|Y3XBdg2jQkK6i2>3*D$tDmGu4#lRng*A_S5P4ALSciC9d+z<1;!86_Jmq z6AKPYK!pGCB#9OqeidY!600B#A6#qx6*Hq9ExIh?gS7G99A*NSqXXH*a(s)^d5%Hu z&u>)MNL7hqw~O1mQ$+{yUSS0R?oZBM@ByTL`oXae0MyAW%B>q_U2!r)B(R^?B}Zit z5ZymG*Qf|@ee4grCD0$f-C6rF*w`*|kIxVh9SM=4a=C69Sk)dI;z5x`4K*e2R1o1P zh~mrr$#QhyW>jM8#_!0T#AUiJkpjj|>#?OvyZ5ibK1~Fui4pyJP5!8JvVoww>@ 
zktZE3E_-IYNAxgGA&+&GgV`o}hl7Bs$oq#aF=Tm-!J2+LzQ*uzZ((20u6HZF;XvTs zL#c0$cl`TAHqSYT zsnOpXonLPAyBbZX_xE>qVeDxWnEVSRI@FY4LCS@lt>Bf?CtddSWvcn


)hxu2n^7fsabzMidpVrK9Jl7|Zcfp5Eo7Dtm5;_hoGYD8r}-Q8dp zz^<-@Tuzr7n$Stc*hkZY2IFwlC*%I@e|{7bO=&d4fx7whiksPk(q7agHf&$NBSC!u zqh)j5T9rY7976&E8oAawEJS0Mib+PV;uERd1iiCI8FsDs6>T2L)Nha$gGIBDGXl|0 zVbo?pmX};ISi$c$444qWrd*0ag3cy9+A{lJu*!66n{a7TqXW;P0*%-QKFK%*5Qd&+ zRsDZn0KrIz*`Q}~fHtkC9xBMY zy6AlLieZ}p2;*zFQW^`O|1A^JM9DzJ=Z{1jn#fSRB70<@1rGFYA4j(;(B?HQuK=JYJba<9n6sxf<&{!??8+4Vomwpc%eO!64@aS(cAUV++0@?5#UL1i$m&Pt zT&N)TF8vxa;-Mh0xq#@}v0BML&DG6ekIa&A4Z*F84ngSzBm}`jMNY6Ilx+Y=;=6rf zmdw{>=c==7BNjk+wzr3MyjVem1fqkGMU^d1~pr!Mibg2lS`V{ykfj zHO!0SkWG`08g#&(%iwcX?xB+9SkBEu9gogpt?X`wy z4J7k-tB6@kHE>#{xaY~^MoO2baV%RuwUR7w6Bjrj=JxeNevKXym{R6~Jyyx>olMvm z5a>|CA+NT@X(%gsvHiceMEg2?-CH z#Qx?6h%A6CXuEm=`Qf1J-C?v&p!j&w6RT>eHQxbs5V7fl*w5{c|3(W!cIB2!J^#vk z!c8Y)zA%ZQ%+QKRorgoJIsso3rG=#JI|l*5c;dH4JMp|0Rvm`Z_{iXSj#k>X zTku-jeWVlTV3oUn6=sLe_hr(Zl}2nDQ5OL1SMMVi`}IGxU2N*!(4vMtM}Ajwj-W2q zsb9_RVmC+Aq(?8HwPfmwIz~laUZ(5kh&OK2qErD<)>F-IvP1%qX_c!@k3W6R`uuoS zZ9xl;51)#x8-VxF1)!kmO&|l%8$tL!Gx1iRl;5U0K~{~C!1N3ZOZ3Q4O&kmWhG-XS zpce#tvuI+!t0FU(0tKv?!uhI$1ck$h7@E0G-by+xV2y6oYm9qtcjUP+bWxZv*nTSE+;rM64t1YfP%WWR#(ychDfkrlp zIjK}ee-=E((rX9+B9t}@B@UiZ(CLD~gzQ#P09r?#u7kU`_`0~$F2WaBzXKf^EQr-n zwsN2NN{b9!mkQj(O2OGpf?wA0N1B z#E8Ijcl?yrK>ts)@fPtuUCow+N&q|bJAPsg#L}@H9a^;}l5&B1ew5qc5O92D`|v*m z)CSiI8Tu@op!m7zYL)Fa&uWwX0Dl_HDD>Y`w1y;iW4H*|H|<3vW6jg6@#uJGP$f?% zm~L$Jy^PZu=T{Q-rwKh?bfPz!AR;f_{Yit+^);vB_Z^eY1xNTGw*R_gyb3%cn(t1< zu_l;oc`}?4eL!?k3tfA<)c|0{uN&3&ABQ~nZYwJN%(*vLZ;p)!Esu6Wu~<7ebY^=Gy2iPbM5&P1m1)GVT`j8u z+nSy;JQ{wP=UOZBDN=jsB7u1%GAY1b@_(*kGr{&5j4_SBh2DF)=$Zo^)*;{z|86e} zsol-YpAT;>fxv|Z1-Dtt=0GEn(JFKWYYK2iZSsr75ce=LwCK5ZZwNSVn@N5cY3>UD zWB>DZ5&vAGJ3OAg*RJa~DAGx)j$uk@Um@%=-~aiBaDcIw$MoP|&d)QC)K|eljA03PIVd);U4Z{LDI+rhws!h!@paDRPGXHgzyA~kp&t}5ZKABqx|pm8dr^D8~RxT@r*Lu7;lb86tBW?pj;rf zFTeg(Ab!x9g1dSw;KJ}qnc@2?#8u^%2acTow0bW8)B1>V)rY~ zD)9H0zj{zRu;j;o|NHTL?K0eJHjQ2S&%+qAep4i4x~9;u&=7%ACM)?w2F|)QNdhlz z+wyM>jOIfRg)fVn9`_6Ln_&zl-ghqzKg>}v1J8iKN*ffwWO@hE9v4r*0bfk+)f_j# z^zk$4$kCs#@ATohc7T0+O2Z>PE6F=S;hcZIOku$=|3T5+hlHIS5gK}$ax5TFv(btT zH7G~-w;mdB4+rXtUSrz1`AHL%pdi}cPfs9t;X~$?u1-fzw|jcg2kUI{lNTP#0W;cn z4<84l#Qeiv+_-Tht5shVdXWHSy9NdZMjMsmD0WI;nZECl0KOy2f33MH&qyZjGZmCw zHhEm&-k}aE$(56Bpb=f=!fP#}dnkX^7-xwG{DG)wYIq^Va62{#5+Iz1jusi9AZw^v z2#LvTLxr|9}FY@VGwQyz=K>E-Q|=N8;&GHyJ9 z!C#cj|20S==Ec@G{qPdz=XsBbM<>p)M~yf!=j8RynfMUin2rbA^;L>3G#CS4*4sGN zkX01-U``c0@6=M!6eAD=B`I`bU6eDLlp0k)Tq8rrbrtE-M2ed-ieeOZzZ`cfqTE^*Q&+? 
zH{0~Veoc7|Y7Pe?%0Jo=2V8)_b^6^$9TYHgac3+we@F&jgpfQ>M%{Bn`Akz!#bjG9 zlIZD7h8)#LPaqDx@c<*mVVCAO)l`=u{m_4J_$H$aB3tI!&8c<)LN}LP0n|jS%40na z#=@;`r^f0}oj&(?FDs4N{d^;a8}uB=&_f-4*z_R%$7%>N!;U2E1QQ)J6DtW=mSTzw2(_VBC>peaHAy7)walO)c0lE{ zI+RgJ|m8v9cnusH>z(-SH+?eGF0Ggz0D3zL{Pbt4KW>Gt#0ayLd+TF zva5}ui8w=@ZdzmT*4+p1L_z9IBhIVB8_1FZ__e@nv-E^f>#^q zf7$BY*9?+h4W+Cuij4UAoTC1D<*8*nN~=vGmE-S&7o?#2JrBBtGmcY* ziaybjDE9Nv0&;pWe~upAEsSz+cU$axAAsFPyO&~fEC)FkC=fHLs0IOjouUD#3rij_ z!#1gl5mbC+ z=>7xkOMNtt&CH}G;nEBtH5r^(+x`*7rSqOO?K1-ILjTlQq`0B^jgVq>OqcuGGIx_K z^&c$^G3?+7(X5GN6FM5tgRcOr(s%%ZjA2H{drORb*oS)``tF36zBd~Ow@x+GJ^o0a zw-xNpWn}(frkMOLYRqQOkrIc%HybI-f*6#8L$I2U87;Y)2%(UiqA|4B>oki?Wo#DZ z0Vu?=Y5e(Ux6%2*g($l3haF|S04liP(sL~2ygun|Nq0A`FrU-Xh^VWyjwzy?g;=ku zM&i*=@c^1mNi3wuKcpis*O?RZMmzPM9@OkP*4S!(K&C&h=g6wTAZKSaJ4MLdQ&1?u zK)}mQmTeu&~7Ato!t7B#L znWI~C=o8&o9D_{AOe&lmiejnF#C;!-9|LP{xg+LEreOhf5>b3X9xmo?I__o~>o3-tbh6?y`JX=!P|;}u7w7?>-L ztD%sGZgt%@%{+vwrVtfoVg52ny z1d3T7@;!EFeoGgzeA^d8sSYUYy*XzfcJ_Rm9gnZyk)I%wVAnv^U2>qYg* zfp?p}@*APn-YEzYOQ=$5vfwfZwUVl(D%fmd+aj2Ba?*#?AN|ymv`WJr^XUJLC6R8k zUR3*7k+P|FBUAC_8Um=f4r|uWLBuEWZvp&Ic74v_2seLf+Pr8 zGkv2$DO)I6`a1MF{rmkhXz{Ah)v4>!iH6c){_xU&slgKa-5=YuHjguz%steQcD}Pe zZ>B2PYyXyxlbPjaiuwm7@O_2so4Og){KV>uv-jUDtY?kO#9wJVnLl9vzo`1_sHpz1 zT^uF`7?2o5x;vyBX;1-akwzL3X#wdPx>G`skS^)Yp^C@qnBm>u=XcIo&%5?o z`%h-g@QM4r?kn0{t+~mthg1~^%ZB%s%0-r29~O2mMoolMP(5aCM$((n=E|49Xl9iP zh0`zKdgO`X$UAwf`ekLEn;~mDXO#H83;0I`zj`4AKL^|EkJoTvT~QOT#%0&D5%lSj zLF+A!@EfdJNzie3dZx>88~r-JOti7&U#Sf(Chiu82VjSYY0I1JQGMirF$-bMxav|0Ci9BaPpSsX67Jl|+Np8oNleS=7 zSXqi)%@2pk_tK2$EqBOHw=H9dx$|W0JC=^(xDTIvRo;fKla8Yc{#VZ*c#*J|yzB{W zcX~WyS8boUh$&J1V3j8LiM!2V)h%BQZ=6Xndr=F*y#GU2jRdp? zX4w(a)_z~`^@jXQ#~ypYx((sG#`*Yf>rj;o%g?pl;eJG1Q(A}0AIBGCIBbRMqiJ3d zztd%dBHrDFEBr$^c3I9a+e4l5{lm=DKYM-X59@|djm&PCvO)+JkierD|5NfcpzNG7iKP^i9=wwsL;p$xwxt1o^@3$t%60OJj)~h- zq)FMMF{%Fu{=T=d#X3>yhSd~uiZq#WLir``@#^uB1UxA}&~U(KVk|&+LI%Z|H8t~= zg*E(HGH9sU{)GdP@a)#REvULUc{o9c-!^L~^_cr82blKYyhT?B2zK5VwDb;b#ycEGg;U+CDdS>7d58E>T34uAk~=5@;3T_A7wOzXsFM9Oy8_4D$eN~C6$ z6QSt0BhhD3BeX(JH+NB_@q+0WD|2iF-T~PMuBQwdBK)YZ4-K5@0zG#wyDm zC9{2QOd(+)TCleSMZF`Uk>~YgGp@J6Y)b$3{i4RG(~t61Bl*bBqg?m9A4A=$BxsC+ zzn9)@kxF-9ao$_E^~X`g(MScR#CY2pw+wXxCqEwQ5Y`QCSc3dW`PXI$nAg%$H}V%> z9F18^4Cge0{f2e8ng4Y_rnJ@MZ9<-V;}h(zR+sD1{+!nw zob}3?-p-(NIAve1(4eAbqVA3T#%@X&GA`FX3JG374z~9SsdrF81|(UIY zieJ>RRXoc?+#&-*XWENbn1feh|}6NkaiNxcZD^!<$XC-Ryh{ukRADo@@W zPD|E&=3pH?D3QW*`KlI)|2IVr%%V@jq8M6yhC?Ilit>05lpahf19@bW!geUt7U?3c zkCJ};<;JzKLrq)V$8SLpmo8DyZzogeYFV!sr@+IBtmEou^J$uL+sCap8ozz4HW~)H zQDN!{^l8=TL^AlYFpx$bPk5bZiBDn`>#U>t^)i@if#)p^ock<%PWbXlK{V2?Q+m&} zUNN#;%GkHyJiBI^ywIVjV0%zNihN22!LmDwoy>2qaXatYEw3lm*Xw6YuqEYxPc?_E z)n;4-i(nS09Wp;YBC@-Vl%hfm9dqkiu52sFDJgXv!=?wuZC@Xo{U``u1AsKnDP4b) z+>E$vsqW&y#XUR1@RY@v($4`kb7XtRvphS5e{o?egeq#ZNloN)1mqsc*b(0P?S;Kb z;^6JE(Uhy!T*aZsTbf_IQ)|69Nz5*~a8s<`h0O^%&S}+g^d5`BnuEN_+S}d_C+JN+ zJ2<_O#D>#1%_!9M%~nflC&4?;P|Lh}^VWnpNZx#~Bu<}QDJw`1k5 zttT*KaghKWNmNRH^i072d%9AmP^`iDcbC`7#DP0YWeTLlx(co5UxsSFpLI%eVG~GM z%l`aGT&0vb0m8SY{OEe*gu!c`F*gbdIs<%%om7olrSjD?Wcq5Y*-kY_4H_`eNEGVn zQ>H|#zuzQHTaU^1uvB!9T-LSRX54ilvfb^2yl0WeD5ug?WpgP4LJjy9` zbn|kgH+s`=3ot&{59m02NJxbP^ zrgv>iKd_rkxA+J9R7#N2o)alM5OeP^|NAZy?M-e7-z`2hP05v{=OcJ~ zBwarFJ}xxIY-nnyyJC)&?=ef#MQ#MW(P0IOOYUFArrvz9V+!Bp;>2_FxuCmQl@!m_ zpvQP{-W#Jv&AfX1?VNI+yVm1!>*U+vf;!SD5w_jJFTMxlyLLR@PYDCC7{H1wx+Fl$j0WN`XBt+m7r>OH0-D3bEIU}ulv%AO? 
z?#^jm)%p9tf}lFCpjWCNkH_D`ArHWp8~O=&5S5Y=vYU0~IEnOWjrncw@btdO@PoFF z5R!DA|1$pF=%_nNWC_PmcZqnDK_lE;)?#~Px~hujY&~#c$xk^OAJ6oH?0yhR?fOrf zjgRI1@@e1)X?blG;df&V&iuF>l%<qCL-yXr9di6%ADWY5tH0dsJN?;*|Q z(W5jN*JpThPfn-P7_p?sOQ~g?Fa%M#%4(ePGQ>ZTMm<^a46r}O*8ZaBs5}Ir5`ur# z!-d827i-ZnxLnULdz4swsk5RwbGACBpeBRzd~6y+@wmUY97)9YbJS4M4EW~`Z~5r7 z8O~QL78Mt~Lqp@d;nqbt>a%gQ2^w9T3ELYt?z-1rT4$u$C>W1h2FW-q{7M;wr_j>8 zk&con-}M|}%W`f?p%;BY4*j=78c#3k+@e=bWZbSevYO@$iXrTpryxtS-ch8Y5pgMu z=C!zIIHg)Vc#4b=)Uzb;0MZ`))UUq8t5SMs3M`Y$v4Yx@c(s;YMKND;95B zU917K1-S233vmK5eGp6QFsZYeK?C{UUoq()l8O{@>rjKyMF`eeetlfRkG!xN`+@&u zc7E)mc*99p&x4xYi1^qb<^xvEaM8Ow6zUSX=pka%-}jHlR#gZca;Qy*wg5bFOo!8e zV4izz=76+l>5s=(`RnYFXTeOAKY{U^yF>K2(|JkS2;N05S% zJIT#ehKw0&+z!A~k9~YCE!5!{B4k{L3sY00Uw<*)w(y6){z9wmvl*FiPU?3Ji&I~$ zxXs!t4MN2uiD%a8-EnCiPj%+pADeMTm-HNPup_TP7$pgOM$*Xt=WUVq8|n z$r$vTZ=C@O&1>(6W7+HPN=Cc&6QU;=Dkvs-M=K3=uqyJ`>y&!5zfVIbZAMj*ae+N}cp%mqEZJoG!puRC$0+s&Zge@`3mFlnrC8U%h`NhB67ywj}O6xqPnr8j{zn z+tTBPrqg*^T7oZXdv_#@U3g0)cbIktY!Ukr_UIHGEaradAt;ip1-v7^RH+A!v*LjC6v^vSba0jzxw7jd_^YB&JJC zvumHWvfSCcH!H89kl}q!zb;MVchO5Yn81j2wA4sjI$w;@Yg~?B89+%pS0Uus&1SM8 zvd~~37Vh}LFYqTupS~(zgcG6Jbz?I=7j~*PXH2@{RfG%%BFx6~%*TFYn45l*-Xmin{ z@5>s`?!(s$4~Rzy(qG67m~)y~V5%i#cSG#%V6T*>f_Z(BPn>#T?D*tTxg6ZXYZVzc zuzY7YCmIgo2c_4|Ku9~7EQEgH@vJ9_La>~1dnh|dTxdS-_AAX91B&}T|Vu~{x~ett)y357S&mb3KpZ&qvkW~zU2jU4|e-10GQ?~l`;l2 zWy8z)agr1{o~Lo2g!GaD{m53P+w0fk>EbVYZ!RGY-dv4{6NRBiYdnaZ<6V=_n%VBw zbPyD}Ub!LQ8Q;XBo&fsMSi!nXmB}pefdp-@R3FM7F4OYFy#kUgZ4QW)_!9%yn4FT{ zDvTiG#fSSoT>n)3CzAQ>w)$T-R2nvS<-O6JK1@+~_FCz=+}2#ZplKep;WaGRm-Wr7 zYyJK21b9*yFNLi$ZC`}02iuqj)@BghejMOmKi?(fV^7=*kr3nBySXyV2+rUPgtjN9 z|HWtPju1%N0Py6C>&y*xxX zQSUKnv|PNw&^>g4YH|h)ThN^)CFnxSy2&0c|7jI#mUX;!fcFa>WtCntO;ndf32+*< zz4$bqRf*4EH_){e?Za;w7$28=_BbZqR*5Nq!qUzA7ZKo4ce$wUj%9-~7IT}g8Dv;^ zyS}|qcpg>E$Vtaruwt3=j7mXxqnP)SFGnkTcaWPxK1pJf2H63wSDlvUGNwad3${_|BJt<60(6X+9 z34;m9N~o^lh@WA^(2BjlTsr7BF%wv2TK0;(d3IS5% zO@YkC*J1n`o<6ODQ+BS;@UcVvN$i6UbrB0x2|@~=26cIh-iFTiEJ_XlEHJMF?^W^4 z;Q`$A{#rs^@2QFvvcJ&+UQtav#+Y5@5z6b5QVL1A4>QAim4d#i;JgBjNePW#BRpSt zJ{2KVd@N501C+e7mP4xJL5`!S8aeLBVgK7Md^~{1t!nV) zs(knfAg7|K)Y%N{07B4svxk{dZM^%N6MBQ6G9i9owKmr~9LwQrbV$JK&TI8<9QiZ6 z7(X?t$z7%LUOmo&%+EISg(0!@P4O(5zF?6d9+OwKAZ(&9r}EREMxaB} zpKLE0$>!5KT6|2rn`-}MQ3=bRd3tR5XZ>5+-*e+dy+$#X(p>CSWNEp>@P9vd{4SXj z&-)rer^ojuiedOH)*csP^c=O)WJ*J^i3?S^X=slk4VyZ>#m?vEyqpA}`DaF?bSk4Rz(6A{(17rKM*1j&b8KRbsoAs4F5 z7~248uHW4iDN|lL6lLO^(`OMv ztYEg*+8wiSm+-T3n;+s8m4pZQ5op1?LvUpO)>yq3*J!usiyiLkX)9D#z=frTsMLQE z0?AjM3F^~GU(BOGhm*M~NzG1(hp{2$K2hM_>BY@H_0RMoHk+WcEBk`;&M=wt!};GE z%g2F~EH$?GXubk!5l3C9sEdyH$pP7GZi+#;kINc{%WuEgJYgZ@F-5k>t~3*42nAyM z3_L9_t1$s4!lS2)r}J5UOCGP~z7s5(3#~&-8OkJ8z6%1U{Kwhs(nD8UEs30{{VDh! zeXS02>OOd!go=jFV~v}u>coZJZZt*z2k9oTqlItVlipnWV%7Iy<=&1w%|y6V;p+dH z2}OYo>lmUzGiGW*Ag#7veQ^1C<3%hsH?O%Pf)nZBIn@e(yX^XP|9Cz<5EV-eTeMr@`EeHL5?F306@D-i8Hi;d`d6jFmq;{FON+Y+Y>`hpF4fm+syJ> z1a?ibLM!HtKPqfaZp_o`X4&z2ye2iw(;p_GsHIjUQk zpjAi%5W#|%-<)wnb*@5pdv3D~)>Do92UTs_rvhuDx2euw8gbITco|}c!9p~7r!%^! z$m72Cc4S*A%2wsoNxO;d2Fox&DKy>Lj3M!2baH&i8U+y*y?RbfM0>^ojvJ4Nc$T)9 zdI<+#e|#9%VWAn26m~sXc~7mqu-4)GY&;uNrd*VdE5PD3;f=@g8950LFtThSk)%r_ z3;K8#vWl_E9EaiHoZ0oXN?qO)<)js;u~(Yf6#l;8;PhAcKj(9^>+%xw z4JaBWE*Xxu)4V`BL+2w<5nolKVARU2sJmDeH-9^BIkR8hC-^+o6bSgDkn(&)T@$9B zF%G@VN! 
znwJY6YQ;;E@@a2@Mv*K!%}yIP!f*e&g&M@Ha77MAsR@XQe=%G5|}`NW^N?VKylQsxa9s7 z!uCKN)#0qKeWMrrECyn+9ez7hdEK!zg|ixwk)t)aA681PCT)WyI=s@|&YCn0Qv9eO zD)fTV4DwD!N5f;;{=ZccMG3cu;Y3Wb0cRxwdmBd~$|-&Iv#cO}>eUIoeCfxt+(+G` zhC=$Ce9Zs-{L(-V%qb@irV~9;rg&9vy%3MoC=ahewZBJU3_*pNkps)1t9fpu@Tv?Vn$`_a zBBb4RIV~0Fh}QDQP+C2gP#Y;HuAi0)Q- z+;i@ab=1?}q_8wHROrx%Flz7+vbmpfr*D)HXf+=6Tb~!Fs#zM=xi$+CZm36h!*^AP zW&8|ZKd0!Wy!Rv3VsyF$3X9ENJW42J( zfnk58taW#(@>S*%_NAo9zM@|H+lThlu(C>kG!P0i7-l!h7i^P-tlU`MQpIyO zR?dDQA`=Bjb532X*x-iJHZZ8SPfI9{>J^H9jb$}EF_U%q!UeQb@dC6nY7C; z?)F$xc5@Hfc_f%c`85hmzZQv7rt`Zi=6U+%kU~2!e}5Z)7KAask&dA z+;18N!#I17jLJQfV%BaB)&tf(em{piwqK*_pPmWPkwbgt6OwO31tBCBRYEen)o47K zHqG*vE+u^=p{9m^%n9Uq)p`73MiaIHj%g#cfEb`i%{W-rx1WAp7B#)Nr|D;aTi(D8 zDlNz**gBS43|)E$p_$Yv1c=?ErdZbPNhDa7bl|`QGmXy%+is;zfhc6bWshD@r;$8i zsNArf`jgtn(uBso$pQrdw%=%yp`?3&o^H@mIHdX^(QXZ9pDP1f-gpy4G%&`uE2B=l zFHl1cErDqMP1(?SkK?07ar+z>mfr>7J+cx;!5SHe);QRSJ=U!8h+4p^bUeGL((@(h zll^o7twJ?S?ZXdMrkizxBin=ltufZ>1{`S;6+3ngF+Ez6YCpQwl~JA)c0417zN5Cq<*NVmAUV~|L9ibnGV9b z2~#1TvJL1l3W!~%6#s@+)12lm1h0xcUXr;wNTJotlJv$0oSt{gJ%r}eFOqHO{Ls|F zUNpF}JW_O`0%_GCWtpmXZm$kANnr8g3QMFP-}j=D@`T*IVRhJNLg7s}eWC;N{l&wW zgp;8Ly1$l$=>3#l(WR^}jiD@hUfSMMM}%^j9YV<|%zvhM;sHe|NaxM9r<5JbYyltY zZZKr)%u0w2K{y3-^?;(iKZfeut;4|D)D$xEI&|eg^vR{+Zhr)#QZQdjYikQ~+Bnh% zVc3dHmsoufDa8e-YsKxRu=4LtSn3qvX_~KC{HEpPrNQ9XAb$s)S9ZPp4|dh^(hPK> z;ay#qVK??IZoG_~%kstr|9@vvM>yulQT-(Phh)S+xeCGFn8_o<{e<II2-c9UV=&Vq7+glh_dU4Rp&`sB8PFZ|d$09cnnZ+~SMHQ6fBT7zM9?_-};s35>kGiXCWH zOpX^(=kD)N)pdAL_*ZNB?(c7`8;3@mmWO~~vbb!CAj3w79>Cho>`wZrn}`~Pw;Vdv zI8NAuDLZNMEU2ukEQBp437&S+1!Lswrv&)>jA_{!V=LaWh?aVKM~B)m550*cIx=Z9 z98UdD2=D5+IFa&ht>fk85!MX1)7iKf-z@yjsRb%vWNDbO>vwRdok@9rO*xX5IE$P+ z;xq3>S=8tv*KzzMRraCGUiC5KYR^T+k{8tJ;geJ`Iq~X6#ccS?FL63{!QvFbVIY4~ z`**mo)*Dl4-tU!q)xV(hY2A8~9Zq*)1hhEzKE8ng?QcUB{YZF>(JBlYCl7ue{{7%* ze#%VunFZAN+$L=ml4lnd`_uGf`EJow<-7zTvDp^_;7~9}Ft<*eVHU|KZ7=6?skW_X zSdqqJStpay^PYsBu&yO>F7@|Dw31%9?D2t_fA~DkWT@qxRLT&C`;$HK1HBL*Jbfnt z>!HGuwH(>SIwK>CFWr3N#fC+~(bNK%iOkB8W{Xz}r2^H$sa(1=d)v=acDioc{P}!M zp{`fw=dwDhFMs&~4U<>TtvZe53tX7(e8qsHz0kpWd2D zF`l<_B}$|#{#rKgm3z->l-GZoZ{l|Eyt35|#ekCqtQgQT?yiw=8NR%q>ockGNWeTg zio?Avd0)hLIQ+rE*v}W)ojv?->gh+1WKyApJrg-Jonjm4Omi_a+q4+uc}LZJyf*WU zTQpe(Z8JV8l*Bo{r9CIyPp1pS%0E{sA%6krgi!-;jzyzUp4~gyxA^)6BC9@?Wzma5 z$e|Iw6CFqZ1}GE$Ox^>I2IPB}SKY}eX z*Yn|8(oU}xon+i32<4NxRh>$uYqN>|%GH&W`!3GqG2deDU7*ZdOor}2n9!je>UdU1 zfV6^24`X*i!g~>EkuSzXrL!E?#UiiXdo$FGQj343Kt8teSl-EcLKrY4{E^Qo=4FLH zYkN6w_STrju;>Fo*n8T!-JZE)%HZh~Wm8r(&OI(_i-{C;z5yPQj+y%^9clfJVnsqsx8TfbK%EC;MYN_m8GY)#>14*9M>;E~Y*-7zRPz>hLz2Hd1 z^5f3TpvF?)>Kj~czCvmEy@nk}D>-1QjM({;LG8z$o=uG?@c&zQx$luL?nhvX!QbxR z&FC4u%i42M(%&eSX};yb5_H8xi4J1iALJ^e>{Z^u%)RCJIll0Wmb03rj5OOG-|0X) z5fHc!BBs?nR5WmI130r%xs5}W2QSO)9IB0_-QAguV>ACQa9;uGxm`nA)=lOry2H0K zyW{}fd~^dB^+u{|fB<$R^&+xM_}&6@i7C0BwV4ku5BYgvH3e~O7?CX^$&V%UU^!KG zy?L~Osdj5^DGC;qK;exWCJV#{A1T1QQzxzOqiu3O|C3w)&VbF6(s$J}K;U%W%e>I+ zDGl0kKvbPr&uE92h?wLWL34Go0bBe^)4jQNr>kEkX$UT{loI_6FNXVqXY(6t%#;PpSwbYb+D)Mq5CM__^_Y`;M=SQO)uN9o8voC(T z&j}|t-QnEjMs=tF;S{Xx&-3i>S2kL6rM2n)QF-j++Xmgv7oFo1*TpnHY*9Wl4)s2b z&h1C-#%;6XE!wj;WN`bRe3MUEyC40s-j)_!Q1;a9y+lhgVq_-d$8nlGQ5@olj(Igd z8B}EOUANJ30Zz>lc%_N|MtgTyW8T#1t;X5Bo!lyei!>BUPDj>SKqTeJh9KIb7ke_i zv|f6M`sQPnv9<8}R_or@+d-*O3vc<%HU zIxJW_v^o$}`oj_$=>Jn*+YLf8sSD2i6i)kE4o8!;CQc7&;;0}a98l@EIX*Pk6-L+) zy}ErKTvJMs-&hi!kD%D>R~V^0CiU;k^RYmUHuD(Q)S`ofPT?_9CP<&NqrY0BL9FQ1 z60W^0+LOBh?>e;uF8{S{kfUXga_gh=7_;4I=7GTRH_98(cs_Z|wvgcN*~9sbu0Z)S zvSG(Kdeh4dg!nmO=)GXqKHruZ%={rD=4eRZ)t?J788dJL)tcby+O0`I7uH^R2SGYH z)bZ^AM-4yT{+ha0Lm)a9@69?)UJwbSBw(OnoB|;CHJyf@{q@m`^+pkX+!ygcgJ))I 
z>WNJ3Ouf!7cn`>kEmqT~K=g0csw0g%!@4r@C9Ii1sU>W`Xg81joV%&tuv!jdFpE*V zS0U#Dfkj{{&0n4M%6IYL(M+8_g1tem4<{ZN8h9W##i$niAwaB)mHKl{24(oC4LKk# z%`o&QKR_o#JzZ=UM`KNStHgKwPLR~^@S%RNvWcGKnS?ho`YO!rPx^-FxDS=(3$DJ1C}d)qBnDh z_sydj!2$9Pu8Ut(M3V|C9sIthpqRe+%-yChSfmntgCF{KEUO(?Hqg=j%NkRJB;jM~ zv-d?x*v`uq-P5c*rhC`&P6UT`ZAO+$O*yVEJHz-kQ-#3`;1RsunxB#`UCLvIM{)kRgqcM zoKrj?1c$0S%F}l}p`+DEqsl?6 zr-RpdP8dER!n`0?j2PFSl>KWeq{hV=8 zQ6A<2F$Q{vvB)US9X0N;GaQj8i3@++n^Otb;!GQ7d80&2C8x2dkQ{kZsvDklZmH>s zN1~4h2%(f9VUm263@Ux%=RvhmFKvr#bsyJNWh}?`vO7&!x~5s$2y>1jGT`9+`{#ip zjgW0BWB;LGlz(ea#Y~t7`j%P7jb9C!qF??Xo?MUPm);b{F^YGI2y-bGct+^yCzJwV z1!K#s^Q`9oqaD2u#?tARy-bfwzBv0jR5mlPmmT-3$5+3h)z1Ad!Or88#PRM>N%?{a+~Y%BYss{A zcXCOsp0BjK_v@QSFr>|@r?V{$NZBL+^&AtzMF?&w`r%J+@k1kbhN~I;aqvR{kbVQx zEkx+rmQ7mH+Lr0+V^`C88 z#M;spVviGajPLM#Dku~WlW;b@z1An+=_e50hR-slcFx@oeO1O&{GZ3?l!7jtkb-Do z5Ia61{z#1*GQEPg$>L&`@$B)pi7?-D|I_=(miyMt-{#r4`U`|Qjx-oO4_$GCkC2fC zS~)_JksJrGgBH!$4r5Dw%IA;#pvB~HGTt2_*Rm|I1JG z_JRU(R1c`)Bnu{{)7m(^*4G*PAxEYz&eTLG=Z3Q{QD4A!MqdgFS{>kudOdE77_K?i zQ|EcN12Pp{NOX>=ZKwdq|9H#!H3t;3)8q^{nqm`(p7>n+(c5o*z(MpGs2_uI}xBF9%W>)QXo)={sF9Ax-1fL-*n$b96UtW|pPIad`)~Vn#vkr|MP#buv`JsA_OE2~81aMP zl=T4f?f=_i|7W8mQE1S#6JkWyGm0YtKOA(-)_|z8XT8e*a1_aKd9tO06p%L^N?bnw z-Z8`9VPHRJLF+}4??R{pY=N#W7}$!Dmr^#sM(p_$pc(5?{VqcNZ^DiVUa3zy#Uga3 zFBs66KGtq1lPOyL=9}_fWnjkq3D~`t>yN1JS=XO7B{-|^+1IxkRaz>?5|H!9Uo0UL zH#lTzeAl(w7SobHxJSUxZsK)IQ2*Bx$Ww?a;LU{w{pTD-qrI)Y9~czJ5JI1Z68^rk z#((I1=S;N9N@8&Y!a0!Fb9`1mVga;~!CaId0@l?4VQM|=;(Y6cBQcQG{T2c^U8W0I z1(Wfahg$~&{2aM8WKidl=ZsKjjS}m7GAK4g<;K$CcA0Y}5;I@Q@+p2RXdH7_EcIdV zoN#9=2p!5S&wJ{6!69T?g35ZD{094%LstInHdEy!GSH+UOivWz;H}_|K<6n-6<979>l+Kc9O<_h!X>=rq$CDZ<8~iZf>Q++k0|Dck55eM z>6m~($?_H0hYW`n8cz+dCg^eK$hc(6;}yxHbzfM(DUek!-2rDBtZURsX-ofj#K!Oq z);w9S;3+PxlurN%dsCLn5>xGmAU%l+UnQDxPrjxLWAoZ(uVSOn584u<3F19sgN>i^+npiU`lC8wlmY~TD!R(u6_+e4(D=O6l0zEnPq zrF8t3%zs*pcz73S29~Jzu|j_eJEhzj#DQ;XqX!2C9VA{%gWrO@m6DOV9mJA8LdXBc zL{0#ldM87*vdb_2+AL7~$Y3qv^%tY*MEIp~Xmc!-nu8qxMxDFGgTg7+xTi~@e1}9c z*Zparyh4uE;kU_-N6iuw5h7dvg@ARJXyye20W6TTWs5Iz|HlO1DdDnw%XjhbGhx3_ z|I%9e=ZLe2)JXeeA>(=V-xgi~Rb$>o0|V_akTVZ+YvC>)QZM zmD>_$$D`=s0>Aboe2CoSD9Nq!L4_SBuXP`z$OkYsnXJ&xkSm7&RY7QLgIhA`HB^tz zafhf%iJr0}dV?tuJgR|l@*5T!U{ILf>j_;p(3Z4ZJ)vkC*K<1FG=p%TZ3(8mTt`Qm z#BtKK#U^f@7Ty=!Kd^52gLzG>yh@omo3&DUzBa=8*XGY-6)xR^A>lBI6 z_cTQ^H4_lXsGuN1kjl~78R3nuPtF5(CxMn{iSESrlVMk8TI=cWtvlLdVPBDtbGz#L zv|^P*o~mQECQA|!5YGa9sOZM=jAKo={um1^b7?y63Icy~^&z55+kC!nfJ!*%{A;#$ zCTu2*Yn-4VW$d=!am<*S81Aw`fo_qdUFzVB6%<7LwirP6MOGDLT+TyZ*{zvv^4J#kawE#U`=;6 z$y|2)ct6cpIFK6WxNbmHF|W+{f3&BNpMo-nhuDdQ*6^kHMBGa(zX#ZZuo&Yr?o##t4?jXia8{=F+ZA22u6fiHq1I{JznAN<6$KKrggw|Nwdwql zkxaqPMAdY)$xlCYze_Z$Y=7@+NbX%BogA_(^5#J#Y6ieYm>-iNB%J>LdnT2S`6~i* z^;LT}YlW$4fax`j@;K+E4|8PzZQcsRXFHmew+}z&)G5lD+W+;0&`$=YtCtU{R3`W% zJi@U93lY9_#*5h%vGYqspK3-Qze@N$Y)#D0;`t%P2(B@_qzM8AtjBsXL_`S`;17lX zH!cSdj;Pi6r^_mDw zy;{TvstXkUPQ2VlEY|)!NLlF zdk84g*gsR&6eOBzgj_5)N$BP^C2%gDs_k-}exB^{6}&4oo7M~e8bdepVTNV>ic+uD?&WebLPjL}&9}Sn zcZs7JndD?HAIJ_rOIc07Y|+GLUFMnhull**OFA5QgTs;AjiU0c1zn@bTuZLxqtd}} zW97Bq>m8kO`5K@1(tn7e8%)mp7#_m@`wjF~-M4S!ZyGBfkVu+UI?i2mkiX9}dKhhQ z5>7Tycb5rB5wte-!PF=D=tw-sB-|H;k!vv9q}U*hf$ZC)Uu~sX(VcsKpS)b+5%ifz zQ<^vjQJSrie#TSDKOU}8sV^i1UMTodWO(n9z{28CMfZlz2ZdNG{}6S|$Y0L&JpFAY zKw&L?4BYCxwC|eA2S-NYFwCw}UVrmSnXa|q3IEF0Fx`1|L`<1d`!cdUGqRm7hwd9; zYxre|m9p^h)2gL)qS9a0Z?(!bcVpCQ-qTX>Ri2^kMjo|P-FBAL$oXmgdpt=FH*{U) zvfWNrr1rCEDgP2YZ6)6KJVGx?OHbupp&m!7BJMfUV*V?+xTHp#Mm&LXZMcJ0E&*gt=E`E1!@(9B>h 
zqMvs?mMcim(Le~ou|jAFT&c7i;nBT}Zd|YUl$bUDb!N^xBB(ebv~ND=`J1eiq=^VXqMp+h(>g){*&h@+MqD!%Ahmh!#Y6`Fv)P&QC-KcNuG^J&+p*eeTsxKH$jGrT!gB<8NY|J;@TtKHK<+?(jF1NhvY1u|D{xvj#-@S)m& zQ`rZRFQq|SOi=Bi>YXDwLGye`zDu)cDo~r>X`=G^_Q7lhDW+D}4m)Ad66>mBi3gQc z|HJg_PL*ABLaXvjK4B|z#`t!b!sE%Eig=oglhkXJD1Q%aguxZG$ZBP4j*7j7TNrFbZms92HwLg?=?98%V-=VCLd3pSh0cfKfx+U0^SvwEx*|D*ya2kxg+k=f@~tsKO@ z@?%S14~T$IivKBNbXg}?ER9kt${28@cYI5nS;>S&!bNia;S=tv^ zh!zn!p%=N<)$^>sr46nO4W4Rk;v~ap%kN(+u6)wvxqD0Ug#7*bb6hp0oCL@He5+iz zx`WB3kY@!_T=~tos`-_O1IKfm!NlhAelDdf+_x0@EOjc9TaNLiR-I%w#!u&s=OW8q z<-&YFf`!a{3c9^^%tSZj5gKkXjim5)&f~D#gu~mv1rI1!?e)5UKCWokq>oxg)S9Vf&Kk?fOSLg7}6`h^r@zn6_;{N?+1py)B#(bOK8*_;r4fZatX8D4L z(&|F3&Kq{K7^_QAdCLAye_QB$#+!kVkCiG|{mTj;vrDjMg09Q4V$OAE#Tm2SqT5WRtwDtDd3BsDUoyDp(<}IX#V3|87UdsThe%s2sIEnav z{3CPDb7;>-j+aZ`+FG&yfz=9CS)yJdP%Jkg6xWM21T_%E0iUfMR#waG33Hv|`{Y+~=DYcx%|8KdyGxCqxzwMJ* zJBht!@$rqQJ244KP=<3F-~ZVbbQ*$aF+*+lvMJ~Oh%ULO={1vouh;jVqD6+jEFQ5h zu;oZu^5s9DXh&5D`#j!tAfIy2uymiDAW=TAkVK?kgQHz}kzGZYb>pP=g1v68F& zdDa}22eb<>ttvWP&ieyH4PK&F>-jdHd`4r&PZzUXq8awWON^`!E3$9&_kDuS%yIIX z(;REM(F*itd+uW|mp8s1-*0<_ zHY-h-g|z*NUAPIGT}##AB4yh~7b0bM)A)a0dPh~}*WPCCyx%;s3$^bDcC=gm0^r0D4kdlS+>F;-6UHaS~i<3ov;+L2mR(t zQfv%_&CL+be@9o~HK9|p?^MA6%9bhd@2t)D?LF2PVo`g~srQ;eH{z{((WPPQAM5m& zO`rF3)u`4;y|a^#$bezdA`%lFa{M&@f82d_SXArRwv>QK2neD`NP~nRoq|Y*Ae~C7 zba$hObW08jg5=Ph(v8B<-OSK1L&LX+bB=O;ukZD~*LQt?eSdh(#+kjJ=UH*Dd#z{f zy+yxKu4zlH=Sx=;mQs%6Z!qY=Ub~(=8uU>+`6;Spb#uA+TgCa`;Mcj!r}$Zq0Zkdf!(aiuS^ z78I0F%nh1h3c@S_@e}FV%)=u@rN|zf`@C}5!JQm*Z26WUe23wDP$X*NT~z?$$w`{$ zr+jKx-P(<2_FOtDI3XUlbfZDEyy{aqp?W*p>e~58!W|jO$=HelFGF&vPFhs58yVK& zL4t)9{Z{@?M9x~_mEs6EPJ`z&D3s*Z-h9BPr&N^4kf%K^cYITXwwLJ{t!9mUocDqH zI6P?U0w&J#m0E=c7Y-?|e*1a6FXUE8x;o!%nkbhF{x{4G4SZ(ilMfXc(JuFwRG`qG zclvt;KGE0eIL5%UF}8tdT_Je8Jlv>pJSWf)^32f+^*@bMhg@0scFuwCaaL^z0!;tK zR*aov!e17hXXM_(YAu|(D60;9hV%^L@V7+Vh~tfwGxAT}%&!!z@QCC(HHP zol695>s(qYXrO3t-(v!Iz0UwKc#U6*`L_>rH1MTeXjZDLuPRkM6St|9cJH>&NzHcH zjXaND`zfsf7i#qg`+4kiPyKd7pKsBboaQotH2XjzLUY_G%|Y9&q*M@pSfg`mM(Fwe zBX(8-7J?2m-@`it-W|8*5o(#jxzHKW=<(J)qSS>f?}o242YENxR8E4Wd%Gm(!NKP$ z;^SzAsK|Jl1{5m4#;QzzjUR825U7FhMIF{T9=r7IeI52zTso3}ISeKiMz7GiXVU%` zB|!25SEjUL6$rtZ^#V3jNxJ-rH>7nS#;5N9f{$hCe;zZ$myyUh^61$=0sAs)6NpjG zV2IDh3uN;saw%8HexEa}2iYn1_pdnwXO)%|^%Fw43U%NF$*|9(q4pb{=K=*{<`e17 zYf%&u_ymE_4Z*3x`Gati)+qiq=&KOdHC@dxF%hhmcAo-aDcO@A566!FUk&OR zBW)Nh6JD^O&Ix4Wf;rUlwGm0^Xz;h%&Ujxg?nsMnjLdxE2*-r`BE*DVce||G#Y?Yj z!tNAR_Kc|s^{E&oUI4H>h{{auVs<;_i?#Zl1p&oLTJ60IxoinS^TJYzz`@S8YRI;dA>*1SjcX%Zag!GpA>zAjd2@_y` zK)Vd?K70`dwrNk`M3ZSh{nXS$p{&-i$5_dFcA>`+x}W@IDfZWl4FgqcRdUW_b78NO z5MxwOhOXcrl^o{R&FbjU?S?42Pb`?1Jl}s~k)|}eINsFC&ezlQcx0_GsSy?=mWgWn zC@n7!yic*uTV`_Xq+7Brm#uXe${c+LbMpMAeHsaYZYV0}Xq-}iesK0KK;9a62?IDerP!rhAG74-@rUk#;+Qh z@}bEh5%7t;ccRw&;+kLTJ=8lJtzvKF9e?}Co!6GFTLGpuN^!9~RX3k=cPc$@f`1bj z3)>J@bGLYNGFXT1Oj#UWDP+=SDgSmf6zBXDYMX7XTo;h|C>$Xz^njwBqNmj8WCRzo zw)-I~KS@AwhV-SItEOYt|J3m4!qtB(Gq#CLOQ7t@fX1soh z^gOq1+-1LhDF#R0T(^ypO~}pNa7k#{()BzqyI*PhenCnUkyZSods}NyqOf%oIm zZ|`b-m^M?+Wz04(B-PV%ATIM4>;2n_0H4}bYn0VgW4QCpfb~gwdu^r&KIyWCpOsRO ziwE#sDOMx3N4-O+ z0C^vgX?lxwsMdos)E=90uHwDdaMq$W8qoPYyTJ-xvh$3yuo^YxW^K9bbt|iH=Z={} ztS41fuX+o>mhv}-G0fH3R2rg`A?rNENODf_I>85As?t@n&&i`0dvmh5uJ|@;E1e-N zIt&yL_U$6Uh)ZK4S&6(v)Pl)HK^qo?--0=hwnP00cvUxWBvCN$Z=f#c|J1z`2jd=) zeyBI!f<)&(dYaOEOmZ*D;QP{n+Z0argbwqu*;{zp2?pY$;!UfWW1@(=5Ym!2>+yyX z`2mhw8KEnYLc?acfv1S2(@IeRJh~CI^j&&Zr4fqW5pH{n&3<6+Svm$)=T7ta>t+-} zmarpO%}Pc&LMdHW`s~)z!YA-Mck$X%z*RV#);tFzwd|St79u}>JdCWIOg8g;(YP;7 zPFfJIdN8oXQlcN--E}MrdV)FjMLH)p7kn(Cezsr4Od8CKTrDpwg&)K>41-Cqv{GtW 
zlIJ`*6BnO?ntU-rLAzv*R_Oa?;GZ8i=OGP!U#^4F#btH%hKT!}2@T?Vj^yM;JCG!?;YAc+xXjh6+O;6Jzy<8ZajkGeI3?yw*(_rUx>;~ zSq{9mb?ZvBTxRjQ(9DVRNG0BT9o|AYFS)t!Zg5S;O`%|&U%VPIQ$ALH7jLL*cPmqd zkKiml(UiBNP>f2K!Euh6<@zB^Ey||1%hYYB-)4mvnFxM-(U?CSeAHF*0sNHV;+d#_ ztN=^v!b4V-TWt4yhjI9Zw<>bI0#lFi;hJG`>(TW`L7+z)eojwnsPL}9FOrM?@lP7O zrw?vHRBb{bI)&#@<}YqVkh<$q;!Usd6C(=)LWjN&dl$v#3iH9^jdDhwOr3mdUQ!pv zcZ1}0kJo9)w!#>Rq*|)l7W7&^htRw;vv|qrE5?OtUM=h<2#@Q+b?#YpWL*VEwTdc4 zS6538tKAmwDN&5WM&DIVtGc8;!`j}Zm2%8nZ?n`y>)CNxF5X6uY<+N3QYF==!=1Y; z$C)?1_xSP9x721>sJ?$QH9YWn4*8`~_~-$zzxJ*YdIsLI0w3Qdv998StZ>+G3v_=S z8q;#9s%(%qk9O%hj}_=I6R;3AU#I5V!AnwLwXNdsKmO*k7RlF!Ny!?a$=>7ow?lF) zJemJ=>5c@HgbRNBN`E_~sgN`|5#X3e=X<5gvGTV=HicjY83B&@*>YY&(+}#6WLf5g ziWjUcPdtTLtswUbb?DGwYm0oZ^wnO~aIQ=^d1{JdSXScLFi!E$8 z-pN2oAiw5oRkUgQIaJi76;zT@Z~FP8%V*PWRlo#_iEMJtkL0>eq(|zLg2991WY)T8 zF!^j9L@+buMZC`Pa*%Cva=(`+AEB~$o@y?kKWr%S14<^95df%EIw*l35A=cAoEyn! zHK+4fPaO81?}U2x@tWrzc~RNwR>IPS(HT-302kx|3to!n^(2QdVf*21`oi04-{U7| zXEtg!40}x(l>dN7=`x6}o>J zVHmpFWHJUij+RenC$7G|#Zg*-Iw^iECX&2j5! z1@7hz|62{{G`NJE=W7V?%n1S7D_%b)q|lfNJo~eN0^z4EJC|0=(>=O9<2&BbycW9O z5kjE1P?Jh5Xdo1L`RfPQl#uid6ijjl-82eifRta!{r>20zcNGQx2;5&vNiY|ZZ|Tg z1~=avyn}3NWT*PV)cPAIE527-PqJ^4f-7pL36V6)ce360HGUJGBe(8f{z z&N2VzasK9D${Dte6WQ?&6op^;rsyvJld69~tn#y>`WcVhU3IpMx}W-Qz^>E!opLnn z;k8e9xq{F(h%i^FKcyP3VQWB8&gY+cihTEV6BEh+N+Q3=-G7TNf3KDxKI;YZad=Er zm)8&`6b+2ydR#rA3=hmZ_6&Q9>R75e&b0AZX@bd>agio6TvCL@HtlS|2asRw1uTrxr7!=?SP(mrewuO0f>&3`O zsdm`Ije4Ut6!_2Q=wMJ7{3Msc?;a*s@*~RIjWltBBLkDe(^aExK+C0imBk^CwXH)apGn5@47crn1{R#E2uPktDMX4x=t%Pj& z^i;+@eGaQ}36xw9G{0Kyyznr7y`wlv;ETsTQ0P|m$NIrR z_zCF+v$SU#P!7(E=bcagqG=Cr`aylFhTl{aFsli9b>r_6=5I3hrz1j)$*(D`6IJ0Z zg83%2m{RU|iVtJ?C_&>9m0_;v1RNL-K3{+HO^XdTmqaej?K3(|Ju&wV%|Auk4aT$! z-Ot5PD1`QgkeSmgY$y`f3A7v=-si!xS!_ zvvA*$Kuve#2c|wH@D=jI#q&ei<;U#*{G(k2yh%-knhkue!7RW(I%miIjJ@7~{z8mb zSMC1mhBxEZqpNUU@_jkl+4zVCZE;-4jgV8~UHcE97NvOxFlDXR?m4x=E>|co{qcs( z6AzKBK*I&~9m@YCz<-jN_Jg#jxI0(BN$Wu01@8WAzkVjQx0cb7LmeOdPciLxf%`v? 
z2JWKNn_7obOg#8UoLE!?Md-UU{FOz4vu!iqI8N@G~H;06`PD{2?}5$9+1 zXZF5qDPt=E<;+s}O>MXf{{z4K>#C3G(i$eMy|7?IGan4TUOR!jNISx1^t~z+AhD13 z7oI1oXg15ZVdfMi;Oo+R6DUgkxD_bi_0-d;s12$cBDB=9v@8VMdG6Sl!){u~(LRKa zqzZ_aZT>Bf{$}p9Z5Z~cf0KqDmk2)kznFVqOQm7o-=x8x44<(5lEeERM%*FoaRo>N z(i)L`qjKz!p!3^VK*p0ZusR%VG7WL(-uKTwtKN||(%(ZbzPR^1Y~m|t$h@j=ssq$& zXF~{Y;h@Q>{c4dMoG|aa6`ulPRH)PS!mp4wy&n@V*6>YtH@9fux0G@iGI5(506 zS`6Mb{x%{XF<)%HmL(OLMk`RIkO*wj5!@ug#OsFJC)BTOJ&kZp_(9;`Ho)Jsb0RF{ALtUXSnsIY>7%gr?BIy9Vrb4Ns0kIqMvf%PsKv z6UtQt67chZifT@#62ieR$Q83}WdvA9SyEcR5_)o-oW{PXit|oY6Gq=E=zsUG)2>P# znetuceL&sd0ZGSc?w(=B6FM}F>{5fW(Bu2FViNi2sAZBC?O%TZ6c&{-IVy!xf=jIl zl%2pau~0%bLj_K<-a9`J2W^-4&W@(dtbP!AbVmK||1zTM+KPFSfm#oiyt1ErqK@2| znGHnsdb17Al$tH?G&2NW@v_CQK8hHKW9F8n9kP{`XuBPL=Hg@B&((MPiaqu~s-YBq zYL9f{TQd#k?d((G&rt?xY(wxxw5f{pKkfa$v-{6{3lc4dsX~LtV7U_-$WJQAHhSOV zx7g^g-d?gRg7XoJ#CDman`c@x)-BVJRa3EHh~@nz`o?VUv?;RWR1YGty$KL*M+bOd zUx;lQ#Q@&_nvMKT5q!e{fKr-Gm|3x*&2z)hr$d3yNb}L+GL!KU<9)l&aTPGd9^q6b zQ|#nq7zPf~0cD4b*}nOZZ`0l-hqctV)cA6DWrZY<+9>=S-xu}an1)CU!e>ue6q%OG zyUS>N#BO1ou0++i3NM~r+X{9u_}}=PfCV|0*;Kvbj5a)ynTCvyT!pF0F*ioB604qj zj_}ssc8Ih{Dk79XzE+JKLSH?KSYq7=bOKD8XY3~)FH%+}c7>)j@tc;ll7un1kklbj zW?GYyUf(uHi|aCq-d7Q0K);8!GedF#5hPxQ8>lCs0Xx@F|FOHT8Xmu5`+Q04P0fJ- z3T$Bmkm1XOq!0x7J;RMBNP>Z+6dV}9SN_~2_%D{spPc1t2&3^zr$pSe;CTgXkNk#( z2u1=B@p`NZ4O1l@NVMhFfiF{cKYN((*}*^(2USGmHDPqKmZ#L{1dN8h=Im8jKK-ex zQdfNLxA$Eh`R=2Rd2{PiW|K4wNz;-?9gj1^Jw=4kvJOme|Jl0&25)6ei}sW2Vs4#t z2vX_-tSF1br-csPtg0;Ctl{&RH?(t?ig=3;Rl~qltG((@0XlGTX=9xhY!szW&SoBw|0UMmoi*c5;!CS3{BFUfnw;FxK=5P)@Qd*aGWTek*KO3Ay zzI4Ayi(V1P*?z?+e+?&Qh>IOjS^j>O{~;=tgC@%S+rYXq-8;aF4Y4bA^><>Op6j_v zW)zrKL4UAVVKWx@wAazk%@JSe%|IogVomnaohxp;T%q2xR2qXe_URNH45R# z?#a206M(%gs4bB1HD<}k&CG~cpJUIdsGqX7)xw0^BhpmLNswjYe;4lMQNpw2ft?&| zOf(e(jjvJuz1)^ufZ8l|$5{TTBUfs%d2Rj4>x#g%1tPB5mZ43_vtb}jh#1fsn{%o2 ztTf+4$FgohIgaqF>;ub~PFZ_ck6h8LG=4S>N>_;+?I(lP2OU{XZmfjBKyHjo8+mS+h#!JBG5!}<_?(Xz3&yKD^b=qgKKF~e zd3uH+p`VUx$td|B2k{`GvSkHr;jO&ChWd_COqaE#ri~wI)V)}b>Cps}*#J$L5*O&y z$v+^pGL=xjYVX%j6>#`kn7-bg3Ke(c?xrKeljMEvj~p+lo-|MLmqxGHNLm_(v@Xl) zM0+OykrX!wH1@VLZDf%qXpVk2+853yG|jnl*hNRY9XvBLYOi6A=q>-3d-xD;9|;6^ z4<5?V;+8#%CTGBiCLm8Y`D5nci^l+{DzN6OE&*WgFOl|Mk3voX$f4t_m@a4Gw57}& zAStHU$p!7;zB{cFB0LelGHD>-+f;eXvur$^GZcfua@Gm?xA!K784|6GStIDlG`HF{e#+>k5@HW& z$Wdv6;00q>)PUJ?;<9T&=8~Q#RW5)Mkfwhxtym72QyMD*Sdd3w#) z=@wN%ISv;lGA`QDM7-oI6uE=lR7GvZesy{g`i*V{F>k~Rr`1T1g378eyGj#&{}hAW zP}q=`x^i}ljubH8BWeI_!%rHukhV;Q9YK(Mi|F0eIi-1`=dH*B5ec79>ffRH?_Z_S zU?b;?wJBW@YyRY#FE@a!dj5yxNQdrtMimUWj*c5v-}U(7-;)3`aY7VWl?o4b)3F#C zf>0j6iM5Kw!v7x`|I4v_v4HCfDvNKAaw(SVTPHchbN8YHk{zHxz5l47Jn-ISU>?^4 z*oG6siqF4BDFtL61X$^3%|BY{HPlYjVYD{9D1y@t3H%Oq9SbYH)%(oUpE3-C{Q*OJ zjp-XkqA}qcB7vOI2781FApIw9SXLJkEw`@3XGR|6YRa*j?lSx9`>jz|j=m)x`O}EX zFD}Q6ARq_b%p>ycw^(PYOlQEL+zD^~Ds@ostnSPLv9i0fukSWLi z^ge$VzjPpaUbt+}{T#{?Nu(syH>&69DMsNk`vhC^d)2sS)0Wxn^?M!D?i*Hm?HkdX z<(VTxPo+NQns-GkAVa1lo0zyw5|RD2t0vchG@Ga5HV7Ase^n3u5u#WT(im}ssQE<5 z4p)uZJ3_QdjPqo%*xqN%_KTi8?UL;d$vK^q8e9s9RxDD3r3$-swUVYXBw283>U*r2 zbOr{&K#CXE8yp)IkETqI_TyRTsPrg*%P=OBJ{$l+gIKZ$5)BaLi zWLOad4*^n~%y!n#s+grij+Z4%{+iyN3Qhatm3aLygnomKrF*Xk{U*qgn~@@W$2RcY*wAzA-Lynu_Z(q-)>GY0LR&sH zRF2E7sefi9PxZx%o0&K-URmrcIN-)EG;amYs(>}DG1W42{6oU#i8_wrp91?e-yZZl z%n*!Emwo_tYRgkvS~XIVcAeX+Kl-kzAA;S)tLq+0UU*R-*F7gi&T7BhSGL?6#Cvg2 zoLAE)(DtxyAdz{k9f@;q`ndsg%HnB9@)-diF&rX4xam@g+udtr7>N2YYDIu!wQMkj zPBpeo4?TGM@Qb^;Ikmh+xcn8<7}!L`ThL%5hqklri=Ke>(a{_M4xQs%Gv_1<9wr~~ z&^Jg2m|h3W@|ZxuLR#Er(BZ%3d_9;U@8U)YKsPfIC2}%m4~y?va^@*) zX?Wl_`5G#V9B!wFggKudk#=@L)yQHM9uDc^H?hGvPPYQdA2tVCG53oc_<8V{DbBNn 
z9Ia8-vrN@Ea%HxMiYn=QZg?|w7jZBH{_Hh`PUyF{AYwxTn26XDtWu5@$YdS?k)JRd z$VFBlo8tQyGL-7}(hpFkQ1n^9Z$-(pzhaVD9O{o2LydLPM?;kL7|>igf?cz!EN+cS z4}CBo!-1z()`JWQrV_xyO^sN8op#kOF<1>n3?F6$%pRf}l(f&}mye25xUPCF=(jLn zWHC>~@jeuxc3H|%D{dX0s92f{@#KvsNM6Hk*$NDpU4}>lFDd;dEE_>ENzp})WLm3K zsHUdYaq&i0>aVez^sTEZ>rC1p&mJ(};*)=16IYQs{|_!*L&ab=q`fYv_Ns7`TNAhG zz4n{e-q75wCvc!>#Uu7=|qQA^?1UG#H4w!a_Cjr&7Gy#yB@p zob!%ef%tX~7nW+VS2S<>^WLYXhMdeLlHXu0D3DcG$Gg-3+8uh$m7MXsvZH60$qA;a?wUxHs5veq1vt>{?vr zm6Tr|HTTvtoMLL}Q+iloocG0<&=C}7op;ng!WyMx4KnWK<$mNfsae)Q0nIuT-`$eC z=w}qL@yA{Kg2T`i(d3_zF~q>lUAB96u($N{qDI&2Y=hU_SwCEZ{uHJ=W^^!>VLrlE z?B*DHdN|n6>AHKidmc!swUn5+I#ClY|A|BKJ??!rz!4sOWYeiv5doc4>3TQ!egV@B zzO3ZD7&){pZaRj^NJ>^c)F~IQKU&st-OXm&?PTCug0Y+F)O68OAA+RLCBpdDmDeCO z@h)`~T9r1D9tTs-yF)SAoStnr%?);lsuZ#Gt*0E9bRENa$T(ZJpEo2kik_VYd+@l2 z37vKr?Y8@2>;zV1>Opa0rli8-EX${~pWU2zah(0sBda>)#3gY)AosAw8NQ^5c>L3`v^D zq}S4pyU$U4+m)H$sb{A>2UH=qv#*}A55l?NBs7zglMR)VjwSFwXqk18Wl2T{&@fXQ zFHkqgC9YS zM*RYMkmiLKge4=|seC_X54vp`QXhc4NaUiz*oh3---l1Cgb6@FNanI5nK5m^-aI5= zuUTVlkZ`h754J~Y;K>TW`&(89>d`NTm*pTT{j}RNE=frw;^KuPI&4Oxhb91}pa#@sv^Eb#tUmfC z-lSqnTMfGef@B#x^Lg$?9r5j2SJB5wHhJz?^}EoE4!vkA(u(d_7n$-SPlTxS^2K4g zWkRYgfATJrg}uWKZCF7&{%w;GglNMHlOp+TY~-~QMCpi{b!nYSfl>vBp7?fw;MPND z#cD}-CalylCt8b$s7`gNL6;U?!znxFjdksV6hX)06p8AD=5Rjk<(?Gtyj1=(wCr`l zjW_&yz!O)M;Wva;=)z9!oi0BxCkGLDz`YBiar!_J4reo-7j@(|1E1S?sx{U~Q&S#L zgtw1N(G$_4n|*mo28bsO3!^7*nnbs{>P!)8nhNqh3bD%LFYucr0&hm@IZtUFBQ|I- z@4NUo*1oZ>qnYVXGV5j6E0?EmKb0}B&kj0lQ7X}uknopxhgrIwx5gVc``+uUEa@i* zpRg<_4_)yP(Ma_=)}qPYuLU0OQ|E>#9QNk4k0ZCAykkC{T)9(iZhUcu;5L&yaB#ms z!|%jlucc*kOg=l9kW%D3zoVnF$)yU4#$m4-8gEOk1QcbmRfl+N7HXVm26BJ0>Ag*z z9ypoeH2z$Xf2T5I+DnxV9rkoGeA>G^Zb(3F1J;&gb<0w~Wq_(`BkrE@l;dzwSj7wq zW_Rt;Uj7lF=?26u4ib8`PHv*3?#_Bs#}L?|p_+d_qyv zJr_t}QIq8c@D~N!zbf@{Vk~>FdVBZysWs2AieuuORYkGaBW(D9F5}T^5BjTVFBS8w zaznt9LylP#G~tC_uU(Mzd7U*6Xk>+!TGODs+3Nraq`m!o2=1OgI-!qP6V506JA03) z`I69QEV{~<6oQqohTFADjo(BEx-TbW+rc)Xqq=%hge7FE64Lh01+*Y;R){4m< z^LNO+`!sX2AQnV^y=Qq9o7KC==IjY2oK~l<%sYGD!yY_DK|!Eyjl766e4(dt5A|Mc zn3gBg?Ly#i9NdWu;$e~QB%x034pgE^f+ zh^=DMYeJzI#wMuK4H!!Mh@9SbOSDq-f#_)mU|ueJ-t0OuO*38xDdaBOFPE9$Sc-4& zv{P8_x0!D$3sSlayeto}t}FObE8dpsSgq^6flH`j((_p^t8aH6M|Ei~kTUw@;>5td zh`L;RnwQy z{Qf!3aZUaz0s!rI*{Lx8A$C^`G{>K_29^d1nD)ziZUEn&HMP0%52*Zx7vP9E@iKZ6 z`YjMKg^M_a^nBuGVYTEj8Qi3mVj(2*OPbh78k~;fY6~m5Js|@>7d+Yo`a-+g(*Qh`A$b|TBuz>66S3*Q* zm?=ezmX?#c)A2f4yvl zqKyq+CtoY&CizNy`$Esvaa=!ZCIL~++F6l}4v%gNiNs#$FJ}u3FbwwltEi{i*t5#L zQbL}Tv|=uf%sWAw_cd|nlp7Bn{U5SNUSa=O#GyVCXF2Z&mmU?2fPs5}B*k4FH~!KN zwA_aC&)Rv9)DDQyuS90MH`Bs>G7^9;QH~>uF|QiSyg}jRN}{Fc1g?C5B>=&5I4Da1 zg&VTHyQgZB9?J7r{VPmO9W1o4ue!9(rBvbc$YU{1Wd&TZEPCNk9TOXSZ_g3#$mubp zK3v^`9S=Q|IH+F-cn$z@`BALp)6>j6uxR|Raw~mp9@Q=CB{ufhP3;tCN>!_VnyA}x z6YGp>EV(_s(QPvx{8E!_@kJi1A6WBfp~JN3&XpCg%6{5#uwIELETn z!zttD-2~ZHljchZ+J*##G^pRmm zSEW6}r(BgV@AD&#AK2kL%`k91Eeuz4M3Xq^sd4mY8oCTZp`W<#5(!QR&<;9zbgbX# zs;go*VV-SVt3p6o^1ElO+NX>=f6@(~9vJkiiSDg%EQ=-yONH@4HO=*Mg3MCgbl>An z4QbxK`R=AciLQsrX4F?XlbZX^+!}PDNpe{A%Q-BITbTD>TVzECgX@>8Gdx?^2RfUV z9m#EnYw{FcMfd6AD62C-l%ZN478=CHolbDJSHMrK( zqsPZN@EWIZ1M45K=cd9j_835y(d1v5J=Aj>8I!_vc1=?mn7@Y=+4_zdzQBD9tO;|Oq+2D zPxpk2Ms#(tJ=@77H0eFN{Z`z@L#abSZ$-p?i>_kwj$G!HQ=p#lTJfs?@yFd83BoMH zN$W!^`Vly6)h!t^{Pjm|<|Z?B2j#=KWCF6NpVKN|fT?D@;8kU(>1$kzhmRPv_JW9j zf?7L+5QEjkkP~%1eov#8$9iW4s%X`O(&{pVT zdP1%3QSBQ0w`^`(K`QD>CdT1EhI{4NOPVNS!9nDiB#Xt<{rcEL_WJNjbZ`JrC{3}V z(>p)y1{bf0oWS(5?tkQ`uACV+@1C%(Hyx9&zj#u0-jcoGM8L<3ZRE-mJqcB@BOGst!H~?A;KEd$vnM` z6M!&_%yK9yl)@2}`Ob)n21>RWj}yyhD~2TFuV=+vp+SILYA1D#Lttm{-QXOMf%jg@ zBWuL_JTvWR0-@!~g~ijoK5j(zg?)=?cx?Z2Ptku!m!NyKjJ3~!zTDM|9^GBYWPT@B 
z4rov)JZ}HZnt`JPY`zpWxZFK9>_c-}f^WSR=}wF$NXV$XU&xQF-~a2^mw(<@X!K~* z1d`gL?;b^Lr_Z*bj`w|-@=6f<0B&bG>AI0Qcr-LdZNbS>rLu;6AfVH~^|neWR~Ol2^H ze&)t*65J~FpL8Fgwb!tlZoyePA~h)gqp*zI>fZpuI*|pob2p+3kIVd}X_Grmo?rj9 z>jTuNVtktCh_l=fw{IM`8Z29#Ic(wI}?EcPo@w#t;(sPwO z*5;488jsT^!R3Ob>CS_0T;7?ty$p)LwiDxbA*4A)bS$@>p!FFw_0F zBzPDv#0$?tl4?YTuWZUI_n=m*Ud!q>Ivszex3OFMnFCqX?C0ivt5Hzx1FZ5HR^DPk1;cVYdL-5w_W49$dfMQD+gnq!$oJ;aHrd!yt)->y4! zLIlzipMObHA+N<24_MQ+9bs1gMa^|YTMtt?@-nb2qcT~|S zdECu9QBZkQE0Rc%7f!S>&07#X6L-JX7IU9ql2fGoYL~$;Yfq!P)I`)M!3>TKbEAhM zfRssq%?dx;VsoB!N_RX8lF1#u!Cf>07*;;7fi<0n*WnKb{<#d*`QVN*vlMExWD>nh zB2!)p!VOQp`iPN*Sn#sfTUIyNhVxP>SGzW#qf%#an^)qi?pxlB3@L7IU8-KQ_3kDo zmKEj0dbSYAZikVQ_MX6wCIeMc^MaCmWOqU3&*ZXD_L{|4{JS4C@xNcxugkjHbrI_% z92#4u7}gH5xp#bN1*|_Y>5T%g(|WL{3W`#=j$e*vf07}%gpetEYGVE_ar+xc#-V0P zB`eAs{);_kUg#diuh&jcv^mUs6Go372+kTN@OszhYi7d5t@oR1tYyGuZrT%+uKTvT zYi1?+uJ+<16DuH3;8wjt-zw?r)&k<@KnO6zoB6+ZdPTj zBntY^>Jc|BQXK#chS!U@Y+0^BRN;rid>)TI-;;|j)7lT;xqu~?EGK=DW7!0oT%qY>3qDyr6G(C%NS5H^J1i{Cxj6LZuD2qQJY%%4`+yj z0JpSL_*4|pg{uQaBP9_pjbywmwav^V>-ZW5rSfIRpX)UEqEzY6fU@&i&E!@c}nH}QkFr<)UCoo=?;RTbCes+GK> zWqLw)=HVrVnTs@C?}grS1?NdG!K$IPJGRu-SF(I8OlEEssAdEcCz(LG3b*6>6$c{r5u>I7TC?E zO@Kb!rgGUuK`|VH7g-=hH>+y$N4wN=7X=;hnhd4i-=r{>10Ktesk1bFvKjA9 z?(|95YpiwHsXBfE1k}TT3cui(!yxq=R_!E$Jp9Ay32RxXb zeQ}(f{Wp-b5>Nv8n|m|9LUC8ORwqCx0AN%1-v80lp&>AB{nt|8U5|c+gk!=<(*Hz2eWqv1ngDr&j&Apq$s8r*&O<_<>DU3 z)_c0$TLP3)I~X?6y;!p)?_LX(TswvZEvo&@A5d$8x^G?Z>9W-=`o(ls@2uoRYqM7^ z+PNN&nJtj&42po2Qr+fXju~$onrr0*<;1eIuI$Oak;yI+&;z{R^_{%y?k%#)rt8^G z(WUEu*T%==yag6$)+V!~%j6Fp3yH(6lGkSK1eWSM4?3JCEFkOjzG7nZ`Yu7Mdjrin z;09*k^`7Q-HOQjoTyvn7u>8scYL_$ku&Pz~c!3~9#^VSw!uf)ZAqmJLGtrT^-(5o~ zGJ~69yqbv#n9bW}4Eddgi1`8=;<{A49|!-q7l+&f-ld~HY+Xr>dveb@c}!-daL9Sa zn;p2<>~$aF5XiZ@5EE7@@N*TQ8kqukWa*^G8MjOw3T#@%`}D|+|2!aC?ZNtIyW{Mv zJMA8S(?QF2(UUx`#gM@-Ft`)&YG^ARN>TgMBB}0{z4O@O6-Y39oo=SknY_nN%pj`_ zmVUWMl|{)}AJaj_0%D@m^I+N}e*#E!o*qOcy&aIL9m!tY4dofN(CP3Ct>miIiyX0dgdJltD^jGU7;;7sfv+8Qmb zriVoo7u!wP*_+N81TO+b&sg3p1ujyp1qpl+&4Su_l?hKWA>)967k2hM%|%ids(pNC zFS6Hv(!Jr*o5NpK|*L= zn4Eae`nrxmPG0GYK7saBv5DilWiNbB_k+kOrwJLi+?^HshM0;OFCFo@^{6LY!Yoti z8(}#Wv(*0AgOrYXKo{w?JIhJM9<3kcor~JK;hPXSr^%O?G>P%{F$s^N zjJ43~crl)cF;&YE39s`t2G{M+Qc+#T)rNx#JdwRT_n6G#t4!xU2Gi~rGzllIqzYCA z?tIfgnaJ3aJC(~UtoHo)adv*LrCpM4{(j|TF+^Rbqa*T%F#;%(%A9K!QfbumA9?N4 z6)c~AAEgF}t+F&wb$mZjUzMnO!M;@+N3C4rCNnMl3C#c5aBz!ecR7In))mCrm8Jsr z;5D*WJ~8`N?1;Wkd^B-a6zFYDZ~u3T=s(`J*x-}!G(sr|_Xyl}**Cj}@gaB3QWF`6 zN|Stq(|L27>8)kOZPb0P_5`Piq2FMo)+{M!M)vZdW+x=m|1mLRB?j7a-`Q9Ijk8!8 zKtEch?@MIQ@jxnSr&b*q@J3}-LO-sybgC;SW6%Qa2`Ez!6>nb+XunJBiKM_KU1?;I zY2X+Zfkw%XFF*%1)xfjAF3I@+@g*5qwy5WAq^F=?37=mWUAo%q6d>&6?-`i`HbBLI zn(v~MRQiUtI-_<^yDb}1pdS^j+S97oGm@-aNKIN}bhVq7I96<$0UcKJId!TXA5nG4 zO(vfkirk!t7TGf{WRm^i-ipj&HuAELBfeghCC46dE9OKFw zUh6k5FcF1=b^UN1e-0E@|8oc2Kn8H0!fqjQs;?=1GxDZ;@RLfJu$vMVo)h}wvCTRN zm@I_y5`#x*8qN@bd7955P2f-6e?62g%?JnRM=9E3*FY}*ztkwieqbZ-lZjQ61u7oZ z<4cmxXSy6nv6%xsRQ9Z0=fM1YNw8>5;HLHAzt-d+hLVErab!Z;0br8sn>RrlH=3D6 zdJ?!a+LS6s%ah5_Z)nXTbinZrA{ z*!qR`Y5`Q7c;~{D%muW%N&{Vw@dM;3T)82 zpi5)1rMmaVUTxd|C0gL=Yk#%*5CUDyGX{E(M|$5~HY-vC=%MqiSPAJ5a+f-+P(s>Z z&kp056{^(vg7TPAxSmD3`_9tH;+UC1F{>OmmtBi8zx(=|aLnx}DWdT~4S`Bmj1_)#yLIuEAl?^zXkMSnGLKVz*zR3{}pZ0-u9 zxB^(I{ds_G@qvE2WiAP;HfY>JJV*{?pZS)OpbYc*$N1{h|?-DH7k_*=s@i2 z9ZJb^vQZ)x%$`+HppTf|iXgF`8NNpXbPD*1Uc`7)c-rZdj3X`%8Yrond8S&l5v>|C z9Yfjd+Izr$nb--?Gd$@vOpg5<7_J_TKW~KP5>#Zp5P#Be2-7HAh~y^*`~|oXR~l@` zw6ilSq>R-l^APC|lkOr&`}!Hw-ksKTPX^wlwmYn|RS=nRuA%Ry-PJA2DoyCedV`+e zwKIS#6Di4PfAk(7F?VbiWY2>W58etmKT4TzKtFJq59#26w*av;Ps*|ZH++qY{;xxXsf#^}L*dJ}e0a?^K)l1hQB?qIc5?h^S*7+rW*1Uue)K-;I%3 
zx8ql-$7YU6i>^Z~PX#ns0!^BN{FwhuDu5u~cI$54V2uWDC&G1Pp(DmFUzy`4K}L&h zWVASCT|@=IWB?mWqVH)>-Lo{kK4IYu!~#-IBX7emUp`|h|-3O9+g^>akVl;01pWR zY5;blx7wYtZURzA@Cc+Rx}NlsT4GXDkG3`4MBPBPT-MmxbNtc82{egNMGP2hU1h(> zG{JLMnOqbm(Zhm(Rz()zw5DkRMc}q1fsbM;&#awD<4XClo5oO7s`b#dqaRO168fDW z)>!qyBb)}$)#z9>+T7SF1RM(2tJ*hzRw%LC)(GbW4pJ@-j)A(fKz)&h9A}gbPHR84 zDBKrCvKiS+5cv$L3CDi5r(Lk&Vo{RQ1Se5NNHbL5JuHq|f+zC)Y$Q&Z(Re>b=%Sr+ zsWtwb!B=dsp<{5UY|WF>tz*V`=-b@SDtl6`dXEHv+v)&)cZ2!%!5~I<_IR{|8L8`5 zmG%4D=B(Md`H{fY=|J8`?`lu_0(T^%GvP(UT3HR2M?gmy&{TZgd@R*{oj%E0MAq#< zTkaI@xVxk=)77J7)(ud7>)1{QaR#}~vi8?xk6mcAYQXgkB1ccADiT1wkz*w*<6%iw zp!_nW3Ilsr9a{AG_*j|#RN)?pI|~M0XT6E+J4cHH4HpVL6pxjwoi^y@;#p(a>knl! zl|XdIUdCLj)nzmF?j4P|!|}ztH`4WblHmy+nI4XH9>C-1@^BXdUkIb~I`_GqDGFM# z3$7Q&#>B*Q0>6!c=*4_fBu5H;GjzF+L zZ~_E(cXxMpClK7-A=yB1cXxMp2n2U`3AS+w?)-~$&b$A4^}Tz~`>MXGRVgTTs`i@Q zvwMuu-E;N?Z=V6R(;=>6glGEazR;_f`SJ_VW~g)bj9w`}N_k;In$oNB~{9mgM!s3 z*C_t3v?YAmb_H!RN6f`S=b`%yO~roDZ1c-Nbl&ff?NFt@{;UApFIuzH!$~G`krm2k z78~@|hkMmyye%PWB+q$RAWDv#BbU{L=Z76q7`CSg0fVM)iz%2CAayF&ubMkFU94e< zn+1*4OBCy-I~kmJ(DBM$UT|xFf`Lf~_p;PEo&E5^iz-q&;)zc_04|Xx1?pRtqgb0u zq$@-~B#I8jZ3_wI+ZoPvV1OHY+_2y1obqc9U%$`&Xqyd$`GDD^3}$*H2<#V~W0@BT0lJZ8gK7CwGlv3yYD*d1;c9HQSXqv!yIv#y*5W0%8XvJuKTwxap>6{ynCUrdF8tUbMr7@EVkj1NlqzG<9`IMuioH@;`36l zqh3gj>rrKi-|5D34*NS0y!a6n#PC+Br{qVAp6$w)SzTbRwCut8g{xX<1CeW8#XsD$ zml#6q#M;1q?ZoGSK#uPm6v}mfQ9BT0+-54z1A*s0&rrY0Jf;xj40YVmppm;^XCP?1 zGn%%Q-eMik+vdU6QbwEUamoFAtBS&-O{TRT&-+0l-P62S+q1Jnkrd6F+v~eFa9PW& zU4SK{xV3y+Bq=F^79=vMuG^&yP#o1li4T_Sl9q+`UeBA$mA-sxzPIKgiIdZ=1gfjs z8Dbf!jq|J>EA?6>#5?Cxr1DF!B}vWh9jVTD9LQ_F*0e_`5ee*Tx-gv>x{gh$?)xy+ zvd`W@1(D3pXC*d;-Lp&4@hNbupBld{+bvygocFj-v(6bYjxrP6lvH#uA%{pT z?)FysOkL2v9{KXcSc)@0EbR;2>VE&a^N;Jd|yIC}nu7bGqz*Ii}6|ZBoOh)`q2*s?7R(QMFu~ z#He^jMJ{>O;iS=`nmZ>t>$0-*?(LVewg$Eo)HGN5nw_{_A0RW;NOgx2kH6O3)hH6*Y`2O zE=QUaO6l(8Rx1*V_&p$L6p)hT;#M|QdAsha`$1c*5ddc?b6gH$sZ!Jv6F=eUF$2r% z1T1ggNkP+I_x;48pZ?0v(=f_Tz7&G&{q8EVR8oM_iLs_ApnuCo0T9rxH3!DuB*}mA z4ghOw;xjO?BO?SonQe%ZXL>&|<$8mebL~kE_fYy@uy_wS;QI`#PUXN*>6v6^oQ=d$ zN=Z{MB!JdubZGBlB;tVi8gPOA&dLX73y0`^BgN}}EIw+@T5$wEd+7&4L-*>QaYIBCeC{GepfaT~>URIJ&JsBO# zBbTyN%S_NnfQT3{1+2mFqj`hSlU*u@Mc4<9E&i7d(bC%+m=#^2^~%N>@2N}SRLTLh z6C#xsNfp+)>zq1Z9%#+AF7y)vb$Vx_Zo8-d1O2nuUcUR8Zh;=3)`xMImGiIMEZYmY zR_cK1oPiy6g!}LMGNWLKN#9{CY&$f)1i#)lbtSiEv!PYwfq-{xDFQm^{qtcOq$jEv zcCVg6Hw=M_PKe|i6uj7lK!7XTX$H&LAf%q`U;`qd;BbM_c$E)ci zO&`ZD#9Y8>E{A+I30ucaE+fYp3#oDKSoqMa*d$o$kzeCYSy_K35kZ4DhxIDEcWsxR zmt<_#zJI(_;xa9xAqD+%*r6TgL}$1jUO12CP!dgMF(hni%2WIxcEshJ<8h6;VB3$dNWuw;Onm4Oitf8auvu|^DiK9-PG0##%o??<%*rUnn z`?J&2NK5V<-5de|dJJRhrTUP1>m`9}Jnn4Z;=9T5&u<;OGqg>$YgC1L*jp=iGZtcb zQrN~Qk+1O>3|a**mfDswrYpU5?re5=lbMaaZ`r_HDeky4YJ&86$~nQR(#(hV({g>* z;$N|PAzDgQ%LrVSK4)`%vWoJWbr=W@@B6sxhTv_#Ze2O^hLl*osk`^F-*59wyjR@A_-Aj29wPuvyGXMr%_Fwp{HZC?KKc4wtLGb^=CVM4t%I5v5(i8F3H$S3Nl^@tsFytvU zlWK1vIvGgJ8+C+T3xAzbaEH1+P0dxWt2WmmjHZJoIyES)r!S>((P<_uEme)n@#kZ! 
z5I%8A5{b#s_o%Gr)Ui51baHR(g)TUlor7_Ly&blP1>h22siuim_5ZxrfG}`k6hkaU zYnvKd%h0U76Iv;gR}Dd~gQzjbtIBaX5h@}HdG~O4i9M0qcsyU9p85#~wH|Q8sjZ?K zR94;oXc80}EpD~X9esXnM_j@ol9w=`pS+&Ax@}OCBU&z~N6&}z@!pSlRb9PsaV3w1 zm6-btGd4csLkR+~KDxF}P7_E;TA;SkACy9#u%O4cZwfavcr+Y$T!J-BgNod0=S5{S zDIW4=afNz9&=lrfKO=+?x5kU0s*=gceORny)nuogO`(JaU$1SDl$b6A&lo7}CDZBQ zCNp^iE2A@Mb1yep*i`k8$`J|7Q)0FotCs1Coj+^jvnt`y!ailihkFf9FVtG~(faEg zv;!;gY3cD-Oi&QM>smSu=NmIxe6XN_(*|gNjJmo0FI}N=k$Offd(1LM?$^^Y^Y=Em;Hp>iYG;kZ@m-G}dTNc2L`Oc?b$^vgs4#xh8^`&S4+|Ng%fMA`kaa2=W*X6~&H4JnBwEIzyj4-pPeZpar&2bJ zJKiL(+rP!hdmL>y7@4<)2K=fhm_k~rl7$}y|yw058>6^Hl8?z!6eIN3%j zuSIxxq8870^?Fo~Dj%=88)S%%5AZp7`+K5qz~_^}2b&rjKkM* zdYp|m*m?44CQ6WFrVvSOWrsQmsov4PblL*H=y ztKTSkcV$NziXGfJPI^g{(BUDEzZ?PqvmEV<#Inj@t!hhZr!Lod_{bgC8jlO^()PEGN0}38Y{1I(htS$hWn3_sPF7 z9Wb=G#O&?HKAj^VyGgxlul&?hh)&H7-l&mYr=dVN)olsA3vJ-7RyAtIqeY%$Tqpw3 zTP}ACDhJ?Liq-PoUGDeGOuPJ`=k&M&eVIb0P3QB8a=*r=ye(w%skM8SZG5ysD;b@JP4)!*kI{CCmN{!_H!HtQ+ zW%N)n$;9#aEL8e}! zF4efBw2%-ctEE;^lTwioHFMEZG#nZVpd;&u&U`+zoITc71up6Oa8cM8}`0{ybF>Z{TPc8IVTf6*?Bhx;HH5h4YCT>LFA z8vEgj6p^l-Qd+7=rzYrEWaPEuuW_WD({?HaKStyJ%+q^Sh)$}6u{n+VtLLlnEea+y zZr4s_0o%ObwW+}Mw@!;G7KxlkHjB+(1=GXI>)F7vdVYt`9E=+rl3^HBtC)!-Bqa2! zXF!6$(?>F%L5lIX#1zJ$)r|t@pix|b%UUi>cG9CGszUSTv9Z;p5Qlpj59XdND$Q(9 zEi_xaWAY=K-b@pUz)pr14MH!Q3)p^paN9f4Xf`-q^@5tZg-c-B=?sfU1+bUH+x;^W zsI;o>Hx4ucw}%{Xra!;aa2^T&v?)tbSX$Ez6j?6@cZTJ!s>RQLu(Jogk;!OmR6K1` ziGyB{*BKQ?WQERk1tLLO1r7`Z{|{!s?NfSTbpLI>`Bxq~l|lZBf0MH8OpOJ>kI#S< znD7n~SESRT8q8~` zC@)6A;$r1F++iY<_2%}x`-`hc$_*lfMcJ4-s(KHxMyKN8%x2*bVAq{7y>Ysc#bO;9 z+7v2&8f|jEk<{|g#1ix$v&9-=@2=+1)aRayFx$Mavt6u#)jUA4V|&VDfrNyNDJX!l zt7_!D#HLRjP35UzmD(Ou8H^c=e66T4JYBw&$m{t)j!r`g&1%HgEzvEEIJ^WNO^Y$E z;*m?5ls%L@$HKb%*6e&D1mK#<(f}j~etwBpAbpY#u`k*ApPnxe_|Wm4{OB*ALro2q^x&^4rje(EBWQ!Ub!JER@00GG;0DHo&9 z4w9L|2-u$I%j`xVbPNOpDd^(V%`Dgb7BCq}6!wP&hfLfU5Y-h)TFA|SZx)NAk@kdcl$cLwL z+=qI9mZelMwl^;#RA(M%x|`W#B$`bx!-kTv?p=fX_(+?nmX83Gr{h&cB=w_yM2jl|beDchxVR z8i@!I8?0Mnl4*$`P`U8xl35&@p^`mr>m$khr-{;jfmT7pPYp;QDphhZBCmHru6YOP zj{&)@6iI+UNReHiOsO4v&Esk6Q>@qoS{BYAB-+DASxin|ZTA+cd2ZmhZ-~WD6#KTl zA?nubbk`sz_oANqEMzu>|DE2O}WNbUQL`UO)EL%hz*o;$=OPZ2`2Waa092fej9 zD5`2*=v;{Tlp5D1G!$XJfP@bnzF|?^D7OqRB@8`F*369VuaX+mUeaW-`B-Z+m)sbs zkiPoePyX@y$QEhMSv)cXT_RtM{dRwOqLk?_RI%2HiT{W^u>Ih}1`eWw@#lj@B6%up zQbz4VgDmPGntUD>e26u1hJD@qKVb_&($DDyHFKN!9bTutueol_`Uh z4Ey9JBR?CWvbud9%$-oqc2#H!svV6fx{hnIp!#DU^C~d3$Kxk)nTkLoAb5*}CD$s} zvgPR;8Hto>K1kANvCN>pqtk4SjxSTUl2ki={mmvGKv`>tvxe*c9NY9$fzS40cD{Yt zfz*Bf^eAFPG3nG z5Jk&#IJnYbc3QL~@nn+BhsD^|L8nlRG?vDpV}rSEe+-yhaKLan%>PJD0zHd7b9OtN z){!_{x?(yw-LH5U24Bu{{a5(HvM6p?ReYmNV|P?zGPXMhKDrp~Q3UCG$Gd*nMLkT9 zDgf@Lmk;1;mdM|h+ED~cC*?AZor=#n?X%_w215I%VE`c8Y-zTx%Pmr^_>xa4%BA+dm13&GQKj%l@pRDt!ctb(muk_35_=Mv_H-Tq&8%r#uw11g<13g2WTO zbJZgXa&q+vHoD<-jwD$0{u09sMkiLQr7EfFx%(_upp{63G9)Rj<$B65XsSEg5r@m2 zJdR3BlQIM|V`6h}ywlJ?p)uKAPeTz=yG7LrYrG=BKv%EbGYjwFx%e7)+r+0ku#?$g z_IVe$6EL{m;F}mZI4XF1uX|vM*|eX$3C0arc{E^QRruv0{v#g!6Rif4z7nuyhTD2c zHuwU6f>MD$L8&=2G(v0HXO(11cZ7V{!wil+yIlp%-?{M6w?rv_1c%-h>D%B8SxNJ#RCw$bh3eCwatW}>QtbG)I zzH{oyc7%t!=SRtxovm(Ynf1e(t)LQ4zyUMGhkxQBsI7wV==s773sC)0m!0x9 ztkvfH4|sgzd~gXDkJ8TVn>tW(u-78Jx=ICcyN{ZT^XCeAh@8LdBvo@a|6V?dA?q$i8F zbPEfM`!{&J_MxOquNh9}MrhFBai#+#W6AS3`>u0FBL>M^mj|Zr2Nmx-cOt1w5H#yW zsS*C`EC70&cVm>30MagW$>@B;hmDPGZ)b;oEH-NRxJYuSdx7{cme#^g)F++YU?Y@! 
z#VvUjKT_gE7&n&G5;ff6o4LdNGt!59|9lnBzYx3|B@|$xKIB_+#mQW z?yJ02%6a=?)WUm9Q7QZF`CenQ$>)gZhHTVX*4VB=O_raz8h-ff=4;Yu*Fi0o6aw@o zK3(j1lsfxdcH}*AA-HQ09oh7x5Kb=F_+(;GMNhyP5;HdQ(twLQ%#qgDR99!iAH+v_ zn6m-o{>-$qLKiZI<=nwRN`^vdQQ{H^XSu77-zAOGt8($#wdzZ2O*<_Gwn{!KtNuE) zEQ`CgdPySBN=m_jfr#9j$!(UdBc<{9>QfV6?QR0?9B=yV27X4rnTEHBk0e7;=_3Us zFIJOOj&t;hMW^R|<|5THWR&}>nW~U?w5GD1e${u;st{j}E%v>P1|EN1?;h)+a72imGy*pHE^+W8K-5;Kb3=N-Daxm#MHI|~w(ophhT}V@6WqYxS6+b|O za#(wn?J^o)z&@Pda-CGtFSa`N)PcEMxcJFpmIJgWaJkGXyHvb%?QTW~3na*%ugvqH zK5cg(ra&={1hUgJGQWd09|-kvy-Y0UC;3*)4p-crJmm}6DMH%NATQAfHDQbT+*fs; zP1Vfqv2IB7Gw-PhpuKN)8S1R&GPeh|uvGDckWWhlGf%>kbA^-u8C?jff#yD2s6@)SXXH{Pd03i6EfGEJ#d+h=Aaw+Qi?Npn~zAhJ!+(<(M6!<%kb;5dGUcDA>%vP&+K21k_6uq_f{z zT>iAWk}VqOT%Zy~`gnP1X4Vpq?>ZHa!)yrtlp}O~vgErzU0#0b@z9;%ZE;@!D0CbS z#sxm$5#@OzLL&ddI9S6^=))RI@iHakaU~@N7X(6m<5ehRXslJ4B+$Aw&TmK3I)ojg z%C*=kNYyYf0IBI5gZgunCmXq?rvKgLzI+H+hTJN-a1Nd(qq6ctt=*KWX00ov*VBVW zU(a&gp5u@AtgNgTY6Z^9z#x{r^CX?8&n;shcvw&P{u*y~uXkV|$k|e-#IQp9NDL>% zlpO%4;it7a0kn?KokSBNJNs!Ya83t{QNfG^#k>=gCagA328*pu0kR$wh!}@G?H4*U z1kmNrhB*5(N|-d)BPk8jnFa8|mr)KjGO53tZ^XeGWaP#Kw^ptd51a5_FHC=W^igrL!>#)|C17fhMZ zj&tM7)qmigtKlF!6Z#ulVDRV#e;d9|Vs+BEEwP38&j_lHYrB4-8i zP%L8}kJk;Nk3&F){6deu#0~(=q zsqEevNhRdI$DCC9vSre{{(7a|BjR|qt>PB)Z5gjV5WfNmfix`pM;iWzto>CNV7&(- zzOXOYIuP%VWj3VzgR1?gfG6w{+Ow*xm;j^kKq*09jiowXY=7kJdg^hiu5%zJYk2>~ z$OvanCx`Fle~UXq)IxNA&&DF-dZmsC{goe}#{#z1^P<#%_5UgQ0=FoG^oMLkDqaYZ zKI02?*fP$}c~O=LF&rvBpMb~#-7IJ1-3~m4fG$}221E$FZ8S0Pw$aP1(tsrOPj9;s0DQOKKQVJ&tOJ+; zaBf{TFSHkx@L=;xKQ0okq>Zl^d=s=2P0e9`X*gQw-2e`lhgy`54+{f(Xelv8E{YJc zDQ`f6LQy?w3H9ma6;DSHUqrD5b81wGXcpjwVlJ-??Ed-X5xD>t=nKcwq=K1WoLAgq zG=eY`7KTXUbqa2;e2$i~mpc-;h1*JqcQ~9^ps?AK{%x=pl@sGjKb#kWvA1&X5|4}g z*oW~=eU%nW zOAJNKlZ+As@X7`O5K=ds%t~gVbvH`G(=?w;{2W2xxbBphpAkRthfcs#Tm0hKfSc(9 z38k0e#orKspJWkQ0$KjS$6waiA3AWOw43;2&A8%(6*r>z_P!{QXm}J*k95FV@v)Vo zB)gPOVl*l;n~I-gbiY27jMC$kGLkaxl}i=8a=z+93_!ri97$o*v}eD-$$;1jUi}zK zWwV+kL^ha47BL<&7FgBA!qC}3a?&5M=( zZ+4I%KdK1{X=kDcdU1i0oE#mcVq>APmAozDEw7vOL+}@@*LC==SEYN7C*I1jic{|5 z%ep}#KwV~BV6oOsXyd#8LXs?h`7TRa+!lompk?{>luC#;YS#TpCs6KgIv_x@L&M*r)J!2)lWO zu6N8|IbDv*DzgAjef1_N3D^KZZZ7m}FDCuZkGTGuwPt*En$zjUa5}yA@s@Fyt@|68 zGL5$A)$gqn_P$|AU-9Sd`}lDQ*sEYcX=$kgLsVg}*7uZ1D?@L-aNrLVnKWgq8S0FV z%5Lpp_RNe$*gc|Vokp_-;H!V@-4e9@WtGzoh|_W{DM^Av@IFPkD*1xUdHPN{n;2ZR zkq^ABEW100@!t*)b>81)BL8iB? 
zcrsolWw$@dm$czF{u2Ro{jxO~q=4-aC!hS0rV}J|kV>s(6Hc(mn7v4aC|jNRT%Km# z0W^yk0$%P&%9D!g01%x@`3>zm6Y<&q+DQYCLE(nh9KjlGb2>&T3cr>2E@nf}a;))k zG(L<4i;R@ylwd4yHCxPfLjUdCMqa$c%!_M8s{d?;KmHm5EVViX^lFC>o!8S@(kGlx z6hghBTYK9gw6e_;=s?ZWbTIcf>a7N5(UbvpoG|Biz%tlzs1Thp-e#g}2~>3a?B0ju zWowna_WaRGFE7{Iocu5uI~aejTgcJbXgyykyH4`res97g`~y$r-LVfX#vA_HPbr}= zpfbhW%paOO()-R*KXCSK-2w#kJjh(^9e*?K`bz%ImA>Tyy!BUZi%TNMKf_e@7tu{p z!1g3vhawsVv^}sm_+7KsT&5=^E;20RsC)IcGqNk$G9^b6?W_Jzfu zCFBRZFSG_h> zpoDA7GQmOs(6-?F!Me<@a&(3+9<5HVJ3UJs_SaglX16&SfUh?M@fP2=eMQUnrM2>kOuHP;qc@Os25e z46KbMT%9aQc|XZ*>qy-uLW2yu#IQ~+zB}|sQL!0~MftWS*)n?Fy^jTntiFH$9)GPb z!VK_}i}g(?C#DhmZ*Tky6}Kt3_Tu6Xl@=#zOuk+Gdh+6Q--;Y}Nnt(8b^Z>~*%RU= zwiZWCa)5Cc>hDXZT%iD2OTIJcOJ1U0*tu~RI^t))(Lg>2LZU7dcgo=3zHe=)1~vUV z-Gu6f-&rZ07-v`Z>-+LrRikDF-IicotjCc=Q0Mn$eed|X@n=L2y@LFEo&8%bP%HbD z+hmi-^3PGD8}7A2YFr|O;qU6x3g1`?!*qOh$h^!6L$b)2oEYYj*KhB}11dJe2l^SW zD2|GzGSUW?xG1X&T4jeFkk*2Yr1mEB;Ibte!9yt-oX-2y3*g}-nFgD+*g*qQxpdEq zoeZ6B!ZdCt&}_K|Mcj6hL`*?USy-9=VznCz4tK3w|By&4!HJOR?yuL=nF2(`ih0Sc zO|d&sQEC;M0w$yT!BpE`zu(m)so&wE$XMjfpCa9F3jR7nqf%JG+h^!Bm7FMbctm{q zW;~WCE0>h1p=a+TA+q9eMexDpXgAq&h0R*G;LRd>JkXCgHwXS(%dxwFiFavlSXLdhl5?l=@nZR5&bk~ymCAOr~cgD91ZG^DEtf$i#6HYv?CGIlKx>OkVw}loBxYCIh z1q+s)peD!1JC$`$m%T;bYU9hiQx zm!B1;#CV_MsDN{lz+*XN=QUYz508&PUmV4u6R~FPDC7v=tWD;2IpT+N2M$MDA6O%h zRXRTOtg!L)N904Pj4pUMZ!O~m8KBhi@kr|U^!m~B;%>^YNrI`P(3IznK}U9k)B2O` zCO(b3y@XAh`Z_o+t+X^pcSi)f{O{p$;05Oon9x-r|8sZ@bO2y@3K=vQI};trB1l&! zRmW^R^7EEH9@@;*eDBeGRoK<>3Q3b)*1=X0lRtUGvg<=PgJy?yoid%V_iY|#9F>|b z^<+tdjcIqK08lE8-g@BOZoJanAAJ z{l-!4ysd}j?J2J-7MSV#a)+o^SoQ8RABNub$&?n8V3~*c2P}HiK;6!wcHJ3}K5ae@ z`@ERg`{BO&&k82nK_TY%m&Zkzw;IP};4m1FR4lm!U__*qFkP3E6$;@w6)sk@$=LR1 zT?+TK2q2Lg!HzbhSk6+yqwE5K9bCOp!~?Q-do7i#qzuxnpG5HR13!f2lO2z{QTv?ZX& zfFSyN3ly;)U<|Ugww}R%V3`WOE&pK3V4AME$@<8FHon>~^9UBbyg zR0^>Kt*TMJ*2pXVCCzkRKtIHk57fO>%-Ogh7q7l>JhJny8D9|Y& zJ#H?<;jo=eD(!c?w=RG|n_gffg~4ELMsYo7+79o1CxOh%=lu{knx<5K0)et2>Kz{2 zJCzUxXD)(gCu1U;fefAlP9eG!b-22`%tIjypFB4i^Mju32^I~ZzHMW-?0;3IwU?YC zqVjZ5Q4Vw>h2wEa7L284>(@WEQk%Oxe&>rSp&mQ|3PWaRTXw;$t@s%o-0#k5(6{e5 zZXGTjZkZ?*T65{yvKW@=Xc6(|D?WJLT}aU9NenfDM~Sv>8(C)+K=wH0rAI|5WaJxE zYQoUykx7po(C9cfG$F{A>A@ceNnkjT8|u8-l!?^614!gV zhb&}Ot9<$TVu)@e@9ygl{$yUKY<$?bQ0koU-9gAkAt*68G>h3%A*X{Uau(o#OenA5 zKx8<3LrE@=WO}<`AONXW=*ku(nJT8cU+l`Pt|~+k0i`xUIE>Ml;u%6+UAxU$Crx&n zazX0BK5V|v)x_wRAE5OWG{mGHPeYsOXasDvf3I);=D6IQNkG7zmoiDTRZ44wcG=ps z`(j~81pvc0YIrIT+to%x6BQlxk;!MKjYI(-avBJ1h_S~`3wzTN^_hu6RH;bo@YQj3 z)bvn|coR~a?59Rj=6bB9o3T(J&Se`Oe4x^x+~ z4YTLM(>Oy!lG#ptlFzpc4z#2#sWDK_MNvp)(Yk>PF?(tc*h<$mYGSnIhp^8b_$TE6UIXl0;y5>E=HQA8Nkmy1{X`fmtby!C~8w0ig^{XPEu&oqP|gaU9KPXYQh z=BCpqvCHiVub$j+)&nL_`Zn)#%~DluuG$k}NLch}%c%mHe6a{TSR+7)z+&tcE7rOh z<8F383RaGX3$D;>p}cf>>}AoU_X*$XV~QyljU;R+h72`FsvsaJ(B3UXr$4uw_A-M3 zZ3ch98;hN8oS;+AG9c-U6Z?F$Qj?OcvF7cIMz{uO{8Luw11eN;pQF-JSgj~jFp(k7 zs{pw^izqn`DHt;ska{>)c%{YpS**qRI5N2ivNMJ(qM$$yUQ8wqt0wfJ zo!&8g%c}P5%%PVEGXk3{Sho#!*3N1*nmh#He20f4JU zFY>rwe0{n#0j91n2p0^JqcZ$|1|Ur4k6;fU56+OF!DE7joa>zhmqIY;#sM^)=RFPj zOnayMgHVLe?M(cPA-2j7=%1QT{{c~_S!5oErQ9JPR8;IzVU2prp$JruaaY_>Lr`f* zoi5hutvKbQvPF8#u^0>r^IvH=?p~N+45pA}sOLUCx>FE-9fIt9{q42o)1dgGqELJi zrxj}m2X376$!ZazX^1x>L`sT0RhE8uoB{Pd1aoXb~!zswc zzc-N*BfS8PsIV?(pji12SOcE`VE&(&1Wsekmne>B=ybAsq|l+=PD&RF#m0Ci zRm*kTQe`uEN#g1lH+$tsp**0n_J41RF3ygrNDc!^JMrVxwKzvQ88mDKvQ0UV)mY-m@>f47I;Ii$MmMU1+*#mR}c) zCA0B9GZx&QE>e_;`qsm-+~yMSl+GhW4LjGNp1ZtEZBWi_mvvFD!?k;NSz=!!tJ{9n z7>sBsjz}F5Jwlcvhd6wYFZZ0H=U3}|B+2P^=BNI}1fNs#;(q*^*;SseNR~>e-4pWe zb|+tPrW?j^fbWMKWeO2=I=vkhS3$ktl28D`c>Kt-<9vM=IJJN|rnkV_vE 
zQbnTk(UV;8ei1skYzk)(E_P5kr~WT`$pqH2@ifiKl4H40;r;l*Fi+7?q{Zb9p9JHP zWZ4m|+I&S4DY$x^rG|-BuNFVgv2-sA3soBepN(ld%?9JRNQ`HbaQsGt9?I#$i7~+` zK=YV62%^E`MZ$$_RLm<@E;2{6x3fn|$h)S*fWV+sPh6}!A-|>7X%3{-<)(|9!Bm3i zJTBrX^r+yC9qdtANqv*SV`yT;rPJ&vP%xVK(ya_0)+joTxFokZZsmNVfBEsHai(Sk zFU{}D{CT`mp0m-14~M-{jS1Pt6Al!DP9tlzSR0dXR*dgDl8olKKP^$T(m$~|FvGEb z*iYmwg-|R#n%Yz(m2e#GwBB{=c)4#>WC@HBEc;P>86#9VhXC|y`c6g(3Q2kl-^NBG zVk&2mH-AfWH>|+IK3E+xslxnC`}{YOEdx+KtGosMvZP)#zdmjwUIorOi-kzv9#Z=f zCFB#gzqO$>P&jz8JCV}bRs*N%*cl_pA8s)Qzq#UcaalB|Z0t z-7NiLg|>Kgkux3&mE!j-Hmj1wN@DJDlez`S)a=mBvtHOOq2x*>W>-e--6i_Zl+acYi0r5-*LT681~EQjen(Pe#64_S74MJ z(`(JFGW0~H^?etS$*h|WI_mOwsI^=$*k7!J+U}2%sth~Dq=aSyDt7>l5mcVkoHU3Q%ViaLNG6lx1!h!ejqWKmG46x( z4Zxjm?r7e8^_q_;x6QH6Pr$C#+e91)(1Z~v?7*M6OMW`VbN zHq6{d#72kBVB->wbMRCxW(~_hbcn**!iLS)9SV72ha~R(&fkVm1EaI51y4V+eopUI zlyf?N{I`J@zvu-YwL*8|9=~KiakG@BU3>kG)#!dz91h1Ju#DsBIPZWS6_eSSBSr9G znup7-PmKxrvbXsSlS;9+NF^jB)Kp=Vkl9AA*b)EJ4fX7}Y*D4gQW}xleArdu z(+K1bl-{uXIUC)B3!`#;^~0|jkKsPfMjMWF6S*juY(=8b@nU=6YsZduo;((FBlGD( z8NLU3xne99`;WkjNEktbNKiR3fnu#e7~(LQrDW_cp!Qs36tBa1z+?>6+$R!4WO*Xq zSX%4|J70|+W&ppvbk6_lSwncKU8c45N($g>AT~Nap>lAVJ`=RMue`%LMtLN)s8+Y- zrh5al6IL}52%O>_EmfIfuCTZSg-oBLQx|JM5&ezU{`XNEVv;x6>~Kh+9Ose5A4<4M z2@Ug=pJ{NooP`(dEl8J>*^RI?3Ig3jJph%1dD5g(xndNRN)_aNPjk0t%;j>Fxzh5a z)MzxYA=Bo5iDvo0GXjIsTUn3@k2E-^!Mp@XU0jxk#cC-H#Ba1$8^3B}`Fvhy zwYscXFIIV#>{9CdxnYwT7;Mc*VZlLg`rBQmW_Hwu;)uhK`7$Xq!Ybg~ul$b_rfy!A zK!>T<{`4`kr>kS_l9d+Q)YDrJx44Nf;)s?OhEm$&f20QWB_~A87m? z<6ppi#u(AhZJ~P6c*X(DScS*aMJ!Cg#DO#)J&@IqKnF7x;xolFhza5VwJrdT0+KRv z>NVL?w|58&9v>tkxjvfl1&a3Fxv4&fL=X{a^g%#P0&XRP&m~;7T%8huL_0@}bbmYx z1t=4URo;hwq|!#2I%v6JB4dC9$%G+uv)dFJ;7L=Ix+WU~!xcdpcm=QqD1o^91Tq2rw#P%5D2`o*^BvEBtaFfgH^&hz0T_`rhO+{K>X z$jVA4Pcl9>-u@8`?|$D~O9w9z7Ab-F24Jwclq!@;z0#NiQ&Ta#eqNbCNnxsLlMLt{ zB!gijdtWT|>N=4YMLnAIrIi8P{&^PYa6oSKdaS<4Pr>RcVlpXmN^v%48Z-vl9_G0P zPPbq-3%%R$E|6(;t3S1QJj>efP+p+?ijvZ-GnEa)X3J$ZYwhps{3gO*ML=7N;C3Aj zXu|VEkGSLC!%}vqlbG(fCJWc2(psF4N)-&#_vU{aI__3pVS_b!wuhs9IcsgC&<1_v z%11{Jm+qXq4m581A}$EY6CbN~q{PBMW%*0Ro3RAI|Ia8MIrJ;}sH8Ng>c;PL@sFv6 zfh5kV-NGbBhN#)Oi@PK2GfY--YRNh0BTmIELI1pJUc1k*k^(rY5cXn%Z%_w;d-MQ!{UT*eIcBi%Pq=TJ4sjZgfhCv4qR( z7|Ck2iht|jmY>xr^j&H@=?}@`IPHJ29E!2mYY}7R)q@rh4kzBAs9-etE+P&#o zISLxJvN(Bh(+3kljo;iM6Qw=QU~RzRhu}auI*Jqh3JMzzr`gK(#j{Qr=>GY}UkK9Jb?#>oqh3KNxS%cXEQ!hL{LUppRB- z5Ovn^S_xu>rUQ+KEM$g9|RQ94w#@>z`ZHxA&sQ;-*UYpFn?Z`t@`#h68E_Sp1oOaAcG$n3ZvcR zDth<5#yv@^;V6^$5vq7Z(d~GRsR-ze1w={ET5DEtT+Z5-lCrliB+#sc0^xGB9MtUm zBz2zqF_^k^;g#sOuPpbh_pVoo&F*}&Bve#XF@>q@ZppwX7zMPyjG274tDR<>K@dgx zJ1=0YS~~4%-+Vkkt+~d=mrk2L0CsE4W;%iElr6f&`9x%ARF8fDPxfDt{lER7Zw4@$ zguHwiEy={~H*&a6EK2#qu{<8K#X1eN_lhlylAF&@Y$3qB4M`mu)1T@fW5 z6**z}S$nh7i44$_kDfAdX`yD;$>)$)%}s>SF?gi;W`knNSQ05%WbH3fYM@{jCM8fA zQbQGn#Us1#v(9(EyI)5ItjR{OT#R1w#P#+doR>pyPLenyIlHE-kdogW5F_q!_@Xrg4xWV(FMbUZ&yZ0 zP)pS{R1dS&KC%15R+KukYn&LE#|uncrdua?wsX=Zo5U(bsn_cGEuL0`9FX{3t4<0< z4rYUEvd|&a%C*CQBqIE6EPX=I8C|c{wS4#IHwK(ezZ&hE;TX<+M}UfvTg+N2rg(dK zDBcv<7Fn8$QWVy;;wO%Ubl|13x#*e&%cXPYYcLfc@qI9$RLuQY_qo$T5g1uZylKMb za?V8{+kP;l0|s4d*o+4DX=HuhT^#i^ijKj7^kHODjRZkndrFPJp?@$SL8c&+%>Oyt z(!1b}EF?zKPSx3-ncea-nuAVG`t?DbcwVmBA?+i$G~Jg$pb{9sfyc&rJm))IfJ689 zPHx7I4FiHJJFL7SGK9~NiO-K8PqB9g3C`1E9>BrbjzD8%9p*L!VPq;~=h_)hB7Yx+ zK!86!yTETlTL6DMzziGNq^p)`OVnt@R(EMTuDy{Hyr5HxOy%P(#SZQo+{?q@* z*jonW-8ad?2_Xb`ch>+RxCeK4cXxLP?(XjH?(XjH?iyUrKlk(OIlKGrR=uC7R4P;j zuK7(*cTW$8KDvdD7W9t44 zB@9?}|F3A`Uw`al2U<%!u!H+R(1W|QvJ%st4rKaTdvA@Pj2O^bNc|OJTO>4f9dS{<8r#j zHj09}eY|S`w(ph*TIRldfQRDDVDe~SnD~Q6YchH9akSPv8xwA{;}gjyYqpmt3=Dk; zoSv^q0@ve$$u;^iL0bI~MPIwuHoUr|MKfvAk9gIOhI2PNbV2kg8B8d`6BCI+;?tA` 
zX#hDUHR2JQr{j!n+Uasb=M6O0>HbXqq;*D?JAi-XhOX~*N+*|?=O@}SE=yIU<9V3p zq&#kVVxtSvO@gI*ry!HKdj^%0rERQ04(dxBln*Con;&^^&Qko#1& zH)q*=Q3d2f$JfVcWOQ_VN1^$Wg*-sU72bcyzC1olp6$CVWj zxWn;ysysy9c+S#hX!04#Be(I6-+kSX-wD!Y=(4?9>P+(4GL~Bm=y6z@lyr8#{`2Xs zP^|}hQ||0wm|(xpYqv9+1b2(3lEO`S=}bm`J*k)8{A;`$H|KZ?3kq>fOCsz7e0PLI zV`w2zp&^AI0{^CH7y)I)xTJLJf#o)c=VOl?JRy|hj{040kIeP$gc^s_H4dQ4awn;~ z&jvudzd{`pDpKcm>i2miK%=8m!%J2Ln?aBD>t$qQ@WPl!+?*Xvq`df)R)$=0iIMfY&tN2%?%O*dBxfnL!-???9at@ z7sTY8pZU?Y70_2mGRt&QEwdE|ADFJ~8np3&j$<%Z)Ej zSl+rVK=Mkx>1>8(yC+GCH6w49;C#vVm>zivoBHSiSqFZ7P=#W})C^7~!MrH!4#O12 zw|GJ5(6>}7;Af$-90i(5IQ66NBW4cBU)P|`xFba7hE6R*qB5Vn^i<0P_mUbXi|2*4?Dg+y5>^|f7d--uqbxEzC?!o0(P4XCoVoA2a7t~V%C@u z;_BcHrH;qt{>$ah9#YAkkM=u;#4c&4^s($nAzjG}{OHGft9Jx8D>ZZW>KLeOet6Na|Y zg%tF+dNRI1z@MPi5|R#**iV2~1?iv86^cR$U70}o@EQ3y7!&(Gt2CBtIF5Pt;7{oT z_z8CFEZlC<#E~N)q@yxX04NfG&q6TaIS__Xq_#-Y10hjlyjE{Ip9gF$LfLI`M|uc` z>2@YkWWq*&YJ?A%-ikk#B_jznYe4DF)rb7kq=fG( zMHu->P;!AVcoZ!Vi`!>nw^kd10`Lm*RlLC3x%%8np^}q>6F|HpJik4_hCG0}-5>G( zwZt!6CUzCK;V0WL8dv7wWg#BqB$UQpvGGa~bEA^s!?AKc2~+stMLUg)tcA9h&eB z7PBO1irr|!T*pI~-7_PV(Msfoys<4ph^WK$4zgUO-bWyET>ZO^PB$KKQj- zDgn=@vgN}%liGaxX2WqvHiwhE0z9L|$3(4S{@_c7#mQVvt9K`y^(GgMT25xlL#q~i z=f#Q-W9FB__@7na{Ti;f$RJ5qcEqF^+~W&15$zFI|N7V~+2w)`diEXR#!jDQKo=xd6LvSC#7ZArVwI7z?j2UP3DX>EiK7Hg9uffEbxT=`wgNV#o| zo^oqPbt?=!a2i|xEUF_1AO>{^S~{>VK#U5qpr~m7XN4sF7+4$N2jQ_E``RrUL!Qj1 zJ9yZB)I6i&%AvDk8YD|d$6kgmE$5*8B4H`rz6lg6BH{#T=nEiXS033$&{i> z)R&CajAwR6kxFG|FOwQYVBg|}!j{Evw0Vr?Sl{Nz8Rg&_?04meeY`!0w?h|A9;NrJ08j~l;>wGNUXZM~j_l&^t9ajZ}V z6EP~rp8Gc_u2{OBej~_nK0gh|_*rcabU#a!9bdG!?hX3wKZBMQjKxD`HE*rZhoiL{ zMGkQF@~f+teV1DJj%e$6k85}!u5v_=l&)oz!_GHdxNkA zCik6v@L4K~1}ZZ1$Q8rZL-ZD(TQ5bWV>7X!(uw2nn>1cFfkd35%wY(Wd=^LK&hiD3 zbKKg-#zF+H-aHB5VNbx07Pe*{g{KfANwe|J+ z3!U})gsn$~3_wSZ{~D??UW&vp{58m(5HKbY;J%Vl6mYXU5L;cA^~@1~OJcJZp?;9m zgLhv`RNKaj$^#%yXQ>Y#92VSNaP|%~HN=VuT`KjLZ0lg#5}L@%?F9uj7%(dJGiQ3^ zv*1SjI&Xm?_gJ~T{vmq{&+cStu!;Q;$%Jo22ZBMbuN{f86`3+3_G&{iF)@KWf_sVZ zN*Oef#wr9bgaubuQecCfqx@Dnc)6X`Nvv|DqUmh)ITGNZG?oOHQFE=>v zm#Rwb56)CtTs1z3!jO@)zdA8Ziko-b+_L_Rsi5P@;ApP0d4H}YLZMI(nkkSFy1#ea zJ3N$1TxvX!O5+d_io|9<#=2D9y|$?vKpvDjPe=(qJDJ(?eiMkb-`{CFpk5U=a5*># zPrap>ao+Q9aWIvuFNp`8wnx`g#d|N&t!OdnukRXjYd5Wh~lUc za!aRc%Ipn8d_a_rDL3D(b=7aZ->eYz}GnbjgkTBUcoe@4W_ z6m7IzFR(kLo$7e|Hay9FQ2B=>1SL0tI`yZf%Fz0Hx@bk}05GZWhHpzXcb4GU|e=5%@WqtR&N7mCb`%fSx) z(~a^!K^T0Ozw0M17gUO0fgBn?aCGO+jW~sWCG8P{rt(h4p7_Q7M;1Vn)KV)xq}#(x zlODG7(h6%#sGGd~4a*UYjDt=I&@5L%JUqMK&X?uT0rKNn{C-&|OL?w&Wob#;KLn)+ zABrV*WIep2YI62V@~_EnxK>tH_nB1D}>1H{#XP! z2L>HL0VNVGv?o3@aEBzMwz{n&V-tR0B=UkT0xcQPQ@tiqPvt4)OUTm|-uu8RRN!Y> z8wzQ)PTtS9G7lAt)stsx$l&vA9SS11w{O`VVc7W+kN1jaQy$Wn$h5o|9WSr?UJmrG zm;nP<5*H=j{#d#i%i)gM zR{m(>lFQ-;W?E+ba12T4)F4Ge#A2lulxsfFuky;4+lC`dGBUW`c+3{dXI(VH&X;2= z=^7rA+;MDa0G7nvpn#~RG4z?$;qF9XyI!qqw&kWUQeO8ZF5uYh(M%ZKE!=a&Q}FuF z;DfCD`1q)%-s96q8VM4xaV&lkayq+rJe@lzu4OCs6QlJT$zUZ$$3o5iNK9cN87te8 z9C>KpM~q3fWCn`>b5h2LbV%sSvh+t0y*RM?+Uu^Xp;l)sa^Is$xbT*=3apGACO7>l zYaZXS0`tCHqWvsaD4u68{t0sx&@?T9$T))HRG{hqv?*?sKfq}2DwplQVel9s-szM! 
z&G!-LkmHCbmi7i=aGm=BSePmcmgLq4k?u$BLlrHOen>x!My^CA-UX9~B2$qBr8Re1 zOneyryk36*8@$&eoX*7j7+H8yHA5dRuxg_{gUjUjn&Vl$KcW8H{eF&aJCdikIyqU@ z;Y5U2#X;_!yfkCIRThf0$li8t<6E_au`)TeR!2xd6*uoLyTNEm{6IL4K>rf$8-;A% z6#HxWYU6QZgV)q`?#r^?u(|!~7y{^Gk!rl_!$r1TfCHNW*n8+}->}^AQ1!0ryr9K| zeSS2Dw>1tLF|w zy~QR^(c#X4$ahuql1i^3Q-_{Kde0qV(lX6#h4RILPT{FiBj#f`=k^Fe%DIO$1gF@N z=12Rd8G*CNCp8AcHRoIl*M}oPoDbv5T_bbL)5VHBLVw8PIxKciLTVL5T&Z-)4$Sw} z1{Ja$y2_K8a^~}8w7Y`eimT_T%s;n1Z<$w`;XZbFy9rl1#f;zDoJalEH{;(mk2=JQ zWf|NS81BFE&0&eD>IHbc3V#WN&b457q%2`~IQpf*<}`;uKAQfR%G}BH68=U|;@&HK z;rpKrzyFuZ{5A7;r*LMIuk5dPC>Tka(sf)jtcI1!gAt%qnMB)RcKDFf()zow%UFH1 zFSKg*oNn!Q1v#klN|h?7L*h&Oa&}akta(fq$|zK-C!OnM`woRwC%A}qyj^Zd(C$@6 zwbDO`ok*wVMPL6Et9{3p>)>$7GX)a$hN63ui|F#V?7}<*N4VwYL>Cqo=F-NSwYx!7 z=Y7IMu^TOyC-{8|iScMaX$CUjh2v|WKg@xnW?y}N$TKME`yAt)oNyV) zl_hEc^n!d4ol95h{rSqzYa7MpR6?)7ui>f#K7t;)nI6Yt?ywyAi5-&-Uy?tuD^Q{x z*G?os`nc$i183KHXu!BmH(NhGo$K`&l@o6cZgUX@K@|fPRq+s(uA|!lr0tkGkSslR z)SEA>Un0pi&l;ebmkUSt$l?KEeYWCli&>tW-G`*JpRhS?!&DnBv*9PjaH_mPCaakMp5?YL>yYIO`>DAS+?<(zdY$eFlucMSNFXX4`QEZy1B zo?nwa77+d+N~lD`a(-(>cB9MFa!XS2qTK4}%d1^H>9p1~?KEdk{p&u{b^ab#<{qs zfM)3B`Re+ML@HD4O5IcVOf4T;*u)V3?fy zGy%;7Zl;*}x}JsCNBGS_gljsSVa@@BxLy|R@MQPIdh0Pf)IwprY_)a-4qF`=trT3j zY4^a_eKF=5ag&b(l9~RJ5_jcwy_lx_Igg>)Cp4zLk*#XB3lvmL{Xn zXf=bzkr$hqGEs1HwEwkXZ_r`aVvkWInf`ph93~L@+vs!MX(x5{(PxQpgro_-uGhq= zCeZ?LpJwe+?dq7hJQ*U=3!I7vH0^i3EaB^!o;lGfD~@n(b z1(ueIDwzN$Px0>5t2z?Jm9Fxpq2uKTe2aX;Tfkqyl2n6}f!RB9~@FJJW%Hn=tIG2I!j-14kC3D|PP zfoG~ki`s_vfldkj^|!~D+IzUWy9}GVCS8}oiS)TzC$H+Fhm)f%;Z&xIZ1ar`SC6q` z=}o(Xx^OBjPp||U&8%gO@D*;z&4l@qIkJWGwft}AoYucegO?k^PZi||!kLZ1s)YD3 zTs78jE>%YonKO0UtnbSweFT7Eug@m|pT$RjX;Q0*#pe0}FznvGx52+`DRpGN;vye1 zg|3g*i?=0iRvGugU@{SWbRBUw?0XQw{XbL3|Dy{2%c`*O&Of<_Xu0MSqFOM(>QeZO z@fR@&-)8hBU`@e%iLsf~?rV3NZ<_>u=yz6&6$TuR_)_^CwtCa!rFv-fJmB7oK|1BT z{ibGnYbtS940A*=eiGCdx4R3GS9&YC7!J0ZPxo_cbK>fTqB}c1I(!~^MX_`!pnrlo z+MjRd5F)Q9Gv|6X7SHVRZS?BDd`TvJm8+xz635M#Yo_tM>ShrLO~;kAK5TW?0ox9R zlcYfx-W`WaGPj64Zn@m;HgEq&trNg;z-I)B-+qo z?3u`&-+ya}5Hxn>CM5rT3y~^e8J>FOawqXfRDsAV`q!K5oi-3wz~mvUhHb5!&&X?y zy%u5g(jXCq+E{7LfWS0w=8RtQV5)b2zKo+JTtA&+LF4s!^?41lRO!bGTVq{1ODMcJ z6{HD@I@6kN@5Wq_i5N?u+ISL)>;^z@3Hen-GBw<1%r1N&8OU~R)a!-nC^4EY`eHRf zu9D-dbPihY>rzKPqoNU`_y4SwKo87BnZM0MsoU~?rV~S(Nbfsmn)6!X>OU_LO`QG6 z7NiRU@7CY!&?{VrQVJ4&l{0ZtX7OTrz2b9()UyKuWjGiF4O}F{J`r_+{{Z>_e7Uk9 z?DE^P`qOfwLI)D|3z-)`tn2hBXnwpJ*-)FeDmO^g5)Jzi!3X3cCXM$G9yRli;&_oR z`8R4L@BLY{;dVn}@nt10#A150gefPpJXAL4E9quYwwRhCqe9q_@FDrr1xwJCmN!6z zih}UB7v^M?JY$K0gHV2YgiZ{y5+xeDy;z5mC6+2k8nQZFIJbU#u8d+4(RHu9`4UH~ zBq;R5ArAg*xj0soBu?12)#<|f6EPK%NJN88WdM6o%*+*ryHKs;xT=AEuCOWXW$`!M-{58uI^nT6g9G9c2p+em zA8;E;#B}C(rXV`CYsqGJJCa+I!eAt5PqEqOU=bJyw`clC;gPXHFZM@wu}8_3zO?^I zWiY9cO`vBqlMe*0GY<)vyyI}#EB7SD?I1!YjLIgT$arC%%OyBfduhZ^@M*o+t?H%U4{M&dEE7rwkXC(tgP^-rvFPG4; zY<`>_@pKh1J5qAR5!f<~%bc&h+#SFuH`srTGL}nYss1~+OP8~4SQRz3+s;#hgHO?7 zN`D;yp-37G!;oVPaKhv6dT-y}kq0KIz{N^Fr>TqO+HZ(Gy-s9gck>o%PF+^cwA1tj z(%HPyWx_fN%=|kYZVweB1tixa@rbPG^CnvD?!l0&!SIuzSNG#;%_~>NsG*J@K!h*D zT9XZu>`qc>Heqv7n53OwM!zbFbu|2UN9UIb3;E8Du#jCm0#dHoiR=_65~B%DqYb?Y zV$j(?##u^U@hR-lYXH6k0Z1Ni{vzSakXG+RYB^Qc`_Tt?CU?ogtV2dFX1o2N?5<{$ z+7x!4*I2iq-ssqRwprfzBOF zs^Ke%2wk(qcw481aQp`B8O5*9x97R!lQS=M?_b*F@Ax0FezSXq40}x@5~pgRNzAl< zu;JE6pW$?r7Ky_mU(o0E7AN00kZF&Ty5p;(X}x}0n;rhC zPOZnKphsqh5$MOahfNf`kv#Ea#1{MWJVKS12=x43-o7zK-mCj`_Iy?v$0Cv|Tvl69 z;QR?ei|`YZg<7ty9eMZLSHzY;&iD@E)7CrD#5X)=Ka6`7))|fLi86{tZX0=fq%#275f)mI_pE4Mkds~A>;A}*e?)NCY`!8-mvbbM4 zUQS~H9*J4@6o-t@fC-6~f#P!fVyjT}@6ayuT z#9<)u@93w^nY1QBDJK<8Ar3CagBbzt$SXymo5zg_#|d`m=h8ylB-+vA$)Z`VH?-^2 
zt9T(R$%B*nOUSN-375#{gRP+`#+0i!phGq_rv$iPT_LCorFCp60~j-+RE8ewIR?_` z8qpw+8xk6a(0HxZj!J(ls#`a%D+}NGqmr^OPPM zlgJ^iNe+Hh>QNExXc8FP$sdK$sM)%wUTrk5V$mulNXILm5l{Nj5z*Cd*-MJP#VCxn zxzL<%@yH18WAdIzsqtB}%{?0@qb`1`7%32jF4r+M;kz#c5}BBn^WkJ78ju$OCgL}r zrQ2oHgXfgV?E&&IvaLlU(CI~a$*cn{Un(oGh%?^EQL*g>ZH5EfipYQF;i(|2@q|&A zNXP!T3P<{;MLNU7HGlf&hHj$GW2ix&Dz3c3mQLdN>4c0GZX`rCFlg+YUcXAW#vi`y zT*@RY+5UKjcabI?WtL>G?7JGSfZCO8Il@f|+myc2%1k~pEE;F4MO zPeQ@b>po=wQF)Vqy@EuCnNblWU3{ah?53<~St}$GvVk`A(=G>j8j{i5f#;!0>SME2qMUnb^Ma5JP)I2-8#==hKIcZ zu%YEG3yec&t=6mI6~eeoVh!Y6Gl`o-LXq9K7`5Ao6%F_xrp>kn+THG6k5G-|*$n1K z+QyG|oD@lr_Uv8|;c!ffv%8%<0Q}OZ(6b46KmG)fT@t0;XH$$$Tz_gLRcc4{*Esz3G zp~B!Vb&>hP%tZb^R4PhE>u;UGde9J!^wI>*h%s>K>86WiuzEtJoyx%;Q%?=2Qy7wI zZ23th3TtW0^!O4d@*xtbbVQ&?*UzuXy8-%gAv~-5490WV6}ygDwv>u_;A5%mvvKI+ ze#fsB0#|TUZ+PMhLleMy)9(QZ)vjKl$i0i-2dN)D%~6yBkj}S{5OH>uWplw8LWhd_ z2tJXx?4PliEdA2iAB20)7Uq#w8qIRuKO+rFx?FbmYTvEYtI|8e9KlhiV3dA)IGcrQ zuu!T@2#Zi_P2&f#=>dhXuMT^4ko<`1CMkdd#XsgI^`x68bzM-X)KD^098sc8XErZn zQ62*9%b0B0s+H8DJqvSXzE?utz)qC+K+60=Y; zy&$eWISA25Z!iLe&!Yz-6AqVym#Yiff*FAl>9~|5Cexr^6Y_x57##GgKU7NeI4WbZ!1*emz!STp^4wU0Usn4lMww5 zIxsBT><;)7__oJiEk-F-4|!F~E*J_^+@r~-C&VfesEO@xdfa1|@ewu>iC~%Cr!}+X zo6uDhIhrvf3h@5W*wpD2>Kcs1>xUQJNVc0?en_eX?_N2awnQdh4g6S*O#`l32fcbf zgLIm|1%z9l%$dw?Pvs!0suPl3CrdIJZBE*9yVt=0Kw&ii$&C(Vszg8z9p5|nQ=>?EZ|f@@&Vf@dO)ql8Gz^VhK_v$@B5inCuV*E`CW`ow#m?{t!+9?@8u zFD}~Mq>D`@nvd#s3+_tsjUv(lt^sZI)4Zv35@oom>@y<~=tDP3^?CG=ah!36QiUp1 z22@E+4PJ4UrFtu(y7ee#E`wtl-k4gGw1Pm%Ab`k#br#&fdmIh|P$fK!eAMxju6vwT z7zO*`&FSWSbKW28o4{1*&H3_hDyc_YF;t?&>@G$)t;1CTi%W0mV;plMhD!ZArwLOf z_4{}?q`_!U*l}#V#1`~>_f(z>Rr=fJ4`o8OQPE?=-4>g>obB3)F)gz~fluSA4fW5^ z1&-sXOkh^8d>gVkhe6TL71YjZFKtOo9;D*kAN9vw8yZne5~k%U6tg?tsO2l&&A&hO z3nt4q?N4rh$tHGC_=ZIPI^`^7`uYP)k`fae0T+!qd8$!12jALg6@C}z!s5OQYg#M)dhCpIdGuA%&KjmEP&9Yf6 z{^x4t?zGBm5!d!AcP( z_I1pt6dJLZm1$rw_n%!4zmrA7n>&u{YXQ@K^wMRRn|Rvc?eb5=Yw>Fi0NlEQ!fL`DOA29Up<=BoT zug=Fc83%6(XmO;}8-D>ZFcfrIscKE15UuUTE1`^nyUYc1^6-J%lVF+K9WBl^I@3Nb z(rwBJ%|&d6lkrjt2~dV_2sF|8@UI?im)mQ{dLxM%F5m6bLbNIhBr{1;?4N$qli_P} z)aQ_fGb`aT&$4Dg>H2yru4+KCSg=T%ZcNu=opC!)>E@*_(qJs4BKSz*a_X0Sw+N3D z0oADd{09Bvc5{mnM*-YdXP&p>kT1z=7={HRcLH?tdp?#D&+oBu($)SlChZ3@joEp5 z77UJr7kJ#RT}6=hXX|k3t#?!N=d0~VGlgQgwUru;pTTxawQ~6>yB8}prxe<|s1ror zky|ukDsJ!?;9&z{FX-f-NcJ@sefqyg447xAndcP~%UY{EloSA0XVy_=tB$0e%oZzo zEYhGBVwM3HgYZnbkk(TBVp7{`jf))m(g#}amDdg`10V$Y8j*UfT`aXgcrie}>HLEJ z<#8~C)ZwaDw#)JVyC1(tEV2HZO4th89w|=`{Y^wXX(*BU3ou6IdmE!rw6}fx<#V~q z&LHfvdZ*rO%Nu$y9)}M6$^a&gmde0_G{;X{$z5r)NaqX58IYo}wNig7y1W2g{3-Fd zn)qD{awqldm3+W94!#Vh5`9+n9oP`8!rfGoJqf%liy;;z-rsCpjDk5)4n^b`J#OWuZrCgKH*&3ud})?EGmP>I_8 zBk{I!>vTXV!q{{SVXElZ_S-{Y{ zf%{pkH29n?zmwe=2-2VoH!Xm72P2VzO-|szWMTjqj{oM_vdR89#MZLGu#XNMxUxlA zoioy`g4gc);1f5@$0}TT-F^WuD9B1~B5k64zG3avH~XVsBrus32_<+K?*@xXXvowc zx+l`yqHQ8s0ba=i*F<5+VC_3jx(GO`!d<_ScmBJ;&VBQr&sDAtkAzS_|4bO@c5nq? 
zQ`unOF{2%!*MluDP=k!Cxbc?gpVp$n^MP9E3y;?b1tU1Ki< z8E}9rr;;pE7_7A~C99ibyaHE^kiX0FQluJGfTqzYK+veyAy47C+qO8rBliX42+@>8 zXtdlhk#Ln@QzK*lQ&LvTEikShh#es##!X5zY4HtYGYcB+jxZ;4vq-|@!mIR#;{hUa zw`zq$HsKSY!c{J7kfSsoD>e@i)KL_Ag1ztIbNK>7gpZlrcIphQX^jo|{IF@{I)p4u zUSxgYNJ>m9ocBj>P?V}zn_9YhwUbb04;;nzR-1^=M4{-BH=_xaKG_{ab%yyIE;}qm zK2#Usvfe!;TW^66f(qt(|Gn4!y8(KWhkZfn-|}n`W+lFg!N9<9GBEdVZ4vg(Z2)o}bY?;Ffb$1WhdgA@#w;zsjEv0X}g1 zDmr(Cpm@Y!G|_+LVml4Ayr+n9EKLF*`JZC3Cj?d_0!xZ?HR_xZ{*u8bYeiS5<5*SB z)3V?Y5YzL?82>7php6fHqVV(OVkG^sB+Z|($_rE3+j+J?(X1@#E^jFFH;~1Yrq%5k z{^wfx&K2py>mzN!{HlVlXMBDn&RfNj(Y5speeYba+%3L`^;!XyR#Di@QFUMYN?3H`#@ z+n=IK+g~sr&UZc9E@L`XgaNAFR#a+R*BJ!wLneVv<#9a=8}M#?veKToP)5~gvJM%7 zG962<)?flRD3vDvx_)!`O_kBWtR3iO|Q zc%SuArQgSLWS9fhg5+^nPZH=~=1csnun1O=AYJ^T&JbqmM~7b<}!2bMuS+y zuD?gS;h6BzQKWM^KqGG5L#@~!UT_wvmlrIUKXWX5uM#?3&MO^Cfyt(Jm|tPsfO!d1 zsF%f6z)({fuN86ehoWjI85t_Kn1A)#q95PjFKNiLG>gOCW#TlTLPt^%_{iks&>q_I zhOAD>P*Q8O3S=~6>U#PO1&@gR-6DOiMu5IZroCjlEp~UlV)fSwjUbI#q`emHO=hY2 zYEF5hd`4pz-oOqVX9CdT{26(xmKh5=%GNvIp6>&g%2m-}OJ(CR3m_K*V3vmdCr$6K z&?f@|Ku@9I(LP^&P;_rAjDJ}1!lHjePaJ``RRJ)N`xp)+GD*<47yruu=Z6iABu2A} zpA0e%2^$0!>zJZk3fSU5ySoX5Q#|nJ4M*Z2*c{9&ooQ~h!YHddDgV{l$_c9#;}usk z8^leMzQ|yx$baqdZ8}@8)eyXKN9aFrj#02E9>N>)#;Kr6F6A(t0ayh@3oZs!w;ZgRL#vU)uVmq1#F$7e|R$x zwo$8B30Ks(x?gP(fbO#Mv#Jm^j(8@3ckdBVnrE=tp(?~Kj!HNT4&ei2<*grZy+XtT z;X`LAN=H_+w9J(0TOZHDOEWGqjq)$LVbRr^ZcM`oQuq*kbpROOupF<2;&^+vr@eF|PcZ zMWFrXfI1|#_R@&thmkoiAObOU?={Jl$eUz|qHywzUI#6xae2HWJ6dk)uSav9@n(6p zyB2k@DK7>D>E{`~fGf?K$laSSYo8jvp*3tpX%G32)EExS_#~=u)Wl-uJ#@W4_UGgG z1>yF_l3B$nyt ziDk9B$7V3u5UrNX{0Xr}6(={MTi`Os4TNIN*OuyF)2dU~8Mb>~ zn5D1Qo4|OuJxrzK-)Q4$DL1>;$4U99*{5+h4o%T+^#~YGvACDw@{wVMMNAmfo}#Mw z0STfHyd}ulUhtuX27=%j+(xLc%A%DE=7JabNroj$jhAcwl=Ei6>!L21-M(2iy_pV< zVqoPUbtGEN6NnJ$8U2i=^Zvj>@9+g`WZnWh0>m*5okrH?CoAXc8~&cVe>jpC@l zddb0vOdmyyRg-bqtdMD7*KH6SWQCR!e%h^{ z_rN~D|CXFujzbTrpj<%I2M|KVH-+ic@c;k=)Cs0 zEI&jyAWQq;uD4QbJDm;Kt>Ae|iu#Kb;K$IN5>d!W*1cR+XteI<*xI8Kuo#TQJ$5v; zB04SK>{}ntpyZpI6|i>5jiys2?!B`f(L>`h%<0Ox9fn|wmujE>!}NR|o_E9nz~jC) z0PncCe33slZeFYfxY!;1yNcZ4FpG*rfIAzQfWNZvmR)zxItYOj+~MT{CyCxIL)It` z;gXfeU&btnHT4%TubmTsh8`wxzI+A#`Eerv>VL>*1 zMRZlh!#_#)V1`-)|ZBIti27)i0J9 zU!zhWevGFxF?Jj<{|Al6hi;ct*nvcPe8uY>o3GR1l)2}V6{pJP)Uwa?bn~kek`LVZ zX0EvhaYh6NL6dy&PL=kC`Lx0?76P~2Hr~a{{n_s%x?8AkW5N?i0};F=>!zaWYCjt$ zL|7HM(pF2AzSFsb#YR$_w%C4mi^Q!r>p6X-J18M@h&|{TflGmCJf!PA zIN3x@&GA%r$1nDakz&#P5$S?4DO&{6XNdqH)r-nNjXtMkulA5u8fR{@&IBlxzkmzj z5%#|IvLp7{`QiFYNJ99Jb15~M&i6^_s^6*lsPmfjo=gZnf!Vx2E+|EVK~oef)bnA8 z+8eRxbdz|UYXyzxzk<`t_v09d?Yil)j;C;iz zoyKfdh|md#U&|G8+F!p!+e?KOl%b=Ksi}qN22v*iB5->ABPDaCr`dR>#Idmu*nuwS z4LOCkFDHis#9X>v-D75rtp~#4b=Mu!%X2OKuw(MBl2}!Y;#~d6qib_{K>h0yz>F;a zT8{bDaL&F3c<`AR8wJ4&+%^Eg#ug4pLj_aF9ZVVkf{eg!H*2l9DSuxojNHdK z#|MC6xaszXVIUnnkL)QCI1V11KE(8B>Tp zwfe&#I4g|`E1E!DVFHa>MV!TO96yclcnTx3l36JB_jTWu!`*-8u{%tX2Bz5}9X@M^ z@xmlBsbokg>2NK{iH^+urvFTFB7(@h4)E=ZqQFc$hpQ=t;j-h>XtqIrRoBS&`rwMf z_jKpe0_c2c499XE91J4fD@0$WGA+CS*lSrX?LIcQKZeA#LLzZnH;V|q$E;q zbmwdJXlKi4@B3q6X-23@V&Iz1=5)x`c8oyu1=XwX(iFj%F&u&O!Yze#g*isAHHm1v zKVT*JQHB%_e2RZdoboCB7GU3Jwps@Bc!r4@bBKy!9q@}{Mfr$IV)IC7XkIrEI=oqw z1SiD@lw7#sSQ7fay`BBh$H66W=jS~PuFbV{aZNOTi zHeS9WjkVjW8(TQ;M>b7|x$$N(JvK+)NAFLE%Z;# zy_5VSJbugmi}v{1K(pBqJ>9L^LNnHJShKxflaUaykNk|W2`LG*sfJ^P8yz@LiZ;Bl zC}sx+o$mOh<6?6mv;Q^1_ioloWc-8?k&@c08i@|_0N|WIaKDI>DVKVS8lk2rTUa;m zW*|IuyKgu)Zk_~@KKH}-8V5Na_A!H23^43e8BKZ#-PBxLwyT0vL?iBfTJvC$@d1@p zC$i0y_x@l&oAVfMnDRn+eLjV0UMa{;$TH_AjsIfsqQzvq+7fU3HmTM0sZpBk`9#wE3%%Z9SGPy35B-<|8 zRf@vGK{6~yfT1W)6r+Q_+|kku;vxY!D~QD;ecsaM-VYhfPUL6~Lw~*`qO&7l&vdn5_(NDL%A%vwCi@=5cD)Vr7@$kBbsT~rrt&{oF 
z7Uw$YtdHNGBOW5Ke1T6T!YlVt&JH-n&@0MNsL#ECeV0))yZ>aqq$`p;AojVQ-{7Vu z5dI9XMxMEeyWvIrooGAUi+lVl6pSV%qqscS`YT94<@}wcq?gH2mnoW?nuJ5Y^3U&c zxY+U4l1lL6Ty6Tlhh9IulmhZa-+M&P3MWKQu!uXCEO%aj#jX1s%_nET5kX^!K@tE0 zuqGv2>Qy1YXkO0S2OIW_1!P)g#N(mj9O3^k_mxpuZe6=l(hVX?r+|Qf(%oIsA*h7X z0@B^x-O?Z>-6_(bG}6-DU1tG1*!z9=9^ZGy`EmX@9vzSObFaDPy5===$@@Ecjz7hs zQDo=L=A$Elrf1XGh#Pvn{?6jP-EJBzYtCb>1}A*!L~g`=-BQyn0&}>bod!v`DgO~p z_ies>ND4^LN7)#5b9%=(6intxsyJ%=+q>kKvTzly4B{{d(Jn`onyzI!;3$55MtEG6 zw8b>Wx`zMQRbuip8DaDcOtxesJD{}{*)U|A=__$mGhgGg@ zz$j|7Fri9Q5n+cZQ`+simF_f&!H`)p?oFN#KE88aEUOceLXL*mNDxx+hZmY-2&dIP z4zFUkEC5@uf&VAx>O2cQq(fB;^YuE9Bj52nE`;z*gZE9G15^30agI=*baGn?@d!9Z z9Z^_Me=NS%KBL^t2{@xgFbs+=_a|QhgHb(fMmy$tfAkG$uWjbEIVRqau$M77)4A9le3{afUcOGR* zCsGUz@ugaRA@R)M`~cyzEO8Fa)*6KWSgpcKzLI`iS6a<2c()uJXk^gZgXmXnD2Xi2R0z+>2*V zBKUWWk;!H|eHCJGy4AC*lN3ZexJk$}B}4f|;?nF4zq0Rs(h0l`qcG}(q~?hm59d$c z%K|;*c@aMQlcn$9wgXmb6cz+%=e(f!4;rOB`Os3|+Np9)Lr+d=r-nJl=X@4~q0=)> zuh2M@jLbv((G$GYKXCi`T7_k-QD-%9Uw9q|$WmFoOuUJ?3}Ue0(7u8kDOm9>-x#lG z!mlW422=L5U-VA)B@b;Sf}9|9B71qX3~FXi3_L0!r$T2G<1Z#3D~x1NU)if^kNTso zMrTDHn<_@M3k>{ov3x)9WTTLH#{wOLw>6!7r#FcSXOoDsJv2Q~=;&~h>ul!U?)|R{ zO?8l!h+E&>8$3kkW%J`oxq#CL3JIs*s$qbf*^#SR6!z-MUdqQcrRvG~K)5Eg_2oTX zkn5j(0hat^Fjo-@g@6+YOoLOJ8R^B(@2vRsFzSmHszGw_{&|esxfjnR91fJE7r&)u zja_aM;u?#Q$IC3~&(#(nu7l7E^Z7d4KrOi52$zj6`j7_#kF8eu!0e@cO!V?=)v3C; zPtBR5)ZkqEVKUhvoeL~ft~}(qJU~~+=E4* zsCfcNDWA4%O26Ju9UdR-E+Hy)FnVJ{Dm#@XORmyUQZx9cx!gyi-{LyOxY+B&&=tl* z2u>^G15c2U4tG2wKj#;IUB3;Twm$QUu0YLGZFrR{0kV|T1-T6>oq=>3wa^ETUqt$& z5H{%YKXa8&mnBxIws93Wo~U+&%^iN0q>fdjHbl+qk7gtVs>9p}>Q4E6ij2eM)a|A= zCrj`#yz;q+PoeL_@CC*ZnBh3r`mR5JJ@F)4Eaen5I_|06zIJ+Aq5s^Gb3XOsV@@ ziJ>SQ1^@n|>xG^YF%HW*^x=AJrFM|M6Em?MRmQg*I)n->e8lTH-F zZdLn|Na~?&pjBdp{6sac@*%>d!`TCZUmRaTLK=t#GLfivcYc0co9&J*38B{_c-LR~ zr9TMH0dWqw3`O3fwIy=AmU}IiJ({g}{4V+gE zmoIba)eD{~6f74U%X|6!6l_F6K=P>kFvl;fARPri0IK{ILVva=w>JZ}n3fmjqhd$s z8%;;Y5ULS<|4o&LmdIGgEBof@)Z=x)Bs(NAX=7SADn3+aVMK<*C;<+=0mkz z#<67ryUTs5uYQ=18TEVN=op%-$i6BPe>B`E5tsP&k%Bnpo1pJ$=2@70t^$?o)QZ2D z6M-*t=ePhIh4XDivEX+?cB7(IyYG8D#N0gZdOJE-Em15@J1p9K&?G^X@_rPzz!Q2( z`8ucbWAv|q-hI-wZNI+x(kOaPea75=l$S;bl zP{V?)7~m#ZfFPQdE@&oHx!}`Vz(~)~E6|Jb`tWI8Wip5=t@?*=T%&|l-e@>_2wJ=Q z=K`fdwcK%psYjeF&S%I!t(PHXlDNti9}&CZz?lBXgSVMgf)HLociPF7&yV^PM}g@U zhz7MC|LlIQVovI3GB>TwS{Q}S6!t9V@82_ZEtZcY2wMoeQzNd57gn(&goA*S+Z9O# zRiY=!fB?0|Y~%(}x!_f%lbJ5QQA(!AwEOGv`CNq=!vUe_@ac=(t6})2s3b&Tn|5Zk z*TZMguGG8L8aS~c^3YQ?_8erxdFfGS>1TyvB}&Xxa$UwG<{D)Vz6SA+*&aBqToPM5 zRXxEk)Zol|`L_IH5`|d8LumqzbU#X*-5PAyhHccR3=G67g6@0&(faUPKLncUwL4lI zvQT;RjZq($xF3dZ)67^tT0Hf{1?4$G=u4N5(1QF$Wk1l~n{LFFp`mGOqWKOYaoAb8 z9DkToMWh*`S6N17F_};}p^#31W4E;$M4wiWRyoGD_U;Mgs(E7-fhI{-c>iF{{j(uA zd6X)}GgdS(*U$BBJ#m%g+OT4lWR9`@LZ>3Q3J&%wulW;la6AJZGKB0i3-0{*HtMI| zL)b*Sw4i`JofS=&k5d=A)=C;Wd47H_7#_hwB4m*06LQklH6r?5nplyIAC97IsN7gh zfP#!_JS^y!tx3}!b~r7t$Q7Pwk_OqdavN;+I=djwudOE9W58pJ{uWHd3d-O1{D@uO|xV`E60eS1j!f-)2 z^aNmHyPOC@rcwIIi>A386;AD3kV2GoG36(qn56he!01r6d%VEp2!6`Z;}W?C=zA~)m&W!>cR znQ+4FXfqCt$9a=q&C;|9PJ2-x1dG zrR8isGnX_Uq04(-NWwQFj~+U~E9ST@UCta-$}e~2C0rM_o_`=6pMQJhI#uK}r&qQ7 zeMcRb>&bD~%1K|uf`DNPHDmiSgFFhmqUlV+K%`tRK&dh}mL`I)nQZ8E_{>gq_|zW-t|YDjZ3H*Qj<-wG8~e zIZfJ3d{9kCXtLv)^ zDfo(HC?3+24Qx*PLnK8F!X{5d9KIU-x(o(%h*I{j4TJ}lt&>)7S2b29F+*)DjHXhb z&;vD>9ZnY`4e8K)!&UcqvfeSPRJU5Ra+Rl1Lf!|P2B>aCM3=462NPUJD9EWS;~?p0GCUN@d|W|WvhOEKqj=?knJQM z`8+DR@KsAQ45DN*>d#SgRWfMxM_KF1;5No=rMgGd!E4ju0|)6sXphYZ-bK1D9EOU) zB{5lZW6>*N?Jo9^T4&)+JD%TDXEyfo#~Iox#)j=?1s3(NH^zRKHQ__B@0WL!y2d0J zn_$B2_KAa#opj_=^xVUGcpj}FLY_|_qdQCC(n)gjqMxWrW=zp?4DiF*SrKqM2r}mc z5oDWdmYNYq1|mGScJx9Kz`p2{Pb{;SMUu@ldhh%0DNTm|$NgAv!y&}LPKYTu^1mLk 
zn&Ms;=YMOXDvSI)>^4aCl7l=dpU-N2G(gKM-}&A6qu>jEbd?fw-1)v}Wf)kh%_xh) zfN^05dgHgDkYZuiC95Dm)m(G5rC4A7fJ7W_s$Ms(G~c(pLF1h{I4>=GFW^gD$@k;}rC=#>8blVw0Zz6Z**)Nex z!R)7(Z&keu%T$wtM&F|(aX3ztzCZC({1ETxtTJt(3{wuIY#EQ8#UIl+3&;v zZh+|V#|=i4K=*k)<5b1eI%nLJ7ZSQ6KigfqpRqJcn!FXveAVy)bDxkEPC1U;JFgN3 z)p$sNCuO4MYKBl5DymIPq!#;m$ui5nOs{zZhBD5;TDGY zPxe_G;(FumwZSW1Cv;SPH1~6OJ+H(+v>@Y1=4YqpT0NSp)2UsB8? z3Q}w-o|#ssH$lw&YrC%FT@C8XpGsMClb@b&B6V%fqBOo+cucFRS)k7h{!}|to z=6xfB)EUlG-Nv`HtaMx|5cH_{`J}rkz(Yjv1Bp?VWGsSMzVnQA-C6&S;mTq{&1ewZ zE~qAwkc2aUy)lo%ATihJT+e42-rGb?ti>N58L;34>Gal;rbe z-T_RF+~Jmj5Y%(_o`AHwU+efb_ARe5eDYIOM!q*#nr9ezkbTZqlT;QIP`fzqKEQE} zTnS=uaXsFU>xk;}559VOouJdH)zn0Ek5_LJ-4r6^CYr%3@Z(4;)Ryo-tF&9GmF}@T z@f(_9nqcAoP!|o#ZRo9&UXP$1omZ5+M6Wlza2>6tD3e4**+ z`>R!ZDgifhucvDqb~sO~(BKHlq_9MHPAyXEsm8Z_<3}&vzZTca_)=~>fZ=2^`ZeSs zSGyt6bHjdFq@4M^d3v2kBLU|pWoT{|v((LjYx`e~nrBH09$m)CHy^`mj|9>3^2o+y zR8{VpB~0s-82BGL*!u3*E5t6*?*wbcF&)twt%ZR}V;Vh`(nssj9C6pR3PH(Fa5Cmr zFhz($o5YO`{TZ`&0&>Dr_-(m%5}jt+L)#RGP_apNZJRu_@e_>xA$j@`0`ode2Z5mdb;VkAt=rM1a?dbw0!2BnBdO8R5VJN)7s zZvD%Y%jG)$k7o?{X1%jxUtLr|U;^|Ii?QJ5MTx{oKu2Ow7-ft5r7}85k?8lu zB3g$ebozo|e7zHRWxS3NP&)Vg$x5M8Rlq#m?8;tNk;PeN`{p7K?)1CI$DpoL0OpAL z0k+--3=LqCtA?9-E3%I&!k>v_USu{;*zRE~N}i${r*FwVQ7#V%t$E|W>M?U$(EN>z zU;e~+u#{Zm+faN&sxyw1+7B`eYud6bW}+vcx%@`!W9{}>Qp)e0 ztQF0l)FHDI5)|I4G;kyB?Jx%(E^eqVPnDo1u-mG|K?xyXyG2zl_@P6{A-w^s^X;s} zfs~F9BX^gxwcVbXEy5i#hjND`md+dfUi6x4`AR~|GlJ?+z_ywI)R`If$e)bPUSz+E z?=A`ouym?B+joCkIRr_iA5ttF+n+(+Z|=3fl5SODwxBWesO%#Hunaf#0An4`z8k{j zO%^p3Z5mQSj})lle+weWd3+X>Sd(I3SkHdepbPRJq3K9Qg6WZM`$)=P@>`69F(}0$ zcjnsMSwH-0SM+LIpiyf2QDn^Wt{{o?gNnQbPNMI-8vQNq7@zTliPq@H=BZWeIzBjq zIf?_81@h_@Zml!)6Bq)LmwQY6zQ2To!o#LP{CLXU5gC5bSdH1+sbOEvp%p3LflmEX z7$_@8KdA++?CFRrZR7M zNoop2-t62f zi_slTa7s>Q?&*toPRpE{A0aY>@>U{hrH%R&WIlY%;XKf7@bN2tYYwr(k~3+$uKsBp z@dcc_p?3`fTcp)vCfjDnraOT533$WB*haWf*TzUhePrbmSQywe@pY*P4QGh0soV-E z_!A5_#!W)A8c*dXiI)#6^4Z^f>^cT1?`2DNdn*r+0+#9v5H;E|JHz}rMatdzZ9FWM zoj4{!L?)T@&FLKVMas$Qfgv0cQnT^PF!9c^lS#j9AE)1kFnIX;G=X+aSa|{|PsU5K z1wzX5xKh@Sr${mElgHjt{!PYt&tFoOeX05sk4qerur^o#|5jK{xg*n7#G+Si^(MJj zG4egI9ZZC!XQ>M^C#_Bb4ugOZ*~drZTTKG54}N~k%{ABUY|O)bslfb&_ejz|%J`#S zn-&K#E0YNoUv7=C@8TM$jEyMgJKbL?=_G%cY*_fA%UjvG)=S;mr0Gi4)#pNgg5@9{;pOkCHWL2T$0+{nsm`UaPKtt_x&V-$kR{%9Z$ zr28$4OpmM*HJPFB-*3+BQJjgQR3IL)~<=&N{e&{U*jZ z+|+UZpR(f5_&|*DTjOA^95NV4tgGRx!~`-a8U87<5G(^$de~{-0^9+MicuWbEmUd3 z&yDuPcgoo0-ELh1loOjAFz74F&|lHgDyAcey>$<2`LpyB=w!|}Kfq|nUyTI4ykkA}wH zl44B7l|61m#ilCj48x+l_K1@i1n!RpLN{*Jp9T3zi^(OTNfM3nFJm{YPF4^afnkme zp+DDm(qOO6oL^5UQYZu>7TxP?v{g|~<9NG|tBvjG0%ePEk)UITYI~n5ys?hI3hsa} zaKv;1`T|x>scLy@2$S7<_2H-bxm@=mIvf% zGx_3Z^<)!$kYVJP6B*v31AWj7yb>|$W%LxlBzPAZi&v8#bETF!+m7S zpB6h!Eh4fx<{yQOH969$oaR26XUH-Zm-kK;$L79h^=)5*0c{#}80&riz0B_18XW+| z<<9|>m-@0E%`iC8@gQ=u?N(LtNkME*bAH)cg7;UxYWGYC+HsB~j93V00k0hKgZ)N1lW-#{7EVIq4=lYofAmlB;`B#v^E=ND4JqMgE9e4^x) zVkN` zD|I;&VpMrf$zbppYBtE`{AlpM-xYN#0KuXxF-tp)&t(OvR_F~^ zUbuAM{ez1aXh5w{6eO*BEgAw2^}CK%gGv8r`cE33)M3BMLM?8b3nPyw zZS3Dp%AORvuf+r{9dQ5l{iTNwUI8wVc_fQ#DvfM2J5MMy*%T{RTTl!5pMb;N=2Jg z@oiw9BQimN-%}dj=qkU+t}M4Y23&pOTEJyT!?1t^pRCi2|Gjht4BL2SbSzr=@r*fL zEb`HHL$4->wM)$FhO-0SrxjbHImX{~g^3JEQ}p(KI?~;+l&=yY{C)>J7l<`)g_WRA zm?+5};)t2Oxed+~A%dli!{BhJ^8#df&^SviJVSFmVM6N!=GUyFBOI$hq5|h<&}+vJ z$O(012hf49qxM!Nk%MkR@v4`A`1f0BNreb%bnp)CsuMvERDr8t(5r2%`Y&eQdH843 zQ4fd9Z=c(iq`aHuPaVs2m|5y8vKd1V!)J%<7zvH^NvUxdBLST(rWQErs(Cm$tcu{Zs*o?fbDEr!#Z;JmGAdpwIa3@8a7mQY!Jc^Rhb40TI4eDh&w zA0kkN86Bv9Y7U7ZA__05r)ZApki9}@qUAhFE}RnU&VEbE|Lnv!eP#~=Cq--^D%Tp? 
zJkR~iN+519XFk=k)^UkWA?KBZZb^mqetbXw75OuRbymgfdQj$CW0kL19^WwOwpzxi zz^snAxzO3P|2|w{vy|vEs~_8PX-lq#B7k7!^KM2n==*%#SsKf3Uz{%X`pXsPXwe56 zzYpCbLnW^0KD#&6fD83E&vpGt>O|0WrHE@GCeQ)r#1V4pAU^=_JGnbCxURB*vUW4M zq8C9ys0B7StnmBbC^rN~;lH*lkczMH8qn{fK2+?Aeu%-=1D6ZBHYBH$CO=8VD`lp4 zfJ@i!<=1ZwSl>hAD|>r(`2Dj8jwTrszu8&2af(dgCc86T*rPvwqy+Q^xdwvrI4ElI59!^pbeIz?tBIE4zrbQU} zu}fcytJg?Gc~{qxlAxux|B?(}!srJDW@@yi>LYq6ODjg!eqoeS-4?auS?_goIkx*6 zm2k+9Sgy-pUnR27a5(t1t`VP(hh^!D1aSt@@cTQShkIyOO*`1`@%L>`rkKq_Dq*{? z*jnu^n$ED;J_=hz?Ibqh&XkEp`4+J2c@9E`-47X_mZ#M(XY?*J%*|~7c>BOH=ep`g znJ$Tm^JoW?vL(TGkzQ)Kim`dbyLyDYqz@sH^zlDI8j(0LnCP_Kg^dq`Af3!fP0NW| z@ypaHx{&v>)>aTR2Hma0_bWV@1pykI<#L(dd@^tKd#2lO^8?!1@f2|v=Sgny*vn@> z#pqedpm!JPv(Gl-dJh;cy|75_EERF|8m2!dJoi+g`+`={k~=NGQc={YX!z)oyY|B* zb$cXJ7Io(kn87Gr=SBS(=b`Hr(hcj#%KyCt_9n_Bhz|YhTOcABof(Z#mj>L?GbM$ns{Oj6>DUjL*==Qq4>s?#a3Db`mN)PYSK2iM(Q3_h z7}F&~@!quhA^%AA{v3(1Lu0kBg2p1PAz=Q=!3&IL_V|E#KA$=eEQUep1^6Owlbgx$ z0zH*OFhXo!5|4Wch_)W&mSLcet|U4wK#1|NSSL-$f=o0hYJ*)F$Z};doo=z1h}$?! zv=?2R@bj3VWKM*k3R3#4h7E3s_w(ysArkV_+1_0+tTaDBNZ_)6b$UggKzXItnM_Y_ zV>9S#`M#eQj3IKVKj&$rzXFxT?%-Yp;m+^Nuv2kEpZS6k*oZ6^B}1Nz@!RbDLansl zg3zdOLM}HM+Z?P~dQ}fO-xZI@sM`q#s(`)vvcEaNJA3e0EaD|JT9ht5N|%UNBEiLZ zGJ_x%PT`7n&UZdmqF;n7R_`716)!UEhXm9k9@=Ne7p+?%PhRtry=-&aN_EUiP|{FPy{loJvFG4Droe3d7+7 z*uo}m>UfPxD>hyc!Nt~PMs|mD8Io%YitJj=2Dsh*kjH;_^5mcaW{QL&wzixfp^w(< zl2Rw@$sv#6SW5kgZtu;5W{{xYF*L7) zi&#<@p%QYrg5jsDSUjbU6cS-rdc6s5ofNiZit@JWtUT8S?=}A8e-;ynF5q7a%8u5# zJqe%LR3#1bUXEH?P-*z9>nTz=T%fa%^u=__jCOlsp-V2IJ%SVFH{LR8MFpt`r}VCJ zZ`(lZ_qFzwR*J603sswz%%1P2E*0x<4n*B(L{K*rXAjY>hiFtP$vn>Ur{#j7rS*DZ zQvJ@Bl1)=YeF+tr0nNOkx*cJnc|F3!%LZOv^b2>r^K0wwb^{{kMGt6LVI>^A=)V;u z9_w8nR~GyCCtnkM@1fh#Yq6#wRyiy@+Qwzp&}U)7<1xP%MlO~6ncMah>AC5cu)hAS z%|+)+vFcfUX0

=>tUCT(!6K(TWr*=e<^gT~juKa1a6FrGNCJ;V=JK1_>~6W}J_g zLf=u!VzTMFVDwwG58P@9Wr-&xU{|x|w5P~HB(dxl zHx9{oMn0BY^t5k{spMUjR!l9CE=<=`3JdHmn}wd~C2-gGh9cjls$CoBedkid7sRU{ z0Qb95E173yUHCpJHpWvSR@pR!A zl3O_J!r4ydK^4cMPt6T}LVOH@Jt3T`jEda?-ogAeK}po{T%KGU75v|ja22FPXOD+B z#=$WIu_ss~wFWgh@wRd6P5#K$G6m)m{y2r4*_lht+*RNOnbFE(LAnPF@#sq?B4Kw% zE0%paVtCAA;`)Aof-jUGM$w=}OIG)|)t^T@WrT>;e7!OkA6OC7`#0Xsfc#&)oj)Z> zmhON{qYP%RyspfM9JakoP~Co;gP~TUP%e@`@a*30 z!F$H5-Biman=8nm+4C!drqO!-Nunkgz|8{zN|!oh$2eKZ9-1$FbTH&M*80aj|7%P; zwnINI#B_U^AxRagmNsjm_Qv2xE`9>*M{!2Bcr+sl1$q2wv5DG`5_n#kfT#!agXts+ z#-bl$BD@2`&_5mZ@l5lEV(>hw0==uXbtz(V_o>e=-Mv%N^p!zeH&3k|dO~B*pW-2i zE`$5<&_Q{~=VC)d{obT-D;=^hW=i3icz)iV<+8KE!Jb$&BNms5zk{z2$#tEBpT-k{ z#Rh+u9|$(Kf~W-CkXwA**S$Pr>FoVhYK~|7Zu=|!+3V)Bi#^jRb{o~dD-bKPE4WHf zAYO?UX%3&&@}Y3hBQQ&m^ZQU+009)Zj|8L>OPMcR9j73jA96UQ7SR49Ke>LddM@b8 zHIJG6$0K|oz4}sfwC)VWMSo<|uP4@HW^2w7wJmtld-Tg2njI>EhJ`BSf`Z~P^-8-P zysuzl;LuYRx^q;1h+D8bFaQ_hu#+MH@A+>1o!s4(_= z8q3rnR1>^5Z0Z3q<-TFkh74%&qZ@ttlpD`PJZf^U3gT~+A592No8JtpN|v+Rnb4Ky zm^9XZQH1e>LNd2Ai2f$EfJ7j%bOI|HqI0%H5~uGbEXS7~9F0$}zf?4(lm#9+V$8zE&+@kc~xV;2DpL<3N8NC<(X-= z+-DPyL078pApF!Y@J#ZG{U?L>Q(&7MoirM2(sN8CM zQRo664{eDKAHk=mm=D0Wa2sNBQ^5A=W416QmXh+jgecAJ$+9m0oz3QjtrLd_pr;^9d9G z{4t4tptW|y1Y+6wZvG74!+)1^3pcmQB_y>L~K)?1y2Re5$AYSpKUVJ za>5oT<^%Ooh3akoMj6*~V}eMN51QwA3pMug=EbGXi?*s?b{|hCzT>+G0d4+dyUBj5 zJ&;AAXZDpe>U4#3!}Ep;&-Ig|>48WiVwZc6a72IoWB$D>BurmLF#YWE6sKdtCaiL{ zHs6WKg1N7sA7fmS(G?hLRo|Q)&-uY^LcT6eZlq@GUM%y4KpI^Me?>uJ2580Qs&-_w zuD1+y?ns-O_S%0$u5%2XP+**iAhP@Hs?l ztb!j)z2uIeySR_^e02h`tNvJn?x|*HotnJ-OWpSGvy2~Cy~7gX-c(fBmS$LP4J$S` zHwRp56`m^*m>xyQi9oofJZPUCFt(^vWLHF2uT1a35OuC!Yp{@i8Rm8x!C(uG#tWo! z^GB~Iub@up#TVCO)AP69GP;55SDrif1zbZ;?zfx}5L%=FV9y>=EZE={M44VK{GhWW5vc_L?sG^deGF7ug2ry7&tb+**z zBHTIx+he|!$rh&lnx*MF`K9vHu(+hXGjo6LM9Sz0dVR^cUW%TDLaF!wcFk;sFR9(J z@4ey_wO&w^j6_R3=n%q*Vy-SL*`%ktS~X1Tt$SvD*k-Qli6C5t0L5Zr7i=_D+?uWI z$z5mH*@8M{I%ryw)G}yjmUSyrR_vLcO@^43{Qew27bNJmBub@DEjVcXBO@l1-^7W= zOB4aK&Rg_xi)41R-sSc8qewVt*lw!VBn6Na!v$3It!=6--JLZ``qlX+7$z5Irnt|{ zd>jrAIwj8rAizV~rCceK-_gjF8j}k7$9#6yImD$J7JtlhS^4VK>JRnH%cbP*>VcsD z)L5brSeSkHcv(c~!dGSTASf0=qi^{mwRP8_t(W^qu|gW(_U**G;7F73{A6a8tXBfb zG{>hCpMt%pwxq-2O=_FoZH#8bKRvemB$tKQ6UQ9&$-M{LQQ4_RoWO8g+Ir{oRl4l! zlh={@+ZE=KYl%aSbtt9ObueG1ZT4t!+3fO0h~>~jcf0xw1IyjS8Q%+Sy173!{fFTEjOJe z26w$j^ySApo{FgGUcJQi1-~;dIkQTwIR7noq~mHZS#y|$S;MRe2IH|hF9|6r85Nak z5Pr$!I=h3Y)7b@T?29$b6Q_gK1V^GY^gm9@pJ&GH6)#UEF29`&7-TTwM}#}^D@K@dN3Lr5HcfYmz^ct;N-`O3w4Kx54=mF5XYWi3CmPO1 zCl8A&=%l~QXdZ-i!$)D>b#sYbJ~;E~o^{2C3-Y5Tl4JOCn5*}GxKpq8&kX{n&=7KV zSmviJk1;9RP7E0(rH=7;)#Z#W8gw{r#8C(6{{Z|u0tg9paWca+aGj~ikF+!#$&E!1 z92icy+`z!coQoLpLilO&vvjOLeg8!M#G&I&+J#r{NY~kx)b6AEP9FzVaIc2dg|G6k z5WYQ#P|sD49x_zwa=p6TduYzr5mx+W3?q%ZN6mF_)$p^Jym^=$p>96){+`U|z>}O5 zV_*boH^)D2XYDPuFGRmuRU?0y@eDzqG)63np!v6Dm*uXDS z2t1eD`Cq?%dnBc*s;Yu|Y}BMtwI8-y{jS0J=mK^3U3|H4B6R>R8~>x{*#N!A>tC); z{`j7q=E5tB7~}h@0ksv6VANInquB{!M3E9RV)Szww7hy^7o5BLpz}1PK8JY%0Xi!y zqhrV80@s?mEC1%ClpyuB{Woj&hw^Mfo;D%7YFoz+=sH6-y)ez8!<=$zF??v#H|5Cq z;TIW|TB#3qHZ9q#5>fBPN6y)p0K6B%^7)aD2Zr5GXt3E zV^u6{W?b*lqC*6b%#YA`$!$Ke#vgL9HQ{~Raf25G(2o{s)qC0;k5dN zwBW9z&&u|LXSp#0RQoc2n$%@$B<3+eNIYHO(3^TbHco>(J^wAR9C@y#!snQtK&ORY0_hT&2O^S zt3hnukgNI2!P;=-hqoeCR$XT5`9$YBXZpG)%3AuPGA~}q7bHAYW9>_uHm+ca0F$!q zz)CAqbi_`6?a7d{v;S1aDLgP9=c1)ia+)g(6o+n-%%)8`!O|w!8FZDri;j#h{jliPhkvOm?5*>dg-D)gSL>6$fP=2NLHu{! 
zvrRzy;bt9$O7JL%;iaWmV){d-4SgI?f9}C^VT!vOUF+FI4A3SeSv)X2A(r;YY^I5 zI9efkJ@K6Of7kwa&|h`gF-flkk$P{)Pc6Fv9RzWe&NqD0-Ni&fh`;aABC4(CN=Srqr@ zf2>?D8)|p|8Py%E|5I4EpWw(LE@r%V&CC)Gwv_0iua8W)l(blAr?OV^%GbhN1+mqk z8mU3QS_9BeS7Y4;`d*=^wLkOsT|-l+vSKNJ-;z2Fk!P2d^M$TwL3DdiA_o(fFgp?2 zAS7FNQ(TKXC0bsus-naif@6dWrv!_Sk53c&17>|rMn)!lzf?g?h9$Vy6z~3> zr=vkb8Ys>bZNtGMB39_fheZ&HiM6?DFG9Py{~}`$`!}H=LqImUIwn!pzj$RZRNESp zj?m7v3%o|V9<{V2NboW_yeLeCi`z5E(4R|iu+d57P7r(}J&I)Xs>|ff?AV)Ti0X=C zng6gTycCdDXsq1`km@v0)XHRn*|5OL1F~lC+KTW1y%=;N4 z+{mj^PwapqW#n>YB&G31x#Z3E2X}tRl_mwUaaF69(!Eg$?s$-;IR_V9N)|o;6&ahWq zrzp$7mvN;JX)*c?bP1~I)I&&xCPPSYA>60kAxl|uAh4G7PlN+28kG4eD#b>e(=klH zM)?+cPGVY_1Pw1hKbY;#_x}7Jx1V^EFutRj0JnAXeZG8EvC;IGa3ZsrBoaL@-rgG- zoiTbZCGr;02nyOnc!*?cg<=p8yr)gqE67~@8Et$=F(Bn<24(6*B_u*i6%J@r3z3AsLa2- zHw5Xed5(G6Ecp8){)@ng6mCz;Cp#Fk2{R%T= zfqo)_S-10u9_7Tmi?!AtZE`UV_bKXAvu+|kCrn`w`t+jsMzheW}LfbPw| z=Q?At)|Yj6_j$`b&fB*DMP@o(E}E^_OQ zb0n?9fF0jfci}CX*15xR00K9n-3DU5ooGM!yNv$aNg}l7+a;pa!?A*QVXXj5?91m# z{!6%J?7_{m^y5QZ|K(9GyqmXn$)YX1?)oR35YgZc;{*PpVrUN`Z~TfzR>E$`|DvtG zFPmTGW{G&sI1t|8U6zzwc(r{|J03eAq9AEfa2kjyXqbB{N=!bwe!N=QYL|8l9})5yfYIB;J8*?IjhZsvHApVJ+Hf~i4rfCeHZs>UU zmo0K7zQx3Vy{4ysJ2ZFyO5|~iiNp@*X3GE*O~4Y9)t$KkVEp;D|4+b6@`AKEjf|83 z!Y^WbTNE%8oV5zcwjbA#)8CZ9)dgKtCVGGWx!7H^qh~jk=yL;2HEsa{e^K_||Ko3u zFc-Up6N!}d`{19pl>wY6fAJ}T^Y!VZL=Qb7dpdz|Z;$`aM-Tpu76^7li>vFY8tpY)_wCn86XDwF44g7Izcz9&IMy zNJGiwnNA~lLq@Gd99s0ue$ybM6st^4jo?_W)71f!u|x>}H;p%dsjk|rFyWXRJx zq~X#GyCrQFOrMM#a&PwHu?^C-h5OqM{P|b&m)GH?%tjRx0-m#Lk@r?zvsiof8|o+` z(k8BB32}D(@+Ag&s~SG-3$9jE)VqiQno7`Lhhn_HhzRg$e`vo-B?ccBVfpk4iogxL zVP*M?{R<}XzPhziOA964Ux;0Jd2gJuECUqOUS2avNn(A)=k`LS5tvRK&+aV3RU600 zuiJW;Qcndo)k#hG&96`w9An8ExW2O51nGv`ePq`Ebye;3wlrHDwL%BSw(d=Y5+O{ChWayKF^ z%JT93mk1ee_N`GASO~C40P1B1sh2vkwGw;nC4!I+@B?J1DuGx)y1Yv6zgLT*7{1tQIJi=}%P(&|#n;#G=`HfKH*f?1s zn_gqkoxJt;_wW?k^(75UFmK+P)BDRpHHoMhlLZ%O$ikB7+Jf;zEX5Ej1^zejkr`Qz zJv@5g@dVr`t{PBWIB1bJtcdpP^RT$EC%%*@XO7)>7RpPc0R3hcI|}Hl7&yOTz9LYY zMYjvJHi$v{w}r;tE_AS$v&|nYl=NnyQEZ&sApH9p9V1N%w)~ZE+hzFRK2}3;>|G+_ z;m0eel76tY^yYAuLhUnjR%X`y!B1;94)AK}Gu(euBcWS0dXnTAgbM);$$uSu;v&Br zPE4mr4*B|()QOP(E{BDC;d<+b%gBdekWsCu@Z#6sfI^BiV)(zHf~zdnEqDAiwiJWT zOGI?zlDuT}hIJxhb#}!7XMoNZ9+vzC?bz0w?%mz|L;(C<)g5bv25&hzZbampG_t#B?0R@$Mj&C(}8Xeyz-mL3?6iX*Y z!+VzH;S0D`HvDzKI(X_jU_Fza4T*t`X?T%6H#Z#2&b@l{<*sYtDvRwCME4~0#|imu zvI82){pKgNrQtFERg~DbqFlwEv7JDlMaRI#Ca1)Zv0;{XzXw@Gf2}}M4LA7%h7$w{ zL7f0HLY}7`IlfMWG=JGjMb~VfhrXR!D)In{cfQWv&3pcfu|dndwSeoI5KD>H_2^wC7BV$ z+jm^NzTxi^6}rLyvQ8HlEwE6jCopcLAkNB3(rhIzAyLv?n-d#X6(lAtR(@u^KC0Lm zNn`5yQ(kxQGpWvac>~j=Oo~;u@)64W{(Y0+m5WM~{8|p%RFS#{?4zl1euJT0W7K#q z`^=|W^@F3W*>ZF1=5ORvE&bPYN>8X2vSsCdRyg;XTwe4|>z=Q@b@+IoMu?dxb9zYl zgy9E6m>DOZT|HiMhGElMZ?+&2QLX+4U#i z9mx!2<4)j3f>thc@-Kb2<6~hIPEkk8b5Yt$rLQl*m4aN+9Q1w|O079)3?1nQ16y}% zTniK@n#Q)n8JTzgG#jyrWxXqQOCyw zhowUXZHXN9A#2`fKCUxwTKy!prY;iRZj7bX;IY|_f@Qq@+CD-?@|1tLP>X)6c2CL5 zswqYm>pHTC7}Ej|eTLkA*C}xm{Cx>1Vg5ev|BdEMU84X$6$sPn1t&Q!)MAjYV{+IZ zN~(gvgni)}WvhYxxr&d$wXeO2j7H1%>}#}*_^LKfAM1=1B<_xAIiKPYE+da{S0Z${ulB5V(jeM6Px2=2&%ODNPa)QTvR>CcuoziJKQ}VVa(n}3tbPu_W9wcL zGda?+&(T<+u5p~2VB2l&Z{mT@6bgC~F&8G32a_1goV zNC+tJpEYD?3Gv0mMfBI1B8d3^2lAyw)5uf#urgVb?D%G2U?4*x3kOiEo;6mK+{-EEta29gRonR zqBTEmCw6A#JoN&|#=6BxrFqIPCN5rlSfz>)j=^=BGvqZlXFqxXl5U?JwPbT6G|c#@ z#L5rr43Fqewi=)a&U*9sJwv;+%yZJy(~lFh^v+T)@DQ#I59J#aN)UD;x{H8t7_Pjv z-f2r9F)T_)l6hTM@i(&d$6sxgubDeY46~;>o}xsgdisx29_3d|g=jfX&U{GiilU`V zZhYC2`ept9L4rwnAWq>`k~heqWu#2FSu78O0yq^0&a zpFEoG3|JWmATpo6bkC4A;u9;mv}fa%mL^Lw`<5NZsFYo4v1DR3`I;tXqDJ33O%pqJ z(x!xS-%O&$X(vo*cg4+?z)?ZS69IeSOSz1wq(VU3gu)qpgHZ@P=C^UaQMwO1pe(1t 
zc2A2@ulvc6(f?uX&BLMozy9&El$1~*OO{HCLJ8Toq9~GOY-0!s+4nt!QphfALYBeU z#y-~UTe6IO-6yq?=RkMlT>bDm|FtE&xGvLUQ@ zE`5 z9K?+RR*R_{xz)oMgr<2Whkx{4DqBV<>{R`EeGFUPBxqP1R#MmnTNreMeTmT+DZH|}{`37Gvrp?L)5 zl@rZFt(Y*u!r|$dnp>Lu&C3;s(;2V#Tlrzl^-K!r8j;~{G*H@lFPUUHYtcNCm0z2c zs->mA=bEO!;MFol#_*>!_5A|{>^x20E;wh5+s~?dl6M)hU?-TAI@s#P`QpxG_q4vx1GJfMgw(7Fx%QaXtn2EE!m zpX{Yn->Rkgkz~Y^7)a`YfaYnG*gE%RLFyUN$ixmAny&Qvk`auEv;NBG@{)Jhyc>AP zb)f%8xQ9ZB@2-o49=5IGzWt5(j`Mqn?_u-f&U@$-hCHDBhY*_L9^eJ5o-h;FwS`v^ zv&ucfM#yJvJMVq7oMUvqvoXKBgUakydLOItpa?di-7{Z^i+ypaMVSgyrG+F|wC1w; z*vyF<#2{4DVtFU7^li0^St)+HvsURvuFi3sCU<~Zjbod$pt^2e<#5Z#3NmqRYpcXo ztZB0G{G#c;X5n_+vl8VS(0uUgSL7&_Tb>o5wDto9lP;kMwfzL5k<;iW7CEP6cMGK4AO~yL|b|JV4Nr~-erw8O*t*sEEvEIgH0&g&e~Lnrg2xO99MCRQ8cfKkIQCv zguJ?Gk+yw_gm-oY&Mq0m+YRJB2X@QDtRl{vA?Iq}4?u^V)@?xo7iMEWGsL@+rOK^c z@G7mdu1@{PN4sYW46?_ja?L&>PDap`NT1839;i$Cdluo||wLczu#_H~r%H zvikw&1dJ&$deI){F)ACD70ed@${f36zpPDr?{na7&TE3wZ9>Z~eL2>pO3U*_Zu?G- zGqHs+e5izKbh`-l3ujX)&?PdtYXQks-9A?AIXqnffCme?{(TzY|Dm9)pfsS}*E@+{~OKl4X+wD9?+OqJ`H!iPrOm$9~7!aj5 zX#e890I@?fF2W$fMo@L!b&mUA)4kV}39D`~y4+76ae42WKI9cv=X7oH?zLcdGAS(6Z7|!O&=fZ~xOWYp+HB~-tB#blzG`CmXapIW z*(Cdfr^>8THr1>=`cZ?@Nlnc7+K(J1NNLjU8Mfdzv=S9+mb z(yV}Tf~qVyp?QD4B50{IRZjnovo3bgIQFV6^bknl8}xoZ&WL+TML8C9 z$L1Tn=iu^U_m_zhy9;QY9MZek*^YX~QquuG+S}v$+e2BSg?UF@)xoPH%OwGrd)fcO z!+j_4a3Ggy2iY`nnBe38h-Lrc&3@T1NeMh~=oVk8rHP1;Pa#M=#0(i`5)BLpEP=*l zVOCR315>(ECu!kT>9S~@uO_pFY8~T0N)^PMV{oENO-dCiWx2t^n;Us{b13_fAf#AU zK_Qb#XWLm^diBQKaz<{458Bu>LIkizESz~F# z8ilFzcYeZWwpU|hJ@RtsYo?a{(x%Wac&fme+QKWDA|P-2rL8u zE&<)JiA*PKgb8;iFNI4j71d;)p#q{m+CsR>H5D50_N_&A!f7#0=J%qwuB?h4Qj3Bd z&zO0nZcY0g-D(HtQu`5)XBifhin^U?Ol2nRn!Gn*TgfePJT;g^2#vg~{5j<1SE|}s z5b_pitEGPZ15X-Lhm!p+8 zL5LD2f$p-}5OO=g980?+1}Wa?{{Zk#iCZl6^suL+W3?XG_UGToHSV?_FL6Q?e#|Jj z>_W_AUX&1(|EjMa@xG3iB11J<1>TvCUOiu*?Ce6*7{OI>hEaGh%lU90tG_v~@}r)V zga%1aEy%4=^ptXL!2I*jjDksbFFLbZFK^9?EZ0e>XKjiz=N6*c56IZ z*d@UkaKZ9J1mkK}tJN~<7LXhD+YK%D@fhr^B}+R?S7{|C$|Xp8`~4{2x3=>~P!;x8P2tJV*E|z`_AB=Ygg?2POugTQ60B~RPCHy3frtMuf7r14>K=W zeNmca5^>f_4{6-ySHNTF_6qTQVUgPl7+($#o#m$yP^aQSM~GhWVlKyrvr1x-mq!j) zVxcqm*~$={8_ny-46ZspdZM5d&4_3UgM-ozg@H}n)^fzbzLS8f7O>)BX|ZR`=zse! 
zG$*v={&ZXHE&IWahnTXw94J4yGg-aREOX$xpP>pqTHf#YzlfI40^to@by5XBcrE4X zds6h-i}#eDpZ+FjU=DeW(3y5*ZHr(O!Sza=3t(%I8!C08kJB9*vLCrpIHcN+;jx{+ zkfya`Ts$3RGC4XX^&XXl#i7dF+H5|>lRChwq>uEB90=DcKUS8N!Fv6`IKbr#AS z(ela|Ca9X?}6f1lJMVEsUU7shd!qlg!_PDOi%}iGRNM8C1G>%Q#32cylTiXpP z5>*Z&c$jCm$#!;Cu4_G?^T0kox)SrX)x9wiEuVEWV|S+nGl)%S{^hC8QBt!wZB7@N zi7e_p<)Es!ogd|i%)nORXsPj3iF>3#43G!B{x4+hLn4Az#sC1ON12P?Mqc~h5g5rN z6tLabx(KU&I|MaBLwp;xlUI3KP*e{#8R6Hu>rT)w>o>CC(ZM5;yd1rDwI02PEp!2H&c zCV@_9W_Y2v#hB!SB=ZW7t$VQJGtHS{*745eUPCgz@=&qaVQ0;UnHtEYbWjD}v^FIN zjgAeoJ|A)}SyPKGu@zEf%Q`)BxCgDP5UooziG0)8ps2UxWX|bStL-id@s97_QNhe7 zLv}MYHl>Za)BT}^j?LE`O*i0M1q)zvu@*3d09{1>oHD~+u7nS1i znJN{wZ09((6Eeb_LOvcb;uf)uVXN?vN3Y@~7n4+furWf*fZ^mnEDTX5NPe83_ZVe| zKyR7GJ2Z&~8i(mn?J9F2*D&q$Zn=d+?#}i@=&p%F;^|2LUa?#RkkXIDvZAZTjX3B-Q6-JI^MEo z&XklCIhR7t;7FTlqxRVnYr$gEP<9oKalr2&@kHwQgZKRdeW&21OHt3wNbnR6_T?Yb z%G^$p8n$f?-)2czW)ew~UY-{{|CY7~$7(7GeUCNW_%WzfKnGzFc+$TfW z$3v)MRqFC9Zv~}H*Ez^9768co6$+{=7c!n&sKV4w5n*ThAZ;9q)x%S#IAR1%>gt0- zL$9JW;BDI0tw{NcYPKuXBw0!+ad%DZ0!0pJOuDCT^su+EX?cFn;$SDIy(4a?DG^|^ zQ1L+<$b5B>=@xr(s7cRY)(6B^6&mG_dk}|;1*SL|b}e;9y1PY6P2smp6ihq6`r{cQ zXQ6ni3yi`JMLbiR3+U$Y+S9D=2L!?4QS8xthX#dd-?K@{7tlYFy0+@T=EeJY;;7bATBt zH!Z!n>k{i_f+(g&{ur|9HpTj@JZ%-Jr_0OKTD~ za0opRn~5p)5s8(J@Ne<_WE?hTHsmZQp%IZ#H|_3A-pGLxcF(Z%r$bI2;FxT74@&Lh z&oF!+xB>SEZ8jdwv{_2WYJ5`7d>Y*iQ6;ESfe&8?hHkaEnM}0-TYbl1Da;T>>?wDF ziv{#@@By&tyfT%FVZTfF-vqlhJ&Ej;(*KL0gG$ zT!js#iA5jfOceNjNe5@{i@RQFgK&gDa!`w`pIIJp7pLP#4J5{q66K4RN4ng3sUudr zGaYTK;^Ib<-Tu)woOBkMm@TKlN;>l5L#j=-D9c1~lRx}4!p4||{mi>DE= zdJ*TXAXd4G6~3F01rnkeb~YiYV;;;DzYnO_D8X~T|9LL|FbN+Ee`-1|^AA3KqEYZ_ zDP{#wmkrnv=d3}v&^Ox!7C^t5(l~FtjYyOUoV_+90)lhAA*bR{y`!3`bg64TZy_Vh z(6Bv*1w6S`-K>8vLCzhW(WlT*M7aiM-V4{s(9OCPh>V=RLlSflRM&1SqviMw=AfBt z19jk57}xb7*`1fW%7_Uw1R6|>k#T<7=qjm+Ht38UBp*9IIsU%c5z3-QW(7^)yM3dB zblS#h%eIvl-KZBRG8Q@msyK-shXZgzAE9mascdDu(KS`YcO-6-yXe5)m6q-Jb6x_opcA2r)o5uYw#X~ zWcu54G@&2gMe`BhO zgc|97k5N!SDu;DwXF4A@en+?c1Dpo?K|fTW9>9jGTg z82(T{0-~tJ|4B#uW#1Y;{nS?&27Hia{!WZR%^Xit!Kpe&8E+aONqG>*DDePOp@~5r z&>!L_Vr-8iWp{I@p!8*r8WnhPe{T&O=JXBoxkEIMTr^72uhZv$pfayG?=7 zVi`1Eguc0h-w#56!hTGQ-jRm#e|6eGlX+CYiq~XPtPngPr3n*7Y!pff&CShi%~t{m z0MTBV)C*6%IH%^ffir}TZk22%!2pJwVHC7^=y&19^RA)pZ8L$Dk>iO0k}pqxa5c?W z&U8YZtTxfBYs{bP6q#o#T6S~X+POwfLioEEfZW#z=7sAVf$WvS?9x54df>D~2Ti*k; z;Bi`u9^yf%*m-)Yy7B`b;lo9qI<4VDo%9cR^E^&aN_dT0vZZR z@%TZNV`$0ePzHs2*9HpABgn;e)eia!p4eLsCtkihmEfzYF!#wFuu7JG=^a zR|`qY!388He#s1b7cCnGhd*pwZ_BHYFg)}iT&p368|?4KXJlJN;V4SWTybZC>|v|_ zYnxP!5Peg(N>=!LF3u}PX-2u$Xfu_3VOn!{S2n(<;femkZ&~awLi0y=&xhKin^L*;uV# zo2m21lz#aD3~KN1!^KT=;~&BhW{ZHMWHc#}%o~}9#v1bi;WHDI_woopvQti-V{vL1 z&(o9%x@0Hqk}!Hdiri1^f!1fcQXf%JOuqqpTS?{G6s(`4V4fV9&G`IR-O{3(_h68| zWzc~n7{CMa&A9;BP4%0^Ycrh>)KO0U!iUdK;!`5}E29$9(VdO`+b)eXKT7SHQsxtU z8AG=e#8?%3WOFfy&@N@iWQB^yUhebsT-q*Ujz+!N9#o4$m!8>;z@n&LGw#g41U-+w zI)^A}zwkc8)9>5aVXC*#Q!$kwqfycwcT@CmkmpB+x^?_D_Q?HuCi-Oa3b$MX&=48- zG>p|R;^WTx^u;Aa0K+Y&_DB{oOp#Pctu3;gWokpfa%p>_#CC1ZnrnB@xdDkNTw;yq z3hwPCknRMMo~dFQ?Yr~2nlD!?34eUuOnjD~TC9P@9Z=uWc$H~2BqmqifwX2bGV^`} ziWLX#LyUGl8OGBXOdmYMmBAnBy?f_pWuDtS9|bMjK27`G{dK#*3u+R1P{}qh-?c8N z?VYQ_d@m6q?J+vIr?JXLq1C0~?b5cW&Ir?Nf`f_@KAuzS>i#YkTOxMkM}FnSBNxv9V=k`# zmkvly+af_Z%WCLodo;Lua7Em-uoE04jeg)0f2JTgDY+rO;*jM$TbY6Pm_nAD{lJpF zfa_*m2@*b8L5Q;n!sZ>x7!Qyo zwxXVZ&Gv?0MV6`YfCwNS4`TH)ZQ#|2PsH3Z0WMEOqgHb^9OhVy7qX-=rCZm@FY#4t zJ9eWaG8y~Rl-SZ>6>;~a?-%joZn|TK?0@7~Ll8^Fpp1;;OZk3}!$S1^CWrl-SXnAH z5D2|(iFV&zzJL}TCJ4wgY!4#nNpB#d>#|>(w{%C7I^!m3GaB z>k~tk&H7uev>4emeg3R!VcIvZYt)?^CewS*OE=g3(5-m+;N;Z(@l%MQF7_I|dVi$q zO6DR`gd+?CiX%DMo;b;I=Bw7VH7x*Bk)s;#+MJeypuIfiSSE$MkJK#Vb 
z#*f*d%Z;7E%~y9`uIONm5yyA!Q_9RUCAA+%@vZI^M)rGf*$2id0aZ53jo)FC?<8xZ z#NSmm{|%IhGoL!DS|psy;HDwUWgDd<;RGzHmX5dfxpU|6`Y#c?hs?cRlY=VwP<}@D zS18|>UO5|dB?(;nSZ8R5`Jqz2vn50;sQOOvLhw;*5p`IZ<^yr5A?hPEbNAG9* zA0u#7`YnuVi$*iZ{Ff#q|1F4;xcakR<4Z`LGUW;jSfj0u+O%sFi%(;4WO!ka3tl*} ze?q@fok|TPd%xX(AQ1m4IEo!7NT+rlnrH-C8~Q)9}8 z9@l?8O#_*(WHpo5d~y z4p$E1udl%5;i|lR^+CI?wW^byc-fJm81{s@gl#w5*!C#_yUFYeoT@EFW}6PS`+Tn) zA{gZEeT6S#>j(0K?3ViJY|~djmj?Z{v=yU)`T^NwS%}kyfX(cNUvGQGcSXJfWCw=l zMc2dPu{wI;T{!nf_!G^mwN6~_j~w6K`Z$spXp*hQ4~`o=EDmh1ucdEGd=`d=#Q_6{ z>4IABP^}G@hJfA`k-R1agVqYxiI`Wenfk00gXvQ>YmaeTb$iOXI}nbmQ|A8Z4bPJ=xE{6INWA%WaL%b`5vC>`*`?hLG?RW zhPGcA_;1VnU%spqp!lhVW1wVYNi#~D5swbM&zwzMef9B=%tMD*0}qB^yc{fkDzCnN zhsC1Pzgxd6m9p+_1)piSYG6UZLRWe^EK}_Tk)aB6PB~S!HM!P&to{WORo#kr=SPBC zqxl}a2awRZJwF1Z*hIPyIgz69~veGbNh66)$|6eJO(pfRvO0y{iIl_ zxa8cnsGz?UBd``Rr9IQrAKuKN?G;iu>=Zqm3|l@w>?Fqy$%-7iNM(009c^yVVxni@ zYb$!kIZb7g=4PY`%o*=Fzu6Dx1lI79;F>sGCj$mVyrPCxbK=D8146GWR&T2z&)J+b zdGHdArbh&ZnIMYmkg{dOMu^fa4M*3(i;3~<;C?NTQ(BrPdfJWt{ubif_{0oufq)%` z2=BM#)j61Z=X2;rXu;0Xch%(NY%WF?tz-`hm7pycg;TS=Su628P`8J!Cz)PYw*ERZYY!lK^Lf zGn{PqXM;TN_6omwFsO&@26g$+{sNPf(u>QhmX@kI4xds-Qwkx;epCkIZwiH@oHm^v zP`l@ROB~kpCZ%bM6REiZ)T*4OE)b6a2s$`H68ppX1E()TN^8xC3c7Cghk~xV zpzBb_%WP7A%&W^M$uHTYyAm^mO<1>AFyqO?PKc>6?iJ_+x>5rgvM3`In-e)XsEo6i z+=ZdC5uCAMn6-gr_lF2=*E?j;LWj}grvga|RWHV2tSex*4u8}Pi@eP|;k*Dj4Dhkq zh%63X0sR)Zti?VVxD5LtD#DA*hWtx%5dHt2sHu^3?MC2aRm&`YYcPz%W8fi(($fzc}d2Uqw6t@dBSb3f4TXc?MVd#u2$55$)|Y`z=5@MXVzW<bfzzNF2#p!itSIHSug0%HQuWs6)7}YFY0W!twH-$g9Vh*zGUl^ ztP49{FKy3P;929VWp8ATn|B{^y_K8qgs4B8>4&EWZ5o)14|W&Ic%8w`dfOjpdHg3< zW<$OG-kDe23S);FZR|~uH*Z*WZVZHRd=NFR9q$)ay|2Bq38_o(bwAXDE|bTW54*-T zf7k;=9yIE1d1qt|3M;}4@Ql@b8L_9z+rr0vr>D}`;%#=*{J(onoz-_v9tKMoe`iRay&TG)kfDDuDj-k|O?N3=WPF?{?}!<+8$4|!ZO_X$FB|7x?sBJI>$Zyb*@d1` zDWHSp-c9!`6ki!-k(vxCUKiQbR!{IO<1zA-ab4;!q~%zcKWhmUlmpbOO@|wVW0mja z%M-W!&vJTma8^i~m1YJEI@FL^t=1YO=_E)>uPE7X7F1`LS-YBdmW0VXRYJB$AEupQ zxZ8k)YdtG3d%UmXra;V9oqMN`FdMzYoVQFlMU?We@M2c^Wh^{1^U zHF00cq$ol_YEIZYXqC$eEl`S1Sg9gBsm4JTFB$+a zMYu1c(G5N6z+if_)cFkv{Q;4u^%`=VgQ5&*p3+n^qi(a=G78!)UWU7E52vENQ{vrZ z!9D9~@3(N6q0%J#csJ0yEWrfL*|TYxrZnZ2uVzd>_mg8fyU3eOB%d) zM2%83PM)DNGbf`HyezlxRoq#5e1EDyu#nC-+jPK=mUnlfDX+wG^s|R_IMZl6t1WwK#ry`f$Fj1^>vXOaM?^a13- zV_#hO_QCH%aDjp_Czj8gllXL<6Fo+8W+^IC*eF;z)xx{O{@Yu{J9<5j6WcT+ZKwE0 z9!6vh#kiHXC3mvCrw0m_kA2qD_fQ~hWE(lWFZexSruMxkk5>K^=t}pfJNg}~|#qH2I^ZqJd!D~cKd;S6FY?<7hm?^TmbN(n`yt!u+S1+nnW)BdKq3O>5c^ifk@ zU2K?~$O(0-Vb1|feN61@iBD&V7W9B{=dBH9u<@*|Pick)e4x9O)Vl|TDmMv3?Z0f6 zNg4`Sx(e9s#9N7^_DtPq%}C)JtPcd$;hpmo8@Ia6!&#(G00urDvfKEjUCdte$3U?S z-G1+PU6{z#MDdj++l5CJZmu>E9Lft>y^kMavrzt>>@>xT1d4JwDRy+K^ivLo*W7tkPdtu5)48?NN+Q{5iTg|3jv zP<^!VsZjA1-_De#T7zafYwKGKB2JINwW2qYxo)z0pQf@hZMsV_I95B8TuVrPQBPTS zcZ1wuBQl3^GhNbKbD+?@|79~+k!+;kdM46@cH%5RAY^nyQ(XpZqYecGRZhX|7a5pF zS|`+OaIQUm^+Fk8ftwtnobs6R(s!IrWo*~3HDW|cm$D0Sj=-d~(S#_7e&SIgrQux6 zK^(PA1MOy4UkcgGe!L5WM2gExT6C9k)f~oZ1J-KhyXBVw{7LV`o6)e6z)N!6lKUp; zgX`_0hn^@gFzEn1kDKGcv+$2K#lBoyxQIP^WY}fGws?1*Jxj&SJVhOcP|Pdf;S_># zwdsH^OUI6L{U%AXBhE`x{evrHw&44jk~CC`ZO0dwY2huJoKz2AE4~upYxuf$@ud?z zJjPwyjKKM6LR-!+yE<(KpCA*4=x<_2ftVlPb@H|ku_Ut^vAN32&i!Q7FVG^3gb(iP zrUF(QZ-<13=H_^0E9u1alcTl1cGV*CQXFi)Wg|A}9O;0S#Td=SvT!Qdqk!s%n z89i^fl&`{xXCic&e)iYoLpcz8@g4cDIJ@GCS}A#`9YKJh5Jd(XZjomxdFl%J!k6`f zCXW3n9+Ph4&64yrm9;HR^LwUFQp7?P`&*izFoBFsc=x*(dDH23F?eb3GGI;8$G-fa zY1#mWG2_^)Gj=x?M=D^9ru#(<+sm)zG8;^7=hZm)u{`t2V|ne=&Rqt|w~aSmP7E4S zlQ7677D|5zj8o4118_HoLkjX74TYo+I z0#&<%=Zz84P-$-|-@_2a#QJ;C-1W`zGcO2%r&+oD`>Z$%=qmrE9sQFr{DWc)@g?{y 
z7pUG7S%3$&oPu{(ND99*b+4fNu=np~K}*fj-<3T1@4^lizQPNL9!jT4v6`6^1pd^$dx8mEi& z%hG*FtQS>+grGJM={m|`pVmo+6HCfG6DoLs7|UdEB{KT61D)yYfjiyi+`|_p604NY zx<3`r2reZy4z@cr-ThFTsq(Z8K>xn!_#F&TqS|$WW_<_O&9|Gb4NSPPR38IWQJfCd zcVfj90d(zc1_UN|*j@LIJ+tyv-1xuc4qQ0Zb{jsm!C26JMA>9Gs^izOY>W&HFJr98 zQT1e^5S@#@Lu#MIEb9CzzNX)@}YPKPrI~JA4yn(HLr0N~s*kNGpLyDG#>H-Mq65S_0t+}Hhxmm8_uyDI>b)GqrC`NtbX1l>gB z0>o;_siz93z&cdW=%%j%*S6=w;}c#HnNFS^z&nhfye;8ekDo-TQSWr`E@8IS;~V2H z@mNpAbno;ngu?;`>a*QLPvsTDg&0^5|^p))8b$zv+t~%3| zc93UJz~bFqb$kRV6&ka)KGw4dnTr(Sxt zH2Q+z=QBaa#sp}>Cnj|n)L>MEB)(l^>7+Uh_DB!#&M_=sEA?uv8=lqp%S#qOmVLsS zj{pxxE|0QdP9cM!P@|?5BCH#(W>Hs2eBx;Yk+&S%nkdM+k4M2AV^GOgyt%~89Vd5O zAG8$f`2k{K$nwg9+MikG748|2erjzCC1Mt-*39>yZ+k5FwqCbnpKBhoJ5Co@Uda0U z2h2XB!NeC|xGXbSPY__*XEtt;6U(xSv<{FP?tav5&nWTf#9ddRz3kZxwi}Q1{n22Z((}x5s#eY8S|l6Qzez9y4kD5IG?on{+Ai zs8~F&iNmEjH?1dxy692}+694V1hLsli{X@C^AsNKHuL(53m)teP49?Mw2F83Hlz?A zI?o@tEgjiK)Ebea3;OCCMDm2ppR#qG%L`2sJNLchZd%A2Y+%yl+@cH{SDut6J<(w_ zUl3;7G7?Ae_)W2GN*3*70>stlwAqvP(+~{aows% z*W0xgMUSbb>KofL4K=a^UriL~NIVXSOGT!19^3fLy0y&yi{%79J}Z=PD)II0d z6^tErUd{)8E-SC;z7dMEAAxe*EQ+499=8XQydKn_e9({-eMv3HgwEOSjP?7!svPgu zWiT}kS-Yitc?i8aBl3Keo^D2JpH!!IH*kA>^sLsdrM9KqpljiHEoHXvmGp1B_22R+ z04pz`w4~{62J>ttE7`QZSgy+tjB?Hl2n#&fd`?>171=f7z;gR~DbP!zEtKT+e87%& zw_(6TnQUoT^Gk8aLLOeM!}0(utQ-W@Yx z;(3lypOyTT=TX3|rxFm)N4^w4vyPgn{e1VHWT}ykqwhn6dHV7fp#wfB>%{Jp9*h)n z5N)e+mTV@3;=QxV^ksdkSWt3w4TvV)*YL*`&f2a{$_1GAWUkdU%B*1LUH5Kyo4Lm+ zdD~AKz4i*4&uBKTg}G`^K4tR`*^^E@I1N2-sIy*1`V9Tmi$NGYzV{JKdpJL2FkLRy zlb)>UKLWJUX`kltYauNo_?3Htk2*q!-0ydb^7-NiYVB;=C(sgVM38F%wffEAB625N zg@e*kLLz?(QzCL)LdMdc10+k36$#ASVwfM8(Kx%<0^s=AP4Jmb`vt#MOvbp=~T`HDmtwKD39zFq)9 zBO_E2IC?x=oMux|40&Gy>6>A;e%iNkZ97x62ZptQmF+hi_vj>|Er!e>*@93G7PXS* zW;q~=W{HQN_5p5L2?4k?Xg}}8K|rz=neeLwB(80V2KbgDzaT-JL^ISfz*=l^Ft%K1 z)P9H7c_H4DV~2*6_q5YHjh80uUSb~AhJru314Hb76c8uoRCQ4M~o9$48^i(kRKlIX&BMb& zF(-|3z3XeKFQmxOQF)uN=F^ph?XsjE7g>ZHeF^-c^i(8X zgg~jab;8wrR4Pwc8k6;}=U2ID-HvgAYfw<#b8TqZ$M2u7KI2jKYl@aEDR8*_vlPT( zI^W^C+%6A(zA$GMi|erUsqop)9j+v)!0Koo{js`Fvcl&ig4>ugwdSGIg%))*om2*^ z`&>GXhkGj!co7HPxOlWj{;GlX7JX<+$6mLUUw#D)jTujsL=PnPr!(SWlOI?)n)7-j zw+ID&kU!li3>urrp?p(dz5E5#AyZ0|yaH%u6J$7qo5Ol-O;C!r38r#?U;}vO= z1kZ%)ubqBAl3tw*W3}hk?e4sFYD(^$VV`x9idNXG$%*N>?n~fyxjYbLzPp_pJQF%KZHf?{V<$B-+eHl_@~CXMMOnA!&07lhi(l^uh>P3h&+m| zZe@N?-iR~QxJH3huM-m+DQ6cr5f?hKi(MwDirBNI=mXv1{;znZsYL(8wZGDM$k9D> zVgVP315AgCCThu1(Yme9M{EaOK7PZvYa(H@DfpG?0mw(2KPFy>KF#4(e{AVQ;wXpf zv-S0k<^*_kBDJ4odBdliKHJvHiksi_bc-WDoaqJpWYQCI=vXrx5m;Uihb7op%Ot(vlL^M?c$f#}=Or4(2olTpTa-sm5g9M3!55;OUxZbmxg{F{WkoP7s$%IzeWa7F&3Pl?lhn z;q+GwEgdAWT5wq!-!?bPGbpr+AIRW8QBu2GcPV~ZQEuJl!fSI}GGTr4lZmR@Dvf&V z>v+&i6`xVMuPTa&Zh_DyvL+1UT^ZbB((^%a4B;!Q*TbGO6V`Lr~bdQu2Dq1T=$$8SR z3Z?0%0Y~0|>}{e{HRtlBwrkR_HPnBrba^uv1}t|n{TcG$=}3p*6Pz|iudKhS)J2#N zs4&`nKsv(q_~G(Cca{$3Ba3I!%a;m+%j%J2f(3Xo%`jJtm=$fg+$y= zFzt4@@>ST5^VHf{9S~N&bvCON04)+ zCF1V+J1J+IA^I$Tdi3JKQBfQAI`tqs=%=DicNW_CK4NA*;-^HJ=|8KxI5Ntsz8=1E zPFh1=I*0SKM0)(PUiyWOOzT3IM~*<9BYn-Vd+`H-ouSI+rfImATTDU>&0slB2shD0 z#oH-@5~eqr5G3`*lqSkiv0udJ(Vvxl31d^D#-l%1)(l5F>p?37ekm(aRNhWO_XTt54VsAH=NC-AacISP10=y`R09mvKFC< zY?*{~FMK!~3G%7MZ<9J1K+XvyGKXz4pCm%=wye$ezf#t?y?F+|tZ)Wq%ldH*Lg^^-@2*FcD}YWcri{y9k3}h{ z_Tf3VTf1=_(_oHlg3OjP+)xPAs9W4OfVTn7Wp-v3Bm0ODC(O0^ywu=n!;xqXko4v= zZQ0*!PUcB89Z5b(TC{h#el&?%!JO2(-lslDVcuGI3_{}h1LD1Ju4-gj{eI4+vy~S{ zt)X%ULKW!k*x)yYBDh#;>5=<&tK%DbyBr>!TiU_;to7-xz}4UV8W|ibN?OCNKhNc{ z9pGQSK}1X8wib0?fA-PyK`C4(>>}~%T9;=z|BFCE!r`p*R9y%rU;&=p49v|Zq=Uk1XoBSKVB&~Zq(>|~Oqll@kY9zGS`-*!eH zuk;wf+FmGScM~1{y(n%vMb8|eIG!F?hr|-5mZ9D z1qKF?MnXyH1}Tvmdg$(u4nZkN>F$v3?(U%*_T~5f{r6sLzX2Yw;Jl8{k&)!G<&WD* 
zIo;0FGnco^GHs3cAM@lS`&)Dd8kpXJY%&gjte=fumU`Hbeyu^@>BZ?!VcLwYiUH<7 z&{d9Dn5{3_@WaiAgv)f|XUmlrx zC{S#=?e?6v{@pSYGa6oS|8RJ%;_z(w578&u3>*vy>>3`3Pa_kN%9p1~Dnu{43G}N| zv>NusaS%cg(zIUf;CZ8ulUxfGuFs#$^!GlhSMCTMMEBIi4J8drkIg!m>%h+6pPDe` z%TLotO|9w<+}l+G-|TN;m(5%S z*32v>@>fGkEk=uP|8vMNmf7O~9GZci%Q zx)N;PKv@*?r?awq_P^c@(ORCg-A`y{y}`>DCBf%qHpwC-AG2yS!@30Skm ze5u(z$n9dMJ<=~lc}Qm{T?Kr_c=d2yo-^uJ%5~MFI8SDSAGdk0># zz)?pmSAt3_9@g7zmTvcf#wXNZ(hQ$ZC#YG3UYIMJAN4rUh?(Kaa6NlaEjCT(v%Rd( zy~z2W;H7T58##DX!Tm1}=}>z~G2j(pMbqFdqo13ESm4ZbQAqX-;W`VXbdfO2>f? z&>4pXTe9J%{LGXZ#i2~m7UX$fef&zF>XGIh%(63J*hR;rvAyVh7|GjT!k6vyC%_Yy zm{7|RR|*rdLQ$`f%oWdREcKIYLtKOK?~B#&e#78t3l=Fz@HX7+{>(m2S)r=qT77isB}Te!&R~R0B-!_ATEnNkj1ams*C9O z)6xD~Urz0k27yy$V+^6#D%IfzVQB}m>(#bebTAvCH)$bxQu0`cZ!!zKF##L*Cl2EB zJTAzk#aw;a~ z|H+lqb3l)<5!mRXq=v5uyPkf8P1~}4aXq~(;+`y?hA)+B_Vu=N=X#~j*=9+d)xHn7 zwD~+f+=7a4&HG|qhOztgMk>-TB@{yIDI~WWhbNRwlH_Ce3{YEkEooNNO?_qk@Qzz* z+fuF4bpB=ekWuGgH8wg}K)d>&kubMq6rC2fNuc;qh8Cku4ngf)qBy zVYtRarnF_|?lXN*d~;hx2~0W|>xOGn(T;Ah>X@)BwGN>*xjQ@D736=#c)Hy0X?Kueygt zD77u^@!UN?A>ow7p}MD%1bnTQA+O)y_7ewu#WQS^4j~gOJ#ExhcW!-oyHevxf^NGC znN~1^a{MhS-|l#5l*r^~?r*q2>irdmhv}?IuW77UQ%wEzWatA$YTHTB+t#?GvZ~@h z&9J+;5wx315}NijjKsH<&|P@G@woC~YEMo755{tpEsJ3A@beYomOt|+$i|*8D^tO& zD2mM8krbvINn|BdA|AoNlw8s!IFjCd{P|6dS-r9LukU)yP`cnwm)`p3wUAiy*bwz> zBWK5MH{7df(fL%InwrofhMFN)A$^R#Ol9uczK8GDDCVlpYn3-B>B(YY2I*iL>3cov zQQ6gI#$v~yn%ax79H1o1Ldz_{^}N?GMbIXe8T&ODw$wPQTBNR9!|8T!4ORp5;3Fe0@vcnJ<2vz5w!0%9m&6K~M4Sz9xx4S|f;gG93A(xR@8s41sy zUS6x{WsARJ{&VC!_djgQdE)umu>z^>X)DJ3YOoq^dAQ z?F0x%@pFx}7&;bW~_ z#^v|0&GZ?hZNhrrUN=W;{Ku@S@I0xh%$=UQb`&@8l#D#DMkNa>wnsiS+lyTL=O#}P zUPhb*;l4$7VAHD@sJ*m1nKJ)CIsIIPEt^7N8_U8P9n6~20#9VsU!&_!<3RZ(6+zy8 zwLq&oGa&5gepRs`r(5d3y}R~P5ri91x2FEJ&v2r= zV{{}m2aJ}z8_X1vPPrf#!GnGUCI}05=8wq*Z%~3!xT~4ccwL$~17rBj&SmE6FEHNY zaYX@j()eTW$EJwWUMXJ)nf-vNmG0`!z9P_jY-u9g1%k zk+X$%>%YJH5lyJb!VG+6Wo{aqFx=%OOZ9`__9sXTg#y@v4?Ob%^UY;>N3RwbID#=K z?h`dg7Cwig3?DM&{+JEtY1h~>b#a}z?=8%ZGwp`4zX-*D!4+;clwpafio57>3thaE({xBwFY_bjSmy<+Jt} zjK0*?jT%GL14d|=OO>1MXx5)dAop4o79#A;gR39Ep!ia6VNNAEaF(QBdmAEAFIb^* z{n>jTrf$O||BIz?=cTAnhmez_q2JYb^f3UKW<{rlkhJ@S)?tF)IAca zM#=rNVZQ{&n5r+E8bHiNjQ2Q+URtqGx!?zmf%8LPRi#9LC?+}o`;`MFYEMAS*{x_v` zl1rpOYH63Q=Ef#xME*VP{tT->|LeYB+p-NVSemPXZ zBa<}U8jA2XC7yez{Kr>(W5Lz=kOH!QLXW3!CR2p0Pe!Ii4fg@^-`L5S5Zv|K+x5$WfJ%CWAt~EwS_js`c zm-kgeD?hI!5Hszb*{?_NajFAT%eJ`?N#)Y^yJN~O{W7wapZaWkV~?P9+%1Nf0pU?W z0|X)cUCpxXXx@hxK%hD55J=F_%?aR>%D5dt587K8d; zP;?+ZrR?rl9{=1QWXBY-LT+<~8q_Tc@r?I?j{L<0(7I#mo$TR-5>B?GNiEexUEqj)wQ2{)N&>hg?bU9%nwYm^gNlR1|Z*mz_ae# zHqRW;ned|O%Wh*P1=_D*;0r(iE872DwNK6qC^HftUQBY2Ezup$YAe26Dwo*F%k=s| zvzV5l?F+FrA_9|pPLYj*4{-=_30Pd0{^F@inkilM^2}S`RoggD81<_Lp>8j`UF@CN z{g`1!IbH_37@SDIv2a~Myfb>ng7Gj(Okp3|i;>JifUyV_G{jBugLODLeYkm)= zww%pq23);W*>&l!P3QsMtrCyGzEx`ERZl)F(P2NANdkkw^%U2#(1-!05?WUBR1-#Q zRyK+1c0X-g*x$g0lxy|td+eK^#mnB(!vex_2=%d^cym4O7bc(S`LS?07Eqt~p?Kg*aDjN(YW&}y*7D}FGm zA}&(Kkp1k6sTjK<2|QslRWQJU0@tIkOcy(|8nIvS&)2%nq~^3uMpjn4AILo%%!YG4@2+Z9 zeLRIcNEQ3yUUQ#rbFRP$P@1hj^K6?DT~28x6Eh8 z34jWU1{S=ezI!z@et2QH)QFZ3%(dcE)>QM57Q<6zk<;hot^0(=GnI@myJH zYC(UB+0}HRu?|rxoTajU@naE1PNx7+WKbaP=@L%1jzcy z#`+Ys)zQ4*GD45LBkC73rBRzdq!;?dJpm4*<9{56_WzH=5V<{g{KsJsvXX)b zuQ~ltX++(6K|P|GjVo?meHTgv3{VAh3FKE)Z`G+VWRk?z)0AvL|Wq=UvArhFIpzH2PYVA#V3TPT~AsWHj%5yl!o*$X%H!#79BH^ANC`E$i z%{mwbb>uB5Q#z7Lr3a{NBn~EBa+R+j(+2sho{(`NRNT+D2LH{Yy~eP~ZUi!ujW5`W zW7+GOSMM1N+x_K~D#NNl%;*Grba+kcWkE?x`M@_H$4(LPl{QL_U$C}v1HNxeO-**>>aN-I%g6w)j%07rp#dmX(P}$TiNd zFkDdIf!HWEp*1>9^2Hg0=jJKNm&L+lBfQDFfUn**uLXm|zyH?K&uf05rX9_t*4LiL z$)o@fF5KYH^eEt71PW!&Bwv@l6ec${Ii?c_yo9VDGImf;=|#ELKW@0<_(9lyvi^6Q 
zd&1M9Ot7}ZXp(4^zxP-V|DEQ=f2aA_64_Te{5DO`lQ$w?>Ct!N0SPs=iNIYO*N74E z>SNUxr^3(0DKR$a%oy$qojlO8`6R^Z%i8DewSB3>bpqpif?V?h9JcTnC6SV~kM#fV zs@^W5`!_p3R*IM0b%`%fB5Om8X)0g{u z=JYf&+_*lo`zencToqC{{rkhs>DDJT0H>mWRRSSNh9xiU*WRD6v`N%=o(`Ro?r3|@ ztl1-#;M0U+g9Vh18t*gWGxgR2{$dezuD@HKcRU$$74$rPb-38nPXzk(TT?bP6WqMF z9!XonRO)7wLzWRG{aP*|Hg_1YdQ7m}5V6mCLuW-PZy3{yyLFLC@9vDeb3x=jdH&vk zJYJ2X#rR-ht#1E5AT5i%z~OvE1pVk8Ram9PY-00brEKUuz7iG00Q9ZF>G*?uBI}>u zcjGES{EEb+UJae=){@WdsVA=7N_wIk@e&sX0e&~bvm_fJ(kun{^>2@PmRUgpNRN) zQuBFa^R0#j0^IXQ;_vJ5E@K#CWEEZ6Yoyi7Hp_9f3fqcnM?lmsN=*Im{75L8h`~{q zn}9~RH`4BRk0KyDsMqhgZe2|o=tX6tqOL<=ul&+iU(P51ThnPGxA}!J#eRQcESohR z+Z87Q@hCvwze!O;*7o&mnx*U`F4nrp&9W-kxB)JwiCV~3d!@z5C+S**Rd5!*^7NOn z^3D=?*Ft4j;0ZR~+=SPonOB)GlxiaHm^jTiOAI9FiZ!_3fR_ z?fx(Z_5xsU&;t54(vil9!M{MJ0IHJ{}-Rv|OClUw~ve25!{!<-mHgqSzS| zMd*QubO-v;H%YEvC{;+{w-zFPrvhB_W&)Wj-5H>1@_61lZi7Sr6ZI0~;hB)ED9{7~49TCM zm1;JYn5+@NfJ|*y9Qv{H{q>33odwR#7(+QmYhPIx&zw;R#tv}ypWA9CLvmaSS zbhmv<&BV=W5G`>ur4#2?X4v!0jIhD&8B|~Cx(@(MH1&ynz4X(JwYaU)r?M`QjNf4G zV=t{i8bU5u78$r-*NmJ#H9qlHFD|A_b*0DiJ~$^U!;YIOCTe%{==1_?v};TVj#_Fr zWZU&QbQwW1C%`t$fDA12+2ZSow$W_C8jF5??GU}}k)O)h?Gw#84qP|Sl=h|yS5Z%7 z=hauoL8z%Vdfk*BpNZ8-<#>w^P2p{?5d4_V}uNz@lbpd zL90Z`@A;M)o$A(M8KLN>~B68?Nmj(RB1MQ z6vSK-pI{Ik4D+tbHs4S!nnUIN>W2sn&4i?#7VgPFZDy&i6-}icl!OQU;?8ydA?QbM zEr3VqeATMysL3($Fa_|EN}0KifGnx^=ytUyRb#vSZU5Ji*gm@7JydoU3#FG5On&jX z+SUptt4gBa4f^~-)GU6pdPdOdS9mhky!Ac}*2OoNUZd+7KZ>;xy82FIuttGSLTEWX zk`k(&xiLjjfX(m0T)!YX6wJtB?W7vZyW~~gK^EHGyO!m=o3pJE&Lu@120J+0ne)0E zq)4C<45(4X+A6Bn>u*?MDYBdK2fkeFEkyu*+XH0xT@H1#-vnH03H7LRwVRDdmXIMF z!_ma+MRz()m1cM8qe(fL6_7*70XHE%*wUoC@J3Df1iyawe16LFs|bS_01rB z+UzLa=nOr}!9tRFCJFR}i^c9i)j|_{k|Ms|r&L*d z(E4Md_H%n!dL&<^Lq}lHV7txlZXA#t>k*o-S4`{ep2PyDP2k+_&c|z833DELb5&>e zgL%e1U-fdI3cuiIM6W)ZRk%%RzcXxUh77PUQ}Z94@gy|~;tZw>_3z_*%_P6InL7_% zJdo5ffnf*XQWrGZGhn0ul#pmwOuL6DAyL7)b^cYb5}di z8(>i-Y7Nch(F6)JOrxpazv^FoIfFR-pTa10r~d4W_t}7Zf||Q?hK-MR1KN)}{xs1` zVt`dC6P>DXwf>)^Vqk~_L-RWC@kDEX*rs(cU=S`x24jESl4ozq1*L*Y==4F+pkQQ> z`*)2zltLcLv<$Z5pu(Wp4=aC_=4UsKibCt5OjyQwx*Gm7G z6?RY8SDhHy0rJg`qfA`es~n{sPitWTa*Fvwtp0aaZ+8KMNMFk2qy)!N`DCFN;|Djq zOmr}8!M`A$CE1vgm9fU;#0V(#iP3=kEIWpa3D+GaDN8LSvZh+z&kX<63;>GG*pOL> zvdO=XDJTF(e7NtU9`5Vus2vKKoncRQ;%VO<@*SrTH>Kx<)|Z5hMmJnlleyF zRaPJx)8KM6I`Z;CSCigh`Rqb?in0uXx<6I6{?DH=LyFmqw$TxRhlyWe;{-(jMu3Sl z(+pvJyT?>MyWE7gI^HfnE|g$a=j{R1+BqmXD(8nTxY4ByP=pn3vnAloPa!y)M7dX| z9G>UPG=o==G~Jl7ZK@DBSUtv4q~%UqjTW++_QgvMSA?S|Pqx_E@n@-kS6==xfaljc z?jHhKcmWMzAvx;Gu%ik5ubUw4s8px>(D}*#ws!yEl2$)V=2763e%-e z%*Z+!+)*73Sv%py+XS>Ttg>$O=6b#ieNfB(ZoG53(2c@5-N(n67N`op(#UZlquBFq zA+KTM)H8kuA(yJyOGhY2BMHva^8JU6L7kgL$1qxUk;BER54uqx;VANxcSyt-q0Hdx zljVC)-|gSf+XcQg=E0Xcue{~+PgR^qYLMFg;$CKw8zJd~x!>|S9al3>L-Yd!oYW#6 zq6wlmQYQ6Zevuu9T(PK?>S-q=lUXLU*#J7+Yx($Ja9i9ZL%i=DX;)xo@QWf#zyLnpQPF!vyq95C4nqft*qNZ;<56=( zd)-`^=g5JF&$BzEC~)HV&V|*w-MMA}o%c4SY>dqJQ2j|FTD1O~_v*pfNuWIAs(x+GtQL5%?>lXwIr&_BcwV}+lrln6-w1gY)Bb*7{%5gfxwubx z8&3Ppzu^(&3vrRwZUOM8+Xx3|AdZ;AZ61kyE~mSb;M0Uh%%VAxPmo-1)Ash;*CBh` zmpCt5S4OmNBkIk3St{ub9AKXnhg(IgrQbNim&Cb-z@My9DB5$r=R}I|`4cpbP&@?U z=Ds9&340H0M0vs$5OnV|2w|H#^!P-xMAE@ry+Eijry^p8jHY~DI5QXtfsf#a*;KF+ z%HcGo2;xPMAEJ_qY@FzPub~KM1}G5j8Xs9|Z(&sVri`Y$0{7zFrhrE{`vcl|05-hR zV3f#7DaNbPrfVsj{fVv*-l3eX&A3)t{C-z?$Cp5l+iW=DjjCpiI$k>lul_j?s7>?c zY0j#YqVS>C>21VRH8u}63ms5`l9ww>O;fLI&6`2b&BmJYtsnccHXa>-xUiM1?D!Be zW30o4W?k2}(T-W!|ED{eXHCD}gM5H~)yyl2Kkln;8Zi}WQHok4xM=RB{ht?0eDcc_ z$9p9Btz{fKz(=C$9fgv-wJl3~=a5qG=%ze%iKeB&kL*@%IHlDA$NZGEUUZ zBfs|%6(G6R;V799#nc@GZ?#^{xf0|5e+g0p1J!u{fSq)ZANnGrI&Vt!-sPA^ak2i2 zt8qjN$@fJ2w?^h;xAX805(uEI%l 
z79?~5bkiARx0{OB!g;PCVj2ZEx;gN$AWj4f8FhdDkKgf|VF{q}BAt$IsNkl#24GG= z7Ri5vfTENS$y`Xk(b14;xR@a!AjZYCCK7D1M{8T!CG1Rd0yluW&;F|>@^R^KYsl#; zD=!o0EdsAmz1wT8TxBkr5XuRAMw}w(L_`g7(~Kp?ohNVqK^}6n@4X)A6fZdWR&GD+ zyI1zd#+-OXOIH!^jyW}E9EVC0(d*zs)f+gM=|KLLcg^vP*#4Ave@02^-sHj>tww=3 z3dT9-X{k<=6fO6PUTMv@S+a9C^&a2lV$q|lz=+fr_e1~|k4WLkS!!|{D=PR@Xati^ z6R}DxDaYHF|Vu#53B{G3y^e3OgI9 zPyOq^1ht?m7x7VBoF&rsqNBczoAEXZ{vRNpu%0Pn^1d4=2pw30-JMgu4ZQGylmSD2`1~2l(iM_3NqeMek%lWem*4Qghi2ouDK>*< zw~H6$XBonBxi`u8OYWQb9q^v)R~x$tEb|J2Ft;o{(Y zH!t$;-DwonyMxiyzHwyKmJ4+epa7@2qE{|1v`q0lSp$(_BV>w zChip)09moM-=W+}M|7}%Vu|GYoIy^Z=?<5}%>o^DU$6b4j^O%#SDUy?+^Ll42GG=3 zT1)~JN!@M!{(Yba9r3JJNv z$9Nx5Br2H5K(-Ghi3qGfc-rR2rO-@=cicwCu9pjRsWC>?AyR-PNYi?U?MURR!q8u$ zAfxu@cBdPCB+5KadlT_V`sdj=bL}PxAi-xC0;Gq1NcJO0H<%YF7p6w!w?5HnH^!niXgI6V}4p%{#zu8fQ)f~f6V|5Q*RvT)PE(sSZ3 z4#7P0gooJK3VU$<^Y=REl9_E7o+Ic7a6(kC=ou1E_)ssIkSqn@rCCR+j;ptj-Uwlf z4rv6$hH>EY*RdqyB|-YU-;)i$%S+^?JFfH0c#Pds)j#`JE6(2qv=TizQJaMkn}&9% z_7w)Xc_IZSnEZLhH_jL~Z_KDMkR6;#>BZ=}D|9n{k%(G~4-j-}ABGJqSP#90WCI&boWwITu6r7}3fz zLL|>f+4wS?dNVm}SqzOn=kMl!=Z&>y~nrrFsH{%fRJ&7x_gKCjV z7&Zwbi?#4}L#E@g1*646EyXbVHV3eboQ34MQ0)?URR_FY+!vaukO$zHFc})i7yq9u zi*bt}+v7Q2h7oQ;M0ECboXlk5r0dd!O+ven#dpN;0%cDZuaM*VPZRE-x|* z_W{Gz{IiMpP{s=U(?~~d^AEf;ex{TX2E_EEt`FM=J>>dqS{4?<=1byxqFzWlx=w{NqzKsJCNOI)FnV>KeFP}E*Lakiq3tvDj>RdHsuWvs zIApA1HOEVwu-89OeCJP&rzQ$MmYYK`Rt|-Qw3u|KkSd>hEppq(nX%@7Z4;l!@}gFw z!i6G$|HT5!KZ(LE$Jn-a(PHDC?oJwqAVR9PmffY&^Kv}Q#OKCAobAZXP&0M8J-si3 z1{%~nU(}{Q(PUr&!Vgy{3R=yk8Q27Z{vI1>R1Y80501{A-VVBtGDh+M1;)nh9*6MmdqNR-Z`o0D*lV};-3*^y1 zPghzqgOIYO^el_l$aA8dDooGg6B91PiuEN^q){>A0r>SOGriTe$%{%4G!f+zfdVJ? z?~()}Br4fM3@3SLm~5ZVsc)~1d;WZIJmJ6h>Nopb?Gd7XP>*5W!p!l!Q9H=VV*8Tq zng^+^t(j)<^hdu$89S6C4dqqvHs~r-l(tO~9Ow$MX+E0tS>pc5c?O}=ykd^A3#vwY zlqU@yTK@Vtr}odl_xO2W>kr`bXsrfm1HlxE)|(Z+hl}1_Qv&eF??$&K*Rw6vEGUv( z3|#>7FKk94kUUbOkvNX!eXk@{Ht6Lu3@8pr0mO+G&eZw=WP>CEaXZgP#-M%o&3_1J zGUa!@qwcMsn?U2ZFnx?k4rS;aJC(kF-#A9psu(XuMg>MPNB&(vy8}pRktnFTF$}GW zy}m~y7!TNGt(O;OBg(1Z!g@8A0_jIP7~8 zk-bt>rv|jW#Bf&eT{1@>e&cxa{=|s2o~Y8f*s~y1M`@A06}V1sr`!Sn;0td@(EPu_ z1W_3^O=Op|Xz`;r3Q(V7xcY)3qeFP`cl;#A=p@7+?t?$H7Ja8@Jq*?z4V&%wpmkjW(cL38>wIP#+sj57mRPcs2E#yshy zp`Ke?Pwi7kLkf(6-@dq^Pv0l=QzL@yzq@$*)8nd3pBb4q2snmVLol(=RaXCAdHv^^J08=g z=CrnQ!$m;=aK*+XV!kn*N4%RVaF{V~ly9ei^s_#SZ4wly+w=qpGg*(7i7c8CPzp+# z>FixVx+FN+MiVIv_N*GW2U9Zz16(*K8at0espo1q$F_n+FB!GgbiYiEk-%naT-YzG z?R!~^3OBS(;45L!Y(n{|;XrnM)?}_}$s+|V%c-sUxpI2mmI*{enqyeRr zpE_=Rowx`fJAj<2cG#S6cpKIWCvY9>4CvSN8dBm7lyIv;E6Gs=r}aYM#eS5FVE! 
z1qoHZ+YgKk$L;9tq{E_*wGn|)_X~&_D|A$p0c>dCfP?F)APe4(feB3OpcCHP(@0N^ z8Acn#CG)YGs%NUp)9q^IW}@Na;_Gv?C7?Htpx}MT$3lD%xT)UdHEie4kITdKdGb=d z3to0QXSQxURAG}uT{RTcqDL;wuKCN}ty@9zzP}P>$+Ww*2d=e1E&|d_fK{ZB3w z>`Akw7kWHrUBNLio8pj-B`eTi-(H{-UgY_{ zi@DpUcXhnrG=%5o?$vp_%*|sop_y}NqwSsbT3(mhpFBW`j`z#dFXWr_upOu4?)Ld6 zX?-^)%Z~{Q&1ce|8^1QYy}_dq*3rz@7`7D-a_yJ73*EVFayt}cx8B>VlEcp^{gTTX zoXla!l0K0pi3*%<8*)srz!di|5$X4S$Wl@Jul7Ts8QaEGKN4PmhH2Rp-1d5-NF^=$ z;J6T~wh3W{mbt2og;RXmb#X4-AkZ}(zPlB;RP8yyntPt9CbG4ALH)dSeLsCnE;1vp z#g1N-kl^19H)aOce%d~YKTXhxpS9&^&e30{smEGe`|pBZpo`m=j&07{&m%vP;Bvn-VJ-K@lAqsg5q8KRxF{tigMXKt4$7Qg^#>TKoDQF2r(x%f4IQy(D}gv;t6HxV#2*~eu{Fka0EVjj z%lIkHMT@+oy#!_B!M3|@#I(gnsQL7Prt{M^mO~?402h}du{R5bC~U~;hyw`u-gv2NJK-l6@KBJ z2HsRB(nUCAL(=tF-ahtjD38d#+~!^p=(yj!@^@b>g(zl<{0s|KT>#X%e2~nHqEj#8 z8C(5u2JiSbsVy6cLWpvjHqUBlv7?iCwmhIt?+m8dy%=0+#xDEKA5HOw$^CrKW}#H& zogkD?~HM*nTGm^isa2DBzGh z)vy?VO&=zosO+7M(nJ2DSmYkVeHuSWlKO4Wlp8lbnDEoH#)wG-s&t1(5L%C|2w|WSG0gl&wxd6NIw6r z7=;*>@V8$oi0JTK%N=d0gWC-gCDpufSvw4M6FYdRP^ zNyr=7#FSxH8Y+m^&sRT?-zniq9c&x1H=Q?>p0;hgc#AhtpMH02345kjX%Lx$^LUrp zVmVo82UzC$M%f#ch1@T#RaHXhV z3vDzm`%^BSdEdGG&7g?Cr5fYk^;vSM>;&M3f3^m4eEfn6nVZ}%IlNLm|2R`u9TJkN zLJ`wNE!ky?LtSUAQaT|IZZ47XPFRn9?q{+n(=c_&rf0Mm)^V%79_7*FG%- z_~4JTt?*-pi$0AnCI6Z(^xKB=>FVNVZ(0Ydcy5oyB9tccJ)52B!0^ocr6R{0=Y{H4 z?=Iy3Z#X7F#R9O<12F+c?G@U~F~rK!hfAV+%|7I>n-|mid18L(D!1pmBnHF=vKz*+ z7zR>=a3o+gMqSVTUhx7qkhUh*YqqPA3>Lt!SC=1yl9K|CaCHnQ>5p;2o1-pLp zZ{hQ!<(5oHEp_%=jS@yeuDv8I8xU0*hoQLJ?$7Ux5`nm=cFh*@(?}O18E_d%e3Q@& zGMEq>%)TpBatG6{&(&3kvzO#6!}UuP%NdDVAK@8fO9+7ncqhntL^{6Xjg0*K3X*^t zd?Fg-<_G(pkFth!02fs(ftHa~LZ3HaDjp*huS8Pj7>e6LqM_oS`O^#N5}4IwaBgUu zR)tBnFB;B^3}4CSO1!35k;=EM_K*nSw_{H60*({=bdg+@3^wAVr}ki_TYnFPkw>eS zfC2lA(kaq6DatXLZXRgMir<3@hBMqc{-x5KIMu7eACX!NOwyQTH#N&%MXkr}WN)V& zh=ufwY(+^mtpTc;3#T=1tupz~BrHMdXrWHpliXl)u;G<3)7qoc`OLEdGyg~A^>A#e z6EZ@bVyO?eJM;{MHh()qggG1Z$Vdum0k5<&a&-5HB)PuWCX=10yMdjU4$JK!YN?EO zzGqj!C}*RAX;EFUxV`OYa7*!Rr2ps<{K65@D)vG!UlihY086Aqqmqpkz;bsu>=x2k_>nDfyD#~La!U($g%lZ7 zMd~LzNE~W~Wb=Nj{+x^u7kt#to>=erQ~Uc@;48xLy%G8x;&%Cy%1-c2(9fd@*|-^r z)HCDM4FihP!{=gpoE>mS>1=D5Y*4SuPw|taV(G_8e5V#CxG%^zTWi|Z6Lx3YV>j3r$1rlTE={qMt}1xI7XjLW zdrxgVSh#tPi?S?sjTg=DtrE48k1D*pT&v=l^3E#bbu>F-Y9;Gc&>#=X7IJZB6F}Ge z0lW?<0F0u%Rg>(&^8;RYFKi1O96Cn!!>L%{H!ys3_{CkT&raCmAv zp31v*bTFmn$M4dnlIvn;Ys7`(uDs!cCuJkBli=qHX@9K8_-Z%37)0wOevA;N1U%s@ zatVdXeX&EXP}t|79w()ZeLd=4$oPU3$>ebqY52RNgRLbivwn9jR7&rVuD^nV?oe%A&xah3@DK)7~u;h zXqAC;iRIISH8x)2ue^mOVIuKK>cTBmB54Z+l#ltBNO3@h*8L896HIz1)!*sp=!nZc zwk8$EFnEJ8PDe0cXM?@Z9d})g2p=T0k!Spmkk{U8=Lc1gyj9PC1l@usXy+_$ zvZ>L2C~h72BUs;JFo;sY_;%eg9nPR6xu+P6I&f!p-VEW*H>l`_A1drO(LILg7=Q1G zs|p5&SfD&IE;lp&+ulhpb7gJqS9Ua64FAf8&A&qb7l^AwW#7CcJXH4B( z)FLGKJMF>vP3OWBG2dd|dP<8p#y1C36aI(-cp=bbcbzy5bd<3NCPuMFZnri`VY8z1 zx2~%Wh4wU8cNA-z<~~+RXDk{=oI2bT?u&%4j@o400v+xOk$!S--)1a(n)_#0C`)Mu zDkthOy9IQst(j~V%Gzeysh>w{>GjKjHrpixx?NB%<9*ed1p|lxYqcw6!@M(%f_n$( zVLYdF2VL*?d8(g}G9bMkW?TL8m&T>+CqI&97*x4qr6#<%lNj$Ub~|0ccPt z%(u>*PEWg|#YLOdhHWA>W$7)|T=VaU>m3iLQon6Fd-Nt;0;Y5&huH=|sKB_^x`;mA zW|!%bOWd%Ts=x}9C)WjX22XDklZT+U*KCJCS%`1b1IsUC=_rJ!X!TqL*^+PU8_&^z zLr#!Wl+@4~Wxe@{+V-;lA}Q~?QfU60pt4mY%a*el(3p;q>6Hm7}6B51i-rW#hK`4GYQED8 zjW=iiAG+Q;D$2KO`yYDfL8Nm40YOSiVrZm62|>CPkdhucm2QxbZjhGl4y8l7TRMm4 zz4+eu{XWnAi?#d*fmy6|UFSacKK8Lcd%Q3UGTV>}oyx!oE|4w4@(a^xRa_YALCuqRm+$t^t!4`C<0EyO z!u|0P(I1t_wUN>N1Z+nBBvueIcgI?vMWF+GfTRC-+*Ma{q%}@n)qnT(Zq*unz!2l? 
z>FX-wUO4d9>f&-8L%j|@^pCu^zjgKirrp(>wB(Akaw`S5ekXX#VQ)2}==`h9u<~kI zhIwx<`!>BATbVpMpZYnzl!Az@p(>obsyO|(A4{ig_7OV5h}2Wei)&-^Ps!mZ_W5}A zS)u(#q`Fyofy>{x;E5bN?qBy`P3*q;1hg;XLWwz>?5>sz9D%uO^NIIRvF_SUsnPppn_g+3&*Qe9c5T?Hq z7HvY=o4l1<18H(M7l)4_TOW3zn=-dj4;MWIf%1uvarS>1{#~M(pPKRP8NBFjE)jLV zt4?0xcOPA-_2w8%f_3JGi>2)}-~x!%H8LT`X=amokJb;|Y-?Yji`!Rq!{Zt4ow(nC zxuTQRHG$XK7^L*4K;e6tWdDa5@stZ?RBS%GFBnvIDsLF*WC;w>m zr%BW03(_-k6!-7kZJOr7{^6bi%<$b358;Si1tQ>5WAPTUXMZ|=9v+r&dwTwqnRZ%N zv}h{({@|b=1)it)zTm@ez8)=HURu&dk1SZ`X&xFpCXy5T(9zc+%yy6V^nj|5dx^U@ zoN6#M<93OcXBxq>Z5j()9?Ac6d01wT9+W*^9$xQ#R^mfcycF+F5u$rkmkldaq+rOxWNlUDBoY(tt`8C7Im8ey{2Wa5lhR-we6F9FeF*`B{7k^rriJ`I&^r3o?D@bm4kdo zIZ{ZbsWS{FfGPia?}wIOl8s|{zoOCcO`!(YVGfZzvW`;Bb`vPRhs*8$IP9|)DUA!jIZXS;V#2OO}k?q^v99px_U?a76VvX<7`<@2|ZQM32Yc3zGw8+D@m zAzAJsPUovRv%;ze?0^MZ9BA&kJx|3ruWkQy8+->r=Bn&q{;-i7eZ@L<6eQHC^%9^U zyc*dqlR9&mpTsdtwZ6Wj^GNH!Ys$55@Av{Ex*0&AgL`DI>`JPN ze|kqD@`-(upW+`dkTrg}KmyAo^S8hC$vEcYa+$JM*h%1pP3#zQk zQAW#a8^A-D9rGMn#Z*gCD>0QRNJc0XZBA1qov(XvE*etj>H;* zBp`2hrRrLBU9p|=?o8t};F?I`wuPS8!ppcxZ^ImSp1#KYJH=JE#D352ZqQUT))|PC zB~Qd!aLKkPUGXN`*u#tJv|Sa6I$&!kO4N zj3eRn_Kd#h{Tg9>8lMRK+%{yzZGqI@>iM}5?Vpvlxa6ea#)vIAV8+!rR zYG};+$E3$nrbe1T;4PbFpO#w~qO>T`{zVXn>bf7>kS2gr~|j%{Q;g>{#*J8kjy+)!Qyn0|q+AEOer+ z3ffTOXWUN#`Wi4TZ}9&+EpLduow3{-Njl;Fz%Pkj<442&5vS~EphUzCtBx27WbbRX}5g~@qM5TN6FAa>cQez2PKAFKrt|W zjWDi)EIT?`glF^{ox=gbog%OwOvBK~M<6p@c}&D7<7fgqGT{gm)`IP$S>}H+N)7fD zBg5+>;AH+qkl;3gi`pC=*aF4(6nlu`o5S1}%i9j*!HepXq;xF^y?|ul#?ePKnB&_8 zEEAt+$VE0g8Lzh=`S4{#)mBL#bJ%r)noRgQVaJY=+4HO~t_9vxQ$0BHjDx zO5%$7i5&UOul(e6dU$f@@nx~8H_N7ch|ToMlJ!XYEvJ*O2fV!d-KXOpz>d>Y~ zQVB-_``da170ur$l2jkVDw3dTm{)?Af#0nxUmBMWgSgNr%t076d zF9ERIshvTF1-IGBjfz$F@Mhnzhjq2>)NIN=_d>Z5A&X8+lZr@_2`-IbK!;!;1wzKw z#}M3wQIY0(eX`oBD6-+dg22EpB(d#=-%A9j+#7~bSeQ<1SH!V6eUj7u3ik>pgT+M9 ztg$ucD|5r|TLJCli1oRu>JpMaGO~q1vs(!8?LW1cJr$UWD2?kRH@YwuW8>k9r>&afuo zE%T!I{g^C}{(|+2!~xhKGxlRo2n?vla)qq1e_{pLQZ{ z=WNsCNo$)LT@LH43aJ47T`BQ7iW`#oyUGc7cG@@PcO9+1U-5PhRXYtJuSc_N$9ris z`TyF?G$nm2(=yIEm*uq1{v9qa95>Y-!a<@|GetEQAlWM!7&+2A`RTQ^UbfAK{NWEO zdWLN0g|AhiRIxraR`YF#4gDHTnoCGkpGS>}_#coAT{+w*x({VW#Q&5P$%z8QMar3< z;|U7HMQ$LfHN>dG-X7{<4jL9owoWaT!x_Mn89Lvj;7M&_a-`e z<8LIYrB*+5X<-8dlCV+tRv~AZZWr2PQ_FYM%kA&E^N#t4F*fPqd+5Hm2;*b6l|g(O zm{jhiTJ>5BiBTBVZjdMmGI2qN4POK6LE3|h?9xLhHr#$&NaCRPjDN(u`gE$ysrRQV zsNRzN9sQYpoYL->8cxybbV`@PZteDI>Yh8ntrj3dg>DW1XyU<%;B&8~5%J*EYj7OR zjSv_GI-^b;W2XQor8vcY+x5O@ty1$Gz=`c+kL)G=%-WyCJ8)v>M?9%MNJe#vz)bUgpz7LIvI7T>XPQ{cghi%v^w9q(29LId7P zmxYd`<;}i|b9av6?Y?1KyfbFT*|Q^f@%aI0T%ng~pk96f=!6uF&^~sOcTJm35mcR+ z%Ee+JW)rUYHE+r#v*u>CJlf0q)n8Ea`JrZIMN=BoR6-zOf zF{MRq-G*i;g5vGO%7Dyrw#J&`!KXQ=Z9wo9q5WH$eekPSnx(qHvQ5TL%NLhW1Niy% z7`qipIrA7Z2^Exr&sYCQHWXqFMa@hfGds#+A1mg!Q9P`lj(WmCG*OBn!uPYJ-4p1V6W^OGl?=8PoGNg-fRfltc~1~|w&Q>IH;~B9xWH(76#znCEI0BW6W_3VzgLG@7rtfJj`f`i5O0#u zK%a}=H?n^i{QH+o+!liP4s0km&3UkDa9KK@ANf7lR(I1Xnt|@r?Cpbz3ek;*?N1cy zsMx;Dcvae8Q=&b#!P-iG{vSy{=R2>(9=eDG&@p4XGFfayUz+HTy-IdEUo?Cw5{!x^ zCIReva!VSPq+;n?Hmk2q+Q_2mjikxnQDYqMRv;PZw%IXE(E*=Ny|ckGCu<**mNKra zQ=2-7gX!=kxEIBBp03Zo|2a|5qTj?&ZN7oZ_8JXxi)nj!b} zb02^7y$Q&uk+F-Yq zhk`Sp!|vy@62pPZr{ZJC9p$yO=(%?{bd}i$$&jA^m)E^w4H44wQA)_^pr;Xgop%iQ z#N2q+Z)6rfWUX4ZJi8Esh+lE}aE_y!5{w4>E3CmZUVqrHy~_2V0VnG1Q&N2$L6;rz zdV8T-|3C5V4s}kUn~vwRM6fxaU;jv12H0FQ#8LSCT$r4jfwONv{!>`^kwI{5-Ehfy zoOk_azNZ4o^UfH~MZq$v``O{t_tK@hO-8ks1B-RP@usb3asr;2N=v_~Kp+|mOa6jh*Ngi2E zwQ9PctQ&*J>Dnv98DGkezR|;QaIK9&o^9OeR%jh@|M0uf^>29bi4q0B{jsTDp02bY z0#z0Kdua16whq#GjEptHNuLG4sHqIiPumZ3dDyPE~HkQh) zOr@D<>zqsePCP5lMgkve((c(|NELv;q$}w@< zdY>x5rkzlhuY`AePf_~bk{!=lvITwY0uLJk!_SFxq%R$F><*W@7eA96rNoE^TvY>O 
zrR2p&Dc9VG_nH-B7_ZYr2~zSSW=o|3R&Vx26JY|spz|s-l?dDi&^>ae{m{T)%K<6A zypws$h0xaR7MNYYPMithpWB@&+{tGP%L1lzj@f_QgjdyqHy@mae@>5DJ$M1Vl_{ZI z{(5Ha*3Q@H5FCN_2I>_8*S3tz0Ir>25PSNo1Bbt3)%nK3cw(I7ZimhG>Mgf7X%jCU z>z(%nb4?YI`u@TeJ%%ayoq1A49ORF0ZkV1TSiI+IuB@gLOMi>U8|*0u7Lsl48@4pSpmSq)KrVnIU4444`8cs|TXI4GP? zd}NTMvqZcuS3r?I)JU7slpuNNg!jE`Z?9n=F?F0|M3fI>Cdaxw`8ir$uy7RyGR+B8 z5|nd<8Y%*M3JgPk6!g62S=b~xSCz5FJ_q4>Mb{n!t1oMrdENK@(vS!IJRq}>&pZ-4 zWj@*MD9vy1`H)G8oRCC_gy?821ll4|1cUV6%3KGE_|sy$%62k>qtP>|@uE_&<45j; zsNP~&OUh7=5&wD?4)HK2+yV=4-<0i*8UxdzzkZaQQ7z9HcZVGRovxB+Pkst~xVER6NWY#dd z-=kSea~Y^4#(93J4Ia3AC&E86nstS3K4Wi*FGx+{jfM7TxLJgAGvzAA%exwQ-y%#d z=0E!?AG?Brp6F{K=O#MZ_?KY&*za(E%-OSDH*7Pcb89m^0{{`@z)Ax84?o1LDVRmi zJn4JC$0cj-N*XB6-<|v(cD&dl^OC`|Qe_>f$?l*Du_Gh$^m|VNYNM>%RL##yh;32y zKEfm48cxm5ZUXcri_>*}qur8L0VLmev|6EbY14NBM4qx<^V!;~b#UY>%}2ds%C27nT50 z(3$9O%lFT@54DgRh7Svw;48f}xsV z#;m&jnGcE^V(;&m6P56%3t*`Au#+2oSsh?Y_hkkW65i~GE*;1WWuKno`%>}rMdahQ$T05QNhwjs!8T)m< zzjUj(koiU#m!kKb^~_|q(_9w!$Xnk|fZAcP{K8Vcf5?=R|BEX?uZhcPeIUXA^!8C+ zHy$Wdp7l0vLUThu3@u(!0sj(23YgR_%WHv8l%#u{D`lmdls zv-8l0r@6A@NKEKEWpb8$+R-y8x8Mu>KlGX&ZVF06O2kr=xLbQ@XE$w3o~+rOep6}+ zx=~ns#?M-9KC0dSYRJU&Wh4>z&c)H0vT>owbqmKMv2 z0*yeD>!<2;84qSuvzIUoLSVl`02-162Hsx&`=5D*0O}5RqD}%C`^R}om6|c6L;2kVtK4jqQr5ed|7E%qCnE)~#otX_(ygb9e)*?28r1h*2$2+bdaj=p_7G-yE9w2hklH85~|x2c=> zml#L_8H8#0j1dmf?LunH4gm42KErbY4M~ZNfuoiCSf`O%qy+GMgv6Oq(tW~^0;JP? z_?)PZb{L!hvt@l(uAT+55W0_&YM<|x8hqNZZD(44n;sqOwKf|)JTVzbgo zK5_&Ya%t1(t{suDfH|aVOniw^k`J*ZTRNpxh( z|JxY^99+PEjG6){D#BQh$>`|$8^jy{Ai4XY*-EhS5#m2G>ts1!^Ar%&U&9vW=r*g| z9Pw-C1(?Ne111t`Z@<--&NbDQYDTh;RUxzC0?rB06=(w4&~Gw})(5Xc&}HLG3tmPh z5nS?6h>gPMJ#&U(Nv? z37u5Y=-fac!DQ!VIW{!FFp&w*c=Q!}5#~<;u^CB;6JX+(Ec^uq$coNg3-aY61h7#lwxRrZK z%)HtG#IP_u_ZrW`qsnl3(9^BHt8bws>xRgF{2n({)i%3$q!M%8q0LHbA~eAT$WGGb zwd)90PsQUWjB2i2JNMLkT=|0L4;5@jUC4m0N@5g zrXF0b8NSjMbUDE3UX>OibDbJHBYSy-ywdn$i!Rmd{r$+PT{@GGx`F&frM7O9l~{i1Td3C_C(Ct;y8^o}Zq&ScIyMens~3 zqpDC$&&FX4fZEAEB_QxZ`G)do&!7Wm+2rp;SyA`Y@r)5x+k#f#LvOmbln#DHwm2ZO zUkZJq<9*GpD?Ge*B@Dw^l}I(;RpjA$TD;5UTDkuB9@r&w>A|*?8!OsgKRjr@?R&_0 zG)L>O`C{+sGcLXBE4tJVX3UWfcCUC(^@RRXX0~Wb>q+YPS^Fje|Nf0(N`yIE?LWSI z@DSoRX?|ou;INhbrRi~6e=_l-u(^B{nsa&uPy|RgBtgBO@Q6_&4rPM*L*fydkE&QS zazlWXk37}|{dt!CdC=F7Jv7)?vWEb2kcs(Ekju0$^uQQL-#1Rq-a_qYKJBLo}@ z;Vj0IErP^V2GO$n)YN3X(9~Obr{hUB|))9b8`tvNQAuJ zIhK5dZ^_dGzWGFBCE#!09hoP*^C6U4$Sw6dTo+?g$g((_2@j?^Am?78Y~@x;WRhHp z;l?Xl6$Twxbe^-`i4Z#I>oPa?AGdqEVg&$Oue7sSvAn=`L^|u*8*<7)7xegl`mo;) zxsIKp=$x?h(6Ud9Z*H`7;ujZ?50b(hZXw2>Vf9;N4%U#;=l9duoiucu$ItdUlS}i< zKQ@zw{$yr%lX6{f7>4bP%4Gl58iu`MxF;|WvT`hLl>dzWOBDSXa+&)++D@eC(f!!W z!^yBnW3L7wN8R@7nT&VUoz9Jl$d&1}Zd2O0F>06(0Vne7HZUm?z2U#U;9x_(Ud~Ga zn{c2ZV_eTmubM4kwWk~g(dXq_md@O>?6mp}Py}RWHIYIu=N$~bFP`Pq#_7^hcY)>3DS*xB;kns&#>iRzX z?%Rdg(ITQ*wE4D+>^;WL>BH@$!G-qH!a54gt*jvXA~Pq)qR4SJjhAQ;fAf!V`)~N( z>&dD?We<0n?gY)wGGO#^(VNrZjja$=Ey92T*jhbYF~mHW|onWuoB{c(40 z7~IBv^qDPAR)0v0jV6k$hs$6~wYrjR`&wka=v7iI}o(0{i5vPQA6s zyK&{ypcXwY6jf+#USTBkbArxO_%6_nBtV)tCfQvG@1N;42p6>vC7;hWnaOC%T%R$O z&g>+(>yPC$B|Sd#41DB!ok0Zy_KVv~Y28cd!jEB>>BqI-{S2^V6MZ>KHow0I83rcy=)%c~L%4{z_&4GiKjC z<`X8tO}dmyCR_=|$qyCu>^Z8dhOBUJ_B$HgBrF|y(oqk%vkQR6kQ`G5dw^<MtdbB5ky_h3@5dLzoL-kpslvkVx!D|84yDWy9wiNXvFp=R-D!jXPj>RHlhHVJ>ODP8GvnRR z4+FKzJ8Sio`^bk&#MnH_f6nCpI-qb1$Or~MBPxPpk>fS^W36bgrj(?VHgWtcZOKJE z2|cRGLq((~9o92S4g3B1KS|$ucmF^~BB;3ef_h~x-12&Ndt0vtZR7TMmz|cqPdCcG z6`^M5s~%N;P*78FP)L61^od-+BVa(`ZV24|f|@K~E0PF2ybb3)=YD$$ZG{#an+qEM zjTX;K`K7w|^MxlHxKFzNrGB=^sm2^@%}^4zP!Y31I-jk@i{yPycH?2X9A zl<#^rRSDR4=Qj_ztyjBeo4+}BtKHp&%{Fj@qA~){T=jK%g!hTceho()*6sK3G;Oi@ zXa}yhz>i}lo+6g(Don7nu)xCSJB|}>Q#M1yslhGXfv@z= 
z(Q~!24OL+NWp@jqF0{0*K!hu?byK?@32b16cwcjSrybNEwx}wi-zk9^ z8HX!LWYCrezmt#W^s52HUz1aZ!^?JiH)h!*@1wxOeiKdb5a(qe!899kAQrUla1L}y zH>Bcy{cw1(r_b{kbekr?m$s7%;dKb1rv%$&-ks-6e82hwmhYt|D}caEdISyv7yFw^ zd{(UCDV5VZZo~0Syh$4NU<@bY&Xdv(xRm8pkJg30lK3Q*|EYmaaFg~pTh=jP4~(Q5 zAa6A)J|B=FXM&q*B@;#(E!R(%vg%KDL)S&P^TeQJEz*!_^_j!jB?&@`!zh(ETF$8@ zKh2G0!Esz^CzvU%5B2TE^AKYRO4$A7+W6VdxPs|lI%p!LMNm%%fzXaXf>feW{%K`i zNNtx<`0M)+CY>&D3&-%+pA#;FbryD<(i7@;oX|3Ts+6g>*9P8k<=fDPfRoe4FACOt|t$@R5)U| zu2V6kHiy0b5;ofl?N@2$xLg$0U1!tWX*L|TxH}VPzhY^(vv2sgTT{oeFX?yidz)1Ct0mINie~a|@SY;S}t6+0O zk)Y>|!Nc>rp&2ux1Q!8)is4`mx4RGGkfloh!7kb^R0~K|u-(XT-q)0IXiO&e%n%qq`3RN>1%QRJS#2U>5JQ#xG8vU!NDBzdqK>@gl zQ9lz6s->YyW*dxQJOJfc#lVb#^RR4>sVBud$tFgzkx7OGNcGn-haib3zPg0?3zwVe zd38bhRd>&#c%cEtIt&JtJt;7YfytjhTaGv=J^Z-b#zNgC5zxy@DVqQqdz0;wt^mGk z8hkal-iz1BY@pncMsqNmijn+1YCTt`}4PD*m)MSan6Z(8ny8>gSLdft0@5(>K%$-f^*l+- z^e)hr@v^)=#`lT>44oqi!=`5iqdWVDaG7M@-RkB))UNPF>Q?Qa3E52ldHerYwphLd z-uC}4E(5k9?d?p3qc^g5Yu+FzdRvf`VdrO#=aUN>f|{k;@hvACtp}DTS*IM{8;67e z2J7kB9ed*{qV)>trv@%YZVQL^Hbp-+tlu@GNsJ8gFGktapXV)N3nht%U60ceDUphL z1!@%Gd2_UY=j$v?t~*E`Z1oL(I_uqj_nJa|U-Y!~yr?#Rr&noscJwTe>Q!C|xoy=UT5sbFtcA&KOudHS@l{zrN#mVus+OA@JLf znOv|ez51+pi^5wrXJvac;VvpW2^y zBJuXyggM%JeT^JfDZP9D$<|UuQ}6s+fm-vO*QWkKP{+&G%Z>A$)AWZQ6a6|&?yE_V zCWpEbUSw(1>wCS2cj}`CmwVjOv6YnR@YS%hMiSjb(LW~Fuc2Q-utPZZ%+2krOR@m#_xnB+s$(q~ zCYZAz2F$JvMJGB7>h)5e2!DzOzU-@rr{Ch?%Gkadxm~=6;!5^pkE@q5HaLI1U%-)i zLsn&In_!b1Tk=e(lT^p*dNy-wHW)51d^K*O5~pD&0rMj7VT9fb>XXV-6f zb;nW!m!M`6fm4!|X*V8qU~^k&UtNTwt>|V=3(~+Yc&qs1y~IpF=`SqMd1?566zX*0sHdW;!a)1Ooxf zC{DSQ2#Azn2JkyA*O6m|`DrzsMD-88H{2yAlDaOl_mBmfL4xIzXzNn(-%vJmrb8X?#v^b013bkWx)x9;{N{TSswT*IOrDJ z_lZyuhU6kc#2;Fl5IAQ46IeZgzW|;4pAf(BpwuoO9(lYJUtukOo{$WT@4z}SQ5ZmQ zu#gkhWq+bwVMenmW}GJC>Bfi=NkI}XE}@kT?J^5fA<)@ue6nYr65JHp(m`#oiY4mC zySE`pX_Tkcfez!6()#eC_}HwJQKxVGJHM?ma5K0k^V{k(GMS(V+&CS-IRoge!_&sS zUR{hB2mGj1S(Icy%?w~%1ZQFOLmj~R?3Z)3qpHiJ;Irz3A@HUpJ2t%N%#8e~E{fF% zNTD(pzE(;iUEK*cK3oWli?H|~eXVDL_;#cwk}5+yI|Y#22{n~?W1Nx##cEs+p9#;! zlrV2nU-1CYau*fSf<1CLfgjuKWDapqEYzM(!Kkv8P6Ew|D!~j=0H7bA2g&# zotlV{C5q=Qgkim`a&d0WwQlvP`bj|Q1IGEY^ibIdtZ`m!aV5fRZ@oRY9b*zbpJ`!q zU7Uk`gkfod7oY3CBlpz@T4!Ub@>c6Mc}A+wy%Zq?Zlp zms>S3Zm{4Fk_5i?xBMwUsJwjBwC2GG{Bd|-Wa~j4RrlR*loWpJ?n^g|juUTXK1duk z05kSZQ+f$$!M0VB&H-)U89PUhxzvy{^;X#RcJ@zLOB6GK)da1ms)mak{uGB+xsxQ@Z)qhQP2&jk3F{AdBbyP$Dm~rx zwMnX}i-gAU*IRd!-y#D9ZY2aNQaXDya3l{PMyC&)7=t38u)e1aE1zV?$4%ky(cRWq-&lOY7DLJo{#j zr<&Vf0k~5so@K+~Ogj!-8YKQ(+g8h%~^_b)MddY~<%;WG$eAdwLY&tK577YPgveFK= z%I@$=QOJE?${3d4-zO}5*nu8h4E_dkgdn&L>NO2gnzagSwNG_9o>eoa##v*Yz3{iq z$eX1o)n zdd|-T(|@Yjs`$`WC=c7lC5^Pv#lneR<~ase6_PoHbu(-}z664O}u%MID4G=_YBoRiw_FCsy<#gT-mZ5KP8#oNXdd6YNwFv~3U1|P~- z2hoZ5ZU+-Arl^@Mdl`u?o0h1K+m!ln*N z`0ay^&SPS1`IHTK_&MqZn(2&6Asi8ud|csF{2~tr)zwwG=M?&~zt@~n7?Ez+x&B)z z1%4@x1Q1Iser2bU5hFngoIYQTSA&|(5~ zZz3MPswp#~Cah7dr2yY$)b@@;T9lJeHNhIS06TbKc>&c6DaJHUe*9 zWdXP$EY^C$pN%oNtHxgz6c`GBnkdkRKMB>H2}Ey?Y5s~!@$EDRAZVE~T1m`z5sxuB zgH09B=-i0c#OI|V@xytCe9Q;ZW(`o^YQ%3?LHq>>QMY?~diH+fcjqvr1M6#YEW~}% z1f4$h=(!T8+0Ce3jsC3N>>_|BUam)!50UCfU1d$BIHf9hb(y>nIzi&*Eu|5Z5~=K! 
zskdXJ{kr7+aJQ2gNc{_e?VNH2+ysLR4nsq{tJFS0KVTK~5f>7}X(?)3Mni&ES)GCP z_HS^8dSRjZs0JQ|;DMyNkGk2wnzQwrXm`IQj02Mc0jtr9zzZXlRXdtpL0=5P%`>n7 z?EvpP72Te7kSj2k;7!;nImh~4RVlCNqZawgs_!|eC|801Pa*sTu z4qOF!)sN}NS-;GRTnbJ!>`uI#eq)_TZO>%-F(?hfNpLGY0VpXreaX}PU-iAF`y=)F zy14|ChZ+GEu`bcoPJX(wy7Xq#6MN}6Zie?o^y3AKIQ`YKf%yg`LVPE1eSgtwI9S#zN4(jM(M{Ouq_u?vR9!zO)u@0kH%SCmW(qj-WjSvI|X5mUVIl^C?WV=z4P}#~ zWPv_W+9hz$PqWBG`#~`ysbLLLaUWFnq!_!YGp7>aA>GOLMxwVf-_saru2*%vmSv>1 zgN9XL=jwim@PULq zNo4cV2REFzdn7SUxSD2nJ288!;Wx1{XO1Cw$#Tl3?CTFtT6_m3S zua9C?s!J{-`PZ!p#o~VIS zD9ej$a^hT~!=E!`?S6l|llyD*hSV1&`0+`LM%5iS?d?bAxYs8`=CScq{`<9Q%aF;S ze89~Ilqu@7+I*zPu80Aoz{hHOGKrjDBz*y@*s{T9D+g3dj*RvaJx`%(rZV7gbs3`I zDp_I;JZU-xR3(kBXCfVxqMKdWfw!1%RRdHB*dN8qA9ahLJ8_e`G-eJDB?lhk3OtePj1BZzRF6z;re zZol;MjH&wfCW^x4?K18X$bg2LWmdW?j!$sia&w^Y*db)}X2m$~I%Mry^ID(XSu`!S zl+@k$)hvE)6tujDyOYe)4Qy%XCn_QO&MO7GU(eX>*XCk6h)?1+%PUmN|7K+xntIfW z4+Hw1S}ZarAHuKZ>k(adcUr+CI0SXh(}`^p0fyyO|DAyS-!3Ey2B5QYlVY~(=Kf7K zDy%GGcXF=E`}3{G*I^~4kYF1F75(PWTU2aX6zY6y4()~AosFw{>LIpx079-D>$}p~ z9bg-_$)D`cUzSrlKIHy)a3?JX&S&dNTr6fg%0-R+DiG=^6;WCI3T8$}W0J|Iws>gY zx9nTnD#%c~nlieCyux0o%*}lkYproi8A&+LC zVvCa^S9`1{q^@C9K$}(HYfd^9mb3ct3at23ZE>WfR+OqO)vb4uyWN@x8FMl+l7>#p z3-ex?+nz#|HL&j~SS@T#kGUO-^9bD8uD|6pH9o8PzWD(`x8&07qi0}TBq(vIaD7ex_ zsxxC57HOX=V=z&{pE&M7y!ss7ZN{I=A=*56a(+H#c&MKDYX-J>qUB(7C%K4WB>v5| z8#-_8rkST3;$H%!0kycFZACZ=Ue&uKOvcYSEsihsF~hGfP*0OGc(aOHu~5KT(K-mx4;_A z>LIdDUS@tUi8dt#51iD!NLdF3tRO7u+ zk(taR|0aRZAyhl!Rqie$$MvtcT%`WZEYMhrsaR^*?zCK@KgvZ{1{4A(BU#(I&XS~k zs1#)|0M~ah5o;~ui<1xxon882!j9|t48u8aB@SI$PE#ofPuM4Y>TBycq@sG#FQdGm zsAYPMeCffS{lTXd{0@I?TJJ8ZfWn!QaOreNh(jrv=lSp#p(`-!hJwfwM`@4pz$4Eo zJ(rE2E^}y5I4O}|V_Ch&0>%Fg%l~~=Gv)~%bEzcA0B=#LjcoknxrL6f!6(bbIKf({ zR`HVgGN5sXeBzo;H#yTp4lD z7(JunyId~Q_d44{kxO9dC}O~oBdz4QA&hhwBOP~swk`sY_n*8cMds;x|9=R3>!>Q* z?F&={38_st>r$ z0a1i@8roA2lFc6mHV{GQ5aF{ri=y+Z8J42@$r5cdqd5vt-IBy?Yap0PsDzaNqiXW; z@%bQJOS0>4Sn^6qslm0Vpyo6F?C7R^E8+^F)2C~n`xqiN+&avic0X{hQm{E^r$^!m zTR{x@(O{;B&%U2>Nl7sw{rB@>7)AZSyvSPX=G?(nFR7-4kqjSxAdlzUzy6UIhlaHW zw7GmFevXKI`yw}EPQw@l|FK=*#CmvsR^vB@9UxmJ0^Vndy`dly2#dJ2q~tKZYfS1X zbt?{;Y}BSCopg6HB^7D81+`*d5M>2zhx!D>Y&j?b+4W_3LML1H_xoFE&c6S-#e9Qn zVv8Ks1Xi2@6B<2=z&gY7^Al)uGFf4G6Nh;xf3y zP%+YTf$vB=4-L@@RyY+Bq|X){z_L0HTzMM>HR?41PcgTu%gf6hFw4g0;oorWU*wn$ zDsmaY%Bt8*x=ejJ`Q(WGic2tsGG|-lHZo;49VunYlS$mI^5OmUgUq-*UQ$f4H0~>T zJAoAESd%I5MvBC!gJhMe&1@gC?DgvR5g1Hx+sr2)m>aVY`ih{rV;p#9V=JQ_3D`zkcODmQI(IV^y!eyTW;F_ei8naMu}el*D4vF?>Cmu zvaJI%I?E(#GUxFlC_kheE2IsvaSjjeC?7hee?N;M`gxy)LjrN|?AO=e=Z(M=0ce(~ ztF7l2<#$o8n(RIPyX*0hc(vos6F;5$1Z%Z78p!az@@ z>iyxU<8Q? 
zW-Oi&VCts%r}>4Frce~;XI1xhRQq|VHZCRuHk&I zNZ+vS`(r{Q`jemo)I)(EmVTOrtedkKQS<2ssks=~zUj2s=DVaaECf|j`aZSiNDL)( z@ps7h+o>K#iy+D2JQ)f7lF#N2YkBsO-jez)Hz((+gv;oEIc40%(B_d?H+uRmv0b$WrX7v353l5Xp>R1QX*jLE%98*PY5m zTAomtA<=?9`USOp*Drz?q%GyrNhom1MuZT@c6L!7k-}?XXfIh0Kajc;sPpvI0o8YfCojQf}_2}5}nzMoXWKl+i3V#FNcTAt(UsFOYDF_+D zenTmxE#8~pfL z76M4@XZ=*IHYbYzVl9CcHr$u)MovFW7K?9A$>B7;w*kKKY2 z=oJ&9g7Z71@nwYg_gFHYTax01I-Wg1hr@S6DKN<<%oG$^$N1~epu3WNflQ1d9w5D} zeQoC_rnc0SwD*jp^>=OI;o*tu=ygv0pPtu=B@v`{Ovm9*_LP@K{U$j%Cfrxh`6e>+ zy6$J94#IOR1FfQyz9(!b9I_2?bV4T7<-IvRXJgS>VW5W{15mt#a!^wT$aNZkl=6!6UjA7M_4l!5R;mpeypO^7nml zM&k4925-H6nS=3T$ zURg0CLl$>8)x{S#44mlgA{&ntGLC3fRWPSKSm~WUt#E~YQ2_&)Y;+W*RFo;Ue`+9w zqzck8Ev+2C;0xOJ3GxslCuMXr!T zWIA5zsxtX*s0q-k_48E`)%8XY7gvwowECr3>sR%|p?mmamOq_je^i!@(E~)QnI`Hc6LG6g^MR28IGr!p>Kc+FciFdybdKYg;uxtrb)N*L-2b z?8!_D_m~4!8H%`ErO%&w%c(ogzDFiZg^KnGqq^Bc@>H{=;|_=Uw7XHrw{qwSAF9SS z4v+1)l?Af6Ew%8Qiw^O{(g^uRxGci3L~d}-x>Zc90wBV>3KBt7w-=dXbpaHXVbe0M z3VGzKg|*w$X}_lYJPu=6fIr0cwjv(Gi{C*b(YowCxK|6D)48YbFzT+{S3Ur>la8fV z`g&|dhn92!Ku7uLge#Yw10C&g0>f~Oqp{ma-PXePQ{DH&uMhHf)86}{>hf5Hp|yT6 z`oH6;-|sLo{ym|87S&|!F*Q{iW^wuv-Locts-YOS)S)=b!NGgJodkl<-cZliNQV+2 z+Pk;c_;hP$=Z*FC5Pe5oDk+=S%azeC-|{;D9&=)!IqDZ`9D?4eby>yFT*P&pEw5j) zY}b86Yh~d&o*x3f_afKMq#&#y-}}Zug}$e%`L^%dXr{Y22bd8F%9oEjJ72+9{grED z%V}O`P?jQnar7=a8BASzoS0p4#*@T2wL!v?_ z{2@YoL>r(a%_1#TYNX2MoBK+hkXbijqD1d=@Q|*Ky`dhv*Ug14#pmmo+CD$WP9yAS zUtp_A{Q6K@+QG)jkcdc{#t|>iHMGRIWRsW7Y4aydkibDL$9|nzj_w8MOO4MY2fRr$ zF3Zp~KdD>~)HZ(uW}IOOQ7dsj7n}=9COI95uHJ?l z3#a_7U+DO%67)6Z?^g(K2hJDbmn3(;1o_h{xE{+mS?_ouy4I2F{Jq+1%z4&RsWX;# z^Hz1+%2H|JZn5xX|2k%VuC=KLUY(ODb@_8xsJig}w$+e-Crtz|wU~W&!E_W?jdrolg|sQloeU9SSf)n0leM(61To*dD-{%aA05{Ihjs09gtsfn1Y9r z&OScpx1yaI85Se&q{J=k3W>yQ<8Qm)0hX5Pww({`Oqtb1Ahc(teGv^z6bU;ldRp=5 z6rXC)5-N96x=ZpAxG!UnaWnDY#BT+#*-hze1ko~j%N8f2xWfvzA;b2=Fpd7 z*)0@|Ev{J1Mu+8E^17Pg{v?w8w3<3uxgY8_p9nOo%^*@4KhyMRjqsYTFta$@ovwQa zcil*dcTYiZKOUQQ{1qEH(Hln$ihB~L;h0}ioto}i#VbubC$)|{BV6c2C;|_giIrq}3 zdjk3d=-~H9c_B2mn>54YBb2Mtr&}Q`zv$lURzID)2**Y;vVB`XCTuOLd73?AX9Dr z%RbHAa+B^)J=qmOx{r?tT3=DWd;*2VPX*ULqt2a>?8zoAx7v!PF0Ud^dt9+IQLF$A zO}7q;i`mGg4&y=o^P#vr8`ms~)Ik zaY8L0rs=HA=>GHyA3o)SJ^le=w=>We^Sqe>C z&RX%xwX>N87Mxw4ugpS+_M}qjMamYhwu(B^xAbWzw#N&{fNh?~o}J`A7`hs2L^OE5 z9N=4{zmJmtLdh|R0g_WVFuz#;$fpV<04y$~8j=0^yqHs`NbShXWVF_!XHo@O^0tCQ zLpJTZZ>RGrfA;=8f0OqCt=mLy!ORlK{w36Q-q%dcseAlC;Vtt2?@Z^Y@d7dZxbX~xIJ zUhLAnD+qdu261T*biq^$g9sBoI;L^!i2-p!UcAI1_{b{xVj}dRk3&}&Nw5&4YZ<+~ zf~6c^ z40`Xu9A0WP4uh(1?F^zjy4w421X(ScB4aYX1rN z5VDkbF?jfF{<8sqzedWZRVCY*rJ-|GEo$aWA~Mm{%L`xSsTr& zG%PHNCZAIwKn=BdQ}Q_5Jfi0nzX$l`GQE9SPwEt&H?RUT(H{dsD1sVGt@ZAX+Ah{H znz`IukuhZXZAxlFl;cqzJ)N6Bz)0XtMiJkgskVf!Edo2g^E3)ytyPNhk82-3q11>0 z<|FZR%E<|ZgYQJv=-a;ZKmxp0~(#6}CDpuXQtmtR%hdS&^sEjp6MAY7sCY$I0*wKkp$jw2R zF~P9kYq7O%DwNVF1?nUj7DVO7yPx&Q6?@`>>>=c0_(G>G_8ngozwLP`D5 zJESmCZAjTKoD1Qc@jr2=M}kgmZ@Yw=IeIQvcGs*cGkJ!P1eTGXys&Eqa3BK{3Yea4SJG5 zU>sN^rzEZX#0Z;t5m!OcJ-{V4<4=p>gWKwKak#Wq1jr?0=#;jlzA$}c2&R9?M60kD z8c_~Qo*^ScvrB*y%GRjU(xsv%cf4r-#3kwe77HP+uM$v9==zd303467V1|jl2;vzO zUse};Y1I5p3)rF2!0pUu-d#)x0fh+{YVlOa4<@F=XsVhH!i+d#7(6VrCag8`L~e)@ z<}rfKvJ4K%Ysi^QPQX3Rz(I*-Ie+A4WkK4HrE_UdS(cmsn(G|*stL((ua&RGb0C4u z0zeR__kpQ6AB({=qGFIeEuFUd;ax7+632>CCayL)X;-Vgqj#Xwz%9X~=wg zkFt`oQxuE@-Aqba%Fp~aE`RMoKQ;2-34QT}=!d?R@oLpDHeSY*z2~aBHc)LUh2Qnr zPXFp^SW5)z;dfnFl|)cg-b#cJH-*>5D%ih7C6tf_%XqEJr3PeLw_OjOeuW8RNK7_Q z2PHT+kbaxoe0_u&{r)FENAPj&N`wN7_hmoFWi&$($W3bXIW9MWHa{#SOe(}w4|)@P za^GVEb3=37r%oVUpw)uc%3!HDI@+lqMZe7C1wDrt76iNWx!I}6CU3%8o2cL660tMi zPQCS|X<*Tut1Jp!-eZTyW@U@lj7~mhlVWctcTCO?2D`LOe 
zgDa+W{Sn*Ms5^Bvuq-cWeY-rSv8QS(bMkVSW@<~9AH|7NQt0{}MdR|)kk+K+*%UY= zAp9n{^A_Ie9_YB{mo`zR<_A;oV^VzbW*-lAYw`IpON{`YvEP$yo1Q5{43dQF&|hGo zp+pY)n9((FV7JZsPG$mpRoQyA-(_nw=YFohHwfVI_8w={bV5_ZoZT{RR~u;tDQ||F zIPemyZ5$g$iwX*unO~ecM$I?=y`KIHk^u%^4}mJZ%YV_wwmLZr_)rk-!$y)uch8Rb zeZ)x6fREJUH^2KN86&EHz&gZMsOnO7{lEo&Ifdwhp_u7b0&vs~zv+0+BZRlqlnnlu zsCW`i{wkiiezVPC=@C)3DotSCC-9Y6$@S2Ir%17WBR|Yb>~T0lTbx)}VNZ6?UYs5Z z=L1Bx%UUcwySW)Z{N%z%C@Rr?H30KaMJ-SHd9=LL5B$rREcRS6m_Np|6gT+0DY1kA ze8sF~Mp{K!Q8zhM(V9kD^wX}=Y=Bmik;=mu&f$%%~C}*|REW=oK_9cT;j&fivFO7-w`#yFY8P z0$Yw&iI}QQ9Qt{n^AR0$_^bGH!8_D2tP(?Y3zExZBGVdn5^aTd8&r&Y!bwm+k&ye z`S?aw^HRxM60ZG`36qJ8IWb3Bh|_~y6d3nAy`)CGbm>d7SYLLUu`bV)2S%arm7x&i z2cTR5HAg&Z>y&|g?-LU55(IN5F3s79AWK)| zr%w|O=W|eYIo*pxDif|#8tmzi75;DkqeovHmim3qdu-~PqyZTGJ;Pi?M11Ey=}Wc4 z&@LAdw+`Gjq?DYcs!})3)TMS{6Vv1R>qH}40P*2Joj$QopifsL^?zEc`pXD1X^plFsc~gn^qAejyK7~vc z`D8ggJ{|31J32i=*wHhn=;Y6YwuN(CKq{#O-^x z$EQv?IYp~4ns-LV?F`9$PvNeICUW%}<2ox+m_NnixR(#>+Lxye%H7prLtaaKDaLr?f;#%iAWwv(}c zTZWcg&`S3IW56#uO#>Rp<#a%OgzYDyiDN?+$)k!*GNZQ|u_`e0S>sH$SU%TmGAwEIX5|CqrSlb;mHP$MDkc@saj5O-gGsR&z$J4bxD z+t#u8jiL9k4=M(PSe#SCA5wM?zU+X1h|8{pNda3w8PEDO_$my_mr6MRp~AMdM2L{d zS?(m4aft^fbIAtS{m{?T(Oc9y@K8a`(bR392&b(ag~ek^vo?z8dQ2-%uFTI=Ipk9gDJeAgp$mqMk@CdJK@AQnXe_FQxREB@OLAr>Pl7t#iYV zL!KWNu{X_94aML05g61+$55-((i3g;V$VBaXqtZup4h;u~^|Q;={QM_w9&2R*`xIL~XPC#SaNNtGak|%H5==M8#k7 z>`o?9G)lcwv!|f|9umO?i{37f3R#hyhD{h&#fo1!&kJg zzH5ptH^~1n4q!;*!Vu8~?_lU!#E_~eGu~3%jw7o(6UsWKXC`ojJhTDYIk8-W$~Q`M zXeZw~lDU##-;bY9rdIlr@!HMRo&j_gjTB2=_cqDm^5xbJ9 z$ham*#%LVa5uQM>0;LNd@eCr>GmG)I;Y^SPp!XQWibkO zvCO)<)T=y@2f(WFmp2raJ>LC+7YABsc-VjM*Z&H>k5AC=!L7^W6HcEZ^FwN3LCs;RlIjZS|Oq4y&p-CqN>ip;Pv z)`y-r>AqHgB2As*5zE5Ke&JXEk!}=eW(ECfXY;So`1?Ur;{5SCPmYL7_W{bYGCDjd z4hxpMk!`QG7V?i29V)V|prfSEI-^~uJ7XL}!k@jI=2$Y_wok;e$x<2o1c@D*n=D+s zUf{s)5JIj$&`V$5NfJg_&WsaC&R*19bL)5FbGJ4Qn99kf1D-N<3{2yGiA0ZH=Io>I zW1@aST~#8bRRaV~Vfo@rzPUXK)CbFcENq8tA)W8QwXl5Df4K|OWBWl&r z=;$x{>K7jA^{K0pe8$!I^|B6V^0!D8S(%zk(6ng!R@cZ8p#(E-t*ko)X^54!(}#1tmqz7TRcsIJCCt$#lDS+Ib2 zzaQJjI@4BN4-@wZf&VxDpP5#kmXtBiR~2Qeqhx`d&z}q7;tW!fo2E>eIXwxZ8AO*v z74%I}TvE7in2cG3sZ-hMnq-SWsst$R8lM9cLoiK^CaS4~ZsEI~O#{;^PSoL}g+>PW zaCe#+znh-Kt^-tmBhUNEs7UJYXDFOf*${SLfO6Y7iBnu-E$;gf9`g>0KcK-QfrIXN=mN#&-Ri+afFV7 zqU4{v#!=Rer`Q20f3=$aXd)�r)mjO3~x8nrcSB0R$|R%Z$E4P}J-W9I;q7ZaAma zlnyr>^;|?a35q3oBDi%K?X7uLBo~BcLdtfvorA~si6a9K=hby|o^b|VF98KKMZ;-R zYJSaQ(_&Ym)7YaYAT{@-oaMr>KY^I~ihZ3FMYX3Ruo zl{n@fR6Peh?>2! 
zn+F~c6aMq{`;)W{Tkfanz`gAC3~1Yy(U(rON2YNMTx{OUr>M%d|F&3jVY_eCj3Sb* z)K#bTF!F$LX=;qkT%(SS^uhjn=Zcx1DMQ;Y3nIu1-UXUj-gz#dy?yof`W}I1m<4t7 zezf}QVV)l#R25GT!k8T&NgnhX*WQw#x)CL%LK^G?cYLq2RK5zb@nZZn|8pUA-nO?h zpFuXR^&>j9zRDw&cSzlW3KMF4HJ_8GMwA_gYZPxSvD-G2xC+O3nl4rHn@YG-?guYV zhJ@k)ucONcd zD(EHT1EE~j1qd<4&E8aG*|>DJ$DqRqM;@5OLYJfClVnSa#)cK(0@?bKnr_o3g;U$* z-GQl44m#((V0|_M8Gg5D{Z_Ypjud`@QfOg!9@93vV7No6fPYhAA|V> zXD&UJ!*(2EogY;ul=T(UY0;9zP_^g3EZEaHCnw+c<# zuo-9gel~MXCXd4U@Rz@6FvcvtWZl?kxA5_j!&J6h;%+UxE@umxb|+rHUBEbY@0b9> zEM$jRqp;h84kdfCf@&vm{D;qKz(L|zyKhwhz(dyi?$jptl#DxDGucW0b-k4<-EOhY zx%lhzwkySw!lHfGa9kNibt>+ylC+Xj1$<=V!l+0z$(qTL{En20X>X>CpG65&6 zlF@!4KUhL5pg*vEWCRJ+4s)&XUG0IAR4*dA0zkOJ%7 z^?@lWkatmMhdcDMjq4YXwnv|vTnzhDi}G<| z$_HB#wBRs%!?1{pP5)r4V82vKBbzgm6gm<3jThho*F~(;)e)znrl$7Hz+sn#-FBYV zi&dtyOE@ez>%^oz!Lpl_JEUEgc-gN=w~qa}RvB1KvCHP3CJHLzF!4*}YclMf#&PHX zy1YE!7dM8%OpT5Ftdlba#yy7rEAp)A_e^-YupAftDJA4}5_25|lkMGYud(qDO)bg* zSH{OLP*jQ;#v$1J1o}055$Nd}2fuFWtNjFx2d|veTnKPyXI{I0y=yw)%69q>Q1q`9 zgZ;12gIGV4_#lS1)clyAAzx6mmd{J(q`B|VasdslX~kTZ(&y2Q=EF`00IbY~8k}t7 z^5L@EYDTTH_f`f)p*&M5uH;|{mgZWFhyeRLA#1Z<0Sq>2vqhU6sZhc^fZLdRD)yiR zLwun>LWqt+*v!6u>(ftPtI6nh+cK?#=-n=@TX?wF70~Gqbrr_xie0qK zWBnrcB+K3hBY*8KECZt`NH%4YG7|U!Yb?@w6E??9(II$Zk)kt_;P)>(UzsXTK!Phu zwyVNMBt!!?dz0xNKI2b9Ew~7ME6=k0%$*H*Rrj6%<`P~gfMz`GX@tXQ=*{b}Uea>2 zcD^YUsN0^@{HlL?o>8PoChw-B{w{>Ql1`xWLrds$x8Q54gcLOL~ z$EtX^Ljv^KLisB5CU_A|94>rlgi)arJz{n10T({0zaU^hU&yL?YG&gASSQGBBtw;h z{h5UlDwGj5^*Ht$PFWhF0KTR5>u*j<5#NPNurMC~kIP{3#qSQ@sZGn#Q81{ah3}VLEI+Q(>|QAA+kW~M zcW6TAGnv|68QZjL*`24i9n$-Mj)7a6sQ;7&jJ{_a4}ZZ#fg=YD=szkacJjFoAEt*VkOcBOcTPHp5Kb;b{QD1%-8 zH7AGb*H7oB7)saGksmJ%(q8)|J1?V+jmZIK@-s8rij-cfBQj%t*h09aECmYVN8i^z z6_cqsu0?S=!ON=h?JQ>+s=44azA*Ebwj%ns@wE8ai#0BrKSt)0gOj89Ai>li)K4c67Zemz3{(#aE8vi)k0zY}`-eo+5PTC8@in;6?o%7f}d$_R6a-jgM_ho1QQn z;OsxQ9I!}>&sg%vg%-sR&x(0Aeh8&aTv4@OtJXjZ` ziSpa*??12Z8L5wUrT*ux$j?!~YPfCWChx8^-6`6!{+-oZd?fDCo;Ph;0KQl7xQ%jj zrfMY42YYsJy+_*c1+{3-qAotY@&aXA;=OxGMk!L)nashH@p%l}(tx~ir=?^tEB@n^V197ClMfBE^De#%2G!rVc0Cj*r`&-Rf z4gA;Ll~Mh9)Ad-kTu=@mH_ztLcd{U5^N=iny=zV}n#9Q#EWo+Dx?0G^B;&~`*q%|> zOc{-fPl*}iy_|v%-vn$0P0pus^Hg`We~cC*#d=w0)&jogk{}(N@)I*CIgOx(}1T9o<&gE#odbmmAD;R{5@fcBkFRI*cl z!$gtXZ8?S>FtkB!DjLad*T*F5N=7j#xMbfTN4PK}bZjl~FFKF@JEZM*JEMpc~mb z*7bbV&JNzLD>;Lm*S~8+eGH%anXCKCz07oi@=p2Y*H;6LB=^7YB7iL5@!JRINm%Dt zulgiNgL5VEg?s<{#kod*YxC2=`=cR&@)K4`H8{5u6v5qnSo5gB@R9kj?OIFV`%fPbiyE6~<;RE|Ce zV2zlvzXfEwx+ejN18A*yOFjhWy?`$z2b-3pYVnu}P39N)<@{<>*e`H{IMW-do|IW2 zQ9(SGBdKb_q}X%JZF(9}cQ&gcM0S!)+O;HKcB*;6J8*k7Tw?$;%8snymV%R!6_jZW zclI~UnWe@BZqNA^3+dcF%FJw*M@&Mg+}1vn5KGiCQbH%HS)l%uxNIB4q;6;CN=_FQ zJT=r{6A6#?8Gwm&VHAnEVNhWz_Zq;fD2Yx?ivn4E7Y>)&p=%cV^j)ig!+4bdIr6}d z1xo5AyR!ZrP#aUq0(A8L@)1mu*ErbfD+elOJS1c(zZsmiWuS}E_&a6_&jGfaJ*ry3 za5#?TZeeuRZx2-+Blr$$^K!wt>c&Ri{N_9t_!~a)SUy+vSMa%u%r71OhLEMdxWA&8 z;Cq8HMo4Y_8?A$%X8VlvzUt>wG~EnV>=iaJJ@~v~m1b z1_x^{Zdh(3PCGatwf8+?NWU>iM5MBIQXe82LmhR#x60My5@YW3zk;^z zO_wp-NtZRmH7pd=XKn_aZ)qBLww-fFvSi|Wd_e2$+AMen;EP9ztg@+5Zc-Qh{tcAu z#^&k)fX~MH+<7*@?_Jj-GVo9(C6xT0A=cGS$Zn^@d1*6O!8PZ0y=QS5La?=6IH*FD z40Rh)t;yMVRix~Fx0jX|H&hylQmKl-tv|Q^IEBS42Rm1aXRCqDvBp{R8wKE@!oL=) z#=iB^RjfxFFs6H5XK$8Y!DI>IfGkpyXcXo%V92Qf2B98zNZRThgVc%Y*~%WUjF4t^ zENOaiCSjOt<|Y>rUu@m-Q|2@3g5`Ef>RSdvA--iqHeS3)Y6HOAq${E83NYG+D(+=) z7zeS^18!bH4Iq&+F?tA(!_-GA%oLC7eIFM78et7D9;qDLf*WfrcIIp&W3Xd~V|`*;QP3Z+xqt+*dq&N0q(1@;9P zWGY!QE7`hJEk&(IO(d*UgiqDo9OfH5m>rBjrWF?g+H)YD%`@m4*Pr1=LkBA751#D1 zn_FzRy}pI6uX;zHE{e>r@95y{#2qne%EDZ~E;ob?#0^I(hq`{U39J52TrEB_Er3B= zWV!QS_h?!3ZxOgGY6+_uk1;L6 zeHl`Ix;0fbP(?ZfO0$I~1*#?@&DL|x5B4mSr@8nELqQ%gpY{8wqx~?3Mj`_TqXKMTVOK-V)0l%V 
zS7efSuS+n-gZFvyNJ~yDU9X8)>&~KG|TVItgjd~KyW2L1IF8QMPVyhm8 z=S0x~k*Ndn&e$#~Au2Qp!ui2q8T_H^lIDq*kj)8qgLs`+$@Nw*jsq9TCDC|fq zRc>%(a{%6QAf~wR)FR4K9G}UWgjF$z^dUW^&{=2d0^@oG4HoyiO_Hoh;jkl#VMt$O z`7-oX*QywXe`?)pI}xk3ez#~=jEE#ZvTF`a4r}PFe&T4r+}Q#IU7*MdoUKfd@uMS zN51S|i45mU$X)8yZRD-M-N9%3C7b1g#=XZ%b3~v`1yrx4fYYsC-Z!bfQU^-_(ig{E z2e0^e9GZ5Gj#sSk*2NI%C8@w0tE*v|(%u(U)irtESL>5wxm`-%0suC~T-%D&Ux1I< zZ*6Rt|Kj$=Ll=N@#cVY^Q(wna5)=a*)5T%w^xQWt9&|)QLqA<68mq6%uEkJGBASf| zxK!5!g<~w! zBW`QTWgPx_-a*^#GMN5N1znssWFrKe5bS#N1N=0Wgm!%{yOEOG78D3Uau(6uq(i@3 z_4ZBzMBWiFS8@K!*cdfWknfIPtR{i!w~)6zHDEkvRWo976L?f8B$}IxC+~VWZt~vi z!)-;%JdZ~YUhre~M-t(49TL-#Pw+~zeK`fqA|(kwR5h}NbZ$PDcI{Up^}bO*2usJa zmexs6z3K#-Rs9oeLfmGfUc@8`O0cIv_+oj4BsL)0VG!r2|V5qq>eN88{Tn7d&zXk zE)z*5WCuEva6mr(#9{(JHYyP!W@Nq&RaaoL;w2K|qv^nBBh;CI0TcoVA#|cDZ`l&l zdr!in>$ORkvT#VJa1rEQqV=HZ-FSgj5T$_sO^YMIM6=kPD!)9yYlCyV;65U^bM@CRAZHeaz{8mQwew(w?-fa6OB`)vTfG`C7k2`mOK={vVBP znGCAOq92uV<(m}EieKX_-qB5Wjdx7ng{C4}=5c)&PD{>nLs?uspSAof^{~9P8UNUZ z2&kWN64+K&;dLw?<^FAx&1d5PM}02mau6BFQ5Xa}e@%q`IE=C4`tEFZuYVn>=9bd< z&YWuT`uFcq2z^2_RNrQMsoxeA)meC+znP2dIk#A;zxraY&qm9DWfRhDf{zw#D0HL? zaY+ML|AW-e`q16y!#x|Yfhc_%J}mRV*GlJP9jq!H_p-<0==)+p(v=r|LxqY$1M|~P zMi>}L;z1y)^OZ$jZefXw>}gizk|u{48t zFZXt(+MIX_cSU}i3BLTK?e;j0`~u^*q_0Uj8nssVBzbUnVIOwmf;+`PSoSOT9B3uL z(&$|w!>G){c^4QLTWLps5hATwf9;buDugXeu<||;B*?k^y6=(C`=j?~6JSJ{xP{sw z5G<)1Y3t@*-fd#m_bby~+N39`3h)KXzU!WMZP8Q}wDSPd`aj4xpMQjduome{fFNe| zRKqPEYg8l)WX}1#02;zW2*T~j&-l7oz5ORF5V4lIp^b|7PEJn1 zR_gd~2j5e8?R@uH$(hkw=p4*?KL%t;=W#76At*jU0X^16Q2g##m4Qt*gOF_8eiZ3C zf=tqqS%$VpjSeE|rZXMJt|eS#wGXTFysr5uMaE6rBhIqhrG;Sr=z%oV$22Nx^KW4D z5lro=2Xj&FEhhPD#x->dS)WlN3&wz z`0^ouOqi~sBnVA9UafvKkA1sEEaFAX<*ozWlMuWoM z_e6vJs*U{KKa4O*zIbT=1(7>Yh}f?>9YcuNd8timrok0&uGUVX+6ZINV_sOU=O+(e zS|rV`?dewY4C4KBSAtSu_FWl}SLJzLpC2FOh2E90sT}UIJ@Wne^+2}t6Z88bS;W>r zMh7u>QP;)O$%XWZ_dm(zhegYl9_tNL{`I&N2!VO5Z!JY_kz$Fue4af_-S+$vJ_-EZ zrCu<~-)9jC1DOn)#viePP4ChCz*c+x1u{)@aOA;VzW-;}l8?x~?RuFUdkFvM@o_o% zpsP75YTHn!#ptyy*kZer@^g|Tic5yd&^8)@Yr!<1SRE==5*F$Rd3?|)Am#x9hgH1k z?Wk|u1QxkI-ak24QRXVt%*m7rWse$2f6r^R;d_MSZ$gY6n9kB*klv8?sWjcJ|J744 zC~6KRj)A=;F>y&4+4s`dp9ry4A7k~&qVAIqEmlLG6Dtp+^cGj70*@!Gr$l`aeBkSa z0b6m)mvQ8wV^_uQTL*J8aSb-3KQM?b!BcAk-shzc_Kb|k&A4xBA6O*<%4Px*;<6la z`iIW)6rWajqcFkG498ZwBh?HI)7h}>hY`WNIyF|0Nu|g>sL*Z(U_q@@6|T||(fF8zC50l&OPRjjL|F{=YN^p8>XVJ~ z5gJCPcsP`a)tru<2B~VN+{E2{W%#N`3|J+M}hA_yygbLdMJHrmyayJRGC2m0Jphf|N3{!FJ+xtN2Y zf#E_rg;!g{FywgI2Hw60yY?|@yZf%hrcCv-ie?NyV zf*>_;t>%H3dabQdJ1-^@S;_FDJ%lwM`2%m+(`_Esh72FWc7pmpAHV;3sO~OmJ$p0x z!O+G=1ZyBV7Lj6h$8Qm6$MGJJyilsGlBs0tpAj9fMvrHcj43pJVrfW;3GP zzqZFMQl=Ju@mZX@)QBAY_W}OvAP-X>&LZ0D$sx9;UMngHuTZ+Q3@_**`%>A*&K{@z z$Pwoo9S++XBfv-;?*IIe^+Mi`IFX%cm089PbNOv@{?qrQ6W_OHs(0`~E)qWWFv6Q7 z8LAgo4qF(kLl#5{8Fg!nnj@oWUT;K)2^uS3v~GNH$k!w&L)`?nhD&0s8a;2pAFoCg zZT)ad%!Jh7e7dc1_5Sp=d>q3Qy*DrTcrOS&wf4PZIdu4&qU_Q>-ByH0q&4+8u`v~Y zSgiHSNtyWaGxO|0rof7^>)W2^jyEBkiv!{Tk5&6(pJ6WIcB4OdDVq3zU6qGQ;$;v< z#G?;6=0mA#Uy6w^%J8n(2%|lrt34U%x;__PttC>s8wg3bx4uj!L5j+)n}Pl(E8hQ9sYY4t`tFU1s=T{1gzbMKaYrsm18>?%`l;4Pb7!`4 zvO1`c$Zr1fRWh%a?_i3^HH%Jm-W`undSuX-#)G}Mgoc<4sNn+W@shjK!$WBy278-j z+TKf2rwdmM8;C}5>B2YWr5I#inrp?cZ?XUH(@~7FS`XyKm5I{VKh<`9P_T(no3Htx;UcmOB5nY5q4MhaUB;fR`kMn;Z|u~YWsi% z$57p(0<8&F~QRTlJ2hr zGEZI`Lt%G*?_0sWpI=oFzGo7dRG9XPKYEP+;u_N}2b+(30O4B)${~7J2Ij8>g&{y6mJnEOZwFFo z=*@bQ0@$;MBoPoMErwF{9M-nuh^O=5s{=XICFZjK4_j{?6=lQqjnbVWozh5mcY_kr zNVn46!vNA93P_i9N=SorcSs{ObPYAscir!MKj(bwJZH@!{(;3}&Dz)AzsyZSa7LPG zakqsaDcW|q->SuW$TP%}OA0To^L?6`=~vx9*bGMGZsLFYl(3w}Ztpdc!OCPkmT2(D z+w29t)sNk#)1&@Pow~(aXn^_1S6)G2Aqc&pUK+*ymF4}a_%+Li4>!lc%pc>AcgDM% 
zaVSLqO$fyn`E>PF+jgFi-B>C&FSY6)7WchM4^AjmwxstrwH%=y+{pelR<{&}_X?AQ zY<=!lnnctRWA@t{-uVOLWkULqsBbMWkkr$KzRvtmwX_3KNO1*RizzZpq_g~lRj+h; zb7o5fMxU)21eK5|^ylHFY~z1+rPv$Gj&QrQUEGjF&KB~`l5Bfmxx-OpjW)mh+ni)I z{9Q+Uh>$q)b~=Gh`!z$j}|yFA(A?%&mN#CmXN6&_y*xoP)`*(%#M-NWYgAD8Or4j=Y8 zB*l(77_L=STCR3GAZ*f5wpO8GMWDl__FK%3ZPUV!Eso>Urkz z-`khkUmN!7IRiH1A2oXrkiVJ}bC?wMgu=vq6+XvMWuiha@ER*RUb#qNbWUC;p5d>f z?;Ua1wIe0=aT0QXw1!Tko`a+(EGyZJa#2DG*Ku3*{H6ZuwK*+Lv-p({?-9~DN0vj1 z%cVUibsqYVuU+f&2?oogw!6Cj(%_}yz1(9tdS%B)E>a_1ij)J$t4zK{W*2$d310ugG@@#F|0w= zo4AgJ6d~)gp2Z~PVb=MF^~(H}Q3@JHt8%7*^AC?tvpnyMXlS@?{JP3PMIGQR&YU2+dGR zM(mW6iGL4PE0a~rdp`uPK{!Pa2$WCio~X38j%%I8GH9qA^!fo(8#1dl!d2?jt#sQubSH zoidZ`^?CO8Q{r&^XKC@DhWQ7yjjT>Nu4so>~$8s1x;KDK~eAHBIK(*W)jvJ zxu$S#OC}AG_`5jhK6TL4P*SgUi_stNGMH^6DUZsJ!#zoF;gS3Eo>05R)9gJPozul2 zyxsB4MZ1@S66#OStHKK6T*1iREFRqZq#-GdJz2F!=UwwdGL@+#l9QMQ!98KR>e5lOewcCxhwnmqMB5+TXxpRTDBEkA?|M|(P|LJ% zimuP<9giJ9=JdwRm4nc(cmbc9_vKk+h zb%@7x`8d~8i6V*k$h-pRc?*7Jye)BU~hb%z7-7j+;q zR$+S})(dkqbFlsK@}*SwLp9X&lB4-sGMS7ck%1fk#Ponyov# z`6lrf-3vi0&AaD^ewACm5M$A>oga0&CPIE)ilI|6m#1E?%PrUW;=~=;>=UdhNc`6# zumCsrLWH7c@t_T%v-LMFSna_JRTOcSx#L+`X|wfC zpk9cqaBnv+jR!n{Uym?r~$q>2>AMW8dI{gl_6 z51_T>#s<5s0MADS>zt={Uf;h+hFu``V%4-95s%|B7iX$b7qQnfEwjYm1hnCF7qM~u<^xq-6Gm;r&%t zo{^Oqpo{{L@3O#62~ z9PKqc(>e%7HP81*E5zHBgzA0vEKZaBq$pUEm2bdq$^d1!P#@&_h_rcE8WS)CM^S*} z&*0Ooxx({ybE@*{bPccbz%Q=Qe_d0M_KN%b!Rg*j`|e)mP=VcvG3aIcgW>h4GZdkj z&^MopeP5}k*=g;^2(|VHunjF-PgFZ8^Cl!GN=SrDzy#5WavP<7Rj!i?PpRt>#=1L_ zkz%qrWEbi4dSH{fNmem_<_G&n799-9@SDhLBg`FFtmg}@S#*PDwS+-LTtc*f2x&8W zIkPbweSSg3&!btwKkH2EoLytlj?sl1;M?WeTG%`rhJ}VOAyERU_)#}PvLrzAp$e*1 z;#BTey2&>@bz`U`uhBhY``Zy%^02`yd-CLm%rTTCsM!{*9_Xu>o7!;ZtFaMO!G4;= z#yoQ@EmiC{R_(ZFAHCOAdnwNWYKr6gDMos23yz1VulO;U?kq z%99ilS^|UmaSJfrND{EoVcaNw8YucPfuWClijZE$&Tuh@$#3kL%k;l_24uP_5&aVT z1X8CHd=v9zKcMw0)pLC6^=U9~A1Yw8kH{JebCZ&St1|ehuv?l*Fv01w)V-qD+ilYY zx$HU${!px3pFZ)c^>RlWqx}DSMY#KAp=w%*EWdi!0p@|%!f{7iJ#292UgkQ zmkUGh6hSC4k4T;5cH zFTLJ2_^PCdPF>HT!k_I#RNLA{B!kZKwqy&!y*5Bcl%LnjzvdU%CY*i|l<_$G!@InF z3~Pk@ywPNxrBEb*RYxcWA41Ij+#~Sz@bE`m*KGPUT_F+A^|JZU2IxKvp&C_Vo@{>p zHcgrD1ODJ*-m|Hw43ySEg37{AOh&OY0J+qAqjUOHaENL*0vC3DF#a$hY~cd)Ahp{` z=cYOc=>!@VrssxJDBuI+c%4@|5j#GVzm_X}r+@p4yO2a}X@}WX(xlDfuyoyo$Lt($ zFpz8W2=oZHz{95CnJbt)Shc`Mt68FZpRl`aO-C)b_d4we5lz%$jszs`Zgch8X>=)} zPyuUy=x5sMNV@1iT<+hCd&KQ?(wBAvS=q2BUhoUKsJESZ#{+gA^xz)F=QVnltbgF2 zwM&h#6d$9U?QCe9eT8WkzNFn{XPc9%=z@6q)-{03=r3zV9g6mGVC7=%Tp_H#+T{2X z*y%G(3%f5V<^#>V9z?ZpF@f!6Die7UzU@@RCg-nfXsM_)YRdXCnSNOPvH3rPMGX0W zhMO>rP{vVE(O}DIGJ}f7@Jq|H^ZL!cX@ld#ja@?GAKsE_FHk1cO!@^u;n##Jsj%p; zrSCH8cL@3$5sjKZ)|$m{hZd>R!bJk)eJCzUizeJYLc;~}{MZAw>XCrK6?)2D)SOy~ zoW^;Jqe!XN>ym>R0<}=W&7<1HZoF?qPr=7=i!vt2P zpmU>8c51?UT5=KFViYJiqK&GA;0f<2!HK~I(xSTfC7B9F)C<4iajiCL!CE42bvZVt z5P-a*v?;~mQJP}9LN|)`s>8O9O_6}0Vqr^>bb_-CIHA%gjzj$>WtLBS+;CpzxpFP? 
zluvfOSyRDC@Q&5oC0!tbaG$)m7MwtG=+=|52D@hK3bV11ipuaI-8}9VN)(?5I-bYKBZHOn8z5H z+`zjfg~r|?-#i1Ra*D5FO7HmHkP+Rj%$zAnm(HU&9v`Sm4I5E(c89vTl_UQeH0xrI z;>islX%o*jnD_dTNqRM`nA^|@rFJztBdr%T90q{{3l#65tQ7+t>la0)6pXUG1Q$~Klff^4YAa+zSIP^ZcC{R?q(lDCkcA)1 z{1vQ9e*$ts0g%9Ec80nup=e_@F}|=AzBTpBhTv2LDm6p@HB7_<@{YkYP-l)@cw*c~ zY4(%kZ&dq*=g&h5I+6A?Bosb}-nMrV~z3Sa!zIt0!XuJ5NU zh*fqdg*&`90@?X_d?|VZnoOsthO3BovUHC-o*|rl>h7VXoIn8gRk(0e)B{ppZ7%Dy zx}@m=)$d#JFCe!+OHeiTTAQ?Za;3rr|6y%^$Z^|70i8o5s0%?uPs$g~yA-c+&IjfM zu#pDmEDcTm>;9GUr>PdFd0DZe5vmrixE3b>pkSAGLzMBWqunbJrV}L(=C^jAaz8)9 zV6TCXA-K^(HZo1qvkN0AS8To2L?2DT_4Sa=nosf3cxE$10I?Me2Y;)}W*Tqmei8z@ zp%m)YaDIb)=F8o^KTcyoYk~+iTirp%%Ss$9MeD)3g~z~Y^^Ez({!yCK#bb*YLlSo; z$vK%9It-KT6;k1n?1>{^QcTFs12lRwYl;3lCn>4s9Yt6~_9RQ~L4OMaUQE!|+Wb?3 zRrRd&$KVs;%&c)xc-?p)QGqd;J#ZZ3)=B(`KN3@zyWoU>+D{UXNU0+Xg9xTj)_mius^ zm<<@Du>^UG?Kl^Rdp#s$F+5NyPMC2nE3r##ot z2E>OrOAwv#*6XnP(i-N~N^-Ah@KpLm%UMZO^n+v-~FX|F5KvwH zoYK;4JO>W(;SM%<@4Fz8P;HOf*N5m8sX1`E)|^0eRq-3%l=f2jBF*%NdW9};UQ=gB zS3!rrcZ`rEG$nq32O2h+Bm8uAE7@TFM7I3Xzd0oITtq`osBgbN3oqMi_D9?vW1$1; zZY&N*mZ-n3JmzwT{XZ~V)aQk2S(XoXner4pt@w0<8oKdp9K$J*p|aued0CGdkwwmo z>hZ4`e0NIgQp4y*GG~F&Sq*zX4#Y>E@nDI$n04*C+i31-)P?%m zByEa%e;VD3Ylur|tXQm`xtb(ql-uPC!l6m9EcUFJ9G8u&fATqp`n=o94PjKv_#AL1 z^QMz?e=J9y{O-#2=!QMy-Tft@4xgo^42Mu~}mRP2ssf6F|*AGyq{^$`~ zp|eJZg}zrk&+~e!nh!YHY_wPlV+b;yZGT%$aP)O7`8a8gCLODJH zYv#y4WMh0E!X6NhyK9d#Nel|MQOmWFbL2E^5b}JfXX=ana#o1GJup?BEjmvU(7af+ zF?Fd6SJ~w|%jh8vHubJd`FD})*4hU^&BEZ#yA@9aMCa`YiNQE``TJ{nky?HFq%io` zgAA`@U&#Ff<`X`YykpTSdtAN0YDl40>2r<);6JVBeT>owsg752My4bJ4vEnNZxN}P z;NH3`qy_!?$!SXNx;B_Dv1XU>S5+ z6Y@rOF|2xPwwnSwYBUVCc=U)}xp`SHD_57%Z z;IhA04(r*^myU-Ev4?^I>)eG_DkuvvH5kjSo5u&+%O*UjMH;>bmz1JCD3>lfJe*X6 zs{pCSG7>;GwRTQ2jTFyfKkxTRQ{VJ-hzj~r!-=^W2mgM4g(pNVYY6w=_2*(1O(^!s zOvJWw_gHtgRJbjU#_4Jc;jbvKpOvu@Y7MHAPHu%*#(UcYiI9BR2<GoDrrM6P6n!FG47L+T_}Z=K?8g=vK*#qk2ge- z+M^J+{x3OnSfrTEomP>2Paz%~mai2+e9qpOLboHVvg|ggm3nDi&7sA^L)l=n0D%Z>Qimv>NO(A>LG|w=a6vqlj|QO9&6pdur>+u-d_@n#8!S7y38yUlJ23`%lKolp|^g~*mbA^a}T z@4Dd_5v%F!V?*HlmFja~EpES$Y7Sh_-Y_wH6KLSGZpEu%JsV&3k<;#W%9rCj0C#nV z8ZCy36krPOv<2Q168hFf@|CrF*KmG%c^-O>9=|{fLqx&R>@Ils>a|BqtB1p&Gg=hZ z)z;&bbGH;eLw9i8;i9H9PSU`Hv7i2Cx2bMrk#mW>+z6rY4w%M$f7~A2yw}<8do>M~ zmK}Q!T?+||7xdVwSU1JZLqNtLxl~JXUZbqhcQ4^w){rs-bFUlKDn7`Y0vH9mREYuA zE;>Ppp@ibiq=r$=SvJpZnL$h>vpX@#s@)2s8@=1oH*tTQ^2gy4KG(-zO1YX)|73*( zQuT~G!CZQ8&5UB|mbOW__`*|Tk?DR_;<3M}ELt{X;$H2=j>5UPwwD$4CoB>Z7dOJk z*8#p)S})R}?BjP%2h&xUl;UK0vz zzih*9PpzX46TFJbo0>_54epDN;lZ+5VvSno*M17`V{wmTJoYH9ty-E84|V=J`h<<)_&S z%R>X9E`J8--lpu*xvu=G_tpb%z@#+Oe=BxM1`q?{p5LHOOS=r9jtz?cq8_<@Q`Li) z`@Jz8>+6vUz_bg?6E~u+`qIF^$UaQH&Y#aVs$3CP0jQebKp)93Jl12~N&HV@A3V4I zS|gL|mg@(bVmGCzbQvZGx6*%sbVs5%$f4Rql1gzc6XIc=z>)Rln{@|+FJgn1T>n~f z;c1+Uhljw^hVvx)B%5!}A1&~jk*Y+c6l=dg{H;ofqLWb1!wB3&H3AQYq`;|Qd-zM* z?`+-g;lBM!ihX|~yPm*Im+!=}9WKy49g2$kp(g}chhA?Q+c;4%xjr@mCsscG{a!HJ zNFu6`-{Bk!s2|F}5F??SDE-9{oDZkd&$xu+LWJ%JP}O_H!cbXMSWzqmnC+DV;wHv9 zk>wjy(>EdPFpYtW7!duC8HWR$>zh#QZLWl&(X2#8$pP5}`Z;UyT0$O@&{YRiC<0g> zN3`xuD`!AB5;+$Y4++x|FkX*d2Qn9)01=h}benQTk=?c8Krj&R%ztN?M5b_qx*3Pe-|LGmTWdxCekoG<8tt|>Ru(S~(XY3P zf=^;Kz3P{hLs1(F6#0ZJrcC96z|`!xrqUbn5xxcDFwuvvu9)x>K_^m3C=e#-6tSEy ziPnl3c_PI{b#w5igB4(PM>Jm)sdauBdLEJm!U1GlrRAI^oRFCPX@I9wN_EV_{xpug^UWBvlUCWV>!<70c%x?si~}b!Qc7p8FgSMX{L}&XGT)b*I97y;3l#= z?e+!!veP=;97%Rhg`yUI=PS3Jv;0sydB!1gUfF`lrjaJ<@@_Bu1^?6Q{ddZDIA_<^ zf@Nea1ZFMarpl7O1=r~*@R1)xQf{j_Uvd)pTvvra=k{^h+YCV;f;W}72S3u5im4CiZ+s4S z%!IB+hXez0#Q=<`GQZva@zzAU(uZ4ieT|zLA+l23$)eBpb&|nKgVUR}x85Qq7Xrx( z?1pmW;~ry6=&4CTM@rvCZM}*jIEyAv_2j-(eXI!gs1U1O)GKj9gl+cX#tB`xQBVs7lXg7Zl*l|1ktqi~*Tn(7VDvh4%|U8^|57+Jdu!F6$wvBK 
zW;9hiZ1yIUsTm*L!cpl1s9T!xTxpPQ1x}#RJwai-%tKREoYK|I77iNKQWv#iM8e%_ zboMwNVvN0HWAzy~8e2*?_g&1^E&I5qN3-RHx@5SpeJAc5-C{&L>QC#ln7WI7Eje2ps& zb(8g|R9r^HuPC}*!f|B-NlD>cgMBrtNV(R_W^GwSLks$&iMI4id5cDg5rC}eZsJ9N zuY{-2Op0an(+N=# zU1AtNL|CO3cf$c|k??DF>!drbm|4&UHjWj|%JptYU^0-z1`6}xZVbvte?U>D(H=^F z^RAKl>z5zi)8$50_>50?^T0_ioGM?k^J5DI^2s@+b&MJhFHcHGt}jf@9YG`7 z_ovnH?;7xp7MNujDHi$sOAGv%1zWfFPQ5}IqD)vNsZsyqR2X(1rxlkjbX0^Pt;H;N ziQ}pO*KrEbUn88Op;vbJQL z3{y(aouxkFzqWGt3Ue0STg03ud^d;#N+E2ts3j_2zgt&poUi!@z~gfXAv(j1Xq{&6 z33lK`9e~(>ozE-v2C-Y;f8;jj0S!u|tiAh_#to*jk(j-T@q3vef8rR&Hp=CZJXk5c zXoN)4r*KAka=y=3M2gTg%bKyKv6me1{vdnW`<<9i$81||q>v*1!5Wapmg=2JD7?(G zE|$t)4JfRYflcY%_c%AN$`TG0toqi)+dj%=!;RfIk_?FiEzsIlFD)i?LIP~4oHo^WY5WwH{E7RxBnmwzID8%&@qtTq-ADdSu2 zJpMEI>e_2K`OI$X(N`fT;B}514GGQXs_*!k6>C*((oX~07f&h*%>+i#Y7ZCSCP@4s z3ifx6ew8z6?P8$HWcIGEu=p_DcJzC8ctUhyU1;6S)6p=udG1hzr1#H#b}mrv=AHd& zo=GacZ+@Zai?l+j8Pm&hf?jL}wTv+AQd3fANgF^!edJ#=^Pt@&EjOoMNxkpqzw@cLRFQ3|tMGW=nCTj5DcOHn z<}a7O@31jVo4t5#y6;oMuar3($_msh{c{6GMh@Qpy3k?{4EG2R(4NkgN4uI#7uS2@ z_}1ikwBR1c^FU2PsTfc7BZWzw zU=~|vVOJ%KDPloRo1oVBmD@M=B zQ2|?q$6reyS}tref9YG!9m{=`}(>JX_wm znW^`l2M`o(%wiy2QQ~(}Lk4fj>8K>+;EEptIFH^aYWjEI)>Cn@;janz?YuUg`!D6~ z-J&@7VG8{|zxm5!mm30(8LAU*xGK!6dMS)LzP+QjLRxQ$- zYG*ebJ*E6bnSn&+LxN%NwN*6-dHUh@0 z&0#=y{{d)XC!At2`|&QE{O$(n?$ss}45?2J`Ujvz&nu#SYT1CQYLA1UF>1r|)Q5Wl z8JGgYLP^r`Ui3rR)JV*!6a4bv!0}VNOtZnEda6sbGOSRN=kf z;q(tO%hTb)R!>t27m11n&^M?^vDisI4AQte`1mB`3%FfqUt&SP+A44mK{*wM7L0T`3eE(!bfM-ug*5|ly3}>#uL90$T^^Ld7rGxKXsk5 z_pa?-5e1Y?_|t;j*Z#`KK>SfGPe6F2C94u$1bC}I9R2-Ht_5Sa)Q-4B-XpjFrmD@z za-Qb5nL`88Lkg9nAt%l2`FmBgCR#z~c#5x1@r9FXeG>zN9M0in_!AP|-+w|0;XRLA zsM}^T{C4u%cNQe(<0hu|uf`E_y7Iu<8Fd`3Z^O5YaaY{wa&fCrK^amxQbSg4xP~D@ zXLvT%6Ivi9NP^K~_VTDmJa0I#ObxEh{dhcRf1ceoSL(z*dN1Sg$NixN>KroVM`yqaDO+c4NUH~7;Tm{~ktHq-FQ{q4uj_Gx6A zLW2|(1xjQlnqQ8r@xOvD+T|yV>Vnj_MGt=->J&bK>LtN{(d8U+^Z+!a;eTE^Shz!R z{h*kphg2 z%2#_61-g%2pWIN~vuAvtgUrWD&6B_;L%%y0*L_mfE1Tk3ZOX7- z;;jxMA6ERb;eR@F)73QFF3d}r|L*Ey69)~H`t+!l;pVV0c>3;VgZI&L617LCWy6+q z+vLZF0wZB}a>twBJ-w(0D=aB&d5y+5 z8f-UpMiTtSAQ%5P-uTb=lR2pEul#s>e>?YYzGvtZp!U0E3|q(b{Y=FA4-<7)_Q!g8 z>LmmfkMQQyQ7!^os|F@V|K8b|_zvRq1b3{KEg}n2PMJbI>loDZ4Ld1&j6DsbQE_2j1KuCzrJf#_ zbDvapwiP9!QVxc4PQBEymaR7=H<`+l;B@zfl0r>o?sVDCxVrM!o33CU(czJVGF-wPxSy1K!jpIZH{%i&w1 zLBsH6|5Owdd(AVMbI7V*?x0gDWg>rL^TX6-VaL_SvgJtikT`!~Okcy0CGF$_53}`9D6TVWR!5D7M#&;=x zh-?WCzE|Q7j;G#fjAa@O=r;vvF<+i;H!$YpKW_aFJGkG^6+9+bF(=qb8fBCKgQnvs zo`$#u1z70PuqEJVI)h$7BEz_^%^at;Rp3HN^>40hjjJ=M>e|=Is?}|m5ncC#-%%u4 z0f0YIn5X@`N4u~MzFX;7c2{E8WMC{+cQ-hsQ1ur}2wL{u4${NGYqaprdrabr$yP|U zPN7K9WZT5gnSorZ?`>vD$XTAz0{Z!X{R2WPqOb~2R-TLUWl_mIUms70Hv=TVLqVho zUFgw#h4UTGe!=8t?XMhPSK!|<(Jb=Nv|A(;F!O-77p~5-`Zw62Xm78qp4XpHH1b1` ztudCHJXx`+CHhJgbXd=sK=v@!c~ubZv$`OB`?|KNSv(L0f2y@)9M@v;3P3W zoMuN@S465PMxTFrcC53ypj`%7O+Mj0>Ok#`&+}#WP~%RIFc9p|A!6aIwoe+J zRR&$A_hSL&3!|8|Qs15rKZ<+)OLxy>ra;%URYH6$j{ocPyu6QdzsxBuu`n(gCEUMQ zU44lpYB~nwrMuC)Ult_?<^7DLTqNQH7J{bCdaals{GAgQ14TH<4-y^-Un#@oKaZqF zb)5|wcAXu~R}-?3`4;hJe_RD8Q3 z2qSh)ed<>wBJ=!ztZzOdVu10D504_~VH6$BX|`VY>z!$o9aEK7P5v7?DVJ=&IJ=db z69xZwm|+-%jAC~5pYAVn51Kc0mHBmBNa6bJ)$ZTsw(1gpt!EE%_#uzEC;#eTzD=@t zPmsVb`S)F?LRviKT@cw~n48FY(B{C*|^?t7xuKaZ)L-b+!{7tyMbE&oiN+>DTAlEPqSiV3+}7NMJy6 zxj*>);;9x7V;f?3ORcb)7R|b0FaH0XWdLsZ9hhb=E}|gSa6Q_^r?y)Eo-x)n(9D=e zgW^95p4vixkHVYouKi#xP}M?5o1joK9%6F9_T(gIR5S^e)TD8%=aGPEWUErY37d?3 zbbB70S&f{@WBJ?g5Yi^qA-Fd==O}j-W@g|34HP?dYol8kNm=j~4fU)q2Ti)UL_2JM zT^z0_k|-cSEmv5zVzF{Nv*^uECSPOxt7E4@pQY#X9@$p!eUt|x^{>AQ0NY@4SneHX zwV4jQi0d0H!BFIxR1vW1Y5)-&;ULCH;#!A)b2Qt>N_}pAoD(?yB(8O%G_M0KnMc%< 
zDTaC;oF_YCpV!vPB#6S`o=Bw_ySf5dEBZmdWCqnA1#fIezr7zG_B$4UU;ipGhM9ei zD*(aw=MoecLZC;Hi>X_JL#LAl)Z|hM3lRbUNlp)y1?X+NH2y-|6A1A~&B1^~#K&j~ zM~fN*T?gQK*^MJNFFsjw8JZ1*+Wuyg`WuOc1xIp~Pbs{^MTG9@P%8}s)a)t|xYW8g z-Kd;hk358LwTI40cly(&;M?*D$UP#T5@ikc0BWW|HP<(;7V&b4Sdc_TqTB4eGw@-~ zHA;XUx~o9C>Fkc<99{mK1EmNQjdn2o;F=5maD^P+~VmY9e z7aQ>GH>X{tVjUB`1eaa_S94sc*A=Wyw+qM+^W?AgFA=dHXs?w+nXVEj1*1%TGo-_( zZZ|l8AYakaz9px`2@4L5aN{d(SD5_HU-9Wuj`bH2d3U{G<9F?1Lw?yBln`vaU+hag z&%sX*7qWUU`a9ynRBVX9EF>kD9>mK`TYaJon_;W8-``rlJzNap>QWHME;fD}Io9@f zKNqr}`%NSy;r;i3iJJaEy*|tAm8aL4+w)>Al-**3xEhVli_2^>UYlzN?S!UBSASnh zIw4aTDyfgT{fuRsw+n!d=bnO#^OXh%PtINfCR&rCgcm}{_7Mg3SI*-Hy#3>KaZ4Pu z*Q~l(g%^qe8@`bakD3`c6|kE!&|HkOsBz7hz2;X^&CYA0n#S{TKT>+2vNHm(`pZcgl zQ?5fmu(b11^DJ{g_{ZmY zd;Q}KUR%be>&4vg_Sa)lwGD4%F8&dU0&FyzGofGpPz+I*42~36x0yQ+HSQ^RQ*mXI z)JKw7TMMM!QjdQ^HeL2&F|?f3JqsDQXz|$!xH;ih`Nl`86L1#z z))?kl75;8{J;D1Kc90X zaDA9AJv+EV-w&?v$jCTJT*M^4I06ovzO%QEn?d7{@5V25RXMy1*36awcG{Qo8z^G{IaGPJi zVber7$nNs*k^N8UWL%hgyRpu~xHm~C0@4;7^=k@{n4p($a_L!ovQ*cz16$X!QSmIE zal$T%Q8!hO5shfrb;?sitl+gUmZCtsrRWB73G$LRaor^a`vBwcme%1DN(&ylF3cR= zDsfK2R-9+RNHKFrS`wx~3?bMqW}9MOPP77e=ImcX{`Z&G)~rqqq6`q_?|dx9?=yAb ze}m25u!32j`LSPGGzC{;y`ZwV%kcBRy)Nk7AOzxx!BnJ7_NiV&=COCi%So76n53!W zUMyjcsP4l7B#5Nb3@1yRwO9X~r&$V=%6r|Nz48aZdH#x)#gZb=WID9PJoFDWFhK7p z@e5qpXv`lgYC6jcdyNV-o1bZ#Bm z-+{(T@sph7{M^JyFn(=%fVgh`4LiGWUo|=sxOOJHg0(Um?WCPM@0(ztZ(i;=)L6$G zxi_b4j!ujZb+!kw(#58^>kYP(9F}qp@p_hA#I-+!DLjoq5&1QmPpuv)@CeETjB3RV z2Qx}vvkHU<7hzU27-u@bS~~9I3#Ym;JR&Jls!rOLgqN4!;R%XZ^|l&knPMp*+a%nIGgyic>8 zQCrOH|7-3AoZ-&@IKu&<9_R*%7QXo}I|tY-IHuY0C1xxs)bg|Nvm~p|AwNIX;>>ch zGocn?(#|*>vj~2R_so}zN0&?U7>D_{{H0KP{O(;qSq{02vLwprljxi~jZtfGI&sEo zdNx0hMK*hz_tCi%HTd;VSD4>zy+rBPx7MFw<=%ZS^#n}ug|b*d1C0H0dQY&pGP`Mu z0lvcfaUmu(D#61Kkg5C%cw}m^fW$z4-Bya=O=D5-87z?g_nI7Xx7|^_qFWL({t}vJ zN$a~r>Bk4IDLa%etJ|YBv9Qy%wmZR<*M%I&qIm6=`;vUN3s{Ih2wQ+svm`S9&u?|E z!@uFvRXQ{L?^OG>G|6Rgo{0ZN(BzHddP7pDW#l4D$D^Baxp7G}aq&Zb zmT#8n_F+=b&VNVBqmj|Pv16BswRM3WRr7{t3Q|lPT~BLsY?Mc%A?gyCC@me_0}i=o zY(s$kX7Z2@xh4H;Ns`_pIYuy$c!`W68uYOvyr*;XoVlkO6Eyv&?4b(KXFj)+Q%K=b z|AI4+0|f$4142B^DR?dhID_sC1h5QZo(eeKZtz*g7*6f@Crx}G?BT5I3VE^sgG+&W z8F^Wb6qc1dMhByzyNbjK7Eh9;${dnKA%|I+Bq8Sc0bYC&@6)VkJ63xaThzy-WK-p| z_`-L2vGg<*OY|+3sO~-9OI*|oY*(r{2C3XcnRkYpx=`ic0ZLxtGtLwJC=D4sCUaW^ zyzP`@aFR5i?zTA)J!%or-DPp6oSz@>=a=(|o8Y?%SKdTmdy=wlPyb_uUY@97MGA-! 
zi7#wk_fs`q4F}wZjpl4_ciru0T+~)m@fS)w&E@HbB&aVm`LXn%UPC$5y?zu@NEaZL z30c6U6e-HHTS^8`YOX)RkexS*Dj6&!nTufwH{F&{si;>BWm;*~1F=Fj;&CK)Sbq^{ zp_4C8Q7us--j5-GebxMvyI!3l1Equqm*a{}2fQt6-QQPe;l{HzV_Om0=NTTw0HIs1 zBvqCGV-=Hne?*&uD+171p7TGX&UN#^-X&(I}*+0?o?vT znLP3n8Gk@&^eO$eg?sym{;;U`fi!2Dk3l(~_YIq2D-=~nJxzczF88%7@6+XO#ay|3 zpRfu_{~}CI((QOdaO16Pu6C7^@Jog@up5s{d2J{OUXSyfj;O!FbC8_upBG+VTb^ET zx3&k;>UhG3E-VL2?TPmRZc_#)EY0+p+VgaI9Ma>w3|#g6hnj@{WVG}u8cYuJSLoCo zPcFgrcx9T&I!s_oKHVX{NY=8KGe9K%Rbm7)O(D8%D5^GXHSbTcjyH;!z{+`&HVh?H-04uAK4 zkquAS+1RZX%yQK4$^kM4L{a1941U^Zy=)W`;0KC&I@k{V32eBy#s2n^Er*B4W~Q$k zNh4-8wMcQ!Aoo4C-9KSnL3{V-<3fsgFA7=U2S6hibm`pc>u#&8SJZAf=+quwso=lJ zK?&jLiu#az@8g1s6yu=n>+wKpDUKW>>Uk<{z+6171Wo#6g_sHWR$~fuK8IUtFc@cl=eV(QAIZu?i%RPyB+;MEG%pfFz zL`eb;er_Z8FqJgFu*>s`wlnTuZT6EUBN=p}yTjR;ufjtEF%B)Ka5MvhSKu*F&PxP> zLwcF|8gaYJZ+l5FI?tksCJl^{ z|CI}eJpsw?vN+A;LpLLB3qmOpQVe$Bt1_YoLXBM@EBdv56o3F>3)aBf=LG=)lc@j^ zfh>@ZoyP!s6VVEn!=Y+~SDp$aEoJN?3CVM0Qt&2*1x_Uh#L5~cH&{AB6mTfPW|Qu6 zqTmXf6{{@a2$Q?s$w_ti3Gf&A!QV_u2<5cI57rzRlMdv2gZewQ#PziX3v8VOBn>eX zhsiWC^jAG+A0o11gDDmw*b_}FyrVc>5SGHltmw_D#xen}3nU&u6|bA2P({k~S5~Uy zFY5)j+Zd`SP0aQ0l9eV$tweTMGLyR6S^E`^^+>Anc<{F3o-9Wah@0qgDRI-Cr7#|} z6v=Pit6!(M7m)=~Al=qfl(a%dQ^=je7lr{1pIXn-lDL0;UtRe!AP@eDJQp5-6p5ez z5=*4>S0fX#D6($#YqX!vZtDTSt+)TM2@;~TN3RXEVPh@LdbHo$ru=@Es}ABM75wQF z#9IZ^9cl-3C7UnT=LQuR@Rn4-z`?T2&Yb}Yrh}ZzWp;cDvOf6$6-%_VfOY=I+VVpJ z3z#(fbgpbhV|OI?!yoU_3imXF=cT&Bxj4G%^pt;QUZ?deBU!&@l$bIBQ;>a>$uSba zAWH3=RPE?P4nj}ahaUyQS|tWv2BVXiGhZw8*c@{xaKFJD)#XwzH(6{x|MhLt&zid! zeIW#CNnIc2YVK7U&>kEww4_{oHduvJJsZe1oIO3i~pNU~RWKYOYGBH(inVKW9>obt+$M8@^AacM~A>bK)Mv9yKA7dG^lh4QW8@-w*e|3B@NOb zCEd~>(%sEKy1PdGPp|8H-M>5jkH!Uoq|#%9Ug<#z-IzF704pw$5A@`F3X_OcuJ#4=jJsYDJCos z$$0i^C>Up4ev$was^1dvX1!ySoV`|Qj*j1g%`92muDfH^lMj0aKW*2B;0P1kB(1^cos08{Njt@u*gH4|xN< z5T`Gz%t5W7b5FT6#D=!GQq^iM(LW89M#yV}mucpM$wKqnf~|q=vh%0vFj${_$@Ry}f|q1(Vij0VUl=&Z zn)G%rbE;T~&Suj1*~*z@=*MPaxMy-%eka_~bLv3$C$ z1#QQo9Y(S6wZr#upx3}jwppv~rXmvX!`WTzFLd642#kTGSaKG0?=r}0B8Gjtp%wfY z&TQ0M(}sZ`f{hGTNmCAj4$tW%%0j?Qp~BrqfdanRRPt)rzZ;=Q2v%q|cP540k&5n* z?zO}Rj4@ZQd~L^6d>EUh5W(LBH~Opd8K&>;XUkWhs>!Tz?7B9`UcGM5=#Kv-ZMk{Z zv#H~MGo;;uHp#}Xq2=t0(Y7^amGOa;m1^Cz$vNCm2-@Gm-e^mY(dWE~OF)p78=m@x zYrfH|+SN|33-I8+r@8GQKF%Pf0gYi)$BL*+Y>!k@Sn(@NxBbL+vme`A3eJN_Cn1~9 zU1)Ff{2C@lzM2QaY?w5h%#-%NH z*ws!nBuTsOhjfrT8K1Y3z(GtxT>@BVVZ!C5Bd=;-tCOSI|L*3G6*dydZu$`DP4-)K zdXK$%(k8vj6sLbaBzxqWVqc zT}|@%N2)t8LBeuHm++G=h6MQn2y0-{Cwz+Bo2z5pbuI%!(4^ERfP`^Fr;o_~2=Bbt zJAX-Wx52J=ZjyryOBJ=Lk&OG%6XQArK41jG}JB$%DQMjhG=rWl+~nazcHt3}{YktN@Tl>?G+ zPu!MU3KDzMDqDKwlVDJH2X+sq-ywg_I*19V@Z`=jdtLmsM68c{!}Ow-B`f6sV8yST zm{?T48DeeYHu=Onu|kGB7uW0lA3xnv%O=`^eDh82&gz%U2A6AzO3wZ<={G9>S{O-d zEUUz4F|5J!4~Q(@^L(tZ>iM4a``h(=+`+zD8XO1$p&jCqg-H+(V%p*`hvEHdlg&M&{9}px(%%=%c6rV{TXhD;dT-s65<)}= z>7~H^#imj*E+v9q;cAoZv~hrQp5xss(eIYY`Qgv+(@q+w7N7?pU&0F(REnU*cPCGL zc>Msx(rSjN6ooFnBymSeU959adKL%bo;ukX0z6W=k1J0J z?%X8Z-Yg8+0z`^!j29-CaBYajG`7)q&9@^@XDyP$Ok(EVMFnkp}Cw<$G=K`e5 zv_pVMk5u$Fmd91ok6nTZ1&H>h%}JxFb~{p7sjwF-Z6c3pI+w(L0L))NFMnP?p-h`7 zC2zgFXlKtiM&4H5^>*)JApK7pH4^a=f)+5~oQPw|2sV38f$Auw3>Bu8fu#U()p_SK zyUN`gvA<#L-J#jxW$wXtZ2Df$?!9Q~%K~dQhSIX#_Y6Bcx83(7v%m`Z`*W-Z)B_qBBoce>?zU6$K%{_UmhGuXs+8;&$;lFq3aC5Tyk;oUPBjB$S?|X7AIM>U;z^Y_ z#*n&+|HL+`!~lO@{cfsUS;$U5-3fD{tf~XM%$M4K$-7!+1$V)N%RbVMMZ}zbol?^2 zp;dQcTjJnC9ivE>r4dBr#R>o8tv{b0jOV&NDCo3b%z9<-y;CgENozBdlF9qaY%iJ1 zPT-r!n0LEW4OoGa^!?xSmRDnJGHfxRH=djI)iL+R)w8)`K=A+=D$QN}2jU zT^go)5qYdVWO<>J$72%ibOZTedPWN1jc>;DKg25!t#m~IqQD|B!L(b)y5QH)>b6k<`fY70^a+Z zT{C~r>@N>@xB4H_iHT_Y54gO%`^nLCHEE=%8+2a$MB3NZ)muLp5A+UOa{;X_m?#vd zVv*}aghjJgR3=+R6P?&Nl4;MPaP3 
z!IcajfacwZ&_OK1$|-%m+h|4xDJ=}~wURPU(>KlV%?JJh0yqBd_Vmh~YQLTLZ^7to zfM!Aa@c}a}0o%vt``8`*UWSJMcOJ>C`u}de>n~wm7i~jk`Y_d|{fzs!y6lg8xQMb3 z<1Y*_!~6zY+Y}Y{ITNB5eO^IXETQB?eRgLGV>5i}h2YC?CeNBpX^%un%*{Wab43Mc z+jJAP@|l~5koA9mj|sxSjmmo-Yu;FL8tDiZAn;B5Yhmye=h*pHy>k_Ebs|O2G^)!c zJs7|B8rb1<*f6*=nMBDML;Rv6&1y{(pKyY#vAVypYSLq!qR-2>AmTvSAa!(XZ2b`0 zC^xLC`CidXw1h_-FO|$WOFWIhi1 z@RekVY>5@X<^k(MP^xV^Hq{LAs|%o+og;v6GF<$vIwj4khjWf-u~%;lMKsjq(mD8Q(9rfy`YYyQHgsT>5cnGQc7)=m0=V;?z9V zLb9_Lf}r&c5)n6*7hV6X5!T{+B~WZ!r-L@Rso70Y?RV*BWx5!0McbiWN>XRcy64}u zQpfO@;E0Q3?qvkw|~|BNq6Z zM5~%0g3z1_xK4rSxF)St1;7$)6ZQo9CA`~=ODER*6xC~64-8CU==zr|E1_@rE?1sM zMDPiYZ<2l^X&R{zyXasqei8G<2fU5S(kM!Vc>w*kl($+z5i;=zQa;&8LauR#mN4i zrt+Dt#gCUV1FK+X)PH#mIjMTvzN2!8<`8%2BcsUlcds62^;16x&NYYIt~Ovj*lOsQ zRIYU=FTW!2M8}j&-T3P?s70FADt+vP4k(`OjB>*e!cPm{O;EqA_IB(0%0nkP91_#T z?YQ2H!vIrr+ny8@)vg^(-JCwkj<1=%q{UBh*uGSdgq@wRhr&!o{?UYbp8uDo;vDcd z(e_;M{s80@{2{vX=3A~5XL^XG%`8ITd%iM)HpwxZU}Q!=)ubMR-p%n&=&z-rXK2yW z5r>IxRxRWB-o_X_GnXp!THmFTa6A53p`CS7m}0O zu1WDRsnwFAwt3`j_ykB+I?VbVOw`Z5>JBpViIhP2dkxh8|G?&ECkQ=sBHIL^I74?_ zCA>}=yR`4j|5A9V%yP~?+C#$ue_SM7XMNxVgoyO}TFN{|eGSYX`ACzwF=(Ro-n7G` zPkZ!j4_V3v7GG5D#}F|Nwm@{y`9qMz+hEPdSPzH^%D_rKhi&aac0lIZ zsYH8c50x?D%I0UlT%T#CS86RLoalRL7c9^5Lg!Mfox_9}%r<=O&WJI|10n?-HXJnD zgR<;KziZ3#WxPV9zcV|8wZgDysc317oaILqXdeG~Y|`i%CE_%%3~jls36Mc6!gmdU z$?Btj$a2yUz;X8ZMQcle`x2#{0O`k=dn+|tjzn+0qqM8(bQTFh68so5i z>Fx%7M0?(c;(JstpRPk3D3hqP8*l9J)$O-zs-fT{AsnN#?M^)%(uCfK<{PS+VYIGD zr3Ha6{Ul_Nq>vADFH(KW|7fS<0~&*c6l&`jj_ac zECZu5VH>w?ZOzZp1KQqQ>X)B~uRqaDGRbn zJ5YJ#4bveoroO{eUR4?M<3`Dk9oMNpelq*)d6XcaO@pdfEwbA?>1eGr&QvZa-gO?t zO(8vFM4&edB+tVL5lMfYjKbEmfT}9#GXb^vjHdMp@iW)n+Odf%oms$b+Nr(Et2$u` zs`Bcayt$<%2_eUaU9VC6O$qwU!+yz!Ik&NM_lTI(?@16i2LKI&6QckC3fSzhzrF_K z6mGCCaqq=>@v76VgYY`QfYqKig4XsA^+&0DbJ>0&)go+BJhJtsjcEN1!E~F(AoZ6Ld%#jnB*BfQYD;b+~uRArHj<_yxSXuxMDb^;#|$Gzf)Ii3bd2L>qw!? 
z1$P%zbl-lV`qwhyBx6RnEt>n~Ku_TG;2+cc_+FI52-=~$c?*oTSbX|xB?a-P9VrNu_=fxwP%O1{N?C(o+AONMII1p3eO z>2SwvFBfa7vuQFZG+71cOTe*rvweF8Qhm8(*%fgp+z^M&_dxm*Szy&Cl64#c0@lYO zh0=Jg9EbX@7N0z|Z|2h!CTK0ia_z=b`{~$>(=Y<=bsQ{F$5~r0!TJ@Zl5@eQr=FJ5#UZJp>yCWR(ZZp}oaIjdwkso&UbE?BRk0nOOYZpo6OFvna=$zMw zrd$a(VPHAer8!!Ov=*}IJOop^&%QW09cAlN#$H-+XRTbdq?a>|npp9eVb~cDuPNWN z@C5udx=$7v$OU63OZ&L5$srQaEiJg#qcMYZmo)9H&6gpGn=$(~^gP7e`5eCCV z;tuE$_%Au!x-A03n4^NiW}aT+>;yR#0WwrdLS;in-AnoPc=4}YZUNzCgf!=*jQS2B zgzk6cOsC%%zsdO?IV% zDW<~=g`=5GRRJHrx5S6tMDN2~HjMWNue{TpH+r()@|rdOFn=PhfwoCNBi`#=61Y@a z2wOh6fV%$q&Hr2|KH*BT0SLEvgiG!$mzM{C4E|Yzve(ea?5YBTJ#c=~fYYl`Up(u{s%=4`X?yN>9@|O3Fyt{H=OQ7X^)2F7u zD5!L+*!BHr#cr>D;xi9fz59+zhZ%>HT|13ju11>t{mpFxaKkvXDF4H&Jz(fIe&lof z8*h7l>p`E`%iVJ{zWs@8BW7_;>K_vFZgC&3uZr=SeB2X$%t@6@Ca^z|vE_45`Ovs@ zG5F_Gs4b8Z^~kf*^C+RfBq*Y@os7>x@*XVg+6}4|so%-hI+A`jogb1zGqRDuW$-p4C($36aTOD(lf2cNk$lqfb zqvAOESkSJ%IY|chi4WGlfZJXm4n6o+i7jR-AzLkkia#6L$CHzj_VVAFshm3 z9R5N8Qdm^hwhQ|7kZ!tjI5N2n+slG=R`H{L?=fBY{unNW{>j6O4ayd%$M9+lXOFN9Ya4yKpC4e?OFx=(;en+e70r&`%@+ z3@O8D#Ja2AY{16MF1+fZJRXlq>^h%RY#8u25qWra*{5h>+G+kkQg+zN%4&J~#81{@ zceN)5gCNZ86iWjeH^RTO+>I~&>X+E@>#jyytCM8-IKNDfP^)OC0N5>dOU?uut`;HV z=H`|;aJn)5K1EDK1S~=&mn4nn(?4)^djP*TdXqnqDXoB3owA4}VUt_+4jEr#0`|ed zxe4MV)?S)SU(5vChkY48<+|=mM`jKmJ~$e*fgN(y{rRHV)h9jR@Bq7`GYvf;k0ax{ z#q7qS)(L}tV%+H-uN$cN!zoJ~^7IHn+6j<{tx2+fyM)#KsZec@XIan&ky0KQA-94X z$x;J-y}WV)>bCGkl8QAqR#a{y`enSwDeJZ-=jw|FcA<`DCMHkPTeing*QUD-%`Ppe zXk0$%n3^e`vA)+0bfTLMi#yHt(gwtJ<+f*Ya|dk1Qs+w_oZBPeqa8)JM(yIyzi*cs zkCA-(p}L_#p4!%^F)+u?Glc_cJ{u@?tVTwpfK$U=ah@n%6Dx=qG5p+0nJhJ7)ib;O z`^0T>4JDo_c}2X*@h|sM6dBqAVgDJ{&~en8HN|Dt~;GmSu;Elp)=+ z!bNYEypv?^!)+&cumY~v&*G3sKWKm-rvrCZP4yg1ulS&sQ-;%KlSZaqHKmdqwGcff z$%=-V%iD78yVhqm+Oi`8Oo3!KDci%mYlcl{t}Bx-$jr+$7Wi_P%%gk1f)g8aJ@huQ zhQgMWT6S5VCmU_Na2GuaM<0RnFMT{Aa(k!uleML|sq?fHHy%?HL%eBAi&A9X7v3jx#&FnV6ll>u7Y z)FemX6&WUQ5;P!QFXqFk1kIvaZqM#Kz3h|v!}<>;&1{Cl~yLm)(pZtkzDzD z-hy#Ovt)$4d55mm+;s9rkItqlJWaE7OYS@T?nK&zdE?ELk<)$BV&E4O8pFo7Q8y^P zmd#P9=4)o(i%uTRgTvG7$Hen!0z#bK#6H2EK@|HKz|d{|_w4=Gga56T|85HU+Q4Y^ zew)PgT&Mri?cw)RmIELBk8EwiInmjN(2*toW$c4lw;d~e?M&Zecy7-Z;ST5fCqo>G ziJTayow3F}VnWe?Dd=>*XyDkY-S*bW~#$$BPSN z$*^b){V7iS#07EQducvvlgB9;+iXS6vP=*=f~5{+lYsWG;6?m0yx(5kOhLBto4Y)s z7vgF2r&g7=KV{?Oahop|eXPdwwb5>J&BPSF)w1{yXz{e9v+Q$6ISyTO+eHqJB7+eH z)kZU@avRf=z;oNQ4xVRSOEMztgX7R1lipYm8dY;3q2q~{KeS6z!D4jE{WBd>_~?48F$bziunXIQ|y zG&hq{)FkYQ5-5u?S@U&1#>A_=#3Zw-Y@C(+Cpw2Kkup4muZ!{T(6BBBcPQu~xDa~= z&63vrmk8wovt|^hbBZEzU*%7uAJE~mRtmpTrTta$fi=xD+E>MK#-@9_w&uPiFFhW4 zzhBEVyE2K|WoM+MlTZrl$Rp>B?-Omgn<%cE%1KiWmuWm=MYTQ$Sn~ulL$o5#J&zWa z9IFsbgM9)7o-L}Qu!REy=lsCw2ag-=(4149xIAii{%7_HDc(5_s(xaz$ucTkVs6LS zG^dokY2ODKH(NhDsD5<3j~;M>TJ2%juj7S)sj4nlStvfywBZp-v+c4GhxP2WU0#xY zqO09dwBd`qGs19`oI~%w?+KR)Q;OQ3*YHvr3K|r7stI}Bd|X}H1!vftUo@#Z!N8EW zYq}imTk8kIGP;n0h`}k?fuvD*Jl5O81^ZGkxY%t&&cm&>s}||4n!u(h>C)_OgWPty z-Wn3Z-&f}REZw4H?0eVM;Cuhn^$VH%1WH3@&(C=#uVnMlV3-=k(BwFWRwgo45(6Wd zB&*tQb^fGidhZTr3TCzl+bABMyb>%c^f!Mx6Nfxn;K@(!ZN^W$+3ZYym{spF#%h))>f>$EfxMr@DgHSijeORETH{^)M3FWJv{;WcR+fkI)V+kcKA20<-B*C$?}A5|J9+m5eYII~k^q+qm$QRa2Gwt-r8TFA z?eLfc({0c0>1{_Dhw1lMzYK#eKHQb0N~{r|F9thlqr_w8ODENyLzzbu$=B)dX z;A&`2iPBoHzmt931f9@tYOr#@{_n>z;NxlaQS=WBTa3DP&T`z_Z-B2_rSh6bQ{I&UST~mFdnsQ^J4|bd?`Nkc z=%snfPx5-J`#;E{I@vy^oc}4gV5eJ5{f^^h>2zTa=xL4O$^62b_1vz z#>t3TAya>5Gcnux;vA9bvfdvB-?lH&l({_xX)7+jz>q8t^mBMGPm zuDiwA0QI0Mz#9ra(q6u0u=vPx8>wb}nzc=_lOYGKT&Pfz^tQxYn@Gsf>5~!Gdirg- zD%(M?1-8oC@H>&t8$KV|()c`ZkAv&J{cV5bumN1Fra6oyiSXFY4u6{ySM8+}s-nYp zRM|XST|oEE5}`L!+V7u0<#l(^=~;(tE3JJN1u5P1l8I0ls&&5RO<*G(sga~xE}Bp8 
z^zyv0QU8FVgu$&S-Cm|_lAMv_qh41T90#O#Uu?D$evYwJ7n zTW|DtlY6)>7s;ok@oC{Vc*jSY#*A`jK0fw%W&7uIPI2nVp|z6=*kF!mRT-~Ov~H)` zM?*RrgN5XXmU~`!yw{28&ir_e??x#+9aIZD@744`vylXQA40)h_uLtKStl}?qJYqU zyC3RzeN8R>K_{RNdAF^1jI_Du&2;j?%5sLCt}rBAG~EX-D6Ui=bgB??wqKmb_uXCHo@T zdyr22DA;N4s`gK4`u)6!40OalwN1)=NfUl7PjFsBb>(J{y5cF>6AOJZle62UAK#;2u7Z zEbTRCVb2|+U140YF9{AnZLWR@9 z?0)_49e2f(%Oz#&`JZy94a=jB0arJ$TOcmH=N4WM4SJT){W8t%@6y{!KHY~u9S*F7 z9!P&a#>+voTSZvr+~9Z1GeQ%1^FidXlY?to0y2)b`Lg2UyT?V4-n9Z2zZYt=Bw-_;9`z>&h5!WVhqipE07X6<_zl^>E@+zY-}F{$NzZu~@XK^AFAO#^X17cW1nQRO%AIWgiCp`hP%AFyh& z5$H8%)oamdxltnI@sb<2UA) z(@*%q_X`IwipLKk=_Ey_*A+$m#7b6G!f-~vJ9II0|6VVXkbLBRSC$tJV?OCZy=k94 z8hYr(snj*=d0w>w-gAh}_A6JAnRDQdJqm4}w07a|+)d)1?XM{gf-#Q;y2)qK>#AgIHIUOh21w`4_s@DNwZFTZZ&X4lc;h`5y;vcl z+6bAOWpL1tXcv_^1!bc0*tAW1E=!^1Pzv+$Wzf@+mFK|oSku+OPNq7Cv2Rp=fgftU zcdLf{9)?iB5{Zhsj!&z-W^^?zXixa3fV|1IwS!CVHfUq3x)Jc}fbdA^mJ-{Ly~mBdTvL*bA4OMQ=cqvAOAc57&)wcW(hl+;ov z>3f-|V_^39V5Y{i^<<+@+}SH0BH%{%M8$ThT(Ij$95FIxV<4%A%dA-~p2wu3(%R$d zryQ7$mo!33$2471?-7q$W5Lfp*10cjzhIvP?#kNNGBCjL-nV`wh71u#b-s0t!uJDo zsD3x>lgp;F(a=F~rL$Bgt>;%-uT^<=wq4epQ)J33PQ6|uXh#;zv7>t7wfc8t`%_vH z2*syQpO!1jtS8_^)AWf({9x6Pc%ewZKs6|M>!|wfWy=}OB>s|vCOzh+ z6j;;=JgGdVAQy^$X^;cDekg5|z-{t_Bq3&leRB|YQPo&DsU&n9*cBdg2gGc8QTxcg zb2(anAJ~vpk(IZ9n$K{bM*9Q$g%Ym6`xKrU&}>isC_OKI4)vMHJVU9|{X<^U5gj}O z7Vod~tA!I)rh0@5M>U+xNz#T>$=(y+646TdCf+Ti^0=X%rd02|?n{O?G6`1;H|Q6n zCG8ZBOo~m)efg}SabSFVUB>Tw9>47&XHeJjr`CqUoA+N^Y_t(t!w299w=b!IeIn3a zG~@E?prg_>HT!C2E|n}XQGSVVlqJFMhtkUP4f>nQ-&hnqM_p-Ohhyw5bG#QaD=Lif z&n{vb7rmRBbiG%j;G2QZ?!%|{IT$aypRo3@%FG-wvbIbedpv|5QD=iSk1ibEm6m`7 z+;K2TJK}9?;wOvEN?WoA67K4AfxBO?W9{wlZ#4}_5+h*WDqQwx&mSS@On3k{%gN1C zK|=i25P?Lo4xf@$Jmz7AMTSiRt5;E6vV6@#wIY2$#@iXRo|=l!_A9~YHSWmiB%qxP zlDQKxBtK@5xqmLc{F8L$46wat-E1K5M{&q3MTyeLi;#i5yP1o1PhC)AcGF^VFNY zjwEPb>II~Lqz$F=>=~>P?Y-0O=WwPM6IHsJBOjVDj48|^pBT`|ibj;tun}j}WD)sA zKgN^8cd3;NO%bb)-%f)9=Eyj4L7C`nKrfg*Ow}$)>g?3jugR_H?fUA@r|SJ| zWCz_mM$+eR&Rc@-^L>vt?qj3BSzm1oH8b0;L)R8-)}e|0Vzp{wfbu|x=U@7(}&24IC@yrf&0>$qz9hnJ^Rc#ies=mT$!$042DYtt?TeJ6h3N^Nq}!dM?*s+v1`%}!jIh$ z-*1x~c~_-tZF-y{DUv1;=r{2BN`-;Zt+yyCT)E@bt51C zpWxE>*R4+TV#JL;U{b9^isr*QvFW)O<(AZEeXpOzgR;kE+$yGE}+eP(wr@}lTE z819y4_nP;*y$^!97fsvdbUQb1q*aDwE{YWKU}B5HNkEsd?!w#G#fI!2cgHrUVH>Ut z`KrF(Axb?~sHZWu-b?;6eTAiiVS0N%R{8^7W$M;yF6)TXGo(*8v+uudr0pQ4hQv3F z#rLWrL}lFe8MsfL_9sK@nGCD@j`5I9?t%_D#Rs9X?=_s@PAFv)w-7TUWZTN#g314Y z+RmX^xW!2eM)364CAl@Z%yz%aS{!L{HNMIHiTH^=r<w#*anatg@y-x6m}W9Vd;5X>DVp zZ@fS^CmpgtMEpWt4>b6JT7jT7#_|n<+5G#nZveuMRYRb>fk6cO{t^b!$B+k z`ZQ1jKF=e`w4%zwh?P0|hTLz5hj((ZtV_+J~ zX8k0mcpHsCtKu{X5#LV89&_(^BAF2^GfVF|$_P9!U*;j}S$Yi-OUe_kOXa9p3^<6e`iv3H zF|V@>c~fb+u6EJ;aLHR)DQr>AM5m7Fx$q~yQ?q#DHgMEMRkr#M*JTcW-3p+qSDV+f z|NY9C79|5HuxmQvMv6=!S!5bucqN#PYCc#GqY-*v=DWEI?t6pEC2RQnsJiQ{5-8B@ zZMvAJi#(=wL>e$&BJZ#Aq|Vae3hJM_>nO}nDpYXWT}QH!PVStX`U|nWhJIhrDO_Q`>u=&%Y;~`#PC&Ww{9WwH%b*uTPI0HT^$g?q9d9Lnkuj zPGsHyD^37%AnL5Vq?P_9VzV{ejof(~F9^8l^Q%_~G$<|$B>Kkta|u~A_2Hm zfyLR0jDR46Zi)}#Kas0|^(wKYYzwKc5Gtg5^^ ziGJOqYVjt6r(PR+yK8LaOLn6GUSFln0q=#i;fTmuqeu-Br6Eahk<#!?H49C;p7mO3 z5mWfBo7Zs<>zP|f+-%xoag=w<>FNAo6B%}OB+HjN*8kj_Kv=Z6=O_Q21p$cYnZHDI z4`$dN-oQ=u*+QY3>l~BZ=ia5PHXSW}w(5y$YT7LMcrH_B!f?U`@T_|Er(1lh(?pE# z_XxBK>$7(Tjka%UM{H|FrR{%dXpek-fB(j%=cz%}d;jr?t`d7F)VIpd#(|JdfD7|^ z&%j>so#^M&^`a;he9 zX(KJ$_-fXmKTKt$U2on9xL&@ub8?6mT};Wla1wE&swj*QJRzimJSUP`%HUISQ;*{^ zs+ULd6twQW)_De#p%Q-=yEr$x(@1USdG_sNBc1Y+87Nt`y}ssdpRSfwo9j zou+P1+w^&8jzcm6Pjt*8o)&jeTcN4tfBlNS&f_z$2-BYQ_%E$OJmw|!7})=U3pDb2w0_BjB8qWII@)*jMdl>GA3;aVY8{?b*_73O^ACuGh9eN{$Dr zAUYew6f(uaYg{F7#$EILDLwzQPL8*UBSd8AvpMFNlEw|goa#3=Wu}F*m?hPlt(ux} 
zMc(PtC>4B6!KoA8X09fkN;3R?y7gHanQ!Esj!qFKLg+-++*?%Q)Kp@uvN;WNtPy6G zCOJZ)9loG6<{vwW{Fcw(zLM3f@yEdVJXK1i6=JrjF8?yCvI6I_xS#+Jv8JmjokQ{< zzS}M@^-&OKwmFx`fTEbw9)kJYpWo3G^&T=KZbi&$b(}g^%qx=Zg;LllnqTm=&u&Ro z9gnhTerQ6$7mr)PpFs*g>RRVR5-^5`hbkoG5Fy1jiqr0Rc$^N)4ol3&xh?$-)-BHdOBK zZso{K>CxE$;z*Y7grA~jM&eqTrssThOD>7fte;`FyYh908ETorJ*f*dOYIL6*DIHE zHGc>sczdYxc1G{|3g1s#4*8aQZ&S8=jwO^2Nu7kL0gBWG_aDvC&z?BtRm*T?)+pbs zO7_M4*Y_`5mX7g1;q{&pfE}GdB9{7#q?&SL*wwuWs|&1#0!n&d8b9IJdFa}VmscBU zaZ2=lp7&%H;35fWVcpTnlT&)VK%sFxXMV3d9cy<6mGT4rPXm@A$?wWjFxn(p3c=3* z3*3o*?fi}UM_VGvpP&` zal*%^CoWcPqt(u>iP|4L2T`PLF72J_02Zd44ydc^Eec`Ve-ml2NB+><6n&Y%zDgO$h$iaqiH7iJ zC6on>wI2zFaUX0t_Q;wqDMiqkTX}ia#cuCR)B7H_frx?nV+fJo=un30xdS40Jcf*m=*cdN<_^keRkCG^j9312IKz#Acd6+0NdxfiluZ*YOFxk!`$}1_*(#D*}a5TX? z0*l7WAu)113K*;9&we2jPUCESJC&y;b^;k<1H0&pHneh2x5kw4gP2yGpK}ONUgfu_wo0{<2xl45E*oGWFq}UU5lH&WjpxO!emYq`dAvL5`934|{!X z1dQ-SD7cAt{=akCeu@AbP}zdkwzVZ5ot9J>rMh!Et9=G>X`I*AGxuO$0+0fon!*Ku z3Zj;JYV?iz-6^#TcP*DYN6ron`&fl@PEMxA6VIutIW5b*bKn)&4g9|T*MWt0tn@h$pcXgu|j zETbe{WH@DaUX&6!Huj|bq+eijeL;1xaN}Ts_VUDrnYVarR}$(6GiaqL%8(q>>0J@w z4r8x;X{^wKL`JvtvTkT|b8%*glx?SPwqkg6jD`ezYqD&(ZRjEny{9=yk}z1_GDF2& z!`xg}H48)U)vLeR&~MLoXP8F$5an&@*zW}5Kbkz;TP+_*0p-MQDVextFQ9Z7)JI^e zj#7RWSMR^%ODFbE&sQa-+QPTFwTHYunyspg@W0-le_PMM@njiyI7OI)t~Xl9?jRA7haUQ!-L6#k+J4G7K`EAaBZ~U1AP{-a-Uw z?PR4zig~RjKSPdHBF5jGdi5 z%IBa-FP_(|q{fIvm$%+}}N4@U2l zTzw>piPtNGv`9LD5vv`SV!4)VScr0rhZdc73!? z7S)`DW+!&RyvPXU(*~n1cU2b1)~>($0u6+1+^95)+jAoJDX8m7gxK?^>fe~_>gqDp z=nzk2`50O7O#JEmpQCZ0@etBuA$$QF|F1;AbRHz_M`+9f)nXZDt`S@BmlH@7AUh2j zfLdXQ6W)dG50so)IfSu>r?U1B_?DmKV&LWu?(&Iz%X9PzxfAhFN~^PY;^-)HU$d)l z%~yM6?M_8wq{pQrkWqS;NzeGRoFBVtmY<*Uk)yw9c?Z9sVg|)@p+K{Qbq#Z831IVY|-z#7z`FHQ474 z-v@oxb0gfMx)`F1LnUU}D(NQ)J=J6;u@n0ntH!KvIXO52Oe%+HQ|6dE|M0q9+9lJ0 zJIgZ3X>Qpt*aTT=_zq1x7RVo`o|wdp<)zoy{BWG?d33#7duX-vpH;Et)!$X|eE9m; z|5z1m{0YhQ_=`!^ucuM=M^ivlqu=sZMq8Kuf;@TFMt` zzr2i82?@h1oNlqHbcFN)L1Pw*F7zc0qtqEvc8Sa+#@{Q|5euRD29hCpERW+fZu`@P z9NVeK*YE^U#HoKrL;hvr`{rF`MP`y)f6e zGk--4y=FA19D6)^gR*Qxta2n>c^K&U#r4`vhoQCvfVNjYuhv8872b@5MCCtG4e~|0<;hLP{iL0>W*V&xgP1^x9oZpHAG`LlOlx|2tM}VUK!8 zUJzi+O}4B$z7w>L>Xpw872VwYIJ?*NYb^ppcY2Dw3=a#&q-5mY+F4zDc*)vKx;gA~ z3%7SEu~lDt%BX+X9>e(}x^II2ilJUm+dqn%!aAqJ+6BRaRKMc(3x6_*Imo532 z5X-hDQwSA^-KKl6Uu%X-+*()bX0Zy(X=0;=ABWu9&m0C0Q$z2+s;IA#6wg|y7zBy| z!ZcD$)!K_Iq{|xp@5scQ?5NWe$W}}c^QtN;v=&-q`5CODWH7gA3?haGcs*bjFPl%G z_OBwEeX&Lm#9z|I*gh8|@|2HES)|kDX#Qv`NrRGn19twk_gn<(Hiq)w}Fq6wq>7~i&-#~|GpTOAZ)x96#jFN zT^Ks%7)45ytJ=;Qq&=kV3#20v-`@N@^bk%mi!uJ`YREjO=AE4#i=5>Wh-;|+$shq^ zNWJ7Be%JGWZjGHB6z(K`7xIg%(9Vw1D$6M|NatEtkR#Y&l#Elb{{H^{11+-wy#n(k zIxbFd8h=Rb2AAg!$r_Jl>%Qgjouu!<0z~Fd#n{44ubfQF@p`*8`!Xf7@ZnIQxa1&K zYm?HFaF3me#-vn1^2>Mu@51~{Lb_R*8l%~VabYxAjSP6ADt1iVsY#AJ7ZcaZa`ukW9jNg3$2M4LU7ePcA?% z_@P{ZZTa%)q{)a0a0v2#dRSHiP1CaoqNLu z6tn7vg5gBNv9lr`5<_2Z$7}ARh9A|u8C1MolHn-Bb!86b$}<5mp(q?G0Zx`wfD$CV zcsL&Bx?%(p{u$X@ig*4C<6mZqR4>YXpXmtLNKv5GpFZs20r)EFp$vqf?)ESY;l4LHM+y2B|m%HL~y4)`8=2cNp# zV!}JQ9u$cQrHKz@=)RqOsGC#9(;chmk zuUNAdmiF`K&mm(0LS_LD;1Y0UondPD=JOz3Idy~nyIDktr-?`(6|n6&@!N)i0_r$F zzYx@6?sbP=gQcBccAP-%&fE`TQWGa;H>`e{Cy_S_PqVv&>b72Ve5pk3U10i8tmrIo1_@b3WVsVFS$-If|0@g3xXG1K=GtRcRZe&#m!jfnoey z9%wxNxeMU=X`X}rtNZ^)<4>Lpi0#aLVf=Y8aFbgp7<7MHbH3MAhPMCW2z#Rp3mAZ6 zm9%Ck<|oV5m|=4^Fkj{vTbATeH%A1Lh|l+OwZ66G~QL zAL<@(L>hk%Sy+Ca4(`IdXrmDy4BVBAKceqg?5;7D&b6VZNtC59Rnk)=1HhS~68BJO zP-40LI@s?SIxUgA$T~zKr6-kh-~*mJpc(8f9%Ff{@TOzD5&HFrch(95S3g~VN(AmR z)YVXb0&;88WU;_6W<|%4thCi)$qOeXQ)AQ6tu2&_jnjRBz8I=bV?`&?Z^*c$RB85~ zz&w69kkpc`p0|L{rx;AbyR&GC4u}MBhlnSv4>{0;jST^Zy8lq>I-YgH!zZDW#JJvc z#3w*-@^DT=t7~DX9Y{M)rv+IV 
z1JOEoGxN4mGpn+N=$@V@fnI#>Q0Z=g2Fbbl$68R?eT^`GVB?V z5{a(g$`!qLa~d31;Z*R}jZb4mx10L_x6bKoz4^CO7TECAE0mw)@cseFFJ1qYBEO7L z|6P;U*z%K^Oirn@6hViF>^Y{jI_|y9jXT0WO=e>p@2|?&67uShy*U&znr0y7Zo0+S z7rrez;?6Uvw>|dATaYh2ajTebhQPHrUYivvymjd6Wi2c#jm^(Ur3#m2h>0muS@qaL>vC8m@_ZK%Lt5E8rkWk)1YuLTJ-(~3KKU8<`O*hZGdl?Xy(AT0A3@OJuRq+z zOfSf_(dtS@ZJ@oXSn~bK!a{Dyh_AZ_XM>DIruBCq7kh|O6wKs*&u7&_wh~~+jK~vt z)+Nh(xgsN&jfvrh)B$F>AB9p?4GAW|W4wp-10y4E&TI?~eQ`@NPyPL&aG4Fj05(Ha zV!YWK{(-k*!BFm^PV8bQ=wr*U`Sq4%%nIubK97=?Y9SVPZ*j!BQb8_>`u^I=8X1Xx(vdw>(h5#($1zTkLp&$v0{ zrsVIRc;-qO+420ZwDlht`%TNA2@}>|k^c|$?^+8r6gRmRa_Y3>XUPa zq65RzX7`=YyPg6Zhf`}hXA{uq2~sL+lChMq1ydm92pR>di2xKw z@)#>oQPD4iI9cTDTfcikfh#y2;@QafTbA||B$O^!J*cKPSNVd7O25NCmd>s%?#r`2 z9*wwKSiBlubB-?Vh002Iq&rk7Kl|aBiL>Hwjpz?m_xYkf%iv$Ap@ttHYY`qzidDHJ zW0|ep%*rL3Y-*r|IKWl)4t5mNY+QD0bXd>!I*>y;D9hKF+fM*5@)gMZz9(g#}};ati1J{ca@ zRAcSrqZu!-%^m;;bLiNDN=Lj*Q9CTMQKnwo9#4=IIPM_>#1A0lLM)e&(^egPs$Av& zuby%pkv!nfkc9K+LB+*we8A#G`Ia>o`6X6QTDwnpbHF}r86Jq|c`i(e<9JHkhYm=) zTbs`%GlxAKy4aIGr`guRV^%g_zWD^5bIP{9Wkh*^lRZg~j@f@)c^%nbV7b=&udfEV z0n_#A)E~{|3L#6m6Rbx82p?4OtG<;@)aQI(cq5@70^jL;fn7z)B6m5q z-^}ijHd)2DgX@oI133R{DEQ0nUJkBlsq$H^#?x;AJDkP2prS{`#^HZ~+3q3D)U;)- z4;#6*S=ji|Z@uZFLaL+3q3HCTIKjK@5L?CflA8$=mEj)xU_gGkTo{oo8( z=Y}KWu}e_OB)x?vg=9gI<1k=DOu~CBgYUEfKQO3>5NB*tLE()IXBuWZ9cxkj5@*?m zwToz*um=qxt^34ZCAPAJlUnaYeCa3P{d2(U`{>A*2g9)gTw_<8mW3*h8&2_Wp9&eQ zg1(+w$oS&F4>S)+LUKmzK#cpodS~FnOr&2jdNYl`rKPKz+sBM`{FNsd75S|kk;W51 z$)aXimyL3Gb=wQ*BSIxft5{}BtE;I=NlHp`o1amn4BK78%MSaF`dL(6PbB*6`JG}e_V#$^-K={wk9DrLuLU!<<6F~k`hNq zOQ2Gq;DzqDVj*L?;U>QFQGq2u5=TbWMMbm(bOD0Np_?+KXf~3`EwvMgEffzL`WFfP z_}#dr`Gl!;oY2yRRCZ3$`Na5V`L&{kioGJX_##%ErQre@m8?gQUP|WAg~rIHQ~tc@ z*WiV)kBxk* z{5^9W7X>Rd4lz)Sukc)s9>!O%26h2ix1YDXpL3w&6eb*HExYC0#BcL1`=8a(n_b3j zmE~#Q2i5OiU#xCyz^w{ONc|PcRN~W*|9&TbK?y5#e}}T0?{Ld3=3X=us62dz!g2Vc z_qO$JSd20nT^u{C5r=&Ry@oaeaf3sI3Rj+Fp1%bahnw}c)B|e@QM|1|q*W>^86s?W z>{227r6QLeyf-y+YNaa}S#`KhSc54j+MjAxPO^+iaV~S%u%$aGoG`oyn6sEs?~`qp zppR@BX)v>+GQ55{c=*+-nzcO?#nElK9c}$IxvMZ_xl`!uN;Dr;@d&Jymn*>B>||?qnsENw`}H5l%W}&RJRB9XDv!TF!>H zis)0;=9wI(cv88_OrrPIRz6EQW%U7V!~k$x1R7$11vU)Enf#M|yU+7)W{PMvlp2^i zokmAfun3Qsa=skOE_@dGzzsNV-hjwJdeE%vq2e^H5A9%YY&A=^T6UpV^n>!Dyb!ct za^(F&g>YrJzu$M}$^uI2$pr=_r!{5GuuJgjYe=wXEh!HaaEWD(AUm3pm>Q zH>n1Wrr@H!qdYed0MZGWC@ z>L;PBHK3Q^AeggBH(t;3!~5W2AMDR(HC+G^4gK~#51s|VQeI$bZhUQ_?%-XQ7Q>jA zYf1kSo(;WASf|}olmKVJ?vp9Mpn@6nAV)gxm_~rU-Tzk@|D*9U zIQ=^>ds=myjrRA?aEfP_%upwtL*W1ay-iN7|@GItG}$4I-?qE5K}RW$8F8DH0&$nOn-ndL7JNZnxX1N~p>C)z;6bB#P)Ehz8sl=o4Ew)>|KBKUSUTzMCX{mtq#~fq~BxNp)b(Kb+`gaL()X; zC<+ISTavg#@(6+mS}kTzJ|mp}x!;)1Lr?+j%<>qnXAx)6Y%FVZ@5@sbnhTsq3YyNF z(2wbrPGe7oV_vgDGH>rc;scV4x{P`5Y_z^vi!K}6JAcM2!059&KXODeLy$Abuc)pr z0n1>mE{o2%LezIwIOCi^{NOBvRcjtAzz=iQ?t5odT&3{W6>NPkmU+`mfT=~y zvIZ0$n^|;OXZ_J+UFUFcl)TKlZ#a$coV`*CjJvRxG_czy=OR!97d>p{eYN3T@{l~e z;M#Ht+be_QK#ex8L{29TpRgKqStLwIceBL1x`;zEXv3K^6PSLqqLkp)%}0<(6K-dO_>a?G#RH7;9$dA zQ7$Kp4nA8b(UvaOUz7Z{xhdEHHIN6g`rZ~Ix5l1aq%Gm`%q@az7or=vn%7R4I9tD9 zYaJfaRXeR0or>N38Y{DQ`eHXa8<9PCN&{O}UA3K!#28gA=$yCOhkm~+^vZB6be1)J zxMX>o$e#5Iaer)g8W&b`br&;4SG*M?%@mr*aQWDL64My~r@?Y$_cSoV>NH#Ol^0Q5 zCW!>tvrP`e=TpO?*P*^$cFJtw@%2gcdGzcl%ugfLZ+wt`!#eDK>~i0UA|z9=gpKra zzla@~@^}PrACDb48${Zx+;ifb?gp?*igKkq+`peVaE!IQztM5MDU{VVP6XyDwI@`7@3}* zKkct^38$>-(U&4V3EYli(Ue-n(D_^g8%o=Uu?xr8#^_>n^ZDL>7MD&+Ti>UG246FH zSkFdZ-EFI%6#UjlI0|{~^nMj=(L2NXQ^}=K!xMYiL*V<7N`kT>;zT$o9FPm3&q|7m zkqpkrBKgq=$*wfs8ge5FLp3PDVVOY}!Bnt6g5u%SQgjyqWfKbWv5vXvqHUKD=VDR- z|0TYKG*7}FHnh_|e2XMLh^m4rg!ibbFJt5#KJjHJ@8QrYSm7e9GjGJw+mcN57?;G> 
zX$j{c`E^TjC-OcqIr-hX?i4gWM7b+L0oQyY&j{7QzOh2-bv{!R-Ld_U@wR;%(A1y1N!0ZtlQ?*h=GkAbf3({>5KfG($3mo%qxLf z+xJ1Uirx2npZ>U00+vz@cE2VG!}MFb}RPeZl3H*_kVG4 z|ACwQU2gmYN&%IXB&C6kymdwG{Z;hiS#M*_RfGYaEnZT#XnA^l?U_NFAph!)UEpGh zkKQR_i#q0b@w`WSsP&A8E)R#U5@yjM<{JY*b4hu!Es0hRyL23$jzJF;oJGJsY*q53 z_q?Z;pSInLBtqDG-K{J07dk+Oqg!(zke=x{8RB?3!bMS1DS0|1o9tori7?Ro#jdFh zh5LNylay_%$`@EoW449QG%Ky#w)m9HY%aXz_Vr-QrOwaP(FY^2}`JKei zv#kK@KegD&`%LC5YuW0xwbNK-qa5qsjJQnOKg;9rfi$;NPtf}X*Kx9P^Sy!i1Jq!t zmO8x%f?jQ&$WJZKq2W2K!GmiGL|nH(2fn3KEjVN~1r8D9vN9&H4Qu7e;_;c!PK5 zVy)aRf%w%paJtUKS|oVh`uFcCO4B^LxBQlqpA0*T9Cb*_%@`t$gYOwldyVMxQy4#@8(A*_q9oUAAi2Fh(;F{N+ z4WM7Ljyh)(EotRy+b|46wxkw$Yfp^;qvSMFjL1L%YR6^E3bsPO`Y*ZxU z<{AW`M`6NeKxe)O;k!%QfCj_wyf@BGfPBRJy5%AHGQi}5UbK^CFtM7!qWI%f0FYoEfSGI9?UWf%M4 zp#K+^{qNbtueIPWiqsACqQZ!aTpa1CuTO?D#jpw4@5nu}c;47FZko5x@1Jv?^1I5H*m0*kaS?9D)PY^hlRw0mEpz{dJ!`8+=BnW_En>f=NJ*Z~P|tZAg%PlG ztLISzE3;0W6;u6(| zntm&-)Je#?r2BWIn$9kZO39j8#p#CgmVkecQZjyL=Q=o?!LyzbDc;(?#>$Q{qm_Z5 zR@Z6K1ZYQ+_ozZLJENYuz`8d!34hOt^NEh+SpPArZ?S)Pt5%We$@|3q{tw={rXyOG z3^a+82V%*M1v*%EB?*T_Iz{h%9w|hy;AG0mX%f>)^e)oJh^aE$zhs+GgXUZ83)}GW z6wR1u5;a4Iw)-MWfSD~?c?vSx&D%=x72k^P*I`aDXEeGWxtm#C-oxVCeGnnDvJT;tUW@g5a|5~Jcn_GFTX_RL-&0dD54Hgy_5&^D(kkusE zhY=p`E}_4JPOSvNczkd+EVEAVC>kR1{^&Q;WvWn|EZ>0!^s!nWZ~rFQVMQ%lCL!_7 zY-a07hyd7y-GJz6x1|S;H`(@zqSCx%flZ)N@^vrcr6$(Gm{EGKPe()vF3Wt`%sXiV z*lvra%JlklTrb%P{xP|joe6s_J;sQC@c)ANQ-EF&Fd)ew^qcItjg_4z63>nMVk*t z|kJq%FsE1%zQ_Jc}x{&c@GR|Vglee7@yl&@pT)ky&uquUFVb`AdPQMe4<*E+ z9aAmrAA^|BQvB_!ckPfB{jF1{$d&a~(W1l=C+rVtb1gu2jSquPCf(zHKLj6seuQj^ zx8^!z8i>bBuGg_TIN09b-K8N}qStIia?^x?@v_{gl|+QA-=CUnM4Y@nPR;lIn%ln< z!vHx+Hb`?uL?ZyWXAH0M`p$(GkGO2JZmfH`|3)K@ZEHllZLQXbYlw|cJ={9xP`x;q zbi7~Kq~kDheC4)TDgx;V>n0=P$IUrg>5P7=9 z@?ThmnB1J?V}CIwF;FRYOGS&0ory%tbc}1MwMnPfY2($Ak&_-%OyX0XYuK)`3qs6(#@z4i&-=8CHdS_fifQJ_V7_?5m%nodpr?% z%7orFf=5-12ROiH*AJRZ_zty)3A4=#d7|d9ZEh{0gQB7$f8CvKLD%`3>fsDF@!4{H z;@${64k_L2Y;7 zfupFVN|t>vSc-skFaAjkWRKylUH{;2P^VX&g?5AekEcCxA>i6}f`ph^#MJ=t+o#{l zTQiQRa6fbYS!Vg~R&ieYFKdb0*>`8|0I+M&uAW7>zM@Hp9%NnE`pa)UU3I$e7!@`K z7U89buzqOEn39U#;ikBHTYAom;>Ss=&O9N=-mv|X`$dR&bAKJ1K^b(XuF0}FZOrzi z%dhjk0eQP-j*s=$<#TnB{`E=g~n|iS5t732@bN6eCkH#dK@676*&3W|&J^-9!WF*A(dL z`RhKZ*;y)9G-WN_|9m)>X;bW683>Aq2^~wuGT3v$`{4dS=d@j0pZJwJyU-FBkB6H| zgzDt;^UBD8!{-79G+Bs{_mnbGU1&o59nEkBP;yY3sCYRk7Emg%=%#2G_h*2SYj~TK zDWC;p^?AC3;4}(72PbbmUMu71k_{M9A&NnX;&n)NtL#*T`*g%#8II||X;)Dt3tU{8 z3-S?H7=tOt^R9oSQ(a6=89~?;i}}KRhR2jsZJyq&MvW*BtejqW+NvDTK({II%bv!k zAAG93!AZj#E4Hh&=$;Sp>$(ypLq7y5AB;x}xR4il?8C2{-tbld(KF1$XKCkq@r{05tpV2m^_6}Jw^ zJ-&t;t7&S|NMF+&F;Gx!q>wu06xq3UzVij!pJTUcYv?x%-I?sd9_FCow=J+3FjUs| zc33QOGZpqRa#k9rh+LZCYu|mhOB;hzPEFV|yj*Y(5lWA}H}hOP531UiPWS6Di*<*H z5vMVb%`qUQ9ZThsZ+l!hg?skc#iTvT1kgN$x$NE2|MTqteO|&}C9Ks2#!K%H^6wP0 z0_lj4M&!3WL#;ZXFpKqns94DcRY7z@b0Cb)sQYS0@ZHRxDJJt6e_QtH8;j*kIXdCS zX@*AV9g9qJs?)LGfZZtl36tLR`U1ZdLiO6%CH+T%%^Vl0ZM4$eOO-W^xS115>!~?r zj{T+`L5D&D?kbShoNZJB!}i1XJT>>7QuIL2rLDPFtq)90B`PFezz10dhO)_N%!Lv! zFQJhVAAQ8&k#!VHk8P{UYUEZm6lsROA8z#qB}z+6B|uFeqMZJQNm`!NpqcY|G1||} z#Lf;}#89ohzBqV-y%=n9`*ofr$iX1}@mC21ymtG^4PdP@H+L}@iX$QAye^C;z%GJ7 znau_<#j;TpmXg2ogUMTxnkti7Z8ER+Q-) z!8=nhxh;xSrv!!Uf$G^U+oOVAhL0OY1VM)nHF^K22fhbmrpnRwht<7!^ zx17C%J)67ACa$1Al$VDG$)R+x)bV8Oi8bGc$Pb4L^uEQtn^D(Cc@DOE{DJ084cdf- zy~o?Gi~Lm**o1-*b#}JV6`=nF*$!OnWhRnH+WVkPDKlsg)cSQ_0nj4dEo65nQFWLr~Bv{T8O4K;-ZO0Q)by~+w_y^P8BtjV?! 
zTyVGQ8+gje;O87*L(7Z={KJASfRpH$Rj)zIZfmUvzNunwX7`R-7yWMfZ*}cE{t{~Z zBZB)+RJsEGUrxRsC%kF@l!0U;Ixm+6&)#rn{%cCQ)%bMU0FSYUitu^b^OFmnL_XDi z?cC0wTf48Ve3M7!F?B^Vc>P#rX>+F(vJ>XxfhF&&e`t;u{KM_^N!gy;-w1+#A*KiM z`L9q$$%4yYqjYE2tUnbBL&jPW_Muq?KVS(wsKj9k4dikqeVk{C-&dWMB*Hk4TY zRDn#)nl<$40Q0+cYng}?$EVNdd8~zMBrXp~<SVhKgOZ5QLe9J5LC^o^|xLpKWD0jKxykL0)#Un2Ew*qw>B7UZ*0TO;6QMgU{ zqw}XQ=;7zdW9)}BQC}NA!4(V;++oVbo%hWOr{2o}nvSWGn^4G>!a|YgaM2D^+hO1x zh7yAcO3<1?HL|jmf`rzgennyCKbc&?8I^|b0`Q%^A?jM>stF7ObAW4*bL#4WQ>Ln&{>cu*1O3P5@{ z!COO(pU4ek2z_tTBY8Hn@i+JTmP-OT`F7`(g`D>kXlQ8>Gns1IwiC3s0Cs;)VsbyG z7!8of#X=qay}tH0ruPqb1O)8=Zre!C6S`4{qW|@HPAlRB4Y+8>3Ew_VD^G1d5Wm=S zjUMDRD;wy;_&4sl{gPB}(jFC5-vxHugiPw!TtflpyYn~ifnCY}qpUn)1dH~}NW>wQ zx+DhcCQla27SVAID8gUGTaQlk$08ESsgX4J+J!qx52~!6gB2BQtIm%x9cN^jhNkS| zq&&pz0Jn;mp_g41p6#!1AdQfomwX{Ul_~DkR8^^MOraN++C`~zurl9=9knfmOW@bm z=3k=V9p4t52VTtU?nHL~s+DgF@N7QC-EEr)C&qR8%HmC=N-+ss1>zLn_a z(%`M2TXYTQZ2zKZDD-f!H%V8?vS7?wxxxbqPL6@$(H$o(bCdnOmAmDM_Qg$%YBqPa zi5s-_d-n!C7f3G(NKd&mZpDksOX#x@Z%&Zp0hoaSkuHi%(Rj^$HyuJmE}fU7BjRc| zk|BT`lFmxD@AN$$i~KlbDRA5Njm`Ne`zORQ>nd>8WGXssEdOhtSka}55;aD2#>dN;@|*nYK%6C*=)F%9WaJ;7DitMLlZNQy$2 z@rEy!Q+Wfy@5rE-`2bYxtzqJ(wuZ)^cs=(l1qDSFaI4JVJ>Ok_i{*h+af0nJMBSYj z%%`ugXn0WoUU5AyBHpXTcUhIZGyOCFV;3j9pxB&@(5QHp zxs9XD9yIbW5%GL~wPyBc1d{tsm9cf7f1BD^+TF;#S{7Ic#G#cCAHQ#IT?ZH+A96r> z)|%|@9J*DD*_jJAf3l8!_Dslv6xi^UL{+{HS zZi~W*M^YpW3a6Pn+b%5&@g3Yfsoyv0k{vH39eZ)jB_`HMTk}=v*vv*^fW=4V8aE%4$TclWRcOfc`%X5~UmNeQG8QY&>J|ad)4`Y?>8)sS;&{qdq2^Nd8jD~P zle)F3xrk-1?_!k#M!#V0*WpK}4cLUQN-He^>$_}weao9-Cs*!2P7jX4rSzDr4tL2?Z#?*4<=qB7W zeSLLV+gfY*#XM5nXRYmGY3)htbc`}1vi)VmZVR~BEqUf1;Nx?M&$}vO6CU9Ftelrd zfP*A>I1(=V%iN;3B)Q7I^OL2eIde(->7VkW`PhF}&jj2l?{pr&0t)vcK;cd+l6R74 z6}L9Ls@q{;-&mtps-#+}nHL67I%`5oKfCh=bD%Fw71oC5d8asH_6jy8rGM1tO(MFV zb*gXs`aagZC{0R7q&ipVxC|ug`dFG9SX%z6fRAB_ugWb^n*BUa%C^g(q`)BSx6XfU zI`^xB#oTRVj5j>VYhN5Sda2DRWoueRYG@}0;&Mbop=cr+f4jD4+wt+|kPC_>tC4-# z)9uS;u7!adGMrwuoS{KDatVkc+D~6!%w!#_P?`De#C&#;E|ivo*Iqv`Wa)Z^gYnR- zR7t;8bd=jLJB`D%ir(EigPp@_Ywa_-fuWpU+ZC}2`8{ml4{=S=_@}3~AX6vzaFZu!_C01S9*O7G(TCo1w#s&-^QV_E}sR8LasR3C9wE+_ZnR@`$>t*KaSL%u@ft@5{ zC=p3t^cM&tbRI0~7}kL-NjhM|W8_FHne!lfX3b!D+Y?oK@i}F^dVEbL3G9G-RvGs&CxD9mCPoym$5Z7~jyWC){lrcTUe3uzC{r z+Tp;Hr?1s+L=Z}<>cm1zo&mb9`-F<`kg}C;In-|*4suajTs%(s-0ZbLvj^5|{-SYRu zXGefr(TEFtl)%f~<_HG`oqZ~&t&FQdneX>D3#+-Z^z&ASO)(mJdiKLgv%uBrZV?+w ztQF{IvoVjdGBx}9qQ#@Y=LtJw^V5t-L|rK4Kt-xRLBrGmYnSC+#A zJglm`Ob#2F^Ye2tCr^}lBte_Xxd_fS>bkL$v5dx>n+M~dR@-VK*OH(NlQ&}GH!bd$ zX6^v5whasb98Ty;?9vs~{B~9W?70_K4iDX;A;;N7o-vH=W_o{%$|^ zKGXd9v9|@_TUTcT`PhTbsoL#BdysvsiE}SQNyjLRzrNQ7Y@p-qwRl=)W_i{Lvt?}b z2f$gh#l+O?zOUd6x$jqw%P#8+Aa@x+;ad4r+vzw2n9oWgO84$nXD9^jdMf8VX%jWP z`oS4?HrXGe2@S_g%;_@eHaka~XLx*?!^B%t%5e+UHbqm)D$17e$WtF+d9018 z&o56*%hn3{uKHH>gSd7}%MVp0r6dNbTRSpyRIBo2&o0yU(Sb>pmBd00B{oenQ>iaf zG1CcZ{gG2UL&>+kRIr%u9H-**B~LR5OZZ-K4eWaWYk?f!{!-0E2H2ilR+k5U8fx0P z;f7L-oNKik`T1~KaBVK)?5bNAsBy<1n!<4fulh9R!YaAWAXJ5B?=f;rePD~2Q%XlB z=V|wvZ5!NXD+{+#REwXZwqXu2c=QV~xtU*okPeJvy_Js%dolerUWpJZSCfBgI3i$8 zVS(jc(N(P3yINDn+nHTSO`kd&o#sI_63#8$_1ZqcN6Fk@0p$^?A~&Y z7-i#4i8P7Bl|*Sn&3ahNJ>`K)=c0xJPl~|--VXAW;1wq0`K1O1(08WUZJW)$*@qhA zyZS>Gxy~T$YWREBQN^-b(Ofe>G4-c$^G#z^-nT#D1?O-iune$Ti^qF+muhBETN`@k zp$Doc&7VrNYL#|I)6IcXRRb_1*K$%)tmdrH#%T1&5)o)sZ`js)rDgKU%x8O-BvKJK#6|41JHBbOW*!5#(R!^4~!ZLB!{^)cFt0JJRseCwgP4X^=S z1>Y$uVh#CQowBj;QGEBdr_0Rpps&1;R`R^=4beX;JF;i!DR@Nn+_&u}<*FY!^Gc?} zim>&ZB^7ow`N|pJwpd+T-OuqEZ|9}jP20V>$q?0CYT8Bg_ThYrQx0H7oU7$Io>tyA zyEe_Vo*`UQd-Q#W@Uk9&#?dJ6Pbtg$S$i_667bONPd1NS&Rc=LU7-^v_MYan#REis zZ)FH@ac!?~w`OIgQ0~(>facO|1!V~)d5nJ(x$)(}y5943fdk--Db;n{<1rF3mddi6 
zjD6caL3Z+a2nc?$D>1QDJIjj$k>KFm1)#0Qe!XiDYg`wGC~tjeYAKP-(sB4R_ckl} zt(*%G;Ku?PcRPK3c99IDfSM)>a>aP7YLi!DF6>L=`E?*-0hY9iP7yI;#JUT}B`}V5 z&wVxOhV?3o6EQYWy52Qpvbhz$Y5&%N8aa*hUGlz_HW^Q-+92WBSd_z>*${;}F`YQ_ zyu3b)O+1Xfg*;}~+*l;H-*_jPh81vI^RKFD5$ zjf~e!z2M4(QZ2w{M~H}QH)CG;*IZd(U0f}XDXTo>8tu$_FSc*Y({CwUr+rArAs1m} zWSv&S>S>c*TziL^1oV4Tm*`5>1o^tH#3Aof#)PA6yb;tiN=3xqCr3x=n7M(IlpJzp ze5WRw_lI7Ev)B$xv=HAk)0-ruie=p0E>s5Asx+JGWaDX^)Fo_9vi&zYalo`J14+fW#qW z4={GL@IlhwNA?g&EFqfAgkGj=tH2TKtKHB{ zuUU|BGd6p{c+wpnrLLTj(dP8ntmm{_ixMMZSSJAs-Xt zDUliDIkT9M>+)mGai?BVUuQ~5XKQD}3ex3Kr^o!_VYna`LLTrpQJN>St)kD*%fq8i z$`r~nsp7a~bA3Fn#}_dEt$e-MvybFB?cDSK-%9+M4LZtHI|LgX0RM=YmC~I*JB}yB zqjl}Be>D>o`iANYC>R!!BuVDH_*}LniMaEh@tH4DtjpO(dZ9vbF2j;a>Nz*Uu7OpS z1z~Z0UtxjjCXl^Az_GJT+EwAQ+w1d((g-|O5hAbkwkYH9{U-pl!2ct`We@LP~D2 zJfB&v06MFiMdnbu^#D$UETKpJAXr-@1 zc$+t_V*)SuoRg&^ml5`AU#4?&6j|k)nQ;+>ZT<%7861)}Lx0{=n< zk9yvi4s|>SB7bi#PL_jbwpzS3++-Cg?1ppC8FfgA-EzzE{mKhB2`jF@)7CX$2e=Vr zCiz9Ku5Ch*Oz0_v3jcD*q~{xY<7F+Z%>-i64qN4;=?HJoOoM(o@ z?;(-jD^KM(-LA5FkB(jRa8>8nB|3+Me~+TMRI|X^3P`77`1FlI?PZ_Hebd`wLPlZ}A|ir{D#BLh_L zZW$pT2>nw1qu@w3-nl&whz>tZ@Wi*6loTTp-6}n3&&|lgP&?Gw4K3(1pO^bxIGLW? z!iNyUlXvSgfH`ckHIjv_O`7}9fO~MmT~-C9XYphv+3ff6HBp^_y*=vp){2Th>ft#e z6^Tt6%L2cm^~DW=_lzshP9?V2)%l z`Vq+~SjyB~S8F;Y*A zz~i(FH?pEglH?LekqOUI_baH^gPqEnoht)}zt z7(@}`8;4n-?Ft8$a$UY;Y$zzR-fv#>x%iDVHg{N0n49}>#HQ4)sgl{usgi;O6PsJ7 zl{v#1$I>8{*+u00bb6JV9l-W*v$v`XxH-pt0Ptu8sYC0~bO@u>`;9i>gdA|^Ob~13QILpO=J3J##AJudF$mplrBNKyIWAYL`3QCkcOeVySrm3>Fxm}-|_Z7=Q(Sg|9d{o zS`2)-SZiPVmm7s+zgv+b^OMcMATl7LRWz zpM+Fr3BD}}?Q`wT$`~2dz%hD_eU-RRPoV|i>*x2le7v%p#aHwBWwwKi(QU1; zsKdF6nzxJ?&2Dm8_mw<4Z+=b)zR$n0N8r@rJcuVCQBI-3IfeTosDy==+tb>k+GF7n zRn&B3++6InsF$3p-q&dj;M>bRN;_yR_X9#?Z6w*NwJ5~u92ro$2inSLNfm zD0D{)x3wE?WJv7{)iM>2g)ud};u}VGUUNlX*^BTfAOpI^<18ZXS;p(*K9w*P%`Qgr zZ$&ibzJv&7=B+fHjT0|PpiT4%Ip>%Td z9EL=-sOD!ruO(GyTsJG5uk^hQ@i#65Rg{Bm`BhK-4Y0P^=Jw6|Dtf|Q0d7aym%mbY z(B@(|gM6@TA!HT5y2wMJEekbTAWzW)2!sh~4vA+f!lATdcOrX)d;OXQ)H%NZ5@928 zeEMJ9!2ek9=fD1?3F=-)lQ{pg$ubwhhlAZQ?thU4E(ER@+T#B3w`g?k&}P+`$nv%` zqef}8m4P>zDIQRu8dYdDiO;FB`Fy`PIwge`2^+lR*RVhpH;HS_GWO1)evj~=GZkBC z4Z;1fp$!=}K>EL`W(+|KAh(BfDnAr za!t-}Jf7)M+a!pi%esatpznPI`UBvLco%U_Ltc*`27|n)KwdXlOz&rn`lABX^1(BK zMu19qaa@5%>nezMR!FH#Fft)uW`(6moIljnyucgkcHSKmk#wLlS6e9RSW2VZcu)!+ zge+8rGG1Lr%o(tUWbEHQZpO$sKLui=x<+{K*m>J8ye^37z5fj!QqwTl2-a14<8a~%z_?(4NRojk_K|GwT4J6r6feyK;Nq3hIO zm+aCYQY^AvU=oOMkuL0EO>^v-t9N(hJn=3e@s8Ru+UB4Cu)khr`|%BVc-R1Z)~}&D z${lI}V>cb5ixn+dgJH69ZdMhOWhv$hYONPdJ?|31B!jK1Qz zl_05aL0Copd9lsHrtSleqqj639J5u3Zwf_Pl`T{L%N1Rg6k+Fyz= zCCWf=Gsh5u>kxJ)rrwQsL$7rH7s;rUdw#B zkjO0#D`r1P0W}-J4dv5n2)EsqSDVYUhY_8+hx2&emZ3^m`=L6_^WY#Ku5MJE1#63{ z*CZp}BIOme-S$6bqFdNjN3&L~Ew4kcfd`B1I<)pA5wNbLPOtkHV0HvOB&`_g`TZaN z=*bm*8ktk;*D95%Z#c5_#`((rahxPPFK&fqok&iFj zSpU~L^BFVEY={vD_{@H|NXUiXYkZAjN_qS>s$bfifp7Nk{jXq(yLQQfhAh7|iwt+| zTbLl7V-ft~qU5NMr>Sy=-E@$*L`Sx(UEE5TgxTy zu@;W43wp5!RgpBvSQc);?WyM@%FCh3t)YNht5m1OJ?ANfIW*W)8Da;OVWatF&G|)V zkeG+AH;j(|X~`w<){>Ox^jl`@*LL+*SJ#`u{JV~`c9|>=C0>+gsKjl*^K=45)$aRx zdIxd_lIa((nPy7|$Rk9q_@J@FJV`*&h>JQqvx%QQ&S$v^i+N8%YvU4I1GMDup9b;S z*Pod~xpv@eC3g;-t;8mgSM^`hv2T2jXBk#LHR9e85ahf-pn(iqkgOKDtCOj;%@7p) z=TYkA{%3^jvp60cz6R2*CA4Xk3Kj;EE>XTyS^v5Dt%U&N;;coLZ13l%LEqjkeuq3^!OW*|EZ8a zI(|jczR~LoV(j}$=MlnzQRc`IAj*z){2D8_pks*nfE@#dPA-n2yH>02xi9t*t;rL- z{dI&_T2@kG^FAL6lQ~VM|(_^V-B=}xeBJSVys?7<*#v#2t=q!iehG z15-NfNO;#%WsG|T8FVP}$RCiWF}}$969PzSmC~8``T1#b;2nu~Ss%7f*P{@ppE1c+ zl;;Im0;uWdRqE_Ku~5`Gz6zr$Q&TYc0EGQHjv8l!Yq&k(F@q~SFxoq_37bPC--efJ z!A1KFCI>yFd7j^3T?M9f@4#!nfFo3P*Fmhe)7fVO@anhmXCs{o;2_u=d^YppV=1KT 
z`(4ePfb{mP&@6&eRU)|&9>Q~E^7&hrP!}fUo3swO4P^t*-Nzp zcHa)KUwAXa2Q$S91LT-DLoEQd^w?O-Y9ob8s4<$K4fn55*3oW+=Ap*Jv^&gKiDjGg z%*^a3f|Fy94LgTENpdN7-$klB&>kVJSK5`>#(=l98|*e!w8vV&ieEpBO}f(S%Ox0Z z3w20(%Z(Ini7fP*UP6O~o!CYfj$<0C`Jow@J~R+aH`2o&KBB)d9*C2MhvloVZ~Lc( z{=_ZLdPx+7--vU@(B|%aU#T&*)A%2|(En0Vs;mDlM>^V8JFEY2BV3t(mLtQDP0RnT zN2hMNE~rh){@aKhUo>BKVxW*2UnlIOC;YLBTL)54FExs?Y5C@RbI*8rI%H;Zj`Be> zj=0M+ZprFaS!l5Z*$eJ2IY*;ilXLpw{}2_%SY}V~cC!IF);3!`*^`#5XbpqkO>K6J zu5g6Tk4?^{Ozv|t=C4QdYitc?!9k+ubSD@T51$Ls%bK*9fOOZgw3^k~hE=hL&4o6h z8s-?(zU1WBH=k&+VuT*zryFy#9;RE+N=dxK;7ad;YrY&^7-Rx;(y9{<_F>GF^Hud* zv$OKZFy|4uO3Xe|d30GzRla8UnpLT!gO;U-Xa0vQA`b8O73Q2_@0Hd4*HrPJt;lgAGe*85BYGWtJrJ402kI`m z=)%LLZdS0V8e5EZUO3&rZMgc)f4l zqm3e$0yvLRWIQ?QVKAu9&?%RX(8#7u-s2>;q%5+ajkj* zD8lH>ZLnqR3X<9kaLW4YY+}7>oK#hMa%g?Be=VH`ydEEk{%Tq&8tz1&X|ILXllV}~ zm}}Q08NxG}mI$&GeQ-zqWP3DW&hS(js6&LAdBju@WeH^vxfgj8*=G~unM_3{6&tk) zz8Aax%hVif!EFRNW3*x5y%oBPEmNwM$q!7!Jd}{SMY{4LqP*;%#-TLr<;{3`s+jjg zFEb2hO-fhr8z0qQy-ZA0!d0EevvF6Zi_E{B`aGaGJa;rd&&($ezB3HRO0GhYu0Z)% z%Z|(Gdds|PEv8;o8&DhvhbZEnWAB(4id@ z#Nx!DcNnIsvmg3?*CbNhm;e~@3C?!*g=c(sb9b>$}CSTji zDjZAF*nVwbD?}4Px9=yUHnzqhgw{`p-ADPIk9KdJ(gfBq7V53CS6Rx>bx;Eg++nWn zdnYi!-dBWaQDq)w%KcTvry~0VbG%kXbE`^_PWs&tr}MZN+F~C6+(169&#lMN&f8j)W+RK-c~9IULM;i-Nd9IQc?!;h-=>h70`}o2r303@&E=IrVMeqW z3=8}VkB`0gUPenQb6u#DA1D&t4HQzW(;sv|SGo|UkxiMH&7QUgJ>ZX8AI1{0PZt01 zUFF3D^JJ@C73};JpEXu7?OT?$0fzG`V0dWfJC5vAdSP^264U#;D_p-7qNhh0#d@2@ z=`Zi(?9>``g&{vkE0r1n0onKh2W=WQR%sA6a!5++IJRm(+nZv{%jC(>xAa!Hv^-LkuwLi{h{tZU^dims0DZi+Jh7bLWMuX5=zUfe2?9< zdk+o?LkvjDf0q6sj(%7}l||<4*CaQkVkzn3`>&T1!d5hE(v{`66oxT?1ltl(Zy=IO zdYzEL%9BUQlH2G?GCQAmB|5_v)S$LsVQeH0xO}(Nh7~uz>zGu_I-4uqZb_yJGk;RLz`4oAQ7Fzf_rx zI1%STjnqYZ@=A$+BYB!!r&!t}-J^++QQ+k?L$^;Pv=9a=ecBvXYG@@?-|aqvarSO@7? zLIORn$}jt4Cfjn9ahMMe{_w0#2P|C0kc`ZaI|*J1B-os3K`vSzqA^Dq!|4*rYT!kK zU$Zk6fxrEB@6|Qcfm2zfub0j;+*L6_>OIgZ2Qa7hF;waGb34-4$}cbP2Y+QnH{?{; z+5@yf9M)ky5!hIMn!TDSJdTI(Oe!YRa5B7{Hx529t<=}3kd~q!h#o?Mj5nwQD^G;q zW(T&Q&erK=@P|5m*wx;(;_m(Gvq*|RdYMgd<~SZq=1J{yg)T3bG4gr1hSL1D98lVW z?F1E`l2p1HCDJK=;d*N)50Qfs-X4$-V}?$wd~yRr!Z0nMeMust2qQ<(I$mYAh48bU z8iN7@iH{TSUM6L5`k?h#^L)N&8b#e0{eJ_ZzwQrh<-a5^GhL<2(Ld@;ZLXj9g|`k5 z^c8w~DYHQ^Qb677WRk@#U^N#`b4h5ue~KMO`RB{X>`i~PW6ac9$Sq}r2+JQ{#Md4U z7d}0~YEQj*%f0T;KEtmum582>@UBqgXZUDmhPde)JBb+wpBH$r zg^SoRnmHYh;l!pbuxcCI!v^?PbaE<=^Eq9W`AED?X$wmxZT(`YosPSM?t^^8oc^ak zIDkWIb29O1WwS3lFha7z!nvQ%#vW?#!meZ4Pjj~qRa6rC9XqhAJJ#;{FEx_$A6&Cb z<2ZCk1k|LsX^asV><~r>=*sa_Z$4b+SNnOr3Pt!Dhmz|&F#;Mwo0>zHaW1>pvmh$a z`VvQ{I?kkfr3wwVTNm1WUq_aw71&j^D4S>=%N=Q=n0|lWh0DJa1u}J!sZZs%%@lW2 zi`G!4G8ogO<9F`?c3ZpY`t_Q>Sez(d@ON9DWr6@ zgSBz`H-DcLf3C5XbZz(*w5S@r(cfBhYC`A#5iiM3P}-wO@jL<>979k%+Yeag;J(uS z?bJgvz`_*AQ+O6_phEu=Ls0f=Z$+%ucI6G2Mf;7xhye@AJD^|D4qZ@y4Of+Iu6b-8 z!iTH9slHSIAwUJd?EluE+MjVRgUu)aq>N&UoLWt-3)P)v2n%AWq?8Kh+91qBj;ubV zWTW+A7L|dDN6&&R4*8jM2a@=hE(bw#p-}ObTJ@9X5BwG7W^0Y<;Cqk#p8z(t3e~}n zZ8Rhw?`9`BV;S3f{Ly`Q|E{yDt&CJJ3--GrTWPLN!|M}DS zPw$DpdB~^nOfl4Fic@#lapFbI2vjr(dP)at@*~{?x1?8%Ri(=byr`c^?61Vtx20iy z564G+NPZ>iE3orY%ngWL8LiL2vta?hK~wI$aLlAaKv9JlCzhyTF}M|?-u*T!EIf4r zel$S2kF~V`ExysN_gj){VwYH2*OjUixZQdmf3X|$nS8t&;@!@23n8@JUQ;BV@4Br*QY7B3o!W_xijiPUSkfcFV$MJ^#p<^uqX4txW_OPLA{#`HfMVw7$W*yS;)t&AYUsn@1k$8{8BM>BUUc0tnRJP3 z;}p{OV<~>CLaB+&J+zbPa;RLCklRZZ0800#_Ojra96lSI?b? 
ztd?`-B%6oz;%Obc8q|IC^{7(L8FL{F!=?+E5#ym5kfy-gW-0??O0Jkf@%iN{m0T&P z`Xs}9{|`Y?lwO%ddT)4G#5|5a`3-BAzg;1IYPdXzr+!}XzEYhN;i-~FXZ`2EQjw^= zZfL5<{K{mOoQ8~oO-V?mjoHGa8OK>lL#4Gz_y8eBqMmBDD5O;yu^sst>P_{6rvD27 zz5g43A^?@euEyNB1^O@Q?zhs~#<2~oLkvY*JKR0JV}<$l5BntBh*H|^Q@19V`_0N# zYgDec2;f4UXR+L8Vt~c!p4;yDzyLjQL@!}wVle6*>oS}aHc0m)@Y0vB?Rz6 zA+<=s);6_}Gno5u(Oee_At-cgX{qz8bkft;&ua9DWW5WY@Qj0u;w$M?PY-uDf){!0 z4VU|+Sua@5CLL;i-H!Jyx?zYC*nMmGV5FgB^2)vXa?SfA;uq>ETPv%Oi7xTdOV405 zZ@ejZ%GtCd00q?tWnV-Wyq6diRm!T)O<9NijD`?>3zzahJL528{b)dt)BLrLLudmgW)G@-8a|Tt+1Y9sdU*?96T(q?=ErF5!rw78jc- zztLQd5%&eI&oD@9#-uTFCod<~syGXvK7)9gPT9u!7C?;;wnwm51<3{KAeEA{4)jqy zHo&dETzBW{{pN&HkW`~t2V$V<{su4ykOd?}8D3T(B`HjDO~SIpY*f4x-oNP|dA} zh?_HOvCcH+hL^;cdfP{(j+Zr==5>?cu(PmhEq>Z(UwoF@5l| zkSOh_KEF(Ssa1}EAlKwRbXQn4>(nBySG3o#a;}%|^Y@i*^Nmhcr(aS^9(3g;7kgDf zhuxNM3H1?_VhEs3M3}>%tO`Vc4g~?pc9pN+nq^Qhk%`{-nU3R(+O#f#+Kl1esQ29) zd*8uaXQFE=s(q}v1qJ;OGz|H)D%izAS=(Tx+0Zbb;|=_-3oOW*F(g7MGBy?;skl-E z&1JY|%9R0@Ilg_$bzc2O(NJ5*nv2Ix%2zG7bGwMMm2SAkVk#baz?|e+&N)(QMxcMW_V7s%IIyEtMsJnl1``=FUe;L%w+`q!i z(xj~5(?3up6M*6sQAPY}+5XeNTSc?pUaeK+!j3?Q!dWGf!_;XTn{?h<*hR)8D`e@O z`kIR3rr@>2W(V4Fah+CW01w=%bU77m5P6b;3+!Hkp9qvi!DVgT^?D-yV14;}q211O z%9SXfZqRkUowAl^e>%@sctq$9^RU(Y62@V3#pF9@Q`Y>(^N8Ty(-^kT?L5m*`M~4K z$ei^0^Q!k%R9oAn@iH|vj^fiz;5C&*TM~EqaFU=#X`{zSLk;B?i94+G z1vzDNkvO0eRw8Ps#mXp{Rdf!$aK80pYH<6EJz@{legnfEEuXjaO_g1ev2#i?M_r++ z0d^)MhURCM4*((Mx;Pl}Ol6=l)|m!5MYjSy!=`(dIO(u`mjp0qwsmfc4K~S zUDTDH=fDBThX(y6Yl4Jl4d)4TSf}E>^N+zC7mx3zFXN?cMsmD{ANB69fe5cpZ9LQk zt>U5z52gpp7d>II;nORuauZB(dIS`OO#W=;g`&A-zg>GbI zwmtcLh3GZSb}#9j1c?Qr65+fWCoG6(>M*T+$E$P5jS(6ma94_YJ7YVb&CQCI3AYl8i9Baw2t?Et>*Jgd*4(aF1BI}rTS#1wR87@Ek0V|yKdZ4AF$ z$=!%#R%J1(3ojw@S@()f*wgGdnMZzz3_iK$z4+G`{r~1H#&Lo3XuR-@_jA`YrGL1* z@n4>ySImUerBsiHk9-uLi$gRfSE2ds)gi+#dKuu)5Qu&prDw)XBWa^X zQ7Cy<1`lOQX@G?jkQr+lQs)F@PyWfKubE>`GomuDo~0^l=bjtOr2kqeLXQsHw_bj* z*pGTc9(9Qg<>M@`rN49$CBJQyZ+qxN110l{U~CQ`-ufaT*u3LxW}tr#DDqxe z;2dW^Vw?@_SY;WE@m$qoK!+0*W(r@-kL2SWQSc>LEni}J)5O@CS!I3DP_vxX9%)T^ zMMOV4ao5>I$>e z0K)2!nBQ(#-Vm{@3|D|`AySa`tC__YAQylRQI!uuz|bH(Sa#*k&aQ6aXP9Yt7~&7} zx3EhR@opSIR%lYn5UG56^u|QSFicBKG+onP)10jd;tomT2Bepu>Vt4t?w(&5*o^rw zY7XfqN`cQR4c39kpS6a>2f9bZFig>FiF~vYL^y~x&`*OPpLPvvDD{h7 z?Cndv9$<4EzK>oT*>DH%whs-@Vd9wBFXH!g7j|uAUHL%)g`Rgat3yBr=)n*l8VZTA zq)R0VlF^iu{PpsujNxSNq2V7PN>kXBpTSDzKmQN>0LT|br!xdC@ln*Hbi1;o{YkBf zn&6+Ve76rZlTKONTReB=qu|TYg&u_}9MdP=>DB4M)mFm|B_%nnj`ii&u}8h-w1AEm zxAHdo2~X^+;>7h8*dq7RO^PN{8uICQ&y8sukkn7Z}483|ZK&ed%P@hSGP zQ;HHZpZf`(z)_G7n<59)6csiD8(KR?gx5H3y~I88WU&eK*=8LZfSxp) zq$r3)5W~a-#8W`C+F+mo{Uk?4vWWg(y^f$RiiYY+mqPU+nL10 zs>+cOWSD^sYGqKWRYe1amm-`RJez7V^2W&lmH-!_p|s1g1mWsRh^TS4_12d_j?IXj ztN7A8x7hwm_11ZBIl!tFk6{c!z4WhlqC#N?h4lh#Y*&j?;9zEdSLjaQ?_m;9-e?Mi zNIy1RqDZFU2;m`G0DxqV_sSH#G>%(maC7vNPn_>lY&HeNnk->6@Dg#h&c=QRBUCqA z)X^-++^(dQONdw0vG3A!Z@Ho5u;~eK-feRtm-AuZQICUQy(BhCZkA-XPA&ystJKl4 z4)UR9?oRxg6#m!I=EUJ4>ht3nGO>rX$ppNVa@sbMhS5l4di8Jfyac6qb_k`b17e7X zh){vlflSoQ)RI^$ta2>irZNnH@)oLK8}(HfNp5Y=wpK> zHD*i{r4(`Ii(Om<+{_n-(TETkkcFZAEr620e52g%&C;o}*?4(NW}6*S~)N+gVDuh>3ZZ}E4V zm4J4+P4@jh`)OJsH8^Vu?;>Sx$*Ws<;W1hmsvq)c4t18?AVqE^C&VRKwIu)&Fq(5c zFl|2i5Co5o4GgdonNRc*mcDq@v{it>)Kth#1S@E>0UE~U5*TyBFQTl)|*E>WC zJm8$OEF!MJI(3S>rA8{BJ2BIt8MO|o2KfE>fj+`NX{vQ)sCPA@?;n0t2SeWUkjEmCm{ut z;J*3eBwliyV%8BPF5o8A^twvQ&oOB8fi>&8!Yq?GKiRiD_Ws!xh8w5a(#_LfqxLFi zjh&AQZ7}(&YYNUbs@b?d)FdcW5<~kMRF2RxEq?*x(Rr`PAxsu6e4+De}X5B`f` z7kr7+^fc(>BO2&W6RxGOrfUPunZR{3IwXmo7yqK*{apVNXa{9svVOPo0&axW*D8DZ z<9;QvZ(dl2g^t4@9n zRet}b4Bq}FgCY2)Ao2=^q4zJCkpUitpZuCngnRB{SSnpEpzr_WR@ZYh)9X!HgDixf 
zvGNG$hVR9&7y(1y9WASN;@>kdsViT0C*&6WR7t;;Fkrna>okx0^9K3U^a_T+(;31zX;`lU&5sHd*A>2`=o))J#m{_5E5Ca&@NMhfWt7~F{^dGS zG2pdydG?Q2d1I!}XF1sso*yI~wr?-v_O?6?-#ga{8A`r3F z7gfr4fKjZwtaELkA0N8)i|*?!;qD7cIb4GA)HdrX+dvAk}8Y3&K!S;#=8k4vf7`PTLM!FTRX+N;D291?o8W2hvHu!}E| zK*5ik9(u!41CVNqR-3q_!OeRfc)l*x4240h#Oux`U5MAcCLola!|TQoNx&HCP#R#o zi9&?JhdYnQ7O%;~Hc%f##jI=vm~ZECyM!W&K0h1~>xg&4nuYfXjV>;+n5X&#snWjy zqNVIke~LT7{w+O{qaZ{o+Fm077QaR!#kl-#P)X|&v%vi=EU<d%waEbSD}Mq&fKy zhzu@&1^%ucIK6|-P32~0ws(6UR`khYKX~B5&l|w;6KwoTM<3yAH};vkX!)m- zA=iw{;tD_v<3~(Uv)MNiRYa;sh*gmrAU#BMj|%K{Lp=_pY!c53W!6SbyZ+K2!nHS@ zw60jU^oMCY7f1Co-|KC?w+7?R_wq9a;XzJ}oTf36J8AX!M~wXwr9+{Xmb>uUy8IFx zHk*0z<<7Rlpf1GDOFXoT!u);o_q|Mfp{1gu*us~Qr;>ncDO(lBv?&$PV)^@jV` z6!u3@d>ZJBs*C zbuCYouZB5)ZXj&_-g#6}qE1I@gDb!h$lggl>cXR^s}F|=TjVV16^P2y<} z#2HRa;xr%ZiV&i)aSfw-K%Jk+E{lo@PP1G_Ns|yeENgo#x}3D=b{$%Dwb^R4ap3aF zCJ1lY?L^<4lw&QmF<;yX6*5pS)h6PqZ9?d9c)W1kn=R2%w!J@u$A#56;C8~PjHvY2 zDRs6yuno904_VGiphVQm?`qF@r=3TzZG9* zZ&=!su%f?p(9WQo^H!3ar(7`=%9)oSpY!T$bk4Z>TlL1b)eek3xoRAyn>v-wuIcw% zMXkWReb6B=@f|!+ArXCPo$;#r8m>-@{8X?p=@PYj7Z=N_1xH{ybl5F*^46VrRI+w{ zWS{ik&#M5M2hS0$;$FS}lrf_^O1>x&?fH>I*=1Y+NHn|oQg-C)2x|9qn;*COYL+V) zL*6U&{o%r;yB>|tR~U9f=DNc<>NCvI^fWXS#?5>!zqlJtigK`Oc|{kYi33piblHJ5 z9=mmwihvtv$AC{-TD}^N9|O-uRY4eV^h^B8e+EZKkuvVdoT+4K8EQDtkH0kYcyJw? zAvRepO^U`*eR@aCo*3GZOwUKH50F6lk|Y86tT3TuRy%4-OVBvSYVk-lwZjzob6sUX zIxK$Y1S}SQIH$kPz-@EZ%bZ-;u+-+jK{cisP3zu6E_#37HrMaFy(2xA-m+QVg~chF z8$<+92hai}_p1CDEY_rM{iy+O3SDaL5M&nH(=XZWv*fl_@-v($6tQgm#P?_enSx+P zUbYDCzZbrRUu|pk^NRahS01h-JxgR9zTK|0VU!V5MwkVPs^HlqPP`zw*XD~KZhql` zJ>K`2eW?xT;K1sa-8Q<{>7N;`@%U{1PHqBoBC7kN>v z0!~EGv@?09T8X5w2M`Sxf&4&YC8m|GO@@@qK^d9`Qq z`t!ZGAobIT>;OFs{?fb429skTXfhz4Eu-OR}z$-L=&N^ML#C89X_} z==kijULiGnFMLv_M~Erv?FVdQ=ucYj!f9Tkj|iMDMOzeN#wi;L z)Hb2xcwL0?_QNMWA%~Zb$N(}a=q3CR!KXloW`)X%r-Q99Gr~8x2PsD;l}8#{s}*4G zIurqZq|yj)Ec(A_nv#G|xj{N`AG?6e6FD&XmS-a0B>JOOc6;d~JK;=KN^G#yz&FOe zfzS+|{*8pAVj?pFnyJHfzpA8?^&H@Mu`W(dDtWPYs`#PB`fW0;Pv(WX zr&nsUpJvZ zL?q-3v^?_d6M0oy(p6B}b~o7n2nAYN*?<<2;bS5UsYAQ0hA1#Ah-l-K zps2Vuc5ue`Nh|LuIzK7J1ixQ#MP?Cz#Nbt!WZc!|CnQ{7TdVHM&TfhNi)Uam%eB%c z=o!vWhZCN9KTZVOX7NoKxcv-h_+H_6pfC-MqIR7fW5sj&O5>ahT)?mJl?7tX>5$rt?%G&9kl6pD7l2pHYPLv6>a=N9|8Z-ySBK zDjMA$G4F&N5Ieu{5-(JXBGJbN8v)oIaklMwGTn$ns>1>f3TLd5S(^nJ+y?&(ArQNfzvc z4dWqgru97udio14TV z%s8njDK@qtX|T`!7w}fTJHS5Nd7q+0*w;t2m4WuJ#=$RL;Z{T$n#H8OmKMaUJUaTqpy+~!;2XZ;H zKz|^`FGja(B0rnITkH*E_LOk&Jb4kUX9920J%`s}5NpC0TX<^8ZSsUj-OWtt14WDL z*HrK0hRGT^je-7Qu4}^(^I3ze42CDuPO0-LC0HA=wbD}lkKc8*F{>W@<%9gOC54ze^c-(@)j7`Ca@Z4sM z7!ulDpjL%*!^*+x9ki)v{WwZ5w_4{vGIE>nU&qPQRcQ${3J&MWgXz|Uu`J!n{|n4r zP}wozEcv!!H0&?6jJyHg*Y~7mc;4-J{4o9`oe`S(*yt8k@!WaM<`1#Uzzjf^+U2Z_ z;JULpn7#UaHw&xPxH~W~i1ckfGmq4g19UKPY=;Y)&dao+YnRc7!6%a^ev$C6FB{^G zjSu1M9IzoPa|nOCopknM%DOMn~RwdT=r!AhwgM(aNRvgUf(e zg1_;>)q1?;8{xu=#}8BLmW{`sC5~o_=lZpFLNWN8;oFO8_v8JAQXTq$?`cQytq?e? 
z!&7LJ#yd^u8hpKkzHcV8GE2()w>rV?>wh%mP{u>A`(FHlxaj*4j%HsQ{)ciVUpIKr zkbSaD(4Z^J(N;F#15N(m)+b++F)GP?Uh zSHH95BHWN!xn+je>KrQT)cpD#GAMcy1l2!2D8z8-IEq8emERdKBNRWUF9gQcBslA> z-?_K-Up9alzv(ha9i}m4D;>fwQd7x!eW>ruG({VoIc!&xOeRQ?+e-fwPX{KW$xn6u z4sbYo-v8`g24IRS;h>{ynng)`2nt z|BFB(Q(D&WkcgUMd{Gq9@DDm1XOer((k|ZkH)@{kTKB5s@XfttBT!g}tfB+# zP1;^$*})f_uvZV4<2<7Ii4Ehq0RaQn7^WzZ7_WvCXS{~uN6z6rTO`6-NEC@>av@=1 z>KX5&V`EJ}exI%B50%Hb5-^OAM3}Nf!x>q;1ZY*gygGrMndp_AY9xu55afk}*Vo9a zeHs*ptFCbtJODlmoZh6v9psqbP}Wioj9?TDwh*41?v>HLgqmOS;2y-TB|OsI?j-ph zcyURB`^7d<2AmgdR z8+w5Er9}+E8-Z&K#D0I_8hP8|;vOw9Sy?2m`8$B&$ae$ng3%9Vc>Yd`n@-cE`(n3ImYd* z>eBg_p9DP5N5i^1@OQ)f-FdSP%^w)fXfCV1R>CUyGml=u=h23+dfW5MB#ZRuhugy@ z*NUU*K^Xs{s*(^r$VW(|zmnSDkgyRJpJLWs*cCX7F*9Fo)Z%`e6qmFGvVZmrHHYl! z3~ie~esx}h=~3Ik>EyR+GoBD?cL`6$rQ!?*jL)R&$ozcS-~YhR7B~Y3RMN0U1?7m=;dbB= zH!>1l=#zssgSpG&gDs~z=zgb;J=t=onF!HVPLBlGM$D$!A%wIsd8P8qbHfMi%{zh_ zcpj&Cv*K(hw^`e$rTzm-(E-93;7YJqTQz&l*uC(BM*Q9b1rS+EjwGntKhQ)TRkuz0 zk-7_Z+qe4=i6{i&a{5y1?vInJY*8F4FC#0LfC)5L%HTHGX4TK4_*9ZALZQk6F3nz1 zH#*<=j)-Csrg`2G8g~BpDTPX`&22j;Mv=pjm?GPI0&r@1;1}f0ooxpOTJ=@ty<3a zSA_L|&wj-;v}eI#hYZOGox0T3Sp9NLh}KROOYJ2R-4yotcS2Ip$xs(EKFF-;uLns3 zQ@E!eA)v|a$oQ&19dBHdn^VA2-Dyql`!brV7htNArnSP1*_O*O?mhP;`^TcmRcJlj`H z6oaL{)CDw)rU)hQ!w0w()T9Qe+ApNe*>KT;qwzS0Pr-S5!TM;I0`-B@#Ab`th0toJ zKt+GO!uAo~RhnQ7zUmI@3z$RukgxCizzu&S)482bmJ&fv>Zhw|PeHh#m#k$E7Ya}6 zbtuRg?%Vv%+Y35IMl1I%m+Kb~E_!A=jN{RwjwWRms>e}jWqudC|EXwfPcQvzpc8(O zZM^xz9SN)ZlX-bk{;PZ2uaH#;Q+(DzU}e<)_Q})j4$E^4`DR}ta0_ST4QbJ^@~a!% zpa2pEPXV0!70;6)6W}OMjSy^e!?^%4&VQ?F41WD^>7uW_G!nKwD*UH7EdYHiM)X_l zF(P%Y8Hd~9|Hsx_2F2NKOT#b>9$bS5CpZN6-~ocWyGw9)f&_ObxQ4-91Hs(}hu|`} z`#aCx`|NYRs_zd!Zi=az8Lqz8>eZ{8_u)y7H4I2grRxe3`^U{;aToND zalw^B-YfdAj*C6r%hjs@GzdyX(v8~~Y4f2=#p79}#zFlr**y6~5s}LX!7|JPW%1bS zNqZ!u0i%0{zMTbK$6smoZnqFV=bHoDm^g`DtP)=h@>oH;O{`ZgBHhEK!qVsM8%-ix zvYAs6L!c#+!&l#Y4xpja-#>L`hcofEGhR?<$>DOU@8H>}+N~{YudQ-A^R512j(>KS zD48A^V&H?l28N!(pc4OdXrnouMRLwMY1uv9FCEZ!_X>4`K}^rG$%=m0`qHGk8!wK5 z_MS%y>PMBF_WBy-dpX?SHXE`W^YXB)4mJ&aU0sJZl4z&4qp8f!m*-d(7-o1~R}%te zJ%O;;AYV~&^VjFR-BX{_1gO#PC$p``e~&AFe5?`Z3O6-%7Ipez;JxpwsQ&YF_ahHg zz6zNjsMI3(MpW|F+4JnvI7+3`uius_BdA@x=EbW|Aew*Vw&r;(D3P#0Ml1Zno~1xP zT}FB$q|GC`{%ag-Ec=<1qpJrs&tEnt2oFcc#m*O;SJdsidIj5w9tVtvur#A`&MEo7 z)Y2P5o-ek#66-v8^aa@P*P>ssbad4|s#k(-+SWdU5ho@&A8zwPQ z6v*Q?T}F_jvDJY=aXi2M0K^_OdK(;e}07|i=HnGm~g_v zG2*~=X~Ah_-Y~6xA%8L=TXHN_$l5~;#+Zc!nIX|qIX3{R`fa(I;y?PI$6?%(cnZPB z4Qtrh4-we~S?;MZhU%C2;bt+vCx?tbQnDy!(O=Obxoz!l8Wb#YWgzY-)Kakc&DGZG z%vku{irQ64*18Q;Nognw#G6nHTC!gryZI~UgL3s7x6>Ru2a0bFtRU2NHIv)p;P>k zd`5)+yjU)6s059;SPopBn47@T-F;7)geN#O#06!xY2Z0#ICokJf2$x15~e5$H};uhig%#(|p6 z;Vxc<5f~c2-J0I$!Gd4@umw1&hp@L z@75+=_K1hRLuf>;Ybqf;(vK*VYhAj4xjWkZPeeVdckRB95x)VtJW zQUAHEV<|oIIh7`NOD6H03R3wQ$$P(0$BG-uGPB=2&fq(G8c!9MOxEcwQ$f2u)4XoJ zbcx5|vKKA%$X^fY@_QN)LnQk-X{JkTz3|JBZX-ZMl}i^_<>TfiZ07yWraxlFem$SA z^A7mW53|?cu*9S+9Ou|$*MHkge)(UU$$9&KYWXPOpy{B|nWrN#-~emWwVTX$)TBdJo^1 zDZj~Jgu2+)xtOm7r#ScG)bF zC;4?=#(k^8yz-{1H8$v)2}zGtpa${$f%-hC#qIGqM9tR}Ex2d#ejV70>qXx$v4#P( zg@aMSm>8HK1dgd+T~-iz?TXm$D4M2y(Jr?5t1!pZ``3V5eYMGzJ;SDwvA9l;vsZ&L zwA?3ZR(|Yo_yS5-74$Pu>k+(@Ieuqm!PL-e^^Z;GSt>C_Cv+s7n37`NqwIp_R^Jl`b)AbfD}#>*PW}D4 zfp2Rn_62gG493``M%>0RyK-%2A`I_!8YOp2ss_2VxsAh*a-n z%+@tFq1k43Di12+GB7DHQhXjT*oP(5Re+Jw9wb9>RS`}8hFS|ig4@sl!!U#s6B0&z z!W^*BN4e5&)H`u;c9#EcWXBMKfTq&ZUO$aNIBy`mMT;KQiNH;^)x*<7Yg_kfRfDlm zHpMg&edVJ1>q-2H=w%5-(<4| zkf<@4X}{y05eUN)NF3TBTTrK6md{QGq&D)gJ(J@$r+H^Tj{uVJeZ)?Z%%!b)Cs5_KsoeCPh>?3@!!#R`x)jdL&3-xyt-ss&> z=NYnCGd1Sw;_*_lkD21s+@Jqji~awZS|0cu38?(fCc|EN+dswFdq3@IK<`Sui>T98 
zbzIgi?!BS1k&o#4TNJuO-hVWXm{tFsIQ4=@8gF6XVI(T?Y|5%xc&5KIiV|YIFU&)l zsmhp27s!u)Xl!89J>~DLygrS$f)Q9Pv2ouF@+p=zmTc5}gecaPQ|tK~G)c0MFDoOQ zW>t&Ue2zk(c$ap1l4J%#59IDxrehI;D=q$5leq=UyUfs7jHgoP*}8gr8F#rHu<*sx zweCU3dmvM1Dh$%Y@D1y7Iz6QLP5J4Ir_Kwg88mN~ug<6;YhU|2f+%Sk&2#E=Z>U`N z840d*e7{%>Mg0`oh5)$+cC@elmyL@6`(u%L?*lhlRVP0RUMFm~1RCNmJHNZ1>8~^i z9LMP{o#FlQ?Yli(*Hi8|KPmAf! zrOH2(vr^nLQcSqSCgSi*5 zp6crAC(Xw8x6*qOFyI(s9--RO@ zm;^dWZmr*WZvsR|tx(Nk>^E-)F#P)e=nV)4;8)#>x)8(p^bQzi`HiphIAb-WQI)GBk+@2%=(e2nbej?+5Oul+zt`O#fc0#i~Hb2L1d@*_9XqHu38 z+F>VHDKUQiDPX--fTukRmap6GMd(juhXEkWz- zUdlUugw#)-pEPfLVU097IcCM`?ECT($*|0RIHuZTSn$NE)iPv2c=snh=mq|?*}(MV zyG`oDd7+|xoTg0CVg_f)ig{A`D@ru!6U;>WCcx*zM!+5Ex@RXO4^%IZno!P!SvBr= zdr0T$_*bA!>(?Y+kPvIK3~AwDbR)Pu;7$0Uylty)&4{B^K4=yhn0Du_VHTHN7e*{D z1zQ-X+(U1K%Z8H8j&i{20Ht^a^i=9Gp^tyIF&O#%QL+ZDDP9^k8cp`n%!tW{W<(JIg65TMjMn_YTTD$<)$nk}*G`}`E zf(SdQv5O7kCBTgZ>bc3(9Euh?f{zAD#6s;7pZ%i^GE9rWWcGa*Bbra_euuEv9R-Z3 zzE^aQ&!sHYCq(cVk?)x1@*Z1*=_Aosi&LH(I!+>>=~Wm|e_6`k%=fAG0LU&mZ-ip^ zN*KGNgP2XV1n3&$7w;;Qu%$V5flR@?@Dhk`U~K_S_@)4N(YF2PzJMoEn5?zg%1-<| z1Ev-V@Rsm{mb!po8rO=fWh-tD>%`*P7mI5cIk+@;eHyT;zKS*?mvTZdJW4~55xNQD z{O}(@9rc&E8*2iW5ZYoUmp|CmeefzBcH1e{+$E0j?ebyG_P+XL=z&6OGim?#2OPJGGazr{$C(NweP{5ZT^y}Hm8M8>Yv>DU zLRLTW7sQ#MUL1D+oy@gksj{`Tg*$d+_rrzVn;Cq>JV}fe&w*W&mdtE5OAL%@ZHkU| zg}=wf-OZlUgzIT2hS6FQ$yV(vZ5ZzI{sobyNEc-dvcBD3Ui=!6VM#e8@2L@2ydg*^ zrYXIhBKEO>O-$$Lw-**v2yLX#}{kwO=4d+#_d=0)pqEH?LvMdy@i|BfK^K4_4Vlt zPhNo>noki^qL5Y5rNg>B@1m87yRt>tY4JD_;&(%0XL@Z+=54Rnx8CV<@q9H|$;AO^ z4RE>_xg{#>XTu1n1dL~L5AKw+F1nPIS{n7&h`G1&Tlnsl1bYzp8MQkE8tF!H(sJ&G z5llYdCg&JEWEINJ(qbP96JcGpLC#w8`}`79 z0-htSXLCJ3)$Y4dzjO0H^v;oo8E#jxgnTl+UCNWoWZ zS@JH^LC^XCs-t|KH#}1Ko>}o`)day-bwc0y@Y`#Dn_;)K&Z-z}7$@FON&3Z6I?*6U zx@|FcZ|pX1w4O3sA`~iHvET-2&#P*lrk@sCyxs;2?;Tkl$%K|eP0nf1>?yS9c|#HI zw}R2J&NTw*2CocX0KLdo2igKNM{*%5`k2IVomuEV&*tVAz3Fw0>e(&~H@8rVe1k|_ z>Jq=XyV7@MUz1C?TIFw=5#SUUis=DRVh414nE#4is7oU35xE(Ng>a}a>yjbyJt0qd(S! ziAokpvK`)8#Trpsd^ zW7y{#01d7{n+KDiS|UsfQXXvwhuq7&W3z%cpd4Q+3~Rp0sQ~JFQ;+&l-z!i)!|Dg( zF((H{vSWo9SwqR(cX#B;8SYU#Dx*>HG(JTe8zOFp)iS$j^nIJvmfCD&NrH;5>zHz> z7$rULLvs|A+bgOFX4^!s7M`-kZYy#9Mn6DLG;r*q!yzgYwTWO0{A4&#FJNryM&m@O^}yP-J%Vp~yfZ9n$%3v{SOhwtiqA?Q?WD_dwcG zHKNs2Gm`+MSp_oYp>!vas^N1z|@*>ij)Elr6yz4Kqs2w98WNe34Mcu7y2jBs(q=YKK@LjnKw(}7^EKqxid7IT>xQWoHW3eKGiFVAuYWkSY>%FnovCif@WW?8Jjl#U2%AW&x)?qdC8hM! 
zO*MH~K_*vxV(Jf5 z&#m~d%;d0mDv1{$>j-!AI8ro#6*!b@YW5c}&8+V#Gnz>MCy_4Q2O4k0IHVj>Npk#7 z_NWel2U&pwWGssoUYGlm5mq_4jHt$}8)k_r)PoCij6KwY4`U^x6hFFZ`R|#kQhmdqPhNY^KyEIkFNRimmbEhV0AHPRj;riE`%uwV+El*O zS112vDyNBrWzFTQ#C`U>dPtu{p0*+wnXo?Btzlen(Bj9qU63O(6T!$j&Ely0@1CV4 zZJX@eEDBqjik>Zwi6!ByhEj}FK1U(gw#VC(%7#QK_~*KD9fFq|4VT(js7o+OgYk{I zs|mxX|IbN-f;j#kWRGKf4cylm=F24uZMl|YL`UBgC5c}K88TA->h zPcZw%Ok{21WgMwj(4~56N(o*I8g7hHsqrh(7=ZuuSV@eLjCJKSz_^JHBo}f9-owwzTN1^vj>+mhnJ4`#dZv$qlL7V%zIrP)>Z)6ndl-|SRIO5?OQI(+11xeK|vJIEJ zoQSwMmvYco_`|=0cp(|FU92!F}TSEA+KL870KnnK> ze|FxiZVB{Y^U03F0c}Tt-WeJWQO&*j`$)f`AI6IpEn8JmEajMgB{zwxyK9z+RCQ^xvx}}I@7BDdy~CT zlZ@!VGpUbKGF5`~A?gS7T2P=-Dqk8u@Vnah46RX-r3AAllAiCm3=wVjBb~o4X$auL zqhCGd+cB2{Ca%)EhNIS4ANQ9}{WB5>N<=rJNLtqfk4j?Eg7xd-pKcr$fbFs7#*6pG zyhM4h$Vzf2G+4>QC%Z+= zG(G3YU~*&^n<16S!bze7YToh>3V@`s`WPcRV_B)3UG0!OVBq`O6!X3Jf4S-DwZxjm z0FN9h_Ii5R!EDSe4WeSjt40k9y4Q^K^fGq)W#5ENeqaTn0{f{K52>&cTeZG8R zVcd4(obc6H$jK_5e8aX~-Zn4B;DwRFBy@|WbU&ZDlk9~2ghR1iK6;J>2Pfpu4}$H8 zo{R=?ow;0SSQvu6&j;k(+0~_STub_shE_*Jy>|B`3f|vZ`~oN`|kg*L{Ec z*dzD|L&Tk{Wgbg|j81INh#fw^iG{c-=LG0qpjvc1C(5l|puGCOUOroYN2tTFS$HR& z-P6yGh+(wR)%o%9LWy_@};k3;cJ*^9Hw-#qh6&1_rNebr;JUs zhFLj`0}nPh2Mq^!y1=3$iM~MLq^$(v{8Uv4FR%h7-(h-p)km&5QPf2K>0n-}GcFeY z)xkWIqNx3ubZSsUKzbE$56sy9L{EW)mam+0odN&bL=fn?y#%XWSNt5=wA7B2VSV9Q zqiO0IjV@{DjGpTmKoBM9$5u6Ie!1J42NM@|@4aM0!taX9DczJXR05#n*s3I12c$LH zs=%SyqArd~G^h9LV@RpRR25UA=(+(Y21$C`xDcoI!?<`8nM7^lC9v>iPWojFYD;J$ zCLit(@+VrFHXg9Y!DylQB3HK%apej)2PLH(aq{6xnYQd7H_@15oX7A;w-uyeLdblC z7gJ!ZZe{}*62{LbTx&<0H&5-9OpCB$bS2T43d>?nInUZdLqN6aG2t=z{|2n`U zRw~N(lSfu`u>XOBBY#9k+~UAX_b?wsdcMoOMZ#j^X5hpqTwe?>u8iwlVU;KVa-v*a zkkse!(Y%*Jn2=S(PG=|g2Rs?96(U;*dGS$(bi}hh?iX2>BXzM4)?GZ_A)cQ+0*lOW zf4YxTe2PgtX0ATv%M5KvmWPMu9AQ;$5#0?9JZo#i(m?@|Q2zWDD3N8y*mbSn z&4c#@OegCWrJS=fOgz6vb>3iy>etg9xu#LGn1UtzIp^f==~>)nG$IpYf3sv zl(~B!5rG8n$MO59)^v~>VAK)JwIofeM8W*zHIfbVL+<1tVxxlbL4u5W!2zD}>=cEN z-MvvIzD#LJpPK7?d$%3Qk@J2JWWfO!fcq{U0Gjm}`)1@5gZ=CpJ0{j?9dT8|b$#xq z?*V-eHor*ZL{r!p?XA`tXqQ=x^=yg)HKxO+vytIk$N+uq9%l*QDDjFMVJA|{95?*m zeSIl$a}mP8Q8YMU1-M%G$9PtA*#jjN6}#&JW-(Wg>8+~w5dKB-zO_RW%>1>&b5ZqH zU7hyNKrRgUG8a;gcvaLu7O-BOUPx>Oq=VqPu_QH>CACYBc$B0RBGbN3-u886+Pr%9 z5_~kuvEn&J*Tl~7wln4}gkjG<+3)>WIH-Kq>2tT0Rz|A5d>sZhmjx0a z3Oxbg0q8co{%VzH!n%K|cPPc%_$|%zv|mz^D*MAyDc)k~@%2NdtOREuOLf zL*kYW$PDAR71HK>V+OSjs&D=2FkCdbk)-+1xB{qo29{um@8ca3(A(ckfa z3V8$@HM)& zE5k>api6_J+-%T=q6o|2s-|VT2SdH@am32E`9|>K$-kSvY~!@i5jK_ctgDTbJFZs+ z3!2lg{15~Zf+GvX(+B?-#6p*}ueUy7eWCmpFCGtd=J)3zk}|Tg?hgFMU0zrC1$UyZ zg6-nvO`P?07lV$B=Nn){M%VyN;BCid@Y`mt&sqJ8cDF;P;6B~5`TjdY7fM^OhC1Wh zJseb_sQSRj(v9MW_zs(5{RAvh#HIV%<@s&oN~_yAr@oxy%C~xrb+R0tRL+nG6yuNK z#&bC=O+FmMd)BU*Mz5GD2(OTZFJif&d(YB!|6Gs#;`*l%eAB=Aq0be|=N2+hh(lI4 zdip;2!LhqtX}!HuZPx#xF)*CmyXSNd9q6dl4LkYOSH)lGC#}?)i(v-X_aw71oqM+x z8@$5JlyPY=83nR`H*`nJR$KO4YP%rb>_B@o8_B=}GE)>1d2!zEKele?kA#E@ipTErLko`}E zetxF@->SLW!!R=?tvC?wcj&o;wsmAnmkXi85m}hE+^xxW&^tX3ZOaDhC7IfR(6a@H z)sU3RW~0kZ4J1^oGNqqJfL2D9{)TMF_v)KR%F!FE4yW9|TfFP%h{U4PJ zL<$n9Lp;V9Cc>V!t9lO)2;MI>X&aXBK~hG3w71RAnm02&W)vRDVouNTqE%_Yzy6j` z>Mcz<81*j!lX5U=4GeSkME^nM(t7|LYTzNG&t`!G+WCMB_-E6-20fNM>UL69Vd#aW1br<6KcBfY`rqqFw|G_w-~ndk zx){V6CsCtKavr*^@i$WJ2xb{cM2mhkG&h@g6}WVu@M=Cr(}hzt9mOzzFi%5o5mo0= z$9D$g#yW-GdcX!Pl7uW94;!1z()E_7HUNa0kFY$;jhyd9X83-x4e)-SRmsE3s_Y{p zp{DOSUOclV<=N%K;46`Vaqe*4aK|nuIFD{Jsbkf*n7t5|QI|u63aD%A0#LM;Mz`s4 zJ0Iga0A(tUggR;uwjxbp-_!gY5f;=MxJ!x#@urf)C@q>R9I9EHnT5CDdE{NTk)otd(VJoPh`) z6(*5h5u8n3*M*E0-Q_JL`SIpPnqCW%KzGVN`clVZ7b3WTR1}x<(Qdb2FVeEu7Sr4p zrH0bntyJ%3UFvgB1dqUJNI7+-<%po&QP%D%3bTr1s_8>NIPnz4y}x5>n|*P@%y+-P 

y3cHAB1iAVcay*nUD7WuZ6GX+yTNi*d5oH-X(%uj+ILmF<37jXmlQj$*lqrdb; zr~Nu-%xvo(TP9q%{*nSV?EJnh;Gqm0tj%bIt>`%p2fy3iqJ-~iH+|QP_NW=I!7?5t zWjyZuPwlTT;pva30V|Y|!;6598!dD10e{teLdB=viFJK2#L;U=DscS@9>;Vc!2Y%= zBJwXodsMGe_Vn?D%0CS7{9Cbn*UKwBTC7dUm<-5yV2fPt=eS#X0n>MpDT@VTbzWv+ z)AZtsd4i3{IlNi*T68BNS9=vO{q;6X)WddPBi?Ne@khgxrp>nk=DYSl=T7Zvuxj8x z(9z!yRGQGhOEmB^aW}8)0M(%(4pGND?iWJZ!mdYF@xNT0^$l8Osr__UeT@S4>DcWD z+6gV)^HS39?v?6%7!rYng>m;AXFV@RFmEmq;GN4ra@?0K%I9v^TDczYVVo?)C{YnMw7@gt;6RxtXT&)2(mwW@L z!^x*iarNSBs!&_eddD?&@TFVh5jaMa|5_j=xbn*JsJ;1uY} zW1ysa3fR2_FFuK>U?0N-v2#IL^B4&c@e{j#liEf%BGQ3|2I%hdF*cIe@D|gI0PY|W zI(z>IWqFBUj3v_WQvTYSnira;QXmmAwLURsg6bxT3*O%h5}bjw7V}$3stF-pQcqFa zhO~0tmB=){0-^*x_L#bfF_1~NE2j&9^p4WXpmM8V|2_2OC%>NXV&30uC90;aih4sh znGzeU(7x`GwNwPg%+pNn19@9kcftI?L@S%Z`PcJ8Q4qU?%{xE3xU!zyGQ#bxcI-_j zP(mUFlLf1<>G4xmU6Xii=-a*Mj!WW}G+^1PWU9l|93?rgE~?A*8(R0N;HdF7d{wRTE~lr^4z`79-;{;62Z0{xqOL32kRNA&vw`sag6 zg_fQ-vyN+xqpi@6yR187s?zpDK_5e(<42+0)w>gTrdy!Eaa*DIWfem3O0%7f zp@gRr-VMJY_aQK6nv%&f?`fG|S{%V~F$=qHcG+$D)eLbFpxyIQdS^wmRzr4@iMd`!7beVB{?_(|Q|ukN!-m;(UFd9l&yVj4^)i4V{eCgzkXY6nwf*vXI)> zMYgYoEKymp>!Mq+8<5i1GGD_oE$d=kl)X6#saO>N&aa+REo28B*_Kt zTcSa70smZ|@&X)eho*|eqxsZ_{++07L22;NWvVqP@A`ASDkp!n1|@z!g`YEQ`pxLo z+04TvnfeV@bZkrXNIN&YYLz-613h<0cQF)~u0O9PkCW3iB-~|xFZNXCoi7q#;}*KD zQajk6cI^6>7*skq$*ua_QF=Mt2LwBNqI%W@uP(m_S$QsP2z&gnFHDYw!bE0`d04bd zk8*E6Rbp~N3sQ>0becuxbRWaeTE}>2e_{UII1q0qDe4keFlgEHrf%fxrHnkx!&sebj7y zsP>bu9m>DF5}4<8c)UB{bKcEtl%7@(zwQ>sQ-^o>cpqXn2IziSx)c+3cHP=dY7-^jMMXjW=S~(p zJKM3sD&%+|jb*NA`GsGKy}kb_@fn6E%HKcu=O5iLGC8RUCBh>HxzDw*+V_u7zaII# zz(szfjLm}Hf=dGteSG*}_CNUH8&^46Eb+1}8DRa-#N(|*YCjzm)`w8wrkWv$Lp<3P z@j@&fzw+4`hkA?l%*zmPZr5nm^PNE`wzrKVFssXr%Yrn-?hrKbvT+0Uxq!QdV+;#c z4JYTukmbPrzF6wYc%cA|@QNVF;gU|?J#orCN{maKq5Daenvb0n;!bsE1jZ`D+VMhS zPFF^7E7`4}`qDKEFS_8b*c^~+Zu|krpVFyl-Bb+4^OBh+fknpwy6NJM^11c^cji7Y zvK<*E+T$frz4=`X9$uMywe)Ko$cMPh*M34XsAm{7Yn!Sf&!-CaaTj3oc}LAntqaU0 zcL6fLM_!utGt98I@>);+4EkO+9QcNp2F9jtF!2aT79R0hqx`Ce>Y&)2Zo*+MGcq>ckD zhD>XY30~mIdbsKOkQ3#&b(~?szNkzG<5}%fQ`HA(zetf$-9Qu94T^Uo6yPJSm~*?r z=Vxr7=37b8Gz0=5`7T-^iRpPX5lB{bbVY#3m7a@4g42h9j4L@>nD!foaoc<*F5oit z5Zfg2A6E&ORf4^7jLG5HePS<#z9aIQ@l|2j7dJfRo7U4Exe8y#8xFlpZ`s{!tD8u< zI=_>Xg8mCA5zam_Yhs~_BK{RgAe)Gp*-h6TdZhSco7Tyxi>hY|O}6qHdC;}FbR)K~ z^{4Qtc5lWdIn1+2V{n@U=N!ZMj3rl~k}tDSOOyx~_XYmRoSR)`yst-SE7=wgBJ%D) zrcNcfn0Y?@YqEi3t5N0gWk_@Q%hl5$G2jlIz|;12RUC(AbU;_qUw(Ao{xVYF*RR6= zr6c7yj(yGpYj7jgGgh9}>7)}QOPmlTEB#n^cUMzYjF~#%0(?ktWZ4EO^)agXoZe!X z{W5&6(B7vRmiBa(kKKxWk0LoD>ry}3co_Jl!idBoX;8-$|y;2(Ovk)to!^KzH`Fru(|b7V%~P5s#uNL@?DSH$xj~B zwmC~)Jl{GnPoCqX+_Bp@FVow)CXrjM3%9P3%tnvSnKYw!3qc`a$lIB%1o~1#6mjqREC#uK8ZoW07CW-9W_ty})GE5biemkh*2nT*bfqy%N!N zXdryvp2{s3@jdlPTIU{U!9CLJ>VtZGpY><$?pGxPak1U;uRY#+g0rFj`w~3Eph(2n zb6i3DqWmgHwNo%hwN4!PIUhjt6Ol6Up>IFQuW~DBT@dARr46zFBO=*3pE@1N=}Ng; zm4Bp@M!cc(@xqG&jfghbIF&Li*uvzj;iJ?(e3BzU!1C=aEXZ4fiJCL>;Y?l@-Xn6e zwA|f2aK$1(|7#|8Eakw`kme04wnpIFAd_RIpF>_4ULfJ`MV^i4u*067j}nMjc$ ze-ej4Y46y;R5>YoPB4=6tB%CbDF}%Y!DG&b-a1DvCUXHH33(9t(@!cdSe#%(VCq{- zi5DOW&X<7m35tG^;AMPCdZhEU<3&6fA?^L;&CW@71j>JTiM{v;(mc@R63}Fp>n_fBQ?yyS_RS+MRHAr8I8k3;OP}x`F~DU@E$I ztHB~*=b5~`a9NQf#220Syc7cJwl0s3hYY7 z(fO+#N?*ydT)0iWcNaG;T%Sz*V)nyUU&)g()hDpCmYE9Wp&W|L5~J1_NOPy7TISoN zxg5n)?AT)?)`fdPhYq(efw8!4r`L^_m~lF#dn?h>UN9J-{tYpC=XyDm*IN*td@qsvZL?vDLkI$=yy(qCq*-R2Vk8`C9-jO?{ zV7~H;Z)@W#u6%snD+iN98!e*)I3e7gtlt(cJpO}t{Y~&!JrR&V?us1B-G5-)V=TNJ z$X?o35_3jf{kIqj@@O%iZ~8yO@Bf49qMK1?ujMI_NA)w>T`IfQv_K33ocN4Hutq>4 z-O<#G;`qkp>!2e;Q;FEgncfOy#Xh#gs3K`}J3c;#mARFw>!iyL?1RjiDG&Zs@YcG* z@Vv83CHe&_!c1AE_{nNFW_E#3*3YnWUM_m$eDEDUvSF)Tx#;7))z{B%gG3QKnmTzs 
zTQjmri1-|cmS|D9!&qZTJ%=-`%vXnhip%}GNKR3jh-SZd=AxWGAajs`sIqBJs3!v8 zcx#y7S#(kPa$thoi{QIKd9A#$WE^?5c}{;@^%{-HCwFqGuuw08NadR;sb6%T7*k112dY8R2$qN>QWn1b^zs~06XmKofk8OCG9Oty z^>S@Ot1VH8bSm@(A?c3y0kn-abAMNLz%;~om}wuy$ijJ7aV?2BSQcaDqUOy*EVjFQxB9k`t2N@Q`%Av9T)nt)P8!8s8Sj6Hw}OWn(pw`}JLg z{v2zJEA%H3?s=6tu64ss#m(^IaF{LMeRo55=0S-C#er@vW?CV$lWMSEc(?slZYD4H zGDxF0x=$?E)ej&DMj?@c4y7XzBDmRX0|zE}StAse;W?Q?SHs=r9#??r$;EdyNy@sR zSsP6hepnaZY_JgVT&?$2DraPU?Cexjv)r&5=_VcOr*a@gryP9qLLgbTWf-N-8Yddg zTJTj_Fa{~$q}VbWoDKn|8N8k1Da|3SEq2^Q5oJ0>F+@w zFa?j>q~FK0Uu>ns&`@9vPNaM%U>b;)p+x)Pw(l`s;**x0o{5gF+I+TIFf2zem&C>| zjozN;)tIanv2I|*v&iSOhjz9%ob)`Z!hQC17}eipsYb_7dMEZU2o#>i7B5PWAx!BPFl1>%xq)* zL@qCn&JVUrPqOP_>Q3lmz|63onT9=0zLzds;ECbk(AAawlw{=nQbm}Pg`qEvnCpVG zM7vsD^7^>rLHU97!gh_J=`r(UPO%QyDec%9@Gp4?oD`d?`~)Ly@+SU zduWl~I3N|OSHUx{ueLgLvYcjG#!{ZX?!(Lw+TZ?t|K_?x=7^2$~zc+AN41QVM+u?mS%$DDs|;I&e>u*Sra`0mlgbt|Hv7F9ovJuTlz`vX+$o=aWqJE# zhb=7?A&<0~d?Ev#h2^{JR9=;Y@1A%Pq7ma+x_^COMOlgYmXU5g!~qKwJ_n!SR2z8Y zyLX-#smPhhF{zq#>*ESWcU^ejIUyRcvAgvcR|bYnh#G} za8zln~p}*d1 zCT2G9oFf3@?lXc4Q@D*{tTf_i&>rmgvWvw)aApE!f`d?KnH zqWtJPa2=eivoUX7i8e}T7&Y}hB)po&<1pns<#HbP*dvR09=G{wE_AGwK5$mZw)4}b zyvODy;pO>?O{u(#pTH;(lPbge%gcvCp)y6wa87^T?Et^b^q~dgthX%XjbQ!oZ$kb* zW3%*#BW{;%8d<0{lL;OxyLZM<*|z6}td8eOGtW$B=Og&**lFTX=QYKx$DBGd(p`94 z1!7PS*X{Y{Oi?D!bjVJTe8LZU8A$}p{jHkX3+{uBZF)&93*9fPK$`SG|AYzkR%?TV zm)vYSb1vKaH6VX4;mGX*IrU?^;cqV`h%}EzvpNCsMgMp<(+f*79$5H)!^pJdKX_&E zNi5)>&qB(@C&%QgNwEw_1@#r=2mp4i&b1@>e;Hmg{|&Dh=ZidGcxeWRv^L1a1m2p~ zN+VIX*sa|Q=Km&^D!BP9Xtuh~od0%W@odh=h0g72!?L0`8m_+C`^K>Kp~^g|jzrLC zFLUV~5|*bn4FWSZI!75tFMB6T5&^TH%L!XHsd4|ooDmbUfC5`faj5g5V^Gl6*&bGv zm(}{;VHns+3;14ySM2H-EvrPYHdBIcXUoVLu6wY=oX_2k7Ioy4*baw5njcB45yFH(4{V)Yz)w>F;aSKl#_+hOmxOcG>4LSHAP_gYJcYc?cRC?j{<|_)fsw zBJz+DB$?^DWsZp}nmPgq@+aWBHVL>`G=IdWT>~#&SD%m&^x+XQGBF-Z$}ka|+CIWF zGCy%M=bXs;F(6CbcSeSvn?et??SHl-Nm*yiBFYLSr32-cSOeuu@fu5Tvx4@!Lqe7& zV;()Xrq-rF4wn-g!FrU{ulW>6mVadE$Af#QY`5U!2b>X|{p!@eUpt4NptcLU5ooOC z?)}K!h-ZY++xQ{1_*mGBRcktvxn|akdE4e~yOyBtD0K2y1Q$Y$@xJP3mtHMXTMRkjLd_(nhU{SR^qg&;0Su z>1m=~y(LBM{zTK&aLll$Z73ndFpF(7+fO}HyEcqO z+^AnEkhFvQb_L0JuK}dsb9!>^X`Y`^fbB&_rF*-iWlOC|y zILMilcb9(BN)G(oliun5GfQe!N7N$6IfeuH8&T`je~qz|DT71B0w+qLi*6 zSl^5_azDWL!X2AwQr4Pdyzz!6J<|G0F15Nv9lQf!$;JyuSAoO+P@qs&ZBk=0o4$26 z2GexjZs|2|ChFlkEVDPC6XrIOp1VCls02RTNq^RHYn*)fYn9U!cB34lQDO$MA#S37l}h7OYouz*3@aKmn`014J^-WjxN3yG7HZxciMVQ@?M?mjI z4n}KNNmpAuoHk=z->V0I6G9g4)!f4x>8+52lPs5P;uzo4p*~xfQS)O;Q5aJ)y?1`|#_ek2<&BeCqn< zwbw-~4EAWl_-19lQe&K*Ou4qf{Th_(w&YX^;1Rk%FZXBc;~|tyT70(4_!IOn&j?FJ znU^HUqNy{P%^A;p&d;Ef0@nR?{gz#*-X_rO=XUJVuyrCba~}OB=t4UE>)Icj+fK(L zNh0|?>~e|9*T>N-?Cj3%JGTDwmZ$Bk1+zbc z1dh4V(d_RSE<7?ZWk`_i{$q;*>|eXl$!;qjK97!_{e%vK%1-6S#VZRAmKGIZei9EV zf#@-df0m*y&K&;-hxLF}lfMq`^uR{}etE#Hj+LwJtba{c{>O>9!#BE1@PA9y^L6h( zHYY%U;GZRltjM?{O=~q-T|968)xjc5(M$y+Yh{b#cd{%Q8eO(%`z_XZO20?VhC{_@ zGE@{lXM6GzokIT>idzB%Yd9XXCSc%n4-$;()f>!5YBN|EIh3H235dHMEYhinorinH zJ90)u3;K0?&;2-|%O*b|;aII5i*N(2gCArDr zi+}4y3L#Rg>3~Lw==CT0#Aaucb1WI=7QbRbFE)<9G;0ewA-h1nhvXMI6#jJe4+0LU zF4-R{?&&Q%RX4PI?+vDWzvf=k+PK8%1?W!%T*fJCm4f*Y25ATP0s3?U28n5hLAbF)opkX$T{VIUz&|R&sIgs^gxNrHu@qSpe4<3{tDH8Ku>}C}V z@dc_4A~4?;`T$J=Y$S((eSgbep6Z>r0OY#)he)?3x*28*-g_Tu}t z(*|mpHL5j_kj8)d{idZ&x=gQ8*}oJnVnq5TJsggM9+#b@){f^@bszn`vEiyiBw7`B zOr;eVcS}FV9w;CHtESaYP;^vd@@-Ll7Oj23w`bX7F zLUX0kc9H~etK$@^e`braCY*S=9QW*elsxs;WBZ%@9kJ9VVd*tb3Yxc_C*klz`aXS5K>gRS3qMC^Fo?gOvo!D>D8>UIGB zjYRlxsF`x?8jZAcU7hWppCaUaOzZ81Pn;lBDz;1 z>0B#`A_O`N zZnN~KG4tKny20*FKu|@QRuDOlo1IGXg;F}4l{YRsV(*z&u0pt=WSfKF;%AO;f!)-g6A zR-aHq#e54~vjuKSYZ)}}oCx8|uBQAGz5{Nt#|8bmJUo}-pp0BkZjdUwsG7=!aU_}t 
zbif+oBDqKx$hJ+g&XV%t=6!>Wj+<~<PGS8Jl+2m3&|A_$oaao1PO%La=~C2_9!-jz@JyDaRZfH+S$R=Hgd$llP9 z28&Lc1mXLl6Zd>J^{tSB0?=lF&t-3VoKqD<-?-eiIu)b#3^>{Vz+vB&{ZhUB6K`7Y zA$_=?^0L|u?|WDgxEq)aVIZVXf7y#cAmK3fEhZKMy#f%D_RmtH8uJ2MaTK)sTQTlMPy+{6(=ex8vbjRK-PIs<6yt%D3^{{ll^xmfEX^Y(r#IOzAh@f>Mr52{bXY&28NJ4ZY^;mEJKPMiFoO1xgbE^AIKtliGbR2n-~gzBg#M){(lIBzwJN)+bGwcX(+U)KaVO7Gw;bjF zprU4%N0+DZD>MX!a@kMd-X0$`PaXzxaTQAyfI@27$rnk;NQ=elNm0F4n`-S~Mjr#Qa7pDYiNd#G#e9IW+Qu*H{>>k1 zK&In!U-J_m14#9r5|_DRj=AS_&uuB7+>QAr6S}h}yp`pjOuP z^&AkPX!J}oj9C*3kg~Dr7QKo{8eH}?Hx`P~Ntlo`C#iql1!pn--LQ(-*}+8cC)`Sk&Q#)w=%IKQkk4(Q7(Vc9t6MLD7-Sfm z26zyksf&PN)wJc~|C0sae^s+fdR3QXSk1WT`)j?2M;%7^tPAnu!q#tjN9^%&ddIj~ zpTGU6P2y4e&P$ar^HkiShGZ8maH0N~L5sIj$isoDS?p=?RTMf25n*=9_t2&vq0*lF zwv7Wi7Fl=`e*dXo99={_7dNR<^@5w7wgzYk$`#wIf(hPj4pY zD#_6Gt8>eh(6y+gPv`BPH8I}-4l{)#)nV`}BJ8FP3 z*{ne;PRdhA)1w3q`i7o>3d{c@Q9uBj+s%R84UQVTo;payp>m(zsaDT59q2lG5`9tx z?EBOVG8U11*y#%PnHl##FOVCNc2<^v813-lFip#q>NtH5v-9lGnUxBDebV@WUtUVV zSO9a-MDm{qCbHvS1~clh`h?2U|)EHnQ`>sEXZEO>inhlN=^%vBC|6*LTQ_8uE3gK|V37VsqLe9*?30(N}#TbGh zc9U(R*=4)h6}*Z1)hz9k=@J-%)$jV1eS6F(yK2!D4jC!;m$D1usUu^T^gCmJW4=NE z&%yGeNfIvddE`5o*irpJqU^%~sN;7S;l4@kOsO?MS?>#3eQDx1y*)ER9LHWgFR3=k zw`q376d3*?W+?{LQv-M&AMwk%+?Bq^RPy1h-F#Y=#MR;4TG#L12vUgZ%Raaa3=SR+ z+^!he(ibroK)R(Xm0i2%rT#CiUJwJkYa*M0kUz5;TvyB~lIr9b3@lOuCxn|pyfftu z`htE9)_ad6fOtapiJQX`aChg2i^3=OmsT>mV-^Fo5K310Rm?usO5}V!c%s#F`k+J` z@gFaCTYzJ&oxw0hI7;#0HxXkMab@pbEO44m?H|%{IU^tqXkQAv1+Sim8KFqe^M)R_ zCKh^IJtQ2?E1u{ZZZf8Sa2h0fV`-8C(F2BM$%RPbB#q$O`u_+wQM_pZIMFG8-Ze-zo6lSxS~e>cM@Q<`K}l`%INo@}+pnnI-fhtF zVA*z1zwlA2b-KrCBT(}>D%!!7>GGG|DXnuuNn!ppYdsi)FP7Z;%0`l|kUd)1wd3Y5 znnkVa;XG!G1$h|?RdR&r@R-Y2pE_x7Tx)SW+y{oK8EUYG#rg@FpWF?wIGQGnWoj7d zol_j2J!TDSJp%&a0d&hs^@2gJn;02eR1qWB!OIyJ!`OXlM5C|E4gI> z0Yv+zYk{{ZnD7u~TC-l!%QAdMU4dd>YbZ8yr{4ULgS?)L< z>#E~x*eTBwf$;yH2wK8-diMKl5hT2rEfvtQFMwj2B7S`&9}v=6(=1B{8r_^|=)BxK1sJXssa!YYoMfz);{ zgS*#0-L(C4$Pj@2D;!pS(AHn9OKH14S}1WlVf}Z{sARx?@X{5sEg;Ig$Z+ad|6WY@ z;Oo~Xy;QW5FskG2_CIGy7c+Lbzv!g_*qAAl`gP-}TW}%mGVMF$mw}06{lNDUtBpjR zqW;S1TK;kvE3yx_R-Fsjvf|U{YWlCmSQ1(8cz@*EtsA-tv>wLh5

L@i;({fJaCBRWQ@CvIj>3f9CJ5uAG)CNgySxw++b|KXJcHqVnzqYoMGi4W={% zgR?MNB@iAY{a*S>EvoK$VY+6z*9QOx#MH6%kQ$yAeU9B{56}h?L~^G5zb_Rs+*?d` zrz?6Y?y(0G1bxwtu@6CFQLnUL3nUGG!Fx>mbHAwmL@livT-{AJX%xfnbR_WiAZxX7 zHdow`{&Qt74o)}k0XDypJ!3_vMehMBU#bw-g=K~)Q{Nj(oR($_{fR!5D`;6WGY z7s66A+jH#eiaNzl9*}c}C)9w!wKJ<^Q?F$`O3v1YwazrP1}iq` zTSi7&k=?bGU?3IHf(R*d0hY3JT)0@dh;DlcL0j?5OsBf5xcUdI%%uGBOZ=( zlq_P#!TX?IV0;uJ5&S;#44Ed5?MUqiB!ryL|KwK{fW%RF7uPY7Iv6SANDJt;su<@* zp{4+jn(6#(O)o#RiL}c|rW%aH$T49-RNbtFY+0g-94M9LZ1uf<8|0^q){#fUvTQZg zgU}lLE#)nX65#2+p+S6{tb*AL+9I6RyR<3k7Gi}-d{!xa6IMi{eGFMocPzm)PjecC zfM}=nzL)kRzW=Owr4=aCWA-Akydr-Yyd3|*5Ql^!lkv0`v0V&vhGDR$JF8;_TAt3@ zDMCXQD7T1FfLvhh=*~H|#0%|t_e%>Rbc5d&Y+o)EIBQr!px8-ph~Ky()!jy+BYqM% zF{vbdfBYVvvlsBP_1d|EY+*=Gu@|L4&M_~aEWlGpYP}-jeQ`mj(^w=)(Je+6VA2w^ z@1U`MNuKe?LP**edvh!5;)F~m<*;CL)mS#Mn>}XwhTuC|BsOx;OU}R+X(V&jcVuKf z9F@Tu4>qio*RpoAlCo@LR-U)5eaw&i3VSevkBjnXSp)~7k1DNWi684bSRZiGkX`2y z?2-S)$-5m;nh)9pb(P*7 z4-02p*fkgCiToD?XY7tA?ddl5-A`+2HOwiVqvAgn0U7cuqDFA8?ST5mTi9X~&wqZr zV4i3?bGdWqCRvj7@>A3VRUy)UeRxGoh`G9ON~tS|!J6NAqQ7TpGRXGdZXN+smzU!s z>?$yr=>W--SyJp`ulj3g{D%~14G*f>^0hWR}%Sf&NYQt=$Ne56?Eg z9!=3F6S8x2OulKS1v0&9Nmonngo)(*$kzxsz&0*L(yc$~m2Y`3&|K+{$8+^nbUFn< zQ6U$#r9oqEJ%PHfm46qJ|a}J#cg%mCFRIm1uP9u2%0A6KdU~Vg93kPBHw~|-OM2!cxhMW7j3Q_!(8ue^LcrJ12H=lOSpYv^9 z-c=vy1|%`F)~^?&Mqy@odxlT%co{=bZ-?Y=A8m#;ER3D{U$D?$T%;GtU+Ch2JZa-{ zq085(p#_$&qos;S$gLMi|MTOqzgg4#07lQQ3;K{Y*QVZYJy;PrZIP%3%71_4O?cL) z;>8T@4`o#j{xsRTW~m>@KCL}&h@HP3zSME-yPv5P#5*=u9=5ltkiyN)9eDY{XrU<- z#sILX4AG(oM9BAuw2otI7DKAjd_<`VX5X)b9*v~ljH<7CD=s6y{1MQmDZvTcbE4g* zv2G8kZmBKr^IJI<>}@`0WoKl)TK%@~&9jW0r6oLbTYcb<&`dJs*D9H>Wko4GSACc? zRJm0W%5x3AvE&LY#@fc|0goFy8)_1(dB5(~(2Q--hlL?yC*Z+fr8X;)`zEZbjKG6I zajF$aP&U}?kQ9I_!bO>Kf`mve5-3=dH69zazWafede+LYx>vP;%Nhwh9;7uESqxO) zx1+r`04%O~JtC}VK<)5*BO|!4rO_JN&II^5Qqvt4>6Ec|H@j>)C^lGZm&DJL7tSeM zU=~~6j>}I)Z+HkbOn%PRG_)DB-TmP99Y6P!o5-JlNae2OQ}^Rcs-U|j&OI7;R5Ut; z*KCrz%Bkv4W~GB^QwX&dy>;g+R+05f_1ds*mr7Fhkt=KgbP;s-lk)(kw&0Wo-$g56 zJE<1}C5}hq(#N~BE{!W6Lbq0LUPzmmeY?6Wlqm?=8Bvl!n;b+097Wai?^p|Bt=*j;Ff) z1BbIhGD0cy6d4ijvge5+$%wLN5i+w!$FYirk(p#?&&XaOS=oCYLiXN{b3E4(rEmB5 z{@u^>dOfe_kLUZ3*TLs}uIqYV?>!&8Z=Y&h7tpHHCV&1Jkn1(R$(*t>Z4gD67QLaH zw7xDRGL7)b$+=?rfoSi!1;2prx<*0;0@0?@>8-M?P_gk#A$!0}v;2C9Gm@kItH5>)dj`$m@I*h$fcp&Lhwn zJ>;mN)(XZ-la?saVI--GLSh!nPH~*_(&qWCceZvYzH9&=97rfI~Uxbk5AwtPpI^?qda}oQtV(1 zU431Xyw>Q_zM8(?$MktlQys_gz276s;8TkXD?2!ks=SV26F}S!e$-ry**l)GC#I!7 z?Rx3b`_kR1iqod-bBM<;({2xZT&px&0?4XmL!>M`^r+=H@Ieju7i*h{;cBNK0(z=1owYSFCjKzYf z^e_GocC#vNF)g5eT=>2=@$;g#Rgs>ChFg2fz>%%Z^Eo|;X8un3c9Z3f|5Nq{d8mHV ztaY4z>R$Cy$HxuKJbD;o&Qd**dvmzVWdb7-c2`di`aC1!RhyO~5ADm99&YV!&8C@G z(s{C6GtyBCEsT=n?~l;}!Db^TNc^tn>NmKG)v1!;?FXS27nS`S!18*}c z(|3>sKC|S(rH$fo%p9d99!QAb=}bbN@P>NM9+z~q;F0EPJ^Mrp&c!j3sc$EYsI zFJ0Xhre*!S@YFd^g>!kC6%4=E9cAJFmbY1w)QD*e zvnSNHZI(r&t=iI3P=T61g>37guGaJ_f~wv*sHNh{HN+tG}CuNOAO3Y7pw zCM8Kfdi1C?$@dIdH^lpC$$ItKo!A#6Jl&=t#~<>(iJD(i*jrOrQH8Io4$l+`h!@9d z31X(s4d<`rX~H~%(5@z}17g`^PrlAL(^B8v8zqGce6x(s9FvmW>@T;TxaIOf^`Uc9 z_UYOKl9U$j{U&v*KwZ#rDTa!AgX(?C?T#7c&tRL_X9|*0`R~2V$nwuHWp^qAFt8SX z4E@~_&3tpR$5Vf?Z5Tdd&vVmkK$ePUlm8|j7Ve8%5J$$|8I3QZ5VJGXCl}^s>?%jl zaY9+N97m{(;MHYCb@lUhd0noGg=!YZ{fBbqdSVuX%1)bJIHW`Zh+;bS<2fW58E1D| z-Y0)eocBWHBf$0;O!8+hTWSKTsP2e#q_g9~>}c63Uf#PI@fT+6s)a7te2}BoY18xd z^U=`Mob`od#Dr*FDv!zv~p+PQR&5m^F=&#R&ji zpvZ{_`);N@b+%Yz2(jfE1nFxCA}3Lhmp8ol)bCbukcM#yk8Req>k;iwmAW2*K_J6B zAf<7+Kf0;tqxnP48M}CQ7Gm3Jf&ihwb;pgM#-3xujnm1YJFz&#jHV^kjIl-onak-( zlQ(ddsNw6i0JT3HCppor>QnP>}rMjG8i@+9_y}dPG0( z6r?TIhv=za5bv3f%IqmJ>yMnxPZ%)bhNPc2M=iN6sKUK9G&D-HE6PbUr^JW&sv4Ow 
zOghrXfo>Lg93|RH>gl`g5;J-q72sZO*~*>k>ccpMAFw9CVq+HMq#Kt-u~W%XR^BWy z_UaVFrz?^uNH3Tz4YO1cXR-!fw0=ppgmTih*D-dQ@p-%xgkK?U&^rXZO?skgX{IUg zo{UasEil-J+IDlTsAnl26&5EDPRn!@Q*Pgk$;^~`lp2Iaxh)S;y=f&O8j)Fpofu)V zA~?c_Z|6ea?TF!eC2fDZ??3<{6UU+}at1*io$B?pK0A={oQSbXhNl{K3ZDg|x1;x* zKHgLJgAU``GR^wZ@W1dA*#(xW@7WvuB)7$lV;QouQ{v;RiAeiJE@Bh-01^_qm34d} ze1$QF!M2t3v$2o5cxR0M-fc#{$w}5ec5H&%32sA_-7q!nMuFpAp7Y|J!xK`4Mx7%m zr5n9`M-KFqDd4tB!`GdDH7~DeBxQR@?T4SjhXBG|hW&UNpnTNu?2EGVZqSo>u1lT_ zbq?jLbXgNZJBuKjTTHF%bd&fS86rVO*Rri2N7^O=(C$FOk8pg(@N*Atv1CN8nm zpbm~2c3q?is~UJw?Q6XwHk#Wka^%!)6fuj85>Nr>9^T7fAJ`Oyz{x!UDnK^)vQrtcU*lESyDPtATf=3YodM%Iit-P+N?2%1OOjpm z&j&5wtW;Pfg6YBq;H)PJOiN0NT@upLSSGZBLz&;ZN<}gSon~tczjpMj-%d}>(gbYy z$)BSad)MyM-~T8jBV}d5=zU4j>m2J-&Bhhp0ewAgxk*b895!0z%GC0X~z zOv}hfz{ijIG~TMAs=+c3B+Hn{p9fC>0{*2gL6?b)E0Pj2w2$LIQ=;w=NE?R5YpIV* zRIS03mLY|+cAQmtN(GR#@4{V$|6miMwlHB}qoI!mx^89F<-vk)F9t`@r^$}6} z=aV#^sw?=^+8VH1$*}9by9wXheB&K=?VBYWIk1qH)c8d!>@=BA{4uQ2-AFr_`Az{G z$-j5(45!wVFH<^$qaMI7#}iY;_ao9X4YjbVD)|cX?Gm3fW&6>Jl%%*@Wotu%kL>Ka zA`~fxA2FaLMc!?x^R`QI63%{~Qi?ork)rZt81wmG$K@t*FIrB!3=@LR0RfJ8b1!`%WcRS>Ok6-1L}wL~KChmwckOP3 zhShv~MU7^@*|JW_+TtQ;@q1$7tRMuJ?2QgqiA=gn9zf+21g0+dLoUUkp`C4>Mn$hZ zgX&*Qi=Lk&{)LiQxQW`(3hRO{Unbgj)HFk^RTjqxi1DQ_LT)2i9G|5y{yH+5xVyj_ zY%ZOgjE@j_a}J39@Krt;ZcjZjkdeAS z+;p#WcgL#uke7E)NmF^=+w-L$(``hpc*fl$FZL;e1|!8PgJhus$=B!(SNgf|eKtM* z7kSGnJy$rf^v%=IAM@?%0G`5$zouse&CI+PR{%)=>veIL0Uu!T?iWT$g@%RIr$0M~ zCa9{RCrex$i`KYwWP_hIAPunSk>?j7w4E`2zcBpZ(+A4dh|hfLfVbm$&PAxp_rv1R9K@i$L)^%{l_VM5&(?Bf&Cvqw?0+o2d|35hv$Nw9{xHb85FXEZI9EZ zA!KAPIgSlzZ*ETMh!#br>z=#;^Dye{S5u7$Zf{r0vNU2imZ#K1>Z*A=n|@Fwwo7)< zxAcMJFlB$+W0503`9bQ6+}M?HiR|LgI}F!)`yb7ZGdopk1=126;qFcp*)(}vV6&wT{T6TS>zT(9aYUymU94|Io`;c~F15$8JtmeDV7%FbW^z@{F< zsea;~)M;?ad=ES?-m_-n8)_edyBA{@l{t)FjUADQdNX)&y_NJD|-iB|akI0XG zEOGB8n34mm$%5Z6mL>=#KgO$u&eaXeLu2|9~*-3 zVV^nCeOz39691~ehxx&`2+J6VG8mk(OnH|w_O&O>FUlBf)=8392A?oq@b zxGn>E0gGPR{=i30M$*Xbe-z~V`19dZwqiD4@64H#`GIa$W}NucFMd4(O~;!%o^zh; zKRKjNAe0g*HoE%29GovypZ+M1*vF~+jC!$p=Gk4B&zfDt8tv60;i3HXjJq1u>V8Y|NkNE5^fX> zZPf~L|o;}I4I8?=dKfi+n)_zo11b{Z;11w z+7Vy-d6ODaLmE-rTB^>@$nm`eWXRg)x^J(o(~YNtR`Vf@p_49^@d~>dAlWwB;cQ-B zo^=zISmdJQ{k;Rm5Wt9$3=xbNzF2bsiZFU&^rBqhVO9ZR8|qp%aS@(CQ(>Q{o8)o8 zWNI$&ON>|lQewcvJpB@P+Sx5{$UZh% zUe9fRA6N{kbjR8=uCQ)8ZQ<2_3m$bV^^@%|6MVP+^2%tYXbS$)^5C-UnT8z&*p>MQ z+gd>DiQbu``Jo4->}wNLKyv=I4m72~vmKk~q8VY)Uv+T3t|FHXxONT4{{El*yZt-v zaCt{X>1=l;PL7XMc7GM-Vtc*XEGQ}V;fcxj^%HY<%C^CD1FXg5hrFG#$`7i8=|)Ex zrN@o<5s^74SxzB^GtJQvIj_PWV0YLBJux;`vjFXhD#i@%g*;hUSyuucJLCqxEb5g| zTh@omAK3=nmdbIO+S}SD=+ISowWq#weWa*g{-JRN096h$APKPT{>&}lHP200yy_~u zBNpD2LrkDx#2~(UYV}Yu8L(pxSvddiRFJ6Q#!;wEn#fk&Rcl8_)>%3(brFQ`N9TMd zExp3CiB!5yvLH2xcu`S7K>vf5_J> z*H-P>P0_9QEp~0s>~b7OMd0O%?0v_*7VXSU?aez)Jdr4^VuNe3e9y-uey9@e{uqJp z4g~)56t-#P_;5&h%hv5_q`^i5A4Fsbw)<7ODgF>$efO0Bd(D5PusH5;dWUw~7frhB zPS-CvXnd2XXbijj`Ua7IQ2$$M6BF~GAwXZvq?=ol=}dPn>7rInE9vNb7tab{uW((s z=;VULA3iTH4OMekdwrFLnl`ZJ<#(A2uUQSEN*M-hhnV1-sK}MI@8+&9mW2zcafQ!v zi3t!NmhVi@Os(}xLwm)W4lFTNV+@ty%K$yWTDN_(zAVN3b-BOHD{b)EEZ7_>z<@br z^WiDEL-$X~_b*TB%!$8VKA-9RKC7Syq5t54rLco@B&(;AiJVt>Im< zTrk^ndaC2}EoIW6*1M#v`dy}*Rcr)h`?6e*+zNj|1(#+N(igF|Pw`yixAL(@H5VMA zJII~VMBp?YzuNHOhxKlC8-t7FfYe?O1ePmUi1zh=#+yPK;VZPXu%es!sc24u_1*k}4fvw!I3@W;7jvuG9g;mYX>DmR- zTtD7BVZDvxZx)C`MwC5ZglIV?`9zF|x5$9aMh`jr)6!F~@JT@ha@h{|z=}~){k(xP zHJ<-|F-TJ(pYJgipqqI>HyyPtb9Dz$r;Nh@UEk>6DOU`L1%9=78}b-*xhPP!U+i1V zu<1_?dLOI*>?8?EePfn+*yiw_qBC-*toBiAnnCm2aly@ZSKpVWa~MWApJbTZy}7e< zd|Z2z*3|T#T^=nzi{0L=m{ajx6_xiQ@X1Wk90%Q(JO%lMind`VNr`Xeq~4-_*xgXi z!WjlIURGUPfIlG4B0fBW2BpVa&1#q 
z#w6uya@y;QFRALnlcIR;(@dX9-pmg}1Zy+Q2MrI$$Jcikne*^<=rxQ&jL}4xO3_73H4p0mSLO~AQ5g3#Bw^& z<44*h5QVn{7}j#Fn4u@Xm#pJe4zwT0HR++$auOL~R*<(xP%UEc!OqX#K{rKkAGk#xV&r zec(aH7=!j7|CJ@Q)Tv7X{2UD^Qqk$fsm~N)^+lX-1o~v8&$`Y`Pa4)X51r%Z1+5v2 z462KiX0WrT6ZIe~W)505S}|Mh($UM3<)f3qs(0%qIS~+Que|9l{^8dd13GzquO_I( z_e`ihXRLQSO_tHE?B@5BxmhH#M3!1VeOLC-uK%7#t*)jfd(V^64#w`1>3pPtaqH8h zA=`N>t{{Ic2mo5xZj9L$p-|OoNSF>Q^#wX}XHz8p%G_$jIXjx2QeLBs&@Ps&vmc&| zQ5Bc3`ytCFW}K??R2!=O#Ti{TXm6@JG{2K7`gWBnk%PwkVeeB12In~w!4d5AcsURv z3hwTfL|21*z<;Yb5i8B_qHn%O7~624>PuIfji>yk$kzLhg_958f*$3J(M3~FEJL)k zaQG3f(I-i0MJuSU>%c2nIu^=npB7qHtYYnbIFP)vEv6Sc)+uQP*(YRT&k;Mo$JkY$ zhF*Djpvr7xY&UyaAW<(k0EOW$5T@V6w4@P9ibyfA7mE3@B=m7^@586}eFbJF|4w^L zsi2KQuk$(e__mvNOlFpH3~Hd^ZSTg zCdiF9f3h&hT(sK}y}}Sx*Hq2IC(8e@un*y$_GD(4*Mf4ert*f4=#$+CcwR=uZ7|Zj zJpW#K$4cW-9c`}Ny!v6Ap}N-R*HdWe>2LLQrdZtlp4%)SXsEu#?DX8FnazR{(W3(w zHl&LbobL>={%*o9%|Xi5iWn1KaJ}yA>|8}^_>SQ+B%N_WY~yn-V~41_i8(0^ISgz*;9VWdF`kOrv=U! zW|7_IhXDV-=NiBe#HRS}d+}7$OsO-l?|oDgkq!=9_*v&WR(m2jUs5%Nft-|~GM5T7 z>)Df}Bs;Sl*70RJrLK|UmLVf@4>iB@U8v$PKAT3Tt6 zt;1$r<#)a;bSIGDH|?9>sarS5q%cl#YB;{5x;tF57bCJ(*D-8kOF7h)lpf$@V}hLnU#~gfP)B1~KE|K~_^{SyjV#J34cA{h;*{6ClQXBhmTRf$7BZN`Ay)7H0mJ5Z%qiQw0);w-<@HZn5ky|CcG1*UVkR}q)}K+9 z2=v3go%UHqkG5&-DijLGdO(|Wdr~p$DX&rHio{n!RR(J6P@_S#Snu|0_}OTC#%0be zN2Xy+ym{LCV+2*`>PA8)pw)KjR7*rQj4sjT;PEgH0h@H%~2Us>~In2K=C>=`qa+wgXoe zWN%+Hxc3j5!4Y?s$T~11=K8z3Cgzjvr6RRfi)5MeI+~j;idOgNHwx}(sBr3T&D~I% zlnASokxUk9u!8GGS8TnI^F!21;=n>sY$cW@+Kb{$g)y78%EfL=4-~}MOYG!rUj(*0vYLG0?S-QEJyNC`|4tpB;qo47 z`q*a?+{P}R>#8I?R623758aH9=4%$; z{N(kSQ{MYWD>?EY5jV+daQ(Qvx9|03fVaE{PfUpX*H|nZJU-vNEo$-F%s!2#b2^`VW z<7&?i1dBdKpJ*f`h`7Gcpr`8wY#*&$Cxk2lgg!ZuAOK&xDcsQigL_g2?W?Hpf2pFP zCYG*J6^e-PjL#b%h?TdU@}7(xp+HF>?$HT<>{5BB)Yf*yQVV{sVW)4aY)4{vh2IhrHyyqi(Z<{3v61(_cy6oNw7v2UscZ6ze;Ce z`@IA^{;D@ly@Hu_{PzVBp+^>mSJ=Sp?;=sV>wV(fkb*(i=mIUz9>i9|Uz=7?$8aJ? 
zcmrM;%atMLNzeA40hhHDPU9zI#JY8b;Xset6x&kd6V2^{m%EBu-MCRC&X9>~DGy&> zjedR3ju(BYh=9Pzg&5c06BJ@8Ll0wA5*S_o4CvGWFt-wo4U3-B&V?O;W$qrGq1+Ee z?uFdGlUgwQU#n)7$BfEo$~I=$xd0U5&Z|S{ZLPv30gN_wu3&R5cVK+T!C`0iNj2V0 z3eKC4l==9>&4=_fLpjwU)G@>gmD~%C*j1M~PnK5uj+9s;Tum(Imn6F?=d(miir(dJ zbdJQmJML8-&jbgeK4)&L294$$-eafI5F2kYg>J1WE37fdsyZ!fhoNe&0SIp0cOaoE zvJreeowQeS$B&kNBZ`_nMPO*-3ZB=Y6$M6bYBL&nr&kZSxXzlQ%lWk&{W!Em5W0pJ zutuNj;1re0L^Ul+Iytz@an?FeFqcnKgFwv#X%@N;h9GU6U0nHSygOz>uQcs3l?bu49VW4+PbVQ^7=;%kS(>kbto`Wh8W2}{?F8+(G$WlubLc=@(gF(eye zbB>H22zGo1n8w=(P4iWqaMsHd#HKvF&4GfsM)edCQ5{|-n`-3%ED`f;_>nbViFF@O zmd#B$wcLZM5nEzhQk(aZ-m#jth}1+YsMb@Gk?0UjM@5+Q;Y~(fY*bMDmK7)m6;SOt zhNT}JdEy}aVL*n7-kkv9ggR{*1Pcs2u;}8s*dvYH2+)hZ*g;UWV~HIuMtg3s*)fZ3 zM@-cp(%8it&{$52HF_%PIo^RnD78<9_5Utbgi#2yicUSzC3rRBGF*Ihf}$}@-m$jQ zZIY0MBb2B`7EaM`zf|?oJxkt>C<_d@PDxMB_P!Vq@ipI?6Q{~wblJ3Ty-jT3V`pb~ z-P^13p%kH;RW;0kBk$^rLG^a{eA($C_^n&y!2K}%@S-Y~ z{!q+tX8~eyEOKavsCx$~qe`eFZrbziQkn|qrmD*qyn4iAcWcW_Y_D5%^3lekFZY_$ zqt6=}CZOMfTu7i*jju2}@l}wDmcgu%k)&JZLrW>SH>52=KG6@Ov1;Ei2|hGT)=-{W zWs6PU%N}v}3WvLK!=l^ZTygFD1St6Z)B!uw{=v@v368BXhcPz{9C2_wS1Jk+vx~+b zUq|hp{*t^q7wR&;v=mUFqTG@Gm*hKt(d~3l2DUk+qCK_xg{C|p$J@-}$^eD+X)=-s zW8uugH3^x*F(QShlw{?5qey$FQkmKSbw}i^z`H1$~MgvB`!Dn^CX}6d;XQ zYXe3=pfG-ET<}1EdhZKRr++CxfsdyVNguY6Zx1)3dgl1f$^t7^SiUGWxD7pBagJ((*#v7&ol+S?8%bt zi*BCVwesJcNx6((G0iN$yYFVuO9;TO;ws-wEZuF0K#yvVO7%vLRW*%8TAw2up33xA zDWTW;w3BAxHT7bpO|lG_r%e{Zo5k~ew*4mT6JM28`{xQ9K44cBl^*wqkKpCwtGgFw zdDA65!<;T>xx99_mKo#{AQ}f23K58A18KZX{_BS_Gx07)-IK>KdkbI!r$iM!0i52u z&zjc*pjzkZ8x%XsyJACQD_qxL!e>3`JVQx5~uBBEi{uKY7|I2LQ_ zaO-Q$Ero5elPwWrTqjiR=eBM}J^7Aj^61gcbEh}TNX3HXTFll}bxV!QwLYS0;bLk| z8{4Ym83l-Ssti%nymTR_r{Y;5`?#0`+Hz7kh;s~S4-G3z(HL&#@gqX0^2_f>gzS4U zNmVxJi~(zO(%{mwTy6*oZ4@+BAkGZzTArL?b`t1~NKXySdNQ38JXZgZxis?SkWT>x z-Hq9zjI4SZON&hhmqRV?N%qPQO`O(7x8(a>^*n1QtJ8#j4Hp-Lx}Z)?tfTY6EYBW( zDVB+9G#zo5jspHkf4@1cK-Q2fU3iyITQdG}{2QL1aZo7qYk3|}3KT6~wrv3ymJQ<| zqr36u?{x1_kn|;Lvt7;~QBDJkjz_P-zAPXOXvmrq{iB_`uKsjPoWX>Z%pUza$Ha6A zEuZ=DrNuH(oIzK-G*Qoz5s)jd%Z6c3D zy2tU4@79KKXrx$=Erq<6hp+o|R>Vi&{JOBH+a%i$MzFTnvS+PFA-H zh6fhs0(32%+H^C3(ahOra)g=Oh)|v29p#z zxfI>acD8EYCm$riLC^ni59a+l9du2b*u&zz$qdP;%bvzmKG*KE@0e+@(sg;ui-UNQ@!cFt4-d5M0pkZsOm$x}6at^19p%snj|$Y$%8J#f2P$n8HnTBf4LcBZa0=ZT7IW4W%{@fB)uotPs1=i?f7QGdRLl!Ft5?O z3tYu_0(D)teV3})R=YR_BGWm-fOB3 zGf@k{q`InW?%d3<-MvAmrdMDoN~|788)v zb`x6KIK>(G^nFaoO@32e)ObH^83g<$wpB~XiZj6_mX1lH%K8Iyd~LUZQy zY|;idM}pWIJ+7*1-C)TPhmEuTKcAnoIjN@9h= za5U{EvEXDV6te%s)gtD@T$KQiFE2NL)7P1I#O&B;RV+aSy#e z-+b}qWMfjZCL@R;Osg!1+S%JTexK9{F0(Kgwp9)yVvTayIA<~5(D1S;sh-I3=jAyRFO4g+#n1ISW4w|tyQ9aNEL^tboF=*olq_z};22Jvwq9v4 zv<{^yvYVC7DrtL+*yxU)am!v9hg<9<6mX$hEQ`jpzWMPFy{`~%F^ApDuP9I^Atg@G ztf9H*IMm;f5UVfMFjiD9V?opNIx-5e)8NzUC=+)#lrw5lTR~yZIihs0l5>%5GRh|L zUE%!{QP}mA^9V%gNS~*C3hF6m=MaDf5#)OT)Di_Xe^V* z*FJCfah|h2-+?Le`#7?_`u%3r(@VLLw!$t$Qz?LLoMfgxCYuK^|^T^NH{w0n_WOjzVA%yT_QzCLJXmp=0x$x(>Jf6{Mx5>+`kax0MXJl zUxN)+;M~qxv>DM$>v3S9r43YHsICF=F;Xj|f!daJ!G|t~I-To26`lA8Dso=^!VC&c zVlfG35oCwH-+d>*b#*fK%%k9;Z*+;TTF&U@U5N?p+Olx*g zCjw+$=Jy|DU6y7NY8*zw!1Y75@TxY!-PNw&iWxdU2TUICbwqJ-L#S7ZNbmvbi zgc0Ga6*EL!0<8*CJ`6uRj92e_W+C6On=a2^;pQGG$Z3G-Wp|^dfRj5u$d5_OENFdo z3Q0K-KIV8t{Jx>aR0>sTfC8Bde^VechdcN11K%J^locF9qiR z4(ebv>P9}Pb=C>eH~HzTM>j$7L;~JtTOk%7M{iMzc9}!e)=%ul`K@P zx{{ilM1JD5Vf&O6dH$(t5C1zyOMTqGooOwXe@P?gXf@J{kC%6Ynxs1*djn^U+BP%K6C&m~}(;)^?^pe(mSm?rDRiWC z&)8S@HY9`EGE}z;dtOr+ z41|GuEJY3-C_hmCj=MpQQflCYtAH9#<(+e)z4GB3cD6ZU{vgHVw$G7bCif|#&#{XS zj9&j|)y)2;pk{b;%pb7ThAb#G&rP^x=V=Z+67SbBMJ_GrFV1?e&lbMm0H`RV?PgKamL>Y~ z=$tnQU7{WZEYx;iWsfvEh&cTSJAEU|K9)_N(bW;M;rRIDqg`neRJbjSUNcdXxVyH3 
zD$4!m^@6L}CeL>DTR_ZA-*OSrtrZMG8sd(obQ%u)vAiNBw*3P&(r!@H`WJ+jFWaev zjehMz?MH*()~=#zjTjZeYaAh7S$Z(sa0;zZj3$Of6GU6RJ_0-dKLY1B?nOORsL>h189K$nYe=Gc}VI(U(PoHi|WLEB| zadn%Kc+OQP7g9*jZSwy)IIvlKAI{9VB3$W zk~Z(R>7ZS!yW3Oy8$QK2;(G8`NvBaMm|EMs@22X^@KtGPit0U#rS99$x5GXeqasIE zriDVE%z=9n>+ECs<8YM0(Bg;+TrXvXJFRu}G6i4hz$h}5Qzf#KBA#lC zdS~01h`!m2bM!^Gr6AhkEJHcAhdb8w_T1B@6T@^yxVZJEpWd8Z&}w*qadTlds*|xzke|_@ClC#3N0K_IEC+Xln(a4qkI`u9uE0; z5fUim2@M>P1Yj$8>`Zlsy#W4mmv1hCd;{f+jrK=$h98@bF~$W(FTR-_cGV4=9f708 zf&cjukYyYIphQNH`^Wx4(+*G%*q8wt{kisFU6%S!$Q%WD+yy99;F3|G{SU|-Ir=9r zJg^ssLYtNAe_X}UD|8pZbcrl53iCJB}|iGS?&=*#}1fIw(-DdcVbfUn+r%*CI=T>SKB@_#~&@%o(Ge-{z6~d&C-Tj$9_UmmQ$iZg#`oU%%*b%m>{nMc!O2A*kXeVUyTiN&i zGPkdPlBnF#0+5OFVQM)_@<*}d2NJU4g1tPoIwNrWkHzZJFaU;Nt|=?YEAsyU8VNVI zC8j0EOMjgFx+K_5KQ-8m-PJ`q6q`BeoeCv1Z_dU4Drxz80p`(WXxo!M9WGkuzz}iNlQR+{U zJPNB72%s#|skwb+1ybgQ_gA}kgVNz9W3urGNT@cp&&^JGEZzX5>d3)M|2qr$&)2hJ zy3Raxeki~RUIICeeWsLv-%d|Y@!7LyhK{``TCur?cyZ}*pF;J`$d(<3pE{)8TijdZ zfj51E3->}onf=g(cp%)Nq@r>OBw(13dVR9e2=&lD`^54Q;5v9N#RA-id;AkjU`y_Q z;Svz|V|}jPzlx&M+1WR&(m>X6TEgym?-{j~BBBnslz(dcW_vx{NF<*o$~ zy8$6`-6EpC;F3Grx3tSTeOtq<;@2+@PQj=%AMgFra}wn~mDw#1SJ2uc?pvu<^g$~| zKfwknW03itmHrjmkO{p(mylG01SWR&gq)l~m%aP`6qJ0=tJtd-Ro4j!38;phzWMy; zeH>+gpdSrjI`1MWO!sy0+lr90AJ5WK2bK@JPCt&-Bkt+FyC5|nTl{*X^!26|_nVPd z_4~a~01%;n5lkWJ76-_*)CyZ*SF2(`U(AR#N%=17GnuKuD>+v*zar^$WJC6C`9>u&rH|p*Nn9#A~_x1rfNT`-9QIPB&4w8bW z?MC$?cqp~*g#;VqNHh%Ito+$-1bP?unU>qfEXz-2o71v{M5;vrR(URa@eD6=o|8(2 zUq?5NAXN_nncLzDw4e*l(aZad34&AFWA4)C{^k8GaNu#=j!m2OsT>+AW2+oVow=0q zc=U}PH*=$0F7@Gi6m7A0uBBLPo=+K$(%I|fWz|wvuJNfVzo~!MWAVX3wL8_9qHnB! z9{4CYzMO$TMDkCW?(fckP4C(XVtHfk!bVi@T}{o)yqvUJGvyB>4pTA6d?G$AWxK#Z z_Z@9*QP&Pr?VD|w_@;5&^A3I#$-nsX$e_mbya7TZNS6%y}Pzldt^tnCN$%XmpRh8uA zWGnr!M=8my3NUP&Uw3zfJBA^3=JAAE(w%#e!Om2a`BvE>igM9zWm#RK8b8`sM3t{r zM*TC?nOUGvM{8FROvnJ!;D(t}&&v2_|3Ha>l~vv9o4?|_eW|rhYtm3327(C;*js@) zOh>0ZJToz2sHCgQK4EBx?jSsYq*T@+EU(D>(d0s8BFUZZA21Cd-Z+Mv`$76JVvdZ( zT~9Br0b@QyrtQUbImp>i;Hk3F^&Oh${g7IGi+a$<;U{Gs!=|7x zwn5aj4s^_lI!S5!-c@BGBvg21Ezy#*S+rwtKvWcQ6EeT9JpU))OPc;o5Y>)z|4 zZ610MzVV5)eR~+o{ks(juB!pidMR+qVM^Vs*~m`H*xHp?S@pD8Eqx8QQ_>FkI+59X z<`5^zOx*#K5wuP2xSE_{cYgHkBPsHzXY5tOi!+(&KiX9&R6p0F`rD;`-m%~*a7+p* zK8%kK1g6dg5cts!wsK2?aBQ1}NX9|Y9Tu3S-a6331u8~|qt3}VI?KD2O?H*3A7E^h zO{LEN0b_xt@3`{J3=4y?AEDf|Z5)Q6T59)#>9VGF2{oS9)_Bvcr6 z`mkA6OmuK%%8uB*Jcoz>TYdnQ2Lw489OR3U6K&)K#~Crjor=h|tl}uaT=~?rz_v#m z89()}6N)iO1qvi|J6ob^FibBsFR!F2kvGI`yood^);{N`bDW1U4pTNfBZ0+hAgTIt~@SrR>EX3*jXkcIws5&Pla%?%NjlgS!Z!8mojP8AwY6Ns>(Wy6&|r z>P93n-Y+=3c<2+n{{E8p1xcz6jg1B>)hB^(ng00s(LrsB4YX>3@j)jVhhei{l>2w; ziQXfytU?BtCPtNdp#(YWW4t(LuOBSN_3BDVN-XO@^|hZAafeKxcW`6bJ3mlqvAnCY zs!~NN)_2>?vkbrcLG}+`^RDIU=kh5Scf?dt>k&>feCRnhhR)e`b34lLZQ${{x7(z?D13Bw8H8&T!5G9t9oV--U z(&2b^?!@7mP>83StBQ+&s(v9V$I zwby8Pp<6jA@Hc8)@yJd&)V9Da{Y3R6pD2TXf^=U;~a$foP%*EbzA;urargUlAa z?TA!*0^OEA%Q-P0KK&bE96; zlRJd#ifpJ8KCT(}zUTIT8~+ngzOiwOJW<?lSO9 zkcTGy#Q4Jne*?n!4FGGM-ql>?lFD;~F2#nGwVvy?u_LyP9xbszIM)s5Rcyj zdYil|e*4Y%nDkGt+GoX>_*_j;D7bsaD-Zrq{IBshwn3h#2Wh4r*db3i{)68c4Ag{LU;M8##-@vbK!;69k$>TebN#~W zC7!8h-@Ul(&6=It`;giV6~XyQLVpJ{P&f%71t&=yCzyMfXfZgWG1C%5bN*vd(LEs{ zAwx&UIy{#fJlR zW~>)<4H>`tNr&d6NU!xx5>kfH#4q7WckV=WptAq^X#u(q5}iC*h>mn24P%w7Guxz> zM?TmHLYCcddAZ}-3sQnWZ;QLXMGOAi{Rc1j0IQ+XT3}$9D3Y;!)&h;_klhTV& z6Kv!=eC`^Ws<7@w@sIlQ-LoFgCcKHPrNw_+-hor)Akz)?c(zY{kzU zCqcNO;Tz>R@-HqR_dYzoXoMRa%Jt<>$1XV^f0ZO|BXx4C;%ZD()HKOr zvzvEJ97OK|;R_~(JHZ?tSoPr-z0nnxm$Onbr^BcFBDL+qHn&b{YZsLdUDeDQF3^!F zHIdj;27{@|U%H`?*@lgL8WrdLMZJygME3LEGKLsBH3 zu3(8?extldejZ};VEi?A<}EULueXV8*uQ`4zyB6|7c*Z*33L`fUX;2`DXk_(!)F## 
zQ?YwXli!lr5&xM%aO0+*0q3qRv&*Sx{Q3bNjmx4w)#2~@23Lto360KWRfXTC9BvxE z!LGdBPs}D<@Zv8hyG-w=)IBzAJSn~D_5>dLIki<-@rVae^8K0@c$w^vu5J5%Iz2v; zp;@4Ok2&;i=H2s;PG4k3^ytFnz%bb>sFXYL$F(2VoW7W7+5dX&e75-j-wdLnB8B^< z=m2QN*>&6N5^T|Qhpg@G&v}AzrE;2EUT;JinwtJ%(r;_B9eeo5694<>3*=w!5mtG; z&;i4jpkSod&oBJ<-)>Q0rf$h8uuJHgGcs3grE7f%{N55CvB-9o-aQ=eWFR-eC$a$d zaQUye`i|reCAeKEq-c$nvn0qLa-IomeE*e$nasvOF!TK_&og&pHMAukQA+Yq@#cu1 zeeW>ee*+9|r7hb-BW=GuXK4q$V;bNc@gNSvX z$HQO*VF0~K_M{-1EvDGcbcOlY?uB5^%FQJ(zLF%Zt1}C%w)26U-Sz8q~btRh&xZjK+OkS^KH5xDV|koBH4kjCz9U zRT{aM@(ra`_J5B*xETB)kCLE9LJY)b54KXzV%sb@xfSoS)+RkY>dmmRNP(BA5G}u0j^uPdxznK_ z-Uhm?ChL?rcNRW=Ff&+m)oyV(vi_|d#l6ua^P%DpFk$XCX#XAis4Mjj=nRpplyJ+{ z`4U#AC;FzYwT+E3TU352m>L)Vl!ywn>H*`#I(jZJ7Jv0huxlnLCK$5^~qLH(NgY7T9&pxNt06w>ZQHg!X-WO19Fs1Ze zN|JJty!%!%KUewIg|fAFS=S9$)J~hzWaK2G#2w7}R*V*dopkD#a{(jgrohx-zw7Ug zvz;}VZ;O|vA`Eb*@EAj>WLT7wKNf<)yK7+7&}O1HByucX>HlKvEyJR0x3FOagh9GNYCsw! zrH5`o5J5nCNRjTLyE_z+4hca*8iwvJX&5@ByW_ik_I~#9)sOeD2O)Fc*R|FeYn^9* zquaN+rilN0eY{~rN*=KuoL3@ZP2@in#0}U`6OvQEYwBLml5VdH&xm3b6_7DAMrb5d zOL&Qh$Z}2;|DQ%{hl}_9jc%mhbt7!x=TvX^4cnb~B$Wnk<-EN?CGBQ+tXCf^gQ8Lz zXXMk`)R#R++CULwbrhx!q*Q_YWVY?Ivw*PQ)HA?BE`cRS`p7k#Tv;K<@?duz{)M~H zQ<-7%=f@B#`03~V^@}^9SthJDua930eC{V7(x+D6r#2PIw?LeH zA}TgT^7v_*hzDQw=Sh}#nzZ9Qpb_w41D$Cj670jnY6Qo2c?Wi7F7gVH#FoNo12b2q z12R1^7oDf|!hR7d&iOpv8Z@kivnuHTIllSWltP4VjaNT$+dJl*$)kGewRwxlVjN`< zFud3A7EqwKDS!9<2cH?Bbd0l50*7GlNlxv*MX!|##g4Xfhj(`<443zZCKpwp42N25 z4vlYpMQEUDGn*zo2t(5S!~gjy`}6m0$P>#ZysdTIQeGSTv^-jIHheTvs4FWthZQ3i zJd=8eJ~KZgJEC;K(Dlfjyb*yD+75nrcvxe*@mjg8ir6jEtrz08xJLWoyiB7+YUaP7Fn44r}h-J3)oyA4?hFqh!VtgiBtHjUr>7N zRX8-Nl#=M$J@wO#Zf-y23cZkd1r`L2LA&~^^XqTUKLfsfjvdkb_dfOnfh%O>OM0O| z&2oDmG*FDP(!Nsg{lA62pB*WA`juWyKW4d2`U{|-x8|Y-)8c=<{-yf`%m(`NH=y|c zpcw$ZBrESFkl7gj5cqzN8kC}#RF+e0b68tT=W%%6I>Boq9virFpY-x7q0NFUMG1d% zclBCqG|g6)jGGhG)+k`?qU`qBZSarND9z8ef-xfK9thdxd4BiS5PaleoPjAMC)1~*g>pQX1}#QhBPs@!6NVa8w=abV{ zdJTy$UVmC%%JjOJJ&WgWN{20v&0!uu zyu)@dxlL9{37gmm-0c+8V)|mc(d*XbLh0-NRu~Bk$R?KT>pTXvlddIm87n!XQ98w< z*M1snFY~^;I{y7rhd=Ym^L$csqPVPir$8~wcB%PnJ2&Q=4z3N5#jFtZf;Zk|?w2VB zbOEd?r7XBXD7kq1hR+tE^o8XE_fP;X7LucC1TK}h-AuC=T3djf2C)ZHnObFGD*LaS zh*WT-E9T(~Y4mo~V54gl^!6^KO1_-`RLgg7@U$u%u=W?~BbFtAJgxcIK(54lJ(7qU z0n_NnJA7iJa^SI25WO8L`1=j5&2q~jA5WDS&4#}z{?yqhKLNR>=s@0h()WTs@3w!7lkw;yMfP!aV}&6JmTrZzgVA-si>MEywx@O|lsuVuMEQ>wRT zCPvI@?TN`1TvazdPkvha<{V6+w3nJ(-?jXrqlc3`r+AFH=h2US*E{;y_uFAQDf4Zu zs(Vi~W%tX&yX#Xd;wzla#RL`6*_Id*t#ZSsyp~gJ4A%o3EqMegkhs#82X}WMe;7GW zzwZu66nYj~aU>LZR|CSk24WYu5P|!z(F5e?t1UI_zXPvC#$1)f+EU0fv!QRM-E0;> zwpZaHo?s+6(jItby1yoX6RrdnZ9D<`ta0kcD><q{CxwXZKhPJ1(P3~#@gtcs`o z4Rt{NQ+Ev{{ubG1v>(>%)2(LI{q3qqkWP*9(AS>7|NA!ld&vFb!=7NS*tYPec_$0< zL(x1gwq->)cD{1PFaSx(|L;8HciOn`^a`sW%=-oZz9{eR{$BW3TpZ)XwKj`yeBgU# z+AS`WUj}9rK5o>g^|+vou708V**D*Aviv+Tt~%ny@8KNNVt%#s&nbIvc>}(d&b|@z z=KT_FwXAx+BUoi>N&IuEKX7ePu*zMBkJ3W}rym+qv1gSmqH3yYqY+&{GCZ6|5nU3S z(6`=?*+1N$)lbA)o;2+kS<8aU-o+Hg0vq=#`+2(btsJFu zB2Ak=olb_rDD-44MhqWvD=EJGY?hEpe`WF<$Ro`LLcZSv3x2kNKy2PTWy1#`foW(x zL~{|ya!83fCK_tG<99#%B(z&nx&GDt^1vDtUK@J)m3;lWbl(^%^mu?JXy@OWwc<_jdeYald^?_&q3Jpc_SALt8y z=P}Z{wsYd#NxTm}+npYPU^rH8iJg6bP<8NdLQq1`L%WWCU_O&W1yMu83%d_^M=fA%J}1rjSW16Fz&JhgMO_|I!#o-UG$B3tLw8okJwv@ z_xC^!&VEaXd4^m5ah3H}3((Vms7hpf`Qgqu)S?SjA$&N(dz_z_nwzez+g*9nN1DNF z3C#_fhFEDwyl&p-aQGJ|G8OFob3%O5E$ ziZy<6ZycwZ*kaqwyBzBIb9!$9s6lZi(f4p{y)WVIqwgzk)>8P%8=;jzf`!jU2ND;4 zwHE+BUi=;O@VA8K#~htS^95s(n~lG_iWWXN67gyLvU1o?V&EyUTEsR3pOY?x5 ztqd>d^KM)eI@kZV-i7)<*>oo7JMsJ_>Bdu*fR)M1>0H?{iy`NLa^AjD7432x1|VOrdV`190US&>TZ^#7T7Ec6_i;Q=6+beMI}6o!*4Yc3xHLt1Bn_SdagNB 
zL)TBQ@d~v4g7*`Uw(A7s>X64<+E-!u2gFiL+0OJLdR!gEe!K_PGOv2tpE0YFtK}f#wq6ARKU>eCYY-ZTo4R@rUkbfITL1oSFd;!kbLx#mPI z&_5Uw6m9!HEBqxcGgYc*$tSpKLc2{eleJi=nu|2~K^vnDj3}^uXh@38e!ENTq;TnZ z*vfZI;$}YFf5`@c$v@oqC91}>`v?9%2qh-bf0AO_<<`$8R0;qFN73f|2hfP11GR+d zsld0q|8y~biI}GVI-&5*JzI*r^?cza?9oi~r1EWY)14W&ebQ{J(%Rx66;|c1UltRa zuhfGSd_hvN&Ca(B-vsV>=c?`TOu1%jjg(7|h0;_-lYVIYl$A^OWS=bciKPK;ZJIbz zu_v{48un-DHP))}dEl5Cw5hX4tj#>}dbie5s1Q-3M2gszs4K*p;0*s=@^K=Px!(uc z`984{yCArXLidOFSv_w7FQ2CiNue{GqOW?ThsexM!*od22VAq=Qh zK3-)zpV7o6wl<=SJmdSb0;C)An#d&jO{>6SYcij%lDyJ3$)IirGy3P%aiDZ+!gPX{ zMFI*YS8;JE0@ur37G>uuj2S0OKcw6DKk6|>i%MG?FVxuw=TY;DozT;QS^$c`Y2a0@ z^bc(5VW5Pl0&JqvgFkNIvG?jdHIG^362nf{AzJTGK4=ZV{{$Xt0!8#5UWEFbN3`}; ztX>B~;cVi7zrU?m;0Uh*9oUz0m2`tl{(a<5x#?P_;QQ*(3G zs2zX|I+tnv@4LJEAdcIum5oLsYw{|K)A`fv705U3g-XBBML~x955Y;9?id1bJEX{0 zswXqo82B#J?(fj}$ztR&Fjh%B{+1<1^39H+XgngKfU%f4Qa^FOrl^d!*HbT{BzZ(SKM@ zUi(YQHEJ&Kskf}qJ-tj(bb)wm8{QUg-?TM*oRqTXHjFlV5SxDA_&%U{5;mGTWQYCv zU5tZbp<2Z+|JrD&zeUP#RTdyXx}soswftB2F@Ji3T2y7s&^j#D`B>@6QZ_zq4eME% zTz%P@67*$B95MU;dMSc%_Z7vaas{Q+Tpjdm0L~HSVm3bhVZPAlJH-#(?~uvD`NZKJ zcf1cj-bXRFHr6R8uAJyHMGPEX2(V+E8OAtpUwx}j{V^%zt4-+~uj<_;K>z%gZUj;| zz)nSq%LknuET#?%?Xvmv&%XJ>LN*c$BsqT{5xpI>y#ZS9NI`c51L&4a@JU&=zmLMN_*S^oCXwcZ<WyUp(kSZ^KaNlAM&^!U)TY^YH zQb|xG026-6SJVCu+YdA5#`h6N z?9Q+`35>gTiQc{E{|4G)UW-wtdv#y+%&SMMq8!8q1Z_&KRj~5&Cz%Z<;~f=R5(MrG z9qlc9QRaEU=uw-CLT^)9O@R;DO3EyFYYPsCtlckqN8^ZIWj*R8aJRN7Cb63thjwO* z0V!ik+W-vZJ0$cTIRJ=-N7uI}L0$%r!1|}rzs=t-U57sU&Gb*xrRmpA?T6zn59b_6 zAGJ{qUp#0O{SWf}ku+o+dDJY?GLBTM{;a{9y4?x>nCPz6aMMz@8|TJ-Lj%<{MX;et zY=0y_t@(DqS7=~v;3~YoZyX&mVgCH(%%{!a7!0+{uUNFcSRMjF3>m&!b2%~?r){YE zum)%Tp>9^4@wMDopWJokJ%I@YT-0n5p`6BSe6P-6vl8q3mdDFSq+Rg1PulNQo8$!m zMw6d7&?}}iHJ~2%+#uvs!`k?mN~d4Zqt9=nbO^)J?RGSmPKCy`h2S!3LRB9V41>At z_`SL*z8&9F@WgKpVdqCvMzbO3j??`2Z={oYVtJMGYVNUI=tpEgc>DR5{^vgUTGNi`i0c+m+6dv+O-v~@=%mZzgI*!*Sl043fc^FA z+VS4Zr=L$R2Pc%X0^~T>C{7H z_p=!?rEnUx>u@@{7f}#+wqc-3kcK0rLM;B+Mr?oI%E&^UWE~x*++bql58$iN9AhWZ zRj{6;inG<-2Fu>kijJ$pb1iGFXZ8`rLEq#%cp52VwAYr%Jj)6 z(TDjRz)y!qU+%7?5Ip}zI&RbOhv6Scjl&uV#(clQ|ybdQH^zPVXvBCMZ`7ni)`@_Y&Eqb2b?e_3r1hj0LV87pB#nJlV zic+`S(C=`cflPu_U%Nt{CcgJ{7(mgs(Fcietx&sVQzBTNbQ`>VB> z``4uN;#a6)o(7?$r{Zr=piKhOz1Wg=T0=eW4iVL`Oaa8B)fGqFr6!VPZ z$^a+qU$u>`Pk-%fBpDod6h+J7qOMGO7gX3yHG6rqc%%4Ygq{JqR2uaopc;L1?TJ}# zRh6UMNRTDK6H&$SP5^VRbLiAWvZxmt9_&9Jc`--UGXMTEeEgTM>%OfyJ1#27poQRQ zdfx*{x~dJ#o3-?jV94j+g~@9^i#PF$jV<-PF-xCtV}hOp3Bb2zn830Z<%h|Qupk*> z+y=|?B(RR*9a_o~f(=g=x7&y!gV)=`HHQ@3fLDqT*Pj+RRe$`t1Dw%rIw*kr({9u>{Oxv2V<*zj1h6LU7+!M z0&$$u{k-@?MiR+BQ!X=y+H9EjoJm1|Vwj9)YwXO{{5mX2--%tgaK_{Fx6F6p$N=5g zg`?6~`nUFr14P~%+~9N~M!vsguH}*O&VPQ9!hKXHK6*8bJa2NL?*6)4L7jE&JFk>Q6lt%#Drg4vh4rzsIAOj zEOXgEHK`8N^L;Jc*z;`gHq4EKlr+3^hzzIgdC0$px?izKUes?>&y z`MZtJ^;Pzk42u&592+KE?-)GL%pN#QO<|k8b8J%FvO2oVio^-Q$9}FWO2e?TKD|ra zkl6@(fL{J2*kVmEBloxn#NPjI@$s1uaQGjAMj#2Yz?R%zJ*vs}Qj}8hr;P`0fyYc-;2=4Imgo3R*I?7^ekFCT}s!3D;TFZ)9>jB8*RIJ{7j_*RsSEIQq z?kB=eN&@K!{(?1-c_4r*naXc#^P>wXI#zySFcSd{9q zv}kOPq3>l+NQ|=dG?xE(p}mt~H5e~Y@0TbjQ!5XhV6U$GQr+bs0FHeKuaTw)%?l4e zd*#wM29j!v-zW;CuQ|4ifDLd+Ia?noW5%HXRKj_76#hZW-p3J2*K_Ee+-fNVf0Pst zZyozw)5>!fOCxnxC>>E&%DBzxUv8l4-@4oufS&qF`Y7XYL@pK``(FV=9tY{&^;dSC zzBihvlSSWAR@7C$xN-L@kODGCK{y@R$OrZ~u5;}wX$Z=DdXS>X3|mA3|C>r_nn&3_ zH7(3>3EZ0Lf}D%3L!U?CMVEPNZx^e;9{2V-`?WzcrxMmWIs!3Avgc0w_SItF@TBxl zJ6u}%(&mrDdRzDu>awW9_G{j#U8NqHRk+^0asmcIfA7V1XX|CQBLcRDb(RbCVET*z zck|1~fcUS6B5+vL@Lk_j7J+$44Hl5e_MO` zi&0)Hu`zTjK*|O5LdcRucml=c1#rdU2*`z{RwU#1vq+F0wMl^XSSW>E*tlQY8?F4T zMeIydyEv3)YN7lsGEWN?eyHm)p^HD0*U)34ehvK7^2QkLKUYj5?OzQwoJu3H^kH8{ 
zpU^y*y0;FzYjz?RTF-=YK;2W$xU$G#6`8&#=+qPa^L0YZXv$IFe3kROnRYFd@k4Q% z)E8EboafFj#=esnNLE`;^Pxb0r=&moWru1Xi&fBw#VimI*8XuL88fXqb!|ZPc4GKc z1e8SR&)Hi5e%79agno+H#&@sSt(6MFX7HKu`KQ!0HuA4*01sg%=z<>s>tLe|to9Sw zL-Gbp1grEtosAeos)_9|6sUUi%cx}$LMmH5`@UXHJ?Aepg!9(`=#L|ziYjfo&~72k z7$MTsl{nv>=0`Z&@8#o$-YZdnP9x#&=bxosRQ36|kOe*h`Ve_cZ_TtSzlthC?D6jO zA=&|E(~Vc&MRSE1Kr;#XQaqJnrZ{)pOTefMr27i>h0x88X#?j?tFGfx5=DAZ$a$yN zdhi@DH1Hyl>7K5?k5!V%!4H&ps1-pr9>d1)0e2RtjSGt67OW_UqnKRezfJA8&MSV4 zi|PkGUhT5)Ta05_NMN3|E!87wegr>_m@(01%bIJvlrU^lK8M9)`*Yqz%Rui+!|D#d zbngC7&0%6f%2}T-(py|AL{A@`!objW{m2m__$xHfesKxZER+9k!=cGtI9;1NA2mWP=l#%?BK0zD>s8hszyfLkM`2iP7hVh3O1x2~QTPo>_X9c->MN-o^C}FGh zbMDbQl7BjMowGgJfU4W<`o{A8Ur{ZKjuu?g!Scm13RzgtkFcf#mNqLR6H7X4~8t+4GJm$Ur4k7T^-H zt9P%Mv@dWM@}Wfi+39;uED6oGZZECZ?$CZ0DQ=}rq|8w}6K4g?O*4^w#G?8PO~Vf>Nc)}uhFFZ}wMMJMzWXH&nIPeV+| z%Vg$h(9o5ORm_tPwPQe2CI9^U>rMC$i|hbrj=O4gnESEvKmi>=pu|*FSR(s1qKgS= z6P?kY5+0%v_u%)vez2}mz&bGVD@1`-`MpI2j1pL{L%O6q0TwY-9KNtj2EBBqeOjao z$Io;0bkPG-m*_Q4DLuzO?|KW+JxB3RjI5G@Olxjba{kPHQ<}dAZR-C1S&EGwW7xsfBF?Rd%-Ye^LjGIw7U~!O(@vj9_%dDpHMS_|Tqbaq-jYiQ zP&p(MOqG1(iKRFUOcQ6=LUlK2P=421$-Esr(teR!zG1DkE)o$Qp@>T*<_%f-4b~Ai z?8+EIySUKTywM zvX}m6jd@9KUdx=7&8eYa-SM8-=m%yA0ZOLNl>aFJqA0+9xq`CZ`!eQ?-v$~8sOpgw z68-KxtGYcR(1JM{Y7|B@|4vJcsY(^mYZGCX*O-k3!jB>Qm?nnPIj#8AoK0cp1Z7kzG}QOm(3Yc}4vu zVuBiwGqyG3`gy)e7F98t#_LT~d&icft50CuvKO%@vaEi8*|H;fJ)(f#{(a4BQvV+f z>)kt)PM1QI6@OZ-_I{C@ZLh>y3m#KWX8wf5TmN#5vbO8Tjd#`BScRT0^fwne3~XMa z-@cY$WlJi2@aVRaFs?@!*G*FfhO z?g4YzViivOYZMeEm}O8l(VD~~kj5S?z6eO@S*{4Tf|=gkZ4&u7QnWnRL&VdLtT49r zM`^z|A+3Y`w}Se$K- z`8CVbhXGJPm7@Dt+TZm99g#mLaffeN=)De?X+NP5U}2%}!-a7j!0^9-;ehF}{SbnX zyAx$dh&NhSnNoSH`5(su6D4R_Wpmt=HA-JoQv_6SH@YVn@{e*eIA#c$Pp+N(Z7{C9 zBJVcJOvqH|hm#Bi$y^{B=*1>Qh%@L=i&ycqFy|m;qHkfBPc|=GAgBF zC^Cu>6qRo)|6&+9N+iw6QA-)$N*3w$#s+#OX7RG0_sco5WaP6hASJNJI(f%F5TJ~Z*C$7Iljs0uyk zuuD?vKliWjbla)w7~=4I=BtvimWUE}nYpKz0?*TL}(q&G9<;Mo1LeyOqG z@*Y8Op8?>SPdk%8^$^9n%+mwCy=OBkWj&qZ*ygYjLO0LfVLftX9w{I9u#>Id?43_P zsu#xYPL}G-H$G;}H?<)foFd528m>K$hIbKm50T3wNo%4fS}P~XksB3boofn$jX?@#Z&?-UA^-Ph!3(#bCWd0KCv@Ph~sh1SMbO^&h58rK_uCaqkO<7fkIv z9;oBg08=9FoJ-|9JwT$+iFPBlj; z;Vmq}2DqR5s0wS#*D7ykiZwY}qSTR;bT4j4FpTUYR{6QCtX$Ic1c0_8oy3P=&;5vB zAu#IE@3ns6m%LF{4oTFL(~2RkYrs6pA44L9d_5}4!RK~`<+J;<7IKkCA-^Rvp6vcw z@?X&!XzPi zi3}YY_@}X)KwNR^VExUnm4{re=4((Tk+q%}oWu^@Z&6u(0*zFG)Lv&}$?mvq`dT6k zo(yaeCjc>D6=J^sJb^ahYZt{g{`v-)p(bJC-)JJE`#v)-WVC{M&68yAk2@#5QYfr` zt@0ze7c~YOPP0N8(%h{~Vk~GU?T-~?sGNw3_z^_ALdV_fr4pPH0{I52Ct0{t#mSLr zRbpC6cF(Tr?|ZIto+o@p-k6oQz9FcFuN(WjWo59I!QaOZPU?x--|it!QOCsH7C4zj zM}u`xFCTBFTa5aNWICxEFHerh&W6$Ki{-R)AUJ3H=HvEJjPYpagavFLYuZzfHl`81 zDhx1c&$E=v*+2_~N`$wNzf%QoQlcljQcW!nHJX>z^S};iq0nZf>(9ZL$J--K(3d^# z+XI(8TgM*5rA+v}chAI9FWfp3GA+CMClQ)UcZS_3e`dlEXNP8|5c7~sDvl=IoXk$*-yaK!e^B0OU%46(PP$C`Q?_-)T3bU2($MTcHn zh>WUO35N=2n%Xu}WoE^dQ_a|~p%Z);LG-4SLu}QfQb@XrRaUPPr^M4o1y|7%dXU}x z=*#WDXU*iM-FmJrN^J$?Nj*B=W^RiD_nc@Gj~N7>&0pody;~~E=AK-fTcJjUvlIoJ z0aG24pS8ns8s0%n#;MC1Vz<7lk!Ux%H1V4wByI0HAKf}cDa_9*{_vRb_%IrnV|w_H zQp%9rSRel?XM6FFcg>gfT{xbv2nO{`EvIWp6NV$}V0PzMrVHtcFJHc{e3g=ObEq}w zYpq4(F{>4nUm9G1)8gK4aB~mF`D}3iDx~lr+$?w_HvAaJ5P6%}Z*Dr8|8w5r*2M*9 z5UI|=!J!bK98qkOKpn=6EtpR+D=T@JQzzysNLF@Bo4z@4j}ePr&o28_jkUckGTjwR zPA@(34KqedF|p$A<$Y)I!p3fo7LE>OWC|ogX&;`*5b;2Tml!62;e-lQk(0!*gSJwM zIFm9}LffxDwX1VxE6r0`R)SC)%@W*DWy~0c#{fz4Q$PJFIMTc;SQCGwo@w13!b?PS zBt~I@9+OOa*I}5!>LMBuLaCs*aRc~-@uQ~|WXgrr`^)jE1iE`J+B@EG(}-l~+O<7< zZAD6*-HT+aTEf&b@a+P&ml#}N0xwW?4Js%mfE8?Xi0xIG?aQt``)RZIt$P>NFvn~q zas(tL>CzsJ0^83Ht94iREV*&o0ar6`&z~I3X4V<64Ebd&jK*l0<+qIhrgE@^M}~#i 
z!rFouw+oR*YRUjl&hj>6VppkYUfA+2W?AiV9w0RT&Ql;rNE<$$r2;MEQDh*$|58X5 zxWb$kMvq=Bc7HZ;f5y~OA5W&$eG1+aUHa>0kJa3X{rjtD(xYq2k@ZkAR+f)jFK~|m zIq_-)^ZMwr6*0zP`j3g=pK3^m;+GDQGpI0%8sq&uRk*qf9c>GLgTpE>rudocALy0L zGe890mfVpLcw{!Bnl>5n{Owf=uNW9i*-wBjKpb#Xe!}BeIG1|Rg)h)?j;>5RWaEDw z5K#rM;K`$mVC$>yyM4>6*W5NAwcVX8-eTlB9!ry4lcNJ=K_II}88#8Z#4xGgEuD`o zv2eWxCmNW_8wQ|z&&~L&ktXZzYcgA6lr`V5=y~i{dwc$Ra`tM_)>^?-_oZC)>pEGt z;7`a}C;9MXqZ!^ zcgdmG@BB$9mm8gkDy7)qH#x7H3m((8Ndh%{8d`t=mZXlljz8gBFSBU)U}|Qrwlxt` z(BS;JUZLvqx%P|eV;F&tbSiOG#G~?18m|}wHSIFx)xvN=c@O-qnuuX>yku4^c=Jnd z{})!ZnA6i~$-SCASYHI|gL3qv(F-oWnP>$0rl7*D@!a@I$TJ^C`VMkithIy#i2lG& zg@Ozx3a_g_TUE@q33Rv9Y!464BDW^(7Otfw<`j%?@1wtR82n+lzWj-8tK361+1--V zdPcS*p;bQmpZS4X0;FRIs4{^3rseUr5m2Z27qkvAw;hRtm6N%!7NG~rCTo~mA`+jDy-7Lwxjg<|$9k|w8Z z$p{|<<%E_ScJ&ZR#f>ZJBQ+H(l_+xFnfarPSa;lLKk1kM7_*twK#8U}?YHz2u({uT zXFI87YAzbs1pbaQ;53!tA;b5SWi8{%4Bwgr4WC%gr}emTr-0#6BzggduM@dRV7 zpW}NRRW#bP-}-|i*C?!I2s%gqN(^U1QGkkr>W3@0>f6eMWQzLcZ39XVk>6{k_6+VJ z`yb%-Re@9!q#%RHOsyxaXh_H}cjRTPXz4s)IRLpc&tXIUX7<$Hm*T*Dj-b46c^ju& zouXZs>+eKdJ~OVNy^^{8bIkP-&A}$syN9f!p}PSDIrg(xT!b0K0l6 zYKVu1zu^`jj+x^h>LE=MO~cM4p|t}DiBBu|OP^Fwql5z&tky5-=6pv_c)2MDJ zra_HW%C!e>Jx>}Ji-f=zp*Vg$6eydv#t-#ATYcjGI}qbCJG+00A%Wb&B~c&_L`M^t zprFmU{4bFCBkkMthfDQSI?aQ6nZ`%-c4q&Ge;Cyd{&u1)MK7*&jaWX{1M>e+zQFcp zV2ne__Xs1MOsHI*HkZ-q0ho@$k3MonFGvm%yC}RXMVwiF?gw%5Vof4#h7>i;yNsV(JiUGf^$a!}Pkf$0FUSkj1SFy}hyM@{JXaBg&(5u&>K)2JtjDhHxKkx~x zZPPlh-9@ZN`g^@!Fl3xHK{gjD zfgQ(Pq3h1vKd}r^NE?7}-dD=@pCdP8iwpRejHCC5R~7}Kq~giOv)CN5XpXj`)srK` z2-PBJNAQCPQhDaAmZ#n` zq~^Boy6y;vW6Hx~R9NUmI#;ja1P~K$zCfoS7bK_UcI@f8?C^DTgiNOIkJR?{~pBT$yvunE~ArH6K zX~^psw)SECK+^wMO2{jrUH1Zq>#nv!^!=sjt22;ddN;z#P zk?m>ROP-Dw9Tj=;F<&~8*-F%J{`4j(acUIdXd;S8)llT7=iTh8n01{P696AfnYSQ? z3N?e>Wt8#zy_+@4;umF}T5$3$s@z`=rgv*s3%}}WW4@S5>1chGw=YS2)*Zg)Z&hJ$ z7__7uj$VmRTFA;Eg(q{A6R^a7Si_#9v8owyNFxeEJANq9ZtAX!UxJ%j-bvnqpYtgp#-wMyu@|^ zWsLIytu;!)uO;J7(J>0U-aexo7y7X!*U3)$g=f2lBS>6-o<)V8q_R4zqzQ~2B6s_> zDAXL~q-73E;|en~44OO>Y86Pm|%u+^=@)_-DElZFD_cUrZqj72h4C3x!?V;XUa- z#-h*CbvI?mZ`vO><)F9*_TdLDJU~;54WX;~%KE^-DX6L!>#mUU_uB=CL z&zyB`i|3o5Oq|2#nRNKQ0e8q~*k`9;TwAt1niBC-33C3bkQ===kN%7tqnqx$&tkj2 zPmv*f4IXty``yHbMlqDoOR{_+cF&kmD%-r%tixIVivdinS;vjfQT*o(c(e`q&_LP|K?tZWy0pL&WO!ZyKS&2i>_S zcgmLYy5`i?a{TunKgg-KZenJQS4b_FZGvFGMt}2d+b4n$m}iZarHqTtAvPt^HkyrS z=9zO=`s3ToKV?2SRJXm_RXvEGMmtYX$4PX#9G^*+`flQ&! zG~3=~ICrfc`z=yYHZ|@;?k#?R1#`2YBKn!vw&PT>ZpC@^5;2Ek!oPz8S&1*fe6MD( z%DW?Y9k$tx6bVyFH+4^#Ur8ws@3FmlsgRusi$&J=8f)Cy(MP9w+*(BZbX_!- zZ+AUESW8C~@g(^3z%dy>0U%7Ha!=UhkFbHEAyXtf`ZD*)9sPanY9)%^4n7Ieb;HhT zi^cKAhi_tT>B?erjTb)-5C%7hwWtTDJg(#u8|hQ-wybcHqCx{OQyTa27G*$oVK&EJ zP}c}rSL>G?Fen#GG=9vMQ8er6Q7ps_+5>F6&Ly9wph zP(0kv=bs5y)yXtRNV;eNc%DYUYJt>Jo%x`7c7Mff9z{7&&%a>3$F+}-Aw)za7H!>a z&AlC+@k*AqNUju^$j}3Gc*w`oJ%%5a|L&1%yE(Rb;-j_L#mOIYuU|R+nqP3-OQ>gy zy}Ok&s!sxf?h&JiEHR^}6jfXFq{Z)Y!7=6$e!KvfkEsdv3mCOWtNDwvsOUjG9>uT! 
zJJ^v01{sQ?(NmuatL~T{XvW@L)N=b1mHr~7bu=3cN(95ZGY@+qusE6IswI-aOnRJB z@K`W(Y}2yV=1mPD^kx)_Z?;&p@LMjYrT`#bKSR84578>T!}rhk~+`1**$Al z(LE?JB3Z5WX(&-vE)gCaqu8OnSEY?eYYkC69?2J#%d@i;ZlL#w(IUd}+~@c!|7ZEv#I#G1Q(d2TPo=nl}Pu(2KlKM{N0Qw&i z`#!oa@V=e!By8^T*bnx46%NiyQ$O!6(K-$L_}#Saa^Y|1f@okXBP)5|E7l5boH4yQ zmeCG7psx z=vshRJCW%AEt-yNM^KDaEBx4>b|=J z9?efqG*+>L^HrL)wV4A8*Abq2y_4v1z!)Z<_6v=FRQ){~U=Ei1Lg^YrWV;gM2xsN^ zv!{2tMc$NZ+cE*eduW+IJUTE7Sg5c%^PM)NxqU83dU<4+lKuq>YV(YY(yTOli=!DE z1o+JEdNB^$LDNMa7I*Eebjsci$Rgsb$vyUCM1QA>*_MWQx5Azyu@Y-%FL$a5jh}<8 zI2ynY^}&35v`$zG(C+*kT2KId>%gj$U5m-Gw18@v7k0Gl%Qbusj^r+t^z5Tg>SVL( z=^z>DV4$bP{N6(ZkxSi1`;PhC=Z8@(Q4AO}fFuLGJIJ+{c&}t5R}{IQhT7D`oC``S zW&MmhV@qv%RFf2^oGV=(*Uy|)c1oJA9pwX|5c&ueHH|Q^VPgzeVX9={l3i^ZbzIAD zR~0>i|A5_+t*S0UjGbA$VZC`^vC%c=*cZjt{3LQ+@A=ECIceMV4%8@4JcrIj*ConR z{g*9`?_QhsCzgMv?fUNMd619F25CTr!bkp@IT)_TP;oJM^8*b!ZAt8%i)W6Ju?uWH zAi2_OJ9Vjm0du@}kSmWJcShd|5vvMy;2F*R=NXCPF|h$mxkO*j9rg|GrU&;WtJUJI zlv~?#%|XYbIKfK%K#|I}51Ty>dW0e1z2Ol?l?UYs;~SVe z@^1Gp_39of9O9hX+VBj5_7{R7faxhpDQ=ghAR-stt5S*S_7_hfe9%`t`?b3jQ+m_r zFV*MmlV;Bkt-LeJ$TIj0xjCWC=t%q8{b6@Em9QcJNDUCe3l&CK)te- zPrJKi0f>NwO9O}*I^V@41G({GtX;8fbo6=;*iND+efCcg@%t4d{*eKc)RDW2zUWP_Bu>i^JzfZX%vL36&MO(z z?PrrqjFU+0^Q$1pA5C);<6)qvmrFhxpo#gRQ7WBU2bUuY6kvE_EHrp2T~cmjW+nO_ zdz$47%liZdXN%h!$Xv4*qn_9PkkjcaS3D2K%?s`!wDa}awx8c>_FjyWn3b`12EJ~Q z)X)y=^XUv0Gcrv__&j=vy+tYsBuHpkpVD>HWKIWsP2Re{rbCICe&{zfWM&eX8^uUJ zG*KENXBDMJ#ky3XSH6GymwKjG{EB;0@1}K}E$VfX4BsXo_X>@Q^>d$LlG}(tT{VLlfg!%{~u28c~ zUuIH$!F?mWo9OVVhjI2fIiISA2C%$CjmM_mE6(?_4nWX;uDFNYfLTUSP*AWlSp6F( zR8ndBh#dgg91IyGTHAnKyx`A7!L7yBPea>Q^;}u3Z@@y13VvoWEI(kZb^?ok0XMW` z=xC)QEG*X>a>&Ub8sC41TB(|zEMR%l8mumca}!;2aw;&-c2uLH+-5ooFC`!xh{Ti!OYHk#a?@@dk>#8m7G7!MFk0b?$dx3M*@?GWxvMza@K7OFcj^b zd2f|vpxC)@Iim!1^l9S;Vr|fY>q628Fa31n^^kMV7k6LAod~=+bT01ON^RLGuzv~O zEifxMo`4y(`dxe39CYQrE>mkP4SL#3j~lg^Vh*~d0&?@;pSXD+Va+>v!6S*{KAQ}Q z2mj%5V8BOaR4I6$%;%&CsIiW%w8IBh#L%P6-jg!S`x~T$>OECk?6z$^Cpq0|tIfY2 zcKu-OosIG#c0@;U3t7;SxOE^#b1hx}9VmKUx9ekS&T%XeF4m2&CNKr0ViN3xWInfx6HQGW}m1EEq19M&u?dN+kw?Zu0p0pWA8~!G5 z@rFP-D2t~fcVL3uxHYL;V6xVZQIc?n(`N|OXH$MO(hcvvuJ981+Kyp|-}48>Bg;H* zbJOO1rqDcAK;G(8-f%?GFkpJ1mQ8SK9zlwymWdmh@HwJWOexT+8T=Ya3+gqR)ikcD zK=bNE=s){oG=tJUcE0$V_4XG6PsWKHipz3sKW z<5B&H&*_0uBrQL-gp~ef5q?DDa?PYtLfZgbNeVuA{eXS8! 
z>1$v9Pb!6(kyH^}V`Bp-2iy%&T92TfcH5Bg&vw~CHEA03DXhwzv#ruGLvc1yGL^0e zAMG!x`XKM362&YYn8d;#?ZNpHx%lFE9l4(cw`z`Q5LkSFd*!sB5iX%L{7EAyYHsu38S(?uTR=#zNplV3^H|aw&Oj_Y2oFd%9oaoVadMRrBdYCFqW76)FV6D zot(q6?1;8ipi&4 z>)<|``|OPX3+AP;ZCMPSa$NPR*tXcQl!HL)9UTTY>J-sF;`F8cO*pqxr zJuJJE-Qlu0hxY`Gsyph7gySVHAyqWAKEKZ%@$Mw4Xcm&HRPa@|k9EU3NZgzL?OAf;1Y<45+;Mb(WkYw(;owjm zSs`~<&1Y4jt%OaAT3!yulPTX5k<(eGLvqdH#|5!b(I&NMU-Ye8gR8W^^F9|)v6dxo zfO>k~(cvVX@W3k8y8s327^I)W3_>U=j!15frF) z_`0bEcls8Up~$zDI}xl%e_&)xL1KL{x{|=)_2H4qR3_u%4J*E&-3gP?_?tMqXpyhL z%^fbuRN(sIoPY5bu7NYZm2N^k} z*Rj4?l#HGo--6eTwNVS1kcbzjkgh%b?wn^(xiNOsA7K4ZzQOYXSv&r_Yd`DqZO1V& zy(8jj<#vAuGP*r+OB@fI14GUh^iHA{U+a%%4}CD?IBrR`@}L|Y|>DKW389!zGBvB(KIDgYSGJ4z((l@Zd1@FX3)r}VSfubIBSN; zHS1?fz<4)2YxE`#Z)Ke!eNw)&>K=p93=^C~#f}7Zk!)`k4|xVI0Ch)3f*)L20MfAA z$p{lHPX7>his@UYyco_LiLS?=*u3mDX!y?T+W|@i3s3R0Hncia7uO2u4NrH1@Lisym{uj2=!wM4z@Hy$5v zikq&PG%&LeQ@vt+N%}Ka8=M$SAb}UqB)L31%tAb~FU=eHi62Hgo{kgG9iUTs9r86P zGC02hso{I|7p^5v-|uF-a9yq7ZyicU?69oa7sVz`pP^KZKA6in_e-5%$#$-o`qq`mzk)f$GA<#M_NYtx6&Y*r?)&r8 zwL(fa^U(NjUxb?|%>*(;#gJ~WYl5&m@y0x68)cXPya-Km0cGy?O`~-O$P;Ab(7QSx zRf)YcVKk`odbQPQ+rH}oOX>>69S-a7IV?H2DLGS$v0rH)Hj`B3C3`zX2;6DS$J9qw z#JS!%hkv_RpXuz`c5+JuUwt<9o1zLHQ3l|=Ow_NdOqLhNUDG!o%~GP-*h(>JK7RglGJ~Mj zyeQdmT@!BEZp}T97{(%O4g32TsCuzXK2CRJw%w3+JxBMNkQu}Mz7#h+G}Hv8@{Ox7 zbV|sNsE2AYZ3E-bbAh+ccOd}+xM6@F6v1A}06~sx7ins*mBVO86Y1aUXnLp$>fE61 zIoIpi=I+>Ab<;PuTi`>A6e3aeV8ly$RQ_tZHRlrPN7~Z1)}* z0F}vy*Vo_#4ose~J4iDo6#))6nbv3Vo?E5zI+&(m=iWs-O`j*;!Qa@a9+Rl5Hn@m607@d7H@+5L&-lU`}`;kD?3h% zlDvMCau!c3mPu>YNcL6EA^Qf#P0uU52GGtzVI<8x5Jz%CcDS13q^LBq`r;{+b;&BZ zEsHKfeBB04ENJ{yf^s}rP`^B&NVgDmwlRHx?B`w0p$2L*q`8PfcRnW2%&jFUNg(d2 zZ8rh7*egnZ#u2~Un@PlLy}4iQ`UHq_h#Y*4)uAwE5r^OVyzHtob5~`jk4GxSlI;RPKW1cn|_E#nfcB$f~iJW@= z*8csLQO}VB#emBc1#jos`ku)YPN~<;43Mq9E@hVV&&iVrRw4ss321M_tG;-uMncPJ zxLaJp*JBgFULP+es&mF@usJTdb`1G;8_zRaR-=6H5gfD9zqh+PO~#R`4v=be>Z%Sh zu=>#A#Vey>CZ0(07X26ELdbQ1IF0sPJ-O0(NE7lYDi7Ay72)1E4z0O!cUt9ayBDnq!^1I&QydG zT?G>LcW;renorg8aF~`IIZHFT8AFd}xdQ60NSOesiySy2K5X}|Li6SdCvA%~fNIOI zZ_FN*Fx;V-kMf+|x^^0t-oH)0tuYKLlRS1c?s_~udY*FE)2%a>Ri1*^emf$kdEmM2 zJA!coAGYC7M4so2o}vpCW#EFe<&LeAPW9QRgMaC&IFZS?Ea%eaIFhM$Z*}UJ3s`oNN&NgwdQuC$)#%fz+kD9cPU!8vcJlP{- z-N7BIDU5G-n<@QP0Qu5e4LjMGrC`PaYH|;z=2zCML)j5v{Evm2UE`T$pob2geD!R4 z%bz63ZN$=UoQ=-pNIVRf?fY@d*DGyTfU7ZDUUDbOS-*hCaXeyF0@k#Jo!un*I&p+* z&5i1_iGy0?eui1ZdGXOd*+2P`vb!crQ`!xr4g#@lkGsb# zUFg#%;NoEDby(sUTjCQ3Q~0Rr_d!Pj!0LU?EsOS$bm@o{S7fv=17XI0)=s4n>2)M&Q85MClcUUYHZ#ftQ2A(o`@Rd20kvXB3nzw4%Nw}N*oV^y zfw95z{0W%@di!G(it#)tV-1ZmEW(4t9Y zc+M=${`B5Prs)+(B%!*2^@|!~U4$CYJDhzqe}$J;lp88v@J_?dA*jWXM)E_PY7qa~ zdT9&2Mkn*mJwMbB#vhT}@426fr;R4@*yTJ4A%tVj%nfuK}}HGWTHG?sEc4h z{lc6Ibo~LFD%VXir?}0zLv)9`LsxI;Kl-gW?u_6}@)kE8MYn*} zj~hQkwKcZM0VSrV)$WfD$1*Czd5biDGEZ63DU@Z)LBRq99d&GM>{9HL8NhvdyjGzA|rawGF2%!6zi_xqA*{Ht?#R>;rUNWSUQnov_&Ib;0*S+MICj0!|i zFHsz3+XTvqE)WSRRAj*@2Qy#wRo^Z&sac#u4f{+iIW7gluN=@CFfA!$I;~4v>%2ha z(#0;wx83osV03nZMNNcXcm&zrVRu6g&;8;trDwrjMXP{a`Lc&qX1lI&%fRL+;D24J z=PJY@U(G)APilNuXn5=blKDNT3G}1V?FXjW^sL%FQ=SO1V!t|BtQqqh2bujSCa&p_ zpAx6wh&!W1hP+4F-M}_&hqlMIeE0Bt-49QWt(=gHb@QaU z%O&D%1{G@hosVPd4n{O&J(s!MPm@#Ihn=*&E*ylgkPe&4*r^QU>tOs0^Wh;uSr6i_ z$u9@76eup18gBzW1GhAaLu&G6bi72Al|~d-`8ZuU%tXigbF25`$ZDFWV)dtRboaKu z;MBa(x9{=Mjd(hStEfAr1|rS=Db=A75e?(<=-*yayZvr>-~cKRUl$)2KPFG3?ro5Zqfkw%B>d zC-vo5ex<=RBz@5z9Ks;spXhZRW;ODnf?DbqatPIgoQnGEIvrQ&B08FM+zvI9s^^#|QhFY+bSR+(`? 
zJz2PK0DeF^Id}pfPrEJwkcC>j4%e#ORx2w+n>KOZu7Tl%XH1n_pl@*($V4+fABBG( zIt2F1-`f?+q8K5{_iNu~sNYnWU|Nj6X{U58>4}`Ltzw@M8SN;juz?miX#eBMa_!Mz z^d(m$sFQvx#LF%(AxaKLr>4V9n0FdGCcox`m2N-$Tvah$Jj@u-ueFKx0p?l}?qnp& zLQtM?ey&)z+1%APAVIYeD=G}w;LIhn_%XtW%gUyA(N}TcXicsH`1zX^v$<79qY>=lq+FKk5)u?4I@D5)2e&v2i+bP)*Vw!ptN9pkN>R@)>(dd&&AYXVM!jnZ;znGO_cj;t2C{vBakAs4|>V^=zf1R`-s1{?3ISk@3Ce_y=zcxGNeQOP0eXf05X}CMPOHE`j*-SD5=N3wpuo z5yFY|=OX-&hMxJ+=6Trp>{k_gv9&twTe!bTu2930T%Wm_4jv0vVPya|JU_WteZ9Y# zf()o&pa=tox@fGrivcaaNzVO-1#n~Yb^1N%3tJi{vJ+nx>7T}=0rjE}9t=FlT7F@a zY_K@muL=%);@oJhBVBR*GJsq=exR;b;7HRaKo_?g@9b#HN{(O zdtXpdOu+*jX;>M_z=syUGVM=;4J4-Fq_NPwOo)1v;a+l*_rq=}D{lUADikJR;H*-P zR^0=*pkDExB3mT|B54J1tqmaf02T`!FU9oWzHFE4D-XWv_=>3~=QUoaWWi181(}pf%<9kMppif~H{{ zX!p>+W73c6*6D#~85U2Z6Ex$+dhCYk>>eL^hzbt-qvr2xJ|}uw$l~Xa!iBz}HNWh* zNupdT{`^W0LAKrIA?WXHmqQi%)pwgDn^GPN=i}1fVgEEHH|UfVMzb@s|8_v@3mM2Z zk_d6Cj^{}$gMb3Lh`Qg8AFrM<)1k5Gl)}0;?;_L6Lh1qg+#On4Nx5Uw?e2 z*TywT?hJne#}fxCSBuEJ9t2pe|!{279vTk+8w>{5p^~J zIkKvG#u$%%LoAlxG~3@_1Zu!q>iJfKQoIL{^PH{Rk`x2}59zC!oMy_h&>fY~Z0Aij z^BS>1u>=1|ln>!|73&-mq-A8IsY9y~%j+XvEDGJuDddR3wCQ{0%VeJ|?nT)6yz>MP zXh`GVrBU|&p_wg5amAqBpz!wE&9!4}UFOI-GO}kwL zuK8lJ{bu%CsRY-mi{cw5iwedQXe?8qRzcBhU&DCL3{Ynl8w z{E(BI3 zo+VA~wL_D9PQ;40f_WAWkiy1z2y!N!K84k*;d%$FoK zF?6HG059ivW80;X2PALB8tX$&01HQH)Q_8V`knz;PSk2XlEFO@xq)ERFS@NnWWw*4 zQ^p$$n!%TDae|(AUw)zF6c8uq&KEfirC|}_eo6u@C?0zwZy3~(!m@5Lp)W&AzSGHA z*bg|`QVB0u^Ddoj01<`gQK2epOcEr#890WdZ`F z13W3i{!{L584V;2EdX#YA*9}%?5xvrWWe3(SWf zvzV$4njs*blG7viwMC6!$xe1mc)?#D)I^QS$isbNH$WcT7(nG|?D*0*Z3n!OLM+-> zFW!2Q-)Pl5e&AxBHzxbp?Dm+=OjRII-~< zQy9E5R0uPT4(lZ>w_$%FkU+W5%V=ky`9MDA)ABH)94)LEt441NO0{IdNg9w@WkWp& z-t+ht5&ra}tQhna zW)JlPuMCi|t5gU$&wZ9EW(m4IY}52<9MBO=8lJo5i8Kw7)OrO0GBLJ3-LIHdy4$^I zL2uDdgi&Gb`QSaDInMXq`$$$*Y$ZYbAf?hCnCi~_s+TuGdb}=MfW!h^+l~2!N>upa zES6fWhV*+ezZ7{aO zk_V!-+e9(c>#j7V2?xH>_fm?o#z<|Ft!%v{m>!GC4z=~ zv=QB%_n6Pka@IK`^4cL99`KYu^enH<&jfGJeJgTg|y+5jRiT3t$-mAlhwJ|*w4hu6igw9&W!(tZaq!R=Lv`lR0#1x!8g)7eu zI-Fm)@9YN~SzLM>WwPYlq89dUypM|NrwW4yvNBJ%gdVoK^Ur-?nC=gCo%|LW@|t0< zr?=K}Qx%nDk9Jx&+}ipZEk3yce=16|VuXojd`0#h;j|sp-D!`@3f+ujm(}8zcV=Nt zu&0d~JL-8)P&p;2#r>Q+l~SI4cX9U>5bS=03%a}p>mdwebq)&)C!pos2jTPU_u(cd z>dZQc7LH_z23I>%-11eF2K9!$Tk-P=>cYo<-vFnc4r*<0x4A%=oM{$f|dEw(udH*ESsUrNxME~ z)`F4wQHeL~;syC!r*ON}v&qh{8hc+7T~MSqq_wY<7tX!6vxP2J_Uoq4>atzOe3q^* zx2^=W?0Psbk&$AxT}^^huZ6RQEDgMyZ{0&_?%IHV{VKWe`T{6mouL?F5-#!2!TR`I zQ#xnkaFx_lDrh*p=Ygu%=Uuz{;ppI}=gQWh!AaSfd3bAWU%w2%kMAwG(w?@t5Z&H8 zmKwjj9M?76r|(S$;qeDs<$x5CSr zxF$6ygPiXcM!@ri_fJ@-BJ0niYX)Uzby8?#+@{hgBU5(D=;7qCwQ@XVWVAPhPJJXY z`z}&hv*-6bZ*P{-;FIf(=8O8CWO5jsdx6LA=BR=jeEmfh7*o$gTTd#ODR^9&zHb|O zDY;q%7kH4T2{t zoo1dn#J7)ejo{&OM*}oP7yY92o(`-kr@1uLlS=M%mNW>^jS%DzL0P;^K5F-^`4i1w zygK(0b*Tntb|Uu<#uO?tGL7xalJn}!k@<1@Dd{%1M#$7iVhV^ps!o~3PO~l@fJZ}8 z_IG0&+?&n@&IP%Vzdl&q1}rbiE*&qCW}P3B&yQObv!ZE(V!i02kp#}qUkX~-uw)`&`C%1hQMKQ zgsjjuiqIPt@MME)V(yD_l|$g<%d@(4NUt7pg?pfm>#J`Z=*Z5?KTu+&u6@^s)0r3s zw5(09lBQG3*Rcm*U)1Gl3v+u8a1>x&En3Od2J{q$?47B4E~v_pm^v}PYaOYuXSQ#4 zVlMEvyqJs~QnkD?_4E6P&RlSBUiCT4Rejf>(D%d4RU*+smV%1T)Gw{EcZH7Wxd`Bj zO40DPSmk-RkqjyXO5IP2jNF`WLvH38Y2_E)mD8R$dIBEZ{H7o5u$u@YzZrgVa!||E zS$lb2lBH(|ff{qej@|kqj=6L{R^mDwZ(C<3XKxAQSDt(uv$hzaMozp9>M44!YJAP9Yk-{gj71sj!c;Cb_5N%2_8VTQkD;(!s9gx?gF} z(=N#36eyv*hn#97x)mAW{;GPr8Iw`SqGr&7<;cN?X`XH!JUJ3Sx2PA&?FPb>naO-a z;aM-NCTs-7s9usqwaT-L>+TIic-kZ08(Gbt!tZLneY&4-KnLSaAdMo z!^I&!ZOW5IYRayQqt(RebHjzDck|rl)Ae={wivF}vawV6RnNFWMp)M?UAN22PNdoQb4&U<$F)Y!Re~09m%|$J>uTgR9=+l7 za{KZneFe7F;^NHTiZxp*H?4G@SXW}bK13~oP=gxnoBJV%{E$4xG8RuR*zdP#HNjik+zcb>WXaZQy+7EXw|@$ 
[binary patch data omitted]
literal 0
HcmV?d00001

diff --git a/docs/assets/deployment/hf-inference-endpoints-create-endpoint.png b/docs/assets/deployment/hf-inference-endpoints-create-endpoint.png
new file mode 100644
index 0000000000000000000000000000000000000000..e1b0d12d1caf01b1d1b07cc174fa6538164b8815
GIT binary patch
literal 362703
zU+b1QpEWG>h5B1jE$gyL5ZwKCq*^O0NmK35xrS94JIq}cPk*rCyd&knS+U+M*EcSA z>Crbkc$>U|-oxZDik^o9E}k|o{{QUu4eE<$m3Bt7STfx|tm`1mA=iG!W%Mo4U|#Fr zbZpPac>Jon;pCg3Q{Z!3tXQEOy&6kZ zjqBIAIOsnoUtb5Fd0xH-indm>3`9qKFHm(1@NJ<(V@ShI<)}p7N>XHmHI73Ny?Asw zjnzDPUg4HYdmQVx5d&v^VwbYomhVkI7s1sTj#rY}O6KY|Fej=-#p+Etukx44Wi4W& zu3!!UqSbfKk*O`I%Le2t;lF~n2>9w$X_9@EH0LP5XBBS$Fmi)4tfxS8pJa@y~+bEIbW2+r^v|CD|vM# zP7+nRW|w=XeI7pR^nm97-H}jIzc`lTPD!-Z5uHjI-xn@lXnU-Nl4uM1v!m-erB;q! zXvtZ7%G%>4)x;EcZmN{sD z@9w_EOrcZO>fFKw95#NBD2Z%t$D6A>k{wr4<*PMeoKUFg=*SRT)z^OKe^-fp7<^E&0dQBrG^kXjhWqAVGy;aeD z{oq>BZ4e`#C#}x3Ot&M?wnltejP-OhX&DQKKvmI%uD6Xv7QM%ZDlTXhwPBWhtaqqH zZ&t)7*@;^ba?%EUoZrauWY*r5a?LR{A5lc5uiiGedm5&wgZKq0 zq<`{pi+2!1*xC;qYUyZ+d)yc{+UTQRv6iIH9*i38ZUZyt%mc4i@L4NznZ_#Xe-u@SDfl>PZ7U8BpWT2)EEy!a(#++`D zckkJ@VZ&I{i$%sRzs63~s;l-Tzn<7xrxV*XZnRp~nFq@?Bk4^7mxu1Nlu2YhRbOWw zI+c~GFA(@k3Pcp4ut>~Banme7nf}Is=5}BZx+?QD@ zOR6P&i{+u*qC-M`os&)nIxPlNdoOef9gzKArN4Gtba}geAML`Ar!$PNBWOIJI+yhr z4jgz;&kFj!=2o0eD2{X(BpzAJO^jYF^=_O8=RH~b^bTHCOI3X>>eg_X1SRpxS6WOR zDWKGUb%PnHU1C{){G@8#M(i7~M;DYHWbbO~@I!dnqSZR&GRu+Ya2o`Lsk5_>dY&&z zSpP1u^Z69q8$Ab#6#qrNZyThGJjL=0D8b6rRi9!k>AUQAYE5f36(rv}3c__4dF89U zitv@zu^aR_e}{$@bu|UO4|jBA%A5rSlf>hB6WNxhvpz_8x<(_@=TQjSkCTzpAdg&eI zO~TXqZ_M!3De8OftS<$v=HCvV?s@N_Z+sO$xa??k$DPgXQN-YbsjeMe#S`|RxGODjWP#V4GEg_L%tud+mR$}>mQ z%^eU;@+;-ZtgL3-l$uDfe00Sxe|rkiIt3<;7yC&#qYTwj#K)TM+)r*x!B^&)mUoW^ zuD|Mq@Syx&YBHyzZPdEBoH?nxB~i`3ALGrl1}*MLeE+6XdyD>g1-v(31hZ%RDb|owBE$ zYgTF(^s+8hun4V1Dy7*s3_I&?b}eReV{!w0Q-!HHXQ|=iE*u=?4n9&sTgPn|_PLU>kx2&uaOjgb>j%rWw*>k}&+TDzN1- zNAZ-^)`X4vxgX-o`!SNvukCtgkVL4gM{F-~Woe<>;VZCJLk6CsA0U33hz<^aWE4pE z&^6Gpm-JkNo`iO4gJkTf#gLs2KhXz>D4sbcaVGte%S^XzyN_VZeHTnG2s9_eTN8X% zMhh(y`mVtQ1=|{F9gQUu*!y#lRhuEdMRH1cRO8Bc5Jtr$cwHydAy?|YUii073R1G8 zb{|_TNfJLRn6EaN;$ETcXcy^~N8$z8gDi}jc+Omyg~MVimeVwH#sB`Wa6H9Us{GJ0 z7z^~SIil>nRTkSy^`p##LrJBL99#0hW<+upyme%GvPy#~eJR;)J3|h~7E$lq)MwEM zF3f-oQpZrH_f*pbl*>9uxHNU#^zEeGF`sH=wjsTBRIJh)v77Jom+Bl zx+u$8U6V401SZ8)`!xi`I!3E=<>`<|RRV(nV}8aowgK5wP^Fe z2SkS|q4+#P4?!k%AdS5tIf zdX&bdsBeU(dmD)Tk?&YAGqXy-Z;3)aT{7*>#EB)sXymr|?0UNcL; z!fL+^JlamY9dBb=Eu8!Q~Ns#en9lElt@xk$X?pDSx19i>Nhqw zsfOxyf+mx~K0`ACb3Qt84i?;Kj5rjXbf5FSq+&~p1=j(uRf~U(*>CGB)rARM+UI%f z4UUs3IW2^iDcJ$fjXqgmkI;N=T>4P+ z4|^`wZt;GpxmK2>RHt7vj&0o7SYEb&9pxY*dx*=mii1tAX})|i8E4i1v9!Yp1X1}& z?e%~p`t0OnH7%rs;Jb4A^z3Xzj3e)xou~BfxRFYeP)2!E<5Js)*sBZ=1Izt^cl^q1 zA}m_`HpQ#sv@F*`+7O<#B}p9X5clon$?u6a1o5|zf#jGifat9qLIB;5dm%llOW%nm6OK+9jH0p(fvbWL42*y_92P z)JbfI8sJkl5MFFMBx8yhe@O{mr9SU&#P!(QdCsBSd{jHo&t0}#W>z|YSjo1s-Q;(# zO9^s4y>!vK^`e{Obdr76J2>0BI%k;E>a~;?+o~3_Ule~kvT>THPoyHOKo8^A$xwZO zU~!k)_q%WndJTxG{uYSs{#Ns%%jcRc8LyoyEIEppMdPN)EPI4516xh*nc~9R3Q9OL zD4poSa9M0{_S)*xFWLT5CS}R5JhC#zaSe|*Us1pZ&=q#c+Q}AbIZ-!_OKk5qju@Xa z4l*nEQ!ix-E%fmFglQ&%xnEKh!y|7>Cjrd!2;monEg-aIC1xuN!S8uDV+g4ixr=3r zMkm*7z6aOr<3`r--)w*8rO2kx6*}axm0*2z9oA;M6#XJn8%K>NZ^aI(&nPCYl>Y(Z zZ3r>E_R3i4Pp&uF=8;c`A#pJ=`q~}vuF%mCO5(BUE6+F5ggm|2oEp{y+dbSgmn`UQ z;9kDIJNh6pS*XdKeC6|btY0#_V%!h?jf<7A{jfVPI^}`#2Jzm${lA9z1M7%u$c!+JJnfY6d--F)Wvi@)x|Iz9AF_L)O1eY5xl+n=bH9_@6CDm1AV z0c6?}xm=K$6ves1;(ScD&Z+TuV%@#MQ(}5X-r+Q5erY4BRmt~C^xx~I@g+;z3?1o% z)y>_vDU}_yP|FKMJn1R5MZ?%2wbg<>5s|TV>#fdP$?dj8)apv>s*zjWrOcA#8v}u- z1V@M7-pD*CP0H(i7fN51P=e7QglnG&*m%J;uR?>`(r?os=&;)~cZrh1{DT!|UaJ=Q zu0%)Up^%4tdE6k@1^{l0h~+9(wakfhL|1L?qt9&hsG)Kb1j{>f+-s0M9pCxcrniM? 
zFf6$=PORTYY(eV3Pww7W4U15Mt$jXQ+-_HH6D8D>Yr~rDBRl$_0GXn9JXG#d`pgi_ z%WKv@Ls1SlzZvcG`TlF4u8hI^M4!c>ee~U|466Ho!PL{|=%^d1iENNM>J?YlLZau~jDuzRGJG+pRR$_kp9;>CuNwo=(T1DwRVVGG}Fy z{ENRaHm;hdQ4CotPqQC|In~~Ec4)xc@m81(XPA%SM(sD-^rAP+C)z@%<9^!RNNyKp zI`zP6A$BtML5ss+`c+s0y3!stk1AG=dHp);M&m*JtE*Lh0t>CH9Mx8J(oJIcE)^Eb zBlbhq#MjW*mIZ;9vM&NFw%q#yXXNY=AFVpXdG3l=JCW0wq2=4F!OtyJbXgcBj!q`u zSkh_f>`dUOUTyv;jvXBfaCzWw5*_$kM)EO*d?qNWbh!jNTZWwF38l}7v={z8`@Wt^ z#h%gG-d-G*XGtT2XmhEEo@vbI4-a5Y7iPFUe0A)^{?x!JjtdtBUi}wqVL-l6sJQ$S zYUgR9rUt2Wne6(sV)*>m2GerdKB@_3^L2wMXUR)|1Z) zEa{!wKih}cGhkLUa9v&9L$jJbPcZadVV{brf?piz%C_gN#YfAp|2J4`@O;o3zpy;s z!1CO$iP`6$!iO)&1&)1=HsJoK1o!o5qdO!X6?m}Y@b9+e7gYtegW34B z*AC@SwPe%k4Roq$vGvLU`fiqVJq+?m$8GUo5TBgAdFyP;g%2XON?dkB;70%V?<)?H z3ppDM$CGc&h0j-M3A1y1VEa7FMSqJeu6t0Qc{;;1!`fV6>L*Ce(`f)D1^=6~NEe~= zR?p~c&f)9V)-9`1&)`!sXInfz4|fE`AIS3m zuE(KDSxYtB(bv=+y2=y9JKA#CPo(^|nY*>TF7LvuF9IZY!49sS#PrS*Qbi ztkGQqXfA}boCR*_=(&{XM3uJRRAmow{t6C69+)4A6QpJ0j3p4nKE&PWja1+|_0-J1 zN%X2-&~}cZ+aBv|IN-gHfLf~Y^hPW?v~&u;0-(_XmXT%kEat@S7yM1YV@IT1#?rTH zMa9Lx2B1GArvO9*NOo;LK)aY46ZQNh^f$Ey-v%$UEQQXj3n*bgeQXFm(8Zx@y=6Q@8wS5J&lSs@ zM{=kA*M~m4n7%J`0c4Cf_4Ts?;O-~W5ggwi9tDrPAxwL6kLSHO=Aw)rX0R~}!OOsU zNBvyeN;KFdJ6`q0#=ck(P*?v~mDN1|3KZ;(jH^q3d39@#KaHfN z=6S`EkY4yENahvPhv9SL`2+jY=r_~dA zPc1}7LJXGKX>V97p!Swei3@G7ZokKA9HD-}&}IK7U@aB0>P?NA_@=9}`?r2ow(A{K zxQ~kGw?{ zd@sl>0h;#sF%r)e~cP~MdUiFr|Vt277Zkijw;S9 z5Z1XSVqSGT?mgw|Ub)}1x}Y^ZiH;gwaWR}WeOD72dVIGM{P!U$<24Z4kk3LcZ zE2v8L>H~)62`^89H8t^>n8O|$cv1H=eY-tViVvR@SxN>YE91?(U+vdtA6BFU4!1b# z-0qD_zj=VXh8pr3JU)(Z02ip14vRHN(B=764!^zo>7XEk&%k2bnDIF3%VCvz1OwM> z+v87~fMrZV==b^97^&camzZGa{IG}0{(S}4_wUcxkBL)hBoHT#;%bV82JB)s8cNR( zeDB-KE1u_~#HBg7ql^Y9;A=L{s{zQj)9oUy*_NkvrO3OM|EyT)k`j!kQ&!RUJa8&B z+0wgyMd@@jb;_rjUOp;9B(?tTAOEWPQk?rW+0zX|EIFP6kwi7gK66w4^*4Z%(G*k) zs=aBhAhXA@`&G>6j)L>4eCgv}f_<%&MFB4m77#dUV|@Mk=^<&K)xKoOR=HFU-IMzbiaQYryI+FP=K~>!ByhQ-cm@H2ots%)g)C ze|~4BGY1Z*M+MxDiHUJ|n*O@j;1Q^@?J6St{gucsa{4lOf4*-X>8!h@p!AfEI%K?R ze)A}ZKzyLbj~>p{GbvVBCk@uSS(YML}J72W!Ng4ZB%1q)!w+c9Zr z!tKeD2?eIrnhzg-%+Eot6wmvoZuu1-%?;Y#xvHtDnb-eek1hGHz9>DVKb#J`kK;?y}VU^pg01lN;mAj=#_t*j5bP%EsD?W_Fp zg8!_nEYip-`{hZ=ct>Pei~QOMkDmNW>@Ap?>Gpb);`cX~E&W#v&zs%r&QNs)qf43r zJ9IK5Tqq`tE5K039dms%vhR1v{dEx1ACq<9uw2+4I>3D8^ofIBYH5fIJYC_kXFR%E zDm|YcO-I*4ss)USzRV_pC#mU5jZ=F`OSz4Grc9J{POYvA*dq31>v4HWN82Qc*_|;m zGIC-?fSEEP_dZjgR~b)AJWJ(2=NatQA+7#i7dp4-(kfWqJQ1b1DD->U`nO}C zlmRwFuY_OucRYd|VY(irqiM`_4GI{@GZJwck?OXy`#x45Vl_Wln9;i|z)mwTAu|Oo zIgJP7Ts|8eOa#~WR=d{kXRW}hN1U)w>0Nw5oyx6bF2Snaxa`X%Pns`2!qf$ZjQK2- zBC^cu0*+wMHiT|&`p*vJC+8U!o^kNv^k2F{>iXIluqujPo$bvI55-f0LmV9Bz85d|C}Hz!#{4Hh8Q?}&plN!OuL=u^+<5?qwbUMwCx-B z$rfkq>`amFH4;ep5W0h_#wn~B0!k&HUaA1AnfF_hoe{UrIB-id+EjOv4Y;T}8Mo=S zXnd!lc~Nn(<-^VKFs^tollnG*k1Ri$qvDPWn+d@E)>b)I!9})3JUuWoxN(xCv(W79 z>`j=Ne7Ww#x0hDE?{#95lI99>KZ+75luIA>kMttYf+$+|5k_m?^`#ECPI*3rsTZ{T z!2d$OI~xlQ&6BEA?t3s>&YOevtHWQ%#LO#qP1?R3yeoga^v}BhrhoM7I;hJUhf_eR zGf%X|Y1~_#A37IcsoWYbI7PzMC4h299L8|!wok(odIg zv**rzt&Offnd3Eg>+9@vd-6Ng@Z*ag)ET-!R`Y#x15d8C#(;Tj@{{%Z%a8nT+jN+0 zBVD;MG<$5*5vux66+*I#>@WKMvQziTcB;EyMA5qDG4AKX>?}U*pi~OTH5^-ho!^?% zY-g^1`tuMy2UnvhJSx~Pzemvh-86_|>&CM8 ze)L3tM|bL12?I__UygjgjpZrNv$)|>d##xc=z?C!jppLw;;E(a#?`ZP1Nr>G$QI+Z z5`^M_IG;yuLcsL-Ye0RJbqQElUA%MchFZx>r%_OeOO=183zg~lc$?pR%&$F3yr*C^ znqq5vi*!yH*WAcCd~b`mvT(WPC6|nQb7%w9!&eBl{owd+a`uAXnh z?Bm5EU

O@h#w`-ud9C2eQBwxn$M6#gOQW>}YO240k46i35CZLo~bi!)9QN-dQ$9 za+Bsfu2*|t2d)_#CzUvj8tclh0F%wQy|Y!f!v}_3y}_Memjl0Wh% z_8#CK?57TE1zq=Qd)2q(Ga-HUBRMyCoJ(JAXsm?~+uE9kY$hfqN({ojbF=zotyjWX!Bc(H(pIfATk z(6USl`mXdFG%ngV zpJz=>FLma!xk8%Ew^s`k7O9_qg%^42Y{M!$3Y$MW_1++^eqbjdIXW7QsR>oS~_V#6-@i2lu^48AC$X#<2 zpr$Xq9kRK=V9@+gK5x0wXSd9JXWup}!3$3$eY*g}neEIaN~|ls z@PG&;d^}N-bA3*BVb12;BZi^wyd}=24X6dD)CCcy%TG$O5w6p1iQ`EY9RFZbuD_#3 z?YLNXzcFb#K1(pvk{gy9uuSL==vYf7a^wWCzC}NuTjL>edKNu{+Ot(EAMgiZ^Gt~W5T=8LT`RYe(zW#B%)sgfaeA><_X6={m0Zj6Q>&xoP3NJL7X#)#WRIANe?k{oX`ut^ zEoYa3YnxfrA8GsYoJD20_tH+`anS~!Xqx24mDXxqu5as`_)e8WEH&n0w>X7PEjVha z5K#Bkt_iQM>jo)!*SJvnZt+w~RxHSSmiX||al$AdpJ3ezTKonD0tXZ{m_NXL1;tWu zM;X`EKVnUBTX**>x{U)0>5h0gI>1lcafgVgD>K)Xg1Ek~pLkQweH5AMBl@IR_Mft* zbj<@&<7<|T-3gvMS5<{ODIhUt-Y6FB$;4y6dU}0^-PAKcd#t+y+eeK(C(GBlS_N`# z>RE_?b#!d(gOh}$CnhuU4=WbN2Yl8pq+-Y zFplrblPB-MWxW=r4b0cR)@Hl5xZK1QC}0Eih&SaFdGM?au%AYcJAR{%u|n1O)L!JR5K1uf{ZK6w z6HcEPPOsbw0<*8wEvlWpCfbIkfac8yqFPk{eP!ey(P@Uz|F?dtc83t_^_!^ zOWWdT^*g;Z1#U=0Unsp(Ys@(<|26fcfafL7E^s!uIS4(h3}|5{p9+MMDd2KDn|Nlq zR5RMr)p+n?cLPXBw6DQ)AG)i?}`;F1|-aaQ%ABV9;I>7)G1YC)Hf; z6JX$>Gimac7zojQq(GR8M2yMj#jO-S-~_D~VJ^r~8hKHoU*kp!79twB?cX`DRLleh z5R3p*;5m_Bj=C77K+G}erp8Qxfh;ep^F((xXB4NVlbu)d!^_g^stI0i%BiC!MvvQ} zY8bB&*l+GWg?}W4!5;;d>idi`6pcGLE)Q-GKE1y!J+Sq{PJ$bd(TYY_GylqQyK(C& zA39{w8`2zygk-Vd$p9wOeYUVi5+}{z-T1@T7E~bsNc3adLI5*)mv@HA3I!BPdZpEs z{a}~<3B$lM#~2a5`^vxxVDDJ$dKK!PQ( zgIC;{;1oG=-BZH0PV9+47!#xAQ{ zRP&R|itT!NyTiB%ay3j!7?oq}!~t>;ZwKP{!Rx*Nm#Oy`y;wJ@w{zK<*M&>&GR(d7 zipLBk4IH+z4x;*^z2Fj_$?aEF9mfaI*bpc&Ou+;aEpAf|_y0)CZe#t`$Z?6U+0I(v4qvANDbGY5mZ^|3c619q=Fj=Ke)V1uAZV8^?i1?RCMrGi zF?npeQ@gY6=jTivLINLBn&RGbtwV@eF0!~j;;Jx6o{r;bs5j6%TjppL#yOUn_Zfj{WT0? z3cG@CYtuGgM+2ZvrhbXUYVL2Lu8wdVS38PWsRO&gjnQKwpi3(Y<@@VU5L4qRRC)HP~U=YzMl8^}vEJ?K@3 zX=Gu4rK_Xk;Ta8P4r`-)v4UPk>UwLnkSmh5&rG;#UB2T2)@0GPq$=Q`?hVR#o3XDB zyeccFR~*c7`evqu>{?Jp^;D?JG%dR+Cg`Qn)mb3V1c(HZFE#fTEFSE}x< zjf^Z9f0&Iy9t3fP;7__P>?J7{!hJHTZ?^rg_4 zzt2(lag*JZ0pLrd0`^BXQNYdee>D93>ZoW|K;AzpLR0IpDY@OwCzzS;aJu4)sV72k z-thsBQJ%tmgau@IIggAV2@6F86-Tx$#@bT)iYLnZmD#>A2`K}qr-vtAM^EnO)eR7G zjC#~7l{h^U6R8Rfb_Ns0ywGNF-!qv8i$=~otuMBSFguGlXFrT^ZRC~5`rj}Bh+ zEp=r?G!DuP4oYz#Nc!e6>m9Z17!WL!7h`lwaSX==ToOQFdaQE!JOAjEizx9`kb=P2 zQTYKdl5ASwn;hfn4X&GS#O!VkZi&xtgB+(4Nw5eZ15`q^xP*H_!$a4&Mk>I#5vZzP zs&6fdIf?>amK_{8Q#A*Fn^|rEGaHq^b*T+Txf01CTZL>DQR12{p5z>90~w+UfsxKP zaBk)d0354_poq?}HSTBLpzYR6-PN_o?-aDVvoxSKL^WiQxJu*PEqO_b>HZh?^ffb4 zM=Tfx^jgTY$~)PL^N(m?_dhv3RME=I(F0sjhEr_m2t?rC!KKFM2#vdwTNlosKQF>P zkV3i+eCo zbgs&)*cnBg2w6xP%XjuOGfQ=@9zYbij4OM==`)a|5x+ycgHBjz;P!@-{AfX1@V$*0 z#T}!g^I_$JxEeNbRC%?};d(?siJLf2K55Pu`BK7JZFG%8bmTGpAZd6Tq?0ELj0>@k zArS+*OETtQwx|=S5G4$QuO`9JzMS%wYgY^VsR z_PuIH{|U`b1g|Nk7>P{}6t1ezgtRuv#^B-E0kj{{`)0i|h@&|H>L1Z@b6)RJxa%I< zhT%KNCQHYsdt_~t)IWF+Z#L@PKG5a2FCOJ1T;K&M*)UxFIdCC-i6+wH zoXFtNAaVqD6oB6y=@I&M*8TsCcUnd9^QMF*-Dt+gXK4T)Q0fF5Y zIg2cJepLNr)JZLWtrRHCn@=n5Q-g(;5Dyl4B^ad>ZY$l)H!WH6)Ov>sB8(x?05ID_ zFusCt#GRQHIYMpV`goX1S;B4b(Lr22O!(FCPitWTSB}i%L6A67Ro@Af%)#9e5v9h! 
z+5b_<(fe!Og3d36WzMF~3mQHH#W%L+;(G!`gkt~^2B@{$SaJ4F+WUMZ8;V8C1 zuA=g|5rDcD-OE$0ql;L&5wnCIYS0KUd(PZsi?rqy#gg*;oYD>$9KSxCEMleEeJ;I?HlWGt9=#6zZM8nuUcQ!g}8v~bH_-Yr7!Gx#93q8?g&?hrFgO)pi@IQ)TH-M9|;w$YD;kws>y zJYDuP1jl_S`@>FVxYsV3oq+lr&3TiqDi%~)fgwGgRX3bUgN6R#R_0=F2bt+SHZ%AC z`Lvf2eT(Z3-M|W}i5dgBEAVT9?yv0O(UGN4j7ROqqf~_3Q<`q6fBodouar&%e>|1W zH9PgXw@=UMPTF zIT%?`z_WGyHL;{pc4&EtjCAz#=tG%cbq#z};J=yYFCDQ0_kQvv3pR4Mf zqBk!H4Wdfz4a`l()#eaoToA$ybNQ_lKdnbikLEYW%DiI>4ghth+OiVoKVa84?~J;RjKRT}6ZYwjj*dfip1>Iwbb^A^ z$ihQR>3~GT)2tAletS3o5-JwX-VfRa$dxCAz({jWaD368amue11n&lGCpt>c11YWt zJy*c|5PZ0}C5O2j!qcVr^9xI@f*qX#S(coqj+o=(fZ*4Ykd_5l$8JIhy5OWZ_JJboj2-qRp7*)^5$?Awx$h$Se7iPOBIMSI| zZy~w|J^Gg8_zjM~WE1_y>e-JM0uV>%+FUN|ke&X%HW}lZ$n#us>P<3W+mQ^BMGJjg z;tyk;JhfcDIAxAg) zih9-<#9@LL8ZslX5vI7wofXC2f$&q^H^C&gq}1%`*lR-8;mkJKwGf)#a%c47f@>k3 zcsi`OPP9M0BC@Q&nUzlA)p*|pV1;EhYs<2k+%fPBCK+uoRq1R_fd?RxfDe}h*~&+B zv5j6gfOf(qoJKIIevj%`U6_tww!|Ss_mf$Oa-}ELv@58e@n9SJ&i%_aTn7jG?81`` zOS!$+2&b=5KMpMDwwh^rf?cT5)lo7ia$9zQfik}KD^yeYNtMupM_48!v@&_G3W8x^ z5n3jN66B)cWlFd5D*DuLdco?}`oxykeBo?%Bc_fGKbApgS>2i}=^TGt(72+tu%E{at;XW&b^z*mUdi>86|~gH_I^j~ILf z51VVdm}uuHIE;>IvYcU+cM73l9MK_%Ds|gc5uEbFqc{LH#DdA4Urh(tR%*#rB@bSI z!5vVH94MMZAjUQP(8bxE*CIspgjf z3`zpQ8vwTMbDn>c`?bd)dzlPM@uuShzKMxb`c`f(w~kg-d*R|EA`XH&^TgH`vz*xy z{vD|n{G=97U&eY@!U9qWCRsvqEu|v?{G&qKji%S__A3Gl>^SpJkS9nJAK>E`?!5gA zm%65@>6$GAQ$#V}U3ChVajx}$x0jDO`-+06iwk~&cZt)J;DV=xy^4O5ncsV)xio)S zsJ?s1V{+AY3hIUW2{owb9td%y=_HX0i#&0%SdU2?=jR?*AK?_pC488?tV06lUjKN1t#4I&COqt<6{8p+{)6LH{^@#JSzmasTFD19FB}H1Tdlacq@wMJ*_OE<@ac(Z>(oLYKKvoD3iV_Q zf&^AEH?H=|>6<5IMC$bKbFh)~`N+Z4HK}Qf_%mBO@9#Zdk%~~_inaX57TMG1uLJLo zeP(rZkE}hI8eGE9wRKdKSe3sSWR~3PQdC?A z60D7ljoQZP>1pDe)XK_cV_aTd-Wf(m*_D(JG+yT33KD^J&1zzT?28%lJ7=a}Lk5$_o@r-Mv-*iBN zV0MsoY`}vI$4C1x1e>5`S+HBoHQNBA^yQ2U*9B03F7EBTzd#cb7=vhy z2hl%4{e)%UDh$qPN%nY_vAjw!+JUgEkC~Ee>qu)ja{@pok2`|uua_KN?SOfp#}%&Q z^lKXkPXedAW?ntW?b8!(dxIEFs4|myf5Tb$LVM5K-EZFhg}cf*-NSa_`h59 z!OI7`_&(%pgoyuNjO44Ee>0K^5XlsS%YfLz!&*N2!C)vGz?Lx}%DDpw1mt4seX6hA zuzZ1VH?b=oMpf<~Ag0#j3a>?DsMVu4mQHS>g=Tp!44<>(eK`6DB?L{z*+ISzs+8FppsTMBQgf1tQ_dF0wvf5K zZQJ|)B^lT4>FKHIXZA(O9X!qZM2P=R-lwfo(ym!|-P*;AY&tppbgORSZfrS_ji#5- zH0ANjJl&*$S&!bbHVlc3_W`O#6zH2E&V6yaGTNTZ@BpBbpdpCSuSUjWE_VGe?}#%_ zlI1GvC|WM|jBg#5-!ZBb1F=9I=AP!JA%j70Xm#i1UGo+Sx&EOl212WIM+R%Gs`Jgt zb(6OIygCm3X; z6IwP@jItJV9%Ow$N(79h`tXhmJV8p@R6w)h zfd542%eqc#bVpBt*31i_xLyPyd2Rs5`2cV`mH~8HxgzGUL*`ehsfAWYvh87j{uHam zDL~eE?k0$#-@RAY7eVyf3QgH}CP?Yr5yPSf&r|%DTh7hpht8M-$gTU6{9ecAPqr1a zeh>xZz8S0U9&vSSIGNCAF?|~B*R}l$L<|ob*dst2$`}+q0YW=DA|?~4V)%XR&Hs&Y zeNh)Y;_HyxgWU^EZy?KU{apG!(C{!u6?TXt1|*4B$V8r816Vv@M+|YU%@65=+5@j? 
zwPzeSSz5q1F>h*PICo3p&uya10sFMLZ;!_v04XZ0fi+Ek4@&u;iba-r`T@ZGgFR-2 z>Z|=JsK0(p-i!YO*)!(=*q7Xf+ta7^2(#(H&v-t6V*hi4|A|c%sRbU^G-WlF_1E+N z{3>`5(Bzt?uL|y7=&vs=r9l1B{SQi6h9am93w8IO60?0Z0^-?o!(re5OhEt2tNvg< z>V3&oPW$sw|MZ&adDI_4w7(Azpv2<(jh>7{oN|VAy$N@sw{|T${S0}pvlN>45a}<^ z30^*)a}$&k#KHZ5KZV${(xlz2Ym(o|?_Oql8~Dfs>C3eLB)&gC7o7ea&|bx@68Z&9 z5Y<39k@<9#kUfYZ*AvOy!}qzW=DQZ~t5B8FOQ>$2Q8K^7(@Fc2%yNFYSFhf!<6!^d zrZl^M?VI}TqsZSr3sZ^%%u&GaYWz4TAtndo_Wk4alpLtSn7HCgL9aVtfRPD5pjDZ> zrKP2CLHn1Te?~s76_KIaru{qD=U7$T6Tcg?coW2 z-5h4Ev5G@o30~f2!h)2mFeD<&4jDXp!uM_tg#~Z^OPk58PzC|NVx0^GlLy0*K-vW%)({^@tP4z~u2k5@& zr2{5dZTA3N(4drJYd`DV)3{nvLBeOrok7c}7Bck}Yxwsy2NxZo+ZI#-%@TkbO+WASU|%WtsK0DLmK`L&*ck2~%3pQq;!i`WKo(N>oRyv#Wz zbL)ArJ($G5`r%_|T}EgyWKt)aq|@D71G&a}g54Oro7bk%^qVx>bL;QiN~+@QOhEBM zZn>Ff8RTZBMMXuLeJc#YHP<--Kx79}vgfP0-{WvqSRtcIS1Y<;g^25kYEuJ8zIkN* z^p2~AkiO5P1#-fszr#7e3R-9no;Z!4=~S#bbLkIO#WpB4(hS^b`vFLNLM(i`2c;{_ zIy@@i%fvUdF3K)C`gIC*U{>O{_x}HCGN$DDOB$8t56}L)8)Pv2Oc(D$R{EC#6Y35-4HaQqOBFQgHG%RvKfo|PcOMI`{Ggxx z9Y7`=h9;N8LFM`njs`ca^GHRZ1F}qT*P^H~{K#XOb$`D3Z(8*~-(F`Ao{&&V18pY# zQ*CNqElPM6`{!1K4GqUqXSA79^@0RBljXU6ZEz(mfcW9Ad$+1Z<49A_ESZ(zI`LAH zv$f#1bsD%`SHsxYco2V1sgsUi;T)k^;kvWECE~qwZv(7J**EaN>}bxS+Kf9XKGTUJ zUW+5I8bJ!kZL$A+3ii`*pcD%^M6z?*&v3k%OkN_BQJl>=ZW94{3fx|M3+;Pc zc|T}hGui-UUCyfdJZTgafeg3+6ZKRc;lhtBwdVsUymN(4+8-zx~c^vFBYl=pDEUDk690%6^P0m3+=wGyH>)9-%dIjRsdcmxdsP_yjL{fWR5E+039weGq9~N-q)nteQ%rWBEwmQ;{iN7 zII#*iIA%Cts$}|=-?xI>dH_hJTD!Gs((Q))WD!#GuwRpGEZCcCCu6k8Wm<01*P{E_ zfqbB`Y0V>*ZdS8B?=o6tJ?B4FKSb-w%%1JGmj_gzUh59Q{mSK5 z`t+AUNjNk=*ZI=*ea_us%pHn5oAjq~nS-t0RkD%$IDLeBfsUBJs+ zrDSp@y-N%PCus^We4Y({0o7sED3jp~CMS#sV?3alx030k1s(0}ai9v;3ebJ4mAU?9 zkh=`}DdE!eb)~({jT5&$*dzHVb&6TK7Co@>G~{04Ve5UB?7a+Ta6}D9`3H+dAQr+Qpfpa2ldg zLxOq0h1@&&%#hsB0N#~sV4_!RwXa=!^pW(QjDv!tVyH7KpbS?z!-@99DOYrUMD-Gf zGX7@!uN#pg!gzjF#eX|Yr#Y#bOu-osvQ>>zz`giVz7YMj|lLIEs zjPzC5eU$8Tf;v6`DRX4q13jeCR;>b)q{oJg{c8Y9iAwd?~ zn;RCn;BH)XnR0-A4TiY@?YC}R12r<$K~HELpthGX+1Nf)9LR8vTL+JK3Ixvn#i$N^ zWt*NgfN3#r+*7_>LzT;WmdtN`k&c6vx4F_`Tlu_jBKO-SvC)pZh+% zd!Orly|3%_damn-!fb_(Lw)NS5QDYIc78HQKytHLaKMlh+o9NZs!^J=u?XBv38sfL z$kW%__-^MuGtuvH_K}LGF~Bgnavr$1vM{n&X0G$4ZkEB-YddwT>e#4IrY?k{i7&-7 z8GeP~K5K#|n}e?NSJU=cdPM~WXw2cqLB>)J&?mwqKIFJf7K6~Eq%Quu$8s(YaV-Ei z;Pa4D)#0C?5l6iwYEWLrEXl353bheudTNhMcXN@Ul3v$O-b9-bXVgGCpD= zeW15~+b+d@sUe!NWn43gta zqMTj%j2Ax$heCi@b!AkZ_LZ?Jo;un(bm@CA^d!<()Y&CW9{X;Le^_Aut|Z<|4Hwe3 z3_)L6?Q?y8y0rX?l?NJ~+P#KRHvi~v=~}zc;HV^=SDe;UE_2js6@AnKtu|Z6cE7{@ znC$9geBj;pkM`~@iQlSC-zj9$a?l|1g~cP5N0fTqjlT>@pQSrHTRdcCpSX61TbF5W zz@AcE&lBkaa)T(*%Xc)`w94meib>dK&PVveYIY;vlR`dBV_Q zZhSl6;sk`QFA-A2m7p4(%S{O8^_o7QFgPmw&od z&pM#g%Fb!?tUdaBpuSsOeyBFT{sE2e+KYgT5_~iA-&Qkf;%_^pk~e!w7&A|#cu_WG zs<$8R+?jPy_Rq^#;OK6l>lh6>%GtfFv5cB+O}6`*OiR_q?v4M&zs38 z2?LsGho8}2GqpYHb4o4Ue2C2N24JIgv{S`IlXbv{8Ql3|*|x3;58Z3C$m>uWX$)T1 z#M{q<9e@XCtPaF3-cl~P>J55Q0)&Q}+Q;*n-i(Exl*9>5vPq0SVd@am1@*_%UZCOl_}6~a$s^Gg~C9c z(l}~%#d8x{^;L1VXiTSO#bLy9afXXaaX}ML@byFg*$rw&N-e?+icGzES?RYex??nd z*>xh)NeEH&x`2UVopt*In#)I~M(Dl^S@`i;O-OXN)ua{WB-D3Rt+c$UEvuT5q9q*$j8g7Zr-34mG@(KQi z-Rf9KIOwQQCZo?~xJ(h*$7T+p`Fagu*o%>&Gv^4HKIJ{rBWu3PY4W^{SjGKhyoG)X zlH_e;J1OZg5*QNn#kuNfhEeM`MuV=RVVROJexad5j%AUp_H~>_y`SahY95RL*l}Zi zu=djWgP;@veJ^^UNA!Ltozu_Q1oK$lN9%Wk`zw6kt@DQ_1znpWM2sRaooCI{vV2}k zc$7^6DQW?!G;1dO0Fv)>uQhmgf2+~(JJ07c6cB)uyuOaS20>bg;`!T_#glP<4T#dW z4G^dWgG3GaK@gx6_5hPZ+o=W15+#J1*2sBZE;qt$wWsOz!kcM{29?Hej#%76Ikv1z zsQ;Y%x^oUR8znCvd<75b5K>%KOejg4osdM80F$?UqPEYv+|se%wnoEz?k)P2LiZpa ztG11&;;?9Pk`uN|8KpWoMsF#R@d^afE-UY-8TS*PSl2_9v~Dj+iyB}d;Kv*<4ju67Cg2OZ5X6V+iQHxqMWik?#+tMEFeZBfpjAAh`Gs&go1hfQPZpOQG6QunI|GfF`YD!rAQl-*}lvRdx5 
zCyJFhmjShpRHR0=1h09X2&lp7!Zc5v$yn5x!1IB`36C^9gQz{-E{;hD^Qn3id8WF9 z;Xw%ILLbU%k6GyWOrw~g(4?1_v`Qik%KE7g5QEz z^9N^>XBx`wi;{Xaa$y$@hkiiN(OtO_sGRGUn!gb8nQys){;2;tbKu)uErD!;bsWGs zKkgI#@=U5hicy@Y=xJpMZFfzOj$;o`cFppUfb*5}wW3z|g(aYyUxN^%)}wZKz-dSj zc9BI_S4NbvM6@fRX#5FpMkcqHzS75H4aTKuLvn80ExKttnI$?Zs0p2pjrpevQzSON z2D-E@9pcD6t#MHQayoS3`g{8BlFg}>?VXdBxozHg;JzAMve2CB+5^TQ&WyUv2nUOL z`j(oxaG~yWE#b`zPc5Dbe}&G4-yDa}mmmkJqt61Htbg>0_#lI&W)mcc+Bu7(AbLglVDi`_ntkk7lx2Y`*j(dXY{@>GMOd6(9ZD zwd-?ADshWjmhLX+g^C^i=%&o8sAtHguzaT?6YN^!0b+#Xp;pKKvw><2Ji zJ&mlp^CV~kQ!gxc-*G|FJ!C~pV~9zx>ZMonnxPlqgW@Y3T)TGIg^5bybd3~n>xw)l zhpj`7NLusk{}<(K&yiel8St5~v>9aB)zY=7`L66Ydqm55DhzWB_ZhwW2hQ8*&9=^T zW1ZTJUmi1<@HH;YTH)e1B3M_wT)epHkiIIWX&3=2<`cysOuI=3LyfOUfqPkNM~)qSC+qx!kV?3y?ju6RXsrS;AxfA z;dihKP-3v3P+ad-*l!HN4$uIB3tac|k6b4!osvWZ>|w6+Ab$<&Q4q&DfzX>{R-EP1R zTxQqb+|lyiLNN@{3Eq$OSZbAN1QFGIfqEI@a5v~fHt8@xcRn;pEWD%k4{*}9buH;S zcat*)>4O>zZATA>vpS`l>r*W~zkB0~<=(M}e`f*wQ+X!wdoB#UUh3J1-LWPr);-Jc z&)W05hjqFw)|&$ zEf23OIc1tjL|zgfvj1}(RQ%>p(Bu~OY}8Z91)q8xp$j7n z+upMkswYjOL+j(h?ylSE+3v29=DB->ZaP#gE7F~~7rpeN3jC8dLr&{FP~FwCgX!sa z3v$aWY;*Ab;hy$q%Lt{ zHU(hXp6kr?se5DOrwvP9d~gR-w;}$Gg7wxlNDq0vxf;FsIDrEqL|~`1H<%E{FfM_D z?=0A^Y%N@Y6S#vV5$2l9Azq~ce9zn^Y0DI-=mH{jX9r_-^C#Prl`}7HUAzBa?|0uY z4B`&la=5A3e&{3r-jKeZJ~xjmsDpGZJ+qSb@jXthd8;*xZ&yB_BQzLGa2gzS!@d?f zk@FRVcmwNG@*Z_*Wv5OmZCNQ@Z1PK=-XXE#`C8CC*mR)Sxp_S+=sbV+&X&|-f8oLI zd+MWHz(vL9^l!Ku-jrPs;9rx90|lFyN0}QnxvuqH;tM z$tMVhE?&gU-cY{nA|JZ6FpW6FCy6-9Ue6T_UF^J!3Yp}2S@@}a>)Oj(qP2)a(6;d` zxN8Mr=Dd_B*O*@eNxg6iDuanke6k?7A0pEJd>!!bSC(v}d$ljddQC0+w~1tu2emu} z`!cN5ZkzJt!1%8na1c}B5|6mf=y9v>1#{`

Vkpo1*$T5V0D8c;j)Z?#ig)mc3Rjb%x%3#oqq(jcqOX4b(hwoS#LU*_4)z-DG)Z0Tr63LR= zcuRpShri!Z;!zX8Y01n(593v?S3%VV%Z~;%aXbS(FeyaXqvyCOY~ddS z5y_+_3u-k*v-hL)@9@tpo@q4GpbT3my}Wci;eVat3zph&r-=yN(4P9J#&ET%9eMVk zmQKnIOG(q6<=N(~-?0HcA|YFHw8ut~2yABlmup~?%hd!fj4rqtdJ&jn?8F#O)pm5= zAv3aKz06Hz##vB1`Gt+iE8D=!_YDhCB`^l=BK&?B4Y+8I_TWAT{c!H8e#j~ zFD$k=MP!LlH@RZX$`b_Q_-dco@N<$lR)_o^qwqJ0VW2dj>AIR_uGrOzH1=vbn2rac zm|M%!%$sq~u|?|EBrDSsK*75DEgOUDE)l2#LdMzeXPe)?mLW{PLDizrd23&*`x{V* zIH~J%{H_k!V{(?te!}f@L7o-WQg67y)`fs{Ct)`sm&3$Mb=j;~;y9&6it#lNDiXxy zaO@?TR%SLnwOnq*5`2W!l|t-ltk}WLu~g>Xt|+s*m1HwNQGK}tAqPdqUMU|g7`b&1 z+!GBz+DzZ7WAEB{${~{dWw6VV2Uq&-Ne@iM*_sFX?R0ywo|57PD9!9QdM-D+|F^56 z&sZb9Fm166Gx5f?z0~@8;Ety2*_kp0!?5KQ{MJL*IuG=s>LeY9F zw#Q+nv++i_5MS_hl+vX46Ppu`I-K?mXQgAm*|f?XS8SWJgT}s*+(CU)J!x#AicuIL zTXfR&l4k6I5p`+vo9lzc#l)RdGBlPRKRCg7es~MN<7{{76u}p?d9NkbM;HlzdPL`1 z+g?R&ahM+zw|3+q`9VA1g)o7*i)4L2zWg(YwUH7kT)OhvuMq&=Z;3_R4!AzE)b#%A zPrqxPT0R57Ur;C|?e)6}9z*3>yt{?B6f!{GT?rna^mZrD6$k^Nb#1r7F`?qeM()s? z8v)HJfEXNF`A+7R0W`Ko&IN7eYY1`)1xs2VW2&!ur|mA%C-n7$o#~rEO$*V6)OdH~ z&BCra9L>rp>R_WztYPlNPp70d0Kzop+eM7?(n6#dIaLf|g)hH-05`)8d)Qufv@vCZ zCdz4n>e8921CQxi5Y%m%KJx0KGz!0@WDm(uUvAn>mI<372a7 znHYd$6}|=TMg-vDv22RwcAvUJR-N@`#_x=|lle)Oca@wdiO*Pl^G647IhxjF7YKmF z2E;x;$*|X)(fHn%XOawsKmSw?Wf5Kq@+jLUYBxFY%3Su}y=mIWvr84WAzqGFanzl^ z+*U1qYu!y6Pa(_RP>{ogf?lnH(XWrV^umkkqm^c^iDE0dkNDu>#_^}P1(&QpSYyh8 z*lPg4=05d#=n+<7dz~)-vHkAaonY5R}eA8a}@h_GtpXi_n7tCM@gOa*#7M4m!r^CL}6=>8Muvj#dZ!BG| zy0n9nsp!i;Q5OBvsWSuw4Vroyc2q&5w!d1jM`nEU;k1es~L+ohbd zxIR{4uDBs^OOMM$WV}&Am|7rsW>;Wym?gDG(;gbT40W8cRo#5n#SC?#LB(=zUyEIF z@RIrNX&wpfCzdj(xW5*aJXr!O(8f4K!Gd;M_M%$&-;&Z^&mV4)eLW?FX7&lfRTs@> z*U!rMd{=k1Be8m65?2JiL*afsL(efK^=)wr+hZ4f^Fvrby6cnQR(bKqS zeqGyY=6xZiC-{krv(xDm*v&qs)fz{+Bo4`yqJBbMN8*I6?zF^dz?uT7giD+P5AOX~hJq&JNpwZXQ)$WW$u#W6b4C2p5Q2gsJ zj~;L>l}(RFQ!jy3P$=B0*fg2ralJ5D-pbK2==GNb<#Na&YUC^Ef6+ZCz|X8P1nt%l zR?8D$XD)-iSh`E2_wXp(;L*9yr~Z{BKw$yeOY3DRN$Ou#<=^pgF}rA{g(Oi`2C+j* zuCdX0ix)-qw{Y*P_H9HrJ*F#o_WEzc_N3JuIN0xQXXJ`dm+#z)Ro#FR{PmH*3)yeD z;uqP7(TpQ^U_sVhD?gQKGg6sUEf*Ec6ypKp*VFO0e2SQ4nK>y2k@xdkFIb8z*5d(^ zGJT~z%}5yFbw8e_ z62PDuq5Nenxgkf}G7~nW=S6Qo;xYRG&C*SvpW}?qeuqeShV9$ANl}Ly3$Kl{rSC9( z_nAj`b#7DNmuPM!VY26UFWbRq(lHs-OHf3Yi zb!}nOfkDk;CYMezz5@4b~%{Q5%GO_ePWb{IQ`7@2a-yP6_7p3YpQvNQ)`v?8}~ulGYBH~QnY zP8Jderr^g2?)^@0RYf;|3}VflgUK&Qd$%UYX8-%sBw&8oY5GQG!)7!I*pb6ihLN!5 z_wD95-!duM4amfiX3mbY7TuW-691eFr|N~Rh18q}Z*qa2&dqLjj`TdCL0Z97!e z{k4ypen#M4)-y|g+)!B<%1C`znh}ejjA>aP*c|O(tav~hJ8Ff@ZvY>G0WcEvj8NWg&OJ%1TF|mP^RQ5!&d3D zf1&h)ixL|vcgpdgdd%X^oCn#@UzFS0OvrNEBJJMe8fjds%#~^i=n<2S0-%sW0jd;f z(?Mz_7og}UmZoC8+#;!@uj%={br#vRN0ajl9Hx{O!KByRfj-b5@*WcaHMgRcI8{h=ndEeh=09ziEbP1;!GDS zNZ&0wZ=3Ti#&c6X&Vr=~X*GB(&SF~uE&MR)&48DTvrM{0^9@755c-iyn-5zp8gX(4 zv|;3~FSNXAdZ4f|6zbNa-V8joq_n+Dk(0~W@t-vPq&u*sWA54X2!4km{BlJ;z9)&i z#Z8%XnH$z!n$I_o7f5!`HQJWcxAxv+KOVj&$wM{bQQu-r_T`%7eQv1pyK5(1^(z8m<)ccPA1W^|PfxLaM1SzUWYtcfmA&^c6btO$E%w9lV6S&Q+(heFEwcR))BN=H zlV9HVcQ1O3^ftD`lBgMYY1)#6*UXqJ7du`gptrGU0cvuG+hh5=Us>O> zg#LwHzk{y6gKi{GP|-8Tu-*thv=$}q%0|3^i?c;k;Lq})nn=5v%=rDiRRZEwb@<+I zWs+DH6DR;&&m6lEs<&kP`Tb+Um$*>?Kz+GGj|u3$dO`z!hs!W4n5nWwzu$V85FzHP zNAU5#3+AtWiV4`pCJKiQ*F4v`w8c{t`Kz}(f(O1!wvO?E4y&uBBId_g^DkEO&rb>% zxDnN3M&JjzFS$w;qHXsWGGiR>(jZV0#IprN)Li*tave;Ek;zs1zS8*TV&4ty7Ahj5 z!r%%xO#b2j{$IuR>I8gt^9l@{lxm! 
zy$D1i_nc!S_wzkVHX;OEk6`L=sUpU?>B&W3MOJy;AyhYkAL#_=l@<>A8tf{@5NtSgC3#; z$~+l_N`6?U|NYBh#J~*MeqnO<7h|diL#nmw z`~K@!bL?OXV!L7=Ii3^v^S>bZ6fVDi|IuH3#~*%KQU}o;*H_iu`UOok1@q&ry}^l} zU#B0Jf+R*5P@*kI58cnd8FLCgGtA2R_4^FvFh%Le^00Kb?k3OO*Y=F2TDXMS;!HTW#mx`E|)?_G=|vPm>$ z!KV4s!;^f8G>>+6Lf;|Pzqk(=pGHVdD?9MD=NIoxOF!I6IC*EL+s}9T0%aaIb=5CCvp-JNBxJY3xM=kJV$C^-?5Pg7PI*fP zQz5_=gbS-TfE?HwX)xL#EjtHka=;-bSl`%gsDltNs{jqIyUjpeDUf}b6v{e~-HnhR zw{D0iKHI>6q%zav-A`0Un6jzF@LtW`e@$f7Nf-P{V^VqlBs{^LnFNfuVsOi{!Ir6**@E z_+9^7wMpV3><<@mq}KfxD}o_>)+`eadN(BcaEN~eoE4yqvTOi0&djmb^jN$?dSxrq zwzzFfh3=qTSl0yYl3>9g*1W=iwwBwW1O~@F=AvUPTj*KKUV(Tn$qv{2%XN7ge+fb%Od+RYJ9{X=huJLQ8g}6z zIbeN=LxiCGcRoCQAPSBf;dnQ7aJjjy4^DRMBLhaIhv*boPw86k<8qxnYyWQRj+K%G z3X=aNH&|^(GYM((mTh2=jS&a?V;m5!R~CBc-8*mJFXPc(-1_H{*E`!s#Rc|-p5y(C zxdN190*40_DHN3QA)5PAd3M3%HQO=SHptp`&*|9QzQu1eWhGJRu~vz_&Ty1v!2b}%-=s5Iz+o2ib&D=fhQja z;p$ga2zfTK2LkOalhs#oV{uFY7@%0!q#iwpRfw+>THr z6%z;)l9pX}vSLDd|6hw5%$kXLGl(W>grJ_=->lU6C2{0fcM%93Y5|b4G{2~#`F2UZ z86~XQ>x)!174aTp`u?{Kv>-6C2BFCp+)e2J-ZP~Z{WTBdicm-0oZfcxvFzH_o??7r z;)+Sg%H*!$8q@#mTMB`&-^_eX%9j6m&6AY%lm@fGkSlT85J12Q^Ain3;Cwar`{4cshkgkc21(5g1ibenw)g zHE^90kTMK|Bme^NAc<79%ILL(trWHj03K0oMfMZj`7(3zy{1-^c}3JqNa7AjeIgib z)D0A;e^ZB92yoh+-HX#;%;85=WwS?NCYf*YcmYny?wv2{I6FxqY(jiyPXs7dH?Sbl z2{<6?U}C{Zcq;1^PFPDoYgviS7B&eI9XH{Clhv-g=~D%-TpNX}U1BVOY!UlQ-KiG>CN)&RriZ&POmgL|w8uh|qI?@}WCi#t8L*!x}F=sGaS}q3BLd zBI+xA85>DqfG$x!+Unxwy8Y&P%w)?U{_D-T~YzjZZk!V|L8h0U_s+hj%ND}#a;(?$nnQXw^(RJMPxdBNBf z?FP#yB9xULny8Mduf!BaW89ETrVUirWXWm2c>_0(>DSUEX%PoFVPe+)YRwH2M-Zs# z4{#Qky@J%Q!J($l)qZlK1#@4oEv~B(L=>kMvxq5QG-ftp=6&ITunj~LBO=GOlf8}U zOE!ac69?81pmdU~Ln>49sJK3a!`^DaiUx)+ip2i0qw_yubsy0&Ky!W|b7y~#@BcIV z*#20c;uRqUvOcPF@l5Fln#CR+b9%zt#tvD^y7XLEJf`%W{@?|2 z1)ceh`j0#*&as)-?sT|W%?gT6PSc&<#k;J|Q&CPC|&lrI-DF%Nx#T40<7 ziO8&nQ-n?BE*Ek#*;A|Z-8}*ru^Y5LaGB4KJ$b*VNQC|CY!`5j4~PgSvRP&W+7`0q zCvnZ8J65f3^=Ne&4%>%fyw>X;kkRfnRvIGwZM=xPZh+OT8H;+k(w8~$fOFbas_SuC zylQr-Z_35_t2pg)cZywaz(68r-3Pi?=2_Wtz4O~kJktngP3LFO`vu?H^Q=c&Hq(i< z6aP3Jq%F04F@FAL5OVm`u$Ti#NEYU69ZRq*ZImW1Zn1VJqkfVvyj2=?F0APC7si~V zizkO{THSAx3dw1mt>Pa}Ph2byve$h|CNLB<(iKfO;4+s9Ew0JD6EvMwsoSCl$fIEN zx5I9H!r&I0(`7~z?lEp-Uupgb6o4HfKKMep(u}q9<0aTng0FME+JO<%_tv=3AR*Lv zWu1iXZy*B9AJZsWfC+rUYl>U6YkDjj6_|6ea}XsvcI)Emm(o$az{i9`gZ0MGuiT`h z#gY|Rnp2n8TUJxfL3r%PiB6Drz}}}6LuU2Qk4WkUu+99U-c@DVxmgvsaW-i$97~IO=A!@VgmR+F zo3w?yD$m+{v}Y9##o9bmW_Oe_r2WWy+_dX4+sS?cSa*teY+bZ`yX+In!!9484eXw> zmGajAe0fRK=}De4F@=GK#BAr+7Z3^O7lp;?1?d>WqVzoVxTJMR>lvWti5;b@glvMd z(r`oiB5;q4)AnBZv@0dVtNyOyv?Z!z{=A{aT_Vr~A#=vz!d=S{)${EvlsmY}dPj7> z!lmBAJ$fwTW`XoG*zy&_k*w4I}qZx)rD_en#-)natmY;mngdT%#82k?mj z;>+EoMB*h>+C$Qz_0^>KfNz_&WqM;R50!8(=~2}#d(5O46jAHlZZ06}w(s>I_b?z4 zae6NJ-Jhdyjt88Q?0HUeq& z&7XV3-W!8aBPmeiV+2Ulc`_*Ppxd$(t4yM?ZQzNqvW#J?h$688x911_%s&nqGxc}T z{XxjD9EJU*V3RXq>0T^Z@l(BgXBTI%-C)s-;cD7GATaAaGdxQyoz_7oxGOlP%Xd8F zuc_-;@Rs%9fa6tvbC33nq|bxgn0Y(h;ioBbBWp_a$l(r4Q7M)>FyzlwONMNVB_^-F zv*}y9cRP(~(V^MHJA8ob;OuHU;dr9UC3)gXLUWBK*Nnl%PTutk9oo%onip`ri&7I; zv*s`G?ISsDq38%rCmb<1@*UGQug=&y{vQ}37XicgW}I7Z|BPK6RQ8kxi`~IHRr_f5 zjx(uSDPEHjKh(P>pfW#3>vyuy{^KFvFO5hQN-H$BJ)|GYbd(E3>)Zyp6?epj=vPkO zPk;B4YR$3CCv1EJiKtk&EE!>=dBub+g3i_sF&_;%76d?O5L2%DKBl9X5M}Xc!P}ni zcrywQPj;x2af>(b_fz1~`U{q4 z&Yl)Xlq_Cu6&Dhn=%+*pTce#>y_(HlVuEe^i`GE(N>fjqY(1i(d&Fv|1_m*D7XeLq z)(KeNmAOo!d6FB$W~Jx!$4YHtDe#>l-b0lc#R2`6NqnN4M8Dk#uUO%dyqfn%OqZXG z74F+N^=mzR*y}YIwfU5_pe;AMvRXv+Cx)OHSF_Blrk{p9{hR>n$H zaADU=ss!PF8>vBxG-i_PMgA-7U=KFOy|cB5`E=0s$@(JH2REm_{3&d1&2=eGUh&b% ztA{xGN$AAYwE()1%%XI8yrnLtLv_Qgp2DCIDcaS>A}&=pr=SBquS*1Itrwy@Dkmb! 
z$W_&D449cuq)cp3$jjw;jT7Ssto%+nG(iB$R1t)_FqLY@7`NHj&a^mxTGaaAJm)lb zbvOEd!O(BygkHXSZc(z;J8iNaBw6yvyi%VqhlGBg6&mLM;hlH^lYz` zn|ns$iY>)y4NP4Mjz7CQVV?9Z)4lT(U+=Mwa+B7>PIl~r zMG>zJeEMh+sNmjMzTiHcbJU7)DW8heeSAS#)|NV*wp%(#?sU?V>@071fVijI`ai4u1zzKY%cm5o0&eXb{ zUk1G}Q|ee%*DG{xW{qUM5A}+b8==q@&7-rZ8M%XyKkzlFWjIMOzW?e2TOCqos?+=+ z4N7?uJrQM1sJ~3xT|9Ej-&a@id1RA*2|YcCH*j}#`wzx^sJc4w96h;cj62Rua@6)n2hz^PORU6L?&C- zT5=GxD|$*y4R!04(u)hK@+<3IbP*#VH2vfh^n$HiQI|`vecY#s(Nh6=7y6C2bK(|o z)Kl~3*dj?1kCxepX8el@)j64!^_6PwUSMn1^kkxa)VS=f5!z0gL-!`M@=wIzO`gEnp0+u4*Tan^m#ViWTv2 z#Hh!Jq$D}r)~W#ko_6N7-uns_PhxkCpxv1@?EIG7@}&+%Ll;((v#%y+r-vF_uF9U; zeBFvSNrSxmoDLfpWwnI5TFAmmFL}2wmFrRb6ZS2bzz{4ZZ{}n-6N6P>aXVdx&aI;( zl3EGlj$*b_!VU@MS4T3FKBG(W_QW8LAS&TiB&~*~5xhw^GM`d~+L99opU$1V#vbL+ z%u91@R90=`k~U?eC5E7?SWPc6eZjB9>G*;XUw??Vwp^pUUG|K*hW{3~1hQS_u4CKC zNsFSEwV%0eTWDF$;}u6X3W>a{R!K`-%C&9-c#jV>lR3pc{r8jHBM+rak_V#7i-n6x z4ke}TwN%iG>WyKSUCy^UjO0{zMzCy zh{yRxht%Pl+T7KP=N_@4mhVMWXEAs*Bj>>?-aHXHp3wzcanJ>vnQ#B%o%NNkG-Q)w zlht&tr+sy^ZOVO@w3*eHzTwi3_?7Q?Z|OVrpP7({oKT$~$e8{MzC7h*jLlQ>TSaV4 z`O+sjua@_qic`~L;7aL0@YJ2sPypyK)YYSU1ImOs>B@yhMs)Z-Cvv=-HE@%aX z^cSLga#Z=iXP?YCtgk}|eW3B)jWhw1WT7I@>ygwWXv3_m^2ar+!X`CV3e7&R@}m9J=Y?uK%9q3=;k8PWq#Q=T^I15NF# zT<5_Hqzk;nl0$y3H;PCYCp^e8O01#Xu##-2U}#vsLF5trx;#YtIH{e<&<)I*2=p;^ zrA1AY+LzzSO*a#tX}^7ca4t^=rSNAE*wbigj&Ss1<^!Q2x?&-?=Dpwb24KF^-DCX+ z2)Ket9^XRN+}tJ*K616=7BaQ^ClvB3=6d>dGP<3!jAI2R%A;!1ynIGJqj9FuL!aE$ zeG10hZ!IEo#4-AS>=fsK4z4Il@T3bm2ots_RbS-smR5pXNSX$4-t6ngI9<`~i5}nt z4u|M}AG-pRM3RD#?3;HQxPC!NWl>o@dkd)8o4u}5%10^|~XEW)4drF>cdGYf4P6GIK6>#ryk zIHLJbB4_9AV*IK(_n7-~kMRU&hyeLc$O#rU`@oyYTd|f^bC!z$gXe@3eXVwWq(`G} z=N><=n;QryQN0#w{nErMaRLIl56+%ySUl-)5%CaoXXHn>?=u0vD>L-HxoF$+-7y3G zP(FE$ISdYf?*F+*J3B7j8Vu_J3hPO(Xm6>zj_IeGskMxqLxlvAbm=q6&QO3}5!K)> z(S}fAug0PYeeL-HNBw{1jOb}mBP+2yk75~j&_p{5)B0%kOAW{pbp#CFKZShbc`tdAV9Zmt<8M%h<6r%L%hAkc<|QlAoVr(dG}PX|<0D+EZT5U^wH!o?&oEV1vC&vx?C)XRzByLuB->-p96 z&Y*+rycNLFOB76#zF+pqO)(zg(5oVvTnf_Jy0TOb3ZG<(tQJ@4i#nr!84hh)=IwSY zt$@h<%)m3u=PL*H?=m9?t z&v?umyhL$@jn1a&6Ibq&EcqZRb8L~;nIP1gnMOwK81%=iW0yVU#fO}Gotf@q(7R#UqPRtm^;6b>c3VGLNiuVK{pKqK z?erZz^BpQI|o>CYqMDq?a$4p*?QtfO?$^%n^3o~m> z^S2AfP2PDNeM3*r1vZ%qk)!1X3}HBog)Q|`G51ks(hArdb*IX@^LgK60UVUsK84YZ z;x`V^Y3DYwW#kmH&=|CB5lNZR#ar;+pIlan6q3}Pb_p&}9`rpme>P#T3lr6|$dj^T zO^L)eg$_G<@b=sm{H&bFeAu+++hFP?RQbBy%Bx25EIMROkh=h$bnw0yr+F#l@bZ$L z2eu^g<%yEHJT1CJHwL~W0fE^6d)BJGGm2G7Tkydr#zdV+;)D_{ZX?6=oj%Pf$yFFR>=imt7E1{@34~ER;(}w z?hFY|GH1)J4k)ZI+=|6p4Ev~!H%D8mUl(+cGD3Qh6ig*u(WQF8mmV6Eqv4(DD|M7U z{pCjcSp`&ym83fPfO>gOip1?x0^>dfb@VNpV)Y$i zb)Vpme_obOw=7Cv8_#zqKD2C3hNO|++UHVE9?d|mel(0bnppJV#RDzET7J4{0PVtw zr^au^;T5I{ck(zCqp>qbZ|v~C{>780nL@{0_H;$$##Xpf)@anKE29+qKH*RrGYhL+ z=)u!(^_A|IK?>c7s#h}@HFoE-x+lkea@!^J`RVafJ#a9IUbvZSpwqMBAV%oi6D ztO6;$D%CnTkto4_>EN|ev#y)2?uDqpX^yP&#NokI4A;zj-cR`A3lNMPn>zD@ob~@Y z-Xwbj)S1aF7Jq;8!rOP4a&k6iGV zyq@LW{VLz0_X=P6vV9WMR7lo7rNWZYMehMIvaKUM$rsIsEZLEtKVqPd=#wo&s>VSQ zs(A9S7_Y4c%n+_{JL)B~@BwRf0VlrpBX6}TYh)`j3gd?3$V#;UdMvVF3SQvW}v%%BRp~A!P-wXZrPE6E{%MgQc<(g%hgUz|kxo%)pyU zc@e(SZ5mwG9w01@28f-`+`aF)h{)er0GqN8N+Ld_7x|2A!MmGQHL1%N+47t~){pm= z%54}*4@}%BWtbHh`5a6Qjn%+>Snk%f1sAnxCn>@Cv(pV@ch(`|Lhlt7>7+t@)9*kK7I*~ z>Pa0uU@3XE3S=DTzs5iuL6?Yk90J4YiS(Q()StOd>yRR&RH6<1jhCpv3!%as$(k=DOTgfSmCS<{eUaqw+><=>3DAhKTqoXJO0MMoa)dtmrtc`Py9 ziZ)nz<(en9K&9=%dbZbPciU=u0bygtCCC|WFpukVEk`%m`niu(M#<9_@oEICUbk;F z*8;f0cJ3BCTZEvwn%<$8z}%h9sQWmDh-)&-PE}1L>zv?txO<2WInyhKD{?-cbQ{XW zX~luLR^fhXXh1((@tXC|Js-gDP{%T2bOV(3h@=#v_WwB`TsfomZ9t*X^=a>G2i-0? 
zre!uaFxgj?jCnLov&rt;W0|o9?xlQ1?L(ND7i+Rvg^!vaXk%DQ9;4a=# zh!UTQu=jJt1r$pZ)*tl``Q{Y<1{?aFXAFTC6#9T;Hz%*Oi2jTK$E5sUPr($!nnqP< zbqz7S)${2T=f2Y(F=jv62R;_NH48s>XC)oQkp!Ug#I3~wFMNBXGr(;GRfQp8#5API-+)~8X(OmzN8 zF*DpvcuD>y-JYf9Iv^R^sk}u*+u=q(msm3xHWv;T$u`oR+>+v|xdMqm5{emXNUSTZ;)f)o-puwv|v-hDtiBoK^th87-AsQv|MdSR9w!4Ci4`nL(sgU`yG2;My>iboz?hy> zObeE&4lDr*8pBF*`n)S8vPwJ{)Fg=mZzOgPLBX*$U26aFbXxhMS&vO?p3VEcGAn)8 z&h5t482>o?JcCcPKsE#Ol`oWp{F8=+Qu!1mRM1ad?>i3~_ila4rn_F+>1~yp+VRcH zfvg%|YXp}rZ#Vk-0=Mp#@pe%UG%p`!ya${Q1r9r7D=sbe4Q+HffVs!HLVA=&-=Fo- zu5s5`=~3@5jm_@M{2pPfH_oAld2tLV$-B_jiXkG4MMPSH5C$4Uba(dSLOu@XNS6-2 z@Sj#~ZK0E28y(q1PEUX%a#dOvFb4?b&d6&))%>UKBX%8F$sZ_3TL5q3|$VqXZMQ5hSBk*vZFoVia~PoG0G z&VHcdd<`1J2I#mm^i*9l%qNK~R;3HC%Ac}c;74k7k5UJY(m~%wBr>LSi@$_!bl$xz zJFvBf0u$A*+d#JvzDe5L&>cL9)@XD_omy$TD*!sO($$1}Ek#72^0%cRORzL}{A=5*{nR2YCc zjUBNPRIMG!v>x-a>P3fK39fDo?}jxptO=M?Qi&OHrCvtHBoHSG}ZRvRijMx30H;drU8$ls9<*|e|;V)*1 zs{^6jo*T>9{fq?RiuWzocqVjMnnlfSWsx(>QomUEb@}D>z!zwQ?98X6I6sNpMS&%% zrEv(Xa6Gm;3feY+^QTMtTt`}wAcRX4^t-Of&`EL~T?)Bb193l(&yd?(J&&!VJg3}(-DA~&a8F~5#*_Y%X%Cdi)K)cPFzuIv(3}MuJpfS5# zG*%;oWl*?&!3`V)}AI5A(PHS9jOtLNCK`0i%NJu9G!EJ`QJEJxb~}?js%^uN{|V3Rl(8BubMv7Wh3-;%ssSk z(@GT$A9I)l+f1^1z@O^;bKDlsO7%Ib>pqma_`(~S2!$+#JLB-P3Pqd%M7U3b0f-d{ zsco8wYh&ic+1JUl31Z+^9$J%+gsfLeZ$0i*aTJa6}EP<`(QoT2UDrLT>W z+~GsB>ysog$zMJ?Ixp7@ylX1QW-=B#;oJ5Vh{x9p65Zpw=1=HWd>>^Qw|bx6GQkOk9O2+< zMU=NKgoO(6+ahm_A(b^P*oZ*C%z|K6&T-7lJch3!mo?U()5Gh*$CGqaw`?ZJi%c-v zEs=ED0^nSU<{J20hAYSBg_lWLSE7yvYl)v z$62zSMfAIT99M&ZbxD7QhkM>%lD$^H>miN$>kH%V9F9E+I66k1ixtDN-Wy+vX@>x8 zbjWeW94S1fho>l3Li)4<+8w{*J|SU^&9z7C5AO`HZ<=1^dp(cIOP_b}|fqb``!qa;wLZCQ~*^V={U^mfh>*DA{nij0&RJz8ZT1;E^ z=o{{#jGB5C_fGHa8B7z`T!gVllao)0D$RPy6=;ta9><(akhHWbF^N~{!#zG)Osl`# zkz?GtnKB%Co0(hseNWS}-F%}h$9s}&+53gp`wXjaTYB2%@Jcl9e@g0{MiVv_THji~ z(qp^aJ3Hmz%Q9z4e|KJyb%|`%j=Xtwu1_G1W{qM(a237hny;w3`~IfPOsI8d;)>wI z>C9n-c_v+2og>Z4e5eICf84@1eVdNfE+#j8eVWwIazdaE*Y$d8(rPiG$*py{yYa#| zNF(G2?Q?~Jg817qdgWyr_4zhgg^e$DUF3b9B`xZ=#&eD11==<#gb9ofGnaVUad?ds zYdVx$l|(;&xZjrF9Xl$)Hv0I|Z9Ct@k z2$3oFu1$rC$uslK-*!6F%&$`R)lsS5rtBW^rdphR{5me53WLquf}O{j$c1u&-XTNN(T&<=0OnAb|ChxTN>}qc0vO219Ih}`}u7tKtEmX6Z zERwtJwOo|q?D~tf&-Q$5OAZ~5(#;c)OjQ3Myc8n0I&HfFFhq_7J)jS^{q{}L&xM8l z52L)wq`6-fzsIJuFZ^bo>HYqt_hr^o{YO{k!`Jqsy0r>$6lPt%ZP&B^P4F%HD%w1q z-BJfMY1eGmoODtZou_-+fDljAcWiSI*|;cPfb*^lYXbT~+L88wc?CPb>egpH+aB0G z940&8{$3lLC>{_mzNn7kQs5XfvkbbRR&ucn!M-BwzQ9VkYEOy~AS}O^lTK5YX>5oP z=SZ^r zIA6OZ{`umyZ5EE%tQYOT9Xdmo#@(Sr-#E3$^(r*J>t&M^$4%3=upB#v#;Ml{Sffr5 zxm9mtJIsp$)OS(p5tqjGng(37t(sdkEx6`YYt9i?!}_VVcNNLTT~d--W9)=G z_!4P{+XIs4L<>!)m>zkP&O6gwSQhHp$EfZ< zP8^QbAQH0|a%W++)+pZR(?+B(_G#bmDiSPi?VY{DT;MrXnz9=d)oU5?%5I;nqtv{N zin*jF&|X6^t!j(?rFRqE?vhK>(Y#AWMa?-DcwIcN_*vJBzgRKbc8EWs>v%Md^y=7) zg7faPZXPbaO+CgRXs%(==}pyX9siiL(*9hNlW^N+%=kwY=C!U{ou5?=={s z3HmJbJowCDel&1W^u4Pxo>0L^f-sPur^#n5Gp(aYF=UX&>F$&$8l}upJoL zW5K#&6zymV^J$| zG{VLYrTj-Xfy)sxnT?NZ;B=hbcxQ%3bq+Oecm4&64aY_(J`5098&XsN<(lj-b7JEO z;5pTjjBYZ)cMjE`?4vXM&!%ZhL?t#SAq-UX)0jH{(Ja=EG%`r3P5q^N%Rn`n=-9lw7gFx#@m#pyaNcoi zg%bf`m5Jzun|Txfa|Cnur90Q7)uFqvbJ|b0?ECEuWTRWh6~2=->HkjFB-4-dC~R+J zTK{28;78OO=-|0Zd;jzR?nOQ4J%ox3V&Fn$@X{Iq-0eo6365nb!WfkdxY6>9*9lPr zWpKNF!*Ys$DtqlDpN4Fqk%-%u(DD!H-c@L^`;AEX2Uz4}%Sm(}ifGf9qV*>C&!!_S zK?t3ulJ?engh>uy%YJ!RI#bmeLG`2o3cyj9A_%ZgE@(j3a6d=$*mp#fivbQ`swA9C zLvMH${;{&&Kyvv;;`v_)cYd_j9q0fwlCdm6XImKJPBw&HYDNg_W+2AAUecTgvJO>q z#CSKN&tV+Fr+$F&t2@aJC{*lGcmO-#dZTEMH3!L$P#~`5rljBY zK6uUV%_1!yVZ6d9!W;im@ytQGh=N`mG5(*F0R~82J@@?TA480IOWa5kw+8{f)L+0D zzpXSEL@ZJ7mW2CX))#Hvi*U=|BnVqmpY`W)kF;MFE{Z9=7;`!1T=WRFkt-X zBm4bHxL}5YU8Wl< 
zD2tnZaBKRTl+_*QLnW^?|7YK`4?=H2^nz>qFIRqqCPZHst-tim|K88k76;~_P%wFh z>c+pA=S=v5DEHHrzmDMVd+wjNLP;WvTJ8nO>|Y8E?~&ocNx1!IGv|NYh?@m2yib6K z_m}PUo&)~2Rc-X&#BaYZ7lRuxc(c^FDt~DgyjTE(&d8+a_S*>j$8%txY`{OgFVb4@$oO;h5B!(m)~X`6$6iah4|fH zmNp)+%am&2bN|kV^xOabcg;jX^ZBhm`Zkftj&=dleQBXwuk(Lcv8J?HtGf7#l<-O;5GanXW3Uq$@I z=Cp!yAwB+4?r#I|>+fXf5btN0HW4AHgnaQgdd!_163MMoM6r$RZSQD1 zWpkPGsV|RJ&xkZi)mEHIdUIjn*w617K}Cxz(CBB}Ve3lPr2+9np~>#l$cqLtES|^x zqe!$Hp?9&qb+lf0wC+`=LA!ynv9QI@x0X4Eo-^GR#6KpVXQHOBR91S?NJ+z>eaPlL zg^}`+AK&sJ^s=TDh4eKwah!?RPv;~5T;BhzxsF4yisCg!OrI<5y~?N`SzVO;BF*LT z^Wyh@A6z1+>IT&)#qQF-Pt(b}S0$mGZ}XH@xnc448I2pc5&vGx?1xf_^=N%7`>HKT zG4FWTd``&lIRebX2hV+mQAWO6zAIf873D+N{C|xiBMPl#e+ZvCDXb&CH5g7?g3DOHIkd#=|&nsjK0q+0Ih20AH?E){yw|GpH}ct}xk7jGmphf~MF z5?By$k)V~XD)1m_sfqb`@!;8h`yeBFR0g%b?Y=t}uVB8PevQkrqmI%pu8Zx*qqs=W zr`smknSYk`d28%lmwvtM-l#-~fN4y!Qt6vknXHn7+i?e?O14&YQpTj-s`tT#&5O73 zN{9~bl%R?JiYKxTlN)oTpyi&*kNf|=ofEWR;gN@I%t{o@>T92~tbL!IRP@=-Qa2cX zRi`L-RVRQfM|ZY5=S%B(*PrvYbn3j`XM>Q%F9w?{VWdmt!7L-&8m$^5hH?rwQiU=y zjQSY6-lUCQzbK+N=b)TpU`hF`&~jQ#LE&(sMp-FQTDxC&4qv2%+ehplPvugKovdD> zawl2!45I7`SfpABV*EmW%La!gmBt38x9SVG-@JhT(>AK9{qKZm`kaypsFp8KzlbyO z=uGv>!NC0XLBzYzzEd<{;(X7TzA(Lk2$x;mkyrR091M7~8Bcc#A ze=4{h=@f3hlB!@ncDa@^zTpy?Iyt-QOl42DKD~bT+YF8uSU=}V#sD3cjGe1vVs|)@ zQHoOhLYC+)f9e`}@j^-?+#dt(EEz3VEpHO`=;;4^M$#6inSsJVlDG~9X#{D0Yj%qxU{tjniySP|Movy5{ zD4i-rQIWPdI>fr+7n{%E@&S{eDR|yVd6IkdLla)yIFV2Bu=$a(hPx*v$@V!T-`~x9 zVU)hdKWcP*n(<(QTrQ$#IqgZ^NmU)KQsg!`Yk0|YiYEBNhsE1`lUXJQn=Bv0Uoi4n zk7v3CNsFhf7WI!&{pA7R;ge5cbX-N-9nw9=w~1et(^kLNc6IGr?jEIidzaL~y(CU8 zH0jlkM#kVeHqw}{cDE*6X46njBslL8zB z()-y41(9vzYC%2>99JU7Tya!aVMuc- zZm4|N8>!IhSjIg6`o}}UN)CrsfVyMLcKHl4LKcA9ebBuph0>|omlkRN4 zi^g)_cx-*NesTKHG00d|86d5?b}xyNJ1{5g(&~$Pq5+%t)NRU4KgJ7His8>2S(nEV zns)u@M8<$UuoLU|Cp!UxyAEN1@vba~u6Z3GAX0Y#j-?Uk8CiJ2!rhW6&$I~;EXPKT z{D(0YO;q>9>k(w4O7%K7dnKAW#sf%~dytHZtU?o3Jv16TGV!u6L%_XU0E%tl0xdzI zE}Z}=ol3k*fiGwT>L6!M8v4TTx54>m>%HV~;g&@?DerH>2`NFKoHp0^_RD?C673^D zt>H;=4B?PmITMN<&z$wfyX~*+oAmE=Fz)S4Q!8ie_%5~qMadbwsZJKdFeSbpuYTz0 zRU)Na2lt(=Y1Mg)k}XT!UCYk3|2jm~M<;-9JH;_e_+u@?X2OAw6|80z(hC4G^&Oy- zAvjSu)4iul{@=P|#U7%MTAIUZs=j!4XLw?r$tCpv(0Mi3?lmFv)AH43$Of9PNqAnLqe=oa$JH2*sa(WkMK zpQ3Nc&xJDK1}872HV-Q%pE10&)_l&E>BND(JL$qj{5tDFPPo}RoogDSg5^kU84rhH z(&O;mq-6D+ZbQ*}vT2EjK1~9(znFr+kPlPsNuxDUk5^S>i*6a~7nB!%Gvh<2mVV{N zKYg%KJ_VzW*T`?KU0aq#?TZ}ezJ}U|=dX_oR%ZrTA&{6V6iZVt0fjyM&(msmqR_WQ z7RKqLJ9~kzDC$F~Q;JH?#b?4lXXjuek4R}sk+jfRE}IIgSDL8SST`L0vH4(kP`bSJ z(Xeol<;yY2DLpdz)8j*(KFVOcV`v#G;+^+=>(haS$p^}*vA{&=4>(Kug%9J-Z*)7yX@c~k$6$WZ&yt_wd#z~vrMg8nf1ECJ?9 zm@y-WY^=@^KN)Wf8>dOj=}>L~r`@M;5-dU_SYe2g# zp_2Vzu-@BkZMH`;_PUS5V!>Dh^i%*jdml^e`}Y$3783(BPB2MJP|Pru9E}t$1?{a` zs3g1v2H_4nN6@P&b|^nY)>Mu?30l1jYgDmI7Zzt2wWhd&4ZbzgN-8|l;MSC#Z z{}JD!q!^;n&&}Q~&^UBc#Xb<%@yDfT$G!epYIHaghtanx2}D#8zNkJd=(Ko#jE28W zfR^`RbT|OgzHy=>zOLQsl&x9%Nu=Md{z9|haFs3&q6L--$eC`2&9M>yr;Z|2|L?m1 zvONkQ?u8cumQyGpCLnnZ#Ce&hMB3il|CwLcD8y*eNWDAzwu^Y zO6`Mc87pB%(PIKFkTzJGprc?oT|FBKDZMK&t!_QWq6*A^lp4fVa`A3>j3^wQA;l!W~i zv=aYl>7Uzc5u3qSkTk9Rb7Hm#Nej6$ia>In@Oq`IjcPpl)|Dl7P@^4EVIjbofC4gA z)B??&7G#TlVthe$sYykq6NAuoSQp)^aF}1stNF%vr9zcO`|v`|SF|0=YU}VepP-<{ z0oM0XtfN-3m~(MD7lGssKn$Iq(}{cwt`<6UAskWl>wCbVYJ?_~JOE@UP3oK2AC^Sa zJ)-GA>c_N>6PEFkIj-q6V23>DA2;nMQfFKoA)WWPlakp@O!VNM#QJ^Kr1VE2k1h#_NG=Ee<#jgwF$_f=nsKa~mjd;j z5dyRbVw4IMgZi?ug$&A9>>w^DfJ{cFh^<)M} z-6AlypBITnT2g0AuY3K9SoH+J8!pyHB_i&iI zxRIx417kCF)$y&%nGNW&kO-)Uiaax*E0$ky%S52t^@<)oa%{n@-R>9X;lTSr3B$&W zcNUJ?_OLy0I3*+@tJk`8-}u0qar-*syv^1lX<<3~i>)K{MYUGTFRIW)NNkIuITB&6 zd_YJOwv%6pt~hik>vgf1K|k{+0L;+MzyGkcfrdprKtt|-NfAwmwFnUbQtu;-t86Wt 
zwm9VoYL>0gg4GW&pqovNP;FQF()q~&bLA63%pUX`>Rg+KH67%e z5=5{M=T3IeG&H|BN;{{GV-e7@0{GxP?bZ$ix?-Tf>1YP1AD7v%>wDl5ECHHuGNu=L zu{&@|$rsB|mN*kRsUU_}2a;w^S`8jQ(t5fnuc4f%QB~ z<`T;D`14+y5}MG}=~P}d(tYn*_9yvrw-iYA+8EOs}M}Fqo!mfG?Zl0P0ohi-d8~z<4Tv4^9tbv?q#Y zXv1jkDO2Q2(8JsI$HZI#NPG^unZ}zBB|+KPPyaP$lsOikm!i7d?r4Il-$ zZtVXWntzQ~01MB%Uk*cmB>Da}U-`t3r~!rd-5I0AT#<17C6xF`|1x{=cJZAX@vVC@ zD7y6Tn#Og|?l9vg020HxfIa@U2u!h<-Diy#B@i>v;gKQL94@F<{-9nG)F~)27L<- zbqNK9S83<+u@DIIZ^^%(druMa>X84M7OMQ2^o(PIvvM*d>t}`{^co6NuA2pC;uXfP z-sz2#`ooDfDKBot?Z!9@pNyXW9P5clU(@noSHi;8({G7(S=Dmy%Ncp`B-Qybw0{c| zZGw=jNlHp=I9TCZxd8CH=sl#Oe9AWIF3Jf~2erVys2ldIbY-sZE)Z_A69!ZkI)X!j z`0MSd8#{2m!X^m7#3i#C%@@FV@^nN2C8F0vt*l2=w#u7r4Xx;rakkhP!0yU~XwtIL zaU6-hX(=WGw8uygaItQSMKSdzaZl0F_oAqM0Gx}a<`3cIQEuNW2VRho|FV1_HC&?i z`e0KlvF;c`9YHEK|e4Ze|eAY z7Q}YqMdLQp2?<+v&N!(z+OA|*gi0mFkdaKb;v=gt4^d;Jsg_|e~} z0}q-`ihblc49s%Fg;C%xmX^d!8hfaqpwN-I2|BB+2nQu%VCCX((b%WHERG{->8akE z_gzZ@QCU}VycQ-0gi7{I{SA~8P&d^fmgbco+W^jlUQ@V!LB%r%Mo$_v&8M_;V4cmG_JtG;g$K*#-x;)=okOg~025lG2aEf!Ueo#K{BxEyQvE_cP5Q^YDrVjGqufXjY zKNEdp5Hc{y1DJ;u&z8Yn65rq3#jIEd0)!^RO5|LEl-%MAqSuh}P=(g~CE!He7&2=~ zxqE37ny}b=jIw;J@lS67N3A62&*^|Zow^|UL^IIDu?pF*1(LC;Y&1^Eu;1B>^0cKX^>U`+t!&X0pQud*`MCP?{ z%FEZBur;>1Y-h>4!~{U0iAMQ@9@#szn9mmvyyk-sq~dVEt3;=Lq6+r;x0vC=jX(un zJt`O3G50f`X`-Q2ycOFe_wR00I*b_WB9%GvLjfJMqjC|#7|Y)K{cgF>Mx%Udl@xZ3 zi!Qy+{rrOqK%&IBp~h?KTtc(gGp2tn02IT%J4%RSZhFe=)2V1hEXlESm3e-zCMq7I ziR-lxft&?9Wjii3l@RMNO1eD&am*`7VfC@B0ehSuoPkXO*AI~muJ=BY)}T(#PXhw% zQeN7FdHVGOgB!dKNek2PEghBv)5Ap+?JP_jF%s@(^LXZ?d+W0FU=amVeB1%LBiZ)5 zpNE9Gx69HhHjUnFCInp+&WIOkpBU-iO|W==;0`6IBb$-5`!2E--Tf_=pi%nj-p;s) zJtyJ!pfVF8Au-7FAgm)n2m<+W@m0nJ_@NFQ-{q#BH`i8gRqq0&haYr-*-Hrn%jZb= zqb5Im8vaie?zp2syYFANzdO(Mc*llA3i7$hIgT8z2dRl)Ke$|l7XzzuweO|T0U`l) z7OaYB1_N_u^Rh_oVNt#M5hek~_i8_{_%M&Wt>D>c#pCVc9J)3a5ZS%u& z#eMhWVuU)ukJC6S&V(ray@g}**3Gp6h;)pqCLq0`E~$0y8weP5K_dMTGsc@O2r4<_ z1#kYi$-%NiKFIKYH~t!hMLygI=57QD@EmGiF-AQb=K#-4C5i*jyyHjmfsYs>h^oJsLj;?We)A1dQDaGvrP|9sOG*U5r zJcz}g4sFE|Hft1p&RMdp#BUuQ=pgw_zP^-k@!z|B5<}kexsQ*T91@=&Q@Z(MivJjJ zZlCX)w=>&5boWOwKNLYu>8L4qjY}6bq(+*YY*^my-7ADM%5g zK&s)4tYBH-kgc(0)N;2tWa+0)9XRAJ1`J4$5~J4s&(a<{*w_a-9OQ!w4g>flqemx1 zsJAl&zs{T4=lPsi&sZ&*%XhPw%`90FT))|`=9R5oBb9De{>ZDridKJXg$i4`a3;TG zrdJn>s+#Vj?+Jm+uErk<(hCWtZ=f5ZBTKIy_%e+Zp}fTg#(FZe#dysRLo7NJNyRog zKRQcWv4Q2$v^I*JCEPz3*!E^gL*O+4ca`{z~ z&sa}ye>0PMEm^){*paME_845Cw@@^BWqk^r4WWD5fv}N>Z1-}R#H7ES#?44UWylzD z0Lz^plsqJn+z9wac^9DzG|)zg{Ai#%l7(@<((?Z?u)0Slq^Hd?n3Q}A`;G{iM=@Dg ze~}JdFi{Cw>zN;|D%iYu{Io4;%e1Z6ob-N~H@A2~n!czsmp_I9uM0#qVp;bajW`CI z!!D(HtmkhPtwW)+7D8@%;I4>9j@NdLQ-&v9Fd%LfK0$uwrXU@ozx5X(v%dS;SLlY9 zfM?^jy3HWw_VI2AyU)UAv3;(3^Z0d5h>MouMRvs*Z-q|?lt9=8w4V+`|3*8>;)vCB zd*t0i^G~@b3cCaxKvfYCp1O?BfagNPe0*Fm25fQt;wj2X4;~NAjN|@%Bjs zW+#avC>Ka>tWNOXjZ$&2voqb}x+|@AV8>WG|kvc&eG^Q0o z+hpwdE{Ey(mL6c%?!1lLKP)hl?q*al8g$f{EesQ#usua7qoRU$!1=Habh#bHT!us8 z6F>z_n~gK7$XbD{qs79DTG$~U8b-xJd5ovn%&#-Puzl0!Z4VYV)&b~UfYqI18pc8e z3V-*u!B*XsEU{a?mn#@j3*kPCTG5_~4=jngnYU%kNSxQK*D`_HV)bVHwqWS>nr zCEGFH_aq={W`g#P(O1XnH^erHM}r!(_`HTsot4GH^@-rOY!#jEhxU4Nkkam1xK3rv z0Ik(NAW2gb?19yF^n(Obbq>%X>#Gbwrc%XjeJ+Ab;{irZyZdn+z+t+`5yAP>aRUO? 
zv%MOEZ>2*AgxpvHqQ73(Kn#IX=I2F&fPW1Bl*__@tf zTZ3O>F~|&lUl%0t|8rfC5*U6lOQ!SUAoM5$&oZP;^QqLGIn3+EhNX{NF&-4G9DcJN zQMAJzO)&PTYK}R``q~m<(QxdolzLHKk^8+hcV4Ge*h|IUvuzAZ6l|0~w+H?j8xkCM z_J-a&Q8z7vcTCeojxF(?7N0@+6y5%YC+Go@1|4Dz@&z|Huo@y0hr80+W@!{xO61t* zAaS8)4w()2H3C1vlfpnLojD$9fBl{K)>DXSt~T+FQ>TSYG{b(Jp}BxUH^lk39KETC zWH*V%oHj;JPcHXLXH+exuG+G3+|)@MXRfWeunII^YoQJh(Bnj2>r$0@aGbG}4+4#u z3zGGr5V{x@FIL-$i7>h#u!^~_Z))jm2GID%UTb4iaA3cJ+@!X(W%<0|bK+-OmI3K^8U8Fy&en{ z!8g_xA#4s7cH9yRV3a!j<@VN7xu!Usdzgbe5R4nu3{2& zZPG4kD-%KRU`ByG=M|DrIKxrp2tv^}x0i63J<&WNGaZ*BN)PEIf)m^ZfkGd{>61UF zLAHKvv>&8aGy*;|xll?)Gx}&ej6te*ofADl5eG@%-;cb9=!CcOZ&fWbhbu;*+Xc&U zRi@@cA?rcnvjI1W#H5$E3uq~4%ot-zf=}I#Ch@k1nk@Ok zr|jry9`Y+yJtjqLZC66W*q;u9yD7fCHklx%Iz&rPETDZ1dk!A6dqTcWkIj6j_TR2XHQjT^};xEJVK)N`APsh})HZgy_S^wCRV*bSlx-wG1LNI1>sBFYXit zO{%#R<4m8*bxQ9a?!YNwDYnL3F%ixa%!?c$-imckzgP-V_wSU~?Z!g%MgVuTPk&1E z>FGlmHomk%Mq%lnjv3}&#?rENJL1IJIxLv{W)6yY zP}9zNnG@39sMAOsOeKn(1r46gk4LTD;=Y+Q-Xp(l8ZgI)JBd}i=XPwPnc&3qMDeu? zlaJ=^ocO{p$PigG+nuxAC`khZe&(OAksrlaO7GnTBlZc>CL1RRTN9(@z! zD-64JqGv^{dNEF1uyXBeOI6Fw)AIT@sH#=Ol5CB+#2oF(tA1_Kq%@s3a299U?3kxR zswu(oopBHXBUGaHobD^iQ-!Ww9tThh+Dwk^7TXg)hQ^9;YR<3ovwlO$W%|Z$Vh&IS zS*}8;QLM_l$o28l&+mRXOz?FA&c`~)gc0BL?=(9{w$1};5-ewL9IpIm*8Pxm;2|KDFZZ!V=U9P7WeQ zp%IB!N9NPQ7oQ}*=H#76Hz#dfpILwPJc>(1+ffMLgbjkg2u`U2>3e&FkPKT1N^WsxU)~hhdRZl|Q>}3j9 zfHUpBH-DZ7`Qz2NL&hwQ$Ccy}c!$DS>woeYi0AWbbok?Q0t&sn){uJIlWZcu`f}yR zX{{6s_XXaxd$vBE<||6q*3_(fFGo^5jAG+@U6XQSxyX#NgKL%dsW7Y9>U3u_4_*(K zwx2l=wy#l;#ih#xCAp2hTDfFZz1Mi8h)0nOA5z_b zUhPBj9o=iGehb}9!AemWtJq2fbL+;_5bkZ{Yv)rLB6hOwu! zNS&@qubTo}FO%les$7*EgKMP3C+M;>>8ek0J@OzfSmAt6p&AQ2uvLZF%{{jxrJi2J znfrp$kf&ujWMEsDUKnImSGuyZHVr+kQmtK+|(>g=f8TN^%16Fn;^|qlOc#_$}RI~BZh=k^y=OgbpJRVY6Y8tuE+bcQ2 ztZI)?G#LAO3=2aH6slV`EQ?i5M#1s14-d1MOfViHlMSzN+I&%&d355259JyAnGDY= zndj3oG{XiZ*W=bWZ`I^fC{Yb(L;M!cjdq&DEAHX^u>1A5je3H?ch1v8&P?J8gQjUws~g8Rv@kaJH~{jgTS5dBnzwmN>oOW z9^hIC3%fQ4edR@l% z^4A&sxdO(|(H|EJIojRUKzF}K+BGyvQT|2oWM|-ZZPH6F9lPjx?L@1Hwd|-lJ+;6s z%?6wP8X@2G>6f&&gO&O!Ql6RFM(ct@Tzn&)nHNPI0)uaRZg;buf7|fUpw6q$_<||G zdhDO`YA0UEoZJvWsXM^I%N~Y53Hdf_4w1SKi{LWMztVo9b}oxFj{~nY1T!I!NXLHC z6)o#)U?Di;^4AYV29a+bfS%-{b%fSC>1G;*H;TDr!=Kvy>BIPU(HL~jWb}DvRS2@cA?j+H zll%@f`T`#q>l{IY<6PDSm*6|g6hxYJO7S?2_tgm(TIp#@<$dC|1C_Ivhh*`mF!v#! 
zEESnvu6vDn;IIMevZ^x=!=`9pV)NNJRBSn$yKTesx zD*1SbF2-k*pf)IWhx1g63fZXln%A?Pi0Y>>9qRHOcgdA$7GvF0{agW}*m{z?If z!@JW?@60G(5Ah|gez>u#6_s!iuwMj*ayqJ~hNnQ}l{MQ~Yc_({?2Yl^$6-^ps-9=p zq1t9YS+VtGI)2nS&-N(0E^E}**AtwvX*RudAW@pg;lnitM}GBoxgsF~@~@LX)v!$k z=WxC=H}>2bHIZgtP1~@NF<7+6BAInt8p=15AB(ptaWG15j-`LRFLffmSMD7jaCNNlNVp+;yq$Vn5nwC zYq%#MH8uqlYO7l$uq(}7h3hQmV+L&3DVFb>s94i7&N(i^?tk}i_DyvQP$-Aojf(n@mBr^(q4wnP{Co}e_&c^L)tr<- zrw$u*eiPZUR+k4N3;R!UWE`4@^4Rx$7k8g*Vp`*o4m!|au59{iAJij%9Dx6viV2M9 zOEx2`Rgo4c+S)P!P0=4W*aB-2Y>qG8(Z;fL##|k3#iyU=6b6=B#OS*UPdPv$qgasgHE08LY{}fA zc5w+3kHYx395WPQKTtrI^=#aaabjph9!+RR!g7w+pe6e9w+pn*apyhB16Srikup+7Ah4W$!96)JQ6-+zBM$v=1c28+J=@prqp&W7VPdSbQE7 zk>*R@Wo|aN`*J#;(z42!soMDAoDzX;EVcU7m&{7uu)b%-%M@u?8s=BOfij_emG*O2 z&L=@9#)cnb*{99oEM&8 z;ZtRQ@4oYVRID?R#7KewQw(gYz0!NHT}lR9Fg&6-ZJAQrx1?W_VfNT8-1k7 z>mZAsyIKIv(y86kCl4D6qS53fXiTEA>EZ}mI?-8|38W4{W54_O3@-`M%O@$Rr)#i} z8&i~qdXo;MK-$`tj6-EWW1+iBsgOkR>%aGAE9 z3t5XD7l!P4_?{DaPvvFb-f{On^IMSheGZBAfgQJ9+PpzcJ{9%AG~I#XyH>Zi%s;MA zd=^E7vhS`!eDo3fJ;u6C(w&r#tPj81WZ5txgj6ca4GaboYx&n7vft0R-f$&>!aGP^ zm_C~*i$d4e$4lw?>pBxRBQl;UueB13uaC*^&KJ%B28~2p?bG!WvcA-I6ET@!TCuy& zgV)XJZLV%kFF~dq(kHM7=H;W(+06Yi^^tvaaJO9sVw)Ph_m`0pEl`6e##gbHeu5kU z%Y7GVkXJ)%Tjq_vFx?GJcrO$CLDe}+UKuC2+ZZj3h{Jgf5c8M$p%nsM(1Io{QHO) zf;GMi2vpPqu79lR#HmQA67pq2RO_8#!Fd0(OvC9{Li-SaduODM#rYIUw>T}NC?o~F z4yK+V8w{;gs$ZXz^r;`AO3FoIPh0Vaz5}6j=ATaGI?P}7K%H{DnlWeDVNbKpw^u`X z$%OUm>2FE~ugZ33Os-?8Qwy2<^Jr#V5KonpwE^y(0tk(W8qUz7!Mj_?A{|= zjROp@ihJ7r25mWaFXV!B>H^Pk*(_*oix_;gSW`#G0zk@iZyTkO(HJdCnU<_!ennJ2X#i3_*pKY}q`b z(RKW72F{$W?{>qLBkQ(Lw3pw!+L@Gg@*isq^Ug3jhuBRbW16WOs+Asi+;yDmNx9)Rc&0cT4ybKJaEUr%5ORO~qJ1lotG5i9$ zUB8eUueFh@o?tch!)pGtfv?yZA7^wlha2jxT+M>^Zk>OF5-@pUnjBkaoY!~1#zV}Z z`h*_#9>)Z@1OAdwhzow3!!-Bm?$Vx%SwQuz&gkJjxgy(mrAhm>ZHa2>&E0jI@MHTW z_VK*668_c2nzN1l1D6sqI_9<`LZs_Pq-I^Bb_G=lA2cEu19I}nyS+V0D=Vl za&riIV;tZ~zAfV$S1RFXV>TFS^$s;@YlV9(hEAW)>i}r2DebZ5huTDUdA`|bhmk)L z+iIv<-gG{UpWel(m`xL6`Hc}_9Q6uAo@=&ad~u7VR~c2}6a$A(OojK71m z!vsG!&FfB$!bh*QCWF{j7(`k2FV~u)_bSEmYXzG2^TiyB~Oy5b_$R520Q~hl-#}|BK6r4w- zW+-SMIh&{$!3<5Xw|HH zEQ~|+|DslK!5}*CO;tLJ)i_d+LvhTI_v<^t9@2`s?2zZHxk0yNmX{4AT3Pd}XP>Be zdD4w#Dmz=Vef3m++PPy=m~O{arB4-L;6i&Rv;iF}kf(!LaL(HD`g5aUGyI^Uig3er ztjPsVZ;#TqS{?mbn56rnQwt)aDuqu*XQJI+8D*XK=kgBhI38e63wlYXS`AHejbpeu z_M-ai(u$WPgmfM0J=W{4_G`AYOr%?;uhBei$rNZ!dZj4knNrc{8klz0XgpUDbZ`~ZVzIq{GjE4aR-x)SFC3UM?(@3mv2>xn=Agvz+?9(9D`}rW#kRa#y4z-`T~TrSCpb+;rcpNu?HN4Xx<> zgYAZjD937yF9Tn5~X)V&1;zlbky|LZiG5->Ctd}l*EgnDoB3-Iq(Y5?{ z1v8oT#TSuort9(PDJ5g>Aab(?0Bxzk$8q$~_l4l7*(?$@k{<_Uh4bDS@z!!wXZ!4Q z-b{#3a9a6jYA?ziiFUyT_j z7D^1CY?eS@0*TA9`=-LTjnz@TSAGq;8M=VNG(n z31u(e(V96EQ4NR1Yw6gu(DK9J+}%cUk22ziyz0bA=+gsog!Nk^!+6P5Y#V6+lhSeHGdseRh;+^uZie>5r+)>n1auO zkcIjz%(IxSDGmBPzU7}j=lfF ztoM;p5{Z8l`LSoQO7j+qD+NU&`CdEK?9-ISyG|!;uJLX4obNv#;`_4!} zK5m6_N{E`4z{fed=O;|3-R2|ZiR{xknO}A;^IFl=eSWl1d3E8F+lJN zuSzoOWc<(Ftu^FT2L;GXDpViJRwyOnNRAPsi(0tVc0{{lCeZEwm?{$5D_jUbo zKknZ@J$zF7czQn1ah%6_9ABi8GX1w(tffqssyL})D=AP4)9h7e<_&10tKz?`#sZs+ zI_Mj$C`?34+}M&LKUu!YY$Azw*3;M6w|*?| z>b%@6LfNlT_uUSg)G}htEE%&cV1GI2!m@CLk=x<47Cr zNQsj8I-0up3Wpz`>dJK|c>-Z}vyQ;DuCwYR8S?!f%a6>sgQzMT#ILm&%^F2~X7aDc zjo-U6ib@}IdU%H4z;&*TjAs!Sb4MUH0c|_&;Xk>7HXL>G74*&w>b+AJQNNHmCE~eq z)^4i#UHO``j=j$MXPc&*3gsP_VeZ9RFC5xNjkL6m9#)Gi z7MpE(Me$uTvoBg*x}TF{sFtlqFyqIS=&*N+xlmNTLe|5$dTIczTM*RAs~n9JPR1LK zJp7FHKohYmV8X=mee-`70RNRM0LVY*&YUZV;+)ibm5qhHl-@%>8s|%2pH4VM5_4Ov zpqZ2GdFCl-KC54DSqL?rv1gOf3#qfqWF2G2dT;JD_>*se{VtPv7>~NV3n9*w1!Hc_?CXvyR-un{ z%8~xu)>i56!)A{ya^i{lnDOd64uCbWHI@~O@3Sme)tBIQ9wE#fK%H zg^>|8J*W4XVzw)uur6Rr+KOnn#6eKV_5;bcyyPoK_3kU24JB?Q+y* 
[Binary patch payload: a long run of base85-encoded data lines (git binary diff format) belonging to a later patch in this series. The content is not human-readable and is omitted here as a placeholder.]
z@wU%ZeSQ6%%g^3WqV?canuezcytrhxNf{(id7E6P(M?r(n|jx`?}y@fHFyrrrV(#b z)M2I--kWcP?W#U2>b({Nu~w;o;T3P!`(3tS2?+^pkBN#jmV4Z+ zh6*#SlzJU{dwQm<@^V*Z_(ZlYNPA%H8wmW{d*lear6tmMQ5Yg= z#yBE~b^DJ>p#L)kz43zHJd5PS-|P_-R0B6pG@|LLyB^_h=t0Z}ev zTgk!U>a%=+Jq9Eu_G~|L*c|{Vxn6sy-Avi2%*vcX`&0VkJnkfHkD0JPi)LC8*_I#f z*Lt}K5i{mn53jcB#E$2yeoHDiYJODW@q&5yUir|SxubU-lfA~Kv%YR8$2|NJVExLE za3{MxJ2VfL$N9Twvlf4V z3l}=r8+h;Mcy)fnddM|>t6j!p#Y_b4H0jh>m=*lSeuTO+9y^{g=gFK~GiyUT0ydns zlQuDZmvORU_W1EVyA4e}RvL8`^Zx$3_Rf=otxGkFn&J_XnN(D4N)S20RVcQx_W*a5 zsT=lgdDp6+p~7$msBdro+M4&4dQY0HCE1e^=BQpy1tlw4T$U{rv(Li>x$69A`BhX;~6E53Z9@ z8cn^~jj+k!(**lhpIRk75^;ENX=@{Vw-L5)D@u3(g{IB&$l8P$VLs7Ek4|*>OOCer z?t2wRPh9vj={N$HNUp0_DzO5PvaC|}nYp3#-FMr?>}gyaA-)I3&q5EyN2|YFbN$YY zr)1~e?lQJLJH_WNO`FM?GYfBPrBSyDN^V^0USsg-9Q3g`rE}jnc)-BU$Z_F|vShe~ z`_CwiT$nq5@ociJ2r+Z9*0V?f9?+s{53$B}a{1g{Nb=y={}C%2rdqDID4UC(O!59$ zwYgsVNYu6pYb=?NBFV5(9?Ki@LUh#KPPMj2UDEX$!$HWYF2+jeTUOs_g|L` z=(i^BEt*((am#N|E5||>l^hS0)TOM$G_HOxI1nSZ7(Ku?T{MQ>7qcr7$6~Wzjfo%o z))<20jK%T%pr3xcp((uhL7LlKa{i>*ZV$j){)Rcqlic3T_i`A$y?bnX@wK|gFOr?R zh?S(b#x^EWMib3vyu{Pl%t;Q*=WOuJif@Y%mL4t9?i@3lsyJ1C;YR5fh%o`2M>Kh{J`6l|*65(wij-K*JNwhPB#glzY=Pz45OCvlpEKi)A}ReRfieDA*2$rbVD> zo#krgNG4K>;cKR_ozg^6eVM7MZe4$|n?nv8tMv9gI~9}9jaM^Ua2>5lyGh$sUZHL& zx|{+}@yK;93<&j>wh-&j2DUq8JO=pQn@k9-1E#}Q&s3F>t9!GZvkXop8>e4}N7tx* zeYcq2>XJ8QyDN})*Rked|Ap~!SA*UnEFDi8K?7mqp1uwZY?>?IX6RyXSAGk2||KWUy3dzdWtVZvY9=J! z6uy4_`k1E}#^3x16M=Z=e(DA!o4a4~87*PdW4JuG{Y=qn0fAd(&D6rQ?5QTG4<8L3 zro*zz3x;wkE3$?|#g;{_MnB&7u-z6t=Og|#wEBS3abm6Z0{w~6I-llI4B1?#1nx<( z8^`2eZI}?V=Y=o4vO5v8M`;vZu%-Ik8R@lJkG{TPfA+({GQkjhvxqT5L(jw|p=qv; z5OaZesnX*cQ!BcI@C3Q+j+W!h+opsQqK2KV3AU43KGkUT6p7we=cVqz_+=llHZ<0b z&#-tfw0u#nXwq30pqfkGA4FCz({?fEAFRx9?XJeXP^L0=-x{~ti$b_IRnar?nb09H{AH{)3~b@vhmDZ`OK;}13@)=sNoQ* zM?L=M+wD#d<_gjGF?{-FJ6Pu1s3B*eA~3u3>VdHE`&=>?Tvk#?2mmK$z8}Eu#AW&6MNPqj@?t*am?Dl>*vAg^()z5+2ean;o4_EIUPxb%* z|7R4EmAyrUgzUXTR!EUeA$#vV6GD>gy?10g9D8IPd+%}VW6y)%L$BBC{ry~i|K{ST zJRi@;{eHXO?$?{}o_KW>0M!f$pDhIV-P_vS{3^=-sO)|#TKohS{(sb|nTp#K@gy*) z_FLZmPXq#(SJ2xh$aHeq-?lG6cie?&$#hcj3&7vQp9#|6ToHq}Nf3ytD`{B71%u?@ zv)16*y_uF;?yHaB*KciSQ0BA1rAn)J>#PMW$V2G z);OQ$Po30NJhHTIzci}i!cUVhNUsOnd?f-P%mx^6);mF;Zyg>tnsA*pku$p!awxA9 zpAHme!VCuDK;+T=J42feyHmyf>g1Sjo@bfHqeE%N7*5?waD!lMen1UjbSMx)c)_-- zPp$t}FVl55eb0lh2T)8RbzS5Wl^+7~OVbjT7f)AJX?)n)`w4NCIwx&b=B`&VqjqViy_wrp1>AsV@9`Lgqd&du%)EIOQ z&}1NT+Ja)RqV!YE^alD7P&(F5eQH{g;oXV6rDRSey4gacko6(4+Koh;Ymq}JBX#t5 zMqSjZ+vo27|A4G|q>ig$pSr&XttDd(2S{?@^ zE7=GSk{13{XWsMAQ|Ni>QVStk2*+6HmcgKraTGIsEr_-j%CR| z={{Xz0WlM)?t{givYk>_gAutGz}&6jfvEf)F9O$w7tiR|P974&F1K1vf(RHzNdMJ| zYF|87_6GN|gq|uX!99<9b3D+ivCldYRqZX! 
zt~j}lr=R4Q87Be*m45t=%S8WvfyPGYo-{xE2M=OcPy5jCPhy=PuL?9!OG{|+JZrBX z1+5BQtY`M!(zK{kI6i*Xv}u^pFWn~IeoVRdtS`iSB}(=yWM5p;%BuL#RuM7u)^aq1 zp6I+J0cx6WW}$0#i$T9iDK9VA{(x#n2;<}Cjsw=7I6yX<1-qv#yH3on+JvrCJa*04 zYF8d|d7kZMFoziE&DHDy&RzmA4UZPu0PvNJ@bT@o(k-jzc<71}bNz~Y;x9fH0L>Hs zW2SGs(KH;}o*zFJXFptjcerg7+3@=-F3@oSlhUO!_5;TT&;tN=%&jZHVfGzqoOJ|R z#0N?6m~2@?hTULpKc{C5gK12ir;TMxH^6h|^WQ~wFkxP()Y}ae{qs?+_p^Q+`!Vt) zE3@xsq7O9gnF9%oT~EtGM%OuDTD^K#x6er<;(3AY{n*so+fw<@fBIcEgYOP@`PaH{ z^b^zXJW3lmnuSl!pfkxk$SoR;(H**AA_VDd1E&t`@ ziIQ%}BObu${P zpXkx~m8af@XXFSmJqs*IUB|X_D%HJ;7Z;R=2c5%xcvTmT0;?{JLf9W>HL%u`S&Xxn zL*`Ur^*38Vzxx-$`(!AsV(4kaU#0~fM5cP3XdXtV7P*J70h^priuc6^l4axNlLhcX z^V%-!On_eP*yJ>n3J}Biglrn#uLI-zVq)d^GbC6pCHI2oJ$a3K0d{MfkO#3)Zcrch zyV?V5n-yElg8x@HPHIn(klPDlZnc$@Xn&pp!=SVD<%u2o5ih~Xzlw7A)4iRUpxq=7 zUxu{Ok`nv&!(OjwhkgRETcq4S3UPm&{a(=>nGa76PMEi6(e(**mzqkDg*NUR}I`r>vUj6(;M7i=Egk_hQg0+Ph86or}D>Yw3C2uCX>Yjc`#2 z{d6g&>!3Pl)7jgO21YnxePGoboAkN;bidPnq3@$NS!lNk67SzxeKjfQ|G!!O2qJ1u zDVe+cdvznO$Cb`DBs}7;PUftRn-NuD{t>SV?nd3iA6dT#&ML|47BANRs|)*LB0)uV zO8q|LJJ_ezfB5z7cMR~E_#{>Ae78@W>+CNLs(+h}k zqP5C;PVvUzl5=qJ{l9$YGEx}WS&nNDBPH7xEtMcSG&Y0|?SE%XudM8YoFNrV{shy; z&sI-PJfPr*9j#9ezg2&in3F?nvRh=+dsfXD-w~tRix?1_E^6o(j0j$T7xuOg{YdL~ zTSVL(!3y5A2_9UbAeE8k8!rLDnZPQv;r2q*VRNQ=J;8L-lKcxD5-PNzq@-k$cBreO zq45bUWUw73&J&$#?f#m}qm!mN(wqa;A1k{Y17#4%{Q$8{q8b?A*Sr`J&Vq6<;R-%=Azde$Ia8H8mwx(v>f3y+0eRLAG0v;CiBHJ~nh|+0-^=@k# zr|BBBoBvo)e{c|}%tux}55o@qr|MR%tL($>RF(#HxJxPabb1L3;x3J%%a_GDB5p)1 zuP+yKt+T<#P4u}B2Jk9s&1Sy4eNkLgs<4h*z`T6w zjf|gkpCB>m+^J^HXXOWl-yGS^L9S}L)6MEn4{F9|!?O^0#98s-XDG^AYe;Imve;#d zqElNyLvro*H>Yvu3%*`UZb!7zeoIR4$i{OBs(@Mp=*ho53h3Lj;&u7WCxj>aTuaAd ztzX|D$4jKOFyppsLSE&YH2He#QBdwb{yQ*I{U|(OUf~hGgPoH428(%W`oyeMqsWxg zr)KY&GMU)Y*ZDZm_$cFfl+EAUE$fWzO&6P=7g5gxk2pNtI1~LpBb#sBv*)N2jdb{y z`qrrKmil{jhNb-fI*05WnUf=KYtbhKRft+reXbkNHR9^u^#0I=B(<5#B=3C>a5K{+ z(0E<3nz(soYBz#W6mscTY65lD^ps+v=rHoU^t5gRE%`DX+}m2~S88I+mZ_=trrZA^ z6SwJ!&(F+QieBu^n{*tNzMR_q%%(e~7<2f99{YktTO(oZXAHne?sB2kRV1)}F8MSD zRMd?P$dpP_kBi;4X1QaCn{`X)TG%9ZFD=_y!{oXLWjzhp!21ZliFw^n2!RQW3f`r{ zYKU%JOV~In7Z@v zWYQs)2%QTW{}Bv$j}1b+e(6ISP5S(r`3As-0G|-9q=~?>$m!5le?YGqWb?C4eYU{$ zL~DcBBvY)f_fT$hAGM3f!trXw%~j0g%4N2Le^jCG^+BP{i7^Y9ivEdoVEg+PHO>zt+OfK<7#|S5DR4W{FRiNw zXjraTJ;xlHgQe=H5E~kI*BFK}K(UQekV)BSU~a#ue?p6QG$D$|PWj|twxJGc+groi ztKJXP0fexRm;U289_|0q2tzX!HXLuO(q|wr#i23zQ%T*4dUfW(=N@e8sQNjElj$_D z(L(fBxpHYGQ_~+`zRmPk^GEW z6tor-nVWDZBZztt9$a@^Dc~NDIw`AwP`bW)!HAnPm|DykKrQzgyx+!%y6nODu&E68?Dv#6a^bguQre{4Kk!XAeD_`o0e zE9u%yId^>ieD81`fNodC9M@HT&$;K%ac*;-Z->P>PF0whj1_6-*vvP|OmzhZ%#Iaes}bbKY35!O}^D=aMBu}r`Glbe&%VOce4lA{kr3LAFwFzjHn?3*+!ex?+}t9=_( zHBNv58Z|>}$iB>Pu&g6gD&ti>oA4GTG-*CN;?!;b>F*AT2BpYdo}Dq1Mq<*-hK!wV zCwiJ5X?%wH9o4N4#WPe+HQvK0vOcE^v)T?vn{sMvV z8!AZZq{As~E?vzvyNdv>NTZxwY@n{oWHXibx-x1fAX;gP=kHc+HYy&5nhY!#L7TMt z>APw_C9ll1V$VUrZs;`vFPsm-PqWUGMVU#%^0@zBhl+rizoKv;jpFf^~m8`jK-XOB>0udtc^tb#C-2zx2?d#C7YQ4 zO5r9$Cjck_Bt(%d!AQq>$gYyP=F?;F(YE>s@>Vtxy1_Cx$~RaCG#a&$>nkhUxrIW12~mbkgE5J^G7V_^ZrI zC@8@7W=TIOpm*n{xEBa)75zq~3A>61Wxd%UK=TeFK|Qk1SdXf{F9`v+$K4@gSz$+7 z09&xW0Ki*|+ak*W&|S$06wUboi&0m(yY2=ig%=M+uCkke6_SLNj*Y+{pHkd%Y5~<` zcKNz00<4;jbJ?f)Bb))%`%?+@+MNq9m(?_?-8mF-X&qq_YyQ}5FTkpmLtRFRf#cpn z^DYNL@U_oL37Y61dN1g=KqC3|hEv?3ckTq(UozfHH18%SrKIc#Pv0CJ9ldT4YWVG< zN}&3K9(Bk=)g3|o=9Q#-%QZZH^(t=GQlVU~aTapeW8&^%(&*-L;<1(t$iFYEiC17& z(^Z;ou(3qzx{x{7Wt`HLIY0*Af*h{(4F{^S{I;jN0fhON~M28a%3L5&>AJ<_PQR46yPzYUD~J4Hats#JS^1BwFS2g6Zf~_`{THuMXIz+{kZxJ%*4|uS9+Sl$!&lI)2@{b+Dv44Co;XZ^Gr$I<(b^>FkK0{D2R8YdPZIy#DkI6gkQ 
zBn61%){%cDEET01^;V4$K;GVQlQ*z+&+$ycsbG=4W3A-Q^!2|8aVMJ$hS*W!ze&Hi^*&G5mk&72eJORtoUoezRDD8(r`x^r_se+`N+AW@eGJ)-!!tr>(WK#ExI%xvb>_jKU8!U!k`sjoJE0 zrieq`Moq>z7et;S8`|_<~_PyhA zia!|^XGQ(fr2(;sUqEwI_E$jAW}|Eb&TT+sX(UVW!qy5OW+ zUV7nIkFwaPRcS6rrqyXUgGaR9ba?aaUYjcZi^+vml2;dbt*Pgy(3@hc*4+(o3^KRw z4iX0ASl6zSkS;;uXT*8N(R3bM4vBaq@9zPI10|dErA}q)A{Dg zNiYFB;&h{5C|UoGcw?u|UN9kd4BE0uUji{x>0>lr>zP-x^)BP@7UIE1mqN(9PIRIo zxsVWMb6r7Mr>&8<6u%kdi~7Z}aUFG0^omsHJR<<56}LdQp0@$0dwPX!CYE^ z`bkkGVLiSCEyK?SHs`c~E3n_5(|yerAJaMi`@;#wmXy4Fr~-kBr`geXwmn~@oMq?l z06S(ZF2fe|mMPo-Df}C7J?Iv2U>(tQCt`fld6;sGtrhZKjm`BmAsb`vqBun(GNGs; zW!1{6Jd$D&;ofQOO%X{fsmY+5yleMfzl@}{sbdH8+aiA!x8=kO>Ieu^YjksS1we7D z%Pzu*aX*I;{CDPCaQq+;PeCWa5c(f$=USCZLE^L~XkV{F&0KF#wK0#LaxURMLHFP1 z-1yB7RfEp*pXyKxR%BcvQdI7sntolSeNRy9Fs`Q{RnLf8A(v*8SJ9!Kzx3F%ko8NQ zZdu($aoXppkK-Br)KeG2jy4awQeBU{%_q|I{Ho$npiCdmPT?Vo0r!qGw>osJWiYlo z5`KL+zBkqQk7F`-l~jFXHH z8SAx-v~TYx=-7noRoPQ3U76(U8d%1VxgRxqb1x>b;qQPa^m6TvR+OT~EwCc9Bmi>_^>qXxt{n>uQvhcOoK7 zGu(26BdIbq^c|jYm%QPLxIS8F2+du6zU(MT?{&0M9m?+3`|PlQB^cJV81Nj*7rcE5 zdBm_NEkQS9gmcQcf^{S|ko=nYQ~&#q_vt;1vglz#M1Fcvu7@LG4@{%Q>+iw;&g3R7!Dt+^|ex z$j1_$T8iUsfmfrX`TAt()^P@e>k@$`2haY{@>wTcbDIqX^4iQ5@hnJ!M|s{q&y>Qn zGw8D^FYF8_ypLcLb_dhHNPu*o2`x*C_vt=So8(gl|2Ew4)?i%fA+4E2KAV#6pLDwA zC>*Dog{5O{n)uyH^gORV=Rw~Us*~go7WEMNUAKi0judP2h52GH@l|M^Zcj$1;arEY zH`+dGl_C`FTF&7J`DJnj=qU=Zy3X-W{d-fz=7YF#CHcQBT8clA6^eUt|Bda<^;N!F z?rW`R;V(CIMy+Y#beCLasXYd$BA15CduXrPs0AIT`c4QT+=kQd<5ggP_0Q6J@pEhboHcc($tn&8lkCSC!(+McT4sbOslhylr zo_c-681@6t|7KWONT2D0p26!D1HIeL->Gn)CM=5cLXFZA8^$Io12p=*&CxQi1JYopUzTL)hoNnBp=*ljCwZQrzaJ_QNZl9v7E-+Q+Vdt6nkYa_ZQAj3)J! zAknzzCvnYa>tmRMU=d-Q)O*@T3cpO^s`VD=q_@V31>Djti|_!|-=RCeuEUB37z6{4 zY1-U+c|^DU(1>e{(eGU~>+BoZehI>;r27SMoTEj|0`1TE9xz(MF6VEfIzl$l8n6n~-jwC}>~QQwtY*{*Xq#pwEZ zF6{|+^9i-bx(`ASkKvL5CdTddu+nm{*CsyYVcbiknDC1zt!m3%=6GV>k88qOdp-6$ zv`(Wcb(Zg&wV8_BjMp)Lh~FHD&Em>* z=rP}){@${oyPLk{WqjBBMFT7Jm!2&QQ9&<3@e+it_??vUws^r0-DWtR&Ia#0%}0&j zWtyxnMe8ebv-a5|SHrd)sGWqm8}Y=h6TGJ@W-I+rE@<4=tclHWCF)lOJ-&7M=S#u^^oxyQ z(d?uoOA!#DK}(>jO5n$0zH?A}4rk(Hm$k?k=+yjUx2mozB}; z)6hI3>2vvyQsdWX%vJ8x($2+chXGIUzKe)ja_8$=q%le-m1cbjiZw$Ou}EXbqHhK% zChGuYFsS5PrH`+qRE}EWhVF;ob}?}{PRzjZYLL$8H_9X1u5XXFM|mrJ0?r;Rzvko) z$rbdw!1%yd9n@+t53*rM_Gbv)t^f=m7ZelPpZ4E=){JFDZO{~bRfA3PJgq-jNL$Ib z<0yoXQ7g+9zeD0I*x8czsVa#fopwes{{sUGd^HJtW3(ZD!d8NN-$jNXfN2F5zYZvzqqeBojc@o%ZzX5ah$z1j2| zlf=LB;(XaF(xkwk5l;w48KkTxyTcv_lCiZY5*9l>>(?{Filor#WckcTFjc1(?x4J? 
zu(b7Ez8(2I`WmnuibkS?!Ob54(bBqy!7l#lUNcFsqWsW{eRTGB%8UiAqPRnR25|$n za$|_%ZSgVUZ~6P%@mryn4uJ==AzQ98FKm`_TligITS#B+5D=J|8OzTk#m zt<}W2VK4Xhy~F4lc?1GTt2ZenL@(A+HJhd#HeRRQfXox|n$^copLP^nEHnpj19fX_ zVHDwgDg=^As*BFVsZD9S&OI^FJ_ho1Kq^rONN|xyJJYeYbQo(Z-O+5-!3#}a+=Q^l zBhgge(;E7sjMVRl@P*5Lc4lrtS`}a>;2~Ja4nvr4Z=RV>~!mp z!h^k`f^XJ!_T8?W4)XmWjoXs+u}9gV%K||w25g-rxHik~lur2t*3sJgy9LsJ8kCl* zF;I~QLD~%H?lO|w-zCM3Qd!E^+$p{kIrixuG-`@nW#DeImOZQ6_&n$+Czg96!LkQw zg}8Ny^v89JIQ7YVODr^~&eNxp#Ukxe z8ulK4KmVEYcQ1rzns@2UCbD6uGUnP#G3{pgqt7T*VXQfeEe&q*deEJO5f1F z%e)<-)5gr4h1Sb2+lN?@FG(Ky*+%osq<&+B`J8Or5Yblp*OmYBuY1$z|Ma(PfrWES z^a0VJ2qT}iMY>pS*B5QRBF4^*SnV+0LFgE(6R=^M`sLL5QRe{Koe>1hk zBca#8tlOs+e^v6g3&y3aj+TYkGIp5!?9)0 z2%{Bp?hQ)A84{}~tWW&<^?_2H(VG*X4(g-YrH0G>kdEV%e9}Rd@Lh9Rv?2xsSVZf5 z)t?@@K;)M`^_xp1F;iRS?8>COo7h3B_X-15$f8BR#`ckfc2zxx4etxNi>>j}@ivnz z3^$O+26Y}Nz>|7WwTnSdX{%N(9^+t6RRFx%5!)y+#L> zYyP!8bh4wE#B=y=aPvKx8gd3Qj2E`BT)hT&ll`f19*kgH;c%9Rap>-97WEzyIopy{ zxJq%4Jx-5OkYd+onWC1hc5{1h*WnIihj8#Y$i*v`l1^F!vd0A-G4g$3T0=@W{yI5U z+1-CM33L=jR3w^Y@|0v6UbR3o)X5gghFp6@u})4{CWUvaL`at2Ah%f@d0C~+&Q`d-M^ivPsEM7q>;fB(>Az7eDUJ(Dan=wSiv9bM9KP>!T>hNy`F)Z44 z@{aq&GcmvOLdbbH`|&Z`aI9ai!fWTo*m}~}ZRk?=_LR!dU%ou4?Z@1Yx%c;L|NoXj$oo4kmGIJ(INv-FT+A*{LD{ipT{S8rm8a1GOq#i`sZSqI;rXAbDT`#!!klLy zZVo%VA-S+?&xIzGNQy{;xeRISFv~7Q-*2O zu0VTao9cCHJNJS4gLEBbaDk-KX_s?^l+S&+P8xfTF}0vn+?&)99WIzA;C)~OH+x}v z3dBEz?)L=nTrZc!cRj9Vm`NU4z-B*OLsJ?~m_6QdNm=snf=N&mgRD8bWv>NqklU_ZD+;Z> zozJe92uog%z7Mk9!6R&1pDox*8Qpy$TuK0T79pk3&aXX}p_A4lM|@yhM(R%e~{L?*JGjS6a3PuMPCVzp|k%eUlg+rk^O%B=Ay?(EHi|ul16A=G%{ipZO?E77q>VWc}b= zv4opfi{4+nQ2Y!U6cbK+-dxzNp%yM|b?V8XU7nU9cO!=AVK$RhVRK&dZtuygT-Wl& zRCcsdPQMh6A?4K??RWbaezZi5UthZJYFDyY)BaWedC<;I=c|P`^v`0>+8~2 zN`YF|8}}`S>~BkHCv*10WSiq~`iKMbW$AH?8~6E8#9Sqs62vNLeEv~@T$-5cI@8r* zICkh=)Y{`F^E`gbVBcAz@Yz@4Rg}M8n11VO#11h1G+AvlNs++oRW&*RED%FAWg73V zGX$^CQ}Napyy2J0jI@`@btu)=P0~OGBzq>ej)79&O;O+dL2OUXkpMLLE-2hlUi6o2 zz4z>5p8j`YBc$E(6^!PCqN;vx*TdvB#zEygxGfOoxG-TM^Q>{BT^*H)&HJp3v_kCJ z(@;IHMY&R_tarNimTs~4VbHz{oi^i}8>9KoS6_5>{wEIJoWl_+darrko8zyd}_mTF&PSaWzHIF$Fh&t%GkJ?P| z_zxIjgah)VVbE`eCr^S3G&=q6f=eI=^2x-9=&l}I z;57m@+nK5q6l7shuo&gE!S>DZF-5xY(e-gnjwEsI9`2s)FJGwNIYn+mLDq%ooN#Ux zG1Z$U!=UfHP4~zoSOhC9W3!t1TNcQh;f@(YM4A2+Bc)bhq+@eq#8hc}GBDm$27UC4 zTS0w@2@x(1wVoB+oUO0!=t@4rMLlIHg=8#;9?o34tE<#Nug}89ao*m9-k#DOFv66f zH5ag33e?m8RaMsP7d!d;lXkT`9TgvDl8DF8=$Z*oRA+;GxHhmEEqw1A&ZqvQL5wUq zZ0DURb6_u~PlW?@R zBJ?A}_Zge<_O#Ep>#MQOO>*aJ-obhejRf1UPfsw|$a}g7!N0Tb^3+ExN5lhn{iohY z1A|i1i+<^+J2RD$bMBXF)R8{0$F!7`AJ4r68dck5Xk@br=hJ9JZ0FGn);_Z@dCyja zi(a@sOLC`5xjLk+NWAe9IF{9_HE%ePWY~*g2)6xU{4sg9+=j}!NCB6est+vZ%z(|jqd<`-Zx zk?9@f)l7Dmxz)P7!@B&v$*Vnr+i!SlM5}6u7l|SOzUuR|=VQq2C2yhZmPGhu_EVJC z>p)O1c=n;_qr0zoT-)5_v;{ua4Xkb*%O!lc(H=Wr;f+W})=%dvZILte&?Z=Q4o`Kj!VQ*twZe(_#G_0b86y>_iMWucby70x&Xqm_)KW>X1i-8AU#rm14N6)=kfVLgh>SFNSbvL+3SWXxafx@ZM>0vesb7w6p>xt;0iYNVfF8V5))bqJ=0{)ud-XgYLsbE~y z-zKDb;t59He_sGl^U-H@XCL5Z>}oiXc6Vgl zhi{{+k3itXR@!Z_OdVFUBB5fn+}>$6wb*;7r6)`eY?~N4nBPqwZ;jdu;1d*G+}Fkh zm*da9>r2v#LOb%5G2*uPfhcxVk%JpB85wSOV}RLA{PF_^^8Dc%!u39|k8d|M?ts|R z#L5!O9@WS32M=nBH<(Qj(hfCq0k)$|Xo%gzJK_)Kt8G#{AATbl$Po)T4?Vs`?^S(& zoxmM(n*!Q{Cu@j#xVY$hbNvuinjT52qr?vH8Cp+48RQGpF zO4_XEcUzjZV!zS|g9&I(?9F+Vw57~LQxx;T3JxB#8I_x-u1}wY^IFZy#DDr-^ia5A z1(U>&CZaI4(owlJlJP0Uaqd;O zbOQRtQzZ)3;1f7JMN50Q-$dmg-e{_{V(!p$)$?@Di^XXZ_POChFx_n%p2+sM4L6{p zD<25?z|`GHy!m2OV^bx@>>@8w)jr1lSJI3Q*Mz;x)(}V9wA#1y8g%z4FOuygrhB*J zfP1drN7;dtmxLd(UH{_U%M;xuNgZQQjQkx4Ool7Ejyx|P5Qs>Be2MH6_Cdes({_e= z(0Usa4_8@XQ#iH1I8HS3`qSN?-Y1XJ$u~ccGxHoyQ=U@iw$1E>rkN&l;^jyGWW)2( 
z{}i|&C^Cy1zKb&b{!03rl$oJDzx@%mo=95gTOYQYq&joqYnOa(ynA4Qd&8Kg|C7I*%bun})RisE@1L_T!d}?Yc_JH^I2G z&rwCX(2)B)@u7s(_N(%!63N}0IwTdv%89&v^f}-dh6_4vwhboiS-N@hi~m56$M2xs zA9-#9^xu)fSod0|x!W*uZjhD!r@uc3I& zxKT~3bA5nC=n4MZWZJcS%t#;VpSYLrebC*bg*RS{@?ga?ks3=Ro`V@V_Zk)!)=v^o zyjY(H2+3@@ zBRa&AEqVujhi+K?CB%X8)Ve`wsI)9U3d`)_vSu@vU03nep28s3ITOP+3a|{Z8nke zrG3$sVg{d$)eD}6dUz*AK;=`FfXXz0p;SKrvr09fn6vtTw?Pd-U?u#c_u_f0A$2dS zO$WrT=`n1%169;^k+syLM(#&2(vQTwD7(h%!*@ZdI{^VSU@VcUpssdL$F7}NYZ3r;A?YoADrtN{>|;pqYr1j3dWZRW%J(^9 zps1%aUh2sHN*kc{qY>a`es~p3>$9_xPx8FwzFEN!PepY;lR5|XVst$!`b)?ihGrCo z|DDQWZQh}&%AQeibz5`ko}RY}gGrB1)pEc87@HN5j~*}-^jv>o3}1N@e#T#yrG{<( z2Kv)G%1KZl+H_r zcb|Z8`79p7-}AG(Qq`SxXl@JFfR2lMQ~X?f+d=M`L~^>(;?_my<87$<02v_&OaG` z_Z;U42w#x|pipOSu}Iz{>$RT>TV)v^9G~xfP;;uGD-bAyOG{+y{`yNS_HD0xNb)>; z#R$%-NrgfA__*$Pb7)ipC$pbggoX30uukvvC?7p}C%rp5&p6D)gh1L}ifizc3$DFd zM>>?RMoHna!{l`5*_=p$;uUQksRSjT6aP(-^Lk#i1xp4s4m=iaN8DZ{hrV`f7kngtD8!hVh1`I>6Fzw~aK)j$Ij z)5oI_Ggz7^j~d0Mhu%o{M}fPX^1eF+w^J=x`A;5a1|Ckf52QH%93)9U@qyo}Mzjo% zoWVIuEGkQU&i3Cq1Ea4`FZXTqIeG{uKV>}HZJ{&Z)BFgGH}@3ewDJG>{`(Ob7O&_) z6%Fn;ex>sZ*<%m*KI+ShiDw)mg&ur$y9rT2YrYA^1pj`*|Bhl?8+yX|GIueDD7%>Q zaoHPSK-@9^_qYz?;9>Eq}BK+!A2 zW!ax3;CQ|on<)3(MP2 z8%3oam1@qFJb2o(_Vy0feuDrN(~Xmgk~Fh5hn3gre2IEGlC|g0N#V7pl~w3NsY9Ek zA<67~xCwhR)F5lZZ~B$zDyw3WoS@4%j0W_7Tc7i5ufa=Y1bCQRxC20>3q=B0*y~?L z&;?cd-`Zl}TuXNAcY+(}G7TV6%dxE5CbR3P@t?KTg~=bmJS3%C{&lk{Uu!n zXDdJ8fG3ePS=cS1yxbwC zsGz9pB>aA{U$;gf7boYa)$@=Gaq_Va864sbUEOHQ>w9Y~(6$({+bil+=kFDj9_riG zs6t(vQLSyHKUl_6Di9>DuO}<&dC6MN?%6q}J@E3xO-iPv;gLbLSL*E4G7cYB^=&b^ z&BN=e>>znH=eDvgSMWyQXF~qG&~pEwj{oQz&~-|dBYzP7H;@H6-$QjgET%Wtv+{?Y zjqZbq3iYQ{8KsTLF*%xG6IUK(VuWtI!&?as_-tj{Vb5Y$*Q>AxD%IB1b<6$xBl!*( zG4pa68Wx1B0hXmyl@_C!&O3q@A2<)v6}`>gOeLJ9u~H1R783MMW6!jQ-U#4zqtm{XmnBY5vBwli=S-vi!DLiu6qV-bLC1) z#+s&Sou2p1oaP(7>XcyWB5nspgRh6yQL%S*^Hl>6z7m)mNkdB-_hMq z;V)otn(B88w%C;V7T7~)Tai%LlH3}Md%wIJK-$p}$o$poi+rYIiE{bhKsk8%Bn&Kn zR*F;Tqck1EQ+;y4%93^utFjq~#&ngHMe!qkQ1nGArJ=k3F22TKsSUKvs(t+5Tfc3a-tTCv1Z~b5`=}*YM z(pqEwKj9~k5;fm!!|}xOGku867VXBbPKSB5VC~c%HJ?4*Zf=N=A1k^$A-Ns)?YQ)I zYU;<(#IDaD{C@{EEyO5~V8L5m^3Z?J7Ucgtkfi@;#hAMPNR{#d;&?m8PfUpX=E{6- zygLCvWWyDa>?1G-99c{ppA1m5W(aNK)v=|?wtszj)l);g0a6dlE z^$xff@j0AQ0H`gk`x|hC5XtkT)i|g@)yFyHWJK!P^+Md`VPph@EUu7B^-#u+NAbKt_c#oUGJ#pNvoH(&QG=>X9l6=C{O3{D`O!#OKAx4!M}(!R#{I}KgqN@)V2F}J%{(RuNNLVt@V<#u-md1*`>OG z@#8!;QF?`d&=p~=%_$%y0lcocAfNagwd`h!(@MA6&RtU4` z;SU)j!?75%;T%mR&Qu2J=T$AU)S<#w;Rw)WZO{~7#bF3z+2+kDws+od;4phPokFH97r0#CI=NH82sIje@!M4jRa#S_F)iR@;xsk(%ub=bc- z0RtwkQ(BrhKZZdAymqYISfMAJXFl4GYWe6F^n8sn)(8j;j*dH1qff%=Vf9%Ak^hbI z@!2pJ>RB3Fm+FgZPjO~w1w#vvF? 
zM4kNmTuMWV9kD@Hot({?$nG7!3Z9*Yr$HD3QeS(7b{E`^S5KOA>7l z|HzsjO#i5;Dr5hsa0}E-b3seVOmpE&Ui@=037)pzky;}hdA?smebwooa&bk!%`|Jh z;HmH!R~Jk#VYwbiyvdxQ%4XIhcd^=gZRKVfp;jn7l!t4fv;m_lEzKx>@Ar(!#i;#N zwyrbgtl;%M>Igk1gFL<23wZ{2%FxizAqq1mnry$6tq+vOq~qc&E`SUCym%94FYI|{ zOI_u0YJCBeB+LZJBoilHe(Rm>Y~Vmk0<0h69-nq%4H(G3k?2e0V>A_>17(YZR4u*u zP$)4A*uZ748oC&sv3&9t*L3d+i29D-=o^NB>0N%)*1v<;uL)BtCXvTN26kuL8ae8; zw;(b5$u!`z1bJwioYhoC`pSyY#nG^`T$Dob2fuDzms#WU#TPts4uRkY027x~s7ih% zD_WRty|W#6AMmmIl2e*`c!Q6s_i*xWl@{rp!_9sfpnZW}Ghr}svebK*x^fM=tYrksTzupSTZ+~)pXq*d@B86Tza|I2blaToE?jt z-a8;gPHRHafpt;cd#25>#NkU57mA_*5pyzFqe{0K_Jwj^giTWeSu+>TxEV+;;$GqX zxlAt&2Q+IH+ye|WHF;>5cPla&`V%ECz))Uuu*qSiD~j6`MtAibI2aUr2nmOSY23ez zEV8iRQRUooj}RAJxX-$C2p`D3-I$<~4mDB^UC_&`(uxQEXiM{u;Ct>x9N)q_BWcGS z7BVA6hct5(<0bj#J%7jR)^jwHeH#8sEuR=xM=6URQHi!Sda&AKwmFhNmb56G z@qfmJbTjz;cATm{vBjzl1s9?SNF|O3u>wHKT6mEBG4=Tmk_upuQhC@aU<-1LuD zn6+JJFps>gK>8f*Zt%pvx6g@0)2*7W<+sE2X6WI+)<1(sx79zNCe`z#Y^s0$4CV6Q zj$qZGvEGoFDftE^sLSKla`#uBx}&A0-7u>5%S{Qc@bE zJET+T?uJFENQ1O=F6r(@Q5xy)?(RNw0sh|o{@;CZ&OYbjTx>5ExIV18<{Zy>#`uo! zh=&`W-F#6YORv4}$e0I*DQWB6XhA-VsvCz}0hi4s)A|e(VROY^;d#Y$7y&0rh<7uI zC@F&ir<^G5&xW}M+PgnIdybtp=GLd#zQp){V9h871wWOLli<`9i3a1;nlhfLrjnb~ zNB0uHi>jz@Fg^!@@Lw8?1d~D78^b1gG&0}b|Bj1!;?DL$GgH>;%y@2kCMeDP1RcdR z)J1-&7q#%(VywuiGaQqP8fEtIE#~p*&Xmq!>}M08Uq;p23pj-{GUum7{2SlTn3C%B z`@F*MqoQ^~jGW z^r5C{F5(K;kE^)g1JzW%c+A(-dKl=zZ$*M~6UG0gAn{n;0j5ojt-rhHF*}V07vv9R z!K`2J3dRF(WysoKgwVs!!3+T z8s`lP3IDR7j>1%U-OR{IoU*vkG6An^iRdtErzffK$L-y2AGAC+9FHEx-{x>KS-P~|g!SWi4K z-W-*UyL(;nGWRZk!)W_C3D*VZ_TiuUzpfzyhU$gQ9a8%KgDT7s=XvP8cw1c5&u(mT zPDCfS@Sv&xlmno+?~P>6-#=4$0SVit{5rKT%yz`g83OsG;vcogC>SU#c;5ZKsvCzW zSLdO0f|L;bJzlG`##zMF4`+>P@y)*=SpZJGt%PWkZ`mvQiEk53ixONp#_OPHsZDt1 z(nesCeI39~OPU3$jdhFH>e3f$qT`{bPbrpW=qsu!kc?C`G&m?L!jtj=}WVF8z&NzouzDJd;}r>)V@;Sat( z7SrV>s6@Q3U$N}s6F97-fSg|T`T4oek%v@Hz($rtXjX-16!~x@`H_VsmgGAcgf_rE zWCQSF2si;+=<0N5XhABN1@Pum?;^ZfUo{0WNCH@KXh&@sWIj_@+2?6q7Ddj#$riLP zYswiy?ixuR5=H(Qtqbt$7R2jt#J6YGE^;VM=B%SObX-C!HACf=Qu z11WL*;&0!+!C1AvIo+E}^@*!v^Z}Zb_>?9~o%kumG+qdYKmexoiZ{^n25Hl)yxwj( zH)ztI1tda;!2=wNm#c6T+x;>x^gvPbP%@%!11SEdH-9bK)!t3A`iwt5e!`K1FG^oO zBiH!(nkR$O522o4TTfR5xJ8uLou~q24>Yhw8GFE);^BFfHFaLPH+*N>q^ny1Id*&m z?7bG4KKhhUAPW#PEJVIJQBK2dGn-T%8(4@kw!VhFMLX*11Qj&=p*XDyehGzlBoxkE z*VsHB=sdz@=R__-zJ2+HP)y^S@K;%NalnS}Fl$9Dj`%*EZ1!%}tp~DXLdiTXqf48` zCBPwabZ-1oH1$Ap^#*Wy+i9F=dp-gCv3oNxvQ)|c1>%bX7eAk;Uqv1@@uMIj#nfgG&?N5M4_kLT8(r$@Au(b6yu&2}Jef*9UjE4%CMSCu~tfBiiKXW}6zI;P% zc9e%~0AKq+!J1C|UHQtq(uA!LmwDH_-YOb?SH-#TIDq3}w!1K5nkE%^`F!CRC=#?N zqm0A>1C@Hb)}mP+95d57WVOg0UB-kfs8lRVSN#`xIo zw?v}w9p-ug-28Xf4hgS5e6(&HG?o2?fO+dAwgj#Vg%$7No<4uxy4MUk{%L9qazBo7(>bwY+u^ zg|s=$r5L*Tr?KSXlW*!a1`#>p`y|;`4_qMQ${B%P{nxmly@BKz9imS@Fh0~3KGsM6 ze5^KF+{ZK9cueU+Q9KZ#ujKR}Bb(2tFbmWcH3DSvY3;~5Ub)CHd~BPGu#ua{%ngF6 zUYEJ-$mt}^EaXaEB zWz4x(R#p|CgMIl`%`$Kyxi)YnY{lhXiG9 zj=a?J@9h;{P9MND12e;wKsrEl-XGa8mWaYO21eHWQ)4=ZxZjN{M+HgZT0Y`QTT!4( zVu8DC5U4G_Uo)G>zE0Ox*`JRee2e=~B3VM(pUCWbpcE%S+9piECvglU14F$QNT3HB z68vSn`uh6bQM!xz6srqaBo(JE(!bm}8q;=Qlzrh%UuiwlCA!61(Z~A1$4X}FP$n&v z*NvqSfI^oWw}9fC4Q5GB?%bkI_K1fON&@V}pd!#D&B6zaw%hHL>S2bvY)P|vOiR$r zHn_1m?M_b^h)q-X7wZYgR*8QEI&KEQak`tYtu&s5;j;$;4UeKr?cv!#M`nRcf_{-A zX=+k$M^;aWWl&hycTM*E7|FyeB_Yxz^cIKMsHn`;QUHn$$a2nzWXmNF>ZI_4xOITz z))%Hh8;SoYCl(w=loA~ePon-(My`~0L8U*(*$<&yiKle$AW{##nuYfQ@*IeP|C9~x zheI`cE<^)m1(d&V9@!x^xX*$lMHKWm7!Pmke|gA&hjUI-YCk$nM|4C^jn*3^H<-o? 
z`?wjIH2+ZCM$*Ag4Sc|%!kR$u?#_7Wr!Zo^ynyE1=;8`N7qh9z#jWA=wqDJ5#j}m3 zK;OHvtXY0oP>_A(F18uaTtwFA;pJn}^DP~PP}@Ls)@2Ul*v(k2&>hQI*>(#w@((Wl z4n7Hml6n96$@{PPON)yIo>V(P+5CG%f$RO&V}L2z`qe*qyPg?72xNqEfCFSMZM%R9 z&GjjpW-w%|+splQf8w%3bzcr!Ln9+aOUnXlBsPU`0?r~CO}|Q{CtDFwp7vpp(H!#M z>YU8;)+pcBuRxj;;1Hk**MSoYp%%oRcX)Hugf1q#%np<#xBe9v#EO1ITwYlj${mvA z&iG!(PDc)d(3?yG7rO?#3_tL62W>XrmigQTdwjgUBy>(qX^6$Ctc)7kUMtxA2vt8x zVEC%_=5Hxy3qRtx?C5ytuxV;UWb7jxhC_Zd27{$t?bif3Rlp13^az;_7FFXL3ovvh?>8PFlUXX}05x8>4v)H2@$NkGRD zu?I}(i~sb#!ui4=W_-i=1*jGip~D?D0gMl{588w9WO4laB&6jZ8PFH^va9sB!}y!t^F;fry?oza zH1D%pd2JvMHe3bkE&m99KjcG+3QLd$RW(!jhvIqE>V(+07v!eb@bUPQokFT6u#5Hp zeVwaSZO!I>YzlxPRPmU+hqZ|8GsLfo422=)#ZL{|H%6enG+@5^CHE18#b#grw?4AEIFJ zMLerXblXoU<$7nUzCcPgLo^9VNkx3xKBZ86c3LJTCc{aq>Mw`*a7q}6v~*JCCc4)A zDk_@m`9V1D_0Quf#Q(bJN0gFq8J6(9qEB2*RH5NAtc$baK(|)=BSu?t8}V-{HjR1!4d+fzx+)R^Y7;7MuZL4c}@0(sC0jS_@+_#2&d41F4M#C$ul3pmvC|p+7m{ta5Zn7t*%x);1D!g zn1LoJ|Y%$R0s^Yl(jA~I4f&wo86-Aos1(`FgVij?|yhljeHdBN9*YNlb)4@ zrK9WVX%rL`QqgfVj@jMx##CV{kMwL#!nGOFSzm?!(k$e`g9;*&0r`}g&i}E_SO8Mz zJj4A-xZ8*G(MRAjB?M+Z4TdTfC(Yca;iE_d#*)&JvwYkfEh1aa`uXwY#y`;K9^C}_ zK{aKjNJ)IZZtx5W4E)|@H-O_tjKBWD&|8}!z$!8RjAmN!`~uQ!Ci)1FpoP+GA>#h? zKSoXVLTP6lJLjUMXwVI0lna0u6)+HAk-KdB9uND$%_`LRQmoG zZ9FQ5 zEEH(3i=VZ?n-YKDL@PoB{EfD_>Lt~KEuVi5UB7d+JxXj-BU$sfKg^r1dI=Y_L>K{aUza0_L;aFJ8)@7s;tgSpN zyMS6cGqcpS)>TV?iMXtIKS7@}>Lk(oZ0|=wz^Doz2TIZb4HVx|7*tfM`$vw|(Y@$x zIXx)?JZd)9JevleM7A-MH`MydAI7su$;yH|a%j!fn^6`N;%Qpd6YT8wdqgs?>n>K1 zDS*-kpgr&~0O&!w3a^8_DAHCaARec`k{WCq1iJe4%*b?(-9=8ZiFi8QpdTJQ1z936 zpBGZgc$*2-9o`yXJ!O0bL0#xDAle>&vrIrJpNU|0e_?rQbpH7RE|)r@k=fB4$yEa9 zlx$TyO8gfC3M~ZSisj{HrMHWCGifh3sk7GCX>ku5zC!J=s#1+$ko9B6u;-R$wW)TU zgtgql`kU?fQaExiJX={^QCz7}2P(yZc_hQC#lEu+IDFJRH2$gu*D)xB{9;)sLt0`p z$BM<)jQIS^`gvI$la@^vyC&Qorwks9Ja56dN>pLo6;|o|jD@yuoOiecyVa~ootzM5 z@x2SBlQu5SkM@)?3E4`#nE}t4i)Td7iGpuA-Jlb$*teTJyhq9Yw1SzLITeRNSA#SO z)@S3ZFKA+GNKAu%>{iUf$`j;2EP zX`#+xK(;3-WN%j+F3B4ziE3BY;@YPk)`D`_5!uU%bepk zXhb>a6S%ZWkxXcC+te1@)WNVn6b9i)e0vHkHP5zixZStegqT!RJaV6$Y$_EK6VXY5 zOi-(U&%DV1T0Yx>MoQcVtkT{@f$PN>1xj_0*||;3?r7TgIPEeBzn?zySx}GIr2JR| zSY1nBsHkvc_{LE|6)`4<{93;%eKs!^@+YD^SL&J>r6dy`ZYh|3<2*>X0cTm2a zo3Y^}YYZ9J)7tdxh@&Xr(3I340Ufk;w+ZOxjLx%SCg|nZcunLLsq}wtYOi_e1qcBi zE35m}A*660*|SHqEKs zvoOwnLFjA;qp#tA&cOaj=}jXUei83v&XrRlkd2~I0-T#ip)(BT^XMeZ4nU-VMp*i_ z6!dvyF@IouptVA~k|l^@`@WD#@u5AR1I7WD0mzcdT@+RYa+9)o?*H~0f=>W;U77@X zp=N!2)!z8dYgog9Zv(BlU+PLn*$Aq)vz|9wvwy1KVPXm^YIOhBo!$Tw_y+-Kq5Tr_ z85fdd$HnIAR8vAs;!c(*_tsUNw`JH24#^WSD8iQ+$x}trJOP%}U&&B!UR_ekgW{no z#9(g#xp=g*sgEng`L_3+p`I(B!%8jZJt+VbAorDPUNQmJ&nO70S;4Ot&VAfus2q{` zR4AnwEOCq;)IurtZ+lu04FLQXn>;2Niq%wdyB*IN(EPRRC1w1+I5%|gw1l$FKmtq0{I+hZDaJ$gVvE{iiwkRg9npPF@gH*`IwjuZ5zk-VF4SsOU!Hx z(?8Qx+!{a$3s!|#~gAU<>tLMj{9Hh>;&T%gqhQ+LpLp|ujxr1gD7==V_!aZxAP7p?&ReL zyRJsjY!@Z#HCrTjj58fAIzyi^})s?w%4!iv`lcUIIUThur;{$icadQ3X*2g^M z+_y`^&;lIUax?=4sW@iLhsQf3vP2BO~`g z|7nym5LmLMBdHaQ7T_^cUa}BU=pdXEY7+=s>g;StE?+TR3&5FvHEBE4P!tG;@C3O+qKY9G9n>FEtj9 zw}7EW*q2wC7_BkirE*^cL+ob~ey;!m5CKaFw6LfmCGAZ)qF9UQlDQO^{w_~!qZQBUWEk9le#OcK!n)d~19|qAh+;9Ev zfAt1UkhdVj0;8?OV3{}YD%t|J=?SC7!;-UPoNNnZ^H-Vhyc3$E!$<@QTdBjEN4ckY zy%(Os2z|GY>DpXPicn3N@j&e_!zQqOn$R})X0k55P3rv{kyE_)0j#F?S|*Ew@Z6M+ zmK4s`7T-V2j$-7wfw(3oh|Iy+8D)VptcG;gN#qosjX{+h=rzDvM6|ni4x=e(B(o)E zP1{#Y&x|x~SLw?3e!dByuXrF<02goCCvDaUaq-YFyOh{p*AJF@ftn}L41w%GS$2zG z7Cg`D=ym?duiYDaS_-%{JAOJYQIg@1kb)exzIOA$?_bxtO$99#3enaByAp;?Kbeur zDj@dUy1h>3jhEQf)Rnbmqtjp0t>g^PK;jH#$*drl{M^JZe@>jv7RG6<9?pMdP7M%$ zrjb)`zzeVXTP1G;Cbqn0#1gBw`#*mTtPUGE!Hp9{wc3!zIzZVmg8eDE7FL)meio*z zh<+$F<#{~er~~GNx%o45W%fHy513Z;#Tw7WC?etPVvLoJlg3|P-(W*bw8&TH+3-0* 
zwvBO5W_hmJa7Hb8KPtfQq}6;8<_5qbh|U7YE8jPJO^}tiZCPtpe_&M!3j3Z|-28KE zlBTTq*odSrs8`D+_w{5(WAzM+*xuCs%*_&W(^$f)$I13v9qy`yWzFtGnwKbM?k5U% z$xOdA0{`#;P23dZx9{(_9FVtu76J}wK|ena##??9Cl8F-O(WLfflE;YV@=SM^?`nN zH1!doAFw{~=%`DS&J$#$SNt@m&ey$BCVtyjFOW)w;qUoK5}**P>shq(Q<#r}@kXZ2<;dB%_BrHB0DJZC@*xz| zDX7{Z9)Cq;HD>Jp%xhXy_?_(lHZEz~a;68pm1zV4#Oe~C z7Vy2uGjI+e+|H)LAP*-xWxS@gjo}T-gp(n1o!R~$H#r(nU;JKbVfb@EICKxP`}mND z)CcqiF{fHRyMQ--w}o8ujLGjSHvTTWJq^U!z?tRk+B zu7ve{fcEhtR1@KkS3-VPXd6SU!_pQf`mo>KJyo0v--kGN3p`TPe}9mo{f#%|f?(rg zmNF`1B?$-NG*YZ6(GERH1HQ=P&-&Yj1Fc=BKOy1z+ z_^jnedCt)=z~I01eLI{B0y3JRARK%CK@wlDl?2%egEiur-5j8y$A*S$(^j2wBu^uT zyV63oH)hpEuZ=L1?9-*<(RV+J+M_orDhMdtPNv|op5tQ%r}vEUKsFAX&lF_rp=T9a z(=X_w*JNwq2Z{Zx5a3h`ZeyHxCyOil=GwP`j|y7zuMawku1|#BTMr~y(Qq- z%?deo)G2jC?mmdh-pEdk+uWDLY{M}fzxJbF-;cmlF4ncGJ1p~sUqlDOB?<jZLP&UFdx(N(qkj~Sj`eUL4nah+( z@u_qV=q({Jk^Eo-w_UYYuH~`K=e{t( zJ4joWv8i)QLYWogHMpd%5|8jtx?&7+HAlMMT0f1Ci1`9of=6}&2(md4=dTRSHMzmr zCYw)w{Zh_QKY{*`B6(y<;R~2-ROQeR?$_1aEchTY?CB@$u+vGktS!xzWoZ#G2Rz z`}Z#q2UTv*W4HsgPR z!+y-){|s`594l~UI==8D z8dWF+oC$2Vn)FFT(y9g7K>Gs*inES@F&skuIMALjk0y@>nXrTQBRV6U?9qPdD;*IlEl!u`oSx)Y&t5gW5k1v`0vh&kOK5GF?s*;)BE>qx^bjds`bE$}c@ejE*M+@Fxl~ zWV$VJ@5{lae@(6mb=PExY5J$IQKP?e`M2*SJ5B)3q;ZAVmx^~1Sp~{e83?sdr&_Y!MrnCl{ZmpQEa=(*rm>n*z1-tuT_*Xty*ps~ zA@IlzId*F^?e)!~u1-hlsk4ohR+T;M&a8u=c(u(~5Yz5Zua;8t!Y$%4U9MS$<%+xc zKtUjWvua&Kxe2T3p9Ukx(`}8N=G%k_WfeNIeZ!vD_})V!v8$l08?##_Zm;VKX$}WZ z^dC!b_Ht2eCytZ6PW3|W#?(|va-&;xFu}o}nAC6mupq920I2W)=#w%rsIApwydy`} zw){`;6*mRH2~d>vk-W_^R6H^UNoZ z81cA3rxd_+j;)mxh4wQ39sHQj?GP}5Kn?*@mM;GdB7@#Zew~g*vx-Qj*h!96w{f%! zQ|f5JoYaJdhC5N?!6kj;?f}r@z3=7}unmOfW1Wg&^d;L3YiO?vi6?Fq6&qIQkPkL8l`62>uGF^va-Sx2po)#s3NPw z{sJ9caj-Y?@ZWjCw@}c9blMRJtbHU6ASWK=&?R^?BW87a4#3<;ihvbVQ0A5O-&g z!Cq}v$Yfb(R=hZEuJK3VNn^`oR>mLvAR~Lg-O?w(RQ&cgqBOmG7Po|6r4++Rkw#RV zv-Q^7Q!zV_Q=%*|5C%ok$iCRB{1VQ8Cl__}!dvx+pY1tCzX$zGdiunsU!hRuj9Wwb^iE1Q5S6Lh z4Emv5?JP?_vD=8Se%>jvBQGtT3AEyAqezf`Y|niRHR+iN*OX_ZV+$+Umct2WQ}H=+ zwj#xN7lI)v0OF^R%E$TwboEB-a?LFlOcd!E&l_xLPI1Fjo1BkySGM`RzMsU+Lb*Tt z?=AP6dvw>(S59+wlB5{kFbAs*{19%>sfskojL|+a1bRGxu9c6zK4|Nod^4iapTVgX z&d5x;Y>$ebtQT2pZP+hVVQ=4kPZmJi*`vmZ05ypNBSbMtCeAKWXcmYq?ps4Jcb9C| zZUGYDw%M(#sqc^c*NR@;hrHd}v@`Zi3>pXlDu_qcB7#F*@@*OWvyt%%hAv`xZl1(( z41N3MaCE!b@5gNIV*r3C0Jjf%&Bqrth~<_d8#3rjNU)@_#S3JwR(bW~nBiRKel)J$ zAXqw}W}q5p8EqOG$-*pG;f$=3{CeY^bLM|V=6rxQUS`D!j6c@EwLej5%~9=jErz$A zE{o#0u8Vp3vaqq5ey7oO;efqkvP+xQ{LM&?qHKLsW_i*Lte!>sr%$7@Ti4#)_F0XDbUT+R*q3=vxQBQgd3GM|7 z_F;Qn|9)mqYZXVNJaMB;q~C>b56daoW5YGx_vy0Wl}JWCfzUnBoB$Eiq5#iGW{sK1 zzht7L3lI*u{@nem8IprLt5}ptH%A!UZY;hD9A*-&PwI)c7rz8<8&Lj2;kdtI)bAf{ zT3ttBEvtp~W;0)U;Zd6H!vIH>0bpYDfre7<{VD%X$bheTDyXywO@1XssGm#PaUUHP z@(F#>3ZjRDDch^JwV-V^uAbwq1)#=%A1Blb`*bp-NH8D*t!k#3{;KX)(azN`W0oqM zIZeM~LTWDlg&@#wAxUS8!_^5#(Il5QR|MGDxs`u}uXqs;&%bG|0nWquJd={|_zTn? z*PGd)S>XqxzzV~~ReSt-J>8-FLy;uM`6>2?m{p;*KBuMm%KB{EV$>`{mI4mnDWY%>7VBd%|qbPs-;cBkDN zqs?xjS#2!_%e1>u>WTF1q4BEVpX{#ACE|WMv0k-p^zNDAuj7pLbV;5lt%0>FV^*;&NsK7!D@bH7M|Yl#rp_>xGzYKf#uU!lf#$HmsT- z9+%-qbSpA14<WY$HP0|GuPKkH%6I`2qHcfsi748QFxVW;frIwP=G8xo= zk^Pso6I3Vl+54!md8H>N#$iNXs9{N-NlvQ@x@-L)L;m=Bw2odglufpEJB^ zkMoPxtLFfh37B+ln0_qpO-BrNo5>PEHJQVU7zzZ%#=6R#}6`br3s&+1j5*Zj0bm+2}8=v&OF{lOrM|uTY7>azsp~0?bc+iTywzphGRVWbZf5$eA;rbF=jHFYv=f)3~ zZCTqI%*EAU3xa`ZNctm_vK63qVjYpQU80Q)rk$DK%nZncD5Z6O4P-|*iZ41+p#-D! 
z3RQ5Qfq`;B6iD;n+C`w|Hux_VJ!U><6i5R5$N1QJcUFF9vVvCO&GK}%-fhl`dv=&r zu6*Ag?&saBZJ}F30c3IN7d?9{kYTRoztk!hr-^`0aCCI@{M85h}o;tUEuN=j&X7{lmpDtqe^1# z+$uRQgfMN0ma6mXPYKgj=s6Wc46#<|e{y%=aq7DGad!}JMdBvtXM3`WTf7m6tN7|- zIu&U?mY0tDn}pjj1cy;iKFr}FPd2D!U{-RT_NA}az+h#?ozFW9PgW;|_bpV76(JqH z-I>_=UX^TJpUwsLBatWjahWs07S8&wbQT4dzDJJv$3W;%OOSK?G27^t_Wp@ zxj9rJFdf^49WMExgYRf&1LQeJdtu$YM*F1c*r`;mY-;VWMAh<^D>v5H;)Yfg{#?MC z^(Jy;c!BEQ0rTFOpQc99VA3i)KYnqRoM&a>J(G9vX*>DNt!A;+*MNOtNNm}6pI6k0 zMWoEm8aM+EDUhu(9=6R>%+yRzpeQR=u!WCy-RL^0FaZX;7NInSzVLjll8cdj1kP`D zqF1|<@()kyzkO}qV4gz4OpGA%jKm#@-Aq}pd)th4bz_e!CSUnGK@m~zk=qr(2obPg z=n?|KrXJxjrA-O?JP7^NC6kFH=S(lh;HG%WbSRdnu8yEhS1x9xaJK1Lac$Y&k)P56 zFf0fgg;~<-zs8tTK#V!D2hp>ItA?2N6hFuuAw=IM-ap109Bk|}j)NC4?)m4bCj6{Peo5$WlFxeS0TCWg6A zMwwgLCqkWD5=`h*ncJ{;aB%uIf$tgqsqMnlNn-Gt$$!t`e4dBcyV{|;*Z!AI6Z(83 zd}hm0e{HetF8eDIp(nDk-NT(-32+@&jCs8R0d}_vd#Go^mUoB@nss6k-Qns;nFjj9 z)2drm)?wJA`6jrVgQcM>XR9p^2OH6N?gC+KD)}nlt@e=pZ-km1CVzAa0*8~*^hL5p zX}fPba-`VImj#1+)RkfQmY;OY8>g~OP%y{kXV8jLH(y-R)4uIQ|5j1qm~eWdZPE}) zLK@){g7W;zJyhXCX7LNl2)QIA>D4=KQPYVsr)-s)N-?3TWYnF}!cU_Z&2~p}llxPu z^o*}U0zXW&kJs~Ew^7q^#pu(xG_Rc0 zywdL@SMFhVvq~*6oSDf!m@n7us7;mM?z6xn>iBZ^ti%KrHAh$r8Mk;awEm=RKyyOs zgXYV!;#a2du{fHDY6Z8wky>9T-Vi2(Q7TJ+2$e>(UY5uG$(_5)zI&BA7`^>5Fkf4P zq@>N4vm>(`E4N7ziZdl(yPOnT{41nQ0%*KP!RS$v@j+Z(T^#^&B+J)c#iMQKn~lZUe}PY=jgRRV|slSiM?gEH~WdddvCH5_C%S|{oA|jDAV|#>?)OW+I_YX z^N~$Dxkg9d`MX&t9IzC6LwhxvOFL5W-D91gGrKIG*zv)L6 zrR;-(uYW)npzLTW&gmv2FSolKX*k3wWN>e{J+b#%Pvh65;PbyI7X{W-Vi6fq^Bp>4 zX!oHcy5ffrlt+nMbAhb}eQ8yd%@jH5A#e3tU{V7*FVLvIUlJYJh4YY8&bAcKz8+<| z;$I8Rm+Z5k4LQZGtuU0sjpN(MrVXH99bZ`z{kL?h&wgwcqP>O& z7f@1S6r`50B|VrV+n4q$i#4G`N7O9#%!G|8C?pQtLG5HQHk`12vr;PUSw>F!oj$bD z$$W$s=L3<&TzqtSbb42iLUrwM;kzO33gfsYDW2{7lGoT<0gp3i8d0}9yK9%~F?w-q zky6n>_S0EW2K5qOMRZZP`V2I54~m!XTVX^5MSp*J9uoi`+NQ35YtH5@f+Ra%qw8|f zx4jZS#o|z1?P0VU-*>Bb(bvOjl)85FjQ`oyj0qBOY<;X3*86SghOzE0`cOSkUi5C8 zOy?@|&x%v?SH{VDmF|TH;6hJ2I=S0DtPoJ4WVrx3G|X`zt(}Y-qO!`EMErr@)Uzez z?p0PflZF>p7;47b($hf{Gc@y6Yy8zC?I1p}P;MokQf51ywx(Z&Zw5gyb)oEed~oYC zF2#xzaA<<0AACDnf;BngHoaH<^v6`<-7%d~CXEB3U=T@tATRxuFcIl4mBh6nI^ol* z>}p2liBvGoFr|Gkn-f2@G196rGoL{N7lQH~g&+23-jskjmikBq`Ls_F4BWcYj>e=P zXlZA=e3HHBz&p~UEaw@cm^yjEIZGoP()WLLf8{^e z0nZ!rcSPchtwu_dwH#)9^MMC~AIrPawa2^7*JJk;i$Snz~ z0m7;~W!M4u5jZqk4BIJ()Tzy-WihV%+jD_;B(PBSK@2Bxk>&mQpY^fVBN##n713 z_&pVY=K|F0{)2T-(bC2$P0VobA3MrtD&=Zgm4t?~3nFy52Kuxq^~_0OpV|5|N_KJJ zok>bGf-tk8PQnn#!aZog3XC(^B~vjL_I`!nHY*IYySxic4CIf|Yy~B7Ef}OPY!pidJ7G$_&u9eaI*D4Q$RKxON(XtkPzu3fO7;6C zT7N0%qdS%Y*15k^IT&k?T?Z{t(lqV;&FvTYSQ~_zqe8py%|;F1%liKM|GY?O`am^G zsQKeZ?efZr+&@p@{qvOMx8qm);m069=phJhnq#!aieWT6+YISmj(|F;FI8RA(mV+H z67f-;$JU6!RR3DAzrP9o1a<>bKaFR(Eh*&l!2dT}ah(83{tvbFpS8JPARocOhwFom zY{~EMFB9?@M1XK4OYWbmUj=y5B0lN*dl4tWO`lG$Rb9TRRSbh%Xa`2Sb?{~x%@zq|HvkNvOl`TvXY(f3F4ycpqB z($S$vJlUhUxjfw{Ia~M%;5a*o!8<{DFsR$`FRr$AXhmD30k6?GtTjAh0TUW3bg|al zgsA4xPE-|j-N0MS?t4j|27bf~j6BI=V9FK;Jt@zm5FdKDNAOg?c$$In*U9}$EdA21sX|ZlP;c&@iEUPdit7%%J73_smEbL`zQZ%%3%ilpN z3uP4*Y!el8)RPr)U)Jd%K8BI<-r2urZW(VMH}N)6*rWn*o$e;%<|eyuP>>X!=TD6V za{-Pm>j~p6M(q)}lqKyEBIWOx@DDD|QD1Fel^A`#Hhhggd^%6L9R9DXV|3(x@P?15 z;}I1=JPl^r*vagvef{t0b6m3#1lyRAqZw`9rgCisCZ+)8hVro@Q5FrlM z1O48Cm*DPij;pBwrv!}^#?3Z*NEwTL0WE3L8Esp@2h60P0}l~jl#xM9==o$0q1imVVPP!>uSix=?E@nh-9}I(^4a-nHjY|C%fuCXnz*@ z^CwJx10#L#oR>UK<3*9@d3fK??D+DUz1k@``)x(>4(O|$iCAe-B9E!$p_RCmyU>^A zQ?KT>8X{y_JEnHR83;HQ$uFEPp5n3Ve+$9q;%x|q0%f6X>7px;H04}k+)Kn1XMb_r zWOSA=ju^D4aj|{m-3=TcWW#P+etTp1i$p#$6M=+!!^}+8&4L~X&5%mx=pkEa@ZIo` zGz=KLa`^RtYN>kehLraWaH5)0QJ~lXSg`K-i0%L!G8>kn@=( 
z*Z(aHQF;}l2c?&V(IvVXEuxP(z_f|5@mAj)kMY<$K!|6#&_sfR32Vh2=UH+r&RljP{Pytcwcj9K3bjJIe-uml~zpuXR%FU?{Blg&0X!O*WFKLqOowh z^O<`p!z`?fesp`s(4FyG5;-47_;>A9Pl+|1pN$=^@{p*nRL`*qp>WyiS?~F^ayXkSmoF}J8u-klDr~;vWCJvFYTA8J+8wEMCTs|Ogc5%nnraWla9!>I@ZZ=+}Dpk>Sy<SOM?G-m2zU8_d7aHXX=eua&y^eER*x9L?P+ILtGTxCk{C$0~@+r(RA^uhT(ax+v z4;XW$+uNH$SpJP>o1Sx%6C$*G@JDP^B*3UC$J|pZhykun+gD~AEL;x5D+^3)tw*T% zOZLkQN8NYzN^-0}!!}QDwU7&#KjoH_Qk>D91qgf5x_ln$SDIR{+w-S(ldOrV>{njW=cLN` zq?Gn#82yS=_17m9G!m}^d4%mYtMmcI{=R33YA~#r9zsQ?e01_== z93;%od_*HeS4kXHO#O;|AZK<7BZlBc;)h+2#`w7F`Rh_A?s=boHvGMl_awX5P-bgK zTu%ER-NA-ke5^m&3GI*3?R(FIC&5*i#iSLDA=+*m>oo>RPrY=S-hK%+&D*jAd{Uqqdtc3#qnz|gFb;D>BF!8D|8nwJYV7tl z!`6NW6iCvpwBhon_QiUXcPA?|Lp18$LA+B^t^Vk5OL0|IV`VM^M8_&f#NNpK5R#%7 zhk=H6Ie&&Qv-#oeHJspcy>I4Qs$(s)LY;QV->xe^Vs3wR`ujj}-VTr{@t$Db-b-H# zo4R+iUT3Yg$=2UoT``yB>@6M$)orR4`vI1zbomyZtn^;Z4z2M78%b3dctbx+$?EBF^-Jurqt zF`!ltmqv&&oTy}oDP=fy=RNx!R1zv!@yIS+u6r5Z^Ai8sxRBrW{q8xFrkVkr>5ea7 zo^ZMDo2fqUg}MMV9-UPtb(F1(3P1OFOKAv*-23 zn*GIRf>!Vn6CNV~5!7!j;0`S;ER6CQIpTi){P>MEiPK_hbGanb-ygMR0pGy+Hw8){ z_Zt>Ll3Ig$t133*yBX=H!B34A@`G3W!QBNj@;yyzRcxC-kbR3VAZQ05*C0$9>EcK! z^I@=wvB0e34mQhr$)^G)_V)59|8OL4U#o8D#7<0y-SX4lMS#i$l@xvgo{Pvmf}=*1 zl*Q3>)NI?$q$@r1JG#FOh5bW}c4o1)`#(JzK;Ch%axD=&_hM~du6t*gk@MJ6 zZJ(SU3tYlq4wm!mwZ(scLLVO+I0zI!>+RiL{PM?b+bo{lrAei=_wp<%xGm%>Y^bfP zCI)oi3D}5bc{yTXtCFMKb6MtOulb?@82+hsVrsUsLq*b2$;s2FZd3JPuhBUMSv-}B zYhr>?M~eS^9}`bt<8=RZanpVd;N?Y$Y;e<30pG}Vn$udi)U)3ony?5+L=&H;yfK$pW2ead*ZMT%& zj*?M-UhOt+QBOFWB^NH~tG$3=xKSa1jzejbAebiaAAb>jFgE>?1a#@G8gn|kglg}5 z;90q|S5?+EX+Sjr*}op^4$q5`A6Vl9_dBJ#Vfe`_K$>EJH02=Dl>A~3C-NARIk@~c z1Q;Ab#ki$3Eps zbMQlI>R)ZVx+wa-^B?Lsul0bdJ&x*27D&7=Yh@a3zGJt%Q{mm72d#8Tw@XP%9`|gU zdv6~eF*nB|GbLg%@n6s1EXNYGwG-=GCAP=6Cl+U~&N9Pt?Ah#Cn5`T!-%+T^HHh}u zUVY6B{|bo@E{}J~Vq!)PZ~aNSliE`&<>rjLp;#|Sqp z@O<> z{9o+7cTiLB8U_dv6ahg6=~d|nC`BmjulMKfo!!~Lc6Nqgh$i1T=k4$FKHrz{$Oy&Zw@$;)q`enT3%hDU1MD8Q?Y2a}6if!`;&#LpOB-A7w0I#i)LL-Zbc! 
z7%lty@1)SSRz*7mz!g3A>Y9biP`MXOr#_yX z(w2)=N!oOVO${#zFC$gjFMk>5{(*xUM0)eh8{K90z6jcR5gP8AimhbW&ng+9O$&y6 znjX1E862shBFZ*K<@{o@!2zw5IwscpnZ68+oZz3I%23zUy*&7&5pnk_;T=qFek9*R ztV5KoUoDqgY4%~)jJL2r$4F9JS=8rw&#p3?OS5j|9btBBwlmN{zHlv7WR)eP)fujF zEtI+~Hn+hU9!5M%aOdK$xy(EXb|Um&K@|Ug}AE|X6Fy$y`H@te>znF^ma7HkyHW;2%w;AnhqV_92AB%E0qEzpDSF# zEyBvpkkKvr=?&gcFf1>`xl>e24!+$~hBC*+^)#R$G~&g8TK#9BR(A!Iwh3WYa}(Ap zZ+tcPWU`(#l$K2p)uU)?qn&NYqXQPx^Ue1|!>3tvr=hi5tKVknhXDEm zCU4Y9+!^zYXj^dEPt4RG=|Ecyw!==6s#!3?ab?36C1=7Jq^Ra8o$&B2+uB#DmVJSA zl^=b3(_>wxe6CTf{+R06_B}@*0E|^P#wqq1vTuyv8+LL)jO()r8+ zAJ-8(`>p4BDXj=HZhki$Z6$gvd$ z`-R=o$Df3ehfHHHm)VWkfU+#Pe;`oc(;)BDxxG<=wOCBdDjgS*FqE8G zxH0p2Q|akm*}1Trx*J>9qn%WGSN7!@UL=gUua>2AOQ6Q?m+ya*XTijAcaM0WdXZn% zp~Zr=eAu2g?kw=I0QR20K5@akt+%TNFc_1I{@B20q>?9hyxV2=CwMDYcD!6GiixSe1@08xu^BbA zva}2-g?Tf*B9Y?YBB$~Q5eeuUYGZ2!@Hj?Z1_`IKSb=A{Gt5rk%)(|ci5COmT%2x{ zTMZavl4i{KR-uCjFcUHnXVCY9s=Ip~kQ3@?k@<}0`%0gm=hJ!hO_dnwsi{$)&Qdw2 z9pdK;Q%X|1(wTtCFVKFz2#7FS#!4ARm~C9BT~JV%oLtsi!jO;anFgjWln_b%n)5}b zv4Bci2*UG=;xH(DA!z;UyrkGj$w#v}!IjgFFyMtz>*fDD&6Nl8RiW9bb6#_lnniPT zy}ts}<#(B@z)@UWzT*4pSt|``7LV4AC35W2t_jTe>~t6AP+v58wZBe}NJHUHTrc;uU9!L4?UoKkhy35zbH*@`SaSZL^is{bKwU#$Fm z+l^oelQ=T58_%rt%IR$Sy1d6-M+FPeh)sc+tT3z%8e{7=Q8Fw{dRcAy9jp-%EQK=U zg+DhbJ68A*F#AKVH`$_V&2jv2vR&S6;)Jq(cJ8jo>c{0A6RP8zgxYs7qhhP3o@`}G z4AQ$m)YO*8t=+P#17^EG|JBWq-q>KLWXy9OHMQjf@oIBcxr5r$8S}eHUc`zC`#2o_ z2MWJ%syzGiG_>p(9Kg}Rl)T97rp+Q!++(e z#b?0lD?WcU>xFp;YVzEEB)xA~2R2srOGb4(RQzg z&|3?=Q`K`f%S<{YG)frDf^IJ#`KD zY+eA6V8Yff!HR_puHznYhzf~c$7T^$OrSh6!eEBO7IVmGzN(Xv4ZgIfQS*1FJw)=Y z@m?!j4YK%K$ZJ`%Ef&V1FfgQkjNWHlYL{XdGDzRgW&&sZ_Te{Cr+*lfSGAY=IDQMd zTT+RB%03J!jSI9Wx2vzg+1pHm>wyRONh@B|u{yP@`Rg^67O86+Gk1xj75bRL-iJXa z+h_zaY!v{G3q~1VeBF!bN(X}D_mMe~&crA9eqVpZ7<)1_q05XjI1?znYdk(z1Bu9Y zT~9y0>~}N$eI55Bma_0?t}X(q0u`unOw4f7rQ7ES+{VRv36sSH>xp^ApYP}N6w1Hv zWpsXHjczQqD_ab{Qag@1p#Cs zvSnY7nP{0QCc+UG7^(4=fvj|XBxWrf!0uH^B%km_dvAX)Lw&->Teu;pyZ-@G+4RHC z>gZlrdX>;#G)mh*$HM#D%EZP&zrK`mw0F)*>HeCU|Lr|l$FE<$0D1b7SOlBvdub!1 z>h`$(K=GsR+c^mICx3d5L6wU8JvV%mvQxs&;H0wj6q&@m)ygh={@uOhZ2m@m0M*2+ z`_;OXDV^|itD)zjmvjeu0NF`2I>T$$4fK%}SlI>7pWDe~&T(Uw{}ZzTIvME#kP-c< zLditz{7r1hvpxPTzVoUsPy+(E39HAx17qul2%E$S;4FB^*W1&`hWX%-v9Z{6$a~u; z<|BXfR@qH=D>coB$XJp6B8|7r!xpj6yrP?<@k{BxMvF@xPo%5Lagc}3gAG|!q|oYy zaUDc^%cBOobQ1ST#7xm;%Q1c-GQkWKyfwh6K}~?Bk=~a40fWQ9aX;nU2t6Cvce^^) zZR+&~qWZ{_%_(yAl*^=@iVK3vab7x`t3!g?i8a#CW9XR;x`i(Ig&-$RAxmU)=3 zP9*QKAmK1wp_>rG_Qh1xm4Xg!g9obtbZu0}iIzj%4{PmAf<_yk1yy`bbqTE%Mxr>n zc-^%oAZSLlE~mNC7?Vyx9V64@s($s|c0L*SU0WAL{|BpLn3isU_H%qbRU5F3G}BEW z?>5@)P6^#hxDkJ>PTkhdV6}=A?=W#+6S(Hi{Dki}iwaT0Df=f31aRXB{BlWrWi{XQ zzX0ZeDo{k6Ju7doGHC9m0xYmnc_7<}&mvw@vr}rG7-;j7yZvHW!fHv7znNn2Z+S46J4#sMB*&;adV?RZM@omA@Rac-=; z;sNSc%U-HaNg`cqgPql^=bnm%kKPYzeZvY$#Ud9Ib9~9(=;M<2s_CUvfGvFI z+$|G0XQQGUZfTI9J#B#Ny6?!KBWMl=L+>8yIJHC!`Xgsbmh(bik&MZM{QBTzqv|$| z1tpe_*+ArXdHBp=)-tnZ}&NlyM+5OvM zi+~160{5r{DH!|X2k3^J5wFxuBQ^{ImPRa*nuN^l&GQ|s7MOhlGls>zpyLo&iy2#7 z@1n(63ueS^e3s|`HsZc^ErS2xbzxnQtJB_ZZNe!sXlZLg!r6t#ros)~QAM=(^(utt@ zrcpx;MZ*`c^I^?@&xiMT!>>>N$0R$Kq6~uYQo^dGum={Q!VDp(EqFQ3KsD{`0I=4;^P>CWHONOn9x=QMEf1(fPE>OEp z9c<+O1|i(2%n00#G1vgjeeC?H))<=KHStCruZl$)I-yuCYgEm4nfN>=iJO{muwd|S zataC;YmC@5?I+r0ev7RyLbEsw4-gxR<-~bE67t_-^Ao-V!_p)F4CZ41Z4a2f8LTTU zrKHu4d33e55397AqmEmE20X#yhSfFDTTG~ZdPXqYfa93J(MMx>JZ#eIgYjwOK9`@| z+?L_%Z8ZJWBeR&aiV(PKLGydEB;>0{xqc7-a@;^of9Z{Bux(A=H@nAV;w-8_a~z?i z6&e@EBO51fljt9no_QXtFY+^eosvCS*#^UpiyQD3P$Sz#qoklfGoJ;sk*L65sxo~n zasfAw@Rqh`@qD_Q!EYd`bXR%O{SPf+Lxc3WGgXxZS%HAUWKtB%T&4Q|$!KSo+y75S z`~Nr@?claF@I~SJ00YP}dUrfIEjcAcZ#4TX1%!Nnij?HuX{6az5N?c83d~RC2G`s3 
zMU1?i%Xc{KXAO<%7#fU#)OlgNTJ&OHpI!KrFK_YZ@UUn$6|lxLJGTbHE{M^WPA^Py zIlc0H+L_S=?z<+X{{iUhUEW{-7fpJC?li9E!+$`$P^drt2~`5P(7gK2`B&O`yKDVM z{QRH+A?~xcCS$>R!J3>>-cMHu967hIxc`3(ud7@g^sh}|1PWueHc_^DOKxHyO zYZByXMs4y~#Uy*Nb?`6_(t;y;nS7 zvw}tU9?I4RnSi-gZ+Edu!!w!Y{}TBbC^k%>h}2YNV1{o{9T{QPPCS-mV{yKOtDX?k zBtqx$2XX~`7LySkCHtZFvY|0C<0-42^8uda^KTTgNW_Sn!$;*Q^#5(IwSvyvGg)Sr z23UC+^_l$F#)@mUfTpIO{>-9Xpio*k091)4B$cyZufRIlnBSFY7|M;e5cYcd&Od}G zEqwu3>ioj1ZwUb8AeniLnF;twdumeCo>^g$784aA@HM^5iOS2pm6f*}@e*4_y-Zy7 z9(LAP$n=d$^5@3v+%#PYxc~GVR!kir8}9r3*7`eC0^3{mJ!xe&O;i9|W-KBWOT!9D zfEr-#@al>*36?2iSqL>~LI?@7%;AA{ZZp~brA+g=3W#xH1zEf-L{)fBxGO|gQXva(aE?GpO(#F&1Sc@m?p#(`I(lM>{rx)?~*&7_lRm<4FP*N7IPY&Whv|NM2BLpwdoukHG?9Hm=9aj*;T5iR|C%X>I ztV2{q6V+v9(p%eOoA@~}$?ULU2TeWK<`#G`b}C z!OBCzqa$mr*W!;W)ZzP>Dre^!_6FgIeR>{@BrDWwI{UxyMb}P)d@gm5 zqQ*>1f8K9tGhs6hM)qi8{Az*UZ)r(?SXKMV?kVJGCq&xQQ&UgJE?n!4>G$wUY7SC+ zQQjiJE{xZ7NnDNRv1Y}NXN*o(X7I^5Y|}GOVXVxIA)l`_SplX7WTjUkFtw{V>*U_N zWTT|YxUwb%aFLDI&`}^7UcT6!me^GOmuT38e?pTWT$xi`Kx znpuHZo#5hKy>hFmx8G(!k%PQ|gzuMIk6z*~r08$13%1VA3@68%VK$*S8nbPbB>JVv z*Lc=TLaq3XE88g%9W_<_rt5~(kZ>^{4_U#N^8ix>yMc#E3pY<3aPO3n-dOC2C;_jE ze=p;x#Ptyrk@Jl1pJO8eOP0xb9LOyyVt@J@l>)+iItm|%t_SFzLH`L_emw;W85#B@ zsWI$P;8sFBP)_`4^o49*qL#z`2VM82EKBLi56Y@Kv(K+%cP%CO)$ZtInr&JXtMNp8 zcnB|Qtb81v*J^NI11JR8Z4c_#Hd-+znJLk6mQECExlQ1i4hAs1g;B1?&A$j$4&+19 z9W$=0rpI&bnwFZxNKTpR)|OLZ>(E&+t$kFxW)Vn4w4L&`Htx&L;rc+UhT>;mzXsdE zA(O4y1&PAA&kujmL)AcezUu6xH54d1*CS>R{*xzelJY>C)eo`7rc)N?$znj`2o}+3 z4Xni{;sKC_td``rOQ4;L_Z_J)mh&gxosa}Yp4%~p=NgsXTUw?~tSI%`e+JJCp977*Atn+SNX@awU%CCmY(GI^YtEO z3Y*t?&~H6@8tpWgUYE{aYPSJwKwhz70@?;0{Z%;OF>(sBRyrttPdFF@8e<~Mfx>#{KBKTHDd`y76E}9KOgSroR(eZ zm^PQafbP)0n&<`wWB0w)DuG^1c@+Be%2Borpg{JS)`mvzYXLU^v4cT||D$3bys*)mwN zNSb7Pn6_r3!5M70uZC4t_e&^A7r-#)ak+IyVE&2cLUsI;rvE-J?h(cFA*}8Gsn!Lc zPeqjbXJy(XXWx{37!?EK9{;e#%EOa$_+$WU8Qz>vKE}|tW+gXL2|NQgG~kCi1X3G& zR_{EGYxlV!Nh|_52eqqd?u@y>o=1ihmc`qB1tjQ7JNrr4+7$2+N0`)pv$x1oumfcA z!#twQKKV?Va?+(38;l|`C&Nlt85p+k<@%8Tm>T^Pj=yw=Crr{%$qHDUq^g373M{eC z+dL_F?iq$*vk?ImIj7+v8}a)7m$DaLx5e{lrpOcdI0>4+zH*8RGQO9OiF4i@PdY`> zJ6^@=`qY6o)s*$ynODU3Ie+bk11NWxqM>2$&L~!o3eto5D#dvzthh&!E<^r*dILs^ zmVoRe5wGHWMux=sL#_~X@y5o)@ZjFi?81_Mq#gX8++q)6fYeLYUH*MWJ$8Q?r zLtS9C@3N>D&Rael2w_+<&%_h~53uEV@htP<`ccTq*F-gh@c;*y$C=?m$ts-FB)Bsy z=NLaRce?*l{BhNYb0016SPzYB*DA7s$Dk$E289}2!J>67e?A=fTG|~ z{)(F`D2VMz*M*=Cv?*~@;gx;gBhjTus1Waq|2sL@`B3#s%N|I z$o!bS=36(9%@yH4U2+V7>FmI`;xy(?gf1S;V(vt#6#oLIiq$7NFk1;&eE{0QkemPX zZA=801b{`jyTt_==8T-2u+N`GGx8iBo;>)7g{**0n?MORGiR=-tPFg0`z{G-;|ona zt%s4`KbY2ijrfo}7Apa}Pch=})_Q^o(Y1SePR*Us>apv2`M|yjqjhWSHr}F}lk*r? zj$FUAhl$e_ga;lg8W4ql?K1q+0=7i*~F_BF)HK}u@R@sWMODAu_ z*v$m62PAJ6@odFr*uNT+btr*dFtJ`iIMs!hy#CZHwo+RK_JPc4)~5&?@;cPsW?`R5`FOu!=acx`6? 
z^G+gwMT~}dGztH*2pzCU+;n^8U!L?owF`xc0Y+At#F&o%+h+bPm2Otx0+p}hg#R&k z_^%84@d8l-DZ5d{^Up;l&jWX2zkt8=?{)uA{)%G-N{PZ`58(XNKUELl1#d8qI>f1Y z@xQM8kC&p4p*Rr#A5KsIvIq)TWE?5q_dgwPLi%1NbX;S^;(*JeW z>Hwn|4q{0AOOtDi@x5J!=&4SgLZx}{FTIw*_D>HmYV-R zp6kFE@WXWnT%7+B8KCGcK)k527o^9Ozcpqi3|x zU+n$Q=e@dN1U-EnGmuqcpa|Nbc(cbHfYxJYgbZG`pD&3qZ??KHTU6cw_eL_Q@Bk+Z za(60)(RSYaTB7&c(toS`U%O1N{ZC&6m>~`XR})lrg|<2mZ=^*&vNF46ICUx^Fyj~S zVZ`t-gD7*Rnr}IKn_r-3ru-4>zo0@Oo!f)7ifL!brWN0zfihDnhp=^NH%Q$od=t>h zIwAQ?@+0^fbwNKH?y#fu+m&*=)EQc{suNCN&ILy)XZ?>TzJWQ+BWWJkiZi_j&gW&J ze2jb|xL5RQdnQn3MDiT4FEb$I7a(%E+i=q2#?6|IFj2h|z3sep%L&uZ{?I)9^Q)0J zWcJ!4c>+}>_4T|5Rch=)w9J?f?PMPEgRFWA?-~iM#`AjF!8kpJGiu}uT*oXQUp<0@X=eSe-P5z^ zbB4%1bSmsIEsVz&^m3aM0ykm88uH$kZCpk2FCwZb)x9V1Ss!N24d1*q~ zQ9-H@1MoFLTC+x*{;mDIWmYODA2^S2OGv)_d`oSIz{n8=C%x}spp#xj=56nwx-n>b zjB0G(ELrZ{)!gT@i8)=f?7WjVS7vW7-y1vGF>7r=|E0tPw&HO0ovYgD!K4Xc zI&h}!aQVLfAraYJ+et*pBLFoz#*I6kwdmK6j055+VmsS|{eQ~A6W8wCj-t+avy+uA zA^eIPmG#8sV2Nz$y%8aSg1h#R)5_wlzR;zztp|j;qAAiOV_&B4?|@Nb9E0Ne@ej3Z zgGEpw;?zFRKdvI0>-ZvtT3v%#_a$Gsb$ucVqFxs}Dn2Omj6PLJ*%bmL%OYm~$&lbR zPIW^wc2)r7085r9o#Ikf_8t%1jguOGkmFspER@!Gjo^IXJKB8RJkg;rp;Q@2FowSP zukHao*?)FHUqW6BmDpW1BM2sCF9YzyqJ28{B{2a7IsgLkCMZ-Pfl zqUTqm*y4;As1kkGk-Pl6hjXeWJS|58$-Y;C<1xZy36RwLzrJ%pxP5TvIU z4NPcwL_~(K-U99Uc;4PS%qX?O>@e@X2^<*U4Bj5Er2$?|lHxW3$hYs@V(H&$hVf$5SJTLrW>X3NPCOiCEPH}0cV&RBNSjp+|5EK(EY z#m;2I$jju^FT67P2dV+;u3-HZnMnU06=9v=n3+7#246tN}|5# zZ97z$dk?%*le5ioDmImp9AoV8HSU8G74MwFNpHw7`ov+9fdEIT)hl#f9P|)*+6*J? z%9^awKN694D5=iO)aJ_4KP`=XC_jKrZ#(3_K#5h06pNMK?b8%3AVx{^1IHD#q&t#e z!YvdAG{se8UVYfYj)@w#CJ>S@qF_uuIch>QLP;+K%&0 z^;wG%+^V~v#GIt7=`1F2Ly2UZ)nJKMs1+F)y<|X`3HEvB=V%)_ZWhW*dpqTEYi8g~ zSx1)2BWbo+!^`o9h|YB6OVTCN)5=ap9KNz#uOY5IbvU|@V^GSN!GNx~dVRexC7G$? zmSxBXn&YD=cDf>s56a2bJf`S2GB+b}}^xXKLv~_t}8Lvfy`NNkec_ z#l->Z;2Wwe7kd^=$ZA*d7|cIDDz8W%W7`easp^ zv|4ARr&}K~optXA$_Za@feDGWifn~V;F04BNr2D#g#+SJXPY_8R16rNh_1hy5hiQ_ z|K-APyz(5dF}RE3w(WF&5N{`GGrWs4+(+WGxfm%m?iWV%CUoTf*f=D!{3aX^kw8ja$e+ z+tGM#dyZ;!@SSfj%hvV0Cfl{gAhK+^@?M$wy#YKkTm|7A5AkVDzkX~b4Nr#k=j4{R)DE)b} z%3Lzz(g=?{ucR_CD100SDSZT;K(x`)(4=HnsV^!zUrF%i%E zfgxt(zJlT#~&A!M_Um9$_YupF#)jCm!*W*I)8-V0J36}aif^1J1>WzCEbda zZq$4C>-}N3C+1^5&IKJFiqKi*Qv`qTn=T>5Ts$B=9jLcSI&u35lE@qK{Vg?az)3-UZ3qACM`w-W zg8Q~m#4TZ_Z8ma$o6^3hZ@agr2QNw<|2UPowhg;@>Z0$ra#(rbb?g0{O zvFTPTb-KO|{ifRdbk9RTXjJ`cJnG=N?_Ko0zL_rnAA!=Iz1vG+zFeP9=n9RhT&~_i zPu<)yv(G}m3EccmXJsPN6b%TTH;xCR}Y>efUXd@6Q-TA!f zSzNW}n6t_(L3obe%TOaqbPLC%hel*qXg+$A?=Bno<47Uo)DMz!d--tac=gB#`!egPH%pB;^NNVZ{sWpB3NJzVX*xC=JdlCd;n@)J#Mh5POciJ z0eW_n`FWPM)}QS69ttBKs%u8^d3gLFJO2RQfn~7+lcxUu;zn3YW*rzf?!v;+@!kQE zcK)pKkon5?eLgjLOmScX6bBWJac)>dXh9UW5s_OsABT0M3OQij!FNSz6|{roa|q7@ z%F)M7*Xe3hasz2%X${6wSI;JSt1^>3M(9*_uGvDy6$>`TB7={H-iF38OH(disRyJM zDUNSdd34^Gu6Wc{w=le>6YeP)`Z=oWrfhH^G%IPf&3N~q^7OM96`Xs*J&d#0N z=)w4|Tw}{p@X|wI^pzF|dR^WaxQ(59<;*^!{G9h4Qy5^*shle-|5Bx39mr?cfT*HM z1i0}J%XQQ{&6wsOYYa- zyk?rq<>wTX&7BxM4PDBTk8y#z+|a_7Ih zN29w=yciYf%195KFQbsS0s?fm%)cP9X=vQ1D|gPK}3auL4(xh|lkcE4?@BFO0ou1Fso2YhpwZJ|}|3v?*o((D@LPhZfO z_+6l2iI35I=4H(PrmISaTcyfGN~X%A8hYD#(&N>(=?25Fx=E4MOwpwhFyXBl+vijH zn*!5tH@D~$Z3ad{;XKIPQxB&lzdb37i4jV=GZt9EzChl1FqZ-Yz===5eC5#2?xbelW!?Cu$nYK+ z>w+s~Hw_>m9DuoEy}V?GIw~mB@Y=oksr|7V`iR2>Y*>0WP!53t!Ua7%8+Io|0AyS3 z@kb`W2Qio5*cyUKZzq!c;Fm`CAeT+|erBe>6}8*pB5WRRwB5O|5^^oxrg!nK7NX)N z%RI7|t`+F@MZ64!7u75o-<^6}B^jc1Pac~ujlt{}aEL7&H{N`jH_BLHeh0d7c*mkgl&+&l~^b$rA zScAd$BRk#S=LhX@GC6SpqqAG*cIL7q;6{+|^Gm5$6Daj6?O*i`%1afJ#k=j=Db8X( zH%Kr9NeGXK;FR!kaaN*V<*FO9NV`vsHO=cl0S%nWON^Z+TV_U|x< z_GXLyqq#x3!(Q8Y9MsF@3R;2s(YR>CXpwWrvjVN(dFhZN*E%*%#!WsX-$f#L(#Eu{ 
z#Z0O_#xJH(w4RQN`PXTww&jut$LeuZ+O2x$j*BRp>9l8uz_o=S2S!n}As%EO;z;Z? z$^np%!zA^T$ku!*xN1WRVHeV}o#fl&wxf2)Z|dN166;`U@UJ3I0Yn(uW3CN|G7G&JFj{&2sgjGPEcEZ!dSnW9wx!bKM z${7H{ym*E9y`ezqeZ--l=ib+WkUb<26B)vQGn@CmZK6+lICQ!m@#3Mkxo&tn;Elu? zt58h=-?+O`-7At4fi7SJI3W>YZ^u_VyV@Uy$>%PxHISU26XCKob31jlAa7+Dz3V>- zV5)r!jN3LOn|YhSab&ooX{Wsl5rVX{fnqVfo~Y(==b1$2)$OOUw$ON$^KaI1Hd_ur}bwA+J<0{@J)DtQIyN*>AgKE_H=5yMZ?ni9&jq)iZ} zw+4DN6rWbxF}T0@eM4-{iC;j_@Sq)N27J>le3`+Sq`UME8Jya@W`-HC!dULgNBb{d z#J-%cbmcGHjL~IZvB9ehZokweVC|n)r~TsC9^1n#iii0s77l`M1KgEikzE{=0Xh?Q zY@Az^IjA3(eU41`k&v{5vFLyudbKc!`gohnU5Dgfpz#P;8a1Ab=#2G4ET+6f*iJts zd@gkd!VHxnRRZY^w@c@(;g3fG6e<`@=aHFKaevC<;^MQn^u6t9oUJneKH(B*&fVIF zlbf91eE$%c#e|H?PHQ$opSBI9dHQ2drc_E>C@=HoqHQ8pF@d9Vy=R4C+_{}D8i7w1)B>TS)js;v z;oRn;7Z&DKZhjiOdF;HJJE*UxgpwdyFUU0ztRins1WW_ISJ0J~lgd z5#xSdp6iLtFR4njG9Sk#=btUR2|rT%vj%1Rs|_5#0AC&Sula#=B0yfU8B0?kokpgP zE;6Q(<-DtR41|!}BF7pV3s3D3><<0e!M*O90PK`RFJFwUpF$~~9}mG?>I2CjR}zH% z?l)R|3y{nRYeMEmE>$d$zKz3i8|PBA7v~lo8ymT~wapg(@M$iD05kFDcg%G@IBlQ} zr0$I6>TTQt`x4Oz%<3punf(OQeDw-oc!L}L%=;753-gmm^G`KP;UJOc7v}Y5fUviQ z8Zk*dkV|Sw8#m^J|7h-yWv`ZU<>< L-RN? zx3Sh&0Be1T|)kM%$xW<%_qziwX904|#^1PUJTf$ih?M8Nzw_CE_eeYZD2) zl%*$a*T$1|KA~*+;iw@d;f}0#Vof)1Sv&$iH+sK{D53ZazD#x7?3LYd`|yl~@rcrZ z&g&TrY*ml$EA~7@xiv>#Oncy8W%6qB6J_~K8A#-r@G(wxv=yW0_EB?v(m+}uS159~ z>m8TPa$_1&xS2;qNGw>aT$*I!2SU=Wy2L{q_sF#G?gdPQ2IATn$0T_|aE^^F^fT%Q z=-|PH_9WNuQRSaeTu=rZ6ML@~;NWK?(j~RIK^)YGD{V3Y@onjQVj_sM*-gIoM<)4M zpo%9;&(8d`wKd0QX=Y}o(8Vxr_=BWTQpe?71m+?|0J_qftN5$c%Ru)z7cGBx=}PiFb?-Lu$p03TRuZ` zpEBZrcdX)o*??Bxru7XzF{Sv53J7SYkV32N{R0YG@U+8xKsNB&HmY%A{RIDYG|VkbvSAM_ z{jN$`aoMd#l~oa8dHSsO^SIu@rgy9I^V!Qir70I$w zAz*n4V^;vK_w7kDupK})ZY0gk?IPZ1G(^JMg5B=cp6k^+4j69cfhypv{&C7-7-xk8 zMfKsy`2-A)6-tUfCJGR5uOTjVbC<~@Ge6<452kmN&7OJA`!0m!+=91=dEQFdauhxf z{%9#<2xpi)#Yp_%KwX$UXGDTXzgK&f{8MT3VD68c*KU(Sb~ruXga z2aUS4T8_bj{(U$2Dw@^{W-6p89#H5l!>J1Ekueh{g#GHm_iK#^2?xaX1G4W%>rg8vEbb(UCDWi+nhIIX7W1|wOkJzV=u_b$4 z$jww$&m-m0ssV2p?}Pn}$QuRhee-pUK4Hp?-gZXoty_LObG4RjLPkpB?&AK&x={(& zgo)6_#KI&@32x^YGGF3gZj?N_cA|bjXhiMR-iroD%?lndN?gta3(_3|CL9pcWT3R= zUGZ))lbM`DSQ1q7_U$*J8n!hm4`gLkHIrA}NFAypsf01>K%o|w9|B)^lFTw{U-aq9 z6gj<}OP|KbjW?^@c0U(L@uYF^c=bfsn&*y1296LPtpM`DR25v^X{JMKkBwHy-dleM{<< z06T`hol6r)u0YS?e7+vH8D?brH#MoT`LQ&Ug-k;HCgF{a2cj=J^%G_vPv*e`s;6ba zR1%9d$KE1bj7C+Xc{0)t!IC{^kD~CQHhn+La{cM@I@jJ&Hdx{v+rAJls7Qxm5ypp` z1T{uqiiAb~9D88_mA4ur-EAlwY^(0aFFrnPOT4|_@+LpNp(cOGaTz}S(?dW|7)`O6 zy?G^K%>91-l;6Z&`J{n7ApO%Z{3+_Y^>cL0yu5Gk%Vhh)HgmFqhR-Aze%{aJsR=KE z!3~+OUvJO8$>D6n8P$p0`GM8Wfh6vbB(kP4iup`epKxs9pDuI)N&tSm;ZXJwNwPb!TfQ1)dRg@50}-5dv|o*Uzc8W7N~lxmO7sW<1iu^%c(OG_U|E!3uI_M6$_K; zYA-|`*yReJeBrP*9^@DUA4ASp@4xAo4lM8VzwgrZ)6042#QA*3=o6#$6#pc`o0o^s zV4+K3Q=Wu89iDfod-;6HFEyK1>%_dLu&bB5E1xr~;$a?4 zI$?<5>-?eGI{Q_kpUXrBYu1*k5~_WUUAYw_l&v0oxJuISXVtYo$jr5K)NusUuiWqF zzMxvt&Fz4UQT9UfjJ>?75+$z}%zV8)II+$Utn10l3F9x!hJRA9=IzKOP1!sI9Zkc5Nj%- zecC|iSZ8(bo;*(Oh79#(99+!LEwbj$&MyKMe3mVKxVW16&thK~Q(X88F}p_(!MJPf ztPD=c`d0Zs6z7>96>v&O-9XsCYiH$Z$?V{jgSG7VJH1j0U)J}i-%Q5Y6Q)Gg)gKG$ z;91_^eH||T8TE*8Rnl_%uKD>R&3S^Q9be`-(;lMYj_7-SK0B!!wZ@#<*R;RRAABJq z<76MyhxR1DxD~Yglq{`nuajk_C(QZk%D}YGgzpzR+1Del_a9k#>@2t($FISw*+WO} za4o2<&b%?CIY>nz*6gmxK}M*n2fI*rA`nA%;9LBe2IPX(+SiUY%i?{-)W}NPL|UWf zw|+dFDn|-ki=K7z*56o?Iu6yIh;D-9JYgnfO5kI;MScM*oaX^h@@}af(b$S{iY0A> zbYhS>DE?+b&FWiK&y|l4SH+5bR;r@5yIX8Mt-i4;Jp5!_yUQxRAN6L1`DZ2Hx@tt^ zQ%bVDRI;Dlb|PaB!5;I+vFS;etw>sC@ZJQzT=iCNG~)*z;-mi^G?n z{nF-TyrGhK@)Ei8zE4fKhzv9$aZB=UE+#o({(A!g#6pB`&~2HsH{SsK^9CufkqVh9GA=ix_nZP&ft3;L_LnReH8t?YlB&8Zr9Ba5HXpnvB$LU6lBCg*;J zQ*6!86m(Nff#vDjI|>Sg9>xN}X5Vk9xwN^;<(2xY%y$<$jP7%E+1<&$lKu4*^)Rvr 
zR&M0s18%#uG(N!54fDutWb}nA2h=?5|IoF@C?l9jUbBCfU_tQv-7|aUzy~3?5|Aar zVUm<$tIw%`PF|~Zf5G<6PQZ9{(QYX*;g!nWz%`0TKW!o@d?-Kz!KIXj-A$mo#_jHA z9-F>za4No-U&hz-<~djxbve@V_MM4??o_T?(jOf7k$0!D7RtNP>pr{pbT}?}fkLTN z*b(uBMe%MYW941H7pR7P>ER%qW)s6T-ya+9nfj8@&<0TNabx_o-MeMRHR-cWJ6!TP zG~~+CHH-3`34Di}o4Yoh3=78(xHU{kcOYqxNOw|tYg!}G#Yp5x%QcRhzVly82UvUA zkz3pTs4eT5+GtLS6GW z;5T3WG>i*79MdZC`8k`l8I+wRXmk@6AHgnO&n`;-vT`U~ePvFR`Oel9K^xb~V+48y z5BA_7^3n{u&{fd(#cD!N`233SKuQm+hMfHM;;7d{_oUUQTXL0q$HboNL``*G5Zca< z(@CI(apv3SABARbAB|`pE$K9wgqy84+|)frb~}+@O{SC>=C+f9G{0myn#T*Mc~>WM zVtJ^Tx$)g6C~;O=8hpnd;KgE|L-h{EV8A7UaoYA8l!$6crni|oC90+KHw?atL=2$Q zeUQ4GFS395S_~a%obN!+oOqG?!l}7moMafc40U}c4PBf+w`BhS9vDz~g3u`9o~JLi z1$lmXuphGFgmmi5j<;({etf&A%PqAx-j|dw0&{RYJ=7jEdEUo>xASqq z-7Wrbo`y4io(kl-jwqp$z(#WE;VU70;2DBe&-}==AbF>I#AjL;`=xd7c?A=;Gp&dI zGMAaxRM>=9>qc%=0#i-tWIo^7S2XFI=;6jFq{5iqsN}_#PM#790G31H)ry#klXxdt#uHmlu4Y=A-|d0Z zoSYX+Wg3m1H}1SkjCt*ELCusv&1$cgyQ*Ls_*3b(^d$M~d(JZ2DJ!!O`Zu8_LW^o17i6J)y9rFPNF2Tb4 zR{g#O12=N>Gl#6jn`xtbl$323^#5F663f;?7x|Kjq!d%=UR(ixudUnrLi#{`yF$n@ zqB<2Z67~q#DtJe&zEARpW}ZG``EdY=q2A?NYd<)w)w3q=cCpU(IB$oJ?KMhmQVoyFS}!LbzL|&U{kN-fApoNIx>iqKWHmS{b9{aHTPWB zJ3+UWD(BKQkY_bC-tFAa^=>mtU-5j9ETNl8|4y8gfssRiAi%aCs!_Wjf+XJZJ0r5KL|!ZylXQ_CVt(qw?V}-cILX-<2*+ z;XaZt5PyM%ec!@{mxS{ufxKF%xV0{KxV6z0=_-Q5yrk&FOA|rgijQ(5iO81Y41|f3 zm>kF5P9q+r@jho9*!Ik{xidb<^>`+WVL|F3RHB=5-F?i86y!PEb1?INu=bWwQT5>- zs33@fl(ck7Gona0A|28>qaYH}-6<^~N;gP%D2OmLg47IM(!vbgH85~D_`c_yb?&+! z&benTKEYa>nZ2L*#q<2PBoSOzYUi_{z@wXrXRadE>=V}!5CHxp^-r3YV!b-|IqEe9 z|4s4ZyRFAl%)u^yaRP*O!Kw~sGUy~B0icw}4>S)f=TO=x>2Nb#YMEekc*xeX(xN$K zlVTVXs$oOZnEazN{TI53+)$aj%j{LHh&_V{H@RKzTi)+p@t;*6w=-3RUmnG3g&@XM zqKvy07a9EyRrDsvi5--=lT)h1QXXg;pLop|`U=g?{6>n;?-gw#)J_>v+-Nlrzwj6& z9+D|z919Xw&40ub27vdidvgB-Q*;0`hIkYLh#*cU$Yz&cSDp6Ii#=Q_d(S^PRv;Lq@RwhC!O% zMvgrs4|32l|7!!O)MVp=Z5~!wd#_>Cw%y?2NGZ)XyEjHeUYlh@wVp|z}lKUbU3p;2QG7ebQxt%1cEC>8(fJ=TGzexKh3lfCxH4wk=v znYJd}g>;0nn>~${%Z%MJJCwQ_XA3!yx?1U1QqmHu|8>>)eEIz6u&OC|RTx?;k)rE2 zkZZpI&y{A!_hnDbZIAx_scTS|t63?>&%I=BA~tGcbmmVW7(9dbt6|i6U)Za`Uf7!3 zE%UMe&ehqD%EP@ux#$*8mWvDiCZqeaezHY1$94 z8rO(leb^+HwM{L832udD>XMT;ojTm_lB1$mQ8Veumv$Z;Vclzb)G-6=){5rB#Os$^ zbrxHn#X>nk+Rk4>3i4u^a~A`f^Qe+^&aWd8qI*Jz!oJ4WkVg!V<{n`gDUc$=og2Wt zt8Kn{|tiHLYO}50bH5={Hm|O*8k1$Alkr1VCa#} z$1{|Wz>|P!JoOECG~{mAbIL9nl+$sHI$q%51%7dA_)G~6UZXw9QbxDP`-Mi@x-E9HncoUg#^Ij`xN0ISza)S{8Xp&Pe+3&orfZ z)|$+2%Nyl~zIDnTnf-4;5#u?2-xwakh0`l4kW$ibOvauhi1$edo#oaAEdG2KWM$*7 z<6&mdHt^gX>rj0_HID=_wVd}b6->cIuJyb}tMhNtF=zw?2;M__mhIT6y<;(C08<^!;oEn4|$fBwS+J%iZW0zm`0d+;|03Q z>LC53??%zn`PV5dpE>woM^Xf>@H4)jJ2d;TI$(6^Xgif5t(9b=x$54{v&jEPx;si^ z?kc@&RPkXf?*Oyq0yK^wWbpUIPFM}{tbwA_R8P{{K=S?1qlh@!st+VxObh2f(_i*J zy&B|=<_Ssk%Ul{hdV+^U+{5sFT~SX8ueCURyvNOa6UnKu zOl{*W{rN;v_)|$w$qGug3fr%!`o5@WSsJm*-5@OT1;onK`hgE~2ij^eYEN6Dh1NF< zwG3TCi6{A^-&`3(<{-v(^QDL`gh`A*^U#m3hmgy3wJp1v7X4(o!&o(mQj?t(GcJt~ z%5Iq3Qwfz^m|KArnrszCHZlJc>k!r0Ss!yIF@YnIbcI-51Y5Um0pZ!}GUhHXu zCcUi3YCE7qDgzlU2=A~uF_nqCIfG7wGxxN^I1ZvV8=d4|i$tc-h$JP;=d`h`ZpkO~ zo@sr47sUSakL_@R0Byn)EBRd&{lq*nPw?GTyLap&!0gZ8yrXAW;K4-AE_sjlDcc@T z;-W9wR*Ia;P$pl1K$N9;G!(eJZrc9LY>9slxU(g|CJUq1-OdoW{)L|IUTzYnB_8+5 z#b-rO6Ngwh#cMIHZoSdxopFCAokwu zp=V?1nt0MhW@!qvEmW;0h6*`!kaC`4f|1xCM{qhhJ-H9(+wk13db5ij`>zj+eXx$& z8GUk?>6I9hHIT2gBznwj%amY&F~#d|G51}TEXV2W^HSqw7=)GU!YFd5X8F9x*I|yF zuP(tzD_*XIaF(3oX8~@lr_IR+3E5v(4QiD$LPMFx*;T7)+Fl!<^JMmB_YKYIBFVqo zlc(5Ui3gj8P@>z$7pLdP{AIK+C;1t{6j#5_*RGY%?cM#u3#YV-vK9%x@#k_7L*%W|@0Uen@3 zUpBFIe~#18u+tHf0h!_o{Y|cp!ui6XjZ%-CKwCQ0dFlM2v&(5b5gdVt-KJ$GSD}_W 
[... base85 payload of the preceding GIT binary patch (binary image file) omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/assets/deployment/hf-inference-endpoints-locate-deploy-button.png b/docs/assets/deployment/hf-inference-endpoints-locate-deploy-button.png
new file mode 100644
index 0000000000000000000000000000000000000000..4fc6fe8eebefdd3bb7c64cc1ee1b10e9c8e9d981
GIT binary patch
literal 799424

[... base85-encoded PNG data (799424 bytes) omitted ...]
zCD>i}VH-PUVad8mU!}uP$1zOT(w~B-l$^q5_k5x@^OwmPC5!J{<94YU+)Btoy$oJgS-r*czWg68Wv|@nMQ{}j3+?xPfy(&Sox4ccg@a>6kZ0?6R~XfbZ;pMgedUVQ9!TXAePk`O0~B3nf6rfynNW3g{E+U-(=fb_t`l|s@%N29u&Mk zOl5^;)7CPWI$e$4YDG;j4niE)nfZZQk>KLs%R$rjz?R@K%o(`D@|v8=n3 z`EDimeFdM>a^d3Olb0>K{akT$KM_;wF(X_3oKF$@PK~(MstR?2kS==TZL6kA)z`?i zLUOB-@xT*uW@H)wi(bdnWwJe^`|#SspyEAgfx5QgMcnkR5Ps|!5vQ2ZGQw!Fr=lC` z3Vi5?!Gc#kZZ2y@~Jth~U)3%6~jCjYex)*NR)+mL13xsKVt z9pM{|;-d8UO4!Vki09Gm}Dmc%P$Y?AQP;%%Dbx$M2l54K`bxThKDG<9+=YeLx@i(@s^P ze4yn~aD8nDzv%rwuhlQT8^gyr6o2%>< zs?OLoA)+8dCVkl--xTmi)8h)CV@e;8u;E_J`sql=^&`^JyUqYB&1Fk0i?*Gi_Q3m* zz~??K)}aiS&hGQ*@e8-?8<#1Ms?z?^npGNa9!J?DlWx81Bvxa9^NCe#n~G6>zIdC~ zv5&3gZ%kC4`A=%xM&u(Cx%Li2kLvkf#HXj?9psq$`fmbP&2+UncI9(yDG|a$NG$6d zYG3>mG4X2~{!mL5nxo#L&+4NP0jzBBR$wA^{sDCTg-+|VQCHk(pcC%HD5 z2yxiB_P(XB=3jg-VT&MchJkh(q$9SB_woGJ)85Pq$CBJdo$MdwOQ(^Rr3{^I?u+0B zRgZ$Ozvhj}L7;81?U$1B;?47ZFyAp(JoeSFz(tL#<2mK*R^fZt`Pb<})Lh%+NgZ8o z#9f6ZUeF4XJ{SWjA(<$w0R?yCr;|v*1k5QahTC+{?MK*)nYU{jgnk{%en8QtjC`hS zCP8isUvYOetCPlX(IX+@`=}&hEYljzhbd_5dp_KO_eVq#M-~a0d-*Xh^va(Lf*SiP zsd-{Z4~VptT&38X*p0GeOl{zw)h z?Wt-hF%Q@25Mt&sV3*jfo&7oW(v_kjWn6=WG4Dk7+gj zVMiszAiPE5G+w)3*{=dJV4wVbcKat-IF|UWOvXCuBkm+?C5Y|t&^HH8s~a{l<~Me; zMDs9UdbRew<55S1d|y+TsB4%2__)P?0P=J+0c>2J4b_Q2*Rk=>hbP5D$j->DIAam| zAYSa>QNQ5?|IJmy{&xkROBu@K)}8IAe~K#|jWw7n?oL$}Ep(5grik?6SWjoNrSk`h zHG7jKN!m18+@*>dP}&PR;aIOBbUM9C@H)B|CpoEW#Zk<~rM^B-WVwxcgYy32qCuk{ zkJER*jNW`EplGJ7o);Q%o&if%yMOX_OXKVcU0uXE*6=3j=dZMMiCP&EH1a(Lj4vdu z7Doyf=B!3WW_bEYsc|JMtW7?5cwc91(Pm!iscfGVf41trLvfz3IqDgQs zg;ZzuTl5~>7RS6UVnTJ0#l=xBotEL?h>v5tz^kwz`!<^8#(&f0Q~N6QME@1c$F#Fg z&D4Vk)-+$Y-LBsUlI5Ede@+>i_@nA)vy>DJ4p-0udulK^t)>S&Mja?Ij5xNd+h64xP`6iQNFh|h2jmgwTKE()m*eBhEGCSZp4jH1Gt z`xcw7)@~_bahgZ_8atNBb9WS;%y-iHgU9Xb1d|hv@JBpa>0;l?d!*IAb>w^Z*pqT99z3ZSa#=PWHJ`iYx_9BLm1My6rI1_IyD;<<)P zAGaEY{y*Y|GBO6|#rAk$4R0MKR_ZZgygyOWcELu?crg=9p za<+9OQ!}rPmexPOzaiCr`1}xb_gaw+LytqJH@Pm&nY>f+H)PAK7{&Mhd z!kwPgQR4bu>Rb_O3E}KQZ~R8D?$g|(gb=Rup_5sd=0cIj*^iI4na}eSTRiB(?l#HO zPE*%TTWQTFp7(d}pE|Mm^Dog!`ySw@%pdEx>XIB~$QHm40{g10E$Yd3v0?@&{j3$J zq8mHPL!Vtw_2hJ!jwUQ$I)25rc$8*v3>|8g+8`HbW#`caKVP2AkY~2~3||pL{YB#T zH+j9XZ+-7InygPC#bTxVo#Oz38`+f61hKR}-F=;gvU)f2J+pf2URU(QJD*r*`n2d= z)W3u3J@o@0O?v>JeV6%c4`E29(Yo3gZm42i?L&A&4wtJOLvC|nanU1#BshAogs8vk zb4Nt&`=NoMSDvE<)VxYnf%vlpc3XnH{6-hPv}jF= ziT$+aG-*4{G+DguNY9D%@MJ2HbneeYZ`AH}l_|plma`r^U@xWarkN;^6mwhXWk87{ zq{nt zllfo>1iR`!3-cwm8*H}<{)4N|pol-^b|Hl$olQUV%Q#Ry_}oN+Z)q#Xe!#??w6*IZ?qr6 zf5Qc%J3(`eL%8G_ma$~pzDyM+-j$@bj)bmQ4zJG~|8&_MB@%MCIZ@GC63Gy9{~o67 zzuzO+pn&lvFJSibiZGrD8FX|68?g78p{cwZvcY5G>MGSYP3d6jk?CB7Q{n1L+vJaQ zN;4T(3X69n9?sxYVuGcS^$5iXxH$0p&Bl`cqR4mSK!yif4yReBrKaHYMp4ewbF?H+#IV{WB5g7!|;} z9XOj116smvftm3EhQy{pwxJYY?gfQGifMzTW<`%hX}21j|I}{N$RAj`OhH%8w(& z`JT*_rVf@6JT&Tis(}<8ndReYW=R0&CF+f&Kn>zjcFExR^^E~;><#0G^TjjmkgWMP z)LQqLwZpf~Mym49kH}e86GWd9&{(s{MZK;Ow-rKk?%ht7a~Y%>lF39DTu#Q)d-180 z;DV1)>{NT&c1o!=#l%#y8TnNxkk}zr_Fb7gI|0&+fF4>90Qr=qe-ok2Em(SvnE{Q;Zhqg6a)M}Cdz6{x_@+U>z z4dsGsO#s|&y)7zHv~i!3KRkL@_tie`NdOs`=X$*krbz(*ID`}mX9!qD%#eDL1Bu@&z^YcWJ} z0bI8fPqMqx(%n7Jh~%i8g0>OvGKLzC!5Az3%OBb-=Vl2y?YZLpjFqR#09x@4@y5ob zh}@kuQ-~vhzwq>w={ORWPdj}^%sXw*0nvEAUc@;$3TFKScLS0|5Heo-7fi_172pD# zrQ-nE6gqW#PdzUio$kogMSrI+aS4PyRH1-hRlW+l6#Wym@$Q5B$(XE>A|0}C*pN1Y zOE6MT>8D&WReq9x0IOe}`tLte=p#XC<3O93-``WSNfLORNb0*t;^B*`(#x%!UC~_a zR=)_b`jV~W5LP1nAEx?$ajVrA#Q&jILCgbnhHhrUNwd}WU%LMar%k$<%)GoT$o1ny zo@y_iK@^k5*WX4*LS~Dan#5-B39mS>vAxqK=NN%tfA%1FRi^181@ikya@if85!xa5 zJMHaPcp825kuNO0^q(ad0yd zaj*vRd^kmr@Q=6g4!%nhkg3`v8tKDVdV=@&YR{{&ACVDr25}xDWyQ5g!8=14m~s%R z@dTqvErXWM^uzX!{&nOVW@$xt6f*qKt+$x}OH~F!R5!#n7))VT|t5TpQm+Frn8HsvZ 
z!^0#EgbMa^%Z!*F1wJ*piOT2#t{J|i$Ky*#+aX#Qp_r4J$-{N!S;r+9mJd( zRF$IqG_X|evxg_(@uhXRZX#)vS$5_`)^3Xa9z`PhULl4zA3&5a^@oS!b>f-JPm~Lsohb9 zQnvS4ts2ce*w)|&yp*Is#7WOrIE~Cc(S?+UTj@aEr1i?H8k-gNNkwaDxmu?(DL9>4 zx|rvBs#4X^g{9eJ47AUzFarP-Kh5T` zuhicS2>4%$KvI_G#_D{u7gkd5`ctn*(iyM^Geo7qvOI^oBrxZh6*Ql{utU?$-^o*Q zzER8fCA!rZq~{g8g?GnK-hG)oAW-YOLhWy3wBHZ%a?@S+--S$AhN&q%e>1mu!L-(v z%!|5DN43i2_}s(`_5W!aR=uE1(ff3d8Dd_f#eCT4*vv&;<1|I4vS)A4F|)ux-S%HR zZT`x=$IojEKvN%$!`@o(eMOzQa<26;7t;b=oc|AlntU0Q(ntkoBVs7t%c4>zZO5y1 zet(m`CfZN;&U!kK+kC+BLJ9gz{!@~Oi)cJo8QUPJp;GbE1BOszzcNskebE(v#8ICx zLdN*#;dG1^U=!eihFf8uv-P_^vW6MX62tfYCbsL$&)fMc&#YmN>2w@CJtxA?{g1d` zEM}Z1`%KUPpjBZJlp(S^{aytFXV{L4izm; z4^0Q7)*o_x4HLEs+LyC-=bvy(qsv|XgG_06Mo^lhy*=dY-k2RYA#lkP;aqsCkL%N( zpQR=xT7aD@#Bk}9-|nB9=)OO=l27w%0#rCt@b(H0>_>+p|8-^XKM$gi!j1h>3ns8C zq(eTC&3Qcp=CD$EVjgBtTHyZ1g-6`s^rHg%Q>w_Tr9q>Xg39Aa%gzq9D^)1gu`Thsf;^r(7U;dNp*7TKf!k8o$6O*oP3f#okZCOczWSk?AzRBz)1-eMzdtaqcnp?CNM+_`d&> zMpcy^ukXD5?$mA-aNJiX2Fw}?gvvaL2>Sh;@<;O03fm%A9M9PPZZ4o&umiekXf3+n zu(o_qoAbMV7`QgMsUE?dIp-0)&K{IlLWgE4;Jy8^ui{^0yMR!nJ>l&?xruLT?MiOS zf*X2yxGXa^{QJ@84tVy$Q^*B9a)TjsE7J}bKxw=HfA67HY= z$-k7}wflOn-Qy0S&Y#Hre)<_cQ7{xh@=DRI z>dZWeJs-F zbWhu=%z#utp}`APCrY7SP%LE9&PEI~sHY$m9pfX=xLeu-%^@n2 zfHlv%&*xW%A0!q7IZ&>!e7KquhTmrjp8-o+2#fU;OKZ1I(Zm)_G<_D@{k%%B@w-rjt5qEU1$RUD-$^4ggZ4(0@0zepp3&(8W(U#2-c>+T zpOu~)s#!1%qEypFkm;3E$==?9y#=yWUGm(!l0Ks@6+oqu6JrkdhCO&p?|@nX>|(=8 zLpwawC-alcqX=b|V#>Wbs|kx;l1b&PR(n<>0oTXa6eY*yTs_s*aBDlj3%OzddK&uzwh?Q` zXaiWT<^qp1qWP#t<)-vK(H%Y8`{<{ebZH-EH8K>Urd47f3iv$|-4R=wc$nIi@ zEJnYYMC#^D!`m&e-$}DZkBEtSOB5vytpaPqVtL%*T~1h-~CK$kq%{*Hj_uA?>q%`O}j*4IZW;?k))%Lk@kL@f#86p%>)5db$=& zXZX4*oYiGkK8&29)J6Fa_VjR#g5Xh3jgW;l$OYG|)8ug%1gy)5gJ%v@mZ@UP1 zo(nXF0ESAb=|SCGYlItYNgVnTg1v7}=6{`k(RSZRv_;RddwC8XJO2MOSRW{79F@rIC6v@dvkFd&WP zI^D_(pqA1&SipTmRNJKu1NFFKPeVbWD#Bu{pnwaBzj&SB6Im2FPnyEU38JXide6?} zd5ePspIR87vSeiTFI@xetJwj9=_lh?*Gl0dG1d7wX6#U-d<~@?TQ3GS7M*qQrKQ;HtKzfNP zR1BHN;xVtNl}5T)Ig_CmQh6U~dVjhmT}LmHKEg2Q$7>WN1=k8mhPWIch=KBz()xbC z+nYMVpZXVv8ogl3vsc;;B>$AUuVN`dDEl_R`%8<%aTIt}( zXprBzw7fZPHJPiBX?RLza2gl;F9-GQjCEd)d7}SZe`@4|ePw{TQIY@Wjd)em>Br!_ z^84T^PzrMKPVhUb;=6!ol;o9>lAZ7?kvokM^|ISO9K}c#Zg3hAbQ783AgwJqNi^y{ z_y^6j*o<#XrS)2>{&DL)2F&}6_SC01=-C5@RHYyjLM7gg&9Q0sLA<%)B|*-8ShGko zzgT2+eb&Bj^cR)=@ow)xkyRg#H{RcOpp~!N+Mkt9$R^dYp`TX@^lPPSFu_X>e4}#T z%@v8-{jAgmn#43#U>o!o5E>1XYCXmp^2%i0hi}p8ZN%KEehjLaA*nq1Y1=cFVU*a( zWuw2#Yg#YgZ?6*UzEQ@g;@l}{W^6J0Q_+W`r#<)@xcI+i4r~z{cQto=b*QyN z%ia~gj7fV_+bw$R+l;$5US}8|TpxLqQmPg|b;@G$0F@LG!152^k#4lCUmr)~mYEZI z4*_J!$itW&2Fn3)f7x5O5@uFK33llRLG|adc=|T?t$~7^fn;H~XH4-d7Bvf1%S}Ob z=*U}+x5KanCH+|J4#*wQFXO1Ql9h#+WoUq;wHH#kGc2iW)~q&S-wQeXF3>lfOKdT6 zhbSBMx|6Z@X=^f^_BKm6G?T<Z{h{ zkLGou@o@CNk)a)w`95QjmVe~p8E~T+b^I3@5}6XGsJ0(DmK?qRp49M^wnP_84{zgk zk_AbL=}lJi$L*Eg8X$}#wG(JK_C@~|nSg@O6n6OqPZVm&Egx%f< zl*G$IqYt#D0ZTQnxlz3rCbM?QAB7Vo-=Ed|)s_+x)EFeJ!G(tyFt?{Awtu_=Fj@QC ziMwxcJ;+%;3!IsshBLs$rJ+pKuQ4<72?73U!*r-aJQYF?6OPA^rlx}6zSs1DGabn+ zKMH^xQLO88PxL_q?_33P$B20C&doJw7Cn+^{62_cGE=lqZ?KA~wPQ$+A4mnsKm82Q z>3>qs;{RqIA>hs~5HKMfup8J1?t_c%q89SANeObC64!OwU9PM}8m9HVN-+y>zeumL z%2m$Ww3Wg5%6T}H&QHT?QH6mj4P6=G8GZ{wn-@q_k%2A(XKRf!I?)TE}TkZiL z&6X?XvQ(9d7c0LU5%a6|&DDjp**x9$OwMlRGJhV2yhM?hD8XLi&2DEi%F$9SY6Jm0 z^VIjYnvLFXR{PRd+9OO|ui5s~`#89`yRhOmN1^?2?nhK4=B|n_b&3H2`kt?brbT=;7)V?yK(m%>ej-_H0Q( zm1>S(uLjPg*UleB#5Yu8A=WSAQW}F4+#D;&$No_S9f|W~s-y_qoEro@>6^t#_T;W2 zqc1v*AD75^LZ5DEM2nKx^}|-OzC2G}i~q){jAiQIF1q%3ObbT1l4<2nQsGj-PLUP!t0pEkfx(^gL z^=-m#L{&wgU}IsI?xDu@&sTToI}X`vbj8&`_35GX;libgpYe;Bl^@{)VmCHkiuTD; zD|Y91%<(>kfV0>&+Pky?AT;eTOu;Y*^@CjQ<6{7M0$QscH)VhD)>|2BK3F#ow$`N> 
zC$CGuo2OGXQ|cOfo3DJAPVPwCG}IYe`PUC;`Pa8%q>SAYpz~eFgJI?9meSepn*d5E zn*qr@m3SO^R3C15h_+8X+vK|Y`BmLRr(TNBz|qO7XMSHqUrKgbubN<){L>dGY&atj zIG9#B>uMTu%byBzA218NPpQJG(muBRafdj0L_I63DLr6#LbSq4mBl=VE%*kZuA-)z z9exsGPI(hT;(lp5N68&f5h5gK5IQ@2-qE0P&bWf7ODaziXdvG7LhT#&5#2v>74g2U z!tqFGcsffZ+x^I{@vU+i#{CuO7Y-BKFeYhEUB3I+KI%VX6$a3)tIf*&2@gmN#v3+i z7V>g;P5)t74$@>n4%Lyt2oQdv6>j+P02>;4t>Arg68ILKi1GLj$A_c}p49R>=be$? z=`H<--1CqsQ*qb*$)=wGFK1BnhF;?*=Fv-|BDt_9R9{r z{1xwX`D3co08tjJ)<2AJqRHzD@0^e^&+HK&zhD>;@xPB{f0uSRl>qL2AkD*MM^7L| z6SlUIT{O5xE0?9VLDb;5J~Xwa_--S!n>@NUS8*E=e&{ktjmmW%81Ocu(r|oMPGH21IHDhD@cW}bK`W&BlzZ&8srBar*IBz>^@7lFfG)X z_+}oCu4RvstT6q2mB_3W5RTu05kF3!O0TSUb~~IQEb-lK0I2Q>XSf`5pJE2;lvCUQ zBoT|WqI=olZ@(gSt?!mZR|(5939EjU0~u5IOOGjC8~GXm^p*7B{1>-T_EKFz8_di= z1hcKQ2Z6K6F%+kO_Cr&~&$BHS>ul8Q~6`=#X~ z+1Jx~W_~yrU9o8|dn{M>Q+*N2Fdj~VetKk}K6s_godNJuQJVyX#Nt95ZD@z;VA*T; z;xk>F>`;J3o*No75G$(xCP>JK@;)tfWF=7r4o%@)IsGJd9?N6ZArOxHv!4cAo=vY>Yu0^swEYwgUKxzj+#?_0;*(hzoXsnC`^+Ac z#V8)Q*P)HqH%T>ln2cn1w31KkBT+lO|EqRc+A4;Y{Mg2fj!62g*->%GVa9&B-_qyV zN6V1=8W%Qh%>9xFQf~cydHfm80ubt->D{ogTkhh02X$(SDhP5zcXPnY$xV zS6uxU)uAwZAPffLLo-48n1W8*LDS;TIeoImSGujL6HzNb!BOW8CYiL#<(sySviLaC z$Dj<-#l#jRIt-(1Vusg9Bv_cQv@WLznAD{nVlt>Dn~;hH78CA3mxn}MWqVzT+?3HE zOLJt`BP^?l+#Rn`6w!H%5-bhW`Nv@d(O#mp`9BRDA4Om>Nz4XBGerKquF2S}kqdIF zqH(+(PaSL>)zA7R z(6{`{Gw3l*h3uW}zj}+c8E*T;mP;KMGi^Thc(dRz|(&VbIO;P%eY3uQjp5ndU326mAtI~4-+x3K@O7|l=Q zrgh+$B-Tp*2%8?O2(Emh)@?VUu^adR^=)fvpoG7{#tF`_e&Ux#VWV~fAv_Xsd^Q%q zs=|PAH%9Sngo;Hx1%Y}e1NBkGd>oAad7ZdKzfKjR=Q>I%W3+B`Z11Tv@NN|x5mDe3 zI`k@`YZ(NOb`!rgJr{Ky%&Bg)uf*=lBrF#!$R=KUtif8c~0W-0XTEntZhI zybgM6*y4q`^;i2;zWy%}M}WSdxh(FVPq#aG2)0F9MO(OcrL#12v+&ef;9{4PyBhmi zu2}j4Z;sh=lWY^;=H{lg!9UrH&x0z5@&U7&Yt|SSARxH{tdd3tY)W(CI**0gMFSD) z6mL&)|M`|IpL9-Ah{e&^40H5zERg}l?GE!GmtjN+a$%{$IhJOc6!24xnSp8;CWnph z{o8N0oC~4BjxNxc%eFlfbPK!hPXf^y&xQ48el>yynrJRXE z*sc;DIY`bR&Tu86|f_Wm|G!upDewpn= z%<&TyFq2uxoNi<`qyEx704e_s-@`}z;*92;JIV_aPW8REDr^6UgFAy^G2m@czb1uNw$Rp(#n&*Rd4h`G(0S^XnJ+4 z;#|}hLqhhWu(XXWBvBi?-RFv&H(&w{Oe#V>Yw9Fw;IBr7dg>WQqcKMtT}BZI7DpyY zs`?v`+D^oFXpG)a>p;`cE;EXZo{7+PPxC8LUycWbyGKMm3WFdlu)_Ai2#58UqCgQ+ zlC*bt$R#x2YTVv2TIa~4PWXk*q~n&$;pkpdrU7_ze@JJO36+vTSmaT2%X^zAmN|-qZww~hsc}H_H+#BQRy$>ZP>u7> zU`Tdf$3cW2?DWb0<1QzlK1VKu@i7|pTt&ItKO?>nFvYdcLVL%;m++&`eXHOR#v^y( z#*L_{Gv>0y^3;N-`Dwm9k>;6rh@?!>LaUaU$6-HFyRm zcMh1!#bL(wXNdc=QwX^PBr9$`FsQ;ej}<|LFerl#Jkh2t&bzX`R9?x&Hj_a6unoR# z=(l%lD(qT3pUcS9v-*brs2iKDdG#5gYn_a;5}h*M)caDZcANF1oU{oH+ zqu$=IxqWDBB=+u4BsnO>HAT z!r6LHG$IV#yS^%5;V(p1l8iY8Xh>QK>9pP|1)oat=)SH6c{<>y_H_dSR1X9y_H8t< zOXA?(t7N%>n()Ix1@P1f)aHOnu9vl-wBEkDy-@n}`X#D6nvCBe8--;C`+-~ZM(0Eezo7n&c?ElQRMm2z~$57gid_0oRgyMZ0 zPYI@3KN(!B3f0I=$(2agQnNdD@0Pj8%{66L_saC%xaI1>iND=48u@hD#*-q&%I_zN zrJFA2km<7qH#trl=7&)HLXHwaj|Y#9Jbw^1b)lh$!wXFm1@6EBu8>Oz!lKXVsH6_3 zKNiIDYHzL*lta)~+qLXP)?C@_7kNNY59}c61`w_B^q|!z|FCZ-8otsfR@;8#iF{6v z-$#nxhvT>N0~{66G}CgcGW%bSgfF~}Pi>)9DfRqgXAOLy*?3P_>vP0tj`|Ky4#uCtj9Q`cmYn2It5zt#+u z8q-Sol1@L$)gRqPEMTExOzw}EiC8rHm$E@o~Oe(DteBYwF@{JR{PjXeglQ&8fqkuT*WrY^zhMj{D8 z<1Xc<_Fw0%2D>GpQA%BMo!MsZ+RYaADk#f2onV3e*>4%TYYh5i%C|j}dwuj9>plci zmxvVbU_j_3K590R7BCx#4b)U~)Q53X+#{K`au(R%l%jP#^izb@;sJS$y9}^C?;ps2 zm>_q6tScS1i%ZStJ^WZvf^qIFErjcTe983-g6BoUg8+^`WXN@wde(6Hqg$h7C3-gA zyv3p=>HpYwx`xPC71M+#6WLK^)ykZr{fa=ZTGpV>Zv)DBKsiuX#;b zEzOG!YN>i?yz2>OIh=|1x~{UnLsqX2_?@uQTja1$G~qf@1>EQyzH)j6&Q@DOEFOv2 zKjQTE=}70f{J;%AZg$>cN3vgO-n#QWHNDtVm&4fc8>sa@@%yrHP3#eQQ9__`&GxyN zb1;Pu8Qzj>(<7smMXMwfD95;X^i+a_k%j7LdfK$CcN_m7s>rTvjTH+-puT?a0tn-2 z%bUH!_r5xQfYwcX-uO9_VcdE9k1D|Tm!c(L(AO9&bJ#Bh{`N=D-o&hWF(YDnIFdMC 
z)RQ61ZIHCP#QrQ(8M7b7`u#0T*8M+_%iTGKaithF3t4@%vw{#9-@HeJU6ts=2-pop zIF{WGegY7q8|QBVb>8g#3V)EBF_8cWswhcZhNI5f{MHYeb7I3y&58b*>3 z@Fp!nn46p%AzTfm3gW9|3Y2=b5<<1bYpl@coEiD+ zuaBoNm&@JZ@hn;Vfp*W=sn}t<(t9t?X zDcQtHYIR6FHtW;AcY;c+TryecY-PSdAq1Ez;9-YPA)7^ic^cVu3OK#wtqN0v-wF!( z7mSR^^z8AxDPo&eee(G!g4@27%szmpaRrIKm@r0KnApb2mhgRSYxn($n_x3Q3bsA} z4--QJ$~xt~OJdkRxgVTrvKT+alr)L{FDkIqQ115i+J;aN?SFbuK+w3}8i`{Rfv$Ib zs)%ACaCK;Jb)Fr^1QGU-p6n6X4yOoP=NUvO`+v6H3q9EF|KVo~nGkFYIC)=H{|$BU zWMd0a`Sd|*xvA!z0M#FVC!pGrBshI1cjY7VL-<7v5M(r=v{zL~WAjBrXp5)&K)3Y! zSqP24PgEwLVa@~p!gK+lwIc_2IWc@pJ$8?^B@SXX9wp z2Zw+4YgEN&!25Ra2T%Q8M)y)H#Ler#&}oty4(m{y&9*YoyLOlawfw09bR&kHi^Y7g zyVGs<#W2Ru?uKRme zY$kcbXB(hQKC{7A@(cio`67!F;2%v-mi0Ao(CF{h9O?3rt!%dJ7ttwr7=E7JBb(&< zyp3Aj*vDh3(iyd4Vn}XYCU2k{pF&9wvOIqxgo4UKl{c)*%aSt}@o|T|`yX!pfg3e~KShcvo>M8+) ziL!wy)~`^zf`ncU>o4L`vG|E*=_=26y(qXRH6`xhy(7!$1xE=C z<>+gPBz_Kr%hI>dU0Zs%!NG;S3N&R{J6=NFwOc~1_*HZjAz)nb99f7?cb{F%38Ui-fV8 zd?Gz&)60n{g}5UD)vx@d5z0@wO3@cGUy+u3d%2!K`n@Z-wf0Nzle<77d1OHN1D|?c zJXO4bR@P0$$LI;$AA70IUHlzOv6}gpsGJWOWc(lV;`vS%Tb#*D4Vn?kR+{(*z#c28 zk{?L9Ekf+ab7Zz#b{y6okv$%}c&K$?AOGVe>$W4&{%Up_J+1bGI-E|II;H1}f|aL~ zC*_J4{4C-`Eq|xFZ~B_<&JThynE3lsmY4$i6PolhN5BFP)I_=I=(dHuTD!uw)y z=c5#n?n=&UgS7cgfkVH=N8$YO@{|T4lA=mgtXO-(f~}qe{F5?-eOLFX`|(KFC1?;0 z!E$Y|o`)0BA4)!S@-^1iqwf614QNs8!x0V%5AKhrr!Lw?Ht>A=AqgpPUiXbZvP6SKSgR|hC{l(pNnk`WApCa> z>W_(=M@)`#F`|JVdMu{4m+zn4z zU$+n^?%>1w%SBV==a+VR<5Ac6gO*=_J`)r`58b}&35gCN=kJe?b!(TdY6vp@Simn1yy8cOpRQotHc#{}Ma@?~2!A85j^H1koWOC| z%hYLl^eCnB{^wbyRvY3S_z|~IO!$|m^Um3Fj%`m~BGwJ4C%Y74#T+pd`7YV+dnQfH z=3x~KNNY@3q!PMP8QJX8HkN_DQVfvB+B9D6hJiPNKasl(vV`s?kMfKerW8i(aM3DN zjqdwOFeXm3d*^s83HbXB{(tPfXH-;Ovo6}?R#GF9LxZTGWRWBdG&DgaiGn1_C|QD} zCTEb0WXTdmvIs~W32GJn5yr`;d!KW@Jvmyf_@3U6q63N+Z$o)C%; zhO;#1&wqsLEjd`;;2}SRLthkXSCgb8WYWxKynf2cmeJs(E86B*MV{{if&aB^tKl-{ zgSMZY900PZdKL(b#p`h)bTs$)JkAT!<&Z4GERn(n%lXJ4lkaEc_u!kuTR{iwgn*kc zlwdJJMt3*`qr2<4Cg7C~tkyZZ6kx}LS!+07F=s@s(*B&p=+1s`)cS6fjb zQ^R+^y}#`iou^&FoFts#daVBul`1{5u2aS{cRHIZ?(tU#GSndq?fUIWEOqi$=kpQZ z1Pi5YZmhMnh2k8XTAbS)D*Zj^hqvKN?`O(8AVOSs?KkWQ#So?+K2rZ4jRN|F7|8g1 z$Pi$GTV!?e6WJoS9rTVi=^@n?yjVpra%@|pHexj|Fo`g+&l!P>jLBm;WgaEz4;nJ_ zG;Bv7JoQ8E&UAmGJSkHNAs1JP$pC}LNey_|-u5+M$yrj=8UI~-{Ee4Px$F)}Lc0(z zr;#G<540+Z)3>m#6%}J>vDS7V5n&D41jjL+cETvUcD=NrBfl zHLfPYgPPrXN+4?#Z>f%*TQzZ*)>!_6_||=Swp!hjv4*&2?*swuCr?u!WOyqeM|v2F z4NhOiDI5t8`(%{jZ`$HrwLR)R*>BrZSm+=U*625A`GQgE(2|G`(F)~F|mf`oCBlsiS-QJwoN_7mgEk|@l`>bgzok`?wc^8fDlYq(yJSi%fFb`8=RX@y{l7B$Aq*&{EYG(yZ@ zhSfPA((2SYeT3?6-+` zN;E~r4oMM$CPUxGriOyfuD4eg4r5>=q%;c%UByGR9 zd0A%(4!rp3ChO;>QEwn?W*o^3dyZ%D&VcJ;yZZSmK1RH;=jcd(>0x*uG!2?j;r1n`Y7{XCW$pShh~GoU@z4-4h`eTMyFMN;m}DevVCthi=7jY6*aK=Y-*- zXo_zUL$;oI(1|W_$0ui?lB3J~^6zghSE%cIy44)c+0I2hKkUZ{c+5%qiOual)v7CH z?4yGM`lz)UjWpTO&3Yc=Y5FRfkwBf%F}%4{Cr5|nV~j?5#;`l5)?**B+RaVGc5GJ9 z@ks}{-JB+wPOV!;y!fB2`xfPMx{@=%1Ov(T{`&zhx!9G=p>LmDoMd&Wef_pmvhh|f zw!wCi`rV*=&+5u=%+`%l`UWZmqVyixABw{q#pp?-ag^2?%*cy7>q1l|8O7y4w$C5N zsuZGBewi6V_v3WuRX4Z1hYM2P%3&}yb!)A_$#__B^T@yJ z&YBmp7P^+=C7b*8M$oMf&1P3ugj+?Hf&!#kTgM*h7iIf}L+&ZA;^muNXHjP+*SXMe zBF}aA#=?S2{X^1k9_kA|-303y{Nn2SO;^hpplOctLe#6fd1XMDv$7ZZ$>^#qO&z;Q zOpL#^ZgpM8qdUIQmw6bT^4WB-(9_)b8zjHda6-W4elmKpFZ_mGJ1FlJBi$7c&y;9~ zY)E>5$WK3ol{<}7K%cn!7Jhs1n#wo~Wg~d$|HE8wCq>lOR`qn zL6RtQ!$u@aTl%vF{3;Eg2hEc+iU;ao_rynHowG;cf>x1}2epV;PDZ;<@-5ve`Hxp^ z)ApXvlppt1(RWB-@dg`U^XidM^Dwd9iT@yphjU5&f|uY09HNXX-RS$}E_q-Ojk3z^ zkMFSV-Jp*f(B*gG7ZFKxG~N-YnOksnt_Iq}+K+5}PFbg=6=Ga&d|Rk$!ROq{n6vK+;kzbCHqPziTVeoy|%p6 zF%t{NDx16hO!0xPs&4Gh$vO?=aF~yS&6K;Cp#! 
z+-%_#_|`|AUA)E2U8s6T_%5S%vY7kj8msdKf0x=V$%YKEar5$l*U*7|6&$+e&xcLV zYVV}^Q|>}Gh&J!1tal9Ew~9n31SGZUU9?!#A`YdI_Ta}nDZn{~%wshiZJL+po{aEG zsGWQ5)=_o$VP0w+XZoI!sOktc$uA3~MiSOn3sYR_lavJ2UDmt#77LQ&Rc4>gH}v#} zxhId9#2pz=+C;s{Eou_>1Dp1vZanp#i)2Gv93`%y!ZSnfx_Qa4Dx(k7KXaJ^s z>TAoTqp}Q_Mlwqqk+d_`OhPhvkjEUP93TVkv4d`oQRKGP*jih zo+{E9cxN}#drpt+xY~BH8b~^x-dnFHV-Xc~{wdn*e}6{pxL#~gtk5XDyr>ra$nAVf zl2&;A8R;`8+XJ?B!hlX4MaRNgS*B42ysZqz|W_*yHY0jgBzxq;n92PWK2 z2ikqI>eBPmZZuuiX|hXgv0}#KZ3a7U+5x>4^5;)G20G6&^NHb+w^$xpJ&ed1y(S-G zIzivZj6kto6TUH0CgD+7xabjXUVh$erPb-{(;KW8L2}&bbQ)r%kN9wgMJJ_rqlM?& z->WEVY%9;~)SccjV|k)$Z-Z>E4vB)DchS~gO{Wj;SfFf{zj16>k%}FFYlW65%sNv! z&JQw#qm!cy6@Vs-KsRdV_x*P-yQjpx#@QJS#EnQxhB3ou&ppHhHq$*B4O-X>>o+cF z_IB}T#O9w`n`_t^R|z2m)q-nUgT)yY`Q zcgO6T6G?6G$|u^Rb^6YWJIUg|8d2k7wKKid{=LYRBjTT-^;dfY$-^0fQ#R+b4frLs z=jc^JSpA~WQg4)@Oh2zIlm>mlpW$lY*5H}AI)Hjh9;l@w*}@XHWKHi<1;Z30%G!&6 zl`N#B_Zco`A!qlhEjo_Fes9%1q_Ths`eFp`>UKDN+c{-ZziPWPm0F>!Yk0SR#=AS< zQEDx02{~>#qRmp}x61NL$32vB{jv@ z%h%Wbc(_T|{D~ZEz>^%mPubC$wy1T5q7=4wzqNl};Z2H_Sg3DmS?co&zoQ!SrHQB8 z0UL-^)I7~hVF07zO45fq-tr}LImc;%_=o0s7_soppDWXL#8#R^hGz$YYYX8kr`^zw zeHqw--;wVXCh$@|=@^0ZDK+BmrzKe_G6 z%h=oK&66C{4kdBtXyYA0ZqgD)fS`rJ*4fruWO6hTG{pE!p!I09(L+v7el+ZC6ijNB zn;oDIy*KIHrmcfta{ceq0w7P&6{PJvMlqba%K0roJ^c2UG)4nUpyq~!IvCg%!h3A@ zNUZw!#QHqz9j3vDb?HDDo4ECZb z0TX4HovFwaYW^gLahogxCJkz8eFVq=^kL^Y#45_zf|zm zwby<`=b7b1Sr*X$N+n}xWX37Ve-f$Gery3GM$;*TrC%IPB=(Pa-XB#~)=^A#J;D>^ zb|U7JXGO=QwF2NH?U`$0;}iSJPfX|1zp? z1{RE{4=2|J^2*IodH-Fv#e-?(pT{|=Zi<4F)`T&9(|wFNDbA0kJ{&^?)Jc0D*zO?r z#WH0X-I9j$HR$>aJao|7?F)RF*qjME$xlMJ>&|t$xKnb7nd^x=)QEYZ$U8dXB_le? z@&tC8IDGi-&b#W*qzL2Bj%s(UDmmt-mE&RqBA{Q=hu`UOrk-@0Ea^e>ju%yH!F7q>)Hso5CaD}97k0m<0dCz<8R}%zg{ScC{Junls0{}8!tujT~xp4yf|qx7=KxIFLMuY z_X)={5CmpZ&MszSr4(=98TZ$lUorFMUPN74omstlx-@E2zg0ewyRw$E)|=4ylTYY5hT>h+?qE3vs_tUKh2^{Z%)u|t|d--&|QR4u}#D}mBH=> z)wttM?N6h6E+XLx#d<@3xEyaQ0LwH~v)rsD)d-bMbktj&a=HkOEg}_*m(}G`hmij?Ydzln z5IgDOxI7md-EbYyG6$G=^Um# z#K%7rk!=^CHlS3m7hcRChJO0}zJlqM;(7n& z`L-OxlTH2hBI=^6qm7-i#g#0KbWlr~?b5@x9Xb=3fSt$eQSl>4}1vrywlyRXgmD9`9FLQikj5HNyBJqa3cX0dkR`N)PP=GRFc z8iKwoBrqPv$>5F%g0`i(G@q(|JgPT~t?{Uo#1jtA%Q=_%B8b0T{Ri$!&niaScv%|% zW3{7l0Q#i2(<8Si}3*B6F!1)j}s0Km1L(NSM zoTL;%CA);TPZpjBS=QPmJt4p#q9bDv!pKl<9nRCYB4LA)aAKobW`Q$zN4kRdS?hUE z!Y@eH1jl}24hi_*a92{b-abPC&#J6(zo0`$oiqUGP+2^^bRV}JXAuhxLcYLlAehD3 z^?HtTu*7r1R1hKB9JGbxypsFA*@&Qb(t}kV--R;Nx zCw~wysni67Q%fhTCA<@k+03%v zDT&iN-|KA(?!~Na!epKj`Ft0jl~-CqM;9EQ z?a8#Ia37xDVP^>_1yrzlc$4#+$}`|#qtNB}_fteghd>hiLMTKyx>5Itjf*SmOF`mV z4wP!3Vum9VILGmcm^-fyuTB#~QTq;N-%3ZH-S8Lt)P3DT+d9|eBBq_54i`lo^5>}b zFP;4bn|;;TJA5xs-0&9(H$*z>7`=}4FK(Xsd^p2dyn)6=dc)?B+%+rOfIy6g1C(!G z>uRRExH4jUceAoOe3za~JMR;JzS@(eB^GwYeC%UPb(^TMC!G4Q!oaK>9?}ue5Tm9e z5vT1?GFGhdeL6jW(RnmP)&)Hl==9mE-u8^cq^wC=cntOFL4=PB6m+DuqOB2YEPON-ytx^6z?p19SM)t)#DZeSWtAo<;~ zq|{^l?$tdPs{#&ghwK+x4^BfhD~U#e$E=gZ?Za{YHaYNIuoE(n_)v1u9D@636e5t5 z_MpCNIaENJjxLj=^1<8;Gg9dFyz*EPqhT?@UHnTXcMgc)# z1TOL83oMDccZgjqG6^c`jxR+HE~~9Zwsc8}Q)@-G5o+3Q1+v2Hrj!fZ_H8r3`Qq{- zwixvfOm_uA^&KCj;^FU=2p_^lIWrzG`>3X8f~W~M?T;b(NrL?G)R z?L+QqsUcWX74ny`d;9NqO@|h7bIy2lh1D1aA+@@1upz?7Qm64V1q&K&e*V+5}; z+Rt8|x<$mwdB5cbugC5nbZ5ixXHDPz%`@C-C53@I-m z{y6s@;|ZWHTUi$SIKyUFdKZ~l_yTOvbaV|dam56kv`%SdAzv~@RdEt-2}CrKiDA>M zZ8mbIidOC^>y*YUWy9W`z0h?2obzYS5vk!c(=s{Oy0gJ6bcowbq#?gR}EBfj>vZ2`76r*;SQ+qru`(P5>!gloY#==+3zYbBRZlky#*B{0&PQTx^B{fq_mrkFzH`bq@vc) zyw87*%Ozo!H zQ{|*!k9S765Yq-a;f=+BU3u$x>WbZ$Gu?1n$*-@?mED>cbyOm-3|m>KFG1G|VV%IM z_INCyur*Rl zG)rt%If7&p{-VkecfopM&p(Ap@p-~=m?Dz3+GR=#i|kcmDU{z*xTH5nmH|poptP{N zGk<@&H7=OfLOOxEX&4om{0g2RZ12A@A5%{2zj5dT+s4KPSohO6xHYim`|n?UDqp6d 
znYXWdpP_)fp&CuaYN^%7^H0_H51I~#F-5#?U^JL=m^4F$^u8S-qWARKgfKhBkM12UjLh@SHORU7^oR<4*dn zNb3#YdS)gjyWy%^WhuH$n9{hQK(a|^*}MTed2)I7V7irfJ?gV-n7eGfp?d=c^iOuS z;skIrf#lR%<{ZJzfjFcg1Ql>NqrrXBWYSehfOYkkeiTJJ%QAF_M8TII zgD-h+U|o1!HegJGa1QRR%JouB&4ZAhFgPO<>3!^8D)QI5^6Wr_&A?E*>#Co_g_Vqe zk(%`VgiAZZVo85Q+cAT@I#pg|r<8EXXz7LKXFXKx)eqO1st+HjD5&~Mv&07k390(? zmbx@ZBUx2v>FYButx&y?kYIdkY^tR?D+<*UGgEhCim&su zJ5Yj&>2_)A;JY;a{@uw>YI$9O=l-3&(eTprA`ZB zpXwXg1nq{Rz4oHsojq5d!{_-U!>r;f6y=YWxBD&WgnTjl(U`X{@U4~=S$TaGDy+&L zOtjLW9`=e;r^4_X!Vf#b8|W5@7l={8PCCA|ga?K&C>;7!+YMM^!%lBnq1TXuyD~*Y zMacoRaG9Sz-RkV={L-CR@dAm|a8)7s0mA;bPV-$F?0H@RmMGn6V2??Q)T08uk?>h8@Yht+s+V2p)ICuI18LkwSsxp_2WS|PgfzGms6xzW?5h0}4 zS9?G*)ngK11$#mpNIMCRAQ;e}o6VXun_Zj}Ea3KqSaHB?fYE$FKGEfuFsEdnIcK+m z#09Z`{5nb5G(Z6poy7yvJ^Jd^Q+HztcT>$7 z|N5#~f+d6t82GAYf9M-VgC!9pddWr65@QnR#PAp!5M&Kbo-j{R721aIKL!!x zng+hMk+V24_^W@}$p6|<+h6|2t@Bbl4f<-!eqam0Xg0n4^xCRM5CEpP;YR?OD_9H{ z>P^}hFpGeR&z+K;fex@%ae4{TNa0XVF_0VRUme)XA+3ZpS6LR=;Qt4*`o}c>dnx{h z|8wi6R~hyZWTSjw=z<-reXj0_V;yk;qb9h%B5-y=P9WI-(un{0TL=m^i-sijP>c*mozWq-tGB)6xwZY zvYseK0NM)p9-r+!>_61#+Iuudv7SKwDH(O8U(U4uIjKOD&OCvL;lwbt@qakK|DqWF zdKptXsfkzodozZ?zglD{1df(XFT2;B1mF09kyeec#r(pdrkRT8HSj6i|n z^^MQJ2EXffVPW)E@Irn5ejnOP6!#BkX9+~OP3Qrfnozp~@qY#5u3Fk)X72e0oP|bC z$iI`*oqy+!FUtZ89^?uV``X=h%ENh40#u&|KSk*WrlBK{h8pjemL&mW;qD-*MyrkV8CL3xdQ(j@^lCN zOGC3v&`gTb0QC}$!X*z=O5OOsI1s1Ep8$*}Y?YDwZ@Ge?l}E0B>9#+T2|T=;pCj>~ zRBM@kskPAmQML9;&Bb!6@r#X1pxp=wl?WFtfmnmA18B{uIN&e8HG=f-^?q$gqDeQI z{}?Hd_$X3h;Saj7Flj)y%Dv*?zn&JsecZ^#s3oJEYAP; zkaYxUKd>jwQNk@3CtW)6j=`qn1wNzM_wYT>JrMHxGn}fLU2nK~0UG!(vHd1+a&O)z zTSol1Ow#|X?EbGwY7F?lCh6L){r_%~K9TCiauL4064-o7Mfh$uL*U{FvVgMyxNYwL z#|<$+YN6tP7~lUSyx3zq9fIvcClJ+sBK_L2+d(+H`Tbn0D9VuD(l(_ z+1k^mxWE=iBS-vPN8A#T2x;*?$d>Sbz_8C^5t23_EF{NyGy4DYPxWtHwIH~r#DKsL zQ~Ga!$;K4LqC%aB+XgsoGI5}QcSZmhDYkvQWrE+~y`+=-SO+Ytvc2JxXwOrueVk>2 z3YZ=c0P#F+Uj$Jm)tBbp?(5D z*fyaF9#oM{>Ci83JJeGkI*^u)um4#Oi#!uGl=3qq$XGK4I*P+&(? 
zRIZb#+y`S*7fFC5A3>f<;xZjS8~ti>3lhkO+rVct%$%$EoH=3@9rrTXuNB)11U}&p zELaN0PIz#K;l%feSo11Bt6*xWtt~OmC}@G~*{6 zx4yYgPeRobp2WiB`nu8ewf-EROAOto8DCVXhHj64oy87ivlm?Qj^0Q*?I&Ga`1wUT z;@)DhWY)rMtm>MHZ>DkeXNmze6J)ZmNV4KCEksw`#brc=6r*;}$ds7QmSb^v>X~Aj z(+`#~AxI#jm!55rqY2w-z@#}T6D~F&NS+i^Hg{#IM!q22Uq(_)(@RC^OVHzh3%%SU zpSly~*9$p4Ot15@E8wfNc@4~(^!)?CCQW?_VHau3SU0$>rSnpr|Hlxg5Y=x>&)!v?B z-|DA^)yD+@JWPiWP->P=YDNn@jhPJGBb=3wpeP}^hYBD*Urg*lEeM~6p3sP*a2~C` zV6LzLA&&`(Z+UtyfZ@~h0k`p5Ay^?J)ZW2gyx>W9KtIC=!X$op!f!EyQl5S3 zk$QED#7pW^y}yoC>jj-dE%bz-6$+$Y_7?qGW*u`4^|PA+8@*k5>8?*cF#E$lR1dH0 zu{&1W%>e4AAY}^*4+r``s*6dP45H*`n`@JAt<_P^siL90T=A?VMSN>NnAW=CWXLLx z2@BZYl3}_n7R>K4)znZCJ)rCV2Qi+!XR}jDkK>g zz<=QN(brSEgPWZ*z4TMTBrbc2c`$~|^#LNSBFn%^e6gdPT?TWu{7MF|c#U ztk)*J(7>9ox3|Ikx|)F*0w(an{L0QIDEv%&bhC?5UwirG5Bh8k9h^+X8vokn*ti&f z92PY8_41y-g{6!h(z3vFY2+F!|2e&GnymXUv>X4jc5qlL%bMjH0hCy~8owME9s(&i zx3H`gB^~G<664;H1fdzpEGofZDPPbc=bce&WgKSDy>WQMLODDQi_CLUkm;n7AM%t8 zP&W_y`%K=wxzihgri$}SdkJSEA=C32E&s9hhF3uQu5SbLXPXG!(Pjf8t?u&TM_;VL z*h4%)E%0kJJ%jAN-mPeU<5Aqh{q|nV-IVUXaQOvrSSUHYD;fXn;TC^T94qJK)eY$Q z0n0!H+{~$npaM$;fEGocV66Fp3BhXD5;9@7La+dj*emLJj^lME3$oR{ieCtWGYvsN zs7%f9jM_b4IHFzm@zI-N0$)+Ryill9U?^*={A^XFnBf-CzXAx8S4d%goe3WhPPl*C zc++_%XYY|dp&KT|S)W(x-TaN21eh>S_!pLl$jE0uPq})y*<+N%)>c}RtrG0ry_f^` zPrgVoMXUq*`=uLh+66q#oeZ=x$UR+22Qhhy%d|uMQEWTe4A28F3);x5YpIJqPI=^t zLIQEI+THHv&e8r^o*)=Gi6*cXlcdwcmQZ7#GafTVV8BjJfkVrAZ-wT$a6JRMUQW+A zwfFR7VPq6S7RSdmNz9*lKNgUWiPujJs`QeT)}T=>sZjr8N4=zE8}eqUs4Vq;t{x4% z$}i^}KOJ~d-|p`ngnqjvrGSIg8k{w(a zKM_r4Gs^-#VT7)kt_|jD$O#tm1kME%da<+p3dMIodd5n&!o8c2 zLvK!mCPFGoAr*c1HoAq6it>H4xlOeS&Zz9zV2H{#zZDvxT74k~^tLbwzyIDS&jRn* zUO7QreyU~Z6LNi_391(x2%IQqJ3@7;ePw( zCj(Y}M?e`z(a_5HXF-%+?X!50Em%08R5&CH^YdNV>LFenJ3`HL_|Pbt8w{E;qK7gv z8VsSS(z{q5T7$z7iV}vM9o-puoj0GaD4Ea&%-tHc4-#V0Ys&a6+Wf04{jKZlZ2%*_ zGE9Q?o<=KLNR0-gPck63*@ybpapRL#*y?P0q!Rftfi!z!|Bh} z18gNdK8<`m#e}4EqgH#!=h|GRZ z>o4VgwL#(;v3mJOsr!1_umLJs6X!b<6AR6Co&{tDtS*jIqFU2D+#WWtO0t2$HVj)W zZXN=NiEkL_gu^aBf*kmrs{o#)!kM3gus?bOPpRyD^H3fic*>66;I9MtAtQrp=4H3R zY}gSl(%9E4yF?N|6T6Wz7X%yz=UC)E(T=>%>+rZv=@<&R?lefj!u=PDjNov`nwU7sSEd~TsUkUL$?3mSpVT){zg9+o z0iEE+JR6KUm`>k~b6a6Mk?*@6^yJ3PPkPn!Bs$snfcAWcwjyU`P!!N#FO4ABUMCU_ zTsxY4F~%;`91Q3}FpzUR4IW+Md~@1=>+nf?;vH`uO7Ljgw`oLMswh!x*k0cCOZI{R zgsJxYXa!=um;KDgK}6Rk?KYMOYIVm!r_ZU_Za3tPF9eJ$+hBS+Ad*)zj>xQr{7F5D z8b4`3N}Z2wI_C{lhPA=ZZ{*1V9XY=C0BF{GVq;mra<%I%WKm#iDs zWUB{c{DgY~>&1}zTE6ChohXr^Uc$k;0t<0^T<-YZ1|Hm-IbyidRO_@|bmbLktnNb@xFo-MJ5cT1qkh#$YAC1P?GT#%sTzDW0G59e(b_$vcC2yN6rcbK*X%>p5glxjBLmy-|<6x&OQ0yaM2RGJ1xCWt~Ig=Z(EXN-kezeDm0Xn(y0ELA& zgn6=};Pu4|J^;bJhppI=8sMdjyCGXS9oP7IU6pqIY47zG86g%BZw$9`on?M~PN1(2 zUR3s1Ni%WUEY>#cexjrlG3Ov$;KlDc8$ec5Q}+$A z*I)4GA6vX-c-U{B&a3${IPR>pA-tU`lV=E@F3HA22G)k%Kz`uk zY1P_n(<(#VrUuekgFuV?mrMMb$dQdb`rHggXXZsQ`^9O|^(o_$O^;pqi&+lGk8j1l zV49mp9-Ic-B~yV^X+2qk?b{ z07gCGFy^$*T8?!JOHHWFke&Mtqu*pUbJr^kz%@4Z>~{!Uk`j0!Jpz=pKpo8n1~-cW z9HDaOjtb95h-b|`W#er4D;}L!X;FPrOnL~AtrZTg^7R{JL9hj(Og0GEH^lQ5nhH%E zNd|mqDtHjsNHSD<|6}tbF0(b*lK%1gzH#aos;V*cLFT4=zr1v#Gk9#c(P~e&LGh&iHq{#I8(NYC7SbKFNz85&X%-8qc(c&eW^hsWO=6eL8~gOE zz#_Rzh27C=+%Y6#LJWB-h8z7szzQQcZ=aCOSiVz>&X>=z$3Qiy6 zK5M$cXh8S*2Km9IS+R=t(pl1v&rN0@3>fO)@7#G(rs_A%wZ8l9aP0BXip)FVlEV5} zQqq$9i$Yicd~ZNrEa?y#u!!2Zn?5xp;q_&3?Sel+2O_?nrAn*RZW9zO8 zwWpfiBlBv31dDZ%dz;qpCYBd2qTz91!CX*|*28e2UGQVZLA?$jRpF8K`rN@;ZAD*0 zm~`08*QPRousE9h26X=?J&gzwT!3yqrN(uAa6Gq8{-pk~uS7oeuO8Qpw@u)3*S)(W z^@KrgiYr}gE?07)uHi&b;-=BGzv!SAcP2ZL4 z3W$$=;K4@^ac?D~zL>$dNP-%&;@(JUSJ`DdSeEB}9@o^Y*Jp9#*+G03uypP zNH+8c&#fzcsyCfddqXQ3fF8P!Sq_C4rHJO54rI&xQj&B3VpV@JEkf@8%o+ES^mHQq 
z;53hSx9N&0c%ulX!u?#(@^qQoqvK=>3)l98ZLV(p_s~xd+1XctEH}Y}+XO)*KLhzt ztItB|s#}{Uc4}nl1T4U(wfsd>N<>T!g8$j%*pxD}O+QK2t-Hh?q%LjoaKG3VWzfCF|u3J2m4KVup7 zzuVAUE7(9!bjXvE8_ldQId#|e%BF#2QR$WB36zxe_9U|G-IgX_7&#L}2`t2Xtrxw0 zxREl*Z8B`vmracQj|1%;MSDEq*0=}Eazq9LC%T+h--@h3bj>7ycgHJ7*Eg~`7IV_m zd>sBb$@M@cqmjzJ{}W|CDzCx;tg{O-hlh(_qwEH@!;;vicWaoJGp+P|Gp-*=CO8uZ zAp*6^vC0=A0?6oLwt;UChRo$cXYxiWCHX`Al=G=nZX7oOwQ*VB3Ts08`=#o4VXYwy zvQ{-%K7vXrpZH?iT`6;ANyJt`RfSioGO>O$uI9dmZ3MAmqoYz3HDG@4Y+zA{)3P{5^xBt)?si2+ z2=vVx=4K{Z3I${%unzjV^CP=X6pHV2r8Q7n(KXB5G5Myfn-u#a*Ny>r;-rBAQ^s|I z;t3%RQ_d6~FE>RL)xbJxFh|UYH6UO2(SmJ1P)k8;?dGvw-1h$9GKqA}k1EG0Q z%~(Mf&NwaN$G^vQJ$*8d?ej)xvyUKmgPGv`_t1VwU35YM*#Pg#e(Q}h!#g1(ZbxfR z)pU` zv3a8lp44CRB=TvqTOWxnjVjnZ?Yc?ZjbEa~^&s%tB2sqW2^0=tYR(5RbrBChKaK z?!#Q;o&Bvb1KRrK^{xlW#4o-G1MRJn(bc9}?RDiFy=9DN)RyWDLtBBNJ)V2Sx`^d6p60wvXEr;BtPj36WN9@ zExiSbO8z94^%oD%O zOp#VaSkJa&It}S@rWjD#ljLyoVkef)?X}H*EazY_v4NrJvgdybe(fVrGV|r z=P%E1S(WHg+2^ZejGjVkp{75&HyE9qsJoN-P$4<<9XQ8R(y|QpnPYaywToZD+>z7^ zKg@ zwkcjW8m)*_pfbjN;Bh=RCwtEMwnt!4Ib~mZ0E1+zD_WRSx9-jheuQGMEW?AvP8LHc z@ZEAa>FgJ*VeZO<&b zsx;(sUAteCvS=J$nspbo7S+MJ%xE;W1G;zXV1#C^YesZ0mRm{+Km(BAxI2d7q7pM# zkn^ASrdJ4nBJ=Z04$|m=6P}vmcF(83$wuGCoeX>3+8QgbvtMG$Ai`VdbEi>BF>UB{u8hUY-C zTw71HhZ?FxZ0iM!qDRL(vjdS!;Gv_3W1S@J_sm{dF|~`=pBW7j2h=QJ8(iE1hZV8V zc0|5wo(3iP1%p>VLK%#s&3!E8ym-Ry4s^FA=kpmo+fMCCKFVR0Ar|z{aQX(+*h`az zE4AbdT%IGNO%FYlXSVogTRoB<1;)ftMRpd=lW!dodliubkmL3uGDxO{`dIz+dcQs0 zzDq<`z?&b&d`L;x{wn{~;fTxg)l3Ns4nUaZu(+$vF4U_D0n5K^P@>{@-WBW()e z>WjPx{))ntqo;kq0);I~PBB8`}yUHSF(x$9A1JI?rEP z7w&ZQSGuj_V$ve0EWDDx>~~;{j2V0E?)x@@Cqsv|vEQ+?`-`9eB~m*_T%C7dCSPA@ z4d*m!&N$iL|Il{54JCAiIWKCn8h$FYq*XMquZLgK7i(2}nL#>}aZtsN$Dd!CZJwO3 zeE0Tme$7`d=BC?eI?*1e zNvu2NUs5HvAIDN#6LXRHvG0|H9f! z8O>6ouQVf3Pq(>8{LLVArl)hm++>RiI;(Y8)ABywv)nu|d#u`a9Wmo7EJOs{x`6t7 zvtzSR6v~FI^@48Q9?53dqkF2Up5Na>x0w+RdzyT7{P<+#Tod836$Miq z;wEd|7#z{b+|sl%hQHEKRbaK>k>IaM?{F6K3-}?wA>4WFDpRrnPyNmzQ=PrJcz9&2 z#X9{!NJD&M^wWnW{+|Ksy~8lY@Or5T7gLXw9?W}_%=4)Tx6ogvZ}Q{2kd;dl=e&~} z8MWet$5Kp`Coa}A$;Y(1;pm!gB9s=}LDCnKj-(zK&7zonlZ`kw7`d6q7U~1=i`New zT=~N`L=OpDq???6wH0c$C*4*$eD(YPA?nS;p={u<@k*salw`|N!c+DbvQAQjn4&CU zgzQ^(W6aDWvhVvYd)apx%OoQEK4VvQ#u$tl>x|$1e1G5fegAb`u4}HD`~EEFoXq@6FAzfncIOz(*nJmp7Z}^5G9{;jrIl0f> zxk;^DC{DfXEt|51c)WeB<(?P|&&PofzcPf3c+;w)lnccz@xUA-*pXJKm-TW_Hk$Uk#F~&L!nur_L>^MDuNI zoB^)rUK3T;)Xh?i5;>g9m(X67uAIZjtrFhgGUu?W^DDu} z22%X#lT!Z1LX~ZXY9R5&bBt9m_b}T1kV9y;j6vzgz4nqbJ0nTyqpPYnm_|OqugBU-Bq|!J5*4?fmf^gr%gq z2}6#_1RWd&GcCH;xm~RP{Ie2+JgPH4df8v(bsif)f?X|>Xv(pzPkeXHT0d^p`(X>R zS*Wh2EdHAGs4}BfhZq>F9OXUOv*=3<_bP5b$9RXh_0%uHxz30fo$>E(`jvHS z!aBK{5-+bG7~EZh1noXF-&w~IMSBN&a)GGQ&Nr^tIKST-;vg!#%2S@fHeM$r!;iRf z&JL>Evd?5th_b?bKK)F)#j2yfAI*W8+a-I%$2+Wge%W<$Draq+A8U?0&Gx@?|9|?) zhCgw$XJ>^Sr%_~n-c^-fzKa_Wa;rfF_Xl|iy5UjCmZq+N_H&Hee;SI3VO-`dCT3!bUuVs#fFxi;$J_LG5)bq3(vJwS-!?x`f?ji>@q3lz+ zB<(ULOF8NZ16!-_twOopy#DLrL(k(TCQnMH^B$I%Rwq4b0ROuO;nEpsoEcEmSEgE` zG|BykVf%X``uL6QFrSPj4;y`NgXGt{|7ZPK1YH1mUkrHmxAqmI1wp;4?cB`{Bl)LN z(L9PSpDOR&p$@qCqnFpLJqccNzH@4_7Z^F+MY4toMkQZM@<@Ztso2VeKS@JjeBzi> zOZ56DQe@sKEW&M3MNd6%4b@oZ@UwMEja-5n3XE79tU)gfz>;yR5pPrA+u>X$E21D3a6burtzpU<+CLV zUM8Eew}|7HrL7D^TNmseKMHjo;-(hA%(gQi{oloL7pVt~b(Ijr)GW(M$D-i=dR4a_ z*`YDu^nX_4O7yIg7MtsR66`c zPK0Q_zhgjWMAcE--Frui!E3WxJI+G0J!WL3qHF|XCdSI3rMYt*WwdS2)P9Lq7A$z> zcZ+7l2NiDnm`J4MMXzZTU;Y=d(@Uw@t%K2RulNKfnJisf3F5~N?Klu*6w+q3k9%vj9whJ4+{D>951ynu&bnc1Qh ztpL&yV`cV1Oc@xz9u&ZtQ@vg(W~i)uKLajkP`3joFT|WMPug-b&B8PO-ZdEQT*Cj0 z2#;DQkY(?8?ZWgF8_o$;iww~W;~(oNb%q;0F0b6J6|BHGav=Pk!_saq+Hv$=z-$F! 
zqNDDt9A>cZja&VBt#L<}-Q4}WGiHvG*dvh`9^Sx}Gr>bze*B-|4SuTRtv33o3m-L9 z23&d#Cl8$M3Ok|lg}Wm2Q8YIT2OEmpJ%(ZpKEy>rmGP1QtbX4OykU;cQ7a0ckqQ>wj)oap7!+h<&Wv&dEchZ)Vt^a_u}$|v5|>2y|{7{ zxiQF=@^+f7P-nliX$d7wLxVod_u)ZY6C>_OF!^%zCXZ~sGp}`B56NL-ASP+-K%_Kk zL1Q)(KPE}}Y@D?J((=xzu|G2yn(pf z=OX>kLY%rhHe@%>YNfN)xkE|Kk1iNAQ+OSs>|JaiX2LaF2jr;Mt)CF>7CYJdTPUn} z<#N)|&;#hFu5eY0sQb61s@)v?yh3wHoX@cX~QHVi>(etHjWHR|`id0Pf`H(yQ{P;BZ zSeyp0I(%!pB%uI99XzpVaAgmOy{qX>V|z`5uW`KUx!} zr*?XLKP@o|&0W?M&iY|G+q*6eQ#s0N(meKkhgu?C3vW!P|M7Yp4tT?MZL_UQ#FKk2wKh5% zWwnFDIreOo+M?#G^Za+SJ92XN^U}nHN3aw5RgwZ>_lO3>Dg)*U>_UeVI?XK7s!{Z~ zFSJo*u8_fmXkioCuK5q%I(HtMf}DxZ39MKphtv7;ii(OBxI=*%jR=xiqcYoF28Uc! ze0JNKrd)<^uj}T8zq;P{SqUX(Ui}HYv@I)(bvLNGE{*7VDZmx3TIMxqUh5~O13O|9 z>lKOopj+n`iU`3)@$yg}gKa^!FDt%?XDPzo@9`GTkndJ%Jv&m%`<)RjXum!F&6Q&a zNlO=37y%xX-+qJ}r~WI8TCix-7GEhH|Lx~~>jAl*JMHwGDIu0D+!eI%BwfYPIE_21 zii8@0EYGKMwdo7U=1|&^PwS1dg9WespAq_zq>o*hru^J*{m2Sgy2|7ayov*n_5T8{ zvfPe!IS7jN58JI5U1hI(&Y_l#-yc?m^xpxh0myprbR;+78Aqi754_#be9RxpUBgSm z`rO<=1lDO}TLV?!NZ|_!*-|v0j?*rIQ9tmAnY6{@OlOHnbp+O&jAx5aej=Ztk(LtO zyfNzZqctB8>owZq1r&nMm!IlUFWN3P!ALgw5Oe=s>ylUrHFe*|yH4B{P+x$;*{eme z*LFcjiWkg>4&wjzibxdNw6r}4iCo%h$qe#X5V}y^B3_88O^W&O%D^{U$ zbGoYFc`Y*ETBFyvq&ui~mAi+_tYDViX?v97$iW0Y&Y;hS4ds%W2R zFgb}HzmoM&B8C-tY0_py{MrEEt|F7F>Qsl1SdBWoe^dGGidyf{W?$t-!jO`eu$Xza zfJF2Dy{OScTVcQQtasftZQAawI7~W^wrB3ApXy4elrgjyJ_?Va9h=lD$;(sg^uI00 z+j0)E?1|ejRaX1&k7z)Nu}b1K&wgfQkoAO@?@h};xIJ`ry6s6W>mK`tf>Z^As~L~4 zpaj4NafcShuCkv88SUA#h)ASFem)QW#A=P6<8N@pYo)tKu6^pT>o97F#SPIu2N zUYi6~n2k>{>D(KDk=<)-%tFQISz5SzW574_DuEYQtOFL%ub`%;TFScMgkbP7KTjtE zqV0OA;j7o^%V$I(((^nJN>GAjPI+GilhYHQ65Dv4_XC%_QDuELiuNnTb1my2p2PfD z;n@)kx%k*&tSGATLk{NX(bw24jDQBC95`+T|C6)qLtg|+IOau^2oa6pf>UQ7G0x5L*%JLY)b^YX}kI@z?5Rvy;;nq#p5 zxGb(Wg|$V+6IME zsD2cxyth-akNs})^}`_32SCLwR+5+QoD>}3)L5!rr?5OUIpH7!cZ(54AMT6RB;URC zztFRT=ScB5sh_l7D{(t@1&a$49UZV-Vo>()Nh;E`%i<0X5C5hyG%w?@EQ!22+>;)) zPg>Dy^PN?WXG$Lb-YH;Zw#+AM^BsNmq%y=;vteF;HD4c|sp>N;n~L@U&l|q(>I-p$ z#1!L-+BT%AT*&hXT}&+-Udmj90$r3Qix59=o@k z^eBJxAwqd`auT(Ylat1JiuYwU;rE9$d}`&gGc%n8w(&#h^^QhszQaZ=q$J(}*WG?w z03Ge#)#j7W+?juP&=k$|l?uyEg3G({nN#;MCN7q0k=TY6^ zNtmkhhx{HL#&rsM_Z9N61A}6X3vP1oKEI!?)JvtRXU%hKs~3&$@Pano#-!8cJGjD; zXRCD^GL&z&{+q`&FXUt8Kt{?w?(tQ|ZE{NVcW#wp~$Dk=|u1Mnl?K zTJ{%R`L9?nWs}jb2A9%2E=+)dUbv7+xmIr{jz8CFRh&4u4d-noQA})D^QA%wr~$U| zN>@J2aW@7}1IbFSuB>n!kZf7F2S|IS$Onl7+|W-v5hznjC21`cVW8m&n^Ltas(D-e zC||f?9p&Jr6x$=f;4pG^kUHG51w&TMCgn&3^4x1K&RmsEjcTZ9-)0w8e!V=m$%9Qf zz+#mtSJ!tZ*stSl#H5ho=Cu11iG{Z_+=vKi_9fy|mOmEUq8p zSMCK(s{d<;b*<~SlkY3(zGy;V5^5;W8xM1P( zciH0xHLLw~hbfn?WyNBitN=p1bAWaWSWmOs3%GRe-C9xu2bU!;OuX0hPQN%r5G|1a z+7$6##a8rlfvyv~`u@NV#5G-jv7=LHTZW48zPFZ31m}zPc>E08em>K|3{K?r|HE7P8P?tb4aTr1 z+g$po6xR9O{Q^92#;VaQa9%T#QM_$g3t+lay4iMlr5smf^fxO0K(6PsOE`veN+?fP zH~vTw0^2s2ME=U=l2duO^j2(!OTDD7H)4d{JoAsC1(0Jsh&mzopqFb(31AI2$b1`4Awlnyz_$Kw0#=bN4C1wCTf2(}hX<7y!?9=r0-TBe z6<@i=iD+$6&UD|?^qeUVl*zbpO}KZ~WsUs5@ZZ%i5{?T2` z8tQ~Qq9}E2>!IWCL8*7kX2&!kb#2r9P(Jl&4jZAcJF~s&)p5QAAnB|mWYF&R=F&6h zX_osYC-3~npTL(QVV7X;JJ@Fg?W)R>E>pi2xA|f;@`cy4&p&#_YMQiee<6`bGE=tS zH!x@G{qyrf7b#Ij?$v)Bl5E93{$_Di%QCq<_pG?K8~-XAaH&x^`>Boav$_^y{@Dh(sE!gKe zU0ApYnRXLL)^s{w2AuUYq6U zc!P>eQr%ifF@MQ!_A$FPU1h08YBLY*#z#mc!|3{AAW?`?-cE`CMXj|9wwVQJyvjXY1GcU+bzb*OBOS)u5SdWz;nEn!`TwhzL zwvF6}Q3zgLQ-H*03KvebUu}*jdlXYlkpGXJR|)Bda11L zH+nK};nyxoeDOd16Lk6Jm33*fvh6rLsi~V}W{)`ZDp zbCxWM@jkJ}Od3S^AB%?)V#O%mwd0bv7><>QxJelb%ZEAY2}RLked_8ECqqWf45(>^vet0DeB6%BMV%urCuoArQl~WnC@NM*dK#PwLS>jZFkgH26s?LH2C1Y zg&+os+fB_hKyI_S`#@Go-Iq|0gS9*GT35A?hdDjNHoEdLt9_et#b|W!kz~J8UKhq9 zwN_LZuJR8q8*Jm1I@P~`*H3S12TlfL8N7Dhg(Obp#!c02){Do?aXMe%K9|OR_&DxS 
z=XdP83XYyt&$h@m@y!>?@Mb0W@pl;;@N+8`$MVFTT_;MiSWdIcn2ln(3PKj8XDv41 z8^JV?j+Pbvt^Ex$DX{UW@-|oY`_FQNA6#nuKGeYnV&O)7q2&j+T<_(ejsy{pi2@E& zd4gmjDyh5gkIzDeqwlp^D5;1;W&)ePTb0s7FpfGv4y}k zb)DWLgGrH4bt!h!kI_dr-$BIzcDvYSH=(%2X*s27Yr+t^@5_15qhmP*n)^Hg`6Y0& zboKLE4*sw4>m9|DEmDip@;mnBe(v2cj$q`^gZUb35sYt-6xhVdY`<FWPK6S^$ zde3bb^^^+}-aQSZ##OT)NJL#4lAdG;9^}@`mQdp_Ue(4ge#M%3p5INZUvDNpkW-97 zo>6@|A|_f7W;MHcJrZq}D44;VS|6ASn}iVXaVw;RLj$&~Q_PnpXPxF_Aum;CI{j_x z@wk_FcTCETUY6VouBPYn{9-Oq5ik6vvU>OZ6%q182QKx>MYgp6pGY2k&@ZZ*Iky zbT%Id=OA6`%30xKZLv|ef*|C>h4QT?+}@Gc5#vWp9i*|IdigWh)CKhB7jr~HH7UySHyPmPDP1ZBbJ3$fY& z5OX+DAsoSrV1WN%Gc#Jfa#S$+d@IXjRhpCgk$R@e%fYGNnF1^(FpVWY{ivCrMy?M> zXG}Vt?Xon}bAMAl=}%alJoTj&%j{>Dl>Lwt7%0AdXF-BSZ{20Wg$#ls~`8Y0`4hdO%y28pwW(X3}P&+fQiL4_Z` zJ}CaWV!Xd$+coAi@B8+(v8IM3s;~BoAC`@-T^~ZQ_Dz}h7`lELnJpbt)GWda5sicatL{2JhiEln~o|qXS;}G!u^zt*#LY1MxulLXP8g|N6t${+0*Hbok&R$O!1o?9$!kf-yuF>d zu8oSg$jbb0UMO&O)|_d7L|MQuRwv6fwX(?p0+C1H7RqMCYHf*WEs}bCh;3RH8(oC&BN^RMSCz4A_F1hT=Q`AX!$tYi*QAY4(F5l(&FW z&wX|Ej*zCc3r9L_MUu$Vi^pM@JOJ^{THjJ>)s!9R@o1a4`lQavV0#50%VfDHSTAm- zkkO-83>f=kBNY*g9xaW=$7Z$@tl5+TYsX51>`Fq@+%4bmex`0N177DfNnLe z|81)u`AhyG!z-+stTAf$mmxOisY1;1lq^^4tA`V1#?tfahw(j0d#4K#a$|V6DQotT zU+id{4Ov?zZhiQjO`vf!9hI)L2F0DxVzUK=dClM&5J+Qe2^FcX=faY?14ZXKSypqVPY2Xi+RbYB_>>TZY-YkE8Xx`erW|d+Mv=n9~{RKtLu+#wepq1T)gwKW55QT9U3yJ zd@wyGxoTQ#Q%-XVra%;8?trlVFu}BIz8VeW+T_KaH!4#O${&QMwv1ifp(z$KfQ%l} z5gb!xllxlWko8YXY)J_9b6dm0CR;yx=i7!~D{wZ49F#{)*3KlYTDJ~v;Vu5LZf(=a zAZ$#em`jqvRi_2FuI8ePx=wX;uiL0abq9l;Xx^6&Mzkn;c_DM9?I0@s=%{sHYm2~U z@$d4bA%~zAw^4`fp}}jIf0O_*5U{28e+T7cn)EdmA89jp^8>_&?0&CyLVsT2ka*kS zHB#*lKNJd4w1GS+lSVCOF=5EDDsVEWF|Njx?_t}!V9@x9392^xMEl^?_5eeoAvxrS zTbB6+&d%dz%>3O~$Nh5VdE&n$;GCVC^|~Q|GL>!uvQ7Me7Im6Z?rhuJ#3NU5xQ=hW z_mmG#BN!_=D91hGdjU9ewU`(V9fq_R!iENq+ha@_kHe`rDyY`gBF4#pw&MkX?S7>G z$WW4f_B%KPc-17MjF-nc5GPIMLJS8e8KM~pN>fo4xLoRJaAw7^pPq)-T@5!3MMFw3 zXEY_h^*9TP=!X<-DKv-sTC0r;0gS(pbAMBK0%V4)75}Yi8vVb@8t>;$D1?~bG)enN z;*sri>sEN2^BIkf=_T8lNM*}-2ma){ZT8B*1B>5Awm<9bvJR~RRH=2F@92pgx{F}(btCMD?WnAC3`cG6 z1+(u^D`xrc5!J7}hXd#*a~~}Qw6t$h2l^Wl>_(qA`lo{}h<&Uv{=B{bk@+HW+EZ!9 z#rfF^?ephL`?D8iE&V;v-KGyX@3(3ube)cFv5{0BNAqi(JNM+A<}=kdixx}!P((kz z-oI4t+>s7+b!G;)wk2(U4i3gzU3R_C@t@ReVEFja+m_-i(Jr&wt%Lu+5Ryg3O>1H2 zmzchJPM0}@FP6^yC**2w^84Y*w?5#r17!Y5Lf&0D~R3Fi9@4D_GiYe(^`me zx~3v+=uyk$4S@Y4eV_L;8Y9a&a_FMv)-GaG4plQ;1&7-G#`XT;we0woknF@nz`?mY zwXzCaXwiQ*tj^}Ph?ccOr`Au(MtIMML}w@4Zco=KF=KDHmGP>)XQgzU+?2L=w%<`-fKBM*Ecj>f99Jgu+g}1 z4Bnil`jUL`q#Eo{uVMUd>Jpm!2sGD{3MoTp?zFxY63me>8$3ScDWQ4&3CP;WsmCi} zSqe@8Rh^u+ef` z3jy=iY=rUzXij-fS-Eyh@oaiQ*UM)G1vI(0<)Jh1)-7#S>&&ib{e@S=RF4|abe_-n)kd_v+>Fe#Am6Pgo9&r~QCw_L zeyrom@v*;BZXrC4K0Hf0+2-BPVmg>u=ghhxXW~Pua|y!>-(M=YxLl~lnahp1Nk%Z6 zp6ZWiXX{trQ1Xqk2Zra#pA+|3M7Fo&DnRN9lsI6BLG7G%eGf6EafZ8YV9PVu)E8V?d>&v{+7Cx&5~i5b==ahEYA@TY-uU&A z%7a?`hQ0gbsEYzVn}Y1-2W~+`JAWA>BSywjT+zIVSy;iJAd&1`JZ_wcVG6pH;QHt&xmRuY{^V7vym3o<6G+10Zx;IIs4R zJeaf2wSQ&*)I*{sp|nTVJI%ABASND6QfJSNSsV<@IDL#d**AB{v9kM)xPDz&J;uCk z!&~-1ChIkQl^APgTj-g8rCgddXKSB%3GVb+LWs*Pdc6b#Fqg^u$}7!r>e5F_LA4>I1Uf79%XO*bV!!nR7^ls^>+oL{315;CrFjHynNe@r4}31F zq~kt%q>s;QpsU`T;{;`@0_`}MJ{!a1mc@MMOnp*L>C=RN`w>0DgV#OP7g=V+kZx8p zeag|aVoO~7wFT4--orxfSN08j#lc3Zfistu?p`MT=&)nX+bt<7n!eZ3LM!Te2saVW zAxe8w=SDB|O+$rIrq;z^r&>XoEh0*@lw22sbhGwbDh~g&g4rzwVEn63TAk#o$cAlS;*u5;d-0Gq zIj?8m5no!D)-;QVL3pkG9Ps7`k5}Fw7SM{A(DCL~^xk^W z!YOKq4Y#>T-OqpUUMOIW zY9V0p%4KhkpWgevzlB)(SeJ!!e8+k7`;}5hHlIFfiL->(mZyYt)cmLt|6ti=EA9s^ zlKBj#?t2u;70w~}6i&*<2t;DriAt8+sywAxsj5reEF=G$vEtya-oo*bZW#l;&s)0W~2Go1R&e&)14ftO@nUTuijsLjyl_deXP9XsLk|7Wm`G6>wNr9ji$ 
zT7NhP>fMuM)~*&x?1)j)%@k8)L`<;1XrdiVdZ4-k7&SU!es05=S58?WoqD|7bgz8d;y-)CriZ;zZTxRY{M z>24S}_ZTsw9>6CTV6?ktYnvhAVUz{kZNAdE=W%PPyKm(uD(%qBch{&eS5!YXiB#sO zKU&#qBj-Wt*XEZ^X>nI@OS`}CV-}hO`hiaCb>Z0>f+9Nu56OGQ&1a{R-8V%g8XmB4 z-@j3*QV_C+OC$B9CrdV15wX%!ej&9UkrGR z@xKxYGnZK)t#1irc0GQhU#XRumqb8e?~bg49MJm73tjo zLQB@gi~*(kmnPqUo8+cS2%n0@+#OG$`v}de@0U@nnnXTR71FsjsEvn;@P+C{;X(2* z)1xKFet@99-^;Ibra{|lV4vz<@-u%i8C zOoGmysrs#!no|+8)>YeH3miY7vi+5^|pc533$MZIv0@bbY?yLL@WFNTvWYNUcP zi+cY3E|#c0pKWkKJdwYbZ+UNZo+t5WdAygNKB({(*j75jcmh@5j;D9be zx|-V{2`i}iQo|vUO?Jj-+*mfKPNeGyy%4J2mEwP`^YREw-_HtxE{WnL3M7iYn~+ny)R8+1323~|SKk+9FO%%#G% z$GstrM$+gM`LokFiu$FKzdCh&iMIpa70Oq<>Z&%nAvNMZCXI2b?wYjM5^;Fru{3V7 zam+n7J(b_ZYWRIAS~)%Xjnc~uY*NMRnw^cNHS@0(ziFcrb3_3KG|+}OoxbsuB4U+lA#dY#gc~m-c?QX8Dga>H=H>S>A z=Bh|+K5LJrgIlbZHvJgbh<=PWg{ZPBj&ShBYk8>5pSsGRdfffAscV+#>R09QaEp`g zl`WvUV4c89&K$q7Tg^J@A=d6}p{+KvM&0D`;+V%KW^dQTF7k#vacVevWK}RGf;_mn zYSri@bVY(a=`8&t;Vp#?;i8n6-KZls7v%Da>*iFdM~ftP-fL3HnjmiNzuLzyi`xsn zI&`5{lPwhVnS<<3{362<#RIRV+OO|O^EBR%Y~8vytvXlM-bMV8)wa6o{YUI~%fp$} z9DM3m(rMFs%fO^1wVTJ>>?6#Lbj*|HbE}|nb<063$S4S>TeUC{;&h?fZ8t`U5cQCy zH0F|Na=V&taSWI?E}FasuW&ZFhsEiyaL@D~puYcM_2MBuJ3c_QjbvG(bpL1k=D$#l z#4G-)TlkT5v`8XtDR?ypeB#lWcOxQBMry44%09YPDVXJ!>5C~2iy2znR+21(kKc+k$3+7 z_qgFlEzILehy|-Kb5^+2iR;Z{<&?Z}ywr6l5Vj=8Em7R4fFWl*woe}H{$N6yAgxjE ze25bNhmdec?W*{`Va0+p-Ai)LQ9E665DpmZNA92B{8~q@b(ce_P&sN`PMojA!hXGclkUlv z1~P$aN}$B8JZ7o))>Cicl-e)f|EYF|D)F@D$_2n0D)>o$u#w=+a-jo<;B(xMFFMfk z7-(h*>0IqB@lL=pWn6catMi?+-9PFWWfA#Q`A}7n3e9p@A25Gd<}G@mDj`v5>H%N% z4?Tv_Il>kw*w$i_v-#8ovHCdL4_T-#v2j@Lt_7|Z2;I)U6)@Rt19oy;-D3@HwcPqe zd3$N^QtLoy1A&IZ7$Dc4c4U%DpUED%r z3vyGx{Xh_W{@}&J(@9y6vzfDdB-Tf5w<4aD@#$qG*ME5+SGP-gs>vS81A3@m8fzGE z^xg#q7Bv}{rA@X6-O7{d`YzW25e*AOQ z)ALN=zI@+m>9)-`5e;ICM-jf${2)_@pOP78JKAJzoo-YLB!B>Y1g=cvoIyQjsn+v;<@ zV_cTTj}7_{>b>?}Ea1kCe^yZF$v^(0WgSwI9zXUsy-@A{YWuF2VsNAzhDOOKys9-6 z_+XsHK&=d^NZ**IqE%|8rWTkTv?O2l#3}LUkuZmsH2C3vg?4nU5JROY&UrN++!a`L zUVdLJ-_1~amkU%G9p1kyc$I3#9?FzAF~NLugw~7v)cMvfz=$_+!57Hlx%ah#;K!sy zf2DW9o49>TRKa!p`Tn%yh791{@r0}5`dZcQ%sJ!^jZ>C+xeG#{W_XE{IUL9M6 zoQ>(dsM>TWoqo!>IEWxkI|`3j#enbTST-gMLu+>@Ro7$B8NNJW$`n=xuikywBi)$L zH-7iBtPMQMk$S&>x_*keb)9Y~ppw_8=~m#Wi#aOac`nt``TI}iCf3;Pq^g-b< z8HE{b*Rl^y=@DY#25PJBSijqX2x`>_p(K|0q)fN=F&KKGdaH=<3VJ13aeX^&!;7=^ zv?+4H?I7Zd)y))%XxN2r)``xMv%CscflRI?n5yvdvBIewc=pR(*@u@zc0blk8MOMR z@1K+9deOgV; z!LM_o?yTs9KPf{h^g;M3quT0YK*a6)zw5#*%@YPD!{3TC%PvB(jXPfFTw79s&siq% zs;Q~bek|B#2swQFW9k`2(R1}TiICcD4i|K;B^yxSsq{_R>2eIlbjdlp&44NM6Va5F zo~GK<_+3wrnSJXa{He2ZINe#IgIev0i`HRI6;f)7Hoanns7@rDL}HvZxP) z>N5G{j;g;OPfgDjtPNUjqE?cHM-O2U_OqY_QPk&CDdrn5-x@}QjIby^{P&3{1L8W5 z`$O{fTpzZG@V3kJlBHKvsgO>b41`Z5==!G5zb(_U43HM`s)>x-C{y+YkK>E5`4-!a zJ(2FIzelQIHhiOTIsNG8rfSm*Q(cZ*E9X*~IBM5USVG=GqLW4h=zk|t<|qUr=xTBaxkRVO?dDo_%= z0-yep@U2^RzGdLlXI}qlxtz&`+B+`859rGNoKgu#M5Cd-0YA5+2SVl5{o2_e%g5zS ztLf31wB6!Sm01b>6G;X=raK=m5Ne0!-@IKx#RdpO-eYkTmsNO_yt0yHXrBL?-PH2r z+E0OBJpz{t#WGdRq)RiNh*~LI_gjH+gAv+nIp;d0^7h3!G~BzE)Hvu!k-uXUz-G{Co8b6nE>G6*?*4fC6PG8X=RE{~8u6C>Ro{8qP$l>(<*<*s%qe{yH>VwCc@?kl+>8u&#;BT~?YtAlI3AbRQKlV@ zpG(5P$}NdfUwOPR?OP#0ktq`TT|1}(DW_|}XDCxyf>nkFyN|)P&x$X#aeBCoA3b$j z^{Vb(Q+nw*T%;rdmYz`1i(VPWr$tj~ATo)e%{PsV5xBM^_@%8jege z@ev9-5xvE-pP!UzE`7SzIq%;`kg-}tczcZXxuO^kzr_4+1+)e6k%InZF`8doM_G&p zngbqsk8b=MY*Zhcdw-ArF7HkHkv}P;EE!W(dq;xhNA+6v$nswK2WHZu8eQqOA!o{k z13lj!$gUMUy7$lXfXO)9m{-%r2k!6N8NxBAdkhFtj;nnDx@K)yjk!U0xmH*%z|eyZ zUyp+t;;NxGSC#i{=DiZSXWmIajy^A(vpPKPH^!uIm!Kp2p?KJ4e_d*SRHX9O%kS}( zoh?4A`@=`e=UQNALmkn*R~#KAt(XDu3Nu==YW+zv6$eFJ`u~V}%djZBFJ4#?1f)>` zX+%H-M5J>-1eBDL1_8-o7`lg%25BkjlI|Lsp}V^gknZl8_vZPZbKXy0mmg5}z4zK{ 
z|7x9WmQ`syi^MbQSWt@!1>vp)_)y zZXypA0aHM#^|3f$mNn`;B-+n)dUcu%u`MranfAD>1w$coqh!9%y{7$Oz3hgXWS&K6 z!+6XJ2w8c|!Cp8lUf=bMfQeWb_;XKTPKklGAFI9w%ZPu*g;DM>uR%RNQ=mZ;1$afr zT8G3k2dbxA$wJ=vinE)2zu9vid&l%=t_rLn({$;FWz1Q>E9-{xi+ev0+~V7-0Xkam zU#iiaJ{_ccGq5Zf5L()J#nC;JQW=mIoU;BwMa@gIu|b;;L$ue0HKpxsn4N(BUy+R@ zZVPTlh4*wH-w#L<4P^9aRW>9VdN+{^i>}%w@tKQ%g_OR;xjDbRz<8d#ivNw1 zq^AB~;nfg^uyfmf+ed*V4di2uH$t+*w%!9ey%#2Z1zW-$X!hG(yq&0d+Gb8=Orvnn zK84WoWhT@0mvj3e}mT+WsYQA_Uwq7u^0NbbBbNhb4bicQU3Z z?4vmrL6Iz7bA(<|+C@UkxfWa`uhW7AoBXm$sG(9oG%K~Bx05oZ?|^OS0_f_Lo6_@I zD+RlIT1m}#EXmg*(8NoVYatq;F>-2Y>G>RFwUb~hmB|i{c>Xj%oE`iX_R%6>qAP5+ zZk&PTa{z=^TRSGpl|pP+#UOR`Q0QB8o>qx;%FQ&pPPhJ4QdUkn2%97@)VPV-lp92- zuh>fwiypmcY$s{wBa-r4AYS8XUFZGOzZ*4IF zrYZ*Pkfw-jW!Yhnk5Q)UqPluoRIONDb#l5t5!D$|8}Vkm#fpzTx9UO+#sMjx4CqXy zrwWzmy3IAtSS(qnoR;;hz{L$l>h9fla+!42Q7rn#OPD5>-A-*?O7wh+sF6R7%2-Bc zhtEWY{$5Fo$^XAPwY%~xFw_UWJ)>%&-1tV6<>n|S zb=+&2Wb#_5gw=rS+-TNmj`kO<4%{i~uv6qbePCOd>Z@1Jcat6`UMQ&Hn0PRYW#KXG z2S1|{IW9vDecrBY?hjt7?;-!Wl%M|wkY#)77(RiUQPwSaVviheTOO^KwjBV)q8j1sfcHtcL+20qJcq9`RMzI9ZLYa8uqej)%NxViT`!j!|= z-lQgc2e$sL8_jojzOMC}8>wNg&&Tm4uHNkjW?fu^I>B)aYc**vo*%=_`7K9~WlQ3c zg>*6Ai7Xhcx268m9}?qXs4tX8V2jZ%b)KtCgdo#UYiXV7G-{~iBNU~pczwYSXVpkB z(&%NSDQR}B;?-9agbj5P+x;`7h^QgUy{2>h4I5zC@bU{<7ZVH5Uf^12^7KXSutYRc zOj=YgeB3FXOK#Nhe^Iz*shXy+BYX9IhW&LL26tWU%CVgJ&7PZ1ssK^kYs1+zQ$LE^mC|Fqn`s@QNflfQA9VX6 zCRNnzMD|8T%S>msn);=Epk+FiwzDo;pZx3Ixp%- z{IcZ3?Kj?`BxYqV(ejMq{TMzsUm+6ZW+`FuQ!>JrH*K+y53}iVMNW7-_^n9l^=@6o z**^0}O6Gx5JSi%xQZWKCM|wye&l@g+;m|p02|h1P*zMmM-=@u@?Vb!UuEa@OWsL)pWSilHZ4X6bAf&0ni&@N3kG}0G`EuLOOkk%psGkKUVk>@2L$o_ zt1uaI*S0ur6e;59TmIc9S2k7=zNlnS0Vd-vBrbfBvZVDcHZ;}7s?0n@v<@A~WApdu ztLIe!Ka=0mUoipqNgqTrWj*|CVu}YcyD<5X#nY@}j8OQsMAQg?+w34Da-m`Ic<$6T zsf`zZ>|-8OQ>+)DE*D%M`PIaqlLOPK0fsp0PVh;1G+sDGG{WZb9iQ4RhB*-+LyRJQ zCgzXI<|*Cw=V!d3b$QL9^-Cx)7d(4f->I@P?VQ@UNzK&oX1z7IYc!ZMn@GqvtAvzj zZr(af=-6i4dH7-FPtwBi0VNRnPw|-OSM;+!`?4|Wk`LP$W%{12p5c=82zTd=vwwys z&8Pv%7;D>k_)_wCt*TjHP)WG(=3mD7hMA@N_am>t;>Hw6px3#^bshWn0gEnx0yBOLJbO%J@iD~6p968j@)CVWzX%B9rr z2j{VX0yiRz{jhFJmeT;SQ3~PH@z^akXzatba#}K8qHp} z;c#|pw~UvCzgkl3jhHb7yH-I?)|uxdL^0RW&)(IFu>j@ph0p)WA>&2zg7ZNePz>#9 z6XnpVH((>X<%tz)c-x_!=MKN^Fndd9csL2j>V$%(i&F2)EQ`fWsGjYpj3Q^h0EJVk zWbe~SMU3PT=(M|fXxRppaiRXY)rnv-;aXkOX5dcs(&k@Dpo=Hn6zl@ zk;2f~)tv_rLRH-MNzDY^9QH35Wh7$<3$muzi_5Lkiaq$1DoTpvZz|HC#B)pOz^f0 z8psjfWg-vF(2GucN<};Mmd2e)^kNrpvK}=SjZyY1JRbqE*qsixDBwUdq?)(Lq`rpG znN1(uMkvrvQiJAw*H3X22>iJZYQwV^lgPTybt1-ABRBqQ*}%XCt!StXiIX6g$tu)8 zA0AU=Y8sT~%e;JY-gqH8+~@x9phJELu~+t7LDeH7(a>;uRf+Q7`m-&n9LN7XJ9SKa z!-b1NFflyeZOtb_8x*rt%&QT0AYNX+4HA!-V!Py$B&GhFb;6tgBvF zZd$}rmB(c#h5>Yvpu5|OW$x-JqijJ>Cbq9$nv3-Dt``n(G_=8Y^1);rG_yZk_rDcs z<%&PTCUmM~t#;A8IbBb@Td>Pl4nUs+%r8Imy!xIC7>M(+UhlduE{m0C|h)fXgY_?eGxvL{t$ zMB`k`bZ(D1aQAqr!fAX7KfA7y28IU1S3C&rVf%FJ+C%P&js7ffR}M6v7`cA?%3$x~ zsPP=|GW0H9*X8Xduo@B(om!6qp^w=p-_g$sNe*L%r3eG5qSWY;L#CgC#Q`*U_?O|S>_%4uBqGXoj z8U`M@{ObMrX$fLMQOx9%e=8djVTG+meQl3yOO|C^KtL;S(m^;ATcM9%emRDGw^b_=y%_W z)fv^ap{PFIvR&KuY$rjhX-_?#RiGkBZA8V&l6iyv0LOV004UlID2z3zV;tC}{#@-N@BOJr~x3$XcinsXPv-wdOA?> z@1ENcyWTVgsHI>EHy6=^Tk7v4QEeyalOZufYyc=4xTNzuqy;O-iB?1$4jVG=-o~%3 z9sR0;Zl9R}z~Q$}&V8VMHnz%%?0W8U$&3WC0PITYAF`g0Y_<`H62}Jpr~OKw)NM%j zij}O z5ww&fLsv=Uc*Ia5dOkgr&M^?$G#kk;RpeqvGY;m$f%d+8DO7(P6d9Gh6f;QG#k%D_5Z9r% z5hpOecypd<(iaP0s#H`_al(-9tzy{VzqJ}H+jAaU&ksUfa08K19( z=&;WzPxb<-=;l|vGGrg!y~ zG~{oNCjy~!c2k!NlS=)7>~x+z^$({Lg|oefA+7m&QWy8JhQ7X{(m;uW+5yiZuG zHAH{bM-NvjIO}VSKn451hP$~`yPa-V{~{7?@EC(QrHbvDvXq=J(U}WZE&l5$h+jTP zcExtw8tu4dzgLSsdj*R(t2wgMUp{l=o`QOhvJaMO+^R@}#H5Kp`?6KiQ*8F3gCR@0 
znZnC|REy?PUNy?kIwvQf2S_&X=b*b!KG2=ro(Y#iAH$LaEPg6DFLVNNJ@7WCDejzV z^l$`!w!Z~psy+k#V<+kft*?Y-O zO?OUlEjslPti%k8LU|fdojQQx@bLROA#d$74tZ-ziL=mpluYX1JQ~Rrhl;a-Po||r zqPyl@&tF>`uH2GI*Nip@odDNhI^J9Ar$WNq;~&lvWM9V)jAr$jBkzdHF|muAEZNU? zXWGIJFqlHl%NasUuV;))9_40dcM?8bV=zxLy;{H=esu?qNwX(nI%GFp`LnYXsbwKy zbw%4DoopgAV7ecCSh>|*;XfAGIIDVglc&H>;=QI)maV}DAP=g0tx~bX?8p;tunVNh z(B0FKg5)^d!*Sf!%qL4RL`V0 z(GMu7O29j8Z{BQ%%|SD)7gQ3rtUu%$j-DnE%XuMYE%B~TqlN(0UzLa5be$7TwO=;S z#8StLdD>8;Pbon15AQmHI3ivZD^j$k>2^E~Sy6b>n@%KSL&SW>?DG zlaQc&En#tF!%Z{sa@4Bo_p8p}9O8@DPF!Q$tV0$)k6#b4%+**zpM%&b`le)&qG-{p ze_(79Uz35-sU;WtzXGtQ4z}Oxj(*g=|9v$BZMur}Cj^bzns?)8fRSd^tW!(C$OnPG zW4=8^;w}}|ZE+pH-|T!Ps2&%%A4af>Ci*7)S>T}%NJ~itjlr+%d{#UnCB!@@l@h?ykK2oLb66#uqE(Ij2g4QOz&jKA!p6}0{f$mHy>nwZ4%h%!``jtOwMvK;EL1Z zQpDW_u>lVo@_Wkt{S7Q)2Y;_0Ow8M?P{)mvqO^B#^iTV#GL0C!RGFT%g}EvA(juA&W-x^6x1uE3Ly)RIs~U zpK=5uEUFt0tLabjt&Ba3jg54FyTowq{_kd}QKWU>HPnd+t;*fcwC*Nmprm%890?&;Ar4(=@#9PTah*v*06I90kF8LT8{AhbHS z2e+z6>n*MA!?ZI^TQvFGjPdcNstPQ@g7rlfHGaYW|BC~cuLH4U)6+pd{8sqD++;|> z)&JkzP#E~Gos(dJ1E1*opt_X!r||GpEn?ePZRplCgQ(|)@FmXm-NlkZiPxgfAY^p~ zPa$+LV{7PN_{M;H>|3N=QYL;j9VhK9$?LATSEuf8jsCBi6UwKr2>mq!ptTuV02QB- zS~I!XIxMCLnwx&k0>l50haWEMTNA`}kOukonZUkJQYQ#(2~8QLih1DBj2}Fh8)!)H zqgJxhGU$&{T9aRN+<;V`Yv;jS*BLKVdW~GdrT828cu)2%&RYna07>kX4g%5dIbDO> z?shckiBZNJt2|}3at~zu$vyFX_Vehra|FH8gKP86L0#ektVgx>6xKL3ge zfHZ~pL+jct52M7a<)T3Oz3#QLpMfemXj@PDH_a1fFX$xO<$HrJdYBuu= z7?XgW>qOOQdz8}5fikUk=hjzG{!of_3(>b-P3)&TWw|tbL%9{ZfiwMgCFql*-62r- zunndw5XlUyP%oPj< z0%_j;brD+6W!$qVbUfzCqS2U|ZvVi+ewsA~nJjdh3US}Hxg~ZWk;K5#JLT!p)srl2 zXF}vLICT$mxA&BJy{T7ZN;C3BggYx^~Gl) z)v))s0#8VmJCakb#yF*L)nvD>RGT*AwW!HLOt?3rqak%C!gys(qG%eXZa1*MJr_>P zcD&J^M^!%2+3oW&VkzV#2Y$_p=> z{jsLQLCfE}o^P`2ikgh_B4Ua9GOvNo5KtWY?=Z{qe64TLx2SddVo}FCKsX1@sZ^$w zWgJ)6TF?0YRzyXUHuB_oG@{f1Nf4u|D5iWg5HQAxybaf!-Z&XT{-sA4GW@}gcCuX- zq)WYd7FvI;J*S5}{t5EDSa9a~`;URJh`8ai>w0_}dpg5;+nLZ8Jx0^)=l)HLZabls zUZjC{_d0#c(+)xgJVoRGqa_a*X(*olj#Lg69vCsUA0=O4I-{1Cv@)%kZ;A$(n#?Xr zQ`eIZ{T_~Y+wt3!Ul;JHT`*`C&^ULh{s7GCwDI16&te2D%Q)Z)=z%|_8m#j$AZSU(~Pb^lb%Mk&4b`NifsfY3s_l^5Gs_29~ z$vWL7BusQtMcZh~f#VI>;WV@Y!jy)?qYLw%zjWIJhIr4ZFq zU>PZRx!j%myrVeuYPdP5?<2IEs5y#7INtcUb@Q!n2r)liy8;-s9oNA%!3t!2naKfM zg2yPn%pW%7Ki5GX`m$+K6(1^Hfv5z<#L95>mxR2qFN1Xm!kwQ6I_ek)w}LvEi2JU_ zi4ISM$BtAV6J*iBZ-W3|&y{JB)*N&y(P4S+HRPOzm*4H*@(ue#*RZ4`YR~JLNeE~4>T$Yb3^J6j4mcu}USYk#oH z_NqPG_H9QBtvv8^I+zD}>(T&S{s3U1f`Q1dDNL!^Ys3p1&B&9Q-g#ObE13P>FjC03 z+ab$w?c;DBUf>>cXKf%9F}xL+H9Aq4nc>S8XBYmx`uXinaP47U61l2N6!cL%4^xpy z2+^Lt?UhLw>=mI|nDQ-$jt(FcT_FuztC45V{J+?d@;WZGfVWXVVO@Z(2`zZybGUws zeYuH~2CJzeG7p@X!%2C>81a#yOh3V}cDRjCCm*-%E34u3c-}$tkB_ICvKVFfkxj!L z2ZbEa7H^^;79T1|Wh5+ypgzd4YOiRdYkr*|AmK$@2?Zq)R@!`E*Sje* zycqff(rGsE=N%`5F8$JLAnrR4J;I`mlJ}aT8+Uvwa}H`DA;+tK|CF_JoVcu~lPYhA zLPdkIjE+p)4G3zn=rQOs59UBwTNv_|@I=)#?7p*2(5SIVV*hagRX4DzE2#(@B~AC8 z=4}g}Ie;IgpEFg?Jyj_sv zC)utl21u*+U`e#l4A|9L+-XYyoz90^_r2PH#^sKEB2fUxZrW$|tW+TO(mb~clazl7 zghjhwOX<-q=Gn7ot6H?15GWZnsjNPMY9=smzV!FS11LD_Cu;x$WYNW?|5%TDqDsbY zmfhy|YzqDLwiScvnTCS!ZcU@HO!7mv2{E0vtLW_uFQ>j@e`!WzOBfOOSzt$e4oppFV0QG_y#d_+6)t{}jWs19f_ z-W4%sP#DO2X|5A^Q)d)uK%hg7V+nx*auQc_^sTAa*-eLRK+-FlZJkl=z8xRp_;#Oh2z2qsrP}w_bkP~P`YmkfH(l2~<>w?}=kiGw-sewMvLnj3!GOKg zdm1r?a)CtnHTUy!`40fh60)lW)X56iS?0OT7A1;$dj;HMBL&a-mpqIYoH~ppTnnP> z=iMCtMFx!YJW>UxQonvp*M;$%d%tO+DSU+-v#zqB4K&g2m=Y7X2Xs z{Q*%V8ikEZT9E8}o)Fqa$at7`B5IBO6H^ySwKW}1ON5kJfWGr%Lw&*6Sc0PswCnzH z%8}!DH5y4h;tbKAyEm#Whj+DjY(K;h>-ZNKOs?%De z{LRg!&|jh0Fz$Yw@c@G<^hdb=^si1Vzaj4*NsqM$DUI-~_ASPVg(ItK-r_LV%+{+y z{7t9%JryC9gmWIO$xRmd)P^e|nbT*o=G=6hx__OGaoXO|U$?5f#Z#MuE+rOIxCcQa 
[GIT binary patch payload: base85-encoded binary delta data, not human-readable]
zJ-@h_8Gp6@Q`TJg`+u;+e~6;Pix!^G%OYYJIf#W=rw`Q)(;hCPM~0-{u+#K-!(YNQ zK*9TxP4|zWSGaPKi35|`C2eCiqf)wmT)}APUmo*bcioJ*1Oh0+`5=RViY4vRM8lF z&yE>-`l%U^ElDz)ZxCP_&j7R1nU|`y!a({ug>t$<+s^pYEp^V_o-Ho>wxp7bx5)lU z4=<>tMTCYA!LSvGN){+hC@1}SDvC>ELE)7nBeW|`!0vqhhsdI4#2X;x?FHE-sAy)W z*oA1h$%UxNnYinkkji{?Ka-Da*Po~grFf)fFx;@e&c|$PnPi7i;APp)-u!q-X?inA zyVEV(YW*t=PWU<$`ervymFlC&q=iKNMRY%3=Nisz8@68E9ZbXj?)HNrdQgt2tYeWN zZFrNu{2@}FU2s)L=fXF1h){z_!-UbrpLfBVIxiX}sLdPR%?@dh85uLhqq)j=;#f)$fW$u1IpH{)>*)UYyRA%&$#j$yGV0bzN~ai zwkF{Vo7eu|!99GY^LoSE(ui2g751&Q4*TN1v6EX=4DbH<1aj-=f8~Q{GS2XnEhI{; z>-0nFzFy`SRhn`6ZEz>o|IjMA_ynMVH~-S{1JuX=cD)hi0cs0hwoV^1qHGV3|GXHK z2sa1c10SAzl6bV~BM#SU-aH(w47Z3&+c9k2HB`i3rz`RQ@$hcNjc5H+cL^I|NrlSAJ$)VX_Y(MAc=3b~3;o=)u=|+6M zB0|sr(1Y9GNW0E`6*;Df!K~d*4*8n9GV*Fl(=K6=4>#^W?*2!swDrRyc=mgu!1e7P z&s+H`2t-)Ltas5GWP3j@etpi8NT`ZoDCKBqY_j+n{WB_{_zk(!UsQU%Q}oN7#e3Ay z7>FXm$P#Y=4*Gkas=;x&XewzXX$BUfB1KE7$!GwO@#TwwCQ)>5DUb1cT;LNaBWNXk z&`7FnowAGYe~|_Oqd)4m5PYOgx#>E3UGt<9h`%)Z;AEpu9#|v(m~fjUTwhuPW`7)cFUblU$L2ZpWkBy5L(S{$ zl(xcc&d{{w+;_uOWDZ;z8GV1+-n00Mt*b5Wy6pXje~N(}Zt=druT3F|25b9`n&Xg0 ztizh_H^`SgHj?8D*ip(xKz}+@rZ6>Ep$4O+cL`p&u_w4(a-40qeBpk&WGcuz$h+sM z@l$?gE*Rw`!C@vWI=mLzpC2LrJ%!fk>QAxw&+&A+lmo$hY{1sRFb!}xQ;JpE zW{W(QAsRZUV%|ApC15SbWW7KaT(7MwWeSvef zg3kO+rr^qYsSP6}lJXAplSX`L*aglQ8rl8K*xE^AwASX6kYz(M-7bJhInKCx#A(}Q zLJW40ZAp3p5eb?YfB_)69(-~xyY4hCte>z=gcKun z!d2?0wYPlh|1xkCWI{lISs8Wm5OgfpUq^Sbr=AyrfT~+(&ljL+H2@OI^Yk-nM{z4eBl14E1Wi4k7(tMLN%dWH zpKEYIy@Z1`p0EY4*O%yY2*1ETf;-&0M!@xP1^F>)FlLMLon9I}j5dMeqgFprn)|OX zJJ$8o-)lJHErB-$B@(+HGjEKRK8eZCdy1MXQNFjmOcdj4c@waHUy1ouE)#_1p8{s% zwRYYi-N}-@xnzx)hxTK;jbgiXciCKi5*?hTed&Ru>0gc~DAPE%C%m5X8#Xa$6aQ}{ z8sZHHFNR3)*AU>Dqq8^@~GmRv%&s=O3-Onq{Y+pm2!+#{QccHoRQlzQnys3!Zx3x z?~GFr_Bv*z9W6c~u=!vt;p<-`4?+8LPM{Y{E4*=eg#60x54T?Qo$bp*KFiaJPUY7) z?(+xpNw2~2@vp?-PW}p;5v5|u8Ce2N(c5EDIKPwCql%C8brHnpmF?)jG_F&~{Ed}{ z*vtoXc;)cfwT|6VVSMelIAfrnT^V4Uuq20S!%!~jM9o-IKT0?|i~g_#Ulvt5 zu>7M(a?|5wEo@lQFA&X5&}X1&X!c~$TG)+Vt08Q*Mqa$>XQC+vju48aTdf2)iGM?j zf-?J_H)P9bMTeTgb~HyhAXW z%1vCko#nXS)kSTKg40GRC;Om22lLv!r==c2tCdp+wsgBiXjAjc%g|ZB^LBJ zW(^9WhH40mLO<8LgyzdDUOOFh5ok6W#c7H7H02&hp|aMl5eQ>58==K~VRZ&sv~~zf z%9@~hBjcRPLX5PiH@p7g4Wx<^WN~9j_=tKGYN%!49jkjH{jUqvlMi1qpyEv zkI3m{BN3gSLETUNql!$KzK{s1PL9KX$%iQ3K8|z zIx%GB_>s>ZtwC&$pCjbBID#pGo?`p{cM3QI)?v1j45LBcGw$u!`%JDm-kVGe)sMtG)lNfHG3+*^>irF`$HMl2N9?S)kxe(D z)j~s2VtyGz#N5bN(|>c&JI@}sW(qAm$Fyi~y}sU4Xv7Ew;$oErC)M62?BoST7D~!YWpflCH^QzT$Sw)m69t6`~0az6=%Y|eYM4xugv3L_a0Vi2RxwX;}VeU zB*x#z5R71d#BaS@v6-)w0(Y`lWAgBsRw!J*?>GOB&&{L-b)5>gcHIeNmu+b&p1tzZ zrF;tB2lW^k*etR}g!;4PTFu(*z(;s4gt%-kzeCJP0ljdv62NkcC*@>;9Am#`<0B60 z{<5xXc(_}VSSF>e;k8VUK(hRIlWS9Viqj41rP!@5aWz@hg#p#=y2W%Xtui+%v?%G9RyR=f;UGd=K)ZgI~S_r1AIsCJBsNE z@=qQZ;_>j3BiDrG1v%SxDKxI&f8#oKADg^0GFM$ypROMq>#Hx%z=il#kv@M&{OT(> znvpQ&YPOU_%hTDYq<7aCDoD0RyWuO{3>TggF1@o~N74mQazB49X+3EsaVA?ZJV1hM z_50;nnr2&$pkLk#R_qmS;I2oJXEt8#1kJAP!&j!IPlBXx?UuLgBxb3*LQZ`EAugRq zyjnig!S!Uz?M@haK_YQG9PGb2{HJ{_Xsn`ifEORoB~IG|3X|(g#)(aS(v!1Z^vWK+ zGi?)cd|ncMWA-&-ZFd?W-d9gQWFQfxzCB3*LVZaF1I4Kn;bBujqfr{go*w9s`oUly z%}YPhTg4&`O?N@W0~FoX-s@VuS3YntLZ&L~XXTB@+O+efO&9mP2qGWRn}g2{byUIL z@q6`s`g6fupTP#ka)wZ~E_`*rai)Hu=q%-Yj>^O`--(j4+h^~u)Kt^2HKg&K(a%Cv zrojS+8}?_fKJyJ8jMnp2EBPgde(D&he1CCLxJ>~#ZUPNnXYjw9?a>n7>X0d4?*{Tk zremQriKk<^6FRuWEQ2Y_VJdG${r<_|FjL=ihn(8a|8=JwAy$GW%byyw_ffK`jwf)) zOAew8MuK=twfRCbRz6x3RoD-5aC)7-kJR@T7QAOri_rf)XIHKp-P@W?w0qy zsqylpTI%FcZ1^t9cJ1l6er5&$O2Kevz9=tr(Su4?vDFuHVH5sLOik#2Xfod*6TKA> z5qO!zE!XsrClf|SH71Gd{oa!>|=Y-Hip(Fj>f*&*a>-t%)fX=b+o15L+lKFKX$$Om_l6) zF)MAEBs63)`-zrCbf{sF+kM)zVZ*0Oeau=*R`T!W3h%@jSOlAKba?WfMCfo%&wAJA 
z@>ss%ZapZ)&+!5#X9T`n5d{i7welw z&O|^ivFKAV%v;YwJUcTNXu2QZI^8-ayX&sz%=v*HqG-x`3>)=$t z7O25d>CLjUGm=RLD$~SMXVb>1X$Do^L>J(ZED#W~%c zoKbrJA3edlUs*8Hm*G(%LYo|M^MP^VkYf~hv}^fhMz^H#sj~k!-xed2c2h3HW+i?> z)=|9=pYO#9^k;tL0JafS!GJ_XSdnr?l91<$kk!8tHDV@hl0FQ{75&OgzEaooAgAMb z0nB5co@ZYTz`opHt)<-_IkHP8qgXmVWf1j~eowX&o6~oe;rnpnCg^ckVn{i9JDfr* z%==cw)zX2Fp7c_O-qjV6sqkAjIyco1c0WG)W;GI}(S+|6E*VzDN36|qiF7tfLbuMY=%S`|h+q&ckkd26vR zO9UbFC(Q;-+&$>{?RYUR5JY05?(W=?4;M=ac{OZgK%n+t1aI|ruBy$gJ8s6b&H-#W zxRw_bY#NqQghKB*BlBkww8dJR!K!Sx`M*Mi9ElO-`k-Psqk7v=jv2_j<|ISNx|srL z_a+jLmKvpqH|hh1*6dn0@?EI-hxHu0CTj=VfV5(k8k;M=_~$~vbCVH0xsa>Ks7F{( zAKv~{F`0J(LeC@3fbdU6XV(gc?FM`M8NAP~%%XXBSFrzovt$bjpI2rOW+eeGp5ZRX zs&LrFukAg#1k#;(LEnpGB#~~gbhHyf7a@llOivi;h<~{Zwp!nfT*BrEo%8Cx{d^f^ zpq_PFpjJ(JF0!3x_(0|N4Iw1@G+QoPFjK@M3bV3~!si!!G#Ie4d;&=b(&~|4}%< zy%%Dw`o{7b2gIs}icUC;^4Gg%gOJj~?_$$b(Cv}R{rNscO=zOBM99f>q9%5mk`i}t z(!~`AO=h7S##y-!vW;vhKTfuR_WPUhQJ)h)ftiReX(yYBN z7e=GLTyV3srMp~;*dFiC+9^nxIHGSluU{EievetMdSVRYPG}WN>xCqb)$WaZ^ELM< zlH?YzEvV({BE)W|q{kp8FS|2jX;W9;Ek3O@PKlT?AY;#IxmbIadv=kBv%f13=g-47 z0d13>ICqT`9TG&X*3R$(1Ic4#jF}iZjn1Qhp6NowW{N!UA$s0V3-LSt`|Bhlhh^pKW_{d*&K`H7z47c(4TGyXWa1;Z3;T@f0 z9@!Heo#P!DOoKedlM#<%7F0upHA6!+LwkNoDIP8Y=TL5_;qx(Mg>{TtipLF1l(y{&gFSi$-*$>p#xN+?Bs@J@v{5w>jiR4@0xH`rcSU#gOC6Djh0TI6nes~h zT!?Cwj*)d&?~U<2erAuJ3tuAqm$W4;w2TQ9Mu)FF4EB#3+FKYp*>T+9kM3CU6ebCB zqF4_uIeFY&IK+~JY~li!)?iinWCx~5B3E|dHNKg_4R;I9P^l(VJ zG9noDHpC#6Hp?8Oggo3gXn};5)mu3>Es|IU#VJvtr4YG_?eTXvvfpYLAF$M(9t9M> z>ociWp?(0<>!MO#C{RUE82(K9fY~ee_4TI~jKEqas~_(c+OmVuS?chWM{o0JS9{Nv zny=AdtdRFqveFqJ_|(hjLgqpH*#mNGQ7sRSl4tPn>u2TOBI=GYWqR${fsuZvkR3Vb z;~KvZujMzg?{yZ|$RBq+^xMb*bLFc+V@GORdc%oum6D~2Q+ZxifHd{Gfruym52V<~ zqUdpq!{%8SIk)97X`;k-DxWokLObkau&Tw#%CA%6VSP#Ogc|3c3POQJYCn1GJ?=|= z-jTu0SsLcjLg8S+_;K+gj7hi3(A=qZyTiRdkhY|4E8*Wyaz@S9ynPxgdIvXEz#4p_ zeJ6J08hIg@|7d1xcULAUZ+rM$2x42}<%%O3(>xq9I36lm4qqT&erCT^os-;;|&}a9bM> zPq$Z`OR>QG+Ri0yPc)7l>X%8xv#tIy!>gtfd*ORZ&o;R?WlMP_Eo$OKRw5!d{(7F> zIOl<#@F?RJ>bg1W6mWi*i80!{w0E5q(}ij*NgLtG!zDgRu=b58isnI^+PyypxCclB zIY-&|j!E-|vGj!SmBSTQhit|#rBq61Y0b{7#F}_wPV!u{aOIv^#gv$GB^9<>D)5zE zMUf!AW+Zm|P2`*Kn+F@E`wr!`O;7q@o#W2deuJ@5$$eLAA^Lys0@2mLQtCs~j#`Hp zP5RHZHdMB*p3v3Ar6e9V{vUy491RZ+)AGyH_-Mfnty%@*F6WA-WmC4uEB*uxWi;bp zJB>zJQ+wJ^LL!!WEKZYt!ga@i?Kqirn$f=-s}B!311Y|!-giH1hLWluo}hsj4>SPEL_IdP#{nV+g% za=+2pY^HKqaPiu$0IST#=upN*;}?5(tOnTw$!;xYN?8lEua+1ZybnA}4sxIOiTW9r z7Udqfke96iJ?dp&vHN_a$AEBvppykVL7j$c4yP%lCBl&`<52#pW*0Gm2Ha0$?mttw zOv&S!)Z;x`#1r$CjTyQ+bNq&IrEtjL_=?nm40HOrR}pKXDpx*nP2%FK@Pg=MzG_;? zh3EMaz_zbD@Oj|#B#{KR1a{&akAxERfo3=K9k0WEy~K2pxxpRC>-vk= zk2CcY8rZ{P#q*kHI8)~PF71#Zig}jtoL0a}?z~4Wef~ zjpT7m>{@lnW~#oCmHxLvC1>7t#wp?PhElKpBzktQt4HQtF}okcD>5xh+5#^yj^5=E zkNUP^YKSpp-cS;mQ|#FMhE%wuB}D~hwa~R3Sb| zEt+KTG12#V(kyUbzn<_(pv?1A1jfX;rkZC1Ls)s!(Bw0_Obhj$D8KM~e{392J zU(I!5FDy@XAKU>zADM&*5Sux4XT!7EHzyT1o)?@i;zM35e7{RUJZ{Bm9If_rRE~mu zgh0Sz1+Vfbn79o1L#M7xyA@titYQ{nz^X5`;n^-+e`+Fw!11cgS~ zph&p82Z2zbrq%~%D!i{&>b4=%1HNh)3s#ZC@nKhncZxfR?bVNyh1MG=Fa=PN0om#! 
z?9?=qlNdE-UpKx~I~C~iAB-CS_^5$rc^C7g+ ztQViUE&&Zssdc zhSI&LFF-PCSUnIziHRL<A{mM0YH1B^^W;a*e5RZIM}9p z9am%;!V!#N7N_@QqU(ISrgWe54EMP!2&FuZx;;D&MH<#ZCFA)B?!1VEll-uM;E&6X z20%i$q~+_w)xIfOZv?Hm%I<$WiX zhW#qmksDr&!D}@G2VzQ|vwW_0N9gS16UN-43;DRsv5%uq1C3Afo?U2q2MaZi2XmS!vpW@yqp6M zBcUWqQc%@FZWyUr9+tY-9Tni?Er{l*TRo5A9EW*t8Hz1x~N z;j2gb_wjr+r-6WV8c(A0{RtKI)q6A*h@xQC>}XKx*eFv{;Kr|RZ`GmDv1%BpzbVk1 zfqkNMBf=5f;p);DMJnJac3h7NtyRT7c#!m}+x#2MoN}6-dF!vcar_`hxOB2}fVSp( z1R(gA&^gj`aK7>#6Shwr_E#qoLXH=L)0JjB&5Y=Jf$QD}`4h~p5rUz_QiBUi^>PHh zVvS%tDgs1(k&Tnrm(Yg4w|LKuzC6D{UY$mwQKvA$C-gn_>^I>-uvOylc#gI-)AwA| z+}^V#ll|?ft38g6=@h38u=iP(lK#^a>DMxA=3S}~M$n6(MN#kL`RmbKfv_$0ozca< zPyfpk&hX_-Qk?3FGIj%FJ1|eObP|T;{*c}|oc_k$1OF+UN z^x0hDWe^U?Qc7RC2~n)X6-)MjMU4qZ5|$qICu=!|OVTq$x#8z02I=U`AZD;q zXund1=5N|~P(lGw#DlHibdgqig7n>OT4$&I<+ep~Pyukc#bwCS^Iu>z#+vHtc)kLQ zE2-L69~QN~o*%l|*;CS6Rer*$aHIYDzcNLgjg>c(sLi)`^XsGaeBa+Vzo2jQa$<+mZO5%gd2(-J zYp;^)vnNaZk#Fcb|GLI$5VOVnTVQg~cT|k33hD!m^m#s=50D z93n^#bmX=I=92ctV8E&|)Jd#_1IB34(q$z<OKTYj&?vA6l+lx{!PfDmz=LMeu9f6grqY&--Zz7I=u_Nuk&xjtN6++LS=MQr| z*PcRE$@l*m5Y+?^Qr3^^hj*#MyS9#O)I1g1FWre1A(}iepMf4(%~(kW@&nFOwg5w3 z9=L-A8@jWTax8+1Z5v7LVrvqjsVjnBfQ7vBSc~%UgO<&H=k-mRz_IxBh}xpZaQ8v) z?KK)kqe`OP#3oe-5o&}68A9ho>}Ey=vwcAe;klxrik}W#Hity5alX|TpX@>*zK9qdb^zqU}fo*?Wvx_3_qYl7nj6Z3ed%WLC>JE1gnDrw=4 zg6-phX7q3KL*|j2Dai{_N^f&zYz+&a4$tP{D-(zIAoJ3w(8$h}oQWRf@E0~*s$ZqD z#b+9)yC*xgt?8KE&EDwarA?Pj!SlK+;BjB)2tnqP&4S%x_;Syf;*SyGKdnW1SRW{G z9Q|*v&I2BUhf)v*?CVz(CD=j)eteg6y6iYH0K6NO6r&kqOuQD+j2fFtv37HF+vmy( z{LElk#<=4xPh_sB2Z?k?@V8cd*d5H2>n?jaoU!VD-l-`HuVvx?)5Z}^mNZ%H&8DxI zS@U%kRzD9g8|!sLOysqk$S~F=Ckw-%HdDGlW`Al-l)%gRJJl9Ynl_ z2$sU0gf}vG5GyJ2@qB1=C$Sum8Ld`dGQ5|t+jOhO?2iWNBlO5()+Qq+u35=2Hp5jq z0fBO4rpOT;(Ngjn%KTiM4#TW4rIAZsuf?(GHm`@MK0buZa&6i{uyXq9Xa8PoL)yYO zArUBCWgU0V2Mbnxq&hpSlkYx*?~jGu-=*mI#?azFS&djGBzs|0wLEhg7Lo5l!P`bj@dt1#%6N=s@&@bgST()`lt8<@JceV_GpPBBCmL0$-E%;hn8G3;t?K74t7`Tt0m>Ft@Ab; z*sPUPAHJ>c`ijl`Y`jj9kPeEEcbw!JBdNRr{x4oybhjSijNDO=wj$Q6b=|B!ihvTk zdr5@96FRR&YH-89U-3;Ovx?|>d}eh33$JFHZ}Yq5P3c?xQ32oiinKn2Jm3!<8%)C= zI;SHnZI}h$IyRm9Aa`X;N@E>K9=zVQ2b>?_dH!Wi@eK^=JQYP@?Ta1~{5HoC-40&< zL~&mB(8K2g!&}l@_2g5+EVGVedtlMKASt=CMEy{Z`d|hR!6jex)rz)2dEkcx7KxKF zvM%^#IPs+(0xjXTXPQ7XpfK=uBP3@G2FYf*c1C`KkQR%Hjapy04TX)${l;sQxG^M6YC^8#Yk zJ^L58HtztguY~GlI>vD#AlC1IS5Va$6AMqfA+2=x_TUtkJ_J`1We}qIfk*lj$KdR= zl(f#;1sQPs+~SKb=omES^EsnC1UrZ(@UeRTx{Y7W@}r_2TEV5G$rv`u?J0y+-FXG( z!OW5_e?4D1D;}Oax}tT<5NTYWe26PqhPi$Qev5Kg>(rwcX_Wtaz6M9|)r;ZIzPgrb zca@}ox8n0CCsPLo>d)1}T`NqN_*r}1U-OibC|?&F(a?Rrilu4pyl=cc5P(C4TC4hj zR-JZ6k)GV_LgiB;#1rYJwip$;*SW@4iL{~*RQYlqkrp}oKDwS_v#9_2U3)_2?<1r+ zle9XY^vibowxIS_kmnTz3B4HaJt<7V8MxTF5&TA7Jv_M_gx10 z9%Fh7EI-p;y#3q`MNFUj-2 zgAtT-II?)_+UBd&9Z!7h@;=Q-wx`LJ}FI>YM4^9|`>Q{@T7 zu~psB_48MJsDAE_2W=R9ndp4NM;S2LeCWJcLpPkFR$_oOk|R(kE&z5gR?wCOi0x4w?E>n(}nk z7~>coDu1-ppF9UmF?8TEw{p05iIP93a?So zA}TrP0RA?lTL?%Pgb+unq)sni!e#g;Vs_nUl&Aq|1FIpW@Q0U&&@PBOMhYS_<~PVo zk6>YGD1U3y4{RuX$b51#)3QteOqq9^cuAmK&W)viDVk~>o71)I6Vo6dPBhtob$1g1 zdL6F_Qw(!rX%Zty&x=sH52r4!r3^GFXMO!yem8XLp4rx;J>xm<46>Wo*i7jOI&9Y* z`_6^uvmHrhcRlbeEO&YJkFt4fytckU;HqCA9K{i*9XvT-Elq@5BT5Q9bIeKALf@P& zNPi1CP}-ub1wJEPiFKQ~oCjvPR>MUrZM_^1hTC7xYq>vc62w2>OBViF68!Y&uyUsD zj<=wQ=>mmWC>tl-#aT~?Mqk745?I#b7RfB@^tJ1%8(FSBdk};a8DcVk^y`ng|LUt@ zgBtFLkJZves8KXqI1i*eg}NhrFe?eG`H*V*actpj+Re&dLZddM3(j`v(%$aXR*A2A zHBL6bA{leBQijl8odwY}#{w@Xrv$?PyU;k`Q(E73rgkOEK?3c_yJS*2xOY36<4{2u zpUz1H(l3lbD81k22k{Hm0gLHCCb@{J$NTs)=f8NI(L6oxHTk)SMXBpV5sXSn_^4ec z<4MNrld70w3Yr60o%L%E;ky_Mc+P3hv>Y&N2zzi^5rWQya4UGtLB(S&v9f5Aa;`uM#1$S@a#hDb;^5Y^*!v3u~B+{ 
zv19ewnR@_rEJs*%THa-B4jR5Z-P9094$|sMeKs#zisLcI7W6ulDY@K48cu13^REoV zT)JwPK-$>fXpOMQ?62>=RY)UQm694%_IP1Xs4$MW>E_~>CN&mmRo4GTBl4SQAJr{= z?67;!<*1w2#JMemdT#zv7#v{eb$yJX&2JZEqb<-!3a)$J#LM`n(dB>^q6xwaDoupp zxMKnm_=LmNK~Ozv{dpO&KW@Pj`#{XZXKM-q&!utwa#ODz~wy+};KxZ?X6(^#nk}nT$6|)g<3t5gJgIT-)HHnCwP^kClEK?~nN#4`W|3r>5(FRN@#s=XN@3J#WJN z0fyJ-%*TZe4CwC$m`tKr)gC1&gBJjVl+)%Vtl&$XR$pxPjGFZYnEtN%`2FX(EHWh60EP; zX8GZ$d#=2D{Z9Wahz&J^rq5XK70%?oC}P;$^XjSmXd@}l@89Y>uuT*1K#}n0+bMk! zk7rE6VQu(+>`Za>&4?HwE3-yLjM8gU{o|p~d4{IU`NU5*j& zPStw~=c5p39l!%_$A@|}gI*G&nakWrL`#9Z%9+x+r1->$(u2JD-3t~PNozF%r|az2 ze#5tn9FeBp=Ys}pgxtd+s#_n9X}N6}39I;ESZ3y{B6I3J01WSkI^~!Rsq9JjNBB)? zA@qOM@r+3WZxOlQi`{SsJzqpS+pQUt5Shon(lZBF*; z30MqITRzYJ88EaIv*txGGKNwlY8qW6^`-DK4ENXHyhhoG(5;+L3Qi{iV!YqwAh-M~ zr@T)66F*u1n=g^rbfqeP67nN*@05`k|Yg9FN!L<+I1y(HcG%l9)XXQn{GJ5^Zw-O}Xm^!!5GB z>CEltlq~Rw!|fIe;Kt%s*w3Q7Kd-4McrV^7_x1!ST0s51F7KR%4~ys4-X+a#e3%ty zYq!=!u}oV%|0vS=v2d$+VV)UlW`nT=QJ|$A`*k-QsAf`BuKR7S%o>f9Ts+f2fByLJ#T6m3u=|TVVfn%$c!sCWWxhLq|?@CgNG_ ziQ3(O`Ivr3L>PvLE6jb|mId0_-MhG2-x<li^I^fn`?@BDPh;? zb?w$Q*36hrhI7Ai1S-)ba7%m`g&qFCfCAtnWDMEiZTnHoOb&q1QzQ+2&vNjyVEoDH zt~pfT;SZ&1k*2??%z8}i5&z}zgzw$+)+8u5(!hmUtmOO0$3;yi7Vaq3T4}$(W1SZp z7`yVlIhNo6SW~GO1RW?x?;>^10&fs0PffYuySYt*`He0sN`G_fAMx(xCb zVu3LfYrp8HnO?gwirXM&rO10xzf)m*)P>H`^}E1#_9s%n<0UsJ#g%sBCL`aC#zAir+H8{=@+7>*oN)MCdsBsK@;vjh^?HIdtNN+9&E z#;iPI_`S6N$HhI|%qZ7%+7?B~@lRVG_LCnimSCB1d*vCDurAtQp(`RS_@2}PjlHr= zzr&WW2anNbdScy__Qjg*Dh@xN$L^>4p^g(;X(h2aN*`zC;Vb=zqxbh&keZ3KjxAFc zvC&I+-rHdd{2gzoz0toKq4WMsGUE9b$DK}0x zL!Q=J@8I}t7cqt37W;`w+ih2ge-bLA^88W8Q7uxJ{wzT|1Fn0Jdx)ms!2}iSh7=96ov+y7lNQ=c8Ph%ilg7-~#^s{#JB=dHzb-bmh2kt^*iryQvU; z?ySo2c2Gtp@Rp&aAYbaKy2;L8#t`I*)x7d zhS)YNKLS{$O0B%hv{p#)^?zkR?4~VSq3M^(J6w&+2`lDXP||)nUUr1t3qjhrvTf!lETh7^dL!&+EX_g>#W) zD-}ye0M+zaazZo`P{n3?_UOho$aHAmPOkrJJjwFsX)`oQSFc&e>Pwy@h7Cdx>i|tS zIc^3A!-kDpVB7W`P^m(B7&xG>hRwFY7foTu&fS`H>bz*p;^muO6Hct!%a<;a{fOmorPSg4F)e?zJ$?Z z#>2Yx>*0TeKEMrK9iTzIx|(#2o3(%~TeoTa$Z_M!MZ`m7mHnyTe;79bR<2%)?6diy zQNsrC4UT2n*q}DveirSc zDxCP6UOKrzv#;wyoAyoQY7*OMR`MS+zMmQE ztHcmH+?0tV?;j=%fMOpN(qw%9-b3ivZV>t@`SJW$t)Hy{yK?#PaLPy@hy12bAwcYY zx-^;C`@3Gln$W46vPkDCsU6)rj=+a+%7YszKi||o$d(q0p62;CY*LFYwAQWOZWvDO z{bTEVGxpvihfc!S(X`lB82WS@rHRk$V_RQhQdRvW2488X8ikP2 z_9gk4w^v7Q^VH7j*%C};*G8#ZE)z%hs070|+cJUXv zO8bU>-&1}NrU%W*KhUl^C7klklP`w`)FrXyZB`X4#c$TSJ`^mJPxjxv;~*T~cLLqo z{J6W?rxl<=)iQGYS(BHr9gY<4=H?EAzw0iC5rAgWcl-g)I6pbEh4Nw#CpZln{2`zX z`$Ohg7yPr0Ayz3te|5cQF})Y$n|jX7k9#-@3`_Z?N#h4=*Q@|{6;t`_x%1)o-=&ur z1a_S>dm1kqBJe&~zg}Y*wDc+NiXVzQo;h$@j;Okz9rIteXa@wgZHq^?e1jhnVl#0A z#J)x_A09i2FDD4<#h|bZ97{b%$e>rw z`buC}cVv`R8v@Z%IA@h*X)zwL&Qc!wFoh*nxu3>Wetm~`)-bcqM1AoLH!H<8Kc!tU z(4u`~$crZfa6_A^X8pw1f#&1M$JYmX4GQGZnZ5S+%_f#>CBDQos;n{JM!kh4bAE>h z_moWn>iT~>;2W7$lQMnrG87ih_!VA-su`W7{8Oaxfc_)8%JY)gNyiSOd90LAdFb`G zREL#Vhes5QMjPL_4!>a?q_LuoZ(N6|n5VYC^B2sEfY^qtH2V4X(sg7~-Dq-a%cuN3 z@VLx@qrR2n(;9p>_P1n|_(t+wK7Tdbz5P&5tFG_=J}dxDzN#yS-MMuimdvFOIokT> zK_=i<-zfd2z2_)rS29qClZHe$c`-wPSgmg0A@hUM!A;yCk50>?_m>*((i1{Jr2U||CpLd@RoFUf53=qrLE4(!4x|@7aABGK9 zKE6_^-;bN90>wJFd}&^hJh_p|LVba6`2wr zSHWLxdpifVV<0R7&5|MD>>LEsNvxIH`lz6upYFPQ(Vg!AxhULHT=Q2fpbn4KK&1>0dEz~?>cdcCgbJkMn zN6IfYK%EgF_LtvQ!%sicyMLv;+BIvy_^~79{I}q7@k3RoVCd^_HsC#IlBs@D zezDD8e`HYnW7Tp~8agR5654g>2ABRJ;E_^Z(IWqYMe}DM(T;K|R^3Kl!aW4)lKHS` zVOX|E0jW)%Hp`@USTO!b^~vj>2i77REjPq6d>k3Rc{umC<wpD3 z*yi$-1ls&9)z2utq66q1Fy+Gy$=5db%P0v5`|1V07s5@;_~Jf6?Co3DBOtaC)T&b- z_TwfXXV*gk#2!FEEWY=2P`hU{hS;{OBN!l-0Gc+))eI1eAV!|9`Ik*#>0)J!wQC;; zzY_zrF5i|dTfnN-s~ORkx;>8{-o{f~eOR5s@dVd=1&hMtNBC}8mEOD$JXbWCf^YVY94}sRx%6_>yu6szVf$a(6B*0IXru={P6VIGflfvd8dK~2rB-4 
z^&c94wS3g|Cx6m!|6DlD^5s7Su5h z1W%}ZqN$gkpC8-_zNQ^xqsGYjsKKx!ul~3RrJ&rVsXrqq7MWu8 z`4+%9JuxmfS1`^rjH{0qA8zl|nOQX?hFIJ5yHs!6?*}42Ng!#;OT!RL!-F?N*r;QK zJd*5qAcj~v5fJ3@yOA$%SgC>{gz=n=Nd4|M&eGG zFFp&9;|Fy58X6&(Q*Q`lx*b_pjpWyNP)De$VhW?Q1fuM!Vu*FaT{!EuE|5Q7^ZWzb z_GgTy`Bu{;1S^dRFrH8(A*0F&FcIuF~k!4>{nlOWeZeweBSPFurI$b=u2zz>5qM>pJNgr zc9>oAG^!7;*ZX3(Rx!Zx4*q`V6j_eya6Ln8VwInsA(po*Hk$ykdv_htjB6SPmKb8y z?bkC5O5mVPvK%QNH#|CI(CF@{UkIQA>{yTqqi33xr_R?4nV$O&!3R*2S^XOxJC4VQ z^YJ+Z!MZ#@in4Rt0XTNxgdEP}InGuXVkLMDF~qvz^U%8iVtba$3_Z#!_Zi;azwf?+ zuh(4XPT4jB!`^%NH*DPAUXEk$MVnSFphJhYaxyjObuqHT=2fx7s=>0^vSx)}e_gD( zZ$#ozI*%Xy2Y!BOaM!PwoF}~qXog4eG;Pulx^(U+`&+tLv}hSDUH+@3V2wX#h<$SX z73y0A!Xis-G~+%*xEI|h&=BxTJ-!?_E|S)Iw{y)H=vG~l3CYC>*h>DE41rqJ09hW7 z17*3}h&@)%I!l1q<2M{5KQ$94)nNiMMn{B&zq3AYMWFa#)Lm-8_o`nvz-0t&S{qMI zTh@m{h4XW9z-ha ztT!jR_Cv8*EX!f39JLpPjC0eI(VCQ|5F{tVI88d4(fQZF~q9Nr*tXs z#p%FN%7zG9gU>?Xs6L*(^7TPB@!o@#qrIMoynyKw7QR!y>T$^#V)gY@yUXJykm{c* z&knRHZmWrjSRJo;>7r2oi%&_U%>1?e036wG@BL3=h;>07DfNfr7-A>R46%Ifa)wy7 z9M$7c_>1>gVmvtJek|lDRa!qeuY61SgG&ll7tRoCE|=Kn83!FhOoZG%^L(5uU*5ijbEh+w zSULe`HiBK1de~q70#jbpsKCxW;Wz^LxKZ-u^07aE?fi{B{h|HaNbhL+r5cG~>H#rw-6D zpdA+@3?DTXHg4XE`Ixs~UH{W(&S{QSSIV_6|G+>3#O0OCTd;5e+_-T=u6N$N#0(oC zhX>uf0rTfCki(U7%nqQPj}OYPQ^#*u7Bt5R&@T0ZbNlDWfgLhKEMqC{-p5p~?|ON7 zPGX4tbETYa%-9KRlLZe~Y!`6DW7v-(YcwZlc=h@fR7A#Ewfth6zZ1ry4nSr~Zl+A1 z1xuH$!1|cWKcGV!=+-%qM}-ZWw!nzd(&=YZufKL|fOH77Cv)reJ!sIlIlHDGUhnTl z4w9J>X(MmBO455798!^OI!f*EVVE_I@PA`7g5M+oQHW(RV58yZS1`aE4 z&MabxZOif_K&+P!0%C12w}LLq46B>)KG%c*u_n{}%f_&532nTP%%`>MB8%=}?zC#x zsa+G+uUjq0Y0;`3to>uX>{egQnD!F{21q)S^iS~;W!1q}ua6$t2PIU1*xY#$5NpAV z7(NJf)j&D4Ltq#9<=5Y3xB4RF=>zaY7F;q14<434qN)h$yy!cVr*}G4C=!DGzFY>x zQh%gQn^^)3Yhu~&@b}LR|6V*JCtS039kgsM$ximf3%}I9@E?LjxgkQD!bM9WLoI?> zb>`;ve;{ktEX+3ufvJUxl+eTox^_v!=vw&0l89vq(!}A8{d=}U*|HyVC)8`$7SO3Km5~O$L4yuA(+4%TL->8WpHm>?!q_ciMOv>_6 zF~r(6u0&)ZfkZ6_pgz)rp(eoOs8*vq9*RxSl4OBVm3FJC z^P+yB{!klBOs{kBIQY#Ql$QtUG<5B(zSNf^+~yKcv%m6Cy$({x(K5tJDM(Mo#XLtQ zZD*OdGQ{rLc?b^eJ#JPlV}@95G(8KSmMu?;n<-0r=!nNrb94RbO<1wS_yLH=AzNMk z-d%^`;GVzb;MM_6A-8`H*-aP!;gQ`tH}8cY1g<;Y(CaibxIzk@N3r zGH1;If#0^0vx$g|fC-~wpKDq1DEkgwmARs2v6iy4MR&>t<3+9d)!^fD#bvMUoA$wR z1fyCqoqDy!IW4`+r#LevEdu@hCo$9^KW;p-%n)ndI${#A%n+-@x3_Ji;As;GH%5I% zVfN%}&Ja7DXK!fhS#`s4$NR+5=Hsnhr{<6&S5`TXNn>V17y`iUGbY`LYkkQ$h9S0c zg>o?NJBdwzo+Dbc>kPLM_{vSSDiz@S5reps*ef(X`RN~t8SkHh)u()5tkn>V*8)zbx^eCwPcJVR z*smuvYEYLIM?V#+;*smGUP*3K`9^>`7QqzWty2dC=d@#9G#&|5s}5I>pRJqLLY7P! zS@11nO>5Bj>p0cl5D*s>1pfYXM7kCXD+A$F^0ZVxKA#*J?u_~9$oi@KC$8-;g>#hV z>2aK!>iJm!-*-+&Fl=UgS9<@!fADS3eh6L;V%6vK`^@nJ%uUR&vk?&M$oa+FV>G`H z>jAMn`pWv$l6>2={t9|`?<&D%ZFWDO){W`6(D17dsi8w`8*9hAfM@=@zEdVf$#H~(Urzr>WwH`o$D^%E07 zmG_T&d{7;a9#B}SsqR&hgJiQr-c9f#9FVpDvjpw&)u-t|K9Z1wecP&3{&Z85AG)dopK;4Gp=E?RCY! 
zXq+pqC_8#b9e?iaG9b1t9N2%@>E#bq0b*+-Al6asH3!6|wMm}ALH{BkRsy_9fY`5O z`O`b~7tjA=a}_;N4}?OquUf+H-TTpHy}yWgw`x`C-Wd`S0-3VrvRI#TWy`|8Jv*of z#5+8NtU2hrlJ)#~_>m)eKNU)YRV`M43ht{uu&-|5O2jFJHL=pVw^!ckbS`SU+3Fe;)r_2#8g_H4=cC`i;v& z-JX}vALE824{9##-m@3W|H81n`}gjGvL8zX22x+;!4#7G>BQsPH?L9_%pAk}L*;6< z@!Gh53M1jcsS`&`u>p4dwHxZzZ^GiIN$m#@RUqB#_}#DYhjfoI?yvrRd&Bo*M#*`3 zBOsPevE%W#Z`r6BW0|w&F>D{jFIToSjz0YhPy7f8s3m}a*owF& zl9x6Z#+FFJ7IMHbkaWjInzX(Mh%Kxh>o#A%8zA0%BV>>j61( zW`)@c$7ZS_Z@p z!!*)3tysAX3>>CB6jQ!&F|(#FgRPtQU{WdHRqN+~msd(nR>WqzV=FDDjq7XVzhOJ> zqNglXX2tS9;kV@*(8u@Nk0`ZN5NIZ#D2nHcDGeC>{&^b>nUrRhi%E()q#C z-=~-*uV1UZCZ4{Yvlsnfp)UtmOxMpMLo9)t<0gmqEnVdrW$|UrCvtxg*meJ3M<5s( zs@45!4v1YPhv&|l6Tz6-W_w5Ua#1h)jvZZpaNia2V}U z-;UgQR@@()dC^_m-Mt(s??n>yiB@5iE8Vk6VsJZ)Lvn^!D47 z3+BWA6GtpQ3c0xBY0R#WK5r^kw7q~>O0=SBKKMAhH%qVnvBVIYElX*rUu&s4D!eO0 zEM-=pKz>*-Pr0{IA6>f<1O@WtmCK^zJIr`aaE<}Cs3(c zIUXG(KckA%wPPQ zuEOf5LN;P(KQ-?5oxAWs%Z?Bk8KuUREmI0+O#EIA*K63!p?d~dAmIUFY_Tt>;!fHu z$|NfJN#oDyjE|W;<$l5ag8K#APqqVW2U61x=+sN`IYF81w2ZTK3^#804h%xFhSo_n zKlOOiu{~m((f-_(Ib{Dtnp1bp{-}i5gy_eHuX;QTeU&44l##l-k*)+wtfSKC7B;tENXh?N|jIvvIe~H{IfvI zveR>+(KUI$g_y_Ewtw%=jjAdwDa7_eg;;HRmrm_eg;;uSe@PJX%-jFVg4yav;n#28 z!exsas_nxKtk&AzrZa~4yR|r@u)rj_WEn- z(xr4{*iX9mf-|T8vpFA!_HP&GOpL?*I1ZEO|IYM}LL;Ep_)Ir@p}*n$@0C$p~FX^ zRl6=&)_nXDKJ(^I!&PgZ61!8{UR}-agEnOsDoVSHa|rP3(%)gk*JIT5f(7!!sz2!6 zkm5f0s12Mrbs7Wq_h&p_ZMc0qREsXAwE-K@2Go+(!?|;)4n2Lk%k?MM9~@3s39-T0 z7T4|vAI9Gj$Mx|3HTC$68?t6&ybPyC`k!l8LbYnB5R2-kTed1=H#O_0o}hID2lPQP zkp8e`3n|3DC3=cBUAyhf@lBpM4%)PCC5n?Gsc!%gVwIk#ty8;A=@-wR(hy=-L!%}i zs#z#~@1E^=jeSmyqYWZ5ju`cg5KS>2pUg-~prN6mB6#!WO&ImfcQQ%!W&4Q0K4SPV z5g&&uh>;Q~Ihig3CO?$Ed-pbtLhN-YQml-c(^!7Upnfo*e_xSK*LGk3>~`@#ux}?k z_v|ww?Tk%9`~tnQre~%5*Ak=2DvedCxvNrPx&~|A6{$ zS;Kvg<4~bXn@_zPvK;q{KWS1-@1{(ck2fP5M7g<($3jR*ur9;D zM^8iFFGks|_lrJBD8yR!+w{Zw(4w`Kn4#N&6M>(QC?eEfE%ln=Ao8r;Vz<3Ud~$?j zN5ZM~#kg$F@FC-2&u*M-(BN*w(_Xmo`*opttH!u2i@U7OAzvZHcGKMt*lXY0Z?w_n zGq!Usl4xztB`%pPRCz7<8QcFwKRlN;UeUGn;>9b_wf!KMa4^=N6~d${`f=MP0z*~# z)dcU4+ckxvp#@Fz5vevM?!vh%NWi_EWdAVs7e~TIh1hMIcEgtSyItOomK-Dqu_KWX zYZz~#NDj6kbuC(@%lC4Xa_~mIswsgCj{i%`iRcT|SK6HrG4jyTp$IO`UwVTvdt~g9o*eLa64s$SgO&C6kB4P^?{f2XkCRDyFmHpyK19S zg%mq<$N;Ee`Jz`>IR*~n#Nwx)f1yU#ulFYO?$uolQ~KC(=YN9?yYf3;dhLM*mnH@3mfd2HRuNQlKY+{ZR(@|P@&3b7wIkf#n#ocIrx z{JcWfPHlcKZ&ZjKg7;w-NB7{9zMpNA-&uU=I0t<7nXvCvwW<)S!He4AOuhZS`~*Dk zIzcWT$@-%P{bbWlExuHL!6Nyfbw}$9q0^_%!Qwf8sO6q{p)@pnub!xrepcg31b^?r z>xH46$%oN4(s`hrlJABJvDxrC>7w)V^Mz%LXG2gxfT-c*sWZ@~v-KJwt9`mmX_z*V zUMivD1gbZ^-J}J?pc1ap&6@m!`m(vtfUjWt?!Cq=cEds;7O&0XxZLS_jalgeE_~c( z--Z6dxN*Osam4+A`vLa@9tUg(Qo|0YWKC2rwceQErf~8`C4It;8@>&%;Rj3Pw&Bva zOP4LjEi(AhrEhx#9&XC~fj=POKSld0-Sv z&hfA3CGIHXm=LOP91I!=5>7dOi;A(-2}MF6ppW?&gU+JLaZRfCbPfU z(qWxQ_Rv=#ybm1lXi`3`QpSdr=>mxyF_GeWE+9@^sFwDG$z zps#hVxn0L@u>bGFN_ALYQsXxuz#mt;vl6cUSh)&@4Ex$rR%wqsJ|!{{z8NtT>b&u~ zny11mH8jEwi7;)_k5I8fIgu{aZ)N6weR{zAO=!#mJVf=(*J`|pTlVc;Qs+-pg2!hh z#w+mviVc{S;`&qCK2csJ#NsuX4p*K=Cw!?8`|#csWoib?iYc0O_WxYF3JI}Qq4pc~ zVC%MRDkqrc#?qoa1N!#^Ql#9nWh)Y5>!;fO#{V=9mCfypLZthuVMj@_FPy z!lzT}zWD?P!=hWxB3jZF{xnLB@xMhQi6?M^zXbH+dfwVpp3U$Mvfh zP~a_x$h&dlCa7B<6^Tvy^`eFIpix6*4xR|HIdbMz^L6dg8NMI$t-8PZZ@r7EuN%~K zv--~ccLEBc-6wbF&K)RFC{*Nc)$(JQI)y|g6!&@e9x!pzG`q*&>x=G4i2YJ#2Y)2Q z5@A~6a_ffceazc8NUg`2eqnaJvOTW&pY`mgBgM*t70R$Z{en;@6EHsQM?r2&9D8&3XbUgZcX-A=a)!tXc(_5kWVt-vQr$JHwjE zWXp^YI~kRqR^bb8azqRz^;)eHgxH=)h_&PFE5r^`OAtY{RokX&*wu|eeZNCBTO^@a zoEZ)(X;x-R#;4;oBgBpxHUW0-IFL+f+vIMaw}LuJkn^IEuyX6xZM^olA<-_Mt^$=1 ztBJMOK0%1p#n`QmSpTh_O|O{Z-9bw0I$x6=EOU3xmU& zF``zQ3LAC_u`bqaLx>d$vHHqv$U#Wz{*8#M8dtem1*rMPtC|?+H;)`V2FsWHX&Jms=#3CU! 
zk2* zgtb(1O+S7c%9Jgk)^_UTS@>lxz06ISGup0`Snsb6*4y{XZurrSr`mw;a9R6k&9e56 z*iN-DZrBiFS0Wj8*FkL--L?%ORw%?i(B;VpZ75j=39&7d$vA4Ugz2B9hUP z@9BBGM9L!q4Y@-{jzUXPh)n|TeFnAGz5|BZ&0n%a379);5(N0`NqSMyQE>6XWjKEF zB$fqe@V51RPUH7Kwh&_J8Amzq`*v>9D8&8=eFqTX!kXSyD8#m=s&M)!5fpNtHC@(2 z{^I#FbcEPC_1?u&DeGVD+joHC8kNYiqzXB0YKrSkX}e_k%sL+B@fm$mh8;idJII?m zH(a}Z6Fxyvcc5h#=Q;6-{tvj~Dyx+?BH7k|^t?ELK$evb0 zXCi)VZ5<~fgM&ZHn4$T8WgHM8R;!}=*%y6b^X6@8y38m258i76{d!Z|71z0I57@b9 z4+fGSzXc2Cht+>r-_^{Ty8wRpk>fz%Bj3}_KwWbm{u3Y5u-_dO^<~pPQ09X$ksQtJZ`w zXU?em3%h>@=mQpKokHeyoDjQb*A95eLR=+6Y~i9MP1{e$O`o} zCr?-jv5guNA$FCDyY~FvxOPz`#BLx$Y`wQl^DSOD9~w1M=HSD_Ng+0mns4BM{(`_Q z4itZ0EcY@TJ9bP>*X~cY^PeMsLm3o4Aou7|IONEs5Nvz&_zb=oHA2MGPxTtLVaLv0 zc8|MvuO2!=tY1Jd5@O@k{0|?v-p91(%b9**cD(YqUG=;7B2uiCcXjIYaTxpEDD^m9 ztv9vGWy?`WoHId)RnH!8Fain!dWBetio6fvumvUb=>APQ#wWw$ks%afQ@Ov`CB$NX z3_z0W!rvx{3y<8b*1vi4HneTtTSVDai2bmKQpD1~XD=8HIddrwNwThgj-G;kU*MOD zcE&^g0=Z%8>~Cat)gRF78#schwdBT58VV(GnO&{BdgVHFY~5dpG8tcvD8yENtvvJ_ z)Ky!p*xtNxmrYz!5mP5Egoh8TM6P5Bu_o;`!^ey$ym-NKm?xFmWTTR)uk6z$;JY8J z4-B#x5xx%WJp!|4`~sKGUlY*{o4f&SI#^Y@DAI-yi~Dm(Ay)24)4s72@m!Wn;#^W0 zO@0d6Rfugjh>~sc8_RPTF|srMjQE)nVh!gbqv4q>A=b(EYso=6g;)WW(%(!K1|=cZ zvcIRFF9nU>w@y-%%HoQpe>v4(W1?dqCN?(7^Oq-oPMpwt1!$bhC6i=RW8-3BEGpaz zg+>|a6dp)}rTlo(RYI&&3bDqW$wnytXO;57r~k_XKkm5!<96Rr6D9HW>W#gyVqI%B zo;KdDGYbmjeOVW`;?I_F_4~uBO%rc)nUzutPY(>JDJQ#uu?sC z?^!n}?-#C9ihXW?hqg|8Yn4`Tk!jYRHtCCf`O38mDWI4PjF%#7cY(_1h3)r9!No{5FJGsSvBl?}+`D z*=hWkD#S`suA#k@-&7&?-UFpdOMbhASd;qbeA*?%8uB}fFSXZHAyxulso$nv`4Na= z|5Tr;Lad=a8KnHOzgDSU0czBymr9CT@!MbcAq~BR(bswT;x(8(WwDY;Q(iqEfHP@8)&Ozza#VbnpGz>+hK40O*B; z*mo_2*r_w-!kh)x7hR?WG?6-mSZp6bq0BC7G9`--=1rmf;PQ;OiU9XF9#3pf)Zw7L z-ib&aA3Q#?;P{YaO+Z4d@`;i%5a_Oj!x}=YREU)p#0h&LF-}ad5@It7zMVU?g-#vX z$fL6BAHaE-HXUf5KI#53bLs>GDfSSSK^)VeKkZC{5No!76&O+q6a{ z+Y(4bN}PxU;*A@(s>fHOW>pybEs@d`rzOP7`m{v9HS4T|Seb5*5G&KQ_%BL=5Nnp- z(eW$^aS9oB8XipgoIgj}rxIc_>3Gzh-%_PZAQ^VFfed>#$@5{45Sx# zKi+Ea9$Z4zN8|Ch8t#C^_+m(mmxOqUkE`{iW_eRWtP|H`0@6x|y(Dm@A9`;?bEVRy zN7zb;eG>_>+tYmiwuIQ!Io?(+TfnrbB%GqSGG(7dRpIl=9#2$67_N5cE(#4D_9cuM zY2_i9;or1LV;D5BKa?$7M(#@Zq}-XmU=d{nFpRl&8x<7= zfBv;zlr!7Ev47YSVoQj!^kXbPbjUyxLaaIddv>8htd0;{R#)GU!2>~0hz-=G8~5+! zTP^8LRU+U=7k{!z;}YWyCoKqYK)#^b$l?UD&0 z*7kTwA+~X&hN2E4#AeTtTdnWo=FMQnbn6EJA2n|Yzx=vHO*fto3Tw~(wQJW4YETP0cKygE{iCLzLYReQ+J8t_c%@2tT^{2!Q=;E{iM-*ZU70w5f zW>_bYWcg-IK81&2R)VXHchpao5bIz?OIJ+=Z&U@-y5y8`wRz?C$nW~Vb1&#`;$u)* zt4G&iaN#^X$XcC4gjme(szPj9olENx@K#C?FeSvMq{4J4n=B#L$u`&!VqH{-&5ui& zx9!qQ-MhNPwHlMU8{R(N(C~x0P^xq>UBx>#?}JT$?a;+J9-!$Ybb?$%+wQpOfhw`? 
zkh6rMh;j1Y!;V*Hl9QBTwaTys@>PJh-v85!Mq)w_0b+(|Wo)>_?((YB*0)4aNH-nm~|l z>7rF|{OD;Jbjdf`E_#>#W&%?TYprI!ne69nC%NYK?dyTnRO9_^+LAvTjL#GXHU z8RpGUCfu~gFSqRyVr2%4?<~GlOnc3WqN!?JV7Hb{(ZYcE;E z3sP)*Tk#5Gp_$VRq}ctW6r1*L*Y>|u(yCC1ZA}WX_T9c+Ru#%MYl)mX%rtY0<{v`O z?$%X7mi@63hJHE9CU4D})lgZ>`YuBdV#U(-)~w0xQxIa?w-yD6Jn^C+$XUy4w-zs$ zsaJ@tOA4{6<6eKgHmqO&SL#%jinU~w@mLz4X~+MlA;W$qm0^j#((grqj5i z7VX|)P>AjNd6EsCH+L#L_slbDo_YAe>33tS?_z&nx&VrW78U72J+}IrYF=kH3Kc4V z3bEFYSwHL92Nhzs+pS_cs$7$LwsbF5d<~T@6J!uqMM~Oc9N4=RS7!1M`A?lbgDZfx zRLh!eXpb`%MK>x`s&1356WV6_aDiM*Obk?fr3Nlf-x9iY?jYjsph9NN+HZ^P)cncJ zHb4?$AKbD3;dE+ZX|JyJf2|b?v9)Ty30qLLHLV?yNr`|*3bC)_K2yt8uU-w-|G7r3 zZrGQ@;j7`+YoVH;*{D*bGHltTkWVSPRqM7of}$p?&F0fjJq7F5u7v#g^Q&nC1`dXA zz8x*X{~kT0s)W+`$eYicW1$vyabw?}T{?x>LPd((t!e0xK{^tvmrsD*ynA==&=F#b zmC$nsoTUetXN?s=4;E} zxPBRO=1|1hHf-1gb?VmFCac?v7tPlZVuM2zA~R)p`Q?hRbNd!G@95FrL*M=b)v(z{ zWcUMB4fhDH6i}|h|3tRDdGo-POXt-h-+c2O3>Y*-4I4Lz5Ib_jS8BA6Uy$yZB(?fj zO3tO)xK!A9>+Qy{5>?wXnhVaUh_~qou^FwA+@8mV01{%WCepiCdY8mLBgZ-#*s;y7<$DV!KLp8&BxpeUgbZIwO z&F_w1RTAmc6IIQ$ZY?U1O&B*<&1Y7>+>e?6%O4ZL&(C_pM|J-+b`JctcB>Ar{;PR!L4!3X4qA`NNs;y(@v|r%&CF^Y3^KN5Oh_w>FsEncz`-yIU zAtAvqXVLfCD#iBOZ?x6rGvd>{C7#P>w&!x_fN}8m{v$T+q4J7CtbUAL#BGZB_ODyb+cklrp#{acBpdbfv?WQNPviN09ASkJTUqUk+cxcnE$jEV-0lIxY3UTj zZjwUm*pbt0`dQK9g`s(yM(RF~p<3RuMXPoByuCejTar|Ns^72{yjcEOwJq1K-G-SH=Ih!)%ie#av7N_JA$IA4)pqNx_WJ)& z0o!{0U$<&2E^BwtZvNJt5syMvJSLz*?7h2Ty8MpVZ|!kbtz~G#%B`?}_d(rpY4f+D zbE0X(_|dcA!Tm5@{sM*aK%34Vs#x8;i7LF~e%7^9d;WZU6tbu+pnPC4$0onC_)@$3 ze`USqDTLU`DXO>cm)*bvmBh&rS%2ifpKR*W;-x*#_glWB`>5l)uO>n5dR4JsDKD|8 zzOM&QKrHDa09X2VBDRUlqcT1eh1hrrt3&?O@uT6Hr<93eqSp=_G72_t-<53H*HOu7 z*nnOlUu0Ayy!&2jcoZJ4mMdDMAS{?QS%m3p{+jiyBw#gD5*rB*JP>Dh1sCPQjbTz> za0Ms(_laB|<7sdFay*Y4whwF{*bcBANXiaGxZM+XGKr6wtQlIg2uzv$Bjn4ITeRo; z&0Em1)2DDApZy=FlLWi~2yyxTEgUii#*X4xv4WKlwhu{lTC%LOq*zDuJKDCS@}(6a zcEXQi;MFS1vu{*%6#TDpEz@RXCPJ*~0cL>6tS}yvBg4w^Xg0Q<1VZc!&p!imXW`tK z#nG60<<;6c;^|kz2EaRS)stD^qn7RApMR46&{fT>*>Hvs8yFA(+qSGz-`C!M5QgtB z>uPF)WC^irR{jPB@+h@VL`0SHTP^x4Jk;h0TRtsj*4QT`X6GI%H&obTzKa5vCaLy=%*z`VC zsZEIWKoU3+5vAJeX(WR`_00209+s8}Nf~qI&Ib>0%sJ!2?%x3t3nTZ>KmWpuC>h_tCm(#h*zpqg?;<>i?r1I(Gpkh-n$2R@+$9b&Gz98@ljjvB_yh*CLxyER<=wT z_~)qgL!~jOz}=^BKeJ}*;?kQCOQhY05ALY_>h_&GkT1WY7^-WSCD5gFNBD8vST$+N z)alT*+ox(cHac9jw`RDi8F3>yLTqaFF=zUPwtZP;+e(Pd>$1H!+J6pbo_P^gh|MaN zAPXL+t17WoYrf(%T0f%_)CE+8N=uiVLM*)~UAlTIcv-$Glx;b4<^pu@GE4@=P2b|* zC#YfCpqKHjKJ1~33l0u~dB2X+#ZlnK^;^)PRX^QBC&j;B=QUG8?#}J|VbqsaRk4@w zMfjJadQuUcyKw{SZE0KYi{+kz!Na?YrAf5~h(y`7-5_lg4F9$_yzpXKT^t4aeepH? 
zbL5mwydx223bB)Z`WaTQ*yKoYHBZB)b)ZdqJvo&UuV1?fUE2>r!g8#dS8kNA_&oRr z_`o()g_Lf|iYfR-mY(o6WWZSXd*4x+Mfko*;{E=oK~T6z0bRU8i0!6}b5$WWEzhN{ zfgErNfus~-$vze>X8)lpsU6N|=I6(Y0vN9Q=H(A2j?hZUQX(eYwaJfD@OikU%R5(-hggM{oH1;9<3nq&CWz ze-XYP{f(L>GAbJ0tox1{c4k8oVsE2DtWbv~f*6uwiJ(Pq(C`_kyQQRg@Nc>VYnP;zoCvv>q9VspyK8ECL_3wvzLyY9i?W zW>NT0YF9qk&zoV{ySMMcAHS~CWo?Nc#uP4^UyZwV?H0@!KVJj>J26obojg`yR z!=e5E=(0MaG)e_Ct3s?V;WenUicM@>EPOLWe{{WNR9juQHjEZ`C{Q3li$j581%g{C z?(R@16u06Y+@UR2q{X$kyB2qM*C4^eo8I?%zVn=Q=Z%RVX@&J82Yb<5<*~3PZx#!z=bmGZGrl~N;|oNS zPL9mQV$A9XJjxzHr>DpGs?W4DqhE`KWou8yNx(Ykv#}a1Pfy8vNs{4l5>c~MNy|I1 zmGsV~3!j?r#AXk8yd&+~Y1uh7p+4>>_lkw;XNFTg-R+cxbP(Pz6@nRDOg*pb$I3I8 z#QUN4QcqHa-Z?F9Mt>Tnda<Lz&G`2}U8zsxp24@UdpM zS&_UrvTG_fn5Jl&%3s=Gl!fYn+)h>Bk;CARZoeD}RMYDV_8i(gK6@aGQ&vCVwMTd>%^=7MHZc78(d4wW-?`J3~3Eg*s)ZePa z-vArMGsRtf*Y|?Pwc<7nG{gq&uM&#;A8ueG=UrH1C1tP%X;z&|$EsJeoAQ_yvbaE% z69jfaIFdT}U(e!J@qi4$rq>sxDIRm5s z0OCS5>!w|Gt3JO2f*Mh3xncO|n@_WL=}%q_&E2&nY16)H@KZ@7oOi%4ty+Bcc*i4f z_Q}`ZsI~8?(*6OL*AR*Z1R4wcWln`YX}kljiGic)+xlryFL}O$+^HUP#E;D<2ce9l zd_rj&eG?ahGtc=2)?=C)lt`>SLp+0RFA=rC(j~pv4o!ttMwAjxUZb>1`0W|0V1}7q zdkS}Cg1pHlAVUGZ!UOPKF4|C&XdczpoBJc?O3N*yTHjcf9yQ;s*Eqctg#9(EV1QfcnU>yy1lhp?hmU97(Kk+72<>ID0C%163*OtaHv znscSRcjG@)?Nw)W8dYgh4}EtVC*Rn*0?oBF;@?Z5&WM`6WGPYuEv&ihVh6%8+W==d z;U`!+bER%@(jWfkIUqGfVsLG&T$^#;WJ+m2K{oMg(r2zgnag%AXQ-u8g55Ig;6op>?f?e z@P2KBHD`@KM>x(s0f%u*vj`pf$G$CQ%{qaWxkr0cgyFOAJL&5k02j+ossPlI;2@+8(|NkgR>vF1 z;5tK45Zv>*6W+HMXX7xnSO$hh1T5wHs=HIKnh;-*+ft=AlHJ@?r6C!UCS zL3|v5SA1Xa03aXw&0mU{=sy*hjR(}FJ{V9&lZ(8ZE&t8YsZPL%-@LJX6oQdtde(+) zYxq{TQsQ^?O9r3kC*^ZA(EJn@VOK(G#rshLn)<6L0mP8Ivy3Q0%&H*lsC*qQ@XXES zXa%EK+IMhNuNUz-THNE1MRc@+Vm7!EGDk~|=$q$V-d+&3^~T0~ZNK~eR?|m*^ zj2lEknm8B8E5+nR+d0+XZ%y(6p0(H(^vk$eO`*drB#!qzx*}~KKd*`J!1j!4l;Txa zqf7CZio97{!m)l?Ve&idI^2r?VR&+4a1yz%aS%es#KyM-f-tq~nav<$!vuLQwxaN< z2@JeOSD00wKVz(?j=Wo7a<|9jF&PqNl*8)cy=@_dfI6AHP%g6|%r>6wS*XI!N?MdWBi_ zZnCOsg*#*DM0>5cK;p^povmDzKOJ)&Ric??>*QRG|7$`sR%#~&{F`{<^x3CR@WPws zW!jMa1e_^TZ3zE>!*e;DLdPw7FB%PdWjkBQ@_uqzCGXeBM!U=2v>!QP29?#wSM=Vf z(4oapPV}nhOwG#)$P)hH3+$Pna6pAsW8!BC{7bMK#iP(@P5G?|q>>2d929{7F#;9{ zK9RY4&|6@8Fu?dK{4X3V_?(EZpy=BsOc*-wSwB{ip~qc{@E$~;Q!oWtz&C65Ew3y2 zm_Vy?;J=gO)cvCHgN`08+apE&XUUUyx43F`>c7lx(@=lA-Hl}l#huMHF>fn>13q;G zi0!Y{3uOp=ws|i`Ovy) z({9>AR+>N2-O}ntF3;LGpNr6s;(a{7fkJ~$3b|P?RoXN^&5E}Nb|?D~2c9mecCU}T z)1oybF4s_(FG(yB1QkCw?q!ADx%^@ca_az^ zLoQa%IP;@admXKVnj$Zsv-UaSmw%php>Hy*KZL-LUv$QaDg_jN^yyc8cI>4e;&k4GT!Q~d~5S4WtS4fU+b_9-WYGbA$8mtBKdg!*<8SNzmh}5<8;|^wsgJg1uMg>Jn#7O3@o!g0v$B*469#@@UkI%?cF>_+cv6gKHJq}@Vv zh><${xu4FEz1Q#dvcv6{>Ll~}6a%L})gD~O-?l&E9m%4*9krtRKh=t}JIwmA7?$Vil4Kfd?z*R4^J1>?L7(P8zg&?rV(s<%&V zCVncG>{pVD%q%(ON7yThh&RlY!r)yyUGe@g^l8Sm|LJqLhv^_E){O8~Rt1#8ErGb% za71>lL(V$xOtIUnH6W(xnQ!4x7JN?Y!2^% z2iVMRQAF=Kp+j*FkWh=hgH&o#(;D6z>l=9+@tA*s8ss2-r^eF)?|5M!-wC+Z2%t-p zQVJjQAbleV%$19D)4-V^kYFP)t*0g6E7LTs-RlO>Xr=ERSSxh#uidEUnI$Yw6=q$( zjw1Z18_?UXA0bIxz|^zYRymT%i1y*zWo^tMTM7*$P9$OVlVQCTMKn-o>2=Pd1J#Mm%1HfnXT~?O#qZLx)5J4@QFsNWH(lq$B_GiT6R4^x+j{(N-ynVGts^Y3 z=p5vcU3YGT4_1thi;N^FZjzF4o96v^^{(dVDre<5gmDEtz{zLXx77~|Ud@gp?o3#_ z4%;(gqDyjx1*740ijJ?H-sS#D#4ln0N+SuetI65v%$Nuvyo^1g~6&IL%CQRrV0#I|Dnyy@W`TsLCp@xP|?AITH7!>?SH+rLXCE8lNW@_kUpF@>FCp2Am@#Am(SJ@s_ zIwchgV#s_te6RKiP+p)0@^c6(pF&TT#v2Y~&GccVy4TGOzlPr2Ni|(xIfiHNafXod z?0!u8P~Yk#m}bT>EFob>yWC0^Iyd7fE++LVw*fF7nhNkv3D=%tsn*z@NN{S}x@Mum z{w!|P)R6olk+w^1rL4fa!%rhg5lc2{Bm2f7{4jB~>2Of#c)1!Fn`d^Msec2J%oJnN ztV2m=Q%8wRV~gxwTm*cVdVGaZ3i-~NpzrRfTtKLh0u*tjoUK^E-H4;@X}j1S#(?v^ z&?Ks@r%v=$!=JQCWf7%2YZ6{n6fTr&b@$;=NE{E9HTY1rIaY1ydB70P+Dpwn9`|(} 
zfoB^v9CS5-i&KrV3M=785DduqQH!SI?zEsOU6x-Pyz{?784a{{DCNAn4l(wmFY#0NuKkqHqYdf&VhQ{v~(;_>oxP6AH8h zs#-pF$xIMotb99oeEZ?KMnU%DS~;rX-+)bPf~GAdFmIAsY?6m;Q^!CiXis7cqMv7Z1!o+#$?gY{%P^T zw#Dk1Tx6x2m873;u_cPG)Y$@Gj~A+|_*c8{}mg<&`faVE1~&6{KyRn$dU$~J!A+ItFofcUo1&b_7%JPH<< z%Sg*264)^t=LCg@+@j8KuHeayrgL3d*8(EeQe!?*%gVVY#0+ToQFR)i& z_I2PM>7e_QQc5uPCH1m3GHV%~&L6!x8Z+V0JCFG=`$4`>O+nfu3rFg ztz$H^d42irQst+!Wuf{W&b9Ek)5AUL(6Rh|Z&Z@RF$TPa7ZwADUwJ31oi6)$sc4`v z@7!W^rEb%D;kRa0I%Lb%a3RQLew*TXDsH^dMb_}qlKy?RpzkYt4}8Bj=NaXk<3d|O zg}4+ifBEl;kz@#YyomgP)a>fY!SSQ%ytZWS2fAOh*{m#cMSWcRh=G^wRn_iDdS}MM zZwnX4pVh7BJKA&_mKMA;QJ*cw3_nl`5|}M;A<3XDOJHgK^nT?YEtV?w<73tKj9C(0 z&*!Cl#?+srj_nnh<=)iTk{9NeH=z23r+^$ohzzw(4^Y^V_+`%flvx5A_X%!&3A-b! zx)2=}?Xte%@!u7&7ZUo}ttTbg<&p(_P-8yj><8ZM!Sp2~h{YD13-QTYUxIBVepK(x zD+vi=?8Gv;B-7DohRUln-tS}q0N+LN>+co&n-(V1+C(OE?t23^-R%bq$;OscvG>04 zL8CvD#yS*svI9-&yan|Byap1O#q_M<)An%GF!Z#kGkQs|oluvJ{#4u&?6N^Ap1lVW zYA3Wr<}2pWd6QA*GJ?o2zlIi-qrFf|)CFm87Hd?X&lvYt%&>F(gld&6%Hr{x%PJ*l zmb4iF3}QNB3JaNZxcyUbK1+Rm%2(Fcj8N4NOC7}Z#t$40zg6?cv|2!c- z{_t+cUCcdnx3Hk+@B+MOgMs*Ziau&2Pzjk8k8~&N2`u2y(WDZai<}3}B3JKRqH_B4 zH@M%?0=io*J`Kr{fA_rwxE`C-YTh^z=5#in6)Ca>;;r%gQdR>lGn<~TSD0Y=UB53s zEUD;<&v~+!tD>DapTZ@1uaphS>#MW9X5dYbTxM?xXw9mwEhUiq(JXWicX{gj+&h_z0Tqo%;obNr#$cP38pr{ z?fvh|zlPEH+@<6d@6%perLyT@6vSrFSkepK*ktqZN$C$sVODN~sU2Jk62|X_mv{O; z65#fdpnzAOu5nK4Fl(ezja@Qovi zgaY$xTF35au)uf zE{MIN#m61W5JyVv1{CCJ>WBFYq8Dpxf;tvk+=Q@utbKozE*>l>$+yvj>S!Cvbk5H} zDTdv|b4wD4KmCz3l^>^@oRy-t|5?jJXr>S6Y%yEIl>~w_S^BMJ%!GTG^9nB^Xn4TD zz4!q-(aBUecRObrx={O@okmOFsM(&rr!6ot_1g->Q)Bczq1___hqfm%-IQiv()~*e z|6-b}_uCL1a_cchHP*5LUgh5zO|32YOhVqf^eOwFIkThBmi*C6jY^no)C_2Z;B>FG zKa7b-KWMr~5>1A-v)5D7SobH3XG*bd;-^d8&gdfAjMt2sJlYps8x`~SOpRlLEZdLP zZiZaT){5YULX$1lQ0abyuJ>D|#2=d_82{0^%hB|Ym0KStaZdSSu6 zz^Ym;rfzwxl_aXhRie@q=CypLw8L$e zqs^74W9@d6e`o=ua4bpUu=}jt!B_O-EGUvrJi~Uzwu`1`^-l`B{);;M6|+mHK(4bk z--k*EEseYp^s3~qlb}>le-M0TZBP?%k=kCCoxNs z2%q9+N0MjmTcqnrWH}z2vTMixz!^`e3U96bIE$fvAm#+JDN*jlZr!)}V3|0^rwUQ+!!{W3ogIqZVvD8+HZmGCbP=BU zolH&fHcnnBPQMZPfJ?!L{yvQ@hi-HTyJma-E0<{Q_wefyt#8l;M#CSfbW3UI@De;! 
zMiG>}FwWo4XxvGQpCN)md)g=2(@I9q*oT#(^Sa0O2anyT9T1Ns>fj+)7SaEFpn}Pt ztUCQg1cCwwOb>-#r%sW_Y#T(|^;yoFw$%OE)zE$c;``{x6wdc<(4JNdc7OSvg0`KF zH~{lP!#b8S))(FBcAZP7ikPwSN@e&^hek_UVqt$dkHl{P-wr&3*4<5K@`|ODPvU`d zrjMsuf~|J7(62<4Ho!kFLqe?-X3lM&1;@AH3|Z1Cna_n%k%B+WR77hBKk|U| zJ@;GLX_>g`)tNu_4|;Uydt~s*Q(@;#ifOnT(>Uavp#Sis{>Gd*RK$`mHht7HaM|w^ zQh44or8k==naAM<33aZTEDfKziJh>Fc9-G1+Tc;DrHI^ogrz1!YeU|}D|vNzyfjS9 zdWlH8bN9e}i7w=ECV4Htl(JRxAW~^vQq1y;sRwuB^--NXhFw=&9_of>S^(mGymwFG znaN4hb*25~yMfOHd^RB^gb@s9yr_vw*7c)Q=X4OhToi&;xr3>!2g?mgPs~B8?NQ?f zbcau%*Y(=F@^O2R!i-K`CSsMGM+FJrZ$1aaKPQ5@zkY^|mK~iWj+&|9Z|;@}Il>-M zZ*SJjD{tr4)?bz{?iA*EWYdM?h1ynVA}T8@r<=W1f^~ig$EL91Sq0-azZg88bUT|N z1>K+f7OIR!e)|vww-<5}RSH3kI&SCW&|)J_-7K~;GMY^(`*Kai)Yn!jd;gX%+@DwH zkK(XEnLp;C>&8BAa-o;I2p2rjdV zm4cfv86!{$xlL?wIMvl;PwPjZ>%JPyr2lT|n$L89V4dAvs_Sxftur)rhc)yP9o|?9 ztB*G*p{lRus0QYXj*b`znxx{lXiEplwbCu~lV*R;z{WB=v;<)Ho*X4*Obk@PNiEGRma~vkK`2 zjEmYt75r1x=HSWM(^;p=JsQpZ+n)u|poIOeBHsAK%vvh^Y|$Y)@O{5jNrCa=YID~4 z&J%lU>&6YPjCRxjGh7Sqjp7+vq?J#z%1jL3&W^OnUEDfvR2LeGIv{e=aLuevGSoY) z2HuHQ1mGOi2@3nY^S$6|(QW^3lSx3J`o?WjV*Yl|{cGz->_mZN2ATa3T@tesp#MsJ zv1HQCk;JL6U=g?|DmpqW#osRA&c#m_iL>ZpVOINLWUi-*BJN>-BMCm?#Dil&nU=iX zb`IS^>)4;&Zx1SR>%MJaKub=7ad0X>^{iL-2?-~?%_fq9o%xZdIOb%@1-_n;`gddep!3pFX&>FxZgqWUR zMfg{u*Us$c<8b#4pC@Glh=~z)jC=^v|$#zYWdc*RJWl_cc^VvctEdM!J3 zX}`3_t#F6Qpd$FoJ_A*|bq}zO`uWy+mmY&p5UG@nmDn@_-8=TUYpe6OR8g)G^0TTBXy9mVn!_LE;lT zu&&5H&B_+C^_&&B_Oi6P`MJ23B}d};dHQiUtvoo-pKfqj>Dhj^iGY~Z^y#DRaiI-< zAj{3-W`5ANNLSeH|%_dK7V z?o`b>^tQb@9PRCWB|u47@2qh;sKM#psFCeS)d+enIVHK~8JkMbUJ z0_Oy`J!@xE3tN0b*q4mVZj;euKyJIk{GL%C>bZ*jkY>uM;q_E#Z(bASCEePWtw37d zxop!u&u+|ALtSI%@PrU}f8^2qdSS%DuHxy9Rv=VGmsqVTEg}oD9-BYUxWFKl>nxOL zhD!6Y`6tDy1Je`H3I39QI?S%!f4@KJcr3#h%Gej)Pee|3iU%!!UKVZeKfje*1cfxO z5Y_8Ls=kGne*1nCm*_);aO;#8b>pxh<{~G+m^}DqHqq$d7JU1@+${9saPs6pI*dd} zDIBO*Tj>1rSsX&x>v*tB2Oq*KtM`!GbSV;TeyiRN36o|uy^#dr1~G?mn|VJ+L&S+k z9|4!=4K&76L2}MD;7leixbZTR6zXJ^P0m$ei%ckK;0LRM6Sqge;Y^EfJ;%iim%^~g z*}A&Je2bzgw8~!LGrPk9k4n^tYQ1{PBffM2`!9M9L`A>3p-*tKp%^YDe?iq5aNZxG z?reVJs$BlA=JU!IxoYe{I@#DR_%ziFJ@F3YJ1s2UP~BNbOgf*Sy7c#LhGt!OLxaA(jYjL+1JR5Lzx`@P z5DDv4{x>Dfd&0IO@3O;Bx8f?g4K6pa`*_1?6+I*pna!khMv5_qGdYB-Qwa}VtHJ^v zr!DjDJB$pzrK1!M9qA6U1VEdzwMxWP_J@^}*A`9O3ofi@7S&$cZVRiIk6f!f4Y!Zq zC-&w-BucHmOSa)!02L9pa^2THtN6+Zp!cxZG@nqvp;$S;O9*949+_o^uazbbG-NH|#aL+d!#< zg7*^C#Y~e}mFoS<-7^aOi6`}56K&g^@o3e1=z$@(It~73G|-15$D>(8N6B-l&NV6w zPOr^|X+ibGlHMXG6FU4>%HZ&aE1YPC5)HH&)@q|&CuMzzrtZEbfg=ngxY9Ebb zuNeF9gUz>S^G+*#AW~mvVnJr|zOqvD7;+&ZjxSSQRozDi6o@YV^!Z5VyxuP#Sg3Zg zyMc>$zLiEX_qi)23O*>&TJ@2d&0}@>t5g%3}DdZ{i<4&e5WD1#~r__&1Ayo zR@0Zm@#dU(@8D|C<(!F)_r&Y|X85>aQH+T&S@L}AEv?O}EbW;W{oU(1Nrese>k1Bv{e8&UJCG!l;bJZn;6cYp2@yw-via z(n{oxVXOCsGkKN&`Q5hPH_dxXxva!4Sb~JzqP`J8S9x(jB6Qw%+UjrfPZr+%lfb7W z3{e;sj)|=FKIRZi-w8BB_#cgSmI?~kEO=C+&Iwr~H_x6Q?;PVisG|iCWZ-}L2P2C1 zgy_pxWwGBPkdjF_0u#yL16Tov@1N^D-f|fEJY|Dxbi}VEoYS0>M(YouN;U<07~?;! zhQL))0Sx?3d1%M7gls}^$Usg6WzpR}%;MC*Hgu|QEIb{~b2pviXPZ^^ypn_x`i_M# zYBBK9APrVJ<*zHb%iXDiOlXql`3NwN|05^P-PUl+p> z^0|-bRoQUEq`WZv^ofUNJ7YuxTA!7}wcZR-@&z~l^6;!yGzs6sFu*m)UtcJzo-w~z5&gyc02`FWRoA< zM_5oatr1~UG!@f8`eKsJL?B~07*(t5B8I|ykZ?vr|<)Vq$9;mRQSL+$|17+ zW~vME1C=c$snxl}&S3DZxq1#=O^N4xlfspys75Qk@74<}I<1mC-! 
z?m34xzsqa=*s$ca_GBg^Okj><=5vVo5#Czo%g!5P@ZjvPKHbG!()8M z2&sHkTTL2cKZ8upJ3C^-I1JLA`wTECVs1Z+(q_t+z3{U!;(Uf&yXOVi@29jp@kI(& zT49imUtqOqAd4_Ojhia|IFKpq^DELKpC!qIREh!;+yYnt4VnW2{FC zGeIR)+$_Pt{!>ix{2x9}btu7!I7aBa=J0f(GU$kb*lY&OXwprpouz*sCsmWW zD_1<@Nt9;UeuGj2x=v@Z56Hzodb}<8#-K?9eL-NxuB6p#ii+4L>GgFN*6|dGzYaOq z>64T*;tAEsYG1#@vV>*B{U%T&eg7Nd;^svfDxew&>dniyf0PZ;5s$t`z*EK>&>AO7 zGe>Lizrp3`_Y+jlgEZiBIKU+0MP!x4aM?l(BZ6-QXuJF9YNG}Wqym2)@I9J`>ZB>F zhHif0retkz!e$_~KrxU+;N)3(s73Sl7p;Bl5^V2Ym?E8V9O6%Qiv2}M^%Q^I<4?8` zo3FzUY^9RBgH8Sl>-iXp%}Tt~TUxa9N-@jvFf7_6`~O%jgav4}5@S@kHcQvD9GAfN zh+;8Q4-A{47@G3AxS@tz!brKWcWmSzNf$gDpUU#erL zp`}F{f>WthS62^3x`3=&odM$A)umL|S9L!5iE#3b6!_<-YS&3V7aj2c$U3ZZOHvQH zWF&=47&PhQBPYQI=h-Rs!+Cb~<3Teu*-iRrMU)xG*0&U^S4~yBPwV3wkdCdu&xZ{_ zN6~e^j;-~G8svGWs+AzTC{ni^;&ia9j>qJWOZ>L_Y*LhM^(-Ucay!~wMgD6qd2g)m zy!DqPY)G98l0BW_#>ydI(KW&uXzhX|z`Z!snqtY((#A#(Zyu$f{0n~2yGK)?$^*|$ z_@8;8aI3t{GL@N`8MQamZk#y9^m4n3L;i;Pm?rw!N{i(N`E9hTF5QFNkHk1=5_**c z^DI_Gl^@vw2e-kxCn_7F4}7a$t> zUlXOj;sZ}Rcd_EMKFnGv5J4Z&#!eEBgBAh`&8)O1{Q;cS9_`?B+lTiM5JeR+j2I`| zX6kJ9NX(*vK5Ij4)(l_iBjl^NV_AZzEUBE~C)3ST%}}v$rIk0_M^I$W7~AHOHzB>o zq#diRPI{<1v@CQ2rO|KuB*gHm z9k-aP2K{oA6HXTp;tWP&BlSl@3a0j0eO%v)vW>WvqV(e-Fry=+264TVSB}7sdj`)) zZ|gwfqkDATNfLKM!6&M_wX#0#3A%kA*Nd7z-q&6X>?q^+=#&-ZH|P?Pc3*$0pX|s= zkeAMjB;_q>rJu1^_-aMnIuZLNM_T&&vR^4ynvkt1Sdz`}248Z{(2MF9RLy}pyL@@;R)$X#IlcdiO3FaH8}8JHX8X1#UjxNrH#>Ag0SWfm=`M&l?6r zBXhsa|D|XT$f1uvet`-zgk<7f{DG?%lpl@~U2Yhbv!=ThRKX*1t*J|0|AkJ#g_Sbs z~rU`P!pT1XRUmPT5i;2nw0v5pvze=`j?sDVB2J>nUd46nRbfrkZ{o)QopTQ zMccsWPx~h5B&T2~8LNbCMj~zEUHE@TBzKri_Azo>^tHrA zj8J*URvOvy1$h!#KkdO+tprn_Mt{hyb7$MRr$zJew%g#;6c+5J{Q^uOlFit$(f!6fJ4S7Sopv= zqK8R&uwW^5hbNO{_TN1s5zsH$6pMz32Sgv$BtB_nMENq`mL1=n_*5Dd5Ft_8V))yS zT-pNTh*UhIT-c8bbni#=H_$x=mtR+llKGwj|E+D`ETWQIJ;eJ#17Ab&Ju)bNS@Y0# zMH+YhaYDx9S2pbxkz?m;rnnvVrnIGO3JF00N27;-Eo90-iIj{bmXK;{^cV?34A+9(@XakX>B$zb2n07oLFH7|wUgp7-av{jHjUMHEgT z^4Ge@zcj*n$)u~IHzzQJqVC4m7gxi7&fRuAfsZ)qkSj>0C8QBk>!gXKh3%P(ra6XE zj82jsc<)>M>wq?&M9GZ%=4Lq*gnFRi{b+G<(E^~{4T4DtGineYA>pR^v=E7)+3)JJ z*qd&E5$u*_mcoZ-V7qOK>)|CT9y^&Vb)VCPQiAVDrjypGwFUXz?F=JR^H$e8K&zv$ z)l{@>L>6V7jBz#q)l>jrKCuh&OdhDr_IciNy1zY1hR-TBN_y(yT|guLsq?8Mddn!x z@+@^j3Jh+?bBa*KhpoSA!=nkRseSn^-4{T#%;{+W563?49ZUT9NI{pu1?il(-WK(L z3iM9|#wso!P~k?D==fKBMkghdR>BRU=brI5P&>SNK&6O@cL!AL+tC0UG(HyD;b9*Z zM@irkFC}HHq#jL?C~$COqi4KGWxg!3+)1Hrbb%?hSX$52DT+%QA8>|d-kpH`5I@i( zHzpyi4yjs3RQ!QF-?bB?Mw3JU&|p&qaDrv}e1< z&N)#OpXK0=ejH;nqAMilz2jCN{aB)OB$lFp+94OpOWKwnf#+76@} zN>_Vg?KrC9<8Xq52oO|dNF~X1tf}ImHc-c1uS4HPdr30(7F3gK#`X)DWuW-C2ytA|`!BO6co9M8 zEO9^@0_8ioz@MJ{`OUe?;~QVXasm_AydsbxbkB20@sl_d3+hE#NmaQh z55N-!cnVt#K=3hc_Fa_0(h_zO5|pdU<4-uS4NJp&&-CE2?qn0EZ^D_W);AItIA!%) zX|DNz3+AJ&2Gq#Z3Bz&wz<_7(!7ei;`+sZ#cS7LJ5p$Tlc-TS&xEbRap-+ z!(H0HDYU~4rRErGq$VA-cztNdUSwdzuB~g&kcwhY?7;Kadf`6~4B`afKyTLdI9e!5~ZUr&5%Y-G`9(9mB=97f!4w5`CRql%B%x!ia7d z`y#-N->QI~)Z&rTNa{!5DO2!YOkix_Q<1#bxT(10FB1suT`Og-7$!sO3vA!;ZGlo# zdxOp)BQHRwi0emL>_6j73_wY=>o4^RoA7_-dPrWq3iIs?d_ast{`Y)YNo67wpK^>4 z<2el3B+5vkjl%RMbM6<|(06Xs?UG->NZ`eKoCh+3$wt9CuBIeSC85L$9|I$f z;RSS0&wDY)aup~1KtN+B@=T|??O7vyWwgV4F|S-dD`p7qyq2epoJ!&7y!}cCFZLhM zL|Eos{vIP7pDX#_l~%kz-dTrqZ!xm>#pQRJJ?Ao{hbB1`DOSTQ)))B7_)$5Z6yPaE#WmK=_4%yU8)YJ{@ER2W})>S`&ZaSFDboec%(VH~URx zsLg5Dzv4K4ew<2>=@Sk3+T#e0_<|mHYTQkxi}op05v4)+B339_W0rawOk1{sNxsKG0;}WQ$RoBe+rdKs)1s6gp|W` zzxdSmgzgCZ#NJ{A&IZ&3TEDQ2F-peBZ9(5eFCTi=Y8DmqyT6CnV6gDh6Gi!;zr34u zPW3cw{1cZBU<4WUNeaa_h&uYO!%SMs&pp@wNsj*$DBI(FMg*`1@8@RkWCvxY#G;M;a7| zA2DZK2JP9o4_TEB6zG<;A)*b46A6)Oep;QWaY_GJBu0k>#2LGzqmM=@ItSZj?HA;> z?%fr@1zyP^5_U3&v&%34yBk3kSR;#|`)xnjczAUmLu_wf6R$ZRpSq1zG5&zEwYOH_ 
zH~Y7~|3f$UuMjao01p2M4hhfwR+V5HACI3lk)Ba};^J|G!r5+@t7NPZb}bb;Kkcqd zwKd(BbTr%o&-#B__EO`b2^>)n%W%$vsn5kmpwsSE7zy|Zj>;Mo?t8R7y)qko5$5rQ z=p)c63L;z{z%tNNxKcz;!Fzw>0BgbdB|+W%ng~kE@>37r^BW{6zLZ%Te(fIr4xgz*MAbmt+oCg#YG5V6T! zKV}vfjT9rOYbjH})ANkG$67V?k+G)0@i#fVFNq=BfvmlUj>(L23dmaDdWqf!5cia_ zF+Xcu{S#aQw@ecj_BIciN;$yk>!(EgTRMEaAIsYMTTMKKw@#S0wpOPF_v6K7Ujap@ zkBF^T61l!)KNQ$MPiJwW0`0N43Zw%b;_RPIeM-?CpVwigYhNy&h~n`68lIKTVQszq z_t)sWu2;K|&MmsyKH0a$J$<=)Yzb!Oc>v5r@NInR^rVHX`dn5INKVKvs9Pd*l++mR z$URF`R9UEML8X(*?lsSc!SQR!2k?hT)r@6yHLJlV9}w27Tg$v+V4-!EcAl2(9ULd;)}vp~rS{H2r@U?LX-) z02Q&R%yLfvNnWv2&=_G08!KE7*e>q!F4LU)=oW>mT$?CO zef&IqEZdHaYJl(LK$g3kGJMF(e^annG41+R8Wp_0 zhJ>1UXpGzh+SloZ0f&X5b1d#du{eapq zNUEC-j3~{(4&r#R8DH})9{m1S7qh@H%<6jQp)NX8i6uE})lY2LHIdFx$AQ|+?jD-0 zgD%T%6yfx?r&mf0si;?P(ez0D)1alDPouAo-vI6c6nVnSe)A-z|EjI6cippQQTSC{ zA=x+l0-Fl2hqC^`15Bjl=gfoWW&h=lOyWh~N8ULL(MK@`m42550rHa|_&A0y8jr|e zB_H}GCv28&Sx^%)WHXprux`Qn-^VA1fpm4eL;97d#l{WS74$i9{fHo1U$~vF!dga@ zx7^_yj({xqxtf9rt6>bek<3MXulxF$F(V@|;!S8qtdi?3 zPp=ZOHTUFvCr7%8QubQ7_H+2gC>}eOMu5wND{lQIrr5vqeR3(xGH$m`OM?a}qQ$u3 z0sq4bY9%WaZb2m61q5^ie*PAB(cOnisg{C>59q;PLmc4_EbAi5A3g}M$jV~Oz{Q)7 zZ_qER{-?n;PPZ;IW(;_?J_B{+70wDo`_s_pq#;a7C9I}vH>u^9Fu5NGWpV_8shPkU z;*me#93&J~jqhqHui{@zi=!m4pikz9FAEawTDVuIAHPnoO!vJ3z1#aBa+$usx&P!% zXgYeIDL8q{PMNU4kOUbson_=!cOU%DoThpfm`DreN+)N~9`OD1w zDU2daxdq`2?JT@GTg*rF){hDC5cxXrKK|BQ%r_sT1|;(5EX^kUd*%FT{lRnwc`xx^ z%=ZUK1(@O1d8iI7#G6Jt4A#WUP&n+KDaK3NNrZ^PM+chE>X7*3^uH9kOnEdHyQFQ~smuv0L^B`- zGT|E|SAP{v;~{sc?f1`E^Dc2%37qlZcxr!gUY6bx)e|lo5%HeMjqmBzl4#_b_zc8Jnwz8i)X4jbtb^ZQl z85~+bppS+j^lIa29sYBEbs~3rGn55gf9SpmKZ`4HKwAU;b6doG&*}gD_QJkGx0iqK z)Zafn3IE=ZLcFXtD$w&CjTXnN3AlVw1Iz;_8SycjjHM}+upiF(hkvV@bKH6k;_US;;x51SJPSvIvMU1c}3t6(kQRIS0uM8HO~3ug9(D zp0nZXd*1VY>;7>U3)V0_(@%9(^{=X{tGZ|3ixHm8BM|QKhHM|ehkDXW*X#8tf;8V9 zlo_2inrMZ^s3F>Ai6GvvMxwf2w~6HHFF~WSL{`7 z)JisZr)|xY^6=Wzn!EexJ0tkGpDMzxlk+DWyai!2??~hcKjQz-|ApL{hwYuq*RNOO zYjZYYdp0IA=JwzB_Aivc!n#kI1Q_3xH@U|q5ssi4dd|MmN99{@0B7-G3cX9Uos6v84Qk%z*zjrN5;^)Fw!# z`o1scSr6KxHQdZ|wxPPsc?J5%$tqHJI%O9Gw1O1((}VEYl@xhLD^L27F!}_>+Z87A zsp|aR4(FChl*p_XH}Db8H#MzF>S?P zhN6%|-QyRMewz2Ollm#~kECs7k1?4zOM;L2oJH8i_zPF>pr3bV{6yBfDs;KvW>ohV z6M8z)cRuEEEuX~wjN<<or~$q6)@y%52WMSdpq86?tg`dzk$G> zhRYNE)xD~F$Jnsj%9^)n&IwUzFxw$w z#OIcTe3AqAklvznt21K%{p0Q`61Kv!V~q+TnpLx3dAi$DhB}{Yz<#r+v5P+U>scRR z**nj=L?>p1MCrP`#!Wm{#yrM608*bhoJuYB!;QC!4q*8*;eU?Ne**g%9P)SJqX%n} zf4-RaX~#gamGe#J;kAS(lL-LKF>yGGpYVZPzS^->3Ax5A_8;Q|;6kp*0E6&7u)Oy21m0cT z%K+LdVh{eqPKaE5J}tNq1HEaiK7Hai{57)ou@oSFi*TOa2oLgv577a;6~=+r)orgo zedDMd^X>&SRems#IMxC@SMwfIU4aUThky|?!*tHK&V2cJWNt(3PZ8DmEfvnR{d7zJ z(tb}8^g$$IX-$5TpyPpeSsX{JHo;MYv!dw@%kEWP2sqBr!d8gMhKNT@8yYXbAY7+D z!hLqMuaY0J$S-qG`1F1A*c=i#(gd7FqBEe66n|9C4R2T(k-S=`Z*1T(9MnBa7J!L+ zO>^8>$cS`LKG*t;UAPLKK=9#&120$d+0=V~6`s#GatIl9891Ot;;&6$;6&F!AJ;cN zAyo*-Bq>hLo0oCfT{z1A8jF*7`MOH^^v8_l1m0&>*i%;LWEy&j6FxPU-{qd3w$oQW zB3`mGAYHMoe!H0?O+bgQGk`u`((-}h^YP|xz;t9NOgNq&Wxq2!4ANu4;&}gg zKxBPtxzKPD*!H(T&1&2^f{2fZ_nw+J%yJg$wsh@C6TqUE1Vq-p-)g5-fpm2UKEmg| zv_ANY7UBNQ18U|f^EKqSu@DjDo%ozH=HVCwo`9$1T}Jh`-B;oIU)?s?EHPQY`C-%c zMBF6)Yez4`Kp$kTeM2mGL}`IQx;k%>L`Oaa87n^Fqn?-9cj1_^obVYVhm?|6w1zGPtipPS-fgN0>!bC zK5?<}$>*G=WcNRct#^~J&{_}}ocU@_LIrq5f=8K?`vIRa<`|pMp$z6i+?p+r%Q3BZ zVi5XMhy{Ih@|TOw#vl5EXkS}l$M7S-VXy1Hnh4L-Z0%w}F5TR7(ASLtk}jLEj=LPD z9z0r`OM@BM19PL`<{NS%n3pbPb~7B!{&b!U`OjkZ9d_S|%4J3{rW*%-*oD>y#lIU= zgNKLD&v$cAmK~A0x3?&mX}e01c1Pw9PDjYz;ZZV{8k5#Scb(l{X_Tu9Qv3AD6MePT zMraloqfDywThy{69OXpOYbE53da&J%C1!G#s2H2-cy=GMtM-(S!yMnQPrvDA_3TRZ z$~lC^@_CdkVc`l30~W^LN$oImNp2~HY`SI2TVj=~_SnsiR5uEge3c^at4enV{94NN zTytf>lAWs8iNqqa5CAL5PH 
zm?9sMyZzBKT;=)R&W*|qvRT&@X{)rjYrVBT(>#fnKcd^>NsE`?1iugrbq);&X@1>wu|hVxv8pD-WZ5bgX-+~(>V@jCIj09iRE26Hi7 zpwMNDz#!7=%-VDl>n&EVE}^O(uXm5)p*map-G4YP78lI=BX{jN?PqVCjbZz~FK;7^ znT+fxl;Hn8oiw)c#OeD04siX5JF2)aS70q=zgh1b@`pxRSME72FhcU+4N)!jDjN$4}|E!_o6)W zs;|2p>`EtdaNkgLt=M>f+rkw-iHg^fO+Ju2=quK`DVx9`%C48PC7q>^Ca*vDJr%B; zsjag1V2000#C74N!t>gmh{nI=4YnS^GUe)_+Wi(05k+-cn?$3ASuyzX&$&cFA-v$Q=I+i^3_G{y7p!82qm@gl7oEpBA9b0#$0#an>@{$BbWa^? zukOJh>OkpV>gO~}wN&-oI?&t|A{*=^@-OjPdv=EBx&IQBkc5`|MXE-j5ifuF**eF$ zuDgQ5baWBHJRPCNtg77^aUvEUqL`I5w?udW4AXa8XD^x+w(PP~_dGOGjzZ>|Bx+55 zCS2mjdrPCUnJ*YK!67}NrPb^sMwtF6Q<2+n_j3Ny*MSC2a5ZnrIckCC`7)glS^;g% z+cwjV@x^PuFjTd7k^6>lp*!CWeWot^=Vs(Yvx-HUbz+>WgQ8F0Lvofz(`4BQ%l-oN zeqwNDXLXe4{^--(D_*AzQg5VAPLPhqP+KU=*`K%Yb)}scNrV{I%Y0IOMf}BaY2*1S zEmZ%dGd;%DbmGg-`dXb*dviPb;=M1M9t+D4y61VP!X;>7ELKIbbz}EhqH`3BZ5b<*VA&tu;aroyTDYeVykTs4ct;xmOkIbN0rBbK z+>FBe`~4y*`whKDM?t5i!|e;~+uZztcd+ zfmUV`W{Yf1Tn`?S-`l-EIF!?;-+JE_o9I+NXmlDz!MQi`Hn`~B;9`=hD{&p4b)WnQ z4S(}n8Q#d26qeSxbAsh=o(DvA(@lvIR}Q!&Y^Pf1xKGO^{xTfg{COJ1kJctblFfQM z4|;Z`>$7XRgU2>@&d%hhxSEmHkx&UM>$)H%-eV7~u#RTK)6n&+yzmu%J(-w*03r_( z8!E6uRWP7Cs(z3n0 zh#j}724@n`MQd4Sa)wg!SPJ65Tf%!Ig(A~J8ayxqClp_1R#YDt>etk>0XGj8cr=g< zAI{7w#IEh!9+u0}CxvQabxNP!y~3f%R#>$!`!I%uUZ>nXETA&J=G7qST8` zX-*Q+ivV}rqZi@5*>4(>p5$Rw4eP!kCPM~o3d`P`2qtcfGp(WcN2N-iB??*FFfJy! zw=zcUPPOKxJB@noUWKo=4c4X^2TrT1DL=))^Qyz8=sXPC!Uo=bW?x%Sl4FQl?AC#( zo>EZTD{5ry66n0s8-S2_#m%mln-E0HtDRfDBb&1<) zgtWlD-?qQVC9tTfGQG%R>*eLfhr=HS!E#{?eoKcHE7)2kp#_>ilvcMN^ zFS!9PYWjMMr}o0yLia+xVSN>oT@aLJ{CwXIIA0XCdV1O*QfTqHZ(zFEOfE+`FPDi|;+=7w%B7mm zVz%%LLq-0VG2*HD&xXyLqIu(yEZ^|*%@Nd=5&STf5J_hRj%rIBNl9RdiGcJ zBjbZ81@6W1+hlFAs^!PEK-(yF{7I;p>D)KMTcD~lNYkWnit?MSp1W@@*e@Cr=;pM4 z`ut+Dtp}l{vtzw^ZEj?pd>Fy6OlFUHO^0DIh42AK!n40ry#2AD>4@-L&%SYGb)~|( z*I{HkbpM;S!)ga(rh2;HNCFT~#{*Hx@Grg_J7rt5jH8r=*}LmJu9HvAzE{M$JjAdi zwoBrMtVegJaU#1``ikfBMOzt`cl*CqJ|02@2E3=vAEW!f;)%yi3NoUTKCp9jyBD8g zj7K+|697DW-dPpN)XNK@T{4stW`qapF)zF zzgI2SF_Pp$#!a$#emUJ&8nI*p!^K!Rvu9f9`@OQL*VzghX*S7flOt{evz=X~nYjrE z%SB6YKM^vwz3nrZ8e1kYi>gfEaGwHe)I&Hc&dZCVDy((g1&bqwTGwOPc2XKd8W}%d zJnE5y$jdo4vL`-$Z8S+SA@zFmmmcxdYUTxiXH74Ff=mNai^#z#aFA|mO|r+tmR)ru z^=mzw+}DADk;HG{(hE~B_4I8<)rw#6l3|Y`<8h#KDfl{BTF# zK-801ZOCR0KVhSY)za-yrSXBgHs@vRNwCD^NySU4i}Z4hWf(LHm3gMFCr?WPx;G}` z5u0XG(JjQ-S&G!vG3$SiRL;|t$z^rSxqtsYgW!+N9;cR&L?NA?LHvBFQ|UI@3Su8T zH}j#J*kuaI4C`Ts(S8tJx||Yvj3R+4Hg*Bv`0yK4i_lo>E*MC zCs`gAEeTOKoibST(VNL{!njrJy>?$8zGjE$A~2wHo4h{PDWvZ=q6=VGt*}aFY3)@B zT;p-M1Db7XbWps#KK=>Q;A`;FWvfo{)0I1A&_OglVN+Cl##~k5T}Th}J-TDGr-m5> zN{?>>iuDIskWU}%d~H^5EuBwauxMUKO6{=MQuuVvlH?ld>=bOF_(5Rvk5lVK?R8Rg zO`k5%JB;e2F2gIQzWdX4#hZ6qK@MpT)z&84=q*(D(|Wz24rLp3VO%^pBTv)AwIPnv z8<~no#RoI`x)>pQPlYRN+E13^<6xFEA@xBmoOSJ`*|LtkBBYHadR`dwb3L-g58p&^ zevpk`=r7cQw@x9~XC~z)_J_(A`3r4OesDpcN+n)D&bT0F0;dJKM+cyLrP+g@?rtTX z$IF+!uzMWd%K|(7Z<>gVU_Ot})}6M!s#-}4;D|$Kh=0r(aGv>^dqbORd?9Y!U&;;Bp4D!f zI<5YAFxP=$oM9H;RLj<8G!3rF+)cX)^v$Xpx}MztZW_D)(lomGR*OAM%W5Eh!pX&S z!I`E{1s%s{c4I=kIwx+LiPxvk@4R*NRzql^ETf1UCR$^(t;fUk1uR?XCG;3O=B)(4 z!*_EFN&A1=ML4#QiGdUSy6Ckhdt~*h?$J1X!cP*RcN(-y6GCN zHI>z9mDth~JDUjIbrUaoM(Lr)OR3MZ$l)U&w?>Pg;sqv6=9^x_2k}F|%vYlNB{{(0 zUX|3#I81GLWh7=ul_bZ(7J|FsEnUpXvli~$J9aMXIiJgtwNpWsC}m-BRhkplcDlEt z+(vX5L3MDhZn`~2PL)H;AW?H9`HqxdsJq1fLcFl^bvGaE&rU2dnk){aIU>ym_bvp` zto6Xm>`W~@NZTvFXJj;CQ*sd`GW}GQR(kDd{Lg- z+6i4Y-|pie)&{|kb5f+QfU?V)r31Ur+cpmVBVRppTb{+F*B{O|#}=AyIJ(gzVGAhC zd~@2XOB>glP6=T`Ld4DS@)g1r)>`~{cRkw^cI2}5IY)+`Q6h$4T^f0F8qvj%NaQie z&QYxVFu1=*8K6S(slCf><=be-yJt#>UKQ3eb*^dp_{V5V4LO(~6~xC)9io@suurE9 z@CDAnbt!<89?xsWVki;%%_f6 zH?!tR|LfI4@Yd^~fGGBO&mQ;{c3qZsr-f{6)$I-;GW5Co2iu*tF1tvIA(EY1(vg^5 
z)XPSebI1_=B-`-<2Rl12Iv%jc-pfas-8rH_q)(IhW*m%Z8C6m9S<18kyrYiFqG(}- zt?HNe48KQY&q)A6O^?K}KIR|GLH-13v0n&y{Kr)8;IDf1A(190M|JzXoq!alV0&a2 z<#>zV*-N<>U>{y`Cl=GEHB4AKK?nVT>ygigyJEkWmSZA*+-kE1wlEcIl^TvWdhXn> zlwpjBjdLm$V>ML-3JXC9$hX($3-IcEJlgfqG&X_G}>IaKM9$dnV z!sC@27DpE9v>#TRC`>;mTNoxK%+Wkx0PIwU0!c5giW2YI3(?DqmEn@gbC;0f5-P+J zXi_t@jrtG19-ZfWON+ZOPT+gT&{6%qo}GII>X5qM}cipx^D4F zCaQa43I7(E6A6(yvBn^j^1Q+cQSX0u0sO{2_ZkzT^w!{)aYB@S%M37)oE14;I{|2? zy6@k=Pd4K>X!+uqYh}(UNTQ$8O6Eoi$w$*zkbogdhc78wm_^zLx}#}(qAH6N_qB=A zDZ1lau|-E}x}W>#z~=Q^!*=<#Xg(b_)c2ghZ#3d?Xc0XLJw!@&`jaSXD}s~<`Nd6= zvuMlYy2%6Yu283Ii&;9!F7sFMXBGHmhu5SV@axz&BbG&q;%c%MOnp70dN;D zgLfPgJ{vc?t*{>tzt~ykJM^P+H58S%FJSj@0$0l=dv$Ls zB>v7F|3N&e0vy5k5WYnHxBlE5Lf~o-X05FV7xOujf1>2Ory=Ou{gGyJfWjb`Ze`el zY-b|m51HwwIzVZc;t?TLjnSa0y>^k9xuT+?*nM-hdT(4$9?80q%9KbF|LIMXJS42+r(cAJ5KQ&Vy>=6n{ON}^Q zKVX}-8e_e2TL8Kz1dd{&G@an8nI3-ZhP75sdlYU)?7Gc#b6XqbdFcB?ovb77%>-)R zZC{#6rMz~=eqhm=)uF&mZa{(8DykWdrx|rw9;yP!Qu^fQtJ;A@`gSQJ@7@(6{>20p z2_~rM*ckI$#xfR3FhR6lZuBn`>>?uTgm7tF_eYTAzHR;VOsV8&(ofWgIKif&o zobS}qAWJSZ{8iOLuLiB862Y0%ih8`FaIZHeXJX5F){RtiQ6tM5wgl9BsrYy96F)Tg ztS|+e-ngK76egIuyr7CfZ_oeaEJQbc?WYP(cE~!V<3|qS=*;d&kn)>BZdyVFB6fcA z9AUY2wwAHLT(nYz-i+4`&H@F)VP*T@SP&uCaV6NBYmNSuzxro^@Ba1aeMPV_+>PP= zUpA%>*cfKGT~@R4kwkJ-KqDHj5ct3F-*QH!cgvqw!^e7703a2+athK z7DzUuDERHI?R#cnYLlTPW;PA6BleJ8bDgC`N>|qP0r}PlkC977zA9j!TXS6{QHMTY zFWwDPP3XoVfAzPl6xBvxb0x23u&h3|naAU{uMi1uHy}kjlLk~OCm1glS%ut-s>r?} zvfK;6Y2%$ZXhSrO(f-qQa5J$15`X$(G9CZ92Zg66(ew3fj;7*ir-bNJC4W?}$@>0i zsN%7S)XnMj)|dgX@|lp??SvcIpPC*(qi7Zx43)0R}e?)Fo?4 z$Gh4C5u(8VxFKyi#quZ?7CA&qb{m&%i%P0@to7dy4@;0~U{LW5N*pr2cO)H$!^_ni=h*WRax%l1G;_PdI__GFcv)~`ZRd^sfbsaVH@#m+p& zA{l7-4Yl{+{eIcDo=Yja1{o^=<1HK?DQIx_ljuV1z3R8XV!+Hx;`&8vJ3zBbLX#Kk z%kFSKyYZsllEb<+)(HJtRTqLPkLR$D+PeSIeD-Yyat1~~nTsvx5#ugVk z*7g=dOFH1Im)_L;I=a?;NycV)n>S@Yw9C1~RIfS5BUd=xx4`~U%`@8D{>q`Pa;J@T zE7Qc8GC)>t25_9%?pMi`@&tEvU)I=yjQ>AQn@dn|VjpM*PRYA1*cnAEYDB1}ejmH^ zCQmA1<>vKN!_e#0ri3O1p#N)1o&jb~Ldh965e8y`V$n#8C1dN^#=7;Evr{!_nm);w z0um8W7IU+VarjKd+xmy(5=+i?#v@&64(>)na&$p86id8UBsxooO}AyIBM^~~sojwu zdRjwC{0>+)df%|*QlYzYoU&<7&tDAcZ1O&cxnMXa4Uinu@QNLaljGUaA{E};LE&Q` zLq3GJBm+ar^Q}Gnd~?ysdd(cLcA<$9CKaPyIQLM*JlwweW9hTsGg!6^fCWzYpw`Ad zfSWDuPpyuTmU;$HY`-MD6JmsUCItxe{%VA>x{8!Pe?aTm2iq0;WGknz3=s%(Wwwb#?s4CPVN6E$|ll6s7iG zaRW%AsP?|ZangNHc>6l-M~9R#^sb1#^p}Zx;CRW<=qy3k3Br3amIi1zH|gOC48fC@ zB@t z*)Aw%qYir)fD6ozb_D5G}v=(NqS z8f_X$At7-jf^QpW`HqP141bIojG9)fIg{JY2(ibChKC`m8qK2%-6#TYCaJU6G)gi zkz{XG5aKggUaKE?=LuC%iIWx8+^@sIg^(qgSI^QTk>7;u!9Mk6V}UN0Az+6E`8Lw+ z2h1v_^wupZ5>_DXy9q+ky{m!)(HHxZZ%>xT&Nv=^igScKSmTQP zPv}ax_qU|4}???{2&Olx^wuqLqoi-!MGRa@W{Js^!A2}%YcD{O#F zQ4{r%GN^bt(>;sd`ZMD-CQvkUgzUNSGB-l{Gp^^Dcy%I2HuCWMY`O_85P$B$ob*QiXJN&0EfHZo*JJn9 zG~M09Wkgj#`|(RZds;$c_Ws=2lPTxI;vx$}61{4lU@qNSv^XAXMtj;x4s7m^s=CqC zhwASes#3`m>ES4~$Wp$+5Oeq~ep|qM2+M2bI8tAH$8~_GdTd-zuW;OcVRP!Z>u}!j z&4+y~U&qBw)&7AuCL>I-nThsS=sljVZjOF1g4yKC`_-<{J!d17JqdST7pRh=cWE;d z$L=PMaa-={Y5w#wsZaJ|F0&!~+;XP8D85ROs?4YyOT!xtzRh(6$e_*HNYL1FRfdB; zZ}!~qmKOZBvlJp$r%B+<{a*v|A%ExjuLYC5S->n&ioR~yL*GI4xa1R_S6^Fka`hyl z!mUx%d^sLtvL7|XC?+il2}4kIbk4cq;JE)pj*2L4ifo$#l}sqGJ0?s@1FUZUtZS)(f9LdTJM1W~C)(6Bkeci7E0hxHWMBzo%5$$)e5~EHq$Yz&rP1OZr{E zXi&>$<4z2he(l2;jlCW<`8yW!bWTC`)svRn13*EWLQyioXTrdJ63{L+GI?6wPw)Bd z&G8U>c(UBbhm_h2|7Eig6P$+nXu=OVCl@gq}pK4aM6CwuzY zZ`M?J0y7pXOr%?5YcC=4!`N=Bq^aq6ICB4sd@iq*3q~RO7S&O&SyHA(9D87^RhBR2 zaioT-(7d?_qq%SC`)OFuo&8liZmmc9cI)$tEN;8VmP%k7Q>FgpWBR7=41ysoS7sv@ zz1R6a`??=d+b{$usQ8@CxUYB9p4kdd3h&3CO`*Kjho{6n&J2KfpjGTKIs9(T%f@}B z+#c6DA}M*ji8IUv5yL#QSN@#qrq^f5P&7AOLR|K11xmc4pSwwDOzW4C{t7vZjBf>x 
zwdUBsH@hYvBw)@QR;uk2;3EUBhDz{(0X9dVDm6x-#2;Rk96ey}Ox!9c^Gd347 zbQJ>K4^iG(;;{N?TEQV_mkz(tK&rj|*f-a+tz54y^bj^fS#r9X+?D!L+H(3hLUv|} z2Nb`*IVTYzNxdj#9{zc>s~Am|UCxHBG%yc`gAr@6&Xhh95z|{x$n0w#HnUD(Yl&R~ z1h=s0VhwEnxi^cQtXOY4n2iMb16?>)Yi!3pIn>Ty26+7SBGRNZDNdUgnklj98x$5Y$h|-UAb}H@tU|xZ@>`X;=K6eazaehmYaP zA4-6=0B`;A=*=Oa+!r!zC#ynwjpj~y;#E#tX-pG(%MgPHVn>&jGwsw1RFH1=Q&WRA zGlm~FfiZNZ2&>YjjnZ)*A6#$$54GI&qqUO#4X+O(u2`saNedI#{)ZOvF5eVE!NL5Y zdJygqMT#_o$1}Rk+_$a%kHd$0T=42>`9}Zf&&a?9x_O(k>^p~|cw_VYW-Ra)*Wkb% zD3z#=;JlD`bxY&x4VN@oHEem_;XEwkUY>z%QR$owf5;1eTlVuOz5`LTXJ+D#mfhi1 zBm}4Plu$l;Ou9$7WsJF3w`@k44PQbaVPW=c)(g+T9rd`|^ENN8NB)RA#4#nf^03-N z!yff5MIekIv*z@d{8>%ayV*z4DVxExzD(6L-Q7$H_l(Yj8+j@r|6kva1kzaVT(aD z?H=>*!AQ5>DsQ9d5sy8QSB__r_d|j4VukqTuxheanh@2eg1rvRehD=we4(qUZ&46 zcgN-=r?>Hs4jbQ2(bcdH&g0+&%hJ9f;pGgwhET`rPm`9jVB;Y-V2m~|yzc-EM?=gx z{{L3Rd4v9#-4e8W&fQP*?NPLQ5a~7tA2dW%s499-<;mIUgor5@;{_X{UXR|IakYq! zn){U8+=ofcj|yC|jEw!Zqs($&HY;6&KCDzF{oyNNIaGYclO@QvT!{%MT(@_GOgJ;t zi-1vL+lT_b@|n|cJp-&ws(t{#ppH265gIt?|VlyDvtvxSHCdKRq9m!XwkdLDt z3x?lTd0^FA(Ghnqv8b|dK}s83%Nt@mdMVRzF_d(TrTaB9{{R@%0sncn`BT2cS%~Er}IouP!`VBtQW^l<0`SoR5pNwun9E5ipnx+&b4u zzU}!YdTAQ(RLrnNG%P2RJ7dXzzkg#7vC6=IK`Ra z0rgzX;P+-XXqL1)rsD)fMN0sN+y92*TfLeMU$>~2U9s+R-et0un{`L4y%0e*phJ6(=~zsCLN4Z*@ksQv zb8B=?hAM>gtVV4A2g`@US?lMe%{OjPBM$X?Nm>fv!mGraCCNuxY#Goir@I9ls&vExHaN{F|Ax^m(6}@)k{KhhWpyQQ$hRvmxkyV5 zX3O+z`0#BK+52@zT&NZ`*Z(5wt;3@FzV_i6xD6B&DU1kZzQm0f9lfL6DRN z0Rai=MnJl|yKCr%-_g(adEaaQ%LT)nv)5khu3ezfs;zxhZUcIhs)7i~ePl;8N(}>3#n%+Qjo$9CrC0_ze7JsEhM7TK6^P#OAG(|}sIpiy3Lijc(bw z*uO8_t3i@C$>z~vShKiEB!8Im9eZ2ubhNyc%|@-9ha!ZQJiy$(PPtpDuTIY3Gvr?l za(n|0;I_M8)GP`j?SOsa@Btye3#x1MvEJwYqz=1<(=lrceIk#h#?50@pVt~FC$Flv z1e?+2*w{Zb?;iVA1B%Dmg?ItSv56y}mFx-9)_6bY1gEr4{AsJ^WfkwxUwdBueWu@$ zKE>_G?cl^*4iximf|RE8JEWF^Z;%`!!mWbK3T{EChFzU`hclcLG>L(9>w?G2JZzWe z=w^awmj?AaVT_M+mlAN;#B9uC*D9^6Nw1((-oFq07cSuUpL-1U>nQo7(<&9sZt^GO zn&cbTvKK9F^W>Db=(dsA7Ro3;r1aJ ziVM5AXi9-HPDcN0O!$GxP4}1jn}6d(Vp}*o*mCq=Mrp-g-wzr+${+GkIs`Qiq+G(l zh*01%0eA;fxcg`xChcE4djJY{_+zNRUl}32Tx2Lh?+*kT`U3#lvX7z&A!!cCN{IMP z9;!Brn_{y2c80>SvH+N`1*|Ib54|H)zuvD5h#XeMACgu=@zbF1I9}yPaZO?R0M<7~ z^AGQx3IEJe=NKYoMMgar;=ynelhfV<-qwL3;e9rM)-@t*YJewRSIr`x7?u1zVF1r3 zv_fbdo$*_&FtzKft_ty`?8!rYGAV`q&1MYq_Lld}X@Air@RO@ln4H3vf*d!uG_^$l zlGcNTAm)8wp^_?4!3`i)H^^zhq3papR#O~ynt0w5quq1aA(R=yD}}i7v)I!n@q5J@ z;{HvEP>=4#tM4c`4}Rxy#X{dvboiC5KW0eyWj|~sn0)nkv+oQR~rDj!2$tr)g_)N+*#_jFSX_6DG(X?lf^ykTws&JiHeZ0-wE1vBB_ z*ELi!3a@a``FgteE$1h7VT%y#e4AcT!T<9d44Kltz)(iEtrw8km!;wTge19+Z z-?bnC7N|)MlSPX z-(Ki%i74do{~G%Kj3iJQ^Gz=77eFRU5soqhOBMD6!SFSH)Zz;O*K7z`iLo;=g(4{Y z0S*xav9(ZoGo82hK)n-N->4-zh0AGqQPl_B} zR&qD?=;%+rp}MAlaNdiA*jNrQSn9Q;G`UBd&bh>LlbGjm^Kr}KmB%vTW2Vpt&pqHa zi0!Skn=LFfmms7P3=kWrAQoNSPC)^U(n5WzD}sbQ!(P^8g128vOz5?MG)xX4uT%i^ zQJ2OL4*I|%skJKGf;Wd+{uY8n$l~~!1PcFw8B^V&{_WMrLf~c}3_MIIO%DMVn?sE6 zV!f;U=J8BL$EXb39`KuE_NGeT#D)o5d4~?IoS}wA$40!G`5_Wg6&U1eOC+1ofB*&wGc<&QuG$Y@lV&?!} zDDrR{$&&XNl%vEWz=B608*iXLC3CN9oF{+W?KQ!La$jfqRumBDu)|v|{7G&epG<_+ z45pvdFgwG)!g5Y`-^E@S(APe+**G2ImYQqF%(+G1Y?+t}YV6^iJwnyy)0NK;{}Amk0Wrap!>J z1SJ&z25y6aTLu#5FlqUAw0t$>P19B$gh}$@;(1)S~l=@NcR zpx+3uFet$TK~y^g(vUP{QbN%5{&X3u=gp0ES_8Hw;?FiWg+_YOP#$EkiSqt3o4JE&WK4c?WL>TFm=t;Fc z-WKP+gDjTn|N4&bN1Cw*5uHYRjb;42+kxb5O*fMpIE)?SCF^bFjb2A$8Ov!Y=)9EU z+-hVEv*xFn%1v?iuYm!p?QI2&Y$gO{zSdSn_os2R*1Qu;<q*IG3Q%>lMe+PT`vQ zm`M)oz8juu=jl0xd3q1Y1KG#RorR+2bd>@`8~?61NfgWaz~hya0pii3kmX{?3 zxXwpmdL4wqIsZKX63nI^g2#_*FLR&Zdf&(!?-?DoU$&C!V2%=BQRulQ|L}Xrf*I> zquR5kywB>WG!_M5ywT24=YcfC z3T;2wy_$F5|2-7Q*6L@GJtTh_9u5hzjBO~Z0zlMQqFkMTgF{Ml!B*= 
z7X!R57GEPSHTn7;<#3d)l!U`c7{$}YJcCsqT)rGFHBvCgDL!M&p1u}J6Lu#L|I@J4 z;1i#z)(y#St5)cZP|8nW)*LM^YgQ_H+pO&Q*pz$g{&;`fsg|J-)zuK!2Zk6e^s$oct<+S zkw}Tqz=2djX#ool(<= zIF65`+Fr!dDq3!;Us?E{0O{?r0b%XD{2Xrs|W}VO~1`v-tqhNx@L-0aDl0hjUtIBqq1saQqko*i47H@;PD!X*+`SkdJ zQAj7L1cLysI5q8JoqPbMA!y5t4f6%#v97%9k)?#mgW<@bM7PFP&i0`A-)Zc(Fpt12 z6JTa1Y9n^(LRs-=>j{j8`em01#upnnDjBab#Pq2@ zu*q9}oB3$FrRy4bYWw#4$QFAVX$d)yvE?L}-%o;$;=W;lQjj=a9Z_lM1d`<@B zByuDO7zsBreI@2Q$_TbG~{N zXi#mFvsTuck#11sa9D^rXIS|GpWU#fGnQK8h15NgRi|>Ic}=X(f~`vuI0rGzF{U54YyOAsG!mZgGz{_dAj*uu>tfJDKIZcjrt#?)UM& z9GR)e)qV@y>YkhGzIRfq`N?}caJwfNzhk#YEo8B;(#q$nu*aTU1(fmgtB+=0i~_>y+j2FPG#nKIwTXbDarw<+>~;a9mLFJB}{kdGB<0@VRuF(jOAq zE;SK-Wiv7$T7BU07_fPyteB3fR*1`)sPU1fZ*`Bb!m!B{_d&5+&W>Mqe}uI@w5D>) zH2=z{%h1Ip7x-RzzZiXNBvPtZ5lK9hX+3Zhv*)tUZ}&&V zGKNF%KYKr@ded@JFYba2fWscqW`QaEa@v(Q+-_&@zu&Lcg+PvXr!`PQ@waijHYta} z^y!OP_gXtmrL+BJ=;Mw@lu*V_J1=tY;8^>oJQ!Io+RA(589fozL0bMYO&u6L2$Go3 z;SCtl;;=lRYs)dD`B;m*q>|+?KwpZ%|9)QxCahd)DJ;F};5@RM>L^r(c5fRIce68XghOrMn1v2*Y z9TdrU7gtCs$xAG9yw|u?!4j}TKp|8!SR;Hk?>zBKUU!e0FBBLjY;{z+%HV5}mxl(1 zrw>IDXifu?`=>;#N@M0|pe)LbjpC3QQRu1W@X``A52z_d%o_WM5w$xTw zVwv7vg&0iV`)a>T^w!&ghiIjN8ZfDJMU*|%Uy*q8l^QmFFG}>D$h`V|(<1tGcRZ16 zNU}Fp%D6e0G~N8(aA+W9e7Divp1uCKDL%&Qe9y5rf{EK>fN5@!25Q30?k!KnW0uh6 z`iqeaM4l#%kmn$QVeY5tffdeLtV*&Z??Z(H(9U+OBf zXc>OE5Wl=2t2V#GJ@48!7rT@qvBCUrL2B9}gjcCqJ*2~l#fTp+R&e~5B~Be^jMl9D zl~XFq$NiJE#eJAz!+iC3&r=SbzO%U92gWxyIo9ld(*vGAG9Qfq z0^+XOznK(l!Uj@j+hM@T;HvAN%x$qWFB<;Yk1s^o7lcnUH`f)fO) zgW^Fy=%VqA#iT$9XlSQuOD;F(yW<$7tTY;6ozEoh;J!dcUD$iDx-8Y6OmwF#2kggm znMnZ<;=mls(wBY3TGk{z`9nLGO}~<9xM&1l7)*0{ePT|OW3)lr38tF(iU%D*Hr3cA znVYY)#8lbnJ><0>Vp@6W2KHy9^wwol&w~935Z#GHbo`985hjXvF0~9QU+ggMSxKK# zUJ0T682U2<)U^`S0^$zH+6S}-37>u`dq2NeF}hrjkq{UnI)GgUwh>Te~_Uj@n=G~(uMAIfNMx2fa-gEld*2VMA(;gH=JF=UBt1lIh! z#iZyTv!<<08Z&3>bJ-;!&7eb>B=eHvHpFpS(j4z zbR*7tvTSaL^#{RaKW0dTIit@Y(LFdcsL8xL=IzjPp@E!^sH@${_-6a3wK!Cosv|LP zp}pf>(S7lecO|4Ay;#%gHJQ#YGv#G865W&~#>0dy zZ4(rzH`RR<4f7bt6tu?iIa48Rn|@q~K*QTV_HO@Vl5-flBEdf2EaE}ON3iUUy*E_@ z`{@JrjX3B5nXT@6Ul)!h9t*~8tB_LR5tz^DGD zVS1qaaqFIyZ22jNPyHUpV=3a4uR1IBI{PdE?p^rByWx|_j(5qupwPV2z zLFc(-5@xdkWY*bC??(vr-EHI;OmAOkl==T&UwDq{b$3`=LT%C3V%!nHSdO&%aL-~6 z&*v#(dYuiv>%*x%4utWoBWK$zuB^%{lr(ixcwfu<78 zTUjYxxEdjQ4y=KIhYTS!RY|~%0Wl9|=zu*45i((#+!{D5%+jhBPR#?Vj zQDd^z8xL;56#YVHqhWj=Kd;_3SsAWnoy ztwO12L;_T|CYr@rj_LLk)m-GSdtkm5qx@XBVB>IbR1M_%Q58UcW%4C9vYl7BukdfS zL>1|AA9&&?WI_=iP;u8*NW4Q!2Y5!hf#fBgokLwqnK;a)nUi?tralJL~`6s=n~7 zzsSVqYAF5uuKrg7V7)7nJDhVeWm2Ou086Ke@3a0=Xc(YMM#aU-+0TxbwQ1#bw$0pU zpSB=NUL|vT?##;Wb~)fod>n1XMFtVAc1B$Z6%A}}_q<+OmcP3^&{_Ju{8Y}}5{x+X6#a2jplVO(dkoaaP&$c##3#bY+bE!?=GB>hhZ;^H=vt zX<|(*{Hi`kxv?~s5hk0d$9{b6 zpF}cCEGg4GT7VV~cpn#S*QQVFN;-sZFc-e=9a9?c-egtJd5w-82l{rn8AK=DesJ<> z{rd$A*`Wj{H~jRCAZKs+tRSoIShfr*GnF?iWZ?&AoS)}WsT^-)9^tv$k5p-~%GK2M^^=XB+bhC6%Y&6Ze z)v`Vb{^{1^kR0=F`VGk$2jWj>&PK7{XLur32ir|F%+0dI4mGFyWwzZzhi+HHjeGi< zKhC`qMAV$Rwvpy)C9ZxWcA~5qqaC+2xt-7*E;fev3cKiu_1jMuJJ&mn7Tug}{qUB< z^54G*=+kC;8Blw;jX69%`Dt(teK@OFw`}H@*!R&tc}2WTieEa3slF{E-!F)Kx{fvA zxzt; zfTUL8XvlwsCVe&Se#Qr`*m_)Se1dtL@LJ+}pQ~iB2~8F~h0XK7-|FVwe8mZxZMgBi zF#N7O>ymg3h8u4fF$;UzIsdc@i=h-;8XPjbUaWEGeb(Udz|s{?7I6wFVGQ7A#(d@0 z(;T-4zch&T6=sfId1Wk?yCDU6s)r3jKc=f%tdrOcm7eYoZz*3oroTGjr4+f0f8k#E z?|26A)c|DU3G}DB6G%59)myI*SHUNTdjYd_7Aixn+Dm;GHSL~Ec?Z@LJ~VO^Wl(aGXIpzCke~tD%^M{R(hWVE2XWWU- zx{@F*J((M2+utSJ$14AWB@rwE&>1_yvijYBWuKZ2jvIhOy=cwF7GMUJLn5sK62367 zBKUB4!I4a;ZSk*SQD|S2!!!c{XoMEg5+o5W8VD--%q0}bgSVF=72gx@1&aPmLcIo` z0L-P@3?#78E7GjDfbA+HP&Mxs<^}%}E|5nx_u2<+p(Fuds-pxqR$>}BAJ_m;`Z`b* 
zz9($iN7&EN3;jg?mdhjtvh?Zwltx548||ZvzBY*ol|iS8a)fKW7P|ZPHX=uW4Eh$%}2Q;~8(P;>{zz!1`VNSrmN~ z<$(4APr~cG@RTs(4E+}OtTVF9*ulJ0n@URS`(rLUJdz!c-b*?4Wb%2xz$wTP2Yv7O z6)A#b8rcgM(FLC zkuz*ZJ#_oc;NJUqf-MXgD8+8cf#o~2dD*L-xzUgNPM|PUY|57Ff00Y(k!86|otwsv-DIVi!t059 z8I|_7KAb4BBx-+p958Z3Rtc?b;Gg%b((`@Vp%EORge`Z-`&y+S1(eVtqQXr#-~ZAh zx!qV*F>h4^X z#4ruDWc$=$8c*!Z-UF2-fHvpZ@!RnfHgldW$|Oo%Zf7t`I?x;;Gp)LrtU-CbHqdIf zsZc5yRa2Z!?#VJ#fa0}OXINw7a4uFE*MRlANG%Kwj_0vM_u!?sL<$GGXW z-DxbhRzRWuy4r4%SNL+nYdrqz0&x;ay13Hwm;e+e;O`7(wqOm)o z$z-nQ0{(MV`>#=FvlD6w=?&cM#=j#^mgy!vgMX^R+xp09d>cT_ZY=!^^4LbbF?_a; z>r;jGud4Y=>PsS;AzJFQ>;8u}zrnn}8z%576zyoJDGwnT130Yl{sSQl*lR3E3c9H* zxB+8_D0_n*2Lbo=H3}#O;R_)YuE4UQP!;g%S_Ajva~hbU28ku;_2ZE7x<^GwE`S9x zuK?;V{as)h`u@CWQeY2;nxUy;VsfNxPm zRzr|QyD~&Pqra=B|9~MOoW7=Zv`#QUP7M-)Tuh>d0J2Y zP=vC44jCTJ4S6$$CLaS_vV01?o#3?XK-t#*qt9cV|DL|1sV}Iux`I-93wVRBE;>(4 zUU^vujRC=2Zw);tC|49ZZryjq)%udrjpc#OZ#R@PZpc!H(|22;R&48IRAtFnYh{R$Y25!p|ZD!qIlAO^vD(ilJ5@(k@_;wS) z)^;YrVVo09DW>+pOw*YdSFZ00lMsj(%S+wiO~8$M1?~WSIM~|Us5Q#SL;PqYT|;Sl zQv#~^wZ&F0+7Dkk>JtD0rE-$n_D3EU&X7W}JDWiahgbQGc)4z9l3F0mAW)n0TwTix z_UwmGH<);LlTAT{A+0_^rccp_^TGR;HFlGBT8&OdMQiwY^L}ln5hP|kP75mpF~>A$ z&-ty`EPG-{W20R4$~@Tnr4b2l&l)Nux>X;4UqLdBgK<>jRMQ(YX_vyJYc0_COnk*$-Vb$dkBt0 zs{?asBlqk3`~aoSdy|%^!)7?pxcdUDmZZI%wn?&>Le|KppzWCCe~kXyH9UL|nC^mgYb?Wc=u&vcp^mzjHc zr2PPx%e2&>8sjUcZX&p`eqkxean2(zDov2$FqlNK9|GeqPGn|OTs@(@Kf&X-C?L+* zYW!Vn&vCYW!`(mm@xS`AJ@^;zZA!;4jrq%0M_Bs%J?6{fN<;Q@Log1|@t8$BzHfrL zXz$_AxR_3n2$^9Dus0t-b?D-q_^DF)g{bPeAc;ZuxEO&CXxAF2G^jVAlr@n0IS{|+ z1`z$BS{cf~rY(h;z%eL(vsUYt8p!?)u%Q#JH!3%k%IAD}BnA2<{rwyajg4`!#dM_S z2=oobL0e@Y_K}sK#f9QhiLR99Iggxf_RqS`_(p8&OsI7mb^|gp6MvO%RQ@9V*@FE7$z8&J0ru}u;Ma3lZ+!75exzylZj3HZ_L`q1(YD|Y6P9(DjPtiAB zs6#(@X(->`PS&8Gn3`K8Axm9DB$|$Pp^+6V=TAMb!V}JQ~{(Bg3@@T7uTspo^U^v(LNeK&JPaxrZ zXBuazHX~)CDmg6|m8yYkjL%->IIn{;=VkmAyX7xQ$-Vv?a$uN&3Up6KjioF3wY`k) z9b&EiBb&Tb4l3Q4oPiU>+XwnQbjR`@rhDU{&aK|~B(KXO`X4_=;%jwz#~HE7M^V3u z^HsueZ1q|*C<=#tjdG+5E)OBzOw0c1a^}y`JKfQ5JYbu@K%H~$FykfBr?zcBb4HT? z%K~`)|Fu3l=B=FR8N61_ho;t|1@-`OWyQVbVR(axclU=v2twOwnv4DVkB>kqiT+{L z#w>vcO<8=19z^iYZV6$%5szo8>@Lx2po&AdyniS|3>BNyVzT#0f58c}C$#V1OqQ`! 
zY!*VutU1=%YoeqXq>A}!04x5bS}mlqW*)Q2!?{`T=W^auX=-X$U021m)zWX-KWoU4 zpSS?8_uJa<=w{!CEeg=X=t=*NM)9uzFxTwoJH8@dvtjOXmv1C(eM>%{nHY{+EP&$R z;|0^3q0Epy+hpy*v^gGaY4(ONerp{lSbutXRCbr8H%i9`9fO3ouP<6PxZl-~BZd^b zibTXTs{0V9l!Qs)8TTpBP@uG+inD;*I*7s(p|j<;`R;P5ktH}UW2>T+*LwKmTE)OC zol-qi{Mr#W26{bI9Z!iDahh7aKTf-p%`VC^+*s|y?Nk*ci)hBaHG&?GeE*U9Rwh`H zfgUiN3GEKneyjEBKECX1Dh?evYHoaChpxt(HJsK(lc*$&Z;Kg8K~i&O*d0A&ejG>q zh@gGGEoO<*`5Qd$5T7&=c+f1W-A9g0*M_-FbMrq{GQbmq-Z zH`nNy*(GD|)KW{r`E9=N~Y?C$IyGM=&ZpYRBx^B458wuCPJ{99jKt^NJTzV*SCAG<+P(T#1s-?o3#^U&e5 zG@@ioz;M0yJCPVqNee6zXt&tKp!cyp!#Hj#@6|Cru(#BH`b-5xSpV6W)W1QySLFkZ11;j~; zXm=};XwYXNKNTWGD_F;RNPP1?N+xz{p!h!Tkq@OdE9X7H7LP^gV&UD97#=|w zH4c}Rd(P)xR=>56yIMc)52f*lwaGdVIw7sne$K8$-nVGC(3Y*1%sJ#Yg9n zCcB~Wjzul`)mP2$F8(gxGE#st*t*m`)`HR*&fxYRW`jqH*;Gi-=YLd7KtsF?T%&Ot z+Wl5c<2O>6%}Q3bQx|aNx`8X47+0S7I!vzGD>_iIXa*#n9%bIPeES2-ydix4tb{G; zYwO!>U;cnE3@UbdE;FGT#HP`N2lLhG&lo-Y52nhN6-|rhGcNNtH9Zy5|FQ%;iPlvw^3hZOIRdpowCdoYPfo_60&(8KZ@;*H#X{C@V#AO z!%OSt2*JfgT(_5W*l;`?_U^+9#a^x7`-B`J%#z4rsbp|A=n_$Ik=-Fb=CD*fpzI;( z@RZeFWz+kNH$wO1pow?)$7aSp6f@MQ)rsG-FS*^ub-3kWqSTwSp)a~Xa%Y;&m=t&9 z7~8`@yV4DMNIO-Y2tZDMSD^D>j zZ>rHjFF;Aay`B88xMz@_Wzn}qe@VmxgfCKbIL1BC;CXOnBO%%q`HcWvM>I4Yfbc@* zLNDLj1bm!$m!Eb7l;Md60A7^u{N;rrVHOe9`vT>Z9MK+W0XHzD3Lu;@&@fYSNcupJ zugO1FIin=yIL7&Z`lnF?8k(}UAMhMtfYN7q`yIY^Qj2{F~mY)lR!@Iq$L?m;U@^R+QRVvG&AE5{LJHFz6--xF*@lC}DbL20D8 zti^6db|c&5)xd4axW{5UW5_q>8LfkGo{e>wBffM--ssN>c?nvR>-RD}QdFW{P78IT z8_Y>%%{$!8ItziH4&@0h03;GV!4~`SqW}CRu$*Y2PRDvM761KR;ngE5HR-1}GZFLC z{lpZu&an3=t~MK!C1sg3EXdA=87>&=2GSf*>6Rn`S;=hQ5~)_M}WR2N4)u6 z)|zCxA>USM>FWx~oiBWfJgGd64Sd4Bkm-(6vl0OfAJgtQ7$e`#6@RD zNj9%amra)?Ai6Oo-Rmg;x5y#I#c6YxfTo7~>9X%@|HT6^sGwo*Q1f$|IHh9`f_AvP z1*V;A`*rzfrsC6cz8sg`tt`?7+0IQUo^Kp8o}(W(AjbYk|HAl2PQ>>*!`KD7m9;MM zR3iFo?k4)aPM2LlwuJz%XXrRV{F7;I#l@NDzM#7IqvQJm7HVHf`jAGTt2_~Oihc(@ zVB0_n?Si;Q*Difr|BB@x<#S$uC?~;Sf@s$DOt{MMznV025zbT!uE5vd*-a`?N(uJ@ zKu5lkzmmL9(bs1grvQP;V4|8HT;pWyywi9ze_Uuz!aB2H0@LF|;H&`7Xzj!N)tV42 z9LQ4kScP_P@Y zktife1!7HGBoq?S+-;O~pT~kLK-4ccCl`rWwA5MK!2+GMfD-0ih0*7FEE+lR*UzXQ z;pmdY7ws`TLb0eH=+Q2uKvA@~0UEJjgdGA{Hz2w#S_jwO7iJsm!pJ!*ZpRipIoE`}d97??Cjg88#%2U@56|fPkhTduA>3vj+6J-bh zufspzpt?QWYR^?XgrIqfaqlBIUmO#&N&!(v*?GP3Cy>Qfw=mCd!Vmjpz-UVRL%V=# zvp-Eu8`x5qWwo0uS}!8`+s?KDo_Ck7U&M8bZ@*@69YG6#EzUNE`j-`Nn1wheRAOnU zV#q>9f+AYu=l*yX=6#F52`I z+p^r$9>-o;^O2A^OxbS<)p@I@ohHe&0kZo=US(?_Lkjt<@h7DkXmnlTL4jME(!R=R z++V_u6aTV8Uha#5H^ZhRe4^Nz3IM85QVz%TP+p%#r6$#i46N_$#*tkeLaT0nsF66RMk>H0eraJ`CY_8aRW>M?XI;3>Qt~K0aHl z{kcy$ZJAQJxpQ$K1Q}uji+$=;b-5NjlqiqEq4q%Co)41WqI{niN8>B);rlkavhr7Q zV0OgLF?`6TU{?S4AC-OR(XJYAqv1l4TKO=bVJ)nvWK?_1KHDA|9Y}e*xTgffu54(z zRsm*+y!#srg&MnMg%2BS$2np3n*;hr=4mmrIuwG!68!@geF+@$6>D{(cFxN+QAhV- zMlxaW#W-9$k zR*x~gap+}u*MDdnll5go%Tt@wk2^*Bh@nTM&Q#=rO`#?BwBJ8bok11z0?v7S*^MHL z)@q~9+wM?n!G7%53z^%Dc(=AG|0yjWD`6;|xBXuXqW=!YPl3pwPY)sf<$hu4Bm?TPi{CyZF(raOSsvs)Dr4#V*ns_mL)BRn8zgf8vGI=6b-L1 zMg1k}DL8>JmIV}{Nn)uG0J1^?zhYjI>x_%R`vi~nI^9s*B)SyF<&nW@ONE!DGQ1() z{7M9+I|Ix(eCB_!5IzvzfKq+imWa>brs%i+qORT}_*4c`vHm<@v(MD1Xu@E;h;v{T z`Wm9g%SDtm4;aXq;OrY&5@w9M-NT}Vm4USlb%5ynOPIupVwwOu5vxw5-@8A$hq>$< zZ!$ZnK-<5G8CAcjXQ}P%;Yy3ZG49BnIgv#e*4iaE+I6fxtG+5@3Q(nK%;<^dDZ-@! 
zqr5RuM-gHQXvRmyJ&1S(kPsu^zmW)0k9orTgk+a?#|v!lE)Ow!A{*+q%sa!?xXn(5 zYW&+YekvN$s1q}3S7KE=tYf{r$R9i=U6253P;R(j8&`l3-x@oOfc}^TUs!E>hYv%Q zzb<$<`H_Kh6wO=WJ?xk58S(F&-#CYJ<*3IJY|t-oVr619kZM!iiA|y-1>Is6Do@=sE#Cj!ls z6K_YdIM4`A$p_ea{15A0`2P=t3}K&e9v%e-D=aEqON`s9n2N7WH>V1N8TDS52gQ|6 zOAYzbvu5I=l5Bf`nA-nYrz`k)fQP|11S~|*O4>g^ZTyw3o69~?w+Pxl75vlVJ1QO+m3z>fl z?tz`pTrKWo={h;*by~N}GKAM51aXx=K#Nu9hf;yxol1BUP7Jk>&46sltrfQW{_K1y zhvZiYY7vV_TOy`+3xoFQWO$9f9dayJ#`+A~1-adY27~3xFRfPhr%JTurRk5oQ9dMc zXd{d&fX!kk=hV1ocTo!81u0Fm`kAn3GGQHj$58h3I9O28OV~krg~;R%&r&OL*wvKF zRMC#w0W4cZ22loX`!$WItpHY=dn{P;w#o05!}EB9Q~7Lrzzo(jX?}!Xqvl={BQe)` z{XR|SCwCX31l5{%18=E8MS}kmppwjn*jTK8ADsB^Bmb4Wiweg2i_w%3fvd%?I@K+V zLvgmdRld47Uu>-lTs6V%GNBi9?8v)3nkzBN z+4?QZ?$aTr&&iFo+g8c;w!@EvqS<|iWP+G5f>zTH`4-Yc$J=_FnKW1GTmcrgxSTJ!wCsnRL|5wkN>}n|W1XHvax{k7L_= zB6--Be7ITy`*iyTN`AX1@Sik<&K>{=qDnU1#DEvy|49hbbaTlh=7sV9-OnF5~i=|74@+2QY~F6#3OoFBb>Rdp9p;VbS70dEGG( zOZt6qtZi2~kv0X0QyK@B8QQp4C)!Tugd(m)CJk0Z=s`33NHeUwLZD8+F@rl%4?TMc zgxf`+a{KA5^Z;d1v*ptbAt|w>?u0*<5DkHk1O!e~Ml3d9DAW1Y*wAEi@*6^5AoeKZ zC}xI9Hlu=c0^+wY#4P0=`RA3RFCChI+{OmTNzQx3i89T63?g=e2~gD9yRPU%a)%&n zT140&GA8jzwf%HtgN>li?Rno@w(G9M*s!`38pJkJNdH3sDg-n}B$9g56ec5Z;9moH z-%6W0q^)U%Vs>~Bki|6QaW?#8QDQo^1D_Ox#>y$LJO6~rIK$hP3ULkTaipL7AX7us zqpRRbnc2BjQC{V|Lr-cF4|&&dsCbZ)8FXfH+z}f<5oeBo`TCUoEn#cm2dwjr?72;27#Qf zt}oWAGhmU7Hj6kgmE1-g4guX?b69Z)Vo*zFNUnS|{Bv-Sl4{lvd!V;H>#jPD8}z}w zI7Cm>++OENAr(vT6ESQCJ^s5!U&4C=#y2FK-vKvDP;mhv&U2nf}Rq%ER?6EAD7w4{V`djvskXUl_uz{QB16Y<>Y7b zzp@NjttSRE{o#VxYz&Kd+HQolCm!^OUeS3`r%{YhnGH^j{721!p9}$`hdPd1>mY1q z*-xVQ58K&BZ?Wo1VriP~q|*MeD&a(-Mj8eQ#}9xibYds+sZ9pETp)!GCbElEJIu;O zg%pLa-nTpRgY;E-C8xt&Hoelw`R(?;vK3KN(bTxlq&N$Tma-$?66x*W;dy|Yk*jSh!?9P+ktvAqt4n3{m^YMP~%p-dQ5fctfU-Ev? zw+JLz^81k4>fflZmt#i}QO@)N!H0TA`_AmZhQ;Ja>?^k9gdwzKMHQ4CO||5#)qkU7 z>@^VK37q2x~eG1ur?b4jS ztRe{cAzJ5L&=3N;rtDbBqFNMT{TO$GXOGgEGnfP!V4)Mf*tfmSs&rRIk(>ggK@e@~ zQ$Oo&TFd@MM8!dvUS!w0=KFiax&eIcM*)_dc*$8R8Wcj}{-F#p~^S2b8`f2H}CXy{5nKzD{bB`o`FqGp<(@b=`N$yNQ zK7b8b(P#tMbACIyY>|4Ny1z4pshO9DJ1BStnIxyUyEF}wENX3?ZkH=Y(wCBQci#Sp zrNt@V|9JtluxuplH91T6?Et3rPEB{sggOpHAHjXTx#B`YJoB=<>XkL#7(>zqr zlmQWN{H;9(vmhtTD!K*ae!PSAu=1^N%4@S=-?$^X7||kU1;Z=#mw9p+2p-5>3%Aia zk%vo78i5iMFC{arhEl!&4tKV9UjqCmxQ-Z*J2u(3(4C%xGzNm~5!@q;5hX*6*Q5>N z!g$XRuZU{!swgzd-7jQ zgZYhdh9BZs8(yEL4XO9}I-t{Rz4>rP!5tSW8oY+@XJ&AQz}*pv=Pk9C>>4eoV5v+= zH%R+|$FUK45$d;&FQlC%gAHoLNhpi~oJEqlYsqeZY=?hT1%40n6@zpxmdhr?5JKk{ zk5KR;n}JAx(huK_Vl);gj`AB#Ya|hzh&lAja##x>VGZz+i$NOT_y;yHozer-X%bHG z%e15P>trI)OU31d7^KXCu;4L7JQ16Ir@<7?%o52K4|tw4JiebIia@3fHZv^r6J?afqY)%rW=9Vfi?%C! 
z+}H|hhx@%a(EIaQ4H%UTZlPbX0&@-sFjhg;x6a;5=KCG@516RFe-v+x@=?Wav@5Va zcWOL?j~Wl2Y`X^9(>elCwAgjkpPt!49!7FS!nD8ZuTR6&ya6iIRIP3WrZfAFd9E$)H6B ziFad}+ue8d0I@gQiC;B$G3qocd}YhtJ@^G(630r9Ws3!kW{Z65ycZ*9ZYZK%|N>2Mubc%;L7I z8CrJ%OfvBEAOZD;M<@U6wkx;80-><~T}j6_?jmVt0V&3rpQ(=HY43T`O9%2~zx53o z%?wXMZVgoGvDK?(^5l@$3rfinUxDLv8Z#Vv(^rRp%f#kBR0aO$Dx)dG0H*(|jMuFn z#PGORzYjmJYxuHK_FI|dcE19pQpDpRzgH!b;_mrtBp|}VZeS(*c8dLvv33uGoWC58 z^%$`>QAS7Rk#%z>WU;|+Nf)r9e^u z*Yj~;vC&TCg=&_=h6vCY##d^qKg{w{RvoLJjJ@4TQBD}>?4FRlxGh`(Fc4OE(Z}dJ9^!9;B zLho6d!~IH5wzsWck$dG2I+eSv5rdOrK5Q1)&1l( zOHW<{{)@r5@DL(9Z9i%GSZejz*-Xba*+Yo#04V#jb64-7&GN!8IjV5MbBr+wuST)1 zQ5W-WsOs!Bna53|$(IO&Dq&T(C=fwtRA_ zEM2xpGbedm1QP1ALK%$bWm|1XVA~Fu(0fa7HD#u)gN3%Bv4Pl+E1VP}?@P|itcJi7 zQ=POBLxuu*O=+OX005Mrs`gujk%-3kJsOHJ7SY4iQG@>P>|;Rt!sM!9)2a!c#5Ha_ zCBbQd#D|NU(Mi+x|NoF6u677Ou7sOJaL(O*|2Y0nagZ6p6(EUAVZRf3c$NpnW!vUW`95NQ+_CcZ9dH2sEb5{guWLA@I&h<@^> zKg@7C(Sw!(ZI=>P?r=R&#Oou9BYut~SBFo8VLZk-Cu-g}6Z*{t3`7|DZY$r4x!=x^50!i^@-cB6c&6O;0BPMpPldb}q&fQn9_aL0RY z_CC7-VvXi0dK@fYS1*xPvsh;Z!(Y!>sp+&meB%Tb)+LH*DB4`foEiKcixnfYGp#_! zvARKoj*KKKh=DKp{_?bNC~jVJ+ugWTHhjGVI1v`m==2|xWAHHFINE)&+I<4Q{grC- zhAU}_>o>Pdq14=o<7dFfGDQ?B`KQL~kIN9>c%>)TO-Q3)B=kwWjAPo;oyeOl9Qqx0 zS>#%mS}-A+Gac_qbp_bx)C}fZ64U8jjWYJZ5x2L^Ngg?9lJ==DmrEt{s}NSc=~TRL zY2cKV&6R!{(3?r}${WXCx%MsnjD=gRSGF&U(an#|e$Q+ywTkSuRu}KvpLvvVuw^D1 zJ4MW|FfufFtCXX$`;7L&=;Gr{g*0=aa9h*fud?-D!Rx!PH(C5bvE__J-YozG{FBi} zV~X+0>cRMMye)d7O{4ok)O!V?+QSbb>IDU8o!x1^+!OlKrKtej+_&ZF|C1no{GSAo zAIZF0_Gp~$4X@iAOJ^gS&DricEO8oTbIE!j3s^vS`UxONVjYc^Ol)FQg|sOl z1Hixx@3juZ(4}x1*;tOGH^!mjQdt+_4~!XvjAimnCF~>xgmn;eE>2MA5FE)QGW)*y zqMZCH;&H>d^ab|9lVH@{BKD{I{wMY@4mHUC>h>jJf!g}ptx-I_?s7z z4J*kIo8q^d&%h+%kclRr(Ybp7b^o1j%+27p`@Nmsa3fDM0jNgGsP2UN+W)M4dpDrK z@V-n215hb2`;qeoc)%ANSfvZ>OW*T4cM38E9$smFmotiNZT8rc9LeN+eYK|N3Xt-0!sKH}S%MyO z3Xpbhc8Svk3fpkPmzr@qwlCS%Dz{|$&;Apax9uHb(egX4AZk{wFv}4feQtYbaR(OR zL@SR0gUR3X5^!lGio{_o7Z^baA;V+VTPUj-`D1d?c_NTd^hiwbC7G!g&_wbyimOnb zn?eUg=oJ%+&i)h!MO{>lTVn4!oi7kE;Np7`91f2~r@b~OzrOc=6JewS&IPehiTDs2 z!3Zy+P0^vW?_pkw#xn0l*7oKmyx(^H{X^M;e6ZNwSHc``shFtV{$OT-stfa9?>u}D zM}_v7qj=RkYa*~eqD>Ih!(1V&Y&63PNCSHunja7Ut+eZ&bRq=I2gSnyx&d&Akh_vC zs#+HYseRTfGsK87*Glf#a4A@(xb1Wl4&ePa;DQItk=3>psH{3VZ}NN)9C)3|kds11 z?=Vluk2d+$L4Kh>5<_%ivIlFI)0h$@>;o6{8^%&2_kSN-*@5d#r>y-D`n$>EFW1i+ zKp9ckb{}+=6}CK-|j6>;~tP=@c7UL>P4eFyEMU# zz5spp&DP-pqGls>3Q5*zv`dLzX-EcwNHNz4Xhsw^!4;yXaPANCqtyU?MgL+lj*d=M4F9~FuVp(6^Sqdp_I^|lVr>p2HhSVZrCo% zc`(WBR0^j7b^yDZqARqgg7U%+Kc^V{$y6I6b-(s=X3xue_BPyfaNE^tf2#eYpiH7q8( zRAHiuXhi3w$=8bmLuxUAZt0TDGT2@F{(blcRf?pA`iVk+qiq({wS6(-e0m;qcL5jr zJ@{+Lnz(XPgoW#WSGeYv+?uWZs}b2EO=>t8K>`>5 zHVGC~MKq*mtWVxN=rQZrMH2}Wj1aMK7zPd4E7=500R-@SZ$u3iAb^I?Zc;KK9y1%h z`qj#{*?-gL#|Z$2n4q}WQ!?2$pi zTg)@25~%d>xEB5g=K=3v>yvlzx8&s_Hni3Gl<67ubZ(h;sa%h8k|y)cS_cw5ofkX! 
zR|%ct;Yf>9U%9R#f_Q9z;#t&&H2M?VyV>Nbx0}XqxUo++NqzJnKIuw{F5?WSbp1VA+1roo zMWH1L$Avpr+hUDbXslM6$XCJ=MtAKVqA^*!!0525nuI3x%O{OL?sNBaxM4yyXk>UVCk^ zl-qg;IigGE)KEQM0?0j|OH70A&u>q`x6$6t6&lg07X+wA|_)g%>XJpJY^f-oq zU=TCvX)Kx7^VWXj=@@FUG8~ZQx|Rm4knP1=2;lY8^{PR!5wPfu8jLI_+rkig1qx{h z85X7=Gek9}JkrIY<@nibLnGV{t*{lo-rmLgDc7p;m<56{=Gt#O87DGRhktmh1tn~h(VM%r0h@%DvF{fn zLnKho#zFvz4o~>oEk#2q04C5)px7RzO<7Y{htJ9X{I-Vgy^Ca-S4ppIAIQ5$8!Zl1 ziSMnCblC!kv!l;(XxA>{W@-q$g_KrR_!W8+NGHy;&-~~X-}aN-kFPx3-x7fg?Qdyv z2%$a3=7punFtY~kp(oNAq6Z3c{$O`)8_6%Lf|x+8i2=>3{yZb(&{$YFR z8dh&@U)l*;-~r-bNUPpr0#HC{`Cq9%^agdCciOHLyhyR5V>32y_4|vmVH}{s;A-N7 z1yxDhwz~1Xli@8yUuAdy7@~5b(8gu9r)j$z89sAh>-l$1H8=KlxR5k}2^EKIP=p9( zeROwsIz~9X>UUxR$ZV5B#H7)IzyS^MQiDE_sY07j9g@oN$djK^nF`?X0moM%kZ2+B zt;2Lv4)*QyDWi+*h2d0Hy!+>KZ?tYE~-uxG*Q zSZ-v*VDpn0{=c7r|E**XkfD6wWTb#%@T}^`b(WP02GTh4>_EvY&R?wt%qT|~xOkZo zCU43#1nfIj%r7SE!I!F2>Gg7vMC^(*euUe6z?nC)k0KUMts{#}0`{aAMz#K%jb{h? zvkm9ktQDFv0ds*tVH#4#q$t8}yYi@)!$8u*)$IzHsD$1z@!7%{mhH}1=J$;&3)BVB z6|eafZ6QYidc)ACRXTe*#s(K4?y5{auF@#yzZ%_OweN8z>}bHr8`!#)E$*k3z;>4E zz#&x_hkoWqTgGQQ7w6-KD?+m`U|-}xwG2_~ii8)SxK8W70===D@%;u{x$nSAcHQ!l z_688bGs{hX+lgjQhvf#N4DL=%R*gx+whQi>E*!Nj3|`hdAQH3em!0i=Ldco5>TJP} zdDrcU@pT3w@ViIKrfhQdZPx&@>F>tZ z_3HelNAEHm7sHh?5unjjLws$uc9-HG=ZZ=ROP%&FDS&jvQZ35a_1%eF16aR)+I`$V zZ)uxHkAWlyF%~wB_yV3IniM%oK0e$&flit!nY;a4-1M_(gBxo zvuRP`Nen5krQi$bj!&OZpiH!!U}tSQZxlk?y0@0V<)7{E2D~<=TaesaL}*A?0AR`^ z+q?L)yzFqS)cz2}qBmI-@!=MC(PsYS1*!yn8?1P0yKEy*EPl--WgwkXGdA^GB}0=id&euJ;`GiV z-uw$A*`INsFi$G&M*J(xY8_A3F~X?w%qO*(7eDb;s?HyYAK*y5&!DCL95In+6#P-# zR)a-ALJ$@`O%MX;H1O4H>gUf3UT?yLHF9FDOSa%>(M4Lz?-x8!a zy1{W&DUd?tktCz&M`~d=sF)X3;WtK4#{>-8ql=NA==Sr%e@PZDp?+LE}#g%yRQ0=_@V zB7d%Sl5px{N04#7Zlt;BdIrm=L%9Rwo0FNN21{!`M9r={FQ1tPOoT-4ycpsPW)>4| zk9qVR{yq%U?4|;sW&?I2UF}7+Y|(UPHvTkOS+>8NX&iB&tbcpafoJ!}S!9> z%=6_N|F*K4JorM%-L{VK$%X7+-Jf@#zZAd}{2X3O-FkC0iqi3v#OE5n5bD8?lm^&S z%ZE&rM1UeMNfF<~)e0y{pv_K-bBr_#`%~YT0htyTR4$_mWhJv|rEWTx-zp*_ugcPe zUcI0jMrxk)n+$_mEnv>~8uMKv^-TEUDJW@%agPq#|pgXC*qJKY_eB{aW8Yu}t zEauG7KIh~L8g-tlpY(s2=ESE!+wB=n3dm7!!~^`(y{RphGl@*TqvR&$q*Zx=wkJMCUvFNzP(fx3aspz) zWdTUddnPHqJ3Q~I+4Rxf=j%(^tj{pcd2FghA-~6IWrUM68v-Z1*Kza;SB=>U?OH5 zgvXgy97ZT2>@!yo%{O2DJV!?|3IWn+GRA(bGf1dMFPs-;V7B}te5$vW*S#yRv4|(T z6*a_+TT(E^8UK0kaM{?k9TBR@CO;IZV&jo~COMOjHFQl+_<*yzb7?Ty9j=vfZ4`p- zm^(6%_{9{r=hWKHFvtycpH*IpPd zO~HI^F}wEzOpXIn4hZgfvf}jA77c^VRt>Aody26COnocVEPbvCU#1TYqAh_Ou893s zzgOI)(^J0d_Wazze_9nMQ%M;6 z>i0)x11W3 zf;T;`Ge`nKN(Q~^#UA3@2p4SH4awF;7$+X62Do3oF>qz_XRRa1?eCHh3J2kbERa-7OX$)F^xCYfEQ>;ZHajwGoYz>0Fl?xUC)7lh=~ zg)@%f5|8#glNQIZL7~fo^ep~?0oUaXR9dY%e>V*8hK`2ur$O?X1WRJuo6vL;cPB~t zI##-k{=RETs<~$lL8h0$_4rq$9~wVmd9xtR{Qz=j2JK#+3^V#kviQ*R7LVL*|2UEs z=ys;XX&v@1Vfan2a}(pc%i@PGLMuZvA#ntPN4bWG27FeJ*+;R0Zu$^Zw%H)vXqg^k+o>=t0oh1K+=LGqPW;B1gmNSKY`YP5Li4u1fRf@C?>)>+&tzN_^pBR1*^ z5YhU8<2S1G;VLH4=Er|qWke?sH?^2YmQPMvvRKt4h|^fm#QQ)NvNpiH%>L(37h7`a zZc7glq+Ps~iW&Fb?^AwiV*~dk!m;G;WQ!VJYxd$^DUw0VnAKr3pcB~k{3Sh@+fGZw zr4@npd9~_W(WDv{9@g68Rw)mu0`+P^pW;dlFyi%`ogQ1Eem4bwFU;+V+S6FXXSi@XeIkUTYRWXN%%x z<%-``Mch1qP64|%hpD^Ce@Y(u|3HE)<<5R<*q`i7`NC%lHh*CbRk;uPq**%}~ln z-KX=9Rwgh{yJN5b&!fL{!6S+!M@9U=ly#HXV-<9Lh&6GS#2@G3ym7zLB7%^$ROa}- zX6Z*F_feg4-CpCur9i5G5Tm}`y|eZ}})DalLA$aC!!xgQAd2GchxU0x|YFf;q*}VbT(Oo+$Vrr^W4K z<;ME`>maX-zpGt`?j*Vu9@##cv=F*NHqmMEZCR@v79eLUYWKCx^saMGS!gCChzf*F zP1wNKR4GZ(as(u!RHu$sB(c1a0>>}Z+_pH*k{Y`lV7GyiHcmI@@&bdB!%X_GPgimY zrz>NvJdw6;r}*P`a52yPK#PuxH938rn`CQ% zeHW1D`DxWYz-=_=Hk(cBLDy&sq~h^sGJJ3LyA|1?&=aL>SlE!srW&C8ib}*+GTMT{ z_@*`Dqpvp)SQ|C{O%2dQBAq)ek9Irt_6mdwo_H{GL!avd7*V8_(FY9N|lZ^9ndYpz3Cjs1L)q5 
z;NwxGdm{hk2@nn$HdcKxMy>t*eRf1Dil>tpl>UR8 zlplV~!Pzcn{6(2!KhRwD;!5A9`TD6rgq>JkD5fcBwF4{CXfhtdo_p_T=qL4O_ugvh z2-S3=J86}s6HdPjj?d`_&B#S^+fA!oW@wXvZu#&g$aW&(#e2#><2sV?Q5t>?%$!`3 zy`jhhz-?^Q9m`F2XW;pNSpZSeRKh>{yc}~jUJv0)wTFDEsLYobi>-99^ZzG4F17_U znSL2FGY+;-ZDAM@n@h6>h2!i{;<3hajv~}k7lZ;tyQw-X)TtYTP*_;=+FM{D&p?4` zZ&C3yK-nM#3|w-&ahelkH`d=Ee9S6!$wE*DVjN435&|)y5aLT2a2ysDJ}wT9wWCpE z(&ZeGSon%%m*h8KUhR459n~6v1tSGNSfZG z*&c-(_BYVHM}5T#55aSHT510fhxjuSaBvH^oeZ*T!n~Y+(Ing6Ff`L)al`1=rJ>Si zZw&1NMHQ7-DVDyTtx(`Mhup1cQPL5GTbb|@-7KVj_sFj znZy~4N5!?IGr5U!jpbEvlVNe5MhZLx?iyoP%-dzPJiglH)LxPy1SNs ztrr50Sl&QGy~%0pTddwh>r>=^qIgi(U^;Dxd_cO6MJn~tEenoBBmAtdM|ry1!T{W$ zbbGoamDkwd^1gSqx#(?tM6X%~{QP{aXle?vyWrq08PC*_dZFJ8$r@~iGeg*?;f@Qu zgYZGi{q8vba^(#n5k8S7+_dX?3y^j3G4l0ecQLg+U^n8i?fu?XDz17Q_dwmul*^nbxmpU4}XIspCtHp3H@E3v{7ic zK{w~@$)D7fUf`QwzrSgOc+ua1DqfN0vlx(Xx^9NrRk9`x$evEmH-D;fKk5jZ`;uy+ zSgU#Bd}X!}=yy86@GB5+sJGdDaiX8JXLVZ^L&C|n>e_MdOcWAyceUhdKe7OOJRCRl zUb!5Uf-cv~tF@fwfPcR%4VQn!32s2{uye5jx~s8pv&{FU7D=VqVVz0?T^y)^yg zdS~;XR6-2D|C{$^0-p)-=gHrxCREgi%PPu99Xkk_LasiOIt-a}{noPkgT7Rf?|d&0 z1duRlHJ)z+jke=2+s0wDOny@Ier40bs03GTx0lUhp@aXA+$d4E$5xNQ0q^Qf!$sfUNwrd2f|ntMNAk!wPp<+9bwn)mgTT$dl*iwSz-Bc zv4n`1@N~3*p+-@-G>*{lsNiOc?5KmS(@|dF#bMlm-us2i?MXcIYQ32;=lAI+5O~G@L+f7=o#4Eq6Kjw^U)1EXe9bw+DQV)iSM}wc2IX&+`S@^tIW?TaSYc zL+9?*8h_PQk3O*(7aqElB3@i^FSWYd5j)2Dw%v<>exaaJ(1s1 zmfJpIz-_%Vgx>?MOCjO!%Qz%u&Dd7gFCnAAU7v6qhg}7y;$}=R4uag#!2^XbJ!C>H za~fvwDWsO7ss(!IkZuce!xP&qwS3Q5q3{XosQRnIUbNFXy-10L>0LIaU%UPMw_gGn zEb6M6CF11CES?2V;4{e0K@g7-xVbx;Cx@mP0@Z@MF1RbG zyFQ5nlXa4ATUPehQT1P5bQb{W^Aat0gLi$#ueJ5*wr~G7u@4-5b^r1rQHviT%&Y%1 zxJOH;?~<#h@k?_NJimNoX5Rc;oo>ECk|hr@Jh3B*gHk1Pd%22zu(-E3n1j;QdZcDk zZSb}r;`qBFw8unmLl$y#W_uhT_HCAsG3Yfj>zI3K{g>~x-bU657&{}`cBzKG>ecy2 zEaP?+E#m2`&oRE2d!50XGMAlBzsn{(5EPg8e!j*!$VWS@2Uu>V!r$JjL)&)wqLo86 zc;*y*_V$pQ)7-9AiStCU5B%0)vopU_jk1JbJrA)E-+U_1P7H(TO~>*u>L05WZ5Pd3 z_tlJ29H_wGSvD^D%U>NETyieRo1JIWd+bNgE}oH1HI(E=@5`xDd!Gr+ShXBXbJlF1 z?pMo0tO&$$V|o2g)WTKW>AIgg6~|;B#Izi85-56ie3tbXik_X{cwxc}{;EDRD`W>0 zQQ5~1NkSz86;fra4<|vjqbzZ@%wmTQ|2_1SfgAWGr6w#o7j2OY-7e2l4Si(2qV7hI z--U+2J<%>j7#pVa49FA+baOG&&--Gi2+pr|T2>$P&&6In_PtFtx`?!_C_&wd2&1`H z;pVeEq6Pn1^`7r;qpESEnt1HD{CjeQr1ru=0aYV90lhlp4x5ZJ2mKXWee>0J1kn&u zsk48i?axGNogr0gm?P* zMH#mxQClBXuM8Tc?N@#JmM?(VV*JJQN6EQ+%WkywKNQiM(`k6C>14-5^3a3UKI)6f zc4O#1ZPLl3yhE3m*8`JS)#`eKo8F4p4e&%A)Xo~Ohs!}!8m==}?^pa!TO@kL8FeG1qGqv)|-gkq^Y2*LE}Wp`ia=^0a)$=D!oEX)eA^TU&c9odH>we!o zytovHtaOYu%--Ac8;8wT#johtE&$((i@o{}3W7(F@B!ce)TDOUp|KJJqpDt;xn6<( zQ+0iAZuO%s7eD=Qvnv#b)s4%TU+aUl+1MTI zHS=nzz3H$mj5NY!pHpC==C}8u9eLr){T;WPLvsJPlWv0c*zFqc=%USU3L=@6r8UpZ z!*4@&a_apq2f&k)3rExSVz-oW_Qhp*=2N#B?WXxu3y&p?y~Dl0{a=e&V^=N8Z8VgY z%h5%Hug12w^P;(vH*RA8J!%aoKz&^%E@@E%e(h&Nr0<+rd5gD1`YEg2q=h~yc>=en z%ed8Np2~B{?CJ8dj5xj-or}~`A=gPB6cSZUT$8sgQ$R=-FTV9=#;GebLD9{U+V3M= zl3W%SKOA48=1p|l1|BPZ!OM^zUsW=#yF#DU>a7p?^uugj=someY2qz_E0m7=fT1&i zkI)aUEYND~-w{e7aF(tUHHS*DLC?U-*H(gmQ7TqAq<>SMua+NcnkhxIgy(A>Dei#b zv)1dwNLA}*nOUFYMrr$4m(Kj@ZLz(Q!g39-GxqyQ zOy}Jwy~4Co*17lge_D&($>gE92HkJXz8`J-tYMFv#fUpp6`)E~IL;JT0~ZeUmUtNT zdN2maTjtd7%C%so$lJT$Vi3MQOg!)+lU-yVL4Dvd7~sm)D!ro#mREULiBa{VcJj0a z=#od*9kHX;IhnOQgjT7SnQzu-PsSd9>lrk$9lA1nPgTS=sP~D^hNuphNG65gfnECR zTrPxmz8a;Jj@X?{nmo{pRycjC`PKJjvXk3(%lGFWB`*BZ zQ}zy28ff1`pZKhc0ll(m{p*zt^NJ2n$Z!zEZ#&&f`~H^3mgJs0w@wFN3a+<)9Qq;4 zU3=qicH>G7bMEl%2{Rc-!5i2rZfu{(O}?8qb<5x8%w_LOF~v9o`tB68SYkSZZrsjTL>w_*(cag0V(*fis9LXL zA#-+>t%f$uqK9D8ftmlS<>B=z`}`i4t+@L8{NHzp6J98%EIb-{`R5-`VpDppz44gn zNhMB4$UGOl4;;oB>SQ?0@!gvbe%JIus@#{xvoqXd!S+KijcEP!yFg)POF4?G2N3w7 
zsU7*f{qfc*Vx&}Y!E7}A7-g30Wz#ec#;l3@$@y&LX@3<#FGVv~uzb8y1Vgv=^8tB3 zZZ=Da7LB2Zax5c)Rk0}-m(vHxmlBj&L$F2v%XSm)^E+2ay^hJXC-9MUY*M!qtO&#k zT_D>{3f}n{L;>dsR&)HeZ7GCYyyD4!duOMfpZ-ui5cW7Y7g${JyI|u+6%XTT+udm} zUB9E>@f!z*AW8)JJTK&rdnAL&t%pFt1|R(D8&%#lmznPTw?f3R zR@Q`z!%_n&U2n}81Hz0=d1aiX+Bf)m8 z;%i;+H6(+^#L285x=O3(a;h@B7?C`b=x>Xz^c|2fWPWPpyi-Z@#7*i`S#{r6dGR30 zY%s?ds9mM5R~ghETC7ExcC(lS_voYal*)hm{r9uSVu_}o70nxyme-OnAmNY$L9I$p!?PF#`(n_$`k&&dSrD($IvhP{xVvCz=*F# z5pa}^l}SR0#8onR>-!#$C>9~@cf~%@I1(2RpMDf;;t%xhzoOtDYz_m8HID=&$9tRg zwk}B`sXwZe_i&_~m7R|N;Dd55{-4WZV5qBEy}y|K^E+fA)yU(E`+kE4B^O&E*!EJh z-w7Y-H(mEv$XvC@y?)Ez40dV6sGx(EObBh2={kMO&R>JbBW->u4ziO&$l7}MDB3#O zo5#Bytm>yh+4)_E9KVT%?S_qQZQ;W~(+7w4kzn1nMR>jE!+j0_23s%o*bhFRp0<$; zVyvnkRPTuQ6!(`KtSHlNjWZvgUv^Y|eL5%%0 zU4#n3c!jj6Qdb15apz0Bd3g+zG?v;6c<3+69SLhZ>E}{#{NM^QPISibvN0cg60$Cu z&(xc9c!3FT^WX^Y;XebB943nt8#wL?^zA0wbkT$w-1lc1fIBP;9&~mD&Jk(Be#kWR zXc~6y^k{|BhUG{M{b}VP_H>kF#g(7MLr8fmeQUUTPp}GvgEGwPHhi3|uzQx#fPKrJkr!k{B2T-BE_pI9W|_OY zWGiAv$?YgboI$z5!>n>Qn8GnK#+2jGZcyXt7S-nL3Mn5B&iC1apzT-$oFyS z6~x<14A9*T3y1V=ozHtdN)=_cOFT9r#?@iYmga2A(x44DRH@XUJFAV?T zu{Re)QwV-xHO~5pS?iB|EftnbHI?Ve{Kf7s5xi~vxeUqP98vFtXzFvtrYbg^F8gQ# z$I2D(g2$pG&1MJ-|A$fgOyIoJ6@`^~!;8@%GO$l92W;1 zO!M2JUAi%@x-s7jd=*BMtPsNz$<;r%+;V7c)Cp!Y!8~qUEKlcw*S@ysB>C<;!T+ZzT< zRHA0z%Z@TE&Pk%~xLT$D6|;93)i-D||DuA`9pt=TJ=V0YOhod(LAr+Foksh+3b|zh$3tgGzrV47yQ8fwt;UMS(6exyXx=|eiFk_+VN?e-LzPNrJheki z1ep{&S*~pdwQAz&8y#qW>*rmqd*-Q-)ORK&=i-6l+SYoa^bqUMTj8S|Jol}#X`u6*cJ9$TMYHb}i>KcQh==n|+s3*6NBtS)DcZbnTZ}c>Qv{Z3D3K zWs82)_u$^QH%qi_gxwE%0SUOCvGeirXKGUVlsVL%-SK@38FA|`4NlGyV;AYrb`o_w zvAW40h_~wb%QD1dr5Qkt8gF|tu5o~%e^>E zElFPKbR}vz0?%B}Wb%W|jbGN~L*^&gj5dmtpZa9Et!vBXC^+G8^Q&5}b*$)#DZ(#+ zbog0dE*wVX5uELRLpA{arDJ3*L~)XM!AMXlmwnXOkA(ZkCY0Off6p#*T$I=~<>wKu zU-`Jh2fl1=e9wN8X?2}7IxGkD9x>`R&sz@ip>4hd8lxWlCpsbwPvm6NY(~_#G4jaM zvRyh>fnQS#y;>v~ynA?ec+OaHhWk39VM{96;Z2LNC zpqgFeiQTG7#&?o}YojhL3jvJ8Z(0y5aUeF|f^S*uKkqz-S9y9VtR5g7=^)y}JqN*F_SFcfOfE%M3GlB`t25gV=;`h9eelPxqOdY0_LDt+meP z*$w*X$DvAJd>ZlUAzvP&Ltgq-vZ8o2i@|+(Z(^3k?8w;fJKh55SLn6#hEm0n{~j+e=vG*R5a zcY;~`&pUSG?Og&SMvVpiTE->IyV>>!tIfXH*y=oG``g2I$T08l3aJK1-HWL8pp0W) zR_%v6lAiPnPLUe5Xt4^O>kbLh&7jkDVHva*Ji4l|w*K1PcWED<@pbF9qK@~V{s=cA5a=sCM5MsL{`Pq5aCo2nj!c@;r6t z)W2usLOGKxj>6D$^)ldq5SoynCLfE@&fBhHR7r*K@5}xFd1-i#$ajwQdvL`Qm~dh) zmQG(*iGz1OOx~;-ILmo$FB^PU3t4OE$}$4@>EGGj2s zG>)@iO)yOnf`Q)PCv`KDRSZ${+CWrGl|)5;e)Ns?`B4hGl01!jdA6Gib#>6WY&Oki zdIG0C_d!Zy?!jz_BenNjjM#;NqM)Ub*v2k_(z(SaDBq*^x$NyJ52RY&|9-Js&N`@X zw*78nH4)rs!ewmlhx;eDB>L9P64p5OGp|GXpflGfY1oS2827_Up*-~;JMSM{&%${* zf$zTe$DyhYZ@m;fr2K|U<6GkX&zUMx@WsQn5ACq66E`o#+y009gV^>lW>QtGd6!rW zJ=YhTeZJ4zr4HZ_}*tz$&tUAwtA(9#=CX8nwT75&Q7y8?H|X5 zf6jg2_@hH4#eon_eVor{n{e5* zkIOFrS-YUDMFLaUPy9J0bU``~c7%gZPviRuJ~Z9Dizd_wp2CTdzKxiUOUJW!(6~xX z1Lpcn6D(mhFrC&PIXx`BH$}N6jvh>p65$4QP)Lp=wAGMS4c28bvVSFKmaPypJ*=+5 z_))_G$sj_ufm9lbm3t;DDHHb49ZqoYOUL9Hgfa{p8n_kyz0NeTp0FnO;$Tt0@&%mq zvg=tUAO!2^BS6*JoqWbX9&fNefo{!^sZ{kU#9Gv#>xed?vXSs6tENsoFJ{!#w zH)%RQC!mLPcThUhMF@HDKL)*Y{^DRt0;jEYS+2$@?3mZz4Lp`EBU9VG!zm3bQF~vz6LzxxBkv z^6;PP^3)cOo)cqxEG$N`K>O|5`doSmY^Lc2NBg}dw+NzC~cUT3$s zQt@Mt+-~~SAvFcTBl`R^i909V0_n}OsWCtTDk4f2G=5fRahB0SBn10W;<2Yk zyYkbDpCnTReRo3jV+Hsx*P83m+MA2LAui&<-IZo73X$t14e8Ip0$quf+l*~ChF4R$ zi_|SUR%XFxUOQ9*Bxziwt`l5b_0)8QcWFmwf$d_CBnFMwiuU?l#xI-BFx?O0O~h=b zKD<@Fd4Zqw^DLCYG5nH94*(*a?;raG{^7Owk=YIg+*Lf>vy9>Bw z`43cy9BSb$KRbdPoUo|JJJ;KJ+0?l^6f8`RKj5{%i#`ca?X?A;qji(TL^ZMGU9tOO zW_X-G+6#awxJni~ALdasiN+cg8LMbzdL;L{Y(;oN)qnp#{x0N5IQEL;AR{Z}7-bzhBB|_s zka0*x)*<_ltz?re#IZBO!7(!HI7WmLIkHFQ@1^VdT;J>S`CRYM?=PKh-QwoC?(6Y* 
z1Z~JS?-m{NUuD=;c?l3+(_0H0T}kqJ`p|~loc0Q9WD6RJAMfRN&+B4YHRjKQo+8L0 zgydWvdHh&544K}#Go~%Ry0Y+ugaP8%A&pUvnUO_s%=oXjRv7zahH1tfaH~fz^oTk$ z@SN%eNCmaV0a3XaTKXn4NVFwv z)R@K9_vgup=PK$v<6Airi1Ifp?jbhr$nXvwQ1E2gW6k5Chm1YavT6FDRm52 z_}bi>9P;#W-eG$}`0thC9G%12Khn;*&GqNS_~#db+<&>Jz!mhgNwIm^#(A`*5NKO2 zp9GWC3(yWT+7W9%ew!d*)r$}$O){3WXCWPfRZNpEtWlHwTDhBjdo*x6LxUbAg`@Tz zKB3`booC@UAP%NUx~!$w;R~G&okz>xe@bo=fD0$zBJEuNF4!q%d;7khjb;Ei2mBD| z0X*jwsl-R@f3P7r&Ph!=Q=7Tl3NdOA-IDaCW{2@2Mo~PsFDc|)NyQu9ij}uTW3Vl`M5n9UINVRa!4?0xztJ+2x?#ADyxYGw^im7* zBTSJ@cRQg4IJ!*L^zZ8GktU`M2@E_UK=1tO{5W~2Ji=V=S$)H-kI1YFp46|hWzVGX zQW3%1yopdcry$sOI&mS&m@=T({rAsVvD@px z(qP?xmhNxr=JP6y1YolCT~eHOTyev12W{0)kve)wy|CNiC?^D1bu^0t69=( zAJ5w2Ot`ySrTH&sumjg@%_i%pNWd=(CWWB!>YFpnIjj~LxVgE!%7S2h+y{2@Lv_JI>SjTl?xuVY!f)Uo^rhp&pK7B`jmiZa$g>ov$)VZ2z| z0S&vH=aXQJCx0@y?7ghVkEw-?Z{7AEGbX*KpNVWVbVolFBGxPA4t_a4X~5reSY*?h z{)bTkTy*PvzuE8JZ_xdcP|^10Gv%EBRu;l!;NevLe8sx; zlZ{N(2EYQ(*7LR9@cNTKaG@g21UF0F^M(K)?>U0dAR+QP>9daJr@t>wH~uzk0vl8| z|B`d&2!x7^abgv#t)sv7L3$DnSqxwgX;0{;Gu&lKXojV+rrRl; z_uqkDmn+k>gZ9v&?sz9oc90)8=a@t0u47J&HbXq_<<#73%Rt9eMF!oPn}#um5U2%c5K-%o$A{G<9orQW4c1Gn==eI9g8Tui3UjtX>v1 ztrwo+Rkw=`2M=W4m`JlChhCb0!I?BmgR)_{?NjBx<``7B`l%-3uE{GOmDhEmaUY}^ zgSUs2HF`^^JJOf4|uNb39(V61bnC`BnIi?}YiY8p^YE zhd!Rz7h~4MGA+w)Vp%QVj7m&2?^}4!#)}Ckg;}=<1mpaH@63p<(91!s&BVlH1RYIV zA`dp=qa_k!61@OUI3%XTpvx!8mS7ijw5mbU@x&=OZ2@!#5!t#~WJqfnR8H!@kkez^osgtX*U;hGc$|(ENB&z+N@yI28{wL z?>hA83ULH%yzyR2o70Z)(NlU2ly32N)7rck=hyz#MKj9`O15M!zCZ6=VAN%oiCc2M zeito*WO>nLU=Z|lei&({>Gx7_k_*c@+1`G#xgT$rE{bV~1=JHKe>{iF9jAV;kUld& z-^(hMnI8}%P8Vmrk~Mo52pcXoyKun$hvfsj9xy2HC^fSKA4{&1N>E^YEnt)Bue61q zbu|&Lh5meHXpsiCfvzVS8>HR%4WXCl6a~eD$9NXg@>?Zlz#+FO-^6<*{DskghF(`o zlTC{Z15GBt3Y0X%YR9?R!3azjn+uryaL}0prYiC2wiH4Ql(<~f7dbfE%OBq6bPN{r z4?bwTJyV`e8~8=hg;13$>m7I?B^Wp!g4$EbY+i5kALOJgWj}suy_fd097I8 z$16lnka<{*R9Y&oel9bt=4*{J5a%FP_KJ>ASJ)oJJhL8}eOwdg z>nS_^Xx{$Xk1$bZFAjQ1*vO9-J41K(y8=wK=A&=8Ixli86p8uA6b_R?mIdks4T=cQ zFOV!a)0i$xKc4CT`Ke5AqMNqF9klbauK3H-={2&wkBe_Ce16HAR(om%yUWdh&H#xM z0pNUNt6$@zws%e*cU+`wT|9@A=V7mZ|8mdz53!2P2rG}CpB+kn^+Nc#wzX5np6$7S z$E+_i)>@)a{>hnn+I3UncF8kPC2n0MVzjdDFpnU0u99%7 zpS@{eQe+D%?E(yc>DdE4uAko=pvkkq6Yr8G=lx(Jf{MfD%x`mDtG%GdRsLDSI_Xor zj08l;k&oki;MYzy%W3+c;|h^OKWo41^)Gk?f``|hF`OMN!y{lcGycO_S`kJo4;a`d zOEt49CW6vsOz=Lc=+AQ1?=0NAuXWQoYX)wN2~|#3G<B|ps%Icr^;b`UFr z{1SAy?A;T4HZyd#sYgkxS@=&`5)yp8^P{~gI5gt%+XqtL`p=G5^zuYWXG3X(eDqwj z)q7$&6S1cY+G3IXZ@v@u+nz;yrZ(mphcmTLoNik-Z}yl*Uvt=-bQU}M;sy~36RHe% z9CyG@oO~)ZKz26j<*!83*@!1m<$S+}O;NSdb&wRko-aCC6|}oS;yn_4)VnS^W;A{& z0|=&#ziT}$8Z4FA{~cJRKfdA1Fl4>^R?W_$lI+z<8lU2d++FmwTR)Kv-xOi;6|X0k zdYR`Gc5mU|8w9HN)lyGl=dR>X0LcfDo!@3N{vLkvYdUaZD9wy>&5yzBxsMWuL9FLm zna|x*u7LH%7ZH~~A?(hMD1#_-q$w}Q6yE~yiQmVRsWeu6;qWR@DD_SP9h(M+tw(a6 zdTsg2iX5YtMl~Qq4}?ba=xzFTPWJ$PZ$HS#8|O3C_?$DaH=f;L7#QZTDczS+z&(LW zae3{nV44$u;D4y6M}VGCU&l)wlv5z)0UR^=P0PULTZ#VOjE5M*QQ&N?mI1reLMV1t zq~VKQ(Af_~zyW1L0R8UXJKV`nUuMuitG&?=jhJ`)(!6FmC--*5f#K-L2aDVA^?&o- z_4FM~+$2uDepY&5e(Lp~Q&IK#k5(Kx!yyM~#W32hGz~iQe8dQ^GUS)^mR?2Wi?{b@ zJdl8|7)2_t>DLb)h|MU}e%&+p>X@b%=oq-bvjF{TOM~y4fhS3V2|^K+kL+B$(G=E8 zyr-i6=_;=pEj{KM9rt!jaz~2Pr~a*|IL-J3YSv==tNtKkl=Gt&@eV&#|8AOZ=9;;;)>}MyE$H}7q0EqX0ri^#4hxtpJvf26vPu-YHtQBP>I!Zb!z3TmVsyY zDEJV;oKjvuG*5l3YT;uo*}Mt&^b|zukfB0vV#UX(EJVSfBP~xaR`jSap3|Xg68>w+ zL<7?40>~>1lU4jSDzAOfUkM$sO3I%*lVeLdj1ZO^e3_{#RlO!XUPzmx*bSp+Rmq>6 zN3cJo9ub-N%U{KEuDUjf9(KGS9~MJ+Bd$ZW;bm#_n3*P_xHz zIVi0spV_T(Fa2P32d*CQKe^W*h>|--i(6toeZ}d*#BtN3_9d@jaO~0e|**G zv(39d*X%ht~0M~BYVrU!#~ z{W|CAp*T?d@h?2`Xyau3s{fBK7hqz%KXECaN*_+Jg%yt=r&!v47-gNY>{d@z#^pGk 
zOB@_cUNu13i&1OSu1nA03e2Nthr2J`t~+1&aPlDL@l>txtb_eg3*^aQhT=q#Zkmgb zA>)vhc?#irp0>`Ht{)Z|cq9phxU@xkGG8F$wA$qNE1W(XiQk%QeqNp z+=P02ANVhJkZ%RvfHTd0frsSr1vknDo!;&eQ~gohU*DqscI$_&vHMK=VS*RUEh2cj z@MtJe@`gNbo!@H3v+BdNjEen_6;Usap8knmv;abGeGBfl+&Kby3M%NsWL(sD+l>yN zFH+}J_RQIEu8sSW9V+G2+c5rqWpdO(jr1&u zO3M?Mzy|*Q{yHjl%Il{|7hgbdQo|fe%-JI76BjA2e)DTryixH`)k`v;*_K`a*KebaRZn_ z3e4H>b$0LXpC=x^9+4lq&wZ~_m!xc>#5cg?TT`9yoY%{zju)iPbcFtjeAXZQp%m)u zc6O@K^9teCzV}wB-e*qXE;@@nu=dFr>RC#c$7i#JocM>^X6Wmt>lqK9MRKP70Ytxn z9Rq_lgq_V#F>M+Mq8JpCVZb4x=`}3seHTZv{X=Rf(afVPS$i)&deZNu#G;2 z_t_i43S@agO)-vV<<=sqCQAfqTaXlLE!5;dE&QV#!8UXCMWX;uk}rz3jyV6}=*tvg9EJs?=Pc&G_maE5u-c(MOZ*XD51hnN4p& z>x~`<d+CZh&%Lu<%z(L8dzE$BHp9NzDI>>=Vo#Pe z6r-{2Ke8zidEt~ilQa_0ZxbY)i>`kAzc~8dCdfj2;qb7(K!RX6v!>7C?sjU$MJ5Oc#;BNllG#&reTgJ;9TZhX7zuJKz%SaljbFTwN-Rl;f?jaW z-i;1E;5vnazXTqwRhT!oXrOE}WWbZf6+UA|wl5Tw0l1DF56^(F+HIvJ)rgT)WA(HzH* zb&h%?H;P28^{|>y@A=Ed#}y5;(P8E*Fc*LZh4tdzpJO)TMfKPUgW`>@s5p-DyFLXh zm94wZ6VpI=d%Ao>2F~Ac_;BIBQJy~pNa-^?w}@`x=cjiLRBR1b;!9`2oCd80P8`u1 ziy)V404Z7x66!FumIjI$ZiGFYRm$J^0e_#JVswN+RR$9L^rxhxb043#ktoa}xM_W7!>m&e0eEner7V2PD1n?oH^Z2RgKp zeZ?|i79uZpfGzK4z+&C+HvldF>6oB;Zzxx2EC*u=#9O!hv$^`mg-s~OQC?x9AFRH)@k!t(g8wbK*HHe_? z+EFzIvBjT~l_*D!y~o<|J|A?#w{5SCvaQspW89g*`5G}S*7@qO(G!$@MBnw+{WxCd zy&+b2DkarddOKA-mNARTC@008G=7U7BNa<=+@+(%bUQOrLwpi#ioCmhCQvu1y|9au z>R1h4O?9R=@7aJV9$+~qFhdl#0XiQGCV^WbKNAY?UPqy^Dba?tu{ z4$YLVK7Ner6~)1WrcV5cWJMauv!McVsV z_!1X=UT-5PE5VvyC)ElPcwo)jw}tli6a=$GQG^I%sveO(paow@haD~_AXC_$@*phG zabPel{zZxApd)(%<7hH<9sW(R*QYV92G;z??UMF`Xk+*G!=FMtLZU~nz(RSc(5NJi z*YG_H&1}c9LeT3^CXtY6(m5hUMzr1!ZjJ>z3-_|v>@e}k3-m+!s=A(!DTRLDseD7V zWF{7RiK;r0N26yo(*5$$G;06LqL+HcYPhq22v#g-su?fz$Z04|(ngU1|6HB z(_uIpsSV_*AW}|p4rg`@QRBCzD% zY~T?Rs3o(fgJ(7E+N#sGDFKx<{|-g|9iQaNLC+T+cq1s{-@3ZImv!zs$!Gk$Lb&}% zgG&hX>Dxrmqrb`#JixVpc;0~deTgdFILmLoxn-ku?r^v4`qkK=qK7P0Bw&r>;SfO!)0;-9l@*ta-Y6$QZ$p_|HVVI%wt4t_Pu zD%1?_^b)|1z*jMG3mcoMRTm83_8{;(F9Y|o6>!2VcrQo$pcPncp@Q(dmtVglzi^Vb`dW&+UWHQFfgc%koz(rx52|Bh@AsmtT8To zIuHF2y`fxSy2s{KE+LhlSeW?p=75@PA5SJQe3V`**|(c7fJJ4*S_EEkGU`A@xR;tt zRaY8iaGL56qp+CS>g;P}>saJqlU2t>{~uY;Z1ybn%uS%264UpqC|OMj3!$AdYTR?5=r*8CHiaO=ciR9e zx<;6wj=$+dc&Wku-EQ%--+vAt{aOB!pP)|&xLKBoSGjUBeiMzaFtK@`aVO}k^leUA z?+?(+OTp>KhhkI;(&=CF?SfFHN&$8~S1aq}QlY3bDVLPs>bbrJ=}5n4#myQE>c zB{3xgjpdp+g}oA}BAkKY&Hb=0;EL}S&US0veZ(tH7fIaKJmQr1<2ANrb;#1p<7O{f zqmwSUA8=o#K8y&Or0ck(R-m@X{WVf!y#3|Pdbc?RI=ksQX9tD_CFb@m{XZPqf1U8RG#90^z=rM024$v|iQip~99Hq7 zl?voP66G!$E9DlopVs_kU6@XImV+vH&u>|GEYiuc2XwH~2SKg@tI&~LYmYJm>D#5^ z4zU%M4HC|ACz2x0_X6{&q#S2>0~%U9G`H?g7ljun4HyawO$1KYy3%JKOx}KHUSnVP z(eg&SnfKcT?z4b$zvBDG0RM0+=1lzG{DUP}Zazo{=3?Yr_F62iISg^ox{ez5CRj&m zx-z&kbk0xJIrB_DU{!>&z)x*;Ni{*i2}fL~;`|a^&QN=9#FklGfNv6%3FU$ojuNp` z4%dw0(98#$7X#%y=Q%#qRv*6>pOH&-8_*Fj@hYV;Lpzo`gPU*Rg*#Cqg|n9*(QE%P zT`j3xGlo#~kuA&wtFdO<#GO;*o?GGbcriIWslM2~dQmq`;u@1g1Uw;Jsa(p0nK?43 zifSc!@NNIQI}s0QW1^y@mN92LvyQ8o5^Hoc9x*j>A-GFOmkF%%FdVhN|DeM~HiIJdE7*`p6a2>Mcvh5w%?V!VBW<{4cD zt+7p){F}@=Az}WRATM`Pr^^?mPIfH~h9>uo>%b+3(!A7;gcvbFm%BJ>_O_^gljo;t zDN+Jo?onxcP=u7>$?l~AyTG}aGxpCeAOpll7v3M?%M)`4c&M9Pw~JJGlA zsj{C{Yg=~804=pb3T*Q5Y(S_D%}U|{$|ADDH$H6)+^JsbkH;4y$1kZY6e-04j}o#P z60rE9&as2fwlf&kG-j9~75yrGbBCp^BQw3#N)N{JT7hMpIf}v58DAdd?=a`2axa`~ zz}RE$MlV~H1681>1yHMY6J0?}dmly5KSe3lVx^rT92E%Ff%tq4#_SL$C85eHXmxUz zO%icdZdhQ7s5x8BWRkTvFfEHhz143wy1*!l>4a1tC8yoI{0C`@??t5#B>qm z;Y{p8{ZW_iaRM&4Dd(qv^W}aZM5+pOzW-uX_DDFU{c89FpTm{n>9||JQWN$GFVO+D zz~lZGLPx4Vp+~I>5eFl?pNmJ!XhPuSE>_GoyalSbUd5F6!!N+Z=-1hpdK#x+qi2EhU~ zK10C+XtYraMDaB;%H=T;)!?^rz&htVg99iJ11JI0MNfhpPtxsDng!P>Odg&b-&WAbKa6E6eLZ8^ 
z%Kz8c`8N+1;RYUf5%{;u0KVV2tHd=T(?!xE47On=gIntI(sMqWajyr2T>furxDfD( z0C=BkTbWvwOAJo5)kWOxQ9~m*??b;StweKyd};L|x4=&~JFcqLkA~jEW#ZzZN#?*z zzU3#TZmo7T_8+53HIB8a)RsnoC*KHYu$*U(uSq#6ja5_)oTNV;6PMt7DqLXUISIi8 zQyv}^(O~FNo`_pW?aeopQk{2fm#ZRn-`2&kDJDGV<1W*wPC!5@7a{({WO&B?OtAlW z(Ovp#VNxq}KEljS*LhPbBi&u~DDQ50jI)KmgC6=FBOc7GYUH#dIMuvx%~=RZe1Z(F z!C$6#u_1>GBZ)UODkQD4uV5JVdSp~>s(%5np5KBJ+sl$C(-_fre?X>$BOsJ^W4IR?)HuC>@|a$q;`sw1a=8HJE`pBS2k>LA3@PkVi1h(Yu$fcc zc6At2vBkCCE>3)fu@n2#t?H+oPd;40S6a4f&W7U1eQBOTxI^;J1H;7>fy+u45jqHy zo@IlYh!A8nDAYuS${1|Jx1B&|+{;3?m3tXBj?{dS%&%7u)Y=XM?;<@#4EBQ`DM}*G zwc(4mxJS9>`}AHWq*HgV+~HRDZ89BnBJpj%H)TiC!K9_1B0;Gtii4EBwsdruXq)33 zeM5=7xmv_OW`M8Wh|<>sn5-pUCYW{?dRgW094kD%wA?T4nJJ zinos6&Jee?$ANG0zJlW6=gX`9;TZBKPVgUyTFE2O0rM$Yyf!Jfx(7bOUx$4wFvk4e zfIpz!l3WNn36dNdeWqVWTVmMo=Cd1{St@{uOc!`MWJ` zt+nwVb=B8Hp3~b1g=ANHkvf4du0A4vYW>TP5D-i`kgt{AZE5>R8ibETeGBU?RT`SM zj^rSIGSG|VZOz-eP#Pa1N(y&MWQ6YNvn0nkC6cYwP~YdS(=7e8qZSaZingLmg7U)A z#pUBNFjhlrvp}xkKmdXP$ZJKPR4q{hL-VEvXaSZe5JeRb)lw2T^?I+Lr9g)?h})*# zkscJ2vB@qUxXfh}{K1*^sZPT89V=KP&Z^<@;c}81oh%3dug=Nd4kqVb7eL){UM}AcJB#xo#dke1ra`2Ls@HRo;dXCC{)48g_ix)I8p>iRycL*22IrhiXCC(n+eEJnG0LL zcLlS_`+uyi?)mD5>uJZQ+w(o^lAEq_t^4lU-%S3{XjofMqk1j=a8gQR+hQOoG1G z+RwuJY2{wRVXT+SRLL`ysbLd*mHU9xYeDa>!ws2NRm`N^i>wZrv{5nOT`)Qzfed=+ z7~?+0umnAY){b|wRYYUk7UMXj#;rB6LZ+`R=A;eRom~t*MZ)LYMGxMCN&n z_U!Zs<~q|DM!;o~c3w}jfa=scPlz5`UGXRC)vwrQhBX2ZZ zAYuoY-!(&E9!;(za6g)gHkLZQLw6iZ?!G)z@{omp)dgrjQE?9Zb*_}A_|iTo-Q+hN z#Ax83y4FU;y3~WMM^$8OZMzA{fpgou!9t~s{?5w{ zbM--)>}J;t<;5Jw=_`KEh`=`}RLZ^H=9a?kV)#ei0Zd^4O&6*VsjmkAjQ?;b6|B?X z-im@b)}Q>YiQTK59>$dBw9r#vyncQ^k*jGUJ{|8dGp$)zRg#!tp$OA}pgega;oz4_ zvnr}Eon$C$ENJ z`J1s3=I9sz#A42+aUgTVE?EFixyIBTE4qi<0bIIf_4`8gn;9k0H6qTM`mWf}58l*s>AWhRm;`;{#NiCi z(zg&3qZgI2rlDJY`>@OgHVIUXXxtb4HU_vTJr^Ax_}CQ7Xbsca7J&^SsW}#8cYb~r zN~#^@aUJ4>p!bSb!xdPTg$WSUjZo|z`M~1?rQ0W5J#kd%fg@r~{94=5FIvj^7gX~R zmWfZI%qKnqsbrqfbRWQmt^&jIukeq|C=Sp6F zb2}s+|0uQQIWM(FUHmmiTXG3hZeE9#^ZtntqdMudV5yr?@Lx4%--s-$usG>?)b76R zR26y*mPTGl6Q2;KGN#z7woS^~o}N4P=H1ewgy)csoDLD^MTh741b!e$NakF%Baxp}&IzHpj8W0aly=LeI=6&R4;D4I!ia)- z*H$rCO&9-!pSH4>VVmd{SO1#FZqKa3I zm#tE#c_TKxGpaVC?}CkM!)1Xb!;=>Z@v~JIL_z)<%aZYmzl-cS;4TSzqa6*NX~{I^ zJTVk{Hoeq`EA^TM1Utx7Z>7Qmm3a&VN3h-%Pib z%d9eBz)z*{j$Ow<)YZhn+1O!WXk zWgE*b9T%pJzoVh5kH=)Y>~0pPyID!#&$H!>QW&I*Bz zG6&Nsf)=qFIwGd^IvXPjX%18}_nohrf>gBv#C0pSt>WrpoZ|;;$a^Lu0ZPl##VI4v zBZucXaLAfkjvnb_JHF3i%du)akh&*AOwEslbXSOX=hju1v7H@sk#AP%I;Z8wD&q51 zGT53k`4L9l#a@8o9zDUQz%sxvbO0cjI^f6Y6B&>OvtHumQ^N9u{W*Y2oK6siREyMQ6ebi?ew9^?O3)_IPMP0oeQY7XXj&kBy} zzwqz6lEV&Y{Tg#>f1jF73cs)a$A(a<)8GnZ?%cmIf0Sx=C+?hR!=?cW?d0)*vxH~k zqjGL~yIY_Kb?3A5v;X-5pq)R8G}vO{2u8+yX!<-*;fhQD9TarZ&y{?-AH0q1?{#}L za0R2Jl|ndIWE@V?ecE5e6!GT-Jmq!k`Tda;jz)1aj1?)F@op~xe@oXFVk*Z53XWxy ziZ+&gc5bY|u8|0SQ%@vSW5Y77c`C;z;e%aDI@u=mD|AYhZ=^8M%Jf~A1KKZy%21Rh zwMpdNfwY57nxCz6_O*gl1Y-<%(hhJCBS*+G%o#6iA6IzSY*3JCO|4#aLzS6hjf038 zK4mHXm=1LG?iQcI7L^xai_1&5BB{L*?CgX*oB~W>`HpHy;qWY4d4j~2@TN+)vBpBJ z-dM%j4C#ai3&Dlc=_5X2Gk>WpXUY3}!_nRC_zEBf0zeJ~C9`M@)^aF?CG~YJfGq4a zmJYG+7OX71H=y^4sp7USGmY+>ehNT|-lBzFJZdoqRcLD$)B2{@M;C3qk=rq!o{@&_ ze9c9bQM+{oD#!Bo^lFL=Sl)lE(tmi-&l9Gfz~@4T3iVXO3g`i8aAsW@iLd9Y6QY}e z@Us)SqX99R>N=qmqfm8`a@UiV-T_1GX}!_FA_*>FKui}(JeMsl^~IMLp+Uj5Y8C}c zqnmDRzwOJRM^T!o_2%n+x50K!3rhNJt=y2e{I5IOFlQH$g5uw`Kd2(B(kKG?V{jOKG(`%C8E0T!0V^KR$7_ZYZ%HSAWoTX%GC_$(b5emo`3GX0LSFS3R zogzSo-UEdh1dJ_Dy}tXlBA`rSLTMCaHapl{1Rvo;ICg=D7KuxMiaT%6YFwptUb}!= z7@zeDjMmu3MpL1ilA3nej8D`*l0aeWQ=b|q{?`TjueEvLb-o&(^>B-S9>M=$cm}6O z0PM$kD(WbZBlsy-mAi^ASG;JwUq~#&5m|gnfr3Uzg73>WfM=gz7N8hjHja!H#Ljy<1)Fy(S%eAS*)^V;n7+{ 
z4$$_E4{C4+^7I;3kJLO$Gkgcina977heoHG|t3ACt5ElvR_Yy>** z6o?pAas6WEep~ec0*g8~gW^~bQLn@&?<^Mj)MhKd80bk6pz0lrik5>xVXS-@oBW=- zY2Y_zj`=sTHy(OF_$XfFW2Akn=4)h9X(Sc?o@697@=iKKnbAmh4qjQfP^^6!O0c=A zwI1v4Dd=c@Rel4?F3fAcX1r+?FRs}EcHq~|Wa+CZfAz%GA&WPrHLKmS_|@JPmsXKmQ`#cZ#99Vr)VNxvQqjz)EZ0 zlM2LF*~f}wOPFL+%JXsp4;^_Ws`ovYjJii!d_7hHw}Q71?)uQoF{EhE>_IHYWp9(( z$FzNQWAyf5EvTuJA{)PMPpTo281K7*$c9PDf!xI-t0H`zV*6mWU%O0}Hh73E4i;@j z`GlM5kPb)_NIX{vvWyekuEkV{1=$6cgIS@i@aZHH@qtS)A= z*#kPgW)beWRKJiP%5nzER(?FeK_ju-NmP3=_vjUmk~k6(D|%@Xp8>Fw&*DEjWr8np zbS^WOj{hrLdg%=C=Kg19{+|cyo;(0xTy|ynG~>#%mK;v&c; zq>o~UV4_V89bsUb1XNt{*!d8u=; zNooLT`zu0eugv+@uf}{1&k^Os4f6Li+>mcd60u41z$_J8ZrS_e8U882+q5CmMDSugt~Jaqn~-tF@#ir*NA|WG62LW7Q}rOs?+=5{P}1~-+2P*3E)s{uge)VZWjfgzIT(7A+ObLgxqk*z+-e%ddH8D0W2?#8-;3J5b)&MGrh6|N@{59Mbhx*ObWaa>LIb_7dK3|$geff4R4 zw>+GicU1_eSa}Y@DqiNMgKo!+7FWntyNDW9ec%#-Cjq(A-05;-$y*yANPt+AE*U7? z@ji4<^{;S*Kzqz6Q=)#`Xrfi5f`2gLj=Bjk>I0gJ(KAiun+gh)ir@q$CHe980-Imb zCNHLNtSjPJDS=FH4+Kqizsw!*c+s*A6Y2&^iy3_sJQDRzv$Zos$|O>>UTqnBDbIb% z(R}J_@9E7r8dYajW($HcLeJgV4CQhM9T~!*>ik|5sAb8k-x>LAvPXhvuoGG`vALc6 zmk@+VG6Tq0$S+M^FZ{Y9Y4qMHF0~(hS?`hTC?i{LuL5g1PxNCrzcf#KPkT{^;ksdVKR+o6o3Y_A!NQN)^|{jUWlY9& zlX_mzjg;M|W5!6Mb~*6gQ=X`E@36j?(_eK9Vk`r#jmD+(3HQJ5;ew4Z2`2dN8pme= zlwdNNQHYy@P9BsE5N5qb_4*q1*iI>0bQs-a@MF*geKoB1(zpUtTq1(%IJyLFjw>nG z(AYlDUoC=}1wpHVPR?%bT*qAsz+ibs3e0(Mt1p?qS6;-TMTQRcDvJ@-9eW0tF}$S4 zzprsFz-e-@5nm0|P@k8zED`rTStm|eG)BhTYH4-^&`!rr+3u6J^2n^Hol#{3B}YXs z(`o7W#Fg#nNUwAiJu>d=WYxlHKJRt85TOmY%4Mc&giG3*!wHEUEE2*K%%@x@K)tYV zQgz+ww`9GzAKXCJOV>B60{{(&k(+SW}oW2jnW3rVi_bD%8f+y=UBer!YWS zy-0?1O>tYq~=U|l#!c{b3}L$ScXLS6Krhj z2_MFJY8-*ob%pc9JBrf#s%nXl;8++LoC~3JUKK$m56Vxl5kwq^g@F=1&M`)zuLmxK zEUFABxd*s4=Sq!k;3DEDWJQwHm^P1>%sLiO>|O}K)G`sen{Vwo)^<*pZ7H{SkGM!g zSrL7l1G7w`W-L*e;9G!$N97N8uK|!hzS?fU2pw!hxF!CgppR|pr-4x56joJ4c)9Fu z1;NqzHnsgRkNM=CO8R*N+fGe{X^(Lvr}txrDP{7}+Y@w+%HG$eg}xb1nL!c^>*8A; zhM#?YAHs^T#Z>|pYAw(`q<7owTnc>CPrVtiH|en1s^KrcQjnY{iXtjE*}1K3hg`WO z1^&J10kmJ!7=w-G>M7?qlOJ`4`%cB6PJffK%lkc;py@yRz)%%3VdlMtFe)+~CDRw~Sx;MRIeb z;xK7`#J+j5Mhq3JaK=qXn=Co6YM>>vYITujXu<{BEDNSZCjHv?R0LJ;&e z_aKmOom6_=hIfYjKbtSlP=wb~9u=LvgZUGUy8s1TPTZk7t(_g<|u97NYpok7Y^r zR0D_<9haOhQL!;pn}|jI7*Rx^?Ks4t<@rz45A+pvBZBbn%pD*BH-wW4!u7J6=KWk| z7T7b~P5k%&)*Wqo4_uym8UFQcl7H*JWS`%!e%Tos=H0Xe29WjYBe9yjHIwOU?Z@G} zKkNLt-ovV?g0D)W4zElD{oWFf>#aIE6S^_xTxPCSWG|Q|fuRD(O`*OaGet38rS z02UFtIG!Ly>A;RIDR8 zx64@M15i}LU@pSI98jxkfo8-0ShkD=8q9@x+Mt+HKI%lUouzjg4>hBUk|qN!;~cli zb;&XSbwz%~-S&Q9tNrax#cRt$v}Qcp1PiG?Iu|U|7k7o$NXnMG&j!+wy?6N%79^gQ z-pR5FvRqKz$cfY6v=aOBsO4>1e!)?++G^D>3z_^-68Dx60sg!&f_VA;O9i zs@HbDNa z+-w}&YB4HjK$oF7e_j$KkN{qkg0_r1Jipr$4&7~ef>*gjL&f^!wIoNuBi=521&y!t zdU{mXY5ady*v`-5XVAH9lHav;?{86#`Z>yBb02K$tBCrGw13w*#k4HtBQbhFtPbcO z7ssnEVbgXn){fNidGOl}a^%IEu59xnY~W*9{fttJS&L5;N+GZXb+EaS(CywLLOCfV z?RvSOC*dybmQlr&@1$nKQud4?mr+Cv20B-SyuunFz4VTT6ia2TgYCIZSNJ64iU6ET zssN5IL~Mi+H0up%1)+3rsq_<_L((tK8*IlIR9%x0kP6cZll7$i3})RFs}^wHBvYo_ z;tQd+WdK$6-?(m$cT)VcEKoGKOVB(j665&H_JV#Ma!gO7Y6QZ7L;AQWgFSxDym!9> z7Yx(cOJ!QlC11_^*ft1fnF%3U*Q{f z^u5iT-Z9hWjgpJuzZy1znNI}=4S1K8tN9Enmo#G87^C_UOwSpW&#&cBb~!Uht*o2N z-5iB4MqNhduMH498dcnU{(evCBT%{5g)h<1M;`wGgw>q)Q}d{D#A1y{8HL(AvMTxc4Ne?_63349iUg*#5-tci~C4@7cH8DodyZ4vvhx zWUj?ybU+WbClG0SKl}e5k-2AkPF-SUo#vMDN?b|S6Yr{uV%Z6W%x7cNw>O?^hK0@S zFWa_bHWw8ewuf&`=qj##O5J2c(b|x_5C&Urqo2vjt|Gg)7-1p_BR#PYW`H|X14A>X z(l-d3K*IXcmND+F(X<HAf{XLp5slMGk<(D|th*bhV z=+VQK_wV$vcz`JcYaf1t+|NfRa6gAzgd-@8lfj4tkQv-iWp3eW4k6=x@I)nyN6u>SbX4COV4~de zo*nmK9gH#0fbbnJ<&7q8Y+gLoarF|Q)S$m17S<;!En9w#jQajOjUETl3)?0=}xwdV?3q=8uR&nTI7&;VXknS29ln#LbMTQ!>MOr!r 
zq(f420O?Rb7&@gxrCVU=dN0>{?)zEq_q}ZV;m`MUe&NIYwS)G5zx)3? zLuy?9je-xBsFnW4ee(RNq~Jn(rae@1Hwgg21t{@a*m~#4Ge)}N&)T%lKBWMM`a*;v zx}Xw3zstLO=38gn{#!x;Q!b#RDZU1sk$xh)fgtbKuN~P42@IjWI1v>&`e0F_gxwCBfVk!cvm9UvOU@EZmYw}#6W6WKb`Z9 z<419~z)~>l5f-akP%rMwYDvy9m#?4kzLNVlDf%Obh$!Ci=)acJxChc)dI2z&X-fhW zISdi6f^v&&qFqyFImIHgLyWyOM%nBx3W#D)y6=7KpUzo%a`?TA;IYg*w zN5B32ETyAw@~S#nhODvxn^RJ%V#e6^?oyj6^!@L?dTICNG!Ev+0bCz6$<0?jUte#f z6TE!>?Vw8moG|2N|DHMY&K-%Fh50X0cS3JbY`uFs$@RW4Yw|BInXmK2uHQ^5{xZi~ zZ>G9|Kk`E&Q*32utlLrjX9!*3$wmWqt(nD(eK!9-Lb*#kI z$y|RJCiFtH?45iFPr6J-e=h;wF`+xY=nV$E=(h@AAU`O6|NBpkGzuVMHXTRJKOQ}w z?A8+IKg z{vjS=ls?GDGAU9Y?kQ&6kUl5J`J#+@6Wt_;yE3Esc__F(a>~6vomZ9cq7r=O`jM>d z1C*LahsZ7&EL{@2guOnuPmxcd&)%NWs`0k;sS-~SwRZJ3xgZZEw-ngOk0YGxpWnLc zGb1#I-g|&z&ec*(5O_1@plJYp5f?F}I29T!FaH#Tg0xDz=JFxrAl+Ipb1t65JlCWLeAXqW4dyD5J@Qw?CSWy+RMPk1Yd#wepc74F^%^gMe z2c&sa{QO8^Jhgz!R*ND*tZCOS`d8YHla8NMrQPc_)yD;rV_n?L5{X%s!;6D`%3?Pd{BtC;#>$l7854>l4fP#j4#ts> zpNKTG2Xn=!`q6WdtC(05$Ynjth!r;UVJPBQmSjUi$f04K`UPtA3KdV|z=1Mxu^_PEi2Ph=6fPu2iA4)yG-Bsj zEmLPyWffU6Vle01-Mf3Z!B32uB_g}k-3aM;eHric_@`6och!3 zoNNwoI_X~OjyD!TY(t`+wf&TmSdlg#kUz^KyTo)&#+>+Fx-_NIHpqtXq#AvTKgDZj z`+vRe|J(t%C~!QfUet*k+ES*WYOW+x`e$vm@}7O;l9b{ee`1AyKUm`pHHk`gY@RYY z3O}H`M{)Fg$wqL3=jZm7aH4}^^H<9El8!=b%Ix0rw*UI18VK;{M*6SEGP{2fBAMIm zum)@djLuoOb)TRyts55B&_JK-x#RuczbQ~^wRWg|C3>imQcw@2HuH2oVN^%x_XcG( zwWT3KAwI77wU}7w!V~nPdPKLm$LkYCC0hcZME#$o5 z@i-@MutLb|q8Q#H8TX(eLOd!hmhFge(#uaer5o5tayl?V`t99#*7X1qMB*Z3(Tv*X zfaE&znN1+vc^D&n^t%@Z@fjHio{dYFNu(W)pT!Le%p`t{CKWo!kJyE=qN=Q2Rpjk* z)qnrRhK!li%~wcwU7@P^_6(L!VPvf^xFK7aXUC$C{Q2=zB^BMhN07X{aw}%5&IMc#ut0F%@s_Ls7(mK^n?``+*E>F)PtD6CrPQ1!GX;dVE=~^uP7f0O zK#_LM%C8w4zFsi?>rnoG!5>PFryd$PZ*AGDbB9^9VJ%93^W%N*{=bvMy}$f(N0ym2 z^Is$pj;^m46Ay7c<^<0CAzd??p4fleLv2S-@oef}DW#&69LOQILDh_}x@4G`+VeRy zOG-ND$)mA(u}?VPwqgX=bP7lTs-F0N9G!QG**M6X_p^mSuqU$V#`#dso*A#ZPDxx-TBh$V~%a>OP;#gBiE3+Yh0xDJ9|O{aeZEH3$H$l*Ecg|$XCKwRV+=svADGjUkt;DMme^;2)Y}rvI-kW`7GH^D}2>6+Tpm_(4+{tWI8`2cf3V> zhma&l$TDu~adLO$1*k@ewnjC?<59-k)PBuyu47aY%FM_pgS(91H>&Nxo#@BI< zwdg{Q*yWB_s2-%k!YRW^I;F{4r;teJz6Yo(&{6W^(nui+mn|q4Yw}? z()PeV)KX#naxRug1fT$w1VG)QOlGNcD%P)V>dw5P^WaM4O9B=zHG%oYFXWS`Xj0-k z|HSG4XH8cD0Ry=`w%NKjTwzDOJZ>veUu4AY$Y~F#25P8oa8H=zH{ISE@|r2BNXi~m zdCz&esiz6v+}=?nMD{wN6x#X+zy0rx@(02@TZf%j-({%CUpJ;en^1^PWc93#di_xf z9$d{fE3l#x?mow)F1i<3{pNEmohKH|S@k+1#tgky8&+tQD}+*T&}KHyj~lN9x4jvN zY(Lx_UG9FB5lckHsuZ+@P|`&i6eoIhi6GO4SQO+Xq8@K_dm5SDe8=KKX$iwIsY*LPMM1Z%Ztg=Cvja$&zvd`J zWlqXZAVC~d#s^`K71v&NT?o#MQ~RVgib#x=ore3#(|o&)+K8~dO1>V$D5;6mOjg() z4dq1&r+!bSzEoituiZm>Z)GdzL&R#JduK6Po@vPsB7YGW>P>8QOxK>i3^`31 zTuU~c{KgPQ$Gp|#aex4(ExhnY{Ab@w^hf1_gJk>NaRq!tGh1A^ySf{)=u4-4#Hn_< z`ib=Zuo?Z`tLT5%@&3{fhox8r<$FY-S671C^3t#_rggJj|?!#|KT9(iv zZ%YD{r@{N^yN1poskK`alN=OGoO#4|K2{Bmy<$zCioHKgNfg`~1*KHAw^0DBMbhF$ zgoAy3|6SgR{VDHo56>xFAK}B;zjFB)zkg~yt7unuWnev>8{a{_sELE%sRw5F*X^@*Yr%@_sX(qtXNkUXc_-hg72S~ zJ=cd@S5F|n_iAa;48NA3JI9o$=tGq(dz3tk!v>)fqdb5>ode zZY?P`wyZaGFvfAgyZEvyuRWGPu~sca83ZjV)%`WEZ12O9=_jbEPu@sZW1w8bXno6A;vR(W5jG#dLpQIY+yW|97hV z2A64DqYW06Vs&5pbj%ZR+k6GnPWcJKy0w>?4UsaYJvbRg`V|R)N~T|l2;&O5%tg}r zDB+5q7sZ8(_U*}n78I8zxlLP|XhT}Yrq&eT#?1!4=ZCtk;o)tWuLlmucf&a_#n5u^M0q^Q z(iLqY*4X=%fPwu!r%ntD)T2}r^$U<~;-WsPlLUy)4`LRXB7`baAQ@5)+WhHpbl}f4 zKl>?0V6l6Z0-Wp!K#uf831|g=Azij6r8qe?9d)4}vx$>oGJ>0}uKfn5sM%-9f_^Qy zHO~?{zmu!fp#}D;RlY=m40nYG+y>c#tO7R*d5xE`4I)cdQ;%8igN+N|u2wA5)@aTb z2!NsHlq-`84!oCGYg+MAid;Vd78*vbk@|YfJf}xm>uDuRy@+oDw{=J7qvR3ANZi+8 zId3S6n;#N55`th+TGA0jvA9%OxDE^Eu=+e~4=uL}r(;~d`FNbde$3Q4c33_+@(9>? 
z7EXl)_fv>bx?FrK`}uBa|(1NY7UuAB4RfQaVT-BoO`andVJ4oVpHuK#Ux*L;?^64+XSTMsQ+B6 z90eeug)>-4y!&q|ta;#{o=k4|CP_;xajha{WTGr#D>C*x*f6|k_aFS}*WW#DpJsN7 z4SeD)^Av6ERi3dQeRxQ9k_Y~sU`fric)-Ti=I;jhKZVugpH{iiBcdr-;S0FJOeQh6 zTq*VePm1zxFJJt$%Nyk>mdK2vrHVYb{JH$cYS%4?cPv~OWP7J7LL{e!bNF4qT=0t< zpxDn$aTF9V>9^rA$4s5g`x~44b8ZWbikUIDFuIr=`7E9n23c{oU!pcF%?M~4EIGnl z=ideGPpz85=M2!xDar#eDFVfXYWaO>&3Y|Y)|C+H=&Es|Tch9W@^`?iwKgon~ z0l$^fD^CM#Z3og{3zNG{!2xy;tHc>)u0i_A^}WsY0Q>5 zI;89{PfX>*ErsF$+C^=_Oe7tSshQWf;rU8m@**JyMqL%nc%odr0Bnxm5|_@%YweG8 z483#L^~BmKkg(SZ2#vN6X|MV+#gXK8K*s=Xz1R=uBJfd$D%IbS?T?a;5vc!f#$!@y zRc{6qId=~FJ6BWO2d2_XHx9^eU;-zXdv&qPNk@0AWvORsB{bB8m&YXCyKHOSa~$6W z1Nlz&>}w&zbRd7FKaEA-0_{;YCsLeo!Zs`9V;Zpyz*()GR%wiYHX&y=%lndA%%+ z)aRq4y42y9KhyuftFB1=p2X7J!6n?4$L-^W3A;nYr3@#l^vuos8eqY8Pl*?&_4%>BnNrW3(fg zRzt1fcp7)IZw7e=R@a0P76Ws9NAKW^AFm^DTOwxS_{885}mUe(dtGw~Oc=clGAv0)iB#MJD zid(()^>EJHwNM6p1;PSyX0RX%8Mj6Nig$%=Sj}*(F)qocbdYU5tVhpcOUR*-*f3HQ z#KAoLapco5)zE~sm+cp?#N|K9hRE8jQs!!0)+9t&6Ggf_jRvoLQtxb>T z2^c=BkqLPUk6@YqqXocnadQHW=2QY`+_Nr3$)}OVai{X8(-qOC!L+ev;O@9SYSg0} zPNM~C$!SmBc0^B)X6%i|Ob)E66Q{=u$MRf2T#k)Jg=K>SxBjOWzeniL50hLgmOr!Qv`NWLGoQ2E!g7x^nqaeEhK79L= zKP;}eVby$dB+>rx<&|*FcXH30z{A7D+1yaiE^&XCg#Qcz6q)|OfRc}zi)^F+!XJ)s zDMrf{YI1^Kt*2%yS;kYTogmT90w{-6k$ZzkSdgB6_8<7jSIhwG0(E6jq9ZZ*w*#>ikNn zk@5*AH$xM4=|$-kp%|zpm?xRrBooo2(c!a*ue&YGlzusVw93y zv3caLy4R~*w+&uVgJ_A2w|)uwL_T|iuQ#pO-7TvY7319xYdCppi(6Bt7uyPm{0<6n zbq=$8pC%5|?R;bm`Id55NY*$mQYrbWI++9+|B%4wA=M2YA8Uw0lK2W&J@ldz@Lc9X zY>X9EdY__n1sid<2nZ_xkdovG59N&Ic^rHyuO=N*kNKHOQLz3>FH3>GlILnDq+X+w z^b#Nhu>h0e^b&*5J>6^VXucv94<$}rrSdKzU)kvk)4zeg3LlEsV-*-ckTqj}Vj=$U zd^Bf!Q{^#3b44Jquu>iyKqmI|Lo8dzZH*Hkg@PS`;iPhA%jM0dzK-oPk8|9sBN(3?VWOi17R)tcDB zze;=-W&n(UI+lB6LI~OGo#v+R!^ufneabq-ps&%zda7XtvDl!nx4ns#z!+$xLQ3qj zG5i6a{sz_-IL0_TV-e+4m~BE#vFxZSqKXjIANmq5-%c?N`%KHR-AvZiL%7rABt2Ly8xVrs%yc%=egXc^Q@hmku#I$Gu6@bmr6a;T-{dpbu4 z8==?NV(A&5dg~b3dhO%$g)UL|KXH8>r!t4Uv|DJEUmNHdVgHyKMT`yh+Nm!Hze_wd z6h#FQG@u_8B1Hx$5>3TfZUU|4JKr@k0%;^5Mn-2%2?|u8bj45%v|H%Y0-3K`=259p z@>*uzoSuAEZa+2?*sXK3gp`lm#>BP6Na>cF&_|GP#A9;-P6=EDp}iuP97uZglA2bY zA&-=|+L098#fSv0k73i+%(`&HHw9@47^k=&Q^)$hI;c1V?tcB2X zEk>V4(3yhByz^C(f8|_j6+{O5RIqtIV$nfXCGW0f+oKNVydTkS5g(DD9L$Z7A6kB@ zinz}h4LuPB2Horu7r&bizF)Na7LCAt)ym?2Cs9>w{}Wz*6C!Ex?#5_!E_<2^DRBY*`!eju>DsXZH zu4utKgrkF!YH{ZZLdT^{3~P|d-bbfNTBQar^mAnAs;dQ18;;lU`H^pwJmqVe&btTs zEe4nmmAl)b$WxzHIy4D+>R%C(&n2aP1OD0BJ)7wmD#Oojpa7qbgfN7TkDMJ0l~&AU z5DzYiBJH}-(F4#Tt;Ys}@GunTIB2C!JgX`l$9*4!S!zNg9mq~b5G9(3+MXmn$9JUE z`b5FLMpePTzOpz({qhHCG+Xhku zdbia#+v!5(RiEnhz}y`E(ry3hooiPEqN4At;ums{?<3ndk!vek#RU!<5A4h@m)}&D0n>PcfA7ByD#;tZb_qRoAERyK=W4s|y?Zq(4*YT<5+geQGtU%Wrek_wK`_m2j9aTWvDHdH>gp z#q|zQ!nqbq_KU2r2qHD4w>AjDN5jLr@aQMeNt>Cca0Xh3d_-P(JpWn1vmzv_jaB{W zAg&q+P4z`2PvP8%r=f3R&VazTqKLSU9&-N_XHF)x#sr;{OME-n>yKgdW43QS$arcy ztGLpax*KY3Jg~*m4QEo(n#qGv48dhw1L_JX%)!-hOnCNvIyW++G8=?nv|O!-Ci-lzM^{J;{wLX$H* z^-2;p2Z-<)BQ(yEjXSeQ*M~blM)XZbh$=B6eg^U`={m2qKO^Vw4!d}d>`xJ}6CHg* zE8m6?U(*%@3D-QnZIPPu^8=i3D`3qeRqp^3D^Wc1)5YTX9BE7ML=MD*0;Uvb8OY) z+rFUNKcpoP)lU*n6;h{On&9LJjN|&zY4|%e*$SrK7c%xHpeQxXYx3;I2yt?bQb8MF zf_{$;AHKf&YLQhnPpbX4R)MRSemyGa;&fG3zX55TJ?re60&}9M-5e|9;waWaTwT=j zYkzbqH*77_sM*(E`Kmy5SSufyQu%uC#_4gU-<@q6-=o@^QqA_xk!pSIBEy1~gWXJk zyJ6W&9AYWT*ptY?ReEH{)1R`#TC7tF)aJ=0eD2(#1)MKFJx6zX%G7DbcgE!|*ypR9 z>gHb`XNreL8Z9ik!rxYy%$<313nZQ$(!%Afg1J9Ps9W`?74a!T?ZSrM-{SUW-I0D-o@IEts{-bU1TM35!kcKf{ni#1}r@aVEzctJ^9JZjwhP&## z&=wNjrZ?}|UWyI%tA6mJY%brR{_$Y^+M;cjf8+F3Th6wL+L%!N9NRDY|K{hn3ILNB zfKuEF=jy9e<&`Yqp>r|m96ZMo&v!gGY)^f^mhCUc3wC#qWoX4EQQ|l!RM^>h;YiO< 
z08ZO4XF)OueX?^~ct>>h=Ao?JwbHxc-O52XbdLc_~{_A;O_HzBe^$?hPxKQukT zT4n;b7wL%U@0`R18HJn5KC;Yr?fEO2enk_&rO3uNGLyB6#f|FFMc4K zT$XvyV%QWXoCdVxRONKZuJy|`%R)%%by?L?$R9NBXMH_} zAFTA>JO%9hAbimv`mqD9yG>3NklByzqC0ilI>|ZseUtd>8y)fLv1+fxv(asiBU9R%MOtt?Sl{~%eTht7hynGR!CgL>bQYFcS!cMym zzm(2KwAqd?`93*nTb5ClnRqQ*o}R8|{t{5=CKbb7&#Kx|nK-V0+c_!DUn=D~$Hn^` z&rGD`>`VAw%j__s++yQ>ZHL~Gn=|?%a^+78*)0#{Do|(jv}#AmG>JY}-39x7RqDZV zr-{A5^yHO7Gx?h6!BYJGDRm|%s4)Y#KB3+!vkj`&_g{4Xd2#FPuTwkyI(G5wY3KH? z_C&hq$)e|&xU{)#BcZcNi@DXpshTvW;{dk>^n{7ml&r@}%xQzEL$--clv8Fe=KV`y zi%w_I9=6>h?pwRh^yl-;G_U#7C?ncL?b`*n4pw`ckMAaa3#a}DBRHH~Jgija9a#|N zXxkfZwD-F#(-IoW`#o-G|7@lp(^sBpHP0V#=jJV3)m`%*pF+v6R^33?n)u0>_${Kd z7FKAdUh zgYmbBS9;%G`P@V=?RW2Agj>8j6Taki{pv9nxT$qa?AhbUMwhwS5VlWw==P&j@cxNq zkgW0IO$B1T*JX$eUgJUZ<@8o6F|A5&sUe@l2|TQYO>|B8rH>dpJ5vCTsORJqgTu82+q?J96o_G^4g-q(}DuAje2ud{-T5BTN4=V@m?W z+{PI>mkB5N_Wfs;eeGT)uX||)Ja{Vq8oSU2bjCL*NwS8f1SE!23qEs-8W&z*gw2UW$oEt<9a_Lzi zxU8^lG4I8elJiC~^|rs>dRT;Rn5x7SjWX`DBGjBCVtO7Ts~1B~B^$r5M>FDHai@k% z<+VbQ7VXY47R2Y(pr2@QoS?*Ob!BB@#q!{QDe34dB+J$)fk*;4vCYp3z? zwjV2uo#Afd^}*O5d?D@j{-`3d7*k=!Vch);#uq=#2)a~Fs_ds1qYdBhL31qw-nb4z zhYr8aZcK?6rLnaKJMJxShkpTw&~f@Clvu9H`p!FCiJjv{_orSlu4>uj#U9(W9KyuZ zQB-f7IEsHPKekxfu03eQoa3taZs$%2!5_7n*gqhv(`oIhwx=FMBy0gr+lkR#z7L_B z)7x}H2Ymzn+)(HX-ukt7SsA)`BZGy*Lb6;q&%-rd!0I+?;|AFMx0pzKcm89i@%B2gXAx>}=saiZTVstZ2l7*gJPkuxS8uK7 zY@jytDggz~KZId;*?Kc11Y6bT>%Hc0M&se^m?8W#m;#c?{rfP$H*GP=c>2jB3NO;% zje8AvmzF=W8uMIjef~;Q|K+u2TRVI2P)kbmP}XE|acWR`{G$S&&+xCMrj(CMeO{q) zwFN4tY;ZMb>tKwY<)eCqa$?!AL9TD+SDiNH-RHZ_To6V-{vBelQBsEI#vZ#9y>JG* zn3pjfw6}PFn1HdzX;30vx9Y#8Es^BQ0%(U$#DzTEP#YJ!zxC)@g^!oeZ@<`ueu?Y- z;tNsVCV}PT)@K{~pz`9PSO(CkUWTV3KzUeHLg9wH;K|-Q^bQL;|4z})7-v1|WP&Wfuuz#Fe^2uFI_f*M z?Q&`IyH4}+)Qnfx2)eJt+q~8DWg5^4w(R?$*|q^G89Mm3nz?@P;}>CY$9_^dh(9qC-O>Rql4V15=#gVzuzAGP8S7Qy!D?F;J-)a^v@o+ zbv8VYB-fF9^TwH<5P=B`P3sld03if@nE;UI1?lUV@n)XuIX8*5$i}0Q5^Jh%jkxJX z8lIz*#i-%JDuINw2VlHJtvDK40W3ux+!~?gqtRF#wb(i09Yl#Y|DCW(7CmPQ89?@p zzfMTSy6HD~@1}gVrhy*WK&Sdii~>q@+P8G0u z5Mzw_hEb4!A(JB?6#?b!5mFSQ%PhFSJ~}vEK?ZvGj$1E_x+YxAIVLu>Rp-^!PjPac zy}_I`tpa+Gg?T_=G>l;zi3Kn{868?L*^XRM8bR&lEqI5 zlU>(G^7sLw!o*4fj;!XBZaJ^&;X^Zt0Bv`1j8hH?@^^1%=WSY-gUqzkC*rU@Xo>y# z+gaSuW24{4m&Xeeg=0l4Z(>A%u|TlueyrI|6>qb1$WZ;k)QCQ${Q)b*H_UncD+L!Y zKXYznTYh;f0~;hnVS$`rQ%q&U|_+JqY`n=dCBjXklu z*YXH2R{Z!b1B>9iw@sawhDnXH2f5VI+Q%q#Z&_bG2nokz60@>hyIhp>J7a%Tu)mK{ zmhlao6n5F*)Z8&^=6w)x*Y+lle$(@_qk7T7<$4RE;3xU&gZCYWkl%V2#@A|ZMgB!s z?k8pWEtcF+W1-zMW&x1es;yvWw_o22^}k8_nr~vx!6IW-vdIbJtwNyo8mwd*NV|8U zUC&JJ6qUF6$_2;EG`))6UTBsn%bBt%r*U7q&icGi!7EP8$BE?KH-EoiW zvvdbVeT2&mbrnWPOjjQXTj9d@cSP)$D;Xb*jj$WUkn-$Z-uC*Lw?ie?ig&Xt{wA6D zWbj>7S$|XRLwkvLU#U%fY1USB6#;i(%$IL1jmv^CTMs$D&%R+TovCyE%K^I}O2IZK z-^;73b9+!BnMwQlNN;(9MU)VXn^Pm1y%(sOc+2d#!AHG(;uGv!UP~Toa|OdQfL5GP zg^Ry@kl1>H_B+qIn2A+Zir0+vk3~y1djB)Sct;M*FqmdM=XKZJ7UmoDX|k8b`zegT zLoLrZU+J&E!#zH~)gO-&v9LE@|ELt7e`}kW+fB@K%!x+zF=`1C@NqikV{k( zUBBmJHT!jHRd6X%(4=@UX8it?dl-L{*kFNd|JRv+LEMkcJ6 zbfDhpp1r+s>1goo{IFcoZdp#R+bXLAl0-xHvTq23ut!0_+cY9fK!_1t3IyhGM~)Qc=-JyG zuop=>*FOR2JHur=pKfMM<|0tCiBwkHM;oDnAzCCf~I{0 z8`Y!oexc8CQc-vPkaQ3xLs}sgeM)g{7{F$YMzm}OQn8XCyBVp>DIqHCGvMXgvxk*PctXq#1cAbWeYw8z6z(uW5D1kcD|;{2EtMVkZuwY|V1h>EePXA!g=jq9kVR%^XJYV;P{AwG2)VNr-O6Ov_~b!-Ww8dhpO z-~z;mDPFgWYcIp7jBL` zH+^4$8>3QRdLMWGL#Ew{r}hfapzBGcc^pj`X4#v7z9f4l>1q01@cpy1Q8EeaO|scn z`2nJdZ?OVX6^mY@lVl4?`ZQBbW&xcvv7SNRr{7o0cH`BfM-GLMs=3O&ij!i_%iPrq{UHY>Ig$nhzFM0`yG^_VrpMfbyuC>~Pt7_sj*A4Ea) zbPDw}r)%a+h10*w#q}!qEnJu!U$%SR?NG+xw^xvMY)P=G#bx?lsA&oGZ2Qyvd;;U- z6{gN`Q!695YbFxL-j$);n^Ydt%eQ^s@~onrG^xP_5-VV#v<9eyHIz-p9O%n;`CXlg 
zY>0;QKPaG7Rv|8~NH`-~@Z*x$#MB^9`KJt%^4K^nPDCX$E%MZNd)tfyP+NN%LyH*r zGUxteygET<*tlgPZflDFQS7MPk<%Rs2T*$L+>Or)|W1iBW8F3|0n^T*XA6%*PfD=VF+d zoqqI|pX&3aktSy4Rkx;h6S}6p;P~sb+y!G!#ZuwVmJ#NX@1eI)wwnv((e?KPcU^Zp zMv6w0rD!JrD`?QYF%A}#udJ@ePSCLCQ5XYE3j&yEC~Lpj@J2cCD(W|l=2;FG$o~r!OpdjPB`qznivap=* z#g$jGm`#55q!ZuzNU34jU@6|^*$U{h$neBTVfT{ey6@K@)ILXn{O4)R)1!8>z)j?V z*bz`!3(KmlO%9uNy6pBS^F_UX>HGU@h~hQgg#1+Y{&0u|z0`f=@i_W9fDy4<`~A4i z!96oA-|z`)$*N%iYF&vw>&)DVHAWmwX3;U-kzI3igy+r_(;K%`*mwRV3#QR)H1ef0bVNAnGZ-{W=Aq)p*uoV)U%Fz&c z*_@^aEW#!9a`K`98mWp}+^w|sEj}7$mZhtFdQ4QfJkb^#U{qvyG+P|xMKnVA;>6|p z^0?u3+R9=Li{$i&)L#4gL$zi>N{^_$Nejp?`OSqi5*zhgo~nAt>de0T+H77bz?;7o zdRCvbcSc*DK=_)S>-&1|Rs%6qb%{U^=TaC}~7uE|`OwH;QI=-i;Yzi0s zRB8B6jV2e(r^;1nUvWTP&@B%=wQrO>Xb#)<5Wj!9bwT*AGB`o@PZ`XzBe3hlK=`^^ z0SaN&Edsxq1&~4SYiOnUjvrhroV9XkxL)HI5}KA@t{e-82Nr%*enCUQD*>@?5Iws% zCAQ3LHthC2-+@#;HD^<9shMDGZrtdv8i9NBpk)zy(2h3=?AWS2aI-b3CO$n1%glY# z$h5nT(;;v&5}wvOFs_)C+|Y5(s>Q8o-l|18)2CnM0y=<;ds_I4u;JNm1pxFF7e)sN zlQb6LC#4a%G^wZrC+G2)1gWi6fNNnHy4385pjtDNe+E#B@LFI|MY+QBHS`r{rdO zIYF>t&AN7rlS+lG3PMR5ud8p=A48KT-)0@|G}b4%@|p5Z$-TW?&1^-<@kT&Dn6EAm ze_Y;no6miSW&+Rot}?la8{S;?86^{r>7c zZLQ1im*sah?nUhmY4SF##YrYo*Z1H1a9%iUQL<6?YBkfzb!>k#eQ-pvvEgYm)Em)# z{K#=~n(10hbbpWCd<~ssni7t@rz}~0MV?5o;OO<7MbV}>M=&GPb494Q!Di!|nAfur zGp5<+FN<0ayo}f_1U?;Z300Y=64QZUiG3m2f3W0cH?p2(NN?nV2+rR~)0beuBSay_$=o%BT1_cV5_Ok>=pRM)61NI|2IPM}!=*+zy_dbo?f*4&lbo^;Hws5c1 zaglXoJ>tnSCKiH9|5#1UB6fjQEGk$yI5Qh0=X)FPt%sOHU@eOyV3(c}x>B{gLR+3>J(`pBUA9OnAb6y9;ro8y zsJa}*qn;!#zQHKZVIN%q3-6uzyi85MOR*_gsrkK*_~o{Y=6AwDw+>d6TYc-38v2Nx zE}9OGW`0d~F)T=BfDWa9=a5vk9=gr`@UNAVb5T&+prh&+H+g;#nt#3dKNPrc^#1AS zzbbiFZ96bxhH`#`d|Q~uhr3E}=JOW=6QMg{iryKFp0Ag_acoJkdpR#TBnvrt@*aHu zq%%RSbgoR&yb5#OBG_LqV3b=&D&IhieK~!8dqHfBXE?HFoYI`inu{^X?L)q;hbV5# zL+LkVCb0a}C_|XzXQuT`+7-|M(B<6Qk78yvNpzM*njL8~JM%T>L~rFC$0-uKeD4Au zlfnb1m}@aStX0t8Gk5CCR5Sg)n$$QTFr@51WPErj-qOLCF`Ww-iMl?n3C3 zs)zx2>>t5SN{j;_4w#G*hJ2479V4jRU6JW0phgs&tW{(F0$L}RqaZJQ+8QSP_I1U& zjW!}6E{;cvB@mxD7X3PhoErRap9$T+Ei>nrpfuKQ$*?A!b^$1ovkzT3t7O!hVwZ3!+VIDaI{3b>Hgw!!1FoK_-b^-Ug>$a^9 zYX}p(O^`5yTzx&9L672V0<4FwwK{&tj2jkOxK=S7?WnTQDVynfX3<6Il1kQ+Bg42~ z!8jT87d$ExDeHFY38nZDBmLP_Q}@(|f~k)YNB}1Agm_z(g;t!Hs3%dqBUF~vYfr#T z{hmO1f3DXT5rQd3+zgIMpLl)G+U5yubpU~Qc30X55!6gtwyGZxK3M_XG?y+0*!);+ z%X&yzdqaZ!$P50hvgwt0z>xz?9{ud(AX=iQ!fY5H(|>BD4lxlIWlqtM?WhMtb~p#i zFu8*F4w#2JDa}2mmd8`=VgO^nuFrnc*6B;L2aN=_olnJ?3k48lwR^ zcrAdn_wsvTnqtZys15szNAB_>oKJzm=pZE;B(zgUirULdGI$|=zC`7*QPdS690V|t z30o#i=J*Cf3uN`^o}TP{A?3SmJiGZW%jARf)`^E)sE=76mu7B$Q>Zo!c0y$xXOj;V zZ?|t;mM^~TA^Pqobpy*WRkR$V)3`r7`lItg;^;0_A_MJXX;bgBwoh_#)Vk7Jix35Q zxuFQb(@6`rX}{}kRY8*^Wq~7i$^XODS^h=Yc3t>ZK|mUm?hXl&ZiWs)x?uMa;dM}@Nf6pHvAJl8J&$ZV&mWfNBnQ-n-{PbkxUc~K9RWvkJtjc}E3QOKwoe03i}2Wm zP{&;mWT2$zr5U(?M|!mBpCBn*Q=l7ywsFzZY>|-N$!s+Zrja~}beY2SnRqp}(gu&aTJTVkclZ_EE#@^DGmoCF1#>ywIG9QaZI~=pA z#((l`N{6;tol-?&Ww|V;wQtp|NZf!s@~xr`x}g(0^N)wEJ$AaQ<6x2^o zyxCSV}^IjaJ_hoU}YqL-!aVVxD$rHDNyzt0sixHX4K}D))Wr52wK%A%MG% zVB`g?(r`PIv6}=>$dZDg8_)T^{d<67w^ZZS^;f)Sp&sM^6Y@KX`kT|dHe4lVDnSK* zgy?$DE6_70sH*83(u+%l+T0GIaLpeIT;$i_(JwU2X(vBw#+5FkE&xTKi*YnKrqn5k zTA;&99MeQ3lv}053|(2N_iE*FYX+o(^RHBgaJS(f;NQL}L1{%4r&-<|M^v>>Gu6~_ zgUV&X*z&L!c;^O2R93VAxFXZ?!Gu~uOvaI5jfzhAUrLU{6OH*w%(uWW<&?nPtGtQO zdlNCdez*1Ru&5Xv$>c(irFX(m;nw%#XJhZQEZs=`w+0fUishv=z`^)|3cdD!!^L(a zXcM8|ar}r#JDH%=mT?PY6#JrvNhfJiPh0XHB3T+`FL7)GNwsRtX2 zfPXNnQ>0Yf#S>`ItKToJ(-@z&l4EYeJ%(LA>X~xhwh~d}DDvvnny13s&c{LVitJ5? 
z;l&pF0#E1RS0q`F)hLwHA*PWpa5t{oX+m6WF@dbYQ0$&*nCZ<##euFRJIqY}BPI0o zyYTh>goMvePIx;?zD=Fw7badclY{BI0s9vxsPoBehV0q~eySg%wO2{|cax3B4s3{L zl+0VdG|F%syovri`@}*|$Cg4kk{{nQOu4qrIXI|ucyoqYb9b_$?C^7Mn+d-rmL-pb zPt!E{*^c+(#{C5(cG~n`;$t#tWd}o6zqtEgg@=^12=roTW@{?h_|<$w?ng4T8k~0{ zM@jhg{8e7Yvl}#fp6BusF2WG{$eO+vze zvrXIgCe?-dy_Q-~1TS`m;vXLnNS99E7n*kDi{Y#M5<@6Rs4q!*QM)QV7;nB5{e5=RS-i7(E#`;0AH)`j79;bSrCT!EtR_(%Z%fZRh(xUD2MM{Goawod`55Go4A9SXR z>gIKozwRpL1Onc$etE`hJnnlw`_eyYJnxH(_9IGH{Djzx`&j8ZwH6vsU{4P#CTeY3 z_RMA14$17gZ|itaVK>J9V|SHx8HoSO$-5`~&x6LCSu_DV&cG|fEyv*uUEqCBDYw6`>D(vf*^Mi!*rV{q zqN^8EnNZh|+^&PyzF^CU?efmfP`8Oo+Bt*T|3N@r8~+B_aG8CD!R(;Tq(dtjf|EH@ zmB#F7fn(1`F65|kz06X@PT5ngK2ZuGEp7jPG+`6S)yY|3fEBbvqvUJ)!lGB*7T|5$ zh-{@|b)E%AZ}A3SE*tq2zU#pKKD-3~&SXp#%eUW-w5> znpg@$L(urYr?=Iv(31{`FZbZ!7#Ssu?YO%-7^;-{;lJSC{A%RM6**1tcveYXm!2$r zqEI<;rS~cBdKH)H5!g+^ zXDhPHU8di}dz*RUGE)l`$u(WX{!m5Rv$M^RGO=ov$81Ci%u9zXYC3`w2Lf;+cpTJf zn)1D4o8#tV4m>L(YgtZLh4jO`brswdXXI*xCo>DoXGc27iufA2@(mz7lw!WnoLm-;VOOO2LfnN@m z(cU_iW^3`6@5v!TT4U)&n}N3%J3^G5ywyXqfH%^a4^(y#XC#BfQ(JKBaMekt0XCsj zWAvV_b*J@BdxFXDnnkn&Hd8J}uHHTZF1g&@+V2sI2t9vN!i66a-t_M%cw@d`3z*f! zf{jzih|Om2YgGK<%;_Q!xoWVKMtvYT9Iznt-v92A`}t!cnJl$_1EnGKv{wgc1NceH zWH#gUi=EVOGf< z^`@sUyr$^#yF%oztIQXUD~Dak?Ij(EUVVvd9lTFFDFqX%bb1F+Nx(8Xk3%#GI=t;@ zD(;=;yF&gvYo~a4mR5+J>+L4Sx9@GzV0tlYhk4ZI?Ha8(9@)c_1UGtpv9UHEb9WpK zNiWWdX8G;qBGp^0j9gCl6^AtX2bqxXTdjZV!b8-Lxs*O1+lq^T z%mA_00$89#!-n``|3K9{gLR|tiHtr%yJ<6SaBFrW8as`O&cORDNFoy?3<9q#lQ=`i zzB08r(Od0_e2=Ti=k(}I1N|*JK9v=#$$r5*4mrmMdp-HElC$yW z4{fI~$?F}OAGs0!Pet9u_$q=u6%6=QVzrmV2NvC3*!``yE-w;Ap@@E<%oTU?f~EX} zJ}ci!0zNfg*7EdhX&zGT{q~3_U=7d`ecbIbkkl?Q&^#X1#Fs0ZsdVpX91-dWA*&9n zCRm_ueWFUdJam(vJn^0wnfyfr0O@E(rX)J^JA_^?mzJq!F!-(vvPM3+wYGA5YZ#ci z+<3wwa9VioZd3B2)t{6{2J4w_Bl!&QrI$CP>jo=#|C*f%3J5Fjg)WV~w)^|?LVMS) zq!Ruufdalq4pR4_uNXd$$Rb-T^j!peK*Md+)M?XMef#U%E1|J#$F7SLS8pqUbr@|| zZZSk*pN05wwAvtFBY~>UcYN5RN}2LJ`}yx?gH0 zdCDv{X|V@Pq2F_u)Sbn$!y(7Z4ga4;20m)%Oniu@dw+>g*1abS_pQ6C z>02p1+HOGsb3W*4gM>U-cdnV+TTs>X{nGfZvF~jnz{HYONS06HvS39p)p#9cfME73 zj<6^k0crw(p(q7loU^H#nF2vqnD#iY*{B)MauNcuEH=^}%)D zOKh%%KNNk$TnBbXu-G|E1i-qU;#=B*0odv1fE3V37H4@hUr(-@49n*&;=GOH+Ej9- zE{~&fNE5?(7uK%Oaw{(6(kHteO$LouFH(wvB1p7g-*9ZrQjKNsq#eAqdPVP$1xU?k z&FTc3ldKtuza-So^$~6(*Q8jN;iPpTZj{ZDdF85aP+soNwh~X|{swAlcw;U6?~eFn zHKeDT-R$N7yXlLkKOVlI>_y(Lf9@>PGu$D)mUUcB_MiTdNZXpQV*Z}hWWpFn)yrbS z*Hk+nz6g(|swYt4iT2)~LT2BB`q2%wlHbrztivh{T3$*q#yc zT*?}w$kTiVNU79ZpO-oQbXfvt`?zaAWJDWSedy&2epl5>AWarYXkQs1qq{qIV0yr? z;r}QNd|g>N%GA8`htVtk9R6GQZQX*#P7=}o2vth&WRN#egO6&Ex;+CmCNOtuRN>{A zkXinRtc%`*DuPfA&6qP3tY-Z@{vSW6^En5L#cttDQ2A2GDBx%@O5$7bW-C??r6(c? 
z#33aWxd=IFj}L)A6IU&aQwl{+o>}%3dX=@DY|Nm@EhxkPX44b(K-~|J2%14sbK>`l zbtB*;c<{M2p&=gp_N1c(auF%8=|)2&@^Fu?>i)qi%CFJ$B64Q4kJ6u0b@BekgkV1q z#=)LXdTYdT(nc96F-pXek%Jr88Cy}+)ST_dm_cq0?mr`Oo$g%q3EuUX7kQxyb~PX| z%@Z#Iw9p9eEK2zIj~W-~vM@|+xgW*Y%RaJzG534*y*hJD?)Tm0ss}g zxbp0Gj`ORUJ5CB!KIKLOn%om%VPV9@C!DA%hZ^h|Jm~x?`K3gsa~pq z3Lp!=eybU37qZF?jPH=l7{3?ypO88>bi@qfac^a-ufKB`+6oud-l7SfYx{G`AEQ8> z9i*Ouqeuzs!sOnS@j#}A$goqK_T2i0P+A}ZrgE;m+U!0!9)Q+wz(>ja}DL4(nG-m z$`ODt@h7Q5YW5ojYK>IH5Foql^zRt^7GR^fG2gtJf+ozix+^`~BIN@o0Josm3$91F zVFPnH5-*?|KK`=P+#p&pE-xZmI_WY={>`;ATuuQkraU14FH5NV1JEccDIqPl@=s1n zn?FCK({Ijl86LeUA2{P!6#Eg~Ca=|e%!EU`nw2n|%0G_w zy?2v#I3qaS5Kk-JoWajc%vg2j6@;0?MS~OVo=5{eR^E zOowqm#Qf(y98(dzO2|7`ZSt5WJJ*{tga@%hAHfwHwK6VOQ#%$@`1Wv6)v@Rt_D#xX zFCU^N6INr82#FG{J9dfw)gYVmA^l;iP#@wzM-cccE;+eIsyW@^r?Ml_V{)>0f^gDL z!Og6e#6?k2g`#5A8jSIdCmzV@_mGzxQ26hV^Ti7l6t`EBCqCk7ymkbzb?KkJ<~Mn2 z+EvE->CG8a%mdv?I5xc|3dMshsqcK5fW=lR8Cm3aseVS^`Vaaq6<_23%L3@L^E}mgzU~YCQtaO$YVszG z-nXyC;my>&!rnu9iqE`ZdbnjoiNLip?7Ff*Um;aL>{iL$o_KgEq3R%B_nJwA@>u)U|H^velc9-$3f z7OX!Q$YQ&Svz;pKANyO_3G`I~va!iW<2+oHC%}D{Ls3;t-!QN@py9FX7JWLNQ!pa> zB-M^sM>iDotb*IAz% zDj$l)*KhadgCpmCjvWe9wt(wkrY`TESj0O)#`>Oqv=NlTZN)`1S!;gVjpmP9HgH&X z;-~sE7loR@6wDtK9ny+gqu-c)u-A4{HrJTeB zJ>;o4KA;f%0<)0#=fXeT+ivdP1I|UI>x`|>?ngmBX2Qu-&L=1RjFgyg>JTG8uag|* zJEo)Ttk~n>HeQ6#Ib7MQxpS_r(U9uKj^fnO#lS`dFKnYgtug=Jbg%K0ge>- z!}Xl1k0#JD=yo}=L^9x!(Am+`qTQrmIrHSYbFDe){?yk8s$ zfhqR-of{FDA@Q=5ycJ=SW;P+s_iGbGykM22l`2%soaZ5zXXBV5WY?oPaFxG|SJ^M- z(@GF8L;B0dH>2*sdzg~`GPUAy%(%>sJD!32$s_1(1Q?%Li`9YYzMlaEE{rotY)vXa z>iF^F@_cG$!ysYSP%$xKPv@w!kKiEMAb3 zb+-&G%p{klSqi%#6Kb}p2)EX6b}flO^)ALs4JEKW^8AX`f8;V%FS;hkHPIz%KN$&^ z^JgUm()j(r6aRjbVF&0Z%cv5zj!Ldp{|-Vf(XA__g@K++&!8sF<|&zi_f4{6C!)wV zV~o$1oap+p<|I&rrbo7|A@E(ZpXnKku$_C?*0Abt3qiTaxioen_>$T#eI)v=+N+0~e?n1qLc z&>}F}$|(x5d$kE-bCS5vH=pt%fJsDGz6Sp>rC7Amjn|r10ZgY?zU+B6^P1|+Mu^4< zuG%+Xp>0I$`D>6-#3zGr5?yZ<^Kr+Bh`hjWEQk;g(I%O6jS_Wg57eW@DzTG9!_=au z!U&~#O9Qk_!y3+Ly2q`eMf9wtdVKgX@y>nZ3YeD z%ZFMVCb3;7l_3n-cx*LBCK3VC7X$XPf0gIxSiqSM#KKp>BUI zXp250RCeySNjMlH)V$A4OhS+IWID>RMFx;(#Q^_*6Givmr4+-Odij!i(P;D>1Z5h( zF3mVdyaXuNdvCO1Kuh74X!Zw+;oqxSzXO6osMcQl90o1CQm1h36TiI}36n@>6EF%G z*88NK$Ftu6#m=kHjy|r{qs-Cw2E>B`%L-T*h51?TEy2|NpAam@EV`WxAHPMWwnBW4 z=8F0^bR5cov$F>V68F(X_&$-*MP{|LP<}w{gRG3UR5bnaI^21f|KSEPxpsLqDVB7o zP!!>FHJbcO3;Ds!?Gf-T?HL+cK>ZX()lJgG zn`EvvYZkg<6+(9IUrBmBj<_D7HhD9hv=@RF0}Hjixyhq-k-j+io4ne6+QPx2^tySL z`p%^})!A!~Yb$?cYGwfNbrp6GXXnlri~k99wUuI_A;D^8Lr53cYL*Qiy>Rgx6up{7 zP0iixW0Mc6nab;v9xwc`@F_*7Gld6($VpN>{J~?tBYeuqE?hA*li%d4a1{so!P15J z$<2n)K$yZR24fNdB`@6Q!7l|b)NZPrjc*0w^D&#-CfCo(hpSkhS}9HPsxNTG0**d{ zVy-r^@K<;QjQHjITDzXBU%mrk$G^7K^w5Vt0DMYxRqks-hj!=bfr4pd@_fC}$6=u= zpHUzLKsmrj3EW2xMHOe|P)Y=6}2UnXkGb`%VQ{r_7ZOv}ObR?uB!QBSQS za;9vipm}|*z4F!B`B^M&IkQ8%_oZ!{81NKFEBH!Qw&p@h@%;y+fCW-iIc;upb(nfS z8qk$HtUaH`?ZEPIId5qL=#g-N0X*%aMct~%5ShwIgXFNyR-*voApDuu?dGpTV#qqr zSQi~#9z$rm^~8)UAJ+#}{!01d$J!h12_7q6?-fFTj}9K&cC|RzPd&BF=jz0s8Ns7) zyIi4%0x{C_JgQF)Xnxb#?PZRikTFtU&7|fmoE>hOwOsf=asWNbUpYYFZoo(H8!l`b zWg^CuRZ~HW25c|uyO7ZmGiEuI#d^6v8Jia^2+Ct)LHi)@jL`pp1}!jdzN4)d)CZ zISgyBWCi?s+2aD8SC}g%iVwZ_0$?u!ASP@CH9iO@D8m{_4o>w3BqcTcnCpNfbER7< zvwpvw?g$SyCY7x&z-)>oU^e40s&)QDA;=pw$5~4&fB+XpW0BhPR_;lR0yXccf3n3^ zq`B(_Hn&hbHKypBY;6*r4bag-(o~Wbe2LHxXNjT(7H*RA6jR&5!S%61Hg(;e`8q<= z6vIO(<|d}BZk%ZG$rA~`u;ht^r&+&Xh{=76q{yEUxn7(YB5%2Q`E1HpHafETvi=d4 z4A_=7`l3BH)%VF{0lrdxOH+g++{^(^j}ma&ZYJy+Gm&j>)}m0XEOtt*%0TS!rw7c* zPAWKSTyL95YY((E_QZZYvmk`sTg20s7$Dcj)lp`s0@$W%OkC@gNAadB1nE~?0ZVj} zckH|Q$VMN%ZZCJ!V`mIq>LcWErU%Q&NE0EmP|3}m>^w^G9=upQ`Ouy!t8B^&4jU(B 
z8MPHr?PfX7NkLa(Rqv~2PxS~`GRbfFSqf?)ca$fbxuk;QT8E}{HI6=_LNCX+p4~^( z6=tZ~97Ing3~j zI5c)VGvFk)jXN!>pz1Ox9{`m#lnyoqe%eK=!9_&}DZQ^YhCWoRUf}$7JH;J*mz9V@ zsgtERLHx2ZHJQl9XY7T3G;yb@%yySyBs64H+OKy%*EaUnUG_bw7k8c8(jFM=+mv!z zuk!(nXA(XV=-8MoBhuIJ9)Jm4QMCp5+3SATtqhlPZTApv0x&!GOFbUDepsIi1p|j} zt&+EKtV?9AvNVraoyzbdJ>DR3W{ZsBQ>S1&gR&avJhO?yHpK0mg^&>hK6GPOow;e< zj=Dc*y@RRBl9LzzNf9*!XtHTJMx&q>_ow&+hoA30?g$+TSb1RW-@YN?0l)2{2ybWb z06bfhkBfL9aJkA&HT_KR^|F_~KlXjb%4^^B|9pL~CI7zv4v$(Vr6TZGiOB(}SQgk%Oh934$KfQx+gXJw|$si+! zsw;y-T44A@vXS>Y_Pr_i#lVPgUWCpMU78`RU4P94TNAl*Rq_puSGEz!d%;*fA3Ks| zwSFSJ7x}&JGG{h)Ufax8$KF9zXmJgz+6s+x2qVTCSA5AY7y4fhCNUqtaH01dhPYd)0%2;`2z`%Fh+_g@@t0FK~=!QIpXcOn`$+19Fp=$Gf*eA%_Fjv zMi16;5aOuV$iX}EdSUkZLbt7eULp2feD5jq|elkOdy8)qW`BE<HgBl2xoBAqVXQnR~z_fM%sy?Q~#etJcV8579@l`5ksr<;rX{_#3lkG+YO zM((%t4FYQEctfH4ua+>>-Ujk{5HUAQQcd#0(&&2>!r8*4BG{(Wt?gdD+A6E+g=s)f znY`7R^W<5lW;k*A+P;~T9T*fJ7#O_vs(U@Y8bd8fW)drxh4YH~3uM$PRslUR2BUJ^>eJ%i6f}&<6Nu?wav3U+ivijegGA{yNC%x~aZz4bTWu{kH zr$pG8k+~^ON7=i^=e@w&eGa94vu-)dM@l$6hE9>1ni{o=E>8eqN{RVaEbs7`&Q?cw zg?aalzR%eFY;GH@^FG);KFyQW3c2G5jU)YZ zT&A{?rS1KLeC_c~RmMVDP#$krf7N@qrC<7sU4JWE0bWidm?nccCy+J?VXNW89ETu% z$CmfuU=oJvRHYB+`*=8IwPVeiqyqP+v=%F3M<9ZN#_TA)+KXXP^(1zElBp!vEe?-3 zYt2kZ#ISbfZ7y<+seH-BkbkjExENx0Bj06)eaqFzW$VrF=``V8VZ_f`>yeH}M460N zbLC>nKJVnFZ)E*g$J0MeYlZ0n+dvoi?u+XPgG^LyS#lTG(d+raDj48XCz-7HsDJVP zp~fqFSs$hvez!^;VA?{rywjZVF(bHHDm1cfnErr-5~zn%Kh&$5Ug@;Ta9116wx*^i zxePRmo+ekLSvg{zsJj@J+^S`*d?j^LNp^Rx^Z~V|MzmsViB5*hU+Gidf0=QgE@X1r z@x*OimSqsN?~7<4L=Oa)Q|4wDIyCm>$7xboZKt`41s+wR!>@5UtJ=?Nb?T-n89^-X z3$B;=SbjcTugRVahETRz8KP~)l*}slJUw8cZc<+ccc{=Sm}0)xP?OV`iJ=DeSH>(d z>u~z8m*fROEhi7j+-m3ad$o!jYG1WCx1MxBhs7u-fJe0`DJp!JDzTuTu&Ec|`6@p? zoWR=Rt(liJsa{7q*B?{4Zd_V$Sf_$hb_t^4R*=n_F|Cs}kNoBI)Hk6TJHDtddtT$| zS_RXALWf3+J3W@YJU*@6?y#;v9^(|*lPN^J1#Yd}77*FodJeVfT$K|2qp{^)n$p2N zQHCA1v>-6eMKK%bknQQGdK#bow!sZ>*xm#*>(bFoC-a?ZF@yLfUVY%>2o}EG+(0O? zFQnOKXkX4@;hTEBRM(w6Evsxd65Jd9N_X7XD-%-K#sWH27Q%89TBK6+zMM)nU;Ozl zRw1Lq=dh-c%(>e*AglR46(a+V{6B0e;pg&y-}mwV(tpPIEP92pyw225R$O)@C05fi( z@qj9i!i)RVe{OsWax74E;1T}XOCGJoeMzUDMR8bIM3guzgs|nX*)C@zo*~bZZuU@`hShMTat&ZxCoBoam>@mC6dQARqTPC zW@J77sef;sNci;KSfmYQ4!9)bUgxuUQEv&!>0&RaTip}Ps<2f=A5(nq&q9D9S4DZh z(*X;OSH=?UAiA-JuJ%$Ta=Gwo2WZn{cOm z=!nR5>;*ehOVasL>_tvabxy6x79w=>@?Nn@s(uAB5y|Cs-d)__nYf`sMh!Gfdyc+w zWU~W>M(#}L7G<-rYFA)@Z~uEu0=J`1(u1agrJ5RcFvr|>GTmb?6!SW?r2ml=$?87ir!Uw|k*ZD^L?jHL~p3Nj1dA8FeRz3vcE5Gui zjBGu?O6~ZRWmknoeYLHB5<@UVbM4PU+f4!eb5=VzYNG6Kt2QcpfmE>xElAM`QIUA3>7@$ zFZTklSK^~ZeOD6WZyAYaa?E$-dNc5-yG)CC(Q58R^-F~|G?RLx$!W9wcm);cVqNM> zM|wrPjepTdKHX2Ka5;~wsPc9uK6~-9AE;h8oH(4T+ibs+dAYRak}9*!?R8G)b<*}U zkSmS;qG_L?^t?kbaQ0%jL#|=blywIrJ(gWY(@l|^UA5MAjb7*s47`Jj)cYI?ee31& zphd{Rk%WukqtZ)d#DD*N{>6&?LPrHaN9F$+D?3FcdLHDZt3bvY<6tO=ywIa>EbV1t z68k5Wv`M#aBjz`WO>MHM(~w4Ix{N^}Mp58QFo`qPb220X+8(u#PPqi_2(q&0xbG2? 
zjukTR*_@>s>f@5j%x9f9EVA&3T3nCm{FPD*_0iraZWLCyfri=HVX|EwEQ5`!?C;&S|5_sn9Hx?sAq51t=-)lBC22QafykoJWz#L z-vDr;!5oPIgFqd>{1EvN8Q=UR)ChO>kBtF-@g}W_xO zgU2pODUG{-t(Ryx?*$hBuh`NWn}*+LU?Rt6CB6-?GK_TXjnAJDpaM_!peR!+8`rc8 zGHZfy{wa(dfvMfg6+-;p5@wYR-JYftBiZMn7pV)%xBh`P6_vu+Bl!A08}p0!+cDSQ zt+$Y~o*nH4cn_g5Lznvk8xacHiFqWiz6RchIBX;ZbiTcz9{Yx6LF*{lAK%|kj|{T6 z%WV&*t+I*ZZTrcb#okz8j7rzbhIc(3i1)|KR`hP$g_E z0gpHISo*-;od0$JbLJl)9$1PjgU2)(_&MdX9^><{y;T2>)k;m??cReYb^zBOp}CY4 zE`+%xP~368u6Ul^n45<mto2x^(&M!6hYwyNC++?I=@}%6Ed+`ZTk9^ae6K8` zt7TOYWm|^!+ew|SJIl4xndE!ZI_bG;j>g9lw0D@A*iqgZg06=+))l*YALB31Xhitj zr(C+nbA!;gI?;#gZ7a5Svi!ds*3XE3i3EUk0|f>8z|K9ck>$sZ_~QJ*_!S_kerMRR z^K4BV5xz5+^H`}x;f)r#5A}VxBvkG06+oQNZY3w$yV;uc+u8YC_hfd;3h>UeUTt|n8!|z4|um=c@rSP}S+@?0)k1En`+`E|T!V3W&Z@;cp23|pP zL*<#Oj48_*4WIw%@?Ep^CjPrPO>Ih_b>fGmH(tM@;6F;#6BC>)3SOwg3xce&{8hQ& zLTv?T8@>I2remR*JcH!&vKPUP*ofW=>!S=CKVtpQ9l6?z=A$;MCMn4hCXXX+jTQkt z?^qLQYB__dNf$4c*df!LOI6#~!a???oW(&839GL`QgRlBEK0+rp*V`JAC2VZU9#H> zbmQeVrAa9{s?Y;x;}i0Qtw&cqk2UA6Zp^R;s|q?E$|OVE&XN`iG%pC0{Vf7@+=Z%F zzVuUdpD)3)d?-=3-;nO{ja`^PXNXZc={nyT`_or-;69uabJdLb*eG&GlHb+0Ud<1N zg)_cQ{$}@)%a!iX3HfVSoC&eROm(RdPVp~|rtJx3A;pFThtGD#CD)Y>?Q8Q8E=?Xf zNM&%9Zp~P{K#1%z)L+wb{fcA4@cPmD08@gpMb0pvLzc8(WOuZaW(oPU3wyaj^~d_a zeV>dQYK>L4OFR64w8_Ij9dszqcu)_ygVl45C3AJg_AvSi85#M#Lp-Vym8B&ebncWC zw%E5H(#h9?Em2+7PqEf}2PBN*Q&Q&_!2Rd@6?L41zOU|U1 z%!Bxy@9iLZ;m^sl_e1S=&q}zErsZSt1jAp|l1s$!%s~d(3Od?#u~YGBtg=O+WZ&Pc z(>%z$-TWeuF9EB@LGKSMx0UL0M&J-Yws;r78whGL2^J{=5FI4nDaxS^_kaibjj@r7 zD`8B+!7P%<2J%g^7d07&(uQ_UX8^XrXio;4mC%?1&htM}YyXgv@-p^nCH>)}g?^=< zbrP2e`XQf74*p}@tIDWLD$qXr?AaV@mF6vy=*{O$j7amBdvkKtGHUfr=_9jd7-sU* zjsnec3@W#Pu~s{7+VR=gBuy8d36aR3hD7v z&Z%;dS<~9xY{m8cBDAv2t_}P03hD z;+SHK9m)w@1>!$n;OMx?1=sW>)ou(6A=~d{xmju;rQ}GW`?_cQ-%hO}6jlIWz+WX1 zJ3_O?0RK6SOqyMHk`(M`4KC3Mfar_FGicy3rn%Y}g6>Z;LzZ~7HTjKug)5}WoL={* zzPDnEZTO}E4zL4pmSa)#(uznnkaB)xc)VAyr{zeC-h6=q{ugRw^oQ^5D}kNziLc(^ zHE9uJHO0oXe1N&c5j*E%*%Pi=W`u7#@4kSUaNyX=JfJMS@S71LD#bjm(0b72G9pTQ z(ss9^GZuItjIs5!zSz2`(AQnrzb+lI_4Q%|t9S8iBo>kB8j+Kg+#xf}f7{K{fB*Y4 ze^6RRmGh+BA^LE_;3P)EslSGxG~G(l_$isQ9Jw%Ky$zH({(eN`a+@ z<3sD?K$$ZoZ;0mP=KC06^I3LdC!V>fJ~1t2YfaUdvDmWL$6-O zizc5-os7Y%hqk*mU?2$bwai|^VZ+nJ`M9--%V%0?#(YetWD*(lmSfC*!cpI)QSTPG zT2rYPJ+AeFUj@$U;H~aHL0?~{a{=o&@d3uCFdS2l1$*q<1LM37f5v8UW4|-6Cc;>J za{W|;e?3`MQ=>I6HA65WD$NbM{!}%Khek#k*|yWiNkRbbf_|=)I=4=rF}#uT3)W3W zPa#lHD4pef?u^Nx;WZj?`ty{BFKIMlF_@t{xGFJ-qc?$r&Lqqg&fj7I;pqVdjL1eJ$^GzwWg2>Rcd_aKf z*EWXI19rqlS~1Wp_c-;#E-xPKr4twFW(^wY4^p&oM2SA7`)Pmg#IQ-e-24zBEH>dR zNGh|2u2*L`#fdzZF4mbE2107MeMeweIKo`qOL>^Gl9Z0k8*N+c;$peHib))FX}{9m zM$0eh$*VMK-@q)XFoT`Za8?r$V{Kz)#4xC{J?Ql;_*;>GrHpGjzq9G+%elVmnxe8;yVQN;H=d_ou5uAJShS z#uSHt3;N%zg&emQE{j*`?hy~c7aiVa2;io*+gvuu=D6cn7wHd+z;`!P2LwExyzcDJ zFS>6Q?2kI0oQ+?%FRSa}sQfXJS2TA& z82sV*!g<@KC?NtX{8unlH*Y_9J+~lDQYsEyHp3h7jZiWFeUplZp{ zrRNZN@(s$iX|Fm2@-1B3WgHuEr?w#R!^x7m#iw6Pe{C*{ZaYV+2d~|uP~)P-F2A)H z>D_tlWE80WN!H={j#`77?XrxFc)Ae%&HpM-3?Y~0xjt6R%VMMOyzQ2=uYQ)FqiEM;Kch*a^apQY=i*a$uS?k*Yy|Xce&qbqwZxiK zmC#}Aqq#W3{Zy3UHQb~yN=ZkU;bVFF^eVfEG23g{@IZ+W>GYto9ptY)QddZ|v)FV$ zP?o#N+FUq=^%xIbgiYZStJi z+(Pyw!wh;5&$vO7fI-fa8DJP~+W(liw*VyOG6t^~wFy40vbJU4T&P%ZG-wCQy1N60 z+gkogRPPY11bo3=$-z@vZphWYnV@`_(F8c?%`{boi1cp8&?*_)YYH6G!S z^J>3Bhrf(AeMMYt+~LPA_%={lE>Of99;2Ko*aY$X5cw9B7#&1yhtU+1EsRKFUm$2vtqgF|h* zKW@tXdE-x{lKOf&2un&)frg07c#|f3;kC9*@GX~0=&0BOJihCNAx=pMCh7JtGu*VJ zemS4ow0w_>&w$vBe=Lg4NtzD@P1gzYF2ZQPJb*r(Rq7M#grkcr<;9m-C85VTPuDTW zSjltH(5ij#_oH!%<20(AYVNB^0_RfPX@p`SKZR6W4i!;y|BM8 zQt{x(0#_kb2Vgzc!H*~U`q_1B|HRrK3J2vtuA$emh6gzJH(k+igovKya0a{7#;nBC 
z2!-=Fvnrj^W)^gt+3TszgE>i&^eaxp*sOK9iL8ZgUmtmBbSJ6YH?zeehw)?;xnyS))`@JP_mZ)J#RjKJsCY z@hN^p>`tSQCACtjV%|lwK%L3v>qL+k-Uz9 zYedP|Wn**TtX)-$d;=x-s8Y)lEfuGJ;BrOdvX(JBORQMoJq1idGnBqCRrm>|t?Fsz z+XI3%^&K?L^=KOSrkf+kye3t*MC(R`9};{|{q9Fhb9Vyf>RWRUx;#kg_cbG5iC7JT zLjP%@wUu;cqo9+yIy zH%;8OWQjUYRI|%YOFUbfcsBej${H%9*JUz89eaqm^hUFGZ^Ltgv>w~5y=d`7nw5g54=XnM9=KY>|b^tRVi=SH?9>jG~%zKM| zj0pt&dr^)rnXNAtG}k%AA@=#J{-RKL=QCIiMwX!CnmV(PwZRuo+7k(lJsUEr4kkt$ zPo|F>J-zjG|H(Ue@{2UzF{jVx^&P4Jgo#-@3vl4ctbC83-^w)Mj@Z+EyiUz!rDOTX zgtyS~oBu7nnfu$Jlzudh2(wp03P0YNUdKLM`)k_#Ev6N>QIxAenv_ z4cw40s7adH?`klG>I-Vq?)skh7b$ca7JDZW&Hj|8>nvCQsQt)TO1Mk4=#4nm+3WK%X=5JH z+n;Iv-@Yr6q6sDGQ$AR}Xd|PWKT8e>1UgC3MPfc{R~qb1XB|8Xs}b6dEPs5s6Y7Ww z%0Q>)1^tNp^TKBveS=A#)cyCsVY>ehlWcRMRZ_DWNm4g+YrE3ek~2m?s=XwBEXO?P zo=+in$k5Qc7?u zTAcf`&)&bY&-dMN|6(u($zUYweV(=EoNG?P=JXovUcd76*0mw1M-m*qRn9n!L?*fn z3mLW_$N-D*&b*-K^pateJl0_bE{3xYnXA25PyLgYC2^-pTEDgcCvBVlX}rE;drqp} zReOj_ko@-~BX#27!qgASQmmY8`OSUXSajcveJ9t5`ZPCu!ZZnDB1vGNaBDN)6C9upT<;&0@0{EODO6fks_zLgb&sE0V$MAAi*~@IxZf?^+u}<$Xp!Xf-d1e z*J^po_CZxwv{a{xM6ye<{4gkD>V>Oy0EtJs=Q`v=JjdUWAm8Xe|MxhXLY=%M(Bnd{ zl-rIFZa!#}`_a>IXtp+~ZG^Q=L_}?Vn8pFy)FlEUk~oxy!s)t?I_HNJ!|B$_kx8~? z(Rbd3%#wEfz$Q*U+!1r24EIm2H-;kKCjb@4bo_g-ei^WuCVE0W*RP1&*~+1hT8IuQo^vkH=~?$OSJ7eJ$1( zz90S6&tUqq3{qn|PfxA)i`uoLQ|9tz4Rp8tKr}k@$)9g+V&;e;@3&~aGk26^iN196 z_2TZ7fojJ6DINGJHak}*DXg!MUz37VE2o1%A&Rl`)qr7{erkI;bl=(Kn9dNRk|~Ry zmY*+VhJ$G=?CXW@*lViY^0gJ;F4?$r3?~mMs3`;oUOsA4ZE`1QS3hHlN&52Lr-&sC z6MW}2AYNgwu0DnivoihF397=nF!m_~?`pStc>CdIdh~f@sy^5-{#Vxr@bk})Zzk-0 zi{`!b;ghAqJ<#|PF?ANTuRLO#xJjg;s&fL7;t&cK@TjGq&Z2n{_mVTpj>0}a$-NIY zK<~y9dP;5UW8 zyUjt#1f(9FK^W{-SZ4K$?)rcWg9rsa5f}3t6Cj9eZHVDy_TzEP0*>paKSd8cai+VD zvj-Z(+2gkIux2MON-bW}sI%H5vd6df8%SnSu4aG zOW)n^!``Fr)fg=@sEZW7X$OeUl+kZERZyy|>1S1V{_Cfd}_qJ>!@a9`|UFw5B&M;H}sN(8k;K&DWnzd~L(@|X_QcC|$ z^Y*XTKsyOC#vkR;BI!O6c4*EU3uT-Xp)$3vz`n{$GRF6z3fqg(j-eYM%sYPME8qki z==45m0h``k%jImp@fLRvV{e+s9h^Drb@ep`QS;t%OPcI`YAQq1eRhk!!#8A)YvmHq zm%*w6>yB$>?QbNr6sy+xeKKy=a>rGplvJ^jF~b`@6&XittyB;d`9ao(p_G0>nyc&l z%O6^=k9N$bNq_^54@3+`w)FI-y-w{RR;)PhwfB&fIgJJr^Emm5p8aAZKsDx@t1^FDbEJk2|)HBl8pU~vJ6yS#%Q`~E$v85bPB2zXS-_uNRwG-h@nol#&*wrL#>Eo*A;#u1+oxW{-i|w?7 z^KCGRQd!%H`^H}VW(1ea6rCnM^+O4^Afzs^#EHq?pmTXLI|k9NV>>s#b#4G2l(vAm z8T7yfwT6u)uFln&Lo*bxefbnstWg+U_5rmR=SFvVbvai^l1)&lPl8@)*5d=PGcWF% z4e#Ymk4cOOl!GZ{_gjCLWvXUut=>Ijv#s2V)N(7MdDdq69h`tON$DRgxE}P) z-6A7HoH74ffAy}BCh6$$nXv43t?+o%wKDSTfN@I{`ZnUipH(O%k z(zrG?A9$rwTwRUo*|DwLiJ2{9Tpr0N2*3Wd_b+lJ6peInS#&>zkes>g$hQl%wj`ups79`{oO<&Ps%4gyPu6vIiw274 zrFHAwXeAmYPo1?{jDj8Tf;tS{XJwk)_qGv%c+cl;5|)ixy#yt}JI_>ctiI2M1*7o5 zfSIvyed+bDl`cxiOJ#N*Q?S=a49*mxI+)4bx67?hR?)RH&tCvdo?dBxC}PaL@BE#! 
z7EV9+3*(X)uaBu0Rzihfa@G8pIvtTsdd?Su1Y9c@Bm3EpU50if(iU(~_K_DRm(;#` zq@;0MVTrtg$6XjZRNDZ!8n>0XPzWWMOhi|DYiTY>BWgoi*hxcLZax!?2_Ef!J`G?K z7u62SPcdM%^j>VECQ&U)(}zYuh<5 z{Aj`#JkLlAlEBeDI9~S*`5oq_@a>nJ(-mFRU4hQ*2a%<-pNDn2xG@mz^`0ZB{}p`q zt|0KItXVDR{Z8dZy06|{`Be`Pj8r`=sCGtbbT+aFec6%}^E-uVl!&nQ9~ zr6ValH|)9G<`Np-zc&>F*rI5Wed$?`JKs~UG-B<8?MdCEWsfBS8s)c%?lUF~jYr#6 z@L{n{;uVjjH@8nd>ZsA+n?l>y=g7@RepFdQ2z#R9z4sx06~t5ex{cc9qDN{`v9>lB zM?}s;x&^wt4nu)M#bu2~02=VpLF`W3;KROPqTf>ILoxP%mw1SSG={)FM1#VzlIQVXj7FLWuvfE?&L=AR2CKrTAhPfytD>1$Gm9HZT=kGZo}{-c?GBl75%h~{ zwlbyrx$)XJF)Ert`0-Vl&rf00xs%x%;HVTH{)LDIBY=wFz1oc-F~{NT#bIa$>cU@) z?RaB2f`OoB^3n0T4}~M;%Tr$V&)+a{rX5V*FWt7BA8h;-ZNVx!mifCsEA0B4#HxoP zWJONoqw1ep6(0gotBO-{a=SJNk2OM70j96X=y1o@(-2fp8-IR*1%kcH-Hl^l#__pd zwJ6WFamNfm)YIQ{TEMF6<$Pt;PQ_v><8lDNd`BrdvwFYb1?N4Pv>l}q*M_rg%~rR&Bi6u?e>ZSQa&z2Y@QmDU@_ zY!_n&=}=rA7EqxQlZ++~DlM9OE8MP{_SYR{Wn*jB(ZU3d?VAh?2{=*RYxT2q?>#UO zgk`?4oJ?OY>iHj3AWiTC8#t}7U1CRa?sd?N@P3QF$B4~t{;L(Z)6tOH+>$drHnX^{SN|RydcF4Y;(QhNM7RI5_;BNfF|EFRW#aYq@$k`D#9j2| z0%$`2z{&9AXkNgz@9*dYfJ}XS+V>Q|fL}=-^iX$KvCOQ5Qjs>ONdNNQzb#-M#HR#x7285c>;PamsbX(9#CZd=XMYyVyR{I(9cHw{eR*|yRo!F&CAE2Tv< zZcZM|#;m)`;XkK)d%0m)yM*wpcebco_&6L<#l@o5#I!~DnG8a%2?*ojCX1^rej9o? z;Y&xodoo)Fl?`_b;+u@mHY3{%^q2$n%hL2X3o{BO7OI|ho*|6wM6(J@(n@zHm7G- zz=aN}ZnY8+p5ioPw%g))45UOXeUN^431m^{IaO;_yj}d@FDWtgE zc9W}-G$d)>0myjz%HtXOOOm<2y0P;OArk(4h_-n*FOpl?HlUt;m&E=>%oyU(-uLr+ z#CzmbQvnQ3V6pjk?2uv8rc~Sm5+bk5gd$+yc|gy51Yb7bltREd)|)BV`juDkGx_s) z2Gx>SyXP;A$GK3YxAWMsWtWnH-T@L2S4`r8BvS7D{u&z%+pm0Et97%M`lJ;%XGixL z0tfAKVsDk2DxW^9vssj_+iOp=E5Zd&ZT@bg@D;S1iX`Ro9P57XyKV}x@o}Vdy5<~W z-Wm0dV6(W|LsKZ`E4665&EWQ)VoiP}NN`&8VETP&vV8ds8kTED1fTs}!*J)qtP7C0 z&Qv9D9JcaGh4zpd&;)_TTJ>=~(zoi_keb)Ttf>rma)g!yRG;l_!%_xYKk1yMuf}3F^G&7c1yGV87`xH-i(h3^ z)`CdoYyYaK{bv>ScL=F{AG)R?jjEHF%SJTg<+s)tO zw7i4*dEDmBjeZ0bI+jj zPF8_$9&}0mo`vU6ha?tj(NDs(*Z+RT$i}BOh;9;aaTsRZTd(UVwmc+(=TO0`awwOZf-GcEV?!Mbb zSnQ_?Uc%Y6=iD}_DNh6?1ys^`6w-aRM?b`~-0gi?HJ=$V3_RmBYm37b`)F)D((i1K zG_Z0X5l!uU${g`qLB{?t@0+Nb?rO?)A+|I7u_H>DBI*7T#?qHQ$i>aU9|14I(URq2EMF#=AZAM+NKzria3?fuU2sbR$RVQ5WNA@A_9c zl{~(eZ#{o?T$QGrw6p=IH%_7U=D$vFFED@$?_g(X|G+PN9zby8q#awY-sEA!z;EPJ z!eb9B5)}R0*KL}V*-$k!pmIJ#_?9kxTBT1FrtU5q@7<0TF9_11+b6R1ya-P`8tu(- z^IqnSOP^JDWc?oFM=a3r=hMfH#ksmy?9LtSOYs(s-;b7VNct8OjG1QZvyLn4Fe;@B z*#?HS)E(O3Y>(P0-Df1IzYz=)9#Nyd10>Fz%sG$zE*kwuDFRp9_(UQ+G%H3@)K)>p zRH0=TFEMMxDw&k+g1DG_UOah+=Z7fYku&Tc6E(qutz4pwt(@?sL@|On;krLB-HqXDDxA zK-sEy-A0u2Nh@r?syg)5%|xYAvP*BzxTU%FxQQRlLj|~y%R03>8-MarbJF9e zstbwaMm;LN5h<~cPF**H{L2%2<4FQkZtiDS(A>d12H$<$T@1~x}h>PxCdUX6vCMztQlNkkr zI~F?Elap&u8txi>+b5NFRi^zeYgeKKIALCAhEIc*fC^o5Ez#Q zmhjlgodE#w=;u!1ST86%nL0i-TyFQDHcF#r;k7_=gSgp%XJdq= z^2AKMswf@vSS9lNdaQ-q&5mC(ueYV0cJOHv&}wXUHEtLd9`EaB9ZYp7J-%Lk2x_cT z_q}^HNnB=B{;wTv-uo3m+vqf^E&Rd<0d9(g1JH>OI_JZ!P{b#DFF_i^k9g*lxm}OJ{NSgZuKQ-7~9&jvsV3+HL{-HS0?xJ_XrioIX z3NS7&o(qdTtQk)ROb0>VC7-a;aWe-*6}sb@VkRQ^G<((3_zdgI4ZlLQ-Sm$Kw8(jZ zSopWuJNrH-Yh(7!ix!>5V2dRb=eA>3F24gP$`xKTr)qmD`%JWCJgg}f#-`tw?!C`c zdb($c4wXI$`ZlAMBQNp6oGp?XopVG!{Ne&gibd#2QTPCwGXvUIK>Iw2tQd-{J%r^% zbjgRrl7SfFtP32trijbNiCLt`F$8yG`*vl*3iAfba2qQk@oDV+%DC5KqBxC9qQ4|o z+L*twEr@!UF{&m85h@x``_M7XMZ>{Wg28nzpWIc^#r{Y|y@4%~!8ejFYZyHZ5pO3t z!4&WEvghPW%y~2p>gec*XReE+N-;+?{#QipvQ0tS0~*#hPXt2+o}0PIy-1_A(Fh4&Cz^8ZzC#T-d%}GgxPoslcT`E;aoI{@ zuB^B%mB-mo893QKAy>A5_UuR3Z&VKMGH$UmR735#Qx!09nDc-BzyN({JkC}P80PR} z$k|8Ag;^}%kZB&Cuc}1~7g(ixs@F5zah9}=IlH-4(w(gT;1=ynbdA0_*?md$`gnp*nLLW90ld_mlST3Lf@Buc%SJhqG3| zT-vI!Wwr6OxI9UfQaA;ZYkj92b;@sqv27=jQQ}5F(ifa$z)b3t-V7)@w2XY$zJFtX z?4BraQUT@_S1KrUFUFfa4*Y$m=6l;TCYa*m=He?!9R=U!l^4_YZ0!Fk{3>?&%XH;8 
zNR*DiVYh}kA{PCPTi$-S^D*|c);wIg%n;bXg4vd}F74@ z5qwc>abR+@hRXS!7B5TesV` z^U}AtIbz2qQ<^((jtQxmgV^SiX}{x?d9qd4u-NS3GA~IWm`LSKvH2talQSBx#)AwP zo6znY0UafCk$t7C{q=4&2cVF6XdX^r2AKQO+eGm@B+S$*jiRlNV2#hd@>t3Ie}5D8&u>5dJ-vNT~s@*Wp`u5E9APR&^Ju@l%v~m zz%dCM;={Z6ROz7<9r(p39GvYElEzuH)UA9w3qIgjI@tWITYmhqyyAWwSoce!89Ce7cmDE_ z=v4($FdXY9vI7k#tOY@+3!}&w5UaK|D~w0jfRlauM-KseY9!E%7SDgTu<@5dPV5b; zYrq?J;>h0$!5F7RJs@kb)UVVB~``Nda|na zWOG&{!+y)xD2hkeYTr&|+Oj|8vvqbhl7U6dW<-7rUgO-Md~~cep;Ou(J^8%pJ#Nr9 zl}9smd-DoqPgc(@JI@u%L8M!xP5Ne3z~W8d-1{G8In`E!V;gzJ#Wj1P9)?Yd8Ekc~ z0m1p(p0E7(DeAo~69f580@6ji6gJq?@??s&suVpQ8x{`76cb{2_~xgV45Kd3fAo6& z^Yz@H(P1*cb!~=!ADKsh_W@t^)bd?u-5{zbB4)BMTk+@HDqR5yAGn}LC2E5R5nt}Z zssyvx;u9CqEK`1(92`N+V#BI(M%7kt&KwZk?Oq8ylbZEfDNKP+aAMmOtCU#iCK~rv z-+X={7eu#YN^0X&x*cTje$+(l#SLVs!kRuX5ig>Yqm2xVtVx*n{ zMWf9FL0DsMzQud<8D+VzCH5IQ?VZ=GOzh$3e|W|grnh?kc8BtUgq#7oYRO*AB1rQ3 zGoHM(a<)sH)W^ZWLAOU;F`}39%Q_j%0CT7&&W=SWHEJU?Vuq4Dk&@+UMtl&R6||t0 z-A*vOQX%$T;c{N#EAAzC^hJb1Vl4KgwS%84J{m>O!uLPywfo{Io0*#yc$~`TlVi6B z=z2uw*hU?WspUdaolcy-;PhilU3Q+0B0nl^f-2W*xYKz}BPrK&W^3H-D)teCMV?T< z^qv^1gYC^AabLimU4FjK-td{$wrtq<7j+|p_&Z=<+I}XmRN6!10jf(zZf*H;#0rU5 zrCAl9oP2p|@g_)!6@p%w+8jb+rr_R|;*Z+oHZDoy>v_5K%VL)y!-pnEE{1(FE!*+P zwtG!Bj{~!ioxs1*@&fgLgN|C*>Bgsj^=ZIEDQ+kSh(Aroxl?vT(p;IwybERM$N>(0 zdKlxt_(qyqIV_4qrphl{UAS6RMImXTXYiC%JK$Lf@@pl&KV>net6!!Z_0s@zU;3Pc z%YW4R|Is0xN&{szO(-t50LH@&=)9&Fjp|&x*1&;*9@c%pe{Qx0V>hHzf|bKGbsLWk zNyHG2Br1Wr}>LdBbqPEg`xhJ@DMr1o@8`z1F zN!cjw>waq}#t%8DPn?+EG^~{7;o7VCOJ3_aJwjCWKcC$5B2lP$y z1BIBORz|E%!`=XK%Iv7lgzqV^KSs+us>S7d9QB%Nv<3!(p7%wPiU+= zfg6}zo!>cYHaCN7PfIr1d?R2DI)yMci2lF|dTSagDE>9K-A=TOIECu%_Zj)BRDBb{ zZ9K3FgWEBA<4c>do-tKHpPA>Vs0y1VyCE3&k`Hc7HKHn3vt#lSi!fp!9D8agtmqwt zb+6qd89%y;td38^3vDa{x`Wn!a?s9BPy8NXpUon^-1W|pd$ zi&WBzxR}=hnWBW%vx?#_ZNVQ%3#982|ZaOm+=AR+Qd!p(F?Cf?@0j)uGHd_)b{vT z%KKjlJ?d{`i+_JPmK2#}x!ln?{dF|hZ&yFz_U3Hj9#vFOvV zXmqFwSdE#KxJs{b{c)4qpx$D7>Ym-x#FmTi(`FPGd?M9};F%O$axUXy>6z@#SNyoy zSUlD<@-bZMKSF-xN!OP%BaGKxb{PSBa0@_9;DVBo_kD-Cq;FRl4CYhkj|5>$S_WZZ zj>c|Q%+AgNVtRp#vw26uTwU}!SwEg^npK5Ox=}UK#hIYj3Y3C#ooF}oO5Os&YmDxSwk>?u-mEDULaH_?RO^=n|BC3&87K z7JUhsfTwhQ(&8z_`x1N|Nas@!43Nh`g7Gj{5?p$QsD?nt%hweY^R(BXY?|N6&m_P3tGuyuAr%?GH7TLrkAOG`UrfZ-pCrT4zi6fyD<`^WL~*I%kgIQoOrL_GM| zOU*HE{cYIV_ojz)eJbX)E$|lsb5i%yD(<|_VASaa(QuT(yTn@PA4HX0kck^NR{VsP zNV&}SkK#^~OXlK|{_}Q!cQdg9Kykwbw2a2UujwA_B)Dp4^8caFGZ;(GA*d(hZkCw#g`jHLCZ6 zgO>`d5?|B;^{RvKjAlcH6a?cN@0$6L-1e2=$g;U4vJ@c?htHNP8|yroUx#m5*qIwa zBxWF@`1`?r|yAnzt2neH`7Y1cGiSt0ZVuB_yjhp^t8W zjpYWH08U`iiR;1@Gf=2d0c1=A_{E?uXVFoB^}X6$TL5{nT-xEFC~{{g=4&`P`p%`Q zJV#}z+}68VKua{e3)JhTNq(2gey7`$s^wd~Pd4}?ya-UdKZz8-DVS=LLflH@4wK63E&VC=9no7qdqs;^iN&7+ z4ywNYIuql<00xO3U%C9fhNL*eK=Fa5R#L9D>szpK=>b+a0Bq#zOQ(J4HmX`qpI1sD z_+~m@wN`9#ST$K?Y5`-_htjpHz*)p@xHN>^?KlP#Q?=eOGcqPzU=k~Cs8U+QEV^s9 zgQ1F)7^Ml%lp3>M$SOxnuA$p0S_ZnxCIbT*8TKg}U6(4H&rorSH#U-7}Bd=Ff7I&%`{D(KKyEI{>1lgeWg=L2}; zj{#Kv)?#!0IK-j;Nz;p1d=RO6sDQO^S*Cx5O!n{;jFVzwtKJEb9J#hxdxkC{oMMfo zJBL0I84&xNln}}-viZ_*rRqY4stN`sR4rUz;PyogkW128zynQ8oHsG&dDAR=ci}kl z>9TXQ3?^NOiUeJLoF2){JPtdsCg&QrEZ~_^Nl4m5mX*L|D?TLTLaj#oBN&sUiR&^H zmx}%7^3=_py4!tsW;9Ldd^TT&+(R_e8;Hvy!xkb)7TAqg!54Pib@rRPE?Gtia!@z^ z6n3VVuvh)@m?^0#%}mzIB2uQP)(_6##^pFCSP|M%N+q~=E(bVY3!kw`9tB|#qAoTv zJZOJ}Fcpo}=#qew< zyoB2w>>(vcLne`?n#7%j@w+bmiV2xG3jl3K7rnXktdM1Cw%?n?pXaJ3OV7#&O{on9EdDY% zf^oB##BgqM-$1M4wR~e``_YSPLI|s1%D93|CENxcX<1@?(ezzdSn@D(Cc*{f_Q5~x zO6`tj@}q)Yt!k);NvEt=^y2JM;BkYz`E25fpz7QdMMR$&xlob8zaW%-o8NH>%qkVA zP&$1zP)0BbsH-dy=xv`$tL=o!V}oGFVS$JWHvKx+HzuGE$?)&&P!rG_4SG)gYx>?N zYcuA1AqA(a3`aFC;@8-6gLzTM_<~)Tz)s}8LEaEWWrWy7rBs?Am-f1>Fy8=i9Tb#Y 
zjzWw@rC&0}Y77P{7Xbl4Z$2hdJG%|j0wLvvXw#{e1ChbKgANBmrI?&vnM>fF9Hhq%u3 z=NQX~l35oDT{gCyV1mH$G+01FQj<$JHx!`0lc1rMWJP-pv}yWFfKs>KQaUBr0G)|= z4AiYu|Dpu)0*@%l$2wPeDkji<(=wgtpn5j9Cj+3oA^9&u3}{3vVO7?}|Fu5oi~t1?=YigAwY4}SX**K2;{j|?v7^3jO14~c6N;2A2D0+D5O=xvfMM_fu}Eo-Af3jwVN9y zKZzV6z>lk#Zl!|uCW_*Vr9$Y0SXZglxSuRqVzvwZig+m4g=<`bP5N@>rKjs_dQQ0^ zexHEu_cPA7oVY!+jX|pB(3My81yK~sv0b>^v353K-Ds6dg~(O(B)}>2~g-VB{^me_oWixNV{a+$^PEWeWHfwx5VXS&}iZIr3`%&$U zzJbOFi_RDgihz>5xYfB@#~p;tXindBSZ>2dB*Cm6Sj}PDe_lRF2jQ6asDbO!?4ok& z0rUXw*zN)~iJ)%E3q zQ)3{g(_*H=BzgcHg8KkeWHX+Bf2GRp2cFB(>Y#_CYYlU;JX#%V@&$XrER2iIaeAS3pTh6dy;WgX}8 zTU6r+u2!soLafNB3OQ8KK0Ty*OXi8OT@*ss%sKlHu%r2FAA^c_0f4VPZooBr^DfQJ z)o+i>%EeZF{;k3wo$PMOxL~~I4=a+oZ9a|T5jet~F1=!QajWDmo$OVqVA|tku00^- z0kkaXng`I3$1(8gO)!YumgFbv%vfH+jTUkIs=X_C3L{-sSdZ)Qx@g?&Z=AOaIo zXGP7(Hu;s#xw=cwykttQJjZ=A_0NFtKmGfUzmXBQ#{rkBVUyamd7N)dPyODErr0qt z8)<06dahQ0Ke3ZV&*kfjoxoZ&R6gL}u>D3pZ_G-naWY+Lp1J8Z!T;#}F}Ln=#mVvM ze>Sl0mudl>A<+NLbxhipR-;=#L-1aZjlGl`nkg?fINC_(GTCMpyjd~MwKhy~s2?*B z;Q-^%b73QL9(xw&`tHy>FFuyDr5%O+Sv1f?i0wX=npdR<`G`qUnvmM!bFq0$l~*BO zkFPdb7vc9tyot=n$nRax{~D=t~f)hD$4Pkz#ceGt3; zm7|=@w+NlB$jF)>;CWp|j$9vS)<7-AU-jh*&N}}VuBOe=6{hQ+jC@Hk9Md$XrJg&A z#3LE@*u|WUnRQeY)GPTc_ZpZ;g+EiHpHVkFPc=F@qDA`SXkpAifDeDY2(8eh~V@=FE9S_PFBKj~ChVfjozO}OaGELBxg z`)_mTr6riiT@Bd`%5}mFN^p0a%ZOa|K|e?P#IR)@xg5_-Ag(Ad_aI$Sqkw=#QSQd4 z{{5ulT&*p`U<$9xu}+OOGy=!gt3qLRU4HiZB>~G>NgCIUNlTL&OMO#q1%B_sw#VNm>W#A+D zXiY7_Pn!ABVnQ-Xx7r%J7A>P@*GyM?IOxE?UYgx=ANRDjHrM*3XHbNQWLxXV)6VKy z)-wL2+pixT(G%9vN>k6&!J}qTC^=e{x>kmt^wJVxfg1VBPC)M0ExyH+e6%ckP?$V< z6@XtwzuGgQ0X2{pjiXAbJma<)_!rA^PGV~31$Lx}(_qy6Pr7AK(Rx@(k*=@#{7aRi z?`Tn2L>VIp5n`oNSItl0V9`b zQeg#<%i?4^&?xy|4fq%<~h${xl#!WQ@V{WP&d@1-H;bLn~b1S4}JL2^nj{kuCh`W)rW4++#->2;T zYh?eb#|N8Bs2-_zlz;#RkbseR{x~UJ%k0>rWy=dtHcVjfZ(0-9ZI#8yPG(DV)8cJR z&@It)W^rsiXp#~WJZ-qiEt2(&QYqG6LtKAnF<*%zic3t}KvWX?rDj7-xM|tAkJhi} zY87AfgP3h-xhxB!DHCY(Rkh-tK7C5blUT?Qr1bFp^vRzZ_iYR|JkBYJji-dwm&CvE}FO}1U zZZ|(NX@+LTDGd(ovWewxdQ}~ zQk@7UaGVf96T zLItv|2x})xz4=9q^#o0s-vmS0Qxg;E`eAG?8(3{Y@fY^DpCA{*f_SQYV&>3?xIS{} zLK~z(#;6ju5=`}%&Og2h4p0>3JT#>3qo7@vZ%>Pe)g_Hkt;ibwWonerL2|X?sV>I= zeScW)dAuI|)@NLWADJrXLaSM>1l-v$UavH>)Uw>_ga41l5s+yq-A@>^?{f?I$3HzK z#aG`=VWncK!y+@jUxR-C>5n&z<6J4iT&&Hk#G7t;b@2ef#@A_dmmeuUVO1rckw6~Z%r_(?2B30&RR9|HRWkMMCxg+uTTS6a5Vl*1?TE-xJP3V*Xb0Zsz|Fz`nPu}bQ`SmP^h*3UgK-V z$1=9OWcIedrIt`&$6+g`i(+y0gs`bHLw%d25*xv^fEI#2=Y#=Rd_05qE}!z;dz6BD zS==1GN(!rtA`N-dIF6#Ml8bqbZuJHt$gzE#icvaeqO@cTzL|BYglxA{XruRTOm!Og zqw#obF?SS@mYSJFE2qHez%Ra_9A5D##@MkrioS)FYxvrN0QZS{=n^^|*PHR${=EO`9mZh{pM0Qn-EQFHS%T?y^PJX!yt{%vx8JYpLgw@I87F1vw6~{6YeqBtk8G3VrKq?9eOW4m_U@-39(R7HMKx4_i7P_qRWJ zZp9P!?%^jnbDiuyNuBRckanEfUEi)V9nP$|Gf3rx(e_GmYB{OapVK=_iORrSi0nWO z&JHd^DX*GHL&fTDH-mrmwJ6YcQS7ytvY*@BW{sOyDr8d$)Ak$*etxEeGPv|RUqNdG zSf5nmD0odQ)}lkqP9a0L5aOEZ|My=1=T!apX!|*U7?ZbB%hKo6*46RF)NR_{AB*c@ z;_q{?6b7x=^R=o1ARf0ub%N<{{8V=$zV%!ymz5v*wfw77IK5BAEX~+4a|*?7$%VsUV5p9>|B$u`E3=ajeS>-&_@2~XSbT3CSq8d4c3+;aQ#;SpS#clYTKurc-d6=ruy{` zl(86)8XIXTL!`DP-JW`>@biIg^a4PZYr8@ zc!?ZCLAg7@XBEQ%M%xGpEpt18!wcQwjY6O^I1V_Z-A<@9Y13;a91)|pbsNe>G{|F` zjXHiAdGGh8M=frv09TS;=oc;l<0{Bl_C(S(D%J=Jt}42)dJa-bc3)RZdq^Hx*%g+x zx%QmjA(d6Psv9ZcwFq{J6!S2Nb71z@4+^;#y8|x- z-q3UQT$^FJT~;VK*jj$!3UTWc4blUeym8n(OH_(&pfqR6tNwpbfCLb5)>`PCtBZ(d<`#-8PL;$- zxxeS=J(yym?&dlrYLs!F>xxH>Ba7+4)rmZBKO4%}*jGBFl|33W5w@c%psa3>TO>;& z9PH~$^o)S@XVbBmyJ@AznI$2Ds0yre-$wDIAmODZdNwB?vcD4(p_-_!uoE+7Kih3_M+Dq#$ zV3W*#PF=yl;yAFoiQYRnvc*zO@c2tW9N?0b00et3tLoEXY7-$KcPWLAt1yOcU^ZAk z)@zj*ZFEbyc1yw;tJffGJdN6_5@-&Hi`17@t5ThsVj|UbtVkd*?eIjzf~roZ^o3hG 
z7I5lX0g+KF{BgIKyr1-N5Gw}g?08trBUgvU8Q4IWK+dLQk(h#@f}>KVuev47sd2K= zezJ8-5*#H6GJ@qu4Ofl>+|gsE9CKfjOD&#|7ID6MAr_l87#6+$Fn1EYh|_sLSqwG9 z?P*1Q5-N%lx5k%&5Xc^uUAOMC1AH*p5gL3MNGK;f0m?}(J5ke^m>}+MxbeXPt0~q> zG=X$jXe>l+qN6h!om30CErVah=yrl*ti zz=GFbxYGuP5Y6)h-xTZBZzRHBNZM#@Z3AQYjQgSWo+FkGv8&uDjmtxF-ye^d61#@X zhD3%`J_HCTNk;Qs%5T!F;5u)1a_qjA!!f2FR%Za29h>;^Whm2~NWEn6V&}_Ofz#jP zP=Fh=$5%0#xtfEoXW_Z8%bWkJMEZYebpO*uTu;iYD8xv6OnA%D(!3(xc~dpDybOu? z;&_Ta;rM7KJT2{s#>oeT1FB}maXRSLDEUK5f6Dpih2C%gIA}UgE@7GZDE6=Fp5eKl z{Z-7;T9OnbKh=*;6RjbVuiP}b>}+Rk=lSCF$iCO;Ae}Q*jM#gZ_}@(B;f?ZCQ7V<# zgk=N+OY&`QgpH^YsXV!)v*>5UFc$TnQ<1t67MkwSta>%-RlHm~y~ek+kAvyrMssW9 zO#AB^aAP9aS(tG2ZF!CgW%Yxf&nwp(4r>w^y3iqjEs(z1U>z77&cKq`FeM9SJ=>m@ z@^U@cm$$C*6>(23(ZPpE1L>YpQCRcpj?B_^n*81iI-3*pnr(i20ygtBD&hXgG`5Mq z&MwOPEo9uGsh1<>3jNkaj_a_c$cg}VZBuxkB#nLY>}yrZC7B`N%+S>&59joTC<4-`Ef~-PLz&UE5xC zV+(spPXAEyS1yh&?4dg7k<>VD(MVWuev+SUZ$3WQY`#Rbo$|a_f{KsPY&k4{0*uj! z&PgPQh&Yk34%(H^#%=|KG33xg{B~<@U1zYgx+Rv-U_Nq8?&%Kq=Ze`nPDJuug2H;t zMN)eDI-)oPO^DiOPH4ozT-z>1tp;qCs~#^$9LKP?GbJGuC0+aFm1osGyy1YAJG9aj zDm=#0tpV8do9LPiCEi&by!zv$dt_y_@U8XCk0gPTVaM|x*p~shIT>Kr zuabc;gF%$#E#z_MVx2wQ=d3zoayi30SDD2bXqx;jzJ=+TZ^F ze*|pk|A}UrVc>Mbvav$m`%~sBLLt)HB;^00?5%^^Zo6*b1Z{()Ed-}Pi@Up|6!+rp z?o!;LxVyVM!HPqRyA>FMZ~G^PV~9x$l|t2a_2{<~JGkzOJ>`UVH7zr^Q=| zyfQ=CgK7twhB)oV_l#CB5#T@>+{0o$U4# zUz~%k0p0ZD+~Yq`WL=q@Aw;%os{j4T+1mekAYqRC5KXS0r{E8!pcb;*&7zsjO?(x{ zCog2fjRO6&GaWwOkgYSOn%J@XX;3w-eL3m;x2Wg$Zwc{}ue%Q4@$6h|fc+k8k+GNN zTGdKlwb_Wbsd9`C29C@RUFjTx76o662`oHgSmFOQCcq?^H~^_2mjzOR=(Xe?qMTjB5{2) z)|k@{?c|7r#3sdClkBQsz6PUZAOehJI7C;(Z1YV*aa@OAveS9&c6y;h4TRlbrNZFb zRW93kv!-RjtW^#H4)?!%+m=Oz0E3$kgu};TxXf*!WWPwfbHx#{{V{>Bfre#I?gPZG zNP|hBdcQanLPIcMFzZh(ig!8*A`T3dx^Rk%6f1VpIt`Vb|Hcr*=r;GA4}-54kaj)A zA^TJ`r=Pni5{u54;T$JNFv@*Sz256=Ahn0e!5R$1U_edaw4lkd3jk3*_cM$^1tj1KL!@5@evv$37LvND=>`QDPVqsDn%Lc?UWm>JL_MJvgy@n;+RUqb&qy8ni} z8xUeodbQWbqcGd=3x8c2T8SaEHQtY=Tqz?hpkB&#ln%=x zm8xHNmuogjR=!_#fRnU5c!06JHC=hk%fXfsVya{Jr65IB4^mPo+Le{Z>DI4GHJJFj zB#N(^WZ!DSbj!YdQPr3&uLtc4EcHJFhYBzDixdO@yrWW4f&8xM#{wk)8xL2OD5Ih< z>Q25O*BNfnF2d?LpyuIO76*0``jK(a9RY-w<6MxAZ%lTKTB&!`g{EeG#f-+bEw0ngQjSFl?-l-->8dPIhzc& zy5t7 zSM-ae)d_gxfQqMGZyE(3#-)$cvHyk)L_k79f))aa)+566{d`9qrT~ck1o#8N4qk|K z2w+w*B1EDzL=WR=XZE{>2eUbraoz)rcYWBMv9|zi%glU$tQCd!;lPD|NhN+|;7OI81$M39v1K_$t&KwI zCG%a?^2$xP71JYdol6&*!OB_KS%aBn_Y=V?^GIN&g=o<6R?eVX-;=<7_#Bn3>hpWW zCFW0#8jtkmx_2&%id}68)qZrpK+XU6%SiwYJQ#mVPN*>tG`usGd5-szp%V`7`cdtR zxj6EpX{Xg=8{gZl?u4y}$$)qjd^`HBaL!efRi;G9$`Te+C_C4tkqc)8&Yy2Bfpzw6jmYser5jl+zd(?Upv@=ujyjAK^=E9?fDkEvN(7EiFEiMNX+ zxIQgd8P5Fmc2^MsNimCDM(yXfR&IJ0_ve&bFNU||*m4$rEsY0sMj43HkN9(YF_uc8 zVUL(aw7?ez58-y6ATKsM6kt;g7uFS%ZT7Vd&EIh2MZ~ZE)5-oTND&^YxKi#h`~NLL z{*yJ`d4JFT$<~`)=}*9*$6yrRqDpG2=ews{MGbJVcGVo$J2WBlFRc}>-s#6Nt;Z&Re5B5 z!gE@3!QVSdFy^JAYq}atUI{!)TC+_$B-TKVuma-?>2;aS$kQ~px;?SG4;0~|;5Dnz zZLYdMgETr#jY{8X;4#e&!%Xsc5A&{kOH(tQ9BcVxfSU~3%?fhdzrO%#2y$O?zc3!0 zZ<_&d_>;FFBCmd)k!59OCWVXf9Uo2yP;23015}wpJ+%_%Ce=56@`89Tu+0JQA#7nL zxGvp63-(mW--4GdwqpdZdh4%&LMYS-4Cxr>4^apCZb-FfjThZ3xphH*QXCxT`iwjc z7>wXBMNmUJFp6Cv-XV}8jATsQep_A7DWZI8oeIGWCi9;(zo8OLYDPFOJUG!(>h z5IW+dN;VVeLy6DC7~MY=Xx2Xkc!i^<&3}Dy@rC6K?!fBr?}MDCbXFqtXn`KkdGa02 zWV%oiNV#PVcK>tg{+}m& zy8?*&)x9tc-0$MQg7&|^o{bc6e5BypsH1-#08)!rOW;QUI6bT6P1xuFLEiwFXC!ahKeB`gvf6k-ZIfn zl~;dC8vBS$I8~D;x!0WJlDN5XGIwLE8tYX)^5^SfhWMx?>Q|MM2d{1Y zS{9XSytV_6WAD<-6&R+4FEB7irU&MRAL&TIJKx+4+C@IJ#sdb@SMrWDkPSmnJwMYT z(3D~F`93`;kR^v#1H1u2+RD#Jpo8OM@CoUpfvfA+Wv}zGyt93{fdt$ep$rH>LNI2y zg8D%dP{;{N%{ezs{79Wp-cF<9A*na;*RpBN;VVd@&*8rqB7zzsDLBT2@BqZEx?|E! 
z{h;`CbF^)Kh04KwBrF>axoRJ|-!mt)(~}r(b6xfJadXaLuJQ>EM)kivzaIr!vj&6p zWwXx-Ikj&!4eYE~?>YgU>ADNRh`Q(ecYx*ZfaUd!a}lm>26@MKoKNZ>O%{rzQ&9NI z(>srxYtSY?+t@^@@>D6OI%Vo&0OPax-5m!b%D*jrwFB?_D=#A2Gu~-FZQ}co?cL)7 z)XLAIkJ29Faid)QIBwGocwT^rUH9qX=tj1Qd|X9^1LVTDW#TkAR9ZI6uGL{R#&~03%XeM9f1mC3GXE3NHpjY77j5Y;^L27_PO58PCae1 zs7B4S&nr0ipUkU0so#SukCi8FL+Yc$KZTe#r5N8!YJg>^qi~D^GGN2d|L5 z2lXJdT%s3kN@zQ3rR)9zUn$k3tfCU+Fo%!#VswyuY<;sQU;&6V z4M0Z3D|7Yi=k(1222~;&8e_2R5}phb83AIt<8S0C%igA=sX)UFUQsX?0#hnW&g`|n zS^A064vE2Q1+W{QLLA;^(wZA3$r9KpneS?|r%0zU0xw_7ls zpG!FbC;i`yr&~5Db0BAKd$|h%^2>-KCg78#A>k#Jd3RP~*HByb{Q?(Os_PyHsD(e6Hhk=LMCuEeYfcJ;#4YBEhv7!qfvvBL`{RN zJ)QnJNdKSkM}&-jDQ&sxJ7x2Ig{YrFuz=Tl$BR@Erm70^e?)Q{K zR_@HiB2D~c9|w!e8CCMJ>r1ce1kFR|sndR))06^BhYwoRdJ5aI4*z&{gV7Z;F+v^6kj*g}#F}>A6aelH;U*~ft4Kn4t z%_8G;~@gPCif%PGMZKH!>JlC;S6~Ymq&wO!q zwkW5{rc_QOfig!Q)EhY+G2#^Xc|Kqhz!^KSUVQmp^Z3{%?Zte*qAGIEUxI zKsDcQrb`am#Qq_3=}Ie7^b*%HXjM;dh+rrhdl0{5G!EQ;{)T3Twhr~{__*%k%qqhn zamV!3EFGV#>tG<=JY6Y7jkHP1iKmWv_uFaC$a$>lsQ3KVz))jOD|8segJNm!t$q{g zoQtLNaw7fOB!k}b7|zN^?PB(nksMpEj7U5B2>k`(Do~H5i8?Z<486^gEy6 z{M4*;q_O_Y-EzZe_w&;|^nTRE3%pZ&8vScnYvTTy9y| zY0>`UM>HCaj)1r03I>;?*0mpY%BjJ)mllcGf2 zErpmtP{5v3%{UP^_iz~9toS$!kU*r?7!dtP$e2;zd}Bb;Yr2}N*k5)b)u(cWj5J0t zS5LLR_%xU)wh z7|Yo@`e~5f+=WlL<56xF`jWc3y4;p5py#&`4n96OomtPa zl(WL)$>)1e`_*jSK%D=gP>Pz?4AYe`izvViWnlNgz)aE#JY7Jy;lQ;^Hl)>nxo0U_Auq&gN^BnLfQzA``Ou zv@#fpe&sy4kIOZjF_Bi9q-Esz{ReHDwvc8STXhn8Mh3a64tx-p#{D9D_H3JppSq(= z__h~ymHOONp4HXJ)vqNv%Vt`LnVQelS>m`mcDZr$_1>YprO6LoWXXa-Bedu|O#Qvs zfTEn|g~hYquG#dbI($8ys0kV^X0mO@*qsMRAL{ZUnCBcf^o02e~>9Ixl)jGlYL!7EZB?g~h{3RiqWEBoCD}B!GRY#Cj@35|34*oN;;^J^ELY3h)9fumUU_3rC0mwvU%p<_`#5I*Q?zG#GS zqdciVw*WyB3-H?S7Cm#*=X?x)QRiP?UIGjI`k~NVL4A?(P(>h?r>0=L<*Wie70we7 zyr*ToZO*@uak*@P!b?CtLOd9&e)FBa_}B!tjc_9+#}d?3AXTP}8CC9G{>HM6q@05q zOOJj1cZ}HEg9z7fK$G5f?bmjU++JLV7Okg+boq>)u0Ese;27$i7mg;`bY;h)niYTo zIv>3H2lYVAO+5du7C;z6WWFnkjhJ_cS>s2haXe4bPlk>I6IX{z# z$!?u(f(S~Zsj!#KGShvrHLk&ID;Q>Sj49}^WN`ueSn^=XqnEH z$Y|3a!+ZR-z!cmu_TBDqx@-pL(3;nQQJv2UFHNLXpJvsW%sDOW1IqL7e>$xv>fv>t z;6#8Y_x~`{|7UY2TkLO(MT|5@@CqV@(Z)&fp%GsICMr zeihL;WlL6NcbZ$3USODZ_mGHTmsQ zOg4%B!a+9Ykn?9$dppnj+{&dX66++BgsbdAucs8kq5`3kSpzG$|HO~?Ivp$=Kd7QO ziq59KcIh_Rs558;0dmTL3mo!kl(WvVXCq5T@!40LX@Z?%N7}RR-k*Yy3&5575sX70 zR`e(#%be`&NVQ3Pes62v#*);ME0FrVdj&M^y`z_+IfXgL!H0W?06OoE|t zf>Dj8h~^UY;n{!ylvjpEf_NqJjM{T2ecbynpsletfL=UmcZtAH;p`h1Glf$!qEEBB zJX_bOmRDPSb|X9kL~;JG<-P+jHuZ*<$s6KWMBj|A;aWLMtb_b;b~OgJ%_XFK$n#>wCsY->ktNS}k|}Cnt@smcH=ZC>isEZ1e*m9b z8XV68#a4{LcPY@qVTh`J1241ykO{5r&R#%pdV@=f({)h~(%QK=@1x4WGq?8q5SX|P z|Niz2@zgXbLicN*W&%3Wx0eqeniHs6fAVaQaN~{*B6irrq3{e7SGD$1x>FI#Jvs=XBB2A_onD0%9 z?+XvgVatmHsm6FN%YQtv;Wgl`3LJ3ksM*E*3vikFOM=hPY@QFaBlIAyoZTsvZ$_E1 z53Q%oDk^GIAK&!9KkE+eR49VrfydcXWNCL73Vt0JyZ61k)O2T|#OhBP9;z>(Ew*le z=D}{rs{+bNt(xk!Y@fzx-Jid{O~S$*P44oT&)HLpJw~0myKkU9$d=nXI7;w7T&XLm zg0HmHqT97WIO!S!{J> z26R9>ygypp_8&J5-{}M@7DDYmNv4&Ts)!(7U0>h}D4~tpD}HQjzkR?e>Z)hxc9n0! 
z>8{Q%RxKXSgo|DU8J|6$9W}VCbfv;UTjS!I4dFHxgKWM6o{|{Xl9U{H!N4;dCS7`f zdv_Gn3mV{;q^%rIi&5D^!~(mk_o@bNwFss-)0crflHxnM!+CN}VElBU4~F7C%TqihrvUx)itc=js^Vf^Xt}Zgic3GqPJ|ufy8RrW()Xr^1R*u-?+^h$ zG^0Di+1dHGH85%@itiQGKJe4&hxe48BfC4H>OpsHp={KQjG&gQ>uXU@h4U|Wx!+kl zjY!NtXkUd2_3zdTU773{zy|%D`Y}@z#d5s$GoEcHxo&X14_ULOzV!2M4+gPhpIUU% zH!ZT7;aXXPE9*y~dRO1@w?D$Vw-P92LFxHD(Zqb4=~mTo@Dg46vA?jr)xR?O=9u8@ z`d{K&>2F*sm2EivGe^k({FftqusV7jQ;YDi63o$78{zVax$YqiL#sj2= zL20sI=j_8bEW`Nk9G?Meg&X)$bNBoGmPSm&x>!rS;`-_tiO z4l`Kxz1T6xndJ@v$xKtj*^Sh1m>=O=wnIPuZ@MLaLVra6 zm-qMgi2N_ehw!@B;Er;e@V!as#{&V#XvPmWrv^lTNrG7e;FEcG%?+y`mJs|7;6r#` zG?~l;esSSi-G?34k}o}JeQ zq4>A;c!i-f2!S*>p}HtQSZ^58QX^SMK=Q~a3WZ|&PV&) z9ZOiogQ6DW2n7!eSFcQAVKD-^4sdmV-w4nXe}K#EBre8_Jo9xI*RAvmae_`T;R(sV z#CXtOtP{j7xb;tHsJa_1_iyd$xBK>Nkd}F&US7~`NQB2pE=s19Ax@O7VYvJ(3T%6j z%;jZLWDFW^95&P_y-Ab28VS+ml9kI)LoK5)32V;ubu17LD1eg3;=D4-ml$fxynvGy zYY(RdXR~WfgZp~k#qE0>W+=@S1qEtXmMRN6HlUs>%;60}m%*P=eAuPP(aH-`4+3MV zA@{ru<;GDDJK*XRF7qNuzuD>GU>3lsMY7P|XQXemgC(`Zq_yjgZ(?Y)OJvAAxAK3Q zx%z58Z5f%3w8_;!%ZpKb&A(+V+0ao?5Hl&?7m) zY{}>3GraO_?7Bv#!o zFG()TrDMVx_%=sY38D2=3 z=}US!arWlL5ne?ajidqF4SPJ&_at+84qC1+FOk=7%+TB5&T=DXTO&OlwBL2TPWy~h z3k>$<&lX(y#;5l)c++t zI{v0dhnTH{3|+3DmVAcSMPtI|4~KNr zAJ67%au_4T{1-$V@-jUQ3lgz`M6|WVC#nWjsxeh_<#~P@Sps#NV5?2yHFcWC%c!%k ziq%|zL7er{A|tU*$8@oRb&6_OhgL-ore)HPo}dqGpl=i*o|^*CJ0oX8T{p0Ur8?dl zYN8+jSlR{gyJBUwhVE4Tyt`{z1BkPn*4_!n09t+2~FbJeOJDmc>a7TUv7>yC>; z0_1kyTjVWw&TK`>hc2$@BjpS$btbM|UfaO~Tlq=^Tm8>YngH_3cVAB!UjJ+C>VNPw;(;mCem z)nyv#l(H#$t1dCtKigGsWNcz1?=)IWVgkB)4Den0G&SlJDt>B0lqu^#?N_bV>ofnlLgez+@#r76i&mC7KM1~W~x*!wn zGnm4NnKDUj3hc&Nl=Q_$@5WF@041lL4F!s02{G-r=%rKcPUcyPp?rHg)zTag-5ocY z2RWg`y=65$q3B#7ncy53=n{grZMBOrTJn*j_?}ukJUu_6KRC=qnp|pjm^NB3H3@8T znvG{F2;iM0xgzSf*44c;|0q+Qu}%V*W&a(`bhHU*m(so(zehpOn7g(zH;-MDfOq@P z$Kkr75w>H9k$E8aA+6IT`5;gm04sBow!Z4+lYYf1yQ_Ox3@A-7PP(G4}(lv^bfZ z4hdaNtolZC?M&Ra!UOlu2@jWF)rMEB?l&KOPc73}^K^egf>&DFlQ%azmca2Qtxys$_UUqlDk6!RCTS5(E`EUPIF``IyDgKqUL(Ab|EPLgP@}Ej zJ)lzq`8NE5c+eO^ISa}Po2E??uh1~0!*V(c(ODCF|Ig4-#2d-%!T{&YcPt(q=Y zL4a4%T8PjWo35+wJILyj(C=`L5kLZ0TLoLD_*Fo|xCnBPtq}`;xiTG*|GQM14agf3Hzh zCpLkvP$Wnuqep?z^PMV^C<0Hp&{)c!hP0{2pG3^>Hx2?h+Dn!(a*c^|YqliDFYvi0 zXQ1P+w7!is1hgaHt!C&bl3nEG{r<&<7+L<5#+Gvf^g|{unlS06-g$&CEQtCr0ViwG~whx&q%X z@Y7%Y3#=kCf!{3x!w7Aj|9t;oD#0lvuo5$uP!l!)g_r=*u;NR4LWJ48gp*PdHnEU6 zKr`j(l%-{JMw(nrf&P73q1UeS%x1&wB`}mhal!I**@3lM%79&+X{!l@T}Nka`(;%z zU7n7`)-H+A_p7SlZBL`)^h>ABfO`4Fqa&Za-J>Mpwx1?@?+GdR-lVL@4+G|jtA!89 zIDXKD4O`+L*s2LZCSHV^WW6(w$5%~dNJ$*7OO>&sP6BK5YmDpVdQMB#I+WW-3)ki$ zSiVKC_?+>*M&PXCUUTbs%_UPck|8mA72dtC))73ls~|7()Vs`zC@SQpq~mgt)QV9J zR;jJ>C1tp-IgHg~SbX2{5l{e1pbf=QO8p^q9xdhGtVlF1SfW5Vt7#@8Pdm^&@GD8prcep=kmR7JE*yVkyyUaL#V zg^oHykr^))^U)ojJ3)rGoSe@hKA;cOr&tz`27PrC!DOf=@8}eEbMgY{>araK`JlCxh@lv_mi)gA=aBRY zZiAG62nw{i&SCEn9h0BBsVxe1%5q%=$g_*1UZKehKQo&9j;9zhDJRr{mpFs#JR3;RWdNV=qE!b z_kE+){BV+*F211I119a1<*(naSri+t{~Y(x`G4;eQHPbrf4);Dg#Xg3LFS#C3!l3E z7GgSG>N0uJNSmOhoGC*NN>+t8y&DMnDM!TdbV9d$x0mrN!f`*q4~xn?S`~w|pNeu- zWaJcELRtE;C*0fdk}G+n$V@i2 z=5Z@*y3Q3kXM2p{r?cPr;r5ioWw15tQBes$WmrVJ6_s^*f~$G}n>^ae`qG!&zu|{M zPA~QiGIjgG#iF8$)TM0DGbTl>ZN9|=d07s#sLw?PU!lpp9FK(<+^qLqOw&O^bSIfb zB~3#1w}$p}=bFS@HY!O^tv3EvpM>Q$kBW;cIzU-=1a|nCg~?QdDt0jtn^C=%bQyRB zUTOZO8S?~uhd|@guU~!_*Wkh7BsxTPsm`5DCpmzj z)#YQUl0XwW)$pW@jj0ilpdYvHH=s94>v8@4P5t8j|-pOE}?s_sPQz()1DV z&n<}R?s{c7H)}?d$@EVhH#mPw{g=b~^k!~FP!AM2s4t(P{rKmle;R95 zo{Xx^UUJUz3T@2sZi=^n1^zhp7upJ#aMg{(^3Fnhpn#%cX%am|ID1k1eSQDDq)re* z8fXJ2`nw-aeqn>QX^wq7yQK4HHegY89Z()+{6uN2x{0|Ht$fZJ%ui zR*RpX%gTsLg8amdN46Op_a7^a?^nka7;u<*Qj|!Q$%FtRe(HYW75y(GQ6*krzZgX& 
zZT`GB&1`xbtLvVxU`ZGL-IFJCczA;kH6RC0lP-^b*93fcSO^j_JV94C|3&KE4!6gSHsr*}ePh<5`=5F$?0LtuD&7`xP3C1u7S6u>TC3gVr*6vbNnZF| zHryorp*Bu{bn}nnA3>h^RA;f`d8*%v@~}v$lf>Zp8Tb~(o%OBG>kApVHkZSA4y!q& z8?4gX%KGv6;9!rkika}y0w{e8UZPa=?Jw&45>e0aqh7W__H-jVXuD>TiV;d`meEY# zK}x@1n8Ce?fPUHO}q_}v<-5D)x zcm1#a{=dqWvq}C|^**1ksQ;y^SNi*&VyTuDmWpou3Bi`L2TFLMatDyWvBF#9m--j+u4zc9U0Gl@mP|ZM17GK%N9dYB0*i7Caew62I=`2V(F|hF?AgH{mu>lkrq-t%!NBEVdXHe&cQ5)zqLh+Q6X!`@3W=~ zMPi$9_*32t?PJmGi^#DEqtoo)0s{QhCPC0do}hIQ1i?1mP@rwN&ajMezC6Fcec%GU zcuYX|LV9bfVt$SM(!?QcPWe4z4~<5r#aClz>4X85;iGDw7UL-yi*iOZQeBH6AIN>; zUc5qgeMybpk+*pZf2~Kv1txk>O(m6VZo5XkDe{Y$PJd6h`bZ&ODv2M}bF8eGzZziI z(*FAb11e{J@Dbka@CShy`cCchRVAh0`% z6DM;Q+<$7KPreapfIEL4VI``U5p%I+wO=qRD#}~p(#}SMOvaXu2r(+tO;#kJG- z^zU~T=j?Gfg+xIF;Uq%Ue(V4ECJ3ga|pUW8vn7U)+D~I!y?u zeOTb7p`%Or^XCu1>P=1r$L|AnIZY;l3GFzsIMvOUGL{S_Eik@*HjAJt6*I3Xbt{jJ zK*6YvUxCfgf>Bs73#mqZQB3E-OTC3jCOSGaefo)FF7H(gKaGbaQQei5m2a*{%md$v z_EYN@maACA>shkFor5b^{lsgWVsBkO;p31twP+Z)RymHmE%Wh&?_e3W^EuBr;Tx;y zO+WK-{qKpR?O#v@FXY9w46cv=nG62?pP87wt+|2R|FlH@B$RJ~kaac+R5bNiy5xY3 ztTejJRNcACL0-R(E*(!pe3@{w6q(d?`2_;RA^mhrVI%HToV1Y~QV6tBs=2?^4U___ zqxv0n1g7s6RNarA=}Z>4eQ4&xBUU;P6_v}GE#p*;1j3`rYtw$zZcZbDj&X)F#?6zv zI<1pcxrnJ89%(NE;@4sPN!$r>{svX(ZSrE+NEjG+vL6-|AgW|v5%txxb1IRy6@00= z2aut-!#%>u2yL&WoiQHu3tr`X_iAg4cRaNFI(V$sB&}5=oGwbCm^fM9IPr$S^mzyv zifttPI=tsZiBx+Mm)tQu zD@3ofUr|(Gr_gUfv7(m6ZF8XWiB{oTn_-z(U)|-x_$G&AIQP_gJ9T?wV9p-78`av$e%POP>zvIuIRXf4kPq&SlNlgmLm4}IB z$Pq%$8z_r&oJ9Hyx92;kgE7uf?;At}cIr63SMOdKOzKXAHKTlRw1x(l#$b4F!|HDz zE={dS$vAt-J1--Cn8|tn29v_7n^f;m?+}SV0E|QsHd?#XI8F>#7;Rm1=kddU2@q+>%ckS93heftxAG!GNKt&m@jQE@)Lt6#2aD4QqW@cWfr1lv4Q-1+$)J7)w4}}U+rE~%+Msh zpRs8AOg!s>AlRtU1m48L8@|oLjjQZM&oIn|3`RH;QewT*s$x2t`Wos`zBOHI0B27} zRzJOL5L--4f@yjA|63!m)&kjb8hURx~r-6znm^y#YaNLr2Su%m>l&?wag8~Rzp zCbAh}f25FMA}-d)i5t33fti9|#RSiS7lChrzR;XqLayGY1twcGcf2SgNgkYNc zgS|`Q%{@xxyMzUcRHPg4EJcMW=T&IXGh%6H^FI9E*}%zu2VY(jPeSvmP2aMEo%*KL z#11~jJwtpj{H6~W>fQG+G_9>g*5|Q`qJRaluW4vkOsA5a^$hxabumnQ&6uFFuhlSo zP#fS%!RmQMR90S&UTZ8ZEnS-U66wXLhMLa12we7I6}wa>v+MjvDtUnN(&nz!!k`4Zt4hp#X@40M_i2y^92*JC@*^4-(tQI^1!N zvpt;%c%2VRTQb zcLsZCNdPmv71a8qoeeX|4#we6pxU>=FzU@^k<<2*x`W1{7^M^6!f3(ggA%QX$GAZOem@E{>#`fHwfNe z_rw-)PWcz8hWZz(VXog!8-4Uo42W0S7u!r1${~AM^+k$MFC$h|vxrSqjo}nusYl(_ zPO3QPnmBDvfi}l|z(Hyg*iFk`dd(IyUtVwrxi-RJmi|x?;0X9&19CTUJxoANwoZ2S zc1k+f8<)t>SN%}g&Jnlu`VLB{>zU?I;!Kb;Ptibg@At-QH=-em$koZKjFA zsT_2u<_3Nxd#*-%K#GUsNoTC+{^$VTIc1JneWEH?eB;Arz z(ep~6bJdqSXbv^8r+Na2@SOkNtc@0jD3o>c7Mi8bT;en8tSv6$(?;e;ag4w4?QPk~ z_6)#qGRt%b9?jrVt4Nx&jP6s+V|ETgC{1RvGVe9BnN;R@hk0Tn_NWNtB#0?Lj5kvy zGXX64`*+&MgwVFVt{m=Mq?=^$VUYI%ggN@Or(C$l;Fq*kRI$Y-F1NO|^>hM^H9~A} zdWEhI_%;g!@irE(dbl0tDmC7_t~jf4*TC&)X-K^c-D))K#_p04T^Sv_nJ^Y+?~>76 z`(Z{*7h|Xdh&}O6a0Njs>IvN4uuL2NxwzufFNw-OO0RpKb+F>k{)J!WNBo->DDPUr z{)Iy;_}jaTG4w1gD1cIO&=;|i>iJG+2Z_IbQ3YpZYcv&@kU=sKVlFKZw$}lng2EZc zT1upk_Z`=hrGfIJia~{2r2@`aS&~?g`9OMrCQHz`;fCcvu;WTc-SsI>%bixmX0$jj zTr3*xj4^8&6Z(l?5gGq(8r~v*1|>|jl2SpOBOAZ2X?EIwu@M_u*+uBn?O|R z+Nz|Vc8)@fKaR*r(&7!?oy9k2dU+MJUyo4@_wC3e{8U7)Y~zJ%;%TG=f&rc$@pX>U zHBub!z}CCTzHK%k0ba9?izX?^li>IILIfx8x=Ap&UoE_5PSi%nGhQYAfC$0>jQRFQ zJ6Hj}bVtJv(6pU*j1%m4z1z+&&m-wFCFzv8IgLv?*%r-x9jXdt1ht{H?^`<+t`UjQ zF2eN6ch2H9&>c74;V?w*vpE25@ll(|)c|q;-3LN`bOktqW0%VO%v*jo?BQ7alg4Ta zY!e32m+JtRXUAchGkgnn17w@8tkooWT}CtYpM3MkE<5p9^Oi3B`juU_%f8?9pc>Y>SjND#$AOF)WV^9BgUJm9)n>}Md6RYAz z&$B?W=5@?pR-ydL=JTcdKum&E5Dr?#ev$pU8$RCf;H}z&a?>IFzy0`27?}+sqTF4s zP@|DRYck#&j<}AKm?&ki(X8LoD?$5yNS48IMb>Id`nf~&|KsbcqT&pfY#Xa*myApst^E9zF{M*yG=Fc%rhAeXBy5Eq+55Ao6%X{w6WKBi%zM&GnW|jHL_d|MW 
z*DQzQ=lbLAOjTY`6G*0p(JOEcYou+k;mX6(W!*Gan1V|+5;cZ`bb~W!IZIQomie#G zxn6Oj)b+)3mQwmn2eaP>mOw&4B_i%J-|{?{Svh5ryF}x-S(+T46%P#mg&QEB3ENQ6 zTzUa-%}3ekjbTW5jRtjNYdI&NwmcWiQw8{I&b;O`sH@ZAv20jtw1;y<_$4kl&EV-C z>uc-{rgXw=y-LOUYGtB(ydk#&vCGkK>m!0tz{s$Y+d1h|4K z{I~-HGu#1M2pCkqM``$d^|UiooQl2)#6bTXQuaqS&VNvup1%b ztOQ^YP|B)tu$qcxxly;U*L#H%b`EWCV;)$!bE8PW)yFB^ZUO#m$`88i-f#Z)un)g`-M7 zU`NzWxyq_62a*PSn@`kWYJ}C3mG=8U3sb=6@NQ2?P?a33WN$GXv0tt+$p%e8C70<6 zeBba?_%(0Nw&@A1Y1G8B=6o<_zG1EYh*6N5O1 zFn~mZeD8~*B_!(yd9v((3T-eM1Q7XsxtdTNH$=7^#lzrzN^{!>Ag1;OyDF^+Bgj;X zu^}w5OG?gTt%&4M^831kNp>D0<%Cxqx(RHB;=lV*hlye+-Q%ljYA|z^d*rv8K}IUv zCBhdyj?%Dd6lW`z4xxSC)Y=V`ePxtkp>LU+9_yC5pJ<*SM;lm)-Uewx>S#@^1ua!o zRjbKew&k(!LEfRb5TPIij#etKTb#KTLH9U_+{!TvTbY|Z{=EKan>Di~gwGZ@1^+Zd zBA1k<60zXg*Y{mNlCM_ao~N}UhsSMM$Jb4LN(|L6&o|N*pHUIrt2lc==Lv7dt6=p7|n-ylDoGolL_MS6q3qLg=BZxUF(_n zwU)~YPYcpr3Hombznl6Lw?IHQCir=<+ElMt*ZM766KTp%laLK+Fgly)Aj9iCT z^G@5x^z`ZJ_>t$@llsi=pG>u1auz78I&UZEU3ULBjtyVZnfaey01VcD%RIa6Y!gEC z>8fuRX7OW~(eWIa%Xl>Y%)Ffx#GvU`3)w%@$M)i?3&YCUPXVv*x$Dd#*qwMWGD)Plf9+T<9{-Gi*E zG1yHWm?+tk1O&LbTJLg%4xwTgAM2 z%^tai&}Xj(Fv}!WAe9`Od>qbpuIS=OoN_d6C=W=FR|%p@(n@M=H4+mr2GD~GxIrqq zkO@dLjg{ctx{n6&r$68=x3iKEuHmwR{9B?pf*@lseh5UX3?2X}J0J*}21jccpEJhp zv>5`!02y|aSC3>`UsP1I7)2scPEZ_VCU>_s(zEZU42FVDJL}>l_okjW%^aixrp#qj zdW6Q2i()9U18~H|`f*v<5w9e)CpAo3@je{Agbl?AX1mjxBvJmO^((=osGn*B4BmhM z;;*IXGkn6+WygN#k4QT(i#(_sE&0OL^8P#C61-205>mcur7PgrV7uUh%qD3~O-&;p zw|?|nGH5|MJqK~|nrFH=&}3DPV$EP4^H;7VF1=_GK%XfLij?J`Z{#w5Woc!Hj%qUIXnKA@^v)fyWulQeBQ)mvNJuBF?30x?|oo~q)?1YZ9z zRh=wW#ZPJ7ARA6i72Zfq-Pmr#9>hggIsGRe?w9=gZ+Nm!Ex7!za%cvfzae>*m1&Yy z)NSjNoWV&>>~qsv0?!Byt^_umqE#(d_8HuXnlVv<=7xo%GV$bNi6*<#piwGYu7vfA zpru~|wWIreFEllEI6$4etJhVExNNC;Wtws=wAW(f*Xr&VM`cpg=4{>xEm_tQ^7wwC zquP2#uhB@4br7q=bxYGE$er&GCv`U5Cl8e2LOHX_BGuZqd!e^qD!~hHeAu5PF=O%a z@d@%enC=p+(7kc%;L;8i^|pPbk4^%5v%w;j8R@%=c_U<)U1Fl&l~g(%PMllr_bcMY zY06bJ@&&Jix-ZnaNr5AK=xdTl;_Q6o7Xef#nsmS5#UX|$zCq11#SZ#|i&EZt6+>q! 
zr!~rDgI$E9o0nUd#*MDHFX!uB$AQ+a-3?+wrHbxKgC>QPv7{G)rGf-jN6L)o^upd9 zj#aP6Jzfer^3+vcXBH3{P{S~Kp|ZXLClJspcotTZIUr+@XZ9g?9L9XGfG2*C1ezT2 zOTjF)E{AN8T8=D{xSmK3(9<=+t+Z3&>9b%06?86QxEW(r7TLTL7R=lRql601=c8T(Avs{j&hZkvo629f-zbr5p_3&R4D&HDc?IID4 zV&6pSoFaA|`r8=Hz^oXrZ9wg-Sv*1h5_KX)dP=iS5^yolpt}!{ZJR!-Y&(y`pj8Rf z!U5^3(IqWQ-?!uOu4FA2)$u&9qprSc_64ui=p*6cbh<{j)FGcYM*l@u-Vbe?CCYx^ z7Rfb6(!0L=IotaUNYw9o>+tazwemSh2abUBw08;Y{73M-+3=p%@jeVinRd_d!KX7H zh^`ASm7cRtuW+ShH$1l^I3ML(**pv(Dk>Tj0^XM&d2*@u)v15OtvbOGJhJS(o1Cxx0Z|b7U}n=gY;dSO-{vqSw%5;A{C*sKyXzP zJOLS@5l?`!{(x{8I{PB+0%nhogpK%br|310&7W=$9E7n^U~emr3U4&-cUP9UIePYT zw6c4%@QN~VQym_;N`xHW4G#|THZUM@kAPM3n=_*iWhYsz6qh#5d7aiQ=7#uQpA6ZV zxCyPfoi7=+g#F$*$q1RaO&xZ>>)sS`)2&1Xw()--V;cj>ADLm`jRM2YyZn+#rgleq$2QhB+# z6DEUI@29$3B8ZwGfzt4@w>Z&hxq08KHPLO8vZ9KUl&MVKp-6(sVXsK@JRU@>ZKEXz zVTSmD2rQ716cwXy%rWv7vLMPdN^x`qG$brw7!T_nW^)3G5D>72NW_{F%hny$m}RnG zoo`PkN9xecl!3dWWJ~Vt;4tu zsd)BVT~kcFHA$BNjo~f=T^Rlo26cuOIs?)Qi%jJ{mPgxk0DsHeb&ulM08J10NgPCH zC>Wg-ZvYRrAz^v`^AYF|$O7G;RkE;Cj3W7C%>asX1jx!R_F3R@2*zVlK(<@6bRi8< z*%0y3SXza6S06uLy+O;fn&ak&PAvP6TzFcq>w|6WJawr>Fs`{z!p4#O?7zvU`c05` zMd;G*QdxdU$)*nl<4qrb4Kh%z)WWtrIXN)~;VQ7<0fwvQODpdy{g*nI5GEeU4Xd$V z&5=MeC>u|RX$>dMNqf*Xy`7L6l3U7ga@2!RoL&kw!kRiur6_)t#V7He{3fENmBp|WG@fkXVlTdtRkfTq$`;fiI7BG0M1 z4Sz^*F!-m@FCJtjtF&(FQl~XS0+SKJ^gHbktcy&d?~O1#pwXO-pu=*q)1bGdFExiO ziy)5{kX-ez@efp&FR;#J3aO@8PSTpyeyjEkHN^X!*KKuKgMVohAhXnZH+v3*MS z?O4_LuQOugXJa;n;%sFb1_mha`fw(#p@v^m`yTw$O;fwEWt@+U=Ou&>HD3pxp<&3J8-?I$?rVePI?d)sv`_LK z?jpYV^A9d(&gvcD$0mpI?CuE6>CFZIlbxB_vU_)m03H`q6a~u5e-wu)bkhgr?NC@y-Ld4A2g#!I(t%hI}5cdLEL(VmuClf4g3+5 z?^g;IN|n(yabF6F0w`p6n8{Lky*h4iIa?m)0^O=kyRz4}2f{?c_Y!4;0p&jyEvY95&v64 zJ?LAZ7sT~t{>kpSakQA3{P{ohDP<4iN(HE`-9_ zcd-+(W@rRDiW>E%xq7nTp^%TTcc|NjB!{AE@nJC}J^U2wRbW3F?Xn2^m>IqWjI(8o zaQFPs!IP9zoLbl-CACC@+{3AtB?98wG78Cq*JVfwuoj5@f;D44=;YJqAT;!3974F^ zin^o_`cavUsK!FeBiS%hWuT_gl6N{y5Z|CHVlg2vN5N)*>oqpgW0=VlHBEJ9A>FKt z50_~1d4HFC;9W!B1p%xJt-rx?PmJGJIzwk+kZ%sUC;#;o+~X5)|NRSBw6BS!Ww}2O zSJFL0`{lP1-L6gz*^|izI0A=@_LO3_I}%>$kNS=J+`Amz`83ei9Z|lEnkKKHG4HD# zyh!s{=IdsR@sDLeN6=4eymWWiMlMRf6gBKD>w%UGd_k0T(3PsMIIGLPceUdM)4#8o zd}BGf4-A9^K|sVjBx@zBK-F|sv%Enjjf&2p4B3y2i=quupkeB}U8MB^cX>!)px77h z`kX0_AqU-TulrPx2_VOpAgaNQ&d@_V|0e1awt*Ik%ysxjlGc~r9}#EL*IJ)WfiSjh zcLWwL8TvuU0?+gJuJj~9lPUZYmFc~yRo#a((Zf*dZL7FX=c_ix=bAQ4ccYl^G$>`j zr_(vv-M1eX$H&;2s?g5ukK2+)yTvp2-pGVe2{ne{C(a?%8O%`c;zW&k4|U8Yy9rin zUzp*)Kt+O!faUwjZf{&8sJ7q51cP$)eEE~?xO zpdL(2-l)*O3~=Q}gqAA{$Sc#Bf&W+}sFwMu(SCcZ^WFLwJ$Ps(K=8Xk!IY#%$Y}Xq zi1A7|sr}qSrtvcHq~PF3!*1~8AJ;WLUo|ajKvyp+exsrTug0o#KNt=pw41!kxE%HO}L3yMw^|$(26L*eSCKngfs+B+RmJQl^ekVWh z6MG9-YqguJd7ygpzUlo!HBheW6F6O#X&tUOw72UaP-)GfRKrO)1WGq&A14_Q-pc<7 zhe9`jsLALDGna!yhzp{sQ54{znzPe|heRPI;k+hz#VQG{ChP8P&u|p<3Pgh&+_pRQ zdsh&k`+^#R%A~UC!71j(huRQ7W3VhLmNfEtTE})l&bQ`{X;?8YYG~v-QDDq3-wtrK zRshNe@U9ECB*cQ?zJbrM@X%0D)G-NXI5Gv=QB68Ln0P0=(Qi^X6h<=Di7f=8MDbSj zeG!dt7{_+waFl44Mgsc1G_I@;T3Q+YT$xsO^dbyEkjmwG{;b)g&@-{*v6vp6+3sj}bMLd|E;;WoK#9`?+igWtW_{^kCLT zCo-orzvh~bIgd_A;keg{4Fx2_#C13sReZ1W-0BA7@_ek;h<9#tI-zLza)t#czt8lg z)Yn6VoTKPb;Vh}oMOq^qn)6wqr>f3twf9+zfI88bQEl<)T!f=yu_L&^a$3Zh_E|B; z{cLruV^7zxHBd7j?&bdVp_}{V)-#DK+RL55s&GaN$VTe9{o0$U_S|LD zt4Pn_SuS4$(QLjr7N_hkPrDnELMLVZTRS&;N_Fmz`ARivLqnM8S5qX2C zNX|Xtr?;sM!vw1RsjgDN9T91x*@WaXwz)YLqr>}q>p$(x`yK6xAIeLt~plr zcbtEQU^ms}6pfzml8=h6;rf3Ky#Er1g2ot(f|kY@EIYJ?fIq_-<>+QJR|eSpmerou zZ`{kTH!g9QSWt58ieM6f27K+W=#R?OJ6{9p=Hd$+NCmL%y!cLH@1wkXBl-@`6H1A5 z?dKBl_s#}3N60Jujl1{mEQv7DQy^d5Bp(_V1{A@fglELO!NDO+4|GV!aU|1s1q^rg zrte8-Ek>c2omg?Jm0@2GNdM4gU4uRC8Ig}Yq4r$w^+y!Aul|Ogq}K;~#^@&(>$!48 
z;3UU`(Z$F>2#xgClsAAY)2Co~C*D0dsT#!mX&e(bw>lHRi4x{9FD@-I&?5cN!V0l( zsE~@-uOVj%2eL~~^AJO+hxnA56b0L)PR6^ib_b(i1jy?rkQ|3V(S$PT4*y!h{c$G% zAkUSTQK2Dm+%H*xvv?L9G8cMBlg|%-d?ErbLtD0&c?f}g3deeYdvdjD@9^s9ArcF3 zf89Hf<}vPzdW(ZuiuEP_!;g4RxM4}eR6Zd5bm!UfVBpS`35P1*0u9gDKxw{U*Jylfh!Y`jmv`gT|CQJ7d!Wjs zVwuAf$Q%`C=L>RoxK1;Wzmp%nws4|+oX+oxVuS}1D;!YE3;5n;{trL5T4Tu(*($9%Rz_IgRzghq7}LNG<0)=4J? z#W^ETpLrDX%(em=5r>Epw83p~mg6^#vi}}P$f6`y9*rzybP9KJX(?x+Yx-Tg!3(@P z5_0UR-6qgyfy*xoN^LdZ0f2RZ4P3~f$(4)B|F}(m$TTd*T5b06%BF8i3Z4(WSsK(@ zHI_+lPf-IBl{OYS+&h2*I3kSeZRg9%&%DldnG8;1T9xt`K974U-IK+Nvb+;}`Y7_4 zi960@=Tx=*AL%Ptd?FWqJDk}7j$Hfav8Pj6aq(EtdQ|xQ`Rhou)(3!`>1<1`U|ZwC z*k@S*LmT#c*YZfuE&eud3QY6Bps{j}{iRA=qQ#``a%!n;CQb8&N~;T#*?u^X^|Hu+ z#B1Ar(E_u>;4SmSe2JoJmgh6qYcoIq;2?f6{*hg-_FgD?lB&#By>sN%^=?MgNYch( zQb0nJ%Jh$<_#a`HP~;0{Fs3+IkNdAk{V!?p@3BW4bVk8bpZu4WIvTy|_O(w}qGYAC z9>~eQgX`p!RdR7r=gIVAn{kidS5b*EP>H?3uhdSuU6S-h*>uH7^r%E;q-T1A-@_YS^_}hQXJroXQB5~i z!TYYkE6$M%FsCUI!7(`{f*atY8Z1`@1E3wMRJ7Lsw)=Zn;`SsYw}TX7)eHbxhNbED zvW_2^j`{hf4_sRI$0~;MAs&m_;ONzafx!bE_1)rfW)g~1+Pt|U8$6@*QfBqKS=Pkk6D#?Sn;EenF zk~c?&d$0f;Q#22Pl;9JNBquBTe0L8R#@dA`o|=M*-kD533{+ZnU{#-WxF$nTCD4=1 zd~IJJ*aUumwcY=s-Uw(d{2o$ICtUI)Rqr^{Wut^h0MXjMl7+1J1V@XEb+|SdiVbhX z**^=C49>v3J7(wARs^=noG8fww#pv=zD|n%i+cP`3TJSv$`PrbOm7V@cox8jFBe%~ zzj3aTgXqV;R(5w@F!>R}8B55IG^k6{o;ErMuX|Mg-7K{Ot>?Zwv*>O%6LVd&DTpe= z&YxVtVf=NOm7lAPc~(u+jVC99CoH>*Ae|BHoZ%X4vl%U&os-gvfE2^2Y4B<|HyLMj zQXbC+nJ0_T)AMDj422ln7?-2`7P_}U#fkxT@S2up(>|QB!}{VK-f0u$!+iV0T!+fs zhTN(0IrfIdvvD`GV&$nnoE0@+=aBCm=-#%t%RkcHKJeY5d7F!CCb$?`!+}_#H=qDGt(p<^zqM zo*sC{EDoEja@{V1Il!X(3(chtHj~e!JCv5+(hd{T_15sq)S(xL&EhwTcFvKF05Ah9 z>d7=v_I%k!_>7|cvI%BDod@I1%V;{R{nz6%VLIVr2zkvXyMJgX(NrLw2HKgo<01*n zK=$Iru$|@K3eYJ~>N^G5B#ZoK;rd@vqp$owVv*zTsjiZHu2xL2#&5#ukiF2h=rej? zotR*PV-phbyuu91zPQI50FreupSMCt*YX@9)$6+Wy*m#L@tkNVdhZTuoef=FS}RSF z;Y6&a0K%YJ$ET_7tmLl@NT%ZO=rf6#l*1$zHbMyB=je4X9*Wgikr=xA-?p&gcf zgvyPX!dBjEAH)Gdcb^U}9g+49NwisnexLy4!>0d6IM&O7WKdA(r}Wso$>Of;qT9~g z7Ty~{juD<%vUDE-swy8RHFg3is4WdxwH3v*g&u4;fSklxENuAhK9-n)dm~<$Y@*b+ zYTrAmUzB=#8pQmwE#lQ{OS!?U6&OVCoaq}`kVxz3c<(P8Ms6n|rpsulUqN!)c+i}f zR*z30Hk(l~aHa(s2M)4;en6G1p{Z0tsXZxpZ|gL$%u}R=I*b^a!U4F#LeP#yY1H6_8y*Ywbl8%N{O@d4~^8 z^mulix}PFgze*J*1*npZYjryL^ZL7N=(i|7mwyxciIcAS8xDs#NyEu}lH98vREWa; z#VX~?!$lu5wLOhdsl!1fv-4~I{n8wMXoX@?WxeGLystY3m#rS*hNMdwA@LHfEp>)O zN10MdIMqC2flk14=+=43B0r$xX5K|UIY?G0@r2ECNz5Er5pWU5Z-1&*FD&|FOnx^} z{DTdp%&X&Uk*7fRUAp|74?1}f?yE+?=X4kFFz6Em)BZW`^8ZH0dI5wJ;1yLfKT0dESs=i~t%I$=J%_h6T*RJs*w zM556I$=H1w#I@vtydtmt1EK>>W@uj4#lsDG41`K+nRrG6(& zP$pBZ9B>v1HJ)?^)DW=s?&*Dt`h@?HXt|7!3>XNPbN5P4yI$^ zdUQM<)xl*n9FekrjuKYh06XZxC~A2!BMzLjZ@+*NY<~=ku)7kdJuCN<-yty&%2ka2 zc1nCo0aYts)@V#~6F=6bkxWcN#BaVBhv-+YUDu!Eo*i+t;iu@-7DgHrgt^}G`K=9C4VaQoa4k&q*3XKJ5I5?PV64&8~EfNNQ zUn$Eu!xW3|SS~*y{}icm2ZGfU3)6&~r((Bn(UH|7qN_tgL$qqKhSk2{;#`yX7HE#- zwtE4^oL4)2r@BTOe6m7*?iR&7|wKdyD7Az%r+UW<25g?D4_t zV36?oJqF|#WC@$2G3hnLOihcgV{v9e1OYdY;T%>QWIWENN+&@KwFdh!tSD=78P;gRcyHqC51Erl;4Y^J?K( zf$GCm0q23I-k1+;*4>>xXw`CGf{uU)M;0J*Q8^q$$UR_n}3~2E=ak~$)yQ;^W|=R2h9#tS9`@k(5X_#^JP4XHEXsSiE`NSh ztxzJfG46lm%12LOx^cRKq1wN=D4PtayW!=SG&{@ziwiQHt}Ib3`Z`a}A37eY)eEI@ImQx37!J)-pZFd=C686LE8Jb( zv7aTDTn`bRNc_6Hxl49Kz+5@A>AY$!*XdB`?KK}x5gioIAN+O3k4bX1LdbFd2hFM+ zbX~62o6(YrMD-r1o0d2&#e^5XjCH9|WP3d`@iUcAoiM|wRL6EcoGxgR;IPvEq<^#zVTKDC+e zNj%w(7>J9DJD)$#*@Mg{b6o_?T)bDAtxSFzkJ)sUCw_F?{iJ?uVXiiOIL8`+K;}1k zsWZO8Y~QC`aykItbm%pomy_=LxKEc#XIDO9ssT4)mRwM{M($l<`C zU0zmdG45O>7_=z0YO8&i@W>bVuKmmNdPek-coe2`qVV`LrF4?WuW;SE3kG-D&d1j} zg&oH}M2-zI2xz25WiHoCU*4-F*S2#FqLlYZkBH0Vv)ooBQjORA9m1NB7VsL)Cd))D 
z3f_RjD`n$;2;@YMDGsYOGDXTRwPWwqVEN+1xAJjpyT+*t6OJ{&D@7C?L`Ss>1x;2e~Q|wkeR9!Yp)bY@)$uUHNPQC_k z{b-M62p}O^K1~t|(xIgCC2|V^0DAqkJT%;rRtf;yLMibqrOwcGCao-J`GaH)b9WB} zDr+50(aAO^nxSX(Pc%=Cx*$8FvSa)WYltQRNz?*>oLC{ldg*Ql_)w<=WdR|BNJB-` zoSIZ!kx~Dt2MK%bJnemSG2sb7>Ybj48Nybwyd-Th;1{PIscke_h7)SAF?K7>R_cR4 zKdHlOY{n~zxYXF+MuR;slV&A2%2;tp)@2KdL)V7mH<9;fc>{z2N)&km{0}u9E<##? zv>A>%7C;}k;O!uGbG@W?>dW?>0z@YUiNCSFXIukE%Fl$Uf^p9$FWvC=7sLAU^-PlG zmVPsUKDe`y0O{aj#XjvDLnV2~*OCCL+AOp~MAPVpL2~(8E793qSiPkM9<;r)i)ip| zc)1yUY$nC;zi`Or0VIAfs)v4F?HQ|Zu(iLo;H(&r#Ijq<^}l_~JXO4;+M0tD41`pC zdxJ=jMxGqPbk%)~ey%UFLhIe)-bk5h1RuYFnjaTktgF;hdfY;cp~Z?-eR|$;MYO3N zH(EAYzD|9a>N_E0;LtT2H?1DUTr>$(v#^obB0t;8gI z$`Tq)*7^IkvNWyqx}?>8ruBh7N8Gv$o+HI1_d{YGPm%7yH8jU{Jibbw3J^D7z=!H( zyOsyvKv4Jnu)yhOCCK~7DidyU;i0?Ms%LZ!;iSD;H|h`X%1;^1XG=S}czz!Z*{DCB z6q7S2wj0lPCwruV+>tnAbrO7LA?%z{2SZo_-?r3N^a+VoS8}EKWU3~d?{7*KVNh}G zewV;p?14N9e-mkyo&+84ZoK!5Qr;Y#DsA;e^H*FBLf>7$5V6k@$imvrA2Q9J^&*@# zUc>Vq)ctIy5cp_s94B@?Fm0^eXMO=s7ki^ZT!&a9^*p}b;m;mt{p~Qi+o}S#(kDdY%mUd{F@sEXND$COYlf0;!xy41$-#K4h?I z`uJss4VusJFX!ntM(1l>jq8df9KSXkHs9iU38fKnAkMcfd5FhUDEXuozzoZ~X( z1s<7IwbOOIKTC$OAB;|XxX==f2u(`wB|W|Kn{6XtbixF{Y(gDg{I#)_MLbJNr@1WPM zQq#$sXh6eJkfxCJ!q7X^eK}oe);DZnth&*tJg^1r3#WwAqLlI!Pvfw$PT%rS+tlGZ zeIRJ-lQ-S$4B)-*S9E3}H4=yd2Fg^XRecx+cs6V!xZPe2U9IOoc6Hpk9`K@m@Ugqk zhcf$Yb=y@ojomvLs z(5BWrOn5o|h35ZT$-8x`^tT0d;Kdi>3^g4wUhI!_WqMuiXr?>{QEiq(r-e^?cRU}% zW8Dsj9&Z3Jf~O1_pzXzDSnQSPxJH_#bb(L8_WRezO%oBeokrxmldvgZ5nSAsNwlV z?1VGdk=slO1$x(!@X~k`>>tLB!gZo42t&L%)|ZIYu=={=Y)xcn@y6l5ke45wKyc0x zMmIhbpys*kk>B&6?H}(d^*h$zkSrB3tU*HcAM^9|#IlSH5pK|ew@hCD{&_G~?AB(q zd|?;5c*3B=?-%t_qAGC5MY^oPXqn~e)+*U_V=>ZO!Xe@6QYh747!G&id6m8i+ zQjDfRwkx(Yr!fa%o8N^qc7sYpGpxiD&TcwFRRO6|3AQ1-(6t_f*y1TkirFJnnMpB6ZTah^-9@pSmy zj@|h=6``Fk0GD#jCYdunc6OCBK2}y?kY%Pbl({{Ut~R6tTxu^@d2)p#Y2*};TF6Ke z8R65{JYCNYSJx6)5E7h|Wg}!cLSUdc?k$*#+~8Tg{bJrHnGKiJ;5~U}xx?ppzLPp@ zOJ{XbV0Fw5oV7S%?x6&)%$VXgf4u|hj)9*+Ti(ay zdu=qkYcwp)oa)LSY>fu!wJWg=cToy~c=X3IK{_4m>ucJ%0`CCm#1(6G zU%ku~C*zgK)Wq6+m`#rdIZwzxLtmnH;Y*J0wvRY>J{&%9EoRO=3oMdqoC_cPhEIg+lS$`SG~cedOBBB~9{-+Cw$ItA%UL+O zw&U|Tv@qfZt5cUmg6_i34b}VuzI}r2C$YhqY^at;ZU12*Xl(?~a*Tr4>n9(~%Uqd6 zKYUtvx!Fb*l(7ps>Jc-AOxc8D!R^#9^Hs7fGtwsu6%3y#WqVv)PhL{k#kV-<*`Q7s zdj&dWGsu+DLyaBxin6^X8-+KY@6Yvt+6WQJo(25bnX8nt?(*!Gk9{p8UsO6i15vA4IxUw@vsD3n+EOv*a$3HK>ENG^D8aD$T zo<>DPof~Y9|BbQF@P_|Fbbs4MIYZU47x=AVy~R#2Z%JghfASbQEJnNx>f{a|RR zhB17a9HDUbeJSymcHm1;aSGn$6F*=_5@KUXZ<0M}A9Ph^Crwz@x_xLQ=(@{@gniGT zi58RP4KqLRF%{QeUk6C(Ab04yW4evr|4=G$i;|ggf;^BxOm*<)l9G^?KRVt3X+x&t zRz`6kU3bG#9P*p@W-?us&u4lLt!WkWg%VoAay9ajqiFyU5txt&meIVVmve*cIQDvi z9+AG_e&xsGq%$JXF+U;(8gqemZw|MFIR;=;Dza2OXBr3;6{>(VSO-gJBXuY7vlrna zA082%cF;yNWSq4oe&*!^v*9KZA>ftm-uHGro2Kwn1V;+wfYY?av#OSXO=d@czm{v# zTW!}uNV~hH*fRYgwCC9ymC?t1cD=3Rn7u*yFj1owfi)%tso!FwV`-8Z_HO+1rfOw}+d*cL?^tm$6))yv4vy?5kIQ*^_tHX{swAlGE-WlOT$rpwgMjt5{;lP4 zJE9O&q#f&AdZWuxK2Cb=@9z?~R#Fm#oLA|{SRS~^e;DeBQEh-1u7g1<((0@gElOXA zTLFAB*!JG<3SYRAu~r-SCTG~41ibbJn55_wRVkwM^AIT-Eqvw)rbW7L2khpV0P-;{ zm<->BKx;RIAiMZRW!|3e$a#6S?$n>rsk#%1=nszB)b^imL%6Kw2*7N5MP$tmD<$STc zwrUVyk)4;>V<^fB&1n~wfO1p)YwM%J9jv1V3VdaBJ#q;SiK@0&1&;lM6EHax4cp-~ zr5sA>JdIzYRk$Oh`fW23<>VNK(CvL}h3yelu(ICTA&V|WU@B_Mwe-&Wo-+@9ucFuCbeonR$MY4M26&_c{EV*zFKab*%4Lh8WZqdgE+$i1_b;@# z-fvGPVNs3u-Ors~9>zEOzvxt`-ZEipw7M~b`QUX$1Xye5SFp5fb0Y~@8kJsD$5g3c zxv!^%GW^(^VUm->OUI_dUoAc@KO7AOWidSzG zqKP|K>jfmJ;@CA$9mxDjRmfJBucU^%a=Sv>4L84~(?GrWpp`UPKUrbHDTS zkNE!MwwCd*`dI>eELEo6GZY?+iipM_e6243J`{D`9gu&?_xp?1`&qK??t-eW^Yt|P zzcHblxPRNJ%eGh(1pil{^v`Si-yS*B|8dBXV``uSWmIX4><<&X9OM?Oj*tRSXGN!S z5CaD~y8r!Sz2O;gy%O5Dnayai8YOn2lg_32*Xn0$$r_G<^ 
zkM_Y15qu+Xw|Fz*L4qAWD|!3X2%q7eX1puJPf(#yDUBHk+36K};~Q8(W9iYlbo_`X z7}5PI@Y^VU%s3x{r+|ra8BQSqJe0Ii`LAI8wX#Ru8FiewoQh7O@g!VM%Q!!lGvX9~ zVBkBMPmp*yw3ZK7Te`fG)*t6{4e|b@)JFCtv8-CjB0`?nEX*r(cY`5Ppv!vzH+N9W z?di%qw9s)*+ui#~-IB&uOhVY@UB+sOLSpUaOM#3h61cG~YHC94fATwk`g-7*Q~mnJ zPFw;;2@Y2og12z!M}x8Bl!`)~oA%WYVYP4hYbNq4mAerBRNzbSB-3#{H6JMjc+23; z>|Frw?#a1`2l1a#>1&G}g~F_6#81{bwTBUeMw@>O;g5+vx%NO|iJh%9U~CnE34;ef zzQcl}QOx+}K|@j}K*O?47Yhl0^ACi_Hj5Xe`fO!OKM9mm_FFTM4s&YV)G>DEDnbxvZn+}hylnV8#uF^oeYeWobq!& zla9?ISC-_J7|>0D9iVFMc`wNVlK9=W?gqUZnV9AT>F@GnGltl%ZKG3{1|wo6%tGGQ zIJ}IMaWPEOoK?*ryK`WjGS05jrJ!E17k;pR`#vty=EIv&k zDXr|7{j;Xcs2k`ZTX`Q5jIX`|Pio0Afyi5i9302`KZJK=Zd-s48L{>)q8+jogfE$^ zcW?Xew_4VLP859E#!-6%FXbnvKqthW+R7>L7c7(PXs1C`uM0vg%@zfjVqhq_N&#+C#EB(Wm%LYR0FsTd%6o+U@E&+8lC_W zCq9E*XO-&y>YjoL^?lJ7pA-Y^X3eun`Iorob7MM^#dCYoet7kfYEobW#E@j0(y zSZlTIQ>HR`rDbXSeAflkto7YY@zV1+TQkfRDOR{7(kL6Bf9$VXT)i1CP4~oQ`4M8a z6m)}D6*IbqmOpMIaCwr=7CuasRm!t+k;2LE5#qnth7EUG9;myyp61WCNSdge35z+t zzqUB9w=jAQT9yP&d1UH%>VGQ)sPV}jRGJK8G%xOqj){WrwtXS0k)ifQ4OwRsQ&m1c zIH3g5i<;XknGah*2U+wtpPIA8akwzOlnC4p{m`7%sS(f%G8}<=H`m($XKk!eZVN*~ z0`2`4eBQ^eEq{Ke-zo`<=u+E^S9&HKV^K=W+t#q(GI_IB8(qD$njaVojh^mW{Mpd| zerngcwRc1xahX;6xo1P@?d7m^R6AFBT^#ncr6^lrlgA0y;azYGM1F(blS!7lg7evm zJm@{Ct`>oZp4pk$9L=;PcVJ#TL1sLGJXBY#TII-_!=YZ{{rpu7vpxe{}0es{R^}|`1m7S4h09h-MA!Zj!k5K*3Ju)0Y_w!RHCuT8G}A( zyN@5QAeRZTNeQbS)q5e}cVjhi)Ip<{G(t*h48ApII3Z#c$FHK2QoSE%kfi6HysdKr zMp%VEC9XHbj<}9o)Z1v`Pz}EK_ff;g@B!tAN5r5Zlj#YgxOAT(>N2-Gj``Pdou6U=gks+H5nCyb-o3H>Sb!@y4|UTpI02~M%97Ig+6LV?AnsGS~K&Xq)P02+KKZwfOhrO_GtR5%B5M zVB?4a>y$1!uO|(zDnkgnkSQqn8HJP-^QUO?!zpfI+sqmg1@DzqvgJ_pvAX$A{TL~{ zc@1u7D>AT#(mnzI>Lpl9^lkZ;Ncb|6aRo(6?Wm&N;r;KJpT_V#yY2$%UYSHHFQ(Ow zl@P$Cwy77)=i@*lNVV24{7>%(cX3In6-2$2+HDfT!jMVT3DuMJ7~~0hB)Q+$MNoo_ z5kH}!8)flju;Y5UT;;hVh=1d<+2n?mf61y%EjIbaep=|m|#YLfYP57g+$98ecPBt_2*I7;?t$R$?(XjH6c*gwJ-E9T?s_YyPoMiw&rCn= zP4Na_t@YUgCK;usIv@e<1P-qZb*ARMu;4Nx?Aq-Ubq>}(KzUUPR#wGjiX*8v_@8W) z)YSC)&Y&v3MrOQVWE2!tBQ8*WXG7`j&$+PB1o9<~vw3m^M`dZ7In68i^yzje$!WnP zTEXxS{I9+Po?$H4fQ24ADCI1yX2A`4FZmXt1Oa*mhp9=YWNtYE^8B9;%h;NW%$BRn&G9EZqm)>ZSSXpbnQAznO(mg!+~9#)IZK zA6s!B=55Qb`z&)wbVa0Gp(^XwbXJfB%75*~tqIaP5P>;73RG=;y@Q|{a=6z1>YZjpa(zBdab~w(Z6SE8F`#=yP1i(>t5v zw2)^b=11(HQ&@^#TJXaMTpKs4AkFb+$yWi?$|;97$e=mo&<ZlU?fT2bjWspov|l<)Us}oeMZJHd&x>qXL3O$)L(S+GvZP$iNfb3J-NDR)g(Ja z4cPGdfV_JekM0koXt~(Pqa0K&v>&T#k`@%Hoj;z=3t;m;!_jGQI2W2-tXa1=*GR6H^+P#0RyWc^mvcE(X^o*fduLrtKLVE_pK}TR!d`!*(ZN}3nD^x#n zE1b{GUzEXWFWTFhp|)CaHlGCE&Wt={7kfcdA9de($D;N7e{rxoQ)q2{82^~>7!pZw z|GNu-%jmDC2iM~?a`zvL{{L_RWfuM2`x!cF5BnPlKAE!qP@vH`_FV#tiE4RJJzmKZ zg~x@H1qxz7ZZT>ld$q|KMiZv|a%q4XhMh*-O2pq1bm{g&wt$wZfPq?NsF5)>#$J29 z8ooGDk@TTm6%FM|bGZ!VyX%?&)uxLVcCVwxv4T$~f#@@k|&O_U~`#L z@Ip>Zr+IVi?CLE8F+WKRPzdT6(I*S&?B z;MOICWTe8$Y1-e!8gC5Q6{6{(w!SMY{OIKoj=Udb zjkHdVOY>V)pjv=Dg0ocu&+`50h4vI~1UJNHPfsf4)2U0)Ipb_?9UY(QbsJ+bYpQ!2 zVT-ofBCrYMeG3<;?)tG>O!0gE*mOXkjx4qpo#wj{h#*M>;bj&Y`v&#olf znH3i-0@>Y?yIIjMl_RgzcFl6$Wd6jLm6i$}3n8b$(bkVg+`HLj%e9U> zQ}K(XELR_5to{a5Hy`1z$uc>pRFbA@)Oc7f2t;1lQ$K8QV{S%ykdaOOBE{gRE(~aA zuC1*_P=9-S*qrCjn`}1|ua<1HG}5WAdb#sD#1V@r(h*ZW=*X?sb^uvw*}R?|$MiC( zyM!QW1EazQPZL1ZIPaO12+A0tcY~dFuS3|8o<|d&?oL3jD*tVZaq@Muz@9-Uc}U& z#5h^Kd#Ahfh#MZ80`%-}6XDf+J~vVTV5{+1a@BqdJaeKM(*;ous6J&8ux>oQbk%s_ zDF~c9m1~>qtz5Q|jJjlLi}Kwog%w}At3!BYx|(c@_~Y^W+$G9;`vpd?mlntV56&xXb))wYYFj$DChP0qjET zZxy@XN6R63K=lF zd&JT47mFgJI`xfGKVY_+zve_AeNNs6RD)ea88&NoN#jpk^f< z%ZlhMt!xI}{Hfh1 zEpbwRP0>TCf!chS;B89S+Zv)33+rO1ogM@F@Sf*$PI7Yb!t8EceLV)v6lmEUFIns{h4Lh0g%w4Z+1|kv=jy!17RG*|%6LO&*gazTpfAA(@{DH{q1;^9+R0rM5yQx%#zG!Ml zY<1kYyFfvcO6#+$8N7L`brME+He@ 
z2Q%8_kX%9dTF!#jTTwrCwXN!Q@uoI9rzx)oNc*l8>+|oANIA@yvVo%~UqfTvxhmdZ zQE0E=?nEy-=*I+J3KPq|fQJ&^b;E^mM^DY!&k7bjybv=q=v`>1Uq}*rJ?44(i+8FW zbSCaLns;vbBTD=FXd4Z8zsH9WhrVjq2V-*Ir!-jXH!SMXasVA64MB%yKI3wm{{~WT zj%E3&XYYziC$GFQB*MNV;{=ZW_54%-S_ZAhw2A3jV%hSl^>En&0O5-?raji4w<{+SaU}|4r?+0T3}8bY{Nnw4Q@l z%i-52hdcZIgUL%*QIZvVlp|v=?js^L`3`4h z9=spIu@Y~KTxUfYn>ZLn1>P3-y&6mS93^#(f63jGrwv!<_7XtTvL7U9>?4!>2ouzs z32Cf!QziJs9V(obMD-=OG4|V6PRyL-C`l$MMW+1m^KL=TFLh=bVHOs!EV`6gT9_u{ z$lJt2kDtF+YWRv&=0%r}Vvw1BY#XcC8Po8VFX-#o;r7Ylsd2w^t^LNk#MWs66Bc~G zYmW4?i77N;B`cjD%L}BPsvxHL64I#5?L{rr>Ta4Jnck!nO?41n(&W=J;{tRVrQ3=$ zCTFw!<{ka`;yPU5!kG*U>-cHYy`hkWTLpXqL4QCS@HBCNPAAOhrz7TgveI7O&R1FW z`*c>KUAUsOK0mn!2H~o7wKp(ur?iGI2I>k-8 zuOIXx{&pL}wrIY8Xc0|!BJN=K(j!_)=qj2#pV#puVO_^0YC(4@)wYO-%++J{QN|{* zNrjQ=PMjwV@JS`J&~Z$ZWf$Ph-FG;0aitpfAq{W+QK5L!a60}#C^EjNwouTF4A(@G zjXg1VK*~5$w})25auUXA!fV?j^@#SCMv_Qh#=(XPTpJR9lWKGzX0Vu!cgV_0>*Yaq!`dhhr|go|EMRSpQ#Rtk3iby5!kv2b97{P- z!Ao;=z5Jg{FnXt-#D^eJXDS-XV{aHuqs6|+^Rfq4>T7RLcen`C>^uk=~RzZN8t=#Vqily`jUk*;s{G!uv@p zPNL-_al>V8L&^OPSWr~w@ci3-pYF-*IKB(%hq)0TCIJ+eN-}Uko?WM&5RdT|Vd^$* zk^{L%;!3hzmDPZ+Al&kJIHqpeR3t#-M3+~`;mcJ&1c}jOw`uiO9~wA+x^j$7KDa5) z5LT}p@@+1N8W|Vs<6~oK`Yt-g&Z{l+@@vR}su;O@OHGFz7Ja#uAGn{!SpTKo&Z+-A zS!~1h_EdiTj@yV#$Rh)2+lIkXd3FQhrGd1DR!us+<|D{ELVv>7N+*~UX=DM(zCP5L zB9t$*(G~GQx0)#I@=yGjz#nVCjlg`FB)>ss;6Rz+l^wEmHj#M;pFGI>?`vNk>q= zVMmu&_9-NSsFGt)3@P%tvV~3^U>Yp>Ro4j)@bc$jBZ-~WR>|LMmv?|_0}ILBzrex# zg|(rf-RPbx>?30m9=gdlIy}(~&}Y8g)Pa$el-~vGsEB6+7o{LpI8*W?Wkv)sMD2xI z(h!N0ZNgJR4uSFgNFsHCVqaa5QzZK)Q(CLY`^i5El_n#1TYM9a$0#b9l@xMyOeSGK zs8I`iA6`OQ#!L#PTOkbd@OlmCfM`@SWEqRKWnNpsc&cpBhd|{RM3Q~K!&r+U&gTH!ob3d)X1gArhpP)d`GsD zCX=z0hYo;Xv<6ul8<8Dl{C(*Gf7K_{=Rk?Hc5v=lSh3^VUUi5(f~ndK*IQOUDXRN9 zPV`06QYoPy;xYTr+6mL-LzrYIhTgiJ;qlV=mL*md)ziTyZWqTyhhJu}g;U)HZxb9^ zd1TrJ8E$sC_{A4N+i8vb zreFuwX}js+kh zvoFQO3=T*I)1QAFcxKO4cfD(NKA+HYY!E6Uzd;B~;#RaiIo#7KD3UlY)x7MTSX95Q zLdc|ZedDxRQ6H59`G<>4tG#E>UmNea899cFdN76N)=eQly~v+o2f(1t*e8uX zgL?Pg)^+CXIfWHlGg@@o z1|i>2Brdhim0L*y{rFO+7p&V^Sczkkdo_S2beSbTP9|HEP0G5O*XEGArS8yO_5Gau zfF6O@{wf?Ik>EuhycTd_@Ji5r2q2A5&K@+8F8sHjuh;s8cSc-?oK##~bfm9<__tM8 zm{|=esfxi{Qn8-ZlU2*r2kjJ=HLGX=9CHwH_hUKl?iHy(8vZJIrU0Q^U6CM~>V z+4XL<7A4;jB41)g`%-labUB!<(+ySoAthnDNmroGcM9}l`ETcS{ZerHwW|6{($voj5F%&34HQa%SwzvDoy8Z zFyDQ;G^zPzA@%N=JhoRmWo~ZzCWX$8&&&>;JPi1#nlqAjNUeX$bz|3Dbi7&`EEwU1 z`jzQ5!2AQg(<~p9mdPMHM4AM^oH-^W5Kqpr*(2=jn}iszV8=Vk)hx+kcW|#X%|cjM-C=3 zBt=BN!^6QT%m_BwY*KZ7yu&(zUk0;gpNoR!Sd3bH*9_BhTFanQDJ0=3#tDWVZuK;3 zk97Aaq39#lvvI+-H{o|PwB;>ZD9mmKSDi1T`6`dFi-yUG-%Sm@`*BRstv(`dEF7S_xU{Q`HA$5Ri~A+f>8xRN_Z}D5?$XiHzuXf^l&8XkIb*R(nfZhd6+{` z{b*s)*v@{1@mO{MLU_j~w05{RZpZ+r4>)8tY8(muSneLtMs$fi*Q}+X2&-{;Ci_eW z!iggX*u7uNWb7k6=!a&Z@w?Yx0DAzmD}>nq7iuN&@U#R>w>JmRe4p=mB;wSZBLYieRh!;h(@pH&*Td00_6#`8Sr;?q_VODkgu0@F@CtU3J=edhs!3qq+V%yxi`i z+d0qVcA%i8l>|m88fN9GohM=Yscpbc7BK4KO`71pRP6GnZ!t6!cY#9k=qrtmc0;Jx zaEsVofa4NL(-;TcjUF|%@gY`5&hM$LmN7D zr@)UNvwOqa5@jKaelX))2bM5~AG93}ji75-$*{!kI4qu5E^zB5H+MdkvA>k`ghd`b z*Ol|u<{v;z{HSySm9iOtzGk=t3ki3r>CYNS4e)wO`t`^cQS-*bkp!@QjA8 zFP%m6loPn?-=!qG1VOHLcE+XuGt!@3toAQ@$v#gl0987dsH4$`@=pGX{)-L-mBLdu z#Ek|Z|Hs*vIg` zf+%}*@PH@+&jjs1Rmdb|hAd39w6f9QT(XiAvAfjqmlCrOPj2fd+|`XC>~J?pMO}js zM07bLv8dqFrVV;xf9T;*_3wn!#Pz5OWJV(nC_%FK?hr+@Vyi#{Gy9o)=)q?<5$O0CYu!&@q=t$~@IC?{r928B_BbwCu(c!XU3$v(_wX%ccKkc%S+ zb4A3(RdrIY7ks^I_I&qfS4Krd&*`-kIT0v6MGu1I&Z zS`=Q%S(LhtSTlYq*E6}h&`Kf{t_}2sf4Z^{hJXh*UrLP01tC<8pAhL#U1g65D1UCC z*y)WElo)0NhjYENdk7!Pf@Zs)d;`w?*`5)C=9@$^H_&5;EYfoMIxxl$!JhJRL_pw2 zihwOY5S>(|5iPM0xi{ff{zd4c@?N_E>tV#+|N9LTFg+;dm9Clatc{PjK!V7Vp%O-}41G1V^b7jS 
zamh_b;lj#8ZZ1h!XlN`;I~7jD!&Xng^~BfBVFZz^T`q_!sEs?KyNzEtxolIN_iCpD z4x{(e(+ZfR$%kR5Xxdbt#ytA>_JPbB?*qAU=IlO`c+hkR{Tp9B1GVk;Rqi|88lRr4 zE!}}hDU_tFJ=vVVhO{3g);evI5LPzoUEc)s?!(TOJG45^m(THnZvG3c$_gCJqln{g z2Y3JA2DMfuhQeLQ3kc}SQS_(G>-@~9xOjO0CJYhds~xB>4zAjqg8PJnF9imQ;Cf%^ z0Q&YEAq+eCI!t;zBL2K|I^T{l3#0@g9-Njtp8OFVSjlXstperyd#@Sh)+wn7wq2qfO8&9ai`<_>TJy^GIQk5B2Var1y6RK`cD${*W$4GNtal(AmC*zcSU_;vGe*Yg~}q2%qOUCkIM3` zK9k*D+CoaCSNN(D>#Yf<(ssKmC(gvW4W_kscq(Vj7{I%K_NUC0$OZX9Km-$5@-+FF zb`G#|jv6r43PTUC$#z1$T8EaFqWD9j7&*EKSvON&FHQZ>Q#kSWh(K{0Z+U<%#+gXD)x~kPKAD<-j_9C6$ zY=S1EArx-=z0m6bA6NO6CP88aOB%AR&&o_cth7UXX}wH$h_W6=ytCAQ<}8E*-Sylf z0S*M{H+3twcs|C0F?nL81 z-u^Sd3W7^6tG^%m| zx71gIj_0gAr%9p&j`-;^?*^@bU9DB{F2nH`0mB=!CB=<>vvL;)i zPW=)3RGG$(vo=jxv1_knEmca$Cem-)>`odtNr)(^Re{Xxki4g;#iKgaHqf`}j0Ru^ zA$mIUFxsl(z5d|TJTlWv;0nT(yI5y&>LONmgYT+*Gm1Q%!u?tt(LSZwliXv9#a*dg zR|?@WuSdR>CcjmM;_pC?kthXX_}d5JJx~@~c08G;yAGSPnhyPi6Y?@FG=*L63y&Q< z)gh;{fi4@kKhyJZYWurWBE`O|)Mn?K@o+4uy?5J9KmSGQ-hemxlcejKc^&w${lT^`;Tc|Nawr*~X<)xTkVT0S zXPi>48$zS&RHW5pji62uIxGztf+u4fCaonV`Xn!gJJ zs|G%v8bBXYkO*~l1H~t&uL>S}L~5(+D^Z#arr*Lup5YFze+!)`fe!*6nr#uCMj-s`FuP`jRcEER-BRlyNwDZ$_qS+EdRZBS_BHh zjS@LY52f?jn8mJxO3O&nE!ga;BQd147Kke0Iad54uF91#UTXRlN8t$HIUmM64uRCk9EG%qQswW(ZE&@7Tv?By+vwZEm z_&_F8dcS!P3;12v=gkTF^OqI+Z6m-0M=GfG<-payT1GqL&61i=_LHjk>b_?CMTJ^* z@}~ETY?KS6G7w*4g7HY5BVEum3IY@QKiXu+#Wj(M4Qi>7!Ql8}5!D8{qaphc?p(>S zdD&|YlwoztxqHjYxyKoY(d9GV-47})Hi7G>3kPRe$n~iN0e_*DEnK)2p3cBXcp78<93$bxPLhAUgm_q zMvr);=OL~ueo}-a8EsE5O#tz-)HoSUe5+=v}!m7%j^9`j48W5)aQsyURx3 zp@pyc$6i9rUcdwPvJq2zO&j0sCAssPhHuNGD%ljDV2`!d`vCT%!6;wbxi(jVulnM{ zFlgf*kxl@yYtgj-=y9_}`P;bYa9Nze_>Z^$BfW2{Ne}wa-19xo;i!^tjQEtCJQR)! z$ydE~4fjp^F#o(;(0>kJCmT4JfjgD2dL-np7GmEs0Ds6{7VKn##i}Z{7-NUiC{>u` zXmZN)S4e{J(g@#2oI4JhX>p#WN+$45v-&*QHeVC4l+_enWucoHC)|K5BLW8wEceU1UV@h&;^t(S7bF#D-YAQ+>= z{s-7jr?-s-ynWPXY2BDP0yn(esgjzehs8n3Dm-wue0hiC1x^uUE^uUAC#(;u>h3P1 z+O#@X(LxbISvA{p7O>XH9~Ir|;|iRhp5>i(0^h<4Fm!ocXZ7eJkC9MVvC(PLfI`&? z7FwB2CO11h$#0_q;}cDXv`V*Al)uvK!4?IcPsB8u&vN;^#mRBKi~{jS?()IT?^9bP zorx!e3RRiiT3rxvJ(bX}%9OO$UaR05MY-L6q=w!=aw&0FE!Z=_k^fFeOJdG=ppY`e z^OfrM%9X6*xO^aSbam4xEc|&;0hbF!n*j;<(@plRhaZdr{qtviB1Qf05QT1pzDs-| zqX_SsA0f+RVX~8-#NsKrJay~g$~5?uz15pU@i@&Z$)-NT70OZ}le1AXfJ%}f0s>(< z)JtEMI zM!SyHZTOg*Bix_ok*GBuJFgg}S@7y*e6>k7<8x0f1L=CRjaFJkgkFfKsA`HvP4P>2}+~^2JdA}MOC62zi-bclLRq1+iaQ-VDaI%mn95A{z^&e2ndV-_n ze@!&2kQv~3-mPg1mXJayNKMHCEx;(aY%+kZE0|RYH-#d8u%Ku{-m%9aqNa>TB7R3( z9}v)OA0O(*a#m@r>XdvAeWSi@Bz#MN@RJbEe)bCM%W*rATyC%!${GLN8UPWn=X1fp zz}m=Xl?j`KDeA7+IZ5ar|HkU35u_z*=wkZ&C zvB8hAd2xvWpYRU`j?464!=A_K#Nhuj?8*LX*mF0E)S^}+`AUx*Q#zHn!Xa!-mfr^h zgZW)n!}l45C^|{lH!M!EmX6JmDPO?5$vxnADUe)#eOaB!nuu;W^!4ew4wCI5#OPW% zM(hVQ6j~}=t{bU6Nq&~Ir7Ciq;ZsacV5_Z$CHANYL>!!HP*9i_7+DX**(mQP+BWB* zF~)?2v8z44E5ve`nrZ)G_D?TEH#&s|PI}Ama1PTMt3{At3mR`)q19@vxy8x;Ru&q2 z2;nBy+}%TS7J4{N*AY=yY_Ai9pB4M$55O#~v;>E*W327N`*bR5n=M9|S=p?&iJ;%! 
z+4Jc0M+~doqi^&R<-uBkcPFaLPJHr%F~rmbVn|p%+)T@`rurxby%5}<3`+N+CHdM& z;ec=KD^s0^U^I#6-yMi)^_7KWXr zlI(a!HmT}0E}~H#`ry1YG`Z}@Syq-8Z&ojpX>B)4I^aFNlH)SX7aP1T2ZA5m4x6hS zFCK?-62c-ZJ?axNqJ2-za`<)d(UVerF29hQFXPATW`Yh$Y4gal^t(j8-l@$hNA;z9 zizggr@o6(jiURSbLL`0!TnW%7jw1?aWJ0H?Gnh(?Fx$H&wbX)Cl^7wK~ z>rf5_+dn(N!ypEC9FM=JIvX>i21jjqb&JU+ahTDN_KWDxInHkV^t54au5OQKISVFB zRQ+@9T|i$Vp>k*9eGEMhD&A4W_|oJI+&9;p%JhFs-D0iM|K6gGg@K{JKbq8D;uVU7 zER5qP|E+fW4_#)iJ;u5@a?J$v9opJB4b7oyutaO{fLIV|TPeZ!V3a;za7d&_YWQngH6WxW5g7;Q{p^g#s$_jVsMsI?D8WfMJsw$qZ z%HgQWp}CEY@x1sUUbSD#XqgIYV!(-$We<%`^`R-01#Uug_r&GiN`r+gZ`I6j965ZM z7>>G{N6&=6@xFDd$rzR=w@o&2s7i;ntO`VG4#`UMqiceVEJu&wNwfVvUFE^tc|qY0OHPG%V79oH5w61FS7pP3E)crWdFQa~++f|}ru6Xkcy6LqDg)%l zOY;!9Qo+z-0=ynUshmRExP<&^zMfB$$`oUc>D9*4)L{7KWcIR%`zs>E4u_vN$d& z_z)8w)xYH5Os$t?oYR(MMk~o#be#Ki9++FMHx{WxCQ}a)c<@c8-Df1LG@r}+^dB&0 zXwJXwcEW0Pe7&vT^>--A6OMlK(6~SFUdQLUDKq_f{XlW?3r*_yd=|_d@QKuw3-l6!T#F+CodB2yXTtE2a2hd zhk?A&#W8<=Lz<-i(z|t@9jGQJNXz=Uf^Zg?1tAAs;;Yn6yM^&ctlSM=`w5 z=0@sJo`NW8=&D%GYbqA3`VOo#ny>bOTk0Z!*yzvsqs$Vxlq9fUg z#iRP=`5kTYDN?G5D#mgrZ&yeToATy5oG4l2ugYpJus=zUypTV0~Wzi(^*!S)UtOos@5(53+5 zI0b4ua7SyKMZouQe(JW;arnKVM=z?6im%O$BqUr5J5)7n6x&w6R2H_Q&~swtwWn`U8EDTX z-<`y1s0Va)wIz|T8mu@&R2nJhXc!v~!?W}7tO&6}KviZGflZUO&LEtjucc*VMC|Ms z?sk`7`Mbs(XGf7Rtg^EG0jpm{kV1~6|HS1RT^SI9BnF}>UID42YF^N2nr4blc+yS~ z9Q!rpoc?h0V!IY|#Lodo$pqhEMg2njG3Ehb&%aIe?reyUz(dV1z$>%ytAgnJ`k;H+ zOa~P<=>*bO{Ik<}Z46H8`|gySHgJeo%EoxG*UbE+qAHwM94$$u8x*y`Ys(jFC%$hL zg-=5FmoJ^Bmpq#(&G4}=fLcl;J*uOjjZugN*1CIuEhvOMBKRpwpt=Ga1qVnSZ)`@* zz!Y8Y<;pC5^>4LwqZQvnXdKToh16E*bgS8n)M=b$zhG3X<-@HIahYTIH)D-5$F>He z4bDHNCvY~@XG~UQ0B|Fj(}%=(ZsC_Oxhfm!!b@7rl)92hLOK9mWA`UUtLD}DUSm8S z0B@eghv=PCb=Kdibei;S)+^E$98tIAhK8>WjP&$E7ux?Va~L`+b_60kt{SAq^QY3f)bwgC}P&V<$P1~aU!_5o!= z7Zd40Y0P}9bj5Omk+=~RACIO!9UHuJlYCxHThe8h zW=Aa)Q){Dj&+)0P`VM4f9(+R;2y9>DTXR?>Yzsu&mwSOCWYY4!*Rfls9G;r8;N92B zzF>H`{ms61nPTB`GX0dXnHf_M!(FV>?sju*f#_33iqs|E@-CCGkYX_cQ9|4r-7;sd@F+?oo`v;^(A0ME9cB4 zjcQXH%{SS{c)f{5(#bmjUYmxJ7q5?svXI?;Y393_0%rW+D;F_LFM$o$H>Y=t`&>%j zs+_mG*9(qQwu%zc*SqJ1ochL`dyj$^U{jah()v0%uY=-33a)N8cfyCtZQloz;$*8J zH4z^?V&`++e8;cD-TVIluPwr>3;mYw(#QFo!!qyxC+z-3=*uxB5I%RcbG(U8m4>mX z$wK!C8-|2L68qoCWk}^tdlZ&X=_;s)ulaxe^BF;UZ1}5BVkH1Dkjb39HQmF?IIlnFICXa}a`;U4dV-}=za}Yd-;qZrsGgd7x32HUr?pO#=(8JgQ6b>S zT*~&-CbDQ#^E7ey?|H|u_OmdRM^7hcf!9l~Nc^uvPER>KO>eR*nJOP7t+`8U`R48S zig?yOmXxGpV3{t4HUwNOC>l`ef~hHCN%&IJ6%JhP)Iqy1<2*GWLHbSrJE$;(-xwric6;u0p$ZQTxhi*Rt1P{&5O{m#zl6v-V3GM& zjNh9#v%4z*h>1z770>j==Tp-PsCyWetHTdMkWN2jqNrn)03{2*RJZ)}<)SJ&>C#KK zv>X79mIX%pWk$gHAs%l!z%#zrlHAa!v_)~~U8L#iiXBFuf0}bOC#dFz-JKrQFCFB{ zW^b`zf=J(&qH|Tc^Z$y?tZTU@4$bCrzP8K-u0^BlK3t~kjwqB|L84@o?WEr7$PF@} z7_%#Ra2Go77sNWjngQIA>}Xz&)9o8F4(L)>7AmR+XS}W_c`tG^g_^cb-``&FmzL** z-Orkv*uUQW(!khkRP30<)Y@G(oC0dDWGdSnR$9=7&)Xw4D9+&hn&UCAc1pyz%MtJz zfBDV|s@t>B_IbO58BBIgj|eA}w@XSHoJ~K<{^3~_w<1BjzR0s!{G!X+ph^dP?7{m? zG{MXinCkFwJ*KE{V%F3W~owXVF<8l+6Tx=(@e)5~P zwz%bLE2hu$u1eK2|Mihl=pwdBdc;?-V(05Dm*xHWIxH-qBTF>jvzbHJinDVU zJxQaICpF*e5_)6W@>tR(Q)}k)V;b3(!|hRR;IQECVaS7kS^=ynpT7viHRBkzo@X@b zrpHuwuGh=m(6MP|SayedgXNCk#-dOq#07l^bSnS#qf%Aw&XnK_h68sOh~s?JQE#Jg z8E6RPNB@ledP7HF(H(S{vn#kc7hqA{7WXg;UX`AboV>kUlG7c&KQ>|d>gyL^`wCCp zV85gs5*{w4?LPDLvqKZwOkjCasB&w4sx&72d_UV!?0khW=)vByV(Rh0ar_)|gBp}o zkr^u|CLnh1k88f{jnJAub?`bEKfGe}Nj|{dD-e%SmdWOW7JqwQtXerX?au*pBK$gD z)tp~VXglwGfPE*b+8L2h>nZK}%Yd~ND#_&e`f+*1a- zuGcOj{=%8d{z21c8^%+glUf1! 
zY&Y)%xwfv`9hvqQISa;lTat^H4C@xF>gT1wzi?h(?YGp?BJTf4`CB~4bHS%;In&<3 z{_4>vMxVb+*+j<&q!LGtBouF@qDPC<_%OS@0KZ;MrRe_FS=#^&)&Cxx{q#crj?F;~ zg&D#B8n*v4Q2*EL{a=q=IM4*2ULrI`KZ3&8^BT*MkexUr&M+dh0NnVjtZ46Hiwn;+ zl>8~tdB}mxi15KPPw^iw)&o?=)>8w7rnh2-GH@Rh8z}2M!G;o2iJOeKLCp16Lq@Fy z*gB%n{r)T_+A=sOXTtSVQVR)3|Aa2O;WiMIJB`8h_dbHdLK@TGyfHadZPmhzpmW&ZlvRCb@!Q!Jz7Zn zkPDy(i5C%ZbieYtq={?@`y~R{Q}ZO+px)2`rGgcVsW8lkza$V)bM95&h-<1&n%a}6 z2!{}NW6i6a1d7UG(0v_?BNtDf_J4pnhYv+AK`-4>^Ya5eCBSK77XAVkZJ4k1o#^cq zhBPdf4XM#bI-i4d0w-O{%r;q*fi@}c7@RKl_J;)#8Rhe}gBUWUvWpGyW`BwW!U=n| z_tfzU-5V`PLU)Yh?4|CENnb}FB6r+aL!p&W2}6QHSQ}w0Pqm+hSyfNzrd8}P#Yw6F z7Uxl!O&ec$)MWga8&h${>C*u@=^yAwfa-uJtZ9y&HjY(BqTjh1X=HaiDxox&=D5 z96PreA!9Qx9-d(|hj_~=0;j4jvDa;!9qfX-POD=NivWPfx2Bk=tf~P`z49qF2KpSD zw23;B6ij*1X#$XT@B-@8*#XtovOV3C?Q#B^rEpk3DeZVxC;THWYO!Ho?LObo9^+_z z%3e&UXQixM;=QuU)0TqeY>oe*`QKdt3_xJZ`4_)-cjBW?;7mb?T>z#wA1Dd^1-l{X z20)-^g@QMAP|Wum3fK#)!Bu|l3}+ZdwW^l3_qXz4+DthI5d4Z?8V z`Y+}2aL&Kt%$lRa+0+I4vZ>T*TiN{C8yIl;BB%`%{HP5O-SiA#-5q?&0HTsO{~x;E zI;!e#+Z&ed?nb&Kq`M@f8>B<2O|vNh>5x`HkdTtxY`VKq>DY94cjx={zR&%gbMAef zKiOlAK@8?vbAD=??TTCW1R+=B(UoB5$^y9bf+gZNE zG%b>#6xF=lUlQwM{aA_(1MF(Gr`-qkXZUd1_oANWt-cZs{@Vas1gA&YhS6%IOlb<7GaWCH@rKF22GB)i$p1p9b`t17t8uw z?RQPLkGYOn%ch>JXJ_v7diNvO^6v9?W(bv0&4t&Ib5}cGqC}5>v`yUZHMQyFEazN` zDi7p(h&&5UcU2rfBHCV%vdF zO*XMLZ>@@A^W*bk4flxe(QQ9Edd_=~NM+7EUaNyTU68@1((1v3T=-_~Yw;p#dgZqY zgE#NFkOPoXhJ?32smRx(N3&A)&I)4b2T-o;mzL1K9eULJX9&IkZjVQwJx%TS0$2bki z+oHJhv`a_MPuGLU#-69e4WK*OqLJ@NX2T9G0yY$7E&E2seo8b8ck`>FRI-8VeKF_` zi>A^Ynkz;Gf7txIx6cm^QuN~8Z%9`MxLQ$xElx#4eq^zm>8xQW5^k($dw6#Y<}ELw zlt_rC`#UR;yN~d{L^vTO-Nsw>?{#0Eq2{1#IjCp;%k23U!U0XU5H7G?-QN{`&hUH6D3G(m7TvWgsJ%y!0*mCQaogCOhu8 z!|x#RddFLs_euyKVKRm}qb&)gb)Z4DXAGT<)g#UNUH1*(@Wmo!r`6nsr&wQG!(3ox z(RRr)ANl4~@TccV(?l)h`}gm^=x?$~;d2n5Vre9)-7q;gIs0ZRtzx5#?{*(^8sKq= z^ySRUj0Inn7R}5dDjC`ih^9dMu6_hsqq4`K-hC^W0 zIfEH(-1NTH8lhu*Q^;!Nl9=h(y%q^&e3A9Q zRg_d$#5T47=^p#SC(jAeap~*0sj)7XKkA@|(|#H&U^$MplUwy8to+l51+BvcB88@p z!n@-2>oav@roywWR7SqB+lw^RF_80b*Rf|+a)*Rwa)yv1=w3~+ny@mva4UM0?y%ti z5N)vM&8;;p+_UxJ1emIOR;rHw>cD*nGY!|Aq^&0Z@eHCM-)o{9H zG#ZT!@zvVEe-Mu=(V8L{Vodqa<7(HVRTwKQ`dswEZoRDp=pW_nKTLietahZnM> zN;90FKRqKnW%)qAE`%g5`hPo+_A>U{-Aom^)Q6_&o!eYG4@>0fnhYdn3g7KLNnU4| zTx&i9xNRJ;-e5+&Rlxz`3U!xAIB1Y#f_GMAHhe!o|umRrW= zClD;4+NH+ENd0VItQDuM?scXYMli0pTG7Kz8Y)xW4Z=cp+;0D{kI>UTt4@+|hQ_;7 z1#D3|{4VV<EGlqBBzWR9%q8zPF=^H6!&s0|8?)HRisw=NOb$)XYP8&%U)0h?`n$FfnA&e37-l zfrN~XKgJ}bQIQp=%jKm^1j7hV<}ERokq$mRJ@vcrkieFFa6wQT{2@Lc80f}D^q!?c zgS&LimLn)Qn5R}>ld?0oj)e=z{--oXvMMR3Rj76Ro;dgWP`9isYLSC>4;50PMw?4* zQr;SM#~b*0`G-eT{-?PEKV2L2>yLyDFF?H&a#l(>1Q_ta;t}&DCH9GhefZpv{icG3 z%S#V=6bi%21ED6S2s^l6!P?PLQG*)e7AqY=Pqii6sUGsO8qwSgOiY%T7PA)J;TDf~ zmp9vz&uDgBP$o68NEhn{GFcy6zzex{h_>LqxWQe-BjB{4SlD%25KF0{5Gyh|=?lUg zw1U1v6%>B$J6bhWz=#W}V_PEcD7`LySFK_?uH=d%exb46RU+Vdv}bc(b)C1iW6*1= zEqXao01}xtH3U_!L*j>%v!Y_H`m1keeGMgf-(M2-u+{4?IyWtwd9Nb*$xbWP z?$6hk2Yn$j@;B;Brrs;$nbZ?T`&xTaKTLG&47N> zEOzAj?tU3?+PHl(yG?UWwRop_)cldM5sQK4s%hVu`WQ~%7a*J503y1;?m$7V;b9!&+=xQ@z79#O|at`CEc4;U>MYZyf(TPaSTe%6Gc z9XA|K(R55n;^)FPF^1boBWOI{Lz?QC?>Neei{;d=JpG!zdD@;nKB+BA&R@;L8qF`N zhrfqY7Z-~<=8hbEP)@Ph!Wf}tXLm1j^wcZ9Kl8Tt-zbk?-M$+(X}zrev?^$}Ua3I6 z6@czIYk`c*k%(i={N~oLf|6$7hUb3I0(S>NQNNQ4@*!~U3{WsF= z#H=t7!SZ?k+kws78MsQx=s~B106Dl3liJi@iuAd3GOk;viQIk~TyH#3)oL$z9*)T4;`Hs3ef*ebp8ErArmuUFa~mKcSjOfbBjN0)aJ zoIdui*!EFlkj^z1Yj&P4R}92Fm!z_C1*(h~ZJgsEQ&!E$Sacrz?@~i@})n;&^vEz4OqNM=NP>S#cKB_nnL}^ee_$3kqop|yQqI!~L zF4f5@LNZy{8b!^_)uDb528BMICUb7PEpY9cBz0S; z>BzCP$BboE)OeBGr$LF4flfYI@bSL*mi!Q`8E4a|Rf zT zwW*ICne?C7nZavqkD>VPSS>CACSt%NE^j}w^Z*N#ocP0D>A6S_?1H#5!`IKM_eYJu 
zq2?pR)BN(_S0d}V<+(IJ;EA!mdwLFlse=B=QXk}yrcQaCAHzMKwIl#h0C3CtFfcG! z&cLCNR!Agth*tWXhkemq4v3E#yzu|BhhvG_0W9&xX#($Bw)JS5n#K*rfc@Gnd)0z= zX%KIa^uSTap>PN&Im-2ATQ)WxjG2ekVybKqPd~i9LKPFk!mFLn?{`%b_`H0vr%K<& zW-J`gU@?0wyRI{3yKBKer_)8LC;jSDp!sw|Q?C|+j4i;~bJv)!m0%Wm|MV#y?y~#V zedw7F%@hX*O+i88gZa$Ks^p8?X{eck;5e1baXk74`HESi0wPc0Pw@58P?I4cF|p-& zcllvCs82D4Hjl`ty@6I$B}iy4cgP}>Rn#ogtF z*tBLT!&Buw7;fW8^4DeDD5G$QJ~levlhMrdvP$M$&H{!W~K&wfnQn z3lW90(?c=AWPHv3SJBsA&dwZEAjMdA1%sfPL0=TYf%C151DZkDQ{gv&A>)`ejyk7# z7Y2N%w%e7Lh^Bd|$JosFF)n%|urAu$_@}5}8{)KK+R}uMqKfFS?Fz_o_QO7W6e~fYj3edg z!hxU<5=gP!sJmpxz2atx)UGf`>dddH;X^=Ts{!DdF$C=b6`qrQz#$!jP((Y^=)O(9 zQqE5EWf5=$O-ywZQ7T+VbtuAp3r0plEm1rE{6d(XfNrvJ@0Ctbn}tPA3sb$8v(q2# z;gUbvhcNX7NSrQngdR~ZVNB7dJ^Ga|s-(Us<`CDKwFu^m)bPFS^-=?I+Q!B__xwfq(6ent_o=8iHV6UpB|7{SWmM*^pdJ1KvDRw0u6^F z>cgarVsmcMhm%4MLeU5?FKAhAEBmQsU0o}8*&mZ5&Z-;j{Gge9V{EsU3L&$>^v#xD*RlIl^E!yD~W|HZM+es>2ZZs|Tzc~+ec)4Gf*SCzAxZ=EwxB~ov!Jt*|78}6(1&xc%$6^Id_2#y5ze#w>eOT19a zJn&A`KUBffA$kq?NAmp{QHpVc0%vz&mJhL-*y;O#CtVo7&~*my!}if%4bQ)!Vdl>) zKs`R@L8ABm{p0^}YNe3vZ`GD$?AJdN(Q>@ohHP1Fv5z)tQ&JHw7f@^k{mM;Wo3+Ub z;fLt2jaTFFniKwJCQMGs%Pxu;G$e34i{iOX*KIfjk`3-=bO>>4C{O#58cr&XNXOET zG7+1*WD$5=HN=9wjo)d!IQF@e1ywF!1rxvn?Wav@5 zxjI_WNCRgT=B#kqGL?9%L5gpqI-ExiB0_KuVuauu4iClVtv6Bh%9bUPZ{IRZp55P?*& zYwfP_A)gv*j>?dFAjqZS4t41=P@7^RFd_8iyt}r^04aQvuJfvLFU(^|+0i4OwQTPv z6iMw>!aVJaE>CLoUC|6^j{V1)0jXBMTOhFml#XQng@k?MH;wW+-@_K+8HqWY^Dlcq zn@Usyfv8Fb6~hCX<=mLWvbmY3;8z~4R{e3{gac6aduS}Br5Rvw`SqbrpGySU4MNYy zL1MI7CR{VATS-8Wh>3YOsjdFqhvlf}GYVNOzZ&iFJ3D4vy+FvC?J>;99O|asItMBT z+0z&4Gy%#{QUF3N+RF9>xJCKDTyN!fm-A@Lx3WZ+Fza}t=1uGANxqfnEgQkhiOMy* z@;VW{vBoGW1@2OhVt~@XFs{JPRf98RkM#6;Zi*IVN!RpxbD}wZoiQp;F-ugv@dbhj>ryG~oB_zrYjV<}{x2xz4gn&#;dqmD&$ax8 z@Cr|dpf_Q4{aJB`L8Im}#-Wfqk60PDPL=h$-BWMmRKlCRv6O{#1E)HMDwSkPt9C=9 z7+xB)*oth=o3j!GT$-^iDm{~0)*n}F0(IEng%+|11K@u;l1QP?3ZeHYTB=m*RUY@Q z53J$*@Rh+gy&%~>rnReQinzGRjEq@RqFzWzad(O8hns_G4{-M_i3#e>J-Jwi~{eI2>vj= z`RzT0WYE|yCP}gE#F^I&?U07ck{*&;EVD3B?bO%p^QN9;V{tv0sYHH5)L^st+Jl2- zRKyzHd_kU6FW$VaBTH%!;YM+17)+uHG`Vpp$1UgBmR441(OHhPYMq&X)60V_6yhlD zXy5b_0iei6MzQxCB1?&BKe@U~sgx{E#YLd*<^;=4lA}(!XK?L2o&Z+veie%NN6xY| zNYr#60zHxX>FrwkCXz3@F>K}~rFNi|IMolBidB(6!boi2pysr$jkMCMhC7Iw`?xnX z?QXAz@ovdJaYN0?&ok{-f#jAJF_E6(YlO;RX5g{22~_D#_N}?ICwR}=@tTf~^6}>8 zR2EwpF*!LIxyYo!<)y^k4m|@S(^=()|KQEIqR%_=BuK|JV09{c7xNvkJ4jkgnOp(_ zhlDYAlpD4V66BpE&bp=Gf~Z0(po6KOL&JobZZ|eI;)2;(^xQRQ8qqL{raXu?i!42+ z0AnVFBYF1}Yj|9KPHUNnk%JLz_EN_e28ll#Z3K^r%=#`Z6=nj9R>_N7r;dRn8D5w2 zqY>#43NA^M*p z`(%xTEqZ`z?0w4kq?z?`56w@^rtUAcL%>8Cy8OIqACTzq>GR38zUiu@7s@|{{{KI_ z_X>acCK?kHo&T)n|7?~eb^pS*I-ADrk3}z}S-UiAVfkQJtgd&=YJHtn$avOYLq7QS z^e~y<^sHByuWEG#OpLZFE5t}-bCp2sKlk&$WG_TLCn@jK?FSdiIyhH?LvQ9*7a4sT z9!_;{=D$CVLERgIs*u6aGC0)HqwM8n-BjhJz0qdxSL!43&84PMdHWgQ)4H|L^zpL! 
z!#a|8&80*<9gxomLy|~3-#CGp_lyGG2FXMG6nC1b!Px2 zz$|5evNclBxaZCn!q4kChf#?*`$0c%G$dXxh1`3lN7v|#^}NA!V=gp>tHf!q!}Ahv zMt0xzoM-+XFH6#*AoN}EOFbF%B3Eq=JcCMOF%z?(q(i&)(m(tEWJoxzmObqh-3;{>-CeK6r7jr45AAi3{fVbRafy~Tt z%-Ft^feQ&nA~IVCmV)txWKg&4NXLaLfl!Nixp2T2TeRC+o>rMC;%s$MYcm-hoca8w zfk7`fmG*-g$S2q{vJAgZP&t+C(f8|{RNU6ZfVT5U`zjlKus!`Q@myGcY4w-XmsNYV zm5{h_q}EncjQ-5?K^0L`RlN*Uy&@@vSig&Yo_Mxq1vqGLK zm-?BGg#aTgykp=SYnNkihy;~aP@vc`sze+>w|rcS<{BiwI+&dp99hCJw4yM&t_lmv zFT4YltDb8-0Nj+t?s$Gd{1h)kDZ(6zFLqVIc$w>Y=Bn4Pt@bLI_f6J>r{AB9$2-Dv+evT6N_;*n6znCJU1n}5>I{t{1G4rOCZou%%llt5OXevQ9s0hP{okysM>IYGPNeV%F1^d1eH zocG0PrREmTwHb%O=fDE;1uv%nO5qlQV8kzUFY>|X8Get`q)Fp&HY#s zSp1Iu@h%oepKvWx8N~|G#2RazVGaJ zZ<;=VgpWRXB9MoCb-BJQe97p>pSGuj*2qsbJtHG>?le#$CGc{g)<}65H9hw5HW~2; z0#AcFW&zba27x?yu9>p*(|D3pCR}1;__$=mI%U9+eq=m)SLuE~os^3sZZe?(t4s3p zAf0`ar)oor@1a0fg;CX9S4Auf2jtv*hR+lfZlOvjGgrV8_sX#B$`4cYrD(NU-H%&c zeL`-NW4vQGC?&6T%(AeKMmDVh5k?PyG0jfywh>^?LX89wT1v_(En#K3x$w$25-cn% zfq@2%Ht2lDZGOV0KAS}D94^{Y`B5vAAZ`g~LKf1~=8LPV5 zZF{qk5iFdIpV6%6_M@#G9Fub7Xpykb_WA9uU~9=sn8&4AvnSOQ)Mz`NeaCI4!UDMl zT2l7fA5j=&d`W=IKI1)EgrK^7xj#*jt*2Ja1}JJy>tT&s4B{|-9z<#koo~oM5H;PO zKYuW5gzd&V!Gl;ljV18{mzus%ZEh<>*K!e)iMg596)hMBI?)TZ2%XDHo~o*G@w^X}>oYS^`JQt+HPe2EQy>*0n4PvJW%F#UwplN%KxEPC=@N+Z ztxT&w65TyN>BdRF4xuzpu8v(_1c|zh-`M8~+xactPciBpR{|KJ#nD0|Q<)nEcI+Em zLH&Qq>f~4_TRfoL?-n*aG@_iYU^pirCbll^Fkc7LySJ++~S$ADyWv&gjV26~r zt^(}I^~+-~wEz^a|Co6@SSv>Ua-R|@lStry=|}#@%nL}Vx&C5R0dFOyeazTL6?fha zSLFEBJmMi*#EU!d(d{J7)amHx;6aH|B--+L=$TTq_kZDE-xI!^vivi5Wrj7#|MC~(=>1ifF^4lk_Yvo| zCn9+l-Fgd;5nm~!IJmrEa1s?eskx{_T-rAnZZn4UmwI-#u`*H*x&tApEtMpa@Ebn8-4Q|kjFwXnpBJNfGtF^VRw`(gajd2aqh z1}p}ea>5@gex_tmro;3ak&(}s@{>LZrT1e!eS&97{~&msh9X~*1t^D`XT9r|Fc%&y z&qs?}or07$^I2ijFkUQZaZ2>sou3Djv&nR$xp6Q48@qm(DR#-S|8*wHh+9lUgAgTn zRC-ICC31YYf2-&+fuekFV`F3G{#YMZiSy0dx1WOqD0Wx|D&%SGrt!mJGw|X_{2*)~ zI<;xHu=a=0Deb2MJPquxAV}etxnE=g97)5K#stvly&5MGJwL~^h^+#vUVOxo*12~= zIxA%zT8%i6);)fd&Z%ZLJsZ)uz65NMX?~;fC-Q-^__*5LrahelQ!B_!hHkk>%}Q-t zt(@E`2GT+K{Ue~l>jM(D2UFdYoXC`2e^vs3zpwnc{->y%>)4uvE5rSGi96Jf@kNNp zc$7>XN9_TsP+!;ouDp(s@(#unlOS1~%)5{vWu-p}iOreq3U`AeO={pYbwwpUNqQrJ zvaRF9fZGY0FSX*;A8P+y_w~1KXVd|n%~KL>zf0u*>5KrpYoGtJam?u3O^3cn9GL7j zGu_Pb9X#3!1lD&iG1)bol|ORQEStushW@y*>~!-J^2H9vR_yF^oQ*_x=y0^Tl17F5 z)su=Fb3o>}Qp5zW^)BBd)vbG4$W~EIEp-&B(}6eFhC`$uKP~w;i^UWpMU+dQcr;I` zFT-$J0;XlKVia~?f|F)JD~dM$0bStl4rrJyvgI_Ohb;==kTv$kU21uc`!x^M?=Hwd zcC+@#X2e0L7sexq6UmQ)9PDVbGBYLRfYoo3Xd1*UInWX8!C1sFO7^R<*KHxRL4#oe z&C@o^Him48Y_VCMt^grO1TIE0C(?qwxU`o|WT@@oNlyn~=CRuu*}`^N&31ZY-KQ1K zqW*(^b|CEmd2mIRUn`Yx7{6i^PCp(B<7%nd^&2HEq0a3N6Kh)3x|)TE*xgNox&qYm zwt?QJS06$xZiPJJUZzZ4g$A!T_UUKBaL^XYS1-NV2H)Vpt+2=MejbQB_qzd z*yASjoc7jSEu5eP>e%xw!_8rU6IgW^@8tYOcy$NrlWfrS(5?=VvXc|vt8E*SGqyp^ zwCP53j)jofabV;w8xcLe?u;Wl)o5;=em9l$4cY zDWJxxK+BFvr~(mS3#LO7o)d}XYS5s~o%5E;Gd81$l`Rg6o>i}epOp^;`)9^)htea_ zoXb1f+ncMk)u96eIWnnb*w$znVT`6p_@SX;c5ns)S{b_nLg?8e5Dk*Wla{__hlRLk{{ZX?Ci8j+LyOeSN=VQ!J(qev z#29&9HN~2GR5`~U2UK_+qYDJkJ5Yj6`uFl~_UIKq0!A+DvOk&?I~%_f$BMO?6RVL7 zpnZQT9W^OL@_{k?O zc9&Y4U3XzZY*7~dqo(d8LS_4DS#CO%CIk*os}VHh`X4T_{t^s;8BPA7W<;*nrS~5Y z`d%qW8Ld}s7LD-U>QwO5t8>?D`SaS>H5$-f+5a#(9nb^1_vZLeu*E+u!T)&H|9<(e zQ&0RzG!r&wGX@5DO#NLv}tBr z({hV>h?!W>%V@|l!^Ge#(s}=U$ux7E^HSE;NVYmDBIB4LDte0wnbNj0Vvr?~X@6{fgI@-cnQq|D;FBO*-+iFK zA9QUv`b4V-_p=oH=6@ zq_Bj;dYZHI1vl=KN!OTYP<-pQc#_#xUZMk*^`Kh+H~6OMaQvRB8Bx}z>3u)lt*Dh2 zKxar+q0U|ULK=owfY>UK>XATS zOexIDI?rCGw!sOZ3pAlEiZrOLKSh)<4AeZ=l$=O;HRLTey#)V#P7vRFQk8Z`PtN12 zsSj_%(2R?ADmq8B;0GtPwpK!N2XggCy4T*(ZJT@=ZwF|<$;KxphCJ+EJ1qzyP#clz z3Gap{7?r+&4QGnvVVGDFfd;<40{ZxnAXOBsTnrD)pbzq25vm0h)CZ(wI{(O?6BQNL 
zK5ZnbCuC(gPf0sLb104W=YC8NrpDxXmf+A&7U#Oxj+wfTTl4E=-k}A;#J~!8BY@$HKMsBhn(?_h*_0^BoL4>Px`As@7Ez_GBUxxwH={T z8*K0vvfWm=wC3|f|DanBC@iCZ05ij7yjaM*$K`pXDKkHR*=(xpT}1t|9~ES}bnYF2 z{`P;;PZczOJM>S4(_#On@c!Q&IsqYYN*#B|WA++-5~g!}koN^0do#V3-WHX>{iX_W5v8&W zK|3zS%&i8o1*srQ(Mva}`oM(tzR4jnt-JjBSq8qs>_O~G%mLpeMS~FfZ>&|}IK9o$ z`mR2AqM>*qpYmhwP{tj_Obo91gfdu>pH1PHb}gemKCXv2raoDS43kcvZO6RrGVe-+ zXTzdmmWfLsAy~6kK5r};lwVOv0Uw~4w_2SmIwM)N;#M8+V*0LHJ*;~vB04p$!c+PT z^7VjK-kZVOUNWA1Xblfv``}Q_bJe0lCX~ck{ElemjTBg09?1>0o(UgWJjtDWP7;|a zPX-Is$RO7 zx6Qm@HHhXa;@plCyTUvZRD9j}cPV!j*m`j#NfAtsIWNis&m6BzcS9Wb%caQz|LL;B zqJ#xnqc;BKS;fv2p|c_#wcE-1C@h~J9P+9&1%m#icL9X9RFa-yhNOyfJPWseCvT-w zP72;74aOc+#qMtV*!QbUhP?J&O(mkbUs`MdZD)mO#3%l?m7z3Z&PsS<6dY=YBiF@k zViAD2>ChTAFE?$)94&mZry1$rC^5~>83uAP%sb(K{`|SJ;65S;FggbP`~$!FN0v$Xcsz^i5*M#I(@kfL-t@>>95q>?5lC9Ow3Tj$AH=9f; zb7$(Wrn$b?Kb(Az7v=?V^=cpaahlO=bt?T~>S?xOauYM~T8?GJb(4)G+Qa44GawUr z*mCr0P-#=y6}ha_A55FIaG?Iy#rRo zP@EXJ!njiB_$#nPH3Sz^;6Ec2vmS46k?Ji2zGpJ;cOYSlE<|uPMM+-@F}Z609G`x( zUY(l1PI=uNNbSnt6e+j_V34hZq3E2iSXn#go!eulO?`)sRZ8!b${LzD6?Od&KF(zD zdx`$&vlsqBrSIEZJNVY$(bpuUY^llTIWtXT7I&uWKonc78dskd!Z%SMvRhEcu2@x2yb&Ny zsw10LQtS!(8V=MGOHc(GhZ()V>*Z%$I6oDCDk~Fz2Xy(L@)T%Rbiv$hT$#^^N<5sx z%Eq@BYaFcNPYl z9A)DN!eumZy8fbsy0BH8k@D{8bT*f3Rg#q$jHhsO3G3G1`jLHmn0$H;Z~TAPi*Z@t zVzt@GIaQ3bRU_@!CDF!oxZq2#d_j!7zm6$T149{f^mMf&9@Y#II-Qiby4Q{|PKM6l z*SF@XjTquG$FZ1k!*!QUf+~_>9tF>Z-~;~vC#IL%Y+dEjJ93eY0rn^iRn8wgLR_OC z)An`M>!+4JuL}9uiWOi-%c{%#u>c40b5kse#L}}*8-UiJU*d#qb_;W@pghw>aUDP_ z2(x1}keQxW|F%sZTLH$}57|Mbq#=sYY|Z2GaiL_oh?mn3?)VZ~kWNn~4BAy>BNYIQ z%}&KQ^{tIWo0oXD^(dT)ZT3s&waI_}Y;f(o=I4iX1h^d&-Y}E8oNo;wdmhT{XlVch zw!E-)N$ggcDu_-|!vlcMS&^|Rf33^EfB&A2iwkEt;8}cYI1{uo3;@`7IZFH3Y%jE2 z5sBv-h--AHfw}bSEP02(fn3#<_l-Tg z=B+MTx13O)272p|jg9v|*b7jEjr^0!Gh7l;iwKEGtcEj$kzWHgGn9`6=EM(dQqyJe z)}(|7oe(CAHrqwMVSO|Iu+mZv1VIYgZ~GL3zJ@$Cad2?K7D(hMv2>qTGq1=+LfDk=1}b%95f!!E&RKG^*<@Bzg2zo_kBK^be;a^YWTmj z><?w=juYwP$hfl=v8;0J{dxyx+Ij4LDhcAnX2$HV z@n6(OIHag@n3JPQ4E^pl-6r{KWKpS}O4y@O%feSim@oKFa#xyoz*jNRr~(O8W3Nkx zin}W7xw?KWRi<7+s1;+dJAX-3`Z0)o;eYMv27U1K$K(9yg2sfk7g@YO=X3ZXIBjfo zC?1Yvv2O-wZnxPE0v6f#PcK>=j0G6UteU(!q`^e7*%%*QNK3DUukEztwjLIe_wm|h zNeb*EzAX8=?t4@r_ELGOr*7HtRWI3fK)hym^Gf43wXE@(#^i_gi0~*FkdjZ9MsBZZ zVifW9Or)z9F0ud_zVB1%FDg%2L*X^c;ISYnS1?1Z6&_emI+9~NHfAC}{f_G4UC zP>cgtmm!4&nq=i5;D!MiOQAhL(r#WWTFo34PK*C6*hQA(xg)bP9AeShidVl0<`;Ut z-BDyG^#qcNmziRkGeqYxejKHOC{JTprF(i(e+@9>c{JQQk}$Lj0v%R!m$@1{Qa;;+V)j?B__On~XRen6mOEP2r$y52B>|@{Pj5e{3hsdpCOiq5^`aosLDDMAmNZck!K~yl)*UG?E zT;GrjWuy|emEAhWUMKK@PEn_O-n;bmsi#oSp1;bqW|O=!!9EMbs#kma1w!hX|A#Ze z7@EkcDy_lvYeqzL_A9{nWcxz%vf7X(;%c&EtNar@`}z6#&>SX>z@Ym$qZGSpsdgzG z|2q;zD3NlOmxsqC({R;?feMd(J%n<#RPG0DCuHYaVO(1GSk}je7 z|D6OCsJ&vN_xDuwic>=-yT&nuN5i4nQf3}%Nf&;GaA{6Jpu>lzKz1_jijbReR~?} z@(zitXIY{VKZ{_i#codOVuvz_=Mk+HzlX1eDm{6^|1#%Y`pIhjY{SMVVNW;#ex8+G zeS=1~D0zd*DWW7>#pYe=`SS97l&_>p|L?pto-kMKSsM*YhL>u@s(;o8#Dw%bI6fU~ zCR(|R5di7I+#LHzhgEHHuBIbjgR5eI!3r_X$`W7yR9sksDKS&zih{i85E>enjI6-uTyI>)?o4`l3V;!>UtXeb)8VSS(-{}lGTLoJtM}48^o$TLc~E+n6LiD%w{2Ey z%j@audlX#iskB5RR+C)c3fb$>ePjrI8SGL@qbJ~c(8QmUCUqEIs`Q=&mK}a7VfHSv zzKwB`N-DvZ>jLI)W~5tkkm@dqaE$hbu!lKCB`65q^3v89E(X|`Az+@@Ayqrgq^KFV z+!sqx>9i;U-k-t(~BQYbKJ1erayIXDJe1i@L0uKiSio;saOmPa9;+R@xx7j;Wlvo-7;|F@{V%Jn%A zxwfNY>s--JlcO`+4V@!Eucg^xd6e=%ZjexuPk=1|J#%eley3q@9YoGR@V7PRxc>B^ zI3P?7d13CD)_gW@dimkQhntlUoWXl#tD5z@ePyCUU?JweB%}YAsuurS)xNo3LF@lH zlK;ocy|Vo`W=Jw?%IW7K3vIugnW``%YIrz4s%SPBzDV!g>JWKTMwxMQ>vOasG|rwKx_*7M5fg@Vn0BkLrSnEzQZF!r|9=Pe2BKYg?ud#Mvh1t$V; z2SW9t4(O-DJfe=ph(G$g^!Cfna@bLzfO?k$*{$wPHv#)-bj3z!8;@cfW>5A8Dk8?m 
zNy+N#IHcmV;f)OmM(&SGRuCRqVagO0HF?5*sv*~08Pu-N{RAq7{sUb{Oo6{7gRDh% z*fF@*1j$x&++$Kq>-`Do_&{jt`C!@4uj)6yv@Pmfc*WTxe zV9Z^D5N0Rph@>~d$7ut;7j%l1**}pXXa$_cKjM2cro$aS6lGI5h@F$moz!~ia7zq8 zdz3_)o_5zd8!jd#LJhoxx~ByBauRqX9+@brx1ANFU9s*)`RmPqUc4vlCEpwF%#=7| zi}qZrgM;!aZ!SJdnRZ)-F}th8&9GGWZWabGs(@IfnW!h>FU3D`hIY%P$!LYyh$N36 z39<`}cEU^D7M`Do{TeuEU~R|@UYNbY5#a9AMBi+May8U=OlgAS`tTMRPv#<$}?Et{%l5I2cW2^Ecm>MA6Z~@RRWd_Jd z?|BjA_L=B?5r~agNyixI=S}0%?DH(872qO*-{v=?SkOC3){HjPS@qolV@LLpFhp);p#-?3=QXQ}` zg4>-;)Dv?}q_46(MIT|itdZ}`{jF2&A8B`@Br*mRgSf)=!9f)SZ+mfm(-h9%OkTTr zsw*Wm`eTiA-)mPYujX1(rR8zv^~ZWvRobSd-mPR&Dsx%TC? zBr6moBsNmIL$h0DLQ8ia)!NC;w*Pbz$AZpBJ&Y#hFY8#!qAd5@s+&qorAlev#gb4^ zfVOfUHq*qc`Ybq};b2ND3bl)M z`VRaHi~C;_bq2g-!C#Gv%1f`sf1k(yJ#gl4BR>N9d&;zR(Y9GriWai)ngVS~P9ef- zA@V6Q4qY%Ixd|=m@vhl}B~Sa{1_6aujDyt7uN56ui+J6&n>q_$Q3B=2NwlRpl4#}< zk^jw)*Z0a(LlC~)a&1PBvr=EYn%o`L+%#b8#l}cE;v6fM zOj0zR*d5}x6;q&fxXPG9I1b!_Za_gs`ElEep(V{_;B6~vPo09FH*L7rzB#3um(;hk zchPkC8kK`T`iW0;9I6i+Oy5mMiZ0E?q!4ec1ntYO_7t-|Nx@%Pz>}hM7M4-zQBlXrAD5HAr&dHQ67zDriPQ=1_`7*;%e%#T^9E{pD zF~%RoS>nTg*#Pp#laJqtX*c1)?7r(u`xT{cpcgXIHryL=&Wo0Vlr6}Av=rtPy9&1D zJIWCK%2MEakWN!*wYx|6b5!$s=F6$#iV*MCPph>+g9SH3nm339ffs`ev1-U=lT8$d z=|Kj2+i)I}DlZt$Y0+%52QOOK79O^c2Md%PS*CA$(0&UY-@xq%a+3i{dE0)<^oA|0 z`RXwP!_`ZDOavZ*oFvZ1{nxtsU-$#MwmTwX;xzzJ<2_FNoyaU3A5S92f`v#3 zNXi3QMm3}k{(?peK?o@1g$A`Se(`YAj_kXcE5Q690xB1)DYh;Dw56B-1TTpc{YMzK zGa}KH{HmrzLg1TM={_}KG3iGmwjXX94!viA$~Z|E*JF|B3&wok?uvH)Kfb;?DC)TV zc6X6*Nu^6L=djF>-~$7VVH4dKRo9- z=RD`kFJtr1^5ZD>>=bPk39EI)Yo&W#!PGKqrTz{2tu8n%x|I2s!fBf?i{qM@XM&3y5GzVp$zF9BY zehZft=a;wM|zL2Y}mS2V--z zqJ>ZHb9L7{K}!zX69x50KRdn*CZXUSME#i#%1A3K1&f;s!e8t!r)dY4X#VKW^0M}Q z!3C-KG4qVw`rv~?o|+8oooGw^d5AcEXK}{}vo-1F@UqrtTpO>QnYRwh@KT**gK5@F zHPPy^vBfaz+V7#yS+r%jG?I&99rO`GF?Mwe%VeziZz(YkzfoH?e^PnAsuvsN{=`06 zf3Eyv+fL${qE~5F-got#xfn<3WO_>)XL%l#HdoD1zFxTDPya-W|~alKX6{F50_i4yN7?b0%mG zhk$QwE73fy!UIGKe&dg{H>PurTzzL<7bSx1-E&L0u<&x84M7{Y#y zA|67^31IV}_lwU+1rrdcqc3++v3n)S87_ve{(PpheU`y0r~8RRQ31mzIGg5+gQp97 z?sjykLJ7lSTJD(jY$->7LW4h3+jS3vd^kP0a0(5k@AYr}BN2JPDB;Z%)TWjQBFR zK2~Z>ifr){=3>DtOc|c8yWMc8B?ry{e*h}Co6@~twZY}|KW>^QoH{=hS7D_zkZ+| zl8%5#^6i1d=5yZo2e506Dpz0qW0-e{Rs){vK+K;kBXfWvVteC00V&C8C4DoKPZnJ;zDrfsZI^4b(J8X>@OI3$dwJa7qi+A5SKO z)cbvy?Bh_|cvO2a_us?>xqiGFN12A?vYpERnxYI8OD5Wgo|*lrQ1JpsBE!w$$8(VS z^U}GPeSWV|TiKkt&I}=5qYStSO?Ixl&qa%B=Dgf3Wmjax54Z+-DRsoE5cq3OI8MT5 zSTg=lg+6VZHKVe*QXn=>IMnU&B*Yz!H8tO1?@E!#Zft4LNLeLaNm$9RRhMSYti_>2 zT(-TP7g5{esDyd{T+gGjCR18(d4f7%#`J2qL!6agNL$x1%Qd#()xkkBJYIItq5W9Si8 z>u6!OO;oc3hQ^J9rhS;%4_%T@;;C>=V2ta_^_48mk`udg61vj%5L}*}4Rj{`n^q7pY$YpOt>kkjop0kc%F{LM%dR zK~a{TM=*9Uw|33QySZ20n|XT!@#aTi=JcZ#`Eg0laDt`zYz^5wFf-oCyD7MOe>2n{ zgX`^Oz(0`~ZgP@6^dm>s7-JkVJt6pRO%5;4VO}?S4B=^(8~FEt2?X}@v*fzId>%3! zrB;7oFwYpMJ3ZqLQ&o)}!d2I?bf7DD{O}=jB$P8Q)T-Tc9O5&9^zbstUd|s59HTC_ zLIjWN)_iCC-T~~q>YYoqS_>@FvHJHemaJeki()@Rb5*(yj)BfLrYroftd7y$-ZnEc za|-xY2JQXsv^DGOQo~>ONyku1VI<*JuxnMTwOvjDgBGfn-sUs|WJV{UGa#z(b8y^p#h z&U-meXoJ%C~_hVL8b=BYY>9k1Q(!lT9KO)Pd+sS~eJ6ta+c!;-<)thY<%8(TfOBpL} zV>TLBwGfRC*tv8}v|+96LBnA<9&W364aWmDtc*gkt-2{p$v8?auC71ARMnS;dN1R^ z4T;+}#PhRAh?ahrt&QPvT!Z$lLJGb4cZ?3UuDYU`P2V#{O$y!Eu;dXz*$!l&uOi(ou-_L5eY7Sk%g&8YNnF@de*hM9g1yQHrNXTlQM8;;>XU=)Svm~S2)3t~~8t4Qad6zWXLjA)Hj zQhdajm47}?(STq|b7+WaMbd&%ypWED#mjJD0+tV_~*x8Z-fcP1pkCdoSX8ShwvJFm_S5dH+iphe4-+3#h)1F3)UjBh^KjAP^0iD%_igAM>HZ!u<3<)HeqeodChAy) z#_-0e9Lebx|JmOa8X?l(BnX|@Y?Dxu-iY~YN`JawF^`3~IY|sS%26s8NRuAjDA8+? 
zm}GOt6`g%r-%tUqGSvU`jkdo6l3Xghmg8wwkUaUBrcA9U@kY3sKgTV$ zwv;BoSz3e)#}yiAHg-l^LO*dPW*IA(2NKVip8C<=S9=)SCZ5wPS?qtw+~zG@{(*KF z_xA1GmiS=-#5TNy#_4KV{`5eoByuVjGjk&^D4E)59fra0-I zJn4-0`?AbZonKSVX1>&CYHCAasJ%nMk{HS;^!uv1`0;dhPfAChTx#CoT=G4BdwH|f zV4Tg&M5l!ZN*aC@vE4c1v z9Zd2U=he_8Bo>V4LS_m)puEJz1trC8EUy`$LA4ycnI|@A*gdXgb?Tghg(VkA7SRUHFyMW7Yc}UY!_%W4p)0|C?Vl9>RK9?B<6D!#(hN3P~gas^_EkV&YuvC_-}W! zasa>w5)l?=TyG`ib0a`QA(0_fBDc%a>Gs&mi617!F2~}U0xcrt$!%k-lN%S*aW|Sx z_bYR=iMg5zeUl`1?;TG!KLUS&rxJidGRGvHExIj+qVEV&<==mE`gVyUmFO-kDU*O~ zu4P*}4+noDx(&P-PjA?>KHKMGwhRT_^opUK6RxhVrxCozYO2)lN**SzzrBrMPN)<& zp5ihIGHYl^Ew#*-l6mSYsq{{r>(J zreEaUaogJL@I80;QOngT=PO%L8>26uUMpJ5jmqoxzn*r^ORKeCoksER23q$GWCm2pozf>o2e{nT0=v$A zymYJX`%F=@GYd3*-E<1%Ugbtq3rs)6fMK3V$il^18ALcscA8>rVr@3j(2Oc-h5VcKD|lG0hj z)gXL%DNgEXH5GnDh}^&a=Z&MFz6*pR%n=oMG_%yvy*q5#3J?{xK4=B5|XIj63- z(c`$avb9y(PUi#Vkk!l6@kfLlc70M@K8*1lB1Gix>I7~jNGHrkxwujTo5s)PgRL}ONk5OOpCdZV4TL3z=EO(vnOfQJX+=JFrnj!+piXHP zzseN%eJFYD)cxhvCeSc_joNw$hjC;hZ~DocKs(?e zJ=A~M)zt0sIfQ+_%3Lwv{1ehTF!!#|UZ${`hWrgLg2@?(j6y;5j9K;GNER&IHj~Kt zFSxNz&N`6E*y}Ww<%2NE>>d!L2teYiVm3u83?LOSwue13X$@m3MxCW4C}Nk8Co{KD z0WPl3H+TysU#yrXR8-60aCX`;8WEnZAA9-sGxdZXS4yjAVEE1p0z6oB0!i?=owF74 zqontSNxn)^iS%8M>+E4qYVXp)hXGNZ{g05dPk~7$~ir^o4W5T*_^gd`j&9yzc-bIWgUo{?8mD1d$gWHx| z9)JDkO|%};)Bxbrqiy2dy{7-PssecTf6dY(57dxlLEHq^3`VDC;VC@sfo+18TleSu zj1?=Wtnk8^ug^iWSek963#>#AI3U?yMfDI*`{Lqgl`r1hm10P?npD=qByy@)o>Xm4|Ltjl_g+Y#a;{yeWY{t76H7s!$j^aS zoN>*SIK#gudBl*YNtSr1R#r4^)80%q+d7S}3cwfB<)Q`m`H5w+ zL|cDwI9giRM6Iq=<*1BG+67hnYzgT{e;g`;XZqa9mhv_V)|E&w>MK(Ukxs}fkWR~= z2mKbKZtBC~yz6MvhBs6q@f^dI33;-|XOj~XX=5Z@f9SX-%B#!!6D+w(Nwn8oM)au< zpt<-e@P#GNOv-4@>6 z+F0(V6uxCFcT-25rD)o9kyxeGi$6e3GzT*lzRE2q0CGJMkwtmj?hgy*M-YS{LS8D@Sxmg~0Ib<1N0DL}EaO<9 zDA8uO6Xh$m_A)T%GfFr_DVuEFP*=C4^8lK)&-bSbx8rca10;tY`KH2+rugI2T&H_U z*ZpgTj-2KVgQPP%<8uSkUY}cb&}hl3DbxZPS2U42CI*LpufB{BH6DAId|wlD*|FO$ zP*vN{v`NAfEk_$kR1kY-ZW=2-`1_3j%C3;#qxS)j@2|SF6yeqG=`ciT&n{|y4s;uS zILd6JnEw@UJ08U6h44()kmMLXUt_IK!^p_#P~F_Dqvv~L+VKakPh&f8EL%Ee_#@{R zDz|LcB}Qjv%!|w~_noPZhn56Oe-djQ@D{kzEX~^-gD4bF*BYGj{x(wJW$Cy1)|#Ii z3k+kA5{#XjlZ*`yka#O@k0et8Ri3r%H+CsbgEX}m%Z}g_=d;ji23^Hm{C^KZw~qik z)x9_YVCsK@um3eGt^dx-V)yo)r#w0=9V5l0S;WXB8-o^FdA*Hq0V8v1Yb`Hdq;^2v|FSbR)Ad<@WOvdNpaocvCz z^yUUsKq{j%=k*6!dnO-l%l(`Jp=7hr25$TFJ|G z^s{KD<`NUybyRE_b#7xY{~flW2z)%0aty3hvbu?_(e{a1O}+uabfYj~ko>a#jpm)@ zR4z@JrbRo2kjI#oL(mG~D0sN{5)%v9G6Ln5;$LH8VUgnnDfWT}001>buC>gOh3F}=0Z*`?zw7HEInO&8WyAj?>eHCk5fM=d z3X4UlS5s0K8Hk0uKfjudbpMt|T;ghr6u}y|oW%xZFTk=@(OLDh_TTGmj#Yr_M5Hr| zNE8v%Dw!9iIc`YYNbE!!$CsLeVbMPABDDZkzBJqHML+F->ldeO-!gM)d$KOVDshqH z5Q{)Pyc#K{1>2+$>X(~h=@p&G0QRO7~)x^eZ7UT0`zurG$BJ| z-hQ8^IL0?@XB4-0(?yPtWgzO_1NmD?NlA@Q`(A>xypGYAm$M_>`(56LV1@c3wUWxx z!_8o-H0qwlc%Vp|6MyUWtHc1^uAN^SkKzPb8rH(`f8byLJBf2#46KMxqfN)w?*Bln z{$p8xNt{>iJ8a=?S^W*R~<~)*oT^V&XC0 z2Ze#Xe7fi8fku7jk>0M%9Mmcu#hkv3r0(-(ft0v*D}mvY3E;0EH)MlbP;|0Nr87s7 zckE-xgz6^^$~Q50X{IFHVq~h6-Arp7Z_2#l zhK54vOm{Q98-DGmWLIcV;K4_E$QKOBNU>yiuN1&9CsNX{@bP z{1)Gg_R}K$%*Fm~{!6#PgEx||Tw~=_L4Q`Yed*j&KG6F#nuw4V_7Z_pYZ6qL>637E zWA=^7-;t+&E~gm=&UH_8Wl9^xAYEn>?QD0mjafR2Nk%WC@`)vzHB>^2SJbG8>*i=; z^S!LS3S)Mcp9^rw&dm*w=`X*VL5HfEZOJBercO{&O??8JZ$C@ zKp>2DU-OwG?+C!)?O*2dM@DFtA`^b!3t^Ffy)mc#{2rm6C_5vS328}^q<7snB$9f@hZ<3Doz5VmSq|>w0cj_G?KdC8jIU7Hy{*7!tU-4to313;T)qbUpXuPzYf62$i9z*Y z<|EZWtNkWYxQdY3Fo8De7|!+qLFh4NBddBpqWk(^8|do5)fa4qNT16E$)suVTSdRy znC6@9YCoZ768zZaeA68w>zGc$iGnXU*voV*r;fOEp{>{$`U zz=*{xl_3NE_AyAKhAnVovPdi%q`{XW<%IAXn$F(KDov>XN`8n431_ zkR>&EQhTkFBwDfnO~=^(K_>vMd`C<*{T-LQdGM=Q!_l6r)7Wwebk}t7LMWa7E@Cft zeV=N~aY&2~j?dLh^<93C3LV`jK!4V*IL0n@()8jofwk0jQryq# 
zF*`T0y)QN7v`zj5R_!*sgSoZv530=$yU=g$?qXP#jx(-;OCa;VSd zE2PmhrduN1N-!6@*;|j_4ASBhhwE^=W{bL(R900E=olKyk08lF6qdYsC1V=9N451{ zV~#8_UdZ!`U$$X`IbO~9rsjlXB-AE#(v4I=Zkz(}3SmA4Lp@ECvm_z{U2 za{7x?OJ6_zi|AW&oQ%lNU4Z{c0B3mkM@R_tSKcqjc{tS$7z{>7$VP#o&=bNYKm;#? zIBy|Tkiy9zHz?K{9`GxtRc##TPU^j7)_6KEBh)xC{w1tOEim9YA*fG{G-lj`#+G3p zqsYK2bak)mRX92m;O(ch(%l3P-_e*0+Ak`da`yVB^YlE6wiCcvd#+ImYgL-`sIX;^)ixm}aP z$!9Zg|MU~fJD8Ya>H~l=IliX+s3QQGn1Gag667@b$^|-3RnGA68G|y`_lx`Ogo4gd zw-KfXCNup-JHR6-OZtZ5F+|HUHoUL3SulECke-3XcUFN1tsod88J5)MCYz3-yTCaz zd{pYIFMoCG&`NFCk`nG<)~RueP&Jw^w6?v~)T#H94c^68LBrkd_Pem$&*}FxIGIVK zyxj=8nkjJrR;4q9W!Et}t$V zz+dsNB*Lj~mN$Od{!?ScyF`fqbS@7dU;T)F#j_Yb>emG7HKRe})99FA6<>(HVd>MD zEKoPNS_tIZiVjkjO&z^j%ljT{zt4L*1HRleZEfuE-TbT$KWz0-F4S7CEfnRYTX!GI z_KhwvAh6h=+b8mxXwuG+@u-}d;cy=w7abTUk*bSnoCTyun;nwKU%IkB^skxn>5jfn zJ90_(r2z?LCg!<=8y7XF?bb`CovShEt}=tITqT&>s%@TfIN4IKSB+%-nj5u;pE44qv{cx!8#WwZ_s*yE69LG$nHKmB>YD9K+S&``Gr9cfWN9^Ri681heIt3%RLJm3}iCSoU51s zA6r(7aq9ZqR&l^C-01jq~b5yV1Iuu&jZM zf$}T|Q2!M0J`o>Afbye--0#RjTIli!kSSPEVtfxZ{an*l0J2H`02ETmduqUMolE8= zvYdhNo2*0tlI4LZOwW=Ke&ga?` z)5*{%Rg#RrIn9dD`S$SV&vcdBz9kjc>}$9%X!0OFu_Et9 zsN7ZkOEmPZ{X6>>_K&OnMa97AP8|QPeD!Z0Tl;9o7gHiNd;)RYWMcGRvAu6B2%t6C zc^7CLigU>RO0k0y)M*v?hUxB1MzcOw&CJ_6UydY!ibhx)IwS)!A_R5e%dBe#gUwAd zJF8Z2^04&fjGN1=b$mY^%Pi=vWJg*gh3(EsV~{P$lNvZ53VgXwKY~>Rrm~rs_#vKr z?y?OUl~39{Ww=6+Z79*;LWTTGb4 zv0kG@v^`L$-%mbzPd4B$oja_&em;qB=I8e9CO&+~@M?EVN;UPekt?r8@jpRb=@oE56bq z+&Z*8lc8r~;-bb{cN&PMKtg~V{~{rViKG}hdWH8GDeLO4q#1xP!Ot2%5XTV2+~-uzzP>k=m;f4|sJtbmDHP80bi@EWX# z#=LL%YUa>*co|U6&uc z-9TWpj^TP2n$5^?e^~2clCYRq3p-81!eDdCJ4AzELY_IwH4ZZcyV?kb@)i*LE5J$f zdk&SUnS4&yJ#~xOYML666Zyb$x(#8UJe1SJdgRpcOp%}0#bRU|+vNGct){xZniDN`S&&Z9V875BF;FMndcm)Jfw9!D?F zL4rHEI*ZFg{OCX?-QVo+zwYri8LCp4CNXHJl@7@vDD8(BqHGw@gS`^Z|8 zyZun2#;|xZGv;PJtm9@h1N{#WmSqB@Q*9RxbL&Kp@gC3vPUs4PVpRNJ#CG)}wKO=g za*W%QT&?D}w3-dNI3W))O#61XEEtT-4)nB3J2Y349bw&v_+9#ef_Q&eq_Zhg#&0vhD z30G`u$VeVMPDpmUh%?hacp#s%3Uf9h^iaAiMqTz1iI&uL4W@##5G5C!NU`TyHTUB*$9Z%^a6(80$Q2vi9<5P|G@Xm>l*+FrW=?mL zmvHlkV`ayPsiv+DN*t`srwRGBW{w}8`XQ6j? zZl)-hIr3QYMhp2wLqNM8gN@1>@%|aJ!~sy;K4d+T#No8{LmfKW0!u(9{RF#|uSozk z3g?~gD1?>c8U6eNxc1QTc6>IvgCS`0&wV`1$hy)dzkiyZ2nubKlC(U=2RKjg=NvK* ztWd||APo->52Ou%B#eofCddmw;N3oG? 
z58PufPDu7hHl~6&oVFM4sM0=lwyTmKM@PRbLanGr#){`-rniQ=dh3IXiV6#NX>)RN zwzXCO0mzHWZ?`ng?&fB#6w?TDAlIU#nK;thPq2)2t=+9&!GWD33LqZ`xLPmCSg!!9^Vn~$0aA0cD9490 zRgLiCcz^HmlG~)tTZcfzuQRe-ih&^;SzL{atK+(N>a!Fk1GiQMc5>9~Qn}q>8Qbev z#OnE{zdz16T@7jHFy@Y%Ykf7@rbSC8dZ=!~xRrz6{IWnxc;1emA(qkod#Mg}eE%yt z@m~zOPtw@-=<+}ia0x>IU$`C48%D$*V!ML8K``!-p9On`bXgeGcL<0 z#da9gLHa2Y`+*N{5n@A_p)-pgh>S)N8AUDE*=v7?JrgB)E(5XYZX&g&<#hNPeZ;iSLHcVi>(%I3s2zbpy#DiVL(EMJnX zUFYaG50&v`dT48P7N#>BnU6;Y&{N|FQfhR_~7oZ6ifzX3GUGP zHpgPt@6SIT;0`tZWY$`qx7(?)ZBi18g=yf={qD-a5??r>%5Bm17^MD~de)ELrZH@`}aI;%Pa81?vN1q>+K;~%a2{^aKo%;l96V)E0; znJUqz;Ezqy;8?6vp5%6>*lLlfx0kZ4((0%Ei3m&J{wNWVY68*`^9i@nxXf*?0E%eP zh~91XDn|a<>?6kEG9M-JxcH%NvKNiXv13NKOeZ;dvjO5UI)0^`zi!nRJzf(#*SmEp zIhU_}h+(OmH~pszb?ZwO(f}CSOR6oc;THRdq0+%JP_dnEh($1p)K9n!t z-WxcDVWx#!6M@7IAn?RHI0;9ocYn5O>2v81;~+X>B}XF12fO2A1(1 zo0_jTLbtxXZKchIVy;DbDQDd9NM(EjpsR5Pmr~@YJ8OQR`4Ii z_4gnZeLNuJn9J2DVecbG19ey6=3|Xx&wT*309PM_0E=&mwROHX^_%n>b54$2WL*>1 z35tO5IU!=P&_mmCg(FYrjlU<-(To8MhZ~q20{O}f3T%!+Z6|lG-6xcL_eSGK1J=tFhM>vanLn7;Z3(H~F7F9j7B6#0@yQ#|7iDb}biNkY{ zDMG+0OVc@#^Lx z{eU7Q)9!D-wo#gVA3Oo4H5YtV+rEb7?N`kB*IwvRUplJ^2bRaVcHbYthxMwM+T;JW zHZ|WJH6gJfctMONg?4*W-Dd}qx*8|moFB}`bDSvn-Hxa4I7DIoK4c=NznFH+!=Sz_ zZ90lZM8!+^SshNoQ%S0d&cm$u34>=M-Zju$Os_i!lSmnV6#s#`I*26&m2vd74_FJI zLCXZ+$Klh~t5LCfZSn^L61$e+KDG)_)*OwFg5eWm{W^q@*#OpKj^Tv#ZR@8H5eUMT zR%yG$=>hDUrrX1b%4IhuJG?m7o!3GCc>^p5zAt?EsRidg?Wx86Ju}x=CcstQTG}#yw#}nWE;lRFTv`Sji87^}LQVXK1E% z?b#_sl)!$EJ*v3*fQwUfV)3kp_8xhsFU7N^vRG%d&ABEvVojg1`9u_)lLtgSIrI; zyAKV&z;&omzp_c>Z)^%vsaw$7#|XVMwNprP8C8oPOmW##RLRlPRpTl$W`vz|=4_pA z-yjamyY(r_V z5c{;Fk+$GF#m^E${`g+h((=Qd?U01~54yH;e;8Pj0? zV?s~O;d$tvu&}DM>+Et-jA;(#b9pav3yK&=Q@gF!n0_O?mwNs%keZ%CWd6WhJH zvh>k-N_l7wYikV$gvno56aN^lc%Nbu_?bh3ID=zHcd5W!m(WvCRyIf$pkL4L8dTIu zv?$ojhRbg}FLz(x9o408{}i^7ct*8JJCr1r*4;4>fAQfv_Q~Ijv2{k*l_qzt-58(e zo{pFxn{oddmAMMV`SkH10qEID7~`_0P$R^#~;O z#ENrMxrAi*ZE!jI*_VWKL;L>l&{p`1jqz7bVV5~Gm#@ze-%yh2=RkDe<=;=jeoVh{b*0&e?2w_#K+fv*_I&qSw~Y5 zB&1CGO>TxkvtI8H_`p>wzmn55X>GP-DcZpb4TSm^vts}UmYRrAT36HRTS~Qq-M$&{ zAiM6l4?Z>L^Hwb(Pz$JMf2COCBDXcXvLsh}w9Pn?el>~)2+b(~Oi~S`v~L-e@a?_{ z-7SovUGoA;%a)XDUAOeG5hm|HM|T- zKLkEzH+9;E=EKPnwzkh! 
ze+4u;D*r{_mzht{Em}u98sQ40?tQ++z3yT3YLvq!PuUWI80&gLr@$E>^?8Sm5SfOv ztrF$3()#Yv{{44^3ysIaXFRl^gzy^E#&gxb-U~??1CezRa%q@&hQPW-27g93fMRQ& zngz@Q(_MCKlwb%5HUx3crBF|hvr+g5`0z2R>dhFnYl}&{4>OsmN~>Z1;Oci6XCOxi zH7l}vY&Bb#?iv`HNSuKq92MHBMIcLzPr)DLdAj)q!`jX+VrppnC*~QOy(}tYmgR34 zy>UX2<7>__aCdQGA?r65egA1}1#Xc>*E#1r+6^5zt0-v`8+U!6_ig(bV@qv=yE@4c+XfrryTu?spR1Fn&-JF@}P+LvZO# zR+~L)@y|`sDJUr15G^?1vZLVpXSX(M*ra`0)YE~Xt=%Rho*%|M))k1au-JA*6r6WP zfBdy>y1beDD==t~?%{Ui!QUTZ#ruNVn}g=z*wt?ZcfNC`(F`j-_p!*rNwFqWy`56v8)?8R;D8t%T44X>SmpY`zM$ zOKM|X=^9}Sx+%0buFu5!qYhDx-~pBqk4c}zf5;07cuAp2~Eb6Kz5(!%L?uDLPB3uqpaV)h?F5~?^8om zDws3GqZ;l>b!DaEQTiqp0jvC=2R@RU@U^W3hzk=D#uR&Uj}_E-jAj{jxpm8}$VoBaNk&S)+z1GiPHTn>c14E5G%E9rZdP zr(MfxGW_gVW3{arRoJGuRar~>C6asuF_%I-fqx9hx4lrf2++@J1oVM0Cm`vn%NOc< zfBgsPA<9LPoD<~V?hVF)>%741$Ku9jW^exr`+AP>O}D`*XG9XCJ*xdX5dJie2t!x0 z%I$@an6`p=Sdk-=xsy0KdZq(wS3&Qc&>lcIz?Sl=Q(%~c)dMVn0N(|*-1O?{J zHFPKg_po^3SX)md*6ETM_uDEwkkoAL)haf+M$bZx=4*#uE-tE{o7vW;zB1#Xu3xUc zY)=<^yyT1eb}*-rYCY4D;4xnYWbqQWdv?E&Bp6>({I!LEv8;d5%fMxK12JPoZg7s$ z|76ZsEx|V1k=Ay*UsUONv47dTivWT$%hRt0ceFwZAl}NKsC=*aItBQrUTyD?Fq_v; zNeqK3P9r3A>I)$-BqdyTwG5lZet*Y9Z1;_zdN@v+m{&p&pDU?z7RCe5_wHXLf6(5I zkk%9Ff_essr4k_Ptpwu;7Tt+h9>%6aRu6cT zq>X~nuWx)VyGqV-l$3$qM?i6Na&j9}y88TT^abX>#>;gOFkUXn;>HpFKX-Zsf2UL) zZ65Lx1FnTUHI6n-VCp2iGfOtR-YddoKtBFC1!6QpHuNHdt$1~5_%+^mw0q%s>62i^ z`{9(1lqTT{2Nh@#AqlaLk%u|sD%kXbj3V=l|87-*=c$y#Y0-T&PDMdrrkxeUWWWTg zGDB>QE7M?9*Yn7s*E&x-S7Sc9KO0jax5?JIXj&Tp@Nlm6V665i2~7;rCM*$c^cj5A zI*0?bVMk*h8}2#7E^!o3nnZXZgs&@^BSQWh29X{bjG{<3b{d)*GMUv*hSzVl zn!*()Sm2heams|Ae4v~VL4GgR9D+yGY$WA;apl&-5e**WI4XxbTCl2}Vm z%^730Ir4KazRyTGH&1|s+tt?v);mGSr7H0nDZ^+z2pR zosHsVNUsZy;y5CF>38l6_3UK?Zb)=Kg0ToC6}X+grhF62^!tWuq(ZhWHFM3-C{!FE zYO@G=C#{Pk+hPA8?U}}-O|#F9Rt&`=zI`^&(fyO@&lHD{lg&g;?}b+<8|+3zy9u|I_mK$S5n?-v4!i{%j1-j1`0Ir#_TR#IR%JAq9gkq~EAd|UgLi#VUUg&-wv z;tmGR-RDw68mpF=EL{W}fu9A>pL_M`fzl)nB0K}#d!yJu`#x7IRPFOPARY1#i?(Af zdb5PvuLTd*jdZ)SB3gF|Z=(=gm&Hbl`QR^|o(WSR3(wx1?Z}FY2ZrwfyUh9SM3>Q5 z$JZEV702FOogTp9VDfM>*BEex<2cH*lfcGx02XciODSE<>DS!=#}bU79TD|W)qZTr zZRA&<+P7WF@hDpU@eaIdfst{vaCcDT{4bm1iXt$1+}i}s!k(T(t0|`~%QIz=T!x~_ zd~)75ZktM#eWoW|X{$034kI!kglY6_I((bq72C?#>cn-r`^FLqQwi*PgodE6(=k@? 
zBX~_p-?@D;+T3@$1!Sy#alb|Sc)d&Q1;3{UTTx%mR)t?hUpoEwpsBV?t@3uwe<(cw zl>>d6_n~^q5QQ+KlIRMFD;&y%g6i+3HMJaR+v2ve5K;un*Zo|`ZPz4l(@VO zE%8fbGV3S_AA5ybOU#cFb2*t#GYuSVDvkSb6uPHXBd#?(94Z?$nx`9gKb-!M+Dn=3 zl%8gh>E<5=Us0EPwSt@;AFtZtV-)yBF4cnOJ9-|rI?0|?YfJRreb;4HyTvOkqL5Zn z#l}S(tR(%qqC^-&k-U#nXrsuxk7-bV@30cFU%&iklJdqt%*QJwER1BJe>9Xm!b+$w z-8TALfpM8=MZ0@@9_S0eVxJb3mc{|)KgrtGK(3;{N4+k@`;_en*aeWtAbdZcZ+^Bm zldQ*j@rjD_0m6?`-|qz@0Au+xJ5HI{!G?fO*W7wQUBo29mk|qI9h0t-73h7HvO0RkPMxO1 z>lVssT$b_O=m2=;SmgQ7fAm21v!f#2`|v}rzKme?V#e_Y4y8s=fN1V3ZuewKFO0*| zydKP9x$r;!qc-}Mxg^8@r9F)w5idqCn)N3DW02$i3E;f^L?WCtF2{UjFbrJjwhD_% zEA53}fpgd2*n-duv_nIF+*5F~iEg^etSHee*2Ow83hfn||9y~}THe~TvGyp1DS?D# z`)4EzVu)S#3}(Rmu&ft25S|jP5mfoNMvhJWJRf$r+su6!SsBTA8c2ZHmPvr-AKdfg zB<`TIH6*6%{VpBV=G#E3I8)xw~lo5m#Em}ev)MRT)>b{ zrmdW%Tf^oCO?}$kc+RnRO1xqjj+hi|$kfe2XctGVKuX<^EL3ay$jF#@Zgyj!pzHqY zy~P(tgdUM_>JBxj#-3ct6$3vRM&wfAuLBj=K{yQ;K{XeiB5ilg0uNhD?g5zg zXBVkl>vZjlg+|J`-b!00lNFo%dK-G!Tt=3+MU;)@Us|Svl`#r5l-3fNygk0;EMIgt z*+st?KM6jPk^%5b;M2og9Bp#_C$Gz-p)IXG5+mqQ{QI1Xp|#=gd;7E~iB=(T@#{k( z6I>wQF}_&#^l-?i(m^H#X5fvelEVy|EK=S0kH*f{)@CJ=Gs4LuVPptIK&GO{&Fw@0%(wViVCBF42 z>cnG?NAg@D+GX2V&s^soB*6c+v<%U3cSl}%PJZzmBy#vZ={52;|w=$V2-D^A# zkaO5|*0nMDi}}~??EL5~PsDAgww1s62>U1eW2T0IT1!+~!|1`v@3b2eIXl_zq-=}2 z69h_J^P?&G*WblDO`j2v@zTfgme@axXKI}K#NHHZ{%J&7m+}J7B7^9jZB$Vn;gg#U zx)2ihoJ+sA7A=jznZnV=di{esPG{V>#VKnTM>jk+oN7ujx$PYRa^nY2R=-a?9gzJTJ%S__1ff-g!aRuxJc9iOk_MF zIn=AJhInnr(m&3Nfqt>WLr=}HQug9NM9ZGx>xT|tTe&)&>DsB+*4AeHo66$C-Cgzj zp#w0&y*-(bAg#G43PeHr|EFTc6rcvhF2!;Mwed$n#ZbA_MG0P$+<|bC2Bl>tiAm&0 zV2g8C}_2pOK}JHFh98_pn2!ut8FV4z;BAD0RFlc=SN8`q?_LT_DGAjSMK zW2M?*U~qtYcTuPJ2!W*pNs4PWhT$oW`bmWkohF>jC(Sq04G{q@@CexUI||x^!EnfD z7Cwlu4sYV|^u_+Xe-{|u2>J)1pJkc?cMRm)4S&&{6MgT4cpv1}i?IaK8YxlL(Nvyk z;Y6`fSrU1orGD>d6y?uL_US~?-N)p*X(5}n%&F&ntp?ck&z?kdQE$y<#w{a!YKg4T z&ziqf)GGsm#eVCv^^P6TuO*t&(&B*2j|M~=6vWfEpLdOsdKGc2MVx0xHR+ZK2??t8 z60c6C#@Zc;49EB{KiRXuOA7$8ASKq?eRe;B#I&LMNUp3xwthHAIxc-P9PRm6JquX0 zH#2H_oOz!wj817}6OG$xjV2=bu6@!Ch|S(t{~0ABKJ*deJAtp*|46jhpTK*e)9g~g ztFmV1LvJ4w9UU*jWC5dYlG6R|86TM4c4jGLQ-Ju ze>PEOA|n~LxHE1-;3pM5199y(`H>L3fzD9r7|3JsW5~cOR zs_ebfA~wpi=`f&JZ*iSibJ6*ZrT=I*mD}tvgng71{S@$54hQtA%FkMxNIkKGaWoEb z#SZ>t9~UVH2%KE&26YZCKMbJFicHjOpc2lB^-#(XT)ABwr(6_i?GE6!+QcWj&T80A z$a_%-khldV1LA<49haMh-TscL+u6N#yxWp4Wa)S9`+c@S(Z)lWq z%MUwiIKSiMzH>czs_wy#^ssP@;Xhk+D)<^m#_Q8?h3tP?$;Y5i)R3fE+B6}N1d|h5 zvyDWZi)S2oNqq=Y58M0B<|{B}cX&Xn+P%Pu#syG6s1MX51a^57iHycGP>dLUJWON_ zq|8R+kH?QlYt!)^9tjeMZ^Hv*JirnK89j5#1x6%>$O13r@8mjMKrxCB6KRbO$80CB zprv!um--+pYK zp#9mFCU9bRrotEzDA8;fn*+3!ZG(8!y%5bcyO<7QMyIxC)UF@rIG8O-f&l(5@n)iW z?edF0+#iR_=L|eEgbokq!)_&mY0;pDqxg<{VSLv>NZdCcjs~Z>YcMe;S_*k6IdMYL ze6_#nEOxrmNyZNWG}j1g9_4Q1Z>zQ;D-}io&CfSWT+z*)i!RjEjkf2*5U4%JM#>$) zWoOHe2W|BLtdbsZf=F7q?hZn?3F<*Zpr8Fk)tI0K#N;pbHM}wuv50f2r-K#@XRNEGb~Mb-m2Lf-Q^$&&aVD7^L=^TKdLWApq^zu`g~ND!cXn zlhr7#`#CY>WJRNKhmHq6WB?v-sW`4h+&L#JvV+`(HB<6BL9OUl>QaXKbAq#dP|lw+ zA9eU~kP&`gPEMxaKc>$HK`Pm3(M8_QH6rSCNQs;Sf6#D-tjn6Eg`1wY08h`llcCtM z@VT84WrY)nUyaw9A8CJt1PR^kigTK;#lDrS-H3V4MbEhaCBs{W$Tmacou7mw`Du&= zo;t@hqym)oesACcts))FRRBke(a(^}L^aBYNQSxR7ft)pmWM=(?L499AR?!E0|e_- zw*zw}P2Nwf4tE=DBEq9PqEX4`EB3~p>f9P`qBoS{F@1eLeI`_1cQlFVPNbk^Gg~Uw zXFF-QZLSns)zX_w{VH9^YxxCW-+n{Q#z)su%NVlabe>m45|97m>n=V zOgGHl1a5t#seUU6b27{g$$9l7pHbjD(YJX0d{)r-;g5QY1-p4Wdfv~D!C;;@v`%gq2%c zUxrAs$$>6hn96TUEl!&p0B0)&xBd$>dO{g3$46Q{RG8xFMzSEoHH^};;i7WdLwxg# zf$k)KOIcHs9$+`b(s;(h$|l6wgW-O)3U8Lvq&eiFU3XLAjCC{6(^qP{(l9^XkMf=N zlfK?b@KN}*Yzen+jbe`yJ_pG(ua6I=)E7qtI~h7LhGeB9;-1$B%Ic-tXzLEVJ;PRr z=h>3;K50foalbw=850B98!OW 
zaV;Nkz!D={D?Kv{EU`jT)Y!-!JBBh|pSf?KV!eYa5}F`^YUne_C|eKveLRq=D~>qT zyTf#3Nu~^P5&2q3za1e#$}Tbgcb`TTdI|=zxLJnON~6`T@}_r(T|0crf>~PCQF-^s zfYHo->o?!YxMTNLrFGn3MAS^Es`rjg3uP3vxH0foXE3Fe%v9JGM!{E>S>HNZ37>iK z>$H{GaBAGOOW)>tvm}p0t-eA0HHZ##(z(_`t7nt8+kP2}WN|@h1Zlp-7g^a*fPuzs z94+!o%wK2Aeb-yhsZ@_qUYkev!8vpp8?*7G?z5_tqu+@U8yaO8XH7rD5dGG9os)DZ zOjT^qK}>8aRRYAd&;(9Gv+LV~uZ3-KtH_v5Rl~PGok)1|?=N6)vevm89rl$RX^yFY zhYJB`64#6s>JFwycJ2k=z4sIRhk48=C>n%`Ko-dh5j|VoHRU_9gb0 z8=^vidwJZ`Hj6j#`FKu~X&Z+3c|9J_ouCrVeswbk*2w873)=hp`yHFX6v6r`!KbsX zoo|DWtL~>(rv5dG{e0apKTT4g8C!#5%+WS=aa()@Y&f3%e$Ks{pm&VYNhKPOadB}& zoO9X~fDLkux`5kNc&q1Q?HF;g|pD}0#9liWrqPT#lY_$*^`qqfFAg-CmvR#;U=MFj=y0#6*lpW2SMQTID&Z|~(#X=2xeQbcdasj{q0sW3J@w?srWi{Oo&i#PDmUi!N zVg|Hj@3!Sq1xuM`;ZXs@@??j3vi;A68rpYpm4ujru@!sj#*#vibBe1{s%VpZqtq*7B zJCKn&`=$>izC-z4DP>9-~5X!=ot(6qnUSn+HdxH$~)C|g&$ zFd!-|H+c1t)1Wcp>&$~gvA54d!?lm1al}f6NO(S6Rq)v``nQ^0{~fcRAL{&F3yi+4 zt5h3}Y0z9^geAL#LAT7t`Wc$1Mm@UUUS59kfvRa;xmQ3uC| z$fd)Bbm7&*ZM&w$*LgcLV766tBZVVw+h4?*WbjC#zYkV4e%a0VVn`zADhSIy3x8`l ztgC}Vk%!X;u3u64I?tVy;T`g%XA zu{&68uAHOM!R%G&o(g=^1I!-=0r%@@8J7DP2ep7+)uPXY`FC5&@`Fb)Zze&5G{Y3X+>a1?Xt?6mIAa6^p?J0-o#-Al#i{UFdl~ zKK6J*8a3ii_+KkgNH)&R6`Hp^UwlZ*DQw8d{s98;NN`z6;oo4eV${iaT@b5HFD`0Y zz#7ACWeJV>L^nJCKna4??yvbWLV(X>qtAEN$CEw>B)8++KvZsHD~_mk(VS&>SOD{& ze$4ss4;k|PQG7=ILf=18;6?EwSx^W)fT<(jd45bC7AC|ktysp#L~_)4Ahw=$0UQ+~ zfJ24WBN&m03=jZh3SjzO&(WjHw6P-IgJLdP;Ol3k9A%-`NZs^L1N_6Lo7e79<2(rm zFoS%WwY2%c7WTQlW|qZsWmPcQIPzQhS-+!!Ah3dumn%p=U;BNuwN=0-SvA>(m^NEH zNrm>=``X26j#XYa9Q?4M;;!PTZT0JwEFNuOehox7y9}z+Ejj)jLw-+JRMUW`F6>d9 z#7Q}!ot@F1WniW1NHkBI1xM^$5*#H>7I%->3(we{zfs8{JiYj5pL*3^dp=b7W;%VBMU zlnrfvPsIA|W!B#xc*xm*zHD_h+7?iW%k7pvkm`yu4;ct}{YMuG0U6-Da$Ed-HS-@% zG^On)Wx%M{^h)x8#7m!krvGW91IHhc9V_qX!$qA z%S~0U3?%{8`=Xfd$s7=`S6#LON`w&4Utc+**-xU?5l!|B1 z-t!k;MT1L!hA#xPcCo|s+0mgVO`a#b_+C!;T{Qkk_L@ShtSSk6+To6qlymwmte?bX z9c%hUfWdxeID&=N#qGs)mrf;)dMJ)5PTEoI=@Kvs@jnGzr59Aybm^$5U`-~Pg16Wl zi*Jv-Kyyu5Vne6Inl?+`XPOagV^T)L|Xs(=q!U|5` z9_OHbC%{pZY0LwBjCaE#Qf6#5>ar9!=5U(ElB*PJ=ho2+tBROG0U8ij;2U#>Z>N4q z2)4U$$~LoL6(mkJ?RYp#rw*BJ7a`+^wtZ?#8FicI+cgal72e;W?}dvH`_LVClTkx=7Hce+NPZ{%z31>^uzW{eU?ZF5 z@FQdj_hRL~oD68)NSXhK^hA5Ohrp#)ogj>T6qo)=UOAo!cdmZtVRBgwZcaOW$bL*f zO*{n8;Dg}->QXu`0Xhu@ETZOfBGs;e2fb}1q`DB6(F$6)70AHIo5ye#$Xwrn5wL6~ zPwC^N!SX=~1)9QY@Eu&iGv#&Lv=*Z_U4zeheW>@^cx`|Js;FPah`KkEU@^puQ~DyT1mkClCHF!+N^9%t_bQ)QjPS<*q)q6CuG6{JxYA^+Rl@fL=WZK;4fY zp?rtnz!X4a7&~&iTHW+)BdKm9{gSeWQT%*K)jWEj|V#V3h1CW-0nH4V_MlH)v2mB2gT_sAd z&2Ff@i`h17(R9k&U15Ss;}!`H$BSaj4SVDoMd7oVJ;B!>j+2b<&Nfh;04yn-|HVa5 zYFxsLMXyym)%%>FNAFLSoRnH!^U*0?Iqk5ekTLx4`grqa9frzM25_;3MBU zDMW$H69qLJ6@3EZ*^H3$s|qM&5)u8qWcXeZzg?;TEJpPUv;yEo+l-TWf~hbuFqZK= z$nia-MjfF7Ts%Ccy0Gu*lJtgsWuNqst)e$)tk>nqKayr~?OV5gTdo(*LigSv(^|C z7QNOxa7r(LnBC-fu?F?2SfwD6P=;`GJgW5$?rs3`V1$c4a|8D{Jl3&HM5&b*Qz2vb zQa~zdNHtk^*6PH$R4_$+riI=78`yoa zh-fLW8N$Boh(v^n9^B@%qMv4;u72VTs@@)obDq*qb6PjRF#2S%Q)h2C3pMn*0qe&~ zE+_BODg4O~RzJo$d?m7j|AouI9LIehCho_=r00E|6GHmh_s{LV0B@O80a8 z`jot2tp7V)A_wQER3e9g*BQ*6S<+h+PYd;Qu+X6SOr@C3$-4=pyE)Tpk{3&7c+`N~ zh?SNL&VLo44dvgeqcE>)F8jYr^4}HuM2K|;2z?FWUd1dvNli<{TTqUO|Hwz*NID@I`$@OPjr7?UB>++9FB5C?Q*qs-MZFfff;(^u{0|6DkXoqvo z3fZygA)+QcA|ymhkjrTVHqwlE=zxS~Nbl@D$q;;{~bu)P<0{jY7D(g%)yx&5Os z*g20M(dzpu^XPqQMJdRClL`s1dY^jIr7NhvddW+{!?)}lWNpq_&!;OKiF+D9CZTFG z7KZDeu7>rWBlWO3tvfnG?+Z}^kK!`Gq0 zWKc4}UJ&CV#3&!p2~H{d<+Sb9X@c38#aACp>#Oan))z*4>H(sJL+&PlQl;+?C$JL- zzExMx{e*u@>o&N9E2#Vh^sy zxAQ=qLBYxtZp>8!Mo(feWv0t`m?_ax1G3dwPzbpkZU8I7qjr^anWwc1qo!5DEBG>& zSC!Cqb>P8nIhKXbGHLDX-7WFmmN^eDU|-G5A#Dw++|BstkdT1Qr-{Y zw94}2b!>|^Og1_y#p)Bn+0_-b0N=Z*zc@sUU0?MnJLN+ksdw`#`!6`If{)%%l|i4z 
zt&KzS=DK;Rg=;^Z*@uW1JRf%=!4&&Z$cZ4A(~)|CLdJLZ=kc%)^V`=y5D=#SkUsV_ zn|&XRmUUjVl-ubewM?h6p9(XpZssx@(n>epdN~%q7t-~G&4N0$thLn zv8;tYJhXqZ`Jqvuo)LNFc~L-HX1#Bd^@46DtD^Hc^-f?w+Og&M3adXWO;~qsY%G=^ z-9q{(^>J?ZB=IiM?7zy>EbJ-aU%ini2+4))Msk_+wCextmj5ozzr9qV3Wg+5Akq0& zub#_CHGTd~u_&Ar{4v(gP&F4evFcIFrWQxc;?SYT=c5;ihpF@rH53SfL$=k- zj{LQ{XXA$to?mI1Xi*xWEC*Q_0rR}ot`*CgkV|Hr z7=h(Lgj3tBo?7-GzCOQMh*GnlT(xAg*#C^!)z~+7=(W=ybDA!pYy9dQF?s}D^iml9 zQ?1i6lFej6%=qSv2k`5o)2=V~dzxo&7X$G_g+@zI@4_TY+miu0Z!Ii(L6#6=IC(oM zV$iw4Uf5-t_lriEuRrr5wW0w2SVz_#z#7`ge1O4H>Z^gplHAfumprJK3sfI`iA4U| zE#cEuUKTAiY+=!Xa2-hx`#+^QDA6#A-gd%v5;2YqjLezRn)W>)px0szHZ{HTMv z6NS9Nh_MUH^ME`isTigGwq{P(-Yvy-Cjq7%$tLLMorK=(NC%`fDv6|e)8b?ql;)bP zm?e&yUaVG@>ZcAtTQYhYO#$UcII`oo*II8;>Pq0i-vI7YHnm@;^EWbU8c`nt0&_xY z94FG)Kr}Xt7TfyIxeyaS(GgV0VL63%7+}_W@nFL2GyR@I94!M%MCU`X(Ij|UlF zAE5=g`WiMc_g4g$O|!Gqj>r#EK2c`CJD~*j>;Qk#sn5LaW-r9%ng{+>gTko)_(52{ z7q&Z+o%=ZVoxLXMqLti(&H6jH$~Dn__)h9=IzWde50q9hbfxh+JL+YeG8uYCCL&#L=v4U(5e6f&O<*@knC>%}%kg;&s;KeBQxlo|un3>}S|e zgv1}LNo3kdZ`u$)!#Z=y_H01V9-pF=^NGYC}OSYWR})Z zUU513#@OWgL|dNdtdItwotG@uUSLvR9oGGWfKFDfF1Jl#vSyy|vd5I{w*-*2Pib%* zqay7wv43-9j>K(1_RIY>_TJJ#W^gWLUjJ85l{+m0I;Y>_giOkf)%-TEo_59J#3Q+v zJkD40aBzIR!iEpX&Ot+fmwyF`?K!IEP?QWQA4!~!lIm@CHr7I7e6|8Xn6i*~tcAlw z)5*!sdL`fK5qZtmnb?cBZ&3-@?<|X8D!dEQ*s*v%$t~o*H>-lQ*GfhhPPRxm<3B!H zSITA$oGce;?k}H1E~TA3qg%A{Uj*=i1lB4%lPuR_vs+;=GWdOxMI6rwgjH&I#3^n} zOco2~E@wk6u*Qd|91o^#eW=k%CJezB$n&rSl;HPDX!VHMi0E(j+&bWK>pinZK|+He z6iU=wkYN%?_&;G8G>EluZj`nuVBNS`ivZdR$|HR=9(1!fvkJ;ZUqdi)g}l&CpS+!i z2@xd7X|P(c)-l}vc%H{coG`d-hkMh<6Y$_O;wFu7XmZ&91Cp5)rc~Y8)E5}e@a8Z) zss%-Mqxnegz@0q-Ac!$QELF+tsKM4*@DN2iGI&RN-m{aAst0D8xG%06wR1}1F#p5B zf7*Phy`|H;(1;&wbud0+snknyld^UIZx7oILmS7Uw<=~~!Wd**Tb4W($8``N?B@s* zo@jM&hWp5cYt%Kc(~H*rnf#5BA7|xj+si`{kk5+vP4Eb2*bsBiA#~N$|Tb#VuV$?RY%EVzo=N4?bD=gZDVLJt#?sRajP{=_JwG+4rU zW4Wgey`vAQ#$5HemSq}*@6E#?;*Lz{zU?CtY3}f-KVW7GQz!-81qe*O0@TsTsy;tO zN5su+r(<5b{cYLr1tk0D;jJIVR|CEeGBQ_NIs0k5Etn`)HRq*yuDW}R{_SBza_*5a zhgY>|q<_opzJ(`QH!Yw`fcT3$(hB``2&&_kQ-TLG+A838>cB%#jyvCT!MOgA`~sBA}8_NK9m; zlOhQE{LjQikG=Gx{ZM~}Klaw-{>64bv#Ru#ZG_RLrPt=i&J$Wm98kqwzZAihbZWI{ z0OL|PIkt@XmFR)5q2nhW>7&JL;z^@!qJ;X&^ec}o1G;=g25@8GYJ>5B9-DYZ^=TU^ z6AeAjLfKjrK~6<6qfXc&FZz5_zk-!OCtck`4l3^7$pOO>ju*c2*?&C}3x^`ovNE;pX7TXi9#?M=5MVN@ z>3POXm+CBuMa!zTW1HcyqLMlJby|pK!s}CR7;O*6m?R52H9Mb5aQ-S5>;za-mJ49{ z^8@`{$?4(#Rodc$u*$UzeM|a?K+eWJ)D|3&qx}vO-UP)v?M=lp+s50&IY#?%5=Aj4 zUIw7_-hgi;5$Iqp!H2ftftu!Fp+bPlK!+Ga`ZgYoHvGeU2SUICB~9lMKyds1z^c%H z{aH@<>r=U_FQTtx5QfEjQ?R^DH0=g?1`K4f&qw44-zAV`SSqE2Mmu+9ws*PO_F2(- zoc!2~LlgWty%&c`vtF`xn*C=NRFO<>XR}=mmJLF~1c2@{;Qu0^Rw`s|ui6bUp`voV zR+vC3Ypocsi#uk2mdy(R1X?1H%=9AQEk6pbgf<<)18muaCqtcDy=bW~!L3B?14ke1 z!SOKkRiq&lYmKU98Z$wF`Bi^K50RND$7c_U4OqwOj3VOd55GlIsBWqhGDN<42Wf|( z?fuX#x+d{Uxj@Q_Wz?87N249#^cSKm4K?Bv;Z(Fcf9nkyPwskxYDM0g?{3|7HH!vv zlGPe?{rrZylLVkfgzlYhY3f}qiRY;^qDtOAZd@;=PJVmF3y`}DN{x+W(L1^EeN-*= z5G!8X1)5o5`}(e4gkcYbkZ8Njsf63$bU_r_9#O)F*;Y@)hONn7;knq6q(LXoH2(Mk zn~YAjyPmj}!wA=V4_l+ho5=X;YyHN50)|qFro|oyP!oW8Z-sjwEpBdi!)9!md;53u zDovWUj@y+7rHu#lHSbD7)9Cx<6;i;^qoiBwJ@3D;79P95dt5}7+N$#Z8-4kI>hFd4 z@5s4UG3FJ6s0|MNQ7{faVdi|Y%$)OXJDl(!Hu6``1rIF_`pvuuWV^nTfQd_xNz-yZ zfFqvqyP>&Xg~>ESSG{aT73r>JxIA@9r0JrHzhE$^0+jL8*YFaT=`r88IoIEH$`U2v zb{+YPz+aiz?mZqb93DdZHy-vbW{T$ef!GZSH8ascZKTGLnf!9mPs_OqjOAPX5fmnW zvZa8JEVil9DdNYEA8&sAwBW^!(5hk>M*M-B2Fr@0&jizRGJZ|-6>E_)6g4Rs18f*a zhc$bg`bebWB#|d)ClWKLv{_AnR7`tUuS058N~W!(lxaB=SmWb1W%@}z7_T45}ftvs=PRD!@>!%S7ez8>8r%7z_vkS9b1Rq zyaOPaL8u2%gHcnBz}UttP5li9{wIa5-6)CC&=EfC-3a$u{~EzHch*)Z=C$AXkwF5R z2~oWy$7eF(%%72GakMF*=qp%SV`O`F+}0`*Sk&_rwi~Hx#Q^V}qK?qoZwGTFS~P9X 
z`Tt{9Yx-At6=!cLsmf~y{H2+nKhfvV($9wR z_2ZUQE?6FZKO$O{1vFyRv zJD3C+xX&?7+~I~{{2Dmky`X2s*hj|L+8M%7%yJ-L7r>5>nKm}Beq$ti0t!z_O4<+= zG?3o=iNuG5PJiZT7j}&uZEWrc#c?im7$(Eng!^Q4X}JvxTX6Q>u>+IpC+RlZfO{uJHE|VkjxmKy#c%MKmfj65h6Mu94&TAx~G z9GKXlzAJ!b2IB~PytzC)JTL|X_z^##Gp_`3a)R_P`fF%pSV@58;KI37-+Q0EtUgGX}}S3`S1AZP6E)-8D`FZy2EqQfVOb=66DtmxXa)Gl}^2^7x9M}*y1RLgZf zL91(Hz0MJh-p$R6F+E8m6A!h5cC2=E^Oy6`5j!Nzc-sV~MEyw8tq0rPa{Z_LTbW4O zCONRF%V;1JPpqR9+tA+)+2N)G{ZOX4RRk5Y;t*g%iS_ym=}ndDQRz0i#Obwoe_Cqo z+pfUQUek`C4~LCU0;N)crxC!1Lzez7w;{^zO z6)3#!v!w^XTbd1+)x6rfzQB(l^xU~CI|@zwfou4;Uju_)cq}6{%oTvN$XPv-Bx;R}h)OBS_7#3pLWk4O z0BA$(2M)*m6Zq~I?EFzT(KByZqo7t_{5cAjJo}h|fquOH;3gyg6e)|U@TeeRE8A0R zWxRNWl@(;`w(Aq%hqP=%*mdW5x}IJ__1!UZ4~H_@Um{HS!sAMK<8s-3(M!Aa@uBId zLE_L+@L4OCpW&u=%M|XszkBxsOa1a2{*&!IsvGzSW>*g3?(7Zfw*)x>O`I|RjBkuh zB;lq}R55#0gztSdgD*3IBy*|WDB@ScPD4VARjSt4eZ=?YOG9c0ZO?{D9g^2KHwXxS z9iIMQ3Una33Lp&?q|40N{9kSw2E6pZ8gb!msr&7vYP%`lLM)3a7l3YuIn1`2RjJ5C zCrzm);H^)5&N`C6#$dMmFm6ihd4kY_b=E&Ka{5I#e|8ed&Ecn6JMXGsYne{MYRfxH~i##)RSgcbB zzmreIHz%8M=^u1OTxGG8QA%8S`F5q5v*9O1q^g+fj7)U#*w>O(A2|CSex|pwS(go| z`VN+4W2<)g^OTeZI7$|)D{kfGQEX7!7X=aj=FsD7pKAlN(81D=p*^@1CZ3(txaP->wOtRJ&gDU|I zU@_0}tsQ*$c%f#X!7=ob@M5SZ$I_}bXpMl%x5K)hEEQIgM07ObS&6O#Oo9Q^3{&z0+|!#+^{bz*N3Sp9J6t;CLQi3D1&Heer?pe8%~$d71a07IFy(mV_25gX!22v7Ms}5w}x7uX#O;rvo@?n&q`OVrpj%&nt>$J~(*=Un9qV&Ay>v{nB$0-NRl^diJEPi^A{f1t4R2F(zZ9 zd>e=kP6mP~>T!54gy+5P0-~i`*p) zOd5glmX=fYS8;MMmyAs?#_=C<*C*eYaS20{$IfYdPgLe2@5D8WwOZ@gWQBDSUt2q4 zaLbe;`zEbzH1kXrSnuN%mkUz`YN+y``O|8p4}*^$Y_8otMO!uQr@06IaOME0ek#kk zViaRI6TRR9q%DJ-|3V<5T6R(8d9P4pHw5C>lSrzmN`dS7I z(&t3AWpZXhIH%&8mx^G0tJ$k5#5^6M^)??8E#0g#jSY`M%v(H=VnEZ*@Vk9Xrj$f>?-F&crYq}*2Pz-9JcEXuz+ zt(=Xfoxhco$7Z|T&BaXE8s!$)@s$mWZV)hQeFP%1vT!$7BGM5^?I?FLU*${b9KX(^ zf4}|hn!ExQ6|87Io+7|F;@eZq@oAP6A%~^&q!JQAoW% z+s77VLdyCC8f3Edi}g3R1QfLT#{onhaq_)BH8t)^D5KIgFEQtx_u zO}^GnCrI)Zh;fvyo7d{J_~|{-2Of-I6$TrQ=lV%B0G;OfWprZrLp%v4!BC2}9od^1 zQgPxdnX0+uhc8!8h@DQ$0ER8XmffAlB+mCEC*t939QTE@n^dR`S9sia-N(8Dn z6*7FH|Mw#0G{W123(LB8E6Y8QOc_hVc1RMO{%Q>*O;@ ziU``q&1`DzgK+o7NQx8Nf0laiXMfAitJUk`|84CDqzVH!3hWUd*8LuCC(Q`QXx2R1fy#2&^#6kPC+sU^4&Zo6mgj4rr zLfyUM+B{~%jHTB=M7~~&7fvtd(MU+q^zUV!S-j<%?R1fkT5Y5gV6+|LN-5GSSgz=@ z08W)Z@=mq&Gz-?8p}7P26sxh?H7!gZ^l_>;TdE3WJB~PUuksA33;7XgQ*j&0SIlf@ z6^vRI;|PVgrj5)_besCkB+Rgdv1g1lM6G0qgN64pTZon|LB%~1Z7@!vxY{1et>GD( zr7P%lt@$1nvg!P4wO@ujk3uh;)VX+PEi<^Gux$n|yj|z|HR=2a)ntgsI*kX86bER| z6hRF9HV>;k5xQuT0Iflq+&7Z(RQ8q6^H->k7JXts$7IW1l%HQOd@8dy)CWcnAtLIV z2EM*d2!!Ic7u<8!Me#U9YrG3gCELA#!*PwP3*ESu_s+=P?`PZHiHv*9YD&$39abt1 z(GUU|Jl_?`PN=^v&E7V#Ievy)Zpv=Zr4!H;M3dy|p*qz2>U$0!)-+a8@?$hDaXi%cQ zc9}5yY1b7>wo`nFU63Sq^YM%}$K#z%e0Cg*)a4AvD;l)NmMambsWkIY+q%mymqkch znT*h_p=a{s$5W(AfHe=e#&oy%?|K_&KTybQ9ss0Ot;4S|wU}R`E*f~3UUh5bd0`Wm zv$?yr^nGK*BxFt;WN8uyTsM%O7R8s(o60T~99q7Edl(v* zmkGPrFQo_4Y6{=|h{+3_c%_ zdI8$CEILo19!w7HixB*}xb#BEJ0<t8zEzf)a9wY1VgKmYq+C1hx08AWu9M)ieE`Lquj)B_`!ma5{EsGTx+ZcOd?GUgaFkLAc0OV`O1_ zhS&N!!9X|lPJBt4t^}yajA)289?`1%i`aB+qbosM+A0WHnN160v??0PNpD!p#c31h zRB_62YEo_%k(HIq=Pop-n9C^IYgByAI?<0gu2nb+v1s9W-O8yq}P zjcIS*s8P01u0VQPpoLFApIqjk5sExuta3?+7oF86EWdhAqAC*|7f4<*7$?gp(&imF-+I0Y`Cb)_Y-U8AuML9GT=JGVpLWmsT-Y3s zOW6QXk0yRF9RGF*S{Vl`UoXEOwqB1PaKyzS=_xWjthzTCX|(ttJ9O2J9s=L#i&iSr z;+_r@a+?QtTlADuod&qt>X-HfGII;lyG09)?*0`UbF5wL&5U>aD3yQ+RiWF!z?#Yf ze%5RAk0@wZ4!7pE9CUg3%k-ywM}hjMFV05DaPHF3GMyFn(pN8yVWS_00_vnFJWV|G zszownY}pmG4TZ{Iv{N=ZL^Zk}9)t!i0CyooIsYts>yqCsJsjR$C^cbHoRt@lKF&`g z_fq-SNB+sV6kn;p{B9G=yvO9~Nmf1u9=dnc9>?O>$6;V;)aHvU@&ljzfSfD=xA+gT zB>eRpvMcMr&%mHMM&B0Wm!>>w78*6Lv+dU~Ev^(TZYu!;hu%nH4#Up(ngt4nQy7{u 
z^1?e$ii;F13ceL{_^-V2^n*JK;*U>F&Bq-xqU#=1Sd{}>|1C*CvK)UG+R%X|Iy%9u!-YeN}SXE-l>m&!&g4z-%@BsJ(07Y33D_(lsxD=R)z4rZKmvs|gnidR@ zF}6sCvbmj0^mk{}$Xl}h@j2jC0NRv@>7Gk3)c-u;UX}cn>FF0V>$~SD7~~;mcw&h3 z+6^U0Ac{UdUZ@sG!0=GDUcFWOq_Kh4*@L8Tq?|!H?5cUq;i_4pfSR2l;@{eQvs;4mn>!X49N=G@Y+AszbY-YJ`_78|IcW5tvJ1!g7Rik(_BrW= zIK*?}%y~J(@kS#?*b0S+pBMBZClX3~liJnL5wqFn1EFy%q~3xnAIXStUu1I1LEvAb zgil8ru5)?7Ksvds&)CIq@k8#?R8fpG3U!*y+})o|uT-2fSq0qcy6 zEyV@FD_^oJbKL_lhF{P{r1SFh-q#eqqR(locff71g9T^P*c!y<-I>#K2WW~zpk1pK z_Dii8b~U)xXzy&MzodJ@6_sr&N;?|P73W-ev`M=NjuNM_Zr8azu2#YXeUA;%FWhYT z>N>OYFs-A&B8&T|scF6{KNUJmL#R>8Ee9Uv=-S7DRCNnNIT;QCl&R3OZ?44MLeHB* zR9d?j70mEK9^|Ljo1c43JcPX4`L!oe+7Dp7`Vsx0+deR-Nv_%cdC&h8oWe_iubkI< zW#m79<3|{K*Z`W>&AuK(IJ89pDdZ={3AMdMqlya0X?=3idOd+^zRUuK61`Ypr_plZ zbM*p;q+njj2bEDLJ+0_Y#qay3gm}NBmY9+ni;qvnM6~HMiyg=kh|tqrPXYO=VEZdd zg6Qa)YBP@-2@F%80Nb(2Xs(!2ic0$fJFxyA&o;ipk^UkELx<1wZfv@hGAO&#JwIF|bnHR;0u3vp@d`n5Qr#ZK} zp4ou*=Ff=ERPJake>v3=ivLjGBBl6Lz6PvbEGcshGv})Abh*{G{MvPx=h`^CM*{W^ zS_Bj5#+wLqz6fix$QhzVv-#Pe*B+&VDeZvcS2ikdh_I9M@50z^LM=RpN`t5<#tQ(b;8gd>-`|{Xj7s3TYPt=) zOUceYDEzJ~2qXKAXsrh%aBclRBqMaNAdVOxXjT8PqP;KIoPW2p1$2jr*r>zA4z)Ti zDDFn&Zq4I89FCw7qq1+U4B%}ffLe-W%fq9QFe|4JK6C!s0Ged z<6rhVK*a%8VxZA>z5rf9UQo1fJ1@#QR^}Xuic~Wkbb*9bYcVGNH-I^X{bWkpep0IV zVSUv=Xj%Bw8bK8BF~eDOu!6^QW?gX~o!sTQ44(yl?$~PKh|yN&b-U*oL{3R*n_H|c zFrg6D;X6`jBx!(!Xi;G8-r!Uv`_vn>-k)jKJCB3?MY5;x!<`k5J{jr? z_J#6L&k}%n2n8{G&A^c{+_pj`kiKN1KyJtirJ7)$2#J}A$rosJ71+nXSlD7MqRYsL zY)J?AtWRD|Ejd^VushQKRXUUEH@TH*CwT6W3ZmFUc!_53l+iH}m40PfHS_^{ls{Z-(G4|Om15mA z9O3tVZ|~BJ>x;JihsJL;H)ocsgLjQ=FPMEZ3h@7n{VH|re;vm6bfG+I#!v=Aq78j^ zVc10k@da|A0Wy$K7D2W>cHf(W z6K{jj+{Xi_oW<||*$nD$5A5Dl z?up@Lxhktbi&NQ{Snu=)qV_FwqVZc$CVqj_4>P;@?wC02i#bVR48zd&CZ;Tr^+D=C zl}bU8cTzH+s0H^5U#=Fp{NOLjK_zP|EEYvbgGaeOtYT6eSac|KlWRu>98@mK*NJ@`?Y{CSRr`1dcX;1&Gcg{9~GpGrds^kJkT zqzOl&Cw=^G6QYipC^L?B#%D-Rl>&B4+3b3JI9a*5Cp#(v0)R#~vFHr{}TytSCa= zH=*)YK5QcO6v8%{{E?`x9q1tqn@dJj8Wx<>Q zZ!4Sq^T2&T6Gsu9a(l7#_)uq!GGzqTNztfh9ZU3EU7kNsZDJv4L+ack_m0RS(MZLD zOmr`lY4a8#8SkZd6k93>ZB1>Vok_W5@X^X=@A;s#OqFR*mvz*vFW-wKT(^*_VDEsa?>FY52L`UG8)$X+iwfX*2@#&yI%fsWcgB^Z< z29-A%-kTBh7141T*ck3xixGJPSjOfpfvm6p{0vN`fR~e;WxemNm5z*|u(@IO$xn~q zZGsl^gr=DYYBrfcer#N)T<(W!#Y$dG1S}#XzFPhO?*<#Hp()eBTGt5zu!-M_JoH*h*mZ` zwN~7$Xya3Pd<7~9nl4x8^SKm7b1)?aGpmnoV${BN6C+4wP9iDrco^U1Yj9DMe zHdqCaxXxwy<8o|Fs;M;2zU4gm-OY9$iwsQw!v&hV)9|dx$U1x9JGBMTPe!0BDhZ}7 zy!Lm)U(p4Ft#5x z1H}!*0BR;VFga+5KopM5Zw;6lM`j}p&8?L~I{I5gcY~W-+V>ZGUsG zXR3sKg}g5k0d>phzBAAP*$HKg%nm`Okr6An%&9UWYjEZ5I>e_8KOYB$BM|^m;qE&c zUiy36=JXgAc=$*9jPLX>FgRC@W7G)-3f8W%+GuyBG$=>X^xLWH^aC;lc`qAjdh5U7 z2{CNn>hB=teDu9hYyZLz?INVTb7Z-mdv~u0tsgWUMm|j(n!vp~e63mQqxoJW3J?#6A8_Q;3*Ga+`D?N`HqkI-F9`mp_f&L02Qp=&Dl_n|b%9PsMIBe%!4$B8C=^X7*> zt)>%rR^p^&;3Ao>T1&~R%+O<>s4izkqr02mk2}a|>YCi?B#@m}F=N=roP7U_b^uYt zm4XqtS7{g59`ihza5r2-(4dPRw7uP6d6J`eO0CV`bCv%MXEl;zuGq{_uN>ve*cYYp z<{&G`P5mWN2q``dV}v!YLiV&%ak zg6;=nCE991Il|*_X#|!y|2%f6Zr$uH>a&`Am1yc zC>3sOGShkP1N_m5Nj&=bK)JA@TB20*MnfqP>omFATBMlTa#$U|n=Mh%^WdJ(q=Kh| zIyf1FWH_f^Tgtk~=FrzRQ=Dxqlz;MXQ>g_gZl=kTq70kpVr*T}y7j_}fNN3YN=%R+ zU~`F2VpNy@OB5s0ZKx8z%$Vwq=c+AVp>a1z?X^O#Y;&rQEpH2=m@jFLd4TgsN4=T(EtF^=kz@!5Gtp}$gz#nP zc$kop%;$;fV?*1E;TD_O3}H8agOP@vpd|yWzZpFA%7uK`Aa^9kuakOUv%zmC>pD=UNOk&)JA`3&~u%uhM*EE^ZWt9BHk1ft0L)G z=#WxInst?FoSBbqnl4>hLr9fgj4MBCe7UQdU!2JXeSSfJx{-}%aV8O?ECcPwL94HS zs-=T`I2dKK(;JmEhc#dpTA#!w6)py|Cev)nuG7m@Ez6B`LPXy!m!Dwt_x92-#Qvhp zM=^bRZxw#)zfUWPKLi-xcqUZ)1dc#m36DADYA0Ic$J&<%?vUA zlnTYGFRt=<<<4?b0wKAl|2?(XhTkPbl+r8eE&-E6wM zyWx8|_uQFt&OP7E9%uZIfxXsxo?q%jrKfcqSfguyo$F2+;pzGSxgD!2Hk~1ZnS_mg z%K8NV{9 
zi_uVN^PTkg`1k^=rU`5X&=3SsvDR(Sw{XJZuP&05&=%G!?^p}-`sVBX%6MBZMZlvR z-XhcK(cuL{x|w;jHp9tMUc6Tp!=%>DKS zvO212dyBB>I4lB5bDFNGo&qFn*+6V_A`pYby8~#cVaj137M=TVVF<9kyH`|H00Pbk z4BIVK5@d!X7kvtU-i@i}i+_lL5>TjVX#3{vnwTHP+`I-lKjCiP?T79Vd=+i!us-&- zJI14zLCFWS=h1`F<&~x_>i@D+6LLYHmaGjb%TQ-g1o~Y&a=%!j+fiib>4*# zf6U$t*iX^6_Ww+G5fZh>Gq4Ha87lvc!S%*MLUJhaZMIl)D^`3}-rn}MiHx3YXy>~| zvr!r>d?FkVYo0h+#NPhi-gvavE9I56S}T*a?jfppo|8Fl7Ppi`<1)$6IXtnFLpAkS zJI&9&I>|KOefjj0%^AXpzLK9t;OT_Uk?IKh5J>H8!{OM0jMxiWDetD=v_vg+#zi9ARF%#^qD}mxnC~! z=)mCt?-X#g46+GL?nIyqziXw&ddF%CFfoglrfB?;0cl&nQ4DlA(D?qS6b}CkIFy8p zD^EN0dJRxW`gyD>ZRXW_9s9lDxVFB}O;^@h&g%hDFh^VPuG(gEIiW=ZZ%ue@fG@0OBQ!?bWYZixj>;T&QXO2#zuu@>W zlX!^+ZVrymIlg6cXxzSA`v;jRtxhh{BI3MPSg;53QTPtnJ(9m<6zk*M$Z@C%U7S#r z8>|K@+dSs{=T?zZ^rE#JB~`@P@dR+H%@nZDCQzM?&E76C&$iSkF5|HV$;GZoL^Pt> zxB+quS*NhvR z`f4&}WO0n@Fme_5NpTeLbVO>fgTa5AdrSenxzFqDfCTA#`rJv~5aaHiDP3;oW4tn46a}Rk+HP6zmwe_sknc_I zfCh3(4qK{!)!1teQu>eSCR z2NoLv#FpQlR0BsJU_zauyi~$%1qkzEw5_?a2?M>lV!0571r%xR1%3h&-SJ9Gl^WXh zLC%&fO#WeBji|Z>D*riw?sVTRTuqUTfCKfdo! zve)nSy%d0{@+ZVM_lIky*4n{8am()7Qt5jlhZI2rG;(Cg=7 z3?D1a8AfM*XrdBFTKL_~(y`xFD6O&q@5|4d`K*hbF_|w@g}Mh%^{#@7MwiEE-x?nT zQm;BDjSgvj&#Qd6u@+_cWg>26~hsU!A5dWc* zIq8j6bp^`Z?SFYnvXp_R#7o+;MLty#CZ$4o5TWk&c~44jRie)3L7B)~%Eo8VIziy+ zQTo#W#eGh@ibEfU30&Y}Djd1CMm!_nJPj`T>44maQ$VEZR=HvFZlxNc_@b*Mmk{O@ z<|k)or-q9I`e1{JY7u+IbGJ^HvVE>nacpvX+>aGW>I+DQD$|wqR?())$^6 z7IO)u)5W}JE#=)-xcobTrh||QC-$iit0ct-IX15~;^3HiYNswzeac3x3+rev_h$%N zdC6!-)+aFec5+yQd+u7xOcFN6&V=1=e!yTVqdmY68w+m1tP(*H6)oavA&yO{9RcXR zJI2vzkcz#R8N<=ZUcw=J6n*ECT?%i z0i;o1Nh6)J=vQ2ojlw~glXak1rNcqc46~r%V$;$Ar&zLuznwLd9}I=};*23$f6Fc} zwC{u=hK3}wu)t^%xY=~>507D=Y7}7L@i?5pJ7Ed~z8L+uI49is?WR*2Cw;|GgD&)W z_5Y)F321pS*oAg-pK!jlHl6I2u9xi^0@BDneMyRW;y_K+Vm^NCr^$4+n4sGM%_!xp=D`VD?s$I z+R@0ce&M(90i)1YC z=`pt=PK;0o8)aaQH=Zt#OaC?NVa`K7?Jrh-B|k1XH&d*weg$O6u!e_$jDBd?_@Zwa zvj}@l;tP>ylO|uj@v`2jaXNq{4fq9^r1y_ffr!;E({V8nJdGS~tEhqSJWJIryP(DV z)6lRHf{BSwg{hB`IjK>F5wMeSWlUfWMu4JXc!96yC1IZZu%T-sv&DMc+_W*X@UH;i zhRx(DNMX5)=vXUfE1}wlODWZs2T<_lxE$Tx76ZG36^O@cw^e;!+))4`s);e_7yW4>G1anU9 z#HQK32^QWs>dM5Hvt@8y*zX8bLKOm@=P%Xv{#>ZsH8jxMYhRiQ2#9!pr(Ek8TuG!= zB|M(bA<*{F*%#@Vh8S{k6onaPR{!(H{+EQc|3!D6z5qZfaXlPD6Fy=M+2Xba?=J=Y z@7S&v1$ib{2dPFrgT4=^k~;b)TAt)wza?h*muFV`2bf!wfhMLc=E)K`BhbyVVnh_W z&+tAV1ABf8C7rRJk=zyhFCq;7mxb3&l~-tlQ85}0p_6Pv;4}Efsc8f6%_nPFk3#jT z#G-zgc&uu*!OoW7sN}ABfn~RKvnZjPpANM+Hz!pTTxR8A%V385yJ0GNyQT0y4yb)H zQ)Q2Xb346pg#qVCvIhebfX<8P=T;5`J?Opimq+dMBrcd`8IWwx<6pB=AAy}48tD5L z_&|arxlMM0PG1JF0vO5gY4E@DvYmnse)AIMvD$`!E7nO48QQ@9CY6$QWV$W3i+tBe z9wkKnAR^8HhJZ`|q}>Ah_vm*WrCp}`qlmqdK(?BWJ_d@9e-_lC37y#l!&ft%>6Zx; zjW!)Q=ZcZ>YtX6b2}N@mw^RbxeFux{&Cj8L%&zv@*)%eIM&k>%>>#IUW3Y|UMw!Fk z)VP>pn)2PcDO5=38 zCOUIz(7ibt;k`Bq}#J89loM<@Pcdyc^=4$?)y2a|IHjlYkU@9zWr^&MuN} z8`7;g@4L|$FO#t!;ut%ddWJTUhdt9t)t1%*4jj%@V=n{r zJNk=MzdJdnFKZiR9@WfF?|r>Tx5}*Gk*sG3S2C=J-dy$hU|xK=ehWSo7n)Oc(j9YL zZ4Z%e>53v~V4ApDeZ)f|gr?T|K0mr~k8~XxXuSTi?qjAwSNs3JRGVJB&7sVqAF$>d zJ!E|PM>et5>E9+-@tS?G8fJQ#rAuzr;US0fF7csvskEl!!N}#zHpSFC=Il*ze9{~N zuH(V43lsCQPm8o5IzaV=aVoc8NktXt{&@M5=heac&wte?MzSL>&d+|HtydPZoA#S< zFjST3mj-;W8&eiCx23%#{xCYCjcZ!vd(;e&sE-2fjlN_pMU%H^w0i`9=Buq(im$Ey z>et$Cd7GG-4P?}D(5STJljf zn7;-DpanD`;<+1dr`2^9b3~}FHL1Y&WNzM2lVs5iAS@iUW@4sb-{<6WEvZ1EdQJ0E zK}IZ$%Kv5oNLs-KdERjcM<)Kgww#;eJmTm8w?pq*%Yr)G#OAC8Bl~i7bz;Sqr6d^AGpF8O-+|ep1^`44Z3s_ znb#hanh?HTqC}`wUi^6|T5{^uckb(?@3hi3&G#|-2gkdHxb00c*n~rsd zWx+Ck$BbUUs-L@>fi{J$;Uoh@dJX#v!yh5Fk_)1C1L6>r9I|@&M_6L6QA{xG{>f|m z9+Br1He(&y{#a6eK8rXjZV{n;8ZVl-ylp->ICg^Y5u?_YOXH3(cLV}0p_vR(*`u^C z#POK?IpBi>Rt+34>xi$nP$C@*fY9)-n*4?CnL-O)@3^y=C#PzgyX#T+Z`Zf 
zs^bAN6usd04Z?(`Tj{C}@qGNNV1evpNQIzc1^)Rg@YX(V<$M`FbsFZ~<{>?KjWytf zaZPz%X2?7fLO6tZ&cj_i8tVX5T3i=&bvHw*Qh4ZG(+7B-0+P0e(zmw~#_&!^S}a-t z!Tj7{UJRzU<4A>HIsJKa|GgCux}oE$>s?_7qv3qGP*-u@W|gYk+-Sxj|A(H;|+Ix+oX>>Zz5^GGmYM zpZ<@v?_W=wYu(Ff+f4KfGt~_69wJQpp1w&Pt{|0Ed(*_(uq)Bmr?qdO;a>uzIw`w5 zbH@o5(0WnDGp>*sBnOmPqs5~kp}V7!#1rOW-{l&Mh9I!9BUqSorYWHzY-E7TjRE(N zg5mEs#V_+)4!=KOTHJ!^gp=j6TUvniM4)jyoSO<5*uvbwY#|X2)DeQeEMpr@@HOh< zt~N8aC%+CRg|ZHlXoqo~6B^5ri_EjIlJ6Ug@Nyb=8beY+=?XFERz@AXoNVLft_$C} zVlnr(GmOCDh!ztih+tV_>p`wsyO~0v6FXm`o-9E^#Xc+lN6o>D3T_mi!R@k3S;h*g z!cQ2z@n(dEap(jsmZG<*a5ov*?z$`sbq*|#;NrN9XvZAt-J<5w@vp!$h1Ge}m1kscefZ~RA$qo4!I|>RGT&aiwezge zVZ32x%yHqAE*KELG@kWGvD?gD|4g->fnK#Iz4-m2SemR+?PtKpsLW4~lgFD?>iTGI zQ-3tIOIlWc**Ea8*1mWoiYkVY=Nm7^?F*Ib{$$t#8GX&;@~+%i>3=?H+SZ<$eUhGR zk+k`EyYV&d;us~{CTO!d{o9mP^&yIf%)cPq|Gbe56#kL>Sy-*VZ&xa8tCsc#f8!He z>cbkYb4z-K=#gAH%eWofwn>T0$rez-m=h=aWpAY#KxU9~h;01DV}abW+8&SdRcs;* zSk8bcvXD5Mp2yzJkjNprSvO39VTrU2uiIjdK9|8=69qIla%|m zzP8nGHf$7YZ8->0r4Sc-1Hj(yUtqGja1Y?msAQ-SN6BF*>d&P-y-!j+{54AYY8}>f zlKTLNxPJe&B4Uc$N&sU0TjC&HP2%M@cj&^Egtf_Pgn28BNC@n4)wgZ2V2mq+$4Ul- zY%N#WyKqEkTp+0fgv-e$Yj-vJ0@0Yu)_8KTgWEwPqs<5EeWZk9+!-7?W|j7yvl-o< zT5oMFXSusL?(K7DTOHo;dwlIEwh9Q_8m+IM9bn3e0=m9mw2L`3tRGB)iX{xVD(GNb zv2oR|XpFvg*xcQ3UV30Ut*Qi1%zJC3yFgiB=;G;S<#*Tl5^s~d5}dYXt2ncD+}K-w zdV}r zjS>ii%C6Iap9pf;FR1nvRtE~kJ4*mc@fIdj@U#<`6lTDSoXsFgzx?s(PpVMw#USVF z;Ma+CzjoT2Y2`C^x-o|d|>y=bxa2~gjC;`Q-PXZgR2xc@&-*MZMJ#3yo*@J5GU z@V+DC3r|ZsZ)x12|Kb#$wssTuIcW>)6?*nigHEk5GA~jZTVaKBegwSG_2eNlDWs{eBFjC!+VeO?S3Ez3ay5B)#=FV=vkkK!`c}^z5KUc zk){;m&rq=4;8Ohij%=^#Ta9M#R1>q1=ICXyifBEsg+0{GSERY|a%=zWTtTRw?kA_^ zw9ZQm%O`mhK?;nOtI++DrCbx-RdG1Fd3g0iqRT7(?R7C-emrj zjDRC#>*`J*;_BVsnWFMv8IEVWXMSPVNHt-YpkLV&PaAV=oBacs*9)Lfxw>jhuzP8a z>42POu!6wt2CF}sBLC;yd4tpsr^F1CEu_}PFx%DEQ(Avi3n>8&21dF<+J`n8rLtJr zrZC?JhcwR3G%LGr^>;)+@s1#v%YF)GgEt?K$G$k`ES-nB?%Q^h_#TXfnGP>tg_E(B z1;vzm4!J5a3;mZoU+B<_a+(|UVDUTY>s~bJ>8aXalr&P8;}F9-O8<0X5-jZ<|4Kkg z%!`HZAAOM5s?}-+@_T^5FZi3mLeWUzX*pQYdYO>E!R1rP`8KHb4fl>j@dyh;u){2`&FHdwVPuteAo#t#Nk`C_^@FlASO zn{jA*>8S^}xv~nXhMkMu30(>OR!x)~eXMX(bc`G!l$qh-LLzvrjSNCS9#*%Mp|Tzq zSbRCr;}aA!8{&*IWJ-D^Ea3kQs#s=7>g^ye2jSWVY}Z)iiD}^T0CX$u!5#REenU1z zmEVw$>eE%tD{~iy9ZSC>LaH=6RzB}{go^4+hvDB;-0<|oI`9t)Iqez?0sKsZ*W#F@ zwSTe+=(S=`4*My5q!vvbF$_=`2;dvP`8kI_Vy`;EnUlQ zetNv;wGfKDjxa$quBYhsGv`I_Bw_Q&ViAoRU8nPS~q^WV*ua;OD`Iy3I01@ky?J(x6!7;?F!IM20>CP5h?_Gre+(d-i60gA`uheFC}wom$*1#T`6OZmuXxM{wu9Xk ze4~nVYiI?&f4x8QlNt?TcWCEkUjq{1ZeyRw?+@PBEMaFoO{&BV04^zPTSSF=7k*Ws z90zoy_3IN}DA?Jr)2D3(7STDWVg$@>{XHRxf*XAnXApzfdi#@Ri`hKGckcf}h`Cn0 zjKV{3Ue~5F0hEshMHz63n^%rVs$;x;L-Oh{A0%Om&&Gi;K}2o1?9*Hfe}Zij$5&+n z8}T1WI+2ki45K@8#I_Vu5V7vNiS49(MRKX%VKul}ie#j>N(pORDV0c-kI(GsQLcWj z0rI@8m)qv9WNo%h9GF13j1H%1pOp7Sn?Pqyc+s2l%jqB5C4WjMSXF@w`DKd-bN1#ejSz2Mb*gS_4+#85I`Xz*|g8TV~-ER^VpUK`UCU(mEo9a5j zA`HDX9OKPanN2-p5Z4=)W*+zpc& zaZmRhVzd1j7n4f262+pEwN*UoS-98VcrgB`SR-D#@Z}G>JAP5&C zUIIZ?(J&ap_3AnlzM_$@E0nhwH%{&mM>ufq0^xnH4Vc7Knt;m5!?zH(!H~xwqAf|G z4K`4BH<%4!iTn42OCIS}L^YH{&(`<%%(n1M+-dx%!c6J9$g3A#m=x3IL=xV;84E&` zkU55NU!q-85|a{j8%VPd^dU;B+#Qd@e5rERQX*W}&Nt5N2@}-TZ77Hw5A%OM!b`Y0 zSo_Al0u)#0EM|S*^#}3o_}`n;bZ$9ULKQiK4iwL47nWo(p>n!*NgQ(-yORo; ze6wPRk3$SrzXBqtJ(qn1D=jFaF}@F_bMtK;nC;Ej(O)k==S+BS^lqCBBoE2omdC31 z(h%sH4TNLNmntZ1XYrq(big0fwCCDR7by&F>*({;SWW$*0&Xpb{`F{{`hKMRfJAL~ zj5iQ2#q5JgQ}vSIZvx(vgb_&RK37+dZp(d=pcasQB=Dt?Y4HWaz6m_*OjcCNT$2}km-0eFcE9ntY!0^YEn*B8UMnpjQbE|K` z!SI61cGGw#p6N{p8gZRM4 z01`yvD@US9G~uGc)xq6-(JWo2A?97X9w%llPJ5aJp19x@G7(lHnV4{yC~1iS4jDGh z7^JI+Fx}`W63p8thQ&EZK}97J*r7f^bw}4NPV^Yh3eFd3R4iCn+!yf@D-0F@0@4xB 
zFC`g^#GpX7G2^Pp+DE@|Uw4YQV8-c)CoFtypo<0TnicUV7k#U138K(YaN%MYXAzWt z*=sDn(1GVjaU`PHnF6JnhAqTZX4+~eyc6}S>}ae9>i~Uyw)Z=CV+fy8$>TiWbZvg_ zBTg#=J%Az?Iz`wlgQdo+k*BAk+R=YW6vOW45U$H4Hw(@iX4Y(@94@kkb4U7tlF~!6 zF*0^QMv`qS4qW9A6B5B)d=zSwT1?{s@Q4{sQ;<=F=#z1mwMd_x)D^Tc1e>7doiN5d z2XRp$LH&)Y;Y>*{Qq=VzcrkTvQ^Hra-!4h8ck!`}*@U`D$l>CfxoN=z<)>3nJl}t>lX!U$Vs7Ch3596N} zr{#&(dnr=>um11nj2jjO(tNH2`P`*`T_|FqTVa+|j50awv%HAuv9LeZLFZDU*Pzwi z{1904sdw|`1TJ_#)#bu&-;Uf8^nHGIt8yx}%MP=G@V>Trq9Zj9$0 zG|z{If{kJ)gG#bxr?=QAhUp-t-K&T6S_+`rZT!4_YpidT!zL2)Buz&BC5O0&Ofjra z?T{hjYmxmG9ija9y7eZIMmEDnl`FIUr1y3|;4vNkY_p`WO#G5}?5F$63!FL=D1_xd zPTeqYRi&TH@=_wdA%s(l6m%3GSN2IHG!f~wj3@uRAet}kE$#t?{2n$d#$g7XIMO9a z_}%T&ox^TxMMTGbO`)V&kUNuGLV>U;mD|N77{(wW-lFe^eNzX#cEnXUMwG9>G1G-! zYRp(Cl^7o3owGrMq_2UifBASz8;(L>B<@f1=2RRla2ovF7%Ql#v>oozxJ;i#rF~? z9?Wyv7o4Q>DCNW0y9{Xt)O%$;v~p=0sW?t6Pk%oDweD21m#BnR4D$^vNfH?` zb$8W|>0KADuSfcjuY&dJ2`&Kb6}~C;w=wzi+WO;J;$0zUCY{rRl@Ng}f+Cy4xvHxN zd#>Y z*cQQhM28T}8OimogRjDXq{w*5aUTPLNkg=ub@1N8A;ps~&^>zPZ4XK?;CDT+$Qw5T zSN6#C2$LbJ4~AT>&Mw!x{7u@0kgzW`+ln-ka~2F>qSditghz_#~iec zu3r+vMayLYlSPKjyhT!DvO`bzr;=xfZubjilo6Ls53+)n7*_xHM# zlDOX4{9ecA>Lmy6U7sDtLcT+fpvU|w1@2*IjLY6m-1kJJ?eZv}&ww zs`|b zamKPB)2xdVZ`~R8oojA1rtnGN%IjU*pa`|d-gqpFj`pFMNrLyLC18mB{dW4_S*NwJ zM+tek%0v=M4bjFRauhbXO>mL=PcGetNCznQRouOlK@fpS4G?58TN*zj^?Nl`QZ^xR z3tm|4R5l_16)we;*w$w@{6|)|Og@()5~L&dI$)=Jcai)4h$4`la)y$Uruj<~5TT~_ zJNTq9kX%euD)?QC(PBMaZ*Xm?af7(>;A;w8N(%R#W|vaWDUTC01A%XBpgCh^>LNsB zK-f}RGZHZz;<+FxCWkaj2=9aK;x=1u>MBr_3yjffBobct(L`n%py|SM{|NRDGKlS9 z&=n7E62=&rlW=EOMJ!vqJx1?($h}q=V|T}reU}~^!gP3WV{8PQ^RY!#@FkVudnt<` zuW;6S;~os{?q=)|BBh`)5Q|NX*R`!?rA2}uV^J}Xksm14T2B@o!D)CbwNz#7D@b=u zmLS=V=i_Wgi0a~QsGrv3LrUyn8p8(Yz${AlMTrIUZ<_oo#J&I$xJf|v0!DS(9sO~#>Z$H`d^+%DVSIEop)J52PlwPpD&HIP|-)vD$)Ku}^ zYZ0j=9A;n6ypmVOcF!|P|MCm&s`~*W%iUBRY7edv)u1085$7F^+jrU9_2<7_jsx!(Ga=p4WE8~QHun7i?6yRnrMC#B1)(>T53Wsh4V9LQwEs9%IzaO~ z_Jyw@e?<=QCi=hMUIB0-{-V|aJhaS-uNmYY@i&cKevXGYkkr0om zy+nwMEK0gD4G6zx8HZt!&nZf+gvne0(a?811Z)?)-G}p$nlbJw(yO3~@k=AJg^stU z$_{Gt`+SvxbL6Xkd>k-6iaUJ51KkVVkzEN@_Qw!qA$l=iB7Kj7uWnFpf!xYj2YhB7 zixD_u;Wy&1ln#iEjcuh84T@Z_2ouW8ixg)5I^}@oJ`OL~fp!*j?YS47x@BDZp3YmUE|dnu3^q#2^-F-gME+4sY5452Kb@o?^f;L{L1dmmv7O~r z(iC=Sz?1<>EJ5FIB`cG^MzwJ+?xix!=3x<3uaqP{+_u`ZDj>Pk6xF}zKvl|F95Zy_ z0`9gd^PNnUf!Z~n8{ki(kuY5}{Nx+W-a;pn%*60a!e{hLGSef1m)Oaa^8Y@2usHOD zFXw5Pfi6U&6zx6|8+(S3VocYMUD?_f&j>r|V(FfYRRnkMR`4Ob@$NdgxO~>Tq|Ud4 zPvo15TtM*5QWJ>FuaVR)eWjul2p#T;kkJ0CM#?XV2F~MRpkNTW6B-}!ZqV%3w-(~| z{??f`a`RCxMJA*V2u=iYDQnDm#pID7Zp&URA8eu210I2kB>Y!2=VANJBj$@XSX~;M zR>jYb!`o-KMI)6dyLZ5txSipYrQ_#gZZ4>RNlMvpec%WjoF>o9q??<$tLas$Fw+&7 zY!V-!2RzjPp%~r;;&OD3K1tsoYAwQBA?#&B`X6&+8-*}M^-2bwx-?v!{0gDi$8!n| z=|pT!xk?SBukSiU9A(&)wbGInqM6Lq;KxuC(5R^7R7ZYWP&pADGYUd zT+4im@XkJTU(#e_5!z;vMmsUdQ5}+1oSE;Ao61rTktW+UCX@q?G3%KhYj!$18H=d- ziel(f9*`wqX`kmSlDYnSFAN-5A9CgQS0U_I`hXstxK7o>SofXw4-ig2lsH&RDs?By zm9J$uqWX8wd4N~#oX@YB+~Y(VnGACjBP$@eVsz-!PS|NB%T=@sWj*~5*mWW?;tLfD zToDJDK{vPlZgZeyUktz<*~xm<=)xZz~1_Yluboto0Sp zFym*7+~efWO@32V!p5)qL~GHxqtqcM9A>jy?EPINC`?-OJe3>=jCjA(aqYt$dybn8 zv4PeuRf`5s{3>aBwRd;_VQTX7!fv_J}F}cWHqLagZPH_c3H7cRE{B%DN<!am6fasKSdu-m=KLgPFs z(WMU04yoMrJkfeh7m~LjG~v9ZA8*o zwwxiI!XZF8CmpbwAOY}VRPP)d$CY@K_i%|RjHv>A3gk2M0U#=w-=q_W!kgQvdokvB z#}Lm8@EGby)c|93W&6AScp8Y^0p9&bQjHY>hs9=}25V?pTNc$^H*p;{OGXR3${0|H zMr0g-U2-Kb#OI<@MC7A{z_A$$OokdM#LA46c}6xo%fIB(`L0KY1-Z9c-Jy9XIqpZm zex%;uTLfRulx1o!O2!f$;I$J)ur3M-QI@V7+8JPY5Z;SBuXMz_4;^DwkbmGr9{;28N_~~U%WAo~38>BMJR)km_<6pP zD%Q?-(kx7L-HLKfg}Ft)?-a+k(4p~LlIXhe@UV3Y@hB(^*dCALSXv&&*+uvl>}E0h z_I@f;h=>UP?@*j5)5etHNQ^U?cW)X?6OWei>1-+;sY3XBt^D6;Bp<{gJMm602I-%i 
zZU7zR1MjS#fuDq_=P;>=GXP^qydsS+l(SVgQ7>uS`62VuxQ8)wQ{c?rj`5BUkHTzW z%!PvbJCo&88ELk8S}+F@hnZL_b{t(N!ofFjQ()Qz>gNZH;{=bQ-$=!~+Ws<^i1rr2 ziXpU2if9xV7Bd)*aQ>66DbKn=_-`wbFdVNWFun~#Hl(N(d64sO^K06%hE7suWt7{6 z)C9WJlc>sRgSn}ZoNu(;sC$#EtY(q{hlM6K?l^s?uoz}Y3KB0yAd?=}lf43sNDW~a zIgv8=WT4+{mTTV_QSS|Yvie;(u?)Z3)2t4CS z?)z@xSK4AkT~Mrd6j!29v2mnd8r)Za753F9sYon13jQyX>XlBYS=A27LgSl4E-(`y zY<+^3z-4-a2NegJB7YW|ylU{o$J91W4?yN2K7j;c61~+cRi*>@w@>%S?@@b;R?r}C&i8LE z$&5vzswK@8gW(d9_wNiShj+J{oj$gu76RWOdFOfQw!m7d{l47Lo3E zJB6P=4~d2Qw?^|yf1{@;)~=}@VeS^wZ!E*iC@q0^+T{rFBE$uNO`NSRKJep(N}K1Y z9^iP6vN0!40cjZL z34^r4pEDBZjM)O(f-RqxwkK@ZrSorw@0Fka8#E#M@dcWoS5OPT>@O=N{A3rCaOte$ zV<4{<+QoCL6mBzp@Bs*gey}QH>YGFwN+Do@fB0oYlFx^?!ODC@QNOWJ=TuA* zUMwLdk#3QIweip;D%D^~tNg`r&Cuymyt$JwUJ!yW6(<#L5mhA`djN$13Y2X&AsiD; zF`1C?a1V%lc>Zs72GyUU^Ar=9`l?E*+HE7@Ma_Q`#4?exE#P^vi`l6)_)8Kze5Sxe zliaZiq2k|UkFy*=eOYl4OX$YF)q9vN@|aN zLl;J1_Qnv94^J{)NI9^Odom!FBw;Xa_LbdkbNYD*(Ho3nV+HAHaO?f<(HkY$zUv~; z?*(3u44;`&{blKPXm$gryZwox=V-oWWND(xP&PUym~k!;HH#t|u_Ucb&nAkm2v3JR zbp5fcO>D929v+T2*mMw?;%YRP3P&4lq)|awd^tZatmsuNBC=KOMxsnUzJ^O+1&Sy5zEbpxZ~z$_4$ysil0D9#|;Z#(}wA0 z4ANV}CoG?h-vkHd>JeMSD`Y&;WtKCKUQZfg5pEi%$-#$56ewXjZ#mx+|xk<8$`e#Lfx&ng4-bnM-Y%1jcN7uYkk(9xvTOCH1*$qH{?Ap zSe!#XN&G7*azC6CWQgSRtZ`>QJ@M-|KeG8n%sNro8N|4xq6l>C13uU;KqH-Q_HFH>?DB2AQ#RLHkc5HW*c7-x-Y zn8X|@HXJ6}3AO+r@Mopdcl3dd^+JF)-9-Pz^Kr1S{ZCfG;`h2z;5>Ji7cac};`D+oP1e52Rbxg*udGcS&!) zWFhK*n?$3)$8pvwkEl<#hc?pzGn8WknVp!nE@-PuW+vF9Ol|Xrl_Y8HicjSA+126v zA<-9pT~G-jCMDC-a{Msk=RYRdyo*BJaLa(eQ`>bXXdB1d0psXZ^-X43n^arML}zqT z3KR#&5Ugtc-+=<}e*gs+VgOLEQkY=K_QkfhjF4Lh1romE2}B?mdvM96viEt*Onbkd z8&Lyd1w1X6bAwdEqSeFS>z4$gc?*2h^V!;^Go3lg^z|uMsl9$e{Au_YDnD^aJl#iJ z8Z$zzAWGeIQ69U+(bh7=t@*fCDKslf?`V2|wLQQ&qjM~KJF@DxQRQH4IxsyXPs)Dd zeOCl@xJu*w?FZ=IW4BW68veoUvG+ElU?H*KB|i+(%FQ&+wV!U;VVPG0$?ub9^g-L- z5_T6GvTuuOl2xvIJDO-1c>5T#rOY;V2x!JCwj zc=|1={0Xys0g&Tn+?|BeM=w?+ zu)4EG5!XHh%k}Px-c%_6MKJxrEvguI9>FD#o$)_28 zKaW?77uWulLD!+kpRBJEC?<^L2oaTxDdef{7`NKI?tvOR$J!iSfJ2nq2r;V`>|Y7J z<{ySR{F)ezW|x1qqk)^r6Uoj*$d>T$lgkTSAZ$qevbOx8qwIVlW@l1M2Zcaup*%!a z3sNmLy{@lpG@51mKhxWR*ZU^ncCIyR=Zket#%y8MU>*pO_ifGhexM=y#JY@D zrQmur$1o60nEC}wFW_2ZU#FSL-+Pz_g~pfy7crD`Lu|AP%qN$Q8dSJ zJhN^+^mjTIHHEgQSja^fJN3tJW0=>9q4WjoYmxM6SeM=mvTKn^yhrMVRFP)V%7Kqx zJj7y$zLBe>Jx8(n?ZzOLT5iG1=2^b2o03YBctYg{Nw1+GkJ2?#+DTuX9V3^yVwZrz zvC`cwT5fY#MYU>dZSpeKNXYpD*-Rue;Lm>yvu1mT8Yfu%;La02~x?(3yN1jB3_^FL~Fo=UGO(;Q&Im zVHNt(@dE=}3$Q3y{ghvrV1v*`hj2l0&j6Zg=f)x|*!Qj|7l7RonfJ<`#iQ_Ndy8ws zq{{yvYhM{wW!iH`>5CQ3K5Rgt`6H-byNOyNjZ@NKRN$HdhNofS6OX=>;^Dt*- zK4-2u|IRO7HrL+l=Y3bNd)+HR!!`(ucj%EV{YhYzvn5Gl2BECM{*2I^NUIugwytHE zUohJz)P-Zvur}^6uNj9T{s8EAWcjq}&e9m_i_R!z}#E?@p{aA=YKmR_V{(rlshCf`>3Di%; z34Wph_PF4U4v6T=s(N{}Xt1^I^B7rwI^>?iIk$-y)apDLG)?v`?=dBWA3FXQ&nhf_}0JuNJe4%#?;2Y#do z2zZ#sycLCOdxZb0TWj6n9md#+`Hj92mE_)qFLGvpzjs%j8Sx)T7RqYOYXiCt| z>rTN-+A35jd?XH?f!a@=ojKEWybARNU6CSVxP1(vG~_aLG-m!z=u!`0X5o39(LbIi z)^UO2Ga&8M+0~G@QgS9A9gZVA1odK@ON-N#e%gb0h=I9|rTjxDo&7pwNNZMd&~t8! 
znNNbAGu>fKRSDle1<|u+!5_2EsCvala;*yLyfk zuF>pTL|G+Yr_bk*tijOYH>>S@yy}svBvT8)pYzZgAfr#+1Y?H*oqm5*Rm&q;t2aE` zFY#t80rE=8w&Q}h)rSMBTIyRG#>^~GqvOd3*MN=Q;&8Ck9Z$B>;fDC2FdWX%RR%yQ zNaD$&&Gqe>4;#oZgacryhNR+t$!KAcany`%j=L|0xib_6mDvW%)~#w6lC-K$rOs=* zt)Fmc(>=q|`~mfOA!|7@I5Ca3iw#?$=|+k+%^1Cy({9?giT^NP++yF3OYBNdy1UT$ zSe01j@MZb2elBX>->3@}ny|;nb5IDaX#?6TQv` zT>$|hm(I{Uz!-RV6mPeE2PydV5SwTFsPhh>vy;Nba++L{WtUKFdy;6eT_h-EPKFtE zl;%eCy?C<8^X!+@`L95$>nY)*hKs39>5nkhhb~sFMuxE1iy;J7gHyt;IQM ztwI5GrTbRXqmfLi-ilOhPv~&llyyEiDljbJY^Bes)2uCe_*xT z6Iav~?Am}}I#y_s3(b-jPov!w@_ES0J000X(&mGyLM%tCud;=;Y0vug-2~A}fPTHl zC2CMOoyoESk8CLsy23V}N4_k(%a7#M2ZJyxe(kxjL|40!qt@QjCc)UTt~Jg>!u{7j zT5@_nb1Wu%{B(jReO=$7VgYv_N0&M49b#G2Y8O<8{q_|xZ4a;8%Jo)AYayRA#eb?r z-DEq5)6`3iOAUiK<)haG4Z_8I}dE_bQrfnyyvzUMNU#{pOwGqh%lsR%8?qk(n z>E|mnhhi&%s6;(uv?OhF9)s5>tXznZm|9-_o`&(d`~n?Bmt-9s9TLY$`*NAC4GQe@ zk=aN?V8Nv!?AdiOWTl|fbhMZZi&iO;6LaR*dnEQ!myt&;tFRM0p60Pz&rhatrj*oz zY=^Z5%$OZ)-JbF1r(+D!ru@o%{qQo11>=!*>x28B+HSk1*a{2==R(DcKMh2zRld^( z06RL0-Kj=-rD7FEm?0j09Fum7`xqY6a-UI?SOh*>daotxZttk!E#-Gnerf;pz5^Ov z#*lwVwhRp@nZ3zdTrqt89-erF0)Nh`iFB3t*#~J(X>;~gKUUJb&HX)XjN#i9B{<46%p&t z1aV|#ncu`vN*BU{U1knVi5?dp-E`cQ1?fboB&s*Wa-{IO!qX;DVeETyhuFnC-( zxB(_d3_ucolGuj-P9RhjvA81%V3vWCRjw|k{OSvnR* zq#qt6>g@{q>6})ktXZ$+FiV|z3-*6eNRv`X1k?hWMzzui??MHF8&KLq*UiWk;Zyi-gbcSPe1-=SL6O4 zU5&Q@6p%p3=W6I;+UR;N#-KDCkQmzYbqp`a zcE|=&3xwqz$ZLuOk!a9a(vdyUe=7L2Nkj!s)PY^ubmCgJ-4w%sWeU&WsnAySyeAz( zzjjZifc|SlKNvN#w-Q;@g(E(t4W$r0sT9jOA~_X)bQYBCF`grO+#t^`FI|!l`a?-r zX{54^h|fpFCUjw&0ApWR4y+wz47L_zoP#cHQ@pmxug<3j71XS;{)Ckp7Dg*OBvQ2G z+{a7hY&eYm>K;U*zwYI$Cja+pqfm2dJ$Tj72>937dL$BbX?b>oeB;4&b%wkGDiKZJ zsXp8baZSZ^!7^k%0re^iKR8}>pt00Maaw;j2%D=-X9zzrpP50hxvifER4dsa2VqWUFf? zXffyTppD(FG~b$PRMrJRB3S-IidW^*e!S`NTh&zJTfZQ(PjygwUfHJptNC;HYh(y8 zUWQ-&e+E;Rvjl;SWgWdANZ#Rt__!HU;KhHoBc72;CY@zO>yil)vX!g5 zR*I2PlRhB#6U5RZQOez(a?ME~epZQbAqL(&a^MIV6FTeTZ^mB26~r1KfgGG<&D)Z%s!u?#?M zByzGT6B|TD`3zT!RWkzwsXM^1$&B%8ApJBzuG97FYLK7WW!V-q>Uh4B7Pr(j!7vMdB=dfgW@K}6ACwkH55XEnt21JZOaE=Tzq_@^)txf zC-R)Y^-)ZmJ0$4YVrDwW`rO-yD^NyzGTwpl3_=wp$}La^E1FY1C{8g|icg?R7jnZDuXM7oOvJ6nfD<-c1&q(*r|)w(s*8hMi$C0Ni@8|EN^#_q&EOEIac4zFZ4LS-d<_ z2cr7S=MxjNkon3XuOX@3^G}?(q3-1-hb#RQ{QPr4pMyV_qU+f(_TZcYLo9BBnTI7r zwE;l*c67F+^I5Mk!P2#yBIWL%Eg2jehHy#e~oVb_`W%!33AS1c04x~DohsC0Vp74Re|E;l+HYL!|Mq z>5V__@c;%02`hNHrn(`{juVJ97|QLa(XlDynDg$!@g*wpCf}8crn-((xg|BsQ|xvV zbHfDdd1?eVU+VuCXz_pxJh}6$_ceDV!5PY*G0#Wj!rmp?zl^~_kwv$q$OsVvHs&)a zncpjNaW9x(5DTgO7-+*l@JJ|0pHpSTEo^R*wAMWoazA!8A7Y8!;{9fx*-kiR_WyNiKgN&jB>_ z{ki+~ye?*p@n+fz#G^>>ra3NCbo-v*v3CJSThtJIoxwz|$s72*5eMT%Mw<`rjxR-x z5AImV=hCD=^x|lSF+Z;I7{}c?1~5t)=qK`QUu3FZhN_C>zhXLK?*~v|xbAZo%D>A4 zyIGqw+_sNBd8DNzVrT15#aoBk zmLJTy?M@nDUjA6c98!yV5g7hy(WKGMbICz#nz@|_#|_Xllv`dK28D7lr;*5Xul$Ge zx${4(!2hgp*~&QMKllS(XE%F{0eNPV5>HB}R1_J6ITzWU<+oU60Qb*A_a6_9ni{GyFlmt;8A`JCAYSQ zcHv;r$(B*t=0xcx`an1-^F)`Vq+ZCG*TFykoE@CPi7lQIX=p&-4KacbFIOn|0W!l1Y(<;;T5A#EzsR1x=N1VzI{G=DC7bZen zRG9jjT|XluN!kgylHO%(KdaWTvn31{`Dgj@FUdOh29kAUjjZ9DAP9B#OR-r=9KJe= zm&?~T!Njfle4Zjp&PAgd{=q6rt z>$d(i81R4ZW2E+7OR|Kj&EY>h={=;Bd?0FR`BZ}*G53)C1iy+GN9vJ~Js)DGnCvPu zp)Aj(!HZ+=wIOZ^NNnzk2n}a#HK_b>R!&8sX~?BY(u6b?-~20_mGjY!X!vnsAFd#@ zQ2a-7!JhV*cfz6Ga&MpoUL=X^NGRbw0R5s zT567xh3M)Y873dFv`89-gfvUvd57^&gZQ}q5=bYy5aXTmRDGUA$(oNb!rPddNc%^^_V+}XZ88vn8OpwWpj3aq_`eI{mmskd zUhEhH3Z2V)KOG+h^32CDKS(VpK1u&jsq3XX@$&$@7K~_v4?!}f66+ZB*EJz44i?d7fN$7SIxQS96V&s9fnK6<#sg3f*cXBqX1%D)lP!vJ$VG^KU)5K|w)E z;T@dLpR~=*3u75I)q3I?%npZ!L_67!ca|9HcL|u%bu&(M8Ng6_xpJ6jGUQ?HXRH?7 zhv&}ZF6sYu-jBqf*r^`*Cc=#BzdQwX=$#GflwVE*x9A}9eG~(PdMt|7Lq3p%)DRVl 
z-`^JYG$W6=YYOv4l)HzlJERtC!PeBYyb;?ZBSA219kF1Wd!tcZI^im?@pMOXMSIVD9mIZzCmMBOFQcQf3_#-31X_(f=r z?7xmg?A<^H0>@nyyZ;5K>HjYG?nyHr@aIdx$|%gLzf-czHR9f(vUNkl z*mNyla6K!hCH*F(?|+n(lw_*SUyBWaL2M=(T1nl9ETktB`p!&5W7*_rNf)L**W?I9 zt4DTB0L6|0i$X2c}viDS87m~L2=6YM~VG1<7^ zAcf0t@J0cZh-)i$HW8KJhHSZ4u~acIf-rirg5Pf5Fb)2|PjIp$UE z-n``RKbOI%&6g*ZGFLWY^n69#zYJ-GT|I{|G<8))+TBN{1EsfJjCfQlap$QOEBd=aD+rWEiH zxJX$5Kf@jR6Y@iwR+>RZy!mWoYiZ zR9zW3SBvf!AZl2@LdEX#;B)01rBf=7Y=OJvS&r%dQ7|qDa9^~qnZ1Y)y1fM0Z7(j~ z3!iMBen_zyOB`s#9hr+l6L6iUsb^j8jZ2^_sZ*m>%BR=%IIpGG;GFkJcKJsj(4`2h zksTHj!hb3!v3De}>Q8WSdH#e=3!*~ABk)6ph&!heu$04om_jFDFlwZT;Hm?fcKL#| zfTy!3IQSDs@UqWj_2nrTC_4HKpp*8m$E22_Q6dT|4a93g!O}d2KgRdOF_Ff^OMgL{ z6#Xh6rKT%mbn!fZJ?7c@GG39g7nh(F3%Iozd)cYnSONS$s)m%s1lfNgX1V{S1&R0c zN<2?9H+%hf|sM6XT&OylsM0GnufN`H~r)4DaD7V{YFHt~5nQ0pth!d~2JBpnPq z##7ZqaIu&vxT}g+cG3N#ma{|)RI)ZW!56rvKhOI6g1c0vJ}1Aq^B*h5-j z67~}DbhShK4g1WOCS!e;uDv<^U@P$%{e7qiv}c+!IRBC_N|)4s-d-

e^Nwwhbnc z*PT1CJnfnU7ByYGRvtHVKP{(gXSbYz2JjwNYTp8F>jqMOy;1o+r$2Fczo(#*`VFcl zZ_LxJnSW(w<#$t4?8IeAzkSu;9@i2Hc$hAT$@0I4B1??k6CYGu+@|b}FrcjsU(B#y zOG(&o9Q=KjhP_t0JATai^+vao?#9y@PmO!0r_SQs_MG!3r<1wqUDGkFVutrIPjS{xVT>LG;FhTC0VztCK!VAWL(!MQ+@b6Xz-=rv4<^;Uh67X-qFaiew(qj zaB*2iF#)eQ;YX!HZr|g7e0M(yaH7xc|Ie5r;V!0F?j8C)rZ|WcOr3HtnAn31ga)D# z#821HZ^a(6fM>&GlFn_{$gfi#ORLyw$yqN%|6I95NqA$lfJrieA&P*f3IRSoS%{tz zS~(S8q&{7X-Iq?JAo@fE*6}WvO+Yg^ftoSo8DlJ&7%NJZqtIcd)2^qjW4)u=Q{*yK zWW)ajGw{|E#$fKIU|e6>~i z?OS;_P=Oa;9KuSi$sTk{j+!cF>-zVUgN*{J4QFpAzql#?M15tt&AI<23xKW-QzY(K zEGFw5Dhjo$Wx^@(pA6GgZou{sF*_;}f50LV#A9yVrj=Cd4U4wsFx`tr3q~Ws`y>-t z3=9@?o|M~m*m9i>wtS~V{a&Xsq&wUWeOjbY8%$0?(Opgf`upJuY@+Di3{X&ivf@~O9Z-yWR`SWm5dK?hBE5tE6}QGA!F^^QFlhmW@> z>%XbKkuk8P6WRizFJ60KkZK_J|K~=L?G5)XW{D3i$YqHaLPbR{q*IcTHk6whry0T< z&Ns}gB=Y1aQZGGR`N^k4vvBA|!~pk3-Y#c#_4Rf2(fV~HP`v|TzX_J?-k#%B9?8p- zZHk`4*(su3%A>vU_M_w70~bGkvF&w zaFA(!hnu1dYH3^B^PcA-U<13!p=RA2y|UK1SK9UtsgJsGS*z1U*RK|sg1K6+{b1#H`GV*TcU#!J1LIzC6!%;ysA&VR`YK;DNu8pNl^YoA0^ zb>V;J20V_Y3`_25JUxvJj-%y1ed%jtydIrxIG_{#A@=iVVzJiPomHvdS&PpjpcR7X z&JJQ+oU9wnwN10kwJ|ll9J{ilrerQ-#=yh$rIbN#-`Qkpj!yTykqzKw`(ct`e&TF& zs(y#j%R`CJQ~Qz>ihu26=avmg3w-?i=RadUY7L-A&x#M1EPuBxe_tc>1B3qX_HpfR zRSeE$qy~<|Omo&XM>}P*cEYl4syC^OHf3cpV6?@xXN4g-rJMs3lYmTqF7cOw@S}$H zP8racKCkwl04VcrUp_ZUDAdnQ_5L9`+oY*;#7HP>%A(iz55dYgIB9wVF$5@*$u=?bHgBiB7oIxW}benEn=lH7EBQea?_ z?A5`vO4Mul9In$9_j3U1s(-^hJE00JO3H;GZs_$4>l;8fbq!goB2L;|R@R~zpZ`9& zi;FVB*=V~N&%k2*&P!mu8cZ1<7~g*P>tPnPPUG118c>+Tl4_J9Y2TTyS0CUj@nuyi z?E_pY246F;eMAXP%4OJXO~x5h;ZPH1k25MRRfQ%m2~n4w;4W^D7SRq4LYTUKtg~LF zombDf3pbs)vnS~%0<3{BWAg%XZa>rQWja_<(@MkoyMcRK129?)8mry2{}C14Lwf2C z%qq$at}VjAAOsZw93?+*>H4O=~tmU?Argnnk_CQoxzT^&2IP7>FUp~RO<-~WQTlJO#Z$(+6OX&ta{TYae^rveD-Y9wJM{4in7RKy_nvY9PyC*g&2&lu z5Z5p4aoWCoW$1CBog3O0NYucTXOdS{{6ZAQS9HG4V!emy=_abvJ)F&QIJ;hMxvfT{ z-Q-hgKC4!|Y43Z;cJStji%>lMdGES1NYz|iW(pEX`xw<~#G7s{i|KDRnet>Saw<*i za>8KiVV^ICQ8JeS^3lH2&S*xcu%7T7;D@`N?3$xed7QcuEv9??v9*>g7L@0jZ$DmL zEuNNJ|9X(8)R;75e^@k-R{1NkC8D?7ATb+Sl9jh@(@}BQobDB?E$ufr>*gp4+PTZa zy^I`o3Qim`1k+AkzI;2!1;Y0=2tALOa|su(Pd0D$C!@3PK=U+O4K*p#O^sKpts&A` zQ=!xyU|~~Cz2z<=!Y!BaCF$@M7T~{uiisGxHjyxUUF?4wz)u3`lwJLA;oEAlTcR4r zqA-U?FL1XGjrLx&TI%a(y|ft9D6U&kqfsLtn3ZZuy!;aC_{%3e5_zUFoRePy4PSD?c|KCsjZ7E2S7$#P~jsBiI)F3(=fIdl8w=mk=uIaCI z26G{E1^r5?B$kfdg1NFq6ZyIL+nZCe+x}!Z4RZ+I{bvyR+spkx&r`7o7n+zWE{_d` z^R-2^tEo@lKY9wgEaZJsiv5wCmzQVjR;|8l+zKhOEe2Dkm0lO!B<1##7`X&2k#DV< z7a5zm8Q`NqphwHFsO8r7X6btVjX*aFQ|Peez$l%`M%X;|HDjh2zZK~arDpv6TtQdh?qDncQm?}%b_TvInDri#U5~R*Lt9c`u1hR$@5rsbR(DTN~e>s3OJAR6vooa%87=0b~Dv64eVE`TgCge!#AJ&@mT7x ze}$3HE(dJFr3IbfGMk3}D8wxhxg)S%0z5~lA}WCLSw(m@7W49 z^v+FWv@I$9?HYc+Kz<0^g#Q9gEz)n{)_wFnWZ&E*bhrKNgzi+Qx$V;z#2KrA7~^^) zT=}y(scF+$uvhHhpizH(5avAM6qel9ccZ}bSv1P;u7IQI&GxOh_GHzzRr3wCU*c^t zuZ3&Q=aLq3?W6U{f{03WkcE=J)HO!t{D(5THC8_M*EHOD!ry(14 zhR3G3-*2O&q)G5|6OG%o%EqSmZV1s{2coX>fZV4WVUU&4eh zn`3JjESin7vO_=H&hG$$R@L zMgCU2nZI18&Gr9}{Q9@ZOT#?&v&-aagLgN7c^j+_%npO`aPs31x8tq#kq`UkJa(^b z!P-r()AHbxjh8IAP-u}>V}QK>x!X)SS6zWJZ2PvWO;sYn_mKcae-ejX=k<1iwgDiI z1J2oQCDG{hd>||Q#BBXZ*Y)Y_x`koZ7%63jWk5}0T-N%IyZR~5aycD0w<5Zj6>uN& zsB9xu7Ch^tx#=Se6RwSePL?QTbV|>eo!%KTkE zkaz2AkevPERo)5y&jRO9&~H??9t@}Zy@s;)huB4<<3R{XM&^^(#lY-CNzaT`(?LP6 zjLwyr)$7Fa?9`ljkJGSFx8*bK>$waOx^;m)n_ME6>KS{z9K^g&;-NT;-Ux0ImnQ^~ zU)ERCJ!7%8af&*m#{aBzq+GmfHKmTuVg1mpP^j`+i|{hPUZT#vj!}QV#yqiHF4M4~ z(sEXmv2F>&sNtP%@#*;hlh9V2vL-|IOcIILBaBbNI;q-zD)?QRPP3a%>@B2>+FqJQ zwVxydp7LE*41)cS9I34TpmO(4y$cBNtk z5iNIvLZT=zMiX^s73#L0=16-fXJllg=0xg{bPzIiK)-dxIvJj^p*$^PRUkRZDX#7s z9oN_lb>k%&!z*I_@a}nV*1A+$+P)~>w#|HFx!pLb6>GC?TR>QrLBSa1l!aZMY-RwF 
zo>QCNxA51iGPswcZgZo=@VXp0^w564v3&LYH^L}3GbIG`&7zc>9jPD~%sM}v=%nzjP)9?C znU6kQ+beB5>yVnl98T5#j7dpjF6F3dgdZp5ExEK&=~1ZibHbCpx98WVB6Efq7VQh+ z1SPTIl)n0m~fYs0^0!~GhcH>aEW>_h zqubLNm-?;g=D~l3_|A9XOM_8w>hI;UzaQ^Ysk?x8rPgNjcZs-7YWr{WG=$HqRb{lBSJckvN!F!dVYd~)D>F5M%X<_${@r?^Zcev`r7P8;Oi zKIi#2OOKykzx<9a3yEUco;|&`UF$s9zW;m z!2p`3Oe%*DeMSo^D6fH4%`+yMi9U?&TJHPVwyU$JYmR-a3B^30WJI1zw;JpHZ_~0*4L!4Fx4!f~8J9Fw%Ln4u3QQm!wx(^6o!iE=RWDV# zJGpGs5R1dbW3lkLlhgU(!%v~&{W1vu6cvg)Q8`elrTI%de(wi)Kac{Va-W}XUpU7$ABaa$y^0xWYV-Lp<^V@=Qmed$zk9w_y%%`iivy{`pD)+77b?(=E z@rIlEN_mueTCS6|Xf}0LC28drm-8~IJd1qwKF>(1j7s>2cT((98MbH2Bwcx}tK~&= zKBAN@_rxdIlr-*afrjyJ#m9`ok;s3zLLq5zq)+@>mf^o0>94+w_{V#9-r*j=+5aB3 zAxaSgLmpRFBIzHzoZ8YEK>)b!F24YZNLr;ra;6B6(Y%2-5ZyBB65~6TqTvE%G^57J z{-cdi<*78g@DcnW>H1{jitaob8O@Jp=vfNQ>#K%wH=AB#u)@P877XQ>vS79Md28wFq5m-h9_-C6|)nY4kUrtCT(2n#HUnu=?Bd zfQzU2ch+Y>)h7FQ>+|<@?jxXpxt&~g34gytZY_b1s0cgu9Cb4_&7W~m=imafZA2x* z*%G}KgAuXi2EAvaMik_(*@I#i$5Bu4#x9S=TIi4s*$i_A(t;1bW`K!(TpD6$yck8>y8Te7 z)i_)-pw1~v@@&!l`WQcvQ^$uqm)s?gjnrJM_WFe9dZXHEni7rAxf$febF28Wf%T5- zDWICyu<7r7adXM~pz6u8>hseLcpBxVLe_zAL>|<;QppRk5eeSkxm~90#ol*Jib0{A zvvW8CS}X3?*`S~*BhZ+A(3sy2_eq#iS~#n|4R0{JSsEN^n`O-}}UU;s42&S&tgCtjO_k#PnRdHm%Dy z52YUg57Fg(eRcb2#P*r8hu2T8mUHa2EWB}PW3GaNs($x5lNF_!IrecW16K2z^_*jq zmL9cqIAzW7)bY^O^tYcbyLJ@CzMrTuA}Y_86odVV4O8<@Q{x&HlMGpo+zr?z)rH$a zbsIikV&a8sP3Iq2k6NtD!QCA8ynm86_c&+qQQd|{t@Y?U9=CmI`Iu1?A!~DbYu|4A z+v1DMp7A%Uox5D2CEe;{7m>w1@;oNzekD1X*Q>F(-z2onm|n|B!jpA zwkP1?fX^GS42zq>5fUv`myn5JE{!T{#TrN^NJM3bEMK88Mivy;sgaVAT~B?AjBB{) znHbL&>Nt=||GS55_UIO}KAT7chDRf)U>9sBPr03HUl-RG$K;N=hg}y=9xO)8cVB!l z%Qi04wRH`Nw{`hOm*tqsU9}Acf|gq@ciD+fe`>!M$yY8QA^Xd(m8f1js&{dGRk3U)WwhajQnA{v%6-F~hC{8|vth}G(UU&Q+I7*_EE!fGU%Iy& z2bC!H?kc@d@(fe?yT=7Cy2gR-5#BnthN_|C;xgz!WLM^MXP*DqzkFozNjHG~yREqL z2v?Wa?U%N-i52Z2-#A};GNK`-!C$z%j*+@8Wrh9u+&h;pHNKr@88;TQ*>Np~$BaBQ^N+VX z$k*Fs@r|=+lqn3W#P3nObfx-napgYhvGM#%is$IO+XbFzucz|J_{ervPhBi8(BW1G zo%6j02c9}8L$3BnU#S-s2wEy)5~< z%3xNX(nyO=pm=9atWY%^)CeScYaJb)q-=Yot=knVYns1ms5?ntPoJ)9rY}A@O*5P4 zPEj)9qAKC8VwCKqvgeEPUxy}Qf84dRfPka9*o=NT^Gsj!9Pzq6ySD4yqEa_t%^(n+ zHsk72#E!&GnCosc>%8%ZG3AOi8(>g28in}AeYsgGX}LWuEcRB$;8eZy=JCXrzgGm8 z@E&|ZE>k%(vyzGhyLvr3-#{Pz5a)Pj`^%3#oKtDZ;*Y$Y2y4}}YU;4Uf8l~ZWIbMZP=L0qYWim= zs_pgEn)25#m<$%BI=fXoe7-?_1;-dNBe1*|Kw0d zeg?dF%OJ`v4<`$YXe*g+pI2_D7t3+B`~G~o1M_54z*)Ncwcq1$uaD#BLimj83rXRr zPrhu|WVD&V8j#yPx?L zZe&fay-kjF+S}R7Z;Pj@iM1Tk>l}fA9-pS&2vr!2jLoq(6J7PFc||L02H#u^*%oW? 
zU5IWdy4Gzpb(3BVbvfUok!RRd>ou|0-Jo&vU|_-T{b@w+uR(}qu$9T(D6yi) z38MJKn^}LI=exKDXnj zim9q6!>!HF%YT79iJmn_Chmvc88r>r7c-}DUj5Dk(cpgLU+I?@1tm*iOolVL|FBO#*0_cTtdyK{p?!Cc zi|_cxNRYw^9n0<>-)`SKZmbu&@KPT$5Ci0+mr_(04oXg4n@~&YjdF~P;x(%-JP#Zi z-y4=qvXw#gN;UB@J3?Xl4ifg~+_b^ORhF+c zpC6ZaU4jjyFV1UjqlYdtF}wOGPy zsCn~Xbobcw1I5Ho>vwWGQD%PxX)A`x0> z!2d={Jwmwd!_s+ehhL6<1?$-pp+ZF(X6kqNg)g9_Yk}Kh&SGyqgq`$}DSfpnD9t1; zhdg~GrzZ$smbXt?;dft05C{FypFe0x;G~mYX8t;zF%zo-{i%y@(q7furOD6w)#a?7 zF+EZ|QeYYY+;W)DkJrf5$em`7IZDO%o^I2o5i2!j;c#DBg2~!iqeX+QyDm7gf367L3_+N-Q)C0YEFL2oYO8}rTO{rcV^I{6iF?q3qZYW z)A`h@LV@G?>wNWWbF~@i*f(5^&@KA;%LzD2O(zRwwJ)yO7Rek~6fi~I3E;(ED?4pV zP#qE?1ij}lpAau;;0>5_acjWM8MGX!W4NAYnDr16e|)8Lfp!D{EkjdZ%+{tX$4ByV zQ*-J;&s#!S!;vQ7ywtPt;-&rspF)YJrgpmN+qlqM$L&$a^vk~dPAajm3?$6CFPAol zfdW@Smdzu1wo~W9y`MjRd~dnq8CWvKyh6RdJ#y|91VIkEG-?iHM1C+Ml)Zw-$S$6SST zG=-@|BN7SsdQZSUpdfOW+LSJm4*Dl6pbsfawW>TpV%7bnFl4P7MS0elq{BYt(e(t~ zkSat4Wl=}@^%101m7E(v{ORJmONbET(V4jxVVoALzsmGMgnMdmbkLzlnxJ!G@Y<4< z2pSLku^pO=?G0h_v~W&b!R(im6F44*4-#KhYLIsXJS|BcPDDjgean2(B<(7Nx7g#| zw2E-)W6g@wM%R%cWUgB%=*{i?NUT&|G!2$6uk+#PknoivLHlkI=U4E(7~0qRZupbE zBM__}qavRmIzDZm>4PHf`m_q>KZX&rE3W9L{m;ArEA!Q|<2^Ri}`bT4g#wEl^7EM=XDd<;6UjL7KFSx7&?9g0e*J6I@ zBz-{tng1!Es1|zCFyT*t?NWZy#$cUSUy4%iLZRo(IME7*N;x{e!WAQFcIclcxZoCiYx4u zM)HQ(ht;A|*$~#<4lL)%Gb1GM`K^|9N`;8uyRh?gbcl`J;oo(FSUX>n|e5XvF(!!6PxQQE2m&jXC{`=xjM(QNQhB4^7Os zuNC4|qUs_cVryarBBIlEVyRiTzdruW4Q(mlkV1~y2JVsa)&!)o96;sYJ^58hBgkqI zDooEmz4N)9gBX>h6;*VTuD)zgmA+5`3oEsk>Z2}4-7qa$8T=b&ylom{V^M#V^e}65 z$}Pdi1FW@Z)^aHD#dVv{qnJqF6A1Ip@7+VGbSY*o(qI?LL+6HYKJzDd$Xtz~+xpV^ zUYcGW>F~f;;7`0MzzrAs){ga#q+ggqrCk@eV800>DddWwG}LxA*Je4JHJf{S3B(!S z7Jw~Dl_;JVRq$*+=HDr;a=DVmTXbaLmU{2q4ujDdiNa*w;)urs^Gn$q8;bdt3;oNX zdd~)fKkX<@tCDo5xc02uZ##@nngM?pUgyv_T(vCvKd%ek^3j!<*hj^z`Op@HhNqhN zy#L#!YTw@E%t!y{y!V*)$PhUk%ioJhk6_Om^SyZ=X29xQyj&?m-6vX)Jpzdu*Q=%X zgQF^C4Pg8>;Y2+bEegN+r`MMv9%EmA6rb7?%ovddH@IybNK!fyv}Zx-cTxireDQ~Q zjD7HRNz<>3waWeDy{JPWH{t`0n%9q_OtL@HCBBDHTVqqcw}$xWY$6vT#z3lk>AMzJ zljcR;?j6{@L5Vo#rC1a8R~C!hdiIF)WhhF;kXD&7`m^<)J`=Xkl<;MUC=My_k()XE zTSC9Kd>G`g^AL3vI-TMK<%sGuiXAidcDrqPwYN%;ei$tD-S_yN8PwId{pQnbRqg%e zc?zgw&gvn8ME^IevvVOMgIDC?&hNxt^+|{B1xJ>>oGxF2JG1f+K|$DNM_*`*!#C=jNul#3Q4mU|(kvx~1@6tzAnH)x%T&6?Fa) zdq(1d$Wt=5O;N{A!9nFFy|}%dsfyv1DY)lopN#gDVv8_#t1S#34#XX{a_om_u?nAf zuP@smRCkk(wK`9{CZ@F{v7Q)0uaFn^GYPPo!bwo_`v`}JZJ5c2&PBTA#sY(~NmiR!VurvmG-O#4dNuDe6 z9N`q`Z zhG=x8y8nl`w~mT>@7{)`q+vioq(ek$q`O6>r5gqj zkP?BRL3)rbLj3+YCvk}?isrC`Qo|HeeUzSpXYhcTJO8o`DX~}1|69EpZ8#c`k(lx;`Z-)=38fo3y1qowY;DJ+a0tOQD8EugFd0YL1DG2 z+#lJ2*PtBx_&&?up7jmfue*6e(}k7189#>~6-j%a5vFGKeH-7jWt~>iaji|fmNY)} zrToeKw_jAFxu3J;ugmP;$5qf_P;C4`dk)L&c5EOGuKT+m=O(Dzjlvb&qOUySC!e8; zmc@M+f%=hcZc*?NP0sBt4e%|x^Yz6ZUI2M`T(E}f~hqxYstC|v7Y>8D6%(^LjP z1#rWY~L-#hCZ>HII=BdnDRB$zL3Gh!b z-tXLF0`i&;CL{T#yT8{iF)gCAMwN0r1JZVCemyUML-Ia_^FWiHQHmY+hG3kMM5u!_Ve6Cs7kq#Jr4ByGz)LyfEaUm^6Q3y3b>F4WfCX)?*NhP_IW zO6H)kr@D$tBugRoHl1{ex4`(&!{EPH_(O1NEk5{nUdw(zE6cJkGbBQ#wSQ>QuG8qo zP^-x=y4w_S*=}QvPzQN%YT^akF^med@!N&xs|}xLGlm+K zFfqWfVA43VuUmoVM)pA(jj7}n3?1WXwJs^7`^TL-xQ?WF6(A+8c1@^ka!w8oRm>fu zA6pn~&FSKV(2>3m#aVU{vW@XrH;+O~ZYdOD)&+u#iw%_bnY#!|5{n5;^n~JIMNuK{ zOEZ})-M>fjEC>_Y@mcnJz@S6|)xmxQ_S%k_f^;%|6P{zH9}&)z`6oAELk9l307M$ca! 
zGOjI3+V$x7-$OQNOe9C~mgFW_!0245Z6SToOVvknQhyQW%pE*E#2B~SLHOH{->fuf z$CM>#9fv(Ph;^0zQVQKM9H^$^#N6Sl!|4M!4@=mQc+!|3I&L!gG&8QLV}u2DCFzX+ zH4u6#nPc&4@y8lkP?re|Tc4|#UGyU%W6or55E?L?9((n%`4{ioFx}qrL0W7Wb#UOg zl`1xvgNBnj{RU)B-QDXNHVXc62d!C!bhlp7QA8rnkpv(TK<#EU`|KW!nIm~zbhuNa z@EeLfNnyz<>|!ud+Pjzn%V?C2-@mxcFq-#;;x0Fl_HWp7#8jWb0=(W85;iGqB2hfX zg!WVO@2T8#uL0m{FR*0%Uty9*cvEQv3}}Q0nWDr#*f&;q&eg9bdk1$$cr%r#QU?8v zap&E`7|{de=e50dbkS8z$-&Hd;fB`$c4y_*+pcl^+&h*69P&EyjCuq=FSlJk=GMo% zNM}F}=t&UhtO1ev4K5+H-h{ZcAgAaBK!ZTI&9P^qn4uPBj(01?D+spf4o9wY3- zbBcThv#0EdtdyK=zc6QYd2h;C4O%Pm6^V_R7?L$qLPjV|ZuM3unA%oS=0^~1N?%XL zFPkiojltEYVh!~tj*UO=;6!PWBwZA)nT4eKY%!?z&zd`V{wRM}Q5QSpW15YnflGm} z({kAb=lFh0lyq;&t-Ph`c@zMtg$Ir=Oie)cx6Kzmz;f!r^5uyThwl*^y_Q*SLNcSGdvCx zSp69((^vezX-;kwIsN{K3;zjy%uR;x=>+xJ9Uh=Ja~+(>>L1Y+bUuFhT`fmUwTJIQ zLzq4gq|3fPJi8CMJ3PKaS2u?x&?<*h68)4mCx{Y04A2^f`TN1IR2UjU(jJE~>yBZYE(Ah(;Op%tvmx3R*L@MIqV zF>zli5PR%KGO)rcU`Q0evI4>i+F?0K6LWIAok0%KV&!cicqAMa6qA>p?sST&iF}IB z;|^tZdxy(A9-)x)^*iA=hMj|P9Bi{zqV_ql=<{1F6;g9~bwwnnjOYlS_I*O)5~+k$ z%AsxxrZ0GDcl^xNZ_+0l!$YM2>^b3BRz?&7@?J8XF5_=zYKTVO1W!k22lLpKE65;; z9|rxA$Qn#?g${(?!xTY-=DR>T?4Q*r$sJbAk5 zj&94Ifa}bW?IHu5+=lo_XgzI@1p~E^UlNbAaA%|JZY&?8u4L=KY3yUc5~QZKv-<(H zn|+8Vf!~eX#LH0LC6DPZ{Ttidz4dULQ7##_jDZDByf=aIX|+}P2~>#<%jh=VyXxeS%gLq8zn5B@6CI zb5f8NJh~~)>PkH#|N960AM*8YCQ&lUt!9J;b9e9fl9aUSA&JVX{!JxVnXZYsq%Lv9 z1NezT(qGiiRz6Yq5#EnK+s@_DM_@EG&qL<=Hh~??5gw9+2{!p8h_r%^=Q6bNwF%mV zE3(N9_<0kT4bnoX)xUT@MJM}APmbUA$S(mc0fL~sZyWV5F7f<^OUP6crm5Z@`dM!S z3XU=Dk_|uH77R?Fm@gZ*GnLm%PQ}t)Q)OZ#uUlZJFYbNUqr+bYC}40Ot_pVo!X z7?S~FWAWOYRB4(xS)@Yf`GMyV&LBo4MeJ^4E?y~uu4@Htib#~R)Pduq&24f80>Lmd zreNed2mvw!mp{nuM$r%?gj&qwK{^MP5f`T`=fc7Jc8vaoAJ)1h*S(BRH&>I{9I648 z2|x%;1c=9GtbS$;zPCVEU^xKpe~@Yii<^Apx@y6g6gqy+vH8V_Yt1p7sWyO{m9y2A6w};YX?N01I>IQB z1}kkB$?U=UKG1f6|i=)LZA!O5G6QK6+Uy_-~|Eiac0rGXFt z0w9L+q?oqr{(iH0MnfW0ZKv8ViG*)YKG~7PPxIM(U3OtP5eigUjkM&f+j}B9$laxy zoqUiE<=T7tJDhERoTQ-nK8TwdBD5gLpcyG|n`KIkT9y$kp5rZC)4wITN_r*6i}6S& z_LZtFy-ZFmnmqf-GokZp>-l1yL)`E~c*s2Imr&z%2R zdQrpuT|)g^Mb@VY>*IoS5CVII##1bQqI#(r!O|s*6@-^WQ9XglUB(jRw_nP7#@kge zIfnZK$7#%v1hp&`h;|Ri?@UY&chE(=5aEdHKy)I^do(c0*R4fD&f5M1)c({%PwVJN z#F?hja3K##Y%-lMevwe1=lJK)J)@800WjMtTJUb7>s?CeJF!7V3{WW;#f`pD_W+H( zNL|NNp5lf8B)ur+N#owm&I4_hV*@eCwn$jnwcEWL=!e72gf1bZdOrin3+(`BMH~r1 zm$@GRg;E4$v;KA-6!E)ye3Z=EdLoyMa|`krx@vSINOHDTJue0T<^!cC@Q+f5t1}g} z632hE?pF5O%MXRVhdobDF;KF4As`wsOW&>^LmGhI+42V@@CkH10O$i;42*RVi}z2U z{oj$xpI`Ys_#?;o1Ez_=Uo5O>tU4+S#|f2v{z*v}k~k-%8ON6bYl0@eKnD+{AHD}3 z>uoL)RTB1h?m?qA(j#i+4j$JClvZb?N8ec=jO~8O}pfi z@db0~(~IK+VU(n=+go$f=SvxtcOv+KBY6IissP()4_d)Z42hAWV@Q-wcX}_!LsUt} zup|=&Us+aP=huK%a9sZw%YF1!$YcpOuqzL$a;1GxA`55=A6ZtnVKw5f^lK)JdaNZ& zhWv9e!-MkYt`=9xU%tPQQp6_aCck%;^Au69dHR%a;Rpus)W2kBZGYH_cQ+@%@-3#Q z#Zu^rFj??($!!wN5$TJW4;a+1Gp1$2`;psa_fOn*I_FYNqi*2QKQl=PbtWvbk#%JtebC|2W$-DbPz21*7ldSRoerR zZ-#Sn4aj>I}p>n%1btM;lt;KcRf6tEvY zUcbOFYqbQj>RP<6Rb(_8L+$KSXEf;h0WyFmqpvKj49xqAP|pwa(pmnR;J1N&p5Efa zoR-%Gtu}{9`?O6C*F5GdO2hUlPG+GJ2qf-zQw5q4iEU0T`^_&d>f6ezq15JQaTwqD z(i)Cu6sw91>LOqF$NQ03OD*B++(bhsda&MokTpFyf=4b>7m1CuTGPxMiLrL)z`@dx z4#bmYK=!6e<3OQd&<7aj5pPQ{k>w23yT2p66VAKp7b@n5(_cPl{1pJOPj8^=eI7#P z65dcs!B(~RZ0bv}sePU%_cG6F>N$LU9*&<^xDCj9>poQr+X*g;1{Sxaq zNi;bO)smr*6K`@=Uc@}~Q~!3=jW~{T!;xF+pXL8e5&EC#=syZ~8iA&cw5^Y{-AcR1 z?neDbKD&X~t9#`;)Wq0?8>&DX&Ev=S=h45d57Co0!%rB`ov~%We(DHs*(z1so3jDmz&gYG3M#X` ztn2~^b+TfO8|^kJ6Jt^0eFhO}GkQG$KP0egJW=^C&E(Hi`ga`RM}aY-1v1#egSkmk zEjnIb9%Xkb6FnNM(XWFLdS6dF`@nll^txM#G(e)X{<=O*`7U(zl&#PZew*P`V2=4D zj6<(Be5-ETXRTO<+lLoainwUFK)t@l@;K^6oTfnuPJu}cgC)I|;+x&rVR z0z~J 
zDFfw2s_84KNwUq|14)8$h%afW(7s^Lm#~7{>j4lXJSebAGpPK1_kJe7{@kdfl$2CU zimO*Y8y*SaXh0#!dK$@j2xM~Q_JvOzX#8t3obC}nozC1Ueo_q9C!qzu8*Iwc%J#Ii z=vn*3=#dcHBHgc0`g;Vp+IMf{ut!-IH2+KyK-k?>Q(a(8vK6NHZS!>+ zsuS-uhMF9FbJ!Wj&Wh`znR@u!u)yM%QytW7rXbr}IZyx+4Q6ZikENH3e|-X+4=L9m zU1lgv571pL21YIJ>8-sY*0G#F65U$D6p~?HlWH$nOb2lO0;6`vlM>mINtfvy`LdN- z*UL^qzLM+a=1=FyGUDasCV2Rvngufbh?uV}|&en~ar5J5iYNZyO@iC-#zwm3KQ9WZ}zk}{eq-N3vm zJo;ZUIlIX=xItm{Xb9gMi%<-jT!-!r%h^(377E$n(`$F?ql~IhZppd9QqvmIKrz%LyUH)H%c_(h| zPCC9G=v*O#|62{7KLUY&;TLc=Nk+q>`b}wZ#$ysx0=xcr+%({ngY)j4)3eY=Wo2K! z_*u==yC1BXGcgeVpxEd*A>PNTu?WGWja4%h)&B7>GS|PB|NBkXp4l<-p-rKs-3DPO zQK5JU@<3tScg$2&hzReCASCdeR{Jx%Xg#Nh?(EC9t)L;`dub+biQu4Alj&v3H`5zZ zG`}_~Y^>KlGQf69N-c=!eB%1MNRMaC_!<2W1ftUAaTsAA!mQ@rqwOfg(jQ?@R^qE>7B(*KUB){UN9eq3hs)B~ zG|%h+ljBsTcA>snl70oM@v(`o`D~rT>&qiH<%FY#=`Tfiux_&?k&5+!HlobF%uGI) z4Z(8na{+t#hiGfs!+DOh3E5fc+jzR2iwanIoZ)2!&5|%NkxPLivMm50ehw|;_Fmo4LPGSL z=6-vV@sW-QazOU5phGeCmp9W>Opof-tz!mfYGw3*3tl;{3|6|E|8Fp5d+Kh!*;AF|Re!TNku`GT$NY@98Q`=ifI{qs@j z$oOZk0J-uT4%M=?Pqa!W-dl^N5Ot|doak&XO?V+V_#Q1l784r#RvC9(vqBN5fLDc8 zwtnfOTzmQ+_mu*Y806l3j1B_&I5^G!)yKg-(1OzRk%31t+gyd;s|{*jF-qPkTwh7~ zsR`|P6OUiFFrfF?(A0iqvjQ%(NFZ|hJ=z)X+8lNgttuAkjD$;@Ni+DWP&?+4w|h*t zpRDGpPr@iyI&2j|qPH5Z@L;uFKPAsvJN=JdEY~^eK$C>5Nh5jy^E6Jut|?%mOpe5+ zTzmehaZd|?I!rh>*i%F4){uF8QgqKq3;yZJV_)`_X;=irb~X=IV2iySewOZ1^A|<{ zfJEt|gwisn>IDzl>+rs@oYhipNKb6Ug%gA~;Y64y&=sJofEJwsbPGCH6+Hgla{22& z`VZ&h?{Dt#+~{b0Bg?^rBw^fCuwW_Ax+B^||cO=W|oh_RbVwc2PpX^m1 z9?ZO2#`cRdiDLRrbRG!bG7b6zj6ts?2b7gR>~^rA2VN&6Aw$gfObQf~PtR;TmNVmc z+B$C&QmP$qRgG6_FpS#c)jVf5WgBR1WsJfR>#tOGaI-8heGWd3c<0f1TU!VSb57vF<*uO5c^dlqz88l2V-K zE)#(jaKzcLrT;0wgZ@6@6Kw6B^9>qZsdz9k>Giy~(($*07$z~W538_jQ@r*bH|%eD zYWA9p`TTm$qSm90{Sr07Z@nK|SndWmRD-bscwhGtkUgUHJ{Z)@*H&(r`0`l88kn%z zcqY%9*l;`^q9U^Eme*v7k!SD{h#ZHs4*YwAQ#DT38^guc8rIfxVpX;T?|6a$OI#K( zdq>ssaO9rJ;0C`)9Ou_KSyeGqDQ(%B}1(^{gIXY!# z%gD~jnWdpB{Pi0ja2a#FI`<2CzljvDZ=&?9a)OGu=kfQ?6BQK+4%x~XJU2&%f~UUe ze5;C1M@sw?CEpvliZ`RC_M*W?fMSd@r+SEzaahfo`An5sP%|qPACO9i--I*U=R=-9X?m%T5Ec zrsPg4My~yIZT;w#Ga@ZygJ*-dB6~|QX%etC{v5}4{^myoX$vM?FV|4bXgHclkQC&P z&*10VKCU_7_q|Qdt}E5HU5lT5K?aUkj88~V>`Hn|U12vJ3pWidx~hNm6Z}okRl?~& zIHl_l*@N>QRbTHVvOFAJ!xaYXlabzV%J1ZNnDLhsEzFhg(bmj|5b9UHy5ILzG$hMg zlw->s0wkh#GH}uc6p z<8$a&zkdx+mNoI(y&opwilDpaWvkTN-|ablWh(8S55{uJ0B{#~!BpRuFM zgtK8-LNStl6i3wJ&S@*!jc(S7ChJ7tw1-y>PGSz8u5=f-8yeXNmj?ewg_)rEvG@V$ z9#m1@L^CPiRDn3Wuz?;DuPC*1v0JGcfEKK(ECBjlhZ=nW{FZ@pj{U*acc=X7l4~gm7%uW8`Jb6XbIxzUljA7$t?bi16BT4Os0bT!OpR@aCpRmGfE?j1T z%^EbQs@cXBkR3enlu7(}YTxj%g#AHylJk!inziD%Oa3oh%T}ED53di0n@&3p$&bt% zt0BJtm0)oV*pKl`D=+|Ezux)rh}obij}Ea`9BF&x3LZ>aM_O6}U%XCju`?o|)@AbA z=R79)3eH<@m(Y}FZW^S!4H5O1!NE5Ya{EDVU{x6OY4L=;`mC(9EZ2#}g}i{5>h5<5 z{Ox)LAY+A+yMdL)?2S;DKI_YIx(BDCM!s!_n)y$a33lfem=)Jy3Un5*!m|&0+sj2) zXLu=_k2SIo7{LTo)E>b|X)j)<6#@wj_#O?;hY+tx78)_*t4TmScI5PPbX3p2X+0+iqc=GR^ zyrtUU70}xKesqV(+<&7*HUx2A#Mb^sy1*t8^(c(9dd^GaThP!hXHQo`GJ^ta=L)!* zS3XI*+>j({84399Jxg~^01$;*0hTcTDwy>tvvYBpIb9usb!aR4R%7IW_{!uosw<_RA^lK$d&8h(`whAHcEgzQL?tE+1NowO!!et!=3TMJ*@VD^ zhLkPwje0?Yd+q0hCJHL_auRN$E+Xn7wwicZtCH7x`yF${pXXTW?7rFv82`h`cHG4= zU+&!mAD{d(1(J5{)#m5R#GOg(R!(8M zRkqXMu<=CsOV?{N>2=`3B{KTDCcjVRwKNj3>o6|DmRVH`%tc)L(MEhMqzP`ALK>YW8ASw+85ab6M> zmo%odu(}#*-*Bm&r-dYg=3NT7tEbne1%#ZCC`r01%V`$}_-9ul__r3Avv92%F&-wcdcS)CH^lLQ)kLl;SG`s=64wpp{<L`F-f_TVRke%esGcz<&^K@Us^-CQ(`9T2IoNAAs~Ms*#>{WG0?Hv z90Y99NalcO!K-J)J#(XQr_`%9i!#XjzES0$^_3=xS|JANtoq(mJsPDPt27eb=w-Cr z-(NmnD3>b+ZC+v4knVVu_Cg9mAc$OGEgO@6qL|{xCW|3TlD4d;8NH-`x_V!}mI?!h zDIQ#KWNun_5nW53ycn-hDyRf*6jV;wH$33{n=A1@B+WmM>>sH&_sI>w%mXgyklgi< z-^@oG>Qt4PDu%(`&4ZxV8~p+VbrqLqWhLzcgQbtEo_ 
z12>QXkeI^l_1nn=>8h{MCOabx3^xmJtg4KaX@>lF{dt&gQ=OhXyZUxAG8L!XcfvNr zRlS2gkzp);wpF)&u&_3WBq#lWz#9lKYl%k{foxo12*>^R#V_Yy} zT5|%mP}Zh}4{<%Q92psrVjo1DtwK2Un;bC+DY+3O105c~PI8qom&hxGE2Xg_$r$=@2J><@IW1O@m09;GOHH5hWb0eLl7BSXN z79)?fN-Hp7=3eirsT{ztwEPVeSJPFLu`kL+zvd0uNRE1disT|+6opo(4=_;xAAXjV z>Qa!mLphI^2=`16tb5ZGLP)bAj2|U=Bvhzh^O;R`RhDH?_G<`90j=68j8a$4&fE7L&j~ReP5&oK3dKHp( z`c;)(O}oKW;B{baw3Ui!&`WAvZ20Pn?_Je->3NpTT`68lCO)SzSK)jPGvO-cX%%{? zQIhrvvz;0%CBd1u0n@1xRmJZPN4J}<{lZ*h)fXQsuXX2(ILXh0#h+?DLXBmo_-vWJ zv?Fg%mtmi{%ej=;xDfAiwQTtYk;*{he(-7dtT_P5@Pc|G-;!=6Iv;;>9reH@2tf8V zvlFg!HLJ;rT{NzVU1O6D4YD^YStRak2t!84EhqeS_yG_n@!fUSrz?2~X3nzF>{-A` z)|2FVFd#gMVJPT*cAPoEuT^26?$CG{+*zNK%rWYM=j45so89_-#AfUlK3rS@Se}$I zRWEi!sn-|6xGLcrs!bPi*FJlXJ?m?Mxrz@Dw|ZdZr%cIy1` z@t+YRCC~FQ&fpn2YoS0%X@0FlCEU}D>m&rKgL--KuRUawjM7idg|`4m*UjtdRw zzE8%a!bivTwlM6L2aFiB$6rtgZS1nFAD=drSrmS3iy_1&+E8#8c7Fr)ZV;TuINJ#z zMnD*ADh=+bcKm3(JR4f+Gv1Bst|&7cwG+ zei^UAy67zDHw-Ij%tZEFKy8k+&+v8OtE?H-wX;5TE@4uGXrh>k8(-9ljm3HN7k!g` zzWF4cW|WC>CG?pDcn&h(8=9}5t!9@-5_2D(01sxORj(pMw)&TYb?$lM)+;n~hC`}> zF^OaIS!#vohpMk6wS8QKvSu1;z;BQ*Q&5iPBk@0{(n%dT$)fA>w$CqWzV6I;_TOrB z`WG3XBm-}b7AlH+icsgg=_8idq31BD?On_^%XoSabzK*_=d_PH@(Z{<=t2G~U-57j zvRMe3Rpg3DD{i{_W*2j>C`q%}*n49z15h+tLTJBZ?XQ+4$pf3Z(i^?cC%l`kC8B8E zmqIxmkVO?)GH-k$8tmxDXrojbXXXz6Gq2-poM_h=WSdT!AceW`^c)Q&q` zU#>fhUH17reF~H*$9txM)MB$NJ>o?6AGvKMU$2svn2M)Bc9QMJ>j9}=|1U)&wUt86 z7p^H8S7kT-{d)h(Cybl)+NLkt!(XE z4>kKm~naD!7n*Zhc63rSD{Zl^J){p^@p> z8_&b8(xLN1bhFZS{BteD1v7!zu>fC4yIMm_tj?uwG&wb|M&cZ~Keau(>fy2pqu(a- zM5cErt^wWOzVKD>gvBWo>2-%nWHT zVJ$^qB{*{HU~vB68lXA4(Dh{_S8zowXtsO5$NGeSkK+Ip_mH|#`qrdT9grgmFB_xS z2#Z^vQEk3G)Tl!2XU3VN7iasJAG3I7u+}E~HY$Q9#UDZ|R54YadWiv>s(@;k)SYvO zMJ3%a&;L!cmZoMq`o13|N{>NX%eJvai{>blB{|i^_i}`GyHR+#dE?(Y6fr=UHMPXuO|Q{la1LV4a|vf?!?uVd2h_z) z_}U*|jO^WEveMTLqOnHF3n(*HA>-r6>W9r}7&YA!58DDPW`G#VxRF$-fSE`F z-WP)kUQN-ZdRCIiqBID<0dpta6Dw}JsS@?$1UciM3woM+N7{hq+#0*vYkLJR+Icgf zl%4&~M%Wfo$aR8WrsXMZrmVseR%xr>#^_Q+U|6J>gj6YMK$iOsn$E| z)1n0yv@?AC1qWjp3^3v4bk&D{_xb*1FZg>bNV)tj7=YP8+ZsCML4gQQ66weVZIZIG zN=Gw`Sx7&uN?;t_eesrjKRz%e<;jptj7_HJ)?05FtUrbi!= zuK+hLHxJ4C04U^4c|T43W~x*j^1oj5a_1M;QF!ADtSV+if*UDQRq8bVo#T}%sA_Ea54Tg>*G^KL9$wfduz zs}q&jUO64C4FWLz*z4XLlx4~du;%>O1}=qWufM>#U+2$SpH7TZoK-lXER7N(9|n7o zrv%jn6S^b?5gr_9s5;5do*r?U3i{tvJJd0h%)bDiKu#ZhEaX3 zb|5?}-n(}Y%^#84%q@xOdAOy_Tw!CgZNY9_y?7)F`+R<~E&%Ar&0B(O?GPm56M#93 z&&TX^RReAlES%2;QNg6nMh)x-%`0%C*$TnZihRh#!!Cu$@8gEng2;{GT;2)?honmY z`qvBZJa7#{gc>qH64eWy*3zA%GFuVPg?);)Bn6SvTaq7(~Sku}~r}Q2+ z1;nejyFYX4f|1p{Wqaip|1NgO(bo6Ybp0y3;8hU)3|N#^-Lm{7q}!cToe8E zpQU>><=-B%3xAOpLs*x&It|``<~n>Q+MP#D#3oC_|>e=@ngRy z;CDxK^LqZOWim(#fJ^O+=#(d9u{?e1f*LcOUHu`%RMl3!Y0dVqj9a! 
z7TfdG#mkF!(kb!N2)RmDypT%PrC!Pe;qqQ50P)r z%|gi#+Bg3ri+B zbL4c1JwJe9{8sbJ82{B>0Fo~~S8@Cv-kX7=!pw^()G!>gCeUw~0=SpW9W8vE%h#c74)vmYTHLugu_;egX7!PG(} zO&q}qy(1J|kxs1bd1k^jHO?a=Ixl-xjMWR?>DShLk@za~>7nyY4CW~3 zeRfotZ^&s_9#88vK-#63ap|i+;!-zG0~b4|?$bNz|Yit%k5#AjBSdKcMhaaAL9EZVQac> z|E*05Fk~!kronT3C)p!@ULy)UgJDavPH z$SG_4L{H#UyYPF66b?JVJmqC!oB?$#t=-Z2Z0N$`8W#6-jHRn`Wq4u0n#qmf4IHj#;HAF|op%g>5`^t)1Pc|Jxb zbxGcPadNzjG8FgXzvjca>I~fI3|pc~0(y~8SCwA>pFqd|fFS&qH=K1J{|n9!QgUV= z>2)%CdGRMnKqCkkS#ro{RPlwa{h0jyFZg8v00>- zPSfw;*W$gD@I@2f+i5!zVWl+`qIn&J7&K%cA5&MLSw9}oO zh+4Uv!$G|$nwl@gCU$JxFve%n3ewY&T%qwMmTe2y{o&~I60t~cCph@j^tb4i9)D*)Au4NREWh7xcjcXlf8*<6Vrfw1^8z5 zK{mS@`4A4(j?vx4&cSSUuSnM<=Z&AvE9;gMvK~$az%Z{$RB#04pk3AU>&fGH6?2)2 zu~v2GNv3j}I8a7F1zm1sNVLzW@H>ynBYVZ__ntGm8T;E*;Jcg2rJ&wgI z+k1Z9jc0lJs24N(&7yB_rIp&;R~?Em9Cs49wwwe!eK4^bReXs`l$cwY{n3);UEfhf z!Ga}rYUxtpejCq@5~|UH^k2UYXBfbl?NaLM9C>;#RpDgnwlD5>Klq(1A^5jTPXvS0 zza5_He10-f?zn9KDf$7-21uyLqIn~2vOhJ>zgXn|?27~Qf&5r|rCi9`W9wuJGV|X- zW-Ofo$RCipY~0mkAir$MX?M(mx?G$9v+#83GY`XTb1f8gL6bwkgdX5f!jmz)W1f~x zAa#$hlC(gTR^$FE!ppDTcr~M`M0#ZhT40HHFRoi#tyn@242Io5k6j25nUvx!ckR3%Qi&JL?)o!tb> z)F52sy?Hjw_>v*Si>&E8ciuPR!7^%O3cfY3rn+nftmUy44>u0e!6lBVo2a=e&SYI% zDH{Psu3yzpl8>e3CNDG^UTyZyV$`pa`z7EEgwd1l2b5^_#E$A&O;K#|=amgK&EA@* zwa8B{rmQob?9d=_YlnK)jEq~lz6!t=Z8hW=pe7#Vit0ieW80;}=`D#8KBEBW@&3Sz zX(Rt%A1yH2YCKQg&O9Vz8I03tOdm6L?>n-d(Y`cU@dC50t1v`^#c>~Pci zwA7+@Gtmj20W9Y3r_EZ_a11g8Y)oMolvpHCHZ(r@=6acY%H1r&p6JAi=r7Pph>_yQ zddTtCBqH{69CI|JNp3o57Q8)Olcls=22w2F ztN>DGUmzIr8mJVEn4;$M@1-wut4nlM&s;_w4%&aHdqp+-#lq9m!2nNfHf|6{pdd1|zv>iLpXutE>C2{`A%*xK`Ys2VFpAsccuBIQ+Qr|_*RrwTo-Zq1OU zXdq;}B9`6rQn0y+>dra4`}#-rE&Hw_o=Rj1y*;z*f+bMd*MGWk#ugS%R=zBz74Ktk zUK&&wl)q>EeR9My#}#hud_tlyz#M0oJ_#(Nf9^WV-zx~G=@FduQ8YL98Wen}-bP0( znRvCgDmmE}OhnV2g3{udRGo_fK4oFcH>t^}+Q*C6nwh$W5($7)>&r6kdLDOJcPW

[... base85-encoded image data omitted ...]

literal 0
HcmV?d00001

diff --git a/docs/assets/deployment/hf-inference-endpoints-new-endpoint.png b/docs/assets/deployment/hf-inference-endpoints-new-endpoint.png
new file mode 100644
index 0000000000000000000000000000000000000000..2ce2e6ad8d78bbdb695d8539ec545a92a10fd80d
GIT binary patch
literal 52361
[... base85-encoded image data omitted ...]
z|EdNpnj3ve#69D?K>u1^mLmcZi@brWjo5Y3w=mV#>{^DN#(C6C)DkS6@Ldc z`Oe?yR{g5K8pw(i+8(1k{r(W>OLI%m$mzSEGWh;t6Y9uB4JM1%_9r`jh-=oaWE_pZdJ~;BnDfD*x%I{i_S2NU%h(J^par|Nb+d z6z1-kGZ&0bRDDm8p=St+NJLqKLMX~kkLQ1$oP4(!0S*`>7vz+o_wP5n@je7RwyDj& z`tRQTd6n=CFpkB}&r#(6yO%cLvGKH&?dgeq`tJYff-mIc2*@DDmj3I{fB%s23D26E z+v*hG^zXqnWdq9+qOYBKy666k+n=7@pazeJUaI`ztpEGFqV#gWF@ev2fpY%up8bDo z+W(o*|9`6guXXtU>Z!K8i{R9$W++!t``_#I41vr^83@yl&i@pK`0vdUjym5X&We?H z4yO9sUjO$7rjG~q3*Ku5rTE|Lyb^4K=8({L|7}G6Sxv(ySA}@ZsH*>N@*gAC1nfG* z&YAbrPXF`d#Zm5=YF2WMuSi0)HAo&4*+=Jy!a0OF-9)AVN4I;##LP_D z#5r_XUssDwVC>c}mVG#Qd8Jk6M^}x6M1Sk&(=?qgIJ;swpGclb`@O|( z^CUQ1UR5A@E=c;8!W0#n!E!idCbDL9F3)A96frn>uPsO8^cJ5eYs@M<^D1Tic%Fgx zjiuv1U=*6S$1#{b@fevZ%w6YLl=b;5(Nt!qMAq=O&KzeQ#ds*qSMp$pw0-*NwyoDI zg`#j?-)9QRGy)tvBKT|yQNnuk{lP)XCm&->cE!J{TrRkYJzMZWYC^fCG1_GG1-&g_ z_fQwEFh+!SEQ-PT9_!fw7wUsMXRh>ca-3UDDFh$iyW1ps=@()xi(hUMsC9qDiB}1TXHqOkGcg8K#hZYzIej% zAXxZ~^lf?!0`U)G652CJh~|2@jq{=J7@5fuHL9(9C6gdS86h>HHNiACnDHZyv3MON z&UBiml9%U*XCJ;bI=8N^*9=9DxN+h~3M9-K^nuo9kHx-%FDpl9`F*!6J{--{9WP=Z zLS7|4d}w^HJ@hS?LqVZ1Ie)%rugX0 znNKs_BkP>DFWylHqc+~7BT=NYp=`EuvU5TgJ-K*QKQ||V@K7gs*9*43+(B}t+1cI} z+DpO_!fO%+AIm&DPbxKNRSYIag5u-cEpaCfWMM};UQKPC&BXuY%~#~T`wW$L0sUN* z77IO$7LVTh6o*Gz z0cz>BL;MHyuGsb76<^thsBgb9zemK?PRYU3(N0%v#7Y%mP+I&p?{03JdGU+m#6p8@ z4bme*rk}SH{RNtcDSbMZP~2|`Q|SA|EU7wEd6G89q0@N;~8d|Z5>R$07;Re@IA zv|jnXIsTY%jzumc+kQ;>2l0mN7&t3MsqyP&z>BY%!{?^mWrEYMwYazMZX<8|Vu-iC z^3A;eBlhd7v{#w7s!I$>&nq+R*NE294_==lpBlTyFiMWfMC5i)B<5?TxoGT1zY8T1 zw}@2BP%9X}lb@tTT9#}PQI=nZTJArMQ&wr70;omPz17=n;!UxGlO4}m*M{zIXl%kA zN*rVyoHolg95&52Yt|Uo6h`xhgmNGQZLw-{)LenppQ=+_B2LU=hdBl~hKt%;%>#9* zj`ToP$tlTFg&LsHIox{1Q`QU73(E`Q)5B8)t_`Yyuc>b~1_kcGJqO$gG8nfGN1gNW z&j#ZpN&IWA_PRLLLvF z^&X~dOuNBKQIAHZ=Ir-?@Atpo8h!jd|9eV*UjNVjTm46|v%Hz{rTwG*>Iu9FVC`6q zPvx;@uMBq+7}~?F;>@bY_Yzm!SS)fSa{bi~)duqq^IXQ!$GOK5mZm%nJj#~LRt1nd zR#O#I6%>|*lW9%`PH|4ePBGiB+<4uJFEGw32>l4DAJ9KwemP^O1YUTyEb!x*po0;A zfB*^qxIhD?*%MBe<)AR=3r~AH#~$11RR?h7QP$8a|F7gP>zyj>NuH@c)2ETApnB>$ zuof^}sI#Rj!A7BLUCXJ{S;OXF<#e&J3S+d-b=-8Qv{7`_-|-C2Zw-0NZjgeYbv}0M z+cn)gURz&{gI3S)v$JCsS{5eh4R&gT5`LcISuL)NEGL4y(D%KWev(HzK zAi`3rQ-(mb^IRd1nSx7$_ebWt^;f;Ru7oBc1cKe@kZ`c@9^#qe{=oZ$&x-F(U_=s0 zs6zCEYKvU(!LbC^eR@_S$*#xAgo;hMtB!#+fdjXMsJ>H^P^eKKJ)B|FA(?wdLPf#v zG&bo*7)==d;Z%yCJ6MO01XC2g!rc*jG=u3he{z584nY4n9&!CZ=wfr%cZ2qU9;=GlK7a(5y! zJu-nPk4Y}U#vr}9E%D<^reUUCCMqe0494#;xr>(2W%WI`5ofc_(HeA}X02wNv=p22 zBIV`BT8~_#VMz^~7cK$dj+{>ea`Sy7eQZzEb=W+kh{MYtZF)Ev9ffSoCAK9BRWHx{ zm}$LZC0((6V6@ad%2+AA{Y`S@=uxurxZHZQe|JSM;R%|HcrxDkFzD6fL(kWOeTeSD zt}701gAH#ptx@_>t?v+deZdP&F{Ng9E;yMFSwEM&$w0!14T;Hk^i+TTS9E()!OKU-nelX==`@G){q=BoAGeTxBrRv7 zu6LgO32JZo8O)%-@Qpz~8Y^1o+~vohu?T`qUVpl#&uXe;rEMjAg;)K!AWJK|yyVTw zlB%KlSpK_wHoXg{{tRwX8C@CP{BH%Sc~=qg(#nvY^>??6Ayv*k;M|r$Djm{Tm zxGlKqyr))qvqojvMx&X4epObYfwFVUa#&|&X5t$U*DL(2pq>U-uj^;uxOE>D3eXEo z=f2HRP_I>2-+4MyorjVC>W2eNX3EhR+yo2I3Y>oI7V9Q+gC$oMciweIxz6 z#$~7Mg7)&2p~8YjYI8@6{Ms$g>cybWsE&#!R-FZ>dO|Lb7pA8+>o#MoC1y-($-MTD z@*}mkEBz}|=DVx+H!9EPh3AvpLTw6<8&)qyFaz+Jy(%v}6l=B5h69oDwW-^MYtA&_ z$L&NX5$ser$s@_p!dxzD7c(a=$|g$CD3Ok<;oaw_Kke0rQ?$Gayv+9Z7W6&6${j*C zVa^P0V`r}26c@Y#b2W8Vu#X!qa5ls&6wyv|Afn=7; z7NmY|W=`*se#g!kant)f(+Iq#O)8|JaHKa)_75EEHryxG3C7|o@V&46R86`Xeqx9| zKF^jPE~P47KeBt-}Cns;NU0-1;UFUV-4wd^72T`z%dpQ8ZtQ&I&g#xyoHe|{yKhv z{16HCxAQ1SNP%WZXn*`h0r#=#6^O?!P`gSQ|@2O&DT z>yG|<{XR}(XS09xWNrV)w15e+UteK=!p6b=*S&$Jg4bvHmCT%tEj1;~tbjEG`VfA? 
z#l<1`TZ4aG`d63#)>PwPO*#3V^89<#f4lVermFVFcH$r_pi>9oe{I+wjsN}P9}NZB zujl@6Yw`O+|8^EwY2jOf?0@Z=@GWA4>`q`KDa|AlRe*0m%&uQ3alpsJ-@k!lRI(MR zkG)z*NTNv65@IUO$Xm16&crKISGxk(eqTgU3@{&^<)o&jKBP^3pY`FaVH2XJopWUF zVR6+^-oREK+`BTJJN|Qzjx7sV%k)&*EY=d7lI%Q#KqPPa<*OOgK@KjHfo^H3F6m>1 zLXLgG{l23(zTFMJMti30F1Z#zBor*#_y6&Sk{Mb!mXCV8Tl+`4|GYKOA_Pgyh8hix zKokl2KmAAz$0k54G~)d%J8%ib8I`1N) z2yYaCL;@7FM#T@1KFMFqR+JJY>i#_i0>Kc9Zh{-PBPR!GaVN*sq?+x<%fHX}A&a*J zu0IhPd#*sA=Z#B{MUS%U*N9I{EPdJR&KIjsi;nL9T1n$!;4S>_v~+Pg$*AS$eDc5O zLX`I57y9tTNY?I0-LD@rNam=@hW*$W%WyS!90jS7a521o2&~2v-r%ttS3`x^`zOi* z`SE~&jndv8o>T+&d#?MxvTsb`qK-sZin6))&A4ADy?4&DQ1lahCI;64e{j6H2V`#z zWE)*@lgi~>CA%VCEI}1mMqWPZhl(=tS1oSoCu1D1!{r@6Ez?W;@$%F|yE%VlY6uG7 zd-B+=DTUpo=BM}nE~Vfp=1nP>C2~0^Z63?v?CKi-gCnZN(1YiWg>D^GW!e_4S(c^K z>Bf=KSBVND!)InZ8aC$5%*?{|xKt1N`-9w}GNTIC}|oCFF4GxhSUH`-byZnPkr_ZCsXy0wV-TML+pI0_2V(OP8O@vK_p$XwTezVQ=6U@ubFbA zGvbt-^P0eUj>axRXwAK^BRp=D7bS0$;pYp}g(7ZTqx$41&g}23S zGH8FD!@5P;$Nq&%z)9A?AT8tj_xQ%9cs6ph!VesPVlB+3eZI-Ai!u;?AVHcKt&C~{ z!=^X=nsO81V5eDDA`JXOsaMi$aicd{(LXz7)=4dbf3ch|mfe$osMEUOSi<>M4P%0c zEp%f6@Vw`*7H(F6ht;BG1XV{6$6BYIsJc4T^!sqvDMvYmpp0;u508H(Nyun)Xn0i2 z;Z_hZzX80gjQl&Na{$*1)cN7(^TVePoTyDO#o*7!O#qh5+m7fV8yZX{nZqkUDM(_&Xe|2Rpv+Ey)albr>+sh(+7gg{ z@}rT`m-NyZo=3xtKeXlyYyi~bapO{`-0}YTe4)m30#=_++&nrmLSJjWEn{b#$oMN` zt5l6%u`~GqP(xq_d40Es391xU$gbC|I)!Y2`iAK&pb=u>cyE5QXuRNa=9pJW=yJ@F ze`>Cz`1*CYA^{Z}(G6|>T8u0W)T&UNmd*#b&Ng<200{`SJvU z@wYE-oF4rbQ4N9_jHbhG0sWVqo}yH{A-b*iC$w`_ z6~ngp<5u*D0gpN6^}Xu(4JzO*!$zQpHKT^A$fMds!JDVvT^Kw1@~=7E08N-Po4zMn z*>$O47DhOhF<0q*RJ+2U(|Uqm^utReiBy#nCvFkeiZEDSv{ z#D~^jpcW()<~O-Mncvm84B*pvLA+?xw>>c-46o?1^zJYw3b%9Q3CJXxL=zPW`#Gkj}{_gVwlLt zu06KA&wcnCxWHrxq7U1rlcoLH4O@nc7E<~i*TW5e6ZgHvRhoF95wt+((Zh#N@5AMY zFbWr`27U=S-Z$BjboC0*Lcm+m=3}oj6x1H?Pv}G>abaP!)Mu*314i1WkoM-K0tEAV z6Ui^fr6B;7bY`77n_=sbneI2bKy%|r|D3(|EyAHe8y@TF4a!!!x=X_ zr~1mmDr298cI@Jt6cEY#7C})OXeZ4og%`_np-F5CaDsqKk-2cyoLgjl@GEGCrShd| z+^TkYq1iWFQ$W}Jhm>RRi3E<%rdjmU+X@7AKGtl3HwK`z(ZKRTo&dY)Z81N0foXKr zAegPnGFulGYY@owOXVQQ67`;SJCd=631`P8Gm>#zJi8B~I0zxZsN7r|@1p5l<*LLAQxW#O6onstT0S`e8_Tsx)-qTQcj;++E zf9ni1U;Q#eDnokdl#uwr)=}TDqwo*_m4}qtj;bl|Y17v#Sn*;Nm>7sJI!(E~Q$a(R z7v1Q}L>cY?lIqqoSApwek#cG3);XYKVmiu*yT$mwbazMeO>4*F0nH!z%Dwp*1%PG{ z=wPC?&Nn-i$)?dcKP5Z8akyDX$RM~jJ_y2dZUrSrnMg^hJ>oZ!Ca6QPCJFo{SbB>! zN$Z@Hcbf?-IWcj|ufgix3`WED42*fQ;-Z;P44#T*b>n7S?_VRZe-*gzA$(#sENzrG ziSnQ4q~rkF;-icHGnN}~)+2d+vjHn@C}2*y*^~)qn4kPox9%v?Y7yuR{!qi5Sb2jE zf1};$C$EBrubhVeXOI8xt^Q>=aJ?|00J^ge;I$fQswKPGvi*hDiCN$HE@;NBTf-q3 zP1`z(Pgsws#!2faeHO$Br*Bjzl`q%o)C#FcvOos}QZ110Mj!D<+b@$S?dZ*iTUHB@ zfo)=RNX5JW6Bcaj4cX0;xLf26a=TSw1x0Jlzx_)Uq1{Et@?p9Z`g!|CX?aM5OmSQx zP-+EqXKft&OUwC@U6ep@cZbmA!(aI9-wds29M)u0WyC@ce zx3oM5%zvYO3t-$}4Yo!%iq#(mgI2ByqnXQM;yQ3)+DZb%9HR-_Cop?B@TQJL8SWM} zgMa(3+_*E`J&A+<5_XWHBK_g!_Io6r;s)52Ixp+b#FK zTg>+9LSoP_q(+qE+)7dW%!*)U0NY2KYV}6p`ZHR7d&>{K*$Qc9px%4Ow0F25g!X^5 z3(rDO<_M~&$06g!01UGCS?JtdwR!lf#Y(-4(s>7MqSeKB(IAisi~9R>nj1y9Q?rTU zd|Vt-0WMes0rx%Wu(=ta{ILnCktn-p&5jd1a4YDM4d5NFN2BI*vwjdie1kfFbk`Dc zEufD7KLXnH1m{{n!>rt{wFd8(Qu)7V&9isEX-$dMP9km9n8E)brN8eSeFH#vwl%&? 
zynplJu>$jRdReiZ1fU=OU%CYEHd0!wsVl)+Y+}+q7oYmpVQ_IBn&hF1KKM^yra6 z?wa`j9Itp2sl2B>#&%t*w!A?DyoqF+0MDliNc8pmB?h?5F5s>BvSz*u<5RWJGgz%+ zvdbzNNd3o(ac{JAX9(5T0uyv#dzDB~5_mz;NdjOQUkuy~>)D#{$Ve5ET4S0Umzh>c z0oaCK?C9AWC;UYUrJ1(mH*x(s{{+|j>SWL>L^w5;dwfdZ6K$2c?@IVB%lQV~j!$F{ zb{el5Oxuq)3u+ZykG3>NW;v*lHllDA2BefzTA|2|P{ARyNMYYB21?la`ha>+6$) z)}3UYqp>v{zm4NC@YzAaAz}Sepj&%-Y8oj=Dw1fZNNu1z>N*> z5I}FX`DDMjv8dmzM2Ht4Wlz`V8yiQnRhwI%&t5+MY2o6#03=sOQpHf`0}%6GUM-l- zCIg9&3hVZQOp#nxdZHT4TV`v);2af;ZyWFW<4+1ccNAmi>D6mZmzfb2N4V|Ot0#M& zu7B|4PscHpTnT&dvZu;+DUn?#PrGt}YNp6QoJ`QUXvTTzv)!BvluzHzMqSMtJ)hC5 zomjoCE_}1Fac zDPl@-FF7;zUb2-OMoBf#8<%Ux*_tlzbqPrEx;*)bnLvn&iRWOM+Y>{6fbfW6(=}Qj z`SzsnYX0i7a>llkY9#B`gJJeOXC~Yp&|KXVqKW|Bm7QkVyq807x<|TS3!rLN*FduTAuX)$i6~DTN44WFu{`%P>@k(d%?HCB&(t zIzu%|-+2+eyRYw)eQ+CUJ4Dq zEcbb=UGYr+U~Mcn#&qFq_$5u$)k(_LF0R*d*vmwY`of48+2|!DCGK9Ti*Y8&VQ#Q$ zrq#=1A%cj&sDDZApGQsD_eBZL{i^zP=x^{RO&{3+cQW4myAS9T2_;6Ga3pW|Y{HM2 zvHzyu=m}H8)ixheWXdmt0P+h?Vw>^0y4XWBUd^s+S6Ie5Y)uYVUOMw0oD3lyXoKoi zH^k>%ph6`&BuF12Y{~!Pt9!mla~Dr>UeJ?zFzg;TVjsn1GGL zVw{z!tKJO(gCpA>6lj*d(5kQ`>+EWy;`BUsCgXR=+CdWl!&u1)0MNI0Xd%v`+IV@o zxwny9C?g{iG-KEM*lxd{R~Fd*2OT4??7T04Y=En44XD-GXW^V09ya{{EexHvd_>OS zt$R?BtEon@$Z?Yt4-zgc>KQrlg^zd90NI5aAHx@$M;#!%vknmsYeQm3uvr$acYR;j z8;)CaGjX(3odK;F$5W(AuN|1<9}eso+W7zrCi2!OH=V`D;-MO9 znZam1Vvfb@e2L}(b3ACaM%B9U!ikL6I|dM2iqpK>j! zLWPcE1&-P^@=7oUv_27pmm87?(Ob z+V;rBz^7&{Y&gv+Was8qEh#Pa@R$ow%N#$;ffl)+*qK;D2a>)Sc7`YzFZ^ujfK*r! z)#L4CK=tZf9YHaXG5$NW$pL!3Gu8HzJAm0aqtveVI-Snat*tOITK<_V>HRHktILZ}4e1k&NkNGF3 z{Et{!@79ydHma?L9`f@!!&3$Kx*qJUeGRQS8$rK#-RX6O@T&FD)YN=k`c6KRs*ZO$ z8^J}5Uk^H!`~u__1P1-obSCVDw#!D-lQR*Mr|K97$8*v3cxv~k7JKob1WntX4;A!l7zh>UaN-KI@m<2M> zBY?%W`uQp24GINFXVx2JqCK{~%h;heVVAMAvHON7-y|77sYS-bs2jB;NLE)@OEPw8 zlzEDW>?=2C>Zb+rBE#tRI|}y=Tjm<4(L4I9q{`s)o%-XoZgN50%|*& z3!xTrmDn-_0OQ>K0Xt1?3CabjKx&Ae(Sw0*y_nNkz!wbcM4O%sP54n2?gO&VFAj#y zXgX|{Pn?dgyUv=_jp~ADiHqxDt$yxHvywVXfZ0=j>V6C#GMusyfF{}pwIr7=Ap0OD z(R`EcQ}9}|a_Z-OaQzg_R+A;w?-L=cd_?&ZKyB>`&2C>IS1asZ#ku0pRUBl=Raqhi z{_jHU`|Y>DDU3zb#rVqoGrBZ7AHa2&yaUvO1KSo-JBpJ#>0TF``mn{2!^6CWqpID3 ze$qQLOPkN(WWyJe7I55yr*}@k`Ov4D{|J%1ndyt(uEeJ=B7~OD-JhUI9$X7elmny2TC8 z=_ETdIp}QqpiZ9nU2Qgq`Zh&E&afnT#r9mi+u9{JoN_Rud7_B+YRBv9%$X`}`T)h|xa?x$oyh24&}(o0;0EV2ZhXWPp|xv`Sh8ZA8VS^w#yvN%!ZXsa+P zd+Baliv!8l;29rbwC3DC*#MjynG;&Tp*d$r*9kGX5(u*Txm_GiboQ!mRRM;hlh_g$ zgPz-_7KpZnE&}~id?B6Yn+^4vL6%gIBn_#&*+nGx8ZWE(v%5{=J7 z!H}XuCmp)Mi12Vag`Ixh`A8rnWJ_<%p!!Z%f16rRL-^ugbVy#FR(oTjke#uJqGnFT zFB)IuTnUH?JvN!-L_3nPGs9AO{r?;W!b9fr_C#` zPM|3-mRR=TFMHRZ8y$_fk1KI2mGNL5DKqmOo5BWv)LV^p;WPfJs^%)wcMN)MI3fL z2z9QVS#j>%nJRsGp!pfRFbnt>;`K}*OldD zReSaMboodkqUZ3!?qV2$zO92fiNM~TiN9u#lM?VlVsonAU!E*{>s99yVNol{|Eb=) zf{xB(*<-tNHEt!~n``zDj3w%Wr9@bFz)Lq5w7^K9mtj~hGHo))>gd@r?$#N7dAk?X zt6S?-4Fwg7L+8D&JmM%1$|v-207&p@uEHw8?GvzuB$Bxy=K}&eRZCmX^Oz@VQbL7K zv%365lVth;q=+XJsDE`j;RJ4|#^!kWi8l>2A6wK>Iw-tpojKT2Us z-@`yT_N6q74^6P7fq_97w(`rwLakViqMb-4#bnASu)NZNz0bohis-7ihpN>udDEPs z)kDnd_RxXHBO;FEgGMbqgq@9tX`MEoo28dXq?5XB9 zOL|2v4k82w*!*S<1ulX3>@lall1)H7l55z>Pw_QAv=pvd(Ci$ncR;>sx*#MraFznd zXx}4M!*yS1@+Kr+$drq_V|xxwc4K?%HT`PD{xi5vSmbEBbu~ucvwA;qUFfO7Rk~y_ zNi|e`yb!8?xfeqSO^rs#*gmE-hgENa`xEVzw=+I;kRNd zF+EJIftVP;$P(7A+J}>9&R^Bgz`PGvp4-fSn~{&ReupxjuIleQ3J?nnEG#UOx<8$E z=IV`C`{Ek5<%(VjU!B`a%gXK_LRVM$$4Y(2n&rDxgfEW`=O;~7i$K*YBrfaUVln{Q zqhVL;u~zO2H>UhId1D>E${NJ_pu?R7XUITrZv^0%=IsDj!LK@C)d;4p0O=MM_Pq;yDYvUp zviXA4iDl;JWG0D(ThY$B?;yCXFnJA0xc4>wN= zYGL8m-qQ&&AgFNxsoax#V45{+K-|o-1c21dZ-wV*mMcYj0w``T{%>MU$(!he9psww zenbT$^G=oeP^0CQmxqfg;6~1?uf8x@%g*QNm$06u#LHzzvpV)CR8383zW(~c{G0bJ 
zjAnM~5k>MJq3%nyBRWG*lGl31M7nAY`AP)9g<0}4c0oo)8PfX^@lu^m`5CT<=AF)d zZPP$S$lF=`0gS(bvEK*eZHm%dx&9jkPhFt?N(>%IDF{l zON+`65@q8_HyMQw>aBGK%99t=K=i8PzS|Ci7Znv3JRSSGa=tIC|NiZ@xfY&5jq@*B zCnn&y#T#%G(D=Ucrahy4lk+sguaE@VO*!}^fwZcfPfau^KJ&c zfz9)Um$NO=TUUkGnHS{_4@~^k-mc0E(EAqx5t#{I|oY9*r zts(dQ{R8F7lgYz1Yt^3X)FS9t*)Bt z7!rpy6WzxW1_Fj5^A>WOJoUn4w}Nk)&Q}+Rl^~V^xK++rNTn5oaV*E5RC$QGuGxI9 z#`hx;Qx7_+&H{{u^<2;y{$}{H=+PE``K@w{B41@qAR}5{`L_Mm$?0aH@pztQdQ@5Y z^a(&^`vyi{liKTj^oQnWe6gKwY2u0y3KK&hOGaJ1fgUK?lE6??r>QYdYniL`+ ztF@&A7321oY~{?;3Kp}a5n1-AV#Cf;9+rZir68M69Uh2TCavlz0bQ4ss6ANij%%%Z zZ>h;(G!T!UoR*~_8l1MLt-b0Hlhy=IN#xHahCFS8cv-C~acX%D#NZsE{y3yE0K>Ue zFmvKhs<&3>Wc5%Z&u+kRzYJWKnhqWvJ{*YH(X1Do~GAC2l^oGKZ4DyA3&{i zGqyLWS1`5@DM;nr1DQK$V0?3*)>*|(*39IQ=53iW|0QBHFUMuf*A7`ya%6bb+a0}k z0@8|#l!Q-@ai(e<-DaRaO4_CEtW&7NEepQoWx6naX?U~9X;PUaP#(cG_`%(`o;4IG z3Si})BK^DLSz)?Pxncd0lDc%M6N4A!%+6T_YZr>`0+lirs!K&O4FKbrpXj=g?U{9_ zLJy!(PYw&cwjs=F-^>M+LUWs6?_{XPfoivTDswXyP4e4jDy+1@Zn_T(X{*%Oy#N=n zoN{$;V!8$a-+4)XN{QZRL;#Yis$&iBS*VxC-7;^QRM-vpynXBp1IV-(vwQ|=u6vA_?b(H_~^ERY1&@=3L-H3^(LhjdILn(7*Ep4E}oCY zf}U0V=+))URnOJ=;FWW&5s)hyDKk^npZ{`SeEDmr@ZO=>gSj`AvmA4~!#)c~jloV{ z#j60eXB(t-HonlM=uP@(*|(A-+FO5MqdLJUKv5P{h4HJ87+mdff~@bUW0D6ACo&YM z;q0BOKQq4Qpj1}np9lAqx+XIw@s}MPGRQ0e3j4Bdx52Hua`x_Jrok+7r`h6{>}jr3 z-|p3h?R~0Z>seMa#Ll98aUx9D&cV=AbuJ@aw-?exV+)J^Fhani8JH0(y!HLm~ zPc8V6+x*9e_t!k&wIwCIU|4rv@Wx#J$dnAXvI7FL-mb6*0M+tBV{GZoP+7(zbSzh0 zHi5^=J&<{m_85pguW8ft=9zGXPQc!U`l}TT^z<;V8%un`SBqo=wqBTKj$L!@qr>*J zYMIYTv4VlYHh_^dVRichv9)+OBbYn(lMSso4G`2>&$Dg9!3kjZKvN~{CwKgXm;_P; zT}p0~KkYIzC%EjO03E2JP7njkNQUXRqlLSmB8WBV&aolzY{lVbwV>}p)FC*(KINYo z)zl3fKmX>#9B!-vcvq{JrNykCf)bY@X22GqgPgHIJl!r18=_NST;t;Uv|) z4|M=)%%I1vc&DZBwrjGW#YR&y^OL;uNd%6Tl7$@iP3NmuEvM90GaVLE=L`Dqm3pWK zV!7wR+l3*N+GV!($NNzUwUukz2u8jo=wltVo(Ck2Q%ouSPJ?EvTQ2Q@{dvIXvFGnr zhY(4$`{ZW$&)oDw_CMICMmx0(5p;C)0Yy(w&tZU~3EJD+3p(0{s7EFwXdAU8OK$Qm zKNDGT)Qd7`WNfm!luLRh!iWVEFt&m?^0Pg2seEkhA&sCCRC6ik;^LaMD4RG1LKw>Z zMY%QJw~dTgdRv}>cvR%Yx^QiReE_g71|Ql5HJrR_n>1(*@DC|Z z0%%K?4Pf8foW_+uv)8Ppr;A*yD@wZRKtNDNT1)WCfZM?u;gbPBzu7~lG(lFm_dGu* zk91@?d81~w&G_w!G(JxjVh5jfM1D8$nIb$J+y60d-^uSk8+c9gw8MD%%uWy18@5}H zsGLH#Uwo%?ZgY&MtQAbn%o?B2O^CFabn}{n-eyYm$YewcvFZRAlF{!0?QvJwl zG~B=q0Jsp-_9cx`fRJLwA!R$j#a0tiC<|Huuym*(z`8fsk9Pos0PNnu=7b`^F(e%V znW5-7E|b#^3>L}F9_L)im>5Y7Ned#KsIs!B^VOZ^i9iCTL1t@3mC$`ST~YxZzEHh( zvQf_${b=jZ0OcaRks@7QyV5!e1DDP#(-VLfBiFvA)On|PmX?s&XR~_EOQfLc=Pfd> zcb|mHWA_2&==H-Ua-V?B(&n{Ps#OEnt+T#$W~FGCahYuu=K<`V?n5g{_@1*+jEN+? 
z=d^YlTx$Av2=pGvybtsdLqTFXKY=2{>*}cV8%V zA8m4#cKdbOSFG)q%Jx-?W&ptdb)hvCU$vn{wldQw-sf1wWPWiT%P!kf$Hag|L;16P zye=QfUSsXt|DE*rCVGCI3~QeT>XUZ9nL+}Z92E$3=mByvplwhHp(DH1 zC-1L6?3wga6U4mEZ+MvU^Z6>Q$xaW*Tj3`**}*x0#EQGb7lCJtYHnMT#dZLAISnvv z5(IKw=CKHu7Dna%wmOiiz}vCNN6Hf7?M}>v@KTu9kE~a3nPVX0eD=!UbcE00;`%98 zu^TMmVPOUdFu{0zV{D%k_r31x1P8*^K`aFUWO;yqb>tc%C5um<#BXet085swS^7>| zVU+g>$QTgeM!IEc4ElyBGvQugW&)aY+Mx4F(|6&NpX!-SYG1ODfb&N}D=NgeHb~mZ z$*GMWVvy4kEKD)ituho1BpACPD{L_)1a)wyI0@oz-~XgCK3`s3kZ=>q zLAShDr&}A%d0wEM3-ga&j8a{vb*A%gI!umwiX}X#@l^I`gnX6u-4DIS-fvo-4HMmP24WE8@=+!C3SrYoJ) zff^AS5wjX1wn_>hdlpb;wu1i%;^=gFnuY(ZD7A^_>wwpI-N(dOYyEZb>H>6EZ-eMGBJcNq3^aYUb^$##5BKe+HS1KyWNV!z8(TZLc_dBXK0bl7T)UF{9APLR05)ebNgi(J5r@_ymV08cQ>4?d^Ew5_c<-O4iJ!I$ zB>v-(2Nk~Sq6E@+e-R)l4}_95=D`B159u3E$8 z8^iv1?sTz%#>>OP*OjngMA_5#6cyV*705MOw%`<&+_#f_2PMiPSuZl zxPbKhaQUq=aFA-ZJ8Y)fu7k|PIj~pLB#8ltdG`*dUI6kp?*y^B&bv>KI;{ew9l55C zMf=UI+Ji6#MF>b=~fZ`3>GVXK`x)2mppftx%8Z z)P1uix3EEL&I6&V>yY_JP0CCKTN}~dq4k%C#Lb1ZFn*>PtHJGPZR@ZXpUJy}h*=Wp z=(WZgfjWVad~M#&hj}%1vue=mDw-LfXpe!WD9Ueyv<1VPEN^BOG-b$K`cL93^lBP8Wv@?|Qaso3);*`Y?aWhUs%tq1{K0c6^jcE_ z$&ViKNm@5pXK7El4ZkKrOVKW&JwAxD_fQ=V7Jx}IO3V)VW&0`vahWYqX7_kL#5W*d~KzCN9NzL@-7QMMcm*s_+%CeiKWJ zKgBX-P0VHt3un!|Q4O99)MS}D^I49F9&u0TEctZ-kNvR%Ns}1!k=vd8)onn3 zK|5%D^=Y=cjV}A|n3U?RXZd$*$apMcf&660gnBYinBR!lSYekT%nh$ryES2dprG~2 zeYq=aj?Lr*sJXFaPQVmLoZiOB?e8Q)8cT|fX$NXD7=eiWLlp+@GoZ{Qitu9B!B5EK zlmRH+Sj6A8q_*z^5~LYG+J;cybm;Tbrh>6=qkZ}jw-v}q8kRx?W)_ZqWGj12VZEqGYVCUrdw2fD{PD>hWI8p#d}f3Xfa z;aEKw<(J)5>rWEFJRhx*-;2T~w?JRz`OfJuU#bWm%&!9C?$zYWc3#-=cX^7QH_HA3 z$z~S;lvBoYrt?JxrE7eX>wU?Y(~>61x=ga<3GgC?n*L5@lV`n0DR|r3(%SZOapjyw zvjOsZ{dEitLpBxI-T1+G!Yj|<1Ej2aZ2PM;`2%)iE2L8SpB#3+++p|HT@lnYYv@i_ z4wzD}t$5s5p9exgXzq=ke%{+oCoVBT44b68@e&D(eDme;CN;s+S~s zUKWYk*gB}YP=tW2g86utW-cC4W}i*tQ0o0FWRF3I@v-~}GvTgm!WpW6Jo}(DWn z{8MXyqN2?jfDb$4eP}G}+i*>cR(c*U2CfXg%xN}X8?POpa!4crqP69NQN@^VvM~YF zuTb;IL&k44o_c-@_%_C<5v88%(s=wc7$!Ui5T`>!Hb5yxk`++=0R5y}W*VmBy8cZe z${4)#0EfooAnZ07XWN9X-lB=n2Q?az^^6Gj&e4Fm-0KJ|ee84jP@vlDiTCh2mwifs z9Sm~HD}cI|C{)e_YVx{z*&8Vd(JCNW#_0gZhVY-G$#07#nyKgHW<|~eJlf-dq}mnVUFG(S?j$Rq5@)$b7W0LQ*zPt*Mq1~^Mglr49%5ZH zql?E@w|nILh=cgWmP0}H-2Y+k&Euio+wk!gq(zi)B1@@MLM8jwCMlFXyRz@=7`ua_ z6tb7K$Zn8*8z(kn$KaIxm#ZH>6Ggfk6CxPX4^pr*`u5oYo@%q5Rz1PHm2{Dgjwr} zhY}l`&l$z!57N;K;!Kk;p=Me09`6%5wOI|k+R1(6j#Islb2BV_;?@&zHqPy=+L-q< z-V($?G>2Xzy6>G{1T(jQHu9-bmP(Ta?wZZjc}>O9O)bTlemO>AO#L#VacM4QS#90= z>t(xH->kbBExXaj!P-m4bSzemS{OX8Pf`69kA!2gSTR9=`DNWKFS3n1DcGsjtg4ji zZ)-Qvlq94%tMd<}8AD^az2&>z9_!=A>O66L`&V1jQwlViJQvk}$$c9E&EMc1h|4ie z%j=~ZQ|auxh8^M_Cf!FVpmbQ40@XQL z9W8KSHuLg+XY^_>n&_a0Pb3_W3xgl4)0?ehJ+IvYJu)|!dftoP{J?Uaj5mgE787>X z%|he10E{V%Dr#6CFos+OPWHIo#E{e%u0{3s7psQVnGbl? 
zqY1wwQu|TEtJ3wiq<=a$#`a?y*oR7 zBaM>FoqWm^;Vi1@@LV1*YsbFD_h4mP(TRPl+~>F5QLaKRv3;GNltd+}l*9`=%B&*l zy(9;PWKk6EII61YbAztakM0@}R%7IJ@EtOv%iLdB+1B{uJ%3mOx$1X|JNxcc1vxg4LA{7bni*n^}R8lz{jVu>_5u%Mhj5_ib1j$o;Xc1# z1Wx!{ORa6fTR3<&X-^pn-K^0*&iTQJ?ju&U7|SRChE^)rO=&sTEL#KwF+2*!pH>NT z6*{I}8Iu}X!Q(P>LGIF(B9FzaTpD%Q^L6+a0`j-DmRFr38irRg2!Z2w_EnB2o@}{g zu&ISZ82xP3j7<|rIYrMpTzXSu-fCdvF5V0^S2pn)YCa_MX>#B+s8xtE;m%)vPF(oR z1LPcw=4@R$6D^5Fl+7$ROj4b?bWPmJx)Av?c>69iME%q>!9XHfsP+1Tz@-N_!iM31%JjO43@i;LmiLxuoz*}`Yx z4IkLA8e`XGF;pS2tL$0>xuB@L2W@~m_)0e|v~RLs-00=hltiwq(JWtvB8MV3lk-}O zQt#D-n;s{f2usn1rv6IBU#6bTq_wW&#+>q$7tA7Ekf>r`heef#cX`7;Wbx8fRh_oF zr&A%*HmG#a=*N|Zp&Ja86n(!SSgS&*ycKB-)uZA_uT} z;Rrc8dUH&m;$2hG7!r$!*qpKQ*iT&aoYh_U`l*@WRJ<~9Qh{{NatLjRdd|!rc5)=W zkK-C_PY%QEEC|5lsfVdacX-fBq(*LO0N++2mzs4Pb)mw3vi&8YY$`21dXO(U#vu}j z?;n^o!lC?i6vI8`)+mt4+dyW}ptrb*Fq7-YotxxU=piv&>VKMel|0YUkqs7^q{~Du zI(>e|Rp&P>f2gcQDEZEQ#dp1OPELDH3LxXA`_WP=8Jaki%LO=q4NCTU7iKp-DR#ey z@9=5LaVMVcF~3${+UV|gkZ!%2JY99%yk~a*Mr?m^%(5u@nN`$RM!-1cR5HTC(z{Bc zVh3i9kg{)~R{LW|?;$;zQxnWm=dNOIr60%WAmkgbb7{O&H}qIRiHrq5YYuVl`NGOl z?A_=kV9kQMRETsn3AdAR+)iGJG8i5^wsd|v{DYU*6Rf7wr0!DkWuk{lLEhDQ;7!+4 z%=d2vE>Q!95W~ z#?!51fzpBL62d)%k;{jNSh>_MAf}X@7Ctc;0Cpwca*K1*R1g7Un&4O#w66E;mADGn zKk=imuUH?8YIQSB93;_mY@7Hnw}Q`#AF_yql8%;fC6mVlj)~j);^=_ZS~%YUcQ|W; zot18!qW(M;n+iad8eJY48L9m?Mx_%I%(uj<19M8Jmc)kWDMe;MFIFR<4?sja@Zog^ zUtjgsuMa(2UjEzMbPN9>!1rN&IWuJ;!YaJq2;|06q3ODEVxv7LvaThDPEcp|Y& zk7=#qROY*Qo|8J*zM1p5i2chw&M*y&)+GY`JxzEh-JL%=Ix4Bef{FRGL@3D6X<7BU zJOWz~stwaUxfT&tptq?D(9P^zN3b}1NpAgWY1f`BKc&21dwmZvgv&C`S2_s6tT zC`x#K>8`O*9l4f$!g0IY=yn6Wl=o1jT|-ZamiVQxWQ&&~LWz<6_A{RjbzWV4W!kV* zu63GIb=$a_o&q^+w#R#DKo@oJSZhT@o?z-z(p9KrbM!rE(YWVix@%M?D(X~(wrgxw z)=7W33IkIeJv|8hZpqybSx&EwdhR4Pt_bjKZ5Vmv-V1jtY{%kJ0^0OT-W*8Xi~Bcr z_68fI>p$%&M8z|kh2&6rWf32smr8nym#nRYyB2OUlJT556Dn|%naGyiaF%)Gxg|%9 z@FOw{tBVD7QsSQ*K*3v(#s(f*{YeE zCR^=a=sEII2*7NQ$pdo#x8`4wR`) zb)h)b$;(VSk`4`@GxMbwVc=39QR3zU)kE({7|vmA`QS7Z_^?MEvOS zHE8SvJ_(vetmPr%V2zCiJvjAR5vcjmm>V#0zEf6FpHm6U$YEFcDwlF~BJsWF?qDEl zu8(DfjscxYd1Zw!p`{*3aAj372B^@jZoNRsWi#J$2Ur~WYMwloio?wSbm36d_J%1; z(a5PRcBP^Lhsi>^Cr?DsE%PO|Jm1>t7)*UvPNvx2kRelHcE{P``A^n2<&KbA9}H7O z%7b|rTCAv-3n=FsxjHT1&Cs^LDq+&irlBen;)ASlzbt!@ZX;dNNdN7ff>d+tYxe^4 z-ZDuNXkQJdtRXzguUm%|^~7yG`)Y8Q8m@L=IEvhRsI5SlkYU;WMl0PoU2hd&P7fl= zM!|UCR#&Rc@Wv&5-NBaxQl@nLInOXef=WdX{EG%McDuTBK^6MZe_GDUN;b=z#0KKY!o#8Y7ohJl70c2$se zalrJ$K(oI13!B(T)OkfQ;ymw*hXtiQ<}H7hzX+m}L+`X+Y2&{-KXKO& zuoZE)65}%jM>%yj&Fe!o>HF3f%J>5w@7Sdb%cSARIdOIS5r2V!7fe=V7sno((mA38 zXJfe0%L?m5yO%Jh|BE6FThf)V170{fj)%XLT?Uyfexn$tNPE-4-r`ge=uBXhTT$L= z)lu>|x+~vXze}8PY*pp{L5fO`HIoEaVT*9Wiv!1WcQrNLrZ^{O#ja{%znvV9L#grZgRiL72Z58wLpG2izk9MPF(FmB<2}}VxSH$I zWC&0HD)7mZBMDmx#oG?;Jk<6g`3V!e`kdCdl0(}@1Lgwb$%^-1h^WwLN?dVQE|j>0 zFdn?!ES?)>;RE|cwwdd^ITA%U%hE|(q#8rsxiyWsyTM-Gd$8_q1u##f!CjQc(%)yO5h$uxfqUKhgA)D;wlwA& zv&W()mxi7UbGs3v$$x*bDXzj5?_Fp4q9}E*8a4j|T(H~%MP<$1*wY%e2UVXP^ckoM zIGQ2uHfq3dYes$Y!FBeH0av8gv09O#Wiy&}A2aUYsnf2SktQTW;@K-ZtZZrIj6CXCWf-Yx`@o~_R@PAq%D*X8MO%~lcAoy<9`kS8=U1f3VSkmmcr4P)HA*&CRE>g) zaRj%-fBB4iB{@SaWJxsklI_T?VxR{8e?8&bdn#9KY0ttF9;~J%(;-wjuTL*!^@|A1 ziz>b=o&4g5O{kFz5-Y_E;^c%=-yN9tC=h8brQxK^WSvE?j@=hXLGA3tWtO#;I=yRv zLnV}CFN&E0yZiE<1D!W1Fs_dZ`DKk8>G#o&90^ApCmi}!kFU3y?0llIDQQb5b@9rJ8d4N$nuqK}M?i}dB2uJBG`2nPnm^A2S^ za`!JDpBx+Tm91eUW94;EgSrhjpV`DfpII?=B3Z$;``v1kE>R;v!ZuEdDutR)8IA5B zqg`zgI=ngu(*%Wd71?zYw}2x@M(%V}Q^`sas*_H*`ygq!Cv>^&$F;$xh} ze_WmRNY~_wJZlL~Znj^}IdVJ`{J}KW+)-B;zd(b;RH=x8WdGJuCcktHpio!o*66z 
zCs_IEKzoqNOet2evURej0vVTUa+AxBm~_d-T0L*Xh4ujwoI)c{3|a|F@9ERzPYQpm>U&)BkLbwnFZ#3nn`3jh#ihzepU{f;hmdy7{OdrHNwxU!DMGV*keTIxmyY>4L4gZl(ddp6=*H@k zd9j)N7BauAP0wG*1}gDu%q53S4$HfJ{7DJ+snSH=W z1@v^g9)0j+UBOF^*Qh0eZZJzobRhZ}?R;jKdy&PKnDIH+^f=Tb3ss;A^Q~2U>Q@xO z+G8*1tk}#@4+NGcnL0Ar4Gh$2NDr*~iZLjP6BJ-{H*#R_{sG*cTJ}NWt$mHYI~?{A zzFwPmOae8+uS{3xqX*@Ea`@B(2{T}>|ft3`vv5z zX7UD}#5O5gheZv}yJ9P$S+uIJw?0ViaS**~%{+l}M$KfuFIx4L z-S8Y?vhrUdABJtv69`#yQ&dh20Gn=&LX&Ub;N%lHBHyjgX68BD58v!j5KAnXzQBDo zidQBP$L)C=Gh6-Xyenz&RBVd!2;IwL^wO@rHsyFJetPNg!p!?vv6A{a=04#jU4$(k z<^p;vh1g}$*JNjNGS3pJI|gupWYWX6V~Rrka(psuXbW82(THn3*%zgJ4oq>SjukVl zuL_EMd}U-{a6322zj1vw$UmxJeRKMKHY}g2EsYYwq^?aH>Jl7Ki3F3Fw@Dn7tg8mQ ziHp~~oe|MmRy_rZt8;x;)WeOwkvvQ!_5mi@UsWmqp~K#FwrZO5tk5UsD0Ot?=SwYC zNqbiXBvRka?LR!r_P2{HSov)Yy5P4p=-;GlJlpqy04OY>$hqT72S4RM0}YLO+Pk91 zVV~=%nW*>Qg2A`?Whe!fW$dl;K8(ok-)V zg$$)*XBumCrtwv4;`_Vaqh?)OBbJO7SMe8L&%Lr$$i3P80$NFejKN~zVAO=l$FA4w z`Ea$$I-=L_=(l|xeN)9Z;kJahjzy4!9)YLUP`O(w{az{O?)@A{OCGVi5~o@n9M z4Mxr9r>-*BghngKQp6z(_9kKQ{w#zxqa5s`UOia}vjG`*^@=*GYrf=_`}gmcHFhr( zIbBo{YxHu5b8^8Yd|zUYbsMs>6A0Ulz3~f6{aBaiEqFhXtX*|^EfuR4m_8*JE2Fe> z7Y?e@yHN`W@h~Dpwy*dnL82M${Oc01wY>6 z-`>b0I#>ZXX61I(bMHg|Bh%6o#+>Hb6q9pWm~iJFM56LDLlV3HvjvEjaw$q|*HcZE z?3Uf3nSK_J>*!FrnRzyAYj96<8oa1d>^a2;uCtqoNgH_aK7_sHYEPO0uh4kLtO7PV z8K!PtxV82D5p{+8!+T^$%@seDRt436wKRJESa-z)nIcrkly&r6r|cC2)K^w%oMG;kmoK z?na}PevBpSOPF#!R~8UTt=l%5lN-uhrZO)bIXT^hSFEGH*MqNTIPkC8eaj4kib_lILC)?BgW31 zA6##?Yo2qAeuZC`yW_OBz;G!c7tRQxgLRzig^^A)TZOO$J*8Zf-~hjK2~#HWCJh5f zzzn3y58rGzX*ie@w$vi>)#*NsW}(a1R}rJhaVpQRRH*s->tkXHR`@5&wX?0?$wY3l zf|!7BhJmmi7NJo)+iX*f9jE;CX-fAOT z=N?KfcTcUVt0^k$99NebC%^fzF1dCm#f`8Ev1DXx3p%NA=#|H}1F{dD&!IgF{skpSW|)Y02G|(3@#triOM}rv7F5m}!Ok zhTGc(J{2o&T)}^cvQi2di9l z1hu3DB$j_g+cEEJ__4xG$=^l4Rlaju1So?E?%KAyxxunB)KQ_Pq*VLPjklO+AHExQ}khpxOo zbiZVo!D%SSh?jm3Yj$7;U|vZquJXnDsiGSrcNTGr+r`g7hv7JSb?UIDzwnRI4{`JT zu0emxLe{J%Ihpxl<=rt%mA3 zvQMqcs`CoBAlAEyBfU8TI5>Y42$a1X#w%Tq7W{TW_V*7)t z?%!NT5VuGVxAuR1xI1^E&k_eGc}LTa4&E{tRZ!b#xPr~Th{I&ALswAugTMfv_&|NayG=(FD!^6CGNFXxXV`rRWxLW-7Vwg2P6 z{&p$;`j0c}@Ho3(bfQmPweEQHH(`kHpF}%J;*vQ2gypCA=w9Bh+pdyxJ;NdH6r`}ZLIHD>)8SpSVk|63vT??L+aApN!I_WXY# zwO!%{0|xn@RxN*)R&Vd<<{Qesg^l)C9&~hcvu0d~ec@d$QC2IuYG3+qrR{5e`(Kod zJ2`2}WQ<59U72&sUY8$*MgDT)Zi7B$*%80IpMu3#Zj;IkY;H;6^Zj0hmYpB?6MtGD z{rMAbdA>*Hx?8(kr4c3P@aF!>?FQ9dZc;Tx__vz+vw!_Fd;ACV`?oA7-Gkp@m4D0f zpI)TjBE|PW$iD~azkqcA9;E*bQTz8G{d@^c;A+SoJ-45HY){ieFc_F={AYN6N++;F$jgqg|5_8H!py8$HU4uu-M7*t zY@g#FK8h}iO4XZ`{c-~A-ce*fL(8wIlm70+cmE8%HZ`Eplp0~-H}AgPG1rBPGXCY~ z)~TL(lz@qE=>0wuneK|<_(Kce|7Uphx#Zi~kd)f3(?gr!l{L-#e(Pu7QbrmBIQ_E@ z_zDU(ASl5Mm(%5;(tMa3MYjZ;f_nnwV+%**5fIb;rvZovzH$U>zpgN2ka56Hd zhBYZWAG*XteZ}X#JFS;GR_5IA#MC!dXQFZiDmE9NT?WzKac6|vV^D0ZI~`?y0@m-I zCKfHX-!T9wtarHVzYI`JldsIHhmmGW^Sw^9Lg#*R^8XkxrNr#YKTR1sXFN0$=@8~1 zTYAJ^g`*KI+UDRu%hgtoa7-L+h-d&hvOb8d&Q^ejiW=z+fbBwT9_*>A7g~}?Bw1D0 zjtTF|R|I~5NlSXbG8R+{qx~Jgqk<&u9F2STmyN5>;+{kK=e}K!{JAo_8{a>=KY^j& zn!$T6cpbfKqg)J^k$c0V>|Uk`EDD8!f@fIzx>EifnzFp) z;SyY?-C&%wx>kvO>SfD*!Md~z@9zUq{QnVS8^mLV1 zzm}#=zW$X8<%Iu7lD?b|H7hqz-#+`1JC*fX6k_{l_C6Z-RQalm7+HmleW`*HP>m(+ z5wk~NM#z!cBC<3-SqJVPQG-=h9EA zBMq{YA|&io3d9KiA@roUk!~bA0A2rtcwj0?qq!Rc)8IRAwSay%X*|>$L@Os)N%cH4 zze~LxR1H(XyCr_Bvn$N7C6-Hy#p&&HW>CEKbp%zH+EWplCt%NrQ`+Tnu;x(wGwxsz zNIY-rO|th`#8hnY**eK;11Ohg6bNTV3}la$+mhu+BA0zH!-io~zPf2RX0#>xb2OQ- z*PrjZ_lt8%OYNrayGru5015e=?JWYhV`zO)NzGpxa2%!* z0_!G&=4hc)VmJBRfLEU=8^wRhBBJ^$d4TZLnPT|g*;P+avPg#*?_~Ef;w6$;FDdc~ z)8vf{d(Uf?C>8WsimZcbuiKJ$Sd?XnG!V*~oZ+5oPjcb1abl91Q|GI;*ftP|G?%5K6H(BOntW3IytKJAu5JL&l(5dtN=SmbYdbC?kXHgN#?_%`bWl^_;e` 
zTb{Fn)xcS5etK@rGo%y#iC&`?j8V8`iU_w9RbT$AUCy#3ppfx{k;hjm8fGEh*8T4%@z%N<(lcc z)dio*A+Gi{kk%3JZHw4Xuz{eGA2ULosT}_G=-UvvbH=RZN-uv6C@=3+(fDKuH#(eS z{Db0MF=fz98jHqt`q14BWZ`lntTs8FA*-}I+}TSvwP7TO5_@;-mp>ou``e^Xk9hyb zrvOBz-5#6lWiR5V?j+mr-sGlZKBKa&z1^*>n1$I&fQD!9=h&(FbpIcPwkrS zTO~Ka(rDK>q63f9Ni+!MO1q4G0In-&7KAql5@g`EnvG%Qp%!G+yHU1@_^YLsrF3OENw5Y z+qr(TNYL<`y(KLh6>fNg0HQuBTg%B3pB%EIK@*4!S25NURkQj=b3bWCUI+xf?f4}O z8(&7_ZlJQkxnS%VLd>-96U0lw-Xbhe?H(HyN;9YoaApY zfI(wqBOMG`n5re6JJ}DdAC`0TH>}n`F*5x&*YL zDd(81O2+$b6>7vSEkEhO1W3_5l8Qece~_YU_F*zOU7~SJXBiwa&ObqZ?QbyfTFMem z0?G6`&zxn5g>fZbO4 z`hzeqYEqdk9%pfa3PT^P=@wy&)PS-feHMjUhH7tkma79xa4EM) zhDOGNB)_hVL!J3^uzMK-7giDUvn4>(Te4Zp^p*oSV=aQaV96-Soq{C0KkWdk?STiL z;fg!1xVD>2eNMF{X|YSOx{YiRAevr^asb_WbZXkT?fYMMK5dpY9NwjKlFF-(YIzru zwpc9_zXE}u6J&nz7Fk!P;o|iZQYKbSo;v>_C+oO;j(Xv`u}LUx9Rtd$Iv_~@+|2{?T5 zbE8m)*5Hxx9&uAPpxsmI!I9|NpBGySo{ufqanj)$q@`W9QjB}_I&Oc7)y(xb5{zv_ zd0W_O*^Q&<(Qpl)%$%G-Jx&5I$C1ac{*#UiFb> z{!p6l-XaPGQ$I&9-G*38pSG4O5w>iK2E)8-GBMAWd+%w*ESEr1pH>L2ngcn2GN{r| znLNAspYhHQe@fGNT}@HBbd>5p=<{pDXM56uGt}tA++9r3 zz2iVHzk!-<##|}%DN0B>tC&>uU;%6;B~ihAI^I3K)NUCT zzEFG2H*-(o!kEcgR?53tBwbk36MR-%MCL?wx`p=LsH$a`3b5bZDv$fvgozs1K!z=w z?C2F>CX!ss0?fL!=o!c)AcBv;RR}P@F2TF(OTBS4USNDhELguiY6a4z_4OXxg-=Z7)P1XA98NbDH^uZaS(U$F%1* zaRFKCFylId%ZDDoFl=PcOu3KQy~k{hRKjZZKN9x-Xe!R+UWuXfVi0C5>ebnj?zj}^ zH9dw{)@JrBNs?vEGw(=}ZKjE5i3o5eK%5Mlp|N{>6e?k*V%YXK05`_?oe!Cr1u<`H z7*<~@p|7Ux?B35^#HLCc)IidC1CK-L_+qb8+FJ#eq% z(EAoR0|V6I*7dFYsn-{$yKlhxh4>Z<(|1}O(<=zIgK+WfD_#P>4isJ$3z@5 z?B@o=%;iT!nbip3j-)p*C*`*71`FIlGre99y@nuY+gUZ^QWl&;E)Pn3WW+z%aOEJ1 zkQ6RQma1M1iQ9)-Q!$L#>7o0t&fAvPs8k!twMKtWQO(yP6@Ssc2li4WCgM(q_Nz2y zTyWwPKo}sAQOGa9T*8Yz5|_Wj?Y0I&M)*bL3PvZcCrrDe!%PO@bb?YlqD8#szs$oa z*T_xHX*9k4KnM@dDtim1)sfj=IJ0#8j%`ci1^lH~f_T4vPM?f-H>;|1`irG=9M^MR zBrfFIhfdfGSAWV1{g6n~6#W{6LM-c2`A3-5(~-?#OAC#4?>9h-Ucl%u7(9y5SD+T<*0DZ!c5^HH)KLetittn;oGYSj8=s4qw&_8*9@opYoX+nFjGpK zI%r_ZcHd)%eC@2e zZ(_%Lk)Idg9XN|J)^i&|B8cigR1#9~CNNnXY6JriW0n?ps&!XN6w~tRq++r6(5P#m zGTun;@ZGNOT%BGQ>?M(s8-6QY^dp>QPtQs#Zsc)67*n-wpG#&1-R6nF1#esu3Je1@ zNq!Jn8l51T!;Iq-WDe4dBa2v$e4JI4pK*<*EIj7k!99PR_y1s{-~M#w8Y$3mo;!D# z*#U##4&`pHxRxYUfn$&3b`@SoLPcGUP~@LJzo%e|4acwDiJe`F8*gAAlnseWzH8(4 zWjmq>HC6s9xJd6JCyUL;LyI8!TAi`b7jKqXe6v}cFddDf%e|Hf+iW*nptX_@`iF58)DyrJu0U{soL zi;k&#Mfk|2oP5wez?e{b`K}bdaFX_3W^^1djI!9*BdOhYgqEzuOzI*h?j2;rvxfeI z^kKNUWs*{Ud`r6Ro=Ur2CpC{xi>buK3GFSdGyOi~2$MpN=kKVUN2)~ppfe6dbw z;%}Z+Y)o;S8Hzd+UzI%ceK zh%EnJiz#pe6+*s89_GWxYUV?mc){(v*C zj)KYj#mZaskVElQZ@BieHjZ82ioLd*OwuI1LJRs5%|Qt!%c3_{YTH{x0}$vecjC-l zgwIlfxoZd}$Ikv&2uw)kb7@(l!mRQHJ@Nl?2wvRuRA(l$!sMkdTM%3;XCpk?FSivi z320rfo^i;_2p;~rxo5w}3o#wmu#_fnu&e{ItHY=$pRmh2dPocTT5U{v&Iu{~im4!b zvSW-rQ1pawu$)j57+=;ewfI(80aACT2a529cN#r#2@Sxm_f?%<|e`I#|_K;I)KnR~%Lw%!PH~TF@wH+Me z&};JP`JhgiZRp%y+S9e*Q+dL<>8zm1(B^aAX6v{kok_A$LtkaIDtMheMk=6WEWx~7 z-h@T@7~4)|)j9xMd2zV?7A~M4O~OwIV;&G%;xXo+CAmjnP1LCQ*x+I~mxf=!Nee}A ziM&}xK;J*zjy?WC~Ea;z?ez7rgiO_Ng0_UFYG)uL5 zJv`s0i=+pQg!8iTI$U>_jotHL4q0^f$~1U7bdSyAo#u+$+C~xgD2UvyJq8hVi4odU zecZFDp5=7c&#hBb#<{Jk34!yr8`mV;DF95Otzruo!G7C-C_6#9=Dmm7LAAq7uh&O* z!<&+M`Np0jt!B5uJvV-i_o;{JP1hzhV64`r-((=5+6J63Y2@9KQwtaTCe325S*k~cn64mQ z)B5FafwyyHDr=xSC1Bn-b$2eKy;W~ss*B@>x|Jp9?VZ7VDM?_W`sKoV<}Vsn(P(IF zp59#UI`kN!l$1T$Z`TqAnc%c}kjUMtc#-LB?~L+|sd#}Ik~2cFrL|`Oe_|g_iTO>V zbja!A1kXK$Ln@e{T8qjuKn3ECaOuHpAfH>xOzp4UGsg@B|3{yxHW>_+iL*8e<4&yo|QwKMJC zW+b`sqDzguqa^(5VwDpep1G~?VSpcn`ilB1isyE+8`upfR>Ry@dpPVn3kTi%(5LnH zBixv6)UFqi%zT>;a#^8QR7hx;;VTM(JMPwv5O444+18Ctx;(9j1RNRzKS;r9XWBhvMPSFHK*=iqeyi z)v?W}kKsory9-)e*wCNu*;sQ7Cm12G`W-#% 
zo+zNR3Nk&>V(_{5jJ9}e*)^~DX}xKEpAk@vVrK&@=VrA03S@98^W_sb#rA@g>Oda| zgqG$}C8OX*HbW8V#=Dyiy^icsGa?_`QXztn5jKi0xU zXMQEAqZ)1`33#ql2O;+j99iVf#X!ZKMpAmqP0RYs^kM*?7jWJl&_F`h-ttOD`ntg35RkLy!iZ>2+8 zwUm)GWubHLrq&wwO6r~dE~)%w7izpv!D6{)wQ|m%R*^lHefq!v~mf`!=)$>+n{@@^zSJJvk`%M)Bg+`D?K+JKtR2gM2%a&AJ5WAx7`a zayLRFYI$QCAO>7-cbVqs{Ff>$j%Q%yL$g1{90Hpy?IBnWgP=!T^+G&@Y(@NQ$r8jm zDqR<*K%BQW#&Ie&`BFNLU<3tIJIPaB8&>DlE>`ZhxD$AM`l)fEFzeou%5#6Ikp4v3 zf4$;==nRrZGn$FOy3_9RI7(KI@mm{q1R_?`EkU&rnrr^&xgz2 z^5CfX9m43NCl(>C?MKPa6)+neNot+h_i9qKSWAVtu_@Fg=%sF>zD8Bk!RV-gr@b~TU@+CCVPk1K zZeHcopNFI!#O0OMWGG5E-)V0-W)||@8lq>(`JqQ;>1Er|tZJ&06);#Ug+Df`Od>Cg zOO+`Dw5<%nC7J3))(;@eHQ8);N{SfS<1*$zr1uWiFDCj>ZfxfBsICZiobL#CEm5)2txDzP@zN6Qg0hn91@-XZcoo4s>JRg1I`dXEbAV`5cq zJBjBo8c}?AZ@xco_q@rc#RCR9+RTca1EuCxCd63Lcs;!5bomRsc2IdUU)U+i+2@u_ zPC?x+l|{l{L(EaA5J9p`&xM|52`koNZOmIX$9{j#xr=f!!h9H)q>~LxdwHk)ePMRS z(K`n<#mlA5$)+pB>wZxmEKfMcEU$Qx?>`ZBKSy6WjKX>8RkT#e*mUVYzhBf>OM3O~ z5T8qcJ+4L*+b-B;N_*!Q%w;PCmMSSi8IAKVDWaax{NjZ8e9yM~t-z(K=&G?v+2NwP zqgO@bH9mpXQg!i`FPEh-VDg7oPEFq&f!iHztbvw3CNS#E6ZOt#XMEDsKCg%+u`#g% z1@|$>zR~Xo)(B-b#|JIkYdz#wVB|6PJlpK*o@3TmR{flspKiFLu9~1{^+L4o>?dSm zN|9C?rxdFn`y=joLA&0srRovQy2@^aMRt#fu5;mB=0MAhw!qXdcIoX!BojJ7r1|K_ zD}#Fclei8!5w3JuEdleKxhfUs>*EFPLIK2AMzM8MddYGzmjVi`#Kdt?VU;*N8)65J zo(E*WI&%yf`t7#sIO6VikcT}ct3(V|;?+LqA0IdIRjVQNnq{qTFw&7>uI?_YN#0F9 z4VJcQGm|U74(-^JD9T#itngj`{TC*YX%Q*89jd~slgj)ZoulmB?aSE7A(_ z|23V^DsO03A8PiDGW(luc?tY!~ddQ^~qibI1T&zpouH-8j=?XM#sG3w@{BCZfEML zJX`0YCTk~l=xU3`d{TPMGu@+nm+lA8+_`wdQD{(4d+=v(7)Ik;MqAJxHE)!|9G7Ek z@Sg6o?5lK_ho-4y`cAWYY}%->F1vgM&1Z}BA_Jvv!Rbk*2ZP=TvG-JO0jx7?b9WE` z-?^NCvr~+6^CC9lSLYuFXzkvvbd%D`>NxZcjibntyb%)xMi;6k7b-#(p#j5glVl-9 zD8yo+Y!XQ~vm%nfK&2eQWHHUV$o_FzHFxF(PJ<3M`xGk4tu*v#d-%KC(7 zIDPYGU-C(qSC7WY1~VmB&=u4QfK0XF*XPbqMUD9_P29KP9vh#Iw_B2o{?(XNsC)re znGi6*6NX{y#tJRRbT*rT1dqJxr7U@RaPHscd{zXolBE1LaT=RtHgJ-TwU|GS_+_BSj$tx_oz@gtp%EoenLUpAx zhAHxEKUo9_9<7h<;cTuQu?4()kC74lhp~2$C(T#~G%Zt5;zWJqT+A1;N)jgoJo+#u zj)4Zk!)0eijRFd_w7)Y}zNbp=6)_1Lx-r>B;$QCO<#{}#=Rwq|7n;hCPeil!JX_kW zJ7^OJ7TUI01ZI6~RHFxgn^73v;4SEpSQ)}=w*@)U*an>M&hR$uo$*`J&y&&8ArZNJ z4l++g8HEiYxrwvFP=IShpKTGwWn>H^C|qbbK1Qo?3^IU$E(7}D=;r&u!c^LQ;nn_3 zgfV3$rL`$%wDE^Wd&`{?#aH1*$V2(bJSZM5-#ALL)@#c3(~jr^a1k{XzKTo?%ifxF zHTvk(1Jl73gSsTJ)~baTV!|xTV|$Av;Wk*NE1C6lBLC3b$br*rs#*wT!*FfA4l~36 zu8Tp_+EeiE!G??kp5=|#v;p|04qUjimW27f(lKW!NBox9$&f1wae>+57k)Hv`lF3W zN!>Nrv{5;;VcHZ~w0^lMWl{oNZkd>=i|Iq<4O4Yll!x(w_ez3o{PW13jsW(8>27li zCj{PI;manE<%ao!>*Usk+hWBwfzOf;sn$*9AI*gNNOyVJsrI<<$aE#te50X?a&^Vp z?>J9V@d_ub)dKY=*b&LdjVVaibA~7~@_skG>}2Hy`a_lzAv1p} zt0}9|u5USJRa*9)$;j)>eU9F#rn=u)P}{SPfEXki-QDK9MrA;9X5FG&N=Zd_Q$HSd zuib)he#d%fCv)f*7`-)53!OKg7X5TLIw+PG=cx1OaoY@fNGq!w+_6em_RPT`cj!yQfGk%t9536f7qDpm&y1*NAhn`3KJb1^Y zm1^I2*%)54)igQ}5DatY)qUU-`nd7k7M4GU&qMd!szPfdAiF3?#&EW?59|z&jp6$0dnet^9-zC6zJf-|u4vVq^)_9=# zwr0hb3Z`45SrVt!JqE*PMl&s3?(F8c%F6GiwjOf8ac*$2?ZaR&{sGQ$eDcg07;-#C zs-)@o`a9EheHl7)=>abEp>{i)Ee)E@C+q<);3-}xXentGx#rZ4Uj&`*NfxWj>Xqs> z*6p<#&2;I{XKaA$!LY=#!~&vOSrl2{Q320rv;DG|Pof*mBD>^Ey^pIhBM7KMTZx61 z!FE6O?ulyB40M8eE8(=8S08_$`LISt+r5)- zl!wWJXHWjrne;BJA?`?G!$YUFJ`>yFs&9$lDC6#h3D0Kuwh| z2AxNz<%}X_gokjj-8)4%FWs^o5AmqJh$%NM8IcEp@dtsSC6PmVxxfhtCK*~j@C8*f zn0KfFod)Y9J=m}zI0D$}07mYY2E_yQ zBnh`c-Y_y_H+e;hM;s2Hm@!}uqYqNw_NsOG{*U=lW@;B!2sWIqJl3YxI0sV!1tI~;$y@Clvn=UtvKxmY=r zYw#*s{#M$t?da`Zz8Uq==lir87`5W zeMa-}jdYMdR%p&FvKRzN;_1flBju&3XS~9S@eT)9A0 ztIS=Uj0w^oZ1!iFJN+iB0$|h}=IbvP?n)4k^ai>b2_OG*gVK`nM87mJ&##{5aC_y|@C?)U7d53}mt1XWJz);2PgJ`{ zLj(_UdlJt~JXioC`)T#ymrJx9CyPxlCt3y&vbcLwpMom@+95VT^@3D4A_z*(ngV>~ 
z9#;L8s-qWs3Y)-lFsIArCK(XSql$~6Wg_nh$1dSSafg|NnXDHIfy&!m=Jkah1zN{P0nG{P z#BUJ^9G3M*SNaOkEYI9;3bIEN#?tR}ueUQ}t*fZiZ~@FB{5jOX3^DE6@I7XG%a91U zT&Ak*hfw|Jm!_V-Vr$x5gq^LV<`yQqBywJ4x3oOIp0Oz81LqPofNstU>n}HK@ z4|rH3k2NQF9HL-md8VGXb18&u^>g)6<>KiY`81k`*$I$Btn$$u%M?M#c8S|mrsGRn zz%Wtl+{+Pt4*~QQ<;U3m{RPT!Yl&C(&Xz{6oMcte#-(871ul8J1@@MAuO~X|FmyCS zh>M=+7h)eD7wyj@uZ#)$Y|9cwfkG>`<8yo(Lu8<4H*OZ6Ak2U?@Lr1mHpesjdA|$5 zSB8+DLyyHuhm06@z&NNv84&o06)hBUZr}?i$4jPFErqP~PqM74pgc*&z`@3OfwmhzVI*Ctt;TFp$Z93=w^KX zNT;Lj=tJ)z#&&8T!d*g!ZJoi2NX*vk2UvctV%uG(@Bh&D7En>PUE8p?f`W*YfJljy zv~;J0v`DvAb@gf6q-N0N7OCR-_B5PjiXR(yp^S+@*A?%~=`8&SV zVx+BK4F^4WIHe5_dpP^@1?<;e+lYI24may?30t3ord3UN)rW}wQf_SK-{JHo&)b1TFk2kKwMwb`k_b#_pSLQ0AD6|Hk=41O@y)pyFIOSfEDN z>&FfSQw}F@O#&_;9!`Q5*;QA*^U6ZxNO71xf013Xuct6kM8YMu> zW&VW4@KtX^tiI@8dKx)5Cx_k1YG#7vecq^Ojxemoyxc@W7~g*M3^Ym#d);ZU>ixmHYb<7gc@4H#$w>Sq@Z0>U0lFluwiYSx1xJZy=MR! z%57?}T6cI%Ov7zz+mxrw<67>SmB3|_YQ?@PQKLS0lPXRC0c^I>n#oMh*kif#P@W`v zQEO`J_$EdnwXU0Xc z7u=88DbEQ9>V%Nl5lq8ZFU}%!W7k7uEae0cS_bX9IBVCjEH_PW_wfb#m1V&Q9hLn zgd5gzepe}5^vd9b0VX2L95)xp{=(^yO*8J3-ThkdmEWxbN~+N2*e`s~Z}tINMQqtM zNF^>$LuX$f%|3rnQSirT_VwhI7P-&VbCn8DwRJvP?yAlHn!~CwfCQf$|4`7N@|jmw zmTc~H-2r5Uf^o&zpKZ4$dqIO4>Mi#vtBpw zeg|N+>l?D#GRBIUCWlbm0qY7lR)#@0iswKp9A!XdJ50(eBBft-vn-LbBw>}p zLK>S%0@Ur4wn7p_bYbB32oH%(PqDxPZ?gaJaE2TL5(-;{BzT@SZ^^Hk!k}qhqz@ zd1e!S)AWpa0YGL?$OR!3-%+NX^^C{{7%CngH|Cqv+i-+SHPw5u35BeAeA@&n4~Ae3 zi3hbH)r(!8vI+z6LWKtYbrNjGhPI5r$qUK^$PYnsmU= zIcss~p$+nz$-&u6XoSbpQ+-uWos|+e1b6Q3fu!DS!@*w;^M4=f;HT{M+w1};$$i=L zPu3XO9_WNBGy-C$Mh52}rc?HSk19S(QG|A(mvOh@ct3~7wmkGc&dK=HydzJp{kW2O zKYPAxLahCf?snc?8Gy{~_wQx<$2k0aN{B4k1){x4OZR0!62XbjO$Z%K=X7K@zXU8K zRZ8?Xre1cS-mP!iJU2?-X0;o@>si{qg8?(eMGm{1psJ6%+pjx zl6SW+jCZF>Mw3Thg*!7{c<)7|`-)!(PeGev?x{oZHiZZ28uRAmMQ&bUF5peZ<|2~G zxd;eUNhWsrtWbmjp==X^6wR>bjS5q06VMl z%GSKVV#S8ln=F}gcT45{1kiB~~$oXPM4pu6v zeMbxQKy@Rg-{O5cUI(@i^}*8vbi!2}q%kvGJ@|fa>9ZZh+z?c z`X7_|udoF(o8h(tElQ!@j>yOP3?f`DE23$eHsbPS{wN0`*OyygUOw!o3CF1`R?NKs zacb;Hfd0oqb#&KvYdwJ!WDdLg>pcwU)+7iPI{Kpmx7evoAck~3DO=146*1qWpecuE zd$d?U=unb}M@~{a_ClkGh>o=+_KO`NW1iTLB)m4w$yAs!>8~8d5#E~<1q7(BSEt#B z3f)(4kQ;;7KUyN15=%5>*c`@(e*=gvj2j^Ln>`ShvA=D%_-zWLZ862B@Y@~TV4Y~=;)9&DO}# zIm2ygb$ClpK4y^Y0=u{x`iW%gZNn;6m50FbM5(dyvBJH%=Z1r%!}r1z!QHdxsyJR?VSL6L%0bd~tOpryd6A4cYt~N>nQ4Av>=E|30P@^QjuP=%g z@7bbcrKLtB@U4bqIims5sonb?$@6;G?+2dQx)dV!qI20;o}H->GVA*SPvtwHDn^4SKA3efVmMC~`lzSc zf=h7z3!pF*8*YoS|NLA++1oRo-wI499zTQ;v)5XayXt_ulA@EjD zd|S#z_6~@Fz8sHg${#)H0-#{bsUEN3SA2NZSb&PU5w6KpMFHs0?e$AFuv^ht?9Hm? 
z?UvY^-ECR@qcog&UaV8AgNAD!Ac=$E{~&tYpS_ch zdm)^N38M#12@}2fMWIj*t642Xx(6X;9){}9ReDB9Y4egw%BcN@RlJe>tLPYpD=3#X zU6=lj>KHOxTJ%P>zoycjF8N#qBW46JM=-1W8zW_U+h+$0O=2nX-W9O^nSEe2^Bn&S zFq2@`H(_p(nV^whDyP`SDnpJ*tUl8;-&VL8^MH<{d6;ZkpYby>I$UB>exC~g?nr|~FTN8@OUf1XO76@8B0k^H; z=#{?l``70HMSflp-+hAA$oh!IY?S_#%DDQb!6A%}7p0Ih2e>5KCeu~>_df)xg5W$t z)3TaU-(;cmIP(BFijfYA{Nwr-Xg-_!(Mzn|?54H1 zKRg1Eyw=RbCAua6Z+=E0z#UCWO%hDeKxmUVz5F2hszO6*TI)TDvz+|=D8Ox#0E+Xd z^!S!dgcOR!S4#Y?)*$r7OO zkP?$oj2F9bN|V3SFXUR7!*REe zF^t-@UYApI*dchO`gv!nPxgrUUCZpZC)w>=0Uu(sx11CR4$-$I$XFd7IS^yHr-g8I zPF?|0tQ~2}*Yc;0p8>fesDa4}kpA1tBLV+l34j1^5Dr7T<077~-?W1VdX298ek*bq z2T4UA&?gHc#P>oB6+DCRPD9NxiGQt!D!8EQov z2D^Wg6;O8^p#wAEBsl;Xh|60*mOzfdxA(zHM+wMfq8=Rn6_@*~`#*>_{1s%jzOz3_ zwjX@qOg8knGbqSsezIall+IAF+$I4eL_+HFjs~ml2SXw&Mh4MFR<$B36ZQbls z5jGhJ+V6G)#j4@_cW;+^eBeCXr9KM*r6XAO;l) zs{Bhr05zgcY8HN)@*UWBDxO8B@pQqMvA0z^POn(7kmOTac>c21|K(JG&xA~nOl2{( zxM^u=rPAJ{s%%kd1bxk8??JF55tRd~4|{oW?ol9Avl7G(QI}A#Cb=1aEptJzxDCdJ z2p%fz!#mdNhjW=9aNe-<@AJnZdtElB$aB6)H9 zqZELSQ<=Gpe+GKp9&~aulsbpaQN6rJ{0KtIMYE4A#{@+l3~?YY1z=xP+7F1~cYriS z^oZ5&1AciRtcbZx3-CuLtO-7{as`WVcElYD<^^=T@2~|3bVqwQx!4@3jbLj3ttyhX zlmL>is<)=b^yH0zme}%^2asH|*er_nMeN01aHe`}&B44D6~5OqY*#IXa^H~L01ib^ zXbdSw#yNi`oMFsZE)MzdFPLt2$n8cm;w_u zE{Q6l8~{L7k%<##lT6Nwv(+U<%|?`wCjB7_F26Re8W28kn&)SIvDW>knYiUb)@jwlQR zMW5BwISKXVDqc1)_pv&z9mJGU%?qw_Knk%cId=!ReV^dWB0KzNWBmjB$M^2o3?U@$ z2>+?}Qv;{2%Q7xNNF&zVq%ZS?k7PM#hCML0fryIo(&kl>{F6Nhk3%ky8#Ke~hG2R# zuu1c^&pKAS%kJX0Fm}=khr7<>6;&VnRfb+egITBYt6f2zWdkSIMh(L^dG;Z*phiir z7l<)fW&hw;VFSXoPS8Ag$IKFl-Cg(xhANqHe{&$*@_yy50k~jkTeJ1GpGgG;4nE|W zHGUy`Df)5!&pJ8FIIpmgb|*@+)?SVMXXP9rZG0dr=T`e?rT2}{4-a4L11e69pw5^u z9nb+UUBA`2SNt9n^O?m&v>v}#xMx|_!!gwkIB45Rt!BTyDQpx_ujZF^Z79hChqz>= z<&26W=b!au@{l8t8jnlW1S9{m)une0Be3vx|HIA$*O2Fkcrx0}`3qGY{@N9X*D|PR35Fv$goM zF3E0@-G&j}RxFb5R(m=asF93+4!p(H@)Qcbru~5Ja|`8zP!r`XGUujxr{~-#AHe&6 zy@aN5UU)$x^k)1p`F3{EY?1!Jy!C&7d0V4SM~GPaV?p%yX6xM)flj%po|+2-N$3`m z(09r|P#U#;MA-iREtp8@^O{Xx5ZBQf^hUe=^wxj#8jzol!S!LiIK%w-5{U#JO+pE7 zt;zvK2*q=AT3QSBFVBE@zAclQn=OZ3!{r&R)^_c=aZ&#LKChnIS?KF5? 
z{^&?jtFfn1hU)Ok9f(lj*91DM7k91K7KjjEzUP0-cYfAdl=g2^PWg_nQatLpLh1(_>py?}mw~suhgb?wWq${J5o^hNjQ@Ejf4!@J z{wyj8W;yMKB*ph{ga7Zhn9PW2K<)T{egA*C!GEoiXH-v-=UB)HMzv8IDT%B8eYM~1 zLQFW)Q!uk`zWs{CDt$7@a2DH8!&w&Zumt|h5w&M6EUn$^$~NA z^UHAlPYv2Fk-asT#B;LWt`kuiuy&0^Sn_KzeWZD;e|zzN-PF?yR3Q`+_$dm)IQ~}~ zhxo)h5Ebk_nOFZmF4C6JTBkW-9@nk(Xy;J#_~(k5pNk_0EMhj6yHwxLU4OKn|E|a5 zdBBz-6`eSkB`x3j>1UQTL)4)cf8Vk=(cYNvU`>7ND+!sdD z_^O`HLQe}(DTYtz3%MyW~efK)QQ`uJEl=G-6H`tdKTg)hb_ReR~9 zm|tGJmXrANtntPKClkqSdcf`b-9-3GA!hg7=+CvS-T0n(9XbDS?%cRj?f#JPI zVSO}?s!^lQ04k6L<*G8uSKF-y_WJyIlf$T8BSWWBkd{FdEV zTW}Ld2?_6pt`SR%e#xgOvS_o^sJ11}kWBD5RVol#_YFozN{#*=O{bb|FxHss3@vvF zy^r6S^{?S3^|DBnr}j*{e>mGdiz@yh{`G|oGJf(mq-Pd=uc>{Wg~p;x_sU#dX;k{J3AvqgppHawDA;Qin3chqQv5jp7k(}ZFs33_>7U%A`n}>+R*M6X>~(J+FzX3R?xB|Wse{7csgXKW|PqhTmDv#OagZp=n?Q5`!tW z9f;jmz?dC~-JSawzqKWhaV8+wy6n^@_uJld1=2HIxgYpdzHBj{nZ#|vtwivAsG~gj z7YVT2qj@{L!gybSx{lI%GlRqF`ps8Puj-m2R|m8H zI~Y-^OZrBstj0`gbO(tzeNmQJd>V6-)5B0r1(eWJJY^3_(pu3pOd=1Z?)rm4{7%++ zqlQ;#NS02wF@5HC*l&$t{APt(YhnUt)us3!b{^BGl>JEF2c^;A{mkBsY6%KEE?$*l zhKwG`0bjR%e`E8H$wAq8T5k4i)dZGzucT{Fwz6n(NuO5ekRV2SPfS{6Fn=O=gp$qj z?1eH^Mc8kFOrye zCbWz{IE*L!i%Xl>M4N2fv*dVU6%|m#ETwmQ>dY|VweP&;##q5~q&7Ecndp%=#)0QZ zphlP-wC>37;DK{I)8K9IsU+Y+RJx&mKmPb%eSu$}OyeQk5}o{ioc049ipKAc##$ce_MyXOR4b_g>&1}R&mGVOoC6CS5qc-CLc?^EzrSI?Uo}zENh^m;U83T49IWJB8_B+Rj9aW@T5ivRK>B3#?V z9+HGG62cXIA5PbPb||WhV*0pw-NQC@Tj3AVq;9pW4;VPN!!JN{&G%%;)h_+i*Gl<_ ztV+I-*m3%kh;?0M%k+DXE$k2x>mvo`(@tsK=Y1by&z~`1R?PrE&9ipwmX_deoWJiD z!icz~^&fEylB1O>ne*yH=L-urR6b1BP0O@8EG^VH z1EEn>kQ5po^xK48%=n@|?FJg9pr$z1T@frF41#?uVm@cJWfZb1krY0N+=c)3nO!8V zKc<#mH6?$YR5RyXPemcR$e!i_UfYIP5|_=Vi-+#=%UGt66ggiBtXJ*ZP;Tb=a*jTQ z1(EaNa-&euS0-gqU;nzLUu= z@{6OA3iv*p&Rb-z+2Tf!;E$No_kzmFHB^wB+@!nV1OK1wbGf%;Xk^YQA}(vk984&G z9D9{5oQ517LSaJ#2AuTE%iYQG_aC)ON0ZPmNh#TagXh9z=I-QKp9+urk#AAE2Q~N$ zKilO_aU<#IZ-9i1)bKPQE%UoyTAYZr%ah>E3k=UWi3iuFn=4A`)}91CZT0Tw_Y1U2 zb)w)-JT8mr4xWqYK{PY`COL2X29=8432H8PS}G2KyCpx#Rt8D8*r&!~A zUnTENvh8YTVbj<(Jl1n&4}W{Y>C4HsplCoSQE`4ewRC(`iE)QaYcO8=hBDpRGQZo7 zK99{onH2P`H=k09y9sVga{V)SMweHFaF<6(M_Lr0I!ifI9H#r&PJNkz*4#N7zRC8a zu7wt>afj$c$#mXe&FSosQl*>&uH$C4>+utK$%1@1czDMvHtlNnAlskqQZ9C@n5X%- zj5>%S1LCR1I2$m$ET+vzCh}An6>IEG3M6kU(?+MsoW6S@U+hM(H2hvgmBtu?C_2RmaKKpwL+>3Otac;~sq}G?jFl_rjcI1Rh`p40 zRD9JI3i)P4H>!D_%a^Rj!C_IyYcIEvi}C zdVenRgoIYPB%3GG?JT+QdR=w0!2q9_+cg6C)USXD@fo8T*+9cGo3 zj4`O*pFX57iDRn?jRs)*a1C7u^7hBhPaX zyb%WBOwnN7&Gn-2CiZHv`?71Etf1S)1EWXV>b*o0+18EjO4)C9U`9)KQih@1yiP;w zL#r*ccmXE+5hJuS;^>>t1>MfhFyk%!1uXGyrC=Vibc5s0N9V4%1(Hq(Q|0^SuAmFNA!Chu%~wuoGup<-aILnTo+$6C znFg34RvyHy_N8F7?F%yJDgy>DkK3nu-HB726;bq;^ZH_F$XgN)%+Z8o&RqHOfd^OT zVBg;-P6#91#p+w#@kqtMHE5|{toJ_Mza*$!jTR-0rs$Xn zoj7xwt$%#Sbs{<5&qu zCx-##NEm+M+{yE(3&^dHXH9sYB_DgWxKUbN%_`@Js2^W1XL_#VH4<`U%a-S=KD!QC zu-d=;u~IPA+OkI8vRhwmxBO_Sh0n81IS~ePx+Snu<~Mc2m4R!q%b-(#*m){Zh?7!R zDlHCz@h_N(m)azRhwnzo<6~6eFDX2t=+SNpOZAVL?DzWhc*tgZA_M2#cY5Mf>a1`x zN-~1;jD>z+j4cEz0J@SxZx^wju6N13njnEo9BE zpzpL8Iv|iD_vKj`&D(v&n_(br^EBhvDki}M%)_qVy{&EhV$F<3f2?CdpaN5792JDKZ?lfU$>YMXNeg4X^n zhU8~>xV#pw=Sdq?jGbRj+t$y&nB41<|6y0+!xUDWAc^u-*KBB7W$dR-eYb@MJ5HaH z>l3b{myk=YDzB_>`huK`_wi0;;?8iB`0a%88rvA&P>`})yqSZ3QAxHw7$(>l@+y09 z*Iw6ipN6t-tgB;teqJEy{ekL`?+4>jgC{vd>EAeYO|V`^_4z~zS#VHKR}GMFd@MB( z7j_i4g#hv1OTlh!Ls?hUDA}Y*<(ye^b|T~ezPo4dCW$o2MdzEgsL++zsOZ+8 zC0`tyzDd$hCXRfYqWI0Nk$S%+`JFsxx@Zvgc{-LUd}G=SMZ1POW?xcfN$gxS|z$#1uQy6uHvCSOxr-1pKf zH^mq{SZ4M)7ZTZf^+UrIVlDqwX*flj@axqw+axX3!r9bOMfc~l%2hd!FC>CmqsD#7 zX(S%vx#>@ap<);#`Ke8EBr{*%)k~Q5H)ANgy z4_#l%q{>841i8HOQ-8pBvcv>$;I$S_K0B;=)do@1F#a)iuoE~->#;u2MY5x2vG~(! 
zy>jj*T=K%fWl#%3VwJPRSACqY>$%qzM6QApS-BDH*$sKBNr~qZa4Goi5JtL!Y6BO8 z8QT)OzWj{O!Yi{l1aX4?_@Gm1EQi06Dd#(R%Ck&0R<@TIq29?KN!>cx2R}TyhIT9ZIL|5vg8ZV>plX*RZWuprRi3jVJ^~USRA>;{K zM@UUZY>V~s@1b#z8wo4%;jZPSROM?XrC#INVaf^dEXuYUb9wdHjWs#C7mp8Y$dF*p z>-QPds+zjR{QyBvD1;<$i7Un-5+d_F;#wjzrFP(8cJY|kstW$WFip_l+IOB$-RmrS z|KL$46&16ib8SYhg!}idYqDv9PhE{L=Kb1kP-tS|+tQC+NP&*?B(!L9pTd9|=H|}1 z)b?%Hs3+Od5Vdc49cF%(Kowm4W%_o34;ugiOjD0N^iwHr9_m}BSi_!D+4>#sFdjZ| z+M?|QISmR$nkZ*N=hgd;2yEyIhdt_AK&3$u#R(Vj&~7L za2Aw#W7UwLNMkWMJe<*N76nwGkh7J5d4LC;qeS#XcCUUA6!&B(Z0(`)n@>U6;+|n7 z232Tx@?+;DeY1jD)Ltl*z5nCla+eVA;3_(H37xpz$H5paGBVShIv-S$a;ll^>yw@% zvARS7jxML?m7Ye4eF%1Od!)-^Yrm(qVLy~mMyqjb&bYrerqv@JMVD0bXG9G8vM06Y zL7aU(rjoS={r6#NyY?do&M$9c3*ccShdF)hd*$&s8m7b9?}HSN%4W{UtNYAnysmtq z(}o7E3$F8q#Fjf(8Eo=0i36N?ELyehvU5h~vW?-_12d2>r-K7{MA`pEKQo!ql<-7E+-pLy7cqe#r=%-Ka>k_BTnAX|K(ueYYD9C@X+!j0q;ZG+|7~)HB(A5QrYxW)Nu*3$(Lp?>)*l~ zB_MRgH8jKQT`ji29s2wA`h$b3lI z7GJ7HN*wX|pf=9TI_>1uIJpej@O^J;I2DLn8(Ljk{^HrYn^eJIp|9>DWBg+u{GECk zeyQTPmp9=c<9K%}7p!`K|}kb%CB$o5m5N9L%=s1LA2mOhGDZ@pXcv{AcZ zat_)cQ>RzvHU8)*o`C&`!Q{ghZ)?rfhTm;~Iv*OIa4>Ny>YWsg?1Tvh5q$ib=YyQ5 zo9Hj#|FbBeKr~2Xc%#+WJ+-DIO#V~Z&3=d4R-peW9^A!-Hm$m^=a;qUmiGFQ2&}~) z!E>Zgts2v$rrnHsp2~9B*C#bzC|O~PO*ZPssALCE;MMXRITCfI8rg)wJuc7utM4bn z>|O0W6xGOK#kujZB73i^V`-FG=*g<^4OBbCt>m4{qu)jjR>_S*HG^k~FXNYZp)#>E z*B+C+PeS{bzGK|pmj$`BvxYnEzeAsZ%k<&aO;Mp0z*y<;E*3rVE(Q5>xo!VB@K3TS zw_f!{LxdQ^K=<5@d9iw@;eIoUUaNoXkC96Fy@vxu;Y%Zh6dgO6_+R4<%4CD^M`%Pw9uxI$q#>?1iY~d! zCG686JjS{`HC$#lrt_7N>+qo3)*!eKTHz`On$>ursPOtxN3DKfVBMYeXy%z2HYpvx z$BRcD)c{LRrjMpk?#}&Kw{;m;4AE>mTe6ic7PZx}fs~?|86YrI5ImZcl61xNXjV8+ z#^8bAoHuCC9_x!Ii>816*;|_8L>GY?=Y1AgT$PFAky}4m)-JfWbQ^4mWpQG@|NPM> zb!hwqo2l;_T7-Qv_Je^m#!AUEWs&5(PT^D9Xjs%YIp4AuqG!Ds;bgT~+>ryIGyP>Z z>7pAtkrB>a4%62Gv?#&jp!OwJYBOuorxT3MuTNHu3z5FXHg=G%do(z&VkFAo^0pj(9Nh05vIENP5(Ta5~X1et|+fF`lh-eHO zn{~=Q5qKtFt_4r46(l=5lZTa|dY@NAb$7|=FdFVQwAEn3&sXHf>8Je^VO4M*$3->^ zT#F9^SS!`}b0625`>7->m?H)FYWT7-# zT!QeqwDaZcG)UZM7&qWTa8%+mR&!?{Zx=eb*q(^A3_}JX@IieYreVnT=_}v~3 zVhA!;l*5Q7BvE!+)w*;cudA$e25w?9ubOUO+9=?U7Tz?!)BAeY`@Y3Xwm; z-dxvJw-~0Dw)kYn3(}X3D25WrpfKY6yu7g9rd2Al7S7w9q5Q!zCkH8Lu_Eg0xXmj# zRDbVtMe9-r_K28fBD?gv0SIwwMk;(*6_g5Y&^>i(IW(_)QOLTqN*;%)6_Ygc`iCj~ z)0y(iTS@#*qa4i33p~nxY7A*o97dlEl|;+#)P+34DLmPlG(L~Vh01-0v%?@jbdiYn zDad@&kKLruSXU~D^?XYs%C?=!jo~D}Z#Rev|8ja`>@^~C7 z7YJ;Xf13GQ>^4Dc-yB}PobS--MVIu>m}=_$>9A+hvpkqAsqQdd>V6yBfOGoN{a!^Y z)ngW}kVu14uX@|%PioO-dCEv|xPQS>484L9u580p+;LChH){F7sDUf7RNW-PA!x(? 
z#-UP?kPe-TCx_c9VFr~ZPmue^5hl4;u(p!vx*FkQ+GVlJFMTE7>i}Y*soAujSU4f; zsdFS)l9)+2x64qx-;hChR8aT+H1h`` z1e`&jA5V{t*^+DL(nwvkU4PDy@!F{%=Yacu!lwoY=PdT+?r`Nf++jhz#W9ZHrRglr z+?yBC&sEwta4J7}nq1thXuyp3O)b$|E~BRF9{B_`^CudA4=&ED^UON&rN&am*_Iq= zLe57e)RjrnBUJ3Ln^<QyuTRmmS{J4?ktv*eq(ov5~Z9jN1wl;C#rgrF@S0 z8@T-zinBZ-8{hel9Gtf{)OX%owy;1=gHN^ByyAh^Bau#0b~2+j#{!QQ6OViKP*IsH zg*2ziN_St9IwbqkhUSrbq3`>Nx8xz_U z=1#PN;ZdL|VCL~<;f0kPUUL8{^CL9I<%+la_rLNR2R+az9Ezx<*V&?ati|IM--*V& z0V1Hx2kS#Qhl%-Gr6b?)Ay&DIMbwOb^DG85gS@FuO0w zT^^oYfushF^bx%rR5Ocb*KxYxQCGh)wtn9e5hP2w!g-wTpu2J^?#Bp|FQafbOH+=| zsVCX=Jq~^RC}vVjtRPdqnbRhFVq{UpVk??P%x=1n?vCJ7dbNkRG4#u$wQZVF0@@k` z_K&LKZfoLl2U5pXiRg;VLPkdHR(mz|bMi&naXT~RIXY;3^@wLp*H7*16Ilj_WJz)K zuUJ^#SDWiO99=?kJmhZI?Pm66MCXm>amnf12xK!AXlL4^x~90`ILJK>Q$2MV`3hTtEqsheKsVw+QRRP>b5 zDmk&@qPy)VY)4LH57wIQ7bXbTpn56$WTaf6J&%}^-?g@zPWotfL|9fTDBZK*W?BAX zF>jrgH%s6p@zD}DEmiG<$9&?%7MHGQWIuZfnzjhI%=5#J8$RK74Gzfje$K_>x-u84Y#?fww%&UMjA}(dlmg4Se>U~WO9`BP#CW*%-PGw zV$hHAiz@2bej+ZYzdB29E|Ii@73Ks6P72NPlfXF=h&g>u&y=^FPScO6E9|;x);fDGb;TtL?v9 zcNi`6oT^+lY?X7^u|KYcP)BDX_S9ees%7Eb_73h^Q8}57 ztM7hQ|Csvl$2O`wpL`@wG%dNC&j?d!S{NC3H_k&sewDV-hX5fsLG?r{lFHC> z&*XYjyGRH=Jgl=nOoCLmb=eIKzirb=M~*eZQwUhj;>>FO_@SR2<6ETN6{EVOtq+|2 z7w&?g}9| z+Ts);_+ss99BH%QBUFYU&1mc(b^h@iPQtC*#jieieIMog#SwH6xrXbYG^!(wWWdcj zT?>*E&J`*F@-~Hp3OtR}g*5$q?@F`Fl5GRO2TZvIE(07b9onFe$ zz2f$`m|DXyry@#$Ce7Fq{96)?eOSCC!CZI3Y;!};#9C}@Ac10F-){_5A5z9dl^*Yg>a z#yioH@?Gn9iW#Su!*=iZu8G`y$k7e3+=i}49_QHlm24CxW9;Tr(Qk2J#dI}I9IZ}E z3S|cSCAJVO{G{o#xnz!C+Fbrw6zTlw^f1$lzK|kj$5Tp;CWi1dc)Rw2(x~noeqP`@ zXEcf+brYhU`(tcTMWU{`IzaBcd-l-Q?NqMKv(vSXhK1d4UhY`%H3^RdwcLehuzS;e z7EM=&N$1=0u{tlK#|u?YE2Lf!Nx~0APS^5_zU{BuN*!O?^!}ORqWY_CR+N+du$vO#;*dy8!1$8{rt|aM_}}kj~n|a zOPhTSrE=-p=~??ni<=-zuO2YoX$UQ}noM2au2v-dNx8{=7=xmBai7j>=c9Nahb;b+ z{XZ1b*Hx-^adn}St_^tgA&b^enaBUZ9%~-RXNbd`HZiB^Gjf_ZfV{uxJ~xdOQ18#a7-X8X!1A?}Uw%5a#ShAV%teg2AyVcYS|_Cxq^2Ak<~6_#uj z1;4aLR;v`XV^Bg(MIml`O^@HAmE{)}mnccG9bfR}q4!&dXAuo!WV~jv=MPmR)Gi94 z>K8Gq>v`sJ7OB_W{=9|r?nf8B3vZWxR_*ALWBItXw!4%5jQ*UfL@%Lz`9RepX2?)# zg&^P4hREZAdfq!qQ#BX|I5mF&I*ZD}1UPf);*q-g@~$OtzlTu^%xYdeYBhGs-mMEi zxpTpqiPorkbA;w`IAg9X;APMNtitq(g4Qd$kuTmJylOa+!^rPR6z56a8{uxVb(v`p zpK@}K@yYwwWBW*S&TO;JFONqPtO5H};Z<;K7b|?B));3l1PiKxu{TwD8RE|EMzWbJ}J=-r|e(tc(&XhJ=u#{J^bbWgP zrBSv^BGK{Cm~iNhkrhhSYOk);;Yu5QB;l*)CinFbNRm2o`y*7!R(fxk&a9`x9rKSi5EBciu+}X9uPKUeyB;0I#5ZI2H@)QBoZt;vQUM0J5Dy$>X zvdDJB1t#c-Z86s{HL0Bc{tr`f`r3{j^D&v?pwHAZCo^|xg}c`l_=yF1x~c|`Hw3zl zuBy#l>TZgdrCQ8KX?9Mh95$dABzS}suKP3tH?^hHgOe2xV$2-81Dzsrtoj^TP)TE_^k)uq zo*TppH@6Mw4TK)93}nB5MwYZ>ixlVDu9{Z+ z(wFyf%?BRd?D~39G)ReA#AWtl$RG9QeM*18oqa}vUR~%2s%z9{{UZFFw;u4y#x35t z824#yE4P$;aA2y{2)H0K+g#j?mt)p`^Exd0a*BR$okB2o4t{`TRp$;0|q9!G?2CYUfxJQ~0IYY#I;Mhb#GoY<+ibGVVDD$l7V_)XBD#EV_kh?b5D zF33c6b2+7-=Vd)5q21u_$Kw#JUJ#<1V0NN($E&qm<#KJtDT>Fk0g`vezYQGhVnw4& z{AMlpiH@XDdaFg7{;{LGwkr7Jxz05&wokpz9@%OvH}gEbj{iY2-LTYk2+nRT1dX#W zh+ObD#A3jjzjynpQDW{FYkmy%{4~Mm^TNm4HxU$)R}kyN!0SKga!%lt{lJcQ6Tmvic{k$iY;(I;$&NSmn*P z%R241UTfpEAJ(zNkWWV&Fi+2TD~r~WiyVM}cF0EgD%OjrX}z14DpbF-B4tyP`5Lbu zio=D%T-nN=kgT686YndZal8ZC^*1PQpV)6K zx^A|fwc+=={K^#jZF4MC_Iky0rfk`ksMA5SOtI0#_#S4kZY4rJ9iFdxIk!?z&nP+4 zI6$Lv8Ne4D?O#J@h$Q-&1IM>kVkcHz#7_A^0meh;U1+mslbGd%O`3nOnwIh8EFL6+ zfP8GUjEeZKy*c%s2#c>I(8}l2beibX-a%xDI8BfJ;yXbi6U#kZLn`*IFCgyswO`Jcc%}}3pOQoH+jhG8bZ1=W);HLhfMp7q4VS1#KR4m6shaEm= zYduSMtytdBey;0DvPj&o3IziF%yi!T>6Wo4!8hqLejLd~Vur)!n`a|mhsg^WHhS?l zrry@JSdwA5>a@iNC@mTrtX4oV)lm;c@VP8st9TcwCNQXaY}ytkW|1!(2f#Lf_aJ~0j4DZ zt`ADFoYx;en9|;o@NFp1R9dDAK98{UeuDvw{>j0cw{=l~0gfvE_ zx=CX6)N!7M*%a5=EjJmO&u@mj&nFAB;-OY!N6BhC 
zc4eG}U)coPI`ES^}b(FwX#j9e+T1VWO<|QS)^Nt zuIg3xEmPqgR8Q72l$h5#^9ZknJJJc?yG~BE;6g$cpLgzhJsNwiYAjcKHu|Z?5NDM> z;Y@=yJ>->5g{n)+tN-ZmMPjnZX4X<3*yFyOV0-^RF;e zU@9WKLKX9?X;*{oH*-OIZ`f=-ZrqGuA$Nj|S?||T^1ry%C}&BOkRSKMAQZuv08nD2 z&H79K$;JzC)TYqTi=o}~e*LR;Z(e7rxfb<}W!U&GRv6Z<0A4ESi8?}98xqbnhP?nR_+4S(Af`?cRS|gsnALffd8s) zJX&3W*ID%6*q7Zl?Lbu?-ny-1i{dz=O~&j%*86?`Zn?FYDlj-=zvvV-f% z(~hnw)zUDZnMfiqOImhD>n7`W;Qi}?trXOy61eLqv6&5SANjaZ0BN_Nuz1YBC20To z2kgmvN`2#Ru_fNrYw)KA{FW{9A2Q|X@tKID_S?7|axNIga~}U3pOr5_VO?x#Ka-el#y8tQkoiL-txzju6U(W-9DywPWW@=>4j8ZsuqRc(DrM&EDuO0)`Jy3;P zfqi>>g)vqWEou6dO zec^Za`&|T|EPrnjEnf3cuVWOys#JW)`i`~#)i9ipQYQ7blUv)X*2g1kA}xTtz$BPV z4t@-zU5|%vxESAn9M4J4_P9ovOwA4IYs60Ii(JIzI)+)b!|qO{??=h_wIyYOI+Jb3 zW$G6vk&?>WFfgyS`?jHz?${6b*)PiZ&q2k-qE%3v8U4yo0ok(Qh^i*$&YdTVvWb~4 zD3zc3ab$g3%pYsgx{TtrY}djh(p^Wptx{yLC<$)+~_BCKkqT8Jj|_P}@+R5lf4Ad?Z_N zO7Q^G(8p_pBi)V`OR5(s{2_jV_P=PRdF0?WY!4r112L-z#O$e)`z`3ETg>7y4g5uI zwr!Pn=<(WM{$c183{Tvh^V1%+vF}BeB<=NZQk1kz#j0!xZaZQM*Vl7C#a{D~wapH4 zD!CkR3+sdarIEo8(Y)B5*GtZrWIcg*_SuUuoay4rxvdoZeG$9Q%Q1h77qKEzZ?M^k z^)4OY$dVCn5Hwg#4uu5+_CM z1_ekCY*KutUNr8UaVU5qQMxfyqub-*0;*fQk$Dl#e^gXb$lS|0<^L8!q3tCYhSw4J zVv+KtJR^DY5kJD8>WnNOzwQTb7#*%isg)*IV-Rrx_T2M32Rfp|0b!xWru5^{*yQzO zmbE;c^dx?=U&>Vq-b^a5N`qG319xqp=Q*+DCQMglvz%T%g+!(CJuQ=3)&pQihHg9m z>7PLWtDGIBo1(|jRZflL8|)B*wkx8^;;aO<-(GC(Q|Xhuqc2l&#?cehUe8uT;vF^z z(=&E}qBkC!63>ZRbgfRgHBr{m`&umJy;bzCBIS>d(tE)6@7$SE?O`WemM6?mhKyEW z5xbKXd(GhIY-znjzao`1dSy~U8=vjVI8mhf1PNqx47yCq*Dcy0f!K_xLXK@Nb#LKX zrwRj1H4q;p8xURJ$vZ|<3d&0?N5au|G3Mfrnx8JT`Zhljyq({h=G3;9N&BidZ>k*i z%NSDSg6Z+w=Y~--;zh7=gK-1!PivYGV6f}eyhjUM_qJ(uN4sb|lw)z7_DkLnzpyrx z+~=5yn6YiTZYtJc!acJW~!yI6CB#He}ur`U_g;<^?AlZ!RarHC1f@K?8=QGSLEY-G9<( zpkJD`oC#9vv|(0r`o&Hccs1K^dfFV`e6KVu6T@;UwZgG|a_X}DkmF&+)d1Re4{(R# zXCbVd>N)XUwOnxv_15@cf!(I`C|yDwKC}n(L3=6>U&8_;bIp1xs;f-)*+G#!i{>{G zriOMVU1wJvN0CeyN*9Fq6Wz=+bWZyVm8tKWF9L&%&>y~YUw9Pqkna)uESKsBaEd|- zwH#Gc%cOja1@Gc&%cBM5<-ckhPA%r7rv#DIYbJ82#|}C;Qjpkn1KyJBzv}3f-F4G= zdhxr9#XEZiYr*&;XsJsSr!ji9{6$^9zK4KeDCJqzlx@lTqoz1Fbv2nEk=lQ*<&WNo zIUwC$uILySChRCApkA+(xzo2E zx~q$6Gp$z@_V_JtPimj@GRws0cK`yG4IFe=UYM}eH{G-A7kNTH*g6ZUb+s>nShuYf z2-P!UuwwQd^K<%m(^;%3CmE<9W21b zXNeQn=zzQh6lOL4OF5lf@wTtj2?@6e3_d4!cb{Evbm3;3_l!U9*&_E{l|r>aM+D(B z3m5Bob_9zNvF_Exg=>JShBBO(|k>;DYwT^be_=w7 zCHxhm1Wvpq-JhZ3cV9+RYX$P2!)eb??^^`k;(MO{ zPHC^W<*r`&@xz;xaa{K87{CUw?(UG(8+@L*=PSl?@62{}pzyTG3uXOrk)W|N@BM#G z+IH=C?WvJBt4{e3OprwK;Gry~-m)W1je;f(CaoVPB%7axf2$V3`Qkv-nuwq-)~S}m zpM3rT#S+M3&omA8Te@W}FLcTP0!W#PBfP{CW}wDbPZowvu#V?H#N%;)MH zKXk%1DcijjzLvwS^)TA*5h>D!r%Kb?k>~y1zev>cBW*nT&nKD)c>8_FxrF4K_g!%x zG~iEfn}eD($lBIPST!upCTgP!-um)2O=NVgSjnh@Irlh9yGL3ksB zf)THEDC=UsT{fJJ%eaZ`hi7(=A!B;iTAVm=;f3M5D7T)v(0sRx#G9)#XAViluJny$ zjKYjbRfmVIc3KFmt~Pg)%yw)Dl)B{C^rXqbKOw&LoUHY-E(Z44GI!FvsXu$J*2ZNb z!T$N}yu{6YdV3|v2bcRmP-ArQ)mUgIZ(Hu#>Kc8HE9KCnrBSlV4;Gm8f}lgwL)N__&VQ~6K4m|{=cUjl&;`+g2CBZk>+_V4UJ}SbKpfc8Z6(T*~U6Fr=wKvZ z166|U=>`ZPS@H=XQX`q|DUCnW4ws%xXvdYaC|W%I0QhnSb4jdR=K2iefk)0 zJK?GU6Qw}E;Y?M|HVXSJfJ$e)?*~M`aq!XDS()mOFwjj9TXL%mOUL14V{_1;bbiIqn3-CkhGO@8gF&KDE;*BPKkHeZJ$??uDGF8oQd zt(%A$eYuOzVW8Y%pp-Ti1ihGReWta=}(1hs)!jJUV7&_-@rY8UDGu2VZVOxhxCMpKqrA9P)4S+ECo=J zL`*LKnHRCeF5D$Td{}ConhAW_agEI9nRXIOYtxXf9JHt7;R?kG(Rl{&nF@F6l#~3#%bDq`x_Tq4+E7reD#2+146D5?? 
zRfZXGj^t(wGH3Cn*Di9VQsahAwVc;ql+73<8y^g#Gq~nX0m~V}>+Fh8l@eB`B{cnF z<-uJOBzKn}=Qn{1ZQq#P=V9pKHUiLd?r~mqMt|s+By<0Isu8I~R-=bJbOdlSb6|{x z{mXav9ib>_wONsft-O-kcb_z3@Ev;10FrmZvZ~9XQy;$p0562&q^1_A;L9n)L2DD6 z14{04x9RD0@J45$W1ES^flGn$n8^YwvD4>a&o>@g#`JJ=Zgo0QSH>N)7Y8}XW@dy? z1xUGhL)}Dfjp7$s`jOUKvlEq}`#HlRe&1?{XH&mSGW%423Ma&*{kFxda+@~i-eE4G zllQ>L!K7T2TF_`BjoCE(%6a`YtNpB*>q%=gxmyFzuBwbfM7w$A?puRAhTX>=A@TIA zV}^t1P+-2AXk4Nm^G;*Ro4xZgc&z_a6*4`C`5FIvUGM~M?ZBiu;P@`rAQz zmD3c}+M|tFk-o8$^g}3$|)9 zDBmwvl=G?b*a>@O>%s+g^UL}{I4I_?-na?5kZmGT}UFUrFY`P z9%XVD_8d~*UagbwqaqR_Gs|(**)Ha`muSdHnb=%;9Oh`b1|j# zkP7Dt6$&~Jo}f3s2gW`UJ_@A06Yw7{0Bl>-@L|UKjk*|Q;S3Dlt5JS)$@Wtvu4LJ#-daORcBuunWf*l{Rdq7oOZ9Q5I zct@MaiF1;@jwb|sPQ~0?>Ao5VVpS99mSDWJ*6`ONO(i*1A{EU?KGAqBmtR~6IA4h{ z^N2V!B!#+5Bj%YA%}#~Cz0)TKLk{54)?E&g4ed$0M>&VM#r0SCw#O@m+>$35a8MSF zpCTyUW-Ct#P{9zP4%v?msw>Y(IYwm?xY(JuQ(f6Ok)Ttw9O1N7Y3E&fV6GF!K4zFt zV|0VfUh#Fm4Sk03nOfzo=-&*t+WYRG+_b2Ft7B&xfMUp0fn8z&n zaE~eNc7#)O<9V5w$m=Mq&>)IFJ7uQrseW?qiZC$L6ur6NVrd-wmL#0JUwW;x?CFQ= z!=kQ4_NIJ?n1KD-IH=97Vqn5vfuY&iWOM0@eX zh3^RM7C`UGU{b|~+;I(nf{SoBd8cUc`guz|M4x;MOYd*6HPf;Trt8nYeYSBj*QE1p zza2zM>nJ>V1nrEQcDo%3RQpoxDK~67k>X$ikRdY*0er6U+b>ch0>4bB(!1r0ZX_a7 z%mO^wXYu%l4*CV{nTXW>;t?xSubMSM9sAW5<$SaZS8D$dA;5}?m(LH~3Mto0sok)u zbFdt(j$-8EdUo}`$kB&M$L9J~SF)~lII~`t>~SaOZh;{>zY$rP`dCF%T6L5HT~->K zK94}?!qRZV8Hf6e{c(nFa@!-*k?G8O!L$h}q_KaQMG#Pet51Lq%bWjF;uVyznm3AY ztxaRZtip!X>LSwN3QX}d?G*6|{4^q|D{etu<9Gljh@y;N*Unc)L&#arcU|x#cGck>73{gSx~wK=&3e&e+G+@|Fii+z1TI0t^eFAuhSPXnG)7 zW4BrB7P@huUPVN*A_aISCc^CKT!rao2N4z~-X8of6-2eaG>>bgS7NJ;W$+}WR>v;G zi-DgmK4uDYB@?Y3gmPYM!6F7^$mqZBt1IbK^&hXh=;65 zk?|CfI7|;>e>}zrA_u!rG{g~mM(ud~DwY%LHVURZ`SOq ztTJ1Mfx2|RR{h$IkGXl{T;WC}!i7T_3%_fVY5FpAn3tRF;5JEp571bTnpHlh;Xsl;WN(B> zuK3(+uCgFmc1c2u>oWL&$ldD2IfwZqX?UB<9dPB~;Ie`~wFqgwY;G~WNM||jtg;6v zZR=N1Fdb4i9P%TM1|lwDqo*~d!UJBuetNx-d?nK{o-3j}|LZ%9y^G2d*Eqb{vq{!2 zQe#g}<6=#%VrdDvWQcX#{Csm^14U++D&=MTx+r2-fl$$zvtV4pK8I0SgIZ#x`LShv zO92-Ui*6HE&C-`wEl__8bBv=YtmA}`7o`?8sau6HIP1xJj2=k_v9f7Gr4TVPGW=nk zaU?ewB^aW=`ma?;2?I{nKy2@Ckn7}TKyvu8q6SQ=+AcEXl41+OH0J_1l_E00j2 zBhxj@A4>{~yjcc(16^Q{!p%uk_fV^qO2#L2+^MuOtP!}0iFgIFjH$5kz9L|bR$LxR z@L@UvIu4nHzk!`m_7oD5(5W~dR^$n@fF>MmP@pe7CNZJchLh2NZFVUTwcd*@SiPu^ ztXNIOYbO$Lb6T{8QdM>n1OM=TFS8ZwzMJzsQ#qhv{7UdX&fQO@j8xRgpuAG?J2UK7 z54aF5gvde4T;z{rHxuxHhRnK@LwKpNWOqThQlFyQ2yLm>!uSN+MphiBB~VqQM@5<* z3(niMsX;v%*2UVKlNH819Pct?VRu^2mb2aLSL|=VN#*r)1Ae3z1C;$()^TPeU!!8S z|JopxfR)1~eM=?psCXIu%$NFO&H!6p{&`c$YEBXib2&tK<$6%*`Yz79#x^q`XK}-{ z!YYTtslSA<9#WQ6Ur|2ot_*OeP6Jd|`#VnCO(eqpiq8k;^ch*1feL>CtHCA`$N5Dz z^U|1^*1_O012>$JVswq6eYX#n0NcB>nI3EnPaAb&$fCs{QhWI!Ty z(y{XTc+&zJD@O#%s(k+v4?*CQht_C#wmb*3{zUp>??ODnC7=^<5SyN`d%N>WS4xnosc}Hq`M7eY3aklR z{s6N5@Vggx7v|=K?DH*4=^JamSO>Ct#@eh@POmlLDaEXg%4v^HD|ZLMM-nEbDHpQ% z28IP0saP(UIF4DQO}_3>GnEf{0J|2S%Sul^rjvKZ-p0M`NP=j(;+**8EEgq;$X%=M*&BUVRQ$>^mSwzL^Os`go-;V zws@dGO~?$!FyBs5Ms-$|3i$$W0EvZz{L<7JT)zg=?oa9VUQD|qKEvc)Pj3Z2wuG2- z_9CG)OGqC=-+;!}n3SNN1vsHxXQn1)dDnK5d_6QiKQVrMfu+tQxnhNc{0ep3+1gog&yD-u8mxM56=ms3CW7; zCHHyIVOMN2VgC@~RSz%gqHa#$e&6`uk?1^Tf~Kvvi4r9^lefrOk%i8X6#C3Fk{!Kl zs;e%GE3!|JkcmsCwt%Mihu!>g1`)2T+RVS;ZSmF$Dzui>E9GnPa$-^kBVOs*@Mwe2 zbLS0L=UoOfqxS8)q8N*u2emGmoZ|>dwGmf$EzX4Gf@IefiCo4c#v5FDQkvyF@9(VF z-J2Yd5++?aI0qPBEyOCt@jZ;19{5pb$#N`EljGMMv4PrMw*|Z0dSSpp2$LWL0$DOv zPA>vgP*KoXS9Efu+2ZU}3rBiQNmAIc`1&{yBq1&Ykf=)zhbFQ?03(fMN@92mGA*3B z{NS+S)S|}ef@|e&bSIMGX%seO8ewCg4)h8##*pd^VA#{&N!JQNsiR(95h@pKTxh-1PWNXn>B?^ zl5`4;6h>vJs`uG4o}r-(b4@!9!@2G*1!)$kyNewL$1UAmMHaQO1^KdYDg+(g`$yY%T)ZD}lCBht-BuT9!e`TWQ>fPP6zHt|o-Ay_yJW0OT6OncyGe@8I 
zWGr*Pd-)`Y)AFA3Wd8=Cjk<2O;%?a?tcb=3oR5Trvw!mi5@AH5jYxwSibDR5h>Pl{ zxXJjyNXNcd7lDC!dO95jkFjIn^E%{vSJEKM3BZ&#-VLP>dEd^iL0i*`Q!)Ln4aM{+ z7GOppc!OHcHvTT~%~2AkCF!qA;2=5x*ll>1-nrJ}ijqp#1~yqf|G_khgH3Xb8~;pl zPO;bk%*yHuIVhfd!3~tIXtxxFm72GmNan4DlS^Eb)oTQXA|Dyqe$=Q{x~mmO1`mB{?$sym9Fg_thgt75UG?KDW^2xcMnSz=_e^cu4c?|kI(=&G0&=(S_yppl zM(VtVh0t0h?}q>R3D*42GmLh!@&P(Ps_QpuVf8YUis5%_$L)^-&e-A30LZqtv2A&b zmjIwKo)x-q`Ws|t1AcIi+kI;bcnZ??2xY)oeq+DxfTIptD&5}n7fE(x30SBFxK;}S zuj=TH?FBlfkuZq#D?ZDk#1PvI3&e%aQ`?#LwfC^nOReP%=Y>tYZ%CVvD-e%~0$q?9 zdKvW&*R~$YR7jIL_LDUI3jN(Tz?>`l6Xj*SGwMJcS$?uZAQ@&{0mEBv57WGi zBMiFoDMdK5)sCYb58o@x+o)qYT~HY(XR#L)S+q_d9I z6oTDXM(uOqa&p&A(LI(gaWVjH5lwT`={YKU!F}(ihf#*OXWo{(Kg$`MRXBC+e~5;9 zsG_xd;^E{vSb5GtMr<;H-I6M1g@(2VLx;Wl0e^$f_MP+)Ced5Z+f_0&Il)?ZbJk%V zfVxI51#W^&rzca3)r4O2c6N9Yk5Jt-F#Kc@%{rI-ejoT|TgingzQD@OHI%Rn&>gz~ zDcqZnNL(bi9Y{RD3vhSu&4Dwww_QWQ%UfcQ4-hGyex_ zT`D1DX+dcTyT=~7jk_a;0rS3>!{|^G%O^8LYFXlQEH(&NcEF#iWtiW&mr2fZE)@>g9X?;Y76NxuFZc6W78!`&}k;BnShjY7q3U_xu z>^8Hq!BcLhw@QldMeUo{pvh9VhBCUZYM4SZ*gW18@y?4@JcLXJ6A27+-tWCIg72~{ z>-Z+U(&2sSjWA_AFnT3icM7IlMGf#atnUUf`HBh+v@yqpm%?x`DcI!hZZ0s8(Sr`K zWtWf`j?Q)^*)vc6xf9E{p*Sobi z|NCYY7`jE1H{4$v5osN#hkT_g0Qah!Zg>m%FKN(ypY)z}49y;Ns_!#rcF8bhV`~6l z+1x~S$;fxIR7DXH@B7WDsb%0)sb%#@z()U=oS$R9QRxwFvbhP*!_o9R4HB)OP(J-x znUn2w`#?Im;v}Rhc;&u9Y+p{rSWQm-nD%GlG|KO|<;jfCBZQaZ<%_FeXSjrq z!BDEX10ZEKGwyOweT2Ah?+YEn;=&?p?H^}k5ObacU6wnqa>H8M#7`@A8^DT8w$4Ej zt(}C&4*8P9LZcdHkv89-)pi;t#q~Ty6j8wYp#x8w_@(cOe|(j;xf)&F?zpcDy}mak zpW!!ia(JNC$#55n@#0bnfn`w*xFL>liEQ~-j#`PR!pq6ra+yZ31wRDzs zi(>FUH>1ATndtjr)B9?LBtc5d>Sf5*Vndy9YEnBt} zRx*j|nPS7CRBReH;d=joV*uE)Mhv5)c@oM?`L*RVuYgk4CrxFofit@~2;!WAs_xyz z;8%i{rZjBBYAzOCY1DKY!DRV~T1-gnmv{B#YcT)&(B~PuTcq`2Vlvy24wvQrRH%tK zZZlU*GK7wed@HvO0GI97g$tDqWk|c+0ceip8kVe`+DmQcJ9}wlCGg{E#cqNaUb`4| zv^>7esU49Z^7r`CQyM`%MBjyZ+^+EeQ`tsvQLnckZX**Cf}X)n+DTIzDK<|N`R&U! zK#9Urikn2%dO!P(+Q&57COXaxZlxjONg}uHqyXOT#4l6!kB;Bdo9P2W>-({#TwYQ3 zWPEnLwD=rS9Ym`h{~pnEwIHHDx&wlwYFU0Hu@U-FKJrWb#--F)e`E$#YjKR+Wy6d^ zvi;4MS_@zfPb;^{WHDy|wZ$`u*4j594i){ry=REg37 zpAyRkVoAh@C0`86DkBPvc;v#1knB*vW*~4Q%8CM>vQO4d*{oMrwvMe(Dr=L$`W8=`JkZP#!`gvs|FAjy0XSGj z5OB`89j$Lw7=JX{>Ev94)<9-0qpmjQ9zznd`{I)4AH+Qk+hzGTrxAXree2Nrnf)*O zvTve*tFvogSPaQIz~rQJU_s)ywywQ^OrVv8xi7l6M+ zPG-@6k2#z!i~u8EtV>=`AgS3NwpAh*X2HTvI#e?U-fGt3yCI8OWgmg&anqcUgASOz z5#Am)0no#F^^^DzAUGy|b1^uj{RHgQ_~y70pS2hixW-zl)t412&)USSY-6}$x5Pw# zQ85=M&)aO`JEQ`xOA7j_f9AEICcgAU)Uf#pYdoDi1gWvuthN+Q8v=3+-I^W%l#0fR z9o7cM0c0TPDk!d;sSG}jT4W!Po#6L|1U@)}0KonUWo=FJi%T6c3E-mMqknsOwBdz- z-c41rdQo3%KV_`yzX1gt9cISVH5@~b=OPH2LLVXm`+Egy?>9>!vGh{)F?8}A_d5id*VWt1a_Q^WAkQQO(R6PM z`4CH;66L$7#la^nZ}vWc==w5Z(P%oE`D1&)v^Oc@<3+Drap6GUZsT_N)w&{wD=aTQImJH-*4S$ z(VxKrS{l>GJ*pc_KxdD>3X?jt?0wj$7zJ?xxNaC;hRcN$3r?yvXD$^vMKOY;T>bqC zSF(>Z)yyZZz4TdKtB)MdBQxKXd|iV}L*pF#IrNP|@D1hVC-qJt^|)@7u2~|N9ZM>? 
zqM4fk40=OD20=4c*S109oq+T&K}%Q+J1C$>f;JH{FUllx^kFwmIqg!X{*SUH))z1Z zV|`uCeoa$fgY%WxZ#8fN7{~=qy^x}$cRQ;n8&*TZrlBE&Gm^t@wW0X14;#tlectu> z>fcFerZVwF3g%7wJ*4Dzr78kfrTjf3Xzib|k*8uUtd z1U}6ZNI(0<@ifah5Y7ZW-qatqqWZS&S2JUNJpzmxg$^d#;7C-4v0;wA7(mXYQxH)6 zT+k#C`V^~05u+SuZ-NDcT8_1wCf)bmsBfFw0qB*z2PwD-9Nmes>U$Y7Lf7)(|=R|-dY+Ij(Ur{mprwU`5-Y`9m8eZ4<5I& zroGbQC7zF~hN?p8ipeiBmap%u$c0BQgBY+kSdWsANnN2>D7~QEtkQQyuDzmt{_A#E z`+)_4&oE1 zZagq6PnHlanClKEsqJ=aW8WicTd-$d>^?}5!nxg{ak9;+GOV2y(`Tg*}!lP)r8fRe1lqY~N21qq!lr=mR#wq!hEk{uyCp@h#w-+4(6pGGxu3m>1=j0jhM#K)~ zo0!H7`_ZFsg0@t#TDs)RtCpUrJEDTLg;(a7W!evL2FpGj-eZ8EZA!^Cx#qq|s&j8S z!1%#U$H%L2plEmP>a*?jd<`nRr?&Qs5ce$df6za%^p}V*PjXZ0SK{P4o?BfD1TEU2 z_tyJ3`!R>x)!%UNzB)y{!bHieewI|H2#Rt9bdh?(Tu@1geQ<&7K_lTQ*O^axz6^7n z4vLvTCY@u;yQ2v;teEQR%=-ohdbL}S%!SHaDs~v1QLLQge>Ibr)%Qbt^6a&(_$QG% z@8^{R(ep~k1z>|b^3G3vI2JmoYPvJOyj9!Nbrzq_J88AbH-#=|)N`Hw&n^N5B-aTM zy5S`EpbD>o?iu%t@U}p!fu{=OSN&LMA5cY8sevKmhL*p31`_MYFY_)5mBX+L{tD>6 z#x>QD9!7p+SX4QP8V|Y90;ZBosSUM!jHwGt=xw?sM8n*5SSffIyZe9%&x}1~ayS@b zD1{!ywNjk4biDGOq@y0(i>X~kaxkc*5g8LACeq`S8S^H%Wdb3;vOK;}YP-6s;UmQX zo3^!%GO@#V#9XHbRMFb=mKx{h&GmfC%D7Rb?d4*J(`FX6+d1_%_9RFB?*+c7?JbsE zy?9zo9aVpue||arRp8lbr=}S#Gn|EHe&umOLd|@`#bUCurSyGpScI3u-t2mI$yBrC z=oUoqu{QIOdfIeHIbpZ2^C!b-dol{kSMvQIIwK47Dj8LZ$qg-$y5SsONhZg=EX<70 zy9+3E+~oOjyt!mKj{i`PtL*({l`4(2s2(d0$yFR>v1cJ|_r*OL;jvQ7UxEZ|e4!EL zg?u-o$-31Or29`K=OGMQgq`AAaamX>o)^CIZHIudhKFKH@;Sxu5(ks&sKjaJbPBSFF@q z{bHh2Ur?-dd3gPKs{kW24C=^@g_Wq=!I$PQi9Iftopn}?a8GPYeYyD6IK{=ukVX9IGP0;lV7 zE|f|1!?m)jmfbOf-EFkiz7x8Y6mBd-3vzswBQN&|<3Kq|+e9prJUdGy-iK{S@ zw0aNiS~KVUM7+fyklbb1W_vir;xygOG+4%ey_MdqMSt#0Fzox!#pMGP`3=|fSj!^% zxl^%RTPnu6$5%3BQ?c9sp~MM&|Bk%$^t1Hs(=Ub@vRwKl>>Npr7JyPYKxCO;w^7{5 zQTq%{IuCdqdsPrAJemFLr6OYa=PdeE-MTA73!o@Y$CO6ANc|cy*?iDjt9Ae3+sQ=I zv-cwoS{lDXWf6a4z^n20k-Ip8VOKrln5 zF<_8vj_a2_f-C?MpoLbe2B84pWCh%lpXX1Z4pg%+fgP6Thy&hHc{t!zECLcSZw4n@ zyXrc2C$^xdpo*1!^?V^7%FCrg-R}}qTpqkIk3^~(X(DLEn1N_sm{JJMLK2`n;Vau- zfjhPF;anPSn#3W9HSJV^F9|o$2ph(}?0r+TW=sHqk)_SuEUuzy2kk2QV*dgLJTNk} zOfm<?R4sC0g8-=7+DiQN`z);%FSb64v~17cJ@hI`d>tllA)XB0zRC4q9!Hb|63op77% zJ7c%6L;&xHzAGGS7ns4{@t-)%tOKI-G<$X4E@L_{g2`qI_eI|H8-u zeY!~2>}(-#E5gLH7$#lPbt|y1`xLh1ze9q1o!2qp=JEcgo@~PaCtBuqE%=+A=hzKC86-YG0SS=FwkzSt-pxk!A zELw2*i2J0URW^c@pRy6)Kb$HjBL9Glv&xcJ1`%xiJcHbu#3#2WLz49s5>|+p$8UD4 z5ROj@;-yEBJzz>*D2{21tO`g@&N}~AF_g$kH}3T|D!Q?XT28eeraMpYyxy*5hdYMF z#G;j#Df^Ab+1o6t9b$A%A1qWf6MVf1p2)X8f0o^zs3kf>N&35qmF9tU%FAzB2q;ZqG|BY@~dT zp4Mcz$@*K;q!E*OG8eslh4s4&o3G+~_+!R<6Djzao$2Yq;xduBaCFbeS-3O3$X~|D zpbJ57Algxy(2rH10cdnwT;*R>Hf>Ng%>CF_^n0+tcCfIBkWNV4ih#X}hE7^7fFPu_ z-dMd?t*#&SQ+$^CWfSGMrM=Qfny0EJ*LT&DrkOYzY ze<3fL{Rs6!oPFV4dhIFwJn7pKgK(^M^dpqWfBp&e;duS_z5BS1q~w!M=~Rop^XhE* z_xIPcuw0Yi;vh<{Px)sHA=i{G_MaFXT!y z|Ne=b`XNwM;FCCp{)pSJ2*;nV0%JSbLa$$mTd7Xgz5ap!*U!K5>9y*sU(>JP`ucr! zBK8}7-)o#YZdsUArm6H{HFQq>HQ6t{^O=vUKCiWVg<}P>OA|~Sf9>v)gebi@dESy? 
zmU&fi58CqR9u1ow8l~kS0JZ!s{?!<2HOmMFzDh4JKHYBiC&mQl_4A0dSB6eoSP+T) zP=ItD2=xjxtPE=9_cavOJhJgfA7jucDHSq zri>{ufgbp3e>bWJh1)^fck+EG zG_BJ!_3V;Z#J>ZQZCoYW@KNCP$o4g8X{0J z|I|h8bdR-q^^B~}_sBIrC(N{jvbvU~%ZPzJypuICPW2AJ`LZFHZuvHan+29HTCl47 zS*RL6C7Xp)R^#u`^)Z{zvQfv|9ogc2-Y$7o%<$y6$8?vZomD=SMC`pc@f*#GoL%JA zwTKPIXL@X*6HEquww@jq8nm~q;?gWFyJ1S^OEqK$%0Ava&QNH=&CpOpguLPTNoRo* z>CFIwY1f6Q$3s_t#nGTEtD#{pxApOEBDwx+A5n9SuAa=tuuayV?2 zD#S@5@k5Y{*Vog9TGZ{6=)}_Ph(ux+8M98BLQj#Tcoylc-8qoJ+&1+6drOut%R6v| zuRlinBrnIc>^E;J&comTdI1QU+RQ#UQ$WE;Mt~C}^aiR1B^aZesWb08fER8{>kbVi z;hof-ETY#Yc&-xZ=jY%IewemH=URGnvCimO^O_G`XY%DIm2qJbv5D|g{9a^xFh$>+$ctAm z{Ti%{+aj~L-WOj7KyZjK+(aDGl_w}K3u`?w3+=%;DSY`?x89q~UBs)qs$*Ul>{PjQ zJs)c2;{5FaxK|_?#$nyZP<3g37r2|jhG4jB1Au|^Z-SrEB#IQZ1)^)|Qk`1Eh`C2) z(*zbao8;333bmU&_J7z8T2Bw!z9J36*^$u+=2~t>G?7$D4vy76Ua?%$NCp04aO^J+ z`2bT`r2P1(eNz@g=t_z7){b*(%AsO8faqGa44Iyhm-u^@#A2jYS9$gd`x$l`O?dSwy?3^QGQ~|QP(l7TboDsppwuUGB>X7TWUBCx< z-g;HrdGP3aB=ZgFZ{69nknh>AMYwh5tfl8;`f8EbM`_nYjV~8O6GsL=HpOrm9weW0 z;%$CM+mAW+#tr;Ie*lfdE4RE2hXS;=G0azR)?3OnMhLAtr#ymzAykC>-u zBD0*=o6Lt*fwB3@K*$;_YDyRC1$lJ))^`R_3iiXfR*(f36+5aWUbFz7ZBIH+P_4w( z3GuPd4IT0I%T9dp19|78W8yz2@FNsEG5BLbmXBr&RgL((AF%jDd#)~?pKHF5Q_mKF z>h(W;Y__|?^y;x*Ztz_7ky{vZk^QxYaKy4Z-1AXr9T$I~qopgfByj(F(~K`!ece*2 zqSH0e^R~}e-KL%V9Spz@19ibx*y;{Pm`~DYstI(?}kSh8VsPV$|zU1zML|_8C zPNHREAhmXJero?Eci93^3j5EFdflA%+gL52&2VoxIu6_v1r=sQ8xJ)MHQ)P_ZYtuc ze&FZxTqYoT9qJ@VR9C&wu^F<@`4@6H*U>)lj_YJbSUNheS4gu`p?y@HS6KTRy4#t+ ztoPd~>pA+-;`HyaRTA5JmPrs-N0_RK<(X*jciC$p>huuc=*KcL$J*}7v#aHKXFpYC zsqNYw*u`5iY4dpx(>T@sT53Pv(lGX6wf^=pU~Ycg*tAi&TMDlpc-uwAxs6F^%2d)%|>=Iwr@=d`@M{;e}vaO+ua zg-INL)z0!gH54lwdLhQFzEJaUT3{tVthY1u@fwH9fm7`Ji;3l%kt%ua&yKA3 z{iwfnhy!UhgP>WeG+iLrS zi1D&o7@m6g8T>D1yW3|YQE99jgDJaBd&|7hJo#R}2eZYt&Ak(#wKb>+fe;evS2!bV)X(~7 z;rZCivVaIo-aX;1roM)&jf+OeqqEL}p-@sFwcuyWay*Sfs!MU0=77$jCBF^#+ziFh zFf5B!OZ>KUA;>lFPH~4*c(7Pk=POpg=in5`^<@oTwL&*+kMKo{p2$2M95%W4B+c;t zi;$I)Hd}w6-N@8%L36M^T9}N;!86uQVzRsWyEVlZiww--Z z*%%UfAyE|Jp6eTgl@P2ukn`+=saB?SH!9RB1)n|OP@(?Udistx8+$lqV+zSD429)c zptx!H%<_W3DA4L1RjTnf)PfJC^dqWK5lB6%H32c;KzBr`kkqTPn6hwgw0-!~IV^qeleEhrx zWKFIlIhS7f@|Y{CQ?uz;R+#1!F zjao`x27CFpyQJgaIVc$OSH2`fg+!j(gvTE~F1-&O4*O z_wRj1yz4&{60-Tu3-Mq7(lc?IRp#y$vFcSqUj8*6xcPy1ze`0~z{n6A6S&Y+PB2^K zj|Upc@Ef{g)gBIUgOk&OmW1fZoRdMtzNFx_qLLfFUS14JmC~jndcC%3dp16Wpi}sS zVBM(356cBS#W0!DxL*M+ClDaxQpaiwe6RHbG{ku?bB|iO>WG*d3xy^y-uv#PlNzy(pB~^-L&VmaO^EoY{C+^Y7e{Ui)+|G61WbPLCj4TfLbq9o|&B_e&8zKED75@ z_%s(IwUd~xKN+>5ez#Fhk$9-tfoT57UFccpE$>zJ+sL<{=+x(A6VDB@6=N-5dau^H z48DH##Z{C07FRS_ZQ^|CL0{zDo$A;i_8fD=n?jx%VyZY(q^^rBf--^b+*DCG_ny(6i*b&^jh8-6 zOb#ovEbrC&;tZQw0PFQP%a^sZsemXqs)N2brJ_f=yVm_7Gmx?bh6^e13LhbU(A!eM zs;3e()bXbvxmmla==80otw@qj)GLp}m;CveS434o?k1%e7sqYxn`WZE+&|}A{p^8| zbn8^)`c<>g+ev1Ah9$hRnn&qtTq_BV){SqeRWio z-MX&=0xBsYU7~b%hX|-N(v3)W=K=u%ky4S64(aZWB?3x!!y*KvbJ20<^81W)&fe$l zyYC-&j62R4Yv?=Rm#8BmI$riC(q8Zh;Mu@e*H0-h z(VAweo%`oAK2)<^gsJyJV~=-ele=G@K7Ei;Co8#~+LCl*#jqlYHEnp|jV=2KwjrBT zqwV%W<#+GXu&SBPmsd>%5Ft0EZ!}e3AwD!! 
z-k>3FJBc+kAASR$0tg0vS|u1L4eyvtt}gMv^gk)-QIyo^;tDn8Z?o6nCd&JGlbax$ zrX9;-Ofk1YUc|!fKBa0?#j+Dg$Eu=wpubj5Zv880Mcw>sMlOxEpm(*Z7g1;!w5%bV zhWU3(>bYDl;rf^O^CK@5?>%UEka3iq9zM|G4EY?S`N1xhEWcyrLaaxW9R-_Vvy#RB zx{6nUL#WhKOR1cssz#w$w`;^h?(0n0-*iJ$*D8JM1zs)KJ{MKMCGCJIa^)_kkRw;j z-0i8maq3JD~( z8Q|wC8u3L#$3yJLeBalGX7acan8cS3!}K$4%!4_o>gL}yYZ^xFZ*0%@psDo5Dp+82oaG&|FYI9 zYlnz0Uq1!$Wlro}K9=M0Uhg|<3oT`a`RHm7adl`!PE;#X+9q!Y?Nl=OEll4@sw!v^ znStlrCGVLVN8fsqFO%awxwqr;q?-*ZvMOR(DE~d7=*`UCXo3jg7-f`p_k4p)M$++Z zmq`&@;q7K*Z@8#uM*Tol%g=D1@%QhMg_(mS~R6X^pY zOe*GvR+8py2$|$;8beNd6Cn7PS1&q_t$a7Lbav{cD0{PqbBk=HB?RvIB!mk2k(Sx zS8|P8Don<7Oi@|e80 z)+1Y-YpGBwlY@JN~C$ql{;({l(71@5Bv0lwn zSc|d?v2ip%cQhiE5WRH@yKK}K);SD0(tr~mu-X-?Lp!=8tH-NTnZi{)q3C(0ydJ_6 z<8P=Wtlk$?PUE(HRgBl8G%|K3%(EUY3tF3S=FG`cFwUoWGPIYeB8j=p zJ%(A+;MHR(66lOvbL=nUy6U-FzJ_TFUavm8727~8nn4$ zs(<5n%KLO?v-5O)u5#QX&n}eYo){ANEaI4%OKxL+yKfFBd3S5~0^ofJ5ROko+~x4> zJqI0^2NW_7>8DuxZ}?MTj658kB=h*5N@Yyw3}81ByHK1sko#DvP$ZP=#oBgbMht^v z-S|I`^*wGeYNqEzBAai$#jW8nK#clHgVbNsnGN;x?5*~a&zPk1#WHH%8?2`#V^CXuZ#gsq?X-WW@D%CoI6!M+3AQ=W0He8 zs;h11X=kgLUzJ>sa8Rh#yvD?8>}6Gk$)XpP(2yO&w!(Ju7Q_ZNm7ntz0x#zTy&N1&PmF4>Nz!0k;CuoGIsgf!by z$`X|DETdid_yLIoto>sM$f2Kjv_X0u+NCvy0-V2VyS-@(Vs<{NevPL4+5diA$@`H8 zhhC*1&wPrB=s5e!|u%O^s?M%;%tNHW6`E##@i~>p4)j8ap7A7vI7p(arf1%`3skcw)E!|IU zXZwJB=fRhmb{rP?q#lTpc=g!Qx^pFJ5Dm4(I!+)ZM(X|a;n$hqPcLlI*j`ENn?Jc^ zP2ZTK=9`FhK7uqmcxYRd1SIcMGF+;%&J%N^NS|fng?7)f={=~Yr?sHHXiQsPmk}10 zO8yz;f>g%S=vl4Sf+$5eOQ5=U<-%h_fN`;a1mTivn{`wVmY{JSl z!7eTPc`}BXoC(uQt-#reXuB%=PK&^#Wb&3Eh?3vnv<}4A=zw;| zLJ@^Y#~kZ&l45!0c&rwadd?!#qZfxZvH8}Cm4cr6^pkSzDAEFMRo5oR59Ew!&?JgN3{uMD9K%mv~_E#ij{z;AEQQG9bW>G}_fn>Art+;hj`FF{((Y zez?FHg#`y!Gd45=maxQY9IL7i!G2NFratw#m@ZpCk=8H*(0}r|N zt?HHoEGed_%hONEPl)^?yuIt=*&SV$27Cne9#%d63UlPRZWjbrPZ#e|SnAWjS@?|_ z5mD2WbvG7V$?vtjDA9OrUkDFvD#uL@?8aCuLj%!6!qX+@P0~mBh|^R{Yjdw_HqbNT z9q&x3X&Z;pgC|ti=5u7Vbs7`;h0K>!Ez&U*zO;p2?ulMT9?#r_LII%Q{XHEGIhze! 
zFVB}^BCR)FUHq72((a|B;8PPtpYd;Xp z@8BaQFlozmHhOo3BdPV_SF#J6_~&Ly6)sH>vxkHvZN6A1t1-p{-)t`I`t8>`7qY5( zXA;PzRV%R5_wd36A7BjCsU7e54s{}A*P9VBLJ^zaW)ndiSr-pA-_NygY`NQ-?M6Bt*x&S{8~jM0%7U51xl+Qkv2@qlKr_$)M9 zCbV5y{EVGQCD88KJx%;g0E2sT*?3kHx8*38EzJw>(<@*5VRzDEneVSG>^U&7I%L1% z5bUyI6RH{-_a$8Sw|j=7i7$_BaGjD}xd0q~XkZ(AF)e<~pN$~mq;Vt{j9z|Bo2cSE zL;h;*mP~6~&#qY9E)EXbj`d>^1LIv9)M?eZXXLvnO3>Y=54z%C58ACxC3Q6X9)~{X9DU91KqLH8LJ&WFUzU%F%xIxSxq{xb zrTlp^eVtybn24R2wpnh-UclmzGUVo=Y~6E~iA4s@i-Fpsi&IcbIyPvit&%A*(@$Vz zx`qM|x!<7d{s7kMem3VF?eMHi8|t95`{|zZ#36GS@&$l^O1K_$8YCRysntQ{O(Kuc zA;Grq=qayK(86CwCDlbmO2}1u_O<~n3Zh`sBb*(|Lxo4q(G;nXDW6zhe6wp?JK@=# zDq5zkXkvnTPzX!KGDHv^9Ii{WD6U<(adnbV{bK?lhPgG6Hxjz4wE7~0ih48jAfC9~({BMnj}R{X@6VfilxS?%^tk0PvLPB&N>#9&*e`D2IP9(ouCmL^Xzj5v2@i9h7)a(a2)ZGALY zA<_nS!@kGZr21L0ZWlDWN zTK8tilR%C@B$}(;v8q9>YNtnzhW=>QzAg_g($=Q?T+q9+T`)IIp60#j%r(o#VQHOb zbnm>>4F<(PGuLc-eg&!9cyX;Yj9_kpV>SE!1kOy^JeeVBRW!254E=nAySCla%mz*V zo$@W!b4>DGjcY!9wqytJt%)^TIX%;sssL;DcZFKvu(;!#_2w|oxo`SPi1JBN&Vk~^x~xofz+Zw#qh;6)Am5Nkrs9+BQ) zrN_n~2m9{$LA?Y8iRuU8<(7In2I^#Qk&nDqN68K?S~PgHO?vohp-&kQ$55ZMLcNH5 zL`yk5H>EG)s7ec#L7To3e(x@RvdSjOnH#!)v4^#)0Xbqjbd~B2vBVxf^)we-c*9bh zr9=BRVcl$e8hS?tb#QnzyVM}vUR(ds0fHm01As87aJzJoNQbuq$icMjYYMn+?&B+&VkQG+QM6es5sSh`50h-4p6G^d}=4T%ccV5 zRT1jF>$QsAJ8=|F;)6U|jU$@G{d9Dma22o=Sm+h)lt;rrTpgL!sQUIR5c&8mW zrmu3epDbZ$YolP*RD%*KFBD&-QzzvxX~b44l?F5Elz)8I{(&caSx?JSX7Ecj{Cs@x zOk=DqWAY1cTVI}f(aI?gYwh`0+GLZCd#m3(ayxuB!YUSAg-AY5rM1co9>3kW*cLsj zDYZJLy>M6GdsSlB5k|(#AITbexbjz$tb;ZMW^dphY%6%w=5J5jcsx59;f*%1^?;CM zqRE5b0%V-NzE^*xbf1v!i+n5_6^{i2U50Gr3iz(85lI$`XmKay<2$qkyXH@(33FL;+d_y+6^G?& zzyVLtcp0Z@&;F_?kB&@kZ$~>?(X7p&<#TP~G<6l#sJPS3x?ad(9jyDIp?imaa-_KD zQvGx=fYL)DptkPSE%JS0k?)s!7^HNF90EL9(aBK*Wu z7!|2WUwcpH_&Z4Ex<@MmO0G$it50wz$=0NoVc`Ckx6})ih7^Zbc^LPxr}R=uuFjv@ zK4I6P{&Y(xmpI~->!6^_8HwABNiIRTE+{QVmD2yg_ELP2=98~&>b4}D&8^u(Xn54l zMLh+hv))K>PF4MOS(5Jdt&6_UV{@RFm7SzwXJS(1a1uAm^&T5WfkIyk< zuHV9f-x544T-%~;d;TcoFOU3Jnl)twg{`qt{;(ionR>ltv}uu1A8A zts6n`jzE(3HaY6T!I0qlktlDV6PeHY+&OR6E}sx z%EVEEjvk5rBjwy6$s$R<>=#>QqvYTqDS7CNaRKqcUaxw`?pDauJL67B?eufaqnQSe zFE?H%t}RjLc{ivjd8X3o^`6?LW|IgUM^|P{79Oe<0@!lrg~2PG(M8*%A^qi(&m+Th zd&#wH_0ni&NP|wdh3J*0PB7ystHM9H95u0G6!w_~xD3j7|}Iki2ju zJRDMfQiM<2v2&47BNE>IvC#z&zyqgIBN9H94^tXMO<%=E`s*UAxwlv2yO1BTMA$Oa z4p^nvcM_+M+jF(4aD3qSc0G@4K``C9WFuHO$l6N=%89d!CTW|W;$PxUWK>?-LAqR% z56OQCRSrp@aiUysnf;G;F!H3h3qyD>x>h7@Wf*~R`f*Se*(=P+Fz|y3aEqx~8Jmg!*i3KqW^fzs#S#Fj4+&xbrT<0I{gbD{dC|-t8COeYe#=Ud9X`ur(_6Y%eZq*+i$JWX&y<;Ic4rh&+H;kmiU4!{mgq zMfGs+^7;!s{uz9MC=A43fzw0*Xra&@+Q2Jd%(oWA|4LI8I>uIBr9S%SW;W#^(t&n?_bHCV_N}J|D))*5ue5(1W`;8O!JeAJ<5u(B zU;cgT1e$NO8cL9#;tG~jumE2m>kX!W!DfRAy^uw-Reno1SI`@5>PNutG;ni$w}@?B zj^DC+id4dmP|}hg9Q7oqC||TBbF&4O7_VE6cXFW=nK}_JBcq;WGctX+XVLa`k%6`5 z+q)XPc5of51X9-P78D1oJH@D58lTFoM%6H()`C5pDD51dCT8Zc zXo!Y5Z&jFcXZ60U{~70EO= zTq#Mj(VdR8_I$M^Zd`fY4yU4|S&9fMEu48$S;{z(grlc2$zk5irlYI5Sh7(#Bdj+V zet!47DZeskCu~=$DS%xuiLnJoM}_G@+#|t8=@bg{BxMK>tB;WMp9@MOCb7*$Kz<() zTir5iLNS=aG$J_nhgMz85%##EAlns)6ZA2base~y4iC)xXCWsaC|OLO!U72t^w zC7(Apm)NdOD8c3EU<3UGZzzf%(VrGFrb`%ONGc26)-+Pq_Hn7pz=RgmS~5!*cJq2^ z-lc0f%!tt|%_31osX`4e!8M^7pxG=}m9WcGj5kHU05zb`HH_xP)|pz~YuRZ++C)B& za$c0X=c*!BYN0gqfaEJd;pNF_6M1YBC{U&9IJ&SrHE7rXc z1<~U)?~&EkUey?Ks3w)UcUdaDLS8^6xdc_I1ZgbkLX7H#U7s&0E_W!XE~I!~BThYM zcG@W@b`xBihRD6H!t0W`kvuJ_^8KTiq$dy_49%Pb~v7C~&!Mmzx! 
zo{5o80l4H#OZMi#0V`ow=rw|ume1~gueA6P92nn*7z&wD+zZ5}NMbJxyyf0e0G8+5 zA}Bh#J_R>KN1bCU3=k(=9UwNXDyORE=8(<6-;3vWG@-dfU`Ertf{Zb?81Uh_`wIg7 zQ|VacG#<6YmFT-WJpHHW$HbhUd^bkNCgpoy!rxSKk7)fxKb$^zB5sS58HUYtWAX0TErq95~#~EM+<8WQ+95=hC z|Eg_<)_XslA|N)^Du09c+yMa123#79nd>UGOnU^*0dncYc;-FBbnLP`ZDA@w}{5h8bx=)UxUsuvj*qj>z0O^h*m_)ZX6 zUW$-ix^x8AaWJj%JQs~JmBFl_QmRdWi>)3|fz1oYNG{-@-@PKy_uf(t*fR`$8M(0~`|VjM5e-04FUw*K z+b)KakrW9y@)pvwJ zHWrt0Pa;fj4N1iN(vts&{;U}p?ICQXStmn8qO3hw&cGv*ak>G-`KDrz`pBcIB*so0 z(iMZ!;7kbE7an~qYCKuB!+3J;^yj)PBFq*)DbdZp>*&f=flAosakqHA{ZZNMp52iF{o4JPsfZRR10gC;%=x!Cxf254uBcB_j4k-5lwt%b!IKOeIcKB9z2-jJ~M0yXP3R=M7b{LFzK5rKJoLeqcEtYn&g!sQNZDQH{`lY%c?}%K*B)%Fn zmm`~g42D@151q7bjR#S9$38dTiycj}D^YEdN^nsPE$uM!Nq%#wg%|fO;CND8J5vi0 zL>%VnpEKA=c`tb1SuT^6u;u;d9(J^n}<(Ombq5V( z&I6BWGyIN>6p^c*wBfMCDe`Kd@g^!l}ZADMW9yQ$@ zx0R%ipRIOc5m6ku0H&8B^rZn43VaPPhp9Ps`m5pl>gG4nG)ZY3!35|27Asl~<@>^w7)U1&9h&VOK9`3+>WMABX&Wg?R7b(INrbyd_b=QyP&SY4Yj!c9 zZ>s<}G3{sbqE_LQ=nx<6?MYBe*n7`^;qbV0*>kf- z#3})nhOic{pxN1EYzlPmuoE}kc(E9?@t*1kC{RK$Be`iSuIPTB#E0!T)b|}vS(5m! z_}l$e{`9w3K0#qSsPsAcq@aIg0H2!K)3M9S~YYu?NbOlt05qNoS zhad{BMg(^W;LL72bV4ACzNpff7U| z)f7NooWl;Gs+JIWY6F_joS$aO?qq$&nbLKe3;19Fa-L3zt_MDvj`2<4e&Fs1lr6q+ zFTiX#NCq0|eC7D)NkFvg0FJFof)WT{*MJB12m2a*L7vC0*vFB_fF=@R20^UO)|xKz z@bJxjFyLGO^{C6eZZFK%tL1z}!RJQMXR*FSfOTO5_;2K+WgU7&PObQ|NS>i@A}RvD z7Xto*OGp&}?F=w_cIEr$6qxhI#IW$?*`9ppmJX;hJ3qebOfTiJ37m@)g2dw$o|RAp4Dn`^8j`0OsCM65IzCMz^j|ZlMeVx~sRex$fkw4dk=UdYY0lNy@U_w2 z!>8;sF4WTDPT3@U>=RCi+I!5+)!|^&=*Pr`fO@2M?P32od}ES#ebU9x|C!fX4*>V? zl^iH?Me4N;Rc(jAAS|mb>k*J~ZHRzH`(p`wu?EMvSm*fm zLV$+hB0*Mp7yhA!($o~OmDg=Or0lkFoOns-QMcuNSVtU%hwoAas!{_X821rv4>Lk^b!<9QG!o{Y zmRbSq5c$wPNYfy!mrpco|B_>Z{N&C_vGY-Pi*0d!;e21R)4E~HE2`=`aB8wnC5z(Z zQt!Ld3wG11KAap^FDv~$iC0xM%?PI=tu99w0F6sx9f%#AM;Fgs0>q~N)Wq!tt`Kqk zNlkr>RtVqGNtIXY@$&LAak$JE-aZa5p1 z`8NSdS;`~y7m&46GDn@mJg?(7L(k+fRv8cgTFT7qiK{n zY~AL_`5yY^b@n66w1OP;a4uFrBlci%(BXBex+KOjcO5hW|1o#{$+K!z+=Y@Gd@CiO zN|oC1bC&(^ZtIo0yMcRDI!^%2j;1k;udtJ)4nw@2Q7h(|jvj7H^&1<~tm#Y8U6j+W zeyo;S!~c5LzVU4%VTXD~jB83Z=-+ByjuzZBsu(EmXrPz$Y$)W$hrTr-f@J1A~lnIxzT!X(KQcZ9^V?!)<#+$gMdLR&1`u2&f4J*3d5*veY1F%!(rLcah9JSz08 z?JFVuo{NY>>czq=bga$lwl-u*FS_MnM@6>E(M9XgwbPvPkyhdH5moTo$5s~cFK#s$ zLl&ddGhgfCwrpXHf^7yLUe6MHL`{+)P;bWUWv#-T#oz^hj~eFWOo~%<p2VSLGB?F>vBs$#tvx1mZ{bo-WsM~b7pj)-uRre3x*mq-Z9bIdSk7^4&KufY4S ze=V=ierN+#*IvvdHh;zI|J`DP(5`Rb^{gU}g;Ef<8i-ko7};)Awn^1u8fBzP1dVhd zVqwDp9?cRG_#NJO79^D~<_vq|ju88WI8^BAErxj5eGy$1 zyKS8J>GtEYP?4u54Hi@&Mo}s`9(6&z%o07%LG6TFHv-ONoezL5+GIDdE!gu#_APa_ ze2upx$w}{3iR$gM^e9fhuRQ_@-w(SJ;#TP7uyh<=!i>#DI_%91HjDPHfTf}&D!%#h zLn9qmfP%PSM@9RjmqvnQgfcm4wLxAcWc3P+Su|U^g`1oT^!R zT@fF4@WZ6!FXkLep1<1lxrVp19xJ7`pb>9ac7OFywTZypS|rJe3s6|$X}fS@F~0sj zer4*1}?yHRzh}IE!Y|Gu{$|{Q^<5YYPu?bmxW*3Sa9vug4-bgl!uwhsBlap zcz!OSNaKOZtz10Y7;;z?}Yv8@7m!F}ti z=W@3#AvM5T`-}oYugvESayo;zAemi$U(q}cLD!uCd*6P)3t(Q_0DXQo@z#~*Y8+;q zV27P6aXDjCw{Zr+K5UjsB+LUhx&Q=r`$R!k7Z9dptpO6Rkg8U`WbnT$U^$NNZ3nF2 zv!^vI=D0!DVG#CbhsRI|!MW6yU^M~nXhDdl1GT%&22Tk<3pnu<+NH~5=TnkD>>N+m zl~3U7XSKeI7aVtnsDJAdeLnwMqOQ523@{1VfOET|NrgC&4lwil!n(;%$V!=9y=*WZ zE0rMo$!?)N;t+DH>22CAy}eyPyvL^*Mwvk1?^xA(NBiZu8}f1F*T+~8vO|*p6dlfw zI@Q`jLPblk^)I7~j;5p=rz;y)X+PooX_Wrib%B(@X8kph0#@hgp~-{hMl_$|_@@+C zhr;C{-3IkV?ZdVpdn{L;e#(}#*`FT7`zlDBHgs|iBWGKw$mjCHX8(IXd1CgmfRfC{ zcS6kztcNcknaHXrgUugGR1 z`0WkzIgu>33yAt4YlApCD}9JorT_lpTd4rDRn|@6#646)4Z!!=`Aj2FRd_-8S8Go> z1z4ljTXfwQqY!mXd7hZaZ<{oRzV8IuZE*0j`PVSlCWCIv3T-Y)v%+kXw z+Bog!AG$VQMSHmi@P`}`zI$vL^?Lpm%9!cMA+gD^021N>FfF}e17D!43#4t-b|9N) 
z2S7W_nyx+T`JL-d!NdbE*PZsG=J838$nSalXTGK!g2*AAIuF@l#V~2k*Scm@XA|3h6`X~==FAr=33NHcE&;}|NsRkVr^4sQ3y)ag@jKv3qGAo!<}F8O z&;Y(UJ-GqcB{xCi%5F#ebgi?cnm~Ol$_N3{I_H{N#(s96@u(5!Y@(dkGQRrNqBfCc!kQ}rKz zVx0_51tz`8*n|G*t$kJqgMhoz2scMBbow6or}z7Z*J6{jKq4#krMsy9sXB1QBUbp~ z|M?2zI{sN__a6t-O@k2axBus&{h9QiI(o6P>)FsMsgM89rxX!oTvA-Qa(Q_plMnqT zaPs$2BJb>gNrBp}8?O?e2BCQT#Kk3Hm~H&9#I1``&n++4%3dO5n(=%7Py;WN zNUO-Qm!)cXm;;mC^SrqmiBT>Y_;HEuCkxK1@cn%!VckwsnZ3lg`<}VX- zMMC-Gf-emT?vokfqPaQKAh_tlvS2!SP}1g1;WO1Qy?@fc_aE>6^Y5FTe0gD7u(1-a z=XBt!xO}3SGzYSmIc(vjC1qn28y+Qp3%~pOTg8!NJfc4~ z&dhMJ^}&vah)K;3IM2>YUezT5_4!8g768cIqD+bbm!Nr`_`bxn#!u*LP_3CmHau(UlSviLVfM7 z0Zt+i;~RcKJu{`{Up*_g#y(W4Gx|>c8Vov4Y;yCX3XY>Mr5Rzgl7vDX$BE`U6xK;4 zM_Z}nzjUKK=!JXR%qb=M4EaL8>aQRFzTnG8!sU5I5d7oI8ZXTSeq5vo2R&M?&uH)e3az)$n-yE z?58dJ!H^coo5S-XNu)MPL;@cl+4t~Ax?~CtzzUaKt?LP)eA4NV{8IegQ=z^+Qb{ol7 zIs5m0V@1tYRAI)Q(V0Lsbn(E@#80zw z29?2d{w;^U9!SQ3F46%feb&$^KJjG^xI^|CX3|_p9BU54s_tR z=;EXB_Qt}h?YVwW>-lx3J)v|bTohLBp}Tn;!SWj=}ktyNSv-~5MmV$ik?h72%Ek-g>9)8V>ujlPYpYNrllXHX zUR?3zM=w8_I3PU{@z_ub!*Rp%UoLp+55Cfo@E#V`FT=*B;A@j;I`jbrx^{Qs*IfR- z5KyE!HPOkZ|Sc zdO!HjmfUs2@^yWNGbNN>zp(oIv3Jx`NS2A5p==r=97D1Ci~MR`Wocc#!N+Rv?UOtN z52R~8#tp3p2fq6~px@I(O|5uoT5A@QL3uR8R!w>ImMC5Cm*Y(-yLLB0@jw|z=spD> zv-C@W-+y;}-DyE6o#i&F->fr{^*<7!dk7IyMu?COju7Z2|Ml(74{ko>OBE`jtfqA1 zC;vk(y3~IRB%KWhrKr;k@0Styb@83j7u`!b=7+lb&9aNU|6>xgC_i0abiW3@2JPHQ z`K?X{jY!jcz1buIzLajdQt`iD;@5>3+VzLCg0;61HX9cC$>2P&TYQ)aV(c0DAmGvVAaumWg{}k9%i727Lg|s4xsAU}O?jjkZx83{g&`%nUQGY*W&E|C zed0*CN8v1zbxhO=Z#s|0DK^|0 zHg~CMIt(Cc9RE<%9w9_610iZD)F??Z;@dcQePgt#zg&oz!39FDlIelTRr)udok3X* zAXU8Rerq;Ux!6m*j=dXxH~nH3#D8m0v)8p+`y?ga8_43@wV^z}#OT+BFV?MB&8|-L z2t@oStn_YBM{N>^(+4OyF5=@7}K8XRlk^T=t;S_(AyW3>C zuMy$IO~3Xd#oyLY!^{_*_Wd$G3IPghyZ@iMas#0&HvfO=3IR&QPU^j1VqM(#!%Q)P z)%$lv6^Ik1TPXJZ-$D^^=at<9cRj@FIS6I{HVCmrKa@=BUr;iHLHHjL{kcs4BcgP@ z|0AORw^nqTiH)88!-(xz?95~}znrXG#$spSpnW@OqLfr%-3Lk74_n+!^~24-{pseN zO2saCV}#nrwr7^gjhvXmn6%?Y^7TH1>($jp))mmGmQWBo=rSW5)Cnf`f8+JCo5l$d zp~qGzj>vn#EjnP<_7}X#V^8MU?c%m!UKDu%X)6~ z=R0qkx+Y2h**w24q&1*SWJVqvBxMGt?U)e@hl7Z+UNuS+fpw!w`|TEf8Xn3%Uqskb z2*RFgPy$mNuV>FVgpc=ct{gWtB>;%?sNDN)^E|vAq{<=-`;h~*qAv1U0$h9>>|y?R zgwy=d-n(U!nZn2G*oo0BP2VC156rt1W*?z)trksJ@7|YDw$;~HDUvkOuF2 z%S1jtJ5l@1{ysLh4)zZ+SXE&j4kz?iA5yu2!fhEL9T zL0J#qeN2_BZX`e4HE=A5xCyi6Ux}MolPHfqe|uiL5q>(@78bt{-BWY6&2O3^!+P>P zA;NYP*=KLka}Y4Lk;GJiv(mQs6^d^a7rhF$GIecjajZ45YW9|TPVuVY&&3v&d0~l_ zjh9frE{7JldX}?vkvV5-jed_85^;p+v5v=luqiQZF9qT8=$4QBChNCHKlGXn8zJoq zxG1(@7ve7U1W1Lgndwl(H=*aI zX_Y+FoTxYU^f*m~(>ER0njWf<4$w8hE7k@}javo}m@C^9d?4x*dIbXsRn)jBFm92g zB+rYCaz`t5V6^b=IGa`LZl1rzFgq(gsXh1_yhGF4biOwl2XEE*pdhvYOx(L9AH%ID z5Wm@w9h*86FcnPXh>1{VLZ1;4x7!PCpdJ)TKf~MzJ$aj8I(2-CHb`PyKlt+O%vn8i zAwql9inX%CYl*0+=~%vY%@v!cjt+h1qatDse6QU>Y)KN?{`XrFu+^ML_k%gYs8<%% zVXw5u>C}rv7<5l_LIz(iQV$+1OX&@7^{jS}f{MnXow54_5TQiR9wN?gZ7abJ{F;b| zI9?_0%zSVzybaQ_idfHQ>v)+_OErt^MXrXyV25@di^IGmh@4K;5Hi9> z6<r>5hk@?^IQ^ zc#Y!g3n^YSsBPWe#eR<<3Gv}WochJS@<_y%^v)y1wr~XnI!O(7ZzZ-Wai;5K-``Jw zH5>Aug&nQJ&5ys#pSu(uqd=2S)?foh5u(GSQhu{?H-5M^4TB%bdi`Hg)+!&IGg{Fm z;jr-VHf!0I&gcwVpN+llg9qw}bPbQ~_`6D5*oSBqGFe7-76r?^;dW0Ck2~KoKzN{$ z=wpemNTDEIpz}6dvx){2W50K0>heOrN!GkKsjUPJ52cqeqv>hEI}w3e&JEJZD7A1V znYD1uc#y>IUV#(Lr|8zTk&G5QpN0>?M&m)GKvSSsF`pveP*>`<)gZU!DgTi>PdyW8 z{bR_mJGOCrLc$ZlSkL}98UYk6x^rN-LC#Ke!<$?8~`N%SVF zKU+__d`v!H4PRO;)GT9|-fy*x-#Ayrs<;fg1#)Nan1}gh8XcM7V8=hAHrn9Q{pmbu zy$Dt&%#-}0jn&)Hb)wYWazO>;gFdH#1`+7s)Qw27c%kivIdus5H4JgYe!uEN(#pr~Yy3gCEah zLRb|6!+*)D9Dod(MCq~KjNSQgSh4NYXaLO2>nDtA9Am^ED#PH?I17(l>ouomsLLX?~;zzZklW3 
zKeVY>q+D+$NQmhXUJg~m<~^KtXm2oYIgtx<(f9E=nc;03z!t5Ou1b*irz7k`^=B;ukW$;uzl@aTcdcS zu6MHIG?7Ew?GaZ6n|@={wCB6DaJk#S%6qaqx_psDe!izam^AHK)MPK_47of|(6=TR z9DRaye!0@Ckm(AW=3Af_Zh)*LfapTRWhQ7K!pFmMZ>I(Yik)$(AO!b1S#!L6bS!bT zq}Hq6jXw!BC~&~FX(zxfTWK>}L~Pp(-^wjRE2HURiqCFQiE)5H#8XwPHu;S#mb8#I zfBQiWBdcuI*0Ns1SqI6=4BEx*psY@o^XCGmT~FgTvPtRCy$^-4GhBJCL=0ei$Ghjp zBkH2AUksD0d4BALor*wG<;hj?ZE6Z2R&2d=KD3=Ddp-p|r;?Tb6d6qo0DCZ}RiUf~p8TbWCSmu@S=`(KO{vUH7%x*?}| z2~Gp{E47v^@$fgvFfZ2$tpX&nNm0mhCOB##=r++Tq;rGP_U5Ub>qTvzol|HBR>mvj zGJxI^huV|ZH#>DXSNY5L;wQJ_pC0w?CkD?bsI%==mO?=NllNIw_JKY zDe6;TuYt8Ay8FM_d(Wt*w{3k`5d={w8xf_eTTu}Z=^a!+r7BXSs;G2A?+^h6Q2~|S zL3&rJv;;&!=>nlcKuYKV0tq1`c~|hLoO|~@cc1%yx&JZl2M2KwlHXcuu34Y?%vly^ zKU41D6!`L*%VKr8fY}Tq)JK+BNI&q&?ejxaHe)%RTl1tRCz2f8{5Gybp77EI|Ct9; zk$#J`Jnj3_3Hd&;o}NOic$k{)saUuc#Zj!5&)`(PHIyq#@$XV!P%>qI&;{_|fmajc zse+e@ThKv(`HaOgv}iqKs!OWfbY9fli++>AwJU+6(w#Zvc9=rrg1oP%x#s9OAQNeb z7*juAs7W$*0FR(;Q}7!b1ito8M$I4pZ=|tm>p&{I*v4#DLLJkuNuKXRwi~?97k;#6 zJzO4Dd;fw-f_|0!#wSZn{)KzJ6X&nWN=r*G#SXFqs(jn6#qt;l9{EV#&DLVrAYH12 zmYyEAr?OTWb38lAn0vGUjIyoecCk;kjc$>Ed-<{-KW>ko?K@9RdU5`xyj}V7{1?u3 z?qi(lHBPZ`x6TgP$w-xBwSFJH6emAo6ymwPw{y4dg}Rx3+Xlf=_u6|snUp*|Gw9-K zwvk+8{CROdz%Ww5Wp3<@mZeYLeAc%I8HUAiFzTk|SsUyJDZSR>_;sQ-%F z-cewK*8_7hAC;&_^IS_|Bg19eaiKd1C!Xoe?cvfy{~1Y~IozWc?LA#C!I3=x^A#n| zy`PsJL#n8#bbn?`W@}d&93D>4lqJQ^z{n(|Q38JwN^wiIL$1^?(^JE+=82B*GPo#KexALtb9qdBvS1Rj<4)_9Z77a7D~?XqFa? zNOeS{*@&>fZRBu&N>SbXTbEm`so_g_cwHiQOTL#XZ>A&&5Fs|R1hkYD%+GX*1O%tIHd?kxOMdl(3gM~s#xiZb zZY_sVIeC4ad%F0N0sxuzjMpnR&Q`IR##gks%Zmr2oS(s9T)DiP!)V)UHBHSNF(QR% z-ITTd(*8=ojF#mHpC^?6$ej34vJUd9Oxs^*8`?!~UFsab8F?ZD!!rR?&lMRqk8Z>$ zINEPWYzMRRNVhZA*7%uM6WtjpMt;VzP@Z?B5c5*j-OqwJhUg{V_T@)lm%zEim~DNv zavqrsladCW7{IbwL^>CEA%$Qo<|6X5wG~@gr4L&Tp_^49Sv&3rH3RRC|8f0a`5$t3 zk5keY5H#;5oV>zDPk9RAMEe#7&?ZTgwpBkBnXuc7iU?jWt|lxs86d zkncpiWyJ-v+>cF+@@vq!x0p^1izG+uR&yuhPT0&%=w>Und-6GZsem20RtgJkIz3S_ zNoEQUPdy{8o8g`|SQt$#b!y2bB>Sul&R{T($n6%?EhyB?+3fb1mxXUNv4i$)oTglr z;kTNj>q!Esj$mFvMyn9X15<@5-2NVo%oIqdQ1>1x*EcgXXF~? 
z%nvR9%Gz{unnPa1d^wu&3r|)%Pemx?thJcCaObr-33#3R5GRTQQ7JEwGHsM1ZI^r# zP0&CX7?yfX!&WnY)Ps&*+bO(N|E@@)`V6vOay@Ku%Ipo4c<7i3?Wkc_CESXPaVHcr{5s ztO!EO(v_29UV$~i$wmTJ(xsV3eN#UrR!)V?!LXKw{S88On=EsU7Af4>ae6bwcgX0J z?W`(Dh4wO(I2A3rO0w?2j}LyFcYJH6KYlKuS%Mo`r~ zEHh<$vBIiT2DK>C^Nc zzWhpwAoFu3DOKxK7RtrMDuau;A6(3qZ?U&3J4KdZxbV61{xR#f#vUZkxpc&h@lv~R z;&j}+QE0M_{PxCtV=jMmxajcAam>L0i3zOE9d(MTCyrcWq#zyY19l86!?C}UQljLr zHWWYBkKf%QRoXur_PjRzQUaB)i-hSXc{`-K8pxPlC?<)+Z1hEY@F@kChr!qwyZ%5h z&vPe@5tlYE`PEsdHj=dY#SHivIaC8Fc>x0(h0DS*tPKmO*g~R5`4E=nv-L1f;g-ui zMCF#-MWvO&kM+}4n7r8tmdSP#vSHyiETfw5eI0Q*Xnj~_IsB0{Y14HMlTuYOXs_!@ zf(T^?bNkieyBJ~QsNSBQ9^1}i`w*v)1$ucLaSt=AXIOALxxtrbx^Wa2Tb1lAaa5Xa zOcDhpUiu6*$H=weN?->9`jvNTdZ`fds)mpe$O{d5joOT;U zzKwiSYN3_RQAZ@snZYobMAzKaLiuUh=wf2;>A7AVO-uP^S@Vg&@#8qMz#GpXVCM9d zcJiJKr8||RjC=1a!Od&f2>YwDv*l?AP^GUN8g(UnZy%ZUpwzS}=OE+uGK)<7yyQp# z`Nxf{ZFd|w%Lj}kz0F;Gxl`-*1Zp5G-_N25zMN;-Vb~W4<4bf<-x|p-j&Q8!@Dk3eeiZeEzB4Xf6^C9B3JfgU~8PNg7D z%N$5BKOG4UuRXOi087U^QH(MOC*KYFZGT1O5jNCI>)@?EoL~I&Rr9`;FS9TOn^M9pyT%J4zxP;+j z{^$?6lrruL_HY2YXv}~dzP;bu7BT;k+;`t~!ox#;+Luv2+SF$iF{9HVMQC#w{LEuh zVUb_Exn=`mAZ|N(pI+ej=1cfx4R%D~_ctI-$fngBSVBBX(pYK5y&s~kb=&M72`j^q zT?iZXnRt82=rP`brUB}?fbN+IO-RsRNjgslD+75ev(aMc5nO&Tlowc0iExN+Tx{$$ zmPvx%aX2>bMGqKVGay^W-6c6aHQAG03@gKLHr)2xTBzy977oX2)y)QkXJIjym9x9qI^Ve`&11xP{Sd?9OtdfNg=4(RpuN(HzNvR9eYQeW%e#+d z2kpOERsz4;ul&Q*zUAt*wS(PvEE2nAmmR1@Neg_H>IYMhD^G2@tIP8fiU;#9#qe%U z%tB$&AeY5=hOZ|OL`{@9S{Lh$o)0eE(n0pmyXyhmYQ|2nKIsBT!IXsWh+{JPsb>?6 z`A*7Yn;6~L0nE$n6r#7+`g=ayJ!8Omw1{68N1{Ybbd_mtI0c?LA#U5mA?Hi zeV&4yk`3^AJmd_#@tLUX(oX4(SE_S%?qe~4F|cmNQxIMtjgF%QMEPYh7bRn1k z)Ym9C`T&Ksfu6ZI3$s`^->PrJ$P`WaiP*B`Sup?<%kQ3@HQ4ff|A%P}O~V4tMkRRW z-QbMWjfvk13_!ZRG^Jc++>o?*nydJ6;e{71R*-VDN1RC zpfUwSQRZRtYOk5uwQxDoy8O$)oFvDsuX8&3#sfZCo=qcx<4Ka8Ow>LcWHXVCC1(C`P)NlqT^n{Xt!9W&^88*W-y2+}^C3%U}s_Z=b==FF-PpqnPGL zHKPIJ8-yr*hfaQUy9Eb3ySf}gzpmL$l8Rjcj$bZwZSC_2S;qr5pk|)RqTLtKIB z4tJc?xK?_s^ML>!AZjlXfxQ3;e0AKB{Leyja~cw+5x%FKU`i{R@fHQhcfzB$eA6{-TlFt!Avf17(mk~^fiMP)CrJZgRLC*o(4v4F z{R5eHsu1V?t~6rm2;t-^sioxjm&37Z=j~Dm>#XhAhTEovddJBoIqBIIwq#DI=X?*G=jj3J;Mes&uu(ziZ9Ra(J=dvg|GFR zMv@m|&Mgi042*!K#-zo2=`%zNA zA!{M4&P^9zE`~g$^L4O1>x)h>E8w|QNNhENriepEMn-T+kg-=Fw;(=Tbqj7rYpCos z6k#OFpsJY8ZO|b^C@eNfi3Z)yjx^rxb!$Tjb2yy*h>uUjWAbI30(=})1uDlJAzrxA z(DnsFOK5+lZoZ!A@J36xA9h%@!r8*vJ)d{;ASJ82+1#Aa%;~-OZSG)7UEZJ|KMMjq zs|EG67!nij8>wSbS|M+*Lb`iDUBx7lv7Kf%eY%w3|4_MwWN|z0vJ}vW@Z$=7m~@9;gTV0i&QGR1U6ajz!0-W3 za|gKg_W~1Q;CM~GZXwk8sah+qB`iC}czxAnS<2$mQqoP*5>Ii4!qmGtLjHXyshYoM zwYkuS8&`h?stGL)3$eRizGWmo^Wl7?A+iE}3_u#DaaQ+zKm;+5P|XxVuFO$9W5utR zX&NwiGBFtbEGGA8zR?>}mEm*^;`~gFTWB^Jk2H#JW)LtjEVRWveNJ%DEmCU(0s_jQL+yS1S;qvy$)!hp^`#{O>wnklGl95H`{1)^O|lhxc5c73sgjW zgPszYjcN+q?gjrDXH3fr^6f%SQ?ho02Wu~B!w#3VWK_8%Q<2v$M&~yT(#)kDv827P zB)@^GZhsl3k4#`{KKCO5l(ut{;X5Y@ib$a?_Q@8#3bApzf;CeSlsZKPV~(<&>W0$j z!$vg=+55@Dz@^IW5aITk5}L!anOcXvU`%m-w6UZL+~A+mv4hyPXZu@GCOAwysjlXp zdG$smuBp&Anv(RUNs;8^a&M~^y>w|rav7MS>wvw?%?lLb+9#bJKKQ)0-Yx{Ks1hm` zP1F|>)Tin2?^P4rHZU(=rKHvxPZaB;s!=4>p5J#kZR&I9Ut0EA&t#(^YW))g(5c0)hP9S~ik7uVO3d)jeXXs}<%hwgi$et*K zOgl<7lPhps0Il!Zfb!eC63U7M1<3DfIt)x?yilheq@J8^80vjzYerw8-{-=4%7J$h zLG>srn7j^My%g&7p*pD3*w+!m-+=M(n~8&G$C!Hh4mnl{8!`E$5mL8jq3UF!4>_@vrh&ZusdNs|JNS}wB0=!-k`lZ? 
zDq35T;82I5flhuiKMK;rpG0c9XOy>sU$ZuoDr!OfBQaF9BPpg#wQ>RD_~clNMXtC} z+0HLiln}4{{AN(yF*F%U>Wkk#@g?nh!t=H56_AlEAj$hx#K&l4p#2ttL?tg0uVe!( zJBSPBWhV!%ePp-401ogs$z$u8LdFpQqcwQ!PP$Csi;JUt3Ll`%p=H-aFp`r4QLwXIcAFhW+h2p%O_Hn#Z@={n z$GD-Bt?^zlrrROw6Qx?=SF)pmMd(OB^ND5st^INOwcNv1r7ei%>*V#M1a^LYq-*Du zxs85gvYV~_6feYl1O;m^DK}@5HJh(lK&n;F5Og<6VH^5;UpAc25H>7qtE{1Ck1@*E zEnw7FJOy8dlF@}$KK(^&e_;FaCAMR3mv2J4+V(wS3_zK)m5}j1!1F_OZhPe9k6VBT)I=UnK@0V|6d37e~`|`HOV&0o+2NHE+jFk6ZYK4dk^PdK~!?IS4=bJ`{6juVwdg_eprIUALZA7b;ojoAO1KYesDq- zK3q5!t-3A)8t;k|H7*{KzTZJ`tUIk8I8JNGWjkEKH78Mt)Gxdy(KkQc!zb@(zcr=WX(e0X+iY3BwwBVpwZUP* zZA6oym2&n_RMnGL(G_NjKVDMCJ2P}eOU?K?KUrU+Y@B0qc}Hx*E_Yc zp|$H?Lvmui8)!+pwlA+M!0VxU6Z4ze z%BFKGw8%QEzJA}CDrkm+?o468aFtMu$@Y7fCM!PuH0*}1Hrap1^U=Wo)V{U(C3qE) zBwB5*irLJWBH$*RI|}?z@L5ZzXKxZQ8DD{E1iL{BVNqVf!v32)!l6?E{dh#=| z=6x9e^!A{&D$P?RUL5|_lmgxnfhF;yJB|1{M4aGjw1p|2Zd;X}(?ri!WWjnMx?LVO z5YZ3#S6Y>dg zXBYiRR|@*KkE_nBk9C}J`12y_69+%J5_u;@#XU@{tzS1^hetm&_sXdJEJ;gUGpjNm z(b}(XE)<^L&_IeLIH|M7%0~*2VRe}0pt3^tR$VIoLPN1Pno@I5dX0!AuC@voy&Fm* zU~OXtIZ@gvdm4a#q!)54AUqlnSP~TNE-Ft~;`F(si6|9I4r<#v$xZ1q@-Jeg#P;CS zl9g80xZ^BbaI&YupK{KQ$@GuB_9A$7uRUzEvfHRc=S8))ra4_*0BIAH0zk=bL+By8 zm^bcujE`{OXIh$n*`xNGa|828{-4_RM-$A!zhy5!1dcmGcD48o-K4~9t@_bu!f2GFk3ofE7t7!KgwAVnTo)4LP4{Lq52f%Uz;%j+FA zpzP!XxDc$C;-hl2v-zgIw$)`UUrVfQpY&@JIf`$0!S8s?l!~w&Ul#x`ZH0ms@E3no zi=S1bw1D^Tw15)=#yKP)YB~`*4G7RfU>Fw^v_EU9hvLTl(87x2^;G8OlOT&E(8I8} zw4jk6ZaAh(DJ@uCQQSp|a^ir?0%$rrLubnNmo3}7dY5oC4VCsB8X)6HtDkNxEgPv< zXe@7+@|$>IFBR?4fwG52LsHV>OZ5#6{$=;^4*E0zE|PV4@8KQeu=9Mue^M4I9S_>l zul{9QI-nAfsXn^Xmd-}alYe=9IPao|uMFsD>m44}K^E?6POiT|RR}AR#dvZsxklxgKS2Hds4d?4_Ijwka^n=#m**_O5CU&0_>GFEy)c0Mp$+{`PW=nHf48<~w)tUiA!0q5E?lKtzc6;p?4va;N=3qfmR zQo6aoInWfS5g52H6jASdwszYJ|3n!k(kalAB8$G%X)q;$nEurv?K2JtVoD0w6os5Yn*wC`sN+tWHQ;L+87)|uC5Zv5^%8qGqVl4ef(kDK&Z zQG#wtyXn)X2SZgh{RMkh7B7m5zKYg`D&PQ{6xI06nPw;6S{5*c+j!=9l1Jr6$j>`? 
z_XHpun#npxCojRL`g<>b-BN>IDrWg-npbNsw5~W8p;GLIaYf(Vf6!u7jHD@U9JowHdo#l^qm=-`Q0 zD5ja?BByB@EaC^Q{Jd|!6>#b#^~qjNF0jigOebPiPtlD@r!w2uoH^6e`{{OkK^t}| zMeanw?|#uPIVxT5Qv!2ypr1zz0)?Znq^YG+X_;>P!aeRMUxRwaWuM z&M&qNdMDox=*t%y%?FH+I%RL{^o#9e>wii_YCg@plK+kxTD+3s(xTelmrOjw{Y#d4 zyYN)ZA0@R1@qaikOxR+cZ;`5!+IMZLw9dF9=7mSCh&OcH6hHw6zA*n@?XRyoTBk^T z;O(g&J8oqXr0eHW;U!z@Ba8h};@Wu}mw3hedS!s*^vagvya8>gYP8*ugyKCGb$+vI z%!>LuaWnh!HRTOlknS&C&_mKV#2J_77IK3O4hlm&HNu2&E(SA75b8Y{BVZw8lt z_U>80{NU*MUI#Bq{CSlB@w&~*gC?Qn8kCnh$YT-dukN`Er$unf#-e&P({^bzMUDfU z>H~zk$vrA2%g@zvJ4biCDHGu%F1ysguZP`vG0Ox#nxX1-HL&XbSD2qk5CK*+G5$sb zOGNHX%_U78L}&6S_$Oy&jY}@$Thhcs#dp~qY3)?L%+tAi8`ulirKUj6F%O5$fHi4B z=7Cd2^z{{x5j-;XX8IYrpp&@sFsj?)ReN@Qz%RMlc~PH8L*0N9#aEVr|{l-(* zva}i!A}TB#R_ffIGQ$9x=86*h3VEKpBVv8?3YtA$=Kpf`};#fP&8kfjaiZ2Z}I9D5tNKeDkw2cNyw zmnsX1&4EdsB~KBjqfv)}skcg{4`S4PC`WAyiRsi;*aoun1g~oj6{KlkB>S$=|A(jh zEg1e!|6rmvJ9`}k?zJMv!A}WWjv=7upr_;gs;C}u@mv6hFG9aY)$&yFRLm95-A6^& zcIMTU34FrS?~R!N5Ym1n<~9#67wDT*%TH=MK790!KRt`iuf7(93qhK0Q=(c~c6-U8V zSl&c|k5}ZN`;@SWzvbkM>M*e)%v#3Bj}Xe2k=e*j zcE9i^9;6FEBb(E~;~LziTAlHQ%^G*|uiAkY3HrxqlMtceO}P@=55SKX$n&eBh#3Xi zK{?o?mGuA~I0Q2non~#s^cnf`y5hfdH%YKWomeS>eefPh);H@oj$u`y=)hsjSk?Jm zfBfrHc3vp*gWIfk9t*s&yUg^TIB@vxX1+7*dskM^(GTXuK+d=iMTb!D6ucYWAG0i?Z=xiT`SG#!G{^LWGg*@i9!5;A%AM?2fQEEFeS@6T)?c&J$`Sd@JamWl%BoNWB`GM1;sOG;Gx&;~pj4JFQ zoc$C|yP{*)0}=WKyLm8qyOyayyeTmw0`6m|T015#Q;qg%17l<83(p6M(v&^tmfqzM z{uY@ZhfM$?S4oz0dJ29Kdxi7$bU0GFq z^88kX9*=z}q9Fzl9Y!G}IHULjVm8Fln!~21QYme0KRSPghfyE@S-gI$p}kZhKwnzC z8ldbnmi3tg3d5?oG?oO~#*>@!5tuc%-j->r5A%3~LW+OvdDt_69%sIXdrMh3ToK>2-FutC zAEL$rj20kJER?oZWs@7w6SGf68o_=oJ zk`Sm_k#JvF|KRPKrkdth^Q2kP0IeuB`P!fEZivlMioE+zR3~1?{qmbZ7vn{LS+=1M zRyITBHnJO2kjsWuwX63A447qG7#6VimNeUhCY5&#a$5RtC;dO&CpfrI1mr3GYmXJ# z4n{(nV)*?T85puaYrf0-mqou_Ro*TEDzshOy%m$({+97zpH}m*<9s_>GdS$ObUk>x zNI+#QLBowt07{+n7nedetxwm2rFnQNuYk}j9>HI8Te))gs#N@O;`UYrhsFT(R!Oe|w>4wP6R~KbUGSfXrG(xs9G!i{PK%9`4fW)y5RoPuBg&NT(_nkF$lwu!7@M zZ2MD&$DdU_;^g{wgtS^c)n}hy8U15OVZgX&D>%SI>+W$u%e!5uyD9aXn^(;ruGCs6 zvr_XEQB6>Ya*^MxUHxMrO{9NIM3t$g6VQ_^ELM|Eg`s0Sg%cX>|K44QW-?Mtc~b~lLo ze62Qa^0ONFIYqHMj@cE}U6B}g+TDj^3H2|c3usQbgQ?>hyO3BMJ9&T!P#tdL&c&ny z>X30jR{}%{G>afGa#7%Szg>&2aw>EYG8T~Mc0Zp+Z>*lfic8eZ;om;i*vf!BW%a#Ik~85Ek3kEnOg)>SRMrWV7%Jv+z!1Ap>U zUE*lmmPew1$uumaAJZwM_q%O=dS$aR@$9|Krjb&Yv#3l@D9F;qT;2s+*d>*eh4^ES z&I10&T7TV=&WC3C5;UmlVjlrYB-Wi^Yj_g4A)b>qZ73&(=3RRa&RyRB+qd^+o~;#t>D|n?r`st%0G)2tcVCcQ z=5J_?`nQY$%^bQ)T!4h%+{J>TfLO~$qOw4@({gUE%H}27YJxWo5Xr-O6$VoF?+@e8O)IcA-Bi3+)K$y5ldY6OG|kgi9PLx%twkd78lB($3r#JzJ=Q&9M0ho3xi{k+7ci29%vVQJYm4gyn(AE1w9;?cT(&gw|Yt&sbZmuVT6_Ff$}TVt#$(`I6# zzClGbo8JZ@ge65rlm>Tq-^#i5x%W+%tW_W{FCCay@Obx+%s>3O?#P^z@n+Ln)5>Ux zy2YjIfOm`VmTY$yadyxcaC=H;-^HvCK9+i~lHz~&!~adVV(#gHguUk#ZMPj(hdZAm zImB&K+XRear`o9@8pKB>5+e2dT}mAd4Xfhc=cQhhKe%u4xKTyK{YsV4ydykpY>^t@Y5wZb z))epZKv`P8475ZL&2v=BTedjD@sG?$WS^hyNoXhJ3|?ocFgX#x;qr;~Z>?>wQtH$x z?eihyj~wV%K0-b^RN9VR2Bm?ShmRR8*V9Oc4Ns34%j6J`G4(zluT{zqEIsHX*;_9r zwGC0A`_6iw_4DVZn>V&5j-f_LbA>+bO{=iZu$^5HIr#a7$S3Pie^|3q! 
z(J+~Mq{E0MzwhnT{D#=x5$>zo*+o$6^Y~yh!?Qr%KKnnOD;>8-zSfYE^3J`LDSAUq zLojUSTGBrY;?Dmf&rz4SFK2OW7ez%zDn{W=T7BTcNc)!F4wWhq561HLCwBVJ#>Fpx zXZyQH_F(78u;WLzHwDbLMC=C3LUMm0*F-K`wnb@iegaaJK}BTk=Cn z)LVvk?2>$wvV$_tAKLa6u(PV^#>U1SVN(-!R6o-{eeTA+OCFi044)kOGk&k~Wc1<{ zGtok$(np6U+0|UtGDCDOsb1*1!q zJ^uS>=iaf;9clIrH5gP<+=7popSN3khG(E2m&s6%}Aqmpu>)X6bLsc;)xW%+`qf#rA8HO9rLMMHM%c6ACV^gk& ztUkBayPkf=^}J4!^|=E2j!cKTxMAlU)Aao=Q}69u<#JlL+fE3f_r0KH;7rv_lKuuJ z$EKf{1JhvY)>`=YC+Cb33Ai2*&BeWghg zEOOk@W6Ja|=PJjSr!k%jeHS$L&@vkr*z#(35zVYrA8yPeZo~b2EhN0Ki`Ru}R{~fB z1(H+zwn7Xm?UzK|KRsg>b(*@yXDjQpefW>>(GREroyPU~{^FFLPhbq$s}AXf3w#&2 ze`?6o3%q+yO43b$o~Mi_n;xhd4NtOZp!>~X^&N$J>QBF)@BHsn>J4$K%rH9p{plAK zpCZ4fM0Z2r#D*tmgEJ7GQ~~w*u{Y%je@y;yRy;_z7T=r*X&Fjt%&S>MO{T>jNscN_ zsIWt-5n@?kDU*zg!QU&T461Sst|s2+=S+RTeNp`-ZEj*E2Er4=)*2#{?D|KNd&gW+ z!&Q+~OJkk;p3Z5a_!RPXbNMisl`BVFFe5vCrlC>HAb%l5YO0um|Yu;r`o7uu6TTTjE;&3$PiV*2*8ZCHg)OkmB!qg=_eU>1Y<78vKaUNrduk*_bLaB)f|e6D8v ze$`siw%T+Q?K|TtgF(;*K!BI{e^l_kIrePaX`3vCU6~jw^;o?haCs|?OJ+S|&xpHY z%F7ooocRQ^w5{O9)3o9z0#LMKUr+ropV~uA1050D?F2n`xbuWBV*AE0?8pP}RYB{7 z1H(ml?Ee1kk`iZ^3h8p2fx*v0|H~P&^P}+Pf$n)^>J=qif&=6Kjvb=J?$+wUpZe+`G^)g!438CWs8<1CJb_rrO(Rv3e zC%3o3#J5av!+K6EY-PTmly+OZ#=@uBn_(|AhDDE5>EtUaY|W-eMnye@IBjbSKvx=Q zb04+GYZ||zrCw0w_I%P5%7Ie5r3wQcSyvryObwOjtf?)`{5fz10Nh>pz?^JTwN5!) zA`2EXzO^TWdxP+LHe7b?TJ=Iv%`6y89v=IQRvEic3?IAciR&AFt9)>@C_+>EYfbK_ zCouv>Dgs6oy-N)l)asgNZ(tY-Afw631njcK)9M?MiIE188n3JAnf+tT{NjpWTh{uA zC$`|>@>|zf9=zU*$}CJ?I-e1_zlDQYc4*S<2BS@Kd()EyFbXml@F;w)FB_15@P)~* z=mwp7;Nen>&a93knP82{(PtrAyeYw@B@Si>E1?q=2bg6!rAst^*yYp$-ghz*25q^KzTaErT^E$VZ8M zFK5|9f%=Aogph&M`{s{@&YTHCG~k1&)n_{H>9)A?$8=*~mdl=vy@s&n)GQB^eDmhK zekMI{VB>%sos{F5mmCtv(P`Uo0p+~h;E)7Ci$HXHAEG%3C85PtVPpkd7cVT_1)qeV0 z+Q(40?%)5O6+xSWMOrn&t=zcb1hrRxhtPhSE)Xu(rQ8u3~arg+lQwA+--IeMu z&wY?hFawW4^^8um z0?B?c?#r@>%FAO)U@ZE&fMEP2+DP$Sn9tT+cCJ>6{0VP|mUG#&D2X20;R8bzc4}rm ztIFd~a>2CsXZCd)dNOLsB!{aGBb9*G&{j2s=jjA2h;X8)8BC5X>K*A{qdv*&&(yH* z@oUQ;g5ZBCxIQS1#guG<$&Hp9Lx)|KQfWmOWP(D*t~wrJG4ua}h2j7wMNX3cT)Gn{f^dY?%wL}N19z1 zk*(7Kb{*=@y~p-7Ng4UrUeLW>e7O-a|Mk;^q1Q#``N@kS?<|eE&Ov>ck16=c9zJ%l zzPu1PtfKZKlGcd4DE={Oy&~i5F(z(tXHEvz9=;C&LsZ)y8P+oW8iPuK%4^$g zycFr+z5C$M^QFoWrx!2YN!avVpnBA#Sl_sM;FhBJ-1F|wj>1zUFlRO~^_FpO|C#-L z?LxsZ=a4XSKAh13YFFv^Ya#W^qcuseeROopwNuHo-=8Zw3SA)%&jU~k!RDxv7cu4*Wt>A z&6_OWdR|cIIo?QQ!`(AKy$X9Y6wL#oA-s~t+CQTDNEPaL^gYRP-nmYhw@nZ5--Pe` zF83}6Nu&0MThNN#(1VW^{8t?U!+yk^|1lNt7j8+U5A_W$X3poWowcG7S?zu)-~TW_ zCv`fRVNvFq6)Q`wUtm+2$G2NUo#&D2jWbZ%*w#XWI+@qsrI!4O&#~MCYWy%lz<2k; zOmp_zZ}Y6KmzitPw2WOTbF&dI94!ggoOh5qosn_Vmd2DSCaS)t`}{2|maA&tQ}sQx z?M9eRrAz$^%3Pgz36t*Jz?=ZoWMc4^_>p|OeGME>uDv2O9`6Vypy6iu9+#H}#m_e5 z<`FM|Em!6u0uR-isx+x-Q;ELf>%=o!#}B~FoC%o>=-U6ehhp-LlMH2whE_hV&YBm)(0E<1($R>yNze!9HbphFA+ zWAS1(IZh8HPIw~nbbB914TPEiBNuOA|FMEFU>^Pr%t=o=dp3{TeZ*1o!v{G@Mi5x! zCOBf+C3=AL7-wj#G323#lCBI6)>J{Sr`=%Hc~K^C>&{oDJ%DB{_OAM~@%USUWIAZ^#?k-PhPn3!fgl*op=y|QmeI35N_JXBCj`vyMheKDYhASbJ z5~|aR+3>;Lm%ihQ9^AOD z*&Xrq(1DQ;nSOlfBYQFS)+NAd(KC;vo+|noQvT=k(Lc+~X&(Pus??odzowC~)lqJ? 
ztq(4_F^E~mss!sk&#V^9>x$PPoU?#3Fs@|YRJY}f@G!S*?(>bTvtiUV_gm=W0y#y3 z($;D#O=Rh+d!Dgy^w-f;*XD0oT(mj&t(99suBTzy%o?eu9-Mi@lObs;EqOvP=nn%& zS$V-|sPLyMg9EZb3(qDl9%<&+HngTs>UhAHbNzB=w_fb|;7o2xU|o=dwgs{h{GN#O#y27RB&jx4swh1>gMQWsB z4<9@k02z4Kplpop&5bxSK?BGE2+Qe|I6i?-Gx3_7*Sf>Ul|B0sJ_S#@Ogv?)?vAZD z)oZT|A)F5FvQDw!@m+u2^sTJ!tT8%uE3CkzHlTs~3^Mzs^$n++qG|OD17%hbu%iwg zP0h_ZNz$&fK=fRbXz_);>jGf3Vt2oxHu63W0+#!&pMLA@`uh5|-qKX#6NdGq*s}rG z`PvgCvMB+Suj?ONZ!PA!GX)Xfw@oE@Bgw1HJk)GN8ogV4?p5R*`ZR*{BWM&;KlWAw zRft@#jrr;=pqCe%8_Q^jhqe-~9nL@axb}sU_L(zFqP7Doua=oVaNj(0rapB9P_fN9 zOK?6?)}U6cd3>%&sEiRMb~`s+Y=qV^d17Rs!j1!xb2Ct}nli?|Oj$XSyOZ5OY7!{v zL&o3s(M%jm^jZ754t7~)`ny*J`qZO+XM&HHh%bjIkT%l?kCc0##g)gHdW95K&*$aL ze-i#Q#jjrbG4DuwyqI&*VTh8X-mLR0`4cCUaK4hrgOgI-YSg$fHb)I1gLjGw$TWp> z1#TX~&;EQ#peiz3_nb{wmF@fBIC*8BgGqbEXN(mKL&mBZ=1Ck$2|HzltDwtsr^*7PXm=Iui?l5>u-aA-HaI z6dbqiSe&z7HWKNjg=pww#*)(HlSQuB8NyYBL#_+oG!XXJ7EV)L9gicio~2_xify3f zIjc8(+lPINWbjl^_>NR(w-;`?FxDpvI@uK3GbeKC(vb-Kk-B@svBpdE=)MHTPnaVHSO&unHNT53Q_=hXS|10zK~DZbb^Yb30WrTMA@ z(utBw<2!uu;m3H0G|SfrD&sjI+t%Vzf-G4yXNT3DbR|X{uG}Tgs@C}) zUgq*VzMKjC;oAG%#gM`JXMCk+&v2o1BW+|N9L3J4=V1ldnwQ4B(YmtlKSwd{J;PJ8 z_VSqU$v;Cw=-jt8*o{@O@5NnD%Fm9dR7pZl3d;t98Qx7ePK-?FV#U=LHh(Uysx{_Y zf4Ddu^3dqx@RggSaU z1>E0QzveFV-NnOH#I^o&>HcMnSz6D_$u=An>0AZLMIPOE2Bay`b-!nwGpgg=oM-t_ zKDn(6$<#+;xrd9YX5wE}#1SQz%6fWh&@hfzGx48SNQ-&}_7o>r3yp(_&5RcKoLuXy z$!*x%8M-+k_ocS@`yHCjDcQYh6TV)}VO;F0ch8;q`-zhhrut(He#d$m1{j(=F7^cq z^N6SfIr54KK4=Pgnb1FWTaoIF<&jw5EyU$5`)`*5Ub)*o4UCLr>TDTjm26`+?&%n< zIU9zk$CbR{e_wKc`Z;U){GZuf{~u>>9aYu(w);yr(jd7+SRhDuw;(J^x&@>gqy?lq zBm^X+TS~f1Is`#dI;6Yn_prZv@A01Vd&fENIR7|?4u%XCbI#|!uj_MNb8F!mdnc1h zBt?GP=Q^xNJtET|Mf2R#N6=)?h;aV`Q9V>PbN$|xeZtP+#dmvOG;LhLD799?NvD|Y zh+G7e^G#MOpN(G>wFW(tc5|`Vc5_WrYVDaWeD5#pjqqMe)=5-5o%|W~wXmU8D9{EX z+bxJ2EF2}@z+mskOvJPZ(UVxa3{xic-IkCV&StFA{(!f! 
z;X>blq!$16&bPGkFFopgrhZlG5y9v!Y%EZGFV#+Y?ShW{oSY=cgUVX!<-kn<-T+!V z?-)Pm%QB-r9#s0{&gS*I|jpSxqK<&>9IS{93Kz9+rU zqIoYf#zc&}F)6yapZ}a2IBc*H zOt<&W-h;Y?{P9vffmZ7l2Ut}e_zC)}vCwbY2FdniPh3CHx&Po85mZ@3UpjMM(qM|HW|T8+te2M&sqj z-`IR4!5Ox=6xCWdt3x^7S25!LRp0kU|I}0^&%wd_WNU(P!HZ)2E|Xk;symL{yzK*= z0#_aP-R-`QfZYXW zJtkfD-kRjZ4hM^HP|uC-%t5z#{io{66Qs^up@Q7p>W`VbA(YOQPc%@_4<9pf{{&S+ zJ30sPShH0qY8@&o?eVEaN0-)di+&-CpfC(t z@wx6fvtkwmZypdA%#K&!x1&(fyBY7hlKkn@dO*c4a2;~NYRl))jo?@6g`K$z<`9mQ z@Cg$&PxwBp?xJE`TqcZ$b`nV*kyI+3n!UwPdX&O-!-;o3uYa5pzI6BQGQc?#9hO*t zcZ8tkh`9i{6sOoiAjJlc`|49b0Aj}~j!0*HZ`h#kE!kBH#L=VfZphZ3@BIE^<#8)C zpvxC*ESX;dpTScx;HH|Hjv^83ik2Yn6_Jtv#x6|#tF{zGWfpmvpK)zQIe4|l`DEe# zZn8GQz6btE2d?|53l8iy%B0RwYbZ$*YpPZ#1XG$CsbV6W_~?VoOqqd*TeUk{YtmHz zy0OteT>!@Tn)3X0;1+td!Soovgw>Z0uhSW&qvVUP+d%&ir|X{XQDp5zXcFZppF zZ!i_F|FkM=CK|OZC#@9@!lfO4_AwDPUHT|yh*7;H1L)4dU7laI(fv?c99CZbW|=xC zK0yI5SD+rb6ck9J>lF_h+pnj7 zRDv0rU#}lkwK~08fcSAdCkr^3^A$cj6#cax=`%{E0v)RvBBTFE*pj?AbaUW8cm%cd zyh3_6v$?C}r;do?E6B#2XpUX!P9Dryj^Xpdsc9uCf^xt+NAA^EN$D^4rXkdIPjoqL z4b$PLL~z<8V=aTRDNt#^N+6^jSFE|GhrcYU|Ls*jB%}HM?z%<(Tj-(-JWfI;HntHv zW)4Khmz0J=Lhzhn`Lm-a!Gm-rAA=sdx=KKrxluD~n~g&0*7hpR01tOy>*r#-KN=0A zF@)Q%%Bb4Trkd zTKw!r>s+A?_vhpTre&fL9de+UU;sM{=JhpLjQ@zaP$){I7^w@34v*@6Ubi6Zh|ipnkJ9ZG7)d;n7MFOYN`ikMnlP7?plWv^xJ(~w+*ynicY9oDQ zDaNM@?|#Z_yOW+S339u^y#R|Gtk)w!j+_9P|BT0yxM$F`R|x3%ySU&R_Ffi z7Aal={766sthhpQTK196o_kL-j8R?4ZrD2 zm3lPbv^x+jHurObdv(qLH^|Cc9I;jzzx98tSD-xT6+Sn~8W!j2&9KzBj@^MN1Lz&1(mYyXe~tbM$I(fNa#+v6YCPV;$}Uxspts$V~MniP|j3CkVQ zLfKrO4N}exOMooNXT76eFZ%8i$bGpmAQ8p&ONj6HK%RFvA8no_yl(wv-43QHfLc^Ww zM%>Egu3#yc>j+t>u}M*%=Qhb^Qc8!(%Cal_?g8Zy?b7#oh&zO7W~-7O!*W{59;;Hr zz$?MJLt~-EYcrHC-9C2qEg#OA@YSDZW+{hmY6_XEnBSKn2!<4rWk?uAysxZ}>O;F$ z;ZhN}Us248<{&5&p?_2IpcH;z*S9Q3@5)V7(W4HJ+;=3gfShNQ$XPaNDixwEbo z!(4x0R0-wip429_Ax@HnWSt?B<3}G8ZIsz4&7U=j_=RAqd7QXwMz(WaU93S}vC?tr(6W@i+U&h6D0a^E#-kI9=g;+LS3Cms&I5&b@#Ohqcn997x-T_0 zvqWM&tcKVD&Sk#zQJ(nmBR0opkLe$Ky%9^I!hH`WX^*VXg>R2l2gKgi0vy!Y!!L+J zgGAE4B+m%-vcT$ieFz(@&174PZo(baHj2yTKSK=uo(*rFeqa&Xo-{k?p~xa_a!kE@ z*|lK0W5>0IMJRE>eQopqrU~C<_87cT6>Bo3c8eJ6{|K?N5@(A(@I8oZFD6k?@x5h( z7<`+lkY*&`HorZ*cljY^;_=xSCX8)4zs~g6e8E_MNhr5}c=Oc~dWs+OI2qD>! 
zMnt(RHS7$67MEdAtMW?OTT74{Lq7PelDviR`>*1^g)B&CA#Nr53?{L?0~-@>%3i}Z zvpkIxHN2pb5|XeO@)$|4iZ_j5oEaGWKJ!|Y`yH`_q*+*4K+&Yc5v7e{u5FCo{$LAa z;Fbwg3aqv{TJ1x`E148^37lb#Z)-+&Ed0tW;&Ec0s9nM1r5v1@is`IG5@J_VWUgF; zbQ^nVL49Eg_4_tqNbP393S)K$rg>T(vwo9i@e2XmPbeFYN}n1l?NleU$v07<3B1awG0R7OC)=C98KE3l)OdnoE zW=V;6q)aSrrr_W+dj{qLnAGqw9e|*=+kx6;9rLvrdCe`xGE(gFNzOdv`eG#-4d2+h zQxfr7cyV%Gn#xO+5oPw0`p^@t6$!|63-`cjwLEGu@GO@_BIt;`lI8={@Ih~G`4MTQGjA+Q~LBpWc_4HmB zVzDQ$=m>MIOzXqQJ?C>;Ps&LD@xcu3H2gr+ zbTB;bKG<1RLuUcU0~GVk@`nmQ0Q^Z1hH3tNAi#-1SP%z;%Sa~zQL#%fmu44D{~xaB zK{w6sRWw6R$dl8kG*5^<4S5~9xjfggQruS57kz0^SqYPbc=!0^JAa`s!VZw@Sv7J~ z&=#yArafIXIA&(!csEKAt9Q<7j0$19kY6l~)PQ>?Pkb_+?k-QC&tb98yRg)R;N< z$q#H|eL$vJ$x5SbZ<39JtovqcaIxT$BRWcyoaR+`y5qKsWmNU8LgXbJIC`6C#9H4$ zl&G8r*}PNWS3doRnRsVoP;E&FkeCs|4dO>=KcjggtAeS#>!8}fR*#vZF)rSJLz@OJ z2p?$qEiB!lruDnV{YmA%iYvcds@KRZ_S6l4W}i=NRsD9uM74uw%D%X(4cv~+y+0m0 zIp}vP&+O3PHStLz$38eRifjsw5g?#gIcQ4%J>@4_cYU)jswFyjNAnw^u)a zcmVG!D6(#2EQa6SnGk$t1h44f(eU$~NcAnP+$W`C^{=U%V2pW2uY-~-HalcaIoQym*heg>*?J^e*R_22+lpB zo&9f2-~X1;rPo4DIxO#?_Vd5c%y2pPIxjSQCSBBcVpm7XZJ9(W>{xIYNy4T3`@T(+ zYj1O793Nl*wW2~&@1bk)yw_Ec)3A^Rm+hEpZeCtt(i90HzJ}O@a7nD?Y)J$E?H|&u zy6#N|HerSDbkL%l)wkbsjGZS4e~%vs*lCOibLq=_IcIs?Rws^^6yBs9QQt$0rd14- z>_*&_n|Y3DPwz^ICz@*Mo~u7o13^=D*tjxmMy2G-#wpma%1Uf0bBFQR0zmCLH&sKmK`XQyxXl0HSua*R@OB_qCRjF}czk z7`w9#4_sqp$p@<}uixM`VvYS(v&*NhJ5!?xpVVX}`rp#Jd=Jpp;3sFa(?WeVUltJ~ z9!_s`d9O4h(_cFNu?@?+H9+UV1X*|W}s^=>k|6R1Prt_yr|7OVQLYc=lm$%KYM;MdO@5T&|vJ*OcqOR&^Gwb0_z zS(=);XdCHTi>V5s@4eQp>iI}`qPP2&y3USFKBqQ%rrgkf`kf3jYHVvlKfG@8v3hJ8+2>+-|U{-M>EaN4ek<{J<0)@~K> znGLk&Ok*#+0wnbF_l)fvMDx-1A{%IV^}Ot#jQ&?=^8fTVS`0WL%I+23 z|0*9x+`hp?75P^`Ca9yn@H(HDn*CP8h7yp_u%gwHs#a&L1(&LsM&_dQUCGf>tw^;H z3f{vTYC4=Gqr+`h9=p!mKm9Gb@EM9z@<^&6Engts$};`)JIJn3hPjRlwq1~A8PUzU z2w{Oob150hKnTQiJEs?!P4+R%;=Z}cYDl5R@4XnOQzbntl~fh z=6KEU0sGP zZKY=W1Jx~Ad`sqA46Y@6qVZO#*&qONq$ja%tDc_C+Tr6+D0IT?S*+*1>~>$MgFg{?dC)v(Dhh`CuB~qDd2Q{k1g!6;gltCaVbT zs^KHOU-T1>mu0GN&_!G4{u-~|jXoAzn3fd+MwM z$?O~QzQgD|!Jn{3U1-OaXVA2%?zMiX=L|rGGc@SgW+O3j?|}XKIiux%g}>tKgD6hO z-tk2BdrveK6?5n}tgvfd{W=X&GS)YbgwCdnp>8tFdjf*E-=8BmUp{XOZY;}WQrbMz z{^7+mv(k&-hPt)gee@c!6_^FiB4%S{2CeD)WYw^kA3Q59sfkP4^78V|v1`(}sRqcp zBhpHiW=1V4P{F`V)F@%}Tjq4L=3fQM&-^R}q6)tz?aR{{{>G&$tq&`g`*+S&OZ^ZBAU^rnciY1CjM6)D2%Dm0uWP@weJd){wLm9mu4u2`uEf&yrGVNqAt|-8PdR{O zQiZ`%jfQKC;CZf%Rdsb!Wo2Z3^rt<4{+xlEn<(3`soC#Xq9J~qAVFV9FOuA)@iY4- z64z~YsAabmOVK2qqN-gb^@X)MijUQnB-edhJ?%?e@~zJsX|Cz>R68m(tq>E9GHN&d zuzEqw0wxwIdzvE#ISw`^o^K-y35*&=z!x7n^*o)>(7Pszq`t_G7T)T|h{%BG;RHE0 zp|8W%9mCdrRoB#p0+wkBd~UsAc&;=3IgT2ngL4NQ(1Ic`U@_$I^ia8RR46j(HK>{$ zGJHlbm~A3?uwKqgcMYn{KNe%F5aTaMj1f5_#=FUT-l)C|uN2~4e`$gx`)-@Z_E(tY z^*I%Nzv*ftbNZjfH)ycr_xWnh%}81MY)PF`A_Q;sb7y6)Kl$w}WZ85&jReAK4mgVLu!w{$(zjjD1v&L`?|+jQqxVZWN^YWbG06PT zYBS3nr~B`IAo}dXITJl(@m7wY-ANIBu66f|vNx+NP8KRN@xNJ#{~Oqni{aa$ju`Bx zmMR6`-3;m$Gf$~Vy<(Uwv_CWr*Fn=B0I;D zSJO(0)L*F|qE&pp!7}0rnLj){44X(+19K-AZmJrhbE-UiFx6&UEM5|3dJcc8Q=Z4> zcQnhffsw-JWhCOfGE4XqFsMJv)!jCp7~&UJLq8T}z9?HWZHaXv4SV*8m99*sfcs}ngoWqcuI^$MNrgsXE z*nbkq%S<)88YuJ7E}Xif1%rJXG1Lp8&`<4;vybH$9-v2Hl(1;yaigP8wpMI;j^FB3 z+0ZLcUyK*3q&dxW^OA}%+6nYLdri&D7$)FuKe&6vZ9DhJ8skV(9C3fPwyapUaX7;f zcb|#uoM4vKu-%rWZ2pa1R6iBg(j?RxMIJE(JXMt%=OFOhjLpHi!MuCxl#d4aowrsy z0u#Q;6EaTAJ8L;nXKZF2-b^RIxzT2y=jBt-n;uXu6Ux`*J|#+2r^9I8N$-yRyetT5&F zko7RCw#27V-^~ShI34+_VQiv59sbgvaF!O-SNzsftPQ~bOmhPPpZqj?{d7f<+xVom zbsA%8-Zc9TG=-^=l_7slKzUND_n*pn%Qyh+j<&+GUZoqbRw0DHHgC@t^06uSc@Kk^ z4V~W_9T~TTwblDMA-e!w`zK)TQXN0!>M_Tm6oN^1*hOUIa^s8Qe$#Q(w@@@U)o-Tq zbh`1BsnQyH&IzS>8}|>^NBl(ac%XOBjoXj3n%&+!FVAxo4-XSrSs3ao1H5ONPZhq@ 
zmX1QdA$NemAgxL%*!xnKb? zX*IW;Ts407-4$)x#ON>U=Z|v1at-G`uS-B3ors0*$42~Z z3H=tnN8j2MAk=C5jr8+BNRD0OB-QDfJX*+E}t6| z{URU#;fMa=!8iYdhv%ICg7Q$6a~6l+lee+r?80?#Lbtl2F7enV5|IS7X+b|Ro!d64 zZM6BV399W@$whax;?@Gmf8cQHhdB?aJwFbx%hR!SPp+ z{9md^KmGC0f1Aa9*_#5-p@TKJ%dGX>LP{sYcZL`9 z_y$lq)a1eNP*}saJdWeF`li^j@P|eG05HZIleF&7>BQFHrxyr!P%wj-BbOczn*Z6K zwDn*^?yZ}4O-LzYj>b!xUIR_|)2#JR_DfAOOe5C=B9pPPGqse#ffNrQ1@CP7;n4R4 z@=tzIeT@D7Mp74$^?^ro_bXV?kI8-o;Z# z?AmRi%-GR#E+puqR+u4ok%q$QW0FVMh4;VtQ$O+#GXT$}ewgEaBXRL8;M0gGd0mXO z4YEH!nxS<-rPJl||F9(4nWoT-ns@GGzWBvLAI$d{lvziNJq5~cCjCUz_wb0>?kx|A zQh%Wiqw!E{jC{h4~_vT1R4O|FU4t1rPTNul%wpfAZspp}C5TqAVH%P8Cl9UdUP{;B z6eN||`T(!P?1Q7_n;Pre-IR+o+>#;w3)xViHR{+=k7J~SeNu+{j{Vt=J{tc%>p%m( z%n5gbD8$gdwom#9U?Ec_keh74o&KBB^fDDG7U!0QX0MntX;6RRYx=XU_LskY0isrG zW#IrXLTz^w>h%+3!7hFpOBXw&ZQw?_@~`)Qb#1#T*T_DGe1F}OM{IQfYBLpFFAH{4 zRS;Y*D^MkZw!BRpoxd0S{=7A(?q-kB39!J!LPzCy6>3mV>X{#D*JtIQkAEVU2bDwS z+^zA9{`Z4dUe~ckCLyzis)Di}u4E;B$rSK)pQoG^*j9enO>lkSUU@P$resr@o|kKh zTmf2^eB=`To8wTiRBEI7`mGt>XwXCpPdyhEa>d?*QHp9b^F`0@* zS`?H|<+^03J>|`{o|852UeGjN3T-tro}YVMbCjRdTSvM` z<@AK&cGSb%JCDyK%PW(bbCwE3eWh>T*c|lV2QvDUi34H3^I*BN2#f25QAMG(Sr-+c zZxdL#+#5wKs`ctnMg9GXzx~FB`RF=AZf&jIl@#iq5>eY>X0B=)UL(0|zMXo%9z0hF zA?T%0M#@~7Ay44Tr%L~Q`+K0#jr|xkxxA~wIgc1#f2IbFHbo8@jd$jZc85tf@zx{P z#^`fTnmbR7RhGy7WBC6W%;>q`RjG-1OX9ALr{W>pHtZN}W}R(o)6zq+@TxPX=U-y9fsHcbFcsY zt!VtKwzOwjg421^1tk@1AJYzz$J3~+{MRQ=Ts;e$A4XuTMljc z=BAlqRv2VAPtW%$zDD>6joG=g)TP7JQYaJ}m36PYp0Yst!qPauJBW%FvB&F6va4mJ zUH<1Rjdn0OFQdzzyWh}ZjYV}{1 z4<}eYQno7nVr%A}w0L(^j5W1iiAxP}uo5ih+bVo(q6%r?{0O3%|DHcB=f9>) z%_g{i*|ZH^yDqRENXHfn=r9*y9e3S}Izgqmo&YZbzn`hd%BMiHF2Gc9y2Bom2e_KY zZBzcaChHEe>|J&tDxt~>5RBt%_$c591`4rQ@Y=?w_H~~UAx^B!PuhsC`7i%yCpAkS z7ZLZ~!VZ4-m#Nw6jIbX|#B!T$Yw(K7O25Fi{zgW^MvVy4z|SbWig!kv9W%@sQvQZA ztrJ1Y@&^#{9ZT zB+}UGs^jvPX~zmo)uB_((x*uus5#PNo)JHHKgG7eovz2Y8-p0_xjsvc5FejLvNHpZ zC?+dy50qo8BZ=sdFW|$&>PSs1K^d;|nK||t9ZB|@sONH9*vGGqh(3^ma;BXj@Uazn z8|yjaikAH3Q+C&=9S__U7O=%L{r+Cn#Dh5z*eN3S_=!>4w9bYL^B7v2sfYv%THjpQ zlc1!Q+G&jY@GiEZE5`& z)v1lkS$-9%xTTlOmKLQ$9ref)JKMDe(S@+)ew_oD0_aVQ1;^7YOXmRt zTy3T!LtrZx``h%^?PbYq7~r^T?-F#it71q1T+}dX3;|oU{S4MXYH1di)Gq0Hkb>h3 zp-9>ZZ81M*I2M9Q@K*>_P%Rf6Yu3~cHu>sd6PjLLS?#3%k090g90!iS@9=;5mFxSj z4k|g5t0m27L62ZLO_dT75?1&sjvpizc!~kH-i@oo){{)T<9HC&{V0$ za3sajUVfga)kDD%h0^S5SvHYdtmgA9dplgpdk1=4Lx-ny9@L+KItNJjMv(fuCRO#? zjefo_`aWiIzKLE;bw58uuHZ(LdwM%76A6-=w+(Q;{BlFPe|W__NP}vAeqxKMJE<;Xz#2{FeG6Q@c!I`ht=uV`RB&T_$=Op{eYs( zddm`BVa6{_n)(Ow3S1?6Q5`}(ATdp|+sXs5vLCC!yJQq-(QsBi6MC>_@9w8rt7J3! 
zI&M{OY|a_hzr{=X-Qt_s7v9X8FI|1DOd2kRuR7xjDJ1WxzKK;mO6>5v0@3An9)(B$ zYrg>=-|cd6jyl}3-IrL#|Di1PN=mMs_RmeqC{0~(uxowT1gBG>f(pXr(7XCi z``nOZM?6h}ZLx>EGPW}1K^NJ-Dl1Dy5W70(+s75aTS#;d%w&QAv9;_0*LLY zrEa-{S$MO8O5z0`ds^F6fiOUs9M&qumeTYVSnGA(d>7uz=uXP;4Hrt$Xq9s#W^`Xh6fc}2kigN zW=iVJ)B_w{h$i$7J#6WG*7VMh)N4IKTfCo>7p+oXFP6#SrfGLQJ$L$f;eNW<*k_K* z@DC8=%L@oKV`{Kr+D3*n`?}?V*%rnNG-v260iEBw_#wVIYwEm zXRSBo3ff{UsqgOHT((~u3-H?FKY#tzQRd)`uv(lJh#v)3x4MrCCwmy=*7u)o?Wq-Y zdhCcLk>H;s*g!gM`{B9di~E223p;szA6rut&udT>g*>+!!cVURUp9JuPjs0Hvd{F$ zU*_eXtL2}c)V%^1ZXIy_>ljYnRObl{7I`f7^8j#*g#{kLN*i3L-F)b0Ch@cJ@Bc-V zy{3-)W8Bt1`%@<tYcJAY0u^bJ*ef?kOVT+fNz{|su+ADkI=R?}^ z8G~6l*+ZnM7vBAR<^6Sde;HK=2YX2ORKgZJ>y|hAW9I9aSXt`%m){KT{o|9L{Qf_l zae{^ACn?p>{p9AW4`vD(-MMzt>Cyk~jsAGop5Q`HrGME<|1ukY8Evupc5oaK;+L|E x|4-kABDnB>m*Ez(`oGKYr)%o}L(6c3dCu?Y-JBmX*lpmS<|XZmh3Bn8{tqrtd`JKQ literal 0 HcmV?d00001 diff --git a/docs/assets/deployment/hf-inference-endpoints-select-model.png b/docs/assets/deployment/hf-inference-endpoints-select-model.png new file mode 100644 index 0000000000000000000000000000000000000000..44f66520fd12d1fbe2b48e9031e9a9c30de97933 GIT binary patch literal 83638 zcmZsC1z21$6Cl307S|%B&?3d%r4)CEqQ%{17nh=Kao6JR?(Xhh++BCM?eE{+y?ObP zO)|-g%uF&f8?K}vje$yn3IhX!A@f;61qKEI69xtj85s$hbEaAP4F(2X&Qe@lNk&|p zO3BII+|tGj2Ig~kye2?Pb%-ECSLH1NvV_Q&19|KpxFTPWnfz|3!@him3&J%KQ>x01 zKrPposLUy$`c95rWu!^accw#t#`xjbxU0Ms1=eLrvz>c`+a2;ekqjE=b>Ca(hnZY? zRI$V?MiD8eRV2VWnIK7w8>M!IW19plcM@w|AUZM7GcuBOvQ0wTT3b-C&Ai7%i*6?% zuR66s)J-rjw5Z!0*?O^Pm!dEi@BGyg05Hm^p4Ua5Ds%=IRnh2t$d2*2-wGXm;T9BK z#0#Z15;6BG!cdi5KUSl|a5TT27$!6oA?J%E;E@S3M(G7&KmXZNAP^HEG$yrx-_QGS zyJRVRVEQ7@ZmaZbVuGl*SnCOuSL9>j-bKM{=HbHz9k=u+)=HnOLijaG;J7twdz#+_2X!<74tVXNe5 zQSLMkP4W+`vyy<%=}lMui&c2Z#s(6??J0@GggL|bcM_fF*OdJMt`W3p53uAveDoef zK8h)eWHGv>ef-%wQ0$?nn6giqOA@gnIhO+sQk$t$fJ-!$n}f}}zh_Xd?othN79|?=zyB^oH=(}8O8lwIGUMwD;e5yHc$x!oCc0!*&%Z3JAzh8i^a{- z!bxEOtBB)0UA|kR<*~bW&t;>i)?s%W)DK%Iw=2qN@86pd^~D&oyX1wWha6{pXIl{7 zlisyn{@UIYC@8+a4~g}CYts{=V;oTlzsP$+>jrDxeFTffAIXo z0y(qDY~XZ7@fR`f0=*^?6as2CaU0?6{8}7tUHk~MNL%4EaS-0V!MQ%DD7RNv~Cv)I(Z@(Ww^U9S;3AdRj1lCq7xgzz&=bst~DRK;n+N z2RV?WBSg5j<FJ#9%6 zqtxS6%x~?e&faEF)#Fg&d4Hz;DKwPvNB%qQrHp4D^)%l!^R(l%x(eVBUOUQ0%o|=(jq-WivUDB7+6?PowPm&Nb-~MMwGCF8 zP_pPG+Cyg+?@XFl?C0q@n0$Mrb$sDc?efLN?YQ>H<=E=D`QY7w!c5tuVDZX$Z<2-_ z1$$^yWK*W+uSd(IDb{h;smi_{t5ALNI|KX1jLeMB6lkf{PanY|U>h+0)9uqe zh66l*phaL25-G+wkqgEG@foKcYpcftWUGTXC&Vq}QwTeH14%v69T71pBB@N&JaIbP z5t}*5Gx0a#3N9h@Rm*PH%TG9|rChvrR{+_>KBL{rVXXq~wcjDX-~L9OVftP6J9Dgb zY@I1IyC9`zY-UU|l{?j4H%Tk9F3D2ak^5QV2xXi ziL%>L&w0do&iQ*A3$Au9RU1az@)ca$rTV3MQk#m!Y`1c^WH)@b#8YK&Ztp5E(sMmd z5Do0={|vb~4LPY@UC<|JauTIy<=zJ1+0JxX0ZU zPAUiIl1R6?)jJdLX!00R%9E0Fc#R)~OjYQe=u0q@>f5!j=?ye9yV$yckM_>qIhVK| zyEHf`x*DDXBg%TBjaiH`@2NZ`)6S}{0LOI&+Z z4awXa-VyFCSR?dv-)aEk!)I?lQZwwjkfd*d%7k=?a zcvHimA!EMBw7{6fjKung<%4ZX5Qn3NH%op(B0%#Xf&7;Cqp9Q|QwEM=XUU#xXmjW| zsv!Aq3Ib9Mio5r#%z6atJOt#Vd~S1#-Z%+3Kf|6rV-H2>;d=@X^I3hF{b6D3qUIub zGn1tJiQQzpkIk6A?<~@naG$c_FP}jm!_YhzoaMsOqciW_n92H zJLZq(mG`uVABD zdDM2Pf46t{=PtMALwTW2QC;=d-5qrk&ABr3GG+s?+gKha;TQcc+-3RY>ZP_UXe`!R z->U2$n^|^~cexo~KHQaZ>ved5pE0^IG`XK_OV>k)@01bbNTqt)0GqbH55)J^f3h7Y ze=-pCWZGO_I&M8qw3Ar1Xdwn&-<8Gbo;CzGWNr*KT^}{PYzS?nd&f9bJhbnDrvV{Y zjGzXvuVRbt^HeA-mM%q~Q1i3a#X}$7<1ZF+w2bMD1R-`$4e;utr|LJQ{dnR2*Qraf zr)_7=sZ1SEImq(*YSR!1s&k1sKJ%dSo_qEhA_a5vuQ#{ao`oNIUNGOU?ceuN-UzGt znt}xG87^mPCR96O+v9xez{?LzCo661tLp{~hW+Qy_?>=N7QZk%kI7*sf5Lq2bf$4_ zJ@Q%9#GXs8$MSy-QvWvCzKtZp^imLW769UIfLUz8fca3Us0d5-9Z4h&AR-+@Q40U9 zsolAt8hdN$t8?!{?JCEG{r4ler@#$y-7Ga%Sn(AQEKzVEKPzi_4D@>O-AqfyTwWfA 
z5t>GZL4YNJL4>AYp+N|i^gn4S*!M8-|Hy}ffeE#QLHJi41!(;DiG_y0IRA{{e}=#S zpucdT!6OUq-?b4iv*7=ohC_yy!HBAg%g8`uRTC#OGdpJsdlx?DSubb?io<7ZXBZd) zy1xNdMuqkQ+Wvy2nwE=}yd0m2y)CnmslBlovxlw2-*#XGJounVTQe6UDi2#5J7+!* zLF#|h;De_BX0uRJ{iBMDwIH>Yyb_hTy^|Rg7c(m}E42_R6&00$lc_nMiiFg^(4oHs zsV!Vw9QasR+}+)o-PxJ#oxZVr;^pOKVP#`sV`GBWU~=}fb20K@vU8^SXCwb^N5ah6 z#L3dZ#nRr6>TkP7#`dl*g4EQ1JNnP_&+jzzu>5aNcFzAA7Ic6te@j?CF|)G#r)?;z zz~5XxB})%88*K?oTj-oY`w-$}V-@&E{r{)rzdimBrq+KkKe4j&{4eVN6#d_*>dt0P z;`X-CK3#N53n$x_I7%7s@$2svsnhf*#4B@H`3WJkJG*y!t_4!GRry zldMmG8>|D$Nns1n=KZf(;Rg=PMx+$o&J0K7K)`~)LbgyA6dK36@T;uQmjO@+tq(b% zM~)JG7J;8TOChYFkeHXwd$gjh|qVdyF zC{ZIL(}#j9r$==q$>0~p@PZFG_weiAk_fPckZq0-9$*v4t=ta}592a^C?pdT$0a9= zY9}YhCC6LdS+=Mf{;*g)H|Y@XoRI8+j!aDCIB-MyUjw_iMch|y5Kd>JmRoqe=LT?n#<@eiS$EY&6$V&Uf>LB=DwTIC@`_JjG?n}-T(vLm97 z8d(O9e^?pAD*Mveqs}YpYe90G&88wFDM_cMrzd7sYV@@Q9g>pLWvFgunP2O&t~xh< z%CU&#Kla1#?snCuaU+JvmTYRcD&2yr)*{0eI30aMWGju#UdD(oDZrA_e+_pZar|-J z{jz-mOz>oXvvGh`a*mbl*crJg@>P|JY6c56;(JkLTa@#n{}2-A3Z2ulLO0!UbG6$*!n6A<}EjOs4Wc-3G(uJNem$Ep!Ap z0xV0b^=PjIKUq-zg`zp3EXS`M_{O9NDMbDO-PFS)EF3I*DlW+p>2H@2)pvlN56!Sk zSHWf#r^PGw1$|t|9JX+mbr^Zdp1VjIS_sIczEt(%kvnd4#b%hhd$ktJpOw6@VPOCD zBEMClu!yi9FzXB~ zLPDw$F$;F!5QaiWh?OuY_g<lDUUM|U*;Hp)&iZJ#rcqf12u!Bx10K}-B3%FKc^pMI8i;sznxJF5 z)Igb&D%4=0n?KcgI%9FZPOL75a7Zkfv`HkdCj3S0l$<9`VpFrbggI<2(1w7b)_MjN zZzqWIH3-|sK}1l5|L^L-fJJ7G0AF`tDp);&Ww$i7ra0a+GQQQdz^47EpXW<2>Ry&JMx*e zySxDS+u&5Yk&-NAOq(r#1ziiB3S^J`9&q<0ra1q<>A~U2zZO+HlH?Zl7Iq>y&YSrU z;TsiLQn5${#Z78H8sJW-?Qy5{y)01xTt1v3BBEF~0l}b~sAG`9OKF-=5}LoLl00uA zh=jaB5owo8@w!&htZ3l9Nhr&TKw(OKZ0bq?#=EOl7i`euWv;xzum&5x#i{M*gX_J-f6* zQUGlAO+kYEmZs9OP?Cv{k1sV-60HE~R~_oR;8s{XP*}RWY|1IF%wmdbj~uG=^q>(n zl`y%SOeV0gv2*bH2!vj?_B|s#!Txi)^L(JpCy=&Xi`p8y5Y4~tm`jC$RNcBi1e={b zy&{EgkSh()Mm5$G;pZ8 zF<~!6d}R#08Qy=)d3g~tL5GrSY1U@CEIDa{&S9K%F!Hj>TUI69)P@RpEqs!&PLX=e z@{%m+^$!J}BUBuPiWrA$Oi>ELJb`=-zubsyD}EMKFW90GCh~k>3M&g-CCj+?+ppLx za`I@;5u!bR{C=-n%kmd|m$QTmbwPf+L`_IC>?%GNUAa|9frXJ3I&I@>QdQPes&9IW zDn)$x$CtQXulD&mu)Eg$qsG4bVd~8#^1WNJ)}Gn)Z+Oe`S8XE6j*^Cj8_=qmiuxok0V==*}_V2Zy z67AxlvRTAuB4hq>KTkDF%81=Ox=bXWK6vS2AB{t7DJ$+2Iro1sGsA!RK#zl6Z#Os1 zfbulqb(`q{`7GYR$hkg|w_`#Dmop4c85%qS@1rC;?qR$Vg{%`6j0K$+ zsBE;F#?m4I6tWevaTc)^(hn8VlU`p%JyjD)+*p$yZ1%8-R04#sNftRO$vKqD{><^2 zGnHrY8w?zw43)mk4K0}Q%ZT0+o%H;(KB!oW*^6uyK`^F$OcnYo=e4hdKcm6|@K5!fFc-Wxx!r{R?uD?82g4TOxia2X7DuG+U57 zgYjL4dfA3qkn&7%Er#weU%kJ)tFw9gA;%#?-dRHz8F#O6-hYwr^Tei$2gR1>?BE2= zV1*pvcdsz7FbL5E%?&&(7)tgRzxx%3{8E(r@+&L-2tI0k{-E)(0F#_gB~z%aNd1Gh z6uRIS0lT^_)lN4IQ#!;dTj(LvMc-qNRmm2vgC&qM>X$7KHZ~jtwip@HB5evwgFCkzsGJP|hh)zkgo@DpzTLVo>%$L*<#E9o<|9K`4k^Ps#E{Tq<^x} zaR=_6kT7ERwY1YWtd!i}>C^W{H~9R^=?GpMBmE8qFW;dVeo0udz-7nz*$yxSV7r?1 zEnvmAr7qd9{Fw`J<5g0(4uitGBTWcmu#U^+%k6S+GdzwP7$jluv*vNl2B~)@N%yKW zIquZfQD5JD)R#xq*d1{*Qbv*V2=B){T~RVOpDoOh2@?MI>CHcmo@%7%l|i<+W@OA# zbj5Baf8C5A>Q8b7rK_`x@Mg&912MKZ6f2CHU|*NxD0XXb)vJJ}vGRx=;31rR8#0D-*F9*(zpb3Hwb z6gXyS{^|JH8+y81%yG{Ox;dC~5iA+q|MSP`XV#B;iY13#_i-Zc_GI>Ny;D44@|oWk zuJ2atXK0XBsJ(Y-_*GCiSztF5i+CMglOIJW!-?eh+PSzy6w* z0+6rUw*8cNrpwR2xxHDs>4}N3xIL=-$ibKwO};X7C)jdr&wtc5bN5jKl@LPLED%0y zuS!7n&|z9q-_=qxe3-0w0%TX_2#3sf5R-GUu9)(8dZG9}`C6`yhMpwI1J0I5^E|;N zxKBO?FE@_tXiLdo0s`?~Q>4V*RLcS_ysOUD5m&O_Z|4)&Xn8EQ+kQAMCg}iu>DyO* zgZ;1xSp<_D!T*IPX}U=saM!eC zG-v130c7+XI0PB$i$_D@{1hc8h!EI@PagOT57E&l_JYHqPIG+7zTr`2 zZ$y4pa?UTQ;PFFnfKP?`9&Co!7YNNqGe{p|&3lscVr)^J3X=7~!(a ze82eIj|!D^9r>i)iIG0O4GH(FitxPFPlWjjX#G{0ap82 z>Lp}cKN&~Zim&fmNcw@He*e^Xdl+$$BF-=Za2splr&xhGwj zp~Yl)op-~*VnR&iz>gIF`-cOt+!-Wpj;GQw&Hb$DW>OrB0Y#_Tg~4^j%Nk3HADn3D 
zBRX+W8$#)^)ak=@u+U*BOwx%&7{hlpIJsj!meMCn4>Y4-ea|!w_zeulss3(peRNoX_R`Y2%E7|9(YTKa@oPvkNkOe*S4R1cBpq8Z5>6tMB@h zr77jbL(#KuBGD&2fm|4H*QGV@)1~9?Zdc~4w{j$Jx}MKd!QDm|?ay9U zH8H8TW8AcdnV>UX%^8yp@LldsH2?V6D_J|?&)4t5kgV6Xr{eI9^9oRY`tx0Wo%Q@= z+x@z%YJmb>GEE0RJ0)nt_jlTzr@36F!i3OsskWo>FIKKq@V8L1fD3ViO{E3?5y>Z0Rm?vj&shqM&bTONB0mI-Np8_H>i9+7% zelaJ@q0&f2>xC-dOPKnAC$h=US; z92*RNfmq<@yq~5=186hDhh_FwiJG=5;tooJWj*c@lPJ&T)qYGfw08|AO`2xA@oJ;K zqG)GmIt|0Nagafv9fzrK*tL}5edpS<7dpkJKUC!^u6hQrD)yXw!+~)l1U)uC!R595 zB#oZqSz20@%X<-CZoqscsTh$XBJZ9CLUG;Ut5CuSzI zLv^OcAJ5vKnVTgv(7w_KD&bJ;xv#mtBfGg@wyvmx?b>P`(Oq|lf*3TFfgfa5f^SsSA-p<}rDyBW@tZAR(7i8J`H3oA2 zTE2UF@O75?b>kJ{?)A8vxY`=XB>~osc1NHcKf+;)-wMKmjF%Xm>{ZFXbz>l?q7s{E z@Vd1k;XE?TzLy?%pH-Hg>J5)kB_+t>VfGTdOKI} zCr-8d@;_blU>z=+#BfScz?4c2MlF^a)FcqFJO+bgJ@q%J%Oh+pSI*WVv%ZYb00*MU z3s*FBmzC?*@6a&w(lww8N&O1V>9qX1@$!BEt&W2OGO41?#_%KK>vS!3F>;dGRGl~y zxURy4-e`WCP3OVe3)x=ZRe%%{%I^vG|w;Ke%`(KK)su8s;Mum9F1ns&XU z11PxC+zzD#_WlBn9dXt!79os|b3GeJ^>XPsZx+}Gq2 z4UUj8Intll3TtB)9uf_J$be;dcCVX0RTUi_1pH)@Nrq`AgU0wQ?DSy8P0&V&h|6$Qyt6v@_ADu|& zPjp*vsz{B=ybr^n=J8kpc`0hO8xK3)xelvD83G^i(eTnlGbvcDX5V|ARE56-Q+|MC z3Eq>varPBm__*5suq|t~5UBTHty~X}bRrc&$b#C+;swD|6!?M3vnj%NTC2** zIN4#l%F}GhvGL4vJHafSGhL?L(1rfFk2MGVodoOG*St!`k4nN9a7V-pBnX;%z9rV^ zmyt6N8UEABT!kjc&2`01|DXFqZUPbn-p3=a9T9zAJC&o;F^WC=duMpLUaj<8|Oz&!R){E#pb?Xqt|(v*v!Jp7r6TrUE?WzqRIm zEd9!}HQ4FP8RdeMJ&qe$%>)%}3I>lZbWH9i_VXMiI{C7-o11ebD^(#sRfB#sR1~yQ zAHuYr4Rx&E)LSJ?`8F6=i$O=JMtQ?=1f;e=UDG zJjM$!{5oHw<=IU8#0qV#;Y^IpY)M#9)P^K!4o~IA)69~L!v@8K6SF>MrTVPnRjB!* z8;uv^N7c>54B#ZeHW&NJ=u4n__b8~_Mt`TSnw4cXgrPgC7yfc zB+Ml4>qD?LnZ)C|+lsBr zZoFoS^Wa7}eRST6J*9Lyzx|M5mh33uW3Fd$03VKD*hQ}TheureY^W4Vt2nJNZ$ReM zfU3(RBlgJd?&J)8Z#iq0SMq*YL(cdgbI6!Ng~7$@C66VVuxsucV3+-L_oN*KVXzDH zvHF=~^j@e1nM7hb2g;-3FdOj211j!w3rkDmzDLMfl$14J`EiIrNsI_8O1)ENiSz_^ zBH0O+)fJ{kO|lf4{`rfZ={Ld81OPf68ex@(;x-@cHy36_4M9wgRD{ z4~~2ahm3m*q|&J8;A*6Tb>Q{oT7?htuye3t-@*AT$#1k&ZT4c!68OVG2xxGN@mGUZ zX}MG#Ed6zpkmB>(Ukp52wGZFC4yL}Lqdm6lhK!6QDstz9kUH-{L==VZg%z#9-BzUP zaR}Uc-r=8`->=l+kj>tbIZ-&yp|tYXTh!#^07iy~0CrJV@emx7dIPU{_K z4nr3GlAccmFsc*86rEpQ5M@WduCFQAz5Vbx&0&=lYj;5C9IdOoo&a*Yow+7N~vb?_vA z0OkGBtb09c(ca0jDh~Mb`hcfMROh*7kGU;el}UAdJlA%&e9RF!w>!6FxGhKmP)C`s zesU^Mm@?Zl{R)AZXiMx0cU0d^X7}d8AmdT0p}SVs8yq03x8o8`#qAUl&`@C@ZGw$FVmP4LvTNURhvx z4qBqm^y#jLcXNDl6!e=>^t4AM2NLNbmpJ{>Dx_J4>DS)R>?C6@)In0Y^X)2q%^9(G zS_COnbzDGqO{L&>w|5^NKQJ+v!OGEVHqtm&vjttSfA9>7do9QW{kE;&63tSl;N?o* z64tyd_$ri1Byo*~#$hwu34(Nm^wW30UL9-S48N{(Y1i2#%P#2qI;K}MG41R=*tL$J zN8hiwD;n^*Y8sfUBZ*3kBv#}-Q>6XDY`@>IXW@I9gdc0CPH0THq;fx^H=N`!Kvh|V zJfF%xV@9KfSJ$K}yVaPhEjTC1J+fqMii{$xI&YYi*9Re^DG6JLbcAq;Ut4F-@20&B1jSbI6Vs2c zKH<1R(6}Pscc;vpnJ^&~&B!>_Z*C5l?Yf+k20Ko;>`fAIWECJEgass+$kq1yr+u?E zpj#7eif!Mr#1LREph4m+P{p)++Y+gJq00;YW}Jb9cQ(RHDHs!ZF-SmWk*d)pYCuu5 zQ(JxV^O&Skm~FrJ(Ky3UilTxW!KX8HrSSN#j8>vt-HRQ)H*@|epVjuriIsDaHisAa z+h6MU!7UeYRA7_yeq}Sn?)$I#a{ZC(&Aw^VAO+rCwRzSg)4Mg#{q&o2kfG^zI)(g@ zw8hg6u{mELZCu65f*~j{b$p(O=e+fLX>dcZRnaTx;F5$O3Q*-wb(ZEl{>kuwGaXE4 zfB+D_a*Fgydx#OZs!TTcin%BJ3G;mll2_|#rtcl^yS6;ntCaKKHJq#=82kYSApKgV zoL&4t-4(l*GJZ}G8w{xCW=3#bD8@IfcfnL0@tKcTbzTX1A2I>T_{`FJ(MrpHuXGlO z-utp2Z;9RJeeYC|`fwgt=18RjCz}WnW6QePH*v=6iH6ptE(mXn(QkHfEO7LJJnjp` z8&`C?Kwj|h4IT?}#m@-I-jkARIb{E2bP`JV{*dy zCNXYO-sGHfnd7I_Z^c0lR998u$Diw7`{H;4dxP1v_Xf&anG(Ys5daFW>63DTIf0E2Jd<3^kKPCXva{8qz}_pCftM+IIlQojn`uaY{q?$P%n zu{4?1@&*$vqzXJT)i6DuR&28^a&uv(wY}@W>4(WSkrt`NpzvXB)(P6H80vUF(e_?nK0t?TAi@ohY~o zhxexJM=)AnW6sWrGf6#l+>iaR=HeH-)=Mb@RQ+$p6Wi+aXZr$C*Xj0(_2HkKLrfC4 z<}_cWdt%5ak}{oHU!S8Z>=!M_9O|nA;CXQ}$BFf|#0c=xFFFVfljUZo&HRTP6Cu6_ 
zc_50Nku{X>umcP0Nyc!t%eB9)`}Gcfr(O)s)S3>d;I#)O7T>Ih#xfuz8gkB`K-F{n z>E3%XXH~8nVfbWW_5Lpk9hoF@J6ZL+_Q+jbKe$WLJ|=LKX_kLqR>h7Ax;KwU_+4h7 zm+OfA33^#UlU%N=w;ftx;c?)^#9iH(ZRqH}bgC&`_IrkhZlWn9|2wEF(w&m6H){#~ zM_mee&ARxJ9qlU5&E_|Dit@(1Qd{&P$YzsF3N}Q8Y$3QTBTV&{YsA77< zbqZU9)t6GfAGjW<-W1XB!KM#@uNvA^G+cAFdTM>~u{th~-Mqjt3Ol=hro2>%zN^Jk zHd@$y2Nolq+Pk1+*}5Fv-i~15+9mAjg2xiTPCBa;$8tSo{tV^|IsJmLThNMk8Y-_o zDH^=NO&Ax{u|VIkuYeQA{k8nG$gI4rRF2AUDG^dVTB=a#w9Ckyx-wv}ODr-c>7caY z+>1x12fAL9qGCWquQ8;0{vtbaFl=2CWJ%V3`};#AN%Kj~DQTMf`1|GZ7Mg=!zT2j* z!PfQ;C!sg{>NF77?W*x;FK{Rd=I}v)^fbw6f0vX&_E(`2@PIS8`I`!w!LEp&2`c8F zm-~#hlxFwgSb4AOV_Yv6R7L#H0eGZ2`m2-@FC4d)b0)kqiHi^)sKrmvJ8E z7?8v90?&a}(Ll$`@CTcQE?k#`Tb=fF$SwE!S6$f97QsvCF;oErE-|5x460IJ42zCz%* zq?V*jKBIHhmrUap74>(u?VRUNC2Uej?PL}E^n{R~F=H^)<3{tPoCoVOw_!3xg@A$N z_Z8b_$TR}5ZC+9Eyhc~61DQa{UmK9lv@9A@^Oo&t=VMnkZJ^^(f#GCY)AedRNdOy} z`-pl!QQBEa)Mi%sei%K^!$f>T<8IqsaYP|!MF#esDLyLa1J7Rd_2-1jH{2$BdR|+* z++%LBQ{vi|q8fVG+}jUZ91iyo^A;dzDIXvT1 z2J3YQP`YRF99xM3Na@oljr?#N9HK0&1cgbVTH%Xllq1S+4Ml7WgcXsT63?vN&y$aM zORws=jj?w0M!ZJ1}6x(mMBWP90d&BQ~SW3ZJ&n3^e8!IG} zBg0s)rZoOj$G0)MGY@7p!xq$;vKn+eo1O4$H%`++zBzDk+6+1N9YBuEj^6vzm4!IP zppUA*LT$^J^>Q3AM5iDk(numd7tb?rRDhzI$X|m%Bq5femN#@y+%%_$(O9!5N?-%buTE z*_2xnY5drx4yDs;nI4RGvT?;5VC?OSw2${qwbzcQA&yUTG?X%27uZ3+$)+YcfvLG^BYvGf& zd!d^mZIWpjvTJo8iiw_v)yZt}gFvjbiWcLv%i@yq>N_uPup7GRW2ca>+Z`<~r%xRU z6%u7d%b8b9TgU)K>gQaks4Fs6eX7}hf7h?B3q>;E`)`bjl8f$!(z0SRixBAbT}FB& z5#&Z;i;1~#lXicl7Ce=ktBS7|1^>(Xh6PIEDVEGrG&^_2E<)#+u9eTxg(CKSQ$%Scw6@AKb>k7S@-jf*x zz%x|nbLmEi+&!k3r8|tpzwRBXl&#Xr0-u843B1IS&f}|^>I66cdUb`nJ*~+_IzAjg z#q3Q8sBI$@F3EB?mky4(6;K}ClGk={*x)U*JZ4HOLZuvI%7^Mo!kXG8B*^%(eku^9 z87()ijdR*pIa1x1WMwUW3|y|4uVuC*dH5esB9T~m_$kKKoG-HK75yE{0Z^JnQ5obW z0YoWNOsFBw^RV{gh@-N-gwY&8HNsrpeJPNDV6?F;m0v4Xk1;}bMCm?PL(L`q?Kos= zOap}^7)F4RVpL;F(X>zP1A*?Ykcw^Noldvc_6sqt`2m8F#qc(GliEpzueKfChF!0~ zZWO!7kD?5LmMcR?xaswa-4fw9ZY zN?H`2Iqtxpx}z9Z(A-GY>K{+W(geklli4jNsVcz8g5!5lo1GcOU)L!4*D8C;icr~X z^ZXG3o}P$mYHHmhk7iJ}m|2Sy!>?IJEhpjY6z&5EPtJ2BzfLm(-JsS5q0;$+#vhub zlliEMLWdqlE!PU4JQoA6n~^b(yrVq=0(JY>21@F9W}>B{1frjAPw`cO;TLE+FAJCN z6rUf03P53a+5Swd2h;iNBp=WYMnXFyCZ3aVOgRrWtX(0x%y{R zBM$N{`!alR@p-;|Mg*G;j5Y=YR#!VUuvpA7S}nX&HWLy0&oX03cOqi|Z%M>B#NY|Y z5SspNSW|OmD05UA?T4*se`35w)bA2}vMBxf^<=TiX7t?ZmjnWuv1S(34Z`bl)JuvL zna|7RfXk#PpxIL)`HF&k5Z={w&KDo5(3!A+nq_AG2!J48o_?+>6lM$ECg;*A(fBE%Jo~?^}-Q!mXmTj+&__$omvF63U@CNuixn;iDrS&G-)*EV@WU%`=H*Nxw#`VMD9XAlh1 z>w|vXvrk*OS|XJb2Hulj%Fo$YSBtH`(R_BmqjwNzy5dxPZ=o)-BL!4J#~`O`nytd`h9opd-9JGFlx{@Z6<#00yr=(m4| zjlAeJ6&)w$moHx|m?sv5k56h2`86gq7b-xYl&+Ee!;!>(7Tx+t)AiP7qn?l{6FmCr zE;enfm17CjEb)QpPcy_&#pPo$h`YkWVOoaX)8J+wd;w4HwrF$%WzETQW=$ef?<+@4 zK;eWKFxucQH)`xf*F=4-#!nLxvMCK4o}mR)5*Tp9Fo!> zR3fZsm50k^RfS%=(Ogy?2bN^4Ytt>F;9tQwEodrSx{Wm{oHWtAdqh92ap)JS{uE>i zz#*lUwUC)~;&b9h!4u1s>o!b{T91!eo}ps6C~e)tGjkd#vw1Xu#{v3f@Q*4$Ocwmc zwR&aI=#0^f643GxO`&_m=@^xIB5|AxizNBoP6ohJP>iNC$ zzV!dHfUJ6`7wFIY<~>mo7I(sM5*Z~EnxT7JQoxm2tLpx^4H;u5Q`JO1WeL(8*C%N4K3m!z1ml^7tNmN`PprAM5#iI}>>3pw?X%izUwh}ypuE#>2xS?DR&8Gd!8fW^rOJbr1)8u-gNj~mrx+U`|9zqfUb48 zC)$onQ4q|5LCzMpT#rXXq|gL@;F!f?vDSUvan*gD=d4fwbpVvN^wx1_J&Zs1 zPa|I2QYJs@;VNBuqFO_*)pWE|yPK%i?HDFQ!`QwBuW@SyysOB1U z%qt{h?4$s>E1-DFs%CK~P_*D!)Xi`vP2)tj;`>KnuRMjA{pJIZ9apKmU%NY&cH__j zC9J?k3xTvD;X(JQ^vHHbY49AQmmPfGx6s&`FTfa#=qvVM--EKjBpP2Z$t7`?m z5#VQ>48<6g?=@YYW>D%Q$pCv4`#f;VJ|5)Y7!6#st;zUAQ`13zY4Jf2&C{u6SCN_d zy0V*(f#kQH=b`H*Oq9Lpe6~8n8Im_ox2w0OTBJ+94ZU40G3U2l5v&kH^u%Ls`PMBz zD3E8O6v&;({mEbdYUq4OFU<*sD#@a``9t8!z)w#p+cB@o^5h16Re53inD)0=kMrHj zcvQFw2q|S3uu<_Z43#n140Pp$oqN;aAd@n#Aq&1Nnjf+Xnr+I#K{8es 
zsM*b&o8n##x!&LiOyaohJc#!V-L2iN>tT5uwwH}5G};wSzYZbR;^{X#*4T8rbvK22 z{d_}GrJdaMiC1w!gW_u4iXcKeV!vX_=pgv~g5xQ8F;=C|^N@{y_*T#Jvu;N*_TY)Y zZQY^qLFk79@Cm)w6(eOCW!8iMx-5w?C6|q0ht-!)HJcHY20S=Yhu}b7oRK~lbpIZn zq9W;+fR4}<_~io;$eTB_Mdc7CGRHk1XHE(Ub^gO7d(lr1Eb15A`liGh%Ms!#t_ab#BkpIEEs{ z*&P@C`AmLvWA+JtaMb2BuN@lVCeXz=KOd!-By5JGa}c~@cm~Q?=utt-vI0A7cRa5# zc|N-8Qd^8Mp$xb|HaH@$-F5q$X4xO~JXv!YqZ2Tr*}?rWDg`y9*_~6)u|*afr^@Qi z^WHRL73el>PQPLemezQceceM6QfT&JB;t+I{zYiI57s)FiNq}q`@FwLvkN$Qmt!8Y zAPbZXh@ z!09>Kb|o*)7c=_c@#{I45x09;xa)CA z_O9XlLix$QA6lT;n8YVP-!qR$ABGO#ifFWX_Wj^GSGWL7bnJ^6fhm&=a?s5|PajZX z6Q}RF%^S003gqXzhXgpIugudLAB>YI?JnVAd!Ad~l=@ghAkigG1DVxcu$9`yG8eEV zh5V*OpGI2+NshNzikKO)@5hg`f-Bs6KMFSEc$T8B^Ts1Y8LK;$Ond!b=O`Xquyu{Q zlBLslFX5#+9+JtCJ5)y2Gq2Q0DXbvsM;rDgx}@}t?uD&Nh3gkiVBZD zMp%DivYguNEyGhkx^X%dX-Yn4-z*PmC$LPHV{EC3iBgL4X`bc9yz|T9s*rP35S<9# zXSf^_-?&Hb8nAIq*4*lqx&cPR#T!_tk6P{rpK&W=>Bp1M#%ng-ZO47x!Xoq}Bb5*XVY}0@%Kz zeE+)TdbyE<=>uP}>$Xt0Deoqg)*0ApKL=lG6XvO}yEqT{=H~D)oB6Qe>+gvePAh#} z!&DweTabF!4bz2BEVm2Np_V^0->A1Cx57NJmRz=K1F0)LdaZPtSQH9>5)mYt@;*)k zc0`VI%AMK$)aD9~U$7cLbnRz))gOMi*z2TWd88YcU-)w$J4HE%iS_4YOj^9k=W~{+ z6VXck)7*@n!HHECj!)?TOeDc`CIma{#}ni@n&QX4T*x*~Sus~k26N-kOYM3yHWhm2 z?9UaG?<%(Hq`#WtH@7;Dk!y91kABuA*_f7#$ajBbicn{DOfGZihkIYc7hf}5!86Yh z*d@Whlx)+4h!t54=BvrrCFxpLP#bgud()2Bjs`lFcHfrQj2fst33OnU+7<_cBmGL> zOX|4WD{Mqciu1a#))*ZZJG?HoPl=uutG;|`G;}-=rXW*0c=0 zH+5d@2~Hw^mgYFNq*XmyujN-ftocD*m4FM`3!ijkGv4ZGl#aM$kh5j@DH^R?*P5J+Uq>5Ps-E)^S zP05+RhfOfxBTY&x9+O+xHPCh3Y(cq36%F|+PipJyG~pX*x+oE6eBZ@=dGP+1+YlD^ zTwyo)z$)L{%ekXD;T?^9E@LJY%l^{%7Px_xPjEb%ROcn-{pLRNRolW|+0Cn9^M(tg~m%=>06 z_jYD8z`jwipd zP123IP5`Cp)0}+Ill<6w9`fO|jlt*zqPPz~MIYzS&MI%SOo}K}CWK3fodrGKwm2(> zuNT2VZ~V{B<4v7!1LVwnqoUgfkQa9h9XJ#+O;roT@y*oLyEg6-mD4tceQ_5HZJh6c zqp@qFIF`{nF9va24L=V+i?BuhkOP#ITMk9+N!U;Xg(I`JCKT4fydb~uNCeVNUmXms zN%I(9I~@QHgiknJeWlo|-I09%6#uB5B*LSdR4sOtiSP9CN9>oh7X!vFZsz*2SYYmn zrCNFszj^R8Dm}?1dRMZJS)WEhjms0;^#x4jTw*p#dY_Z?@iMBw+D`Ema#c!)D`=Sq zJSFGL6i^iJH7I~*5Y7-X)?CO)#PhR$tpL?U6gL8s3ED@?SgzZT6OdtF1nF`Js?e!?SwzN@yBUZZY#9M#X<|+qc>R`>5!XIhVG>Y*}ja8i!FgZO42EzN+k`v8Qi8Zo{&X3^HUi;Bs`|~L-+$8E+f&J z7xLsX&*@Uum{$&;OFBipyqD9ff2Ndj@#EXpYMaG*>Iw(Lm-h}51opXvHVey=4>YcO zLhhycTs%59p@%0eUOT{=%31hq7@#SX4EBjpS|d744myr7i?^r zh0qOJGE98`<>xCsH;R6shE2aK{*E3;_LEkl zdj#Gcg07;i)h|l<4LrmW7Gcp3$hSiXv>||8Z~;T_Rdgj;I|VVNf@MB>hvN7-kL*k974oIN?~&EaTjV zh47vkxK2(z;JT4$l1YEig3OhurLPY;mnJ&u_|AG^gBWLh^opmsS@`|X9?W#4RQj!D zOa#7EN-7>Z52uZLh<}ylXQzBYj?rMBm{;@Xp&ovxpmO9t;c+}4e42&?20T6yPzR&{ zHP;bsn8z6zo07B?sw@CnmFFhp5%C!n%l7ko4J3Up-vG!pm^j`Z-uy1{fl-HY0Q#T- zql#|SGPyKz#t=FX(@ErTUR6AJlJ>nPlLLSjgQrg>Gh$CR)t=4>Ws^~$MdU!3Fo}J8+wR3%S*; zU5(6gFx0o-16V(ztWesbbQ~Olx#A|?z0e85QO(Ydz-zYA_7706MT=!3LJz>erAz|R zDdNFkU+TbcJq&x+!t4(U+~NCejJ{?$EpK?V$UIz`nYereip>ST_Q~-hAzS)pnu4F= zwxegF@3oOIP4%gu@DONbK%%+cPSm+qaJfwylK2tAevx1JNg!dI#Th#Tc@xi|3y-H; zxMPfnG=T^kGTa-T9pR|z&!2Cg<2q`&H4PKD36>(g+7a;8TKoz3lLWtXGc2kBKS3FM zf_Ll@{5k-GJDk=i&hE<8Md-Rj3_iA$JKT}!%*?Jpf0HoFK*e&XdRU15)$W-bm5%06 zoGnY9XQ%u-Mq=FxXaEu&LsXg~3Bfi1R}13c!(I|3W8sm_y7%FtnOw5e|B0*XW2>1O z#@uk9sis4XG!h*bo~_f8IokQv%scOfHvPDHH_Ipe?(jalM8zI&@b&9}aRYo9j}#5* zr-bo-!W4D8Onc$1eBC+9Xlxr`5mkacIELFB(7OjhE)W#7h;#{53|Fm6s^%vjzv)L} zi@g{Q8ih2*YNDV-NHpOS=M8`#>Q5!k+%Vu`#P7#OBBfd}3=@V8q-%WH@<;w#VLebt zmtT}%#NC58qQfw&P`n@6On5}cuPW+B%|2a`H$QX?kGMZdRya`~<;!Wj+scRDdp%_}5$Slkbv>kbO|hE7bUF-7ofeTA@zXSTs_NiMP!#pcH8V~^O4U@ARN z)V8xWE$od;&2#@rvS@`Ez8vx8N1d^Ij-fRk{vT2e1JJ_fdk&)W(aF?m1;c+jDv+9Mss{%(=kbE4#f)a4?u-D+&~Chl9&*w zwz@|*E6(~V=v7sFk(p-ksV#JVth)4Z%~p+IeM6XAf0VgQ_!)`N8U<-6HM56Vh#b`N 
z&C-1GOWO--N1JEQf(NnQ+2Zdk?~Txj80^37aQb0sJCA^~hC4`;Nni-ufRx~__+6E8L)}Rc;g{ou@d=Li5~H_-e9~V_<~Ti z>sHi+PPUD3M@gYnOas^#@kRdT%aGGJrK^UxLj*?gM!)Xv?sNIzMBW~5WTL5-+8_9P zBoAJq%ZB9j2Kf2T7ri=O##S|_mRb}Tlj@#&rsmogi(HRHbWcD9jcJU9b70do$L>B! z{Bujo4{{0b-+yCaS|W#0r?Qap8&3P3vi!pIPQ+fo@;YD*yq4H96SN4_Cznn~7=9Pg z1jmv&Y+XmF^p6$@z8#^Kc;gX)Ww}1~6wC@Dp$hBuMwCT1xeh-t#6(SI3BHGw{VsnG z>}sG^W4At%rJPG6Wn9&+6;4}2RevL2aS85_UpA-YC*smc9ImUbtp0h9NbSlW0)PVi zF7URk2B`;tBk`rDVK|)IuBOUKX{WCm^=W@?^|%39TM$AMjkD0=fga*$Df(}zjF8#kxj zNQY~;w6GK3HQc+G4N%86+gD+KP^wB$h=V=uV$7Q3Re^!ebNjK`AYsgY!+$* zC|-3L2`uW6G3Cbpx~X6i4n&7sdzkrEE#yJ)TiD}z^Wk;SUUH`sRliHQ`LAi1ryve+E1b`e&VXugooW6C z-rsSJApD}D0Jt}HKmenUGvT0gdZ+{m{vbc)NRl|Z=0CDgsFxjYdpu^d{8_` ziy+oK^AMm13dFl3jzah7{gZ?Wz_*If?0GdE(pJzk9pd+Qs{4rL>?e*yL-oAvhDz;T ztM>4Tik-l50sJuNwFAuF?~bi1{XY~|^y9FEQ`uh+(6jIj27;^T#O+p~o{tyXd9ok0 zvn0Fqx?m&B!9EG*xABOvIDO7sw(U`#^*7e`EAY`gZ9D6l13OGPg=hxKsPz zaoQjrLuMmK{v^<{unvky<3-y>1`bJG z()%X75UFX#%sI)%Kp)&c^yY65RW0+mtA8Ig`R+)7UaN#IQuW|Ybaj_}i@WMoT&=Bu z6GMHj0h)Aii9f{3IDCY3O?%PWn=T&Ef!H9j&|`g6QWPnEW#{sBKX5e8G`;I zwEF!-70D9cmE7+)Id9VY!I&K{oMd=vcNB}Gg^Kmh?vSn%Hl`!5zW zH*^W;eDl9f2tPk11b^ftw2HrU-0y^RA}C;iz4_m-{C(c^0HGPCggo_o>;F2ZM#7_X z`#V-^Q{D7%OC4oPYZh0*VbF2EcNH{cme>o)(ZF^nYOZ``~+S7gtkO#yQ@ZNdh)O&38Nge;U+=80EPR zbe@DXyRw^`pD{}TQ{MUmnH!v0^Zp07V@ALVqT?bt&~p1VCGuKZjO0B#8q>9RKZ&5= zk6^Fe;#Q%&Q;;t7QStFIt%10?U#-Uzu}aIzEH94MU3#!&ez!af? z8C;P)2}4AEk99_&ohpD8$#Ut1fw#2h_Sbkc(VMxO6aGXYmjWgn*cpnvf`S|hAHeNm zZrDN}A>cpjwiw>zo#c@{(n+W+Kua4hXxxECE&w%raxnRl`+Odby9WHYa=gByUqZzs zuKJm1<8|1}&oxc4$s&#d;;lB8MgH5^T0oBbcU#k2&BB*p533&#yXay{$DOO1+gA#ocDxPV{(F&l`!`*(f^)NS- z=TD*OeAw?E@u=c`EGr0e_~fWIA7fQ%XI-43lhp=G{Yu&EzoU| z>wj2rmXvH$NkG$Iqxcj6u1mVZE*=Ufyobgd^!NDoevrt*#8ICDrw$nIw8$qOnUQUm zstJXY$7yU|zdLdw)=Y5TAKHw4(=j3hJYsSx80FCuaO!+Iz^*Y;X+KB)o`;`5KGkgj z!)mUYScM>44_1|s3y{3qH5Kl%6 z@0fFubDA{mbjJ3$QXdeqz-n#=z{j$sVa8E}pSF9G_#=Pt+5*70xJdjYnDT>41q$Y)?TDl?h_PA7`PS3(*bCtFvA?46Icq*^u>z3Ue8&Y8(63MfFdO9s-cWD-+8uL}au9KwTO6z|pX|kLmQqN>yz1*F@hF-4%J}d7kd9 zG$>G)GRwQVy7_A8-JIO?DnY>tS$zN*K^(iD{Q3ELP8#0N`*Hi_)~LVv%jq!mJ%sFM zGaXDInS(3f8O?HT7ROvf@2 zlw{iV9xk8z_J>u(rIm%&v&mBcSSBXhlS2R+y-NqoE4Q!iB@dnk*qR2cs3lSDHr_U@ zdes#?vBlxH1kix%-z44M3Rd3$oCb7VDojS{ z@SeQ++|{tMFdNStxVc$PMU*JPr!5FS`t_`PCsvSu-+E*WY&cUobStyXhG{d=SouC) zG*&YrMOs)GIWKcmT3VzyYn>QSu$R}Iv@S{8Bj-K67958F=dy!Xd7`-QLZ+&mnRn;h4!0y zPfxwmZ-&-LT@^Tcro|j^FS4so2u;*Som0dx_-EU6B&2UsV#@uXW?0s7)WdK%ul1PJ zl6BT~ra_u*Dn}aESyeZ^T6QdQJ{ng$>1(WcUr57JwJS|-^*69I=iS!b=Jmb~!wQtp?&)!Z3 znJ@Z*LU^3l2CAfg!mv}IEf_G%wv(ojd^O{fZ^^d+iA+bELcj!Q4e(dKHvnk+(1weR zPYG_p^ZECqBUL)@cgHy8ba!XIWFU^qw+Bb0b~rl?z@$x~_T1tE zSb_G&)r1y;=(iqvT&(8?Ys`x}9Ak7H15Qd`jC5(y52w;732qz8ww}k?q+ElJMarVb zBfzVoOnJ2r>7fQL(SX9@(1SLk+HsdWdqy*L?_KnS4PGsUivzYTP&h(Il!RMRHWf@| zZ;wkYlAuM{7Jkp_acN9$F6q<*yC-uU(K6I>l5eqSQxhc$QzQ($_EAO8=HU(MQmg}X zsnqy{k?6l``8IqDb$}EBi~hmRbGXybCtn5o{%{>J`&3dSE6LvZkhI4c#J9LnlC9R4N1ImQj4XeL3M~2$Hjr94|T2^FG zu75zBuglIf~M3coMy!fPNGXTw{ zd?&SMAyZ;5AKsK5boaeE)8D=bV4s6&=O0q@^ff9{1euc#raq7QZu>~am`pYdm`=OU zl+n!Pi4N|Fhi?~Ky4i;jY?z#4+t+kdFF27c3SZ$T=pXD&p`(|HgAW&5)sthmG$3Ky zcb*=wZquaNEQ|(@t&#!rRcm5UcIyTC>83_hzBPId*|-2`Doc7KcS84~A0bIgOXrXp zu@(uLLEbWUhLJ6ZP)GubN2JQo(t*ekN_uEigiNk~k2{BN4%&(hTQ_xqgGtOp zl)_5S^&A`=3}>IGn)bw<$0R{BQ9%G=#8?U~rb769DMqq~$a&^lgITT3tEq1UlwId- z6|J8uXUTtlBtTZ-ctTA!U@dxZN#|9Zg4HL@Ij@rYN}J6u{#7|H*OVi*e#GMksTwPi zhZlhVYdlo;KsqnFv2xBw_<^?)9Unl)rXyXct8)BfD~uX)UcYP9TH?<&<3u@76uuimKYjqmbf9v(2- z0bwSW!D6rRd6*~^3K&)6a(T1@qLn&1UG7} zA)t;`)F~yN!$4Wr_N#`XPH@!u2IP{!a&i8h0(-{o&1oaoD#@aQkraE^HUJP{z_N|s zvaaYSs-WucfJ|`RxRqEot8rZo+G%a+2iYS2q{lKz^;SwIUA!+$x>8}8vQ*th~8-7V7xsxNPblq6S 
z30neGb~v(j$g^@B|76yrhdoRMJ(DG0wVyuwieuL&m)k&QdE{jo;O$tM#*)u4$`@ry zwVcZi1ljys{f(n?!#vvXi&q9yAMp-cQZ=B9aWaM3f!;Irtl`Ly_8M=FIo^#T`X_5C ziSM@&_OhRo(^OpeYvs~#8cHY(_gY2s!9m!wMMZrpk>RBm&K113-elvWi+8!u>L*ZI_MNo~~n@P5#IW zj}Yvvh;Ihkf(d7e>zkjD8fC;%5xu?y^knw4tf%ctYDp=jNYWFIY5NEJP6WLep1e_R zof{4I4d-vsa47R2i-(KtgkuuS)|6dCVK&23p;lge1+vxE)u{r%`8NnC1X5sk?%*@Q z_*<@?LKCpTrN*X2If~Sx#3GM=Yk~CM`F5bRCN>qp8KEwEqH-i=_pt95^$~%2n2uLa z-3!)cCk^x~-m!vv&n?dVSq-FGyEXOdNV7s$Tej%b|U`G@#16`eydyIBzE-@ z2}kC#(;|t_D>?_t4BZkRd}Xs1Nutt8vs%3D(NVJl68`TzfDppyBCc5Dd8W)O%->p@ z{so1+T_6){$VIiNp`JBMnK25n5o?dvMn(6bo41z9VoG4GIL4=@CgOW%hXchaMR5=> zd<&lJP=1XJE)-DX&4u;+@}bC10*MP1D-efq?2&5I6|onr(m@TUP+;zp(fcE(Uu_^_ z)`Am1+H7(Q<|XbW7FtKZ@bIE7nqjQ%+9}ZWAo_@)uIE8EwR8tS5+ZO6a%=cfvlPAN zl;HJRMj+ZtJ-7Q)I_SMDTa^0?(g^A#fD(?Kl*AQXs%9!(K>Iy8E^&a)1H1xl|6mwa zE1DNCK=c40bswH+fw@iH9C=WSK5ccx`DW0HU0?I000YNF-zn--=)A-h$%rNtx)d$* zmOW;TgM{M4Ar|!l_Lxap?3>Z?>mO|aYTD4Kzgi8cd(XI}zpqZD@@T>P{1g*sNi1*_ z^v$z3kvFw&5q06mPXT~yaDgguz&jr-5M}z{lT#^cLGMjC|17#lK)8&@&^In#7aNmd zr$W{j_kPc`M~NWu`l=}yl8rpfl!2$q`=z%aQ*~LBj|vT3Gb5lM9BV0k@&{pGo+?n2 zj}NF3lS@Lu9j99)o}QQQU+4!{58KGl^3+pOWIF&Eoe)`|DAOn+hU5Ezi)#O)4g&WZ zx1|W-9nN8PBV}p={ECqjEHIBf8Mr$ZL}t@7z5#fSZ zW@(nC-h3l;U$_zA954jZ4H@n|f*pX9&@aaGQcH`JzX&kHZ>W>HsIb`tw5p)5zPpTP zW1nBfOJU+LGc(f-*<0x0i_exO1lNJOew%!JFOb6N1d4N+*9>Q=>#UQ}G1WIEPCgOC zb|RU0qRMLw4hD?d;=Rc$`mGn-KL);a4yP4}inqD^I5jH6Od z6_$l^`dpng89U!=0U|GYz@yo5w<@D``T*~C9+W2eU}N$%AQ21N()S_6A8umcM2aaa zHs}v`OLmugdN~s9vN=XZ8`ZdDIZ@;Qb^*&mKd!f%X9cD@$x3LV{4~9xt4q!@H>fK; zcauU4{FPZj#LJ_Bp#kY6Y(yf2JYv{cfp+>N;zCb9_57xopnio0XS&aTPw?=S zhDCbkhS)ci9=a)P%hhyX%^iFl6$cl$9*pW^b#0e)QUg3?To8akPh!nyDoxzi=h}*u zhKg@hv_ktIF>#_f2mS`!_Vg** zSDPR20C<3sNJ=wnYZVX)R9|j~ljys_+?vc~s;91w<6VE4l2{GrV1LZbt=)HIr$+g} zh-~WH=yUxf-cF%?nKZ_`mpxB_;;uQ2oZ3AMeS;;n1lqXT(0T2MXF$0So7s=P;$<;o zg{AOlzD$J>$9Q>>?Kf_FJ4))IQIvPdh_MB9`=Ph`z}@V&(s8EhojQ~}o+maN6D3(s z$qck;Tt=Sp0GUvgIutFa#L&w1&T5)3pY2RU{1uJ+5 zFoo7mSIMANp44(lW8A|GgNuv<8`+kSKt{$dz_b)GPpX)9R4CZ=>&B&%g+j-)%?Dd? 
z#kW*+J{6-QerdR{x?}Yd?cDFi9jW(xt@2V2iF;zXXm;eQ^^5+W`40u|;xvQ+6aqE@ zLt7*xCI6_hQFb9&C|FQ2RYVXvpffz3tDGWF=SSFSb;jWGE}oqCXU$dc{7;~!x1c9u zgw!8iyHgXYMcAsQPc|J%!QY2&s)g+5CZ}XX!I*I^XNv;zptC%@i^P8Fy?m)?WBw+a z)#f!MhSVS+Y|i_XYI$ktO5F1`iIL2ECWNbLAO2{ zqgaD5=^8%p?`ue@;t_eBLXM=vgMw5VSRbOB9n-R~Z0#^S6vjd8$Ra;J3M$27TZP;kEmY-=uhv3d`X6iHdA>tWlUPb6Ib zI6dl>Q!q*@5EM#bl4MRU-Y-7|g9~U-cXQ?DHi^U!oIO*U2jc|ox_iyVKX=S*j13I_ zPDw861>i0!Is%tIP*G?egdsn|^|Uy1zUgg6r)(>_Qd3hqG|*ZeMlX~q@Eu#PjXDFeR*#o)t|o` z3^?0V*H1U<$b48e9C4y-O9>0Gz5daE_&VQ}@-#+8Ondt3Msa2N<{Yu%q$iqY%xaS* zQaFAY2zZ8_BBp!Yl?f`G@{4NhmyGk`MiUmnU+kl7zf?3@)^AgS98$Yb2af6sX0x;+ zy%3e$F*h73)u-?mw=O$3PsitO#lEoIChC38(Tdc*PkdRa_WAMa@!gfC*5nJ~+E=84 zI17`G{Tb5NRCB;SjP$#%Y2*y^+u|aYYe5{k)poC7-A{4RYMH(@eKk%KPyZNRV4Mdc znnIXl!g|Vx)}mTy-CXSmEMt#SG;4qXpvJwVkcx@-`+DMo%#;b3z( zFsOH@%zkOL$4HXcl(8@Hac|9UkCHx6qc5M-EE{>Cs(U^j8O5LKHiY{@Vg`mP4FF#7 zufwJ79QB9ChCy5F6#QeSLe)T?a`=m0?GQl2qpcV^?&}i0eBBJ9*_xX^Y(yN%~u2m_AH{Okk~EYS+gCiHlg!T(PKri_{ScL}A2@ zJDsf_TO`B7fa8(tdOJaS*wJ~lJxBpC{TV2oPCKb=H{c@#RnO5!wg9=`VSs=>Bo&t{ zeCB6spmk($c)#fB%We#O3UQhf(Bj0(t0ZCZ-#WSs2?UorTsw}>iR+*WS_4t&lH1LcH>`|G&`pmCs2o(h<&Th2}4JJZl z9F6ar@qvIGC+!ENC^`W6{rVvtat=vNyQFRy}IHeDJ%pTgrhFLqa8VY zkIQtimwztV$nA+5QfDe$^Jfw;l+1P5?7@Gvzex)Noic26o7%^io^}C9cO^zpizd=( zaq{wN)KvgJqEJd-Df`~h`tbUEGrXN;BSdKND-cYS1D>(zXuS?DbthlZnCH)$Uuyvc zJ*)s|ecr70Hr?M6-Ss~kIyz;!U*5f6J@;o9auxt%n&n~sHT17G{c%zb215ssGMWE= z?wv(#%DV?NcOzw#{hv=6@b_ziU#Wn7}*Pzy<#Q zeC7Wd{{MWBe`1Dm&M2*qe6SO8;`>-O{HXo%D(%VNtNU(1pX#T_=n6#NR+`a${Kos} zbJEuR-N2c>3=mF^li&POzTL%x#s=ZXHaM$H|D2$oWDKuOJip@a9 z>09z^-nXw=Z+m@lA8WBgnCY*Te|K_KjKWv6n|;XVI<#~Tg!h&};7kaF)zHoH=ATmw zPk9{w4b)}PEnhAq<=|Yc8_`N-K1`I#i6(9a`7XO&0O=}o7T2pem{4OD0=y1E4 z5L6uHT$15DyBE+`T*@c3zc(H>NU-KiJe~x8_fWseosQ`lrOo9F1}Gi` z-XEz#QpUi;E0dls*Wvn)RYPhHJ4eFc!fcgZ#pZM4+-rhhshkau_ljZ{Ai`q=pDnC1 z)vWi3%M7)}nrjbi8YbTgl&AbI*W!a>F_q>jcW-&!n}Qhj$z*f2E)4vhq5Bd+sUUGN zUn9d-Vl--jvnw|zz#?1V1rvy$8`Yqb$AaVjvYmgN7*3uSN0h{Ey_zCTPG#(VNfz^=3`5$Nj78c55@f5ma2C8dEx@z77(z6TQCf zCL~bjL3t>XW1qj3w&K7%6NzHB0!Ol}cyG@kzq2EMm=*TeUvB=NBWH21ZOPxjcS_0k z%1X%F%S4E(yXU47ZTjKNpi-?Bw`RFlG+s!=^AN)NPnjj8!TzAzhEr#i zemLf#4d(b&q1asW#kW|W*j#g)Pvz7C34L5Jc&7(Ft?4}2k@{-9)7t zLdXPymhEvAbFs3tBcz&zT|H^;8o-1I7SBW$pg(I4S^nT zz~EK`VD<`eXz}6r=9C3o=$~e>Neg{z@-aUY51S+KQEZQrAQVrMZ1N!su|N>G#rMjN zkIU#~T@!?&QkCi-vx}1*=^6(2J+IvjWR69IJ3fMPvd^J!|5x`BA;RGI-fH|q1A!&d zUr5ZSkGBf{_hmXTRJFNS{=Z~A0<8a$hud%N+#l*l9R&r4nN;(?$6p_KqdxBLKZQhq zg#fMsYAf?UCmwi!1@JdEe^UL|WmSAUfjmPkMnKuZ-jl333$@GD&g^ujO_TlgHsU)6 zFscjZu1`2+fpBH)%L z+Is!6Wm?#Pi}L9wdleecGhrowM@2g4rSuRo8<15ENcNI>X(N3AjZu60@tc*Jq9gbj>H^#*9zHc}W#)2J=c@NoM-r z$Hzb_F@{*u)sK5tpWe(lnVkv?3*XMH{+KaObKTVzi=RvV@j=b!aoO$6;rAVO?Wo@4 zDm|A)|5^u^9D>o1i`{^^A`Wej!B)-pCWK*##!aN0XgNJi?-&*8kdqs8H^a60(f)J! zj}KY0v!lot@gdY@m(O#@p)WgONkuvf6*D5W6)e1KVAkkRNd-!0a9<)Ra)%#RF)eo! 
zO=9XTk4=!ewjyxB5bKZYhFFSk5EUpD-vBVwKd&Gl#|ijhNdEEu|GdvE2Ds(BS!2SB zKmO5%2TNa z9$zolWW+ZX4H~?vg_oivrvb(yAwIDOmb)_I7tz81bAlHuuGc*BOe^(CaY>uYH^%rX znyY!x&5wM5^LoIx;)mJiElMDpX}3N3wV{_Vbr0qBcui~GaXgnU*x@QtsH|}(Myed6jQ%wcyiGp!u_kufS!{8 z8VCzVHV=mHWBH5OFBR1t$lsNdpvutf2gy_cX)D35n&I5~cMJDkJr9NlieLTQ=@h>y z*-RW*c&jc6NGC1RCyRe=5)3C>$8&1^dpyEm0l2Xki|k9y*7VdoeG}%lhidO-F{Q?SpYH`Ma(bI;v6Ik-ru!!IAv$)=4w^kGi|KMfTqD(ctB2= z)UC7)hK=@b(^qlile;qI)$Z+0QSI&Z?(X?y6d{N_y4fS_^#83%VWOts4{1X~7l) zbK85^D@Fzb!pX2h=jFQiYI@i8+TPOb?owE&sb@C1mYCok5gLMmZ&f@_6D<9eG$IWj zCO}I)(a-zcrnPOl3DC6ngSJEtdT{;5JK!_bXK}8+=nM7v7u$MW+c}3ZHXg@yIf8<~ zRXfBTB@p#R{=&W?6ki?k57y)cuZ<)w&d_Ne16|^pcBikM5ngyoK{Nq_XL?qDfrz>>cOc zT?4g9Bt}kwe=K)|E4a&$^{wmtHbb(ub-dKn^2H%c9@4c~?s#Lh2*@bAC!s-re$?du z@*1emJ`>R4@*Gxf&uQ(G(#0Z6P6Gi|MSHmZ7r}QYkj`u;;EY(8G4I9WFnAHsiJY_P zHI!tbsq4l8wDfe=2=;c-)8?*B>Qr^9Jl7FETIH9ikzNhdFdZ)Xe{k6lS^7@tJVaBp z4b+@;wE;G)$*P@)YeK@JiZMxk33Plzyvp8E2`sAX&N5Au^sg$}8yL8=o?+?x_5Y+wF6@$;7W^TQWoUf;S&R0sA2 z1rZ0?0zSjdeXA&4xMNsit1x*Uohf_y#5xt%b4cb|-*?X?cc*dAB08xnI7F`JbI%~TB6Gxe1vw#}# zQ0Ma`od*xjD@<=j-h1!0s4cyKz!Z|T)qS_MZ;VtaCl39A1m|U^&w1MtB~BNNa{u8~ zYeD?<=6r;J7_JYLF!!Ky=Zfv*_&u)grp%3p2Yf1bZ_g=Qq;E!Kw<1>EA3oKY>(*{3 z0SLxaYNm^&mX28tjdqpi@&4BZSHEl|cDh{iatEhX#~8fSyhgNhK5n{X2yqz%gspGbEq{@(YM^62W0!4cBn>1J4kei-e=n zBgDFz1S?U5oNdL?PZPWT(%Z`W73ba#lO`(Z{V{P1+{lUja`uCvz_h=EJ5Z#+sR-q& ze_dAqle+BnqkKkdR%&o6l_VtN)&V#_)A3u&=)u)ssuOBcpNp07+3mdd|8O${-S#{Q zi+Vd9jf*ZWcVBLr7qiYA9Ht|YFRdGo@?ZF|;9hTk`EX6pT$;1ktiyn`O7d%*e^Ch7 zu|d35+U5yqSAs4Jjd`lkb(mg`=vlZwy8_)Fgp+S+bn(5s^pdt+dI}yVsqRUn5jj*% z9+)^m)%Lrro}O2LE&~DewdLZBjc2%Q5f>O5Q2PYy>4Xl6p}lOf@n(5z*-XeZaJS2a zMtEy)4JY^j_;+(^6(XS5w#8mORih|Wl3J60Q|T600?vN>!;ZmQ73DI;+OlhUF$=H| zg9b$HEQ?(#JbeVr`y>aMEUPl}#Us=C&Tr@<{{=;VL@{n%gf~Zm!^p)slTKe=EZyBK zk$eT})*ifg)(+|N__)Q@F|D0qJ&Ea6_nC3ji)0VzM7XfJ_m3&TM+d->@&+kZ9*5DU z?bM^yzLXt-$zuzlrf+L!>A`rjsp;Q1hK_9ZmjFkWvz+{G!nC2H@nQha_+yHf)SI^_ zv(H{Ib|x?~0YE4`lnAQ=*o-iqC3ZtHtDVDw#?^Iwv@8v3N43U&3(aDL>ZGh~W17Mz z)32XltZ~Lk#dmF~zA2xkiZ7^Q!$|>-`y$shW}#NT7TXtQ)QVf}z)$VeVZd>H5F<)Z zM?HFWQ@wh3EOA$N5TPJSJEz^IKmnF5>TEs$-L8P7vs@b@zbWfEY#L1#2Czq)Hear1 zk*wB+%4L`DmiGl#YSIAmU}i5R~fl7UXBUw!6`l(hIY~@ZCe-9 zVCMLTmplbZ6f+e(rE%^+;Nm;h7eLbq7ySm+%gYAx3O%!0mVbfe{{-TfVF=^_VS|#3 zcQk(^Pi?jSsb>V1YmWHVdRU!OTjAA9xa70>MXP-aJF{^kmy>ChJj!*7eCJ~JXK^HQ zhvxVo=j5(#wU=&9Sz|@oi=B^d*ILu$pIxVl=Bquwync3iGCShxz5Cjfe0zFf^;K4< zhw3hFQiWsY;!#=?yRH3U@eKh}5TBA(LqDwaHB|Bx0EYX<;|hXW5;Yl&4^HIy7bb<_ zwo&cM)E^qD_nGulM8|7IY%Q(jiEuL&BL?I4N-RMOM-;wgj=IogU=Ik}%!I7|Oj^~W z$wBH#)i)uQOXcfXjfv+0)7OInr|n#s!dwO2 zY$`N7mpDp{55aW1d7x)PrEjR0PU6x-xum$=@1LzRPR?WOXwwGE;q0p!T%;cGQIes@pOP#MR%%v+Bw@ zs1(5d+5!!Fr=sF+Xh5>iM1Py^reAac+SdaKiEW%ZH}CVk1>GHj%37&+FtD~qluxCF zM7%7=!2RV`1a3sIRy(NV8xxqPR%&U4+tx^qer|jA6Y*dEQt|P#fSLDe7^(Z$?z==U zy~uxy%JAy;)by@lHG1)SFstfqly59ZCSqr1x5QHGQq6O!ajtaAa_x8ALyA0Q4InPU zWeH$d76-wZ)h(KFvIOB-o|B1!8TR4H@-3?|`wC+Riym`P;1=|c0q zYhP(2RDaTDHH#4kzWns8V%X1Xif>ciWC;y_@WrXJi)dUq91`0heCJlybxS~bl^?HL zXqYWHZhaWcugcqx4v`w&mpBN-MfrIM6WL7XVhDkG5enN!!xRb8ypkzE)rRHq*t=cL zek00LL}-~B5lhyP3f1!G3uaB&+vV|d3YYNy?)6q$dbONQOuX-Wc8_-^br~F*TvfJV z6uRVZ%i~^T{EjXMx=#Q~0vLhW9Jc8%X`kSt(c9BeWVbsgN#zy{)gHcgpR zmN+3=E4^^z`%WV+zp0KRUi&HXa{b&+5Y1vH;$wB+1e~tdR7unu<$`RK-(;em_C_-0 z=tn&<9{C^~9$UbFcA%*jvWfZd6!U>7E#C2mD_u5*AM`lWX?~Q*9f#49hscE{@KpS` zL4o;R0^%R2Sy6USYG2LVBLUkf>x)|P%j{QWUQ>O6{FCSUeXXvhpu34xIo-)pE876b zenrG7j&fKj>!Lbj7kV7w<5*j$tjAQpkQt}s<5;n@O^zVN|8@5&fB1}m4RZmXW(d~} z?0zf9fWXR@@{bd}eksG;$l#N8R#gfo%hJOwZ;~*FEsU^}^=c(3gv&%-Xe}l0hLDLr zbCHh)t!xj>gyY)q%m>IuwVx&mdH~Lyqk}WE)-n?7A`h>|viQIIeGTRhyg1H<5K>e%xp!N-~L((uw6VHxNxa 
z6Mmstq>~;u(P%g&BnezoVCefh*NF)n<~EBAMn9P614G7coskxZVHUC%lm+zLw+0@$P*72b#0h;e&=Hk5G=|m+H4j)aP>8zsfY?f|E;B_C9crApdZ*=xb zk-|3VVU}BDut$*!JR%zB(J3WmR4-M}1V8wO4~$oxS?XOKvK=9CL-0U&DwWWI0Bv(x z5EsC`5WxGk}`miM;*%nau>Cd9E^EXB&z zr|Cx?uag8fqxF2TR+?dPVb3|+9TAHiQO#u{Z{F3PE1q33V!1QCv-{_e0^Cd&{!=nf0(vtJKI6Q1vIAYI2m6KCO$nHm$2dhz57=y|}0`ojyG-0N)hE3A4^kyw@Z^W7Z-k)3aC5v*24e zS<7?yIiN(Tx>$qQ@WWu73!g3#oF>isisa*1Imb%&&qae!d{hGV_#=^xZ$Hb`i@z~C z9X;kwhpGQ4t12J+(J#!{0+lrFQ}Bt}+E}vaa+SD6$yxN-HoSvzm^f118Ia9R+=zRq zy5S^eE%KG+DAD7b;D@}slIpTL&k$66GmfA;T=|@8cw5ShJB9Ru&hgSI3Jkg-1z#TwyxpLBl@?GU@oDI zsZb(f_-4rVo4TjKRj%XmO*2n_qaNL)?g+Kr-w{?(DZ`-w^;++_We@uiVur((+e?$0#D(okpPaMbQFHH<4AwC{Io$##nEf3 z&qF{RS4htA7?G4Jb#Ggtz!;Ul6}x_fN&L0k6~Lhv0fI$B4Bcua4T=Vjh-3?PqV~Pe^7?!tP?8d}UrJEsKYD2D@GzrFR(}-2qjj>J=U> zTqbO(vyTw=yzUE3KgnJ**A)3IAAUhsLP=Jq6Zf~gt6l;$1K{-PL~()wB^FBe+Rmo- zHI6c+TtR5@bs4~I^I_mX#%`$Tw61$5-ncvFW_!f$^BL_9c+02e97dr@LB0$;3EI5V z3M$Ze)}FqeZsn_EfFhSym+Yqt0)-R}rS)6oWH)Xg<9*LD?`MOjQyCOf3R#oxPc~r~ zhXpjP)$v^YS&fk(axGuo_=v)a^!mjTiJL9`)TLRkk;JN$h^5_eR~)=C+WzPN4Jd!| z2SO|rI&h6FT{>p}Vq=A)k&^(N>+y)LLzRK%-TQ;9Qw9+(uetIhQS=o+hYLXHpY1H3 zG+83QYI;#KQ*Ayh81Ff!Yf-i&Iyo1toB=f-?!eYp;mgmN$u@Dmlr8%103o?EF6Zi! z^`dkp#2;B(|2&MHV_0hu3T$>_$~D9soJ;HryaLqDaMBt(Ea$0ZU^g4_k6{sulSmr4@CM=Ze zuGRi1#7$#=0o<$Ce9_i*B?4@gy1L|Tp>YGgH#iwZrnCUI0*?32ZUb)0z9EJ8bNS)N z)zU>GGYMowP^~dUtN9UerItZ_c1V*7(~=bkvgcjHg$=6UQ_U&q2!7p}6Sqauu_>3Q zI`h>={5V6-jb3Wy?Sh*gQ)GqQ+X)(URCl0mgcdbrjI|X|lX|7T@zmelUR_-lv=@R! zcV3J+AB}pKj>YA?*#q=mI4O(wm)9n+y|+zmLNEGP28)kMMFCph1s72whI^%(c$FpV z@^y*}r_^~b?3kFe@3rpCKR0yfr-?uMq_GqZ&KrQAlt_qiIwz$ZdRgfP?7u#ZRNcs> zPiv1)3H&%>IWeD{O6J$@2#DHyz-{nuN2Ta9?|m-&hA~Mx=3Q~P9fkbStQ#%=lIj1l zC?EJ>g|eUM%kv_&>Xqg=WC7ELK8H3>u)5Z(>E{X2y(Ud;Z}hvIdQx|;yN;{%+aPye z+pfQbx^NlRCA^gPdOEN1V#;*K(hdIhk%WpFOjShM#}$uNWa<)e zT5^R@>cCS4e&6Xn6jU&lH=gXU+KE_E(glkH=QOKtP@kgZ<>47Tw5*&@Su_UxT7NY>ASPMPAL#h zI0Me~dbR(}j@&*bq~m(K6C%*=M!qjux?R!AZ*Uv?#>a93#{NosFu9XlR!H<{h)#yW zO<-;_F|5&*!1=& zeGibi+@0$-Zl>1e-CL*L8??9snVyTaZ_ag4KnKM{$g7tMBHJl|XL-M8m$u{V-jd-C zZw}^s?f=UXv;&+*VBoB@$g%=*Pc*$)SkHR1Hwfsca%Ik#QUtncAhGeH3%KoPv3*bV zyE(=87?(vq&y%VA0oijmP7n%QrQ5IuZ;s-un;mqSHi5rWcj(PTK5Fkw=He7=L4PZ( zaDxBPcRpZE@9CFk{bXI@7Le;#ZN_iE_b&kHh{JgFRMsf!NOmT4(UMU(uw&lFJ>Gl1 z*P(T**sdX|@}?r5Lv&j+zI^&cIeXAoY+q3&kjtd%?j{U(%aZPPIpNMuWh7BNEP8OA$F4m+4^p_^M}^ z>#A-!?ULIZp-`EB`jY)Ommx*wKxDm?KUKB@e>@rN7QgxhJ9&@^cNRDHHTyX*9Ke9X z|JARlH1^LNBOsfY`5}n^^+~GdD=d2rtRZjQd51UNhM?s3(cWPj1%c8F_IK+*&JQA_ z_k8Miq;7}c8g?~~{D^n?ZYi5OkNKU|i4g*Kw11pZzy96`<^>*zty|)F^*2d*2SRjO z!q+;744M29^3vs3_jyEi!nFK657&2y1IP{rM~wy9eFZw2>0<{#ey7Tfs z-JnKvKr-|n$tUh)M0%33tv~yZOq&@DK_|%7O$`hG9%AlX#53YoU)uQ*_|Gi(k7rc$ z454dF*;i4)#Afzm7)9*I)cWdJTz-RMO510QDfqrlMPXbNO z27KY1zV{;ZkD)}Qt%2|U?H3thj!@}e5dHqgzaKHPM93@Tug47M|8Dg6W+ah;AsP>$ zudw*{5wfpD%%$b7XY|kit?fUWvF!$ih%CWM_wPo3>t0M?Y-%7jE!gQa7IQCb2D1~F zgcxJ`hS0C8bGiv_zhi{sg%&Gs&%eK#8eV*GZ7$Wg??R(K z5SQybB?RV`SHr~H+Rn=r?v@$^IGT4HSqMB}~;(t;|Z z`R+9(S{f=ILE=B0_w{fzaW;=uSF2HE`>zFEC0|>%-8@%b@H;mOwEbsv{cY-=Vo_XG z7%ni(5?9}AK6XfNe!pfqQ-JaFO-rA{M+gU6r-_*t)Q3^Xu~%V+-`b|exB$3+ zHtSuLAep{h(k@#E&p_Ancj72T1ykv z;I;ofqxGAyIivE~IbDB$yQdkFZEI}f9=`TlP1=&B#0Io5C zwrpnh$Fm(I^+_1lE^{^;d?CrI67@K!8r`i84I=pYuPw|)JRS$mzLIcvuqz6jJk? 
zUh9HZQvt+Joc0dI+Nl5Re{d|=aY!w?Z)b-wZll*8L~NMUcAn761}!*ZZtS(dpoM`{ zvweZL%w4gf1LGU~X+&t-%Lc$jvxfn?XqloHrwmoRp5rzF#iReeu;%bc#|wMJ9CH~S zddLRqqAtgM{9)!9UxA&{RLg*)bLn;@j_g|!$cR1%)kSOd-%E9zu*95x%J;wcp1)=_ z@Yi98@f*YUd>VcLk=_?=j=kRTCI{5{_Z`>p2PIrC)Sh^BN+COr!%1j*i*xQC#bf{8 zuXm=Xj%}W4e9d1N*9rsW`&g0tRiCqif4=$dKiT~=G3(qysGGFeh$)`kh&9(nCq}il zdgwOxNG*ho-qUIK%h!dPftIJQ2bUAd{)eu8wFvYPiuNe{?u+CigmwP1&} zx@ee}D9yTu1sjeBbYWxcVR77V>x;my0ub zjDFJ_GmXpTs0c-g*viy0yN1cyfb#;pBQsg_`s-^+;p`(1N_k0bP`RFVTTlqhr< zu)dADV7*?`#@boStrF9(RwnqL{1QJNYT8KB{hK#s>`(ML@6w)&#&hqYd{Uvu0T%LJ zb*c@~w<<%M$RKPhT7A-g?e`bbaoJBt)}!;zp3iC2Nc*n7@SW6bEc%f`ZahMV|7d z!r$MjeNGj9O z-y(HLrQYXJL_ZS5Bj=!?=#Rx&x8;2@WQQJ}fXwx=cZ)?%jX8x*Kt85%lPfa@Q)b@c zUQ;HmxF)DYuTLuF+i*&JkVUURdcbwFq+>+0AO;BDq^~!>BUW40YGc@9(PBt*2X!Dr z4~b;LnY0QdfLK@PEwOURetu}sY9DH-|M7DdHP4Q8na=m*Ua!V=!OHNA+=@XJWvuO^ zw?oG$C<)sJ`bfKyI@O7=;3xVX}!gYn8TZ_Jt9cO4&A>D@#S1#~T$~A9>bN4_wV;A`3SaJ>>SVm2AnVd8ZNpnb#l||mFa3j- zeFDd`QvW4{ed}U5{p?!0Jv6(eL*KGE65=KPHIk#0$;iPw>4nsuxpX2SlXawXvqT%8BN;HR- zxQOYCO+lv_9dXfd2l(^igHz5A0yY5uR@lv9u`j8DzXTMyRZGrb@Ub??{;~4Bn zs6>JFjBK7(33=YhPvHxJ}oSAvY(@`fM0q~z>`9MFv3 z<$FZ*T6+1wo%xX_B?^(P>>xKe#_!e$*;@`Cl90Lab^GsvR~T|Y(-j5LM^xOff9O4uoeG3}2xHu|3yr<#rcO<~l5{OTjyysYj3O@u z^@y?-hkZt-klYJu@MqKt(s~Sg67WQT%ITrww$;d$Z0**JlKCrf6V|c?9-0O+5Z<7;#p!W_WWyc(sfcX$8eu#@;RG5xL0+#F}{- zg`52@y_n;VAbLjQPS5?2&Xxu zBX7{L)3ZT>h10mI0xJziOsbeNcjU>;XpC#N^w&f}I`>CIu^O&kPo*V&^82PmX-}3# zoJnK{C1rV15etkNTOSKt6VuOS?nZ)h+Qn>>83h~4VEk2hW+V6Yn)-J61J(&GJ_9<7 zRawqj`VlOzJ>eKbhaxUJlD8jvpHe`K{et;e#*;i-QPj&q>sI*vav|kzg$BHLkPWV& zw3@W}{vqgKoN#jKAUd0-h~7~r1%_ zs+P}a?EwctMrWv8#X&Fbg>DIF9HA&LC<*Y(#&B+P;(SJuMysNAmvY6DeWYEcR_ybr zPo8Ii>=E~4v5E!q<>odWi2^+LRvN9Kv}A;Lc9 z%7gQJ?qM{JTDYG1iuXw693!sHtxaeJgla9E%MW3)>T?PFo2)dPDHoD7Xh5gtn1v+G z1t|Je+0Qm{SO>#VvJK6|$0q<+L-=g>GKqOTf_if+$P%4IK_hM+E#F1*F2nKmKv7LtHVWr;;2 zbH+mi(In(YZ+kW8`z+%3AkM{!sc7)8!K(Z(gr;Z32_JSa;bhWdUw8%iJvD!CxK&*+ zS^8Kpg$|g2&qM^adwfhkI7!NlAVGoQuEj;prGnlf<5LeJUluy?#PF_ysrhNcJ2G_A zyo^5=_~j%vMixAc{H&g# z2*u`N*a z=6O=5cr6$~q4DNAyC5imT|L9ZdYvzKhd2AVt@Ujz%K5?o1*u*qeU61U+d$Xoh07G zQ+!>1geTw}bvhU+AtGmWS8~SPhjE%= zzkJut2)lQ#ovr907{RkDj&L-9PsIbXyi%K(w|H$(6qt)E8#8Z^$Q&>v+=n`t=q)=h z&wy}1I{1TNVC9KQeNDfnk#Y=rhqa-Mu}(_p1u4Sk#~?@6sY+%6J*b_q5I2u2MgTnW z``cnOu>uQFneX;PbeMfV6^f;WOMgO}(|lRu+Nh*@3h zZJCc8gxgQFqyGUR?cX*k+DD@g$eG7pYNl8Px~1BoIr~Yffx8cBNArP3EA!9F=-Jd`0Q^8= zkz^HT%NC4)giC)y!euV@5G4?Ub43Othw^D?_~%4k{-SU%F|$;1w~mZ7%N*zb zATey-FPpfl^3440Kia){gvRt`2v6!i$%~pLj12iw3oDR93YsT^J#|#DGtdMB<{!Mi zQz^Vj9wE&|%5FIfka5yx(q3eWQmy8cxSMcL#ZNDb7gc}w{vrmzfdS+KOU2Su!s^ua zq|isF34;+0;AqwPQki+E;E_&ZfYbdJnnq#uDD3Lz569oyKj}wPdn#+2FYHWM_@(8nn zQJW8J8n)q^_|=v3E=A%(dJnnC-={s_KSg#gtS$fEH>F?2%!9t?R%?Eu zDezFo(Yx*t$6w4(B6|TG*pfG~X3qz+C6jQDI?&=A%qpXuB4h$EHX*wk)514Ao|e)! 
zL2k5%l+95HTf@^-DFVQuCLVdeQ%NDQ0#Eqknr}lCln;+(e9pk=QZeV|f+t7!sI)$5 zWq4^z4j>mNwowav$x%qL90_XJdo)Z+_!&(Pr{GEni5{v}$DT4os!Du$Cp^dRInDwe zXRw1foZO%avn+g6o%h;h<+4p5=8r>4RGU_O3Gry-_~Pxq=Km-9o8%&@Z<7{V)zHZH zK<2@lCN@ue`v>^@wz9nbXn{)@!^Qx$5A;agk)bX688}l zgb)B2(3pRP6*NcoQ!PlXT_gm!V17mu2Hd8z^x=l~$$;V`z-RmtcNFNa4T+XvR@g(H zPzo5rkF!91iaf1t{W9?aj0b?Lxl`Fg8{q@mkcoY2ielb1ODPIzA!l{L15pqV8 zvw&AfUw@pJNQ&U%&tQRJ6tYxk76q&KLKmDK3DnLNh?HitwXY0h-S|wLQ`D=X*g_nB z)T>drs*(Zw%AR)8edAi_%cVd&#Xyb}Mnqv0UJbfJe z2q4J#uRzd`L3(;+s57IK{W*KLdaX25bX&p`TyB*RAeNaKbZR_Q5t~T%MZ41bLmfdE#&-UfCgzCwY z=)U;iAtdWT+&5**cCuC7Kg@CNdhwV_!$qaAz=Urw`@TfB#`lsc?4b+2f1Zzk;mhS) zk;=Zud6CEqCt_$`RI~mLO~noaIfCO7^7P~<{SiO`Gj&O$$fRX*vr{7JGk=XW6%8W~ z=fD^7*GL)fF|l_aEReRcyi`D=99uVy2I6S9>AQyQbO(eyP%^$ zX02BBdot}l_U=zet7KNf{L&NKGxQ6DRZlQ0T-x+xw21`{XQ`!dDL|wUUW6@{$&{&x zNzFqZBBFKxKrgVvJZY$|nnNBATyd|~%ZaTTgrq)ZtQi*UzInt%K!sz0)q?xzy$N<8 zjex9CRHimx(_H3ryX6gJe7dIR^cd71acXr8Zs2=L~6l%Ers>qxK3L}n^Yh;j?g;#c+%EgnDkW? zHU+(SIf6dc%dpQhJd2Ol|cd5IOS5WuWgT`(gMzo3pg#0PT4Qz4Kai!G8CMY!w!YDOY^5 zhxV_hj?nkqalBNbpic$45*K1x5=!Y8j){lHMd2*HppSCLj5F0=3$H7xN`jckt3C??Wt)3|+DY%I&LYG6vgXXWSL<3IO zQu34JQ$!d;S?IS46nuv6tbhe@7O|;f;{IX*fPjcJmK_BWikT~r3AJ&Y-4CSzR;`6) z2u=@&4vm1+j~Fv5qA8jCU&L&mN*sCiT(3Oi7?ORefWqKvb!-ZSv||(8Dn>}9AiyI2 zDalL0ka-k4!m>MJMz6hdJa6M z5n0*xFbm%Qo?NGCc#$4&F*lM3?qx5R&wm|CT8Izey)3y48b_R1D5~)KW0HQZ2Y#)J z8ae!{80_l`{P79obbWFNR-bTD8;)4&nthrvwC=KMB~%FTaq~o@*d(ZVS^(HxGgH0s zrqLt!qcFwuCQT@H=VkR1Sx*CjG$o49*CIGpVON-#w>9z~?_{Fp@s71)HBxfW6G-M8 zW*4?k470x{xH@KiSOdo~P~iRwgS}9ygEdu}Y&f=VJ-17mS+cJqa!l484a(k{yqnz_ zT{nb>y9Tgh*YDS1*82o8kWj5iBiBW1U_5)GZSD2Gn3XMy#S_?)Q+P*T6V{{1xN@mGE*Mvw?96(wvRVn%ajkU|V zVXwpjhTlEHPCBXWMdQ@X2mJAmZu($x+RQ+eh2hk%F` zoBX~Y{w7)EDA*=!a<=`tbMO1r($k^38f3AHp#NYp288PK44GJlg5B~kT7Mx_=7wt| zqZlgaT#PPKcJm_fiA02i2ODySqO6iM#*6YR%n|C1Q`#0qjWt5X*OI;cLq%k#2_+E^ zK-PZDZ-T!ILb?V+K6P!pC!}x&G2bl1Qg0tKB#P*DQTJNLow(noQ^Sx=tR}5Go93zAlKpMeZto2^yk~so0j5p9`yof{d4`-wm}AS}lJB`iKh2#YoU zC&FTxJv|Qpi-u38BrCGn_ae@POu~b=ae#RcxN}2gR^OowrpE!yeUs&@4;@xSLD!Xn z`wjLBEotBHH=KrT9aK(N*D(R^K@)u)>py1p43|U%D5jB)3kP?%8y8;bmdc+#vXd1$ zV)hsxy%kZSpkU6O(HD;z8Ru7~$i`ml(ugnBJQ8QD&eyybB-w-1%TYcv#Yq_|yqe>*3>dW66M~xR@N7EHFHfZDc&GvIpE7`q zEriI|$PXi7w5>2ZTGT4IYY6F)GT}^v9j~Q=l&nW8YN{OHL?S04*Md0;jKHUv4Yy89 z`+{?K3Hz;GMn0C@9aC{xZ3PmSDE?;?NG}?E*M4p^pZl!(D~F38oGCPSfSA$4S?tGr zGAad|A^Kpaq)Nd#o#zxa;QU?ZJcBkUHGpPg4j=cG<=Q&kbNn+WHJGJjT{hbg?w47_h!*n<>K0ppAUQuEU#;x+MY8&p5n#T0OfJq{}3-McOq zbbI_44kLrncJ;53PHa%O?nyCdSROFj-1lDrddc)I*PxK^HSh|lOhIO`atIsFi5N;j z{0ZQXEc**ETA=}XS@|FH3=QI9^n`G=o*X*|{Ia9Jtwi!6ekt39n?o+!7_A1KsEmPz*I zyFp#Qur_jh7y=OL0NzH~c+1Deb{{73%4clHgoWSHFoqT@E#K*o&CDcTZ0uS$b z!t0tD5ghshDJBh-BedGL{Za@e|0!RCuqLlKnOwLOD;+|S>z4YgMTEddlon-Gx?Vo* zz%juz0w$ic>hs5ZBOyZsnwJmO?g_m~(8I#O_#V!F$k)mn3WDZh5&ImBdbp%?_ig2i zJ68{(M?H?7CrlbTWs;68;Z0<_(R+5H#nod1V_ZasF~fS8>NB_yc=>XLp?*ZLVzM`7 z#Y3Db+VA0?N+~gpqE+-Wg3Ii%M9kNk`9Ddt>Pi!f?q!ZlBWwGn1S7|}WahCt$Bi$R zi6Ah?y6)KjO~(EIm2CPadImJX*jGetUJlb%73Bg4nxU;BlJ=EVYT10_a4@vH3)amR zSy}|hr^7wMEz`*_#;6YR`+?0{KkZwtZqswNpI4=r?(!tVN;)sd&nL!iN4)Xw)CtjW zlZnujPV;z$ZNukLOK#$Vr}fmk9(~KFIABsMgTdsh93=ZunRTzz2!O9AM+RyM5m10} zlamp%PxD!Vh{Z)O>+WUni$8@{sLPbjWkC4TfHr0VwxMP$EL3W(PNKz9UrCHO6kNJJvk?D98q6U_Y7KWXRp`l%|lgYSs>~%(?l^NWQhM@e}JFa+v}$bYEa25QPn@Am}Wqk{90O( z&WNL9;!@$~$bD5VUCxyHm^+0|&m10y{GXM*HYArmP}b>b=&3*@SOMf+b#?iM z)1AcBE4giy)3C*R2=++b>lf$V_?>%_s4&>gc2tk3T&=Q8^y=C90w`zymOh@}h)LM} zhq;UU`2z69#B>oy;H*00g}Iq3K}AM*rr~UxWt_;^jr7Uun4NE2&239=hD{reKZiyc z-SHN(Q12K!WV$R#5G$X)kNX**Y`QYu$kfulWR?NabdMo9tboc8iBK6z_M}tD`J6W2 
zW693Ys5w!1Rw@=|Bz`P}1rt4(FMXQ6!}l`t;j7yI#5k8V(&ZejUEewxS>(HF1un@h}egbiX#fy+GtB)_dcL z$$gS8Kku(k_a0Qk1F1~vL)|HcKA-z@A7%J2hR(LkcEF}5MIKHLREwS5?ro{JDu0VR z6WT6Yudm}g6?SZ8@s_fhzkU1j%{FJ-^?I|S_#kN)j9R2HGTh&1e``;njG_K{qXFO@@|zeb*-+%2#j=v-}H0tF+%n1BWPmnl*YYB9w+bJ zA8Af*Gi7{dME!%sAN3E^f7L(WZB`CvnD&9%lOrJ;cq6Cu>(*s^T-3oTo`IeZSaB3M zXZ}k8IMEZbSvjohvO><>4wM@|s&>>LO7)B-dGJQH7{30E6T& z{d*3Nr}h7-{{8ML^EMq?NO($(PVwTSgu?(k#CkKqSI~$O#X(rvNj!)IU_vq)IO^8Q zglO9+0#{MLO1{pd;+8itgS%t>elVx*Y-^f?{)eM1S?BwPAmMkxmRr<+_WGV*9M8|p zR-SQoZPlQ)l!*H2b#*!J26=xv?j(u>z{dW@BDrr8kMCfQs#l#~>0+fR=+mq_lc?vC z@xt?_7r?o2H&8j#mmYUNsaB|N%`SOmIu|^qKBz63CR8zdKs(!-MTQD+Z0=UJM~G0q zM+HM&V&3>JZS9FHN#Hiz{$Iqsby!qw*ESAPp9R||fB^^=% z(juXNgi3ceNJ-bw-NP`<%x{nP`+U#+z3=_yAK&jdhChVEz4u(#y4G6fTIad;uZUDh z7^Ae@(dp82@orVd%Mh=5o)I~qmPL&th0h+S%p8WI&J5`c6&%vhQZ!1hG)Z+!0@M+ z0&Xh7^(V<#JLexEOoqALt}-bqpslD7b9$Ue?_OQNoyjKQ8aw>#NA{!!>}D*&vghjJ zTQ&v#(J?uu8z%6pr0Zss5L(m0x7Y{%M)Rx+#s9+TXy`C&cM%>W-0t)?zuvtW`k~ow zfA;*K>=5wWog4y{73;ueZyPP^PV^Qpf%xr z3N)M^c07UJeWX6eW%>& zv&?Vk=~bkhH7Giv8lc?0e^je$|HF-bQ-b6oC1Z|S(vP&|X<%RrJe{+a!J*}y&+qUN zS5ujd_+|qzx95?cgT8ktQzMiK9pcej67SM-wXa+3q}+S^T}ZYw0J_!!v_3Do{a)H( z^cfYEXIOyGYa{BV4y$6ziz1_h{oM^As9@W@ZIg2gzsx+zU{dcHnCc-gqr~Z`as=6r zY?z<_?zhDd??~#G`|Y*a7B9r_^Qu@m7vq=9Q3=L-Zth3!ancv@2fd`(DlV{(sY}15 zj2Crf5#bEeb+USXoi}ycD zgt4+5PYrY>*pd2=pRk~Wz{#XaKPKN4b>B^}@42}~cee1@gIM9$_T<{RKqbcEh8rm8 ztlL?D0z)(UlYKg|-eKw>|HO|8|2IEI^8YH4Ceqwa1~h}>r;+( z%3TBbu?H7#d27ki|1>@Q&n^uGR+XiY;($L)2| zy5HB9m5*dh0OiTBxR2CIN$sYfpXm+!^LOuh7z3m~`3x+{OiQg)N+4t&$NSIzYAe*c z>~BHG&cR8lESL!{)A<4Hf?um))fJS@g+2iLKl66zm@|cu)x6e)3n%p-?=BuEEq9cd z_&VgsYD@g&COMgQAE`+}$MXS`4?R*+N6JlfMNCp#-Jh z1FG#Wp%@-77QUdGE+(S6M-SHAQ@;}`-dFW) z2D8kBKj&2Q+E%6TA3HhjzX`s6x(GVX!Q}!FDJ7#+=sdtvMB&~;uE0l+bD_BaJL%tu z^t7N64o{AsOSn?m2-Rn$C{ZQV;m9cN^BoJb+a{>0%T!E=C)EC}#8+y&pAhXNui_H$ zQoH+yAAHNg=hrb$?k6lU<(IZJETWjsTZE5MfWq`04A`K_6d<7*pn>PX*? zB3^oa;{*!yHC9&*;H{5C-6CZ0lGEU_?s5+Tzjk4PSEZ`<@RR?3#FAk~Wo&9HD>rpp+JQ z9TOzy&`>f}z^eR|+o3tHurN+0L6@bR7p6f`m&D~>c~G+3XD!y=0O zdACNhzyXNu6E)P>)AZX{jG->;r%`smO~))JXhStq=ux6gS8~n$6&|3M1+KaZr3eDV z-rkwtZIAg^(JcPk;DFWSsAlQ}zZR7hu88B8o|m71jE05`rX36Pe1zpZ1Pz(uo(9j8 zcYyU?jerOLVxp7fAS|Sj91MidRleR48COi9m21dp=->t}mOe2D3z9X-=ud(^>`@j2L ze%E)?Mf3L6Y8F2T=>rRrRLNV%m)W&HNa1fRlI*TfVzlH+B=z1qv&_k>cv)iXlxsWM zBU9p;2mKWG@a9NBH{*eB`21XsClG_#D3H?t)LrCvI&fLMBisP(i! zWXcL)*!6;UENIvJ)8!)sQi#Z=jH?dXnVZ*_-uH!#Ye14sjMukctZKr@ZYY7E|er z?Unr=wW%I7f0G+Zf~36l;q{mxP!3anbjBPX*eN92Ooy*+>tOYx|Ev_xN)`$j=6?NQo@Oy666Q=(#^oB<1|^1z>W9uSB1 zB3l?|w}xLW`PCa&yMD=}z6?oR!Iy!%z~I*u@kWOcXzoLK&yyEf(g#QHkdT9@YhKH2G)hGfl~1h=5Q57W+zo(;#dipqanUx@DW z>ZbdcTSO_ciXHMy3E;MfwPP`K#WMc8k!BF!e|KN?!Q3t6yC|nWV3%e2UtkvwkGFpl zRjcl|<|V(?X^Uy3p_F)DzwcMrBnTx|umkd=7z6s6>CRa3PTn|HO-h1WobTx^N&sT? z(d&0uZ+k>^e3zdogmQdk#aL|&Sn-)sHm-$BZpgEK$GbJ76YU_4joZLi4 zx4}Lm`3uN{IzM+Ab*x}Fz-As@hOq{$#!0|O|Dxl&-Gj;%! 
[GIT binary patch data for the HF Inference Endpoints documentation screenshot assets omitted]
zxfgOPLz7w|^{Cz!W+0fYhQ(RN|=(8`oW2{v)Ux^W~WzI<0yE)8&=u}FYbao zp3Kzfy|iJs-LJ5kdFx^!;Grb-kZ-zasd6jmL>Q}{nKT$0sdf3K36@`DX%F4z48dY6 z1PaT;`%jgGEjT$p0I?~d13X8)*o#@<4|>K#qm#kgIGx*kw#3l*MD2sE`>6Xd zPu?Ydror`LfC+yPm+*iL)kWiG;?REIt#&`oB?OOW4Dn7@{9yr%kROlyc9PzKDpq`*eSkZfQEe4f9ftjI^A$u%g( zrVRRpu_;2kK5;qwMa98g|Dp^-_V`Cp@%PxYuyNDG&V>hA%I7-N-BE3C8=sz% zRwwd+%$cb%Ulw}byU$GkX>*Qc&JHI(zF>0JSsr|EN&3$sEy)unQ-lm|^V=1H%suNgQO0!p4F&-SM5k4QeYy=% z-m!x^bBZF_e&6M3JonP9?2`@nOvp;Iy-wJ>4OySI?YEuZ6=c3lFxnDmgqqeo{fKy8Ixo$ zlM!2C^5PLL?+Pe_Fi;zIeW-E3jWryUR<$~ZXTsYvv2u>k<(3hik+pc zo|GnMUg`5xM8Ob5Vju#}YMb9u{T4YH7g!*8ESO_pId@wHzR1W}rv+O;=TV&L-?$=V zSsYC#u0~QjTz4F@I;AYZ1f|&!jn*%d6>6!Sn6r0+HT&<~?L;5W3*HwHcRr-m|5+b# zuzO-@vsG2ydr>Ix^2JnjA+{~#F3(9kT?g$d>jZxiyhd56$6$xJ2{02%Ge2J$&~M%J zow8L_tT)6qxn^U7{&1l6;*@oT`T|P8jq9`y12?K-j@q7-B!H{0c^(%S_g)IA{9`kG zw;<&xl{1Nj4Tjembxh)W8T#>B0!91W`JJxUtVwZ}U*RDDi{v1uQ{69X{Oo8~XoZz* z^~F}Ve|k<#Nwf1Xk zo-eHV<78kus!7pFI|2Oc!k)-|YxC@2(--B^Xz4Doc>Cm~?UL}u3l*BzQCg-1G?+}O zm)Df1xV-+Tj}`Y}bViHL3wFD{Z0xdEpr6sn&U~>>6DCJKFKrneJvkLf>(*}Kd8JE` ze0g6Kl(M8WW3s#@!E*VKCMODNSY`5i1%?1<_Kok*AoS1QIg)eI%?iHa2$K(5K=OzlZhBTl!TNdoL^FhV`>Bm33U98XV0VF+ut9oq9+JPKtC6vK$=3 zsl~|1SY4^+YpKSNdQ%|kF@;$Xd26qEbKcUz9(DFEC@go8>&DSZjGQx}avX4n&r;T= zzNY3G=(z%~?!=7)cQWNNDZ1;88GG)&2e#%MOH?aPDLZDi-)uyfkjX<~t9&KL&1J4K zcVDxt`fr^Ism!#xquma51}_R`%MlG<6b;Ib>vO-kt#evA%a=0!QlqD@1Jm>@M5h%> z4ZrBGA(}5GT@;l_Ep?l6oQwF?SMM%YhL$_7@%}S=V1Gc7K}CI4shhRqY-)7!4OG#R z|1CuGk&L8oi0zcPefGLB z_dZj_n`!f}y!t%*qN8RXx~SkSU(<-^ke8Cs+S0WXP81emvDQP|&qPZ+KL6B6zgZO& zb4-!`XS61Xu(IS0YyFjcwDnMVP}R`aS2g4*Kce7A<#qJ0Y8jBWdw8VDLjiZEa$gCG)F3C zSS9U_@F_aSHWt~Ayc*lmnk>EKfz>Cb$s9(#M8v_-A<#=ND;*M%?wbtJr z>n}8rNFQjP7_#?E^e4PcG2_o+bU8%{K1`eEDh%+tzZ1 z*tQEgUZQhZy!AU896e_SE(VtDc$~CpLOvlL6FDMxE?mJ&>R)^y*cA{t2us4QA5zDq WCO#B$%4VV **HF Inference Endpoints**. You will be taken to the Inference Endpoints interface to configure the deployment. + + ![Click deploy button](../../assets/deployment/hf-inference-endpoints-click-deploy-button.png) + +4. Select the Hardware (we choose AWS>GPU>T4 for the example) and Container Configuration. Choose `vLLM` as the container type and finalize the deployment pressing **Create Endpoint**. + + ![Select Hardware](../../assets/deployment/hf-inference-endpoints-select-hardware.png) + +5. Use the deployed endpoint. Update the `DEPLOYMENT_URL` with the URL provided in the console (remember to add `/v1` needed). You can then use your endpoint programmatically or via the SDK. + + ```python + # pip install openai + from openai import OpenAI + import os + + client = OpenAI( + base_url = DEPLOYMENT_URL, + api_key = os.environ["HF_TOKEN"] # https://huggingface.co/settings/tokens + ) + + chat_completion = client.chat.completions.create( + model = "ibm-granite/granite-docling-258M", + messages = [ + { + "role": "user", + "content": [ + { + "type": "image_url", + "image_url": { + "url": "https://huggingface.co/ibm-granite/granite-docling-258M/resolve/main/assets/new_arxiv.png" + } + }, + { + "type": "text", + "text": "Convert this page to docling." + } + ] + } + ], + stream = True + ) + + for message in chat_completion: + print(message.choices[0].delta.content, end = "") + ``` + +!!! note + This method uses best-guess defaults. You may need to adjust the configuration to fit your specific requirements. 
+
+### Method 3: Manual Deployment (Advanced Models)
+
+Some models require manual deployment because they:
+
+- Use custom code with the `transformers` tag
+- Don't run with standard `transformers` but are supported by `vLLM`
+
+These models cannot be deployed using the **Deploy** button on the model card.
+
+In this guide, we demonstrate manual deployment using the [rednote-hilab/dots.ocr](https://huggingface.co/rednote-hilab/dots.ocr) model, an OCR model integrated with vLLM (see vLLM [PR](https://github.com/vllm-project/vllm/pull/24645)).
+
+1. Start a new deployment. Go to [Inference Endpoints](https://endpoints.huggingface.co/) and click `New`.
+
+    ![New Endpoint](../../assets/deployment/hf-inference-endpoints-new-endpoint.png)
+
+2. Search for the model on the Hub. In the dialog, switch to **Hub** and search for the desired model.
+
+    ![Select model](../../assets/deployment/hf-inference-endpoints-select-model.png)
+
+3. Choose the infrastructure. On the configuration page, select the cloud provider and hardware from the available options.
+   For this demo, we choose AWS and an L4 GPU. Adjust according to your hardware needs.
+
+    ![Choose Infra](../../assets/deployment/hf-inference-endpoints-choose-infra.png)
+
+4. Configure the container. Scroll to **Container Configuration** and select `vLLM` as the container type.
+
+    ![Configure Container](../../assets/deployment/hf-inference-endpoints-configure-container.png)
+
+5. Create the endpoint. Click **Create Endpoint** to deploy the model.
+
+    Once the endpoint is ready, you can use it with the OpenAI Completion API, cURL, or other SDKs (a usage sketch is shown after this section). Remember to append `/v1` to the deployment URL if needed.
+
+!!! note
+    You can adjust the **container settings** (Container URI, Container Arguments) from the Inference Endpoints UI and press **Update Endpoint**. This redeploys the endpoint with the updated container configuration. Changes to the model itself require creating a new endpoint or redeploying with a different model. For example, for this demo, you may need to update the Container URI to the nightly image (`vllm/vllm-openai:nightly`) and add the `--trust-remote-code` flag in the container arguments.
+
+## Advanced Deployment Details
+
+With the [transformers backend integration](https://blog.vllm.ai/2025/04/11/transformers-backend.html), vLLM now offers Day 0 support for any model compatible with `transformers`. This means you can deploy such models immediately, leveraging vLLM's optimized inference without additional backend modifications.
+
+Hugging Face Inference Endpoints provides a fully managed environment for serving models via vLLM. You can deploy models without configuring servers, installing dependencies, or managing clusters. Endpoints also support deployment across multiple cloud providers (AWS, Azure, GCP) without the need for separate accounts.
+
+The platform integrates seamlessly with the Hugging Face Hub, allowing you to deploy any vLLM- or `transformers`-compatible model, track usage, and update the inference engine directly. The vLLM engine comes preconfigured, enabling optimized inference and easy switching between models or engines without modifying your code. This setup simplifies production deployment: endpoints are ready in minutes, include monitoring and logging, and let you focus on serving models rather than maintaining infrastructure.
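As referenced in step 5 of Method 3 above, here is a minimal usage sketch for a manually deployed endpoint. The `DEPLOYMENT_URL` value and the served model id are assumptions for illustration; take the real values from the endpoint console (and `GET /v1/models`), not from this snippet.

```python
# pip install openai
# Minimal sketch for querying a manually deployed endpoint (e.g. dots.ocr).
# DEPLOYMENT_URL and the model id below are placeholders, not values from the guide.
import os
from openai import OpenAI

DEPLOYMENT_URL = "https://<your-endpoint>.endpoints.huggingface.cloud/v1"

client = OpenAI(
    base_url=DEPLOYMENT_URL,
    api_key=os.environ["HF_TOKEN"],
)

response = client.chat.completions.create(
    model="rednote-hilab/dots.ocr",  # assumed served model id; check /v1/models
    messages=[
        {
            "role": "user",
            "content": [
                {"type": "image_url",
                 "image_url": {"url": "https://example.com/sample-page.png"}},
                {"type": "text", "text": "Extract the text from this page."},
            ],
        }
    ],
)
print(response.choices[0].message.content)
```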
+ +## Next Steps + +- Explore the [Inference Endpoints](https://endpoints.huggingface.co/catalog) model catalog +- Read the Inference Endpoints [documentation](https://huggingface.co/docs/inference-endpoints/en/index) +- Learn about [Inference Endpoints engines](https://huggingface.co/docs/inference-endpoints/en/engines/vllm) +- Understand the [transformers backend integration](https://blog.vllm.ai/2025/04/11/transformers-backend.html) From f4db5e6de19b21891724d443bbc3cefb4d2e1005 Mon Sep 17 00:00:00 2001 From: Anion <123177548+Anionex@users.noreply.github.com> Date: Tue, 30 Sep 2025 22:38:07 +0800 Subject: [PATCH 510/518] [Bugfix][Model] Fix inference for Hunyuan dense models (#25354) Signed-off-by: anion <1005128408@qq.com> Signed-off-by: Anion <123177548+Anionex@users.noreply.github.com> --- vllm/model_executor/models/hunyuan_v1.py | 106 +++++++++++++---------- 1 file changed, 59 insertions(+), 47 deletions(-) diff --git a/vllm/model_executor/models/hunyuan_v1.py b/vllm/model_executor/models/hunyuan_v1.py index 085e740ce226..09f124426fa1 100644 --- a/vllm/model_executor/models/hunyuan_v1.py +++ b/vllm/model_executor/models/hunyuan_v1.py @@ -888,7 +888,7 @@ def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): return loaded_params -class HunYuanV1Base(nn.Module, SupportsLoRA, SupportsPP, MixtureOfExperts): +class HunyuanV1ModelBase(nn.Module, SupportsLoRA, SupportsPP): packed_modules_mapping = { "qkv_proj": [ "q_proj", @@ -930,6 +930,56 @@ def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): else: self.lm_head = PPMissingLayer() + def forward( + self, + input_ids: torch.Tensor, + positions: torch.Tensor, + intermediate_tensors: Optional[IntermediateTensors] = None, + inputs_embeds: Optional[torch.Tensor] = None, + ) -> Union[torch.Tensor, IntermediateTensors]: + model_output = self.model(input_ids, positions, intermediate_tensors, + inputs_embeds) + return model_output + + def compute_logits( + self, + hidden_states: torch.Tensor, + ) -> Optional[torch.Tensor]: + logits = self.logits_processor(self.lm_head, hidden_states) + return logits + + def make_empty_intermediate_tensors( + self, batch_size: int, dtype: torch.dtype, + device: torch.device) -> IntermediateTensors: + return IntermediateTensors({ + "hidden_states": + torch.zeros((batch_size, self.config.hidden_size), + dtype=dtype, + device=device), + "residual": + torch.zeros((batch_size, self.config.hidden_size), + dtype=dtype, + device=device), + }) + + def load_weights(self, weights: Iterable[tuple[str, + torch.Tensor]]) -> set[str]: + loader = AutoWeightsLoader( + self, + skip_prefixes=(["lm_head."] + if self.config.tie_word_embeddings else None), + ) + return loader.load_weights(weights) + + def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: + return self.model.get_input_embeddings(input_ids) + + +class HunYuanMoEV1Base(HunyuanV1ModelBase, MixtureOfExperts): + + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) + # Set MoE hyperparameters self.expert_weights = [] self.num_expert_groups = 1 @@ -988,57 +1038,19 @@ def update_physical_experts_metadata( moe.n_redundant_experts = self.num_redundant_experts moe.experts.update_expert_map() - def get_input_embeddings(self, input_ids: torch.Tensor) -> torch.Tensor: - return self.model.get_input_embeddings(input_ids) + def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: + return self.model.get_expert_mapping() - def forward( - self, - input_ids: 
torch.Tensor, - positions: torch.Tensor, - intermediate_tensors: Optional[IntermediateTensors] = None, - inputs_embeds: Optional[torch.Tensor] = None, - ) -> Union[torch.Tensor, IntermediateTensors]: - model_output = self.model(input_ids, positions, intermediate_tensors, - inputs_embeds) - return model_output - def compute_logits( - self, - hidden_states: torch.Tensor, - ) -> Optional[torch.Tensor]: - logits = self.logits_processor(self.lm_head, hidden_states) - return logits +class HunYuanDenseV1Base(HunyuanV1ModelBase): - def make_empty_intermediate_tensors( - self, batch_size: int, dtype: torch.dtype, - device: torch.device) -> IntermediateTensors: - return IntermediateTensors({ - "hidden_states": - torch.zeros((batch_size, self.config.hidden_size), - dtype=dtype, - device=device), - "residual": - torch.zeros((batch_size, self.config.hidden_size), - dtype=dtype, - device=device), - }) - - def load_weights(self, weights: Iterable[tuple[str, - torch.Tensor]]) -> set[str]: - loader = AutoWeightsLoader( - self, - skip_prefixes=(["lm_head."] - if self.config.tie_word_embeddings else None), - ) - return loader.load_weights(weights) - - def get_expert_mapping(self) -> list[tuple[str, str, int, str]]: - return self.model.get_expert_mapping() + def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""): + super().__init__(vllm_config=vllm_config, prefix=prefix) -class HunYuanDenseV1ForCausalLM(HunYuanV1Base): +class HunYuanDenseV1ForCausalLM(HunYuanDenseV1Base): pass -class HunYuanMoEV1ForCausalLM(HunYuanV1Base): - pass +class HunYuanMoEV1ForCausalLM(HunYuanMoEV1Base): + pass \ No newline at end of file From ef283548f75122b0e8ce49ce2548fbb49446d7c7 Mon Sep 17 00:00:00 2001 From: Pavani Majety Date: Tue, 30 Sep 2025 07:51:31 -0700 Subject: [PATCH 511/518] [Bugfix] Fix accuracy issue of TRTLLM FP8 MOE and improve logging (#25895) Signed-off-by: Pavani Majety --- .../model_executor/layers/quantization/fp8.py | 39 +++++++++++-------- vllm/utils/deep_gemm.py | 7 +++- 2 files changed, 29 insertions(+), 17 deletions(-) diff --git a/vllm/model_executor/layers/quantization/fp8.py b/vllm/model_executor/layers/quantization/fp8.py index fb448de3c234..a0808cb603d0 100644 --- a/vllm/model_executor/layers/quantization/fp8.py +++ b/vllm/model_executor/layers/quantization/fp8.py @@ -434,14 +434,9 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module): self.weight_block_size = self.quant_config.weight_block_size self.block_quant = self.weight_block_size is not None - self.flashinfer_moe_backend: Optional[FlashinferMoeBackend] = None self.fused_experts: Optional[ mk.FusedMoEModularKernel] = None # type: ignore - if envs.VLLM_USE_FLASHINFER_MOE_FP8 and has_flashinfer_moe(): - self.flashinfer_moe_backend = get_flashinfer_moe_backend() - logger.info_once( - f"Using FlashInfer {self.flashinfer_moe_backend.value} kernels" - ) + # For GPUs that lack FP8 hardware support, we can leverage the Marlin # kernel for fast weight-only FP8 quantization self.use_marlin = (not current_platform.has_device_capability(89) @@ -450,14 +445,27 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module): if current_platform.is_rocm(): self.use_marlin = False + # First check for Flashinfer MOE on Blackwell GPUs + self.flashinfer_moe_backend: Optional[FlashinferMoeBackend] = None + if (current_platform.is_cuda() + and current_platform.is_device_capability(100) + and envs.VLLM_USE_FLASHINFER_MOE_FP8 and has_flashinfer_moe()): + self.flashinfer_moe_backend = get_flashinfer_moe_backend() + logger.info_once( + 
f"Detected Blackwell GPUs, using FlashInfer " + f"{self.flashinfer_moe_backend.value} kernels for FP8 MOE.") + # Check for DeepGemm support. self.allow_deep_gemm = False if envs.VLLM_USE_DEEP_GEMM: if not has_deep_gemm(): logger.warning_once("Failed to import DeepGemm kernels.") elif not self.block_quant: - logger.warning_once("Model is not block quantized. Not using " - "DeepGemm kernels") + logger.warning_once("Model is not block quantized. Not using" + " DeepGemm kernels") + elif self.flashinfer_moe_backend: + logger.info_once("DeepGemm disabled: FlashInfer MOE is" + " enabled.") elif (is_deep_gemm_supported()): logger.info_once("Using DeepGemm kernels for Fp8MoEMethod.") self.allow_deep_gemm = True @@ -471,15 +479,12 @@ def __init__(self, quant_config: Fp8Config, layer: torch.nn.Module): logger.debug_once("Model is not block quantized. Not using " "CutlassBlockScaledGroupedGemm kernels") elif (current_platform.is_cuda() - and current_platform.is_device_capability(100)): + and current_platform.is_device_capability(100) + and not self.flashinfer_moe_backend): logger.info_once( - "Using CutlassBlockScaledGroupedGemm kernels for Fp8MoEMethod." - ) + "Using CutlassBlockScaledGroupedGemm kernels for Fp8 MOE " + "on SM100.") self.allow_cutlass_block_scaled_grouped_gemm = True - else: - logger.warning_once( - "CutlassBlockScaledGroupedGemm not supported on the current " - "platform.") def create_weights(self, layer: Module, num_experts: int, hidden_size: int, intermediate_size_per_partition: int, @@ -934,7 +939,9 @@ def apply( import vllm.model_executor.layers.fused_moe.flashinfer_trtllm_moe # noqa: E501, F401 assert (renormalize and use_grouped_topk and custom_routing_function is None) - result = torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8( + e_score_correction_bias = (e_score_correction_bias.to( + x.dtype) if e_score_correction_bias is not None else None) + return torch.ops.vllm.flashinfer_fused_moe_blockscale_fp8( routing_logits=router_logits.to(torch.float32), routing_bias=e_score_correction_bias, x=x, diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index 0e3bdaec829e..4f05f0bc35cc 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -27,7 +27,8 @@ def is_deep_gemm_supported() -> bool: is_supported_arch = current_platform.is_cuda() and ( current_platform.is_device_capability(90) or current_platform.is_device_capability(100)) - return envs.VLLM_USE_DEEP_GEMM and has_deep_gemm() and is_supported_arch + return (envs.VLLM_USE_DEEP_GEMM and has_deep_gemm() and is_supported_arch + and not envs.VLLM_USE_FLASHINFER_MOE_FP8) @functools.cache @@ -46,6 +47,10 @@ def is_deep_gemm_e8m0_used() -> bool: logger.info_once("DeepGEMM E8M0 disabled: _fp8_gemm_nt_impl not found") return False + if envs.VLLM_USE_FLASHINFER_MOE_FP8: + logger.info_once("DeepGEMM E8M0 disabled: FlashInfer MOE is enabled.") + return False + if current_platform.is_device_capability(100) and \ envs.VLLM_USE_DEEP_GEMM_E8M0: logger.info_once("DeepGEMM E8M0 enabled on Blackwell GPU.") From 9f1c4ecaf2aa6fa2a53d870304126d1d950842c5 Mon Sep 17 00:00:00 2001 From: Cyrus Leung Date: Wed, 1 Oct 2025 00:23:12 +0800 Subject: [PATCH 512/518] [Bugfix] Token type and position embeddings fail to be applied to `inputs_embeds` (#25922) Signed-off-by: DarkLight1337 --- vllm/model_executor/models/bert.py | 17 ++++++++++------- vllm/model_executor/models/roberta.py | 6 ++++-- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/vllm/model_executor/models/bert.py b/vllm/model_executor/models/bert.py index 
c984845204c4..2ec3edc5a0a7 100644 --- a/vllm/model_executor/models/bert.py +++ b/vllm/model_executor/models/bert.py @@ -61,11 +61,13 @@ def forward( self, input_ids: torch.Tensor, position_ids: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - token_type_ids = _decode_token_type_ids(input_ids) - inputs_embeds = self.word_embeddings(input_ids) + if inputs_embeds is None: + inputs_embeds = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) token_type_embeddings = self.token_type_embeddings(token_type_ids) @@ -358,11 +360,12 @@ def forward( intermediate_tensors: Optional[IntermediateTensors] = None, inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - if inputs_embeds is not None: - hidden_states = inputs_embeds - else: - hidden_states = self.embeddings(input_ids=input_ids, - position_ids=positions) + hidden_states = self.embeddings( + input_ids=input_ids, + position_ids=positions, + inputs_embeds=inputs_embeds, + ) + return self.encoder(hidden_states) def _load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]): diff --git a/vllm/model_executor/models/roberta.py b/vllm/model_executor/models/roberta.py index 53e698c4fa80..a13042a6367c 100644 --- a/vllm/model_executor/models/roberta.py +++ b/vllm/model_executor/models/roberta.py @@ -56,11 +56,13 @@ def forward( self, input_ids: torch.Tensor, position_ids: torch.Tensor, + inputs_embeds: Optional[torch.Tensor] = None, ) -> torch.Tensor: - token_type_ids = _decode_token_type_ids(input_ids) - inputs_embeds = self.word_embeddings(input_ids) + if inputs_embeds is None: + inputs_embeds = self.word_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) token_type_embeddings = self.token_type_embeddings(token_type_ids) From a2e6fa7e035ff058fc37fdaaf014707efff2fcf3 Mon Sep 17 00:00:00 2001 From: youkaichao Date: Wed, 1 Oct 2025 00:30:36 +0800 Subject: [PATCH 513/518] [bugfix][deepseek] fix flashmla kernel selection (#25956) Signed-off-by: youkaichao --- vllm/attention/ops/flashmla.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/attention/ops/flashmla.py b/vllm/attention/ops/flashmla.py index 3cc0e4adfa0a..9654f9f6775a 100644 --- a/vllm/attention/ops/flashmla.py +++ b/vllm/attention/ops/flashmla.py @@ -136,7 +136,7 @@ def flash_mla_with_kvcache( descale_k is None ), "descale_q and descale_k should be both None or both not None" - if (descale_q is not None) and (descale_k is not None): + if indices is None and q.element_size() == 1: out, softmax_lse = torch.ops._flashmla_extension_C.fwd_kvcache_mla_fp8( q, k_cache, head_dim_v, cache_seqlens, block_table, softmax_scale, causal, tile_scheduler_metadata, num_splits, descale_q, descale_k) From 2682bb7e9004a8c99bf4c94f7cac9280e19b822d Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 16:55:28 +0000 Subject: [PATCH 514/518] Squashed commit of nm/lwilkinson/dbo-alt-schedules changes relative to origin/main fixes Signed-off-by: Lucas Wilkinson fix Signed-off-by: Lucas Wilkinson fix Signed-off-by: Lucas Wilkinson fixes and formatting Signed-off-by: Lucas Wilkinson --- examples/offline_inference/data_parallel.py | 2 +- vllm/config/parallel.py | 10 + vllm/engine/arg_utils.py | 6 + vllm/forward_context.py | 29 ++ .../fused_moe/deepep_ht_prepare_finalize.py | 226 ++++------- .../fused_moe/deepep_ll_prepare_finalize.py | 119 +++--- .../layers/fused_moe/modular_kernel.py | 373 +++++++++--------- .../layers/fused_moe/pplx_prepare_finalize.py | 141 
++++--- vllm/model_executor/models/deepseek_v2.py | 2 + vllm/v1/attention/backends/mla/common.py | 8 +- vllm/v1/worker/gpu_model_runner.py | 15 +- vllm/v1/worker/gpu_ubatch_wrapper.py | 73 +++- vllm/v1/worker/ubatching.py | 129 +++++- vllm/v1/worker/utils.py | 6 + 14 files changed, 654 insertions(+), 485 deletions(-) diff --git a/examples/offline_inference/data_parallel.py b/examples/offline_inference/data_parallel.py index 0076d4d30ee8..65e3573e9ff1 100644 --- a/examples/offline_inference/data_parallel.py +++ b/examples/offline_inference/data_parallel.py @@ -259,4 +259,4 @@ def start(rank): elif proc.exitcode: exit_code = proc.exitcode - exit(exit_code) + exit(exit_code) \ No newline at end of file diff --git a/vllm/config/parallel.py b/vllm/config/parallel.py index 8b980458ddaf..3723067e8c01 100644 --- a/vllm/config/parallel.py +++ b/vllm/config/parallel.py @@ -151,6 +151,16 @@ class ParallelConfig: prefills. If the number of tokens in the request is greater than this threshold, microbatching will be used. Otherwise, the request will be processed in a single batch.""" + microbatch_schedule: Literal["mlp_shared_overlap", "attn_shared_overlap"] =\ + "mlp_shared_overlap" + """Schedule policy for microbatch overlap coordination. + + Options: + - "mlp_shared_overlap": overlap MLP and communication across ubatches + - "attn_shared_overlap": overlap MLA attention and communication across + ubatches + see: vllm/v1/worker/ubatching.py for diagrams of the schedules. + """ ray_workers_use_nsight: bool = False """Whether to profile Ray workers with nsight, see https://docs.ray.io/en/latest/ray-observability/user-guides/profiling.html#profiling-nsight-profiler.""" diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index ec61fc4b9b06..615a4a207686 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -334,6 +334,7 @@ class EngineArgs: ParallelConfig.dbo_decode_token_threshold dbo_prefill_token_threshold: int = \ ParallelConfig.dbo_prefill_token_threshold + microbatch_schedule: str = ParallelConfig.microbatch_schedule eplb_config: EPLBConfig = get_field(ParallelConfig, "eplb_config") enable_eplb: bool = ParallelConfig.enable_eplb expert_placement_strategy: ExpertPlacementStrategy = \ @@ -705,6 +706,10 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser: parallel_group.add_argument( "--dbo-prefill-token-threshold", **parallel_kwargs["dbo_prefill_token_threshold"]) + parallel_group.add_argument( + "--microbatch-schedule", + dest="microbatch_schedule", + **parallel_kwargs["microbatch_schedule"]) parallel_group.add_argument("--enable-eplb", **parallel_kwargs["enable_eplb"]) parallel_group.add_argument("--eplb-config", @@ -1329,6 +1334,7 @@ def create_engine_config( enable_dbo=self.enable_dbo, dbo_decode_token_threshold=self.dbo_decode_token_threshold, dbo_prefill_token_threshold=self.dbo_prefill_token_threshold, + microbatch_schedule=self.microbatch_schedule, enable_eplb=self.enable_eplb, eplb_config=self.eplb_config, expert_placement_strategy=self.expert_placement_strategy, diff --git a/vllm/forward_context.py b/vllm/forward_context.py index 09defade00dc..6a7d8c769225 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -171,6 +171,34 @@ def should_ubatch_across_dp( return False, None return result, padded_num_tokens_tensor.cpu() + @staticmethod + def should_ubatch_across_dp(should_ubatch: bool, orig_num_tokens_per_ubatch: int, + padded_num_tokens_per_ubatch: int, dp_size: int, + dp_rank: int) -> tuple[bool, 
Optional[torch.Tensor]]: + + tensor = torch.zeros(3, dp_size, device="cuda", dtype=torch.int32) + tensor[0][dp_rank] = orig_num_tokens_per_ubatch + tensor[1][dp_rank] = padded_num_tokens_per_ubatch + tensor[2][dp_rank] = 1 if should_ubatch else 0 + + + from vllm.distributed.parallel_state import get_dp_group + dist.all_reduce(tensor, group=get_dp_group().device_group) + + result: bool = bool(torch.all(tensor[2]== 1).item()) + if not result: + return result, None + + orig_num_tokens_tensor = tensor[0, :] + padded_num_tokens_tensor = tensor[1, :] + + orig_min_num_tokens = orig_num_tokens_tensor.min().item() + padded_max_num_tokens = padded_num_tokens_tensor.max().item() + if padded_max_num_tokens >= 2 * orig_min_num_tokens: + logger.debug(f"Aborting ubatching {orig_min_num_tokens} {padded_max_num_tokens}") + return False, None + return result, padded_num_tokens_tensor + @staticmethod def make( parallel_config: ParallelConfig, @@ -199,6 +227,7 @@ def make( if num_tokens_across_dp_cpu is None: num_tokens_across_dp_cpu = DPMetadata.num_tokens_across_dp( batchsize, dp_size, dp_rank) + max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp_cpu) return DPMetadata(max_tokens_across_dp_cpu, num_tokens_across_dp_cpu) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py index 9e9a9afc18a0..bf17268c8cc6 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py @@ -81,36 +81,65 @@ def _get_combine_config(self) -> Optional[deep_ep.Config]: return None return deep_ep.Buffer.get_combine_config(self.num_dispatchers_) - def _do_dispatch( + def _create_prepare_ops( self, - tokens: torch.Tensor, - token_scales: Optional[torch.Tensor], - rank_topk_ids: torch.Tensor, - rank_topk_weights: torch.Tensor, + a1: torch.Tensor, + topk_weights: torch.Tensor, + topk_ids: torch.Tensor, num_experts: int, - a1_scale: Optional[torch.Tensor], + expert_map: Optional[torch.Tensor], + apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> Callable: + ) -> mk.PrepareResultType: + + # Apply router weights on input if requested (only supports topk=1) + if apply_router_weight_on_input: + topk = topk_ids.size(1) + assert topk == 1, ( + "apply_router_weight_on_input is only implemented for topk=1") + a1 = a1 * topk_weights.to(a1.dtype) + + # Quantize prior to dispatch for block-quantized path, otherwise defer + if quant_config.is_block_quantized: + a1q, a1q_scale = moe_kernel_quantize_input( + a1, + quant_config.a1_scale, + quant_dtype=quant_config.quant_dtype, + per_act_token_quant=quant_config.per_act_token_quant, + block_shape=quant_config.block_shape, + ) + if a1q_scale is not None and a1q_scale.numel() == 1: + a1q_scale = a1q_scale.view(1, 1) + a1_post_scale = None + else: + a1q = a1 + a1q_scale = None + a1_post_scale = quant_config.a1_scale - has_scales = token_scales is not None + # Inline dispatch (sync send+recv) + has_scales = a1q_scale is not None - # We yield before launching the dispatch kernel since the dispatch - # kernel will block the CPU so we want to queue up all the compute - # for the other ubatch before the dispatch kernel starts. 
- dbo_yield_and_switch_from_compute_to_comm() + ######################################################################## + yield # Pre-dispatch done + ######################################################################## (num_tokens_per_rank, num_tokens_per_rdma_rank, dispatch_expert_num_tokens, is_token_in_rank, event) = self.buffer.get_dispatch_layout( - topk_idx=rank_topk_ids, + topk_idx=topk_ids, num_experts=num_experts, previous_event=None, async_finish=False, allocate_on_comm_stream=False) - token_data = tokens + token_data: Union[torch.Tensor, tuple[torch.Tensor, Optional[torch.Tensor]]] + token_data = a1q if has_scales: - token_data = (tokens, token_scales) + token_data = (a1q, a1q_scale) + + ######################################################################## + yield # Pre-dispatch done + ######################################################################## ( token_data, expert_topk_ids, expert_topk_weights, @@ -122,10 +151,8 @@ def _do_dispatch( num_tokens_per_rdma_rank=num_tokens_per_rdma_rank, is_token_in_rank=is_token_in_rank, num_tokens_per_expert=dispatch_expert_num_tokens, - topk_idx=rank_topk_ids, - topk_weights=rank_topk_weights, - # expert_alignment rounds the number of tokens per expert - # to this value. + topk_idx=topk_ids, + topk_weights=topk_weights, expert_alignment=1, config=self._get_dispatch_config(), previous_event=None, @@ -136,38 +163,19 @@ def _do_dispatch( a2a_idx = dbo_current_ubatch_id() self.handles[a2a_idx] = handle - dbo_switch_to_compute_sync() - - return lambda: self._receiver( - event, - has_scales, - token_data, - expert_topk_ids, - num_experts, - expert_num_tokens_per_expert_list, - expert_topk_weights, - a1_scale, - quant_config, - ) - - def _receiver( - self, - event: deep_ep.EventOverlap, - has_scales: bool, - token_data: Union[tuple[torch.Tensor, torch.Tensor], torch.Tensor], - expert_topk_ids: Optional[torch.Tensor], - num_experts: int, - expert_num_tokens_per_expert_list: list[int], - expert_topk_weights: Optional[torch.Tensor], - a1_scale: Optional[torch.Tensor], - quant_config: FusedMoEQuantConfig, - ) -> mk.PrepareResultType: + ######################################################################## + yield # Dispatch send+recv done (sync) + ######################################################################## + if event.event is not None: event.current_stream_wait() + # Unpack token data if has_scales: + assert isinstance(token_data, tuple) expert_x, expert_x_scale = token_data else: + assert isinstance(token_data, torch.Tensor) expert_x, expert_x_scale = token_data, None # The existing MOE kernels assume that all entries of topk_ids are @@ -203,61 +211,16 @@ def _receiver( if expert_x.numel() != 0: expert_x, expert_x_scale = moe_kernel_quantize_input( expert_x, - a1_scale, + quant_config.a1_scale, quant_dtype=quant_config.quant_dtype, per_act_token_quant=False, block_shape=quant_config.block_shape) + return (expert_x, expert_x_scale, expert_tokens_meta, expert_topk_ids, expert_topk_weights) - def supports_async(self) -> bool: - return True - - def prepare_async( - self, - a1: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - num_experts: int, - expert_map: Optional[torch.Tensor], - apply_router_weight_on_input: bool, - quant_config: FusedMoEQuantConfig, - ) -> mk.ReceiverType: - - if apply_router_weight_on_input: - topk = topk_ids.size(1) - # TODO: this only works for topK=1, will need to update for topK>1 - assert topk == 1, ( - "apply_router_weight_on_input is only implemented for topk=1") - a1 = a1 
* topk_weights.to(a1.dtype) - - if quant_config.is_block_quantized: - # Quant and Dispatch - a1q, a1q_scale = moe_kernel_quantize_input( - a1, - quant_config.a1_scale, - quant_dtype=quant_config.quant_dtype, - per_act_token_quant=quant_config.per_act_token_quant, - block_shape=quant_config.block_shape, - ) - if a1q_scale is not None and a1q_scale.numel() == 1: - a1q_scale = a1q_scale.view(1, 1) - a1_post_scale = None - else: - a1q = a1 - a1q_scale = None - a1_post_scale = quant_config.a1_scale - - return self._do_dispatch(tokens=a1q, - token_scales=a1q_scale, - rank_topk_ids=topk_ids, - rank_topk_weights=topk_weights, - num_experts=num_experts, - a1_scale=a1_post_scale, - quant_config=quant_config) - - def prepare( + def create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -266,13 +229,14 @@ def prepare( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> mk.PrepareResultType: - receiver = self.prepare_async(a1, topk_weights, topk_ids, num_experts, - expert_map, apply_router_weight_on_input, - quant_config) - return receiver() - - def _finalize( + ) -> mk.SyncPrepareOps: + return mk.SyncPrepareOps.from_generator( + self._create_prepare_ops(a1, topk_weights, + topk_ids, num_experts, expert_map, + apply_router_weight_on_input, + quant_config)) + + def _create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -299,7 +263,11 @@ def _finalize( topk_ids=topk_ids, apply_router_weight_on_input=apply_router_weight_on_input, ) - dbo_yield_and_switch_from_compute_to_comm() + + ######################################################################## + yield # Pre-combine done + ######################################################################## + combined_x, _, event = self.buffer.combine( x=fused_expert_output, handle=handle, @@ -308,46 +276,20 @@ def _finalize( previous_event=None, async_finish=do_async and not dbo_enabled(), allocate_on_comm_stream=False) + # Respect inplace outputs. + if event.event is None: + output.copy_(combined_x, non_blocking=True) - dbo_switch_to_compute() - - if do_async: - - def _receiver(): - if event.event is not None: - event.current_stream_wait() - dbo_switch_to_comm() - # Respect inplace outputs. - output.copy_(combined_x, non_blocking=True) + ######################################################################## + yield # Combine send-recv done + ######################################################################## - # TODO(lucas): refactor the modular kernel so this will be - # handled there - dbo_yield_and_switch_from_comm_to_compute() + if event.event is not None: + event.current_stream_wait() - return _receiver - else: - # TODO(lucas): support this case with the refactored modular kernel - assert not dbo_enabled() - # Respect inplace outputs. 
- output.copy_(combined_x, non_blocking=True) - return None + return None - def finalize_async( - self, - output: torch.Tensor, - fused_expert_output: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - apply_router_weight_on_input: bool, - weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> Callable: - receiver = self._finalize(output, fused_expert_output, topk_weights, - topk_ids, apply_router_weight_on_input, - weight_and_reduce_impl, True) - assert receiver is not None - return receiver - - def finalize( + def create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -355,7 +297,9 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: - self._finalize(output, fused_expert_output, topk_weights, topk_ids, - apply_router_weight_on_input, weight_and_reduce_impl, - False) + ) -> mk.SyncFinalizeOps: + return mk.SyncFinalizeOps.from_generator( + self._create_finalize_ops(output, fused_expert_output, + topk_weights, topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl)) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index a9554291db69..2bcb33a3001b 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -1,6 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -from typing import Callable, Optional, Union +from typing import Optional, Union import deep_ep import torch @@ -11,8 +11,7 @@ TopKWeightAndReduceDelegate) from vllm.model_executor.layers.fused_moe.utils import ( moe_kernel_quantize_input, normalize_batched_scales_shape) -from vllm.v1.worker.ubatching import (dbo_current_ubatch_id, dbo_enabled, - dbo_maybe_run_recv_hook) +from vllm.v1.worker.ubatching import dbo_current_ubatch_id # DeepEP kernels quantize dispatch inputs in 128 element chunks. DEEPEP_QUANT_BLOCK_SIZE = 128 @@ -112,7 +111,7 @@ def _do_quant( def supports_async(self) -> bool: return True - def prepare_async( + def _create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -121,12 +120,9 @@ def prepare_async( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> tuple[Callable, mk.ReceiverType]: + ): hidden_size = a1.size(1) - assert hidden_size in self.SUPPORTED_HIDDEN_SIZES, \ - (f"Hidden Size {hidden_size} not in supported list of hidden sizes" - f"{self.SUPPORTED_HIDDEN_SIZES}") a2a_idx = dbo_current_ubatch_id() @@ -148,8 +144,12 @@ def prepare_async( "apply_router_weight_on_input is only implemented for topk=1") a1 = a1 * topk_weights.to(a1.dtype) + ######################################################################## + yield # Pre-dispatch done + ######################################################################## + # Dispatch - expert_x, expert_num_tokens, handle, _, hook= \ + _expert_x, expert_num_tokens, handle, _, recv_hook= \ self.buffer.low_latency_dispatch(a1, topk_ids, self.max_tokens_per_rank, @@ -159,20 +159,17 @@ def prepare_async( return_recv_hook=True) self.handles[a2a_idx] = handle - return ( - hook, - lambda: self._receiver(expert_x, expert_num_tokens, quant_config. 
- a1_scale, a1.dtype, quant_config)) + ######################################################################## + yield # Dispatch send done + ######################################################################## - def _receiver( - self, - expert_x: Union[torch.Tensor, tuple[torch.Tensor, torch.Tensor]], - expert_num_tokens: torch.Tensor, - a1_scale: Optional[torch.Tensor], - a1_dtype: torch.dtype, - quant_config: FusedMoEQuantConfig, - ) -> mk.PrepareResultType: - expert_x, expert_x_scale = self._do_quant(expert_x, a1_dtype, + recv_hook() + + ######################################################################## + yield # Dispatch recv done + ######################################################################## + + expert_x, expert_x_scale = self._do_quant(_expert_x, a1.dtype, quant_config) expert_tokens_meta = mk.ExpertTokensMetadata( @@ -180,7 +177,7 @@ def _receiver( return expert_x, expert_x_scale, expert_tokens_meta, None, None - def prepare( + def create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -189,15 +186,13 @@ def prepare( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> mk.PrepareResultType: - hook, receiver = self.prepare_async(a1, topk_weights, topk_ids, - num_experts, expert_map, - apply_router_weight_on_input, - quant_config) - hook() - return receiver() - - def _finalize( + ) -> mk.AsyncPrepareOps: + return mk.AsyncPrepareOps.from_generator( + self._create_prepare_ops(a1, topk_weights, topk_ids, num_experts, + expert_map, apply_router_weight_on_input, + quant_config)) + + def _create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -205,14 +200,12 @@ def _finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - do_async: bool, - ) -> tuple[Callable, Callable]: + ): assert isinstance( weight_and_reduce_impl, TopKWeightAndReduceDelegate ), ("Weight application and reduction happens in the combine kernel.") a2a_idx = dbo_current_ubatch_id() - do_recv_hook = dbo_enabled() or do_async handle = self.handles[a2a_idx] assert handle is not None @@ -221,40 +214,34 @@ def _finalize( # weights have already been applied. 
combine_topk_weights = torch.ones_like(topk_weights) - # TODO (varun) : Enable zero copy mode - dbo_maybe_run_recv_hook() + ######################################################################## + yield # Pre-combine done + ######################################################################## + _, _, recv_hook = self.buffer.low_latency_combine( fused_expert_output, + topk_weights, topk_ids, combine_topk_weights, handle, async_finish=False, zero_copy=False, - return_recv_hook=do_recv_hook, + return_recv_hook=True, out=output) - return recv_hook, lambda: None + ######################################################################## + yield # Combine send done + ######################################################################## - def finalize_async( - self, - output: torch.Tensor, - fused_expert_output: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - apply_router_weight_on_input: bool, - weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> tuple[Callable, Callable]: - return self._finalize( - output, - fused_expert_output, - topk_weights, - topk_ids, - apply_router_weight_on_input, - weight_and_reduce_impl, - do_async=True, - ) + recv_hook() + + ######################################################################## + yield # Combine recv done + ######################################################################## + + return None - def finalize( + def create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -262,13 +249,9 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: - self._finalize( - output, - fused_expert_output, - topk_weights, - topk_ids, - apply_router_weight_on_input, - weight_and_reduce_impl, - do_async=False, - ) + ) -> mk.AsyncFinalizeOps: + return mk.AsyncFinalizeOps.from_generator( + self._create_finalize_ops(output, fused_expert_output, + topk_weights, topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl)) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index b6afc8651e36..9e45677ef663 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -1,10 +1,11 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from abc import ABC, abstractmethod +from collections.abc import Generator from dataclasses import dataclass from enum import Enum from math import prod -from typing import Callable, Optional, Union, final +from typing import Callable, Generic, Optional, TypeVar, Union, final import torch @@ -13,9 +14,12 @@ from vllm.model_executor.layers.fused_moe.utils import ( # yapf: disable _resize_cache, count_expert_num_tokens) from vllm.utils import cdiv -from vllm.v1.worker.ubatching import (dbo_current_ubatch_id, dbo_enabled, - dbo_maybe_run_recv_hook, - dbo_register_recv_hook, dbo_yield) +from vllm.v1.worker.ubatching import ( + Schedule, dbo_current_ubatch_id, dbo_maybe_run_recv_hook, + dbo_register_recv_hook, dbo_switch_to_comm, dbo_switch_to_compute, + dbo_switch_to_compute_sync, dbo_yield, + dbo_yield_and_switch_from_comm_to_compute, + dbo_yield_and_switch_from_compute_to_comm) # # This file defines a set of base classes used to make MoE kernels more modular. 
@@ -165,6 +169,121 @@ def apply(self, output: Optional[torch.Tensor], ] ReceiverType = Callable[[], PrepareResultType] +_R = TypeVar('_R') + + +# +# Prepare and Finalize Op Chains +# +# The prepare and finalize functions are broken down into a chain of sequential +# operations/steps. +# +class _PhasedGen(Generic[_R]): + """ + Enforce an exact number of yields (phases), then a final return. + + Contract: + - The generator must yield exactly `expected_yields` times. + - The next advance must StopIteration with a return value (may be None). + - Early StopIteration or extra yields raise RuntimeError. + - Duplicate step/finish after completion raises RuntimeError. + """ + __slots__ = ("_gen", "_expected", "_steps", "_done", "_ret") + + def __init__(self, gen: Generator[None, None, _R], expected_yields: int): + self._gen = gen + self._expected = expected_yields + self._steps = 0 + self._done = False + self._ret: Optional[_R] = None + + def step(self, label: str) -> None: + if self._done: + raise RuntimeError( + f"Generator already finished; unexpected '{label}'.") + if self._steps >= self._expected: + raise RuntimeError( + f"Too many steps: called '{label}' after {self._expected} " + "phases; expected to finish instead.") + try: + next(self._gen) + except StopIteration as exc: + raise RuntimeError( + f"Generator ended early during '{label}' " + f"(completed {self._steps}/{self._expected} phases).") from exc + self._steps += 1 + + def finish(self, label: str) -> _R: + if self._done: + raise RuntimeError( + f"Generator already finished; duplicate '{label}'.") + if self._steps != self._expected: + raise RuntimeError( + f"Cannot finish at '{label}': only {self._steps}/" + f"{self._expected} phases completed.") + try: + next(self._gen) + except StopIteration as e: + self._done = True + self._ret = e.value # may be None + return self._ret # type: ignore[return-value] + else: + raise RuntimeError( + f"Generator yielded more than expected ({self._expected}); " + f"should have finished at '{label}'.") + + +@dataclass +class AsyncOps(Generic[_R]): + """ + 3-phase async: + 1) prepare() + 2) send() + 3) recv() + 4) finish() -> R + """ + prepare: Callable[[], None] + send: Callable[[], None] + recv: Callable[[], None] + finish: Callable[[], _R] + + @classmethod + def from_generator(cls, gen: Generator[None, None, _R]) -> 'AsyncOps[_R]': + ph = _PhasedGen[_R](gen, expected_yields=3) + return cls( + prepare=lambda: ph.step("prepare"), + send=lambda: ph.step("send"), + recv=lambda: ph.step("recv"), + finish=lambda: ph.finish("finish"), + ) + + +@dataclass +class SyncOps(Generic[_R]): + """ + 2-phase sync: + 1) prepare() + 2) send_recv() + 3) finish() -> R + """ + prepare: Callable[[], None] + send_recv: Callable[[], None] + finish: Callable[[], _R] + + @classmethod + def from_generator(cls, gen: Generator[None, None, _R]) -> 'SyncOps[_R]': + ph = _PhasedGen[_R](gen, expected_yields=2) + return cls( + prepare=lambda: ph.step("prepare"), + send_recv=lambda: ph.step("send_recv"), + finish=lambda: ph.finish("finish"), + ) + + +AsyncPrepareOps = AsyncOps[PrepareResultType] +SyncPrepareOps = SyncOps[PrepareResultType] +AsyncFinalizeOps = AsyncOps[None] +SyncFinalizeOps = SyncOps[None] # TODO: pass FusedMoEParallelConfig in as ctor parameter? 
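The `_PhasedGen`/`AsyncOps`/`SyncOps` contract introduced above is easiest to see with a toy generator. The following sketch is not vLLM code; it is a self-contained illustration of the underlying protocol these wrappers enforce: yield exactly once per phase, then finish with a `StopIteration` whose value carries the result.

```python
# Standalone illustration of the phased-generator protocol (not vLLM code).
from collections.abc import Generator


def fake_async_dispatch() -> Generator[None, None, str]:
    # phase 1: pre-dispatch work (quantization, router weights, ...)
    yield
    # phase 2: launch the send
    yield
    # phase 3: wait on the recv hook
    yield
    # finish: the return value travels back via StopIteration.value
    return "dispatched-tensors"


gen = fake_async_dispatch()
next(gen)  # prepare
next(gen)  # send
next(gen)  # recv
try:
    next(gen)  # finish
except StopIteration as stop:
    result = stop.value
print(result)  # -> dispatched-tensors
```

`AsyncOps.from_generator` wires exactly this sequence to its `prepare`/`send`/`recv`/`finish` callables, and `_PhasedGen` raises if the generator yields more or fewer times than the expected phase count.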
@@ -175,7 +294,7 @@ class FusedMoEPrepareAndFinalize(ABC): """ @abstractmethod - def prepare( + def create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -184,7 +303,7 @@ def prepare( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> PrepareResultType: + ) -> Union[SyncPrepareOps, AsyncPrepareOps]: """ Perform any quantization (and/or) dispatching needed for this kernel. - a1: The (unquantized) input to the MoE layer. @@ -215,56 +334,8 @@ def supports_async(self) -> bool: """ return False - def prepare_async( - self, - a1: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - num_experts: int, - expert_map: Optional[torch.Tensor], - apply_router_weight_on_input: bool, - quant_config: FusedMoEQuantConfig, - ) -> Union[tuple[Callable, ReceiverType], ReceiverType]: - """ - Perform any quantization (and/or) dispatching needed for this kernel - but do not wait for results from other workers. - - a1: The (unquantized) input to the MoE layer. - - a1_scale: Optional scales for a1 - - a2_scale: Optional scales for the second MoE gemm. Required to make - sure the quantization is consistent for both gemms. - - topk_ids: The topk ids. - - topk_weights: The topk weights. - - num_experts: The total number of experts in the global expert space. - - expert_map: A tensor mapping expert indices from the global expert - space to the local expert space of the expert parallel shard. - - apply_router_weight_on_input: When True, apply the weights to the - activations, before quantization + dispatching. - - Returns a callback or a hook callback pair that when invoked waits for - results from other workers and has the same return signature as - `prepare`, if a hook is returned this is more lightweight check that - the recv is complete without doing extra work (used by DBO, will be - refactored in the very near future) - - e.g. - - ret = obj.prepare_async(...) - - if isinstance(ret, tuple): - hook, receiver = ret - hook() - - if hook is not None: - a, a_scales, expert_meta, topk_ids, topk_weights = receiver() - - is equivalent to: - - a, a_scales, expert_meta, topk_ids, topk_weights = obj.prepare(...) - """ - raise NotImplementedError - @abstractmethod - def finalize( + def create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -272,7 +343,7 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: TopKWeightAndReduce, - ) -> None: + ) -> Union[SyncFinalizeOps, AsyncFinalizeOps]: """ Perform any combine plus apply weights and perform a reduction on the fused experts output. @@ -288,48 +359,6 @@ def finalize( """ raise NotImplementedError - def finalize_async( - self, - output: torch.Tensor, - fused_expert_output: torch.Tensor, - topk_weights: torch.Tensor, - topk_ids: torch.Tensor, - apply_router_weight_on_input: bool, - weight_and_reduce_impl: TopKWeightAndReduce, - ) -> Union[tuple[Callable, Callable], Callable]: - """ - Perform any combine plus apply weights and perform a reduction on the - fused experts output but do not wait for results from other workers. - - output: The output tensor, written in place. Must be (M, K) shape. - - fused_expert_output: The unweighted, unreduced output of the fused - experts, it will have (M, topk, K) shape. - - topk_weights: The weights to be applied to the fused_experts_output. - - topk_ids: The topk_ids. - - apply_router_weight_on_input: When False, apply the weights to - fused_expert_output. 
- - weight_and_reduce_impl: An optional TopKWeightAndReduce - implementation. - - Returns a callback or a hook callback pair that when invoked waits for - results from other workers and has the same return signature as - `finalize`, if a hook is returned this is more lightweight check that - the recv is complete without doing extra work (used by DBO, will be - refactored in the very near future) - - ret = obj.finalize_async(output, ...) - ... output not valid yet ... - if isinstance(ret, tuple): - hook, receiver = ret - hook() - receiver() - ... output valid here ... - - is equivalent to: - - obj.finalize(output, ...) - """ - raise NotImplementedError - @property @abstractmethod def activation_format(self) -> FusedMoEActivationFormat: @@ -620,6 +649,9 @@ class FusedMoEModularKernel(torch.nn.Module): layer due to any layer specific state that may be used by the component objects. """ + fused_out_buffer = SharedResizableBuffer() + workspace13_buffer = SharedResizableBuffer() + workspace2_buffer = SharedResizableBuffer() class SharedBuffers: @@ -771,6 +803,7 @@ def _maybe_chunk_fused_experts( (_, _, fused_out_shape, _) = self.fused_experts.workspace_shapes( a1, a1q, M, N, K, top_k, global_num_experts, local_num_experts, expert_tokens_meta) + ubatch_idx = dbo_current_ubatch_id() buffers = self.shared_buffers[ubatch_idx] fused_out = buffers.fused_out.get(fused_out_shape, @@ -905,53 +938,44 @@ def forward( if global_num_experts == -1: global_num_experts = local_num_experts - if not self.prepare_finalize.supports_async(): - # We shouldn't be running an a2a kernel that doesn't - # support async prepare/finalize - # TODO(lucas): enable in follow-up - assert not dbo_enabled() - - (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, - _expert_topk_weights) = self.prepare_finalize.prepare( - a1, - topk_weights, - topk_ids, - global_num_experts, - expert_map, - apply_router_weight_on_input, - self.fused_experts.quant_config, - ) + shared_output: Optional[torch.Tensor] = None + + prepare_ops = self.prepare_finalize.create_prepare_ops( + a1, + topk_weights, + topk_ids, + global_num_experts, + expert_map, + apply_router_weight_on_input, + self.fused_experts.quant_config, + ) + + prepare_ops.prepare() + + if isinstance(prepare_ops, SyncOps): + # We yield before launching the dispatch kernel since the dispatch + # kernel will block the CPU so we want to queue up all the compute + # for the other ubatch before the dispatch kernel starts. + dbo_yield_and_switch_from_compute_to_comm() + prepare_ops.send_recv() + dbo_switch_to_compute_sync() else: + assert isinstance(prepare_ops, AsyncOps) + # Overlap shared expert compute with all2all dispatch. dbo_maybe_run_recv_hook() - prepare_ret = self.prepare_finalize.prepare_async( - a1, - topk_weights, - topk_ids, - global_num_experts, - expert_map, - apply_router_weight_on_input, - self.fused_experts.quant_config, - ) + prepare_ops.send() + + recv_done = dbo_register_recv_hook( + lambda: prepare_ops.recv(), + schedules=(Schedule.MLP_SHARED_OVERLAP, )) + dbo_yield(all_schedules=True) - # TODO(lucas): refactor this in the alternative schedules followup - # currently unpack if we have hook + receiver pair or just - # receiver (see finalize_async docstring) - hook, receiver = prepare_ret \ - if isinstance(prepare_ret, tuple) else (None, prepare_ret) - - if hook is not None: - if dbo_enabled(): - # If DBO is being used, register the hook with the ubatch - # context and call it in dbo_maybe_run_recv_hook instead of - # passing it to the receiver. 
- dbo_register_recv_hook(hook) - dbo_yield() - else: - hook() - - (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, - _expert_topk_weights) = receiver() + if not recv_done: + prepare_ops.recv() + + (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, + _expert_topk_weights) = prepare_ops.finish() # Maybe prepare gathered topk_ids and topk_weights from other EP ranks. topk_ids = topk_ids if _expert_topk_ids is None else _expert_topk_ids @@ -985,51 +1009,42 @@ def forward( apply_router_weight_on_input=apply_router_weight_on_input, ) - shared_output: Optional[torch.Tensor] = None + finalize_ops = self.prepare_finalize.create_finalize_ops( + output, + fused_out, + topk_weights, + topk_ids, + apply_router_weight_on_input, + self.fused_experts.finalize_weight_and_reduce_impl(), + ) - if not self.prepare_finalize.supports_async(): - assert not dbo_enabled() + if isinstance(finalize_ops, SyncOps): + finalize_ops.prepare() + dbo_yield_and_switch_from_compute_to_comm() + finalize_ops.send_recv() - self.prepare_finalize.finalize( - output, - fused_out, - topk_weights, - topk_ids, - apply_router_weight_on_input, - self.fused_experts.finalize_weight_and_reduce_impl(), - ) - if self.shared_experts is not None: + dbo_switch_to_compute() + if self.shared_experts is not None and shared_output is None: shared_output = self.shared_experts(a1) + dbo_switch_to_comm() + + dbo_yield_and_switch_from_comm_to_compute() + finalize_ops.finish() else: - finalize_ret = self.prepare_finalize.finalize_async( - output, - fused_out, - topk_weights, - topk_ids, - apply_router_weight_on_input, - self.fused_experts.finalize_weight_and_reduce_impl(), - ) + assert isinstance(finalize_ops, AsyncOps) + finalize_ops.prepare() + dbo_maybe_run_recv_hook() + finalize_ops.send() - if self.shared_experts is not None: + if self.shared_experts is not None and shared_output is None: shared_output = self.shared_experts(a1) - # TODO(lucas): refactor this in the alternative schedules followup - # currently unpack if we have hook + receiver pair or just - # receiver (see finalize_async docstring) - hook, receiver = finalize_ret \ - if isinstance(finalize_ret, tuple) else (None, finalize_ret) - - if hook is not None: - if dbo_enabled(): - # If DBO is being used, register the hook with the ubatch - # context and call it in dbo_maybe_run_recv_hook instead of - # passing it to the receiver. 
- dbo_register_recv_hook(hook) - dbo_yield() - else: - hook() - - receiver() + if dbo_register_recv_hook(lambda: finalize_ops.recv(), + all_schedules=True): + dbo_yield(all_schedules=True) + else: + finalize_ops.recv() + finalize_ops.finish() if self.shared_experts is None: return output diff --git a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py index ddddd2a3b7a2..f6f72113ec60 100644 --- a/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py @@ -13,6 +13,7 @@ from vllm.model_executor.layers.fused_moe.utils import ( _validate_scale_shape, moe_kernel_quantize_input) from vllm.utils import cdiv, round_up +from vllm.v1.worker.ubatching import dbo_current_ubatch_id logger = init_logger(__name__) @@ -92,7 +93,7 @@ def num_dispatchers(self) -> int: def supports_async(self) -> bool: return True - def prepare_async( + def _create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -202,7 +203,11 @@ def prepare_async( # There's not much point setting this unless it is != indices.size(0) bound_m: Optional[torch.Tensor] = None - self.a2a.dispatch( + ######################################################################## + yield # Pre-dispatch done + ######################################################################## + + self.a2as[a2a_idx].dispatch( out_expert_num_tokens=expert_num_tokens, out_expert_x=expert_x, out_expert_x_scale=expert_x_scale, @@ -214,7 +219,11 @@ def prepare_async( do_recv=False, ) - hook = lambda: self.a2a.dispatch( + ######################################################################## + yield # Dispatch send done + ######################################################################## + + self.a2as[a2a_idx].dispatch( out_expert_num_tokens=expert_num_tokens, out_expert_x=expert_x, out_expert_x_scale=expert_x_scale, @@ -226,31 +235,21 @@ def prepare_async( do_recv=True, ) - return (hook, lambda: self._receiver( - expert_num_tokens, - expert_x, - expert_x_scale, - orig_a_scale_block_shape, - )) - - def _receiver( - self, - expert_num_tokens: torch.Tensor, - expert_x: torch.Tensor, - expert_x_scale: Optional[torch.Tensor], - orig_a_scale_block_shape: Optional[int], - ) -> mk.PrepareResultType: + ######################################################################## + yield # Dispatch recv done + ######################################################################## if expert_x_scale is not None: expert_x_scale = expert_x_scale[:, :, :orig_a_scale_block_shape] assert expert_x_scale.ndim == 3 expert_tokens_meta = mk.ExpertTokensMetadata( - expert_num_tokens=expert_num_tokens, expert_num_tokens_cpu=None) + expert_num_tokens=expert_num_tokens, + expert_num_tokens_cpu=None) return expert_x, expert_x_scale, expert_tokens_meta, None, None - def prepare( + def create_prepare_ops( self, a1: torch.Tensor, topk_weights: torch.Tensor, @@ -259,20 +258,19 @@ def prepare( expert_map: Optional[torch.Tensor], apply_router_weight_on_input: bool, quant_config: FusedMoEQuantConfig, - ) -> mk.PrepareResultType: - hook, receiver = self.prepare_async( - a1, - topk_weights, - topk_ids, - num_experts, - expert_map, - apply_router_weight_on_input, - quant_config, - ) - hook() - return receiver() - - def finalize_async( + ) -> mk.AsyncPrepareOps: + return mk.AsyncPrepareOps.from_generator( + self._create_prepare_ops( + a1, + topk_weights, + topk_ids, + num_experts, + expert_map, + apply_router_weight_on_input, + 
quant_config, + )) + + def _create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -302,26 +300,44 @@ def finalize_async( # Set weights to 1 if we did them in dispatch. This is hacky. if apply_router_weight_on_input: topk_weights = torch.ones_like(topk_weights) + + a2a_idx = dbo_current_ubatch_id() + + ######################################################################## + yield # Pre-combine done + ######################################################################## + + self.a2as[a2a_idx].combine( + out_tokens=output, + indices=topk_ids.view(dtype=torch.uint32), + weights=topk_weights, + expert_y=fused_expert_output, + bound_m=bound_m, + do_send=True, + do_recv=False, + ) - topk_ids_u32 = topk_ids.view(dtype=torch.uint32) - - self.a2a.combine(out_tokens=output, - indices=topk_ids_u32, - weights=topk_weights, - expert_y=fused_expert_output, - bound_m=bound_m, - do_send=True, - do_recv=False) - - return lambda: self.a2a.combine(out_tokens=output, - indices=topk_ids_u32, - weights=topk_weights, - expert_y=fused_expert_output, - bound_m=bound_m, - do_send=False, - do_recv=True) - - def finalize( + ######################################################################## + yield # Combine send done (no-op for pplx combine) + ######################################################################## + + self.a2as[a2a_idx].combine( + out_tokens=output, + indices=topk_ids.view(dtype=torch.uint32), + weights=topk_weights, + expert_y=fused_expert_output, + bound_m=bound_m, + do_send=False, + do_recv=True, + ) + + ######################################################################## + yield # Combine recv done + ######################################################################## + + return None + + def create_finalize_ops( self, output: torch.Tensor, fused_expert_output: torch.Tensor, @@ -329,13 +345,14 @@ def finalize( topk_ids: torch.Tensor, apply_router_weight_on_input: bool, weight_and_reduce_impl: mk.TopKWeightAndReduce, - ) -> None: - receiver = self.finalize_async( - output, - fused_expert_output, - topk_weights, - topk_ids, - apply_router_weight_on_input, - weight_and_reduce_impl, - ) - receiver() + ) -> mk.AsyncFinalizeOps: + return mk.AsyncFinalizeOps.from_generator( + self._create_finalize_ops( + output, + fused_expert_output, + topk_weights, + topk_ids, + apply_router_weight_on_input, + weight_and_reduce_impl, + )) + diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 03c43654d68f..8a4e08063ba3 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -1181,8 +1181,10 @@ def forward( return hidden_states + class DeepseekV2ForCausalLM(nn.Module, SupportsPP, MixtureOfExperts, SupportsLoRA): + delayed_dbo_start = True packed_modules_mapping = { "gate_up_proj": ["gate_proj", "up_proj"], } diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index 561793b6a377..fcf901df8677 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -219,6 +219,7 @@ infer_global_hyperparameters, split_decodes_and_prefills) from vllm.v1.kv_cache_interface import AttentionSpec +from vllm.v1.worker.ubatching import Schedule, dbo_yield try: from vllm.vllm_flash_attn import flash_attn_varlen_func @@ -1693,14 +1694,11 @@ def forward( scale=layer._k_scale, ) + dbo_yield(schedules=(Schedule.ATTN_SHARED_OVERLAP,)) + if fp8_attention: kv_cache = kv_cache.view(current_platform.fp8_dtype()) - 
if has_prefill: - output[num_decode_tokens:] = self._forward_prefill( - prefill_q, prefill_k_c_normed, prefill_k_pe, kv_cache, - attn_metadata, layer._k_scale) - if has_decode: assert attn_metadata.decode is not None decode_q_nope, decode_q_pe = decode_q.split( diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index f4c28dc24d70..daa8c66aabd2 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -2779,12 +2779,19 @@ def load_model(self, eep_scale_up: bool = False) -> None: self.vllm_config, runtime_mode=CUDAGraphMode.FULL) elif self.parallel_config.enable_dbo: + delayed_start = getattr(self.model, "delayed_dbo_start", False) if self.compilation_config.cudagraph_mode.has_full_cudagraphs(): - self.model = UBatchWrapper(self.model, self.vllm_config, - CUDAGraphMode.FULL, self.device) + self.model = UBatchWrapper(self.model, + self.vllm_config, + CUDAGraphMode.FULL, + self.device, + delayed_start=delayed_start) else: - self.model = UBatchWrapper(self.model, self.vllm_config, - CUDAGraphMode.NONE, self.device) + self.model = UBatchWrapper(self.model, + self.vllm_config, + CUDAGraphMode.NONE, + self.device, + delayed_start=delayed_start) def reload_weights(self) -> None: assert getattr(self, "model", None) is not None, \ diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index 39be8c74102e..0cc802435aad 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -1,6 +1,8 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +import importlib +import os import threading from dataclasses import dataclass from typing import Any, Callable, Optional @@ -86,7 +88,8 @@ def __exit__(self, exc_type, exc_value, traceback): class UBatchWrapper: def __init__(self, runnable: Callable, vllm_config: VllmConfig, - runtime_mode: CUDAGraphMode, device: torch.cuda.device): + runtime_mode: CUDAGraphMode, device: torch.cuda.device, + delayed_start: bool = False): self.runnable = runnable self.vllm_config = vllm_config self.compilation_config = vllm_config.compilation_config @@ -105,6 +108,7 @@ def __init__(self, runnable: Callable, vllm_config: VllmConfig, self.sm_control = self._create_sm_control_context(vllm_config) self.device = device + self.delayed_start = delayed_start @staticmethod def _create_sm_control_context(vllm_config: VllmConfig): @@ -123,11 +127,62 @@ def _create_sm_control_context(vllm_config: VllmConfig): if comm_sms > 0: set_comm_sms = lambda sms: all2all_manager.set_num_sms(sms) - # TODO(lucas): support other kernels besides DeepGEMM - set_compute_sms = lambda sms: None - if has_deep_gemm() and comm_sms > 0: - import deep_gemm as dg - set_compute_sms = lambda sms: dg.set_num_sms(sms) + set_compute_sms: Callable[[int], None] + compute_sm_setters: list[tuple[str, Callable[[int], None]]] = [] + registered_specs: set[str] = set() + + if comm_sms > 0: + + def _resolve_sm_setter(module_spec: str + ) -> Optional[Callable[[int], None]]: + module_name, _, attr_path = module_spec.partition(":") + try: + module = importlib.import_module(module_name) + except Exception: + return None + + target = module + if attr_path: + for attr in attr_path.split('.'): + target = getattr(target, attr, None) + if target is None: + return None + + setter = getattr(target, "set_num_sms", None) + return setter if callable(setter) else None + + def _register_sm_setter(module_spec: str) -> None: + module_spec = 
module_spec.strip() + if not module_spec or module_spec in registered_specs: + return + setter = _resolve_sm_setter(module_spec) + if setter is not None: + compute_sm_setters.append((module_spec, setter)) + registered_specs.add(module_spec) + + if has_deep_gemm(): + _register_sm_setter("deep_gemm") + + for default_spec in ("pplx_kernels", "flashinfer"): + _register_sm_setter(default_spec) + + extra_specs = os.getenv("VLLM_DBO_COMPUTE_SM_MODULES", "") + if extra_specs: + for spec in extra_specs.split(','): + _register_sm_setter(spec) + + if compute_sm_setters: + + def set_compute_sms(sms: int) -> None: + for module_spec, setter in compute_sm_setters: + try: + setter(sms) + except Exception: + logger.debug("Failed to set SM count via %s", + module_spec, + exc_info=True) + else: + set_compute_sms = lambda sms: None return SMControlContextManager(comm_sms=comm_sms, set_comm_sms=set_comm_sms, @@ -266,7 +321,8 @@ def _ubatch_thread(results, model, ubatch_metadata): def _make_ubatch_metadata(self, ubatch_slices, attn_metadata, input_ids, positions, inputs_embeds, intermediate_tensors, compute_stream, dp_metadata, batch_descriptor, - cudagraph_runtime_mode) -> list[UbatchMetadata]: + cudagraph_runtime_mode, + delayed_start: bool = False) -> list[UbatchMetadata]: # Create one forward context per ubatch forward_contexts = [] @@ -284,7 +340,8 @@ def _make_ubatch_metadata(self, ubatch_slices, attn_metadata, input_ids, comm_stream=self.comm_stream, compute_stream=compute_stream, forward_contexts=forward_contexts, - ready_barrier=self.ready_barrier) + ready_barrier=self.ready_barrier, + delayed_start=delayed_start) ubatch_metadata: list[UbatchMetadata] = [] for i, ubatch_slice in enumerate(ubatch_slices): diff --git a/vllm/v1/worker/ubatching.py b/vllm/v1/worker/ubatching.py index c26cb07123a5..421301a3d570 100644 --- a/vllm/v1/worker/ubatching.py +++ b/vllm/v1/worker/ubatching.py @@ -1,16 +1,46 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import threading -from typing import Optional +from enum import Enum +from functools import lru_cache +from typing import Callable, Optional import torch +from torch.library import Library from vllm import forward_context from vllm.forward_context import ForwardContext from vllm.utils import current_stream -_THREAD_ID_TO_CONTEXT: dict = {} +_THREAD_ID_TO_CONTEXT: dict[int, int] = {} _CURRENT_CONTEXTS: list[Optional['UBatchContext']] = [None, None] +Schedules = tuple['Schedule', ...] + + +class Schedule(Enum): + # Schedule notation legend: + # S = Shared expert + # A0 = MLA qkv pro, + # A1 = Core attn + out proj + MoE gate + # D = Dispatch + # C = Combine + + # Comp: |-A0₀-A1₀-||-MLP₁-||-S₁-MLP₀-||-S₀-A0₁-A1₁-| + # Comm: |----D₁---||--D₀--||----C₁---||-----C₀-----| + # Order: D₁ send, A0₀, A1₀, D₁ recv, D₀ send, MLP₁, D₀ recv, + # C₁ send, S₁, MLP₀, C₁ recv, C₀ send, S₀, A0₁, A1₁, C₀ recv. + MLP_SHARED_OVERLAP = "mlp_shared_overlap" + + # Comp: |-S₀-A0₁-|-MLP₀-|-A1₁-||-S₁-A0₀-|-MLP₁-|-A1₀-| + # Comm: |---D₀---| |-C₀--||---D₁---| |-C₁--| + # Order: D₀ send, S₀, A0₁, D₀ recv, MLP₀, C₀ send, A1₁, C₀ recv, + # D₁ send, S₁, A0₀, D₁ recv, MLP₁, C₁ send, A1₀, C₁ recv. 
+ ATTN_SHARED_OVERLAP = "attn_shared_overlap" + + +_SCHEDULE_WAIT_STAGES = { # Default is 1 + Schedule.ATTN_SHARED_OVERLAP: 2, +} class UBatchContext: @@ -28,7 +58,8 @@ def __init__(self, cpu_signal_event: threading.Event, gpu_comm_done_event: torch.cuda.Event, gpu_compute_done_event: torch.cuda.Event, - schedule: str = "default"): + started: bool = True, + schedule: Schedule = Schedule.MLP_SHARED_OVERLAP): self.id = id self.comm_stream = comm_stream self.compute_stream = compute_stream @@ -40,7 +71,8 @@ def __init__(self, self.gpu_comm_done_event = gpu_comm_done_event self.gpu_compute_done_event = gpu_compute_done_event self.schedule = schedule - self.recv_hook = None + self.started = started + self.recv_hook: Optional[Callable[[], None]] = None def __enter__(self): global _CURRENT_CONTEXTS, _THREAD_ID_TO_CONTEXT @@ -50,27 +82,41 @@ def __enter__(self): self.cpu_wait_event.wait() self.cpu_wait_event.clear() + if self.id > 0: + wait_stages = _SCHEDULE_WAIT_STAGES.get(self.schedule, 1) + for _ in range(wait_stages - 1): + self._cpu_yield(check_context=False) + self._restore_context() - # Assume we want to start on the compute stream - self.update_stream(self.compute_stream) + # Assume we start on the compute stream + assert current_stream() == self.compute_stream return self def __exit__(self, exc_type, exc_val, exc_tb): global _CURRENT_CONTEXTS, _THREAD_ID_TO_CONTEXT + if self.id == 0: + # Keep advancing the next micro-batch + wait_stages = _SCHEDULE_WAIT_STAGES.get(self.schedule, 1) + for _ in range(wait_stages - 1): + self.yield_() + # Cleanup and trailing recv hooks + self.maybe_run_recv_hook() + _CURRENT_CONTEXTS[self.id] = None del _THREAD_ID_TO_CONTEXT[threading.get_ident()] self.maybe_run_recv_hook() self.cpu_signal_event.set() self.cpu_wait_event.clear() + return False def _restore_context(self): forward_context._forward_context = self.forward_context + torch.cuda.set_stream(self.current_stream) def update_stream(self, stream): self.current_stream = stream - if current_stream() != self.current_stream: - torch.cuda.set_stream(self.current_stream) + torch.cuda.set_stream(self.current_stream) def _signal_comm_done(self): self.gpu_comm_done_event.record(self.comm_stream) @@ -84,11 +130,12 @@ def _wait_compute_done(self): def _wait_comm_done(self): self.compute_stream.wait_event(self.gpu_comm_done_event) - def _cpu_yield(self): + def _cpu_yield(self, check_context: bool = True): # It is critical for correctness that only one thread is running # at a time. 
These asserts just make sure that this is the only # thread running before waking the other one up and going to sleep - assert forward_context._forward_context == self.forward_context + if check_context: + assert forward_context._forward_context == self.forward_context assert current_stream() == self.current_stream assert not self.cpu_wait_event.is_set() @@ -150,13 +197,35 @@ def dbo_current_ubatch_id() -> int: return _THREAD_ID_TO_CONTEXT[threading.get_ident()] -def _register_ubatch_function(func): +def dbo_start(): + if len(_THREAD_ID_TO_CONTEXT) > 0: + ctx_idx = _THREAD_ID_TO_CONTEXT[threading.get_ident()] + ctx = _CURRENT_CONTEXTS[ctx_idx] + assert ctx is not None + ctx.started = True + + +def dbo_current_schedule() -> Optional[Schedule]: + if len(_THREAD_ID_TO_CONTEXT) == 0: + return None + ctx = _CURRENT_CONTEXTS[dbo_current_ubatch_id()] + assert ctx is not None + return ctx.schedule + + +def _register_ubatch_function(func: Callable[['UBatchContext'], None], + all_schedules_default: bool = False): - def wrapper(*args, **kwargs): + def wrapper(schedules: Schedules = (), + all_schedules: bool = all_schedules_default) -> None: if len(_THREAD_ID_TO_CONTEXT) > 0: ctx_idx = _THREAD_ID_TO_CONTEXT[threading.get_ident()] ctx = _CURRENT_CONTEXTS[ctx_idx] - func(ctx, *args, **kwargs) + assert ctx is not None + if not ctx.started: + return + if all_schedules or ctx.schedule in schedules: + func(ctx) return wrapper @@ -176,12 +245,36 @@ def wrapper(*args, **kwargs): dbo_switch_to_compute_sync = _register_ubatch_function( UBatchContext.switch_to_compute_sync) +# DBO start needs to be callable from inside the torch compile region so +# we register it as a custom op. +lib = Library("vllm_dbo", "DEF") +lib.define("start(Tensor! x) -> ()") # in-place, returns x + + +@torch.library.impl("vllm_dbo::start", "CompositeImplicitAutograd") +def _dbo_start_impl(x: torch.Tensor): + dbo_start() + return None + + +@lru_cache(maxsize=1) +def dbo_debug_annotate(): + return True + -def dbo_register_recv_hook(recv_hook): +def dbo_register_recv_hook(recv_hook: Callable[[], None], + schedules: Schedules = (), + all_schedules: bool = False) -> bool: if len(_THREAD_ID_TO_CONTEXT) > 0: ctx_idx = _THREAD_ID_TO_CONTEXT[threading.get_ident()] - next_ctx = _CURRENT_CONTEXTS[(ctx_idx + 1) % 2] - next_ctx.recv_hook = recv_hook + ctx = _CURRENT_CONTEXTS[ctx_idx] + assert ctx is not None + if all_schedules or ctx.schedule in schedules: + next_ctx = _CURRENT_CONTEXTS[(ctx_idx + 1) % 2] + assert next_ctx is not None + next_ctx.recv_hook = recv_hook + return True + return False def make_ubatch_contexts( @@ -190,7 +283,8 @@ def make_ubatch_contexts( comm_stream: torch.cuda.Stream, forward_contexts: list[ForwardContext], ready_barrier: threading.Barrier, - schedule: str = "default", + schedule: Schedule = Schedule.MLP_SHARED_OVERLAP, + delayed_start: bool = False, ) -> list[UBatchContext]: assert num_micro_batches == 2, "only been tested with 2 micro-batches" """ @@ -218,6 +312,7 @@ def make_ubatch_contexts( num_micro_batches], gpu_comm_done_event=gpu_comm_done_events[i], gpu_compute_done_event=gpu_compute_done_events[i], + started=not delayed_start, schedule=schedule) ctxs.append(ctx) diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 3e0dbda59435..114ae37d46f0 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -159,6 +159,12 @@ def get_metadata_builder(self, assert len(self.metadata_builders) > ubatch_id return self.metadata_builders[ubatch_id] + def get_metadata_builder(self, ubatch_id: 
Optional[int] = None) -> AttentionMetadataBuilder: + if ubatch_id is None: + return self.metadata_builders[0] + assert len(self.metadata_builders) > ubatch_id + return self.metadata_builders[ubatch_id] + def sanity_check_mm_encoder_outputs( mm_embeddings: MultiModalEmbeddings, From fcd015c466afdaf0aae641e9d605c0302defcf08 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 18:06:04 +0000 Subject: [PATCH 515/518] get deep ll to run Signed-off-by: Lucas Wilkinson --- .../layers/fused_moe/deepep_ll_prepare_finalize.py | 1 - vllm/v1/worker/ubatching.py | 11 ++++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py index 2bcb33a3001b..640e0d28559a 100644 --- a/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py @@ -220,7 +220,6 @@ def _create_finalize_ops( _, _, recv_hook = self.buffer.low_latency_combine( fused_expert_output, - topk_weights, topk_ids, combine_topk_weights, handle, diff --git a/vllm/v1/worker/ubatching.py b/vllm/v1/worker/ubatching.py index 421301a3d570..6aa8ea8937f7 100644 --- a/vllm/v1/worker/ubatching.py +++ b/vllm/v1/worker/ubatching.py @@ -112,11 +112,11 @@ def __exit__(self, exc_type, exc_val, exc_tb): def _restore_context(self): forward_context._forward_context = self.forward_context - torch.cuda.set_stream(self.current_stream) def update_stream(self, stream): self.current_stream = stream - torch.cuda.set_stream(self.current_stream) + if current_stream() != self.current_stream: + torch.cuda.set_stream(self.current_stream) def _signal_comm_done(self): self.gpu_comm_done_event.record(self.comm_stream) @@ -271,9 +271,10 @@ def dbo_register_recv_hook(recv_hook: Callable[[], None], assert ctx is not None if all_schedules or ctx.schedule in schedules: next_ctx = _CURRENT_CONTEXTS[(ctx_idx + 1) % 2] - assert next_ctx is not None - next_ctx.recv_hook = recv_hook - return True + # Next context may have already exited + if next_ctx is not None: + next_ctx.recv_hook = recv_hook + return True return False From 6dc3492bcd4a5154251bbeec8ac404cf67ad7715 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 18:10:43 +0000 Subject: [PATCH 516/518] fix Signed-off-by: Lucas Wilkinson --- .../model_executor/layers/fused_moe/modular_kernel.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 9e45677ef663..9ece45d87948 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -966,12 +966,11 @@ def forward( dbo_maybe_run_recv_hook() prepare_ops.send() - recv_done = dbo_register_recv_hook( - lambda: prepare_ops.recv(), - schedules=(Schedule.MLP_SHARED_OVERLAP, )) - dbo_yield(all_schedules=True) - - if not recv_done: + if dbo_register_recv_hook( + lambda: prepare_ops.recv(), + schedules=(Schedule.MLP_SHARED_OVERLAP, )): + dbo_yield(all_schedules=True) + else: prepare_ops.recv() (a1q, a1q_scale, expert_tokens_meta, _expert_topk_ids, From 2d7ca92c129d1661daad27915de9500774c5dfeb Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 18:15:41 +0000 Subject: [PATCH 517/518] cleanup Signed-off-by: Lucas Wilkinson --- examples/offline_inference/data_parallel.py | 2 +- vllm/forward_context.py | 29 
------- vllm/v1/attention/backends/mla/common.py | 7 +- vllm/v1/worker/gpu_ubatch_wrapper.py | 89 ++++++--------------- vllm/v1/worker/utils.py | 6 -- 5 files changed, 30 insertions(+), 103 deletions(-) diff --git a/examples/offline_inference/data_parallel.py b/examples/offline_inference/data_parallel.py index 65e3573e9ff1..0076d4d30ee8 100644 --- a/examples/offline_inference/data_parallel.py +++ b/examples/offline_inference/data_parallel.py @@ -259,4 +259,4 @@ def start(rank): elif proc.exitcode: exit_code = proc.exitcode - exit(exit_code) \ No newline at end of file + exit(exit_code) diff --git a/vllm/forward_context.py b/vllm/forward_context.py index 6a7d8c769225..09defade00dc 100644 --- a/vllm/forward_context.py +++ b/vllm/forward_context.py @@ -171,34 +171,6 @@ def should_ubatch_across_dp( return False, None return result, padded_num_tokens_tensor.cpu() - @staticmethod - def should_ubatch_across_dp(should_ubatch: bool, orig_num_tokens_per_ubatch: int, - padded_num_tokens_per_ubatch: int, dp_size: int, - dp_rank: int) -> tuple[bool, Optional[torch.Tensor]]: - - tensor = torch.zeros(3, dp_size, device="cuda", dtype=torch.int32) - tensor[0][dp_rank] = orig_num_tokens_per_ubatch - tensor[1][dp_rank] = padded_num_tokens_per_ubatch - tensor[2][dp_rank] = 1 if should_ubatch else 0 - - - from vllm.distributed.parallel_state import get_dp_group - dist.all_reduce(tensor, group=get_dp_group().device_group) - - result: bool = bool(torch.all(tensor[2]== 1).item()) - if not result: - return result, None - - orig_num_tokens_tensor = tensor[0, :] - padded_num_tokens_tensor = tensor[1, :] - - orig_min_num_tokens = orig_num_tokens_tensor.min().item() - padded_max_num_tokens = padded_num_tokens_tensor.max().item() - if padded_max_num_tokens >= 2 * orig_min_num_tokens: - logger.debug(f"Aborting ubatching {orig_min_num_tokens} {padded_max_num_tokens}") - return False, None - return result, padded_num_tokens_tensor - @staticmethod def make( parallel_config: ParallelConfig, @@ -227,7 +199,6 @@ def make( if num_tokens_across_dp_cpu is None: num_tokens_across_dp_cpu = DPMetadata.num_tokens_across_dp( batchsize, dp_size, dp_rank) - max_tokens_across_dp_cpu = torch.max(num_tokens_across_dp_cpu) return DPMetadata(max_tokens_across_dp_cpu, num_tokens_across_dp_cpu) diff --git a/vllm/v1/attention/backends/mla/common.py b/vllm/v1/attention/backends/mla/common.py index fcf901df8677..ce5dc20f7023 100755 --- a/vllm/v1/attention/backends/mla/common.py +++ b/vllm/v1/attention/backends/mla/common.py @@ -1694,11 +1694,16 @@ def forward( scale=layer._k_scale, ) - dbo_yield(schedules=(Schedule.ATTN_SHARED_OVERLAP,)) + dbo_yield(schedules=(Schedule.ATTN_SHARED_OVERLAP, )) if fp8_attention: kv_cache = kv_cache.view(current_platform.fp8_dtype()) + if has_prefill: + output[num_decode_tokens:] = self._forward_prefill( + prefill_q, prefill_k_c_normed, prefill_k_pe, kv_cache, + attn_metadata, layer._k_scale) + if has_decode: assert attn_metadata.decode is not None decode_q_nope, decode_q_pe = decode_q.split( diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py index 0cc802435aad..0a3338b0fcbb 100644 --- a/vllm/v1/worker/gpu_ubatch_wrapper.py +++ b/vllm/v1/worker/gpu_ubatch_wrapper.py @@ -1,8 +1,6 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project -import importlib -import os import threading from dataclasses import dataclass from typing import Any, Callable, Optional @@ -87,8 +85,11 @@ def __exit__(self, exc_type, exc_value, traceback): 
class UBatchWrapper: - def __init__(self, runnable: Callable, vllm_config: VllmConfig, - runtime_mode: CUDAGraphMode, device: torch.cuda.device, + def __init__(self, + runnable: Callable, + vllm_config: VllmConfig, + runtime_mode: CUDAGraphMode, + device: torch.cuda.device, delayed_start: bool = False): self.runnable = runnable self.vllm_config = vllm_config @@ -126,63 +127,11 @@ def _create_sm_control_context(vllm_config: VllmConfig): if comm_sms > 0: set_comm_sms = lambda sms: all2all_manager.set_num_sms(sms) - - set_compute_sms: Callable[[int], None] - compute_sm_setters: list[tuple[str, Callable[[int], None]]] = [] - registered_specs: set[str] = set() - - if comm_sms > 0: - - def _resolve_sm_setter(module_spec: str - ) -> Optional[Callable[[int], None]]: - module_name, _, attr_path = module_spec.partition(":") - try: - module = importlib.import_module(module_name) - except Exception: - return None - - target = module - if attr_path: - for attr in attr_path.split('.'): - target = getattr(target, attr, None) - if target is None: - return None - - setter = getattr(target, "set_num_sms", None) - return setter if callable(setter) else None - - def _register_sm_setter(module_spec: str) -> None: - module_spec = module_spec.strip() - if not module_spec or module_spec in registered_specs: - return - setter = _resolve_sm_setter(module_spec) - if setter is not None: - compute_sm_setters.append((module_spec, setter)) - registered_specs.add(module_spec) - - if has_deep_gemm(): - _register_sm_setter("deep_gemm") - - for default_spec in ("pplx_kernels", "flashinfer"): - _register_sm_setter(default_spec) - - extra_specs = os.getenv("VLLM_DBO_COMPUTE_SM_MODULES", "") - if extra_specs: - for spec in extra_specs.split(','): - _register_sm_setter(spec) - - if compute_sm_setters: - - def set_compute_sms(sms: int) -> None: - for module_spec, setter in compute_sm_setters: - try: - setter(sms) - except Exception: - logger.debug("Failed to set SM count via %s", - module_spec, - exc_info=True) - else: - set_compute_sms = lambda sms: None + # TODO(lucas): support other kernels besides DeepGEMM + set_compute_sms = lambda sms: None + if has_deep_gemm() and comm_sms > 0: + import deep_gemm as dg + set_compute_sms = lambda sms: dg.set_num_sms(sms) return SMControlContextManager(comm_sms=comm_sms, set_comm_sms=set_comm_sms, @@ -318,11 +267,19 @@ def _ubatch_thread(results, model, ubatch_metadata): result = torch.cat(sorted_results, dim=0) return result - def _make_ubatch_metadata(self, ubatch_slices, attn_metadata, input_ids, - positions, inputs_embeds, intermediate_tensors, - compute_stream, dp_metadata, batch_descriptor, - cudagraph_runtime_mode, - delayed_start: bool = False) -> list[UbatchMetadata]: + def _make_ubatch_metadata( + self, + ubatch_slices, + attn_metadata, + input_ids, + positions, + inputs_embeds, + intermediate_tensors, + compute_stream, + dp_metadata, + batch_descriptor, + cudagraph_runtime_mode, + delayed_start: bool = False) -> list[UbatchMetadata]: # Create one forward context per ubatch forward_contexts = [] diff --git a/vllm/v1/worker/utils.py b/vllm/v1/worker/utils.py index 114ae37d46f0..3e0dbda59435 100644 --- a/vllm/v1/worker/utils.py +++ b/vllm/v1/worker/utils.py @@ -159,12 +159,6 @@ def get_metadata_builder(self, assert len(self.metadata_builders) > ubatch_id return self.metadata_builders[ubatch_id] - def get_metadata_builder(self, ubatch_id: Optional[int] = None) -> AttentionMetadataBuilder: - if ubatch_id is None: - return self.metadata_builders[0] - assert len(self.metadata_builders) 
> ubatch_id - return self.metadata_builders[ubatch_id] - def sanity_check_mm_encoder_outputs( mm_embeddings: MultiModalEmbeddings, From 4effe25c5d9120fcda7e2ea3ccd2454f7f4e57a9 Mon Sep 17 00:00:00 2001 From: Lucas Wilkinson Date: Tue, 30 Sep 2025 18:27:37 +0000 Subject: [PATCH 518/518] cleanup Signed-off-by: Lucas Wilkinson --- vllm/model_executor/layers/fused_moe/modular_kernel.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vllm/model_executor/layers/fused_moe/modular_kernel.py b/vllm/model_executor/layers/fused_moe/modular_kernel.py index 9ece45d87948..13f8d8f83124 100644 --- a/vllm/model_executor/layers/fused_moe/modular_kernel.py +++ b/vllm/model_executor/layers/fused_moe/modular_kernel.py @@ -1017,8 +1017,9 @@ def forward( self.fused_experts.finalize_weight_and_reduce_impl(), ) + finalize_ops.prepare() + if isinstance(finalize_ops, SyncOps): - finalize_ops.prepare() dbo_yield_and_switch_from_compute_to_comm() finalize_ops.send_recv() @@ -1031,7 +1032,6 @@ def forward( finalize_ops.finish() else: assert isinstance(finalize_ops, AsyncOps) - finalize_ops.prepare() dbo_maybe_run_recv_hook() finalize_ops.send()
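The dbo_* helpers registered through _register_ubatch_function in this series only fire when a ubatch context is active on the current thread, it has started, and the caller's schedule filter matches; dbo_register_recv_hook applies the same schedule check before handing the hook to the other micro-batch. A self-contained sketch of that gating rule (illustrative only; should_fire is a made-up name, not a vLLM function):

    from enum import Enum
    from typing import Optional

    class Schedule(Enum):
        MLP_SHARED_OVERLAP = "mlp_shared_overlap"
        ATTN_SHARED_OVERLAP = "attn_shared_overlap"

    def should_fire(active: Optional[Schedule], started: bool,
                    schedules: tuple[Schedule, ...],
                    all_schedules: bool) -> bool:
        # No ubatch context on this thread, or delayed_start not yet lifted.
        if active is None or not started:
            return False
        return all_schedules or active in schedules

    # dbo_yield(schedules=(Schedule.ATTN_SHARED_OVERLAP,)) is a no-op under
    # the MLP_SHARED_OVERLAP schedule ...
    assert not should_fire(Schedule.MLP_SHARED_OVERLAP, True,
                           (Schedule.ATTN_SHARED_OVERLAP,), False)
    # ... while dbo_yield(all_schedules=True) fires under any active schedule.
    assert should_fire(Schedule.ATTN_SHARED_OVERLAP, True, (), True)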

zSp1j4Tuhq-8r>{3Ich}xNjjuwtt-~E(!N*RhFSA^6T|AMsGK4=0{ zz_Nk4jC$tn{m=fp4KjMnhy#Pf{0@$ARq;FJu1tRU?&F!O!90_?vw^8F>_6=!@TFVI z4Zv_7Jep_^&o!p$wqKzNf~qb`q#~j%^MA@_-pw-}XQa_-G90GFEhb-W zQ2G`J+fQmEc$J7@bcyuqLkyKOQ$_q?G@oJ~hU3(HaN7h>5YXT!lBNsbOV=hMB_*X~ zD}4g90~0bpvz_Z_8G`EdE)8^dmG(s`WCDkj3v-|gj~iF|GXYeZ5p_#~G>Deh^1Ech zJ3$_p9i{LHYbTMeb8dlK+~XgSJ$hV9U%)nFSjc#WPH4+wfOILDdXaZQ{20$n z>uNTvkl1?=!VcfDDM26U#G|LxH%ohH6<-W`87VIQ)m~uUI&1-$tc~+87;K^S#_PYf zUxV==C%qvmeC4%Rfb4pjG)AEV$Z<6?w%GP&U-!MTdG3IqJyKD*#VN2S!vmPLq6)+8 zeJ_9KN`XWcEn^$%sLhjxaRg2JZR_QjEQS_kG zGWwa?@C-n>R4@l-kp>IsHr4N3UM!f8n@X#_AaOHGGqrhb{rT?}(-=`*-`>C=Hm7*7 z5N46Z)JR4~zv@&#$6=`dH8Sl=?Y8gE^EUA4Y^DX@%jr(Wnm7W6^sdG^mCKjcm(AMuQ*^J%bqiY8j%u-&d6i zl9_lS4N6%dy^46)Hz^_8CDC2gAJIJ@6v89*>V5;T9>b8`ihJvDycDb5Oksw>Q;omk z>E>WcQOtS<t}S$C~w>s*5+LgJ3rsFce}c3!YmGT@aCUIV0`%!gdu>ej*pLAta$t~hwt%R zi1XY79UU1>VKe(zmiMucv*nJ@v@a*O&l*$=am zRKv36Pv1=|>G^1I1N8Hiran&dUjnf>m_LK+kOW{eU&H-)I^M56GQ8=1a|x1gL&kPl z59nHwFnI?@iAM3_+rWQ75JXJPAVjeJIY6UgmI!i2(5?fK4Wll7C)PO)8suM*;Q{JtlHV&+i!-0NDK zse#^HzuZuBAmgN`CZGpKEWQqa3y{a#0~rp0on)CAj}&u} ziBp--x;SeE`99q%wl(qQbuM8pXegpTT})Ccbz7qB;k`7U~}wnbu}MR9^OZ`Kwh&^d*MZU#ngKqvGqmhJaI5+cI&W~0k& zh^RbNv1^ea9I^wNgyqu-oH%Ctq0u)peVJ5(mRp2F1uuQo7RpZ)9Qe*XC)&dkwMN}3vMgQ>gb-rXBgFP$xf*O8Wa3$L${ zjMb|6)pkQ1x?HZ1w-twJl37I-kSf8FQW?KkO@^@KK4&Fs+N|T9-z@dytv<#w;Wfj3 zQRF>derwe_rr4sTS>!`dYbKP@!u0m`myoWWyP)(@=dn?hM@5P;Vo*c91$@tad+O0$ zQ9d-~?sA`gyJ^#YB^WnVxXA{k7U#R$T*+j;x>Uz_O{>!UV~5t!(Yo=c`O4~_fv=GB zAst$$%9s3xy$o*KCTRyEtI*?T6xlg1 zZZo3TZ$-~nC987p3WEIjx3e3jXTRkADl!X2`@S-g=jp{3fWegfZXPQnr`PhC@=UkcRu_Q@Q5rEN zGDMX=#AeEF)&|aT;vo;g+h3oB1Ap_@HOEZLgDqr}Fbc#JIi}QEYrWNK~ z54w-&2orf|*)_Fne%}bP6B6)!aUxVDAP%*Dw{9tm#IiO^M< z1Znmc0C~^Lb_Xu>Y#{vEll^I!Z*mkF8Y>Foy9^T?usj}#6F;{;t);cP4UB{g5g$Ow zM1~@wo@%-u^}=weIoS1^v4j?>@Pow|sDyaq#mN?q}gJhDw*?(o5`sci?#PQqob>>&h*s4`~TbGd?|w=o1Q>msOFHd zZvWU$5TO+z0}6U8sqL=r_5$H-wf@vfQ7JktSFG)=bEyB;bDk#DAM>@QNQ!DNZMCsC zY?s~VkUX@$13ojFJmAF&#AlDg$Gkd_8q4stFBjKPS6Cg?Y*^5B-@1o8Bm!{S>dyK$xK~D2TaM66m67`ht(p- ze=|T(<2f{6Q6R;qYWEN+;Ot?4k3srADJeAGz9IG?=|TZTra;3RF}=zTj>&aChdDXd zsgoqqOD13-hu@(wbg0;k`c5ofZ9@WsgKBuEMP>9pU6_~iBW)rgG4E(1#s()&jiW>e zzdKj`@u%k$<7BWJY2*Yji%bFXWYrTjUa|J15$qgJrs>eM%Egd5jnIZ83*zSykz*qe z&Gg=4*&=_p!1cR79Y)|R6!W!PX~xzJIyJAQ_d_5!?KdLrQ0nf@TP&0iAR>&R_Sis# z!0N}P8MR?{c_d%)iSwW!k5R-$>+d$O+*vDcJDWb{+h?X4p{;=RbwT~B$Gz7*DPwgL zEfg8k2C!C3N*L!fy_Hk#BJ`sLeL zUJ7L}P0+s1ycwc~EK#LnXC{U3o7rqemjtW>k#6b{jzG3Ei0cJ4)VRS?1+l24oE?ut zGt>Y2cevV5FCg*}u~tt1e)>oG)`#L^#0~+#c<7o2wX4rY))_h>2C}JlO<;D^1qmS3 zSFW5{QUIwZiHuAFCESiPu~*VACdQ;zHKT~%bGUsRRI9H|W3sDw1TH69zRX{P*Z0gh zCiw<&Z{k~e2L_-fzKbiuOU#?+qBvxHFQGjKnPT+eR9+uFIE&&Q%pdju&Myyi7kttG z;#!V=XCv!kC0E}Mp;zLma-CCyHC1QZ(dlgGKgYp8mrLt2Q0_AvWNHuZO;X6;^8)Yr zOR45&2kmA3Ye{swo-XHHt*T;%lAmk^qT|h3l`5~mbAK`{yc4>F|9OaM=Hq2H7rCHU zyTiN9r;U)=`ZADuy$i}z*;gC1oG%PZdX z(#A%J!{|k&%M^21CQj_p5dicoan`BE%;tEEo^t)9VM+(K8aL4U*`Wngp#hJ(Zh+*i&0~@+`%I0rETn7 z#irk;e7o-H{Y^GAJG)5IIJ1aVP+KZ+1AZo%gfEGAjg2-vZ)G+9>j=AcYeHC;zK6m} zWUs}sIxx`L=^Gw}Zogam2*rGA)iANKa%%X~odN>3_a%(>*K;nr`9jDKjXk38uhA7$ zrzJgU7dl=av|bB_UNssom>tx1L_S?Cyob)dYD2pf@WDECbE$4tqUm+Sixido#5{f1kY#XWNc#;l8R(q z5vIQItzaNjdzS{(Sio}taHUWnSv)Qk&BJq9(iF>A|%d;U#*ID6`%J2r;=yE!rM6xe)UrxSW zAE60-50N+nmWqQ!vYEzM8}-Ui+K$D=uYvTs2I>944Y! 
zIP;}xW|*wD2{OP4I`TZy!4yC>N#gv@efphJgv_QWj(@4`WLyscK5E?)fXL%{2F2r4 zp5?3zTsXj&r@-H=ms|0-jZ6W~x_J|?6$7UNe55CYfq;PzlyR!jAGQVbCEEY%8ukBQ zH`S6$8a;s<^-XzQWWZ7Y&WdC%RsEv&j&yd7DSab0D zNQj+~ZoTQg_U$olMt8!CJDz`C@&0`myk=*qb+^^2OVJ=IuGR9eH%%Rcz3MAK>K*(s zJ?ZC-0R>yT{!r;wX!@jCe0&_cz?$^pJQ5*fxzSNBVC4+^xg%1m771R5)+?p1%Ugw| zHj9SFZ&^$&+4IUcGh;m$<^kpp+}GI$R-5&0^E|pWr%G^ z81F_a!qB0pupv_ulaX%`uQ|*R5uM`Gq(WDd4lZR(`)xx5o1QgDP2!W!CtFoGm%Phi zb^qmT{EZb8`Ir0^Vo$AWMJggZW=?b@p)=pfxkI|gahq>JRFab=@=I=Hfd<#jumH`B zO*KTW#D`Z?V@!MI1|Lsg-qO7q^I7i)$~Uj)4b((j>Vvrv-m)ShIls#iP(fv3!`Sc` zV<`>!(*~oLWRRrmp~L=|2&tq#!H-Dr&VM&Gfmi~~43nTS445EP73GG%_f2uUMEAxq zi{}-`CSfW$WU<1@Jmj|8sbwO=WBhJoU#0a6SYfr|w{`SPnn6PHm!dUJF1buZdEE1I zeh9hOEuuOikVN{0${5e=4EeQYrbd+6BE)K*AyL=}ac4keMwII1M1b1V<-Pg1pc@=N zjpX;uoZnf4&*;+7DI$#u72gqHr#FQ#l?zy{%mUbBEr(AcwX-hs1MpF#0Q%ukK=uR$ zChPG9GU2FPDGRR;@)P0)o2OP)LWKh648Ee+6q$w_yU?N5 z5YLYron$||A_%vEn|DH$LJj8eFwq;_HjyyTZMjMTz?qil0ov1`G&S%IXym%!MyNIz zMD#D);dq+KNI)7;eSnj<&jysYI-SlfGZaTnUCz~Z zmxsarQE$XW<@q)pp8#*vFe~;ZAJ@n^Z~OmVlKl6zq~`X*fur zn@YtdS!h(;LI~DfpPCA=T=#u#NV}-;=B;hu@|vGj<3!d|5(D$lXv_nd;x182F;M>` zlH^^L|9pCB+`*xJyDFgmhu@kWDUZ7HL>C#3l8a(Q&I3*ty}aIXn$NI8u05>n$I(=% z`4Uu@P~Ir?so_oF*2Tg6_yevupb6^Yy!z#={Vf_@7Nk6!kX@)&t94uyWc9^}7j+Mn z!YZGpmU?pPw_JBE5f0Lyk*HvdA-Fnrx-*CGE%gt4Ji%vaX2BdQXSN8Th~~#_cWVe# z0_r)c;`MUZ=%3`A?Q57DTaW# z=5ZD?WXY zbL$WhNk%7PCQ4$Cgn!qn(>S}SF;IBiB8=8E-EU3rBoIy`=feO(2WdbUDCH2)8~g{X zR743o+}KrpW)P~&bTD<4FRX3ZjOQmroxbMX&xGUx`3ra>%L)chEIN-njPr8-{7%_sW2fZ>`*H{9}u?MVAXsDC!{upM#bFJukQ7S zi_s5&bFL9S?`(+uK5SXXsmtQCrUNG47|R)91J}T`xBq+{zl}_CDKdy{wTF*!N@4o~kf9+>% zTFVi1p=m9Wb(64=3$%miscS!(bH4VDYMd!=G!$`9lntH2Xx8Jmu}&G!M8tX}sI@32 z33Hz-1qDnDExdYp-514)$z^?|l>*%~Cs+8z%HrfPr({XDGB!Bg$f#dz{KUC^HqMcR z$=SvijW~ihWocy*K31TL?VNIirICVhBR{LO7C8+Zf8K z+lre)q#M~l6WAGo>23P!oADaySjnImGX5W9!^}3CGjKJ{1D>>8rw{IpF#Etqrltc{E68_h~I*@#I6Z2k&&o} zw9o|HOWyO@|0FcQ|5gt)eWkW)x^9(#I+$vsU)=Y0@0L7tKOi(`aI#Ku>JAL)tssX z3RmL&WNxPqfP*7J75V3Zg%?kJ$!CV3M-XR)y$(mTsAn2Fc_VQ;n&Rp--DM_&m9L7E zua2NOls+uxsRvkzMOx**bQ>68UMLA4q@w~kD`Va#aC%F{BGhIt(cj~x1+#X*-ydFG zuGr(&a!zl0RB3qw&1zH!vhK>Xj&J!kp7_*@&uK_LrYs;hp2V@oKtu^EqYOr<1Fit4 z(|7>e#gKgE%XhZnS-XozEjb>&g=EZUOCn?61BTNxlO4$GSCnHKL_JP0yN@HKXV~~~ zy)(ywY(=>u;E@YX`kc`dc}sCXLx(Q0c+$Lw_WQ$x;HKjSn=^}?^9YSm*xtQ9n+Yqe zj$7a^>E#XymxFnBXq+)wSL+V{s7>Zgz>(zqBJbxmUIEJql3(4*iZurBt&XA0H5o1&3AMP^-BEz2z$%0DBpH( zbbm$MUO5hvzxal)j!~ zQx?~sa@l(G2dUOz3aiz`Xy2c8hY^G4?j53*Y6M^q@`5>{`4?z8=pA{I+Y9E{sCE}= zXYBQ${#Gyu`{7VANg57}mW03&hbJvAJ$rQdg@?o7CEwf4QP5W*hbg6tli#H1&d(`7 zWpHYvpO;=KDh>GaqaSYnsBwz(K0hV{lBBdX6L&z@@Bb`FAbb&C(h+p%fX!~V+s565 zT*U+h1rNm1l_shKo9s|ghLLJBK50sa)gz7rhY)d>7Tx>JZ5#X35ukaBn>O;3kPg?t zHRlGiCl7$?Af1mja2gsmNa*cao}nF>Lk>ss-~@OB$;aO?<>G`d*fbpJ2?u56b}I z(Vm?RBQcRFUx;{8hwdg)&_)Y`Q?yi7_SZC?rUTtJCvx%+04Lfx5Qo8*(V=ixy7A5L?Tb~(e*^Ky(Jis{f&e>FsBkX4w*W%HSz_xLDg zYJ}R0=G*LXyq(G6wgx+FM%Gf3#VsE#(= zldcf{cZlG>e_|WJ6Cu%0_%J>F7nXSU)ZYj6lj1j@`NNH>n$4G8OJ{4JFC5-p3AT^N z9xtpH-eRG;QuvrHif`sZw6PY%czx>r75-~|2RpdFO8di7r0}2TDd?Z`q=nRcK@R$b zg{>^F{3YA#G&UG71pJwNibkja&fZ8NXy`~JJrn-N;zg!l_q27_V4@*I5xIk`^0t;@ zQ6B5di6>jm&ON$n)%P?*2l{>u&8@vHC2A6_4AzRI&x?P|@dBhjM7~7a+ChfPAbF+P zuoNIQ@LGTc3 zT>1|KYaqY0jr>Ya8k{ki^lP7(zCF@O%9B$lK;d^%Hqx#sN+Nt5nvWwv6Odo@E`z*y zo4LIQrc&ZBaYbG90*^Nz~&sw;V>KJzwJril3z0A^7{%>H}SpI!c zD4jE?4DtdnD5}4Rq@%w03qPnvd~veD7Mb-Og}Y9=EsqdwI7ZZ=(*RL>{9h(!hsh=n zNoUL5H&ByFe4AETO(kwKKgR&Ia(>m3@Dm@{zks{cV#+}h{AkhZkHi~54D-E6QxR0# z7eiNJ=CD7cOQ6c^M`L!hjO0H>*M#e3fRYE_*}d-;HfFbwHaN(q;W`yydaf z(YQ>)Js%>A7V=biU77fo({-GY#Bs z?Gj$-ZxtqzPJ5w*Cuitjn}Z&>&patoKY64jT8unk(fe@77;5Hn;FI2`Y+#7WaIXm}xLXu`g+X5m5TAT}h>9 
zGyZOS!Y!fuDCu*fblW35X2=eBq>DpVh`E)qz8|Wm9_#D&~tRspSS)?_idn^or zLYGZxD`)biWz>gkfFjm|<+J_!x3xBYQ*`|UymD|ZAP(vdBcGFAoj4obyDa+X4BtBy zaP;Czmm);G;qBHv*ysG-k0UI-zb1>JoK*M~FE$_o+nI+2xO9}|nvMfoZ%m9YPHMU! zZPMXd_o)m-dS{H~rtAq(B&snA#Ts|=KJNl=%%^a2z&H^9HZ_Xgb9B$bSIN&$x+V(9?6v0VdjjfN*bEm+wW*5F> zZEHp(#Gzdl-kvFhZ1Okii_lqvyuIVzN7rrHnM3SltaY@M0^R(A=`^8Viz0SKNBVXr zt3dH!2LfjaqVuUz8>sAbF7Dm^SG#GVy@{V&&akhTEl%Q-*5q$C<_`f^rs1ca3DcmeQKKxwQIm+@Ft##>*<@_R{P?b0QszA)7Ugl zOE@|y$EzV;2TVdO$zj-ZmffBW=JVZ219ThJ+8c?Iw!IUs#*bBb8peqq+TF~+O{;u| zEjt(P;?rJO4)(w$KLweu**)Qj>TVt~d+ddK1yrj~qP78gpLQW+Zw!$Bq9|Q@so{gn z!$9o{ozFE0YtAs1(gx&C>BB8ws>;=Q{J-x;{{}epM?>arALTu#)U26Y#>ul(nCN>u5;0_tpC`m+MIrmt`N z)PlOS`pseUrv8G)l^;L47Zz0*Kuam+ORV_&EWfMDRL_Dxa}*F)=wjgx-_jzO@h-ry zz;lL)oPZ-s9hvSCJ?^k6j0){jPC&x5y@oad`4V2ht|sTn2Q6KLrj20~_*m*blL%uV^h;n-M>WQ# z221rV9lFY-BN&`SUlJd?mH(7y+hTs{`GsACKJ?WJKI2`Vifp(}jCB)GGU$wGx!ntq z!m_-#_dSDN`W4NoK%h}Wwco&R%OLVTYduBp( zy+KMymGAUQ%EOV6QAr&12BGAIO~vmXZ({?hUF*0I?Tz7|rRlXHVgI(AiUE}gtnkA1 z!h#|}$TL$PeHR|emy4Y|!|gP{>lADPc?6u!(gEY9w6-q#6;~2%IJJ(pv1EUbKO{!F_y*-4gqKci}tu zb!W!(`us$;;oMV$DWmvaG}D`(xToaIB*Lq;H(yB|?uAMTyyMcJ7)cH$u6fQK){`t= zksZT9h&-C;6|0LO{svmi=J551094}40N%F`2=GEgDCSJP_u6pA+}^%@i=~6qe?#os z9ZsDsUSXYKC@Pd=^uHqA)EdC%!}qnY<>6nX8$hCOj~zqH9DbIWy)b*MC;j)zRymF*UHUlOW1T!jD0My1H( zmWbJ3v4Sa-RX)e*MU`+&P88_sDeG#e#L+GoG2&Nq^xKp-vkQ_NXH%zzjFjrt_X6X35;j7jk}m_fMqowN*QnQxkTx$ori*&F>)tN zkRoUL47ksGcl!ct0wFRpEg2;T7r$CgYQt6hL8D#6BUAonx?3j?zF#m^+R zORC$cF>&r^CJes4K@S=$|0@f?-|NC(aEMsamX3C5s3P%6h&$aG6gxxQ!@0nAzUrOg zU!-HN;|anzS&_fsJL4>-)`f7hFQs=;piK(LtTUT8SPKKm?vC8ohi0n55|4Q){V=@9 z1_9{nWJRQZP9C#`^9sSo&lCw^Z5@_S_s_(ps&-SZ4~e@A$s+s&+JhS~t8sJ{%NfCs zH3;)Y4vYzssL!Y`?=ro>`qb)mx@*F4DPx$&yUl~1x*!;pg*tb_FO?((Oi4(+^q9^M z4E@b9RoFY59q{YUybGR&N``~{S>coL>M6i15JZL8635?^VOab0Vy&HSrOLc}uj9tl z!Sfz%E2g48f%aNp_jih01;Y^^VL*&;<+X*Be-0Q`NiA%j)^vR`*G4R=cAfZ>Jc6n| z&7QhUaeF?ruKG#Y<+>}qm!Y`BKkylX zS7H9nK4Fk+yZv*2{KV%xyq0b&**7S);k#raw*lm&6TcdDGx_8=@~z#L=hKv6%gzv2 zYx?D<}#Zf-4LX+M4jnmq8j7qlqUr}!*bL0K#Bs^uNCXoIY#{3j09 zV~+e@>hh5oiV)X?pBX;NE}Cqw8lryuux&r{Irc$kVSRx^q@K*71SYhT_auG7y}Ro7pxon$*_nWGv2d+X_IKsVMt3>iQEV|6A)nLb~Xvu-y4{< z>dab(QcahMLc3C=yt4^45pTG!*Pbz;>*z4vfJWe8Bh^9SZyxYrJKe-bn*&qH{k|~K zGhRfB1;HjobYdD+{ZAY*Z<5q`9h}uQgo^j-#E~&!6D!7Ff<)weDMTmyF8*0!n#iV{ z@Ct|&inU643u@nq`z`7b4Xqnw#x`41K(2q3>JGK@nV+br?iM^Jh0?n>^IEjDAEKvR zehw41oFgEWh}h65LKhJX{z<%>)6fDbY3rfig**kf0at3>^CC_?{r?ZfRsApHTA$zY z;O{+@jc0pHzAyP^sq6=31)F1|O00AKe?#E^3Eipk;q8Eyc1L5lhlk}3lXquc@j^vN znk)`3O89a>s(&^JZ5VqV-hZTdFLnYlU(27;y0^|jR?K7lJ|V!>b8X{=v4S)&>f19h zx(D<`R{486?L-q`XSr6n_=WTF3bm}Od~F|jRUVJjr)}Ed1}q=uoh%BPX!;(hRq|=e z)Y`&g>TMSubQpQPy1z*141{9gFp5Y>yfS0gFJz=8p;OgMm^EhyeUraGbnN*Y)%xn7 zV@DLc2JMGhX7F2-1%!~k8_VkQKp3QbFVobso-X8CuXSc6(QkryH1#Y|>P#<*f>ePV z`-=UIAwB43k7=JjG`bvcWPqLRCW~@4id0|wJW|THnKWK5$kmLd?F6+00pvg-)4X)w zX#bFfP41j9*(dKm&AecclCr$|=C_o4i5h{h!Qq%RyBLKku}@7=Vy6B}*n~7Mi?vDv ze9<2Trij`44$(4FJ_4@G)ZmBMqkpr$Ixau~g|W7dOotfO4G43V)S)G&LU@s5?NzSY z&an95psHJvjkLl^Xc8hq0BX5!CSADREpk1tmvMHV=&+(^krnBEPkIV)>PvR6P%}zx zbOES@0r#}|GXk4Ilk3i9)J!rjjm4Cb_wX9_ONGa04}>pl8B%yc_+6;#lM=8cJ>wmr${2t?CJU~_PNSa${ z=l4=uATzM*OJ59D&VOZ*DlQ3g@E#rg5_pAh-Iw=0V}+x%y2n15dxdN1n^{y=Fi~FY z`KvgiBPCGFIG|ZmVDp$wlFOhQ?BI4RTZd^3Hd{R<(on{xs}tmj>Ntt{I$v$5SG{H7 zGo|9~Y(AvHV}5;>0Z4WW$O!bq35Pa5(q5ZhSDqeKN}UyJ6jluyebxB6MRV-alGft4 z&B=dS{jX5hzdqhy?*CP~JxkqEbOKavRpn^OV8^hZEeK8jxvJJXEl>X&Zu@8TN0B|M zfc~m{)yu$fbb)dlF?m?bev8cwkG4Y zr*Fzs^+8JqsBc|>0qrll*aNo+`J4eW5gBM+nwi3L)sea1z^+;R@>P&`;kOx-Z!-vA zZA;{FWKl{?f67_jf#b7D>^YJ2M&BP#sP_h|`Ft)28aCVUfxeK7@!PUSa-EE9;|wjM 
zGBth;?BV{;h_M+_JgghMcz?U=N@+&`;>{`462bO-PCP6CdQk?vJ7pYxf=pf^KVGuUwGT~!e}V1_X=1YaF5CHk%ra4=#4jM-qS@O^%UXOWh>R(C~mML znEyoJ1kQL*BtA{#Q%7>Q;E*%y{X3%5G=FhGVZkBjI?@||wb>yxjiFMtk^D50w1w~s z3nM&tj3V3@-IGI>`io8xyW+7~PkL6iP1SV6alX-@l;~*QulWaen)!r-^T&H6oGMm% z(kt6CB)tJ5e)FZ2H0Lw|jjkA6Wz@Fak8(J{hY!NMnWegQpsQya8BV5;0oOm!;!JVj z(PD|YH*oaa&sDgTQ|@4ek=S;>Z{{FNmz!p+_pY9o{hlln&J;RHu&ggWnEu-`4u(Q+ zs`Ky@OJ}x;%5WX)+EZ_1mi(G@AnL2;-jWp>Y4%vC-jokjfW}g2;!d$9=0d532 z#nAV6n(}YfGQKSTbULf~n&>lS7H;u_Z(*O;umAN}{PWEUVluXSU%8waQ##AIvhuuq zD`{oua2H&XeeyU|Ibk5#YI1_%*>CbDmAJ-t=%$)H__wBN6%pqCy$PHttmUoe89A(7 zeLv);6Q9GTF8t~=VvB{NKfGwoUdjlFrHNU@U*ebJ02X6See?p7JSWhf!>A?T%XCpA zw!jOJI6Iblo4ScIZ2y7u-ix3uGTf<4x?>nwz2|Yt9_ydNCncdTrt*|s$XN`t%?1+6 zRgE1CK-^5D^pm=U0x4wZ)|whu*v`1Xd+S~%Srci4fbpQ1wDTgJ+)n|+U@WZy$~{U{ zMPB~C*B4eG zmSu0MUr4G^H9IV}nA}paIvmn&6C;J-xa6%n&Fdykz|!D2*Eay)w4C*2FGsXZ>ZT>&d8NxH)S22fGeYU5FUAexpXN8 zFc&>Z!I2c%3-ueI)=BtPz1He|?o2J}{8j|&RgHtF%?tfa$!k`gtoYb6&|x4^+^#u% z|7y&1I^>3KO|gTLkai$m@*s)F0S87$Z*L(*&AA+wfbUgwlv)qYci^Z_~xkpZPP~k=K_E(&Xlja&_B=gj+(#0?dxZF!rneL7E@ zwNGsFo_^KyCtw<7M$9hja((q_tgCuitkH6Q(x_(WzvLBF$US3y;0G^{mbzYl+OXeE zD`<$e%9iHgVT|TBrjw>4^t0Mpe|9{AdzFVSyy4bfHHrFR4#No9>cEHRp%vhXN6eH< zOee5vAGq`c;iA{T(-x{DWAjof8erZ0xF}bPfh5A}Cys4j#x+UhRfP4TA1G?_% z{#L@LvZ~U}N~Naa?t6@Y0Hx4*GG1HZkYZ>ZxNxHH;>nn^e@{%)lAkUBnP-eNS@Jeq zdnQ(r2k6z=QcuBjJ`q`3F+T8|*vu!UML!=#Bhgvea#2J)zvNldAO52_HlLA#5>^sk zX8Z2l9rH{r(Qn`bWozU$KbNt-s_fA)to@43sjI(n556~smOtodm7v9#_J~{~qSm0# zMLiDQo*yD_>AL-&R`PnKj?iuJhe+zxmj?3t03!v?Xq@OEvvBvGgDR`pU}Z4zhGRDc zyfYh+s1P&_h?G-rtfU48@1lzx5&qsXkwJ0m$W{}2<^=N zC_A~UllA=>E75h@Sd5JxykV(FpdCS|?HhYkQjT=h#C4t9IJW+wZ!=PTNo3m_X%^z`U%#XMrCaK)OATRr9 zv{>7r@U_H2B237Bf$>k%wbtvrh1Mu?k;@YP+>Dz^=l)%tmyZe~n)ZYEHeW%@%vcoH}aOC~J>GmCAzGir9_Z?)*Jrl9V z3cKpVm1nBMByiV&omu{OwZ6ZE7Rd37bsL~_fofoXoI?5fSj`+TQuCkq6ef{zpVH4s z=RICz;~p7~1-izp<1x=5B@}mP{2x}6S3s-yMM$OOGj$!TLF1rP#GjQF5FsMToh-GKkCFmG5<4EGaaIo-Bq)g4JixaxP4@Y19`LPPJO9D%X_U{Q`CMyOP`FWCZF8a?!+VLd@_ZO$Rz ziYmFliYuVKtSoi8ATH}OFu)1SDFPb&_P1bQ6~ zT=nw9{awI3$ttbuZ%Inld-S(Eu#@6=r(~IkX(Ts)q9ea1G=foxMoeYY4(t*(8fJgS ziyNPW{=P;zw-+IC2%tXmdx}!!ipKju!~+f^l?Vm=sM;Q_iPoKfF0S>Nc<6~wgnIWy zq;W9_l@QX>T)!W+@Z>dT6g+$bU+wJSrr{yRrHF1QgSa9FK@Et8lCXJqdr+>heRvaV z3UMPV5${+G@0|9<48C(0@oMKY9K&bmmT>a?<$9mXljRvxDXU7CzL@E|ULCWhJv%9w zslA>ysuQ7*`30a|p(6NY{F1dUv&eyNMT75SAH1BroPMXCTjeKoQ?FTkZ<4)Hiv!hF zOpc!NB`ku>udrLs)V`B14vdIyhi^6;?MT6ZCi{nfB7MbZcoY`{y%=UVJmFq<%cU-_ zZcbaL|E#n2@65QYHTXl{ZB^w~viSb~A20h{9bhC<+~4r{1>6yPbDAjk*2j-|ecnsW z+Ye$0C+_J*i~Vy}{snPT2!qV+N0`2C((C@-KHq9b40%G*zZ#&snSz@ir(f-cK{g)G>BIaZn~#Kg6X!&cjrxq-of`5=kNnWjTy}nL92dJ?H-Oo(JOzfvLVZk-1osHo3w1D~6=3_f zS3rMBJRolP2@OcLsnegRsBL~nV0@R63J4=R8V8j0pQ@(ASK+8JMg>M$Z^yjl0b{i? 
zLercmk24;f_(MRbo(sgPs2TacUs6dK>PcXl5{~8?d3fE)Ox!IxCn&l3jQbEs!6!Js z*e>soxS!Z~`K$H?{=`dQgKDQi7>|9_ z(P)Q>tj}gpdU2zo&O4Wsq*@v*xaii}O>AlfO)14FX%E9B>6*&?y~h%HcL=j`t$7&v z$-QWWY}tS~P6FM@G6OLTsRcmO-S;aCm225VUjP|8WWz_9!oHPe5uKTpMBRy8dMu6z-h+Q)N+evG4q zyM9vn4C)nH9LZJjZ~W6>lpUAjxYxUWmZ=X?L{ zl3A)RMZe#%fd8uaDRbhzVaRDg90(i^%9KkDZPY_DRF!RHK=aHD9`?_-{jaIFoOO$T zIrmr$uw3_`G4gs-E9;Eq4+I^zr2wejD_J-VDCj2rJz&L&LAsk zakM3<2+@V`0%{`?6v#O~_II8Q^}R+5Xv@enW2+@ngq?co(hf52O^5;R_+iN+~OBPmDaWo-5Iyba0&Ryh`6zKNH*-ckWSZn(X< zgzkNHv&n0BCMQo&WKnpCgr3=OlKj28QT#NOS~TCFHoDNHZIl0D9F%mrTFV>^!5HoPMQYTJ=O6lpfrHc$*m%VvJ!T@a;-B0~blKImB?AHL<~ zm>fJvq3UP)2SYN|uwxk_y3ME6bnOwo;IrqHrq12M=l7ySJL^O@!uStG_+Mmf^wgLD zf~o;9-9XEpF4m{qW#su_CfpChL%A96fym0kapBYgRGI zY$Tog$ae0Y0D0pWu5?Dx%-MQ|1>Q?)v7NAqPeDjgmklg>cRT*Xz%(&}h+14-0uyL5FcaJI1qNG5f>?Ir?~q z2$+M8i~0M6k||r7Qn6ON%VhPI$pRPH7ZxBeyxi z)9j-!I=5RnF{U{BY>F$n%5nec`PHHPL3gik)6)ZG@4ob3;}iQAWh71&gn1w-zNG%gs2w*%jHHS7Nn;^L1vh!` zY?h`>_0#}!$W%1Nq}p17igq7Y{k7ahhNCHH5kcX1zHV`Twi>k_#?2C9ohqG#1NU}3 zQ$LlqlXjp(x1wE~z$Zh5s%)TnywWF)8XT5@WJDIAzH z$QX+HWho6FzDgCu=UJ3EozIT`p`V`Z_)b?gU7PWJ%X6XGQE{S;f8~)}4$z%icT<=O(Sl~!6k!;bmkoEVMk>UrvDCBxD_mR1z z#lX9v6v5>;IrsPp#a5u?r<-F;=(CN+ha*j8c`8VTg|I@RS4}z+z0e=MiRK>M08YiS z0rqd2&P^z%SvXYS6F#lIC42e(PvEVWhow8=8;y*S>JjlF`@{5N%~BR^qYuwE2j)|l zqogjPuc~$vy@g;73Q!V#9*f;X&NrN=KWST#@{ zU3j4lQt;v+BGJdt!ndRI|3;!Jkbnf=T?&79`5zS)n-$FWUBZbr5j=atKTcjr z&3EhH2D`mfO=E60%)ReB+9Rgiq`)n`-NEz><0I@Ij_QnDEfZyZv!XjzbSyX1^mkq}4v%Nv}`BJ&U;`n50W$}t;5sUe?{DiX(E!f6B* z+PVm1lk^Sm#i~JHHVJ#~{%OF*f|^vzVLy+>p}}OJNT<_gNQr2?OfO-oPkgW3t@2Pu zPCfLL-55DrGoPT)iy&T<8Vz}(zjS%iLH{TYr4Y}C<8iR8EgSSm)3Y3$ngml2L+6=PQhK2 zqI4LOM2=iAhfkGutIw|NAw{a+S#H0lsCE(6zWGTMhjyuKQREwQZ6OTP_H$WDxG0nm)Q1_yx z@^q>--(saIe@*7ypNr$|kgp{{I-OBN5rQb_Wv`%+d|~{N1ot}?^*s+91VmfkE#l~x zn}$GK{fp*Dx3pK@N0KL5K$^4jKxw^fBZKFXWNF5e`<_M^Su0q89hXD%b>#d^YwM4M zUx&M(HXva&Mc?nbcI+4k410d#{%dROa@@NzUz*1z&v`HX`cLeK1!{v7=Dm4Ee@ONV zYi65k2j~2^Asn2JK|$FX6GpKGGAud9dzW zuW(q@r*>b|a7xn><@l2$6T&>{E#~%%j1kC_FcwA04%2Diri*iq#kj zd;@}%+y^JI%!&bo+c^+BU<1jr!6iDv=N(NmwBq z{+Z!M#gpXQC6DKF9{l2hl(MP)bxR&%)?MJAFa%dw{(heNMT2+5Bn%5r_b=F4PbtK}*P z%8yT*nEXz4$m?Ifkcl5(c#nf?UV3#L1k`5@WCh``@n1I-mDB`C?*WRs2aCR9T%6^~ z>EFhckfGn)tQB+nFZ^*zvkY_qJI&9xOQ`m?^Y)$Q5ps+Hth?njr|h|JM|U(s;Ba?N~VbSY_((vGUI%7Xew#LwRzujt}qt*Dw7iGJyKR z?kF8=#r)+9F!CK`9@PxQLUiRdGg}6ST3^N~I61$4A$xB5Ixo#_#?5)^jYUupBnI3L z{tl*B9+zq*@w4@u%HF4X-p3sKg;4S%F-;Gy?llthHvb_4FoOzcBV;(fg#f~#>&#cWDS)$_J3TOCx494?it zL?P>e5`ZaWet_ydU3g63xFIfgTlQW%}OXGU`kzLmR z`Zm1#i4MoNBqDuE+dN`X(!i@XVO=t2yVOiXt7)xgEBNO-)gusb0)hD)c~YyfEZJT{ zy@L!%AI}Qtmlx2ft5RPyq95Ja6)CdTheKWkB(vpgb+Xr*a44FWN)y zBicN!vmS}Q=^%1E1D<9)11Bfln1-f}(iAU_OT0A46?x|yB=@JsH3E>fxR#`c(*}NK zu7s78r9{?+@FNe#CUdjP1_tpnn8&XzFsxB=@_HB2mTT?Q7-K8Lc(!o4n|SxaKc{`R z?^^i_t;09@d&k#L>h^EE&&joei6gsCE^9ed)$qIpeD#l>;o;Db=vTks^4RIi&r)<_ z6k!iC`ad*cuEJl;Z1TNF)qn6R`u@Lf!|PlcjXY|;379#?49o0fmtVCQUI%s!s|8rEd``vC3(}J8pS?WBQ&Eyw(2};QFgk0!orEqIGV@d08Wgj zt3?#eL&6>ZW46Ygsh0dA$P9`rFv@Gl(fqDB%B{5|u4IQ323ODZJhlcu9n@swkuwoS z)Z2mje~Ecdir7rw&=nCy(F!ZeYQK=adGVf zYY$Q2%7GTVys-omP~l@a4`pDe?aWLRjg@DCK;3{{)^t}jM6oTAnKV+s8-#>>fXMkC zjmd!r^PciBvS@1glU3HAC*b>3qFGPZAoJAtVfR9OySIMQP+hNtaT8TA@ggF@AHdQ6 zqJ+w@;5C%Bz+pRorVS1OGqLyho=wXe;?=x6#>MNy$!_C#6A7XvsXE~Rxa`U-JQb3@ z6>RsgC(U?3#D{LJS4L?VJ#-WYx46=g+_bepEp~5q1bgqjs-73EW*NrlaXN1p6X?Ko z65OFUL}ctX&`=i#uXlUgCsR}w4xfhnxoiM>`T_Su@Yl@_6Vc2&+iR#Oel8*G>q?(a z69z)$4ootXvY1+TzsVT77p8(XL>N}*M_u_N&Osl$@P?1^D`_~sEaWOC@Vx~gjs^XKmG`&|OQ>uNA>nfdMP>B3k+##okTqCf zSepVky_lA{4La@5x+7B8l2#+bejnIqqdLiw_O@8Z{xkv)ytistg0t6cW0uYT&jYXP z|GQOpvDWTU6)1NzCDcyIzfWYrQkKrS`FF!O3YVa1cF 
zW3~LJO}72>EQxL<_pRjj59a5edjNjVr?4Tw!O{;Yeb``in4l_i&zkM6UAhS+zH9CWz*bAAd{# zq<~!r+0LW(7Cd23ryniVGvft8F2Ot#A-A|>`8F>VrkQdFHM+2>7OH$-6|~Hk88m`8 z4z8%a;Nrr*SgNA@{f}G-o2+o8nI~oaTcv=N8T2v3vHcFXsFT%^*^$*EkkMZZ{FqfG zgKonBV6nkhSS@L8<5n1UjuyL-Db6 zOi8}6n~o{}i!%MyI4`JBXqWg!1GZsT{(88>MPG#Ec29;4Li?x#hmq|>s9PjL5~|HL zqC?-;b?^+ojL>;KC5=-E$Rj%T=knCEUqcuTuMLVs*w~!Oiftru?f*t6;C<6T%et_S zNbaGb_lK85;0)gC{z+~Xj}KOrlslPO%4M#cMnoE%U%{PT!MjJJB!@fee0Y#0KWKRW zO4-dK4OWph7kd$0Hfw_!e9BTjovX7%2dl<*;uAn?d{j4{zbO#bhjy*E|IzN1u=SE| zg%pS0wv<=>bcVUog8~$o|2H&m#1Mb2^V`CN)H`LvdDdKw=#OhPJ_%7`l8$6wSBi>r zwaXXAJ}N+PBd3XBwd2J5-%E{D*mE>hm)@6M@z|K3wa^jPP*Q$aaB4h~{8dm8lCFsx zNwd}EHf_jdk>>ltHqpDV{AY1_qomQ|u)XzU3G?ro@%IxOLk|U+5 z_GE|}e_o-X#C^r(trT;$6Wh?=$r!7uOZN)-RhIS<2qEov{7In0D7pCdH;;_-nMX^s zmSl62>+e}j-TEhNS0B27-1SF`%GY%-FQ4kKZ7V1!jU!>iul=}kt>cT9XVOHhvaU+t z7mq^U>x4YZb=yw30C&ymYWig=jkiADn+QODUQZ!r@xIvVc|9iq4)B-)E&whrU51?U zc#-pq;zxXd9sT_U!(GC!VPPG_e>nZFFQu}yyr5Xqg$CL*Le^}+8baK_^``0N#z-ow zjh`XyaclYGBJJYLKbfy`e6O)D?oSb3B))A)48o^a^Lgm{C+nc8&UR9FT9aD@*kh9U zEKcaIrLD*Qj0MSlztzR{tIRX$co8Em?C>S|B2hbg;@*Ua?RS&?6vgB*d=7W9iQ_w72|+3v%$0)mFdc7 zhL@Qgh$UdNc4iRoHuIIIV>S+d_u&kxizV^1DNkP6$<^jn`e| zsGLeIi)-M#KZTj1reJs=lBi?8L=Wr$h~2n_O=r}WzflS}EA17|)bnaVxgs;g?^5p* zIHo(RYsZKQ2AbCcBYxvd3+z}XkWOPpmH4k>@p;6jH+oDVHGDX@WEcQ$Tt$lRJc{Pi zRlT3+!)sT`Bx$zUW|%~DW}j8jC4)mxHU&nz09gSzqnAKT3BIK1v>bu=gawWhC$=$r z%TQ~&U7GlVQNO1M}#O7tP^Vx2dD~&=KukzaY&he4AL0CYsOYCnNSXM*Usl zug$^wG|tDO)JL!2=p&$*k+Rhh%$=z!$H#I17j#c$!)#RKI{x}GQEe4e+hZGmr(#2jS*lsE3au$s&(eYjnn zo{|OM==0hgLj2^b<9bu2W8ZW$YpJbkokHNboV$5#?(-_cZ}$xZ_lkAsHwH+=EN?52 zSpYnP#cNZ3KRAoE$~i@9^iI71Z`4`@z1WLIxd(q}v1D6UBmYz@>NGekN;)sY{2~L4 zVWfFgI|Z*D`bkxYaQ#YAOJ~EoDq`yzqL(}TWPBy+1=KAe0FXB#DdiZl{{tMLa=rS< z`I>O<)T#UE&$?;pf>DKLk&gU=hlNIs7g`;<=EvRgF!X|w+I~V|09aWPv6>%a5Z8|z zcktez;0)}_(2|x_sd{;!c^O~uXZcIADff}9p2b_F_sRKBAZP#Ny3&3^H+w&2YpIR9cj6+mCA}7?d4DprAKE3>F;N$tKM#@Srz|bt-t-b7$n)DM{eo2n{2TjlxP@vvHIrQ@swW9^y}s$ zi^Ja2-A$BXxaYRrmf!S=b#0|tkb(OCd0wi0G=qc}*)MMO)}Cg-SmFG~=af=NV)k^3 zmMK@EwrJV6ABapVvUyORh(CNAmj=WlEkAqrdbJ7Pd+`P-wsuV{#U}DF8@QTY;g^hRZjSUdM2YY_6%^qTU@5RGZDNYC{1VZJ@Zz3{lbHCQAKkheLng7zhmuzoLM~b#qG)R)d!S~r#g)K4QfmRGo*)U6mu0osn+%g1Eqw^cC#u9W*K};A7fEzg^5?eF^6= zyQSdx^WWN^&tL}AN18L=BH1r}=*OO)F3Xld==Frk#uRdga)HQ%W%#+j&nS8uI?W28p$R zZSVz>xVvvcfQ6gJ#x&ge2dEq{$KpvY`rY~fRe*AVe}r^p!s&YPU-cz+ocwU@7Utox z@H(krlH0g9FkI=A5LQYnwh=?=bGBmZ+m~uXWI8d}&Hj?V+uM6AX~cJ8M0e97%E)6q zzT!nj?Qpu7+_b!}IF8H4;GGdMMm>cgejtcLl46=ODYq^;n2c>`VXv-#u z*{;5nkJuwD7~IcV-_s78E%>NNdGBkqv|v)@;9d&WlV#+;FH!>>4}Xq?EiAfDNad&} z4v$@C;{88#ePviwkGd}~l(f zdU?0kwX-%gcOyTTG@0!htb2I7aO}ss<8>Alj8FdCPC39JCEaZZ?$}AHu^J9~!DqDE z4UwjjA1ttCE$q`^H_(YqEXaU{VJIR+o@bySNf`$ zn3uSfWv`wK+A@M#T)w! zUxR$9H){Y_-5-RpXq*+~L2g_l;k_I31k!Yia~BP(!x$vd8GgW9a9>db`bk}aKR(bx zTX)%?<*fj3VLSu=;k)n$suzbp%K#(scXhwxCYXf0ELUj}%EV~vo-g`Ay;M_HA%zHi*rH=X&oVqV&PdUp%h zz*#3w3-o0A*EDoVX>O)P?WPDtJNgG7uH)~EtLfXQxF>L-%6fi#ag~~)2HKxM3BHF* zGC4{MjluW_BfQ$bw?uA{ARxiXl|6mCTN!VL;KYp4+V4Bste};!=jqSiWi29~0};s& z2t?LJ1^F<6x40ja4cxfCy>kHeYn2s6I(uE(wR{pRjo-pTEo~5ZxEMp6Z87^b94-gu z1usUaZ$4a!U+J?2KlRN2w??JSmKiB_Xe+-?{BJr?7{{7fxX?52{}6`%F)YcE&V;y! 
z{zYUH53A+N(BO&MFRV`3#tKxkbQT)a3v#>$n{abAozPx1+~6641ELsclQAlnnbG!6 zF=`jL9-4tuQ{_iFh-vsE0S*HJvjwnJ!|ws*DZL=Td4?7J>kOI9;h3=$<$~fm+ag{( zL*XxPeO>X=%{xJusBkc!P@5pUSI99lO2=rrQ}}K()Xr#@&v>;P#^=CK(OtJTNbUG; z^0VD8>9kncrUW>e-JcyF|4;^+#|sVX6bIk)S-6@-xC3Ujay|nfp3s*GMYOm=GK9b| z!fM=~t09`p$ugxk{3|Pv9c<`f-}OD=jrH#A?TGpLrcr~DvW`{74(RaZE1wyzbh8^- zf?tG3B7ZT4j;cjKB`aJcdXf;u=8ZO(jWJByaJVtS>^O;(2%65RDx3BhVBs#KJq#~ykbymVE>!P=Z#;SF`Efxap zA^5StGcSE$%%(Igmy*AILNce$qR*(I>mh5cahlYZ#D+gda6yKNzGp|5uPk82h5=1R;Be`Xfw&rY*aNnP==;QSNsi3-cx?-11R7@7LJK z`=bN{(yTs|J!`L4HM6Ge3_DuxXX^)SS&z9{u@6)iYBqkfWUW+?V;BxyUBmpBQD7hL z*Sus5KL!}z&2f|FDv$P6NY!vDiAt6>oo~~8oo38Q%svQ?WB&LZD!Om*bNYRZm_1F& z!0olwoW9zZ`nw^)8oA?5nm_Sh&5itk@Qf;{kF}83w3GI14Xd0bMmP2(jjR@GhxfH} zw()M=%^Kx`o`m){Wq$WnBO5QzZ>*gI^$z2SuT$)efX_jI412w%`n{jo(nj*~8@&q| zuuqB!Q;y}HTQ^OaX!?D>ZMe&Kj6mEQ6p&JA=f(M-B4$6qJ;8?4%KmSpJ5NeBeFiwX z$@-@vaL{1DJC&df;PW+;?aZOWx?X2%f^D5VdMR8F3vNKku57!JZ^0jLk<&JxYs4$! zT^>&e@XqPvv7CJ)0R0{Qm|idT8C%Pz0(d0vy`q5IUSm52qMi6Oh~b>euo&jgkoJ>M zgqnD7HeHPUhU+Rk%rVfnpro9~_crAC%b(+^%YFtFr<38a;_V0X z0|jJB5|j_R&E4!d7`xt(^CWjw+2Tc&3W*$}WFFpQ{I7V4lCr9d&m}mC6`45S_K?KA z8FsQOez@MB7C4x{`Rb|YxqIiZ2yX}4w(kTt7Q9ZN<_q2uW=Nw7kJaC%+N*H8sdur* z7&T?q2AzMC1nv5yRT*ccX zDY@Dvr_C-gKbh5!JITfr5(?RoOO5%vU2#9lbzNT`*wZ~{r)$d;j}_CNKm{@0KAt6* zjd)LOT~6iNfkkPGO25gla{wfzQV)2{TITa-jI_xm%Tpa50@?m#m0eHU`mRU)F3kqS z-VKo~w_(|YhxxY2;)npMqBVn+mG6Q>0i~D^XbmyBaaS`{8Xz51Va=!td+?1Bt5Fv( z3WEv*ki}gO>lo<91g9hh;aAMkZe|Zrf9qU&nhBnPyTHn>{-FCsI3U5`6ic;F$-5o3 zV2~d_7y0YtAZzG=DchwC{r2g5-bj2B>DI$pTaG)J0Lj!#OtFg3lSK^)dhb8kjOEDM z7QfGy7LOQ!kJCBPwP98rP7_j?0Rfiet6 zL$BRk?6Qhaum(WZH`-|rptEwha&hxQ-i{~&E3pDMAe>h#kPCXxTAMlbBK4AOv5mu| zVnrcpSG$?(^*M|Nu;}hRGD3N%xWayEb^Ps<|6~D3cmRykf=JJ~Zl#HAB9Bc{c#!*X z7um1oD>bf{``yP6y@jce7M&qGH}Ucd(T7{f{fRQK;R2!KO=8uLetuc<8}fp3-V)ow zPid_t3Mx(;e%p<7?(7JI#%A$$A7P6e_hjdY2TerOwe{%OG3)!C)v)MQk0k9kD}i4p zaGs54un9=ovuBX*S0H>J6rm&2>#c9Bj(*haybJkG6NJ#LYN?r4JiZuvIW;il;TM3M z*7K2K(kP`aZji3(O{Z$q<6*xH>psm)A}-z<3cL0N5JYWG)4n&Fb*`ys{;aE6KxQ`7 zY$2kT#CwQ;!}OZ;*pAr(?;Sbh5zf~ZVTUZ}S19w&v|aRiej3Y0npfIHiPulAe(Y*t z5;1!|>Cyvzzq^-e`HJ!SepgaYX{J?X>+I4A6q+l&2(L7|vvKK81!Ko5(>bB8C1{7d zr!lRr=A&lHLFwL{&-+;$MNg3#Z8UHzjn@&MP5n60S5{6a$DjLe^a5!spi}Ye%^2eS zFv&;gA!qOkdOhgs77w*4%QH(zvsZx$?rk7(?^1xkcVZnZ!yo?m;Z#A;bqo zlgwu|r&))b;w=$o6uxTW`|Jfn=J)a3^w(A^`tfH}Q@A|30{Rego#=DVh#;7s4GN1(C?T>%_C> zliA)W8|~}`vzb>_Z18f%{2F0N(GBuPyXuGbu_D-{x{C>iNxs8!+Z)d37ZW2(Tb%8& z!sj%DjrYLtl}i!f18NrNE#2N@F@B;ut17=r;XZNl_)vX61TSd*8S>#)eTR*XB=#Vq=q1ck}Kq z+oL!Sp{Rzuj;{ldp}{pU?z_zuLeqsLZq~MrR>q zWB#<&$gjrjkYGZR^bI*^Xz%W{uDpB6_)DY>w?379U=Jv6Wb>}mT4xE5M+=r`5=+LR z$HWY=k*aFgp04ZKpJfuJivQu6tG4uoHH`7NPuXfMnG{E&izHg;3g@0V1F0)W>Z>ry zQZ2}1`K#qQjt{pz3Z0*G4I0rqRIwaH#@lVzFVAG#-qFnh8o{RGGL)2oyTf+EGb_Zy z$gaKVS=4e#YUCi08f0N+@G>LkDwpu}51u0fbEm{^3OY6)l66(W^j_zimix>GAi>BM z-(LYFjLLzk#aJw!9OQyOWE#P}36kinLv1U@Q0eEZU?1`p`*^P0jQLYFpS^p#SQfT8 zWq_woJz--v5f;E7@i>I^#Bw@v5N4bo%VoZi967 zQE+6ySVy~ft@a!j56Nx0h`IHlWS7o<I=H zePgG8p`OPbzT1Nop-4&eghFs`mb|lcH@QsIhm8Fe>?&bYnBhu(8tu~~()C1BCA{HY zJZwyqMtRsq-0v|l#q~I54ZUX#nTF)Y_e}?V2^`taug_~MTEz~L4B+^@ZeILfKvZT^ z^%R!&pWLg${MY{12KDkhKrWSrj)N(B=r`2qBe;1;{l(nj7iH1p=XY3d(R3p&pl9kB zBy+)vXDtkRnNVEQShL+O16gh*U~N-{E@&&SFC!_qd{#0iKF?Q+>`72Bg$I$ zrLQdSl^;t+iQGnutj6KIofYd>w=oIv{T>A=i_|D@Rb9(SzBq&6P(yTU-)vU(JyJ~W zi&Yk~Rm2)fn_540Mw&?jXKpXyX3NF>4H*5T(4@Mx7-fBmRlBz>-~v;mC)B5L@g^~j zzK=SS5WATY4KMz{v?^ z(%@BZM(X>Q*~R52&~%0%TIvMIiQSJsQYItx%I^xtUefCb+ap>y4?+&;ug~+|SDCST z*pJXZ&Li%!;<@MTk~tkka$l$2fG0xmzTwkynqrx;?Hh?sxq~prK?)j`l%1T3|P`$-`7f z56d>0lb(;5JJ9I%r0J$g=uQalxJ?-JT#s-7(6Pwp+;x&`t3j`>sXSJfumWrUzjIGI 
zpjz}|H6hw3z}$erNdRvr;UZ>f!%m!bYT)WZek`kaAyg_}zOC4$btLdZMqsB#n$$5-}R=D@b81JloN!wcfp+$hO@6 zy@tQI9J3AIi7_R>xxyR)ScI_Kbk|p+Kt#9;hn-2gkpi=`Pch$VTNelW5qNXte<)P7 zy#tDs8}GYmy%a4k^*o_9Z8vWg@WTVhnpL$}h5 zm}k2bj!D=Q$mak}X$RZ0~A z;4XY}E@LI|*$)Q0;#C(y`NPJ96FuUpu`PqBNUt;QNg@66fz-N-U!vsqj{z}R8A1$m z-l=S7u+^ zxgw!UJfznS!#;!-d2@~8!y5WSKe}n<`!Ap3?Mkf$++fM|ld+=p^Xm`((Bnlst-kos z&@df2mgy7H7NL-u&!c?O-8RF)CT((p-X(Z4;KYgUIykCr*SwE3w_@1ZkqcjE9TyK# z6S@1~H$Bd2y4-XFls##R+T1Uu5Rwu#f1Z>JW-S|Veqf8Fq31s1?&!O~!O-*4^+eia znK=VMT}omvE?dOUAK5Zkd-iMf(%rq$=4m*M?yS(Qb?rdc`buC{|6bAi1q3-_LdZAU zl~TDu*7d7Dy_Qu#u;$^jy>wb{nWkjhdOb+PYD{^=HOknlhAgWKX9HnrO4I0lhe__O zwk0MU#qWMDI$sbw073`7v$cJzU(aE5Y40;wLhg)DVIQS%zdN3x`0^?C5S~u0EU(2m zLWxFDbKJ-G^W>}JB{Zr52$A>lBZf_{He()L@R-~KQi9zbh3N(jJ&J}`1Pxsm>YNBD zm3{I?SBK*frhPTT{1iaLUm0fxA~kWS*BUw0g!!&r8b4@B_=-{O8k>@Hx1lX9qKOr0BdM{dySWJ-2|0aw^od4%43PGoy3(-RmE6MpvPevO>A`cK8AQ4 z_oqBU&wugqU8R{MraiP-{PWZbO|Rfu$j7-H-^9R5R?Ga``p->fqKgpuS___WT^xY) zEW?^z#p9!(;N$QOf5Bk+^%KS|5d6F^24^q8Ho$i70xuUjso>F73z&nR+eCPv1h<8g zGJUW&I0PP58lvX+t=kQnRc5w|y92x#&*P~BRQP>(hR=Ev< zO_8Wwu+`96-#!mg@LqWX%8HAyrVyMEW53umjd$*OvdMo^Tt$l3JSGtzoMc)%OjC!k zxBKCo9Q)PEK8@S_W0ptJUaD1Ufv%bL631b_+>Lii9PFmsAGWB@;x=Jl7UIkpQeQ`j zJbC1RDAWAfo+y=_DV7Ixk-dA>LY272KkH_;Cy9!lJ;a&8d9cG?tFqIZmG%QG$!l-98?3PRgW}FK7aIhx$SZg!8JUlFulcPIF~}my~3YA+0Rq zA+pcOW)^z#3Fn$m`!12(Pr(a-*ql|n&cuZF6yMuGCHf_wY}E(-S<7>TC--ce};> zEW{3CB7|1Y>nzSdk!ke6uXv$EenTM>9@?APm5CWxXSIIKEU2HfX7zxCN#=NW;)wqQ zY)s`hK6nD01v>8HSPk`Rvp4OEygCfb>MX-;V{RV*T!zrsz~u3HKREpFQS|TxgY#a$ z>mg?F%I1F>J#PaRZV{+Qaf%GI*h6^+i}TFUG#LH?u`9f?cd8z_UhQM5eCVK+jUWuF zwLyK}zCFeMcRGs{g`eJ{N@Dc;2NJ)LJVchM_utlp@k1op#q5{Q@f|q z;h-yD+qArakTY=0k=W0l{OTYXBy0Y-&a&`Ip66xncM^;pqhm(A_=EDVL2kfD%YJBv z1=^U}`3R(46zVqB29X8u^S{1@&1Wb~XNuy{=HRb}D%M9eOsy-pzP|+noI1g4YC+fH z6GWaO6&434f?&#sfG+0@-dIa!bE2HeUmPYZ_KN5b_|u~KDXR~!G|dGn&m05Il(efY zet;l}gr9o(oQ}jZ(~Ydg2~%XLUXPzkdyB=A2!gyg5!M4(wd+B`Dw|a`IwAKNgPyg@ zvxZdHi6MSC#$=-Avk`ON*P(+`Sj(#@In0t!r%;wx=))o?*^LEQQZhz;SXpvIGSQr|4mFd&XBP0a0QTNKG9;+LynWf-n2ty z1u}9Uu`ZHx`Q)IP*R_7vhmy}Q*Q+oq48v19Ewn0M!DA{6Mb1+gXH*Bsk^ygV&!1r2 zg&82-WR-LC3Gsax)jQ$rKLUOhuCi@fI2XH$ybCY5t)`q^oKZPnCDUMyu76vA2IcZxQE5~&0+fp?@qN>h8p{CN)(RBkqlrBK+I#osjW(F+a*v>UxK|T({ zC|Cd|-mS!}9pnTFJ~7hiGVo~`*yF+ssp&0PY@D=ooUF0be|$f!AD(+r!Yh;Twd&6I zx|iWNWYKB*OJ&h{^EH!ZldU3qzQfAO+PR2jA_-SNr+YyFGNP@BZgYXupkLc7U%VbD zx{kLhQv!X--mCh2z(%CtYEX%;9rpDS!q=L5x}W;XAz}KFe`ztD^vT(xVC=kCW=-qW zI#t6`p5D|?2C&};MC(0yb={66Rf$&Jqgi+fH4a^ZLU(@3tWf{Rkl=>()_A_ML3_|^ z8`9ed$SC{=umDKitMCa`aeA$9?4LzCptyrJv-d8@v1yM_)g0#?x-o3yA(7}w|DHq! 
zw6t69ma9Ry0ijyGRIxIEoq%kA%zB>n<@9!$T!QgrR`%E)jep>%p6#s0aEiMX?@?Y&T;vejQObV`NJ=2sXx=X zPZW@^W)m(BziCdG=Q!Tpu|r>(Pfvx|f-^1IDT>~h^vuV5#yH)TDrq^DU3(Kkv_Qf6RAF-7qk^4_hqr8U)by*RmeI8=$ zUQs|F^LE_#II1X737pkl)^BKi)BUx*8vV6$A=gQfRT*Z`w^+g`L4KN5x=<~sB3ZF= zH0vWA!fgv%j38s9H%{e$TpCWJGQKS_O7#}|)#hwYSH7~C+R#nvB+MYh9!HT!ilIp1 z_!Z>Zpgi-L*uuihQ8Fx@1R52SeK|wX(4C@7+*W&j5FmxlCe(C)eFAYES^Aa?!Ee#j zwRDzhSoM(IYe^hVK!zv*kFai)1zKuMWmJ9vnSQY%7=vot z%y=Y+Y65IBmCGJ-SEB0$*%NVgfj8wg5ti^<85q1rNjR#OKb0|<0z#=`6aQu7C zC{QYk`ni}m?z(Du=hv!$+F$R<^-bzAj=RW%U0!2S5>xqHzBS)AYxiDv7B*==j9djqj)18DL2-BEDJsr5mb+i6@8BDDHjKNGfzzrPINS7mwG@`Mg_FI_<$`XN zS)|ZW2XUqEooAiha-n)jQaCDAhV9hzuf4GhSksX%ua($*_WH98%KIr&Lhc?;$^szu z?Ymf%QZkVqjG;JtkkPWvDFod#d9tW_*S1Eaw#VUpz;$%H1SN-KwM(zOad%JCK{HKt zS<5~q;EgdI8N$mFpbkK4aa;9+f@leh5aov1It$rWBST820N)^Zu$kSeVU8t#>^0eJ z`a8pC9DlkWkWW0RuCtkglqd_bT9|(f!<2ux-eGIV8PA20lv)k~nFT!TGijX3m|c_~ z2*~N`En`#( zN8Pxu+n-v>Nkjs>9@Eb<3cS1P9lNo!KIR}-rA7t<209AVb`{<|0L)_(dT53oOm$Ag zMQ~mzrSp6|-n&l;8r>xC?ipHrQ_kgLcAc)_dO+rU?Ca&wtt6f-_jdpdB=Ji=Fh-yA zS4j0wJxM1TR(T62r@NfeYgx@YCzra+M9mS^ZVg6S4%#<+cYbqTxf%PROLv(TKq#ks zXNJw3D4b!mvUGC77BM;;DHT{ct>^J^(R=4Cqb0x3BZ={LtX~kz?6*eA54XisifXRk zDZZZCcNg+&2A8$ko?9EGt%2uI6d20U-mEUgWrDC%9+XUkwz}lWkdn1e3a`y9%fZ&o zZ3xby8AhM67w|IC_F8!1$EN#djPjSGX@9?(9a&IrnTm4;|`vul}DQr5B(;!WW@JK!|Sc z80lT7VSpf2!T!MIuqQYFoZ%PS>%AzZR}NPsZJ^~!eP4_?bvwlvk?~*hO;4qA4HG{x zg0Z{{9;Q)<6rx@PCv@F4tLH)B&oFS}oyB&c-Od)~`(wMxM{_*yGKB=!DluQKT^E7In^~2j#>7-{# ztoh0}+AzzbMZTHRo8pxdwN-du9jlh=@MEz;ueW~_&{@ydSM-yF}) z<#$3(@0J_-tb;5R6FEC!QFk{NZg5o~LuNfgscnE1M+}8`7MdUlOHVzLqM{nAt=(!% z{8Qg);1misGLC3Sv9TMdFj}cjv>RcvISFDTz=0;%i6~%oRS;gRFwg$F8&_u8#83F9 zXBJL)ohTzzjZgH7ip6+_Q~KOM?EN*cDkXO>7C4n-LSK{7SD?g^L3C!eKp*71fVS6T zfGldbsgMZ9LG8gA&?+~8(b@W&j8oDahOkXl)bn$Nc>QqybMd00oc!E;gwxl;*a|v| z(?dxWzA)r^r@*$)h18iU)T$}!AM5u4gdHp0DCRqo;9KMhP|*UsxbNZcSQ_{FdWWRF z@~pY!D?{Z=gNKer(|)qQ%{KjH1KiJ;&l!?9e(|9=&(+x-40Uu|r(|D**_G1!HmJ{J zFwAm%9sX++gWev?lT%F>0r9E?ZPGG0UP#Xha47=tw(sq8oj=;O@!B}wfQ14)ivS;A z{PwcEC&pKNaA9Mv1TtcAp*3A^{A%bhCpM{9kvHEga}8Qi?3vvSeeQsP%a^>l`$aO1 zCi6m^k+&;qDBg8|NJtPEW}A3n4=;#rcSoL$9nKhoXtI|kL%^OYN&x3zAjC|>%|5H! 
za92A=LLex*i3t_mNpT7~_ljNo6?1J69v5LCLp7ItvV?}7&CsY7HZ^e#_D1?zjy5-N zpSf5@rexdeORqJUhcsfPShHkDl=9TJim_8Rt}Npu30_I--`H-({TdZySRf7oGLZ2H z_8(!>mcsq1sC7l_$tdr>dvjQsu|vy9E05EC_B|et6Y|m}AWK`$pKf`>O&>darSgu2 z9WD*`<6*qN$TuO!_BHEqa(oM?4}1xP<>6C4^wnpieq7580Z-cL7EmSt`Na90-k+X+ zdWvVFx;46zq)-)X&iS%^KZKT|Al+ph(-;7p+%`wvL$*hr9sZysQs7wtwCB1>Z-hn4 z@ii`WE<1NMipRa8Xh{gK8|HZR=HcEBBVWu$t3>3+U5P^F8Le{psG8gz2N z#v}7+Ir*RUCgXt~5&mBYy8o;^w;Lqyq+$tv+j!AkmN!z1B&3x3FI&c#n+Lw=vWxo_ zhuf>k{jGktzLw3jSqF9B^B^kncq&lQvCM3QHQ&IRHb|>t)U)#1(`{7R!wWV=4TLKB z3CD1U+s2r^gJ`^#F6A$c_`sNi*z=jJec`bcQUe)12$qS9{w788>y4r710rTMwaLWE z``azlH{`aF=Y19CLG;<|Vp>kdYUzg&VuK-5{GnW;tUnG7fmKta6O-?CqtZQjY;@6Y z9_S6{q-B-9xfv`0WpM|U0Z#~{Db0~0K{Whod>}kDdbAH{OZA}+<&nA29oPyfqL_ID7+#M zw@&dcWCmkxN^?{vnxG60YNz@k-`5>&i#8DnVX)Z^ zE>KIwZX3*9#kBc_Z8xFYa2gB$P(H%HcA{bhrO1?!r`uWF=@#{yDvC@)-evXG>6}Zo zRZtl_dN_V?g9#R7pS`wm8aI9F2(wmB7l0K1tI4={KNY$Dm!dSX+Wm`R19`W zH^&joJ%BaB<{LKZD>{PL7Jmu7JJ+V<{`|9FVIT$Y?>BPH;W|~&?qx~vglwut%g2Lg!#vY+cBlFD)ZqslkV^CmheWG zk?)uUAwYVSvHqf9D8&9r(drwzdZXZP$JbV6jnhx(x+W~gbNpISW3mvA25Wt#x@R?9 ztdeYZx}}@1TG=ia6rK;=;G)ZzSERn#qM8!M3=Mg#VcxM5=A=uxA|JlmC_B|2seq+0;;8?6E^=u|*=-z7w*V zGos+>u$YBdYt--VZU?Wu82pwjYIebl@^F~n*pwJkDUUgzm1!^wX{dAHmz9GvUY($@ z7Qt+rNQ9MjYo+wAJz_h^Aeprujz8&|NKh11**!Im3HXaX6rWmr(E8T>`PYYq&PvuW zPNhtw4bElS$KM6iPG8I>oNo{oE-WpR$o6>uvw(^!ltSL4Dt z`1yA7iPIOg+M(3K)v@9~uuPN;(AOy|5(E-lF}CDLDlc!{cRB3nVC+GZo+WVrTgN+;lQ@o_8jZ zXvdy04#1|2>i9zEs-L;y@R-?ofpBWL4~=VQ=90*8Yx6)Wr{moshvvcehdRq`-_JC0 z!8`=bmKb0_x#^;Kn!z4r1UEerPlu~^lcke^%%`3aZ2K2=SU?MmhlMKfVks%u>)nr* zA-H?M|6<4J4)Jimv7W_`;WfqJTFL-T9QFwp^*6At;2PY_w?_~W2V1?x%R_b0O!XKr zr?x3Kjo{fy)ZgX_8E9*3D^}#0Cu7xmy{p;*HxBYcF$>24pU#OV-~2@2;$)m6VL=Oe zVFC4uT;ApS;fXw0M?}n8P9>pX2-K;y3luS0_UZF4gw-8vJ1d~b0uY2tJy(ttYo_&N zNN|YU7?9#R-G`<&P^-^yCFX>Yzq`tg5^>ydh@7pqTq$Ks42LsV)z@&J(5Tp_H7Ci! zC1I~}|KZT=MhJph4HLHS+z@db#?t&v7HxTU>4GLOaH|#12J!0Dq8mE&m6deM<3y8* z6ES-_wMIGX`%j5NSM<0k5yUkmq(U_B_=hSK7>LqT)r0*Cn%(gD1H?=`7pAm29&Cn#r`T8qpe0d z^&GUC-8hCU=BrqkV)YzR@AH^~$QJ+P1~i zwqo`xUETeAKKs>&^H+5U-*Afq#g)`LhHYNe0XUVK-O7LNnascUjKmAO<$u0Wh?8@7 zU{DtNe=CLl&XH&}(s!Mij&Dx^Jos^ZFP29=b2Rw?w=CDyx?D{74oPgbJkuLYI8`8Y zo_0Q55_09#PC$8i_IXPsJ0V50q1>$HV~V<4LQ}8O8z}?vb%55OX(I|vSOJ>lKD;*0O9A}j66&8&B&_l8 zb7{}}uRDxaFlRXl5!9F#(BJ;lUeCr^)`SO(9~H~4e-bkb59jKf;y_$x5|xq8ft~P_ za-(=094HhlwK!}=jwDk-3j<*D6VR7$qH`gZ-8$p#*DS6(-CfrOYIx8ZZWgj#6mwL` z!t6{q1zjIs+iPQz^%&T%nFPsXVJFrw*!g!>p3Mk9qjK)e{BklU3#al|n%Ss7dLg(Y zv-!&4_}+6Mj>~}uv*Kw%IH8fG{Jzu)ZbY9a(9pP}@+XNnR#);|Y)PS27c|R*r9FpL zOD4)gy?H#Tq2EvsZ!gSG+eR_p4GnJCgx$&LU($(m!NG?tUJU!O$|9Qk^(zEeOcl!4 z6^4*t{`GVoU+aNWhJ7F+wHIh>Ncq|^sZ|yEi%oyglE5FoE!@wWn4XSv8uTgN%l{6j zNs{S#D*)&hrfq@vIFuJvWGABm5<1k?nqXabf0T3}aw5hH1x2^_i4Y*h(JX@7RIG5b z=m$KHk>P?jPIGVUwcdd?H`b2I1cl$DDY9AmWs;0qaT={|ilyAUEiks;-T$`gCXI+4 zHP$ESTKpJ&lUtGfQef`T=VHD@bJh=g_YWx{ddOsG@kPrHgdfPBrWd$5-(yqDQ^dcv z-J6t$eD-@kve;}|m9og=sKie3k@B{~m|(GXzm;@g$6i+Jd}+F?^*NwpuQj|7bY8&c z#oXIfcYIDXKnqlavDR2k&Hmm~N+}pQHP)0IzX1VzNyPfa>4cj0X1hb-K`MG;?0%R9 z-Q^%J;%9f7JEwdJgB8!EM6fC%grj+VHjPtRlvz67BEbvb0mqKU`m+X~&yG?$qdp^! 
zRRt4$W1nB3C`~l`dm&Lg74>ZvIva@2P+a>f70tLG(rw%hBF24geV*%34}#U^<0LIL zZ@5g$ovh~zgf3q5%e1b4>8>m?2Vm{XHah;}9(quG)foa5ge6K_T~&lfC87#$W_p}p zg7wO5ESTImI{XZV#w?WKJnRphn%_9dL2(O4z;j8ESlMJmgXG+X?26#Vk>TN}z_7 zGcJn_dA6|fBm95xU%pcuce=|Ft9=dN3xL^rc#5I=Fn!o+Cg@B5-#}u$0KZ(c!UEav z^GR5NPOGLT;$S75-T6cokJ#GKSFpIDGr4zh5?4Sk4X!QcHTbMapQY=zQYnvZ?iFFW zijthwl+FNk)jHD~=OR~@k0UjR-ME-m1CFp>+|#&|zM<6@!GXN8nau$Dk}cnNM2cgX zm-oSvr<0-orCB7fZ*h_;y0n8TaiAi7P9r?Vb)=nbUQ-v^VmzpV|^V;CEg zd{CKr-@Qyqv6zu@xZXhm&xk&31fY5+FQACcn5O$X2|OuBQ-PEMlloP^{jpN8$_c4T zed!@B#$xUlhuhk-O0JirMb=|EVW11V{q}%KRI1)TGUvJ(igY5CjkGQk?+H9Rfbc&Z zJa3bTARhl`-Dr2&$Gl0SWx&%2$u0`W#N?J9UL%|)X_GKAoHtOO?Fb@l>r#Gmvj$i> zAb`30iGM7BmNv2E&m`zl=$JsS(7HMZZGu=Ta-)I*g@buq#<2yLvutXsk0?b#x~(Di zGgLMN+R46he6(FQy5j*=ou(p*#x6$e4`O8`Z3+a{rXpB8{+a$YyIAS9Y}Xayk_M{h z;ygNCta{s*^pYj0#h2xu5+qrwD~x^Cks-S^D24t7Ena{W2N2H&R8&`hnoUJrBSV9E zPAX94YK2wngrBbV&(3g$G+KtH?#0#@57L&K(AWn&Z~unq0KU2ZsS}b5+43ZGj^wg79}w3 zF%1NIX=GVU$%-1#+hr}rYdr9bEU4&+km42)Qp!Et-|5}~4cyLR9L6)1A}$2y<~PqU zr_7a2yy^Ay^m{H9V847gl*~-dyW8=E*kk0uwudi2287W1egMkAD0>_>_F8haYPKEY z%sg`J2L*KJ^$=QPiyzXcMC9atvg~)oGpTs?5AycK2BrVNxkh4O>w!OPhO2=fC}SEyIe>2;mbM-1Q z1qgDqg}k%H>;`MK#YAzn;nkK^r}dTL+`6t;OKcH1vmXR>CLmcf3!zinVra0dGt}~U zJxXxJ>f}OG@7GAG=hxot|6EQ4C1}8{RPA{)`EN;et9n57gW^@$LKY(7zpq7f-LK1& zcltSw%$^ir1#&&^a`VDCmkuBqtgqp62%lj6Ap`kHQUCK#A%}fbPOi&scG^{Uvlnax ze2s7oR;2e;h)Qw3=pp!jH z&Xvfd*n;Ood|*kz(a2J4-ai_4d33y(o@~;){x(dlr_QzFcC1(@<-@mr1`LwZNujjI zhw2WK4RTAjs-MJ>p9c9-WDJ*eJU$S=GbVWZQQ9Fs?dL_@i{S7WQWB-HotT75`Px_e z6ZBGK#fWF1Jl08;_UBi)Bp)|D-E=g2n&h8BFH3OHnXw6|-2qM--OmJTw+d~<x2j8H+%hwEz*ZV9+B})Do0O%OO`Jvk|c#zgSGfzM=fYq(gi8^tELzrpPi_6eYbw zqE$R3#pZTc^uc)rh|_l|UjeEHvJ%<-08PfUV8)oyGujiF4vsG#&sqd)NM}bQ$s93< zqsqW{0rn#Ja`F{sQIfEgCd*RGS0A`QN=0|n8ArTFHKUoaz^8#)%$o`kXM$m@(@t^@ zPH+!#8=2tAurJ>XRR+TaZaJet;npyv3Pn9}I(1%2=XcQ)Sv<_2j-nq+b>OHle;7&u z06(~1yr8tHm@797nhP?8bk*%8bHhRG5Qi%AsTb;?Z~UN1RSIL#^`K9DoB1PAtTx`7 z151~hQT!%LFL5CmLPP>d?1b9Z{+|`GyyqxftV2PeG{Tg>0gO08<#qTO>LF_tx0NRB zd*H*SQALt4ov3Ej6%DfS2V=s({NMAnqEb}M8foyx2)x;1HuzMt);h6Y!gE;o;&rIj zpecHH7mh3S(%sUqfuX;?FILz^{mOS@f1_|hunsRL{Yl)OHCt^4)OAh%#BS$SV|Z_< zRYgG0RkQr=XhrWwQccv)F2(OoKDcLRrGGU(D<3@d{DR)tEsmWz5Tf9PpcZxFuq-a# z{0ik?l6Lh6uEmgdSKmkjw_2nTg&}+H4#4vc0CxBu zm!s7zWq?TZ=Wj3P!5-%*AHsQ26|L=??{WXA96t{#3`YrbzN^yV4X1e6%vN4q47iXw zjbnPR>W6qGb=q<^_15D9Shcrk;tOOm)`Lt`02+%`_F5~@`AF%X~KX4`#*H~ zXkA@o3O2g(wDrK%Dfibu8=(*l=U6iwvNZ3$Uez4?6c+soRo}7c)F{zgKD3D;xmX|6 zz>{qd9x(%Si`MMX&A!jg9;egPu6_I}aNUiDfMd)Cl?awMw4aquMYJZ8Iw#!18Dd0F z6Hn}iiEamw3}@WcvgNrhR?~g(jZ|>;MU5kS0Fo@9CmsvjEQ$??!1l(jtH%LSO5Gj* z5_xy=d@-J@G2~*k6>yw5O278H2DGCiSSPf-&QC99B3qs#LiRU(l3BD)&GVcn_zrGy zSI1r>p)z&MCgbahnnSpR$M)R?9Bjc&tC?8)(;>0Y$jyG+m2~vLuo~Ieys?|_BH}a6 z5Bl@>ff<8!1kD0VgBgms+VTCSJGCW`eU`fTH`3VR-S(e2)4Zdj$Zwa4m>SOQM0W1^WjCwo5(Z>D!Id zHod$(b8JEDP?X!5#t6~3_~Qb!yfJrpUc@t-syWj3g5%G-Tg&HCR2Y8Sss`R4z3~W1 zDe!bD(pSY{y0k(%<3!t{`P()t+*^gSEF|SnVo3Fo4KR2A>S2qNC;u69W%NQCmEV6! 
zIu}<~M+7OpAS{Z!b(VnCYXt>5Ao;Wn`c9c_-V4X@$qG^Hb8!cp3 zC_LG{I7;A-dVPG#ly#e$C!{eTdOeEkrpk9{(12%SAp8d&&v@M?O7eSTZe^;DwffiWPJle1KoW>1A}!3gJDAl zpbHCYrFgiR>0=RJJ&sCT*_H2FoW>h%GtEKuPt#X(%^p?Zxb8Dk24IxBaBku^4iqy= zESQ+tVAo<8{9G=gLo7L{zL;WQKwi^_lsTlB9zNoUBsx)$_bTz}EBB)xrKmyQ~Q#8kPHNdrv)!Jxd z>5P{az>TdYi`JIK=YXUiK?A>%KvffLUm*H_e$hV_|GZPQHm|VrPg=RSn!iCM^?3Ya zqJb2^Pu@o}j|*w?uzo!FB*m3=;K1)lr_q^`Qo!21{0zK)M>l!q(x~#xr*)Fk@YafR zDe{fMPbiSQB%j_i2EF$N#3?!2b^CY?$6Y&R{eUj{^Puy_E(+`d&s$~t^DlR1%1&cr z*C9(W_QM&X!2z$;A2l|(`wEz|K!CYuyFyY~l;20-6D65p*lJlQ`bZ@^b}>KD2S~N$ zSbciB`jiHd^S2YfVsdsq_2!SPRTrj9HLLF_|3aoZHDoE8JF+u<=|<*IR-N>*)7E9pt(J1 zYiNKnmoWBA6%wLVt+Kz_vVkR#l`3V9KtSpBJarsAI(BSMQ-o^>UlHl#*gu9ho1Uae zL{B3UC=7{HQQcEMr}3g6n4b|TM{aYk!9u`c?ybGsjZgqJ_fcdaDfY7}OI|(5l@D~4 zjsvdjALrMDq)U!-tSzUAP`}_N?}h&?Wto4MvM)wAx&PtLZg5QQTmMuY^bh}UL)(A& zGHP2KccJI~%*z2tmbIfGy?|33l)6_8`_avUb^Tl1Bxb+WC&Y_9;)I!tP&6K41l7IB z>nYm1t~q>vn7EvH+%`{I=^FB-CiGo*k@3QCux$ROR_APn7|Lw67L{&?a%an5@E zXBI9Ovh-Vf@9X+pg=(*H>T@DlEssG1%(l~6l)f?9ikLw^ddQL*AgenMg?q?A>3=h~=1p#ma3T#?elI9j#CV*I<&~`R-9k zP#~oy=IVYx%PkJ@U+HdrhNds!d%Y-&B&e(8HE@1j2Tc5&2cQ5jSHD9o@V=O4%R**z z&@txuAWG>6!L#m3m`+;wa1ICr+Q8sX$x681ZY zjT0XoKtERI*4o7Df(+S-gR8CQ>PT6_Tsm0VT}$edq^6T5mqMT7I6*^xR|xwF*?~01 zK!E*p)A&5r2Xdqn=!T@f9^4wlQNq2i5154G3Szeh_2usXA3Lxgqw z27C)CBc7?Cf)Q{xh?v+Xf%@X(aw?e77lPy>P9 z4o{C;Fl1fc;xbuaO>x7m$L^t|6(0R5!W({{@CwlWI0MQmg$xV52=3LM?=iX9Qa_AZ z8+3oypf^7)S}}-nbQNjlMmV>+jbj~sKyG<%n9EGF4@&y~P#xUP(fuBbuGt6V0R9ga zrZ?>R@usvlGABLy#rO1W{_L-X^n|J>NCr0BzRB?Kk{6^tX{(%9Spzc^16ItTa+|n? zvz{Ad@))&z=z)I5MPQGp~k9&=qP04!t#G%v%ekwD*eD zE`>=BU&YYU%5qqK=k0@xvzAff=oz-7llo_w8X2As5(GE6uHB;rc`IXX40(p;_qM;~ zD62*R0@LQYx?7K3*VnOrJLTiE>4aDnVLBEiJ}Q)ae@Vn6TZzu*ZvA{3@+pmPSC%9H zkQ?fR+QT=*7wpe2QY*e=;@>0ZexqYZA;G+K;(CNC0Ni&Cx-D(r*|pv09r|&6iMy%lyx>usxcby68~bg(jt<18 zV~P>7^8F9i;cGCKsws*6EJYfp%eZZfne@1tZWYdl+MoYq0WitXox6vZ{UTyXc~9n{ z>lpeTHT|eKZ_?2JQzdgQ#%DZcHj$^+Uwz!J)22!8bAMg-FZ`Bu@AXXa>tAHujN>9~ zjM=6~UEH(whUNzl&@`J8{u4Zky4OPbxt%3;ulY^ewJ$zs1_TC%u0?*8gJ4zWzk8#? zZKMNP)w)m6pbZz55qjas?FS_o<;~=5evtU#LocCC*8Iq7^jqVF_&Vz%opon(9p#CJ zN$_IzsK}(aKfgcT4sTJUfbUE$MOPD>dR)DZkRsyuF^HT1KP!RYJ?8cZ0T9Q+lOV}W_xSmJQFk(t zE#6Lu8_JQl6E#gUR`T>R9Z7T4NrRpwUtqKAz-JW_KT@4OZYnI0Z!8ETB!DcV(rR{6 zPOMyb0XwjK((@#rD-cMt5X~+p%U>q6KcI=yb<2v6J0eudk+u5YM4hF@is@+NxNpdSasRRWPpUPkgJk116K-$ zLUmn*y-jla5>66K$B}|R=C^fwAmi_s+CHl=ZZR@5m_dyepb#J{7NNWBYHD|-x!*%q zJ!WPtw+ztlt8dohKSkCbxgAL(d(G+22r5X%atZ+*8FvhCV}3PBRcJWJ<7zirBMs83 zldn}Jnm0M*jZ}eDz zQ{rR60SzSVsZO+QzS-kz+dm{{b0bRhpEicFU23*lr%%e40U){-ubuotKRc*0hPj^W z;`oG3|NNRx%(+Y{jn81Lz<}j0x=Xu2uUi*@1JcuLoIS+n!>sB{7GvC&`@KE)!e?xJ zXx&*9)I6wWm(k!8NhhI@qk5}fTE-Jc0*|~OM-uexEC*0sn{lPx9`UG%l#Go3)HV3? 
zohz5nJF7uT_HspN=LT?-1Bx(SFV>6{0Q``VcyiuKIOkG0N%rj0+j>Sc$V$pJEmuXV zpxmNlkLDoVf6}?{fi!Od(g>)h+ay016?}J_Cp^N2gfA$>-jL!ZxutUc9-#xMK-ktbSrX(_9cp}#A?xhD_sWWn z1qbs!O2Tzt;6=dYxBg~_y3-oi^-1!5XsiSFdiI(5x&M#ClXp_+uE2+vTZ79~)-LFc zhwyF7iq0OXV{Lq~v`L@Em;-R3VtT^M&j4;j$O9rJ8!l`6=s+ z(CN4vKfK-jL%OwYo}p;k2$#0nmzCjazx;Pgq#q56oI((9h}`2!Sj9=-8h16_pODxxr?24_8BA^c!aJfe2#N@Px@6LpPW=yH{0vSl?IcFz?aPq=REw$0FOMf zuDIwrP4_1|-`o<#&c#Pvj{{Ch)J~8EUfSCDJbCI$;H|$jethbamzQvsp=j-3qfw&P zp{VNnV{zhk#1d*gw5O+Uq0Dc)tNd8pwdE9_N;3l!?@n_Hl|WH}tb9OccrVTD2+%;+ zFQ_!!l+c!PS46x?mjBFtrb-=XHn0AD9XNd8`gg@bEs~$VRu$tc zw51A<2S`GEib^Xs>@JN78ra$bB%B6t?tg&;W4aWF;WaztK$qJp;Vq(*X3UcAW*@)k zj}S^Hix>0e{rK;l5Qjs>&y+h0n7l^z{NIO`f|jSdL*fRcy)n0+A@Q#^Zr*Y{}r_MnDS_XBY2Q zCDluQ*~+p5q;_rq7M6mFIn{ul9Ud{vaz)Ralx;0ihS%<4kc}wKE&C z1%&n0x|)LCoy`tQ8TeK)ic9lKk54!i-aD(jSTtOwTS(2A%WIV-_-%USh#A0zs+WlL zaSD4TWzt56T!wT2+9wtZ$X5##sNTM+I1VO$Hr=a~<<_Ik@3>H(5Me0{lBU29M8-=h zA8y*nRP9H-a6O%G1>cWA(sO7^Cea*Bkwu8tM8tO2$=Q(g0;P=+bKQY<2a3nJZ7i*q`Ox zc3ry%U;oP&dr#Kp#6Q7>Br%K|hx-0*zHVo~_PR)oBjLMXBRcH`iJRS80pFB0H@|Td z2G-E6_%?u0j0TEeqWU(&a(ejH>pYF@_fPdCfn23PXOCMS^#0gYQ_j25P5xH`&LL8T z_X2g_OuXpNuT`w`8SZW%0}VfJm0P!r5_EjJ9g1V+`KsH5FcQD+jP4ryeisJNuS8Kc{?`Js^;^M(qAam7qqHU;}#+$yEQVP|q;|zcMQ`3I|32B>? zFs-&7aczci)n)3X3@2Zhj`x{;p080?_!V0(sOXse{30uM^FB#3H8%lQW`FI;>o<0l zPqMq-j~*5lgRYYkx)g}nY(3_4mW*ZrR>)Ym+GE(tG8 zbnD*NoF&Bhw=7rMt66bO%EaJP2|D^c@%@Ig!e7+?8xI;zDvbewPhdiV7kSWPy@boB zojx~3eOG{=z6*GPtpL+NJpR-KqnmRZpNmZ4qm4``ChU^~6xtW2JQx-_;(fZkA>FzP z0fA`*ES#K$>Bq<;Lc@L^>(#ki5H#qLd&7d4nrzXQeA5by%#g?cN<`8S> z6mMkJB+g}qU9rGC0oqn_foFlk)H1~__7V=nD5mJ)&`%^pj3^i070=hjIOqvWKqis> z@&Pg`zd^g;-RkjU;SRB%E^42eH`iZJFPYME^qM9I%sJGb#A1369+VBM#LW9#BH>uV zmmT!Q7+czT8tv`RlP&`VZU!wHT7H`Gc6b26KC}~&eZWW-Y6 zhg>+#V|w_xOwEm>yt^M1F_8^&I$GX!-r-Tko|u4Ds{h{^;)~jhlNt zb7cIA5wOJ%dEaCzP+_qBTDN6oV7+ZbQsT=Wr)2470;HH`*MtDZu;=(bjk}xEswnHR zT}R5CNwr9c(|E@?i({o|&7{gt>*6$iP>dRBnclg2hwx8!4%BtRv+#a6|HW|kL(NmH z`yJ!klcg&+IM{#V*Ib#8c0D+IX%G{mjTr)8Evk(p47t`ew+a{WCA7^=5n~C zShyMZEN}lDD42BxN^uf6Q`ZH~I47f7>i8tRx+?#9NJ)|GOYi%9TFdqNyTkdP-M{~G zGt&rSJtKk*WSvCZ(n7YQNv@aS0N_gNOB*YmzTA7V$nU|B6tZrdS2_FNZ=94DIy6W` zHCa6^qr0|OlY%(Q(6ohlvPj7*n^L87m&_Rt6R{2b+_IIoKNW{>vs()Ne5$b82>k97T61oOI1f?n0Ft#|uyHI*@h%{RW(g;A`^rY(n@(s3JBhy~k@ICsr zm(5aBP6M4Vm|m}wlhg11t4HET2c0sHu{#dRja!2mgvU(h&okqFCYIAK%zCePYxib! zfVw=z;vCw08f>(UBwPbVGBw^LjR<9TnE;9t7is`puks7t*4_V=gl=z+k1v{48h_$tE^u#V5}(Sd|*esu3O-f{z2 z2PevJ2Uw1pKsQlD>MD@p&UuU@J)Fj31jfNyTaxG|&u#jLdH>;ieQzxf-AW$=0Elw% z=lsF$+=DZNf=ONx+Zm!=g}05UsjV#Y{4tr^Pm8VQhbxhHZF1bH@;F8Aoo|CtTiz-^ zhS@Zw+)e;lv}c~pTJIV^a5z#O7Fm2PD*=Kzs%zxm+E{E|HCH}(cCJKM#UV+O zm>qlkq)wZ=_IRjCxR%r~?Wsrg^PBUde&{CPoBj@Pg|K$kzj$AdjCZ`ercp$lQl|9} zcrVS``Zoz?*79VrUxEcc;lZucpm|%;oGm%_8Uy$RP^M<+Ffn+ zh1Yq6Sa?*&MUpf7E}pDEfh(r=A!t{Y+%3wSBNk_wQY+(ZJi~m8vvf`yp{7pHPw_jA z_>0AgN`|jTjlrOn-vwZ9vanKRnRExj6?t45ZrC-G>S;lc22J{B3oeRstGMtK@5Ao0 zap%&YG;76JyRlUW9_jhduM9mKk4=! 
zopw-(I-7&b-zFe#Uxl)VQ`7TQo>dF1r(EvnQyvdA0pPFf7aLCQp1KSA#=KBks zffZVwygyyy?1wV6LX}RM-M==d&K6FFR@%;*XbL>51WMf+BR8#mU4Gkp`&WOe;#M+e z3zq?}*kkiQ-<+9s0S26SSmDN?!I(qWm4Je$a{hRp%Qx2=giEZSaN65k8;eUPz$y!H z9PNHddH?;eZUSrEbsAs2+Uxj7W&CfW<$oLKEcmel*a}ScT#r)+4QSl+pWUAmuLZ4> zM!3GK)`+lkxharEWh#(ZSZFB0Rxl7E6Fh_CPNDWmDqR|ljF7~9gij-Q>6j!nHzcg~ zkm&0PKxh4>hJ1n}jREH$AA?4;L$@Ysnu16cWSd$k1uUx27i(#SM%Q}1;4h$7zb~l3 z(&l7D^Sk3ouQD;#V<$CRI!DaqHlm1di3IEk;+S^JgCY557I@i#Vv6Lp^|Ht_i+zr* zMAgtx$CL= zrgjCqm=32R8R%y|J(aEn;CkgfVKXJ>ia0$b1ns9?bVWkGW=N*X;&2zBK|eFOlUClH z9^AOP>&G-}6rV5g8AToG!B_?hQS)bP?0TOD-s)8@~^I zWKboCOcZiyZMsSGLFd(?tUAvD3Q#uGQYeYeiCOU=4YDqlVtS}xtMAmE?R6RLLc&U< zr?$E24bWnm0!v;j<^dX=E)SPAiw9M!$+Ww^S$>81k#?+FzqFhjH{^m!eQ&P`6LJV~ zp0ItaD`av|qO+c>s~-8Or5V9$&;gEVyg}#pCW|SKv5wT`J|CNMUrXa4CHcbjE~|*s z@USwB=Agx=y)0Mg%={|WP^6l3Q`iH@QRH{uR@9J{QY86;3qKu<%UeS0rauolbUq%$ zKbDCZ*^U)<2*4Op#q;@-c)Nb#SkI#LEitjW0RWq5XbDAV(Z+G>vZrl`@NW|`I5ZGe z3JjRzBEt;oErf7h?v3=1;~x79(4Ajs_Oqcuvx^#=qXk1F2d2*Idp99&}fC{NE_CKqAw3_wBX=r2p z(GZXxj!}QX6dB$0NBLem7`ks-3&d(oN3SH2>M?aoYYU~K&&WPp#Ni*>PXBQ>v_rlM z#Q~Wtyp`BP^^-7YO7WpFeF#`}R5?cHlS%bf8Yp|r+sA;MWQuhT(xTx-8og*@L1McW zZLz%P;A({7YcUN0o^iIE1Qo4E*LrsBL}59fGHlsR`nWt3VFfJ@)i$||a zR_B|e=Jm`?pMPmvMFNNCYMf1k_;af7p)nd*huv4@)7i5&jXgP8`n57O64t+p&MK4> z`JT@CgTtwWh6*>8gRDU}RfH{mKbrkM+C(w9q5X*qRHhjekBE0MpM$i+!o%HkKl|g5 z^B8ghYQB=dXneCaoCQGmsrwo!;-eFHddJ(o54O6Srqo`8ys2J&#`pd)@vQIIWg*E4 ztpfzrVLNJwfQZw=rW+Do%r2-NnKtOeROc|cZL^(crwN>nQ0D4vx5=elPXf;N|D*_cFwW9h=Oo0%McYJa6i)l}9ii>sfFok34$56%&e1o0-zGJI5 zKNeVQ`$d6cCe`!vQGQ{A^Np(L;yrSlqhcXYqQqjqNvg2jyC_-;-@76?z(yV?5v)2b zwAlM>TU67w5Fs;*7M*qz)}wAE+mD^6Jbi7kuc<0Ax)Dk!UQ}uGViw{Z!Hp1tqg^Y> zq4uZEHr>EWho;x=UCC_?g!c}6G!--)9X99Jp69jdM7aVW;Z)e?7fH{7F~q+3MNFB} zsM&daolBg=CKxK}b5->TuL%fU%Xb|{ARGRxB)a?38ffu|D&ra*{e43CpDX--*1st1 zuE<(+;etI03BrJUWny`~6oJ%u**#KfL;k(ghBU5Y75DYv737Tt$Rpd?91#Pd023cDTzV&z}6tks*cxhsyov7;!7sM9M%B5mkQNyZ;VL zKLl2<5ezRCKFMFiHM=66dY@?ehntkuXBwN0B<3#>IwLtMtKbFZRmkhbXEhtM(vBeP zO#Bd13ULBysE#5I)^MESqc_ubzb;DbK9{(Ae^JsaQ8&D>uD4a0V8S9PiRd9`M6$Pc zSIfsnIZ6Hg-i=%NDzy&liIEIH&$8gM$-(Xj!azD6nAncsd088g+d~;F)1LaX^gFU3+iF-v}xTaba!xh%HtU49*6E zc|4j}ZGgO&Np5l8gQ)9~C6k7WO#34)ZoqKo4Tvt}s4WQ^td=`ZWt1jLE81VDRAZYA z_3MwN4^)W#W<199?d_anP#DXaNN@^N$XmuuJFoQeoEILydTLpF&DXyKEUi+Y^V3E$ z=Ku7v)u>DrMT{w&Ty5BtqK~WKN#Ij3plVEbj+#sFbCbqtNKLV=K_-cK+^n}&w)ev} zfoWS`4X9i?C;6bALe256jFITP(ZMmwp%gCD4RksRm%5q~GT}n;M|y$-8i8pK5L{J- zXw|>si+k!_3WOs?Gl=uT&B~d@d4&;?Qk!karzCyu;l;)V!@yQ%wO9r~_c`Mux1}C* zpm$pGw{#OKd-a_43i4pwj^O7?z^{=f==LW$R`=9RKQoGHTw4uh#9srQUnD11Z{n*rmtz0m9q2o#WWQr@8n6A2g>BQKGt|Pa!Fi z`cK_A>q<@OZOm$X-L~W5$4kTi6()t2UlxW zwx?ozS^E?VHqb5wnUW^w4MLsLp{GieGrUm;ll|P^R4d68EyBGtB!mi*4QSpO1fjJ` zWH0p0tn`pA2;G27p?szS(&XO}G-hhT@HZeqTv&>ACcdn)`2`A=Q2SUl2cF7NI=1pB z06)<{Yn8_$;KsfE4>2!c$6<3YBrd!*%v7v6O3Wn3pQ3n zfaA;ZFcTXL63xr=ecX2WI%!%Tfv2$6%c?d?C-Ma7MZZdpfj{O%_B5#vV^j@+6g%*n z1k*>QO;62jCMawkVw%n**t9RGz#atx?z$xirXX?OPb+JargJ?wqWgLD#gH5r=w3J6Gz3oqzwJ+qb zM;soh=1r!lM$qNcooRV=JYBEID~fqy78N}7LX77faU<$+()nQMhY3x~$o25P^B?^n z^QksfnqwCc<$a_m`6V>$gG1ecoS&<%>Hb19xTUg`Pu!Nxy>rZV2|mFqXB)$;40#G- zUyieFh6RaHe)cc<9D~|Y~1vEC))Eq5h+k=aoxJqNW64;jBeIs5G&no z4sCloM~sC{@2yXH)JX?_=lp-2b=qNRV=-UR7zFoNB4{bj!-nOz&n%%UT}Im; zX}be=ne_j*7yk8?Kcftg&>i_v^HgHj5S;xyG!!?B!6Eb;>pg9O`b5`XY1~)}QTFU= z1IomO0bWl2*kelh%<#mqZ>2S3Z==d1MtVkiQ?jpUCjD3nCNhZLV+i+-UX0h5YfOzG zqsSYEb>GL(R+T+D=S#8b+pzR+qGx7!x!RC{QH9PWl#HA1C>%mmZA7aF+D1Kq^721z zV{a-qd%#S6wE80P^(>2ayA8d zX@i>DlbI^C-$*~fGZ6Qkuu1^Z%Yxs(V`*wO$5x=0y_jwhz?d2TZ6FSqU6esM{zo84 zH6BJ*1g!^Qw{$Nw1UsQ^?N31-_Gtaw_(KP>%UaY@X 
z?4zD+f&8KuU5kATMAILGHajy^d|E#Bsai#aq%*u@S%v>7!adc-z^uaRwkRHbQqd(PK5;WSCt zI|kEBo~e$_s@oOgSbRWEl+L(|{t&6^k#oOm{OjAa0?s;=}NO3cJ}u!HL)!lfL? z$Zgfv}QTP@{_pmv%of48u#F`!9ek4V3*;NPx?~}S`ne+LnSWFedGzFqH4~!0mI;+9Tx+7J|)F#8kD;751xAk8LjZ-KHV4`zV!} z7r}TBsCYYkKF5jMDb(4+jir5`RDO0jT=~*ff;ouwzL1@VJwDwOC-fyNKY#RYez?KZ3ATD%0W5i~BA$`Hg3-zUURIpg*qc zI1x>HAVl2py5cf70fgfbZg%S_!hw8h;Z6tsp+z$O0)50O!gP=gb3`hWwGOLybn2UaZ6Pf~~{je#ilDp;-LbyC^ zTttk32J_ZP9^=l`hKr!BP8Ss#xIwxw37-*_WMKf)dq)npF=lUeqoaCYx_zI737g>s z|2@rI5VHmg9rQ)umyJw2th8@{2;n8Aal#A0QjMRHO)pgu+#EMyV#$y_S*kjWPkkyq zjT$D$VfSZ3;#9aN&>tCJw>@u8+X*aQk+cyAPATY4sh;_q;IvfgG&N#JyNO-46yxOg z2EC7){{9kc9ZR9S;^O&>tAUF016g`$cOn9r_EYhzA_%A=M zh2eT8R40iz!~*JgT-{){iD)*8sJN=4Mf-jtr%6dpi_o-doq6Cz)Jn5pGG^Y}fuu8f z<_pAl6ILKp=K=8N6CmBnbz%)rZnby`r(!b-4^XVwg9dKBCo#X>^RNt2} zZu#gaY4uXu&b8v=9My8Rt8gfKL@i=ZlWxdQAOJes{d`!p8%PJrh{i-_`RrC!b(88&P^?%=laum}4*rTAF+4xWq-RT zZpq^!zNyeYeE?9&O*^-b1e}0A3l2TOy}iN zjkx=PT(xP7!iAZOan}rjEt+(@C+b^SFVWjijLE~P9oZb;Qdx4T7?zxyYG{9X-&D}B zasMGyvjhe?7;RR(SsPG>OqqbNP8sRI$Hlqy>HU7PXj{6o6^xJZDJfhmX|R%_n|MCc z6<{tB3xp~JZ#Vme3lla1l}ZB0U`RFTWOKZbUU~sYy29d(_DO%SOR_sbOx(of0@6Yj z&Te!Dx6t`RH%gs$%QkRef?Y#RiXC7EJ4ugF{|`CT?#HFE8)i1t`_@}iuq0Eih>ryT!kM=2)e zsv+#4{o})9+B6O{QJJ#&aH#Ql}CUa>slk)zM zN&bEaJck}mIvHh`(EnuN2BBNJa?~$HSkspz!UJlA3F8pZP`I`b7Gqkvkb&P5=pqTP zl@7L%ilcXT$S*g#@8Qgp_f>{(4K7=7`xSv~0uQHRL*L4R{z1-VbYroGhGv%k4fmJZ z1Apc7tD{lpn=|gH89Q`{%juX;SvAhvt*bN4%ss=p*0{kj#q_U87N_oL=$rcm>Z z5+-l{cDpkfuv96Ws6Y3=GuMHg=tu+v%bhcP+Et{Jghbhqsg)^Se*qjbe@!CJW?CL~bE&;j z23PiuY-;cbXibgwMD^hdWW%rVK725ac*buRUV_L=9IW^qQ1f{^_wpC#q!+G`Ug41K zYL$ZDrzX2mQX&;)v%x|65gkHO~x=gOa^>xK?b5v zgcM&fUg^-$SXImv*f${Q)tHJ1K)hSNKz8ooglS}uurv#l@unI;S#S$j^xo&?Nw)+Z z6Iq!Jj)exb?mB5Y%Rah2OKTB?ehRzT8W;>6ra;pQuO!RPCmvU@+oy`!wOh1dzbD?h zRUZYFFD`%a!goT{g~C)hDN-2Kxn$XE0%X9O{%%L6^laPW>&4WD^QAd`u{Jr1*{g_6 zkpS6(M1?F&!l#J|PxbvO!eAcTB-%hBL}C596PS&ixnkv~F{Z|q3=5B99%9#5v{Fwq=kibvYd^o%3!{g;6 zA6fI}i^r3>RrwS{u?5H#8l9A_F6LWT!-Cr=@*(5uawGTD><_qjHS!m57e0ddqVs$d zjRyN1rl+T|sD*54=9bAfY6nRjRJz*0D7 zQf)q^sovq>$}%sw7hfxDY*>Oxi2Th(m^!EOo^>WM#^}kdUXglgTBS@5qIcA`@qzg_ zN?ax<0iw5yB^phfS=0nlj~pqXP6dLXjGVh#In@t1z}@pto*r*CCOzTv&@N09Q9?Cnr_lj>|d!E6Kv)VlAm1GQ}Uf}9cv zuYkhy{(gv$pP%0s0NE}s+e}M{A?{=cW2CI*X@#VZ9~*vA7Lvwxzct0AcoC=S5p`%%{WKjiRag=% zRjhhyd{iJMW`RC1cA~haly)+mA{wtMP8V~5vJY}AHl?2_;EfdBXbm>uTS{t)--#5K z>tVG59vH%?xY`11r{RkFzElbEUL4!X-Zm$E z-8i8xXJs<8P#o#gT&Zj8Pwh)Bxe~d@iy`&=A)#yW6zj_-$vvlSDWQBOWi2$@6KCRk zb;LergS)_$b$(?U=`gnP7r_xS<+&7-B-ztr9Q45llt(wvZ4@zhW#H^O@lu0NV{=9IUOa?#5yFg*qKXD)OU6*1>Is?tGp}LGL$IH z&&t(`ky5(Wzu|L9vE^b_FH!_QL+J zRX?Q_j;GWU>;0<11d9C=Hg1Zb8|QJHCcE?f#psk2TBsmM3@YX)&Tmb%fQ?t_wxtau z^{SGnB=h8J!*ecQeeX>?Y4tf}Ch}W(jup+bIj#0?^mW|4>W#jz_0ZsP?-Yc(w6Y{| z8%cG^pUWzx%GUnu&MnY25-OY%;_r7L>}Kot$X@~fk)Yo0#6(IBGP2Y9Gz@%-adF3I z;kTO-yx8--rRI;tU@jr305FcxfH(!a76?=i5j$U|VZ&acx{9yMX%FL3cqTb@4z{?b ze=qUguF!dOZyS)fh`$3Epz^g1S_EWa&t9BcNV<3i?>D1IxCk~UCVaN(B5|Z9Of3%* zY58%y}Tv#r$M*)Y!1p0*O&Y$oYk5?1#?m5;s0ySf^>3Qz#Ne`~p@uF^Kx>1(zDT zFO4)1=aO3HFfWM!A{3B?7p)Ob$!+-JEbpnM6c*>ERg)~0!k2A;_tfH<3dIP$WwMi8 zmjWJMH~|0l%AxjhIz2B6n>0IV??~00npnp49W0h7cb#uV-6tJ9=V9^T)dCE{(qCHR8$q$g6|BoN#Z-tIO&s*SXYt*lYm$$wFL&s{^rdNZB z6BT6+2riJ~Y2EA1$YYyW#g!j29@>`#Uuw$vd&z1T^5^mvJb|cQ(e}7 zd2#kFHzC$W`>m*~5a(|d?S(0Ce2P6SyxOFfuxaOj-6l9F0g%Opm6Y<23jT=Db|0+3 zK)eEzna%7TC&26aZL3|F?ZSnr|mQGYP@O0XNH$!IM~dg>jOVtV7h+df$TqN zZ<1elIc7>gl;Ix^`i`CVh+_>&!wm#10uXzlTbuFU_y-{Ip%6m;CX6r>#iWbgAAN}^ zk$X8(w(}LS8_lr%=|nPeQ*1YrR&={G0Z&a9RvI{WR9 zP{_q2i?qijA2I7pTjvg%=_c~6dnqitM6VI5w4bPm;;TMrcirpVQ{jiluuc_uG1Gp2z?5d;jCFxck772bL{i#BRf;agKEHO}UrFfyG5RMzbpHCG&zw 
zw}Rr=4_k*>KH7;ZZ}ZvO^SslVJ$!p)=}aK^P1Rrju$?cF5kS&0Hd!{kZ*pt0H9OL; zv^M}f@?Tv7(MwbBQ-NqEMFH;ihsh#7`I7fSGd7<*ohU2dEBX3XD5upK^8zeCr5r8=p_o< zu>;v47xd@d%Qv= zgpc)Ty`@#t59lPkK3ZC!dJ`VUu%jth-zS2!FETkl6?86O4g6%w-y1JeOd8f%2ts>0 zMCHe@ii}Dvlu2I1Bc$RxCo!@xLICu!RMxWrC5f1nj9{i_VjWM zqYb_sF47{IfJ=d!9#(){?4y96mxo>e44hco>$-hBQ`0SD8>hju#Sr>n&K0sXv9)C4 z63E)h%EvWay;Md$Oe1%>Q(MQi>h@R)*0v5YV?`-)I0 zjvRe-C)2pvT;Hlf&uy73`Ip%}pOf!=C02tN#gZ0M zz{}-@(G@4{{fK<0l~F>Ws^hpg1MZz^xBEEdjeZOBv+%~OZ05P6r$lwPQkXbPy{Eq4 zb_f5DlNPRd*Zv*ux2(mG0b>UAuj}aD%wH*MsJ;0sk8L#DJg}yug5kxZ3B%EcxD1{l z0Zl;{(EJ0a5}fljJcctiw%}WD6>L_*aT-85W0u`NejPjk z9c%HIuNGD$+Z@)Rg8D95iVaId}Iw1e)0jckLm;`wOq+w4fSLS*RW6 zOEJYjx&s@h$*0F){-J(ZskI*2AA|#>;(l6z90`F&W6Yskls4UCkXHm%2?Yx*4R{T0 zQU4iq6$z3Qa-2iXu#tV_uucRE>_tFzA zgUZ<|==GA;EPUzJ}vgmnRkzaonEUx=U;->Q6OppE(%ZET!m zDu+wX^DP8m1_=Uceb}3R!y&nOYmK=iUKwGOZ4@4y#pRE6t~K*SLt@d7ij@uV)bg~# z*tdo^X`BWXPbQ`!=jt#20R7#+@Tr7G&QzJu4Oapej%dLIe)E!q?bEqm<~AD>d8G}O zy9_|g(b!o*LW#!W=p9468Bu>K; z>55JIJeIhA?cXD38pAQQnBTmXKC|?w17e5kVEki=nTVn^bH%uOc0deCSx+*2X99>O zahSik%R*8N>`;F4V!pG$;Ha9@s6szSB`E{VG-v(}#x|ZI64b~-2cOtY$ivcln+Wk8&C`;vAYn^?+wP7#kk}Z`S^1!dIDTiwr5}INx0@HYPL52iL<8d*Ty@}f z3Z&OS7}))2pzI)C=rr(B7m8`k02P6ovn}5<_lC787O_{HTvMqy{dhk7LGO(`bkv|3 z3X;bB3t#jJD3FAUA`tLOfJg^v*B?%llsBAXakh~+Jtn^{Y`pR6dI^wR(?DF#?!?DT zLX(Q#XS=k04~e3yrNhWDZwk^%QA|0_-^cnBG(WK==T(3yi!tA~^>GKv7I(&qCPbz* zf5$d5!PV)VEt4S}zUK|op|_)6w!hI0Jz)d=B4gR9BD68>`2g({J1 z=-S>lwc91RqFf2{g?MnI^qM1t8+t1@KzLUG!~-ilq0-^`8dPmzpO_==P#wB*iUJU)j!fD(4BDqVVJ9@ezTv zH#(SUGj@gRm zx!gyE|8@?6fky8hVv;g+C<12@w7EM0wyfoM@FoVf80_o%>D|^ai_I>`%~gW9#^_%5 zL`La1*CvSFU4&nTYL+1gdv<42pUk9{w$A>cq}-xxygOkFmpKS)tL^%8z*@6fIUu0% zaHF>AGsa%>?MV^68zu>#-RLVxgA=CiS${ERg9fjJS+|9~U(7SPieeYH$Rsi{Kiewp zV#r1%BVQEyUL0j|TI_EB`RZ-8hpjtZF0E4ir&Rfkj?whzUy~Z0U6UI6tXy6@E{&0N zq2vLg<@La`aGY}ek&~CZuyKj_ZC>}oXBWs9haJ{MF>u#bab&T=>Pw8N)XI9lp_I9} z&9afNFLP5K6OB4muKaSD&{s1jgk0x}T?Lq0RI-gw+B@hg-Y)NPnslHZ?B2W)djmkS zpf!bmZIw+#{Nnc@8SH~grBp0JKq5y4l^CCmph#v#R<_32c7+fPo7+nm96GHaa-zDF zzXfP9ie1w^q(fv~Bsng;_)zFDbO~An%|iGIMMlgxYzbj*DXKrM$=aOfvKkejp@6iD z@Sv9$0U_P2LyX3%$(5w2`J`s9R)(_ zS4u3}XQ1s>JP;BHY3_xhp^)xOmY6^Yv42tUI~YPL`zQxNRSS$pKn#(ngkvz;$GDxZ z!ra7xX5fR~BzylOoj~3r0PIpp&#83zBX90vHa9!gxeO#c_4H>G4uTt6nX*VaRRpnq5w49e2hT+K-T5N*7dgK`(SA80S zManMTa5+{aX|gp@gsa3}qUnXQaCn8DvOFl*An&=o%zI3bH1`SB^LKGo_0jC)?8I?^ znCP=){3}W8R5mKR{t);5fX8XFr!RR8y&#H}mP1e9i|+f+fe0PS->Yt%1IN~v7YfqS zBVxsQnPd|L0}m?n|FTB@A;QzG{y9}2MZpcf32yHXNG356$qsR>Ug^6M;r%O5 z+t0~PhwK-d2?oK_wfUfOY{B&M_**U4RAz}tfUHpY-ZQbr8u1@=baPfT4DD0O|Z-r&xKm5(u=EwyQe2xJP}3SHwAh2ZudDa6XRLj6StQr z-M>bC!@s}sxLAohULfjFZz(;bliEyB(|=O=?%!&e#h!blg3Z9$;DB8fyhc)=gE46Q z<-#_|YFbmYp63s{${w>dA63|)?Qc#robufKDn5noCUKg5hD;BkO#TtPIUZ@9m^wNP z!HR=@Z+ef?0Ih`XKnb0pO*cnrcV9L!O3cP(uG_uTrae7Mwh@q+7Ox~U9p05b!MDkf z7+-0*wtb^othdyXV#rSQ#DSjVa{q7=@+z2q9`(=bSS8kUX^70by}{>=PZhjP&CHOg z;~y<0r?aG^9rPgquuu2Ib(M)pF`M;r(u+oDJSa_g8^D3l@Y|5)$TEp?8z8B; zrM^-|2P%HSNaM9Ok^dlCy0CYqsEq)=YAc;{N*+lp7f(w?MyM zP`sEp>7vAC+pfl;v*<_b++~%saut`k#ar*3%8O#&tzrX+I+aH0+;FEr)J2>{q~OuE zO&|T5!m2?z2>Oi#_ro7p+;-Lx67ga(>3z3SVUNhDbbQVr5=T;KkYN;elTHCg^9thk zrEJEA;j!59RY7U-8y2hysj15lGWD4h@RUfo#Q^ zR^ml~+(I$~6}vBuxLkLDw|p``M+i3PG`^!+q}D2PO}RCa(EFBSV6c8KRQ&5(^*!~T zWAD!rrkYg9uaj$nw5UgMp^n(Imq1(0YsGd!M@;OV&zWar{!k|C9Ic7{BQ)NZ^R#{E z>n{@yZ~yE!tZ&hRnPjpMnzU9TeeC}Yr9Zf$ln&f?TsR8(R9PVBn{ zTlKmWNgyxm;Lq9o$XUzL$xH>Y^^2eir)@|2*8{0i5~s3pERhQfzKWwcEuOnVyEQ_; zu#%IT}DF|Zx1Ikn>)b;%GBEKDcN(OK2Fc6uA#`imW&r3s~fq0 zGRY`E+W)ei_?mgWpFt@bTF}Oagz(4!^y_-Y*idWJs8118fomAw!h5NN?Y^SMgYR|O zpRg)y2Z%~(?-u>QA1{{C+SRYn0h8Vjo|UA9^NM8lq;xW;R(g^_-=oiA2nE8{}D 
zpAB{TPeJewVfnUhqQvFa!nq`pAtIro$~wRPd=kI@=aU7ThCB$p+wQO$;Mdtej=hZO z?Yt+!iwEdilivph11S|Mb97Jg4P-IBrQQZuY&WvqO5H=d1C}oZw9_t30v}nlF5NC7 zo^}7GMaOIGZPMor8|9jjt@cX%3%+9T8T^>O44hX_1IrLhKhkd5X-l&7h3U8$8TLaQ zTs$Fu!H5xVK!%TsM+ytuE(17<>mAs85EfCnv6?IvAF-#v9mQQ8t8m=+z}PZuQ~Okl z=On-HyVSZ(jIfV>)C1##Fk@s7%&b@N-3MdzkowCgJ~+yj`JylpP8JQ-^8kyVdU{+e zsv4R_?`3LUctU9A@mXJz0t*kHM2|mLeML$Cs-{pxGmG4EL---nlY_o9HZ@{ z;mOqkiV!+Zcl6BR$r_Kxp(z=+rBX30v}$uW2f{Mj~V8OC$$oIOyWFRDi@ zM2n4VgUP&MKGn=9_acX{Ok3IpUbdITM)tFx0)A??F6wQyete-j9!Pv%cAjU_S*5p{ zR()KOCg8mKVc@ZM8;YgOsTRcBiy4fSuq&XXGxqs5qL@@2Lcqi^7)V?$Pk&ibGPxaw z%XB>Db717s@y$8%;4GAH9&I3_PeP|Ohudb7y{$#ptAy9(6kSfX-eMrNxgTgsj}dLw z1e(p;g77&_yCRB7nzViR96Q4nG=w#J%Wm)}z1!f0#Bn>3{OiF6+sN|knPDs>q8lb5 zYZ$g0203Xwi+a~1pqXnE}nDTwst@p4r zeNP=ShJu!YVQTyK8wp>3p!bcZ+atI8$GRX93)5rqsH7~Cc8hXtQ{ zC%$#Rpzq0(q{VUjeV0RCNRYnZ-aY7xSbcK3AbI7x6tK3E;?~hFe_BIf;5%NBaCpA# z`+aaCfaLb|HxBq6g+`H|5>Fp((<#=aH?OOy9##_EtSiUC~>G z4UFP-xhbw6u_$lH40}9VbnF_Gd2Mtl`?3#bYIli_fzyXwKh=A8Lup}#(v-5{AV()- zBV70}qBl*9&#IH@<;O=%Yst>~qd#S$WoCW%^vqH*QLfG?pMh$hfXKPnSr(@oms-D# z$Yu)luw$;7m({bg|K6S9Y!cvV(MXu!9oeb}f8fobDGwd8QhVg5_D<=r(pDY9s|fwM zIh?Oi1W7_zICz8E6e^e?43$I0rFcZ8F3yi(qKjh28Q3BWTlBrzGI*n@%AA0Kq`|@x zR7Qw}jATwZCPU+Pgfb_q<<4i5cO1AQXa`RIY7ik}U5NZfQEgv9?j~ zcAr>7FXh=vPh4OoI?H_tEbdFNDapOD7j?3Q6{`cqRY3Ga1FVosXA78b_ zia58QWGza7B7X0WR zXH=;Deif)v)T!LR-BQe*Q2H<)F5mnGu=oeL-(LZeAcq%6YzTe^WLKqig03MnGg_W* z;`_J##C5si>{IXes&0tA`6`X#P`CeQ&ZXP*J?w6@cAaEUo?+DEj@};das9-@)4Hu! zYTSO9r#B_D)~~m+Cf3N!*?6@pv@)Wo@oI-l(o5}Q%6m!6=*Q=ud)R_Ie#bBs)$w{U zQF#N~S5(8=Lvrj&>xt*AwcCezH>k)KB+tZk^gkgkte7#g+AEk{GGfLdm@T*rct^cq zIoRDvsa%L9(f6vDrPcmv#iF4b2{DA$71bG=lGQ3;*xbEnwq;uGsQ)^!&Ezzb(imEL z*Kl&mf8om}YEL(G1LDahbUic8nd~04JB@@+NcZO?=RGdEK9>@Up(tS6)oSx0?qIy zSBUeyg5E$Gx!i(1c?Xre7P8ufhS4VVanX21r_72sFN(~Mh(Pr<_e(;zFDKSwBk=@- zk3EZn>$M-J6B-Vb{u&+-g+GB4LZ1wM7TvXcBSI@b%w>(tUiG@{86mMHiq_a8(j)|= z$yWxWkvBw*0G?K z?FExB#hM}G?06Vr)LOHh;{8!1rlXyrAgE!Z4}lXj@{1vwO?>$?xn>xY6&+WrMgn(LUar48>*JmFmiJ0DcGhlfr*JS_lQo!B{uGWaKg;x9JMUjSSyv1Zj!{G6IK zI9lkcm}?0O)ZoSU2=5*{=Yn@HuI8kDYch%LIukLS%^(34v^Vxg%5|DznR|N~;_fF! 
zM-z;jHUjV($E1xHfVt_Y9LXSmd5OzUjMX2GJx}Z7#TkAw$u>adT|v2fY|=yOI3v8- zOPy1dcJADK+c$5MBbV?Ls7^kohI0)FbG3*ri@Mdsck0U7_fn%b z;f;4{qUR3UqlIz-%oPBujZ}^^AWIPYpPQue75LI)rudx!r@Nd@QJ<=$&QhCqTgfQ9 z7Vb{n|1A4qvPCs`Uk04!uq3U3H0w_tQA|MV5n*t%SuhRWS zd?w$QNBi*bh*9@w*VC7>xjvI9R-wPeYe<5mM_K9wxVN=-EBCs`RVIKqDQ42`_!oHYqEMAKDF-MOcHvA67}`^Z}ci6d5RSG9%Tf zBjpw{YC-aDuGM@qQbv<0nNlSk^9_}XpIV-`P=i$Sx%i^kZ&Vpz<#g%N#)wC6vYgVk z9mv6iWSl6Ys_NFd+IHoI=%Z7qKX2haiBuCZY?AP(exn+nd*s@{jihSY7t5j}>0qvF zGG5FBC5IAx`XpG)T+C{53dFG}R0kgj6wtKHrU;y&RPb1aUJfN-ed866;vvmU3w}X) z05yKIZc(IrPERZ@IsR3iqd3Zg(1u|g1x=EP8v#pl`SN5-%9Bq%;pO=3*k#LO%HGb{ zkWyFP$v1;AmnnLe8G_~B(1j_2Bd98fQh%5(uQPHQ779)8trRIHuGl6bA20c78{#8# zICnj0PvQ)!Y)Gk;E`jopLIgqX*TWDt&0;FkWC@E>WZZ|`&q78}usYZrP3*6@?`Ee^ z0~}dF&!^^q&07^q6EGtml&~vZwt6|PO)X{gAHA>%CSk)_>zs=CN^+iZga|WR5N-@P z2t*QYZEG##@HqUFZoAf>0xmq)=0V;4v5u$x!lyl4&%esgwuwJB&;?Ri7tr~&){&a0 zVncbJc^~|e+)yR5ctmbrhuhH(xobN~bLn*RhrMav**3%w;vYPzcA+#Sa7{P!nvG4me4Z&_S+}7nS*Arn+@DcQD@gW zrtcakXmqt)=tzu=K%lLA=WQfUT4+!NpLHKmK)Z>QqKLXUTHEF1skId%6WIKmcV}?- z{zYTAtYA7QrE>#g+XC73izVHn-_D!(Mm1;qJ#66CklmI(~Benv)!7t>m31%wN=#D5m-5}Ku9Uf1BLHF(Q|L& zS~z?=>4r<;IXSCgBOp3Af-BE5<*LTi!b2XqoFG5b(hbg{Qsnw$Wp0k>pU^q|SnwU1 z15J%{`%PIPG!*oBqiAuGL`S{@)Lvr!VjkoRLfu0u^w;RG-tMzG&ZNQ^$oLixC1k5tFy87&;UP~8g%%eS+|sRAc2eiociPK z%@wM{!>j#}Z{}@M$rU(>@#fo+Ra&T?{cY*8+ggZt2^Nnj3H#xKB+KKSkA1g8g*@9) zIo{+YDSA!Lmt^CK1!z74{<@D-pw33Pe2P!2s{3J|7i$>^y-xAkZ$@1z2>Jo8v@hr^ zgHhAk+U6O$OUfEjRJ_7R8DyqLz3(_b9hAe2LkHb1F#238cFzN?`hjC zaE>l%xr2z|)+xLN(8kibN5Kt6T1z-T2={%IYJU_uf6i?fr#aY~s(8)SZ{`TvlwBpq zEv{rM+QTe@Z>Ap3?#zP8lIb3{-CE3)CApCz6Ctp%>0DS?`877 zcksiTJp{2E3G zCxZ(g9TAXy%-=A$&E1@s%gvckN@szpQUqz9jafK;HSg=2%VSSPf}Ej>oji1an)VVjOVxfbx7Sji`08#WKP{K-0Bg_`g z&26caxhq5=3K*VrqQM}EhT^p8o4{p8^aY`~?lDT68B0H`hf4qV@6t86&$KDOxx8_6 z>Pb~n{ae#pO-!Bg^zea!>x-ie91g%S0BOV5m%j0e*HGspfh0i}t9U79(`;l6l&p@A z##F4f;}&9syTrH$%SnE&>Xp0|=SJ8^MiCW1T2tyLW3J^UMK{)jV>c)X z%{u{lCqLNK5}q!Um`HhSWXH7#JoY{wasoVJAjj`!k~OK4$sm6bJd5j=4&!VEqqO9> z6sFV3$+{0}E&cDxEk0HEEenVG`=9!3|4?Z;ve+ z2ea@~ehFWj&fjd0#FV~Rux8b(c-7@(oh`iRHk!wD5u>IIYxJ6F8jX`t18A_8N(MsJt+^_UhsK&U}BWXnA-Zi~dFN zi^Fq2363iCF1sM(zz~EWEGM*v%5cB$i7cdsqlV&CjZXs(Wf1=47N-~Du1nJOjH>o+ zCdL-BC%wp5!!gH2h=Z9RgTLME`*DCzHP+E z>XhR5_0nj(i{K(#_#~Ls!n_13j=(dVLkjs!d5L{%YEbYzWRf$S*X0uCF@%IRK5mk1 z%z;lyg4}qzjVD%u5WdW({6O5)#oKkiEVGT1We;uN`sNawJ9=~N`GQTraN|+R-Um%A zul`@ZFg&53X{`%>$ks8b7od5!nO*(#9_;j)L{0cSMPhrpP^ecMQ9XW4$LcSiAoaCx z9`YIfCLuWGhrPA2tk?jm$!x2r>{a~LK_{iu0HNW%qcxtGXmx2|wbbG_O)c|Ox(7;$ zQ7ZJ?v2uK8s#dh1Yn!dhbV!0|d9|fUE0;By_r1Ti^o$pvk&~dc?DG%&yGO-1yq2xi z&gOd!S9b0`>y^gS)3Z63L;;Tp(Nc2;xzc)o-AT-p!2)N1@pgO41c$`hk;BfX*aCNU zgIayp%dRhMEn?C`a)@xDg5JhfwB$`Ldp{2lPij>=nU(8Th^Bd+l5*+H!zt2xe@E!&F>@-se-J1X{$7fU0b50WaQfKoS@$PRi zTYOC?pDSLRj%yfrNat)&j*)EDzGnhsk~y^;{AgphIa`q~Dv2WifbI(B&}0RS|cX<8$83 zJ(XDdv+l_^X5Gsa=YhE9PAqiM$$?DY>zCEHzqWJ%DNkq>^~L?kF1cNu!0<%J=aab` zd%G%qpqVk%Gut&lLW)LNtXd9?O^BY=EOn2(9}oxirm&g;<`YGEv;Mnlj5=I6>yDU% z54>-m()J$8`#4*V5hSU`-e-gAMbH9B95r^Zs zaCCQlC+B2H5&=FG`?%jNJK5<*2v(-`IFAKCFlRIcx0xV!?YH8JVGYu11$5)s4APa5 z7kqkrK(^iO^&2tlkKI0-E2I?6E8(RJzXOABc8gjP$5e(93AIGP9_b>lzS-ipQ<}*S zbw$Wj8N8iYxUDF+nc`f>2@wG4x)CDP28}+$z$vUGMSqR(n9ZO;7*bmfmzd)7H1j?F zioDkC4HWIAHdCbzVUy-ev)yJeZfERcQ&z!c%+RX9qf!1oHV&>h-oymESuI?H0_;&) zbKPs_2JpLtbQCX_{?tPB1zu#$)LIZTEd~-8np%W=)?cMyQi2qT;Ga7eUu_Qt0}VdU zfh43y@8~4c=Yhk$+hZ+-By`5hOKDqS?iyb#5mB+sp!DS$0>{=K9yXc^i+C+M-aO3W zl=t1b(#BAx)FjvYDYLD8xi>-n1IV@=gP%{nWeV8-E_Mf~=FMb$hvU_vrte@N#cDWj zTDXRRH;<*??x<^6K^>DQw1YJb5pI`tcTn2B4WNMpuDr=-7KV6f?rO-X~1x!5U!~;~d0DnL>Dw=Cw6?ao5W)(RZ9WiSQ;>vU= 
ztZ`C>q-2JP6ttBwA(~G7$oHKbxRPT@?)E;rDbx)IFGrlvTfUSSWWOXHYgP?|o2@Pt z512c*ruN0$+!^Ln9s?%#(J3W2;@Rypsmp{NTXhaFN<|6=3X3L8+4Y?Dg{ylPc^N^nv-CP+T=#tLb%VCxSTwDgv4_ zRG8Cz(C86XTjld0Q)yZz7Uz7meAWb>E!mS{5_==%#n*kqN_kTW!I<5HNibtLF<84m zZS)QuLS1oA<QFYiYnmqfohs`6z6&X& z0(^_{xO4v&Eoe-kP9-a>bn;m@C=_h?ya#P3p37Erw-Mfs+A*&J=P1@s?M`ePd zP<8o0c!TM#GhLJ~JfCIoUgbqTA>bx{EGh5H~YH9!Jb8YNOD1PYM4x|`Fgyn6bW+Z)th zXN|r_+yUNhJJa$DYN?M;L@xe_nNF9VWmc%|)@@OA%MD=lZH(l06DVGfeLUUNFR~M< z-r%d0aK1aO>@@2L!Of^pdvi>F4ldpeXxbo0t1M9IFiYk}wRP zftGq+(vWQ(tPI>TZVMK5Tk-2i@uNYi)w`)I%Y7Vghy zH%r>V%vyNEs?cQ)@VP`FUt8214O7CEmR;75qsVc6~CCbp-qzA8LXFQ>*i4o<^d7N++?fwf=E;*s-C zA|@V9y&cYyhP8Rvty^Fp4NMD50TrA!|4T1UNn#Vu&pI~aio)y7YtFzaY*pY3NvC=@ zB?f7l4*tsm`RBBFw0Z9cR6G*cLzWwk7Q7w0_v)RQuet=)`JZTo z7_LTFb+j*TpjBG$4e$D}dZ~IhnUTsd?!jKe+U@=>qd_BdUqeGrzc6jiW&E3-hF!zT zY1ap_!^WEm^32mXWiU(0FyFgKB8U>+)MaYgU7!3c<#DsN)Hp>g=C;ns%?@Os97v=M z7rE_JYFx^h$%6a%Kg=%9Q!2bW;ki`s@U)x>C%13V+no4Dp15`5y~Dz1VE@w71@*Xo zZ5}nFdQU#}87N@*1}GxkcSG%os+k(2C!omfSAiC#gl+zMl3n%E>P!azM&oH>zUj=V zF4t7%XaiMF<1%;X@yc&jG)@xSBE*6v=*}RKIz+9E9%=a_GYZs$p-T z7cgG1CQLWx>pdZ;TN}ZRz$A?tL|BE_L@?+8l1TBhFh*ZPYmhLIh>=+TNU*{d&kvSg zK2E|4KvI$ALLCf?dwl8QAaOZdk!P6HkIFcQ51ZWH-o|SMf(XQ{$$Skqfz@~u1Ud99 zw5lqQB!VY4m}^!RdZjQ*l!JBG8IE)=gRW#K>88K1!86UU~Ez+4dr(SnjLk_-Jx- zRHT;Fn`26%y8n9FU&_57tGMQLsQAeoe)cxGi{P=d`{X~xlS5euX1f(w3OH;d@Ex(2 zPO|FTw>v2)YoL9pD9o{$8HI&(ukkIsI#F;xtI+YKwx0OGz?I7xns$5rW@EYk7VG*Z4!eaIRkd2S^PLbH#+2(+YL* zPn4H#j? zPWqLntg&7@@2b3@rV$q9*W#B~&lgj>cT=Ag>oYRFOSQb(r_)vB^OK8-{orne&6gDs zId_AP==QL5=!u7aJLlE1*5O*`gTboF@MF;}732s;=$XdWwqH?f@JtCq)KBUQKhp_v=WfBy{ z+NmTAo8t_81U8FOqv*7LlIOK6io7*j>|{#lrwJKU&U^epyS9-=zKcwAPpqPlv{ zBj4)-s3@(uzX|sBpI(?QOFq*wkM%g#KHK6dkTU+sidMkT@`Za7Xw$TyFt_sx2??1Z z(D}0@B9u1(vnq7gJej!}k^wSH#8Xdg@K8w{2T2;sKBw^Us>8~=O+&mvk^@piQq^0N zfw|4zCkoRQ7UULe$G}mGO%?CXunk@G0)}HK5~>rj4c*93vj}s=Ug#rKP2_o=rZ}F1 z0cUw516cLBk#~nyTyT6hiX5>~4^0S5yKy$f1gy6yA(5!IrawF`>M3$i@X_(s*#j*l zcOQ7&@-07Vf3har@f!iEIIBrpM@qZ^Tl#GRmjxa4S9Tio=Tg~MhHoyELPo~MEx20# z(9Z7L9zm}PdbxPK4L?2L?BY4m!zyNbHYkRolqV{pQl^qCfE6P7U!=Y8 zR^@x*C%1-Ae7_m?pUa0;8y_)@0Y| zru9T#ASp|!F|=RiwnF&Eyg0PfeCJ_8rlDm?19fm5mb|g!(T~bFt+LnA0?eB12E2tc z>AvR%PvsMNu=fO1j>%WLl0A3EG(LLH4$P0AA+ef;_DF`sx=!u%Fr&8B8DG#}+Oeu_ zT8%H`qzGaNs_fRDxs_f=n|)3FLACc9`TdW&(@RaVqeIzotTW2%#k(wU*eNe$`d<3? 
zCI|^xrFjj%O+s%C>g%IRARZ!_fZGvW_!G)>lAAuzZg+0l03$ zoL75?Y4Agl_pbmM8PuU5w1Y(^dY!MFgfTGiU5$xWWg*wqa^cA@pWaE=J_oPN7Xw!q zEXzHg(-Ih#-2Q^MeGdaDfJRE${%aur1m_k6Pgk`{52L~!$8gE84-K+yDNg6cilIbp z_yk5tMZdoU3>_?Msh)OSH5gPFHW)#e$FIq|qJWS+6Z@H^Pj$2ekrIdUcWLz>C>XEl zN-^SGcb%w%t(2dq5)%JmY<*p^n1vws{Z3k0%=DaPh@|TCH^$j~rEe|aV<*|nZd6@J z`?7jEORoEnMZ8zrsAPUem8LjzX6_>G@o>kkT%}&Yu;{oPKwQf^ih}3rcH_$) zAk$n0w%<*NgVt-o!Xy9OY>qzdIosba_F$|(XS@f_Y|c>7!Ym-SoScX=Dw;%aLt}E$ z-*N&wqa!T6PJ7nJxW^uyGg*$A?D`aVx)hJJ3(JNT#25JVedRkE4xPxj(nk!!YILn8 z3iNUsGdj|4&x?Cg_;NFrL^c;xs<=g1^n5OlWV5_$abmRBfTCxZxq0BALf5Wl#TEJE z)&~z*X=B6ccRk44>xK7M_SAX2iVI@~zB^<15P6jOe+0hkNsros*+kg@NyA$0S-Im{ z6W7C9TlXR<$=BK6X0l53;oT}<0}DNt2#1x1DpbAUZoc-6(fOT(=*Ah>Op2iB>c35* zP0h-HuJX^UE!qVCXpn&bMN`kKpt84yJ?|dIU`A&r4P-;_zJw4sJ3E7b3)n|fT-s&E zsQ>_9AQGaHqR~i?>)SvSp$|ZNWL=w>Xt%DQd2hVs#72S(4NDm_6&ZF169yNFHF02u zpDl)!)v3V>G2(W<)@zmH2cSwjMf8;H*Yn;}a@_humpPNSv4-y`fR+_v0p}VD7a~E% zi!OVvgE5Ve;Uskj1e;Q2yvLZu&matIi1ELWXQg>HHch+QQF#fcQ8IsHo)dHtEyg-m z^p=O<6DTnT2%KUq^>`I`0GT|Yu`z2x_&to<-sk0&AkqMdVL0v8^s1mU5!Pv{gLTYy zCt@xtsOy}H$4)b-iy2dTL@7U z({%slYT=IJovEJaOGp#&L2oDTH+wa`=R^mT)$m%1+#{jq;CoUb5E>LI*lIQIItQF~ zt0NTV50v!d);O`*26EW?8Y%1LZ%u4wXHSgEQ6;Wa9fw4(9e6;?*%*7Hjr-J-&%J1` zons3qjR!v^L#O<^>v}B`#E7Yq9F2^WNoA$`B1S_BAZwjxAOz#Tic=T&|6wHsb}{kl z91dQ1oHt+jXY$ir|C)?j87mQ5$lNWW2b7hM(^3G5#I9s`%{vmv@$WRfr*k#$FnRNj zbt-&3*H|LG>)wzVnENhx;|82bm4j{o=>%bqC~S!{e1n@r;PQAInSyNtWaslYaA6n*jMA$2Vek9t!i`XFoVkVTBgnM>>hmmPx;+>bhG<=FF0$ z`YMe7Kvka?qs56+NE4M$b^l!y<0SlUUD^p4KGo1ToMZ1iZWl@&maZ4=_ZOcWrtBqC zUie%eNSTcngaglHl-Kw_QuXV`J4rg0#7ZoPOOCmQ0&6r~BmIEz&A^{sHXB@0hmd67 z+cP&0ium)1M$w$T6fx%IOsVk9KU)^wOb8vz{-(jHg$spevyEx9qmjnkZ^y zULa1wrDD4=Wwir$9L*@XO7TJzCPZi^*#PgL5IeizSL6gIIDCr9v4f|H?8?NJYI!VQ zEbh3V2HKB}sdkJg&KEMJBCoS`yye`?6{X3hjO#sJF)<^OIo`*!w z&bG!~4e;qkNDJejBw<6i4SAHf!7eT^%shLE`}FPG%k7OdfyjUv^YgiXYli<55sG60 zemI5XLTGbSeK0ZWv$L+~C4GNiTZK-Alx&qi1*2WKAG`;<;Iqqk*LMaWxQHOWEM&9D z^f`*Zjl3D3{Sou|0qHqC$(?XrG$O=%dq^z6&0OO0_KPX%mx2Phw{yHY)8+lKtC!es z)w@K!05hRg)+-{>XjQ0pg)^M>mY{q14{;LrRLf_@SoA&oR?k|_an#14QVAyc&x0}x zI^)C44aiG+s}@XRF~;6S?o^n9KW*kD1!F}!b(|}^eoJ7Q-F!?*@H%_vT}qJYeYDDg z@TJ~$?qPzMePKjjRmO0YBZ2os5=-cD%HFB%%t`$=VIDbi$ONKG=-!X$#x77-3LW|&0WLTD=-Bbj8Y+xNpF?Vs&x$^5D5qK}HLO==q{QguKjNd=5lO2lkGcZX%!Bf<$3=^*i7k>K^^3(#qBO z&{DHVzaG-+wlOTBU2chAiip@5O&jl+s#uDFSb|uUdEY_6E!$OlSYuB8HLYyP!@P*N zc85LOGmW#8#$HiFQ2+y^ z!Ozb(O`g-VZaEN;t$@7*?2)d}UlexCnf%pHRMfoanPUALLzbe*Q`6m9=> zm1K}#F#97IlkGb4_P}$&P$jE)ohWYa z7Rzm`7npPZG7y6?I9&$7Hx4m^IJh0)CBlhEz(|g|;PSmP4^X%WK7XaI0Z}yU0vbUS z7~!)A@k0dWfeXa@ul^|eD?_ts|PE5S(aUJ_^^S&=lUWUpL8O~n_csg{` zZ4~*^vF2!Wt$A@D5CS}OvZ)NzDPFlL>ne7$!Ij?=sM+a6&1q`5qE|^Mm7LZW%>}oN2qMQi*$Q4wZd>ZK8QxUx6|k)Qag9 z#cifIyApf4rL-e_oP}#Qtk`vi*A^)~mNX8eV2q;@iypKQAaaKo$Xp9>R`Fr2;~HnQ z!Hs4tN`Fj2=MeUzq!VtAV*E_6vV~74O*lc$N-^7M*kd1vceg$^zx-YLbN?2u-bXf7 zgVFp|nP(r+2@w9s(HUQJu~kpbuje+rKd~BfiB*;Dj#3TSJ!3VoKV&q}uYHxq(_amM z$X%%?cAN;g<{#ETUb(Nw8jw`{m0TQ~&GP);EC2*GwR^&0v-^>UXfapQ{yyu!Y3p7{&)ScU%=4{F0NRF&8qF+UtnMve|2AzfG5I_I|E+f zQvf~+cM}&(8NhEo2X{TxA;v-}wi(Kh=*qBSr)#1kVW%OPQm2tAW-P;mXQ@xzCkST0CTJ1mvA)VgvY7!0Yj=Nt80Tgb5h;7TGe_t* zzPH1#Hm!5?Vd|F#9xE}B(l>iUnStfDGrY&-rajXF0s@EN{v2?GK3F)NlRXhp9-@tb zLJyhyLkjo%6l(E0?WwdHtm@^Jdg=Qyix)XYshCvzU|Ny^pkoH92VGFKi@Q%y59@e( z820jLA8$j$MX?72RJMgQ=$guy`zithy!-s+=?;16HNw{-tungv3c2p}ATi^Wels5} z@&)mJL{uE0d&)uDuK@@d{yTZyUWOtFBcfK3UK;gA!q1l#D~H=S0ir_o_}=r3?sPZ- z-tHm)Z8t9V1rOQ%ZgXS)R33geFz?YMt!(OhZHr?n?T3!|6u~@PCB%aN?)@4WE)*Z} z{c19G`Z|NL!ZXG^w2vx^Lqf|Qeip=t(#n2*rsxl1i}!uU${JHQ5_c0erV@-S&L(^o zPx1=^N3J6x!gRCjxMlk?t2AR6z0`G1+vD({)AZM9x8rlbCfW-uwWYpM)?d;m_X@@c 
zB0Xy11fkgK~Rt&pK5;7yPZvbqhfrShmPOAv05Xw zNK&Cc$;FpTYz?B2r&V1}!uTPyTb0SQ&D|oW5;~@uuUBgNi5e-4NQsy@#xh{B?-^$~ z@A$U|b;h4C?DA8X%(V5!(F*0W4a(>x%RhUwBWO~m^B@?hL{6YNga(am$9%gM0-_fW zP@VXAY4i4KZ||7n%Gig@v0_SW1lM>nBMO74_#uabsqc?bFFk5<*3s`HOtEaPP8=zq zH;o?kt&3r_5Kc574)w40WJ@m5U8a$b&Sj#reCsBK2?jASF~a}}S`+2ru;@=_0K>g& zTdjG$c>z8{(iPrVekj_sDk^VpuS@H&15dA9Lk=T*KRIYu{~RK(U>>F#CJ{{B{Q=!T z`njW7*02+!|70N2YuH~thd~hJW{+|$Y^pIt9DdC(py_c5mI5_M#| zp^WM;v9E3ect(cM0x2nP^KOG(RfLzlC;%!k`CV!Is4Fx>E1;H3NsJHc$N`A`-AusucFY%Crja{ACTL!XUbb6y!UPCn!)#CJ?; zz$9V)G9Y>-2gW4C+^E8DuGSxlC9`bXn#}L0dRbf<5&1A^Q6zuSEqqs#mG9Qg?e4Yx z&EcvWIi!3Jjtj6K*C$^R+k;*4>JFapZUI@z3P4Vnl{t291bBj*pmH>Z9BE0xg}GayFrVKjizrCb@sk){XHj z;5ieclNuF&ldGM_N6{EVW2b(qNd4sl=@^=_Jdql88#jVV5Nlucw+5?MO_KX>DUp#G z|1dMeMLGRQ{+?k-@f?X)<|l^MgzC3vRgy?s69g~R%h)8_tX{R6D~7nwsP@}}5VSHy zfITaJy|bAI-SjIU*TMO;yF5XJfx()&J$vQtC%8P8^FYC^Z3%`QwV&RIIgWE$JmZKY zbHkkLjDSaiZYWT5F?A#&A{cZIE%es(@{EKzFDH5aDqo7z%GEjoV{3{`mn0zw|K_6) z*|1)1GWs3w$RCo{-5L}VN;)4@97^Rrw#oRTVzzB5vDPKV~a#sj%Ympb0d)KDJr6^`kkr1p&1PA=mA7RT}+ozQy)P8+mjgo)4Cd_q4byY;m~&xSVea zRQ9EY%?MJKwryR=+MhnC63E&u)NGv0sw&3<;azwE+bU$&e?dWL20Y0Pr!vrpex>!HalephL71}QLf44r^jF0g$%epYhVBbWdPI8hQf0~%z&w%=p=jrVKVeYG=qFmc| z1wm3$N;(9kLrE!>QbMJ>LAo1;lui)=DV1)9?oLHga)6<0=o)I^ytDWIzK!2+f9I@q z*7@VC{R)%%6bFOVe#_`KdWZt2po{ctzPa)O)5m6>Ahu+#h!JS6lo!XSj10@qp3< zale?nvE(Jf61ov7Sd*32Nw{m!#HIW!C_MIlb@JTFuGX6I!0kR#Y5r2JTRbF7f-&5~b zGiO+VtARlH&4lEzWUQc#RUVHBgA!blPh}m{D1D@@tGj&}9-5+nZmH~`0F5*6Bm6s&;w1ozv*6C;JljmX=Tf`HCV|^tJTg(9diWiw(%kX98GgEZ)N&hXb zN$Ao`NZRX8VXaD{0%znO{t>+L3z*lq|MV}*^JgdIRt;2;vXG%_w5^&B6%;I6ELGlC z?`o0CCd#+gDO#d$j8b_fT|juo1yX-gMo3($59PwNs{Hw@IT}gAx)GMG^vm zQb7)6d}qZ>vFwueYXs^|0d+VW7``Q6NTe>}QWg>ZBdfOP86P);K|F?|BxxnnYXQuy z20Q$^`EReN9Gdi{Gkj0p&o}^cU=w-A`7gtAtESzI1?Ei2^qIs5ulkN10A%}!F@pkK zz0kijL9EHMUQ9{$OEntncedp{aT@xD-MU2Y%i)lN9msC4FalH-6NoSTVnFEHyT)VF zO&MtZEO9_gr#o*=%3WQqWtui!p(m?fpM{4Xnx6(Y0=Ge~xV7x==uz@*YIt?{B3D99{`K4w$Cg0&k!Y6;xpjx6p5)q{yO^- z(`c5g7%BVGYQQ(RFaUr2RqlY^ zO=e=Tc+WR`tpwuFDNW{9lPQy+sjI1c56yjqS;Ao#jnBLXhwOgbu7=KS@12=v6w-mp zU`tFX0rT565J}qxW6T`Ktts=9MEykelS&E}RZ06e5SJ)oi#)enECAPQ-OvNxR5L&F z;ig4Ps9^zytlEhC`x5_EXUV(K26zGL4? 
zxl#;G(syua(|N&D#IN{t{-rEd%sJod+o><-!6=~5PhN4hY%NYQrvt(HZ&Q;PN4l#g zmALZ19fZH%@->oOO`y_*++3Vr)To&mUT@f}$?CA0cwCDj$@gnvYnwp<^Ez?18i%Rw zi*h5`+5+ao6~5=1!T7#R^nM?{8F{=cT<-=kk6UG*&atGC3vdD(1jkk zAEq|$Es`sxIJ_l6+ii3rqu^dA9Xv(Oi}s@|J*;e-^4i^T{FMjlJQU=|<7M|Mj;JS3*=>fTqZ>yvTCRb0DzzP+`U?)w#Rd2O~Wz>2dfMMakOrzl9; zN33!ze>IG$YhMO}#>?wb#rQGR!kI?<4+V6SN51O|3tGnw4}!n{yp7QdP@L^^Fbgnt zBB=)e#V*hFKRuxaumK?ty08A;li&Se9UWk_oDZGwZlE&%BBDu91NCyX zNN}xFkp}p!qWH1N#NY}RdRtxAto^E|^QkOwQ+0mDyb5|-&Y_yNEeEy8&ck?SrH_Ez zeTMoYhi+2-T>;51ctUSr2JmK@U&sX_=_b)2E{U^Qa-+-#D`fh;1!McD#^%ou+aF$W zCi-|D`LW$YYk27X?8Z_0Pb2yL(ScX`Czm6^iAioa?;b!*~wTG)WN2`5BKif z6O4O^iHC7C@D<)W4zXUs`}Y6p@A?H8_)Ee?_h4sS zPHVj};+GF2NHNyx$3AJl>WLQlZkAH@QB(Zq$K4#oxRn+Td;zf1Zg-&T)o9xf~{}!H%<6I z`Iv?D{c((-@|h?4byJInzw$x0;%q0GmqBFDNV&z1l#W*DsERgotti zxjd&*eb3?dyy{Y)C1ot+@*}-hHQ@&{`k=U1Ty;ao#!8#d0MSRQ@*}b5$w%N0xz{fr zyIp9sfSSC;Z9dP4IES)p)^|2<0T+5cP9Rp?dK5zCnrD{^Osx2>PTc%~XHbk031WKC z(~=ODvpG>tXQMIVpBi?XM}>CB)=Yrny=$VHkk!RbpiHbpU9j8QKK5IiFYjb`V9*+EXJLlP$d> zAs(wVku|b9#BL39qdNBhW@?PsIl0jC9wX31toH6igBZ#G;(BO21Qo3tJMgGi7GIUb z9)x4(=RJ&CvV%gI_tM;EIQ6~vlSO)j)^zDGsO-PM#Gg0@UBaTkB{A{;0vxrxZ_tMV z@66EYy6&)=XLvPN&Ndc%-DO#&0T9xM0gsd-&4xQa9mdUoTlQWG9Y%s25c1Cf!`x~z z>DKcEgM!zj#XI10@GoMN_g`sxB!5gyNaoyG=-EjVfPUDgjgChNv~|5G9({5metb`} z!L}Ge>RjGMs{K99ZFWiTN)a7;+qQQ6*yE;CdEEj8EuEH=_`-h_1nK!-=&b9@4@EEblnk?Rs@&785aYdOndKXVC7b$_e$=;1?q2trYQs|CMVs`+Y_t>uSN!5Pij zTINbn%gNDPsLbT`qm*$laF-WPmSt5XKuF|=@hV3?KY zLH{~Wy4O`$4{f~fHSF%_g9V4peDmHo-}89L+R`;bX{X*Q6m}SNBdC4>5q_bo1#Ppa zKfe(SZaDwr_kbZEo3hm1h+6Ob(Aq<6HdT_DKX?~bnCmSSgcN$Ghb>*>OdaHPh?hX^iv z#Fu!(7|}-@T94hhpw1n~yr$ooCf0uQZU_b-QX{67N`h}J)eZioE@-=v~(YowwxIqNzt?B-B!rQfc_M;{%_00JNSvEp?@LVh0#Qex6lYK!059HnZT6_9yOK z?68H zPP>)_ASd2?g+cSsSMbQbScR(iZIi>2w9w&^vgw)Kk2G-3$X>>kW8}wAAG>Fv(w&HD zw$fUs`DK7KBm$tOEkT8CXPP8n_Lis7_}SNmuK_EIi$)vMHe^EQlS^4|Or@*^Ja%W@ zmg7Qbk18p8%+lB-_pUBz=~C?G=K6X_`>vm7pi_?9?JWzNem7Zj8md>d^4N=$5uJ#f z^w=+r5VZj-ri|OH_=nIsq^PN@%C*FM)d95b=Kj(m_ZOP(DE0UL-mnR#*R)8#M(Y}9 z^>>!T|BhlSX~)1{5JZCYSb+d&eqm)y2Wh=jd3xU~()0Y$*_yb1w2k8c+jq7!?ADxs z4RL*sJ|{#;!)^p|OEhSe!s9H#*ZrI#cQjh0PXX|^LEmaMbagp$;UZFM>c!9Etq^E! 
zIoBKjD$VL6o&~LThT1EMFUc!WQi<}awLP!$wyTSikL zlv?N>ouNI(4MJ-t2n1~Lf#V5OF$QKX@Jo&0@^YQ$U{O51qT^*n&QFtGUARYqNQ%$X z?T_);5 zuMmR=)-Cz#K=D0wdbNq7NO7L$?(I3&(rZt|U5|sl9@LQgZ}xuH%&peZFws>5FxzwI ziMk%M)7sS{gG;5R1{}X{(Y*m?hc-W##=U4)$shz&pQp9FoLn_^F($m_S+D z(tYe;1cgqKqNQMW-nOgN1EpTwej8fhs;V=(knP*}&{#U=;HXM-Es(BOCI&&(_LLNu z0{2n~;EfC2Qy4U!^3#@(7IvAa^v_6>Dj3Ww}R-P%!2 zLw;V(TQ*ji_l03E3c!I%B2_Jy(2Quult}xNk76u!pvib2_%=P6zGs~X`$PSUtAT1d z_f0oe`DRYNq%CGGCr7v_{0FJF?FPS6IbN<4Rwa`8Ls#cBA+ztkL^aFKR3 z%oE~FeYc-uz%^gM1)mD0n}!`3_K-vlZBFy_;lclaFX8|d(GiCy6f*oH7xDLI0T#G< zm+tVUs!X8GJr#@A27r3r^D{D0x3;nnnon*}uOFkI!Vi1PSWGHDamIeF8o}%!k~?|`Jl%_L#yRJ>saxr{FtBG1ZXh()NyqxBcK<{J=NR- z3#U6Bl%ZwxQ3?y!M+D6qYD|zwmQBS1{gIFg0ZEb?CBf$0%PMu1dsp3)U|yrnrJeMH z-=P8#5-RMlsZ0GcR5()vz??T!a#iWZ_lLfqw{7#=alys*zN~QyW-D>!Ybmc-yy~KO zaxV1f(Ic0J`#fcY=qw`NU;bF$Fz<1%KU1~7%Z9l#n_EidwB=wsV-jG{pk)=?${#5^ zme#o1S;FsiC1m~sC`8z{rSS!-9e>bx?PQNdyTujP%uRO)Rxbg)T-j@3!^ewSx`05@ zd_lvC)`DWODN3f#BZLd9XTr1{E3bO`cH^} zjgX$pAwg}7EQ5 z_E42sgC{K2@6M~+1bzTH;_2BemG@&Z)W_yRfLsF%;}hJe*MHuO6;Si8C40svqb9(4 zkT^O%>YO&3y9oAyZgSbTC$=H0qZ(rL$T34Lm!?#;z+n;&`Sw0yWya}zi5Eyn{`;jQ zM46FpcTR^h>YrW5K@tE9JsLgaZzc}tp0$>I5w_dWsoM*1y1gIy7i^zE6in5V9e0-a zSrxoL5qrd{61AODu>K_SH7|d&&s9huenQ2d$e@iL*eE*{wAojZu_^a239TM3UYx;x zZBv)a(F>kHOftswLUtQBHf-T?-A$a>0oz_Zq+y@Y?Z92193j+hFFiDq%@OvIuRVaW zQr?T)4ATr6(INITX3Xls_0R8*gz|HNSt;YX%1IZEC0GGoN0UmN^~=0&9OCjR%oBP} z*|2NPt~np2jkR+wvFpaC*O%}9)2AYpSH!G#N#uW7n19Nsw9*g7qPKbN&gz!bZd$?> z^GZrS>4_@YBUF^av9V17GQQ{Ba&Yj@mP3u)Zfg!o)#MQ`z zj*h-lZ(G0M3EU?00cKa>ff9cn^}_ z*tU2Rqq%L@N6ZKs7_?s=Y7*$sq4PTadKXi6@vGU#WNcG);ByFWlXmq~PGG}T%dxob zCwAS%^WO?M@>&2fxunJGN5X%}q=*rJp%ct-i>bS*SPG(9vSBt~jzp~FW|*Om6#XCx zh`Qwh;1DmzU4YnW>XHz0cycm1q*EUT$nZ3?PibpTgk#M;Sda#Gfp7Z1C;t&)3?L%D829l zhz0KGd%J}(3L(is=uRsYLNHzQvf>K1!;^QC1qB=IFG@jeFVrIE*A)^E!e?w=mHQ4Ml5>LmRGgy4s}k5AFMUd zxkG)tdb<+!b~MdPiDMEsqbO6WIi`X6l~A`9FCWDDvEx)1S>KFDORWu<z^c)y;z7!(2Md8JK5-OTlT zhiwI+=ZVRuU2$Kzy=J6^2RQ{+UXRMOgxoj|AQwN}#MZP-}Be^YLMOvJyJ%QGS zP!xS0eLk9DG^$wma<7`YIv%o1wHbE!7oI=M?t{;KM*8>QCuWJ2$}CxX}PC~wks zz=NDHm*RCITH9>>HmktQ<5o=!5asca^1-05bXQKIu4}e4)SOlaZ`lv8dPpGenLUtG zT$#I|?c|nozOG9mG$fA zari5)I6&=mMwqa;KA9&=_fov_?&o={r?!flCS)9})$srmQOMP$xkS_Tk`TJFZ$7T8x?8cJ}17k*6KICE#rmS@P?TG+Bc|k zA*6ylEnUj9m!Vh2OS<^#EOf5%ez{p-fOqA^^tyeJ+&l0sc^u#PNoQMmCM^t70%62w zglkWmlEMBNH#}_Wx>2mmo*fm$5YfDU(Im-5vRFPXsNX@l$z5rOc`%%g65*Wc@gyD2 zeOE)KfRu2E79^%oL{`_XG%P$Q5~-T9SoRu0FxPE$u}=1#!4Nk9cu;E^4R)OTbCCX{ z$=BQWaUBrstKS2U|yfGi>oJ3Lm~p zM=j4k$BP3-M@xrCM2I3H<7FE!S_5NVrc@Wt0v-LLY+Oy30kEac#ur{YU5WF;{K*pI zqf?Gsmi4ulr4!Itho!kh3h}FlQTGKt&ALLLM?c+EDxW=pi0G2CYcP?edVRWbNIbiJ zD-I|O)7ojgph57_ABw|=mx_H`9s!8Gx<6?@6ItSLay~of__Aem@ifx?z`ie8!BiovVcW0#VM7Hdwa%J$H9c+f0tSa!PiHX;XDh3mtyjI>-&Ii3>w zR;Hwh!XOt+nhl(Kf;PfXo@LIBvhC`02g#zRX}=YE{;~if1`9MDH9_qjS>j8{H(^)T zDn+>LXOpco1UX-BU%pnuz-YX7ZKtcg;&O5fygNAR3$CHBN%wl&djg6d?9*$>igP)_YQV-x1J5Cx*DKY$J2nQ%cLmw6P6mB#gW&bW9LoQ-c z76+uopVrC!#vAoCHB%-w=0rzhnaCsEpy|mV?I8S93Cjb0Mqxw1$o-^H_iODRgz(p2 z^=9pLO!j9-XcF=OE@s&b{yy$_EC9Dn>2KtI7`Tzo9)$bpF`KU2@G+XdX~ z;Ziro`c_SgIYx}<8SD4Vc3tQ4!tdWi$C-)H;5(!0(L(&qo@I4p^jf}%stJ?jUY42I zDiX8?{|~8+r9L{G)H zKT}RCk}$iztkCi7d+*>gWbk4S|L&pM0{=vgqU!L$@zpm|@!=K<ZZ|M`=j*6U}t@Wm$=F9;F>tP68DRD~l16e@us~<$zTNH|gQ2QiYktgYm-Ua<&qO!bo7}F;umi<{O<%t}Z>& z^R3%*>d2XBPH=JU!;dobo7NX!t-vpKD2F;A)}5b9o6o}|Jyt35=iXgC=DI((b6x&6!Tb z2l!g_4SBNHLQtlj^*5Qzz`!ruEfQ&hnQtCA6hP5dt%j2Llec=K7mrUcy|^9_|DZf+ z0%mc~tkJnDwpg~nExI=MwcZG?yZ=>7v39UsGs}w1sOCoD-HeFHT-c;dJAxBJsr#W&);Kz|T2aMYq zf}U;o#ysQcgxghRvbX60yb)4O{)HGg3+fRFrFvso&c-flax8H-e&KlC{fU#nT^*BB 
z(uE;3@f1?0Y?ljD1|Fu^kZ}(>l+FVhzHy9Mz}DboIcWex;sLZKH@|OwrQ-60e!U&hvCZp zI-BX~4;d8wJa0Z~#ChRibfGBDP8RqSs*U}aLqCwdB%@2mW#s%pBqdv~5q&M5_#A_9 z3EJ7a!R9X9QwtK{#EuOCMM=Mc0T5F4wFi9jw7LYeRzqyLYVhkuf11wC)HyDouhoNw zffR!9;qKNd!j?E|poq%$&;_fo7RK$ROh< zq&i+%QB?P2#%fRkXhM;X@m#e?=3a7|Qw&*@Xrr*hQJ*83!?oqcOR<-MP=Ne3aLS4sI z&9z>`5ZWCbV8C|q(Z(3@?4tFLpIYDNA;G<79NL|_e0Os`976}5{%j)k8vNYd6MQ$m z26|QFMGcU}W0AUp&36e9<@2Kt<>kC}%i)EK1-izOgtRg4iw;PcVQAUPMsn}FYT-(y zvggIglc=WiRCZwYWaHZTb$3jn*AB&|1&$XN1^7~RVrD-4T(s>MPX-)kK$K-+t&Q}8 z@(sMNUSD$2p06iP1|g>?x{d2rkIX(Nn;$v-Ug@;V0+7=scyU&(4tDs<36PO)DrbWT3qj9oY-Qc30D5&Gxy!i-2wP+I%nIk}BKgDvU=z!(z zppAU3{`XgVha=r6!>UXT_+_t;0%`5&O*#?;^>byiq1KxH)L}8l(rn?(`ds5!5us=A zGGQ?$_~Na?gQ7*s6TACWq6Qkp)5bYYi>-s93M_{5sUa6JyBzE&pofKl#D>qUDPw@= z<*=SPd~PgRyJ;85hMxJ%-ZAC5znEYk;bmnpUME~7R^iMeChMSQaZPQeRZ?u4k+iAe z;?SB@jHaHa_Dz}0UWZ2HPsQL+q^o`{f-KI9rj@xV7A*^*Fy7l5GSuFvRc1lfa#wuT z2c?sFa#HARlS_^xKPk6cb3y>3_rjITApb}a&)yVM)Jad zQ8qGj-b+zI3>~#}D0gijww!6g+f3Wl@3>5}OZmhj>}9QcWD{p;t-AUx;MtUINOU>Gw&7L8r<5?l=7G;b z%0M=n`PbY2S0e)=yX+9FqE)T4aVjU!mF8~Ybzi~Q22`DSKnWv4vhN(?*NF7%-1ZX;8KFso{Qdr9tL_HY=AamMby2m zr{)u)XC7clF+sTREMNs;B=NC7$LF3ehyOSkE>$CiNf97@yx9XuG^If!3i7)x-QV7F zR}+H-DT`IMRG7!>=swE#FsNd(E|~XdN!()iD#ffhX_poFXx_Y^Ay#od3t!)69!>$A z`(ydrd~oMXt3nLRoJVr;8E8)<%=UH3avS9TzUzBU2Bbg8*y zSs`CgTWApX-Ae_d4klX`>x(M=Tji2Wr^ywk9;D*g?fBKb?~y>43UC4)`$l#*xH}-9 z`afkczzK*~UHy5u!i&Hqba$UzI#0aA@02;qe`;17iOBws`jZ6$Kt`>fkjB+{t?}(V zLH2#odP0B8m+0<+aSA;bMzPC#9BA$qcuI1;ev|xqkO@%l{pa%-ka@EGN$4`aa4Ga@ zZx@TM&i6E_fFz!;LofIgH|p;^*Ab7X+srzCMJ_^O0w&N2$v}@dBDfElR>PP^b;9hi zK}p}vq;(8CoW*fDMVpUh@b;xVV}XGO(OKD_kUYthN%fQO5KM5bdi7#YbtzNyE>3?Q z0o58Kbru$M;=Giv3g7-E@##SQ#(l7!$t?gIo&{qhm6jS88&(~}y;QVkaFE9qkL;Qh zeizUATx4DdK%nInW%}+QgK#7Yv^l_Tjb;8u#O(<7sSGKfq{aQ~bfh9)vV38=m8xJA zC(4Wu9HZjp2wjD>HSOIcJSSjU{B|ZZr|Jz6UF;0QQhgnRiOhfD_JD3Hk&e`-2L#cJdFP^Xji}&=TdKz94OY3+Zi}g9ct!6` zQ<$Sgrq`4aerNBVmrxFh z%&d zdloLv+PmtF!1l|xk(;|r`ev9I#L2N5ZLQ>Nb}97?`-5aCPzsN1cbGX?wY~{+fUi@K z8M74k+XvR?9~R$wcrPoz)gvVl{#Q}^f%OCV#GCr1G63#LAB_~WO63TbU9G7sj*wjt zO4>%noCxBGhSYqs*K35jB|NB&|HjoA-?CnGJU%^Dd*C z>2jH1NRITrMaa%ZnnXY)UNbE=ty~di=k^+_Jc)8*VXZqrmOP@LU!glG?SJd90J$;U z&H!)JPGw5gw$|c{bq(TVrwFNsS@?LtG8|IGAOFTzhX}A9J4==kq9lG27%mpzdhg^e zkreP~A*5Vba2`=ZdAQ_y%nxOZFRRWAx6c~XjG0WY^)8@?HJ2&dA~st{!@jZ`|5?y_LuEF zVy&ocOqyZnu@x(62Y%V~7FD(1FmNa;c!x`v|1}GMRDu8r*TEEJNu*!r&3|^IDV|;6 z!iCsB{FI@rxT@W?DdO^XN%!J`Nee!18_bfj&O#UGNKrIOdMJj1nK8@=Xd#^Jj$u!g z%JeSZ;~zUfbFH6zBbJ_sLC9!`N1!b_EWE_yAnN>x#74zUT}7eymNT19_|Ki#u8bam zZ10}L>84FeFg<^T>XHtaiwh_siS7nNL;5vlWO=yU=VPvMpqS{D!y9l#*-uDS{I_om zgABV(UU}Z93+_sk)HlMCf3Vb-uQ&m(!hZl$Y2~3Exd+b#9PO6ZV363-MtWKMPgH=o z(8FjVh}l7;)v1WWozoNRPJ0nHaR5B#`qb-3_d%UTwj#O!v`y7Azm{iyf&r*yJ*Y() zQr2E=08J9A0%0+NRc!ExrR$T3kWq%2nBf6Q0e!M}<%VB}@S8WpVyx!f8h3ZKQOLJ} z#hooVwE{-iYH&#-x0IijV?1nTVM606d{4p&xRp+4e9gZ{#*2gFZQIfvkXlidUJ>6{ zDFz`qg7|DRFx64?VO5()vjql=Tiw3LpqgUKxnw0uk?KyH*mpTCb%z{#X3WpK$9 z1nT8rLp{?dnNNO0$%oRs_5qYJh2iva+owGK`mnHwvL4D^uzSr#;Y2-E!n;8BXR{tADXCop^kS!;`- zw}yV>r~QK^ztH0Z;HW$!%*RXhZ|e542nyKM67{#IM{jt8{xV1VMcFs5J<;ZD{SnaA z5MSNsM3#T{SJB>>O5=rf^gkX(x^kkO;cgHhzSB$_`Tc=%I8Ma#{m=pPNxI9pur?rV zj$INP>~pugSC6CpD;9{KWA&Nmo^pAzEPHm?rWwncKbpNIU}j!42J5A-Z~_ULxH&FU zHJHNtkS?h~j?}M@xN~@t7nab6c!g?3#Za%~m{(Gme`G%S@^@ zblw8OeSuq+)PFLZ-fAT5j0|=Fyi0W^B#1&DZUtimd3_)Ico^Dz?z{cQS5AUriC7#) z5O<_Eo4Pka5&Pw^E+!d2!wayZoGzV{!_Jd5kOt=+kLH?t@3fk4Ko|kF*sf>x)S!RG zRG$b6y221ZKYh|S5tCjCgqD3ER79J%lipZ!qK@l(yN|gU z_a6bRN8!6q_-{4}J!VI}t4Uy8aGUI%%Y3NVC@?HxkJR($w90&V<62LeNy}B0&k>}O zzdypp3%$COO;V5@8^jE}&?rRgyh}qRNLu+<3=^g}xQkc0ur|0dfZegp<#YFjxf|HrfrsG?b>@E`f`w(cZ2Wv0kCSrj?0*KlG@ 
zuH(Vb*N*CuLN|R2XmgWFsXmvgRe{NQaPc}eOG5YYM@X*jQbd=_Rf zz>n1ta#xBvqf=>%usOvl#&ca<$Fa_(y>yTGG2qK2) z&$1|>?`k5#&q%WH#k-Ak2`XFNIG6_;yLbtN09WmvqGKQ9bGL*$a?6{Ys689NrpaYV zY}pW=cw(bP?{c~E7(kYnaQ74H;61u^o71bsnigSc3f4pOM`pCth+ga+v48R(1|ktF zy0Hk5bz^<~%e}-;Yc1e4FLi_peO^!owG|3VXb@?#iD63|UXEV|PlSZ9f4rD?l|R&6 zgN6MAph``cMjSK9AX!gA7x$m$V6iax!bp~GZt*ah4WQN)hCDThOM7qVE|;H51PrTq zfZz@zQTyXPfv~&rwP|JB6Cjk{J8|>o1B+{#m%p&ZC%>5=t!e*(k7JReGQ!sP?*AS~ zZj?6syX>ix;9Xw8{I>vxN1QfAyO2yS>p=Y>vnzC$eo~W-d!_Y+G@x)&)W*jlxlAS! z)|92&R^ld6^m~Ce&iec9qK#1lFadnWeL1p3NAoy3Gx>*)ha;8iOWFt~28~LDEf7TA zis+i`XcG(OZ49D8!N|N`bt_f7gSMaf#d?U&qBr~*wHS(E7A6!bebwi=<>Zh3N_tn^F^OwJk?Xt9gnT)bby$U(Z)Ip)$^ks0Q1 zP{`{h#ipNYZsFfS!swtbodHlb@euK?0cLP7O+3cgyO=9qY}Y4g8Fo!EBK z12Yv&yO*hk9(Jx10UzEmKCWBCEKPHe9ty>7c?0CTxQX(=>H|c2Mhk~r;(d8luORX0 z5Q_UjGcV$G6 z-beT16>1%_D$UlziU0QyaHF2-uR?YIr_4jI(c2z8NO-HEL39Wb%US%?JQ;)OIJW8Q zjzVSkbuIL$!zPw&7tY8g^W8G@FK$83M@2Y_6;k%6@}^uisa`}05{ww$(9E*PwYykP ze1jv2b0XR+(@;7V*}la`8XJ#pR8B|9>>y$Y!T-z{vHf*)0wvl&&dG5kB%}3NE#6mU z(!_Yg(5>L)d6x2Z$`&X=$v2tQQbQj$Y}hRszhD?eWp7upVC0=1W-XR+b8R#UkxTC~ z4KBy`8Q)ye-N+01y9~dc1Fhl{0Eqo4)lQCv9A10(`}3r3~k2RkUf4?7|FUZr#AN=>G=SbJAb5wE-pBBE(%uzS3nsHfp;QIklINP zDlu>SU~zTK$uH|g2(?N8`)aq#X&so3sGFfeA)dz!36L;lfGBI7N4Wn_f;?Q3#2aPPjtEko>__q5+P=l$SNMe@#Z`0U@j!L8c^A>SVv`a5Zuq(i}( zUSj`%V;9FMeFren#wfDsiHE8#2?UeR^H3tJfjR&lQmjm@KVtL7Wb-IKu`vNs<@W*R zXbM|`6|*q@NM*6$ddG}`$IGKQwBj7ROIU`l7GJi6hQ(ys};X*}_- zW0TSIga}=I$P?V4FiFz<1yA6t_iNpfRQW@vYBkad3dnduNj!N4inyTZK-g&inQ47g zDqw~faN;tX#SCFSO|A@$dOV+NIx2@t+{JQhfzK0}Ab2mu8MlTr9?`P*RczJT9bNmWe}NgR;4n3&7CI@+9? zv+UnxJpa?tAU}Dnj`>(a(q2K&m7dTSER^F^P=FFl;?_{Lw&pJ%^2C5))sb_Vd9qh%U`FZmbM-g8ZAoH7t8cpDu6 zK`Xz23`pnODj|9D4WpMxvYI3x`cWk>*yLb1=QbYfg_Oc+UMhw0lSVsqgG%#OBiDw- z(ruY@!zOYh#GUl6)L?N8(Kp_o>A3xKzpB(pGXsN$cDt%l7CnaYR1-Yz2_6U{4O?cC zl>hX(%W+6#VB*f&Sq^pT(LxAVnA9PvD-MNXEZ=KVM~)d1oxY)5UDLQ&Q-{DoxDr@SEQiBf4Ww*6?8t~wr5@y}1%OF! 
zWb?Av7LgOv|8y$9f09RxcE5nGwy06l>};v&@wb0v&t~&bAB^4&cu;D$kyxqEeRBA(^ZDh^_>jU*u^zz1^rq5lQ6!W7}gmpL8J8> z%!lCx5+?mmm=9B#m%5MS0y1ofam;KNafSEu?;2uC2V3sew*e?$=>Hn!yT2Ti>a6}# z?08hdNB3sUiI$I63=UKpC`8n4t%U(ZzDx)Az}xQ8P^I_X;93x=)0iAAk=wS2F5>;X zFaYM(Yl#3FcR|WGpNq6GwRmsDx*SsHQv&>qJG1r8!h*lMkAB+%ebl=^Z=Ke7|Cp)t zQL?mOVM4_bq6AiGV3B!+fnw|kCEdkQiwGO7QtQ>JpN|p(R<=jeCiLn0+nOYt zrTMk&sN3HPWnF0vzODnB`m^3(Hiu@#n3pY6`qgh}Za!Vg!6A*@N=(;aajidc*V#SI;cHXu-cOy4?K2Hx z{*a{7a0!tz(HB>MOO;TIM>GGnqW=(9-$@{kx!@8>K*t;(0L&c-3g)}2&l`5CBUIE> zEMrj5R#WUuRti+l=d>|^TmG7sJBP_86-AJE-Lq@hzfIHsKr%@jsI0&>uT7kRVP2$V~5oHp!}hHj`$jc)e0 zkr^@usCO~E>wCh02ea?<&!KBjh@Y5IE-`I+Nadnv9eJLQ8)7s#*?rGh?pZ@emGtd3Rw$S^G~?+2rEa=0AMi#c@gUc-q983wO%3da%izsJq^vg(1v=J=8krh@o4}Qa!2aZDDlo+x$ z$P=gy0)FS15&0j<=aR}jb09-bV7KqllK`-lsj?&p2qf6N3n!B(=sFHkGsg@h#4@~p zn$j68H={UtV%2iXBv5vb&wT%oJ>6_Cki%2jw^_;+Nt(lu^`W77|7AMRO?n`K>mSUD z$q7vPy2rqkp2=lmL;HpII^cdB@l=W(4IPkVQ%nR#`24Uk)>ry9TOX`G8@HO~w!@A= zQ`>?~<{K{n^?XIjrI$rk#z~8VLi3G52O!SI=XClI9*+7}{OUHYMKH9_##yh$BNk_b zmU~ayLWPh}2P9+|5J@9?N0-;quFf>pOCPwmTCj<$Tbj5_9xI4h#BG}agWBD14(G?p zPXXvhsR%NdyDoi@R>_zF2&X52NBz^*j@K45&~I8u>;v3Ka`2t)8ka};TPOGygyHG* zf|rey)O+0fk9%#r&y#bh4XYus-?2uX>m#jZeJkaT^GFPqN<~J^jIiKp^O*$Dy7FY3 zY81b&b`ZUW9$&ExFAMtyQ4s(JrKd9kb1hv^*>#(B?%>}iCrPy=&mAezi^6QS^sdY` z^rs*RSn_5^w`hk~UBQlaxKP^|Vz)D!IJJw&gaF{-p1yE(a_my{q)1KdZ6B5*Q?wiq z+Q_vrY(`AqVZgm!`WyRpXdrnDItuH*d5g}W)R)1+-ov~9N9v+>&{dLicvuW~2&vnK z0E)QX==4J@0$JNQhgluM^34AWFsG%C*?qC`*~=UTlo$d9-pYj{$VBeJu{w0!%ND`n zr8;SZ6VUUFplG*-dNyJp_%YEGFt(vDN3|kchzNHAl+lRjHoPKctZDR&(H8n#qk8sO*2#@ZF1JhcvpNHTnE)^80 zI*vx&9-^mi|dOXm(x#Cm;o5|#cw3bC_*zo!Q5QYz`VwG6yC2?2&{Kf)b)SL z8?a!HYZDW-Kb_J1Q_#uRFv&1!8RAIZcH7MVL?_64uE@`?v*_Aho#9=r1;3?_R{lrY z=Izb2O|^6q`CmyJLUw6{tjOEvUVri@jXX*+1FJX(<{X*(lB638^*u7ERbb4d82;sY z02lczp4RWg8a_K7WF5B;let2@Lv(Q6xa5@17Gcdz>)m7I@vigg>+EqAia7K^)%|{o zQrd@nG$Ffdz|~XXUF~uBNM91K{X$CVw5~Eqq+E13d7ljHk?b9MEuvmze}mJHl!||D z_qgE?mU)owfS)9Qk;`>oO#BZ5B@m#~czU;b$mw1$xaV35bltF1RZmQ6Me++`*DxDp zh!fDEis{;s*IOKsIs!MTX)D?;C@g&gZQSf8^jS!|l&UE<7$PUvOR>jHDd^H!#}`MB z2)B}@LUJrZD|JkoOp*Zqu0aL`5g&+*_~axo1&iRa06RcgwB7|)0Dq=m(@;lN(zt1< z>u2^3jiZ+w^o(*l8t~R;U61!}Z)uumJfNcgfBg8d7Z@mR zGR&ZSCY;b=_rE%O%cwZlWm_0&+!`m)SP1S33GNVrli+T_-QC?oNN|VX8X!2NaS4*( zZo#2xoS^q*t-aT`zdPs;gz3Vn;KZJ*m~;k^CAkn^YffXHauC8*X~6+%)*-Su}tRQp-1_#@aH zL5?w_ost@BDC|;PLO2lpo500HL(wS#Otef66%K7I)e)L`ddJj-PR*uzX(dGA*tWe1Jaq0(d=h2U?8R(CS(+Jc#fg;`pU+x=Nykm~B*%(Fr8$ z(8*2yEw@Gs^Ze9M#mIlxBgktM{trNXll8q1=c{qZeVG9#1#iV?V9a1A!S9+>_FNQT z9&?`56J}dCls_$Hb06obnmUfSW@P?5Lid6D5$8fG!4SWo7}Dk%alqTbV8tEQmgKrS zKjmQvn6@yDr1Q8{`vG&fFt3Bb1W|<~48jigjOXgKvmo2)iqeDHZK`(--VMGNbEIGW z?*#06<$P@`@@LOZuLO7~{W8XQviTU^y5t)6oZeARY?!nq_(}5ts607}^bSr(;zFn2 zDgn2w1ZT$4eZho+-vJsr>x`EURhL~bZ_bLuF_Am9X-&=j;7>H6165N{gnbKq3WP^h zU_Ilxfb7MhDW1jTqSxtWRi30D9rhFFhz0*%V$JEy8~Ru&)dqX5F!Trc0wl;2Qj($3<87}F63e-|j$b3EMPRe@uN(zrFcZ!{2I`=GLwED#6S ziT=Xd?=2xWOwrNPW001nPrN})vHE8s##&hnrE7m$eefFaoR={_T^bGnlpVT{vQiT5 zzW*GD`{y6|pARJ-e?j00;$Z!?XE5+i-6XKvK(}Q&GEB45fF2obnSkbdEc#{ahcPw5 z1{A2NVZaixrNFgf)^HkX_o~I9iPeV*ls}2?2x6R%wS@T>`RybmRy0yTG9a=ABH^bF z?JPv;nj4X3()IJ0R*oHA_u-0aDL8hHi^Lyh>sn+K)ZT7T9nga>Ge8?~6;c;^q|Wv* zTaaMP58KXdtI!i=eCt$2hkg-Wx>haO{mt}MZ22)=@M=&KxgqUrGl71eTnv zeIO)>K&|Exh?YJoV2I@o{x1eB|NKDUK`u8B^RXSEjl$F5TtU|pw^JNTLn_fXSy`^? 
z`rfe#sp0?{XLpnStc%w0u^wZm3>5B2 zO^=mt{y(Odh}q@-_YL&JIZ^)WnQ9q0)+mT1ZOS!}i||RevZWhaSD|U5?)TuR5f26-pPCoTBL|HSnYq4EbPY^x~IH z1n|}ZfcBh(8Cco;+7}*#84P$qTq<*lfpx(u>E;(y|=S(s`~kE{fY_JpKd6c-0-wrq4a4HUdmE+6Od4FxA=aF=g?9B-QsLro4Hyf+v z{4_QG%Pz>a!Ijie9!opv!RF!OmzcU0hSh#*zk>shTA27HtTUzka#=q8TO?eYFCfVG za_RDfdI_1ZN7-{p_UQQDA`|xeVp{NMV8NliavMMIUMb7tBd}FtM^AS++vi~IjJq>U zI8OM7pP<>$ryY^i{H&(UM!N+KT+stZtC@~=e|vJ~f-leai_=b;{ZY-wve_FS?v5gf zfIFmxGA1TwGcW}ALzSS%luu2w>YdMWTk~_9t+An<0B_OzqS^}3BquX7Fy|vERZR?R z(242swO8zV;w`wf=Z0qZNwRLSr zqrOFBE@$$9T^w)p4?BcMoD12 zp}!uyw${$rDe-L9LlhX*TPl%26JQM8l=- zyK?Q0k+J4qB!iF>#e^RVxxy<(90~u9-+wCS|JwtN%U=b?`XA_vPrJ%?u(TJ@wtJA+ z&M?3FHApx%R_SLA7fHayfUG>^GYIfe8k;9gF=5SSU; z321TMYbqez{X=S?LJRZdzges+tS|EZ@-i%zjQbr!d0?H^L-UQxD8KHvd29LBm<{E0 z9_?)Z>nD}o&9pReabIujTZd%Zl`wysA&sa(ci{awbC&dq$-UYwuHe6N;t>-ot9su4lV5p~Xr04K3zq<{!B>v|ODFcLxh)3D2{=<;D=05~p-EW|ogWsj4|r-11-Tl~+UFGH=PM(R!xndzAk9L6Xp6m9gA%?po-j-Lisx)4g}`!`{nsvz;stK+2m#d$?c+i^AVe9h*Q(z#R;+@Xbk1h}E< zaT~h+x~gMTR}pj@n&kUqYTK}O%!2OkcCcSH`eSD#o*zawSM1}VEJmln#_{N}YcX5X z|GR&8Vq?A?2cO2`Omu`ea`cZ`6`eU7mmX4l!}Zw0irc0?#U74O6T>0^& zW}_@&;fX$1FyCo6>dPnq3>}(ogncd`m3&Jac{B^Lf3#Rv?l*c#e^`y=T}GTRF|#M? z+DAohN8tFVl|x@2HA5SVSkSYd(Y=m8Thbgu6C+hAFE{)W9`JV;dRp6xPS-w`%~CMqX&bLwuTjT z=NzfHqip3L9=V^gs>SJj^}QrBJnVI~RbuSp=CAGgoJ|c;#7ADf%u0W!dOO6XGTc6y z{_e%hYf)7fc4;YTaWP|DF8@l8Uz_h($K9xx!Rq=0psRmp=8nD`8Q;XO zgbVf5l)^k`l4`+4QMkRVaX;)3#>QHiQtff!G*~1{a@c6_<#PSL?CFM1)T&J znoh^I`||CFsE48B9B{nsG36#}tZZY^Io+PS*Nm7eIz!`Bx&UZm_KT*x@nuzejm-?l zF4aQAFd>+LKw`>~fKszqDXr|}6IV=3B`y?!ifLV5az=~b2L|_I=A6qan}EHyXOraV z*2vnfMRs`j`F?jiWvkK8^@eCy{N&n;gE?fI@!DqiFZw3o z-|joNZ!Aa|O%=hJ^vpl~z{hZWrf)pWR7axlfEwG~=L3R1Bzu#k$V)!1b&*bCJAcZn z@2%2SICe%za66~am#j&SVCMoxb93`f)dCfj6r(m~mmMF207;czvxM|atfZu*I3Ie< zsntYFgZ0C4zefLj5~FgLu}p+cOU>&=(kx5r=gsTUmw`Ar4uRF3_pe3h0Dr{@dnTU7 zR!WLsqXz6YQd%zGSuS;&>;=4^?H=2sOP0$zsI(r7h#{zWQZiNnbFcX(8G7gkE-Cla zNKv4_!3t(iMS^^;QD^EM9z-k`OSoNqYv}6l5B1BU!2Nwv{j|!iDH;o`XrvLVnYI-( zqgm_4+1{8`Ucd2&wOBQMFWFi9UFQ~VyC zzPPxLJ{mX*QB6IbB@{G^^N1Zw_w4W43621C7+lfnaVV!fAR0imY*>R358+43HZX8p zX}|2M8xtvq?R8!}U@fOzezWEza6>|(c_lJ^0u5I!c3DeOPKL_*ZaLhH*g3BFb*7l6 z34WBU(Ri&~^D`ei$vnqBMaaa>m1z0~XpI{?lT2?pAyelFoIK%Z*F*TCsTKG>lcf0X z=nBMG6hbn{?pWQVh+HxKEdKLL-HAm^2Cv{)56HhAh_KszVcq2?kAe7qiL8Hr#W4h9 zQbQc>CJ55pfOK`UIH4Cg*>(xhj=yNVjyB)M;`koB`ywxQHJ{^mw**mk9Qg}!d-0$m zbcr554i=q>%RvHPabaC&=X__f<+dl3!7kVlt1I8EF*rMK2^zvZ?vFVR78(s*Uz~-e z+Am#mXmfz#?NIStzbPomB5>ky{*($+x0&v0 zX|UmTsCYjxjvixb=_~P6uLD@?5!HTueO2ZJcwIaT`%#8KxV*d!2M~osfi8L3dIi0_ z(aptCeOm{#un+`4ozml#4g-?}v|&pZze0FiZF&~)Er)fD!aS!@i#mC#{D0;xc%GG#>C|1_An0 z?Ajkufc>h#WFKjma|y6VT}oQ|Euun(2u?h;V7M1AIyQFb)s?5Z$Jq{kD!&VZwTn$0t-MoTG|0EY39r9@Ap!(R-DwI%-gn6pk4GoOs6x^vIkSy)&D? 
zK1%#qxxQm(-P~;FT8L>qd zbLuAxH6?`doBahxS9JTB(e$U2leAYBM4O5^BHcfw&!I|LxehDEmR`A0?_<;rp0T*2 zbD%LJy~GGE&fB@_Io_DR*ko!lW!8FyW3@`F5MMh-tA&ousGN!PXZz%{Q34kp8C)ht z-a@?Ed^j=v2_E61M|`Wn0a#}mLT5PM+ z2Ezb8ZVN7SYzzVkLQs zF7NCf_;&gBPmKjBW5Qb)x1tTNF?il+kqV4Qg23f6(IYb}|y=#%X z`d(V??7`&9t<5sP@`)<&{_dvyHXMh-Xfv9aMU+JJ6OCS>G?-&$qG;X46ie7g7$ZUS zqKS>YO%ENTHXH$X$;G*+1zBcU;LnVV^aaBPd3GRn!S$(5i8^ydGxN=%&o#z(-U=xp zg8t()eiG@K=5=W$zY(zCULps>kH)3^Jc zhH*9eB!97=7ol+jwKr^Ar7&T1e1_QZ7-B6ZCiDUF@3gRLXy|l_1iTUZj5`E+p3Dh} z0LXGgMz5i{LKXW>`|of2FOXNzym;<6%wUHm$(nJMv~#gf{d_`g1QA9_Dyrx$JZtQj z?`ju0*7Z3+9@kA@DU_JMj%2K0RHsVSOsMer0{-YR_TvYy+Pnw_K%BggVBy8L3aQCu zi*{I;oY2;zHpV3Z#RV0-(AgrUVxuveO&w(bRDoZ19`{jbDGJ9nozme+auT7xi;RvB zD%T8StwNu~rJKIba@S~1}DCUTunr`@7_oo4;wXU!ld35pf)!%S=H=>D>i#Dc4tY{Y%6@|kkIETU; zqkHOMbyAw;QG_xzc0&W?udtOsdJL`z}Jbq_@ul9QUN7xzVsmm@E~)G^B5GXdMUxz@c>zF&zlZ{@|ETEJSWo|;yu zR~B+dkTmV-HIykC3a4+O3b-BY_ims_o%w3oujCo9hSarAAo{bC^=xNaKmPRa;_yRQ z{HjU1`{BZdk3$n1DW}bJWyn;Hf7!`JPcAgXF4#!&%+6syz4-JC`H&`KQHN^~v!U0) z77QbM*kWkhLc&T%^%t+lYX#rB4pVVCLW&g_Xpi*vo??Y+FM5^jTcVe!xiT;|g>VWU8CC(O;2R1T9l z?$A4cHM2GmUx4vb-pwx7rBuR9X}>N+oy;6Chsdd`;mc)UVdxs8D<;W#bvzlOV#a+! z1tF51PLdO>UPuvc?SVcj5n-g`59CiHP_!>I+nwc;=zflJ@1OC(-+yfkGUidW>_i(y znwd>Yk7peI23Ndv^KHNiHVbIEYRb>6#&&DMVl*0=Z-Cz|w|n|GNz7426yxkU{RVf_ zYGDS4VSU}r969VSE)H%iZqzH-8pzmaS{}k=bw{tNNGYk~5E79t`<`?bM%1 z3X8~hsB2n)fp2+TgYQS|8V6B+XN^Rt=Tz2@nE!z7ZsG*^!uaPj_|{$n(A(x83K`-gP>=I;YNmnWx78 zo)a%BuvKS?UVd|4@u@W})63S=DxG%BX*p=)t2Z>!)f*c)GOO4`5jle+U!KsInuI%F z5Sv^r`M8$~_~h#^xbI0K$RtpOJeepSWYBN%(1Z==8^PMV9E~3Q+XWGMOf(~mj+c!m z_bIvaeT+ZNs{Veau*u;CNzM$Y=A8z$U0DpqaEe=%*2O%p?0(6S0_#F_+pk5RC02@) z3+2075A$W}@WsE0Y3&O7oA6rGA#!pCz)wn%HRYn=-QnvtNN4~lu8Kgw3a*Zzx-|Q5 z0sME6HSr@!?pVWxeqKz6a{)m0z-SsXkt+dYncCQTy;Y%QMZB}yQcvSL7aLDruE7-n zf)?s_^YQb)jY8xtps;X{UjwtHdwF_yt)`%g50hxTts$%5amM%UaH6xl_9fI4uT8Sj z(yEiB;}b9OK-M3IirY7Oj^WZ~PGC|2&G$)|Kp zvoD;vBRCz`4s8N>AjfE)M-fue;O;lI_&K61`f`giuwCLT+E{OrZ_~)TV-+b@SyADB;KrDTwzjc*_Ix0l^D+tLdP&W7I9E5m2SC7gi2Nm9z7PaCNJOmKCP~($ zq>#3RQe67nh2y|?<$OC!pPh-r@I-HGthwC}w3t{4$0|q+e7UjxBP0Uw4{*DbT@Um1 z{Cev?r?d4m&Udt_JhL;1f#gPWTAYloYp;vI^>Tdfy{@{C0;;`VOUo26p)GYDUY*^Z z^MlH*Sl*~PBTfZd8WZ>O@J!iwDLVu>x?BbV<(Vtm=y%p?VNH-d&@|cu!pIX{zD1G( zL0=5alq!SvcVYL7q3E8Tp255=JU3S}iMYf0z%nP4CN)PA)%7;MzX5}o9o_00EXfnxk zzbZ+KzFd}^^fA3KbPNrTGL+wC>nX}1DO|~^pfAp7a=-c~4;&HGA?!Oai@QO%K{GBY zHcct)4ly51&D+A?&l79xhJoA2D1(_@_a+C|r4%;Rqrw7jl5D$$XXe&OWuC`Vj3H~5 zWct>2eNv{($tROFE^#D#HKp2#ODQM@^e$Q2upCBHF}6q7^tr+Y@^EnhZ+VwrZSfRG zU=LQ(Iqje);<|p(i70RL;V{wj|3s0tR$+dL{j_uf^;9 z^Y&2D#33!W?>+>tQsb6+TNn3kj^Agm*1w@>l@T{G}N`BpA(q95i_9IIY~Zf}uj0rkhhaQ-Se?Qf;W0~Tb6g2;Ot59Q27^p}5! z8kTb6Pwca<$QAVP43Px%z_@$5_&+#@-vj=>JnbWl6gAB=8|1053TX&wgcb*BQ}?n= zX>sdgJX1Ig^F|_&xJy@hGE-lXp+J8!h}Srxi@5G^?GC@U^`IE-t9@g!Mf6y=Jowu)T4OcH-B?B-Exn|g@)723Vkk@glnkVxu1`W z42MLe5jF_e@t0+RA|@KGmdH_Y$V-N{M{WcmS8Hg^ZHY`o9mFWzPk-mZU(!+Pzqf)! 
z%=XfHatnZBa6R#K8auE6Y2~wi0?t!XHZdo$L=D3=S<8{J%)u}$QtaC7q;5`jnfyT0f+I0MYl-f9+(7g%B7!g=PzR5hRmu z+Zt$Fm(eODBTa2S^X|KFg>EWbNnq7&QhUQ`0p5o9xyv#s_gk0+LYB3FO-zX}Eol0m`*->+0fisq*eo=3TVlRSQKC%k@!P+iYkAKr4O|l)-qq_C@-7nRUMW(-|moqY~e)&-cZpq(F`PSVWfpha>u_`82 zzUq^htq2RkGy(#MPgY7?%~hy!fMCK)WOa~As2i9zVMvG7fx$C7wsXwV)+}47GZP~es;^b5B6oJaec0n`ROD2eaHRRiJ%bC z?~D8n#_1$VwXceDazq6{jHMXZ;x!sPzE@mrA+*owvdk6NHK$53qgt5I-JQ@yB5|Ql zwNf}wjn~%3JNyJIsSITj#2bx`%~(?maXE0I{ByI)lcvbA6E7G8i(88i0^s9#To%bj zMw8`1=RGxR-L%i1No~o=$w3bygnBb; z{nMvs6Z(807xPZ(h0w>_c(9&THx@~qG{T71_EqR*)lKo#6C}k}Nk>-&>`%$bEU-yQ z_HipozQuvWYN%b~tHhCExUyh>Uyt}M@dRIyzyH1ehs;lf;nN^!dXp}ZNYv$4tqisL zIWR;Kt}6Nv>s4!yS%hf!-f~4;9J&LBNK$?WDu@@#pA8HQXh732d#bNHYyz;N1Zw4} z=dT1oSQa~vc+ryUq+x~_`8X&Nr~xyvvDL?lrv-i&i%f`7ts44^oV+MfZHNxr;Su(0 z)0xSrGZ!Dr7G50{gnhEpsL;d6ahNp{Lc11^^Ht33ulyPZCg|_)mDEO|2A3dEFZ#=% zV^hD`7OJ`UMond*BG_8c;iAhcg7(IK~S7LqUP*j z!po1{oYFK+;)L*%i=bc^i&f{J(wTj?RB-u`p^fK%HS)eckRom6)a@dN_06+9T-(=g f5CLa0_jYb#Ly@q-8poKbTf2^)X-hu z_Bro+j{2Pc^L}$(GkfpZYwfkxoxl6O*M6^}^aB4D*)0?l6nt5kXRlCD@NiI2(7)ke z0smal>BXX;+*YuZl2VbClA=~|fV{D^F+)L-c^{*Rt)<#|KSlQy4F=9L3ArDNgpouN zayV?hhqUkHWY7YMUQ4Q!dRSQebEo|=@W#m&I+RpOr5>5B*?{;D5O)qPrTJn7Wb)#))|d^0DH3Oq1G;@0}~@N zGer~kz;%5c9GB3{qxWgfQUCRgPI(}04GId~twr8+y$HO$rzpD&ermDUC@*ih9pt#Z zqBp=V3A-(XV;@6g{?#s;C_862Ml7-V0ehP=3U%JWS?O&Q-ddXeE>cqoN}&(;1!MzF zaNE2IFP2x8?n{c0nvh$duVt_tO<0QmFuhdd0jos!_djUM)jGc=D8U-Hx|@BIx_&xG z4|>7H`Iaw#16CUGz^sQ8qFl=OmZ?Ojjf&3lZX}^Ky6>WiacVsg4-W~=9<#^DbLzF0 zBvgjB*UN$>b>3pnzu&)p$5Ca5I}bhyWw>#sA{OVi@LFOqBa{l-g(VI~I8#2XUg9+H0U zA~}B6v~xi9&EGkcF6k7NGSXA;RDxCVr9>LDOA>2TTSu;|o^rw()fckRDOtzQ=iBx^ zFO`a;Kmh_uDOAFo{Ni`mm14vU=z2p#RL|bWneRz`QM;b^cq%aOkJ5y8OKZ5vGy=1& zK>tX8S80?O<5U8R+ZRXRkO%!S(S~; z&c~r*MzXi*un48zJ$L~Cg)4f%EyobDd?$~1T>D&8EljlmACA}v{!JfwKYi;&g=qZd?Ild_9xn=$Cc#igT z0rzO?Wzv%;W)IrKO?aF#f|G+c(?Ylr;;S!KtS5~cY63*1*4C~gyl8A%gLOx;B+cPPbmYh*p2|3aPdqoezG3#eRDN=%9Jb zO9^Tm%t&eiI=}Y=7YfyVW=72MxT}7&Drm6*d}$OmXu3~H#_^8>+y^j}{LAKvs?ltH z;dVz(zNBg7b?B*gF=%Lb1O0wHW_U+$5-#xArVShSSxe*tI$T+4ZbID85^rb^B8g1t zUxnE`&5iW>ZoEwAfNCj)q`niWw9L1U+pglQhRr8gFcA5Kd?n4;f#CK3w&jwLkDTVmvf{&7p{u9V(njltq=@I43yS#yMu@-E#j&JxbU zz??u@NhV|A1;T~f)AE#o&}X!fvgHi5t@ZKGdT9Ho*=cO4ku)jPm3OI#J!I&j#5z-! 
z6+`IuWZg1ohlGaMhwO*cUtzDKYk#znbYZ}bjP4|IpsD#(7w!4MGv@NcCG4|oNrvky zS|-UZr!MX;e!0gh+?fIUuw3biEO*tVui7JSBivSON4Jk29eqqlNPs20)hSQV0vGDQ zCJibMtnQ48EW zPp=mmAASyY3FZpsxm`t8`QZ4$Lvl=V*^grnlessz-;iBAG=EscFZO2IvW0Vx>uzE` zzo6|twtQT>@k()*R<`y`R&W+g)~#W-tb(l6-u&Lh-dnxL@iU<8gtFe@Ud=>MB2+hC z>qAAn<;&N5iS+GJV3=ji*nZNA2a9#yv%DaUBaQxoqkOk9%rU;P6B`Tu27Xl=W^myY z5qP3V!@Vg%;_*YA+V4yjVP$>LPH{+;znKHa6hhwg4 z=p-_=CUpP;pW_K*V+<_|Js6tnG+g!hej|#A77q2G1waGA69NnT5rPkQSnqfen%@6( zSB-dta*Isl@#!-h8ah@}>F;bQca@v+R-ND0zU{juN}2WOKDoxD<0sSXdiQ4q?o*Nr zxr~l`+>N~(_3m7TurpMT#7(?Q$VzS`(!#_^%}M%jIQ}IU&+EQ+ZWG3K(xq^o(>6W#mID0q@?^uzQG4 zpFbrM=QAoK+8KXtZcBRqf^m>>kC9S_K7%0(sc_X2zO1?LG308wIb4gW*R0cwm!4`@ zQLMVmro-eOi%f3dzH$qOe#`yPr!dzu)WgoDsmJaeLlRZNwCUwydK|Vjo79#hTC+So zGTll9maSYqG+pW(W~h?g&XgWHW=c^VQ&^7;>a6U(dxqgAl|pbi2zfdF#QT*<&q-&| z_Zv<=;|)Jcong9RovbNELy;@(rz*`HJi8D5ANKMnn)fB1+1)oEi=FtsRX)+{*t5nO zKg_65_CROFiJG&_OzBdzveL689+h^>Y6J!u8| zqk7kwMM!(gVko^5eI~tOEKaQ6rQ1lqnfM92qTyt14|w9+O4~}*3P|&`@T*QvMXAxs zlKN}S(SkPx>;_jZy%~I@a{6+hg3Lnod@u(dhqab@iS1b}#|mr(#C*weoDb4#biKO3 zZ^73DorCjdOv`gjhqD3ws;a_(TkdH$Qu6r@~7WIzqqjs^{3$5ML zcH*;W4obX~p_EuL9yg7v=`%M~bCtCi@ozVSdy?mij+%q1IzELymIwRuhTc9EPT`wK zS9*`p3-?a)E0FMPZ5qwK1aMlUK#nGqtO1kZx19@;Cf|LFXo99qN|b>p6r(1`$If*dp5vN?qp-?5em8;Y=HD9@u_V|o zv%``8KA@;?oQ2^)coAZzCHqEE5rr8z$3ekBB}2gk&QO7`7%KU{&!3|{ zK|%lPe>4=7x0Wav|ISPKc9ml;wl83g3X+asa?S~wvIxsqO^at5CYDB{>?#4{Z|twYf)M) zMHOl(h=Un5KRYKoC#~2mYHDf`2h%q~ubw^spYFhwD6NH)lbsL;2NVirhw`vP9Lzbm z1O){-IJr5vx!HggY>sZWPR6cmwvLbg^^pI4&NDN|*AA9;PL>c`>YvXwHi0-hiPF;k z9O&Plf6dd()$%_h**gBuvVaA0{Jg`##m>p`?`H#DMSlJ(q+;o6W~2Sg5)9A`7(+~u zpI_v!_J6zcpAmoQsr8?p+ydOZzjpoQ)}LL~9nBo1AYfojC$azV>wh}`dh>reig5f~ z`Y)vT7oGq57a+9QEfJ1?GfnK4n+^{Zz()$pXUb~85sv2F>M;=H2{{UrTKJ>CDEJjtd`T%|!CxafFN z!{Y)mrU9;~)X=pq(qJiIsrI7{?JB&7DeYp4J<*a95k$9qbIBmq+LPoM6@5i_*0UMx z_GT4k{$`eku|DOXcc+a6CL2#Efr5(h+m9a^;k(1iXA7OtQHhC+f`V>QsfH@(=HF=U z{;lb+Jx#RJo(4RVRkEd%mHkLZBPO<$^R2x-NZ*i3t}xqMO}$Oiy6Ro zJIPt30~Z@^ZZlhDIZ_Ai|grPZ=%TF-q8s@B_bFL4Tstn<^S=c(t)Xz z!WtEwRju&!Y5Fqhz3=adF{7Zg%5AETG^JXY6r-f2S{Jg;AKYW0ra;OXQU2{YYA}gv zJ3HeA*-nuZHo=J+qHvT|GL46b`JhDA;{QsIkG0-`7xTV zLiV++3{Jrly*bGij6gTo-=;26fg6a2O$ADye+|<{8oFg?@WwK%_9g}f23iSo5Cs0e zR{9qSRD>~a;o(V(L%O=U*c(s|=s#KJF!ik|HnZd^(-@<*G{gJ3|GvUniE5+bLSYBq z?z2r>(KL;uB;>!e^s?T<>UOl!i}4UiboYDBaEAwTO7IrwG$= zf+aEk)0%uGZsU%d@SUAu5~UJy^bVW1!ar zu&!n@0^5gXp7!JBS@RE@(e_{T9@36^_4}v7WtLq=K=vVv1Zg+r-kuE7d-bgG} z4jO{MkZNvTshyXQsAiv*E=2va!vmss_Xxz?x2l&{S7igR-Vc>Zt1@GE42UkN{W+sw z4S-)Qb6ToCrGV?R{R2EYrku!(Y`z@QpKNkIdcRuxXV zeTi%ZI{d^}cwmNqToEcpu2foDnhG}o#r-Q*7PgA_bc`Se0&*&<9br%XTsj4baj3QS zuT%SLg?{_DFj|{y^TmT0=<km?W0_VMP_~iU{lg+-xh56etfw(xr zRk1#QvQ7dRjy@>Bd1~;rj_7G%24JYlM^t@+^iy7!-1$chLqQ`U5r}ouD=jTmBv}ZK zN*QazM?+LZaH1}Rh%s&bNfHUs`#4yE&J%?!Po6{sybTi+0_%X{0x@#25lb(Etz#wz9JmWMHT6il(Nfpuacsx5YJa7zH%V zRxm23INf*Fb#W;)rR((W#I8|Hu0H80>!7WdWKaCl_sMIW5qMkA=u&1`LSx z*4|>%pN4M4*3U>nE3RJrZMdl%;HPl0vtbc-Unadul0^S;Ac^F=-+OzbrQU|aHkxsV z@-1_T2`fng1JnZox+^gKeC@zBs!5E0K7xyZ@HUP|9=0{7)|^_TK;$u4_y33eJ<>Ni znY|rJYacRWp)!u;7oZ-veLlG=Em2)L!_`Onr#L+z1cbgmCZHlY5h}^eQK*B7YiymP9-TgpxLD!D=_>~chu#jWzvHV$5{TTa5xtI-L6-)qqffngIiF)n-25Xx=Noi+i)qq{KO{D=qK<=l>Y= z6tg8LpyDl7z_#8LAf5?n#WGX;Y`uoM9fDJ*|CdeCx+~yTDszk!Hu7)&X=jB<0{N)( zqm!|Jx*n~XSi7~S5&}uLHvgu5)cF#!sKzr{3-gP2w-Ac#k zB<1$ny#zi*d-N7EGzMzDVQBEu;#MFY&Zf|Xlti@-1RMLG&Hh(ki45Q3;K1#0 zs4Fv$2cN~?pO<`h7b_5~E7dHt&W*Yt%)6?3uiOvZ62+HM0KAykskG* z%sx_W3idzR`1#DqO7WneAcy52_Sj#&;2KB7A$iY#vvGe?@0}YeIiYRB>@y-FqUqdb zG7JFQ9#n-B|B16v{JIE>{fvZT^(#@0g%Q5HgRdu)Kxi!*p@M%-=QkCQ$j2S`wR6{b z4WiC(o|i#27QLB#B`whB8LxTIdOqpzw!bY!VwVjAZWGWfR3(lP_t)Z!;hjk*=en4<=wKqZb 
z;EiR8&rlN0uX9(c@x{lOLSHXE=?D&RR+1QJ=K5znd8h6dh1NnwR4G1tknLZr8Q@Y+ z953dWLLuThUW43QdM*dAWZO)B3T~JuVL^0uaf}JZ4Sey?d?>cHHR&msB>daxQNAE* zo2h1BJ7eQ?Rq0c$H(`z=$J;vULxT^o{xFz-T>5p96r(PbkC*$-ic$~@0w^}`GmkGb@}j96G)TBK)VlLo{;=CEANa-Ec!#A{zW>w`9)8&mJ*q)NBVyDcscX;((^urMez?>@rd+%SE zGxEXrgbibAtVZMZSSLV3n6F*$^y9FQzp?C>KGcNu5}ec&R8$;fL}Rj3{I_k^YG*QN#{O&j?P@GidH~5@_#7>n6dj@j(qAQgna^bGGqC3i*-%G`0I@L ziVv!?vXbG)U3Jpk8O3STi%f0p9i8!!Ycgv}+qB0_{lP1#MUg%E6zk&+F-|*UH6l`< zs_&pK5V?)N`_B|5>Pd>zjWCPBXe~#__VX=&1`EHM5kDOc01}VQwM@F9&idN<}KE+kOu3G_@OY^H`wptMtz!=QCd#M!NtErIc?~EA~fStu zL0!x%s*{O|CObPHYm-#R42*+q>c4KO5gJvKa7CnCai3hZvs;_A20Ikyyzz8L3{N|E z^onZNIR$Oa)*F_G?3xjC5aJ(bj2&J!&W$LqCUfE9XvQ)x{W@g{OEkdA5u5y`F26ST zix3iEB5G>AM5G3NYF==Fd9VJDFftZYV-HEj{ezVl%bgk$>lfh^wEDu(@yx6ad;?lhLTrnyoftgac9M_il zq*P&-S^?uXfVU0f&%~9h(gPx5@p=Dx9`_6x4RePL#+GMqw?!Q+I=$vccfltGbj=d5u)A?9KX6y1xAS*ovC8$&9$L@C$-RV$2<3W;JUOriD<6Mg!z7(?@do>@e_* znR#Q9-UJ-dcqaG+n}9N=Ezq^&yj*EJ1H8FATCAC`$H>yCwf157Ul!D6E`AoXKDdIM zKXFrFUWD``8Q&+svUh2lr##x%b3G{o*B2%mZbug#Z}A~q^rE840pn62DT(-~fCJZ! zQ*r^V{%VC(&;cQ{MoE^U{SyHE#yd3_z&qwIA7}img(&j52eAU@QUsL=C`Go6lqkd} zF$@zgjG(aHl^5M&aZ3!;vQE7@NlZBYSgjBGAbcZYm^mxz!EK7X%*?8F zu&@|Soz0PoO-pX~)3kS9@{@Lp&l*^D8+00`>(~~Z zD{N=<8r+)gEF@JShXQYBXzd*ic zS)jM%jm2M_PHenG3{+UZc<4N9^q%KLV7lCXm|uH;FT^fUa*Q7E4|6r{ufDr${3`eo zN4U?(3CAFYojqO7O8Ed98HM^&;Qrye{XARsNXMz1gkwd-ab9@_ihqh$*h`D ze5@#N>O@S$-hWu6ITz_q+&&t;eeKR3go)`g2!n19y?h@2V6nWLh_J7xE7lUyxBoE{ zZx8pqL+OCehnJ7Bg49^T7^v&MV|ajIzD3YmP<)-cXQiPWyDpBCESiuAlGFlwM0Q4| zTu6>pvi-cwxC&HC6?R&i$L?}5{F^`8Al+>UGMW++ciYc6z-JdiP@%Pq`alF||Jff* zVB+A!&E$_E@3JfyVSczr*vHd6^tz+ev^8$ZY&aHdt8(&zM=8$n?cvG9T4K|BXi>u^ z#%tvrmxLteH+O(Aw|Bh^dV>n%gT9|=9MBf4*EN6WHVg8+@TuFv({415h8Ie`<>yX= zaHwWh2OXsEY*Zwgo~>D%{+Rb3nxdFfibQMa2siH@samb=*qYl>csPEe0M~6h+uNd8 zYh8tvk%ok&5C4di%24lC9gQMqWE6bwwJkgv_!{h66NxKoXv&Of-EMR>@7`URTx!?p z0D6+Y$&t#-zgV)3RX~wX61dHKw!in&TC&GlUU!C!mb&*om$gl@VLnZvPZlbV#=<9( z-r4TB(Pq-^6%>lbH8AuTeFkKb(*Y|cFm$iHS|sRB+1X=c{sZ<+%k>#CoGKTw6dGrUEx2oSifan5v*R+VB$X|jh=Jk?Ho*= z0k&+Zj%N7aQZ*GE96=txQ8mk?2Vn5jqIyAH2L)fv>@doyu4=9|cPLAR=+SnKZLxD5 z?p38V`MG+<0jUqOZWYq*o1BadH9}9Y8F$Vdd#Y)w;d|E3Q&1P#C{l-1`!8;dQn1{?e1!#;3#WxCJxbEBUtsp`R5o024*ieo~*` z`~S+dcLLspiRmF_}7%t_i*<_;Lqp>VeqoIB= zs)PvQ|HTXj`1{7)Atym?t@+!+r{rEM`Lm)ek6Jv!bRM21uBGO-k=wqFY8hEL*EF4Z zix<~*ybVvL@lQVh%zY-Mq15*p!5XD1c?JV&wuEdr{L?)`F{jh`x~sk%xvn@vZl#2* z?AnXT;NZ0EI+50b`(Awms19%XZTnUMn!06!DXQoGj?SO7?Fq_pcix5rE$%^DjcS2Io;z6iR|+*G97xyP0#S zMJ$XAqvzSg9K4^VQ;4Uq>Cm{O=CoOf#W*Wl&ijfDHSRn_Iuxg;_j+9rbQOwTnEHG} z1w4{HOS9Enq5>fga&rJlp~`l`qROYqSYH_0AUI_C;vgL4ndi4&pJlSM#^76^>M8j- zM@__ljL+&puS$bhCT3uG;znB$Y@^bpc2G|_gK$5KZOUvgYDEc=u}r6Gc}fe{9Y2c& z3!626R!#zw#gBzl$MCIRO`Ek0lckAy9>!C2_wY7PE?!I)0H)G;7Bnpt1ETf>()1Hx zU0f*|->iIU@HMg3*$*!{FM*+X3 zB|j+wDFkPZDIg2NFXjHXq{VLxX$u2*FyhA!zq!dGmxv;5ec;$A_-mPk*EL`hM!_AZvyT+&_xg`h*v zI#EuSwVe-q3m@lXeP&oKbjAV%lWVtUp!STUqvvx#NKZXsqps_~h&1ojUcMa{G2RsD z$|y~7-qhU6{d!fz+@sjtp*s0^Tl@)+Iforc-{**aDDCNIxZc=vKV0`B?MB(L{3$xO z8n?U9oyjBKhoYVl@OoCj?>{Qisxjb)%&cqdJ7e$j{;&T>0AhcvaRZ)fsL*q5wgU*= zDy)<@*1NKZ_d-~ocUo+_U_uF=jvHe5I#<;rVA?}*_o{vZFL@h0RT+i5^jrXZ{q&d2 zi(kUes8SRUpfD@9&$Z{;H&!jXjmC$`B=na&RwGXyin^BES`eR?W!^VMgz&aKpCTW5_Ch|`6feo$YX zA4TI5GsK^qzoHWs))hPd@z`8{bF847Q%Jv?w#mALRp(IpmudZ5AJdNU2nh?XPKp&Y z30Y_s08Dk$$pf)bhz+#NVo%QKX-;V1!5rOp5^o;Z5)V!9p~}C5MLtnX7prmUL4xELvk< zic70p3=i%pb97i4wd4s!BpMy{W=-13Mi(`%pDGf6*fv5dU4iW@m`x+*Qa16htj$(p z*@k+Q@dWL5Bep<(rpa9Z#8f$jla(G}v{bxu0HE_wu9>5DqkY~o8G3RT5#gA``5RR) z?d(E*p{^fy+tvM&N&I}qz~i*?abC!L@FcGydn)hP_TD6US?_`Kw6^U`ZII`|nYY_EiN3{WEvZF~@3UE_Zde-YizkgD-7dq?K;Tf%!rRUhohwL)ETqNC}54cm?&CC447h$p@ 
zz~uNcE$}Pg=kRR*kaj|ApdjWf$BWZVjqq z9d6PqD!7g*%`UmJ;51$#-#YvlxSOqeb6sziHo-9o`1oq&okyj%tr~y17RT!!#zQr# zaErH2=P~#H%ErvPK^lgu%5$5pty`RU_iGINnZb+QeFJdxiKn=3jglCMB-NQ&S>kkH;Wf9{Dt4BYT_jI1mL0awm!fk1 zW+P!mkP`OCI3+V&9F7~$0?!hw`m)q2=HfEi-SDbOyJDhU05HPNS6NRelJ$?IoR!mR z5_3{Sx>Q94IYmN$i}!lb`0wSvt=K+)M0kBvh-ppJ22_c`<+`8WoF&6Y<)c0bGeze=vcQS@(OF^`a}+TbSyIv{7yeq z&X%K23>+LprLczV zn(i=-F~sgD&3R)W{iEu_DBMtX+eo6i+Y<06UujAgx(oIPhx(KBiHXR+J8SCG z;o{=b8Y|J07{g{=D^a;t^uo5^8k?)WVXnQutNrI2le`fdI^JrAeyk}U*IRfwb zouiLMcq{lg9#U)gXe@ZoMri`^V4VD4px&VRl;Hms6{M?=wM=AwU9vJojTR)%QbO?tAuNRis*h5`kn8? z%HrF?jq{R#iCt+YHT+cax&sT;ez{K$siWx#qjc=!8N56>KSCH9X0GMaY&LjZoQI{W z19c$L4P8w`Qr>rkMP(J%h*%lBVMS19w6Yk}d4YC?Z4aGDo<|Q&%ox%Se%er zx=j1)FslFZ;^U6;Z8zVJL8L_J;Srds2)c;@l|c6_YYDH6NRik-Jq zn@r0)%NYX=f5dhj!TxIgECw+bO`)2s!3fb35?Gd3opbK(6IgN*gv#6)#YLDwf=p({ zPx5(82PV8E&nHOS+!#%J&wIIxZ*tN$FQrqzkAa1g*{>8)#^Ak3%~wCEe;ammO`C0< z5mAaT8j^mUVHWzv#R6&o$lgCpXJ&nTs(yM9qDDF!~X~xGKm+3S=an5bz@6@zX`)TIs05flAkGXJ8 zNO*lXtZcT}_Wtvz!k1)Z{fPnN^aQYV$pV_Q=e@qZ-awt@HI}#E*0A>x@;!tky8%^h z=8^l4^js0sp77+nhtB(~Xgw&*_ts+!NxrcT;v=hl?_RW?jDk z8B*Q-{e9Dcw5R(AsW*C$+?MZFk*jCvhY*qXi(TY<-dxN>E16+s@qsbB6hpfH9a@)_%$xYlmTS@FUMw#Te*v4&TI3EDSl#?U0M zq2XcEC|n1u)Zn(dZ^7kNqN55>v7WBFwpIf7=aHN;4Q)SCe%t5hT zzAY{c-Hy3K${rmZgxQyC^zQt0&&I76qh;G^W;8m(cs^iEv*(y%SUKB|p=o*!16DsA z60>Q#LfYP6UP#5`@YQL3x_NtXX13mg^dVQzP4lF1*_!;fYbhn40$R_rcGCEwFh!RW zgt_@QkAs;t?h<3EnuV$C+ywyQD$FLmyu9rSo<{vWidVSh8W2^fC{gW=5lG6WL-+Va z7%Rf@Z14qDXR&c>R|Uj*C&Rc0*iA5Jyj_OW)$YW*INpv%82ads73eaO@gt~Rc14)a zBDh488j9>D%;GvfJj`yOXPzWZ;WYN|NfP9lMDoo-PInhdO*@HBMBIHYW zsc-wI!_cwXLkOIswHW8JbMVHz2PNKW7otxzTCY-cUaT%E>KE0D|F++c=pxM<16IiF z5IzhCk}BD<-~3vKN~FlwPS2ZXMnLAFSeDx(-D)__9ABrS^?)o!58XY`grP{k}q*O#|}mdaa%=h2X1z(Vl$0HH;~bYoqe>{_2fzi&pdI zTP?NT*OzDXDZWU@o2wJJmks?O;O09I*N110CaVwngjf1~uFF1 z#Cl(y!)^>jk2*?^c_iXOw$`C3zMm|*qiDTEV zJyjYO+iZwoRm*Am=@Z4T_rj&Z(igM~GJpn#2OS%XrW2)R`;*p1okN-O$qnZe-p3Q> zLk(U(K$JnZ?>}alEVo?B>$+;Cx>h+nhcUmHAJc`zn5VdXqs%Y&EuM<}e{2LDAx8zO zeybWCV?pjxX_?8|dY=9t1Ws%OXVh|!l&r_VQD&*$3;HY8o8I^ega^+I>{SFF5RUbQ ziW$%(@`DeyNlT3+Q@*^o&Hoe;)|Un3oybbPEH~gv&iXUyMLBlD-_>f_z(aEF91X`| z-f>_VP5em$$`OQi5$Z;jx%UqPO_{YKO$)Qv1$KI4vgn1w=6!Jr#6#?0tHIAO1X#Rx=-(F5T4sGKpyK$Eb51j63>lsZS&bKd5LLT$Svg`s zTeNl|O!SsBhQR9`O(m}f1*r2odXDcQI0a7mBwxWnmUGYhC97|Jt36BB#-n@O8rUrl zivsohs;zFL zcYTsu)|u2<(_vZal>*?DX=G(&cEL=XbJK^3bC=H_xz3|%H+a_E2B24!=#YPd_?Q0Y z?(XiH8rEZSjpxMMj`oOQh@V?FlMMhASYPT>iUbdt!W zr6qZa`uw7TQ1)7xS&to2Xix5KoT<&P>*ihcT4cY5BL68k5yk+Sm9OzX)F#mAF#{{y zZI_n6t)ds+1FG{dAkhxBG|e3kPK;$UWV0UgTE`pRxYl{;a`=vBKUGd%a&w zb!{_SE9I0lowRsMWp;@dGON}{B{-sKd^|IO7s7&<7)H$aA$E){(F1BfnW%udAe|Le z%mXvpJknH-!b-VSN+ax9Z*oy6$_;5AiZHU$brdCQt59t?^R?J}!D2cUR^*8&oB|>N z0d?n#<8@^3Wr z%->n_*~))p&5Qxv7#MT0Uu{O&IRf^n)QP`PBw5Fl7mEqV;`Qr>lfY;Ih#FcVARxij^VYP z78oMj|3Y;I)NrTPY!DKgbJxe9Y*Fhm*6gT~Fl$EqFa!H`5+%$@Bbis|FdZhhqm5CT zW?Ot=k8C)ymWA$Fo$H>-8kt6+9@EWPzt40?F{QAJlmF1of%whi*0*;~2$rlpilji( z4&7|r-j}Y1sf{;RJNwPp+1C%WSoo|nr-6W?i|Tq8+wJNIq2CH5G&;}LQaQp!_TN1t z?it;@-ZZ>nhSco^dm!EI_~2K^Q!5G9g}5G6yq6vwl+fz6oc{7-gv7;{aIuZRN${z2 z+q=7^b_HF}xD<$_6Bp7PH zs*|IW>M^rZvzBrI9=5LDZNW=+uAM9LK4QtHthJBll{Nkufy^dN%0-&#H#^!nTF!KTY{%i(E6&?Bk5^i=zA50C8@>uvQP_F50l zOz@fjnWz+E@UNoupW54#L_l>*-3EoRF!vRL=IgU`tTk5c4`?I~iv4CRlyn0M-iA?d z#h5C|9K2BkcCaMmhiJ7{Nr!Cx>f*FM>A-zBg$UFM`JD$_VwRW8Rmy9f2EP!5aVlmlM1(1$xkxe_lE zfXou-T>*r&TJxQc1V+?TZ^6E9$+S=_v{`98M-Nm%9Vf}Q9_@ehjslB$4s8ysx4jnu zHdH-x*_zxAF5(;U9(Ilso;v$&o1fM5pMlel%AeaGl_aW9XUBbuu{P?|{hjl$=6MfE zr?Z~Q7c}S&jpV2@x~*r%$!)yKnd0R=2mJN`C<{wm{uaZz&S$#C7y8<&$u}4_qI2^3@es12EQ3V2g zGvss)kmk9DB$Zwrd7+E?h*m{0;Hz7aXyDU%Fyhy`tPi#8^8x7rMpMsD`nc2FN~^c; 
zi5a_qJi*d2KH@uQZagCf3jV5fVKLyc^zEZk=A7NdGq@6Z8N$G$0J3Fwk$V+zN9>21 zD1j{kG3$)bUn|sr7!TUVi;&R-#RD>W1d zl-!k%_y}*WV_gL4RyHE|<|%LMD#N&?ZL1f8BW8F}ymsnO&c~+gx4P1%enyml6{<=` z90Beb3^Fe?hT@NCE?1BslRUBaSbtcXwg%%P6D$luD;-%yRgaV6|7!^#4Q;14g(I=j zCYtJMKI>M1$XS(-&}+4CS|nmHW-=feAZg}iMZ+0U_RE3v$K=0;MHw;6&g^c7wt5TX za>E!VA)lG{4T)fpX?QuuH%!-l}3U zO~zu&=Wwu*qRryt+3~1sgMXxJRL5rJnk35 z>!L$l)y;Cl&1rd9t^ueByABn&^X6EMPq{v@(}gqZ!)HGALgkrS@T-{20uX zq%p?F_BmVOH_yi<4ZDAp6AJ8Qs&&<@0p5rBdKVQRuWolyx344-1YwR-5>bE3=qjmm z^aj{aR>2F4zgIHtDGKZWf6n>~w^fqS_(ifh%E9EfD;?2V+MyYR7=Z!J6BfBmvBwHl zm}7S85qw$+DJkq!K9}|~F`D=$Rs!m4vrG^2k8FosG6)ME$%ntr<_Av-^x7Hs&vT`E zAO*9@Z2iu@eFlW@!oN?)`Ka!oFZw>4&i5BoUiXMJ#^mZ!y^dkz_bNK>7`)&`HHH@@ zzo^xi@!p+(YH8cB-64FnM0FFfaD~=a2cG52Q=g&vcC#)Hr0`e!xj>N>yJZRH$K0K&s*MBmJ!;^AZ+% zOY>9ks6GVKbm|3*U^u1ln6@5}x+TBIu@1AIsx0_%ws}txbAi;r^{H*`Izx8jmHGa1 z=g(}+bk4iWHe%&>VvV!10o9$IoqaC&jhkwtK)AODo)TjO0IZ$&9ku0(%>F_2J0jFI zH8nJi@`tXo2qz%{a@K^S_FSgAAu!;`FQ*&T_(~%D z=jNHO8h&dxR~mzkss0cT-ObX~*~ugXfQU%k4ic{lkH!O4FLO~M{v{1ukB{(XeN+o32p0MR5boD!YZu5km%6ReZqE)LV;!i^6wb+H zDz{n4%vK_x;jImOBWwcF4#U=<^aEOAUZ~4Njbk?4+N`efaAI#33cdl7c=q9U3-9(? zbN$F)`ZmNvf~?hg`&-%dUs-IG8w`HiNu9$la3s__#dYJ18`Z)GzSE<9&{YlrZ`p_s z51l+ea%A^{ycCxj=x;o9zCu?4GKt9Yl98n3D z7c6+~0la)7>)>!ex72iZq89x2UNYHo9x89|;u$ z1s%bOAUQf77KGp+uw{948dtM5QZZgx;j5a0k%9uwu;}M1|6e^}VktZxd^I0MgB02kQisw>q;YfMrCh94TEdRFGR*0)$tm`n2f z1Ua**CR~vmFe;{4t^Bhz#9OiuFL&yhoe!=i*G|+jXE?F!Z6<~ zJ&)}QBs1~xr=LhtUqhF8h+uC3;W*de(WFs+m>Yn;&1oy8z(2U5KaoRI4g6B45=H;lc}f z6smE<2Vs~}H$iLI_qjRoL6(9oMr|PCURXQu9WnSs@(b0cS}evMhCJ>K2tCJ-!omE9 zlZt5*`q}+KVpOv^wahryD+UGCl$NWc{Z6@f8V;5X9Z4u zzl+|8<=ZD~a~H`!OTd0QG%5&jcrhqlBqnfEb;W4YbncFvqHI)PAjX<53J}fqlFoJr zf-k_KE`=9vQ!YtbNyLOs8sNpwL-cP@bB<5w*aR`|nKQM{v3Fbh2+qz&UqLg_ ze=Sa`)%t>-SWfXjuvxy6~2&3zJmOh%~w8 z5$Hh%o3g|>=m!Fi>~zF#X|lq4#bfRwr6@F~iv>F490$?oMch@CP{aQJSbOhys^2$$ zJQblxLqk?&6oq6(Mq6Y=_O3*BX2vlh4P_*w%w+F94u?<)SsBM3iGyQhJJ$DlmGger zTc5ta-{bfDPvUsp_v^Z^@w}eb^QPMW)ajew6(^EmX|95ccRmOAg_obq>pZfyP$3ST ztPAeVbm_T`kev=xai&@CD<}2Xwv+J$odQ=~$lF+SN)gmfbMguet`_+QhLA1t+K#Hu zAIR0?5R)#n9dpvt!URI!uZ?Rr7qb(96n9&aUv*RnctVR$I`Bk>m+Ud@jSn_Krf+#6 z4rTB~ih(0`JZ6wuw*nOqKDE&`mCOdCXtUL$9102?O@dY+RyiMg{?eARoW562Z&wh8 zNJqDwZ)5MF$~7^ljux1{a$se6u1D-6A+aPfYIFCyMAHu4ANiT`92v}x zJ>VZTj(d@-c5BSoJ*mA%Bcat>k#^dmPcFqI5vLG-HEGRgQ=`brpkRrg&gxVUoI*z^ zcg!Y__YMR<%UYP`o~4b>nq z2q?Awho`jNHO031y0_u|nM$bc+_wWKHu-ML)Y@v#npNEePDbPVZL)Olsg#9tJ7&}c z%sLJC4Kv!zqG(L#${47Y#&!t~C8M3xivZfJ!|0_k$ z$K_=wogAHsk4T4>Z8};UUV2EyMQ6=2+Vt6!Lz`S?3&Cz z-Dy=@-cu8AB2?p4yAhC=6b7JSRxxvE& z55rm!o@jcSmb6X!8_p=nRw-JZb16rb6jfDKqs2){NiVU>h8`n^abCURBZ_?)LrFm= zDx8Q~09{LA5enN<1{j=YlXPB*+q^uoXLVZZ3D+Zyw6G&%{avw34skE$zRx9n&h3^H z-8oT5CDe_c#Ouy=7RJf?4=d%fh@(uwdzYCsE2ZgT<-03+*MooW!*IP^PK}vBk8?%Yv z8LNrKmpP{AtOzzIpemX&W*OV=LJ4=t zs2B6`5u?K=Gg}-%q_g8n2O;!P2&3~tpKo!gv;Mv5ag7EzQ$2dI57Osp4v&4&_JX5D zS#az(Ci6dGSoinXYaj81tHboPLCv&j($4L3r%$5HZ;0c*%rA~Byfe^HBFe@zS2y=4UBM6wH-{?)#Goc%N32I8yPjj!S$?^6bRwwG9VGxm* zEW5xbbotWeGCA=T>rVwrw~K%qwD7UfQQ^zl&dd=7)CLsM7GiF4T3V6E*roGhD? 
z)B2DMZjqume3rn+M;~e=%&e)FmXZi015X2v`{HVUY%soqDLD%4yhVC@(}1f*sCK0N z>EJ(by^mdgx3uqpDZbOAU}mRpSV6ALeO2Bp%dm|0JY%J=!8xe633JC|M-qZZB6-cb z&(2$Ze0K0??0M5q4r4TTnI;5GHEvB1dhc0lD9J+7nNbAzYw?8>+C{3h0K)a44fkDR zRUB^5NlL>bHLv($rJNIfv<#5sXpn#bF19!jEv683K?@6z!>Kotc5OFk0)GGjnWo8F zh00JR-hvp?9RVY#i!E)A{aVAAo!ztCCCiz?o6_w9f=800aJn2PMNGTy1BeBz4vkJ{ z;B>`OroovvOzkH&8oKH{a`YFc2F5e<{GeE3`!<})OY8B$L#Jz?_dDQSCy{1IAtu_B zLQ)FSnlLV6>jNDkk#;Rqs3DK5{UJn;N~mH`T@}v{v0Dv({+{AxTY94_z#szEg4J@l z^5Me=c*ut#=EZ8OfEBi1G8o6Kabsp)XhN7P9s{f91M)zg?Ech(K{|UtL7R{M09{64`k9WV-*H3vRg%i*p{*v0iG^nL| z(|i=}v(iBMdf?SH)jEc9ZySI}uh1Jo1OD(zoJ`0dUdNx-Lq@XQ&RNuP?nFDrF>hko z`dm^=JHFXoMun!uitf}+kBQK0kt!-d_PlS*dxRWNvpN6}Fp9~@Z=%Kva8B4>3}Z)&0oJ1s6EtXyR~c$6wq0H5_gNyeK77RF zEEJvAbqA+^uMx$8&!J6ykiK|yi=(<8E@nkG0fI;C_FJsKrc?hATNp{qS^Jo}Y+M?k z5IC!W*U92ip_y}Ozp*xcQsuWnu^&nNb;?$YbfkOpwk=(iha5PU#!M4(d>wo*abZUo zDmvmg+GAfBN-oINjUnB+CyyML#`3&s&)`C%R+i?M2&C=_!hs2!M)9}yKR8@!fs)=n z-If8_?jB19FuAVLTAA5CU+35t8HHT}GmF^i@2h8Ln-sxkGwvw*cwVCQ#-6#bLhTB` zi^oMK4EJ$}FegK$=zXEK=7zNQLg{Igw(0TBB&{wxpe3|EVGQz#Fqj=Q~v^w8n zaYBQS(@NEDYYaF?R6i+%05_Q!Z5?sNbFF4mH8KP0I^z5kvO3O-G0+R1?^KLV`+&3% zFS)GXb-b?4P8?BG6VBIY)t|^p8DoN6@9+xLs0s*itLIdXxdn%phJJBw*>fA<=Og5;z80vR+c;sCkbEaa(gq@aG7Nx(*Cjyb=z16F>4ZVWUVHV`ENh>-pDJgdLmVI_e9=8I zLE0l@U$c?@W{3jb7Oekp1qxc9V?5)_@1N{Bykb^3CSX@60KAc>NMYhJ9IL*uZ6u1!#mdTfO{i z^hn{`HTXHlbn}fDuLAJTnv1oPc2^{i9g@kBJJqLk%1!qb(vCcH>*KRsif>x9xa7*| z@Wp=KisARF(Q@a`xmlqg0jDTTO4xe><1qUISbXP|lT~+p!EF;?a0?zm*AhlYP0lQh zX6XZNTMRAiS$}+Lq3rHGdp>y3=|gk5cnlqhpH=Nn04VzA`4lJ=2lptFwrb@*hvth^ zgcxcx>NIu&CSW{AthEJxv=WS0Iec_ zt5(ZLB^51Jc$nFg#)sROoazfa@6^10k`R6ZdQ`UF#t-~!QPQ%qw)1Vps6uN5xNh;J z7^(u3LE)>=JO;J&1<-9NAHTCXoY(T3WCQNY4XatLxUSM0UjoLv3PrrwUUDIvq^@tZ z<|Qk(rw*&0dZTiRHsiyMLfgE%x4uMQCx%h)dEd_Z%kx$)y2@KIcw0j@CWe zIsK;YU4V-Aakc|o=9pP!i~V+OONG)Zfgfa+@Wwfr+8RNcmQOwhpStkKhJ8i!=H?Y7 za>PoZhbE4RT$)yYHfvC&z1`}MCzj^1EHeEccMB%Xri=Ap&}>LfQ!`WqON};RH}6RG zsw}WC5B!q_D!EdYy}8(|)Ru?#-sAA_gK@S(vT`uLxmm#Ql_YPnq?}asoWyIlhE6fr zfAdR^Q&!KBD^$#;^_V>HQ)tT@dctTTQi1MeT`jGh7dEB*TeN)c*gE!@%Y$YCz-9Bi#%h%eh zZvPMMi3Wt%!U+#7Z;-T)?9E|}l9A+<}w3%yafZ}i-gi@R_%b9cFIw{K*E zg+?|cEs9(2t9$L|=K4Yto97apOLhrEB38$9m;@`SR}4cY!np}_8ei?T0Ai^<9)IO7 zHnPJ!zmKfveg^Qhy;{*W#(tg`kG5^YhDasV=5)9u@%X@2a*_wL|XAIWMuUF!K{ zkO12I>S@OH=Wj{wDY~_4N11B}MjFd(^^oE3Jfg@Tn26s&H)8xnB%$9P7pC%66<`Ug z<{go}G1mK^dqYFxUHe4)S19a< z1CxIQ6LgQdJ4YzTm9h8Er`4Clrl+SPm;fu;L;k%>UOLNB0C0AcSm`2liEK>0Zhk%4 zJQuXAqI3;U@m#zP%uZBkNfkLc{?rWJ(Xb#-wy|iEHs~i2YR}c_%wqs*2;;yop=SKd z9rIl+_^|h&Vj_HOsJ|6`{QmgPvy&3hsh8~S?JxO=uik$`8wFxCn%R~ERlcX@05lU} z$JkQMDi222RhI7*r{7JMSNEhAl1i14tSH>f$3x~9cVI@WmcV}YF^D^#{W@HN;Gx*0 zfZ%c&7;<%YYtmUGVEa5dpAN1X8}i@ROjEpSidpTA-beR%XI{i=#yg<4hH5<^>FgEK zzn8V7++r+UQC(HVfnFux%e5bAcK}{9ruTL=?55r04+d4?hyLfmjO5zp(f{&Y(XOgL z=$E}b){EL+nrmbBpqxN;saCUt?IOo3wSrpJv5p*$d!O5j8$1@@d)VpE3^mlsDQQ6& zG=0fsMs>c&#YKrB_FDgI-RNMNh*7A^uYJ1ln(ywBvGyE)XiG?+KYzaI)+?H*WNnOk zu6fV+C^qWS_TjdyxNP$tv5`}WQMa6(i-RdYWje1ooVoYaBRVe5DvBc{$?0053t#*DFAzRVQKG2KThq_kjFToyzzq*LCB8P53TU zH#NNwvxCcwz2Zc!X-DAj@{IGakG6q=!i!+v)ZnVpQ}@F3(Y_J}d%gwUZHz52hL?Z+ zs*DV+qFU?XO7fv=@g4TcJwPqVi!dF+7XV0V+YmO|7 z9qs(6Kx~7DI;I3Y=bX1ca4Kw`Yr482tzH=43C#e1WokxK4%*X*)XzCL@#VgqQ!ruE zq$R3fwYQ+p^HAr2Za?&6eSyPGP&T&dgBI38OU8c|_s>6@Y=A0evqEi!+OT=@iLO+n z##cIWVPGN6hon!TZwUFd8yWAAq7>o&I=+OQq7ueIVt}(SF5I!O&L@U(Pv^p_1hFEZ zTiCt&tv&iru20S^uY)lDgG6dgaCLn442VJ8Pcotk^? 
zmhrI#0Q(NpYqIeR|I%svXDi|7wGi=!pm8?rgMYvl;(rh9hQX<>wspc;kXK+BvLvZp z_n{R#;4L!l`}~k#ppZ7q(VPV+P`2m&`}ez-GI#yj{rUyPjIW0Hg`esgX#eG#KbOtS zf$Uu^aql>!gb_2rAe6Xg5_z9@5MdYuskI;n1XcJ^ut)#nKkv7cWkWJezh?9pQEHN`!+m(F^(jO$<@CJ=C;9TKZ*Pp6S z-O2ro&4wrc#e^fnDS-5DAQ)P`VdTwC9(mU%iSN4UIsdPFOfdI86 z`BzE5%8>l{Shw9<;SClNKWg`;wm(GEUpAgb5zfV}5nBu1e;2*}mHZyWpI3lz+7Ctm z|2+dCwm+f}#NU*#2mcgw^3o+u~|82MbSz`t>yuy9sn{gZ8EVKBCR31#T;tbI8-!} zk1SBXeSLw#t27q&f7^Ho2E?^LGGhz+gKI}d=)0f;@PkVnR2PW(^%;n6>fi4r>cGjF z4Y9hoVILl!Le6tz=DLlZvTYP%*ziNYZ2Km+$6)VAzoh?H%9Ld5h!?WJS>gf-Q+yU- ze_i0%epukr#9@_jN2__mzUcKSbn2P}stX2iyM|CX7c4viC1ecANblo@&U6eCYqJoq4NKW#CjDn52|T0x-lEDxr)Sy z*Tl1|h3?nG?-e@y4c8=-kF@r!`e9w>NgZCtc+{qsz*AHJTgb#hhN z>&5=@%*LlSp}603KHW6T_!u(UQqdyXZ?eIpemfvii04P|65a4qk=~R@o)iA#y=>c3 z2IxfMTRP2se>o6P1exw{#juwbYz39_66`y&`fDIut6}ZoDCWMKb=fEDZ1U3eM_Q z)5ecFk31BYOxsrXns8SO^3q|-zdjRlGDNKMDrwxtPohE|>K6a{Lp4mY96b70^L2$p zc2M-ci{1o*o#&12L!R}|IkonB(_j7O#pe+C;_Ywj*cgt>Sar{rwwD|a(M@mLLqi=T zwz2{L_{|e|K!#uue`$ExF-PM; zU`S=n(e0paZb{P-AMPn}iATR>`tQe=alvLd990BWj4XC$`nigCR2$sTZ_esG3!LDK zz6~wU$?k940aJ^5X{s3yXdjkyT>J0af3q82*;w668J^Zr3i3x%A|UnV|orJQfZ1_%7x%M$_{7vaAd8Tz_UcOV(I*ibU~ zzvs(MavwY>!`pR>@czdBsrBzq8VcqfMt4fy+sjLORxR_XMWVmeh2OWEQOo%`L@DZ<6Y(4N#E8lkqa%iQAhn82jVC%1jv0+8bGLVMFQ8)$gi83Fs(h_jjvRsy*D_CQqt<7|SGUU9)|_4$6J{&rQ= zg+7ZylJZG|fGdkYT~Ww3yzWC#O>9lqa|dc)`NT^=!8xGQ>C+<+`VVRmb^YDqgfh`M zp34QE`fH5;fyR)97m~U2Gt+-oX4f%4&Zxs49BUhLciW+MeI3&28YLZEqt!|Pr^;p*;lWQ2n^vCT!Uut zSUJ9Edd`kg_iZsiN8@#z8@(-4pq^udaAFJky+vNwi0-$igx2#VP#Qlg$I`Ncj!!aC zC2f3GlkY|`bhoBPlF~e#;-R}JOGnE9tr7VdtK5s>x zpw9w&UTUat+*V>xf~9citsh!@xebTrH(wY9q1Yn(Y{G5t)2+?!{t!&=%<3}CJs0dg z1|}2ev^aIOz;Yn^@^d=GlqxPjoR?Dg4C|SVz$RXc7Z5mJqHXX5quuP>KE$alob9C{^elZZ_aTj+I<;`|Ogu~PhEcCu#1&4*2C zIz2cVa`L6*W(i`w3)v+1O_HNm)T%GN{ zs`6@w?I&O^W<%pb0XmtqqpZ8IsW3;*d;62GuTPSM+vZGY&M9NAxs3wtG}@#sQ_PW> zc{I$^HQg>qHNLsN8ng-YVPGc{Zsjq)WW93E8zJn2``w=*o*voXTq5UWy43?!DNnpb zn7R251i4B`0^Zdm~^;3XGUrE0IsqcI~E_W4%DUhvWbf)^4qP z7nAJPzxs6Yi%3gP(&hZ|13H?Tn(vbE-|#1M_N=k!Z=W40t#!mz`d>i=Pk>!uqkP3= z(QO-J`uUpD9g`rg6a=DN(r%kcM+d>}_`wMV)=AOV-Mr)2vVP*!|^k#gCMFxT*k7wfSKfL@xUN@sL;=m6C)|&a+PTsksjG>ZG*d5p7c@{AmJ= zB#id!Zg0yn2nf@K|u_8glP#flTk z$&`|m13=ZntM~%?F7q1jNoPA^yyY+o=rA!+Y{xUv> zPtz)71Bz>o*d0)#JpnN`Q%s;)X>IG$cT>AyGE#;-XS0Tt<|LJY!4UJnr?i|>xh8Ed z&b|f*t>)sgfcAAw=RP>H9@gHGLjuGY^0Tnfd;5X$@7c%~q#7aYks4fxGM&}M@qvAF z125J}`OtoN2g{lAe*EgSJkf<TLDGRbNA^*u#HeX8fJm6WDrPGMu$MwD?|W{mYnQzfQ-K#2dDrrFQ{v%Ez9jRXZ{ z!C^qtR!i7VqPqB>T6 z3*Y-wT=UKdIJ(FZ>=fV+C&v;xXKt;o%=hFuOO$C&bQ%YvrGvPlIMtTgX9y5p)tGRE zQ>mQwOC2*>lY>0BKf$C)VQz`^8RGoesUwj(alth&ZPMN#wT6ku|HG49Nv{K0q7zx# zwV+Bh(S>&pn|a4{FEn0iougYG0TNs*Vdtd^QHL2{I1q#}sS7D|bSJJydGy#iD3mmu z1(8h7i*ZSLgm$r|(bIPuq8?%!WnT070ntGn1tn7T#KJWVbB4PTYVHpq$D6ioHx45+ zPj@##qKDFyC z^cX+V=D^j2up`-zr6;kY^q3#1A$58I)8ntBWBG%2h3;8hjaQ3^$>;)!-67>Q%Ihs{h1tXoESCf^3A-zeLC*HPqoCc|P%huz5+@@e@ zVuJr?&!?aS>0_d0Tx|`gblhE$T9mMZ5PLq7jS(k)rd5JkOBH{8BIT>wp=!yj^R&@7 zy`3yszhg7amiyyBpMAlMmt&f3>3l1zQq;tw#$vX8-wE>kuip^uSA>EGIjm08z9p z2WAfuQcYXM@#B1^?KiA#Zp@ECS!4+u?yJX?SNKqcX~KtU;ckHguk1++;c8&LuxVh| zvFH>#iN&qeU)Wg*KaC2Pt9+78PM&ApqYb(4mWXodE6h+hw|yzHw@RCT*5qn|2t!Qv zmU7)OIh@2wYO^8wPg&W&-;<;T5Hz7I6fH=hA|cc_dg7Ji%&@Ef zuu5^RHG0^#WK6p=_r1DItIk_sus!0F9GTF*Dmsr0kcOVITGqB5{Cu$T708JSRGvBb z6mH(*TdrVokpz};^5GLY#hNf`5dLjP1&$vQjzE}^sq0^cA;uV}0U2j2Qw%%vGd_n1 z))k-C1Y_aLWZ!e~u)Kt7#O!wNBASbGYo-birh)=uy0~|+NsKtlD0p9acHpp9q3x{0 z)p(Ui+mfs5TRm)c6f1WYTMj&h4@MF*oLJLdrnTkiI)!2~}4!d~yRkvLY5Sv8uVDnZx4-%ef8yOWdJ;bV+PvCOFRY{gY)p zg7MeidYZJRm*TWWe&#CE_F}B;=V3w*aHeF{j55Z{0US!VLDX|DBkw#E)Zbt~{f}IF 
z`U!${vXb0N%C~l)2fpGr4!oiQd8I!jgkGu3_+q6rZLJx(hm^F*o2DV>(}xsiyymQT z*u+&B@zIBR-8KXid^$k!ax*FACq(h%o|L;cotS`z5#1|CvaFqZX{cj&8iWMtIIBVB zz+ey{eI|HQLN)1=ZFJ*JWAKqJ-90XhAiS^$kpv@fTnn8Iv&-Dtb!RwdUkMo2OQdRJ zIOy(B%}@s$hfVk;G@=w9qzS96EMO;BAg6SH4Yh074|b%6`T# zkAP~S9sByhDoq6#tCq5&AdZ+INQ_cIDp}jP?pZ6k9ZR46&9%B2(n6~%MXt6kAK#qB z4q<4f@qD!1!|l#>jv{B4EKawr;EtHbjiT9~D!;fMFrMSHLQ9t%#-&lmIFG%E==eVI zym!=eI}l$_*PT6z5GGYU!R27X$-bXk`z6%zk|3%QZ9BF`_P$wF<@s!$ zOsLRxYkh58Y|jA~5Tdyayz*M3peZN_G?kb6*h>}%4{_GVSJ0V_b`|EG1v^)M*O1+S zAv-(E=%&o7g>lGPUYT&Q(>4m=_oXwW#Q{sch0@FgFn0o+%h{Xw zrZzuch;wTa$!(VoExRsO7;wd0@rScnIhMDi^GcH&!YXD~_D*eR+sK8UMo9RsLO#FQ z(?${QBWd=TMb*4K?r$>z`U}z^Yj#nkkve<0&;bUS3i%po^JiCP^6s{}Ky~p#)X}!E z4S19n-KX4g{n_QDAf#)(j=2m`R6-YgSX7-b#L^bZD#8|WH)qMxo{Ge2tEbf-^xFt$ zfyK(ezxSVarGg|H>g(=p)ac``zq|Wy2+q}M;6LhjmxjAOFw}MO^97mP6ZUI*IYH9R zzS;xhl-7>;iXGP9uZyc``;3O#vMIx_EOuP;Y5nr?G4WxEH%=R>un` zqGiK`oz0uyT-bI{+_|VufV*h&LD77-qjp_{u#MEzKs9bQRN06};evO!fEItwwO71f zQnf^aEZ#b5P|1!YeppqG2oo@UeHJDF$Ai?d9!Ho%l`k{ynNryG1XSQrE-><{9f2V$ zunEx62}wQXINEy3r*SgbkE_}rU&$}z8l~m+-C(sU%PCGXZ8MRtc zH<|k4eJFVp5)@b+??Y(Hfh(*jxk;&3@RS|37U_3ea*S0Dp0SKRGlRxjl%#r(c9t>F zv;@F((!wntp6t~8)FiQ`4w(EoV%nblz1_n#U1cA~`b%Jbg2db)V$5ZU`a=Kb?1UJj zYY>$9AZo#k{xfCSPjq5U>zqKV-`1=L7D?}EJ+6=yJeb578+;1P3O^@-yMF}4ZQow; zQFl|q33XK?z-JM#x5t|gpP?3DM#hF3>`HrLd%o2|C0xR03F<7zm6L<(1boB@@qiIj z;O)&xE~ZgoZWuA&hs>~Lg+J19VN#DfWmRJq%1-qpnnJ;PF$Fpr@7qVi%vs1hR!GaQ zJg3u78N{z9hgN_sW~lV>&jJz=1i|Ljg|sD5AK-k3ANvLgcC>Pedbezw4Q#rgQIllx zD#5srX})Z-uY4c}t8JBLd!_}S!<2=AXt&!iM?a^D)(c~bNU4XIp({I%gX~fxr*d+- zPYz8j#LFASpqg*Ye>W(EjZw4Ewx<78VctQw^?3&B3QW5&#P)!P#2X|fqgg0S3@>05Rm z?X&DkD$aY;wIVHy-cgs;_rm_Y;8fu%5na--!A?9p7rFzU!JJhIcTzU|?D55EoSm}$ zOGw7>*Kx@Qge)Sb>tj0OP<+9vAobsY_LG7tVC?Zg|$ zbuMI$fQB_EorjXJB)zV# zcc&nS5?ccx*FBS~a72-iz0D-3lMGQ(z81_=iW`8)u$Sr?^+8^i-jQ@Mk*jC>IO*x0 zx>~3oS`5hx_sVshga|qST8^_KC+A%!0oNJuJY+WM_`GB?UOPCDqz@$Gr9jbU#k&NK z{uBT=ZKj|?d6c(04{0{ zqMKbeBHz7;$2GpT-pEzBc=zwrBi}%zGov7WZN8;h#fDbTMdQhW@R!8b^BSo{Er9%q zW&xCz$Pn(nPMqdQg^IHT<4Mi#m|FUxEIZ7OHq1wQLMLdAUlUtfwu-yABNgZx(z3epELKg;EXdp^@wQ|jLh zL(y=SYx^Y$p_FH5+4|oek(UqxI1K^$3Hj|=XouO6C{7HaU12wKvzYth@;64B?kocz z2+b{6-(&*Zd2mJBc~L*Y^b5@XxK}~0JG3-pY}{Q`@Ht$xLrm-KWhw}5?x<1idR#Q8 z*-{6LhN2ppKd+b~qO*{ywaO?=If>Yeu%LHmrk$*(EVCJD@&f>D39b1XSnVMRt*2we z#Qlf~`mb~V8uCIwDbx4Ftx`Rp=pBxCP|5fGR*K2X3k}6s3dQ3YPu@7UdOiO^=j+oY znyYm#xBHn&s-BgLSK7r&uR^CRfuk8HlmbrLiP zqd`2Z^5A88EHXID3mS7lxd`~h9)ABkC<1T>5nj@JoMsN)cll}t`Cb|>rNozzmk1u$ zBAAA}*py&P{~5;)CR(yO%blQ>729)}`!G~?wDpK+KmR3%VfibD|oYvE_x%vB&a4PcXbyi=d3d0m9K4kFd?ZO74gW9v&QT5ZiHgc@tAX)c}O&KnPJGa=R7Ay!(JeVfYw8QZ57 z(9si{GyteUgMokc4szpQminFB;gW^ZQy`|@)}N*j^VIM@7Hf-Kz5x?K6ZmW;alNvw z_Ty*PBGTq3dbnAoUZyV{W;RijU$4~M4(vtxFlb_fxpcq#@xP+j*Es-pmtxY4)WasJ z)Tt*l>J;w&ZWFEa-F%tLS3{`t$wTdby`gk1N0}mZ zQ&71}%JwFk%sLP-bqRvQ$-YEr%7@Ipw=X!#rGZnL2lvK zjSjW>Q)kj_C((<^hjkACP%5Sb)6XX(oTi@CO@k+<9_)sRVL!+-+xkJeCCf0ZWDLgE zoW%t+5I4S9hK)V9e5=B{Ae4F#rBi5|(v+xFJs>Pv{~b||d)s}3Z#^-w8p=phjPvfP zTY)?#VAs*h_ch~adz_}98?LS9imxpzW*XFur%~XDnc7Fv&^2#fB12~N1rO?XO2Y4X z9`jR3?5;GFOO438s}%?UgxAcztupbwcwZ}DQH&m1DuCnR+a=#QdqB9oJf&Amk(MS> zkErj$>WGNiK__h(ikM}nAg2I7lY%}fMte8-9e4p+pnq#76(b`xgQm~sAVJIH@kM;dxf`And)dI6Bi*o7Vdr&7bu$)0D_T%}Ictno<%Iji+sK;Lf!V!TRfC~`{^Qi>#9m7jIn{JLBD zG(UsgQ2n`&K1}A+qCo1=%)I^N3P$FKEMA4l zdG}(rltUvU*>GLS%poxw@9X7WFe4wBsbAghRt)IgW3uP`kq(eJ7+4{#5rk_IBfIlF z)J4rtL->sc?=cxXB-^$Bgeo(%Z?gFA z)OMV++)~z#MQ(e6PL%%%waiv6{L>t;?3Zxz+ibci*E<#tq3E&Pj@xjC7-wrKsp^mv z`s=dZ0|_SMK?ifrk=SpGvDPsI5=j^B#@dxGKRcl3Igzz+N z;SLaOng=NdE7s$3t6|0lNnZjuu;jwn|^K 
z@2$SFhyK=DEPO5XCZK!Z&$pKRSnjYSKe9X$P1Ogf7nj3y8&nhA~^x+a{d>55h~Oe0}3)wT;&YeH8LgF2Vm0xMMIEfXdJYfJBlvC zT(tTdmwC_Q_QgoXZogIiq-F<_nn6XdKWWUHK_r=padOW6KzksOqjnP^LD&ou^QO7( z?RWk;ZU4fU|9*858OAe2S)TLO0JRv2@RE}-csIr!lSD2KV~t-5tPD6?2hE-QoMlyV zpox9E)w2Lf*aW4dz%G|~Fv5HB7h>r`TQ)^m5jh-cw3(SDQ0tDq4&4-)YM9b@Gb_f^ zuRuj%?hRwmbhqDLnwERB<6RnLpddhhQ?mLv6*;>BPmEN+ZmK#*jZy-*7i9?zl@{vR z*d!_Z)4_2KzHJ`vi%YerVuxhT$3Z95zH}TP`6q^Ly>?7B&UBrCb89q47yak*_-A8E zVlOf>`1R5L&-LzVDT+&Xm7FqOKAzv^#w>jGngU#@Jt!v74^_^*Jp=B!gvsu`$K;tMUquT_ zd%@o2rX-mywIeh>beYYuJdmci3bs*Xd$0yBD>2nnv49pYR+KRG_A2HXkuBm&O^R#< zkwDnk=WZbN*R2o+&@)t)DcMTBG&wQ2Q^{l73jIV;0M@d^c_kV$G3`1~#-?h{fs{r$ zO`GA%aZ$h@dGCBa0FB_cUFNXz_C)|{2SBSQ#KS1(3rOb31Ifj}M`JsX8VcYFfX*1M zz1v{v?JiKz>qEQxDms9y1GG24S;`n4DCp#Q?5;{l&iTk$3>gHb;k|19q8$`x7pMm@ z%V7H#J3Mb1H^uu6QT80-O-h>d=5d>t_@Is<^Oq%^06nJScjvM-SsP~A?hQ*^&UeaxmI>XMt`*;?Eu zzY9pXzHZK;aGDtr{{4iorl4InIOjZ1$e01imPOM&LrBX&f3!8j642Ns6FM-GuqK2( z!_+c)<{US>5jOQ?lJer3c=((rE962KU#gdSu`Wg@T*RlQzpZ!!R_8n#5#+I}SEI+7! zw}*tg+l(n)wj2%Z`rWAUZUHJi0RRF7Oj>)#vafU@AT{F9l|@k?qc2)(d@0g3J-4mM zJ}bu|!}FngS4*lEXO2gGX1w~?}>$GTBQ zj2rStgitXBO-sU#^unHWXcY09wDLqGEuixRD>QOVGRhjR+CQqv=~kW*z=bXu?j*-+ ziml8jHf*w0$gK|ImIjo?-;%S8Ij9u9pV*j>$TiwflbCr!>xmhDsT9}yX{9V$* zr|C;JZ-fPUG^vza{JMQ=Rmx+~=i^r??jQz#7PPsP(@(6mqr?Y zxUR!F|2h|K-kzOcjc=(jj%;a@t)@$FZuVQ6#p9f-#9ErQ0e<90)Yzb#aDADy3-A-; z3YX>Yk8&vSh#bb!0>{t_boFtkhmx@m8$V(5JMrZQX~Z06;t`~6AP~LTyf*ZV)=1}0 zC(@*xuo9dhp}?Z=0n$AKtOh@g=u?>V6n7bf7&RwW0`G(y?ug57@4VmtT+F=)kW#zR z*6T3ruZu~7RDJSHAmr~F{j>Xo=k5JED74Nk0$C~3`LV>w6;r7GXc;u%4x4Dmg;rm; z<9?ziw^7CEB37J3xO}J=+f(M&sPRn+N8uE%mQd!s@JyVwy2jH1l$;YjqI1D;u@ArQ zG%)qll45c2nshbj*5ex_>pyt1>kN|6;vP4PS~KX0O#vTLL%gUx`syD0k%TxF;BWO6 zv7-J`WB;5r_uJ1LEUzte{Ij4+xxJ6%*b?k#ixH9`@1tTKVOn9+1%kug6RPsSH8_Em zG6Nx&qn4Dw9Snd(s_ho`yK$K32=K9W$M_!yAHLwp;-lkUK{tL)Qj&&h2JGD?3e^Rw zOUgW09eLqOK`YJ3Bn*5;BRZ~IW#Vnm5Oy+tHQ`3W3x00YjP5(&T$R&j z;A*Y_s1hr|{(m3}%6+Mt1(r|kH!8H2*XKi<8(GurEgV$Ab2!?S0~~6Z(fn(0^{{H61CBpK8HrIV8L6^_3_2zzbJTeA zq$A)5?$mM>uNkI)LaNOBprAA(j&2-*DtAOXh*q5OindX{$CL4em6i;D2V$OYvM0rK85EATsIZl=eNQyI@545zYLDr3^!qz5Q ztF*YV51T7otrV~sxp5bEL<-%j{=duUo5)1R4pBn3`lVp_0(1Kst()~Rnq$nJ@z;+o z67Xq_eV;LeD6w=b9XB+WtmdLh#<#i^t_7CFI8%GyPTr zeIWB!yHL-J2Z8c3flv;m#81#3A6xBlz@HrM#uTDL8Mgye+pnlE`#-aS286!(I$W9j zV7*ZM)^7u&a`jIc)Yf}r8X z0UjSN>+EyVJUlGI#Z^0w1Nb@s;#OzWrzCos0mvCX&swdTo%pt#`74Qo$MC{BQANNs{cL+Xx zQwEePeYl8*@pLgzl$}8DnQN9HHt_jXBNU5BYYxFVuZYtA&n*ROM2c43x7KY(_|KPl zxHk?Yl}FP&J_FBuYW~wJp7m#bAMZ**il6zplYyJdKw{Sy(u8WDooRMIzSsW|;oPyp zT{K*?ATCXSSqmvRUz)Xx!t!t-RCiF?Txk>jG-a$fm4HgTeCaqsvC^H;?_qgJn)9Wm zVCtR%dPmGW94;2XnywaLy5^u5dBHBnxTN~yVn2jz(-bpzPAbCS7)sG`ymRC95;5gQ zL6=oFqKj@i7~!_y}{)_~CX&ShaU z$&S{o*f*f>^_^e5Am~bt9|oncK;!}q{TpZ|h4x8@%ZtoD)?h>Ld!WSi7sReh)n)a306YJesO6D8h=RrtpE+B0$};a zfDZ<~4sBy+J93DucMx#Uo3}bM56qE{3OZc|Xu_eAA0qAmo`n_Z)WH10bs+McIAi(E zH&H1`3ZT&#D9#ijB}jsdKf5`Mw`En(OMybmx75P%Odq6n1Hyi;j}7S{QV#H469IWU zHMrK{bA)&gr3vsgG!XT9`>`Bs@?yt+`{VR*r5JKk_98+Yg$z!4`)q@fJzO;~#7l-7 zsQn&zT=6p6c8mi-;6WC)5^$jt(DwD>p9l9O^@y4wG>Gg3I}o^6KYP(~-Vs&N5k#=J zfw&0gOt})clt!6Gb4#DFZB{}~ss{!TrfMY=l=-aSq_Vm%X%)2g6dDq)rlu8y6PeTh zIc~^5`#{`*UbgEPUZcDW4dShKEn>&l3ZUDWB9ZRt-Uc?|HiLK#QBj$}f0Py$5RA<9 z=TrogH^l>i8l#)iUK$ znXz39W(L+u9=>zCj>}4pVQY&C^inbg^}JDIUi(gRzy@T>8mHUAh>vo5n~Gx!)?yNH zu%gv>*6Tw;a{~xo6Bq7KC5AAD5OLbMjMR_GF36RpXPg$&wI)vRnY7kZo-n9<@j=Zr zh<{TT@N1S@HO{Ssqvq6n6l=d5xr!V6QewUgh zH#@oSVu{mY=C|sgI{jv2)D^zo{ECB#0 zUww7z7IbfhjEWHDAlMCbw%ty0W9E54ow-aqcUtFYY8?~+573ejP3AG%2JjY7FX}VO z=29Q*ar4pM(H)&e8uOD0juNU%XY9s9hg)#YpialR z;af#PidN0j?5)Y5UI5^&wnPY4u#rG@24pSdP)q8r9|mLmMr6Y1Pa@ 
z-eQ-qeXtiACsBGDIHP=os&zgQ8EZh-Th+Yg&l(9ujd$e9Nd=y`t75;M+<0;p+;7%{ z%ytXN{1o1tCtadDxdR$Y4l_f23UNvGuP5Wf>wV+Xn05dGB(oEsyQrj{khmkA^jq`b zMwIThroREsVP|UNWHMP|ScqdU%pF;P7ygD!X29y76uZ!6BX`ke88&j@isIaAoUSQ2+q|j3kp~4V z2QIfo`2}^sZ{q(uL-0_81R>aMn22EfTR9%lL9mSIpV=xD*7K9`o))kRA<2*8`?}yJ zqkPRI&hc*Nr7Je&M&9{jxtrEpdDG+Hz692R7@TWyHsrMZlK zKyPZ9BaDV6eKHDb;~QOqOH>=_pQe6-Qb-MQf^oMF-OXjl44-sKyHDz!BobrrHQf7; zE4)t>w*2;(;VHkV^UAEs&JG}i6>croaUA162mp1#`&&g0=T#WZ>LcB{*a{vZZbppQ zYb_oC$?&@mb7Ve1`}d+tp;sI912_R6;CaLd&5_enxrB^7iNe%t%P`WSfqGblWjyd_ z?c*PNRCWvk&$E2%=fttne=NG;Ffw8IlQ{OnSDr{Zte16{ng8(NQhK(#J?qR+)bqS% zo0>|v&+Uol%^KxGTT`_hqkuDe=Upuz1JFAEHdYVe4VQno(@1NMFs`f&$Nb^cX(vy( zr|SM6*1j^T%B^j8s~`wcA|YLZpnym>2oloL-QArFqyd+?v6#A zx%S@f_k6?sz2p2izhnSDgEgNy?|ILwW&@~YgL)f4BxD4pct7puTQg^8ZVgPJ^8$9) zhC-FXG=RtGGF@5Bf~yj3^8gQwUo;w<=4(~qQR+OjEx6I(erQrq4dlb+KnmRu2zN^X z(U5?St{TYAiUR62z_u{+=u`kAtzUz2iUf;@txLmvl4O%%;zA&)4}z?(J*pwHK3Bb( z?%8h~XR-=kB6E`~E~$aP`Q@Sbebxzm0lt}Fjo9n|OtZhgqS!!SsVFvoVyPe!m5kFS z)d{fL%a4d~6M85x0V%GZB)%#HOfD7d6MXD3%%#P}E$^yQ^I_UNPz#k; z1zZz?yfJ4~`7zma1htX*;!@w1Hv8Z)D3$t6uG~eiPSZ9GeLN#g_joP8{cWwy$%CS` z0hkfQ^~N7%DjKKC~VD}9@92zq@z?Hmz`>;)-P^7k!|^_ZTN>udxwcQ6b=4K zwm}MklG=C-5O@uIwLnoM1sEdj4L|N$Fq4eHrstxFp$2&V4gnW@wzmia>9q3Br_o9@ zRKbHT>H~1;)#TEUUyvBW=Ic6RA6)gOv_Vw^IRJ3YG5~0ZgsN#XG?K(JD8B|e3KY0> zRuM(v9bNz+EZf{QS4m6iZ@ zPU`=U?MiJ`9ijd3zO234W1|64g&YXsi#CC@@ST?aI9GO~LVZJ9O|aGxnuU`*1gr)8 zR$EIzliz?M<@_AsjZ%36K(Pw;Y6NbQ&gZ^mZ$VjfyX{X#@cJ<0MD1@)cSWQ>qo{$9 z8g%Y&`zn7p(EWCSUO;ODJb)TtFqPA0tykPUs>m+piEeu%igMk+;J39AghAW`087LF zj~^QYapME95{`Xtf*f?yF+HuCqPtd|01#>_n8<*I5P-)9nFAGz4Y{STs$X9MbB5F` z!mgK-V)FqZ89yc(84=2veEA5Q3@GUl4mBr6gi{lAN>k9BJs>ud1kQxMPnCe0ejJuh zM@3K3XwpY?4u{>?M5>~L9{qu~H7C6$*alkQJG^;7=+2J^p*tc4(ymlxMA$7t%?_D+ z=!b#o=^1}1eHc(P!MFN|c{{yxRg5L3MMuPOcvIRLDxr@*MV z@8$wNO9JAMnW>|CZ*0`hS@Ulh1p_&soBZq8C;D-HK003euiU{2r|(l0Ijc#S)6UFf zwybLND4ty(K9DIO&V`7}Y}cozJkf_N-l?^Bm>@3e`ySe*AqcMDU7sk+G&k5Dwm{a0 zh>uP`svHvnM_ccwNiU2Y9%YiH3i{}joAfb&eb|YcAP>+SYK8Kj{I;g5z8g2L=z*z0 zHlRFEhZ5lUvx^c4b2DR^fXB%KGJ`nD9RU1f(pf{T#wKmX9!jdB{l!b$=IbvP$iGwr zeMEK>ty=wWT1VeC97H2m_E#hKMo5hmVK{LID&(rQa-b3G)A&{eeCP+Ds|~s94h$(a zHG_N007=RQ-bNK|V6Zd>DENsx6W3Q9TsZJg#hHD3lw2SaIIIh9}Hw+aO4&?HE$yJ55U|s2s~_dF@5;k4){;4M9l<1 z)APk4_Fst7Kf8!%FaoDB`wOQ5&=QZWHj67bAxV0?0~97Wl*CUIRHOrCc)m)}MtQ#V zM@#^Q>lZ!Tv?2g=2;eVSptuRBejbfF1ZLl#06b6Nao*qx#!=#C1baaZ2+@}ZttH8b z8v4U06yPu%gXk$BG$;W;>_ZF043$s|v>T9EybLbt;Rt5()CD=IVmg@DK*}Tq+#_23 z0d3>iBL785fr=2MJZ~Lh{Qa{CgC;{U6rXX!yN8Digh?Za7=hf=1CDfX&ruh+FVeJT zSnmMzJllq*`hfR805`4h7Hl1Wb@KBXzMFusP$1YuFuhN>8O;*wfE)IO-q$}Kfrjh* z#EdGo_q;EPD4aFL^9%^X3qEi=nmA5PAu1J69<02{MFD$=)Bbd&d5jWRu$gJiC5T-J z99-WE2%gG7Z-%6pCBd=kl`4*ljc@0sRmwYX8jE)0SIsnu@;Zh5$>Lxq$7wBidVh&Q|NR{$UHiLx>P_#OiJ<2kmk7?XUgQ0OqqTH-uuI1O3XYJ2@9nRl+HE zZJJncrMe-11*ie90U?5jYXHzdFLKL3IDt`!;<}Hp|F{53V=OC))2r_vZc@& zz}sR0grf0OECKlMRXGJVhf_)hLLB~Apn@VFp^zJ0GAR9Rh_L@`zMB7BS|A{c>*qSI zZ`QrFjozICX-m6lU5QzKio&O~W}39^%{p@{uajh1Esl@?ik>_l0fN&Y!wL!O{Adx@2_(m;dS%yY3uzR z3cihCJ~{q`SU_^vtw-i>g}RUt9x(TVpV451yFq8pe*pt`p(^Dobpt7wu+T)F@bAgP zzxr(zFmTYG7?bqh25w#v9D%ER|E8%=x}o=w+GtCGj3*CVcrkvB$f0*%2qb7scT{#;&WwKD+d2RHu@&@zS=0zfbWQ&S-fxYcL^@*U8vP~uaIe<-Juv(K zeq~V?z_y?WOXdA1ljzq^g#r-PLCo)elltNlB8FVQe)ccDD0uAx;1v0PrO^DdQc*iX z5r34|uQgxTh|!DtYc~Ei8VDZC8+^XOoBK(>_jOtFh*d!RYp7ltG5vShodi30RGwI0 z_&<~H*N{3I`nMGM(@@TDxpV*gIfy^j*AA$-Fl(M){r;p5L=nUs z`StoP4UOLZ_t$p=V$^yIel6es{4NVKn4M_9lAR2%*>C;(@ccb^?})%KDI9(weDYg) z36Mg(JfB})4<^XPZ?*e>ObgU1uwvxj50U@&B+)d)jQaX(Mvd^R-2JcOzK%lhq(sP( 
z^zW9vFvOGoItAcjb7*Y+w>Jlbw}`DEOOUej_XVnth>U9bRa!9NhVys-Gl>886(!naic(l0rRv)L>`T!^!+VCUVCWhVSrYsbZiY$1N45H(MIhumHXUaIGc{13 zp>AWWF>0f8335>HJBrtLVmUa7M_kiRKk$yv6pwi9ooXs^d6}-84^w%(50jkC+<_$t z?#?_#NIyD!CM2^%m;D~vS>uNaXnEeBjXqqW*SINmOcGh-RVf!8{Bdg#_qM{5(yP_k zkPOSy?`2DLG(eB^rqN74Pdzgqkt%~`+8&iD^Yxs2a(dhx1&cHe^fT(i2+E*?t~t}{ z7tKtq@t^wh_g6wZe};PH{9XJpuOFWs*DSp-no!&H17{J58m57}KK5Z%e9jw{VOMtb zsa)PehAZ0{=Jatk^t%cf$#}t-Hw^9K-g>Zdw?$4VT}WOUM4HkW3HV{C-X|1)xPyWv zwcTJL5vOS@n?{_wsC}|E%M4?>(K%S@rp97xoJM<_wYc3hXR*BDe6XBqTj+?fTR@F9 zrwg;>Wq_b}v^8vco}}KZd3&wbr`Zlwuc3dF73H*hp;c>}ly!aX@kCNf(t^*Wjqc!Z zpha!mX=g&EC%%qeay(beP~avz$l+iodS-z1!Wlj;rP2dL!dd$M4cTggFT08VP(8;x zvpH`0`JVed-xS|e)~8rzU7p43v`1@AYX?)r4%-{-mI9gLC#Q+3&+$I3Q76J_a%DWu z8v40|0sG!1m6k4xi9Ak$eSG`kMvzYKO4|GK)Fra+Xl+awji7!oGaU`-B_sCQW~Dv3 zYz!&yiCPV0Juo-RDeuE4vfEdbQA}WS$o&rO$@(Al3$ffO=n?I}_@5goB&ug{}{96mLf__KWJ2_hK$lg%RU#{3Mbi|!{HePtWu17zw zioQIS@1jfTAq8Xb{DzsO7Qo~_MFEnmV3pDZ26?b?tE&_YZ+~*aKR2lB z3*UuRDRF`~%v^5n^@xGMIk-nNyk2}*OTBLD|?|tDV&z#_a z`9@r?|MIcJeu>lr@(kr!ptfHR(DD80Kp^wHh-uGng>0SyY!4ci ze_`d=*mI4Kg=yu^d-3SHGPOc(P5WNeYZRry^xjv#wX2!)-^Tu2lDEz&ln>KUPUd%L zG3p~~*8NoLGE1Ap?b-bO`4RDa(hJkrt;uq!n{izS?gV_4R)Cmk>=j*J!^y|63uZJF z@~^hzD+z6qXAg4&(#oIOBhSomJ4!Uwy9RlYyl=UZ#lPMq_M39Q8E=qC=i>~x6fhtC z*Q5;UFM3567{D1fw(9zM=xU>~(h|)xwk+_|37^7Ccx{+^}iC@ zBm4lbGyIttQht-*KlZR(J3~1La|`F*u%xP?+F2i(dgD)COWE^-)wqc5j94%C!>-8= zJFR|_aVZ*a9gHKL0IC_{P=ODJpOFsFA3w;De6zZdDnbL2Fzw|wNQN!pYUfjP=MnhQ zTHk6^7cH6>CW2ZHKaub_b!=SND%(cKGm?Q|h2j{&0o^E)aZ@ZP<2vixw}*!}4YsE- z0Pty1Z9IGJ1NU-JEvn&_$b9*+WY_=*CoX+DFPvASu1g-@Jz;f&F!T@LT-;QmV64#s zZHK3|l<^iKP+-<{p!c`!aoz{9e1rd@=Fo2~@Q2)jvMlAeH4@NPn#!)-SUJHCXuOxI zZ3p^Qst$dm#SDcedS%+{8I$=5t4GyoWz!$u%+cP@w-%OJr7C4f7EM_hEWLH?BXSY<`EFj_i9pz1^mx z{z2)H`L0drfPiF-bZ&V2l{@1@E8k{!l2FgAtVoG#k0*~0Ljx=`DJfDlGH&WmA>S{o z9|Ax%mVj3P?_MO`A~XsVUQh2yX2o0|desLlvOblP)OAf$ML2@vX2c&4VdCNB_2N54 zy3@vJ(P!@LKRg#MUEesgYkXR3M}E=euP4wVy2yIZ!NY}hm4u3tf+Vbg|BR5iqi6i2 z-<2&D=o_#`TBfp_9x|(Nb;3$3Gnx+%1#d0f2t4JoMSdF^D#PLptyvdiE8nLj1OmUxz z28e)tU|}d;&lEIT9+j4&kerygh35yE4M3U1Bh#J%VztKm$^^*fIhX>YP)Ycll*AfY zTfw~_b`lbqIWc+}kP+!8VRyoV1}wWV&I65_1e7=cBjA#!JDf zAc#B~AZ;C>xvC=B-w}7%`>JngpsjP6JEK%y`<*6p|1h6*2G1HOz`*z}vD&-mJ-|wJ zd>8Q6?)lD+Fo2gaSpEogg+cI2!9 zn4(dW<7AA3Ndk1noW}D5i54AZkEnI5$0s|SZN=nlrNmFYr->f^6>%K<+d`qBMI1Qj zZ2vxR44VqMP0Xi%V7J8IY!Y-lNqu&+gQ8w$Q1l(@TZ{%5YbpLfy+1w`lPcFHm& z=!zs5b2L8m2zzI+$p0aeQ|7OMjGHM{d2(Urdw-E)Vt{$@OnN4FH&sLfm{OcwGeio> zpnuI)cHjh=uAF-`MyH}%@GiOL@0pqm91g&=DdTZO%8+nfZza<_d}y8JRriKzcP;f? 
zYU?HvVp=r4yuX4~m%94LEpRKyghc6Yr#s(7L~o4zE6%(d9hHDtK}iRar@h!?aua=p zf+^V+!SAs5L!UlMu%(J>WP6kW*=rXh*FF<0ggpZyhiJip8+w2r$36&Y*#N{%k7WF_ z8;sMtTR(j;0OaU$H$aA*h=?$~wtlUuU|c$G#z9KmeJ6=-+wiolm>&+1>hbTn8foJ za2&?!u4|WDT7qP!V_$1!Z?2#jSDIM^vKIxDtn5PYV$=Ryf0ouULwRd;x!&VPj?WDZkXk;x z%2+!;ScwbeOpwuvs|)M+_KiDb${12p{dlW90#FQ0b+%pRO$8Q3yBnX{#l4yfE;h5V z$ORZynYCi+hXreHf*~m?H>L1_C^wjePs2BXu{Pi$!iDoQfQwS&u0nzcn6*C`yh7`i z;lxYOd;s0Fc8p*h#~7DN8hWgqyzVA<<*iZ$?6&(3I=U4d=I2XPRDB8@Dvkv<8fsR8 zWgIRd1+(VEk+T+VOm`XPi=Q-~M2o$AQ%{V9q@W;aE{d&$q}1`p{fF6)vxr-TTC=Q> zZ$49z`cb@=baj1Q19MGxPcq&C0j8&&#Eq%vS97kWrgRxe{EjF6tfm&q_EiR-r{7Ay ze)v?ve5j)l-Qq ztE2}?)xF08ZJu%djPKj~bkFEy&h6_D#tSsVH%5y)cTXQrH5&64X}1Jk^Pe*>_s7M+ zi=N{>i<0~12q$1DwsBuKUFZ>SG=;0hZ_QOi-eCUN$A!8V!ereYOjX5m=%nJM2^L&2 zvdR6zwOJ4Crv1FNn;kGn`J7_=)A^;i9rPl)zU{uGd@Lw0_61@)hVDIoJTMi0DR3zc z0uXW*zoAWLKCwJ1P_b27SZOeeto2&f7-R2Z$&g=epE^!iZwr+ANn^RSY_L=3b%BXCz?;fy%8~CTcm9sEWy{bK?!%}ngW^st z*7LLb9I{jwx|}`rm30lG?Apse%hUAbQugL0!<7Hg^NP$=1+F-Ih0w zL8OCAx?zMO2>ozp>gs+`kNq7M7STPn=jpINgxBNZNgGectxVGUn3%)bc(P6;9KDzR zbiY<2=|!L+8P6WtK!wkBQz(cwkVzcgvmszgOgL^aIGPNwHOKhwQ0u}hDDn!O;79k! zs18<`wb})&>%jBo`p)ZwfOM5mXC&r&qbwe=%GI4=K71@vftTWu=^sm)&n2rKseB{L zhkp$8Z<0Qu`#BJiM#w8pkGJ{b{P2Kq<@BB*_Y~&tcc!tv5x({dS%-AhgS|6)oyzBt z;Up2?xYu#r2-?$JGbwp{Lz9nLkC9nrv$%o}Y;29S&tX0TUl!Bmw&hoq>FQh;f6^yI ziJxGRa7jF}`7RWxIuG!`Cg5T_cf+JItP=q0|9EA2ATDI9a8P$G)E#|`RXS$%st!He*VhT zmR-z>u&en%1Jsy|&li}sYR z>FL9(36woc&QFUrE57wt$EUB;VVH+C>)btwl>S7DuBuN8j7lDHDhYO;mg5zg9`2H? z_Gc)<1kNySBUL@5FCJJryVhy4i76-!E2m7rBH_!=S4j3Tv`_X|FtOS=iSA$509rmL z_4R;)kA{a@%H>81*_ny3vUe3{1ICrkaHeDJv=rye7p@PnxUlYSkUNI)-~`BUXSF~@ zk_0I^`&0SF$$d_$@^3qB zJk1x4cz0k?+Vxl}xgiv#J{b`aR|F9={>Fy89y6UryVoQ3>cjWRMl>#t59beSYzf5e>^^EI#=tE-8; zc6*sL4!kPM;UTJzi37LKo3I0-UFQ709WBYRLdqX?Kr2BqVkh0!GiG2c*iWbo_EEz6 zsRPNz!(%~BoAt!XWX&Bw4lQmIzrWs3Q~WbDk@a|W(&_RnX%wrlE$QNZniqH(y?eKkbL$?S5X7A+O$5coY}aJQ+3kAJR~j9HAfCf$(bm|CZ62#TYLRqQ?lix1MV zxMEv2I7pmLLbsM7_ml|F9we99V^i6`14pI(fZ{myh7cdqr0>e#WI%n5D?M87k&c^X zH@dwE9NT7WIK(K2K;lh~tgr~&HFK9%F7umIJY`;F`%B$vg7iv>4gsw5yAC^Wa-U4W zGeQx%;8!;ZI%W&vbL<4qE{?qXuhp9(yi+&e*GLsnMbmE`rfV#--`zJM6|x}_Tt}%Y zwFt5lL_G;78p#6N{5nO;rYCuXDiTWzikMK^l=3sbxJjlcB|xBv=pNR(Ivj5*Q^Wmo zA_$m@c_1Gxxejm~r!stsvNCe}FrNeqy|6r89F9QxDi5cem>9K2&t;T=A69a+S)RO1 z3m?E+x{3~}%6pydX?`>jLV4UIC+2lW{l=t3pMmf^!sxw*kJ}rt*KGv;otMw zCM{p7Mb@sk8H>CH3As=qGPAGd-vf5;~vZL-uFf`E8YhPG_$@U%T+@Tj5 z^K3CYBC~{|gWdB|Ur_svAl9mbrz7XMv01+0&0`Hb=U1gtHHK#Vg0WD@==H5l5jfpox0o%uh^aD0w?)2wOp*KtC%y-HQQ?@zhN5HG=k!& zY{=cP^0`kd;PMIhSpyQ&MD33Q)bfg1sevDKxXooNHgT<0K2hS^y51mg;_xO-c-8mI z#dMoAx}Z~BS%!~HgUeQ#;G5Gpb#jl`Rn~@UgGiEmFBYgwE$ljaBbO+{Zk4P80}LKE zBMJtw_3;h=X>4Y0GJE+)bY;Qn2Sn-e_Tq24e1%{Tw+NO~qeI$_*;dFC%=+~+X77*A zo7G&j?Y1mM8RjO@G?Dweco3Avg2hG6&#tHAB*Pfw$05hl-6`5S)f^@pU1fRG;+C?| z-Memb_Eh0@d0I8XnrBGo`)ODLt|Nj`;@R-sF-bq2K9&tpY>s95FNbt%89-^XCy{-G})5_~f#2i)?J%RU0Mrz(Wlf&wttT*OxeY0pZno?&8B5b)8s1c zPvVf7cFqk>*i~Wv>DN21!?=+GKh;LZd5#no4BTgM(vk3H)EQF1Xgmmk- zf8Jps?fKWbOhrERo*7j=3#5>?l4K)r_Rg#d`o3?$q{Agvip*iIm5^VCr`3?c;(6x(dCfp3Pl%+J2=2>S74kpznCr=Ha2|PVmkS` zRtu9^^6b#gIx zGQF^I;iGDei@<){Itpv}@WiWI3)SDWN?!`B#Q;+feL=E@2}L1HlQePu`J|~8b@MED zbW<D!1aG|iz?stc-Yc`oUh6P~qAGcEMaJKI`nfJVQ!U1aXMEAn{| zxAJL;x+|TZ9nEeAw@aVHyleiQcjIoR(o$_EWWJp~wP&r#AQd7~QMp|#@m3S>bV1%u zpBpXxEXzUFSH@|VAvk$h&F6Xk9uH3y!6eHEKb?$Z4BpNJ4?`C=R#&Q!G>a#KsgaskK|6I$q;!yYMzUAIaPhMaIa_bDUTuuCez$ z(qP(XJQ_8Ze(FAwwj8~EgvW|S=l2{{=uS}6?xD7?ZOTotPMJi}lbV^d$Lo{vGR}A; zC7-$ojPxRiq4%wQS59NSABK2`^05gl^z)k#r=qrzADFC$6W0`Myi9ksx@kOSG|Cy> zzPHNzSZRiGQ>^)>4o0g^NBZnX&V6lNI9^A+uk<^PmI`br({~vvJ2up|70;l9Jpoav 
zn#h&Z2#hHk;#yzDLnzUpsDN=-vExVWDGWWSjP*Ne7Nhn@qa>!XEmw&SPe#shFUnQH z%P$w(k$d9fwfXr+2Fy@YGM#SBKB=C&8_jx`p%kYwb&ppiO}eM>v;A;`;5Yorf6w zeTBhSHPj=NAM7of5nXT6ZkCNJSGjI3h6}q7^Qa?tXKI~(ty~GGSFb8m zqW{IFh+zYDM@F++UFAy@e*D43@eV4p{;h}5lBF+}j;*)Ezhz6Ohkc-xd1;;D^CmjX z*m#3cZ2Q!Ko^~QLAe)< zEFO#OhwoiV-mU^+qbE4t7_ab;To%nGKW>Up?m{BTz!nBEBeh8Kmy2tAg_U@EFl zYezCu%jozlY-mFqk=?Ps;YRMgyx;gX(l$thOysUpCPP>2DHU}Z+$w7VEp}o8E1AV7Bw^pb^ zL%-e#UFM(bJ+?8`;o0An6t}G8VJcpM8|nUJ2}vmBx!xLcjwTHM^if$z-;?e8IwKpFU`;;DrRFzF>ybi^jnWV(Y8t z=6zv|->3g{-p<8kwkF1>s<6!xJ@tfZs*(~JW zA1wD}uWX&3X^$WcZ;bBdc4Cc>P1b?dnJA2+kQHsY;rO9#V-WgL!`)_red71r953V8 zU7ApqTg%y5~PzY6@wj zz4jzT3yp7fjOR^>8Z3_8*#@OP|( zTt?~Gz|GlA$XCOB9?y432-=!H|~=c zJSnY>ODgI!8*jLgAU7y-x5|vTQ{EcJjh<)kpX@CTO%Y}7sEHW_J51pw*Ujl;wq+mI zBgP~ybzfmV6CeNQa}}GG0!ZpKP?|OK)I+4<#X;#sz8A|jzh$9-LbB4kx{i^+fNGa2`Qp$DFQQTG z{yI)&$=;5Ww_{XXLiGnDWUa%*!?$TxgzJbRK0-_!4d@K3lG~8OcIHaA{TFx<7!>?x zwN~4`e5ppRS!YT3d1(=`ulVbm*+s1lgouMMHf(Sp&ZOI_hi10ee7a_%y<~wV1KJXb zK1rEvDeREMof5YMJSYvz+t}&iKE+UJ{a1Q&1I_J>pOdw7Zq5#+-wTEDxUQd3E%!$E zNf#d->NgGw$DsP-1W>LDE3qHT8`UOv_h;x(kAE%{wxXAbY3?lxtc~6PfQ4r%l%iktj0VoqA%}L!I^L#m53F;LbK( z(WmoDis$&`f3&_Iq??gMhlG@@X!yGR8us2x*a}or@|{zq+6(#~w*7mi0GZZ%5Ku-h zX503)!Q_E~0+SZkg}wCa)f*i=py>ca3Ts>j>(M9dxs$PL{hI{$4O=!i_FIgkKsGdzCHcpjgc@BS0GRsZ`bNf=E zjs>?BkB(4;H#}UcVN{HtY%$%c72EV)NrkN^=&t5(l@3Cd1viUiVoqQknUtpi;OdZc zDXw!=VXlRvz!A}ZhZC@o7Vq79mTs1NRco-Xth+$qXSar=uZFkYgWsK+YVO`s8Q~1j zEn2DOf)BNfOiAK<)$YyQvkLi%iYx;9pRfZ~8P%i(ui1uQ+iEayFdk60RTL$^seAFb zsCI8_rYb?~fU3sjgi8mtji+e%cz4+{F_CN11L=D{bB($AEDWF>na~JDL0YKAc+=+7 zSboj0s^_FDPFF@VB zc}7UpQJR0MmQ3?)eKWQt0Cn%{GetFC9Iz^7+1+X}=v|`O?)alb@G?c^joAHLgU28;65Z>^XJ;mVzqUlG-8HDx#HZ4FJ(Ta*VRh51S7MVOOyciq(`#O|6jl>RZi z_bJ)?Jh?-j*PiE%))m^)+lko7Cj&GuV;R1UO-?4)xhx#{PfvtbGD7Y)T1Ia)sfoSf zZ9HkRsc;V@`|(N2=e>mO1A_P>we~SjgP-;>V$Nngax5vdcTcvB7@K#deWHRQ2n))c zV$@BNrgmw`$82X_8cr@!rQ!5|hab%yWqF^zWLwa!AGeN7H1<=uIb|ExX5+4aEkB}@9`JqsQI z3E{MQ45ga5q^Kh<+?YyM$@{($!W@wwYH-8)4pR8u1m`Q)$(e3DvWM)qqPEHYL^Zlg z+9Ea3I!621g}Ey0^D6^czIW9TIK+;eQ}clmCs-L&dy}=cINCS@4KD_f@%I+LC*k*b zmi8y~{Wz!klS!MgW7#yNLlwt0JtS@m`B#@)Ho*%bJH@Y`s*;1V@_9FT- zl782MV*jz5G}N}}9->1}SU8FGhTs0D_sK1frhF0xxh6}Zy~>9&*>`X9k9TIi4tb5Ii!eTI z<4Mj#-zYWf;xAd6lCfF4=wjEWpg>v(7{q#m{u==KH!Ar~9EpvdQ2B?q%>6M{N>ra> zkFJn;yQ=DJZf}Vn1-{1a zjP}v2+5Xt4>H4IAyj0$t2X3$%{t}xb#Mz}F*#y4jJ$!q?a$7@T!S8%qVT9jXba!OC z{&7#ua9(-M()8;~&LoI+cZYlHWh!>N!%Nl?d9HnbOnn6>csy z8~=Fi?_|3EApX_5e9z(Q2kg_vbhpx4bSp3WoLr za}cBln|U#&%F!-u##~qPpc}G&KUyMA50d@nd{Z|++Q)jW#cZ2-ZGHXRk1-74Q2e3HHA0)iberJmnQ$&00595&mwo>u?XmCP;71qd z`kUs4#egSXYd>(Nstq~GTbkcvlJSTEHCx4o!H04j!u3XCj7fdv{?;Xx7PBR*h*p87 zIr*_`r?^hJi*%Jjty~+KA%iT$*>Vei(0Ou&J^4UGqtAIGTZH{ zzN=mJT5D0raviMQtLC}*3c+*2DRxgZM5Y_cl1QIsEAAukCzVhyEO{~=tveaQF)Ht#wbg6%d}t6-h4bL{PQ1$vq?U0SiX z1T%dx`WEB)FEKXw&3?{rvU3_&TF?5uj(f5}-0ET)LFcGb`RcGC{Lkv)Ss_z)-q6~u zn471XQG>^E5+ll{M|+Dk7AEkY%8~JB<)DpN=ps_wbq|qCwBt#lZgw7IEkt*1$w9w^ zt5`Bl$gNF%wPV+#7@Tx~5ntfW`PTcV+b9nQS^fG~w+L8W*(bZ{2<;6{Uyjc`buE`Q785%A{f5`4sn@se;&d^N$zdQ_?4P(%b!Y> z77(;WGvq=kzP}+sP{*|9NcO}m_v$*?A2^)OwHG3b@EtC0;kz|H2+%h+yy%57N+tRB zlxsrS3FxD8Mp+{j&&#vDJ}_Uw$uABz$%~$o_)n%rqK{){8OgMXsm*!S1bI>Rk*4n z`B`T=8~VBFhWguRe(3@WjDUJ#)G{G5aeS=exOLKzK>fu8{T(_7`7eLf6!fl}44JbC z5MsNb2*D<{6231uXuo@fj%pIx!xeT_NM+oQ^6HB75C=xLj@B;PfV6GnZksL~*vh z#`EQ9Cv$BIPk&4w@o~stW6n~j@tS*7s-&c#Q_+{ zWiy2MFbO-rw%0pG1O0bF%%3V5p@%qM4PpU~18Rk#{A8IUXSfIR4r-g=)rIe2#V2cgz+ZcJC9oma{gzyV9+G}oi=4ns z@>mh*g1?>{rq#>Hw30!yU1Rbw3=1Q^9f+m4l zP8dP%s;s`tIoI`ja8O0TEB0c@Jq4*u2%i{doF;aP5nO}*t9v*OB$(!td<{*hg2G3D0Oqdv1o?z5VMO9+#uq9?B^IfplReH9Z^&ml0*Q?m=r;%QAKT%_YuTmsxC?`f`FC0R5m78miLy~Nt 
z12XTzoWiU3;7%X^JHzT$X~PRoCrj~3n*oEn=Vu~=k&p*dHjVQ(8(ggtK{0Nud$2}& z!hUO#r8)>-65Z~6T?(tSks?7=?jwN>+)d4$J|<>GOaVyy&gZ=8>FI|lEyOe?ex;=1 zo*BsXoW|h8_Exqf=dx3X@A}Bydx)}-;|0zpZrww80(5Dg?qA}^XN2GS z*`1cwe6d1@L~nXRe!KDLQ_7A-|NesaQr##u(bCAKtq>Z;H38#f$>E{HXbU?Zv* zUyJx;h?rA_gc22|cJ;Ul@884>h)rKj=nuhkbAUgf33J`Wiz!_Y`#eIJjBaJKe~0lw zGPK@LNi!j`@BYeAj-&|4yF}rHg@NlA9;LhAJrH6o03s1ImbQ@js z6|L@^mJ)@J&QmTzVYidUS8PC`rsCx$)RwQZtkd&>8+S5N@V*$f&H0F-(x*#Of%C^m z=xvAi&vM9plJhox}w-h!#FZQB|R0fGe&kl+?baCZp= z3GQwIg1hU&-3ji&-Cb9Jpuyc2?(WVv*?XV+)w$>7z4rrN6;%`z%r)njWAxESZ>_iX zGdQKKm0PxZeysTTOHWb1-xDm^m{ya0QpMXWge_uMo+0J1r~uxw>t; zT$M25*0dCo^I$BauW&!x9$^pzCPTIvY6}h94$mj|l;&j_Kr}fH6{-+gqrSUCnPDrA ze79E1)qOTNHhOh;BI3)&no~^vXcqsYyqa&zt!lQb;z}7)ERPx!mSa?;4-v~b^aiRB z+SJAAUkg6RV)SA}B`R-o;K>&dh|TnUNofDE5;S}`AYiLAkFI*Yha+1ewUgm=2Htyd(hKDcUdc4 zp5sGuMa`3*%n zAWf8`qV3+lqzJwPB&g(X?M+Ip?JQB^h}{A1?CzpdPQOIVUTK&Gph~Xb}PX)@C&-tuI`JppER#yr*<1-5HbO%T=?B zOe|eLdwo>txCyY0Q7ifs=sEhGpUcc{2{Dcs-{j7LV8CNacjV;uViQy1wM%|R(n;F+ zSA}$#@1ecaQ9K>Y^*jSciBB;n-JI-BNJKJ`R()TGirr7REMJ1|vIzl}6~!xW2H;6k zAM02YPYzF6>@EdimFUvvn@{nH23$rX7f+9UnhV!psu9iA*(^p&(3M*9Q#DTb^l9II zoE!L%45OQ7Rb%gF=l*LdHG+%M(1&5pX!UJG$w}7LuX;Mucp^z0Sc)|`R;kkM&e1Kl%KWw zJ#=xIJ?zYU+Vu7_#xVaKOdPFN2Yp-c=LTCD_zsVXrDG^Tw6>LIS=|51W_|u;H0ckq z_FmCDe`0rnNg(M+;aA_HPfXydaXWN1AU#haC;Y~0K9ce*)&|x*!2UFr*Bl4%eZCP4 zThP^z*z9d3%k6N7+-eo^*(tR3qS z?EQ;wDv35}IlKhjkC@sv{CoYkv}6ZM+6^>U#}PQPG>O+^F*)hobRXFE!W9>Q&M)AR zDxqDS978s748PYvKYWM~>XAv!;d>BO+Zhy&Fs3mkTl*^1WME$EK&oNk54}@$wpW@| zkf6D^=SdM^BZc<3d8d=O-XN*8@ee}Nq@G@8xR@4%uFYhlv5pW+2;z>9l$`!$c9yvR z$pr22ujTIzRC<8LAMj;;ZMJ%A8vs5O>nfTS(!|Kss=r-+qeG$DWvLNX8D^Rdj{%pe!{)CwZ(E<>~MuWndWUHH?T2ghEK470IcPzQ~ad- zUFA*_&)My7D?5|T5{Qy)F5F>eUgQ)zk%osIIz$>R;{3r5uUQ#(M!!u3ej3|-#p!vU zPXh>Lk;w!;q+%N91RVRHx$KV_ycZbszB-vP16ltv1n^068F;NMBM^YcdQU5}Gq69< zwJj^AKGG`nyR{PzEJjSs-DnKZzqMZfazL9|SrzO#W;*J6C7`+%Jcs~!JZ64qHLpeN z|IW(Xda+r2Yoj_%KeM}_Q=-u^TROk{T0e#uA9*%&Z^!lkT_`+CP%OR1 z+-*7)K&sAA9hFy^Y)XDu&w+D`4(%NfCFJcqEie!JYk^C;Y&cZopT6E~K~stjoVLLX zA~qk&UfIpJyxw*U5JVH4%kkRs@L ze6SpD-qZNJeV>FxrJ7-{8>zpPvR1L75PAA)ZESPR%fl{4X1Au$*cfaXVbiLLR!148 z2pEe*&)hE(S7Q1};de%}YM-ZHPYDmF@yjf_zOD^e{^^nSQEQH63Pc=vBOP$&CjP@6 z-Lk*_3qnWUT zCe|YnL!S`?UAMJfCGs*JOFH8hl;uDdyHF)PpYa$;Iw!irzH-C?wqO2mvdp+gO5}Qf zoY7u;=}bt4W5p@+ep)0@C-9rcc?(eA2ea3rMGIgs?MRS!7Cl8hMhUCtAykMj`QoEo7DMaA9E8i514dVoQI@}>s2VE)Wmynp=P=E=z)pl z+!jOk z_(pZSp71T9-5DY_Zs3_=`nTy4O}tU#;iZR`;0=AOZ0!uz?FV z=%-F(Q`u5D>dw8bB_if@KsT?jqqCN{#b%uC_BdNid1;e_9rAl-jIm|F&5|5RabW_s zb;w)UcNThYlb{*7hUO84U}kVU7~haE4}#A>_r383|8a&KUi7o$%%0hj+a%He%W+_6 zbs^c-${IMsd7lKAkxFE9vP>-rccOenAy%u*?-qH3qS_s?YEekyk$CAm|x>NTdBqFeOj6Kxqf~F*8tKXg<-+?k4LTTsfj`d?5RoeK{7&nSxQj9 z_)o&|WaiDdC0^~b8ntaeaP3@V>rq+RQICW$mCGUd78L}k(k3!auV7;2iQc~4-k-~W`d#X!`uRG4AdhPNFRwC>qv~(5|d9aZ*l%o$rZ-Ll#qLM1fLj4hOsdlrRS{TzfHnp;s zw#i-CJE02mmu8>}x?F-@vC?#pSfDSb$fx4^?CO-&ef>etbo%lmpuLtBQB($?SeH$! 
z1s!*Xv@)vhcV|aR=bh_1p#FMHzd<61Lp#UZ<6(3Eh&F}N0_f8{~7{1$k>SBK2Wb)c& zI(O@9vAZgqO8~9Nd$I4o^3xl<4JIU$EYA=kyGcXlNr(3vk#_pyiWQp;vA|qONDyMZ zI=9g)t&76#+7$4N`YMi&l$=vi^|!30MIE~k?AYT>Ojvc}@HZ-H;0N3?cNQGy3TsaD zpDBDukB(;S6^@I(rLWC!r1@dodO9qSD$DGXCkbT-kLRwM z2BlS0efNL)adCuY=!-^DgC7cH{y&d1Q)y-P0#(er}QJ21%JsnahiP^4mTa(IBGMNGz}LYP4B% z#OmRbDFkx&OU^p+4Y!@j?hAK4LN0s1r3dTO4lGoF5doAa?e;qmt(w_n?$A>*iCVc` zQ(1B5#aL zTOjfM-nk2l{^rZL_tr;KE5bye7Y~Sg`qgCwHxWU=92?9qE;y&GPOre=@)4VOO6Mu{ zIS@){7-3x4wQgMb(L<>PVt;_R+1Vw$EFyVYVlwya8lQ*W6>2A^HLYYVGp zmv0GfU)3u1ROrf7h4lTI)!8=Syn5~&kN=s*uI<9^?N8HH1Ea$K-;Zw?Y#-D!tcTb2S_sN!=0m%rN2-J=^N%9@ZELz zrmG5voyNot_g1pI$DWb3k5?7<&P8fuffyj)%R}x@ol6n3-~1@0Q_y7}ef$9ea0l!M zoTpb=X|t$~OuW0y=Ld}OO93nt0JXR~-jWE%j%zMv2H_cirFTWO4ztp-KfLGP_Ea#+ zr0SoQ@f09F4}Vt@I#sG9Y*A7Nj>&vX$58lw=6Ti{;zw*wc`|Ju`F_^f-%Lk z@UgBrXR)Jcy%eXW()<91)y`RCnMo}}LKMCHEaJ{@fUCPEru5hbI-aw7!zEL$0%S-4 zFl%?lvCH{0ze7s$qN_vo=rNln)bKj9(x#Kk+<=@u zS2`{k1!$5b{;8KaMD7W@_nLc$P1QZhd=7kDvL;xB359dx?VP|REFNm_Q_*~FER9Ad zg%kEau!bRq3OLQ#AwZ01u{@ej6B8Hp0A_`{<;%!QGGGObdtpdjn)D8jgNFR)>X{OS zbu!8?QnUhSTKN>4NJ8$ipx$Kb@ET;oA_0HI^^LBC-!&fN%E|ov-Rj5x&`gwUV^jmZ znvdutoD%3eLs|NaJM#{(`v6ZWY`1Ji<>)H_dl(Hso?lj)%GsML6uCLupg{9{hEixX zD&r{_ljNFTp+@5({r;C00PmMv@izw4;zq`>k-AcsBg1RS#9_9BT`mj0Z$~Qxr25mC z@gAHm_$lIuvKklQ0Kiedc!@Y{Lx^Rgg)VFRT3W>}x~JxYpK#z6f9j9#U&E$1g&TZ$ z=swx1UktlyCu)49W#R9J$)k!pbm(8Ae2w=qfFW3nn4>f)I;${IRjJ|=-$k!k z^Go!&C^7Pe-P_r6ptC#%K7i+>jYqbU$Ljl+tfk7NYL9wsr{e{3_`rG8_N7Yec|AfI z^_(vr6i-Ua9=|{1I4g9B6XH}G>R3sj)@>(;(>8aw9Jpc4Ze+OdFl7lCh6AXHaLMOH zG3_2+%|$MyLXWgvM)v7PeKIlf)AoS@%Tk=-{sii-6cs}DY?o8G7&Bri-5BX3s3X-= zB0L`4sp7)*&K0sAlgkC+nhf?F<14P={M&~yUGFkS>TBY4#dRqrF_*QD_=DzHSQlO+ zVpMK{x4O|So|`km+SaQ3J?`nGF1B;sNX^WKkA!DlPrk&9%{_a3>-bT|++fZxUEYM_ zI~P-YLdnK!M$^;_5!ZE8mS>-i2)dP%t{4TrG~=T30dh(qC!0}W_UGG->h-$RgK1ZA zSahinD}?|tJ@~*J*ny@CT+{H6%TrMo-l?2bnD+kfJxj9|n=f~Z3e6aN7)Sxxa9^C& zBi+UK(7(Mq&wrnDPUXFA0khcg*PjGEbRR+j3+BwyLzG`u2%&jxeZ6MS8X%eYu? 
zGFQ8tQY_q_364>$`2w}eyiH)Rfv@V*>r*jbZI~$~$<;-{Hn3-%E6%ucaPxKy@0{P< zZ{#R(x{@WB9dbXIV!a%dC-mjpo|04gro3qUD)eQo{a4s4=MXZvj7 zYqy!xI5U%u93pq0Md>|wCv9PTbZr_&t`lw1b&1(j3hkY~oEGp~h46HwkgcenRUUVRV4u!<-Hg*$8xJk;PuSfz zuSH}jZg{(?CHxl8zj4!tv)c9>a*sxM^1ZV^5i@&V<>J$q(A?i;-gFL>tI&bUwv0oZ zR9I({+W;E}h&i3tEkfq=TgyJ7RuUG?2i58Mon0)aalMbH?c5sKa}PWT20s?vF2^TK zs!L8@>$yFp6ZEzf_7uANfo=w6_#pW+3?Q8@#903euSIv)&ZE#r7^E`RG zHO$Q^QNps)&8hH|`tgk4%0-R6G~?v(J$05u^FV}-10uAsK6zDl=#nqfGgZ;0lX73i1D^bjA!x?#K*J9RJq)Pf6}Akd!#_{m+t4Iv3~&d z-Qajw)UbwmCa=V%pnP~&xm2~2wX)!?8eo2 z*#lHOq9PqF0*g|dmi>)YkGU`x7Et%gd{I7KRpM%*9TS}|L^I{pksHFz7lawFZ(h0N zwXo)LNfQq5!|3`+UF$K?>Quhc7KN*tON9+B?c%cY{!Hn7_b_Zc9l?96tX0XNl}rm` zW~5#^p?e#W?UG>-D$>3%=a8~JZUa$8D)(NUF4T%6EW-O-(a9a+-wX76!T??30y*2) zb7y>^ubjWqy=q1$c}Z)$C#cJ6dCrPaj54mGiZ$3pKJNhyxzMm4wXyEfI|r~$MOk+= zKe^+3NWw_$)1a(_HNXp3pb!0Ed9$MS1NJkGhbPyGhFRFFezE}bZyc5FoViC=!Rc#wl~hf9gW5@j=-0)>Nh#Bh-IM{6&{S zfMdbq_%y*w5(E6yT7S6?y{isnDlPo385u3+ncf68ASdZWc2-Q2Z;#@>bN@PVOpw5>pYUc%turL!+P6- zPJN9Yc$TA`QyXLvG!cAmgqOP3BbaXXZgJfnw{gNWVJ@pr5WaxQo8oE#G?5TKUdS=G zz|ZuUMfIcy&O=6>BLkmzn&trc;yt$_ti=-$g^0@|Z^-RaEoQlw=KAJ>-k95c<2a)_ zW!*?m(GF{H#G$>x3lCehYOk(=S13ctkuI1{OY?X#XCfk{rDh~@~<108IySXYPXB?LFBjHB zm)>qTEHr>*{EIVoi%i38h=vvbh%A^m%-wVAAz?^un0+BxQytdF4Aa2-rR*g{?ZQ5;7j zmk3A%_SN_LMlfP0HxyE(skLJ2r}&7P0|DV9FasmSuX zThujJUS{0T$bnh;B!XgJIU^ax=3vTYvl@#S0)Xu-=QS62^&>Np;E74)6pjoF7tm*O z^5>K6dybamaTgIkHrKp6-Z&;eVICmbq5H5N zTK=>piXpP_3UxBejVhMpjKerze?iyAZen|wrWpN0iN~U;V!b1)65ZnpD2YwKEwW4d zfdS1#f}g!yH8t%X3@OQu9W)|_t~Eb_$Lh_Pey!|5GxNx0mo6MXh3>^aH&Nk%ES}6# zh*DQJieKyvbaasNU?=fq9Q#Hpi73JxxR%)PQpuH|wE_i^BD|7ywt}l4Tdp46NXW@( zF9fS74%OaO^pL;Py>FSg{diWn(PI-B^Ta-G!=umGVp&?$FK%`Et*?Z|lSWMh;_(#P z6zH%%$6BO1+~T@0=Ovs2x$eCYmUz5;7ubt;WXb8FlVSQ8q)!A#MO3xXirRgLzC3^o z2T*koIxLY$SC(-L4!L;nZ{TUbkB3J5NnVcA>tu9h%H_X$o;-4T$1k&+J9ps9k5oA< zpwx`|Wx%6zwJG=T>~D)vC(EQKi*=a6oUV=I4b5+IM%@xq zrf})}^5|Z;w7|Ax(Hwor2>iM6o~p`-tqlOZ+u?~I;BU`KGO4j_eKJH=wFI?@X${Bw zI;1z~StfP-N`OVuqv_o<`dKCD&s}H|bIWp{CbNw;Okimz#c`C8Mq_`aWiSz4;b?Xf zl|PuwR@2Ma6mi@1bwxgTz%|U#LZe<5d;Nm9Y_#K$V z>Zkmf0zhcN;;a2S(4$juZ3&}00WFFubARf|7rz{ZhDVz2zPj>5pSjNdb-jxW^oqfu zh>o{rz3YWY+CUSazfd%0BmjS7am&NcgN`mV!ZysF4M@6FkF_(K9SYt$%xOxrxh|T& zKyqDcyI+!_>|__H?!g@qt2-`YS8AP#i1epRgix`opIO#?3skB!Bdov89I&>oOIWeD zqLe{dCrclHC(6T}i(sUE!DYl92Lwa?^dCMaDXbEP;MJDiN!hu`hSs|X#f?vf=b&O= z$emZ9-Sx~(Mi~LOm4{PfI8x5IEjk_}^`a*Zi0=fB|A9G?$A{+a#Zvy%-F6KWq5+T? 
za$0Gp9bK2{cYY(|Bgiq|!3f&zgN>ozCMsX+KA7jnczA<6`ZkdJD$(Kt08X?nroN-`^`%HJ@pgE z-ir=QU^Afn7`mj7{RWU@U~`jxgxK@F(~W!Y=J4<-2UMf0)s(T(wCE2MAcs3!P~(V8 z?$BH~ED{q3k1puJLnkJfOQWam@`&qQula&SHFD}Pzqg)e;w)kv36Mjaqnbg^;}I0r za9nC6sW!V8TEOAjO3!SUC@ig@2}j>p^~sXtAt1M>t05SoHszM#c^UlU%(cN5btAJp z)niX8da~Cje4DX?Htb<1-t#j2CnCGuJ%|F8TZ{Guv>idd*OfUZvKhv z{R!|`wF+{j$Pr(Kqklel@7GPMTnp6kJnQyQahUyD-3$@l0wju0PR1xdIuyNepRi}E z(~BoDA5$=&z*Uw;IUOy(js`qfVqM&pGWbc{G~)kCJ3d&wlAmr-SZx0L>}c{x^LSHlrqE3q?d4(|}j)>F8qMHirhj zSTs)(R~Z@ztEf}18ZPPBVgDMud%ehGZb7G16k9gCtOYAAnp6@bC7V(qPyZ%;kXe{a1F6#W~6Hk|LNV;3Jn(_i^!K=W!>&BFpuSL>K_N&ixG%Q zkc-K4cwDye?RTQdcpd_ExiLkDYIkL$(Qw+96A2|z%%Keai8^8T>k@)Ph-YF_x8&4N zqCoWGLP>zzAh&GG%uiPQn;_SHqnArJX9><^qDxT*N-THnzo=5{hm$FRZ;o!SGLA1Q$kGW8~z;6>S=~J2V%Vtg+@!=5fEcNA@?QyL(f6rV@h~%ZRReC zr9lJl=M+aK#)yRckDf~l9+Xfp;%47JvT&}F@0k!2cKR3@rcbBR@WJ)dlWf2B-p%VA z<%?n}(`*yp8OzUfzfaHkvSOz6G?x2&A>(d#Y}zIla3zmh-BBC9g&`-zue+cTTUxQ9VItGl_=MMO2EefpYRy5Y>UV52MG-lo`Br4d)>qWD_c2N)P%nyV>2YRk-yxFo93 zpLT^2mGR3zuq@bS$Dtev)U+wc=F3_KzPIbubgy6IehXQahU{h5DGhs$fB-3OK^K7O zoObboo%c0ZXMf%^D8eSWB#F7mO0WJ}+95=0QM>*lpC`Mu=~?(&YzwtDKkuoJs}eY{ z=x=VxJVGVAz$@z;3W_b;Pnqsc9JSnRoeb=U8*D4QolaPrKV%`J#uM}yyvS9SdzV^% z5M8@to&^Zk_iC0(UlDk#V<(U^KtaJ<9U`SIg_u0`ZHqT*&ej?Qg#{?wX70V_bID z#uEufgpP`exts7Qgf=wOpsVqHn1Wq+}?V*u7d0U91^=ze%ayKNYCE zVGg5#(IST(L}pT*gNzCl&A+ljQ3&Jh=kvn;OY{jsE_)8)zY2k$&e2EQJ7%5Kuy5|IF zz@VN*Z$T&GqXg>v1fp#38u7BV`w$>uVft8q`>~2=eEMWK(jhFNBodA{mA5;27)hfb zmS&8Ob`xXVtc>f3Nm5}jFYWr|d%`Xknp^nS4gW%ZiFHIPLz`~=mfc_MqmLKTOMnRq zB}(|GeD~*|w-*I$cI`8aoRWQ%8~3m0{uv(ja|r}@{b8r3#__@MHiEYI@k(bPFMnFw zMA#o`100=#UG4xzhwp$xCCJpUl9B?XNb|&WWbza!hp7fkr%NP}cc+6??pK?wfzg|S zUZe03rB*3bNdgEdeset2f;18$uS~KeXiq6e9ST{tDW1_R_L~<`n*dq$Qwa&sJ)`QY z%!Gmt1#}m~Cko;GiT(B~@c9W0o}b{?^zazzzuIkx8A+^9CroxHw=BRElYeVaZsO2E zEul96cm0nH+YL*RY^M9fVOM-CfPQ5hryPg?faS$?%T4G`Pa$)m>&0E+=x5fCfc96k z!G}Ujf|sP87+MM9zLvHye#GuEs4q;~fDP;yE-=m!tj6!x(9qF*tb@G7?EiHe|67FU zi#xorJcD}P13=|>f1sho_}6n`V7~OE*8$p5Q}v!dPon@^BN9|_=&PLQ-0_(>6Vp#7 zjqfwBTja5cW-x|-!A12TV4U!cZ-sp=9N$9fNEZ^$M~gX@Xan*>%=gc?YEUnp z4czMS-860ef^AGT@!((MNPG*g33zGeL0aB{6yNt)qX9R~Q6Pr<$D3AszG>Xt=bKj4 zq6r6(qQB=j2Ej|f;3nGvuWoFX+315#n`SJ(4q@f_yIk!@z;McVG8vhwe&8LmY+FlX zR;-LISYT$j35c!!-w5&V+JB$2L_Wp%jD9%)Ix@2kZB0`D*9HD5 z?f>VuU|itJl)Hy<-G5p(UO3Ods!gp9e1~>a49C9~&A)&CfBOmHc{)@6(vIf()1^P9 zKJ&UhC;}%?{Cq89|3CfRe_mba`P!oBl6~TTj8l@&w_Q68;K7hY(8pTrf4#YAIbUpA`_S%Z99SfFaC2#|N992L=N==80s@+F(H3ssMnwYmtM~Y zE**j0^gZ(b>pS%Mq)7&ZkOmRWe#}2Sr8mr9kwfn}a==VW$&>u)@&2>hCK5T z4?n1}Osog=`)0Nw%#vGvL7`?)HIK=Ei!JCXkAhf6g19w9BT4boO+%rkNc za0l{Q0OmlK+hGpCCJsnVAO~_DWta#;Zs{augKrlH@hMm@We%`i)(|Ly?UB8&r|z7N zW@P}^4dvR8njiRc#owQ0y_Wu8uaNjHFlkT%qImwewF^Z)Z|#}S^HB(LkdAjPlJIDM z(XWK{)EW)q$!}I8MdV|bkm>Q@uD6^QtKOv36I7un(#fX+^j9e^k>W63l3G6tOS0MV zX|*^U}^!X{80TSDr zvXFBzYUM?`*qx$N?zj2PLNd`_>LEQW>0>>VI$NlbkCT?g89=Mk7*8R2+rFjtoxEQ5 zG`8Tl04Vljt$a=T< zIpVCF^~Zh`^p{)>3;aK|ZlpdJoL(paG6w%>+F#=rGeIu5l+&-vaJ zs(LkgN^b9hS?VaxH!A#7>L@a_k{V`Z`!auhGxR554rJUn7FT2Pn~lr%T!+i;GHfnY zDaYEQb~FdjLaVIO?X1VPrG)nj_@vppG-66#`A?FLK?+Pl>7Z6phd)I-Zw}$-!7jK& z65x<-CKkZi1V;7;HKAynM9?)phjU{5eeL(}6k>t^?uDhN{HD7DSZl_+HJnuWAx@BY z4ib4Yexu%{4o_E*r$VOwMYZqg7PBmftO+#x@XiOnqqF(*)8?Iu*9*t5Oh=eZ1bjzh} zv22X+`^O9(BJ-KI`Zi1&bu{LgEv|~EUCrkQsNgaAA;{o+srcAAK8R0gJGN~o#jp?Y zaxKwdIBYKyVA(v8`#FWUkutT1+vy-T8jvSZD!9Q!-f46Dt?}o9$M${UahJqs1|^lz z`12OqrsEYJc&^S;;GLDk(K*LZ=d*?w0smt;RlWJncf~f%LXL9ZH*fL1XC5s`>AqPV zD~rGWJ0^JhMF5EarYox0!=L6n@zisU@h0av5pbBXN_@G1-C>m|@=H$`la#Mw{d&-e zzgYSAN$2R!{aTNVdWxYGXWgPx&;%GU}kgVc#CA)?9Zt&$$Nm8K&*7`hK~TSQUwU=Si9^&cS>s$H*274U3rAGw0)nUHLbDUiay%J%;3Gpc45TZJ<`) 
zIzuCUih}9gaBV?aXJC~mmGJGK>@- zvy>#tjM{s#XeUXmP9zWafNq!u3968hGoaF+Arf}0YJjZ)ti%aLTZ90wa{~rHQa3^b zf$LfnP)P>_)a>NL-R-(ufw}{UVT|7`38~rwg3u;yu4G!r79solP%Pcz<;Xk#9^s!q zR`j=*y+0Qf7^C^=ayCGII=u@W_m~W9JBm{52Bazr4`r5U(?-M75IydN#HeP$vGnL*`k-5gs8}$bCsV` z!OyvBzh#2RpK?{NU|_oIZ^wZ| zQu0Yat2|d?w+h`@LS?8sAZA%1%?{|_%#<3(+JXTUPKO=7nSSK;5G+He<@SuTL;1>KsC0bmNwI&|$E!d@3fj5QfOWQsfPE(TIN?P^=Y5fowaa zYCd=Lzo+_M#9yEDR`_w??9oGP|9B6p-eit-u;>)@_;95Ibd)gz&{QBG6(zJYp3ACR zb8j@1nyY1QVSkkls&TvG6Hljkx6zjp6M{o43GCq&t4ZlV*X3y0BAlXZW7YfET6X-a zMwj1?>dhXZRjP^HD$?3v)RM4B>&!8(m!17XZ4ZHZyR};{2>5MMX@4&cIrK|o0kUV` zG-W8a3`p)*yFzRH`ULM#@G~(fr5OZ1U=kr`_=aH7_9hzG_AC4Arj}~e2eX;aNsC4j zvadb+t@_CXwi9<+yUxMn&*4L`oZ8RbY_@x0i?HqR=I>3En{d0jW zjUV)Ao`GZ)m-8`v8n|tDqGz657sN)z3(_>%J3C`e9|Ti71kVo<+h#6d&wi40giv@ zbeJt)>Zw_XNad`S>vHRdb0>=KNv_4)SATH|Co#&3@$Z!R!-X)p0DP~g^%EO2!(R;x5A>*7MZMYavx*&6Rz1Y`( z`jH;Y@02*pb=&zaa@uB~Z z;JcVuwOqL8#Zm0S^H}W)4Hyk18%VNIb)NvKv!dupW78zmy2Jpgb}EQxhlSheFmvOn zB)c;(QkE*-25_2@7Zq8<9XJ5gh#uA3)N5Ja|337g!4a>}^fG9C^j%Wvv#Y{Cn%e35 zs)oL66JRjilL0DSC5?QxVaRQ+mb3wL?gFka=hF~slc&c{`^$xA&j{$qrv-Z_|0>g^ zS`0kG{44mI0gZ}Zt}@h%Isz+S$?YxzB+!-y)*ME&z{W(s&qHyB6#`zrQ+g;?X^f=3 zTr`Yxxjld9G5lC{xZ+Z=`w+|JK$TIkK|Ot%>NJB$t=`~OMeKHX?{d2_-Kfj~KQKw9 zQgg`oc--df@Uz$n63sXI+xm%Ty6WQ<2qVK9x&RyqbSe@&a9lNIC z6cI2&euNE2mi)6}TR<9NaDzjJqmVByhFayI*P^dccd&j;Ta#`;Rzk3)4}v)pvH_UY znj8+Qbv}D;9Iu=P=nW1(4B4H#sJA~<#zG=7c_3sh2WK+s!bD$hDuOS#x8vQe2!Gg> zN7BWud# z;g$F)D|Izxc2M77&!rlGEseAPje8)CPHlVyKBst8{OgUR|Shpv8ynve@GU(twh*r@~CV<+uL z_eK{&>XNdchETBBDEJN)KkvOKUHzEg77eJ!0d(8uc9+KF&9geUwo{+sFPbd z?>IVbo;ln%S*{Z9L#v&#GZN4Lq%OuUlX76kQ}7Nh-e<1$x`s@Sbw==--;8bKT8tkJ z9#i`8@s3J1`xWa$ucYeC-St`d%c(-^mHTtSV#@6)4QNc)wEjDnIs9Z+x6E-eVNJIj zU5g_59c^%z23jik;n9fjY8nEUh7ciXH4Px*D`>;ME5016_;`0aI1v_Is-~kQjEf4H zud{ABqsn_cPoOdVQiHt;H1>>=3t`)*xi1__z)2=|j-_v)MqXFn$!(T2dXDUKB0+D$ z393p^9P5&ByulH78*s-tdwJ|+9z0g)iB@hp88VThO8|y7;TZ?{ak0DY>)sx((I>^$ z5=FXe2F#Di-8l~5(9E=5tvJs$IpoXm+(`i~x7(Rbs71>bfb7)z)1dtT5qib(H(vddMdkYEHBIXz6NsshA@joUkN`?(5RUoTAm-T9=;i}<{& z4De08Gv#3@Bec-WSOR&bR+Z#s?3snb5q*89DuI445}CBl!RDp)n=<7(T&JGvlOb#} zrTy&f?QQY!ZInwz^w<3@9SuALdmd2f0MqHH==GKf^V2+*N!7lxE{3)n{hgp=Hd^pqpH>O55D zN`C5a*CV?OXPH`K3|YcEp%zC>DuK5%pI;p?1~FZ_>pVNue2mqBh0gHsh2-l?E=?S2 zQ&{rNmPEI`M=N=5Xj|+Y&#}>xC_i@j+T0!xxaCrIv70xXDl?upT%QGfB=FE2HNS~R zZTkY1Ei3HNIm=*Uh7dVtv%aZSWXBw@+HhDyNWnARLYRo@2Q`T6ao&rm8E7ZRvgs55 z>fBMe1C%KK5_6&d81zHOo%Q{4OGs0VOIf2Z4=Zy;0y@H_VcT5!2KC`n zMk}*-bK&*Vb^X5E7gBP6aw>U5VFMMxSlOC;Dly?p5JW86QC{qRJK@VLR&D`WT-0S1 zW_0P3CtoMN-4O59(%FJrMo+c-=7noIVCI#ow#2-Z%6a9!$vqr)VC8c$)fN$R%M-TY z!F-l>Q)Rnhs`73?8sUAv>$-RIXA14vtHa0E48<5hwtj9q?8NbN$78R2d`9PzVFPB% zpR7!JvR4&XBAWTAyd(Yfv~r26J%*JLXNV0Z63r$N zj!%SkQ-TP08W!pGKoDk!Pud>Eelz;eHExk~2O$4M4$tXBUMA~a=Hp$b9Jx+6|Fo~Z zCsQ;Iv_EyjMp{fj2R&eDQ`>zcQ=j4C-Nm8djll$oaL{~#7+Piw|7iNSQA@r|v2tn1 z!fZ*V4hBZwYW*N<@5N?{H)5EB`zR1CS(A>}VZfB!OO%NvM%?go8M}NF2)oCSz4Mv}%I`Yw?+Ocy7gzTMW~HqVu1<9Z5VB+c1`gRi{gPNyGZUYhtJ6OGY+ zlRTcotJYk*xQp{&rKXBy7W;5=uKSK%x5`}QhObU*6(k3^B0G^Y%r_~o7MiamJ z$3hy;HTCKqKgWetQ$dz)k~#*iDG!UW@cgIu8}-5K_k&dFHohxg>neG!_U|z-9(DUW zX%V7cAVht#e)$F-&ZsMq{L{<-{Mw0q;St6@bY0oi15@qKpAGLPXB5PFfZDz6i^^FC zk4>Zncez)>f7tmlXgZ5kB5OpU`hvwIt%&0j}mc63} zs5^uj>U9K`7t4)A#rj1#axjN32pB}vO8J(#!s-|#CQ_iqwE9^KFBoqIl(LRs2}kR7 zT>4`hzYv193AcQkI3YFY4ec)FVwkA&w*BH)WTx7A-VLFVDtce1oZqeZ8lm(@nc^*csTV&O?dCLxozz>dNyNzOaL_Fo? 
z>a+W|mTRq-2X49S&_}$uJ!XnvkzfSe?E*0qiPW!KSYpIs9uD_2!BK2eN$IyvRw>-E z9Oi8jpLB@=!4@f-B=JmSaqZYpF5{<%R|-?hVYy83F0UfpryO{*WggsC8_2BNZ_s?| z6_j-?0xN$WGt&2F^uMe>xSv^b&Vude_1v#64+x67KZNtZwB1KNPi#3&s`|9w%6505 zT!IKwzZ?I?n}KHfg6Og)v~_Uwx*c;EPv4tp>a?5Z;X7wZ{bHo@T$}i@nL%TL;sCU;^H^n6J7cJULB;!sJ|wDkClYXQ<_o zr%?}nS;*xfMCQM)`^4e-*z%p$Eh1_UIxBC=lvFm7k4!2az24dKH9>*Uw5bnJ!0}7? zSUg(|d0^1v9B6l&_=Q@+qT4J~SwCL$u<7Dk@kM_)hYyvp8zM;YW|lo<8YvWKt}Nke_^+)SwRm|W@K|&5Q9`#G%_H$zj!?^9Poi|!Wxu+z zj>ELkBS`6DcD#(I-S%i%6X-;mokW^!C?yw%=t`kmtIE;t)wyxPjcDZeI?#5v@8g^2CSE#Wi!fG+KGJEc6I(7myd2}ix*c?9+P6&)V zgxyNDyJh@rU2JGHm7p+UNSG}Nx z)R>J1kt$`4g-9z76*SZkx&swx>XA>5sTB-4h;$xc6fWWP$FbRq4D!msmsAET$2_e$ zC)Z*z1afk5R4QLm;-w@ff;3dS@K`T}X+;`ZzLg-y`-qRY)%*8;#g_8&b!G+rislmpg?TrR% zNMza}?LGz=wCgCx(+>8~tFG_Ow3Tx5u3{@=I~^>PuKgTinrjaVuGnAKVBY#roQRxr zs+7RJ<6KHP+aZ4LIse_>rFMdorec;3gGlxBF0*sV*(eIA&>w>ux48MkICcLV$!-YH z?gQs&*B{E$LL^}YF0IqdjsG^7L0_(|cemIW8jCeJIhI|FK}UV^W+rQ(LfS-VGx9#a zcDLC-jr4P@X(ER=;qa;f^~NG660F`41(ghqS~9JMvNxLLQsD1zjTqSN0xuZct)2SH z;vG>}zpE4F1REq!N+$1;NA49*rq%Cx1ms!*7P31{+Ez-+O{0f()${Nx6~IBRO4iXN zNj|v?|NSTJrgO}#*!G9G$9+Z#csV;5$S57Sx-V$ua}`gJ90tZ*uMQ_`em zprcj{5Zcs2@du=Zim!;7EBM2uQ<5y1N7krMo)?>6Grh-85`q)3w>~Uz~Hkd%yEt zfB$>OU@-Pr1J+*ao$=23%xBJ5ApZ*XlYNZOr_2xU!%Pio`k9NnpUxn{&y|HtjF~w> z!5tJ%V|Esv0M2MpCSZcC^ok9u%7vBw95eQFiffLx9YTP{1f0v;ka8H2zdX+=a~dpX z{j8}&94$qdm4q$8)aLWvK^O!}_ztD-)D8Q#AQf$VatizND`fDgLlD5NX{>>}k!O?) zV)-#8KY-M@Wq}Cz7*G#rC8=x;SEWh(XFTg1Vy`ccOXy}A26v`m>12*sU9_~Tt>P55 z`VFJlRzp#3SJ52fwdfxTo6p<|EubyVgoW$1F~M3)y=^~unUNsX5$pFnJ2OmYTg8Jo z_A(ciVa5aN?QeSdO-Z}^K7UQFM$e*?dw;oBQ;}YUAML@y;3ZicdOzewRZrU$sjdw_ zfAqa=)~VI}m1#!;U(4;AsmGc*3)7xmy;D3E0yA~bt=Uv}sGENIcUf~xP-wTkZ-bd> z^*H43+qt{a){(dAwGsp(Q8!)S+%ykn7(>4io73$GG?$W&{+P&fg=Av#@J!> zaB*Rb6h1cz=T{s&ezk?{1yOz{dXjt@W$v&9H^}5z_L!M7m73}Ufj(auqfD^ysB4-Q z<5jysKt7PD^QPUlf=|Aai?D$t&BR%a+1X-sVfqp`oYm`Y@EPa5!|W(7Tn2Cz=Y({z zT(|qrAx!g6TuY==T%MzRm#E1GZOMae>ycADxI7b%9iX_WbD72N$c)^tRT@zy3X40Z9rv&Pp`ZL{d)wlQpnqwLm8a-)Oj?)gGM>$ZwU zRK?+NZyo=3&}@cGNMrQFv;DPJChATJ%s_>CWI2!XumkDXMc4k9%3llX18Dl%tz{sd z%CcAl#+wvYM0XXyN@Lb&VISRBMVA}&ij?``0Bbv=!Kg)!@)#dJ3OwIkW(@ZpFmriY-uKTVEwr_5ml@EEa-~n3^1DC^Z zve>feX%J=drh`D~F(RFkgW!jwNW|oZpjX8Cmzy_PuJldWDU`%_x36Jn->#hv#+GmAC)*@FT)7}{htMXdb?&dCx-4V?bFIm zfRyL0H^BU1Ds`4ToXRQiu9Y&fZOEQ_#k$T60o(b3ot_mdPl&Xp@$NN z5@BYwH>NS{z8!qRYVMYR7SidyD7PHhK!U-JH1MK7Nmb#bKJ@+@#Ep_z}5C`yv zjmzWn;Ct45CZ@OpW7PGAS5-3>{r#Ki>+hM=UNB+T+YraMvZ!@Qt&dxWnYC0b+M&W4 z$P+C|C{^t&e3#B(LvY{5sj5Hb3ZV)rn;WFNljI`8;)MNKW12=Ly8b6Mb9NEJ*%*N~ z)6E9<4r+_$Ec0Gj9-bc!g=9LPvS!|dzP642MP@Z`olwxG&|T*MVKOeO`9X8uEMe1+ zjdc5YfDz5x@O}iAyKU!^k&S96C%{%wh!Zw1_$m%+TsQ2kK^M=x-$&^iS2fmZ;#$Y0 z&&=?+M#qd{|G5Vg5r?-D)`3z&IWx~1bk~R~m&{+#bwWKs|hlR*Y!X0T4pN-mrs4C=Q zj1}j|t*d*U42sw8eem_%?*+{G|}@e`DmI7@paPfl+{4m`nsd) zPwVfk#E{HPo<0uwh3oSoy=83$GXrrvu`Y{qx{t!}vOEXNn%Qqa{e$J0@nCj#mDZE@ zy*n?X7tcYvKh!mn?R6Vk^&@(pXJ-{Ba~U?=EGD2$`@$u%+=qTK-~>PGbv=KL*~BCE z{k$~oz^SI0L0@-WcC%Rm+PZ%5s6T!p+JM!-@1UE|z_}cEEeiH^2p^k2TS!puZIllH z&j-4hr14B~(eQ_^>XnK1_s$ot|8bhWz|2LJNGp%m0~OnBJ$W`PP-LKL~Rcaw0Y+5}C^H zM!6!Ne^66xAK?yLQh7td)u>%yA_B+#nPY>9y$P`}%MvpsGdBHkRhL_yruRNQ!^xn#EgbiOst-<`}oW1uRemGTQI8ZT+0m6esq2ceg0# zbMLKw3`G$s;SWSikjdiIeeBQa9A+mHv$=&I&X*D_vThtim~a*(wLMwOdr1sJHdT^U zJJ@>`D-;&OwtU5WzetxuO7Yn3LpPf?M?`Spl=(i5bU=804-Z}O6AJWKDN(|2!fR=& z47^W&{NP>fBcL=LCb)?-5_4ZKuvyznb>_nIwxnD-XWXQTFn$Vi{r=N}tdk@ei%qXF zHX#vdj6WmeVB;R)JnU}7*UPN5_l){8ZZpe6s$V{BxR4cvINuSS}ZdJp@TbW0tckfifj$SC)Ysb13HK3!GYCLG+R8K&O{ zGpZo!i?(5Vf+;q$zCOesLUXu1cZxm{Nwy zmN}$ZQMhi>?K@GQita5~>7`BjWxD0zi4?7gbE|^3b$gF#(fmMvUqYb`q+RnW%vE8A 
zNm!eO^F}}k?kQO;w#cDlz#a1eIaps0oM%j!LNt|F=REdY5po*Nl9SY#Yh15ee^bap z_yq|TqHr$NqOm=iB0id4a5#aY!CT~B>Vb;Fb{0r%cF%kqr#B4;YPaD#?l{(1ufh;8tdl<5Psm$5*#)SLNzK|2v3}5 zOFhe4ad=8o^(iYhJnDE|lT=^hwiEgj5<>nC4;9*$C_3Z{=I^7y2@cx+Mnlg(fCyXY zK-x#hdmcVvh@PG7h-qdkl6m^!tPvgYW{NAm4^N(ExPB!Z35*RD5Z9=XewN(5xU~Fq zoUCE5FUx$1vw|N1#?UxS?qtI0y|9g~jhtz%sAafP%n^c50wMLP(8hc6AZ#t4fny7q}8>9vAj@IgZ zA4VjQqbK7SMuNT2Dx1Q}7$Y>n&DitPk>*rEBJ?hAuWExg>)TX?zHV zlSB4Zp}k+m!KT+c_5~5E`68D+QBMgfMCWXvgiIKMH(CO1CW^V<0qB7Vd#Rf>X$eR{ z(IzB)efu#OWj2=;o*VVybZ-Kv2O(sBy1^w*Y!dtS8LG}YB(Aw}#~d^cqe)kM_UA7H ztV(A|M)PR?bRD4?e>~`_s##6_pWq;HWjpW>VOe5}>$&$5eg`mzwpHdsAM+k3C&NX7 zCuy_}4NT8RX6!_UG|ZRbV|y*+axKz@z}_#NI+2(DdJp>xRKmeBp-%w&=jKJ2Q+ju{ zJC({(KC`asQ;LpaE|XhPZ`uo{iI@@UF(h2)Gzv`YH>aY_N3PP>J++9 zhdR7<5$(pS^d5&v4IgnP_DTZn$J1wA=sJUxw5hFb63BAUR^H$v|l#ABf%p9)jGx1$bsXGCPJ|^MWkSe{8 z&UJznj?jgEvOd@SYr1M9s1$#{(lv>insq^#*9&GV`{lEQ_KkY@+^Uw_8$q~~hVBWj z4ApLfch!K~+8kkqxxF1sqY%o6}Ga1YK zGc%-7s1_f+&sgA*&bi|87!_`_;q=++Y{~O(;<);X^aCr&d+`VTYX6F2ym&;U;s7rF zW;WpDNp~+0%6FSwz2lG$K~57qPj!C}W8kfXMY@AfjJ4_zDlD9oqxv8ZZMzX1IW24= zZzq+gFj!m6gJ;lLYq_t~!_^J`W}PSi9y0;E5&k(fOZ*tJys-0mfEsYtbR}+ zwlJPf7&f%QvbZ+CX~6mJROR$O!o~G03e7gsXe97dEn#s~gsX`~g@PVLdq%I=XqSs9 z8KF+z{5HTjf!sK?sxd!JJLP}LtZ$&RVd9)-gE(Z!8cIJW*sio!651`LHi5J_sGURK z?|^n)!rWPQQ<8)W?@%uHK%elhdt2&5$sKJmyXG1ghKwn( z@XZ{G)D>8JZ}$P^SH_U2^pf`d==&WgZAz7KYi96 zruc6^(hyWClvh9zVnZI#kM}O4s&^VJ2OhB;olx)~K$OhcK2NWVpd22)HyjCp;FbHQ zBRmFdT*dk{O-}0{qvz#je;hq$Rt^TDnx`T*C7?~{{S&5&kywY0=3eMjJ>p7O?VSO0LOz2>)alVlA+y7^^}!vqf%D4QO1irM+n=Iop zy-Ya4TcGjyq(K!}FivNR{`(41Fo93^OPu`2`zszt#D`Qiv>>-3oIG?#}nc?+t zYJ_!imaK;}w4F3JH`q^CA(0FMQ_OBq|dZ79}S$>rCB>P&ltuRm=^LYedI90sK^$4 zouiunfx>Iuk800yBCNZo9yR*HptDfszOJdvQsBJlthC4Hi@3QjywLi%OdjqC=~>6d zJ2Q_Look1Dz8J5^7Mhb)c>AI(=RuR(;#iNR)t6N;Y_@&YqwmQE*&=^IL))H(_|K|* zA#P3&iAB%8(eV|pm_?6XPKw7}Lth%)L{uOWOVWJ-Cz!d$3q5e3vGN~w4d4~@DMyOZ zO)TPr#I@nL`z5DqBnb z{PsXuCayOa`pCi%UKPy->>_k;f|Y)1&n54S3_hVBuFVqi7-`P}FK%EJc~5VvE608v zVOSTqtt~TU3)48)pVlZn*~Wd{y+RQM>B%oE>i5V;qV>DOH*WV(C->*6+Ph zrd;xJgtrdw+2AGv&RH@;sOTI>!(Ee5R2Zs9eZ@7QC~;c@X9Fe7%ag~$cuL&+o}6*^ zCvq*!fc77Zi0GsVD*lPd;85Os6u7$DSM!&}{s+!Kaqk|@qqpjjl#7usa43YYJTCRa z5WjAmeTQPP-(87*_QiS7H3%HokV&BTV?w1UB3ur`9QiR74+{>ilghG-V8)3J#qKo@ z&kHCh%nRyJlY2}Wf<0ulLyglLrZsbu!H21@5sw=kqqUb5 z#9Fg>4V>IBAF;5^YEKCf$FB(Hsr4~b^8stBT-?c{XDCLUekvg+yJv_f?%gE}u_^)w=mPQ$)n8O(3VQIFQISj=k*zko zu?mJvlC7tvONGLtYoET_5wlcK3986awH_{x9CgV#>|CvY<{M`5Rj6(U5b1uu+OQPd zYs2$|?x3Jb&HI%P6b26cgbPLI_Srz@XPC$M(R1E= z&I{m0y_fp$z*rL?z*Y}hZu}nwI<_}ZfDa>yMW|Vqx_T&w5R}Z5plcXpQ)ZdxR;`5j zF**A<2KG*ql(t5zjOOB?9++0j@ z1O{j%g*4@#C~Q%vQ=9j^`7tL^xM>aTakSf7Tfqjx3;)NEI9H{P>d0tF)rSIJFYN+CG8&LlLLbta)AgU zrnZJ9J$-v1dxm9nq*Mr*~WFLj>^}i*ut7qKWYJ?FTFN^gjUtDt4eA zr}$VZU;MXdYB0vH`&oaZsf(0@woER;N{xHb87<~>g_~>Lhxq)1&z=p*Ae*`JEaSRH z8QEH|^KGx+*MQF-S?|VZLIY8G*(cCU8E9fGdP~pEdj`i`UN9em#~a)A^qaEH-Vr>9 z+WN_tq8MkyUZFuJ;@3;eM$+n7Z8x}TG(Ka29z&Tpt+lxzr=!O6SCB zM%!ZhRZ9o3$g85B8zE|p%-cG!ld(E<=?(cU4HP`znL9W@J3}(D&iXOXO zO#YFg7eLH9k=OT1^CZ7Ps^P6+RhN16g^)pfpOTy$QAJ4EgYUYQlQu-D^#%TfR|=M9VSkTyqGMDKP_el=SLDvXB}?=B$6^ z2;Y|0B*m8=epg}b>R-2ef~&;X524S>)VMhvAl*%I(P-O*(lir@OntQOpq9Gv5oPD!l2JuVVj!xb&=}OX!3}4ipAm=KYJa}PFw@r~jfR6uj@h$^7o-s{Pf|}egE_^~usr3|P{prBo0f~Z(09X^ zpWqH8pVJ&a&AV?bwW`%Hwx^)&v=+E`Hm-^Ov@dFh>8%kdZ30O4pur7p$X{&7&fec7 zCfpSy*JrTu%>R&8NwAQ`X#UX7?~StG2jEf#iWQ+DEyJ!9*b8Wl(ERp;jZ89a9_nqxC4qZb_%{UP9s+T*#m=)(rYjr#(dYeO`5 z31v7zE(AxZLX7YD;3rUfILd1`e|M;bjnhl{04IkV2Ln2#S$t!Jep-al!5chb&cc7Xr%GV|lhSMj3m z%{uj$_~zsk`_08kd3ktyLC*EhI{Ffo2eqn{qOnhaN=3aT@j6I(ndari5>3}M2DCo* z5yHLvOPI`DLs^nBj0q2*T0b)G4dXfWe{tG#a)f?K9BcutE#qevr`YUm>LI 
zxh*y*qXRz=Zt9P+#?DbV(Yjm*JyDc_X|H9=>H}( zz8Evk3z*_2QT;-dL`jv+b$kHb-alkcFbwj@<_xvM~3-hcv8W#ge zOa-uvnf4f}av-YALW50h<@8GKu+d{_Sa`bZ_?mA<>(lpd!_g}j$q}=L1)u<{{hKHB zf7xrV#P|KSw_n2-o)FJTp0m_Y+J7kok#A?dKZ>5Y%|>#^Ep=$mn-*%(6U>i&)ynh| zJLVB=`l|*YO#u-77_c?-Wq-e^qZ7S*6>fV?tWI@(8Vp|pFnobBo6xQ9p)u}S8;gmYTP|i3kE(%* zZ`JGlFLM&tB`Xk?Jr+lh`Qr>D^jX9UvBs(mI2rn?pD5}%9}W6jVf`%c{wfJeMOZ#F zx8YZ29#S-RrLp#$;c_r=0LvTQQnv>Wm+ruyk9&N>b+Uh4{Qzi*@f>+_LAfRnUB{mU z=p6^)I@-po14++5>=rYzCAgl~e&UI($%65IGgv0g6I<3Pcg_h@TV(-UWTlV8+k194 zt#eW$J6s=XIpZ*l0RZ97+hfnc1+UKr{AtaUkUS+Pv2+s9-h!7p`GDS~M ze_eHMX>W0b$N9a<92ZM^sb5~YWZ_IddZisy*Y|X@MBUlb)uUSxAcKhaV6wer&7f?7 z@SQE^mqnvDbqDpVG1l!w&KhTLa^mN28FjF`Dwl&lj9yJS(ogFA16j=K5#-eM*7FkO>P*K(7ROx1Kt)z#x|i@NOZI1T&UYuSh_j67PbBxw ztfg$)uXXa<*@wd5K8jFLS>a;UFZpdJ_yDCNsuTApGXqWcA$8JnH4sRsc(vQo~NJ|5`jx#Bq1o_+;@rYz^=DwJ$ zSpJ5kucb5@c8cS=y1DB4!;&VYIv}8qHJr|+)YsWp%rGUj62B8rxfi>i-V~AXRZO-p zgDqT{)aSI0$6`QTGcg5VTRV?(Q6N@7t}a>+jc}E0zhP5KWrfZ979eq?K6)H=#+tu# z#F;rUVrNv%m*+3PBn*pbD9_0HWW=fV$$PE;osUmyUU%GCMF;XZO<$`~^}9$L>70i= z0*HgJqeCJFd1a;x{;!Uq#Ey~X>=yxzZeU$1xfD=#jz+0_k>JlW)>$i{fcJv!MMU0v z;?=Jxfm*QRiM56`SOw2E$9L-`RaqRg2)$ltJF4HT8wB|^pmoLsm#8R8o_}1p)VKY8 zTPwR=ee`|<>Z`#+vlKCUzU{I=wM|B`Bu5+^r7U(SjgpU%a#nsH1d+JrSY6{&BMz_; zk>tBtqqz4Yn_s|adl&OfeWy95&yh30XnO&+4ulg9#Q-Bt@T>NU>V6B<9J|HWZkaOq zSl}{EOUHuhS`7tly<`3*-LoRE%n^dF6#KSdfiC5W0}?AL zrrB3IXJjuz?#@>X3R(m6$7gRkTyAFdoRezXORCOR4V}Edo`*l|!U$};R;D9WvGiD|S5KyLzYNRHMI5cpCqK2&O=}n{(JS(&;CZHcdew1@&}@gaDhMlU6ev(i z>giHlBR43`x^jzL@tj=N!=0oiuc9wVmy%yZB)gv(ks0f*CI+ZQh)Q(; z8pSz;MdX^6t%F-1D~rUGl&W5o$XINVorG{PymRpX>!|vU#29!Eb0jqqnD*M5hMysvHJ1>_`e$fC_IahhI_e^x6S~AG?!Qc zLWE#Xmj!Mdu_O+N@F+l_X3&LjvSYP)Yl1eqn4M{_f9~Flg z`u6iceBfz6O=BnvS`Q5`kA|LXs13vmMNpXPd5;GjE$6cw-JlJnoA#Pw299)dylPR! z$S)~$`~%!D&Ki+YmdFQZ;V6&++9B4zA#}Hql)l%Jy~4-g&W|Z_BcADD-5$r7jV5X% zgA*iGc2ZN)QJ{3;KW;aeKki%|`05OprzE?cK|9^Ie@Ifw37NN%bm-F+8!}|yN+Rs2 zn|x~?@!vEA4THlZ4+YQt*J9Wwhv(r#*o&Ze(j@fTeAN#1CX!Tvznr2YE>KBe^%hM1 zEmcAY=X>A%)xS{6hiHq`G-uCcyC=GOSYJ@Lxb7uY%-F@cz&g~a{c3U@#u#c}d?(mn z>(9_9;VERvV}1EexDZ*CjA}@u+i(+@OO~R70=+-zWW+Y)Wc18@q(Em{*6B>!Vu)ua zfIK+@7_l+A2WP;c$Q8a$%M+@i^_=G*gvHoh7$J6@E4KN?62tP;-dUhW&6FeF`h3;d z5ACKpYx;Io$Ip5-gHGp-)Ym5BE*C?lsc2|Z2$lxQW>px#bo^g$O^iBkvN@u%08Ph< zu<|yYq=H5~5oFH7E~$3o)<=CW#zQse@#ysYZJvZHk~xlCSdqJ_tXxm8IB=~$^9=wq zGghWwPA^26U`8m+|2XV)oO+g=3Q{#OT(xg6O1t`?t-*HOZZm5?f3ZiPUHvgn&q+o` zO2TfXy{Oa$lz+sHB{y!Md1}q4iIM87F_S5*Q>CLT6J+$v{}oN5Zhp5~%``{nheD;j zFC{}s&)8J6SNCKi9gRa{dvaq~x~(dhzSTKi6zaF@5!1yAzc76H(A5Knp-_~_(XWqHBh$EH1u4laxGqFlW>ie>kb)#+|IH7NK)5Xr4lR! 
z>6`nhkL}J^?+EU#L#w>4+7`HSfs9Oc=^zL1pe)u0f&F>%lsgZATF?5Z$n>9`?9#3* zH1u#uQ=CI}xGFTMFXNkIysKOTQ!M?<_ z_4%U8PIE_P_XGYVrWbA3M;uOX>THtK>&2Z!^*0w2f1#YiY5JlEDsS{pkzm@3&Mxz# zn``s;nuCTKI>iRMG8{!1)L)YNape*g#}XwT1;!QG89zoej@oMgv&Vn&9UZfE0JTR# z{sclQW#XxpggJaRfA6nS%`%QaiY7f--(|ab@0}x-dC_>gl&I(tv71s8-cM;z$SEd!vc5T5>t$dI>=D29S2}6M?7fyQdA{xapEv$;eeV zAaq=9)YLK?9r(hQvM=}6Hu_-3jVH;byxcNdI$NUnF!6bzsQb6Hi<3;Q!+|0R)EG@F zvRd*S$IigyPHP!2)2LVCxF^rGPIs3MZvrD~Xx2%$J@8NXqiIjXz;8c9ImTyA`|4H( zUJ%(V&ukGeYSdOE5N^$*()AnaMl&;C%*XoG@CPx9OjI z-#bQT0DS@)W}khXBpYu*Rc*^)^B}TQym_dNJ z5=yypd#MgCb%d?D=>ayOZ)rsitXLyCnZVYKkF@ZD0 zkQp=aL9#R!Wfq0!*QDxl=Pgyt-2WMhtEfp zs03ll*oj*Z^VOJob;^or{%@5>ysp=B)c~xc z_&E!0Xc1f@jJiN@AGz{;_~sP}D^{1yi*HWObXxV~rMe34H>a`Kk*;JH&lSnpD_fX& z$tdwL{DU3w>Cy03PGHE!r)n#Ps|V2ICPD*j+|OTD-$yvPz?Z;PXWJG<6WymeRUFXw zUV7_%ANt)>lt;)ob-jP_WS{4v2bz{)GfOF|k+o8OQt%j_v4Q>!l)5sqT$w}mF_m*V z2FHNFbZ@z#IBssX5bn~8vF-pPcDHCU#SSGazgL5}O1_VQ*o|w68*Bq1@CzLJ5oq|8 z-bHZyTLuhl8ZBuf*IjcgYlmX2djJgSveNA^P$;Tk?(Krk3w!Ly3yn*nNnBX$pVL1I zBwJ-@r^o3Ia>h9qoeHy%y=&TXE~+j^jHW+;R?k9la5@*h&GP%iVxQpRpYeSL+O-tV zYs@wiN(mYxDZp|OpU_@L#MXbFTx12Uq=VX~jwKY7KLv1A7i4AZrbNM=M|iT8zRoUm zC_QCBx0}e;ej^0>fRePiHEwz|v1V0mHOVb7Hw)L#7fQn+2y+k8+R6fYFC6nIm5A@k zLVG8~g52G*mHIgH(lvdO27VuYx<2lDpyAM`4b#0mK(Uy(Yc%1Xqf{4$u;wgS6PlHezQT@$4b*pR-th^ z09x+n%sdQYtZ@^^9t7G#;jwP>_ugbV3eA#o7t*;nG>y})F1+1Qr8kK^C&Hg~Kuljb ztZ{L1*D`oq_PimYZvoF75ELk0EEmS?IKMxA2=}-cS#zDnwz!3_t*Mjopcu>7dMjvC z-2l^kz?0%m@@tJ)d2H@QQ{nd+TiG6c;mp9U-XIHQ zgsTjh?`zJ~)2CNAE>^wKZ7VGgt2vtFIyxI}^5lvCH0k~748dzXab*8o*2AXP>j+@Y zwqS{=c}D0oA0+EclSj1U9J>O2XqtlPn7u;B)yJJb3FfHxE(bZL!j&`ylCy>fE{NQx zlTu)GwN>(kvtqHw`%9A5M+svYwF>D=9!|0VJ!$B45JgdCDYFfs{$(WP2`P6_E=EMe z6WG**Ts0!E$?evV1Ceprp3OIvSNu{gpqY!#A#_PQ6EMgudCP8#+b`9EL*3Vhb!mX>!Y!|sl z_6nbaygzxL*4;KUQ_|p-@-2_&wkI-qRS6zWiTY45OYs!@s!EeUQB2Cl?9JJ?JFMqN zq9B2faHvr2z_kXU>qe@VpjzlXhwhwqWvOEq$lf*HMb~ELs`8*~ zgE$0nTKHUOZ%rShRhe};inie24EwauIq|D;=4gFnRftxsA}1D~29*|o>2j5CZj0z@ zkbyIehEuKIil^F(n(IN=muaU*y|wd-7*y1I5QNMuFqNL6{UA-w%!chp6foL&m16bT z?bqeEkGF9(u7jLunjllwf5@pBUaUl%KDSyG6dW5$YLiIgF5hH&%xU$*|2Z>qQ6q7L zJ=7C6U0&3O+N&ZQJ%4_G7jb$_BNZ*1Lh;5?s|CdxPtli)xNodSC793vGCn=6aR#nJ zp|4%~2pqd$(ivr4>d2klq8BR|-vv%;TnPK<`^1x6%8TM*8{`*A0OpJ7U7@*DvQwn~ zwo>*j(6o|a9oXU~fGe1(l{FOC`-C1-mui=x@=fbq_kCo>^H7QkaLDiuEzY;|-J?)O z`zbrY)lRC`>}OO9jQ~mI$pV`^(Q*m?!|#`?#h^Mx&jufNw+ z&P2@)0i9e!B&X?zq?D-Z;jeR=wE76?^{4Vw0Uo|k9ng`QR9F|qw`R&84Z!=zPq@Q+R zo1LcBJ+}+obgfz3xx4}40RWoXZN%B4L~Dn({+MO8kR-_BRpD??n6$N{vWPkS%BR1LGg}bwlZZteyrf z@jIbOiwR*meN1(-6w8G&>y$`l)m;U0H07UdRO~2 zanvVJ(Cm%0V+V>cB+-wPIa~FWT0I^^6~E)DqWBJC(pIcEY_~d_{fhSlUCFM0(MAf%!1?B|zfqBb0~W>t=Qbi-&&ZQDXJ! zIIB@4D#x%R<(Oql5- zJgofggAV8SnyR_@38mR-i0Sf!n7_*G7Epdq_qZchZ9sHYrlpifcZIk~U*Tu3fS`u> zcZWygy^NL0h!#(gr5p{0!8E!W;V~t_nAV9OoY*Eh@;|%a&*rDSJj3t^FBBtd=DU3J z5nNXqwGjH1%aK7{$l6j2P=+oOEJTc1JKdPe20AG5hr3e5B>dI&M+2lIuM{ICe;c$! 
zWBf9`>-uAP&=vgXZ_K;M?-L77x$fpeD=oIh)14`0P_p0ONDl}9J^AL{X@}Zg9Q*0u z$k8qNX-a&}D=e~-vV^WHI6x}N3FjPB(@tgkNAiP82RwtaN~F|ppTYhvaC&KZ{S>`m zapQf@T^2<(f(JlMBi!Vee-YzEQGjtOa1Llsk<-O55l`Y%-1{vLfN2xn14>4JcYJ&V z&I+7O^sKX?_st*v*Yp0Bk;SJ1{QQB(p9lV0k$NCQAaWTPB$~ z)%-Giy4w4)KoJp1G@)N@K9o!k>kVA@VOQ?LU4K$A7|okaN_l-K8DJCRfdkIW=RQiA= z|5sH0C%2j7q2Hc>>%c~t$Y9tiLt@Xbh}{0mP4md`n&f|9VB&D#&54Vfw6%Zt=J!PI z6u}f&`$|g2;J-GkzaQ)0SRDU;CHKLdWP&tIh<{riicbQBNrS&yAj+#9$LaXn@BHUQ zKQmguQaHNIC;t7h|Le0=z@2u~-f72zr9g80@9@|E7!N^}2)yz)d7|Ib)H?4f+QoK& ztSu@Pzwi8(w({>U94pFC4Ll&fwY)~q@4lRh`)&zqqOwq+b6Q8t|9rWBqnrOX_1uqB z3D5^l$V29Pzte~B#PmiIoiBn9RjIs#NrUG9VM+fFsr+w2#`jSFca(QRX{%+0iT%zc z{nK8EWA3!3Vno~Yx9@{}R|BjB9&p#MTmQFz@z0BXukUJ#uP4=*U;OTkMV)W~`^$B> zvp+(+e7XPq`DkO8?h;e`F8QqA%48ZJxZ%^O3q~Bp(~(}3|6Ry`U<5t~q)hbcG5zmk z*3Wxqqg>peM}eG*h?@NWQqTYAhgdMWGF3WQC-L+jzTjUsm*xPLus7^Zt>f8`Q2)7H z|N5i<2)=E|LL8kYS4DllH>e=Q z9uK2tc{=RZUIT7)6eW z>HkfKW$$!&;njATmX!k4Qnn;!DxY)y`81^5wz16G z)mxPk>#JtJR)L4`t~ST{Pvw`O1@u1D4DcON61@Sx)yx05So9fC)1pSl3Dxn5G@wh; zil+@daJ=B=VijAv-Zu3qql$#}RN?bf4s)kw9obMfRch=4@soe-34j|G6}=O~)}0tc z-6P-rUwqciozm{4(fSZrcKSb1X|UJVTpz97no%bjQv|nqtbg~rueg=UT9Q>*uU+&0 z2y%Nf%Vk+i9m>8Pg^I6&ItX{4|i+Gl}O3pbV1BD(c#!KT`NnLmSj z$NO?UMGiJomgjN25ij6ozgZha$}=bcZ>3pma7e+xr{{$p=(*0DtF(E#Fg!Nz005l~ zANO|TnXbnT5H|r{kHu0wz=rRx={f;EFF_s0) zp(u?7;6~oa&YeM~im6E0Y=wixGwv8}y9M+G=iRv~AdGezsZc9YX}%Raol;e)GUa@1?j?ZbWkxT@ncPu04Vyyk2xB0J4;$I(tn7n38SeL_MlzG0osH61%W9_}; zsqX*(@r%f+kZ2&JA|jP7vob0pvUe%jduOkPkqX%g*~f@ug^VIXS;q|7j@7W&_wjPo z`;Bv5y+60x@Apr)it{?p=i~X<_xt1dQa^Du?4#Vf^A8oC(BOAP!i_aYYhgUq~QH_%#1}g@;JHZ zYBoDuKNU69eDw@Ov#zr@`_u?4gN5hS?!VJ7UxjU`4$W<3L~({|zve$-<>nne4K|GM*WOKy4mnFog#j~nwI zUS#l!_jJ%v;gdug>ry1%!N;Eo{i=}u=vbaHK88!eKE$y5!<5El!*I$=l~6ZaYPvFT z;SK(PWoMPfY?MXWDbdwgZ>@XZ-j#i3{MZ8l8GFd*9fi}K57yr$JVU>HAFeO?O3*3e zBes}Z&-oM#TCkH|StS5fJdt)k%sZww`9ccwrFS+XA7ye02f$~3pm*qj3u$I9UUTh# zi-*YpebxOWB4d|x-#Da(O8x@V;QK(E+^Zmu2Rz2D(jyH~qlvU1cI^}0SbZIB(x``^4%PxU|5Lz*$4 zqBce-nTC|sGrRLu*!L5;bdd5sxx*&I{zR-$Bj30=_x4qeX%6+=#MV?LDR{B#(EgB@ zN3!d>6QsybU4Q9wS|d-|7=xNh>s_A1CQ0~`3VxiqM+csik6YvlMVrr~n`;a5@SPtKGrN<{LS9eMJ;`Tylb%1Wth-%+bdLww5TI)ZQ1&6EjROXicai)i1o^|cit ze@e9bhMUZU?1|_Uny+{Al%T0c80M^+sWCi6NIvn#@!L(7)7N2~`y@&&=j8!*m21xP zlc(~H+nU>5Lk{vgT^=TuQtz*9OS;n#8Hp(xjb(?vIlO}D)!5>^C@~5y&M_nUjN{%& znDhv8q(3eOa&?%Ve<;?0+JHAXK5OewAkqJLX=e`i+dTjK|BO5o9y|l9qeE7<_hj0B zB>4#voZ3ZLDbAXd!#_Y!d8;NUlqK$7voft(k1t~j+LnwG8|S`$O2B5s_NzpE30SEi z!&lw=vfn<>uKs8ur}QaXo=9&-X$!H4oGx$A((UW?hk?k&!C=uacxw^%$79TiaIO8v zxc%(9;57_bez!0)rsk=Zyp z*|k51q@%zhZm21)Z*pP^ZPrcjbuy7(^g73Bfiv(r!bi3t?GR#F4{Fbtis}YnrEfsR`7?bXG({BDt39=ccabhp6hC-sbsP-xc&^e z6?%lB_i8vNeRF%9>wE;wBdpJ2@Xm~Hzx3PtH$xJ?-Fmb@^}2x=U%zB}pt{15{9^=j zl#Um>&B!$pQ^J$~*mv6=#U3zOt}c=Q|M)qb;Jr&`7Xx1y5QK0MRz4WNn#x8k3U`Js zfw~+^jHU>dur(1P_KD&#xkF@B{|0^k5RIo)@KFwzh3QayLej#oH%H-ULQFez1qO0R zeH#2_o<5HIs{?&!&(&fbYwBRmMgRxI9mb2qhXlR1rEs#^uK*x`BMZYE| z@A{rqzHVK)A9#;1AvGv?v+%_Qu0p;7FRXmvn0YEHU}X+Fs;0P4@nAG2UG+@z`bm1t zyQY=DuHN~t@ASMxk=-X6xH;h}SxQcKA@R%7zLHOO>TJAv9+1K z3sn0z)>mc}6Qx7MZeK2@n31TCyuo+pzg7xYe0UX{_|zZUwp%`)O4b+^o6 zcS$PZgtEmd(`8{7d3j#Kr>E7=U#(bOk~sP5>mmA`CBe>A9r}s(_*|r~U1r<_?0*)f zrx1Q0ZA(8X0rfc&l}aK7T_+bwd|0kMXABluzAH_5kVE5mz6oX@NifZ)H%>MYXK(q5 zMZ16-EPUBrsba%;f}qlWAN><}qgur27!HDrBAZbl8h#@+q6FjQMc!Ge=V{qj=6d^6 zyf{CP%rtnlr&33qjyI6{1k($>;$U=N9PszV(mxV&{965h>7_k!I#c$mzx$3`H3R8l zB$~iADhJ3a?PC@uh5I#($-=zJVxr={_Fr-keD#mr<*k0Eokn*Kil1Fza5s_cSctV+H(8F}2!o25>@86xJ;2&8 z7z+h+PG$+Tc+sY^jC_(hSS@4C+8N$2b)ZqU=TPM`|bzo757rG;LL;%x|R>dH5v4{>;W zPsee9&J;+L&@ZVJI~@*4o#~ zAIDdTza=&Z3HxTq?a4LaaxJs*wabFr!DjqAYfE^AjkbQzKQ?CmD}JQsk|*6C=N^<6 
zdZ4#Yf|TYXi~2dk_G4g4O;@NIoWqv0=iH56CX#R(C=yh<;rGFwKSc7F&|QXW5S=ck z-YTr9V7 zYb|wq($#~91*{XJ5(u#Y zVsQPmTBhHevEa^3N17XNWvvA+#~`NhnOZ)&y7HLrf@4R;BlRDnX$Ft-ItF?UVhYix zluokj!S7mIS-1-6NOX20+$BE5q7kIZ_y(*P?@YHi*ZRAIy8PnVWs}s?l{dMoYYUA; zzSlB;pGeOV{5J65B!y%wtjOxpdq{>_*P24_cR@!d7>U!gA6<7!^nR2F<%eTTgH4b( zwo<#h%&(t-`o_UA_UV~m=Yx&YlPwqjcM)e4^Y+EXjoPv`qG)4-4^Pxdhnlcz6tBcO z#Ge`q=P{b{H29EE4Skkk$d{0F>2n&TH|ok8AOEW!lc9dzjKmSj*K_dlWLJZ!u6%3o zXZ$%|90&IrwY&;^ZE=AjKm`bRDkv=@FRgA6F5=~_D+mu3ey`Y9hfh{O%V*wtOy9!C zqHIaB4#w5It4Q4SZy$SmO8 z_p^lFTErIE<8Q@#f}jG-W`i}n32GF)cD?y7acMus2_qWWZ-|{U6cNVb73erGyYmFL z+4g29L|O(P87rX|THsfY1(3`5@;jTM$=6Nxmns`8xF=r_4siE44m^y#K~^bEgs)31 z=3NLrs@{u`EIOqom?0eUWk0xZ_?XYP6&X z&fob+9C(k-V7xg=@%0^94qhZ%biJJ8#zIcZd#%`EX4--ErrH$}pXoO+hM+GHS+5|=s{6yu_OS>}wLt@6YFolYE%^`LAzLNQ8R2jGxbh)M=ai&yq| zkumKDFRqsr#?#OjI(Yqn$rEoL9km0Tii!70o+ziQ7}e(z?Th9unWDQeT;VkTZD>C+ zb4duZ)RhAB-Zz(tJ7=1c9_?We(bDahub9JPN-~aj*Yry*^B*(^kBEP@o}VQWj6X61 zazBl{$AhOJyV7-<%&C#6n}w?P!!Gg*@f@pRr<5e^Cp#$j*ATqb$aTFCxmg}ksafqC zMY_#KttrdopZwo5_ben4V>AJl$>)sDS5ltYsp&b-u6bsU6aKm@7B_47&;KME z;=_eh_iyQ6KE#cr=skdT#I#0lWP_g;@E4iJ{*hrNLS4L1GwM*=uoHvwX>2Y*h8a@aqfr|mWDd?oM1#Fm&BD^wp27+1iE!UO# zw)VTSVo-2tuaaF3&I~zu|%d=L$c?zJf3{JIZ@_2Hmh84;Js4P73%Tck^-8dNZR`X zGzPT5GnVLuPY9{!8Mj@5Hp9qd)oc4HjqpQ9XkIXlU5?Aq9&L=d{d4TIAThp`TFm_X zQ@_2m#+*vNzJa(ivI)gzR+5jiQYv{(01#6K!{0JX*+cz{Z5Pg z9c6eb)lFMU#174M7n#J7ac~fqSX$x)^qnz>F@}QctGZH*#Axgw@mV+Q@%ac;{pZtF{iokcdu=mUEm8#QBl=!`GZsa=)S zp*XWblD5_;FRR~=wi=Y=a1$Agwi!ZtdvkoZFKHK;b)TeI9jOnOXH!MdISA{2SO%eB zoL)99k(A4>Xwk7#nle^!4PX*AmDf0VaPcEN| zktVvU>62p(}xBA!*?iBZjK`c^bvbEgaNeBs&|_7e}6@_IPeijeFn!wE``q0-XwJyU z(Eqxp9i93A{_}+xG!J!l|Kmv>_CV0_wD%T16?d+BTJ9br&N%kJwlC(#DCP;J_TiTa zub=$GQy+guw2=Q$Ipyvs;K&><4bNWxhhO`xe1HFF+!rtvXz+!XeRTeh+uZt>OUT~u z-&~bEWQqoa;;-6^|MQv0ANv7da?R87$OP&%+dpkfECVSIM`?d8HK%#l>HqN5;=i9d zF%fcl?>{~@7dido=D*g;Lgsw(ABW?&OW+icJV2+E-n5*(xYNxcr|kKR1td`0`CF%! 
z?Dd2G4~wM(FcE4sfo1BJe*_<;ZzCdjcMj>*f{7`qC*<(|@5fRw3KSw@m{r^D*1AsHac`bgaVE)H7jQb(#D)C!gi;c`xeE!M)wpJ62 z-c<2m0Rwy7NV1pz;i-?$BDj~a$*+Zkr<>`e# z&_e$b=9-fo#&*``|25Cw;`6kX*i?WP8Pz*HJ44mxhK?aDhw8I#w*(e9I!g{ZrjM*{ z4ECAVWj=^IcICifYudo>p(pP%jrRBjL|8rW6W?SQ(+Gc&FbYwxjd`81%s$KvR?Rk;W{G-9QTrpE6W!NQcmzd18FMYRcBqNW@_hTZo5(}WV+?V_yc{Yaa8ZOaqzIten__I>2Q~`>^3V1#Jb(Z zctxk4oX9Y)Ld*#@QCek%^I-)^&|8(KzxB3gUksqwF3{?#qWU@A+no;`BP}_$3ky9q z(@;}8WRD*#U-SiJKvRM9oyRORgC-Efn$LZGH9oNK-jBd;+ZNfUob`K;Ug(Gc{Hg+d zRM?qcJNYDjCEl(L@tx}WTVAYB-o`U6lmv@LP{mH2adj?&#u62$DmvbBcFLiL;V9ab z&GBBNdTHhG&4Zh+VP3dq>Ax27>u>Qih#ZwPf1Q;LeZt$xJ5gpdv0_Y}0PFhH%lHBb zpb%OjL{Uf&&|y3&eD|>89%7SA{Yr8I@3d_ic*JREN@hf$L7o*)w;>M&MlS$g-fhrN zu^o+Z&`~(aNEd$dU(Y%IB;k5j-}tu+=YaRRo0={97A56@jk$5?w}YwEG(m?3;FTRK=`)LK)2+G$F*($FF=>jNvEW z?up@XtuH-%z1j%aHMu%Q2z*S?-my+2haFn4OZl=yKo`&iAf3X=gMf~*In1@G5~Y9D z{|e$xe^DHaK#6x6qYvk~G)466z8-;vxhaU}m4p6<4$#FZ*>R|v+sD5fc_Nk&F zR#>RR9!^*=5M`otiU{c`-EkQ3J-A!8^xJ#ds$7*Zk#6&nsY>iJ&_H^C7}MLgK3|~c zE=jTP&?2s2enl*?5W=)0`#Fa#vx!NNi1z(z>QA}k>t;XkQe-(a(R)NPUf&+sdK8E) zw-SPD!)pAFa7(cw*o@cF_~nsbnRj(}Cd;ho7_sNN?S_zVY$FS=EQQ_Z*5{-mrlk8kVZ!b~iy;yP{DxweX8a1DiLZ@s^S4!0Il+laLud*NRU zk&)2M4GyB`sLLefGiWhmLG0-G3vR29n0yme{K}Bn+yCmz<9P|cgIZkVR|`Va0yBb5 zJ>E!9PMm%n4B8$TZl@m1e^%)JGkSLqoAjm zLnE&iI=OYO-`*Xp2!DN8oWD8$^nc{M^8i|N#ZZqei+sG_Nh2u>M|rgI#!s-Sa`L-) z+bvJ5flQ56%`o@3pjX5V$1qvCT-K;M?P6Q2rzd3<&pv3jrrSYjP35dgX(>ErXW%x~a} zzJ0m<@6PxD!8u=_K{NI?%j_;0WO=J@nGW^C`zhS!I}JAtX1&vA zMd;Dn`qNoUf13Q!Crp8QRi-7A+`mGwD8fHGJdUBJC8|>7kq~a!!L*YnuNx_8`?{~0;ADX zH@Y%4`4*sQSp?X82jF05)j$($3V;s*v`cJKOd%b9r3nUQr)ie2OhGz-Mhw#n{?U>e zZ{V>P{&ofYZzK^TAvGW+33ujKdxltCc>mS3LRq$OkL>lnq5^W4m- zw14^K#$CB$CBV2jDDKgorgUgAr{<`Q`%=BGmofru({kT^Y!>vfo{2GIH}+8j-xtSQ ziBm8ymDGLn5l1EB$^IlsHVX1hZ}9ip#;&G0Ho)2}-^393JaP!OJh^RM@tnk9RhXN< z{D76qvTFHqD6Npasbw<}!@W;{Sbda@=0B^}eh>JES4|a)2z>gWy;}gZG=Gd`HKkM` zbmsi)*(fN2EVFl>%zs3t@DiVt#QpmRI?P%&%DiM>_p`g~2@S`<)$%(_wN@oL&Z-dn z_Ev`jp@m%pM3l>27sdE1D|Q@%*goX6`F@?Y4I_=?4`|g-tS{Dx?#@R*UUQ(WiWkX7 z@tOnBgwn6_dti7P&gMv8suBwyAVvLxk^zSXfd6v12r@LveKa^Z)qkv*_4!{&9za+U zQJ4SHnwS{dWMN)397cKbpFU@eNU}uz??fzmSI*n0@H4v8k&qIoU;#LtMgfkNH-|oU zl%joEB~znwBmf5P+lNoT5}~*D>BYs~-L$3@osa|L zh~Yn?1(_34F(N@rk58#cC6WB=PtR!4&@SRi2Dil*-Bva9YbOQ&FYDdSyD-sKFFgBp zS4dr8T8qGDn>zhRa!8mY zW#6pX^k-7`r%nMS&!)SPo=_nXwH+YCk_KrFg4$Ci+tfn+(u8nY3I4SQzKCp z(=;3!2aKGQ@hg`p?0|YWjW`!*Bpb~pI2ZKVYZk1{cb5dw3l$Fr32~|gMW3#!1oIvX zJFWgTj6-uo$9>IiAz5_oRPN|u!)VI&!_BpRtr8GbTO{e`7a~O6%V^Kv30N6=D(}T0 zFn;k3B=&GYP*)Lv+(j43g z-MyY|pP`Xo7YIdB>C#A)Im}B|IDv_DzZG|Hw1dCcfS0sw6jACchg@oaXfnz)SH`kj z{o2}*WkrfdZTCN$Fnfqn#1n@Kr=;b$4Hx9zy3w#hJ!9SPI7ewn9TD7*V=`TE=$ zhYo#zU!v)}7CH0x0fz^y6iSMEmH*02!wu-~lS*;m&dhkgkh%3%w2{kPtSw{`@Qz;{1dr z=)!ktBws9soc*}jzV=ghGaj2OC%a4+)xye0+#r6ElBLSe<9Wm6~4 z%nO10{ew9mBd>{n!thR)Z>&$Zsp{^fp)M0y8hY9bj5SjmgY`zC8G;vcwO6)Vw&xKF zDAUu>zY>Vu{i*x^Ez?GhAcYVQzY;Q$0d#tV5-3~y+JtI(+Ku6r$=AVA7o4oS&Mxit zIe12gF5s&WbTaY;tuqV9UqyMvn?TywY@hkSV;0g=FGk~gc|&)19{gOC{0i{SF7(ix8FI;b7kq1bN|2HPMT%YYuL3Jn`L_GN+bM-u zJoly|a5f0Nev;WtT!z$7CxYwVxQM)v4HV1}#nb4tOm}YwCY=az%zN(w-+jbc8u@I* zX(xSOtj!y+BPuTEIUykn2y&tj7<9UFFR;m@B@Z+@G~cfPJVvgpgk?H8dq$Gh>_8_C zIWha&3$Dwy1+X8(y+rI6GRp2qY}5OPNO~Gm|Eo)N^C!XR&cu7$uN&)lY!;j4^~Q^P zkO78nAeEhu9WERUvWXFL*oQ8Ms-WV2#cRW+#=>&mAyVRv*(&miyKjJWcm1X?FjUKh z!0~BAk>6V~Q-;-X8m106>H}(J347L?bBe)|D{BoFZi8iDvwE(IF4PickYEn+YZeap z(Ry9CHgoEK>p2a?uYz}$Uw}`nH`EOtg;2a5x-tSz2y1;ngpJiFoy+{iD;Vn_80bt~ zw$&zWv>$JN$79r7z3D4O1rYG20)_{pH8>-&LmSQ_)fK_7#u4U_DD}VQ zx=2cRnR+vxr0{9IkVoHqM~Vb{7C#H>BlPDC?sr25*esXcKSFp=s~__1NVF`wHK7kU 
[GIT binary patch payload (base85-encoded binary data), omitted: not human-readable and not recoverable after extraction]
z*(8!5T=_BL)iz!VtwekYDhjQa=<)BuDAGg2)8SI&rbU$Ib>qU6K3Kdc#p{2xiydLm z!hI?-u!h;MT_ZPFd3zT8D7k|d5g9}&Tbi(qGGKfzT8mZ!KgWFT5ULqAv~0f3td~_Ab6rNiLE2~UMREJ@tIC1$>F{oJ-3{^ zsZwPAWE!h4dNt&rNWfp4kN&l3yW(F=yC44k-+e%*C95dw+{x3B+ZHOS-;%$T5^T}A zId9!tVVcFEn~!}`Urkq?%vhuH{@OcJlMJz3X}Z6cvrprDRuHoq&*1hV*)P_+%96B> zx~uOL%1laJuI*6W$&PK1#VkMM}8>j|G#_n~j6&nCtsPYQWIANTT{ zweUunb1x=ccR}M;lz*b5Zb~|!WE@tRj9Z$etH>*-H5@#^-0$Tg5Aa@omfxP_+S3yl zRe3M|`Ep&B*_8zC9};Jih07^k<1!olw$v}>DV!R24r*Z|xakek z3Wt_n3Q;C8BGNfit8&cIRT|aww7fV?WL@^XSPZ3>Gf8ZSayZ+ygvf(A)}%0=5vIL% zd^n_!(#Cp-R7i^y&2Bdmx|UQ@%$>WgK4XPt383|hbV;DC?EegG9;MnF{ z{_C_3j!Npp!zH`iG}Thxl7GuMrBq<%&N-2CB%5S1i&3-$h_i$3-FfC(riDA{I zx88i+BlJyGN^>b^6iHe$Yi?f;SO!v<=4ttb!YlUE3%qW{656F!EDrce8tNl6ioRp! z`uiOYc)p@@3uPS!!uNbsDMzq!N+`cMA1AuVZ;cTUng{;?vBR zgWk;^>+-mYWg2^K$>17YUBG)6g^P(i624!=|KG1-)I&$BULFpIA*#d|4fAbTYPDXz za2mRc?#A-v>h1TI;(O1UNe~+oG7+d@oqvyBrB14@7oLkfQUrROFCB%0l*9)7i@4@6 z)z3z2xQ3e|7FAVHyDdi5ypEas&z#u$)9EQW)te#4T($2B%gGv5&*k@wy}c`sc0X~p zb4z?6q0v5~;H={VGK|{#<*h3D`|KH!qV{(Z=L2%oj_3Jvd--Ww2CO7T$N%elc zt}GuJ%QvQv*+I<0+D>N3X}?S(%{7yhv&A;$H|Sd=5BZkJ_FVlqO5(X&P`Z%Ytk57* zDY_y-ibcJz?DgZB5AGt!l6md(6PLx&o;BwSbTc*7jqPvQ`JxX@vN(uu*vaFE9`rRI zseGoyRxtz2K^=NLvqPup52O*sm;iI)%G3v|FlTij-5M@2|Prn54PgoH(&_kz~c3+HzI_kX-fA$$)gR zaTc9|zSt;Hq1K|gLzS}MdDN$@D@6Hhx#w5xT6c<&R~fc})$Txh3=TMM%OKIacof+E z{cZ8?x%Uje?Hx1cVxsLv&J$)%LI#IqBoZlPF4ne!z}<}Z9-7+U!!hs_xADR$ne&|2 zR=htWI`AI1JP4fX76e4ii1P%UZj&N&((D}P>+$+IrrYr)%HDl6?*|oj9@wg0*GL7e zaq-3#`%*DqbUfuk-rP*=#7T-U=U{?rndF|q)H#t|tD|;a8d(`X(J9sX6hR+GHoYbq zU$ZK2%Vdm4t=y|ROMOttSa-?fOjb&bj#?A5L!IPI(Nc%JbROq5Vt3rkwPFQ7YL|0^ zD7l$H6?ZJ;_>X+_PioQ@U@ARk<+I&^j54Db`_Z=KJS8QF28s`7jVMP)oxa9SOrGnA zyHs#Ws}dDmQ&~)Oax#)qdD6*hyODBNKqul=%}@fZUA}zJY067m%_=Qa+_63Wh5;;{ ztG5HxMh4HIim5|&(fAUhxZ-7E^$E<5fW+IQ1*)sJ^1GAe>5M2mp?!tP*wEzD-iKGe zPLWmt$Ui2WF+$iJz?Ina?SKcaDx5H#vQIl+_$$KqR#_R1t zZj|98ej&2Yg_A$zTrk?B(H7|QK7dy~75Qiti3_?JrQg^w-8)$$Jxa>6u&oWc4~gZ8 z!>N+?z1O4uGJfZlsmnCmzGA^mP_?bqicx=gE3?^838)`N!#J&V=d7H;+o+j<0qc0kLI@!@)dJvj}+Ke^$o;%)VIHLeRXM*8l|ebQ`eVT zG9GU%Et=tA_y$K>8P|+Vth^&taqafF`HG=vm_Su)o_F8u=$?(ZTRFLPocq$lvS>kl ziFn_kutJHYz-oQhB)pK9>J^gz`k`waPbe<1;$fTa(EZ%%dM~PAOeD8E?{u)M?dig? 
z^`Q;MknN|DV(n*1L#(@)JoX}|R<_x(P}n|VB{Yh}wL-}QFS$=irJR)t=?nau&{2v- zsUt>bA4~_l9^f4Md3ntsvxo3e-+>{u2H$}B*ctUpsU=4#Z9b|h`r_!8cT$%qZ`3qJ zOZvR{6l6EXrL3DVY2Evc_R`1`!Q!2!0=sey80E}Gx3lLOe*jH%XKZ^Cy6STwWmuNv zV)xc(CUK816UIX(WufWv}F!<{FNHTewLDbE`EwS%apyi6ii^bfd*JhmTLl{9``8M{goD<8~MY21Lc8%s7<(Bj0CnI7K z=W)q;p06%|FoTZaRKm=ZROL%8CU%I_ay`>eI%V&|azvy47RKCKuDt6TD{7-ctB_%w zUfF`6lhFA!4SzOatgown!igVwa~%yQzrS95TiEfpikv{f+^7NOXe_ZowrAqPB93}> za{@8EkiKdZx5K_gRA*!QL->k{ov|tU)85L!pN2H?Q@MI_hEXmBQ^L2s1akGbw4_E~ zc9e{Pr=p9RJ*=OF>lq zVJwX)0u`>XbgIh#U1Xf8;v8wWt?RW+xFdARl8I7SyR9Q$th=GUZB=I>d2=&Ait=8J zbj=q{>@%h!($nnJ%!PB|9jhZpP=(pp3){uis>d+%Tq2@vYec^JdEv7Cma4+WnbrEX z1-At9(U)#<5sxLJalFPmT!Sap?5EXuH8zr2eZq2ch1+81QM}#WCS<+4d-oYh&AEx# zot}{*`wZ)lRt+^p1&w1ngna`^ndVZsSD9;hEIFyQi(RGEoXL*O1dr@*U3@-aj+e|l zfu9oHp#F`+n|5@%s;biAhM9e%syM1Du~1aje@PI-n;U6kV7V&WNz6<%lc1WWwVk>) zGoyQ9X2|-cJQ@G18tHQ-XsIF9$l}5CM$G@x6d*xZ;P9q_-31Dp30P&bOux^Z?J*Da zYK(^-Wp((wtZ%YgZJ}pXwbkfrC5x%qPiBc!qS9IQwVtodU}rp&V`uh8rj%HO%Mo7k zwWy`fb-|(KbkWBw@A2k6i$ifTJqkGFG+Elx9Htb1=9dusThv!wIV`8ljT?|SX#A?92s^D7*K zO5^8Wq|E_)E_>M;`(miOTklkM{5SR2_YN;xezJ2quIYf#N>0e{p+@KY+^$l-?neSA z=knWCKD10!WFKW-FE&#y_ZlTyw2-+`XVUUwLtoC&6FO(98rwcgTF+eb67ERQ=zH{d z{TS@#s23`uzoa(Qe^&7B=xdK2K|=C#x)q!EXHgyUoQ*neq(_*7Ck7fd?@%ct3~*-k z(0R8M(4{S#UUoS3bH1Ib-zu~1_5MZK&4l$5Rm$V|Vzu$sT-mBhwHl7*C78gcbUD$6 z6xxujm^`gjy>;M^lp(g)Lhn9DPt@FO%HdnPzDc^xDBiiXV_l!cfe~%DkgpOmkGgZ6 zoLR9i`CMW40?Byfb!5(KCbHTH&`nC-i*dxv0Lr<>saxY_b4YUJs5aZCSV~I{zf(b{ zQS4_YcdSP=53@x<9Ra`HN+Ziyy$`c`Y}i4B7rIE-W!KRm9c-c0FIQ}kuv4|tXkkXknCg5?pbJp z))?p677Axt5w*MTaW$fyAR3z4KthS8_|DF-;+4!Y-EI@rcQ#v>xicX}pJ(f$sx6#avb@D*^<)3q?!w6q zIR2LF_97q?Gus`giV<&#oZUuVPIO|*JA5VwU*dkXfg3~(#%!C9^II`njx$pwd$PB2 zyBCzbsMhD*q|UF`<3L<<w!i5O0Zbrr09ZWJ}hOxK3PnukvBn>TMPS8Epx$%!}1`u=X#Nj5$D z9XM|HJv_@SHkEcKxUJp5yq#d_z0nF-d$d4R$K`kju{L#CE9_6~j-Zokk@)(`AHR>Q z{qN&?2(2#nuE%KV#Z}_w0=b{$fv4!0YW;q%U{Xx#LM&mZl>1%me4Bcrn}NEnZ9Hq6 zMt(e*-C(kRGMbZk|HBr}HxtP&okSzjIfK}As-X5zKU+=Pln!QiI9(~4- z9L~0a>C4Ue60d=PglkoRQX#`A7kA~1@uwzq5>{qm?USAgK~4A4c$HTiK1#bxJZM6? zcD+Wldl}xitO?cB|Lo84yQtY$OK~_(ku@n^H0xVW4(~$)o;ytN6uksrl)lKY(fBE! 
z=|cCXXtB+2t#i;b$}Gpyk4h0qL2GPQ@YXj^IN#y1D>P~ z^~=(xZfp&7qfbr&)eN@<~ zo}NWHU-U7yQVGW!0$pkMl>3IF`J~gwFF%N9GTyPOv?;f*6qd`~n@^?5A7?b9*7NUm zR`e<3+WQ)TRTwK(v;KFqTEeqJCZ&t>`wecHm))*Z8SY(1R#AVEL^B6!457ZhQZ!U3j9U>Mnz&dY zwUOu^C6Lk~%M~BH4$PUGU))wATiQe7bGm_6v8%PGusp*yDzNFN^p$ zv2&Ex%zN~WV`>|gQ4+c0{s}3O@T14^-;%hiURWy8PI_G*-=?taJ5-hHI{9F0HK-BZcP}XAxnn z@XtsYGv-MmMhYvB8W2)cvS@#rkGJuf_l+`B+QH>GyB#EH17z{{$=Q$JPO2wpJdo=T zaf?&<5xN~BcIUz)J@M$&>P1)qO4B!~XAD;`Z#-BykC^g3`AN)szj_v?(Ws@@*73bK z&Jc#^=|Uy5*rl1y8PQ?-uWA!pLOzcFRc(fF;{FFj;ROQj)!ydinrD%^DV9OnF~R35 z!X{oLW}7gCqg^hReiL1-|BiX!QqiIyLdJ+Ttbb-xuNASyH0qP^8^pkA$x!v%gN=)q z`uJbeeonlptaNZ_g)Lr*CEr^*fNOtG)7{5IBt)W#>?1$}tLGrS2K_6-ZLnnDLp3hn zp^_NYYklHNL7gIgE#OfLCQP&&3y;K4k3PwZcVFyc!;uZ;VuC{=y|BcxC~qN+<&ru( zHsON49|~W;IMrtD+L+YxIfd6?V=cIqyzQy9{Mvf&cz;3D+_(8V-=Z^TkyWYX3$!*C zyHrA^Ve^h=yHqp;4amXN-__)Cbb2=ix4D3J!7#C9ZyRwhD*RPZygQy39aT*%dVBiA z8t{pYr8mlf7?7L-B_|Tz76$(#yaBSb0^x0)Kg!K9R@`3SPFkfYKc;mFg5l`ANVIzK z*Mv4&jgt5;1ZM8)&b_7j%nqHKIlO&N1t_yr6WLm#D?h3{62K{+DuM7u*Jl?eb(a#g zOJT=JZ))%AQEkazDY)z*pH(hIxw?!PorSnn0?zIOL2&=l+jgVPgxK|kFWfKt*Mt-C z`YlQLVl^bR;iNuge>xROsZz{}Hu@}X(Mk*!AI3+FRh-cz%^AMy{wLu=W-zfu^R^CG za9G?7N7-tS;QUoB+FUZDq zEjw}W#MQI*@1KNWxbm6pcG~A%W`8PgxrKNix34F|)G#e?QT^zoLc3}K@teZOXO_E% zGTzpe*tS}~JXxdB%-PWG>Lr*XPqM{Ei2CZEzhxt6z_$2##f!_M{ky9H%Fb=C!3x`D zjm;DV=gbqlISYzEL8D1^wmVNdyg67y|BZAN1A@@C!n{*PV$k}P-X|c$3Bi7CHef>9VO?5+j6ux z7BWjvYa^V=^D(V6_)?G+JfGL_lO&AnDyLt|!;RjW%;D_V%mPnHZ*0->o{M;TRmtA9 za?2fsq`GeRxKBCtn0N&V>8+=vHV(i%m|{4;jN;KYrr|dq-i!%;*g_jy(wFI&!b`kP zh)VhBg5U0dwAQ;#Y^;58o8PamphP(g%J`2?L~bSwdq{gd7du=yE8l`30%{#0QpTi$ zb@8Mj9~M{#Rmc|DneAS2=$uW%t^M92X?-lsC)4&bt$Q6Y%GD#3lU&y6YDyQU;_-5hAxH`oKZ8lJr;=*zC z(3RA&=lc5ivTa*^o+SmTR@NII(X#yk++=~s%sE)7SSh@t_YL~tQuxoZzK(u8IEYq^ z=!mU?z=O4hVLU7j=*VTXaSl(g#}K#TQ&XcxGURfzy0i`7=1E+geIHb{{`U<--`2a4 z+yYPYoD5Y%>^+tp+(zpgnwsY<4g2T`d`!(vB*&k8S>jb!sYTlq#yHc_a%goN(cY6c z8!sWwn~x*eWq0?ID>rgw;gE2YP$h9~9IL0H5tdJ~A8pRL7q&oItVI&1bO0^?DJmiB zlXZ!d>)46bGWs-HPBY4uQ96B+=S|tk%3SQBAT>wW%TF$BN}?%G*g=8%=mxf_nEwZG zjWh~J;k9KOg(!$lSZ1^bFft1(7v|~26)^=Xsg{Jwqp8<@sbEj`%uG92Q92V{r~LLh zQh-CL<}p?T2hh3~CtyDqaYYYrX1vdrT7SR%iwFxoa($Pa&t0KhTH3CSLx%JHAi!vh zk5hxN1NB;c^Q_ACjplPLuL{tb9pWQN;qrYr&qD|es}$LYy>BlUWr-&S=}xVRUkZn_vR%*8T52FO-z|1c+sw1qGWz7=YNNK%Ywrq` zA~UXaVhv>zw0_JEnFH*sIak(7_c=DqoX_&3{vTnSUy0QmNvz^m#l$ayp6BwpU!R*5_R9Ij;d^U0?jjY;WKYLgzPxZL z)O0cyqutAHK=rjuKF8xV`$1>*#M>9kAH7*X@5CfIYhe9ms^Rp{aD&L#C-e1BjtIEC zDk<-IrTDpt6&q@fW3bK>ItdVBIMb;J^vH|H@uSoAZV6HFp{1B9W|1jE(ta|eCqum@ z)GqA{UbY_;a;^%$7+agykFhM6^%4q3N8XCrLa-axtdgy2fzk-d()AygdJ+mEiAArn z{SkG&rLJ^cc8@hyoYu6Lzy_peA*wuV6S1$JsyOq#O8EWVj!PdQtRX5nm);DkUrBDz z+YXqJRPwod*WMcru@#^|E+knNbX&I?3W;e-huQ7nNZ*CM+&}jz`jtS7YAz*J-MA{y zo*R3&_(go8$d-hdqKXN?QAzxMS z4gDaq0}U_8Ww25j9zD8nH_zwueiP8hHFi2l=U5z|r$^UfHEl)7Y7eGeJ09rY1)!t} z8U6OXn-BK3SnnHHOkUZ`Y;Te=q%5LI+I>##XjRSy$lfzTjJZ{FXpkwo$9Dc3?(0cGwkeNpKC9!)M{_2k4@t21Hi^DqQFtz-o`o;gE}l@xO1rZ2S#&j#bBFpX zc4nayQ7_%jZKRZN=Pzs4?5;OFWa!VrleIiqn=4AZez4Sq=jquyI~sg!67ShQx>1!S zHXM#Bs(Vhf8mOCjGG8C9ALV6O(tOvOiy>=Rb34-Y^V!CieHpSttAk6WwCdycpzj?( zkmiiYx3kL~uT-}mA!Y_8c$dA*e(_bn!Ea-?#^}}E!ra5J)NZ%|=j!AXbK^C(y0swb zQwXnL9Uqp|b|^!3L(&qRIqI97%!)nIY!!Kxwn|N3EL$>!;a(@VJcoHvq-(7tQTjQ5LQ9r?Bj>K5^+wKey2vocoW3Mg!MZ8&YQ7 z(e6+lbhld0oh<&jC7n5QFstOcTOgBmOUYQF=^ROkIY@tK!FdCgM>PvA046Eo-RliU zQzpo`D&xtaz5CaO0u@sA@)yR&u|RD2|5*F$u&TE2eE?NN1r!xfP*FOhQRx;z5Tv9# zln!Z>6pK<2B$O6JB&AbArIBtn8>LG?y5WwcY>!7i=l8wObMGJLz$1I@HRl|2jyd8T z?;v#hAzU)8*`ZvVBb(d-ghZ4I>C6*lMoPzOgef~U+2QQ&F0*Y0jnwWO61-;6R1)mezl1_aCc&Ej0g`LQ(TsxgF4 zD(PG<$|JM24)+X)dHTF=h7v#{3Ze3~Ugt~e`ZS>FWpnsHj4pMVn67BeYgC`1E;*gb 
zg^JO^n@0yTDAsj8lZv3b)FDX1BvCpu^k_x)(?x}%e&TMabCdb}TI7@N56X16@S5>&SwI67Yyg*1R>U8!@h>SX(kzMe4&@(r6eGSz11P`Q0Lw zOM;Im%O@#`0|Zoy4#uLg*AKQx;w%8}XQ)>AZ-;H-wN z5{JGwH&{63Z*{X!J$tNDTgS9=WJ9!z$gSJvyNl0sgnQu)gJNwi;!@5Q2ic-_2brkT z-1mMQI$Yp?N@(^_YJomwaqP0A%vKob=GuY5Wryvlpm$qgJh`Yr-{v2}5XEG5{-J9o zwa8vAP8Hbds6s(FS_IINyqt}&vdsiPE759H%T=2E_M6fn%i^7RT8koaIn#$rp*FS% zS^?Af`-)v+%@P@%Wqo|7aD;dzYSuL|$Fj`S&s*%m3=~P`$&d)1v<2*P3PQGvP{X3} zx%=dMjXlTr`zdOUDCyAn+Cw!(#I%3Cn%h!`G|JRd^5{S(P-NUyrIEJ*dZaLPg^!=V zf_U=z!0{%e`crmgczbh9nBLciEVzJtYh1#I$MPN9KdHYl|KAAuPvkn0O@QXL9xJv`ONqpSUfaVYMUf^y$J#p9X zQRhr{%$9|#yFYgI#l&$+ueQ_Tp?YWtda5(h36xq$EM|N}7+}*qcD84H=%cLzAFH5F zZ2;R{f}hY@ly^!F<>Akho}cLCJc_Q*vRUz-yN7z{wKXJ5)SQ`BD?grZz8- z_y`RR-HLMPv+$I%c zq@G^6t=wYzl;hA@>Rk0QVLLt{(EWm_?}`8$+2sD}%>S_{s!MAqJ!Z7AP~SODSe^96 z`?^`Rlti5}L9f|Mb?5WUHv|r}-W!6N6Yt^^f_)BrVXLxZb>p;;c^I8v&LB*G9!>(Y zP=wiO6FE9SqU(AwQ;eG0~p9G<&(*}1kteDGBTukzCle*)@jH1j4`hlqT{2JeOt zxRvHJ%Ph|OX+k-?^jx?E^<0pq(JRBjA(1KJ+{^VPK=x||ghv;qO97TlXkOLoAADc< zTHwoCanYxDOIw{Aom!;1wvE))M!Jh+?6NyKwm%U0ctuuSBrFc?v|to-%XN6t864=% zui#iPnw0Z4?52ff>G-O?8&*=yK&1oAW8G8h5&aDHFxQ`qthS56|A|RO&5w*kwbSt}Nuy9$a|P z$G%B^g#oE|&+4Tb&=HWE(vTQ)+7Y@>+xdalqMIG{bW5_}Wx?$0LJ_++>8Bi~Q*y;` zdwvL(=&D0XGfYv(hdGTU3 zPIiNMhhbpNyno@2kBh7{ zf2To(*s>wnvMVBB$+@fj zP_~k}W%f1o552lH^wJTI?~tX++dAN{3F^yGz++mFll?TnmX5KGwl(sl+G|dMj0S zjWIvyNE$g)6OhrWGDv@riDS7FSWVmjz#0e5VhZ-%!v(B@gG!DSv4JN&bpxy!Bi;-I1xj)>)GE3LkGx7Ujud^ z<(7K2y^6=?-HYC2M>RUTXBkKv#>`&mx+hrEz_^12bgGDi1Ow5h1JG9LMm_HkjEDRZ*7a$B) z%ezztL8dwyrgHZ}ceT(i#zO03&kff!qL>TT9ctHOQ>iv=avvGKLOZ<<`Id9(_On$e ziDm8x@j9rPd!?(WG-NENXsbYyeULkf9ov=?@c;@>hAUD+Ue}H>FQM{_%lx}!VvB~l z>GyYfN;X1`)8B8*PI{5exC27LVe7o}_OR7f?Ip`J50-;nB8FMEGco)rj~!T|IE5$I zM}X!vDM{1i{wpxG3GO(#jvri-&Fv4!#|;xdO^p<$i@+xw~99nB12 z$0*HOuIVajb6P(eTqewtYVIldT~jK|Vdtt!T-!X8ti`5_Q?6P@uH)#^#?y*VhLF;J z{PsuxjZV(MTt%(TZ3(tSRC?aBC5SSO|3%B)Da|`-86n{%P5e7v`U5|*x!lOG&i@@@ z_7sv6;V%1r+;h4ZEq)TUxJYq>s3$yDc|(ICk5eHH*Cg$H?=nh%PH@^Td%mZHa;-z? 
z3~?T%Yt5Uc_S!Wp$q*!IZV<*yqJH9f-**{l5_Ojee`Zy>)+BeqyUk$pL;8(Y0lib-A;z9EjmW78+aua;`NGV1ch*K+c-yA_coe0^{quQv1gj0FLR2$xmIhHl3ORe zayA(u_or*eMVDfG-M{K@O+FNI>F$d9=_Qx#ddbmwk{p_TvC5|5BhFM=%F&KC_W)mZ zc&;io`W`}Cn|xtk$AO6&-{e!;^HaP|p_jz>M>R3x($KogM=rPIKq)EaH_#t0Zrn zFAA+K@^9nS#cHStcN2XYhOF781w(}rfmD)uwlf%_oL??YYfh6b?Ns$9OVsu`uI!Wt{cMaiTiO{8DUFbYr;Jibd zV9mU_EFQX~7{GnLDKNKHjXhQ9Oau~FbzctuwB4&1*Lh9*ws!va6=MHD9p^6Ruf3L~ zGu246AyXfJM|_CcGp%>B?D?IM8WL*VFOz`td>>vvw30ORQTUKUKz5K{DNcwUi5pAm z&H2?z!Lp`-BQ2*cliW>2q*?%Ul;KNh5dGe2pByirXk%^YyJJsfd*JY~?`>Oe?J&q{ z43aJ5F2N7BITmK&qAO`67PoZu-3ZM)zu&tU>A0YGOX4j%_na)|dpc_`ucs=Yk z-K?7aRc^hoDMqj!!f28@$MK@me6WT#>I<(})XgM6~*V_@(?ri)> zdhXjHkL`WJyaRM6n#V?$CbA`Y2tDv-+hVruC$gW&ykpIlbPJM?dFdw>$4VHCcnh6r zCCj~5Kw;eQCA(m{nRgY!6VHSnpTU(v-;`zauvAN-z2-c1fE8vK?n4?#G<8%&dQIc?ltDik@WK9 z427HbEw%OWa=UKU>P$(8d#tj?_vsh=%c4T0`}kZ0)tz`fc&udd=Mux-@spn+Ch#WH z2yH|?Jbc_SQ2vsRal6a*`;f%ZN?`J<6F*)vWH*M#lzPG=StZ6Oy~A{iKz+qlgLLA> zYlI{S9Gx3!Jwx{43mo~AuB?d*W;D004W8%wRUDaH)3oyzlHLMKm&!=1zAw;8bN<2| zXexNISn~dK97#sx=tnl!fg>#1VP@x(=RBbLW@xNDX1zUe@K(NhwIx~Kh4?MpllL0g zY*CYE)*2JtCxx7aw=T~y8ux8T7nAV0xJO5CSJ7GZrjT~Ey(wxX%h-~dxj(Iv%p|8k zIOR%K@_}?mT-U({IyTr9&9rZJzgUy%x)G?>YTDnmG;-EFb-@03JiXzPePbzD!`h2+ z(ocDuDci)^A}bc2j9x5`NiH?ho(Jq(L5;7wDUZM4vKoo{BQ{H>Sciw5ag~W#?()M0UFu7~QrMud`Cg#d)Jr!%7na}gZ8UB8A zJ95yADMZ^6aCT;ox{vRPahVWdITEQgx#)ST>QYr3(`#!vPu-xaHmg=0Z_O)c}jhIm>7aaVHSap8^E z8{4&tjCtIBYYB2J1L*?p4ea5TzsfB{(y?!R*MCerkq}Om-*C5MvZ=(Qxc$vDUvp)X z?1Onj$8@SK3p%~qmx+2burBdY1SUO-CwwY(Ut`)ZI?AcPM_l*44`~rt(FNnuOlg)J zxmO_m@Xk|2H+&(x$lOg#FGBp?$5jqaTLPBuejrYfy^|E~Cv;#axdd%sI=2@FLzjFC z_eexWrz>Rfrg#FMD}U3bUb40-fanyKBPPrQF&+Btmg`v|7q#m>7%rMnYi`-dgy6gC(w4U@D$@S3oQf_v{o+GB%|#3Ne3dofl0!W7m3e zrN+*$S4CPV75?VK@l&}s5g8fJv!-yUhP5W#C_6lnzY+p-Td&%ti;j3c-(?@Ys{(kTHb zKLY$ZQxebVmltpKfo{?<6U0Ih;_bypAJ|XS!I<He@Ws~Y zGPJMN=iF<daF+18zR%`N#S zepW%nb|XhJO-#?6CVcAa`gi=9P-Ckv2|)q}^VHHB3?;)gYM__J2oS%gik%157lp)t z1`7c0P%RV5(<1ZgQbMp1&`;ZWtbQi%3pY_fxm^De1cseEl9olQSK;pMjBQi8e>xA z;P1!It0ZN_HEqC3(-=Eqx?o}{$I|0Ip(tIz6d-XTu==$8p`VLIgZr}kvjG7}t#g&I zSJr()ePt8Xv{dlv8lYa)Hr8^V6gR!+#Y)K)BzxRx*e=)#l{i%%x$Vw?Ed6I@Gk&%+aQyt*4LZRUbgme(NPBENg;VrcaYj!#iwX;QcFZ!Zv{N*64&Lz=er zPOS{2Do@r?UrZ~+?^@@vuvx97{{i$_g^=uPL}>Pr@a+uHJvl6lHrN5Vge~x)+dxf5 z?nZA{>8J|yGa7VyLJUs5pP(nzdx`n8=P=6S={erkhHy?M0!@x1c~DsuxdD6>Q6`{T zu#a*5RUao3i-#PvSGxy|@kGzALcA}6Iw7Km;<_q+byvB%g(7P2A*cmxAh3rzJxz6QUAVBCs1Bu|I4j9#|a*>eI?t@JC zI1p@e<^$hg#U0dmLxf5pP*WBm(54xnPWS1Ni5B3sdeJ?Bgbe-GkoH-HoJrZ)ZD^p| zm;p9+N2IyMR7Zs5U`8EdN&!$O_e}#6k8<^mTRT9}5JD!;2I1hjIV1dzhh|GMU+87R zhx z|GKHvT#>zxLy^Hrxcd9x`2O6&7c*cBr+L1s$U*p>ksePeZ-EUu8~xgg?Cg>AG51Da zAyAY&!nepe&j1OyZIkehYg}Z`GYB`gY(WUy3Aj(xduMoLMYVmjTqc1eg)krJ+rtcF z1YPp@7rtCZLo3DlEH_gEuXDEE{E)bmHW<#~cvf|NGc|J+=qXi=uJfdQhkN|X7VJ8Q zl=r31y))s1(2;G@9k>CCp&3huQX)@{!4GeOiI_VDVAS1uovOSY^4_&_Yj$V*&Ig9M zc}TfTk-04@prU?|1Nn;DVwCsozto5b5e_VvxaRJHVcqq`c<*qTQe;e@`mTfleczWT zuOK6ST;3`9?e{Td#1hvD7_KZ|_lD!!0{tV*gqTrhE=WBn_`eP-eIR%c=pTu}NBf>O6<q|%LG(h-Y*!KMS z^B?=GxPW_boQ5Y<9PJ$0YY+631z=$2MO)Zt;Oau#jTL|_cHjkxYvShT_nM_-y&*ij zGfXecP+6~$$^0YPLrNHm6k1E!<)<={wqnkk+W$xVpfyyDyjhQR+^~XH6jp6A+z!y;#tE3iRkb>= z(?<={|DFDhHAPk!Z+C^c@P!ZlJ`V<9?Q0&5oWxuY7pURdLV5nznc;do4uml1L!=hZ z8FfO&=RpVBA^dvkvbf|P>uoVbK1B4P(~4r8p3@L*A7%wkt*j4lGrUFeNTZ!VJ(T4} zn6?ed@PeP}BkTes!7?c%9t|4G#$^keumw(ki@TE1-Kn&ERw9?}L6S0S&}n97(Ex4g zIq%S}FauOn_DDzs4LIn&3r>r6WzA)R&=oOwTk3AahksbQ-zImZrMiGi!&J zcgnrVGd+s*ZK)d6m~OjjE?5r4MfYQZ*)dn)?pA$E0LBet#;)tdv3U{$K@jo7rO2?n zXr26Uw=y>NDcq+Phru%N#JF%GDZl)H1ISf=GLF!Dd(30oxF`CK#|CN!D32U~*Pj7p zOaKPfjIr>>D^hN&Nb!(OfTZ}?(W>F2=^`u!t%@ez`ijb_vmmGPQ>yGIE%ky1LQO8J 
zj3`d?P*Pso+i|kELQre5)GcaZAdxYL_|5=_LZ^M0ulpDseMjocS&i??rbs6C7E>W8 zvR98|Y^u)8yu9VlfbxJIivOe#4+YuYN} z`-W()24%nsEP?AX3B64_a#AaH6%uBXnVbRJBUyCUO=BTF#`plUHnm_zah+kW#B5Oc z5Lhf`%Kuy}XfU^ZJ?FyfceA&B?B|DTt-3Sx-R+Hd8mLTZJ=2(@bs)oLI3g1xgl2*E zB8GGNVxhDs=w4K93H-qN^LL8@w3OO{*d0E$2pQ@MaHh}>YGh_w4~Qa$AaCY;R7LG^ z!Nup~A!ix~cYstf7}rPHJGCnvqC18oNZChbZeP}UK?vq`OFiEJHBcZ2X->flA6mqM%; zp{bj6{oq-G?ndbGz>@vtlHo`x;1jIUayuFQ?ie8yqtm4TNDEb-gDQ}YX&}EdoHHc= zK+;pBEkV8G^~Fs{<(4t)4B0~8z|czRmr-Gc{mh=z6Nxc1{zB{HSw%j(^a~jM-c0&a zx3m26QFny*xC8&ZI~)RD|^G!|5?#;Ny1! z0&|+d97=*Y0B#V@Ns!&U-IMu3Qhw|j)R%OcJ7^sx7y8iHDtYX$dyW@bi`Lz>;PMho zz+~o{oe?eE6nPe7`$-bAXL?mq{ny2f@9mDj3cz7Arcw!&LJA1Qv~r(u=qILrId+aG zgLw*=Zofg7r8i=m0Ck*!u4BE(2bfc`;L7`;=`x*LpT*6XAwxv1Nee7<4!|g;r;Vjw zulc?V63#_HoJ*61^!S>AOwaWSWmExl&a6*88E}@f4yuG$0v#YwnDk%-bB=s+o>>6W zs8YH5$gy2%Vs^FyVy#VQ?z*(WEpg{Dhk^sKR^qX1f9t?_WY3TMKjdFGpNySUI^|=X z1w!9{_}n+vlEs^4QceJ=DgXnFg+R6bTDWo#^z|x+X2fyKF7a3>;L7^+FL#6>N0N%Ki^kzKj85C#_)tSBmdjZE34pHTFumY^rl$bbDNCJX z^RZtCx_nR?tkY}zJktWpy)fEO`Im$3i8$CI95SajzzXBVy)$KFI`T+VhKa4gYzAok zzqv2QldA`XZy}BacgJISz*Zs3Dh!pvKgtgvU=E0> zW&kU45dxzi=o&wSg}R0tME);1n;b+GI_s`NPdkk}iO~bGrx3sR!6gTbMvzB>Xw3Nk zrO-74#(~AKk5HO3r}6_b6W?#&TR~JGsJN_gr5&I(vq3m|r@oeXOm1sXe}zD#(%b^@b&rKY z-y{t`h6HKSd|+$pbDOb?VM-v-wOK|R3oV>kS!|QrP{8*h=;nI*9P3}(+>;oIxqn4&)=d|nI}cVL&E=Q} z|H{b;w*I<;gV$09m&_Y}FzM+BYz1eg^S?S~Wsnv@8%rjATGi^$2r)aIowHsf*Nh>q z`BR7=QeO!Ax2;lwoM*E~yI~TY^HhwO!%86mO55(ybp?zhGa!Yy^I<(lCenlXB_j?}*3+A` z+~={gH3LqCJ<_{ToPHC+LLF{EY<;7mqc{_q0~azVzNN|98e#LWfu>BisM33@plvt$ z8mXmHXlhDIJ^pF-*pqD1CFZG_*Gy4Z0Aegc!kCuX`k<^n<1qZ;mBFtj`}b>ayvojS z^)Z8Vg88E#nCr>J38s#6!!{NECFR&HBsAgt;6J?v3TMQ81omhSf(_lBCEXdI_LqWt z<(-6{3xHY+iDY>w6^5Y&@+{IBlor(RPc;N-JQnF?B)9~2Y6){`50++%{PCNJ+ve1* z@1?F(8-}A|(0xrS{K7iyRwoH^H+$_$9lHk3nc}ZoR#Z(0b7)0}ligUO1LT<&;s-Ot zD$JyHjzRd6hT-tPRTv$j!jA2#u!eK{>z-h=atNZrg18(oJLCB^(A;@YnDAekdl8zL zIU)pTRLhrR)}Fcm4}n&=30GhVz)@FUXe+3D+Q759fezXwPdT@jtj=VMRXxr9dg zfdPnVfRB%FbDSrVE<mFG1)*ay-xBBtzqtB`oX@pf#QUtrcC{-=6u1=(vu1~_{0B}a_6~Kzn48jT?UI9ao zlc1{x(dpxXB>!HJQ^|N((JVjt5rK#=5ZFk#S3GFLzIm z0c7Ba6sqLBo?t7AT4Zb2$s8rRunq|oIDMBTrQQ`m>?p)XBp*!D73hmxGTCrJkV9b5 zn;NW>p*l10M;P_PPpMQQO-qsn6{Z)o!QQ65JQEw*hA3Ta~xXt&D=h(LTr@X2WsVH;?jzA z&L)^C2(0x}0PB&Ibj!S+D)1%TeB44(cS<1`X$8z0a&Nk-eqN`AV`M+|x|{wZ{}9AV z0W*xR@6;XY44Fz%8qBTnbH4KISVe&bji2U8xIL#m%VH+pHjQ68ITQIX%s$26SBaYk zBcJu7o~ns#_FgeNSCWL|Luaf>wmhh%Vf9`LxjmWESeMz}0yz=LN%y&n=GzMtKPu_v<7lK`z)OdJeDLqZO5vl_HI z@34XZG()aTXmEuwbX8=^c1>}WV)Dy_K#a$A>2Ezf(jo2ve8ZR=stB`XuLeBi;ecwb~3UA*iu7JSVt0{=+?wc|OJ zUOEhogvbWbDq6;_5B*{^gfFX__;a6*=5Yqg6-T|!@4PKrlgFhSPV-rKYdY7~J-I05 zgNu1VGcmT2D|>xI)B+r`3UAsz?}5s~opR?lEXSzN;{NgiO}s@~x~TCT8tb2ycRmax z-h98a&b9)`c4kY}bM*UDm|vCxNOuMym^`Ka&HZ-omK*U7M5LLUXNHPMhRH?g~!hvF zTALE@T2!)FEnP3s{bkVuqm8qyBLWi})O}-}r`39ArbUczpBq9u318rhhjfJ1z8m@O8!Loa{A2HRvS7z-1Jj9oqRxr@Fyzu|tTxEp?nb-?b`2jBrdcAm0i zBeeKJS_{m;eboyaaBfIyG>duZm;; z`)&|HHg+2`vavgM4m5^O^uOv!(EGlOvU-VHtFO#7GoJp0G1X(;vABhJnzY@=syipc zNP7lYd9!$8Zsn25cx(uE(Q^LSDWsJ+7I4(`Xs*ofcw6twaxd~m2l6kG+LKo#7FA~7 zjf62+lF-85zCeA2e^>vl5t)Q7NsMNUiWC`Sh``{kY z$bYH)ptYX0_{%i7>PgBv0PM7yEm@I8bK8x6>HcIf*|_7U6b=u4MXrXBppfYo4k+QR z2{|jUpjc6=ETTg!epSE{`aYB+bLaknW7df=?nTh1bcArO7hIkvk8pmg=%4AB`^Y|h z3;Xm`w~oWCmecfVVS>V0UiTvV zEnXKxC-q}e!n4@F6i3Zt5&krZ`EhnE6yJq9-?Y?ZA;?6&DpXN7Jmmx}ZHuALJ57;7 z$5%nA@;pWLG7A z1Sxi{#8PEj$n}KqZ~pjvt3Dr;bx4p(AN|y4q_9CiC&d5MDJrH92%QIFTL!ArVWDnt ze8)sbe)(H?Vu+6!^q54|bNaAd5qiIIqxBJ?&i(?HF4FJ*y$N@Z7vtIEw4D4UCXoT$ z$a|l?tTtanB{&{C*Zfu6wmrzIR=<)ct?qtxj_GXS%y>-U>(5UL62i{uNBom3rt~~R z{>9dA>8COIaUePi7D=A|I+Wil7Q%F}8hDouL#y9D00J3Y83zoCVx})FR)-*jn`QtqU=2BVBwg`Fv%vZ_ 
zDS$AU>q4G8j5qi8kB3>ShFWwGFHKMB?i6j*sg~Z%+)j-$>jLH9$MpUK;lhjiP7v1j zTXS2^j&bN%4^g&K`9Maq10T1A#>oa+iIZWP(CVNM8v2|iXEN}`q-1veUZ`_}Xr)`` zk3q(aQxAoKr%HP*ARw)Z!K#VHt8mteQ|tbQvo&uc9AYb}QxkGM#tN zW^B2KywT>evHMF3L4yjyz{kbP>V z?B0~`LPYq}CCsKZRrs*;)NgxZ6xbhrd8`ql_zoRmYsAvD;aQ&VnVC=>Wm2>C{=+uw zeIgQG;E3)xRH(Q@sz@b(V*OdF-L)a2s;mZ1GHv%C}4Q&|EGY{3X(S5uN zgfeYix?!4**SlI;0=?M=4^0J9`Qusc=9JoHW7$$A%w{d`Nnz)W-0;SDv45_qEV8Ct zN7!btG&AqIN`-BVgk)n#7P&lUOnRd)8>dGgDpUN`@VS||rJAh30>>Y*o+aon;Zw$fH!m|1?q&<{uExGk zlW@i(_Oz6%4LtrYpZ(`odJ=-;&iLsQP-F}Q)+|4rG6L~-^uK<%UMM4cxjU`wnwO^C zgWP#2QYA#GEOtX3y?!FPY60!i=R7S-DkL$fi$8~lq&CgGka*$tAae$%o=2%i zbn$5aJ*z9XLk?k(4~$oSi4~y zzaMVwi751Xjeyfi8F7J=k)z=y!^0SzRwAKS7-_lxea)}~2w`J`uf=^+iE<%}jA1yVdDmAIc_5;z z8$iPsk?C2^b>L_V#tOi=5TH-nW#I;_)8B>FMUD}%u~^HMerVp>c*eR?~1I~qXozx`)`;>OY>W#xZGZmQn z5L|!aGlQ7e=O0)1yH~IkLq6{2dK!4H@5McnQM_j_c3G-N_Tfjhg9Ba*K8hwhlr-=0DJddzc~8nU}vRZofceh^OrF?U|> zhSWddlj38t|9_3>_e4+BK(6@ztF7Z0{RRUE`_C^M*M|6+OPFx(-l45yBNM&6JJF2_ z`8M1=OZ(5L5cy=N0Ys@-?davGz3=4+gZ-iGw>!~M|NRMj-}u*J4@4A_(dg&|%it^9UFOVS=TK^$=`NQySjF^e?-48msXZU}78u>L=6+ki5%9~1i=P>>gGSI&}(P`3K zGHib>(jSWB=db010E?a;WUpH!*z;a!y$9-#b9bT<0yroChtmFMT@aBzCxv5G`1%dS z9?>H?Td`a!yHIwgl*3UV|N{`Ur0ulZ{;mCd*1l(k?r$T zf^SpIJh5vo{;42D1QEAQV0WTZlu{c1=a==LxUytq`Tx#0`wHko+Q-=4r9 ze|A7XV3#lm_ah8G|Kry9_d+@c!l}}EB-ioBxBU9mvlPLaPX_ExwDeN%A_f=vUuSgD z4YtR|_UX2(du7mu$l%oO&@9*4<@fKA`d7?&c96l|74f?aGVuK^gEgGjvwOz> z??J|vAToGA7ni%|gPx*@49*(wZno!FCF1wK@vo!~{UHkPZR|Bj9R+K@}gx6NZ@5XLz1 zQwFhU9Olh&>aN__dX{mG(_0ROkG07Q1B&9y0*bU3g}c%`6i)p1Qns-AR*MV<-nf!rXf^Z#R8yh5%h?AD!VdL6@w(%Gi5{m0Og=_vhSa>o_M1m&gJdbrOsQ}M9DMtHFXE3gX4< z|6HBs4G_}u_UUGNqzDFdsPUd+p zY5^U|yI`Pi`E}~NAjaYq5k)~jpn&q>;r)LpcaaX#FgiB17bJmI6*w(23iwheYTdlNhV}SK+pU~G5JLCQ@PG!>2z^FolQ(S~V9{c2e_#vr- zEs~R#l%Bx2psJ`yl=(Vow6s6og?rK{*s|m2Cw4#h^GB>5RV+5Dv1it~)Av4AEY1b2 zzkY~_66oDz!7~&lGVf7-U@1!3^=bD@G;sF6L#44W1V|QnN?Dp5nK&AM6P5c|Bd(J1 z_m_xBQ_1U=HJ-3c!8Kb_L371kVt!lOP@BiNwEsGX8fM792gG*)ODbbJ_IyfW!rqY} z!^@$=x{)j%_1)NjrC2J%vVp9nXm>P&S7LKgA4o~pAINsBuX|&$kSLYwdBXG&W=_ql z1oyKn3n_b5!pkV8TBuPMdW{Z>n-^gI{(I{7BG|!tsQvuPa@Y5a@8`QrEU{lI`8oGA zRcJVd=%IQ`;+7bGO}N5WMB7a;O*|VvH!fr{wVg38}d3j zraPOB7X$^LIWK>I-GuG_&E(}jpZO_;=KTcn4=fLVR}!TCN4me43WgG!i-=|tBbq^% zgWU#9ZM{W&$tfxM9!sMS*)w#E2WvJJ6|qlx%H9_tiA&cmlJ82_W~e+NqNsS)!!#+$ zk!gsPPL}1gXI-)xBP;8i%=1^Tt_V8cCdg2Nr;z;wZ}e<@xX=AoNT`Z@$UWH~*&jZf zn>30hYRCL}_fs&SThy?b{Ly93j(_c<-K`~(LQ(!kL#fK0t}`nI8&zR{P-sy_wd>{a zlH6;inRFK)n=!zH&94b0<>V5-eBr38kyMR)mzY@pR8;%=^;h)4M->X0MuzQG2XCCd zboL*Sv0`VOINXG{v^4ce_FNMl`q`(F@V{0w7RO!-5vS+UWoRW#u4ow==_u$uiasB#yN0aPL+Ms+QDCXU3l8JbM-}N zXsDDr`cjf+{?U}XOhbGT9%+>IdPhv&JJe5J6ST{k{xJEh?bbg69e2g~#yuS#WSg9d z<6%|9g^kj>nxp*B+G9lE?{lBfg5RiYQhj%`{e4o?eD}RDQ()M5rB)^1Y zWF<5xg{2mjuKg6tFr2dLah~feq2~=!sW{Eg+~@B#yvE6)$3APKebw59w9_vw?Xr*A zlv&ahnnRQ{sOQ%e74iAYUZ~H7-^{jt&Gl4kY!F%>SsE67|A`f5PiO)BVq9?$~m(NX0OS8N5&;(WDLY!c~ zbc0>i*MEdTLs3-u+9@p!DJj*u`uf+4IUHwF5yAbQffi1Cd*@9X8=rg88fCtwZWeEQ z&Qp{b`!l7H{r^6TDY|zZ=`1XlMV_=KCMJ@$Mo?C#kCoh0HZ2yT>}D+F;_BlJp=uT) z@RrRQ;pMe=7o8>+(~LLL1ikHhsqKbUzzlYlr39DoNN>)f(pB$TPQTN9hd&ljue^_r z!0hklvpDy)G;@qtIyyO6)kI5@Z==qRo1g#d0E$c@dcLz#(|pg?`nALOXt09HD|ZiO zjkd0)s)Tu0DF1El3!QJ@a$2ESL(FKCRMty4#e5gub_`Weln)7COvsJ7X`h;FsVjcg z>5}YvLYAerWqcfqZytRIR_=Qi20Mm_@xD)=+ShT{l6C;7~ za-464$u$>xR8-&VrsiyGWvVf7C%POKvJ?E{Qu`^cSUXDOuIl*eocGu4FzUsRg5tX7 zcdsKik=p_PNoiabY)yt-+@iAw#Vr*2D-}{Xvx2H<)|hJU5cTG89VSmkeZqkT6Df2z zBX!r83F)e1^BV#OKbS7@1mdiw^weD(A2W`#e5jC98CpHndB>?Q+xU3hWaz7!LwTm^ zhS$!2s5oJf;HiaT5>`Ewq>GsoqB7|yGm1E54M-yd(g%zv55ZheZO_A2!V+(&T3%ZiMQCOaW}P_8nT9l zr?@DCsTFTteWan1YM4b;EO~nPcxBJdT#UvMDvD|W)gk-^GP@%G!8#jnyLqz1j~#*+ 
zd9k>3Tc6IC-Rc(Z$9JvH{*B-6Zo03$bNa=$Wox6P8kq)!2F&aI(DvGl+!HJM=UlYO&3dynUut-w8^R zYWA`W`KQ}4IM{bx?bPRAZhfbEwrCga|E;K6|8|IB?wuCSJ%TjYfFwz3p;zk=yP|=|c^ccJq=$4_LhOrW<4h=ETCGQqFRjKE_oP zDudl#$PLue$Gkz9cA|G6$Gmtz>M4%U>bHmXfLZQohjOt4z?DE-71{El67?Qhkz7P- z5;<gNHc4<8!rE$Gl#~%#W zjO5UF9aXNY8y77^`>dZX9Vvg1&XIaYwbx!xwXPN~(BxjvO7)L4O zZxQ#C5U1Om3XG06B3^o@_vpw;!QU6t?mgLi22_NlkymX17Db0>1MwkSOY`LJe&Aif zKG=TuXhj~BcXX%D5ne3p2ar<86MWJtFO?pr#qpNLOy>s#&#_U!L=p{Hkq6%@WS;sY z>dq0k0toN*<0B-!y#SatD*|5q2(@fFRctEi4A(W=$**N)ObRhVZp>FKTjIny?*7nI z)shf+$?wG403a&f+|5l_K0s_o4pB4#($`GLUWK=1sOnif!Y{;YB_roT)h*hx0eEie z6E}h1g?_Rn-b?wwL3JAd1W`SO{uQ9|{B;x`SlTW=%tLfs@3H;YzZ(c^(RQz|)K12o zD9lSyki)jYuMj`of0^Ah^eZoR;p=m+1-`518=m-f@mXun@vt3{WVfASzV*#9hs7=O z=}2)>`bQD?-$hYYeIeTdR2|}ZOa8hioAhl|5_-bD_XVgoR#k6Xc|WbUp?IvS+SSY# zU-hIgS=F7Dq>Z(&H4BfCg~cg1*VyWKU0t31;|Wj9?TXl#J|eNdY#fJE9q|%eOXF55 z@kx8EFp*+0FmW2jxsr5TDo?4N@V}CQ^h>c!N=q^*hQh06fFzym>iGc6%>l*N5!_@{ zZ&U5JH`h39hb8H8%<`Zv-`c-}=&48+fkh^o$J;$2;Wf$r7xZ#(;h+kxV)KdIkdeVh z$~>cM0W@WXTXp_h4wl>i5H-9B5L)+MUz!?wM}L^k>a{l@pB@7;=$s~Bo&b;=aN(cj z@K#CF5HE3Av3jlXBBdMQM7M`hslu~M@a5_wCaSSK`zW2DBAJ*{j6~-yB{VPVIMu0f zc-m9X?;r(8?kv>TnG@)HijH5bK1Xy>Mv}~Uc6Gim z%Fpdz1+ecj$DdsOJW}>mH7&kSx4jK@WHLfk;c&g6QT@qYT9^a*{K;J6f&O z(HD*relh#=ipn{GRcW;0TjwjKI#F_D{sIe2$N#*~CRB6kU0beOV?q zHy4^dH$JHiJhwkV=si-ylmpFtyT6SRS}4>24A%U*Er2D%Xipj%B~v{ju`3(8P2C;$ zj3V5VN63C$z6b!Jrcl|wWbW*R`w(vtisYx)?*HI1ZGCgh&dQzMcJ!5TD{%6pJ}|!S z)03Tp{~hYRGHxAF266>-=)Rsq4JK`jTR6a2^-D)*Xd~5Y2;4UPE^T+wH|z7?`dy(W zh3Cj2tw5o}MD((6pt}DnJzZ(6dbSb4%zF!pnFQLqOgCw}FBMr%F@DUmoRhmf9X0)m zBCy+3J&JVyw)WAJ-N#jN^JZ)m41!PAKaDdWp;kWMpePBUBl8XC0&~o z=?=+FBOxFl-6h@9-QA$lpoG$}X^?J^PU)1Al$3XIpYJ(OobRuUAp7jp0O^70j?eZ zUM(zrGZ>N{XqZv2+Pe9C`-mH|{r`HBor5Rv2hf+Z7j)f4LgvB8b~+bqp*_dacA;FTwLo4pTrisTh$f! z^%Viz_W;duy}hWnL1h?EcUHmir&-;r6ZO5pQBP)OV%i1T?VM;I42>;dVj1{Ee`K1F z2aU3L6U0h8egf`7_e{}4aDFI?cFq+SNCP3&X?V6|-e2-3Y>Ao}Y8B-8YeWQ-Z9Xs# zZAxw$!RODWI>pk5-%6M}H{cPIL~7|%j!lG8`5zBg6wDt_scNShlb1?M=XzTZz4UTr zid|-!j@y^p5Vkh{2b42cE+#i;SX);%V%MWYZ`RN5yS{ z8ECes$uM98p0%$A5oHG8>tSyczIXvlW(7q5H5#tSL5CoMcn@4`ilhLzBs@kQE4VwU zr5(yvN=u5}9&j%Jyx6tyX#Abn9WUtWOf@1yMw_0QnRqO7dGtx?C;nM-iPuOmC(uqv z%gYt^JT+SY;@&;bZk0PV)onN1V?Dlv3jXr(D8>M?>)V?gy;S_+ptuQ2kSO$0RUGG+-TlRP1WsSyl=?~2C3912FQ@ z*v0W!UM?L8PWQ%u<}m4SD4+-FX1)HVo}yOb-(<0;a#K#XqXX?KEsHs zL${3=x@S@n-^=uv-<)d665mSN)0Sw4br~A27^T`K7QJ_klx!0B^wjk9 zN#NN*y-!S^y$U9H2a-!TZ>~1t5BZ$Hb4CA{Oaa64vr9D|N@p*!%=Qdxd-&_E)9O{% z^Tf?`|0E`6?ioh_tz0Z9C1AFz>_DysiDdKcqaV1#<6iz^)cpxC{OiXUjBg8DIs6XU z!BkVSzyxWtU>r_?y0dP1!i4V+)UUO+^IW={Y|RJjor(K!qpwA;3pEy{+Mkkebzjm? 
zpv2;iGV4?FgQPcVySD3XKJ*O)kG$937r)GhL_p04BQvvt1=iU{fk-cM?_p;s2B3ND zYsY2C#z9QG95~1Cv~HxM3C96WhB~yIAg_H4%L3&ukMI5SLtqWE7;=R75f*qr!XTU@ zj(LG|4gz@i7yU93L`n180pyk4A@_}{f~KaXo`WA}@f7&kn7@{zx#`F7$Gj6eYi8iE zpz>IS>%2T6c6f|@o!wrjv)Kba8`xFuH_)&#@B)uYD7akzd14>JXED9W?_`_Mb6UAv z@N53eg)sqlb~YxqLM;|_9>Ld{Yqr?#@Be}R$I8^-?VwzoXintdCjMtitzm-4z*Jo$ zg;U%2wQ^_{b(^&~Z{2hQ=w5Aia;vMk7FU3qAgo?%)(NNM61Z>+_9ly(C)S^AJ-XhE z*Ah`-0@%aR8SUa`y>8K>poJ}K_c3rH0lq-n$%*sPHKln-FpJqsyPX9*k%1tE=XDX4 z>eEpXp+)~@m0@2kl_&6=Fe8-mhHcfSN9+b&b6bLNP7QH6-baECTq_%3R*8Bla<20B zv3_V@EjcxjxQ{p(TvRC?aa0>yz)T7$>=JIVo@N5W$6vFm@4}1C?`ff^8Q|im39&f@ z7k$z7*^bcGl4BRv)4=z@N*Lm)v;d5iQosxUISWAr8}m$PJ0A2m#$^z*obeDmUwr_P z7e9dR-Dbygcy{+q3j0Z#o1EA--*AmqOR(5*jnxK-C&6^fymOx`v);IM(nnQ4XT03g65!G;3{TEk-;oi4qaJ4{u>D%EXqRfqf1HT% zJxJiMoc#;VgH4SwIkGY7aVFzp@LA_Y@GW+);8CE5@*)5L2o-jlue;AE2KP|M5CyRL z+R|4SddVYp)XtfEm40+4dEAa4Ya^cVXFEBHF*7n&y72YJ`?&X<{RLKPvz@G?pUsQc z#O<`3ExkJ%Zl7-SevMT0QDDFc{;8s3>*Zu_@uF88+lkOu(f_ z=`D*{bQZSnbcfnsWplK<-)E0NkUBLPlWwzjD;5({q_c!-%U6k^M42OVE%;)){+-p* zCtNDN?TvDAZW*b7zIuM#@Y_bzKK*bp%-j4V{*U_gS>nmG0z6 z$;zHSXPoC1joTk98G@(KBq+c74kC_EEs7I=;xdmKUcXX30+Wjcn&FEVFO(Z{c7P$x zNe&GFYDVDFn!>ASS^PrF4tofTqDFNCcs@sxDP=1^hBi(>0$I=xhqk=s^pzn?0_tHl zz}feJxp6!75P>Y^`rxp(BAcv$G$3gJ2`XdJY370;oKEXf3Ja%cfu^-5py7h^m7M*- zI{lo?&|_ZDLfdH<`C%iN4_H-+ucWvs`I8xx@*i&7A8NLOig$T&K0JaIQm`cCAEXRs zblXgn)mcwA3<9%7jJV3+$cS4$_)d;q>yghhO~tT*Y7Taicp&z0jr|y&%!gK<17P1c zcvKHQA}rWhcc;7YwoI9E{kNfj)>$|2GC;loin$LYuK*8%DW8sa?{7ofH_I2zs6 zG^<9Or!PY)=ZXY1*(y4HF6QKE{_6xNlxO0i`+9xPVU#(t)CvRj+5lkyi;j^PH5U^0 zYQx41kTYo>@f(jOHD$bn4e{O>Gbp029>27-^iy1XbE6aZeq}#X+6XXWz(E{?h|&JVOrKm0!TU9WeiW6hCs zHc;S5wo`fPjN;e7PAYLkS%bgF&VUNbIQVQ{H#f*GPkx$=Kh;Kzrgrr23L>pA9_{pn z!^m(P>)~DF9X@VYz9$re*tLv(UiLa~0Ow&x^pTBX{eYyLS_>-Qp7`$M zZ@>z*oR|!1_|W(|5yQqb6&1YmQjlJ=y?%sNLw<3#y-!=(Uq*FjJD>h07c7=ti5-dz z8Q-%v2Ukbd5wKU)Zl+TcvcfawB3zh$q89Cc8mddfW~M6&mNJhla5ee0)j!>0ysh9w zT$Q=dsl`6D#aMJd_y#MU(b%btx-HJl>n51V3@#JZ`H^|>@!#NMR~>crJ;V>Y`g2i{ zDwN$n5JkD5{D02Z|Kiz*9(fKEf5aD)r+uvQm7z?e_;z$8^^qEU%#CzJP~*5r6KE)x zHs%TB{dFR<0J~_DWatpc9ZaT4h%;<+p~P{AI+{sRsJsk~BFBG)gPbZZ#w$OH@-_(# zDi9?l2OszjSl$q6%nMxr<0!4w3JDEN*`mh@GCKv1w?PJNlTF~e{)OHJCr1Ezc`E1( zpIN|<>Ewr~i$fBa`VGI(xcKM5;aF9V;1u5H0l!#ta>r%4>mRZzOex5rB8*>N(NL-H z_Qp{#Ctu%qF6CAj`E@@Z-IaBfT53LSvmtmRNTC<=24~Q$z2KO`Kx@8%9u-!N_51dM zcN<@Gc$N+D1JLk+BV$91o|N ztFo0y@))JbRT+kUn_aw^p$lI!s5%H|%jl?3Qg=jlv?2?ZPl2&~LvMo|Wy`tvh_6Q+ zGBh?8{{o}8vtc2f|&e=UHMqH((WiT zqPi8)WJ$7W=m}wL|1H|lI2kE{Elf&uC2u(Hw_?Cq21wl{ZX~!z@3D+5^3k-m7kd@N z>X*LyM=rs52T@peeV(%)#F91Zc$5i(V^aO^V{&+axad`|&}_10{hPPslPc4zDzi=; zr?i3EJLSN5PA<$H!iNp9K7lO!-0To)NFNLOmQ(Wr^7&1Wcy;UV1fE~! 
zDYl&Dp19=kPVV^n6=^#%0e!xvj1hHtwB1g^xAW3bUXtP!Jg=%UzT8|MnG<^R+fkdE zW<_3TXv}8xl-lTrUOqClFJ6>a&b$Onf(>=q#lP1}1Cioc*S!tS=80dOuCE(YJ|rHY zPa^!!f%hLS5G?iqqZ`tOMl+j_>UYA9217;ZNE5R?Q+O9rpT-+*6wMce_@o+1PHaw% zu={zVn=mZoPw5=z3;65S9n2nGO+$n=0y+wY-5@xzH(Pdt^+$9?u7PGiP}^DBIT`Po>m3|aJmQ}8*&^$AKsUM0oL zloOD&M^nVePC}~dZu-^z$RVLWmq8FP(YMCuAi}chm#v~YeD>Tr88VHYhDIuVA=f*C zjf82dMC3(!loCN2N(OmWf$jYrU=PgA%q0JMj(nk|$s|K++Zt;VjkU24Qe>*=<^arH zN?KQCM~w#4!&i!Cx#;S2Hht^G9?$#oNITjKkG~9C8*aB2xa;VhivwA6PKC7|XFJX(3`kk z3*L{I(QnN=XY6zqJL(w6-8W$_D2QKmHv% zlrSoXF&?df5#cI$H;NA$s+V+z8_yy5u)gr!t;dCK;=7|=?(4kY(YFD!Er`0sf~KO0 zVK!KZcEg6mAd}vT7QD`v9VXOqgU<8EvTT>I=t&uuJU%P}8)DRBEuJTOmHnHK`Ge+< zIU9#=I@;QMIQt;61@|o}kH@o)|H=Z;Bl9`#5`fM_9F1@>4Y?cAKy_F<=m-ctScj_^ zg4}f&%-r z3zBCFYvR~@#u{2-d^sf7O#E$4md@{Par;!WX%T+sqS3i}G0$T>hwn4u z8w_##!o1OM+`b3Eor5TdaS)O>tU9WUqa1L;v(u=aR`542JRtLS4m&+E%JC$;v8Hx7 zp*jX@ccpm)ZcUI@ljW6y0&nzNwZk*6>5m1lc`L0sA#rmf_g`oPAV7^)Mpi}SgeXV@iG<`$Xh#G zjC}u-KmF$t`=R{Ej^e+3g=c%c#bvn@uWt~|2ch*>P|RGTguad? z$KlLuEM-LI#lYdIVM~i^-smUBuO^xx^Fa&6gs$Icgj3L7oDdlevzz4|ge{l{zF~b*1aJ zFw>a}w6eo5kVKNHtu?Ej&+;hR#y%OZo9}Lczg4VMyK|Cv{4tK?*5kA7s z(5;GXY6%wxYrwRWUZgeBo;y0FQ3q6%M)Gd?3P5VW3vMIIyGp%e7bGk+OK6K@J*S zc80FgO8Que!0Q+rqx2*^wKob_lVN*mcWOg*8EnZ3J`mT}9gl*^AWVxEdgss16G3LNw(3<6V??Ey5?>*^?LlY@Z zA5>=1-G!WWhULG`vG^j5is>>hAy`H7<~!tU&cl75=J=0A6AMxONX9&&c;?O! zIYG!yw@s2F>IHj3U@I~_=@i7+V4b+=9>kVq+QsF0-GCf&=M?%yi4zRu2?q5|x~eFA ziTmVKeq3Z466OGj)IT7XLSdI(VM9^dgH8xEiH6z`90_eg1~d<1O9(AzB0i!ePS(9* zcJkXS&O}6AMz!jRoO%k#^T0yH;Fsqh54cqxKA!VI-2CY_eZ_zc{0VVIQa~ifJ7I?& z$;!|ho`_tInAemW)jn7B<0yqZ$9$8{aZ#y8pNjt~^K{H8{P<&n>&k)#+r?lcmM~9{ z_fkAa-$2${*GZ>yW$(}&tuJ1TUyW(3C?dR9#GOC5y@h%@%)?9ERejlMwov%b^#5mFs+J@x$0n0Bz?eoAj z)*2YNKlJVQ1tH^nm~Z*jJMNg|!Z-Mv4SCJEbptuX(4ZK8%ayUGX00vsmXz$-C)!wf zE^enh}laiq}aA2@r%KcxU`c2|u5pO7$?5;Q24#ZhPow(GzaQ9oZhTwq zR>Ek7$;mh5GJJ%?Gkqs3Ox2TDv_=)>5|!JPE(9* z@+KbsOXL>FCvBu*T&3}MM`m-=wdiR3T3+ctNckkH#~sO>EC7P+>E@b#YhWl!HA8Io z&{odBPii5=s8UP94xXU(0bx6}N*A;#@@gSO%*Dfl*!Pfaan5H<=}Nrv4Ckf0PTCMb z5b8WeDWUqz?$1X$#K5}%=lMb%b{L!(xY7ifhC|mGzb3t+xp*pcuxi^iD%Dsy%mBgb z&}6C)!r9Rn**F#VlAFNf-?|yV;fd$57hsqB`b~{x zda8vEns5=c@T3|d7tB#h5|wRRCS9ptZ5CN@vqRysMCcA(?&6yyZdO_K-N|}SPOnl- z8KgeIP!S1Yfr7>TqRfOHJo^+1g~^}yZGt2@MdBSlO@KDl@XAX|^J8K#pO<77&?L@e zV)iEgbBqXmX*GP!Kha*(cM$GzI0R+-4zbqH*$TH095z7Hkgln~5smdPYE? 
zYG7~`E-k+xsSU9_xm;5jHqU2Yf7~g^!V&e^0+xE&lWHf-X3VBxtXiUuE?bs1&TDAR zlu-_Wn~!nKWbtg9wA_%T`#Y6fTmu`u3?tPa0>Rfq*H|Z2IJQp98J13lQ2nIuFN@RvHvZyl1Yvd0HHpkovhLkZBC5rc80-N}eCwZULq#i@x z!H4UTPv(XZ9M`tk(G&i{B z@J!wMn$ZgTd!5?6>V$aPimsn^ zvQYw$K1j{xBq)1|NFeI6G}yJUC1|DEiX_zSQje|Z$?wi7l}cyanG!7e`FdZoWo%dL zEU@YUhG4nWt#n}IPj1mGgCe z)0kn=j8xFa(~QabZ z(MgZXA?mUZuoh!+>DiC7ED{b0tUBj!A}dlbu`GmnS&3TNx?3{X?ho?6-tb;~)@sjnP}l_|{tM8{(qW#ru3Tm{KE*{7Fn- zyis(u2Q|V3n?cLm=dg&w1J1Bq*C7Tpn6neyI_(9gI+~M%X|OX(|Eco{T%RY}1UY4- z5%WN2rszlJiV>^s@!V>0kF-AX7h(U zk1XGY)tyl%o{yw&r1UZQ<&1c>rY&bU5Z7Cmg?*nZYx;qxT#fz!d~nw1N8x?oj@ z>~9u(^|0lW>IkBST5CR;(T*n2{G+RVn(VfT4 zT>FO+>rnEi(xTl1Wf_8_`@Iqjwrm~lpb#Ptv~c%mvTD3*wd>%7Dd+IneT;?Q3yx%R zz7sDg_x^L_H;+JI%enpAsicHKL$3hWJa{mUo@z<~dLD9<#bcZNv`)^Jj3SyhToJ*o zPv3pMM-%_>2G|hX)x}y1yI?ePud4O6wLj3;$~#lY4XGeo^a#YTYw6d+-X|;IrNV>yC?pJ03abW;ds@H^Z9xzBTPX8IG+N~x|K?!*xhdg zkdkCkzUzTzpyDV3f(-S|*PghbP7OVPSjt9q+YRcupnfNif_h^T`nSui`&VgkGq^$o z`)JzJ58^oHfc)c^THz&|a^fGPLfN~=p5~#)f_7XptUph1IyM^pok%9^VjDRljJ&~` zX%bWvU@wvfYV8k2Ex3~%}U$8rsEr0|L9(j$Xn1)wsfOCJ6!CbN6E-VH#8Wfp7QOv|$fD-EDH)4`&V*QD*!pC*wrQ%2`^wVaY)cg6Cn;(U-{ zdLl!EmoHp`!|!wD2#*$DCTK>Qf{Rgtw8zob=ubC5^eY#I-0{hy1W!&T<8>|@Iy}`F z=VN07y*71Q87iz;_wp@{D)yd{+TPQUX21KJP@3U~e@`IpL1lDSf&nCBAj=L4>H-)& z(JIwCU@a}eF~i3iueQ0i)}MroWN-!tV3kX<3r;AIZ*kG!0x8DvFKuarLO8u~P@cB2GJyMQnYH=NQZX`(KHT^bWZJ=f11xpPq&oc_F?(CExeMKspY7+Z3%y^NMtw6VIR3G zn?f*DBj-Y3YyOb?Q^}WY%SE%^l;)o$+hGjiqE3xXPpL-F)Mp!_8WGPgl3#jUd^f0i zp}QHfh+==vs+w4V(5FCH`DEDV;cMOb=xnLMqH{k=y0BL?o9n4TsR>z(%6{K(x1f)b zehCwje&j>#&XSdOTC?J<-aD!~*4AJ7VRH#K%vb|ytVOc0YyHfgaJy|T@FLUz?#_OZ z`W|)F)E)XI>srNX{6#oYVQrA%e05j>&e`gPumFbb`!_g-&K>%wb2oJ#&*EluFAuhsHs!})WAf4ZY z)3s1jBkAUxe_Ab4hm)H`hNDJ*A&llJgI1wtE1*iTAaAR-Q;BpY76Gt_pp;(rtFn{cW~>FPkL6t9PCvT#!B>auY3}ykZJD0 zj+yEu$3t*#16a7c>>Ht3SQFK)pbjMP z59ez14#bEq@;sB@puTRq7Xknm1p}?*JF2K2DB)16;&u*8h?!Nle&KV+{T`q2&c<*LN^w~;P1$TxsAn^ zM&&A5nfa-d6$a7Jqew{Xd6gNaPrVV{13#T_Xq9c@pvI2lKpnHFktAAS4XzYQZAGvx zrs<>t8jg)xp27X`>C!oOUc`$ZydCya2Ol9FsK52%Oy;rQ8i@Jm>FYZX;csseo7lMT z7V$Qa9(Vww&}8Cw#qTI;Prl9^!QL-o9Cisr_&>#cPE= z<3QHs&)A+s#A9(o8Cfv(6$JXGrk?$p)Fonb zo%%a-ruM(|k-Gw@wP2WKH%(r^L_bC!v7e4`iAXyk_Bps za^^ftN2c-43fpit3}{}Pfp{uYK(gGur^nf7DlY3TEP*hihtXPKFGk{XvhpSO;3>Cc zrqd}Y-X~^xmf4Wu0L~FD;%Q?Nf@$`}s53M!y&=7D)lN(Wh8;%9N~~yXbU7*iPj<^$ zu%DnGw@u@*vrw5-58EL64{737CeYX^Q+$8*QTTOtmh<2XWmjY4JmwtLO1JgZ4?ORZ zn3o;e_zL~sl1>GBCXe78>Yd04rWcU^0gsklf+?CgaZ)b6LrSy0IfT-bSCjiyN%>Ig^$@DT6zo%yE!F3gdvsxPrl{`a z9%ofJ9c;vXZk1Oj$BmJde`F?lDjUcRo2s`m2KxRjqGUc+#f3H z>QeztSa5S1nl#v(K9fl<1qU_-?njxOr&~k%b04rjC}*A%pJrf)kq`|yh&sRAdQ^W0 z%5#c(l79>pE_Zb3e|L=#>nT!MPr+o?1D<`L5UR-;lnB-CmE`SP0ji7=zc;`mk!yVz&3{ zcJ@RCHEX=IllSs2Y9@^*B_)kFB`uYoc*0K7Q`$UuzPBZ>dT`bFZ)vGbX8W&?3t?%0 zb*ri?*9I)Nd6pa}Td|1r{d~LT-;`T7oLqjuV*mRmHtg76hgeJORQP%Z@n{~r5Y(O0tdJ~4OBYG>D`-kC(LnXzS zzsmIQ`}Kv&_G3ljL7(5#QDOd~S`&`_(;Yj`9m9CfA26sik!E^4GTp{>p5NcQFz0{S z@lkRE-xA{-&5(J?ueJqHX3esv z%&%wqwq%^&eq%ysMz{bLc^fSat*Oc-J%=6(zYgNGLn}0~4eazCwsR~F2{B{8F5&R& zr!GR)-LeU$`H zR}nSkBF~(?e(8fek|JFIdKXZG z+fSbQtH4CS;wr0bIFV|r@CncrZG;b)5MSM#}Z65uEG~z zlltI5M~Cp*$3W>fFE)iIQcImwibEGl>e)B>mile_`w=fpGpMC;Q<99oYm|PZ8BRbd z`-K=UTajeJGK)B9oLpM4y%DN0vejT!{#FoSu<5bDhM5eOya)u#nH;tvVn8&!Lrf_s z+f!;3?Gw4{P=bt!guBX2&{g=7<`<9If~H2LbQVzjagdziwaje#cKP&IF5Wt2QqN6H zmAtu5XmP=&$<>PDMDD>o7u?2`{N{4%(U+xeyeY+q;0Bc@G8fl)lNRV|Krjl*g~D@P zhmk)|KK2D21zH`0k;bS(wdDMe!e(6Es`07&*voyECNO+n{b*9L3v}jqR>&D|yeO$x zA@=vGAXZl3nORLu%>d3&e*$IJoq+%Fc*Y+Ni^rV#=Ty(RHeUjD;+CXP`=bJPlrzNu zvt)4;JEMm4!48P0M_ZB6ay-`2K5`htPm^i{KWe{~MK08dM%C^~_X^g`D1Y0ER9EEk zMf(M)Ra2`koIFX#(%NV`Sl}&G4iS7a`mX=6V8!V)LvQK$#~1+tf#%-KYutHV6`PS* 
zC_2n)4D@J;gKXS1t7#;PJKP40O0YV7F zOXg0epFoe^H_9!b{J2|8gnYK1Y6N3U-m4&M-g3?gAnEn4U9@kWH;Tck=^v1vSlUr5 zf?T-DA#N`9`88xP&xNkESmjPs|KM9jxcPJBsI{t>&dOf9Gu`UVe;X6Iqj*e(_xtR5 zGb<7@hyPs((a0#0q3k!4O-TN~eD*rH7b%I-JSuq86)^gbf66qwU_V{e*Oot9HD#U;x9=fKrIV*w zX#71Pj}mJWol}xB{~A5>iO8%B~I6pqJ za+njj-v(LR@!-!}P0e=-2P+ld_v(qiT4`4Q`bDavKPA90Iq|0CeMO5!>WAWCmCB#J z#j|mw^Zbgfgls94b?{Y7bC~*baN^3Vx zVD}fDycitgZO*1U68$-GID7ivvgv=3AEOcCx=eEnA{~T_{GO2sk>qrD&My8p9uRBQByNTfmlQIG}s7(o5?mUhTnRMQK1^{b~&nzD; z)G%V4-GIQ^-+77hG!Xney>}38dH4j?p*pDu&<%oH$$_5r^aXl1am#8-H&Y7I={LH! zfy9j<+8qb=wMP}Yb1V-Shn!Fm(1J!7wRP5if<2+43^bwb?o>e`fy35QAtI@ps(?oZ z!qF9DU1TS=(YbZq2;G1)!zfsAVkQGiI0(u zQ;kU=$EdlX_rM&)A+Wm#Iq3N|1m}0mLS}SBEDspMf;gh$pK+Yi_Ra#^x}AKmz_Aq- zJN+J(xe8K91ovP9+d4_7k#n3iI+(BS5AOv6J>P*{5F#G+jRfA-j~N(X zk`C6Jf{gvNYpoI`-D6}brL!MPex#eEwG#4tSf2gH`?aaL3iWJV10RXp8nJHDQl#*e z6*thhDe&wF;B1`0MG2RIo$!w_V z5Rm?9ojQ?%UpAsX&Qwd7m)D;oyJI1Ukau4uf2%McmrXn5lYW_LL;Z#?AUf572hn!r zpYz&(VEa&oMstQ-<4LK@GfI(RprdCx1zcZ-Q~4$IT;J&0*vCj!rc!~822f0qgRCVn z?-i8mA40t*zN1~lJ(@X?@=izn02XW?j?t4Ud<^kI&woT3u|b@6RXv2|?yK`yzVof# z?}e11EPTI?FT0o=T0idY2xVHzdN5o8V&E{XOt^l{3q_T5OkvkW*w6r!V8C>p>`bU7 zXl_(Rqidga5XRb%x|E2#yRYc_7g5u5WkK{ePPwwIgmcla+`u%)68KaOTzIp~$R0Ql zCHjPk^F_Q}nmm@OtI?bG+rr=Voc?GH8|oi9G(JB3s#&%T*}pvaAW3BC@x}9qNAbr6 z*nj-2=C)(JsfIPpoL6`s@|;rm{Adfq*=I1KC+i?IO?u#9h*tQuJA6x8pT6Vry71Gd z=}`NRDGEp;OHy)<>C85yjLZcXo`OjgzN(Vk1}rYTl7P_9M4?J0_eRH-C7M zr{m-`@T#&0ZOQKy`qozAf=%&BlanEL+w+AGae0r}P$P`|V+Jxit8DLks6A6h8c`Jz zuVB6G`|!f%3B7`^=s#ym_^JBqKH2gN8*NkSF1wykZs-_eTsz%|;M*}Xl^&8C{`gw- z!H_1ozp+KCK?a)f>J@24^1*?9%BM*YTxtGhOpsu~ndhED{ZsV!4`Cu`+D3xkmq=4d zfde|phS-!oh3>7pNv5NMpfEg9TUhh7EXGHDnL>EL02ZK|L}n+R(%EDRr<>8wnK%g( z4s`wYo;q0C(9s*3Td&7jHeaWZ6~?e{BzIeU4*&M=id}6}Wj~(*h=QrXuA3eR4wz32 z7#yj=(2eu7*t5^p|4(}d#Pe}wm(tc@ZCCP=jNVgm+_X1_nsL3;?p^X`9gJZP88@bd zSPyKB(rov!{!rEvb`V~Of}H&{j~%;U;a&{61clbp5;+8C5`+Be)8gSizZevpCL9gTt0U2-317vDgBPGY zHf$e|w}KFOMombgU|0q z-sHxCgKna8;b@GvR|0=9z3PH4RsRM=N17`}o)LcIvlm_PI#NFn!!Vro3zoh>)ZJ8) z@>d9jrtI<7k`1<8MSv0--CC3#G!xT$`=7;cD`a{KwNPRGC|hqBpUZM3*g)5p!DQT( z6vz{C9X;T0jHdYHnehkpcn@#|7-y`Af48V6=?ohF8nmuo7~qrP&=*Taq#nXd?iM6G zG8@M{;SwNt2Rd#USPTM;sS*1-zS-}~+hbP+NRA2_1eK+W@3EQ=B++l1I--G!r%tjh z^59IA3oZL_pa2S~T79FaHUptL? 
zRG7u0t4=d)IO#1nml{$PEYU1FduN6A05)Zn;WXyITOS&owf@jDbjf3XP(D@Qz5AZ@ zW+yy{aKhh0%nq#vl$T&pc>VTM_VS?Xa9@~GP+BA_fHM9)y3N_+Ef8q-nMZ%V9(4Bl z{R=+^?2Z{FOtXnU&sSzA4j=63a9=)85+9?mJTDAH($B)nL-ZcYM89!YEyL$4bYRNj z6piMiNU`oJ_bH5NyDI68441{TJ@zRHGPrITjC}q2P(sK)O&R}hlDy(p zPZQcp`Tzt^e)?A7BcvL(Wb!bpHe#RAd7ht>>Orj5PmEnwkr8~DZO}Vr;(zSx7NhA^ zn+cY2wi6h6d>Q2(%Y^h55$8L2dLjk-L~~*WCmnvrS`s3ik3hVCzT@^R()8HS%`NLE zJsp3&khx_YrE0f=`n$s$TId1&9L-_OJMYQ=+C`$mRC2OB4j6AtmbpF(e|91w{%0Q< zmgr!qxYEbhp_`4x^YEf0Ez!!C=$QJqE`C}JqAn_Y;b-2YwW?fa&|!@mcyqmzll7hf z7vI_}Tw=9zs7P(=hJq0&-1C|=Uw^^ESzU?NbK_#gT<0tK`X4m3J&>a9_ZDN8Cm2y2&rhaUf_0@dY+LI(Af4~JOsv|n()Z!8+5^bK}>^j@XS;urJDd9 zHk^vMgO&Ow;Yy3-N4(L{huygfOXO%`uM zO!J|%c@G9orv^HFr;ORuLc*zdSgu$h92sM)1gmD-_iY{(#?T4+se{5mEhJ~6n>fx; zyk(#d+oy}(`sj#P@dq)nFHg;*pE}$l`IgcY!&Yxr@oqy3k`>J1`uBS0Hk)}uep)%R zS-pAFnWk~%&|w(2fi16=1xsM#CE}VVs2J4zfVb!2w^^H5j;oA6l4^c6@(FqkoilOM zEo=mZ+6dFuc>$3pT`mDE_`V@dOaskalzupE;wrwNR!m>Dfh~6lEq{91`}nZi*e`); zfjp2C-9!M@(=V(l{v^m#m9&2r(mo`OUgdM1XujHYjKSU_4!mFC$HH;faEQLeB?{|8 zgW|0!SSZ@cq=alspa4rY>Y#k`cYjv=+iT>IqP}M!d~igqvl(_6 z|Hiv5157k1l1u3=VKz?+joH`kfK8!#upClz)-r~O=s*JPQ#lj)Rb0^7i<)i7F)E&6 zDeWs3#5(wBU*U43>drjMI>reXM0Y_Z{3Y)D;okyt4n8fMFwd<-S;{p?U(Z}$&9idL zcO!eUEXYALLYOXLIbJaJ@y;{UXM3&4)x0U+j67z(bBE{joay~~8yDtA{;%2aYNUrP znM7gMKbPu9yOZc1qa4RLGdFYc#2d-Q#rJ!7G#Y$(NM!Osz&Ullo6NpfQbd-s7AL$e zzTmEYmCs~ksH3BN?0@y*uyw~O{BFn>FVbk}-LDwTeWm)JubJO|tA;{ODP4^=xJ{~Y zY`gllU)F5MiHQ6)U~Ro4$x&MW*(GZ^u4*drtDW)wg575Kl8BUmzm->cu8b`8E#HsK z*JJ}VnGARxX_Mbuk3z^7eB?^8O266X(taTn8Xr)sq;TMi7xbCs`AQgVgVL?mrab4S zOsD)>7*SW9*LK&a=BlcSs_`Zbs4-G6{?IGDsI!Th-+QMrciEA;-P+Y@NaVt8)%}G` zw_zzkT^>bea?fBMu+J6p)Sq=J>~1J6?; z;kHI*s2Z!fwJ|`|r&lG*G_0u2hNFf@G|M!ZN@y^ttJv`O&2w;=(OxIF7(%~{!qM)s z`;ITHF2N0j@3-Oyc<;-5ba#|4Z`y+&VJj0_Etp+r!tPvM&4GYH{&`dy>e#X$kP*mp zmv#pn$8^Cy_sjYjmBBhJ*S|$k+qsQBj!%H#pkc7D-0IX9XL;6$)3O49=E3u=#l%*ap00;le+x_h=7EMTK{$ne!o^RCV2lt#i}M~T6}XU}{)}Nr+8bIO z_KSW3{!9v(%c~9*a7v+rq%U2q@}uIK$yYJCsO40yh*2{6!TKy?uxsd5!kWjva|@(T z!HU5Ig@6VPR=?zo0o}uWh2!CgwFJFH7FE1?%7n5;Yzi3)EJSrCfbD?##0AEPtzFrK zBFFM10Rn~A@0e7?91MZ{<(8Su$-Sm83IyMMoLnh>e|@@j4*c!&@i53s`E_fTg;ZNl z&M)J#8Q2`{Xq(nCSrEye8A4UD zBIOu9h&F>u%tO*7-{A(UR5W}0IAZSQ!Z_nRiKyy0%2oZ}R7%b>BK*P8V}?)%hs5nm z&0y7Ob{E)b)dq%ADVD}hiN@KWD;FcUVBOS!BCOg0hEH>RPTB@UW?DFb^SO%AZQn1# zU4k+4?_I`3cGLs_jkkf;{f4h3yVc{w#Ig#rPu5x2#Xe^0sb3g$#TYJ{DovoCtlj-UmTD~tc`7x_O8nXJ&bJo&J>?ueqIxN zGK+#btWwZR1GD;bxm~L}^ON-N7twT>%%|Eo81%{iqwFoiqHfprZADN*B$RFtP$}t_ z1{DxNdPr#)Kw4Uml9m)n>1GfZy1S7a7(%+cq~B}qb>Gjk*7`qR-fiQ{| zy3k?1F8@y)ztN*nmN!B^*l}-nf7x}sS-%# z8V^CghS#MG)a2}ExV>Wui8T<;ReIJhAlmu#x`{;!BFsRWfCY!(F$!JSK8^VvW$7JQ z$=l4ABgYp@iY8PDkB|#9mG7wzaP`?ns%B_!nl-YWw`OALu2PT_+Qdy> zWty;NUWfi%roaf!zS?>2RPthlquZl&VVcsQ`yTVQf4vCO&N(Sl@4NB$37 zJFzQu!c&nd=#bHN9Lo951q;?fMfJq1KR8GGoj;Nr&&gZ17)kv6NH3?{*JDR=jjhdh zW?fP_%XjsRbh@A0_FtYZnkrwN7Jt}n2K{!)#Z&5$Ete$uu%4UH+M8{Wc-PJ5eRg46 zGiL(C#3|L0hZMWlO0@EWYh^NYLyS)zo#XBvPZKw zkAbsA&-&}6Gs?uV*P1)|eMz-tu(D$xa5`G}5^GRvzE9(LK~)C*1>jrhID=v{c}&%q zaME}>6$(v z9lMscGrgUx9K{>#3#*%^B!8Zk8k%i>TusQoZ!mcftEyVpu`<;(iX0i8Q(;m-p(G>Ga=L zc3vpWxUY30PhzOPlHhTzLvowMjyro-O1}@XqEds~b9m&SqqJdnaV z0h?!jW_T8~+3Lk}V?M2Np@?s9#Io#s2wPG;Odr&rHbw}(Qav4wj!0+YquF^KSbbiz z?lNL8bz~yXiNkN-|1n_dCzJyA&8=T-{TFkJhA-RaESp5S4}d7eS76iVTCat{M9WS5 zxac|dFjTky(5CT4(dl&CasK~Xx0Ip)$%xY7n7?Lc(kNma!Thh6VBOAq74tU(y{e)i zkjhh|AaAFkOFBX50P2#bb|1|ZBlyMTd-s*T1QeI?EbfQT-@~`J;;meuGq52t9;yi(tgum7{ov`mG4GY(HxSEUQlP}?mMlC1=sJth@7Pvxl-)tlk)IHozI`105rhld;0N4eDHS4?zgQ?XVPzn)h5 zbw%#06S9D#_6A$@!<&p?b9ZG}KcC1RS&s9M(Q=-6-=fp=FC=11A+jHf@{6IFi8Be=BF89NWqkulqOl%hWJPXEp_Mv 
z#<^@*#KK*l9@m2T-ej_Ien0Ou&;t(vE*R8hLISdvNaCke-K$VkEoKnUv^9l6A?790 zpCXFH=ah0_alZT}Heg2jfl57E(67LdGU+y^SQ2G#G)jsQoXXzIG7${8vwUlqW z;bP~fmTaCoDazj0aa39EEdRWQuyR2n9VpvQQ(fkHALKdyZo19mI$SFIR50^aco8zy zd!0z0edd@$SX*uKqte6Ez~26Fw6R!TIA&i&AkDePl-)(qwq&7p*~R1`M}zEc!^F!p z(Nmg-{SWLuCp{y%X^6?5^C%G=01RZ&&Yjn9S$Az!JwiezzE>|>x$S51Cmmg#pJs*R zB<)bP-ig{}s}x)F+dhw{T8nJlHbQ=K^*|+!T@N|5(byk4mVU}8FAsgzPamCt_hM&h z#`}_*$8k@5l{9#s;sLIPiwj|{9D(^=G|=$&3hSJ8+hil>HdPSOg7DxKSDUO%`WF>w zDU1(_)czl{J9ucR?qYt=9OjeE{(b z*;_Q&lL)-ZG6HBkyS~XcGujPf3SW{@%UbRDF@6;2n5u{S#VPsa7^`~qVBuAQ-tDG!%MUXUrpr8`)!E(PZ8t8-Q0be)|4&NTYgpC z*D3O!-2xxe=wGCNH?1R``YANqV#J8J_VnP}uGf88QI((V^PsWVj%CVN9mYG&P~&Y+ zl3{=M#sf;x*yNJv%9Q*Q@N;tshmfFebMmkmfU9DN z65$7y(H9rPB}0M^D?^uOhugcq3JX6R2rir=ZN->3fpqpwAeHkshs#N`RDun&R!%Pp zckB5Ue-t&g_KA3h?uTX!9_uL{3cqAMU<$5AMpf3xhy^$I^T>Hhfib*~LMMLdD$PIW zx%_g%9Bi=3n3&?Z@`VT5N@v)w+wh*~m^1hi@s(E3eKb;C>~hV4`9g-|a)!_-`O|R< zbLtrwTwk?O<5F`)4v1|UYedEOwd7r1rj#FsBmc0|e!D^(9_=&h3%M3o9kKeN2_t(Q z=F&CvJUgjz?=!z22LW5@JZJrK z=SC$@MNCKz3DxV)?<=Lz{T0pujD4xsoDl)fNg08IR7IQ>vrCXY?&$56E0CfUwcrhl zp&Hwo?lVpkzd&(1mriqSWRaS!{4Y`2OA)jK<`*DM5%S&Ul7jIP(<1`l<((mqJOnT{ zL-q6bI3*!1Hq}?RGukRgiLWNEI)AO{x&+sLH%CX3XY8F$CEjFI&U(z{Ez(3QWj252 z`w$+TXaJF7p4s8yolntPT~*eDQ{Cd(ta$Bqf}lw1{)YwdxyN*jAj}}7)Tv9q=2xk^ zU(BDklSmxPwG}hTrMHyK+<*Y#{#$_y9qG7gVzNeVSr;CeEja$j*V^Wd3wc%ZE#IEs zgN>J_t30D@-Gz#Di0rIP*|!PXlAd7)EPBHo@2$q;hN7~1t%nQy6sA73#;N1q|7xLZ zXr5wbZyf{;c`gs*-7k7&zf3Ys9Hd^HxOv}RtC{af18)mbxS5Y!mbS_3;8oB!Qnn)f zOIZqQY1*!qJ@;dP3e1UyHv3|fkEsKlK1$G~YrKIjyy zynabrZFt)nq6LEVTdVs0xQnEby=NUl1pP-JW9{(}-mGCrBaJzg-kzTIgql3f~d~~G6JB}aaq$I{qn&XsJ zE7=IlvAg*uyDOSXi&Av!j^QS$mNWk-ZT_{lK}F&-?!kf3esWM&P6&nI*cGAbZXs5* zC=UPye}L>q4T*DP4R)rh9RcWrEVeL`To0f0ZEiil_~mJo=)EC?Hft#>OSrm*VLyA5 z>GpSRE`$!uHQKxX!n8_dPRYKv4l?iVWE(r1&~c$C(t4=ED&o5EwS00fY@rye`u|YC zF`}ROUeSIuG8mHEz@C}*rOM9eM)G2d_pV-NRICBS>}50p?J`C9cQN4>&dS6rxBOnh1xE1W_S&t$={Z?T*^X@2 zC}z!63ur^{UH_{1`EJ0=CGa=0R^0HbfsOH(;SXNpEsQHQURtVto0xnIS|GMTSE9aA z%&a;CyH6f<9+a?Q!n=zN5})ravgT$ygq|Xd@M;rZzngEqY&7mB6asMS^${hpib*A~ zvWgdr?5G z4@}M-*R$gJzFd-FWn{N81{;KUiQYurWK892Lp)EZwH$}-0)KT-;99?SNYB1HN7Ag% zUeMSLE7tDN_jF-C?G^TEacCdoSGKdmJoVOfegNPM!3YCeMMbtE9krMI(;p+E*Y>1N zbHsM*k{#-xXRm8^0y;`XjawxgTYlZERkcZOqmCeiihf!%Or*od>4u9&THU7gP70(6 zS=5$qRv!dVGym>MwLuV`mk*tMZr=`))-b24CF3vZ|9GxVM^yeL{MmIQiqqJvdk zcupe~tn%tD>j)EnGd$qhPM_qN%b}jhVdAp)Qj18>!2RPJDO>g7ce~X@(7+SdFkOw&S8sxq^SvTxiMD1bd=9!A7zSi)0>KW(PpVeTByc=%a(y;cV zlSL(*yn#pv0E<1ywgFBbV9O`zT_i4*$WR>Fqrv$QZ+*!u*?s)Dk&IfpiwD1RiJg`& z!-GGt-mo^0-#@)wR5iALubQZoUh7TU2=1i6WZz* zx5~B!x`vs+rd_C6?CR*D3AsO*Q%{zeDKtui1P8Byo%!qtv1`P2_SITOL`!fI&`+Tj zlZy9%+#u#`FF3k0AP!!stEp*GW-Fz>Ib0oFHR!AdOHk`z zh0rM+(i{E6mu>0-5zX=I9*x(E<+(U_L^#ET*_2x^!O84mu#WewvAvh*71Pr@JRXZf zya(e*nxQg|I!gyh-jg*3yc3pTEcdN-63XG-rAC_Fn8nAMyIQ61i9AmiIn>pza{L>W zO|)Y9v9`s$@TRP`Y>M~1bsboMV5_zTK4r;G&c8CaQa?beQ6=)$w@6y*$~sK^ag?N!wgNCEIct_!Ih%f z`3zg?NPA*v1sz$t>;28^dYxa`M%+zwt*O39Eae6)zx+6lo8XQ}tEs+NOU?^aK0Ydo zU#YVQQlvrT?q6&h4(}|)_hGp&SGcTIOw5&a2EHLx61EZR`4Td>I6FXX#GMH)&UiAP z>MWT6A_SWAu9%z3Tpj+8gKZ36$3t$DVr{omg&zgsb}SJO)`(uuf%)CAhARJ1KJVt5 zn62t?eB)@<#=>Vir15ZqH}2OR@fQCU>&Hl^ko!V6k#Fa&;pw20J(#+^X;o(KiYaas~dacHWBDqIe4L^g%iB{f}ZsTX4tw0=s8(n5$RWD*cW;PSUdd|aGz{pGy>xq2`vkApj=8o85B?~(A|A* zJ6o5Q=`)Y$UODLHa8dOVoM(s-k(8r$eM|!nPJHRhwo;Unnkc{9aB8QC&|=K103hzp4TXS%*8gK?=2bA*nTakpD*u791|*)1BG=97W=XKS&8t+AWwYOwx} zb;tH!KXmWCW}#U7Mll`iJbUSl%%VuHGUKPX+)fv&F;_ow*o!0B-0z~pbsWE~d%v2G z?I{*qAi}?qJ^OXW9wsu)TTH63H#61ite?7)rx~7^8RMl-NdG6j1vl_M*Ky!^2Va{1 z4LFDSEu1%F6!h;w>vz2pEK$@R2hp?Ogya>&2LumiFGdVePy7pn9{&aLT#@%J5 
zelE|IU=jExL;-4Yl?+BejzR+cUnt)PB75SlU?b|z;+PZRpgq`Qjso{%cL;e%EXmVu zA7gB2g(d9u^@8^t)}v(St!|z<%d-gvrQRe#Fk>jTgPnG zkHg?qc|4xjOO}&l?$=&aFaHSP_Jp*H7Ol7o{`8BL53EO|?9n$f5+}ixgsdD_=N}BW z32FcBqe}QI`*B5h#ZrhogiKL2C5g8 zzWO8{N05m%gj8U_DMH73^Yfo2oXp>Kb#k_3)eE0(ETXJ*`RML|X{uD?8m6i`1~sEm zJypelUYvG)bsyL<4=Ls%=8LF(0ms}cltOmT*ZQ~f(Rk@IMyKYn^Z)CJ%zK6K)PAAy*H!k=>?u&SiMA>ZWyW7(GP!h2=mN z_^Iz=orHop77e)&rJ_}3UaApKJyT0YXT`rOKy};2Q_fXj9B<{Fon1o3A4HK=j-zU4 zoN|?AvZ}he+p-g{N#(v)@)3Ks;NCGcGN-z2l%%q?&sIhIH?JKuLn~^>$##C zVKM8q$@cUzuqBP(P3?^&CI!fvi| z8Ffh-E;&rLe|^8J{U!O~Oicm_hJ6bMcGb3Q%=Ohp4?*BIvs75sI2Fl5_C|^Po&qTQ zFLd}D0N|qC|pU`krU5Zst>vXAev6M=&H&ik&F^JXZhu-kyW& z&f`c~G|iFINi&*BpuHN=v|NyhO-#m9Pi!O`TTY-NsBw{>bBKEAwRy}+sM z7!2lo_3xkk=XE;%+h0WNR-|6?4-v%>eW*XlSx34_N~x?W!~OdqXlj^0ydEWCrFO&} zYv`yk1pFDvT{I5duC#dD(2b9fEc`fk=biN#Y{$JnGvxia$%0ztOEnqI$6OW*)x*^e zTwadh?651SUd8mOHlhdG7tH(neJ8xgc%z+HJ#Y4#E}cuGCLwnYII<3k?{bv#OC?Jj~YTL|MhJ2LqQ6x)QUlpS%*LcGMCTK5k)FO! zk?Acvw!+ry!_=G6cLyLl?E#ej`b7}Kz>+R)2uUEu32=;w2I80r7G`Y&K>d$KZC~m& z{Px8ute7>oTNJzj_CQ_01)x4xKAt@mxF&q;%zGeWp*MQ=afKD4u=<8-X{vx(n(ZLJ zadt(^9IQfnE)RV^2_v7Vf2*y|6d@|u>fU{CB3$(R?WEbC9h>s17TBYUqw@H=s>)d& z&K9mh7dqLrmbyK={WkL6W~4R0^nl3mV}?uIx#*e9VUcB=qf9D#lvG+~xx#o6%L$kKsYA5J^E>|)>g*n-HSyV~hQ9oNVjzf$_+g)d zi$CbL?m@mcse3=~=N^A}h+N|R-b8%0H`J=sb2>LD_x1o;@wJ!Zh1|iRRpH4b&VbmQ z2ftE{)6Ns%Yvnu;{8-AMw)vY)vcNji;InB&nQZrOwtF%$8TT9-Zj`UD_C`)%(F8*} zu4iX%PYw0`KN)&wsA|~4QY&O)?dKccF~DT9c3;#+-(Mpbpizeodeg$wc*oEnqw8}I zYhF&A7|x%XiK%xVgV9UYHd0h>hk^#~9OCHRV3o|OZ{0VW-8JL2ad&;1PRZW>=XQ=7 z%Y?6WRl^tJHQ7|j`wj}g@n4(W)Fkb z#LO|3YmGyGVV%meuqv?`w2AjdiuU!c)e^c)SZYsTV|rialmeRIH8oSgM-`*ME9wmj z^)DK_NAV0crtm;;37=)Qy>t$(*h<0~nAyaFP^TI_h7<#>Ur|nu!cKObs^=|)C1S0B zw@wY`lX+XKvr@=PEE0t#=42l^VEU z3WdG#dah-vG5&0j2|0rTvf0bZ3zPNX<&zJ+Z1(^oKxs_g?gAF8`d z?+C=A4f&><_iyD6RaD#$?Fp%{B5(cMhxiI1I8F{10lfcp+oEn>A0{br8I4?6zG5j&eShz`VqKxdC?^oH=raOyUMy<)eI`O4cBB$3KUjG5s5nY^$E)jM`&R94ihbLU zi$xU`oqoi$dA0asEt5hUbq$T4rB&_j2ze`rpS*0dXN?KPS4(DY>Xzz+JUW+#xAeB& zTFNXJw?!S6cD;9=OlCjBU!-=J*$5IiO&-xYcgC4GoQ;3IT%om0FtI2qa2V-q-?md! 
zzXoF8GuyuKz5*#Pmg>!|fnk$q{K{O`#~Dl|`@@fgpJ(pMRvR04VOkxA3b4!lEGyeS znZ}=NVeCC{(Dj|LXW;Q2GpW*iKaOU!ZM)3WTfz79jCm3b5@ba}YPTJr7-xlqTeE!T z+l=QS*53Vi^G1@hBaGRDrL*3?1nzCdMsZ?h;9Oxf z3AVc~U)3)HR)P$0q2O}&0LZ8wKzZ8Htp)2JLq~Wnq)bDQ+821wng?p}6)QFp@wk zXtE@531Ndro*lMw>{sopkX?6amU#APma6gZN0ntM?WVY`o#Bb`fs*lhT=)SYf?cyp z^DgezZu|bd6z_Fh*UfDh#?q{(TS9zH$5}?3lgGBA%PYg?XB7M_1+f)1$Wp0G;s!IX zbAOS2RYkE`!R7Bw_TIX63O2QiG_%}u1c4EepHFV;Nk|5BK6Y%Zx_{{jF+eNM{n&Wr z#SDfjk@cbbj@N#TZpwkX^|h4V3P+`YUj^=7+DP%{d|nkbtuhB`{zHCkt3!LH|DO-W zEwP4zk-xG{nZ$TDJX@l=X{eTf`0oAPzTN+R<&#_`FPZPML@h+@5Zb(@A`YNgBDKf2+_2VdzR z_oHwFkaaYxy>+fIOmb@ZqQk?b{4XSrkMEh?fajzzyrqWSPpZVTR3h6mX3_4Y^Y5zp zGcGsjOs9M`8445S=DP{CU7FU`6R+={c7LV2vSMA&RxwDl3Qll)MbbT3Ko;nq{bG(I zKaO`(iVONS{Qg#~wSPm;LPt)>a&)y}$R{`UgzNprg1FCMPr5C^RaPg_WCaf$ZioYa zBDboPZo(vQ5VC)?7}5B0EP(v0G#3UO|D0X(Ekm(PZLJF~jvW8@pH3pRA~i0%&s?kX z2rJY3K3Hh;d3eCaOXkYMkJe{IEj*XuY}%v?i$p9fXnYi%_tzWh-K9fP#t(_|xKEPC zovZvSinY?GM_%DDOBsZ4eyNa}VT{+I$8V|G0eI0EQ1mMkbLed)pWr}&;FTThSJx5E z&w@4q7Z})au%j(6FaNyQsf7@*hchdsu%hg&e8)vWG`DZTVqW_}R2~kb=fcHGw@iRz zgYy9}GjZ=OuH6wL6Z5JAtwn|>`T#yhbot^0VWcObnGDwL$mR2^K79l;!=i-B&H9gE zww{t?*FiR(ZvEy>|J5Fl@5N$0Wa=un7^$nm$J2NGB{OR$geKOnPwO&!2-6tlLqh4i zUtC3;nA}fdWn+o%Oi7xr@5>;#LPJJ~C9o;Ixbi*Pozv(D6_Ry3-YQlu!sFbi;Gdz} zobXQ#dmv&^Md^yTCa_x&{dP4*>H6m@xlMAI$u+l<*sHtWeqQY)+OGB}HL@P~6;*}1 zH^m6k9HfVnc_wXLA`QVaZQFS9L*%rDf?)L$4e2c=q}cGsK76ZYGu;02S0LGDCoczr z_Q%}R4yp*EgFHAmsQVL4Q6HaLbGl}lGB|Kt%n5Rhw_<4W@g(2)V=RY{HpcozYyq{* zviQ05f955TPtl*i`eRZIME@f#{#OE=hV}4mDbe>dLn7TgyxYXA0mMXsMaC!F(PU`3 z#zq`E6$99OrHX=uAM&3J5ly8lh^B5GT%CDd_3OBfYC073O6-A<#N0JhbWcuNF|LU? z!Rze!^Z>CqD7!@Ccnhb=Xpo#S5swnzvATJ6x-@T*3)&9v33|<9(!!8O$P`fEF*IOp zd`Ymc#o`Q4douki3^~H|bPgD^bx)m;d_Rz*#G~##B` zFN&SYu%6E)`gjUfrM?<`{v|G&)rW0ESyh!agqY(vKSsfajg>X-8yjMC@tj;fL|4cq zUn#0;apO_~&F`4>1*=3nDGnT8&Sf|5`+<>~qe-)+(U%(8{;B%f()2{GA9jV`rS%GO zxgJ(0RF7BZ5#2t+fWwNXvK14Bo&tNLQUZDvO?Uq>4JQA=n{s z1%Q)E#Js%$?DQDA?zM~(v8ZE8XM^qjmWcM9-BT|l>BB{SEpNhGUl_+^6VH~o9C+H5kABsl)MC=mwICT!o5ss-%R za*_aEBuJq#+bV492%gUR$;<(NliWBN9S*x-G)*huw4?r%JztB2PIKmcua7THO&kYg zJ~q^h+|#g=2vN*Aes$wEvpviEn6$jzbj2jq$M@KwzWTaDm_|B=bKaNRB(0Qk_U-B3 zs0G(Wh#*0C-~%e?AvAHWqv0FxX&{YB|92STEXmG0sweibc7T9;_we$;i7<_u$)}l^ zZVlO>Q5x4Rtpwf+3XTY$M7&69pKfZw(R?u3EIe{(H*xuh-(y^l6eTs@kYj9RpI{Sd}@dZQsWY z@r3u=TKkm?AbYSab$!w(WJ6VJB+Fv25@nxrA%hrHKcp`&!v|v7?&3yf-0b4}9LqP# zXEzbMw!WV7*88aFmC!h(A0tX#vuz-Fk}H+v@LJl}5GRQ=QRoF7%>Q{we!dDdr&|G@ z_sb&J4ph!`^cmS>%NO{kh7Zjg-CJ9mjbB9V@uoc0p?YFE(>sF@)>@~oI2Pc(1v$aB z;orFRW4BgcW3(7bIJi}De3y({GPH_MiI%Txptw{oq&ofZ37HJ)%g!&~`@ymj{b0I6 zPEie>?lP)n#Rv5C!uN1U*x;LA9P$SY&N1Ngf@<38bg@LSvTYM@rYb{}r1i{dj9HfTZ_eXy%0;OG zdrAcKB77#&lcfbwma^%(;Q|pSKyf2Q8x*!;I+((i8_4n~oS0jbLF=U-d8khL)- z$8FaA?nUqA_0Mp@4TGngo6S}wH-mci;J+hoJXvvH(eBsW&vA)}5MFtHb~RgDowGsJ z;m~2E^n!%$ZCGsgZs0C^CzWR|$#Ol&lh<$Z2twRRlEfRmJTs*NJraaZ&D_VqZ$-LzgKBhh|6S6)x#F; z27*Z!P(X1JM*LUIyPEMKOdw-8PC7!r(ue8w>Q+(Jmp3|- zbsF&U!NI6@e~P7ovse`!J&0<7h01zmw-uu>?m%?IOxa_x%Gg59TIB|aDq9Fn{yVM0 z_|wcq7L&JwI##!Ng@t=H2R=6uA2RiJScIM}sDEgb^Q~CrYqT5AA0eK7`EzBA1yx=E*#j%s2O-jobD}D&0a<;!? 
z@oICdNyk4Zcb*Fc8QNBX<{|J(z+;z@I);2s;KV#yjBJF^Z9cQ&1}*0 zn+Gw?kO%orS2L$(Jr0A7>YI#DM;+RN>Wx$DUE*ElE;LO=ZfX=a!M|OmPs|!+)_>gI zu~2wp!qT>mtgKApTlr|DSeD52Wv?;yB3fi`u=CDH%KPE%@n4hgdy( zq>wH1W=Woe<~CKq-oEm@OYG#&ikh0&*$pfQ*QbK}CLNlu=$?C+SA@=L^yze0w6HZE zYd=aqWO|Ch6@3>UGsN=ikXWF>ZF1sCfDz4OGC?uHOx7(`SZOA3qyXu`PETY{^|i3j6Xq6d6?dH*J7eL`DlHl z6V93ABY6s}<9dPfpGkW#(RkFy)q!M;x_}2%8^kkf^F2M!Ms}8e4vW~%&(4hWowD)M zAky41SnJqz%GcdBm#szU{bduIMu4F7^HH#JpNDCz3U#s<+WmlEnYmqIS!3u^pp z!Q`jq7&-UORM&5|yWd}wG-lZp&a;Gdk+?P=ug|n&?^pe8hZfK!L~DLHF;U{pWSiD{%~s zpiIL*_#F8qk{#&_dwjECVbMno0C!_p-fzrJaN zA1h);=ku$|JE>I^KFyD^N*Rca3WjCPJB}AJ#*Gy_y(|{|@w&#bE0&UBsE_kK113~0 zhWbPz;Of`6MX3p=Q98=v1Ub#!{J!b+(b9SX_jYDJ1sRAz0!6y8Q?gLPdAW7nc^b9< zjk`v{2=sF2VhahKY6c0uv>9B#aByIs1#0$L&i`)5CCGn5L-uV`jEDYP z)Ayf5;QM`_sf7X$5Ak7Qwg`Cx-!<(rVy2+aqOJ9?0tbXP;qS24O|HxozX6 zE+sbbIr4&*cGXV7mkTV?oq|LhEuHu1JK_w0_9_qqqUqdc zmCS5?2ysV)=)7xA`$e#Tc{7w1D;{>-PyWLoj0m}r|IBr1M*U#FZn}ItKMO3rsH<_A z&PA5$$Zu98%$&}j`3h}lmmFr77fL>3c~-QntqHSMASpk#Kix@Y91Jb#uz!T$BX*6G z|GTtEkV+fAv{u%v;!BsTcXKY|FqBPNy8WNuw74v)vBglEwez|^fm=?!P}7iM=Vveb zFi<_zJgk6`0`ThRI>5L)K<(8OVW37GWPE-dL zrNLlt^*r`vZhsygRv~+C@QKpCi|!}T^=9SWWeq_8{=$b*J`QtSpbD7!O5VjKH*Q_O zym#mVq}A{)o0*#KaN(VA0}Iw#G#-0Clh~?&P)4^Ls;HgWOF@#myDcxqU%?N>XkcRt(~3WXT|7;pU17W@&-yNmPpV9sYn+MvG%Bx`J^-$%0*t=6af8#mp=N&kIrN9%K+S=`6^yYO1H(u=QP zKB|BB0p%0GC24S6AI_5C6*wM!w+!4F;1)oO@@~>;LYE>&>7BwOyjHLms{qV?bPHT> zu1_16+a4;#0KpFgz;$uRxJ^ST9fu?;7P8LUQm>Dg6M*`OZ3VD6qC`)pxyNKorLW7( z2IGJyPjYH=s6d0mR`eIm#}Pd&;+JUspTuEUhKe?HLm>?WEIv0 z&(~HbQR_`ES?d1Dt7W&+ZN87cHe!;IeB35w__pEa<9P@*-NW7WY~PetkFfs6TU2y zo#%&%F)M?8f6t)O1POBagsTd5$h_87kYg2A%oaw@g8w~lH75s^yP}cUyj(k-R6rMC zQbFbGcWO3`hcjQ{=1a&EAv#(=fW@?7&~d>m1}>@P^o9O7B-rRh%*l>XE}mEC$HPCQ z+}N~68IAX!O@yNTF{B!gqx-3rb9DfWWqVLiEjGK23NTI60CrzAqM5t+L1xqfzY=x! zz}P$*yu}DN8c^W=1R^;8YQD^xh$k>yJgngB@6)3O{@gMD`n8iy*%cCF8Pnb-8cVk% z&u#)wx`RyfvpX`Go)>U+h1_(B3Hu_7fu-=|X~OWmrA#@zY_8T}(^Mu(v{pW>iVGx-Y{J-7%cjNq~}g zTJn>gj*g|mhY$o?xZ<2iFpEyb_?ENEvRV!&MiZE9&ouoLTgzXfowsE1z2M-`sBzgW z3_SX;N#OTeAP*Wry!xY@Z8cGBx>-&S*$`Y!VgTMi8q&A`K*;P0oJh6+bt1*RxZE1a zw@^b}uIghga(}mK@xkEtMNq7x>|f(=-?{rzLc-^0?UxjgA5hHQKI^G2;6lOx{}#xL1aL+6{tD>j!h+VFg@k)y zb5Q{kx5h(DJDO3HQ2R-f__3b*qTh9dDOBd8qPVQO`Fb_iwh%v(&3-&|twxgsBxL(<*>vcmLS~4G{a`z7U0f@; z;a4`+$rne?DSi600_x8xhJPw3l$q?}+Mfr(mbHVd%EDkEW+J`t0`_UZz;dOiZyNI= z0TCXjl4HIpG*H6v>}y!Z6gTUHWYhOo#iP*Xx^ZaxRt>di&C6mk2gm=NyhXC3sjaxo z`}*<{v^(vLHea3~nHDJV{=2+N1m%^*PHNt}oF3i@9fr?$aihQ_QUeYa6@tjcRsRHv zT{fp&tR&hRBVdLE6US+&yx_WqG@M3k_NUA=DmGqUu8J|NRkyTA3ND6nFM*kQ91eSx zIMbV?HC=g)G1J%s(I@7m`FLN8I1d6v2&qP&FE4}$I5 zcQ(G?QzUyQf(9u}K3wT9+cX){?D0Fv`56(#2vhW9z++Q(dym58;aGlvm;tBL*v<76Shu~BAlH0#Rsduo{2y2@-A z{$boZPFpQ5VqRaZfj^$ezE`-%B}1iy^-HB~!AzAm)Oyy10;}4+6<&5l1NV|2Zt!Qa zX1}_7nE;}47Ofp|E4t_RPpifU&ik7OCAo3Pjyrtj6H+a{%rdIromu!d*YKx@QH8-`3`+I!Hrl7(*oF`6~1#_7c zvu!LT4ru*D&o4op;GftJo|rEdxtx=Ib#=AS9cDeMFb@De3~myQLN!jzaz`X+aeOi0zVsI9dhf zR^!d}0Z4yFe688IL?qB252<41-oiPacwQ?&j?}I`b`0A`(10QUMBA&CdO5fLb zyB1)j!X0CYwo@e|AmYU!1E=6qg*0!U591yT7wVkt-P0}$8{=@d)_p3srNc;tG)3+| zD5$Wkj$4@0Tp~aHK7cVQ*}7Ly!2c~?S01`797>ku`o3=do@U{%PnDsQdphLcN8B4pPnWz-0j9kD~GV)0_g9D0H&` z(yW1$9{5&%MR~Fx!osJ(J*jOs?cqFWLee~5^8uBfpl}>}yI!ZlAQK9~Wf)W|Q17O4 znWp15?SrktO!ffi9ZGsrwXcflKX+I3Qy z0JjCKOG5Ww=`v^%oYN@wJll5^Kl=crTU{lM*UwPm$s_=B94>-X=HBNbm8@wR|o?GD* z&o^gkD?hC^ULSLufj4Sf;9`;H<_Sph>rI)3ih=DV3&(@=b)Y{v8b=S)nd7CM(oR3i z!{fZuAv}J!k03ORt#Vogr9|jeTs-qE5+2x0LcbHL5!4(9j6yppSI|B_9x*I52>V>S zo}IV^s+lswP_0nLLJkz-Q#5e@1#khU=n!-2w`jxK=mj;aaGbvTHm}i_9O(xFzi$cO zbqtdgYI6l%5?fWRm$Lew`+)}z?cl`40nuXuKI1gj3yZS zHZ>m@5nYP?`F+PdVg^|FZF!<GkNO?tt=*#REf>c 
zq6cy!svVB3>xU|PPg0_Eg#uAp0QWr17OB4 z5yLj~F*k-cEbQ#D>K*oGfhVY_)_VczxJvB6?d%GVk9vJ8ed{1 zGt`M{m&rWosdDATTNbK*d9;L`{QUf^v=55XyobmY z`S>^632H4$y+|PH9>zWz&1Va@ zd(kBd5V3I9AjwM`R3BGz&0LZys29}a0xZuDb7WF0Ek-)9hdvZF01d~@fY^